summaryrefslogtreecommitdiffstats
path: root/swift/1.4.8/plugins/conf/swift.conf-gluster
diff options
context:
space:
mode:
Diffstat (limited to 'swift/1.4.8/plugins/conf/swift.conf-gluster')
-rw-r--r--swift/1.4.8/plugins/conf/swift.conf-gluster92
1 files changed, 92 insertions, 0 deletions
diff --git a/swift/1.4.8/plugins/conf/swift.conf-gluster b/swift/1.4.8/plugins/conf/swift.conf-gluster
new file mode 100644
index 00000000000..e506b6f54a8
--- /dev/null
+++ b/swift/1.4.8/plugins/conf/swift.conf-gluster
@@ -0,0 +1,92 @@
+[DEFAULT]
+Enable_plugin = yes
+
+
+[swift-hash]
+# random unique string that can never change (DO NOT LOSE)
+swift_hash_path_suffix = gluster
+
+
+# The swift-constraints section sets the basic constraints on data
+# saved in the swift cluster.
+
+[swift-constraints]
+
+# max_file_size is the largest "normal" object that can be saved in
+# the cluster. This is also the limit on the size of each segment of
+# a "large" object when using the large object manifest support.
+# This value is set in bytes. Setting it to lower than 1MiB will cause
+# some tests to fail. It is STRONGLY recommended to leave this value at
+# the default (5 * 2**30 + 2).
+
+# FIXME: Really? Gluster can handle a 2^64 sized file? And can the fronting
+# web service handle such a size? I think with UFO, we need to keep with the
+# default size from Swift and encourage users to research what size their web
+# services infrastructure can handle.
+
+max_file_size = 18446744073709551616
+
+
+# max_meta_name_length is the max number of bytes in the utf8 encoding
+# of the name portion of a metadata header.
+
+#max_meta_name_length = 128
+
+
+# max_meta_value_length is the max number of bytes in the utf8 encoding
+# of a metadata value
+
+#max_meta_value_length = 256
+
+
+# max_meta_count is the max number of metadata keys that can be stored
+# on a single account, container, or object
+
+#max_meta_count = 90
+
+
+# max_meta_overall_size is the max number of bytes in the utf8 encoding
+# of the metadata (keys + values)
+
+#max_meta_overall_size = 4096
+
+
+# max_object_name_length is the max number of bytes in the utf8 encoding of an
+# object name: Gluster FS can handle much longer file names, but the length
+# between the slashes of the URL is handled below. Remember that most web
+# clients can't handle anything greater than 2048, and those that do are
+# rather clumsy.
+
+max_object_name_length = 2048
+
+# max_object_name_component_length (GlusterFS) is the max number of bytes in
+# the utf8 encoding of an object name component (the part between the
+# slashes); this is a limit imposed by the underlying file system (for XFS it
+# is 255 bytes).
+
+max_object_name_component_length = 255
+
+# container_listing_limit is the default (and max) number of items
+# returned for a container listing request
+
+#container_listing_limit = 10000
+
+
+# account_listing_limit is the default (and max) number of items returned
+# for an account listing request
+
+#account_listing_limit = 10000
+
+
+# max_account_name_length is the max number of bytes in the utf8 encoding of
+# an account name: Gluster FS Filename limit (XFS limit?), must be the same
+# size as max_object_name_component_length above.
+
+max_account_name_length = 255
+
+
+# max_container_name_length is the max number of bytes in the utf8 encoding
+# of a container name: Gluster FS Filename limit (XFS limit?), must be the same
+# size as max_object_name_component_length above.
+
+max_container_name_length = 255
class='graph'>
-rwxr-xr-xapi/examples/autogen.sh5
-rw-r--r--api/examples/configure.ac12
-rwxr-xr-xapi/examples/getvolfile.py45
-rw-r--r--api/examples/glfsxmp.c1811
-rw-r--r--api/src/Makefile.am43
-rw-r--r--api/src/README.Symbol_Versions3
-rw-r--r--api/src/gfapi-messages.h147
-rw-r--r--api/src/gfapi.aliases201
-rw-r--r--api/src/gfapi.map283
-rw-r--r--api/src/glfs-fops.c6445
-rw-r--r--api/src/glfs-handleops.c2655
-rw-r--r--api/src/glfs-handles.h355
-rw-r--r--api/src/glfs-internal.h756
-rw-r--r--api/src/glfs-master.c183
-rw-r--r--api/src/glfs-mem-types.h35
-rw-r--r--api/src/glfs-mgmt.c1049
-rw-r--r--api/src/glfs-resolve.c1199
-rw-r--r--api/src/glfs.c1806
-rw-r--r--api/src/glfs.h1485
-rw-r--r--argp-standalone/Makefile.am38
-rw-r--r--argp-standalone/acinclude.m41084
-rw-r--r--argp-standalone/argp-ba.c26
-rw-r--r--argp-standalone/argp-eexst.c36
-rw-r--r--argp-standalone/argp-fmtstream.c475
-rw-r--r--argp-standalone/argp-fmtstream.h319
-rw-r--r--argp-standalone/argp-help.c1849
-rw-r--r--argp-standalone/argp-namefrob.h96
-rw-r--r--argp-standalone/argp-parse.c1305
-rw-r--r--argp-standalone/argp-pv.c25
-rw-r--r--argp-standalone/argp-pvh.c32
-rw-r--r--argp-standalone/argp.h602
-rwxr-xr-xargp-standalone/autogen.sh6
-rw-r--r--argp-standalone/configure.ac102
-rw-r--r--argp-standalone/mempcpy.c21
-rw-r--r--argp-standalone/strcasecmp.c29
-rw-r--r--argp-standalone/strchrnul.c23
-rw-r--r--argp-standalone/strndup.c34
-rw-r--r--argp-standalone/vsnprintf.c839
-rwxr-xr-xautogen.sh96
-rw-r--r--booster/Makefile.am1
-rw-r--r--booster/src/Makefile.am21
-rw-r--r--booster/src/booster-fd.c342
-rw-r--r--booster/src/booster-fd.h83
-rw-r--r--booster/src/booster.c3172
-rw-r--r--booster/src/booster_fstab.c452
-rw-r--r--booster/src/booster_fstab.h83
-rw-r--r--booster/src/booster_stat.c188
-rwxr-xr-xbuild-aux/checkpatch.pl4326
-rwxr-xr-xbuild-aux/config.guess.dist14
-rwxr-xr-xbuild-aux/config.sub.dist14
-rwxr-xr-xbuild-aux/pkg-version64
-rw-r--r--cli/src/Makefile.am37
-rw-r--r--cli/src/cli-cmd-global.c195
-rw-r--r--cli/src/cli-cmd-misc.c152
-rw-r--r--cli/src/cli-cmd-parser.c7168
-rw-r--r--cli/src/cli-cmd-peer.c431
-rw-r--r--cli/src/cli-cmd-snapshot.c135
-rw-r--r--cli/src/cli-cmd-system.c793
-rw-r--r--cli/src/cli-cmd-volume.c4282
-rw-r--r--cli/src/cli-cmd.c580
-rw-r--r--cli/src/cli-cmd.h173
-rw-r--r--cli/src/cli-mem-types.h41
-rw-r--r--cli/src/cli-quotad-client.c146
-rw-r--r--cli/src/cli-quotad-client.h29
-rw-r--r--cli/src/cli-rl.c535
-rw-r--r--cli/src/cli-rpc-ops.c14855
-rw-r--r--cli/src/cli-xml-output.c7523
-rw-r--r--cli/src/cli.c1233
-rw-r--r--cli/src/cli.h564
-rw-r--r--cli/src/input.c134
-rw-r--r--cli/src/registry.c524
-rw-r--r--configure.ac1700
-rw-r--r--contrib/aclocal/mkdirp.m4146
-rw-r--r--contrib/aclocal/python.m4209
-rw-r--r--contrib/fuse-include/fuse-mount.h1
-rw-r--r--contrib/fuse-include/fuse_kernel.h578
-rw-r--r--contrib/fuse-include/fuse_kernel_macfuse.h461
-rw-r--r--contrib/fuse-include/mount_util.h (renamed from contrib/fuse-util/mount_util.h)0
-rw-r--r--contrib/fuse-lib/misc.c3
-rw-r--r--contrib/fuse-lib/mount-common.c282
-rw-r--r--contrib/fuse-lib/mount-gluster-compat.h105
-rw-r--r--contrib/fuse-lib/mount.c638
-rw-r--r--contrib/fuse-util/Makefile.am8
-rw-r--r--contrib/fuse-util/fusermount.c124
-rw-r--r--contrib/fuse-util/mount_util.c64
-rw-r--r--contrib/ipaddr-py/COPYING202
-rw-r--r--contrib/ipaddr-py/MANIFEST.in3
-rw-r--r--contrib/ipaddr-py/OWNERS4
-rw-r--r--contrib/ipaddr-py/README8
-rw-r--r--contrib/ipaddr-py/ipaddr.py1907
-rwxr-xr-xcontrib/ipaddr-py/ipaddr_test.py1099
-rwxr-xr-xcontrib/ipaddr-py/setup.py36
-rwxr-xr-xcontrib/ipaddr-py/test-2to3.sh15
-rw-r--r--contrib/libexecinfo/execinfo.c442
-rw-r--r--contrib/libexecinfo/execinfo_compat.h51
-rw-r--r--contrib/libgen/basename_r.c40
-rw-r--r--contrib/libgen/dirname_r.c243
-rw-r--r--contrib/macfuse/fuse_param.h97
-rw-r--r--contrib/macfuse/mount_darwin.c459
-rw-r--r--contrib/mount/mntent.c260
-rw-r--r--contrib/mount/mntent_compat.h37
-rw-r--r--contrib/stdlib/gf_mkostemp.c110
-rw-r--r--contrib/timer-wheel/find_last_bit.c61
-rw-r--r--contrib/timer-wheel/timer-wheel.c374
-rw-r--r--contrib/timer-wheel/timer-wheel.h77
-rw-r--r--contrib/umountd/Makefile.am11
-rw-r--r--contrib/umountd/umountd.c255
-rw-r--r--contrib/userspace-rcu/rculist-extra.h42
-rw-r--r--contrib/userspace-rcu/static-wfcqueue.h685
-rw-r--r--contrib/userspace-rcu/static-wfstack.h455
-rw-r--r--contrib/userspace-rcu/wfcqueue.h216
-rw-r--r--contrib/userspace-rcu/wfstack.h178
-rw-r--r--contrib/uuid/clear.c43
-rw-r--r--contrib/uuid/compare.c55
-rw-r--r--contrib/uuid/copy.c45
-rw-r--r--contrib/uuid/gen_uuid.c679
-rw-r--r--contrib/uuid/gen_uuid_nt.c92
-rw-r--r--contrib/uuid/isnull.c48
-rw-r--r--contrib/uuid/pack.c69
-rw-r--r--contrib/uuid/parse.c79
-rw-r--r--contrib/uuid/tst_uuid.c180
-rw-r--r--contrib/uuid/unpack.c63
-rw-r--r--contrib/uuid/unparse.c76
-rw-r--r--contrib/uuid/uuid.h104
-rw-r--r--contrib/uuid/uuidP.h63
-rw-r--r--contrib/uuid/uuid_time.c171
-rw-r--r--contrib/uuid/uuid_types.h.in50
-rw-r--r--contrib/uuid/uuidd.h54
-rw-r--r--contrib/xxhash/xxhash.c1029
-rw-r--r--contrib/xxhash/xxhash.h328
-rw-r--r--contrib/xxhash/xxhsum.c1301
-rw-r--r--doc/Makefile.am13
-rw-r--r--doc/README.md26
-rw-r--r--doc/admin-guide/en-US/Administration_Guide.ent4
-rw-r--r--doc/admin-guide/en-US/Administration_Guide.xml27
-rw-r--r--doc/admin-guide/en-US/Author_Group.xml17
-rw-r--r--doc/admin-guide/en-US/Book_Info.xml28
-rw-r--r--doc/admin-guide/en-US/Chapter.xml33
-rw-r--r--doc/admin-guide/en-US/Preface.xml24
-rw-r--r--doc/admin-guide/en-US/Revision_History.xml27
-rw-r--r--doc/admin-guide/en-US/admin_ACLs.xml211
-rw-r--r--doc/admin-guide/en-US/admin_Hadoop.xml248
-rw-r--r--doc/admin-guide/en-US/admin_UFO.xml1580
-rw-r--r--doc/admin-guide/en-US/admin_commandref.xml336
-rw-r--r--doc/admin-guide/en-US/admin_console.xml29
-rw-r--r--doc/admin-guide/en-US/admin_directory_Quota.xml180
-rw-r--r--doc/admin-guide/en-US/admin_geo-replication.xml730
-rw-r--r--doc/admin-guide/en-US/admin_managing_volumes.xml742
-rw-r--r--doc/admin-guide/en-US/admin_monitoring_workload.xml878
-rw-r--r--doc/admin-guide/en-US/admin_setting_volumes.xml325
-rw-r--r--doc/admin-guide/en-US/admin_settingup_clients.xml411
-rw-r--r--doc/admin-guide/en-US/admin_start_stop_daemon.xml56
-rw-r--r--doc/admin-guide/en-US/admin_storage_pools.xml57
-rw-r--r--doc/admin-guide/en-US/admin_troubleshooting.xml508
-rw-r--r--doc/admin-guide/en-US/gfs_introduction.xml54
-rw-r--r--doc/admin-guide/en-US/glossary.xml126
-rw-r--r--doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.pngbin97477 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Replicated_Volume.pngbin51326 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.pngbin57210 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Striped_Volume.pngbin53781 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Volume.pngbin47211 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep03_Internet.pngbin131824 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep04_Cascading.pngbin187341 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep_LAN.pngbin163417 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep_WAN.pngbin96291 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.pngbin133597 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Hadoop_Architecture.pngbin43815 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Replicated_Volume.pngbin44077 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Striped_Replicated_Volume.pngbin50193 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Striped_Volume.pngbin43316 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/UFO_Architecture.pngbin72139 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/VSA_Architecture.pngbin38875 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/arhitecture.png13
-rw-r--r--doc/admin-guide/en-US/images/icon.svg19
-rw-r--r--doc/admin-guide/publican.cfg12
-rw-r--r--doc/debugging/analyzing-regression-cores.md54
-rw-r--r--doc/debugging/gfid-to-path.md68
-rw-r--r--doc/debugging/mem-alloc-list.md19
-rw-r--r--doc/debugging/split-brain.md264
-rw-r--r--doc/debugging/statedump.md414
-rw-r--r--doc/developer-guide/Language-Bindings.md45
-rw-r--r--doc/developer-guide/README.md81
-rw-r--r--doc/developer-guide/Using-Gluster-Test-Framework.md271
-rw-r--r--doc/developer-guide/adding-fops.md18
-rw-r--r--doc/developer-guide/afr-locks-evolution.md91
-rw-r--r--doc/developer-guide/afr-self-heal-daemon.md92
-rw-r--r--doc/developer-guide/afr.md191
-rw-r--r--doc/developer-guide/brickmux-thread-reduction.md64
-rw-r--r--doc/developer-guide/coding-standard.md697
-rw-r--r--doc/developer-guide/commit-guidelines.md136
-rw-r--r--doc/developer-guide/daemon-management-framework.md38
-rw-r--r--doc/developer-guide/datastructure-inode.md221
-rw-r--r--doc/developer-guide/datastructure-iobuf.md259
-rw-r--r--doc/developer-guide/datastructure-mem-pool.md124
-rw-r--r--doc/developer-guide/dirops-transactions-in-dht.md273
-rw-r--r--doc/developer-guide/ec-implementation.md588
-rw-r--r--doc/developer-guide/fuse-interrupt.md211
-rw-r--r--doc/developer-guide/gfapi-symbol-versions.md270
-rw-r--r--doc/developer-guide/identifying-resource-leaks.md200
-rw-r--r--doc/developer-guide/logging-guidelines.md132
-rw-r--r--doc/developer-guide/network_compression.md71
-rw-r--r--doc/developer-guide/options-to-contribute.md212
-rw-r--r--doc/developer-guide/posix.md59
-rw-r--r--doc/developer-guide/rpc-for-glusterfs.changes-done.txt (renamed from doc/legacy/rpc-for-glusterfs.changes-done.txt)0
-rw-r--r--doc/developer-guide/rpc-for-glusterfs.new-versions.md32
-rw-r--r--doc/developer-guide/syncop.md72
-rw-r--r--doc/developer-guide/thread-naming.md104
-rw-r--r--doc/developer-guide/translator-development.md683
-rw-r--r--doc/developer-guide/unittest.md228
-rw-r--r--doc/developer-guide/versioning.md44
-rw-r--r--doc/developer-guide/write-behind.md56
-rw-r--r--doc/developer-guide/writing-a-cloudsync-plugin.md164
-rw-r--r--doc/developer-guide/xlator-classification.md221
-rw-r--r--doc/examples/legacy/Makefile.am8
-rw-r--r--doc/examples/legacy/README13
-rw-r--r--doc/examples/legacy/filter.vol23
-rw-r--r--doc/examples/legacy/io-cache.vol31
-rw-r--r--doc/examples/legacy/io-threads.vol22
-rw-r--r--doc/examples/legacy/posix-locks.vol19
-rw-r--r--doc/examples/legacy/protocol-client.vol12
-rw-r--r--doc/examples/legacy/protocol-server.vol21
-rw-r--r--doc/examples/legacy/read-ahead.vol24
-rw-r--r--doc/examples/legacy/replicate.vol118
-rw-r--r--doc/examples/legacy/stripe.vol120
-rw-r--r--doc/examples/legacy/trace.vol21
-rw-r--r--doc/examples/legacy/trash.vol20
-rw-r--r--doc/examples/legacy/write-behind.vol27
-rw-r--r--doc/features/ctime.md68
-rw-r--r--doc/features/ganesha-ha.md43
-rw-r--r--doc/gluster.8280
-rw-r--r--doc/glusterd.824
-rw-r--r--doc/glusterd.vol8
-rw-r--r--doc/glusterfs.8104
-rw-r--r--doc/glusterfs.vol.sample53
-rw-r--r--doc/glusterfsd.834
-rw-r--r--doc/glusterfsd.vol.sample44
-rw-r--r--doc/legacy/authentication.txt112
-rw-r--r--doc/legacy/booster.txt54
-rw-r--r--doc/legacy/coding-standard.pdfbin68627 -> 0 bytes-rw-r--r--doc/legacy/coding-standard.tex385
-rw-r--r--doc/legacy/errno.list.bsd.txt376
-rw-r--r--doc/legacy/errno.list.linux.txt1586
-rw-r--r--doc/legacy/errno.list.macosx.txt1513
-rw-r--r--doc/legacy/errno.list.solaris.txt206
-rw-r--r--doc/legacy/get_put_api_using_xattr.txt22
-rw-r--r--doc/legacy/hacker-guide/Makefile.am8
-rw-r--r--doc/legacy/hacker-guide/adding-fops.txt33
-rw-r--r--doc/legacy/hacker-guide/bdb.txt70
-rw-r--r--doc/legacy/hacker-guide/call-stub.txt1033
-rw-r--r--doc/legacy/hacker-guide/hacker-guide.tex309
-rw-r--r--doc/legacy/hacker-guide/lock-ahead.txt80
-rw-r--r--doc/legacy/hacker-guide/posix.txt59
-rw-r--r--doc/legacy/hacker-guide/replicate.txt206
-rw-r--r--doc/legacy/hacker-guide/write-behind.txt45
-rw-r--r--doc/legacy/handling-options.txt13
-rw-r--r--doc/legacy/mac-related-xattrs.txt21
-rw-r--r--doc/legacy/porting_guide.txt45
-rw-r--r--doc/legacy/replicate.lyx797
-rw-r--r--doc/legacy/replicate.pdfbin109057 -> 0 bytes-rw-r--r--doc/legacy/solaris-related-xattrs.txt44
-rw-r--r--doc/legacy/stat-prefetch-design.txt154
-rw-r--r--doc/legacy/translator-options.txt224
-rw-r--r--doc/mount.glusterfs.8154
-rw-r--r--doc/qa/legacy/qa-client.vol170
-rw-r--r--doc/qa/legacy/qa-high-avail-client.vol17
-rw-r--r--doc/qa/legacy/qa-high-avail-server.vol344
-rw-r--r--doc/qa/legacy/qa-server.vol284
-rw-r--r--doc/release-notes/en-US/Author_Group.xml17
-rw-r--r--doc/release-notes/en-US/Book_Info.xml28
-rw-r--r--doc/release-notes/en-US/Chapter.xml33
-rw-r--r--doc/release-notes/en-US/Download_Install.xml107
-rw-r--r--doc/release-notes/en-US/Key_Features.xml72
-rw-r--r--doc/release-notes/en-US/Known_Issues.xml164
-rw-r--r--doc/release-notes/en-US/Preface.xml24
-rw-r--r--doc/release-notes/en-US/Product_Documentation.xml12
-rw-r--r--doc/release-notes/en-US/Product_Support.xml12
-rw-r--r--doc/release-notes/en-US/Release_Notes.ent4
-rw-r--r--doc/release-notes/en-US/Release_Notes.xml17
-rw-r--r--doc/release-notes/en-US/Revision_History.xml27
-rw-r--r--doc/release-notes/en-US/Whats_New.xml90
-rw-r--r--doc/release-notes/en-US/gfs_introduction.xml54
-rw-r--r--doc/release-notes/en-US/images/640px-GlusterFS_3.2_Architecture.pngbin97477 -> 0 bytes-rw-r--r--doc/release-notes/en-US/images/icon.svg19
-rw-r--r--doc/release-notes/publican.cfg12
-rw-r--r--doc/user-guide/legacy/Makefile.am3
-rw-r--r--doc/user-guide/legacy/advanced-stripe.odgbin12648 -> 0 bytes-rw-r--r--doc/user-guide/legacy/advanced-stripe.pdfbin13382 -> 0 bytes-rw-r--r--doc/user-guide/legacy/colonO-icon.jpgbin779 -> 0 bytes-rw-r--r--doc/user-guide/legacy/fdl.texi454
-rw-r--r--doc/user-guide/legacy/fuse.odgbin13190 -> 0 bytes-rw-r--r--doc/user-guide/legacy/fuse.pdfbin14948 -> 0 bytes-rw-r--r--doc/user-guide/legacy/ha.odgbin37290 -> 0 bytes-rw-r--r--doc/user-guide/legacy/ha.pdfbin19403 -> 0 bytes-rw-r--r--doc/user-guide/legacy/stripe.odgbin10188 -> 0 bytes-rw-r--r--doc/user-guide/legacy/stripe.pdfbin11941 -> 0 bytes-rw-r--r--doc/user-guide/legacy/unify.odgbin12955 -> 0 bytes-rw-r--r--doc/user-guide/legacy/unify.pdfbin18969 -> 0 bytes-rw-r--r--doc/user-guide/legacy/user-guide.info2697
-rw-r--r--doc/user-guide/legacy/user-guide.pdfbin353986 -> 0 bytes-rw-r--r--doc/user-guide/legacy/user-guide.texi2246
-rw-r--r--doc/user-guide/legacy/xlator.odgbin12169 -> 0 bytes-rw-r--r--doc/user-guide/legacy/xlator.pdfbin14358 -> 0 bytes-rw-r--r--events/Makefile.am8
-rw-r--r--events/eventskeygen.py243
-rw-r--r--events/src/Makefile.am41
-rw-r--r--events/src/__init__.py10
-rw-r--r--events/src/eventsapiconf.py.in59
-rw-r--r--events/src/eventsconfig.json5
-rw-r--r--events/src/gf_event.py60
-rw-r--r--events/src/glustereventsd.py159
-rw-r--r--events/src/handlers.py40
-rw-r--r--events/src/peer_eventsapi.py669
-rw-r--r--events/src/utils.py445
-rw-r--r--events/tools/Makefile.am6
-rw-r--r--events/tools/eventsdash.py75
-rw-r--r--extras/LinuxRPM/Makefile.am55
-rwxr-xr-xextras/LinuxRPM/make_glusterrpms9
-rw-r--r--extras/MacOSX/Portfile26
-rw-r--r--extras/MacOSX/README.MacOSX84
-rw-r--r--extras/Makefile.am79
-rw-r--r--extras/Ubuntu/README.Ubuntu14
-rw-r--r--extras/Ubuntu/glusterfs-server.conf (renamed from extras/Ubuntu/glusterd.conf)0
-rw-r--r--extras/Ubuntu/mounting-glusterfs.conf5
-rw-r--r--extras/backend-cleanup.sh2
-rw-r--r--extras/benchmarking/Makefile.am4
-rw-r--r--extras/benchmarking/glfs-bm.c534
-rw-r--r--extras/benchmarking/rdd.c1025
-rwxr-xr-xextras/check_goto.pl45
-rwxr-xr-xextras/clang-checker.sh301
-rw-r--r--extras/cliutils/Makefile.am4
-rw-r--r--extras/cliutils/README.md233
-rw-r--r--extras/cliutils/__init__.py31
-rw-r--r--extras/cliutils/cliutils.py237
-rwxr-xr-xextras/collect-system-stats.sh52
-rw-r--r--extras/command-completion/Makefile6
-rw-r--r--extras/command-completion/README5
-rw-r--r--extras/command-completion/gluster.bash492
-rwxr-xr-xextras/control-cpu-load.sh116
-rwxr-xr-xextras/control-mem.sh128
-rw-r--r--extras/create_new_xlator/README.md24
-rwxr-xr-xextras/create_new_xlator/generate_xlator.py208
-rw-r--r--extras/create_new_xlator/new-xlator.c.tmpl151
-rw-r--r--extras/devel-tools/devel-vagrant/Vagrantfile165
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml5
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml29
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml72
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml3
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml30
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml3
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml21
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/setup.yml23
-rw-r--r--extras/devel-tools/devel-vagrant/bootstrap.sh3
-rwxr-xr-xextras/devel-tools/devel-vagrant/up.sh4
-rw-r--r--extras/devel-tools/gdb_macros84
-rwxr-xr-xextras/devel-tools/print-backtrace.sh115
-rwxr-xr-xextras/devel-tools/strace-brick.sh55
-rw-r--r--extras/distributed-testing/README28
-rw-r--r--extras/distributed-testing/distributed-test-build-env20
-rwxr-xr-xextras/distributed-testing/distributed-test-build.sh27
-rw-r--r--extras/distributed-testing/distributed-test-env48
-rwxr-xr-xextras/distributed-testing/distributed-test-runner.py859
-rwxr-xr-xextras/distributed-testing/distributed-test.sh95
-rw-r--r--extras/ec-heal-script/README.md69
-rwxr-xr-xextras/ec-heal-script/correct_pending_heals.sh415
-rwxr-xr-xextras/ec-heal-script/gfid_needing_heal_parallel.sh278
-rwxr-xr-xextras/failed-tests.py180
-rw-r--r--extras/firewalld/Makefile.am8
-rw-r--r--extras/firewalld/glusterfs.xml14
-rw-r--r--extras/ganesha/Makefile.am2
-rw-r--r--extras/ganesha/config/Makefile.am4
-rw-r--r--extras/ganesha/config/ganesha-ha.conf.sample19
-rw-r--r--extras/ganesha/ocf/Makefile.am11
-rw-r--r--extras/ganesha/ocf/ganesha_grace221
-rw-r--r--extras/ganesha/ocf/ganesha_mon234
-rw-r--r--extras/ganesha/ocf/ganesha_nfsd167
-rw-r--r--extras/ganesha/scripts/Makefile.am6
-rwxr-xr-xextras/ganesha/scripts/create-export-ganesha.sh92
-rwxr-xr-xextras/ganesha/scripts/dbus-send.sh70
-rw-r--r--extras/ganesha/scripts/ganesha-ha.sh1199
-rwxr-xr-xextras/ganesha/scripts/generate-epoch.py48
-rwxr-xr-xextras/generate-xdr-files.sh107
-rw-r--r--extras/geo-rep/Makefile.am16
-rw-r--r--extras/geo-rep/generate-gfid-file.sh70
-rwxr-xr-xextras/geo-rep/get-gfid.sh7
-rw-r--r--extras/geo-rep/gsync-sync-gfid.c109
-rw-r--r--extras/geo-rep/gsync-upgrade.sh123
-rw-r--r--extras/geo-rep/schedule_georep.py.in492
-rw-r--r--extras/geo-rep/slave-upgrade.sh102
-rwxr-xr-xextras/gfid-to-dirname.sh46
-rwxr-xr-xextras/git-branch-diff.py285
-rw-r--r--extras/gluster-rsyslog-5.8.conf51
-rw-r--r--extras/gluster-rsyslog-7.2.conf76
-rw-r--r--extras/glusterd-sysconfig6
-rw-r--r--extras/glusterd.vol.in15
-rw-r--r--extras/glusterfs-georep-logrotate61
-rwxr-xr-xextras/glusterfs-georep-upgrade.py77
-rw-r--r--extras/glusterfs-logrotate63
-rw-r--r--extras/glusterfs-mode.el225
-rw-r--r--extras/glusterfs.vim21
-rwxr-xr-x[-rw-r--r--]extras/gnfs-loganalyse.py23
-rw-r--r--extras/group-db-workload12
-rw-r--r--extras/group-distributed-virt10
-rw-r--r--extras/group-gluster-block27
-rw-r--r--extras/group-metadata-cache6
-rw-r--r--extras/group-nl-cache5
-rw-r--r--extras/group-samba11
-rw-r--r--extras/group-virt.example30
-rw-r--r--extras/hook-scripts/Makefile.am9
-rwxr-xr-xextras/hook-scripts/S40ufo-stop.py24
-rwxr-xr-xextras/hook-scripts/S56glusterd-geo-rep-create-post.sh104
-rw-r--r--extras/hook-scripts/add-brick/Makefile.am2
-rw-r--r--extras/hook-scripts/add-brick/post/Makefile.am6
-rwxr-xr-xextras/hook-scripts/add-brick/post/S10selinux-label-brick.sh100
-rwxr-xr-xextras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh86
-rwxr-xr-xextras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh145
-rw-r--r--extras/hook-scripts/add-brick/pre/Makefile.am6
-rwxr-xr-xextras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh101
-rw-r--r--extras/hook-scripts/create/Makefile.am1
-rw-r--r--extras/hook-scripts/create/post/Makefile.am8
-rwxr-xr-xextras/hook-scripts/create/post/S10selinux-label-brick.sh65
-rw-r--r--extras/hook-scripts/delete/Makefile.am1
-rw-r--r--extras/hook-scripts/delete/pre/Makefile.am8
-rwxr-xr-xextras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh73
-rw-r--r--extras/hook-scripts/reset/Makefile.am2
-rw-r--r--extras/hook-scripts/reset/post/Makefile.am1
-rw-r--r--extras/hook-scripts/reset/pre/Makefile.am1
-rw-r--r--extras/hook-scripts/set/Makefile.am2
-rw-r--r--extras/hook-scripts/set/post/Makefile.am6
-rwxr-xr-x[-rw-r--r--]extras/hook-scripts/set/post/S30samba-set.sh196
-rwxr-xr-xextras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh136
-rw-r--r--extras/hook-scripts/start/post/Makefile.am7
-rwxr-xr-x[-rw-r--r--]extras/hook-scripts/start/post/S29CTDBsetup.sh77
-rwxr-xr-x[-rw-r--r--]extras/hook-scripts/start/post/S30samba-start.sh159
-rwxr-xr-xextras/hook-scripts/start/post/S31ganesha-start.sh122
-rw-r--r--extras/hook-scripts/stop/pre/Makefile.am7
-rwxr-xr-x[-rw-r--r--]extras/hook-scripts/stop/pre/S29CTDB-teardown.sh72
-rwxr-xr-x[-rw-r--r--]extras/hook-scripts/stop/pre/S30samba-stop.sh93
-rwxr-xr-xextras/identify-hangs.sh53
-rw-r--r--extras/init.d/Makefile.am33
-rw-r--r--extras/init.d/glusterd-FreeBSD.in24
-rwxr-xr-xextras/init.d/glusterd-Redhat.in115
-rwxr-xr-xextras/init.d/glusterd-SuSE.in10
-rw-r--r--extras/init.d/glusterd.plist.in2
-rw-r--r--extras/init.d/glustereventsd-Debian.in91
-rw-r--r--extras/init.d/glustereventsd-FreeBSD.in19
-rw-r--r--extras/init.d/glustereventsd-Redhat.in129
-rw-r--r--extras/logger.conf.example13
-rwxr-xr-xextras/mount-shared-storage.sh39
-rw-r--r--extras/ocf/Makefile.am11
-rwxr-xr-xextras/ocf/glusterd.in212
-rwxr-xr-xextras/ocf/volume.in276
-rw-r--r--extras/peer_add_secret_pub.in70
-rwxr-xr-xextras/post-upgrade-script-for-quota.sh65
-rwxr-xr-xextras/pre-upgrade-script-for-quota.sh18
-rwxr-xr-xextras/profiler/glusterfs-profiler27
-rw-r--r--extras/python/Makefile.am7
-rw-r--r--extras/python/__init__.py2
-rwxr-xr-xextras/quota-metadata-cleanup.sh24
-rwxr-xr-xextras/quota-remove-xattr.sh24
-rwxr-xr-xextras/quota/contri-add.sh (renamed from extras/contri-add.sh)0
-rwxr-xr-xextras/quota/log_accounting.sh26
-rwxr-xr-xextras/quota/quota_fsck.py377
-rwxr-xr-xextras/quota/xattr_analysis.py73
-rwxr-xr-xextras/rebalance.py309
-rw-r--r--extras/rhel_install.sh9
-rw-r--r--extras/run-gluster.tmpfiles.in2
-rw-r--r--extras/snap_scheduler/Makefile.am9
-rw-r--r--extras/snap_scheduler/README.md125
-rw-r--r--extras/snap_scheduler/conf.py.in11
-rwxr-xr-xextras/snap_scheduler/gcron.py190
-rwxr-xr-xextras/snap_scheduler/snap_scheduler.py941
-rwxr-xr-xextras/statedumpparse.rb208
-rwxr-xr-xextras/stop-all-gluster-processes.sh193
-rw-r--r--extras/stripe-merge.c697
-rw-r--r--extras/systemd/Makefile.am17
-rw-r--r--extras/systemd/gluster-ta-volume.service.in13
-rw-r--r--extras/systemd/glusterd.service.in26
-rw-r--r--extras/systemd/glustereventsd.service.in16
-rw-r--r--extras/systemd/glusterfssharedstorage.service.in13
-rwxr-xr-xextras/test/bug-920583.t50
-rwxr-xr-xextras/test/gluster_commands.sh40
-rw-r--r--extras/test/ld-preload-test/README4
-rw-r--r--extras/test/ld-preload-test/ld-preload-lib.c620
-rw-r--r--extras/test/ld-preload-test/ld-preload-test.c527
-rw-r--r--extras/test/open-fd-tests.c85
-rwxr-xr-xextras/test/run.sh23
-rwxr-xr-xextras/test/stop_glusterd.sh23
-rw-r--r--extras/test/test-ffop.c971
-rwxr-xr-xextras/thin-arbiter/setup-thin-arbiter.sh184
-rw-r--r--extras/thin-arbiter/thin-arbiter.vol57
-rw-r--r--extras/volfilter.py168
-rw-r--r--extras/who-wrote-glusterfs/gitdm.aliases58
-rw-r--r--extras/who-wrote-glusterfs/gitdm.config8
-rw-r--r--extras/who-wrote-glusterfs/gitdm.domain-map29
-rwxr-xr-xextras/who-wrote-glusterfs/who-wrote-glusterfs.sh50
-rw-r--r--geo-replication/Makefile.am8
-rw-r--r--geo-replication/gsyncd.conf.in349
-rw-r--r--geo-replication/setup.py32
-rw-r--r--geo-replication/src/Makefile.am48
-rw-r--r--geo-replication/src/gsyncd.c402
-rwxr-xr-xgeo-replication/src/gverify.sh276
-rw-r--r--geo-replication/src/peer_georep-sshkey.py.in116
-rwxr-xr-xgeo-replication/src/peer_gsec_create.in24
-rw-r--r--geo-replication/src/peer_mountbroker.in211
-rw-r--r--geo-replication/src/peer_mountbroker.py.in401
-rw-r--r--geo-replication/src/procdiggy.c136
-rw-r--r--geo-replication/src/procdiggy.h21
-rwxr-xr-xgeo-replication/src/set_geo_rep_pem_keys.sh58
-rw-r--r--geo-replication/syncdaemon/Makefile.am8
-rw-r--r--geo-replication/syncdaemon/README.md (renamed from xlators/features/marker/utils/syncdaemon/README.md)42
-rw-r--r--geo-replication/syncdaemon/__codecheck.py (renamed from xlators/features/marker/utils/syncdaemon/__codecheck.py)20
-rw-r--r--geo-replication/syncdaemon/__init__.py9
-rw-r--r--geo-replication/syncdaemon/argsupgrade.py359
-rw-r--r--geo-replication/syncdaemon/conf.py.in17
-rw-r--r--geo-replication/syncdaemon/gsyncd.py325
-rw-r--r--geo-replication/syncdaemon/gsyncdconfig.py485
-rw-r--r--geo-replication/syncdaemon/gsyncdstatus.py419
-rw-r--r--geo-replication/syncdaemon/libcxattr.py112
-rw-r--r--geo-replication/syncdaemon/libgfchangelog.py143
-rw-r--r--geo-replication/syncdaemon/logutils.py77
-rw-r--r--geo-replication/syncdaemon/master.py2020
-rw-r--r--geo-replication/syncdaemon/monitor.py395
-rw-r--r--geo-replication/syncdaemon/py2py3.py184
-rw-r--r--geo-replication/syncdaemon/rconf.py31
-rw-r--r--geo-replication/syncdaemon/repce.py (renamed from xlators/features/marker/utils/syncdaemon/repce.py)82
-rw-r--r--geo-replication/syncdaemon/resource.py1583
-rw-r--r--geo-replication/syncdaemon/subcmds.py335
-rw-r--r--geo-replication/syncdaemon/syncdutils.py1115
-rw-r--r--geo-replication/test-requirements.txt7
-rw-r--r--geo-replication/tests/__init__.py9
-rw-r--r--geo-replication/tests/unit/__init__.py9
-rwxr-xr-xgeo-replication/tests/unit/test_gsyncdstatus.py193
-rw-r--r--geo-replication/tests/unit/test_syncdutils.py29
-rw-r--r--geo-replication/tox.ini32
-rw-r--r--geo-replication/unittests.sh9
-rw-r--r--glusterfs-api.pc.in12
-rw-r--r--glusterfs-hadoop/0.20.2/conf/core-site.xml38
-rw-r--r--glusterfs-hadoop/0.20.2/pom.xml36
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java109
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java52
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java472
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java205
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java86
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java492
-rw-r--r--glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java38
-rwxr-xr-xglusterfs-hadoop/0.20.2/tools/build-deploy-jar.py212
-rw-r--r--glusterfs-hadoop/COPYING202
-rw-r--r--glusterfs-hadoop/README182
-rw-r--r--glusterfs.spec.in2200
-rw-r--r--glusterfsd/src/Makefile.am39
-rw-r--r--glusterfsd/src/gf_attach.c241
-rw-r--r--glusterfsd/src/glusterfsd-mem-types.h36
-rw-r--r--glusterfsd/src/glusterfsd-messages.h93
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c4522
-rw-r--r--glusterfsd/src/glusterfsd.c3722
-rw-r--r--glusterfsd/src/glusterfsd.h208
-rw-r--r--heal/Makefile.am (renamed from scheduler/nufa/Makefile.am)2
-rw-r--r--heal/src/Makefile.am29
-rw-r--r--heal/src/glfs-heal.c1793
-rw-r--r--libgfchangelog.pc.in12
-rw-r--r--libglusterd/Makefile.am (renamed from mod_glusterfs/apache/2.2/Makefile.am)2
-rw-r--r--libglusterd/src/Makefile.am31
-rw-r--r--libglusterd/src/gd-common-utils.c78
-rw-r--r--libglusterd/src/gd-common-utils.h28
-rw-r--r--libglusterd/src/libglusterd.sym2
-rw-r--r--libglusterfs/src/Makefile.am140
-rw-r--r--libglusterfs/src/async.c720
-rw-r--r--libglusterfs/src/byte-order.h301
-rw-r--r--libglusterfs/src/call-stub.c4909
-rw-r--r--libglusterfs/src/call-stub.h1134
-rw-r--r--libglusterfs/src/changelog.h115
-rw-r--r--libglusterfs/src/checksum.c51
-rw-r--r--libglusterfs/src/circ-buff.c275
-rw-r--r--libglusterfs/src/circ-buff.h63
-rw-r--r--libglusterfs/src/client_t.c825
-rw-r--r--libglusterfs/src/cluster-syncop.c1261
-rw-r--r--libglusterfs/src/common-utils.c6188
-rw-r--r--libglusterfs/src/common-utils.h487
-rw-r--r--libglusterfs/src/compat-errno.c1731
-rw-r--r--libglusterfs/src/compat-errno.h231
-rw-r--r--libglusterfs/src/compat.c906
-rw-r--r--libglusterfs/src/compat.h393
-rw-r--r--libglusterfs/src/ctx.c97
-rw-r--r--libglusterfs/src/daemon.c69
-rw-r--r--libglusterfs/src/default-args.c1651
-rw-r--r--libglusterfs/src/defaults-tmpl.c247
-rw-r--r--libglusterfs/src/defaults.c1354
-rw-r--r--libglusterfs/src/defaults.h673
-rw-r--r--libglusterfs/src/dict.c4349
-rw-r--r--libglusterfs/src/dict.h230
-rw-r--r--libglusterfs/src/event-epoll.c1032
-rw-r--r--libglusterfs/src/event-history.c83
-rw-r--r--libglusterfs/src/event-history.h43
-rw-r--r--libglusterfs/src/event-poll.c513
-rw-r--r--libglusterfs/src/event.c1064
-rw-r--r--libglusterfs/src/event.h81
-rw-r--r--libglusterfs/src/events.c136
-rw-r--r--libglusterfs/src/fd-lk.c663
-rw-r--r--libglusterfs/src/fd-lk.h66
-rw-r--r--libglusterfs/src/fd.c1723
-rw-r--r--libglusterfs/src/fd.h181
-rwxr-xr-xlibglusterfs/src/gen-defaults.py81
-rwxr-xr-xlibglusterfs/src/generator.py777
-rw-r--r--libglusterfs/src/gf-dirent.c315
-rw-r--r--libglusterfs/src/gf-dirent.h58
-rw-r--r--libglusterfs/src/gidcache.c298
-rw-r--r--libglusterfs/src/globals.c567
-rw-r--r--libglusterfs/src/globals.h45
-rw-r--r--libglusterfs/src/glusterfs.h446
-rw-r--r--libglusterfs/src/glusterfs/async.h209
-rw-r--r--libglusterfs/src/glusterfs/atomic.h459
-rw-r--r--libglusterfs/src/glusterfs/byte-order.h279
-rw-r--r--libglusterfs/src/glusterfs/call-stub.h622
-rw-r--r--libglusterfs/src/glusterfs/checksum.h (renamed from libglusterfs/src/checksum.h)6
-rw-r--r--libglusterfs/src/glusterfs/circ-buff.h61
-rw-r--r--libglusterfs/src/glusterfs/client_t.h147
-rw-r--r--libglusterfs/src/glusterfs/cluster-syncop.h227
-rw-r--r--libglusterfs/src/glusterfs/common-utils.h1256
-rw-r--r--libglusterfs/src/glusterfs/compat-errno.h238
-rw-r--r--libglusterfs/src/glusterfs/compat-uuid.h71
-rw-r--r--libglusterfs/src/glusterfs/compat.h544
-rw-r--r--libglusterfs/src/glusterfs/daemon.h (renamed from libglusterfs/src/daemon.h)11
-rw-r--r--libglusterfs/src/glusterfs/default-args.h455
-rw-r--r--libglusterfs/src/glusterfs/defaults.h1275
-rw-r--r--libglusterfs/src/glusterfs/dict.h420
-rw-r--r--libglusterfs/src/glusterfs/event-history.h40
-rw-r--r--libglusterfs/src/glusterfs/events.h34
-rw-r--r--libglusterfs/src/glusterfs/fd-lk.h59
-rw-r--r--libglusterfs/src/glusterfs/fd.h169
-rw-r--r--libglusterfs/src/glusterfs/gf-dirent.h71
-rw-r--r--libglusterfs/src/glusterfs/gf-event.h140
-rw-r--r--libglusterfs/src/glusterfs/gidcache.h (renamed from libglusterfs/src/gidcache.h)42
-rw-r--r--libglusterfs/src/glusterfs/glfs-message-id.h102
-rw-r--r--libglusterfs/src/glusterfs/globals.h188
-rw-r--r--libglusterfs/src/glusterfs/glusterfs-acl.h162
-rw-r--r--libglusterfs/src/glusterfs/glusterfs-fops.h241
-rw-r--r--libglusterfs/src/glusterfs/glusterfs.h838
-rw-r--r--libglusterfs/src/glusterfs/graph-utils.h (renamed from libglusterfs/src/graph-utils.h)12
-rw-r--r--libglusterfs/src/glusterfs/hashfn.h (renamed from libglusterfs/src/hashfn.h)12
-rw-r--r--libglusterfs/src/glusterfs/iatt.h489
-rw-r--r--libglusterfs/src/glusterfs/inode.h306
-rw-r--r--libglusterfs/src/glusterfs/iobuf.h194
-rw-r--r--libglusterfs/src/glusterfs/latency.h33
-rw-r--r--libglusterfs/src/glusterfs/libglusterfs-messages.h245
-rw-r--r--libglusterfs/src/glusterfs/list.h273
-rw-r--r--libglusterfs/src/glusterfs/lkowner.h93
-rw-r--r--libglusterfs/src/glusterfs/locking.h84
-rw-r--r--libglusterfs/src/glusterfs/logging.h383
-rw-r--r--libglusterfs/src/glusterfs/lvm-defaults.h20
-rw-r--r--libglusterfs/src/glusterfs/mem-pool.h336
-rw-r--r--libglusterfs/src/glusterfs/mem-types.h139
-rw-r--r--libglusterfs/src/glusterfs/monitoring.h21
-rw-r--r--libglusterfs/src/glusterfs/options.h327
-rw-r--r--libglusterfs/src/glusterfs/parse-utils.h50
-rw-r--r--libglusterfs/src/glusterfs/quota-common-utils.h68
-rw-r--r--libglusterfs/src/glusterfs/rbthash.h75
-rw-r--r--libglusterfs/src/glusterfs/refcount.h101
-rw-r--r--libglusterfs/src/glusterfs/revision.h1
-rw-r--r--libglusterfs/src/glusterfs/rot-buffs.h125
-rw-r--r--libglusterfs/src/glusterfs/run.h (renamed from libglusterfs/src/run.h)63
-rw-r--r--libglusterfs/src/glusterfs/stack.h555
-rw-r--r--libglusterfs/src/glusterfs/statedump.h132
-rw-r--r--libglusterfs/src/glusterfs/store.h112
-rw-r--r--libglusterfs/src/glusterfs/strfd.h34
-rw-r--r--libglusterfs/src/glusterfs/syncop-utils.h54
-rw-r--r--libglusterfs/src/glusterfs/syncop.h718
-rw-r--r--libglusterfs/src/glusterfs/syscall.h278
-rw-r--r--libglusterfs/src/glusterfs/template-component-messages.h28
-rw-r--r--libglusterfs/src/glusterfs/throttle-tbf.h74
-rw-r--r--libglusterfs/src/glusterfs/timer.h56
-rw-r--r--libglusterfs/src/glusterfs/timespec.h33
-rw-r--r--libglusterfs/src/glusterfs/trie.h52
-rw-r--r--libglusterfs/src/glusterfs/upcall-utils.h110
-rw-r--r--libglusterfs/src/glusterfs/xlator.h1106
-rw-r--r--libglusterfs/src/graph-print.c221
-rw-r--r--libglusterfs/src/graph.c1968
-rw-r--r--libglusterfs/src/graph.l66
-rw-r--r--libglusterfs/src/graph.y273
-rw-r--r--libglusterfs/src/hashfn.c269
-rw-r--r--libglusterfs/src/iatt.h331
-rw-r--r--libglusterfs/src/inode.c3390
-rw-r--r--libglusterfs/src/inode.h202
-rw-r--r--libglusterfs/src/iobuf.c1613
-rw-r--r--libglusterfs/src/iobuf.h161
-rw-r--r--libglusterfs/src/latency.c186
-rw-r--r--libglusterfs/src/latency.h27
-rw-r--r--libglusterfs/src/libglusterfs.sym1193
-rw-r--r--libglusterfs/src/list.h178
-rw-r--r--libglusterfs/src/lkowner.h83
-rw-r--r--libglusterfs/src/locking.c27
-rw-r--r--libglusterfs/src/locking.h40
-rw-r--r--libglusterfs/src/logging.c2760
-rw-r--r--libglusterfs/src/logging.h148
-rw-r--r--libglusterfs/src/mem-pool.c1220
-rw-r--r--libglusterfs/src/mem-pool.h163
-rw-r--r--libglusterfs/src/mem-types.h107
-rw-r--r--libglusterfs/src/monitoring.c282
-rw-r--r--libglusterfs/src/options.c1806
-rw-r--r--libglusterfs/src/options.h242
-rw-r--r--libglusterfs/src/parse-utils.c174
-rw-r--r--libglusterfs/src/quota-common-utils.c241
-rw-r--r--libglusterfs/src/rbthash.c667
-rw-r--r--libglusterfs/src/rbthash.h77
-rw-r--r--libglusterfs/src/refcount.c108
-rw-r--r--libglusterfs/src/revision.h1
-rw-r--r--libglusterfs/src/rot-buffs.c490
-rw-r--r--libglusterfs/src/run.c774
-rw-r--r--libglusterfs/src/scheduler.c82
-rw-r--r--libglusterfs/src/scheduler.h32
-rw-r--r--libglusterfs/src/stack.c646
-rw-r--r--libglusterfs/src/stack.h429
-rw-r--r--libglusterfs/src/statedump.c1436
-rw-r--r--libglusterfs/src/statedump.h94
-rw-r--r--libglusterfs/src/store.c744
-rw-r--r--libglusterfs/src/strfd.c93
-rw-r--r--libglusterfs/src/syncop-utils.c669
-rw-r--r--libglusterfs/src/syncop.c3824
-rw-r--r--libglusterfs/src/syncop.h205
-rw-r--r--libglusterfs/src/syscall.c756
-rw-r--r--libglusterfs/src/syscall.h142
-rw-r--r--libglusterfs/src/throttle-tbf.c290
-rw-r--r--libglusterfs/src/timer.c382
-rw-r--r--libglusterfs/src/timer.h61
-rw-r--r--libglusterfs/src/timespec.c129
-rw-r--r--libglusterfs/src/trie.c491
-rw-r--r--libglusterfs/src/trie.h51
-rw-r--r--libglusterfs/src/unittest/global_mock.c25
-rw-r--r--libglusterfs/src/unittest/log_mock.c52
-rw-r--r--libglusterfs/src/unittest/mem_pool_unittest.c483
-rw-r--r--libglusterfs/src/unittest/unittest.h47
-rw-r--r--libglusterfs/src/xlator.c1879
-rw-r--r--libglusterfs/src/xlator.h877
-rw-r--r--libglusterfsclient/src/Makefile.am16
-rw-r--r--libglusterfsclient/src/libglusterfsclient-dentry.c404
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient-internals.h289
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.c8160
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.h1363
-rw-r--r--mod_glusterfs/Makefile.am3
-rw-r--r--mod_glusterfs/apache/1.3/src/Makefile.am30
-rw-r--r--mod_glusterfs/apache/1.3/src/README.txt107
-rw-r--r--mod_glusterfs/apache/1.3/src/mod_glusterfs.c507
-rw-r--r--mod_glusterfs/apache/2.2/src/Makefile.am31
-rw-r--r--mod_glusterfs/apache/2.2/src/README.txt105
-rw-r--r--mod_glusterfs/apache/2.2/src/mod_glusterfs.c3627
-rw-r--r--mod_glusterfs/apache/Makefile.am10
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.4/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.c1820
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.h32
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.5/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.c1476
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.h29
-rw-r--r--mod_glusterfs/lighttpd/Makefile.am3
-rwxr-xr-xrfc.sh259
-rw-r--r--rpc/Makefile.am2
-rw-r--r--rpc/rpc-lib/src/Makefile.am27
-rw-r--r--rpc/rpc-lib/src/auth-glusterfs.c506
-rw-r--r--rpc/rpc-lib/src/auth-null.c53
-rw-r--r--rpc/rpc-lib/src/auth-unix.c92
-rw-r--r--rpc/rpc-lib/src/autoscale-threads.c22
-rw-r--r--rpc/rpc-lib/src/libgfrpc.sym68
-rw-r--r--rpc/rpc-lib/src/mgmt-pmap.c147
-rw-r--r--rpc/rpc-lib/src/protocol-common.h492
-rw-r--r--rpc/rpc-lib/src/rpc-clnt-ping.c357
-rw-r--r--rpc/rpc-lib/src/rpc-clnt-ping.h16
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c3082
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h270
-rw-r--r--rpc/rpc-lib/src/rpc-drc.c855
-rw-r--r--rpc/rpc-lib/src/rpc-drc.h97
-rw-r--r--rpc/rpc-lib/src/rpc-lib-messages.h34
-rw-r--r--rpc/rpc-lib/src/rpc-transport.c944
-rw-r--r--rpc/rpc-lib/src/rpc-transport.h320
-rw-r--r--rpc/rpc-lib/src/rpcsvc-auth.c786
-rw-r--r--rpc/rpc-lib/src/rpcsvc-common.h116
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c4549
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h801
-rw-r--r--rpc/rpc-lib/src/xdr-common.h77
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.c272
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.h82
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.c130
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.h28
-rw-r--r--rpc/rpc-transport/Makefile.am2
-rw-r--r--rpc/rpc-transport/rdma/src/Makefile.am20
-rw-r--r--rpc/rpc-transport/rdma/src/name.c698
-rw-r--r--rpc/rpc-transport/rdma/src/name.h39
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c4942
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.h403
-rw-r--r--rpc/rpc-transport/socket/src/Makefile.am19
-rw-r--r--rpc/rpc-transport/socket/src/name.c1175
-rw-r--r--rpc/rpc-transport/socket/src/name.h22
-rw-r--r--rpc/rpc-transport/socket/src/socket-mem-types.h22
-rw-r--r--rpc/rpc-transport/socket/src/socket.c6270
-rw-r--r--rpc/rpc-transport/socket/src/socket.h322
-rw-r--r--rpc/xdr/src/.gitignore25
-rw-r--r--rpc/xdr/src/Makefile.am107
-rw-r--r--rpc/xdr/src/acl3-xdr.x52
-rw-r--r--rpc/xdr/src/changelog-xdr.x42
-rw-r--r--rpc/xdr/src/cli1-xdr.c459
-rw-r--r--rpc/xdr/src/cli1-xdr.h369
-rw-r--r--rpc/xdr/src/cli1-xdr.x245
-rw-r--r--rpc/xdr/src/glusterd1-xdr.c502
-rw-r--r--rpc/xdr/src/glusterd1-xdr.h264
-rw-r--r--rpc/xdr/src/glusterd1-xdr.x122
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.c1922
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.h1295
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.x446
-rw-r--r--rpc/xdr/src/glusterfs3.h1117
-rw-r--r--rpc/xdr/src/glusterfs4-xdr.x797
-rw-r--r--rpc/xdr/src/libgfxdr.sym350
-rw-r--r--rpc/xdr/src/mount3udp.x26
-rw-r--r--rpc/xdr/src/msg-nfs3.c467
-rw-r--r--rpc/xdr/src/msg-nfs3.h163
-rw-r--r--rpc/xdr/src/nlm4-xdr.c254
-rw-r--r--rpc/xdr/src/nlm4-xdr.h254
-rw-r--r--rpc/xdr/src/nlm4-xdr.x (renamed from rpc/xdr/src/nlm4.x)78
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.c37
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.h74
-rw-r--r--rpc/xdr/src/nlmcbk.x33
-rw-r--r--rpc/xdr/src/nsm-xdr.c105
-rw-r--r--rpc/xdr/src/nsm-xdr.h95
-rw-r--r--rpc/xdr/src/nsm-xdr.x (renamed from rpc/xdr/src/nsm.x)19
-rw-r--r--rpc/xdr/src/portmap-xdr.c246
-rw-r--r--rpc/xdr/src/portmap-xdr.h139
-rw-r--r--rpc/xdr/src/portmap-xdr.x27
-rw-r--r--rpc/xdr/src/rpc-common-xdr.c232
-rw-r--r--rpc/xdr/src/rpc-common-xdr.h113
-rw-r--r--rpc/xdr/src/rpc-common-xdr.x37
-rw-r--r--rpc/xdr/src/rpc-pragmas.h28
-rw-r--r--rpc/xdr/src/xdr-generic.c148
-rw-r--r--rpc/xdr/src/xdr-generic.h78
-rw-r--r--rpc/xdr/src/xdr-nfs3.c2620
-rw-r--r--rpc/xdr/src/xdr-nfs3.h1447
-rwxr-xr-xrun-tests-in-vagrant.sh311
-rwxr-xr-xrun-tests.sh615
-rw-r--r--scheduler/Makefile.am3
-rw-r--r--scheduler/alu/src/Makefile.am14
-rw-r--r--scheduler/alu/src/alu-mem-types.h35
-rw-r--r--scheduler/alu/src/alu.c1019
-rw-r--r--scheduler/alu/src/alu.h89
-rw-r--r--scheduler/nufa/src/Makefile.am12
-rw-r--r--scheduler/nufa/src/nufa-mem-types.h33
-rw-r--r--scheduler/nufa/src/nufa.c429
-rw-r--r--scheduler/random/src/Makefile.am14
-rw-r--r--scheduler/random/src/random-mem-types.h32
-rw-r--r--scheduler/random/src/random.c305
-rw-r--r--scheduler/random/src/random.h46
-rw-r--r--scheduler/rr/Makefile.am3
-rw-r--r--scheduler/rr/src/Makefile.am13
-rw-r--r--scheduler/rr/src/rr-mem-types.h32
-rw-r--r--scheduler/rr/src/rr-options.c256
-rw-r--r--scheduler/rr/src/rr-options.h34
-rw-r--r--scheduler/rr/src/rr.c567
-rw-r--r--scheduler/rr/src/rr.h70
-rw-r--r--scheduler/switch/Makefile.am3
-rw-r--r--scheduler/switch/src/Makefile.am12
-rw-r--r--scheduler/switch/src/switch-mem-types.h33
-rw-r--r--scheduler/switch/src/switch.c451
-rw-r--r--site.h.in44
-rwxr-xr-xsmoke.sh83
l---------submit-for-review.sh1
-rw-r--r--swift/1.4.8/README22
-rw-r--r--swift/1.4.8/gluster-swift-plugin.spec60
-rw-r--r--swift/1.4.8/gluster-swift.spec396
-rw-r--r--swift/1.4.8/plugins/DiskDir.py484
-rw-r--r--swift/1.4.8/plugins/DiskFile.py316
-rw-r--r--swift/1.4.8/plugins/Glusterfs.py131
-rw-r--r--swift/1.4.8/plugins/__init__.py16
-rw-r--r--swift/1.4.8/plugins/conf/account-server/1.conf22
-rw-r--r--swift/1.4.8/plugins/conf/account.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/account.ring.gzbin739 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/container-server/1.conf24
-rw-r--r--swift/1.4.8/plugins/conf/container.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/container.ring.gzbin741 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/fs.conf9
-rw-r--r--swift/1.4.8/plugins/conf/object-server/1.conf22
-rw-r--r--swift/1.4.8/plugins/conf/object.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/object.ring.gzbin738 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/proxy-server.conf21
-rw-r--r--swift/1.4.8/plugins/conf/swift.conf7
-rw-r--r--swift/1.4.8/plugins/constraints.py97
-rw-r--r--swift/1.4.8/plugins/utils.py679
-rw-r--r--swift/1.4.8/swift.diff797
-rw-r--r--tests/00-geo-rep/00-georep-verify-non-root-setup.t294
-rw-r--r--tests/00-geo-rep/00-georep-verify-setup.t110
-rw-r--r--tests/00-geo-rep/01-georep-glusterd-tests.t213
-rw-r--r--tests/00-geo-rep/bug-1600145.t109
-rw-r--r--tests/00-geo-rep/bug-1708603.t63
-rw-r--r--tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t234
-rw-r--r--tests/00-geo-rep/georep-basic-dr-rsync.t258
-rw-r--r--tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t227
-rw-r--r--tests/00-geo-rep/georep-basic-dr-tarssh.t227
-rw-r--r--tests/00-geo-rep/georep-basic-rsync-ec.t224
-rw-r--r--tests/00-geo-rep/georep-basic-tarssh-ec.t223
-rw-r--r--tests/00-geo-rep/georep-config-upgrade.t132
-rw-r--r--tests/00-geo-rep/georep-stderr-hang.t128
-rw-r--r--tests/00-geo-rep/georep-upgrade.t79
-rw-r--r--tests/00-geo-rep/gsyncd.conf.old47
-rw-r--r--tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t203
-rw-r--r--tests/000-flaky/basic_changelog_changelog-snapshot.t60
-rw-r--r--tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t142
-rw-r--r--tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t50
-rw-r--r--tests/000-flaky/basic_mount-nfs-auth.t342
-rw-r--r--tests/000-flaky/bugs_core_multiplex-limit-issue-151.t56
-rw-r--r--tests/000-flaky/bugs_distribute_bug-1117851.t101
-rw-r--r--tests/000-flaky/bugs_distribute_bug-1122443.t61
-rw-r--r--tests/000-flaky/bugs_glusterd_bug-857330/common.rc57
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/normal.t69
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/xml.t83
-rw-r--r--tests/000-flaky/bugs_glusterd_quorum-value-check.t37
-rw-r--r--tests/000-flaky/bugs_nfs_bug-1116503.t47
-rw-r--r--tests/000-flaky/features_lock-migration_lkmigration-set-option.t34
-rw-r--r--tests/README.md43
-rw-r--r--tests/afr.rc123
-rwxr-xr-xtests/basic/0symbol-check.t46
-rw-r--r--tests/basic/afr/add-brick-self-heal.t74
-rw-r--r--tests/basic/afr/afr-anon-inode-no-quorum.t63
-rw-r--r--tests/basic/afr/afr-anon-inode.t114
-rw-r--r--tests/basic/afr/afr-no-fsync.t20
-rw-r--r--tests/basic/afr/afr-read-hash-mode.t56
-rw-r--r--tests/basic/afr/afr-seek.t55
-rw-r--r--tests/basic/afr/afr-up.t28
-rw-r--r--tests/basic/afr/arbiter-add-brick.t86
-rw-r--r--tests/basic/afr/arbiter-cli.t30
-rw-r--r--tests/basic/afr/arbiter-mount.t48
-rw-r--r--tests/basic/afr/arbiter-remove-brick.t36
-rw-r--r--tests/basic/afr/arbiter-statfs.t41
-rw-r--r--tests/basic/afr/arbiter.t92
-rwxr-xr-xtests/basic/afr/client-side-heal.t95
-rw-r--r--tests/basic/afr/data-self-heal.t209
-rw-r--r--tests/basic/afr/durability-off.t46
-rw-r--r--tests/basic/afr/entry-self-heal-anon-dir-off.t459
-rw-r--r--tests/basic/afr/entry-self-heal.t447
-rw-r--r--tests/basic/afr/gfid-heal.t33
-rw-r--r--tests/basic/afr/gfid-mismatch-resolution-with-cli.t168
-rw-r--r--tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t229
-rw-r--r--tests/basic/afr/gfid-mismatch.t32
-rw-r--r--tests/basic/afr/gfid-self-heal.t145
-rw-r--r--tests/basic/afr/granular-esh/add-brick.t80
-rw-r--r--tests/basic/afr/granular-esh/cli.t114
-rw-r--r--tests/basic/afr/granular-esh/conservative-merge.t138
-rw-r--r--tests/basic/afr/granular-esh/granular-esh.t168
-rw-r--r--tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t76
-rw-r--r--tests/basic/afr/granular-esh/replace-brick.t76
-rw-r--r--tests/basic/afr/halo.t61
-rw-r--r--tests/basic/afr/heal-info.t36
-rw-r--r--tests/basic/afr/heal-quota.t35
-rw-r--r--tests/basic/afr/inodelk.t87
-rw-r--r--tests/basic/afr/lk-quorum.t257
-rw-r--r--tests/basic/afr/metadata-self-heal.t133
-rw-r--r--tests/basic/afr/name-self-heal.t112
-rw-r--r--tests/basic/afr/quorum.t97
-rw-r--r--tests/basic/afr/read-subvol-data.t33
-rw-r--r--tests/basic/afr/read-subvol-entry.t35
-rw-r--r--tests/basic/afr/rename-data-loss.t72
-rw-r--r--tests/basic/afr/replace-brick-self-heal.t64
-rw-r--r--tests/basic/afr/resolve.t55
-rw-r--r--tests/basic/afr/root-squash-self-heal.t27
-rw-r--r--tests/basic/afr/self-heal.t244
-rw-r--r--tests/basic/afr/self-heald.t193
-rw-r--r--tests/basic/afr/sparse-file-self-heal.t181
-rw-r--r--tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t124
-rw-r--r--tests/basic/afr/split-brain-heal-info.t62
-rw-r--r--tests/basic/afr/split-brain-healing-ctime.t252
-rw-r--r--tests/basic/afr/split-brain-healing.t261
-rw-r--r--tests/basic/afr/split-brain-open.t38
-rw-r--r--tests/basic/afr/split-brain-resolution.t105
-rw-r--r--tests/basic/afr/stale-file-lookup.t30
-rw-r--r--tests/basic/afr/ta-check-locks.t68
-rw-r--r--tests/basic/afr/ta-read.t64
-rw-r--r--tests/basic/afr/ta-shd.t49
-rw-r--r--tests/basic/afr/ta-write-on-bad-brick.t51
-rw-r--r--tests/basic/afr/ta.t54
-rw-r--r--tests/basic/afr/tarissue.t39
-rw-r--r--tests/basic/all_squash.t74
-rwxr-xr-xtests/basic/cdc.t148
-rw-r--r--tests/basic/changelog/changelog-api.t37
-rw-r--r--tests/basic/changelog/changelog-history.t91
-rw-r--r--tests/basic/changelog/changelog-rename.t44
-rw-r--r--tests/basic/changelog/history-api.t42
-rw-r--r--tests/basic/cloudsync-sanity.t29
-rw-r--r--tests/basic/ctime/ctime-ec-heal.t70
-rw-r--r--tests/basic/ctime/ctime-ec-rebalance.t43
-rw-r--r--tests/basic/ctime/ctime-glfs-init.c68
-rw-r--r--tests/basic/ctime/ctime-glfs-init.t23
-rw-r--r--tests/basic/ctime/ctime-heal-symlinks.t65
-rw-r--r--tests/basic/ctime/ctime-mdata-legacy-files.t83
-rw-r--r--tests/basic/ctime/ctime-noatime.t49
-rw-r--r--tests/basic/ctime/ctime-readdir.c29
-rw-r--r--tests/basic/ctime/ctime-readdir.t50
-rw-r--r--tests/basic/ctime/ctime-rep-heal.t70
-rw-r--r--tests/basic/ctime/ctime-rep-rebalance.t41
-rw-r--r--tests/basic/ctime/ctime-utimesat.t28
-rw-r--r--tests/basic/distribute/brick-down.t83
-rw-r--r--tests/basic/distribute/bug-1265677-use-readdirp.t38
-rw-r--r--tests/basic/distribute/debug-xattrs.t54
-rw-r--r--tests/basic/distribute/dir-heal.t145
-rw-r--r--tests/basic/distribute/file-create.t120
-rw-r--r--tests/basic/distribute/file-rename.t1021
-rw-r--r--tests/basic/distribute/force-migration.t50
-rw-r--r--tests/basic/distribute/lookup.t54
-rw-r--r--tests/basic/distribute/non-root-unlink-stale-linkto.t51
-rw-r--r--tests/basic/distribute/spare_file_rebalance.t51
-rw-r--r--tests/basic/distribute/throttle-rebal.t56
-rw-r--r--tests/basic/ec/dht-rename.t19
-rw-r--r--tests/basic/ec/ec-1468261.t95
-rw-r--r--tests/basic/ec/ec-3-1.t14
-rw-r--r--tests/basic/ec/ec-4-1.t14
-rw-r--r--tests/basic/ec/ec-5-2.t14
-rw-r--r--tests/basic/ec/ec-6-2.t14
-rw-r--r--tests/basic/ec/ec-anonymous-fd.t42
-rw-r--r--tests/basic/ec/ec-background-heals.t105
-rw-r--r--tests/basic/ec/ec-badfd.c124
-rwxr-xr-xtests/basic/ec/ec-badfd.t26
-rw-r--r--tests/basic/ec/ec-common145
-rw-r--r--tests/basic/ec/ec-cpu-extensions.t62
-rwxr-xr-xtests/basic/ec/ec-data-heal.t75
-rw-r--r--tests/basic/ec/ec-dirty-flags.t23
-rw-r--r--tests/basic/ec/ec-discard.t205
-rw-r--r--tests/basic/ec/ec-fallocate.t72
-rw-r--r--tests/basic/ec/ec-fast-fgetxattr.c129
-rwxr-xr-xtests/basic/ec/ec-fast-fgetxattr.t40
-rw-r--r--tests/basic/ec/ec-fix-openfd.t111
-rw-r--r--tests/basic/ec/ec-internal-xattrs.t45
-rw-r--r--tests/basic/ec/ec-new-entry.t70
-rw-r--r--tests/basic/ec/ec-notify.t101
-rw-r--r--tests/basic/ec/ec-optimistic-changelog.t153
-rw-r--r--tests/basic/ec/ec-quorum-count.t167
-rw-r--r--tests/basic/ec/ec-read-mask.t114
-rw-r--r--tests/basic/ec/ec-read-policy.t52
-rw-r--r--tests/basic/ec/ec-readdir.t46
-rw-r--r--tests/basic/ec/ec-rebalance.t61
-rw-r--r--tests/basic/ec/ec-reset-brick.t50
-rw-r--r--tests/basic/ec/ec-root-heal.t34
-rw-r--r--tests/basic/ec/ec-seek.t58
-rw-r--r--tests/basic/ec/ec-stripe.t227
-rw-r--r--tests/basic/ec/ec-up.t28
-rw-r--r--tests/basic/ec/ec.t259
-rw-r--r--tests/basic/ec/gfapi-ec-open-truncate.c171
-rw-r--r--tests/basic/ec/gfapi-ec-open-truncate.t48
-rw-r--r--tests/basic/ec/heal-info.t74
-rw-r--r--tests/basic/ec/lock-contention.t62
-rwxr-xr-xtests/basic/ec/nfs.t29
-rwxr-xr-xtests/basic/ec/quota.t45
-rw-r--r--tests/basic/ec/self-heal-read-write-fail.t69
-rw-r--r--tests/basic/ec/self-heal.t235
-rw-r--r--tests/basic/ec/statedump.t28
-rw-r--r--tests/basic/exports_parsing.t57
-rw-r--r--tests/basic/fencing/afr-lock-heal-advanced.c227
-rw-r--r--tests/basic/fencing/afr-lock-heal-advanced.t115
-rw-r--r--tests/basic/fencing/afr-lock-heal-basic.c182
-rw-r--r--tests/basic/fencing/afr-lock-heal-basic.t102
-rw-r--r--tests/basic/fencing/fence-basic.c229
-rwxr-xr-xtests/basic/fencing/fence-basic.t31
-rw-r--r--tests/basic/fencing/fencing-crash-conistency.t62
-rw-r--r--tests/basic/fencing/test-fence-option.t37
-rw-r--r--tests/basic/fop-sampling.t61
-rw-r--r--tests/basic/fops-sanity.c1033
-rwxr-xr-xtests/basic/fops-sanity.t27
-rw-r--r--tests/basic/fuse/Makefile12
-rw-r--r--tests/basic/fuse/active-io-graph-switch.t65
-rw-r--r--tests/basic/fuse/seek.c82
-rwxr-xr-xtests/basic/geo-replication/marker-xattrs.t80
-rw-r--r--tests/basic/gfapi/Makefile22
-rwxr-xr-xtests/basic/gfapi/anonymous_fd.t26
-rw-r--r--tests/basic/gfapi/anonymous_fd_read_write.c106
-rw-r--r--tests/basic/gfapi/bug-1241104.c93
-rwxr-xr-xtests/basic/gfapi/bug-1241104.t30
-rw-r--r--tests/basic/gfapi/bug-1507896.c49
-rw-r--r--tests/basic/gfapi/bug-1507896.t33
-rw-r--r--tests/basic/gfapi/bug1283983.c122
-rwxr-xr-xtests/basic/gfapi/bug1283983.sh33
-rw-r--r--tests/basic/gfapi/bug1291259.c181
-rwxr-xr-xtests/basic/gfapi/bug1291259.t32
-rw-r--r--tests/basic/gfapi/bug1613098.c96
-rwxr-xr-xtests/basic/gfapi/bug1613098.t22
-rw-r--r--tests/basic/gfapi/gfapi-async-calls-test.c494
-rw-r--r--tests/basic/gfapi/gfapi-async-calls-test.t26
-rw-r--r--tests/basic/gfapi/gfapi-copy-file-range.t82
-rw-r--r--tests/basic/gfapi/gfapi-dup.c84
-rwxr-xr-xtests/basic/gfapi/gfapi-dup.t27
-rw-r--r--tests/basic/gfapi/gfapi-graph-switch-open-fd.t44
-rw-r--r--tests/basic/gfapi/gfapi-keep-writing.c129
-rw-r--r--tests/basic/gfapi/gfapi-load-volfile.c65
-rw-r--r--tests/basic/gfapi/gfapi-load-volfile.t28
-rw-r--r--tests/basic/gfapi/gfapi-ssl-load-volfile-test.c127
-rwxr-xr-xtests/basic/gfapi/gfapi-ssl-load-volfile-test.t76
-rw-r--r--tests/basic/gfapi/gfapi-ssl-test.c124
-rwxr-xr-xtests/basic/gfapi/gfapi-ssl-test.t61
-rw-r--r--tests/basic/gfapi/gfapi-statx-basic.c184
-rwxr-xr-xtests/basic/gfapi/gfapi-statx-basic.t30
-rw-r--r--tests/basic/gfapi/gfapi-trunc.c89
-rw-r--r--tests/basic/gfapi/gfapi-trunc.t22
-rw-r--r--tests/basic/gfapi/glfd-lkowner.c214
-rwxr-xr-xtests/basic/gfapi/glfd-lkowner.t27
-rw-r--r--tests/basic/gfapi/glfs-copy-file-range.c180
-rw-r--r--tests/basic/gfapi/glfs_h_creat_open.c118
-rwxr-xr-xtests/basic/gfapi/glfs_h_creat_open.t27
-rw-r--r--tests/basic/gfapi/glfs_sysrq.c60
-rwxr-xr-xtests/basic/gfapi/glfs_sysrq.t39
-rw-r--r--tests/basic/gfapi/glfs_xreaddirplus_r.c242
-rwxr-xr-xtests/basic/gfapi/glfs_xreaddirplus_r.t28
-rw-r--r--tests/basic/gfapi/glfsxmp-coverage.c1900
-rw-r--r--tests/basic/gfapi/glfsxmp.t30
-rw-r--r--tests/basic/gfapi/libgfapi-fini-hang.c62
-rwxr-xr-xtests/basic/gfapi/libgfapi-fini-hang.t42
-rw-r--r--tests/basic/gfapi/mandatory-lock-optimal.c532
-rw-r--r--tests/basic/gfapi/mandatory-lock-optimal.t38
-rw-r--r--tests/basic/gfapi/protocol-client-ssl.vol.in15
-rw-r--r--tests/basic/gfapi/protocol-client.vol.in14
-rw-r--r--tests/basic/gfapi/seek.c99
-rw-r--r--tests/basic/gfapi/sink.t13
-rw-r--r--tests/basic/gfapi/sink.vol24
-rw-r--r--tests/basic/gfapi/upcall-cache-invalidate.c209
-rwxr-xr-xtests/basic/gfapi/upcall-cache-invalidate.t30
-rw-r--r--tests/basic/gfapi/upcall-register-api.c286
-rwxr-xr-xtests/basic/gfapi/upcall-register-api.t30
-rw-r--r--tests/basic/gfid-access.t80
-rwxr-xr-xtests/basic/gfproxy.t71
-rw-r--r--tests/basic/global-threading.t104
-rw-r--r--tests/basic/glusterd-restart-shd-mux.t96
-rw-r--r--tests/basic/glusterd/arbiter-volume-probe.t25
-rw-r--r--tests/basic/glusterd/check-cloudsync-ancestry.t48
-rw-r--r--tests/basic/glusterd/disperse-create.t73
-rw-r--r--tests/basic/glusterd/heald.t92
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume-probe.t25
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume.t45
-rw-r--r--tests/basic/glusterd/volfile_server_switch.t46
-rw-r--r--tests/basic/glusterd/volume-brick-count.t61
-rw-r--r--tests/basic/glusterfsd-args.t5
-rw-r--r--tests/basic/graph-cleanup-brick-down-shd-mux.t64
-rw-r--r--tests/basic/hardlink-limit.t44
-rw-r--r--tests/basic/inode-leak.t31
-rw-r--r--tests/basic/inode-quota-enforcing.t100
-rw-r--r--tests/basic/ios-dump.t43
-rw-r--r--tests/basic/jbr/jbr-volgen.t39
-rwxr-xr-xtests/basic/jbr/jbr.t38
-rw-r--r--tests/basic/logchecks-messages.h105
-rw-r--r--tests/basic/logchecks.c214
-rw-r--r--tests/basic/md-cache/bug-1317785.t34
-rwxr-xr-xtests/basic/md-cache/bug-1418249.t20
-rwxr-xr-xtests/basic/meta.t45
-rw-r--r--tests/basic/metadisp/fsyncdir.c29
-rw-r--r--tests/basic/metadisp/ftruncate.c34
-rw-r--r--tests/basic/metadisp/fxattr.c107
-rw-r--r--tests/basic/metadisp/gfs-fsetxattr.c141
-rw-r--r--tests/basic/metadisp/metadisp.t316
-rw-r--r--tests/basic/metadisp/metadisp.vol14
-rw-r--r--tests/basic/mgmt_v3-locks.t120
-rw-r--r--tests/basic/mount-options.disabled143
-rwxr-xr-xtests/basic/mount.t85
-rw-r--r--tests/basic/mpx-compat.t53
-rw-r--r--tests/basic/multiple-volume-shd-mux.t46
-rw-r--r--tests/basic/multiplex.t78
-rw-r--r--tests/basic/namespace.t131
-rw-r--r--tests/basic/netgroup_parsing.t56
-rwxr-xr-xtests/basic/nl-cache.t98
-rw-r--r--tests/basic/nufa.t41
-rwxr-xr-xtests/basic/op_errnos.t34
-rw-r--r--tests/basic/open-behind/open-behind.t183
-rw-r--r--tests/basic/open-behind/tester-fd.c99
-rw-r--r--tests/basic/open-behind/tester.c444
-rw-r--r--tests/basic/open-behind/tester.h145
-rw-r--r--tests/basic/open-fd-snap-delete.t74
-rw-r--r--tests/basic/peer-parsing.t52
-rw-r--r--tests/basic/pgfid-feat.t35
-rwxr-xr-xtests/basic/playground/template-xlator-sanity.t43
-rw-r--r--tests/basic/posix/shared-statfs.t58
-rw-r--r--tests/basic/posix/zero-fill-enospace.c67
-rw-r--r--tests/basic/posix/zero-fill-enospace.t35
-rwxr-xr-xtests/basic/posixonly.t31
-rw-r--r--tests/basic/quick-read-with-upcall.t72
-rwxr-xr-xtests/basic/quota-ancestry-building.t71
-rwxr-xr-xtests/basic/quota-anon-fd-nfs.t117
-rwxr-xr-xtests/basic/quota-nfs.t66
-rw-r--r--tests/basic/quota-rename.t37
-rw-r--r--tests/basic/quota.c89
-rwxr-xr-xtests/basic/quota.t236
-rwxr-xr-xtests/basic/quota_aux_mount.t53
-rwxr-xr-xtests/basic/rpc-coverage.sh (renamed from extras/rpc-coverage.sh)57
-rwxr-xr-xtests/basic/rpc-coverage.t25
-rw-r--r--tests/basic/sdfs-sanity.t28
-rw-r--r--tests/basic/seek.c182
-rw-r--r--tests/basic/shd-mux-afr.t70
-rw-r--r--tests/basic/shd-mux-ec.t75
-rw-r--r--tests/basic/stats-dump.t55
-rwxr-xr-xtests/basic/symbol-check.sh114
-rwxr-xr-xtests/basic/trace.t55
-rw-r--r--tests/basic/uss.t421
-rw-r--r--tests/basic/volfile-sanity.t29
-rw-r--r--tests/basic/volume-scale-shd-mux.t116
-rw-r--r--tests/basic/volume-snap-scheduler.t49
-rwxr-xr-xtests/basic/volume-snapshot-clone.t129
-rwxr-xr-xtests/basic/volume-snapshot-xml.t72
-rwxr-xr-xtests/basic/volume-snapshot.t172
-rw-r--r--tests/basic/volume-status.t114
-rw-r--r--tests/basic/volume.t60
-rw-r--r--tests/basic/xlator-pass-through-sanity.t22
-rw-r--r--tests/bitrot/br-signer-threads-config-1797869.t73
-rw-r--r--tests/bitrot/br-state-check.t83
-rw-r--r--tests/bitrot/br-stub.c195
-rw-r--r--tests/bitrot/br-stub.t66
-rw-r--r--tests/bitrot/bug-1207627-bitrot-scrub-status.t52
-rw-r--r--tests/bitrot/bug-1221914.t51
-rw-r--r--tests/bitrot/bug-1244613.t95
-rw-r--r--tests/bitrot/bug-1294786.t95
-rw-r--r--tests/bitrot/bug-1373520.t71
-rw-r--r--tests/bitrot/bug-1700078.t87
-rw-r--r--tests/bitrot/bug-internal-xattrs-check-1243391.t42
-rw-r--r--tests/bugs/access-control/bug-1051896.c94
-rw-r--r--tests/bugs/access-control/bug-1051896.t34
-rw-r--r--tests/bugs/access-control/bug-1387241.c18
-rw-r--r--tests/bugs/access-control/bug-1387241.t36
-rw-r--r--tests/bugs/access-control/bug-887098-gmount-crash.t42
-rw-r--r--tests/bugs/access-control/bug-958691.t52
-rwxr-xr-xtests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t57
-rw-r--r--tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t48
-rw-r--r--tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t70
-rw-r--r--tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t48
-rw-r--r--tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t39
-rw-r--r--tests/bugs/bitrot/bug-1227996.t57
-rw-r--r--tests/bugs/bitrot/bug-1228680.t48
-rw-r--r--tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t38
-rw-r--r--tests/bugs/bitrot/bug-1245981.t55
-rw-r--r--tests/bugs/bitrot/bug-1288490.t48
-rwxr-xr-xtests/bugs/bug-1064147.t72
-rw-r--r--tests/bugs/bug-1110262.t72
-rw-r--r--tests/bugs/bug-1138841.t25
-rwxr-xr-xtests/bugs/bug-1258069.t30
-rw-r--r--tests/bugs/bug-1368312.t86
-rw-r--r--tests/bugs/bug-1371806.t81
-rw-r--r--tests/bugs/bug-1371806_1.t48
-rw-r--r--tests/bugs/bug-1371806_2.t52
-rw-r--r--tests/bugs/bug-1371806_3.t63
-rw-r--r--tests/bugs/bug-1371806_acl.t96
-rw-r--r--tests/bugs/bug-1584517.t70
-rw-r--r--tests/bugs/bug-1620580.t67
-rw-r--r--tests/bugs/bug-1694920.t63
-rw-r--r--tests/bugs/bug-1702299.t67
-rw-r--r--tests/bugs/bug-863068.t76
-rwxr-xr-xtests/bugs/changelog/bug-1208470.t40
-rw-r--r--tests/bugs/changelog/bug-1211327.t46
-rw-r--r--tests/bugs/changelog/bug-1225542.t30
-rw-r--r--tests/bugs/changelog/bug-1321955.t60
-rw-r--r--tests/bugs/cli/bug-1004218.t26
-rw-r--r--tests/bugs/cli/bug-1022905.t37
-rw-r--r--tests/bugs/cli/bug-1030580.t53
-rw-r--r--tests/bugs/cli/bug-1047378.t12
-rw-r--r--tests/bugs/cli/bug-1047416.t71
-rw-r--r--tests/bugs/cli/bug-1077682.t24
-rwxr-xr-xtests/bugs/cli/bug-1087487.t23
-rw-r--r--tests/bugs/cli/bug-1113476.t45
-rw-r--r--tests/bugs/cli/bug-1169302.c79
-rwxr-xr-xtests/bugs/cli/bug-1169302.t55
-rwxr-xr-xtests/bugs/cli/bug-1320388.t44
-rw-r--r--tests/bugs/cli/bug-1353156-get-state-cli-validations.t147
-rw-r--r--tests/bugs/cli/bug-1378842-volume-get-all.t23
-rw-r--r--tests/bugs/cli/bug-764638.t13
-rwxr-xr-xtests/bugs/cli/bug-822830.t64
-rw-r--r--tests/bugs/cli/bug-867252.t41
-rwxr-xr-xtests/bugs/cli/bug-921215.t13
-rw-r--r--tests/bugs/cli/bug-949298.t12
-rw-r--r--tests/bugs/cli/bug-961307.t20
-rwxr-xr-xtests/bugs/cli/bug-969193.t13
-rw-r--r--tests/bugs/cli/bug-977246.t21
-rw-r--r--tests/bugs/cli/bug-982174.t36
-rw-r--r--tests/bugs/cli/bug-983317-volume-get.t45
-rw-r--r--tests/bugs/core/949327.t23
-rw-r--r--tests/bugs/core/brick-mux-fd-cleanup.t78
-rw-r--r--tests/bugs/core/bug-1110917.t39
-rw-r--r--tests/bugs/core/bug-1111557.t12
-rw-r--r--tests/bugs/core/bug-1117951.t24
-rw-r--r--tests/bugs/core/bug-1119582.t24
-rw-r--r--tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t18
-rwxr-xr-xtests/bugs/core/bug-1168803-snapd-option-validation-fix.t30
-rwxr-xr-xtests/bugs/core/bug-1402841.t-mt-dir-scan-race.t42
-rw-r--r--tests/bugs/core/bug-1421721-mpx-toggle.t25
-rw-r--r--tests/bugs/core/bug-1432542-mpx-restart-crash.t116
-rw-r--r--tests/bugs/core/bug-1650403.t113
-rw-r--r--tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t33
-rw-r--r--tests/bugs/core/bug-834465.c60
-rwxr-xr-xtests/bugs/core/bug-834465.t51
-rw-r--r--tests/bugs/core/bug-845213.t19
-rw-r--r--tests/bugs/core/bug-903336.t13
-rwxr-xr-xtests/bugs/core/bug-908146.t30
-rw-r--r--tests/bugs/core/bug-913544.t24
-rwxr-xr-xtests/bugs/core/bug-924075.t23
-rwxr-xr-xtests/bugs/core/bug-927616.t65
-rw-r--r--tests/bugs/core/bug-949242.t55
-rw-r--r--tests/bugs/core/bug-986429.t19
-rwxr-xr-xtests/bugs/core/io-stats-1322825.t67
-rwxr-xr-xtests/bugs/core/log-bug-1362520.t43
-rwxr-xr-xtests/bugs/ctime/issue-832.t32
-rw-r--r--tests/bugs/distribute/bug-1042725.t49
-rwxr-xr-xtests/bugs/distribute/bug-1066798.t88
-rwxr-xr-xtests/bugs/distribute/bug-1086228.t34
-rwxr-xr-xtests/bugs/distribute/bug-1088231.t173
-rw-r--r--tests/bugs/distribute/bug-1099890.t130
-rwxr-xr-xtests/bugs/distribute/bug-1125824.t103
-rwxr-xr-xtests/bugs/distribute/bug-1161156.t58
-rwxr-xr-xtests/bugs/distribute/bug-1161311.t164
-rw-r--r--tests/bugs/distribute/bug-1190734.t93
-rw-r--r--tests/bugs/distribute/bug-1193636.c68
-rw-r--r--tests/bugs/distribute/bug-1193636.t74
-rwxr-xr-xtests/bugs/distribute/bug-1204140.t22
-rw-r--r--tests/bugs/distribute/bug-1247563.t62
-rw-r--r--tests/bugs/distribute/bug-1368012.t51
-rw-r--r--tests/bugs/distribute/bug-1389697.t42
-rw-r--r--tests/bugs/distribute/bug-1543279.t67
-rw-r--r--tests/bugs/distribute/bug-1600379.t54
-rw-r--r--tests/bugs/distribute/bug-1667804.t63
-rwxr-xr-xtests/bugs/distribute/bug-1786679.t69
-rwxr-xr-xtests/bugs/distribute/bug-853258.t47
-rw-r--r--tests/bugs/distribute/bug-860663.c41
-rw-r--r--tests/bugs/distribute/bug-860663.t56
-rw-r--r--tests/bugs/distribute/bug-862967.t59
-rwxr-xr-xtests/bugs/distribute/bug-882278.t73
-rwxr-xr-xtests/bugs/distribute/bug-884455.t84
-rwxr-xr-xtests/bugs/distribute/bug-884597.t173
-rwxr-xr-xtests/bugs/distribute/bug-907072.t49
-rwxr-xr-xtests/bugs/distribute/bug-912564.t92
-rwxr-xr-xtests/bugs/distribute/bug-915554.t76
-rwxr-xr-xtests/bugs/distribute/bug-921408.t90
-rwxr-xr-xtests/bugs/distribute/bug-924265.t35
-rw-r--r--tests/bugs/distribute/bug-961615.t34
-rwxr-xr-xtests/bugs/distribute/bug-973073.t48
-rwxr-xr-xtests/bugs/distribute/issue-1327.t33
-rwxr-xr-xtests/bugs/distribute/overlap.py59
-rw-r--r--tests/bugs/ec/bug-1161621.t42
-rw-r--r--tests/bugs/ec/bug-1161886.c53
-rw-r--r--tests/bugs/ec/bug-1161886.t44
-rw-r--r--tests/bugs/ec/bug-1179050.t23
-rw-r--r--tests/bugs/ec/bug-1187474.t43
-rw-r--r--tests/bugs/ec/bug-1188145.t50
-rw-r--r--tests/bugs/ec/bug-1227869.t33
-rw-r--r--tests/bugs/ec/bug-1236065.t95
-rw-r--r--tests/bugs/ec/bug-1251446.t50
-rwxr-xr-xtests/bugs/ec/bug-1304988.t42
-rw-r--r--tests/bugs/ec/bug-1547662.t41
-rw-r--r--tests/bugs/ec/bug-1699866-check-reopen-fd.t34
-rw-r--r--tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t54
-rwxr-xr-xtests/bugs/error-gen/bug-767095.t51
-rw-r--r--tests/bugs/fuse/bug-1030208.t35
-rw-r--r--tests/bugs/fuse/bug-1126048.c43
-rwxr-xr-xtests/bugs/fuse/bug-1126048.t30
-rw-r--r--tests/bugs/fuse/bug-1283103.t59
-rw-r--r--tests/bugs/fuse/bug-1309462.t50
-rw-r--r--tests/bugs/fuse/bug-1336818.t52
-rwxr-xr-xtests/bugs/fuse/bug-858215.t79
-rw-r--r--tests/bugs/fuse/bug-858488-min-free-disk.t108
-rwxr-xr-xtests/bugs/fuse/bug-924726.t45
-rw-r--r--tests/bugs/fuse/bug-963678.t57
-rwxr-xr-xtests/bugs/fuse/bug-983477.t53
-rw-r--r--tests/bugs/fuse/bug-985074.t54
-rwxr-xr-xtests/bugs/fuse/many-groups-for-acl.t123
-rwxr-xr-xtests/bugs/fuse/setup.sh20
-rwxr-xr-xtests/bugs/fuse/teardown.sh5
-rw-r--r--tests/bugs/geo-replication/bug-1111490.t35
-rw-r--r--tests/bugs/geo-replication/bug-1296496.t44
-rwxr-xr-xtests/bugs/geo-replication/bug-877293.t41
-rw-r--r--tests/bugs/gfapi/bug-1032894.t33
-rw-r--r--tests/bugs/gfapi/bug-1093594.c311
-rwxr-xr-xtests/bugs/gfapi/bug-1093594.t22
-rwxr-xr-xtests/bugs/gfapi/bug-1319374-THIS-crash.t27
-rw-r--r--tests/bugs/gfapi/bug-1319374.c131
-rw-r--r--tests/bugs/gfapi/bug-1447266/1460514.c150
-rw-r--r--tests/bugs/gfapi/bug-1447266/1460514.t26
-rw-r--r--tests/bugs/gfapi/bug-1447266/bug-1447266.c107
-rw-r--r--tests/bugs/gfapi/bug-1447266/bug-1447266.t60
-rw-r--r--tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c112
-rw-r--r--tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t25
-rw-r--r--tests/bugs/gfapi/glfs_vol_set_IO_ERR.c163
-rwxr-xr-xtests/bugs/gfapi/glfs_vol_set_IO_ERR.t20
-rwxr-xr-xtests/bugs/glusterd/859927/repl.t59
-rw-r--r--tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t110
-rw-r--r--tests/bugs/glusterd/brick-mux-validation-in-cluster.t108
-rw-r--r--tests/bugs/glusterd/brick-mux-validation.t104
-rw-r--r--tests/bugs/glusterd/brick-mux.t81
-rw-r--r--tests/bugs/glusterd/brick-order-check-add-brick.t61
-rwxr-xr-xtests/bugs/glusterd/bug-1070734.t80
-rw-r--r--tests/bugs/glusterd/bug-1085330-and-bug-916549.t93
-rwxr-xr-xtests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t93
-rw-r--r--tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t44
-rw-r--r--tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t38
-rw-r--r--tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t29
-rw-r--r--tests/bugs/glusterd/bug-1595320.t93
-rw-r--r--tests/bugs/glusterd/bug-1696046.t113
-rw-r--r--tests/bugs/glusterd/bug-1699339.t73
-rw-r--r--tests/bugs/glusterd/bug-1720566.t50
-rw-r--r--tests/bugs/glusterd/bug-824753-file-locker.c46
-rwxr-xr-xtests/bugs/glusterd/bug-824753.t45
-rw-r--r--tests/bugs/glusterd/bug-948729/bug-948729-force.t103
-rw-r--r--tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t77
-rw-r--r--tests/bugs/glusterd/bug-948729/bug-948729.t80
-rw-r--r--tests/bugs/glusterd/bug-949930.t29
-rw-r--r--tests/bugs/glusterd/check_elastic_server.t63
-rw-r--r--tests/bugs/glusterd/daemon-log-level-option.t93
-rw-r--r--tests/bugs/glusterd/df-results-post-replace-brick-operations.t61
-rw-r--r--tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t71
-rw-r--r--tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t115
-rw-r--r--tests/bugs/glusterd/optimized-basic-testcases.t305
-rw-r--r--tests/bugs/glusterd/quorum-validation.t122
-rw-r--r--tests/bugs/glusterd/rebalance-in-cluster.t52
-rw-r--r--tests/bugs/glusterd/rebalance-operations-in-single-node.t131
-rw-r--r--tests/bugs/glusterd/remove-brick-in-cluster.t60
-rw-r--r--tests/bugs/glusterd/remove-brick-testcases.t119
-rw-r--r--tests/bugs/glusterd/remove-brick-validation.t68
-rw-r--r--tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t80
-rw-r--r--tests/bugs/glusterd/replace-brick-operations.t48
-rw-r--r--tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t63
-rw-r--r--tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t54
-rw-r--r--tests/bugs/glusterd/snapshot-operations.t50
-rw-r--r--tests/bugs/glusterd/sync-post-glusterd-restart.t54
-rw-r--r--tests/bugs/glusterd/validating-options-for-replicated-volume.t142
-rw-r--r--tests/bugs/glusterd/validating-server-quorum.t125
-rwxr-xr-xtests/bugs/glusterfs-server/bug-852147.t85
-rwxr-xr-xtests/bugs/glusterfs-server/bug-861542.t50
-rwxr-xr-xtests/bugs/glusterfs-server/bug-864222.t30
-rw-r--r--tests/bugs/glusterfs-server/bug-873549.t17
-rwxr-xr-xtests/bugs/glusterfs-server/bug-877992.t63
-rwxr-xr-xtests/bugs/glusterfs-server/bug-887145.t99
-rw-r--r--tests/bugs/glusterfs-server/bug-889996.t19
-rwxr-xr-xtests/bugs/glusterfs-server/bug-904300.t65
-rw-r--r--tests/bugs/glusterfs-server/bug-905864.c83
-rw-r--r--tests/bugs/glusterfs-server/bug-905864.t32
-rwxr-xr-xtests/bugs/glusterfs-server/bug-912297.t44
-rw-r--r--tests/bugs/glusterfs/bug-1482528.t100
-rwxr-xr-xtests/bugs/glusterfs/bug-811493.t18
-rwxr-xr-xtests/bugs/glusterfs/bug-844688.t55
-rw-r--r--tests/bugs/glusterfs/bug-848251.t52
-rwxr-xr-xtests/bugs/glusterfs/bug-853690.t91
-rw-r--r--tests/bugs/glusterfs/bug-856455.t43
-rw-r--r--tests/bugs/glusterfs/bug-860297.t13
-rw-r--r--tests/bugs/glusterfs/bug-861015-index.t36
-rw-r--r--tests/bugs/glusterfs/bug-861015-log.t29
-rw-r--r--tests/bugs/glusterfs/bug-866459.t44
-rw-r--r--tests/bugs/glusterfs/bug-867253.t64
-rw-r--r--tests/bugs/glusterfs/bug-869724.t37
-rwxr-xr-xtests/bugs/glusterfs/bug-872923.t59
-rw-r--r--tests/bugs/glusterfs/bug-873962-spb.t40
-rwxr-xr-xtests/bugs/glusterfs/bug-873962.t107
-rwxr-xr-xtests/bugs/glusterfs/bug-879490.t37
-rwxr-xr-xtests/bugs/glusterfs/bug-879494.t37
-rwxr-xr-xtests/bugs/glusterfs/bug-892730.t77
-rw-r--r--tests/bugs/glusterfs/bug-893338.t34
-rwxr-xr-xtests/bugs/glusterfs/bug-893378.t75
-rw-r--r--tests/bugs/glusterfs/bug-895235.t23
-rwxr-xr-xtests/bugs/glusterfs/bug-896431.t89
-rwxr-xr-xtests/bugs/glusterfs/bug-902610.t66
-rw-r--r--tests/bugs/glusterfs/bug-906646.t97
-rw-r--r--tests/bugs/glusterfs/getlk_owner.c124
-rw-r--r--tests/bugs/heal-symlinks.t65
-rw-r--r--tests/bugs/index/bug-1559004-EMLINK-handling.t91
-rw-r--r--tests/bugs/io-cache/bug-858242.c84
-rwxr-xr-xtests/bugs/io-cache/bug-858242.t28
-rw-r--r--tests/bugs/io-cache/bug-read-hang.c125
-rwxr-xr-xtests/bugs/io-cache/bug-read-hang.t34
-rwxr-xr-xtests/bugs/io-stats/bug-1598548.t41
-rwxr-xr-xtests/bugs/logging/bug-823081.t41
-rwxr-xr-xtests/bugs/md-cache/afr-stale-read.t44
-rwxr-xr-xtests/bugs/md-cache/bug-1211863.t69
-rwxr-xr-xtests/bugs/md-cache/bug-1211863_unlink.t49
-rw-r--r--tests/bugs/md-cache/bug-1476324.t27
-rwxr-xr-xtests/bugs/md-cache/bug-1632503.t24
-rw-r--r--tests/bugs/md-cache/bug-1726205.t22
-rwxr-xr-xtests/bugs/md-cache/setxattr-prepoststat.t38
-rwxr-xr-xtests/bugs/nfs/bug-1053579.t114
-rw-r--r--tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t24
-rw-r--r--tests/bugs/nfs/bug-1157223-symlink-mounting.t126
-rw-r--r--tests/bugs/nfs/bug-1161092-nfs-acls.t39
-rwxr-xr-xtests/bugs/nfs/bug-1166862.t69
-rw-r--r--tests/bugs/nfs/bug-1210338.c31
-rw-r--r--tests/bugs/nfs/bug-1210338.t30
-rwxr-xr-xtests/bugs/nfs/bug-1302948.t13
-rwxr-xr-xtests/bugs/nfs/bug-847622.t39
-rwxr-xr-xtests/bugs/nfs/bug-877885.t39
-rwxr-xr-xtests/bugs/nfs/bug-904065.t100
-rwxr-xr-xtests/bugs/nfs/bug-915280.t54
-rwxr-xr-xtests/bugs/nfs/bug-970070.t13
-rwxr-xr-xtests/bugs/nfs/bug-974972.t41
-rw-r--r--tests/bugs/nfs/showmount-many-clients.t43
-rwxr-xr-xtests/bugs/nfs/socket-as-fifo.py33
-rw-r--r--tests/bugs/nfs/socket-as-fifo.t25
-rw-r--r--tests/bugs/nfs/subdir-trailing-slash.t32
-rwxr-xr-xtests/bugs/nfs/zero-atime.t33
-rwxr-xr-xtests/bugs/nl-cache/bug-1451588.t25
-rw-r--r--tests/bugs/posix/bug-1034716.t60
-rwxr-xr-xtests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t60
-rwxr-xr-xtests/bugs/posix/bug-1113960.t98
-rwxr-xr-xtests/bugs/posix/bug-1122028.t51
-rw-r--r--tests/bugs/posix/bug-1175711.c37
-rwxr-xr-xtests/bugs/posix/bug-1175711.t30
-rw-r--r--tests/bugs/posix/bug-1360679.t36
-rwxr-xr-xtests/bugs/posix/bug-1619720.t58
-rw-r--r--tests/bugs/posix/bug-1651445.t54
-rw-r--r--tests/bugs/posix/bug-765380.t39
-rwxr-xr-xtests/bugs/posix/bug-990028.t157
-rw-r--r--tests/bugs/posix/bug-gfid-path.t70
-rw-r--r--tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c104
-rwxr-xr-xtests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t21
-rw-r--r--tests/bugs/posix/disallow-gfid-volumeid-removexattr.t26
-rw-r--r--tests/bugs/protocol/bug-1321578.t82
-rw-r--r--tests/bugs/protocol/bug-1390914.t36
-rw-r--r--tests/bugs/protocol/bug-1433815-auth-allow.t40
-rwxr-xr-xtests/bugs/protocol/bug-762989.t40
-rwxr-xr-xtests/bugs/protocol/bug-808400-dist.t32
-rw-r--r--tests/bugs/protocol/bug-808400-fcntl.c124
-rw-r--r--tests/bugs/protocol/bug-808400-flock.c100
-rwxr-xr-xtests/bugs/protocol/bug-808400-repl.t31
-rwxr-xr-xtests/bugs/protocol/bug-808400.t35
-rwxr-xr-xtests/bugs/quick-read/bug-846240.t59
-rwxr-xr-xtests/bugs/quick-read/bz1523599/bz1523599.t32
-rw-r--r--tests/bugs/quick-read/bz1523599/test_bz1523599.c198
-rw-r--r--tests/bugs/quota/afr-quota-xattr-mdata-heal.t140
-rw-r--r--tests/bugs/quota/bug-1035576.t53
-rw-r--r--tests/bugs/quota/bug-1038598.t52
-rw-r--r--tests/bugs/quota/bug-1087198.t86
-rwxr-xr-xtests/bugs/quota/bug-1104692.t26
-rw-r--r--tests/bugs/quota/bug-1153964.t81
-rw-r--r--tests/bugs/quota/bug-1178130.t44
-rw-r--r--tests/bugs/quota/bug-1235182.t61
-rw-r--r--tests/bugs/quota/bug-1243798.t46
-rw-r--r--tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t53
-rw-r--r--tests/bugs/quota/bug-1260545.t53
-rw-r--r--tests/bugs/quota/bug-1287996.t21
-rw-r--r--tests/bugs/quota/bug-1292020.t28
-rw-r--r--tests/bugs/quota/bug-1293601.t33
-rw-r--r--tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t67
-rw-r--r--tests/bugs/readdir-ahead/bug-1390050.c72
-rw-r--r--tests/bugs/readdir-ahead/bug-1390050.t29
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1436090.t44
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1439640.t31
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1446516.t21
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1512437.t23
-rw-r--r--tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t23
-rw-r--r--tests/bugs/replicate/886998/strict-readdir.t52
-rwxr-xr-xtests/bugs/replicate/bug-1015990-rep.t61
-rwxr-xr-xtests/bugs/replicate/bug-1015990.t69
-rw-r--r--tests/bugs/replicate/bug-1032927.t32
-rwxr-xr-xtests/bugs/replicate/bug-1037501.t104
-rwxr-xr-xtests/bugs/replicate/bug-1046624.t47
-rw-r--r--tests/bugs/replicate/bug-1058797.t48
-rw-r--r--tests/bugs/replicate/bug-1101647.t31
-rw-r--r--tests/bugs/replicate/bug-1130892.t76
-rw-r--r--tests/bugs/replicate/bug-1132102.t28
-rw-r--r--tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t56
-rw-r--r--tests/bugs/replicate/bug-1139230.t58
-rw-r--r--tests/bugs/replicate/bug-1180545.t77
-rw-r--r--tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t57
-rw-r--r--tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t42
-rw-r--r--tests/bugs/replicate/bug-1238398-split-brain-resolution.t51
-rw-r--r--tests/bugs/replicate/bug-1238508-self-heal.t51
-rw-r--r--tests/bugs/replicate/bug-1250170-fsync.c56
-rw-r--r--tests/bugs/replicate/bug-1250170-fsync.t35
-rw-r--r--tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t54
-rw-r--r--tests/bugs/replicate/bug-1292379.t58
-rw-r--r--tests/bugs/replicate/bug-1297695.t43
-rw-r--r--tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t40
-rw-r--r--tests/bugs/replicate/bug-1325792.t25
-rw-r--r--tests/bugs/replicate/bug-1335652.t29
-rw-r--r--tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t46
-rw-r--r--tests/bugs/replicate/bug-1341650.t63
-rw-r--r--tests/bugs/replicate/bug-1363721.t118
-rw-r--r--tests/bugs/replicate/bug-1365455.t54
-rw-r--r--tests/bugs/replicate/bug-1386188-sbrain-fav-child.t82
-rw-r--r--tests/bugs/replicate/bug-1402730.t47
-rw-r--r--tests/bugs/replicate/bug-1408712.t101
-rw-r--r--tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t69
-rw-r--r--tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t79
-rw-r--r--tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t46
-rw-r--r--tests/bugs/replicate/bug-1448804-check-quorum-type-values.t47
-rw-r--r--tests/bugs/replicate/bug-1473026.t31
-rw-r--r--tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t52
-rw-r--r--tests/bugs/replicate/bug-1480525.t18
-rw-r--r--tests/bugs/replicate/bug-1493415-gfid-heal.t78
-rw-r--r--tests/bugs/replicate/bug-1498570-client-iot-graph-check.t48
-rwxr-xr-xtests/bugs/replicate/bug-1539358-split-brain-detection.t89
-rw-r--r--tests/bugs/replicate/bug-1561129-enospc.t24
-rw-r--r--tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t72
-rw-r--r--tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t128
-rw-r--r--tests/bugs/replicate/bug-1626994-info-split-brain.t62
-rw-r--r--tests/bugs/replicate/bug-1637249-gfid-heal.t149
-rw-r--r--tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t45
-rw-r--r--tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t55
-rwxr-xr-xtests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t55
-rw-r--r--tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t95
-rw-r--r--tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t40
-rw-r--r--tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t38
-rwxr-xr-xtests/bugs/replicate/bug-1696599-io-hang.t47
-rw-r--r--tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t136
-rw-r--r--tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t116
-rw-r--r--tests/bugs/replicate/bug-1728770-pass-xattrs.t52
-rw-r--r--tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t102
-rw-r--r--tests/bugs/replicate/bug-1744548-heal-timeout.t47
-rw-r--r--tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t89
-rw-r--r--tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t111
-rw-r--r--tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t74
-rw-r--r--tests/bugs/replicate/bug-1801624-entry-heal.t58
-rw-r--r--tests/bugs/replicate/bug-765564.t86
-rwxr-xr-xtests/bugs/replicate/bug-767585-gfid.t42
-rwxr-xr-xtests/bugs/replicate/bug-802417.t120
-rw-r--r--tests/bugs/replicate/bug-821056.t52
-rwxr-xr-xtests/bugs/replicate/bug-830665.t127
-rwxr-xr-xtests/bugs/replicate/bug-859581.t53
-rwxr-xr-xtests/bugs/replicate/bug-865825.t82
-rw-r--r--tests/bugs/replicate/bug-880898.t30
-rw-r--r--tests/bugs/replicate/bug-884328.t12
-rw-r--r--tests/bugs/replicate/bug-886998.t52
-rw-r--r--tests/bugs/replicate/bug-888174.t62
-rw-r--r--tests/bugs/replicate/bug-913051.t56
-rw-r--r--tests/bugs/replicate/bug-916226.t26
-rw-r--r--tests/bugs/replicate/bug-918437-sh-mtime.t71
-rwxr-xr-xtests/bugs/replicate/bug-921231.t31
-rw-r--r--tests/bugs/replicate/bug-957877.t33
-rw-r--r--tests/bugs/replicate/bug-976800.t29
-rwxr-xr-xtests/bugs/replicate/bug-977797.t96
-rw-r--r--tests/bugs/replicate/bug-978794.t29
-rwxr-xr-xtests/bugs/replicate/bug-979365.t47
-rwxr-xr-xtests/bugs/replicate/bug-986905.t27
-rw-r--r--tests/bugs/replicate/issue-1254-prioritize-enospc.t80
-rw-r--r--tests/bugs/replicate/mdata-heal-no-xattrs.t59
-rw-r--r--tests/bugs/replicate/ta-inode-refresh-read.t40
-rwxr-xr-xtests/bugs/rpc/bug-1043886.t58
-rwxr-xr-xtests/bugs/rpc/bug-847624.t29
-rw-r--r--tests/bugs/rpc/bug-884452.t47
-rwxr-xr-xtests/bugs/rpc/bug-921072.t132
-rwxr-xr-xtests/bugs/rpc/bug-954057.t63
-rw-r--r--tests/bugs/shard/bug-1245547.t35
-rw-r--r--tests/bugs/shard/bug-1248887.t39
-rw-r--r--tests/bugs/shard/bug-1250855.t34
-rw-r--r--tests/bugs/shard/bug-1251824.t109
-rw-r--r--tests/bugs/shard/bug-1256580.t34
-rw-r--r--tests/bugs/shard/bug-1258334.t40
-rw-r--r--tests/bugs/shard/bug-1259651.t40
-rw-r--r--tests/bugs/shard/bug-1260637.t42
-rw-r--r--tests/bugs/shard/bug-1261773.t14
-rw-r--r--tests/bugs/shard/bug-1272986.t35
-rw-r--r--tests/bugs/shard/bug-1342298.t23
-rw-r--r--tests/bugs/shard/bug-1468483.t58
-rw-r--r--tests/bugs/shard/bug-1488546.t25
-rw-r--r--tests/bugs/shard/bug-1568521-EEXIST.t91
-rw-r--r--tests/bugs/shard/bug-1568521.t53
-rw-r--r--tests/bugs/shard/bug-1605056-2.t34
-rw-r--r--tests/bugs/shard/bug-1605056.t63
-rw-r--r--tests/bugs/shard/bug-1669077.t29
-rw-r--r--tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t34
-rw-r--r--tests/bugs/shard/bug-1696136.c122
-rw-r--r--tests/bugs/shard/bug-1696136.t33
-rw-r--r--tests/bugs/shard/bug-1705884.t32
-rw-r--r--tests/bugs/shard/bug-1738419.t29
-rw-r--r--tests/bugs/shard/bug-shard-discard.c70
-rw-r--r--tests/bugs/shard/bug-shard-discard.t65
-rw-r--r--tests/bugs/shard/bug-shard-zerofill.c60
-rw-r--r--tests/bugs/shard/bug-shard-zerofill.t46
-rw-r--r--tests/bugs/shard/configure-lru-limit.t52
-rw-r--r--tests/bugs/shard/issue-1243.t43
-rw-r--r--tests/bugs/shard/issue-1281.t34
-rw-r--r--tests/bugs/shard/issue-1425.t45
-rw-r--r--tests/bugs/shard/parallel-truncate-read.t48
-rw-r--r--tests/bugs/shard/shard-append-test.c183
-rw-r--r--tests/bugs/shard/shard-append-test.t32
-rw-r--r--tests/bugs/shard/shard-fallocate.c113
-rw-r--r--tests/bugs/shard/shard-inode-refcount-test.t30
-rw-r--r--tests/bugs/shard/unlinks-and-renames.t333
-rw-r--r--tests/bugs/shard/zero-flag.t76
-rwxr-xr-xtests/bugs/snapshot/bug-1045333.t44
-rwxr-xr-xtests/bugs/snapshot/bug-1049834.t44
-rw-r--r--tests/bugs/snapshot/bug-1064768.t20
-rw-r--r--tests/bugs/snapshot/bug-1087203.t103
-rwxr-xr-xtests/bugs/snapshot/bug-1090042.t26
-rw-r--r--tests/bugs/snapshot/bug-1109770.t65
-rw-r--r--tests/bugs/snapshot/bug-1109889.t74
-rwxr-xr-xtests/bugs/snapshot/bug-1111041.t40
-rw-r--r--tests/bugs/snapshot/bug-1112613.t49
-rw-r--r--tests/bugs/snapshot/bug-1113975.t38
-rw-r--r--tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t36
-rwxr-xr-xtests/bugs/snapshot/bug-1157991.t30
-rwxr-xr-xtests/bugs/snapshot/bug-1162462.t38
-rwxr-xr-xtests/bugs/snapshot/bug-1162498.t56
-rwxr-xr-xtests/bugs/snapshot/bug-1166197.t52
-rw-r--r--tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t205
-rw-r--r--tests/bugs/snapshot/bug-1168875.t96
-rw-r--r--tests/bugs/snapshot/bug-1178079.t24
-rw-r--r--tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t37
-rw-r--r--tests/bugs/snapshot/bug-1205592.t30
-rw-r--r--tests/bugs/snapshot/bug-1227646.t31
-rwxr-xr-xtests/bugs/snapshot/bug-1232430.t22
-rwxr-xr-xtests/bugs/snapshot/bug-1250387.t26
-rw-r--r--tests/bugs/snapshot/bug-1260848.t28
-rwxr-xr-xtests/bugs/snapshot/bug-1275616.t50
-rw-r--r--tests/bugs/snapshot/bug-1279327.t29
-rw-r--r--tests/bugs/snapshot/bug-1316437.t29
-rw-r--r--tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t56
-rwxr-xr-xtests/bugs/snapshot/bug-1399598-uss-with-ssl.t108
-rw-r--r--tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t133
-rw-r--r--tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t39
-rw-r--r--tests/bugs/snapshot/bug-1597662.t58
-rw-r--r--tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t48
-rw-r--r--tests/bugs/stripe/bug-1002207.t55
-rw-r--r--tests/bugs/stripe/bug-1111454.t20
-rwxr-xr-xtests/bugs/trace/bug-797171.t41
-rwxr-xr-xtests/bugs/transport/bug-873367.t45
-rw-r--r--tests/bugs/unclassified/bug-1034085.t31
-rw-r--r--tests/bugs/unclassified/bug-1357397.t35
-rw-r--r--tests/bugs/unclassified/bug-874498.t64
-rw-r--r--tests/bugs/unclassified/bug-991622.t35
-rwxr-xr-xtests/bugs/upcall/bug-1227204.t29
-rwxr-xr-xtests/bugs/upcall/bug-1369430.t43
-rwxr-xr-xtests/bugs/upcall/bug-1394131.t29
-rwxr-xr-xtests/bugs/upcall/bug-1422776.t30
-rwxr-xr-xtests/bugs/upcall/bug-1458127.t36
-rwxr-xr-xtests/bugs/upcall/bug-upcall-stat.t39
-rw-r--r--tests/bugs/write-behind/bug-1058663.c123
-rw-r--r--tests/bugs/write-behind/bug-1058663.t28
-rw-r--r--tests/bugs/write-behind/bug-1279730.c149
-rwxr-xr-xtests/bugs/write-behind/bug-1279730.t37
-rw-r--r--tests/bugs/write-behind/issue-884.c267
-rwxr-xr-xtests/bugs/write-behind/issue-884.t40
-rw-r--r--tests/changelog.rc9
-rw-r--r--tests/cleanup.sh4
-rw-r--r--tests/cluster.rc218
-rw-r--r--tests/common-utils.rc7
-rw-r--r--tests/configfiles/bad_exports9
-rw-r--r--tests/configfiles/bad_netgroups5
-rw-r--r--tests/configfiles/big_exports10
-rw-r--r--tests/configfiles/exports1
-rw-r--r--tests/configfiles/exports-v61
-rw-r--r--tests/configfiles/exports_bad_opt1
-rw-r--r--tests/configfiles/netgroups4
-rw-r--r--tests/dht.rc174
-rw-r--r--tests/ec.rc18
-rw-r--r--tests/env.rc.in42
-rw-r--r--tests/experimental/.gitignore7
-rw-r--r--tests/fallocate.rc19
-rw-r--r--tests/fdl.rc12
-rwxr-xr-xtests/features/delay-gen.t52
-rw-r--r--tests/features/dh1024.pem5
-rw-r--r--tests/features/fdl-overflow.t72
-rw-r--r--tests/features/fdl.t44
-rw-r--r--tests/features/flock_interrupt.t32
-rw-r--r--tests/features/fuse-lru-limit.t43
-rw-r--r--tests/features/glfs-lease-recall.c372
-rw-r--r--tests/features/glfs-lease.c717
-rwxr-xr-xtests/features/glfs-lease.t31
-rw-r--r--tests/features/interrupt.t71
-rwxr-xr-xtests/features/ipc.t38
-rwxr-xr-xtests/features/ipctest.py28
-rw-r--r--tests/features/lock_revocation.t54
-rw-r--r--tests/features/mandatory-lock-forced.c143
-rw-r--r--tests/features/mandatory-lock-forced.t80
-rwxr-xr-xtests/features/nuke.t41
-rw-r--r--tests/features/open_and_sleep.c27
-rw-r--r--tests/features/openssl.cnf.in41
-rwxr-xr-xtests/features/readdir-ahead.t45
-rw-r--r--tests/features/recon.t59
-rwxr-xr-xtests/features/ssl-authz.t121
-rw-r--r--tests/features/ssl-ciphers.t244
-rw-r--r--tests/features/subdir-mount.t121
-rwxr-xr-xtests/features/trash.t247
-rwxr-xr-xtests/features/unhashed-auto.t125
-rwxr-xr-xtests/features/weighted-rebalance.t78
-rwxr-xr-xtests/features/worm.t117
-rw-r--r--tests/features/worm_sh.t75
-rw-r--r--tests/fileio.rc61
-rw-r--r--tests/geo-rep.rc495
-rw-r--r--tests/gfid2path/block-mount-access.t51
-rw-r--r--tests/gfid2path/get-gfid-to-path.t72
-rw-r--r--tests/gfid2path/gfid2path_fuse.t166
-rw-r--r--tests/gfid2path/gfid2path_nfs.t152
-rw-r--r--tests/glusterfind/glusterfind-basic.t84
-rw-r--r--tests/include.rc1350
-rw-r--r--tests/line-coverage/afr-heal-info.t43
-rwxr-xr-xtests/line-coverage/arbiter-coverage.t32
-rw-r--r--tests/line-coverage/cli-peer-and-volume-operations.t135
-rw-r--r--tests/line-coverage/cli-volume-top-profile-coverage.t62
-rwxr-xr-xtests/line-coverage/errorgen-coverage.t42
-rw-r--r--tests/line-coverage/log-and-brick-ops-negative-case.t82
-rwxr-xr-xtests/line-coverage/meta-max-coverage.t33
-rw-r--r--tests/line-coverage/namespace-linecoverage.t39
-rwxr-xr-xtests/line-coverage/old-protocol.t37
-rwxr-xr-xtests/line-coverage/quiesce-coverage.t44
-rw-r--r--tests/line-coverage/shard-coverage.t33
-rw-r--r--tests/line-coverage/some-features-in-libglusterfs.t67
-rw-r--r--tests/line-coverage/volfile-with-all-graph-syntax.t73
-rw-r--r--tests/nfs.rc70
-rwxr-xr-xtests/performance/open-behind.t75
-rw-r--r--tests/performance/quick-read.t55
-rw-r--r--tests/snapshot.rc484
-rw-r--r--tests/ssl.rc35
-rw-r--r--tests/thin-arbiter.rc613
-rw-r--r--tests/traps.rc22
-rw-r--r--tests/utils/arequal-checksum.c633
-rw-r--r--tests/utils/changelog/changelog.h125
-rw-r--r--tests/utils/changelog/get-history.c71
-rw-r--r--tests/utils/changelog/test-changelog-api.c98
-rw-r--r--tests/utils/changelog/test-history-api.c111
-rw-r--r--tests/utils/changelogparser.py236
-rwxr-xr-xtests/utils/create-files.py593
-rw-r--r--tests/utils/get-mdata-xattr.c152
-rwxr-xr-xtests/utils/getfattr.py133
-rwxr-xr-xtests/utils/gfid-access.py124
-rw-r--r--tests/utils/libcxattr.py108
-rwxr-xr-xtests/utils/pidof.py45
-rw-r--r--tests/utils/py2py3.py186
-rwxr-xr-xtests/utils/setfattr.py77
-rwxr-xr-xtests/utils/testn.sh16
-rw-r--r--tests/vagrant/vagrant-template-centos6/Vagrantfile55
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml92
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml4
-rw-r--r--tests/vagrant/vagrant-template-centos6/setup.yml15
-rw-r--r--tests/vagrant/vagrant-template-fedora/Vagrantfile56
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml85
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml4
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/setup.yml16
-rw-r--r--tests/volume.rc1002
-rw-r--r--tools/Makefile.am3
-rw-r--r--tools/gfind_missing_files/Makefile.am32
-rw-r--r--tools/gfind_missing_files/gcrawler.c581
-rw-r--r--tools/gfind_missing_files/gfid_to_path.py162
-rw-r--r--tools/gfind_missing_files/gfid_to_path.sh43
-rw-r--r--tools/gfind_missing_files/gfind_missing_files.sh119
-rw-r--r--tools/glusterfind/Makefile.am24
-rwxr-xr-xtools/glusterfind/S57glusterfind-delete-post.py69
-rw-r--r--tools/glusterfind/glusterfind.in18
-rw-r--r--tools/glusterfind/src/Makefile.am16
-rw-r--r--tools/glusterfind/src/__init__.py9
-rw-r--r--tools/glusterfind/src/brickfind.py118
-rw-r--r--tools/glusterfind/src/changelog.py469
-rw-r--r--tools/glusterfind/src/changelogdata.py440
-rw-r--r--tools/glusterfind/src/conf.py31
-rw-r--r--tools/glusterfind/src/gfind_py2py3.py88
-rw-r--r--tools/glusterfind/src/libgfchangelog.py92
-rw-r--r--tools/glusterfind/src/main.py921
-rw-r--r--tools/glusterfind/src/nodeagent.py139
-rw-r--r--tools/glusterfind/src/tool.conf.in10
-rw-r--r--tools/glusterfind/src/utils.py267
-rw-r--r--tools/setgfid2path/Makefile.am5
-rw-r--r--tools/setgfid2path/gluster-setgfid2path.854
-rw-r--r--tools/setgfid2path/src/Makefile.am16
-rw-r--r--tools/setgfid2path/src/main.c130
-rw-r--r--xlators/Makefile.am12
-rw-r--r--xlators/bindings/Makefile.am1
-rw-r--r--xlators/bindings/python/src/Makefile.am19
-rw-r--r--xlators/bindings/python/src/gluster.py47
-rw-r--r--xlators/bindings/python/src/glusterstack.py55
-rw-r--r--xlators/bindings/python/src/glustertypes.py167
-rw-r--r--xlators/bindings/python/src/python.c232
-rw-r--r--xlators/bindings/python/src/testxlator.py56
-rw-r--r--xlators/cluster/Makefile.am2
-rw-r--r--xlators/cluster/afr/src/Makefile.am34
-rw-r--r--xlators/cluster/afr/src/afr-common.c10102
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c904
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h21
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2581
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h33
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c2660
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h33
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c3659
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h81
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2718
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h51
-rw-r--r--xlators/cluster/afr/src/afr-messages.h167
-rw-r--r--xlators/cluster/afr/src/afr-open.c666
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c494
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c810
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h32
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c4570
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h128
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c2025
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c3394
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c1007
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c616
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h370
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c2463
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h86
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c3662
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h61
-rw-r--r--xlators/cluster/afr/src/afr.c1701
-rw-r--r--xlators/cluster/afr/src/afr.h2047
-rw-r--r--xlators/cluster/afr/src/pump.c2636
-rw-r--r--xlators/cluster/afr/src/pump.h78
-rw-r--r--xlators/cluster/dht/src/Makefile.am33
-rw-r--r--xlators/cluster/dht/src/dht-common.c13983
-rw-r--r--xlators/cluster/dht/src/dht-common.h1842
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c700
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c126
-rw-r--r--xlators/cluster/dht/src/dht-helper.c2705
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c2143
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1674
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1192
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c406
-rw-r--r--xlators/cluster/dht/src/dht-lock.c1392
-rw-r--r--xlators/cluster/dht/src/dht-lock.h91
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h37
-rw-r--r--xlators/cluster/dht/src/dht-messages.h386
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c5570
-rw-r--r--xlators/cluster/dht/src/dht-rename.c2441
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c3050
-rw-r--r--xlators/cluster/dht/src/dht-shared.c1104
-rw-r--r--xlators/cluster/dht/src/dht.c688
-rw-r--r--xlators/cluster/dht/src/nufa.c1143
-rw-r--r--xlators/cluster/dht/src/switch.c1602
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c73
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c127
-rw-r--r--xlators/cluster/ec/Makefile.am (renamed from libglusterfsclient/Makefile.am)0
-rw-r--r--xlators/cluster/ec/src/Makefile.am83
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.c109
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.h18
-rw-r--r--xlators/cluster/ec/src/ec-code-c.c11679
-rw-r--r--xlators/cluster/ec/src/ec-code-c.h27
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.c594
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.h191
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.c101
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.h18
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.c144
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.h18
-rw-r--r--xlators/cluster/ec/src/ec-code.c1060
-rw-r--r--xlators/cluster/ec/src/ec-code.h44
-rw-r--r--xlators/cluster/ec/src/ec-combine.c995
-rw-r--r--xlators/cluster/ec/src/ec-combine.h44
-rw-r--r--xlators/cluster/ec/src/ec-common.c3042
-rw-r--r--xlators/cluster/ec/src/ec-common.h234
-rw-r--r--xlators/cluster/ec/src/ec-data.c288
-rw-r--r--xlators/cluster/ec/src/ec-data.h35
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c647
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1487
-rw-r--r--xlators/cluster/ec/src/ec-fops.h254
-rw-r--r--xlators/cluster/ec/src/ec-galois.c183
-rw-r--r--xlators/cluster/ec/src/ec-galois.h32
-rw-r--r--xlators/cluster/ec/src/ec-generic.c1591
-rw-r--r--xlators/cluster/ec/src/ec-gf8.c5882
-rw-r--r--xlators/cluster/ec/src/ec-gf8.h18
-rw-r--r--xlators/cluster/ec/src/ec-heal.c3367
-rw-r--r--xlators/cluster/ec/src/ec-heald.c681
-rw-r--r--xlators/cluster/ec/src/ec-heald.h30
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c867
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h200
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c2046
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c2369
-rw-r--r--xlators/cluster/ec/src/ec-locks.c1128
-rw-r--r--xlators/cluster/ec/src/ec-mem-types.h30
-rw-r--r--xlators/cluster/ec/src/ec-messages.h61
-rw-r--r--xlators/cluster/ec/src/ec-method.c433
-rw-r--r--xlators/cluster/ec/src/ec-method.h48
-rw-r--r--xlators/cluster/ec/src/ec-types.h690
-rw-r--r--xlators/cluster/ec/src/ec.c1873
-rw-r--r--xlators/cluster/ec/src/ec.h34
-rw-r--r--xlators/cluster/ha/Makefile.am3
-rw-r--r--xlators/cluster/ha/src/Makefile.am15
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c204
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h37
-rw-r--r--xlators/cluster/ha/src/ha.c4023
-rw-r--r--xlators/cluster/ha/src/ha.h63
-rw-r--r--xlators/cluster/map/Makefile.am3
-rw-r--r--xlators/cluster/map/src/Makefile.am15
-rw-r--r--xlators/cluster/map/src/map-helper.c358
-rw-r--r--xlators/cluster/map/src/map-mem-types.h35
-rw-r--r--xlators/cluster/map/src/map.c2577
-rw-r--r--xlators/cluster/map/src/map.h77
-rw-r--r--xlators/cluster/stripe/Makefile.am3
-rw-r--r--xlators/cluster/stripe/src/Makefile.am19
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c583
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h30
-rw-r--r--xlators/cluster/stripe/src/stripe.c4974
-rw-r--r--xlators/cluster/stripe/src/stripe.h285
-rw-r--r--xlators/cluster/unify/Makefile.am3
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/Makefile.am2
-rw-r--r--xlators/debug/delay-gen/Makefile.am (renamed from scheduler/random/Makefile.am)2
-rw-r--r--xlators/debug/delay-gen/src/Makefile.am11
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-mem-types.h21
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-messages.h26
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.c697
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.h27
-rw-r--r--xlators/debug/error-gen/src/Makefile.am8
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h26
-rw-r--r--xlators/debug/error-gen/src/error-gen.c2973
-rw-r--r--xlators/debug/error-gen/src/error-gen.h61
-rw-r--r--xlators/debug/io-stats/src/Makefile.am10
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h38
-rw-r--r--xlators/debug/io-stats/src/io-stats.c5876
-rw-r--r--xlators/debug/sink/Makefile.am (renamed from scheduler/alu/Makefile.am)1
-rw-r--r--xlators/debug/sink/src/Makefile.am14
-rw-r--r--xlators/debug/sink/src/sink.c94
-rw-r--r--xlators/debug/trace/src/Makefile.am9
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h20
-rw-r--r--xlators/debug/trace/src/trace.c4929
-rw-r--r--xlators/debug/trace/src/trace.h55
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/rot-13/Makefile.am3
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c210
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h33
-rw-r--r--xlators/features/Makefile.am13
-rw-r--r--xlators/features/arbiter/Makefile.am3
-rw-r--r--xlators/features/arbiter/src/Makefile.am19
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h18
-rw-r--r--xlators/features/arbiter/src/arbiter.c380
-rw-r--r--xlators/features/arbiter/src/arbiter.h21
-rw-r--r--xlators/features/barrier/Makefile.am3
-rw-r--r--xlators/features/barrier/src/Makefile.am17
-rw-r--r--xlators/features/barrier/src/barrier-mem-types.h20
-rw-r--r--xlators/features/barrier/src/barrier.c809
-rw-r--r--xlators/features/barrier/src/barrier.h89
-rw-r--r--xlators/features/bit-rot/Makefile.am (renamed from rpc/rpc-transport/rdma/Makefile.am)0
-rw-r--r--xlators/features/bit-rot/src/Makefile.am1
-rw-r--r--xlators/features/bit-rot/src/bitd/Makefile.am23
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h101
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c78
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h50
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c2070
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h46
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.c124
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.h38
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c2232
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h302
-rw-r--r--xlators/features/bit-rot/src/stub/Makefile.am20
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h178
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-object-version.h30
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c796
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h36
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h117
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c3590
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h515
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes-multi.c90
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c93
-rw-r--r--xlators/features/changelog/lib/examples/c/get-history.c116
-rwxr-xr-xxlators/features/changelog/lib/examples/python/changes.py34
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py71
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am35
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h74
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c224
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c170
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h255
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c1029
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal.h116
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c413
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.c98
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.h28
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c652
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c1020
-rw-r--r--xlators/features/changelog/src/Makefile.am29
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c131
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c232
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h50
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c412
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.h136
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c1977
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h716
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h34
-rw-r--r--xlators/features/changelog/src/changelog-messages.h172
-rw-r--r--xlators/features/changelog/src/changelog-misc.h131
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c359
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.h85
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c440
-rw-r--r--xlators/features/changelog/src/changelog-rpc.h31
-rw-r--r--xlators/features/changelog/src/changelog-rt.c66
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c2989
-rw-r--r--xlators/features/cloudsync/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/Makefile.am46
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h24
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.c60
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.h134
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-c.py324
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-h.py31
-rw-r--r--xlators/features/cloudsync/src/cloudsync-mem-types.h22
-rw-r--r--xlators/features/cloudsync/src/cloudsync-messages.h16
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am11
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c584
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h50
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h203
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c842
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h84
-rw-r--r--xlators/features/cloudsync/src/cloudsync.c2076
-rw-r--r--xlators/features/cloudsync/src/cloudsync.h123
-rw-r--r--xlators/features/compress/Makefile.am3
-rw-r--r--xlators/features/compress/src/Makefile.am19
-rw-r--r--xlators/features/compress/src/cdc-helper.c527
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h23
-rw-r--r--xlators/features/compress/src/cdc.c348
-rw-r--r--xlators/features/compress/src/cdc.h99
-rw-r--r--xlators/features/filter/Makefile.am3
-rw-r--r--xlators/features/filter/src/Makefile.am15
-rw-r--r--xlators/features/filter/src/filter-mem-types.h30
-rw-r--r--xlators/features/filter/src/filter.c1744
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/bindings/python/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am16
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h22
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1420
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h107
-rw-r--r--xlators/features/index/src/Makefile.am14
-rw-r--r--xlators/features/index/src/index-mem-types.h37
-rw-r--r--xlators/features/index/src/index-messages.h33
-rw-r--r--xlators/features/index/src/index.c3256
-rw-r--r--xlators/features/index/src/index.h110
-rw-r--r--xlators/features/leases/Makefile.am3
-rw-r--r--xlators/features/leases/src/Makefile.am20
-rw-r--r--xlators/features/leases/src/leases-internal.c1412
-rw-r--r--xlators/features/leases/src/leases-mem-types.h27
-rw-r--r--xlators/features/leases/src/leases-messages.h33
-rw-r--r--xlators/features/leases/src/leases.c1168
-rw-r--r--xlators/features/leases/src/leases.h259
-rw-r--r--xlators/features/locks/src/Makefile.am19
-rw-r--r--xlators/features/locks/src/clear.c718
-rw-r--r--xlators/features/locks/src/clear.h93
-rw-r--r--xlators/features/locks/src/common.c1970
-rw-r--r--xlators/features/locks/src/common.h280
-rw-r--r--xlators/features/locks/src/entrylk.c1496
-rw-r--r--xlators/features/locks/src/inodelk.c1487
-rw-r--r--xlators/features/locks/src/locks-mem-types.h43
-rw-r--r--xlators/features/locks/src/locks.h348
-rw-r--r--xlators/features/locks/src/pl-messages.h29
-rw-r--r--xlators/features/locks/src/posix.c6051
-rw-r--r--xlators/features/locks/src/reservelk.c611
-rw-r--r--xlators/features/locks/tests/unit-test.c118
-rw-r--r--xlators/features/mac-compat/Makefile.am3
-rw-r--r--xlators/features/mac-compat/src/Makefile.am13
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c248
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am19
-rw-r--r--xlators/features/marker/src/marker-common.c109
-rw-r--r--xlators/features/marker/src/marker-common.h37
-rw-r--r--xlators/features/marker/src/marker-mem-types.h41
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c611
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h103
-rw-r--r--xlators/features/marker/src/marker-quota.c4040
-rw-r--r--xlators/features/marker/src/marker-quota.h213
-rw-r--r--xlators/features/marker/src/marker.c4496
-rw-r--r--xlators/features/marker/src/marker.h239
-rw-r--r--xlators/features/marker/utils/Makefile.am3
-rw-r--r--xlators/features/marker/utils/src/Makefile.am22
-rw-r--r--xlators/features/marker/utils/src/gsyncd.c342
-rw-r--r--xlators/features/marker/utils/src/procdiggy.c128
-rw-r--r--xlators/features/marker/utils/src/procdiggy.h26
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am6
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py224
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py20
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py415
-rw-r--r--xlators/features/marker/utils/syncdaemon/libcxattr.py72
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py915
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py129
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py970
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py282
-rw-r--r--xlators/features/metadisp/Makefile.am3
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/namespace/Makefile.am3
-rw-r--r--xlators/features/namespace/src/Makefile.am17
-rw-r--r--xlators/features/namespace/src/namespace.c1344
-rw-r--r--xlators/features/namespace/src/namespace.h23
-rw-r--r--xlators/features/path-convertor/Makefile.am3
-rw-r--r--xlators/features/path-convertor/src/Makefile.am14
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h32
-rw-r--r--xlators/features/path-convertor/src/path.c1239
-rw-r--r--xlators/features/quiesce/src/Makefile.am10
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h27
-rw-r--r--xlators/features/quiesce/src/quiesce-messages.h28
-rw-r--r--xlators/features/quiesce/src/quiesce.c3401
-rw-r--r--xlators/features/quiesce/src/quiesce.h88
-rw-r--r--xlators/features/quota/src/Makefile.am29
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c503
-rw-r--r--xlators/features/quota/src/quota-mem-types.h45
-rw-r--r--xlators/features/quota/src/quota-messages.h39
-rw-r--r--xlators/features/quota/src/quota.c6961
-rw-r--r--xlators/features/quota/src/quota.h389
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c494
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h38
-rw-r--r--xlators/features/quota/src/quotad-helpers.c107
-rw-r--r--xlators/features/quota/src/quotad-helpers.h24
-rw-r--r--xlators/features/quota/src/quotad.c245
-rw-r--r--xlators/features/read-only/src/Makefile.am14
-rw-r--r--xlators/features/read-only/src/read-only-common.c435
-rw-r--r--xlators/features/read-only/src/read-only-common.h140
-rw-r--r--xlators/features/read-only/src/read-only-mem-types.h20
-rw-r--r--xlators/features/read-only/src/read-only.c191
-rw-r--r--xlators/features/read-only/src/read-only.h37
-rw-r--r--xlators/features/read-only/src/worm-helper.c395
-rw-r--r--xlators/features/read-only/src/worm-helper.h44
-rw-r--r--xlators/features/read-only/src/worm.c750
-rw-r--r--xlators/features/sdfs/Makefile.am3
-rw-r--r--xlators/features/sdfs/src/Makefile.am19
-rw-r--r--xlators/features/sdfs/src/sdfs-messages.h67
-rw-r--r--xlators/features/sdfs/src/sdfs.c1479
-rw-r--r--xlators/features/sdfs/src/sdfs.h49
-rw-r--r--xlators/features/selinux/Makefile.am3
-rw-r--r--xlators/features/selinux/src/Makefile.am20
-rw-r--r--xlators/features/selinux/src/selinux-mem-types.h19
-rw-r--r--xlators/features/selinux/src/selinux-messages.h30
-rw-r--r--xlators/features/selinux/src/selinux.c323
-rw-r--r--xlators/features/selinux/src/selinux.h24
-rw-r--r--xlators/features/shard/Makefile.am3
-rw-r--r--xlators/features/shard/src/Makefile.am17
-rw-r--r--xlators/features/shard/src/shard-mem-types.h24
-rw-r--r--xlators/features/shard/src/shard-messages.h39
-rw-r--r--xlators/features/shard/src/shard.c7382
-rw-r--r--xlators/features/shard/src/shard.h348
-rw-r--r--xlators/features/snapview-client/Makefile.am1
-rw-r--r--xlators/features/snapview-client/src/Makefile.am16
-rw-r--r--xlators/features/snapview-client/src/snapview-client-mem-types.h24
-rw-r--r--xlators/features/snapview-client/src/snapview-client-messages.h71
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c2791
-rw-r--r--xlators/features/snapview-client/src/snapview-client.h101
-rw-r--r--xlators/features/snapview-server/Makefile.am1
-rw-r--r--xlators/features/snapview-server/src/Makefile.am25
-rw-r--r--xlators/features/snapview-server/src/snapview-server-helpers.c715
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mem-types.h25
-rw-r--r--xlators/features/snapview-server/src/snapview-server-messages.h54
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c524
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c2720
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h255
-rw-r--r--xlators/features/thin-arbiter/Makefile.am3
-rw-r--r--xlators/features/thin-arbiter/src/Makefile.am22
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h19
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-messages.h28
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.c661
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.h59
-rw-r--r--xlators/features/trash/src/Makefile.am12
-rw-r--r--xlators/features/trash/src/trash-mem-types.h32
-rw-r--r--xlators/features/trash/src/trash.c3763
-rw-r--r--xlators/features/trash/src/trash.h128
-rw-r--r--xlators/features/upcall/Makefile.am3
-rw-r--r--xlators/features/upcall/src/Makefile.am23
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h18
-rw-r--r--xlators/features/upcall/src/upcall-internal.c689
-rw-r--r--xlators/features/upcall/src/upcall-mem-types.h23
-rw-r--r--xlators/features/upcall/src/upcall-messages.h29
-rw-r--r--xlators/features/upcall/src/upcall.c2505
-rw-r--r--xlators/features/upcall/src/upcall.h131
-rw-r--r--xlators/features/utime/Makefile.am3
-rw-r--r--xlators/features/utime/src/Makefile.am41
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.c28
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.h22
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-c.py147
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-h.py35
-rw-r--r--xlators/features/utime/src/utime-helpers.c110
-rw-r--r--xlators/features/utime/src/utime-helpers.h25
-rw-r--r--xlators/features/utime/src/utime-mem-types.h21
-rw-r--r--xlators/features/utime/src/utime-messages.h29
-rw-r--r--xlators/features/utime/src/utime.c392
-rw-r--r--xlators/features/utime/src/utime.h23
-rw-r--r--xlators/lib/src/libxlator.c745
-rw-r--r--xlators/lib/src/libxlator.h204
-rw-r--r--xlators/meta/Makefile.am2
-rw-r--r--xlators/meta/src/Makefile.am48
-rw-r--r--xlators/meta/src/active-link.c34
-rw-r--r--xlators/meta/src/cmdline-file.c39
-rw-r--r--xlators/meta/src/frames-file.c107
-rw-r--r--xlators/meta/src/graph-dir.c98
-rw-r--r--xlators/meta/src/graphs-dir.c67
-rw-r--r--xlators/meta/src/history-file.c44
-rw-r--r--xlators/meta/src/logfile-link.c34
-rw-r--r--xlators/meta/src/logging-dir.c44
-rw-r--r--xlators/meta/src/loglevel-file.c50
-rw-r--r--xlators/meta/src/mallinfo-file.c36
-rw-r--r--xlators/meta/src/measure-file.c49
-rw-r--r--xlators/meta/src/meminfo-file.c44
-rw-r--r--xlators/meta/src/meta-defaults.c655
-rw-r--r--xlators/meta/src/meta-helpers.c332
-rw-r--r--xlators/meta/src/meta-hooks.h48
-rw-r--r--xlators/meta/src/meta-mem-types.h36
-rw-r--r--xlators/meta/src/meta.c1355
-rw-r--r--xlators/meta/src/meta.h162
-rw-r--r--xlators/meta/src/misc.c67
-rw-r--r--xlators/meta/src/misc.h31
-rw-r--r--xlators/meta/src/name-file.c44
-rw-r--r--xlators/meta/src/option-file.c45
-rw-r--r--xlators/meta/src/options-dir.c65
-rw-r--r--xlators/meta/src/private-file.c44
-rw-r--r--xlators/meta/src/process_uuid-file.c37
-rw-r--r--xlators/meta/src/profile-file.c44
-rw-r--r--xlators/meta/src/root-dir.c77
-rw-r--r--xlators/meta/src/subvolume-link.c56
-rw-r--r--xlators/meta/src/subvolumes-dir.c62
-rw-r--r--xlators/meta/src/top-link.c40
-rw-r--r--xlators/meta/src/tree.c179
-rw-r--r--xlators/meta/src/tree.h35
-rw-r--r--xlators/meta/src/type-file.c44
-rw-r--r--xlators/meta/src/version-file.c37
-rw-r--r--xlators/meta/src/view-dir.c33
-rw-r--r--xlators/meta/src/view.c258
-rw-r--r--xlators/meta/src/view.h32
-rw-r--r--xlators/meta/src/volfile-file.c79
-rw-r--r--xlators/meta/src/xlator-dir.c97
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am79
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.c206
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.h40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c822
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c3812
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.h21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c191
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h53
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c927
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c7852
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h52
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c235
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c478
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c8377
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c2848
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c971
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h109
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c870
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c480
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h115
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h451
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c1144
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c3114
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c1212
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c228
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h27
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c11292
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h371
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c1058
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h82
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c886
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h83
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c152
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h44
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c2736
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.h17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c217
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.h31
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rcu.h36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1832
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2412
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-reset-brick.c376
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c3864
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c486
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.h46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c153
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c796
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c2262
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h283
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c75
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c478
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c4290
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h169
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c10087
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c243
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.h18
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c6587
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h262
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c1047
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h72
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c536
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h112
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c2285
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h131
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c18206
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h935
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c8584
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h371
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c4453
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c3146
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c3027
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1570
-rw-r--r--xlators/mount/fuse/src/Makefile.am29
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c9762
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h776
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c1022
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h45
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c888
-rw-r--r--xlators/mount/fuse/utils/Makefile.am9
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in916
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in618
-rw-r--r--xlators/nfs/server/src/Makefile.am35
-rw-r--r--xlators/nfs/server/src/acl3.c933
-rw-r--r--xlators/nfs/server/src/acl3.h42
-rw-r--r--xlators/nfs/server/src/auth-cache.c496
-rw-r--r--xlators/nfs/server/src/auth-cache.h52
-rw-r--r--xlators/nfs/server/src/exports.c1484
-rw-r--r--xlators/nfs/server/src/exports.h93
-rw-r--r--xlators/nfs/server/src/mount3-auth.c642
-rw-r--r--xlators/nfs/server/src/mount3-auth.h59
-rw-r--r--xlators/nfs/server/src/mount3.c5270
-rw-r--r--xlators/nfs/server/src/mount3.h210
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c329
-rw-r--r--xlators/nfs/server/src/netgroups.c1161
-rw-r--r--xlators/nfs/server/src/netgroups.h53
-rw-r--r--xlators/nfs/server/src/nfs-common.c675
-rw-r--r--xlators/nfs/server/src/nfs-common.h73
-rw-r--r--xlators/nfs/server/src/nfs-fops.c2269
-rw-r--r--xlators/nfs/server/src/nfs-fops.h322
-rw-r--r--xlators/nfs/server/src/nfs-generics.c354
-rw-r--r--xlators/nfs/server/src/nfs-generics.h168
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c790
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h82
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h81
-rw-r--r--xlators/nfs/server/src/nfs-messages.h102
-rw-r--r--xlators/nfs/server/src/nfs.c2811
-rw-r--r--xlators/nfs/server/src/nfs.h177
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c357
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h135
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c4999
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h297
-rw-r--r--xlators/nfs/server/src/nfs3.c9057
-rw-r--r--xlators/nfs/server/src/nfs3.h399
-rw-r--r--xlators/nfs/server/src/nfsserver.sym12
-rw-r--r--xlators/nfs/server/src/nlm4.c4231
-rw-r--r--xlators/nfs/server/src/nlm4.h126
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c182
-rw-r--r--xlators/performance/Makefile.am3
-rw-r--r--xlators/performance/io-cache/src/Makefile.am11
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h69
-rw-r--r--xlators/performance/io-cache/src/io-cache.c3050
-rw-r--r--xlators/performance/io-cache/src/io-cache.h408
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c287
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h24
-rw-r--r--xlators/performance/io-cache/src/page.c1443
-rw-r--r--xlators/performance/io-threads/src/Makefile.am10
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h41
-rw-r--r--xlators/performance/io-threads/src/io-threads.c3405
-rw-r--r--xlators/performance/io-threads/src/io-threads.h118
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h9
-rw-r--r--xlators/performance/md-cache/src/Makefile.am14
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h13
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h29
-rw-r--r--xlators/performance/md-cache/src/md-cache.c4509
-rw-r--r--xlators/performance/nl-cache/Makefile.am3
-rw-r--r--xlators/performance/nl-cache/src/Makefile.am12
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c1201
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-mem-types.h27
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-messages.h29
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c840
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h175
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am16
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h22
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h32
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1101
-rw-r--r--xlators/performance/quick-read/src/Makefile.am10
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h15
-rw-r--r--xlators/performance/quick-read/src/quick-read-messages.h31
-rw-r--r--xlators/performance/quick-read/src/quick-read.c4800
-rw-r--r--xlators/performance/quick-read/src/quick-read.h131
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am10
-rw-r--r--xlators/performance/read-ahead/src/page.c846
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h17
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-messages.h31
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1778
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h183
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am18
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h30
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c1382
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h98
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am12
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c400
-rw-r--r--xlators/performance/write-behind/src/Makefile.am10
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h16
-rw-r--r--xlators/performance/write-behind/src/write-behind-messages.h31
-rw-r--r--xlators/performance/write-behind/src/write-behind.c4045
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/rot-13/Makefile.am (renamed from mod_glusterfs/apache/1.3/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/Makefile.am (renamed from xlators/encryption/rot-13/src/Makefile.am)8
-rw-r--r--xlators/playground/rot-13/src/rot-13.c166
-rw-r--r--xlators/playground/rot-13/src/rot-13.h18
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am17
-rw-r--r--xlators/playground/template/src/template.c186
-rw-r--r--xlators/playground/template/src/template.h43
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am8
-rw-r--r--xlators/protocol/auth/addr/src/addr.c492
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am10
-rw-r--r--xlators/protocol/auth/login/src/login.c300
-rw-r--r--xlators/protocol/client/src/Makefile.am18
-rw-r--r--xlators/protocol/client/src/client-callback.c316
-rw-r--r--xlators/protocol/client/src/client-common.c3589
-rw-r--r--xlators/protocol/client/src/client-common.h630
-rw-r--r--xlators/protocol/client/src/client-handshake.c2841
-rw-r--r--xlators/protocol/client/src/client-helpers.c1003
-rw-r--r--xlators/protocol/client/src/client-lk.c1106
-rw-r--r--xlators/protocol/client/src/client-mem-types.h17
-rw-r--r--xlators/protocol/client/src/client-messages.h174
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6079
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c6177
-rw-r--r--xlators/protocol/client/src/client.c4385
-rw-r--r--xlators/protocol/client/src/client.h550
-rw-r--r--xlators/protocol/client/src/client3_1-fops.c5944
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/Makefile.am3
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/Makefile.am3
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am26
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am30
-rw-r--r--xlators/protocol/server/src/authenticate.c386
-rw-r--r--xlators/protocol/server/src/authenticate.h58
-rw-r--r--xlators/protocol/server/src/server-common.c842
-rw-r--r--xlators/protocol/server/src/server-common.h199
-rw-r--r--xlators/protocol/server/src/server-handshake.c1406
-rw-r--r--xlators/protocol/server/src/server-helpers.c2416
-rw-r--r--xlators/protocol/server/src/server-helpers.h133
-rw-r--r--xlators/protocol/server/src/server-mem-types.h42
-rw-r--r--xlators/protocol/server/src/server-messages.h168
-rw-r--r--xlators/protocol/server/src/server-resolve.c925
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6084
-rw-r--r--xlators/protocol/server/src/server-rpc-fops_v2.c6031
-rw-r--r--xlators/protocol/server/src/server.c2785
-rw-r--r--xlators/protocol/server/src/server.h334
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5691
-rw-r--r--xlators/storage/Makefile.am2
-rw-r--r--xlators/storage/bdb/Makefile.am3
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am25
-rw-r--r--xlators/storage/posix/src/posix-aio.c985
-rw-r--r--xlators/storage/posix/src/posix-aio.h45
-rw-r--r--xlators/storage/posix/src/posix-common.c1524
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c2496
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c243
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h28
-rw-r--r--xlators/storage/posix/src/posix-handle.c1362
-rw-r--r--xlators/storage/posix/src/posix-handle.h334
-rw-r--r--xlators/storage/posix/src/posix-helpers.c4164
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c6004
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h118
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h40
-rw-r--r--xlators/storage/posix/src/posix-messages.h74
-rw-r--r--xlators/storage/posix/src/posix-metadata-disk.h31
-rw-r--r--xlators/storage/posix/src/posix-metadata.c916
-rw-r--r--xlators/storage/posix/src/posix-metadata.h71
-rw-r--r--xlators/storage/posix/src/posix.c4576
-rw-r--r--xlators/storage/posix/src/posix.h732
-rw-r--r--xlators/system/posix-acl/src/Makefile.am17
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h23
-rw-r--r--xlators/system/posix-acl/src/posix-acl-messages.h28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c253
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h53
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c3191
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h104
-rw-r--r--xlators/xlator.sym1
2648 files changed, 674378 insertions, 348149 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000000..84c2efe3fad
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,107 @@
+---
+Language: Cpp
+# BasedOnStyle: Chromium
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: All
+AlwaysBreakAfterReturnType: All
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Linux
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+IncludeCategories:
+ - Regex: '^<.*\.h>'
+ Priority: 1
+ - Regex: '^<.*'
+ Priority: 2
+ - Regex: '.*'
+ Priority: 3
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakAssignment: 200
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Right
+ReflowComments: true
+SortIncludes: false
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Auto
+TabWidth: 8
+UseTab: Never
+...
diff --git a/.github/ISSUE_TEMPLATE b/.github/ISSUE_TEMPLATE
new file mode 100644
index 00000000000..386ed2d8dd5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE
@@ -0,0 +1,30 @@
+<!-- Please use this template while reporting an issue, providing as much information as possible. Failure to do so may result in a delayed response. Thank you! -->
+
+**Description of problem:**
+
+
+**The exact command to reproduce the issue**:
+
+
+**The full output of the command that failed**:
+<details>
+
+
+
+</details>
+
+**Expected results:**
+
+
+**Additional info:**
+
+
+**- The output of the `gluster volume info` command**:
+<details>
+
+
+
+</details>
+
+**- The operating system / glusterfs version**:
+
diff --git a/swift/1.4.8/plugins/conf/db_file.db b/.github/PULL_REQUEST_TEMPLATE
index e69de29bb2d..e69de29bb2d 100644
--- a/swift/1.4.8/plugins/conf/db_file.db
+++ b/.github/PULL_REQUEST_TEMPLATE
diff --git a/.github/RELEASE_TRACKER_TEMPLATE b/.github/RELEASE_TRACKER_TEMPLATE
new file mode 100644
index 00000000000..502bbd5556c
--- /dev/null
+++ b/.github/RELEASE_TRACKER_TEMPLATE
@@ -0,0 +1,12 @@
+<!-- Please use this template while creating a tracker issue -->
+
+**Description of problem:**
+A tracker issue to track the issues that will be fixed as a part of this release
+
+
+**Major or minor release**:
+
+
+**Release version**:
+
+
diff --git a/.github/stale.yml b/.github/stale.yml
new file mode 100644
index 00000000000..460e327c6ea
--- /dev/null
+++ b/.github/stale.yml
@@ -0,0 +1,25 @@
+# Number of days of inactivity before an issue becomes stale
+daysUntilStale: 210
+# Number of days of inactivity before a stale issue is closed
+daysUntilClose: 15
+# Issues with these labels will never be considered stale
+exemptLabels:
+ - pinned
+ - security
+# Label to use when marking an issue as stale
+staleLabel: wontfix
+
+# Comment to post when marking an issue as stale. Set to `false` to disable
+markComment: >
+ Thank you for your contributions.
+
+ Noticed that this issue is not having any activity in last ~6 months! We
+ are marking this issue as stale because it has not had recent activity.
+
+ It will be closed in 2 weeks if no one responds with a comment here.
+
+
+# Comment to post when closing a stale issue. Set to `false` to disable
+closeComment: >
+ Closing this issue as there was no update since my last update on issue.
+ If this is an issue which is still valid, feel free to open it.
diff --git a/.gitignore b/.gitignore
index c5371b26436..fc5ba586f8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,31 +3,123 @@ autom4te.cache
build
config.*
configure
+cscope.*
+tags
depcomp
+INSTALL
install-sh
ltmain.sh
+Makefile
Makefile.in
missing
+stamp-h1
+stamp-h2
+test-driver
+*compile
+*.gcda
+*.gcno
*.sw?
*~
-*lo
-*la
-*o
+*.lo
+*.la
+*.o
+*.tar.gz
+*.rpm
+*.diff
+*.patch
.libs
-Makefile
-stamp-h1
+.deps
+.dirstamp
+
+# Softlinks to test and log
+log
+*.vol
+.clang-format
+
+# cmocka unit tests
+*.log
+*.trs
+*_unittest
# Generated files
-extras/init.d/glusterfs-server.plist
-extras/init.d/glusterfsd-Debian
-extras/init.d/glusterfsd-Redhat
-extras/init.d/glusterfsd-SuSE
+site.h
+*.py[co]
+api/examples/__init__.py
+api/examples/setup.py
+api/src/gfapi.map
+cli/src/gluster
+contrib/fuse-util/fusermount-glusterfs
+extras/geo-rep/gsync-sync-gfid
+extras/geo-rep/schedule_georep.py
+extras/snap_scheduler/conf.py
+extras/init.d/glusterd-Debian
+extras/init.d/glusterd-FreeBSD
+extras/init.d/glusterd-Redhat
+extras/init.d/glusterd-SuSE
+extras/init.d/glusterd.plist
+extras/ocf/glusterd
+extras/ocf/volume
+extras/run-gluster.tmpfiles
+extras/systemd/glusterd.service
+extras/systemd/gluster-ta-volume.service
+extras/systemd/glusterfssharedstorage.service
+extras/who-wrote-glusterfs/gitdm
+geo-replication/.tox
+geo-replication/gsyncd.conf
+geo-replication/src/gsyncd
+geo-replication/src/peer_gsec_create
+geo-replication/src/peer_mountbroker
+geo-replication/src/peer_mountbroker.py
+geo-replication/src/set_geo_rep_pem_keys.sh
+geo-replication/src/peer_georep-sshkey.py
+geo-replication/syncdaemon.egg-info
+geo-replication/syncdaemon/conf.py
+geo-replication/tests/unit/.coverage
+geo-replication/tests/unit/cover
+geo-replication/tests/unit/coverage.xml
+geo-replication/tests/unit/nosetests.xml
+geo-replication/tests/unit/results.html
+glusterfs-api.pc
glusterfs.spec
-libtool
-xlators/mount/fuse/utils/mount.glusterfs
-xlators/mount/fuse/utils/mount_glusterfs
-argp-standalone/libargp.a
+glusterfsd/src/glusterd
+glusterfsd/src/glusterfs
glusterfsd/src/glusterfsd
-libglusterfs/src/spec.lex.c
+glusterfsd/src/gf_attach
+heal/src/glfsheal
+libgfchangelog.pc
+libglusterfs/src/graph.lex.c
libglusterfs/src/y.tab.c
libglusterfs/src/y.tab.h
+libglusterfs/src/defaults.c
+libglusterfs/src/cli1-xdr.h
+libglusterfs/src/protocol-common.h
+libtool
+# copied XDR for cyclic libglusterfs <-> rpc-header dependency
+run-tests.sh
+!tests/basic/fuse/Makefile
+!tests/basic/gfapi/Makefile
+tests/env.rc
+tests/utils/arequal-checksum
+xlators/features/glupy/src/__init__.py
+xlators/features/glupy/src/setup.py
+xlators/mount/fuse/utils/mount.glusterfs
+xlators/mount/fuse/utils/mount_glusterfs
+extras/peer_add_secret_pub
+tools/gfind_missing_files/gcrawler
+tools/glusterfind/glusterfind
+tools/glusterfind/src/tool.conf
+# Eventing
+events/src/eventsapiconf.py
+extras/systemd/glustereventsd.service
+events/src/eventtypes.py
+libglusterfs/src/eventtypes.h
+extras/init.d/glustereventsd-Debian
+extras/init.d/glustereventsd-FreeBSD
+extras/init.d/glustereventsd-Redhat
+tools/setgfid2path/src/gluster-setgfid2path
+xlators/features/cloudsync/src/cloudsync-autogen-fops.c
+xlators/features/cloudsync/src/cloudsync-autogen-fops.h
+xlators/features/utime/src/utime-autogen-fops.c
+xlators/features/utime/src/utime-autogen-fops.h
+tests/basic/metadisp/ftruncate
+xlators/features/metadisp/src/fops.c
diff --git a/.mailmap b/.mailmap
new file mode 100644
index 00000000000..141a1667ffa
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,43 @@
+# .mailmap, see 'git short-log --help' for details
+#
+# This file needs to match extras/who-wrote-glusterfs/gitdm.aliases.
+#
+# Listing of contributors that filed patches with different email addresses.
+# Format: <name> <main-email> <alias> [<alias> ...]
+#
+
+Amar Tumballi <amarts@redhat.com> <amar@gluster.com> <amar@del.gluster.com>
+Anand Avati <avati@redhat.com> <avati@gluster.com> <avati@dev.gluster.com> <avati@amp.gluster.com> <avati@blackhole.gluster.com>
+Anush Shetty <ashetty@redhat.com> <anush@gluster.com>
+Csaba Henk <csaba@redhat.com> <csaba@gluster.com> <csaba@lowlife.hu> <csaba@zresearch.com>
+Günther Deschner <gd@redhat.com> <gd@samba.org>
+Harshavardhana <fharshav@redhat.com> <harsha@gluster.com> <harsha@zresearch.com> <harsha@dev.gluster.com> <harsha@harshavardhana.net>
+Ji-Hyeon Gim <potatogim@gluesys.com> <potatogim@potatogim.net>
+Justin Clift <justin@gluster.org> <jclift@redhat.com>
+Kaleb S. KEITHLEY <kkeithle@redhat.com> <kkeithle@f16node1.kkeithle.usersys.redhat.com> <kkeithle@linux.keithley.org>
+Kaushal M <kaushal@redhat.com> <kaushal@gluster.com>
+Kaushik BV <kbudiger@redhat.com> <kaushikbv@gluster.com>
+Krishna Srinivas <ksriniva@redhat.com> <krishna@gluster.com> <krishna@zresearch.com> <krishna@guest-laptop>
+Krishnan Parthasarathi <kparthas@redhat.com> <kp@gluster.com>
+Louis Zuckerman <louiszuckerman@gmail.com> <me@louiszuckerman.com>
+M S Vishwanath Bhat <vbhat@redhat.com> <msvbhat@gmail.com> <vishwanath@gluster.com>
+Michael Adam <madam@redhat.com> <obnox@samba.org>
+Oleksandr Natalenko <oleksandr@natalenko.name> <o.natalenko@lanet.ua>
+Patrick Uiterwijk <puiterwijk@fedoraproject.org> <patrick@puiterwijk.org>
+Pavan Sondur <pavan@gluster.com> <pavan@dev.gluster.com>
+Pete Zaitcev <zaitcev@kotori.zaitcev.us> <zaitcev@yahoo.com>
+Pranith Kumar K <pkarampu@redhat.com> <pranithk@gluster.com>
+Prashanth Pai <ppai@redhat.com> <nullpai@gmail.com>
+Raghavendra Bhat <raghavendra@redhat.com> <raghavendrabhat@gluster.com>
+Raghavendra G <rgowdapp@redhat.com> <raghavendra@gluster.com> <raghavendra@zresearch.com>
+Rahul C S <rahulcs@redhat.com> <rahulcssjce@gmail.com>
+Rajesh Amaravathi <rajesh@redhat.com> <rajesh@gluster.com> <rajesh.amaravathi@gmail.com>
+Ravishankar N <ravishankar@redhat.com> <root@ravi2.(none)>
+Sakshi Bansal <sabansal@redhat.com> <sabansal@localhost.localdomain>
+Shehjar Tikoo <shehjart@gluster.com> <shehjart@zresearch.com>
+Venky Shankar <vshankar@redhat.com> <venky@gluster.com>
+Vijay Bellur <vbellur@redhat.com> <vijay@gluster.com> <vijay@dev.gluster.com>
+Vijaykumar Koppad <vkoppad@redhat.com> <vijaykumar.koppad@gmail.com>
+Vijaikumar Mallikarjuna <vmallika@redhat.com>
+Vikas Gorur <vikas@gluster.com> <vikas@zresearch.com>
+shishir gowda <sgowda@redhat.com> <shishirng@gluster.com>
diff --git a/.testignore b/.testignore
new file mode 100644
index 00000000000..fe8f838bf2b
--- /dev/null
+++ b/.testignore
@@ -0,0 +1,64 @@
+.github/ISSUE_TEMPLATE
+.github/PULL_REQUEST_TEMPLATE
+.github/stale.yml
+.gitignore
+.mailmap
+.testignore
+.clang-format
+rfc.sh
+submit-for-review.sh
+AUTHORS
+CONTRIBUTING.md
+COPYING-GPLV2
+COPYING-LGPLV3
+ChangeLog
+INSTALL
+MAINTAINERS
+NEWS
+README.md
+THANKS
+COMMITMENT
+api/examples/README
+api/examples/getvolfile.py
+api/src/README.Symbol_Versions
+build-aux/checkpatch.pl
+contrib/fuse-lib/COPYING.LIB
+contrib/fuse-util/COPYING
+contrib/macfuse/COPYING.txt
+doc/*
+extras/FreeBSD/README.FreeBSD
+extras/Solaris/README.solaris
+extras/Ubuntu/README.Ubuntu
+extras/benchmarking/README
+extras/cliutils/README.md
+extras/command-completion/README
+extras/create_new_xlator/README.md
+extras/glusterfs.vim
+extras/glusterfs-logrotate
+extras/glusterfs-georep-logrotate
+extras/init.d/glusterd-Debian.in
+extras/init.d/glusterd-FreeBSD.in
+extras/init.d/glusterd-Redhat.in
+extras/init.d/glusterd-SuSE.in
+extras/init.d/glusterd.plist.in
+extras/init.d/glustereventsd-Debian.in
+extras/init.d/glustereventsd-FreeBSD.in
+extras/init.d/glustereventsd-Redhat.in
+extras/init.d/rhel5-load-fuse.modules
+extras/logger.conf.example
+extras/snap_scheduler/README.md
+extras/test/ld-preload-test/README
+extras/who-wrote-glusterfs/*
+extras/distributed-testing/*
+geo-replication/syncdaemon/README.md
+geo-replication/test-requirements.txt
+rpc/xdr/src/.gitignore
+tests/README.md
+xlators/experimental/README.md
+xlators/experimental/dht2/README.md
+xlators/experimental/dht2/TODO.md
+xlators/experimental/posix2/README.md
+xlators/experimental/posix2/TODO.md
+xlators/features/glupy/doc/README.md
+xlators/features/glupy/doc/TESTING
+xlators/features/glupy/doc/test.vol
diff --git a/COMMITMENT b/COMMITMENT
new file mode 100644
index 00000000000..16b75efcf29
--- /dev/null
+++ b/COMMITMENT
@@ -0,0 +1,46 @@
+Common Cure Rights Commitment
+Version 1.0
+
+Before filing or continuing to prosecute any legal proceeding or claim
+(other than a Defensive Action) arising from termination of a Covered
+License, we commit to extend to the person or entity ('you') accused
+of violating the Covered License the following provisions regarding
+cure and reinstatement, taken from GPL version 3. As used here, the
+term 'this License' refers to the specific Covered License being
+enforced.
+
+ However, if you cease all violation of this License, then your
+ license from a particular copyright holder is reinstated (a)
+ provisionally, unless and until the copyright holder explicitly
+ and finally terminates your license, and (b) permanently, if the
+ copyright holder fails to notify you of the violation by some
+ reasonable means prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+ reinstated permanently if the copyright holder notifies you of the
+ violation by some reasonable means, this is the first time you
+ have received notice of violation of this License (for any work)
+ from that copyright holder, and you cure the violation prior to 30
+ days after your receipt of the notice.
+
+We intend this Commitment to be irrevocable, and binding and
+enforceable against us and assignees of or successors to our
+copyrights.
+
+Definitions
+
+'Covered License' means the GNU General Public License, version 2
+(GPLv2), the GNU Lesser General Public License, version 2.1
+(LGPLv2.1), or the GNU Library General Public License, version 2
+(LGPLv2), all as published by the Free Software Foundation.
+
+'Defensive Action' means a legal proceeding or claim that We bring
+against you in response to a prior proceeding or claim initiated by
+you or your affiliate.
+
+'We' means each contributor to this repository as of the date of
+inclusion of this file, including subsidiaries of a corporate
+contributor.
+
+This work is available under a Creative Commons Attribution-ShareAlike
+4.0 International license (https://creativecommons.org/licenses/by-sa/4.0/).
diff --git a/CONTRIBUTING b/CONTRIBUTING
deleted file mode 100644
index 8b3baa7e5f6..00000000000
--- a/CONTRIBUTING
+++ /dev/null
@@ -1,25 +0,0 @@
- Developer's Certificate of Origin 1.1
-
- By making a contribution to this project, I certify that:
-
- (a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
- (b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
-
- (c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
-
- (d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved. \ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000000..65fc3497104
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,114 @@
+# GlusterFS project Contribution guidelines
+
+## Development Workflow
+
+We follow most of the details as per the [document here](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests). If you are not aware of the github workflow, it is recommended to go through them before continuing here.
+
+
+#### Get the Repository setup
+
+0. Fork Repository
+ - Fork [GlusterFS repository](https://github.com/gluster/glusterfs/fork).
+
+1. Clone Repository
+ - Clone the glusterfs repo freshly from github using below steps.
+
+```
+ git clone git@github.com:${username}/glusterfs.git
+ cd glusterfs/
+ git remote add upstream git@github.com:gluster/glusterfs.git
+```
+
+About two tasks are one time for the life time. You can continue to use the same repository for all the work in future.
+
+#### Development & Other flows
+
+0. Issue:
+ - Make sure there is an issue filed for the task you are working on.
+ - If it is not filed, open the issue with all the description.
+ - If it is a bug fix, add label "Type:Bug".
+ - If it is an RFC, provide all the documentation, and request for "DocApproved", and "SpecApproved" label.
+
+1. Code:
+ - Start coding
+ - Build and test locally
+ - Make sure clang-format is installed and is run on the patch.
+
+2. Keep up-to-date
+ - GlusterFS is a large project with many developers, so there would be one or the other patch everyday.
+ - It is critical for developer to be up-to-date with `devel` repo to be Conflict-Free when PR is opened.
+ - Git provides many options to keep up-to-date, below is one of them
+```
+ git fetch upstream
+ git rebase upstream/devel
+```
+ - It is recommended you keep pushing to your repo every day, so you don't loose any work.
+ - It can be done by `./rfc.sh` (or `git push origin HEAD:issueNNN`)
+
+2. Commit Message / PR description:
+ - The name of the branch on your personal fork can start with issueNNNN, followed by anything of your choice.
+ - PRs continue to have the title of format "component: \<title\>", like it is practiced now.
+ - When you open a PR, having a reference Issue for the commit is mandatory in GlusterFS.
+ - Commit message can have, either `Fixes: #NNNN` or `Updates: #NNNN` in a separate line in the commit message.
+ - Here, NNNN is the Issue ID in glusterfs repository.
+ - Each commit needs the author to have the "Signed-off-by: Name \<email\>" line.
+ - Can do this by `-s` option for `git commit`.
+ - If the PR is not ready for review, apply the label `work-in-progress`.
+ - Check the availability of "Draft PR" is present for you, if yes, use that instead.
+
+3. Tests:
+ - All the required smoke tests would be auto-triggered.
+ - Developers get a chance to retrigger the smoke tests using **"/recheck smoke"** as comment.
+ - The "regression" tests would be triggered by a comment **"/run regression"** from developers in the [@gluster-maintainers](https://github.com/orgs/gluster/teams/gluster-maintainers) group.
+ - Ask for help as comment in PR if you have any questions about the process!
+
+4. Review Process:
+ - `+2` : is equivalent to "Approve" from the people in the maintainer's group.
+ - `+1` : can be given by a maintainer/reviewer by explicitly stating that in the comment.
+ - `-1` : provide details on required changes and pick "Request Changes" while submitting your review.
+ - `-2` : done by adding the `DO-NOT-MERGE` label.
+
+ - Any further discussions can happen as comments in the PR.
+
+5. Making changes:
+ - There are 2 approaches to submit changes done after addressing review comments.
+ - Commit changes as a new commit on top of the original commits in the branch, and push the changes to same branch (issueNNNN)
+ - Commit changes into the same commit with `--amend` option, and do a push to the same branch with `--force` option.
+
+6. Merging:
+ - GlusterFS project follows 'Squash and Merge' method
+ - This is mainly to preserve the historic Gerrit method of one patch in `git log` for one URL link.
+ - This also makes every merge a complete patch, which has passed all tests.
+ - The merging of the patch is expected to be done by the maintainers.
+ - It can be done when all the tests (smoke and regression) pass.
+ - When the PR has 'Approved' flag from corresponding maintainer.
+ - If you feel there is delay, feel free to add a comment, discuss the same in Slack channel, or send email.
+
+## By contributing to this project, the contributor would need to agree to below.
+
+### Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
+
diff --git a/INSTALL b/INSTALL
index 88e28999df9..a56390e54fb 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,32 +1,47 @@
Installation Instructions
*************************
-Run ./configure after untaring the package.
+0. If you have cloned from git, run ./autogen.sh.
+
+1. Run ./configure.
bash# ./configure
GlusterFS configure summary
===========================
- FUSE client : yes
- Infiniband verbs : yes
- epoll IO multiplex : yes
- Berkeley-DB : yes
- libglusterfsclient : yes
- mod_glusterfs : yes
- argp-standalone : no
+ GlusterFS configure summary
+ ===========================
+ FUSE client : yes
+ Infiniband verbs : yes
+ epoll IO multiplex : yes
+ argp-standalone : no
+ fusermount : yes
+ readline : yes
+ georeplication : yes
+ Linux-AIO : yes
+ Enable Debug : no
+ Block Device xlator : yes
+ glupy : yes
+ Use syslog : yes
+ XML output : yes
+ QEMU Block formats : yes
+ Encryption xlator : yes
+
The configure summary will tell you what all components will be built with
GlusterFS. Other than 'argp-standalone' if something else says 'no', that
feature in GlusterFS will not be built. 'argp-standalone' package will only
be used if the system doesn't have a proper argp package installed.
-Now just run 'make' and later run 'make install' to install the package.
+2. Now just run 'make' and later run 'make install' to install the package.
bash# make
bash# make install
-Installation complete :-)
+Installation completed :-)
bash# glusterfs --version
Make sure your version is the latest from the release, and the one you
just installed :-)
+
+For more information on GlusterFS installation refer# http://docs.gluster.org/en/latest/Developer-guide/Building-GlusterFS/
diff --git a/MAINTAINERS b/MAINTAINERS
new file mode 100644
index 00000000000..953e8755fd9
--- /dev/null
+++ b/MAINTAINERS
@@ -0,0 +1,510 @@
+GlusterFS Maintainers
+=====================
+
+The intention of this file is not to establish who owns what portions of the
+code base, but to provide a set of names that developers can consult when they
+have a question about a particular subset and also to provide a set of names
+to be CC'd when submitting a patch to obtain appropriate review.
+
+In general, if you have a question about inclusion of a patch, you should
+consult gluster-devel@gluster.org and not any specific individual privately.
+
+Descriptions of section entries:
+
+ M: Main contact that knows and takes care of this area
+ L: Mailing list that is relevant to this area
+ W: Web-page with status/info
+ Q: Patchwork web based patch tracking system site
+ T: SCM tree type and location. Type is one of: git, hg, quilt, stgit.
+ S: Status, one of the following:
+ Supported: Someone is actually paid to look after this.
+ Maintained: Someone actually looks after it.
+ Odd Fixes: It has a maintainer but they don't have time to do
+ much other than throw the odd patch in. See below.
+ Orphan: No current maintainer [but maybe you could take the
+ role as you write your new code].
+ Obsolete: Old code. Something tagged obsolete generally means
+ it has been replaced by a better system and you
+ should be using that.
+ F: Files and directories with wildcard patterns.
+ A trailing slash includes all files and subdirectory files.
+ F: drivers/net/ all files in and below drivers/net
+ F: drivers/net/* all files in drivers/net, but not below
+ F: */net/* all files in "any top level directory"/net
+ One pattern per line. Multiple F: lines acceptable.
+ X: Files and directories that are NOT maintained, same rules as F:
+ Files exclusions are tested before file matches.
+ Can be useful for excluding a specific subdirectory, for instance:
+ F: net/
+ X: net/ipv6/
+ matches all files in and below net excluding net/ipv6/
+ K: Keyword perl extended regex pattern to match content in a
+ patch or file. For instance:
+ K: of_get_profile
+ matches patches or files that contain "of_get_profile"
+ K: \b(printk|pr_(info|err))\b
+ matches patches or files that contain one or more of the words
+ printk, pr_info or pr_err
+ One regex pattern per line. Multiple K: lines acceptable.
+ P: Peer for a component
+
+
+General Project Architects
+--------------------------
+M: Amar Tumballi <amarts@gmail.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Atin Mukherjee <amukherj@redhat.com>
+
+xlators:
+--------
+Access Control List (ACL)
+M: Raghavendra Talur <rtalur@redhat.com>
+P: Jiffin Tony Thottan <jthottan@redhat.com>
+S: Maintained
+F: xlators/system/posix-acl/
+
+Arbiter
+M: Ravishankar N <ravishankar@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+S: Maintained
+F: xlators/features/arbiter/
+
+Automatic File Replication (AFR)
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+M: Ravishankar N <ravishankar@redhat.com>
+P: Karthik US <ksubrahm@redhat.com>
+S: Maintained
+F: xlators/cluster/afr/
+
+Barrier
+M: Raghavendra Bhat <rabhat@redhat.com>
+P: Atin Mukherjee <amukherj@redhat.com>
+S: Maintained
+F: xlators/features/barrier
+
+BitRot
+M: Kotresh HR <khiremat@redhat.com>
+P: Raghavendra Bhat <rabhat@redhat.com>
+S: Maintained
+F: xlators/features/bit-rot/
+
+Changelog
+M: Aravinda V K <avishwan@redhat.com>
+P: Kotresh HR <khiremat@redhat.com>
+S: Maintained
+F: xlators/features/changelog/
+
+Distributed Hashing Table (DHT)
+P: Susant Palai <spalai@redhat.com>
+S: Maintained
+F: xlators/cluster/dht/
+
+Erasure Coding
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
+P: Ashish Pandey <aspandey@redhat.com>
+S: Maintained
+F: xlators/cluster/ec/
+
+Error-gen
+M: Raghavendra Talur <rtalur@redhat.com>
+S: Maintained
+F: xlators/debug/error-gen/
+
+FUSE Bridge
+M: Csaba Henk <chenk@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+S: Maintained
+F: xlators/mount/
+
+Index
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Ravishankar N <ravishankar@redhat.com>
+S: Maintained
+F: xlators/features/index/
+
+IO Cache
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+S: Maintained
+F: xlators/performance/io-cache/
+
+IO Statistics
+M: Krutika Dhananjay <kdhananj@redhat.com>
+M: Shyam Ranganathan <srangana@redhat.com>
+S: Maintained
+F: xlators/debug/io-stats/
+
+IO threads
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Ravishankar N <ravishankar@redhat.com>
+S: Maintained
+F: xlators/performance/io-threads/
+
+Leases
+M: Poornima G <pgurusid@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+P: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+F: xlators/features/leases/
+
+Locks
+M: Krutika Dhananjay <kdhananj@redhat.com>
+P: Xavier Hernandez <xhernandez@redhat.com>
+S: Maintained
+F: xlators/features/locks/
+
+Marker
+M: Kotresh HR <khiremat@redhat.com>
+S: Maintained
+F: xlators/features/marker/
+
+Meta
+M: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+S: Maintained
+F: xlators/features/meta/
+
+Metadata-cache
+M: Poornima G <pgurusid@redhat.com>
+P: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+F: xlators/performance/md-cache/
+
+Negative-lookup Cache
+M: Poornima G <pgurusid@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+S: Maintained
+F: xlators/performance/nl-cache/
+
+gNFS
+M: Jiffin Tony Thottan <jthottan@redhat.com>
+P: Xie Changlong <xiechanglong@cmss.chinamobile.com>
+P: Amar Tumballi <amarts@gmail.com>
+S: Odd Fixes
+F: xlators/nfs/server/
+
+Open-behind
+S: Maintained
+F: xlators/performance/open-behind/
+
+Posix:
+M: Raghavendra Bhat <raghavendra@redhat.com>
+P: Kotresh HR <khiremat@redhat.com>
+P: Krutika Dhananjay <kdhananj@redhat.com>
+S: Maintained
+F: xlators/storage/posix/
+
+Quick-read
+S: Maintained
+F: xlators/performance/quick-read/
+
+Quota
+M: Shyamsundar Ranganathan <srangana@redhat.com>
+P: Hari Gowtham <hgowtham@redhat.com>
+S: Maintained
+F: xlators/features/quota/
+
+Read-ahead
+P: Csaba Henk <chenk@redhat.com>
+S: Maintained
+F: xlators/performance/read-ahead/
+
+Readdir-ahead
+S: Maintained
+F: xlators/performance/readdir-ahead/
+
+Sharding
+M: Krutika Dhananjay <kdhananj@redhat.com>
+P: Xavier Hernandez <xhernandez@redhat.com>
+S: Maintained
+F: xlators/features/shard/
+
+Trash
+M: Anoop C S <anoopcs@redhat.com>
+M: Jiffin Tony Thottan <jthottan@redhat.com>
+S: Maintained
+F: xlators/features/trash/
+
+Upcall
+M: Poornima G <pgurusid@redhat.com>
+M: Soumya Koduri <skoduri@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+S: Maintained
+F: xlators/features/upcall/
+
+Write-behind
+P: Csaba Henk <chenk@redhat.com>
+S: Maintained
+F: xlators/performance/write-behind/
+
+Write Once Read Many
+P: Karthik US <ksubrahm@redhat.com>
+S: Maintained
+F: xlators/features/read-only/
+
+Cloudsync
+M: Susant Kumar Palai <spalai@redhat.com>
+S: Maintained
+F: xlators/features/cloudsync/
+
+Other bits of code:
+-------------------
+
+Doc
+M: Humble Chirammal <hchiramm@redhat.com>
+M: Raghavendra Talur <rtalur@redhat.com>
+S: Maintained
+F: doc/
+
+Geo Replication
+M: Aravinda V K <avishwan@redhat.com>
+M: Kotresh HR <khiremat@redhat.com>
+M: Sunny Kumar <sunkumar@redhat.com>
+S: Maintained
+F: geo-replication/
+
+Glusterfind
+M: Aravinda VK <avishwan@redhat.com>
+S: Maintained
+F: tools/glusterfind/
+
+libgfapi
+M: Niels de Vos <ndevos@redhat.com>
+P: Poornima G <pgurusid@redhat.com>
+P: Shyamsundar Ranganathan <srangana@redhat.com>
+P: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+F: api/
+
+libglusterfs
+M: Amar Tumballi <amarts@gmail.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
+M: Jeff Darcy <jeff@pl.atyp.us>
+P: Kaleb Keithley <kkeithle@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Shyamsundar Ranganathan <srangana@redhat.com>
+S: Maintained
+F: libglusterfs/
+
+xxhash
+M: Aravinda VK <avishwan@redhat.com>
+M: Kotresh HR <khiremat@redhat.com>
+P: Yaniv Kaul <ykaul@redhat.com>
+S: Maintained
+F: contrib/xxhash/
+T: https://github.com/Cyan4973/xxHash.git
+
+Management Daemon - glusterd
+M: Atin Mukherjee <amukherj@redhat.com>
+M: Mohit Agrawal <moagrawa@redhat.com>
+M: Sanju Rakonde <srakonde@redhat.com>
+S: Maintained
+F: cli/
+F: xlators/mgmt/glusterd/
+
+Protocol
+M: Niels de Vos <ndevos@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+S: Maintained
+F: xlators/protocol/
+
+Remote Procedure Call subsystem
+P: Mohit Agrawal <moagrawa@redhat.com>
+S: Maintained
+F: rpc/rpc-lib/
+F: rpc/xdr/
+
+Snapshot
+M: Raghavendra Bhat <raghavendra@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+P: Sunny Kumar <sunkumar@redhat.com>
+S: Maintained
+F: xlators/mgmt/glusterd/src/glusterd-snap*
+F: extras/snap-scheduler.py
+
+Socket subsystem
+P: Krutika Dhananjay <kdhananj@redhat.com>
+P: Milind Changire <mchangir@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+P: Mohit Agrawal <moagrawa@redhat.com>
+S: Maintained
+F: rpc/rpc-transport/socket/
+
+Testing - .t framework
+M: Raghavendra Talur <rtalur@redhat.com>
+S: Maintained
+F: tests/
+
+Utilities
+M: Aravinda VK <avishwan@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+P: Raghavendra Talur <rtalur@redhat.com>
+P: Sachidanda Urs <surs@redhat.com>
+S: Maintained
+F: extras/
+
+Events APIs
+M: Aravinda VK <avishwan@redhat.com>
+S: Maintained
+F: events/
+F: libglusterfs/src/events*
+F: libglusterfs/src/eventtypes*
+F: extras/systemd/glustereventsd*
+
+Distribution Specific:
+----------------------
+Build:
+M: Niels de Vos <ndevos@redhat.com>
+M: Hari Gowtham <hgowtham@redhat.com>
+P: Anoop C S <anoopcs@redhat.com>
+P: Raghavendra Talur <rtalur@redhat.com>
+P: Rinku Kothiya <rkothiya@redhat.com>
+S: Maintained
+
+Debian packages on download.gluster.org
+M: packaging@gluster.org
+M: Kaleb Keithley <kkeithle@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
+S: Maintained
+W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Debian/Debian.README
+T: https://github.com/gluster/glusterfs-debian.git
+
+OpenSuSE
+M: packaging@gluster.org
+M: Kaleb Keithley <kkeithle@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
+S: Maintained
+W: https://build.opensuse.org/repositories/home:glusterfs
+W: https://download.gluster.org/pub/gluster/glusterfs/LATEST/SuSE/SuSE.README
+T: https://github.com/gluster/glusterfs-suse.git
+
+Packages for the CentOS Storage SIG
+M: centos-devel@centos.org
+M: Niels de Vos <ndevos@redhat.com>
+P: Kaleb Keithley <kkeithle@redhat.com>
+S: Maintained
+W: https://wiki.centos.org/SpecialInterestGroup/Storage/Gluster
+T: https://github.com/CentOS-Storage-SIG/glusterfs.git
+
+Ubuntu PPA
+M: packaging@gluster.org
+M: Kaleb Keithley <kkeithle@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
+S: Maintained
+W: https://launchpad.net/~gluster
+W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Ubuntu/Ubuntu.README
+T: https://github.com/gluster/glusterfs-debian.git
+
+Related projects
+----------------
+Gluster Block
+M: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+M: Xiubo Li <xiubli@redhat.com>
+S: Maintained
+T: https://github.com/gluster/gluster-block.git
+
+GlusterFS core-utils
+M: Anoop C S <anoopcs@redhat.com>
+S: Maintained
+T: https://github.com/gluster/glusterfs-coreutils.git
+
+NFS-Ganesha FSAL plugin
+M: Jiffin Tony Thottan <jthottan@redhat.com>
+M: Kaleb Keithley <kkeithle@redhat.com>
+M: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+T: git://github.com/nfs-ganesha/nfs-ganesha.git
+F: src/nfs-ganesha~/src/FSAL/FSAL_GLUSTER/
+
+QEMU integration
+M: Niels de Vos <ndevos@redhat.com>
+M: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+S: Maintained
+T: git://git.qemu.org/qemu.git
+F: block/gluster.c
+
+Samba VFS plugin
+M: Anoop C S <anoopcs@redhat.com>
+M: Raghavendra Talur <rtalur@redhat.com>
+M: Michael Adam <madam@redhat.com>
+M: Poornima G <pgurusid@redhat.com>
+S: Maintained
+T: git://git.samba.org/samba.git
+F: source3/modules/vfs_glusterfs.c
+
+Storhaug
+M: Jose A. Rivera <jarrpa@redhat.com>
+P: Kaleb Keithley <kkeithle@redhat.com>
+S: Maintained
+T: https://github.com/linux-ha-storage/storhaug.git
+
+Testing - Glusto-Tests
+M: Jonathan Holloway <jholloway@redhat.com>
+M: Vijay Bhaskar Reddy Avuthu <vavuthu@redhat.com>
+M: Akarsha Rai <akrai@redhat.com>
+S: Maintained
+T: https://github.com/gluster/glusto-tests.git
+
+Wireshark dissectors
+M: Niels de Vos <ndevos@redhat.com>
+S: Maintained
+W: https://forge.gluster.org/wireshark
+T: http://code.wireshark.org/git/wireshark
+F: epan/dissectors/packet-gluster*
+
+Infrastructure
+--------------
+
+Platform
+M: Michael Scherer <misc@redhat.com>
+P: Shyamsundar Ranganathan <srangana@redhat.com>
+P: Amar Tumballi <amarts@gmail.com>
+
+Continuous Integration
+M: Michael Scherer <misc@redhat.com>
+M: Deepshikha Khandelwal <dkhandel@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+
+Special Thanks
+--------------
+
+GlusterFS would not be possible without the contributions of:
+
+
+M: Vijay Bellur <vbellur@redhat.com>
+M: Jeff Darcy <jeff@pl.atyp.us>
+M: Shreyas Siravara <sshreyas@fb.com>
+M: Kaushal M <kaushal@redhat.com>
+M: Nigel Babu
+M: Prashanth Pai
+P: Sanoj Unnikrishnan
+P: Milind Changire <mchangir@redhat.com>
+P: Sunil Kumar Acharya <sheggodu@redhat.com>
+M: Samikshan Bairagya <samikshan@gmail.com>
+M: Chris Hertel
+M: M. Mohan Kumar <mohan@in.ibm.com>
+M: Shishir Gowda <gowda.shishir@gmail.com>
+M: Brian Foster <bfoster@redhat.com>
+M: Anand Avati <avati@cs.stanford.edu>
+M: Dennis Schafroth <dennis@schafroth.com>
+M: Harshavardhana <harsha@harshavardhana.net>
+M: Krishnan Parthasarathi
+M: Justin Clift <justin@gluster.org>
+M: Venky Shankar <vshankar@redhat.com>
+M: Shravan Chandrashekar <shravantc99@gmail.com>
+M: Joseph Fernandes
+M: Vijaikumar Mallikarjuna
+M: Anand Subramanian
+M: Bharata B Rao <bharata@linux.vnet.ibm.com>
+M: Rajesh Joseph
+M: Dan Lambright
+M: Jay Vyas
+M: Luis Pabon
+M: Ira Cooper
+M: Shwetha Panduranga
+M: Nithya Balachandran
+M: Raghavendra Gowdappa
diff --git a/Makefile.am b/Makefile.am
index 6693bb8767a..98ea5c1038d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,14 +1,55 @@
-EXTRA_DIST = autogen.sh COPYING-GPLV2 COPYING-LGPLV3 INSTALL README AUTHORS THANKS NEWS glusterfs.spec
+SOURCES = site.h
-SUBDIRS = argp-standalone libglusterfs rpc xlators glusterfsd $(FUSERMOUNT_SUBDIR) doc extras cli
+EXTRA_DIST = autogen.sh \
+ COPYING-GPLV2 COPYING-LGPLV3 COMMITMENT \
+ INSTALL README.md AUTHORS THANKS NEWS \
+ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \
+ run-tests.sh \
+ build-aux/pkg-version \
+ contrib/umountd \
+ $(shell find $(top_srcdir)/tests -type f -print)
-CLEANFILES =
+
+SUBDIRS = $(ARGP_STANDALONE_DIR) libglusterfs rpc libglusterd api \
+ glusterfsd xlators $(FUSERMOUNT_SUBDIR) doc extras cli heal \
+ @SYNCDAEMON_SUBDIR@ @UMOUNTD_SUBDIR@ tools events
+
+pkgconfigdir = @pkgconfigdir@
+pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
+
+CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile
+
+clean-local:
+ find . -name '*.o' -o -name '*.lo' -o -name '.Po' | xargs rm -f
gitclean: distclean
find . -name Makefile.in -exec rm -f {} \;
- find . -name Makefile -exec rm -f {} \;
find . -name mount.glusterfs -exec rm -f {} \;
+ find . -name .deps -o -name .libs | xargs rm -rf
rm -fr autom4te.cache
rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp
- rm -fr argp-standalone/autom4te.cache
- rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in argp-standalone/configure argp-standalone/depcomp argp-standalone/install-sh argp-standalone/missing
+
+# dist-hook gets executed with 'make dist', this is the only target getting
+# executed, a dist-hook in other Makefile.am files seem to get ignored.
+dist-hook: gen-VERSION gen-ChangeLog
+ -rm -fr $(distdir)/contrib/umountd/.deps
+ -rm -f $(distdir)/events/src/eventtypes.py
+ -rm -f $(distdir)/tests/env.rc
+ -cp -f $(top_srcdir)/build-aux/config.sub.dist $(distdir)/config.sub
+ -cp -f $(top_srcdir)/build-aux/config.guess.dist $(distdir)/config.guess
+
+.PHONY: gen-VERSION gen-ChangeLog clang-check
+
+clang-check:
+ @$(top_srcdir)/extras/clang-checker.sh
+
+gen-ChangeLog:
+ (cd $(srcdir) && git diff && echo ===== git log ==== && git log) > $(distdir)/ChangeLog
+
+.PHONY : gen-VERSION
+gen-VERSION:
+ if test -d $(top_srcdir)/.git; then \
+ cd $(top_srcdir); \
+ ./build-aux/pkg-version --full \
+ > $(abs_top_builddir)/$(distdir)/VERSION; \
+ fi
diff --git a/README b/README
deleted file mode 100644
index ce648743ee9..00000000000
--- a/README
+++ /dev/null
@@ -1 +0,0 @@
-For more info, please visit http://www.gluster.org/.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000000..9d68e033782
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+# Gluster
+ Gluster is a software defined distributed storage that can scale to several
+ petabytes. It provides interfaces for object, block and file storage.
+
+## Development
+ The development workflow is documented in [Contributors guide](CONTRIBUTING.md)
+
+## Documentation
+ The Gluster documentation can be found at [Gluster Docs](http://docs.gluster.org).
+
+## Deployment
+ Quick instructions to build and install can be found in [INSTALL](INSTALL) file.
+
+## Testing
+
+ GlusterFS source contains some functional tests under `tests/` directory. All
+ these tests are run against every patch submitted for review. If you want your
+ patch to be tested, please add a `.t` test file as part of your patch submission.
+ You can also submit a patch to only add a `.t` file for the test case you are
+ aware of.
+
+ To run these tests, on your test-machine, just run `./run-tests.sh`. Don't run
+ this on a machine where you have 'production' glusterfs is running, as it would
+ blindly kill all gluster processes in each runs.
+
+ If you are sending a patch, and want to validate one or few specific tests, then
+ run a single test by running the below command.
+
+```
+ bash# /bin/bash ${path_to_gluster}/tests/basic/rpc-coverage.t
+```
+
+ You can also use `prove` tool if available in your machine, as follows.
+
+```
+ bash# prove -vmfe '/bin/bash' ${path_to_gluster}/tests/basic/rpc-coverage.t
+```
+
+
+## Maintainers
+ The list of Gluster maintainers is available in [MAINTAINERS](MAINTAINERS) file.
+
+## License
+ Gluster is dual licensed under [GPLV2](COPYING-GPLV2) and [LGPLV3+](COPYING-LGPLV3).
+
+ Please visit the [Gluster Home Page](http://www.gluster.org/) to find out more about Gluster.
diff --git a/THANKS b/THANKS
index e1ce5105d79..d056d00bb72 100644
--- a/THANKS
+++ b/THANKS
@@ -1 +1 @@
-For all of you, who use the product and help us making it more robust, useful, popular..
+For all of you, who use the product and help us make it more robust, useful, and popular.
diff --git a/api/Makefile.am b/api/Makefile.am
new file mode 100644
index 00000000000..f0ad1ee971c
--- /dev/null
+++ b/api/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src examples
diff --git a/api/examples/Makefile.am b/api/examples/Makefile.am
new file mode 100644
index 00000000000..7112c81d6a7
--- /dev/null
+++ b/api/examples/Makefile.am
@@ -0,0 +1,6 @@
+# The bits needed for glfsxmp
+EXTRA_PROGRAMS = glfsxmp
+glfsxmp_SOURCES = glfsxmp.c
+glfsxmp_CFLAGS = $(GLFS_CFLAGS) -Wall
+glfsxmp_LDADD = $(GLFS_LIBS) -lrt
+
diff --git a/api/examples/README b/api/examples/README
new file mode 100644
index 00000000000..4d2b521f779
--- /dev/null
+++ b/api/examples/README
@@ -0,0 +1,36 @@
+This is an example application which uses libgfapi. It is
+a complete autotools based build system which demonstrates the
+required changes in configure.ac, Makefile.am etc to successfuly
+detect for and build an application against libgfapi.
+
+There are two approaches to building a libgfapi based application:
+
+1. In the presence of pkg-config in your build system.
+This is the recommended approach which is also used in this example.
+For this approach to work, you need to build glusterfs by passing
+--pkgconfigdir=/usr/lib64/pkgconfig (or the appropriate directory)
+in your distro. This already happens if you build RPMs with the
+glusterfs.spec provided in glusterfs.git. You will also need to
+install glusterfs-api RPM.
+
+2. In the absence of pkg-config in your build system.
+Make sure your LDFLAGS includes -L/path/to/lib where libgfapi.so is
+installed and -I/path/to/include/glusterfs where the 'api' directory
+containing the headers are available.
+
+glfsxmp.c
+=========
+
+glfsxmp.c is an example application which uses libgfapi
+
+Compilation Steps For glfsxmp.c
+===============================
+
+1. $./autogen.sh
+2. $./configure
+
+Note: Before running ./configure , as mentioned above, you need to
+ take care of #1 or #2 i.e. pkg-config path or LDFLAGS and
+ -I/<path> with correct values.
+
+3. $make glfsxmp
diff --git a/api/examples/autogen.sh b/api/examples/autogen.sh
new file mode 100755
index 00000000000..1fee6be11bb
--- /dev/null
+++ b/api/examples/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+aclocal
+autoconf
+automake --foreign
diff --git a/api/examples/configure.ac b/api/examples/configure.ac
new file mode 100644
index 00000000000..b80177a4e87
--- /dev/null
+++ b/api/examples/configure.ac
@@ -0,0 +1,12 @@
+
+AC_INIT([glfs-test],[0.1],[gluster-devel@nongu.org])
+
+AM_INIT_AUTOMAKE
+
+AC_CONFIG_FILES([Makefile])
+
+AC_PROG_CC
+
+PKG_CHECK_MODULES([GLFS], [glusterfs-api >= 3])
+
+AC_OUTPUT
diff --git a/api/examples/getvolfile.py b/api/examples/getvolfile.py
new file mode 100755
index 00000000000..3b2c8ab5a15
--- /dev/null
+++ b/api/examples/getvolfile.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import ctypes
+import ctypes.util
+
+api = ctypes.CDLL("libgfapi.so")
+api.glfs_get_volfile.argtypes = [ctypes.c_void_p,
+ ctypes.c_void_p,
+ ctypes.c_ulong]
+api.glfs_get_volfile.restype = ctypes.c_long
+
+
+def get_volfile(host, volume):
+ # This is set to a large value to exercise the "buffer not big enough"
+ # path. More realistically, you'd just start with a huge buffer.
+ BUF_LEN = 0
+ fs = api.glfs_new(volume)
+ # api.glfs_set_logging(fs,"/dev/stderr",7)
+ api.glfs_set_volfile_server(fs, "tcp", host, 24007)
+ api.glfs_init(fs)
+ vbuf = ctypes.create_string_buffer(BUF_LEN)
+ vlen = api.glfs_get_volfile(fs, vbuf, BUF_LEN)
+ if vlen < 0:
+ vlen = BUF_LEN - vlen
+ vbuf = ctypes.create_string_buffer(vlen)
+ vlen = api.glfs_get_volfile(fs, vbuf, vlen)
+ api.glfs_fini(fs)
+ if vlen <= 0:
+ return vlen
+ return vbuf.value[:vlen]
+
+if __name__ == "__main__":
+ import sys
+
+ try:
+ res = get_volfile(*sys.argv[1:3])
+ except:
+ print("fetching volfile failed (volume not started?)")
+
+ try:
+ for line in res.split('\n'):
+ print(line)
+ except:
+ print("bad return value %s" % res)
diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c
new file mode 100644
index 00000000000..a55616ef739
--- /dev/null
+++ b/api/examples/glfsxmp.c
@@ -0,0 +1,1811 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+
+#define TEST_STR_LEN 2048
+
+int
+test_dirops(glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ struct dirent *entry = NULL;
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ fprintf(stderr, "Entries:\n");
+ while (glfs_readdir_r(fd, (struct dirent *)buf, &entry), entry) {
+ fprintf(stderr, "%s: %lu\n", entry->d_name, glfs_telldir(fd));
+ }
+
+ glfs_closedir(fd);
+ return 0;
+}
+
+int
+test_xattr(glfs_t *fs)
+{
+ char *filename = "/filename2";
+ char *linkfile = "/linkfile";
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ char *ptr;
+ int ret;
+
+ ret = glfs_setxattr(fs, filename, "user.testkey", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_setxattr(fs, filename, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_getxattr(fs, filename, "user.testkey", buf, 512);
+ fprintf(stderr, "getxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_listxattr(fs, filename, buf, 512);
+ fprintf(stderr, "listxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_symlink(fs, "filename", linkfile);
+ fprintf(stderr, "symlink(%s %s): %s\n", filename, linkfile,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_readlink(fs, linkfile, buf, 512);
+ fprintf(stderr, "readlink(%s) : %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lsetxattr(fs, filename, "user.testkey3", "testval", 8, 0);
+ fprintf(stderr, "lsetxattr(%s) : %d (%s)\n", linkfile, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_llistxattr(fs, linkfile, buf, 512);
+ fprintf(stderr, "llistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lgetxattr(fs, filename, "user.testkey3", buf, 512);
+ fprintf(stderr, "lgetxattr(%s): %d (%s)\n", linkfile, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_removexattr(fs, filename, "user.testkey2");
+ fprintf(stderr, "removexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ fd = glfs_open(fs, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_fsetxattr(fd, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_fgetxattr(fd, "user.testkey2", buf, 512);
+ fprintf(stderr, "fgetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_flistxattr(fd, buf, 512);
+ fprintf(stderr, "flistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_fremovexattr(fd, "user.testkey2");
+ fprintf(stderr, "fremovexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_chdir(glfs_t *fs)
+{
+ int ret = -1;
+ char *dir = "/dir";
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *linkdir2 = "/linkdir2";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir(fs, topdir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", topdir, strerror(errno));
+ if (ret)
+ return -1;
+
+ ret = glfs_mkdir(fs, dir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", dir, strerror(errno));
+ if (ret)
+ return -1;
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink(fs, "topdir", linkdir);
+ if (ret) {
+ fprintf(stderr, "symlink(%s, %s): %s\n", topdir, linkdir,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_chdir(fs, linkdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", linkdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (respath) {
+ fprintf(stderr, "realpath(%s) worked unexpectedly: %s\n", subdir,
+ respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir(fs, subdir, 0755);
+ if (ret) {
+ fprintf(stderr, "mkdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(%s): %s\n", subdir, strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir(fs, subdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(/linkdir/subdir): %s\n", strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat(struct stat *sb)
+{
+ printf("Dumping stat information:\n");
+ printf("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK:
+ printf("block device\n");
+ break;
+ case S_IFCHR:
+ printf("character device\n");
+ break;
+ case S_IFDIR:
+ printf("directory\n");
+ break;
+ case S_IFIFO:
+ printf("FIFO/pipe\n");
+ break;
+ case S_IFLNK:
+ printf("symlink\n");
+ break;
+ case S_IFREG:
+ printf("regular file\n");
+ break;
+ case S_IFSOCK:
+ printf("socket\n");
+ break;
+ default:
+ printf("unknown?\n");
+ break;
+ }
+
+ printf("I-node number: %ld\n", (long)sb->st_ino);
+
+ printf("Mode: %lo (octal)\n",
+ (unsigned long)sb->st_mode);
+
+ printf("Link count: %ld\n", (long)sb->st_nlink);
+ printf("Ownership: UID=%ld GID=%ld\n", (long)sb->st_uid,
+ (long)sb->st_gid);
+
+ printf("Preferred I/O block size: %ld bytes\n", (long)sb->st_blksize);
+ printf("File size: %lld bytes\n", (long long)sb->st_size);
+ printf("Blocks allocated: %lld\n", (long long)sb->st_blocks);
+
+ printf("Last status change: %s", ctime(&sb->st_ctime));
+ printf("Last file access: %s", ctime(&sb->st_atime));
+ printf("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle(unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++) {
+ printf(":%02x:", glid[i]);
+ }
+ printf("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat(struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle(unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+void
+test_h_unlink(void)
+{
+ char *my_dir = "unlinkdir";
+ char *my_file = "file.txt";
+ char *my_subdir = "dir1";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+ *subdir = NULL, *subleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_unlink tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subdir = glfs_h_mkdir(fs, dir, my_subdir, 0755, &sb);
+ if (subdir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subleaf = glfs_h_creat(fs, subdir, my_file, O_CREAT, 0644, &sb);
+ if (subleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non empty directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+ my_subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, subdir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid errno "
+ ",%d, %s\n",
+ my_file, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid "
+ "errno ,%d, %s\n",
+ my_subdir, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, parent, my_dir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_dir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_unlink tests: PASSED\n");
+
+out:
+ if (dir)
+ glfs_h_close(dir);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (subdir)
+ glfs_h_close(subdir);
+ if (subleaf)
+ glfs_h_close(subleaf);
+ if (parent)
+ glfs_h_close(parent);
+
+ return;
+}
+
+void
+test_h_getsetattrs(void)
+{
+ char *my_dir = "attrdir";
+ char *my_file = "attrfile.txt";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb, retsb;
+ int ret, valid;
+ struct timespec timestamp;
+
+ printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_getattrs(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+ /* TODO: Compare stat information */
+
+ retsb.st_mode = 00666;
+ retsb.st_uid = 1000;
+ retsb.st_gid = 1001;
+ ret = clock_gettime(CLOCK_REALTIME, &timestamp);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ retsb.st_atim = timestamp;
+ retsb.st_mtim = timestamp;
+ valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+ GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+ peek_stat(&retsb);
+
+ ret = glfs_h_setattrs(fs, dir, &retsb, valid);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_setattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ memset(&retsb, 0, sizeof(struct stat));
+ ret = glfs_h_stat(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_stat: error %s: from (%p),%s\n", my_dir, dir,
+ strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+
+ printf("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dir)
+ glfs_h_close(dir);
+
+ return;
+}
+
+void
+test_h_truncate(void)
+{
+ char *my_dir = "truncatedir";
+ char *my_file = "file.txt";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+ struct stat sb;
+ glfs_fd_t *fd = NULL;
+ char buf[32];
+ off_t offset = 0;
+ int ret = 0;
+
+ printf("glfs_h_truncate tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n", my_file,
+ strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ memcpy(buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, buf, 32, 0);
+
+ /* run tests */
+ /* truncate lower */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate higher */
+ offset = 32;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate equal */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_truncate tests: PASSED\n");
+out:
+ if (fd)
+ glfs_close(fd);
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return;
+}
+
+void
+test_h_links(void)
+{
+ char *my_dir = "linkdir";
+ char *my_file = "file.txt";
+ char *my_symlnk = "slnk.txt";
+ char *my_lnk = "lnk.txt";
+ char *linksrc_dir = "dir1";
+ char *linktgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct glfs_object *ln1 = NULL;
+ struct stat sb;
+ int ret;
+ char *buf = NULL;
+
+ printf("glfs_h_link(s) tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, linksrc_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linksrc_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, linktgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linktgt_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+ ln1 = glfs_h_symlink(fs, parent, my_symlnk, "./file.txt", &sb);
+ if (ln1 == NULL) {
+ fprintf(stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+ my_symlnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ buf = calloc(1024, sizeof(char));
+ if (buf == NULL) {
+ fprintf(stderr, "Error allocating memory\n");
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_h_readlink(fs, ln1, buf, 1024);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+ my_symlnk, ln1, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ if (!(strncmp(buf, my_symlnk, strlen(my_symlnk)))) {
+ fprintf(stderr,
+ "glfs_h_readlink: error mismatch in link name: actual %s: "
+ "retrieved %s\n",
+ my_symlnk, buf);
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+ ret = glfs_h_link(fs, leaf, parent, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ /* link: /testdir/linkdir/dir1/file.txt to ../dir2/slnk.txt */
+ ret = glfs_h_link(fs, dleaf, dirtgt, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, dirtgt, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ printf("glfs_h_link(s) tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+ if (ln1)
+ glfs_h_close(ln1);
+ if (buf)
+ free(buf);
+
+ return;
+}
+
+void
+test_h_rename(void)
+{
+ char *my_dir = "renamedir";
+ char *my_file = "file.txt";
+ char *src_dir = "dir1";
+ char *tgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_rename tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, src_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ src_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, tgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ tgt_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* Rename file.txt -> file1.txt */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "file1.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file1.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1/file.txt -> file.txt */
+ ret = glfs_h_rename(fs, dirsrc, "file.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+ src_dir, "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file1.txt -> file.txt (exists) */
+ ret = glfs_h_rename(fs, parent, "file1.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1 -> dir3 */
+ ret = glfs_h_rename(fs, parent, "dir1", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir1",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir2 ->dir3 (exists) */
+ ret = glfs_h_rename(fs, parent, "dir2", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir2",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file.txt -> dir3 (fail) */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "dir3");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "file.txt", "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir3 -> file.txt (fail) */
+ ret = glfs_h_rename(fs, parent, "dir3", parent, "file.txt");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "dir3", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_rename tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+
+ return;
+}
+
+void
+assimilatetime(struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+void
+test_h_performance(void)
+{
+ char *my_dir = "perftest", *full_dir_path = "/testdir/perftest";
+ char *my_file = "file_", my_file_name[MAXPATHNAME];
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb;
+ int ret, i;
+ struct glfs_fd *fd;
+ struct timespec c_ts = {0, 0}, c_ts_st, c_ts_ed;
+ struct timespec o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+ printf("glfs_h_performance tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* create performance */
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s%d", my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_lookupat(fs, dir, my_file_name, &sb, 0);
+ if (leaf != NULL) {
+ fprintf(stderr, "glfs_h_lookup: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file_name, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (handle based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+
+ /* create using path */
+ c_ts.tv_sec = o_ts.tv_sec = 0;
+ c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+ sprintf(my_file_name, "%s1", full_dir_path);
+ ret = glfs_mkdir(fs, my_file_name, 0755);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_mkdir: error creating %s: from (%p),%s\n", my_dir,
+ parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s1/%sn%d", full_dir_path, my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_stat(fs, my_file_name, &sb);
+ if (ret == 0) {
+ fprintf(stderr, "glfs_stat: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, my_file_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_close(fd);
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (path based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+out:
+ return;
+}
+
+int
+test_handleops(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL, *tmp = NULL;
+ char readbuf[32], writebuf[32];
+ unsigned char leaf_handle[GFAPI_HANDLE_LENGTH];
+
+ char *full_leaf_name = "/testdir/testfile.txt", *leaf_name = "testfile.txt",
+ *relative_leaf_name = "testdir/testfile.txt";
+ char *leaf_name1 = "testfile1.txt";
+ char *full_newparent_name = "/testdir/dir1", *newparent_name = "dir1";
+ char *full_newnod_name = "/testdir/nod1", *newnod_name = "nod1";
+
+ /* Initialize test area */
+ ret = glfs_mkdir(fs, full_parent_name, 0755);
+ if (ret != 0 && errno != EEXIST) {
+ fprintf(stderr, "%s: (%p) %s\n", full_parent_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ } else if (ret != 0) {
+ printf("Found test directory %s to be existing\n", full_parent_name);
+ printf("Cleanup test directory and restart tests\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, full_leaf_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "%s: (%p) %s\n", full_leaf_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ glfs_close(fd);
+
+ printf("Initialized the test area, within volume %s\n", argv[1]);
+
+ /* Handle based APIs test area */
+
+ /* glfs_lookupat test */
+ printf("glfs_h_lookupat tests: In Progress\n");
+ /* start at root of the volume */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a parent within root */
+ parent = glfs_h_lookupat(fs, root, parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a leaf/child within the parent */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check absolute paths */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_lookupat(fs, NULL, full_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ /* check multiple component paths */
+ leaf = glfs_h_lookupat(fs, root, relative_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ relative_leaf_name, parent, strerror(errno));
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check symlinks in path */
+
+ /* TODO: -ve test cases */
+ /* parent invalid
+ * path invalid
+ * path does not exist after some components
+ * no parent, but relative path
+ * parent and full path? -ve?
+ */
+
+ printf("glfs_h_lookupat tests: PASSED\n");
+
+ /* glfs_openat test */
+ printf("glfs_h_open tests: In Progress\n");
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 0, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", readbuf, writebuf);
+ glfs_close(fd);
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_close(fd);
+
+ printf("glfs_h_open tests: PASSED\n");
+
+ /* Create tests */
+ printf("glfs_h_creat tests: In Progress\n");
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_creat: existing file, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ tmp = glfs_h_creat(fs, root, parent_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+ fprintf(stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close(tmp);
+ tmp = NULL;
+ }
+ }
+
+ /* TODO: Other combinations and -ve cases as applicable */
+ printf("glfs_h_creat tests: PASSED\n");
+
+ /* extract handle and create from handle test */
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: In "
+ "Progress\n");
+ /* TODO: Change the lookup to create below for a GIFD recovery failure,
+ * that needs to be fixed */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name1, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_extract_handle(leaf, leaf_handle, GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_handle(leaf_handle);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_create_from_handle(fs, leaf_handle, GFAPI_HANDLE_LENGTH, &sb);
+ if (leaf == NULL) {
+ fprintf(
+ stderr,
+ "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ leaf_name1, leaf_handle, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 0, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", writebuf, writebuf);
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ glfs_close(fd);
+ goto out;
+ }
+
+ glfs_close(fd);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+ /* Mkdir tests */
+ printf("glfs_h_mkdir tests: In Progress\n");
+
+ ret = glfs_rmdir(fs, full_newparent_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_rmdir: Failed for %s: %s\n", full_newparent_name,
+ strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newparent_name, parent, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mkdir tests: PASSED\n");
+
+ /* Mknod tests */
+ printf("glfs_h_mknod tests: In Progress\n");
+ ret = glfs_unlink(fs, full_newnod_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_unlink: Failed for %s: %s\n", full_newnod_name,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, S_IFIFO, 0, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newnod_name, parent, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* TODO: create op on a FIFO node hangs, need to check and fix
+ tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || errno != EINVAL) {
+ fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno =
+ %s\n", tmp, strerror (errno)); printf ("glfs_h_creat/mknod tests:
+ FAILED\n"); if (tmp != NULL) { glfs_h_close(tmp); tmp = NULL;
+ }
+ } */
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, 0644, 0, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mknod tests: PASSED\n");
+
+ /* unlink tests */
+ test_h_unlink();
+
+ /* TODO: opendir tests */
+
+ /* getattr tests */
+ test_h_getsetattrs();
+
+ /* TODO: setattr tests */
+
+ /* truncate tests */
+ test_h_truncate();
+
+ /* link tests */
+ test_h_links();
+
+ /* rename tests */
+ test_h_rename();
+
+ /* performance tests */
+ test_h_performance();
+
+ /* END: New APIs test area */
+
+out:
+ /* Cleanup glfs handles */
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return ret;
+}
+
+int
+test_write_apis(glfs_t *fs)
+{
+ /* Add more content here */
+ /* Some apis we can get are */
+ /*
+ 0. glfs_set_xlator_option()
+
+ Read/Write combinations:
+ . glfs_{p,}readv/{p,}writev
+ . glfs_pread/pwrite
+
+ tests/basic/gfapi/gfapi-async-calls-test.c
+ . glfs_read_async/write_async
+ . glfs_pread_async/pwrite_async
+ . glfs_readv_async/writev_async
+ . glfs_preadv_async/pwritev_async
+
+ . ftruncate/ftruncate_async
+ . fsync/fsync_async
+ . fdatasync/fdatasync_async
+
+ */
+
+ glfs_fd_t *fd = NULL;
+ char *filename = "/filename2";
+ int flags = O_RDWR;
+ char *buf = "some bytes!";
+ char writestr[TEST_STR_LEN];
+ struct iovec iov = {&writestr, TEST_STR_LEN};
+ int ret, i;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+
+ fd = glfs_open(fs, filename, flags);
+ if (!fd)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_writev(fd, &iov, 1, flags);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwrite(fd, buf, 10, 4, flags, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwritev(fd, &iov, 1, 4, flags);
+ if (ret < 0) {
+ fprintf(stderr, "pwritev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ return 0;
+}
+
+int
+test_metadata_ops(glfs_t *fs, glfs_t *fs2)
+{
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+
+ char *filename = "/filename2";
+ int ret;
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ fprintf(stderr, "creat(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ fd2 = glfs_open(fs2, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ glfs_lseek(fd2, 0, SEEK_SET);
+
+ ret = glfs_read(fd2, readbuf, 32, 0);
+
+ printf("read %d, %s", ret, readbuf);
+
+ /* get stat */
+ ret = glfs_fstat(fd2, &sb);
+
+ ret = glfs_access(fs, filename, R_OK);
+
+ /* set stat */
+ /* TODO: got some errors, need to fix */
+ /* ret = glfs_fsetattr(fd2, &gsb); */
+
+ glfs_close(fd);
+ glfs_close(fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod(fs, filename, S_IFIFO, 0);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rename(fs, filename, "/filename4");
+ fprintf(stderr, "rename(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_unlink(fs, "/filename4");
+ fprintf(stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror(errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir(fs, filename, 0);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rmdir(fs, filename);
+ fprintf(stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror(errno));
+}
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs2 = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+
+ char *filename = "/filename2";
+
+ if (argc != 3) {
+ printf("Expect following args\n\t%s <volname> <hostname>\n", argv[0]);
+ return -1;
+ }
+
+ fs = glfs_new(argv[1]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ // ret = glfs_set_volfile (fs, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[2], 24007);
+
+ // ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+
+ ret = glfs_set_logging(fs, "/dev/stderr", 7);
+
+ ret = glfs_init(fs);
+
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ if (ret)
+ goto out;
+
+ sleep(2);
+
+ fs2 = glfs_new(argv[1]);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ // ret = glfs_set_volfile (fs2, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server(fs2, "tcp", argv[2], 24007);
+
+ ret = glfs_set_logging(fs2, "/dev/stderr", 7);
+
+ ret = glfs_init(fs2);
+
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ test_metadata_ops(fs, fs2);
+
+ test_dirops(fs);
+
+ test_xattr(fs);
+
+ test_chdir(fs);
+
+ test_handleops(argc, argv);
+ // done
+
+ /* Test some extra apis */
+ test_write_apis(fs);
+
+ glfs_statvfs(fs, "/", &sfs);
+
+ glfs_fini(fs);
+ glfs_fini(fs2);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
new file mode 100644
index 00000000000..7f9a7d17b35
--- /dev/null
+++ b/api/src/Makefile.am
@@ -0,0 +1,43 @@
+lib_LTLIBRARIES = libgfapi.la
+noinst_HEADERS = glfs-mem-types.h glfs-internal.h gfapi-messages.h
+libgfapi_HEADERS = glfs.h glfs-handles.h
+libgfapidir = $(includedir)/glusterfs/api
+
+EXTRA_DIST = gfapi.map gfapi.aliases
+
+libgfapi_la_SOURCES = glfs.c glfs-mgmt.c glfs-fops.c glfs-resolve.c \
+ glfs-handleops.c
+libgfapi_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+
+libgfapi_la_LDFLAGS = -version-info $(GFAPI_LT_VERSION) $(GF_LDFLAGS) \
+ $(GFAPI_EXTRA_LDFLAGS) $(ACL_LIBS)
+
+libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_builddir)/rpc/xdr/src \
+ -DDATADIR=\"$(localstatedir)\" \
+ -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+xlator_LTLIBRARIES = api.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
+# workaround for broken parallel install support in automake with LTLIBRARIES
+# http://debbugs.gnu.org/cgi/bugreport.cgi?bug=7328
+install_xlatorLTLIBRARIES = install-xlatorLTLIBRARIES
+$(install_xlatorLTLIBRARIES): install-libLTLIBRARIES
+
+api_la_SOURCES = glfs-master.c
+api_la_DEPENDENCIES = libgfapi.la
+api_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_builddir)/rpc/xdr/src
+api_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+#api_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(GF_LDFLAGS)
+api_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/api/src/libgfapi.la
diff --git a/api/src/README.Symbol_Versions b/api/src/README.Symbol_Versions
new file mode 100644
index 00000000000..b6ec95f9311
--- /dev/null
+++ b/api/src/README.Symbol_Versions
@@ -0,0 +1,3 @@
+
+See ../../doc/developer-guide/gfapi-symbol-versions.md
+
diff --git a/api/src/gfapi-messages.h b/api/src/gfapi-messages.h
new file mode 100644
index 00000000000..b9223940416
--- /dev/null
+++ b/api/src/gfapi-messages.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2015-2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ * */
+
+#ifndef _GFAPI_MESSAGES_H__
+#define _GFAPI_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(API, API_MSG_MEM_ACCT_INIT_FAILED, API_MSG_MASTER_XLATOR_INIT_FAILED,
+ API_MSG_GFAPI_XLATOR_INIT_FAILED, API_MSG_VOLFILE_OPEN_FAILED,
+ API_MSG_VOL_SPEC_FILE_ERROR, API_MSG_GLFS_FSOBJ_NULL,
+ API_MSG_INVALID_ENTRY, API_MSG_FSMUTEX_LOCK_FAILED,
+ API_MSG_COND_WAIT_FAILED, API_MSG_FSMUTEX_UNLOCK_FAILED,
+ API_MSG_INODE_REFRESH_FAILED, API_MSG_GRAPH_CONSTRUCT_FAILED,
+ API_MSG_API_XLATOR_ERROR, API_MSG_XDR_PAYLOAD_FAILED,
+ API_MSG_GET_VOLINFO_CBK_FAILED, API_MSG_FETCH_VOLUUID_FAILED,
+ API_MSG_INSUFF_SIZE, API_MSG_FRAME_CREAT_FAILED,
+ API_MSG_DICT_SET_FAILED, API_MSG_XDR_DECODE_FAILED,
+ API_MSG_GET_VOLFILE_FAILED, API_MSG_WRONG_OPVERSION,
+ API_MSG_DICT_SERIALIZE_FAILED, API_MSG_REMOTE_HOST_CONN_FAILED,
+ API_MSG_VOLFILE_SERVER_EXHAUST, API_MSG_CREATE_RPC_CLIENT_FAILED,
+ API_MSG_REG_NOTIFY_FUNC_FAILED, API_MSG_REG_CBK_FUNC_FAILED,
+ API_MSG_GET_CWD_FAILED, API_MSG_FGETXATTR_FAILED,
+ API_MSG_LOCKINFO_KEY_MISSING, API_MSG_FSETXATTR_FAILED,
+ API_MSG_FSYNC_FAILED, API_MSG_FDCREATE_FAILED,
+ API_MSG_INODE_PATH_FAILED, API_MSG_SYNCOP_OPEN_FAILED,
+ API_MSG_LOCK_MIGRATE_FAILED, API_MSG_OPENFD_SKIPPED,
+ API_MSG_FIRST_LOOKUP_GRAPH_FAILED, API_MSG_CWD_GRAPH_REF_FAILED,
+ API_MSG_SWITCHED_GRAPH, API_MSG_XDR_RESPONSE_DECODE_FAILED,
+ API_MSG_VOLFILE_INFO, API_MSG_VOLFILE_CONNECTING, API_MSG_NEW_GRAPH,
+ API_MSG_ALLOC_FAILED, API_MSG_CREATE_HANDLE_FAILED,
+ API_MSG_INODE_LINK_FAILED, API_MSG_STATEDUMP_FAILED,
+ API_MSG_XREADDIRP_R_FAILED, API_MSG_LOCK_INSERT_MERGE_FAILED,
+ API_MSG_SETTING_LOCK_TYPE_FAILED, API_MSG_INODE_FIND_FAILED,
+ API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED,
+ API_MSG_INVALID_ARG, API_MSG_UPCALL_EVENT_NULL_RECEIVED,
+ API_MSG_FLAGS_HANDLE, API_MSG_FDCREATE_FAILED_ON_GRAPH,
+ API_MSG_TRANS_RDMA_DEP, API_MSG_TRANS_NOT_SUPPORTED,
+ API_MSG_FS_NOT_INIT, API_MSG_INVALID_SYSRQ,
+ API_MSG_DECODE_XDR_FAILED, API_MSG_NULL, API_MSG_CALL_NOT_SUCCESSFUL,
+ API_MSG_CALL_NOT_VALID, API_MSG_UNABLE_TO_DEL,
+ API_MSG_REMOTE_HOST_DISCONN, API_MSG_HANDLE_NOT_SET);
+
+#define API_MSG_ALLOC_FAILED_STR "Upcall allocation failed"
+#define API_MSG_LOCK_INSERT_MERGE_FAILED_STR \
+ "Lock insertion and splitting/merging failed"
+#define API_MSG_SETTING_LOCK_TYPE_FAILED_STR "Setting lock type failed"
+
+#define API_MSG_INVALID_ARG_STR "Invalid"
+#define API_MSG_INVALID_ENTRY_STR "Upcall entry validation failed"
+#define API_MSG_INODE_FIND_FAILED_STR "Unable to find inode entry"
+#define API_MSG_CREATE_HANDLE_FAILED_STR "handle creation failed"
+#define API_MSG_UPCALL_EVENT_NULL_RECEIVED_STR \
+ "Upcall_EVENT_NULL received. Skipping it"
+#define API_MSG_UPCALL_SYNCOP_FAILED_STR "Synctask for upcall failed"
+#define API_MSG_FDCREATE_FAILED_STR "Allocating anonymous fd failed"
+#define API_MSG_XREADDIRP_R_FAILED_STR "glfs_x_readdirp_r failed"
+#define API_MSG_FDCTX_SET_FAILED_STR "Setting fd ctx failed"
+#define API_MSG_FLAGS_HANDLE_STR "arg not set. Flags handled are"
+#define API_MSG_INODE_REFRESH_FAILED_STR "inode refresh failed"
+#define API_MSG_INODE_LINK_FAILED_STR "inode linking failed"
+#define API_MSG_GET_CWD_FAILED_STR "Failed to get cwd"
+#define API_MSG_FGETXATTR_FAILED_STR "fgetxattr failed"
+#define API_MSG_LOCKINFO_KEY_MISSING_STR "missing lockinfo key"
+#define API_MSG_FSYNC_FAILED_STR "fsync() failed"
+#define API_MSG_FDCREATE_FAILED_ON_GRAPH_STR "fd_create failed on graph"
+#define API_MSG_INODE_PATH_FAILED_STR "inode_path failed"
+#define API_MSG_SYNCOP_OPEN_FAILED_STR "syncop_open failed"
+#define API_MSG_LOCK_MIGRATE_FAILED_STR "lock migration failed on graph"
+#define API_MSG_OPENFD_SKIPPED_STR "skipping openfd in graph"
+#define API_MSG_FIRST_LOOKUP_GRAPH_FAILED_STR "first lookup on graph failed"
+#define API_MSG_CWD_GRAPH_REF_FAILED_STR "cwd refresh of graph failed"
+#define API_MSG_SWITCHED_GRAPH_STR "switched to graph"
+#define API_MSG_FSETXATTR_FAILED_STR "fsetxattr failed"
+#define API_MSG_MEM_ACCT_INIT_FAILED_STR "Memory accounting init failed"
+#define API_MSG_MASTER_XLATOR_INIT_FAILED_STR \
+ "master xlator for initialization failed"
+#define API_MSG_GFAPI_XLATOR_INIT_FAILED_STR \
+ "failed to initialize gfapi translator"
+#define API_MSG_VOLFILE_OPEN_FAILED_STR "volume file open failed"
+#define API_MSG_VOL_SPEC_FILE_ERROR_STR "Cannot reach volume specification file"
+#define API_MSG_TRANS_RDMA_DEP_STR \
+ "transport RDMA is deprecated, falling back to tcp"
+#define API_MSG_TRANS_NOT_SUPPORTED_STR \
+ "transport is not supported, possible values tcp|unix"
+#define API_MSG_GLFS_FSOBJ_NULL_STR "fs is NULL"
+#define API_MSG_FS_NOT_INIT_STR "fs is not properly initialized"
+#define API_MSG_FSMUTEX_LOCK_FAILED_STR \
+ "pthread lock on glfs mutex, returned error"
+#define API_MSG_FSMUTEX_UNLOCK_FAILED_STR \
+ "pthread unlock on glfs mutex, returned error"
+#define API_MSG_COND_WAIT_FAILED_STR "cond wait failed"
+#define API_MSG_INVALID_SYSRQ_STR "not a valid sysrq"
+#define API_MSG_GRAPH_CONSTRUCT_FAILED_STR "failed to construct the graph"
+#define API_MSG_API_XLATOR_ERROR_STR \
+ "api master xlator cannot be specified in volume file"
+#define API_MSG_STATEDUMP_FAILED_STR "statedump failed"
+#define API_MSG_DECODE_XDR_FAILED_STR \
+ "Failed to decode xdr response for GF_CBK_STATEDUMP"
+#define API_MSG_NULL_STR "NULL"
+#define API_MSG_XDR_PAYLOAD_FAILED_STR "failed to create XDR payload"
+#define API_MSG_CALL_NOT_SUCCESSFUL_STR \
+ "GET_VOLUME_INFO RPC call is not successful"
+#define API_MSG_XDR_RESPONSE_DECODE_FAILED_STR \
+ "Failed to decode xdr response for GET_VOLUME_INFO"
+#define API_MSG_CALL_NOT_VALID_STR \
+ "Response received for GET_VOLUME_INFO RPC is not valid"
+#define API_MSG_GET_VOLINFO_CBK_FAILED_STR \
+ "In GET_VOLUME_INFO cbk, received error"
+#define API_MSG_FETCH_VOLUUID_FAILED_STR "Unable to fetch volume UUID"
+#define API_MSG_INSUFF_SIZE_STR "Insufficient size passed"
+#define API_MSG_FRAME_CREAT_FAILED_STR "failed to create the frame"
+#define API_MSG_DICT_SET_FAILED_STR "failed to set"
+#define API_MSG_XDR_DECODE_FAILED_STR "XDR decoding error"
+#define API_MSG_GET_VOLFILE_FAILED_STR "failed to get the volume file"
+#define API_MSG_VOLFILE_INFO_STR "No change in volfile, continuing"
+#define API_MSG_UNABLE_TO_DEL_STR "unable to delete file"
+#define API_MSG_WRONG_OPVERSION_STR \
+ "Server is operating at an op-version which is not supported"
+#define API_MSG_DICT_SERIALIZE_FAILED_STR "Failed to serialize dictionary"
+#define API_MSG_REMOTE_HOST_CONN_FAILED_STR "Failed to connect to remote-host"
+#define API_MSG_REMOTE_HOST_DISCONN_STR "disconnected from remote-host"
+#define API_MSG_VOLFILE_SERVER_EXHAUST_STR "Exhausted all volfile servers"
+#define API_MSG_VOLFILE_CONNECTING_STR "connecting to next volfile server"
+#define API_MSG_CREATE_RPC_CLIENT_FAILED_STR "failed to create rpc clnt"
+#define API_MSG_REG_NOTIFY_FUNC_FAILED_STR "failed to register notify function"
+#define API_MSG_REG_CBK_FUNC_FAILED_STR "failed to register callback function"
+#define API_MSG_NEW_GRAPH_STR "New graph coming up"
+#define API_MSG_HANDLE_NOT_SET_STR "handle not set. Flags handled for xstat are"
+#endif /* !_GFAPI_MESSAGES_H__ */
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
new file mode 100644
index 00000000000..bc639e6b99f
--- /dev/null
+++ b/api/src/gfapi.aliases
@@ -0,0 +1,201 @@
+
+_priv_glfs_loc_touchup _glfs_loc_touchup$GFAPI_PRIVATE_3.4.0
+_priv_glfs_active_subvol _glfs_active_subvol$GFAPI_PRIVATE_3.4.0
+_priv_glfs_subvol_done _glfs_subvol_done$GFAPI_PRIVATE_3.4.0
+_priv_glfs_init_done _glfs_init_done$GFAPI_PRIVATE_3.4.0
+_priv_glfs_resolve_at _glfs_resolve_at$GFAPI_PRIVATE_3.4.0
+
+_pub_glfs_new _glfs_new$GFAPI_3.4.0
+_pub_glfs_set_volfile _glfs_set_volfile$GFAPI_3.4.0
+_pub_glfs_set_volfile_server _glfs_set_volfile_server$GFAPI_3.4.0
+_pub_glfs_set_logging _glfs_set_logging$GFAPI_3.4.0
+_pub_glfs_init _glfs_init$GFAPI_3.4.0
+_pub_glfs_fini _glfs_fini$GFAPI_3.4.0
+_pub_glfs_open _glfs_open$GFAPI_3.4.0
+_pub_glfs_creat _glfs_creat$GFAPI_3.4.0
+_pub_glfs_close _glfs_close$GFAPI_3.4.0
+_pub_glfs_from_glfd _glfs_from_glfd$GFAPI_3.4.0
+_pub_glfs_set_xlator_option _glfs_set_xlator_option$GFAPI_3.4.0
+_pub_glfs_read _glfs_read$GFAPI_3.4.0
+_pub_glfs_write _glfs_write$GFAPI_3.4.0
+_pub_glfs_readv _glfs_readv$GFAPI_3.4.0
+_pub_glfs_writev _glfs_writev$GFAPI_3.4.0
+_pub_glfs_pread34 _glfs_pread$GFAPI_3.4.0
+_pub_glfs_pwrite34 _glfs_pwrite$GFAPI_3.4.0
+_pub_glfs_pread_async34 _glfs_pread_async$GFAPI_3.4.0
+_pub_glfs_pwrite_async34 _glfs_pwrite_async$GFAPI_3.4.0
+_pub_glfs_preadv _glfs_preadv$GFAPI_3.4.0
+_pub_glfs_pwritev _glfs_pwritev$GFAPI_3.4.0
+_pub_glfs_lseek _glfs_lseek$GFAPI_3.4.0
+_pub_glfs_ftruncate34 _glfs_ftruncate$GFAPI_3.4.0
+_pub_glfs_ftruncate_async34 _glfs_ftruncate_async$GFAPI_3.4.0
+_pub_glfs_lstat _glfs_lstat$GFAPI_3.4.0
+_pub_glfs_stat _glfs_stat$GFAPI_3.4.0
+_pub_glfs_fstat _glfs_fstat$GFAPI_3.4.0
+_pub_glfs_fsync34 _glfs_fsync$GFAPI_3.4.0
+_pub_glfs_fsync_async34 _glfs_fsync_async$GFAPI_3.4.0
+_pub_glfs_fdatasync34 _glfs_fdatasync$GFAPI_3.4.0
+_pub_glfs_fdatasync_async34 _glfs_fdatasync_async$GFAPI_3.4.0
+_pub_glfs_access _glfs_access$GFAPI_3.4.0
+_pub_glfs_symlink _glfs_symlink$GFAPI_3.4.0
+_pub_glfs_readlink _glfs_readlink$GFAPI_3.4.0
+_pub_glfs_mknod _glfs_mknod$GFAPI_3.4.0
+_pub_glfs_mkdir _glfs_mkdir$GFAPI_3.4.0
+_pub_glfs_unlink _glfs_unlink$GFAPI_3.4.0
+_pub_glfs_rmdir _glfs_rmdir$GFAPI_3.4.0
+_pub_glfs_rename _glfs_rename$GFAPI_3.4.0
+_pub_glfs_link _glfs_link$GFAPI_3.4.0
+_pub_glfs_opendir _glfs_opendir$GFAPI_3.4.0
+_pub_glfs_readdir_r _glfs_readdir_r$GFAPI_3.4.0
+_pub_glfs_readdirplus_r _glfs_readdirplus_r$GFAPI_3.4.0
+_pub_glfs_telldir _glfs_telldir$GFAPI_3.4.0
+_pub_glfs_seekdir _glfs_seekdir$GFAPI_3.4.0
+_pub_glfs_closedir _glfs_closedir$GFAPI_3.4.0
+_pub_glfs_statvfs _glfs_statvfs$GFAPI_3.4.0
+_pub_glfs_chmod _glfs_chmod$GFAPI_3.4.0
+_pub_glfs_fchmod _glfs_fchmod$GFAPI_3.4.0
+_pub_glfs_chown _glfs_chown$GFAPI_3.4.0
+_pub_glfs_lchown _glfs_lchown$GFAPI_3.4.0
+_pub_glfs_fchown _glfs_fchown$GFAPI_3.4.0
+_pub_glfs_utimens _glfs_utimens$GFAPI_3.4.0
+_pub_glfs_lutimens _glfs_lutimens$GFAPI_3.4.0
+_pub_glfs_futimens _glfs_futimens$GFAPI_3.4.0
+_pub_glfs_getxattr _glfs_getxattr$GFAPI_3.4.0
+_pub_glfs_lgetxattr _glfs_lgetxattr$GFAPI_3.4.0
+_pub_glfs_fgetxattr _glfs_fgetxattr$GFAPI_3.4.0
+_pub_glfs_listxattr _glfs_listxattr$GFAPI_3.4.0
+_pub_glfs_llistxattr _glfs_llistxattr$GFAPI_3.4.0
+_pub_glfs_flistxattr _glfs_flistxattr$GFAPI_3.4.0
+_pub_glfs_setxattr _glfs_setxattr$GFAPI_3.4.0
+_pub_glfs_lsetxattr _glfs_lsetxattr$GFAPI_3.4.0
+_pub_glfs_fsetxattr _glfs_fsetxattr$GFAPI_3.4.0
+_pub_glfs_removexattr _glfs_removexattr$GFAPI_3.4.0
+_pub_glfs_lremovexattr _glfs_lremovexattr$GFAPI_3.4.0
+_pub_glfs_fremovexattr _glfs_fremovexattr$GFAPI_3.4.0
+_pub_glfs_getcwd _glfs_getcwd$GFAPI_3.4.0
+_pub_glfs_chdir _glfs_chdir$GFAPI_3.4.0
+_pub_glfs_fchdir _glfs_fchdir$GFAPI_3.4.0
+_pub_glfs_realpath34 _glfs_realpath$GFAPI_3.4.0
+_pub_glfs_posix_lock _glfs_posix_lock$GFAPI_3.4.0
+_pub_glfs_dup _glfs_dup$GFAPI_3.4.0
+
+_pub_glfs_setfsuid _glfs_setfsuid$GFAPI_3.4.2
+_pub_glfs_setfsgid _glfs_setfsgid$GFAPI_3.4.2
+_pub_glfs_setfsgroups _glfs_setfsgroups$GFAPI_3.4.2
+_pub_glfs_h_lookupat34 _glfs_h_lookupat$GFAPI_3.4.2
+_pub_glfs_h_creat _glfs_h_creat$GFAPI_3.4.2
+_pub_glfs_h_mkdir _glfs_h_mkdir$GFAPI_3.4.2
+_pub_glfs_h_mknod _glfs_h_mknod$GFAPI_3.4.2
+_pub_glfs_h_symlink _glfs_h_symlink$GFAPI_3.4.2
+_pub_glfs_h_unlink _glfs_h_unlink$GFAPI_3.4.2
+_pub_glfs_h_close _glfs_h_close$GFAPI_3.4.2
+_pub_glfs_h_truncate _glfs_h_truncate$GFAPI_3.4.2
+_pub_glfs_h_stat _glfs_h_stat$GFAPI_3.4.2
+_pub_glfs_h_getattrs _glfs_h_getattrs$GFAPI_3.4.2
+_pub_glfs_h_setattrs _glfs_h_setattrs$GFAPI_3.4.2
+_pub_glfs_h_readlink _glfs_h_readlink$GFAPI_3.4.2
+_pub_glfs_h_link _glfs_h_link$GFAPI_3.4.2
+_pub_glfs_h_rename _glfs_h_rename$GFAPI_3.4.2
+_pub_glfs_h_extract_handle _glfs_h_extract_handle$GFAPI_3.4.2
+_pub_glfs_h_create_from_handle _glfs_h_create_from_handle$GFAPI_3.4.2
+_pub_glfs_h_opendir _glfs_h_opendir$GFAPI_3.4.2
+_pub_glfs_h_open _glfs_h_open$GFAPI_3.4.2
+
+_pub_glfs_get_volumeid _glfs_get_volumeid$GFAPI_3.5.0
+_pub_glfs_readdir _glfs_readdir$GFAPI_3.5.0
+_pub_glfs_readdirplus _glfs_readdirplus$GFAPI_3.5.0
+_pub_glfs_fallocate _glfs_fallocate$GFAPI_3.5.0
+_pub_glfs_discard _glfs_discard$GFAPI_3.5.0
+_pub_glfs_zerofill _glfs_zerofill$GFAPI_3.5.0
+_pub_glfs_caller_specific_init _glfs_caller_specific_init$GFAPI_3.5.0
+_pub_glfs_h_setxattrs _glfs_h_setxattrs$GFAPI_3.5.0
+
+_pub_glfs_unset_volfile_server _glfs_unset_volfile_server$GFAPI_3.5.1
+_pub_glfs_h_getxattrs _glfs_h_getxattrs$GFAPI_3.5.1
+_pub_glfs_h_removexattrs _glfs_h_removexattrs$GFAPI_3.5.1
+
+_pub_glfs_get_volfile _glfs_get_volfile$GFAPI_3.6.0
+_pub_glfs_h_access _glfs_h_access$GFAPI_3.6.0
+
+_pub_glfs_h_poll_upcall370 _glfs_h_poll_upcall$GFAPI_3.7.0
+_pub_glfs_h_acl_set _glfs_h_acl_set$GFAPI_3.7.0
+_pub_glfs_h_acl_get _glfs_h_acl_get$GFAPI_3.7.0
+_pub_glfs_h_statfs _glfs_h_statfs$GFAPI_3.7.0
+_pub_glfs_h_anonymous_read _glfs_h_anonymous_read$GFAPI_3.7.0
+_pub_glfs_h_anonymous_write _glfs_h_anonymous_write$GFAPI_3.7.0
+
+_priv_glfs_free_from_ctx _glfs_free_from_ctx$GFAPI_PRIVATE_3.7.0
+_priv_glfs_new_from_ctx _glfs_new_from_ctx$GFAPI_PRIVATE_3.7.0
+_priv_glfs_resolve _glfs_resolve$GFAPI_PRIVATE_3.7.0
+_priv_glfs_process_upcall_event _glfs_process_upcall_event$GFAPI_PRIVATE_3.7.0
+
+_pub_glfs_h_lookupat _glfs_h_lookupat$GFAPI_3.7.4
+
+_pub_glfs_truncate _glfs_truncate$GFAPI_3.7.15
+
+_pub_glfs_free _glfs_free$GFAPI_3.7.16
+_pub_glfs_h_poll_upcall _glfs_h_poll_upcall$GFAPI_3.7.16
+_pub_glfs_upcall_get_fs _glfs_upcall_get_fs$GFAPI_3.7.16
+_pub_glfs_upcall_get_reason _glfs_upcall_get_reason$GFAPI_3.7.16
+_pub_glfs_upcall_get_event _glfs_upcall_get_event$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_object _glfs_upcall_inode_get_object$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_flags _glfs_upcall_inode_get_flags$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_stat _glfs_upcall_inode_get_stat$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_expire _glfs_upcall_inode_get_expire$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_pobject _glfs_upcall_inode_get_pobject$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_pstat _glfs_upcall_inode_get_pstat$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_oldpobject _glfs_upcall_inode_get_oldpobject$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_oldpstat _glfs_upcall_inode_get_oldpstat$GFAPI_3.7.16
+
+_pub_glfs_realpath _glfs_realpath$GFAPI_3.7.17
+
+_pub_glfs_sysrq _glfs_sysrq$GFAPI_3.10.0
+
+_pub_glfs_fd_set_lkowner _glfs_fd_set_lkowner$GFAPI_3.10.7
+
+_pub_glfs_xreaddirplus_r _glfs_xreaddirplus_r$GFAPI_3.11.0
+_pub_glfs_xreaddirplus_r_get_stat _glfs_xreaddirplus_r_get_stat$GFAPI_3.11.0
+_pub_glfs_xreaddirplus_r_get_object _glfs_xreaddirplus_r_get_object$GFAPI_3.11.0
+_pub_glfs_object_copy _glfs_object_copy$GFAPI_3.11.0
+
+_priv_glfs_ipc _glfs_ipc$GFAPI_3.12.0
+
+_pub_glfs_upcall_register _glfs_upcall_register$GFAPI_3.13.0
+_pub_glfs_upcall_unregister _glfs_upcall_unregister$GFAPI_3.13.0
+
+_pub_glfs_setfsleaseid _glfs_setfsleaseid$GFAPI_4.0.0
+_pub_glfs_file_lock _glfs_file_lock$GFAPI_4.0.0
+_pub_glfs_lease _glfs_lease$GFAPI_4.0.0
+_pub_glfs_h_lease _glfs_h_lease$GFAPI_4.0.0
+_pub_glfs_upcall_lease_get_object _glfs_upcall_lease_get_object$GFAPI_4.1.6
+_pub_glfs_upcall_lease_get_lease_type _glfs_upcall_lease_get_lease_type$GFAPI_4.1.6
+
+_priv_glfs_statx _glfs_statx$GFAPI_6.0
+_priv_glfs_iatt_from_statx _glfs_iatt_from_statx$GFAPI_6.0
+_priv_glfs_setfspid _glfs_setfspid$GFAPI_6.1
+
+_pub_glfs_read_async _glfs_read_async$GFAPI_6.0
+_pub_glfs_write_async _glfs_write_async$GFAPI_6.0
+_pub_glfs_readv_async _glfs_readv_async$GFAPI_6.0
+_pub_glfs_writev_async _glfs_writev_async$GFAPI_6.0
+_pub_glfs_pread _glfs_pread$GFAPI_6.0
+_pub_glfs_pwrite _glfs_pwrite$GFAPI_6.0
+_pub_glfs_pread_async _glfs_pread_async$GFAPI_6.0
+_pub_glfs_pwrite_async _glfs_pwrite_async$GFAPI_6.0
+_pub_glfs_preadv_async _glfs_preadv_async$GFAPI_6.0
+_pub_glfs_pwritev_async _glfs_pwritev_async$GFAPI_6.0
+_pub_glfs_fsync _glfs_fsync$GFAPI_6.0
+_pub_glfs_fsync_async _glfs_fsync_async$GFAPI_6.0
+_pub_glfs_fdatasync _glfs_fdatasync$GFAPI_6.0
+_pub_glfs_fdatasync_async _glfs_fdatasync_async$GFAPI_6.0
+_pub_glfs_ftruncate _glfs_ftruncate$GFAPI_6.0
+_pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_6.0
+_pub_glfs_discard_async _glfs_discard_async$GFAPI_6.0
+_pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_6.0
+_pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_6.0
+_pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0
+_pub_glfs_setattr _glfs_setattr$GFAPI_6.0
+
+_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_7.0
+
+_pub_glfs_h_creat_open _glfs_h_creat_open@GFAPI_6.6
diff --git a/api/src/gfapi.map b/api/src/gfapi.map
new file mode 100644
index 00000000000..228ac47c084
--- /dev/null
+++ b/api/src/gfapi.map
@@ -0,0 +1,283 @@
+
+GFAPI_PRIVATE_3.4.0 {
+ global:
+ glfs_loc_touchup;
+ glfs_active_subvol;
+ glfs_subvol_done;
+ glfs_init_done;
+ glfs_resolve_at;
+ local: *;
+};
+
+GFAPI_3.4.0 {
+ global:
+ glfs_new;
+ glfs_set_volfile;
+ glfs_set_volfile_server;
+ glfs_set_logging;
+ glfs_init;
+ glfs_fini;
+ glfs_open;
+ glfs_creat;
+ glfs_close;
+ glfs_from_glfd;
+ glfs_set_xlator_option;
+ glfs_read;
+ glfs_write;
+ glfs_read_async;
+ glfs_write_async;
+ glfs_readv;
+ glfs_writev;
+ glfs_readv_async;
+ glfs_writev_async;
+ glfs_pread;
+ glfs_pwrite;
+ glfs_pread_async;
+ glfs_pwrite_async;
+ glfs_preadv;
+ glfs_pwritev;
+ glfs_preadv_async;
+ glfs_pwritev_async;
+ glfs_lseek;
+ glfs_ftruncate;
+ glfs_ftruncate_async;
+ glfs_lstat;
+ glfs_stat;
+ glfs_fstat;
+ glfs_fsync;
+ glfs_fsync_async;
+ glfs_fdatasync;
+ glfs_fdatasync_async;
+ glfs_access;
+ glfs_symlink;
+ glfs_readlink;
+ glfs_mknod;
+ glfs_mkdir;
+ glfs_unlink;
+ glfs_rmdir;
+ glfs_rename;
+ glfs_link;
+ glfs_opendir;
+ glfs_readdir_r;
+ glfs_readdirplus_r;
+ glfs_telldir;
+ glfs_seekdir;
+ glfs_closedir;
+ glfs_statvfs;
+ glfs_chmod;
+ glfs_fchmod;
+ glfs_chown;
+ glfs_lchown;
+ glfs_fchown;
+ glfs_utimens;
+ glfs_lutimens;
+ glfs_futimens;
+ glfs_getxattr;
+ glfs_lgetxattr;
+ glfs_fgetxattr;
+ glfs_listxattr;
+ glfs_llistxattr;
+ glfs_flistxattr;
+ glfs_setxattr;
+ glfs_lsetxattr;
+ glfs_fsetxattr;
+ glfs_removexattr;
+ glfs_lremovexattr;
+ glfs_fremovexattr;
+ glfs_getcwd;
+ glfs_chdir;
+ glfs_fchdir;
+ glfs_realpath;
+ glfs_posix_lock;
+ glfs_dup;
+} GFAPI_PRIVATE_3.4.0;
+
+GFAPI_3.4.2 {
+ global:
+ glfs_setfsuid;
+ glfs_setfsgid;
+ glfs_setfsgroups;
+ glfs_h_creat;
+ glfs_h_mkdir;
+ glfs_h_mknod;
+ glfs_h_symlink;
+ glfs_h_unlink;
+ glfs_h_close;
+ glfs_h_truncate;
+ glfs_h_stat;
+ glfs_h_getattrs;
+ glfs_h_setattrs;
+ glfs_h_readlink;
+ glfs_h_link;
+ glfs_h_rename;
+ glfs_h_extract_handle;
+ glfs_h_create_from_handle;
+ glfs_h_opendir;
+ glfs_h_open;
+ glfs_h_lookupat;
+} GFAPI_3.4.0;
+
+GFAPI_3.5.0 {
+ global:
+ glfs_get_volumeid;
+ glfs_readdir;
+ glfs_readdirplus;
+ glfs_fallocate;
+ glfs_discard;
+ glfs_discard_async;
+ glfs_zerofill;
+ glfs_zerofill_async;
+ glfs_caller_specific_init;
+ glfs_h_setxattrs;
+} GFAPI_3.4.2;
+
+GFAPI_3.5.1 {
+ global:
+ glfs_unset_volfile_server;
+ glfs_h_getxattrs;
+ glfs_h_removexattrs;
+} GFAPI_3.5.0;
+
+GFAPI_3.6.0 {
+ global:
+ glfs_get_volfile;
+ glfs_h_access;
+} GFAPI_3.5.1;
+
+GFAPI_3.7.0 {
+ global:
+ glfs_h_poll_upcall;
+ glfs_h_acl_set;
+ glfs_h_acl_get;
+ glfs_h_statfs;
+ glfs_h_anonymous_read;
+ glfs_h_anonymous_write;
+} GFAPI_3.6.0;
+
+GFAPI_PRIVATE_3.7.0 {
+ global:
+ glfs_free_from_ctx;
+ glfs_new_from_ctx;
+ glfs_resolve;
+ glfs_process_upcall_event;
+} GFAPI_3.7.0;
+
+GFAPI_3.7.4 {
+ global:
+ glfs_h_lookupat;
+} GFAPI_PRIVATE_3.7.0;
+
+GFAPI_3.7.15 {
+ global:
+ glfs_truncate;
+} GFAPI_3.7.4;
+
+GFAPI_3.7.16 {
+ global:
+ glfs_free;
+ glfs_upcall_get_fs;
+ glfs_upcall_get_reason;
+ glfs_upcall_get_event;
+ glfs_upcall_inode_get_object;
+ glfs_upcall_inode_get_flags;
+ glfs_upcall_inode_get_stat;
+ glfs_upcall_inode_get_expire;
+ glfs_upcall_inode_get_pobject;
+ glfs_upcall_inode_get_pstat;
+ glfs_upcall_inode_get_oldpobject;
+ glfs_upcall_inode_get_oldpstat;
+ glfs_h_poll_upcall;
+} GFAPI_3.7.15;
+
+GFAPI_3.7.17 {
+ global:
+ glfs_realpath;
+} GFAPI_3.7.16;
+
+GFAPI_3.10.0 {
+ global:
+ glfs_sysrq;
+} GFAPI_3.7.17;
+
+GFAPI_3.10.7 {
+ global:
+ glfs_fd_set_lkowner;
+} GFAPI_3.10.0;
+
+GFAPI_3.11.0 {
+ glfs_xreaddirplus_r;
+ glfs_xreaddirplus_r_get_stat;
+ glfs_xreaddirplus_r_get_object;
+ glfs_object_copy;
+} GFAPI_3.10.7;
+
+GFAPI_PRIVATE_3.12.0 {
+ global:
+ glfs_ipc;
+} GFAPI_3.11.0;
+
+GFAPI_3.13.0 {
+ global:
+ glfs_upcall_register;
+ glfs_upcall_unregister;
+} GFAPI_PRIVATE_3.12.0;
+
+GFAPI_4.0.0 {
+ global:
+ glfs_setfsleaseid;
+ glfs_file_lock;
+ glfs_lease;
+ glfs_h_lease;
+} GFAPI_3.13.0;
+
+GFAPI_4.1.6 {
+ global:
+ glfs_upcall_lease_get_object;
+ glfs_upcall_lease_get_lease_type;
+} GFAPI_4.0.0;
+
+GFAPI_PRIVATE_6.0 {
+ global:
+ glfs_statx;
+ glfs_iatt_from_statx;
+} GFAPI_4.1.6;
+
+GFAPI_6.0 {
+ global:
+ glfs_read_async;
+ glfs_write_async;
+ glfs_readv_async;
+ glfs_writev_async;
+ glfs_pread;
+ glfs_pwrite;
+ glfs_pread_async;
+ glfs_pwrite_async;
+ glfs_preadv_async;
+ glfs_pwritev_async;
+ glfs_fsync;
+ glfs_fsync_async;
+ glfs_fdatasync;
+ glfs_fdatasync_async;
+ glfs_ftruncate;
+ glfs_ftruncate_async;
+ glfs_discard_async;
+ glfs_zerofill_async;
+ glfs_copy_file_range;
+ glfs_setattr;
+ glfs_fsetattr;
+} GFAPI_PRIVATE_6.0;
+
+GFAPI_PRIVATE_6.1 {
+ global:
+ glfs_setfspid;
+} GFAPI_6.0;
+
+GFAPI_6.6 {
+ global:
+ glfs_h_creat_open;
+} GFAPI_PRIVATE_6.1;
+
+GFAPI_7.0 {
+ global:
+ glfs_set_statedump_path;
+} GFAPI_6.6;
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
new file mode 100644
index 00000000000..6aa3c5602d1
--- /dev/null
+++ b/api/src/glfs-fops.c
@@ -0,0 +1,6445 @@
+
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <unistd.h>
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include <glusterfs/syncop.h>
+#include "glfs.h"
+#include "gfapi-messages.h"
+#include <glusterfs/compat-errno.h>
+#include <limits.h>
+#include "glusterfs3.h"
+#include <glusterfs/iatt.h>
+
+#ifdef NAME_MAX
+#define GF_NAME_MAX NAME_MAX
+#else
+#define GF_NAME_MAX 255
+#endif
+
+struct upcall_syncop_args {
+ struct glfs *fs;
+ struct gf_upcall upcall_data;
+};
+
+#define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
+
+typedef void (*glfs_io_cbk34)(glfs_fd_t *fd, ssize_t ret, void *data);
+
+/*
+ * This function will mark glfd for deletion and decrement its refcount.
+ */
+int
+glfs_mark_glfd_for_deletion(struct glfs_fd *glfd)
+{
+ LOCK(&glfd->lock);
+ {
+ glfd->state = GLFD_CLOSE;
+ }
+ UNLOCK(&glfd->lock);
+
+ GF_REF_PUT(glfd);
+
+ return 0;
+}
+
+/* This function is useful for all async fops. There is chance that glfd is
+ * closed before async fop is completed. When glfd is closed we change the
+ * state to GLFD_CLOSE.
+ *
+ * This function will return _gf_true if the glfd is still valid else return
+ * _gf_false.
+ */
+gf_boolean_t
+glfs_is_glfd_still_valid(struct glfs_fd *glfd)
+{
+ gf_boolean_t ret = _gf_false;
+
+ LOCK(&glfd->lock);
+ {
+ if (glfd->state != GLFD_CLOSE)
+ ret = _gf_true;
+ }
+ UNLOCK(&glfd->lock);
+
+ return ret;
+}
+
+void
+glfd_set_state_bind(struct glfs_fd *glfd)
+{
+ LOCK(&glfd->lock);
+ {
+ glfd->state = GLFD_OPEN;
+ }
+ UNLOCK(&glfd->lock);
+
+ fd_bind(glfd->fd);
+ glfs_fd_bind(glfd);
+
+ return;
+}
+
+/*
+ * This routine is called when an upcall event of type
+ * 'GF_UPCALL_CACHE_INVALIDATION' is received.
+ * It makes a copy of the contents of the upcall cache-invalidation
+ * data received into an entry which is stored in the upcall list
+ * maintained by gfapi.
+ */
+int
+glfs_get_upcall_cache_invalidation(struct gf_upcall *to_up_data,
+ struct gf_upcall *from_up_data)
+{
+ struct gf_upcall_cache_invalidation *ca_data = NULL;
+ struct gf_upcall_cache_invalidation *f_ca_data = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, to_up_data, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, from_up_data, out);
+
+ f_ca_data = from_up_data->data;
+ GF_VALIDATE_OR_GOTO(THIS->name, f_ca_data, out);
+
+ ca_data = GF_CALLOC(1, sizeof(*ca_data), glfs_mt_upcall_entry_t);
+
+ if (!ca_data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ to_up_data->data = ca_data;
+
+ ca_data->flags = f_ca_data->flags;
+ ca_data->expire_time_attr = f_ca_data->expire_time_attr;
+ ca_data->stat = f_ca_data->stat;
+ ca_data->p_stat = f_ca_data->p_stat;
+ ca_data->oldp_stat = f_ca_data->oldp_stat;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glfs_get_upcall_lease(struct gf_upcall *to_up_data,
+ struct gf_upcall *from_up_data)
+{
+ struct gf_upcall_recall_lease *ca_data = NULL;
+ struct gf_upcall_recall_lease *f_ca_data = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, to_up_data, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, from_up_data, out);
+
+ f_ca_data = from_up_data->data;
+ GF_VALIDATE_OR_GOTO(THIS->name, f_ca_data, out);
+
+ ca_data = GF_CALLOC(1, sizeof(*ca_data), glfs_mt_upcall_entry_t);
+
+ if (!ca_data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ to_up_data->data = ca_data;
+
+ ca_data->lease_type = f_ca_data->lease_type;
+ gf_uuid_copy(ca_data->tid, f_ca_data->tid);
+ ca_data->dict = f_ca_data->dict;
+
+ ret = 0;
+out:
+ return ret;
+}
+int
+glfs_loc_link(loc_t *loc, struct iatt *iatt)
+{
+ int ret = -1;
+ inode_t *old_inode = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ if (!loc->inode) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ old_inode = loc->inode;
+
+ /* If the inode already exists in the cache, the inode
+ * returned here points to the existing one. We need
+ * to update loc.inode accordingly.
+ */
+ loc->inode = inode_link(loc->inode, loc->parent, loc->name, iatt);
+ if (loc->inode) {
+ inode_ctx_set(loc->inode, THIS, &ctx_value);
+ inode_lookup(loc->inode);
+ inode_unref(old_inode);
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+
+ return ret;
+}
+
+void
+glfs_iatt_to_stat(struct glfs *fs, struct iatt *iatt, struct stat *stat)
+{
+ iatt_to_stat(iatt, stat);
+ stat->st_dev = fs->dev_id;
+}
+
+void
+glfs_iatt_to_statx(struct glfs *fs, const struct iatt *iatt,
+ struct glfs_stat *statx)
+{
+ statx->glfs_st_mask = 0;
+
+ statx->glfs_st_mode = 0;
+ if (IATT_TYPE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mode |= st_mode_type_from_ia(iatt->ia_type);
+ statx->glfs_st_mask |= GLFS_STAT_TYPE;
+ }
+
+ if (IATT_MODE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mode |= st_mode_prot_from_ia(iatt->ia_prot);
+ statx->glfs_st_mask |= GLFS_STAT_MODE;
+ }
+
+ if (IATT_NLINK_VALID(iatt->ia_flags)) {
+ statx->glfs_st_nlink = iatt->ia_nlink;
+ statx->glfs_st_mask |= GLFS_STAT_NLINK;
+ }
+
+ if (IATT_UID_VALID(iatt->ia_flags)) {
+ statx->glfs_st_uid = iatt->ia_uid;
+ statx->glfs_st_mask |= GLFS_STAT_UID;
+ }
+
+ if (IATT_GID_VALID(iatt->ia_flags)) {
+ statx->glfs_st_gid = iatt->ia_gid;
+ statx->glfs_st_mask |= GLFS_STAT_GID;
+ }
+
+ if (IATT_ATIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_atime.tv_sec = iatt->ia_atime;
+ statx->glfs_st_atime.tv_nsec = iatt->ia_atime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_ATIME;
+ }
+
+ if (IATT_MTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mtime.tv_sec = iatt->ia_mtime;
+ statx->glfs_st_mtime.tv_nsec = iatt->ia_mtime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_MTIME;
+ }
+
+ if (IATT_CTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_ctime.tv_sec = iatt->ia_ctime;
+ statx->glfs_st_ctime.tv_nsec = iatt->ia_ctime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_CTIME;
+ }
+
+ if (IATT_BTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_btime.tv_sec = iatt->ia_btime;
+ statx->glfs_st_btime.tv_nsec = iatt->ia_btime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_BTIME;
+ }
+
+ if (IATT_INO_VALID(iatt->ia_flags)) {
+ statx->glfs_st_ino = iatt->ia_ino;
+ statx->glfs_st_mask |= GLFS_STAT_INO;
+ }
+
+ if (IATT_SIZE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_size = iatt->ia_size;
+ statx->glfs_st_mask |= GLFS_STAT_SIZE;
+ }
+
+ if (IATT_BLOCKS_VALID(iatt->ia_flags)) {
+ statx->glfs_st_blocks = iatt->ia_blocks;
+ statx->glfs_st_mask |= GLFS_STAT_BLOCKS;
+ }
+
+ /* unconditionally present, encode as is */
+ statx->glfs_st_blksize = iatt->ia_blksize;
+ statx->glfs_st_rdev_major = ia_major(iatt->ia_rdev);
+ statx->glfs_st_rdev_minor = ia_minor(iatt->ia_rdev);
+ statx->glfs_st_dev_major = ia_major(fs->dev_id);
+ statx->glfs_st_dev_minor = ia_minor(fs->dev_id);
+
+ /* At present we do not read any localFS attributes and pass them along,
+ * so setting this to 0. As we start supporting file attributes we can
+ * populate the same here as well */
+ statx->glfs_st_attributes = 0;
+ statx->glfs_st_attributes_mask = 0;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_iatt_from_statx, 6.0)
+void
+priv_glfs_iatt_from_statx(struct iatt *iatt, const struct glfs_stat *statx)
+{
+ /* Most code in xlators are not checking validity flags before accessing
+ the items. Hence zero everything before setting valid items */
+ memset(iatt, 0, sizeof(struct iatt));
+
+ if (GLFS_STAT_TYPE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_type = ia_type_from_st_mode(statx->glfs_st_mode);
+ iatt->ia_flags |= IATT_TYPE;
+ }
+
+ if (GLFS_STAT_MODE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_prot = ia_prot_from_st_mode(statx->glfs_st_mode);
+ iatt->ia_flags |= IATT_MODE;
+ }
+
+ if (GLFS_STAT_NLINK_VALID(statx->glfs_st_mask)) {
+ iatt->ia_nlink = statx->glfs_st_nlink;
+ iatt->ia_flags |= IATT_NLINK;
+ }
+
+ if (GLFS_STAT_UID_VALID(statx->glfs_st_mask)) {
+ iatt->ia_uid = statx->glfs_st_uid;
+ iatt->ia_flags |= IATT_UID;
+ }
+
+ if (GLFS_STAT_GID_VALID(statx->glfs_st_mask)) {
+ iatt->ia_gid = statx->glfs_st_gid;
+ iatt->ia_flags |= IATT_GID;
+ }
+
+ if (GLFS_STAT_ATIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_atime = statx->glfs_st_atime.tv_sec;
+ iatt->ia_atime_nsec = statx->glfs_st_atime.tv_nsec;
+ iatt->ia_flags |= IATT_ATIME;
+ }
+
+ if (GLFS_STAT_MTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_mtime = statx->glfs_st_mtime.tv_sec;
+ iatt->ia_mtime_nsec = statx->glfs_st_mtime.tv_nsec;
+ iatt->ia_flags |= IATT_MTIME;
+ }
+
+ if (GLFS_STAT_CTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_ctime = statx->glfs_st_ctime.tv_sec;
+ iatt->ia_ctime_nsec = statx->glfs_st_ctime.tv_nsec;
+ iatt->ia_flags |= IATT_CTIME;
+ }
+
+ if (GLFS_STAT_BTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_btime = statx->glfs_st_btime.tv_sec;
+ iatt->ia_btime_nsec = statx->glfs_st_btime.tv_nsec;
+ iatt->ia_flags |= IATT_BTIME;
+ }
+
+ if (GLFS_STAT_INO_VALID(statx->glfs_st_mask)) {
+ iatt->ia_ino = statx->glfs_st_ino;
+ iatt->ia_flags |= IATT_INO;
+ }
+
+ if (GLFS_STAT_SIZE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_size = statx->glfs_st_size;
+ iatt->ia_flags |= IATT_SIZE;
+ }
+
+ if (GLFS_STAT_BLOCKS_VALID(statx->glfs_st_mask)) {
+ iatt->ia_blocks = statx->glfs_st_blocks;
+ iatt->ia_flags |= IATT_BLOCKS;
+ }
+
+ /* unconditionally present, encode as is */
+ iatt->ia_blksize = statx->glfs_st_blksize;
+ iatt->ia_rdev = makedev(statx->glfs_st_rdev_major,
+ statx->glfs_st_rdev_minor);
+ iatt->ia_dev = makedev(statx->glfs_st_dev_major, statx->glfs_st_dev_minor);
+ iatt->ia_attributes = statx->glfs_st_attributes;
+ iatt->ia_attributes_mask = statx->glfs_st_attributes_mask;
+}
+
+void
+glfsflags_from_gfapiflags(struct glfs_stat *stat, int *glvalid)
+{
+ *glvalid = 0;
+ if (stat->glfs_st_mask & GLFS_STAT_MODE) {
+ *glvalid |= GF_SET_ATTR_MODE;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_SIZE) {
+ *glvalid |= GF_SET_ATTR_SIZE;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_UID) {
+ *glvalid |= GF_SET_ATTR_UID;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_GID) {
+ *glvalid |= GF_SET_ATTR_GID;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_ATIME) {
+ *glvalid |= GF_SET_ATTR_ATIME;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_MTIME) {
+ *glvalid |= GF_SET_ATTR_MTIME;
+ }
+}
+
+int
+glfs_loc_unlink(loc_t *loc)
+{
+ inode_unlink(loc->inode, loc->parent, loc->name);
+
+ /* since glfs_h_* objects hold a reference to inode
+ * it is safe to keep lookup count to '0' */
+ if (!inode_has_dentry(loc->inode))
+ inode_forget(loc->inode, 0);
+
+ return 0;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0)
+struct glfs_fd *
+pub_glfs_open(struct glfs *fs, const char *path, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG(iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_open(subvol, &loc, flags, glfd->fd, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0)
+int
+pub_glfs_close(struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct glfs *fs = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ if (glfd->lk_owner.len != 0) {
+ ret = syncopctx_setfslkowner(&glfd->lk_owner);
+ if (ret)
+ goto out;
+ }
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_flush(subvol, fd, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ fs = glfd->fs;
+
+ if (fd)
+ fd_unref(fd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_mark_glfd_for_deletion(glfd);
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0)
+int
+pub_glfs_lstat(struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0)
+int
+pub_glfs_stat(struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_statx, 6.0)
+int
+priv_glfs_statx(struct glfs *fs, const char *path, const unsigned int mask,
+ struct glfs_stat *statxbuf)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (path == NULL) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (mask & ~GLFS_STAT_ALL) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && statxbuf)
+ glfs_iatt_to_statx(fs, &iatt, statxbuf);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0)
+int
+pub_glfs_fstat(struct glfs_fd *glfd, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fstat(subvol, fd, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat(glfd->fs, &iatt, stat);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0)
+struct glfs_fd *
+pub_glfs_creat(struct glfs *fs, const char *path, int flags, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+ /* This must be glfs_resolve() and NOT glfs_lresolve().
+ That is because open("name", O_CREAT) where "name"
+ is a danging symlink must create the dangling
+ destination.
+ */
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ if (loc.inode) {
+ if (flags & O_EXCL) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG(iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (ret == -1 && errno == ENOENT) {
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ if (get_fop_attr_thrd_key(&xattr_req))
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+ if (ret == 0) {
+ ret = syncop_open(subvol, &loc, flags, glfd->fd, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ } else {
+ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt,
+ xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ }
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+#ifdef HAVE_SEEK_HOLE
+static int
+glfs_seek(struct glfs_fd *glfd, off_t offset, int whence)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ gf_seek_what_t what = 0;
+ off_t off = -1;
+
+ switch (whence) {
+ case SEEK_DATA:
+ what = GF_SEEK_DATA;
+ break;
+ case SEEK_HOLE:
+ what = GF_SEEK_HOLE;
+ break;
+ default:
+ /* other SEEK_* do not make sense, all operations get an offset
+ * and the position in the fd is not tracked */
+ errno = EINVAL;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto done;
+ }
+
+ ret = syncop_seek(subvol, fd, offset, what, NULL, &off);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret != -1)
+ glfd->offset = off;
+
+done:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+out:
+ return ret;
+}
+#endif
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0)
+off_t
+pub_glfs_lseek(struct glfs_fd *glfd, off_t offset, int whence)
+{
+ struct stat sb = {
+ 0,
+ };
+ int ret = -1;
+ off_t off = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ switch (whence) {
+ case SEEK_SET:
+ glfd->offset = offset;
+ ret = 0;
+ break;
+ case SEEK_CUR:
+ glfd->offset += offset;
+ ret = 0;
+ break;
+ case SEEK_END:
+ ret = pub_glfs_fstat(glfd, &sb);
+ if (ret) {
+ /* seek cannot fail :O */
+ break;
+ }
+ glfd->offset = sb.st_size + offset;
+ break;
+#ifdef HAVE_SEEK_HOLE
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ ret = glfs_seek(glfd, offset, whence);
+ break;
+#endif
+ default:
+ errno = EINVAL;
+ }
+
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ __GLFS_EXIT_FS;
+
+ if (ret != -1)
+ off = glfd->offset;
+
+ return off;
+
+invalid_fs:
+ return -1;
+}
+
+static ssize_t
+glfs_preadv_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags, struct glfs_stat *poststat)
+{
+ xlator_t *subvol = NULL;
+ ssize_t ret = -1;
+ ssize_t size = -1;
+ struct iovec *iov = NULL;
+ int cnt = 0;
+ struct iobref *iobref = NULL;
+ fd_t *fd = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length(iovec, iovcnt);
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_readv(subvol, fd, size, offset, 0, &iov, &cnt, &iobref, &iatt,
+ fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0 && poststat)
+ glfs_iatt_to_statx(glfd->fs, &iatt, poststat);
+
+ if (ret <= 0)
+ goto out;
+
+ size = iov_copy(iovec, iovcnt, iov, cnt); /* FIXME!!! */
+
+ glfd->offset = (offset + size);
+
+ ret = size;
+out:
+ if (iov)
+ GF_FREE(iov);
+ if (iobref)
+ iobref_unref(iobref);
+
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0)
+ssize_t
+pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ return glfs_preadv_common(glfd, iovec, iovcnt, offset, flags, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0)
+ssize_t
+pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_preadv(glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0)
+ssize_t
+pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_preadv(glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, 6.0)
+ssize_t
+pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags, struct glfs_stat *poststat)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_common(glfd, &iov, 1, offset, flags, poststat);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0)
+ssize_t
+pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = pub_glfs_preadv(glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+struct glfs_io {
+ struct glfs_fd *glfd;
+ int op;
+ off_t offset;
+ struct iovec *iov;
+ int count;
+ int flags;
+ gf_boolean_t oldcb;
+ union {
+ glfs_io_cbk34 fn34;
+ glfs_io_cbk fn;
+ };
+ void *data;
+};
+
+static int
+glfs_io_async_cbk(int op_ret, int op_errno, call_frame_t *frame, void *cookie,
+ struct iovec *iovec, int count, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ struct glfs_io *gio = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs *fs = NULL;
+ struct glfs_fd *glfd = NULL;
+ int ret = -1;
+ struct glfs_stat prestat = {}, *prestatp = NULL;
+ struct glfs_stat poststat = {}, *poststatp = NULL;
+
+ GF_VALIDATE_OR_GOTO("gfapi", frame, inval);
+ GF_VALIDATE_OR_GOTO("gfapi", cookie, inval);
+
+ gio = frame->local;
+ frame->local = NULL;
+ subvol = cookie;
+ glfd = gio->glfd;
+ fs = glfd->fs;
+
+ if (!glfs_is_glfd_still_valid(glfd))
+ goto err;
+
+ if (op_ret <= 0) {
+ goto out;
+ } else if (gio->op == GF_FOP_READ) {
+ if (!iovec) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = iov_copy(gio->iov, gio->count, iovec, count);
+ glfd->offset = gio->offset + op_ret;
+ } else if (gio->op == GF_FOP_WRITE) {
+ glfd->offset = gio->offset + gio->iov->iov_len;
+ }
+
+out:
+ errno = op_errno;
+ if (gio->oldcb) {
+ gio->fn34(gio->glfd, op_ret, gio->data);
+ } else {
+ if (prebuf) {
+ prestatp = &prestat;
+ glfs_iatt_to_statx(fs, prebuf, prestatp);
+ }
+
+ if (postbuf) {
+ poststatp = &poststat;
+ glfs_iatt_to_statx(fs, postbuf, poststatp);
+ }
+
+ gio->fn(gio->glfd, op_ret, prestatp, poststatp, gio->data);
+ }
+err:
+ fd_unref(glfd->fd);
+ /* Since the async operation is complete
+ * release the ref taken during the start
+ * of async operation
+ */
+ GF_REF_PUT(glfd);
+
+ GF_FREE(gio->iov);
+ GF_FREE(gio);
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(fs, subvol);
+
+ ret = 0;
+inval:
+ return ret;
+}
+
+static int
+glfs_preadv_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iovec *iovec, int count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, iovec, count, NULL,
+ stbuf);
+
+ return 0;
+}
+
+static int
+glfs_preadv_async_common(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, gf_boolean_t oldcb,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs *fs = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ fs = glfd->fs;
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->iov = iov_dup(iovec, count);
+ if (!gio->iov) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_READ;
+ gio->glfd = glfd;
+ gio->count = count;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_preadv_async_cbk, subvol, subvol,
+ subvol->fops->readv, fd, iov_length(iovec, count), offset,
+ flags, fop_attr);
+
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (gio) {
+ GF_FREE(gio->iov);
+ GF_FREE(gio);
+ }
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+ glfs_subvol_done(fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ __GLFS_EXIT_FS;
+
+ return ret;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0)
+int
+pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk34 fn,
+ void *data)
+{
+ return glfs_preadv_async_common(glfd, iovec, count, offset, flags, _gf_true,
+ (void *)fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, 6.0)
+int
+pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk fn,
+ void *data)
+{
+ return glfs_preadv_async_common(glfd, iovec, count, offset, flags,
+ _gf_false, fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0)
+int
+pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, glfd->offset, flags, _gf_true,
+ (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, 6.0)
+int
+pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, glfd->offset, flags,
+ _gf_false, fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0)
+int
+pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count,
+ off_t offset, int flags, glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, offset, flags, _gf_true,
+ (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, 6.0)
+int
+pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, offset, flags, _gf_false, fn,
+ data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0)
+int
+pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk34 fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_true, (void *)fn, data);
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, 6.0)
+int
+pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_false, fn, data);
+ return ret;
+}
+
+static ssize_t
+glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = iobuf_copy(subvol->ctx->iobuf_pool, iovec, iovcnt, &iobref, &iobuf,
+ &iov);
+ if (ret)
+ goto out;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_writev(subvol, fd, &iov, 1, offset, iobref, flags, &preiatt,
+ &postiatt, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+
+ if (ret <= 0)
+ goto out;
+
+ glfd->offset = (offset + iov.iov_len);
+out:
+ if (iobuf)
+ iobuf_unref(iobuf);
+ if (iobref)
+ iobref_unref(iobref);
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, 6.0)
+ssize_t
+pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+ unsigned int flags, struct glfs_stat *statbuf,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd_in = NULL;
+ fd_t *fd_out = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ iattbuf =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+ off64_t pos_in;
+ off64_t pos_out;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd_in, invalid_fs);
+ __GLFS_ENTRY_VALIDATE_FD(glfd_out, invalid_fs);
+
+ GF_REF_GET(glfd_in);
+ GF_REF_GET(glfd_out);
+
+ if (glfd_in->fs != glfd_out->fs) {
+ ret = -1;
+ errno = EXDEV;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd_in->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd_in = glfs_resolve_fd(glfd_in->fs, subvol, glfd_in);
+ if (!fd_in) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ fd_out = glfs_resolve_fd(glfd_out->fs, subvol, glfd_out);
+ if (!fd_out) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ /*
+ * This is based on how the vfs layer in the kernel handles
+ * copy_file_range call. Upon receiving it follows the
+ * below method to consider the offset.
+ * if (off_in != NULL)
+ * use the value off_in to perform the op
+ * else if off_in == NULL
+ * use the current file offset position to perform the op
+ *
+ * For gfapi, glfd->offset is used. For a freshly opened
+ * fd, the offset is set to 0.
+ */
+ if (off_in)
+ pos_in = *off_in;
+ else
+ pos_in = glfd_in->offset;
+
+ if (off_out)
+ pos_out = *off_out;
+ else
+ pos_out = glfd_out->offset;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_copy_file_range(subvol, fd_in, pos_in, fd_out, pos_out, len,
+ flags, &iattbuf, &preiatt, &postiatt, fop_attr,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ pos_in += ret;
+ pos_out += ret;
+
+ if (off_in)
+ *off_in = pos_in;
+ if (off_out)
+ *off_out = pos_out;
+
+ if (statbuf)
+ glfs_iatt_to_statx(glfd_in->fs, &iattbuf, statbuf);
+ if (prestat)
+ glfs_iatt_to_statx(glfd_in->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd_in->fs, &postiatt, poststat);
+ }
+
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * If *off_in is NULL, then there is no offset info that can
+ * obtained from the input argument. Hence follow below method.
+ * If *off_in is NULL, then
+ * glfd->offset = offset + ret;
+ * else
+ * do nothing.
+ *
+ * According to the man page of copy_file_range, if off_in is
+ * NULL, then the offset of the source file is advanced by
+ * the return value of the fop. The same applies to off_out as
+ * well. Otherwise, if *off_in is not NULL, then the offset
+ * is not advanced by the filesystem. The entity which sends
+ * the copy_file_range call is supposed to advance the offset
+ * value in its buffer (pointed to by *off_in or *off_out)
+ * by the return value of copy_file_range.
+ */
+ if (!off_in)
+ glfd_in->offset += ret;
+
+ if (!off_out)
+ glfd_out->offset += ret;
+
+out:
+ if (fd_in)
+ fd_unref(fd_in);
+ if (fd_out)
+ fd_unref(fd_out);
+ if (glfd_in)
+ GF_REF_PUT(glfd_in);
+ if (glfd_out)
+ GF_REF_PUT(glfd_out);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd_in->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0)
+ssize_t
+pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ return glfs_pwritev_common(glfd, iovec, iovcnt, offset, flags, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0)
+ssize_t
+pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_pwritev(glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0)
+ssize_t
+pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = pub_glfs_pwritev(glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0)
+ssize_t
+pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count,
+ off_t offset, int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_pwritev(glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, 6.0)
+ssize_t
+pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count,
+ off_t offset, int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_common(glfd, &iov, 1, offset, flags, prestat, poststat);
+
+ return ret;
+}
+
+extern glfs_t *
+pub_glfs_from_glfd(glfs_fd_t *);
+
+static int
+glfs_pwritev_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, prebuf,
+ postbuf);
+
+ return 0;
+}
+
+static int
+glfs_pwritev_async_common(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_WRITE;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+ gio->count = 1;
+ gio->iov = GF_CALLOC(gio->count, sizeof(*(gio->iov)), gf_common_mt_iovec);
+ if (!gio->iov) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = iobuf_copy(subvol->ctx->iobuf_pool, iovec, count, &iobref, &iobuf,
+ gio->iov);
+ if (ret)
+ goto out;
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_pwritev_async_cbk, subvol, subvol,
+ subvol->fops->writev, fd, gio->iov, gio->count, offset,
+ flags, iobref, fop_attr);
+
+ ret = 0;
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ /*
+ * If there is any error condition check after the frame
+ * creation, we have to destroy the frame root.
+ */
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (iobuf)
+ iobuf_unref(iobuf);
+ if (iobref)
+ iobref_unref(iobref);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0)
+int
+pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk34 fn,
+ void *data)
+{
+ return glfs_pwritev_async_common(glfd, iovec, count, offset, flags,
+ _gf_true, (void *)fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, 6.0)
+int
+pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk fn,
+ void *data)
+{
+ return glfs_pwritev_async_common(glfd, iovec, count, offset, flags,
+ _gf_false, fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0)
+int
+pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
+ int flags, glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, glfd->offset, flags,
+ _gf_true, (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, 6.0)
+int
+pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, glfd->offset, flags,
+ _gf_false, fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0)
+int
+pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, offset, flags, _gf_true,
+ (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, 6.0)
+int
+pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, offset, flags, _gf_false, fn,
+ data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0)
+int
+pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov,
+ int count, int flags, glfs_io_cbk34 fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_true, (void *)fn, data);
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, 6.0)
+int
+pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_false, fn, data);
+ return ret;
+}
+
+static int
+glfs_fsync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_fsync(subvol, fd, 0, &preiatt, &postiatt, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0)
+int
+pub_glfs_fsync34(struct glfs_fd *glfd)
+{
+ return glfs_fsync_common(glfd, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, 6.0)
+int
+pub_glfs_fsync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ return glfs_fsync_common(glfd, prestat, poststat);
+}
+
+static int
+glfs_fsync_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, prebuf,
+ postbuf);
+
+ return 0;
+}
+
+static int
+glfs_fsync_async_common(struct glfs_fd *glfd, gf_boolean_t oldcb,
+ glfs_io_cbk fn, void *data, int dataonly)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ gio->op = GF_FOP_FSYNC;
+ gio->glfd = glfd;
+ gio->flags = dataonly;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ STACK_WIND_COOKIE(frame, glfs_fsync_async_cbk, subvol, subvol,
+ subvol->fops->fsync, fd, dataonly, NULL);
+
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0)
+int
+pub_glfs_fsync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_true, (void *)fn, data, 0);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, 6.0)
+int
+pub_glfs_fsync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_false, fn, data, 0);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glfs_fdatasync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_fsync(subvol, fd, 1, &preiatt, &postiatt, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0)
+int
+pub_glfs_fdatasync34(struct glfs_fd *glfd)
+{
+ return glfs_fdatasync_common(glfd, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, 6.0)
+int
+pub_glfs_fdatasync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ return glfs_fdatasync_common(glfd, prestat, poststat);
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0)
+int
+pub_glfs_fdatasync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_true, (void *)fn, data, 1);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, 6.0)
+int
+pub_glfs_fdatasync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_false, fn, data, 1);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glfs_ftruncate_common(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_ftruncate(subvol, fd, offset, &preiatt, &postiatt, fop_attr,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0)
+int
+pub_glfs_ftruncate34(struct glfs_fd *glfd, off_t offset)
+{
+ return glfs_ftruncate_common(glfd, offset, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, 6.0)
+int
+pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+{
+ return glfs_ftruncate_common(glfd, offset, prestat, poststat);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15)
+int
+pub_glfs_truncate(struct glfs *fs, const char *path, off_t length)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_truncate(subvol, &loc, length, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glfs_ftruncate_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, prebuf,
+ postbuf);
+
+ return 0;
+}
+
+static int
+glfs_ftruncate_async_common(struct glfs_fd *glfd, off_t offset,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_FTRUNCATE;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_ftruncate_async_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd, offset, fop_attr);
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0)
+int
+pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn,
+ void *data)
+{
+ return glfs_ftruncate_async_common(glfd, offset, _gf_true, (void *)fn,
+ data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, 6.0)
+int
+pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn,
+ void *data)
+{
+ return glfs_ftruncate_async_common(glfd, offset, _gf_false, fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0)
+int
+pub_glfs_access(struct glfs *fs, const char *path, int mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_access(subvol, &loc, mode, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0)
+int
+pub_glfs_symlink(struct glfs *fs, const char *data, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_symlink(subvol, &loc, data, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0)
+int
+pub_glfs_readlink(struct glfs *fs, const char *path, char *buf, size_t bufsiz)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+ char *linkval = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFLNK) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = syncop_readlink(subvol, &loc, &linkval, bufsiz, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret > 0) {
+ memcpy(buf, linkval, ret);
+ GF_FREE(linkval);
+ }
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0)
+int
+pub_glfs_mknod(struct glfs *fs, const char *path, mode_t mode, dev_t dev)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mknod(subvol, &loc, mode, dev, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0)
+int
+pub_glfs_mkdir(struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mkdir(subvol, &loc, mode, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0)
+int
+pub_glfs_unlink(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ /* TODO: Add leaseid */
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0)
+int
+pub_glfs_rmdir(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ ret = syncop_rmdir(subvol, &loc, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0)
+int
+pub_glfs_rename(struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt oldiatt = {
+ 0,
+ };
+ struct iatt newiatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve(fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &newloc, retrynew);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) != (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: - check if new or old is a prefix of the other, and fail EINVAL
+ * - Add leaseid */
+
+ ret = syncop_rename(subvol, &oldloc, &newloc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == -1 && errno == ESTALE) {
+ if (reval < DEFAULT_REVAL_COUNT) {
+ reval++;
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+ goto retry;
+ }
+ }
+
+ if (ret == 0) {
+ inode_rename(oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode, &oldiatt);
+
+ if (newloc.inode && !inode_has_dentry(newloc.inode))
+ inode_forget(newloc.inode, 0);
+ }
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0)
+int
+pub_glfs_link(struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt oldiatt = {
+ 0,
+ };
+ struct iatt newiatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve(fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &newloc, retrynew);
+
+ if (ret == 0) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (oldiatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ original file
+ */
+ if (newloc.inode) {
+ inode_unref(newloc.inode);
+ newloc.inode = NULL;
+ }
+ newloc.inode = inode_ref(oldloc.inode);
+
+ ret = syncop_link(subvol, &oldloc, &newloc, &newiatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == -1 && errno == ESTALE) {
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+ if (reval--)
+ goto retry;
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_link(&newloc, &newiatt);
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0)
+struct glfs_fd *
+pub_glfs_opendir(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD(&glfd->entries);
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_opendir(subvol, &loc, glfd->fd, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0)
+int
+pub_glfs_closedir(struct glfs_fd *glfd)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ gf_dirent_free(list_entry(&glfd->entries, gf_dirent_t, list));
+
+ glfs_mark_glfd_for_deletion(glfd);
+
+ __GLFS_EXIT_FS;
+
+ ret = 0;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0)
+long
+pub_glfs_telldir(struct glfs_fd *fd)
+{
+ if (fd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ return fd->offset;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0)
+void
+pub_glfs_seekdir(struct glfs_fd *fd, long offset)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ if (fd == NULL) {
+ errno = EBADF;
+ return;
+ }
+
+ if (fd->offset == offset)
+ return;
+
+ fd->offset = offset;
+ fd->next = NULL;
+
+ list_for_each_entry_safe(entry, tmp, &fd->entries, list)
+ {
+ if (entry->d_off != offset)
+ continue;
+
+ if (&tmp->list != &fd->entries) {
+ /* found! */
+ fd->next = tmp;
+ return;
+ }
+ }
+ /* could not find entry at requested offset in the cache.
+ next readdir_r() will result in glfd_entry_refresh()
+ */
+}
+
+static int
+glfs_discard_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop_stbuf, struct iatt *postop_stbuf,
+ dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, preop_stbuf,
+ postop_stbuf);
+
+ return 0;
+}
+
+static int
+glfs_discard_async_common(struct glfs_fd *glfd, off_t offset, size_t len,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_DISCARD;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_discard_async_cbk, subvol, subvol,
+ subvol->fops->discard, fd, offset, len, fop_attr);
+
+ ret = 0;
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0)
+int
+pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk34 fn, void *data)
+{
+ return glfs_discard_async_common(glfd, offset, len, _gf_true, (void *)fn,
+ data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, 6.0)
+int
+pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk fn, void *data)
+{
+ return glfs_discard_async_common(glfd, offset, len, _gf_false, fn, data);
+}
+
+static int
+glfs_zerofill_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop_stbuf, struct iatt *postop_stbuf,
+ dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, preop_stbuf,
+ postop_stbuf);
+
+ return 0;
+}
+
+static int
+glfs_zerofill_async_common(struct glfs_fd *glfd, off_t offset, off_t len,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_ZEROFILL;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_zerofill_async_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd, offset, len, fop_attr);
+ ret = 0;
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0)
+int
+pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len,
+ glfs_io_cbk34 fn, void *data)
+{
+ return glfs_zerofill_async_common(glfd, offset, len, _gf_true, (void *)fn,
+ data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, 6.0)
+int
+pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len,
+ glfs_io_cbk fn, void *data)
+{
+ return glfs_zerofill_async_common(glfd, offset, len, _gf_false, fn, data);
+}
+
+void
+gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent)
+{
+ dirent->d_ino = gf_dirent->d_ino;
+
+#ifdef _DIRENT_HAVE_D_OFF
+ dirent->d_off = gf_dirent->d_off;
+#endif
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ dirent->d_type = gf_dirent->d_type;
+#endif
+
+#ifdef _DIRENT_HAVE_D_NAMLEN
+ dirent->d_namlen = strlen(gf_dirent->d_name);
+#endif
+
+ snprintf(dirent->d_name, NAME_MAX + 1, "%s", gf_dirent->d_name);
+}
+
+int
+glfd_entry_refresh(struct glfs_fd *glfd, int plus)
+{
+ xlator_t *subvol = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t old;
+ gf_dirent_t *entry = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ if (fd->inode->ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = EBADF;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&entries.list);
+ INIT_LIST_HEAD(&old.list);
+
+ if (plus)
+ ret = syncop_readdirp(subvol, fd, 131072, glfd->offset, &entries, NULL,
+ NULL);
+ else
+ ret = syncop_readdir(subvol, fd, 131072, glfd->offset, &entries, NULL,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret >= 0) {
+ if (plus) {
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ if ((!entry->inode && (!IA_ISDIR(entry->d_stat.ia_type))) ||
+ ((entry->d_stat.ia_ctime == 0) &&
+ strcmp(entry->d_name, ".") &&
+ strcmp(entry->d_name, ".."))) {
+ /* entry->inode for directories will be
+ * always set to null to force a lookup
+ * on the dentry. Hence to not degrade
+ * readdir performance, we skip lookups
+ * for directory entries. Also we will have
+ * proper stat if directory present on
+ * hashed subvolume.
+ *
+ * In addition, if the stat is invalid, force
+ * lookup to fetch proper stat.
+ */
+ gf_fill_iatt_for_dirent(entry, fd->inode, subvol);
+ }
+ }
+
+ gf_link_inodes_from_dirent(THIS, fd->inode, &entries);
+ }
+
+ list_splice_init(&glfd->entries, &old.list);
+ list_splice_init(&entries.list, &glfd->entries);
+
+ /* spurious errno is dangerous for glfd_entry_next() */
+ errno = 0;
+ }
+
+ if (ret > 0)
+ glfd->next = list_entry(glfd->entries.next, gf_dirent_t, list);
+
+ gf_dirent_free(&old);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ return ret;
+}
+
+gf_dirent_t *
+glfd_entry_next(struct glfs_fd *glfd, int plus)
+{
+ gf_dirent_t *entry = NULL;
+ int ret = -1;
+
+ if (!glfd->offset || !glfd->next) {
+ ret = glfd_entry_refresh(glfd, plus);
+ if (ret < 0)
+ return NULL;
+ }
+
+ entry = glfd->next;
+ if (!entry)
+ return NULL;
+
+ if (&entry->next->list == &glfd->entries)
+ glfd->next = NULL;
+ else
+ glfd->next = entry->next;
+
+ glfd->offset = entry->d_off;
+
+ return entry;
+}
+
+struct dirent *
+glfs_readdirbuf_get(struct glfs_fd *glfd)
+{
+ struct dirent *buf = NULL;
+
+ LOCK(&glfd->fd->lock);
+ {
+ buf = glfd->readdirbuf;
+ if (buf) {
+ memset(buf, 0, READDIRBUF_SIZE);
+ goto unlock;
+ }
+
+ buf = GF_CALLOC(1, READDIRBUF_SIZE, glfs_mt_readdirbuf_t);
+ if (!buf) {
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ glfd->readdirbuf = buf;
+ }
+unlock:
+ UNLOCK(&glfd->fd->lock);
+
+ return buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0)
+int
+pub_glfs_readdirplus_r(struct glfs_fd *glfd, struct stat *stat,
+ struct dirent *ext, struct dirent **res)
+{
+ int ret = 0;
+ gf_dirent_t *entry = NULL;
+ struct dirent *buf = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ errno = 0;
+
+ if (ext)
+ buf = ext;
+ else
+ buf = glfs_readdirbuf_get(glfd);
+
+ if (!buf) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ entry = glfd_entry_next(glfd, !!stat);
+ if (errno)
+ ret = -1;
+
+ if (res) {
+ if (entry)
+ *res = buf;
+ else
+ *res = NULL;
+ }
+
+ if (entry) {
+ gf_dirent_to_dirent(entry, buf);
+ if (stat)
+ glfs_iatt_to_stat(glfd->fs, &entry->d_stat, stat);
+ }
+
+out:
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ __GLFS_EXIT_FS;
+
+ return ret;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0)
+int
+pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf,
+ struct dirent **res)
+{
+ return pub_glfs_readdirplus_r(glfd, 0, buf, res);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0)
+struct dirent *
+pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat)
+{
+ struct dirent *res = NULL;
+ int ret = -1;
+
+ ret = pub_glfs_readdirplus_r(glfd, stat, NULL, &res);
+ if (ret)
+ return NULL;
+
+ return res;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0)
+struct dirent *
+pub_glfs_readdir(struct glfs_fd *glfd)
+{
+ return pub_glfs_readdirplus(glfd, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0)
+int
+pub_glfs_statvfs(struct glfs *fs, const char *path, struct statvfs *buf)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_statfs(subvol, &loc, buf, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setattr, 6.0)
+int
+pub_glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow)
+{
+ int ret = -1;
+ int glvalid;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt riatt = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ GF_VALIDATE_OR_GOTO("glfs_setattr", stat, out);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &riatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &riatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ glfs_iatt_from_statx(&iatt, stat);
+ glfsflags_from_gfapiflags(stat, &glvalid);
+
+ /* TODO : Add leaseid */
+ ret = syncop_setattr(subvol, &loc, &iatt, glvalid, 0, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetattr, 6.0)
+int
+pub_glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat)
+{
+ int ret = -1;
+ int glvalid;
+ struct iatt iatt = {
+ 0,
+ };
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ GF_VALIDATE_OR_GOTO("glfs_fsetattr", stat, out);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ glfs_iatt_from_statx(&iatt, stat);
+ glfsflags_from_gfapiflags(stat, &glvalid);
+
+ /* TODO : Add leaseid */
+ ret = syncop_fsetattr(subvol, fd, &iatt, glvalid, 0, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0)
+int
+pub_glfs_chmod(struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_mode = mode;
+ stat.glfs_st_mask = GLFS_STAT_MODE;
+
+ ret = glfs_setattr(fs, path, &stat, 1);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0)
+int
+pub_glfs_fchmod(struct glfs_fd *glfd, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_mode = mode;
+ stat.glfs_st_mask = GLFS_STAT_MODE;
+
+ ret = glfs_fsetattr(glfd, &stat);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0)
+int
+pub_glfs_chown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = 0;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ if (uid != (uid_t)-1) {
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
+ }
+
+ if (gid != (uid_t)-1) {
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
+ }
+
+ if (stat.glfs_st_mask)
+ ret = glfs_setattr(fs, path, &stat, 1);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0)
+int
+pub_glfs_lchown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = 0;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ if (uid != (uid_t)-1) {
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
+ }
+
+ if (gid != (uid_t)-1) {
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
+ }
+
+ if (stat.glfs_st_mask)
+ ret = glfs_setattr(fs, path, &stat, 0);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0)
+int
+pub_glfs_fchown(struct glfs_fd *glfd, uid_t uid, gid_t gid)
+{
+ int ret = 0;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ if (uid != (uid_t)-1) {
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
+ }
+
+ if (gid != (uid_t)-1) {
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
+ }
+
+ if (stat.glfs_st_mask)
+ ret = glfs_fsetattr(glfd, &stat);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0)
+int
+pub_glfs_utimens(struct glfs *fs, const char *path,
+ const struct timespec times[2])
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
+
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
+
+ ret = glfs_setattr(fs, path, &stat, 1);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0)
+int
+pub_glfs_lutimens(struct glfs *fs, const char *path,
+ const struct timespec times[2])
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
+
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
+
+ ret = glfs_setattr(fs, path, &stat, 0);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0)
+int
+pub_glfs_futimens(struct glfs_fd *glfd, const struct timespec times[2])
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
+
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
+
+ ret = glfs_fsetattr(glfd, &stat);
+
+ return ret;
+}
+
+int
+glfs_getxattr_process(void *value, size_t size, dict_t *xattr, const char *name)
+{
+ data_t *data = NULL;
+ int ret = -1;
+
+ data = dict_get(xattr, (char *)name);
+ if (!data) {
+ errno = ENODATA;
+ ret = -1;
+ goto out;
+ }
+
+ ret = data->len;
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy(value, data->data, ret);
+out:
+ return ret;
+}
+
+ssize_t
+glfs_getxattr_common(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr(subvol, &loc, &xattr, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process(value, size, xattr, name);
+out:
+ loc_wipe(&loc);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0)
+ssize_t
+pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common(fs, path, name, value, size, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0)
+ssize_t
+pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common(fs, path, name, value, size, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0)
+ssize_t
+pub_glfs_fgetxattr(struct glfs_fd *glfd, const char *name, void *value,
+ size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr(subvol, fd, &xattr, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process(value, size, xattr, name);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_listxattr_process(void *value, size_t size, dict_t *xattr)
+{
+ int ret = -1;
+
+ if (!xattr)
+ goto out;
+
+ ret = dict_keys_join(NULL, 0, xattr, NULL);
+
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ } else {
+ dict_keys_join(value, size, xattr, NULL);
+ }
+
+out:
+ return ret;
+}
+
+ssize_t
+glfs_listxattr_common(struct glfs *fs, const char *path, void *value,
+ size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr(subvol, &loc, &xattr, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process(value, size, xattr);
+out:
+ loc_wipe(&loc);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0)
+ssize_t
+pub_glfs_listxattr(struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common(fs, path, value, size, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0)
+ssize_t
+pub_glfs_llistxattr(struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common(fs, path, value, size, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0)
+ssize_t
+pub_glfs_flistxattr(struct glfs_fd *glfd, void *value, size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr(subvol, fd, &xattr, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process(value, size, xattr);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_setxattr_common(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int reval = 0;
+ void *value_cp = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ value_cp = gf_memdup(value, size);
+ GF_CHECK_ALLOC_AND_LOG(subvol->name, value_cp, ret,
+ "Failed to"
+ " duplicate setxattr value",
+ out);
+
+ xattr = dict_for_key_value(name, value_cp, size, _gf_false);
+ if (!xattr) {
+ GF_FREE(value_cp);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_setxattr(subvol, &loc, xattr, flags, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0)
+int
+pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common(fs, path, name, value, size, flags, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0)
+int
+pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common(fs, path, name, value, size, flags, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0)
+int
+pub_glfs_fsetxattr(struct glfs_fd *glfd, const char *name, const void *value,
+ size_t size, int flags)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+ void *value_cp = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ value_cp = gf_memdup(value, size);
+ GF_CHECK_ALLOC_AND_LOG(subvol->name, value_cp, ret,
+ "Failed to"
+ " duplicate setxattr value",
+ out);
+
+ xattr = dict_for_key_value(name, value_cp, size, _gf_false);
+ if (!xattr) {
+ GF_FREE(value_cp);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_fsetxattr(subvol, fd, xattr, flags, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_removexattr_common(struct glfs *fs, const char *path, const char *name,
+ int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_removexattr(subvol, &loc, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0)
+int
+pub_glfs_removexattr(struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common(fs, path, name, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0)
+int
+pub_glfs_lremovexattr(struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common(fs, path, name, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0)
+int
+pub_glfs_fremovexattr(struct glfs_fd *glfd, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fremovexattr(subvol, fd, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0)
+int
+pub_glfs_fallocate(struct glfs_fd *glfd, int keep_size, off_t offset,
+ size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_fallocate(subvol, fd, keep_size, offset, len, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0)
+int
+pub_glfs_discard(struct glfs_fd *glfd, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_discard(subvol, fd, offset, len, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0)
+int
+pub_glfs_zerofill(struct glfs_fd *glfd, off_t offset, off_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_zerofill(subvol, fd, offset, len, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0)
+int
+pub_glfs_chdir(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set(fs, loc.inode);
+
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0)
+int
+pub_glfs_fchdir(struct glfs_fd *glfd)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ inode = fd->inode;
+
+ if (!IA_ISDIR(inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set(glfd->fs, inode);
+ ret = 0;
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static gf_boolean_t warn_realpath = _gf_true; /* log once */
+
+static char *
+glfs_realpath_common(struct glfs *fs, const char *path, char *resolved_path,
+ gf_boolean_t warn_deprecated)
+{
+ int ret = -1;
+ char *retpath = NULL;
+ char *allocpath = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (resolved_path)
+ retpath = resolved_path;
+ else if (warn_deprecated) {
+ retpath = allocpath = malloc(PATH_MAX + 1);
+ if (warn_realpath) {
+ warn_realpath = _gf_false;
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "this application "
+ "is compiled against an old version of "
+ "libgfapi, it should use glfs_free() to "
+ "release the path returned by "
+ "glfs_realpath()");
+ }
+ } else {
+ retpath = allocpath = GLFS_CALLOC(1, PATH_MAX + 1, NULL,
+ glfs_mt_realpath_t);
+ }
+
+ if (!retpath) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (loc.path) {
+ snprintf(retpath, PATH_MAX + 1, "%s", loc.path);
+ }
+
+out:
+ loc_wipe(&loc);
+
+ if (ret == -1) {
+ if (warn_deprecated && allocpath)
+ free(allocpath);
+ else if (allocpath)
+ GLFS_FREE(allocpath);
+ retpath = NULL;
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return retpath;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0)
+char *
+pub_glfs_realpath34(struct glfs *fs, const char *path, char *resolved_path)
+{
+ return glfs_realpath_common(fs, path, resolved_path, _gf_true);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17)
+char *
+pub_glfs_realpath(struct glfs *fs, const char *path, char *resolved_path)
+{
+ return glfs_realpath_common(fs, path, resolved_path, _gf_false);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0)
+char *
+pub_glfs_getcwd(struct glfs *fs, char *buf, size_t n)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ char *path = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!buf || n < 2) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ inode = glfs_cwd_get(fs);
+
+ if (!inode) {
+ strncpy(buf, "/", n);
+ ret = 0;
+ goto out;
+ }
+
+ ret = inode_path(inode, 0, &path);
+ if (n <= ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ strncpy(buf, path, n);
+ ret = 0;
+out:
+ GF_FREE(path);
+
+ if (inode)
+ inode_unref(inode);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ if (ret < 0)
+ return NULL;
+
+ return buf;
+}
+
+static void
+gf_flock_to_flock(struct gf_flock *gf_flock, struct flock *flock)
+{
+ flock->l_type = gf_flock->l_type;
+ flock->l_whence = gf_flock->l_whence;
+ flock->l_start = gf_flock->l_start;
+ flock->l_len = gf_flock->l_len;
+ flock->l_pid = gf_flock->l_pid;
+}
+
+static void
+gf_flock_from_flock(struct gf_flock *gf_flock, struct flock *flock)
+{
+ gf_flock->l_type = flock->l_type;
+ gf_flock->l_whence = flock->l_whence;
+ gf_flock->l_start = flock->l_start;
+ gf_flock->l_len = flock->l_len;
+ gf_flock->l_pid = flock->l_pid;
+}
+
+static int
+glfs_lock_common(struct glfs_fd *glfd, int cmd, struct flock *flock,
+ dict_t *xdata)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct gf_flock gf_flock = {
+ 0,
+ };
+ struct gf_flock saved_flock = {
+ 0,
+ };
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ if (!flock) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ GF_REF_GET(glfd);
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ /* Generate glusterfs flock structure from client flock
+ * structure to be processed by server */
+ gf_flock_from_flock(&gf_flock, flock);
+
+ /* Keep another copy of flock for split/merge of locks
+ * at client side */
+ gf_flock_from_flock(&saved_flock, flock);
+
+ if (glfd->lk_owner.len != 0) {
+ ret = syncopctx_setfslkowner(&glfd->lk_owner);
+
+ if (ret)
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&xdata);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_lk(subvol, fd, cmd, &gf_flock, xdata, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* Convert back from gf_flock to flock as expected by application */
+ gf_flock_to_flock(&gf_flock, flock);
+
+ if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW)) {
+ ret = fd_lk_insert_and_merge(fd, cmd, &saved_flock);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ API_MSG_LOCK_INSERT_MERGE_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ ret = 0;
+ }
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0)
+int
+pub_glfs_file_lock(struct glfs_fd *glfd, int cmd, struct flock *flock,
+ glfs_lock_mode_t lk_mode)
+{
+ int ret = -1;
+ dict_t *xdata_in = NULL;
+
+ if (lk_mode == GLFS_LK_MANDATORY) {
+ /* Create a new dictionary */
+ xdata_in = dict_new();
+ if (xdata_in == NULL) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* Set GF_LK_MANDATORY internally within dictionary to map
+ * GLFS_LK_MANDATORY */
+ ret = dict_set_uint32(xdata_in, GF_LOCK_MODE, GF_LK_MANDATORY);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ API_MSG_SETTING_LOCK_TYPE_FAILED, NULL);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = glfs_lock_common(glfd, cmd, flock, xdata_in);
+out:
+ if (xdata_in)
+ dict_unref(xdata_in);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0)
+int
+pub_glfs_posix_lock(struct glfs_fd *glfd, int cmd, struct flock *flock)
+{
+ return glfs_lock_common(glfd, cmd, flock, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7)
+int
+pub_glfs_fd_set_lkowner(struct glfs_fd *glfd, void *data, int len)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ if (!GF_REF_GET(glfd)) {
+ goto invalid_fs;
+ }
+
+ GF_VALIDATE_OR_GOTO(THIS->name, data, out);
+
+ if ((len <= 0) || (len > GFAPI_MAX_LOCK_OWNER_LEN)) {
+ errno = EINVAL;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "lk_owner len=%d", len, NULL);
+ goto out;
+ }
+
+ glfd->lk_owner.len = len;
+
+ memcpy(glfd->lk_owner.data, data, len);
+
+ ret = 0;
+out:
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0)
+struct glfs_fd *
+pub_glfs_dup(struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct glfs_fd *dupfd = NULL;
+ struct glfs *fs = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ fs = glfd->fs;
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ dupfd = glfs_fd_new(fs);
+ if (!dupfd) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ dupfd->fd = fd_ref(fd);
+ dupfd->state = glfd->state;
+out:
+ if (fd)
+ fd_unref(fd);
+ if (dupfd)
+ glfs_fd_bind(dupfd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return dupfd;
+}
+
+static void
+glfs_enqueue_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+{
+ int ret = -1;
+ upcall_entry *u_list = NULL;
+
+ if (!fs || !upcall_data)
+ goto out;
+
+ u_list = GF_CALLOC(1, sizeof(*u_list), glfs_mt_upcall_entry_t);
+
+ if (!u_list) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&u_list->upcall_list);
+
+ gf_uuid_copy(u_list->upcall_data.gfid, upcall_data->gfid);
+ u_list->upcall_data.event_type = upcall_data->event_type;
+
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ ret = glfs_get_upcall_cache_invalidation(&u_list->upcall_data,
+ upcall_data);
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ ret = glfs_get_upcall_lease(&u_list->upcall_data, upcall_data);
+ break;
+ default:
+ break;
+ }
+
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, NULL);
+ goto out;
+ }
+
+ pthread_mutex_lock(&fs->upcall_list_mutex);
+ {
+ list_add_tail(&u_list->upcall_list, &fs->upcall_list);
+ }
+ pthread_mutex_unlock(&fs->upcall_list_mutex);
+
+ ret = 0;
+
+out:
+ if (ret && u_list) {
+ GF_FREE(u_list->upcall_data.data);
+ GF_FREE(u_list);
+ }
+}
+
+static void
+glfs_free_upcall_lease(void *to_free)
+{
+ struct glfs_upcall_lease *arg = to_free;
+
+ if (!arg)
+ return;
+
+ if (arg->object)
+ glfs_h_close(arg->object);
+
+ GF_FREE(arg);
+}
+
+int
+glfs_recall_lease_fd(struct glfs *fs, struct gf_upcall *up_data)
+{
+ struct gf_upcall_recall_lease *recall_lease = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ inode_t *inode = NULL;
+ struct glfs_fd *glfd = NULL;
+ struct glfs_fd *tmp = NULL;
+ struct list_head glfd_list;
+ fd_t *fd = NULL;
+ uint64_t value = 0;
+ struct glfs_lease lease = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("gfapi", up_data, out);
+ GF_VALIDATE_OR_GOTO("gfapi", fs, out);
+
+ recall_lease = up_data->data;
+ GF_VALIDATE_OR_GOTO("gfapi", recall_lease, out);
+
+ INIT_LIST_HEAD(&glfd_list);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ gf_msg_debug(THIS->name, 0, "Recall lease received for gfid:%s",
+ uuid_utoa(up_data->gfid));
+
+ inode = inode_find(subvol->itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INODE_FIND_FAILED,
+ "gfid=%s", uuid_utoa(up_data->gfid), "graph_id=%d",
+ subvol->graph->id, NULL);
+ goto out;
+ }
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(fd, &inode->fd_list, inode_list)
+ {
+ ret = fd_ctx_get(fd, subvol, &value);
+ glfd = (struct glfs_fd *)(uintptr_t)value;
+ if (glfd) {
+ gf_msg_trace(THIS->name, 0, "glfd (%p) has held lease", glfd);
+ GF_REF_GET(glfd);
+ list_add_tail(&glfd->list, &glfd_list);
+ }
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ if (!list_empty(&glfd_list)) {
+ list_for_each_entry_safe(glfd, tmp, &glfd_list, list)
+ {
+ LOCK(&glfd->lock);
+ {
+ if (glfd->state != GLFD_CLOSE) {
+ gf_msg_trace(THIS->name, 0,
+ "glfd (%p) has held lease, "
+ "calling recall cbk",
+ glfd);
+ glfd->cbk(lease, glfd->cookie);
+ }
+ }
+ UNLOCK(&glfd->lock);
+
+ list_del_init(&glfd->list);
+ GF_REF_PUT(glfd);
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int
+glfs_recall_lease_upcall(struct glfs *fs, struct glfs_upcall *up_arg,
+ struct gf_upcall *up_data)
+{
+ struct gf_upcall_recall_lease *recall_lease = NULL;
+ struct glfs_object *object = NULL;
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ struct glfs_upcall_lease *up_lease_arg = NULL;
+
+ GF_VALIDATE_OR_GOTO("gfapi", up_data, out);
+ GF_VALIDATE_OR_GOTO("gfapi", fs, out);
+
+ recall_lease = up_data->data;
+ GF_VALIDATE_OR_GOTO("gfapi", recall_lease, out);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ gf_msg_debug(THIS->name, 0, "Recall lease received for gfid:%s",
+ uuid_utoa(up_data->gfid));
+
+ object = glfs_h_find_handle(fs, up_data->gfid, GFAPI_HANDLE_LENGTH);
+ if (!object) {
+ /* The reason handle creation will fail is because we
+ * couldn't find the inode in the gfapi inode table.
+ *
+ * But since application would have taken inode_ref, the
+ * only case when this can happen is when it has closed
+ * the handle and hence will no more be interested in
+ * the upcall for this particular gfid.
+ */
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "gfid=%s", uuid_utoa(up_data->gfid), NULL);
+ errno = ESTALE;
+ goto out;
+ }
+
+ up_lease_arg = GF_CALLOC(1, sizeof(struct glfs_upcall_lease),
+ glfs_mt_upcall_inode_t);
+ up_lease_arg->object = object;
+
+ GF_VALIDATE_OR_GOTO("glfs_recall_lease", up_lease_arg, out);
+
+ up_lease_arg->lease_type = recall_lease->lease_type;
+
+ up_arg->reason = GLFS_UPCALL_RECALL_LEASE;
+ up_arg->event = up_lease_arg;
+ up_arg->free_event = glfs_free_upcall_lease;
+
+ ret = 0;
+
+out:
+ if (ret) {
+ /* Close p_object and oldp_object as well if being referenced.*/
+ if (object)
+ glfs_h_close(object);
+
+ /* Set reason to prevent applications from using ->event */
+ up_arg->reason = GF_UPCALL_EVENT_NULL;
+ }
+ return ret;
+}
+
+static int
+upcall_syncop_args_free(struct upcall_syncop_args *args)
+{
+ dict_t *dict = NULL;
+ struct gf_upcall *upcall_data = NULL;
+
+ if (args) {
+ upcall_data = &args->upcall_data;
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ dict = ((struct gf_upcall_cache_invalidation *)(upcall_data
+ ->data))
+ ->dict;
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ dict = ((struct gf_upcall_recall_lease *)(upcall_data->data))
+ ->dict;
+ break;
+ }
+ if (dict)
+ dict_unref(dict);
+
+ GF_FREE(upcall_data->client_uid);
+ GF_FREE(upcall_data->data);
+ }
+ GF_FREE(args);
+ return 0;
+}
+
+static int
+glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ struct upcall_syncop_args *args = opaque;
+
+ (void)upcall_syncop_args_free(args);
+
+ return 0;
+}
+
+static int
+glfs_cbk_upcall_syncop(void *opaque)
+{
+ struct upcall_syncop_args *args = opaque;
+ struct gf_upcall *upcall_data = NULL;
+ struct glfs_upcall *up_arg = NULL;
+ struct glfs *fs;
+ int ret = -1;
+
+ fs = args->fs;
+ upcall_data = &args->upcall_data;
+
+ if (!upcall_data) {
+ goto out;
+ }
+
+ up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall), glfs_release_upcall,
+ glfs_mt_upcall_entry_t);
+ if (!up_arg) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ ret = glfs_h_poll_cache_invalidation(fs, up_arg, upcall_data);
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ ret = glfs_recall_lease_upcall(fs, up_arg, upcall_data);
+ break;
+ default:
+ errno = EINVAL;
+ }
+
+ /* It could so happen that the file which got
+ * upcall notification may have got deleted by
+ * the same client. In such cases up_arg->reason
+ * is set to GLFS_UPCALL_EVENT_NULL. No need to
+ * send upcall then
+ */
+ if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) {
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_UPCALL_EVENT_NULL_RECEIVED, NULL);
+ ret = 0;
+ GLFS_FREE(up_arg);
+ goto out;
+ } else if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, NULL);
+ GLFS_FREE(up_arg);
+ goto out;
+ }
+
+ if (fs->up_cbk && up_arg)
+ (fs->up_cbk)(up_arg, fs->up_data);
+
+ /* application takes care of calling glfs_free on up_arg post
+ * their processing */
+
+out:
+ return ret;
+}
+
+static struct gf_upcall_cache_invalidation *
+gf_copy_cache_invalidation(struct gf_upcall_cache_invalidation *src)
+{
+ struct gf_upcall_cache_invalidation *dst = NULL;
+
+ if (!src)
+ goto out;
+
+ dst = GF_CALLOC(1, sizeof(struct gf_upcall_cache_invalidation),
+ glfs_mt_upcall_entry_t);
+
+ if (!dst) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ dst->flags = src->flags;
+ dst->expire_time_attr = src->expire_time_attr;
+ dst->stat = src->stat;
+ dst->p_stat = src->p_stat;
+ dst->oldp_stat = src->oldp_stat;
+
+ if (src->dict)
+ dst->dict = dict_copy_with_ref(src->dict, NULL);
+
+ return dst;
+out:
+ return NULL;
+}
+
+static struct gf_upcall_recall_lease *
+gf_copy_recall_lease(struct gf_upcall_recall_lease *src)
+{
+ struct gf_upcall_recall_lease *dst = NULL;
+
+ if (!src)
+ goto out;
+
+ dst = GF_CALLOC(1, sizeof(struct gf_upcall_recall_lease),
+ glfs_mt_upcall_entry_t);
+
+ if (!dst) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ dst->lease_type = src->lease_type;
+ memcpy(dst->tid, src->tid, 16);
+
+ if (src->dict)
+ dst->dict = dict_copy_with_ref(src->dict, NULL);
+
+ return dst;
+out:
+ return NULL;
+}
+
+static struct upcall_syncop_args *
+upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
+{
+ struct upcall_syncop_args *args = NULL;
+ int ret = -1;
+ struct gf_upcall *t_data = NULL;
+
+ if (!fs || !upcall_data)
+ goto out;
+
+ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args),
+ glfs_mt_upcall_entry_t);
+ if (!args) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
+ "syncop args", NULL);
+ goto out;
+ }
+
+ /* Note: we are not taking any ref on fs here.
+ * Ideally applications have to unregister for upcall events
+ * or stop polling for upcall events before performing
+ * glfs_fini. And as for outstanding synctasks created, we wait
+ * for all syncenv threads to finish tasks before cleaning up the
+ * fs->ctx. Hence it seems safe to process these callback
+ * notification without taking any lock/ref.
+ */
+ args->fs = fs;
+ t_data = &(args->upcall_data);
+ t_data->client_uid = gf_strdup(upcall_data->client_uid);
+
+ gf_uuid_copy(t_data->gfid, upcall_data->gfid);
+ t_data->event_type = upcall_data->event_type;
+
+ switch (t_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ t_data->data = gf_copy_cache_invalidation(
+ (struct gf_upcall_cache_invalidation *)upcall_data->data);
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ t_data->data = gf_copy_recall_lease(
+ (struct gf_upcall_recall_lease *)upcall_data->data);
+ break;
+ }
+
+ if (!t_data->data)
+ goto out;
+
+ return args;
+out:
+ if (ret) {
+ if (args) {
+ GF_FREE(args->upcall_data.client_uid);
+ GF_FREE(args);
+ }
+ }
+
+ return NULL;
+}
+
+static void
+glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+{
+ struct upcall_syncop_args *args = NULL;
+ int ret = -1;
+
+ if (!fs || !upcall_data)
+ goto out;
+
+ if (!(fs->upcall_events & upcall_data->event_type)) {
+ /* ignore events which application hasn't registered*/
+ goto out;
+ }
+
+ args = upcall_syncop_args_init(fs, upcall_data);
+
+ if (!args)
+ goto out;
+
+ ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop,
+ glfs_upcall_syncop_cbk, NULL, args);
+ /* should we retry incase of failure? */
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED,
+ "event_type=%d", upcall_data->event_type, "gfid=%s",
+ (char *)(upcall_data->gfid), NULL);
+ upcall_syncop_args_free(args);
+ }
+
+out:
+ return;
+}
+
+/*
+ * This routine is called in case of any notification received
+ * from the server. All the upcall events are queued up in a list
+ * to be read by the applications.
+ *
+ * In case if the application registers a cbk function, that shall
+ * be called by this routine in case of any event received.
+ * The cbk fn is responsible for notifying the
+ * applications the way it desires for each event queued (for eg.,
+ * can raise a signal or broadcast a cond variable etc.)
+ *
+ * Otherwise all the upcall events are queued up in a list
+ * to be read/polled by the applications.
+ */
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0)
+void
+priv_glfs_process_upcall_event(struct glfs *fs, void *data)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ struct gf_upcall *upcall_data = NULL;
+
+ DECLARE_OLD_THIS;
+
+ gf_msg_debug(THIS->name, 0, "Upcall gfapi callback is called");
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, err);
+
+ if (!data)
+ goto out;
+
+ /* Unlike in I/O path, "glfs_fini" would not have freed
+ * 'fs' by the time we take lock as it waits for all epoll
+ * threads to exit including this
+ */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ ctx = fs->ctx;
+
+ /* if we're not interested in upcalls (anymore), skip them */
+ if (ctx->cleanup_started || !fs->cache_upcalls) {
+ pthread_mutex_unlock(&fs->mutex);
+ goto out;
+ }
+
+ fs->pin_refcnt++;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ upcall_data = (struct gf_upcall *)data;
+
+ gf_msg_trace(THIS->name, 0, "Upcall gfapi gfid = %s",
+ (char *)(upcall_data->gfid));
+
+ /* *
+ * TODO: RECALL LEASE for each glfd
+ *
+ * In case of RECALL_LEASE, we could associate separate
+ * cbk function for each glfd either by
+ * - extending pub_glfs_lease to accept new args (recall_cbk_fn, cookie)
+ * - or by defining new API "glfs_register_recall_cbk_fn (glfd,
+ * recall_cbk_fn, cookie) . In such cases, flag it and instead of calling
+ * below upcall functions, define a new one to go through the glfd list and
+ * invoke each of theirs recall_cbk_fn.
+ * */
+
+ if (fs->up_cbk) { /* upcall cbk registered */
+ (void)glfs_cbk_upcall_data(fs, upcall_data);
+ } else {
+ (void)glfs_enqueue_upcall_data(fs, upcall_data);
+ }
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs->pin_refcnt--;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+out:
+ __GLFS_EXIT_FS;
+err:
+ return;
+}
+
+ssize_t
+glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags)
+{
+ xlator_t *subvol = NULL;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
+ size_t size = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ ret = -1;
+ errno = ESTALE;
+ goto out;
+ }
+
+ fd = fd_anonymous(inode);
+ if (!fd) {
+ ret = -1;
+ gf_smsg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED, NULL);
+ errno = ENOMEM;
+ goto out;
+ }
+
+ size = iov_length(iovec, iovcnt);
+
+ iobuf = iobuf_get2(subvol->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ iobref = iobref_new();
+ if (!iobref) {
+ iobuf_unref(iobuf);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = iobref_add(iobref, iobuf);
+ if (ret) {
+ iobuf_unref(iobuf);
+ iobref_unref(iobref);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ iov_unload(iobuf_ptr(iobuf), iovec, iovcnt);
+
+ iov.iov_base = iobuf_ptr(iobuf);
+ iov.iov_len = size;
+
+ /* TODO : set leaseid */
+ ret = syncop_writev(subvol, fd, &iov, 1, offset, iobref, flags, NULL, NULL,
+ NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ iobuf_unref(iobuf);
+ iobref_unref(iobref);
+
+ if (ret <= 0)
+ goto out;
+
+out:
+
+ if (fd)
+ fd_unref(fd);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+ssize_t
+glfs_anonymous_preadv(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags)
+{
+ xlator_t *subvol = NULL;
+ struct iovec *iov = NULL;
+ struct iobref *iobref = NULL;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ int cnt = 0;
+ ssize_t ret = -1;
+ ssize_t size = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ ret = -1;
+ errno = ESTALE;
+ goto out;
+ }
+
+ fd = fd_anonymous(inode);
+ if (!fd) {
+ ret = -1;
+ gf_smsg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED, NULL);
+ errno = ENOMEM;
+ goto out;
+ }
+
+ size = iov_length(iovec, iovcnt);
+
+ /* TODO : set leaseid */
+ ret = syncop_readv(subvol, fd, size, offset, flags, &iov, &cnt, &iobref,
+ NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret <= 0)
+ goto out;
+
+ size = iov_copy(iovec, iovcnt, iov, cnt);
+
+ ret = size;
+out:
+ if (iov)
+ GF_FREE(iov);
+ if (iobref)
+ iobref_unref(iobref);
+ if (fd)
+ fd_unref(fd);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static void
+glfs_release_xreaddirp_stat(void *ptr)
+{
+ struct glfs_xreaddirp_stat *to_free = ptr;
+
+ if (to_free->object)
+ glfs_h_close(to_free->object);
+}
+
+/*
+ * Given glfd of a directory, this function does readdirp and returns
+ * xstat along with dirents.
+ */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0)
+int
+pub_glfs_xreaddirplus_r(struct glfs_fd *glfd, uint32_t flags,
+ struct glfs_xreaddirp_stat **xstat_p,
+ struct dirent *ext, struct dirent **res)
+{
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ struct dirent *buf = NULL;
+ struct glfs_xreaddirp_stat *xstat = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ GF_VALIDATE_OR_GOTO(THIS->name, xstat_p, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, res, out);
+
+ errno = 0;
+
+ if (ext)
+ buf = ext;
+ else
+ buf = glfs_readdirbuf_get(glfd);
+
+ if (!buf)
+ goto out;
+
+ xstat = GLFS_CALLOC(1, sizeof(struct glfs_xreaddirp_stat),
+ glfs_release_xreaddirp_stat, glfs_mt_xreaddirp_stat_t);
+
+ if (!xstat)
+ goto out;
+
+ /* this is readdirplus operation */
+ entry = glfd_entry_next(glfd, 1);
+
+ /* XXX: Ideally when we reach EOD, errno should have been
+ * set to ENOENT. But that doesn't seem to be the case.
+ *
+ * The only way to confirm if its EOD at this point is that
+ * errno == 0 and entry == NULL
+ */
+ if (errno)
+ goto out;
+
+ if (!entry) {
+ /* reached EOD, ret = 0 */
+ ret = 0;
+ *res = NULL;
+ *xstat_p = NULL;
+
+ /* free xstat as applications shall not be using it */
+ GLFS_FREE(xstat);
+
+ goto out;
+ }
+
+ *res = buf;
+ gf_dirent_to_dirent(entry, buf);
+
+ if (flags & GFAPI_XREADDIRP_STAT) {
+ glfs_iatt_to_stat(glfd->fs, &entry->d_stat, &xstat->st);
+ xstat->flags_handled |= GFAPI_XREADDIRP_STAT;
+ }
+
+ if ((flags & GFAPI_XREADDIRP_HANDLE) &&
+ /* skip . and .. */
+ strcmp(buf->d_name, ".") && strcmp(buf->d_name, "..")) {
+ /* Now create object.
+ * We can use "glfs_h_find_handle" as well as inodes would have
+ * already got linked as part of 'gf_link_inodes_from_dirent' */
+ xstat->object = glfs_h_create_from_handle(
+ glfd->fs, entry->d_stat.ia_gfid, GFAPI_HANDLE_LENGTH, NULL);
+
+ if (xstat->object) { /* success */
+ /* note: xstat->object->inode->ref is taken
+ * This shall be unref'ed when application does
+ * glfs_free(xstat) */
+ xstat->flags_handled |= GFAPI_XREADDIRP_HANDLE;
+ }
+ }
+
+ ret = xstat->flags_handled;
+ *xstat_p = xstat;
+
+ gf_msg_debug(THIS->name, 0,
+ "xreaddirp- requested_flags (%x) , processed_flags (%x)",
+ flags, xstat->flags_handled);
+
+out:
+ GF_REF_PUT(glfd);
+
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, errno, API_MSG_XREADDIRP_R_FAILED,
+ "reason=%s", strerror(errno), NULL);
+
+ if (xstat)
+ GLFS_FREE(xstat);
+ }
+
+ __GLFS_EXIT_FS;
+
+ return ret;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0)
+struct stat *
+pub_glfs_xreaddirplus_get_stat(struct glfs_xreaddirp_stat *xstat)
+{
+ GF_VALIDATE_OR_GOTO("glfs_xreaddirplus_get_stat", xstat, out);
+
+ if (!xstat->flags_handled & GFAPI_XREADDIRP_STAT)
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_FLAGS_HANDLE,
+ "GFAPI_XREADDIRP_STAT"
+ "xstat=%p",
+ xstat, "handles=%x", xstat->flags_handled, NULL);
+ return &xstat->st;
+
+out:
+ return NULL;
+}
+
+void
+gf_lease_to_glfs_lease(struct gf_lease *gf_lease, struct glfs_lease *lease)
+{
+ u_int lease_type = gf_lease->lease_type;
+ lease->cmd = gf_lease->cmd;
+ lease->lease_type = lease_type;
+ memcpy(lease->lease_id, gf_lease->lease_id, LEASE_ID_SIZE);
+}
+
+void
+glfs_lease_to_gf_lease(struct glfs_lease *lease, struct gf_lease *gf_lease)
+{
+ u_int lease_type = lease->lease_type;
+ gf_lease->cmd = lease->cmd;
+ gf_lease->lease_type = lease_type;
+ memcpy(gf_lease->lease_id, lease->lease_id, LEASE_ID_SIZE);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0)
+int
+pub_glfs_lease(struct glfs_fd *glfd, struct glfs_lease *lease,
+ glfs_recall_cbk fn, void *data)
+{
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct gf_lease gf_lease = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ if (!is_valid_lease_id(lease->lease_id)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ switch (lease->lease_type) {
+ case GLFS_RD_LEASE:
+ if ((fd->flags != O_RDONLY) && !(fd->flags & O_RDWR)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ break;
+ case GLFS_RW_LEASE:
+ if (!((fd->flags & O_WRONLY) || (fd->flags & O_RDWR))) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ break;
+ default:
+ if (lease->cmd != GLFS_GET_LEASE) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ break;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(fd->inode, loc, out);
+
+ glfs_lease_to_gf_lease(lease, &gf_lease);
+
+ ret = syncop_lease(subvol, &loc, &gf_lease, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ gf_lease_to_glfs_lease(&gf_lease, lease);
+
+ /* TODO: Add leases for client replay
+ if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW))
+ fd_lk_insert_and_merge (fd, cmd, &saved_flock);
+ */
+ if (ret == 0) {
+ ret = fd_ctx_set(glfd->fd, subvol, (uint64_t)(long)glfd);
+ if (ret) {
+ gf_smsg(subvol->name, GF_LOG_ERROR, ENOMEM,
+ API_MSG_FDCTX_SET_FAILED, "fd=%p", glfd->fd, NULL);
+ goto out;
+ }
+ glfd->cbk = fn;
+ glfd->cookie = data;
+ }
+
+out:
+
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ if (subvol)
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
new file mode 100644
index 00000000000..53c2ee896f9
--- /dev/null
+++ b/api/src/glfs-handleops.c
@@ -0,0 +1,2655 @@
+/*
+ * Copyright (c) 2013-2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include <glusterfs/syncop.h>
+#include "glfs.h"
+#include "glfs-handles.h"
+#include "gfapi-messages.h"
+
+int
+glfs_listxattr_process(void *value, size_t size, dict_t *xattr);
+
+void
+glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
+ int *glvalid)
+{
+ /* validate in args */
+ if ((stat == NULL) || (iatt == NULL) || (glvalid == NULL)) {
+ errno = EINVAL;
+ return;
+ }
+
+ *glvalid = 0;
+
+ if (valid & GFAPI_SET_ATTR_MODE) {
+ iatt->ia_prot = ia_prot_from_st_mode(stat->st_mode);
+ *glvalid |= GF_SET_ATTR_MODE;
+ }
+
+ if (valid & GFAPI_SET_ATTR_UID) {
+ iatt->ia_uid = stat->st_uid;
+ *glvalid |= GF_SET_ATTR_UID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_GID) {
+ iatt->ia_gid = stat->st_gid;
+ *glvalid |= GF_SET_ATTR_GID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_ATIME) {
+ iatt->ia_atime = stat->st_atime;
+ iatt->ia_atime_nsec = ST_ATIM_NSEC(stat);
+ *glvalid |= GF_SET_ATTR_ATIME;
+ }
+
+ if (valid & GFAPI_SET_ATTR_MTIME) {
+ iatt->ia_mtime = stat->st_mtime;
+ iatt->ia_mtime_nsec = ST_MTIM_NSEC(stat);
+ *glvalid |= GF_SET_ATTR_MTIME;
+ }
+
+ return;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4)
+struct glfs_object *
+pub_glfs_h_lookupat(struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat, int follow)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ struct glfs_object *object = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if (path == NULL) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ if (parent) {
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_at(fs, subvol, inode, path, &loc, &iatt, follow, 0);
+
+ /* populate out args */
+ if (!ret) {
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2)
+struct glfs_object *
+pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat)
+{
+ return pub_glfs_h_lookupat(fs, parent, path, stat, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0)
+int
+pub_glfs_h_statfs(struct glfs *fs, struct glfs_object *object,
+ struct statvfs *statvfs)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL || statvfs == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_statfs(subvol, &loc, statvfs, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ loc_wipe(&loc);
+
+out:
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2)
+int
+pub_glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_stat(subvol, &loc, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat(fs, &iatt, stat);
+ }
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2)
+int
+pub_glfs_h_getattrs(struct glfs *fs, struct glfs_object *object,
+ struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ ret = 0;
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_base(fs, subvol, inode, &iatt);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat(fs, &iatt, stat);
+ }
+
+out:
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_h_getxattrs_common(struct glfs *fs, struct glfs_object *object,
+ dict_t **xattr, const char *name,
+ gf_boolean_t is_listxattr)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (!is_listxattr) {
+ if (!name || *name == '\0') {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ errno = ENAMETOOLONG;
+ return -1;
+ }
+ }
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ ret = syncop_getxattr(subvol, &loc, xattr, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1)
+int
+pub_glfs_h_getxattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name, void *value, size_t size)
+{
+ int ret = -1;
+ dict_t *xattr = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ ret = glfs_h_getxattrs_common(fs, object, &xattr, name, (name == NULL));
+ if (ret)
+ goto out;
+
+ /* If @name is NULL, means get all the xattrs (i.e listxattr). */
+ if (name)
+ ret = glfs_getxattr_process(value, size, xattr, name);
+ else
+ ret = glfs_listxattr_process(value, size, xattr);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2)
+int
+pub_glfs_h_setattrs(struct glfs *fs, struct glfs_object *object,
+ struct stat *stat, int valid)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int glvalid = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (stat == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* map valid masks from in args */
+ glfs_iatt_from_stat(stat, valid, &iatt, &glvalid);
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_setattr(subvol, &loc, &iatt, glvalid, 0, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0)
+int
+pub_glfs_h_setxattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name, const void *value, size_t size,
+ int flags)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ void *value_cp = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (name == NULL) || (value == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (!name || *name == '\0') {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ errno = ENAMETOOLONG;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ value_cp = gf_memdup(value, size);
+ GF_CHECK_ALLOC_AND_LOG(subvol->name, value_cp, ret,
+ "Failed to"
+ " duplicate setxattr value",
+ out);
+
+ xattr = dict_for_key_value(name, value_cp, size, _gf_false);
+ if (!xattr) {
+ GF_FREE(value_cp);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_setxattr(subvol, &loc, xattr, flags, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1)
+int
+pub_glfs_h_removexattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (name == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_removexattr(subvol, &loc, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2)
+struct glfs_fd *
+pub_glfs_h_open(struct glfs *fs, struct glfs_object *object, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* check types to open */
+ if (IA_ISDIR(inode->ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG(inode->ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd->fd = fd_create(inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_open(subvol, &loc, flags, glfd->fd, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ glfd->fd->flags = flags;
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2)
+struct glfs_object *
+pub_glfs_h_creat(struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ fd_t *fd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ ret = -1;
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ fd = fd_create(loc.inode, getpid());
+ if (!fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ fd->flags = flags;
+
+ /* fop/op */
+ ret = syncop_create(subvol, &loc, flags, mode, fd, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ /* Release the held reference */
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6)
+struct glfs_object *
+pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent,
+ const char *path, int flags, mode_t mode,
+ struct stat *stat, struct glfs_fd **out_fd)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+ dict_t *fop_attr = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL) ||
+ (out_fd == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ /* fop/op */
+ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt, xattr_req,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ glfd->fd->flags = flags;
+
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ /* Release the held reference */
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ *out_fd = glfd;
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2)
+struct glfs_object *
+pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mkdir(subvol, &loc, mode, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2)
+struct glfs_object *
+pub_glfs_h_mknod(struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, dev_t dev, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mknod(subvol, &loc, mode, dev, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+out:
+ if (ret && object != NULL) {
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2)
+int
+pub_glfs_h_unlink(struct glfs *fs, struct glfs_object *parent, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at(fs, subvol, inode, path, &loc, NULL, 0, 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (!IA_ISDIR(loc.inode->ia_type)) {
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret != 0) {
+ goto out;
+ }
+ } else {
+ ret = syncop_rmdir(subvol, &loc, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret != 0) {
+ goto out;
+ }
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2)
+struct glfs_fd *
+pub_glfs_h_opendir(struct glfs *fs, struct glfs_object *object)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (!IA_ISDIR(inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD(&glfd->entries);
+
+ glfd->fd = fd_create(inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_opendir(subvol, &loc, glfd->fd, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0)
+int
+pub_glfs_h_access(struct glfs *fs, struct glfs_object *object, int mask)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+
+ ret = syncop_access(subvol, &loc, mask, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2)
+ssize_t
+pub_glfs_h_extract_handle(struct glfs_object *object, unsigned char *handle,
+ int len)
+{
+ ssize_t ret = -1;
+
+ /* validate in args */
+ if (object == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (!handle || !len) {
+ ret = GFAPI_HANDLE_LENGTH;
+ goto out;
+ }
+
+ if (len < GFAPI_HANDLE_LENGTH) {
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy(handle, object->gfid, GFAPI_HANDLE_LENGTH);
+
+ ret = GFAPI_HANDLE_LENGTH;
+
+out:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2)
+struct glfs_object *
+pub_glfs_h_create_from_handle(struct glfs *fs, unsigned char *handle, int len,
+ struct stat *stat)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *newinode = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs_object *object = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+ gf_boolean_t lookup_needed = _gf_false;
+
+ /* validate in args */
+ if ((fs == NULL) || (handle == NULL) || (len != GFAPI_HANDLE_LENGTH)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ memcpy(loc.gfid, handle, GFAPI_HANDLE_LENGTH);
+
+ /* make sure the gfid received is valid */
+ GF_VALIDATE_OR_GOTO("glfs_h_create_from_handle",
+ !(gf_uuid_is_null(loc.gfid)), out);
+
+ newinode = inode_find(subvol->itable, loc.gfid);
+ if (newinode) {
+ if (!stat) /* No need of lookup */
+ goto found;
+
+ lookup_needed = inode_needs_lookup(newinode, THIS);
+ if (lookup_needed) {
+ loc.inode = newinode;
+ } else {
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(newinode, loc, fill_out);
+
+ /* fop/op */
+ ret = syncop_stat(subvol, &loc, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret) {
+ fill_out:
+ /* Drop the reference hold in inode_find */
+ inode_unref(newinode);
+ goto out;
+ }
+
+ glfs_iatt_to_stat(fs, &iatt, stat);
+ goto found;
+ }
+ } else {
+ loc.inode = inode_new(subvol->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup(subvol, &loc, &iatt, 0, 0, 0);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s", uuid_utoa(loc.gfid),
+ "error=%s", strerror(errno), NULL);
+ goto out;
+ }
+
+ newinode = inode_link(loc.inode, 0, 0, &iatt);
+ if (newinode) {
+ if (newinode == loc.inode) {
+ inode_ctx_set(newinode, THIS, &ctx_value);
+ }
+ inode_lookup(newinode);
+ } else {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa(loc.gfid), NULL);
+ goto out;
+ }
+
+ /* populate stat */
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+found:
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* populate the return object */
+ object->inode = newinode;
+ gf_uuid_copy(object->gfid, object->inode->gfid);
+
+out:
+ /* TODO: Check where the inode ref is being held? */
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2)
+int
+pub_glfs_h_close(struct glfs_object *object)
+{
+ /* since glfs_h_* objects hold a reference to inode
+ * it is safe to keep lookup count to '0' */
+ inode_forget(object->inode, 0);
+ inode_unref(object->inode);
+ GF_FREE(object);
+
+ return 0;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2)
+int
+pub_glfs_h_truncate(struct glfs *fs, struct glfs_object *object, off_t offset)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if (object == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_truncate(subvol, &loc, (off_t)offset, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2)
+struct glfs_object *
+pub_glfs_h_symlink(struct glfs *fs, struct glfs_object *parent,
+ const char *name, const char *data, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((parent == NULL) || (name == NULL) || (data == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, name);
+
+ /* fop/op */
+ ret = syncop_symlink(subvol, &loc, data, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ pub_glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2)
+int
+pub_glfs_h_readlink(struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ char *linkval = NULL;
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((object == NULL) || (buf == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_readlink(subvol, &loc, &linkval, bufsiz, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret > 0)
+ memcpy(buf, linkval, ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (linkval)
+ GF_FREE(linkval);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2)
+int
+pub_glfs_h_link(struct glfs *fs, struct glfs_object *linksrc,
+ struct glfs_object *parent, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ inode_t *pinode = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((linksrc == NULL) || (parent == NULL) || (name == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, linksrc);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (inode->ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, oldloc, out);
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ pinode = glfs_resolve_inode(fs, subvol, parent);
+ if (!pinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* setup newloc based on parent */
+ newloc.parent = inode_ref(pinode);
+ newloc.name = name;
+ ret = glfs_loc_touchup(&newloc);
+ if (ret != 0) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ * original file
+ */
+ newloc.inode = inode_ref(inode);
+
+ /* fop/op */
+ ret = syncop_link(subvol, &oldloc, &newloc, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&newloc, &iatt);
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (pinode)
+ inode_unref(pinode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2)
+int
+pub_glfs_h_rename(struct glfs *fs, struct glfs_object *olddir,
+ const char *oldname, struct glfs_object *newdir,
+ const char *newname)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *oldpinode = NULL;
+ inode_t *newpinode = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt oldiatt = {
+ 0,
+ };
+ struct iatt newiatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((olddir == NULL) || (oldname == NULL) || (newdir == NULL) ||
+ (newname == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ oldpinode = glfs_resolve_inode(fs, subvol, olddir);
+ if (!oldpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at(fs, subvol, oldpinode, oldname, &oldloc, &oldiatt, 0,
+ 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ newpinode = glfs_resolve_inode(fs, subvol, newdir);
+ if (!newpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at(fs, subvol, newpinode, newname, &newloc, &newiatt, 0,
+ 0);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) != (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename(subvol, &oldloc, &newloc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == 0) {
+ inode_rename(oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode, &oldiatt);
+
+ if (newloc.inode && !inode_has_dentry(newloc.inode))
+ inode_forget(newloc.inode, 0);
+ }
+
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ if (oldpinode)
+ inode_unref(oldpinode);
+
+ if (newpinode)
+ inode_unref(newpinode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+/*
+ * Given a handle/gfid, find if the corresponding inode is present in
+ * the inode table. If yes create and return the corresponding glfs_object.
+ */
+struct glfs_object *
+glfs_h_find_handle(struct glfs *fs, unsigned char *handle, int len)
+{
+ inode_t *newinode = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs_object *object = NULL;
+ uuid_t gfid;
+
+ /* validate in args */
+ if ((fs == NULL) || (handle == NULL) || (len != GFAPI_HANDLE_LENGTH)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ memcpy(gfid, handle, GFAPI_HANDLE_LENGTH);
+
+ /* make sure the gfid received is valid */
+ GF_VALIDATE_OR_GOTO("glfs_h_find_handle", !(gf_uuid_is_null(gfid)), out);
+
+ newinode = inode_find(subvol->itable, gfid);
+ if (!newinode) {
+ goto out;
+ }
+
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* populate the return object. The ref taken here
+ * is un'refed when the application does glfs_h_close() */
+ object->inode = inode_ref(newinode);
+ gf_uuid_copy(object->gfid, object->inode->gfid);
+
+out:
+ /* inode_find takes a reference. Unref it. */
+ if (newinode)
+ inode_unref(newinode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+static void
+glfs_free_upcall_inode(void *to_free)
+{
+ struct glfs_upcall_inode *arg = to_free;
+
+ if (!arg)
+ return;
+
+ if (arg->object)
+ glfs_h_close(arg->object);
+ if (arg->p_object)
+ glfs_h_close(arg->p_object);
+ if (arg->oldp_object)
+ glfs_h_close(arg->oldp_object);
+
+ GF_FREE(arg);
+}
+
+int
+glfs_h_poll_cache_invalidation(struct glfs *fs, struct glfs_upcall *up_arg,
+ struct gf_upcall *upcall_data)
+{
+ int ret = -1;
+ struct glfs_object *p_object = NULL;
+ struct glfs_object *oldp_object = NULL;
+ struct glfs_object *object = NULL;
+ struct gf_upcall_cache_invalidation *ca_data = NULL;
+ struct glfs_upcall_inode *up_inode_arg = NULL;
+
+ ca_data = upcall_data->data;
+ GF_VALIDATE_OR_GOTO("glfs_h_poll_cache_invalidation", ca_data, out);
+
+ object = glfs_h_find_handle(fs, upcall_data->gfid, GFAPI_HANDLE_LENGTH);
+ if (!object) {
+ /* The reason handle creation will fail is because we
+ * couldn't find the inode in the gfapi inode table.
+ *
+ * But since application would have taken inode_ref, the
+ * only case when this can happen is when it has closed
+ * the handle and hence will no more be interested in
+ * the upcall for this particular gfid.
+ */
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "gfid=%s", uuid_utoa(upcall_data->gfid), NULL);
+ errno = ESTALE;
+ goto out;
+ }
+
+ up_inode_arg = GF_CALLOC(1, sizeof(struct glfs_upcall_inode),
+ glfs_mt_upcall_inode_t);
+ GF_VALIDATE_OR_GOTO("glfs_h_poll_cache_invalidation", up_inode_arg, out);
+
+ up_inode_arg->object = object;
+ up_inode_arg->flags = ca_data->flags;
+ up_inode_arg->expire_time_attr = ca_data->expire_time_attr;
+
+ /* XXX: Update stat as well in case of UP_*_TIMES.
+ * This will be addressed as part of INODE_UPDATE */
+ if (ca_data->flags & GFAPI_INODE_UPDATE_FLAGS) {
+ glfs_iatt_to_stat(fs, &ca_data->stat, &up_inode_arg->buf);
+ }
+
+ if (ca_data->flags & GFAPI_UP_PARENT_TIMES) {
+ p_object = glfs_h_find_handle(fs, ca_data->p_stat.ia_gfid,
+ GFAPI_HANDLE_LENGTH);
+ if (!p_object) {
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_CREATE_HANDLE_FAILED, "gfid=%s",
+ uuid_utoa(ca_data->p_stat.ia_gfid), NULL);
+ errno = ESTALE;
+ goto out;
+ }
+
+ glfs_iatt_to_stat(fs, &ca_data->p_stat, &up_inode_arg->p_buf);
+ }
+ up_inode_arg->p_object = p_object;
+
+ /* In case of RENAME, update old parent as well */
+ if (ca_data->flags & GFAPI_UP_RENAME) {
+ oldp_object = glfs_h_find_handle(fs, ca_data->oldp_stat.ia_gfid,
+ GFAPI_HANDLE_LENGTH);
+ if (!oldp_object) {
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_CREATE_HANDLE_FAILED, "gfid=%s",
+ uuid_utoa(ca_data->oldp_stat.ia_gfid), NULL);
+ errno = ESTALE;
+ /* By the time we receive upcall old parent_dir may
+ * have got removed. We still need to send upcall
+ * for the file/dir and current parent handles. */
+ up_inode_arg->oldp_object = NULL;
+ ret = 0;
+ }
+
+ glfs_iatt_to_stat(fs, &ca_data->oldp_stat, &up_inode_arg->oldp_buf);
+ }
+ up_inode_arg->oldp_object = oldp_object;
+
+ up_arg->reason = GLFS_UPCALL_INODE_INVALIDATE;
+ up_arg->event = up_inode_arg;
+ up_arg->free_event = glfs_free_upcall_inode;
+
+ ret = 0;
+
+out:
+ if (ret) {
+ /* Close p_object and oldp_object as well if being referenced.*/
+ if (object)
+ glfs_h_close(object);
+
+ /* Set reason to prevent applications from using ->event */
+ up_arg->reason = GLFS_UPCALL_EVENT_NULL;
+ GF_FREE(up_inode_arg);
+ }
+ return ret;
+}
+
+void
+glfs_release_upcall(void *ptr)
+{
+ struct glfs_upcall *to_free = ptr;
+
+ if (to_free->event)
+ to_free->free_event(to_free->event);
+}
+
+/*
+ * This API is used to poll for upcall events stored in the upcall list.
+ * Current users of this API is NFS-Ganesha. In case of any event received, it
+ * will be mapped appropriately into 'glfs_upcall' along with the handle object
+ * to be passed to NFS-Ganesha.
+ *
+ * On success, applications need to check if up_arg is not-NULL or errno is not
+ * ENOENT. glfs_upcall_get_reason() can be used to decide what kind of event
+ * has been received.
+ *
+ * Current supported upcall_events:
+ * GLFS_UPCALL_INODE_INVALIDATE
+ *
+ * After processing the event, applications need to free 'up_arg' by calling
+ * glfs_free().
+ *
+ * Also similar to I/Os, the application should ideally stop polling before
+ * calling glfs_fini(..). Hence making an assumption that 'fs' & ctx structures
+ * cannot be freed while in this routine.
+ */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16)
+int
+pub_glfs_h_poll_upcall(struct glfs *fs, struct glfs_upcall **up_arg)
+{
+ upcall_entry *u_list = NULL;
+ upcall_entry *tmp = NULL;
+ xlator_t *subvol = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = -1;
+ struct gf_upcall *upcall_data = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!up_arg) {
+ errno = EINVAL;
+ goto err;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, err);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto restore;
+ }
+
+ /* Ideally applications should stop polling before calling
+ * 'glfs_fini'. Yet cross check if cleanup has started. */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ ctx = fs->ctx;
+
+ if (ctx->cleanup_started) {
+ pthread_mutex_unlock(&fs->mutex);
+ goto out;
+ }
+
+ fs->pin_refcnt++;
+
+ /* once we call this function, the applications seems to be
+ * interested in events, enable caching them */
+ fs->cache_upcalls = _gf_true;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ pthread_mutex_lock(&fs->upcall_list_mutex);
+ {
+ list_for_each_entry_safe(u_list, tmp, &fs->upcall_list, upcall_list)
+ {
+ list_del_init(&u_list->upcall_list);
+ upcall_data = &u_list->upcall_data;
+ break;
+ }
+ }
+ /* No other thread can delete this entry. So unlock it */
+ pthread_mutex_unlock(&fs->upcall_list_mutex);
+
+ if (upcall_data) {
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ *up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall),
+ glfs_release_upcall,
+ glfs_mt_upcall_entry_t);
+ if (!*up_arg) {
+ errno = ENOMEM;
+ break; /* goto free u_list */
+ }
+
+ /* XXX: Need to revisit this to support
+ * GLFS_UPCALL_INODE_UPDATE if required. */
+ ret = glfs_h_poll_cache_invalidation(fs, *up_arg, upcall_data);
+ if (ret || (*up_arg)->reason == GLFS_UPCALL_EVENT_NULL) {
+ /* It could so happen that the file which got
+ * upcall notification may have got deleted by
+ * the same client. Irrespective of the error,
+ * return with an error or success+ENOENT. */
+ if ((*up_arg)->reason == GLFS_UPCALL_EVENT_NULL)
+ errno = ENOENT;
+
+ GLFS_FREE(*up_arg);
+ *up_arg = NULL;
+ }
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ gf_log("glfs_h_poll_upcall", GF_LOG_DEBUG,
+ "UPCALL_RECALL_LEASE is not implemented yet");
+ /* fallthrough till we support leases */
+ case GF_UPCALL_EVENT_NULL:
+ /* no 'default:' label, to force handling all upcall events */
+ errno = ENOENT;
+ break;
+ }
+
+ GF_FREE(u_list->upcall_data.data);
+ GF_FREE(u_list);
+ } else {
+ /* fs->upcall_list was empty, no upcall events cached */
+ errno = ENOENT;
+ }
+
+ ret = 0;
+
+out:
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs->pin_refcnt--;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ glfs_subvol_done(fs, subvol);
+
+restore:
+ __GLFS_EXIT_FS;
+err:
+ return ret;
+}
+
+static gf_boolean_t log_upcall370 = _gf_true; /* log once */
+
+/* The old glfs_h_poll_upcall interface requires intimate knowledge of the
+ * structures that are returned to the calling application. This is not
+ * recommended, as the returned structures need to returned correctly (handles
+ * closed, memory free'd with the unavailable GF_FREE(), and possibly more.)
+ *
+ * To the best of our knowledge, only NFS-Ganesha uses the upcall events
+ * through gfapi. We keep this backwards compatibility function around so that
+ * applications using the existing implementation do not break.
+ *
+ * WARNING: this function will be removed in the future.
+ */
+GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0)
+int
+pub_glfs_h_poll_upcall370(struct glfs *fs, struct glfs_callback_arg *up_arg)
+{
+ struct glfs_upcall *upcall = NULL;
+ int ret = -1;
+
+ if (log_upcall370) {
+ log_upcall370 = _gf_false;
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "this application is "
+ "compiled against an old version of libgfapi, it "
+ "should use glfs_free() to release the structure "
+ "returned by glfs_h_poll_upcall() - for more details, "
+ "see http://review.gluster.org/14701");
+ }
+
+ ret = pub_glfs_h_poll_upcall(fs, &upcall);
+ if (ret == 0) {
+ up_arg->fs = fs;
+ if ((errno == ENOENT) || !upcall || !upcall->event) {
+ up_arg->reason = GLFS_UPCALL_EVENT_NULL;
+ goto out;
+ }
+
+ up_arg->reason = upcall->reason;
+
+ if (upcall->reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ struct glfs_callback_inode_arg *cb_inode = NULL;
+ struct glfs_upcall_inode *up_inode = NULL;
+
+ cb_inode = GF_CALLOC(1, sizeof(struct glfs_callback_inode_arg),
+ glfs_mt_upcall_inode_t);
+ if (!cb_inode) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ up_inode = upcall->event;
+
+ /* copy attributes one by one, the memory layout might
+ * be different between the old glfs_callback_inode_arg
+ * and new glfs_upcall_inode */
+ cb_inode->object = up_inode->object;
+ cb_inode->flags = up_inode->flags;
+ memcpy(&cb_inode->buf, &up_inode->buf, sizeof(struct stat));
+ cb_inode->expire_time_attr = up_inode->expire_time_attr;
+ cb_inode->p_object = up_inode->p_object;
+ memcpy(&cb_inode->p_buf, &up_inode->p_buf, sizeof(struct stat));
+ cb_inode->oldp_object = up_inode->oldp_object;
+ memcpy(&cb_inode->oldp_buf, &up_inode->oldp_buf,
+ sizeof(struct stat));
+
+ up_arg->event_arg = cb_inode;
+ }
+ }
+
+out:
+ if (upcall) {
+ /* we can not use glfs_free() here, objects need to stay */
+ GF_FREE(upcall->event);
+ GF_FREE(upcall);
+ }
+
+ return ret;
+}
+
+#ifdef HAVE_ACL_LIBACL_H
+#include <glusterfs/glusterfs-acl.h>
+#include <acl/libacl.h>
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
+int
+pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type, const acl_t acl)
+{
+ int ret = -1;
+ char *acl_s = NULL;
+ const char *acl_key = NULL;
+ struct glfs_object *new_object = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!object || !acl) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ acl_key = gf_posix_acl_get_key(type);
+ if (!acl_key)
+ goto out;
+
+ acl_s = acl_to_any_text(acl, NULL, ',', TEXT_ABBREVIATE | TEXT_NUMERIC_IDS);
+ if (!acl_s)
+ goto out;
+
+ if (IA_ISLNK(object->inode->ia_type)) {
+ new_object = glfs_h_resolve_symlink(fs, object);
+ if (new_object == NULL)
+ goto out;
+ } else
+ new_object = object;
+
+ ret = pub_glfs_h_setxattrs(fs, new_object, acl_key, acl_s,
+ strlen(acl_s) + 1, 0);
+
+ acl_free(acl_s);
+
+out:
+ if (IA_ISLNK(object->inode->ia_type) && new_object)
+ glfs_h_close(new_object);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
+acl_t
+pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type)
+{
+ int ret = 0;
+ acl_t acl = NULL;
+ char *acl_s = NULL;
+ dict_t *xattr = NULL;
+ const char *acl_key = NULL;
+ struct glfs_object *new_object = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!object) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ acl_key = gf_posix_acl_get_key(type);
+ if (!acl_key)
+ goto out;
+
+ if (IA_ISLNK(object->inode->ia_type)) {
+ new_object = glfs_h_resolve_symlink(fs, object);
+ if (new_object == NULL)
+ goto out;
+ } else
+ new_object = object;
+
+ ret = glfs_h_getxattrs_common(fs, new_object, &xattr, acl_key, _gf_false);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str(xattr, (char *)acl_key, &acl_s);
+ if (ret)
+ goto out;
+
+ acl = acl_from_text(acl_s);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ if (IA_ISLNK(object->inode->ia_type) && new_object)
+ glfs_h_close(new_object);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return acl;
+}
+#else /* !HAVE_ACL_LIBACL_H */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
+acl_t
+pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type)
+{
+ errno = ENOTSUP;
+ return NULL;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
+int
+pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type, const acl_t acl)
+{
+ errno = ENOTSUP;
+ return -1;
+}
+#endif
+
+/* The API to perform read using anonymous fd */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0)
+ssize_t
+pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object,
+ const void *buf, size_t count, off_t offset)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_anonymous_preadv(fs, object, &iov, 1, offset, 0);
+
+ return ret;
+}
+
+/* The API to perform write using anonymous fd */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0)
+ssize_t
+pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object,
+ const void *buf, size_t count, off_t offset)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_anonymous_pwritev(fs, object, &iov, 1, offset, 0);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0)
+struct glfs_object *
+pub_glfs_object_copy(struct glfs_object *src)
+{
+ struct glfs_object *object = NULL;
+
+ GF_VALIDATE_OR_GOTO("glfs_dup_object", src, out);
+
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ gf_smsg(THIS->name, GF_LOG_WARNING, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "glfs_dup_object gfid=%s", uuid_utoa(src->inode->gfid), NULL);
+ return NULL;
+ }
+
+ object->inode = inode_ref(src->inode);
+ gf_uuid_copy(object->gfid, src->inode->gfid);
+
+out:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0)
+struct glfs_object *
+pub_glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat)
+{
+ GF_VALIDATE_OR_GOTO("glfs_xreaddirplus_get_object", xstat, out);
+
+ if (!(xstat->flags_handled & GFAPI_XREADDIRP_HANDLE))
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_HANDLE_NOT_SET,
+ "GFAPI_XREADDIRP_HANDLE xstat=%p", xstat, "handle=%x",
+ xstat->flags_handled, NULL);
+
+ return xstat->object;
+
+out:
+ return NULL;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0)
+int
+pub_glfs_h_lease(struct glfs *fs, struct glfs_object *object,
+ struct glfs_lease *lease)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_lease gf_lease = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ glfs_lease_to_gf_lease(lease, &gf_lease);
+
+ ret = syncop_lease(subvol, &loc, &gf_lease, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ gf_lease_to_glfs_lease(&gf_lease, lease);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h
new file mode 100644
index 00000000000..4d039b9c76b
--- /dev/null
+++ b/api/src/glfs-handles.h
@@ -0,0 +1,355 @@
+/*
+ Copyright (c) 2013-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_HANDLES_H
+#define _GLFS_HANDLES_H
+
+#include "glfs.h"
+
+/* GLFS OBJECT BASED OPERATIONS
+ *
+ * The following APIs are introduced to provide an API framework that can work
+ * with gluster objects (files and directories), instead of absolute paths.
+ *
+ * The following API set can be related to the POSIX *at interfaces (like
+ * openat (2)). The intention of these APIs is to be able to operate based
+ * on parent object and looking up or creating child objects within, OR to be
+ * used on the actual object thus looked up or created, and retrieve information
+ * regarding the same.
+ *
+ * The APIs also provide for generating an opaque invariant handle to the
+ * object, that can later be used to lookup the object, instead of the regular
+ * glfs_h_* variants. The APIs that provide this behaviour are,
+ * glfs_h_extract_handle and glfs_h_create_from_handle.
+ *
+ * The object handles can be transitioned to fd based operations as supported
+ * by glfs.h calls, using the glfs_h_open call. This provides a way to move
+ * from objects to fd's akin to moving from path to fd for required operations.
+ *
+ * NOTE: The opaque invariant handle is the GFID of the object in reality, but
+ * maintained as an opaque data value, for potential internal changes to the
+ * same without impacting the caller.
+ *
+ * NOTE: Currently looking up an object can create multiple object handles to
+ * the same, i.e distinct glfs_object *. Hence each such looked up or received
+ * handle from other calls, would need to be closed. In the future, for a given
+ * object these pointers would be the same, and an ease of use API to forget all
+ * instances of this bject would be provided (instead of a per lookup close).
+ * This should not change the APIs in their current form.
+ *
+ */
+
+/* Handle length for object handles returned from glfs_h_extract_handle or
+ * glfs_h_create_from_handle */
+#define GFAPI_HANDLE_LENGTH 16
+
+/* These flags should be in sync to the ones defined in upcall.h */
+#define GFAPI_UP_NLINK 0x00000001 /* update nlink */
+#define GFAPI_UP_MODE 0x00000002 /* update mode and ctime */
+#define GFAPI_UP_OWN 0x00000004 /* update mode,uid,gid and ctime */
+#define GFAPI_UP_SIZE 0x00000008 /* update fsize */
+#define GFAPI_UP_TIMES 0x00000010 /* update all times */
+#define GFAPI_UP_ATIME 0x00000020 /* update atime only */
+#define GFAPI_UP_PERM \
+ 0x00000040 /* update fields needed for \
+ permission checking */
+#define GFAPI_UP_RENAME \
+ 0x00000080 /* this is a rename op - \
+ delete the cache entry */
+#define GFAPI_UP_FORGET \
+ 0x00000100 /* inode_forget on server side - \
+ invalidate the cache entry */
+#define GFAPI_UP_PARENT_TIMES 0x00000200 /* update parent dir times */
+
+#define GFAPI_INODE_UPDATE_FLAGS \
+ (GFAPI_UP_NLINK | GFAPI_UP_MODE | GFAPI_UP_OWN | GFAPI_UP_SIZE | \
+ GFAPI_UP_TIMES | GFAPI_UP_ATIME)
+
+/* Portability non glibc c++ build systems */
+#ifndef __THROW
+#if defined __cplusplus
+#define __THROW throw()
+#else
+#define __THROW
+#endif
+#endif
+
+__BEGIN_DECLS
+
+/*
+ * Notes:
+ *
+ * The file object handle. One per looked up, created file/directory
+ *
+ * This had been introduced to facilitate gfid/inode based gfapi
+ * - a requirement introduced by nfs-ganesha
+ */
+struct glfs_object;
+typedef struct glfs_object glfs_object_t;
+
+/* Functions for getting details about the glfs_upcall_inode
+ *
+ * None of the pointers returned by the below functions should be free()'d,
+ * glfs_free()'d or glfs_h_close()'d by the application.
+ *
+ * Releasing of the structures is done by passing the glfs_upcall pointer
+ * to glfs_free().
+ */
+struct glfs_upcall_inode;
+typedef struct glfs_upcall_inode glfs_upcall_inode_t;
+
+glfs_object_t *
+glfs_upcall_inode_get_object(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_object, 3.7.16);
+
+uint64_t
+glfs_upcall_inode_get_flags(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_flags, 3.7.16);
+
+struct stat *
+glfs_upcall_inode_get_stat(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_stat, 3.7.16);
+
+uint64_t
+glfs_upcall_inode_get_expire(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_expire, 3.7.16);
+
+glfs_object_t *
+glfs_upcall_inode_get_pobject(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_pobject, 3.7.16);
+
+struct stat *
+glfs_upcall_inode_get_pstat(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_pstat, 3.7.16);
+
+glfs_object_t *
+glfs_upcall_inode_get_oldpobject(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_oldpobject, 3.7.16);
+
+struct stat *
+glfs_upcall_inode_get_oldpstat(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_oldpstat, 3.7.16);
+
+/* Handle based operations */
+/* Operations that generate handles */
+glfs_object_t *
+glfs_h_lookupat(glfs_t *fs, glfs_object_t *parent, const char *path,
+ struct stat *stat, int follow) __THROW
+ GFAPI_PUBLIC(glfs_h_lookupat, 3.7.4);
+
+glfs_object_t *
+glfs_h_creat(glfs_t *fs, glfs_object_t *parent, const char *path, int flags,
+ mode_t mode, struct stat *sb) __THROW
+ GFAPI_PUBLIC(glfs_h_creat, 3.4.2);
+
+glfs_object_t *
+glfs_h_mkdir(glfs_t *fs, glfs_object_t *parent, const char *path, mode_t flags,
+ struct stat *sb) __THROW GFAPI_PUBLIC(glfs_h_mkdir, 3.4.2);
+
+glfs_object_t *
+glfs_h_mknod(glfs_t *fs, glfs_object_t *parent, const char *path, mode_t mode,
+ dev_t dev, struct stat *sb) __THROW
+ GFAPI_PUBLIC(glfs_h_mknod, 3.4.2);
+
+glfs_object_t *
+glfs_h_symlink(glfs_t *fs, glfs_object_t *parent, const char *name,
+ const char *data, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_symlink, 3.4.2);
+
+/* Operations on the actual objects */
+int
+glfs_h_unlink(glfs_t *fs, glfs_object_t *parent, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_h_unlink, 3.4.2);
+
+int
+glfs_h_close(glfs_object_t *object) __THROW GFAPI_PUBLIC(glfs_h_close, 3.4.2);
+
+int
+glfs_caller_specific_init(void *uid_caller_key, void *gid_caller_key,
+ void *future) __THROW
+ GFAPI_PUBLIC(glfs_caller_specific_init, 3.5.0);
+
+int
+glfs_h_truncate(glfs_t *fs, glfs_object_t *object, off_t offset) __THROW
+ GFAPI_PUBLIC(glfs_h_truncate, 3.4.2);
+
+int
+glfs_h_stat(glfs_t *fs, glfs_object_t *object, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_stat, 3.4.2);
+
+int
+glfs_h_statfs(glfs_t *fs, glfs_object_t *object, struct statvfs *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_statfs, 3.7.0);
+
+int
+glfs_h_getattrs(glfs_t *fs, glfs_object_t *object, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_getattrs, 3.4.2);
+
+int
+glfs_h_getxattrs(glfs_t *fs, glfs_object_t *object, const char *name,
+ void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_h_getxattrs, 3.5.1);
+
+int
+glfs_h_setattrs(glfs_t *fs, glfs_object_t *object, struct stat *sb,
+ int valid) __THROW GFAPI_PUBLIC(glfs_h_setattrs, 3.4.2);
+
+int
+glfs_h_setxattrs(glfs_t *fs, glfs_object_t *object, const char *name,
+ const void *value, size_t size, int flags) __THROW
+ GFAPI_PUBLIC(glfs_h_setxattrs, 3.5.0);
+
+int
+glfs_h_readlink(glfs_t *fs, glfs_object_t *object, char *buf,
+ size_t bufsiz) __THROW GFAPI_PUBLIC(glfs_h_readlink, 3.4.2);
+
+int
+glfs_h_link(glfs_t *fs, glfs_object_t *linktgt, glfs_object_t *parent,
+ const char *name) __THROW GFAPI_PUBLIC(glfs_h_link, 3.4.2);
+
+int
+glfs_h_rename(glfs_t *fs, glfs_object_t *olddir, const char *oldname,
+ glfs_object_t *newdir, const char *newname) __THROW
+ GFAPI_PUBLIC(glfs_h_rename, 3.4.2);
+
+int
+glfs_h_removexattrs(glfs_t *fs, glfs_object_t *object, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_h_removexattrs, 3.5.1);
+
+/* Operations enabling opaque invariant handle to object transitions */
+ssize_t
+glfs_h_extract_handle(glfs_object_t *object, unsigned char *handle,
+ int len) __THROW
+ GFAPI_PUBLIC(glfs_h_extract_handle, 3.4.2);
+
+/* Given a handle, looks up the inode and creates glfs_object.
+ * In addition, if provided 'stat', copies the inode attributes
+ */
+glfs_object_t *
+glfs_h_create_from_handle(glfs_t *fs, unsigned char *handle, int len,
+ struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_create_from_handle, 3.4.2);
+
+/* Operations enabling object handles to fd transitions */
+glfs_fd_t *
+glfs_h_opendir(glfs_t *fs, glfs_object_t *object) __THROW
+ GFAPI_PUBLIC(glfs_h_opendir, 3.4.2);
+
+glfs_fd_t *
+glfs_h_open(glfs_t *fs, glfs_object_t *object, int flags) __THROW
+ GFAPI_PUBLIC(glfs_h_open, 3.4.2);
+
+int
+glfs_h_access(glfs_t *fs, glfs_object_t *object, int mask) __THROW
+ GFAPI_PUBLIC(glfs_h_access, 3.6.0);
+
+struct glfs_object *
+glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat,
+ struct glfs_fd **out_fd) __THROW
+ GFAPI_PUBLIC(glfs_h_creat_open, 6.6);
+/*
+ SYNOPSIS
+
+ glfs_h_poll_upcall: Poll for upcall events given a 'glfs' object.
+
+ DESCRIPTION
+
+ This API is used to poll for upcall events stored in the
+ upcall list. Current users of this API is NFS-Ganesha.
+ In case of any event received, it will be mapped appropriately
+ into 'glfs_upcall' along with the handle('glfs_object') to be
+ passed to NFS-Ganesha.
+
+ In case of success, applications need to check the value of
+ cbk->handle to be NON NULL before processing the upcall
+ events.
+
+ PARAMETERS
+
+ @fs: glfs object to poll the upcall events for
+ @cbk: Pointer that will contain an upcall event for use by the application.
+ Application is responsible for free'ing the structure with glfs_free().
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Error condition, mostly due to out of memory.
+
+*/
+
+int
+glfs_h_poll_upcall(glfs_t *fs, glfs_upcall_t **cbk) __THROW
+ GFAPI_PUBLIC(glfs_h_poll_upcall, 3.7.16);
+
+int
+glfs_h_acl_set(glfs_t *fs, glfs_object_t *object, const acl_type_t type,
+ const acl_t acl) __THROW GFAPI_PUBLIC(glfs_h_acl_set, 3.7.0);
+
+acl_t
+glfs_h_acl_get(glfs_t *fs, glfs_object_t *object, const acl_type_t type) __THROW
+ GFAPI_PUBLIC(glfs_h_acl_get, 3.7.0);
+
+size_t
+glfs_h_anonymous_write(glfs_t *fs, glfs_object_t *object, const void *buf,
+ size_t count, off_t offset) __THROW
+ GFAPI_PUBLIC(glfs_h_anonymous_write, 3.7.0);
+
+ssize_t
+glfs_h_anonymous_read(glfs_t *fs, glfs_object_t *object, const void *buf,
+ size_t count, off_t offset) __THROW
+ GFAPI_PUBLIC(glfs_h_anonymous_read, 3.7.0);
+
+/*
+ * Caution: The object returned by this object gets freed as part
+ * of 'glfs_free(xstat)'. Make sure to have a copy using 'glfs_object_copy()'
+ * to use post that.
+ */
+glfs_object_t *
+glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat) __THROW
+ GFAPI_PUBLIC(glfs_xreaddirplus_get_object, 3.11.0);
+
+/* Applications should close the object returned by this routine
+ * explicitly using 'glfs_h_close()'
+ */
+glfs_object_t *
+glfs_object_copy(glfs_object_t *src) __THROW
+ GFAPI_PUBLIC(glfs_object_copy, 3.11.0);
+
+int
+glfs_h_lease(glfs_t *fs, glfs_object_t *object, glfs_lease_t *lease) __THROW
+ GFAPI_PUBLIC(glfs_h_lease, 4.0.0);
+
+glfs_object_t *
+glfs_h_find_handle(glfs_t *fs, unsigned char *handle, int len) __THROW
+ GFAPI_PUBLIC(glfs_h_lease, 4.0.0);
+
+/* Functions for getting details about the glfs_upcall_lease
+ *
+ * None of the pointers returned by the below functions should be free()'d,
+ * glfs_free()'d or glfs_h_close()'d by the application.
+ *
+ * Releasing of the structures is done by passing the glfs_upcall pointer
+ * to glfs_free().
+ */
+struct glfs_upcall_lease;
+typedef struct glfs_upcall_lease glfs_upcall_lease_t;
+
+glfs_object_t *
+glfs_upcall_lease_get_object(glfs_upcall_lease_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_lease_get_object, 4.1.6);
+
+uint32_t
+glfs_upcall_lease_get_lease_type(glfs_upcall_lease_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_lease_get_lease_type, 4.1.6);
+
+__END_DECLS
+
+#endif /* !_GLFS_HANDLES_H */
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
new file mode 100644
index 00000000000..7cc3b18a104
--- /dev/null
+++ b/api/src/glfs-internal.h
@@ -0,0 +1,756 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_INTERNAL_H
+#define _GLFS_INTERNAL_H
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/upcall-utils.h>
+#include "glfs-handles.h"
+#include <glusterfs/refcount.h>
+#include <glusterfs/syncop.h>
+
+#define GLFS_SYMLINK_MAX_FOLLOW 2048
+
+#define DEFAULT_REVAL_COUNT 1
+
+/*
+ * According to pthread mutex and conditional variable ( cond,
+ * child_down_count, upcall mutex and mutex) initialization of struct glfs
+ * members, below GLFS_INIT_* flags are set in 'pthread_flags' member of struct
+ * glfs. The flags are set from glfs_init() and glfs_new_from_ctx() functions
+ * as part of fs inititialization.
+ *
+ * These flag bits are validated in glfs_fini() to destroy all or partially
+ * initialized mutex and conditional variables of glfs object.
+ * If you introduce new pthread mutex or conditional variable in glfs object,
+ * please make sure you have a flag bit intorduced here for proper cleanup
+ * in glfs_fini().
+ *
+ */
+
+#define PTHREAD_MUTEX_INIT(mutex, attr, flags, mask, label) \
+ do { \
+ int __ret = -1; \
+ __ret = pthread_mutex_init(mutex, attr); \
+ if (__ret == 0) \
+ flags |= mask; \
+ else \
+ goto label; \
+ } while (0)
+
+#define PTHREAD_MUTEX_DESTROY(mutex, flags, mask) \
+ do { \
+ if (flags & mask) \
+ (void)pthread_mutex_destroy(mutex); \
+ } while (0)
+
+#define PTHREAD_COND_INIT(cond, attr, flags, mask, label) \
+ do { \
+ int __ret = -1; \
+ __ret = pthread_cond_init(cond, attr); \
+ if (__ret == 0) \
+ flags |= mask; \
+ else \
+ goto label; \
+ } while (0)
+
+#define PTHREAD_COND_DESTROY(cond, flags, mask) \
+ do { \
+ if (flags & mask) \
+ (void)pthread_cond_destroy(cond); \
+ } while (0)
+
+#define GLFS_INIT_MUTEX 0x00000001 /* pthread_mutex_flag */
+#define GLFS_INIT_COND 0x00000002 /* pthread_cond_flag */
+#define GLFS_INIT_COND_CHILD 0x00000004 /* pthread_cond_child_down_flag */
+#define GLFS_INIT_MUTEX_UPCALL 0x00000008 /* pthread_mutex_upcall_flag */
+
+#ifndef GF_DARWIN_HOST_OS
+#ifndef GFAPI_PUBLIC
+#define GFAPI_PUBLIC(sym, ver) /**/
+#endif
+#ifndef GFAPI_PRIVATE
+#define GFAPI_PRIVATE(sym, ver) /**/
+#endif
+#if __GNUC__ >= 10
+#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
+ __attribute__((__symver__(STR(fn) "@@GFAPI_" STR(ver))))
+
+#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
+ __attribute__((__symver__(STR(fn) "@@GFAPI_PRIVATE_" STR(ver))))
+
+#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
+ __attribute__((__symver__(STR(fn2) "@GFAPI_" STR(ver))))
+
+#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
+ __attribute__((__symver__(STR(fn2) "@GFAPI_PRIVATE_" STR(ver))))
+
+#else
+#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
+ asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver));
+
+#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
+ asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver));
+
+#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
+ asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver));
+
+#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
+ asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver));
+#endif
+#define STR(str) #str
+#else
+#ifndef GFAPI_PUBLIC
+#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver));
+#endif
+#ifndef GFAPI_PRIVATE
+#define GFAPI_PRIVATE(sym, ver) \
+ __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver));
+#endif
+#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, dotver) /**/
+#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, dotver) /**/
+#define GFAPI_SYMVER_PUBLIC(fn1, fn2, dotver) /**/
+#define GFAPI_SYMVER_PRIVATE(fn1, fn2, dotver) /**/
+#endif
+
+#define ESTALE_RETRY(ret, errno, reval, loc, label) \
+ do { \
+ if (ret == -1 && errno == ESTALE) { \
+ if (reval < DEFAULT_REVAL_COUNT) { \
+ reval++; \
+ loc_wipe(loc); \
+ goto label; \
+ } \
+ } \
+ } while (0)
+
+#define GLFS_LOC_FILL_INODE(oinode, loc, label) \
+ do { \
+ loc.inode = inode_ref(oinode); \
+ gf_uuid_copy(loc.gfid, oinode->gfid); \
+ ret = glfs_loc_touchup(&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GLFS_LOC_FILL_PINODE(pinode, loc, ret, errno, label, path) \
+ do { \
+ loc.inode = inode_new(pinode->table); \
+ if (!loc.inode) { \
+ ret = -1; \
+ errno = ENOMEM; \
+ goto label; \
+ } \
+ loc.parent = inode_ref(pinode); \
+ loc.name = path; \
+ ret = glfs_loc_touchup(&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+struct glfs;
+
+struct _upcall_entry {
+ struct list_head upcall_list;
+ struct gf_upcall upcall_data;
+};
+typedef struct _upcall_entry upcall_entry;
+
+typedef int (*glfs_init_cbk)(struct glfs *fs, int ret);
+
+struct glfs {
+ char *volname;
+ uuid_t vol_uuid;
+
+ glusterfs_ctx_t *ctx;
+
+ pthread_t poller;
+
+ glfs_init_cbk init_cbk;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+ int init;
+ int ret;
+ int err;
+
+ xlator_t *active_subvol; /* active graph */
+ xlator_t *mip_subvol; /* graph for which migration is in
+ * progress */
+ xlator_t *next_subvol; /* Any new graph is put to
+ * next_subvol, the graph in
+ * next_subvol can either be moved
+ * to mip_subvol (if any IO picks it
+ * up for migration), or be
+ * destroyed (if there is a new
+ * graph, and this was never picked
+ * for migration) */
+ xlator_t *old_subvol;
+
+ char *oldvolfile;
+ ssize_t oldvollen;
+
+ inode_t *cwd;
+
+ uint32_t dev_id; /* Used to fill st_dev in struct stat */
+
+ struct list_head openfds;
+
+ gf_boolean_t migration_in_progress;
+
+ gf_boolean_t cache_upcalls; /* add upcalls to the upcall_list? */
+ struct list_head upcall_list;
+ pthread_mutex_t upcall_list_mutex; /* mutex for upcall entry list */
+
+ uint32_t pin_refcnt;
+ uint32_t pthread_flags; /* GLFS_INIT_* # defines set this flag */
+
+ uint32_t upcall_events; /* Mask of upcall events application
+ * is interested in */
+ glfs_upcall_cbk up_cbk; /* upcall cbk function to be registered */
+ void *up_data; /* Opaque data provided by application
+ * during upcall registration */
+ struct list_head waitq; /* waiting synctasks */
+};
+
+/* This enum is used to maintain the state of glfd. In case of async fops
+ * fd might be closed before the actual fop is complete. Therefore we need
+ * to track whether the fd is closed or not, instead actually closing it.*/
+enum glfs_fd_state { GLFD_INIT, GLFD_OPEN, GLFD_CLOSE };
+
+struct glfs_fd {
+ struct list_head openfds;
+ struct list_head list;
+ GF_REF_DECL;
+ struct glfs *fs;
+ enum glfs_fd_state state;
+ off_t offset;
+ fd_t *fd; /* Currently guared by @fs->mutex. TODO: per-glfd lock */
+ struct list_head entries;
+ gf_dirent_t *next;
+ struct dirent *readdirbuf;
+ gf_lkowner_t lk_owner;
+ glfs_leaseid_t lease_id; /* Stores lease_id of client in glfd */
+ gf_lock_t lock; /* lock taken before updating fd state */
+ glfs_recall_cbk cbk;
+ void *cookie;
+};
+
+/* glfs object handle introduced for the alternate gfapi implementation based
+ on glfs handles/gfid/inode
+*/
+struct glfs_object {
+ inode_t *inode;
+ uuid_t gfid;
+};
+
+struct glfs_upcall {
+ struct glfs *fs; /* glfs object */
+ enum glfs_upcall_reason reason; /* Upcall event type */
+ void *event; /* changes based in the event type */
+ void (*free_event)(void *); /* free event after the usage */
+};
+
+struct glfs_upcall_inode {
+ struct glfs_object *object; /* Object which need to be acted upon */
+ int flags; /* Cache UPDATE/INVALIDATE flags */
+ struct stat buf; /* Latest stat of this entry */
+ unsigned int expire_time_attr; /* the amount of time for which
+ * the application need to cache
+ * this entry */
+ struct glfs_object *p_object; /* parent Object to be updated */
+ struct stat p_buf; /* Latest stat of parent dir handle */
+ struct glfs_object *oldp_object; /* Old parent Object to be updated */
+ struct stat oldp_buf; /* Latest stat of old parent dir handle */
+};
+
+struct glfs_upcall_lease {
+ struct glfs_object *object; /* Object which need to be acted upon */
+ uint32_t lease_type; /* Lease type to which client can downgrade to*/
+};
+
+struct glfs_upcall_lease_fd {
+ uint32_t lease_type; /* Lease type to which client can downgrade to*/
+ void *fd_cookie; /* Object which need to be acted upon */
+};
+
+struct glfs_xreaddirp_stat {
+ struct stat
+ st; /* Stat for that dirent - corresponds to GFAPI_XREADDIRP_STAT */
+ struct glfs_object *object; /* handled for GFAPI_XREADDIRP_HANDLE */
+ uint32_t flags_handled; /* final set of flags successfulyy handled */
+};
+
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+
+#define GF_MEMPOOL_COUNT_OF_LRU_BUF_T 256
+
+typedef void(glfs_mem_release_t)(void *ptr);
+
+struct glfs_mem_header {
+ uint32_t magic;
+ size_t nmemb;
+ size_t size;
+ glfs_mem_release_t *release;
+};
+
+#define GLFS_MEM_HEADER_SIZE (sizeof(struct glfs_mem_header))
+#define GLFS_MEM_HEADER_MAGIC 0x20170830
+
+static inline void *
+__glfs_calloc(size_t nmemb, size_t size, glfs_mem_release_t release,
+ uint32_t type, const char *typestr)
+{
+ struct glfs_mem_header *header = NULL;
+
+ header = __gf_calloc(nmemb, (size + GLFS_MEM_HEADER_SIZE), type, typestr);
+ if (!header)
+ return NULL;
+
+ header->magic = GLFS_MEM_HEADER_MAGIC;
+ header->nmemb = nmemb;
+ header->size = size;
+ header->release = release;
+
+ return header + 1;
+}
+
+static inline void *
+__glfs_malloc(size_t size, glfs_mem_release_t release, uint32_t type,
+ const char *typestr)
+{
+ struct glfs_mem_header *header = NULL;
+
+ header = __gf_malloc((size + GLFS_MEM_HEADER_SIZE), type, typestr);
+ if (!header)
+ return NULL;
+
+ header->magic = GLFS_MEM_HEADER_MAGIC;
+ header->nmemb = 1;
+ header->size = size;
+ header->release = release;
+
+ return header + 1;
+}
+
+static inline void *
+__glfs_realloc(void *ptr, size_t size)
+{
+ struct glfs_mem_header *old_header = NULL;
+ struct glfs_mem_header *new_header = NULL;
+ struct glfs_mem_header tmp_header;
+ void *new_ptr = NULL;
+
+ GF_ASSERT(NULL != ptr);
+
+ old_header = (struct glfs_mem_header *)(ptr - GLFS_MEM_HEADER_SIZE);
+ GF_ASSERT(old_header->magic == GLFS_MEM_HEADER_MAGIC);
+ tmp_header = *old_header;
+
+ new_ptr = __gf_realloc(old_header, (size + GLFS_MEM_HEADER_SIZE));
+ if (!new_ptr)
+ return NULL;
+
+ new_header = (struct glfs_mem_header *)new_ptr;
+ *new_header = tmp_header;
+ new_header->size = size;
+
+ return new_header + 1;
+}
+
+static inline void
+__glfs_free(void *free_ptr)
+{
+ struct glfs_mem_header *header = NULL;
+ void *release_ptr = NULL;
+ int i = 0;
+
+ if (!free_ptr)
+ return;
+
+ header = (struct glfs_mem_header *)(free_ptr - GLFS_MEM_HEADER_SIZE);
+ GF_ASSERT(header->magic == GLFS_MEM_HEADER_MAGIC);
+
+ if (header->release) {
+ release_ptr = free_ptr;
+ for (i = 0; i < header->nmemb; i++) {
+ header->release(release_ptr);
+ release_ptr += header->size;
+ }
+ }
+
+ __gf_free(header);
+}
+
+#define GLFS_CALLOC(nmemb, size, release, type) \
+ __glfs_calloc(nmemb, size, release, type, #type)
+
+#define GLFS_MALLOC(size, release, type) \
+ __glfs_malloc(size, release, type, #type)
+
+#define GLFS_REALLOC(ptr, size) __glfs_realloc(ptr, size)
+
+#define GLFS_FREE(free_ptr) __glfs_free(free_ptr)
+
+int
+glfs_mgmt_init(struct glfs *fs);
+void
+glfs_init_done(struct glfs *fs, int ret) GFAPI_PRIVATE(glfs_init_done, 3.4.0);
+int
+glfs_process_volfp(struct glfs *fs, FILE *fp);
+int
+glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval) GFAPI_PRIVATE(glfs_resolve, 3.7.0);
+int
+glfs_lresolve(struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+fd_t *
+glfs_resolve_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+fd_t *
+__glfs_migrate_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+int
+glfs_first_lookup(xlator_t *subvol);
+
+void
+glfs_process_upcall_event(struct glfs *fs, void *data)
+ GFAPI_PRIVATE(glfs_process_upcall_event, 3.7.0);
+
+#define __GLFS_ENTRY_VALIDATE_FS(fs, label) \
+ do { \
+ if (!fs) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ old_THIS = THIS; \
+ THIS = fs->ctx->master; \
+ } while (0)
+
+#define __GLFS_EXIT_FS \
+ do { \
+ THIS = old_THIS; \
+ } while (0)
+
+#define __GLFS_ENTRY_VALIDATE_FD(glfd, label) \
+ do { \
+ if (!glfd || !glfd->fd || !glfd->fd->inode || \
+ glfd->state != GLFD_OPEN) { \
+ errno = EBADF; \
+ goto label; \
+ } \
+ old_THIS = THIS; \
+ THIS = glfd->fd->inode->table->xl->ctx->master; \
+ } while (0)
+
+#define __GLFS_LOCK_WAIT(fs) \
+ do { \
+ struct synctask *task = NULL; \
+ \
+ task = synctask_get(); \
+ \
+ if (task) { \
+ list_add_tail(&task->waitq, &fs->waitq); \
+ pthread_mutex_unlock(&fs->mutex); \
+ synctask_yield(task, NULL); \
+ pthread_mutex_lock(&fs->mutex); \
+ } else { \
+ /* non-synctask */ \
+ pthread_cond_wait(&fs->cond, &fs->mutex); \
+ } \
+ } while (0)
+
+#define __GLFS_SYNCTASK_WAKE(fs) \
+ do { \
+ struct synctask *waittask = NULL; \
+ \
+ while (!list_empty(&fs->waitq)) { \
+ waittask = list_entry(fs->waitq.next, struct synctask, waitq); \
+ list_del_init(&waittask->waitq); \
+ synctask_wake(waittask); \
+ } \
+ } while (0)
+
+/*
+ By default all lock attempts from user context must
+ use glfs_lock() and glfs_unlock(). This allows
+ for a safe implementation of graph migration where
+ we can give up the mutex during syncop calls so
+ that bottom up calls (particularly CHILD_UP notify)
+ can do a mutex_lock() on @glfs without deadlocking
+ the filesystem.
+
+ All the fops should wait for graph migration to finish
+ before starting the fops. Therefore these functions should
+ call glfs_lock with wait_for_migration as true. But waiting
+ for migration to finish in call-back path can result thread
+ dead-locks. The reason for this is we only have finite
+ number of epoll threads. so if we wait on epoll threads
+ there will not be any thread left to handle outstanding
+ rpc replies.
+*/
+static inline int
+glfs_lock(struct glfs *fs, gf_boolean_t wait_for_migration)
+{
+ pthread_mutex_lock(&fs->mutex);
+
+ while (!fs->init)
+ __GLFS_LOCK_WAIT(fs);
+
+ while (wait_for_migration && fs->migration_in_progress)
+ __GLFS_LOCK_WAIT(fs);
+
+ return 0;
+}
+
+static inline void
+glfs_unlock(struct glfs *fs)
+{
+ pthread_mutex_unlock(&fs->mutex);
+}
+
+struct glfs_fd *
+glfs_fd_new(struct glfs *fs);
+void
+glfs_fd_bind(struct glfs_fd *glfd);
+void
+glfd_set_state_bind(struct glfs_fd *glfd);
+
+xlator_t *
+glfs_active_subvol(struct glfs *fs) GFAPI_PRIVATE(glfs_active_subvol, 3.4.0);
+xlator_t *
+__glfs_active_subvol(struct glfs *fs);
+void
+glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
+ GFAPI_PRIVATE(glfs_subvol_done, 3.4.0);
+
+inode_t *
+glfs_refresh_inode(xlator_t *subvol, inode_t *inode);
+
+inode_t *
+glfs_cwd_get(struct glfs *fs);
+int
+glfs_cwd_set(struct glfs *fs, inode_t *inode);
+inode_t *
+glfs_resolve_inode(struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object);
+int
+glfs_create_object(loc_t *loc, struct glfs_object **retobject);
+int
+__glfs_cwd_set(struct glfs *fs, inode_t *inode);
+
+int
+glfs_resolve_base(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt);
+
+int
+glfs_resolve_at(struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt, int follow,
+ int reval) GFAPI_PRIVATE(glfs_resolve_at, 3.4.0);
+int
+glfs_loc_touchup(loc_t *loc) GFAPI_PRIVATE(glfs_loc_touchup, 3.4.0);
+void
+glfs_iatt_to_stat(struct glfs *fs, struct iatt *iatt, struct stat *stat);
+void
+glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
+ int *gvalid);
+int
+glfs_loc_link(loc_t *loc, struct iatt *iatt);
+int
+glfs_loc_unlink(loc_t *loc);
+int
+glfs_getxattr_process(void *value, size_t size, dict_t *xattr,
+ const char *name);
+
+/* Sends RPC call to glusterd to fetch required volume info */
+int
+glfs_get_volume_info(struct glfs *fs);
+
+/*
+ SYNOPSIS
+
+ glfs_new_from_ctx: Creates a virtual mount object by taking a
+ glusterfs_ctx_t object.
+
+ DESCRIPTION
+
+ glfs_new_from_ctx() is not same as glfs_new(). It takes the
+ glusterfs_ctx_t object instead of creating one by glusterfs_ctx_new().
+ Again the usage is restricted to NFS MOUNT over UDP i.e. in
+ glfs_resolve_at() which would take fs object as input but never use
+ (purpose is not to change the ABI of glfs_resolve_at()).
+
+ PARAMETERS
+
+ @ctx: glusterfs_ctx_t object
+
+ RETURN VALUES
+
+ fs : Pointer to the newly created glfs_t object.
+ NULL : Otherwise.
+*/
+
+struct glfs *
+glfs_new_from_ctx(glusterfs_ctx_t *ctx) GFAPI_PRIVATE(glfs_new_from_ctx, 3.7.0);
+
+/*
+ SYNOPSIS
+
+ glfs_free_from_ctx: Free up the memory occupied by glfs_t object
+ created by glfs_new_from_ctx().
+
+ DESCRIPTION
+
+ The glfs_t object allocated by glfs_new_from_ctx() must be released
+ by the caller using this routine. The usage can be found
+ at glfs_fini() or NFS, MOUNT over UDP i.e.
+ __mnt3udp_get_export_subdir_inode ()
+ => glfs_resolve_at().
+
+ PARAMETERS
+
+ @fs: The glfs_t object to be deallocated.
+
+ RETURN VALUES
+
+ void
+*/
+
+void
+glfs_free_from_ctx(struct glfs *fs) GFAPI_PRIVATE(glfs_free_from_ctx, 3.7.0);
+
+int
+glfs_recall_lease_fd(struct glfs *fs, struct gf_upcall *up_data);
+
+int
+glfs_get_upcall_cache_invalidation(struct gf_upcall *to_up_data,
+ struct gf_upcall *from_up_data);
+int
+glfs_h_poll_cache_invalidation(struct glfs *fs, struct glfs_upcall *up_arg,
+ struct gf_upcall *upcall_data);
+
+ssize_t
+glfs_anonymous_preadv(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags);
+ssize_t
+glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags);
+
+struct glfs_object *
+glfs_h_resolve_symlink(struct glfs *fs, struct glfs_object *object);
+
+/* Deprecated structures that were passed to client applications, replaced by
+ * accessor functions. Do not use these in new applications, and update older
+ * usage.
+ *
+ * See http://review.gluster.org/14701 for more details.
+ *
+ * WARNING: These structures will be removed in the future.
+ */
+struct glfs_callback_arg {
+ struct glfs *fs;
+ enum glfs_upcall_reason reason;
+ void *event_arg;
+};
+
+struct glfs_callback_inode_arg {
+ struct glfs_object *object; /* Object which need to be acted upon */
+ int flags; /* Cache UPDATE/INVALIDATE flags */
+ struct stat buf; /* Latest stat of this entry */
+ unsigned int expire_time_attr; /* the amount of time for which
+ * the application need to cache
+ * this entry
+ */
+ struct glfs_object *p_object; /* parent Object to be updated */
+ struct stat p_buf; /* Latest stat of parent dir handle */
+ struct glfs_object *oldp_object; /* Old parent Object
+ * to be updated */
+ struct stat oldp_buf; /* Latest stat of old parent
+ * dir handle */
+};
+struct dirent *
+glfs_readdirbuf_get(struct glfs_fd *glfd);
+
+gf_dirent_t *
+glfd_entry_next(struct glfs_fd *glfd, int plus);
+
+void
+gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent);
+
+void
+gf_lease_to_glfs_lease(struct gf_lease *gf_lease, struct glfs_lease *lease);
+
+void
+glfs_lease_to_gf_lease(struct glfs_lease *lease, struct gf_lease *gf_lease);
+
+void
+glfs_release_upcall(void *ptr);
+
+int
+get_fop_attr_glfd(dict_t **fop_attr, struct glfs_fd *glfd);
+
+int
+set_fop_attr_glfd(struct glfs_fd *glfd);
+
+int
+get_fop_attr_thrd_key(dict_t **fop_attr);
+
+void
+unset_fop_attr(dict_t **fop_attr);
+
+/*
+ SYNOPSIS
+ glfs_statx: Fetch extended file attributes for the given path.
+
+ DESCRIPTION
+ This function fetches extended file attributes for the given path.
+
+ PARAMETERS
+ @fs: The 'virtual mount' object referencing a volume, under which file exists.
+ @path: Path of the file within the virtual mount.
+ @mask: Requested extended file attributes mask, (See mask defines above)
+
+ RETURN VALUES
+ -1 : Failure. @errno will be set with the type of failure.
+ 0 : Filled in statxbuf with appropriate masks for valid items in the
+ structure.
+
+ ERRNO VALUES
+ EINVAL: fs is invalid
+ EINVAL: mask has unsupported bits set
+ Other errors as returned by stat(2)
+ */
+
+int
+glfs_statx(struct glfs *fs, const char *path, unsigned int mask,
+ struct glfs_stat *statxbuf) GFAPI_PRIVATE(glfs_statx, 6.0);
+
+void
+glfs_iatt_from_statx(struct iatt *, const struct glfs_stat *)
+ GFAPI_PRIVATE(glfs_iatt_from_statx, 6.0);
+
+/*
+ * This API is a per thread setting, similar to glfs_setfs{u/g}id, because of
+ * the call to syncopctx_setfspid.
+ */
+int
+glfs_setfspid(struct glfs *, pid_t) GFAPI_PRIVATE(glfs_setfspid, 6.1);
+#endif /* !_GLFS_INTERNAL_H */
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
new file mode 100644
index 00000000000..100dcc16cc0
--- /dev/null
+++ b/api/src/glfs-master.c
@@ -0,0 +1,183 @@
+/*
+ Copyright (c) 2012-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+
+#include <glusterfs/glusterfs.h>
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "gfapi-messages.h"
+
+int
+graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
+{
+ xlator_t *new_subvol = NULL;
+ xlator_t *old_subvol = NULL;
+ inode_table_t *itable = NULL;
+ int ret = -1;
+
+ new_subvol = graph->top;
+
+ /* This is called in a bottom-up context, it should specifically
+ NOT be glfs_lock()
+ */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ if (new_subvol->switched || new_subvol == fs->active_subvol ||
+ new_subvol == fs->next_subvol || new_subvol == fs->mip_subvol) {
+ /* Spurious CHILD_UP event on old graph */
+ ret = 0;
+ goto unlock;
+ }
+
+ if (!new_subvol->itable) {
+ itable = inode_table_new(131072, new_subvol);
+ if (!itable) {
+ errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ new_subvol->itable = itable;
+ }
+
+ old_subvol = fs->next_subvol;
+ fs->next_subvol = new_subvol;
+ fs->next_subvol->winds++; /* first ref */
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (old_subvol)
+ /* wasn't picked up so far, skip */
+ glfs_subvol_done(fs, old_subvol);
+
+ return ret;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ glusterfs_graph_t *graph = NULL;
+ struct glfs *fs = NULL;
+
+ graph = data;
+ fs = this->private;
+
+ switch (event) {
+ case GF_EVENT_GRAPH_NEW:
+ gf_smsg(this->name, GF_LOG_INFO, 0, API_MSG_NEW_GRAPH,
+ "graph-uuid=%s",
+ uuid_utoa((unsigned char *)graph->graph_uuid), "id=%d",
+ graph->id, NULL);
+ break;
+ case GF_EVENT_CHILD_UP:
+ pthread_mutex_lock(&fs->mutex);
+ {
+ graph->used = 1;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+ graph_setup(fs, graph);
+ glfs_init_done(fs, 0);
+ break;
+ case GF_EVENT_CHILD_DOWN:
+ pthread_mutex_lock(&fs->mutex);
+ {
+ graph->used = 0;
+ pthread_cond_broadcast(&fs->child_down_cond);
+ }
+ pthread_mutex_unlock(&fs->mutex);
+ glfs_init_done(fs, 1);
+ break;
+ case GF_EVENT_CHILD_CONNECTING:
+ break;
+ case GF_EVENT_UPCALL:
+ glfs_process_upcall_event(fs, data);
+ break;
+ default:
+ gf_msg_debug(this->name, 0, "got notify event %d", event);
+ break;
+ }
+
+ return 0;
+}
+
+int
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, glfs_mt_end + 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
+ NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
+int
+init(xlator_t *this)
+{
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+}
+
+/* place-holder fops */
+int
+glfs_forget(xlator_t *this, inode_t *inode)
+{
+ return 0;
+}
+
+int
+glfs_release(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int
+glfs_releasedir(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+struct xlator_dumpops dumpops;
+
+struct xlator_fops fops;
+
+struct xlator_cbks cbks = {
+ .forget = glfs_forget,
+ .release = glfs_release,
+ .releasedir = glfs_releasedir,
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .identifier = "glfs-api",
+ .category = GF_MAINTAINED,
+};
diff --git a/api/src/glfs-mem-types.h b/api/src/glfs-mem-types.h
new file mode 100644
index 00000000000..bfa325a3ad9
--- /dev/null
+++ b/api/src/glfs-mem-types.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2012-2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_MEM_TYPES_H
+#define _GLFS_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
+
+enum glfs_mem_types_ {
+ glfs_mt_call_pool_t = GF_MEM_TYPE_START,
+ glfs_mt_xlator_t,
+ glfs_mt_glfs_fd_t,
+ glfs_mt_glfs_io_t,
+ glfs_mt_volfile_t,
+ glfs_mt_xlator_cmdline_option_t,
+ glfs_mt_server_cmdline_t,
+ glfs_mt_glfs_object_t,
+ glfs_mt_readdirbuf_t,
+ glfs_mt_upcall_entry_t,
+ glfs_mt_acl_t,
+ glfs_mt_upcall_inode_t,
+ glfs_mt_realpath_t,
+ glfs_mt_xreaddirp_stat_t,
+ glfs_mt_end
+};
+#endif
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
new file mode 100644
index 00000000000..7c82b8cd162
--- /dev/null
+++ b/api/src/glfs-mgmt.c
@@ -0,0 +1,1049 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <glusterfs/glusterfs.h>
+#include "glfs.h"
+#include <glusterfs/dict.h>
+
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "rpc-common-xdr.h"
+
+#include <glusterfs/syncop.h>
+
+#include "glfs-internal.h"
+#include "gfapi-messages.h"
+#include <glusterfs/syscall.h>
+
+int
+glfs_volfile_fetch(struct glfs *fs);
+int32_t
+glfs_get_volume_info_rpc(call_frame_t *frame, xlator_t *this, struct glfs *fs);
+
+int
+glfs_process_volfp(struct glfs *fs, FILE *fp)
+{
+ glusterfs_graph_t *graph = NULL;
+ int ret = -1;
+ xlator_t *trav = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ graph = glusterfs_graph_construct(fp);
+ if (!graph) {
+ gf_smsg("glfs", GF_LOG_ERROR, errno, API_MSG_GRAPH_CONSTRUCT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp(trav->type, "mount/api") == 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_API_XLATOR_ERROR,
+ NULL);
+ goto out;
+ }
+ }
+
+ ret = glusterfs_graph_prepare(graph, ctx, fs->volname);
+ if (ret) {
+ glusterfs_graph_destroy(graph);
+ goto out;
+ }
+
+ ret = glusterfs_graph_activate(graph, ctx);
+
+ if (ret) {
+ glusterfs_graph_destroy(graph);
+ goto out;
+ }
+
+ gf_log_dump_graph(fp, graph);
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+
+ if (!ctx->active) {
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ struct glfs *fs = NULL;
+ xlator_t *this = NULL;
+
+ this = mydata;
+ fs = this->private;
+
+ glfs_volfile_fetch(fs);
+
+ return 0;
+}
+
+int
+mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ return 0;
+}
+
+static int
+mgmt_cbk_statedump(struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ struct glfs *fs = NULL;
+ xlator_t *this = NULL;
+ gf_statedump target_pid = {
+ 0,
+ };
+ struct iovec *iov = NULL;
+ int ret = -1;
+
+ this = mydata;
+ if (!this) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "mydata", NULL);
+ errno = EINVAL;
+ goto out;
+ }
+
+ fs = this->private;
+ if (!fs) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "glfs", NULL);
+ errno = EINVAL;
+ goto out;
+ }
+
+ iov = (struct iovec *)data;
+ if (!iov) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "iovec data", NULL);
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &target_pid, (xdrproc_t)xdr_gf_statedump);
+ if (ret < 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_DECODE_XDR_FAILED, NULL);
+ goto out;
+ }
+
+ gf_msg_trace("glfs", 0, "statedump requested for pid: %d", target_pid.pid);
+
+ if ((uint64_t)getpid() == target_pid.pid) {
+ gf_msg_debug("glfs", 0, "Taking statedump for pid: %d", target_pid.pid);
+
+ ret = glfs_sysrq(fs, GLFS_SYSRQ_STATEDUMP);
+ if (ret < 0) {
+ gf_smsg("glfs", GF_LOG_INFO, 0, API_MSG_STATEDUMP_FAILED, NULL);
+ }
+ }
+out:
+ return ret;
+}
+
+static rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", mgmt_cbk_spec, GF_CBK_FETCHSPEC},
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", mgmt_cbk_event,
+ GF_CBK_EVENT_NOTIFY},
+ [GF_CBK_STATEDUMP] = {"STATEDUMP", mgmt_cbk_statedump, GF_CBK_STATEDUMP},
+};
+
+static struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+ [GF_HNDSK_GET_VOLUME_INFO] = "GETVOLUMEINFO",
+};
+
+static rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+int
+mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_XDR_PAYLOAD_FAILED,
+ NULL);
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit(ctx->mgmt, prog, procnum, cbkfn, &iov, count, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+ if (iobref)
+ iobref_unref(iobref);
+
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
+}
+
+/*
+ * Callback routine for 'GF_HNDSK_GET_VOLUME_INFO' rpc request
+ */
+int
+mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = 0;
+ char *volume_id_str = NULL;
+ dict_t *dict = NULL;
+ gf_get_volume_info_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ struct glfs *fs = NULL;
+ struct syncargs *args;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+ args = frame->local;
+
+ if (!ctx) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_NULL,
+ "context", NULL);
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ fs = ((xlator_t *)ctx->master)->private;
+
+ if (-1 == req->rpc_status) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL,
+ API_MSG_CALL_NOT_SUCCESSFUL, NULL);
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_get_volume_info_rsp);
+
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0,
+ API_MSG_XDR_RESPONSE_DECODE_FAILED, NULL);
+ goto out;
+ }
+
+ gf_msg_debug(frame->this->name, 0,
+ "Received resp to GET_VOLUME_INFO "
+ "RPC: %d",
+ rsp.op_ret);
+
+ if (rsp.op_ret == -1) {
+ errno = rsp.op_errno;
+ ret = -1;
+ goto out;
+ }
+
+ if (!rsp.dict.dict_len) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_CALL_NOT_VALID,
+ NULL);
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volume_id", &volume_id_str);
+ if (ret) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (volume_id_str) {
+ gf_msg_debug(frame->this->name, 0, "Volume Id: %s", volume_id_str);
+ pthread_mutex_lock(&fs->mutex);
+ gf_uuid_parse(volume_id_str, fs->vol_uuid);
+ pthread_mutex_unlock(&fs->mutex);
+ }
+
+ if (ret) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, errno,
+ API_MSG_GET_VOLINFO_CBK_FAILED, "error=%s", strerror(errno),
+ NULL);
+ }
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp.dict.dict_val)
+ free(rsp.dict.dict_val);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ gf_msg_debug(frame->this->name, 0, "Returning: %d", ret);
+
+ __wake(args);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0)
+int
+pub_glfs_get_volumeid(struct glfs *fs, char *volid, size_t size)
+{
+ /* TODO: Define a global macro to store UUID size */
+ size_t uuid_size = 16;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ /* check if the volume uuid is initialized */
+ if (!gf_uuid_is_null(fs->vol_uuid)) {
+ pthread_mutex_unlock(&fs->mutex);
+ goto done;
+ }
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ /* Need to fetch volume_uuid */
+ glfs_get_volume_info(fs);
+
+ if (gf_uuid_is_null(fs->vol_uuid)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_FETCH_VOLUUID_FAILED,
+ NULL);
+ goto out;
+ }
+
+done:
+ if (!volid || !size) {
+ gf_msg_debug(THIS->name, 0, "volumeid/size is null");
+ __GLFS_EXIT_FS;
+ return uuid_size;
+ }
+
+ if (size < uuid_size) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ERANGE, API_MSG_INSUFF_SIZE, NULL);
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy(volid, fs->vol_uuid, uuid_size);
+
+ __GLFS_EXIT_FS;
+
+ return uuid_size;
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return -1;
+}
+
+int
+glfs_get_volume_info(struct glfs *fs)
+{
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ struct syncargs args = {
+ 0,
+ };
+ int ret = 0;
+
+ ctx = fs->ctx;
+ frame = create_frame(THIS, ctx->pool);
+ if (!frame) {
+ gf_smsg("glfs", GF_LOG_ERROR, ENOMEM, API_MSG_FRAME_CREAT_FAILED, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ frame->local = &args;
+
+ __yawn((&args));
+
+ ret = glfs_get_volume_info_rpc(frame, THIS, fs);
+ if (ret)
+ goto out;
+
+ __yield((&args));
+
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+
+out:
+ return ret;
+}
+
+int32_t
+glfs_get_volume_info_rpc(call_frame_t *frame, xlator_t *this, struct glfs *fs)
+{
+ gf_get_volume_info_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+ int32_t flags = 0;
+
+ if (!frame || !this || !fs) {
+ ret = -1;
+ goto out;
+ }
+
+ ctx = fs->ctx;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ if (fs->volname) {
+ ret = dict_set_str(dict, "volname", fs->volname);
+ if (ret)
+ goto out;
+ }
+
+ // Set the flags for the fields which we are interested in
+ flags = (int32_t)GF_GET_VOLUME_UUID; // ctx->flags;
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL,
+ API_MSG_DICT_SET_FAILED, "flags", NULL);
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GET_VOLUME_INFO, mgmt_get_volinfo_cbk,
+ (xdrproc_t)xdr_gf_get_volume_info_req);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
+}
+
+static int
+glusterfs_oldvolfile_update(struct glfs *fs, char *volfile, ssize_t size)
+{
+ int ret = -1;
+
+ pthread_mutex_lock(&fs->mutex);
+
+ fs->oldvollen = size;
+ if (!fs->oldvolfile) {
+ fs->oldvolfile = CALLOC(1, size + 1);
+ } else {
+ fs->oldvolfile = REALLOC(fs->oldvolfile, size + 1);
+ }
+
+ if (!fs->oldvolfile) {
+ fs->oldvollen = 0;
+ } else {
+ memcpy(fs->oldvolfile, volfile, size);
+ fs->oldvollen = size;
+ ret = 0;
+ }
+
+ pthread_mutex_unlock(&fs->mutex);
+
+ return ret;
+}
+
+int
+glfs_mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ int need_retry = 0;
+ struct glfs *fs = NULL;
+ dict_t *dict = NULL;
+ char *servers_list = NULL;
+ int tmp_fd = -1;
+ char template[] = "/tmp/gfapi.volfile.XXXXXX";
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+
+ if (!ctx) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_NULL,
+ "context", NULL);
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ fs = ((xlator_t *)ctx->master)->private;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ need_retry = 1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0, API_MSG_XDR_DECODE_FAILED,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, rsp.op_errno,
+ API_MSG_GET_VOLFILE_FAILED, "from server", NULL);
+ ret = -1;
+ errno = rsp.op_errno;
+ goto out;
+ }
+
+ if (!rsp.xdata.xdata_len) {
+ goto volfile;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.xdata.xdata_val, rsp.xdata.xdata_len, &dict);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to unserialize xdata to dictionary");
+ goto out;
+ }
+ dict->extra_stdfree = rsp.xdata.xdata_val;
+
+ /* glusterd2 only */
+ ret = dict_get_str(dict, "servers-list", &servers_list);
+ if (ret) {
+ goto volfile;
+ }
+
+ gf_log(frame->this->name, GF_LOG_INFO,
+ "Received list of available volfile servers: %s", servers_list);
+
+ ret = gf_process_getspec_servers_list(&ctx->cmd_args, servers_list);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Failed (%s) to process servers list: %s", strerror(errno),
+ servers_list);
+ }
+
+volfile:
+ ret = 0;
+ size = rsp.op_ret;
+
+ pthread_mutex_lock(&fs->mutex);
+ if ((size == fs->oldvollen) &&
+ (memcmp(fs->oldvolfile, rsp.spec, size) == 0)) {
+ pthread_mutex_unlock(&fs->mutex);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, API_MSG_VOLFILE_INFO, NULL);
+ goto out;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(template);
+ if (-1 == tmp_fd) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
+ */
+ ret = sys_unlink(template);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, API_MSG_UNABLE_TO_DEL,
+ "template=%s", template, NULL);
+ ret = 0;
+ }
+
+ tmpfp = fdopen(tmp_fd, "w+b");
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
+ }
+
+ fwrite(rsp.spec, size, 1, tmpfp);
+ fflush(tmpfp);
+ if (ferror(tmpfp)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if only options have changed. No need to reload the
+ * volfile if topology hasn't changed.
+ * glusterfs_volfile_reconfigure returns 3 possible return states
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all
+ * the xlators should be inited return -1(or -ve) =======> Some Internal
+ * Error occurred during the operation
+ */
+
+ pthread_mutex_lock(&fs->mutex);
+ ret = gf_volfile_reconfigure(fs->oldvollen, tmpfp, fs->ctx, fs->oldvolfile);
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (ret == 0) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "No need to re-load "
+ "volfile, reconfigure done");
+ ret = glusterfs_oldvolfile_update(fs, rsp.spec, size);
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_msg_debug("glusterfsd-mgmt", 0, "Reconfigure failed !!");
+ goto out;
+ }
+
+ ret = glfs_process_volfp(fs, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ tmp_fd = -1;
+ if (ret)
+ goto out;
+
+ ret = glusterfs_oldvolfile_update(fs, rsp.spec, size);
+out:
+ STACK_DESTROY(frame->root);
+
+ if (rsp.spec)
+ free(rsp.spec);
+
+ if (dict)
+ dict_unref(dict);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_smsg("mgmt", GF_LOG_ERROR, ENOTSUP, API_MSG_WRONG_OPVERSION, NULL);
+ errno = ENOTSUP;
+ glfs_init_done(fs, -1);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, EINVAL, API_MSG_GET_VOLFILE_FAILED,
+ "key=%s", ctx->cmd_args.volfile_id, NULL);
+ if (!need_retry) {
+ if (!errno)
+ errno = EINVAL;
+ glfs_init_done(fs, -1);
+ }
+ }
+
+ if (tmpfp)
+ fclose(tmpfp);
+ else if (tmp_fd != -1)
+ sys_close(tmp_fd);
+
+ return 0;
+}
+
+int
+glfs_volfile_fetch(struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ gf_getspec_req req = {
+ 0,
+ };
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ req.key = cmd_args->volfile_id;
+ req.flags = 0;
+
+ dict = dict_new();
+ if (!dict) {
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32(dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
+ "min-op-version", NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
+ "max-op-version", NULL);
+ goto out;
+ }
+
+ /* Ask for a list of volfile (glusterd2 only) servers */
+ if (GF_CLIENT_PROCESS == ctx->process_mode) {
+ req.flags = req.flags | GF_GETSPEC_FLAG_SERVERS_LIST;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, API_MSG_DICT_SERIALIZE_FAILED,
+ NULL);
+ goto out;
+ }
+
+ frame = create_frame(THIS, ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GETSPEC, glfs_mgmt_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
+out:
+ if (req.xdata.xdata_val)
+ GF_FREE(req.xdata.xdata_val);
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+static int
+mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ server_cmdline_t *server = NULL;
+ rpc_transport_t *rpc_trans = NULL;
+ struct glfs *fs = NULL;
+ int ret = 0;
+ struct dnscache6 *dnscache = NULL;
+
+ this = mydata;
+ rpc_trans = rpc->conn.trans;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+
+ fs = ((xlator_t *)ctx->master)->private;
+
+ switch (event) {
+ case RPC_CLNT_DISCONNECT:
+ if (!ctx->active) {
+ if (rpc_trans->connect_failed)
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, 0,
+ API_MSG_REMOTE_HOST_CONN_FAILED, "server=%s",
+ ctx->cmd_args.volfile_server, NULL);
+ else
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, 0,
+ API_MSG_REMOTE_HOST_CONN_FAILED, "server=%s",
+ ctx->cmd_args.volfile_server, NULL);
+
+ if (!rpc->disabled) {
+ /*
+ * Check if dnscache is exhausted for current server
+ * and continue until cache is exhausted
+ */
+ dnscache = rpc_trans->dnscache;
+ if (dnscache && dnscache->next) {
+ break;
+ }
+ }
+ server = ctx->cmd_args.curr_server;
+ if (server->list.next == &ctx->cmd_args.volfile_servers) {
+ errno = ENOTCONN;
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, ENOTCONN,
+ API_MSG_VOLFILE_SERVER_EXHAUST, NULL);
+ glfs_init_done(fs, -1);
+ break;
+ }
+ server = list_entry(server->list.next, typeof(*server), list);
+ ctx->cmd_args.curr_server = server;
+ ctx->cmd_args.volfile_server_port = server->port;
+ ctx->cmd_args.volfile_server = server->volfile_server;
+ ctx->cmd_args.volfile_server_transport = server->transport;
+
+ ret = dict_set_str(rpc_trans->options, "transport-type",
+ server->transport);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "transport-type=%s",
+ server->transport, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+
+ if (strcmp(server->transport, "unix") == 0) {
+ ret = dict_set_str(rpc_trans->options,
+ "transport.socket.connect-path",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED,
+ "socket.connect-path=%s",
+ server->volfile_server, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+ /* delete the remote-host and remote-port keys
+ * in case they were set while looping through
+ * list of volfile servers previously
+ */
+ dict_del(rpc_trans->options, "remote-host");
+ dict_del(rpc_trans->options, "remote-port");
+ } else {
+ ret = dict_set_int32(rpc_trans->options, "remote-port",
+ server->port);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "remote-port=%d",
+ server->port, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+
+ ret = dict_set_str(rpc_trans->options, "remote-host",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "remote-host=%s",
+ server->volfile_server, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+ /* delete the "transport.socket.connect-path"
+ * key in case if it was set while looping
+ * through list of volfile servers previously
+ */
+ dict_del(rpc_trans->options,
+ "transport.socket.connect-path");
+ }
+
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, 0, API_MSG_VOLFILE_CONNECTING,
+ "server=%s", server->volfile_server, "port=%d",
+ server->port, "transport=%s", server->transport, NULL);
+ }
+ break;
+ case RPC_CLNT_CONNECT:
+ ret = glfs_volfile_fetch(fs);
+ if (ret && (ctx->active == NULL)) {
+ /* Do it only for the first time */
+ /* Exit the process.. there are some wrong options */
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, EINVAL,
+ API_MSG_GET_VOLFILE_FAILED, "key=%s",
+ ctx->cmd_args.volfile_id, NULL);
+ errno = EINVAL;
+ glfs_init_done(fs, -1);
+ }
+
+ break;
+ default:
+ break;
+ }
+out:
+ return 0;
+}
+
+int
+glusterfs_mgmt_notify(int32_t op, void *data, ...)
+{
+ int ret = 0;
+
+ switch (op) {
+ case GF_EN_DEFRAG_STATUS:
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int
+glfs_mgmt_init(struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (ctx->mgmt)
+ return 0;
+
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
+
+ if (cmd_args->volfile_server) {
+ host = cmd_args->volfile_server;
+ } else if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+ host = DEFAULT_GLUSTERD_SOCKFILE;
+ } else {
+ host = "localhost";
+ }
+
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+ ret = rpc_transport_unix_options_build(options, host, 0);
+ } else {
+ xlator_cmdline_option_t *opt = find_xlator_option_in_cmd_args_t(
+ "address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
+ }
+
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new(options, THIS, THIS->name, 8);
+ if (!rpc) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_CREATE_RPC_CLIENT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(rpc, mgmt_rpc_notify, THIS);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_NOTIFY_FUNC_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register(rpc, &mgmt_cbk_prog, THIS);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_CBK_FUNC_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ctx->notify = glusterfs_mgmt_notify;
+
+ /* This value should be set before doing the 'rpc_clnt_start()' as
+ the notify function uses this variable */
+ ctx->mgmt = rpc;
+
+ ret = rpc_clnt_start(rpc);
+out:
+ if (options)
+ dict_unref(options);
+ return ret;
+}
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
new file mode 100644
index 00000000000..8a393ecb464
--- /dev/null
+++ b/api/src/glfs-resolve.c
@@ -0,0 +1,1199 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/gf-event.h>
+#include "glfs-mem-types.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
+#include "gfapi-messages.h"
+#include <glusterfs/inode.h>
+#include "glfs-internal.h"
+
+#define graphid_str(subvol) \
+ (uuid_utoa((unsigned char *)subvol->graph->graph_uuid))
+int
+glfs_first_lookup_safe(xlator_t *subvol)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+
+ loc.inode = subvol->itable->root;
+ memset(loc.gfid, 0, 16);
+ loc.gfid[15] = 1;
+ loc.path = "/";
+ loc.name = "";
+
+ ret = syncop_lookup(subvol, &loc, 0, 0, 0, 0);
+ DECODE_SYNCOP_ERR(ret);
+
+ gf_msg_debug(subvol->name, 0, "first lookup complete %d", ret);
+
+ return ret;
+}
+
+int
+__glfs_first_lookup(struct glfs *fs, xlator_t *subvol)
+{
+ int ret = -1;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ {
+ ret = glfs_first_lookup_safe(subvol);
+ }
+ pthread_mutex_lock(&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
+ return ret;
+}
+
+/**
+ * We have to check if need_lookup flag is set in both old and the new inodes.
+ * If its set in oldinode, then directly go ahead and do an explicit lookup.
+ * But if its not set in the oldinode, then check if the newinode is linked
+ * via readdirp. If so an explicit lookup is needed on the new inode, so that
+ * below xlators can set their respective contexts.
+ */
+inode_t *
+glfs_refresh_inode_safe(xlator_t *subvol, inode_t *oldinode,
+ gf_boolean_t need_lookup)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *newinode = NULL;
+ gf_boolean_t lookup_needed = _gf_false;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ if (!oldinode)
+ return NULL;
+
+ if (!need_lookup && oldinode->table->xl == subvol)
+ return inode_ref(oldinode);
+
+ newinode = inode_find(subvol->itable, oldinode->gfid);
+ if (!need_lookup && newinode) {
+ lookup_needed = inode_needs_lookup(newinode, THIS);
+ if (!lookup_needed)
+ return newinode;
+ }
+
+ gf_uuid_copy(loc.gfid, oldinode->gfid);
+ if (!newinode)
+ loc.inode = inode_new(subvol->itable);
+ else
+ loc.inode = newinode;
+
+ if (!loc.inode)
+ return NULL;
+
+ ret = syncop_lookup(subvol, &loc, &iatt, 0, 0, 0);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s",
+ uuid_utoa(oldinode->gfid), "err=%s", strerror(errno), NULL);
+ loc_wipe(&loc);
+ return NULL;
+ }
+
+ newinode = inode_link(loc.inode, 0, 0, &iatt);
+ if (newinode) {
+ if (newinode == loc.inode)
+ inode_ctx_set(newinode, THIS, &ctx_value);
+ inode_lookup(newinode);
+ } else {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa((unsigned char *)&iatt.ia_gfid), NULL);
+ }
+
+ loc_wipe(&loc);
+
+ return newinode;
+}
+
+inode_t *
+__glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ gf_boolean_t need_lookup)
+{
+ inode_t *newinode = NULL;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ {
+ newinode = glfs_refresh_inode_safe(subvol, inode, need_lookup);
+ }
+ pthread_mutex_lock(&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
+ return newinode;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0)
+int
+priv_glfs_loc_touchup(loc_t *loc)
+{
+ int ret = 0;
+
+ ret = loc_touchup(loc, loc->name);
+ if (ret < 0) {
+ errno = -ret;
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+glfs_resolve_symlink(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ char **lpath)
+{
+ loc_t loc = {
+ 0,
+ };
+ char *path = NULL;
+ char *rpath = NULL;
+ int ret = -1;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ ret = inode_path(inode, NULL, &rpath);
+ if (ret < 0)
+ goto out;
+ loc.path = rpath;
+
+ ret = syncop_readlink(subvol, &loc, &path, 4096, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret < 0)
+ goto out;
+
+ if (lpath)
+ *lpath = path;
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+glfs_resolve_base(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ char *path = NULL;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ ret = inode_path(loc.inode, NULL, &path);
+ loc.path = path;
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(subvol, &loc, iatt, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ loc_wipe(&loc);
+
+ return ret;
+}
+/*
+ * This function can be used to call named lookup on root.
+ * If you use glfs_resolve_base, that will be a nameless lookup.
+ */
+static int
+glfs_resolve_root(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ char *path = NULL;
+
+ loc.inode = inode_ref(inode);
+
+ ret = inode_path(loc.inode, ".", &path);
+ loc.path = path;
+ loc.name = ".";
+ /* Having a value in loc.name will help to bypass md-cache check for
+ * nameless lookup.
+ * TODO: Re-visit on nameless lookup and md-cache.
+ * Github issue : https://github.com/gluster/glusterfs/issues/232
+ */
+ loc.parent = inode_ref(inode);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(subvol, &loc, iatt, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+inode_t *
+glfs_resolve_component(struct glfs *fs, xlator_t *subvol, inode_t *parent,
+ const char *component, struct iatt *iatt,
+ int force_lookup)
+{
+ loc_t loc = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ inode_t *temp_parent = NULL;
+ int reval = 0;
+ int ret = -1;
+ int glret = -1;
+ struct iatt ciatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+
+ if (__is_root_gfid(parent->gfid) &&
+ ((strcmp(component, ".") == 0) || (strcmp(component, "..") == 0) ||
+ (strcmp(component, "") == 0))) {
+ if (!force_lookup) {
+ inode = inode_ref(parent);
+ } else {
+ ret = glfs_resolve_root(fs, subvol, parent, &ciatt);
+ if (!ret)
+ inode = inode_ref(parent);
+ }
+ goto found;
+ }
+ /* *
+ * if the component name is either "." or "..", it will try to
+ * resolve that if inode has a proper parent (named lookup).
+ *
+ * Below condition works like this
+ *
+ * Example 1 :
+ * Path /out_dir/dir/in_dir/.
+ * In put values :
+ * parent = in_dir
+ * component : "."
+ *
+ * Out put values:
+ * parent : dir
+ * component : "in_dir"
+ *
+ * Example 2 :
+ * Path /out_dir/dir/in_dir/..
+ * In put values :
+ * parent = in_dir
+ * component : ".."
+ *
+ * Out put values:
+ * parent : output_dir
+ * component : "dir"
+ *
+ * In case of nameless lookup, both "." and ".." retained
+ */
+
+ if (strcmp(component, ".") == 0) {
+ loc.inode = inode_ref(parent);
+ temp_parent = inode_parent(loc.inode, 0, 0);
+ if (temp_parent) {
+ inode_unref(loc.parent);
+ loc.parent = temp_parent;
+ gf_uuid_copy(loc.pargfid, temp_parent->gfid);
+ inode_find_directory_name(loc.inode, &loc.name);
+ }
+
+ } else if (strcmp(component, "..") == 0) {
+ loc.inode = inode_parent(parent, 0, 0);
+ if (loc.inode) {
+ temp_parent = inode_parent(loc.inode, 0, 0);
+ if (temp_parent) {
+ inode_unref(loc.parent);
+ loc.parent = temp_parent;
+ gf_uuid_copy(loc.pargfid, temp_parent->gfid);
+ inode_find_directory_name(loc.inode, &loc.name);
+ } else if (__is_root_gfid(loc.inode->gfid)) {
+ inode_unref(loc.parent);
+ loc.parent = inode_ref(loc.inode);
+ gf_uuid_copy(loc.pargfid, loc.inode->gfid);
+ loc.name = ".";
+ } else {
+ inode_unref(loc.inode);
+ loc.inode = NULL;
+ }
+ }
+ } else
+ loc.inode = inode_grep(parent->table, parent, component);
+
+ if (!loc.name)
+ loc.name = component;
+
+ if (loc.inode) {
+ gf_uuid_copy(loc.gfid, loc.inode->gfid);
+ reval = 1;
+
+ if (!(force_lookup || inode_needs_lookup(loc.inode, THIS))) {
+ inode = inode_ref(loc.inode);
+ goto found;
+ }
+ } else {
+ gf_uuid_generate(gfid);
+ loc.inode = inode_new(parent->table);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ glret = priv_glfs_loc_touchup(&loc);
+ if (glret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup(subvol, &loc, &ciatt, NULL, xattr_req, NULL);
+ if (ret && reval) {
+ /*
+ * A stale mapping might exist for a dentry/inode that has been
+ * removed from another client.
+ */
+ if (-ret == ENOENT) {
+ inode_unlink(loc.inode, loc.parent, loc.name);
+ if (!inode_has_dentry(loc.inode))
+ inode_forget(loc.inode, 0);
+ }
+
+ inode_unref(loc.inode);
+ gf_uuid_clear(loc.gfid);
+ loc.inode = inode_new(parent->table);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_lookup(subvol, &loc, &ciatt, NULL, xattr_req, NULL);
+ }
+ DECODE_SYNCOP_ERR(ret);
+ if (ret)
+ goto out;
+
+ inode = inode_link(loc.inode, loc.parent, component, &ciatt);
+
+ if (!inode) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa((unsigned char *)&ciatt.ia_gfid), NULL);
+ goto out;
+ } else if (inode == loc.inode)
+ inode_ctx_set(inode, THIS, &ctx_value);
+found:
+ if (inode) {
+ ciatt.ia_type = inode->ia_type;
+ inode_lookup(inode);
+ }
+ if (iatt)
+ *iatt = ciatt;
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ loc_wipe(&loc);
+
+ return inode;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0)
+int
+priv_glfs_resolve_at(struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+ int follow, int reval)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ char *saveptr = NULL;
+ char *path = NULL;
+ char *component = NULL;
+ char *next_component = NULL;
+ int ret = -1;
+ struct iatt ciatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (origpath[0] == '\0') {
+ errno = EINVAL;
+ goto invalid_fs;
+ }
+
+ parent = NULL;
+ if (at && origpath[0] != '/') {
+ /* A relative resolution of a path which starts with '/'
+ is equal to an absolute path resolution.
+ */
+ inode = inode_ref(at);
+ } else {
+ inode = inode_ref(subvol->itable->root);
+
+ if (strcmp(origpath, "/") == 0)
+ glfs_resolve_root(fs, subvol, inode, &ciatt);
+ }
+
+ path = gf_strdup(origpath);
+ if (!path)
+ goto invalid_fs;
+
+ for (component = strtok_r(path, "/", &saveptr); component;
+ component = next_component) {
+ next_component = strtok_r(NULL, "/", &saveptr);
+
+ if (parent)
+ inode_unref(parent);
+ parent = inode;
+ inode = glfs_resolve_component(fs, subvol, parent, component, &ciatt,
+ /* force hard lookup on the last
+ component, as the caller
+ wants proper iatt filled
+ */
+ (reval || (!next_component && iatt)));
+ if (!inode) {
+ ret = -1;
+ break;
+ }
+
+ if (IA_ISLNK(ciatt.ia_type) && (next_component || follow)) {
+ /* If the component is not the last piece,
+ then following it is necessary even if
+ not requested by the caller
+ */
+ char *lpath = NULL;
+ loc_t sym_loc = {
+ 0,
+ };
+
+ if (follow > GLFS_SYMLINK_MAX_FOLLOW) {
+ errno = ELOOP;
+ ret = -1;
+ if (inode) {
+ inode_unref(inode);
+ inode = NULL;
+ }
+ break;
+ }
+
+ ret = glfs_resolve_symlink(fs, subvol, inode, &lpath);
+ inode_unref(inode);
+ inode = NULL;
+ if (ret < 0)
+ break;
+
+ ret = priv_glfs_resolve_at(fs, subvol, parent, lpath, &sym_loc,
+ /* followed iatt becomes the
+ component iatt
+ */
+ &ciatt,
+ /* always recurisvely follow while
+ following symlink
+ */
+ follow + 1, reval);
+ if (ret == 0)
+ inode = inode_ref(sym_loc.inode);
+ loc_wipe(&sym_loc);
+ GF_FREE(lpath);
+ }
+
+ if (!next_component)
+ break;
+
+ if (!IA_ISDIR(ciatt.ia_type)) {
+ /* next_component exists and this component is
+ not a directory
+ */
+ inode_unref(inode);
+ inode = NULL;
+ ret = -1;
+ errno = ENOTDIR;
+ break;
+ }
+ }
+
+ if (parent && next_component)
+ /* resolution failed mid-way */
+ goto out;
+
+ /* At this point, all components up to the last parent directory
+ have been resolved successfully (@parent). Resolution of basename
+ might have failed (@inode) if at all.
+ */
+
+ loc->parent = parent;
+ if (parent) {
+ gf_uuid_copy(loc->pargfid, parent->gfid);
+ loc->name = component;
+ }
+
+ loc->inode = inode;
+ if (inode) {
+ gf_uuid_copy(loc->gfid, inode->gfid);
+ if (iatt)
+ *iatt = ciatt;
+ ret = 0;
+ }
+
+ if (priv_glfs_loc_touchup(loc) < 0) {
+ ret = -1;
+ }
+out:
+ GF_FREE(path);
+ __GLFS_EXIT_FS;
+
+ /* do NOT loc_wipe here as only last component might be missing */
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_resolve_path(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int follow, int reval)
+{
+ int ret = -1;
+ inode_t *cwd = NULL;
+
+ if (origpath[0] == '/')
+ return priv_glfs_resolve_at(fs, subvol, NULL, origpath, loc, iatt,
+ follow, reval);
+
+ cwd = glfs_cwd_get(fs);
+ if (NULL == cwd) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, EIO, API_MSG_GET_CWD_FAILED,
+ NULL);
+ errno = EIO;
+ goto out;
+ }
+
+ ret = priv_glfs_resolve_at(fs, subvol, cwd, origpath, loc, iatt, follow,
+ reval);
+ if (cwd)
+ inode_unref(cwd);
+
+out:
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0)
+int
+priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path(fs, subvol, origpath, loc, iatt, 1, reval);
+
+ return ret;
+}
+
+int
+glfs_lresolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path(fs, subvol, origpath, loc, iatt, 0, reval);
+
+ return ret;
+}
+
+int
+glfs_migrate_fd_locks_safe(struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd,
+ xlator_t *newsubvol, fd_t *newfd)
+{
+ dict_t *lockinfo = NULL;
+ int ret = 0;
+ char uuid1[64];
+
+ if (!oldfd->lk_ctx || fd_lk_ctx_empty(oldfd->lk_ctx))
+ return 0;
+
+ newfd->lk_ctx = fd_lk_ctx_ref(oldfd->lk_ctx);
+
+ ret = syncop_fgetxattr(oldsubvol, oldfd, &lockinfo, GF_XATTR_LOCKINFO_KEY,
+ NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret < 0) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FGETXATTR_FAILED,
+ "gfid=%s", uuid_utoa_r(oldfd->inode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(oldsubvol), "id=%d",
+ oldsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ if (!dict_get(lockinfo, GF_XATTR_LOCKINFO_KEY)) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, 0, API_MSG_LOCKINFO_KEY_MISSING,
+ "gfid=%s", uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ ret = syncop_fsetxattr(newsubvol, newfd, lockinfo, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret < 0) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, 0, API_MSG_FSETXATTR_FAILED,
+ "gfid=%s", uuid_utoa_r(newfd->inode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+out:
+ if (lockinfo)
+ dict_unref(lockinfo);
+ return ret;
+}
+
+fd_t *
+glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
+{
+ fd_t *newfd = NULL;
+ inode_t *oldinode = NULL;
+ inode_t *newinode = NULL;
+ xlator_t *oldsubvol = NULL;
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ char uuid1[64];
+ dict_t *xdata = NULL;
+
+ oldinode = oldfd->inode;
+ oldsubvol = oldinode->table->xl;
+
+ if (oldsubvol == newsubvol)
+ return fd_ref(oldfd);
+
+ if (!oldsubvol->switched) {
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED,
+ "err=%s", "last-fsync set failed", "gfid=%s",
+ uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id,
+ NULL);
+ }
+
+ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED,
+ "err=%s", strerror(errno), "gfid=%s",
+ uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id,
+ NULL);
+ }
+ }
+
+ newinode = glfs_refresh_inode_safe(newsubvol, oldinode, _gf_false);
+ if (!newinode) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s",
+ uuid_utoa_r(oldinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ newfd = fd_create(newinode, getpid());
+ if (!newfd) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno,
+ API_MSG_FDCREATE_FAILED_ON_GRAPH, "gfid=%s",
+ uuid_utoa_r(newinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ loc.inode = inode_ref(newinode);
+
+ ret = inode_path(oldfd->inode, NULL, (char **)&loc.path);
+ if (ret < 0) {
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_INODE_PATH_FAILED, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(loc.gfid, oldinode->gfid);
+
+ if (IA_ISDIR(oldinode->ia_type))
+ ret = syncop_opendir(newsubvol, &loc, newfd, NULL, NULL);
+ else
+ ret = syncop_open(newsubvol, &loc,
+ oldfd->flags & ~(O_TRUNC | O_EXCL | O_CREAT), newfd,
+ NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ loc_wipe(&loc);
+
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_SYNCOP_OPEN_FAILED,
+ "type=%s", IA_ISDIR(oldinode->ia_type) ? "dir" : "", "gfid=%s",
+ uuid_utoa_r(newinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ ret = glfs_migrate_fd_locks_safe(fs, oldsubvol, oldfd, newsubvol, newfd);
+
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_LOCK_MIGRATE_FAILED,
+ "gfid=%s", uuid_utoa_r(newinode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ newfd->flags = oldfd->flags;
+ fd_bind(newfd);
+out:
+ if (newinode)
+ inode_unref(newinode);
+
+ if (ret) {
+ fd_unref(newfd);
+ newfd = NULL;
+ }
+
+ if (xdata)
+ dict_unref(xdata);
+
+ return newfd;
+}
+
+fd_t *
+__glfs_migrate_fd(struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd)
+{
+ fd_t *oldfd = NULL;
+ fd_t *newfd = NULL;
+
+ oldfd = glfd->fd;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ {
+ newfd = glfs_migrate_fd_safe(fs, newsubvol, oldfd);
+ }
+ pthread_mutex_lock(&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
+ return newfd;
+}
+
+fd_t *
+__glfs_resolve_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ if (glfd->fd->inode->table->xl == subvol)
+ return fd_ref(glfd->fd);
+
+ fd = __glfs_migrate_fd(fs, subvol, glfd);
+ if (!fd)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ fd_unref(glfd->fd);
+ glfd->fd = fd_ref(fd);
+ }
+
+ return fd;
+}
+
+fd_t *
+glfs_resolve_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ fd = __glfs_resolve_fd(fs, subvol, glfd);
+ }
+ glfs_unlock(fs);
+
+ return fd;
+}
+
+void
+__glfs_migrate_openfds(struct glfs *fs, xlator_t *subvol)
+{
+ struct glfs_fd *glfd = NULL;
+ fd_t *fd = NULL;
+
+ list_for_each_entry(glfd, &fs->openfds, openfds)
+ {
+ if (gf_uuid_is_null(glfd->fd->inode->gfid)) {
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_OPENFD_SKIPPED,
+ "glfd=%p", glfd, "glfd->fd=%p", glfd->fd, "subvol=%s",
+ graphid_str(subvol), "id=%d", subvol->graph->id, NULL);
+ /* create in progress, defer */
+ continue;
+ }
+
+ fd = __glfs_migrate_fd(fs, subvol, glfd);
+ if (fd) {
+ fd_unref(glfd->fd);
+ glfd->fd = fd;
+ }
+ }
+}
+
+/* Note that though it appears that this function executes under fs->mutex,
+ * it is not fully executed under fs->mutex. i.e. there are functions like
+ * __glfs_first_lookup, __glfs_refresh_inode, __glfs_migrate_openfds which
+ * unlocks fs->mutex before sending any network fop, and reacquire fs->mutex
+ * once the fop is complete. Hence the variable read from fs at the start of the
+ * function need not have the same value by the end of the function.
+ */
+xlator_t *
+__glfs_active_subvol(struct glfs *fs)
+{
+ xlator_t *new_subvol = NULL;
+ int ret = -1;
+ inode_t *new_cwd = NULL;
+
+ if (!fs->next_subvol)
+ return fs->active_subvol;
+
+ new_subvol = fs->mip_subvol = fs->next_subvol;
+ fs->next_subvol = NULL;
+
+ ret = __glfs_first_lookup(fs, new_subvol);
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_INFO, errno,
+ API_MSG_FIRST_LOOKUP_GRAPH_FAILED, "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id,
+ "err=%s", strerror(errno), NULL);
+ return NULL;
+ }
+
+ if (fs->cwd) {
+ new_cwd = __glfs_refresh_inode(fs, new_subvol, fs->cwd, _gf_false);
+
+ if (!new_cwd) {
+ char buf1[64];
+ gf_smsg(fs->volname, GF_LOG_INFO, errno,
+ API_MSG_CWD_GRAPH_REF_FAILED, "buf=%s",
+ uuid_utoa_r(fs->cwd->gfid, buf1), "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id,
+ "err=%s", strerror(errno), NULL);
+ return NULL;
+ }
+ }
+
+ __glfs_migrate_openfds(fs, new_subvol);
+ /* TODO: Migrate the fds and inodes which have leases to the new graph
+ * (issue #350)*/
+
+ /* switching @active_subvol and @cwd
+ should be atomic
+ */
+ fs->old_subvol = fs->active_subvol;
+ fs->active_subvol = fs->mip_subvol;
+ fs->mip_subvol = NULL;
+
+ if (new_cwd) {
+ __glfs_cwd_set(fs, new_cwd);
+ inode_unref(new_cwd);
+ }
+
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_SWITCHED_GRAPH, "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id, NULL);
+
+ return new_subvol;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0)
+void
+priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
+{
+ int ref = 0;
+ xlator_t *active_subvol = NULL;
+
+ if (!subvol)
+ return;
+
+ /* For decrementing subvol->wind ref count we need not check/wait for
+ * migration-in-progress flag.
+ * Also glfs_subvol_done is called in call-back path therefore waiting
+ * for migration-in-progress flag can lead to dead-lock.
+ */
+ glfs_lock(fs, _gf_false);
+ {
+ ref = (--subvol->winds);
+ active_subvol = fs->active_subvol;
+ }
+ glfs_unlock(fs);
+
+ if (ref == 0) {
+ assert(subvol != active_subvol);
+ xlator_notify(subvol, GF_EVENT_PARENT_DOWN, subvol, NULL);
+ }
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0)
+xlator_t *
+priv_glfs_active_subvol(struct glfs *fs)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *old_subvol = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ subvol = __glfs_active_subvol(fs);
+
+ if (subvol)
+ subvol->winds++;
+
+ if (fs->old_subvol) {
+ old_subvol = fs->old_subvol;
+ fs->old_subvol = NULL;
+ old_subvol->switched = 1;
+ }
+ }
+ glfs_unlock(fs);
+
+ if (old_subvol)
+ priv_glfs_subvol_done(fs, old_subvol);
+
+ return subvol;
+}
+
+int
+__glfs_cwd_set(struct glfs *fs, inode_t *inode)
+{
+ if (inode->table->xl != fs->active_subvol) {
+ inode = __glfs_refresh_inode(fs, fs->active_subvol, inode, _gf_false);
+ if (!inode)
+ return -1;
+ } else {
+ inode_ref(inode);
+ }
+
+ if (fs->cwd)
+ inode_unref(fs->cwd);
+
+ fs->cwd = inode;
+
+ return 0;
+}
+
+int
+glfs_cwd_set(struct glfs *fs, inode_t *inode)
+{
+ int ret = 0;
+
+ glfs_lock(fs, _gf_true);
+ {
+ ret = __glfs_cwd_set(fs, inode);
+ }
+ glfs_unlock(fs);
+
+ return ret;
+}
+
+inode_t *
+__glfs_cwd_get(struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ if (!fs->cwd)
+ return NULL;
+
+ if (fs->cwd->table->xl == fs->active_subvol) {
+ cwd = inode_ref(fs->cwd);
+ return cwd;
+ }
+
+ cwd = __glfs_refresh_inode(fs, fs->active_subvol, fs->cwd, _gf_false);
+
+ return cwd;
+}
+
+inode_t *
+glfs_cwd_get(struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ cwd = __glfs_cwd_get(fs);
+ }
+ glfs_unlock(fs);
+
+ return cwd;
+}
+
+inode_t *
+__glfs_resolve_inode(struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+ gf_boolean_t lookup_needed = _gf_false;
+
+ lookup_needed = inode_needs_lookup(object->inode, THIS);
+
+ if (!lookup_needed && object->inode->table->xl == subvol)
+ return inode_ref(object->inode);
+
+ inode = __glfs_refresh_inode(fs, fs->active_subvol, object->inode,
+ lookup_needed);
+ if (!inode)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ inode_unref(object->inode);
+ object->inode = inode_ref(inode);
+ }
+
+ return inode;
+}
+
+inode_t *
+glfs_resolve_inode(struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ inode = __glfs_resolve_inode(fs, subvol, object);
+ }
+ glfs_unlock(fs);
+
+ return inode;
+}
+
+int
+glfs_create_object(loc_t *loc, struct glfs_object **retobject)
+{
+ struct glfs_object *object = NULL;
+
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ object->inode = loc->inode;
+ gf_uuid_copy(object->gfid, object->inode->gfid);
+
+ /* we hold the reference */
+ loc->inode = NULL;
+
+ *retobject = object;
+
+ return 0;
+}
+
+struct glfs_object *
+glfs_h_resolve_symlink(struct glfs *fs, struct glfs_object *object)
+{
+ xlator_t *subvol = NULL;
+ loc_t sym_loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ char *lpath = NULL;
+ int ret = 0;
+ struct glfs_object *target_object = NULL;
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ ret = glfs_resolve_symlink(fs, subvol, object->inode, &lpath);
+ if (ret < 0)
+ goto out;
+
+ ret = glfs_resolve_at(fs, subvol, NULL, lpath, &sym_loc, &iatt,
+ /* always recurisvely follow while
+ following symlink
+ */
+ 1, 0);
+ if (ret == 0)
+ ret = glfs_create_object(&sym_loc, &target_object);
+
+out:
+ loc_wipe(&sym_loc);
+ GF_FREE(lpath);
+ return target_object;
+}
diff --git a/api/src/glfs.c b/api/src/glfs.c
new file mode 100644
index 00000000000..b4bf1423f6d
--- /dev/null
+++ b/api/src/glfs.c
@@ -0,0 +1,1806 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ TODO:
+ - set proper pid/lk_owner to call frames (currently buried in syncop)
+ - fix logging.c/h to store logfp and loglevel in glusterfs_ctx_t and
+ reach it via THIS.
+ - update syncop functions to accept/return xdata. ???
+ - protocol/client to reconnect immediately after portmap disconnect.
+ - handle SEEK_END failure in _lseek()
+ - handle umask (per filesystem?)
+ - make itables LRU based
+ - 0-copy for readv/writev
+ - reconcile the open/creat mess
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+#ifdef GF_LINUX_HOST_OS
+#include <sys/prctl.h>
+#endif
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/gf-event.h>
+#include "glfs-mem-types.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/hashfn.h>
+#include "rpc-clnt.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syscall.h>
+
+#include "gfapi-messages.h"
+#include "glfs.h"
+#include "glfs-internal.h"
+
+static gf_boolean_t
+vol_assigned(cmd_args_t *args)
+{
+ return args->volfile || args->volfile_server;
+}
+
+static int
+glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
+{
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ if (!ctx) {
+ goto err;
+ }
+
+ ret = xlator_mem_acct_init(THIS, glfs_mt_end + 1);
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
+ NULL);
+ return ret;
+ }
+
+ /* reset ret to -1 so that we don't need to explicitly
+ * set it in all error paths before "goto err"
+ */
+
+ ret = -1;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id();
+ if (!ctx->process_uuid) {
+ goto err;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new();
+ if (!ctx->iobuf_pool) {
+ goto err;
+ }
+
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
+ if (!ctx->event_pool) {
+ goto err;
+ }
+
+ ctx->env = syncenv_new(0, 0, 0);
+ if (!ctx->env) {
+ goto err;
+ }
+
+ pool = GF_CALLOC(1, sizeof(call_pool_t), glfs_mt_call_pool_t);
+ if (!pool) {
+ goto err;
+ }
+
+ /* frame_mem_pool size 112 * 4k */
+ pool->frame_mem_pool = mem_pool_new(call_frame_t, 4096);
+ if (!pool->frame_mem_pool) {
+ goto err;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ pool->stack_mem_pool = mem_pool_new(call_stack_t, 1024);
+ if (!pool->stack_mem_pool) {
+ goto err;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new(call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ goto err;
+ }
+
+ ctx->dict_pool = mem_pool_new(dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto err;
+
+ ctx->dict_pair_pool = mem_pool_new(data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto err;
+
+ ctx->dict_data_pool = mem_pool_new(data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto err;
+
+ ctx->logbuf_pool = mem_pool_new(log_buf_t, GF_MEMPOOL_COUNT_OF_LRU_BUF_T);
+ if (!ctx->logbuf_pool)
+ goto err;
+
+ INIT_LIST_HEAD(&pool->all_frames);
+ INIT_LIST_HEAD(&ctx->cmd_args.xlator_options);
+ INIT_LIST_HEAD(&ctx->cmd_args.volfile_servers);
+
+ LOCK_INIT(&pool->lock);
+ ctx->pool = pool;
+
+ ret = 0;
+err:
+ if (ret && pool) {
+ if (pool->frame_mem_pool)
+ mem_pool_destroy(pool->frame_mem_pool);
+ if (pool->stack_mem_pool)
+ mem_pool_destroy(pool->stack_mem_pool);
+ GF_FREE(pool);
+ }
+
+ if (ret && ctx) {
+ if (ctx->stub_mem_pool)
+ mem_pool_destroy(ctx->stub_mem_pool);
+ if (ctx->dict_pool)
+ mem_pool_destroy(ctx->dict_pool);
+ if (ctx->dict_data_pool)
+ mem_pool_destroy(ctx->dict_data_pool);
+ if (ctx->dict_pair_pool)
+ mem_pool_destroy(ctx->dict_pair_pool);
+ if (ctx->logbuf_pool)
+ mem_pool_destroy(ctx->logbuf_pool);
+ }
+
+ return ret;
+}
+
+static int
+create_master(struct glfs *fs)
+{
+ int ret = 0;
+ xlator_t *master = NULL;
+
+ master = GF_CALLOC(1, sizeof(*master), glfs_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup("gfapi");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type(master, "mount/api") == -1) {
+ gf_smsg("glfs", GF_LOG_ERROR, 0, API_MSG_MASTER_XLATOR_INIT_FAILED,
+ "name=%s", fs->volname, NULL);
+ goto err;
+ }
+
+ master->ctx = fs->ctx;
+ master->private = fs;
+ master->options = dict_new();
+ if (!master->options)
+ goto err;
+
+ ret = xlator_init(master);
+ if (ret) {
+ gf_smsg("glfs", GF_LOG_ERROR, 0, API_MSG_GFAPI_XLATOR_INIT_FAILED,
+ NULL);
+ goto err;
+ }
+
+ fs->ctx->master = master;
+ THIS = master;
+
+ return 0;
+
+err:
+ if (master) {
+ xlator_destroy(master);
+ }
+
+ return -1;
+}
+
+static FILE *
+get_volfp(struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ FILE *specfp = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if ((specfp = fopen(cmd_args->volfile, "r")) == NULL) {
+ gf_smsg("glfs", GF_LOG_ERROR, errno, API_MSG_VOLFILE_OPEN_FAILED,
+ "file=%s", cmd_args->volfile, "err=%s", strerror(errno), NULL);
+ return NULL;
+ }
+
+ gf_msg_debug("glfs", 0, "loading volume file %s", cmd_args->volfile);
+
+ return specfp;
+}
+
+int
+glfs_volumes_init(struct glfs *fs)
+{
+ FILE *fp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (!vol_assigned(cmd_args))
+ return -1;
+
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ fs->ctx->secure_mgmt = 1;
+ fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
+ if (cmd_args->volfile_server) {
+ ret = glfs_mgmt_init(fs);
+ goto out;
+ }
+
+ fp = get_volfp(fs);
+
+ if (!fp) {
+ gf_smsg("glfs", GF_LOG_ERROR, ENOENT, API_MSG_VOL_SPEC_FILE_ERROR,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_process_volfp(fs, fp);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0)
+int
+pub_glfs_set_xlator_option(struct glfs *fs, const char *xlator, const char *key,
+ const char *value)
+{
+ xlator_cmdline_option_t *option = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ option = GF_CALLOC(1, sizeof(*option), glfs_mt_xlator_cmdline_option_t);
+ if (!option)
+ goto enomem;
+
+ INIT_LIST_HEAD(&option->cmd_args);
+
+ option->volume = gf_strdup(xlator);
+ if (!option->volume)
+ goto enomem;
+ option->key = gf_strdup(key);
+ if (!option->key)
+ goto enomem;
+ option->value = gf_strdup(value);
+ if (!option->value)
+ goto enomem;
+
+ list_add(&option->cmd_args, &fs->ctx->cmd_args.xlator_options);
+
+ __GLFS_EXIT_FS;
+
+ return 0;
+enomem:
+ errno = ENOMEM;
+
+ if (!option) {
+ __GLFS_EXIT_FS;
+ return -1;
+ }
+
+ GF_FREE(option->volume);
+ GF_FREE(option->key);
+ GF_FREE(option->value);
+ GF_FREE(option);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1)
+int
+pub_glfs_unset_volfile_server(struct glfs *fs, const char *transport,
+ const char *host, const int port)
+{
+ cmd_args_t *cmd_args = NULL;
+ server_cmdline_t *server = NULL;
+ server_cmdline_t *tmp = NULL;
+ char *transport_val = NULL;
+ int port_val = 0;
+ int ret = -1;
+
+ if (!fs || !host) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (transport) {
+ transport_val = gf_strdup(transport);
+ } else {
+ transport_val = gf_strdup(GF_DEFAULT_VOLFILE_TRANSPORT);
+ }
+
+ if (!transport_val) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (port) {
+ port_val = port;
+ } else {
+ port_val = GF_DEFAULT_BASE_PORT;
+ }
+
+ list_for_each_entry_safe(server, tmp, &cmd_args->curr_server->list, list)
+ {
+ if (!server->volfile_server || !server->transport)
+ continue;
+ if ((!strcmp(server->volfile_server, host) &&
+ !strcmp(server->transport, transport_val) &&
+ (server->port == port_val))) {
+ list_del(&server->list);
+ ret = 0;
+ goto out;
+ }
+ }
+
+out:
+ GF_FREE(transport_val);
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0)
+int
+pub_glfs_set_volfile_server(struct glfs *fs, const char *transport,
+ const char *host, int port)
+{
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+ char *server_host = NULL;
+ char *server_transport = NULL;
+
+ if (!fs || !host) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ cmd_args = &fs->ctx->cmd_args;
+ cmd_args->max_connect_attempts = 1;
+
+ server_host = gf_strdup(host);
+ if (!server_host) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (transport) {
+ /* volfile fetch support over tcp|unix only */
+ if (!strcmp(transport, "tcp") || !strcmp(transport, "unix")) {
+ server_transport = gf_strdup(transport);
+ } else if (!strcmp(transport, "rdma")) {
+ server_transport = gf_strdup(GF_DEFAULT_VOLFILE_TRANSPORT);
+ gf_smsg("glfs", GF_LOG_WARNING, EINVAL, API_MSG_TRANS_RDMA_DEP,
+ NULL);
+ } else {
+ gf_smsg("glfs", GF_LOG_TRACE, EINVAL, API_MSG_TRANS_NOT_SUPPORTED,
+ "transport=%s", transport, NULL);
+ goto out;
+ }
+ } else {
+ server_transport = gf_strdup(GF_DEFAULT_VOLFILE_TRANSPORT);
+ }
+
+ if (!server_transport) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (!port) {
+ port = GF_DEFAULT_BASE_PORT;
+ }
+
+ if (!strcmp(server_transport, "unix")) {
+ port = 0;
+ }
+
+ ret = gf_set_volfile_server_common(cmd_args, server_host, server_transport,
+ port);
+ if (ret) {
+ gf_log("glfs", GF_LOG_ERROR, "failed to set volfile server: %s",
+ strerror(errno));
+ }
+
+out:
+ if (server_host) {
+ GF_FREE(server_host);
+ }
+
+ if (server_transport) {
+ GF_FREE(server_transport);
+ }
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+/* *
+ * Used to free the arguments allocated by glfs_set_volfile_server()
+ */
+static void
+glfs_free_volfile_servers(cmd_args_t *cmd_args)
+{
+ server_cmdline_t *server = NULL;
+ server_cmdline_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, cmd_args, out);
+
+ list_for_each_entry_safe(server, tmp, &cmd_args->volfile_servers, list)
+ {
+ list_del_init(&server->list);
+ GF_FREE(server->volfile_server);
+ GF_FREE(server->transport);
+ GF_FREE(server);
+ }
+ cmd_args->curr_server = NULL;
+out:
+ return;
+}
+
+static void
+glfs_free_xlator_options(cmd_args_t *cmd_args)
+{
+ xlator_cmdline_option_t *xo = NULL;
+ xlator_cmdline_option_t *tmp_xo = NULL;
+
+ if (!&(cmd_args->xlator_options))
+ return;
+
+ list_for_each_entry_safe(xo, tmp_xo, &cmd_args->xlator_options, cmd_args)
+ {
+ list_del_init(&xo->cmd_args);
+ GF_FREE(xo->volume);
+ GF_FREE(xo->key);
+ GF_FREE(xo->value);
+ GF_FREE(xo);
+ }
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2)
+int
+pub_glfs_setfsuid(uid_t fsuid)
+{
+ /* TODO:
+ * - Set the THIS and restore it appropriately
+ */
+ return syncopctx_setfsuid(&fsuid);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2)
+int
+pub_glfs_setfsgid(gid_t fsgid)
+{
+ /* TODO:
+ * - Set the THIS and restore it appropriately
+ */
+ return syncopctx_setfsgid(&fsgid);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2)
+int
+pub_glfs_setfsgroups(size_t size, const gid_t *list)
+{
+ /* TODO:
+ * - Set the THIS and restore it appropriately
+ */
+ return syncopctx_setfsgroups(size, list);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0)
+int
+pub_glfs_setfsleaseid(glfs_leaseid_t leaseid)
+{
+ int ret = -1;
+ char *gleaseid = NULL;
+
+ gleaseid = gf_leaseid_get();
+ if (gleaseid) {
+ if (leaseid)
+ memcpy(gleaseid, leaseid, LEASE_ID_SIZE);
+ else /* reset leaseid */
+ memset(gleaseid, 0, LEASE_ID_SIZE);
+ ret = 0;
+ }
+
+ if (ret)
+ gf_log("glfs", GF_LOG_ERROR, "failed to set leaseid: %s",
+ strerror(errno));
+ return ret;
+}
+
+int
+get_fop_attr_glfd(dict_t **fop_attr, struct glfs_fd *glfd)
+{
+ char *leaseid = NULL;
+ int ret = 0;
+ gf_boolean_t dict_create = _gf_false;
+
+ leaseid = GF_MALLOC(LEASE_ID_SIZE, gf_common_mt_char);
+ GF_CHECK_ALLOC_AND_LOG("gfapi", leaseid, ret, "lease id alloc failed", out);
+ memcpy(leaseid, glfd->lease_id, LEASE_ID_SIZE);
+ if (*fop_attr == NULL) {
+ *fop_attr = dict_new();
+ dict_create = _gf_true;
+ }
+ GF_CHECK_ALLOC_AND_LOG("gfapi", *fop_attr, ret, "dict_new failed", out);
+ ret = dict_set_bin(*fop_attr, "lease-id", leaseid, LEASE_ID_SIZE);
+out:
+ if (ret) {
+ GF_FREE(leaseid);
+ if (dict_create) {
+ if (*fop_attr)
+ dict_unref(*fop_attr);
+ *fop_attr = NULL;
+ }
+ }
+ return ret;
+}
+
+int
+set_fop_attr_glfd(struct glfs_fd *glfd)
+{
+ char *lease_id = NULL;
+ int ret = -1;
+
+ lease_id = gf_existing_leaseid();
+ if (lease_id) {
+ memcpy(glfd->lease_id, lease_id, LEASE_ID_SIZE);
+ ret = 0;
+ }
+ return ret;
+}
+
+int
+get_fop_attr_thrd_key(dict_t **fop_attr)
+{
+ char *existing_leaseid = NULL, *leaseid = NULL;
+ int ret = 0;
+ gf_boolean_t dict_create = _gf_false;
+
+ existing_leaseid = gf_existing_leaseid();
+ if (existing_leaseid) {
+ leaseid = GF_MALLOC(LEASE_ID_SIZE, gf_common_mt_char);
+ GF_CHECK_ALLOC_AND_LOG("gfapi", leaseid, ret, "lease id alloc failed",
+ out);
+ memcpy(leaseid, existing_leaseid, LEASE_ID_SIZE);
+ if (*fop_attr == NULL) {
+ *fop_attr = dict_new();
+ dict_create = _gf_true;
+ }
+ GF_CHECK_ALLOC_AND_LOG("gfapi", *fop_attr, ret, "dict_new failed", out);
+ ret = dict_set_bin(*fop_attr, "lease-id", leaseid, LEASE_ID_SIZE);
+ }
+
+out:
+ if (ret) {
+ GF_FREE(leaseid);
+ if (dict_create) {
+ if (*fop_attr)
+ dict_unref(*fop_attr);
+ *fop_attr = NULL;
+ }
+ }
+ return ret;
+}
+
+void
+unset_fop_attr(dict_t **fop_attr)
+{
+ char *lease_id = NULL;
+ lease_id = gf_existing_leaseid();
+ if (lease_id)
+ memset(lease_id, 0, LEASE_ID_SIZE);
+ if (*fop_attr) {
+ dict_unref(*fop_attr);
+ *fop_attr = NULL;
+ }
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0)
+struct glfs *
+pub_glfs_from_glfd(struct glfs_fd *glfd)
+{
+ if (glfd == NULL) {
+ errno = EBADF;
+ return NULL;
+ }
+
+ return glfd->fs;
+}
+
+static void
+glfs_fd_destroy(struct glfs_fd *glfd)
+{
+ if (!glfd)
+ return;
+
+ glfs_lock(glfd->fs, _gf_true);
+ {
+ list_del_init(&glfd->openfds);
+ }
+ glfs_unlock(glfd->fs);
+
+ if (glfd->fd) {
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ GF_FREE(glfd->readdirbuf);
+
+ GF_FREE(glfd);
+}
+
+struct glfs_fd *
+glfs_fd_new(struct glfs *fs)
+{
+ struct glfs_fd *glfd = NULL;
+
+ glfd = GF_CALLOC(1, sizeof(*glfd), glfs_mt_glfs_fd_t);
+ if (!glfd)
+ return NULL;
+
+ glfd->fs = fs;
+
+ INIT_LIST_HEAD(&glfd->openfds);
+
+ GF_REF_INIT(glfd, glfs_fd_destroy);
+
+ return glfd;
+}
+
+void
+glfs_fd_bind(struct glfs_fd *glfd)
+{
+ struct glfs *fs = NULL;
+
+ fs = glfd->fs;
+
+ glfs_lock(fs, _gf_true);
+ {
+ list_add_tail(&glfd->openfds, &fs->openfds);
+ }
+ glfs_unlock(fs);
+}
+
+static void *
+glfs_poller(void *data)
+{
+ struct glfs *fs = NULL;
+
+ fs = data;
+
+ gf_event_dispatch(fs->ctx->event_pool);
+
+ return NULL;
+}
+
+static struct glfs *
+glfs_new_fs(const char *volname)
+{
+ struct glfs *fs = NULL;
+
+ fs = CALLOC(1, sizeof(*fs));
+ if (!fs)
+ return NULL;
+
+ INIT_LIST_HEAD(&fs->openfds);
+ INIT_LIST_HEAD(&fs->upcall_list);
+ INIT_LIST_HEAD(&fs->waitq);
+
+ PTHREAD_MUTEX_INIT(&fs->mutex, NULL, fs->pthread_flags, GLFS_INIT_MUTEX,
+ err);
+
+ PTHREAD_COND_INIT(&fs->cond, NULL, fs->pthread_flags, GLFS_INIT_COND, err);
+
+ PTHREAD_COND_INIT(&fs->child_down_cond, NULL, fs->pthread_flags,
+ GLFS_INIT_COND_CHILD, err);
+
+ PTHREAD_MUTEX_INIT(&fs->upcall_list_mutex, NULL, fs->pthread_flags,
+ GLFS_INIT_MUTEX_UPCALL, err);
+
+ fs->volname = strdup(volname);
+ if (!fs->volname)
+ goto err;
+
+ fs->pin_refcnt = 0;
+ fs->upcall_events = 0;
+ fs->up_cbk = NULL;
+ fs->up_data = NULL;
+
+ return fs;
+
+err:
+ glfs_free_from_ctx(fs);
+ return NULL;
+}
+
+extern xlator_t global_xlator;
+extern glusterfs_ctx_t *global_ctx;
+extern pthread_mutex_t global_ctx_mutex;
+
+static int
+glfs_init_global_ctx()
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ pthread_mutex_lock(&global_ctx_mutex);
+ {
+ if (global_xlator.ctx)
+ goto unlock;
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx) {
+ ret = -1;
+ goto unlock;
+ }
+
+ gf_log_globals_init(ctx, GF_LOG_NONE);
+
+ global_ctx = ctx;
+ global_xlator.ctx = global_ctx;
+
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret) {
+ global_ctx = NULL;
+ global_xlator.ctx = NULL;
+ goto unlock;
+ }
+ }
+unlock:
+ pthread_mutex_unlock(&global_ctx_mutex);
+
+ if (ret)
+ FREE(ctx);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0)
+struct glfs *
+pub_glfs_new(const char *volname)
+{
+ if (!volname) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ struct glfs *fs = NULL;
+ int i = 0;
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *old_THIS = NULL;
+ char pname[16] = "";
+ char msg[32] = "";
+
+ if (volname[0] == '/' || volname[0] == '-') {
+ if (strncmp(volname, "/snaps/", 7) == 0) {
+ goto label;
+ }
+ errno = EINVAL;
+ return NULL;
+ }
+
+ for (i = 0; i < strlen(volname); i++) {
+ if (!isalnum(volname[i]) && (volname[i] != '_') &&
+ (volname[i] != '-')) {
+ errno = EINVAL;
+ return NULL;
+ }
+ }
+
+label:
+ /*
+ * Do this as soon as possible in case something else depends on
+ * pool allocations.
+ */
+ mem_pools_init();
+
+ fs = glfs_new_fs(volname);
+ if (!fs)
+ goto out;
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ goto out;
+
+ /* first globals init, for gf_mem_acct_enable_set () */
+
+ ret = glusterfs_globals_init(ctx);
+ if (ret)
+ goto out;
+
+ old_THIS = THIS;
+ ret = glfs_init_global_ctx();
+ if (ret)
+ goto out;
+
+ /* then ctx_defaults_init, for xlator_mem_acct_init(THIS) */
+
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret)
+ goto out;
+
+ fs->ctx = ctx;
+ fs->ctx->process_mode = GF_CLIENT_PROCESS;
+
+ ret = glfs_set_logging(fs, "/dev/null", 0);
+ if (ret)
+ goto out;
+
+ fs->ctx->cmd_args.volfile_id = gf_strdup(volname);
+ if (!(fs->ctx->cmd_args.volfile_id)) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = -1;
+#ifdef GF_LINUX_HOST_OS
+ ret = prctl(PR_GET_NAME, (unsigned long)pname, 0, 0, 0);
+#endif
+ if (ret)
+ fs->ctx->cmd_args.process_name = gf_strdup("gfapi");
+ else {
+ snprintf(msg, sizeof(msg), "gfapi.%s", pname);
+ fs->ctx->cmd_args.process_name = gf_strdup(msg);
+ }
+ ret = 0;
+
+out:
+ if (ret) {
+ if (fs) {
+ glfs_fini(fs);
+ fs = NULL;
+ } else {
+ /* glfs_fini() calls mem_pools_fini() too */
+ mem_pools_fini();
+ }
+ }
+
+ if (old_THIS)
+ THIS = old_THIS;
+
+ return fs;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0)
+struct glfs *
+priv_glfs_new_from_ctx(glusterfs_ctx_t *ctx)
+{
+ struct glfs *fs = NULL;
+
+ if (!ctx)
+ goto out;
+
+ fs = glfs_new_fs("");
+ if (!fs)
+ goto out;
+
+ fs->ctx = ctx;
+
+out:
+ return fs;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0)
+void
+priv_glfs_free_from_ctx(struct glfs *fs)
+{
+ upcall_entry *u_list = NULL;
+ upcall_entry *tmp = NULL;
+
+ if (!fs)
+ return;
+
+ /* cleanup upcall structures */
+ list_for_each_entry_safe(u_list, tmp, &fs->upcall_list, upcall_list)
+ {
+ list_del_init(&u_list->upcall_list);
+ GF_FREE(u_list->upcall_data.data);
+ GF_FREE(u_list);
+ }
+
+ PTHREAD_MUTEX_DESTROY(&fs->mutex, fs->pthread_flags, GLFS_INIT_MUTEX);
+
+ PTHREAD_COND_DESTROY(&fs->cond, fs->pthread_flags, GLFS_INIT_COND);
+
+ PTHREAD_COND_DESTROY(&fs->child_down_cond, fs->pthread_flags,
+ GLFS_INIT_COND_CHILD);
+
+ PTHREAD_MUTEX_DESTROY(&fs->upcall_list_mutex, fs->pthread_flags,
+ GLFS_INIT_MUTEX_UPCALL);
+
+ if (fs->oldvolfile)
+ FREE(fs->oldvolfile);
+
+ FREE(fs->volname);
+
+ FREE(fs);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0)
+int
+pub_glfs_set_volfile(struct glfs *fs, const char *volfile)
+{
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (vol_assigned(cmd_args))
+ return -1;
+
+ cmd_args->volfile = gf_strdup(volfile);
+ if (!cmd_args->volfile)
+ return -1;
+ return 0;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0)
+int
+pub_glfs_set_logging(struct glfs *fs, const char *logfile, int loglevel)
+{
+ int ret = -1;
+ char *tmplog = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!logfile) {
+ ret = gf_set_log_file_path(&fs->ctx->cmd_args, fs->ctx);
+ if (ret)
+ goto out;
+ tmplog = fs->ctx->cmd_args.log_file;
+ } else {
+ tmplog = (char *)logfile;
+ }
+
+ /* finish log set parameters before init */
+ if (loglevel >= 0)
+ gf_log_set_loglevel(fs->ctx, loglevel);
+
+ ret = gf_log_init(fs->ctx, tmplog, NULL);
+ if (ret)
+ goto out;
+
+ ret = gf_log_inject_timer_event(fs->ctx);
+ if (ret)
+ goto out;
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_init_wait(struct glfs *fs)
+{
+ int ret = -1;
+
+ /* Always a top-down call, use glfs_lock() */
+ glfs_lock(fs, _gf_true);
+ {
+ while (!fs->init)
+ pthread_cond_wait(&fs->cond, &fs->mutex);
+ ret = fs->ret;
+ errno = fs->err;
+ }
+ glfs_unlock(fs);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0)
+void
+priv_glfs_init_done(struct glfs *fs, int ret)
+{
+ glfs_init_cbk init_cbk;
+
+ if (!fs) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_GLFS_FSOBJ_NULL, NULL);
+ goto out;
+ }
+
+ init_cbk = fs->init_cbk;
+
+ /* Always a bottom-up call, use mutex_lock() */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs->init = 1;
+ fs->ret = ret;
+ fs->err = errno;
+
+ if (!init_cbk)
+ pthread_cond_broadcast(&fs->cond);
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (init_cbk)
+ init_cbk(fs, ret);
+out:
+ return;
+}
+
+int
+glfs_init_common(struct glfs *fs)
+{
+ int ret = -1;
+
+ ret = create_master(fs);
+ if (ret)
+ return ret;
+
+ ret = gf_thread_create(&fs->poller, NULL, glfs_poller, fs, "glfspoll");
+ if (ret)
+ return ret;
+
+ ret = glfs_volumes_init(fs);
+ if (ret)
+ return ret;
+
+ fs->dev_id = gf_dm_hashfn(fs->volname, strlen(fs->volname));
+ return ret;
+}
+
+int
+glfs_init_async(struct glfs *fs, glfs_init_cbk cbk)
+{
+ int ret = -1;
+
+ if (!fs || !fs->ctx) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_FS_NOT_INIT, NULL);
+ errno = EINVAL;
+ return ret;
+ }
+
+ fs->init_cbk = cbk;
+
+ ret = glfs_init_common(fs);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0)
+int
+pub_glfs_init(struct glfs *fs)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+
+ if (!fs || !fs->ctx) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_FS_NOT_INIT, NULL);
+ errno = EINVAL;
+ return ret;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ ret = glfs_init_common(fs);
+ if (ret)
+ goto out;
+
+ ret = glfs_init_wait(fs);
+out:
+ __GLFS_EXIT_FS;
+
+ /* Set the initial current working directory to "/" */
+ if (ret >= 0) {
+ ret = glfs_chdir(fs, "/");
+ }
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
+{
+ call_pool_t *pool = NULL;
+ int ret = 0;
+ glusterfs_graph_t *trav_graph = NULL;
+ glusterfs_graph_t *tmp = NULL;
+
+ if (ctx == NULL)
+ return 0;
+
+ if (ctx->cmd_args.curr_server)
+ glfs_free_volfile_servers(&ctx->cmd_args);
+
+ glfs_free_xlator_options(&ctx->cmd_args);
+
+ /* For all the graphs, crawl through the xlator_t structs and free
+ * all its members except for the mem_acct member,
+ * as GF_FREE will be referencing it.
+ */
+ list_for_each_entry_safe(trav_graph, tmp, &ctx->graphs, list)
+ {
+ xlator_tree_free_members(trav_graph->first);
+ }
+
+ /* Free the memory pool */
+ if (ctx->stub_mem_pool)
+ mem_pool_destroy(ctx->stub_mem_pool);
+ if (ctx->dict_pool)
+ mem_pool_destroy(ctx->dict_pool);
+ if (ctx->dict_data_pool)
+ mem_pool_destroy(ctx->dict_data_pool);
+ if (ctx->dict_pair_pool)
+ mem_pool_destroy(ctx->dict_pair_pool);
+ if (ctx->logbuf_pool)
+ mem_pool_destroy(ctx->logbuf_pool);
+
+ pool = ctx->pool;
+ if (pool) {
+ if (pool->frame_mem_pool)
+ mem_pool_destroy(pool->frame_mem_pool);
+ if (pool->stack_mem_pool)
+ mem_pool_destroy(pool->stack_mem_pool);
+ LOCK_DESTROY(&pool->lock);
+ GF_FREE(pool);
+ }
+
+ /* Free the event pool */
+ ret = gf_event_pool_destroy(ctx->event_pool);
+
+ /* Free the iobuf pool */
+ iobuf_pool_destroy(ctx->iobuf_pool);
+
+ GF_FREE(ctx->process_uuid);
+ GF_FREE(ctx->cmd_args.volfile_id);
+ GF_FREE(ctx->cmd_args.process_name);
+
+ LOCK_DESTROY(&ctx->lock);
+ pthread_mutex_destroy(&ctx->notify_lock);
+ pthread_cond_destroy(&ctx->notify_cond);
+
+ /* Free all the graph structs and its containing xlator_t structs
+ * from this point there should be no reference to GF_FREE/GF_CALLOC
+ * as it will try to access mem_acct and the below function would
+ * have freed the same.
+ */
+ list_for_each_entry_safe(trav_graph, tmp, &ctx->graphs, list)
+ {
+ glusterfs_graph_destroy_residual(trav_graph);
+ }
+
+ GF_FREE(ctx->statedump_path);
+ FREE(ctx);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0)
+int
+pub_glfs_fini(struct glfs *fs)
+{
+ int ret = -1;
+ int countdown = 100;
+ xlator_t *subvol = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *graph = NULL;
+ call_pool_t *call_pool = NULL;
+ int fs_init = 0;
+ int err = -1;
+ struct synctask *waittask = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!fs) {
+ errno = EINVAL;
+ goto invalid_fs;
+ }
+
+ ctx = fs->ctx;
+ if (!ctx) {
+ goto free_fs;
+ }
+
+ THIS = fs->ctx->master;
+
+ if (ctx->mgmt) {
+ rpc_clnt_disable(ctx->mgmt);
+ }
+
+ call_pool = fs->ctx->pool;
+
+ /* Wake up any suspended synctasks */
+ while (!list_empty(&fs->waitq)) {
+ waittask = list_entry(fs->waitq.next, struct synctask, waitq);
+ list_del_init(&waittask->waitq);
+ synctask_wake(waittask);
+ }
+
+ while (countdown--) {
+ /* give some time for background frames to finish */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ /* Do we need to increase countdown? */
+ if ((!call_pool->cnt) && (!fs->pin_refcnt)) {
+ gf_msg_trace("glfs", 0,
+ "call_pool_cnt - %" PRId64
+ ","
+ "pin_refcnt - %d",
+ call_pool->cnt, fs->pin_refcnt);
+
+ ctx->cleanup_started = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ break;
+ }
+ }
+ pthread_mutex_unlock(&fs->mutex);
+ gf_nanosleep(100000 * GF_US_IN_NS);
+ }
+
+ /* leaked frames may exist, we ignore */
+
+ /*We deem glfs_fini as successful if there are no pending frames in the call
+ *pool*/
+ ret = (call_pool->cnt == 0) ? 0 : -1;
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs_init = fs->init;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (fs_init != 0) {
+ subvol = glfs_active_subvol(fs);
+ if (subvol) {
+ /* PARENT_DOWN within glfs_subvol_done() is issued
+ only on graph switch (new graph should activiate
+ and decrement the extra @winds count taken in
+ glfs_graph_setup()
+
+ Since we are explicitly destroying,
+ PARENT_DOWN is necessary
+ */
+ xlator_notify(subvol, GF_EVENT_PARENT_DOWN, subvol, 0);
+ /* Here we wait for GF_EVENT_CHILD_DOWN before exiting,
+ in case of asynchrnous cleanup
+ */
+ graph = subvol->graph;
+ err = pthread_mutex_lock(&fs->mutex);
+ if (err != 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, err, API_MSG_FSMUTEX_LOCK_FAILED,
+ "error=%s", strerror(err), NULL);
+ goto fail;
+ }
+ /* check and wait for CHILD_DOWN for active subvol*/
+ {
+ while (graph->used) {
+ err = pthread_cond_wait(&fs->child_down_cond, &fs->mutex);
+ if (err != 0)
+ gf_smsg("glfs", GF_LOG_INFO, err,
+ API_MSG_COND_WAIT_FAILED, "name=%s",
+ subvol->name, "err=%s", strerror(err), NULL);
+ }
+ }
+
+ err = pthread_mutex_unlock(&fs->mutex);
+ if (err != 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, err,
+ API_MSG_FSMUTEX_UNLOCK_FAILED, "error=%s",
+ strerror(err), NULL);
+ goto fail;
+ }
+ }
+ glfs_subvol_done(fs, subvol);
+ }
+
+ ctx->cleanup_started = 1;
+
+ if (fs_init != 0) {
+ /* Destroy all the inode tables of all the graphs.
+ * NOTE:
+ * - inode objects should be destroyed before calling fini()
+ * of each xlator, as fini() and forget() of the xlators
+ * can share few common locks or data structures, calling
+ * fini first might destroy those required by forget
+ * ( eg: in quick-read)
+ * - The call to inode_table_destroy_all is not required when
+ * the cleanup during graph switch is implemented to perform
+ * inode table destroy.
+ */
+ inode_table_destroy_all(ctx);
+
+ /* Call fini() of all the xlators in the active graph
+ * NOTE:
+ * - xlator fini() should be called before destroying any of
+ * the threads. (eg: fini() in protocol-client uses timer
+ * thread) */
+ glusterfs_graph_deactivate(ctx->active);
+
+ /* Join the syncenv_processor threads and cleanup
+ * syncenv resources*/
+ syncenv_destroy(ctx->env);
+
+ /* Join the poller thread */
+ if (gf_event_dispatch_destroy(ctx->event_pool) < 0)
+ ret = -1;
+ }
+
+ /* Avoid dispatching events to mgmt after freed,
+ * unreference mgmt after the event_dispatch_destroy */
+ if (ctx->mgmt) {
+ rpc_clnt_unref(ctx->mgmt);
+ ctx->mgmt = NULL;
+ }
+
+ /* log infra has to be brought down before destroying
+ * timer registry, as logging uses timer infra
+ */
+ if (gf_log_fini(ctx) != 0)
+ ret = -1;
+
+ /* Join the timer thread */
+ if (fs_init != 0) {
+ gf_timer_registry_destroy(ctx);
+ }
+
+ /* Destroy the context and the global pools */
+ if (glusterfs_ctx_destroy(ctx) != 0)
+ ret = -1;
+
+free_fs:
+ glfs_free_from_ctx(fs);
+
+ /*
+ * Do this as late as possible in case anything else has (or
+ * grows) a dependency on mem-pool allocations.
+ */
+ mem_pools_fini();
+
+fail:
+ if (!ret)
+ ret = err;
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0)
+ssize_t
+pub_glfs_get_volfile(struct glfs *fs, void *buf, size_t len)
+{
+ ssize_t res = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ glfs_lock(fs, _gf_true);
+ if (len >= fs->oldvollen) {
+ gf_msg_trace("glfs", 0, "copying %zu to %p", len, buf);
+ memcpy(buf, fs->oldvolfile, len);
+ res = len;
+ } else {
+ res = len - fs->oldvollen;
+ gf_msg_trace("glfs", 0, "buffer is %zd too short", -res);
+ }
+ glfs_unlock(fs);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return res;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0)
+int
+priv_glfs_ipc(struct glfs *fs, int opcode, void *xd_in, void **xd_out)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ ret = syncop_ipc(subvol, opcode, (dict_t *)xd_in, (dict_t **)xd_out);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ glfs_subvol_done(fs, subvol);
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1)
+int
+priv_glfs_setfspid(struct glfs *fs, pid_t pid)
+{
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &fs->ctx->cmd_args;
+ cmd_args->client_pid = pid;
+ cmd_args->client_pid_set = 1;
+ ret = syncopctx_setfspid(&pid);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16)
+void
+pub_glfs_free(void *ptr)
+{
+ GLFS_FREE(ptr);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16)
+struct glfs *
+pub_glfs_upcall_get_fs(struct glfs_upcall *arg)
+{
+ return arg->fs;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16)
+enum glfs_upcall_reason
+pub_glfs_upcall_get_reason(struct glfs_upcall *arg)
+{
+ return arg->reason;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16)
+void *
+pub_glfs_upcall_get_event(struct glfs_upcall *arg)
+{
+ return arg->event;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16)
+struct glfs_object *
+pub_glfs_upcall_inode_get_object(struct glfs_upcall_inode *arg)
+{
+ return arg->object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16)
+uint64_t
+pub_glfs_upcall_inode_get_flags(struct glfs_upcall_inode *arg)
+{
+ return arg->flags;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16)
+struct stat *
+pub_glfs_upcall_inode_get_stat(struct glfs_upcall_inode *arg)
+{
+ return &arg->buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16)
+uint64_t
+pub_glfs_upcall_inode_get_expire(struct glfs_upcall_inode *arg)
+{
+ return arg->expire_time_attr;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16)
+struct glfs_object *
+pub_glfs_upcall_inode_get_pobject(struct glfs_upcall_inode *arg)
+{
+ return arg->p_object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16)
+struct stat *
+pub_glfs_upcall_inode_get_pstat(struct glfs_upcall_inode *arg)
+{
+ return &arg->p_buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16)
+struct glfs_object *
+pub_glfs_upcall_inode_get_oldpobject(struct glfs_upcall_inode *arg)
+{
+ return arg->oldp_object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16)
+struct stat *
+pub_glfs_upcall_inode_get_oldpstat(struct glfs_upcall_inode *arg)
+{
+ return &arg->oldp_buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6)
+struct glfs_object *
+pub_glfs_upcall_lease_get_object(struct glfs_upcall_lease *arg)
+{
+ return arg->object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6)
+uint32_t
+pub_glfs_upcall_lease_get_lease_type(struct glfs_upcall_lease *arg)
+{
+ return arg->lease_type;
+}
+
+/* definitions of the GLFS_SYSRQ_* chars are in glfs.h */
+static struct glfs_sysrq_help {
+ char sysrq;
+ char *msg;
+} glfs_sysrq_help[] = {{GLFS_SYSRQ_HELP, "(H)elp"},
+ {GLFS_SYSRQ_STATEDUMP, "(S)tatedump"},
+ {0, NULL}};
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0)
+int
+pub_glfs_sysrq(struct glfs *fs, char sysrq)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ int msg_len;
+ char msg[1024] = {
+ 0,
+ }; /* should not exceed 1024 chars */
+
+ if (!fs || !fs->ctx) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ ctx = fs->ctx;
+
+ switch (sysrq) {
+ case GLFS_SYSRQ_HELP: {
+ struct glfs_sysrq_help *usage = NULL;
+
+ for (usage = glfs_sysrq_help; usage->sysrq; usage++) {
+ msg_len = strlen(msg);
+ snprintf(msg + msg_len, /* append to msg */
+ sizeof(msg) - msg_len - 2,
+ /* - 2 for the " " + terminating \0 */
+ " %s", usage->msg);
+ }
+
+ /* not really an 'error', but make sure it gets logged */
+ gf_log("glfs", GF_LOG_ERROR, "available events: %s", msg);
+
+ break;
+ }
+ case GLFS_SYSRQ_STATEDUMP:
+ gf_proc_dump_info(SIGUSR1, ctx);
+ break;
+ default:
+ gf_smsg("glfs", GF_LOG_ERROR, ENOTSUP, API_MSG_INVALID_SYSRQ,
+ "sysrq=%c", sysrq, NULL);
+ errno = ENOTSUP;
+ ret = -1;
+ }
+out:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0)
+int
+pub_glfs_upcall_register(struct glfs *fs, uint32_t event_list,
+ glfs_upcall_cbk cbk, void *data)
+{
+ int ret = 0;
+
+ /* list of supported upcall events */
+ uint32_t up_events = (GLFS_EVENT_INODE_INVALIDATE |
+ GLFS_EVENT_RECALL_LEASE);
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ GF_VALIDATE_OR_GOTO(THIS->name, cbk, out);
+
+ /* Event list should be either GLFS_EVENT_ANY
+ * or list of supported individual events (up_events)
+ */
+ if ((event_list != GLFS_EVENT_ANY) && (event_list & ~up_events)) {
+ errno = EINVAL;
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "event_list=(0x%08x)", event_list, NULL);
+ goto out;
+ }
+
+ /* in case other thread does unregister */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ if (event_list & GLFS_EVENT_INODE_INVALIDATE) {
+ /* @todo: Check if features.cache-invalidation is
+ * enabled.
+ */
+ fs->upcall_events |= GF_UPCALL_CACHE_INVALIDATION;
+ ret |= GLFS_EVENT_INODE_INVALIDATE;
+ }
+ if (event_list & GLFS_EVENT_RECALL_LEASE) {
+ /* @todo: Check if features.leases is enabled */
+ fs->upcall_events |= GF_UPCALL_RECALL_LEASE;
+ ret |= GLFS_EVENT_RECALL_LEASE;
+ }
+ /* Override cbk function if existing */
+ fs->up_cbk = cbk;
+ fs->up_data = data;
+ fs->cache_upcalls = _gf_true;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0)
+int
+pub_glfs_upcall_unregister(struct glfs *fs, uint32_t event_list)
+{
+ int ret = 0;
+ /* list of supported upcall events */
+ uint32_t up_events = (GLFS_EVENT_INODE_INVALIDATE |
+ GLFS_EVENT_RECALL_LEASE);
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* Event list should be either GLFS_EVENT_ANY
+ * or list of supported individual events (up_events)
+ */
+ if ((event_list != GLFS_EVENT_ANY) && (event_list & ~up_events)) {
+ errno = EINVAL;
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "event_list=(0x%08x)", event_list, NULL);
+ goto out;
+ }
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ /* We already checked if event_list contains list of supported
+ * upcall events. No other specific checks needed as of now for
+ * unregister */
+ fs->upcall_events &= ~(event_list);
+ ret |= ((event_list == GLFS_EVENT_ANY) ? up_events : event_list);
+
+ /* If there are no upcall events registered, reset cbk */
+ if (fs->upcall_events == 0) {
+ fs->up_cbk = NULL;
+ fs->up_data = NULL;
+ fs->cache_upcalls = _gf_false;
+ }
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 7.0)
+int
+pub_glfs_set_statedump_path(struct glfs *fs, const char *path)
+{
+ struct stat st;
+ int ret;
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!path) {
+ gf_log("glfs", GF_LOG_ERROR, "path is NULL");
+ errno = EINVAL;
+ goto err;
+ }
+
+ /* If path is not present OR, if it is directory AND has enough permission
+ * to create files, then proceed */
+ ret = sys_stat(path, &st);
+ if (ret && errno != ENOENT) {
+ gf_log("glfs", GF_LOG_ERROR, "%s: not a valid path (%s)", path,
+ strerror(errno));
+ errno = EINVAL;
+ goto err;
+ }
+
+ if (!ret) {
+ /* file is present, now check other things */
+ if (!S_ISDIR(st.st_mode)) {
+ gf_log("glfs", GF_LOG_ERROR, "%s: path is not directory", path);
+ errno = EINVAL;
+ goto err;
+ }
+ if (sys_access(path, W_OK | X_OK) < 0) {
+ gf_log("glfs", GF_LOG_ERROR,
+ "%s: path doesn't have write permission", path);
+ errno = EPERM;
+ goto err;
+ }
+ }
+
+ /* If set, it needs to be freed, so we don't have leak */
+ GF_FREE(fs->ctx->statedump_path);
+
+ fs->ctx->statedump_path = gf_strdup(path);
+ if (!fs->ctx->statedump_path) {
+ gf_log("glfs", GF_LOG_ERROR,
+ "%s: failed to set statedump path, no memory", path);
+ errno = ENOMEM;
+ goto err;
+ }
+
+ __GLFS_EXIT_FS;
+
+ return 0;
+err:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return -1;
+}
diff --git a/api/src/glfs.h b/api/src/glfs.h
new file mode 100644
index 00000000000..279d11d58ee
--- /dev/null
+++ b/api/src/glfs.h
@@ -0,0 +1,1485 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_H
+#define _GLFS_H
+
+/*
+ Enforce the following flags as libgfapi is built
+ with them, and we want programs linking against them to also
+ be built with these flags. This is necessary as it affects
+ some of the structures defined in libc headers (like struct stat)
+ and those definitions need to be consistently compiled in
+ both the library and the application.
+*/
+
+/* Values for valid flags to be used when using XXXsetattr, to set multiple
+ attribute values passed via the related stat structure.
+ */
+
+#define GFAPI_SET_ATTR_MODE 0x1
+#define GFAPI_SET_ATTR_UID 0x2
+#define GFAPI_SET_ATTR_GID 0x4
+#define GFAPI_SET_ATTR_SIZE 0x8
+#define GFAPI_SET_ATTR_ATIME 0x10
+#define GFAPI_SET_ATTR_MTIME 0x20
+
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+#ifndef __USE_FILE_OFFSET64
+#define __USE_FILE_OFFSET64
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <sys/cdefs.h>
+#include <dirent.h>
+#include <sys/statvfs.h>
+#include <stdint.h>
+#include <sys/time.h>
+
+/*
+ * For off64_t to be defined, we need both
+ * __USE_LARGEFILE64 to be true and __off64_t_defnined to be
+ * false. But, making __USE_LARGEFILE64 true causes other issues
+ * such as redinition of stat and fstat to stat64 and fstat64
+ * respectively which again causes compilation issues.
+ * Without off64_t being defined, this will not compile as
+ * copy_file_range uses off64_t. Hence define it here. First
+ * check whether __off64_t_defined is true or not. <unistd.h>
+ * sets that flag when it defines off64_t. If __off64_t_defined
+ * is false and __USE_FILE_OFFSET64 is true, then go on to define
+ * off64_t using __off64_t.
+ */
+#ifndef GF_BSD_HOST_OS
+#if defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined)
+typedef __off64_t off64_t;
+#endif /* defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) */
+#else
+#include <stdio.h>
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
+
+#if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS)
+#include <sys/acl.h>
+#else
+typedef void *acl_t;
+typedef int acl_type_t;
+#endif
+
+/* Portability non glibc c++ build systems */
+#ifndef __THROW
+#if defined __cplusplus
+#define __THROW throw()
+#else
+#define __THROW
+#endif
+#endif
+
+#ifndef GF_DARWIN_HOST_OS
+#define GFAPI_PUBLIC(sym, ver) /**/
+#define GFAPI_PRIVATE(sym, ver) /**/
+#else
+#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver))
+#define GFAPI_PRIVATE(sym, ver) \
+ __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver))
+#endif
+
+__BEGIN_DECLS
+
+/* The filesystem object. One object per 'virtual mount' */
+struct glfs;
+typedef struct glfs glfs_t;
+
+/*
+ SYNOPSIS
+
+ glfs_new: Create a new 'virtual mount' object.
+
+ DESCRIPTION
+
+ This is most likely the very first function you will use. This function
+ will create a new glfs_t (virtual mount) object in memory.
+
+ On this newly created glfs_t, you need to be either set a volfile path
+ (glfs_set_volfile) or a volfile server (glfs_set_volfile_server).
+
+ The glfs_t object needs to be initialized with glfs_init() before you
+ can start issuing file operations on it.
+
+ PARAMETERS
+
+ @volname: Name of the volume. This identifies the server-side volume and
+ the fetched volfile (equivalent of --volfile-id command line
+ parameter to glusterfsd). When used with glfs_set_volfile() the
+ @volname has no effect (except for appearing in log messages).
+
+ RETURN VALUES
+
+ NULL : Out of memory condition.
+ Others : Pointer to the newly created glfs_t virtual mount object.
+
+*/
+
+glfs_t *
+glfs_new(const char *volname) __THROW GFAPI_PUBLIC(glfs_new, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile: Specify the path to the volume specification file.
+
+ DESCRIPTION
+
+ If you are using a static volume specification file (without dynamic
+ volume management abilities from the CLI), then specify the path to
+ the volume specification file.
+
+ This is incompatible with glfs_set_volfile_server().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @volfile: Path to the locally available volume specification file.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_set_volfile(glfs_t *fs, const char *volfile) __THROW
+ GFAPI_PUBLIC(glfs_set_volfile, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile_server: Specify the list of addresses for management server.
+
+ DESCRIPTION
+
+ This function specifies the list of addresses for the management server
+ (glusterd) to connect, and establish the volume configuration. The @volname
+ parameter passed to glfs_new() is the volume which will be virtually
+ mounted as the glfs_t object. All operations performed by the CLI at
+ the management server will automatically be reflected in the 'virtual
+ mount' object as it maintains a connection to glusterd and polls on
+ configuration change notifications.
+
+ This is incompatible with glfs_set_volfile().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @transport: String specifying the transport used to connect to the
+ management daemon. Specifying NULL will result in the
+ usage of the default (tcp) transport type. Permitted
+ values are "tcp" or "unix".
+
+ @host: String specifying the address where to find the management daemon.
+ Socket path, while using Unix domain socket as transport type.
+ This would either be
+ - FQDN (e.g : "storage01.company.com") or
+ - ASCII (e.g : "192.168.22.1") or
+ - Socket path (e.g : "/var/run/glusterd.socket")
+
+ NOTE: This API is special, multiple calls to this function with different
+ volfile servers, port or transport-type would create a list of volfile
+ servers which would be polled during `volfile_fetch_attempts()`
+
+ @port: The TCP port number where gluster management daemon is listening.
+ Specifying 0 uses the default port number GF_DEFAULT_BASE_PORT.
+ This parameter is unused if you are using a UNIX domain socket.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_set_volfile_server(glfs_t *fs, const char *transport, const char *host,
+ int port) __THROW
+ GFAPI_PUBLIC(glfs_set_volfile_server, 3.4.0);
+
+int
+glfs_unset_volfile_server(glfs_t *fs, const char *transport, const char *host,
+ int port) __THROW
+ GFAPI_PUBLIC(glfs_unset_volfile_server, 3.5.1);
+/*
+ SYNOPSIS
+
+ glfs_set_logging: Specify logging parameters.
+
+ DESCRIPTION
+
+ This function specifies logging parameters for the virtual mount.
+ Default log file is /dev/null.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the logging parameters.
+
+ @logfile: The logfile to be used for logging. Will be created if it does not
+ already exist (provided system permissions allow). If NULL, a new
+ logfile will be created in default log directory associated with
+ the glusterfs installation.
+
+ @loglevel: Numerical value specifying the degree of verbosity. Higher the
+ value, more verbose the logging.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_set_logging(glfs_t *fs, const char *logfile, int loglevel) __THROW
+ GFAPI_PUBLIC(glfs_set_logging, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_init: Initialize the 'virtual mount'
+
+ DESCRIPTION
+
+ This function initializes the glfs_t object. This consists of many steps:
+ - Spawn a poll-loop thread.
+ - Establish connection to management daemon and receive volume specification.
+ - Construct translator graph and initialize graph.
+ - Wait for initialization (connecting to all bricks) to complete.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_init(glfs_t *fs) __THROW GFAPI_PUBLIC(glfs_init, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_fini: Cleanup and destroy the 'virtual mount'
+
+ DESCRIPTION
+
+ This function attempts to gracefully destroy glfs_t object. An attempt is
+ made to wait for all background processing to complete before returning.
+
+ glfs_fini() must be called after all operations on glfs_t is finished.
+
+ IMPORTANT
+
+ IT IS NECESSARY TO CALL glfs_fini() ON ALL THE INITIALIZED glfs_t
+ OBJECTS BEFORE TERMINATING THE PROGRAM. THERE MAY BE CACHED AND
+ UNWRITTEN / INCOMPLETE OPERATIONS STILL IN PROGRESS EVEN THOUGH THE
+ API CALLS HAVE RETURNED. glfs_fini() WILL WAIT FOR BACKGROUND OPERATIONS
+ TO COMPLETE BEFORE RETURNING, THEREBY MAKING IT SAFE FOR THE PROGRAM TO
+ EXIT.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be destroyed.
+
+ RETURN VALUES
+
+ 0 : Success.
+*/
+
+int
+glfs_fini(glfs_t *fs) __THROW GFAPI_PUBLIC(glfs_fini, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_getvol: Get the volfile associated with a 'virtual mount'
+
+ DESCRIPTION
+
+ Sometimes it's useful e.g. for scripts to see the volfile, so that they
+ can parse it and find subvolumes to do things like split-brain resolution
+ or custom layouts. The API here was specifically intended to make access
+ e.g. from Python as simple as possible.
+
+ Note that the volume must be started (not necessarily mounted) for this
+ to work.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object for which a volfile is desired
+ @buf: Pointer to a place for the volfile length to be stored
+ @len: Length of @buf
+
+ RETURN VALUES
+
+ >0: filled N bytes of buffer
+ 0: no volfile available
+ <0: volfile length exceeds @len by N bytes (@buf unchanged)
+*/
+
+ssize_t
+glfs_get_volfile(glfs_t *fs, void *buf, size_t len) __THROW
+ GFAPI_PUBLIC(glfs_get_volfile, 3.6.0);
+
+/*
+ SYNOPSIS
+
+ glfs_get_volumeid: Copy the Volume UUID stored in the glfs object fs.
+
+ DESCRIPTION
+
+ This function when invoked for the first time sends RPC call to the
+ the management server (glusterd) to fetch volume uuid and stores it
+ in the glusterfs_context linked to the glfs object fs which can be used
+ in the subsequent calls. Later it parses that UUID to convert it from
+ canonical string format into an opaque byte array and copy it into
+ the volid array. In case if either of the input parameters, volid or
+ size, is NULL, number of bytes required to copy the volume UUID is returned.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be used to retrieve and store
+ volume's UUID.
+ @volid: Pointer to a place for the volume UUID to be stored
+ @size: Length of @volid
+
+ RETURN VALUES
+
+ -1 : Failure. @errno will be set with the type of failure.
+ Others : length of the volume UUID stored.
+*/
+
+int
+glfs_get_volumeid(glfs_t *fs, char *volid, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_get_volumeid, 3.5.0);
+
+/*
+ * FILE OPERATION
+ *
+ * What follows are filesystem operations performed on the
+ * 'virtual mount'. The calls here are kept as close to
+ * the POSIX system calls as possible.
+ *
+ * Notes:
+ *
+ * - All paths specified, even if absolute, are relative to the
+ * root of the virtual mount and not the system root (/).
+ *
+ */
+
+/* The file descriptor object. One per open file/directory. */
+
+struct glfs_fd;
+typedef struct glfs_fd glfs_fd_t;
+
+/*
+ * Mask for request/result items in the struct glfs_stat.
+ *
+ * Query request/result mask for glfs_stat() (family of functions) and
+ * struct glfs_stat::glfs_st_mask.
+ *
+ * These bits should be set in the mask argument of glfs_stat() (family of
+ * functions) to request particular items when calling glfs_stat().
+ *
+ * NOTE: Lower order 32 bits are used to reflect statx(2) bits. For Gluster
+ * specific attrs/extensions, use higher order 32 bits.
+ *
+ */
+#define GLFS_STAT_TYPE 0x0000000000000001U /* Want/got stx_mode & S_IFMT */
+#define GLFS_STAT_MODE 0x0000000000000002U /* Want/got stx_mode & ~S_IFMT */
+#define GLFS_STAT_NLINK 0x0000000000000004U /* Want/got stx_nlink */
+#define GLFS_STAT_UID 0x0000000000000008U /* Want/got stx_uid */
+#define GLFS_STAT_GID 0x0000000000000010U /* Want/got stx_gid */
+#define GLFS_STAT_ATIME 0x0000000000000020U /* Want/got stx_atime */
+#define GLFS_STAT_MTIME 0x0000000000000040U /* Want/got stx_mtime */
+#define GLFS_STAT_CTIME 0x0000000000000080U /* Want/got stx_ctime */
+#define GLFS_STAT_INO 0x0000000000000100U /* Want/got stx_ino */
+#define GLFS_STAT_SIZE 0x0000000000000200U /* Want/got stx_size */
+#define GLFS_STAT_BLOCKS 0x0000000000000400U /* Want/got stx_blocks */
+#define GLFS_STAT_BASIC_STATS \
+ 0x00000000000007ffU /* Items in the normal stat struct */
+#define GLFS_STAT_BTIME 0x0000000000000800U /* Want/got stx_btime */
+#define GLFS_STAT_ALL 0x0000000000000fffU /* All currently supported flags */
+#define GLFS_STAT_RESERVED \
+ 0x8000000000000000U /* Reserved to denote future expansion */
+
+/* Macros for checking validity of struct glfs_stat members.*/
+#define GLFS_STAT_TYPE_VALID(stmask) (stmask & GLFS_STAT_TYPE)
+#define GLFS_STAT_MODE_VALID(stmask) (stmask & GLFS_STAT_MODE)
+#define GLFS_STAT_NLINK_VALID(stmask) (stmask & GLFS_STAT_NLINK)
+#define GLFS_STAT_UID_VALID(stmask) (stmask & GLFS_STAT_UID)
+#define GLFS_STAT_GID_VALID(stmask) (stmask & GLFS_STAT_GID)
+#define GLFS_STAT_ATIME_VALID(stmask) (stmask & GLFS_STAT_ATIME)
+#define GLFS_STAT_MTIME_VALID(stmask) (stmask & GLFS_STAT_MTIME)
+#define GLFS_STAT_CTIME_VALID(stmask) (stmask & GLFS_STAT_CTIME)
+#define GLFS_STAT_INO_VALID(stmask) (stmask & GLFS_STAT_INO)
+#define GLFS_STAT_SIZE_VALID(stmask) (stmask & GLFS_STAT_SIZE)
+#define GLFS_STAT_BLOCKS_VALID(stmask) (stmask & GLFS_STAT_BLOCKS)
+#define GLFS_STAT_BTIME_VALID(stmask) (stmask & GLFS_STAT_BTIME)
+#define GLFS_STAT_GFID_VALID(stmask) (stmask & GLFS_STAT_GFID)
+
+/*
+ * Attributes to be found in glfs_st_attributes and masked in
+ * glfs_st_attributes_mask.
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to programs.
+ *
+ * NOTE: Lower order 32 bits are used to reflect statx(2) attribute bits. For
+ * Gluster specific attrs, use higher order 32 bits.
+ *
+ * NOTE: We do not support any file attributes or state as yet!
+ */
+#define GLFS_STAT_ATTR_RESERVED \
+ 0x8000000000000000U /* Reserved to denote future expansion */
+
+/* Extended file attribute structure.
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to glfs_stat(). What glfs_stat() actually got will be indicated
+ * in glfs_st_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ * - the bit will be cleared, and
+ *
+ * - the datum value is undefined
+ *
+ * - otherwise, if explicitly requested:
+ *
+ * - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in, it will be filled in
+ * anyway, and the bit will be set upon return;
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ */
+
+struct glfs_stat {
+ uint64_t glfs_st_mask; /* What results were written [uncond] */
+ uint64_t glfs_st_attributes; /* Flags conveying information about the file
+ [uncond] */
+ uint64_t glfs_st_attributes_mask; /* Mask to show what's supported in
+ st_attributes [ucond] */
+ struct timespec glfs_st_atime; /* Last access time */
+ struct timespec glfs_st_btime; /* File creation time */
+ struct timespec glfs_st_ctime; /* Last attribute change time */
+ struct timespec glfs_st_mtime; /* Last data modification time */
+ ino_t glfs_st_ino; /* Inode number */
+ off_t glfs_st_size; /* File size */
+ blkcnt_t glfs_st_blocks; /* Number of 512-byte blocks allocated */
+ uint32_t glfs_st_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ uint32_t glfs_st_rdev_minor;
+ uint32_t glfs_st_dev_major; /* ID of device containing file [uncond] */
+ uint32_t glfs_st_dev_minor;
+ blksize_t glfs_st_blksize; /* Preferred general I/O size [uncond] */
+ nlink_t glfs_st_nlink; /* Number of hard links */
+ uid_t glfs_st_uid; /* User ID of owner */
+ gid_t glfs_st_gid; /* Group ID of owner */
+ mode_t glfs_st_mode; /* File mode */
+};
+
+#define GLFS_LEASE_ID_SIZE 16 /* 128bits */
+typedef char glfs_leaseid_t[GLFS_LEASE_ID_SIZE];
+
+/*
+ * PER THREAD IDENTITY MODIFIERS
+ *
+ * The following operations enable to set a per thread identity context
+ * for the glfs APIs to perform operations as. The calls here are kept as close
+ * to POSIX equivalents as possible.
+ *
+ * NOTES:
+ *
+ * - setgroups is a per thread setting, hence this is named as fsgroups to be
+ * close in naming to the fs(u/g)id APIs
+ * - Typical mode of operation is to set the IDs as required, with the
+ * supplementary groups being optionally set, make the glfs call and post the
+ * glfs operation set them back to eu/gid or uid/gid as appropriate to the
+ * caller
+ * - The groups once set, need to be unset by setting the size to 0 (in which
+ * case the list argument is a do not care)
+ * - In case of leases feature enables, setfsleaseid is used to set and reset
+ * leaseid before and after every I/O operation.
+ * - Once a process for a thread of operation choses to set the IDs, all glfs
+ * calls made from that thread would default to the IDs set for the thread.
+ * As a result use these APIs with care and ensure that the set IDs are
+ * reverted to global process defaults as required.
+ *
+ */
+int
+glfs_setfsuid(uid_t fsuid) __THROW GFAPI_PUBLIC(glfs_setfsuid, 3.4.2);
+
+int
+glfs_setfsgid(gid_t fsgid) __THROW GFAPI_PUBLIC(glfs_setfsgid, 3.4.2);
+
+int
+glfs_setfsgroups(size_t size, const gid_t *list) __THROW
+ GFAPI_PUBLIC(glfs_setfsgroups, 3.4.2);
+
+int
+glfs_setfsleaseid(glfs_leaseid_t leaseid) __THROW
+ GFAPI_PUBLIC(glfs_setfsleaseid, 4.0.0);
+
+/*
+ SYNOPSIS
+
+ glfs_open: Open a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @flags: Open flags. See open(2). O_CREAT is not supported.
+ Use glfs_creat() for creating files.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *
+glfs_open(glfs_t *fs, const char *path, int flags) __THROW
+ GFAPI_PUBLIC(glfs_open, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_creat: Create a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @mode: Permission of the file to be created.
+
+ @flags: Create flags. See open(2). O_EXCL is supported.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *
+glfs_creat(glfs_t *fs, const char *path, int flags, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_creat, 3.4.0);
+
+int
+glfs_close(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_close, 3.4.0);
+
+glfs_t *
+glfs_from_glfd(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_from_glfd, 3.4.0);
+
+int
+glfs_set_xlator_option(glfs_t *fs, const char *xlator, const char *key,
+ const char *value) __THROW
+ GFAPI_PUBLIC(glfs_set_xlator_option, 3.4.0);
+
+/*
+
+ glfs_io_cbk
+
+ The following is the function type definition of the callback
+ function pointer which has to be provided by the caller to the
+ *_async() versions of the IO calls.
+
+ The callback function is called on completion of the requested
+ IO, and the appropriate return value is returned in @ret.
+
+ In case of an error in completing the IO, @ret will be -1 and
+ @errno will be set with the appropriate error.
+
+ @ret will be same as the return value of the non _async() variant
+ of the particular call
+
+ @data is the same context pointer provided by the caller at the
+ time of issuing the async IO call. This can be used by the
+ caller to differentiate different instances of the async requests
+ in a common callback function.
+
+ @prestat and @poststat are allocated on the stack, that are auto destroyed
+ post the callback function returns.
+*/
+
+typedef void (*glfs_io_cbk)(glfs_fd_t *fd, ssize_t ret,
+ struct glfs_stat *prestat,
+ struct glfs_stat *poststat, void *data);
+
+// glfs_{read,write}[_async]
+
+ssize_t
+glfs_read(glfs_fd_t *fd, void *buf, size_t count, int flags) __THROW
+ GFAPI_PUBLIC(glfs_read, 3.4.0);
+
+ssize_t
+glfs_write(glfs_fd_t *fd, const void *buf, size_t count, int flags) __THROW
+ GFAPI_PUBLIC(glfs_write, 3.4.0);
+
+int
+glfs_read_async(glfs_fd_t *fd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_read_async, 6.0);
+
+int
+glfs_write_async(glfs_fd_t *fd, const void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_write_async, 6.0);
+
+// glfs_{read,write}v[_async]
+
+ssize_t
+glfs_readv(glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags) __THROW GFAPI_PUBLIC(glfs_readv, 3.4.0);
+
+ssize_t
+glfs_writev(glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags) __THROW GFAPI_PUBLIC(glfs_writev, 3.4.0);
+
+int
+glfs_readv_async(glfs_fd_t *fd, const struct iovec *iov, int count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_readv_async, 6.0);
+
+int
+glfs_writev_async(glfs_fd_t *fd, const struct iovec *iov, int count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_writev_async, 6.0);
+
+// glfs_p{read,write}[_async]
+
+ssize_t
+glfs_pread(glfs_fd_t *fd, void *buf, size_t count, off_t offset, int flags,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_pread, 6.0);
+
+ssize_t
+glfs_pwrite(glfs_fd_t *fd, const void *buf, size_t count, off_t offset,
+ int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_pwrite, 6.0);
+
+int
+glfs_pread_async(glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_pread_async, 6.0);
+
+int
+glfs_pwrite_async(glfs_fd_t *fd, const void *buf, int count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_pwrite_async, 6.0);
+
+// glfs_p{read,write}v[_async]
+
+ssize_t
+glfs_preadv(glfs_fd_t *fd, const struct iovec *iov, int iovcnt, off_t offset,
+ int flags) __THROW GFAPI_PUBLIC(glfs_preadv, 3.4.0);
+
+ssize_t
+glfs_pwritev(glfs_fd_t *fd, const struct iovec *iov, int iovcnt, off_t offset,
+ int flags) __THROW GFAPI_PUBLIC(glfs_pwritev, 3.4.0);
+
+int
+glfs_preadv_async(glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_preadv_async, 6.0);
+
+int
+glfs_pwritev_async(glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_pwritev_async, 6.0);
+
+off_t
+glfs_lseek(glfs_fd_t *fd, off_t offset, int whence) __THROW
+ GFAPI_PUBLIC(glfs_lseek, 3.4.0);
+
+ssize_t
+glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+ unsigned int flags, struct glfs_stat *statbuf,
+ struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_copy_file_range, 6.0);
+
+int
+glfs_truncate(glfs_t *fs, const char *path, off_t length) __THROW
+ GFAPI_PUBLIC(glfs_truncate, 3.7.15);
+
+int
+glfs_ftruncate(glfs_fd_t *fd, off_t length, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_ftruncate, 6.0);
+
+int
+glfs_ftruncate_async(glfs_fd_t *fd, off_t length, glfs_io_cbk fn,
+ void *data) __THROW
+ GFAPI_PUBLIC(glfs_ftruncate_async, 6.0);
+
+int
+glfs_lstat(glfs_t *fs, const char *path, struct stat *buf) __THROW
+ GFAPI_PUBLIC(glfs_lstat, 3.4.0);
+
+int
+glfs_stat(glfs_t *fs, const char *path, struct stat *buf) __THROW
+ GFAPI_PUBLIC(glfs_stat, 3.4.0);
+
+int
+glfs_fstat(glfs_fd_t *fd, struct stat *buf) __THROW
+ GFAPI_PUBLIC(glfs_fstat, 3.4.0);
+
+int
+glfs_fsync(glfs_fd_t *fd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_fsync, 6.0);
+
+int
+glfs_fsync_async(glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_fsync_async, 6.0);
+
+int
+glfs_fdatasync(glfs_fd_t *fd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_fdatasync, 6.0);
+
+int
+glfs_fdatasync_async(glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_fdatasync_async, 6.0);
+
+int
+glfs_access(glfs_t *fs, const char *path, int mode) __THROW
+ GFAPI_PUBLIC(glfs_access, 3.4.0);
+
+int
+glfs_symlink(glfs_t *fs, const char *oldpath, const char *newpath) __THROW
+ GFAPI_PUBLIC(glfs_symlink, 3.4.0);
+
+int
+glfs_readlink(glfs_t *fs, const char *path, char *buf, size_t bufsiz) __THROW
+ GFAPI_PUBLIC(glfs_readlink, 3.4.0);
+
+int
+glfs_mknod(glfs_t *fs, const char *path, mode_t mode, dev_t dev) __THROW
+ GFAPI_PUBLIC(glfs_mknod, 3.4.0);
+
+int
+glfs_mkdir(glfs_t *fs, const char *path, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_mkdir, 3.4.0);
+
+int
+glfs_unlink(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_unlink, 3.4.0);
+
+int
+glfs_rmdir(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_rmdir, 3.4.0);
+
+int
+glfs_rename(glfs_t *fs, const char *oldpath, const char *newpath) __THROW
+ GFAPI_PUBLIC(glfs_rename, 3.4.0);
+
+int
+glfs_link(glfs_t *fs, const char *oldpath, const char *newpath) __THROW
+ GFAPI_PUBLIC(glfs_link, 3.4.0);
+
+glfs_fd_t *
+glfs_opendir(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_opendir, 3.4.0);
+
+/*
+ * @glfs_readdir_r and @glfs_readdirplus_r ARE thread safe AND re-entrant,
+ * but the interface has ambiguity about the size of @dirent to be allocated
+ * before calling the APIs. 512 byte buffer (for @dirent) is sufficient for
+ * all known systems which are tested againt glusterfs/gfapi, but may be
+ * insufficient in the future.
+ */
+
+int
+glfs_readdir_r(glfs_fd_t *fd, struct dirent *dirent,
+ struct dirent **result) __THROW
+ GFAPI_PUBLIC(glfs_readdir_r, 3.4.0);
+
+int
+glfs_readdirplus_r(glfs_fd_t *fd, struct stat *stat, struct dirent *dirent,
+ struct dirent **result) __THROW
+ GFAPI_PUBLIC(glfs_readdirplus_r, 3.4.0);
+
+/*
+ * @glfs_readdir and @glfs_readdirplus are NEITHER thread safe NOR re-entrant
+ * when called on the same directory handle. However they ARE thread safe
+ * AND re-entrant when called on different directory handles (which may be
+ * referring to the same directory too.)
+ */
+
+struct dirent *
+glfs_readdir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_readdir, 3.5.0);
+
+struct dirent *
+glfs_readdirplus(glfs_fd_t *fd, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_readdirplus, 3.5.0);
+
+long
+glfs_telldir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_telldir, 3.4.0);
+
+void
+glfs_seekdir(glfs_fd_t *fd, long offset) __THROW
+ GFAPI_PUBLIC(glfs_seekdir, 3.4.0);
+
+int
+glfs_closedir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_closedir, 3.4.0);
+
+int
+glfs_statvfs(glfs_t *fs, const char *path, struct statvfs *buf) __THROW
+ GFAPI_PUBLIC(glfs_statvfs, 3.4.0);
+
+int
+glfs_chmod(glfs_t *fs, const char *path, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_chmod, 3.4.0);
+
+int
+glfs_fchmod(glfs_fd_t *fd, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_fchmod, 3.4.0);
+
+int
+glfs_chown(glfs_t *fs, const char *path, uid_t uid, gid_t gid) __THROW
+ GFAPI_PUBLIC(glfs_chown, 3.4.0);
+
+int
+glfs_lchown(glfs_t *fs, const char *path, uid_t uid, gid_t gid) __THROW
+ GFAPI_PUBLIC(glfs_lchown, 3.4.0);
+
+int
+glfs_fchown(glfs_fd_t *fd, uid_t uid, gid_t gid) __THROW
+ GFAPI_PUBLIC(glfs_fchown, 3.4.0);
+
+int
+glfs_utimens(glfs_t *fs, const char *path,
+ const struct timespec times[2]) __THROW
+ GFAPI_PUBLIC(glfs_utimens, 3.4.0);
+
+int
+glfs_lutimens(glfs_t *fs, const char *path,
+ const struct timespec times[2]) __THROW
+ GFAPI_PUBLIC(glfs_lutimens, 3.4.0);
+
+int
+glfs_futimens(glfs_fd_t *fd, const struct timespec times[2]) __THROW
+ GFAPI_PUBLIC(glfs_futimens, 3.4.0);
+
+ssize_t
+glfs_getxattr(glfs_t *fs, const char *path, const char *name, void *value,
+ size_t size) __THROW GFAPI_PUBLIC(glfs_getxattr, 3.4.0);
+
+ssize_t
+glfs_lgetxattr(glfs_t *fs, const char *path, const char *name, void *value,
+ size_t size) __THROW GFAPI_PUBLIC(glfs_lgetxattr, 3.4.0);
+
+ssize_t
+glfs_fgetxattr(glfs_fd_t *fd, const char *name, void *value,
+ size_t size) __THROW GFAPI_PUBLIC(glfs_fgetxattr, 3.4.0);
+
+ssize_t
+glfs_listxattr(glfs_t *fs, const char *path, void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_listxattr, 3.4.0);
+
+ssize_t
+glfs_llistxattr(glfs_t *fs, const char *path, void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_llistxattr, 3.4.0);
+
+ssize_t
+glfs_flistxattr(glfs_fd_t *fd, void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_flistxattr, 3.4.0);
+
+int
+glfs_setxattr(glfs_t *fs, const char *path, const char *name, const void *value,
+ size_t size, int flags) __THROW
+ GFAPI_PUBLIC(glfs_setxattr, 3.4.0);
+
+int
+glfs_lsetxattr(glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags) __THROW
+ GFAPI_PUBLIC(glfs_lsetxattr, 3.4.0);
+
+int
+glfs_fsetxattr(glfs_fd_t *fd, const char *name, const void *value, size_t size,
+ int flags) __THROW GFAPI_PUBLIC(glfs_fsetxattr, 3.4.0);
+
+int
+glfs_removexattr(glfs_t *fs, const char *path, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_removexattr, 3.4.0);
+
+int
+glfs_lremovexattr(glfs_t *fs, const char *path, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_lremovexattr, 3.4.0);
+
+int
+glfs_fremovexattr(glfs_fd_t *fd, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_fremovexattr, 3.4.0);
+
+int
+glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len) __THROW
+ GFAPI_PUBLIC(glfs_fallocate, 3.5.0);
+
+int
+glfs_discard(glfs_fd_t *fd, off_t offset, size_t len) __THROW
+ GFAPI_PUBLIC(glfs_discard, 3.5.0);
+
+int
+glfs_discard_async(glfs_fd_t *fd, off_t length, size_t lent, glfs_io_cbk fn,
+ void *data) __THROW GFAPI_PUBLIC(glfs_discard_async, 6.0);
+
+int
+glfs_zerofill(glfs_fd_t *fd, off_t offset, off_t len) __THROW
+ GFAPI_PUBLIC(glfs_zerofill, 3.5.0);
+
+int
+glfs_zerofill_async(glfs_fd_t *fd, off_t length, off_t len, glfs_io_cbk fn,
+ void *data) __THROW GFAPI_PUBLIC(glfs_zerofill_async, 6.0);
+
+char *
+glfs_getcwd(glfs_t *fs, char *buf, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_getcwd, 3.4.0);
+
+int
+glfs_chdir(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_chdir, 3.4.0);
+
+int
+glfs_fchdir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_fchdir, 3.4.0);
+
+char *
+glfs_realpath(glfs_t *fs, const char *path, char *resolved_path) __THROW
+ GFAPI_PUBLIC(glfs_realpath, 3.7.17);
+
+/*
+ * @cmd and @flock are as specified in man fcntl(2).
+ */
+int
+glfs_posix_lock(glfs_fd_t *fd, int cmd, struct flock *flock) __THROW
+ GFAPI_PUBLIC(glfs_posix_lock, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_file_lock: Request extended byte range lock on a file
+
+ DESCRIPTION
+
+ This function is capable of requesting either advisory or mandatory type
+ byte range locks on a file.
+
+ Note: To set a unique owner key for locks based on a particular file
+ descriptor, make use of glfs_fd_set_lkowner() api to do so before
+ requesting lock via this api. This owner key will be further consumed
+ by other incoming data modifying file operations via the same file
+ descriptor.
+
+ PARAMETERS
+
+ @fd: File descriptor
+
+ @cmd: As specified in man fcntl(2).
+
+ @flock: As specified in man fcntl(2).
+
+ @lk_mode: Required lock type from options available with the
+ enum glfs_lock_mode_t defined below.
+
+ RETURN VALUES
+
+ 0 : Success. Lock has been granted.
+ -1 : Failure. @errno will be set indicating the type of failure.
+
+ */
+
+/* Lock modes used by glfs_file_lock() */
+enum glfs_lock_mode { GLFS_LK_ADVISORY = 0, GLFS_LK_MANDATORY };
+typedef enum glfs_lock_mode glfs_lock_mode_t;
+
+int
+glfs_file_lock(glfs_fd_t *fd, int cmd, struct flock *flock,
+ glfs_lock_mode_t lk_mode) __THROW
+ GFAPI_PUBLIC(glfs_file_lock, 3.13.0);
+
+glfs_fd_t *
+glfs_dup(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_dup, 3.4.0);
+
+void
+glfs_free(void *ptr) __THROW GFAPI_PUBLIC(glfs_free, 3.7.16);
+
+/*
+ * glfs_sysrq: send a system request to the @fs instance
+ *
+ * Different commands for @sysrq are possible, the defines for these are listed
+ * below the function definition.
+ *
+ * This function always returns success if the @sysrq is recognized. The return
+ * value does not way anythin about the result of the @sysrq execution. Not all
+ * @sysrq command will be able to return a success/failure status.
+ */
+int
+glfs_sysrq(glfs_t *fs, char sysrq) __THROW GFAPI_PUBLIC(glfs_sysrq, 3.10.0);
+
+#define GLFS_SYSRQ_HELP 'h' /* log a message with supported sysrq commands */
+#define GLFS_SYSRQ_STATEDUMP 's' /* create a statedump */
+
+/*
+ * Structure returned as part of xreaddirplus
+ */
+struct glfs_xreaddirp_stat;
+typedef struct glfs_xreaddirp_stat glfs_xreaddirp_stat_t;
+
+/* Request flags to be used in XREADDIRP operation */
+#define GFAPI_XREADDIRP_NULL \
+ 0x00000000 /* by default, no stat will be fetched */
+#define GFAPI_XREADDIRP_STAT 0x00000001 /* Get stat */
+#define GFAPI_XREADDIRP_HANDLE 0x00000002 /* Get object handle */
+
+/*
+ * This stat structure returned gets freed as part of glfs_free(xstat)
+ */
+struct stat *
+glfs_xreaddirplus_get_stat(glfs_xreaddirp_stat_t *xstat) __THROW
+ GFAPI_PUBLIC(glfs_xreaddirplus_get_stat, 3.11.0);
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_xreaddirplus_r: Extended Readirplus operation
+ *
+ * DESCRIPTION
+ *
+ * This API does readdirplus operation, but along with stat it can fetch other
+ * extra information like object handles etc for each of the dirents returned
+ * based on requested flags. On success it returns the set of flags successfully
+ * processed.
+ *
+ * Note that there are chances that some of the requested information may not be
+ * available or returned (for example if reached EOD). Ensure to validate the
+ * returned value to determine what flags have been successfully processed
+ * & set.
+ *
+ * PARAMETERS
+ *
+ * INPUT:
+ * @glfd: GFAPI file descriptor of the directory
+ * @flags: Flags determining xreaddirp_stat requested
+ * Current available values are:
+ * GFAPI_XREADDIRP_NULL
+ * GFAPI_XREADDIRP_STAT
+ * GFAPI_XREADDIRP_HANDLE
+ * @ext: Dirent struture to copy the values to
+ * (though optional recommended to be allocated by application
+ * esp., in multi-threaded environment)
+ *
+ * OUTPUT:
+ * @res: to store the next dirent value. If NULL and return value is '0',
+ * it means it reached end of the directory.
+ * @xstat_p: Pointer to contain all the requested data returned
+ * for that dirent. Application should make use of glfs_free() API
+ * to free this pointer and the variables returned by
+ * glfs_xreaddirplus_get_*() APIs.
+ *
+ * RETURN VALUE:
+ * >=0: SUCCESS (value contains the flags successfully processed)
+ * -1: FAILURE
+ */
+int
+glfs_xreaddirplus_r(glfs_fd_t *glfd, uint32_t flags,
+ glfs_xreaddirp_stat_t **xstat_p, struct dirent *ext,
+ struct dirent **res) __THROW
+ GFAPI_PUBLIC(glfs_xreaddirplus_r, 3.11.0);
+
+#define GFAPI_MAX_LOCK_OWNER_LEN 255
+
+/*
+ *
+ * DESCRIPTION
+ *
+ * This API allows application to set lk_owner on a fd.
+ * A glfd can be associated with only single lk_owner. In case if there
+ * is need to set another lk_owner, applications can make use of
+ * 'glfs_dup' to get duplicate glfd and set new lk_owner on that second
+ * glfd.
+ *
+ * Also its not recommended to override or clear lk_owner value as the
+ * same shall be used to flush any outstanding locks while closing the fd.
+ *
+ * PARAMETERS
+ *
+ * INPUT:
+ * @glfd: GFAPI file descriptor
+ * @len: Size of lk_owner buffer. Max value can be GFAPI_MAX_LOCK_OWNER_LEN
+ * @data: lk_owner data buffer.
+ *
+ * OUTPUT:
+ * 0: SUCCESS
+ * -1: FAILURE
+ */
+int
+glfs_fd_set_lkowner(glfs_fd_t *glfd, void *data, int len) __THROW
+ GFAPI_PUBLIC(glfs_fd_set_lkowner, 3.10.7);
+
+/*
+ * Applications (currently NFS-Ganesha) can make use of this
+ * structure to read upcall notifications sent by server either
+ * by polling or registering a callback function.
+ *
+ * On success, applications need to check for 'reason' to decide
+ * if any upcall event is received.
+ *
+ * Currently supported upcall_events -
+ * GLFS_UPCALL_INODE_INVALIDATE -
+ * 'event_arg' - glfs_upcall_inode
+ *
+ * After processing the event, applications need to free 'event_arg' with
+ * glfs_free().
+ *
+ * Also similar to I/Os, the application should ideally stop polling
+ * or unregister upcall_cbk function before calling glfs_fini(..).
+ * Hence making an assumption that 'fs' & ctx structures cannot be
+ * freed while in this routine.
+ */
+struct glfs_upcall;
+typedef struct glfs_upcall glfs_upcall_t;
+
+glfs_t *
+glfs_upcall_get_fs(glfs_upcall_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_get_fs, 3.7.16);
+
+enum glfs_upcall_reason {
+ GLFS_UPCALL_EVENT_NULL = 0,
+ GLFS_UPCALL_INODE_INVALIDATE, /* invalidate cache entry */
+ GLFS_UPCALL_RECALL_LEASE, /* recall lease */
+};
+typedef enum glfs_upcall_reason glfs_upcall_reason_t;
+
+glfs_upcall_reason_t
+glfs_upcall_get_reason(glfs_upcall_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_get_reason, 3.7.16);
+
+/*
+ * Applications first need to make use of above API i.e,
+ * "glfs_upcall_get_reason" to determine which upcall event it has
+ * received. Post that below API - "glfs_upcall_get_event" should
+ * be used to get corresponding upcall event object.
+ *
+ * Below are the upcall_reason and corresponding upcall_event objects:
+ * ==========================================================
+ * glfs_upcall_reason - event_object
+ * ==========================================================
+ * GLFS_UPCALL_EVENT_NULL - NULL
+ * GLFS_UPCALL_INODE_INVALIDATE - struct glfs_upcall_inode
+ * GLFS_UPCALL_RECALL_LEASE - struct glfs_upcall_lease
+ *
+ * After processing upcall event, glfs_free() should be called on the
+ * glfs_upcall.
+ */
+void *
+glfs_upcall_get_event(glfs_upcall_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_get_event, 3.7.16);
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_upcall_cbk: Upcall callback definition
+ *
+ * This is function type definition of the callback function pointer
+ * which has to be provided by the caller while registering for any
+ * upcall events.
+ *
+ * This function is called whenever any upcall which the application
+ * has registered for is received from the server.
+ *
+ * @up_arg: Upcall structure whose contents need to be interpreted by
+ * making use of glfs_upcall_* helper routines.
+ *
+ * @data: The same context pointer provided by the caller at the time of
+ * registering of upcall events. This may be used by the caller for any
+ * of its internal use while processing upcalls.
+ */
+typedef void (*glfs_upcall_cbk)(glfs_upcall_t *up_arg, void *data);
+
+/*
+ * List of upcall events supported by gluster/gfapi
+ */
+#define GLFS_EVENT_INODE_INVALIDATE 0x00000001 /* invalidate cache entry */
+#define GLFS_EVENT_RECALL_LEASE 0x00000002 /* Recall lease */
+#define GLFS_EVENT_ANY 0xffffffff /* for all the above events */
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_upcall_register: Register for upcall events
+ *
+ * DESCRIPTION
+ *
+ * This function is used to register for various upcall events application
+ * is interested in and the callback function to be invoked when such
+ * events are triggered.
+ *
+ * Multiple calls of this routine shall override cbk function. That means
+ * only one cbk function can be used for all the upcall events registered
+ * and that shall be the one last updated.
+ *
+ * PARAMETERS:
+ *
+ * INPUT:
+ * @fs: The 'virtual mount' object
+ *
+ * @event_list: List of upcall events to be registered.
+ * Current available values are:
+ * - GLFS_EVENT_INODE_INVALIDATE
+ * - GLFS_EVENT_RECALL_LEASE
+ *
+ * @cbk: The cbk routine to be invoked in case of any upcall received
+ * @data: Any opaque pointer provided by caller which shall be using while
+ * making cbk calls. This pointer may be used by caller for any of its
+ * internal use while processing upcalls. Can be NULL.
+ *
+ * RETURN VALUE:
+ * >0: SUCCESS (value contains the events successfully registered)
+ * -1: FAILURE
+ */
+int
+glfs_upcall_register(glfs_t *fs, uint32_t event_list, glfs_upcall_cbk cbk,
+ void *data) __THROW
+ GFAPI_PUBLIC(glfs_upcall_register, 3.13.0);
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_upcall_unregister: Unregister for upcall events
+ *
+ * DESCRIPTION
+ *
+ * This function is used to unregister the upcall events application
+ * is not interested in. In case if the caller unregisters all the events
+ * it has registered for, it shall no more receive any upcall event.
+ *
+ * PARAMETERS:
+ *
+ * INPUT:
+ * @fs: The 'virtual mount' object
+ *
+ * @event_list: List of upcall events to be unregistered.
+ * Current available values are:
+ * - GLFS_EVENT_INODE_INVALIDATE
+ * - GLFS_EVENT_RECALL_LEASE
+ * RETURN VALUE:
+ * >0: SUCCESS (value contains the events successfully unregistered)
+ * -1: FAILURE
+ */
+int
+glfs_upcall_unregister(glfs_t *fs, uint32_t event_list) __THROW
+ GFAPI_PUBLIC(glfs_upcall_unregister, 3.13.0);
+
+/* Lease Types */
+enum glfs_lease_types {
+ GLFS_LEASE_NONE = 0,
+ GLFS_RD_LEASE = 1,
+ GLFS_RW_LEASE = 2,
+};
+typedef enum glfs_lease_types glfs_lease_types_t;
+
+/* Lease cmds */
+enum glfs_lease_cmds {
+ GLFS_GET_LEASE = 1,
+ GLFS_SET_LEASE = 2,
+ GLFS_UNLK_LEASE = 3,
+};
+typedef enum glfs_lease_cmds glfs_lease_cmds_t;
+
+struct glfs_lease {
+ glfs_lease_cmds_t cmd;
+ glfs_lease_types_t lease_type;
+ glfs_leaseid_t lease_id;
+ unsigned int lease_flags;
+};
+typedef struct glfs_lease glfs_lease_t;
+
+typedef void (*glfs_recall_cbk)(glfs_lease_t lease, void *data);
+
+/*
+ SYNOPSIS
+
+ glfs_lease: Takes a lease on a file.
+
+ DESCRIPTION
+
+ This function takes lease on an open file.
+
+ PARAMETERS
+
+ @glfd: The fd of the file on which lease should be taken,
+ this fd is returned by glfs_open/glfs_create.
+
+ @lease: Struct that defines the lease operation to be performed
+ on the file.
+ @lease.cmd - Can be one of the following values
+ GF_GET_LEASE: Get the lease type currently present on the file,
+ lease.lease_type will contain GF_RD_LEASE
+ or GF_RW_LEASE or 0 if no leases.
+ GF_SET_LEASE: Set the lease of given lease.lease_type on the file.
+ GF_UNLK_LEASE: Unlock the lease present on the given fd.
+ Note that the every lease request should have
+ a corresponding unlk_lease.
+
+ @lease.lease_type - Can be one of the following values
+ GF_RD_LEASE: Read lease on a file, shared lease.
+ GF_RW_LEASE: Read-Write lease on a file, exclusive lease.
+
+ @lease.lease_id - A unique identification of lease, 128bits.
+
+ @fn: This is the function that is invoked when the lease has to be recalled
+ @data: It is a cookie, this pointer is returned as a part of recall
+
+ fn and data field are stored as a part of glfs_fd, hence if there are multiple
+ glfs_lease calls, each of them updates the fn and data fields. glfs_recall_cbk
+ will be invoked with the last updated fn and data
+
+ RETURN VALUES
+ 0: Successful completion
+ <0: Failure. @errno will be set with the type of failure
+*/
+
+int
+glfs_lease(glfs_fd_t *glfd, glfs_lease_t *lease, glfs_recall_cbk fn,
+ void *data) __THROW GFAPI_PUBLIC(glfs_lease, 4.0.0);
+
+/*
+ SYNOPSIS
+
+ glfs_fsetattr: Function to set attributes.
+ glfs_setattr: Function to set attributes
+
+ DESCRIPTION
+
+ The functions are used to set attributes on the file.
+
+ PARAMETERS
+
+ @glfs_fsetattr
+
+ @glfd: The fd of the file for which the attributes are to be set,
+ this fd is returned by glfs_open/glfs_create.
+
+ @glfs_setattr
+
+ @fs: File object.
+
+ @path: The path of the file that is being operated on.
+
+ @follow: Flag used to resolve symlink.
+
+
+ @stat: Struct that has information about the file.
+
+ @valid: This is the mask bit, that accepts GFAPI_SET_ATTR* masks.
+ Refer glfs.h to see the mask definitions.
+
+ Both functions are similar in functionality, just that the
+ func setattr() uses file path whereas the func fsetattr()
+ uses the fd.
+
+ RETURN VALUES
+ 0: Successful completion
+ <0: Failure. @errno will be set with the type of failure
+
+ */
+
+int
+glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_fsetattr, 6.0);
+
+int
+glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow) __THROW GFAPI_PUBLIC(glfs_setattr, 6.0);
+
+/*
+ SYNOPSIS
+
+ glfs_set_statedump_path: Function to set statedump path.
+
+ DESCRIPTION
+
+ This function is used to set statedump directory
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @path: statedump path. Should be a directory. But the API won't fail if the
+ directory doesn't exist yet, as one may create it later.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+ */
+
+int
+glfs_set_statedump_path(struct glfs *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_set_statedump_path, 7.0);
+
+__END_DECLS
+#endif /* !_GLFS_H */
diff --git a/argp-standalone/Makefile.am b/argp-standalone/Makefile.am
deleted file mode 100644
index 4775d4876aa..00000000000
--- a/argp-standalone/Makefile.am
+++ /dev/null
@@ -1,38 +0,0 @@
-# From glibc
-
-# Copyright (C) 1997, 2003, 2004 Free Software Foundation, Inc.
-# This file is part of the GNU C Library.
-
-# The GNU C Library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Library General Public License as
-# published by the Free Software Foundation; either version 2 of the
-# License, or (at your option) any later version.
-
-# The GNU C Library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Library General Public License for more details.
-
-# You should have received a copy of the GNU Library General Public
-# License along with the GNU C Library; see the file COPYING.LIB. If
-# not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-AUTOMAKE_OPTIONS = foreign
-SUBDIRS = .
-
-LIBOBJS = @LIBOBJS@
-
-noinst_LIBRARIES = libargp.a
-
-noinst_HEADERS = argp.h argp-fmtstream.h argp-namefrob.h
-
-EXTRA_DIST = mempcpy.c strchrnul.c strndup.c strcasecmp.c vsnprintf.c autogen.sh
-
-# Leaves out argp-fs-xinl.c and argp-xinl.c
-libargp_a_SOURCES = argp-ba.c argp-eexst.c argp-fmtstream.c \
- argp-help.c argp-parse.c argp-pv.c \
- argp-pvh.c
-
-libargp_a_LIBADD = $(LIBOBJS)
-
-
diff --git a/argp-standalone/acinclude.m4 b/argp-standalone/acinclude.m4
deleted file mode 100644
index fb61e957dfa..00000000000
--- a/argp-standalone/acinclude.m4
+++ /dev/null
@@ -1,1084 +0,0 @@
-dnl Try to detect the type of the third arg to getsockname() et al
-AC_DEFUN([LSH_TYPE_SOCKLEN_T],
-[AH_TEMPLATE([socklen_t], [Length type used by getsockopt])
-AC_CACHE_CHECK([for socklen_t in sys/socket.h], ac_cv_type_socklen_t,
-[AC_EGREP_HEADER(socklen_t, sys/socket.h,
- [ac_cv_type_socklen_t=yes], [ac_cv_type_socklen_t=no])])
-if test $ac_cv_type_socklen_t = no; then
- AC_MSG_CHECKING(for AIX)
- AC_EGREP_CPP(yes, [
-#ifdef _AIX
- yes
-#endif
-],[
-AC_MSG_RESULT(yes)
-AC_DEFINE(socklen_t, size_t)
-],[
-AC_MSG_RESULT(no)
-AC_DEFINE(socklen_t, int)
-])
-fi
-])
-
-dnl Choose cc flags for compiling position independent code
-AC_DEFUN([LSH_CCPIC],
-[AC_MSG_CHECKING(CCPIC)
-AC_CACHE_VAL(lsh_cv_sys_ccpic,[
- if test -z "$CCPIC" ; then
- if test "$GCC" = yes ; then
- case `uname -sr` in
- BSD/OS*)
- case `uname -r` in
- 4.*) CCPIC="-fPIC";;
- *) CCPIC="";;
- esac
- ;;
- Darwin*)
- CCPIC="-fPIC"
- ;;
- SunOS\ 5.*)
- # Could also use -fPIC, if there are a large number of symbol reference
- CCPIC="-fPIC"
- ;;
- CYGWIN*)
- CCPIC=""
- ;;
- *)
- CCPIC="-fpic"
- ;;
- esac
- else
- case `uname -sr` in
- Darwin*)
- CCPIC="-fPIC"
- ;;
- IRIX*)
- CCPIC="-share"
- ;;
- hp*|HP*) CCPIC="+z"; ;;
- FreeBSD*) CCPIC="-fpic";;
- SCO_SV*) CCPIC="-KPIC -dy -Bdynamic";;
- UnixWare*|OpenUNIX*) CCPIC="-KPIC -dy -Bdynamic";;
- Solaris*) CCPIC="-KPIC -Bdynamic";;
- Windows_NT*) CCPIC="-shared" ;;
- esac
- fi
- fi
- OLD_CFLAGS="$CFLAGS"
- CFLAGS="$CFLAGS $CCPIC"
- AC_TRY_COMPILE([], [exit(0);],
- lsh_cv_sys_ccpic="$CCPIC", lsh_cv_sys_ccpic='')
- CFLAGS="$OLD_CFLAGS"
-])
-CCPIC="$lsh_cv_sys_ccpic"
-AC_MSG_RESULT($CCPIC)
-AC_SUBST([CCPIC])])
-
-dnl LSH_PATH_ADD(path-id, directory)
-AC_DEFUN([LSH_PATH_ADD],
-[AC_MSG_CHECKING($2)
-ac_exists=no
-if test -d "$2/." ; then
- ac_real_dir=`cd $2 && pwd`
- if test -n "$ac_real_dir" ; then
- ac_exists=yes
- for old in $1_REAL_DIRS ; do
- ac_found=no
- if test x$ac_real_dir = x$old ; then
- ac_found=yes;
- break;
- fi
- done
- if test $ac_found = yes ; then
- AC_MSG_RESULT(already added)
- else
- AC_MSG_RESULT(added)
- # LDFLAGS="$LDFLAGS -L $2"
- $1_REAL_DIRS="$ac_real_dir [$]$1_REAL_DIRS"
- $1_DIRS="$2 [$]$1_DIRS"
- fi
- fi
-fi
-if test $ac_exists = no ; then
- AC_MSG_RESULT(not found)
-fi
-])
-
-dnl LSH_RPATH_ADD(dir)
-AC_DEFUN([LSH_RPATH_ADD], [LSH_PATH_ADD(RPATH_CANDIDATE, $1)])
-
-dnl LSH_RPATH_INIT(candidates)
-AC_DEFUN([LSH_RPATH_INIT],
-[AC_MSG_CHECKING([for -R flag])
-RPATHFLAG=''
-case `uname -sr` in
- OSF1\ V4.*)
- RPATHFLAG="-rpath "
- ;;
- IRIX\ 6.*)
- RPATHFLAG="-rpath "
- ;;
- IRIX\ 5.*)
- RPATHFLAG="-rpath "
- ;;
- SunOS\ 5.*)
- if test "$TCC" = "yes"; then
- # tcc doesn't know about -R
- RPATHFLAG="-Wl,-R,"
- else
- RPATHFLAG=-R
- fi
- ;;
- Linux\ 2.*)
- RPATHFLAG="-Wl,-rpath,"
- ;;
- *)
- :
- ;;
-esac
-
-if test x$RPATHFLAG = x ; then
- AC_MSG_RESULT(none)
-else
- AC_MSG_RESULT([using $RPATHFLAG])
-fi
-
-RPATH_CANDIDATE_REAL_DIRS=''
-RPATH_CANDIDATE_DIRS=''
-
-AC_MSG_RESULT([Searching for libraries])
-
-for d in $1 ; do
- LSH_RPATH_ADD($d)
-done
-])
-
-dnl Try to execute a main program, and if it fails, try adding some
-dnl -R flag.
-dnl LSH_RPATH_FIX
-AC_DEFUN([LSH_RPATH_FIX],
-[if test $cross_compiling = no -a "x$RPATHFLAG" != x ; then
- ac_success=no
- AC_TRY_RUN([int main(int argc, char **argv) { return 0; }],
- ac_success=yes, ac_success=no, :)
-
- if test $ac_success = no ; then
- AC_MSG_CHECKING([Running simple test program failed. Trying -R flags])
-dnl echo RPATH_CANDIDATE_DIRS = $RPATH_CANDIDATE_DIRS
- ac_remaining_dirs=''
- ac_rpath_save_LDFLAGS="$LDFLAGS"
- for d in $RPATH_CANDIDATE_DIRS ; do
- if test $ac_success = yes ; then
- ac_remaining_dirs="$ac_remaining_dirs $d"
- else
- LDFLAGS="$RPATHFLAG$d $LDFLAGS"
-dnl echo LDFLAGS = $LDFLAGS
- AC_TRY_RUN([int main(int argc, char **argv) { return 0; }],
- [ac_success=yes
- ac_rpath_save_LDFLAGS="$LDFLAGS"
- AC_MSG_RESULT([adding $RPATHFLAG$d])
- ],
- [ac_remaining_dirs="$ac_remaining_dirs $d"], :)
- LDFLAGS="$ac_rpath_save_LDFLAGS"
- fi
- done
- RPATH_CANDIDATE_DIRS=$ac_remaining_dirs
- fi
- if test $ac_success = no ; then
- AC_MSG_RESULT(failed)
- fi
-fi
-])
-
-dnl Like AC_CHECK_LIB, but uses $KRB_LIBS rather than $LIBS.
-dnl LSH_CHECK_KRB_LIB(LIBRARY, FUNCTION, [, ACTION-IF-FOUND [,
-dnl ACTION-IF-NOT-FOUND [, OTHER-LIBRARIES]]])
-
-AC_DEFUN([LSH_CHECK_KRB_LIB],
-[AC_CHECK_LIB([$1], [$2],
- ifelse([$3], ,
- [[ac_tr_lib=HAVE_LIB`echo $1 | sed -e 's/[^a-zA-Z0-9_]/_/g' \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'`
- AC_DEFINE_UNQUOTED($ac_tr_lib)
- KRB_LIBS="-l$1 $KRB_LIBS"
- ]], [$3]),
- ifelse([$4], , , [$4
-])dnl
-, [$5 $KRB_LIBS])
-])
-
-dnl LSH_LIB_ARGP(ACTION-IF-OK, ACTION-IF-BAD)
-AC_DEFUN([LSH_LIB_ARGP],
-[ ac_argp_save_LIBS="$LIBS"
- ac_argp_save_LDFLAGS="$LDFLAGS"
- ac_argp_ok=no
- # First check if we can link with argp.
- AC_SEARCH_LIBS(argp_parse, argp,
- [ LSH_RPATH_FIX
- AC_CACHE_CHECK([for working argp],
- lsh_cv_lib_argp_works,
- [ AC_TRY_RUN(
-[#include <argp.h>
-#include <stdlib.h>
-
-static const struct argp_option
-options[] =
-{
- { NULL, 0, NULL, 0, NULL, 0 }
-};
-
-struct child_state
-{
- int n;
-};
-
-static error_t
-child_parser(int key, char *arg, struct argp_state *state)
-{
- struct child_state *input = (struct child_state *) state->input;
-
- switch(key)
- {
- default:
- return ARGP_ERR_UNKNOWN;
- case ARGP_KEY_END:
- if (!input->n)
- input->n = 1;
- break;
- }
- return 0;
-}
-
-const struct argp child_argp =
-{
- options,
- child_parser,
- NULL, NULL, NULL, NULL, NULL
-};
-
-struct main_state
-{
- struct child_state child;
- int m;
-};
-
-static error_t
-main_parser(int key, char *arg, struct argp_state *state)
-{
- struct main_state *input = (struct main_state *) state->input;
-
- switch(key)
- {
- default:
- return ARGP_ERR_UNKNOWN;
- case ARGP_KEY_INIT:
- state->child_inputs[0] = &input->child;
- break;
- case ARGP_KEY_END:
- if (!input->m)
- input->m = input->child.n;
-
- break;
- }
- return 0;
-}
-
-static const struct argp_child
-main_children[] =
-{
- { &child_argp, 0, "", 0 },
- { NULL, 0, NULL, 0}
-};
-
-static const struct argp
-main_argp =
-{ options, main_parser,
- NULL,
- NULL,
- main_children,
- NULL, NULL
-};
-
-int main(int argc, char **argv)
-{
- struct main_state input = { { 0 }, 0 };
- char *v[2] = { "foo", NULL };
-
- argp_parse(&main_argp, 1, v, 0, NULL, &input);
-
- if ( (input.m == 1) && (input.child.n == 1) )
- return 0;
- else
- return 1;
-}
-], lsh_cv_lib_argp_works=yes,
- lsh_cv_lib_argp_works=no,
- lsh_cv_lib_argp_works=no)])
-
- if test x$lsh_cv_lib_argp_works = xyes ; then
- ac_argp_ok=yes
- else
- # Reset link flags
- LIBS="$ac_argp_save_LIBS"
- LDFLAGS="$ac_argp_save_LDFLAGS"
- fi])
-
- if test x$ac_argp_ok = xyes ; then
- ifelse([$1],, true, [$1])
- else
- ifelse([$2],, true, [$2])
- fi
-])
-
-dnl LSH_GCC_ATTRIBUTES
-dnl Check for gcc's __attribute__ construction
-
-AC_DEFUN([LSH_GCC_ATTRIBUTES],
-[AC_CACHE_CHECK(for __attribute__,
- lsh_cv_c_attribute,
-[ AC_TRY_COMPILE([
-#include <stdlib.h>
-],
-[
-static void foo(void) __attribute__ ((noreturn));
-
-static void __attribute__ ((noreturn))
-foo(void)
-{
- exit(1);
-}
-],
-lsh_cv_c_attribute=yes,
-lsh_cv_c_attribute=no)])
-
-AH_TEMPLATE([HAVE_GCC_ATTRIBUTE], [Define if the compiler understands __attribute__])
-if test "x$lsh_cv_c_attribute" = "xyes"; then
- AC_DEFINE(HAVE_GCC_ATTRIBUTE)
-fi
-
-AH_BOTTOM(
-[#if __GNUC__ || HAVE_GCC_ATTRIBUTE
-# define NORETURN __attribute__ ((__noreturn__))
-# define PRINTF_STYLE(f, a) __attribute__ ((__format__ (__printf__, f, a)))
-# define UNUSED __attribute__ ((__unused__))
-#else
-# define NORETURN
-# define PRINTF_STYLE(f, a)
-# define UNUSED
-#endif
-])])
-
-AC_DEFUN([LSH_GCC_FUNCTION_NAME],
-[# Check for gcc's __FUNCTION__ variable
-AH_TEMPLATE([HAVE_GCC_FUNCTION],
- [Define if the compiler understands __FUNCTION__])
-AH_BOTTOM(
-[#if HAVE_GCC_FUNCTION
-# define FUNCTION_NAME __FUNCTION__
-#else
-# define FUNCTION_NAME "Unknown"
-#endif
-])
-
-AC_CACHE_CHECK(for __FUNCTION__,
- lsh_cv_c_FUNCTION,
- [ AC_TRY_COMPILE(,
- [ #if __GNUC__ == 3
- # error __FUNCTION__ is broken in gcc-3
- #endif
- void foo(void) { char c = __FUNCTION__[0]; } ],
- lsh_cv_c_FUNCTION=yes,
- lsh_cv_c_FUNCTION=no)])
-
-if test "x$lsh_cv_c_FUNCTION" = "xyes"; then
- AC_DEFINE(HAVE_GCC_FUNCTION)
-fi
-])
-
-# Check for alloca, and include the standard blurb in config.h
-AC_DEFUN([LSH_FUNC_ALLOCA],
-[AC_FUNC_ALLOCA
-AC_CHECK_HEADERS([malloc.h])
-AH_BOTTOM(
-[/* AIX requires this to be the first thing in the file. */
-#ifndef __GNUC__
-# if HAVE_ALLOCA_H
-# include <alloca.h>
-# else
-# ifdef _AIX
- #pragma alloca
-# else
-# ifndef alloca /* predefined by HP cc +Olibcalls */
-char *alloca ();
-# endif
-# endif
-# endif
-#else /* defined __GNUC__ */
-# if HAVE_ALLOCA_H
-# include <alloca.h>
-# endif
-#endif
-/* Needed for alloca on windows */
-#if HAVE_MALLOC_H
-# include <malloc.h>
-#endif
-])])
-
-AC_DEFUN([LSH_FUNC_STRERROR],
-[AC_CHECK_FUNCS(strerror)
-AH_BOTTOM(
-[#if HAVE_STRERROR
-#define STRERROR strerror
-#else
-#define STRERROR(x) (sys_errlist[x])
-#endif
-])])
-
-AC_DEFUN([LSH_FUNC_STRSIGNAL],
-[AC_CHECK_FUNCS(strsignal)
-AC_CHECK_DECLS([sys_siglist, _sys_siglist])
-AH_BOTTOM(
-[#if HAVE_STRSIGNAL
-# define STRSIGNAL strsignal
-#else /* !HAVE_STRSIGNAL */
-# if HAVE_DECL_SYS_SIGLIST
-# define STRSIGNAL(x) (sys_siglist[x])
-# else
-# if HAVE_DECL__SYS_SIGLIST
-# define STRSIGNAL(x) (_sys_siglist[x])
-# else
-# define STRSIGNAL(x) "Unknown signal"
-# if __GNUC__
-# warning Using dummy STRSIGNAL
-# endif
-# endif
-# endif
-#endif /* !HAVE_STRSIGNAL */
-])])
-
-dnl LSH_MAKE_CONDITIONAL(symbol, test)
-AC_DEFUN([LSH_MAKE_CONDITIONAL],
-[if $2 ; then
- IF_$1=''
- UNLESS_$1='# '
-else
- IF_$1='# '
- UNLESS_$1=''
-fi
-AC_SUBST(IF_$1)
-AC_SUBST(UNLESS_$1)])
-
-dnl LSH_DEPENDENCY_TRACKING
-
-dnl Defines compiler flags DEP_FLAGS to generate dependency
-dnl information, and DEP_PROCESS that is any shell commands needed for
-dnl massaging the dependency information further. Dependencies are
-dnl generated as a side effect of compilation. Dependency files
-dnl themselves are not treated as targets.
-
-AC_DEFUN([LSH_DEPENDENCY_TRACKING],
-[AC_ARG_ENABLE(dependency_tracking,
- AC_HELP_STRING([--disable-dependency-tracking],
- [Disable dependency tracking. Dependency tracking doesn't work with BSD make]),,
- [enable_dependency_tracking=yes])
-
-DEP_FLAGS=''
-DEP_PROCESS='true'
-if test x$enable_dependency_tracking = xyes ; then
- if test x$GCC = xyes ; then
- gcc_version=`gcc --version | head -1`
- case "$gcc_version" in
- 2.*|*[[!0-9.]]2.*)
- enable_dependency_tracking=no
- AC_MSG_WARN([Dependency tracking disabled, gcc-3.x is needed])
- ;;
- *)
- DEP_FLAGS='-MT $[]@ -MD -MP -MF $[]@.d'
- DEP_PROCESS='true'
- ;;
- esac
- else
- enable_dependency_tracking=no
- AC_MSG_WARN([Dependency tracking disabled])
- fi
-fi
-
-if test x$enable_dependency_tracking = xyes ; then
- DEP_INCLUDE='include '
-else
- DEP_INCLUDE='# '
-fi
-
-AC_SUBST([DEP_INCLUDE])
-AC_SUBST([DEP_FLAGS])
-AC_SUBST([DEP_PROCESS])])
-
-dnl @synopsis AX_CREATE_STDINT_H [( HEADER-TO-GENERATE [, HEADERS-TO-CHECK])]
-dnl
-dnl the "ISO C9X: 7.18 Integer types <stdint.h>" section requires the
-dnl existence of an include file <stdint.h> that defines a set of
-dnl typedefs, especially uint8_t,int32_t,uintptr_t.
-dnl Many older installations will not provide this file, but some will
-dnl have the very same definitions in <inttypes.h>. In other enviroments
-dnl we can use the inet-types in <sys/types.h> which would define the
-dnl typedefs int8_t and u_int8_t respectivly.
-dnl
-dnl This macros will create a local "_stdint.h" or the headerfile given as
-dnl an argument. In many cases that file will just "#include <stdint.h>"
-dnl or "#include <inttypes.h>", while in other environments it will provide
-dnl the set of basic 'stdint's definitions/typedefs:
-dnl int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,intptr_t,uintptr_t
-dnl int_least32_t.. int_fast32_t.. intmax_t
-dnl which may or may not rely on the definitions of other files,
-dnl or using the AC_CHECK_SIZEOF macro to determine the actual
-dnl sizeof each type.
-dnl
-dnl if your header files require the stdint-types you will want to create an
-dnl installable file mylib-int.h that all your other installable header
-dnl may include. So if you have a library package named "mylib", just use
-dnl AX_CREATE_STDINT_H(mylib-int.h)
-dnl in configure.ac and go to install that very header file in Makefile.am
-dnl along with the other headers (mylib.h) - and the mylib-specific headers
-dnl can simply use "#include <mylib-int.h>" to obtain the stdint-types.
-dnl
-dnl Remember, if the system already had a valid <stdint.h>, the generated
-dnl file will include it directly. No need for fuzzy HAVE_STDINT_H things...
-dnl
-dnl @, (status: used on new platforms) (see http://ac-archive.sf.net/gstdint/)
-dnl @version $Id: acinclude.m4,v 1.27 2004/11/23 21:27:35 nisse Exp $
-dnl @author Guido Draheim <guidod@gmx.de>
-
-AC_DEFUN([AX_CREATE_STDINT_H],
-[# ------ AX CREATE STDINT H -------------------------------------
-AC_MSG_CHECKING([for stdint types])
-ac_stdint_h=`echo ifelse($1, , _stdint.h, $1)`
-# try to shortcircuit - if the default include path of the compiler
-# can find a "stdint.h" header then we assume that all compilers can.
-AC_CACHE_VAL([ac_cv_header_stdint_t],[
-old_CXXFLAGS="$CXXFLAGS" ; CXXFLAGS=""
-old_CPPFLAGS="$CPPFLAGS" ; CPPFLAGS=""
-old_CFLAGS="$CFLAGS" ; CFLAGS=""
-AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
-[ac_cv_stdint_result="(assuming C99 compatible system)"
- ac_cv_header_stdint_t="stdint.h"; ],
-[ac_cv_header_stdint_t=""])
-CXXFLAGS="$old_CXXFLAGS"
-CPPFLAGS="$old_CPPFLAGS"
-CFLAGS="$old_CFLAGS" ])
-
-v="... $ac_cv_header_stdint_h"
-if test "$ac_stdint_h" = "stdint.h" ; then
- AC_MSG_RESULT([(are you sure you want them in ./stdint.h?)])
-elif test "$ac_stdint_h" = "inttypes.h" ; then
- AC_MSG_RESULT([(are you sure you want them in ./inttypes.h?)])
-elif test "_$ac_cv_header_stdint_t" = "_" ; then
- AC_MSG_RESULT([(putting them into $ac_stdint_h)$v])
-else
- ac_cv_header_stdint="$ac_cv_header_stdint_t"
- AC_MSG_RESULT([$ac_cv_header_stdint (shortcircuit)])
-fi
-
-if test "_$ac_cv_header_stdint_t" = "_" ; then # can not shortcircuit..
-
-dnl .....intro message done, now do a few system checks.....
-dnl btw, all CHECK_TYPE macros do automatically "DEFINE" a type, therefore
-dnl we use the autoconf implementation detail _AC CHECK_TYPE_NEW instead
-
-inttype_headers=`echo $2 | sed -e 's/,/ /g'`
-
-ac_cv_stdint_result="(no helpful system typedefs seen)"
-AC_CACHE_CHECK([for stdint uintptr_t], [ac_cv_header_stdint_x],[
- ac_cv_header_stdint_x="" # the 1997 typedefs (inttypes.h)
- AC_MSG_RESULT([(..)])
- for i in stdint.h inttypes.h sys/inttypes.h $inttype_headers ; do
- unset ac_cv_type_uintptr_t
- unset ac_cv_type_uint64_t
- _AC_CHECK_TYPE_NEW(uintptr_t,[ac_cv_header_stdint_x=$i],dnl
- continue,[#include <$i>])
- AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
- ac_cv_stdint_result="(seen uintptr_t$and64 in $i)"
- break;
- done
- AC_MSG_CHECKING([for stdint uintptr_t])
- ])
-
-if test "_$ac_cv_header_stdint_x" = "_" ; then
-AC_CACHE_CHECK([for stdint uint32_t], [ac_cv_header_stdint_o],[
- ac_cv_header_stdint_o="" # the 1995 typedefs (sys/inttypes.h)
- AC_MSG_RESULT([(..)])
- for i in inttypes.h sys/inttypes.h stdint.h $inttype_headers ; do
- unset ac_cv_type_uint32_t
- unset ac_cv_type_uint64_t
- AC_CHECK_TYPE(uint32_t,[ac_cv_header_stdint_o=$i],dnl
- continue,[#include <$i>])
- AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
- ac_cv_stdint_result="(seen uint32_t$and64 in $i)"
- break;
- done
- AC_MSG_CHECKING([for stdint uint32_t])
- ])
-fi
-
-if test "_$ac_cv_header_stdint_x" = "_" ; then
-if test "_$ac_cv_header_stdint_o" = "_" ; then
-AC_CACHE_CHECK([for stdint u_int32_t], [ac_cv_header_stdint_u],[
- ac_cv_header_stdint_u="" # the BSD typedefs (sys/types.h)
- AC_MSG_RESULT([(..)])
- for i in sys/types.h inttypes.h sys/inttypes.h $inttype_headers ; do
- unset ac_cv_type_u_int32_t
- unset ac_cv_type_u_int64_t
- AC_CHECK_TYPE(u_int32_t,[ac_cv_header_stdint_u=$i],dnl
- continue,[#include <$i>])
- AC_CHECK_TYPE(u_int64_t,[and64="/u_int64_t"],[and64=""],[#include<$i>])
- ac_cv_stdint_result="(seen u_int32_t$and64 in $i)"
- break;
- done
- AC_MSG_CHECKING([for stdint u_int32_t])
- ])
-fi fi
-
-dnl if there was no good C99 header file, do some typedef checks...
-if test "_$ac_cv_header_stdint_x" = "_" ; then
- AC_MSG_CHECKING([for stdint datatype model])
- AC_MSG_RESULT([(..)])
- AC_CHECK_SIZEOF(char)
- AC_CHECK_SIZEOF(short)
- AC_CHECK_SIZEOF(int)
- AC_CHECK_SIZEOF(long)
- AC_CHECK_SIZEOF(void*)
- ac_cv_stdint_char_model=""
- ac_cv_stdint_char_model="$ac_cv_stdint_char_model$ac_cv_sizeof_char"
- ac_cv_stdint_char_model="$ac_cv_stdint_char_model$ac_cv_sizeof_short"
- ac_cv_stdint_char_model="$ac_cv_stdint_char_model$ac_cv_sizeof_int"
- ac_cv_stdint_long_model=""
- ac_cv_stdint_long_model="$ac_cv_stdint_long_model$ac_cv_sizeof_int"
- ac_cv_stdint_long_model="$ac_cv_stdint_long_model$ac_cv_sizeof_long"
- ac_cv_stdint_long_model="$ac_cv_stdint_long_model$ac_cv_sizeof_voidp"
- name="$ac_cv_stdint_long_model"
- case "$ac_cv_stdint_char_model/$ac_cv_stdint_long_model" in
- 122/242) name="$name, IP16 (standard 16bit machine)" ;;
- 122/244) name="$name, LP32 (standard 32bit mac/win)" ;;
- 122/*) name="$name (unusual int16 model)" ;;
- 124/444) name="$name, ILP32 (standard 32bit unixish)" ;;
- 124/488) name="$name, LP64 (standard 64bit unixish)" ;;
- 124/448) name="$name, LLP64 (unusual 64bit unixish)" ;;
- 124/*) name="$name (unusual int32 model)" ;;
- 128/888) name="$name, ILP64 (unusual 64bit numeric)" ;;
- 128/*) name="$name (unusual int64 model)" ;;
- 222/*|444/*) name="$name (unusual dsptype)" ;;
- *) name="$name (very unusal model)" ;;
- esac
- AC_MSG_RESULT([combined for stdint datatype model... $name])
-fi
-
-if test "_$ac_cv_header_stdint_x" != "_" ; then
- ac_cv_header_stdint="$ac_cv_header_stdint_x"
-elif test "_$ac_cv_header_stdint_o" != "_" ; then
- ac_cv_header_stdint="$ac_cv_header_stdint_o"
-elif test "_$ac_cv_header_stdint_u" != "_" ; then
- ac_cv_header_stdint="$ac_cv_header_stdint_u"
-else
- ac_cv_header_stdint="stddef.h"
-fi
-
-AC_MSG_CHECKING([for extra inttypes in chosen header])
-AC_MSG_RESULT([($ac_cv_header_stdint)])
-dnl see if int_least and int_fast types are present in _this_ header.
-unset ac_cv_type_int_least32_t
-unset ac_cv_type_int_fast32_t
-AC_CHECK_TYPE(int_least32_t,,,[#include <$ac_cv_header_stdint>])
-AC_CHECK_TYPE(int_fast32_t,,,[#include<$ac_cv_header_stdint>])
-AC_CHECK_TYPE(intmax_t,,,[#include <$ac_cv_header_stdint>])
-
-fi # shortcircut to system "stdint.h"
-# ------------------ PREPARE VARIABLES ------------------------------
-if test "$GCC" = "yes" ; then
-ac_cv_stdint_message="using gnu compiler "`$CC --version | head -1`
-else
-ac_cv_stdint_message="using $CC"
-fi
-
-AC_MSG_RESULT([make use of $ac_cv_header_stdint in $ac_stdint_h dnl
-$ac_cv_stdint_result])
-
-# ----------------- DONE inttypes.h checks START header -------------
-AC_CONFIG_COMMANDS([$ac_stdint_h],[
-AC_MSG_NOTICE(creating $ac_stdint_h : $_ac_stdint_h)
-ac_stdint=$tmp/_stdint.h
-
-echo "#ifndef" $_ac_stdint_h >$ac_stdint
-echo "#define" $_ac_stdint_h "1" >>$ac_stdint
-echo "#ifndef" _GENERATED_STDINT_H >>$ac_stdint
-echo "#define" _GENERATED_STDINT_H '"'$PACKAGE $VERSION'"' >>$ac_stdint
-echo "/* generated $ac_cv_stdint_message */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_t" != "_" ; then
-echo "#define _STDINT_HAVE_STDINT_H" "1" >>$ac_stdint
-fi
-
-cat >>$ac_stdint <<STDINT_EOF
-
-/* ................... shortcircuit part ........................... */
-
-#if defined HAVE_STDINT_H || defined _STDINT_HAVE_STDINT_H
-#include <stdint.h>
-#else
-#include <stddef.h>
-
-/* .................... configured part ............................ */
-
-STDINT_EOF
-
-echo "/* whether we have a C99 compatible stdint header file */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_x" != "_" ; then
- ac_header="$ac_cv_header_stdint_x"
- echo "#define _STDINT_HEADER_INTPTR" '"'"$ac_header"'"' >>$ac_stdint
-else
- echo "/* #undef _STDINT_HEADER_INTPTR */" >>$ac_stdint
-fi
-
-echo "/* whether we have a C96 compatible inttypes header file */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_o" != "_" ; then
- ac_header="$ac_cv_header_stdint_o"
- echo "#define _STDINT_HEADER_UINT32" '"'"$ac_header"'"' >>$ac_stdint
-else
- echo "/* #undef _STDINT_HEADER_UINT32 */" >>$ac_stdint
-fi
-
-echo "/* whether we have a BSD compatible inet types header */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_u" != "_" ; then
- ac_header="$ac_cv_header_stdint_u"
- echo "#define _STDINT_HEADER_U_INT32" '"'"$ac_header"'"' >>$ac_stdint
-else
- echo "/* #undef _STDINT_HEADER_U_INT32 */" >>$ac_stdint
-fi
-
-echo "" >>$ac_stdint
-
-if test "_$ac_header" != "_" ; then if test "$ac_header" != "stddef.h" ; then
- echo "#include <$ac_header>" >>$ac_stdint
- echo "" >>$ac_stdint
-fi fi
-
-echo "/* which 64bit typedef has been found */" >>$ac_stdint
-if test "$ac_cv_type_uint64_t" = "yes" ; then
-echo "#define _STDINT_HAVE_UINT64_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_UINT64_T */" >>$ac_stdint
-fi
-if test "$ac_cv_type_u_int64_t" = "yes" ; then
-echo "#define _STDINT_HAVE_U_INT64_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_U_INT64_T */" >>$ac_stdint
-fi
-echo "" >>$ac_stdint
-
-echo "/* which type model has been detected */" >>$ac_stdint
-if test "_$ac_cv_stdint_char_model" != "_" ; then
-echo "#define _STDINT_CHAR_MODEL" "$ac_cv_stdint_char_model" >>$ac_stdint
-echo "#define _STDINT_LONG_MODEL" "$ac_cv_stdint_long_model" >>$ac_stdint
-else
-echo "/* #undef _STDINT_CHAR_MODEL // skipped */" >>$ac_stdint
-echo "/* #undef _STDINT_LONG_MODEL // skipped */" >>$ac_stdint
-fi
-echo "" >>$ac_stdint
-
-echo "/* whether int_least types were detected */" >>$ac_stdint
-if test "$ac_cv_type_int_least32_t" = "yes"; then
-echo "#define _STDINT_HAVE_INT_LEAST32_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_INT_LEAST32_T */" >>$ac_stdint
-fi
-echo "/* whether int_fast types were detected */" >>$ac_stdint
-if test "$ac_cv_type_int_fast32_t" = "yes"; then
-echo "#define _STDINT_HAVE_INT_FAST32_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_INT_FAST32_T */" >>$ac_stdint
-fi
-echo "/* whether intmax_t type was detected */" >>$ac_stdint
-if test "$ac_cv_type_intmax_t" = "yes"; then
-echo "#define _STDINT_HAVE_INTMAX_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_INTMAX_T */" >>$ac_stdint
-fi
-echo "" >>$ac_stdint
-
- cat >>$ac_stdint <<STDINT_EOF
-/* .................... detections part ............................ */
-
-/* whether we need to define bitspecific types from compiler base types */
-#ifndef _STDINT_HEADER_INTPTR
-#ifndef _STDINT_HEADER_UINT32
-#ifndef _STDINT_HEADER_U_INT32
-#define _STDINT_NEED_INT_MODEL_T
-#else
-#define _STDINT_HAVE_U_INT_TYPES
-#endif
-#endif
-#endif
-
-#ifdef _STDINT_HAVE_U_INT_TYPES
-#undef _STDINT_NEED_INT_MODEL_T
-#endif
-
-#ifdef _STDINT_CHAR_MODEL
-#if _STDINT_CHAR_MODEL+0 == 122 || _STDINT_CHAR_MODEL+0 == 124
-#ifndef _STDINT_BYTE_MODEL
-#define _STDINT_BYTE_MODEL 12
-#endif
-#endif
-#endif
-
-#ifndef _STDINT_HAVE_INT_LEAST32_T
-#define _STDINT_NEED_INT_LEAST_T
-#endif
-
-#ifndef _STDINT_HAVE_INT_FAST32_T
-#define _STDINT_NEED_INT_FAST_T
-#endif
-
-#ifndef _STDINT_HEADER_INTPTR
-#define _STDINT_NEED_INTPTR_T
-#ifndef _STDINT_HAVE_INTMAX_T
-#define _STDINT_NEED_INTMAX_T
-#endif
-#endif
-
-
-/* .................... definition part ............................ */
-
-/* some system headers have good uint64_t */
-#ifndef _HAVE_UINT64_T
-#if defined _STDINT_HAVE_UINT64_T || defined HAVE_UINT64_T
-#define _HAVE_UINT64_T
-#elif defined _STDINT_HAVE_U_INT64_T || defined HAVE_U_INT64_T
-#define _HAVE_UINT64_T
-typedef u_int64_t uint64_t;
-#endif
-#endif
-
-#ifndef _HAVE_UINT64_T
-/* .. here are some common heuristics using compiler runtime specifics */
-#if defined __STDC_VERSION__ && defined __STDC_VERSION__ >= 199901L
-#define _HAVE_UINT64_T
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-
-#elif !defined __STRICT_ANSI__
-#if defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__
-#define _HAVE_UINT64_T
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-
-#elif defined __GNUC__ || defined __MWERKS__ || defined __ELF__
-/* note: all ELF-systems seem to have loff-support which needs 64-bit */
-#if !defined _NO_LONGLONG
-#define _HAVE_UINT64_T
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-#endif
-
-#elif defined __alpha || (defined __mips && defined _ABIN32)
-#if !defined _NO_LONGLONG
-typedef long int64_t;
-typedef unsigned long uint64_t;
-#endif
- /* compiler/cpu type to define int64_t */
-#endif
-#endif
-#endif
-
-#if defined _STDINT_HAVE_U_INT_TYPES
-/* int8_t int16_t int32_t defined by inet code, redeclare the u_intXX types */
-typedef u_int8_t uint8_t;
-typedef u_int16_t uint16_t;
-typedef u_int32_t uint32_t;
-
-/* glibc compatibility */
-#ifndef __int8_t_defined
-#define __int8_t_defined
-#endif
-#endif
-
-#ifdef _STDINT_NEED_INT_MODEL_T
-/* we must guess all the basic types. Apart from byte-adressable system, */
-/* there a few 32-bit-only dsp-systems that we guard with BYTE_MODEL 8-} */
-/* (btw, those nibble-addressable systems are way off, or so we assume) */
-
-dnl /* have a look at "64bit and data size neutrality" at */
-dnl /* http://unix.org/version2/whatsnew/login_64bit.html */
-dnl /* (the shorthand "ILP" types always have a "P" part) */
-
-#if defined _STDINT_BYTE_MODEL
-#if _STDINT_LONG_MODEL+0 == 242
-/* 2:4:2 = IP16 = a normal 16-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned long uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef long int32_t;
-#endif
-#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL == 444
-/* 2:4:4 = LP32 = a 32-bit system derived from a 16-bit */
-/* 4:4:4 = ILP32 = a normal 32-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-#endif
-#elif _STDINT_LONG_MODEL+0 == 484 || _STDINT_LONG_MODEL+0 == 488
-/* 4:8:4 = IP32 = a 32-bit system prepared for 64-bit */
-/* 4:8:8 = LP64 = a normal 64-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-#endif
-/* this system has a "long" of 64bit */
-#ifndef _HAVE_UINT64_T
-#define _HAVE_UINT64_T
-typedef unsigned long uint64_t;
-typedef long int64_t;
-#endif
-#elif _STDINT_LONG_MODEL+0 == 448
-/* LLP64 a 64-bit system derived from a 32-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-#endif
-/* assuming the system has a "long long" */
-#ifndef _HAVE_UINT64_T
-#define _HAVE_UINT64_T
-typedef unsigned long long uint64_t;
-typedef long long int64_t;
-#endif
-#else
-#define _STDINT_NO_INT32_T
-#endif
-#else
-#define _STDINT_NO_INT8_T
-#define _STDINT_NO_INT32_T
-#endif
-#endif
-
-/*
- * quote from SunOS-5.8 sys/inttypes.h:
- * Use at your own risk. As of February 1996, the committee is squarely
- * behind the fixed sized types; the "least" and "fast" types are still being
- * discussed. The probability that the "fast" types may be removed before
- * the standard is finalized is high enough that they are not currently
- * implemented.
- */
-
-#if defined _STDINT_NEED_INT_LEAST_T
-typedef int8_t int_least8_t;
-typedef int16_t int_least16_t;
-typedef int32_t int_least32_t;
-#ifdef _HAVE_UINT64_T
-typedef int64_t int_least64_t;
-#endif
-
-typedef uint8_t uint_least8_t;
-typedef uint16_t uint_least16_t;
-typedef uint32_t uint_least32_t;
-#ifdef _HAVE_UINT64_T
-typedef uint64_t uint_least64_t;
-#endif
- /* least types */
-#endif
-
-#if defined _STDINT_NEED_INT_FAST_T
-typedef int8_t int_fast8_t;
-typedef int int_fast16_t;
-typedef int32_t int_fast32_t;
-#ifdef _HAVE_UINT64_T
-typedef int64_t int_fast64_t;
-#endif
-
-typedef uint8_t uint_fast8_t;
-typedef unsigned uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-#ifdef _HAVE_UINT64_T
-typedef uint64_t uint_fast64_t;
-#endif
- /* fast types */
-#endif
-
-#ifdef _STDINT_NEED_INTMAX_T
-#ifdef _HAVE_UINT64_T
-typedef int64_t intmax_t;
-typedef uint64_t uintmax_t;
-#else
-typedef long intmax_t;
-typedef unsigned long uintmax_t;
-#endif
-#endif
-
-#ifdef _STDINT_NEED_INTPTR_T
-#ifndef __intptr_t_defined
-#define __intptr_t_defined
-/* we encourage using "long" to store pointer values, never use "int" ! */
-#if _STDINT_LONG_MODEL+0 == 242 || _STDINT_LONG_MODEL+0 == 484
-typedef unsinged int uintptr_t;
-typedef int intptr_t;
-#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL+0 == 444
-typedef unsigned long uintptr_t;
-typedef long intptr_t;
-#elif _STDINT_LONG_MODEL+0 == 448 && defined _HAVE_UINT64_T
-typedef uint64_t uintptr_t;
-typedef int64_t intptr_t;
-#else /* matches typical system types ILP32 and LP64 - but not IP16 or LLP64 */
-typedef unsigned long uintptr_t;
-typedef long intptr_t;
-#endif
-#endif
-#endif
-
- /* shortcircuit*/
-#endif
- /* once */
-#endif
-#endif
-STDINT_EOF
- if cmp -s $ac_stdint_h $ac_stdint 2>/dev/null; then
- AC_MSG_NOTICE([$ac_stdint_h is unchanged])
- else
- ac_dir=`AS_DIRNAME(["$ac_stdint_h"])`
- AS_MKDIR_P(["$ac_dir"])
- rm -f $ac_stdint_h
- mv $ac_stdint $ac_stdint_h
- fi
-],[# variables for create stdint.h replacement
-PACKAGE="$PACKAGE"
-VERSION="$VERSION"
-ac_stdint_h="$ac_stdint_h"
-_ac_stdint_h=AS_TR_CPP(_$PACKAGE-$ac_stdint_h)
-ac_cv_stdint_message="$ac_cv_stdint_message"
-ac_cv_header_stdint_t="$ac_cv_header_stdint_t"
-ac_cv_header_stdint_x="$ac_cv_header_stdint_x"
-ac_cv_header_stdint_o="$ac_cv_header_stdint_o"
-ac_cv_header_stdint_u="$ac_cv_header_stdint_u"
-ac_cv_type_uint64_t="$ac_cv_type_uint64_t"
-ac_cv_type_u_int64_t="$ac_cv_type_u_int64_t"
-ac_cv_stdint_char_model="$ac_cv_stdint_char_model"
-ac_cv_stdint_long_model="$ac_cv_stdint_long_model"
-ac_cv_type_int_least32_t="$ac_cv_type_int_least32_t"
-ac_cv_type_int_fast32_t="$ac_cv_type_int_fast32_t"
-ac_cv_type_intmax_t="$ac_cv_type_intmax_t"
-])
-])
diff --git a/argp-standalone/argp-ba.c b/argp-standalone/argp-ba.c
deleted file mode 100644
index 0d3958c1151..00000000000
--- a/argp-standalone/argp-ba.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Default definition for ARGP_PROGRAM_BUG_ADDRESS.
- Copyright (C) 1996, 1997, 1999, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* If set by the user program, it should point to string that is the
- bug-reporting address for the program. It will be printed by argp_help if
- the ARGP_HELP_BUG_ADDR flag is set (as it is by various standard help
- messages), embedded in a sentence that says something like `Report bugs to
- ADDR.'. */
-const char *argp_program_bug_address = 0;
diff --git a/argp-standalone/argp-eexst.c b/argp-standalone/argp-eexst.c
deleted file mode 100644
index 46b27847ad4..00000000000
--- a/argp-standalone/argp-eexst.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Default definition for ARGP_ERR_EXIT_STATUS
- Copyright (C) 1997 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#if HAVE_SYSEXITS_H
-# include <sysexits.h>
-#else
-# define EX_USAGE 64
-#endif
-
-#include "argp.h"
-
-/* The exit status that argp will use when exiting due to a parsing error.
- If not defined or set by the user program, this defaults to EX_USAGE from
- <sysexits.h>. */
-error_t argp_err_exit_status = EX_USAGE;
diff --git a/argp-standalone/argp-fmtstream.c b/argp-standalone/argp-fmtstream.c
deleted file mode 100644
index 7f792854fc3..00000000000
--- a/argp-standalone/argp-fmtstream.c
+++ /dev/null
@@ -1,475 +0,0 @@
-/* Word-wrapping and line-truncating streams
- Copyright (C) 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* This package emulates glibc `line_wrap_stream' semantics for systems that
- don't have that. */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <ctype.h>
-
-#include "argp-fmtstream.h"
-#include "argp-namefrob.h"
-
-#ifndef ARGP_FMTSTREAM_USE_LINEWRAP
-
-#ifndef isblank
-#define isblank(ch) ((ch)==' ' || (ch)=='\t')
-#endif
-
-#if defined _LIBC && defined USE_IN_LIBIO
-# include <libio/libioP.h>
-# define __vsnprintf(s, l, f, a) _IO_vsnprintf (s, l, f, a)
-#endif
-
-#define INIT_BUF_SIZE 200
-#define PRINTF_SIZE_GUESS 150
-
-/* Return an argp_fmtstream that outputs to STREAM, and which prefixes lines
- written on it with LMARGIN spaces and limits them to RMARGIN columns
- total. If WMARGIN >= 0, words that extend past RMARGIN are wrapped by
- replacing the whitespace before them with a newline and WMARGIN spaces.
- Otherwise, chars beyond RMARGIN are simply dropped until a newline.
- Returns NULL if there was an error. */
-argp_fmtstream_t
-__argp_make_fmtstream (FILE *stream,
- size_t lmargin, size_t rmargin, ssize_t wmargin)
-{
- argp_fmtstream_t fs = malloc (sizeof (struct argp_fmtstream));
- if (fs)
- {
- fs->stream = stream;
-
- fs->lmargin = lmargin;
- fs->rmargin = rmargin;
- fs->wmargin = wmargin;
- fs->point_col = 0;
- fs->point_offs = 0;
-
- fs->buf = malloc (INIT_BUF_SIZE);
- if (! fs->buf)
- {
- free (fs);
- fs = 0;
- }
- else
- {
- fs->p = fs->buf;
- fs->end = fs->buf + INIT_BUF_SIZE;
- }
- }
-
- return fs;
-}
-#ifdef weak_alias
-weak_alias (__argp_make_fmtstream, argp_make_fmtstream)
-#endif
-
-/* Flush FS to its stream, and free it (but don't close the stream). */
-void
-__argp_fmtstream_free (argp_fmtstream_t fs)
-{
- __argp_fmtstream_update (fs);
- if (fs->p > fs->buf)
- FWRITE_UNLOCKED (fs->buf, 1, fs->p - fs->buf, fs->stream);
- free (fs->buf);
- free (fs);
-}
-#ifdef weak_alias
-weak_alias (__argp_fmtstream_free, argp_fmtstream_free)
-#endif
-
-/* Process FS's buffer so that line wrapping is done from POINT_OFFS to the
- end of its buffer. This code is mostly from glibc stdio/linewrap.c. */
-void
-__argp_fmtstream_update (argp_fmtstream_t fs)
-{
- char *buf, *nl;
- size_t len;
-
- /* Scan the buffer for newlines. */
- buf = fs->buf + fs->point_offs;
- while (buf < fs->p)
- {
- size_t r;
-
- if (fs->point_col == 0 && fs->lmargin != 0)
- {
- /* We are starting a new line. Print spaces to the left margin. */
- const size_t pad = fs->lmargin;
- if (fs->p + pad < fs->end)
- {
- /* We can fit in them in the buffer by moving the
- buffer text up and filling in the beginning. */
- memmove (buf + pad, buf, fs->p - buf);
- fs->p += pad; /* Compensate for bigger buffer. */
- memset (buf, ' ', pad); /* Fill in the spaces. */
- buf += pad; /* Don't bother searching them. */
- }
- else
- {
- /* No buffer space for spaces. Must flush. */
- size_t i;
- for (i = 0; i < pad; i++)
- PUTC_UNLOCKED (' ', fs->stream);
- }
- fs->point_col = pad;
- }
-
- len = fs->p - buf;
- nl = memchr (buf, '\n', len);
-
- if (fs->point_col < 0)
- fs->point_col = 0;
-
- if (!nl)
- {
- /* The buffer ends in a partial line. */
-
- if (fs->point_col + len < fs->rmargin)
- {
- /* The remaining buffer text is a partial line and fits
- within the maximum line width. Advance point for the
- characters to be written and stop scanning. */
- fs->point_col += len;
- break;
- }
- else
- /* Set the end-of-line pointer for the code below to
- the end of the buffer. */
- nl = fs->p;
- }
- else if (fs->point_col + (nl - buf) < (ssize_t) fs->rmargin)
- {
- /* The buffer contains a full line that fits within the maximum
- line width. Reset point and scan the next line. */
- fs->point_col = 0;
- buf = nl + 1;
- continue;
- }
-
- /* This line is too long. */
- r = fs->rmargin - 1;
-
- if (fs->wmargin < 0)
- {
- /* Truncate the line by overwriting the excess with the
- newline and anything after it in the buffer. */
- if (nl < fs->p)
- {
- memmove (buf + (r - fs->point_col), nl, fs->p - nl);
- fs->p -= buf + (r - fs->point_col) - nl;
- /* Reset point for the next line and start scanning it. */
- fs->point_col = 0;
- buf += r + 1; /* Skip full line plus \n. */
- }
- else
- {
- /* The buffer ends with a partial line that is beyond the
- maximum line width. Advance point for the characters
- written, and discard those past the max from the buffer. */
- fs->point_col += len;
- fs->p -= fs->point_col - r;
- break;
- }
- }
- else
- {
- /* Do word wrap. Go to the column just past the maximum line
- width and scan back for the beginning of the word there.
- Then insert a line break. */
-
- char *p, *nextline;
- int i;
-
- p = buf + (r + 1 - fs->point_col);
- while (p >= buf && !isblank (*p))
- --p;
- nextline = p + 1; /* This will begin the next line. */
-
- if (nextline > buf)
- {
- /* Swallow separating blanks. */
- if (p >= buf)
- do
- --p;
- while (p >= buf && isblank (*p));
- nl = p + 1; /* The newline will replace the first blank. */
- }
- else
- {
- /* A single word that is greater than the maximum line width.
- Oh well. Put it on an overlong line by itself. */
- p = buf + (r + 1 - fs->point_col);
- /* Find the end of the long word. */
- do
- ++p;
- while (p < nl && !isblank (*p));
- if (p == nl)
- {
- /* It already ends a line. No fussing required. */
- fs->point_col = 0;
- buf = nl + 1;
- continue;
- }
- /* We will move the newline to replace the first blank. */
- nl = p;
- /* Swallow separating blanks. */
- do
- ++p;
- while (isblank (*p));
- /* The next line will start here. */
- nextline = p;
- }
-
- /* Note: There are a bunch of tests below for
- NEXTLINE == BUF + LEN + 1; this case is where NL happens to fall
- at the end of the buffer, and NEXTLINE is in fact empty (and so
- we need not be careful to maintain its contents). */
-
- if (nextline == buf + len + 1
- ? fs->end - nl < fs->wmargin + 1
- : nextline - (nl + 1) < fs->wmargin)
- {
- /* The margin needs more blanks than we removed. */
- if (fs->end - fs->p > fs->wmargin + 1)
- /* Make some space for them. */
- {
- size_t mv = fs->p - nextline;
- memmove (nl + 1 + fs->wmargin, nextline, mv);
- nextline = nl + 1 + fs->wmargin;
- len = nextline + mv - buf;
- *nl++ = '\n';
- }
- else
- /* Output the first line so we can use the space. */
- {
- if (nl > fs->buf)
- FWRITE_UNLOCKED (fs->buf, 1, nl - fs->buf, fs->stream);
- PUTC_UNLOCKED ('\n', fs->stream);
- len += buf - fs->buf;
- nl = buf = fs->buf;
- }
- }
- else
- /* We can fit the newline and blanks in before
- the next word. */
- *nl++ = '\n';
-
- if (nextline - nl >= fs->wmargin
- || (nextline == buf + len + 1 && fs->end - nextline >= fs->wmargin))
- /* Add blanks up to the wrap margin column. */
- for (i = 0; i < fs->wmargin; ++i)
- *nl++ = ' ';
- else
- for (i = 0; i < fs->wmargin; ++i)
- PUTC_UNLOCKED (' ', fs->stream);
-
- /* Copy the tail of the original buffer into the current buffer
- position. */
- if (nl < nextline)
- memmove (nl, nextline, buf + len - nextline);
- len -= nextline - buf;
-
- /* Continue the scan on the remaining lines in the buffer. */
- buf = nl;
-
- /* Restore bufp to include all the remaining text. */
- fs->p = nl + len;
-
- /* Reset the counter of what has been output this line. If wmargin
- is 0, we want to avoid the lmargin getting added, so we set
- point_col to a magic value of -1 in that case. */
- fs->point_col = fs->wmargin ? fs->wmargin : -1;
- }
- }
-
- /* Remember that we've scanned as far as the end of the buffer. */
- fs->point_offs = fs->p - fs->buf;
-}
-
-/* Ensure that FS has space for AMOUNT more bytes in its buffer, either by
- growing the buffer, or by flushing it. True is returned iff we succeed. */
-int
-__argp_fmtstream_ensure (struct argp_fmtstream *fs, size_t amount)
-{
- if ((size_t) (fs->end - fs->p) < amount)
- {
- ssize_t wrote;
-
- /* Flush FS's buffer. */
- __argp_fmtstream_update (fs);
-
- wrote = FWRITE_UNLOCKED (fs->buf, 1, fs->p - fs->buf, fs->stream);
- if (wrote == fs->p - fs->buf)
- {
- fs->p = fs->buf;
- fs->point_offs = 0;
- }
- else
- {
- fs->p -= wrote;
- fs->point_offs -= wrote;
- memmove (fs->buf, fs->buf + wrote, fs->p - fs->buf);
- return 0;
- }
-
- if ((size_t) (fs->end - fs->buf) < amount)
- /* Gotta grow the buffer. */
- {
- size_t new_size = fs->end - fs->buf + amount;
- char *new_buf = realloc (fs->buf, new_size);
-
- if (! new_buf)
- {
- __set_errno (ENOMEM);
- return 0;
- }
-
- fs->buf = new_buf;
- fs->end = new_buf + new_size;
- fs->p = fs->buf;
- }
- }
-
- return 1;
-}
-
-ssize_t
-__argp_fmtstream_printf (struct argp_fmtstream *fs, const char *fmt, ...)
-{
- size_t out;
- size_t avail;
- size_t size_guess = PRINTF_SIZE_GUESS; /* How much space to reserve. */
-
- do
- {
- va_list args;
-
- if (! __argp_fmtstream_ensure (fs, size_guess))
- return -1;
-
- va_start (args, fmt);
- avail = fs->end - fs->p;
- out = __vsnprintf (fs->p, avail, fmt, args);
- va_end (args);
- if (out >= avail)
- size_guess = out + 1;
- }
- while (out >= avail);
-
- fs->p += out;
-
- return out;
-}
-#ifdef weak_alias
-weak_alias (__argp_fmtstream_printf, argp_fmtstream_printf)
-#endif
-
-/* Duplicate the inline definitions in argp-fmtstream.h, for compilers
- * that don't do inlining. */
-size_t
-__argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len)
-{
- if (__fs->p + __len <= __fs->end || __argp_fmtstream_ensure (__fs, __len))
- {
- memcpy (__fs->p, __str, __len);
- __fs->p += __len;
- return __len;
- }
- else
- return 0;
-}
-
-int
-__argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str)
-{
- size_t __len = strlen (__str);
- if (__len)
- {
- size_t __wrote = __argp_fmtstream_write (__fs, __str, __len);
- return __wrote == __len ? 0 : -1;
- }
- else
- return 0;
-}
-
-int
-__argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch)
-{
- if (__fs->p < __fs->end || __argp_fmtstream_ensure (__fs, 1))
- return *__fs->p++ = __ch;
- else
- return EOF;
-}
-
-/* Set __FS's left margin to __LMARGIN and return the old value. */
-size_t
-__argp_fmtstream_set_lmargin (argp_fmtstream_t __fs, size_t __lmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->lmargin;
- __fs->lmargin = __lmargin;
- return __old;
-}
-
-/* Set __FS's right margin to __RMARGIN and return the old value. */
-size_t
-__argp_fmtstream_set_rmargin (argp_fmtstream_t __fs, size_t __rmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->rmargin;
- __fs->rmargin = __rmargin;
- return __old;
-}
-
-/* Set FS's wrap margin to __WMARGIN and return the old value. */
-size_t
-__argp_fmtstream_set_wmargin (argp_fmtstream_t __fs, size_t __wmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->wmargin;
- __fs->wmargin = __wmargin;
- return __old;
-}
-
-/* Return the column number of the current output point in __FS. */
-size_t
-__argp_fmtstream_point (argp_fmtstream_t __fs)
-{
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- return __fs->point_col >= 0 ? __fs->point_col : 0;
-}
-
-#endif /* !ARGP_FMTSTREAM_USE_LINEWRAP */
diff --git a/argp-standalone/argp-fmtstream.h b/argp-standalone/argp-fmtstream.h
deleted file mode 100644
index e797b119ebd..00000000000
--- a/argp-standalone/argp-fmtstream.h
+++ /dev/null
@@ -1,319 +0,0 @@
-/* Word-wrapping and line-truncating streams.
- Copyright (C) 1997, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* This package emulates glibc `line_wrap_stream' semantics for systems that
- don't have that. If the system does have it, it is just a wrapper for
- that. This header file is only used internally while compiling argp, and
- shouldn't be installed. */
-
-#ifndef _ARGP_FMTSTREAM_H
-#define _ARGP_FMTSTREAM_H
-
-#include <stdio.h>
-#include <string.h>
-
-#if HAVE_UNISTD_H
-# include <unistd.h>
-#else
-/* This is a kludge to make the code compile on windows. Perhaps it
- would be better to just replace ssize_t with int through out the
- code. */
-# define ssize_t int
-#endif
-
-#if _LIBC || (defined (HAVE_FLOCKFILE) && defined(HAVE_PUTC_UNLOCKED) \
- && defined (HAVE_FPUTS_UNLOCKED) && defined (HAVE_FWRITE_UNLOCKED) )
-/* Use locking funxtions */
-# define FLOCKFILE(f) flockfile(f)
-# define FUNLOCKFILE(f) funlockfile(f)
-# define PUTC_UNLOCKED(c, f) putc_unlocked((c), (f))
-# define FPUTS_UNLOCKED(s, f) fputs_unlocked((s), (f))
-# define FWRITE_UNLOCKED(b, s, n, f) fwrite_unlocked((b), (s), (n), (f))
-#else
-/* Disable stdio locking */
-# define FLOCKFILE(f)
-# define FUNLOCKFILE(f)
-# define PUTC_UNLOCKED(c, f) putc((c), (f))
-# define FPUTS_UNLOCKED(s, f) fputs((s), (f))
-# define FWRITE_UNLOCKED(b, s, n, f) fwrite((b), (s), (n), (f))
-#endif /* No thread safe i/o */
-
-#if (_LIBC - 0 && !defined (USE_IN_LIBIO)) \
- || (defined (__GNU_LIBRARY__) && defined (HAVE_LINEWRAP_H))
-/* line_wrap_stream is available, so use that. */
-#define ARGP_FMTSTREAM_USE_LINEWRAP
-#endif
-
-#ifdef ARGP_FMTSTREAM_USE_LINEWRAP
-/* Just be a simple wrapper for line_wrap_stream; the semantics are
- *slightly* different, as line_wrap_stream doesn't actually make a new
- object, it just modifies the given stream (reversibly) to do
- line-wrapping. Since we control who uses this code, it doesn't matter. */
-
-#include <linewrap.h>
-
-typedef FILE *argp_fmtstream_t;
-
-#define argp_make_fmtstream line_wrap_stream
-#define __argp_make_fmtstream line_wrap_stream
-#define argp_fmtstream_free line_unwrap_stream
-#define __argp_fmtstream_free line_unwrap_stream
-
-#define __argp_fmtstream_putc(fs,ch) putc(ch,fs)
-#define argp_fmtstream_putc(fs,ch) putc(ch,fs)
-#define __argp_fmtstream_puts(fs,str) fputs(str,fs)
-#define argp_fmtstream_puts(fs,str) fputs(str,fs)
-#define __argp_fmtstream_write(fs,str,len) fwrite(str,1,len,fs)
-#define argp_fmtstream_write(fs,str,len) fwrite(str,1,len,fs)
-#define __argp_fmtstream_printf fprintf
-#define argp_fmtstream_printf fprintf
-
-#define __argp_fmtstream_lmargin line_wrap_lmargin
-#define argp_fmtstream_lmargin line_wrap_lmargin
-#define __argp_fmtstream_set_lmargin line_wrap_set_lmargin
-#define argp_fmtstream_set_lmargin line_wrap_set_lmargin
-#define __argp_fmtstream_rmargin line_wrap_rmargin
-#define argp_fmtstream_rmargin line_wrap_rmargin
-#define __argp_fmtstream_set_rmargin line_wrap_set_rmargin
-#define argp_fmtstream_set_rmargin line_wrap_set_rmargin
-#define __argp_fmtstream_wmargin line_wrap_wmargin
-#define argp_fmtstream_wmargin line_wrap_wmargin
-#define __argp_fmtstream_set_wmargin line_wrap_set_wmargin
-#define argp_fmtstream_set_wmargin line_wrap_set_wmargin
-#define __argp_fmtstream_point line_wrap_point
-#define argp_fmtstream_point line_wrap_point
-
-#else /* !ARGP_FMTSTREAM_USE_LINEWRAP */
-/* Guess we have to define our own version. */
-
-#ifndef __const
-#define __const const
-#endif
-
-
-struct argp_fmtstream
-{
- FILE *stream; /* The stream we're outputting to. */
-
- size_t lmargin, rmargin; /* Left and right margins. */
- ssize_t wmargin; /* Margin to wrap to, or -1 to truncate. */
-
- /* Point in buffer to which we've processed for wrapping, but not output. */
- size_t point_offs;
- /* Output column at POINT_OFFS, or -1 meaning 0 but don't add lmargin. */
- ssize_t point_col;
-
- char *buf; /* Output buffer. */
- char *p; /* Current end of text in BUF. */
- char *end; /* Absolute end of BUF. */
-};
-
-typedef struct argp_fmtstream *argp_fmtstream_t;
-
-/* Return an argp_fmtstream that outputs to STREAM, and which prefixes lines
- written on it with LMARGIN spaces and limits them to RMARGIN columns
- total. If WMARGIN >= 0, words that extend past RMARGIN are wrapped by
- replacing the whitespace before them with a newline and WMARGIN spaces.
- Otherwise, chars beyond RMARGIN are simply dropped until a newline.
- Returns NULL if there was an error. */
-extern argp_fmtstream_t __argp_make_fmtstream (FILE *__stream,
- size_t __lmargin,
- size_t __rmargin,
- ssize_t __wmargin);
-extern argp_fmtstream_t argp_make_fmtstream (FILE *__stream,
- size_t __lmargin,
- size_t __rmargin,
- ssize_t __wmargin);
-
-/* Flush __FS to its stream, and free it (but don't close the stream). */
-extern void __argp_fmtstream_free (argp_fmtstream_t __fs);
-extern void argp_fmtstream_free (argp_fmtstream_t __fs);
-
-extern ssize_t __argp_fmtstream_printf (argp_fmtstream_t __fs,
- __const char *__fmt, ...)
- PRINTF_STYLE(2,3);
-extern ssize_t argp_fmtstream_printf (argp_fmtstream_t __fs,
- __const char *__fmt, ...)
- PRINTF_STYLE(2,3);
-
-extern int __argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch);
-extern int argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch);
-
-extern int __argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str);
-extern int argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str);
-
-extern size_t __argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len);
-extern size_t argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len);
-
-/* Access macros for various bits of state. */
-#define argp_fmtstream_lmargin(__fs) ((__fs)->lmargin)
-#define argp_fmtstream_rmargin(__fs) ((__fs)->rmargin)
-#define argp_fmtstream_wmargin(__fs) ((__fs)->wmargin)
-#define __argp_fmtstream_lmargin argp_fmtstream_lmargin
-#define __argp_fmtstream_rmargin argp_fmtstream_rmargin
-#define __argp_fmtstream_wmargin argp_fmtstream_wmargin
-
-/* Set __FS's left margin to LMARGIN and return the old value. */
-extern size_t argp_fmtstream_set_lmargin (argp_fmtstream_t __fs,
- size_t __lmargin);
-extern size_t __argp_fmtstream_set_lmargin (argp_fmtstream_t __fs,
- size_t __lmargin);
-
-/* Set __FS's right margin to __RMARGIN and return the old value. */
-extern size_t argp_fmtstream_set_rmargin (argp_fmtstream_t __fs,
- size_t __rmargin);
-extern size_t __argp_fmtstream_set_rmargin (argp_fmtstream_t __fs,
- size_t __rmargin);
-
-/* Set __FS's wrap margin to __WMARGIN and return the old value. */
-extern size_t argp_fmtstream_set_wmargin (argp_fmtstream_t __fs,
- size_t __wmargin);
-extern size_t __argp_fmtstream_set_wmargin (argp_fmtstream_t __fs,
- size_t __wmargin);
-
-/* Return the column number of the current output point in __FS. */
-extern size_t argp_fmtstream_point (argp_fmtstream_t __fs);
-extern size_t __argp_fmtstream_point (argp_fmtstream_t __fs);
-
-/* Internal routines. */
-extern void _argp_fmtstream_update (argp_fmtstream_t __fs);
-extern void __argp_fmtstream_update (argp_fmtstream_t __fs);
-extern int _argp_fmtstream_ensure (argp_fmtstream_t __fs, size_t __amount);
-extern int __argp_fmtstream_ensure (argp_fmtstream_t __fs, size_t __amount);
-
-#ifdef __OPTIMIZE__
-/* Inline versions of above routines. */
-
-#if !_LIBC
-#define __argp_fmtstream_putc argp_fmtstream_putc
-#define __argp_fmtstream_puts argp_fmtstream_puts
-#define __argp_fmtstream_write argp_fmtstream_write
-#define __argp_fmtstream_set_lmargin argp_fmtstream_set_lmargin
-#define __argp_fmtstream_set_rmargin argp_fmtstream_set_rmargin
-#define __argp_fmtstream_set_wmargin argp_fmtstream_set_wmargin
-#define __argp_fmtstream_point argp_fmtstream_point
-#define __argp_fmtstream_update _argp_fmtstream_update
-#define __argp_fmtstream_ensure _argp_fmtstream_ensure
-#endif
-
-#ifndef ARGP_FS_EI
-#define ARGP_FS_EI extern inline
-#endif
-
-ARGP_FS_EI size_t
-__argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len)
-{
- if (__fs->p + __len <= __fs->end || __argp_fmtstream_ensure (__fs, __len))
- {
- memcpy (__fs->p, __str, __len);
- __fs->p += __len;
- return __len;
- }
- else
- return 0;
-}
-
-ARGP_FS_EI int
-__argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str)
-{
- size_t __len = strlen (__str);
- if (__len)
- {
- size_t __wrote = __argp_fmtstream_write (__fs, __str, __len);
- return __wrote == __len ? 0 : -1;
- }
- else
- return 0;
-}
-
-ARGP_FS_EI int
-__argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch)
-{
- if (__fs->p < __fs->end || __argp_fmtstream_ensure (__fs, 1))
- return *__fs->p++ = __ch;
- else
- return EOF;
-}
-
-/* Set __FS's left margin to __LMARGIN and return the old value. */
-ARGP_FS_EI size_t
-__argp_fmtstream_set_lmargin (argp_fmtstream_t __fs, size_t __lmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->lmargin;
- __fs->lmargin = __lmargin;
- return __old;
-}
-
-/* Set __FS's right margin to __RMARGIN and return the old value. */
-ARGP_FS_EI size_t
-__argp_fmtstream_set_rmargin (argp_fmtstream_t __fs, size_t __rmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->rmargin;
- __fs->rmargin = __rmargin;
- return __old;
-}
-
-/* Set FS's wrap margin to __WMARGIN and return the old value. */
-ARGP_FS_EI size_t
-__argp_fmtstream_set_wmargin (argp_fmtstream_t __fs, size_t __wmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->wmargin;
- __fs->wmargin = __wmargin;
- return __old;
-}
-
-/* Return the column number of the current output point in __FS. */
-ARGP_FS_EI size_t
-__argp_fmtstream_point (argp_fmtstream_t __fs)
-{
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- return __fs->point_col >= 0 ? __fs->point_col : 0;
-}
-
-#if !_LIBC
-#undef __argp_fmtstream_putc
-#undef __argp_fmtstream_puts
-#undef __argp_fmtstream_write
-#undef __argp_fmtstream_set_lmargin
-#undef __argp_fmtstream_set_rmargin
-#undef __argp_fmtstream_set_wmargin
-#undef __argp_fmtstream_point
-#undef __argp_fmtstream_update
-#undef __argp_fmtstream_ensure
-#endif
-
-#endif /* __OPTIMIZE__ */
-
-#endif /* ARGP_FMTSTREAM_USE_LINEWRAP */
-
-#endif /* argp-fmtstream.h */
diff --git a/argp-standalone/argp-help.c b/argp-standalone/argp-help.c
deleted file mode 100644
index ced78c4cb26..00000000000
--- a/argp-standalone/argp-help.c
+++ /dev/null
@@ -1,1849 +0,0 @@
-/* Hierarchial argument parsing help output
- Copyright (C) 1995,96,97,98,99,2000, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifndef _GNU_SOURCE
-# define _GNU_SOURCE 1
-#endif
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#endif
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <stdarg.h>
-#include <ctype.h>
-#if HAVE_MALLOC_H
-/* Needed, for alloca on windows */
-# include <malloc.h>
-#endif
-
-#ifndef _
-/* This is for other GNU distributions with internationalized messages. */
-# if defined HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-# ifdef _LIBC
-# undef dgettext
-# define dgettext(domain, msgid) __dcgettext (domain, msgid, LC_MESSAGES)
-# endif
-# else
-# define dgettext(domain, msgid) (msgid)
-# endif
-#endif
-
-#include "argp.h"
-#include "argp-fmtstream.h"
-#include "argp-namefrob.h"
-
-
-#ifndef _LIBC
-# ifndef __strchrnul
-# define __strchrnul strchrnul
-# endif
-# ifndef __mempcpy
-# define __mempcpy mempcpy
-# endif
-/* We need to use a different name, as __strndup is likely a macro. */
-# define STRNDUP strndup
-# if HAVE_STRERROR
-# define STRERROR strerror
-# else
-# define STRERROR(x) (sys_errlist[x])
-# endif
-#else /* _LIBC */
-# define FLOCKFILE __flockfile
-# define FUNLOCKFILE __funlockfile
-# define STRNDUP __strndup
-# define STRERROR strerror
-#endif
-
-#if !_LIBC
-# if !HAVE_STRNDUP
-char *strndup (const char *s, size_t size);
-# endif /* !HAVE_STRNDUP */
-
-# if !HAVE_MEMPCPY
-void *mempcpy (void *to, const void *from, size_t size);
-# endif /* !HAVE_MEMPCPY */
-
-# if !HAVE_STRCHRNUL
-char *strchrnul(const char *s, int c);
-# endif /* !HAVE_STRCHRNUL */
-
-# if !HAVE_STRCASECMP
-int strcasecmp(const char *s1, const char *s2);
-#endif
-
-#endif /* !_LIBC */
-
-
-/* User-selectable (using an environment variable) formatting parameters.
-
- These may be specified in an environment variable called `ARGP_HELP_FMT',
- with a contents like: VAR1=VAL1,VAR2=VAL2,BOOLVAR2,no-BOOLVAR2
- Where VALn must be a positive integer. The list of variables is in the
- UPARAM_NAMES vector, below. */
-
-/* Default parameters. */
-#define DUP_ARGS 0 /* True if option argument can be duplicated. */
-#define DUP_ARGS_NOTE 1 /* True to print a note about duplicate args. */
-#define SHORT_OPT_COL 2 /* column in which short options start */
-#define LONG_OPT_COL 6 /* column in which long options start */
-#define DOC_OPT_COL 2 /* column in which doc options start */
-#define OPT_DOC_COL 29 /* column in which option text starts */
-#define HEADER_COL 1 /* column in which group headers are printed */
-#define USAGE_INDENT 12 /* indentation of wrapped usage lines */
-#define RMARGIN 79 /* right margin used for wrapping */
-
-/* User-selectable (using an environment variable) formatting parameters.
- They must all be of type `int' for the parsing code to work. */
-struct uparams
-{
- /* If true, arguments for an option are shown with both short and long
- options, even when a given option has both, e.g. `-x ARG, --longx=ARG'.
- If false, then if an option has both, the argument is only shown with
- the long one, e.g., `-x, --longx=ARG', and a message indicating that
- this really means both is printed below the options. */
- int dup_args;
-
- /* This is true if when DUP_ARGS is false, and some duplicate arguments have
- been suppressed, an explanatory message should be printed. */
- int dup_args_note;
-
- /* Various output columns. */
- int short_opt_col;
- int long_opt_col;
- int doc_opt_col;
- int opt_doc_col;
- int header_col;
- int usage_indent;
- int rmargin;
-
- int valid; /* True when the values in here are valid. */
-};
-
-/* This is a global variable, as user options are only ever read once. */
-static struct uparams uparams = {
- DUP_ARGS, DUP_ARGS_NOTE,
- SHORT_OPT_COL, LONG_OPT_COL, DOC_OPT_COL, OPT_DOC_COL, HEADER_COL,
- USAGE_INDENT, RMARGIN,
- 0
-};
-
-/* A particular uparam, and what the user name is. */
-struct uparam_name
-{
- const char *name; /* User name. */
- int is_bool; /* Whether it's `boolean'. */
- size_t uparams_offs; /* Location of the (int) field in UPARAMS. */
-};
-
-/* The name-field mappings we know about. */
-static const struct uparam_name uparam_names[] =
-{
- { "dup-args", 1, offsetof (struct uparams, dup_args) },
- { "dup-args-note", 1, offsetof (struct uparams, dup_args_note) },
- { "short-opt-col", 0, offsetof (struct uparams, short_opt_col) },
- { "long-opt-col", 0, offsetof (struct uparams, long_opt_col) },
- { "doc-opt-col", 0, offsetof (struct uparams, doc_opt_col) },
- { "opt-doc-col", 0, offsetof (struct uparams, opt_doc_col) },
- { "header-col", 0, offsetof (struct uparams, header_col) },
- { "usage-indent", 0, offsetof (struct uparams, usage_indent) },
- { "rmargin", 0, offsetof (struct uparams, rmargin) },
- { 0, 0, 0 }
-};
-
-/* Read user options from the environment, and fill in UPARAMS appropiately. */
-static void
-fill_in_uparams (const struct argp_state *state)
-{
-
- const char *var = getenv ("ARGP_HELP_FMT");
-
-#define SKIPWS(p) do { while (isspace (*p)) p++; } while (0);
-
- if (var)
- /* Parse var. */
- while (*var)
- {
- SKIPWS (var);
-
- if (isalpha (*var))
- {
- size_t var_len;
- const struct uparam_name *un;
- int unspec = 0, val = 0;
- const char *arg = var;
-
- while (isalnum (*arg) || *arg == '-' || *arg == '_')
- arg++;
- var_len = arg - var;
-
- SKIPWS (arg);
-
- if (*arg == '\0' || *arg == ',')
- unspec = 1;
- else if (*arg == '=')
- {
- arg++;
- SKIPWS (arg);
- }
-
- if (unspec)
- {
- if (var[0] == 'n' && var[1] == 'o' && var[2] == '-')
- {
- val = 0;
- var += 3;
- var_len -= 3;
- }
- else
- val = 1;
- }
- else if (isdigit (*arg))
- {
- val = atoi (arg);
- while (isdigit (*arg))
- arg++;
- SKIPWS (arg);
- }
-
- for (un = uparam_names; un->name; un++)
- if (strlen (un->name) == var_len
- && strncmp (var, un->name, var_len) == 0)
- {
- if (unspec && !un->is_bool)
- __argp_failure (state, 0, 0,
- dgettext (state->root_argp->argp_domain, "\
-%.*s: ARGP_HELP_FMT parameter requires a value"),
- (int) var_len, var);
- else
- *(int *)((char *)&uparams + un->uparams_offs) = val;
- break;
- }
- if (! un->name)
- __argp_failure (state, 0, 0,
- dgettext (state->root_argp->argp_domain, "\
-%.*s: Unknown ARGP_HELP_FMT parameter"),
- (int) var_len, var);
-
- var = arg;
- if (*var == ',')
- var++;
- }
- else if (*var)
- {
- __argp_failure (state, 0, 0,
- dgettext (state->root_argp->argp_domain,
- "Garbage in ARGP_HELP_FMT: %s"), var);
- break;
- }
- }
-}
-
-/* Returns true if OPT hasn't been marked invisible. Visibility only affects
- whether OPT is displayed or used in sorting, not option shadowing. */
-#define ovisible(opt) (! ((opt)->flags & OPTION_HIDDEN))
-
-/* Returns true if OPT is an alias for an earlier option. */
-#define oalias(opt) ((opt)->flags & OPTION_ALIAS)
-
-/* Returns true if OPT is an documentation-only entry. */
-#define odoc(opt) ((opt)->flags & OPTION_DOC)
-
-/* Returns true if OPT is the end-of-list marker for a list of options. */
-#define oend(opt) __option_is_end (opt)
-
-/* Returns true if OPT has a short option. */
-#define oshort(opt) __option_is_short (opt)
-
-/*
- The help format for a particular option is like:
-
- -xARG, -yARG, --long1=ARG, --long2=ARG Documentation...
-
- Where ARG will be omitted if there's no argument, for this option, or
- will be surrounded by "[" and "]" appropiately if the argument is
- optional. The documentation string is word-wrapped appropiately, and if
- the list of options is long enough, it will be started on a separate line.
- If there are no short options for a given option, the first long option is
- indented slighly in a way that's supposed to make most long options appear
- to be in a separate column.
-
- For example, the following output (from ps):
-
- -p PID, --pid=PID List the process PID
- --pgrp=PGRP List processes in the process group PGRP
- -P, -x, --no-parent Include processes without parents
- -Q, --all-fields Don't elide unusable fields (normally if there's
- some reason ps can't print a field for any
- process, it's removed from the output entirely)
- -r, --reverse, --gratuitously-long-reverse-option
- Reverse the order of any sort
- --session[=SID] Add the processes from the session SID (which
- defaults to the sid of the current process)
-
- Here are some more options:
- -f ZOT, --foonly=ZOT Glork a foonly
- -z, --zaza Snit a zar
-
- -?, --help Give this help list
- --usage Give a short usage message
- -V, --version Print program version
-
- The struct argp_option array for the above could look like:
-
- {
- {"pid", 'p', "PID", 0, "List the process PID"},
- {"pgrp", OPT_PGRP, "PGRP", 0, "List processes in the process group PGRP"},
- {"no-parent", 'P', 0, 0, "Include processes without parents"},
- {0, 'x', 0, OPTION_ALIAS},
- {"all-fields",'Q', 0, 0, "Don't elide unusable fields (normally"
- " if there's some reason ps can't"
- " print a field for any process, it's"
- " removed from the output entirely)" },
- {"reverse", 'r', 0, 0, "Reverse the order of any sort"},
- {"gratuitously-long-reverse-option", 0, 0, OPTION_ALIAS},
- {"session", OPT_SESS, "SID", OPTION_ARG_OPTIONAL,
- "Add the processes from the session"
- " SID (which defaults to the sid of"
- " the current process)" },
-
- {0,0,0,0, "Here are some more options:"},
- {"foonly", 'f', "ZOT", 0, "Glork a foonly"},
- {"zaza", 'z', 0, 0, "Snit a zar"},
-
- {0}
- }
-
- Note that the last three options are automatically supplied by argp_parse,
- unless you tell it not to with ARGP_NO_HELP.
-
-*/
-
-/* Returns true if CH occurs between BEG and END. */
-static int
-find_char (char ch, char *beg, char *end)
-{
- while (beg < end)
- if (*beg == ch)
- return 1;
- else
- beg++;
- return 0;
-}
-
-struct hol_cluster; /* fwd decl */
-
-struct hol_entry
-{
- /* First option. */
- const struct argp_option *opt;
- /* Number of options (including aliases). */
- unsigned num;
-
- /* A pointers into the HOL's short_options field, to the first short option
- letter for this entry. The order of the characters following this point
- corresponds to the order of options pointed to by OPT, and there are at
- most NUM. A short option recorded in a option following OPT is only
- valid if it occurs in the right place in SHORT_OPTIONS (otherwise it's
- probably been shadowed by some other entry). */
- char *short_options;
-
- /* Entries are sorted by their group first, in the order:
- 1, 2, ..., n, 0, -m, ..., -2, -1
- and then alphabetically within each group. The default is 0. */
- int group;
-
- /* The cluster of options this entry belongs to, or 0 if none. */
- struct hol_cluster *cluster;
-
- /* The argp from which this option came. */
- const struct argp *argp;
-};
-
-/* A cluster of entries to reflect the argp tree structure. */
-struct hol_cluster
-{
- /* A descriptive header printed before options in this cluster. */
- const char *header;
-
- /* Used to order clusters within the same group with the same parent,
- according to the order in which they occurred in the parent argp's child
- list. */
- int index;
-
- /* How to sort this cluster with respect to options and other clusters at the
- same depth (clusters always follow options in the same group). */
- int group;
-
- /* The cluster to which this cluster belongs, or 0 if it's at the base
- level. */
- struct hol_cluster *parent;
-
- /* The argp from which this cluster is (eventually) derived. */
- const struct argp *argp;
-
- /* The distance this cluster is from the root. */
- int depth;
-
- /* Clusters in a given hol are kept in a linked list, to make freeing them
- possible. */
- struct hol_cluster *next;
-};
-
-/* A list of options for help. */
-struct hol
-{
- /* An array of hol_entry's. */
- struct hol_entry *entries;
- /* The number of entries in this hol. If this field is zero, the others
- are undefined. */
- unsigned num_entries;
-
- /* A string containing all short options in this HOL. Each entry contains
- pointers into this string, so the order can't be messed with blindly. */
- char *short_options;
-
- /* Clusters of entries in this hol. */
- struct hol_cluster *clusters;
-};
-
-/* Create a struct hol from the options in ARGP. CLUSTER is the
- hol_cluster in which these entries occur, or 0, if at the root. */
-static struct hol *
-make_hol (const struct argp *argp, struct hol_cluster *cluster)
-{
- char *so;
- const struct argp_option *o;
- const struct argp_option *opts = argp->options;
- struct hol_entry *entry;
- unsigned num_short_options = 0;
- struct hol *hol = malloc (sizeof (struct hol));
-
- assert (hol);
-
- hol->num_entries = 0;
- hol->clusters = 0;
-
- if (opts)
- {
- int cur_group = 0;
-
- /* The first option must not be an alias. */
- assert (! oalias (opts));
-
- /* Calculate the space needed. */
- for (o = opts; ! oend (o); o++)
- {
- if (! oalias (o))
- hol->num_entries++;
- if (oshort (o))
- num_short_options++; /* This is an upper bound. */
- }
-
- hol->entries = malloc (sizeof (struct hol_entry) * hol->num_entries);
- hol->short_options = malloc (num_short_options + 1);
-
- assert (hol->entries && hol->short_options);
-
- /* Fill in the entries. */
- so = hol->short_options;
- for (o = opts, entry = hol->entries; ! oend (o); entry++)
- {
- entry->opt = o;
- entry->num = 0;
- entry->short_options = so;
- entry->group = cur_group =
- o->group
- ? o->group
- : ((!o->name && !o->key)
- ? cur_group + 1
- : cur_group);
- entry->cluster = cluster;
- entry->argp = argp;
-
- do
- {
- entry->num++;
- if (oshort (o) && ! find_char (o->key, hol->short_options, so))
- /* O has a valid short option which hasn't already been used.*/
- *so++ = o->key;
- o++;
- }
- while (! oend (o) && oalias (o));
- }
- *so = '\0'; /* null terminated so we can find the length */
- }
-
- return hol;
-}
-
-/* Add a new cluster to HOL, with the given GROUP and HEADER (taken from the
- associated argp child list entry), INDEX, and PARENT, and return a pointer
- to it. ARGP is the argp that this cluster results from. */
-static struct hol_cluster *
-hol_add_cluster (struct hol *hol, int group, const char *header, int index,
- struct hol_cluster *parent, const struct argp *argp)
-{
- struct hol_cluster *cl = malloc (sizeof (struct hol_cluster));
- if (cl)
- {
- cl->group = group;
- cl->header = header;
-
- cl->index = index;
- cl->parent = parent;
- cl->argp = argp;
- cl->depth = parent ? parent->depth + 1 : 0;
-
- cl->next = hol->clusters;
- hol->clusters = cl;
- }
- return cl;
-}
-
-/* Free HOL and any resources it uses. */
-static void
-hol_free (struct hol *hol)
-{
- struct hol_cluster *cl = hol->clusters;
-
- while (cl)
- {
- struct hol_cluster *next = cl->next;
- free (cl);
- cl = next;
- }
-
- if (hol->num_entries > 0)
- {
- free (hol->entries);
- free (hol->short_options);
- }
-
- free (hol);
-}
-
-static inline int
-hol_entry_short_iterate (const struct hol_entry *entry,
- int (*func)(const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain, void *cookie),
- const char *domain, void *cookie)
-{
- unsigned nopts;
- int val = 0;
- const struct argp_option *opt, *real = entry->opt;
- char *so = entry->short_options;
-
- for (opt = real, nopts = entry->num; nopts > 0 && !val; opt++, nopts--)
- if (oshort (opt) && *so == opt->key)
- {
- if (!oalias (opt))
- real = opt;
- if (ovisible (opt))
- val = (*func)(opt, real, domain, cookie);
- so++;
- }
-
- return val;
-}
-
-static inline int
-hol_entry_long_iterate (const struct hol_entry *entry,
- int (*func)(const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain, void *cookie),
- const char *domain, void *cookie)
-{
- unsigned nopts;
- int val = 0;
- const struct argp_option *opt, *real = entry->opt;
-
- for (opt = real, nopts = entry->num; nopts > 0 && !val; opt++, nopts--)
- if (opt->name)
- {
- if (!oalias (opt))
- real = opt;
- if (ovisible (opt))
- val = (*func)(opt, real, domain, cookie);
- }
-
- return val;
-}
-
-/* Iterator that returns true for the first short option. */
-static inline int
-until_short (const struct argp_option *opt, const struct argp_option *real UNUSED,
- const char *domain UNUSED, void *cookie UNUSED)
-{
- return oshort (opt) ? opt->key : 0;
-}
-
-/* Returns the first valid short option in ENTRY, or 0 if there is none. */
-static char
-hol_entry_first_short (const struct hol_entry *entry)
-{
- return hol_entry_short_iterate (entry, until_short,
- entry->argp->argp_domain, 0);
-}
-
-/* Returns the first valid long option in ENTRY, or 0 if there is none. */
-static const char *
-hol_entry_first_long (const struct hol_entry *entry)
-{
- const struct argp_option *opt;
- unsigned num;
- for (opt = entry->opt, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- return opt->name;
- return 0;
-}
-
-/* Returns the entry in HOL with the long option name NAME, or 0 if there is
- none. */
-static struct hol_entry *
-hol_find_entry (struct hol *hol, const char *name)
-{
- struct hol_entry *entry = hol->entries;
- unsigned num_entries = hol->num_entries;
-
- while (num_entries-- > 0)
- {
- const struct argp_option *opt = entry->opt;
- unsigned num_opts = entry->num;
-
- while (num_opts-- > 0)
- if (opt->name && ovisible (opt) && strcmp (opt->name, name) == 0)
- return entry;
- else
- opt++;
-
- entry++;
- }
-
- return 0;
-}
-
-/* If an entry with the long option NAME occurs in HOL, set it's special
- sort position to GROUP. */
-static void
-hol_set_group (struct hol *hol, const char *name, int group)
-{
- struct hol_entry *entry = hol_find_entry (hol, name);
- if (entry)
- entry->group = group;
-}
-
-/* Order by group: 0, 1, 2, ..., n, -m, ..., -2, -1.
- EQ is what to return if GROUP1 and GROUP2 are the same. */
-static int
-group_cmp (int group1, int group2, int eq)
-{
- if (group1 == group2)
- return eq;
- else if ((group1 < 0 && group2 < 0) || (group1 >= 0 && group2 >= 0))
- return group1 - group2;
- else
- return group2 - group1;
-}
-
-/* Compare clusters CL1 & CL2 by the order that they should appear in
- output. */
-static int
-hol_cluster_cmp (const struct hol_cluster *cl1, const struct hol_cluster *cl2)
-{
- /* If one cluster is deeper than the other, use its ancestor at the same
- level, so that finding the common ancestor is straightforward. */
- while (cl1->depth < cl2->depth)
- cl1 = cl1->parent;
- while (cl2->depth < cl1->depth)
- cl2 = cl2->parent;
-
- /* Now reduce both clusters to their ancestors at the point where both have
- a common parent; these can be directly compared. */
- while (cl1->parent != cl2->parent)
- cl1 = cl1->parent, cl2 = cl2->parent;
-
- return group_cmp (cl1->group, cl2->group, cl2->index - cl1->index);
-}
-
-/* Return the ancestor of CL that's just below the root (i.e., has a parent
- of 0). */
-static struct hol_cluster *
-hol_cluster_base (struct hol_cluster *cl)
-{
- while (cl->parent)
- cl = cl->parent;
- return cl;
-}
-
-/* Return true if CL1 is a child of CL2. */
-static int
-hol_cluster_is_child (const struct hol_cluster *cl1,
- const struct hol_cluster *cl2)
-{
- while (cl1 && cl1 != cl2)
- cl1 = cl1->parent;
- return cl1 == cl2;
-}
-
-/* Given the name of a OPTION_DOC option, modifies NAME to start at the tail
- that should be used for comparisons, and returns true iff it should be
- treated as a non-option. */
-
-/* FIXME: Can we use unsigned char * for the argument? */
-static int
-canon_doc_option (const char **name)
-{
- int non_opt;
- /* Skip initial whitespace. */
- while (isspace ( (unsigned char) **name))
- (*name)++;
- /* Decide whether this looks like an option (leading `-') or not. */
- non_opt = (**name != '-');
- /* Skip until part of name used for sorting. */
- while (**name && !isalnum ( (unsigned char) **name))
- (*name)++;
- return non_opt;
-}
-
-/* Order ENTRY1 & ENTRY2 by the order which they should appear in a help
- listing. */
-static int
-hol_entry_cmp (const struct hol_entry *entry1,
- const struct hol_entry *entry2)
-{
- /* The group numbers by which the entries should be ordered; if either is
- in a cluster, then this is just the group within the cluster. */
- int group1 = entry1->group, group2 = entry2->group;
-
- if (entry1->cluster != entry2->cluster)
- {
- /* The entries are not within the same cluster, so we can't compare them
- directly, we have to use the appropiate clustering level too. */
- if (! entry1->cluster)
- /* ENTRY1 is at the `base level', not in a cluster, so we have to
- compare it's group number with that of the base cluster in which
- ENTRY2 resides. Note that if they're in the same group, the
- clustered option always comes laster. */
- return group_cmp (group1, hol_cluster_base (entry2->cluster)->group, -1);
- else if (! entry2->cluster)
- /* Likewise, but ENTRY2's not in a cluster. */
- return group_cmp (hol_cluster_base (entry1->cluster)->group, group2, 1);
- else
- /* Both entries are in clusters, we can just compare the clusters. */
- return hol_cluster_cmp (entry1->cluster, entry2->cluster);
- }
- else if (group1 == group2)
- /* The entries are both in the same cluster and group, so compare them
- alphabetically. */
- {
- int short1 = hol_entry_first_short (entry1);
- int short2 = hol_entry_first_short (entry2);
- int doc1 = odoc (entry1->opt);
- int doc2 = odoc (entry2->opt);
- /* FIXME: Can we use unsigned char * instead? */
- const char *long1 = hol_entry_first_long (entry1);
- const char *long2 = hol_entry_first_long (entry2);
-
- if (doc1)
- doc1 = canon_doc_option (&long1);
- if (doc2)
- doc2 = canon_doc_option (&long2);
-
- if (doc1 != doc2)
- /* `documentation' options always follow normal options (or
- documentation options that *look* like normal options). */
- return doc1 - doc2;
- else if (!short1 && !short2 && long1 && long2)
- /* Only long options. */
- return __strcasecmp (long1, long2);
- else
- /* Compare short/short, long/short, short/long, using the first
- character of long options. Entries without *any* valid
- options (such as options with OPTION_HIDDEN set) will be put
- first, but as they're not displayed, it doesn't matter where
- they are. */
- {
- unsigned char first1 = short1 ? short1 : long1 ? *long1 : 0;
- unsigned char first2 = short2 ? short2 : long2 ? *long2 : 0;
-#ifdef _tolower
- int lower_cmp = _tolower (first1) - _tolower (first2);
-#else
- int lower_cmp = tolower (first1) - tolower (first2);
-#endif
- /* Compare ignoring case, except when the options are both the
- same letter, in which case lower-case always comes first. */
- /* NOTE: The subtraction below does the right thing
- even with eight-bit chars: first1 and first2 are
- converted to int *before* the subtraction. */
- return lower_cmp ? lower_cmp : first2 - first1;
- }
- }
- else
- /* Within the same cluster, but not the same group, so just compare
- groups. */
- return group_cmp (group1, group2, 0);
-}
-
-/* Version of hol_entry_cmp with correct signature for qsort. */
-static int
-hol_entry_qcmp (const void *entry1_v, const void *entry2_v)
-{
- return hol_entry_cmp (entry1_v, entry2_v);
-}
-
-/* Sort HOL by group and alphabetically by option name (with short options
- taking precedence over long). Since the sorting is for display purposes
- only, the shadowing of options isn't effected. */
-static void
-hol_sort (struct hol *hol)
-{
- if (hol->num_entries > 0)
- qsort (hol->entries, hol->num_entries, sizeof (struct hol_entry),
- hol_entry_qcmp);
-}
-
-/* Append MORE to HOL, destroying MORE in the process. Options in HOL shadow
- any in MORE with the same name. */
-static void
-hol_append (struct hol *hol, struct hol *more)
-{
- struct hol_cluster **cl_end = &hol->clusters;
-
- /* Steal MORE's cluster list, and add it to the end of HOL's. */
- while (*cl_end)
- cl_end = &(*cl_end)->next;
- *cl_end = more->clusters;
- more->clusters = 0;
-
- /* Merge entries. */
- if (more->num_entries > 0)
- {
- if (hol->num_entries == 0)
- {
- hol->num_entries = more->num_entries;
- hol->entries = more->entries;
- hol->short_options = more->short_options;
- more->num_entries = 0; /* Mark MORE's fields as invalid. */
- }
- else
- /* Append the entries in MORE to those in HOL, taking care to only add
- non-shadowed SHORT_OPTIONS values. */
- {
- unsigned left;
- char *so, *more_so;
- struct hol_entry *e;
- unsigned num_entries = hol->num_entries + more->num_entries;
- struct hol_entry *entries =
- malloc (num_entries * sizeof (struct hol_entry));
- unsigned hol_so_len = strlen (hol->short_options);
- char *short_options =
- malloc (hol_so_len + strlen (more->short_options) + 1);
-
- __mempcpy (__mempcpy (entries, hol->entries,
- hol->num_entries * sizeof (struct hol_entry)),
- more->entries,
- more->num_entries * sizeof (struct hol_entry));
-
- __mempcpy (short_options, hol->short_options, hol_so_len);
-
- /* Fix up the short options pointers from HOL. */
- for (e = entries, left = hol->num_entries; left > 0; e++, left--)
- e->short_options += (short_options - hol->short_options);
-
- /* Now add the short options from MORE, fixing up its entries
- too. */
- so = short_options + hol_so_len;
- more_so = more->short_options;
- for (left = more->num_entries; left > 0; e++, left--)
- {
- int opts_left;
- const struct argp_option *opt;
-
- e->short_options = so;
-
- for (opts_left = e->num, opt = e->opt; opts_left; opt++, opts_left--)
- {
- int ch = *more_so;
- if (oshort (opt) && ch == opt->key)
- /* The next short option in MORE_SO, CH, is from OPT. */
- {
- if (! find_char (ch, short_options,
- short_options + hol_so_len))
- /* The short option CH isn't shadowed by HOL's options,
- so add it to the sum. */
- *so++ = ch;
- more_so++;
- }
- }
- }
-
- *so = '\0';
-
- free (hol->entries);
- free (hol->short_options);
-
- hol->entries = entries;
- hol->num_entries = num_entries;
- hol->short_options = short_options;
- }
- }
-
- hol_free (more);
-}
-
-/* Inserts enough spaces to make sure STREAM is at column COL. */
-static void
-indent_to (argp_fmtstream_t stream, unsigned col)
-{
- int needed = col - __argp_fmtstream_point (stream);
- while (needed-- > 0)
- __argp_fmtstream_putc (stream, ' ');
-}
-
-/* Output to STREAM either a space, or a newline if there isn't room for at
- least ENSURE characters before the right margin. */
-static void
-space (argp_fmtstream_t stream, size_t ensure)
-{
- if (__argp_fmtstream_point (stream) + ensure
- >= __argp_fmtstream_rmargin (stream))
- __argp_fmtstream_putc (stream, '\n');
- else
- __argp_fmtstream_putc (stream, ' ');
-}
-
-/* If the option REAL has an argument, we print it in using the printf
- format REQ_FMT or OPT_FMT depending on whether it's a required or
- optional argument. */
-static void
-arg (const struct argp_option *real, const char *req_fmt, const char *opt_fmt,
- const char *domain UNUSED, argp_fmtstream_t stream)
-{
- if (real->arg)
- {
- if (real->flags & OPTION_ARG_OPTIONAL)
- __argp_fmtstream_printf (stream, opt_fmt,
- dgettext (domain, real->arg));
- else
- __argp_fmtstream_printf (stream, req_fmt,
- dgettext (domain, real->arg));
- }
-}
-
-/* Helper functions for hol_entry_help. */
-
-/* State used during the execution of hol_help. */
-struct hol_help_state
-{
- /* PREV_ENTRY should contain the previous entry printed, or 0. */
- struct hol_entry *prev_entry;
-
- /* If an entry is in a different group from the previous one, and SEP_GROUPS
- is true, then a blank line will be printed before any output. */
- int sep_groups;
-
- /* True if a duplicate option argument was suppressed (only ever set if
- UPARAMS.dup_args is false). */
- int suppressed_dup_arg;
-};
-
-/* Some state used while printing a help entry (used to communicate with
- helper functions). See the doc for hol_entry_help for more info, as most
- of the fields are copied from its arguments. */
-struct pentry_state
-{
- const struct hol_entry *entry;
- argp_fmtstream_t stream;
- struct hol_help_state *hhstate;
-
- /* True if nothing's been printed so far. */
- int first;
-
- /* If non-zero, the state that was used to print this help. */
- const struct argp_state *state;
-};
-
-/* If a user doc filter should be applied to DOC, do so. */
-static const char *
-filter_doc (const char *doc, int key, const struct argp *argp,
- const struct argp_state *state)
-{
- if (argp->help_filter)
- /* We must apply a user filter to this output. */
- {
- void *input = __argp_input (argp, state);
- return (*argp->help_filter) (key, doc, input);
- }
- else
- /* No filter. */
- return doc;
-}
-
-/* Prints STR as a header line, with the margin lines set appropiately, and
- notes the fact that groups should be separated with a blank line. ARGP is
- the argp that should dictate any user doc filtering to take place. Note
- that the previous wrap margin isn't restored, but the left margin is reset
- to 0. */
-static void
-print_header (const char *str, const struct argp *argp,
- struct pentry_state *pest)
-{
- const char *tstr = dgettext (argp->argp_domain, str);
- const char *fstr = filter_doc (tstr, ARGP_KEY_HELP_HEADER, argp, pest->state);
-
- if (fstr)
- {
- if (*fstr)
- {
- if (pest->hhstate->prev_entry)
- /* Precede with a blank line. */
- __argp_fmtstream_putc (pest->stream, '\n');
- indent_to (pest->stream, uparams.header_col);
- __argp_fmtstream_set_lmargin (pest->stream, uparams.header_col);
- __argp_fmtstream_set_wmargin (pest->stream, uparams.header_col);
- __argp_fmtstream_puts (pest->stream, fstr);
- __argp_fmtstream_set_lmargin (pest->stream, 0);
- __argp_fmtstream_putc (pest->stream, '\n');
- }
-
- pest->hhstate->sep_groups = 1; /* Separate subsequent groups. */
- }
-
- if (fstr != tstr)
- free ((char *) fstr);
-}
-
-/* Inserts a comma if this isn't the first item on the line, and then makes
- sure we're at least to column COL. If this *is* the first item on a line,
- prints any pending whitespace/headers that should precede this line. Also
- clears FIRST. */
-static void
-comma (unsigned col, struct pentry_state *pest)
-{
- if (pest->first)
- {
- const struct hol_entry *pe = pest->hhstate->prev_entry;
- const struct hol_cluster *cl = pest->entry->cluster;
-
- if (pest->hhstate->sep_groups && pe && pest->entry->group != pe->group)
- __argp_fmtstream_putc (pest->stream, '\n');
-
- if (cl && cl->header && *cl->header
- && (!pe
- || (pe->cluster != cl
- && !hol_cluster_is_child (pe->cluster, cl))))
- /* If we're changing clusters, then this must be the start of the
- ENTRY's cluster unless that is an ancestor of the previous one
- (in which case we had just popped into a sub-cluster for a bit).
- If so, then print the cluster's header line. */
- {
- int old_wm = __argp_fmtstream_wmargin (pest->stream);
- print_header (cl->header, cl->argp, pest);
- __argp_fmtstream_set_wmargin (pest->stream, old_wm);
- }
-
- pest->first = 0;
- }
- else
- __argp_fmtstream_puts (pest->stream, ", ");
-
- indent_to (pest->stream, col);
-}
-
-/* Print help for ENTRY to STREAM. */
-static void
-hol_entry_help (struct hol_entry *entry, const struct argp_state *state,
- argp_fmtstream_t stream, struct hol_help_state *hhstate)
-{
- unsigned num;
- const struct argp_option *real = entry->opt, *opt;
- char *so = entry->short_options;
- int have_long_opt = 0; /* We have any long options. */
- /* Saved margins. */
- int old_lm = __argp_fmtstream_set_lmargin (stream, 0);
- int old_wm = __argp_fmtstream_wmargin (stream);
- /* PEST is a state block holding some of our variables that we'd like to
- share with helper functions. */
-
- /* Decent initializers are a GNU extension, so don't use it here. */
- struct pentry_state pest;
- pest.entry = entry;
- pest.stream = stream;
- pest.hhstate = hhstate;
- pest.first = 1;
- pest.state = state;
-
- if (! odoc (real))
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- {
- have_long_opt = 1;
- break;
- }
-
- /* First emit short options. */
- __argp_fmtstream_set_wmargin (stream, uparams.short_opt_col); /* For truly bizarre cases. */
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (oshort (opt) && opt->key == *so)
- /* OPT has a valid (non shadowed) short option. */
- {
- if (ovisible (opt))
- {
- comma (uparams.short_opt_col, &pest);
- __argp_fmtstream_putc (stream, '-');
- __argp_fmtstream_putc (stream, *so);
- if (!have_long_opt || uparams.dup_args)
- arg (real, " %s", "[%s]", state->root_argp->argp_domain, stream);
- else if (real->arg)
- hhstate->suppressed_dup_arg = 1;
- }
- so++;
- }
-
- /* Now, long options. */
- if (odoc (real))
- /* A `documentation' option. */
- {
- __argp_fmtstream_set_wmargin (stream, uparams.doc_opt_col);
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- {
- comma (uparams.doc_opt_col, &pest);
- /* Calling gettext here isn't quite right, since sorting will
- have been done on the original; but documentation options
- should be pretty rare anyway... */
- __argp_fmtstream_puts (stream,
- dgettext (state->root_argp->argp_domain,
- opt->name));
- }
- }
- else
- /* A real long option. */
- {
- int first_long_opt = 1;
-
- __argp_fmtstream_set_wmargin (stream, uparams.long_opt_col);
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- {
- comma (uparams.long_opt_col, &pest);
- __argp_fmtstream_printf (stream, "--%s", opt->name);
- if (first_long_opt || uparams.dup_args)
- arg (real, "=%s", "[=%s]", state->root_argp->argp_domain,
- stream);
- else if (real->arg)
- hhstate->suppressed_dup_arg = 1;
- }
- }
-
- /* Next, documentation strings. */
- __argp_fmtstream_set_lmargin (stream, 0);
-
- if (pest.first)
- {
- /* Didn't print any switches, what's up? */
- if (!oshort (real) && !real->name)
- /* This is a group header, print it nicely. */
- print_header (real->doc, entry->argp, &pest);
- else
- /* Just a totally shadowed option or null header; print nothing. */
- goto cleanup; /* Just return, after cleaning up. */
- }
- else
- {
- const char *tstr = real->doc ? dgettext (state->root_argp->argp_domain,
- real->doc) : 0;
- const char *fstr = filter_doc (tstr, real->key, entry->argp, state);
- if (fstr && *fstr)
- {
- unsigned int col = __argp_fmtstream_point (stream);
-
- __argp_fmtstream_set_lmargin (stream, uparams.opt_doc_col);
- __argp_fmtstream_set_wmargin (stream, uparams.opt_doc_col);
-
- if (col > (unsigned int) (uparams.opt_doc_col + 3))
- __argp_fmtstream_putc (stream, '\n');
- else if (col >= (unsigned int) uparams.opt_doc_col)
- __argp_fmtstream_puts (stream, " ");
- else
- indent_to (stream, uparams.opt_doc_col);
-
- __argp_fmtstream_puts (stream, fstr);
- }
- if (fstr && fstr != tstr)
- free ((char *) fstr);
-
- /* Reset the left margin. */
- __argp_fmtstream_set_lmargin (stream, 0);
- __argp_fmtstream_putc (stream, '\n');
- }
-
- hhstate->prev_entry = entry;
-
-cleanup:
- __argp_fmtstream_set_lmargin (stream, old_lm);
- __argp_fmtstream_set_wmargin (stream, old_wm);
-}
-
-/* Output a long help message about the options in HOL to STREAM. */
-static void
-hol_help (struct hol *hol, const struct argp_state *state,
- argp_fmtstream_t stream)
-{
- unsigned num;
- struct hol_entry *entry;
- struct hol_help_state hhstate = { 0, 0, 0 };
-
- for (entry = hol->entries, num = hol->num_entries; num > 0; entry++, num--)
- hol_entry_help (entry, state, stream, &hhstate);
-
- if (hhstate.suppressed_dup_arg && uparams.dup_args_note)
- {
- const char *tstr = dgettext (state->root_argp->argp_domain, "\
-Mandatory or optional arguments to long options are also mandatory or \
-optional for any corresponding short options.");
- const char *fstr = filter_doc (tstr, ARGP_KEY_HELP_DUP_ARGS_NOTE,
- state ? state->root_argp : 0, state);
- if (fstr && *fstr)
- {
- __argp_fmtstream_putc (stream, '\n');
- __argp_fmtstream_puts (stream, fstr);
- __argp_fmtstream_putc (stream, '\n');
- }
- if (fstr && fstr != tstr)
- free ((char *) fstr);
- }
-}
-
-/* Helper functions for hol_usage. */
-
-/* If OPT is a short option without an arg, append its key to the string
- pointer pointer to by COOKIE, and advance the pointer. */
-static int
-add_argless_short_opt (const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain UNUSED, void *cookie)
-{
- char **snao_end = cookie;
- if (!(opt->arg || real->arg)
- && !((opt->flags | real->flags) & OPTION_NO_USAGE))
- *(*snao_end)++ = opt->key;
- return 0;
-}
-
-/* If OPT is a short option with an arg, output a usage entry for it to the
- stream pointed at by COOKIE. */
-static int
-usage_argful_short_opt (const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain UNUSED, void *cookie)
-{
- argp_fmtstream_t stream = cookie;
- const char *arg = opt->arg;
- int flags = opt->flags | real->flags;
-
- if (! arg)
- arg = real->arg;
-
- if (arg && !(flags & OPTION_NO_USAGE))
- {
- arg = dgettext (domain, arg);
-
- if (flags & OPTION_ARG_OPTIONAL)
- __argp_fmtstream_printf (stream, " [-%c[%s]]", opt->key, arg);
- else
- {
- /* Manually do line wrapping so that it (probably) won't
- get wrapped at the embedded space. */
- space (stream, 6 + strlen (arg));
- __argp_fmtstream_printf (stream, "[-%c %s]", opt->key, arg);
- }
- }
-
- return 0;
-}
-
-/* Output a usage entry for the long option opt to the stream pointed at by
- COOKIE. */
-static int
-usage_long_opt (const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain UNUSED, void *cookie)
-{
- argp_fmtstream_t stream = cookie;
- const char *arg = opt->arg;
- int flags = opt->flags | real->flags;
-
- if (! arg)
- arg = real->arg;
-
- if (! (flags & OPTION_NO_USAGE))
- {
- if (arg)
- {
- arg = dgettext (domain, arg);
- if (flags & OPTION_ARG_OPTIONAL)
- __argp_fmtstream_printf (stream, " [--%s[=%s]]", opt->name, arg);
- else
- __argp_fmtstream_printf (stream, " [--%s=%s]", opt->name, arg);
- }
- else
- __argp_fmtstream_printf (stream, " [--%s]", opt->name);
- }
-
- return 0;
-}
-
-/* Print a short usage description for the arguments in HOL to STREAM. */
-static void
-hol_usage (struct hol *hol, argp_fmtstream_t stream)
-{
- if (hol->num_entries > 0)
- {
- unsigned nentries;
- struct hol_entry *entry;
- char *short_no_arg_opts = alloca (strlen (hol->short_options) + 1);
- char *snao_end = short_no_arg_opts;
-
- /* First we put a list of short options without arguments. */
- for (entry = hol->entries, nentries = hol->num_entries
- ; nentries > 0
- ; entry++, nentries--)
- hol_entry_short_iterate (entry, add_argless_short_opt,
- entry->argp->argp_domain, &snao_end);
- if (snao_end > short_no_arg_opts)
- {
- *snao_end++ = 0;
- __argp_fmtstream_printf (stream, " [-%s]", short_no_arg_opts);
- }
-
- /* Now a list of short options *with* arguments. */
- for (entry = hol->entries, nentries = hol->num_entries
- ; nentries > 0
- ; entry++, nentries--)
- hol_entry_short_iterate (entry, usage_argful_short_opt,
- entry->argp->argp_domain, stream);
-
- /* Finally, a list of long options (whew!). */
- for (entry = hol->entries, nentries = hol->num_entries
- ; nentries > 0
- ; entry++, nentries--)
- hol_entry_long_iterate (entry, usage_long_opt,
- entry->argp->argp_domain, stream);
- }
-}
-
-/* Make a HOL containing all levels of options in ARGP. CLUSTER is the
- cluster in which ARGP's entries should be clustered, or 0. */
-static struct hol *
-argp_hol (const struct argp *argp, struct hol_cluster *cluster)
-{
- const struct argp_child *child = argp->children;
- struct hol *hol = make_hol (argp, cluster);
- if (child)
- while (child->argp)
- {
- struct hol_cluster *child_cluster =
- ((child->group || child->header)
- /* Put CHILD->argp within its own cluster. */
- ? hol_add_cluster (hol, child->group, child->header,
- child - argp->children, cluster, argp)
- /* Just merge it into the parent's cluster. */
- : cluster);
- hol_append (hol, argp_hol (child->argp, child_cluster)) ;
- child++;
- }
- return hol;
-}
-
-/* Calculate how many different levels with alternative args strings exist in
- ARGP. */
-static size_t
-argp_args_levels (const struct argp *argp)
-{
- size_t levels = 0;
- const struct argp_child *child = argp->children;
-
- if (argp->args_doc && strchr (argp->args_doc, '\n'))
- levels++;
-
- if (child)
- while (child->argp)
- levels += argp_args_levels ((child++)->argp);
-
- return levels;
-}
-
-/* Print all the non-option args documented in ARGP to STREAM. Any output is
- preceded by a space. LEVELS is a pointer to a byte vector the length
- returned by argp_args_levels; it should be initialized to zero, and
- updated by this routine for the next call if ADVANCE is true. True is
- returned as long as there are more patterns to output. */
-static int
-argp_args_usage (const struct argp *argp, const struct argp_state *state,
- char **levels, int advance, argp_fmtstream_t stream)
-{
- char *our_level = *levels;
- int multiple = 0;
- const struct argp_child *child = argp->children;
- const char *tdoc = dgettext (argp->argp_domain, argp->args_doc), *nl = 0;
- const char *fdoc = filter_doc (tdoc, ARGP_KEY_HELP_ARGS_DOC, argp, state);
-
- if (fdoc)
- {
- const char *cp = fdoc;
- nl = __strchrnul (cp, '\n');
- if (*nl != '\0')
- /* This is a `multi-level' args doc; advance to the correct position
- as determined by our state in LEVELS, and update LEVELS. */
- {
- int i;
- multiple = 1;
- for (i = 0; i < *our_level; i++)
- cp = nl + 1, nl = __strchrnul (cp, '\n');
- (*levels)++;
- }
-
- /* Manually do line wrapping so that it (probably) won't get wrapped at
- any embedded spaces. */
- space (stream, 1 + nl - cp);
-
- __argp_fmtstream_write (stream, cp, nl - cp);
- }
- if (fdoc && fdoc != tdoc)
- free ((char *)fdoc); /* Free user's modified doc string. */
-
- if (child)
- while (child->argp)
- advance = !argp_args_usage ((child++)->argp, state, levels, advance, stream);
-
- if (advance && multiple)
- {
- /* Need to increment our level. */
- if (*nl)
- /* There's more we can do here. */
- {
- (*our_level)++;
- advance = 0; /* Our parent shouldn't advance also. */
- }
- else if (*our_level > 0)
- /* We had multiple levels, but used them up; reset to zero. */
- *our_level = 0;
- }
-
- return !advance;
-}
-
-/* Print the documentation for ARGP to STREAM; if POST is false, then
- everything preceeding a `\v' character in the documentation strings (or
- the whole string, for those with none) is printed, otherwise, everything
- following the `\v' character (nothing for strings without). Each separate
- bit of documentation is separated a blank line, and if PRE_BLANK is true,
- then the first is as well. If FIRST_ONLY is true, only the first
- occurrence is output. Returns true if anything was output. */
-static int
-argp_doc (const struct argp *argp, const struct argp_state *state,
- int post, int pre_blank, int first_only,
- argp_fmtstream_t stream)
-{
- const char *text;
- const char *inp_text;
- void *input = 0;
- int anything = 0;
- size_t inp_text_limit = 0;
- const char *doc = dgettext (argp->argp_domain, argp->doc);
- const struct argp_child *child = argp->children;
-
- if (doc)
- {
- char *vt = strchr (doc, '\v');
- inp_text = post ? (vt ? vt + 1 : 0) : doc;
- inp_text_limit = (!post && vt) ? (vt - doc) : 0;
- }
- else
- inp_text = 0;
-
- if (argp->help_filter)
- /* We have to filter the doc strings. */
- {
- if (inp_text_limit)
- /* Copy INP_TEXT so that it's nul-terminated. */
- inp_text = STRNDUP (inp_text, inp_text_limit);
- input = __argp_input (argp, state);
- text =
- (*argp->help_filter) (post
- ? ARGP_KEY_HELP_POST_DOC
- : ARGP_KEY_HELP_PRE_DOC,
- inp_text, input);
- }
- else
- text = (const char *) inp_text;
-
- if (text)
- {
- if (pre_blank)
- __argp_fmtstream_putc (stream, '\n');
-
- if (text == inp_text && inp_text_limit)
- __argp_fmtstream_write (stream, inp_text, inp_text_limit);
- else
- __argp_fmtstream_puts (stream, text);
-
- if (__argp_fmtstream_point (stream) > __argp_fmtstream_lmargin (stream))
- __argp_fmtstream_putc (stream, '\n');
-
- anything = 1;
- }
-
- if (text && text != inp_text)
- free ((char *) text); /* Free TEXT returned from the help filter. */
- if (inp_text && inp_text_limit && argp->help_filter)
- free ((char *) inp_text); /* We copied INP_TEXT, so free it now. */
-
- if (post && argp->help_filter)
- /* Now see if we have to output a ARGP_KEY_HELP_EXTRA text. */
- {
- text = (*argp->help_filter) (ARGP_KEY_HELP_EXTRA, 0, input);
- if (text)
- {
- if (anything || pre_blank)
- __argp_fmtstream_putc (stream, '\n');
- __argp_fmtstream_puts (stream, text);
- free ((char *) text);
- if (__argp_fmtstream_point (stream)
- > __argp_fmtstream_lmargin (stream))
- __argp_fmtstream_putc (stream, '\n');
- anything = 1;
- }
- }
-
- if (child)
- while (child->argp && !(first_only && anything))
- anything |=
- argp_doc ((child++)->argp, state,
- post, anything || pre_blank, first_only,
- stream);
-
- return anything;
-}
-
-/* Output a usage message for ARGP to STREAM. If called from
- argp_state_help, STATE is the relevent parsing state. FLAGS are from the
- set ARGP_HELP_*. NAME is what to use wherever a `program name' is
- needed. */
-
-static void
-_help (const struct argp *argp, const struct argp_state *state, FILE *stream,
- unsigned flags, const char *name)
-{
- int anything = 0; /* Whether we've output anything. */
- struct hol *hol = 0;
- argp_fmtstream_t fs;
-
- if (! stream)
- return;
-
- FLOCKFILE (stream);
-
- if (! uparams.valid)
- fill_in_uparams (state);
-
- fs = __argp_make_fmtstream (stream, 0, uparams.rmargin, 0);
- if (! fs)
- {
- FUNLOCKFILE (stream);
- return;
- }
-
- if (flags & (ARGP_HELP_USAGE | ARGP_HELP_SHORT_USAGE | ARGP_HELP_LONG))
- {
- hol = argp_hol (argp, 0);
-
- /* If present, these options always come last. */
- hol_set_group (hol, "help", -1);
- hol_set_group (hol, "version", -1);
-
- hol_sort (hol);
- }
-
- if (flags & (ARGP_HELP_USAGE | ARGP_HELP_SHORT_USAGE))
- /* Print a short `Usage:' message. */
- {
- int first_pattern = 1, more_patterns;
- size_t num_pattern_levels = argp_args_levels (argp);
- char *pattern_levels = alloca (num_pattern_levels);
-
- memset (pattern_levels, 0, num_pattern_levels);
-
- do
- {
- int old_lm;
- int old_wm = __argp_fmtstream_set_wmargin (fs, uparams.usage_indent);
- char *levels = pattern_levels;
-
- if (first_pattern)
- __argp_fmtstream_printf (fs, "%s %s",
- dgettext (argp->argp_domain, "Usage:"),
- name);
- else
- __argp_fmtstream_printf (fs, "%s %s",
- dgettext (argp->argp_domain, " or: "),
- name);
-
- /* We set the lmargin as well as the wmargin, because hol_usage
- manually wraps options with newline to avoid annoying breaks. */
- old_lm = __argp_fmtstream_set_lmargin (fs, uparams.usage_indent);
-
- if (flags & ARGP_HELP_SHORT_USAGE)
- /* Just show where the options go. */
- {
- if (hol->num_entries > 0)
- __argp_fmtstream_puts (fs, dgettext (argp->argp_domain,
- " [OPTION...]"));
- }
- else
- /* Actually print the options. */
- {
- hol_usage (hol, fs);
- flags |= ARGP_HELP_SHORT_USAGE; /* But only do so once. */
- }
-
- more_patterns = argp_args_usage (argp, state, &levels, 1, fs);
-
- __argp_fmtstream_set_wmargin (fs, old_wm);
- __argp_fmtstream_set_lmargin (fs, old_lm);
-
- __argp_fmtstream_putc (fs, '\n');
- anything = 1;
-
- first_pattern = 0;
- }
- while (more_patterns);
- }
-
- if (flags & ARGP_HELP_PRE_DOC)
- anything |= argp_doc (argp, state, 0, 0, 1, fs);
-
- if (flags & ARGP_HELP_SEE)
- {
- __argp_fmtstream_printf (fs, dgettext (argp->argp_domain, "\
-Try `%s --help' or `%s --usage' for more information.\n"),
- name, name);
- anything = 1;
- }
-
- if (flags & ARGP_HELP_LONG)
- /* Print a long, detailed help message. */
- {
- /* Print info about all the options. */
- if (hol->num_entries > 0)
- {
- if (anything)
- __argp_fmtstream_putc (fs, '\n');
- hol_help (hol, state, fs);
- anything = 1;
- }
- }
-
- if (flags & ARGP_HELP_POST_DOC)
- /* Print any documentation strings at the end. */
- anything |= argp_doc (argp, state, 1, anything, 0, fs);
-
- if ((flags & ARGP_HELP_BUG_ADDR) && argp_program_bug_address)
- {
- if (anything)
- __argp_fmtstream_putc (fs, '\n');
- __argp_fmtstream_printf (fs, dgettext (argp->argp_domain,
- "Report bugs to %s.\n"),
- argp_program_bug_address);
- anything = 1;
- }
-
- FUNLOCKFILE (stream);
-
- if (hol)
- hol_free (hol);
-
- __argp_fmtstream_free (fs);
-}
-
-/* Output a usage message for ARGP to STREAM. FLAGS are from the set
- ARGP_HELP_*. NAME is what to use wherever a `program name' is needed. */
-void __argp_help (const struct argp *argp, FILE *stream,
- unsigned flags, char *name)
-{
- _help (argp, 0, stream, flags, name);
-}
-#ifdef weak_alias
-weak_alias (__argp_help, argp_help)
-#endif
-
-char *__argp_basename(char *name)
-{
- char *short_name = strrchr(name, '/');
- return short_name ? short_name + 1 : name;
-}
-
-char *
-__argp_short_program_name(const struct argp_state *state)
-{
- if (state)
- return state->name;
-#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
- return program_invocation_short_name;
-#elif HAVE_DECL_PROGRAM_INVOCATION_NAME
- return __argp_basename(program_invocation_name);
-#else /* !HAVE_DECL_PROGRAM_INVOCATION_NAME */
- /* FIXME: What now? Miles suggests that it is better to use NULL,
- but currently the value is passed on directly to fputs_unlocked,
- so that requires more changes. */
-# if __GNUC__
- return "";
-# endif /* __GNUC__ */
-#endif /* !HAVE_DECL_PROGRAM_INVOCATION_NAME */
-}
-
-/* Output, if appropriate, a usage message for STATE to STREAM. FLAGS are
- from the set ARGP_HELP_*. */
-void
-__argp_state_help (const struct argp_state *state, FILE *stream, unsigned flags)
-{
- if ((!state || ! (state->flags & ARGP_NO_ERRS)) && stream)
- {
- if (state && (state->flags & ARGP_LONG_ONLY))
- flags |= ARGP_HELP_LONG_ONLY;
-
- _help (state ? state->root_argp : 0, state, stream, flags,
- __argp_short_program_name(state));
-
- if (!state || ! (state->flags & ARGP_NO_EXIT))
- {
- if (flags & ARGP_HELP_EXIT_ERR)
- exit (argp_err_exit_status);
- if (flags & ARGP_HELP_EXIT_OK)
- exit (0);
- }
- }
-}
-#ifdef weak_alias
-weak_alias (__argp_state_help, argp_state_help)
-#endif
-
-/* If appropriate, print the printf string FMT and following args, preceded
- by the program name and `:', to stderr, and followed by a `Try ... --help'
- message, then exit (1). */
-void
-__argp_error (const struct argp_state *state, const char *fmt, ...)
-{
- if (!state || !(state->flags & ARGP_NO_ERRS))
- {
- FILE *stream = state ? state->err_stream : stderr;
-
- if (stream)
- {
- va_list ap;
-
- FLOCKFILE (stream);
-
- FPUTS_UNLOCKED (__argp_short_program_name(state),
- stream);
- PUTC_UNLOCKED (':', stream);
- PUTC_UNLOCKED (' ', stream);
-
- va_start (ap, fmt);
- vfprintf (stream, fmt, ap);
- va_end (ap);
-
- PUTC_UNLOCKED ('\n', stream);
-
- __argp_state_help (state, stream, ARGP_HELP_STD_ERR);
-
- FUNLOCKFILE (stream);
- }
- }
-}
-#ifdef weak_alias
-weak_alias (__argp_error, argp_error)
-#endif
-
-/* Similar to the standard gnu error-reporting function error(), but will
- respect the ARGP_NO_EXIT and ARGP_NO_ERRS flags in STATE, and will print
- to STATE->err_stream. This is useful for argument parsing code that is
- shared between program startup (when exiting is desired) and runtime
- option parsing (when typically an error code is returned instead). The
- difference between this function and argp_error is that the latter is for
- *parsing errors*, and the former is for other problems that occur during
- parsing but don't reflect a (syntactic) problem with the input. */
-void
-__argp_failure (const struct argp_state *state, int status, int errnum,
- const char *fmt, ...)
-{
- if (!state || !(state->flags & ARGP_NO_ERRS))
- {
- FILE *stream = state ? state->err_stream : stderr;
-
- if (stream)
- {
- FLOCKFILE (stream);
-
- FPUTS_UNLOCKED (__argp_short_program_name(state),
- stream);
-
- if (fmt)
- {
- va_list ap;
-
- PUTC_UNLOCKED (':', stream);
- PUTC_UNLOCKED (' ', stream);
-
- va_start (ap, fmt);
- vfprintf (stream, fmt, ap);
- va_end (ap);
- }
-
- if (errnum)
- {
- PUTC_UNLOCKED (':', stream);
- PUTC_UNLOCKED (' ', stream);
- fputs (STRERROR (errnum), stream);
- }
-
- PUTC_UNLOCKED ('\n', stream);
-
- FUNLOCKFILE (stream);
-
- if (status && (!state || !(state->flags & ARGP_NO_EXIT)))
- exit (status);
- }
- }
-}
-#ifdef weak_alias
-weak_alias (__argp_failure, argp_failure)
-#endif
diff --git a/argp-standalone/argp-namefrob.h b/argp-standalone/argp-namefrob.h
deleted file mode 100644
index 0ce11481a7b..00000000000
--- a/argp-standalone/argp-namefrob.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Name frobnication for compiling argp outside of glibc
- Copyright (C) 1997 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#if !_LIBC
-/* This code is written for inclusion in gnu-libc, and uses names in the
- namespace reserved for libc. If we're not compiling in libc, define those
- names to be the normal ones instead. */
-
-/* argp-parse functions */
-#undef __argp_parse
-#define __argp_parse argp_parse
-#undef __option_is_end
-#define __option_is_end _option_is_end
-#undef __option_is_short
-#define __option_is_short _option_is_short
-#undef __argp_input
-#define __argp_input _argp_input
-
-/* argp-help functions */
-#undef __argp_help
-#define __argp_help argp_help
-#undef __argp_error
-#define __argp_error argp_error
-#undef __argp_failure
-#define __argp_failure argp_failure
-#undef __argp_state_help
-#define __argp_state_help argp_state_help
-#undef __argp_usage
-#define __argp_usage argp_usage
-#undef __argp_basename
-#define __argp_basename _argp_basename
-#undef __argp_short_program_name
-#define __argp_short_program_name _argp_short_program_name
-
-/* argp-fmtstream functions */
-#undef __argp_make_fmtstream
-#define __argp_make_fmtstream argp_make_fmtstream
-#undef __argp_fmtstream_free
-#define __argp_fmtstream_free argp_fmtstream_free
-#undef __argp_fmtstream_putc
-#define __argp_fmtstream_putc argp_fmtstream_putc
-#undef __argp_fmtstream_puts
-#define __argp_fmtstream_puts argp_fmtstream_puts
-#undef __argp_fmtstream_write
-#define __argp_fmtstream_write argp_fmtstream_write
-#undef __argp_fmtstream_printf
-#define __argp_fmtstream_printf argp_fmtstream_printf
-#undef __argp_fmtstream_set_lmargin
-#define __argp_fmtstream_set_lmargin argp_fmtstream_set_lmargin
-#undef __argp_fmtstream_set_rmargin
-#define __argp_fmtstream_set_rmargin argp_fmtstream_set_rmargin
-#undef __argp_fmtstream_set_wmargin
-#define __argp_fmtstream_set_wmargin argp_fmtstream_set_wmargin
-#undef __argp_fmtstream_point
-#define __argp_fmtstream_point argp_fmtstream_point
-#undef __argp_fmtstream_update
-#define __argp_fmtstream_update _argp_fmtstream_update
-#undef __argp_fmtstream_ensure
-#define __argp_fmtstream_ensure _argp_fmtstream_ensure
-#undef __argp_fmtstream_lmargin
-#define __argp_fmtstream_lmargin argp_fmtstream_lmargin
-#undef __argp_fmtstream_rmargin
-#define __argp_fmtstream_rmargin argp_fmtstream_rmargin
-#undef __argp_fmtstream_wmargin
-#define __argp_fmtstream_wmargin argp_fmtstream_wmargin
-
-/* normal libc functions we call */
-#undef __sleep
-#define __sleep sleep
-#undef __strcasecmp
-#define __strcasecmp strcasecmp
-#undef __vsnprintf
-#define __vsnprintf vsnprintf
-
-#endif /* !_LIBC */
-
-#ifndef __set_errno
-#define __set_errno(e) (errno = (e))
-#endif
diff --git a/argp-standalone/argp-parse.c b/argp-standalone/argp-parse.c
deleted file mode 100644
index 78f7bf139b6..00000000000
--- a/argp-standalone/argp-parse.c
+++ /dev/null
@@ -1,1305 +0,0 @@
-/* Hierarchial argument parsing
- Copyright (C) 1995, 96, 97, 98, 99, 2000,2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifndef _GNU_SOURCE
-# define _GNU_SOURCE 1
-#endif
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-#if HAVE_UNISTD_H
-# include <unistd.h>
-#endif
-#include <limits.h>
-#include <assert.h>
-
-#if HAVE_MALLOC_H
-/* Needed, for alloca on windows */
-# include <malloc.h>
-#endif
-
-#ifndef _
-/* This is for other GNU distributions with internationalized messages.
- When compiling libc, the _ macro is predefined. */
-# if defined HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-# ifdef _LIBC
-# undef dgettext
-# define dgettext(domain, msgid) __dcgettext (domain, msgid, LC_MESSAGES)
-# endif
-# else
-# define dgettext(domain, msgid) (msgid)
-# define gettext(msgid) (msgid)
-# endif
-#endif
-#ifndef N_
-# define N_(msgid) (msgid)
-#endif
-
-#if _LIBC - 0
-#include <bits/libc-lock.h>
-#else
-#ifdef HAVE_CTHREADS_H
-#include <cthreads.h>
-#endif
-#endif /* _LIBC */
-
-#include "argp.h"
-#include "argp-namefrob.h"
-
-
-/* The meta-argument used to prevent any further arguments being interpreted
- as options. */
-#define QUOTE "--"
-
-/* EZ alias for ARGP_ERR_UNKNOWN. */
-#define EBADKEY ARGP_ERR_UNKNOWN
-
-
-/* Default options. */
-
-/* When argp is given the --HANG switch, _ARGP_HANG is set and argp will sleep
- for one second intervals, decrementing _ARGP_HANG until it's zero. Thus
- you can force the program to continue by attaching a debugger and setting
- it to 0 yourself. */
-volatile int _argp_hang;
-
-#define OPT_PROGNAME -2
-#define OPT_USAGE -3
-#if HAVE_SLEEP && HAVE_GETPID
-#define OPT_HANG -4
-#endif
-
-static const struct argp_option argp_default_options[] =
-{
- {"help", '?', 0, 0, N_("Give this help list"), -1},
- {"usage", OPT_USAGE, 0, 0, N_("Give a short usage message"), 0 },
- {"program-name",OPT_PROGNAME,"NAME", OPTION_HIDDEN,
- N_("Set the program name"), 0},
-#if OPT_HANG
- {"HANG", OPT_HANG, "SECS", OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
- N_("Hang for SECS seconds (default 3600)"), 0 },
-#endif
- {0, 0, 0, 0, 0, 0}
-};
-
-static error_t
-argp_default_parser (int key, char *arg, struct argp_state *state)
-{
- switch (key)
- {
- case '?':
- __argp_state_help (state, state->out_stream, ARGP_HELP_STD_HELP);
- break;
- case OPT_USAGE:
- __argp_state_help (state, state->out_stream,
- ARGP_HELP_USAGE | ARGP_HELP_EXIT_OK);
- break;
-
- case OPT_PROGNAME: /* Set the program name. */
-#if HAVE_DECL_PROGRAM_INVOCATION_NAME
- program_invocation_name = arg;
-#endif
- /* [Note that some systems only have PROGRAM_INVOCATION_SHORT_NAME (aka
- __PROGNAME), in which case, PROGRAM_INVOCATION_NAME is just defined
- to be that, so we have to be a bit careful here.] */
-
- /* Update what we use for messages. */
-
- state->name = __argp_basename(arg);
-
-#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
- program_invocation_short_name = state->name;
-#endif
-
- if ((state->flags & (ARGP_PARSE_ARGV0 | ARGP_NO_ERRS))
- == ARGP_PARSE_ARGV0)
- /* Update what getopt uses too. */
- state->argv[0] = arg;
-
- break;
-
-#if OPT_HANG
- case OPT_HANG:
- _argp_hang = atoi (arg ? arg : "3600");
- fprintf(state->err_stream, "%s: pid = %ld\n",
- state->name, (long) getpid());
- while (_argp_hang-- > 0)
- __sleep (1);
- break;
-#endif
-
- default:
- return EBADKEY;
- }
- return 0;
-}
-
-static const struct argp argp_default_argp =
- {argp_default_options, &argp_default_parser, NULL, NULL, NULL, NULL, "libc"};
-
-
-static const struct argp_option argp_version_options[] =
-{
- {"version", 'V', 0, 0, N_("Print program version"), -1},
- {0, 0, 0, 0, 0, 0 }
-};
-
-static error_t
-argp_version_parser (int key, char *arg UNUSED, struct argp_state *state)
-{
- switch (key)
- {
- case 'V':
- if (argp_program_version_hook)
- (*argp_program_version_hook) (state->out_stream, state);
- else if (argp_program_version)
- fprintf (state->out_stream, "%s\n", argp_program_version);
- else
- __argp_error (state, dgettext (state->root_argp->argp_domain,
- "(PROGRAM ERROR) No version known!?"));
- if (! (state->flags & ARGP_NO_EXIT))
- exit (0);
- break;
- default:
- return EBADKEY;
- }
- return 0;
-}
-
-static const struct argp argp_version_argp =
- {argp_version_options, &argp_version_parser, NULL, NULL, NULL, NULL, "libc"};
-
-
-
-/* The state of a `group' during parsing. Each group corresponds to a
- particular argp structure from the tree of such descending from the top
- level argp passed to argp_parse. */
-struct group
-{
- /* This group's parsing function. */
- argp_parser_t parser;
-
- /* Which argp this group is from. */
- const struct argp *argp;
-
- /* The number of non-option args sucessfully handled by this parser. */
- unsigned args_processed;
-
- /* This group's parser's parent's group. */
- struct group *parent;
- unsigned parent_index; /* And the our position in the parent. */
-
- /* These fields are swapped into and out of the state structure when
- calling this group's parser. */
- void *input, **child_inputs;
- void *hook;
-};
-
-/* Call GROUP's parser with KEY and ARG, swapping any group-specific info
- from STATE before calling, and back into state afterwards. If GROUP has
- no parser, EBADKEY is returned. */
-static error_t
-group_parse (struct group *group, struct argp_state *state, int key, char *arg)
-{
- if (group->parser)
- {
- error_t err;
- state->hook = group->hook;
- state->input = group->input;
- state->child_inputs = group->child_inputs;
- state->arg_num = group->args_processed;
- err = (*group->parser)(key, arg, state);
- group->hook = state->hook;
- return err;
- }
- else
- return EBADKEY;
-}
-
-struct parser
-{
- const struct argp *argp;
-
- const char *posixly_correct;
-
- /* True if there are only no-option arguments left, which are just
- passed verbatim with ARGP_KEY_ARG. This is set if we encounter a
- quote, or the end of the proper options, but may be cleared again
- if the user moves the next argument pointer backwards. */
- int args_only;
-
- /* Describe how to deal with options that follow non-option ARGV-elements.
-
- If the caller did not specify anything, the default is
- REQUIRE_ORDER if the environment variable POSIXLY_CORRECT is
- defined, PERMUTE otherwise.
-
- REQUIRE_ORDER means don't recognize them as options; stop option
- processing when the first non-option is seen. This is what Unix
- does. This mode of operation is selected by either setting the
- environment variable POSIXLY_CORRECT, or using `+' as the first
- character of the list of option characters.
-
- PERMUTE is the default. We permute the contents of ARGV as we
- scan, so that eventually all the non-options are at the end. This
- allows options to be given in any order, even with programs that
- were not written to expect this.
-
- RETURN_IN_ORDER is an option available to programs that were
- written to expect options and other ARGV-elements in any order
- and that care about the ordering of the two. We describe each
- non-option ARGV-element as if it were the argument of an option
- with character code 1. Using `-' as the first character of the
- list of option characters selects this mode of operation.
-
- */
- enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering;
-
- /* A segment of non-option arguments that have been skipped for
- later processing, after all options. `first_nonopt' is the index
- in ARGV of the first of them; `last_nonopt' is the index after
- the last of them.
-
- If quoted or args_only is non-zero, this segment should be empty. */
-
- /* FIXME: I'd prefer to use unsigned, but it's more consistent to
- use the same type as for state.next. */
- int first_nonopt;
- int last_nonopt;
-
- /* String of all recognized short options. Needed for ARGP_LONG_ONLY. */
- /* FIXME: Perhaps change to a pointer to a suitable bitmap instead? */
- char *short_opts;
-
- /* For parsing combined short options. */
- char *nextchar;
-
- /* States of the various parsing groups. */
- struct group *groups;
- /* The end of the GROUPS array. */
- struct group *egroup;
- /* An vector containing storage for the CHILD_INPUTS field in all groups. */
- void **child_inputs;
-
- /* State block supplied to parsing routines. */
- struct argp_state state;
-
- /* Memory used by this parser. */
- void *storage;
-};
-
-/* Search for a group defining a short option. */
-static const struct argp_option *
-find_short_option(struct parser *parser, int key, struct group **p)
-{
- struct group *group;
-
- assert(key >= 0);
- assert(isascii(key));
-
- for (group = parser->groups; group < parser->egroup; group++)
- {
- const struct argp_option *opts;
-
- for (opts = group->argp->options; !__option_is_end(opts); opts++)
- if (opts->key == key)
- {
- *p = group;
- return opts;
- }
- }
- return NULL;
-}
-
-enum match_result { MATCH_EXACT, MATCH_PARTIAL, MATCH_NO };
-
-/* If defined, allow complete.el-like abbreviations of long options. */
-#ifndef ARGP_COMPLETE
-#define ARGP_COMPLETE 0
-#endif
-
-/* Matches an encountern long-option argument ARG against an option NAME.
- * ARG is terminated by NUL or '='. */
-static enum match_result
-match_option(const char *arg, const char *name)
-{
- unsigned i, j;
- for (i = j = 0;; i++, j++)
- {
- switch(arg[i])
- {
- case '\0':
- case '=':
- return name[j] ? MATCH_PARTIAL : MATCH_EXACT;
-#if ARGP_COMPLETE
- case '-':
- while (name[j] != '-')
- if (!name[j++])
- return MATCH_NO;
- break;
-#endif
- default:
- if (arg[i] != name[j])
- return MATCH_NO;
- }
- }
-}
-
-static const struct argp_option *
-find_long_option(struct parser *parser,
- const char *arg,
- struct group **p)
-{
- struct group *group;
-
- /* Partial match found so far. */
- struct group *matched_group = NULL;
- const struct argp_option *matched_option = NULL;
-
- /* Number of partial matches. */
- int num_partial = 0;
-
- for (group = parser->groups; group < parser->egroup; group++)
- {
- const struct argp_option *opts;
-
- for (opts = group->argp->options; !__option_is_end(opts); opts++)
- {
- if (!opts->name)
- continue;
- switch (match_option(arg, opts->name))
- {
- case MATCH_NO:
- break;
- case MATCH_PARTIAL:
- num_partial++;
-
- matched_group = group;
- matched_option = opts;
-
- break;
- case MATCH_EXACT:
- /* Exact match. */
- *p = group;
- return opts;
- }
- }
- }
- if (num_partial == 1)
- {
- *p = matched_group;
- return matched_option;
- }
-
- return NULL;
-}
-
-
-/* The next usable entries in the various parser tables being filled in by
- convert_options. */
-struct parser_convert_state
-{
- struct parser *parser;
- char *short_end;
- void **child_inputs_end;
-};
-
-/* Initialize GROUP from ARGP. If CVT->SHORT_END is non-NULL, short
- options are recorded in the short options string. Returns the next
- unused group entry. CVT holds state used during the conversion. */
-static struct group *
-convert_options (const struct argp *argp,
- struct group *parent, unsigned parent_index,
- struct group *group, struct parser_convert_state *cvt)
-{
- const struct argp_option *opt = argp->options;
- const struct argp_child *children = argp->children;
-
- if (opt || argp->parser)
- {
- /* This parser needs a group. */
- if (cvt->short_end)
- {
- /* Record any short options. */
- for ( ; !__option_is_end (opt); opt++)
- if (__option_is_short(opt))
- *cvt->short_end++ = opt->key;
- }
-
- group->parser = argp->parser;
- group->argp = argp;
- group->args_processed = 0;
- group->parent = parent;
- group->parent_index = parent_index;
- group->input = 0;
- group->hook = 0;
- group->child_inputs = 0;
-
- if (children)
- /* Assign GROUP's CHILD_INPUTS field some space from
- CVT->child_inputs_end.*/
- {
- unsigned num_children = 0;
- while (children[num_children].argp)
- num_children++;
- group->child_inputs = cvt->child_inputs_end;
- cvt->child_inputs_end += num_children;
- }
- parent = group++;
- }
- else
- parent = 0;
-
- if (children)
- {
- unsigned index = 0;
- while (children->argp)
- group =
- convert_options (children++->argp, parent, index++, group, cvt);
- }
-
- return group;
-}
-/* Allocate and initialize the group structures, so that they are
- ordered as if by traversing the corresponding argp parser tree in
- pre-order. Also build the list of short options, if that is needed. */
-static void
-parser_convert (struct parser *parser, const struct argp *argp)
-{
- struct parser_convert_state cvt;
-
- cvt.parser = parser;
- cvt.short_end = parser->short_opts;
- cvt.child_inputs_end = parser->child_inputs;
-
- parser->argp = argp;
-
- if (argp)
- parser->egroup = convert_options (argp, 0, 0, parser->groups, &cvt);
- else
- parser->egroup = parser->groups; /* No parsers at all! */
-
- if (parser->short_opts)
- *cvt.short_end ='\0';
-}
-
-/* Lengths of various parser fields which we will allocated. */
-struct parser_sizes
-{
- /* Needed only ARGP_LONG_ONLY */
- size_t short_len; /* Number of short options. */
-
- size_t num_groups; /* Group structures we allocate. */
- size_t num_child_inputs; /* Child input slots. */
-};
-
-/* For ARGP, increments the NUM_GROUPS field in SZS by the total
- number of argp structures descended from it, and the SHORT_LEN by
- the total number of short options. */
-static void
-calc_sizes (const struct argp *argp, struct parser_sizes *szs)
-{
- const struct argp_child *child = argp->children;
- const struct argp_option *opt = argp->options;
-
- if (opt || argp->parser)
- {
- /* This parser needs a group. */
- szs->num_groups++;
- if (opt)
- {
- while (__option_is_short (opt++))
- szs->short_len++;
- }
- }
-
- if (child)
- while (child->argp)
- {
- calc_sizes ((child++)->argp, szs);
- szs->num_child_inputs++;
- }
-}
-
-/* Initializes PARSER to parse ARGP in a manner described by FLAGS. */
-static error_t
-parser_init (struct parser *parser, const struct argp *argp,
- int argc, char **argv, int flags, void *input)
-{
- error_t err = 0;
- struct group *group;
- struct parser_sizes szs;
-
- parser->posixly_correct = getenv ("POSIXLY_CORRECT");
-
- if (flags & ARGP_IN_ORDER)
- parser->ordering = RETURN_IN_ORDER;
- else if (flags & ARGP_NO_ARGS)
- parser->ordering = REQUIRE_ORDER;
- else if (parser->posixly_correct)
- parser->ordering = REQUIRE_ORDER;
- else
- parser->ordering = PERMUTE;
-
- szs.short_len = 0;
- szs.num_groups = 0;
- szs.num_child_inputs = 0;
-
- if (argp)
- calc_sizes (argp, &szs);
-
- if (!(flags & ARGP_LONG_ONLY))
- /* We have no use for the short option array. */
- szs.short_len = 0;
-
- /* Lengths of the various bits of storage used by PARSER. */
-#define GLEN (szs.num_groups + 1) * sizeof (struct group)
-#define CLEN (szs.num_child_inputs * sizeof (void *))
-#define SLEN (szs.short_len + 1)
-#define STORAGE(offset) ((void *) (((char *) parser->storage) + (offset)))
-
- parser->storage = malloc (GLEN + CLEN + SLEN);
- if (! parser->storage)
- return ENOMEM;
-
- parser->groups = parser->storage;
-
- parser->child_inputs = STORAGE(GLEN);
- memset (parser->child_inputs, 0, szs.num_child_inputs * sizeof (void *));
-
- if (flags & ARGP_LONG_ONLY)
- parser->short_opts = STORAGE(GLEN + CLEN);
- else
- parser->short_opts = NULL;
-
- parser_convert (parser, argp);
-
- memset (&parser->state, 0, sizeof (struct argp_state));
-
- parser->state.root_argp = parser->argp;
- parser->state.argc = argc;
- parser->state.argv = argv;
- parser->state.flags = flags;
- parser->state.err_stream = stderr;
- parser->state.out_stream = stdout;
- parser->state.pstate = parser;
-
- parser->args_only = 0;
- parser->nextchar = NULL;
- parser->first_nonopt = parser->last_nonopt = 0;
-
- /* Call each parser for the first time, giving it a chance to propagate
- values to child parsers. */
- if (parser->groups < parser->egroup)
- parser->groups->input = input;
- for (group = parser->groups;
- group < parser->egroup && (!err || err == EBADKEY);
- group++)
- {
- if (group->parent)
- /* If a child parser, get the initial input value from the parent. */
- group->input = group->parent->child_inputs[group->parent_index];
-
- if (!group->parser
- && group->argp->children && group->argp->children->argp)
- /* For the special case where no parsing function is supplied for an
- argp, propagate its input to its first child, if any (this just
- makes very simple wrapper argps more convenient). */
- group->child_inputs[0] = group->input;
-
- err = group_parse (group, &parser->state, ARGP_KEY_INIT, 0);
- }
- if (err == EBADKEY)
- err = 0; /* Some parser didn't understand. */
-
- if (err)
- return err;
-
- if (argv[0] && !(parser->state.flags & ARGP_PARSE_ARGV0))
- /* There's an argv[0]; use it for messages. */
- {
- parser->state.name = __argp_basename(argv[0]);
-
- /* Don't parse it as an argument. */
- parser->state.next = 1;
- }
- else
- parser->state.name = __argp_short_program_name(NULL);
-
- return 0;
-}
-
-/* Free any storage consumed by PARSER (but not PARSER itself). */
-static error_t
-parser_finalize (struct parser *parser,
- error_t err, int arg_ebadkey, int *end_index)
-{
- struct group *group;
-
- if (err == EBADKEY && arg_ebadkey)
- /* Suppress errors generated by unparsed arguments. */
- err = 0;
-
- if (! err)
- {
- if (parser->state.next == parser->state.argc)
- /* We successfully parsed all arguments! Call all the parsers again,
- just a few more times... */
- {
- for (group = parser->groups;
- group < parser->egroup && (!err || err==EBADKEY);
- group++)
- if (group->args_processed == 0)
- err = group_parse (group, &parser->state, ARGP_KEY_NO_ARGS, 0);
- for (group = parser->egroup - 1;
- group >= parser->groups && (!err || err==EBADKEY);
- group--)
- err = group_parse (group, &parser->state, ARGP_KEY_END, 0);
-
- if (err == EBADKEY)
- err = 0; /* Some parser didn't understand. */
-
- /* Tell the user that all arguments are parsed. */
- if (end_index)
- *end_index = parser->state.next;
- }
- else if (end_index)
- /* Return any remaining arguments to the user. */
- *end_index = parser->state.next;
- else
- /* No way to return the remaining arguments, they must be bogus. */
- {
- if (!(parser->state.flags & ARGP_NO_ERRS)
- && parser->state.err_stream)
- fprintf (parser->state.err_stream,
- dgettext (parser->argp->argp_domain,
- "%s: Too many arguments\n"),
- parser->state.name);
- err = EBADKEY;
- }
- }
-
- /* Okay, we're all done, with either an error or success; call the parsers
- to indicate which one. */
-
- if (err)
- {
- /* Maybe print an error message. */
- if (err == EBADKEY)
- /* An appropriate message describing what the error was should have
- been printed earlier. */
- __argp_state_help (&parser->state, parser->state.err_stream,
- ARGP_HELP_STD_ERR);
-
- /* Since we didn't exit, give each parser an error indication. */
- for (group = parser->groups; group < parser->egroup; group++)
- group_parse (group, &parser->state, ARGP_KEY_ERROR, 0);
- }
- else
- /* Notify parsers of success, and propagate back values from parsers. */
- {
- /* We pass over the groups in reverse order so that child groups are
- given a chance to do there processing before passing back a value to
- the parent. */
- for (group = parser->egroup - 1
- ; group >= parser->groups && (!err || err == EBADKEY)
- ; group--)
- err = group_parse (group, &parser->state, ARGP_KEY_SUCCESS, 0);
- if (err == EBADKEY)
- err = 0; /* Some parser didn't understand. */
- }
-
- /* Call parsers once more, to do any final cleanup. Errors are ignored. */
- for (group = parser->egroup - 1; group >= parser->groups; group--)
- group_parse (group, &parser->state, ARGP_KEY_FINI, 0);
-
- if (err == EBADKEY)
- err = EINVAL;
-
- free (parser->storage);
-
- return err;
-}
-
-/* Call the user parsers to parse the non-option argument VAL, at the
- current position, returning any error. The state NEXT pointer
- should point to the argument; this function will adjust it
- correctly to reflect however many args actually end up being
- consumed. */
-static error_t
-parser_parse_arg (struct parser *parser, char *val)
-{
- /* Save the starting value of NEXT */
- int index = parser->state.next;
- error_t err = EBADKEY;
- struct group *group;
- int key = 0; /* Which of ARGP_KEY_ARG[S] we used. */
-
- /* Try to parse the argument in each parser. */
- for (group = parser->groups
- ; group < parser->egroup && err == EBADKEY
- ; group++)
- {
- parser->state.next++; /* For ARGP_KEY_ARG, consume the arg. */
- key = ARGP_KEY_ARG;
- err = group_parse (group, &parser->state, key, val);
-
- if (err == EBADKEY)
- /* This parser doesn't like ARGP_KEY_ARG; try ARGP_KEY_ARGS instead. */
- {
- parser->state.next--; /* For ARGP_KEY_ARGS, put back the arg. */
- key = ARGP_KEY_ARGS;
- err = group_parse (group, &parser->state, key, 0);
- }
- }
-
- if (! err)
- {
- if (key == ARGP_KEY_ARGS)
- /* The default for ARGP_KEY_ARGS is to assume that if NEXT isn't
- changed by the user, *all* arguments should be considered
- consumed. */
- parser->state.next = parser->state.argc;
-
- if (parser->state.next > index)
- /* Remember that we successfully processed a non-option
- argument -- but only if the user hasn't gotten tricky and set
- the clock back. */
- (--group)->args_processed += (parser->state.next - index);
- else
- /* The user wants to reparse some args, so try looking for options again. */
- parser->args_only = 0;
- }
-
- return err;
-}
-
-/* Exchange two adjacent subsequences of ARGV.
- One subsequence is elements [first_nonopt,last_nonopt)
- which contains all the non-options that have been skipped so far.
- The other is elements [last_nonopt,next), which contains all
- the options processed since those non-options were skipped.
-
- `first_nonopt' and `last_nonopt' are relocated so that they describe
- the new indices of the non-options in ARGV after they are moved. */
-
-static void
-exchange (struct parser *parser)
-{
- int bottom = parser->first_nonopt;
- int middle = parser->last_nonopt;
- int top = parser->state.next;
- char **argv = parser->state.argv;
-
- char *tem;
-
- /* Exchange the shorter segment with the far end of the longer segment.
- That puts the shorter segment into the right place.
- It leaves the longer segment in the right place overall,
- but it consists of two parts that need to be swapped next. */
-
- while (top > middle && middle > bottom)
- {
- if (top - middle > middle - bottom)
- {
- /* Bottom segment is the short one. */
- int len = middle - bottom;
- register int i;
-
- /* Swap it with the top part of the top segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[top - (middle - bottom) + i];
- argv[top - (middle - bottom) + i] = tem;
- }
- /* Exclude the moved bottom segment from further swapping. */
- top -= len;
- }
- else
- {
- /* Top segment is the short one. */
- int len = top - middle;
- register int i;
-
- /* Swap it with the bottom part of the bottom segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[middle + i];
- argv[middle + i] = tem;
- }
- /* Exclude the moved top segment from further swapping. */
- bottom += len;
- }
- }
-
- /* Update records for the slots the non-options now occupy. */
-
- parser->first_nonopt += (parser->state.next - parser->last_nonopt);
- parser->last_nonopt = parser->state.next;
-}
-
-
-
-enum arg_type { ARG_ARG, ARG_SHORT_OPTION,
- ARG_LONG_OPTION, ARG_LONG_ONLY_OPTION,
- ARG_QUOTE };
-
-static enum arg_type
-classify_arg(struct parser *parser, char *arg, char **opt)
-{
- if (arg[0] == '-')
- /* Looks like an option... */
- switch (arg[1])
- {
- case '\0':
- /* "-" is not an option. */
- return ARG_ARG;
- case '-':
- /* Long option, or quote. */
- if (!arg[2])
- return ARG_QUOTE;
-
- /* A long option. */
- if (opt)
- *opt = arg + 2;
- return ARG_LONG_OPTION;
-
- default:
- /* Short option. But if ARGP_LONG_ONLY, it can also be a long option. */
-
- if (opt)
- *opt = arg + 1;
-
- if (parser->state.flags & ARGP_LONG_ONLY)
- {
- /* Rules from getopt.c:
-
- If long_only and the ARGV-element has the form "-f",
- where f is a valid short option, don't consider it an
- abbreviated form of a long option that starts with f.
- Otherwise there would be no way to give the -f short
- option.
-
- On the other hand, if there's a long option "fubar" and
- the ARGV-element is "-fu", do consider that an
- abbreviation of the long option, just like "--fu", and
- not "-f" with arg "u".
-
- This distinction seems to be the most useful approach. */
-
- assert(parser->short_opts);
-
- if (arg[2] || !strchr(parser->short_opts, arg[1]))
- return ARG_LONG_ONLY_OPTION;
- }
-
- return ARG_SHORT_OPTION;
- }
-
- else
- return ARG_ARG;
-}
-
-/* Parse the next argument in PARSER (as indicated by PARSER->state.next).
- Any error from the parsers is returned, and *ARGP_EBADKEY indicates
- whether a value of EBADKEY is due to an unrecognized argument (which is
- generally not fatal). */
-static error_t
-parser_parse_next (struct parser *parser, int *arg_ebadkey)
-{
- if (parser->state.quoted && parser->state.next < parser->state.quoted)
- /* The next argument pointer has been moved to before the quoted
- region, so pretend we never saw the quoting `--', and start
- looking for options again. If the `--' is still there we'll just
- process it one more time. */
- parser->state.quoted = parser->args_only = 0;
-
- /* Give FIRST_NONOPT & LAST_NONOPT rational values if NEXT has been
- moved back by the user (who may also have changed the arguments). */
- if (parser->last_nonopt > parser->state.next)
- parser->last_nonopt = parser->state.next;
- if (parser->first_nonopt > parser->state.next)
- parser->first_nonopt = parser->state.next;
-
- if (parser->nextchar)
- /* Deal with short options. */
- {
- struct group *group;
- char c;
- const struct argp_option *option;
- char *value = NULL;;
-
- assert(!parser->args_only);
-
- c = *parser->nextchar++;
-
- option = find_short_option(parser, c, &group);
- if (!option)
- {
- if (parser->posixly_correct)
- /* 1003.2 specifies the format of this message. */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: illegal option -- %c\n"),
- parser->state.name, c);
- else
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: invalid option -- %c\n"),
- parser->state.name, c);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- if (!*parser->nextchar)
- parser->nextchar = NULL;
-
- if (option->arg)
- {
- value = parser->nextchar;
- parser->nextchar = NULL;
-
- if (!value
- && !(option->flags & OPTION_ARG_OPTIONAL))
- /* We need an mandatory argument. */
- {
- if (parser->state.next == parser->state.argc)
- /* Missing argument */
- {
- /* 1003.2 specifies the format of this message. */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option requires an argument -- %c\n"),
- parser->state.name, c);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
- value = parser->state.argv[parser->state.next++];
- }
- }
- return group_parse(group, &parser->state,
- option->key, value);
- }
- else
- /* Advance to the next ARGV-element. */
- {
- if (parser->args_only)
- {
- *arg_ebadkey = 1;
- if (parser->state.next >= parser->state.argc)
- /* We're done. */
- return EBADKEY;
- else
- return parser_parse_arg(parser,
- parser->state.argv[parser->state.next]);
- }
-
- if (parser->state.next >= parser->state.argc)
- /* Almost done. If there are non-options that we skipped
- previously, we should process them now. */
- {
- *arg_ebadkey = 1;
- if (parser->first_nonopt != parser->last_nonopt)
- {
- exchange(parser);
-
- /* Start processing the arguments we skipped previously. */
- parser->state.next = parser->first_nonopt;
-
- parser->first_nonopt = parser->last_nonopt = 0;
-
- parser->args_only = 1;
- return 0;
- }
- else
- /* Indicate that we're really done. */
- return EBADKEY;
- }
- else
- /* Look for options. */
- {
- char *arg = parser->state.argv[parser->state.next];
-
- char *optstart;
- enum arg_type token = classify_arg(parser, arg, &optstart);
-
- switch (token)
- {
- case ARG_ARG:
- switch (parser->ordering)
- {
- case PERMUTE:
- if (parser->first_nonopt == parser->last_nonopt)
- /* Skipped sequence is empty; start a new one. */
- parser->first_nonopt = parser->last_nonopt = parser->state.next;
-
- else if (parser->last_nonopt != parser->state.next)
- /* We have a non-empty skipped sequence, and
- we're not at the end-point, so move it. */
- exchange(parser);
-
- assert(parser->last_nonopt == parser->state.next);
-
- /* Skip this argument for now. */
- parser->state.next++;
- parser->last_nonopt = parser->state.next;
-
- return 0;
-
- case REQUIRE_ORDER:
- /* Implicit quote before the first argument. */
- parser->args_only = 1;
- return 0;
-
- case RETURN_IN_ORDER:
- *arg_ebadkey = 1;
- return parser_parse_arg(parser, arg);
-
- default:
- abort();
- }
- case ARG_QUOTE:
- /* Skip it, then exchange with any previous non-options. */
- parser->state.next++;
- assert (parser->last_nonopt != parser->state.next);
-
- if (parser->first_nonopt != parser->last_nonopt)
- {
- exchange(parser);
-
- /* Start processing the skipped and the quoted
- arguments. */
-
- parser->state.quoted = parser->state.next = parser->first_nonopt;
-
- /* Also empty the skipped-list, to avoid confusion
- if the user resets the next pointer. */
- parser->first_nonopt = parser->last_nonopt = 0;
- }
- else
- parser->state.quoted = parser->state.next;
-
- parser->args_only = 1;
- return 0;
-
- case ARG_LONG_ONLY_OPTION:
- case ARG_LONG_OPTION:
- {
- struct group *group;
- const struct argp_option *option;
- char *value;
-
- parser->state.next++;
- option = find_long_option(parser, optstart, &group);
-
- if (!option)
- {
- /* NOTE: This includes any "=something" in the output. */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: unrecognized option `%s'\n"),
- parser->state.name, arg);
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- value = strchr(optstart, '=');
- if (value)
- value++;
-
- if (value && !option->arg)
- /* Unexpected argument. */
- {
- if (token == ARG_LONG_OPTION)
- /* --option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `--%s' doesn't allow an argument\n"),
- parser->state.name, option->name);
- else
- /* +option or -option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `%c%s' doesn't allow an argument\n"),
- parser->state.name, arg[0], option->name);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- if (option->arg && !value
- && !(option->flags & OPTION_ARG_OPTIONAL))
- /* We need an mandatory argument. */
- {
- if (parser->state.next == parser->state.argc)
- /* Missing argument */
- {
- if (token == ARG_LONG_OPTION)
- /* --option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `--%s' requires an argument\n"),
- parser->state.name, option->name);
- else
- /* +option or -option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `%c%s' requires an argument\n"),
- parser->state.name, arg[0], option->name);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- value = parser->state.argv[parser->state.next++];
- }
- *arg_ebadkey = 0;
- return group_parse(group, &parser->state,
- option->key, value);
- }
- case ARG_SHORT_OPTION:
- parser->state.next++;
- parser->nextchar = optstart;
- return 0;
-
- default:
- abort();
- }
- }
- }
-}
-
-/* Parse the options strings in ARGC & ARGV according to the argp in ARGP.
- FLAGS is one of the ARGP_ flags above. If END_INDEX is non-NULL, the
- index in ARGV of the first unparsed option is returned in it. If an
- unknown option is present, EINVAL is returned; if some parser routine
- returned a non-zero value, it is returned; otherwise 0 is returned. */
-error_t
-__argp_parse (const struct argp *argp, int argc, char **argv, unsigned flags,
- int *end_index, void *input)
-{
- error_t err;
- struct parser parser;
-
- /* If true, then err == EBADKEY is a result of a non-option argument failing
- to be parsed (which in some cases isn't actually an error). */
- int arg_ebadkey = 0;
-
- if (! (flags & ARGP_NO_HELP))
- /* Add our own options. */
- {
- struct argp_child *child = alloca (4 * sizeof (struct argp_child));
- struct argp *top_argp = alloca (sizeof (struct argp));
-
- /* TOP_ARGP has no options, it just serves to group the user & default
- argps. */
- memset (top_argp, 0, sizeof (*top_argp));
- top_argp->children = child;
-
- memset (child, 0, 4 * sizeof (struct argp_child));
-
- if (argp)
- (child++)->argp = argp;
- (child++)->argp = &argp_default_argp;
- if (argp_program_version || argp_program_version_hook)
- (child++)->argp = &argp_version_argp;
- child->argp = 0;
-
- argp = top_argp;
- }
-
- /* Construct a parser for these arguments. */
- err = parser_init (&parser, argp, argc, argv, flags, input);
-
- if (! err)
- /* Parse! */
- {
- while (! err)
- err = parser_parse_next (&parser, &arg_ebadkey);
- err = parser_finalize (&parser, err, arg_ebadkey, end_index);
- }
-
- return err;
-}
-#ifdef weak_alias
-weak_alias (__argp_parse, argp_parse)
-#endif
-
-/* Return the input field for ARGP in the parser corresponding to STATE; used
- by the help routines. */
-void *
-__argp_input (const struct argp *argp, const struct argp_state *state)
-{
- if (state)
- {
- struct group *group;
- struct parser *parser = state->pstate;
-
- for (group = parser->groups; group < parser->egroup; group++)
- if (group->argp == argp)
- return group->input;
- }
-
- return 0;
-}
-#ifdef weak_alias
-weak_alias (__argp_input, _argp_input)
-#endif
-
-/* Defined here, in case a user is not inlining the definitions in
- * argp.h */
-void
-__argp_usage (__const struct argp_state *__state)
-{
- __argp_state_help (__state, stderr, ARGP_HELP_STD_USAGE);
-}
-
-int
-__option_is_short (__const struct argp_option *__opt)
-{
- if (__opt->flags & OPTION_DOC)
- return 0;
- else
- {
- int __key = __opt->key;
- /* FIXME: whether or not a particular key implies a short option
- * ought not to be locale dependent. */
- return __key > 0 && isprint (__key);
- }
-}
-
-int
-__option_is_end (__const struct argp_option *__opt)
-{
- return !__opt->key && !__opt->name && !__opt->doc && !__opt->group;
-}
diff --git a/argp-standalone/argp-pv.c b/argp-standalone/argp-pv.c
deleted file mode 100644
index d7d374a66bd..00000000000
--- a/argp-standalone/argp-pv.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Default definition for ARGP_PROGRAM_VERSION.
- Copyright (C) 1996, 1997, 1999, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* If set by the user program to a non-zero value, then a default option
- --version is added (unless the ARGP_NO_HELP flag is used), which will
- print this this string followed by a newline and exit (unless the
- ARGP_NO_EXIT flag is used). Overridden by ARGP_PROGRAM_VERSION_HOOK. */
-const char *argp_program_version = 0;
diff --git a/argp-standalone/argp-pvh.c b/argp-standalone/argp-pvh.c
deleted file mode 100644
index 829a1cda80d..00000000000
--- a/argp-standalone/argp-pvh.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Default definition for ARGP_PROGRAM_VERSION_HOOK.
- Copyright (C) 1996, 1997, 1999, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include "argp.h"
-
-/* If set by the user program to a non-zero value, then a default option
- --version is added (unless the ARGP_NO_HELP flag is used), which calls
- this function with a stream to print the version to and a pointer to the
- current parsing state, and then exits (unless the ARGP_NO_EXIT flag is
- used). This variable takes precedent over ARGP_PROGRAM_VERSION. */
-void (*argp_program_version_hook) (FILE *stream, struct argp_state *state) = 0;
diff --git a/argp-standalone/argp.h b/argp-standalone/argp.h
deleted file mode 100644
index 29d3dfe9720..00000000000
--- a/argp-standalone/argp.h
+++ /dev/null
@@ -1,602 +0,0 @@
-/* Hierarchial argument parsing.
- Copyright (C) 1995, 96, 97, 98, 99, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifndef _ARGP_H
-#define _ARGP_H
-
-#include <stdio.h>
-#include <ctype.h>
-
-#define __need_error_t
-#include <errno.h>
-
-#ifndef __THROW
-# define __THROW
-#endif
-
-#ifndef __const
-# define __const const
-#endif
-
-#ifndef __error_t_defined
-typedef int error_t;
-# define __error_t_defined
-#endif
-
-/* FIXME: What's the right way to check for __restrict? Sun's cc seems
- not to have it. Perhaps it's easiest to just delete the use of
- __restrict from the prototypes. */
-#ifndef __restrict
-# ifndef __GNUC___
-# define __restrict
-# endif
-#endif
-
-/* NOTE: We can't use the autoconf tests, since this is supposed to be
- an installed header file and argp's config.h is of course not
- installed. */
-#ifndef PRINTF_STYLE
-# if __GNUC__ >= 2
-# define PRINTF_STYLE(f, a) __attribute__ ((__format__ (__printf__, f, a)))
-# else
-# define PRINTF_STYLE(f, a)
-# endif
-#endif
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* A description of a particular option. A pointer to an array of
- these is passed in the OPTIONS field of an argp structure. Each option
- entry can correspond to one long option and/or one short option; more
- names for the same option can be added by following an entry in an option
- array with options having the OPTION_ALIAS flag set. */
-struct argp_option
-{
- /* The long option name. For more than one name for the same option, you
- can use following options with the OPTION_ALIAS flag set. */
- __const char *name;
-
- /* What key is returned for this option. If > 0 and printable, then it's
- also accepted as a short option. */
- int key;
-
- /* If non-NULL, this is the name of the argument associated with this
- option, which is required unless the OPTION_ARG_OPTIONAL flag is set. */
- __const char *arg;
-
- /* OPTION_ flags. */
- int flags;
-
- /* The doc string for this option. If both NAME and KEY are 0, This string
- will be printed outdented from the normal option column, making it
- useful as a group header (it will be the first thing printed in its
- group); in this usage, it's conventional to end the string with a `:'. */
- __const char *doc;
-
- /* The group this option is in. In a long help message, options are sorted
- alphabetically within each group, and the groups presented in the order
- 0, 1, 2, ..., n, -m, ..., -2, -1. Every entry in an options array with
- if this field 0 will inherit the group number of the previous entry, or
- zero if it's the first one, unless its a group header (NAME and KEY both
- 0), in which case, the previous entry + 1 is the default. Automagic
- options such as --help are put into group -1. */
- int group;
-};
-
-/* The argument associated with this option is optional. */
-#define OPTION_ARG_OPTIONAL 0x1
-
-/* This option isn't displayed in any help messages. */
-#define OPTION_HIDDEN 0x2
-
-/* This option is an alias for the closest previous non-alias option. This
- means that it will be displayed in the same help entry, and will inherit
- fields other than NAME and KEY from the aliased option. */
-#define OPTION_ALIAS 0x4
-
-/* This option isn't actually an option (and so should be ignored by the
- actual option parser), but rather an arbitrary piece of documentation that
- should be displayed in much the same manner as the options. If this flag
- is set, then the option NAME field is displayed unmodified (e.g., no `--'
- prefix is added) at the left-margin (where a *short* option would normally
- be displayed), and the documentation string in the normal place. For
- purposes of sorting, any leading whitespace and puncuation is ignored,
- except that if the first non-whitespace character is not `-', this entry
- is displayed after all options (and OPTION_DOC entries with a leading `-')
- in the same group. */
-#define OPTION_DOC 0x8
-
-/* This option shouldn't be included in `long' usage messages (but is still
- included in help messages). This is mainly intended for options that are
- completely documented in an argp's ARGS_DOC field, in which case including
- the option in the generic usage list would be redundant. For instance,
- if ARGS_DOC is "FOO BAR\n-x BLAH", and the `-x' option's purpose is to
- distinguish these two cases, -x should probably be marked
- OPTION_NO_USAGE. */
-#define OPTION_NO_USAGE 0x10
-
-struct argp; /* fwd declare this type */
-struct argp_state; /* " */
-struct argp_child; /* " */
-
-/* The type of a pointer to an argp parsing function. */
-typedef error_t (*argp_parser_t) (int key, char *arg,
- struct argp_state *state);
-
-/* What to return for unrecognized keys. For special ARGP_KEY_ keys, such
- returns will simply be ignored. For user keys, this error will be turned
- into EINVAL (if the call to argp_parse is such that errors are propagated
- back to the user instead of exiting); returning EINVAL itself would result
- in an immediate stop to parsing in *all* cases. */
-#define ARGP_ERR_UNKNOWN E2BIG /* Hurd should never need E2BIG. XXX */
-
-/* Special values for the KEY argument to an argument parsing function.
- ARGP_ERR_UNKNOWN should be returned if they aren't understood.
-
- The sequence of keys to a parsing function is either (where each
- uppercased word should be prefixed by `ARGP_KEY_' and opt is a user key):
-
- INIT opt... NO_ARGS END SUCCESS -- No non-option arguments at all
- or INIT (opt | ARG)... END SUCCESS -- All non-option args parsed
- or INIT (opt | ARG)... SUCCESS -- Some non-option arg unrecognized
-
- The third case is where every parser returned ARGP_KEY_UNKNOWN for an
- argument, in which case parsing stops at that argument (returning the
- unparsed arguments to the caller of argp_parse if requested, or stopping
- with an error message if not).
-
- If an error occurs (either detected by argp, or because the parsing
- function returned an error value), then the parser is called with
- ARGP_KEY_ERROR, and no further calls are made. */
-
-/* This is not an option at all, but rather a command line argument. If a
- parser receiving this key returns success, the fact is recorded, and the
- ARGP_KEY_NO_ARGS case won't be used. HOWEVER, if while processing the
- argument, a parser function decrements the NEXT field of the state it's
- passed, the option won't be considered processed; this is to allow you to
- actually modify the argument (perhaps into an option), and have it
- processed again. */
-#define ARGP_KEY_ARG 0
-/* There are remaining arguments not parsed by any parser, which may be found
- starting at (STATE->argv + STATE->next). If success is returned, but
- STATE->next left untouched, it's assumed that all arguments were consume,
- otherwise, the parser should adjust STATE->next to reflect any arguments
- consumed. */
-#define ARGP_KEY_ARGS 0x1000006
-/* There are no more command line arguments at all. */
-#define ARGP_KEY_END 0x1000001
-/* Because it's common to want to do some special processing if there aren't
- any non-option args, user parsers are called with this key if they didn't
- successfully process any non-option arguments. Called just before
- ARGP_KEY_END (where more general validity checks on previously parsed
- arguments can take place). */
-#define ARGP_KEY_NO_ARGS 0x1000002
-/* Passed in before any parsing is done. Afterwards, the values of each
- element of the CHILD_INPUT field, if any, in the state structure is
- copied to each child's state to be the initial value of the INPUT field. */
-#define ARGP_KEY_INIT 0x1000003
-/* Use after all other keys, including SUCCESS & END. */
-#define ARGP_KEY_FINI 0x1000007
-/* Passed in when parsing has successfully been completed (even if there are
- still arguments remaining). */
-#define ARGP_KEY_SUCCESS 0x1000004
-/* Passed in if an error occurs. */
-#define ARGP_KEY_ERROR 0x1000005
-
-/* An argp structure contains a set of options declarations, a function to
- deal with parsing one, documentation string, a possible vector of child
- argp's, and perhaps a function to filter help output. When actually
- parsing options, getopt is called with the union of all the argp
- structures chained together through their CHILD pointers, with conflicts
- being resolved in favor of the first occurrence in the chain. */
-struct argp
-{
- /* An array of argp_option structures, terminated by an entry with both
- NAME and KEY having a value of 0. */
- __const struct argp_option *options;
-
- /* What to do with an option from this structure. KEY is the key
- associated with the option, and ARG is any associated argument (NULL if
- none was supplied). If KEY isn't understood, ARGP_ERR_UNKNOWN should be
- returned. If a non-zero, non-ARGP_ERR_UNKNOWN value is returned, then
- parsing is stopped immediately, and that value is returned from
- argp_parse(). For special (non-user-supplied) values of KEY, see the
- ARGP_KEY_ definitions below. */
- argp_parser_t parser;
-
- /* A string describing what other arguments are wanted by this program. It
- is only used by argp_usage to print the `Usage:' message. If it
- contains newlines, the strings separated by them are considered
- alternative usage patterns, and printed on separate lines (lines after
- the first are prefix by ` or: ' instead of `Usage:'). */
- __const char *args_doc;
-
- /* If non-NULL, a string containing extra text to be printed before and
- after the options in a long help message (separated by a vertical tab
- `\v' character). */
- __const char *doc;
-
- /* A vector of argp_children structures, terminated by a member with a 0
- argp field, pointing to child argps should be parsed with this one. Any
- conflicts are resolved in favor of this argp, or early argps in the
- CHILDREN list. This field is useful if you use libraries that supply
- their own argp structure, which you want to use in conjunction with your
- own. */
- __const struct argp_child *children;
-
- /* If non-zero, this should be a function to filter the output of help
- messages. KEY is either a key from an option, in which case TEXT is
- that option's help text, or a special key from the ARGP_KEY_HELP_
- defines, below, describing which other help text TEXT is. The function
- should return either TEXT, if it should be used as-is, a replacement
- string, which should be malloced, and will be freed by argp, or NULL,
- meaning `print nothing'. The value for TEXT is *after* any translation
- has been done, so if any of the replacement text also needs translation,
- that should be done by the filter function. INPUT is either the input
- supplied to argp_parse, or NULL, if argp_help was called directly. */
- char *(*help_filter) (int __key, __const char *__text, void *__input);
-
- /* If non-zero the strings used in the argp library are translated using
- the domain described by this string. Otherwise the currently installed
- default domain is used. */
- const char *argp_domain;
-};
-
-/* Possible KEY arguments to a help filter function. */
-#define ARGP_KEY_HELP_PRE_DOC 0x2000001 /* Help text preceeding options. */
-#define ARGP_KEY_HELP_POST_DOC 0x2000002 /* Help text following options. */
-#define ARGP_KEY_HELP_HEADER 0x2000003 /* Option header string. */
-#define ARGP_KEY_HELP_EXTRA 0x2000004 /* After all other documentation;
- TEXT is NULL for this key. */
-/* Explanatory note emitted when duplicate option arguments have been
- suppressed. */
-#define ARGP_KEY_HELP_DUP_ARGS_NOTE 0x2000005
-#define ARGP_KEY_HELP_ARGS_DOC 0x2000006 /* Argument doc string. */
-
-/* When an argp has a non-zero CHILDREN field, it should point to a vector of
- argp_child structures, each of which describes a subsidiary argp. */
-struct argp_child
-{
- /* The child parser. */
- __const struct argp *argp;
-
- /* Flags for this child. */
- int flags;
-
- /* If non-zero, an optional header to be printed in help output before the
- child options. As a side-effect, a non-zero value forces the child
- options to be grouped together; to achieve this effect without actually
- printing a header string, use a value of "". */
- __const char *header;
-
- /* Where to group the child options relative to the other (`consolidated')
- options in the parent argp; the values are the same as the GROUP field
- in argp_option structs, but all child-groupings follow parent options at
- a particular group level. If both this field and HEADER are zero, then
- they aren't grouped at all, but rather merged with the parent options
- (merging the child's grouping levels with the parents). */
- int group;
-};
-
-/* Parsing state. This is provided to parsing functions called by argp,
- which may examine and, as noted, modify fields. */
-struct argp_state
-{
- /* The top level ARGP being parsed. */
- __const struct argp *root_argp;
-
- /* The argument vector being parsed. May be modified. */
- int argc;
- char **argv;
-
- /* The index in ARGV of the next arg that to be parsed. May be modified. */
- int next;
-
- /* The flags supplied to argp_parse. May be modified. */
- unsigned flags;
-
- /* While calling a parsing function with a key of ARGP_KEY_ARG, this is the
- number of the current arg, starting at zero, and incremented after each
- such call returns. At all other times, this is the number of such
- arguments that have been processed. */
- unsigned arg_num;
-
- /* If non-zero, the index in ARGV of the first argument following a special
- `--' argument (which prevents anything following being interpreted as an
- option). Only set once argument parsing has proceeded past this point. */
- int quoted;
-
- /* An arbitrary pointer passed in from the user. */
- void *input;
- /* Values to pass to child parsers. This vector will be the same length as
- the number of children for the current parser. */
- void **child_inputs;
-
- /* For the parser's use. Initialized to 0. */
- void *hook;
-
- /* The name used when printing messages. This is initialized to ARGV[0],
- or PROGRAM_INVOCATION_NAME if that is unavailable. */
- char *name;
-
- /* Streams used when argp prints something. */
- FILE *err_stream; /* For errors; initialized to stderr. */
- FILE *out_stream; /* For information; initialized to stdout. */
-
- void *pstate; /* Private, for use by argp. */
-};
-
-/* Flags for argp_parse (note that the defaults are those that are
- convenient for program command line parsing): */
-
-/* Don't ignore the first element of ARGV. Normally (and always unless
- ARGP_NO_ERRS is set) the first element of the argument vector is
- skipped for option parsing purposes, as it corresponds to the program name
- in a command line. */
-#define ARGP_PARSE_ARGV0 0x01
-
-/* Don't print error messages for unknown options to stderr; unless this flag
- is set, ARGP_PARSE_ARGV0 is ignored, as ARGV[0] is used as the program
- name in the error messages. This flag implies ARGP_NO_EXIT (on the
- assumption that silent exiting upon errors is bad behaviour). */
-#define ARGP_NO_ERRS 0x02
-
-/* Don't parse any non-option args. Normally non-option args are parsed by
- calling the parse functions with a key of ARGP_KEY_ARG, and the actual arg
- as the value. Since it's impossible to know which parse function wants to
- handle it, each one is called in turn, until one returns 0 or an error
- other than ARGP_ERR_UNKNOWN; if an argument is handled by no one, the
- argp_parse returns prematurely (but with a return value of 0). If all
- args have been parsed without error, all parsing functions are called one
- last time with a key of ARGP_KEY_END. This flag needn't normally be set,
- as the normal behavior is to stop parsing as soon as some argument can't
- be handled. */
-#define ARGP_NO_ARGS 0x04
-
-/* Parse options and arguments in the same order they occur on the command
- line -- normally they're rearranged so that all options come first. */
-#define ARGP_IN_ORDER 0x08
-
-/* Don't provide the standard long option --help, which causes usage and
- option help information to be output to stdout, and exit (0) called. */
-#define ARGP_NO_HELP 0x10
-
-/* Don't exit on errors (they may still result in error messages). */
-#define ARGP_NO_EXIT 0x20
-
-/* Use the gnu getopt `long-only' rules for parsing arguments. */
-#define ARGP_LONG_ONLY 0x40
-
-/* Turns off any message-printing/exiting options. */
-#define ARGP_SILENT (ARGP_NO_EXIT | ARGP_NO_ERRS | ARGP_NO_HELP)
-
-/* Parse the options strings in ARGC & ARGV according to the options in ARGP.
- FLAGS is one of the ARGP_ flags above. If ARG_INDEX is non-NULL, the
- index in ARGV of the first unparsed option is returned in it. If an
- unknown option is present, ARGP_ERR_UNKNOWN is returned; if some parser
- routine returned a non-zero value, it is returned; otherwise 0 is
- returned. This function may also call exit unless the ARGP_NO_HELP flag
- is set. INPUT is a pointer to a value to be passed in to the parser. */
-extern error_t argp_parse (__const struct argp *__restrict argp,
- int argc, char **__restrict argv,
- unsigned flags, int *__restrict arg_index,
- void *__restrict input) __THROW;
-extern error_t __argp_parse (__const struct argp *__restrict argp,
- int argc, char **__restrict argv,
- unsigned flags, int *__restrict arg_index,
- void *__restrict input) __THROW;
-
-/* Global variables. */
-
-/* If defined or set by the user program to a non-zero value, then a default
- option --version is added (unless the ARGP_NO_HELP flag is used), which
- will print this string followed by a newline and exit (unless the
- ARGP_NO_EXIT flag is used). Overridden by ARGP_PROGRAM_VERSION_HOOK. */
-extern __const char *argp_program_version;
-
-/* If defined or set by the user program to a non-zero value, then a default
- option --version is added (unless the ARGP_NO_HELP flag is used), which
- calls this function with a stream to print the version to and a pointer to
- the current parsing state, and then exits (unless the ARGP_NO_EXIT flag is
- used). This variable takes precedent over ARGP_PROGRAM_VERSION. */
-extern void (*argp_program_version_hook) (FILE *__restrict __stream,
- struct argp_state *__restrict
- __state);
-
-/* If defined or set by the user program, it should point to string that is
- the bug-reporting address for the program. It will be printed by
- argp_help if the ARGP_HELP_BUG_ADDR flag is set (as it is by various
- standard help messages), embedded in a sentence that says something like
- `Report bugs to ADDR.'. */
-extern __const char *argp_program_bug_address;
-
-/* The exit status that argp will use when exiting due to a parsing error.
- If not defined or set by the user program, this defaults to EX_USAGE from
- <sysexits.h>. */
-extern error_t argp_err_exit_status;
-
-/* Flags for argp_help. */
-#define ARGP_HELP_USAGE 0x01 /* a Usage: message. */
-#define ARGP_HELP_SHORT_USAGE 0x02 /* " but don't actually print options. */
-#define ARGP_HELP_SEE 0x04 /* a `Try ... for more help' message. */
-#define ARGP_HELP_LONG 0x08 /* a long help message. */
-#define ARGP_HELP_PRE_DOC 0x10 /* doc string preceding long help. */
-#define ARGP_HELP_POST_DOC 0x20 /* doc string following long help. */
-#define ARGP_HELP_DOC (ARGP_HELP_PRE_DOC | ARGP_HELP_POST_DOC)
-#define ARGP_HELP_BUG_ADDR 0x40 /* bug report address */
-#define ARGP_HELP_LONG_ONLY 0x80 /* modify output appropriately to
- reflect ARGP_LONG_ONLY mode. */
-
-/* These ARGP_HELP flags are only understood by argp_state_help. */
-#define ARGP_HELP_EXIT_ERR 0x100 /* Call exit(1) instead of returning. */
-#define ARGP_HELP_EXIT_OK 0x200 /* Call exit(0) instead of returning. */
-
-/* The standard thing to do after a program command line parsing error, if an
- error message has already been printed. */
-#define ARGP_HELP_STD_ERR \
- (ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR)
-/* The standard thing to do after a program command line parsing error, if no
- more specific error message has been printed. */
-#define ARGP_HELP_STD_USAGE \
- (ARGP_HELP_SHORT_USAGE | ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR)
-/* The standard thing to do in response to a --help option. */
-#define ARGP_HELP_STD_HELP \
- (ARGP_HELP_SHORT_USAGE | ARGP_HELP_LONG | ARGP_HELP_EXIT_OK \
- | ARGP_HELP_DOC | ARGP_HELP_BUG_ADDR)
-
-/* Output a usage message for ARGP to STREAM. FLAGS are from the set
- ARGP_HELP_*. */
-extern void argp_help (__const struct argp *__restrict __argp,
- FILE *__restrict __stream,
- unsigned __flags, char *__restrict __name) __THROW;
-extern void __argp_help (__const struct argp *__restrict __argp,
- FILE *__restrict __stream, unsigned __flags,
- char *__name) __THROW;
-
-/* The following routines are intended to be called from within an argp
- parsing routine (thus taking an argp_state structure as the first
- argument). They may or may not print an error message and exit, depending
- on the flags in STATE -- in any case, the caller should be prepared for
- them *not* to exit, and should return an appropiate error after calling
- them. [argp_usage & argp_error should probably be called argp_state_...,
- but they're used often enough that they should be short] */
-
-/* Output, if appropriate, a usage message for STATE to STREAM. FLAGS are
- from the set ARGP_HELP_*. */
-extern void argp_state_help (__const struct argp_state *__restrict __state,
- FILE *__restrict __stream,
- unsigned int __flags) __THROW;
-extern void __argp_state_help (__const struct argp_state *__restrict __state,
- FILE *__restrict __stream,
- unsigned int __flags) __THROW;
-
-/* Possibly output the standard usage message for ARGP to stderr and exit. */
-extern void argp_usage (__const struct argp_state *__state) __THROW;
-extern void __argp_usage (__const struct argp_state *__state) __THROW;
-
-/* If appropriate, print the printf string FMT and following args, preceded
- by the program name and `:', to stderr, and followed by a `Try ... --help'
- message, then exit (1). */
-extern void argp_error (__const struct argp_state *__restrict __state,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(2,3);
-extern void __argp_error (__const struct argp_state *__restrict __state,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(2,3);
-
-/* Similar to the standard gnu error-reporting function error(), but will
- respect the ARGP_NO_EXIT and ARGP_NO_ERRS flags in STATE, and will print
- to STATE->err_stream. This is useful for argument parsing code that is
- shared between program startup (when exiting is desired) and runtime
- option parsing (when typically an error code is returned instead). The
- difference between this function and argp_error is that the latter is for
- *parsing errors*, and the former is for other problems that occur during
- parsing but don't reflect a (syntactic) problem with the input. */
-extern void argp_failure (__const struct argp_state *__restrict __state,
- int __status, int __errnum,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(4,5);
-extern void __argp_failure (__const struct argp_state *__restrict __state,
- int __status, int __errnum,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(4,5);
-
-/* Returns true if the option OPT is a valid short option. */
-extern int _option_is_short (__const struct argp_option *__opt) __THROW;
-extern int __option_is_short (__const struct argp_option *__opt) __THROW;
-
-/* Returns true if the option OPT is in fact the last (unused) entry in an
- options array. */
-extern int _option_is_end (__const struct argp_option *__opt) __THROW;
-extern int __option_is_end (__const struct argp_option *__opt) __THROW;
-
-/* Return the input field for ARGP in the parser corresponding to STATE; used
- by the help routines. */
-extern void *_argp_input (__const struct argp *__restrict __argp,
- __const struct argp_state *__restrict __state)
- __THROW;
-extern void *__argp_input (__const struct argp *__restrict __argp,
- __const struct argp_state *__restrict __state)
- __THROW;
-
-/* Used for extracting the program name from argv[0] */
-extern char *_argp_basename(char *name) __THROW;
-extern char *__argp_basename(char *name) __THROW;
-
-/* Getting the program name given an argp state */
-extern char *
-_argp_short_program_name(const struct argp_state *state) __THROW;
-extern char *
-__argp_short_program_name(const struct argp_state *state) __THROW;
-
-
-#ifdef __USE_EXTERN_INLINES
-
-# if !_LIBC
-# define __argp_usage argp_usage
-# define __argp_state_help argp_state_help
-# define __option_is_short _option_is_short
-# define __option_is_end _option_is_end
-# endif
-
-# ifndef ARGP_EI
-# define ARGP_EI extern __inline__
-# endif
-
-ARGP_EI void
-__argp_usage (__const struct argp_state *__state)
-{
- __argp_state_help (__state, stderr, ARGP_HELP_STD_USAGE);
-}
-
-ARGP_EI int
-__option_is_short (__const struct argp_option *__opt)
-{
- if (__opt->flags & OPTION_DOC)
- return 0;
- else
- {
- int __key = __opt->key;
- return __key > 0 && isprint (__key);
- }
-}
-
-ARGP_EI int
-__option_is_end (__const struct argp_option *__opt)
-{
- return !__opt->key && !__opt->name && !__opt->doc && !__opt->group;
-}
-
-# if !_LIBC
-# undef __argp_usage
-# undef __argp_state_help
-# undef __option_is_short
-# undef __option_is_end
-# endif
-#endif /* Use extern inlines. */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* argp.h */
diff --git a/argp-standalone/autogen.sh b/argp-standalone/autogen.sh
deleted file mode 100755
index 8337353b5ae..00000000000
--- a/argp-standalone/autogen.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-
-aclocal -I .
-autoheader
-autoconf
-automake --add-missing --copy --foreign
diff --git a/argp-standalone/configure.ac b/argp-standalone/configure.ac
deleted file mode 100644
index 65ebc451815..00000000000
--- a/argp-standalone/configure.ac
+++ /dev/null
@@ -1,102 +0,0 @@
-dnl Process this file with autoconf to produce a configure script.
-
-dnl This configure.ac is only for building a standalone argp library.
-AC_INIT([argp], [standalone-1.3])
-AC_PREREQ(2.54)
-AC_CONFIG_SRCDIR([argp-ba.c])
-# Needed to stop autoconf from looking for files in parent directories.
-AC_CONFIG_AUX_DIR([.])
-
-AM_INIT_AUTOMAKE
-AM_CONFIG_HEADER(config.h)
-
-m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
-
-# GNU libc defaults to supplying the ISO C library functions only. The
-# _GNU_SOURCE define enables these extensions, in particular we want
-# errno.h to declare program_invocation_name. Enable it on all
-# systems; no problems have been reported with it so far.
-AC_GNU_SOURCE
-
-# Checks for programs.
-AC_PROG_CC
-AC_PROG_MAKE_SET
-AC_PROG_RANLIB
-AM_PROG_CC_STDC
-
-if test "x$am_cv_prog_cc_stdc" = xno ; then
- AC_ERROR([the C compiler doesn't handle ANSI-C])
-fi
-
-# Checks for libraries.
-
-# Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS(limits.h malloc.h unistd.h sysexits.h stdarg.h)
-
-# Checks for typedefs, structures, and compiler characteristics.
-AC_C_CONST
-AC_C_INLINE
-AC_TYPE_SIZE_T
-
-LSH_GCC_ATTRIBUTES
-
-# Checks for library functions.
-AC_FUNC_ALLOCA
-AC_FUNC_VPRINTF
-AC_CHECK_FUNCS(strerror sleep getpid snprintf)
-
-AC_REPLACE_FUNCS(mempcpy strndup strchrnul strcasecmp vsnprintf)
-
-dnl ARGP_CHECK_FUNC(includes, function-call [, if-found [, if-not-found]])
-AC_DEFUN([ARGP_CHECK_FUNC],
- [AS_VAR_PUSHDEF([ac_func], m4_substr([$2], 0, m4_index([$2], [(])))
- AS_VAR_PUSHDEF([ac_var], [ac_cv_func_call_]ac_func)
- AH_TEMPLATE(AS_TR_CPP(HAVE_[]ac_func),
- [Define to 1 if you have the `]ac_func[' function.])
- AC_CACHE_CHECK([for $2], ac_var,
- [AC_TRY_LINK([$1], [$2],
- [AS_VAR_SET(ac_var, yes)],
- [AS_VAR_SET(ac_var, no)])])
- if test AS_VAR_GET(ac_var) = yes ; then
- ifelse([$3],,
- [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_[]ac_func))],
- [$3
-])
- else
- ifelse([$4],, true, [$4])
- fi
- AS_VAR_POPDEF([ac_var])
- AS_VAR_POPDEF([ac_func])
- ])
-
-# At least on freebsd, putc_unlocked is a macro, so the standard
-# AC_CHECK_FUNCS doesn't work well.
-ARGP_CHECK_FUNC([#include <stdio.h>], [putc_unlocked('x', stdout)])
-
-AC_CHECK_FUNCS(flockfile)
-AC_CHECK_FUNCS(fputs_unlocked fwrite_unlocked)
-
-# Used only by argp-test.c, so don't use AC_REPLACE_FUNCS.
-AC_CHECK_FUNCS(strdup asprintf)
-
-AC_CHECK_DECLS([program_invocation_name, program_invocation_short_name],
- [], [], [[#include <errno.h>]])
-
-# Set these flags *last*, or else the test programs won't compile
-if test x$GCC = xyes ; then
- # Using -ggdb3 makes (some versions of) Redhat's gcc-2.96 dump core
- if "$CC" --version | grep '^2\.96$' 1>/dev/null 2>&1; then
- true
- else
- CFLAGS="$CFLAGS -ggdb3"
- fi
- CFLAGS="$CFLAGS -Wall -W \
- -Wmissing-prototypes -Wmissing-declarations -Wstrict-prototypes \
- -Waggregate-return \
- -Wpointer-arith -Wbad-function-cast -Wnested-externs"
-fi
-
-CPPFLAGS="$CPPFLAGS -I$srcdir"
-
-AC_OUTPUT(Makefile)
diff --git a/argp-standalone/mempcpy.c b/argp-standalone/mempcpy.c
deleted file mode 100644
index 21d8bd2ed94..00000000000
--- a/argp-standalone/mempcpy.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* strndup.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-#include <string.h>
-
-void *
-mempcpy (void *, const void *, size_t) ;
-
-void *
-mempcpy (void *to, const void *from, size_t size)
-{
- memcpy(to, from, size);
- return (char *) to + size;
-}
-
diff --git a/argp-standalone/strcasecmp.c b/argp-standalone/strcasecmp.c
deleted file mode 100644
index 9c1637232fd..00000000000
--- a/argp-standalone/strcasecmp.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* strcasecmp.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-#include <ctype.h>
-int strcasecmp(const char *, const char *);
-
-int strcasecmp(const char *s1, const char *s2)
-{
- unsigned i;
-
- for (i = 0; s1[i] && s2[i]; i++)
- {
- unsigned char c1 = tolower( (unsigned char) s1[i]);
- unsigned char c2 = tolower( (unsigned char) s2[i]);
-
- if (c1 < c2)
- return -1;
- else if (c1 > c2)
- return 1;
- }
-
- return !s2[i] - !s1[i];
-}
diff --git a/argp-standalone/strchrnul.c b/argp-standalone/strchrnul.c
deleted file mode 100644
index ee4145e4eda..00000000000
--- a/argp-standalone/strchrnul.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* strchrnul.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-/* FIXME: What is this function supposed to do? My guess is that it is
- * like strchr, but returns a pointer to the NUL character, not a NULL
- * pointer, if the character isn't found. */
-
-char *strchrnul(const char *, int );
-
-char *strchrnul(const char *s, int c)
-{
- const char *p = s;
- while (*p && (*p != c))
- p++;
-
- return (char *) p;
-}
diff --git a/argp-standalone/strndup.c b/argp-standalone/strndup.c
deleted file mode 100644
index 4147b7a2051..00000000000
--- a/argp-standalone/strndup.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* strndup.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-char *
-strndup (const char *, size_t);
-
-char *
-strndup (const char *s, size_t size)
-{
- char *r;
- char *end = memchr(s, 0, size);
-
- if (end)
- /* Length + 1 */
- size = end - s + 1;
-
- r = malloc(size);
-
- if (size)
- {
- memcpy(r, s, size-1);
- r[size-1] = '\0';
- }
- return r;
-}
diff --git a/argp-standalone/vsnprintf.c b/argp-standalone/vsnprintf.c
deleted file mode 100644
index 33c9a5d0042..00000000000
--- a/argp-standalone/vsnprintf.c
+++ /dev/null
@@ -1,839 +0,0 @@
-/* Copied from http://www.fiction.net/blong/programs/snprintf.c */
-
-/*
- * Copyright Patrick Powell 1995
- * This code is based on code written by Patrick Powell (papowell@astart.com)
- * It may be used for any purpose as long as this notice remains intact
- * on all source code distributions
- */
-
-/**************************************************************
- * Original:
- * Patrick Powell Tue Apr 11 09:48:21 PDT 1995
- * A bombproof version of doprnt (dopr) included.
- * Sigh. This sort of thing is always nasty do deal with. Note that
- * the version here does not include floating point...
- *
- * snprintf() is used instead of sprintf() as it does limit checks
- * for string length. This covers a nasty loophole.
- *
- * The other functions are there to prevent NULL pointers from
- * causing nast effects.
- *
- * More Recently:
- * Brandon Long <blong@fiction.net> 9/15/96 for mutt 0.43
- * This was ugly. It is still ugly. I opted out of floating point
- * numbers, but the formatter understands just about everything
- * from the normal C string format, at least as far as I can tell from
- * the Solaris 2.5 printf(3S) man page.
- *
- * Brandon Long <blong@fiction.net> 10/22/97 for mutt 0.87.1
- * Ok, added some minimal floating point support, which means this
- * probably requires libm on most operating systems. Don't yet
- * support the exponent (e,E) and sigfig (g,G). Also, fmtint()
- * was pretty badly broken, it just wasn't being exercised in ways
- * which showed it, so that's been fixed. Also, formated the code
- * to mutt conventions, and removed dead code left over from the
- * original. Also, there is now a builtin-test, just compile with:
- * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm
- * and run snprintf for results.
- *
- * Thomas Roessler <roessler@guug.de> 01/27/98 for mutt 0.89i
- * The PGP code was using unsigned hexadecimal formats.
- * Unfortunately, unsigned formats simply didn't work.
- *
- * Michael Elkins <me@cs.hmc.edu> 03/05/98 for mutt 0.90.8
- * The original code assumed that both snprintf() and vsnprintf() were
- * missing. Some systems only have snprintf() but not vsnprintf(), so
- * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
- *
- * Andrew Tridgell (tridge@samba.org) Oct 1998
- * fixed handling of %.0f
- * added test for HAVE_LONG_DOUBLE
- *
- * Russ Allbery <rra@stanford.edu> 2000-08-26
- * fixed return value to comply with C99
- * fixed handling of snprintf(NULL, ...)
- *
- * Niels Möller <nisse@lysator.liu.se> 2004-03-05
- * fixed calls to isdigit to use unsigned char.
- * fixed calls to va_arg; short arguments are always passed as int.
- *
- **************************************************************/
-
-#if HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#if !defined(HAVE_SNPRINTF) || !defined(HAVE_VSNPRINTF)
-
-#include <string.h>
-#include <ctype.h>
-#include <sys/types.h>
-
-/* Define this as a fall through, HAVE_STDARG_H is probably already set */
-
-#define HAVE_VARARGS_H
-
-
-/* varargs declarations: */
-
-#if defined(HAVE_STDARG_H)
-# include <stdarg.h>
-# define HAVE_STDARGS /* let's hope that works everywhere (mj) */
-# define VA_LOCAL_DECL va_list ap
-# define VA_START(f) va_start(ap, f)
-# define VA_SHIFT(v,t) ; /* no-op for ANSI */
-# define VA_END va_end(ap)
-#else
-# if defined(HAVE_VARARGS_H)
-# include <varargs.h>
-# undef HAVE_STDARGS
-# define VA_LOCAL_DECL va_list ap
-# define VA_START(f) va_start(ap) /* f is ignored! */
-# define VA_SHIFT(v,t) v = va_arg(ap,t)
-# define VA_END va_end(ap)
-# else
-/*XX ** NO VARARGS ** XX*/
-# endif
-#endif
-
-#ifdef HAVE_LONG_DOUBLE
-#define LDOUBLE long double
-#else
-#define LDOUBLE double
-#endif
-
-int snprintf (char *str, size_t count, const char *fmt, ...);
-int vsnprintf (char *str, size_t count, const char *fmt, va_list arg);
-
-static int dopr (char *buffer, size_t maxlen, const char *format,
- va_list args);
-static int fmtstr (char *buffer, size_t *currlen, size_t maxlen,
- char *value, int flags, int min, int max);
-static int fmtint (char *buffer, size_t *currlen, size_t maxlen,
- long value, int base, int min, int max, int flags);
-static int fmtfp (char *buffer, size_t *currlen, size_t maxlen,
- LDOUBLE fvalue, int min, int max, int flags);
-static int dopr_outch (char *buffer, size_t *currlen, size_t maxlen, char c );
-
-/*
- * dopr(): poor man's version of doprintf
- */
-
-/* format read states */
-#define DP_S_DEFAULT 0
-#define DP_S_FLAGS 1
-#define DP_S_MIN 2
-#define DP_S_DOT 3
-#define DP_S_MAX 4
-#define DP_S_MOD 5
-#define DP_S_CONV 6
-#define DP_S_DONE 7
-
-/* format flags - Bits */
-#define DP_F_MINUS (1 << 0)
-#define DP_F_PLUS (1 << 1)
-#define DP_F_SPACE (1 << 2)
-#define DP_F_NUM (1 << 3)
-#define DP_F_ZERO (1 << 4)
-#define DP_F_UP (1 << 5)
-#define DP_F_UNSIGNED (1 << 6)
-
-/* Conversion Flags */
-#define DP_C_SHORT 1
-#define DP_C_LONG 2
-#define DP_C_LDOUBLE 3
-
-#define char_to_int(p) (p - '0')
-#define MAX(p,q) ((p >= q) ? p : q)
-#define MIN(p,q) ((p <= q) ? p : q)
-
-static int dopr (char *buffer, size_t maxlen, const char *format, va_list args)
-{
- unsigned char ch;
- long value;
- LDOUBLE fvalue;
- char *strvalue;
- int min;
- int max;
- int state;
- int flags;
- int cflags;
- int total;
- size_t currlen;
-
- state = DP_S_DEFAULT;
- currlen = flags = cflags = min = 0;
- max = -1;
- ch = *format++;
- total = 0;
-
- while (state != DP_S_DONE)
- {
- if (ch == '\0')
- state = DP_S_DONE;
-
- switch(state)
- {
- case DP_S_DEFAULT:
- if (ch == '%')
- state = DP_S_FLAGS;
- else
- total += dopr_outch (buffer, &currlen, maxlen, ch);
- ch = *format++;
- break;
- case DP_S_FLAGS:
- switch (ch)
- {
- case '-':
- flags |= DP_F_MINUS;
- ch = *format++;
- break;
- case '+':
- flags |= DP_F_PLUS;
- ch = *format++;
- break;
- case ' ':
- flags |= DP_F_SPACE;
- ch = *format++;
- break;
- case '#':
- flags |= DP_F_NUM;
- ch = *format++;
- break;
- case '0':
- flags |= DP_F_ZERO;
- ch = *format++;
- break;
- default:
- state = DP_S_MIN;
- break;
- }
- break;
- case DP_S_MIN:
- if (isdigit(ch))
- {
- min = 10*min + char_to_int (ch);
- ch = *format++;
- }
- else if (ch == '*')
- {
- min = va_arg (args, int);
- ch = *format++;
- state = DP_S_DOT;
- }
- else
- state = DP_S_DOT;
- break;
- case DP_S_DOT:
- if (ch == '.')
- {
- state = DP_S_MAX;
- ch = *format++;
- }
- else
- state = DP_S_MOD;
- break;
- case DP_S_MAX:
- if (isdigit(ch))
- {
- if (max < 0)
- max = 0;
- max = 10*max + char_to_int (ch);
- ch = *format++;
- }
- else if (ch == '*')
- {
- max = va_arg (args, int);
- ch = *format++;
- state = DP_S_MOD;
- }
- else
- state = DP_S_MOD;
- break;
- case DP_S_MOD:
- /* Currently, we don't support Long Long, bummer */
- switch (ch)
- {
- case 'h':
- cflags = DP_C_SHORT;
- ch = *format++;
- break;
- case 'l':
- cflags = DP_C_LONG;
- ch = *format++;
- break;
- case 'L':
- cflags = DP_C_LDOUBLE;
- ch = *format++;
- break;
- default:
- break;
- }
- state = DP_S_CONV;
- break;
- case DP_S_CONV:
- switch (ch)
- {
- case 'd':
- case 'i':
- if (cflags == DP_C_SHORT)
- value = (short) va_arg (args, int);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, long int);
- else
- value = va_arg (args, int);
- total += fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
- break;
- case 'o':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = (unsigned short) va_arg (args, unsigned);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, unsigned long int);
- else
- value = va_arg (args, unsigned int);
- total += fmtint (buffer, &currlen, maxlen, value, 8, min, max, flags);
- break;
- case 'u':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = (unsigned short) va_arg (args, unsigned);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, unsigned long int);
- else
- value = va_arg (args, unsigned int);
- total += fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
- break;
- case 'X':
- flags |= DP_F_UP;
- case 'x':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = (unsigned short) va_arg (args, unsigned);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, unsigned long int);
- else
- value = va_arg (args, unsigned int);
- total += fmtint (buffer, &currlen, maxlen, value, 16, min, max, flags);
- break;
- case 'f':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg (args, LDOUBLE);
- else
- fvalue = va_arg (args, double);
- /* um, floating point? */
- total += fmtfp (buffer, &currlen, maxlen, fvalue, min, max, flags);
- break;
- case 'E':
- flags |= DP_F_UP;
- case 'e':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg (args, LDOUBLE);
- else
- fvalue = va_arg (args, double);
- break;
- case 'G':
- flags |= DP_F_UP;
- case 'g':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg (args, LDOUBLE);
- else
- fvalue = va_arg (args, double);
- break;
- case 'c':
- total += dopr_outch (buffer, &currlen, maxlen, va_arg (args, int));
- break;
- case 's':
- strvalue = va_arg (args, char *);
- total += fmtstr (buffer, &currlen, maxlen, strvalue, flags, min, max);
- break;
- case 'p':
- strvalue = va_arg (args, void *);
- total += fmtint (buffer, &currlen, maxlen, (long) strvalue, 16, min,
- max, flags);
- break;
- case 'n':
- if (cflags == DP_C_SHORT)
- {
- short int *num;
- num = va_arg (args, short int *);
- *num = currlen;
- }
- else if (cflags == DP_C_LONG)
- {
- long int *num;
- num = va_arg (args, long int *);
- *num = currlen;
- }
- else
- {
- int *num;
- num = va_arg (args, int *);
- *num = currlen;
- }
- break;
- case '%':
- total += dopr_outch (buffer, &currlen, maxlen, ch);
- break;
- case 'w':
- /* not supported yet, treat as next char */
- ch = *format++;
- break;
- default:
- /* Unknown, skip */
- break;
- }
- ch = *format++;
- state = DP_S_DEFAULT;
- flags = cflags = min = 0;
- max = -1;
- break;
- case DP_S_DONE:
- break;
- default:
- /* hmm? */
- break; /* some picky compilers need this */
- }
- }
- if (buffer != NULL)
- {
- if (currlen < maxlen - 1)
- buffer[currlen] = '\0';
- else
- buffer[maxlen - 1] = '\0';
- }
- return total;
-}
-
-static int fmtstr (char *buffer, size_t *currlen, size_t maxlen,
- char *value, int flags, int min, int max)
-{
- int padlen, strln; /* amount to pad */
- int cnt = 0;
- int total = 0;
-
- if (value == 0)
- {
- value = "<NULL>";
- }
-
- for (strln = 0; value[strln]; ++strln); /* strlen */
- if (max >= 0 && max < strln)
- strln = max;
- padlen = min - strln;
- if (padlen < 0)
- padlen = 0;
- if (flags & DP_F_MINUS)
- padlen = -padlen; /* Left Justify */
-
- while (padlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- --padlen;
- }
- while (*value && ((max < 0) || (cnt < max)))
- {
- total += dopr_outch (buffer, currlen, maxlen, *value++);
- ++cnt;
- }
- while (padlen < 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- ++padlen;
- }
- return total;
-}
-
-/* Have to handle DP_F_NUM (ie 0x and 0 alternates) */
-
-static int fmtint (char *buffer, size_t *currlen, size_t maxlen,
- long value, int base, int min, int max, int flags)
-{
- int signvalue = 0;
- unsigned long uvalue;
- char convert[20];
- int place = 0;
- int spadlen = 0; /* amount to space pad */
- int zpadlen = 0; /* amount to zero pad */
- int caps = 0;
- int total = 0;
-
- if (max < 0)
- max = 0;
-
- uvalue = value;
-
- if(!(flags & DP_F_UNSIGNED))
- {
- if( value < 0 ) {
- signvalue = '-';
- uvalue = -value;
- }
- else
- if (flags & DP_F_PLUS) /* Do a sign (+/i) */
- signvalue = '+';
- else
- if (flags & DP_F_SPACE)
- signvalue = ' ';
- }
-
- if (flags & DP_F_UP) caps = 1; /* Should characters be upper case? */
-
- do {
- convert[place++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")
- [uvalue % (unsigned)base ];
- uvalue = (uvalue / (unsigned)base );
- } while(uvalue && (place < 20));
- if (place == 20) place--;
- convert[place] = 0;
-
- zpadlen = max - place;
- spadlen = min - MAX (max, place) - (signvalue ? 1 : 0);
- if (zpadlen < 0) zpadlen = 0;
- if (spadlen < 0) spadlen = 0;
- if (flags & DP_F_ZERO)
- {
- zpadlen = MAX(zpadlen, spadlen);
- spadlen = 0;
- }
- if (flags & DP_F_MINUS)
- spadlen = -spadlen; /* Left Justifty */
-
-#ifdef DEBUG_SNPRINTF
- dprint (1, (debugfile, "zpad: %d, spad: %d, min: %d, max: %d, place: %d\n",
- zpadlen, spadlen, min, max, place));
-#endif
-
- /* Spaces */
- while (spadlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- --spadlen;
- }
-
- /* Sign */
- if (signvalue)
- total += dopr_outch (buffer, currlen, maxlen, signvalue);
-
- /* Zeros */
- if (zpadlen > 0)
- {
- while (zpadlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '0');
- --zpadlen;
- }
- }
-
- /* Digits */
- while (place > 0)
- total += dopr_outch (buffer, currlen, maxlen, convert[--place]);
-
- /* Left Justified spaces */
- while (spadlen < 0) {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- ++spadlen;
- }
-
- return total;
-}
-
-static LDOUBLE abs_val (LDOUBLE value)
-{
- LDOUBLE result = value;
-
- if (value < 0)
- result = -value;
-
- return result;
-}
-
-static LDOUBLE pow10_argp (int exp)
-{
- LDOUBLE result = 1;
-
- while (exp)
- {
- result *= 10;
- exp--;
- }
-
- return result;
-}
-
-static long round_argp (LDOUBLE value)
-{
- long intpart;
-
- intpart = value;
- value = value - intpart;
- if (value >= 0.5)
- intpart++;
-
- return intpart;
-}
-
-static int fmtfp (char *buffer, size_t *currlen, size_t maxlen,
- LDOUBLE fvalue, int min, int max, int flags)
-{
- int signvalue = 0;
- LDOUBLE ufvalue;
- char iconvert[20];
- char fconvert[20];
- int iplace = 0;
- int fplace = 0;
- int padlen = 0; /* amount to pad */
- int zpadlen = 0;
- int caps = 0;
- int total = 0;
- long intpart;
- long fracpart;
-
- /*
- * AIX manpage says the default is 0, but Solaris says the default
- * is 6, and sprintf on AIX defaults to 6
- */
- if (max < 0)
- max = 6;
-
- ufvalue = abs_val (fvalue);
-
- if (fvalue < 0)
- signvalue = '-';
- else
- if (flags & DP_F_PLUS) /* Do a sign (+/i) */
- signvalue = '+';
- else
- if (flags & DP_F_SPACE)
- signvalue = ' ';
-
-#if 0
- if (flags & DP_F_UP) caps = 1; /* Should characters be upper case? */
-#endif
-
- intpart = ufvalue;
-
- /*
- * Sorry, we only support 9 digits past the decimal because of our
- * conversion method
- */
- if (max > 9)
- max = 9;
-
- /* We "cheat" by converting the fractional part to integer by
- * multiplying by a factor of 10
- */
- fracpart = round_argp ((pow10_argp (max)) * (ufvalue - intpart));
-
- if (fracpart >= pow10_argp (max))
- {
- intpart++;
- fracpart -= pow10_argp (max);
- }
-
-#ifdef DEBUG_SNPRINTF
- dprint (1, (debugfile, "fmtfp: %f =? %d.%d\n", fvalue, intpart, fracpart));
-#endif
-
- /* Convert integer part */
- do {
- iconvert[iplace++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")[intpart % 10];
- intpart = (intpart / 10);
- } while(intpart && (iplace < 20));
- if (iplace == 20) iplace--;
- iconvert[iplace] = 0;
-
- /* Convert fractional part */
- do {
- fconvert[fplace++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")[fracpart % 10];
- fracpart = (fracpart / 10);
- } while(fracpart && (fplace < 20));
- if (fplace == 20) fplace--;
- fconvert[fplace] = 0;
-
- /* -1 for decimal point, another -1 if we are printing a sign */
- padlen = min - iplace - max - 1 - ((signvalue) ? 1 : 0);
- zpadlen = max - fplace;
- if (zpadlen < 0)
- zpadlen = 0;
- if (padlen < 0)
- padlen = 0;
- if (flags & DP_F_MINUS)
- padlen = -padlen; /* Left Justifty */
-
- if ((flags & DP_F_ZERO) && (padlen > 0))
- {
- if (signvalue)
- {
- total += dopr_outch (buffer, currlen, maxlen, signvalue);
- --padlen;
- signvalue = 0;
- }
- while (padlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '0');
- --padlen;
- }
- }
- while (padlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- --padlen;
- }
- if (signvalue)
- total += dopr_outch (buffer, currlen, maxlen, signvalue);
-
- while (iplace > 0)
- total += dopr_outch (buffer, currlen, maxlen, iconvert[--iplace]);
-
- /*
- * Decimal point. This should probably use locale to find the correct
- * char to print out.
- */
- if (max > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '.');
-
- while (fplace > 0)
- total += dopr_outch (buffer, currlen, maxlen, fconvert[--fplace]);
- }
-
- while (zpadlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '0');
- --zpadlen;
- }
-
- while (padlen < 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- ++padlen;
- }
-
- return total;
-}
-
-static int dopr_outch (char *buffer, size_t *currlen, size_t maxlen, char c)
-{
- if (*currlen + 1 < maxlen)
- buffer[(*currlen)++] = c;
- return 1;
-}
-
-#ifndef HAVE_VSNPRINTF
-int vsnprintf (char *str, size_t count, const char *fmt, va_list args)
-{
- if (str != NULL)
- str[0] = 0;
- return dopr(str, count, fmt, args);
-}
-#endif /* !HAVE_VSNPRINTF */
-
-#ifndef HAVE_SNPRINTF
-/* VARARGS3 */
-#ifdef HAVE_STDARGS
-int snprintf (char *str,size_t count,const char *fmt,...)
-#else
-int snprintf (va_alist) va_dcl
-#endif
-{
-#ifndef HAVE_STDARGS
- char *str;
- size_t count;
- char *fmt;
-#endif
- VA_LOCAL_DECL;
- int total;
-
- VA_START (fmt);
- VA_SHIFT (str, char *);
- VA_SHIFT (count, size_t );
- VA_SHIFT (fmt, char *);
- total = vsnprintf(str, count, fmt, ap);
- VA_END;
- return total;
-}
-#endif /* !HAVE_SNPRINTF */
-
-#ifdef TEST_SNPRINTF
-#ifndef LONG_STRING
-#define LONG_STRING 1024
-#endif
-int main (void)
-{
- char buf1[LONG_STRING];
- char buf2[LONG_STRING];
- char *fp_fmt[] = {
- "%-1.5f",
- "%1.5f",
- "%123.9f",
- "%10.5f",
- "% 10.5f",
- "%+22.9f",
- "%+4.9f",
- "%01.3f",
- "%4f",
- "%3.1f",
- "%3.2f",
- "%.0f",
- "%.1f",
- NULL
- };
- double fp_nums[] = { -1.5, 134.21, 91340.2, 341.1234, 0203.9, 0.96, 0.996,
- 0.9996, 1.996, 4.136, 0};
- char *int_fmt[] = {
- "%-1.5d",
- "%1.5d",
- "%123.9d",
- "%5.5d",
- "%10.5d",
- "% 10.5d",
- "%+22.33d",
- "%01.3d",
- "%4d",
- NULL
- };
- long int_nums[] = { -1, 134, 91340, 341, 0203, 0};
- int x, y;
- int fail = 0;
- int num = 0;
-
- printf ("Testing snprintf format codes against system sprintf...\n");
-
- for (x = 0; fp_fmt[x] != NULL ; x++)
- for (y = 0; fp_nums[y] != 0 ; y++)
- {
- snprintf (buf1, sizeof (buf1), fp_fmt[x], fp_nums[y]);
- sprintf (buf2, fp_fmt[x], fp_nums[y]);
- if (strcmp (buf1, buf2))
- {
- printf("snprintf doesn't match Format: %s\n\tsnprintf = %s\n\tsprintf = %s\n",
- fp_fmt[x], buf1, buf2);
- fail++;
- }
- num++;
- }
-
- for (x = 0; int_fmt[x] != NULL ; x++)
- for (y = 0; int_nums[y] != 0 ; y++)
- {
- snprintf (buf1, sizeof (buf1), int_fmt[x], int_nums[y]);
- sprintf (buf2, int_fmt[x], int_nums[y]);
- if (strcmp (buf1, buf2))
- {
- printf("snprintf doesn't match Format: %s\n\tsnprintf = %s\n\tsprintf = %s\n",
- int_fmt[x], buf1, buf2);
- fail++;
- }
- num++;
- }
- printf ("%d tests failed out of %d.\n", fail, num);
-}
-#endif /* SNPRINTF_TEST */
-
-#endif /* !HAVE_SNPRINTF */
diff --git a/autogen.sh b/autogen.sh
index e20408bf2ea..c8cdc3f89fa 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,8 +1,92 @@
#!/bin/sh
-aclocal
-autoheader
-(libtoolize --automake --copy --force || glibtoolize --automake --copy --force)
-autoconf
-automake --add-missing --copy --foreign
-cd argp-standalone;./autogen.sh
+echo
+echo ... GlusterFS autogen ...
+echo
+
+## Check all dependencies are present
+MISSING=""
+
+# Check for aclocal
+env aclocal --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ ACLOCAL=aclocal
+else
+ MISSING="$MISSING aclocal"
+fi
+
+# Check for autoconf
+env autoconf --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOCONF=autoconf
+else
+ MISSING="$MISSING autoconf"
+fi
+
+# Check for autoheader
+env autoheader --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOHEADER=autoheader
+else
+ MISSING="$MISSING autoheader"
+fi
+
+# Check for automake
+env automake --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOMAKE=automake
+else
+ MISSING="$MISSING automake"
+fi
+
+# Check for libtoolize or glibtoolize
+env libtoolize --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ # libtoolize was found, so use it
+ TOOL=libtoolize
+else
+ # libtoolize wasn't found, so check for glibtoolize
+ env glibtoolize --version > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ TOOL=glibtoolize
+ else
+ MISSING="$MISSING libtoolize/glibtoolize"
+ fi
+fi
+
+# Check for tar
+env tar -cf /dev/null /dev/null > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ MISSING="$MISSING tar"
+fi
+
+## If dependencies are missing, warn the user and abort
+if [ "x$MISSING" != "x" ]; then
+ echo "Aborting."
+ echo
+ echo "The following build tools are missing:"
+ echo
+ for pkg in $MISSING; do
+ echo " * $pkg"
+ done
+ echo
+ echo "Please install them and try again."
+ echo
+ exit 1
+fi
+
+## Do the autogeneration
+echo Running ${ACLOCAL}...
+$ACLOCAL -I ./contrib/aclocal
+echo Running ${AUTOHEADER}...
+$AUTOHEADER
+echo Running ${TOOL}...
+$TOOL --automake --copy --force
+echo Running ${AUTOCONF}...
+$AUTOCONF
+echo Running ${AUTOMAKE}...
+$AUTOMAKE --add-missing --force-missing --copy
+
+# Instruct user on next steps
+echo
+echo "Please proceed with configuring, compiling, and installing."
diff --git a/booster/Makefile.am b/booster/Makefile.am
deleted file mode 100644
index e1c45f3051c..00000000000
--- a/booster/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS=src \ No newline at end of file
diff --git a/booster/src/Makefile.am b/booster/src/Makefile.am
deleted file mode 100644
index d7d83abf5ee..00000000000
--- a/booster/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-ldpreload_LTLIBRARIES = libglusterfs-booster.la
-ldpreloaddir = $(libdir)/glusterfs
-noinst_HEADERS = booster_fstab.h booster-fd.h
-libglusterfs_booster_la_SOURCES = booster.c booster_stat.c booster_fstab.c booster-fd.c
-libglusterfs_booster_la_CFLAGS = -I$(top_srcdir)/libglusterfsclient/src/ -D_GNU_SOURCE -D$(GF_HOST_OS) -fPIC -Wall \
- -pthread $(GF_BOOSTER_CFLAGS) -shared -nostartfiles
-libglusterfs_booster_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
- -I$(top_srcdir)/libglusterfsclient/src \
- -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(ARGP_STANDALONE_CPPFLAGS)
-
-libglusterfs_booster_la_LDFLAGS = -module -avoidversion
-libglusterfs_booster_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
-
-CLEANFILES =
-
-uninstall-local:
- rm -f $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
-
-install-data-hook:
- ln -sf libglusterfs-booster.so $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
diff --git a/booster/src/booster-fd.c b/booster/src/booster-fd.c
deleted file mode 100644
index fa5b0cde2d3..00000000000
--- a/booster/src/booster-fd.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-#include "booster-fd.h"
-#include <logging.h>
-#include <mem-pool.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <common-utils.h>
-#include <string.h>
-
-#include <assert.h>
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-/*
- Allocate in memory chunks of power of 2 starting from 1024B
- Assumes fdtable->lock is held
- */
-static inline uint
-gf_roundup_power_of_two (uint nr)
-{
- uint result = 1;
-
- if (nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Negative number passed");
- return -1;
- }
-
- while (result <= nr)
- result *= 2;
-
- return result;
-}
-
-#define BOOSTER_NFDBITS (sizeof (unsigned long))
-
-#define BOOSTER_FDMASK(d) (1UL << ((d) % BOOSTER_NFDBITS))
-#define BOOSTER_FDELT(d) (d / BOOSTER_NFDBITS)
-#define BOOSTER_FD_SET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] |= BOOSTER_FDMASK(d))
-#define BOOSTER_FD_CLR(set, d) (set->fd_bits[BOOSTER_FDELT(d)] &= ~BOOSTER_FDMASK(d))
-#define BOOSTER_FD_ISSET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] & BOOSTER_FDMASK(d))
-
-inline int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- return BOOSTER_FD_ISSET(fdtable->close_on_exec, fd);
-}
-
-inline void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- BOOSTER_FD_SET(fdtable->close_on_exec, fd);
-}
-
-int
-booster_fdtable_expand (booster_fdtable_t *fdtable, uint nr)
-{
- fd_t **oldfds = NULL, **tmp = NULL;
- uint oldmax_fds = -1;
- uint cpy = 0;
- int32_t ret = -1, bytes = 0;
- booster_fd_set_t *oldclose_on_exec = NULL;
-
- if (fdtable == NULL || nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Invalid argument");
- errno = EINVAL;
- ret = -1;
- goto out;
- }
-
- nr /= (1024 / sizeof (fd_t *));
- nr = gf_roundup_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fd_t *));
-
- oldfds = fdtable->fds;
- oldmax_fds = fdtable->max_fds;
- oldclose_on_exec = fdtable->close_on_exec;
-
- fdtable->fds = CALLOC (nr, sizeof (fd_t *));
- if (fdtable->fds == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation failed");
- fdtable->fds = oldfds;
- oldfds = NULL;
- ret = -1;
- goto out;
- }
-
- fdtable->max_fds = nr;
-
- if (oldfds) {
- cpy = oldmax_fds * sizeof (fd_t *);
- memcpy (fdtable->fds, oldfds, cpy);
- }
-
- /* nr will be either less than 8 or a multiple of 8 */
- bytes = nr/8;
- bytes = bytes ? bytes : 1;
- fdtable->close_on_exec = CALLOC (bytes, 1);
- if (fdtable->close_on_exec == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation "
- "failed");
- tmp = fdtable->fds;
- fdtable->fds = oldfds;
- oldfds = tmp;
- ret = -1;
- goto out;
- }
-
- if (oldclose_on_exec != NULL) {
- bytes = oldmax_fds/8;
- cpy = bytes ? bytes : 1;
- memcpy (fdtable->close_on_exec, oldclose_on_exec, cpy);
- }
- gf_log ("booster-fd", GF_LOG_TRACE, "FD-table expanded: Old: %d,New: %d"
- , oldmax_fds, nr);
- ret = 0;
-
-out:
- FREE (oldfds);
- FREE (oldclose_on_exec);
-
- return ret;
-}
-
-booster_fdtable_t *
-booster_fdtable_alloc (void)
-{
- booster_fdtable_t *fdtable = NULL;
- int32_t ret = -1;
-
- fdtable = CALLOC (1, sizeof (*fdtable));
- GF_VALIDATE_OR_GOTO ("booster-fd", fdtable, out);
-
- LOCK_INIT (&fdtable->lock);
-
- LOCK (&fdtable->lock);
- {
- ret = booster_fdtable_expand (fdtable, 0);
- }
- UNLOCK (&fdtable->lock);
-
- if (ret == -1) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD-table allocation "
- "failed");
- FREE (fdtable);
- fdtable = NULL;
- }
-
-out:
- return fdtable;
-}
-
-fd_t **
-__booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
-
- if (count == NULL)
- goto out;
-
- fds = fdtable->fds;
- fdtable->fds = calloc (fdtable->max_fds, sizeof (fd_t *));
- *count = fdtable->max_fds;
-
-out:
- return fds;
-}
-
-fd_t **
-booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
- if (!fdtable)
- return NULL;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, count);
- }
- UNLOCK (&fdtable->lock);
-
- return fds;
-}
-
-void
-booster_fdtable_destroy (booster_fdtable_t *fdtable)
-{
- fd_t *fd = NULL;
- fd_t **fds = NULL;
- uint fd_count = 0;
- int i = 0;
-
- if (!fdtable)
- return;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, &fd_count);
- FREE (fdtable->fds);
- }
- UNLOCK (&fdtable->lock);
-
- if (!fds)
- goto free_table;
-
- for (i = 0; i < fd_count; i++) {
- fd = fds[i];
- if (fd != NULL)
- fd_unref (fd);
- }
- FREE (fds);
-free_table:
- LOCK_DESTROY (&fdtable->lock);
- FREE (fdtable);
-}
-
-int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd)
-{
- int ret = -1;
- int error = 0;
-
- if (fdtable == NULL || fdptr == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return -1;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "Requested fd: %d", fd);
- LOCK (&fdtable->lock);
- {
- while (fdtable->max_fds < fd) {
- error = 0;
- error = booster_fdtable_expand (fdtable,
- fdtable->max_fds + 1);
- if (error) {
- gf_log ("booster-fd", GF_LOG_ERROR,
- "Cannot expand fdtable:%s",
- strerror (error));
- goto err;
- }
- }
-
- if (!fdtable->fds[fd]) {
- fdtable->fds[fd] = fdptr;
- fd_ref (fdptr);
- ret = fd;
- } else
- gf_log ("booster-fd", GF_LOG_ERROR, "Cannot allocate fd"
- " %d (slot not empty in fdtable)", fd);
- }
-err:
- UNLOCK (&fdtable->lock);
-
- return ret;
-}
-
-void
-booster_fd_put (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD put: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- return;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- fdtable->fds[fd] = NULL;
- }
- UNLOCK (&fdtable->lock);
-
- if (fdptr)
- fd_unref (fdptr);
-}
-
-fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
-
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD ptr request: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- errno = EINVAL;
- return NULL;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- if (fdptr)
- fd_ref (fdptr);
- }
- UNLOCK (&fdtable->lock);
-
- return fdptr;
-}
-
-void
-booster_fdptr_put (fd_t *booster_fd)
-{
- if (booster_fd)
- fd_unref (booster_fd);
-}
diff --git a/booster/src/booster-fd.h b/booster/src/booster-fd.h
deleted file mode 100644
index 595a112bd8d..00000000000
--- a/booster/src/booster-fd.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _BOOSTER_FD_H
-#define _BOOSTER_FD_H
-
-#include <libglusterfsclient.h>
-#include <locking.h>
-#include <list.h>
-
-/* This struct must be updated if the fd_t in fd.h changes.
- * We cannot include those headers here because unistd.h, included
- * by glusterfs headers, conflicts with the syscall prototypes we
- * define for booster.
- */
-struct _fd {
- pid_t pid;
- int32_t flags;
- int32_t refcount;
- struct list_head inode_list;
- struct _inode *inode;
- struct _dict *ctx;
- gf_lock_t lock; /* used ONLY for manipulating
- 'struct _fd_ctx' array (_ctx).*/
- struct _fd_ctx *_ctx;
-};
-typedef struct _fd fd_t;
-
-struct _booster_fd_set {
- unsigned long fd_bits[0];
-};
-typedef struct _booster_fd_set booster_fd_set_t;
-
-struct _booster_fdtable {
- booster_fd_set_t *close_on_exec;
- int refcount;
- unsigned int max_fds;
- gf_lock_t lock;
- fd_t **fds;
-};
-typedef struct _booster_fdtable booster_fdtable_t;
-
-void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-extern int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd);
-
-extern void
-booster_fd_put (booster_fdtable_t *fdtable, int fd);
-
-extern fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd);
-
-extern void
-booster_fdptr_put (fd_t *fdptr);
-
-extern void
-booster_fdtable_destroy (booster_fdtable_t *fdtable);
-
-booster_fdtable_t *
-booster_fdtable_alloc (void);
-
-#endif /* #ifndef _BOOSTER_FD_H */
diff --git a/booster/src/booster.c b/booster/src/booster.c
deleted file mode 100644
index c34ec114645..00000000000
--- a/booster/src/booster.c
+++ /dev/null
@@ -1,3172 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <libglusterfsclient.h>
-#include <list.h>
-#include <pthread.h>
-#include <sys/xattr.h>
-#include <string.h>
-#include <assert.h>
-#include <errno.h>
-#include <ctype.h>
-#include <logging.h>
-#include <utime.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <fcntl.h>
-#include "booster-fd.h"
-
-#ifndef GF_UNIT_KB
-#define GF_UNIT_KB 1024
-#endif
-
-static pthread_mutex_t cwdlock = PTHREAD_MUTEX_INITIALIZER;
-
-/* attr constructor registers this function with libc's
- * _init function as a function that must be called before
- * the main() of the program.
- */
-static void booster_lib_init (void) __attribute__((constructor));
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-
-extern int pipe (int filedes[2]);
-/* We define these flags so that we can remove fcntl.h from the include path.
- * fcntl.h has certain defines and other lines of code that redirect the
- * application's open and open64 calls to the syscalls defined by
- * libc, for us, thats not a Good Thing (TM).
- */
-#ifndef GF_O_CREAT
-#define GF_O_CREAT 0x40
-#endif
-
-#ifndef GF_O_TRUNC
-#define GF_O_TRUNC 0x200
-#endif
-
-#ifndef GF_O_RDWR
-#define GF_O_RDWR 0x2
-#endif
-
-#ifndef GF_O_WRONLY
-#define GF_O_WRONLY 0x1
-#endif
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
-typedef enum {
- BOOSTER_OPEN,
- BOOSTER_CREAT
-} booster_op_t;
-
-struct _inode;
-struct _dict;
-
-ssize_t
-write (int fd, const void *buf, size_t count);
-
-/* open, open64, creat */
-static int (*real_open) (const char *pathname, int flags, ...);
-static int (*real_open64) (const char *pathname, int flags, ...);
-static int (*real_creat) (const char *pathname, mode_t mode);
-static int (*real_creat64) (const char *pathname, mode_t mode);
-
-/* read, readv, pread, pread64 */
-static ssize_t (*real_read) (int fd, void *buf, size_t count);
-static ssize_t (*real_readv) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pread) (int fd, void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pread64) (int fd, void *buf, size_t count,
- uint64_t offset);
-
-/* write, writev, pwrite, pwrite64 */
-static ssize_t (*real_write) (int fd, const void *buf, size_t count);
-static ssize_t (*real_writev) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pwrite) (int fd, const void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pwrite64) (int fd, const void *buf, size_t count,
- uint64_t offset);
-
-/* lseek, llseek, lseek64 */
-static off_t (*real_lseek) (int fildes, unsigned long offset, int whence);
-static off_t (*real_lseek64) (int fildes, uint64_t offset, int whence);
-
-/* close */
-static int (*real_close) (int fd);
-
-/* dup dup2 */
-static int (*real_dup) (int fd);
-static int (*real_dup2) (int oldfd, int newfd);
-
-static pid_t (*real_fork) (void);
-static int (*real_mkdir) (const char *pathname, mode_t mode);
-static int (*real_rmdir) (const char *pathname);
-static int (*real_chmod) (const char *pathname, mode_t mode);
-static int (*real_chown) (const char *pathname, uid_t owner, gid_t group);
-static int (*real_fchmod) (int fd, mode_t mode);
-static int (*real_fchown) (int fd, uid_t, gid_t gid);
-static int (*real_fsync) (int fd);
-static int (*real_ftruncate) (int fd, off_t length);
-static int (*real_ftruncate64) (int fd, loff_t length);
-static int (*real_link) (const char *oldpath, const char *newname);
-static int (*real_rename) (const char *oldpath, const char *newpath);
-static int (*real_utimes) (const char *path, const struct timeval times[2]);
-static int (*real_utime) (const char *path, const struct utimbuf *buf);
-static int (*real_mknod) (const char *path, mode_t mode, dev_t dev);
-static int (*real_mkfifo) (const char *path, mode_t mode);
-static int (*real_unlink) (const char *path);
-static int (*real_symlink) (const char *oldpath, const char *newpath);
-static int (*real_readlink) (const char *path, char *buf, size_t bufsize);
-static char * (*real_realpath) (const char *path, char *resolved);
-static DIR * (*real_opendir) (const char *path);
-static struct dirent * (*real_readdir) (DIR *dir);
-static struct dirent64 * (*real_readdir64) (DIR *dir);
-static int (*real_readdir_r) (DIR *dir, struct dirent *entry,
- struct dirent **result);
-static int (*real_readdir64_r) (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result);
-static int (*real_closedir) (DIR *dh);
-static int (*real___xstat) (int ver, const char *path, struct stat *buf);
-static int (*real___xstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_stat) (const char *path, struct stat *buf);
-static int (*real_stat64) (const char *path, struct stat64 *buf);
-static int (*real___fxstat) (int ver, int fd, struct stat *buf);
-static int (*real___fxstat64) (int ver, int fd, struct stat64 *buf);
-static int (*real_fstat) (int fd, struct stat *buf);
-static int (*real_fstat64) (int fd , struct stat64 *buf);
-static int (*real___lxstat) (int ver, const char *path, struct stat *buf);
-static int (*real___lxstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_lstat) (const char *path, struct stat *buf);
-static int (*real_lstat64) (const char *path, struct stat64 *buf);
-static int (*real_statfs) (const char *path, struct statfs *buf);
-static int (*real_statfs64) (const char *path, struct statfs64 *buf);
-static int (*real_statvfs) (const char *path, struct statvfs *buf);
-static int (*real_statvfs64) (const char *path, struct statvfs64 *buf);
-static ssize_t (*real_getxattr) (const char *path, const char *name,
- void *value, size_t size);
-static ssize_t (*real_lgetxattr) (const char *path, const char *name,
- void *value, size_t size);
-static int (*real_remove) (const char* path);
-static int (*real_lchown) (const char *path, uid_t owner, gid_t group);
-static void (*real_rewinddir) (DIR *dirp);
-static void (*real_seekdir) (DIR *dirp, off_t offset);
-static off_t (*real_telldir) (DIR *dirp);
-
-static ssize_t (*real_sendfile) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static ssize_t (*real_sendfile64) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static int (*real_fcntl) (int fd, int cmd, ...);
-static int (*real_chdir) (const char *path);
-static int (*real_fchdir) (int fd);
-static char * (*real_getcwd) (char *buf, size_t size);
-static int (*real_truncate) (const char *path, off_t length);
-static int (*real_truncate64) (const char *path, loff_t length);
-static int (*real_setxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_lsetxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_fsetxattr) (int filedes, const char *name,
- const void *value, size_t size, int flags);
-
-
-#define RESOLVE(sym) do { \
- if (!real_##sym) \
- real_##sym = dlsym (RTLD_NEXT, #sym); \
- } while (0)
-
-/*TODO: set proper value */
-#define MOUNT_HASH_SIZE 256
-
-struct booster_mount {
- dev_t st_dev;
- glusterfs_handle_t handle;
- struct list_head device_list;
-};
-typedef struct booster_mount booster_mount_t;
-
-static booster_fdtable_t *booster_fdtable = NULL;
-
-extern int booster_configure (char *confpath);
-/* This is dup'ed every time VMP open/creat wants a new fd.
- * This is needed so we occupy an entry in the process' file
- * table.
- */
-int process_piped_fd = -1;
-
-static int
-booster_get_process_fd ()
-{
- return real_dup (process_piped_fd);
-}
-
-/* The following two define which file contains
- * the FSTAB configuration for VMP-based usage.
- */
-#define DEFAULT_BOOSTER_CONF CONFDIR"/booster.conf"
-#define BOOSTER_CONF_ENV_VAR "GLUSTERFS_BOOSTER_FSTAB"
-
-
-/* The following define which log file is used when
- * using the old mount point bypass approach.
- */
-#define BOOSTER_DEFAULT_LOG CONFDIR"/booster.log"
-#define BOOSTER_LOG_ENV_VAR "GLUSTERFS_BOOSTER_LOG"
-
-void
-do_open (int fd, const char *pathname, int flags, mode_t mode, booster_op_t op)
-{
- char *specfile = NULL;
- char *mount_point = NULL;
- int32_t size = 0;
- int32_t ret = -1;
- FILE *specfp = NULL;
- glusterfs_file_t fh = NULL;
- char *logfile = NULL;
- glusterfs_init_params_t iparams = {
- .loglevel = "error",
- .lookup_timeout = 600,
- .stat_timeout = 600,
- };
-
- gf_log ("booster", GF_LOG_DEBUG, "Opening using MPB: %s", pathname);
- size = fgetxattr (fd, "user.glusterfs-booster-volfile", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfile = calloc (1, size);
- if (!specfile) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-volfile", specfile,
- size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfp = tmpfile ();
- if (!specfp) {
- gf_log ("booster", GF_LOG_ERROR, "Temp file creation failed"
- ": %s", strerror (errno));
- goto out;
- }
-
- ret = fwrite (specfile, size, 1, specfp);
- if (ret != 1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to write volfile: %s",
- strerror (errno));
- goto out;
- }
-
- fseek (specfp, 0L, SEEK_SET);
-
- size = fgetxattr (fd, "user.glusterfs-booster-mount", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- mount_point = calloc (size, sizeof (char));
- if (!mount_point) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-mount", mount_point, size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- logfile = getenv (BOOSTER_LOG_ENV_VAR);
- if (logfile) {
- if (strlen (logfile) > 0)
- iparams.logfile = strdup (logfile);
- else
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- } else {
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Using log-file: %s", iparams.logfile);
- iparams.specfp = specfp;
-
- ret = glusterfs_mount (mount_point, &iparams);
- if (ret == -1) {
- if (errno != EEXIST) {
- gf_log ("booster", GF_LOG_ERROR, "Mount failed over"
- " glusterfs");
- goto out;
- } else
- gf_log ("booster", GF_LOG_ERROR, "Already mounted");
- }
-
- switch (op) {
- case BOOSTER_OPEN:
- gf_log ("booster", GF_LOG_TRACE, "Booster open call");
- fh = glusterfs_open (pathname, flags, mode);
- break;
-
- case BOOSTER_CREAT:
- gf_log ("booster", GF_LOG_TRACE, "Booster create call");
- fh = glusterfs_creat (pathname, mode);
- break;
- }
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Error performing operation");
- goto out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to get unused FD");
- goto out;
- }
- fh = NULL;
-
-out:
- if (specfile) {
- free (specfile);
- }
-
- if (specfp) {
- fclose (specfp);
- }
-
- if (mount_point) {
- free (mount_point);
- }
-
- if (fh) {
- glusterfs_close (fh);
- }
-
- return;
-}
-
-int
-vmp_open (const char *pathname, int flags, ...)
-{
- mode_t mode = 0;
- int fd = -1;
- glusterfs_file_t fh = NULL;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- fh = glusterfs_open (pathname, flags, mode);
- }
- else
- fh = glusterfs_open (pathname, flags);
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "VMP open failed");
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create open fd");
- goto fh_close_out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map fd into table");
- goto realfd_close_out;
- }
-
- return fd;
-
-realfd_close_out:
- real_close (fd);
- fd = -1;
-
-fh_close_out:
- glusterfs_close (fh);
-
-out:
- return fd;
-}
-
-#define BOOSTER_USE_OPEN64 1
-#define BOOSTER_DONT_USE_OPEN64 0
-
-int
-booster_open (const char *pathname, int use64, int flags, ...)
-{
- int ret = -1;
- mode_t mode = 0;
- va_list ap;
- int (*my_open) (const char *pathname, int flags, ...);
-
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Open: %s", pathname);
- /* First try opening through the virtual mount point.
- * The difference lies in the fact that:
- * 1. We depend on libglusterfsclient library to perform
- * the translation from the path to handle.
- * 2. We do not go to the file system for the fd, instead
- * we use booster_get_process_fd (), which returns a dup'ed
- * fd of a pipe created in booster_init.
- */
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- ret = vmp_open (pathname, flags, mode);
- }
- else
- ret = vmp_open (pathname, flags);
-
- /* We receive an ENODEV if the VMP does not exist. If we
- * receive an error other than ENODEV, it means, there
- * actually was an error performing vmp_open. This must
- * be returned to the user.
- */
- if ((ret < 0) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Error in opening file over "
- " VMP: %s", strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File opened");
- goto out;
- }
-
- if (use64) {
- gf_log ("booster", GF_LOG_TRACE, "Using 64-bit open");
- my_open = real_open64;
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Using 32-bit open");
- my_open = real_open;
- }
-
- /* It is possible the RESOLVE macro is not able
- * to resolve the symbol of a function, in that case
- * we dont want to seg-fault on calling a NULL functor.
- */
- if (my_open == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "open not resolved");
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = my_open (pathname, flags, mode);
- } else
- ret = my_open (pathname, flags);
-
- if (ret != -1) {
- do_open (ret, pathname, flags, mode, BOOSTER_OPEN);
- }
-
-out:
- return ret;
-}
-
-/* This is done to over-write existing definitions of open and open64 inside
- * libc with our own copies. __REDIRECT is provided by libc.
- *
- * XXX: This will not work anywhere other than libc based systems.
- */
-int __REDIRECT (booster_false_open, (__const char *__file, int __oflag, ...),
- open) __nonnull ((1));
-int __REDIRECT (booster_false_open64, (__const char *__file, int __oflag, ...),
- open64) __nonnull ((1));
-int
-booster_false_open (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags,
- mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-booster_false_open64 (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags, mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-vmp_creat (const char *pathname, mode_t mode)
-{
- int fd = -1;
- glusterfs_file_t fh = NULL;
-
- fh = glusterfs_creat (pathname, mode);
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Create failed: %s: %s",
- pathname, strerror (errno));
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create fd");
- goto close_out;
- }
-
- if ((booster_fd_unused_get (booster_fdtable, fh, fd)) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map unused fd");
- goto real_close_out;
- }
-
- return fd;
-
-real_close_out:
- real_close (fd);
- fd = -1;
-
-close_out:
- glusterfs_close (fh);
-
-out:
- return -1;
-}
-
-int __REDIRECT (booster_false_creat, (const char *pathname, mode_t mode),
- creat) __nonnull ((1));
-int __REDIRECT (booster_false_creat64, (const char *pathname, mode_t mode),
- creat64) __nonnull ((1));
-
-int
-booster_false_creat (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-int
-booster_false_creat64 (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat64 (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-/* pread */
-
-ssize_t
-pread (int fd, void *buf, size_t count, unsigned long offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread: fd %d, count %lu, offset %lu"
- ,fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pread64 (int fd, void *buf, size_t count, uint64_t offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread64: fd %d, count %lu, offset %"
- PRIu64, fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-read (int fd, void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd;
-
- gf_log ("booster", GF_LOG_TRACE, "read: fd %d, count %lu", fd,
- (long unsigned)count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_read == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_read (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_read (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-readv (int fd, const struct iovec *vector, int count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "readv: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_readv == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readv (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_readv (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-write (int fd, const void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "write: fd %d, count %"GF_PRI_SIZET,
- fd, count);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_write (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_write (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-ssize_t
-writev (int fd, const struct iovec *vector, int count)
-{
- int ret = 0;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "writev: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_writev == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_writev (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_writev (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite (int fd, const void *buf, size_t count, unsigned long offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite: fd %d, count %"GF_PRI_SIZET
- ", offset %lu", fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite64 (int fd, const void *buf, size_t count, uint64_t offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite64: fd %d, count %"GF_PRI_SIZET
- ", offset %"PRIu64, fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-close (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "close: fd %d", fd);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- booster_fd_put (booster_fdtable, fd);
- ret = glusterfs_close (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- ret = real_close (fd);
-
- return ret;
-}
-
-#ifndef _LSEEK_DECLARED
-#define _LSEEK_DECLARED
-off_t
-lseek (int filedes, unsigned long offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %ld",
- filedes, offset);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek (filedes, offset, whence);
- }
-
- return ret;
-}
-#endif
-
-off_t
-lseek64 (int filedes, uint64_t offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %"PRIu64,
- filedes, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek64 (filedes, offset, whence);
- }
-
- return ret;
-}
-
-int
-dup (int oldfd)
-{
- int ret = -1, new_fd = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "dup: fd %d", oldfd);
- glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_fd = real_dup (oldfd);
-
- if (new_fd >=0 && glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = booster_fd_unused_get (booster_fdtable, glfs_fd,
- new_fd);
- fd_ref ((fd_t *)glfs_fd);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR,"Failed to map new fd");
- real_close (new_fd);
- }
- }
-
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return new_fd;
-}
-
-
-int
-dup2 (int oldfd, int newfd)
-{
- int ret = -1;
- glusterfs_file_t old_glfs_fd = NULL, new_glfs_fd = NULL;
-
- if (oldfd == newfd) {
- return newfd;
- }
-
- old_glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_glfs_fd = booster_fdptr_get (booster_fdtable, newfd);
-
- ret = real_dup2 (oldfd, newfd);
- if (ret >= 0) {
- if (new_glfs_fd) {
- glusterfs_close (new_glfs_fd);
- booster_fdptr_put (new_glfs_fd);
- booster_fd_put (booster_fdtable, newfd);
- new_glfs_fd = 0;
- }
-
- if (old_glfs_fd) {
- ret = booster_fd_unused_get (booster_fdtable,
- old_glfs_fd, newfd);
- fd_ref ((fd_t *)old_glfs_fd);
- if (ret == -1) {
- real_close (newfd);
- }
- }
- }
-
- if (old_glfs_fd) {
- booster_fdptr_put (old_glfs_fd);
- }
-
- if (new_glfs_fd) {
- booster_fdptr_put (new_glfs_fd);
- }
-
- return ret;
-}
-
-int
-mkdir (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkdir: path %s", pathname);
- ret = glusterfs_mkdir (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory created");
- return ret;
- }
-
- if (real_mkdir == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else
- ret = real_mkdir (pathname, mode);
-
- return ret;
-}
-
-int
-rmdir (const char *pathname)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "rmdir: path %s", pathname);
- ret = glusterfs_rmdir (pathname);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "rmdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory removed");
- return ret;
- }
-
- if (real_rmdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rmdir (pathname);
-
- return ret;
-}
-
-int
-chmod (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chmod: path %s", pathname);
- ret = glusterfs_chmod (pathname, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chmod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chmod succeeded");
- return ret;
- }
-
- if (real_chmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chmod (pathname, mode);
-
- return ret;
-}
-
-int
-chown (const char *pathname, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chown: path: %s", pathname);
- ret = glusterfs_chown (pathname, owner, group);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chown failed: %s\n",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chown succeeded");
- return ret;
- }
-
- if (real_chown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chown (pathname, owner, group);
-
- return ret;
-}
-
-int
-fchown (int fd, uid_t owner, gid_t group)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchown: fd %d, uid %d, gid %d", fd,
- owner, group);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchown (fd, owner, group);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchown (fh, owner, group);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-
-#define MOUNT_TABLE_HASH_SIZE 256
-
-
-static void booster_cleanup (void);
-static int
-booster_init (void)
-{
- char *booster_conf_path = NULL;
- int ret = -1;
- int pipefd[2];
-
- booster_fdtable = booster_fdtable_alloc ();
- if (!booster_fdtable) {
- fprintf (stderr, "cannot allocate fdtable: %s\n",
- strerror (errno));
- goto err;
- }
-
- if (pipe (pipefd) == -1) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Pipe creation failed:%s"
- , strerror (errno));
- goto err;
- }
-
- process_piped_fd = pipefd[0];
- real_close (pipefd[1]);
- /* libglusterfsclient based VMPs should be inited only
- * after the file tables are inited so that if the socket
- * calls use the fd based syscalls, the fd tables are
- * correctly initialized to return a NULL handle, on which the
- * socket calls will fall-back to the real API.
- */
- booster_conf_path = getenv (BOOSTER_CONF_ENV_VAR);
- if (booster_conf_path != NULL) {
- if (strlen (booster_conf_path) > 0)
- ret = booster_configure (booster_conf_path);
- else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, "
- "using default path: %s", BOOSTER_CONF_ENV_VAR,
- DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
- } else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, using default"
- " path: %s", BOOSTER_CONF_ENV_VAR,DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
-
- atexit (booster_cleanup);
- if (ret == 0)
- gf_log ("booster", GF_LOG_DEBUG, "booster is inited");
- return 0;
-
-err:
- /* Sure we return an error value here
- * but who cares about booster.
- */
- return -1;
-}
-
-
-static void
-booster_cleanup (void)
-{
- /* Ideally, we should be de-initing the fd-table
- * here but the problem is that I've seen file accesses through booster
- * continuing while the atexit registered function is called. That means
- * , we cannot dealloc the fd-table since then there could be a crash
- * while trying to determine whether a given fd is for libc or for
- * libglusterfsclient.
- * We should be satisfied with having cleaned up glusterfs contexts.
- */
- glusterfs_umount_all ();
- glusterfs_reset ();
-}
-
-int
-fchmod (int fd, mode_t mode)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchmod: fd %d, mode: 0x%x", fd, mode);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchmod (fd, mode);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchmod (fh, mode);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-fsync (int fd)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsync: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsync == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fsync (fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsync (fh);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int __REDIRECT (booster_false_ftruncate, (int fd, off_t length),
- ftruncate);
-int __REDIRECT (booster_false_ftruncate64, (int fd, loff_t length),
- ftruncate64);
-
-int
-booster_false_ftruncate (int fd, off_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-booster_false_ftruncate64 (int fd, loff_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate64 (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-link (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "link: old: %s, new: %s", old, new);
- ret = glusterfs_link (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Link failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "link call succeeded");
- return ret;
- }
-
- if (real_link == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_link (old, new);
-
- return ret;
-}
-
-int
-rename (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "link: old: %s, new: %s", old, new);
- ret = glusterfs_rename (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Rename failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "Rename succeeded");
- return ret;
- }
-
- if (real_rename == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rename (old, new);
-
- return ret;
-}
-
-int
-utimes (const char *path, const struct timeval times[2])
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utimes: path %s", path);
- ret = glusterfs_utimes (path, times);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utimes failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utimes succeeded");
- return ret;
- }
-
- if (real_utimes == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utimes (path, times);
-
- return ret;
-}
-
-int
-utime (const char *path, const struct utimbuf *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utime: path %s", path);
- ret = glusterfs_utime (path, buf);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utime failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utime succeeded");
- return ret;
- }
-
- if (real_utime == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utime (path, buf);
-
- return ret;
-}
-
-int
-mknod (const char *path, mode_t mode, dev_t dev)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mknod: path %s", path);
- ret = glusterfs_mknod (path, mode, dev);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mknod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mknod succeeded");
- return ret;
- }
-
- if (real_mknod) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mknod (path, mode, dev);
-
- return ret;
-}
-
-int
-mkfifo (const char *path, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkfifo: path %s", path);
- ret = glusterfs_mkfifo (path, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkfifo failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mkfifo succeeded");
- return ret;
- }
-
- if (real_mkfifo == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mkfifo (path, mode);
-
- return ret;
-}
-
-int
-unlink (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "unlink: path %s", path);
- ret = glusterfs_unlink (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "unlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "unlink succeeded");
- return ret;
- }
-
- if (real_unlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_unlink (path);
-
- return ret;
-}
-
-int
-symlink (const char *oldpath, const char *newpath)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "symlink: old: %s, new: %s",
- oldpath, newpath);
- ret = glusterfs_symlink (oldpath, newpath);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "symlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "symlink succeeded");
- return ret;
- }
-
- if (real_symlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_symlink (oldpath, newpath);
-
- return ret;
-}
-
-int
-readlink (const char *path, char *buf, size_t bufsize)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "readlink: path %s", path);
- ret = glusterfs_readlink (path, buf, bufsize);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "readlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "readlink succeeded");
- return ret;
- }
-
- if (real_readlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readlink (path, buf, bufsize);
-
- return ret;
-}
-
-char *
-realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "realpath: path %s", path);
- res = glusterfs_realpath (path, resolved_path);
- if ((res == NULL) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "realpath failed: %s",
- strerror (errno));
- return res;
- }
-
- if (res != NULL) {
- gf_log ("booster", GF_LOG_TRACE, "realpath succeeded");
- return res;
- }
-
- if (real_realpath == NULL) {
- errno = ENOSYS;
- res = NULL;
- } else
- res = real_realpath (path, resolved_path);
-
- return res;
-}
-
-#define BOOSTER_GL_DIR 1
-#define BOOSTER_POSIX_DIR 2
-
-struct booster_dir_handle {
- int type;
- void *dirh;
-};
-
-DIR *
-opendir (const char *path)
-{
- glusterfs_dir_t gdir = NULL;
- struct booster_dir_handle *bh = NULL;
- DIR *pdir = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "opendir: path: %s", path);
- bh = calloc (1, sizeof (struct booster_dir_handle));
- if (!bh) {
- gf_log ("booster", GF_LOG_ERROR, "memory allocation failed");
- errno = ENOMEM;
- goto out;
- }
-
- gdir = glusterfs_opendir (path);
- if (gdir) {
- gf_log ("booster", GF_LOG_TRACE, "Gluster dir opened");
- bh->type = BOOSTER_GL_DIR;
- bh->dirh = (void *)gdir;
- goto out;
- } else if ((!gdir) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Opendir failed");
- goto free_out;
- }
-
- if (real_opendir == NULL) {
- errno = ENOSYS;
- goto free_out;
- }
-
- pdir = real_opendir (path);
-
- if (pdir) {
- bh->type = BOOSTER_POSIX_DIR;
- bh->dirh = (void *)pdir;
- goto out;
- }
-
-free_out:
- if (bh) {
- free (bh);
- bh = NULL;
- }
-out:
- return (DIR *)bh;
-}
-
-int __REDIRECT (booster_false_readdir_r, (DIR *dir, struct dirent *entry,
- struct dirent **result), readdir_r) __nonnull ((1));
-int __REDIRECT (booster_false_readdir64_r, (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result), readdir64_r) __nonnull ((1));
-
-int
-booster_false_readdir_r (DIR *dir, struct dirent *entry, struct dirent **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh, entry,
- result);
-
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-int
-booster_false_readdir64_r (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh,
- (struct dirent *)entry,
- (struct dirent **)result);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir64_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir64_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-struct dirent *
-__REDIRECT (booster_false_readdir, (DIR *dir), readdir) __nonnull ((1));
-
-struct dirent64 *
-__REDIRECT (booster_false_readdir64, (DIR *dir), readdir64) __nonnull ((1));
-
-struct dirent *
-booster_false_readdir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-struct dirent64 *
-booster_false_readdir64 (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent64 *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir64 ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-int
-closedir (DIR *dh)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dh;
- int ret = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on gluster");
- ret = glusterfs_closedir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on posix");
- if (real_closedir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_closedir ((DIR *)bh->dirh);
- } else {
- errno = EBADF;
- }
-
- if (ret == 0) {
- free (bh);
- bh = NULL;
- }
-out:
- return ret;
-}
-
-/* The real stat functions reside in booster_stat.c to
- * prevent clash with the statX prototype and functions
- * declared from sys/stat.h
- */
-int
-booster_xstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat succeeded");
- goto out;
- }
-
- if (real___xstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___xstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_xstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat64: path: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat64 succeeded");
- goto out;
- }
-
- if (real___xstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___xstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_stat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "stat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat succeeded");
- goto out;
- }
-
- if (real_stat != NULL)
- ret = real_stat (path, sbuf);
- else if (real___xstat != NULL)
- ret = real___xstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_stat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "stat64: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat64 succeeded");
- goto out;
- }
-
- if (real_stat64 != NULL)
- ret = real_stat64 (path, sbuf);
- else if (real___xstat64 != NULL)
- ret = real___xstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat (int ver, int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___fxstat (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat64 (int ver, int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- ret = real___fxstat64 (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat (int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat != NULL)
- ret = real_fstat (fd, sbuf);
- else if (real___fxstat != NULL)
- ret = real___fxstat (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat64 (int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat64 != NULL)
- ret = real_fstat64 (fd, sbuf);
- else if (real___fxstat64 != NULL)
- /* Not sure how portable the use of 0 for
- * version number is but it works over glibc.
- * We need this because, I've
- * observed that all the above real* functors can be
- * NULL. In that case, this is our last and only option.
- */
- ret = real___fxstat64 (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_lxstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat succeeded");
- goto out;
- }
-
- if (real___lxstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___lxstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lxstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat64 succeeded");
- goto out;
- }
-
- if (real___lxstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___lxstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lstat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat succeeded");
- goto out;
- }
-
- if (real_lstat != NULL)
- ret = real_lstat (path, sbuf);
- else if (real___lxstat != NULL)
- ret = real___lxstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_lstat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat64 succeeded");
- goto out;
- }
-
- if (real_lstat64 != NULL)
- ret = real_lstat64 (path, sbuf);
- else if (real___lxstat64 != NULL)
- ret = real___lxstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_statfs (const char *pathname, struct statfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statfs: path %s", pathname);
- ret = glusterfs_statfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs succeeded");
- goto out;
- }
-
- if (real_statfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statfs64 (const char *pathname, struct statfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "stat64: path %s", pathname);
- ret = glusterfs_statfs (pathname, (struct statfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs64 succeeded");
- goto out;
- }
-
- if (real_statfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs (const char *pathname, struct statvfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs: path %s", pathname);
- ret = glusterfs_statvfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs succeeded");
- goto out;
- }
-
- if (real_statvfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs64 (const char *pathname, struct statvfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs64: path %s", pathname);
- ret = glusterfs_statvfs (pathname, (struct statvfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs64 succeeded");
- goto out;
- }
-
- if (real_statvfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-ssize_t
-getxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "getxattr: path %s, name %s", path,
- name);
- ret = glusterfs_getxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "getxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "getxattr succeeded");
- return ret;
- }
-
- if (real_getxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_getxattr (path, name, value, size);
-out:
- return ret;
-}
-
-
-ssize_t
-lgetxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr: path %s, name %s", path,
- name);
- ret = glusterfs_lgetxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lgetxattr failed: %s",
- strerror (errno));
-
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr succeeded");
- return ret;
- }
-
- if (real_lgetxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_lgetxattr (path, name, value, size);
-out:
- return ret;
-}
-
-int
-remove (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "remove: %s", path);
- ret = glusterfs_remove (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "remove failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "remove succeeded");
- goto out;
- }
-
- if (real_remove == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_remove (path);
-
-out:
- return ret;
-}
-
-int
-lchown (const char *path, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lchown: path %s", path);
- ret = glusterfs_lchown (path, owner, group);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lchown failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_ERROR, "lchown succeeded");
- goto out;
- }
-
- if (real_lchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_lchown (path, owner, group);
-
-out:
- return ret;
-}
-
-void
-booster_rewinddir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on glusterfs");
- glusterfs_rewinddir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_rewinddir == NULL) {
- errno = ENOSYS;
- goto out;
- }
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on posix");
- real_rewinddir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-void
-booster_seekdir (DIR *dir, off_t offset)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "seekdir on glusterfs");
- glusterfs_seekdir ((glusterfs_dir_t)bh->dirh, offset);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_seekdir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "seekdir on posix");
- real_seekdir ((DIR *)bh->dirh, offset);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-off_t
-booster_telldir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- off_t offset = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "telldir on glusterfs");
- offset = glusterfs_telldir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_telldir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "telldir on posix");
- offset = real_telldir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return offset;
-}
-
-
-pid_t
-fork (void)
-{
- pid_t pid = 0;
- char child = 0;
-
- glusterfs_log_lock ();
- {
- pid = real_fork ();
- }
- glusterfs_log_unlock ();
-
- child = (pid == 0);
- if (child) {
- booster_cleanup ();
- booster_init ();
- }
-
- return pid;
-}
-
-ssize_t
-sendfile (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile: in fd %d, out fd %d, offset"
- " %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd, *offset,
- count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-ssize_t
-sendfile64 (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile64: in fd %d, out fd %d,"
- " offset %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd,
- *offset, count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile64 (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-
-int
-fcntl (int fd, int cmd, ...)
-{
- va_list ap;
- int ret = -1;
- long arg = 0;
- struct flock *lock = NULL;
- glusterfs_file_t glfs_fd = 0;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- gf_log ("booster", GF_LOG_TRACE, "fcntl: fd %d, cmd %d", fd, cmd);
- switch (cmd) {
- case F_DUPFD:
- ret = dup (fd);
- break;
- /*
- * FIXME: Consider this case when implementing F_DUPFD, F_GETFD
- * etc flags in libglusterfsclient. Commenting it out for
- * timebeing since it is defined only in linux kernel
- * versions >= 2.6.24.
- */
- /* case F_DUPFD_CLOEXEC: */
- case F_GETFD:
- if (glfs_fd != NULL) {
- ret = booster_get_close_on_exec (booster_fdtable, fd)
- ? FD_CLOEXEC : 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_GETFL:
- case F_GETOWN:
- case F_GETSIG:
- case F_GETLEASE:
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd);
- }
- break;
-
- case F_SETFD:
- if (glfs_fd != NULL) {
- booster_set_close_on_exec (booster_fdtable, fd);
- ret = 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_SETFL:
- case F_SETOWN:
- case F_SETSIG:
- case F_SETLEASE:
- case F_NOTIFY:
- va_start (ap, cmd);
- arg = va_arg (ap, long);
- va_end (ap);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, arg);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, arg);
- }
- break;
-
- case F_GETLK:
- case F_SETLK:
- case F_SETLKW:
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (lock == NULL) {
- errno = EINVAL;
- goto out;
- }
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, lock);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, lock);
- }
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-chdir (const char *path)
-{
- int ret = -1;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "chdir: path %s", path);
-
- pthread_mutex_lock (&cwdlock);
- {
- res = glusterfs_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "getcwd failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- ret = glusterfs_chdir (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chdir failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chdir succeeded");
- goto unlock;
- }
-
- if (real_chdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto unlock;
- }
-
- ret = real_chdir (path);
- if (ret == -1) {
- glusterfs_chdir (cwd);
- }
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- return ret;
-}
-
-
-int
-fchdir (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_fchdir (fd);
- if (ret == 0) {
- res = real_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR,
- "getcwd failed (%s)",
- strerror (errno));
- ret = -1;
- } else {
- glusterfs_chdir (cwd);
- }
- }
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchdir (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-char *
-getcwd (char *buf, size_t size)
-{
- char *res = NULL;
-
- res = glusterfs_getcwd (buf, size);
- if ((res == NULL) && (errno == ENODEV)) {
- res = real_getcwd (buf, size);
- }
-
- return res;
-}
-
-
-int __REDIRECT (booster_false_truncate, (const char *path, off_t length),
- truncate) __nonnull ((1));
-int __REDIRECT (booster_false_truncate64, (const char *path, loff_t length),
- truncate64) __nonnull ((1));;
-
-int
-booster_false_truncate (const char *path, off_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate: path (%s) length (%"PRIu64
- ")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate succeeded");
- goto out;
- }
-
- if (real_truncate != NULL)
- ret = real_truncate (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-booster_false_truncate64 (const char *path, loff_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate64: path (%s) length "
- "(%"PRIu64")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate64 succeeded");
- goto out;
- }
-
- if (real_truncate64 != NULL)
- ret = real_truncate64 (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-setxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "setxattr: path: %s", path);
- ret = glusterfs_setxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "setxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "setxattr succeeded");
- goto out;
- }
-
- if (real_setxattr != NULL)
- ret = real_setxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-lsetxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr: path: %s", path);
- ret = glusterfs_lsetxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lsetxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr succeeded");
- goto out;
- }
-
- if (real_lsetxattr != NULL)
- ret = real_lsetxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-fsetxattr (int fd, const char *name, const void *value, size_t size, int flags)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsetxattr: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsetxattr != NULL)
- ret = real_fsetxattr (fd, name, value, size, flags);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsetxattr (fh, name, value, size, flags);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-
-void
-booster_lib_init (void)
-{
-
- RESOLVE (open);
- RESOLVE (open64);
- RESOLVE (creat);
- RESOLVE (creat64);
-
- RESOLVE (read);
- RESOLVE (readv);
- RESOLVE (pread);
- RESOLVE (pread64);
-
- RESOLVE (write);
- RESOLVE (writev);
- RESOLVE (pwrite);
- RESOLVE (pwrite64);
-
- RESOLVE (lseek);
- RESOLVE (lseek64);
-
- RESOLVE (close);
-
- RESOLVE (dup);
- RESOLVE (dup2);
-
- RESOLVE (fork);
- RESOLVE (mkdir);
- RESOLVE (rmdir);
- RESOLVE (chmod);
- RESOLVE (chown);
- RESOLVE (fchmod);
- RESOLVE (fchown);
- RESOLVE (fsync);
- RESOLVE (ftruncate);
- RESOLVE (ftruncate64);
- RESOLVE (link);
- RESOLVE (rename);
- RESOLVE (utimes);
- RESOLVE (utime);
- RESOLVE (mknod);
- RESOLVE (mkfifo);
- RESOLVE (unlink);
- RESOLVE (symlink);
- RESOLVE (readlink);
- RESOLVE (realpath);
- RESOLVE (opendir);
- RESOLVE (readdir);
- RESOLVE (readdir64);
- RESOLVE (closedir);
- RESOLVE (__xstat);
- RESOLVE (__xstat64);
- RESOLVE (stat);
- RESOLVE (stat64);
- RESOLVE (__fxstat);
- RESOLVE (__fxstat64);
- RESOLVE (fstat);
- RESOLVE (fstat64);
- RESOLVE (__lxstat);
- RESOLVE (__lxstat64);
- RESOLVE (lstat);
- RESOLVE (lstat64);
- RESOLVE (statfs);
- RESOLVE (statfs64);
- RESOLVE (statvfs);
- RESOLVE (statvfs64);
- RESOLVE (getxattr);
- RESOLVE (lgetxattr);
- RESOLVE (remove);
- RESOLVE (lchown);
- RESOLVE (rewinddir);
- RESOLVE (seekdir);
- RESOLVE (telldir);
- RESOLVE (sendfile);
- RESOLVE (sendfile64);
- RESOLVE (readdir_r);
- RESOLVE (readdir64_r);
- RESOLVE (fcntl);
- RESOLVE (chdir);
- RESOLVE (fchdir);
- RESOLVE (getcwd);
- RESOLVE (truncate);
- RESOLVE (truncate64);
- RESOLVE (setxattr);
- RESOLVE (lsetxattr);
- RESOLVE (fsetxattr);
-
- /* This must be called after resolving real functions
- * above so that the socket based IO calls in libglusterfsclient
- * can fall back to a non-NULL real_XXX function pointer.
- * Calling booster_init before resolving the names above
- * results in seg-faults because the function symbols above are NULL.
- */
- booster_init ();
-}
-
diff --git a/booster/src/booster_fstab.c b/booster/src/booster_fstab.c
deleted file mode 100644
index 202249cadf3..00000000000
--- a/booster/src/booster_fstab.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995-2000, 2001, 2002, 2003, 2006
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <alloca.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include "booster_fstab.h"
-#include <stdlib.h>
-#include <libglusterfsclient.h>
-#include <errno.h>
-
-/* The default timeout for inode and stat cache. */
-#define BOOSTER_DEFAULT_ATTR_TIMEO 5 /* In Secs */
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-glusterfs_fstab_t *
-glusterfs_fstab_init (const char *file, const char *mode)
-{
- glusterfs_fstab_t *handle = NULL;
- handle = calloc (1, sizeof (glusterfs_fstab_t));
- if (!handle) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Memory allocation"
- " failed");
- goto out;
- }
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "FSTAB file: %s", file);
- FILE *result = fopen (file,mode);
- if (result != NULL) {
- handle->fp = result;
- } else {
- gf_log ("booster-fstab", GF_LOG_ERROR, "FSTAB file open failed:"
- " %s", strerror (errno));
- free (handle);
- handle = NULL;
- }
-
-out:
-
- return handle;
-}
-
-int
-glusterfs_fstab_close (glusterfs_fstab_t *h)
-{
- if (!h)
- return -1;
-
- if (h->fp)
- fclose (h->fp);
-
- return 0;
-}
-
-/* Since the values in a line are separated by spaces, a name cannot
- contain a space. Therefore some programs encode spaces in names
- by the strings "\040". We undo the encoding when reading an entry.
- The decoding happens in place. */
-static char *
-decode_name (char *buf)
-{
- char *rp = buf;
- char *wp = buf;
-
- do
- if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '4'
- && rp[3] == '0')
- {
- /* \040 is a SPACE. */
- *wp++ = ' ';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '1')
- {
- /* \011 is a TAB. */
- *wp++ = '\t';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '2')
- {
- /* \012 is a NEWLINE. */
- *wp++ = '\n';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '\\')
- {
- /* We have to escape \\ to be able to represent all
- * characters. */
- *wp++ = '\\';
- rp += 1;
- }
- else if (rp[0] == '\\' && rp[1] == '1' && rp[2] == '3'
- && rp[3] == '4')
- {
- /* \134 is also \\. */
- *wp++ = '\\';
- rp += 3;
- }
- else
- *wp++ = *rp;
- while (*rp++ != '\0');
-
- return buf;
-}
-
-
-/* Read one mount table entry from STREAM. Returns a pointer to storage
- reused on the next call, or null for EOF or error (use feof/ferror to
- check). */
-struct glusterfs_mntent *
-__glusterfs_fstab_getent (FILE *stream, struct glusterfs_mntent *mp,
- char *buffer, int bufsiz)
-{
- char *cp;
- char *head;
-
- do
- {
- char *end_ptr;
-
- if (fgets (buffer, bufsiz, stream) == NULL)
- {
- return NULL;
- }
-
- end_ptr = strchr (buffer, '\n');
- if (end_ptr != NULL) /* chop newline */
- *end_ptr = '\0';
- else
- {
- /* Not the whole line was read. Do it now but forget
- * it. */
- char tmp[1024];
- while (fgets (tmp, sizeof tmp, stream) != NULL)
- if (strchr (tmp, '\n') != NULL)
- break;
- }
-
- head = buffer + strspn (buffer, " \t");
- /* skip empty lines and comment lines: */
- }
- while (head[0] == '\0' || head[0] == '#');
-
- cp = strsep (&head, " \t");
- mp->mnt_fsname = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_dir = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_type = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_opts = cp != NULL ? decode_name (cp) : (char *) "";
- switch (head ? sscanf (head, " %d %d ", &mp->mnt_freq,
- &mp->mnt_passno) : 0)
- {
- case 0:
- mp->mnt_freq = 0;
- case 1:
- mp->mnt_passno = 0;
- case 2:
- break;
- }
-
- return mp;
-}
-
-struct glusterfs_mntent *
-glusterfs_fstab_getent (glusterfs_fstab_t *h)
-{
- if (!h)
- return NULL;
-
- if (!h->fp)
- return NULL;
-
- return __glusterfs_fstab_getent (h->fp, &h->tmpent, h->buf,
- GF_MNTENT_BUFSIZE);
-}
-
-/* We have to use an encoding for names if they contain spaces or tabs.
- To be able to represent all characters we also have to escape the
- backslash itself. This "function" must be a macro since we use
- `alloca'. */
-#define encode_name(name) \
- do { \
- const char *rp = name; \
- \
- while (*rp != '\0') \
- if (*rp == ' ' || *rp == '\t' || *rp == '\\') \
- break; \
- else \
- ++rp; \
- \
- if (*rp != '\0') \
- { \
- /* In the worst case the length of the string \
- * can increase to four times the current \
- * length. */ \
- char *wp; \
- \
- rp = name; \
- name = wp = (char *) alloca (strlen (name) * 4 + 1); \
- \
- do { \
- if (*rp == ' ') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '4'; \
- *wp++ = '0'; \
- } \
- else if (*rp == '\t') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '1'; \
- } \
- else if (*rp == '\n') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '2'; \
- } \
- else if (*rp == '\\') \
- { \
- *wp++ = '\\'; \
- *wp++ = '\\'; \
- } \
- else \
- *wp++ = *rp; \
- } while (*rp++ != '\0'); \
- } \
- } while (0) \
-
-
-int
-glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt)
-{
- struct glusterfs_mntent mntcopy = *mnt;
- if (!h)
- return -1;
-
- if (!h->fp)
- return -1;
-
- if (fseek (h->fp, 0, SEEK_END))
- return -1;
-
- /* Encode spaces and tabs in the names. */
- encode_name (mntcopy.mnt_fsname);
- encode_name (mntcopy.mnt_dir);
- encode_name (mntcopy.mnt_type);
- encode_name (mntcopy.mnt_opts);
-
- return (fprintf (h->fp, "%s %s %s %s %d %d\n",
- mntcopy.mnt_fsname,
- mntcopy.mnt_dir,
- mntcopy.mnt_type,
- mntcopy.mnt_opts,
- mntcopy.mnt_freq,
- mntcopy.mnt_passno)
- < 0 ? 1 : 0);
-}
-
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-char *
-glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt, const char *opt)
-{
- const size_t optlen = strlen (opt);
- char *rest = mnt->mnt_opts, *p;
-
- while ((p = strstr (rest, opt)) != NULL)
- {
- if ((p == rest || p[-1] == ',')
- && (p[optlen] == '\0' || p[optlen] == '=' || p[optlen] == ','))
- return p;
-
- rest = strchr (p, ',');
- if (rest == NULL)
- break;
- ++rest;
- }
-
- return NULL;
-}
-
-void
-clean_init_params (glusterfs_init_params_t *ipars)
-{
- if (!ipars)
- return;
-
- if (ipars->volume_name)
- free (ipars->volume_name);
-
- if (ipars->specfile)
- free (ipars->specfile);
-
- if (ipars->logfile)
- free (ipars->logfile);
-
- if (ipars->loglevel)
- free (ipars->loglevel);
-
- return;
-}
-
-char *
-get_option_value (char *opt)
-{
- char *val = NULL;
- char *saveptr = NULL;
- char *copy_opt = NULL;
- char *retval = NULL;
-
- copy_opt = strdup (opt);
-
- /* Get the = before the value of the option. */
- val = index (copy_opt, '=');
- if (val) {
- /* Move to start of option */
- ++val;
-
- /* Now, to create a '\0' delimited string out of the
- * options string, first get the position where the
- * next option starts, that would be the next ','.
- */
- saveptr = index (val, ',');
- if (saveptr)
- *saveptr = '\0';
- retval = strdup (val);
- }
-
- free (copy_opt);
-
- return retval;
-}
-
-void
-booster_mount (struct glusterfs_mntent *ent)
-{
- char *opt = NULL;
- glusterfs_init_params_t ipars;
- time_t timeout = BOOSTER_DEFAULT_ATTR_TIMEO;
- char *timeostr = NULL;
- char *endptr = NULL;
-
- if (!ent)
- return;
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "Mount entry: volfile: %s,"
- " VMP: %s, Type: %s, Options: %s", ent->mnt_fsname,
- ent->mnt_dir, ent->mnt_type, ent->mnt_opts);
- if ((strcmp (ent->mnt_type, "glusterfs") != 0)) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Type is not glusterfs");
- return;
- }
-
- memset (&ipars, 0, sizeof (glusterfs_init_params_t));
- if (ent->mnt_fsname)
- ipars.specfile = strdup (ent->mnt_fsname);
-
- opt = glusterfs_fstab_hasoption (ent, "subvolume");
- if (opt)
- ipars.volume_name = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-file");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "logfile");
-
- if (opt)
- ipars.logfile = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-level");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "loglevel");
-
- if (opt)
- ipars.loglevel = get_option_value (opt);
-
- /* Attribute cache timeout */
- opt = glusterfs_fstab_hasoption (ent, "attr_timeout");
- if (opt) {
- timeostr = get_option_value (opt);
- if (timeostr)
- timeout = strtol (timeostr, &endptr, 10);
- }
-
- ipars.lookup_timeout = timeout;
- ipars.stat_timeout = timeout;
-
- if ((glusterfs_mount (ent->mnt_dir, &ipars)) == -1)
- gf_log ("booster-fstab", GF_LOG_ERROR, "VMP mounting failed");
-
- clean_init_params (&ipars);
-}
-
-int
-booster_configure (char *confpath)
-{
- int ret = -1;
- glusterfs_fstab_t *handle = NULL;
- struct glusterfs_mntent *ent = NULL;
-
- if (!confpath)
- goto out;
-
- handle = glusterfs_fstab_init (confpath, "r");
- if (!handle)
- goto out;
-
- while ((ent = glusterfs_fstab_getent (handle)) != NULL)
- booster_mount (ent);
-
- glusterfs_fstab_close (handle);
- ret = 0;
-out:
- return ret;
-}
-
-
diff --git a/booster/src/booster_fstab.h b/booster/src/booster_fstab.h
deleted file mode 100644
index 9bab04c5aa0..00000000000
--- a/booster/src/booster_fstab.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef GLUSTERFS_FSTAB_MNTENT_H
-#define GLUSTERFS_FSTAB_MNTENT_H 1
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat.h"
-
-/* General filesystem types. */
-#define GF_MNTTYPE_IGNORE "ignore" /* Ignore this entry. */
-#define GF_MNTTYPE_NFS "nfs" /* Network file system. */
-#define GF_MNTTYPE_SWAP "swap" /* Swap device. */
-
-
-/* Generic mount options. */
-#define GF_MNTOPT_DEFAULTS "defaults" /* Use all default options. */
-#define GF_MNTOPT_RO "ro" /* Read only. */
-#define GF_MNTOPT_RW "rw" /* Read/write. */
-#define GF_MNTOPT_SUID "suid" /* Set uid allowed. */
-#define GF_MNTOPT_NOSUID "nosuid" /* No set uid allowed. */
-#define GF_MNTOPT_NOAUTO "noauto" /* Do not auto mount. */
-
-
-/* Structure describing a mount table entry. */
-struct glusterfs_mntent
-{
- char *mnt_fsname; /* Device or server for filesystem. */
- char *mnt_dir; /* Directory mounted on. */
- char *mnt_type; /* Type of filesystem: ufs, nfs, etc. */
- char *mnt_opts; /* Comma-separated options for fs. */
- int mnt_freq; /* Dump frequency (in days). */
- int mnt_passno; /* Pass number for `fsck'. */
-};
-
-#define GF_MNTENT_BUFSIZE 1024
-typedef struct glusterfs_fstab_handle {
- FILE *fp;
- char buf[GF_MNTENT_BUFSIZE];
- struct glusterfs_mntent tmpent;
-}glusterfs_fstab_t;
-
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-extern glusterfs_fstab_t *glusterfs_fstab_init (const char *file,
- const char *mode);
-
-extern struct glusterfs_mntent *glusterfs_fstab_getent (glusterfs_fstab_t *h);
-
-/* Write the mount table entry described by MNT to STREAM.
- Return zero on success, nonzero on failure. */
-extern int glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt);
-
-/* Close a stream opened with `glusterfs_fstab_init'. */
-extern int glusterfs_fstab_close (glusterfs_fstab_t *h);
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-extern char *glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt,
- const char *opt);
-
-#endif
diff --git a/booster/src/booster_stat.c b/booster/src/booster_stat.c
deleted file mode 100644
index 8f76cfe37dd..00000000000
--- a/booster/src/booster_stat.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/types.h>
-
-extern int
-booster_stat (const char *path, void *buf);
-
-extern int
-booster_stat64 (const char *path, void *buf);
-
-extern int
-booster_xstat (int ver, const char *path, void *buf);
-
-extern int
-booster_xstat64 (int ver, const char *path, void *buf);
-
-extern int
-booster_fxstat (int ver, int fd, void *buf);
-extern int
-booster_fxstat64 (int ver, int fd, void *buf);
-extern int
-booster_fstat (int fd, void *buf);
-extern int
-booster_fstat64 (int fd, void *buf);
-
-extern int
-booster_lstat (const char *path, void *buf);
-extern int
-booster_lstat64 (const char *path, void *buf);
-extern int
-booster_lxstat (int ver, const char *path, void *buf);
-extern int
-booster_lxstat64 (int ver, const char *path, void *buf);
-
-
-extern int
-booster_statfs (const char *path, void *buf);
-extern int
-booster_statfs64 (const char *path, void *buf);
-
-extern int
-booster_statvfs (const char *path, void *buf);
-
-extern int
-booster_statvfs64 (const char *path, void *buf);
-
-extern void *
-booster_readdir (void *dir);
-
-extern void
-booster_rewinddir (void *dir);
-
-extern void
-booster_seekdir (void *dir, off_t offset);
-
-extern off_t
-booster_telldir (void *dir);
-
-int
-stat (const char *path, void *buf)
-{
- return booster_stat (path, buf);
-}
-
-int
-stat64 (const char *path, void *buf)
-{
- return booster_stat64 (path, buf);
-}
-
-int
-__xstat (int ver, const char *path, void *buf)
-{
- return booster_xstat (ver, path, buf);
-}
-
-int
-__xstat64 (int ver, const char *path, void *buf)
-{
- return booster_xstat64 (ver, path, buf);
-}
-
-int
-__fxstat (int ver, int fd, void *buf)
-{
- return booster_fxstat (ver, fd, buf);
-}
-
-int
-__fxstat64 (int ver, int fd, void *buf)
-{
- return booster_fxstat64 (ver, fd, buf);
-}
-
-int
-fstat (int fd, void *buf)
-{
- return booster_fstat (fd, buf);
-}
-
-int
-fstat64 (int fd, void *buf)
-{
- return booster_fstat64 (fd, buf);
-}
-
-int
-lstat (const char *path, void *buf)
-{
- return booster_lstat (path, buf);
-}
-
-int
-lstat64 (const char *path, void *buf)
-{
- return booster_lstat64 (path, buf);
-}
-
-int
-__lxstat (int ver, const char *path, void *buf)
-{
- return booster_lxstat (ver, path, buf);
-}
-
-int
-__lxstat64 (int ver, const char *path, void *buf)
-{
- return booster_lxstat64 (ver, path, buf);
-}
-
-int
-statfs (const char *pathname, void *buf)
-{
- return booster_statfs (pathname, buf);
-}
-
-int
-statfs64 (const char *pathname, void *buf)
-{
- return booster_statfs64 (pathname, buf);
-}
-
-int
-statvfs (const char *pathname, void *buf)
-{
- return booster_statvfs (pathname, buf);
-}
-
-int
-statvfs64 (const char *pathname, void *buf)
-{
- return booster_statvfs64 (pathname, buf);
-}
-
-void
-rewinddir (void *dir)
-{
- return booster_rewinddir (dir);
-}
-
-void
-seekdir (void *dir, off_t offset)
-{
- return booster_seekdir (dir, offset);
-}
-
-off_t
-telldir (void *dir)
-{
- return booster_telldir (dir);
-}
diff --git a/build-aux/checkpatch.pl b/build-aux/checkpatch.pl
new file mode 100755
index 00000000000..17ae4e4d579
--- /dev/null
+++ b/build-aux/checkpatch.pl
@@ -0,0 +1,4326 @@
+#!/usr/bin/perl -w
+# (c) 2001, Dave Jones. (the file handling bit)
+# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
+# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
+# (c) 2008-2010 Andy Whitcroft <apw@canonical.com>
+# (c) 2014 Gluster Community <gluster-devel@gluster.org>
+# Licensed under the terms of the GNU GPL License version 2
+
+use strict;
+use POSIX;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.32.1';
+
+use Getopt::Long qw(:config no_auto_abbrev);
+
+my $quiet = 0;
+my $tree = 1;
+my $chk_signoff = 1;
+my $chk_patch = 1;
+my $tst_only;
+my $emacs = 0;
+my $terse = 0;
+my $file = 0;
+my $check = 0;
+my $check_orig = 0;
+my $summary = 1;
+my $mailback = 0;
+my $summary_file = 0;
+my $show_types = 0;
+my $fix = 0;
+my $fix_inplace = 0;
+my $root;
+my %debug;
+my %camelcase = ();
+my %use_type = ();
+my @use = ();
+my %ignore_type = ();
+my @ignore = ();
+my $help = 0;
+my $configuration_file = ".checkpatch.conf";
+my $max_line_length = 80;
+my $ignore_perl_version = 0;
+my $minimum_perl_version = 5.10.0;
+my $gerrit_url = $ENV{GERRIT_URL};
+
+sub help {
+ my ($exitcode) = @_;
+
+ print << "EOM";
+Usage: $P [OPTION]... [FILE]...
+Version: $V
+
+Options:
+ -q, --quiet quiet
+ --patch treat FILE as patchfile (default)
+ --emacs emacs compile window format
+ --gerrit-url=STRING URL the patch was reviewed at
+ --terse one line per report
+ -f, --file treat FILE as regular source file
+ --subjective, --strict enable more subjective tests
+ --types TYPE(,TYPE2...) show only these comma separated message types
+ --ignore TYPE(,TYPE2...) ignore various comma separated message types
+ --max-line-length=n set the maximum line length, if exceeded, warn
+ --show-types show the message "types" in the output
+ --root=PATH PATH to the glusterfs tree root
+ --no-summary suppress the per-file summary
+ --mailback only produce a report in case of warnings/errors
+ --summary-file include the filename in summary
+ --debug KEY=[0|1] turn on/off debugging of KEY, where KEY is one of
+ 'values', 'possible', 'type', and 'attr' (default
+ is all off)
+ --test-only=WORD report only warnings/errors containing WORD literally
+ --fix EXPERIMENTAL - may create horrible results
+ If correctable single-line errors exist, create
+ "<inputfile>.EXPERIMENTAL-checkpatch-fixes"
+ with potential errors corrected to the preferred
+ checkpatch style
+ --fix-inplace EXPERIMENTAL - may create horrible results
+ Is the same as --fix, but overwrites the input
+ file. It's your fault if there's no backup or git
+ --ignore-perl-version override checking of perl version. expect
+ runtime errors.
+ -h, --help, --version display this help and exit
+
+When FILE is - read standard input.
+EOM
+
+exit($exitcode);
+}
+
+my $conf = which_conf($configuration_file);
+if (-f $conf) {
+ my @conf_args;
+ open(my $conffile, '<', "$conf")
+ or warn "$P: Can't find a readable $configuration_file file $!\n";
+
+ while (<$conffile>) {
+ my $line = $_;
+
+ $line =~ s/\s*\n?$//g;
+ $line =~ s/^\s*//g;
+ $line =~ s/\s+/ /g;
+
+ next if ($line =~ m/^\s*#/);
+ next if ($line =~ m/^\s*$/);
+
+ my @words = split(" ", $line);
+ foreach my $word (@words) {
+ last if ($word =~ m/^#/);
+ push (@conf_args, $word);
+ }
+ }
+ close($conffile);
+ unshift(@ARGV, @conf_args) if @conf_args;
+}
+
+GetOptions(
+ 'q|quiet+' => \$quiet,
+ 'patch!' => \$chk_patch,
+ 'emacs!' => \$emacs,
+ 'gerrit-url=s' => \$gerrit_url,
+ 'terse!' => \$terse,
+ 'f|file!' => \$file,
+ 'subjective!' => \$check,
+ 'strict!' => \$check,
+ 'ignore=s' => \@ignore,
+ 'types=s' => \@use,
+ 'show-types!' => \$show_types,
+ 'max-line-length=i' => \$max_line_length,
+ 'root=s' => \$root,
+ 'summary!' => \$summary,
+ 'mailback!' => \$mailback,
+ 'summary-file!' => \$summary_file,
+ 'fix!' => \$fix,
+ 'fix-inplace!' => \$fix_inplace,
+ 'ignore-perl-version!' => \$ignore_perl_version,
+ 'debug=s' => \%debug,
+ 'test-only=s' => \$tst_only,
+ 'h|help' => \$help,
+ 'version' => \$help
+) or help(1);
+
+help(0) if ($help);
+
+$fix = 1 if ($fix_inplace);
+$check_orig = $check;
+
+my $exit = 0;
+
+if ($^V && $^V lt $minimum_perl_version) {
+ printf "$P: requires at least perl version %vd\n", $minimum_perl_version;
+ if (!$ignore_perl_version) {
+ exit(1);
+ }
+}
+
+if ($#ARGV < 0) {
+ print "$P: no input files\n";
+ exit(1);
+}
+
+sub hash_save_array_words {
+ my ($hashRef, $arrayRef) = @_;
+
+ my @array = split(/,/, join(',', @$arrayRef));
+ foreach my $word (@array) {
+ $word =~ s/\s*\n?$//g;
+ $word =~ s/^\s*//g;
+ $word =~ s/\s+/ /g;
+ $word =~ tr/[a-z]/[A-Z]/;
+
+ next if ($word =~ m/^\s*#/);
+ next if ($word =~ m/^\s*$/);
+
+ $hashRef->{$word}++;
+ }
+}
+
+sub hash_show_words {
+ my ($hashRef, $prefix) = @_;
+
+ if ($quiet == 0 && keys %$hashRef) {
+ print "NOTE: $prefix message types:";
+ foreach my $word (sort keys %$hashRef) {
+ print " $word";
+ }
+ print "\n\n";
+ }
+}
+
+hash_save_array_words(\%ignore_type, \@ignore);
+hash_save_array_words(\%use_type, \@use);
+
+my $dbg_values = 0;
+my $dbg_possible = 0;
+my $dbg_type = 0;
+my $dbg_attr = 0;
+for my $key (keys %debug) {
+ ## no critic
+ eval "\${dbg_$key} = '$debug{$key}';";
+ die "$@" if ($@);
+}
+
+my $rpt_cleaners = 0;
+
+if ($terse) {
+ $emacs = 1;
+ $quiet++;
+}
+
+if ($tree) {
+ if (defined $root) {
+ if (!top_of_glusterfs_tree($root)) {
+ die "$P: $root: --root does not point at a valid tree\n";
+ }
+ } else {
+ if (top_of_glusterfs_tree('.')) {
+ $root = '.';
+ } elsif ($0 =~ m@(.*)/extras/[^/]*$@ &&
+ top_of_glusterfs_tree($1)) {
+ $root = $1;
+ }
+ }
+
+ if (!defined $root) {
+ print "Must be run from the top-level dir. of a GlusterFS tree\n";
+ exit(2);
+ }
+}
+
+my $emitted_corrupt = 0;
+
+our $Ident = qr{
+ [A-Za-z_][A-Za-z\d_]*
+ (?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)*
+ }x;
+our $Storage = qr{extern|static|asmlinkage};
+our $Sparse = qr{
+ __user|
+ __kernel|
+ __force|
+ __iomem|
+ __must_check|
+ __init_refok|
+ __kprobes|
+ __ref|
+ __rcu
+ }x;
+our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)};
+our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)};
+our $InitAttributeConst = qr{$InitAttributePrefix(?:initconst\b)};
+our $InitAttributeInit = qr{$InitAttributePrefix(?:init\b)};
+our $InitAttribute = qr{$InitAttributeData|$InitAttributeConst|$InitAttributeInit};
+
+# Notes to $Attribute:
+# We need \b after 'init' otherwise 'initconst' will cause a false positive in a check
+our $Attribute = qr{
+ const|
+ __percpu|
+ __nocast|
+ __safe|
+ __bitwise__|
+ __packed__|
+ __packed2__|
+ __naked|
+ __maybe_unused|
+ __always_unused|
+ __noreturn|
+ __used|
+ __cold|
+ __noclone|
+ __deprecated|
+ __read_mostly|
+ __kprobes|
+ $InitAttribute|
+ ____cacheline_aligned|
+ ____cacheline_aligned_in_smp|
+ ____cacheline_internodealigned_in_smp|
+ __weak
+ }x;
+our $Modifier;
+our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__};
+our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]};
+our $Lval = qr{$Ident(?:$Member)*};
+
+our $Int_type = qr{(?i)llu|ull|ll|lu|ul|l|u};
+our $Binary = qr{(?i)0b[01]+$Int_type?};
+our $Hex = qr{(?i)0x[0-9a-f]+$Int_type?};
+our $Int = qr{[0-9]+$Int_type?};
+our $Octal = qr{0[0-7]+$Int_type?};
+our $Float_hex = qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?};
+our $Float_dec = qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?};
+our $Float_int = qr{(?i)[0-9]+e-?[0-9]+[fl]?};
+our $Float = qr{$Float_hex|$Float_dec|$Float_int};
+our $Constant = qr{$Float|$Binary|$Octal|$Hex|$Int};
+our $Assignment = qr{\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=};
+our $Compare = qr{<=|>=|==|!=|<|(?<!-)>};
+our $Arithmetic = qr{\+|-|\*|\/|%};
+our $Operators = qr{
+ <=|>=|==|!=|
+ =>|->|<<|>>|<|>|!|~|
+ &&|\|\||,|\^|\+\+|--|&|\||$Arithmetic
+ }x;
+
+our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x;
+
+our $NonptrType;
+our $NonptrTypeWithAttr;
+our $Type;
+our $Declare;
+
+our $NON_ASCII_UTF8 = qr{
+ [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
+ | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
+}x;
+
+our $UTF8 = qr{
+ [\x09\x0A\x0D\x20-\x7E] # ASCII
+ | $NON_ASCII_UTF8
+}x;
+
+our $typeTypedefs = qr{(?x:
+ (?:__)?(?:u|s|be|le)(?:8|16|32|64)|
+ atomic_t
+)};
+
+our $logFunctions = qr{(?x:
+ printk(?:_ratelimited|_once|)|
+ (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)|
+ WARN(?:_RATELIMIT|_ONCE|)|
+ panic|
+ MODULE_[A-Z_]+|
+ seq_vprintf|seq_printf|seq_puts
+)};
+
+our $signature_tags = qr{(?xi:
+ Signed-off-by:|
+ Acked-by:|
+ Tested-by:|
+ Reviewed-by:|
+ Reviewed-on:|
+ Reported-by:|
+ Original-author:|
+ Original-Author:|
+ Original-Authors:|
+ Suggested-by:|
+ To:|
+ Cc:
+)};
+
+our $url_tags = qr{http:|https:};
+
+our @typeList = (
+ qr{void},
+ qr{(?:unsigned\s+)?char},
+ qr{(?:unsigned\s+)?short},
+ qr{(?:unsigned\s+)?int},
+ qr{(?:unsigned\s+)?long},
+ qr{(?:unsigned\s+)?long\s+int},
+ qr{(?:unsigned\s+)?long\s+long},
+ qr{(?:unsigned\s+)?long\s+long\s+int},
+ qr{unsigned},
+ qr{float},
+ qr{double},
+ qr{bool},
+ qr{struct\s+$Ident},
+ qr{union\s+$Ident},
+ qr{enum\s+$Ident},
+ qr{${Ident}_t},
+ qr{${Ident}_handler},
+ qr{${Ident}_handler_fn},
+);
+our @typeListWithAttr = (
+ @typeList,
+ qr{struct\s+$InitAttribute\s+$Ident},
+ qr{union\s+$InitAttribute\s+$Ident},
+);
+
+our @modifierList = (
+ qr{fastcall},
+);
+
+our @mode_permission_funcs = (
+ ["module_param", 3],
+ ["module_param_(?:array|named|string)", 4],
+ ["module_param_array_named", 5],
+ ["debugfs_create_(?:file|u8|u16|u32|u64|x8|x16|x32|x64|size_t|atomic_t|bool|blob|regset32|u32_array)", 2],
+ ["proc_create(?:_data|)", 2],
+ ["(?:CLASS|DEVICE|SENSOR)_ATTR", 2],
+);
+
+#Create a search pattern for all these functions to speed up a loop below
+our $mode_perms_search = "";
+foreach my $entry (@mode_permission_funcs) {
+ $mode_perms_search .= '|' if ($mode_perms_search ne "");
+ $mode_perms_search .= $entry->[0];
+}
+
+our $declaration_macros = qr{(?x:
+ (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*\(|
+ (?:$Storage\s+)?LIST_HEAD\s*\(
+)};
+
+our $allowed_asm_includes = qr{(?x:
+ irq|
+ memory
+)};
+# memory.h: ARM has a custom one
+
+sub build_types {
+ my $mods = "(?x:\n" . join("|\n ", @modifierList) . "\n)";
+ my $all = "(?x:\n" . join("|\n ", @typeList) . "\n)";
+ my $allWithAttr = "(?x:\n" . join("|\n ", @typeListWithAttr) . "\n)";
+ $Modifier = qr{(?:$Attribute|$Sparse|$mods)};
+ $NonptrType = qr{
+ (?:$Modifier\s+|const\s+)*
+ (?:
+ (?:typeof|__typeof__)\s*\([^\)]*\)|
+ (?:$typeTypedefs\b)|
+ (?:${all}\b)
+ )
+ (?:\s+$Modifier|\s+const)*
+ }x;
+ $NonptrTypeWithAttr = qr{
+ (?:$Modifier\s+|const\s+)*
+ (?:
+ (?:typeof|__typeof__)\s*\([^\)]*\)|
+ (?:$typeTypedefs\b)|
+ (?:${allWithAttr}\b)
+ )
+ (?:\s+$Modifier|\s+const)*
+ }x;
+ $Type = qr{
+ $NonptrType
+ (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
+ (?:\s+$Inline|\s+$Modifier)*
+ }x;
+ $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
+}
+build_types();
+
+our $Typecast = qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*};
+
+# Using $balanced_parens, $LvalOrFunc, or $FuncArg
+# requires at least perl version v5.10.0
+# Any use must be runtime checked with $^V
+
+our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
+our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*};
+our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
+
+sub deparenthesize {
+ my ($string) = @_;
+ return "" if (!defined($string));
+
+ while ($string =~ /^\s*\(.*\)\s*$/) {
+ $string =~ s@^\s*\(\s*@@;
+ $string =~ s@\s*\)\s*$@@;
+ }
+
+ $string =~ s@\s+@ @g;
+
+ return $string;
+}
+
+sub seed_camelcase_file {
+ my ($file) = @_;
+
+ return if (!(-f $file));
+
+ local $/;
+
+ open(my $include_file, '<', "$file")
+ or warn "$P: Can't read '$file' $!\n";
+ my $text = <$include_file>;
+ close($include_file);
+
+ my @lines = split('\n', $text);
+
+ foreach my $line (@lines) {
+ next if ($line !~ /(?:[A-Z][a-z]|[a-z][A-Z])/);
+ if ($line =~ /^[ \t]*(?:#[ \t]*define|typedef\s+$Type)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)/) {
+ $camelcase{$1} = 1;
+ } elsif ($line =~ /^\s*$Declare\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[\(\[,;]/) {
+ $camelcase{$1} = 1;
+ } elsif ($line =~ /^\s*(?:union|struct|enum)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[;\{]/) {
+ $camelcase{$1} = 1;
+ }
+ }
+}
+
+my $camelcase_seeded = 0;
+sub seed_camelcase_includes {
+ return if ($camelcase_seeded);
+
+ my $files;
+ my $camelcase_cache = "";
+ my @include_files = ();
+
+ $camelcase_seeded = 1;
+
+ if (-e ".git") {
+ my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`;
+ chomp $git_last_include_commit;
+ $camelcase_cache = ".checkpatch-camelcase.git.$git_last_include_commit";
+ } else {
+ my $last_mod_date = 0;
+ $files = `find $root/include -name "*.h"`;
+ @include_files = split('\n', $files);
+ foreach my $file (@include_files) {
+ my $date = POSIX::strftime("%Y%m%d%H%M",
+ localtime((stat $file)[9]));
+ $last_mod_date = $date if ($last_mod_date < $date);
+ }
+ $camelcase_cache = ".checkpatch-camelcase.date.$last_mod_date";
+ }
+
+ if ($camelcase_cache ne "" && -f $camelcase_cache) {
+ open(my $camelcase_file, '<', "$camelcase_cache")
+ or warn "$P: Can't read '$camelcase_cache' $!\n";
+ while (<$camelcase_file>) {
+ chomp;
+ $camelcase{$_} = 1;
+ }
+ close($camelcase_file);
+ return;
+ }
+
+ if (-e ".git") {
+ $files = `git ls-files "include/*.h"`;
+ @include_files = split('\n', $files);
+ }
+
+ foreach my $file (@include_files) {
+ seed_camelcase_file($file);
+ }
+
+ if ($camelcase_cache ne "") {
+ unlink glob ".checkpatch-camelcase.*";
+ open(my $camelcase_file, '>', "$camelcase_cache")
+ or warn "$P: Can't write '$camelcase_cache' $!\n";
+ foreach (sort { lc($a) cmp lc($b) } keys(%camelcase)) {
+ print $camelcase_file ("$_\n");
+ }
+ close($camelcase_file);
+ }
+}
+
+$chk_signoff = 0 if ($file);
+
+my @rawlines = ();
+my @lines = ();
+my @fixed = ();
+my $vname;
+for my $filename (@ARGV) {
+ my $FILE;
+ if ($file) {
+ open($FILE, '-|', "diff -u /dev/null $filename") ||
+ die "$P: $filename: diff failed - $!\n";
+ } elsif ($filename eq '-') {
+ open($FILE, '<&STDIN');
+ } else {
+ open($FILE, '<', "$filename") ||
+ die "$P: $filename: open failed - $!\n";
+ }
+ if ($filename eq '-') {
+ $vname = 'Your patch';
+ } else {
+ $vname = $filename;
+ }
+ while (<$FILE>) {
+ chomp;
+ push(@rawlines, $_);
+ }
+ close($FILE);
+ if (!process($filename)) {
+ $exit = 1;
+ }
+ @rawlines = ();
+ @lines = ();
+ @fixed = ();
+}
+
+exit($exit);
+
+sub top_of_glusterfs_tree {
+ my ($root) = @_;
+
+ # Add here if the tree changes
+ my @tree_check = (
+ "api",
+ "AUTHORS",
+ "autogen.sh",
+ "build-aux",
+ "ChangeLog",
+ "cli",
+ "configure.ac",
+ "contrib",
+ "CONTRIBUTING",
+ "COPYING-GPLV2",
+ "COPYING-LGPLV3",
+ "doc",
+ "extras",
+ "geo-replication",
+ "glusterfs-api.pc.in",
+ "glusterfsd",
+ "glusterfs.spec.in",
+ "heal",
+ "INSTALL",
+ "libgfchangelog.pc.in",
+ "libglusterfs",
+ "MAINTAINERS",
+ "Makefile.am",
+ "NEWS",
+ "README.md",
+ "rfc.sh",
+ "rpc",
+ "run-tests.sh",
+ "tests",
+ "THANKS",
+ "xlators",
+ );
+
+ foreach my $check (@tree_check) {
+ if (! -e $root . '/' . $check) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+sub parse_email {
+ my ($formatted_email) = @_;
+
+ my $name = "";
+ my $address = "";
+ my $comment = "";
+
+ if ($formatted_email =~ /^(.*)<(\S+\@\S+)>(.*)$/) {
+ $name = $1;
+ $address = $2;
+ $comment = $3 if defined $3;
+ } elsif ($formatted_email =~ /^\s*<(\S+\@\S+)>(.*)$/) {
+ $address = $1;
+ $comment = $2 if defined $2;
+ } elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
+ $address = $1;
+ $comment = $2 if defined $2;
+ $formatted_email =~ s/$address.*$//;
+ $name = $formatted_email;
+ $name = trim($name);
+ $name =~ s/^\"|\"$//g;
+ # If there's a name left after stripping spaces and
+ # leading quotes, and the address doesn't have both
+ # leading and trailing angle brackets, the address
+ # is invalid. ie:
+ # "joe smith joe@smith.com" bad
+ # "joe smith <joe@smith.com" bad
+ if ($name ne "" && $address !~ /^<[^>]+>$/) {
+ $name = "";
+ $address = "";
+ $comment = "";
+ }
+ }
+
+ $name = trim($name);
+ $name =~ s/^\"|\"$//g;
+ $address = trim($address);
+ $address =~ s/^\<|\>$//g;
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ return ($name, $address, $comment);
+}
+
+sub format_email {
+ my ($name, $address) = @_;
+
+ my $formatted_email;
+
+ $name = trim($name);
+ $name =~ s/^\"|\"$//g;
+ $address = trim($address);
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ if ("$name" eq "") {
+ $formatted_email = "$address";
+ } else {
+ $formatted_email = "$name <$address>";
+ }
+
+ return $formatted_email;
+}
+
+sub which_conf {
+ my ($conf) = @_;
+
+ foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
+ if (-e "$path/$conf") {
+ return "$path/$conf";
+ }
+ }
+
+ return "";
+}
+
+sub expand_tabs {
+ my ($str) = @_;
+
+ my $res = '';
+ my $n = 0;
+ for my $c (split(//, $str)) {
+ if ($c eq "\t") {
+ $res .= ' ';
+ $n++;
+ for (; ($n % 8) != 0; $n++) {
+ $res .= ' ';
+ }
+ next;
+ }
+ $res .= $c;
+ $n++;
+ }
+ return $res;
+}
+sub copy_spacing {
+ (my $res = shift) =~ tr/\t/ /c;
+ return $res;
+}
+
+sub line_stats {
+ my ($line) = @_;
+
+ # Drop the diff line leader and expand tabs
+ $line =~ s/^.//;
+ $line = expand_tabs($line);
+
+ # Pick the indent from the front of the line.
+ my ($white) = ($line =~ /^(\s*)/);
+
+ return (length($line), length($white));
+}
+
+my $sanitise_quote = '';
+
+sub sanitise_line_reset {
+ my ($in_comment) = @_;
+
+ if ($in_comment) {
+ $sanitise_quote = '*/';
+ } else {
+ $sanitise_quote = '';
+ }
+}
+sub sanitise_line {
+ my ($line) = @_;
+
+ my $res = '';
+ my $l = '';
+
+ my $qlen = 0;
+ my $off = 0;
+ my $c;
+
+ # Always copy over the diff marker.
+ $res = substr($line, 0, 1);
+
+ for ($off = 1; $off < length($line); $off++) {
+ $c = substr($line, $off, 1);
+
+ # Comments we are wacking completly including the begin
+ # and end, all to $;.
+ if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
+ $sanitise_quote = '*/';
+
+ substr($res, $off, 2, "$;$;");
+ $off++;
+ next;
+ }
+ if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') {
+ $sanitise_quote = '';
+ substr($res, $off, 2, "$;$;");
+ $off++;
+ next;
+ }
+ if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') {
+ $sanitise_quote = '//';
+
+ substr($res, $off, 2, $sanitise_quote);
+ $off++;
+ next;
+ }
+
+ # A \ in a string means ignore the next character.
+ if (($sanitise_quote eq "'" || $sanitise_quote eq '"') &&
+ $c eq "\\") {
+ substr($res, $off, 2, 'XX');
+ $off++;
+ next;
+ }
+ # Regular quotes.
+ if ($c eq "'" || $c eq '"') {
+ if ($sanitise_quote eq '') {
+ $sanitise_quote = $c;
+
+ substr($res, $off, 1, $c);
+ next;
+ } elsif ($sanitise_quote eq $c) {
+ $sanitise_quote = '';
+ }
+ }
+
+ #print "c<$c> SQ<$sanitise_quote>\n";
+ if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
+ substr($res, $off, 1, $;);
+ } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") {
+ substr($res, $off, 1, $;);
+ } elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
+ substr($res, $off, 1, 'X');
+ } else {
+ substr($res, $off, 1, $c);
+ }
+ }
+
+ if ($sanitise_quote eq '//') {
+ $sanitise_quote = '';
+ }
+
+ # The pathname on a #include may be surrounded by '<' and '>'.
+ if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) {
+ my $clean = 'X' x length($1);
+ $res =~ s@\<.*\>@<$clean>@;
+
+ # The whole of a #error is a string.
+ } elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) {
+ my $clean = 'X' x length($1);
+ $res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@;
+ }
+
+ return $res;
+}
+
+sub get_quoted_string {
+ my ($line, $rawline) = @_;
+
+ return "" if ($line !~ m/(\"[X]+\")/g);
+ return substr($rawline, $-[0], $+[0] - $-[0]);
+}
+
+sub ctx_statement_block {
+ my ($linenr, $remain, $off) = @_;
+ my $line = $linenr - 1;
+ my $blk = '';
+ my $soff = $off;
+ my $coff = $off - 1;
+ my $coff_set = 0;
+
+ my $loff = 0;
+
+ my $type = '';
+ my $level = 0;
+ my @stack = ();
+ my $p;
+ my $c;
+ my $len = 0;
+
+ my $remainder;
+ while (1) {
+ @stack = (['', 0]) if ($#stack == -1);
+
+ #warn "CSB: blk<$blk> remain<$remain>\n";
+ # If we are about to drop off the end, pull in more
+ # context.
+ if ($off >= $len) {
+ for (; $remain > 0; $line++) {
+ last if (!defined $lines[$line]);
+ next if ($lines[$line] =~ /^-/);
+ $remain--;
+ $loff = $len;
+ $blk .= $lines[$line] . "\n";
+ $len = length($blk);
+ $line++;
+ last;
+ }
+ # Bail if there is no further context.
+ #warn "CSB: blk<$blk> off<$off> len<$len>\n";
+ if ($off >= $len) {
+ last;
+ }
+ if ($level == 0 && substr($blk, $off) =~ /^.\s*#\s*define/) {
+ $level++;
+ $type = '#';
+ }
+ }
+ $p = $c;
+ $c = substr($blk, $off, 1);
+ $remainder = substr($blk, $off);
+
+ #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n";
+
+ # Handle nested #if/#else.
+ if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) {
+ push(@stack, [ $type, $level ]);
+ } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) {
+ ($type, $level) = @{$stack[$#stack - 1]};
+ } elsif ($remainder =~ /^#\s*endif\b/) {
+ ($type, $level) = @{pop(@stack)};
+ }
+
+ # Statement ends at the ';' or a close '}' at the
+ # outermost level.
+ if ($level == 0 && $c eq ';') {
+ last;
+ }
+
+ # An else is really a conditional as long as its not else if
+ if ($level == 0 && $coff_set == 0 &&
+ (!defined($p) || $p =~ /(?:\s|\}|\+)/) &&
+ $remainder =~ /^(else)(?:\s|{)/ &&
+ $remainder !~ /^else\s+if\b/) {
+ $coff = $off + length($1) - 1;
+ $coff_set = 1;
+ #warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n";
+ #warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n";
+ }
+
+ if (($type eq '' || $type eq '(') && $c eq '(') {
+ $level++;
+ $type = '(';
+ }
+ if ($type eq '(' && $c eq ')') {
+ $level--;
+ $type = ($level != 0)? '(' : '';
+
+ if ($level == 0 && $coff < $soff) {
+ $coff = $off;
+ $coff_set = 1;
+ #warn "CSB: mark coff<$coff>\n";
+ }
+ }
+ if (($type eq '' || $type eq '{') && $c eq '{') {
+ $level++;
+ $type = '{';
+ }
+ if ($type eq '{' && $c eq '}') {
+ $level--;
+ $type = ($level != 0)? '{' : '';
+
+ if ($level == 0) {
+ if (substr($blk, $off + 1, 1) eq ';') {
+ $off++;
+ }
+ last;
+ }
+ }
+ # Preprocessor commands end at the newline unless escaped.
+ if ($type eq '#' && $c eq "\n" && $p ne "\\") {
+ $level--;
+ $type = '';
+ $off++;
+ last;
+ }
+ $off++;
+ }
+ # We are truly at the end, so shuffle to the next line.
+ if ($off == $len) {
+ $loff = $len + 1;
+ $line++;
+ $remain--;
+ }
+
+ my $statement = substr($blk, $soff, $off - $soff + 1);
+ my $condition = substr($blk, $soff, $coff - $soff + 1);
+
+ #warn "STATEMENT<$statement>\n";
+ #warn "CONDITION<$condition>\n";
+
+ #print "coff<$coff> soff<$off> loff<$loff>\n";
+
+ return ($statement, $condition,
+ $line, $remain + 1, $off - $loff + 1, $level);
+}
+
+sub statement_lines {
+ my ($stmt) = @_;
+
+ # Strip the diff line prefixes and rip blank lines at start and end.
+ $stmt =~ s/(^|\n)./$1/g;
+ $stmt =~ s/^\s*//;
+ $stmt =~ s/\s*$//;
+
+ my @stmt_lines = ($stmt =~ /\n/g);
+
+ return $#stmt_lines + 2;
+}
+
+sub statement_rawlines {
+ my ($stmt) = @_;
+
+ my @stmt_lines = ($stmt =~ /\n/g);
+
+ return $#stmt_lines + 2;
+}
+
+sub statement_block_size {
+ my ($stmt) = @_;
+
+ $stmt =~ s/(^|\n)./$1/g;
+ $stmt =~ s/^\s*{//;
+ $stmt =~ s/}\s*$//;
+ $stmt =~ s/^\s*//;
+ $stmt =~ s/\s*$//;
+
+ my @stmt_lines = ($stmt =~ /\n/g);
+ my @stmt_statements = ($stmt =~ /;/g);
+
+ my $stmt_lines = $#stmt_lines + 2;
+ my $stmt_statements = $#stmt_statements + 1;
+
+ if ($stmt_lines > $stmt_statements) {
+ return $stmt_lines;
+ } else {
+ return $stmt_statements;
+ }
+}
+
+sub ctx_statement_full {
+ my ($linenr, $remain, $off) = @_;
+ my ($statement, $condition, $level);
+
+ my (@chunks);
+
+ # Grab the first conditional/block pair.
+ ($statement, $condition, $linenr, $remain, $off, $level) =
+ ctx_statement_block($linenr, $remain, $off);
+ #print "F: c<$condition> s<$statement> remain<$remain>\n";
+ push(@chunks, [ $condition, $statement ]);
+ if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) {
+ return ($level, $linenr, @chunks);
+ }
+
+ # Pull in the following conditional/block pairs and see if they
+ # could continue the statement.
+ for (;;) {
+ ($statement, $condition, $linenr, $remain, $off, $level) =
+ ctx_statement_block($linenr, $remain, $off);
+ #print "C: c<$condition> s<$statement> remain<$remain>\n";
+ last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s));
+ #print "C: push\n";
+ push(@chunks, [ $condition, $statement ]);
+ }
+
+ return ($level, $linenr, @chunks);
+}
+
+sub ctx_block_get {
+ my ($linenr, $remain, $outer, $open, $close, $off) = @_;
+ my $line;
+ my $start = $linenr - 1;
+ my $blk = '';
+ my @o;
+ my @c;
+ my @res = ();
+
+ my $level = 0;
+ my @stack = ($level);
+ for ($line = $start; $remain > 0; $line++) {
+ next if ($rawlines[$line] =~ /^-/);
+ $remain--;
+
+ $blk .= $rawlines[$line];
+
+ # Handle nested #if/#else.
+ if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
+ push(@stack, $level);
+ } elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
+ $level = $stack[$#stack - 1];
+ } elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) {
+ $level = pop(@stack);
+ }
+
+ foreach my $c (split(//, $lines[$line])) {
+ ##print "C<$c>L<$level><$open$close>O<$off>\n";
+ if ($off > 0) {
+ $off--;
+ next;
+ }
+
+ if ($c eq $close && $level > 0) {
+ $level--;
+ last if ($level == 0);
+ } elsif ($c eq $open) {
+ $level++;
+ }
+ }
+
+ if (!$outer || $level <= 1) {
+ push(@res, $rawlines[$line]);
+ }
+
+ last if ($level == 0);
+ }
+
+ return ($level, @res);
+}
+sub ctx_block_outer {
+ my ($linenr, $remain) = @_;
+
+ my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0);
+ return @r;
+}
+sub ctx_block {
+ my ($linenr, $remain) = @_;
+
+ my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+ return @r;
+}
+sub ctx_statement {
+ my ($linenr, $remain, $off) = @_;
+
+ my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+ return @r;
+}
+sub ctx_block_level {
+ my ($linenr, $remain) = @_;
+
+ return ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+}
+sub ctx_statement_level {
+ my ($linenr, $remain, $off) = @_;
+
+ return ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+}
+
+sub ctx_locate_comment {
+ my ($first_line, $end_line) = @_;
+
+ # Catch a comment on the end of the line itself.
+ my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@);
+ return $current_comment if (defined $current_comment);
+
+ # Look through the context and try and figure out if there is a
+ # comment.
+ my $in_comment = 0;
+ $current_comment = '';
+ for (my $linenr = $first_line; $linenr < $end_line; $linenr++) {
+ my $line = $rawlines[$linenr - 1];
+ #warn " $line\n";
+ if ($linenr == $first_line and $line =~ m@^.\s*\*@) {
+ $in_comment = 1;
+ }
+ if ($line =~ m@/\*@) {
+ $in_comment = 1;
+ }
+ if (!$in_comment && $current_comment ne '') {
+ $current_comment = '';
+ }
+ $current_comment .= $line . "\n" if ($in_comment);
+ if ($line =~ m@\*/@) {
+ $in_comment = 0;
+ }
+ }
+
+ chomp($current_comment);
+ return($current_comment);
+}
+sub ctx_has_comment {
+ my ($first_line, $end_line) = @_;
+ my $cmt = ctx_locate_comment($first_line, $end_line);
+
+ ##print "LINE: $rawlines[$end_line - 1 ]\n";
+ ##print "CMMT: $cmt\n";
+
+ return ($cmt ne '');
+}
+
+sub raw_line {
+ my ($linenr, $cnt) = @_;
+
+ my $offset = $linenr - 1;
+ $cnt++;
+
+ my $line;
+ while ($cnt) {
+ $line = $rawlines[$offset++];
+ next if (defined($line) && $line =~ /^-/);
+ $cnt--;
+ }
+ return $line;
+}
+
+sub cat_vet {
+ my ($vet) = @_;
+ my ($res, $coded);
+
+ $res = '';
+ while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) {
+ $res .= $1;
+ if ($2 ne '') {
+ $coded = sprintf("^%c", unpack('C', $2) + 64);
+ $res .= $coded;
+ }
+ }
+ $res =~ s/$/\$/;
+ return $res;
+}
+
+my $av_preprocessor = 0;
+my $av_pending;
+my @av_paren_type;
+my $av_pend_colon;
+
+sub annotate_reset {
+ $av_preprocessor = 0;
+ $av_pending = '_';
+ @av_paren_type = ('E');
+ $av_pend_colon = 'O';
+}
+
+sub annotate_values {
+ my ($stream, $type) = @_;
+
+ my $res;
+ my $var = '_' x length($stream);
+ my $cur = $stream;
+
+ print "$stream\n" if ($dbg_values > 1);
+
+ while (length($cur)) {
+ @av_paren_type = ('E') if ($#av_paren_type < 0);
+ print " <" . join('', @av_paren_type) .
+ "> <$type> <$av_pending>" if ($dbg_values > 1);
+ if ($cur =~ /^(\s+)/o) {
+ print "WS($1)\n" if ($dbg_values > 1);
+ if ($1 =~ /\n/ && $av_preprocessor) {
+ $type = pop(@av_paren_type);
+ $av_preprocessor = 0;
+ }
+
+ } elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') {
+ print "CAST($1)\n" if ($dbg_values > 1);
+ push(@av_paren_type, $type);
+ $type = 'c';
+
+ } elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) {
+ print "DECLARE($1)\n" if ($dbg_values > 1);
+ $type = 'T';
+
+ } elsif ($cur =~ /^($Modifier)\s*/) {
+ print "MODIFIER($1)\n" if ($dbg_values > 1);
+ $type = 'T';
+
+ } elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) {
+ print "DEFINE($1,$2)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+ push(@av_paren_type, $type);
+ if ($2 ne '') {
+ $av_pending = 'N';
+ }
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\#\s*(?:undef\s*$Ident|include\b))/o) {
+ print "UNDEF($1)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+ push(@av_paren_type, $type);
+
+ } elsif ($cur =~ /^(\#\s*(?:ifdef|ifndef|if))/o) {
+ print "PRE_START($1)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+
+ push(@av_paren_type, $type);
+ push(@av_paren_type, $type);
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\#\s*(?:else|elif))/o) {
+ print "PRE_RESTART($1)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+
+ push(@av_paren_type, $av_paren_type[$#av_paren_type]);
+
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\#\s*(?:endif))/o) {
+ print "PRE_END($1)\n" if ($dbg_values > 1);
+
+ $av_preprocessor = 1;
+
+ # Assume all arms of the conditional end as this
+ # one does, and continue as if the #endif was not here.
+ pop(@av_paren_type);
+ push(@av_paren_type, $type);
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\\\n)/o) {
+ print "PRECONT($1)\n" if ($dbg_values > 1);
+
+ } elsif ($cur =~ /^(__attribute__)\s*\(?/o) {
+ print "ATTR($1)\n" if ($dbg_values > 1);
+ $av_pending = $type;
+ $type = 'N';
+
+ } elsif ($cur =~ /^(sizeof)\s*(\()?/o) {
+ print "SIZEOF($1)\n" if ($dbg_values > 1);
+ if (defined $2) {
+ $av_pending = 'V';
+ }
+ $type = 'N';
+
+ } elsif ($cur =~ /^(if|while|for)\b/o) {
+ print "COND($1)\n" if ($dbg_values > 1);
+ $av_pending = 'E';
+ $type = 'N';
+
+ } elsif ($cur =~/^(case)/o) {
+ print "CASE($1)\n" if ($dbg_values > 1);
+ $av_pend_colon = 'C';
+ $type = 'N';
+
+ } elsif ($cur =~/^(return|else|goto|typeof|__typeof__)\b/o) {
+ print "KEYWORD($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~ /^(\()/o) {
+ print "PAREN('$1')\n" if ($dbg_values > 1);
+ push(@av_paren_type, $av_pending);
+ $av_pending = '_';
+ $type = 'N';
+
+ } elsif ($cur =~ /^(\))/o) {
+ my $new_type = pop(@av_paren_type);
+ if ($new_type ne '_') {
+ $type = $new_type;
+ print "PAREN('$1') -> $type\n"
+ if ($dbg_values > 1);
+ } else {
+ print "PAREN('$1')\n" if ($dbg_values > 1);
+ }
+
+ } elsif ($cur =~ /^($Ident)\s*\(/o) {
+ print "FUNC($1)\n" if ($dbg_values > 1);
+ $type = 'V';
+ $av_pending = 'V';
+
+ } elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) {
+ if (defined $2 && $type eq 'C' || $type eq 'T') {
+ $av_pend_colon = 'B';
+ } elsif ($type eq 'E') {
+ $av_pend_colon = 'L';
+ }
+ print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1);
+ $type = 'V';
+
+ } elsif ($cur =~ /^($Ident|$Constant)/o) {
+ print "IDENT($1)\n" if ($dbg_values > 1);
+ $type = 'V';
+
+ } elsif ($cur =~ /^($Assignment)/o) {
+ print "ASSIGN($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~/^(;|{|})/) {
+ print "END($1)\n" if ($dbg_values > 1);
+ $type = 'E';
+ $av_pend_colon = 'O';
+
+ } elsif ($cur =~/^(,)/) {
+ print "COMMA($1)\n" if ($dbg_values > 1);
+ $type = 'C';
+
+ } elsif ($cur =~ /^(\?)/o) {
+ print "QUESTION($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~ /^(:)/o) {
+ print "COLON($1,$av_pend_colon)\n" if ($dbg_values > 1);
+
+ substr($var, length($res), 1, $av_pend_colon);
+ if ($av_pend_colon eq 'C' || $av_pend_colon eq 'L') {
+ $type = 'E';
+ } else {
+ $type = 'N';
+ }
+ $av_pend_colon = 'O';
+
+ } elsif ($cur =~ /^(\[)/o) {
+ print "CLOSE($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~ /^(-(?![->])|\+(?!\+)|\*|\&\&|\&)/o) {
+ my $variant;
+
+ print "OPV($1)\n" if ($dbg_values > 1);
+ if ($type eq 'V') {
+ $variant = 'B';
+ } else {
+ $variant = 'U';
+ }
+
+ substr($var, length($res), 1, $variant);
+ $type = 'N';
+
+ } elsif ($cur =~ /^($Operators)/o) {
+ print "OP($1)\n" if ($dbg_values > 1);
+ if ($1 ne '++' && $1 ne '--') {
+ $type = 'N';
+ }
+
+ } elsif ($cur =~ /(^.)/o) {
+ print "C($1)\n" if ($dbg_values > 1);
+ }
+ if (defined $1) {
+ $cur = substr($cur, length($1));
+ $res .= $type x length($1);
+ }
+ }
+
+ return ($res, $var);
+}
+
+sub possible {
+ my ($possible, $line) = @_;
+ my $notPermitted = qr{(?:
+ ^(?:
+ $Modifier|
+ $Storage|
+ $Type|
+ DEFINE_\S+
+ )$|
+ ^(?:
+ goto|
+ return|
+ case|
+ else|
+ asm|__asm__|
+ do|
+ \#|
+ \#\#|
+ )(?:\s|$)|
+ ^(?:typedef|struct|enum)\b
+ )}x;
+ warn "CHECK<$possible> ($line)\n" if ($dbg_possible > 2);
+ if ($possible !~ $notPermitted) {
+ # Check for modifiers.
+ $possible =~ s/\s*$Storage\s*//g;
+ $possible =~ s/\s*$Sparse\s*//g;
+ if ($possible =~ /^\s*$/) {
+
+ } elsif ($possible =~ /\s/) {
+ $possible =~ s/\s*$Type\s*//g;
+ for my $modifier (split(' ', $possible)) {
+ if ($modifier !~ $notPermitted) {
+ warn "MODIFIER: $modifier ($possible) ($line)\n" if ($dbg_possible);
+ push(@modifierList, $modifier);
+ }
+ }
+
+ } else {
+ warn "POSSIBLE: $possible ($line)\n" if ($dbg_possible);
+ push(@typeList, $possible);
+ }
+ build_types();
+ } else {
+ warn "NOTPOSS: $possible ($line)\n" if ($dbg_possible > 1);
+ }
+}
+
+my $prefix = '';
+
+sub show_type {
+ my ($type) = @_;
+
+ return defined $use_type{$type} if (scalar keys %use_type > 0);
+
+ return !defined $ignore_type{$type};
+}
+
+sub report {
+ my ($level, $type, $msg) = @_;
+
+ if (!show_type($type) ||
+ (defined $tst_only && $msg !~ /\Q$tst_only\E/)) {
+ return 0;
+ }
+ my $line;
+ if ($show_types) {
+ $line = "$prefix$level:$type: $msg\n";
+ } else {
+ $line = "$prefix$level: $msg\n";
+ }
+ $line = (split('\n', $line))[0] . "\n" if ($terse);
+
+ if ($quiet == 0) {
+ push(our @report, $line);
+ }
+ return 1;
+}
+
+sub report_dump {
+ our @report;
+}
+
+sub ERROR {
+ my ($type, $msg) = @_;
+
+ if (report("ERROR", $type, $msg)) {
+ our $clean = 0;
+ our $cnt_error++;
+ return 1;
+ }
+ return 0;
+}
+sub WARN {
+ my ($type, $msg) = @_;
+
+ if (report("WARNING", $type, $msg)) {
+ ## Warning is okay to submit
+ our $clean = 0;
+ our $cnt_warn++;
+ return 1;
+ }
+ return 0;
+}
+sub CHK {
+ my ($type, $msg) = @_;
+
+ if ($check && report("CHECK", $type, $msg)) {
+ our $clean = 0;
+ our $cnt_chk++;
+ return 1;
+ }
+ return 0;
+}
+
+sub check_absolute_file {
+ my ($absolute, $herecurr) = @_;
+ my $file = $absolute;
+
+ ##print "absolute<$absolute>\n";
+
+ # See if any suffix of this path is a path within the tree.
+ while ($file =~ s@^[^/]*/@@) {
+ if (-f "$root/$file") {
+ ##print "file<$file>\n";
+ last;
+ }
+ }
+ if (! -f _) {
+ return 0;
+ }
+
+ # It is, so see if the prefix is acceptable.
+ my $prefix = $absolute;
+ substr($prefix, -length($file)) = '';
+
+ ##print "prefix<$prefix>\n";
+ if ($prefix ne ".../") {
+ WARN("USE_RELATIVE_PATH",
+ "use relative pathname instead of absolute in changelog text\n" . $herecurr);
+ }
+}
+
+sub trim {
+ my ($string) = @_;
+
+ $string =~ s/^\s+|\s+$//g;
+
+ return $string;
+}
+
+sub ltrim {
+ my ($string) = @_;
+
+ $string =~ s/^\s+//;
+
+ return $string;
+}
+
+sub rtrim {
+ my ($string) = @_;
+
+ $string =~ s/\s+$//;
+
+ return $string;
+}
+
+sub string_find_replace {
+ my ($string, $find, $replace) = @_;
+
+ $string =~ s/$find/$replace/g;
+
+ return $string;
+}
+
+sub tabify {
+ my ($leading) = @_;
+
+ my $source_indent = 8;
+ my $max_spaces_before_tab = $source_indent - 1;
+ my $spaces_to_tab = " " x $source_indent;
+
+ #convert leading spaces to tabs
+ 1 while $leading =~ s@^([\t]*)$spaces_to_tab@$1\t@g;
+ #Remove spaces before a tab
+ 1 while $leading =~ s@^([\t]*)( {1,$max_spaces_before_tab})\t@$1\t@g;
+ return "$leading";
+}
+
+sub pos_last_openparen {
+ my ($line) = @_;
+
+ my $pos = 0;
+
+ my $opens = $line =~ tr/\(/\(/;
+ my $closes = $line =~ tr/\)/\)/;
+
+ my $last_openparen = 0;
+
+ if (($opens == 0) || ($closes >= $opens)) {
+ return -1;
+ }
+
+ my $len = length($line);
+
+ for ($pos = 0; $pos < $len; $pos++) {
+ my $string = substr($line, $pos);
+ if ($string =~ /^($FuncArg|$balanced_parens)/) {
+ $pos += length($1) - 1;
+ } elsif (substr($line, $pos, 1) eq '(') {
+ $last_openparen = $pos;
+ } elsif (index($string, '(') == -1) {
+ last;
+ }
+ }
+
+ return length(expand_tabs(substr($line, 0, $last_openparen))) + 1;
+}
+
+sub process {
+ my $filename = shift;
+
+ my $linenr=0;
+ my $prevline="";
+ my $prevrawline="";
+ my $stashline="";
+ my $stashrawline="";
+
+ my $length;
+ my $indent;
+ my $previndent=0;
+ my $stashindent=0;
+
+ our $clean = 1;
+ my $signoff = 0;
+ my $subject_trailing_dot = 0;
+ my $is_patch = 0;
+
+ my $in_header_lines = 1;
+ my $in_commit_log = 0; #Scanning lines before patch
+
+ my $non_utf8_charset = 0;
+
+ our @report = ();
+ our $cnt_lines = 0;
+ our $cnt_error = 0;
+ our $cnt_warn = 0;
+ our $cnt_chk = 0;
+
+ # Trace the real file/line as we go.
+ my $realfile = '';
+ my $realline = 0;
+ my $realcnt = 0;
+ my $here = '';
+ my $in_comment = 0;
+ my $comment_edge = 0;
+ my $first_line = 0;
+ my $p1_prefix = '';
+
+ my $prev_values = 'E';
+
+ # suppression flags
+ my %suppress_ifbraces;
+ my %suppress_whiletrailers;
+ my %suppress_export;
+ my $suppress_statement = 0;
+
+ my %signatures = ();
+
+ # Pre-scan the patch sanitizing the lines.
+ # Pre-scan the patch looking for any __setup documentation.
+ #
+ my @setup_docs = ();
+ my $setup_docs = 0;
+
+ my $camelcase_file_seeded = 0;
+
+ sanitise_line_reset();
+ my $line;
+ foreach my $rawline (@rawlines) {
+ $linenr++;
+ $line = $rawline;
+
+ push(@fixed, $rawline) if ($fix);
+
+ if ($rawline=~/^\+\+\+\s+(\S+)/) {
+ $setup_docs = 0;
+ if ($1 =~ m@Documentation/kernel-parameters.txt$@) {
+ $setup_docs = 1;
+ }
+ #next;
+ }
+ if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+ $realline=$1-1;
+ if (defined $2) {
+ $realcnt=$3+1;
+ } else {
+ $realcnt=1+1;
+ }
+ $in_comment = 0;
+
+ # Guestimate if this is a continuing comment. Run
+ # the context looking for a comment "edge". If this
+ # edge is a close comment then we must be in a comment
+ # at context start.
+ my $edge;
+ my $cnt = $realcnt;
+ for (my $ln = $linenr + 1; $cnt > 0; $ln++) {
+ next if (defined $rawlines[$ln - 1] &&
+ $rawlines[$ln - 1] =~ /^-/);
+ $cnt--;
+ #print "RAW<$rawlines[$ln - 1]>\n";
+ last if (!defined $rawlines[$ln - 1]);
+ if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ &&
+ $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) {
+ ($edge) = $1;
+ last;
+ }
+ }
+ if (defined $edge && $edge eq '*/') {
+ $in_comment = 1;
+ }
+
+ # Guestimate if this is a continuing comment. If this
+ # is the start of a diff block and this line starts
+ # ' *' then it is very likely a comment.
+ if (!defined $edge &&
+ $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@)
+ {
+ $in_comment = 1;
+ }
+
+ ##print "COMMENT:$in_comment edge<$edge> $rawline\n";
+ sanitise_line_reset($in_comment);
+
+ } elsif ($realcnt && $rawline =~ /^(?:\+| |$)/) {
+ # Standardise the strings and chars within the input to
+ # simplify matching -- only bother with positive lines.
+ $line = sanitise_line($rawline);
+ }
+ push(@lines, $line);
+
+ if ($realcnt > 1) {
+ $realcnt-- if ($line =~ /^(?:\+| |$)/);
+ } else {
+ $realcnt = 0;
+ }
+
+ #print "==>$rawline\n";
+ #print "-->$line\n";
+
+ if ($setup_docs && $line =~ /^\+/) {
+ push(@setup_docs, $line);
+ }
+ }
+
+ $prefix = '';
+
+ $realcnt = 0;
+ $linenr = 0;
+ foreach my $line (@lines) {
+ $linenr++;
+ my $sline = $line; #copy of $line
+ $sline =~ s/$;/ /g; #with comments as spaces
+
+ my $rawline = $rawlines[$linenr - 1];
+
+#extract the line range in the file after the patch is applied
+ if ($line=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+ $is_patch = 1;
+ $first_line = $linenr + 1;
+ $realline=$1-1;
+ if (defined $2) {
+ $realcnt=$3+1;
+ } else {
+ $realcnt=1+1;
+ }
+ annotate_reset();
+ $prev_values = 'E';
+
+ %suppress_ifbraces = ();
+ %suppress_whiletrailers = ();
+ %suppress_export = ();
+ $suppress_statement = 0;
+ next;
+
+# track the line number as we move through the hunk, note that
+# new versions of GNU diff omit the leading space on completely
+# blank context lines so we need to count that too.
+ } elsif ($line =~ /^( |\+|$)/) {
+ $realline++;
+ $realcnt-- if ($realcnt != 0);
+
+ # Measure the line length and indent.
+ ($length, $indent) = line_stats($rawline);
+
+ # Track the previous line.
+ ($prevline, $stashline) = ($stashline, $line);
+ ($previndent, $stashindent) = ($stashindent, $indent);
+ ($prevrawline, $stashrawline) = ($stashrawline, $rawline);
+
+ #warn "line<$line>\n";
+
+ } elsif ($realcnt == 1) {
+ $realcnt--;
+ }
+
+ my $hunk_line = ($realcnt != 0);
+
+#make up the handle for any error we report on this line
+ $prefix = "$filename:$realline: " if ($emacs && $file);
+ $prefix = "$filename:$linenr: " if ($emacs && !$file);
+
+ $here = "#$linenr: " if (!$file);
+ $here = "#$realline: " if ($file);
+
+ my $found_file = 0;
+ # extract the filename as it passes
+ if ($line =~ /^diff --git.*?(\S+)$/) {
+ $realfile = $1;
+ $realfile =~ s@^([^/]*)/@@ if (!$file);
+ $in_commit_log = 0;
+ $found_file = 1;
+ } elsif ($line =~ /^\+\+\+\s+(\S+)/) {
+ $realfile = $1;
+ $realfile =~ s@^([^/]*)/@@ if (!$file);
+ $in_commit_log = 0;
+
+ $p1_prefix = $1;
+ if (!$file && $tree && $p1_prefix ne '' &&
+ -e "$root/$p1_prefix") {
+ WARN("PATCH_PREFIX",
+ "patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n");
+ }
+
+ $found_file = 1;
+ }
+
+ if ($found_file) {
+ if ($realfile =~ m@^(drivers/net/|net/)@) {
+ $check = 1;
+ } else {
+ $check = $check_orig;
+ }
+ next;
+ }
+
+ $here .= "FILE: $realfile:$realline:" if ($realcnt != 0);
+
+ my $hereline = "$here\n$rawline\n";
+ my $herecurr = "$here\n$rawline\n";
+ my $hereprev = "$here\n$prevrawline\n$rawline\n";
+
+ $cnt_lines++ if ($realcnt != 0);
+
+# Check for incorrect file permissions
+ if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
+ my $permhere = $here . "FILE: $realfile\n";
+ if ($realfile !~ m@scripts/@ &&
+ $realfile !~ /\.(py|pl|awk|sh|t)$/) {
+ ERROR("EXECUTE_PERMISSIONS",
+ "do not set execute permissions for source files\n" . $permhere);
+ }
+ }
+
+ next if ($realfile =~ /(checkpatch.pl)/);
+ next if ($realfile =~ /\.(md|txt|doc|8|pdf|tex)$/);
+
+# Check that the subject does not have a trailing dot
+ if ($in_header_lines &&
+ $line =~ /^Subject: \[PATCH\] (.+)\.(\s*)$/) {
+ $subject_trailing_dot++;
+ }
+
+# Check the patch for a signoff:
+ if ($line =~ /^\s*signed-off-by:/i) {
+ $signoff++;
+ $in_commit_log = 0;
+ }
+
+# Check signature styles
+ if (!$in_header_lines &&
+ $line =~ /^(\s*)([a-z0-9_-]+by:|$signature_tags)(\s*)(.*)/i) {
+ my $space_before = $1;
+ my $sign_off = $2;
+ my $space_after = $3;
+ my $email = $4;
+ my $ucfirst_sign_off = ucfirst(lc($sign_off));
+
+ if ($sign_off !~ /$signature_tags/) {
+ WARN("BAD_SIGN_OFF",
+ "Non-standard signature: $sign_off\n" . $herecurr);
+ }
+ if (defined $space_before && $space_before ne "") {
+ if (WARN("BAD_SIGN_OFF",
+ "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =
+ "$ucfirst_sign_off $email";
+ }
+ }
+ if ($sign_off =~ /-by:$/i && $sign_off ne $ucfirst_sign_off) {
+ if (WARN("BAD_SIGN_OFF",
+ "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =
+ "$ucfirst_sign_off $email";
+ }
+
+ }
+ if (!defined $space_after || $space_after ne " ") {
+ if (WARN("BAD_SIGN_OFF",
+ "Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =
+ "$ucfirst_sign_off $email";
+ }
+ }
+
+ # Check if email is really Gerrit URL
+ if ($email =~ /^($url_tags)(.*)/) {
+ my $uri = $1;
+ my $url = $2;
+ if ($uri && $url !~ /$gerrit_url/) {
+ ERROR("BAD_URL",
+ "Unrecognized url address: '$email'\n" . $herecurr);
+ }
+ } else {
+ my ($email_name, $email_address, $comment) = parse_email($email);
+ my $suggested_email = format_email(($email_name, $email_address));
+ if ($suggested_email eq "") {
+ ERROR("BAD_SIGN_OFF",
+ "Unrecognized email address: '$email'\n" . $herecurr);
+ } else {
+ my $dequoted = $suggested_email;
+ $dequoted =~ s/^"//;
+ $dequoted =~ s/" </ </;
+ # Don't force email to have quotes
+ # Allow just an angle bracketed address
+ if ("$dequoted$comment" ne $email &&
+ "<$email_address>$comment" ne $email &&
+ "$suggested_email$comment" ne $email) {
+ WARN("BAD_SIGN_OFF",
+ "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr);
+ }
+ }
+ }
+
+# Check for duplicate signatures
+ my $sig_nospace = $line;
+ $sig_nospace =~ s/\s//g;
+ $sig_nospace = lc($sig_nospace);
+ if (defined $signatures{$sig_nospace}) {
+ WARN("BAD_SIGN_OFF",
+ "Duplicate signature\n" . $herecurr);
+ } else {
+ $signatures{$sig_nospace} = 1;
+ }
+ }
+
+# Check for wrappage within a valid hunk of the file
+ if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
+ ERROR("CORRUPTED_PATCH",
+ "patch seems to be corrupt (line wrapped?)\n" .
+ $herecurr) if (!$emitted_corrupt++);
+ }
+
+# Check for absolute kernel paths.
+ if ($tree) {
+ while ($line =~ m{(?:^|\s)(/\S*)}g) {
+ my $file = $1;
+
+ if ($file =~ m{^(.*?)(?::\d+)+:?$} &&
+ check_absolute_file($1, $herecurr)) {
+ #
+ } else {
+ check_absolute_file($file, $herecurr);
+ }
+ }
+ }
+
+# UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php
+ if (($realfile =~ /^$/ || $line =~ /^\+/) &&
+ $rawline !~ m/^$UTF8*$/) {
+ my ($utf8_prefix) = ($rawline =~ /^($UTF8*)/);
+
+ my $blank = copy_spacing($rawline);
+ my $ptr = substr($blank, 0, length($utf8_prefix)) . "^";
+ my $hereptr = "$hereline$ptr\n";
+
+ CHK("INVALID_UTF8",
+ "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr);
+ }
+
+# Check if it's the start of a commit log
+# (not a header line and we haven't seen the patch filename)
+ if ($in_header_lines && $realfile =~ /^$/ &&
+ $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) {
+ $in_header_lines = 0;
+ $in_commit_log = 1;
+ }
+
+# Check if there is UTF-8 in a commit log when a mail header has explicitly
+# declined it, i.e defined some charset where it is missing.
+ if ($in_header_lines &&
+ $rawline =~ /^Content-Type:.+charset="(.+)".*$/ &&
+ $1 !~ /utf-8/i) {
+ $non_utf8_charset = 1;
+ }
+
+ if ($in_commit_log && $non_utf8_charset && $realfile =~ /^$/ &&
+ $rawline =~ /$NON_ASCII_UTF8/) {
+ WARN("UTF8_BEFORE_PATCH",
+ "8-bit UTF-8 used in possible commit log\n" . $herecurr);
+ }
+
+# ignore non-hunk lines and lines being removed
+ next if (!$hunk_line || $line =~ /^-/);
+
+#trailing whitespace
+ if ($line =~ /^\+.*\015/) {
+ my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+ if (ERROR("DOS_LINE_ENDINGS",
+ "DOS line endings\n" . $herevet) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/[\s\015]+$//;
+ }
+ } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
+ my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+ if (ERROR("TRAILING_WHITESPACE",
+ "trailing whitespace\n" . $herevet) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\s+$//;
+ }
+
+ $rpt_cleaners = 1;
+ }
+
+ if (($realfile =~ /Makefile.*/) &&
+ ($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) {
+ my $flag = $1;
+ my $replacement = {
+ 'EXTRA_AFLAGS' => 'asflags-y',
+ 'EXTRA_CFLAGS' => 'ccflags-y',
+ 'EXTRA_CPPFLAGS' => 'cppflags-y',
+ 'EXTRA_LDFLAGS' => 'ldflags-y',
+ };
+
+ WARN("DEPRECATED_VARIABLE",
+ "Use of $flag is deprecated, please use \`$replacement->{$flag} instead.\n" . $herecurr) if ($replacement->{$flag});
+ }
+
+# check we are in .spec file, then ignore this hunk
+ next if ($realfile eq "glusterfs.spec.in");
+
+# check we are in a valid source file if not then ignore this hunk
+ next if ($realfile !~ /\.(h|c|pl|py|l|y|sh|in)$/);
+
+#line length limit
+ if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ &&
+ $rawline !~ /^.\s*\*\s*\@$Ident\s/ &&
+ !($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(KERN_\S+\s*|[^"]*))?"[X\t]*"\s*(?:|,|\)\s*;)\s*$/ ||
+ $line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) &&
+ $length > $max_line_length)
+ {
+ WARN("LONG_LINE",
+ "line over $max_line_length characters\n" . $herecurr);
+ }
+
+# check for spaces before a quoted newline
+ if ($rawline =~ /^.*\".*\s\\n/) {
+ if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
+ "unnecessary whitespace before a quoted newline\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
+ }
+
+ }
+
+# check for adding lines without a newline.
+ if ($line =~ /^\+/ && defined $lines[$linenr] && $lines[$linenr] =~ /^\\ No newline at end of file/) {
+ WARN("MISSING_EOF_NEWLINE",
+ "adding a line without newline at end of file\n" . $herecurr);
+ }
+
+# check we are in a valid source file C or perl if not then ignore this hunk
+ next if ($realfile !~ /\.(h|c|pl)$/);
+
+# check for space before tabs.
+ if ($rawline =~ /^\+/ && $rawline =~ / \t/) {
+ my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+ if (WARN("SPACE_BEFORE_TAB",
+ "please, no space before tabs\n" . $herevet) &&
+ $fix) {
+ while ($fixed[$linenr - 1] =~
+ s/(^\+.*) {8,8}\t/$1\t\t/) {}
+ while ($fixed[$linenr - 1] =~
+ s/(^\+.*) +\t/$1\t/) {}
+ }
+ }
+
+# check for && or || at the start of a line
+ if ($rawline =~ /^\+\s*(&&|\|\|)/) {
+ CHK("LOGICAL_CONTINUATIONS",
+ "Logical continuations should be on the previous line\n" . $hereprev);
+ }
+
+# check multi-line statement indentation matches previous line
+ if ($^V && $^V ge 5.10.0 &&
+ $prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|$Ident\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
+ $prevline =~ /^\+(\t*)(.*)$/;
+ my $oldindent = $1;
+ my $rest = $2;
+
+ my $pos = pos_last_openparen($rest);
+ if ($pos >= 0) {
+ $line =~ /^(\+| )([ \t]*)/;
+ my $newindent = $2;
+
+ my $goodtabindent = $oldindent .
+ "\t" x ($pos / 8) .
+ " " x ($pos % 8);
+ my $goodspaceindent = $oldindent . " " x $pos;
+
+ if ($newindent ne $goodtabindent &&
+ $newindent ne $goodspaceindent) {
+
+ if (CHK("PARENTHESIS_ALIGNMENT",
+ "Alignment should match open parenthesis\n" . $hereprev) &&
+ $fix && $line =~ /^\+/) {
+ $fixed[$linenr - 1] =~
+ s/^\+[ \t]*/\+$goodtabindent/;
+ }
+ }
+ }
+ }
+
+ if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) {
+ if (CHK("SPACING",
+ "No space is necessary after a cast\n" . $hereprev) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(\+.*\*[ \t]*\))[ \t]+/$1/;
+ }
+ }
+
+
+# check for missing blank lines after declarations
+ if ($sline =~ /^\+\s+\S/ && #Not at char 1
+ # actual declarations
+ ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # foo bar; where foo is some local typedef or #define
+ $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ # known declaration macros
+ $prevline =~ /^\+\s+$declaration_macros/) &&
+ # for "else if" which can look like "$Ident $Ident"
+ !($prevline =~ /^\+\s+$c90_Keywords\b/ ||
+ # other possible extensions of declaration lines
+ $prevline =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
+ # not starting a section or a macro "\" extended line
+ $prevline =~ /(?:\{\s*|\\)$/) &&
+ # looks like a declaration
+ !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # foo bar; where foo is some local typedef or #define
+ $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ # known declaration macros
+ $sline =~ /^\+\s+$declaration_macros/ ||
+ # start of struct or union or enum
+ $sline =~ /^\+\s+(?:union|struct|enum|typedef)\b/ ||
+ # start or end of block or continuation of declaration
+ $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
+ # bitfield continuation
+ $sline =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ ||
+ # other possible extensions of declaration lines
+ $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
+ # indentation of previous and current line are the same
+ (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
+ WARN("SPACING",
+ "Missing a blank line after declarations\n" . $hereprev);
+ }
+
+# check we are in a valid C source file if not then ignore this hunk
+ next if ($realfile !~ /\.(h|c)$/);
+
+# check for RCS/CVS revision markers
+ if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) {
+ WARN("CVS_KEYWORD",
+ "CVS style keyword markers, these will _not_ be updated\n". $herecurr);
+ }
+
+# Check for potential 'bare' types
+ my ($stat, $cond, $line_nr_next, $remain_next, $off_next,
+ $realline_next);
+#print "LINE<$line>\n";
+ if ($linenr >= $suppress_statement &&
+ $realcnt && $sline =~ /.\s*\S/) {
+ ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
+ ctx_statement_block($linenr, $realcnt, 0);
+ $stat =~ s/\n./\n /g;
+ $cond =~ s/\n./\n /g;
+
+#print "linenr<$linenr> <$stat>\n";
+ # If this statement has no statement boundaries within
+ # it there is no point in retrying a statement scan
+ # until we hit end of it.
+ my $frag = $stat; $frag =~ s/;+\s*$//;
+ if ($frag !~ /(?:{|;)/) {
+#print "skip<$line_nr_next>\n";
+ $suppress_statement = $line_nr_next;
+ }
+
+ # Find the real next line.
+ $realline_next = $line_nr_next;
+ if (defined $realline_next &&
+ (!defined $lines[$realline_next - 1] ||
+ substr($lines[$realline_next - 1], $off_next) =~ /^\s*$/)) {
+ $realline_next++;
+ }
+
+ my $s = $stat;
+ $s =~ s/{.*$//s;
+
+ # Ignore goto labels.
+ if ($s =~ /$Ident:\*$/s) {
+
+ # Ignore functions being called
+ } elsif ($s =~ /^.\s*$Ident\s*\(/s) {
+
+ } elsif ($s =~ /^.\s*else\b/s) {
+
+ # declarations always start with types
+ } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) {
+ my $type = $1;
+ $type =~ s/\s+/ /g;
+ possible($type, "A:" . $s);
+
+ # definitions in global scope can only start with types
+ } elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b\s*(?!:)/s) {
+ possible($1, "B:" . $s);
+ }
+
+ # any (foo ... *) is a pointer cast, and foo is a type
+ while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) {
+ possible($1, "C:" . $s);
+ }
+
+ # Check for any sort of function declaration.
+ # int foo(something bar, other baz);
+ # void (*store_gdt)(x86_descr_ptr *);
+ if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/s) {
+ my ($name_len) = length($1);
+
+ my $ctx = $s;
+ substr($ctx, 0, $name_len + 1, '');
+ $ctx =~ s/\)[^\)]*$//;
+
+ for my $arg (split(/\s*,\s*/, $ctx)) {
+ if ($arg =~ /^(?:const\s+)?($Ident)(?:\s+$Sparse)*\s*\**\s*(:?\b$Ident)?$/s || $arg =~ /^($Ident)$/s) {
+
+ possible($1, "D:" . $s);
+ }
+ }
+ }
+
+ }
+
+#
+# Checks which may be anchored in the context.
+#
+
+# Check for switch () and associated case and default
+# statements should be at the same indent.
+ if ($line=~/\bswitch\s*\(.*\)/) {
+ my $err = '';
+ my $sep = '';
+ my @ctx = ctx_block_outer($linenr, $realcnt);
+ shift(@ctx);
+ for my $ctx (@ctx) {
+ my ($clen, $cindent) = line_stats($ctx);
+ if ($ctx =~ /^\+\s*(case\s+|default:)/ &&
+ $indent != $cindent) {
+ $err .= "$sep$ctx\n";
+ $sep = '';
+ } else {
+ $sep = "[...]\n";
+ }
+ }
+ if ($err ne '') {
+ ERROR("SWITCH_CASE_INDENT_LEVEL",
+ "switch and case should be at the same indent\n$hereline$err");
+ }
+ }
+
+# if/while/etc brace do not go on next line, unless defining a do while loop,
+# or if that brace on the next line is for something else
+ if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
+ my $pre_ctx = "$1$2";
+
+ my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
+
+ if ($line =~ /^\+\t{6,}/) {
+ WARN("DEEP_INDENTATION",
+ "Too many leading tabs - consider code refactoring\n" . $herecurr);
+ }
+
+ my $ctx_cnt = $realcnt - $#ctx - 1;
+ my $ctx = join("\n", @ctx);
+
+ my $ctx_ln = $linenr;
+ my $ctx_skip = $realcnt;
+
+ while ($ctx_skip > $ctx_cnt || ($ctx_skip == $ctx_cnt &&
+ defined $lines[$ctx_ln - 1] &&
+ $lines[$ctx_ln - 1] =~ /^-/)) {
+ ##print "SKIP<$ctx_skip> CNT<$ctx_cnt>\n";
+ $ctx_skip-- if (!defined $lines[$ctx_ln - 1] || $lines[$ctx_ln - 1] !~ /^-/);
+ $ctx_ln++;
+ }
+
+ #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
+ #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
+
+ if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+ ERROR("OPEN_BRACE",
+ "that open brace { should be on the previous line\n" .
+ "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
+ }
+ if ($level == 0 && $pre_ctx !~ /}\s*while\s*\($/ &&
+ $ctx =~ /\)\s*\;\s*$/ &&
+ defined $lines[$ctx_ln - 1])
+ {
+ my ($nlength, $nindent) = line_stats($lines[$ctx_ln - 1]);
+ if ($nindent > $indent) {
+ WARN("TRAILING_SEMICOLON",
+ "trailing semicolon indicates no statements, indent implies otherwise\n" .
+ "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
+ }
+ }
+ }
+
+# Check relative indent for conditionals and blocks.
+ if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+ ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
+ ctx_statement_block($linenr, $realcnt, 0)
+ if (!defined $stat);
+ my ($s, $c) = ($stat, $cond);
+
+ substr($s, 0, length($c), '');
+
+ # Make sure we remove the line prefixes as we have
+ # none on the first line, and are going to readd them
+ # where necessary.
+ $s =~ s/\n./\n/gs;
+
+ # Find out how long the conditional actually is.
+ my @newlines = ($c =~ /\n/gs);
+ my $cond_lines = 1 + $#newlines;
+
+ # We want to check the first line inside the block
+ # starting at the end of the conditional, so remove:
+ # 1) any blank line termination
+ # 2) any opening brace { on end of the line
+ # 3) any do (...) {
+ my $continuation = 0;
+ my $check = 0;
+ $s =~ s/^.*\bdo\b//;
+ $s =~ s/^\s*{//;
+ if ($s =~ s/^\s*\\//) {
+ $continuation = 1;
+ }
+ if ($s =~ s/^\s*?\n//) {
+ $check = 1;
+ $cond_lines++;
+ }
+
+ # Also ignore a loop construct at the end of a
+ # preprocessor statement.
+ if (($prevline =~ /^.\s*#\s*define\s/ ||
+ $prevline =~ /\\\s*$/) && $continuation == 0) {
+ $check = 0;
+ }
+
+ my $cond_ptr = -1;
+ $continuation = 0;
+ while ($cond_ptr != $cond_lines) {
+ $cond_ptr = $cond_lines;
+
+ # If we see an #else/#elif then the code
+ # is not linear.
+ if ($s =~ /^\s*\#\s*(?:else|elif)/) {
+ $check = 0;
+ }
+
+ # Ignore:
+ # 1) blank lines, they should be at 0,
+ # 2) preprocessor lines, and
+ # 3) labels.
+ if ($continuation ||
+ $s =~ /^\s*?\n/ ||
+ $s =~ /^\s*#\s*?/ ||
+ $s =~ /^\s*$Ident\s*:/) {
+ $continuation = ($s =~ /^.*?\\\n/) ? 1 : 0;
+ if ($s =~ s/^.*?\n//) {
+ $cond_lines++;
+ }
+ }
+ }
+
+ my (undef, $sindent) = line_stats("+" . $s);
+ my $stat_real = raw_line($linenr, $cond_lines);
+
+ # Check if either of these lines are modified, else
+ # this is not this patch's fault.
+ if (!defined($stat_real) ||
+ $stat !~ /^\+/ && $stat_real !~ /^\+/) {
+ $check = 0;
+ }
+ if (defined($stat_real) && $cond_lines > 1) {
+ $stat_real = "[...]\n$stat_real";
+ }
+
+ #print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n";
+
+ if ($check && (($sindent % 8) != 0 ||
+ ($sindent <= $indent && $s ne ''))) {
+ WARN("SUSPECT_CODE_INDENT",
+ "suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n");
+ }
+ }
+
+ # Track the 'values' across context and added lines.
+ my $opline = $line; $opline =~ s/^./ /;
+ my ($curr_values, $curr_vars) =
+ annotate_values($opline . "\n", $prev_values);
+ $curr_values = $prev_values . $curr_values;
+ if ($dbg_values) {
+ my $outline = $opline; $outline =~ s/\t/ /g;
+ print "$linenr > .$outline\n";
+ print "$linenr > $curr_values\n";
+ print "$linenr > $curr_vars\n";
+ }
+ $prev_values = substr($curr_values, -1);
+
+#ignore lines not being added
+ next if ($line =~ /^[^\+]/);
+
+# TEST: allow direct testing of the type matcher.
+ if ($dbg_type) {
+ if ($line =~ /^.\s*$Declare\s*$/) {
+ ERROR("TEST_TYPE",
+ "TEST: is type\n" . $herecurr);
+ } elsif ($dbg_type > 1 && $line =~ /^.+($Declare)/) {
+ ERROR("TEST_NOT_TYPE",
+ "TEST: is not type ($1 is)\n". $herecurr);
+ }
+ next;
+ }
+# TEST: allow direct testing of the attribute matcher.
+ if ($dbg_attr) {
+ if ($line =~ /^.\s*$Modifier\s*$/) {
+ ERROR("TEST_ATTR",
+ "TEST: is attr\n" . $herecurr);
+ } elsif ($dbg_attr > 1 && $line =~ /^.+($Modifier)/) {
+ ERROR("TEST_NOT_ATTR",
+ "TEST: is not attr ($1 is)\n". $herecurr);
+ }
+ next;
+ }
+
+# check for initialisation to aggregates open brace on the next line
+ if ($line =~ /^.\s*{/ &&
+ $prevline =~ /(?:^|[^=])=\s*$/) {
+ ERROR("OPEN_BRACE",
+ "that open brace { should be on the previous line\n" . $hereprev);
+ }
+
+#
+# Checks which are anchored on the added line.
+#
+
+# check for malformed paths in #include statements (uses RAW line)
+ if ($rawline =~ m{^.\s*\#\s*include\s+[<"](.*)[">]}) {
+ my $path = $1;
+ if ($path =~ m{//}) {
+ ERROR("MALFORMED_INCLUDE",
+ "malformed #include filename\n" . $herecurr);
+ }
+ if ($path =~ "^uapi/" && $realfile =~ m@\binclude/uapi/@) {
+ ERROR("UAPI_INCLUDE",
+ "No #include in ...include/uapi/... should use a uapi/ path prefix\n" . $herecurr);
+ }
+ }
+
+# no C99 // comments
+ if ($line =~ m{//}) {
+ if (ERROR("C99_COMMENTS",
+ "do not use C99 // comments\n" . $herecurr) &&
+ $fix) {
+ my $line = $fixed[$linenr - 1];
+ if ($line =~ /\/\/(.*)$/) {
+ my $comment = trim($1);
+ $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@;
+ }
+ }
+ }
+ # Remove C99 comments.
+ $line =~ s@//.*@@;
+ $opline =~ s@//.*@@;
+
+# EXPORT_SYMBOL should immediately follow the thing it is exporting, consider
+# the whole statement.
+#print "APW <$lines[$realline_next - 1]>\n";
+ if (defined $realline_next &&
+ exists $lines[$realline_next - 1] &&
+ !defined $suppress_export{$realline_next} &&
+ ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
+ $lines[$realline_next - 1] =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
+ # Handle definitions which produce identifiers with
+ # a prefix:
+ # XXX(foo);
+ # EXPORT_SYMBOL(something_foo);
+ my $name = $1;
+ if ($stat =~ /^(?:.\s*}\s*\n)?.([A-Z_]+)\s*\(\s*($Ident)/ &&
+ $name =~ /^${Ident}_$2/) {
+#print "FOO C name<$name>\n";
+ $suppress_export{$realline_next} = 1;
+
+ } elsif ($stat !~ /(?:
+ \n.}\s*$|
+ ^.DEFINE_$Ident\(\Q$name\E\)|
+ ^.DECLARE_$Ident\(\Q$name\E\)|
+ ^.LIST_HEAD\(\Q$name\E\)|
+ ^.(?:$Storage\s+)?$Type\s*\(\s*\*\s*\Q$name\E\s*\)\s*\(|
+ \b\Q$name\E(?:\s+$Attribute)*\s*(?:;|=|\[|\()
+ )/x) {
+#print "FOO A<$lines[$realline_next - 1]> stat<$stat> name<$name>\n";
+ $suppress_export{$realline_next} = 2;
+ } else {
+ $suppress_export{$realline_next} = 1;
+ }
+ }
+ if (!defined $suppress_export{$linenr} &&
+ $prevline =~ /^.\s*$/ &&
+ ($line =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
+ $line =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
+#print "FOO B <$lines[$linenr - 1]>\n";
+ $suppress_export{$linenr} = 2;
+ }
+ if (defined $suppress_export{$linenr} &&
+ $suppress_export{$linenr} == 2) {
+ WARN("EXPORT_SYMBOL",
+ "EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr);
+ }
+
+# check for global initialisers.
+ if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) {
+ if (ERROR("GLOBAL_INITIALISERS",
+ "do not initialise globals to 0 or NULL\n" .
+ $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+ }
+ }
+# check for static initialisers.
+ if ($line =~ /^\+.*\bstatic\s.*=\s*(0|NULL|false)\s*;/) {
+ if (ERROR("INITIALISED_STATIC",
+ "do not initialise statics to 0 or NULL\n" .
+ $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
+ }
+ }
+
+# check for static const char * arrays.
+ if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
+ WARN("STATIC_CONST_CHAR_ARRAY",
+ "static const char * array should probably be static const char * const\n" .
+ $herecurr);
+ }
+
+# check for static char foo[] = "bar" declarations.
+ if ($line =~ /\bstatic\s+char\s+(\w+)\s*\[\s*\]\s*=\s*"/) {
+ WARN("STATIC_CONST_CHAR_ARRAY",
+ "static char array declaration should probably be static const char\n" .
+ $herecurr);
+ }
+
+# check for non-global char *foo[] = {"bar", ...} declarations.
+ if ($line =~ /^.\s+(?:static\s+|const\s+)?char\s+\*\s*\w+\s*\[\s*\]\s*=\s*\{/) {
+ WARN("STATIC_CONST_CHAR_ARRAY",
+ "char * array declaration might be better as static const\n" .
+ $herecurr);
+ }
+
+# check for function declarations without arguments like "int foo()"
+ if ($line =~ /(\b$Type\s+$Ident)\s*\(\s*\)/) {
+ if (ERROR("FUNCTION_WITHOUT_ARGS",
+ "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
+ }
+ }
+
+# * goes on variable not on type
+ # (char*[ const])
+ while ($line =~ m{(\($NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)\))}g) {
+ #print "AA<$1>\n";
+ my ($ident, $from, $to) = ($1, $2, $2);
+
+ # Should start with a space.
+ $to =~ s/^(\S)/ $1/;
+ # Should not end with a space.
+ $to =~ s/\s+$//;
+ # '*'s should not have spaces between.
+ while ($to =~ s/\*\s+\*/\*\*/) {
+ }
+
+## print "1: from<$from> to<$to> ident<$ident>\n";
+ if ($from ne $to) {
+ if (ERROR("POINTER_LOCATION",
+ "\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr) &&
+ $fix) {
+ my $sub_from = $ident;
+ my $sub_to = $ident;
+ $sub_to =~ s/\Q$from\E/$to/;
+ $fixed[$linenr - 1] =~
+ s@\Q$sub_from\E@$sub_to@;
+ }
+ }
+ }
+ while ($line =~ m{(\b$NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)($Ident))}g) {
+ #print "BB<$1>\n";
+ my ($match, $from, $to, $ident) = ($1, $2, $2, $3);
+
+ # Should start with a space.
+ $to =~ s/^(\S)/ $1/;
+ # Should not end with a space.
+ $to =~ s/\s+$//;
+ # '*'s should not have spaces between.
+ while ($to =~ s/\*\s+\*/\*\*/) {
+ }
+ # Modifiers should have spaces.
+ $to =~ s/(\b$Modifier$)/$1 /;
+
+## print "2: from<$from> to<$to> ident<$ident>\n";
+ if ($from ne $to && $ident !~ /^$Modifier$/) {
+ if (ERROR("POINTER_LOCATION",
+ "\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr) &&
+ $fix) {
+
+ my $sub_from = $match;
+ my $sub_to = $match;
+ $sub_to =~ s/\Q$from\E/$to/;
+ $fixed[$linenr - 1] =~
+ s@\Q$sub_from\E@$sub_to@;
+ }
+ }
+ }
+
+# function brace can't be on same line, except for #defines of do while,
+# or if closed on same line
+ if (($line=~/$Type\s*$Ident\(.*\).*\s\{/) and
+ !($line=~/\#\s*define.*do\s\{/) and !($line=~/}/)) {
+ ERROR("OPEN_BRACE",
+ "open brace '{' following function declarations go on the next line\n" . $herecurr);
+ }
+
+# open braces for enum, union and struct go on the same line.
+ if ($line =~ /^.\s*{/ &&
+ $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
+ ERROR("OPEN_BRACE",
+ "open brace '{' following $1 go on the same line\n" . $hereprev);
+ }
+
+# missing space after union, struct or enum definition
+ if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident){1,2}[=\{]/) {
+ if (WARN("SPACING",
+ "missing space after $1 definition\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
+ }
+ }
+
+# Function pointer declarations
+# check spacing between type, funcptr, and args
+# canonical declaration is "type (*funcptr)(args...)"
+ if ($line =~ /^.\s*($Declare)\((\s*)\*(\s*)($Ident)(\s*)\)(\s*)\(/) {
+ my $declare = $1;
+ my $pre_pointer_space = $2;
+ my $post_pointer_space = $3;
+ my $funcname = $4;
+ my $post_funcname_space = $5;
+ my $pre_args_space = $6;
+
+# the $Declare variable will capture all spaces after the type
+# so check it for a missing trailing missing space but pointer return types
+# don't need a space so don't warn for those.
+ my $post_declare_space = "";
+ if ($declare =~ /(\s+)$/) {
+ $post_declare_space = $1;
+ $declare = rtrim($declare);
+ }
+ if ($declare !~ /\*$/ && $post_declare_space =~ /^$/) {
+ WARN("SPACING",
+ "missing space after return type\n" . $herecurr);
+ $post_declare_space = " ";
+ }
+
+# unnecessary space "type ( *funcptr)(args...)"
+ if (defined $pre_pointer_space &&
+ $pre_pointer_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space after function pointer open parenthesis\n" . $herecurr);
+ }
+
+# unnecessary space "type (* funcptr)(args...)"
+ if (defined $post_pointer_space &&
+ $post_pointer_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space before function pointer name\n" . $herecurr);
+ }
+
+# unnecessary space "type (*funcptr )(args...)"
+ if (defined $post_funcname_space &&
+ $post_funcname_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space after function pointer name\n" . $herecurr);
+ }
+
+# unnecessary space "type (*funcptr) (args...)"
+ if (defined $pre_args_space &&
+ $pre_args_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space before function pointer arguments\n" . $herecurr);
+ }
+
+ if (show_type("SPACING") && $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
+ }
+ }
+
+# check for spacing round square brackets; allowed:
+# 1. with a type on the left -- int [] a;
+# 2. at the beginning of a line for slice initialisers -- [0...10] = 5,
+# 3. inside a curly brace -- = { [0...10] = 5 }
+ while ($line =~ /(.*?\s)\[/g) {
+ my ($where, $prefix) = ($-[1], $1);
+ if ($prefix !~ /$Type\s+$/ &&
+ ($where != 0 || $prefix !~ /^.\s+$/) &&
+ $prefix !~ /[{,]\s+$/) {
+ if (ERROR("BRACKET_SPACE",
+ "space prohibited before open square bracket '['\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(\+.*?)\s+\[/$1\[/;
+ }
+ }
+ }
+
+# Check operator spacing.
+ if (!($line=~/\#\s*include/)) {
+ my $fixed_line = "";
+ my $line_fixed = 0;
+
+ my $ops = qr{
+ <<=|>>=|<=|>=|==|!=|
+ \+=|-=|\*=|\/=|%=|\^=|\|=|&=|
+ =>|->|<<|>>|<|>|=|!|~|
+ &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%|
+ \?:|\?|:
+ }x;
+ my @elements = split(/($ops|;)/, $opline);
+
+## print("element count: <" . $#elements . ">\n");
+## foreach my $el (@elements) {
+## print("el: <$el>\n");
+## }
+
+ my @fix_elements = ();
+ my $off = 0;
+
+ foreach my $el (@elements) {
+ push(@fix_elements, substr($rawline, $off, length($el)));
+ $off += length($el);
+ }
+
+ $off = 0;
+
+ my $blank = copy_spacing($opline);
+ my $last_after = -1;
+
+ for (my $n = 0; $n < $#elements; $n += 2) {
+
+ my $good = $fix_elements[$n] . $fix_elements[$n + 1];
+
+## print("n: <$n> good: <$good>\n");
+
+ $off += length($elements[$n]);
+
+ # Pick up the preceding and succeeding characters.
+ my $ca = substr($opline, 0, $off);
+ my $cc = '';
+ if (length($opline) >= ($off + length($elements[$n + 1]))) {
+ $cc = substr($opline, $off + length($elements[$n + 1]));
+ }
+ my $cb = "$ca$;$cc";
+
+ my $a = '';
+ $a = 'V' if ($elements[$n] ne '');
+ $a = 'W' if ($elements[$n] =~ /\s$/);
+ $a = 'C' if ($elements[$n] =~ /$;$/);
+ $a = 'B' if ($elements[$n] =~ /(\[|\()$/);
+ $a = 'O' if ($elements[$n] eq '');
+ $a = 'E' if ($ca =~ /^\s*$/);
+
+ my $op = $elements[$n + 1];
+
+ my $c = '';
+ if (defined $elements[$n + 2]) {
+ $c = 'V' if ($elements[$n + 2] ne '');
+ $c = 'W' if ($elements[$n + 2] =~ /^\s/);
+ $c = 'C' if ($elements[$n + 2] =~ /^$;/);
+ $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/);
+ $c = 'O' if ($elements[$n + 2] eq '');
+ $c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/);
+ } else {
+ $c = 'E';
+ }
+
+ my $ctx = "${a}x${c}";
+
+ my $at = "(ctx:$ctx)";
+
+ my $ptr = substr($blank, 0, $off) . "^";
+ my $hereptr = "$hereline$ptr\n";
+
+ # Pull out the value of this operator.
+ my $op_type = substr($curr_values, $off + 1, 1);
+
+ # Get the full operator variant.
+ my $opv = $op . substr($curr_vars, $off, 1);
+
+ # Ignore operators passed as parameters.
+ if ($op_type ne 'V' &&
+ $ca =~ /\s$/ && $cc =~ /^\s*,/) {
+
+# # Ignore comments
+# } elsif ($op =~ /^$;+$/) {
+
+ # ; should have either the end of line or a space or \ after it
+ } elsif ($op eq ';') {
+ if ($ctx !~ /.x[WEBC]/ &&
+ $cc !~ /^\\/ && $cc !~ /^;/) {
+ if (ERROR("SPACING",
+ "space required after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
+ $line_fixed = 1;
+ }
+ }
+
+ # // is a comment
+ } elsif ($op eq '//') {
+
+ # : when part of a bitfield
+ } elsif ($opv eq ':B') {
+ # skip the bitfield test for now
+
+ # No spaces for:
+ # ->
+ } elsif ($op eq '->') {
+ if ($ctx =~ /Wx.|.xW/) {
+ if (ERROR("SPACING",
+ "spaces prohibited around that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # , must have a space on the right.
+ } elsif ($op eq ',') {
+ if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) {
+ if (ERROR("SPACING",
+ "space required after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
+ $line_fixed = 1;
+ $last_after = $n;
+ }
+ }
+
+ # '*' as part of a type definition -- reported already.
+ } elsif ($opv eq '*_') {
+ #warn "'*' is part of type\n";
+
+ # unary operators should have a space before and
+ # none after. May be left adjacent to another
+ # unary operator, or a cast
+ } elsif ($op eq '!' || $op eq '~' ||
+ $opv eq '*U' || $opv eq '-U' ||
+ $opv eq '&U' || $opv eq '&&U') {
+ if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) {
+ if (ERROR("SPACING",
+ "space required before that '$op' $at\n" . $hereptr)) {
+ if ($n != $last_after + 2) {
+ $good = $fix_elements[$n] . " " . ltrim($fix_elements[$n + 1]);
+ $line_fixed = 1;
+ }
+ }
+ }
+ if ($op eq '*' && $cc =~/\s*$Modifier\b/) {
+ # A unary '*' may be const
+
+ } elsif ($ctx =~ /.xW/) {
+ if (ERROR("SPACING",
+ "space prohibited after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . rtrim($fix_elements[$n + 1]);
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # unary ++ and unary -- are allowed no space on one side.
+ } elsif ($op eq '++' or $op eq '--') {
+ if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) {
+ if (ERROR("SPACING",
+ "space required one side of that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
+ $line_fixed = 1;
+ }
+ }
+ if ($ctx =~ /Wx[BE]/ ||
+ ($ctx =~ /Wx./ && $cc =~ /^;/)) {
+ if (ERROR("SPACING",
+ "space prohibited before that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+ $line_fixed = 1;
+ }
+ }
+ if ($ctx =~ /ExW/) {
+ if (ERROR("SPACING",
+ "space prohibited after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]);
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # << and >> may either have or not have spaces both sides
+ } elsif ($op eq '<<' or $op eq '>>' or
+ $op eq '&' or $op eq '^' or $op eq '|' or
+ $op eq '+' or $op eq '-' or
+ $op eq '*' or $op eq '/' or
+ $op eq '%')
+ {
+ if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) {
+ if (ERROR("SPACING",
+ "need consistent spacing around '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # A colon needs no spaces before when it is
+ # terminating a case value or a label.
+ } elsif ($opv eq ':C' || $opv eq ':L') {
+ if ($ctx =~ /Wx./) {
+ if (ERROR("SPACING",
+ "space prohibited before that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+ $line_fixed = 1;
+ }
+ }
+
+ # All the others need spaces both sides.
+ } elsif ($ctx !~ /[EWC]x[CWE]/) {
+ my $ok = 0;
+
+ # Ignore email addresses <foo@bar>
+ if (($op eq '<' &&
+ $cc =~ /^\S+\@\S+>/) ||
+ ($op eq '>' &&
+ $ca =~ /<\S+\@\S+$/))
+ {
+ $ok = 1;
+ }
+
+ # messages are ERROR, but ?: are CHK
+ if ($ok == 0) {
+ my $msg_type = \&ERROR;
+ $msg_type = \&CHK if (($op eq '?:' || $op eq '?' || $op eq ':') && $ctx =~ /VxV/);
+
+ if (&{$msg_type}("SPACING",
+ "spaces required around that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+ }
+ $off += length($elements[$n + 1]);
+
+## print("n: <$n> GOOD: <$good>\n");
+
+ $fixed_line = $fixed_line . $good;
+ }
+
+ if (($#elements % 2) == 0) {
+ $fixed_line = $fixed_line . $fix_elements[$#elements];
+ }
+
+ if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) {
+ $fixed[$linenr - 1] = $fixed_line;
+ }
+
+
+ }
+
+# check for whitespace before a non-naked semicolon
+ if ($line =~ /^\+.*\S\s+;\s*$/) {
+ if (WARN("SPACING",
+ "space prohibited before semicolon\n" . $herecurr) &&
+ $fix) {
+ 1 while $fixed[$linenr - 1] =~
+ s/^(\+.*\S)\s+;/$1;/;
+ }
+ }
+
+# check for multiple assignments
+ if ($line =~ /^.\s*$Lval\s*=\s*$Lval\s*=(?!=)/) {
+ CHK("MULTIPLE_ASSIGNMENTS",
+ "multiple assignments should be avoided\n" . $herecurr);
+ }
+
+## # check for multiple declarations, allowing for a function declaration
+## # continuation.
+## if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ &&
+## $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) {
+##
+## # Remove any bracketed sections to ensure we do not
+## # falsly report the parameters of functions.
+## my $ln = $line;
+## while ($ln =~ s/\([^\(\)]*\)//g) {
+## }
+## if ($ln =~ /,/) {
+## WARN("MULTIPLE_DECLARATION",
+## "declaring multiple variables together should be avoided\n" . $herecurr);
+## }
+## }
+
+#need space before brace following if, while, etc
+ if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) ||
+ $line =~ /do\{/) {
+ if (ERROR("SPACING",
+ "space required before the open brace '{'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\)))\{/$1 {/;
+ }
+ }
+
+## # check for blank lines before declarations
+## if ($line =~ /^.\t+$Type\s+$Ident(?:\s*=.*)?;/ &&
+## $prevrawline =~ /^.\s*$/) {
+## WARN("SPACING",
+## "No blank lines before declarations\n" . $hereprev);
+## }
+##
+
+# closing brace should have a space following it when it has anything
+# on the line
+ if ($line =~ /}(?!(?:,|;|\)))\S/) {
+ if (ERROR("SPACING",
+ "space required after that close brace '}'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/}((?!(?:,|;|\)))\S)/} $1/;
+ }
+ }
+
+# check spacing on square brackets
+ if ($line =~ /\[\s/ && $line !~ /\[\s*$/) {
+ if (ERROR("SPACING",
+ "space prohibited after that open square bracket '['\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\[\s+/\[/;
+ }
+ }
+ if ($line =~ /\s\]/) {
+ if (ERROR("SPACING",
+ "space prohibited before that close square bracket ']'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\s+\]/\]/;
+ }
+ }
+
+# check spacing on parentheses
+ if ($line =~ /\(\s/ && $line !~ /\(\s*(?:\\)?$/ &&
+ $line !~ /for\s*\(\s+;/) {
+ if (ERROR("SPACING",
+ "space prohibited after that open parenthesis '('\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\(\s+/\(/;
+ }
+ }
+ if ($line =~ /(\s+)\)/ && $line !~ /^.\s*\)/ &&
+ $line !~ /for\s*\(.*;\s+\)/ &&
+ $line !~ /:\s+\)/) {
+ if (ERROR("SPACING",
+ "space prohibited before that close parenthesis ')'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\s+\)/\)/;
+ }
+ }
+
+#goto labels aren't indented, allow a single space however
+ if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
+ !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
+ if (WARN("INDENTED_LABEL",
+ "labels should not be indented\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.)\s+/$1/;
+ }
+ }
+
+# return is not a function
+ if (defined($stat) && $stat =~ /^.\s*return(\s*)\(/s) {
+ my $spacing = $1;
+ if ($^V && $^V ge 5.10.0 &&
+ $stat =~ /^.\s*return\s*($balanced_parens)\s*;\s*$/) {
+ my $value = $1;
+ $value = deparenthesize($value);
+ if ($value =~ m/^\s*$FuncArg\s*(?:\?|$)/) {
+ ERROR("RETURN_PARENTHESES",
+ "return is not a function, parentheses are not required\n" . $herecurr);
+ }
+ } elsif ($spacing !~ /\s+/) {
+ ERROR("SPACING",
+ "space required before the open parenthesis '('\n" . $herecurr);
+ }
+ }
+
+# unnecessary return in a void function
+# at end-of-function, with the previous line a single leading tab, then return;
+# and the line before that not a goto label target like "out:"
+ if ($sline =~ /^[ \+]}\s*$/ &&
+ $prevline =~ /^\+\treturn\s*;\s*$/ &&
+ $linenr >= 3 &&
+ $lines[$linenr - 3] =~ /^[ +]/ &&
+ $lines[$linenr - 3] !~ /^[ +]\s*$Ident\s*:/) {
+ WARN("RETURN_VOID",
+ "void function return statements are not generally useful\n" . $hereprev);
+ }
+
+# if statements using unnecessary parentheses - ie: if ((foo == bar))
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /\bif\s*((?:\(\s*){2,})/) {
+ my $openparens = $1;
+ my $count = $openparens =~ tr@\(@\(@;
+ my $msg = "";
+ if ($line =~ /\bif\s*(?:\(\s*){$count,$count}$LvalOrFunc\s*($Compare)\s*$LvalOrFunc(?:\s*\)){$count,$count}/) {
+ my $comp = $4; #Not $1 because of $LvalOrFunc
+ $msg = " - maybe == should be = ?" if ($comp eq "==");
+ WARN("UNNECESSARY_PARENTHESES",
+ "Unnecessary parentheses$msg\n" . $herecurr);
+ }
+ }
+
+# Return of what appears to be an errno should normally be -'ve
+ if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) {
+ my $name = $1;
+ if ($name ne 'EOF' && $name ne 'ERROR') {
+ WARN("USE_NEGATIVE_ERRNO",
+ "return of an errno should typically be -ve (return -$1)\n" . $herecurr);
+ }
+ }
+
+# Need a space before open parenthesis after if, while etc
+ if ($line =~ /\b(if|while|for|switch)\(/) {
+ if (ERROR("SPACING",
+ "space required before the open parenthesis '('\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\b(if|while|for|switch)\(/$1 \(/;
+ }
+ }
+
+# Check for illegal assignment in if conditional -- and check for trailing
+# statements after the conditional.
+ if ($line =~ /do\s*(?!{)/) {
+ ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
+ ctx_statement_block($linenr, $realcnt, 0)
+ if (!defined $stat);
+ my ($stat_next) = ctx_statement_block($line_nr_next,
+ $remain_next, $off_next);
+ $stat_next =~ s/\n./\n /g;
+ ##print "stat<$stat> stat_next<$stat_next>\n";
+
+ if ($stat_next =~ /^\s*while\b/) {
+ # If the statement carries leading newlines,
+ # then count those as offsets.
+ my ($whitespace) =
+ ($stat_next =~ /^((?:\s*\n[+-])*\s*)/s);
+ my $offset =
+ statement_rawlines($whitespace) - 1;
+
+ $suppress_whiletrailers{$line_nr_next +
+ $offset} = 1;
+ }
+ }
+ if (!defined $suppress_whiletrailers{$linenr} &&
+ defined($stat) && defined($cond) &&
+ $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
+ my ($s, $c) = ($stat, $cond);
+
+ if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) {
+ ERROR("ASSIGN_IN_IF",
+ "do not use assignment in if condition\n" . $herecurr);
+ }
+
+ # Find out what is on the end of the line after the
+ # conditional.
+ substr($s, 0, length($c), '');
+ $s =~ s/\n.*//g;
+ $s =~ s/$;//g; # Remove any comments
+ if (length($c) && $s !~ /^\s*{?\s*\\*\s*$/ &&
+ $c !~ /}\s*while\s*/)
+ {
+ # Find out how long the conditional actually is.
+ my @newlines = ($c =~ /\n/gs);
+ my $cond_lines = 1 + $#newlines;
+ my $stat_real = '';
+
+ $stat_real = raw_line($linenr, $cond_lines)
+ . "\n" if ($cond_lines);
+ if (defined($stat_real) && $cond_lines > 1) {
+ $stat_real = "[...]\n$stat_real";
+ }
+
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" . $herecurr . $stat_real);
+ }
+ }
+
+# Check for bitwise tests written as boolean
+ if ($line =~ /
+ (?:
+ (?:\[|\(|\&\&|\|\|)
+ \s*0[xX][0-9]+\s*
+ (?:\&\&|\|\|)
+ |
+ (?:\&\&|\|\|)
+ \s*0[xX][0-9]+\s*
+ (?:\&\&|\|\||\)|\])
+ )/x)
+ {
+ WARN("HEXADECIMAL_BOOLEAN_TEST",
+ "boolean test with hexadecimal, perhaps just 1 \& or \|?\n" . $herecurr);
+ }
+
+# if and else should not have general statements after it
+ if ($line =~ /^.\s*(?:}\s*)?else\b(.*)/) {
+ my $s = $1;
+ $s =~ s/$;//g; # Remove any comments
+ if ($s !~ /^\s*(?:\sif|(?:{|)\s*\\?\s*$)/) {
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" . $herecurr);
+ }
+ }
+# if should not continue a brace
+ if ($line =~ /}\s*if\b/) {
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" .
+ $herecurr);
+ }
+# case and default should not have general statements after them
+ if ($line =~ /^.\s*(?:case\s*.*|default\s*):/g &&
+ $line !~ /\G(?:
+ (?:\s*$;*)(?:\s*{)?(?:\s*$;*)(?:\s*\\)?\s*$|
+ \s*return\s+
+ )/xg)
+ {
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" . $herecurr);
+ }
+
+ # Check for }<nl>else {, these must be at the same
+ # indent level to be relevant to each other.
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and
+ $previndent == $indent) {
+ ERROR("ELSE_AFTER_BRACE",
+ "else should follow close brace '}'\n" . $hereprev);
+ }
+
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and
+ $previndent == $indent) {
+ my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
+
+ # Find out what is on the end of the line after the
+ # conditional.
+ substr($s, 0, length($c), '');
+ $s =~ s/\n.*//g;
+
+ if ($s =~ /^\s*;/) {
+ ERROR("WHILE_AFTER_BRACE",
+ "while should follow close brace '}'\n" . $hereprev);
+ }
+ }
+
+#Specific variable tests
+ while ($line =~ m{($Constant|$Lval)}g) {
+ my $var = $1;
+
+#gcc binary extension
+ if ($var =~ /^$Binary$/) {
+ if (WARN("GCC_BINARY_CONSTANT",
+ "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
+ $fix) {
+ my $hexval = sprintf("0x%x", oct($var));
+ $fixed[$linenr - 1] =~
+ s/\b$var\b/$hexval/;
+ }
+ }
+
+#CamelCase
+ if ($var !~ /^$Constant$/ &&
+ $var =~ /[A-Z][a-z]|[a-z][A-Z]/ &&
+#Ignore Page<foo> variants
+ $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ &&
+#Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show)
+ $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/) {
+ while ($var =~ m{($Ident)}g) {
+ my $word = $1;
+ next if ($word !~ /[A-Z][a-z]|[a-z][A-Z]/);
+ if ($check) {
+ seed_camelcase_includes();
+ if (!$file && !$camelcase_file_seeded) {
+ seed_camelcase_file($realfile);
+ $camelcase_file_seeded = 1;
+ }
+ }
+ if (!defined $camelcase{$word}) {
+ $camelcase{$word} = 1;
+ CHK("CAMELCASE",
+ "Avoid CamelCase: <$word>\n" . $herecurr);
+ }
+ }
+ }
+ }
+
+#no spaces allowed after \ in define
+ if ($line =~ /\#\s*define.*\\\s+$/) {
+ if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
+ "Whitespace after \\ makes next lines useless\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\s+$//;
+ }
+ }
+
+#warn if <asm/foo.h> is #included and <linux/foo.h> is available (uses RAW line)
+ if ($tree && $rawline =~ m{^.\s*\#\s*include\s*\<asm\/(.*)\.h\>}) {
+ my $file = "$1.h";
+ my $checkfile = "include/linux/$file";
+ if (-f "$root/$checkfile" &&
+ $realfile ne $checkfile &&
+ $1 !~ /$allowed_asm_includes/)
+ {
+ if ($realfile =~ m{^arch/}) {
+ CHK("ARCH_INCLUDE_LINUX",
+ "Consider using #include <linux/$file> instead of <asm/$file>\n" . $herecurr);
+ } else {
+ WARN("INCLUDE_LINUX",
+ "Use #include <linux/$file> instead of <asm/$file>\n" . $herecurr);
+ }
+ }
+ }
+
+# multi-statement macros should be enclosed in a do while loop, grab the
+# first statement and ensure its the whole macro if its not enclosed
+# in a known good container
+ if ($realfile !~ m@/vmlinux.lds.h$@ &&
+ $line =~ /^.\s*\#\s*define\s*$Ident(\()?/) {
+ my $ln = $linenr;
+ my $cnt = $realcnt;
+ my ($off, $dstat, $dcond, $rest);
+ my $ctx = '';
+ ($dstat, $dcond, $ln, $cnt, $off) =
+ ctx_statement_block($linenr, $realcnt, 0);
+ $ctx = $dstat;
+ #print "dstat<$dstat> dcond<$dcond> cnt<$cnt> off<$off>\n";
+ #print "LINE<$lines[$ln-1]> len<" . length($lines[$ln-1]) . "\n";
+
+ $dstat =~ s/^.\s*\#\s*define\s+$Ident(?:\([^\)]*\))?\s*//;
+ $dstat =~ s/$;//g;
+ $dstat =~ s/\\\n.//g;
+ $dstat =~ s/^\s*//s;
+ $dstat =~ s/\s*$//s;
+
+ # Flatten any parentheses and braces
+ while ($dstat =~ s/\([^\(\)]*\)/1/ ||
+ $dstat =~ s/\{[^\{\}]*\}/1/ ||
+ $dstat =~ s/\[[^\[\]]*\]/1/)
+ {
+ }
+
+ # Flatten any obvious string concatentation.
+ while ($dstat =~ s/("X*")\s*$Ident/$1/ ||
+ $dstat =~ s/$Ident\s*("X*")/$1/)
+ {
+ }
+
+ my $exceptions = qr{
+ $Declare|
+ module_param_named|
+ MODULE_PARM_DESC|
+ DECLARE_PER_CPU|
+ DEFINE_PER_CPU|
+ __typeof__\(|
+ union|
+ struct|
+ \.$Ident\s*=\s*|
+ ^\"|\"$
+ }x;
+ #print "REST<$rest> dstat<$dstat> ctx<$ctx>\n";
+ if ($dstat ne '' &&
+ $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(),
+ $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo();
+ $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
+ $dstat !~ /^'X'$/ && # character constants
+ $dstat !~ /$exceptions/ &&
+ $dstat !~ /^\.$Ident\s*=/ && # .foo =
+ $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
+ $dstat !~ /^do\s*$Constant\s*while\s*$Constant;?$/ && # do {...} while (...); // do {...} while (...)
+ $dstat !~ /^for\s*$Constant$/ && # for (...)
+ $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar()
+ $dstat !~ /^do\s*{/ && # do {...
+ $dstat !~ /^\(\{/ && # ({...
+ $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/)
+ {
+ $ctx =~ s/\n*$//;
+ my $herectx = $here . "\n";
+ my $cnt = statement_rawlines($ctx);
+
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+
+ if ($dstat =~ /;/) {
+ ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE",
+ "Macros with multiple statements should be enclosed in a do - while loop\n" . "$herectx");
+ } else {
+ ERROR("COMPLEX_MACRO",
+ "Macros with complex values should be enclosed in parenthesis\n" . "$herectx");
+ }
+ }
+
+# check for line continuations outside of #defines, preprocessor #, and asm
+
+ } else {
+ if ($prevline !~ /^..*\\$/ &&
+ $line !~ /^\+\s*\#.*\\$/ && # preprocessor
+ $line !~ /^\+.*\b(__asm__|asm)\b.*\\$/ && # asm
+ $line =~ /^\+.*\\$/) {
+ WARN("LINE_CONTINUATIONS",
+ "Avoid unnecessary line continuations\n" . $herecurr);
+ }
+ }
+
+# do {} while (0) macro tests:
+# single-statement macros do not need to be enclosed in do while (0) loop,
+# macro should not end with a semicolon
+ if ($^V && $^V ge 5.10.0 &&
+ $realfile !~ m@/vmlinux.lds.h$@ &&
+ $line =~ /^.\s*\#\s*define\s+$Ident(\()?/) {
+ my $ln = $linenr;
+ my $cnt = $realcnt;
+ my ($off, $dstat, $dcond, $rest);
+ my $ctx = '';
+ ($dstat, $dcond, $ln, $cnt, $off) =
+ ctx_statement_block($linenr, $realcnt, 0);
+ $ctx = $dstat;
+
+ $dstat =~ s/\\\n.//g;
+
+ if ($dstat =~ /^\+\s*#\s*define\s+$Ident\s*${balanced_parens}\s*do\s*{(.*)\s*}\s*while\s*\(\s*0\s*\)\s*([;\s]*)\s*$/) {
+ my $stmts = $2;
+ my $semis = $3;
+
+ $ctx =~ s/\n*$//;
+ my $cnt = statement_rawlines($ctx);
+ my $herectx = $here . "\n";
+
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+
+ if (($stmts =~ tr/;/;/) == 1 &&
+ $stmts !~ /^\s*(if|while|for|switch)\b/) {
+ WARN("SINGLE_STATEMENT_DO_WHILE_MACRO",
+ "Single statement macros should not use a do {} while (0) loop\n" . "$herectx");
+ }
+ if (defined $semis && $semis ne "") {
+ WARN("DO_WHILE_MACRO_WITH_TRAILING_SEMICOLON",
+ "do {} while (0) macros should not be semicolon terminated\n" . "$herectx");
+ }
+ } elsif ($dstat =~ /^\+\s*#\s*define\s+$Ident.*;\s*$/) {
+ $ctx =~ s/\n*$//;
+ my $cnt = statement_rawlines($ctx);
+ my $herectx = $here . "\n";
+
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+
+ WARN("TRAILING_SEMICOLON",
+ "macros should not use a trailing semicolon\n" . "$herectx");
+ }
+ }
+
+# make sure symbols are always wrapped with VMLINUX_SYMBOL() ...
+# all assignments may have only one of the following with an assignment:
+# .
+# ALIGN(...)
+# VMLINUX_SYMBOL(...)
+ if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) {
+ WARN("MISSING_VMLINUX_SYMBOL",
+ "vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr);
+ }
+
+# check for redundant bracing round if etc
+ if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) {
+ my ($level, $endln, @chunks) =
+ ctx_statement_full($linenr, $realcnt, 1);
+ #print "chunks<$#chunks> linenr<$linenr> endln<$endln> level<$level>\n";
+ #print "APW: <<$chunks[1][0]>><<$chunks[1][1]>>\n";
+ if ($#chunks > 0 && $level == 0) {
+ my @allowed = ();
+ my $allow = 0;
+ my $seen = 0;
+ my $herectx = $here . "\n";
+ my $ln = $linenr - 1;
+ for my $chunk (@chunks) {
+ my ($cond, $block) = @{$chunk};
+
+ # If the condition carries leading newlines, then count those as offsets.
+ my ($whitespace) = ($cond =~ /^((?:\s*\n[+-])*\s*)/s);
+ my $offset = statement_rawlines($whitespace) - 1;
+
+ $allowed[$allow] = 0;
+ #print "COND<$cond> whitespace<$whitespace> offset<$offset>\n";
+
+ # We have looked at and allowed this specific line.
+ $suppress_ifbraces{$ln + $offset} = 1;
+
+ $herectx .= "$rawlines[$ln + $offset]\n[...]\n";
+ $ln += statement_rawlines($block) - 1;
+
+ substr($block, 0, length($cond), '');
+
+ $seen++ if ($block =~ /^\s*{/);
+
+ #print "cond<$cond> block<$block> allowed<$allowed[$allow]>\n";
+ if (statement_lines($cond) > 1) {
+ #print "APW: ALLOWED: cond<$cond>\n";
+ $allowed[$allow] = 1;
+ }
+ if ($block =~/\b(?:if|for|while)\b/) {
+ #print "APW: ALLOWED: block<$block>\n";
+ $allowed[$allow] = 1;
+ }
+ if (statement_block_size($block) > 1) {
+ #print "APW: ALLOWED: lines block<$block>\n";
+ $allowed[$allow] = 1;
+ }
+ $allow++;
+ }
+ if ($seen) {
+ my $sum_allowed = 0;
+ foreach (@allowed) {
+ $sum_allowed += $_;
+ }
+ if ($sum_allowed != 0 && $sum_allowed != $allow
+ && $seen != $allow) {
+ CHK("BRACES",
+ "braces {} should be used on all arms of this statement\n" . $herectx);
+ }
+ }
+ }
+ }
+ if (!defined $suppress_ifbraces{$linenr - 1} &&
+ $line =~ /\b(if|while|for|else)\b/) {
+ my $allowed = 0;
+
+ # Check the pre-context.
+ if (substr($line, 0, $-[0]) =~ /(\}\s*)$/) {
+ #print "APW: ALLOWED: pre<$1>\n";
+ $allowed = 1;
+ }
+
+ my ($level, $endln, @chunks) =
+ ctx_statement_full($linenr, $realcnt, $-[0]);
+
+ # Check the condition.
+ my ($cond, $block) = @{$chunks[0]};
+ #print "CHECKING<$linenr> cond<$cond> block<$block>\n";
+ if (defined $cond) {
+ substr($block, 0, length($cond), '');
+ }
+ if (statement_lines($cond) > 1) {
+ #print "APW: ALLOWED: cond<$cond>\n";
+ $allowed = 1;
+ }
+ if ($block =~/\b(?:if|for|while)\b/) {
+ #print "APW: ALLOWED: block<$block>\n";
+ $allowed = 1;
+ }
+ if (statement_block_size($block) > 1) {
+ #print "APW: ALLOWED: lines block<$block>\n";
+ $allowed = 1;
+ }
+ # Check the post-context.
+ if (defined $chunks[1]) {
+ my ($cond, $block) = @{$chunks[1]};
+ if (defined $cond) {
+ substr($block, 0, length($cond), '');
+ }
+ if ($block =~ /^\s*\{/) {
+ #print "APW: ALLOWED: chunk-1 block<$block>\n";
+ $allowed = 1;
+ }
+ }
+ }
+
+# check for unnecessary blank lines around braces
+ if (($line =~ /^.\s*}\s*$/ && $prevrawline =~ /^.\s*$/)) {
+ CHK("BRACES",
+ "Blank lines aren't necessary before a close brace '}'\n" . $hereprev);
+ }
+ if (($rawline =~ /^.\s*$/ && $prevline =~ /^..*{\s*$/)) {
+ CHK("BRACES",
+ "Blank lines aren't necessary after an open brace '{'\n" . $hereprev);
+ }
+
+# no volatiles please
+ my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b};
+ if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) {
+ WARN("VOLATILE",
+ "Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
+ }
+
+# warn about #if 0
+ if ($line =~ /^.\s*\#\s*if\s+0\b/) {
+ CHK("REDUNDANT_CODE",
+ "if this code is redundant consider removing it\n" .
+ $herecurr);
+ }
+
+# check for needless "if (<foo>) fn(<foo>)" uses
+ if ($prevline =~ /\bif\s*\(\s*($Lval)\s*\)/) {
+ my $expr = '\s*\(\s*' . quotemeta($1) . '\s*\)\s*;';
+ if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?)$expr/) {
+ WARN('NEEDLESS_IF',
+ "$1(NULL) is safe this check is probably not required\n" . $hereprev);
+ }
+ }
+
+# check for bad placement of section $InitAttribute (e.g.: __initdata)
+ if ($line =~ /(\b$InitAttribute\b)/) {
+ my $attr = $1;
+ if ($line =~ /^\+\s*static\s+(?:const\s+)?(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*[=;]/) {
+ my $ptr = $1;
+ my $var = $2;
+ if ((($ptr =~ /\b(union|struct)\s+$attr\b/ &&
+ ERROR("MISPLACED_INIT",
+ "$attr should be placed after $var\n" . $herecurr)) ||
+ ($ptr !~ /\b(union|struct)\s+$attr\b/ &&
+ WARN("MISPLACED_INIT",
+ "$attr should be placed after $var\n" . $herecurr))) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
+ }
+ }
+ }
+
+# check for $InitAttributeData (ie: __initdata) with const
+ if ($line =~ /\bconst\b/ && $line =~ /($InitAttributeData)/) {
+ my $attr = $1;
+ $attr =~ /($InitAttributePrefix)(.*)/;
+ my $attr_prefix = $1;
+ my $attr_type = $2;
+ if (ERROR("INIT_ATTRIBUTE",
+ "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/$InitAttributeData/${attr_prefix}initconst/;
+ }
+ }
+
+# check for $InitAttributeConst (ie: __initconst) without const
+ if ($line !~ /\bconst\b/ && $line =~ /($InitAttributeConst)/) {
+ my $attr = $1;
+ if (ERROR("INIT_ATTRIBUTE",
+ "Use of $attr requires a separate use of const\n" . $herecurr) &&
+ $fix) {
+ my $lead = $fixed[$linenr - 1] =~
+ /(^\+\s*(?:static\s+))/;
+ $lead = rtrim($1);
+ $lead = "$lead " if ($lead !~ /^\+$/);
+ $lead = "${lead}const ";
+ $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/;
+ }
+ }
+
+# don't use __constant_<foo> functions outside of include/uapi/
+ if ($realfile !~ m@^include/uapi/@ &&
+ $line =~ /(__constant_(?:htons|ntohs|[bl]e(?:16|32|64)_to_cpu|cpu_to_[bl]e(?:16|32|64)))\s*\(/) {
+ my $constant_func = $1;
+ my $func = $constant_func;
+ $func =~ s/^__constant_//;
+ if (WARN("CONSTANT_CONVERSION",
+ "$constant_func should be $func\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g;
+ }
+ }
+
+# prefer usleep_range over udelay
+ if ($line =~ /\budelay\s*\(\s*(\d+)\s*\)/) {
+ my $delay = $1;
+ # ignore udelay's < 10, however
+ if (! ($delay < 10) ) {
+ CHK("USLEEP_RANGE",
+ "usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt\n" . $herecurr);
+ }
+ if ($delay > 2000) {
+ WARN("LONG_UDELAY",
+ "long udelay - prefer mdelay; see arch/arm/include/asm/delay.h\n" . $herecurr);
+ }
+ }
+
+# warn about unexpectedly long msleep's
+ if ($line =~ /\bmsleep\s*\((\d+)\);/) {
+ if ($1 < 20) {
+ WARN("MSLEEP",
+ "msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.txt\n" . $herecurr);
+ }
+ }
+
+# check for comparisons of jiffies
+ if ($line =~ /\bjiffies\s*$Compare|$Compare\s*jiffies\b/) {
+ WARN("JIFFIES_COMPARISON",
+ "Comparing jiffies is almost always wrong; prefer time_after, time_before and friends\n" . $herecurr);
+ }
+
+# check for comparisons of get_jiffies_64()
+ if ($line =~ /\bget_jiffies_64\s*\(\s*\)\s*$Compare|$Compare\s*get_jiffies_64\s*\(\s*\)/) {
+ WARN("JIFFIES_COMPARISON",
+ "Comparing get_jiffies_64() is almost always wrong; prefer time_after64, time_before64 and friends\n" . $herecurr);
+ }
+
+# warn about spacing in #ifdefs
+ if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) {
+ if (ERROR("SPACING",
+ "exactly one space required after that #$1\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
+ }
+
+ }
+
+# check for spinlock_t definitions without a comment.
+ if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/ ||
+ $line =~ /^.\s*(DEFINE_MUTEX)\s*\(/) {
+ my $which = $1;
+ if (!ctx_has_comment($first_line, $linenr)) {
+ CHK("UNCOMMENTED_DEFINITION",
+ "$1 definition without comment\n" . $herecurr);
+ }
+ }
+# check for memory barriers without a comment.
+ if ($line =~ /\b(mb|rmb|wmb|read_barrier_depends|smp_mb|smp_rmb|smp_wmb|smp_read_barrier_depends)\(/) {
+ if (!ctx_has_comment($first_line, $linenr)) {
+ WARN("MEMORY_BARRIER",
+ "memory barrier without comment\n" . $herecurr);
+ }
+ }
+# check of hardware specific defines
+ if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
+ CHK("ARCH_DEFINES",
+ "architecture specific defines should be avoided\n" . $herecurr);
+ }
+
+# Check that the storage class is at the beginning of a declaration
+ if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) {
+ WARN("STORAGE_CLASS",
+ "storage class should be at the beginning of the declaration\n" . $herecurr)
+ }
+
+# check the location of the inline attribute, that it is between
+# storage class and type.
+ if ($line =~ /\b$Type\s+$Inline\b/ ||
+ $line =~ /\b$Inline\s+$Storage\b/) {
+ ERROR("INLINE_LOCATION",
+ "inline keyword should sit between storage class and type\n" . $herecurr);
+ }
+
+# Check for __inline__ and __inline, prefer inline
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b(__inline__|__inline)\b/) {
+ if (WARN("INLINE",
+ "plain inline is preferred over $1\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/;
+
+ }
+ }
+
+# Check for __attribute__ packed, prefer __packed
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(.*\bpacked\b/) {
+ WARN("PREFER_PACKED",
+ "__packed is preferred over __attribute__((packed))\n" . $herecurr);
+ }
+
+# Check for __attribute__ aligned, prefer __aligned
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(.*aligned/) {
+ WARN("PREFER_ALIGNED",
+ "__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr);
+ }
+
+# Check for __attribute__ format(printf, prefer __printf
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) {
+ if (WARN("PREFER_PRINTF",
+ "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
+
+ }
+ }
+
+# Check for __attribute__ format(scanf, prefer __scanf
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\b/) {
+ if (WARN("PREFER_SCANF",
+ "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
+ }
+ }
+
+# check for sizeof(&)
+ if ($line =~ /\bsizeof\s*\(\s*\&/) {
+ WARN("SIZEOF_ADDRESS",
+ "sizeof(& should be avoided\n" . $herecurr);
+ }
+
+# check for sizeof without parenthesis
+ if ($line =~ /\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/) {
+ if (WARN("SIZEOF_PARENTHESIS",
+ "sizeof $1 should be sizeof($1)\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
+ }
+ }
+
+# check for line continuations in quoted strings with odd counts of "
+ if ($rawline =~ /\\$/ && $rawline =~ tr/"/"/ % 2) {
+ WARN("LINE_CONTINUATIONS",
+ "Avoid line continuations in quoted strings\n" . $herecurr);
+ }
+
+# check for struct spinlock declarations
+ if ($line =~ /^.\s*\bstruct\s+spinlock\s+\w+\s*;/) {
+ WARN("USE_SPINLOCK_T",
+ "struct spinlock should be spinlock_t\n" . $herecurr);
+ }
+
+# check for seq_printf uses that could be seq_puts
+ if ($sline =~ /\bseq_printf\s*\(.*"\s*\)\s*;\s*$/) {
+ my $fmt = get_quoted_string($line, $rawline);
+ if ($fmt ne "" && $fmt !~ /[^\\]\%/) {
+ if (WARN("PREFER_SEQ_PUTS",
+ "Prefer seq_puts to seq_printf\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/;
+ }
+ }
+ }
+
+# Check for misused memsets
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*$FuncArg\s*\)/s) {
+
+ my $ms_addr = $2;
+ my $ms_val = $7;
+ my $ms_size = $12;
+
+ if ($ms_size =~ /^(0x|)0$/i) {
+ ERROR("MEMSET",
+ "memset to 0's uses 0 as the 2nd argument, not the 3rd\n" . "$here\n$stat\n");
+ } elsif ($ms_size =~ /^(0x|)1$/i) {
+ WARN("MEMSET",
+ "single byte memset is suspicious. Swapped 2nd/3rd argument?\n" . "$here\n$stat\n");
+ }
+ }
+
+# typecasts on min/max could be min_t/max_t
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+(?:.*?)\b(min|max)\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\)/) {
+ if (defined $2 || defined $7) {
+ my $call = $1;
+ my $cast1 = deparenthesize($2);
+ my $arg1 = $3;
+ my $cast2 = deparenthesize($7);
+ my $arg2 = $8;
+ my $cast;
+
+ if ($cast1 ne "" && $cast2 ne "" && $cast1 ne $cast2) {
+ $cast = "$cast1 or $cast2";
+ } elsif ($cast1 ne "") {
+ $cast = $cast1;
+ } else {
+ $cast = $cast2;
+ }
+ WARN("MINMAX",
+ "$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . "$here\n$stat\n");
+ }
+ }
+
+# check usleep_range arguments
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+(?:.*?)\busleep_range\s*\(\s*($FuncArg)\s*,\s*($FuncArg)\s*\)/) {
+ my $min = $1;
+ my $max = $7;
+ if ($min eq $max) {
+ WARN("USLEEP_RANGE",
+ "usleep_range should not use min == max args; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n");
+ } elsif ($min =~ /^\d+$/ && $max =~ /^\d+$/ &&
+ $min > $max) {
+ WARN("USLEEP_RANGE",
+ "usleep_range args reversed, use min then max; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n");
+ }
+ }
+
+# check for naked sscanf
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $line =~ /\bsscanf\b/ &&
+ ($stat !~ /$Ident\s*=\s*sscanf\s*$balanced_parens/ &&
+ $stat !~ /\bsscanf\s*$balanced_parens\s*(?:$Compare)/ &&
+ $stat !~ /(?:$Compare)\s*\bsscanf\s*$balanced_parens/)) {
+ my $lc = $stat =~ tr@\n@@;
+ $lc = $lc + $linenr;
+ my $stat_real = raw_line($linenr, 0);
+ for (my $count = $linenr + 1; $count <= $lc; $count++) {
+ $stat_real = $stat_real . "\n" . raw_line($count, 0);
+ }
+ WARN("NAKED_SSCANF",
+ "unchecked sscanf return value\n" . "$here\n$stat_real\n");
+ }
+
+# check for simple sscanf that should be kstrto<foo>
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $line =~ /\bsscanf\b/) {
+ my $lc = $stat =~ tr@\n@@;
+ $lc = $lc + $linenr;
+ my $stat_real = raw_line($linenr, 0);
+ for (my $count = $linenr + 1; $count <= $lc; $count++) {
+ $stat_real = $stat_real . "\n" . raw_line($count, 0);
+ }
+ if ($stat_real =~ /\bsscanf\b\s*\(\s*$FuncArg\s*,\s*("[^"]+")/) {
+ my $format = $6;
+ my $count = $format =~ tr@%@%@;
+ if ($count == 1 &&
+ $format =~ /^"\%(?i:ll[udxi]|[udxi]ll|ll|[hl]h?[udxi]|[udxi][hl]h?|[hl]h?|[udxi])"$/) {
+ WARN("SSCANF_TO_KSTRTO",
+ "Prefer kstrto<type> to single variable sscanf\n" . "$here\n$stat_real\n");
+ }
+ }
+ }
+
+# check for new externs in .h files.
+ if ($realfile =~ /\.h$/ &&
+ $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) {
+ if (CHK("AVOID_EXTERNS",
+ "extern prototypes should be avoided in .h files\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
+ }
+ }
+
+# check for new externs in .c files.
+ if ($realfile =~ /\.c$/ && defined $stat &&
+ $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s)
+ {
+ my $function_name = $1;
+ my $paren_space = $2;
+
+ my $s = $stat;
+ if (defined $cond) {
+ substr($s, 0, length($cond), '');
+ }
+ if ($s =~ /^\s*;/ &&
+ $function_name ne 'uninitialized_var')
+ {
+ WARN("AVOID_EXTERNS",
+ "externs should be avoided in .c files\n" . $herecurr);
+ }
+
+ if ($paren_space =~ /\n/) {
+ WARN("FUNCTION_ARGUMENTS",
+ "arguments for function declarations should follow identifier\n" . $herecurr);
+ }
+
+ } elsif ($realfile =~ /\.c$/ && defined $stat &&
+ $stat =~ /^.\s*extern\s+/)
+ {
+ WARN("AVOID_EXTERNS",
+ "externs should be avoided in .c files\n" . $herecurr);
+ }
+
+# check for pointless casting of kmalloc return
+ if ($line =~ /\*\s*\)\s*[kv][czm]alloc(_node){0,1}\b/) {
+ WARN("UNNECESSARY_CASTS",
+ "unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr);
+ }
+
+# alloc style
+# p = alloc(sizeof(struct foo), ...) should be p = alloc(sizeof(*p), ...)
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*([kv][mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) {
+ CHK("ALLOC_SIZEOF_STRUCT",
+ "Prefer $3(sizeof(*$1)...) over $3($4...)\n" . $herecurr);
+ }
+
+# check for multiple semicolons
+ if ($line =~ /;\s*;\s*$/) {
+ if (WARN("ONE_SEMICOLON",
+ "Statements terminations use 1 semicolon\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g;
+ }
+ }
+
+# check for case / default statements not preceeded by break/fallthrough/switch
+ if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
+ my $has_break = 0;
+ my $has_statement = 0;
+ my $count = 0;
+ my $prevline = $linenr;
+ while ($prevline > 1 && $count < 3 && !$has_break) {
+ $prevline--;
+ my $rline = $rawlines[$prevline - 1];
+ my $fline = $lines[$prevline - 1];
+ last if ($fline =~ /^\@\@/);
+ next if ($fline =~ /^\-/);
+ next if ($fline =~ /^.(?:\s*(?:case\s+(?:$Ident|$Constant)[\s$;]*|default):[\s$;]*)*$/);
+ $has_break = 1 if ($rline =~ /fall[\s_-]*(through|thru)/i);
+ next if ($fline =~ /^.[\s$;]*$/);
+ $has_statement = 1;
+ $count++;
+ $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|return\b|goto\b|continue\b)/);
+ }
+ if (!$has_break && $has_statement) {
+ WARN("MISSING_BREAK",
+ "Possible switch case/default not preceeded by break or fallthrough comment\n" . $herecurr);
+ }
+ }
+
+# check for switch/default statements without a break;
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
+ my $ctx = '';
+ my $herectx = $here . "\n";
+ my $cnt = statement_rawlines($stat);
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+ WARN("DEFAULT_NO_BREAK",
+ "switch default: should use break\n" . $herectx);
+ }
+
+# check for comparisons against true and false
+ if ($line =~ /\+\s*(.*?)\b(true|false|$Lval)\s*(==|\!=)\s*(true|false|$Lval)\b(.*)$/i) {
+ my $lead = $1;
+ my $arg = $2;
+ my $test = $3;
+ my $otype = $4;
+ my $trail = $5;
+ my $op = "!";
+
+ ($arg, $otype) = ($otype, $arg) if ($arg =~ /^(?:true|false)$/i);
+
+ my $type = lc($otype);
+ if ($type =~ /^(?:true|false)$/) {
+ if (("$test" eq "==" && "$type" eq "true") ||
+ ("$test" eq "!=" && "$type" eq "false")) {
+ $op = "";
+ }
+
+ CHK("BOOL_COMPARISON",
+ "Using comparison to $otype is error prone\n" . $herecurr);
+ }
+ }
+
+# check for semaphores initialized locked
+ if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) {
+ WARN("CONSIDER_COMPLETION",
+ "consider using a completion\n" . $herecurr);
+ }
+
+# check for %L{u,d,i} in strings
+ my $string;
+ while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) {
+ $string = substr($rawline, $-[1], $+[1] - $-[1]);
+ $string =~ s/%%/__/g;
+ if ($string =~ /(?<!%)%L[udi]/) {
+ WARN("PRINTF_L",
+ "\%Ld/%Lu are not-standard C, use %lld/%llu\n" . $herecurr);
+ last;
+ }
+ }
+
+
+# Mode permission misuses where it seems decimal should be octal
+# This uses a shortcut match to avoid unnecessary uses of a slow foreach loop
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /$mode_perms_search/) {
+ foreach my $entry (@mode_permission_funcs) {
+ my $func = $entry->[0];
+ my $arg_pos = $entry->[1];
+
+ my $skip_args = "";
+ if ($arg_pos > 1) {
+ $arg_pos--;
+ $skip_args = "(?:\\s*$FuncArg\\s*,\\s*){$arg_pos,$arg_pos}";
+ }
+ my $test = "\\b$func\\s*\\(${skip_args}([\\d]+)\\s*[,\\)]";
+ if ($line =~ /$test/) {
+ my $val = $1;
+ $val = $6 if ($skip_args ne "");
+
+ if ($val !~ /^0$/ &&
+ (($val =~ /^$Int$/ && $val !~ /^$Octal$/) ||
+ length($val) ne 4)) {
+ ERROR("NON_OCTAL_PERMISSIONS",
+ "Use 4 digit octal (0777) not decimal permissions\n" . $herecurr);
+ }
+ }
+ }
+ }
+ }
+
+ # If we have no input at all, then there is nothing to report on
+ # so just keep quiet.
+ if ($#rawlines == -1) {
+ exit(0);
+ }
+
+ # In mailback mode only produce a report in the negative, for
+ # things that appear to be patches.
+ if ($mailback && ($clean == 1 || !$is_patch)) {
+ exit(0);
+ }
+
+ # This is not a patch, and we are are in 'no-patch' mode so
+ # just keep quiet.
+ if (!$chk_patch && !$is_patch) {
+ exit(0);
+ }
+
+ if (!$is_patch) {
+ ERROR("NOT_UNIFIED_DIFF",
+ "Does not appear to be a unified-diff format patch\n");
+ }
+ if ($is_patch && $subject_trailing_dot != 0) {
+ ERROR("SUBJECT_TRAILING_DOT",
+ "The subject of the patch should not end with a dot.\n");
+ }
+ if ($is_patch && $chk_signoff && $signoff == 0) {
+ ERROR("MISSING_SIGN_OFF",
+ "Missing Signed-off-by: line(s)\n");
+ }
+
+ print report_dump();
+ if ($summary && !($clean == 1 && $quiet == 1)) {
+ print "$filename " if ($summary_file);
+ if ($cnt_error > 0) {
+ print "Patch not according to coding guidelines! please fix.\n";
+ print "total: $cnt_error errors, $cnt_warn warnings, " .
+ (($check)? "$cnt_chk checks, " : "") .
+ "$cnt_lines lines checked\n"; exit 1;
+ } else {
+ print "total: $cnt_warn warnings, " .
+ (($check)? "$cnt_chk checks, " : "") .
+ "$cnt_lines lines checked\n";
+ print "Patch found to have warnings, please fix if necessary.\n" if ($cnt_warn > 0);
+ exit 2;
+ }
+ print "\n" if ($quiet == 0);
+ }
+
+ if ($quiet == 0) {
+
+ if ($^V lt 5.10.0) {
+ print("NOTE: perl $^V is not modern enough to detect all possible issues.\n");
+ print("An upgrade to at least perl v5.10.0 is suggested.\n\n");
+ }
+
+ # If there were whitespace errors which cleanpatch can fix
+ # then suggest that.
+ if ($rpt_cleaners) {
+ print "NOTE: whitespace errors detected, you may wish to use scripts/cleanpatch or\n";
+ print " scripts/cleanfile\n\n";
+ $rpt_cleaners = 0;
+ }
+ }
+
+ hash_show_words(\%use_type, "Used");
+ hash_show_words(\%ignore_type, "Ignored");
+
+ if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
+ my $newfile = $filename;
+ $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
+ my $linecount = 0;
+ my $f;
+
+ open($f, '>', $newfile)
+ or die "$P: Can't open $newfile for write\n";
+ foreach my $fixed_line (@fixed) {
+ $linecount++;
+ if ($file) {
+ if ($linecount > 3) {
+ $fixed_line =~ s/^\+//;
+ print $f $fixed_line. "\n";
+ }
+ } else {
+ print $f $fixed_line . "\n";
+ }
+ }
+ close($f);
+
+ if (!$quiet) {
+ print << "EOM";
+Wrote EXPERIMENTAL --fix correction(s) to '$newfile'
+
+Do _NOT_ trust the results written to this file.
+Do _NOT_ submit these changes without inspecting them for correctness.
+
+This EXPERIMENTAL file is simply a convenience to help rewrite patches.
+No warranties, expressed or implied...
+EOM
+ }
+ }
+
+ if ($clean == 1 && $quiet == 0) {
+ print "$vname has no obvious style problems and is ready for submission.\n"
+ }
+ if ($clean == 0 && $quiet == 0) {
+ print << "EOM";
+$vname has style problems, please review.
+
+If any of these errors are false positives, please report
+them to the maintainer, see MAINTAINERS
+EOM
+ }
+
+ return $clean;
+}
diff --git a/build-aux/config.guess.dist b/build-aux/config.guess.dist
new file mode 100755
index 00000000000..881ba7a0438
--- /dev/null
+++ b/build-aux/config.guess.dist
@@ -0,0 +1,14 @@
+#!/bin/sh
+#
+# This script is intentionally left empty. Distributions that package GlusterFS
+# may want to to replace it with an updated copy from the automake project.
+#
+
+cat << EOM
+It is not expected to execute this script. When you are building from a
+released tarball (generated with 'make dist'), you are expected to pass
+--build=... and --host=... to ./configure or replace this config.guess script
+in the sources with an updated version.
+EOM
+
+exit 0
diff --git a/build-aux/config.sub.dist b/build-aux/config.sub.dist
new file mode 100755
index 00000000000..c5a0dbad282
--- /dev/null
+++ b/build-aux/config.sub.dist
@@ -0,0 +1,14 @@
+#!/bin/sh
+#
+# This script is intentionally left empty. Distributions that package GlusterFS
+# may want to to replace it with an updated copy from the automake project.
+#
+
+cat << EOM
+It is not expected to execute this script. When you are building from a
+released tarball (generated with 'make dist'), you are expected to pass
+--build=... and --host=... to ./configure or replace this config.sub script in
+the sources with an updated version.
+EOM
+
+exit 0
diff --git a/build-aux/pkg-version b/build-aux/pkg-version
new file mode 100755
index 00000000000..17ceab70c03
--- /dev/null
+++ b/build-aux/pkg-version
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# To override version/release from git,
+# create VERSION file containing text with version/release
+# eg. v3.4.0-1
+
+# One thing to note, If one does 'git clone --depth N glusterfs.git',
+# the git describe command doesn't work. Hence you notice below that
+# we have added timestamp as version (YYYY.MM.DD) and release (HH.mmss)
+PKG_VERSION=`cat VERSION 2> /dev/null || git describe --tags --match "v[0-9]*" 2>/dev/null`
+
+get_version()
+{
+ # tags and output versions:
+ # - v3.4.0 => 3.4.0 (upstream clean)
+ # - v3.4.0-1 => 3.4.0 (downstream clean)
+ # - v3.4.0-2-g34e62f => 3.4.0 (upstream dirty)
+ # - v3.4.0-1-2-g34e62f => 3.4.0 (downstream dirty)
+ AWK_VERSION='
+ BEGIN { FS="-" }
+ /^v[0-9]/ {
+ sub(/^v/,"") ; print $1
+ }'
+
+ version=$(echo $PKG_VERSION | awk "$AWK_VERSION" | tr -cd '[:alnum:].')
+ if [ "x${version}" == "x" ] ; then
+ version=$(date +%Y.%m.%d | tr -d '\n')
+ fi
+ echo $version | tr -d '\n'
+}
+
+get_release()
+{
+ # tags and output releases:
+ # - v3.4.0 => 0 (upstream clean)
+ # - v3.4.0-1 => 1 (downstream clean)
+ # - v3.4.0-2-g34e62f1 => 2.git34e62f1 (upstream dirty)
+ # - v3.4.0-1-2-g34e62f1 => 1.2.git34e62f1 (downstream dirty)
+ AWK_RELEASE='
+ BEGIN { FS="-"; OFS="." }
+ /^v[0-9]/ {
+ if (NF == 1) print 0
+ else if (NF == 2) print $2
+ else if (NF == 3) print $2, "git" substr($3, 2)
+ else if (NF == 4) print $2, $3, "git" substr($4, 2)
+ }'
+
+ release=$(echo $PKG_VERSION | awk "$AWK_RELEASE" | tr -cd '[:alnum:].')
+ if [ "x${release}" == "x" ] ; then
+ release=$(date +%H.%M%S | tr -d '\n')
+ fi
+ echo $release | tr -d '\n'
+}
+
+if test "x$1" = "x--full"; then
+ echo -n "v$(get_version)-$(get_release)"
+elif test "x$1" = "x--version"; then
+ get_version
+elif test "x$1" = "x--release"; then
+ get_release
+else
+ echo "usage: $0 [--full|--version|--release]"
+ exit 1
+fi
diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am
index 80028361886..16063f27c7f 100644
--- a/cli/src/Makefile.am
+++ b/cli/src/Makefile.am
@@ -1,26 +1,37 @@
sbin_PROGRAMS = gluster
-gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c \
+gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c cli-cmd-global.c \
cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\
- cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c
+ cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c cli-quotad-client.c cli-cmd-snapshot.c
-gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
+gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD) \
+ $(top_builddir)/libglusterd/src/libglusterd.la \
$(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(XML_LIBS)
-gluster_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS) $(LIBXML2_LIBS)
-noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h
+gluster_LDFLAGS = $(GF_LDFLAGS)
+noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h cli-quotad-client.h
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
- -I$(top_srcdir)/rpc/xdr/src\
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/libglusterd/src \
+ -I$(top_builddir)/rpc/xdr/src \
-DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS)\
- -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"\
- $(LIBXML2_CFLAGS)
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\"\
+ -DGLFSHEAL_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(XML_CFLAGS)
CLEANFILES =
$(top_builddir)/libglusterfs/src/libglusterfs.la:
$(MAKE) -C $(top_builddir)/libglusterfs/src/ all
+
+$(top_builddir)/libglusterd/src/libglusterd.la:
+ $(MAKE) -C $(top_builddir)/libglusterd/src/ all
+
+install-data-hook:
+ $(mkdir_p) $(DESTDIR)$(localstatedir)/run/gluster
diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c
new file mode 100644
index 00000000000..2c9a5f01bb1
--- /dev/null
+++ b/cli/src/cli-cmd-global.c
@@ -0,0 +1,195 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include <sys/socket.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "cli1-xdr.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
+
+int
+cli_cmd_global_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+int
+cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+struct cli_cmd global_cmds[] = {
+ {
+ "global help",
+ cli_cmd_global_help_cbk,
+ "list global commands",
+ },
+ {
+ "get-state [<daemon>] [[odir </path/to/output/dir/>] "
+ "[file <filename>]] [detail|volumeoptions]",
+ cli_cmd_get_state_cbk,
+ "Get local state representation of mentioned daemon",
+ },
+ {
+ "nfs-ganesha {enable| disable} ",
+ cli_cmd_ganesha_cbk,
+ "Enable/disable NFS-Ganesha support",
+ },
+ {NULL, NULL, NULL}};
+
+int
+cli_cmd_global_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *global_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_MALLOC(sizeof(global_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, global_cmds, sizeof(global_cmds));
+ count = (sizeof(global_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+
+ cli_out("\ngluster global commands");
+ cli_out("========================\n");
+ for (global_cmd = cmd; global_cmd->pattern; global_cmd++)
+ if (_gf_false == global_cmd->disable)
+ cli_out("%s - %s", global_cmd->pattern, global_cmd->desc);
+
+ cli_out("\n");
+ GF_FREE(cmd);
+ return 0;
+}
+
+int
+cli_cmd_global_register(struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+ for (cmd = global_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GANESHA];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_ganesha_parse(state, words, wordcount, &options, &op_errstr);
+ if (ret) {
+ if (op_errstr) {
+ cli_err("%s", op_errstr);
+ GF_FREE(op_errstr);
+ } else
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Setting global option failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
+
+int
+cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
+ ret = cli_cmd_get_state_parse(state, words, wordcount, &options,
+ &op_errstr);
+
+ if (ret) {
+ if (op_errstr) {
+ cli_err("%s", op_errstr);
+ cli_usage_out(word->pattern);
+ GF_FREE(op_errstr);
+ } else
+ cli_usage_out(word->pattern);
+
+ parse_error = 1;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_STATE];
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Getting daemon state failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
index 66d755fc2dc..e961d88da86 100644
--- a/cli/src/cli-cmd-misc.c
+++ b/cli/src/cli-cmd-misc.c
@@ -1,105 +1,117 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "protocol-common.h"
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
-
extern struct cli_cmd volume_cmds[];
+extern struct cli_cmd bitrot_cmds[];
+extern struct cli_cmd quota_cmds[];
extern struct cli_cmd cli_probe_cmds[];
extern struct cli_cmd cli_log_cmds[];
extern struct cli_cmd cli_system_cmds[];
+extern struct cli_cmd cli_bd_cmds[];
+extern struct cli_cmd snapshot_cmds[];
+extern struct cli_cmd global_cmds[];
struct cli_cmd cli_misc_cmds[];
int
-cli_cmd_quit_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_quit_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- exit (0);
+ exit(0);
}
-int
-cli_cmd_display_help (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
+static gf_boolean_t
+cli_is_help_command(const char *pattern)
{
- struct cli_cmd *cmd[] = {volume_cmds, cli_probe_cmds,
- cli_misc_cmds, NULL};
- struct cli_cmd *cmd_ind = NULL;
- int i = 0;
-
- /* cli_system_cmds commands for internal usage
- they are not exposed
- */
- for (i=0; cmd[i]!=NULL; i++)
- for (cmd_ind = cmd[i]; cmd_ind->pattern; cmd_ind++)
- if (_gf_false == cmd_ind->disable)
- cli_out ("%s - %s", cmd_ind->pattern,
- cmd_ind->desc);
-
- return 0;
+ /* FixFixFix
+ * This is not the best way to determine whether
+ * this is a help command
+ */
+ if (strstr(pattern, "help"))
+ return _gf_true;
+
+ return _gf_false;
}
-struct cli_cmd cli_misc_cmds[] = {
- { "quit",
- cli_cmd_quit_cbk,
- "quit"},
+int
+cli_cmd_display_help(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ static struct cli_cmd *cmd[] = {
+ cli_misc_cmds, cli_probe_cmds, volume_cmds, bitrot_cmds,
+ quota_cmds, snapshot_cmds, global_cmds, NULL};
+ struct cli_cmd *cmd_ind = NULL;
+ int i = 0;
+ gf_boolean_t list_all = _gf_false;
+
+ /* cli_system_cmds commands for internal usage
+ they are not exposed
+ */
+
+ /* If "help all" */
+ if (wordcount == 2)
+ list_all = _gf_true;
+
+ for (i = 0; cmd[i] != NULL; i++) {
+ for (cmd_ind = cmd[i]; cmd_ind->pattern; cmd_ind++) {
+ if ((_gf_false == cmd_ind->disable) &&
+ cli_is_help_command(cmd_ind->pattern)) {
+ if (list_all && (cmd_ind->cbk)) {
+ cmd_ind->cbk(state, in_word, words, wordcount);
+ } else {
+ cli_out(" %-25s- %s", cmd_ind->pattern, cmd_ind->desc);
+ }
+ }
+ }
+ }
+
+ cli_out("\n");
+ return 0;
+}
- { "help",
- cli_cmd_display_help,
- "display command options"},
+struct cli_cmd cli_help_cmds[] = {
+ {"help [all]", cli_cmd_display_help, "display help for command classes"},
- { "exit",
- cli_cmd_quit_cbk,
- "exit"},
+ {NULL, NULL, NULL}};
- { NULL, NULL, NULL }
-};
+struct cli_cmd cli_misc_cmds[] = {{"quit", cli_cmd_quit_cbk, "quit"},
+ {"exit", cli_cmd_quit_cbk, "exit"},
+ {NULL, NULL, NULL}};
int
-cli_cmd_misc_register (struct cli_state *state)
+cli_cmd_misc_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
-
- for (cmd = cli_misc_cmds; cmd->pattern; cmd++) {
-
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = cli_misc_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+ for (cmd = cli_help_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 8199fbcbcfb..34620b4a31b 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -1,2324 +1,5946 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <fnmatch.h>
+#include <time.h>
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
#include "protocol-common.h"
#include "cli1-xdr.h"
+#define MAX_SNAP_DESCRIPTION_LEN 1024
+
+static struct snap_config_opt_vals_ snap_confopt_vals[] = {
+ {.op_name = "snap-max-hard-limit",
+ .question = "Changing snapshot-max-hard-limit "
+ "will limit the creation of new snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"},
+ {.op_name = "snap-max-soft-limit",
+ .question = "If Auto-delete is enabled, snap-max-soft-limit will"
+ " trigger deletion of oldest snapshot, on the "
+ "creation of new snapshot, when the "
+ "snap-max-soft-limit is reached.\n"
+ "Do you want to change the snap-max-soft-limit?"},
+ {.op_name = "both",
+ .question = "Changing snapshot-max-hard-limit "
+ "will limit the creation of new snapshots "
+ "if they exceed the new snapshot-max-hard-limit.\n"
+ "If Auto-delete is enabled, snap-max-soft-limit will"
+ " trigger deletion of oldest snapshot, on the "
+ "creation of new snapshot, when the "
+ "snap-max-soft-limit is reached.\n"
+ "Do you want to continue?"},
+ {
+ .op_name = NULL,
+ }};
+
+enum cli_snap_config_set_types {
+ GF_SNAP_CONFIG_SET_HARD = 0,
+ GF_SNAP_CONFIG_SET_SOFT = 1,
+ GF_SNAP_CONFIG_SET_BOTH = 2,
+};
+typedef enum cli_snap_config_set_types cli_snap_config_set_types;
+
+typedef struct _cli_brick {
+ struct list_head list;
+ const char *name;
+ int32_t len;
+} cli_brick_t;
+
+int
+cli_cmd_validate_volume(char *volname);
+
static const char *
-id_sel (void *wcon)
+id_sel(void *wcon)
{
- return (const char *)wcon;
+ return (const char *)wcon;
}
static char *
-str_getunamb (const char *tok, char **opwords)
+str_getunamb(const char *tok, char **opwords)
{
- return (char *)cli_getunamb (tok, (void **)opwords, id_sel);
+ return (char *)cli_getunamb(tok, (void **)opwords, id_sel);
}
int32_t
-cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
- char **bricks, int *brick_count)
+cli_cmd_ta_brick_parse(const char **words, int wordcount, char **ta_brick)
{
- int ret = 0;
- char *tmp_list = NULL;
- char brick_list[120000] = {0,};
- char *space = " ";
- char *delimiter = NULL;
- char *host_name = NULL;
- char *free_list_ptr = NULL;
- char *tmpptr = NULL;
- int j = 0;
- int brick_list_len = 0;
- char *tmp_host = NULL;
-
- GF_ASSERT (words);
- GF_ASSERT (wordcount);
- GF_ASSERT (bricks);
- GF_ASSERT (brick_index > 0);
- GF_ASSERT (brick_index < wordcount);
-
- strncpy (brick_list, space, strlen (space));
- brick_list_len++;
- while (brick_index < wordcount) {
- if (validate_brick_name ((char *)words[brick_index])) {
- cli_err ("Wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[brick_index]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr (words[brick_index], ':');
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
-
- if ((brick_list_len + strlen (words[brick_index]) + 1) > sizeof (brick_list)) {
- cli_err ("Total brick list is larger than a request. "
- "Can take (brick_count %d)", *brick_count);
- ret = -1;
- goto out;
- }
+ char *host_name = NULL;
+ char *tmp_host = NULL;
+ char *delimiter = NULL;
+ cli_brick_t *brick = NULL;
+ int ret = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(wordcount);
+
+ if (validate_brick_name((char *)words[wordcount - 1])) {
+ cli_err(
+ "Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[wordcount - 1]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[wordcount - 1], ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
+ }
- tmp_host = gf_strdup ((char *)words[brick_index]);
- if (!tmp_host) {
- gf_log ("cli", GF_LOG_ERROR, "Out of memory");
- ret = -1;
- goto out;
- }
- get_host_name (tmp_host, &host_name);
- if (!host_name) {
- ret = -1;
- gf_log("cli",GF_LOG_ERROR, "Unable to allocate "
- "memory");
- goto out;
- }
+ tmp_host = gf_strdup((char *)words[wordcount - 1]);
+ if (!tmp_host) {
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name(tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to retrieve "
+ "hostname");
+ goto out;
+ }
+
+ if (!(strcmp(host_name, "localhost") && strcmp(host_name, "127.0.0.1") &&
+ strncmp(host_name, "0.", 2))) {
+ cli_err(
+ "Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+ cli_err(
+ "internet address '%s' does not conform to "
+ "standards",
+ host_name);
+ }
+
+ brick = GF_MALLOC(sizeof(cli_brick_t), gf_common_list_node);
+ if (brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
- if (!(strcmp (host_name, "localhost") &&
- strcmp (host_name, "127.0.0.1"))) {
- cli_err ("Please provide a valid hostname/ip other "
- "than localhost or 127.0.0.1");
- ret = -1;
- GF_FREE (tmp_host);
- goto out;
- }
- if (!valid_internet_address (host_name, _gf_false)) {
- cli_err ("internet address '%s' does not conform to "
- "standards", host_name);
- }
- GF_FREE (tmp_host);
- tmp_list = gf_strdup (brick_list + 1);
- if (free_list_ptr) {
- GF_FREE (free_list_ptr);
- free_list_ptr = NULL;
- }
- free_list_ptr = tmp_list;
- j = 0;
- while(j < *brick_count) {
- strtok_r (tmp_list, " ", &tmpptr);
- if (!(strcmp (tmp_list, words[brick_index]))) {
- ret = -1;
- cli_err ("Found duplicate"
- " exports %s",words[brick_index]);
- goto out;
- }
- tmp_list = tmpptr;
- j++;
- }
- strcat (brick_list, words[brick_index]);
- strcat (brick_list, " ");
- brick_list_len += (strlen (words[brick_index]) + 1);
- ++(*brick_count);
- ++brick_index;
- }
+ brick->name = words[wordcount - 1];
+ brick->len = strlen(words[wordcount - 1]);
+ *ta_brick = GF_MALLOC(brick->len + 3, gf_common_mt_char);
+ if (*ta_brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
- *bricks = gf_strdup (brick_list);
- if (!*bricks)
- ret = -1;
+ strcat(*ta_brick, " ");
+ strcat(*ta_brick, brick->name);
+ strcat(*ta_brick, " ");
out:
- if (free_list_ptr)
- GF_FREE (free_list_ptr);
- return ret;
+ if (tmp_host) {
+ GF_FREE(tmp_host);
+ tmp_host = NULL;
+ }
+ if (brick) {
+ GF_FREE(brick);
+ brick = NULL;
+ }
+
+ return ret;
}
int32_t
-cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_bricks_parse(const char **words, int wordcount, int brick_index,
+ char **bricks, int *brick_count)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
- int count = 1;
- int sub_count = 1;
- int brick_index = 0;
- int i = 0;
- char *trans_type = NULL;
- int32_t index = 0;
- char *bricks = NULL;
- int32_t brick_count = 0;
- char *opwords[] = { "replica", "stripe", "transport", NULL };
- char *w = NULL;
- int op_count = 0;
- int32_t replica_count = 1;
- int32_t stripe_count = 1;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
+ int ret = 0;
+ char *delimiter = NULL;
+ char *host_name = NULL;
+ char *tmp_host = NULL;
+ char *bricks_str = NULL;
+ int len = 0;
+ int brick_list_len = 1; /* For initial space */
+ struct list_head brick_list = {
+ 0,
+ };
+ cli_brick_t *brick = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(wordcount);
+ GF_ASSERT(bricks);
+ GF_ASSERT(brick_index > 0);
+ GF_ASSERT(brick_index < wordcount);
+
+ INIT_LIST_HEAD(&brick_list);
+
+ while (brick_index < wordcount) {
+ if (validate_brick_name((char *)words[brick_index])) {
+ cli_err(
+ "Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[brick_index]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[brick_index], ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
+ }
- dict = dict_new ();
+ tmp_host = gf_strdup((char *)words[brick_index]);
+ if (!tmp_host) {
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name(tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to allocate "
+ "memory");
+ GF_FREE(tmp_host);
+ goto out;
+ }
- if (!dict)
+ if (!(strcmp(host_name, "localhost") &&
+ strcmp(host_name, "127.0.0.1") && strncmp(host_name, "0.", 2))) {
+ cli_err(
+ "Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ GF_FREE(tmp_host);
+ goto out;
+ }
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+ cli_err(
+ "internet address '%s' does not conform to "
+ "standards",
+ host_name);
+ }
+ GF_FREE(tmp_host);
+ list_for_each_entry(brick, &brick_list, list)
+ {
+ if (strcmp(brick->name, words[brick_index]) == 0) {
+ ret = -1;
+ cli_err("Found duplicate exports %s", words[brick_index]);
goto out;
+ }
+ }
- if (wordcount < 3)
- goto out;
+ brick = GF_MALLOC(sizeof(cli_brick_t), gf_common_list_node);
+ if (brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+ len = strlen(words[brick_index]);
+ brick->name = words[brick_index];
+ brick->len = len;
+ list_add_tail(&brick->list, &brick_list);
+
+ brick_list_len += len + 1; /* Brick name + space */
+ ++(*brick_count);
+ ++brick_index;
+ }
+
+ /* If brick count is not valid exit here */
+ if (!*brick_count) {
+ cli_err("No bricks specified");
+ ret = -1;
+ goto out;
+ }
- volname = (char *)words[2];
+ brick_list_len++; /* For terminating null char */
- GF_ASSERT (volname);
+ bricks_str = GF_MALLOC(brick_list_len, gf_common_mt_char);
+ if (bricks_str == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+ *bricks = bricks_str;
+ *bricks_str = ' ';
+ bricks_str++;
+ while (!list_empty(&brick_list)) {
+ brick = list_first_entry(&brick_list, cli_brick_t, list);
+ list_del_init(&brick->list);
+ memcpy(bricks_str, brick->name, brick->len);
+ bricks_str[brick->len] = ' ';
+ bricks_str += brick->len + 1;
+ GF_FREE(brick);
+ }
+ *bricks_str = 0;
- /* Validate the volume name here itself */
- {
- if (volname[0] == '-')
- goto out;
+out:
+ while (!list_empty(&brick_list)) {
+ brick = list_first_entry(&brick_list, cli_brick_t, list);
+ list_del_init(&brick->list);
+ GF_FREE(brick);
+ }
- if (!strcmp (volname, "all")) {
- cli_err ("\"all\" cannot be the name of a volume.");
- goto out;
- }
+ return ret;
+}
- if (strchr (volname, '/'))
- goto out;
+int32_t
+cli_cmd_create_disperse_check(struct cli_state *state, int *disperse,
+ int *redundancy, int *data, int count)
+{
+ int i = 0;
+ int tmp = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ char question[128];
+
+ const char *question1 =
+ "There isn't an optimal redundancy value "
+ "for this configuration. Do you want to "
+ "create the volume with redundancy 1 ?";
+
+ const char *question2 =
+ "The optimal redundancy for this "
+ "configuration is %d. Do you want to create "
+ "the volume with this value ?";
+
+ const char *question3 =
+ "This configuration is not optimal on most "
+ "workloads. Do you want to use it ?";
+
+ const char *question4 =
+ "Redundancy for this configuration is %d. "
+ "Do you want to create "
+ "the volume with this value ?";
+
+ if (*data > 0) {
+ if (*disperse > 0 && *redundancy > 0) {
+ if (*disperse != (*data + *redundancy)) {
+ cli_err(
+ "Disperse count(%d) should be equal "
+ "to sum of disperse-data count(%d) and "
+ "redundancy count(%d)",
+ *disperse, *data, *redundancy);
+ return -1;
+ }
+ } else if (*redundancy > 0) {
+ *disperse = *data + *redundancy;
+ } else if (*disperse > 0) {
+ *redundancy = *disperse - *data;
+ } else {
+ if ((count - *data) >= *data) {
+ cli_err(
+ "Please provide redundancy count "
+ "along with disperse-data count");
+ return -1;
+ } else {
+ sprintf(question, question4, count - *data);
+ answer = cli_cmd_get_confirmation(state, question);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+ *redundancy = count - *data;
+ *disperse = count;
+ }
+ }
+ }
- if (strlen (volname) > 512)
- goto out;
+ if (*disperse <= 0) {
+ if (count < 3) {
+ cli_err(
+ "number of bricks must be greater "
+ "than 2");
- for (i = 0; i < strlen (volname); i++)
- if (!isalnum (volname[i]) && (volname[i] != '_') && (volname[i] != '-'))
- goto out;
+ return -1;
}
+ *disperse = count;
+ }
- if (wordcount < 4) {
- ret = -1;
- goto out;
+ if (*redundancy == -1) {
+ tmp = *disperse - 1;
+ for (i = tmp / 2; (i > 0) && ((tmp & -tmp) != tmp); i--, tmp--)
+ ;
+
+ if (i == 0) {
+ answer = cli_cmd_get_confirmation(state, question1);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+
+ *redundancy = 1;
+ } else {
+ *redundancy = *disperse - tmp;
+ if (*redundancy > 1) {
+ sprintf(question, question2, *redundancy);
+ answer = cli_cmd_get_confirmation(state, question);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+ }
}
- type = GF_CLUSTER_TYPE_NONE;
- index = 3;
+ tmp = 0;
+ } else {
+ tmp = *disperse - *redundancy;
+ }
- while (op_count < 3) {
- ret = -1;
- w = str_getunamb (words[index], opwords);
- if (!w) {
- break;
- } else if ((strcmp (w, "replica")) == 0) {
- switch (type) {
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- case GF_CLUSTER_TYPE_REPLICATE:
- cli_err ("replica option given twice");
- goto out;
- case GF_CLUSTER_TYPE_NONE:
- type = GF_CLUSTER_TYPE_REPLICATE;
- break;
- case GF_CLUSTER_TYPE_STRIPE:
- type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
- break;
- }
+ if ((*redundancy < 1) || (*redundancy > (*disperse - 1) / 2)) {
+ cli_err(
+ "redundancy must be greater than or equal to 1 and "
+ "less than %d for a disperse %d volume",
+ (*disperse + 1) / 2, *disperse);
- if (wordcount < (index+2)) {
- ret = -1;
- goto out;
- }
- replica_count = strtol (words[index+1], NULL, 0);
- if (replica_count < 2) {
- cli_err ("replica count should be greater"
- " than 1");
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (dict, "replica-count", replica_count);
- if (ret)
- goto out;
-
- index += 2;
-
- } else if ((strcmp (w, "stripe")) == 0) {
- switch (type) {
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- case GF_CLUSTER_TYPE_STRIPE:
- cli_err ("stripe option given twice");
- goto out;
- case GF_CLUSTER_TYPE_NONE:
- type = GF_CLUSTER_TYPE_STRIPE;
- break;
- case GF_CLUSTER_TYPE_REPLICATE:
- type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
- break;
- }
- if (wordcount < (index + 2)) {
- ret = -1;
- goto out;
- }
- stripe_count = strtol (words[index+1], NULL, 0);
- if (stripe_count < 2) {
- cli_err ("stripe count should be greater"
- " than 1");
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (dict, "stripe-count", stripe_count);
- if (ret)
- goto out;
+ return -1;
+ }
- index += 2;
+ if ((tmp & -tmp) != tmp) {
+ answer = cli_cmd_get_confirmation(state, question3);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+ }
- } else if ((strcmp (w, "transport")) == 0) {
- if (trans_type) {
- cli_err ("'transport' option given more"
- " than one time");
- goto out;
- }
- if ((strcasecmp (words[index+1], "tcp") == 0)) {
- trans_type = gf_strdup ("tcp");
- } else if ((strcasecmp (words[index+1], "rdma") == 0)) {
- trans_type = gf_strdup ("rdma");
- } else if ((strcasecmp (words[index+1], "tcp,rdma") == 0) ||
- (strcasecmp (words[index+1], "rdma,tcp") == 0)) {
- trans_type = gf_strdup ("tcp,rdma");
- } else {
- gf_log ("", GF_LOG_ERROR, "incorrect transport"
- " protocol specified");
- ret = -1;
- goto out;
- }
- index += 2;
+ return 0;
+}
+
+static int32_t
+cli_validate_disperse_volume(char *word, gf1_cluster_type type,
+ const char **words, int32_t wordcount,
+ int32_t index, int32_t *disperse_count,
+ int32_t *redundancy_count, int32_t *data_count)
+{
+ int ret = -1;
+
+ switch (type) {
+ case GF_CLUSTER_TYPE_NONE:
+ case GF_CLUSTER_TYPE_DISPERSE:
+ if (strcmp(word, "disperse") == 0) {
+ if (*disperse_count >= 0) {
+ cli_err("disperse option given twice");
+ goto out;
+ }
+ if (wordcount < (index + 2)) {
+ goto out;
+ }
+ ret = gf_string2int(words[index + 1], disperse_count);
+ if (ret == -1 && errno == EINVAL) {
+ *disperse_count = 0;
+ ret = 1;
+ } else if (ret == -1) {
+ goto out;
} else {
- GF_ASSERT (!"opword mismatch");
- ret = -1;
+ if (*disperse_count < 3) {
+ cli_err(
+ "disperse count must "
+ "be greater than 2");
goto out;
+ }
+ ret = 2;
}
- op_count++;
- }
+ } else if (strcmp(word, "disperse-data") == 0) {
+ if (*data_count >= 0) {
+ cli_err("disperse-data option given twice");
+ goto out;
+ }
+ if (wordcount < (index + 2)) {
+ goto out;
+ }
+ ret = gf_string2int(words[index + 1], data_count);
+ if (ret == -1 || *data_count < 2) {
+ cli_err("disperse-data must be greater than 1");
+ goto out;
+ }
+ ret = 2;
+ } else if (strcmp(word, "redundancy") == 0) {
+ if (*redundancy_count >= 0) {
+ cli_err("redundancy option given twice");
+ goto out;
+ }
+ if (wordcount < (index + 2)) {
+ goto out;
+ }
+ ret = gf_string2int(words[index + 1], redundancy_count);
+ if (ret == -1 || *redundancy_count < 1) {
+ cli_err("redundancy must be greater than 0");
+ goto out;
+ }
+ ret = 2;
+ }
+ break;
+ case GF_CLUSTER_TYPE_REPLICATE:
+ cli_err(
+ "replicated-dispersed volume is not "
+ "supported");
+ goto out;
+ default:
+ cli_err("Invalid type given");
+ break;
+ }
+out:
+ return ret;
+}
- if (!trans_type)
- trans_type = gf_strdup ("tcp");
+int32_t
+cli_validate_volname(const char *volname)
+{
+ int32_t ret = -1;
+ int32_t i = -1;
+ int volname_len;
+ static const char *const invalid_volnames[] = {"volume",
+ "type",
+ "subvolumes",
+ "option",
+ "end-volume",
+ "all",
+ "volume_not_in_ring",
+ "description",
+ "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit",
+ "auto-delete",
+ "activate-on-create",
+ NULL};
+
+ if (volname[0] == '-')
+ goto out;
- sub_count = stripe_count * replica_count;
+ for (i = 0; invalid_volnames[i]; i++) {
+ if (!strcmp(volname, invalid_volnames[i])) {
+ cli_err("\"%s\" cannot be the name of a volume.", volname);
+ goto out;
+ }
+ }
- /* reset the count value now */
- count = 1;
+ if (strchr(volname, '/'))
+ goto out;
- if (index >= wordcount) {
- ret = -1;
- goto out;
+ volname_len = strlen(volname);
+ if (volname_len > GD_VOLUME_NAME_MAX) {
+ cli_err("Volume name exceeds %d characters.", GD_VOLUME_NAME_MAX);
+ goto out;
+ }
+
+ for (i = 0; i < volname_len; i++) {
+ if (!isalnum(volname[i]) && (volname[i] != '_') &&
+ (volname[i] != '-')) {
+ cli_err(
+ "Volume name should not contain \"%c\""
+ " character.\nVolume names can only"
+ "contain alphanumeric, '-' and '_' "
+ "characters.",
+ volname[i]);
+ goto out;
}
+ }
- brick_index = index;
+ ret = 0;
+out:
+ return ret;
+}
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
- &brick_count);
- if (ret)
- goto out;
+int32_t
+cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **brick_list)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
+ int sub_count = 1;
+ int brick_index = 0;
+ char *trans_type = NULL;
+ int32_t index = 0;
+ char *bricks = NULL;
+ char *ta_brick = NULL;
+ int32_t brick_count = 0;
+ static char *opwords[] = {"replica", "stripe", "transport",
+ "disperse", "redundancy", "disperse-data",
+ "arbiter", "thin-arbiter", NULL};
+
+ char *w = NULL;
+ int op_count = 0;
+ int32_t replica_count = 1;
+ int32_t arbiter_count = 0;
+ int32_t thin_arbiter_count = 0;
+ int32_t stripe_count = 1;
+ int32_t disperse_count = -1;
+ int32_t redundancy_count = -1;
+ int32_t disperse_data_count = -1;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount < 3)
+ goto out;
+
+ volname = (char *)words[2];
+
+ GF_ASSERT(volname);
- /* If brick-count is not valid when replica or stripe is
- given, exit here */
- if (!brick_count) {
- cli_err ("No bricks specified");
+ /* Validate the volume name here itself */
+ if (cli_validate_volname(volname) < 0)
+ goto out;
+
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+
+ type = GF_CLUSTER_TYPE_NONE;
+ index = 3;
+
+ while (op_count < 3) {
+ ret = -1;
+ w = str_getunamb(words[index], opwords);
+ if (!w) {
+ break;
+ } else if ((strcmp(w, "replica")) == 0) {
+ switch (type) {
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ case GF_CLUSTER_TYPE_REPLICATE:
+ cli_err("replica option given twice");
+ goto out;
+ case GF_CLUSTER_TYPE_NONE:
+ type = GF_CLUSTER_TYPE_REPLICATE;
+ break;
+ case GF_CLUSTER_TYPE_STRIPE:
+ cli_err("stripe option not supported");
+ goto out;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ cli_err(
+ "replicated-dispersed volume is not "
+ "supported");
+ goto out;
+ default:
+ cli_err("Invalid type given");
+ goto out;
+ }
+
+ if (wordcount < (index + 2)) {
ret = -1;
goto out;
- }
+ }
- if (brick_count % sub_count) {
- if (type == GF_CLUSTER_TYPE_STRIPE)
- cli_err ("number of bricks is not a multiple of "
- "stripe count");
- else if (type == GF_CLUSTER_TYPE_REPLICATE)
- cli_err ("number of bricks is not a multiple of "
- "replica count");
- else
- cli_err ("number of bricks given doesn't match "
- "required count");
+ replica_count = strtol(words[index + 1], NULL, 0);
+ if (replica_count < 2) {
+ cli_err(
+ "replica count should be greater"
+ " than 1");
+ ret = -1;
+ goto out;
+ }
+
+ index += 2;
+ if (words[index]) {
+ if (!strcmp(words[index], "arbiter")) {
+ ret = gf_string2int(words[index + 1], &arbiter_count);
+ if ((ret == -1) || (arbiter_count != 1)) {
+ cli_err(
+ "For arbiter "
+ "configuration, "
+ "replica count must be"
+ " 2 and arbiter count "
+ "must be 1. The 3rd "
+ "brick of the replica "
+ "will be the arbiter");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "arbiter-count", arbiter_count);
+ if (ret)
+ goto out;
+ index += 2;
+ } else if (!strcmp(words[index], "thin-arbiter")) {
+ ret = gf_string2int(words[index + 1], &thin_arbiter_count);
+ if ((ret == -1) || (thin_arbiter_count != 1) ||
+ (replica_count != 2)) {
+ cli_err(
+ "For thin-arbiter "
+ "configuration, "
+ "replica count must be"
+ " 2 and thin-arbiter count "
+ "must be 1. The 3rd "
+ "brick of the replica "
+ "will be the thin-arbiter brick");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "thin-arbiter-count",
+ thin_arbiter_count);
+ if (ret)
+ goto out;
+ index += 2;
+ }
+ }
+
+ /* Do this to keep glusterd happy with sending
+ "replica 3 arbiter 1" options to server */
+ if ((arbiter_count == 1) && (replica_count == 2))
+ replica_count += arbiter_count;
+
+ if (replica_count == 2 && thin_arbiter_count == 0) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ question =
+ "Replica 2 volumes are prone"
+ " to split-brain. Use "
+ "Arbiter or Replica 3 to "
+ "avoid this. See: "
+ "http://docs.gluster.org/en/latest/"
+ "Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to "
+ "continue?\n";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Volume create "
+ "cancelled, exiting");
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+ ret = dict_set_int32(dict, "replica-count", replica_count);
+ if (ret)
+ goto out;
+ } else if ((strcmp(w, "stripe")) == 0) {
+ cli_err("stripe option not supported");
+ goto out;
+ } else if ((strcmp(w, "transport")) == 0) {
+ if (trans_type) {
+ cli_err(
+ "'transport' option given more"
+ " than one time");
+ goto out;
+ }
+ if ((strcasecmp(words[index + 1], "tcp") == 0)) {
+ trans_type = gf_strdup("tcp");
+ } else if ((strcasecmp(words[index + 1], "rdma") == 0)) {
+ trans_type = gf_strdup("rdma");
+ } else if ((strcasecmp(words[index + 1], "tcp,rdma") == 0) ||
+ (strcasecmp(words[index + 1], "rdma,tcp") == 0)) {
+ trans_type = gf_strdup("tcp,rdma");
+ } else {
+ gf_log("", GF_LOG_ERROR,
+ "incorrect transport"
+ " protocol specified");
ret = -1;
goto out;
+ }
+ index += 2;
+
+ } else if ((strcmp(w, "disperse") == 0) ||
+ (strcmp(w, "redundancy") == 0) ||
+ (strcmp(w, "disperse-data") == 0)) {
+ ret = cli_validate_disperse_volume(
+ w, type, words, wordcount, index, &disperse_count,
+ &redundancy_count, &disperse_data_count);
+ if (ret < 0)
+ goto out;
+ index += ret;
+ type = GF_CLUSTER_TYPE_DISPERSE;
+ } else if ((strcmp(w, "arbiter") == 0)) {
+ cli_err(
+ "arbiter option must be preceded by replica "
+ "option.");
+ ret = -1;
+ goto out;
+ } else if ((strcmp(w, "thin-arbiter") == 0)) {
+ cli_err(
+ "thin-arbiter option must be preceded by replica "
+ "option.");
+ ret = -1;
+ goto out;
+ } else {
+ GF_ASSERT(!"opword mismatch");
+ ret = -1;
+ goto out;
}
+ op_count++;
+ }
- /* Everything if parsed fine. start setting info in dict */
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ if (!trans_type)
+ trans_type = gf_strdup("tcp");
- ret = dict_set_int32 (dict, "type", type);
- if (ret)
- goto out;
+ if (index >= wordcount) {
+ ret = -1;
+ goto out;
+ }
+
+ brick_index = index;
- ret = dict_set_dynstr (dict, "transport", trans_type);
+ if (strcmp(words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ // Exclude the thin-arbiter-brick i.e. last brick in the bricks list
+ if (thin_arbiter_count == 1) {
+ ret = cli_cmd_bricks_parse(words, wc - 1, brick_index, &bricks,
+ &brick_count);
if (ret)
- goto out;
+ goto out;
+
+ ret = cli_cmd_ta_brick_parse(words, wc, &ta_brick);
+ } else {
+ ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks,
+ &brick_count);
+ }
- ret = dict_set_dynstr (dict, "bricks", bricks);
+ if (ret)
+ goto out;
+
+ if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ ret = cli_cmd_create_disperse_check(state, &disperse_count,
+ &redundancy_count,
+ &disperse_data_count, brick_count);
+ if (!ret)
+ ret = dict_set_int32(dict, "disperse-count", disperse_count);
+ if (!ret)
+ ret = dict_set_int32(dict, "redundancy-count", redundancy_count);
if (ret)
- goto out;
+ goto out;
+
+ sub_count = disperse_count;
+ } else
+ sub_count = stripe_count * replica_count;
+
+ if (brick_count % sub_count) {
+ if (type == GF_CLUSTER_TYPE_STRIPE)
+ cli_err(
+ "number of bricks is not a multiple of "
+ "stripe count");
+ else if (type == GF_CLUSTER_TYPE_REPLICATE)
+ cli_err(
+ "number of bricks is not a multiple of "
+ "replica count");
+ else if (type == GF_CLUSTER_TYPE_DISPERSE)
+ cli_err(
+ "number of bricks is not a multiple of "
+ "disperse count");
+ else
+ cli_err(
+ "number of bricks given doesn't match "
+ "required count");
+
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_int32 (dict, "count", brick_count);
+ /* Everything is parsed fine. start setting info in dict */
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "type", type);
+ if (ret)
+ goto out;
+
+ ret = dict_set_dynstr(dict, "transport", trans_type);
+ if (ret)
+ goto out;
+ trans_type = NULL;
+
+ ret = dict_set_dynstr(dict, "bricks", bricks);
+ if (ret)
+ goto out;
+
+ if (thin_arbiter_count == 1) {
+ ret = dict_set_dynstr(dict, "ta-brick", ta_brick);
if (ret)
- goto out;
+ goto out;
+ }
- *options = dict;
+ ret = dict_set_int32(dict, "count", brick_count);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "force", is_force);
+ if (ret)
+ goto out;
+ *options = dict;
+ *brick_list = bricks;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");
- if (dict)
- dict_destroy (dict);
- }
- return ret;
+ if (ret) {
+ GF_FREE(bricks);
+ GF_FREE(ta_brick);
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");
+ if (dict)
+ dict_unref(dict);
+ }
+
+ GF_FREE(trans_type);
+
+ return ret;
}
int32_t
-cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
+ dict = dict_new();
- if (!dict)
- goto out;
+ if (!dict)
+ goto out;
- if (wordcount < 3)
- goto out;
+ if (wordcount < 3)
+ goto out;
- if (wordcount > 5)
- goto out;
+ if (wordcount > 5)
+ goto out;
- volname = (char *)words[2];
+ volname = (char *)words[2];
- if (!volname) {
- ret = -1;
- goto out;
- }
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
+ if (wordcount == 3) {
+ ret = dict_set_str(dict, "key", "all");
if (ret)
- goto out;
+ goto out;
+ }
- if (wordcount == 3) {
- ret = dict_set_str (dict, "key", "all");
- if (ret)
- goto out;
+ if (wordcount >= 4) {
+ if (!strcmp("force", (char *)words[3])) {
+ ret = dict_set_int32(dict, "force", 1);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "key", "all");
+ if (ret)
+ goto out;
+ } else {
+ ret = dict_set_str(dict, "key", (char *)words[3]);
+ if (ret)
+ goto out;
}
+ }
- if (wordcount >= 4) {
- if (!strcmp ("force", (char*)words[3])) {
- ret = dict_set_int32 (dict, "force", 1);
- if (ret)
- goto out;
- ret = dict_set_str (dict, "key", "all");
- if (ret)
- goto out;
- } else {
- ret = dict_set_str (dict, "key", (char *)words[3]);
- if (ret)
- goto out;
- }
+ if (wordcount == 5) {
+ if (strcmp("force", (char *)words[4])) {
+ ret = -1;
+ goto out;
+ } else {
+ ret = dict_set_int32(dict, "force", 1);
+ if (ret)
+ goto out;
}
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict) {
+ dict_unref(dict);
+ }
+
+ return ret;
+}
+
+int32_t
+cli_cmd_get_state_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *odir = NULL;
+ char *filename = NULL;
+ char *daemon_name = NULL;
+ int count = 0;
+ uint32_t cmd = 0;
+
+ GF_VALIDATE_OR_GOTO("cli", options, out);
+ GF_VALIDATE_OR_GOTO("cli", words, out);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- if (wordcount == 5) {
- if (strcmp ("force", (char*)words[4])) {
+ if (wordcount < 1 || wordcount > 7) {
+ *op_errstr = gf_strdup(
+ "Problem parsing arguments."
+ " Check usage.");
+ goto out;
+ }
+
+ if (wordcount >= 1) {
+ gf_asprintf(&daemon_name, "%s", "glusterd");
+
+ for (count = 1; count < wordcount; count++) {
+ if (strcmp(words[count], "odir") == 0 ||
+ strcmp(words[count], "file") == 0) {
+ if (strcmp(words[count], "odir") == 0) {
+ if (++count < wordcount) {
+ odir = (char *)words[count];
+ continue;
+ } else {
ret = -1;
goto out;
+ }
+ } else if (strcmp(words[count], "file") == 0) {
+ if (++count < wordcount) {
+ filename = (char *)words[count];
+ continue;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+ } else {
+ if (count > 1) {
+ if (count == wordcount - 1) {
+ if (strcmp(words[count], "detail") == 0) {
+ cmd = GF_CLI_GET_STATE_DETAIL;
+ continue;
+ } else if (strcmp(words[count], "volumeoptions") == 0) {
+ cmd = GF_CLI_GET_STATE_VOLOPTS;
+ continue;
+ }
+ } else {
+ *op_errstr = gf_strdup(
+ "Problem"
+ " parsing arguments. "
+ "Check usage.");
+ ret = -1;
+ goto out;
+ }
+ }
+ if (strcmp(words[count], "glusterd") == 0) {
+ continue;
} else {
- ret = dict_set_int32 (dict, "force", 1);
- if (ret)
- goto out;
+ if (count == wordcount - 1) {
+ if (strcmp(words[count], "detail") == 0) {
+ cmd = GF_CLI_GET_STATE_DETAIL;
+ continue;
+ } else if (strcmp(words[count], "volumeoptions") == 0) {
+ cmd = GF_CLI_GET_STATE_VOLOPTS;
+ continue;
+ }
+ }
+
+ *op_errstr = gf_strdup(
+ "glusterd is "
+ "the only supported daemon.");
+ ret = -1;
+ goto out;
}
+ }
}
- *options = dict;
+ ret = dict_set_dynstr(dict, "daemon", daemon_name);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please check "
+ " log file for more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting daemon name to dictionary failed");
+ goto out;
+ }
+ daemon_name = NULL;
+
+ if (odir) {
+ ret = dict_set_str(dict, "odir", odir);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please"
+ " check log file for"
+ " more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting output directory to"
+ "dictionary failed");
+ goto out;
+ }
+ }
-out:
- if (ret && dict) {
- dict_destroy (dict);
+ if (filename) {
+ ret = dict_set_str(dict, "filename", filename);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please"
+ " check log file for"
+ " more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting filename to dictionary failed");
+ goto out;
+ }
}
- return ret;
+ if (cmd) {
+ ret = dict_set_uint32(dict, "getstate-cmd", cmd);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please"
+ " check log file for"
+ " more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting "
+ "get-state command type to dictionary "
+ "failed");
+ goto out;
+ }
+ }
+ }
+
+out:
+ if (dict)
+ *options = dict;
+
+ if (ret && dict)
+ dict_unref(dict);
+
+ GF_FREE(daemon_name);
+
+ return ret;
}
int32_t
-cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_inode_quota_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int i = 0;
- char key[20] = {0, };
- uint64_t value = 0;
- gf_quota_type type = GF_QUOTA_OPTION_TYPE_NONE;
- char *opwords[] = { "enable", "disable", "limit-usage",
- "remove", "list", "version", NULL };
- char *w = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ gf_log("cli", GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
- if (wordcount < 4)
- goto out;
+ if (wordcount != 4)
+ goto out;
- volname = (char *)words[2];
- if (!volname) {
- ret = -1;
- goto out;
+ volname = (char *)words[2];
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Validate the volume name here itself */
+ if (cli_validate_volname(volname) < 0)
+ goto out;
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret < 0)
+ goto out;
+
+ if (strcmp(words[3], "enable") != 0) {
+ cli_out("Invalid quota option : %s", words[3]);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "type", GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS);
+ if (ret < 0)
+ goto out;
+
+ *options = dict;
+out:
+ if (ret < 0) {
+ if (dict)
+ dict_unref(dict);
+ }
+
+ return ret;
+}
+
+int32_t
+cli_cmd_quota_parse(const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int i = -1;
+ char key[20] = {
+ 0,
+ };
+ int64_t value = 0;
+ gf_quota_type type = GF_QUOTA_OPTION_TYPE_NONE;
+ static char *opwords[] = {"enable",
+ "disable",
+ "limit-usage",
+ "remove",
+ "list",
+ "alert-time",
+ "soft-timeout",
+ "hard-timeout",
+ "default-soft-limit",
+ "limit-objects",
+ "list-objects",
+ "remove-objects",
+ NULL};
+ char *w = NULL;
+ uint32_t time = 0;
+ double percent = 0;
+ char *end_ptr = NULL;
+ int64_t limit = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log("cli", GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
+
+ if (wordcount < 4) {
+ if ((wordcount == 3) && !(strcmp(words[2], "help"))) {
+ ret = 1;
}
+ goto out;
+ }
- /* Validate the volume name here itself */
- {
- if (volname[0] == '-')
- goto out;
+ volname = (char *)words[2];
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
- if (!strcmp (volname, "all")) {
- cli_err ("\"all\" cannot be the name of a volume.");
- goto out;
- }
+ /* Validate the volume name here itself */
+ if (cli_validate_volname(volname) < 0)
+ goto out;
- if (strchr (volname, '/'))
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret < 0)
+ goto out;
- if (strlen (volname) > 512)
- goto out;
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ cli_out("Invalid quota option : %s", words[3]);
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "enable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_ENABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (strcmp(w, "disable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_DISABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (strcmp(w, "limit-usage") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIMIT_USAGE;
+ } else if (strcmp(w, "limit-objects") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE ||
+ type == GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS) {
+ if (wordcount < 6 || wordcount > 7) {
+ ret = -1;
+ goto out;
+ }
- for (i = 0; i < strlen (volname); i++)
- if (!isalnum (volname[i]) && (volname[i] != '_') && (volname[i] != '-'))
- goto out;
+ if (words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
}
+ ret = dict_set_str(dict, "path", (char *)words[4]);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
+ if (!words[5]) {
+ cli_err("Please enter the limit value to be set");
+ ret = -1;
+ goto out;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) {
+ ret = gf_string2bytesize_int64(words[5], &value);
+ if (ret != 0 || value <= 0) {
+ if (errno == ERANGE || value <= 0) {
+ ret = -1;
+ cli_err(
+ "Please enter an integer "
+ "value in the range of "
+ "(1 - %" PRId64 ")",
+ INT64_MAX);
+ } else
+ cli_err(
+ "Please enter a correct "
+ "value");
+ goto out;
+ }
+ } else {
+ errno = 0;
+ limit = strtol(words[5], &end_ptr, 10);
+ if (errno == ERANGE || errno == EINVAL || limit <= 0 ||
+ strcmp(end_ptr, "") != 0) {
+ ret = -1;
+ cli_err(
+ "Please enter an integer value in "
+ "the range 1 - %" PRId64,
+ INT64_MAX);
+ goto out;
+ }
+ }
+
+ ret = dict_set_str(dict, "hard-limit", (char *)words[5]);
if (ret < 0)
+ goto out;
+
+ if (wordcount == 7) {
+ ret = gf_string2percent(words[6], &percent);
+ if (ret != 0 || percent > 100) {
+ ret = -1;
+ cli_err(
+ "Please enter a correct value "
+ "in the range of 0 to 100");
goto out;
+ }
- w = str_getunamb (words[3], opwords);
- if (!w) {
- ret = - 1;
+ ret = dict_set_str(dict, "soft-limit", (char *)words[6]);
+ if (ret < 0)
goto out;
}
- if ((strcmp (w, "enable")) == 0 && wordcount == 4) {
- type = GF_QUOTA_OPTION_TYPE_ENABLE;
- ret = 0;
- goto set_type;
- }
+ goto set_type;
+ }
- if (strcmp (w, "disable") == 0 && wordcount == 4) {
- type = GF_QUOTA_OPTION_TYPE_DISABLE;
- ret = 0;
- goto set_type;
+ if (strcmp(w, "remove") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
}
- if (strcmp (w, "limit-usage") == 0) {
- if (wordcount != 6) {
- ret = -1;
- goto out;
- }
+ type = GF_QUOTA_OPTION_TYPE_REMOVE;
+
+ if (words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
+ }
- type = GF_QUOTA_OPTION_TYPE_LIMIT_USAGE;
+ ret = dict_set_str(dict, "path", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp(w, "remove-objects") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
- if (words[4][0] != '/') {
- cli_err ("Please enter absolute path");
+ type = GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS;
- return -2;
- }
- ret = dict_set_str (dict, "path", (char *) words[4]);
- if (ret)
- goto out;
+ if (words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
+ }
- if (!words[5]) {
- cli_err ("Please enter the limit value to be set");
+ ret = dict_set_str(dict, "path", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
- return -2;
- }
+ if (strcmp(w, "list") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIST;
- ret = gf_string2bytesize (words[5], &value);
- if (ret != 0) {
- cli_err ("Please enter a correct value");
- return -1;
- }
+ if (words[4] && words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_str (dict, "limit", (char *) words[5]);
- if (ret < 0)
- goto out;
+ i = 4;
+ while (i < wordcount) {
+ snprintf(key, 20, "path%d", i - 4);
- goto set_type;
+ ret = dict_set_str(dict, key, (char *)words[i++]);
+ if (ret < 0)
+ goto out;
}
- if (strcmp (w, "remove") == 0) {
- if (wordcount != 5) {
- ret = -1;
- goto out;
- }
- type = GF_QUOTA_OPTION_TYPE_REMOVE;
+ ret = dict_set_int32(dict, "count", i - 4);
+ if (ret < 0)
+ goto out;
- if (words[4][0] != '/') {
- cli_err ("Please enter absolute path");
+ goto set_type;
+ }
- return -2;
- }
+ if (strcmp(w, "list-objects") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIST_OBJECTS;
- ret = dict_set_str (dict, "path", (char *) words[4]);
- if (ret < 0)
- goto out;
- goto set_type;
+ i = 4;
+ while (i < wordcount) {
+ snprintf(key, 20, "path%d", i - 4);
+
+ ret = dict_set_str(dict, key, (char *)words[i++]);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set "
+ "quota patch in request dictionary");
+ goto out;
+ }
}
- if (strcmp (w, "list") == 0) {
- if (wordcount < 4) {
- ret = -1;
- goto out;
- }
+ ret = dict_set_int32(dict, "count", i - 4);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set quota "
+ "limit count in request dictionary");
+ goto out;
+ }
- type = GF_QUOTA_OPTION_TYPE_LIST;
+ goto set_type;
+ }
- i = 4;
- while (i < wordcount) {
- snprintf (key, 20, "path%d", i-4);
+ if (strcmp(w, "alert-time") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+ type = GF_QUOTA_OPTION_TYPE_ALERT_TIME;
- ret = dict_set_str (dict, key, (char *) words [i++]);
- if (ret < 0)
- goto out;
- }
+ ret = gf_string2time(words[4], &time);
+ if (ret) {
+ cli_err(
+ "Invalid argument %s. Please enter a valid "
+ "string",
+ words[4]);
+ goto out;
+ }
- ret = dict_set_int32 (dict, "count", i - 4);
- if (ret < 0)
- goto out;
+ ret = dict_set_str(dict, "value", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp(w, "soft-timeout") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+ type = GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT;
- goto set_type;
+ ret = gf_string2time(words[4], &time);
+ if (ret) {
+ cli_err(
+ "Invalid argument %s. Please enter a valid "
+ "string",
+ words[4]);
+ goto out;
}
- if (strcmp (w, "version") == 0) {
- type = GF_QUOTA_OPTION_TYPE_VERSION;
- } else {
- GF_ASSERT (!"opword mismatch");
+ ret = dict_set_str(dict, "value", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp(w, "hard-timeout") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
}
+ type = GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT;
-set_type:
- ret = dict_set_int32 (dict, "type", type);
+ ret = gf_string2time(words[4], &time);
+ if (ret) {
+ cli_err(
+ "Invalid argument %s. Please enter a valid "
+ "string",
+ words[4]);
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "value", (char *)words[4]);
if (ret < 0)
- goto out;
+ goto out;
+ goto set_type;
+ }
+ if (strcmp(w, "default-soft-limit") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+ type = GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT;
- *options = dict;
+ ret = dict_set_str(dict, "value", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ } else {
+ GF_ASSERT(!"opword mismatch");
+ }
+
+set_type:
+ ret = dict_set_int32(dict, "type", type);
+ if (ret < 0)
+ goto out;
+
+ *options = dict;
out:
- if (ret < 0) {
- if (dict)
- dict_destroy (dict);
- }
+ if (ret < 0) {
+ if (dict)
+ dict_unref(dict);
+ }
- return ret;
+ return ret;
}
-static inline gf_boolean_t
-cli_is_key_spl (char *key)
+static gf_boolean_t
+cli_is_key_spl(char *key)
{
- return (strcmp (key, "group") == 0);
+ return (strcmp(key, "group") == 0);
}
-#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd"
-static int
-cli_add_key_group (dict_t *dict, char *key, char *value)
+static int32_t
+cli_add_key_group_value(dict_t *dict, const char *name, const char *value,
+ int32_t id, char **op_errstr)
{
- int ret = -1;
- int opt_count = 0;
- char iter_key[1024] = {0,};
- char iter_val[1024] = {0,};
- char *saveptr = NULL;
- char *tok_key = NULL;
- char *tok_val = NULL;
- char *dkey = NULL;
- char *dval = NULL;
- char *tagpath = NULL;
- char *buf = NULL;
- char line[PATH_MAX + 256] = {0,};
- FILE *fp = NULL;
-
- ret = gf_asprintf (&tagpath, "%s/groups/%s",
- GLUSTERD_DEFAULT_WORKDIR, value);
- if (ret == -1) {
- tagpath = NULL;
- goto out;
- }
-
- fp = fopen (tagpath, "r");
- if (!fp) {
- ret = -1;
- goto out;
- }
+ char *key = NULL;
+ char *data = NULL;
+ int32_t ret = -1;
+
+ ret = gf_asprintf(&key, "%s%d", name, id);
+ if (ret < 0) {
+ goto out;
+ }
+ data = gf_strdup(value);
+ if (data == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to allocate memory for data");
+ ret = -1;
+ goto out;
+ }
- opt_count = 0;
- buf = line;
- while (fscanf (fp, "%s", buf) != EOF) {
+ ret = dict_set_dynstr(dict, key, data);
+ if (ret == 0) {
+ data = NULL;
+ }
- opt_count++;
- tok_key = strtok_r (line, "=", &saveptr);
- tok_val = strtok_r (NULL, "=", &saveptr);
- if (!tok_key || !tok_val) {
- ret = -1;
- goto out;
- }
+out:
+ GF_FREE(key);
+ GF_FREE(data);
- snprintf (iter_key, sizeof (iter_key), "key%d", opt_count);
- dkey = gf_strdup (tok_key);
- ret = dict_set_dynstr (dict, iter_key, dkey);
- if (ret)
- goto out;
- dkey = NULL;
+ if ((ret != 0) && (op_errstr != NULL)) {
+ *op_errstr = gf_strdup("Failed to allocate memory");
+ }
- snprintf (iter_val, sizeof (iter_val), "value%d", opt_count);
- dval = gf_strdup (tok_val);
- ret = dict_set_dynstr (dict, iter_val, dval);
- if (ret)
- goto out;
- dval = NULL;
+ return ret;
+}
+static int
+cli_add_key_group(dict_t *dict, char *key, char *value, char **op_errstr)
+{
+ int ret = -1;
+ int opt_count = 0;
+ char *saveptr = NULL;
+ char *tok_key = NULL;
+ char *tok_val = NULL;
+ char *tagpath = NULL;
+ char line[PATH_MAX + 256] = {
+ 0,
+ };
+ FILE *fp = NULL;
+
+ ret = gf_asprintf(&tagpath, "%s/groups/%s", GLUSTERD_DEFAULT_WORKDIR,
+ value);
+ if (ret == -1) {
+ tagpath = NULL;
+ goto out;
+ }
+
+ fp = fopen(tagpath, "r");
+ if (!fp) {
+ ret = -1;
+ if (op_errstr) {
+ gf_asprintf(op_errstr,
+ "Unable to open file '%s'. "
+ "Error: %s",
+ tagpath, strerror(errno));
+ }
+ goto out;
+ }
+
+ opt_count = 0;
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ if (strlen(line) >= sizeof(line) - 1) {
+ ret = -1;
+ if (op_errstr != NULL) {
+ *op_errstr = gf_strdup("Line too long");
+ }
+ goto out;
}
- if (!opt_count) {
- ret = -1;
- goto out;
+ /* Treat line that start with "#" as comments */
+ if ('#' == line[0])
+ continue;
+
+ opt_count++;
+ tok_key = strtok_r(line, "=", &saveptr);
+ tok_val = strtok_r(NULL, "\r\n", &saveptr);
+ if (!tok_key || !tok_val) {
+ ret = -1;
+ if (op_errstr) {
+ gf_asprintf(op_errstr,
+ "'%s' file format "
+ "not valid.",
+ tagpath);
+ }
+ goto out;
}
- ret = dict_set_int32 (dict, "count", opt_count);
-out:
- GF_FREE (tagpath);
+ ret = cli_add_key_group_value(dict, "key", tok_key, opt_count,
+ op_errstr);
+ if (ret != 0) {
+ goto out;
+ }
+ ret = cli_add_key_group_value(dict, "value", tok_val, opt_count,
+ op_errstr);
+ if (ret != 0) {
+ goto out;
+ }
+ }
- if (ret) {
- GF_FREE (dkey);
- GF_FREE (dval);
+ if (!opt_count) {
+ ret = -1;
+ if (op_errstr) {
+ gf_asprintf(op_errstr, "'%s' file format not valid.", tagpath);
}
+ goto out;
+ }
+ ret = dict_set_int32(dict, "count", opt_count);
+out:
- if (fp)
- fclose (fp);
+ GF_FREE(tagpath);
- return ret;
+ if (fp)
+ fclose(fp);
+
+ return ret;
}
-#undef GLUSTERD_DEFAULT_WORKDIR
int32_t
-cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int count = 0;
- char *key = NULL;
- char *value = NULL;
- int i = 0;
- char str[50] = {0,};
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int count = 0;
+ char *key = NULL;
+ char *value = NULL;
+ int i = 0;
+ char str[50] = {
+ 0,
+ };
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ if (wordcount < 3)
+ goto out;
- dict = dict_new ();
+ volname = (char *)words[2];
- if (!dict)
- goto out;
+ GF_ASSERT(volname);
- if (wordcount < 3)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
- volname = (char *)words[2];
+ if (ret)
+ goto out;
- GF_ASSERT (volname);
+ if (!strcmp(volname, "all")) {
+ ret = dict_set_str(dict, "globalname", "All");
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on global key failed.");
+ goto out;
+ }
- ret = dict_set_str (dict, "volname", volname);
+ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on global key failed.");
+ goto out;
+ }
+ }
+ if ((!strcmp(volname, "help") || !strcmp(volname, "help-xml")) &&
+ wordcount == 3) {
+ ret = dict_set_str(dict, volname, volname);
if (ret)
- goto out;
-
- if ((!strcmp (volname, "help") || !strcmp (volname, "help-xml"))
- && wordcount == 3 ) {
- ret = dict_set_str (dict, volname, volname);
- if (ret)
- goto out;
+ goto out;
- } else if (wordcount < 5) {
- ret = -1;
- goto out;
-
- } else if (wordcount == 5 && cli_is_key_spl ((char *)words[3])) {
- key = (char *) words[3];
- value = (char *) words[4];
- if ( !key || !value) {
- ret = -1;
- goto out;
- }
-
- ret = gf_strip_whitespace (value, strlen (value));
- if (ret == -1)
- goto out;
+ } else if (wordcount < 5) {
+ ret = -1;
+ goto out;
- ret = cli_add_key_group (dict, key, value);
- if (ret == 0)
- *options = dict;
- goto out;
+ } else if (wordcount == 5 && cli_is_key_spl((char *)words[3])) {
+ key = (char *)words[3];
+ value = (char *)words[4];
+ if (!key || !value) {
+ ret = -1;
+ goto out;
}
- for (i = 3; i < wordcount; i+=2) {
+ ret = gf_strip_whitespace(value, strlen(value));
+ if (ret == -1)
+ goto out;
- key = (char *) words[i];
- value = (char *) words[i+1];
+ if (strlen(value) == 0) {
+ ret = -1;
+ goto out;
+ }
- if ( !key || !value) {
- ret = -1;
- goto out;
- }
+ ret = cli_add_key_group(dict, key, value, op_errstr);
+ if (ret == 0)
+ *options = dict;
+ goto out;
+ }
- count++;
+ for (i = 3; i < wordcount; i += 2) {
+ key = (char *)words[i];
+ value = (char *)words[i + 1];
- ret = gf_strip_whitespace (value, strlen (value));
- if (ret == -1)
- goto out;
+ if (!key || !value) {
+ ret = -1;
+ goto out;
+ }
- if (cli_is_key_spl (key)) {
- ret = -1;
- goto out;
- }
+ count++;
- sprintf (str, "key%d", count);
- ret = dict_set_str (dict, str, key);
- if (ret)
- goto out;
+ if (fnmatch("user.*", key, FNM_NOESCAPE) != 0) {
+ ret = gf_strip_whitespace(value, strlen(value));
+ if (ret == -1)
+ goto out;
+ }
- sprintf (str, "value%d", count);
- ret = dict_set_str (dict, str, value);
+ if (strlen(value) == 0) {
+ ret = -1;
+ goto out;
+ }
- if (ret)
- goto out;
+ if (cli_is_key_spl(key)) {
+ ret = -1;
+ goto out;
}
- ret = dict_set_int32 (dict, "count", wordcount-3);
+ sprintf(str, "key%d", count);
+ ret = dict_set_str(dict, str, key);
+ if (ret)
+ goto out;
+
+ sprintf(str, "value%d", count);
+ ret = dict_set_str(dict, str, value);
if (ret)
+ goto out;
+
+ if ((!strcmp(key, "cluster.enable-shared-storage")) &&
+ (!strcmp(value, "disable"))) {
+ question =
+ "Disabling cluster.enable-shared-storage "
+ "will delete the shared storage volume"
+ "(gluster_shared_storage), which is used "
+ "by snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Operation "
+ "cancelled, exiting");
+ *op_errstr = gf_strdup("Aborted by user.");
+ ret = -1;
+ goto out;
+ }
+ }
+ if ((!strcmp(key, "nfs.disable")) && (!strcmp(value, "off"))) {
+ question =
+ "Gluster NFS is being deprecated in favor "
+ "of NFS-Ganesha Enter \"yes\" to continue "
+ "using Gluster NFS";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Operation "
+ "cancelled, exiting");
+ *op_errstr = gf_strdup("Aborted by user.");
+ ret = -1;
goto out;
+ }
+ }
+ }
- *options = dict;
+ ret = dict_set_int32(dict, "count", wordcount - 3);
+
+ if (ret)
+ goto out;
+
+ *options = dict;
out:
- if (ret)
- dict_destroy (dict);
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, int *ret_type)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int brick_count = 0, brick_index = 0;
- char *bricks = NULL;
- char *opwords_cl[] = { "replica", "stripe", NULL };
- gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
- int count = 1;
- char *w = NULL;
- int index;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int brick_count = 0, brick_index = 0;
+ char *bricks = NULL;
+ static char *opwords_cl[] = {"replica", "stripe", NULL};
+ gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
+ int count = 1;
+ int arbiter_count = 0;
+ char *w = NULL;
+ int index;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
- dict = dict_new ();
+ if (wordcount < 3)
+ goto out;
- if (!dict)
- goto out;
+ volname = (char *)words[2];
- if (wordcount < 3)
- goto out;
+ GF_ASSERT(volname);
- volname = (char *)words[2];
+ ret = dict_set_str(dict, "volname", volname);
- GF_ASSERT (volname);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+ if (wordcount < 6) {
+ /* seems no options are given, go directly to the parse_brick */
+ brick_index = 3;
+ type = GF_CLUSTER_TYPE_NONE;
+ goto parse_bricks;
+ }
+ w = str_getunamb(words[3], opwords_cl);
+ if (!w) {
+ type = GF_CLUSTER_TYPE_NONE;
+ index = 3;
+ } else if ((strcmp(w, "replica")) == 0) {
+ type = GF_CLUSTER_TYPE_REPLICATE;
+ count = strtol(words[4], NULL, 0);
+ if (!count || (count < 2)) {
+ cli_err("replica count should be greater than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "replica-count", count);
if (ret)
- goto out;
-
- if (wordcount < 4) {
+ goto out;
+ index = 5;
+ if (words[index] && !strcmp(words[index], "arbiter")) {
+ arbiter_count = strtol(words[6], NULL, 0);
+ if (arbiter_count != 1 || count != 3) {
+ cli_err(
+ "For arbiter configuration, replica "
+ "count must be 3 and arbiter count "
+ "must be 1. The 3rd brick of the "
+ "replica will be the arbiter");
ret = -1;
goto out;
- }
- if (wordcount < 6) {
- /* seems no options are given, go directly to the parse_brick */
- brick_index = 3;
- type = GF_CLUSTER_TYPE_NONE;
- goto parse_bricks;
+ }
+ ret = dict_set_int32(dict, "arbiter-count", arbiter_count);
+ if (ret)
+ goto out;
+ index = 7;
}
- w = str_getunamb (words[3], opwords_cl);
- if (!w) {
- type = GF_CLUSTER_TYPE_NONE;
- index = 3;
- } else if ((strcmp (w, "replica")) == 0) {
- type = GF_CLUSTER_TYPE_REPLICATE;
- if (wordcount < 5) {
- ret = -1;
- goto out;
- }
- count = strtol (words[4], NULL, 0);
- if (!count || (count < 2)) {
- cli_err ("replica count should be greater than 1");
- ret = -1;
- goto out;
+ if (count == 2) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ question =
+ "Replica 2 volumes are prone to "
+ "split-brain. Use Arbiter or "
+ "Replica 3 to avoid this. See: "
+ "http://docs.gluster.org/en/latest/Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to continue?\n";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Add brick"
+ " cancelled, exiting");
+ ret = -1;
+ goto out;
}
- ret = dict_set_int32 (dict, "replica-count", count);
- if (ret)
- goto out;
- index = 5;
- } else if ((strcmp (w, "stripe")) == 0) {
- type = GF_CLUSTER_TYPE_STRIPE;
- if (wordcount < 5) {
- ret = -1;
- goto out;
- }
- count = strtol (words[4], NULL, 0);
- if (!count || (count < 2)) {
- cli_err ("stripe count should be greater than 1");
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (dict, "stripe-count", count);
- if (ret)
- goto out;
- index = 5;
- } else {
- GF_ASSERT (!"opword mismatch");
- ret = -1;
- goto out;
+ }
}
+ } else if ((strcmp(w, "stripe")) == 0) {
+ cli_err("stripe option not supported");
+ goto out;
+ } else {
+ GF_ASSERT(!"opword mismatch");
+ ret = -1;
+ goto out;
+ }
- brick_index = index;
+ brick_index = index;
parse_bricks:
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
- &brick_count);
- if (ret)
- goto out;
- ret = dict_set_dynstr (dict, "bricks", bricks);
- if (ret)
- goto out;
+ if (strcmp(words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
- ret = dict_set_int32 (dict, "count", brick_count);
+ ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks, &brick_count);
+ if (ret)
+ goto out;
- if (ret)
- goto out;
+ ret = dict_set_dynstr(dict, "bricks", bricks);
+ if (ret)
+ goto out;
- *options = dict;
+ ret = dict_set_int32(dict, "count", brick_count);
+
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "force", is_force);
+ if (ret)
+ goto out;
+
+ *options = dict;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse add-brick CLI");
- if (dict)
- dict_destroy (dict);
- }
+ if (ret_type)
+ *ret_type = type;
- return ret;
-}
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse add-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
+ return ret;
+}
int32_t
-cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
- dict_t **options, int *question)
+cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options,
+ int *question, int *brick_count,
+ int32_t *comm)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *delimiter = NULL;
- int ret = -1;
- char key[50];
- int brick_count = 0, brick_index = 0;
- int32_t tmp_index = 0;
- int32_t j = 0;
- char *tmp_brick = NULL;
- char *tmp_brick1 = NULL;
- char *type_opword[] = { "replica", NULL };
- char *opwords[] = { "start", "commit", "stop", "status",
- "force", NULL };
- char *w = NULL;
- int32_t command = GF_OP_CMD_NONE;
- long count = 0;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *delimiter = NULL;
+ int ret = -1;
+ char key[50];
+ int brick_index = 0;
+ int32_t tmp_index = 0;
+ int32_t j = 0;
+ char *tmp_brick = NULL;
+ char *tmp_brick1 = NULL;
+ static char *type_opword[] = {"replica", NULL};
+ static char *opwords[] = {"start", "commit", "stop",
+ "status", "force", NULL};
+ char *w = NULL;
+ int32_t command = GF_OP_CMD_NONE;
+ long count = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *ques = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ if (wordcount < 5)
+ goto out;
- if (wordcount < 4)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- dict = dict_new ();
- if (!dict)
- goto out;
+ volname = (char *)words[2];
- volname = (char *)words[2];
+ GF_ASSERT(volname);
- GF_ASSERT (volname);
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ brick_index = 3;
+ w = str_getunamb(words[3], type_opword);
+ if (w && !strcmp("replica", w)) {
+ if (wordcount < 6) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol(words[4], NULL, 0);
+ if (count < 1) {
+ cli_err(
+ "replica count should be greater than 0 in "
+ "case of remove-brick");
+ ret = -1;
+ goto out;
+ }
- brick_index = 3;
- w = str_getunamb (words[3], type_opword);
- if (w && !strcmp ("replica", w)) {
- if (wordcount < 5) {
- ret = -1;
- goto out;
- }
- count = strtol (words[4], NULL, 0);
- if (count < 1) {
- cli_err ("replica count should be greater than 0 in "
- "case of remove-brick");
- ret = -1;
- goto out;
+ if (count == 2) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ ques =
+ "Replica 2 volumes are prone to "
+ "split-brain. Use Arbiter or Replica 3 "
+ "to avoid this. See: "
+ "http://docs.gluster.org/en/latest/Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to continue?\n";
+ answer = cli_cmd_get_confirmation(state, ques);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Remove "
+ "brick cancelled, exiting");
+ ret = -1;
+ goto out;
}
-
- ret = dict_set_int32 (dict, "replica-count", count);
- if (ret)
- goto out;
- brick_index = 5;
- } else if (w) {
- GF_ASSERT (!"opword mismatch");
+ }
}
- w = str_getunamb (words[wordcount - 1], opwords);
- if (!w) {
- /* Should be default 'force' */
- command = GF_OP_CMD_COMMIT_FORCE;
- if (question)
- *question = 1;
+ ret = dict_set_int32(dict, "replica-count", count);
+ if (ret)
+ goto out;
+ brick_index = 5;
+ } else if (w) {
+ GF_ASSERT(!"opword mismatch");
+ }
+
+ w = str_getunamb(words[wordcount - 1], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ } else {
+ /* handled this option */
+ wordcount--;
+ if (!strcmp("start", w)) {
+ command = GF_OP_CMD_START;
+ if (question)
+ *question = 1;
+ } else if (!strcmp("commit", w)) {
+ command = GF_OP_CMD_COMMIT;
+ } else if (!strcmp("stop", w)) {
+ command = GF_OP_CMD_STOP;
+ } else if (!strcmp("status", w)) {
+ command = GF_OP_CMD_STATUS;
+ } else if (!strcmp("force", w)) {
+ command = GF_OP_CMD_COMMIT_FORCE;
+ if (question)
+ *question = 1;
} else {
- /* handled this option */
- wordcount--;
- if (!strcmp ("start", w)) {
- command = GF_OP_CMD_START;
- } else if (!strcmp ("commit", w)) {
- command = GF_OP_CMD_COMMIT;
- if (question)
- *question = 1;
- } else if (!strcmp ("stop", w)) {
- command = GF_OP_CMD_STOP;
- } else if (!strcmp ("status", w)) {
- command = GF_OP_CMD_STATUS;
- } else if (!strcmp ("force", w)) {
- command = GF_OP_CMD_COMMIT_FORCE;
- if (question)
- *question = 1;
- } else {
- GF_ASSERT (!"opword mismatch");
- ret = -1;
- goto out;
- }
+ GF_ASSERT(!"opword mismatch");
+ ret = -1;
+ goto out;
}
+ }
- if (wordcount < 4) {
- ret = -1;
- goto out;
- }
+ ret = dict_set_int32(dict, "command", command);
+ if (ret)
+ gf_log("cli", GF_LOG_INFO, "failed to set 'command' %d", command);
- ret = dict_set_int32 (dict, "command", command);
- if (ret)
- gf_log ("cli", GF_LOG_INFO, "failed to set 'command' %d",
- command);
+ tmp_index = brick_index;
+ tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char);
+ if (!tmp_brick) {
+ gf_log("", GF_LOG_ERROR,
+ "cli_cmd_volume_remove_brick_parse: "
+ "Unable to get memory");
+ ret = -1;
+ goto out;
+ }
- tmp_index = brick_index;
- tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char);
+ tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char);
- if (!tmp_brick) {
- gf_log ("",GF_LOG_ERROR,"cli_cmd_volume_remove_brick_parse: "
- "Unable to get memory");
- ret = -1;
+ if (!tmp_brick1) {
+ gf_log("", GF_LOG_ERROR,
+ "cli_cmd_volume_remove_brick_parse: "
+ "Unable to get memory");
+ ret = -1;
+ goto out;
+ }
+
+ while (brick_index < wordcount) {
+ if (validate_brick_name((char *)words[brick_index])) {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[brick_index]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[brick_index], ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
goto out;
}
- tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char);
-
- if (!tmp_brick1) {
- gf_log ("",GF_LOG_ERROR,"cli_cmd_volume_remove_brick_parse: "
- "Unable to get memory");
+ j = tmp_index;
+ strcpy(tmp_brick, words[brick_index]);
+ while (j < brick_index) {
+ strcpy(tmp_brick1, words[j]);
+ if (!(strcmp(tmp_brick, tmp_brick1))) {
+ gf_log("", GF_LOG_ERROR,
+ "Duplicate bricks"
+ " found %s",
+ words[brick_index]);
+ cli_err("Duplicate bricks found %s", words[brick_index]);
ret = -1;
goto out;
+ }
+ j++;
}
+ snprintf(key, 50, "brick%d", ++(*brick_count));
+ ret = dict_set_str(dict, key, (char *)words[brick_index++]);
- while (brick_index < wordcount) {
- if (validate_brick_name ((char *)words[brick_index])) {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[brick_index]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr(words[brick_index], ':');
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
-
- j = tmp_index;
- strcpy(tmp_brick, words[brick_index]);
- while ( j < brick_index) {
- strcpy(tmp_brick1, words[j]);
- if (!(strcmp (tmp_brick, tmp_brick1))) {
- gf_log("",GF_LOG_ERROR, "Duplicate bricks"
- " found %s", words[brick_index]);
- cli_err("Duplicate bricks found %s",
- words[brick_index]);
- ret = -1;
- goto out;
- }
- j++;
- }
- snprintf (key, 50, "brick%d", ++brick_count);
- ret = dict_set_str (dict, key, (char *)words[brick_index++]);
-
- if (ret)
- goto out;
- }
+ if (ret)
+ goto out;
+ }
- ret = dict_set_int32 (dict, "count", brick_count);
+ if (command != GF_OP_CMD_STATUS && command != GF_OP_CMD_STOP) {
+ ret = dict_set_int32(dict, "count", *brick_count);
if (ret)
- goto out;
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse remove-brick CLI");
- if (dict)
- dict_destroy (dict);
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse remove-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
- if (tmp_brick)
- GF_FREE (tmp_brick);
- if (tmp_brick1)
- GF_FREE (tmp_brick1);
+ GF_FREE(tmp_brick);
+ GF_FREE(tmp_brick1);
- return ret;
-}
+ *comm = command;
+ return ret;
+}
int32_t
-cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_brick_op_validate_bricks(const char **words, dict_t *dict, int src,
+ int dst)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int op_index = 0;
- char *delimiter = NULL;
- gf1_cli_replace_op replace_op = GF_REPLACE_OP_NONE;
- char *opwords[] = { "start", "commit", "pause", "abort", "status",
- NULL };
- char *w = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
+
+ if (validate_brick_name((char *)words[src])) {
+ cli_err(
+ "wrong brick type: %s, use "
+ "<HOSTNAME>:<export-dir-abs-path>",
+ words[3]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr((char *)words[src], '/');
+ ret = gf_canonicalize_path(delimiter);
+ if (ret)
+ goto out;
+ }
- GF_ASSERT (words);
- GF_ASSERT (options);
+ ret = dict_set_str(dict, "src-brick", (char *)words[src]);
+ if (ret)
+ goto out;
- dict = dict_new ();
+ if (dst == -1) {
+ ret = 0;
+ goto out;
+ }
- if (!dict)
- goto out;
+ if (validate_brick_name((char *)words[dst])) {
+ cli_err(
+ "wrong brick type: %s, use "
+ "<HOSTNAME>:<export-dir-abs-path>",
+ words[dst]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr((char *)words[dst], '/');
+ ret = gf_canonicalize_path(delimiter);
+ if (ret)
+ goto out;
+ }
- if (wordcount < 3)
- goto out;
+ ret = dict_set_str(dict, "dst-brick", (char *)words[dst]);
+ if (ret)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
- volname = (char *)words[2];
+int32_t
+cli_cmd_volume_reset_brick_parse(const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = -1;
+ char *volname = NULL;
+ dict_t *dict = NULL;
- GF_ASSERT (volname);
+ if (wordcount < 5 || wordcount > 7)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
+ dict = dict_new();
- if (ret)
- goto out;
+ if (!dict)
+ goto out;
- if (wordcount < 4) {
- ret = -1;
- goto out;
- }
+ volname = (char *)words[2];
- if (validate_brick_name ((char *)words[3])) {
- cli_err ("wrong brick type: %s, use "
- "<HOSTNAME>:<export-dir-abs-path>", words[3]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr ((char *)words[3], ':');
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ if (wordcount == 5) {
+ if (strcmp(words[4], "start")) {
+ cli_err(
+ "Invalid option '%s' for reset-brick. Please "
+ "enter valid reset-brick command",
+ words[4]);
+ ret = -1;
+ goto out;
}
- ret = dict_set_str (dict, "src-brick", (char *)words[3]);
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, -1);
if (ret)
- goto out;
+ goto out;
- if (wordcount < 5) {
- ret = -1;
- goto out;
+ ret = dict_set_str(dict, "operation", "GF_RESET_OP_START");
+ if (ret)
+ goto out;
+ } else if (wordcount == 6) {
+ if (strcmp(words[5], "commit")) {
+ cli_err(
+ "Invalid option '%s' for reset-brick. Please "
+ "enter valid reset-brick command",
+ words[5]);
+ ret = -1;
+ goto out;
}
- if (validate_brick_name ((char *)words[4])) {
- cli_err ("wrong brick type: %s, use "
- "<HOSTNAME>:<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr ((char *)words[4], ':');
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, 4);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "operation", "GF_RESET_OP_COMMIT");
+ if (ret)
+ goto out;
+ } else if (wordcount == 7) {
+ if (strcmp(words[5], "commit") || strcmp(words[6], "force")) {
+ cli_err(
+ "Invalid option '%s %s' for reset-brick. Please "
+ "enter valid reset-brick command",
+ words[5], words[6]);
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_str (dict, "dst-brick", (char *)words[4]);
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, 4);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "operation", "GF_RESET_OP_COMMIT_FORCE");
if (ret)
- goto out;
+ goto out;
+ }
- op_index = 5;
- if ((wordcount < (op_index + 1))) {
- ret = -1;
- goto out;
- }
+ *options = dict;
- w = str_getunamb (words[op_index], opwords);
+out:
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse reset-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
- if (!w) {
- } else if (!strcmp ("start", w)) {
- replace_op = GF_REPLACE_OP_START;
- } else if (!strcmp ("commit", w)) {
- replace_op = GF_REPLACE_OP_COMMIT;
- } else if (!strcmp ("pause", w)) {
- replace_op = GF_REPLACE_OP_PAUSE;
- } else if (!strcmp ("abort", w)) {
- replace_op = GF_REPLACE_OP_ABORT;
- } else if (!strcmp ("status", w)) {
- replace_op = GF_REPLACE_OP_STATUS;
- } else
- GF_ASSERT (!"opword mismatch");
-
- /* commit force option */
-
- op_index = 6;
-
- if (wordcount > (op_index + 1)) {
- ret = -1;
- goto out;
- }
+ return ret;
+}
- if (wordcount == (op_index + 1)) {
- if (replace_op != GF_REPLACE_OP_COMMIT) {
- ret = -1;
- goto out;
- }
- if (!strcmp ("force", words[op_index])) {
- replace_op = GF_REPLACE_OP_COMMIT_FORCE;
- }
- }
+int32_t
+cli_cmd_volume_replace_brick_parse(const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = -1;
+ char *volname = NULL;
+ dict_t *dict = NULL;
- if (replace_op == GF_REPLACE_OP_NONE) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- ret = dict_set_int32 (dict, "operation", (int32_t) replace_op);
+ if (wordcount != 7) {
+ ret = -1;
+ goto out;
+ }
- if (ret)
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to allocate dictionary");
+ goto out;
+ }
- *options = dict;
+ volname = (char *)words[2];
+
+ GF_ASSERT(volname);
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, 4);
+ if (ret)
+ goto out;
+
+ /* commit force option */
+ if (strcmp("commit", words[5]) || strcmp("force", words[6])) {
+ cli_err(
+ "Invalid option '%s' '%s' for replace-brick. Please "
+ "enter valid replace-brick command",
+ words[5], words[6]);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "operation", "GF_REPLACE_OP_COMMIT_FORCE");
+ if (ret)
+ goto out;
+
+ *options = dict;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse replace-brick CLI");
- if (dict)
- dict_destroy (dict);
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse reset-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_log_filename_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *str = NULL;
- int ret = -1;
- char *delimiter = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[3];
- GF_ASSERT (volname);
+ volname = (char *)words[3];
+ GF_ASSERT(volname);
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- str = (char *)words[4];
- if (strchr (str, ':')) {
- delimiter = strchr (words[4], ':');
- if (!delimiter || delimiter == words[4]
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- ret = dict_set_str (dict, "brick", str);
- if (ret)
- goto out;
- /* Path */
- str = (char *)words[5];
- ret = dict_set_str (dict, "path", str);
- if (ret)
- goto out;
+ str = (char *)words[4];
+ if (strchr(str, ':')) {
+ delimiter = strchr(words[4], ':');
+ if (!delimiter || delimiter == words[4] || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[4]);
+ ret = -1;
+ goto out;
} else {
- ret = dict_set_str (dict, "path", str);
- if (ret)
- goto out;
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
}
+ ret = dict_set_str(dict, "brick", str);
+ if (ret)
+ goto out;
+ /* Path */
+ str = (char *)words[5];
+ ret = dict_set_str(dict, "path", str);
+ if (ret)
+ goto out;
+ } else {
+ ret = dict_set_str(dict, "path", str);
+ if (ret)
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_level_parse (const char **words, int worcount, dict_t **options)
+cli_cmd_log_level_parse(const char **words, int worcount, dict_t **options)
{
- dict_t *dict = NULL;
- int ret = -1;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
-
- /*
- * loglevel command format:
- * > volume log level <VOL> <XLATOR[*]> <LOGLEVEL>
- * > volume log level colon-o posix WARNING
- * > volume log level colon-o replicate* DEBUG
- * > volume log level coon-o * TRACE
- */
-
- GF_ASSERT ((strncmp(words[0], "volume", 6) == 0));
- GF_ASSERT ((strncmp(words[1], "log", 3) == 0));
- GF_ASSERT ((strncmp(words[2], "level", 5) == 0));
-
- ret = glusterd_check_log_level(words[5]);
- if (ret == -1) {
- cli_err("Invalid log level [%s] specified", words[5]);
- cli_err("Valid values for loglevel: (DEBUG|WARNING|ERROR"
- "|CRITICAL|NONE|TRACE)");
- goto out;
- }
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ /*
+ * loglevel command format:
+ * > volume log level <VOL> <XLATOR[*]> <LOGLEVEL>
+ * > volume log level colon-o posix WARNING
+ * > volume log level colon-o replicate* DEBUG
+ * > volume log level coon-o * TRACE
+ */
+
+ GF_ASSERT((strncmp(words[0], "volume", 6) == 0));
+ GF_ASSERT((strncmp(words[1], "log", 3) == 0));
+ GF_ASSERT((strncmp(words[2], "level", 5) == 0));
+
+ ret = glusterd_check_log_level(words[5]);
+ if (ret == -1) {
+ cli_err("Invalid log level [%s] specified", words[5]);
+ cli_err(
+ "Valid values for loglevel: (DEBUG|WARNING|ERROR"
+ "|CRITICAL|NONE|TRACE)");
+ goto out;
+ }
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- GF_ASSERT(words[3]);
- GF_ASSERT(words[4]);
+ GF_ASSERT(words[3]);
+ GF_ASSERT(words[4]);
- ret = dict_set_str (dict, "volname", (char *)words[3]);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", (char *)words[3]);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "xlator", (char *)words[4]);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "xlator", (char *)words[4]);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "loglevel", (char *)words[5]);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "loglevel", (char *)words[5]);
+ if (ret)
+ goto out;
- *options = dict;
+ *options = dict;
- out:
- if (ret && dict)
- dict_destroy (dict);
+out:
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_locate_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_log_locate_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *str = NULL;
- int ret = -1;
- char *delimiter = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[3];
- GF_ASSERT (volname);
+ volname = (char *)words[3];
+ GF_ASSERT(volname);
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- if (words[4]) {
- delimiter = strchr (words[4], ':');
- if (!delimiter || delimiter == words[4]
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- str = (char *)words[4];
- ret = dict_set_str (dict, "brick", str);
- if (ret)
- goto out;
+ if (words[4]) {
+ delimiter = strchr(words[4], ':');
+ if (!delimiter || delimiter == words[4] || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
}
+ str = (char *)words[4];
+ ret = dict_set_str(dict, "brick", str);
+ if (ret)
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_rotate_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *str = NULL;
- int ret = -1;
- char *delimiter = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[3];
- GF_ASSERT (volname);
+ if (strcmp("rotate", words[3]) == 0)
+ volname = (char *)words[2];
+ GF_ASSERT(volname);
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- if (words[4]) {
- delimiter = strchr (words[4], ':');
- if (!delimiter || delimiter == words[4]
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- str = (char *)words[4];
- ret = dict_set_str (dict, "brick", str);
- if (ret)
- goto out;
+ if (words[4]) {
+ delimiter = strchr(words[4], ':');
+ if (!delimiter || delimiter == words[4] || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
}
+ str = (char *)words[4];
+ ret = dict_set_str(dict, "brick", str);
+ if (ret)
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
static gf_boolean_t
-gsyncd_url_check (const char *w)
+gsyncd_url_check(const char *w)
{
- return !!strpbrk (w, ":/");
+ return !!strpbrk(w, ":/");
}
static gf_boolean_t
-gsyncd_glob_check (const char *w)
+valid_slave_gsyncd_url(const char *w)
{
- return !!strpbrk (w, "*?[");
+ if (strstr(w, ":::"))
+ return _gf_false;
+ else if (strstr(w, "::"))
+ return _gf_true;
+ else
+ return _gf_false;
}
-int32_t
-cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
+static gf_boolean_t
+gsyncd_glob_check(const char *w)
{
- int32_t ret = -1;
- dict_t *dict = NULL;
- gf1_cli_gsync_set type = GF_GSYNC_OPTION_TYPE_NONE;
- char *append_str = NULL;
- size_t append_len = 0;
- char *subop = NULL;
- int i = 0;
- unsigned masteri = 0;
- unsigned slavei = 0;
- unsigned glob = 0;
- unsigned cmdi = 0;
- char *opwords[] = { "status", "start", "stop", "config",
- "log-rotate", NULL };
- char *w = NULL;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- /* new syntax:
- *
- * volume geo-replication [$m [$s]] status
- * volume geo-replication [$m] $s config [[!]$opt [$val]]
- * volume geo-replication $m $s start|stop
- * volume geo-replication $m [$s] log-rotate
- */
+ return !!strpbrk(w, "*?[");
+}
- if (wordcount < 3)
+static int
+config_parse(const char **words, int wordcount, dict_t *dict, unsigned cmdi,
+ unsigned glob)
+{
+ int32_t ret = -1;
+ int32_t i = -1;
+ char *append_str = NULL;
+ size_t append_len = 0;
+ char *subop = NULL;
+ char *ret_chkpt = NULL;
+ struct tm checkpoint_time;
+ char chkpt_buf[20] = "";
+
+ switch ((wordcount - 1) - cmdi) {
+ case 0:
+ subop = gf_strdup("get-all");
+ break;
+ case 1:
+ if (words[cmdi + 1][0] == '!') {
+ (words[cmdi + 1])++;
+ if (gf_asprintf(&subop, "del%s", glob ? "-glob" : "") == -1)
+ subop = NULL;
+ } else
+ subop = gf_strdup("get");
+
+ ret = dict_set_str(dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
goto out;
+ break;
+ default:
+ if (gf_asprintf(&subop, "set%s", glob ? "-glob" : "") == -1)
+ subop = NULL;
- for (i = 2; i <= 3 && i < wordcount - 1; i++) {
- if (gsyncd_glob_check (words[i]))
- glob = i;
- if (gsyncd_url_check (words[i])) {
- slavei = i;
- break;
- }
- }
+ ret = dict_set_str(dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
- if (glob && !slavei)
- /* glob is allowed only for config, thus it implies there is a
- * slave argument; but that might have not been recognized on
- * the first scan as it's url characteristics has been covered
- * by the glob syntax.
- *
- * In this case, the slave is perforce the last glob-word -- the
- * upcoming one is neither glob, nor url, so it's definitely not
- * the slave.
- */
- slavei = glob;
- if (slavei) {
- cmdi = slavei + 1;
- if (slavei == 3)
- masteri = 2;
- } else if (i <= 3) {
- /* no $s, can only be status cmd
- * (with either a single $m before it or nothing)
- * -- these conditions imply that i <= 3 after
- * the iteration and that i is the successor of
- * the (0 or 1 length) sequence of $m-s.
- */
- cmdi = i;
- if (i == 3)
- masteri = 2;
- } else
- goto out;
-
- /* now check if input really complies syntax
- * (in a somewhat redundant way, in favor
- * transparent soundness)
- */
+ /* join the varargs by spaces to get the op_value */
- if (masteri && gsyncd_url_check (words[masteri]))
- goto out;
- if (slavei && !glob && !gsyncd_url_check (words[slavei]))
+ for (i = cmdi + 2; i < wordcount; i++)
+ append_len += (strlen(words[i]) + 1);
+ /* trailing strcat will add two bytes, make space for that */
+ append_len++;
+
+ /* strcat is used on this allocation and hence expected to be
+ * initiatlized to 0. So GF_CALLOC is used.
+ */
+ append_str = GF_CALLOC(1, append_len, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
goto out;
+ }
+
+ for (i = cmdi + 2; i < wordcount; i++) {
+ strcat(append_str, words[i]);
+ strcat(append_str, " ");
+ }
+ append_str[append_len - 2] = '\0';
+ /* "checkpoint now" is special: we resolve that "now" */
+ if ((strcmp(words[cmdi + 1], "checkpoint") == 0) &&
+ (strcmp(append_str, "now") == 0)) {
+ struct timeval tv = {
+ 0,
+ };
+
+ ret = gettimeofday(&tv, NULL);
+ if (ret == -1)
+ goto out;
+
+ GF_FREE(append_str);
+ append_str = GF_MALLOC(300, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+ snprintf(append_str, 300, "%" GF_PRI_SECOND, tv.tv_sec);
+ } else if ((strcmp(words[cmdi + 1], "checkpoint") == 0) &&
+ (strcmp(append_str, "now") != 0)) {
+ memset(&checkpoint_time, 0, sizeof(struct tm));
+ ret_chkpt = strptime(append_str, "%Y-%m-%d %H:%M:%S",
+ &checkpoint_time);
+
+ if (ret_chkpt == NULL || *ret_chkpt != '\0') {
+ ret = -1;
+ cli_err(
+ "Invalid Checkpoint label. Use format "
+ "\"Y-m-d H:M:S\", Example: 2016-10-25 15:30:45");
+ goto out;
+ }
+ GF_FREE(append_str);
+ append_str = GF_MALLOC(300, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+ strftime(chkpt_buf, sizeof(chkpt_buf), "%s", &checkpoint_time);
+ snprintf(append_str, 300, "%s", chkpt_buf);
+ }
- w = str_getunamb (words[cmdi], opwords);
- if (!w)
+ ret = dict_set_dynstr(dict, "op_value", append_str);
+ if (ret != 0) {
goto out;
+ }
+ append_str = NULL;
+ }
- if (strcmp (w, "status") == 0) {
- type = GF_GSYNC_OPTION_TYPE_STATUS;
+ ret = -1;
+ if (subop) {
+ ret = dict_set_dynstr(dict, "subop", subop);
+ if (!ret)
+ subop = NULL;
+ }
- if (slavei && !masteri)
- goto out;
- } else if (strcmp (w, "config") == 0) {
- type = GF_GSYNC_OPTION_TYPE_CONFIG;
+out:
+ GF_FREE(append_str);
+ GF_FREE(subop);
- if (!slavei)
- goto out;
- } else if (strcmp (w, "start") == 0) {
- type = GF_GSYNC_OPTION_TYPE_START;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- if (!masteri || !slavei)
- goto out;
- } else if (strcmp (w, "stop") == 0) {
- type = GF_GSYNC_OPTION_TYPE_STOP;
+/* ssh_port_parse: Parses and validates when ssh_port is given.
+ * ssh_index refers to index of ssh_port and
+ * type refers to either push-pem or no-verify
+ */
- if (!masteri || !slavei)
- goto out;
- } else if (strcmp(w, "log-rotate") == 0) {
- type = GF_GSYNC_OPTION_TYPE_ROTATE;
+static int32_t
+parse_ssh_port(const char **words, int wordcount, dict_t *dict, unsigned *cmdi,
+ int ssh_index, char *type)
+{
+ int ret = 0;
+ char *end_ptr = NULL;
+ int64_t limit = 0;
+
+ if (!strcmp((char *)words[ssh_index], "ssh-port")) {
+ if (strcmp((char *)words[ssh_index - 1], "create")) {
+ ret = -1;
+ goto out;
+ }
+ (*cmdi)++;
+ limit = strtol(words[ssh_index + 1], &end_ptr, 10);
+ if (errno == ERANGE || errno == EINVAL || limit <= 0 ||
+ strcmp(end_ptr, "") != 0) {
+ ret = -1;
+ cli_err("Please enter an integer value for ssh_port ");
+ goto out;
+ }
- if (slavei && !masteri)
- goto out;
- } else
- GF_ASSERT (!"opword mismatch");
+ ret = dict_set_int32(dict, "ssh_port", limit);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ } else if (strcmp((char *)words[ssh_index + 1], "create")) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, type, 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+
+out:
+ return ret;
+}
- if (type != GF_GSYNC_OPTION_TYPE_CONFIG &&
- (cmdi < wordcount - 1 || glob))
+static int32_t
+force_push_pem_no_verify_parse(const char **words, int wordcount, dict_t *dict,
+ unsigned *cmdi)
+{
+ int32_t ret = 0;
+
+ if (!strcmp((char *)words[wordcount - 1], "force")) {
+ if ((strcmp((char *)words[wordcount - 2], "start")) &&
+ (strcmp((char *)words[wordcount - 2], "stop")) &&
+ (strcmp((char *)words[wordcount - 2], "create")) &&
+ (strcmp((char *)words[wordcount - 2], "no-verify")) &&
+ (strcmp((char *)words[wordcount - 2], "push-pem")) &&
+ (strcmp((char *)words[wordcount - 2], "pause")) &&
+ (strcmp((char *)words[wordcount - 2], "resume"))) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32n(dict, "force", SLEN("force"), 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+
+ if (!strcmp((char *)words[wordcount - 2], "push-pem")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 4,
+ "push_pem");
+ if (ret)
+ goto out;
+ } else if (!strcmp((char *)words[wordcount - 2], "no-verify")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 4,
+ "no_verify");
+ if (ret)
goto out;
+ }
+ } else if (!strcmp((char *)words[wordcount - 1], "push-pem")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 3,
+ "push_pem");
+ if (ret)
+ goto out;
+ } else if (!strcmp((char *)words[wordcount - 1], "no-verify")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 3,
+ "no_verify");
+ if (ret)
+ goto out;
+ }
- /* If got so far, input is valid, assemble the message */
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- ret = 0;
+int32_t
+cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **errstr)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_gsync_set type = GF_GSYNC_OPTION_TYPE_NONE;
+ int i = 0;
+ unsigned masteri = 0;
+ unsigned slavei = 0;
+ unsigned glob = 0;
+ unsigned cmdi = 0;
+ static char *opwords[] = {"create", "status", "start", "stop",
+ "config", "force", "delete", "ssh-port",
+ "no-verify", "push-pem", "detail", "pause",
+ "resume", NULL};
+ char *w = NULL;
+ char *save_ptr = NULL;
+ char *slave_temp = NULL;
+ char *token = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- if (masteri)
- ret = dict_set_str (dict, "master", (char *)words[masteri]);
- if (!ret && slavei)
- ret = dict_set_str (dict, "slave", (char *)words[slavei]);
- if (!ret)
- ret = dict_set_int32 (dict, "type", type);
- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
- switch ((wordcount - 1) - cmdi) {
- case 0:
- subop = gf_strdup ("get-all");
- break;
- case 1:
- if (words[cmdi + 1][0] == '!') {
- (words[cmdi + 1])++;
- if (gf_asprintf (&subop, "del%s", glob ? "-glob" : "") == -1)
- subop = NULL;
- } else
- subop = gf_strdup ("get");
-
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
- break;
- default:
- if (gf_asprintf (&subop, "set%s", glob ? "-glob" : "") == -1)
- subop = NULL;
+ /* new syntax:
+ *
+ * volume geo-replication $m $s create [[ssh-port n] [[no-verify] |
+ * [push-pem]]] [force] volume geo-replication [$m [$s]] status [detail]
+ * volume geo-replication [$m] $s config [[!]$opt [$val]]
+ * volume geo-replication $m $s start|stop [force]
+ * volume geo-replication $m $s delete [reset-sync-time]
+ * volume geo-replication $m $s pause [force]
+ * volume geo-replication $m $s resume [force]
+ */
+
+ if (wordcount < 3)
+ goto out;
+
+ for (i = 2; i <= 3 && i < wordcount - 1; i++) {
+ if (gsyncd_glob_check(words[i]))
+ glob = i;
+ if (gsyncd_url_check(words[i])) {
+ slavei = i;
+ break;
+ }
+ }
+
+ if (glob && !slavei)
+ /* glob is allowed only for config, thus it implies there is a
+ * slave argument; but that might have not been recognized on
+ * the first scan as it's url characteristics has been covered
+ * by the glob syntax.
+ *
+ * In this case, the slave is perforce the last glob-word -- the
+ * upcoming one is neither glob, nor url, so it's definitely not
+ * the slave.
+ */
+ slavei = glob;
+ if (slavei) {
+ cmdi = slavei + 1;
+ if (slavei == 3)
+ masteri = 2;
+ } else if (i <= 4) {
+ if (strtail("detail", (char *)words[wordcount - 1])) {
+ cmdi = wordcount - 2;
+ if (i == 4)
+ masteri = 2;
+ } else {
+ /* no $s, can only be status cmd
+ * (with either a single $m before it or nothing)
+ * -- these conditions imply that i <= 3 after
+ * the iteration and that i is the successor of
+ * the (0 or 1 length) sequence of $m-s.
+ */
+ cmdi = i;
+ if (i == 3)
+ masteri = 2;
+ }
+ } else
+ goto out;
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
+ /* now check if input really complies syntax
+ * (in a somewhat redundant way, in favor
+ * transparent soundness)
+ */
- /* join the varargs by spaces to get the op_value */
+ if (masteri && gsyncd_url_check(words[masteri]))
+ goto out;
- for (i = cmdi + 2; i < wordcount; i++)
- append_len += (strlen (words[i]) + 1);
- /* trailing strcat will add two bytes, make space for that */
- append_len++;
+ if (slavei && !glob && !valid_slave_gsyncd_url(words[slavei])) {
+ gf_asprintf(errstr, "Invalid slave url: %s", words[slavei]);
+ goto out;
+ }
- append_str = GF_CALLOC (1, append_len, cli_mt_append_str);
- if (!append_str) {
- ret = -1;
- goto out;
- }
+ w = str_getunamb(words[cmdi], opwords);
+ if (!w)
+ goto out;
- for (i = cmdi + 2; i < wordcount; i++) {
- strcat (append_str, words[i]);
- strcat (append_str, " ");
- }
- append_str[append_len - 2] = '\0';
-
- /* "checkpoint now" is special: we resolve that "now" */
- if (strcmp (words[cmdi + 1], "checkpoint") == 0 &&
- strcmp (append_str, "now") == 0) {
- struct timeval tv = {0,};
- struct tm *tm = NULL;
-
- ret = gettimeofday (&tv, NULL);
- if (ret == -1)
- goto out;
- tm = localtime (&tv.tv_sec);
-
- GF_FREE (append_str);
- append_str = GF_CALLOC (1, 300, cli_mt_append_str);
- if (!append_str) {
- ret = -1;
- goto out;
- }
- strcpy (append_str, "as of ");
- strftime (append_str + strlen ("as of "),
- 300 - strlen ("as of "),
- "%Y-%m-%d %H:%M:%S", tm);
- }
+ if (strcmp(w, "create") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CREATE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "status") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_STATUS;
+
+ if (slavei && !masteri)
+ goto out;
+ } else if (strcmp(w, "config") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CONFIG;
+
+ if (!slavei)
+ goto out;
+ } else if (strcmp(w, "start") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_START;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "stop") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_STOP;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "delete") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_DELETE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "pause") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_PAUSE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "resume") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_RESUME;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else
+ GF_ASSERT(!"opword mismatch");
+
+ ret = force_push_pem_no_verify_parse(words, wordcount, dict, &cmdi);
+ if (ret)
+ goto out;
- ret = dict_set_dynstr (dict, "op_value", append_str);
- }
+ if (strtail("detail", (char *)words[wordcount - 1])) {
+ if (!strtail("status", (char *)words[wordcount - 2])) {
+ ret = -1;
+ goto out;
+ }
- if (!subop || dict_set_dynstr (dict, "subop", subop) != 0)
- ret = -1;
+ ret = dict_set_uint32(dict, "status-detail", _gf_true);
+ if (ret)
+ goto out;
+ cmdi++;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_DELETE &&
+ !strcmp((char *)words[wordcount - 1], "reset-sync-time")) {
+ if (strcmp((char *)words[wordcount - 2], "delete")) {
+ ret = -1;
+ goto out;
}
+ ret = dict_set_uint32(dict, "reset-sync-time", _gf_true);
+ if (ret)
+ goto out;
+ cmdi++;
+ }
+
+ if (type != GF_GSYNC_OPTION_TYPE_CONFIG && (cmdi < wordcount - 1 || glob)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* If got so far, input is valid, assemble the message */
+
+ ret = 0;
+
+ if (masteri) {
+ ret = dict_set_str(dict, "master", (char *)words[masteri]);
+ if (!ret)
+ ret = dict_set_str(dict, "volname", (char *)words[masteri]);
+ }
+ if (!ret && slavei) {
+ /* If geo-rep is created with root user using the syntax
+ * gluster vol geo-rep <mastervol> root@<slavehost> ...
+ * pass down only <slavehost> else pass as it is.
+ */
+ slave_temp = gf_strdup(words[slavei]);
+ if (slave_temp == NULL) {
+ ret = -1;
+ goto out;
+ }
+ token = strtok_r(slave_temp, "@", &save_ptr);
+ if (token && !strcmp(token, "root")) {
+ ret = dict_set_str(dict, "slave", (char *)words[slavei] + 5);
+ } else {
+ ret = dict_set_str(dict, "slave", (char *)words[slavei]);
+ }
+ }
+ if (!ret)
+ ret = dict_set_int32(dict, "type", type);
+ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+ if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") &&
+ !strcmp((char *)words[wordcount - 1], "true")) {
+ question =
+ "There exists ~15 seconds delay for the option to take"
+ " effect from stime of the corresponding brick. Please"
+ " check the log for the time, the option is effective."
+ " Proceed";
+
+ answer = cli_cmd_get_confirmation(state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_INFO,
+ "Operation "
+ "cancelled, exiting");
+ *errstr = gf_strdup("Aborted by user.");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = config_parse(words, wordcount, dict, cmdi, glob);
+ }
out:
- if (ret) {
- if (dict)
- dict_destroy (dict);
- if (append_str)
- GF_FREE (append_str);
- } else
- *options = dict;
+ if (slave_temp)
+ GF_FREE(slave_temp);
+ if (ret && dict)
+ dict_unref(dict);
+ else
+ *options = dict;
- return ret;
+ return ret;
}
int32_t
-cli_cmd_volume_profile_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_profile_parse(const char **words, int wordcount,
+ dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- char *opwords[] = { "start", "stop", "info", NULL };
- char *w = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ gf1_cli_info_op info_op = GF_CLI_INFO_NONE;
+ gf_boolean_t is_peek = _gf_false;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ static char *opwords[] = {"start", "stop", "info", NULL};
+ char *w = NULL;
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- if (wordcount < 4 || wordcount >5)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[2];
+ if (wordcount < 4)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ volname = (char *)words[2];
- w = str_getunamb (words[3], opwords);
- if (!w) {
- ret = -1;
- goto out;
- }
- if (strcmp (w, "start") == 0) {
- op = GF_CLI_STATS_START;
- } else if (strcmp (w, "stop") == 0) {
- op = GF_CLI_STATS_STOP;
- } else if (strcmp (w, "info") == 0) {
- op = GF_CLI_STATS_INFO;
- } else
- GF_ASSERT (!"opword mismatch");
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- ret = dict_set_int32 (dict, "op", (int32_t)op);
- if (ret)
- goto out;
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ }
- if (wordcount == 5) {
- if (!strcmp (words[4], "nfs")) {
- ret = dict_set_int32 (dict, "nfs", _gf_true);
- if (ret)
- goto out;
+ if ((strcmp(w, "start") == 0 || strcmp(w, "stop") == 0) && wordcount > 5) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "info") == 0 && wordcount > 7) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "start") == 0) {
+ op = GF_CLI_STATS_START;
+ } else if (strcmp(w, "stop") == 0) {
+ op = GF_CLI_STATS_STOP;
+ } else if (strcmp(w, "info") == 0) {
+ op = GF_CLI_STATS_INFO;
+ info_op = GF_CLI_INFO_ALL;
+ if (wordcount > 4) {
+ if (strcmp(words[4], "incremental") == 0) {
+ info_op = GF_CLI_INFO_INCREMENTAL;
+ if (wordcount > 5 && strcmp(words[5], "peek") == 0) {
+ is_peek = _gf_true;
}
+ } else if (strcmp(words[4], "cumulative") == 0) {
+ info_op = GF_CLI_INFO_CUMULATIVE;
+ } else if (strcmp(words[4], "clear") == 0) {
+ info_op = GF_CLI_INFO_CLEAR;
+ } else if (strcmp(words[4], "peek") == 0) {
+ is_peek = _gf_true;
+ }
}
+ } else
+ GF_ASSERT(!"opword mismatch");
- *options = dict;
+ ret = dict_set_int32(dict, "op", (int32_t)op);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "info-op", (int32_t)info_op);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "peek", is_peek);
+ if (ret)
+ goto out;
+
+ if (!strcmp(words[wordcount - 1], "nfs")) {
+ ret = dict_set_int32(dict, "nfs", _gf_true);
+ if (ret)
+ goto out;
+ }
+
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
- return ret;
+ if (ret && dict)
+ dict_unref(dict);
+ return ret;
}
int32_t
-cli_cmd_volume_top_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_top_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *value = NULL;
- char *key = NULL;
- int ret = -1;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
- int32_t list_cnt = -1;
- int index = 0;
- int perf = 0;
- uint32_t blk_size = 0;
- uint32_t count = 0;
- gf_boolean_t nfs = _gf_false;
- char *delimiter = NULL;
- char *opwords[] = { "open", "read", "write", "opendir",
- "readdir", "read-perf", "write-perf",
- NULL };
- char *w = NULL;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- if (wordcount < 4)
- goto out;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *value = NULL;
+ char *key = NULL;
+ int ret = -1;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
+ int32_t list_cnt = -1;
+ int index = 0;
+ int perf = 0;
+ int32_t blk_size = 0;
+ int count = 0;
+ gf_boolean_t nfs = _gf_false;
+ char *delimiter = NULL;
+ static char *opwords[] = {"open", "read", "write",
+ "opendir", "readdir", "read-perf",
+ "write-perf", "clear", NULL};
+ char *w = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[2];
+ if (wordcount < 4)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ volname = (char *)words[2];
- op = GF_CLI_STATS_TOP;
- ret = dict_set_int32 (dict, "op", (int32_t)op);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- w = str_getunamb (words[3], opwords);
- if (!w) {
- ret = -1;
- goto out;
- }
- if (strcmp (w, "open") == 0) {
- top_op = GF_CLI_TOP_OPEN;
- } else if (strcmp (w, "read") == 0) {
- top_op = GF_CLI_TOP_READ;
- } else if (strcmp (w, "write") == 0) {
- top_op = GF_CLI_TOP_WRITE;
- } else if (strcmp (w, "opendir") == 0) {
- top_op = GF_CLI_TOP_OPENDIR;
- } else if (strcmp (w, "readdir") == 0) {
- top_op = GF_CLI_TOP_READDIR;
- } else if (strcmp (w, "read-perf") == 0) {
- top_op = GF_CLI_TOP_READ_PERF;
- perf = 1;
- } else if (strcmp (w, "write-perf") == 0) {
- top_op = GF_CLI_TOP_WRITE_PERF;
- perf = 1;
- } else
- GF_ASSERT (!"opword mismatch");
- ret = dict_set_int32 (dict, "top-op", (int32_t)top_op);
- if (ret)
- goto out;
+ op = GF_CLI_STATS_TOP;
+ ret = dict_set_int32(dict, "op", (int32_t)op);
+ if (ret)
+ goto out;
- if ((wordcount > 4) && !strcmp (words[4], "nfs")) {
- nfs = _gf_true;
- ret = dict_set_int32 (dict, "nfs", nfs);
- if (ret)
- goto out;
- index = 5;
- } else {
- index = 4;
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ }
+ if (strcmp(w, "open") == 0) {
+ top_op = GF_CLI_TOP_OPEN;
+ } else if (strcmp(w, "read") == 0) {
+ top_op = GF_CLI_TOP_READ;
+ } else if (strcmp(w, "write") == 0) {
+ top_op = GF_CLI_TOP_WRITE;
+ } else if (strcmp(w, "opendir") == 0) {
+ top_op = GF_CLI_TOP_OPENDIR;
+ } else if (strcmp(w, "readdir") == 0) {
+ top_op = GF_CLI_TOP_READDIR;
+ } else if (strcmp(w, "read-perf") == 0) {
+ top_op = GF_CLI_TOP_READ_PERF;
+ perf = 1;
+ } else if (strcmp(w, "write-perf") == 0) {
+ top_op = GF_CLI_TOP_WRITE_PERF;
+ perf = 1;
+ } else if (strcmp(w, "clear") == 0) {
+ ret = dict_set_int32(dict, "clear-stats", 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not set clear-stats in dict");
+ goto out;
}
+ } else
+ GF_ASSERT(!"opword mismatch");
+ ret = dict_set_int32(dict, "top-op", (int32_t)top_op);
+ if (ret)
+ goto out;
- for (; index < wordcount; index+=2) {
-
- key = (char *) words[index];
- value = (char *) words[index+1];
-
- if ( key && !value ) {
- ret = -1;
- goto out;
- }
- if (!strcmp (key, "brick")) {
- delimiter = strchr (value, ':');
- if (!delimiter || delimiter == value
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", value);
- ret = -1;
- goto out;
- } else {
- ret = cli_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- ret = dict_set_str (dict, "brick", value);
-
- } else if (!strcmp (key, "list-cnt")) {
- ret = gf_is_str_int (value);
- if (!ret)
- list_cnt = atoi (value);
- if (ret || (list_cnt < 0) || (list_cnt > 100)) {
- cli_err ("list-cnt should be between 0 to 100");
- ret = -1;
- goto out;
- }
- } else if (perf && !nfs && !strcmp (key, "bs")) {
- ret = gf_is_str_int (value);
- if (!ret)
- blk_size = atoi (value);
- if (ret || (blk_size <= 0)) {
- if (blk_size < 0)
- cli_err ("block size is an invalid"
- " number");
- else
- cli_err ("block size should be an "
- "integer greater than zero");
- ret = -1;
- goto out;
- }
- ret = dict_set_uint32 (dict, "blk-size", blk_size);
- } else if (perf && !nfs && !strcmp (key, "count")) {
- ret = gf_is_str_int (value);
- if (!ret)
- count = atoi(value);
- if (ret || (count <= 0)) {
- if (count < 0)
- cli_err ("count is an invalid number");
- else
- cli_err ("count should be an integer "
- "greater than zero");
-
- ret = -1;
- goto out;
- }
- ret = dict_set_uint32 (dict, "blk-cnt", count);
- } else {
- ret = -1;
- goto out;
- }
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Dict set failed for "
- "key %s", key);
- goto out;
- }
+ if ((wordcount > 4) && !strcmp(words[4], "nfs")) {
+ nfs = _gf_true;
+ ret = dict_set_int32(dict, "nfs", nfs);
+ if (ret)
+ goto out;
+ index = 5;
+ } else {
+ index = 4;
+ }
+
+ for (; index < wordcount; index += 2) {
+ key = (char *)words[index];
+ value = (char *)words[index + 1];
+
+ if (!key || !value) {
+ ret = -1;
+ goto out;
}
- if (list_cnt == -1)
- list_cnt = 100;
- ret = dict_set_int32 (dict, "list-cnt", list_cnt);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Dict set failed for list_cnt");
+ if (!strcmp(key, "brick")) {
+ delimiter = strchr(value, ':');
+ if (!delimiter || delimiter == value || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ value);
+ ret = -1;
goto out;
- }
-
- if ((blk_size > 0) ^ (count > 0)) {
- cli_err ("Need to give both 'bs' and 'count'");
+ } else {
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
+ }
+ ret = dict_set_str(dict, "brick", value);
+
+ } else if (!strcmp(key, "list-cnt")) {
+ ret = gf_is_str_int(value);
+ if (!ret)
+ list_cnt = atoi(value);
+ if (ret || (list_cnt < 0) || (list_cnt > 100)) {
+ cli_err("list-cnt should be between 0 to 100");
ret = -1;
goto out;
- } else if (((uint64_t)blk_size * count) > (10 * GF_UNIT_GB)) {
- cli_err ("'bs * count' value %"PRIu64" is greater than "
- "maximum allowed value of 10GB",
- ((uint64_t)blk_size * count));
+ }
+ } else if (perf && !nfs && !strcmp(key, "bs")) {
+ ret = gf_is_str_int(value);
+ if (!ret)
+ blk_size = atoi(value);
+ if (ret || (blk_size <= 0)) {
+ if (blk_size < 0)
+ cli_err(
+ "block size is an invalid"
+ " number");
+ else
+ cli_err(
+ "block size should be an "
+ "integer greater than zero");
ret = -1;
goto out;
+ }
+ ret = dict_set_uint32(dict, "blk-size", (uint32_t)blk_size);
+ } else if (perf && !nfs && !strcmp(key, "count")) {
+ ret = gf_is_str_int(value);
+ if (!ret)
+ count = atoi(value);
+ if (ret || (count <= 0)) {
+ if (count < 0)
+ cli_err("count is an invalid number");
+ else
+ cli_err(
+ "count should be an integer "
+ "greater than zero");
+
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32(dict, "blk-cnt", count);
+ } else {
+ ret = -1;
+ goto out;
}
+ if (ret) {
+ gf_log("", GF_LOG_WARNING,
+ "Dict set failed for "
+ "key %s",
+ key);
+ goto out;
+ }
+ }
+ if (list_cnt == -1)
+ list_cnt = 100;
+ ret = dict_set_int32(dict, "list-cnt", list_cnt);
+ if (ret) {
+ gf_log("", GF_LOG_WARNING, "Dict set failed for list_cnt");
+ goto out;
+ }
- *options = dict;
+ if ((blk_size > 0) ^ (count > 0)) {
+ cli_err("Need to give both 'bs' and 'count'");
+ ret = -1;
+ goto out;
+ } else if (((uint64_t)blk_size * count) > (10 * GF_UNIT_GB)) {
+ cli_err("'bs * count' value %" PRIu64
+ " is greater than "
+ "maximum allowed value of 10GB",
+ ((uint64_t)blk_size * count));
+ ret = -1;
+ goto out;
+ }
+
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
- return ret;
+ if (ret && dict)
+ dict_unref(dict);
+ return ret;
}
uint32_t
-cli_cmd_get_statusop (const char *arg)
+cli_cmd_get_statusop(const char *arg)
{
- int i = 0;
- uint32_t ret = GF_CLI_STATUS_NONE;
- char *w = NULL;
- char *opwords[] = {"detail", "mem", "clients", "fd",
- "inode", "callpool", NULL};
- struct {
- char *opname;
- uint32_t opcode;
- } optable[] = {
- { "detail", GF_CLI_STATUS_DETAIL },
- { "mem", GF_CLI_STATUS_MEM },
- { "clients", GF_CLI_STATUS_CLIENTS },
- { "fd", GF_CLI_STATUS_FD },
- { "inode", GF_CLI_STATUS_INODE },
- { "callpool", GF_CLI_STATUS_CALLPOOL },
- { NULL }
- };
-
- w = str_getunamb (arg, opwords);
- if (!w) {
- gf_log ("cli", GF_LOG_DEBUG,
- "Not a status op %s", arg);
- goto out;
- }
+ int i = 0;
+ uint32_t ret = GF_CLI_STATUS_NONE;
+ char *w = NULL;
+ static char *opwords[] = {"detail", "mem", "clients", "fd", "inode",
+ "callpool", "tasks", "client-list", NULL};
+ static struct {
+ char *opname;
+ uint32_t opcode;
+ } optable[] = {{"detail", GF_CLI_STATUS_DETAIL},
+ {"mem", GF_CLI_STATUS_MEM},
+ {"clients", GF_CLI_STATUS_CLIENTS},
+ {"fd", GF_CLI_STATUS_FD},
+ {"inode", GF_CLI_STATUS_INODE},
+ {"callpool", GF_CLI_STATUS_CALLPOOL},
+ {"tasks", GF_CLI_STATUS_TASKS},
+ {"client-list", GF_CLI_STATUS_CLIENT_LIST},
+ {NULL}};
+
+ w = str_getunamb(arg, opwords);
+ if (!w) {
+ gf_log("cli", GF_LOG_DEBUG, "Not a status op %s", arg);
+ goto out;
+ }
- for (i = 0; optable[i].opname; i++) {
- if (!strcmp (w, optable[i].opname)) {
- ret = optable[i].opcode;
- break;
- }
+ for (i = 0; optable[i].opname; i++) {
+ if (!strcmp(w, optable[i].opname)) {
+ ret = optable[i].opcode;
+ break;
}
+ }
- out:
- return ret;
+out:
+ return ret;
}
int
-cli_cmd_volume_status_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_status_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- int ret = -1;
- uint32_t cmd = 0;
-
- GF_ASSERT (options);
+ dict_t *dict = NULL;
+ int ret = -1;
+ uint32_t cmd = 0;
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_ASSERT(options);
- switch (wordcount) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ switch (wordcount) {
case 2:
- cmd = GF_CLI_STATUS_ALL;
- ret = 0;
- break;
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
+ break;
case 3:
- if (!strcmp (words[2], "all")) {
- cmd = GF_CLI_STATUS_ALL;
- ret = 0;
+ if (!strcmp(words[2], "all")) {
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
- } else {
- cmd = GF_CLI_STATUS_VOL;
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- }
+ } else {
+ cmd = GF_CLI_STATUS_VOL;
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ }
- break;
+ break;
case 4:
- cmd = cli_cmd_get_statusop (words[3]);
-
- if (!strcmp (words[2], "all")) {
- if (cmd == GF_CLI_STATUS_NONE) {
- cli_err ("%s is not a valid status option",
- words[3]);
- ret = -1;
- goto out;
- }
- cmd |= GF_CLI_STATUS_ALL;
- ret = 0;
+ cmd = cli_cmd_get_statusop(words[3]);
- } else {
- ret = dict_set_str (dict, "volname",
- (char *)words[2]);
- if (ret)
- goto out;
-
- if (cmd == GF_CLI_STATUS_NONE) {
- if (!strcmp (words[3], "nfs")) {
- cmd |= GF_CLI_STATUS_NFS;
- } else if (!strcmp (words[3], "shd")) {
- cmd |= GF_CLI_STATUS_SHD;
- } else {
- cmd = GF_CLI_STATUS_BRICK;
- ret = dict_set_str (dict, "brick",
- (char *)words[3]);
- }
-
- } else {
- cmd |= GF_CLI_STATUS_VOL;
- ret = 0;
- }
- }
-
- break;
-
- case 5:
- if (!strcmp (words[2], "all")) {
- cli_err ("Cannot specify brick/nfs for \"all\"");
- ret = -1;
- goto out;
- }
-
- cmd = cli_cmd_get_statusop (words[4]);
+ if (!strcmp(words[2], "all")) {
if (cmd == GF_CLI_STATUS_NONE) {
- cli_err ("%s is not a valid status option",
- words[4]);
- ret = -1;
- goto out;
+ cli_err("%s is not a valid status option", words[3]);
+ ret = -1;
+ goto out;
}
+ cmd |= GF_CLI_STATUS_ALL;
+ ret = 0;
-
- ret = dict_set_str (dict, "volname", (char *)words[2]);
+ } else {
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
if (ret)
- goto out;
+ goto out;
- if (!strcmp (words[3], "nfs")) {
- if (cmd == GF_CLI_STATUS_FD ||
- cmd == GF_CLI_STATUS_DETAIL) {
- cli_err ("Detail/FD status not available"
- " for NFS Servers");
- ret = -1;
- goto out;
- }
+ if (cmd == GF_CLI_STATUS_NONE) {
+ if (!strcmp(words[3], "nfs")) {
cmd |= GF_CLI_STATUS_NFS;
- } else if (!strcmp (words[3], "shd")){
- if (cmd == GF_CLI_STATUS_FD ||
- cmd == GF_CLI_STATUS_CLIENTS ||
- cmd == GF_CLI_STATUS_DETAIL) {
- cli_err ("Detail/FD/Clients status not "
- "available for Self-heal Daemons");
- ret = -1;
- goto out;
- }
+ } else if (!strcmp(words[3], "shd")) {
cmd |= GF_CLI_STATUS_SHD;
+ } else if (!strcmp(words[3], "quotad")) {
+ cmd |= GF_CLI_STATUS_QUOTAD;
+ } else if (!strcmp(words[3], "snapd")) {
+ cmd |= GF_CLI_STATUS_SNAPD;
+ } else if (!strcmp(words[3], "bitd")) {
+ cmd |= GF_CLI_STATUS_BITD;
+ } else if (!strcmp(words[3], "scrub")) {
+ cmd |= GF_CLI_STATUS_SCRUB;
+ } else {
+ cmd = GF_CLI_STATUS_BRICK;
+ ret = dict_set_str(dict, "brick", (char *)words[3]);
+ }
+
} else {
- cmd |= GF_CLI_STATUS_BRICK;
- ret = dict_set_str (dict, "brick", (char *)words[3]);
+ cmd |= GF_CLI_STATUS_VOL;
+ ret = 0;
}
- break;
+ }
- default:
+ break;
+
+ case 5:
+ if (!strcmp(words[2], "all")) {
+ cli_err("Cannot specify brick/nfs for \"all\"");
+ ret = -1;
goto out;
- }
+ }
- if (ret)
+ cmd = cli_cmd_get_statusop(words[4]);
+ if (cmd == GF_CLI_STATUS_NONE) {
+ cli_err("%s is not a valid status option", words[4]);
+ ret = -1;
goto out;
+ }
- ret = dict_set_int32 (dict, "cmd", cmd);
- if (ret)
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret)
goto out;
- *options = dict;
+ if (!strcmp(words[3], "nfs")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err(
+ "Detail/FD/Tasks status not available"
+ " for NFS Servers");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_NFS;
+ } else if (!strcmp(words[3], "shd")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL || cmd == GF_CLI_STATUS_TASKS) {
+ cli_err(
+ "Detail/FD/Clients/Tasks status not "
+ "available for Self-heal Daemons");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_SHD;
+ } else if (!strcmp(words[3], "quotad")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL || cmd == GF_CLI_STATUS_INODE) {
+ cli_err(
+ "Detail/FD/Clients/Inode status not "
+ "available for Quota Daemon");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_QUOTAD;
+ } else if (!strcmp(words[3], "snapd")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL || cmd == GF_CLI_STATUS_INODE) {
+ cli_err(
+ "Detail/FD/Clients/Inode status not "
+ "available for snap daemon");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_SNAPD;
+ } else {
+ if (cmd == GF_CLI_STATUS_TASKS) {
+ cli_err(
+ "Tasks status not available for "
+ "bricks");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_BRICK;
+ ret = dict_set_str(dict, "brick", (char *)words[3]);
+ }
+ break;
- out:
- if (ret && dict)
- dict_destroy (dict);
+ default:
+ goto out;
+ }
- return ret;
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_unref(dict);
+
+ return ret;
}
gf_boolean_t
-cli_cmd_validate_dumpoption (const char *arg, char **option)
+cli_cmd_validate_dumpoption(const char *arg, char **option)
{
- char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool", "priv",
- "fd", "inode", "history", "inodectx", "fdctx",
- NULL};
- char *w = NULL;
+ static char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool",
+ "priv", "fd", "inode", "history", "inodectx",
+ "fdctx", "quotad", NULL};
+ char *w = NULL;
+
+ w = str_getunamb(arg, opwords);
+ if (!w) {
+ gf_log("cli", GF_LOG_DEBUG, "Unknown statedump option %s", arg);
+ return _gf_false;
+ }
+ *option = w;
+ return _gf_true;
+}
- w = str_getunamb (arg, opwords);
- if (!w) {
- gf_log ("cli", GF_LOG_DEBUG, "Unknown statedump option %s",
- arg);
- return _gf_false;
+int
+cli_cmd_volume_statedump_options_parse(const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = 0;
+ int i = 0;
+ dict_t *dict = NULL;
+ int option_cnt = 0;
+ char *option = NULL;
+ char *option_str = NULL;
+ char *tmp_str = NULL;
+ char *tmp = NULL;
+ char *ip_addr = NULL;
+ char *pid = NULL;
+
+ if ((wordcount >= 5) && ((strcmp(words[3], "client")) == 0)) {
+ tmp = gf_strdup(words[4]);
+ if (!tmp) {
+ ret = -1;
+ goto out;
}
- *option = w;
- return _gf_true;
+ ip_addr = strtok(tmp, ":");
+ pid = strtok(NULL, ":");
+ if (valid_internet_address(ip_addr, _gf_true, _gf_false) && pid &&
+ gf_valid_pid(pid, strlen(pid))) {
+ ret = gf_asprintf(&option_str, "%s %s %s", words[3], ip_addr, pid);
+ if (ret < 0) {
+ goto out;
+ }
+ option_cnt = 3;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ for (i = 3; i < wordcount; i++, option_cnt++) {
+ if (!cli_cmd_validate_dumpoption(words[i], &option)) {
+ ret = -1;
+ goto out;
+ }
+ tmp_str = option_str;
+ option_str = NULL;
+ ret = gf_asprintf(&option_str, "%s%s ", tmp_str ? tmp_str : "",
+ option);
+ GF_FREE(tmp_str);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+ if (option_str && (strstr(option_str, "nfs")) &&
+ strstr(option_str, "quotad")) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ /* dynamic string in dict is freed up when dict is freed up, and hence
+ if option_str is NULL pass in an duplicate empty string to the same */
+ ret = dict_set_dynstr(dict, "options",
+ (option_str ? option_str : gf_strdup("")));
+ if (ret)
+ goto out;
+ option_str = NULL;
+
+ ret = dict_set_int32(dict, "option_cnt", option_cnt);
+ if (ret)
+ goto out;
+
+ *options = dict;
+out:
+ GF_FREE(tmp);
+ GF_FREE(option_str);
+ if (ret && dict)
+ dict_unref(dict);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Error parsing dumpoptions");
+ return ret;
}
int
-cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_clrlks_opts_parse(const char **words, int wordcount,
+ dict_t **options)
{
- int ret = 0;
- int i = 0;
- dict_t *dict = NULL;
- int option_cnt = 0;
- char *option = NULL;
- char option_str[100] = {0,};
+ int ret = -1;
+ int i = 0;
+ dict_t *dict = NULL;
+ char *kind_opts[4] = {"blocked", "granted", "all", NULL};
+ char *types[4] = {"inode", "entry", "posix", NULL};
+ char *free_ptr = NULL;
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- for (i = 3; i < wordcount; i++, option_cnt++) {
- if (!cli_cmd_validate_dumpoption (words[i], &option)) {
- ret = -1;
- goto out;
- }
- strncat (option_str, option, strlen (option));
- strncat (option_str, " ", 1);
+ if (strcmp(words[4], "kind"))
+ goto out;
+
+ for (i = 0; kind_opts[i]; i++) {
+ if (!strcmp(words[5], kind_opts[i])) {
+ free_ptr = gf_strdup(words[5]);
+ ret = dict_set_dynstr(dict, "kind", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ break;
}
+ }
+ if (i == 3)
+ goto out;
- dict = dict_new ();
- if (!dict)
+ ret = -1;
+ for (i = 0; types[i]; i++) {
+ if (!strcmp(words[6], types[i])) {
+ free_ptr = gf_strdup(words[6]);
+ ret = dict_set_dynstr(dict, "type", free_ptr);
+ if (ret)
goto out;
+ free_ptr = NULL;
+ break;
+ }
+ }
+ if (i == 3)
+ goto out;
- ret = dict_set_dynstr (dict, "options", gf_strdup (option_str));
+ if (wordcount == 8) {
+ free_ptr = gf_strdup(words[7]);
+ ret = dict_set_dynstr(dict, "opts", free_ptr);
if (ret)
+ goto out;
+ free_ptr = NULL;
+ }
+
+ ret = 0;
+ *options = dict;
+out:
+ if (ret) {
+ GF_FREE(free_ptr);
+ dict_unref(dict);
+ }
+
+ return ret;
+}
+
+static int
+extract_hostname_path_from_token(const char *tmp_words, char **hostname,
+ char **path)
+{
+ int ret = 0;
+ char *delimiter = NULL;
+ char *tmp_host = NULL;
+ char *host_name = NULL;
+ char *words = NULL;
+ int str_len = 0;
+ *hostname = NULL;
+ *path = NULL;
+
+ str_len = strlen(tmp_words) + 1;
+ words = GF_MALLOC(str_len, gf_common_mt_char);
+ if (!words) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(words, str_len, "%s", tmp_words);
+
+ if (validate_brick_name(words)) {
+ cli_err(
+ "Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words, ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret) {
+ goto out;
+ } else {
+ str_len = strlen(delimiter + 1) + 1;
+ *path = GF_MALLOC(str_len, gf_common_mt_char);
+ if (!*path) {
+ ret = -1;
goto out;
+ }
+ snprintf(*path, str_len, "%s", delimiter + 1);
+ }
+ }
- ret = dict_set_int32 (dict, "option_cnt", option_cnt);
- if (ret)
+ tmp_host = gf_strdup(words);
+ if (!tmp_host) {
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name(tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to allocate "
+ "memory");
+ goto out;
+ }
+ if (!(strcmp(host_name, "localhost") && strcmp(host_name, "127.0.0.1") &&
+ strncmp(host_name, "0.", 2))) {
+ cli_err(
+ "Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+ cli_err(
+ "internet address '%s' does not conform to "
+ "standards",
+ host_name);
+ ret = -1;
+ goto out;
+ }
+
+ str_len = strlen(host_name) + 1;
+ *hostname = GF_MALLOC(str_len, gf_common_mt_char);
+ if (!*hostname) {
+ ret = -1;
+ goto out;
+ }
+ snprintf(*hostname, str_len, "%s", host_name);
+ ret = 0;
+
+out:
+ GF_FREE(words);
+ GF_FREE(tmp_host);
+ return ret;
+}
+
+static int
+set_hostname_path_in_dict(const char *token, dict_t *dict, int heal_op)
+{
+ char *hostname = NULL;
+ char *path = NULL;
+ int ret = 0;
+
+ ret = extract_hostname_path_from_token(token, &hostname, &path);
+ if (ret)
+ goto out;
+
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_set_dynstr(dict, "heal-source-hostname", hostname);
+ if (ret)
+ goto out;
+ hostname = NULL;
+ ret = dict_set_dynstr(dict, "heal-source-brickpath", path);
+ if (ret) {
+ goto out;
+ }
+ path = NULL;
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ ret = dict_set_dynstr(dict, "per-replica-cmd-hostname", hostname);
+ if (ret)
goto out;
+ hostname = NULL;
+ ret = dict_set_dynstr(dict, "per-replica-cmd-path", path);
+ if (ret) {
+ goto out;
+ }
+ path = NULL;
+ break;
+ default:
+ ret = -1;
+ break;
+ }
- *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR, "Error parsing dumpoptions");
- return ret;
+ GF_FREE(hostname);
+ GF_FREE(path);
+ return ret;
+}
+
+static int
+heal_command_type_get(const char *command)
+{
+ int i = 0;
+ /* subcommands are set as NULL */
+ char *heal_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
+ [GF_SHD_OP_INVALID] = NULL,
+ [GF_SHD_OP_HEAL_INDEX] = NULL,
+ [GF_SHD_OP_HEAL_FULL] = "full",
+ [GF_SHD_OP_INDEX_SUMMARY] = "info",
+ [GF_SHD_OP_SPLIT_BRAIN_FILES] = NULL,
+ [GF_SHD_OP_STATISTICS] = "statistics",
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT] = NULL,
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = NULL,
+ [GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE] = NULL,
+ [GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK] = NULL,
+ [GF_SHD_OP_HEAL_ENABLE] = "enable",
+ [GF_SHD_OP_HEAL_DISABLE] = "disable",
+ };
+
+ for (i = 0; i <= GF_SHD_OP_HEAL_DISABLE; i++) {
+ if (heal_cmds[i] && (strcmp(heal_cmds[i], command) == 0))
+ return i;
+ }
+
+ return GF_SHD_OP_INVALID;
}
int
-cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
+cli_cmd_volume_heal_options_parse(const char **words, int wordcount,
dict_t **options)
{
- int ret = -1;
- int i = 0;
- dict_t *dict = NULL;
- char *kind_opts[4] = {"blocked", "granted", "all", NULL};
- char *types[4] = {"inode", "entry", "posix", NULL};
- char *free_ptr = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to create the dict");
+ ret = -1;
+ goto out;
+ }
- if (strcmp (words[4], "kind"))
- goto out;
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set volname");
+ goto out;
+ }
+
+ if (wordcount == 3) {
+ ret = dict_set_int32(dict, "heal-op", GF_SHD_OP_HEAL_INDEX);
+ goto done;
+ }
+
+ if (wordcount == 4) {
+ op = heal_command_type_get(words[3]);
+ if (op == GF_SHD_OP_INVALID) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "heal-op", op);
+ goto done;
+ }
- for (i = 0; kind_opts[i]; i++) {
- if (!strcmp (words[5], kind_opts[i])) {
- free_ptr = gf_strdup (words[5]);
- ret = dict_set_dynstr (dict, "kind", free_ptr);
- if (ret)
- goto out;
- free_ptr = NULL;
- break;
- }
+ if (wordcount == 5) {
+ if (strcmp(words[3], "info") && strcmp(words[3], "statistics") &&
+ strcmp(words[3], "granular-entry-heal")) {
+ ret = -1;
+ goto out;
}
- if (i == 3)
- goto out;
+ if (!strcmp(words[3], "info")) {
+ if (!strcmp(words[4], "split-brain")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SPLIT_BRAIN_FILES);
+ goto done;
+ }
+ if (!strcmp(words[4], "summary")) {
+ ret = dict_set_int32(dict, "heal-op", GF_SHD_OP_HEAL_SUMMARY);
+ goto done;
+ }
+ }
+
+ if (!strcmp(words[3], "statistics")) {
+ if (!strcmp(words[4], "heal-count")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_STATISTICS_HEAL_COUNT);
+ goto done;
+ }
+ }
+
+ if (!strcmp(words[3], "granular-entry-heal")) {
+ if (!strcmp(words[4], "enable")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE);
+ goto done;
+ } else if (!strcmp(words[4], "disable")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE);
+ goto done;
+ }
+ }
+
+ ret = -1;
+ goto out;
+ }
+ if (wordcount == 6) {
+ if (strcmp(words[3], "split-brain")) {
+ ret = -1;
+ goto out;
+ }
+ if (!strcmp(words[4], "bigger-file")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "file", (char *)words[5]);
+ if (ret)
+ goto out;
+ goto done;
+ }
+ if (!strcmp(words[4], "latest-mtime")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "file", (char *)words[5]);
+ if (ret)
+ goto out;
+ goto done;
+ }
+ if (!strcmp(words[4], "source-brick")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ ret = set_hostname_path_in_dict(words[5], dict,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ goto done;
+ }
ret = -1;
- for (i = 0; types[i]; i++) {
- if (!strcmp (words[6], types[i])) {
- free_ptr = gf_strdup (words[6]);
- ret = dict_set_dynstr (dict, "type", free_ptr);
- if (ret)
- goto out;
- free_ptr = NULL;
- break;
- }
+ goto out;
+ }
+ if (wordcount == 7) {
+ if (!strcmp(words[3], "statistics") &&
+ !strcmp(words[4], "heal-count") && !strcmp(words[5], "replica")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+ if (ret)
+ goto out;
+ ret = set_hostname_path_in_dict(
+ words[6], dict, GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+ if (ret)
+ goto out;
+ goto done;
}
- if (i == 3)
+ if (!strcmp(words[3], "split-brain") &&
+ !strcmp(words[4], "source-brick")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ ret = set_hostname_path_in_dict(words[5], dict,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "file", (char *)words[6]);
+ if (ret)
goto out;
+ goto done;
+ }
+ }
+ ret = -1;
+ goto out;
+done:
+ *options = dict;
+out:
+ if (ret && dict) {
+ dict_unref(dict);
+ *options = NULL;
+ }
- if (wordcount == 8) {
- free_ptr = gf_strdup (words[7]);
- ret = dict_set_dynstr (dict, "opts", free_ptr);
- if (ret)
- goto out;
- free_ptr = NULL;
+ return ret;
+}
+
+int
+cli_cmd_volume_defrag_parse(const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *option = NULL;
+ char *volname = NULL;
+ char *command = NULL;
+ gf_cli_defrag_type cmd = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ if (!((wordcount == 4) || (wordcount == 5)))
+ goto out;
+
+ if (wordcount == 4) {
+ if (strcmp(words[3], "start") && strcmp(words[3], "stop") &&
+ strcmp(words[3], "status"))
+ goto out;
+ } else {
+ if (strcmp(words[3], "fix-layout") && strcmp(words[3], "start"))
+ goto out;
+ }
+
+ volname = (char *)words[2];
+
+ if (wordcount == 4) {
+ command = (char *)words[3];
+ }
+ if (wordcount == 5) {
+ if ((strcmp(words[3], "fix-layout") || strcmp(words[4], "start")) &&
+ (strcmp(words[3], "start") || strcmp(words[4], "force"))) {
+ ret = -1;
+ goto out;
+ }
+ command = (char *)words[3];
+ option = (char *)words[4];
+ }
+
+ if (strcmp(command, "start") == 0) {
+ cmd = GF_DEFRAG_CMD_START;
+ if (option && strcmp(option, "force") == 0) {
+ cmd = GF_DEFRAG_CMD_START_FORCE;
+ }
+ goto done;
+ }
+
+ if (strcmp(command, "fix-layout") == 0) {
+ cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
+ goto done;
+ }
+ if (strcmp(command, "stop") == 0) {
+ cmd = GF_DEFRAG_CMD_STOP;
+ goto done;
+ }
+ if (strcmp(command, "status") == 0) {
+ cmd = GF_DEFRAG_CMD_STATUS;
+ }
+
+done:
+ ret = dict_set_str(dict, "volname", volname);
+
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "rebalance-command", (int32_t)cmd);
+
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+int32_t
+cli_snap_create_desc_parse(dict_t *dict, const char **words, size_t wordcount,
+ int32_t desc_opt_loc)
+{
+ int32_t ret = -1;
+ char *desc = NULL;
+ int32_t desc_len = 0;
+ int len;
+
+ desc = GF_MALLOC(MAX_SNAP_DESCRIPTION_LEN + 1, gf_common_mt_char);
+ if (!desc) {
+ ret = -1;
+ goto out;
+ }
+
+ len = strlen(words[desc_opt_loc]);
+ if (len >= MAX_SNAP_DESCRIPTION_LEN) {
+ cli_out(
+ "snapshot create: description truncated: "
+ "Description provided is longer than 1024 characters");
+ desc_len = MAX_SNAP_DESCRIPTION_LEN;
+ } else {
+ desc_len = len;
+ }
+
+ snprintf(desc, desc_len + 1, "%s", words[desc_opt_loc]);
+ /* Calculating the size of the description as given by the user */
+
+ ret = dict_set_dynstr(dict, "description", desc);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to save snap "
+ "description");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && desc)
+ GF_FREE(desc);
+
+ return ret;
+}
+
+/* Function to check whether the Volume name is repeated */
+int
+cli_check_if_volname_repeated(const char **words, unsigned int start_index,
+ uint64_t cur_index)
+{
+ uint64_t i = -1;
+ int ret = 0;
+
+ GF_ASSERT(words);
+
+ for (i = start_index; i < cur_index; i++) {
+ if (strcmp(words[i], words[cur_index]) == 0) {
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+/* snapshot clone <clonename> <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_clone_parse(dict_t *dict, const char **words, int wordcount)
+{
+ uint64_t i = 0;
+ int ret = -1;
+ char *clonename = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot clone)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount == cmdi + 1) {
+ cli_err("Invalid Syntax.");
+ gf_log("cli", GF_LOG_ERROR,
+ "Invalid number of words for snap clone command");
+ goto out;
+ }
+
+ if (strlen(words[cmdi]) >= GLUSTERD_MAX_SNAP_NAME) {
+ cli_err(
+ "snapshot clone: failed: clonename cannot exceed "
+ "255 characters.");
+ gf_log("cli", GF_LOG_ERROR, "Clone name too long");
+
+ goto out;
+ }
+
+ clonename = (char *)words[cmdi];
+ for (i = 0; i < strlen(clonename); i++) {
+ /* Following volume name convention */
+ if (!isalnum(clonename[i]) &&
+ (clonename[i] != '_' && (clonename[i] != '-'))) {
+ /* TODO : Is this message enough?? */
+ cli_err(
+ "Clonename can contain only alphanumeric, "
+ "\"-\" and \"_\" characters");
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32(dict, "volcount", 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save volcount");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "clonename", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save clone "
+ "name(%s)",
+ (char *)words[cmdi]);
+ goto out;
+ }
+
+ /* Filling snap name in the dictionary */
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi + 1]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not "
+ "save snap name(%s)",
+ (char *)words[cmdi + 1]);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* snapshot create <snapname> <vol-name(s)> [description <description>]
+ * [force]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_create_parse(dict_t *dict, const char **words, int wordcount)
+{
+ uint64_t i = 0;
+ int ret = -1;
+ uint64_t volcount = 0;
+ char key[PATH_MAX] = "";
+ char *snapname = NULL;
+ unsigned int cmdi = 2;
+ int flags = 0;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot create)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount <= cmdi + 1) {
+ cli_err("Invalid Syntax.");
+ gf_log("cli", GF_LOG_ERROR, "Too less words for snap create command");
+ goto out;
+ }
+
+ if (strlen(words[cmdi]) >= GLUSTERD_MAX_SNAP_NAME) {
+ cli_err(
+ "snapshot create: failed: snapname cannot exceed "
+ "255 characters.");
+ gf_log("cli", GF_LOG_ERROR, "Snapname too long");
+
+ goto out;
+ }
+
+ snapname = (char *)words[cmdi];
+ for (i = 0; i < strlen(snapname); i++) {
+ /* Following volume name convention */
+ if (!isalnum(snapname[i]) &&
+ (snapname[i] != '_' && (snapname[i] != '-'))) {
+ /* TODO : Is this message enough?? */
+ cli_err(
+ "Snapname can contain only alphanumeric, "
+ "\"-\" and \"_\" characters");
+ goto out;
+ }
+ }
+
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save snap "
+ "name(%s)",
+ (char *)words[cmdi]);
+ goto out;
+ }
+
+ /* Filling volume name in the dictionary */
+ for (i = cmdi + 1;
+ i < wordcount && (strcmp(words[i], "description")) != 0 &&
+ (strcmp(words[i], "force") != 0) &&
+ (strcmp(words[i], "no-timestamp") != 0);
+ i++) {
+ volcount++;
+ /* volume index starts from 1 */
+ ret = snprintf(key, sizeof(key), "volname%" PRIu64, volcount);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_str(dict, key, (char *)words[i]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not "
+ "save volume name(%s)",
+ (char *)words[i]);
+ goto out;
+ }
+
+ if (i >= cmdi + 2) {
+ ret = -1;
+ cli_err(
+ "Creating multiple volume snapshot is not "
+ "supported as of now");
+ goto out;
+ }
+ /* TODO : remove this above condition check once
+ * multiple volume snapshot is supported */
+ }
+
+ if (volcount == 0) {
+ ret = -1;
+ cli_err("Please provide the volume name");
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "volcount", volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save volcount");
+ goto out;
+ }
+
+ /* Verify how we got out of "for" loop,
+ * if it is by reaching wordcount limit then goto "out",
+ * because we need not parse for "description","force" and
+ * "no-timestamp" after this.
+ */
+ if (i == wordcount) {
+ goto out;
+ }
+
+ if (strcmp(words[i], "no-timestamp") == 0) {
+ ret = dict_set_int32n(dict, "no-timestamp", SLEN("no-timestamp"), 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "time-stamp option");
+ }
+ if (i == (wordcount - 1))
+ goto out;
+ i++;
+ }
+
+ if ((strcmp(words[i], "description")) == 0) {
+ ++i;
+ if (i > (wordcount - 1)) {
+ ret = -1;
+ cli_err("Please provide a description");
+ gf_log("cli", GF_LOG_ERROR, "Description not provided");
+ goto out;
+ }
+
+ ret = cli_snap_create_desc_parse(dict, words, wordcount, i);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save snap "
+ "description");
+ goto out;
+ }
+
+ if (i == (wordcount - 1))
+ goto out;
+ i++;
+ /* point the index to next word.
+ * As description might be follwed by force option.
+ * Before that, check if wordcount limit is reached
+ */
+ }
+
+ if (strcmp(words[i], "force") == 0) {
+ flags = GF_CLI_FLAG_OP_FORCE;
+
+ } else {
+ ret = -1;
+ cli_err("Invalid Syntax.");
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ /* Check if the command has anything after "force" keyword */
+ if (++i < wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret == 0) {
+ /*Adding force flag in either of the case i.e force set
+ * or unset*/
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "snap force option");
}
+ }
+ return ret;
+}
+/* snapshot list [volname]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_list_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount < 2 || wordcount > 3) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == 2) {
ret = 0;
- *options = dict;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to save volname in dictionary");
+ goto out;
+ }
out:
- if (ret) {
- GF_FREE (free_ptr);
- dict_unref (dict);
- }
+ return ret;
+}
- return ret;
+/* snapshot info [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_info_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+ int32_t cmd = GF_SNAP_INFO_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot info)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* If 3rd word is not "volume", then it must
+ * be snapname.
+ */
+ if (strcmp(words[cmdi], "volume") != 0) {
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to save "
+ "snapname %s",
+ words[cmdi]);
+ goto out;
+ }
+
+ /* Once snap name is parsed, if we encounter any other
+ * word then fail it. Invalid Syntax.
+ * example : snapshot info <snapname> word
+ */
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ cmd = GF_SNAP_INFO_TYPE_SNAP;
+ ret = 0;
+ goto out;
+ /* No need to continue the parsing once we
+ * get the snapname
+ */
+ }
+
+ /* If 3rd word is "volume", then check if next word
+ * is present. As, "snapshot info volume" is an
+ * invalid command.
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[wordcount - 1]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "volume name %s",
+ words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_INFO_TYPE_VOL;
+out:
+ if (ret == 0) {
+ ret = dict_set_int32(dict, "sub-cmd", cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "type of snapshot info");
+ }
+ }
+ return ret;
}
+/* snapshot restore <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
int
-cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
- dict_t **options)
+cli_snap_restore_parse(dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state)
{
- int ret = 0;
- dict_t *dict = NULL;
+ int ret = -1;
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount != 3) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save snap-name %s", words[2]);
+ goto out;
+ }
+
+ question =
+ "Restore operation will replace the "
+ "original volume with the snapshotted volume. "
+ "Do you still want to continue?";
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_ERROR,
+ "User cancelled a snapshot "
+ "restore operation for snap %s",
+ (char *)words[2]);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+/* snapshot activate <snapname> [force]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_activate_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+ int flags = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save snap-name %s", words[2]);
+ goto out;
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp("force", (char *)words[3])) {
+ flags = GF_CLI_FLAG_OP_FORCE;
+ } else {
+ gf_log("cli", GF_LOG_ERROR, "Invalid option");
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save force option");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+/* snapshot deactivate <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ * 1 if user cancelled the request
+ */
+int
+cli_snap_deactivate_parse(dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state)
+{
+ int ret = -1;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question =
+ "Deactivating snap will make its "
+ "data inaccessible. Do you want to "
+ "continue?";
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if ((wordcount != 3)) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save snap-name %s", words[2]);
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled "
+ "snapshot deactivate operation");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* snapshot delete (all | snapname | volume <volname>)
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ * 1 if user cancel the operation
+ */
+int
+cli_snap_delete_parse(dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state)
+{
+ int ret = -1;
+ const char *question = NULL;
+ int32_t cmd = -1;
+ unsigned int cmdi = 2;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount > 4 || wordcount <= cmdi) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ question =
+ "Deleting snap will erase all the information about "
+ "the snap. Do you still want to continue?";
+
+ if (strcmp(words[cmdi], "all") == 0) {
+ ret = 0;
+ cmd = GF_SNAP_DELETE_TYPE_ALL;
+ } else if (strcmp(words[cmdi], "volume") == 0) {
+ if (++cmdi == wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "volume name %s",
+ words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_DELETE_TYPE_VOL;
+ } else {
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to save "
+ "snapname %s",
+ words[2]);
+ goto out;
+ }
+ cmd = GF_SNAP_DELETE_TYPE_SNAP;
+ }
+
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled "
+ "snapshot delete operation for snap %s",
+ (char *)words[2]);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32(dict, "sub-cmd", cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "type of snapshot delete");
+ }
+out:
+ return ret;
+}
+
+/* snapshot status [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_status_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+ int32_t cmd = GF_SNAP_STATUS_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot status)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* if 3rd word is not "volume", then it must be "snapname"
+ */
+ if (strcmp(words[cmdi], "volume") != 0) {
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Count not save "
+ "snap name %s",
+ words[cmdi]);
+ goto out;
+ }
+
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+ cmd = GF_SNAP_STATUS_TYPE_SNAP;
+ goto out;
+ }
+
+ /* If 3rd word is "volume", then check if next word is present.
+ * As, "snapshot info volume" is an invalid command
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[wordcount - 1]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Count not save "
+ "volume name %s",
+ words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_STATUS_TYPE_VOL;
+
+out:
+ if (ret == 0) {
+ ret = dict_set_int32(dict, "sub-cmd", cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save cmd "
+ "of snapshot status");
+ }
+ }
+
+ return ret;
+}
+
+/* return value:
+ * -1 in case of failure.
+ * 0 in case of success.
+ */
+int32_t
+cli_snap_config_limit_parse(const char **words, dict_t *dict,
+ unsigned int wordcount, unsigned int index,
+ char *key)
+{
+ int ret = -1;
+ int limit = 0;
+ char *end_ptr = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+ GF_ASSERT(key);
+
+ if (index >= wordcount) {
+ ret = -1;
+ cli_err("Please provide a value for %s.", key);
+ gf_log("cli", GF_LOG_ERROR, "Value not provided for %s", key);
+ goto out;
+ }
+
+ limit = strtol(words[index], &end_ptr, 10);
+
+ if (limit <= 0 || strcmp(end_ptr, "") != 0) {
+ ret = -1;
+ cli_err(
+ "Please enter an integer value "
+ "greater than zero for %s",
+ key);
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, key, limit);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not set "
+ "%s in dictionary",
+ key);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, "globalname", "All");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not set global key");
+ goto out;
+ }
+ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not set global locks");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* function cli_snap_config_parse
+ * Config Syntax : gluster snapshot config [volname]
+ * [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <count>]
+ *
+ return value: <0 on failure
+ 1 if user cancels the operation, or limit value is out of
+ range
+ 0 on success
+
+ NOTE : snap-max-soft-limit can only be set for system.
+*/
+int32_t
+cli_snap_config_parse(const char **words, int wordcount, dict_t *dict,
+ struct cli_state *state)
+{
+ int ret = -1;
+ gf_answer_t answer = GF_ANSWER_NO;
+ gf_boolean_t vol_presence = _gf_false;
+ struct snap_config_opt_vals_ *conf_vals = NULL;
+ int8_t hard_limit = 0;
+ int8_t soft_limit = 0;
+ int8_t config_type = -1;
+ const char *question = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot config)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+ GF_ASSERT(state);
+
+ if ((wordcount < 2) || (wordcount > 7)) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid wordcount(%d)", wordcount);
+ goto out;
+ }
+
+ if (wordcount == 2) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+
+ /* auto-delete cannot be a volume name */
+ /* Check whether the 3rd word is volname */
+ if (strcmp(words[cmdi], "snap-max-hard-limit") != 0 &&
+ strcmp(words[cmdi], "snap-max-soft-limit") != 0 &&
+ strcmp(words[cmdi], "auto-delete") != 0 &&
+ strcmp(words[cmdi], "activate-on-create") != 0) {
+ ret = dict_set_str(dict, "volname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set volname");
+ goto out;
+ }
+ cmdi++;
+ vol_presence = _gf_true;
+
+ if (cmdi == wordcount) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+ }
+
+ config_type = GF_SNAP_CONFIG_TYPE_SET;
+
+ if (strcmp(words[cmdi], "snap-max-hard-limit") == 0) {
+ ret = cli_snap_config_limit_parse(words, dict, wordcount, ++cmdi,
+ "snap-max-hard-limit");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse snap "
+ "config hard limit");
+ goto out;
+ }
+ hard_limit = 1;
+
+ if (++cmdi == wordcount) {
+ ret = 0;
+ goto set;
+ }
+ }
+
+ if (strcmp(words[cmdi], "snap-max-soft-limit") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err(
+ "Soft limit cannot be set to individual "
+ "volumes.");
+ gf_log("cli", GF_LOG_ERROR,
+ "Soft limit cannot be "
+ "set to volumes");
+ goto out;
+ }
+
+ ret = cli_snap_config_limit_parse(words, dict, wordcount, ++cmdi,
+ "snap-max-soft-limit");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse snap "
+ "config soft limit");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ soft_limit = 1;
+ }
+
+ if (hard_limit || soft_limit)
+ goto set;
+
+ if (strcmp(words[cmdi], "auto-delete") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err(
+ "As of now, auto-delete option cannot be set "
+ "to volumes");
+ gf_log("cli", GF_LOG_ERROR,
+ "auto-delete option "
+ "cannot be set to volumes");
+ goto out;
+ }
+
+ if (++cmdi >= wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "auto-delete", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set "
+ "value of auto-delete in request "
+ "dictionary");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ } else if (strcmp(words[cmdi], "activate-on-create") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err(
+ "As of now, activate-on-create option "
+ "cannot be set to volumes");
+ gf_log("cli", GF_LOG_ERROR,
+ "activate-on-create "
+ "option cannot be set to volumes");
+ goto out;
+ }
+
+ if (++cmdi >= wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "snap-activate-on-create",
+ (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set value "
+ "of activate-on-create in request dictionary");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ } else {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0; /* Success */
+
+set:
+ ret = dict_set_int32(dict, "config-command", config_type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to set "
+ "config-command");
+ goto out;
+ }
+
+ if (config_type == GF_SNAP_CONFIG_TYPE_SET && (hard_limit || soft_limit)) {
+ conf_vals = snap_confopt_vals;
+ if (hard_limit && soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_BOTH].question;
+ } else if (soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_SOFT].question;
+ } else if (hard_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_HARD].question;
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled "
+ "snapshot config operation");
+ }
+ }
+
+out:
+ return ret;
+}
- ret = dict_set_str (dict, "volname", (char *) words[2]);
+int
+validate_op_name(const char *op, const char *opname, char **opwords)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT(opname);
+ GF_ASSERT(opwords);
+
+ for (i = 0; opwords[i] != NULL; i++) {
+ if (strcmp(opwords[i], opname) == 0) {
+ cli_out("\"%s\" cannot be a %s", opname, op);
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options,
+ struct cli_state *state)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_snapshot type = GF_SNAP_OPTION_TYPE_NONE;
+ char *w = NULL;
+ static char *opwords[] = {"create", "delete", "restore", "activate",
+ "deactivate", "list", "status", "config",
+ "info", "clone", NULL};
+ static char *invalid_snapnames[] = {"description", "force", "volume", "all",
+ NULL};
+ static char *invalid_volnames[] = {"volume",
+ "type",
+ "subvolumes",
+ "option",
+ "end-volume",
+ "all",
+ "volume_not_in_ring",
+ "description",
+ "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit",
+ "auto-delete",
+ "activate-on-create",
+ NULL};
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+ GF_ASSERT(state);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ /* Lowest wordcount possible */
+ if (wordcount < 2) {
+ gf_log("", GF_LOG_ERROR, "Invalid command: Not enough arguments");
+ goto out;
+ }
+
+ w = str_getunamb(words[1], opwords);
+ if (!w) {
+ /* Checks if the operation is a valid operation */
+ gf_log("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ }
+
+ if (!strcmp(w, "create")) {
+ type = GF_SNAP_OPTION_TYPE_CREATE;
+ } else if (!strcmp(w, "list")) {
+ type = GF_SNAP_OPTION_TYPE_LIST;
+ } else if (!strcmp(w, "info")) {
+ type = GF_SNAP_OPTION_TYPE_INFO;
+ } else if (!strcmp(w, "delete")) {
+ type = GF_SNAP_OPTION_TYPE_DELETE;
+ } else if (!strcmp(w, "config")) {
+ type = GF_SNAP_OPTION_TYPE_CONFIG;
+ } else if (!strcmp(w, "restore")) {
+ type = GF_SNAP_OPTION_TYPE_RESTORE;
+ } else if (!strcmp(w, "status")) {
+ type = GF_SNAP_OPTION_TYPE_STATUS;
+ } else if (!strcmp(w, "activate")) {
+ type = GF_SNAP_OPTION_TYPE_ACTIVATE;
+ } else if (!strcmp(w, "deactivate")) {
+ type = GF_SNAP_OPTION_TYPE_DEACTIVATE;
+ } else if (!strcmp(w, "clone")) {
+ type = GF_SNAP_OPTION_TYPE_CLONE;
+ }
+
+ if (type != GF_SNAP_OPTION_TYPE_CONFIG &&
+ type != GF_SNAP_OPTION_TYPE_STATUS) {
+ ret = dict_set_int32(dict, "hold_snap_locks", _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to set hold-snap-locks value "
+ "as _gf_true");
+ goto out;
+ }
+ }
+
+ /* Following commands does not require volume locks */
+ if (type == GF_SNAP_OPTION_TYPE_STATUS ||
+ type == GF_SNAP_OPTION_TYPE_ACTIVATE ||
+ type == GF_SNAP_OPTION_TYPE_DEACTIVATE) {
+ ret = dict_set_int32(dict, "hold_vol_locks", _gf_false);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname");
+ gf_log("cli", GF_LOG_ERROR,
+ "Setting volume lock "
+ "flag failed");
+ goto out;
+ }
+ }
+
+ /* Check which op is intended */
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ /* Syntax :
+ * gluster snapshot create <snapname> <vol-name(s)>
+ * [no-timestamp]
+ * [description <description>]
+ * [force]
+ */
+ /* In cases where the snapname is not given then
+ * parsing fails & snapname cannot be "description",
+ * "force" and "volume", that check is made here
+ */
+ if (wordcount == 2) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = validate_op_name("snapname", words[2], invalid_snapnames);
+ if (ret) {
goto out;
+ }
+
+ ret = cli_snap_create_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "create command parsing failed.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ /* Syntax :
+ * gluster snapshot clone <clonename> <snapname>
+ */
+ /* In cases where the clonename is not given then
+ * parsing fails & snapname cannot be "description",
+ * "force" and "volume", that check is made here
+ */
+ if (wordcount == 2) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = validate_op_name("clonename", words[2], invalid_volnames);
+ if (ret) {
+ goto out;
+ }
+
+ ret = cli_snap_clone_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "clone command parsing failed.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_INFO:
+ /* Syntax :
+ * gluster snapshot info [(snapname] | [vol <volname>)]
+ */
+ ret = cli_snap_info_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot info command");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ /* Syntax :
+ * gluster snaphsot list [volname]
+ */
+
+ ret = cli_snap_list_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot list command");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ /* Syntax :
+ * snapshot delete (all | snapname | volume <volname>)
+ */
+ ret = cli_snap_delete_parse(dict, words, wordcount, state);
+ if (ret) {
+ /* A positive ret value means user cancelled
+ * the command */
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot delete command");
+ }
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ /* snapshot config [volname] [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <percent>] */
+ ret = cli_snap_config_parse(words, wordcount, dict, state);
+ if (ret) {
+ if (ret < 0)
+ gf_log("cli", GF_LOG_ERROR,
+ "config command parsing failed.");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "type", GF_SNAP_OPTION_TYPE_CONFIG);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to set "
+ "config type");
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS: {
+ /* Syntax :
+ * gluster snapshot status [(snapname |
+ * volume <volname>)]
+ */
+ ret = cli_snap_status_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot status command");
+ goto out;
+ }
+ break;
}
- if (wordcount == 3) {
- ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_INDEX);
- goto done;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ /* Syntax:
+ * snapshot restore <snapname>
+ */
+ ret = cli_snap_restore_parse(dict, words, wordcount, state);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "restore command");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ /* Syntax:
+ * snapshot activate <snapname> [force]
+ */
+ ret = cli_snap_activate_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "start command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ /* Syntax:
+ * snapshot deactivate <snapname>
+ */
+ ret = cli_snap_deactivate_parse(dict, words, wordcount, state);
+ if (ret) {
+ /* A positive ret value means user cancelled
+ * the command */
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse deactivate "
+ "command");
+ }
+ goto out;
+ }
+ break;
+
+ default:
+ ret = -1;
+ gf_log("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "type", type);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Failed to set type.");
+ goto out;
+ }
+ /* If you got so far, input is valid */
+ ret = 0;
+out:
+ if (ret) {
+ if (dict)
+ dict_unref(dict);
+ } else
+ *options = dict;
+
+ return ret;
+}
+
+int
+cli_cmd_validate_volume(char *volname)
+{
+ int i = 0;
+ int ret = -1;
+ int volname_len;
+
+ if (volname[0] == '-')
+ return ret;
+
+ if (!strcmp(volname, "all")) {
+ cli_err("\"all\" cannot be the name of a volume.");
+ return ret;
+ }
+
+ if (strchr(volname, '/')) {
+ cli_err("Volume name should not contain \"/\" character.");
+ return ret;
+ }
+
+ volname_len = strlen(volname);
+ if (volname_len > GD_VOLUME_NAME_MAX) {
+ cli_err("Volname can not exceed %d characters.", GD_VOLUME_NAME_MAX);
+ return ret;
+ }
+
+ for (i = 0; i < volname_len; i++)
+ if (!isalnum(volname[i]) && (volname[i] != '_') &&
+ (volname[i] != '-')) {
+ cli_err(
+ "Volume name should not contain \"%c\""
+ " character.\nVolume names can only"
+ "contain alphanumeric, '-' and '_' "
+ "characters.",
+ volname[i]);
+ return ret;
+ }
+
+ ret = 0;
+
+ return ret;
+}
+
+int32_t
+cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
+{
+ int32_t ret = -1;
+ char *w = NULL;
+ char *volname = NULL;
+ static char *opwords[] = {"enable", "disable", "scrub-throttle",
+ "scrub-frequency", "scrub", "signing-time",
+ "signer-threads", NULL};
+ static char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL};
+ static char *scrub_freq_values[] = {
+ "hourly", "daily", "weekly", "biweekly", "monthly", "minute", NULL};
+ static char *scrub_values[] = {"pause", "resume", "status", "ondemand",
+ NULL};
+ dict_t *dict = NULL;
+ gf_bitrot_type type = GF_BITROT_OPTION_TYPE_NONE;
+ int32_t expiry_time = 0;
+ int32_t signer_th_count = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ /* Hack to print out bitrot help properly */
+ if ((wordcount == 3) && !(strcmp(words[2], "help"))) {
+ ret = 1;
+ return ret;
+ }
+
+ if (wordcount < 4 || wordcount > 5) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid syntax");
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ volname = (char *)words[2];
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_validate_volume(volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to validate volume name");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret) {
+ cli_out("Failed to set volume name in dictionary ");
+ goto out;
+ }
+
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ cli_out("Invalid bit rot option : %s", words[3]);
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "enable") == 0) {
+ if (wordcount == 4) {
+ type = GF_BITROT_OPTION_TYPE_ENABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
}
+ }
+ if (strcmp(w, "disable") == 0) {
if (wordcount == 4) {
- if (!strcmp (words[3], "full")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEAL_FULL);
- goto done;
- } else if (!strcmp (words[3], "info")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_INDEX_SUMMARY);
- goto done;
- } else {
- ret = -1;
- goto out;
- }
+ type = GF_BITROT_OPTION_TYPE_DISABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
}
- if (wordcount == 5) {
- if (strcmp (words[3], "info")) {
- ret = -1;
- goto out;
+ }
+
+ if (!strcmp(w, "scrub-throttle")) {
+ if (!words[4]) {
+ cli_err(
+ "Missing scrub-throttle value for bitrot "
+ "option");
+ ret = -1;
+ goto out;
+ } else {
+ w = str_getunamb(words[4], scrub_throt_values);
+ if (!w) {
+ cli_err(
+ "Invalid scrub-throttle option for "
+ "bitrot");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE;
+ ret = dict_set_str(dict, "scrub-throttle-value",
+ (char *)words[4]);
+ if (ret) {
+ cli_out(
+ "Failed to set scrub-throttle "
+ "value in the dict");
+ goto out;
}
- if (!strcmp (words[4], "healed")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEALED_FILES);
- goto done;
+ goto set_type;
+ }
+ }
+ }
+
+ if (!strcmp(words[3], "scrub-frequency")) {
+ if (!words[4]) {
+ cli_err("Missing scrub-frequency value");
+ ret = -1;
+ goto out;
+ } else {
+ w = str_getunamb(words[4], scrub_freq_values);
+ if (!w) {
+ cli_err("Invalid frequency option for bitrot");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SCRUB_FREQ;
+ ret = dict_set_str(dict, "scrub-frequency-value",
+ (char *)words[4]);
+ if (ret) {
+ cli_out(
+ "Failed to set dict for "
+ "bitrot");
+ goto out;
}
- if (!strcmp (words[4], "heal-failed")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEAL_FAILED_FILES);
- goto done;
+ goto set_type;
+ }
+ }
+ }
+
+ if (!strcmp(words[3], "scrub")) {
+ if (!words[4]) {
+ cli_err("Missing scrub value for bitrot option");
+ ret = -1;
+ goto out;
+ } else {
+ w = str_getunamb(words[4], scrub_values);
+ if (!w) {
+ cli_err("Invalid scrub option for bitrot");
+ ret = -1;
+ goto out;
+ } else {
+ if (strcmp(words[4], "status") == 0) {
+ type = GF_BITROT_CMD_SCRUB_STATUS;
+ } else if (strcmp(words[4], "ondemand") == 0) {
+ type = GF_BITROT_CMD_SCRUB_ONDEMAND;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SCRUB;
}
- if (!strcmp (words[4], "split-brain")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_SPLIT_BRAIN_FILES);
- goto done;
+ ret = dict_set_str(dict, "scrub-value", (char *)words[4]);
+ if (ret) {
+ cli_out(
+ "Failed to set dict for "
+ "bitrot");
+ goto out;
}
+ goto set_type;
+ }
+ }
+ }
+
+ if (!strcmp(words[3], "signing-time")) {
+ if (!words[4]) {
+ cli_err(
+ "Missing signing-time value for bitrot "
+ "option");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_EXPIRY_TIME;
+
+ expiry_time = strtol(words[4], NULL, 0);
+ if (expiry_time < 1) {
+ cli_err(
+ "Expiry time value should not be less"
+ " than 1");
ret = -1;
goto out;
+ }
+
+ ret = dict_set_uint32(dict, "expiry-time",
+ (unsigned int)expiry_time);
+ if (ret) {
+ cli_out("Failed to set dict for bitrot");
+ goto out;
+ }
+ goto set_type;
}
+ } else if (!strcmp(words[3], "signer-threads")) {
+ if (!words[4]) {
+ cli_err(
+ "Missing signer-thread value for bitrot "
+ "option");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SIGNER_THREADS;
+
+ signer_th_count = strtol(words[4], NULL, 0);
+ if (signer_th_count < 1) {
+ cli_err("signer-thread count should not be less than 1");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(dict, "signer-threads",
+ (unsigned int)signer_th_count);
+ if (ret) {
+ cli_out("Failed to set dict for bitrot");
+ goto out;
+ }
+ goto set_type;
+ }
+ } else {
+ cli_err(
+ "Invalid option %s for bitrot. Please enter valid "
+ "bitrot option",
+ words[3]);
ret = -1;
goto out;
-done:
- *options = dict;
+ }
+set_type:
+ ret = dict_set_int32(dict, "type", type);
+ if (ret < 0)
+ goto out;
+
+ *options = dict;
+
out:
- if (ret && dict) {
- dict_unref (dict);
- *options = NULL;
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse bitrot command");
+ if (dict)
+ dict_unref(dict);
+ }
- return ret;
+ return ret;
+}
+
+/* Parsing global option for NFS-Ganesha config
+ * gluster nfs-ganesha enable/disable */
+
+int32_t
+cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *key = NULL;
+ char *value = NULL;
+ char *w = NULL;
+ static char *opwords[] = {"enable", "disable", NULL};
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount != 2)
+ goto out;
+
+ key = (char *)words[0];
+ value = (char *)words[1];
+
+ if (!key || !value) {
+ cli_out("Usage : nfs-ganesha <enable/disable>");
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_strip_whitespace(value, strlen(value));
+ if (ret == -1)
+ goto out;
+
+ if (strcmp(key, "nfs-ganesha")) {
+ gf_asprintf(op_errstr,
+ "Global option: error: ' %s '"
+ "is not a valid global option.",
+ key);
+ ret = -1;
+ goto out;
+ }
+
+ w = str_getunamb(value, opwords);
+ if (!w) {
+ cli_out(
+ "Invalid global option \n"
+ "Usage : nfs-ganesha <enable/disable>");
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(value, "enable") == 0) {
+ question =
+ "Enabling NFS-Ganesha requires Gluster-NFS to be "
+ "disabled across the trusted pool. Do you "
+ "still want to continue?\n";
+ } else if (strcmp(value, "disable") == 0) {
+ question =
+ "Disabling NFS-Ganesha will tear down the entire "
+ "ganesha cluster across the trusted pool. Do you "
+ "still want to continue?\n";
+ } else {
+ ret = -1;
+ goto out;
+ }
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Global operation "
+ "cancelled, exiting");
+ ret = -1;
+ goto out;
+ }
+ cli_out("This will take a few minutes to complete. Please wait ..");
+
+ ret = dict_set_str(dict, "key", key);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on key failed");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "value", value);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on value failed");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "globalname", "All");
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "dict set on global"
+ " key failed.");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "dict set on global key "
+ "failed.");
+ goto out;
+ }
+
+ *options = dict;
+out:
+ if (ret)
+ dict_unref(dict);
+
+ return ret;
}
diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
index 4ac1630e557..084998701d8 100644
--- a/cli/src/cli-cmd-peer.c
+++ b/cli/src/cli-cmd-peer.c
@@ -1,258 +1,317 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "cli1-xdr.h"
#include "protocol-common.h"
+#include <glusterfs/events.h>
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
-
-int cli_cmd_peer_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+int
+cli_cmd_peer_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
int
-cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_peer_probe_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (!(wordcount == 3)) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (!(wordcount == 3)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROBE];
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "hostname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = valid_internet_address((char *)words[2], _gf_false, _gf_false);
+ if (ret == 1) {
+ ret = 0;
+ } else {
+ cli_out("%s is an invalid address", words[2]);
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ ret = -1;
+ goto out;
+ }
+ /* if (words[3]) {
+ ret = dict_set_str (dict, "port", (char *)words[3]);
+ if (ret)
+ goto out;
+ }
+ */
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROBE];
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Peer probe failed");
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ CLI_STACK_DESTROY(frame);
- dict = dict_new ();
- if (!dict)
- goto out;
+ if (ret == 0) {
+ gf_event(EVENT_PEER_ATTACH, "host=%s", (char *)words[2]);
+ }
- ret = dict_set_str (dict, "hostname", (char *)words[2]);
- if (ret)
- goto out;
-
- ret = valid_internet_address ((char *) words[2], _gf_false);
- if (ret == 1) {
- ret = 0;
- } else {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
-/* if (words[3]) {
- ret = dict_set_str (dict, "port", (char *)words[3]);
- if (ret)
- goto out;
- }
-*/
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ return ret;
+}
+
+int
+cli_cmd_peer_deprobe_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int flags = 0;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ question =
+ "All clients mounted through the peer which is getting detached need "
+ "to be remounted using one of the other active peers in the trusted "
+ "storage pool to ensure client gets notification on any changes done "
+ "on the gluster configuration and if the same has been done do you "
+ "want to proceed?";
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEPROBE];
+
+ dict = dict_new();
+
+ ret = dict_set_str(dict, "hostname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ /* if (words[3]) {
+ ret = dict_set_str (dict, "port", (char *)words[3]);
+ if (ret)
+ goto out;
+ }
+ */
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3]))
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ else {
+ ret = -1;
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
}
+ }
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret)
+ goto out;
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Peer probe failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Peer detach failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
-}
+ if (ret == 0) {
+ gf_event(EVENT_PEER_DETACH, "host=%s", (char *)words[2]);
+ }
+ return ret;
+}
int
-cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_peer_status_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int flags = 0;
- int sent = 0;
- int parse_error = 0;
-
- if ((wordcount < 3) || (wordcount > 4)) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEPROBE];
+ if (wordcount != 2) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
- dict = dict_new ();
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
- ret = dict_set_str (dict, "hostname", (char *)words[2]);
- if (ret)
- goto out;
-
-/* if (words[3]) {
- ret = dict_set_str (dict, "port", (char *)words[3]);
- if (ret)
- goto out;
- }
-*/
- if (wordcount == 4) {
- if (!strcmp("force", words[3]))
- flags |= GF_CLI_FLAG_OP_FORCE;
- else {
- ret = -1;
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- }
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret)
- goto out;
-
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, (void *)GF_CLI_LIST_PEERS);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Peer detach failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Peer status failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
int
-cli_cmd_peer_status_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_pool_list_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (wordcount != 2) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
+ if (wordcount != 2) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, (char *)words[1] );
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, (void *)GF_CLI_LIST_POOL_NODES);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Peer status failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_err("pool list: command execution failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
struct cli_cmd cli_probe_cmds[] = {
- { "peer probe <HOSTNAME>",
- cli_cmd_peer_probe_cbk,
- "probe peer specified by <HOSTNAME>"},
+ {"peer probe { <HOSTNAME> | <IP-address> }", cli_cmd_peer_probe_cbk,
+ "probe peer specified by <HOSTNAME>"},
- { "peer detach <HOSTNAME> [force]",
- cli_cmd_peer_deprobe_cbk,
- "detach peer specified by <HOSTNAME>"},
+ {"peer detach { <HOSTNAME> | <IP-address> } [force]",
+ cli_cmd_peer_deprobe_cbk, "detach peer specified by <HOSTNAME>"},
- { "peer status",
- cli_cmd_peer_status_cbk,
- "list status of peers"},
+ {"peer status", cli_cmd_peer_status_cbk, "list status of peers"},
- { "peer help",
- cli_cmd_peer_help_cbk,
- "Help command for peer "},
+ {"peer help", cli_cmd_peer_help_cbk, "display help for peer commands"},
- { NULL, NULL, NULL }
-};
+ {"pool list", cli_cmd_pool_list_cbk,
+ "list all the nodes in the pool (including localhost)"},
+
+ {NULL, NULL, NULL}};
int
-cli_cmd_peer_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+cli_cmd_peer_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
- struct cli_cmd *cmd = NULL;
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *probe_cmd = NULL;
+ int count = 0;
+
+ cli_out("\ngluster peer commands");
+ cli_out("======================\n");
+ cmd = GF_MALLOC(sizeof(cli_probe_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, cli_probe_cmds, sizeof(cli_probe_cmds));
+ count = (sizeof(cli_probe_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+ for (probe_cmd = cmd; probe_cmd->pattern; probe_cmd++)
+ cli_out("%s - %s", probe_cmd->pattern, probe_cmd->desc);
- for (cmd = cli_probe_cmds; cmd->pattern; cmd++)
- cli_out ("%s - %s", cmd->pattern, cmd->desc);
+ GF_FREE(cmd);
- return 0;
+ cli_out("\n");
+ return 0;
}
int
-cli_cmd_probe_register (struct cli_state *state)
+cli_cmd_probe_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
-
- for (cmd = cli_probe_cmds; cmd->pattern; cmd++) {
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
+ for (cmd = cli_probe_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
new file mode 100644
index 00000000000..859d6b2e40d
--- /dev/null
+++ b/cli/src/cli-cmd-snapshot.c
@@ -0,0 +1,135 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+
+int
+cli_cmd_snapshot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_snapshot_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = 0;
+ int parse_err = 0;
+ dict_t *options = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SNAP];
+
+ /* Parses the command entered by the user */
+ ret = cli_cmd_snapshot_parse(words, wordcount, &options, state);
+ if (ret) {
+ if (ret < 0) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ } else {
+ /* User might have cancelled the snapshot operation */
+ ret = 0;
+ }
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
+
+out:
+ if (ret && parse_err == 0)
+ cli_out("Snapshot command failed");
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
+
+struct cli_cmd snapshot_cmds[] = {
+ {"snapshot help", cli_cmd_snapshot_help_cbk,
+ "display help for snapshot commands"},
+ {"snapshot create <snapname> <volname> [no-timestamp] "
+ "[description <description>] [force]",
+ cli_cmd_snapshot_cbk, "Snapshot Create."},
+ {"snapshot clone <clonename> <snapname>", cli_cmd_snapshot_cbk,
+ "Snapshot Clone."},
+ {"snapshot restore <snapname>", cli_cmd_snapshot_cbk, "Snapshot Restore."},
+ {"snapshot status [(snapname | volume <volname>)]", cli_cmd_snapshot_cbk,
+ "Snapshot Status."},
+ {"snapshot info [(snapname | volume <volname>)]", cli_cmd_snapshot_cbk,
+ "Snapshot Info."},
+ {"snapshot list [volname]", cli_cmd_snapshot_cbk, "Snapshot List."},
+ {"snapshot config [volname] ([snap-max-hard-limit <count>] "
+ "[snap-max-soft-limit <percent>]) "
+ "| ([auto-delete <enable|disable>])"
+ "| ([activate-on-create <enable|disable>])",
+ cli_cmd_snapshot_cbk, "Snapshot Config."},
+ {"snapshot delete (all | snapname | volume <volname>)",
+ cli_cmd_snapshot_cbk, "Snapshot Delete."},
+ {"snapshot activate <snapname> [force]", cli_cmd_snapshot_cbk,
+ "Activate snapshot volume."},
+ {"snapshot deactivate <snapname>", cli_cmd_snapshot_cbk,
+ "Deactivate snapshot volume."},
+ {NULL, NULL, NULL}};
+
+int
+cli_cmd_snapshot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *snap_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_MALLOC(sizeof(snapshot_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, snapshot_cmds, sizeof(snapshot_cmds));
+ count = (sizeof(snapshot_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+
+ cli_out("\ngluster snapshot commands");
+ cli_out("=========================\n");
+
+ for (snap_cmd = cmd; snap_cmd->pattern; snap_cmd++)
+ if (_gf_false == snap_cmd->disable)
+ cli_out("%s - %s", snap_cmd->pattern, snap_cmd->desc);
+ cli_out("\n");
+
+ GF_FREE(cmd);
+ return 0;
+}
+
+int
+cli_cmd_snapshot_register(struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = snapshot_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
index 25938b8974b..801e8f4efed 100644
--- a/cli/src/cli-cmd-system.c
+++ b/cli/src/cli-cmd-system.c
@@ -1,349 +1,624 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "protocol-common.h"
+int
+cli_cmd_system_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
+int
+cli_cmd_copy_file_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
-int cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+int
+cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
int
-cli_cmd_getspec_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_getspec_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "volid", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETSPEC];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+ ret = proc->fn(frame, THIS, dict);
+ }
- dict = dict_new ();
- if (!dict)
- goto out;
+out:
+ if (!proc && ret) {
+ if (wordcount > 1)
+ cli_out("Fetching spec for volume %s failed", (char *)words[2]);
+ }
- if (wordcount != 3) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ if (dict)
+ dict_unref(dict);
- ret = dict_set_str (dict, "volid", (char *)words[2]);
- if (ret)
- goto out;
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETSPEC];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+int
+cli_cmd_pmap_b2p_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "brick", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PMAP_PORTBYBRICK];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (!proc && ret) {
- if (dict)
- dict_destroy (dict);
- if (wordcount > 1)
- cli_out ("Fetching spec for volume %s failed",
- (char *)words[2]);
- }
+ if (!proc && ret) {
+ if (wordcount > 1)
+ cli_out("Fetching spec for volume %s failed", (char *)words[3]);
+ }
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_pmap_b2p_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_fsm_log_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
-
- frame = create_frame (THIS, THIS->ctx->pool);
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *name = "";
+
+ if ((wordcount != 4) && (wordcount != 3)) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ if (wordcount == 4)
+ name = (char *)words[3];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_FSM_LOG];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
if (!frame)
- goto out;
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- if (wordcount != 4) {
- cli_usage_out (word->pattern);
- goto out;
- }
-
- ret = dict_set_str (dict, "brick", (char *)words[3]);
- if (ret)
- goto out;
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PMAP_PORTBYBRICK];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ goto out;
+ ret = proc->fn(frame, THIS, (void *)name);
+ }
+out:
+ return ret;
+}
+int
+cli_cmd_getwd_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+
+ if (wordcount != 2) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETWD];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn(frame, THIS, NULL);
+ }
out:
- if (!proc && ret) {
- if (dict)
- dict_destroy (dict);
- if (wordcount > 1)
- cli_out ("Fetching spec for volume %s failed",
- (char *)words[3]);
- }
+ return ret;
+}
- return ret;
+static dict_t *
+make_seq_dict(int argc, char **argv)
+{
+ char index[] = "4294967296"; // 1<<32
+ int i = 0;
+ int len;
+ int ret = 0;
+ dict_t *dict = dict_new();
+
+ if (!dict)
+ return NULL;
+
+ for (i = 0; i < argc; i++) {
+ len = snprintf(index, sizeof(index), "%d", i);
+ ret = dict_set_strn(dict, index, len, argv[i]);
+ if (ret == -1)
+ break;
+ }
+
+ if (ret) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+
+ return dict;
}
int
-cli_cmd_fsm_log_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_mount_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- char *name = "";
-
- if ((wordcount != 4) && (wordcount != 3)) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+ void *dataa[] = {NULL, NULL};
+
+ if (wordcount < 4) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = make_seq_dict(wordcount - 3, (char **)words + 3);
+ if (!dict)
+ goto out;
+
+ dataa[0] = (void *)words[2];
+ dataa[1] = dict;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_MOUNT];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn(frame, THIS, dataa);
+ }
- if (wordcount == 4)
- name = (char*)words[3];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_FSM_LOG];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, (void*)name);
- }
out:
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ if (!proc && ret)
+ cli_out("Mount command failed");
+
+ return ret;
}
int
-cli_cmd_getwd_cbk (struct cli_state *state, struct cli_cmd_word *word,
+cli_cmd_umount_cbk(struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
-
- if (wordcount != 2) {
- cli_usage_out (word->pattern);
- goto out;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ if (!(wordcount == 3 ||
+ (wordcount == 4 && strcmp(words[3], "lazy") == 0))) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "path", (char *)words[2]);
+ if (ret != 0)
+ goto out;
+ ret = dict_set_int32(dict, "lazy", wordcount == 4);
+ if (ret != 0)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UMOUNT];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
}
+ ret = proc->fn(frame, THIS, dict);
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETWD];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, NULL);
- }
out:
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ if (!proc && ret)
+ cli_out("Umount command failed");
+
+ return ret;
}
-static dict_t *
-make_seq_dict (int argc, char **argv)
+int
+cli_cmd_uuid_get_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- char index[] = "4294967296"; // 1<<32
- int i = 0;
- int ret = 0;
- dict_t *dict = dict_new ();
-
- if (!dict)
- return NULL;
-
- for (i = 0; i < argc; i++) {
- snprintf(index, sizeof(index), "%d", i);
- ret = dict_set_str (dict, index, argv[i]);
- if (ret == -1)
- break;
- }
+ int ret = -1;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_GET];
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ if (proc->fn)
+ ret = proc->fn(frame, this, dict);
- if (ret) {
- dict_destroy (dict);
- dict = NULL;
- }
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("uuid get failed");
+ }
- return dict;
+ if (dict)
+ dict_unref(dict);
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_mount_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_uuid_reset_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int ret = -1;
- dict_t *dict = NULL;
- void *dataa[] = {NULL, NULL};
-
- if (wordcount < 4) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ char *question = NULL;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+
+ question =
+ "Resetting uuid changes the uuid of local glusterd. "
+ "Do you want to continue?";
+
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_RESET];
+
+ this = THIS;
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ answer = cli_cmd_get_confirmation(state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ // send NULL as argument since no dictionary is sent to glusterd
+ if (proc->fn) {
+ ret = proc->fn(frame, this, dict);
+ }
- dict = make_seq_dict (wordcount - 3, (char **)words + 3);
- if (!dict)
- goto out;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("uuid reset failed");
+ }
- dataa[0] = (void *)words[2];
- dataa[1] = dict;
+ if (dict)
+ dict_unref(dict);
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_MOUNT];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, dataa);
- }
+ CLI_STACK_DESTROY(frame);
- out:
- if (dict)
- dict_unref (dict);
+ return ret;
+}
- if (!proc && ret)
- cli_out ("Mount command failed");
+static struct cli_cmd cli_system_cmds[] = {
+ {"system:: getspec <VOLNAME>", cli_cmd_getspec_cbk,
+ "fetch the volume file for the volume <VOLNAME>"},
- return ret;
-}
+ {"system:: portmap brick2port <BRICK>", cli_cmd_pmap_b2p_cbk,
+ "query which port <BRICK> listens on"},
-int
-cli_cmd_umount_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
-{
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int ret = -1;
- dict_t *dict = NULL;
-
- if (!(wordcount == 3 ||
- (wordcount == 4 && strcmp (words[3], "lazy") == 0))) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ {"system:: fsm log [<peer-name>]", cli_cmd_fsm_log_cbk,
+ "display fsm transitions"},
- dict = dict_new ();
- if (!dict)
- goto out;
+ {"system:: getwd", cli_cmd_getwd_cbk, "query glusterd work directory"},
- ret = dict_set_str (dict, "path", (char *)words[2]);
- if (ret != 0)
- goto out;
- ret = dict_set_int32 (dict, "lazy", wordcount == 4);
- if (ret != 0)
- goto out;
+ {"system:: mount <label> <args...>", cli_cmd_mount_cbk, "request a mount"},
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UMOUNT];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, dict);
- }
+ {"system:: umount <path> [lazy]", cli_cmd_umount_cbk, "request an umount"},
- out:
- if (dict)
- dict_unref (dict);
+ {"system:: uuid get", cli_cmd_uuid_get_cbk, "get uuid of glusterd"},
- if (!proc && ret)
- cli_out ("Umount command failed");
+ {"system:: uuid reset", cli_cmd_uuid_reset_cbk,
+ "reset the uuid of glusterd"},
- return ret;
-}
+ {"system:: help", cli_cmd_system_help_cbk,
+ "display help for system commands"},
-struct cli_cmd cli_system_cmds[] = {
- { "system:: getspec <VOLID>",
- cli_cmd_getspec_cbk,
- "fetch spec for volume <VOLID>"},
+ {"system:: copy file [<filename>]", cli_cmd_copy_file_cbk,
+ "Copy file from current node's $working_dir to "
+ "$working_dir of all cluster nodes"},
- { "system:: portmap brick2port <BRICK>",
- cli_cmd_pmap_b2p_cbk,
- "query which port <BRICK> listens on"},
+ {"system:: execute <command> <args>", cli_cmd_sys_exec_cbk,
+ "Execute the command on all the nodes "
+ "in the cluster and display their output."},
- { "system:: fsm log [<peer-name>]",
- cli_cmd_fsm_log_cbk,
- "display fsm transitions"},
+ {NULL, NULL, NULL}};
- { "system:: getwd",
- cli_cmd_getwd_cbk,
- "query glusterd work directory"},
+int
+cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ char cmd_arg_name[PATH_MAX] = "";
+ char *command = NULL;
+ char *saveptr = NULL;
+ char *tmp = NULL;
+ int ret = -1;
+ int i = -1;
+ int len;
+ int cmd_args_count = 0;
+ int in_cmd_args_count = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if ((wordcount < 3) || (words[2] == NULL)) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ command = strtok_r((char *)words[2], " ", &saveptr);
+ if (command == NULL) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to parse command");
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ do {
+ tmp = strtok_r(NULL, " ", &saveptr);
+ if (tmp) {
+ in_cmd_args_count++;
+ snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d",
+ in_cmd_args_count);
+ ret = dict_set_str(dict, cmd_arg_name, tmp);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR,
+ "Unable to set "
+ "%s in dict",
+ cmd_arg_name);
+ goto out;
+ }
+ }
+ } while (tmp);
+
+ cmd_args_count = wordcount - 3;
- { "system:: mount <label> <args...>",
- cli_cmd_mount_cbk,
- "request a mount"},
+ ret = dict_set_str(dict, "command", command);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set command in dict");
+ goto out;
+ }
- { "system:: umount <path> [lazy]",
- cli_cmd_umount_cbk,
- "request an umount"},
+ for (i = 1; i <= cmd_args_count; i++) {
+ in_cmd_args_count++;
+ len = snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d",
+ in_cmd_args_count);
+ ret = dict_set_strn(dict, cmd_arg_name, len, (char *)words[2 + i]);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set %s in dict", cmd_arg_name);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32(dict, "cmd_args_count", in_cmd_args_count);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set cmd_args_count in dict");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", "N/A");
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set volname in dict");
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYS_EXEC];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ ret = proc->fn(frame, THIS, (void *)dict);
+
+ /* proc->fn is processed synchronously, which means that the
+ * execution flow won't return here until the operation is
+ * fully processed, including any related callback. For this
+ * reason, it's safe to destroy the stack here, since no one
+ * can still be using it. Additionally, it's not easy to move
+ * the stack destroy to the callback executed after completion
+ * of the operation because there are multiple things than can
+ * fail even before having queued the callback, so we would
+ * still need to destroy the stack if proc->fn returns an
+ * error. */
+ CLI_STACK_DESTROY(frame);
+ dict = NULL;
+ }
+out:
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
- { "system:: help",
- cli_cmd_system_help_cbk,
- "display help for system commands"},
+ return ret;
+}
- { NULL, NULL, NULL }
-};
+int
+cli_cmd_copy_file_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *filename = "";
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ filename = (char *)words[3];
+ ret = dict_set_str(dict, "source", filename);
+ if (ret)
+ gf_log("", GF_LOG_ERROR, "Unable to set filename in dict");
+
+ ret = dict_set_str(dict, "volname", "N/A");
+ if (ret)
+ gf_log("", GF_LOG_ERROR, "Unable to set volname in dict");
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_COPY_FILE];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ ret = proc->fn(frame, THIS, (void *)dict);
+ }
+out:
+ return ret;
+}
int
-cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
+cli_cmd_system_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
{
- struct cli_cmd *cmd = NULL;
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *system_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_MALLOC(sizeof(cli_system_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, cli_system_cmds, sizeof(cli_system_cmds));
+ count = (sizeof(cli_system_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
- for (cmd = cli_system_cmds; cmd->pattern; cmd++)
- cli_out ("%s - %s", cmd->pattern, cmd->desc);
+ for (system_cmd = cmd; system_cmd->pattern; system_cmd++)
+ cli_out("%s - %s", system_cmd->pattern, system_cmd->desc);
- return 0;
+ GF_FREE(cmd);
+ return 0;
}
int
-cli_cmd_system_register (struct cli_state *state)
+cli_cmd_system_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
- for (cmd = cli_system_cmds; cmd->pattern; cmd++) {
-
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
+ for (cmd = cli_system_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index d6432218804..f238851586e 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -26,1925 +16,3263 @@
#include <sys/socket.h>
#include <netdb.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <netinet/in.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "cli1-xdr.h"
-#include "run.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/events.h>
-extern struct rpc_clnt *global_rpc;
+extern rpc_clnt_prog_t cli_quotad_clnt;
-extern rpc_clnt_prog_t *cli_rpc_prog;
+static int
+gf_asprintf_append(char **string_ptr, const char *format, ...);
int
-cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+cli_cmd_volume_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
int
-cli_cmd_volume_info_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
-{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- cli_cmd_volume_get_ctx_t ctx = {0,};
- cli_local_t *local = NULL;
- int sent = 0;
- int parse_error = 0;
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+cli_cmd_bitrot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+int
+cli_cmd_quota_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
- if ((wordcount == 2) || (wordcount == 3 &&
- !strcmp (words[2], "all"))) {
- ctx.flags = GF_CLI_GET_NEXT_VOLUME;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_NEXT_VOLUME];
- } else if (wordcount == 3) {
- ctx.flags = GF_CLI_GET_VOLUME;
- ctx.volname = (char *)words[2];
- if (strlen (ctx.volname) > 1024) {
- cli_out ("Invalid volume name");
- goto out;
- }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
- } else {
- cli_usage_out (word->pattern);
- parse_error = 1;
- return -1;
+int
+cli_cmd_volume_info_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_cmd_volume_get_ctx_t ctx = {
+ 0,
+ };
+ cli_local_t *local = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+
+ if ((wordcount == 2) || (wordcount == 3 && !strcmp(words[2], "all"))) {
+ ctx.flags = GF_CLI_GET_NEXT_VOLUME;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_NEXT_VOLUME];
+ } else if (wordcount == 3) {
+ ctx.flags = GF_CLI_GET_VOLUME;
+ ctx.volname = (char *)words[2];
+ if (strlen(ctx.volname) > GD_VOLUME_NAME_MAX) {
+ cli_out("Invalid volume name");
+ goto out;
}
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+ } else {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ return -1;
+ }
- local = cli_local_get ();
+ local = cli_local_get();
- if (!local)
- goto out;
+ if (!local)
+ goto out;
- local->get_vol.flags = ctx.flags;
- if (ctx.volname)
- local->get_vol.volname = gf_strdup (ctx.volname);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
- frame->local = local;
+ local->get_vol.flags = ctx.flags;
+ if (ctx.volname)
+ local->get_vol.volname = gf_strdup(ctx.volname);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, &ctx);
- }
+ frame->local = local;
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Getting Volume information failed!");
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, &ctx);
+ }
- CLI_STACK_DESTROY (frame);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Getting Volume information failed!");
+ }
- return ret;
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_sync_volume_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
- dict_t *dict = NULL;
-
- if ((wordcount < 3) || (wordcount > 4)) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question =
+ "Sync volume may make data "
+ "inaccessible while the sync "
+ "is in progress. Do you want "
+ "to continue?";
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- if ((wordcount == 3) || !strcmp(words[3], "all")) {
- ret = dict_set_int32 (dict, "flags", (int32_t)
- GF_CLI_SYNC_ALL);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set"
- "flag");
- goto out;
- }
- } else {
- ret = dict_set_str (dict, "volname", (char *) words[3]);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set "
- "volume");
- goto out;
- }
+ if ((wordcount == 3) || !strcmp(words[3], "all")) {
+ ret = dict_set_int32(dict, "flags", (int32_t)GF_CLI_SYNC_ALL);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to set"
+ "flag");
+ goto out;
}
-
- ret = dict_set_str (dict, "hostname", (char *) words[2]);
+ } else {
+ ret = dict_set_str(dict, "volname", (char *)words[3]);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set hostname");
- goto out;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to set "
+ "volume");
+ goto out;
}
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYNC_VOLUME];
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = dict_set_str(dict, "hostname", (char *)words[2]);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set hostname");
+ goto out;
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ if (!(state->mode & GLUSTER_MODE_SCRIPT)) {
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
}
+ }
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume sync failed");
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYNC_VOLUME];
- if (dict)
- dict_unref (dict);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_LOCAL_INIT(local, words, frame, dict);
- return ret;
-}
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
-gf_ai_compare_t
-cli_cmd_compare_addrinfo (struct addrinfo *first, struct addrinfo *next)
-{
- int ret = -1;
- struct addrinfo *tmp1 = NULL;
- struct addrinfo *tmp2 = NULL;
- char firstip[NI_MAXHOST] = {0.};
- char nextip[NI_MAXHOST] = {0,};
-
- for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
- ret = getnameinfo (tmp1->ai_addr, tmp1->ai_addrlen, firstip,
- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (ret)
- return GF_AI_COMPARE_ERROR;
- for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
- ret = getnameinfo (tmp2->ai_addr, tmp2->ai_addrlen, nextip,
- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (ret)
- return GF_AI_COMPARE_ERROR;
- if (!strcmp (firstip, nextip)) {
- return GF_AI_COMPARE_MATCH;
- }
- }
- }
- return GF_AI_COMPARE_NO_MATCH;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume sync failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
-/* Check for non optimal brick order for replicate :
- * Checks if bricks belonging to a replicate volume
- * are present on the same server
- */
-int32_t
-cli_cmd_check_brick_order (struct cli_state *state, const char *bricks,
- int brick_count, int sub_count)
+int
+cli_cmd_volume_create_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
- addrinfo_list_t *ai_list = NULL;
- addrinfo_list_t *ai_list_tmp1 = NULL;
- addrinfo_list_t *ai_list_tmp2 = NULL;
- char *brick = NULL;
- char *brick_list = NULL;
- char *brick_list_dup = NULL;
- char *tmpptr = NULL;
- struct addrinfo *ai_info = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- const char *failed_question = NULL;
- const char *found_question = NULL;
- failed_question = "Failed to perform brick order check. "
- "Do you want to continue creating the volume? ";
- found_question = "Multiple bricks of a replicate volume are present"
- " on the same server. This setup is not optimal.\n"
- "Do you still want to continue creating the volume? ";
-
- GF_ASSERT (bricks);
- GF_ASSERT (brick_count > 0);
- GF_ASSERT (sub_count > 0);
-
- ai_list = malloc (sizeof (addrinfo_list_t));
- ai_list->info = NULL;
- INIT_LIST_HEAD (&ai_list->list);
- brick_list = gf_strdup (bricks);
- if (brick_list == NULL) {
- gf_log ("cli", GF_LOG_DEBUG, "failed to allocate memory");
- goto check_failed;
- }
- brick_list_dup = brick_list;
- /* Resolve hostnames and get addrinfo */
- while (i < brick_count) {
- ++i;
- brick = strtok_r (brick_list, " \n", &tmpptr);
- brick_list = tmpptr;
- if (brick == NULL)
- goto check_failed;
- brick = strtok_r (brick, ":", &tmpptr);
- if (brick == NULL)
- goto check_failed;
- ret = getaddrinfo (brick, NULL, NULL, &ai_info);
- if (ret)
- goto check_failed;
- ai_list_tmp1 = malloc (sizeof (addrinfo_list_t));
- if (ai_list_tmp1 == NULL)
- goto check_failed;
- ai_list_tmp1->info = ai_info;
- list_add_tail (&ai_list_tmp1->list, &ai_list->list);
- ai_list_tmp1 = NULL;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+ char *trans_type = NULL;
+ char *bricks = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
+
+ ret = cli_cmd_volume_create_parse(state, words, wordcount, &options,
+ &bricks);
+
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- i = 0;
- ai_list_tmp1 = list_entry (ai_list->list.next, addrinfo_list_t, list);
-
- /* Check for bad brick order */
- while (i < brick_count) {
- ++i;
- ai_info = ai_list_tmp1->info;
- ai_list_tmp1 = list_entry (ai_list_tmp1->list.next,
- addrinfo_list_t, list);
- if ( 0 == i % sub_count) {
- j = 0;
- continue;
- }
- ai_list_tmp2 = ai_list_tmp1;
- k = j;
- while (k < sub_count - 1) {
- ++k;
- ret = cli_cmd_compare_addrinfo (ai_info,
- ai_list_tmp2->info);
- if (GF_AI_COMPARE_ERROR == ret)
- goto check_failed;
- if (GF_AI_COMPARE_MATCH == ret)
- goto found_bad_brick_order;
- ai_list_tmp2 = list_entry (ai_list_tmp2->list.next,
- addrinfo_list_t, list);
- }
- ++j;
- }
- gf_log ("cli", GF_LOG_INFO, "Brick order okay");
- ret = 0;
+ ret = dict_get_str(options, "transport", &trans_type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get transport type");
goto out;
+ }
+
+ if (state->mode & GLUSTER_MODE_WIGNORE) {
+ ret = dict_set_int32(options, "force", _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
-check_failed:
- gf_log ("cli", GF_LOG_INFO, "Failed bad brick order check");
- answer = cli_cmd_get_confirmation(state, failed_question);
- if (GF_ANSWER_YES == answer)
- ret = 0;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
-found_bad_brick_order:
- gf_log ("cli", GF_LOG_INFO, "Bad brick order found");
- answer = cli_cmd_get_confirmation (state, found_question);
- if (GF_ANSWER_YES == answer)
- ret = 0;
out:
- ai_list_tmp2 = NULL;
- i = 0;
- if (brick_list_dup)
- GF_FREE (brick_list_dup);
- list_for_each_entry (ai_list_tmp1, &ai_list->list, list) {
- if (ai_list_tmp1->info)
- freeaddrinfo (ai_list_tmp1->info);
- if (ai_list_tmp2)
- free (ai_list_tmp2);
- ai_list_tmp2 = ai_list_tmp1;
- }
- free (ai_list_tmp2);
- return ret;
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume create failed");
+ }
+
+ if (ret == 0) {
+ gf_event(EVENT_VOLUME_CREATE, "name=%s;bricks=%s", (char *)words[2],
+ bricks);
+ }
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_delete_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
- char *brick_list = NULL;
- int32_t brick_count = 0;
- int32_t sub_count = 0;
- int32_t type = GF_CLUSTER_TYPE_NONE;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *volname = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
+
+ question =
+ "Deleting volume will erase all information about the volume. "
+ "Do you want to continue?";
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DELETE_VOLUME];
+
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ volname = (char *)words[2];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
+ dict = dict_new();
+ if (!dict)
+ goto out;
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "dict set failed");
+ goto out;
+ }
+
+ if (!strcmp(volname, GLUSTER_SHARED_STORAGE)) {
+ question =
+ "Deleting the shared storage volume"
+ "(gluster_shared_storage), will affect features "
+ "like snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
- ret = cli_cmd_volume_create_parse (words, wordcount, &options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- /*Check brick order if type is replicate*/
- ret = dict_get_int32 (options, "type", &type);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Could not get brick type");
- goto out;
- }
- if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
- (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)) {
- if ((ret = dict_get_str (options, "bricks", &brick_list)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve bricks list");
- goto out;
- }
- if ((ret = dict_get_int32 (options, "count", &brick_count)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve brick count");
- goto out;
- }
- if ((ret = dict_get_int32 (options, "replica-count", &sub_count)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve replica count");
- goto out;
- }
- gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found."
- " Checking brick order.");
- ret = cli_cmd_check_brick_order (state, brick_list, brick_count, sub_count);
- if (ret) {
- gf_log("cli", GF_LOG_INFO, "Not creating volume because of bad brick order");
- goto out;
- }
- }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (options)
- dict_unref (options);
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume create failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume delete failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
-}
+ if (ret == 0 && GF_ANSWER_YES == answer) {
+ gf_event(EVENT_VOLUME_DELETE, "name=%s", (char *)words[2]);
+ }
+ return ret;
+}
int
-cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_start_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- char *volname = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- const char *question = NULL;
- int sent = 0;
- int parse_error = 0;
-
- question = "Deleting volume will erase all information about the volume. "
- "Do you want to continue?";
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DELETE_VOLUME];
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ int flags = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 3 || wordcount > 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- if (wordcount != 3) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ if (!words[2])
+ goto out;
+
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3])) {
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ } else {
+ ret = -1;
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
}
+ }
- answer = cli_cmd_get_confirmation (state, question);
+ dict = dict_new();
+ if (!dict) {
+ goto out;
+ }
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
- volname = (char *)words[2];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, volname);
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume delete failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume start failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
- CLI_STACK_DESTROY (frame);
+ if (ret == 0) {
+ gf_event(EVENT_VOLUME_START, "name=%s;force=%d", (char *)words[2],
+ (flags & GF_CLI_FLAG_OP_FORCE));
+ }
- return ret;
+ return ret;
}
-int
-cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+gf_answer_t
+cli_cmd_get_confirmation(struct cli_state *state, const char *question)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
- dict_t *dict = NULL;
- int flags = 0;
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ char answer[5] = {
+ '\0',
+ };
+ int flush = '\0';
+ size_t len;
- if (wordcount < 3 || wordcount > 4) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ if (state->mode & GLUSTER_MODE_SCRIPT)
+ return GF_ANSWER_YES;
- dict = dict_new ();
- if (!dict) {
- goto out;
- }
+ printf("%s (y/n) ", question);
- if (!words[2])
- goto out;
+ if (fgets(answer, 4, stdin) == NULL) {
+ cli_out("gluster cli read error");
+ goto out;
+ }
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
- goto out;
- }
+ len = strlen(answer);
- if (wordcount == 4) {
- if (!strcmp("force", words[3])) {
- flags |= GF_CLI_FLAG_OP_FORCE;
- } else {
- ret = -1;
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- }
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "dict set failed");
- goto out;
- }
+ if (len && answer[len - 1] == '\n') {
+ answer[--len] = '\0';
+ } else {
+ do {
+ flush = getchar();
+ } while (flush != '\n');
+ }
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to serialize dict");
- goto out;
- }
+ if (len > 3)
+ goto out;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
+ if (!strcasecmp(answer, "y") || !strcasecmp(answer, "yes"))
+ return GF_ANSWER_YES;
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ else if (!strcasecmp(answer, "n") || !strcasecmp(answer, "no"))
+ return GF_ANSWER_NO;
out:
- if (dict)
- dict_unref (dict);
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume start failed");
- }
-
- CLI_STACK_DESTROY (frame);
+ cli_out("Invalid input, please enter y/n");
- return ret;
+ return GF_ANSWER_NO;
}
-gf_answer_t
-cli_cmd_get_confirmation (struct cli_state *state, const char *question)
+int
+cli_cmd_volume_stop_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- char answer[5] = {'\0', };
- char flush = '\0';
- int len = 0;
-
- if (state->mode & GLUSTER_MODE_SCRIPT)
- return GF_ANSWER_YES;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int flags = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ cli_local_t *local = NULL;
+
+ const char *question =
+ "Stopping volume will make its data inaccessible. "
+ "Do you want to continue?";
+
+ if (wordcount < 3 || wordcount > 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- printf ("%s (y/n) ", question);
+ volname = (char *)words[2];
- if (fgets (answer, 4, stdin) == NULL) {
- cli_out("gluster cli read error");
- goto out;
+ dict = dict_new();
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ if (!strcmp(volname, GLUSTER_SHARED_STORAGE)) {
+ question =
+ "Stopping the shared storage volume"
+ "(gluster_shared_storage), will affect features "
+ "like snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3])) {
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ } else {
+ ret = -1;
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
}
+ }
- len = strlen (answer);
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
- if (answer [len - 1] == '\n'){
- answer [--len] = '\0';
- } else {
- do{
- flush = getchar ();
- }while (flush != '\n');
- }
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
- if (len > 3)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
- if (!strcasecmp (answer, "y") || !strcasecmp (answer, "yes"))
- return GF_ANSWER_YES;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- else if (!strcasecmp (answer, "n") || !strcasecmp (answer, "no"))
- return GF_ANSWER_NO;
+ CLI_LOCAL_INIT(local, words, frame, dict);
-out:
- cli_out ("Invalid input, please enter y/n");
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
- return GF_ANSWER_NO;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume stop on '%s' failed", volname);
+ }
+
+ CLI_STACK_DESTROY(frame);
+ if (dict)
+ dict_unref(dict);
+
+ if (ret == 0 && GF_ANSWER_YES == answer) {
+ gf_event(EVENT_VOLUME_STOP, "name=%s;force=%d", (char *)words[2],
+ (flags & GF_CLI_FLAG_OP_FORCE));
+ }
+
+ return ret;
}
int
-cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_rename_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int flags = 0;
- gf_answer_t answer = GF_ANSWER_NO;
- int sent = 0;
- int parse_error = 0;
- dict_t *dict = NULL;
- char *volname = NULL;
-
- const char *question = "Stopping volume will make its data inaccessible. "
- "Do you want to continue?";
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- if (wordcount < 3 || wordcount > 4) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- volname = (char*) words[2];
+ dict = dict_new();
+ if (!dict)
+ goto out;
- dict = dict_new ();
- ret = dict_set_str (dict, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
- goto out;
- }
+ ret = dict_set_str(dict, "old-volname", (char *)words[2]);
- if (wordcount == 4) {
- if (!strcmp("force", words[3])) {
- flags |= GF_CLI_FLAG_OP_FORCE;
- } else {
- ret = -1;
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- }
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "dict set failed");
- goto out;
- }
+ if (ret)
+ goto out;
- answer = cli_cmd_get_confirmation (state, question);
+ ret = dict_set_str(dict, "new-volname", (char *)words[3]);
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
+ if (ret)
+ goto out;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RENAME_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume stop on '%s' failed", volname);
- }
- if (dict)
- dict_unref (dict);
+ if (dict)
+ dict_unref(dict);
- CLI_STACK_DESTROY (frame);
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume rename on '%s' failed", (char *)words[2]);
+ }
- return ret;
-}
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
int
-cli_cmd_volume_rename_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_defrag_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int sent = 0;
- int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+#if (USE_EVENTS)
+ eventtypes_t event = EVENT_LAST;
+#endif
+#ifdef GF_SOLARIS_HOST_OS
+ cli_out("Command not supported on Solaris");
+ goto out;
+#endif
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = cli_cmd_volume_defrag_parse(words, wordcount, &dict);
- dict = dict_new ();
- if (!dict)
- goto out;
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ }
- if (wordcount != 4) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
- ret = dict_set_str (dict, "old-volname", (char *)words[2]);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (ret)
- goto out;
+ CLI_LOCAL_INIT(local, words, frame, dict);
- ret = dict_set_str (dict, "new-volname", (char *)words[3]);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
- if (ret)
- goto out;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume rebalance failed");
+ } else {
+#if (USE_EVENTS)
+ if (!(strcmp(words[wordcount - 1], "start")) ||
+ !(strcmp(words[wordcount - 1], "force"))) {
+ event = EVENT_VOLUME_REBALANCE_START;
+ } else if (!strcmp(words[wordcount - 1], "stop")) {
+ event = EVENT_VOLUME_REBALANCE_STOP;
+ }
+
+ if (event != EVENT_LAST)
+ gf_event(event, "volume=%s", (char *)words[2]);
+#endif
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RENAME_VOLUME];
+ CLI_STACK_DESTROY(frame);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ return ret;
+}
-out:
- if (dict)
- dict_destroy (dict);
+int
+cli_cmd_volume_reset_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+#if (USE_EVENTS)
+ int ret1 = -1;
+ char *tmp_opt = NULL;
+#endif
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume rename on '%s' failed", (char *)words[2]);
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
+
+ ret = cli_cmd_volume_reset_parse(words, wordcount, &options);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume reset failed");
+ }
+
+#if (USE_EVENTS)
+ if (ret == 0) {
+ ret1 = dict_get_str(options, "key", &tmp_opt);
+ if (ret1)
+ tmp_opt = "";
+
+ gf_event(EVENT_VOLUME_RESET, "name=%s;option=%s", (char *)words[2],
+ tmp_opt);
+ }
+#endif
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
+cli_cmd_volume_profile_cbk(struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int sent = 0;
- int parse_error = 0;
-#ifdef GF_SOLARIS_HOST_OS
- cli_out ("Command not supported on Solaris");
+ int sent = 0;
+ int parse_error = 0;
+
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_profile_parse(words, wordcount, &options);
+
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROFILE_VOLUME];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume profile failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
+#if (USE_EVENTS)
+ int ret1 = -1;
+ int i = 1;
+ char dict_key[50] = {
+ 0,
+ };
+ char *tmp_opt = NULL;
+ char *opts_str = NULL;
+ int num_options = 0;
#endif
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
- dict = dict_new ();
- if (!dict)
- goto out;
+ ret = cli_cmd_volume_set_parse(state, words, wordcount, &options,
+ &op_errstr);
+ if (ret) {
+ if (op_errstr) {
+ cli_err("%s", op_errstr);
+ GF_FREE(op_errstr);
+ } else
+ cli_usage_out(word->pattern);
- if (!((wordcount == 4) || (wordcount == 5))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ parse_error = 1;
+ goto out;
+ }
- if (wordcount == 4) {
- if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&
- strcmp (words[3], "status")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume set failed");
+ }
+
+#if (USE_EVENTS)
+ if (ret == 0 && strcmp(words[2], "help") != 0) {
+ ret1 = dict_get_int32(options, "count", &num_options);
+ if (ret1) {
+ num_options = 0;
+ goto end;
} else {
- if (strcmp (words[3], "fix-layout") &&
- strcmp (words[3], "start")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ num_options = num_options / 2;
}
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- if (ret)
- goto out;
-
- if (wordcount == 4) {
- ret = dict_set_str (dict, "command", (char *)words[3]);
- if (ret)
- goto out;
+ char *free_list_key[num_options];
+ char *free_list_val[num_options];
+ for (i = 0; i < num_options; i++) {
+ free_list_key[i] = NULL;
+ free_list_val[i] = NULL;
}
- if (wordcount == 5) {
- if ((strcmp (words[3], "fix-layout") ||
- strcmp (words[4], "start")) &&
- (strcmp (words[3], "start") ||
- strcmp (words[4], "force"))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- ret = -1;
- goto out;
- }
+ /* Initialize opts_str */
+ opts_str = "";
+
+ /* Prepare String in format options=KEY1,VALUE1,KEY2,VALUE2 */
+ for (i = 1; i <= num_options; i++) {
+ sprintf(dict_key, "key%d", i);
+ ret1 = dict_get_str(options, dict_key, &tmp_opt);
+ if (ret1)
+ tmp_opt = "";
- ret = dict_set_str (dict, "option", (char *)words[4]);
- if (ret)
- goto out;
- ret = dict_set_str (dict, "command", (char *)words[3]);
- if (ret)
- goto out;
+ gf_asprintf(&opts_str, "%s,%s", opts_str, tmp_opt);
+ free_list_key[i - 1] = opts_str;
+
+ sprintf(dict_key, "value%d", i);
+ ret1 = dict_get_str(options, dict_key, &tmp_opt);
+ if (ret1)
+ tmp_opt = "";
+
+ gf_asprintf(&opts_str, "%s,%s", opts_str, tmp_opt);
+ free_list_val[i - 1] = opts_str;
}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
+ gf_event(EVENT_VOLUME_SET, "name=%s;options=%s", (char *)words[2],
+ opts_str);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ /* Allocated by gf_strdup and gf_asprintf */
+ for (i = 0; i < num_options; i++) {
+ GF_FREE(free_list_key[i]);
+ GF_FREE(free_list_val[i]);
}
+ }
+#endif
-out:
- if (dict)
- dict_destroy (dict);
+end:
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
+
+static int
+cli_event_remove_brick_str(dict_t *options, char **event_str,
+ eventtypes_t *event)
+{
+ int ret = -1;
+ char *bricklist = NULL;
+ char *brick = NULL;
+ char *volname = NULL;
+ char key[256] = {
+ 0,
+ };
+ const char *eventstrformat = "volume=%s;bricks=%s";
+ int32_t command = 0;
+ int32_t i = 1;
+ int32_t count = 0;
+ int32_t eventstrlen = 1;
+ int bricklen = 0;
+ char *tmp_ptr = NULL;
+
+ if (!options || !event_str || !event)
+ goto out;
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret || !volname) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch volname");
+ ret = -1;
+ goto out;
+ }
+ /* Get the list of bricks for the event */
+ ret = dict_get_int32(options, "command", &command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch command");
+ ret = -1;
+ goto out;
+ }
+
+ switch (command) {
+ case GF_OP_CMD_START:
+ *event = EVENT_VOLUME_REMOVE_BRICK_START;
+ break;
+ case GF_OP_CMD_COMMIT:
+ *event = EVENT_VOLUME_REMOVE_BRICK_COMMIT;
+ break;
+ case GF_OP_CMD_COMMIT_FORCE:
+ *event = EVENT_VOLUME_REMOVE_BRICK_FORCE;
+ break;
+ case GF_OP_CMD_STOP:
+ *event = EVENT_VOLUME_REMOVE_BRICK_STOP;
+ break;
+ default:
+ *event = EVENT_LAST;
+ break;
+ }
+
+ ret = -1;
+
+ if (*event == EVENT_LAST) {
+ goto out;
+ }
+
+ /* I could just get this from words[] but this is cleaner in case the
+ * format changes */
+ while (i) {
+ snprintf(key, sizeof(key), "brick%d", i);
+ ret = dict_get_str(options, key, &brick);
if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume rebalance failed");
+ break;
}
+ eventstrlen += strlen(brick) + 1;
+ i++;
+ }
+
+ count = --i;
+
+ eventstrlen += 1;
- CLI_STACK_DESTROY (frame);
+ bricklist = GF_CALLOC(eventstrlen, sizeof(char), gf_common_mt_char);
+ if (!bricklist) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "memory allocation failed for"
+ "bricklist");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_ptr = bricklist;
- return ret;
+ i = 1;
+ while (i <= count) {
+ snprintf(key, sizeof(key), "brick%d", i);
+ ret = dict_get_str(options, key, &brick);
+ if (ret) {
+ break;
+ }
+ snprintf(tmp_ptr, eventstrlen, "%s ", brick);
+ bricklen = strlen(brick);
+ eventstrlen -= (bricklen + 1);
+ tmp_ptr += (bricklen + 1);
+ i++;
+ }
+
+ if (!ret) {
+ gf_asprintf(event_str, eventstrformat, volname, bricklist);
+ } else {
+ gf_asprintf(event_str, eventstrformat, volname, "<unavailable>");
+ }
+
+ ret = 0;
+out:
+ GF_FREE(bricklist);
+ return ret;
}
int
-cli_cmd_volume_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_add_brick_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int sent = 0;
- int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+
+#if (USE_EVENTS)
+ char *event_str = NULL;
+ char *bricks = NULL;
+ const char *eventstrformat = "volume=%s;bricks=%s";
+#endif
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
+ const char *question =
+ "Changing the 'stripe count' of the volume is "
+ "not a supported feature. In some cases it may result in data "
+ "loss on the volume. Also there may be issues with regular "
+ "filesystem operations on the volume after the change. Do you "
+ "really want to continue with 'stripe' count option ? ";
+
+ ret = cli_cmd_volume_add_brick_parse(state, words, wordcount, &options, 0);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
+ /* TODO: there are challenges in supporting changing of
+ stripe-count, until it is properly supported give warning to user */
+ if (dict_get(options, "stripe-count")) {
+ answer = cli_cmd_get_confirmation(state, question);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
- ret = cli_cmd_volume_reset_parse (words, wordcount, &options);
+#if (USE_EVENTS)
+ /* Get the list of bricks for the event */
+ ret = dict_get_str(options, "bricks", &bricks);
+
+ if (!ret) {
+ gf_asprintf(&event_str, eventstrformat, (char *)words[2],
+ &bricks[1] /*Skip leading space*/);
+ } else {
+ gf_asprintf(&event_str, eventstrformat, (char *)words[2],
+ "<unavailable>");
+ }
+#endif
+
+ if (state->mode & GLUSTER_MODE_WIGNORE) {
+ ret = dict_set_int32(options, "force", _gf_true);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set force "
+ "option");
+ goto out;
}
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
-out:
- if (options)
- dict_unref (options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume reset failed");
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume add-brick failed");
+ } else {
+#if (USE_EVENTS)
+ gf_event(EVENT_VOLUME_ADD_BRICK, "%s", event_str);
+#endif
+ }
+#if (USE_EVENTS)
+ GF_FREE(event_str);
+#endif
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_profile_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_get_soft_limit(dict_t *options, const char **words, dict_t *xdata)
{
- int sent = 0;
- int parse_error = 0;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ char *default_sl = NULL;
+ char *default_sl_dup = NULL;
+ int ret = -1;
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ // We need a ref on @options to prevent CLI_STACK_DESTROY
+ // from destroying it prematurely.
+ dict_ref(options);
+ CLI_LOCAL_INIT(local, words, frame, options);
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_QUOTA];
+ ret = proc->fn(frame, THIS, options);
+
+ ret = dict_get_str(options, "default-soft-limit", &default_sl);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get default soft limit");
+ goto out;
+ }
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
+ default_sl_dup = gf_strdup(default_sl);
+ if (!default_sl_dup) {
+ ret = -1;
+ goto out;
+ }
- ret = cli_cmd_volume_profile_parse (words, wordcount, &options);
+ ret = dict_set_dynstr(xdata, "default-soft-limit", default_sl_dup);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set default soft limit");
+ GF_FREE(default_sl_dup);
+ goto out;
+ }
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+out:
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROFILE_VOLUME];
+/* Checks if at least one limit has been set on the volume
+ *
+ * Returns true if at least one limit is set. Returns false otherwise.
+ */
+gf_boolean_t
+_limits_set_on_volume(char *volname, int type)
+{
+ gf_boolean_t limits_set = _gf_false;
+ int ret = -1;
+ char quota_conf_file[PATH_MAX] = {
+ 0,
+ };
+ int fd = -1;
+ char buf[16] = {
+ 0,
+ };
+ float version = 0.0f;
+ char gfid_type_stored = 0;
+ char gfid_type = 0;
+
+ /* TODO: fix hardcoding; Need to perform an RPC call to glusterd
+ * to fetch working directory
+ */
+ snprintf(quota_conf_file, sizeof quota_conf_file, "%s/vols/%s/quota.conf",
+ GLUSTERD_DEFAULT_WORKDIR, volname);
+ fd = open(quota_conf_file, O_RDONLY);
+ if (fd == -1)
+ goto out;
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = quota_conf_read_version(fd, &version);
+ if (ret)
+ goto out;
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ gfid_type = GF_QUOTA_CONF_TYPE_USAGE;
+ else
+ gfid_type = GF_QUOTA_CONF_TYPE_OBJECTS;
-out:
- if (options)
- dict_unref (options);
+ /* Try to read at least one gfid of type 'gfid_type' */
+ while (1) {
+ ret = quota_conf_read_gfid(fd, buf, &gfid_type_stored, version);
+ if (ret <= 0)
+ break;
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume profile failed");
+ if (gfid_type_stored == gfid_type) {
+ limits_set = _gf_true;
+ break;
}
+ }
+out:
+ if (fd != -1)
+ sys_close(fd);
- CLI_STACK_DESTROY (frame);
-
- return ret;
-
+ return limits_set;
}
int
-cli_cmd_volume_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_quota_handle_list_all(const char **words, dict_t *options)
{
- int sent = 0;
- int parse_error = 0;
+ int all_failed = 1;
+ int count = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *xdata = NULL;
+ char gfid_str[UUID_CANONICAL_FORM_LEN + 1];
+ char *volname = NULL;
+ char *volname_dup = NULL;
+ unsigned char buf[16] = {0};
+ int fd = -1;
+ char quota_conf_file[PATH_MAX] = {0};
+ gf_boolean_t xml_err_flag = _gf_false;
+ char err_str[NAME_MAX] = {
+ 0,
+ };
+ int32_t type = 0;
+ char gfid_type = 0;
+ float version = 0.0f;
+ int32_t max_count = 0;
+
+ xdata = dict_new();
+ if (!xdata) {
+ ret = -1;
+ goto out;
+ }
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
+ ret = dict_get_int32(options, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get quota option type");
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = dict_set_int32(xdata, "type", type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set type in xdata");
+ goto out;
+ }
- ret = cli_cmd_volume_set_parse (words, wordcount, &options);
+ ret = cli_get_soft_limit(options, words, xdata);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to fetch default "
+ "soft-limit");
+ goto out;
+ }
+
+ /* Check if at least one limit is set on volume. No need to check for
+ * quota enabled as cli_get_soft_limit() handles that
+ */
+ if (!_limits_set_on_volume(volname, type)) {
+ snprintf(err_str, sizeof(err_str),
+ "No%s quota configured on"
+ " volume %s",
+ (type == GF_QUOTA_OPTION_TYPE_LIST) ? "" : " inode", volname);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ xml_err_flag = _gf_true;
+ } else {
+ cli_out("quota: %s", err_str);
+ }
+ ret = 0;
+ goto out;
+ }
+
+ volname_dup = gf_strdup(volname);
+ if (!volname_dup) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr(xdata, "volume-uuid", volname_dup);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set volume-uuid");
+ GF_FREE(volname_dup);
+ goto out;
+ }
+
+ // TODO: fix hardcoding; Need to perform an RPC call to glusterd
+ // to fetch working directory
+ snprintf(quota_conf_file, sizeof quota_conf_file, "%s/vols/%s/quota.conf",
+ GLUSTERD_DEFAULT_WORKDIR, volname);
+ fd = open(quota_conf_file, O_RDONLY);
+ if (fd == -1) {
+ // This may because no limits were yet set on the volume
+ gf_log("cli", GF_LOG_TRACE,
+ "Unable to open "
+ "quota.conf");
+ ret = 0;
+ goto out;
+ }
+
+ ret = quota_conf_read_version(fd, &version);
+ if (ret)
+ goto out;
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, xdata);
+ proc = &cli_quotad_clnt.proctable[GF_AGGREGATOR_GETLIMIT];
+
+ for (count = 0;; count++) {
+ ret = quota_conf_read_gfid(fd, buf, &gfid_type, version);
+ if (ret == 0) {
+ break;
+ } else if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_CRITICAL,
+ "Quota "
+ "configuration store may be corrupt.");
+ goto out;
+ }
+
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST &&
+ gfid_type == GF_QUOTA_CONF_TYPE_OBJECTS) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS &&
+ gfid_type == GF_QUOTA_CONF_TYPE_USAGE))
+ continue;
+
+ max_count++;
+ }
+ ret = dict_set_int32(xdata, "max_count", max_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set max_count");
+ goto out;
+ }
+
+ ret = sys_lseek(fd, 0L, SEEK_SET);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to move offset to "
+ "the beginning: %s",
+ strerror(errno));
+ goto out;
+ }
+ ret = quota_conf_read_version(fd, &version);
+ if (ret)
+ goto out;
+
+ for (count = 0;; count++) {
+ ret = quota_conf_read_gfid(fd, buf, &gfid_type, version);
+ if (ret == 0) {
+ break;
+ } else if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_CRITICAL,
+ "Quota "
+ "configuration store may be corrupt.");
+ goto out;
+ }
+
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST &&
+ gfid_type == GF_QUOTA_CONF_TYPE_OBJECTS) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS &&
+ gfid_type == GF_QUOTA_CONF_TYPE_USAGE))
+ continue;
+
+ uuid_utoa_r(buf, gfid_str);
+ ret = dict_set_str(xdata, "gfid", gfid_str);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR, "Failed to set gfid");
+ goto out;
}
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
+ ret = proc->fn(frame, THIS, xdata);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get quota "
+ "limits for %s",
+ uuid_utoa((unsigned char *)buf));
}
-out:
- if (options)
- dict_unref (options);
+ dict_del(xdata, "gfid");
+ all_failed = all_failed && ret;
+ }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list_end(local);
if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume set failed");
+ gf_log("cli", GF_LOG_ERROR,
+ "Error in printing "
+ "xml output");
+ goto out;
}
+ }
- CLI_STACK_DESTROY (frame);
-
- return ret;
+ if (count > 0) {
+ ret = all_failed ? -1 : 0;
+ } else {
+ ret = 0;
+ }
+out:
+ if (xml_err_flag) {
+ ret = cli_xml_output_str("volQuota", NULL, -1, 0, err_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Error outputting in "
+ "xml format");
+ }
+ }
+ if (xdata)
+ dict_unref(xdata);
+
+ if (fd != -1) {
+ sys_close(fd);
+ }
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch and display quota"
+ " limits");
+ }
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_add_brick_cbk (struct cli_state *state,
- struct cli_cmd_word *word, const char **words,
- int wordcount)
+cli_cmd_bitrot_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
- gf_answer_t answer = GF_ANSWER_NO;
-
- const char *question = "Changing the 'stripe count' of the volume is "
- "not a supported feature. In some cases it may result in data "
- "loss on the volume. Also there may be issues with regular "
- "filesystem operations on the volume after the change. Do you "
- "really want to continue with 'stripe' count option ? ";
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ int parse_err = 0;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ int sent = 0;
+#if (USE_EVENTS)
+ int cmd_type = -1;
+ int ret1 = -1;
+ int event_type = -1;
+ char *tmp = NULL;
+ char *events_str = NULL;
+ char *volname = NULL;
+#endif
- ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
+ ret = cli_cmd_bitrot_parse(words, wordcount, &options);
+ if (ret < 0) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
+ }
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ if (ret == 1) {
+ /* this is 'volume bitrot help' */
+ cli_cmd_bitrot_help_cbk(state, word, words, wordcount);
+ ret = 0;
+ goto out2;
+ }
- /* TODO: there are challenges in supporting changing of
- stripe-count, untill it is properly supported give warning to user */
- if (dict_get (options, "stripe-count")) {
- answer = cli_cmd_get_confirmation (state, question);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_BITROT];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
out:
- if (options)
- dict_unref (options);
-
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume add-brick failed");
- }
-
- CLI_STACK_DESTROY (frame);
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_err == 0))
+ cli_err(
+ "Bit rot command failed. Please check the cli "
+ "logs for more details");
+ }
+
+#if (USE_EVENTS)
+ if (ret == 0) {
+ ret1 = dict_get_int32(options, "type", &cmd_type);
+ if (ret1)
+ cmd_type = -1;
+ else {
+ ret1 = dict_get_str(options, "volname", &volname);
+ if (ret1)
+ volname = "";
+ }
+
+ switch (cmd_type) {
+ case GF_BITROT_OPTION_TYPE_ENABLE:
+ event_type = EVENT_BITROT_ENABLE;
+ break;
+ case GF_BITROT_OPTION_TYPE_DISABLE:
+ event_type = EVENT_BITROT_DISABLE;
+ break;
+ case GF_BITROT_CMD_SCRUB_ONDEMAND:
+ event_type = EVENT_BITROT_SCRUB_ONDEMAND;
+ break;
+ case GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE:
+ event_type = EVENT_BITROT_SCRUB_THROTTLE;
+ ret1 = dict_get_str(options, "scrub-throttle-value", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
+ break;
+ case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
+ event_type = EVENT_BITROT_SCRUB_FREQ;
+ ret1 = dict_get_str(options, "scrub-frequency-value", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
+ break;
+ case GF_BITROT_OPTION_TYPE_SCRUB:
+ event_type = EVENT_BITROT_SCRUB_OPTION;
+ ret1 = dict_get_str(options, "scrub-value", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
+ break;
+ default:
+ break;
+ }
+
+ if (event_type > -1)
+ gf_event(event_type, "%s", events_str);
+
+ if (events_str)
+ GF_FREE(events_str);
+ }
+#endif
- return ret;
+ CLI_STACK_DESTROY(frame);
+out2:
+ return ret;
}
int
-cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_quota_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
-
- int ret = 0;
- int parse_err = 0;
- int32_t type = 0;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- const char *question = "Disabling quota will delete all the quota "
- "configuration. Do you want to continue?";
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_QUOTA];
- if (proc == NULL) {
- ret = -1;
- goto out;
+ int ret = 0;
+ int parse_err = 0;
+ int32_t type = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+ int sent = 0;
+ char *volname = NULL;
+ const char *question =
+ "Disabling quota will delete all the quota "
+ "configuration. Do you want to continue?";
+
+ // parse **words into options dictionary
+ if (strcmp(words[1], "inode-quota") == 0) {
+ ret = cli_cmd_inode_quota_parse(words, wordcount, &options);
+ if (ret < 0) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
}
+ } else {
+ ret = cli_cmd_quota_parse(words, wordcount, &options);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
+ if (ret == 1) {
+ cli_cmd_quota_help_cbk(state, word, words, wordcount);
+ ret = 0;
+ goto out;
}
-
- ret = cli_cmd_quota_parse (words, wordcount, &options);
if (ret < 0) {
- cli_usage_out (word->pattern);
- parse_err = 1;
- goto out;
- } else if (dict_get_int32 (options, "type", &type) == 0 &&
- type == GF_QUOTA_OPTION_TYPE_DISABLE) {
- answer = cli_cmd_get_confirmation (state, question);
- if (answer == GF_ANSWER_NO)
- goto out;
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
}
+ }
- if (proc->fn)
- ret = proc->fn (frame, THIS, options);
-
-out:
- if (options)
- dict_unref (options);
+ ret = dict_get_int32(options, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get opcode");
+ goto out;
+ }
+
+ // handle quota-disable and quota-list-all different from others
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_DISABLE:
+ answer = cli_cmd_get_confirmation(state, question);
+ if (answer == GF_ANSWER_NO)
+ goto out;
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIST:
+ case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS:
+ if (wordcount != 4)
+ break;
+ ret = cli_cmd_quota_handle_list_all(words, options);
+ goto out;
+ default:
+ break;
+ }
+
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
- if (ret && parse_err == 0)
- cli_out ("Quota command failed");
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_LOCAL_INIT(local, words, frame, options);
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_QUOTA];
- return ret;
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if (sent == 0 && parse_err == 0)
+ cli_out(
+ "Quota command failed. Please check the cli "
+ "logs for more details");
+ }
+ if (options)
+ dict_unref(options);
+
+ /* Events for Quota */
+ if (ret == 0) {
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_ENABLE:
+ gf_event(EVENT_QUOTA_ENABLE, "volume=%s", volname);
+ break;
+ case GF_QUOTA_OPTION_TYPE_DISABLE:
+ gf_event(EVENT_QUOTA_DISABLE, "volume=%s", volname);
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+ gf_event(EVENT_QUOTA_SET_USAGE_LIMIT,
+ "volume=%s;"
+ "path=%s;limit=%s",
+ volname, words[4], words[5]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+ gf_event(EVENT_QUOTA_SET_OBJECTS_LIMIT,
+ "volume=%s;"
+ "path=%s;limit=%s",
+ volname, words[4], words[5]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_REMOVE:
+ gf_event(EVENT_QUOTA_REMOVE_USAGE_LIMIT,
+ "volume=%s;"
+ "path=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+ gf_event(EVENT_QUOTA_REMOVE_OBJECTS_LIMIT,
+ "volume=%s;"
+ "path=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+ gf_event(EVENT_QUOTA_ALERT_TIME, "volume=%s;time=%s", volname,
+ words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+ gf_event(EVENT_QUOTA_SOFT_TIMEOUT,
+ "volume=%s;"
+ "soft-timeout=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+ gf_event(EVENT_QUOTA_HARD_TIMEOUT,
+ "volume=%s;"
+ "hard-timeout=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+ gf_event(EVENT_QUOTA_DEFAULT_SOFT_LIMIT,
+ "volume=%s;"
+ "default-soft-limit=%s",
+ volname, words[4]);
+ break;
+ }
+ }
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
- struct cli_cmd_word *word, const char **words,
- int wordcount)
+cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- int sent = 0;
- int parse_error = 0;
- int need_question = 0;
-
- const char *question = "Removing brick(s) can result in data loss. "
- "Do you want to Continue?";
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ int brick_count = 0;
+ int sent = 0;
+ int parse_error = 0;
+ int need_question = 0;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+#if (USE_EVENTS)
+ eventtypes_t event = EVENT_LAST;
+ char *event_str = NULL;
+ int event_ret = -1;
+#endif
+ int32_t command = GF_OP_CMD_NONE;
+ char *question = NULL;
+
+ ret = cli_cmd_volume_remove_brick_parse(state, words, wordcount, &options,
+ &need_question, &brick_count,
+ &command);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ if (command == GF_OP_CMD_COMMIT_FORCE) {
+ question =
+ "Remove-brick force will not migrate files from the "
+ "removed bricks, so they will no longer be available"
+ " on the volume.\nDo you want to continue?";
+ } else if (command == GF_OP_CMD_START) {
+ question =
+ "It is recommended that remove-brick be run with"
+ " cluster.force-migration option disabled to prevent"
+ " possible data corruption. Doing so will ensure that"
+ " files that receive writes during migration will not"
+ " be migrated and will need to be manually copied"
+ " after the remove-brick commit operation. Please"
+ " check the value of the option and update accordingly."
+ " \nDo you want to continue with your current"
+ " cluster.force-migration settings?";
+ }
+
+ if (!brick_count) {
+ cli_err("No bricks specified");
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret || !volname) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch volname");
+ ret = -1;
+ goto out;
+ }
- ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options,
- &need_question);
+#if (USE_EVENTS)
+ event_ret = cli_event_remove_brick_str(options, &event_str, &event);
+#endif
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ if (!strcmp(volname, GLUSTER_SHARED_STORAGE)) {
+ question =
+ "Removing brick from the shared storage volume"
+ "(gluster_shared_storage), will affect features "
+ "like snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ need_question = _gf_true;
+ }
+
+ if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
}
+ }
- if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
- /* we need to ask question only in case of 'commit or force' */
- answer = cli_cmd_get_confirmation (state, question);
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume remove-brick failed");
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- if (options)
- dict_unref (options);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume remove-brick failed");
+ }
+#if (USE_EVENTS)
+ if (!ret && !event_ret)
+ gf_event(event, "%s", event_str);
+ if (event_str)
+ GF_FREE(event_str);
- CLI_STACK_DESTROY (frame);
+#endif
- return ret;
+ CLI_STACK_DESTROY(frame);
+ if (options)
+ dict_unref(options);
+ return ret;
}
int
-cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words,
- int wordcount)
+cli_cmd_volume_reset_brick_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
#ifdef GF_SOLARIS_HOST_OS
- cli_out ("Command not supported on Solaris");
- goto out;
+ cli_out("Command not supported on Solaris");
+ goto out;
#endif
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REPLACE_BRICK];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_BRICK];
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = cli_cmd_volume_reset_brick_parse(words, wordcount, &options);
- ret = cli_cmd_volume_replace_brick_parse (words, wordcount, &options);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ if (state->mode & GLUSTER_MODE_WIGNORE_PARTITION) {
+ ret = dict_set_int32(options, "ignore-partition", _gf_true);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set ignore-"
+ "partition option");
+ goto out;
}
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
-out:
- if (options)
- dict_unref (options);
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume replace-brick failed");
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- CLI_STACK_DESTROY (frame);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume reset-brick failed");
+ } else {
+ if (wordcount > 5) {
+ gf_event(EVENT_BRICK_RESET_COMMIT,
+ "Volume=%s;source-brick=%s;"
+ "destination-brick=%s",
+ (char *)words[2], (char *)words[3], (char *)words[4]);
+ } else {
+ gf_event(EVENT_BRICK_RESET_START, "Volume=%s;source-brick=%s",
+ (char *)words[2], (char *)words[3]);
+ }
+ }
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
-
int
-cli_cmd_volume_set_transport_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_replace_brick_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
- cli_cmd_broadcast_response (0);
- return 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+#ifdef GF_SOLARIS_HOST_OS
+ cli_out("Command not supported on Solaris");
+ goto out;
+#endif
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REPLACE_BRICK];
+
+ ret = cli_cmd_volume_replace_brick_parse(words, wordcount, &options);
+
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume replace-brick failed");
+ } else {
+ gf_event(EVENT_BRICK_REPLACE,
+ "Volume=%s;source-brick=%s;destination-brick=%s",
+ (char *)words[2], (char *)words[3], (char *)words[4]);
+ }
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_set_transport_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
+ cli_cmd_broadcast_response(0);
+ return 0;
+}
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
+int
+cli_cmd_volume_top_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_top_parse(words, wordcount, &options);
+
+ if (ret) {
+ parse_error = 1;
+ cli_usage_out(word->pattern);
+ goto out;
+ }
- ret = cli_cmd_volume_top_parse (words, wordcount, &options);
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_TOP_VOLUME];
- if (ret) {
- parse_error = 1;
- cli_usage_out (word->pattern);
- goto out;
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_TOP_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
out:
- if (options)
- dict_unref (options);
-
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume top failed");
- }
-
- CLI_STACK_DESTROY (frame);
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume top failed");
+ }
- return ret;
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
-
int
-cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (!((wordcount == 4) || (wordcount == 5))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_ROTATE];
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (!((wordcount == 4) || (wordcount == 5))) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ if (!(strcmp("rotate", words[3]) == 0)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- ret = cli_cmd_log_rotate_parse (words, wordcount, &options);
- if (ret)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_ROTATE];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ ret = cli_cmd_log_rotate_parse(words, wordcount, &options);
+ if (ret)
+ goto out;
-out:
- if (options)
- dict_destroy (options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume log rotate failed");
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume log rotate failed");
+ }
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
#if (SYNCDAEMON_COMPILE)
static int
-cli_check_gsync_present ()
+cli_check_gsync_present()
{
- char buff[PATH_MAX] = {0, };
- runner_t runner = {0,};
- char *ptr = NULL;
- int ret = 0;
-
- ret = setenv ("_GLUSTERD_CALLED_", "1", 1);
- if (-1 == ret) {
- gf_log ("", GF_LOG_WARNING, "setenv syscall failed, hence could"
- "not assert if geo-replication is installed");
- goto out;
- }
-
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL);
- runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
- ret = runner_start (&runner);
- if (ret == -1) {
- gf_log ("", GF_LOG_INFO, "geo-replication not installed");
- goto out;
- }
+ char buff[PATH_MAX] = {
+ 0,
+ };
+ runner_t runner = {
+ 0,
+ };
+ char *ptr = NULL;
+ int ret = 0;
+
+ ret = setenv("_GLUSTERD_CALLED_", "1", 1);
+ if (-1 == ret) {
+ gf_log("", GF_LOG_WARNING,
+ "setenv syscall failed, hence could"
+ "not assert if geo-replication is installed");
+ goto out;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--version", NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start(&runner);
+ if (ret == -1) {
+ gf_log("", GF_LOG_INFO, "geo-replication not installed");
+ goto out;
+ }
- ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO));
- if (ptr) {
- if (!strstr (buff, "gsyncd")) {
- ret = -1;
- goto out;
- }
- } else {
- ret = -1;
- goto out;
+ ptr = fgets(buff, sizeof(buff), runner_chio(&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (!strstr(buff, "gsyncd")) {
+ ret = -1;
+ goto out;
}
+ } else {
+ ret = -1;
+ goto out;
+ }
- ret = runner_end (&runner);
+ ret = runner_end(&runner);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "geo-replication not installed");
+ if (ret)
+ gf_log("", GF_LOG_ERROR, "geo-replication not installed");
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret ? -1 : 0;
-
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret ? -1 : 0;
}
void
-cli_cmd_check_gsync_exists_cbk (struct cli_cmd *this)
+cli_cmd_check_gsync_exists_cbk(struct cli_cmd *this)
{
+ int ret = 0;
- int ret = 0;
-
- ret = cli_check_gsync_present ();
- if (ret)
- this->disable = _gf_true;
-
+ ret = cli_check_gsync_present();
+ if (ret)
+ this->disable = _gf_true;
}
#endif
int
-cli_cmd_volume_gsync_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = 0;
- int parse_err = 0;
- dict_t *options = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
-
- proc = &cli_rpc_prog->proctable [GLUSTER_CLI_GSYNC_SET];
- if (proc == NULL) {
- ret = -1;
- goto out;
- }
+ int ret = 0;
+ int parse_err = 0;
+ dict_t *options = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char *errstr = NULL;
+#if (USE_EVENTS)
+ int ret1 = -1;
+ int cmd_type = -1;
+ int tmpi = 0;
+ char *tmp = NULL;
+ char *events_str = NULL;
+ int event_type = -1;
+#endif
- frame = create_frame (THIS, THIS->ctx->pool);
- if (frame == NULL) {
- ret = -1;
- goto out;
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GSYNC_SET];
- ret = cli_cmd_gsync_set_parse (words, wordcount, &options);
- if (ret) {
- cli_usage_out (word->pattern);
- parse_err = 1;
- goto out;
+ ret = cli_cmd_gsync_set_parse(state, words, wordcount, &options, &errstr);
+ if (ret) {
+ if (errstr) {
+ cli_err("%s", errstr);
+ GF_FREE(errstr);
+ } else {
+ cli_usage_out(word->pattern);
}
+ parse_err = 1;
+ goto out;
+ }
- if (proc->fn)
- ret = proc->fn (frame, THIS, options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
-out:
- if (options)
- dict_unref (options);
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (ret && parse_err == 0)
- cli_out (GEOREP" command failed");
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
- CLI_STACK_DESTROY (frame);
+out:
+ if (ret && parse_err == 0)
+ cli_out(GEOREP " command failed");
+
+#if (USE_EVENTS)
+ if (ret == 0) {
+ events_str = gf_strdup("");
+
+ /* Type of Geo-rep Action - Create, Start etc */
+ ret1 = dict_get_int32(options, "type", &cmd_type);
+ if (ret1)
+ cmd_type = -1;
+
+ /* Only capture Events for modification commands */
+ switch (cmd_type) {
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ event_type = EVENT_GEOREP_CREATE;
+ break;
+ case GF_GSYNC_OPTION_TYPE_START:
+ event_type = EVENT_GEOREP_START;
+ break;
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ event_type = EVENT_GEOREP_STOP;
+ break;
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ event_type = EVENT_GEOREP_PAUSE;
+ break;
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ event_type = EVENT_GEOREP_RESUME;
+ break;
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ event_type = EVENT_GEOREP_DELETE;
+ break;
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ ret1 = dict_get_str(options, "subop", &tmp);
+ if (ret1)
+ tmp = "";
+
+ /* For Config Set additionally capture key and value */
+ /* For Config Reset capture key */
+ if (strcmp(tmp, "set") == 0) {
+ event_type = EVENT_GEOREP_CONFIG_SET;
+
+ ret1 = dict_get_str(options, "op_name", &tmp);
+ if (ret1)
+ tmp = "";
+
+ gf_asprintf_append(&events_str, "%soption=%s;", events_str,
+ tmp);
+
+ ret1 = dict_get_str(options, "op_value", &tmp);
+ if (ret1)
+ tmp = "";
+
+ gf_asprintf_append(&events_str, "%svalue=%s;", events_str,
+ tmp);
+ } else if (strcmp(tmp, "del") == 0) {
+ event_type = EVENT_GEOREP_CONFIG_RESET;
+
+ ret1 = dict_get_str(options, "op_name", &tmp);
+ if (ret1)
+ tmp = "";
+
+ gf_asprintf_append(&events_str, "%soption=%s;", events_str,
+ tmp);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (event_type > -1) {
+ /* Capture all optional arguments used */
+ ret1 = dict_get_int32(options, "force", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sforce=%d;", events_str,
+ tmpi);
+ }
+ ret1 = dict_get_int32(options, "push_pem", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%spush_pem=%d;", events_str,
+ tmpi);
+ }
+ ret1 = dict_get_int32(options, "no_verify", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sno_verify=%d;", events_str,
+ tmpi);
+ }
+
+ ret1 = dict_get_int32(options, "ssh_port", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sssh_port=%d;", events_str,
+ tmpi);
+ }
+
+ ret1 = dict_get_int32(options, "reset-sync-time", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sreset_sync_time=%d;",
+ events_str, tmpi);
+ }
+ /* Capture Master and Slave Info */
+ ret1 = dict_get_str(options, "master", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf_append(&events_str, "%smaster=%s;", events_str, tmp);
+
+ ret1 = dict_get_str(options, "slave", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf_append(&events_str, "%sslave=%s", events_str, tmp);
+
+ gf_event(event_type, "%s", events_str);
+ }
+
+ /* Allocated by gf_strdup and gf_asprintf */
+ if (events_str)
+ GF_FREE(events_str);
+ }
+#endif
- return ret;
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_status_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_status_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- uint32_t cmd = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ uint32_t cmd = 0;
+ cli_local_t *local = NULL;
- ret = cli_cmd_volume_status_parse (words, wordcount, &dict);
+ ret = cli_cmd_volume_status_parse(words, wordcount, &dict);
- if (ret) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ if (ret) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
- goto out;
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
- if (!(cmd & GF_CLI_STATUS_ALL)) {
- /* for one volume or brick */
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_VOLUME];
- } else {
- /* volume status all or all detail */
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_ALL];
- }
+ if (!(cmd & GF_CLI_STATUS_ALL)) {
+ /* for one volume or brick */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_VOLUME];
+ } else {
+ /* volume status all or all detail */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_ALL];
+ }
- if (!proc->fn)
- goto out;
+ if (!proc->fn) {
+ ret = -1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- ret = proc->fn (frame, THIS, dict);
+ CLI_LOCAL_INIT(local, words, frame, dict);
- out:
- if (dict)
- dict_unref (dict);
+ ret = proc->fn(frame, THIS, dict);
- CLI_STACK_DESTROY (frame);
+out:
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
-
int
-cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status)
+cli_get_detail_status(dict_t *dict, int i, cli_volume_status_t *status)
{
- uint64_t free = 0;
- uint64_t total = 0;
- char key[1024] = {0};
- int ret = 0;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free", i);
- ret = dict_get_uint64 (dict, key, &free);
-
- status->free = gf_uint64_2human_readable (free);
- if (!status->free)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.total", i);
- ret = dict_get_uint64 (dict, key, &total);
+ uint64_t free = 0;
+ uint64_t total = 0;
+ char key[1024] = {0};
+ int ret = 0;
- status->total = gf_uint64_2human_readable (total);
- if (!status->total)
- goto out;
-
-#ifdef GF_LINUX_HOST_OS
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.device", i);
- ret = dict_get_str (dict, key, &(status->device));
- if (ret)
- status->device = NULL;
-#endif
+ snprintf(key, sizeof(key), "brick%d.free", i);
+ ret = dict_get_uint64(dict, key, &free);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.block_size", i);
- ret = dict_get_uint64 (dict, key, &(status->block_size));
- if (ret) {
- ret = 0;
- status->block_size = 0;
- }
+ status->free = gf_uint64_2human_readable(free);
+ if (!status->free)
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mnt_options", i);
- ret = dict_get_str (dict, key, &(status->mount_options));
- if (ret)
- status->mount_options = NULL;
+ snprintf(key, sizeof(key), "brick%d.total", i);
+ ret = dict_get_uint64(dict, key, &total);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.fs_name", i);
- ret = dict_get_str (dict, key, &(status->fs_name));
- if (ret) {
- ret = 0;
- status->fs_name = NULL;
- }
+ status->total = gf_uint64_2human_readable(total);
+ if (!status->total)
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.inode_size", i);
- ret = dict_get_str (dict, key, &(status->inode_size));
- if (ret)
- status->inode_size = NULL;
+ snprintf(key, sizeof(key), "brick%d.device", i);
+ ret = dict_get_str(dict, key, &(status->device));
+ if (ret)
+ status->device = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.total_inodes", i);
- ret = dict_get_uint64 (dict, key,
- &(status->total_inodes));
- if (ret)
- status->total_inodes = 0;
+ snprintf(key, sizeof(key), "brick%d.block_size", i);
+ ret = dict_get_uint64(dict, key, &(status->block_size));
+ if (ret) {
+ ret = 0;
+ status->block_size = 0;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free_inodes", i);
- ret = dict_get_uint64 (dict, key, &(status->free_inodes));
- if (ret) {
- ret = 0;
- status->free_inodes = 0;
- }
+ snprintf(key, sizeof(key), "brick%d.mnt_options", i);
+ ret = dict_get_str(dict, key, &(status->mount_options));
+ if (ret)
+ status->mount_options = NULL;
+ snprintf(key, sizeof(key), "brick%d.fs_name", i);
+ ret = dict_get_str(dict, key, &(status->fs_name));
+ if (ret) {
+ ret = 0;
+ status->fs_name = NULL;
+ }
+
+ snprintf(key, sizeof(key), "brick%d.inode_size", i);
+ ret = dict_get_str(dict, key, &(status->inode_size));
+ if (ret)
+ status->inode_size = NULL;
+
+ snprintf(key, sizeof(key), "brick%d.total_inodes", i);
+ ret = dict_get_uint64(dict, key, &(status->total_inodes));
+ if (ret)
+ status->total_inodes = 0;
+
+ snprintf(key, sizeof(key), "brick%d.free_inodes", i);
+ ret = dict_get_uint64(dict, key, &(status->free_inodes));
+ if (ret) {
+ ret = 0;
+ status->free_inodes = 0;
+ }
- out:
- return ret;
+out:
+ return ret;
}
void
-cli_print_detailed_status (cli_volume_status_t *status)
+cli_print_detailed_status(cli_volume_status_t *status)
{
- cli_out ("%-20s : %-20s", "Brick", status->brick);
- cli_out ("%-20s : %-20d", "Port", status->port);
- cli_out ("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N');
- cli_out ("%-20s : %-20s", "Pid", status->pid_str);
-
-#ifdef GF_LINUX_HOST_OS
- if (status->fs_name)
- cli_out ("%-20s : %-20s", "File System", status->fs_name);
- else
- cli_out ("%-20s : %-20s", "File System", "N/A");
-
- if (status->device)
- cli_out ("%-20s : %-20s", "Device", status->device);
- else
- cli_out ("%-20s : %-20s", "Device", "N/A");
-
- if (status->mount_options) {
- cli_out ("%-20s : %-20s", "Mount Options",
- status->mount_options);
- } else {
- cli_out ("%-20s : %-20s", "Mount Options", "N/A");
- }
+ cli_out("%-20s : %-20s", "Brick", status->brick);
+
+ if (status->online) {
+ cli_out("%-20s : %-20d", "TCP Port", status->port);
+ cli_out("%-20s : %-20d", "RDMA Port", status->rdma_port);
+ } else {
+ cli_out("%-20s : %-20s", "TCP Port", "N/A");
+ cli_out("%-20s : %-20s", "RDMA Port", "N/A");
+ }
+
+ cli_out("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N');
+ cli_out("%-20s : %-20s", "Pid", status->pid_str);
+
+ if (status->fs_name)
+ cli_out("%-20s : %-20s", "File System", status->fs_name);
+ else
+ cli_out("%-20s : %-20s", "File System", "N/A");
+
+ if (status->device)
+ cli_out("%-20s : %-20s", "Device", status->device);
+ else
+ cli_out("%-20s : %-20s", "Device", "N/A");
+
+ if (status->mount_options) {
+ cli_out("%-20s : %-20s", "Mount Options", status->mount_options);
+ } else {
+ cli_out("%-20s : %-20s", "Mount Options", "N/A");
+ }
+
+ if (status->inode_size) {
+ cli_out("%-20s : %-20s", "Inode Size", status->inode_size);
+ } else {
+ cli_out("%-20s : %-20s", "Inode Size", "N/A");
+ }
+ if (status->free)
+ cli_out("%-20s : %-20s", "Disk Space Free", status->free);
+ else
+ cli_out("%-20s : %-20s", "Disk Space Free", "N/A");
+
+ if (status->total)
+ cli_out("%-20s : %-20s", "Total Disk Space", status->total);
+ else
+ cli_out("%-20s : %-20s", "Total Disk Space", "N/A");
+
+ if (status->total_inodes) {
+ cli_out("%-20s : %-20" GF_PRI_INODE, "Inode Count",
+ status->total_inodes);
+ } else {
+ cli_out("%-20s : %-20s", "Inode Count", "N/A");
+ }
+
+ if (status->free_inodes) {
+ cli_out("%-20s : %-20" GF_PRI_INODE, "Free Inodes",
+ status->free_inodes);
+ } else {
+ cli_out("%-20s : %-20s", "Free Inodes", "N/A");
+ }
+}
- if (status->inode_size) {
- cli_out ("%-20s : %-20s", "Inode Size",
- status->inode_size);
+int
+cli_print_brick_status(cli_volume_status_t *status)
+{
+ int fieldlen = CLI_VOL_STATUS_BRICK_LEN;
+ int bricklen = 0;
+ char *p = NULL;
+ int num_spaces = 0;
+
+ p = status->brick;
+ bricklen = strlen(p);
+ while (bricklen > 0) {
+ if (bricklen > fieldlen) {
+ cli_out("%.*s", fieldlen, p);
+ p += fieldlen;
+ bricklen -= fieldlen;
} else {
- cli_out ("%-20s : %-20s", "Inode Size", "N/A");
- }
-#endif
- if (status->free)
- cli_out ("%-20s : %-20s", "Disk Space Free", status->free);
- else
- cli_out ("%-20s : %-20s", "Disk Space Free", "N/A");
-
- if (status->total)
- cli_out ("%-20s : %-20s", "Total Disk Space", status->total);
- else
- cli_out ("%-20s : %-20s", "Total Disk Space", "N/A");
+ num_spaces = (fieldlen - bricklen) + 1;
+ printf("%s", p);
+ while (num_spaces-- != 0)
+ printf(" ");
+ if (status->port || status->rdma_port) {
+ if (status->online)
+ cli_out("%-10d%-11d%-8c%-5s", status->port,
+ status->rdma_port, status->online ? 'Y' : 'N',
+ status->pid_str);
+ else
+ cli_out("%-10s%-11s%-8c%-5s", "N/A", "N/A",
+ status->online ? 'Y' : 'N', status->pid_str);
+ } else
+ cli_out("%-10s%-11s%-8c%-5s", "N/A", "N/A",
+ status->online ? 'Y' : 'N', status->pid_str);
+ bricklen = 0;
+ }
+ }
+
+ return 0;
+}
+#define NEEDS_GLFS_HEAL(op) \
+ ((op == GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) || \
+ (op == GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME) || \
+ (op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) || \
+ (op == GF_SHD_OP_INDEX_SUMMARY) || (op == GF_SHD_OP_SPLIT_BRAIN_FILES) || \
+ (op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || \
+ (op == GF_SHD_OP_HEAL_SUMMARY))
- if (status->total_inodes) {
- cli_out ("%-20s : %-20ld", "Inode Count",
- status->total_inodes);
- } else {
- cli_out ("%-20s : %-20s", "Inode Count", "N/A");
- }
+int
+cli_launch_glfs_heal(int heal_op, dict_t *options)
+{
+ char buff[PATH_MAX] = {0};
+ runner_t runner = {0};
+ char *filename = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ char *volname = NULL;
+ char *out = NULL;
+ int ret = 0;
+
+ runinit(&runner);
+ ret = dict_get_str(options, "volname", &volname);
+ runner_add_args(&runner, GLFSHEAL_PREFIX "/glfsheal", volname, NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+
+ switch (heal_op) {
+ case GF_SHD_OP_INDEX_SUMMARY:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ runner_add_args(&runner, "--xml", NULL);
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ ret = dict_get_str(options, "file", &filename);
+ runner_add_args(&runner, "bigger-file", filename, NULL);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ ret = dict_get_str(options, "file", &filename);
+ runner_add_args(&runner, "latest-mtime", filename, NULL);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str(options, "heal-source-hostname", &hostname);
+ ret = dict_get_str(options, "heal-source-brickpath", &path);
+ runner_add_args(&runner, "source-brick", NULL);
+ runner_argprintf(&runner, "%s:%s", hostname, path);
+ if (dict_get_str(options, "file", &filename) == 0)
+ runner_argprintf(&runner, "%s", filename);
+ break;
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ runner_add_args(&runner, "split-brain-info", NULL);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ runner_add_args(&runner, "--xml", NULL);
+ }
+ break;
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE:
+ runner_add_args(&runner, "granular-entry-heal-op", NULL);
+ break;
+ case GF_SHD_OP_HEAL_SUMMARY:
+ runner_add_args(&runner, "info-summary", NULL);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ runner_add_args(&runner, "--xml", NULL);
+ }
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+ if (global_state->mode & GLUSTER_MODE_GLFSHEAL_NOLOG)
+ runner_add_args(&runner, "--nolog", NULL);
+ ret = runner_start(&runner);
+ if (ret == -1)
+ goto out;
+ while ((
+ out = fgets(buff, sizeof(buff), runner_chio(&runner, STDOUT_FILENO)))) {
+ printf("%s", out);
+ }
+ ret = runner_end(&runner);
- if (status->free_inodes) {
- cli_out ("%-20s : %-20ld", "Free Inodes",
- status->free_inodes);
- } else {
- cli_out ("%-20s : %-20s", "Free Inodes", "N/A");
- }
+out:
+ return ret;
}
int
-cli_print_brick_status (cli_volume_status_t *status)
+cli_cmd_volume_heal_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int fieldlen = CLI_VOL_STATUS_BRICK_LEN;
- char buf[80] = {0,};
- int bricklen = 0;
- int i = 0;
- char *p = NULL;
- int num_tabs = 0;
-
- bricklen = strlen (status->brick);
- p = status->brick;
- while (bricklen > 0) {
- if (bricklen > fieldlen) {
- i++;
- strncpy (buf, p, min (fieldlen, (sizeof (buf)-1)));
- buf[strlen(buf) + 1] = '\0';
- cli_out ("%s", buf);
- p = status->brick + i * fieldlen;
- bricklen -= fieldlen;
- } else {
- num_tabs = (fieldlen - bricklen) / CLI_TAB_LENGTH + 1;
- printf ("%s", p);
- while (num_tabs-- != 0)
- printf ("\t");
- if (status->port)
- cli_out ("%d\t%c\t%s",
- status->port, status->online?'Y':'N',
- status->pid_str);
- else
- cli_out ("%s\t%c\t%s",
- "N/A", status->online?'Y':'N',
- status->pid_str);
- bricklen = 0;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *options = NULL;
+ xlator_t *this = NULL;
+ cli_local_t *local = NULL;
+ int heal_op = 0;
+
+ this = THIS;
+
+ if (wordcount < 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = cli_cmd_volume_heal_options_parse(words, wordcount, &options);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ ret = dict_get_int32(options, "heal-op", &heal_op);
+ if (ret < 0)
+ goto out;
+ if (NEEDS_GLFS_HEAL(heal_op)) {
+ ret = cli_launch_glfs_heal(heal_op, options);
+ if (ret < 0)
+ goto out;
+ if (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_out("Volume heal failed.");
}
+ }
+
+ if (options)
+ dict_unref(options);
- return 0;
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_statedump_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
- dict_t *options = NULL;
- xlator_t *this = NULL;
-
- this = THIS;
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
-
- if (wordcount < 3) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- ret = cli_cmd_volume_heal_options_parse (words, wordcount, &options);
+ if (wordcount >= 3) {
+ ret = cli_cmd_volume_statedump_options_parse(words, wordcount,
+ &options);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ parse_error = 1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Error parsing "
+ "statedump options");
+ cli_out("Error parsing options");
+ cli_usage_out(word->pattern);
}
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume heal failed");
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (options)
- dict_unref (options);
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume statedump failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_list_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- frame = create_frame (THIS, THIS->ctx->pool);
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ int sent = 0;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_VOLUME];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
if (!frame)
- goto out;
+ goto out;
+ ret = proc->fn(frame, THIS, NULL);
+ }
- if (wordcount < 3) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if (sent == 0)
+ cli_out("Volume list failed");
+ }
- if (wordcount >= 3) {
- ret = cli_cmd_volume_statedump_options_parse (words, wordcount,
- &options);
- if (ret) {
- parse_error = 1;
- gf_log ("cli", GF_LOG_ERROR, "Error parsing "
- "statedump options");
- cli_out ("Error parsing options");
- cli_usage_out (word->pattern);
- }
- }
+ CLI_STACK_DESTROY(frame);
- ret = dict_set_str (options, "volname", (char *)words[2]);
- if (ret)
- goto out;
+ return ret;
+}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+int
+cli_cmd_volume_clearlocks_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 7 || wordcount > 8) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = cli_cmd_volume_clrlks_opts_parse(words, wordcount, &options);
+ if (ret) {
+ parse_error = 1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Error parsing "
+ "clear-locks options");
+ cli_out("Error parsing options");
+ cli_usage_out(word->pattern);
+ }
+
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str(options, "path", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error = 0))
- cli_out ("Volume statedump failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume clear-locks failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
int
-cli_cmd_volume_list_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_barrier_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- call_frame_t *frame = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- int sent = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ options = dict_new();
+ if (!options) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, NULL);
- }
+ ret = dict_set_str(options, "barrier", (char *)words[3]);
+ if (ret)
+ goto out;
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if (sent == 0)
- cli_out ("Volume list failed");
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_BARRIER_VOLUME];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_err("Volume barrier failed");
+ }
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_clearlocks_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_getopt_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_err = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
+ }
- if (wordcount < 7 || wordcount > 8) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ options = dict_new();
+ if (!options)
+ goto out;
- ret = cli_cmd_volume_clrlks_opts_parse (words, wordcount, &options);
- if (ret) {
- parse_error = 1;
- gf_log ("cli", GF_LOG_ERROR, "Error parsing "
- "clear-locks options");
- cli_out ("Error parsing options");
- cli_usage_out (word->pattern);
- }
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
- ret = dict_set_str (options, "volname", (char *)words[2]);
- if (ret)
- goto out;
+ ret = dict_set_str(options, "key", (char *)words[3]);
+ if (ret)
+ goto out;
- ret = dict_set_str (options, "path", (char *)words[3]);
- if (ret)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOL_OPT];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error = 0))
- cli_out ("Volume clear-locks failed");
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_err == 0))
+ cli_err("Volume get option failed");
+ }
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
+/* This is a bit of a hack to display the help. The current bitrot cmd
+ * format does not work well when registering the cmds.
+ * Ideally the should have been of the form
+ * gluster volume bitrot <subcommand> <volumename> ...
+ */
+
+struct cli_cmd bitrot_cmds[] = {
+
+ {"volume bitrot help", cli_cmd_bitrot_help_cbk,
+ "display help for volume bitrot commands"},
+
+ {"volume bitrot <VOLNAME> {enable|disable}", NULL, /*cli_cmd_bitrot_cbk,*/
+ "Enable/disable bitrot for volume <VOLNAME>"},
+
+ {"volume bitrot <VOLNAME> signing-time <time-in-secs>",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Waiting time for an object after last fd is closed to start signing "
+ "process"},
+
+ {"volume bitrot <VOLNAME> signer-threads <count>",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Number of signing process threads. Usually set to number of available "
+ "cores"},
+
+ {"volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Set the speed of the scrubber for volume <VOLNAME>"},
+
+ {"volume bitrot <VOLNAME> scrub-frequency {hourly|daily|weekly|biweekly"
+ "|monthly}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Set the frequency of the scrubber for volume <VOLNAME>"},
+
+ {"volume bitrot <VOLNAME> scrub {pause|resume|status|ondemand}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Pause/resume the scrubber for <VOLNAME>. Status displays the status of "
+ "the scrubber. ondemand starts the scrubber immediately."},
+
+ {"volume bitrot <VOLNAME> {enable|disable}\n"
+ "volume bitrot <VOLNAME> signing-time <time-in-secs>\n"
+ "volume bitrot <VOLNAME> signer-threads <count>\n"
+ "volume bitrot <volname> scrub-throttle {lazy|normal|aggressive}\n"
+ "volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly"
+ "|monthly}\n"
+ "volume bitrot <volname> scrub {pause|resume|status|ondemand}",
+ cli_cmd_bitrot_cbk, NULL},
+
+ {NULL, NULL, NULL}};
+
+struct cli_cmd quota_cmds[] = {
+
+ /* Quota commands */
+ {"volume quota help", cli_cmd_quota_help_cbk,
+ "display help for volume quota commands"},
+
+ {"volume quota <VOLNAME> {enable|disable|list [<path> ...]| "
+ "list-objects [<path> ...] | remove <path>| remove-objects <path> | "
+ "default-soft-limit <percent>}",
+ cli_cmd_quota_cbk, "Enable/disable and configure quota for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {limit-usage <path> <size> [<percent>]}",
+ cli_cmd_quota_cbk, "Set maximum size for <path> for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {limit-objects <path> <number> [<percent>]}",
+ cli_cmd_quota_cbk,
+ "Set the maximum number of entries allowed in <path> for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>}",
+ cli_cmd_quota_cbk, "Set quota timeout for <VOLNAME>"},
+
+ {"volume inode-quota <VOLNAME> enable", cli_cmd_quota_cbk,
+ "Enable/disable inode-quota for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {enable|disable|list [<path> ...]| "
+ "list-objects [<path> ...] | remove <path>| remove-objects <path> | "
+ "default-soft-limit <percent>}\n"
+ "volume quota <VOLNAME> {limit-usage <path> <size> [<percent>]}\n"
+ "volume quota <VOLNAME> {limit-objects <path> <number> [<percent>]}\n"
+ "volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>}",
+ cli_cmd_quota_cbk, NULL},
+
+ {NULL, NULL, NULL}};
+
struct cli_cmd volume_cmds[] = {
- { "volume info [all|<VOLNAME>]",
- cli_cmd_volume_info_cbk,
- "list information of all volumes"},
+ {"volume help", cli_cmd_volume_help_cbk,
+ "display help for volume commands"},
- { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ...",
- cli_cmd_volume_create_cbk,
- "create a new volume of specified type with mentioned bricks"},
+ {"volume info [all|<VOLNAME>]", cli_cmd_volume_info_cbk,
+ "list information of all volumes"},
- { "volume delete <VOLNAME>",
- cli_cmd_volume_delete_cbk,
- "delete volume specified by <VOLNAME>"},
+ {"volume create <NEW-VOLNAME> [stripe <COUNT>] "
+ "[[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] "
+ "[disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] "
+ "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> <TA-BRICK>"
+ "... [force]",
- { "volume start <VOLNAME> [force]",
- cli_cmd_volume_start_cbk,
- "start volume specified by <VOLNAME>"},
+ cli_cmd_volume_create_cbk,
+ "create a new volume of specified type with mentioned bricks"},
- { "volume stop <VOLNAME> [force]",
- cli_cmd_volume_stop_cbk,
- "stop volume specified by <VOLNAME>"},
+ {"volume delete <VOLNAME>", cli_cmd_volume_delete_cbk,
+ "delete volume specified by <VOLNAME>"},
- /*{ "volume rename <VOLNAME> <NEW-VOLNAME>",
- cli_cmd_volume_rename_cbk,
- "rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
+ {"volume start <VOLNAME> [force]", cli_cmd_volume_start_cbk,
+ "start volume specified by <VOLNAME>"},
- { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ...",
- cli_cmd_volume_add_brick_cbk,
- "add brick to volume <VOLNAME>"},
+ {"volume stop <VOLNAME> [force]", cli_cmd_volume_stop_cbk,
+ "stop volume specified by <VOLNAME>"},
- { "volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ... {start|stop|status|commit|force}",
- cli_cmd_volume_remove_brick_cbk,
- "remove brick from volume <VOLNAME>"},
+ /*{ "volume rename <VOLNAME> <NEW-VOLNAME>",
+ cli_cmd_volume_rename_cbk,
+ "rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
- { "volume rebalance <VOLNAME> [fix-layout] {start|stop|status} [force]",
- cli_cmd_volume_defrag_cbk,
- "rebalance operations"},
+ {"volume add-brick <VOLNAME> [<stripe|replica> <COUNT> "
+ "[arbiter <COUNT>]] <NEW-BRICK> ... [force]",
+ cli_cmd_volume_add_brick_cbk, "add brick to volume <VOLNAME>"},
- { "volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> {start|pause|abort|status|commit [force]}",
- cli_cmd_volume_replace_brick_cbk,
- "replace-brick operations"},
+ {"volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ..."
+ " <start|stop|status|commit|force>",
+ cli_cmd_volume_remove_brick_cbk, "remove brick from volume <VOLNAME>"},
- /*{ "volume set-transport <VOLNAME> <TRANSPORT-TYPE> [<TRANSPORT-TYPE>] ...",
- cli_cmd_volume_set_transport_cbk,
- "set transport type for volume <VOLNAME>"},*/
+ {"volume rebalance <VOLNAME> {{fix-layout start} | {start "
+ "[force]|stop|status}}",
+ cli_cmd_volume_defrag_cbk, "rebalance operations"},
- { "volume set <VOLNAME> <KEY> <VALUE>",
- cli_cmd_volume_set_cbk,
- "set options for volume <VOLNAME>"},
+ {"volume replace-brick <VOLNAME> <SOURCE-BRICK> <NEW-BRICK> "
+ "{commit force}",
+ cli_cmd_volume_replace_brick_cbk, "replace-brick operations"},
- { "volume help",
- cli_cmd_volume_help_cbk,
- "display help for the volume command"},
+ /*{ "volume set-transport <VOLNAME> <TRANSPORT-TYPE> [<TRANSPORT-TYPE>]
+ ...", cli_cmd_volume_set_transport_cbk, "set transport type for volume
+ <VOLNAME>"},*/
- { "volume log rotate <VOLNAME> [BRICK]",
- cli_cmd_log_rotate_cbk,
- "rotate the log file for corresponding volume/brick"},
+ {"volume set <VOLNAME> <KEY> <VALUE>", cli_cmd_volume_set_cbk,
+ "set options for volume <VOLNAME>"},
- { "volume sync <HOSTNAME> [all|<VOLNAME>]",
- cli_cmd_sync_volume_cbk,
- "sync the volume information from a peer"},
+ {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk,
+ "This option can be used for setting multiple pre-defined volume options "
+ "where group_name is a file under /var/lib/glusterd/groups containing one "
+ "key value pair per line"},
- { "volume reset <VOLNAME> [option] [force]",
- cli_cmd_volume_reset_cbk,
- "reset all the reconfigured options"},
+ {"volume log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk,
+ "rotate the log file for corresponding volume/brick"},
+
+ {"volume sync <HOSTNAME> [all|<VOLNAME>]", cli_cmd_sync_volume_cbk,
+ "sync the volume information from a peer"},
+
+ {"volume reset <VOLNAME> [option] [force]", cli_cmd_volume_reset_cbk,
+ "reset all the reconfigured options"},
#if (SYNCDAEMON_COMPILE)
- {"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {start|stop|config|status|log-rotate} [options...]",
- cli_cmd_volume_gsync_set_cbk,
- "Geo-sync operations",
- cli_cmd_check_gsync_exists_cbk},
+ {"volume " GEOREP " [<MASTER-VOLNAME>] [<SLAVE-IP>]::[<SLAVE-VOLNAME>] {"
+ "\\\n create [[ssh-port n] [[no-verify] \\\n | [push-pem]]] [force] \\\n"
+ " | start [force] \\\n | stop [force] \\\n | pause [force] \\\n | resume "
+ "[force] \\\n"
+ " | config [[[\\!]<option>] [<value>]] \\\n | status "
+ "[detail] \\\n | delete [reset-sync-time]} ",
+ cli_cmd_volume_gsync_set_cbk, "Geo-sync operations",
+ cli_cmd_check_gsync_exists_cbk},
#endif
- { "volume profile <VOLNAME> {start|info|stop} [nfs]",
- cli_cmd_volume_profile_cbk,
- "volume profile operations"},
+ {"volume profile <VOLNAME> {start|info [peek|incremental "
+ "[peek]|cumulative|clear]|stop} [nfs]",
+ cli_cmd_volume_profile_cbk, "volume profile operations"},
+
+ {"volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick "
+ "<brick>] [list-cnt <value>] | "
+ "{read-perf|write-perf} [bs <size> count <count>] "
+ "[brick <brick>] [list-cnt <value>]",
+ cli_cmd_volume_top_cbk, "volume top operations"},
+
+ {"volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]]"
+ " [detail|clients|mem|inode|fd|callpool|tasks|client-list]",
+ cli_cmd_volume_status_cbk,
+ "display status of all or specified volume(s)/brick"},
+
+ {"volume heal <VOLNAME> [enable | disable | full |"
+ "statistics [heal-count [replica <HOSTNAME:BRICKNAME>]] |"
+ "info [summary | split-brain] |"
+ "split-brain {bigger-file <FILE> | latest-mtime <FILE> |"
+ "source-brick <HOSTNAME:BRICKNAME> [<FILE>]} |"
+ "granular-entry-heal {enable | disable}]",
+ cli_cmd_volume_heal_cbk,
+ "self-heal commands on volume specified by <VOLNAME>"},
+
+ {"volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|"
+ "priv|fd|inode|history]... | [client <hostname:process-id>]]",
+ cli_cmd_volume_statedump_cbk, "perform statedump on bricks"},
+
+ {"volume list", cli_cmd_volume_list_cbk, "list all volumes in cluster"},
+
+ {"volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}"
+ "{inode [range]|entry [basename]|posix [range]}",
+ cli_cmd_volume_clearlocks_cbk, "Clear locks held on path"},
+ {"volume barrier <VOLNAME> {enable|disable}", cli_cmd_volume_barrier_cbk,
+ "Barrier/unbarrier file operations on a volume"},
+ {"volume get <VOLNAME|all> <key|all>", cli_cmd_volume_getopt_cbk,
+ "Get the value of the all options or given option for volume <VOLNAME>"
+ " or all option. gluster volume get all all is to get all global "
+ "options"},
+
+ {"volume reset-brick <VOLNAME> <SOURCE-BRICK> {{start} |"
+ " {<NEW-BRICK> commit}}",
+ cli_cmd_volume_reset_brick_cbk, "reset-brick operations"},
+
+ {NULL, NULL, NULL}};
- { "volume quota <VOLNAME> <enable|disable|limit-usage|list|remove> [path] [value]",
- cli_cmd_quota_cbk,
- "quota translator specific operations"},
+int
+cli_cmd_quota_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *quota_cmd = NULL;
+ int count = 0;
- { "volume top <VOLNAME> {[open|read|write|opendir|readdir [nfs]] "
- "|[read-perf|write-perf [nfs|{bs <size> count <count>}]]} "
- " [brick <brick>] [list-cnt <count>]",
- cli_cmd_volume_top_cbk,
- "volume top operations"},
+ cmd = GF_MALLOC(sizeof(quota_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, quota_cmds, sizeof(quota_cmds));
+ count = (sizeof(quota_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
- { "volume status [all | <VOLNAME> [nfs|shd|<BRICK>]]"
- " [detail|clients|mem|inode|fd|callpool]",
- cli_cmd_volume_status_cbk,
- "display status of all or specified volume(s)/brick"},
+ cli_out("\ngluster quota commands");
+ cli_out("=======================\n");
- { "volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]",
- cli_cmd_volume_heal_cbk,
- "self-heal commands on volume specified by <VOLNAME>"},
+ for (quota_cmd = cmd; quota_cmd->pattern; quota_cmd++)
+ if ((_gf_false == quota_cmd->disable) && (quota_cmd->desc))
+ cli_out("%s - %s", quota_cmd->pattern, quota_cmd->desc);
- {"volume statedump <VOLNAME> [nfs] [all|mem|iobuf|callpool|priv|fd|"
- "inode|history]...",
- cli_cmd_volume_statedump_cbk,
- "perform statedump on bricks"},
+ cli_out("\n");
+ GF_FREE(cmd);
- {"volume list",
- cli_cmd_volume_list_cbk,
- "list all volumes in cluster"},
+ return 0;
+}
- {"volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}"
- "{inode [range]|entry [basename]|posix [range]}",
- cli_cmd_volume_clearlocks_cbk,
- "Clear locks held on path"
- },
+int
+cli_cmd_bitrot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *bitrot_cmd = NULL;
+ int count = 0;
- { NULL, NULL, NULL }
-};
+ cmd = GF_MALLOC(sizeof(bitrot_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, bitrot_cmds, sizeof(bitrot_cmds));
+ count = (sizeof(bitrot_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+
+ cli_out("\ngluster bitrot commands");
+ cli_out("========================\n");
+
+ for (bitrot_cmd = cmd; bitrot_cmd->pattern; bitrot_cmd++)
+ if ((_gf_false == bitrot_cmd->disable) && (bitrot_cmd->desc))
+ cli_out("%s - %s", bitrot_cmd->pattern, bitrot_cmd->desc);
+
+ cli_out("\n");
+ GF_FREE(cmd);
+
+ return 0;
+}
int
-cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
+cli_cmd_volume_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
{
- struct cli_cmd *cmd = NULL;
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *vol_cmd = NULL;
+ int count = 0;
- for (cmd = volume_cmds; cmd->pattern; cmd++)
- if (_gf_false == cmd->disable)
- cli_out ("%s - %s", cmd->pattern, cmd->desc);
+ cmd = GF_MALLOC(sizeof(volume_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, volume_cmds, sizeof(volume_cmds));
+ count = (sizeof(volume_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
- return 0;
+ cli_out("\ngluster volume commands");
+ cli_out("========================\n");
+
+ for (vol_cmd = cmd; vol_cmd->pattern; vol_cmd++)
+ if (_gf_false == vol_cmd->disable)
+ cli_out("%s - %s", vol_cmd->pattern, vol_cmd->desc);
+
+ cli_out("\n");
+ GF_FREE(cmd);
+ return 0;
}
int
-cli_cmd_volume_register (struct cli_state *state)
+cli_cmd_volume_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
- for (cmd = volume_cmds; cmd->pattern; cmd++) {
+ for (cmd = volume_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+ for (cmd = bitrot_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+ for (cmd = quota_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
out:
- return ret;
+ return ret;
+}
+
+static int
+gf_asprintf_append(char **string_ptr, const char *format, ...)
+{
+ va_list arg;
+ int rv = 0;
+ char *tmp = *string_ptr;
+
+ va_start(arg, format);
+ rv = gf_vasprintf(string_ptr, format, arg);
+ va_end(arg);
+
+ if (tmp)
+ GF_FREE(tmp);
+
+ return rv;
}
diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c
index f2b434ac7a4..2d458b16a56 100644
--- a/cli/src/cli-cmd.c
+++ b/cli/src/cli-cmd.c
@@ -1,33 +1,18 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
@@ -37,355 +22,390 @@
static int cmd_done;
static int cmd_sent;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t cond_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t conn = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t conn_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t cond_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t conn = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t conn_mutex = PTHREAD_MUTEX_INITIALIZER;
-int cli_op_ret = 0;
-int connected = 0;
-
-int cli_cmd_log_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+int cli_op_ret = 0;
+static gf_boolean_t connected = _gf_false;
static unsigned
-cli_cmd_needs_connection (struct cli_cmd_word *word)
+cli_cmd_needs_connection(struct cli_cmd_word *word)
{
- if (!strcasecmp ("quit", word->word))
- return 0;
+ if (!strcasecmp("quit", word->word))
+ return 0;
- if (!strcasecmp ("help", word->word))
- return 0;
+ if (!strcasecmp("help", word->word))
+ return 0;
- if (!strcasecmp ("getwd", word->word))
- return 1;
+ if (!strcasecmp("getwd", word->word))
+ return 1;
- if (!strcasecmp ("exit", word->word))
- return 0;
+ if (!strcasecmp("exit", word->word))
+ return 0;
- return CLI_DEFAULT_CONN_TIMEOUT;
+ return cli_default_conn_timeout;
}
int
-cli_cmd_status_reset (void)
+cli_cmd_status_reset(void)
{
- int ret = 0;
-
- ret = cli_cmd_lock ();
- {
- if (ret == 0) {
- cmd_sent = 0;
- cmd_done = 0;
- }
- }
- ret = cli_cmd_unlock ();
- return ret;
+ int ret = 0;
+ ret = cli_cmd_lock();
+ {
+ if (ret == 0) {
+ cmd_sent = 0;
+ cmd_done = 0;
+ }
+ }
+ ret = cli_cmd_unlock();
+ return ret;
}
int
-cli_cmd_sent_status_get (int *status)
+cli_cmd_sent_status_get(int *status)
{
- int ret = 0;
- GF_ASSERT (status);
-
- ret = cli_cmd_lock ();
- {
- if (ret == 0)
- *status = cmd_sent;
- }
- ret = cli_cmd_unlock ();
- return ret;
+ int ret = 0;
+ GF_ASSERT(status);
+
+ ret = cli_cmd_lock();
+ {
+ if (ret == 0)
+ *status = cmd_sent;
+ }
+ ret = cli_cmd_unlock();
+ return ret;
}
int
-cli_cmd_process (struct cli_state *state, int argc, char **argv)
+cli_cmd_process(struct cli_state *state, int argc, char **argv)
{
- int ret = 0;
- struct cli_cmd_word *word = NULL;
- struct cli_cmd_word *next = NULL;
- int i = 0;
+ int ret = 0;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
+ int i = 0;
- word = &state->tree.root;
+ word = &state->tree.root;
- if (!argc)
- return 0;
+ if (!argc)
+ return 0;
- for (i = 0; i < argc; i++) {
- next = cli_cmd_nextword (word, argv[i]);
+ for (i = 0; i < argc; i++) {
+ next = cli_cmd_nextword(word, argv[i]);
- word = next;
- if (!word)
- break;
+ word = next;
+ if (!word)
+ break;
- if (word->cbkfn)
- break;
- }
+ if (word->cbkfn)
+ break;
+ }
- if (!word) {
- cli_out ("unrecognized word: %s (position %d)",
- argv[i], i);
- return -1;
- }
+ if (!word) {
+ cli_out("unrecognized word: %s (position %d)\n", argv[i], i);
+ usage();
+ return -1;
+ }
- if (!word->cbkfn) {
- cli_out ("unrecognized command");
- return -1;
- }
+ if (!word->cbkfn) {
+ cli_out("unrecognized command\n");
+ usage();
+ return -1;
+ }
- if ( strcmp (word->word,"help")==0 )
- goto callback;
+ if (strcmp(word->word, "help") == 0)
+ goto callback;
- state->await_connected = cli_cmd_needs_connection (word);
+ state->await_connected = cli_cmd_needs_connection(word);
- ret = cli_cmd_await_connected (state->await_connected);
- if (ret) {
- cli_out ("Connection failed. Please check if gluster "
- "daemon is operational.");
- gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
- exit (ret);
- }
+ ret = cli_cmd_await_connected(state->await_connected);
+ if (ret) {
+ cli_out(
+ "Connection failed. Please check if gluster "
+ "daemon is operational.");
+ gf_log("", GF_LOG_INFO, "Exiting with: %d", ret);
+ exit(ret);
+ }
callback:
- ret = word->cbkfn (state, word, (const char **)argv, argc);
- (void) cli_cmd_status_reset ();
- return ret;
+ ret = word->cbkfn(state, word, (const char **)argv, argc);
+ (void)cli_cmd_status_reset();
+ return ret;
}
int
-cli_cmd_input_token_count (const char *text)
+cli_cmd_input_token_count(const char *text)
{
- int count = 0;
- const char *trav = NULL;
- int is_spc = 1;
-
- for (trav = text; *trav; trav++) {
- if (*trav == ' ') {
- is_spc = 1;
- } else {
- if (is_spc) {
- count++;
- is_spc = 0;
- }
- }
+ int count = 0;
+ const char *trav = NULL;
+ int is_spc = 1;
+
+ for (trav = text; *trav; trav++) {
+ if (*trav == ' ') {
+ is_spc = 1;
+ } else {
+ if (is_spc) {
+ count++;
+ is_spc = 0;
+ }
}
+ }
- return count;
+ return count;
}
-
int
-cli_cmd_process_line (struct cli_state *state, const char *text)
+cli_cmd_process_line(struct cli_state *state, const char *text)
{
- int count = 0;
- char **tokens = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- char *copy = NULL;
- char *saveptr = NULL;
- int i = 0;
- int ret = -1;
-
- count = cli_cmd_input_token_count (text);
-
- tokens = calloc (count + 1, sizeof (*tokens));
- if (!tokens)
- return -1;
-
- copy = strdup (text);
- if (!copy)
- goto out;
-
- tokenp = tokens;
-
- for (token = strtok_r (copy, " \t\r\n", &saveptr); token;
- token = strtok_r (NULL, " \t\r\n", &saveptr)) {
- *tokenp = strdup (token);
-
- if (!*tokenp)
- goto out;
- tokenp++;
- i++;
-
- }
-
- ret = cli_cmd_process (state, count, tokens);
+ int count = 0;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char *copy = NULL;
+ char *saveptr = NULL;
+ int i = 0;
+ int ret = -1;
+
+ count = cli_cmd_input_token_count(text);
+
+ tokens = calloc(count + 1, sizeof(*tokens));
+ if (!tokens)
+ return -1;
+
+ copy = strdup(text);
+ if (!copy)
+ goto out;
+
+ tokenp = tokens;
+
+ for (token = strtok_r(copy, " \t\r\n", &saveptr); token;
+ token = strtok_r(NULL, " \t\r\n", &saveptr)) {
+ *tokenp = strdup(token);
+
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+ }
+
+ ret = cli_cmd_process(state, count, tokens);
out:
- if (copy)
- free (copy);
+ free(copy);
- if (tokens)
- cli_cmd_tokens_destroy (tokens);
+ if (tokens)
+ cli_cmd_tokens_destroy(tokens);
- return ret;
+ return ret;
}
-
int
-cli_cmds_register (struct cli_state *state)
+cli_cmds_register(struct cli_state *state)
{
- int ret = 0;
-
- ret = cli_cmd_volume_register (state);
- if (ret)
- goto out;
-
- ret = cli_cmd_probe_register (state);
- if (ret)
- goto out;
-
- ret = cli_cmd_system_register (state);
- if (ret)
- goto out;
-
- ret = cli_cmd_misc_register (state);
- if (ret)
- goto out;
-
+ int ret = 0;
+
+ ret = cli_cmd_volume_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_probe_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_system_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_misc_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_snapshot_register(state);
+ if (ret)
+ goto out;
+ ret = cli_cmd_global_register(state);
+ if (ret)
+ goto out;
out:
- return ret;
-}
-
-int
-cli_cmd_cond_init ()
-{
-
- pthread_mutex_init (&cond_mutex, NULL);
- pthread_cond_init (&cond, NULL);
-
- pthread_mutex_init (&conn_mutex, NULL);
- pthread_cond_init (&conn, NULL);
-
- return 0;
+ return ret;
}
int
-cli_cmd_lock ()
+cli_cmd_lock()
{
- pthread_mutex_lock (&cond_mutex);
- return 0;
+ pthread_mutex_lock(&cond_mutex);
+ return 0;
}
int
-cli_cmd_unlock ()
+cli_cmd_unlock()
{
- pthread_mutex_unlock (&cond_mutex);
- return 0;
+ pthread_mutex_unlock(&cond_mutex);
+ return 0;
}
static void
-seconds_from_now (unsigned secs, struct timespec *ts)
+seconds_from_now(unsigned secs, struct timespec *ts)
{
- struct timeval tv = {0,};
+ struct timeval tv = {
+ 0,
+ };
- gettimeofday (&tv, NULL);
+ gettimeofday(&tv, NULL);
- ts->tv_sec = tv.tv_sec + secs;
- ts->tv_nsec = tv.tv_usec * 1000;
+ ts->tv_sec = tv.tv_sec + secs;
+ ts->tv_nsec = tv.tv_usec * 1000;
}
int
-cli_cmd_await_response (unsigned time)
+cli_cmd_await_response(unsigned time)
{
- struct timespec ts = {0,};
- int ret = 0;
-
- cli_op_ret = -1;
-
- seconds_from_now (time, &ts);
- while (!cmd_done && !ret) {
- ret = pthread_cond_timedwait (&cond, &cond_mutex,
- &ts);
- }
-
- cmd_done = 0;
-
- if (ret)
- return ret;
-
- return cli_op_ret;
+ struct timespec ts = {
+ 0,
+ };
+ int ret = 0;
+
+ cli_op_ret = -1;
+
+ seconds_from_now(time, &ts);
+ while (!cmd_done && !ret) {
+ ret = pthread_cond_timedwait(&cond, &cond_mutex, &ts);
+ }
+
+ if (!cmd_done) {
+ if (ret == ETIMEDOUT)
+ cli_out("Error : Request timed out");
+ else
+ cli_out("Error : Command returned with error code:%d", ret);
+ }
+ cmd_done = 0;
+
+ return cli_op_ret;
}
+/* This function must be called _only_ after all actions associated with
+ * command processing is complete. Otherwise, gluster process may exit before
+ * reporting results to stdout/stderr. */
int
-cli_cmd_broadcast_response (int32_t status)
+cli_cmd_broadcast_response(int32_t status)
{
-
- pthread_mutex_lock (&cond_mutex);
- {
- if (!cmd_sent)
- goto out;
- cmd_done = 1;
- cli_op_ret = status;
- pthread_cond_broadcast (&cond);
- }
-
+ pthread_mutex_lock(&cond_mutex);
+ {
+ if (!cmd_sent)
+ goto out;
+ cmd_done = 1;
+ cli_op_ret = status;
+ pthread_cond_broadcast(&cond);
+ }
out:
- pthread_mutex_unlock (&cond_mutex);
- return 0;
+ pthread_mutex_unlock(&cond_mutex);
+ return 0;
}
int32_t
-cli_cmd_await_connected (unsigned conn_timo)
+cli_cmd_await_connected(unsigned conn_timo)
{
- int32_t ret = 0;
- struct timespec ts = {0,};
-
- if (!conn_timo)
- return 0;
-
- pthread_mutex_lock (&conn_mutex);
- {
- seconds_from_now (conn_timo, &ts);
- while (!connected && !ret) {
- ret = pthread_cond_timedwait (&conn, &conn_mutex,
- &ts);
- }
- }
- pthread_mutex_unlock (&conn_mutex);
+ int32_t ret = 0;
+ struct timespec ts = {
+ 0,
+ };
+ if (!conn_timo)
+ return 0;
+
+ pthread_mutex_lock(&conn_mutex);
+ {
+ seconds_from_now(conn_timo, &ts);
+ while (!connected && !ret) {
+ ret = pthread_cond_timedwait(&conn, &conn_mutex, &ts);
+ }
+ }
+ pthread_mutex_unlock(&conn_mutex);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_broadcast_connected ()
+cli_cmd_broadcast_connected(gf_boolean_t status)
{
- pthread_mutex_lock (&conn_mutex);
- {
- connected = 1;
- pthread_cond_broadcast (&conn);
- }
-
- pthread_mutex_unlock (&conn_mutex);
-
- return 0;
+ pthread_mutex_lock(&conn_mutex);
+ {
+ connected = status;
+ pthread_cond_broadcast(&conn);
+ }
+ pthread_mutex_unlock(&conn_mutex);
+
+ return 0;
}
-int
-cli_cmd_submit (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+gf_boolean_t
+cli_cmd_connected(void)
{
- int ret = -1;
- unsigned timeout = 0;
+ gf_boolean_t status;
- timeout = (GLUSTER_CLI_PROFILE_VOLUME == procnum) ?
- CLI_TOP_CMD_TIMEOUT : CLI_DEFAULT_CMD_TIMEOUT;
+ pthread_mutex_lock(&conn_mutex);
+ {
+ status = connected;
+ }
+ pthread_mutex_unlock(&conn_mutex);
- cli_cmd_lock ();
- cmd_sent = 0;
- ret = cli_submit_request (req, frame, prog,
- procnum, NULL, this, cbkfn, xdrproc);
+ return status;
+}
- if (!ret) {
- cmd_sent = 1;
- ret = cli_cmd_await_response (timeout);
- }
+int
+cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ unsigned timeout = 0;
+
+ if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) ||
+ (GLUSTER_CLI_HEAL_VOLUME == procnum) ||
+ (GLUSTER_CLI_GANESHA == procnum))
+ timeout = cli_ten_minutes_timeout;
+ else
+ timeout = cli_default_conn_timeout;
+
+ cli_cmd_lock();
+ cmd_sent = 0;
+ ret = cli_submit_request(rpc, req, frame, prog, procnum, NULL, this, cbkfn,
+ xdrproc);
+
+ if (!ret) {
+ cmd_sent = 1;
+ ret = cli_cmd_await_response(timeout);
+ }
+
+ cli_cmd_unlock();
+
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- cli_cmd_unlock ();
+int
+cli_cmd_pattern_cmp(void *a, void *b)
+{
+ struct cli_cmd *ia = NULL;
+ struct cli_cmd *ib = NULL;
+ int ret = 0;
+
+ ia = a;
+ ib = b;
+ if (strcmp(ia->pattern, ib->pattern) > 0)
+ ret = 1;
+ else if (strcmp(ia->pattern, ib->pattern) < 0)
+ ret = -1;
+ else
+ ret = 0;
+ return ret;
+}
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+void
+cli_cmd_sort(struct cli_cmd *cmd, int count)
+{
+ gf_array_insertionsort(cmd, 1, count - 2, sizeof(struct cli_cmd),
+ cli_cmd_pattern_cmp);
}
diff --git a/cli/src/cli-cmd.h b/cli/src/cli-cmd.h
index ba877e2c496..c1c068c7085 100644
--- a/cli/src/cli-cmd.h
+++ b/cli/src/cli-cmd.h
@@ -1,118 +1,129 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_CMD_H__
#define __CLI_CMD_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <netdb.h>
#include "cli.h"
-#include "list.h"
-
-#define CLI_STACK_DESTROY(_frame) \
- do { \
- if (_frame) { \
- if (_frame->local) { \
- gf_log ("cli", GF_LOG_DEBUG, "frame->local " \
- "is not NULL (%p)", _frame->local); \
- cli_local_wipe (_frame->local); \
- _frame->local = NULL; \
- } \
- STACK_DESTROY (_frame->root); \
- } \
- } while (0);
-
-typedef enum {
- GF_ANSWER_YES = 1,
- GF_ANSWER_NO = 2
-} gf_answer_t;
+#include <glusterfs/list.h>
+
+#define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
+
+#define CLI_LOCAL_INIT(local, words, frame, dictionary) \
+ do { \
+ local = cli_local_get(); \
+ \
+ if (local) { \
+ local->words = words; \
+ if (dictionary) \
+ local->dict = dictionary; \
+ if (frame) \
+ frame->local = local; \
+ } \
+ } while (0)
+
+#define CLI_STACK_DESTROY(_frame) \
+ do { \
+ if (_frame) { \
+ if (_frame->local) { \
+ gf_log("cli", GF_LOG_DEBUG, \
+ "frame->local " \
+ "is not NULL (%p)", \
+ _frame->local); \
+ cli_local_wipe(_frame->local); \
+ _frame->local = NULL; \
+ } \
+ STACK_DESTROY(_frame->root); \
+ } \
+ } while (0);
+
+typedef enum { GF_ANSWER_YES = 1, GF_ANSWER_NO = 2 } gf_answer_t;
struct cli_cmd {
- const char *pattern;
- cli_cmd_cbk_t *cbk;
- const char *desc;
- cli_cmd_reg_cbk_t *reg_cbk; /* callback to check in runtime if the *
- * command should be enabled or disabled */
- gf_boolean_t disable;
+ const char *pattern;
+ cli_cmd_cbk_t *cbk;
+ const char *desc;
+ cli_cmd_reg_cbk_t *reg_cbk; /* callback to check in runtime if the *
+ * command should be enabled or disabled */
+ gf_boolean_t disable;
};
struct cli_cmd_volume_get_ctx_ {
- char *volname;
- int flags;
+ char *volname;
+ int flags;
};
typedef struct cli_profile_info_ {
- uint64_t fop_hits;
- double min_latency;
- double max_latency;
- double avg_latency;
- char *fop_name;
- double percentage_avg_latency;
+ uint64_t fop_hits;
+ double min_latency;
+ double max_latency;
+ double avg_latency;
+ char *fop_name;
+ double percentage_avg_latency;
} cli_profile_info_t;
-typedef struct addrinfo_list {
- struct list_head list;
- struct addrinfo *info;
-} addrinfo_list_t;
+typedef struct cli_cmd_volume_get_ctx_ cli_cmd_volume_get_ctx_t;
-typedef enum {
- GF_AI_COMPARE_NO_MATCH = 0,
- GF_AI_COMPARE_MATCH = 1,
- GF_AI_COMPARE_ERROR = 2
-} gf_ai_compare_t;
+int
+cli_cmd_volume_register(struct cli_state *state);
-typedef struct cli_cmd_volume_get_ctx_ cli_cmd_volume_get_ctx_t;
+int
+cli_cmd_probe_register(struct cli_state *state);
-int cli_cmd_volume_register (struct cli_state *state);
+int
+cli_cmd_system_register(struct cli_state *state);
-int cli_cmd_probe_register (struct cli_state *state);
+int
+cli_cmd_snapshot_register(struct cli_state *state);
-int cli_cmd_system_register (struct cli_state *state);
+int
+cli_cmd_global_register(struct cli_state *state);
-int cli_cmd_misc_register (struct cli_state *state);
+int
+cli_cmd_misc_register(struct cli_state *state);
-struct cli_cmd_word *cli_cmd_nextword (struct cli_cmd_word *word,
- const char *text);
-void cli_cmd_tokens_destroy (char **tokens);
+struct cli_cmd_word *
+cli_cmd_nextword(struct cli_cmd_word *word, const char *text);
+void
+cli_cmd_tokens_destroy(char **tokens);
-int cli_cmd_await_response (unsigned time);
+int
+cli_cmd_await_response(unsigned time);
-int cli_cmd_broadcast_response (int32_t status);
+int
+cli_cmd_broadcast_response(int32_t status);
-int cli_cmd_cond_init ();
+int
+cli_cmd_lock();
-int cli_cmd_lock ();
+int
+cli_cmd_unlock();
-int cli_cmd_unlock ();
+int
+cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
int
-cli_cmd_submit (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+cli_cmd_pattern_cmp(void *a, void *b);
+
+void
+cli_cmd_sort(struct cli_cmd *cmd, int count);
gf_answer_t
-cli_cmd_get_confirmation (struct cli_state *state, const char *question);
-int cli_cmd_sent_status_get (int *status);
+cli_cmd_get_confirmation(struct cli_state *state, const char *question);
+int
+cli_cmd_sent_status_get(int *status);
+
+gf_boolean_t
+_limits_set_on_volume(char *volname, int type);
+
#endif /* __CLI_CMD_H__ */
diff --git a/cli/src/cli-mem-types.h b/cli/src/cli-mem-types.h
index 3c49d21836b..b42b4dd86c2 100644
--- a/cli/src/cli-mem-types.h
+++ b/cli/src/cli-mem-types.h
@@ -1,39 +1,30 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_MEM_TYPES_H__
#define __CLI_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
#define CLI_MEM_TYPE_START (gf_common_mt_end + 1)
enum cli_mem_types_ {
- cli_mt_xlator_list_t = CLI_MEM_TYPE_START,
- cli_mt_xlator_t,
- cli_mt_xlator_cmdline_option_t,
- cli_mt_char,
- cli_mt_call_pool_t,
- cli_mt_cli_local_t,
- cli_mt_cli_get_vol_ctx_t,
- cli_mt_append_str,
- cli_mt_end
+ cli_mt_xlator_list_t = CLI_MEM_TYPE_START,
+ cli_mt_xlator_t,
+ cli_mt_xlator_cmdline_option_t,
+ cli_mt_char,
+ cli_mt_call_pool_t,
+ cli_mt_cli_local_t,
+ cli_mt_cli_get_vol_ctx_t,
+ cli_mt_append_str,
+ cli_mt_cli_cmd,
+ cli_mt_end
};
diff --git a/cli/src/cli-quotad-client.c b/cli/src/cli-quotad-client.c
new file mode 100644
index 00000000000..772b8f75bd9
--- /dev/null
+++ b/cli/src/cli-quotad-client.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "cli-quotad-client.h"
+
+int
+cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref, xlator_t *this,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+
+ GF_ASSERT(this);
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit(global_quotad_rpc, prog, procnum, cbkfn, &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ ret = 0;
+
+out:
+ if (new_iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+
+ return ret;
+}
+
+int
+cli_quotad_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ this = mydata;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT: {
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
+ break;
+ }
+
+ default:
+ gf_log(this->name, GF_LOG_TRACE, "got some other RPC event %d",
+ event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+struct rpc_clnt *
+cli_quotad_clnt_init(xlator_t *this, dict_t *options)
+{
+ struct rpc_clnt *rpc = NULL;
+ int ret = -1;
+
+ ret = dict_set_nstrn(options, "transport.address-family",
+ SLEN("transport.address-family"), "unix",
+ SLEN("unix"));
+ if (ret)
+ goto out;
+
+ ret = dict_set_nstrn(options, "transport-type", SLEN("transport-type"),
+ "socket", SLEN("socket"));
+ if (ret)
+ goto out;
+
+ ret = dict_set_nstrn(options, "transport.socket.connect-path",
+ SLEN("transport.socket.connect-path"),
+ "/var/run/gluster/quotad.socket",
+ SLEN("/var/run/gluster/quotad.socket"));
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new(options, this, this->name, 16);
+ if (!rpc)
+ goto out;
+
+ ret = rpc_clnt_register_notify(rpc, cli_quotad_notify, this);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "failed to register notify");
+ goto out;
+ }
+
+ rpc_clnt_start(rpc);
+out:
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref(rpc);
+ rpc = NULL;
+ }
+
+ return rpc;
+}
diff --git a/cli/src/cli-quotad-client.h b/cli/src/cli-quotad-client.h
new file mode 100644
index 00000000000..71a44e5916b
--- /dev/null
+++ b/cli/src/cli-quotad-client.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "cli.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include "cli-cmd.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include "cli-mem-types.h"
+
+int
+cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref, xlator_t *this,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+
+struct rpc_clnt *
+cli_quotad_clnt_init(xlator_t *this, dict_t *options);
+
+int
+cli_quotad_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data);
diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c
index f9bf7c81923..7a38a0b882a 100644
--- a/cli/src/cli-rl.c
+++ b/cli/src/cli-rl.c
@@ -1,38 +1,23 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "event.h"
+#include <glusterfs/gf-event.h>
#include <fnmatch.h>
@@ -42,384 +27,376 @@
#include <readline/readline.h>
#include <readline/history.h>
-
int
-cli_rl_out (struct cli_state *state, const char *fmt, va_list ap)
+cli_rl_out(struct cli_state *state, const char *fmt, va_list ap)
{
- int tmp_rl_point = rl_point;
- int n = rl_end;
- int ret = 0;
+ int tmp_rl_point = rl_point;
+ int n = rl_end;
+ int ret = 0;
- if (rl_end >= 0 ) {
- rl_kill_text (0, rl_end);
- rl_redisplay ();
- }
+ if (rl_end >= 0) {
+ rl_kill_text(0, rl_end);
+ rl_redisplay();
+ }
- printf ("\r%*s\r", (int)strlen (state->prompt), "");
+ printf("\r%*s\r", (int)strlen(state->prompt), "");
- ret = vprintf (fmt, ap);
+ ret = vprintf(fmt, ap);
- printf ("\n");
- fflush(stdout);
+ printf("\n");
+ fflush(stdout);
- if (n) {
- rl_do_undo ();
- rl_point = tmp_rl_point;
- rl_reset_line_state ();
- }
+ if (n) {
+ rl_do_undo();
+ rl_point = tmp_rl_point;
+ rl_reset_line_state();
+ }
- return ret;
+ return ret;
}
int
-cli_rl_err (struct cli_state *state, const char *fmt, va_list ap)
+cli_rl_err(struct cli_state *state, const char *fmt, va_list ap)
{
- int tmp_rl_point = rl_point;
- int n = rl_end;
- int ret = 0;
+ int tmp_rl_point = rl_point;
+ int n = rl_end;
+ int ret = 0;
- if (rl_end >= 0 ) {
- rl_kill_text (0, rl_end);
- rl_redisplay ();
- }
+ if (rl_end >= 0) {
+ rl_kill_text(0, rl_end);
+ rl_redisplay();
+ }
- fprintf (stderr, "\r%*s\r", (int)strlen (state->prompt), "");
+ fprintf(stderr, "\r%*s\r", (int)strlen(state->prompt), "");
- ret = vfprintf (stderr, fmt, ap);
+ ret = vfprintf(stderr, fmt, ap);
- fprintf (stderr, "\n");
- fflush(stderr);
+ fprintf(stderr, "\n");
+ fflush(stderr);
- if (n) {
- rl_do_undo ();
- rl_point = tmp_rl_point;
- rl_reset_line_state ();
- }
+ if (n) {
+ rl_do_undo();
+ rl_point = tmp_rl_point;
+ rl_reset_line_state();
+ }
- return ret;
+ return ret;
}
-
void
-cli_rl_process_line (char *line)
+cli_rl_process_line(char *line)
{
- struct cli_state *state = NULL;
- int ret = 0;
-
- state = global_state;
+ struct cli_state *state = NULL;
+ int ret = 0;
- state->rl_processing = 1;
- {
- ret = cli_cmd_process_line (state, line);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to process line");
+ state = global_state;
- add_history (line);
- }
- state->rl_processing = 0;
+ state->rl_processing = 1;
+ {
+ ret = cli_cmd_process_line(state, line);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to process line");
+ add_history(line);
+ }
+ state->rl_processing = 0;
}
-
-int
-cli_rl_stdin (int fd, int idx, void *data,
- int poll_out, int poll_in, int poll_err)
+void
+cli_rl_stdin(int fd, int idx, int gen, void *data, int poll_out, int poll_in,
+ int poll_err, char event_thread_died)
{
- rl_callback_read_char ();
+ struct cli_state *state = NULL;
- return 0;
-}
+ state = data;
+
+ rl_callback_read_char();
+ gf_event_handled(state->ctx->event_pool, fd, idx, gen);
+
+ return;
+}
char *
-cli_rl_autocomplete_entry (const char *text, int times)
+cli_rl_autocomplete_entry(const char *text, int times)
{
- struct cli_state *state = NULL;
- char *retp = NULL;
+ struct cli_state *state = NULL;
+ char *retp = NULL;
- state = global_state;
+ state = global_state;
- if (!state->matchesp)
- return NULL;
+ if (!state->matchesp)
+ return NULL;
- retp = *state->matchesp;
+ retp = *state->matchesp;
- state->matchesp++;
+ state->matchesp++;
- return retp ? strdup (retp) : NULL;
+ return retp ? strdup(retp) : NULL;
}
-
int
-cli_rl_token_count (const char *text)
+cli_rl_token_count(const char *text)
{
- int count = 0;
- const char *trav = NULL;
- int is_spc = 1;
-
- for (trav = text; *trav; trav++) {
- if (*trav == ' ') {
- is_spc = 1;
- } else {
- if (is_spc) {
- count++;
- is_spc = 0;
- }
- }
+ int count = 0;
+ const char *trav = NULL;
+ int is_spc = 1;
+
+ for (trav = text; *trav; trav++) {
+ if (*trav == ' ') {
+ is_spc = 1;
+ } else {
+ if (is_spc) {
+ count++;
+ is_spc = 0;
+ }
}
+ }
- if (is_spc)
- /* what needs to be autocompleted is a full
- new word, and not extend the last word
- */
- count++;
+ if (is_spc)
+ /* what needs to be autocompleted is a full
+ new word, and not extend the last word
+ */
+ count++;
- return count;
+ return count;
}
-
char **
-cli_rl_tokenize (const char *text)
+cli_rl_tokenize(const char *text)
{
- int count = 0;
- char **tokens = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- char *copy = NULL;
- char *saveptr = NULL;
- int i = 0;
-
- count = cli_rl_token_count (text);
-
- tokens = calloc (count + 1, sizeof (*tokens));
- if (!tokens)
- return NULL;
-
- copy = strdup (text);
- if (!copy)
- goto out;
-
- tokenp = tokens;
-
- for (token = strtok_r (copy, " \t\r\n", &saveptr); token;
- token = strtok_r (NULL, " \t\r\n", &saveptr)) {
- *tokenp = strdup (token);
-
- if (!*tokenp)
- goto out;
- tokenp++;
- i++;
-
- }
+ int count = 0;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char *copy = NULL;
+ char *saveptr = NULL;
+ int i = 0;
+
+ count = cli_rl_token_count(text);
+
+ tokens = calloc(count + 1, sizeof(*tokens));
+ if (!tokens)
+ return NULL;
- if (i < count) {
- /* symbolize that what needs to be autocompleted is
- the full set of possible nextwords, and not extend
- the last word
- */
- *tokenp = strdup ("");
- if (!*tokenp)
- goto out;
- tokenp++;
- i++;
- }
+ copy = strdup(text);
+ if (!copy)
+ goto out;
+
+ tokenp = tokens;
+
+ for (token = strtok_r(copy, " \t\r\n", &saveptr); token;
+ token = strtok_r(NULL, " \t\r\n", &saveptr)) {
+ *tokenp = strdup(token);
+
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+ }
+
+ if (i < count) {
+ /* symbolize that what needs to be autocompleted is
+ the full set of possible nextwords, and not extend
+ the last word
+ */
+ *tokenp = strdup("");
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+ }
out:
- if (copy)
- free (copy);
+ free(copy);
- if (i < count) {
- cli_cmd_tokens_destroy (tokens);
- tokens = NULL;
- }
+ if (i < count) {
+ cli_cmd_tokens_destroy(tokens);
+ tokens = NULL;
+ }
- return tokens;
+ return tokens;
}
-
char **
-cli_rl_get_matches (struct cli_state *state, struct cli_cmd_word *word,
- const char *text)
+cli_rl_get_matches(struct cli_state *state, struct cli_cmd_word *word,
+ const char *text)
{
- char **matches = NULL;
- char **matchesp = NULL;
- struct cli_cmd_word **next = NULL;
- int count = 0;
- int len = 0;
+ char **matches = NULL;
+ char **matchesp = NULL;
+ struct cli_cmd_word **next = NULL;
+ int count = 0;
+ int len = 0;
- len = strlen (text);
+ len = strlen(text);
- if (!word->nextwords)
- return NULL;
+ if (!word->nextwords)
+ return NULL;
- for (next = word->nextwords; *next; next++)
- count++;
+ for (next = word->nextwords; *next; next++)
+ count++;
- matches = calloc (count + 1, sizeof (*matches));
- matchesp = matches;
+ matches = calloc(count + 1, sizeof(*matches));
+ matchesp = matches;
- for (next = word->nextwords; *next; next++) {
- if ((*next)->match) {
- continue;
- }
+ for (next = word->nextwords; *next; next++) {
+ if ((*next)->match) {
+ continue;
+ }
- if (strncmp ((*next)->word, text, len) == 0) {
- *matchesp = strdup ((*next)->word);
- matchesp++;
- }
+ if (strncmp((*next)->word, text, len) == 0) {
+ *matchesp = strdup((*next)->word);
+ matchesp++;
}
+ }
- return matches;
+ return matches;
}
-
int
-cli_rl_autocomplete_prepare (struct cli_state *state, const char *text)
+cli_rl_autocomplete_prepare(struct cli_state *state, const char *text)
{
- struct cli_cmd_word *word = NULL;
- struct cli_cmd_word *next = NULL;
- char **tokens = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- char **matches = NULL;
-
- tokens = cli_rl_tokenize (text);
- if (!tokens)
- return 0;
-
- word = &state->tree.root;
-
- for (tokenp = tokens; (token = *tokenp); tokenp++) {
- if (!*(tokenp+1)) {
- /* last word */
- break;
- }
-
- next = cli_cmd_nextword (word, token);
- word = next;
- if (!word)
- break;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char **matches = NULL;
+
+ tokens = cli_rl_tokenize(text);
+ if (!tokens)
+ return 0;
+
+ word = &state->tree.root;
+
+ for (tokenp = tokens; (token = *tokenp); tokenp++) {
+ if (!*(tokenp + 1)) {
+ /* last word */
+ break;
}
+ next = cli_cmd_nextword(word, token);
+ word = next;
if (!word)
- goto out;
+ break;
+ }
+
+ if (!word || !token)
+ goto out;
- matches = cli_rl_get_matches (state, word, token);
+ matches = cli_rl_get_matches(state, word, token);
- state->matches = matches;
- state->matchesp = matches;
+ state->matches = matches;
+ state->matchesp = matches;
out:
- cli_cmd_tokens_destroy (tokens);
- return 0;
+ cli_cmd_tokens_destroy(tokens);
+ return 0;
}
-
int
-cli_rl_autocomplete_cleanup (struct cli_state *state)
+cli_rl_autocomplete_cleanup(struct cli_state *state)
{
- if (state->matches)
- cli_cmd_tokens_destroy (state->matches);
+ if (state->matches)
+ cli_cmd_tokens_destroy(state->matches);
- state->matches = NULL;
- state->matchesp = NULL;
+ state->matches = NULL;
+ state->matchesp = NULL;
- return 0;
+ return 0;
}
-
char **
-cli_rl_autocomplete (const char *text, int start, int end)
+cli_rl_autocomplete(const char *text, int start, int end)
{
- struct cli_state *state = NULL;
- char **matches = NULL;
- char save = 0;
+ struct cli_state *state = NULL;
+ char **matches = NULL;
+ char save = 0;
- state = global_state;
+ state = global_state;
- /* hack to make the autocompletion code neater */
- /* fake it as though the cursor is at the end of line */
+ /* hack to make the autocompletion code neater */
+ /* fake it as though the cursor is at the end of line */
- save = rl_line_buffer[rl_point];
- rl_line_buffer[rl_point] = 0;
+ save = rl_line_buffer[rl_point];
+ rl_line_buffer[rl_point] = 0;
- cli_rl_autocomplete_prepare (state, rl_line_buffer);
+ cli_rl_autocomplete_prepare(state, rl_line_buffer);
- matches = rl_completion_matches (text, cli_rl_autocomplete_entry);
+ matches = rl_completion_matches(text, cli_rl_autocomplete_entry);
- cli_rl_autocomplete_cleanup (state);
+ cli_rl_autocomplete_cleanup(state);
- rl_line_buffer[rl_point] = save;
+ rl_line_buffer[rl_point] = save;
- return matches;
+ return matches;
}
-
static char *
-complete_none (const char *txt, int times)
+complete_none(const char *txt, int times)
{
- return NULL;
+ return NULL;
}
-
void *
-cli_rl_input (void *_data)
+cli_rl_input(void *_data)
{
- struct cli_state *state = NULL;
- char *line = NULL;
+ struct cli_state *state = NULL;
+ char *line = NULL;
- state = _data;
+ state = _data;
- for (;;) {
- line = readline (state->prompt);
- if (!line)
- break;
+ fprintf(stderr,
+ "Welcome to gluster prompt, type 'help' to see the available "
+ "commands.\n");
+ for (;;) {
+ line = readline(state->prompt);
+ if (!line)
+ exit(0); // break;
- cli_rl_process_line (line);
+ if (*line)
+ cli_rl_process_line(line);
- free (line);
- }
+ free(line);
+ }
- return NULL;
+ return NULL;
}
-
int
-cli_rl_enable (struct cli_state *state)
+cli_rl_enable(struct cli_state *state)
{
- int ret = 0;
-
- rl_pre_input_hook = NULL;
- rl_attempted_completion_function = cli_rl_autocomplete;
- rl_completion_entry_function = complete_none;
-
- if (!state->rl_async) {
- ret = pthread_create (&state->input, NULL,
- cli_rl_input, state);
- if (ret == 0)
- state->rl_enabled = 1;
- goto out;
- }
+ int ret = 0;
- ret = event_register (state->ctx->event_pool, 0, cli_rl_stdin, state,
- 1, 0);
- if (ret == -1)
- goto out;
+ rl_pre_input_hook = NULL;
+ rl_attempted_completion_function = cli_rl_autocomplete;
+ rl_completion_entry_function = complete_none;
- state->rl_enabled = 1;
- rl_callback_handler_install (state->prompt, cli_rl_process_line);
+ if (!state->rl_async) {
+ ret = pthread_create(&state->input, NULL, cli_rl_input, state);
+ if (ret == 0)
+ state->rl_enabled = 1;
+ goto out;
+ }
+
+ ret = gf_event_register(state->ctx->event_pool, 0, cli_rl_stdin, state, 1,
+ 0, 0);
+ if (ret == -1)
+ goto out;
+
+ state->rl_enabled = 1;
+ rl_callback_handler_install(state->prompt, cli_rl_process_line);
out:
- return state->rl_enabled;
+ return state->rl_enabled;
}
#else /* HAVE_READLINE */
int
-cli_rl_enable (struct cli_state *state)
+cli_rl_enable(struct cli_state *state)
{
- return 0;
+ return 0;
}
#endif /* HAVE_READLINE */
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index c98d986b3fb..9b6b0c7fa50 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -1,31 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef GSYNC_CONF
-#define GSYNC_CONF GEOREP"/gsyncd.conf"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* Widths of various columns in top read/write-perf output
* Total width of top read/write-perf should be 80 chars
@@ -33,6338 +14,10936 @@
*/
#define VOL_TOP_PERF_FILENAME_DEF_WIDTH 47
#define VOL_TOP_PERF_FILENAME_ALT_WIDTH 44
-#define VOL_TOP_PERF_SPEED_WIDTH 4
-#define VOL_TOP_PERF_TIME_WIDTH 26
+#define VOL_TOP_PERF_SPEED_WIDTH 4
+#define VOL_TOP_PERF_TIME_WIDTH 26
+
+#define INDENT_MAIN_HEAD "%-25s %s "
+
+#define RETURNING "Returning %d"
+#define XML_ERROR "Error outputting to xml"
+#define XDR_DECODE_FAIL "Failed to decode xdr response"
+#define DICT_SERIALIZE_FAIL "Failed to serialize to data to dictionary"
+#define DICT_UNSERIALIZE_FAIL "Failed to unserialize the dictionary"
+
+/* Do not show estimates if greater than this number */
+#define REBAL_ESTIMATE_SEC_UPPER_LIMIT (60 * 24 * 3600)
+#define REBAL_ESTIMATE_START_TIME 600
#include "cli.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "cli-cmd.h"
#include <sys/uio.h>
#include <stdlib.h>
#include <sys/mount.h>
-#include "cli1-xdr.h"
-#include "xdr-generic.h"
-#include "protocol-common.h"
+#include <glusterfs/compat.h>
#include "cli-mem-types.h"
-#include "compat.h"
-
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "glusterfs3.h"
#include "portmap-xdr.h"
+#include <glusterfs/byte-order.h>
-#include "run.h"
+#include <glusterfs/run.h>
+#include <glusterfs/events.h>
-extern rpc_clnt_prog_t *cli_rpc_prog;
-extern int cli_op_ret;
-extern int connected;
+enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK };
-char *cli_volume_type[] = {"Distribute",
- "Stripe",
- "Replicate",
- "Striped-Replicate",
- "Distributed-Stripe",
- "Distributed-Replicate",
- "Distributed-Striped-Replicate",
-};
+rpc_clnt_prog_t cli_quotad_clnt;
+static int32_t
+gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data);
-char *cli_volume_status[] = {"Created",
- "Started",
- "Stopped"
+char *cli_vol_status_str[] = {
+ "Created",
+ "Started",
+ "Stopped",
};
-int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
- void *data);
+char *cli_vol_task_status_str[] = {"not started",
+ "in progress",
+ "stopped",
+ "completed",
+ "failed",
+ "fix-layout in progress",
+ "fix-layout stopped",
+ "fix-layout completed",
+ "fix-layout failed",
+ "unknown"};
+static int32_t
+gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data);
-rpc_clnt_prog_t cli_handshake_prog = {
- .progname = "cli handshake",
- .prognum = GLUSTER_HNDSK_PROGRAM,
- .progver = GLUSTER_HNDSK_VERSION,
+static int32_t
+gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data);
+
+static int
+cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
+ rpc_clnt_prog_t *prog, struct iobref *iobref);
+
+static int
+add_cli_cmd_timeout_to_dict(dict_t *dict);
+
+static rpc_clnt_prog_t cli_handshake_prog = {
+ .progname = "cli handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
};
-rpc_clnt_prog_t cli_pmap_prog = {
- .progname = "cli portmap",
- .prognum = GLUSTER_PMAP_PROGRAM,
- .progver = GLUSTER_PMAP_VERSION,
+static rpc_clnt_prog_t cli_pmap_prog = {
+ .progname = "cli portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
};
-void
-gf_cli_probe_strerror (gf1_cli_probe_rsp *rsp, char *msg, size_t len)
+static void
+gf_free_xdr_cli_rsp(gf_cli_rsp rsp)
{
- switch (rsp->op_errno) {
- case GF_PROBE_ANOTHER_CLUSTER:
- snprintf (msg, len, "%s is already part of another cluster",
- rsp->hostname);
- break;
- case GF_PROBE_VOLUME_CONFLICT:
- snprintf (msg, len, "Atleast one volume on %s conflicts with "
- "existing volumes in the cluster", rsp->hostname);
- break;
- case GF_PROBE_UNKNOWN_PEER:
- snprintf (msg, len, "%s responded with 'unknown peer' error, "
- "this could happen if %s doesn't have localhost in "
- "its peer database", rsp->hostname, rsp->hostname);
- break;
- case GF_PROBE_ADD_FAILED:
- snprintf (msg, len, "Failed to add peer information on %s" ,
- rsp->hostname);
- break;
- case GF_PROBE_QUORUM_NOT_MET:
- snprintf (msg, len, "Cluster quorum is not met. Changing "
- "peers is not allowed in this state");
- break;
- default:
- snprintf (msg, len, "Probe returned with unknown "
- "errno %d", rsp->op_errno);
- break;
- }
+ if (rsp.dict.dict_val) {
+ free(rsp.dict.dict_val);
+ }
+ if (rsp.op_errstr) {
+ free(rsp.op_errstr);
+ }
}
-int
-gf_cli3_1_probe_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static void
+gf_free_xdr_getspec_rsp(gf_getspec_rsp rsp)
{
- gf1_cli_probe_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ if (rsp.spec) {
+ free(rsp.spec);
+ }
+ if (rsp.xdata.xdata_val) {
+ free(rsp.xdata.xdata_val);
+ }
+}
- if (-1 == req->rpc_status) {
- goto out;
- }
+static void
+gf_free_xdr_fsm_log_rsp(gf1_cli_fsm_log_rsp rsp)
+{
+ if (rsp.op_errstr) {
+ free(rsp.op_errstr);
+ }
+ if (rsp.fsm_log.fsm_log_val) {
+ free(rsp.fsm_log.fsm_log_val);
+ }
+}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_probe_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
- goto out;
+static int
+gf_cli_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = "success";
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ // rsp.op_ret = -1;
+ // rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to probe");
+
+ if (rsp.op_errstr && rsp.op_errstr[0] != '\0') {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ if (rsp.op_ret) {
+ gf_log("cli", GF_LOG_ERROR, "%s", msg);
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
- if (!rsp.op_ret) {
- switch (rsp.op_errno) {
- case GF_PROBE_SUCCESS:
- snprintf (msg, sizeof (msg),
- "Probe successful");
- break;
- case GF_PROBE_LOCALHOST:
- snprintf (msg, sizeof (msg),
- "Probe on localhost not needed");
- break;
- case GF_PROBE_FRIEND:
- snprintf (msg, sizeof (msg),
- "Probe on host %s port %d already"
- " in peer list", rsp.hostname,
- rsp.port);
- break;
- default:
- snprintf (msg, sizeof (msg),
- "Probe returned with unknown errno %d",
- rsp.op_errno);
- break;
- }
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str(NULL, (rsp.op_ret) ? NULL : msg, rsp.op_ret,
+ rsp.op_errno, (rsp.op_ret) ? msg : NULL);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (rsp.op_ret) {
- gf_cli_probe_strerror (&rsp, msg, sizeof (msg));
- gf_log ("glusterd",GF_LOG_ERROR,"Probe failed with op_ret %d"
- " and op_errno %d", rsp.op_ret, rsp.op_errno);
- }
+ if (!rsp.op_ret)
+ cli_out("peer probe: %s", msg);
+ else
+ cli_err("peer probe: failed: %s", msg);
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("peerProbe", msg, rsp.op_ret,
- rsp.op_errno, NULL);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- if (!rsp.op_ret)
- cli_out ("%s", msg);
- else
- cli_err ("%s", msg);
+ ret = rsp.op_ret;
- ret = rsp.op_ret;
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
+
+static int
+gf_cli_deprobe_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = "success";
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ // rsp.op_ret = -1;
+ // rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to deprobe");
+
+ if (rsp.op_ret) {
+ if (rsp.op_errstr[0] != '\0') {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ gf_log("cli", GF_LOG_ERROR, "%s", rsp.op_errstr);
+ }
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str(NULL, (rsp.op_ret) ? NULL : msg, rsp.op_ret,
+ rsp.op_errno, (rsp.op_ret) ? msg : NULL);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (!rsp.op_ret)
+ cli_out("peer detach: %s", msg);
+ else
+ cli_err("peer detach: failed: %s", msg);
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int
-gf_cli3_1_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_output_peer_hostnames(dict_t *dict, int count, const char *prefix)
{
- gf1_cli_deprobe_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ int ret = -1;
+ char key[512] = {
+ 0,
+ };
+ int i = 0;
+ char *hostname = NULL;
+
+ cli_out("Other names:");
+ /* Starting from friend.hostname1, as friend.hostname0 will be the same
+ * as friend.hostname
+ */
+ for (i = 1; i < count; i++) {
+ ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ ret = dict_get_strn(dict, key, ret, &hostname);
+ if (ret)
+ break;
+ cli_out("%s", hostname);
+ hostname = NULL;
+ }
- if (-1 == req->rpc_status) {
- goto out;
- }
+ return ret;
+}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_deprobe_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
- goto out;
- }
+static int
+gf_cli_output_peer_status(dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t i = 1;
+ char key[256] = {
+ 0,
+ };
+ int keylen;
+ char *state = NULL;
+ int32_t connected = 0;
+ const char *connected_str = NULL;
+ int hostname_count = 0;
+
+ cli_out("Number of Peers: %d", count);
+ i = 1;
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_buf);
+ if (ret)
+ goto out;
- gf_log ("cli", GF_LOG_INFO, "Received resp to deprobe");
- if (rsp.op_ret) {
- switch (rsp.op_errno) {
- case GF_DEPROBE_LOCALHOST:
- snprintf (msg, sizeof (msg),
- "%s is localhost", rsp.hostname);
- break;
- case GF_DEPROBE_NOT_FRIEND:
- snprintf (msg, sizeof (msg),
- "%s is not part of cluster",
- rsp.hostname);
- break;
- case GF_DEPROBE_BRICK_EXIST:
- snprintf (msg, sizeof (msg),
- "Brick(s) with the peer %s exist in "
- "cluster", rsp.hostname);
- break;
- case GF_DEPROBE_FRIEND_DOWN:
- snprintf (msg, sizeof (msg),
- "One of the peers is probably down."
- " Check with 'peer status'.");
- break;
- case GF_DEPROBE_QUORUM_NOT_MET:
- snprintf (msg, sizeof (msg), "Cluster "
- "quorum is not met. Changing "
- "peers is not allowed in this"
- " state");
- break;
- default:
- snprintf (msg, sizeof (msg),
- "Detach unsuccessful\nDetach returned"
- " with unknown errno %d",
- rsp.op_errno);
- break;
- }
- gf_log ("glusterd",GF_LOG_ERROR,"Detach failed with op_ret %d"
- " and op_errno %d", rsp.op_ret, rsp.op_errno);
- } else {
- snprintf (msg, sizeof (msg), "Detach successful");
- }
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
+ if (ret)
+ goto out;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("peerDetach", msg, rsp.op_ret,
- rsp.op_errno, NULL);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32n(dict, key, keylen, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+ keylen = snprintf(key, sizeof(key), "friend%d.state", i);
+ ret = dict_get_strn(dict, key, keylen, &state);
+ if (ret)
+ goto out;
+
+ cli_out("\nHostname: %s\nUuid: %s\nState: %s (%s)", hostname_buf,
+ uuid_buf, state, connected_str);
+
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &hostname_count);
+ /* Print other addresses only if there are more than 1.
+ */
+ if ((ret == 0) && (hostname_count > 1)) {
+ snprintf(key, sizeof(key), "friend%d", i);
+ ret = gf_cli_output_peer_hostnames(dict, hostname_count, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING,
+ "error outputting peer other names");
goto out;
+ }
}
-#endif
- if (!rsp.op_ret)
- cli_out ("%s", msg);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_cli_output_pool_list(dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t hostname_len = 8; /*min len 8 chars*/
+ int32_t i = 1;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int32_t connected = 0;
+ const char *connected_str = NULL;
+
+ if (count <= 0)
+ goto out;
+
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
+ if (ret)
+ goto out;
+
+ ret = strlen(hostname_buf);
+ if (ret > hostname_len)
+ hostname_len = ret;
+
+ i++;
+ }
+
+ cli_out("UUID\t\t\t\t\t%-*s\tState", hostname_len, "Hostname");
+
+ i = 1;
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_buf);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32n(dict, key, keylen, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
else
- cli_err ("%s", msg);
+ connected_str = "Disconnected";
- ret = rsp.op_ret;
+ cli_out("%s\t%-*s\t%s ", uuid_buf, hostname_len, hostname_buf,
+ connected_str);
+ i++;
+ }
+ ret = 0;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int
-gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_peer_list_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_buf = NULL;
- char *hostname_buf = NULL;
- int32_t i = 1;
- char key[256] = {0,};
- char *state = NULL;
- int32_t port = 0;
- int32_t connected = 0;
- char *connected_str = NULL;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
+/* function pointer for gf_cli_output_{pool_list,peer_status} */
+typedef int (*cli_friend_output_fn)(dict_t *, int);
+
+static int
+gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf1_cli_peer_list_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ const char *cmd = NULL;
+ cli_friend_output_fn friend_output_fn;
+ call_frame_t *frame = NULL;
+ unsigned long flags = 0;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+
+ flags = (long)frame->local;
+
+ if (flags == GF_CLI_LIST_POOL_NODES) {
+ cmd = "pool list";
+ friend_output_fn = &gf_cli_output_pool_list;
+ } else {
+ cmd = "peer status";
+ friend_output_fn = &gf_cli_output_peer_status;
+ }
+
+ /* 'free' the flags set by gf_cli_list_friends */
+ frame->local = NULL;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ // rsp.op_ret = -1;
+ // rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to list: %d", rsp.op_ret);
+
+ if (!rsp.op_ret) {
+ if (!rsp.friends.friends_len) {
+ snprintf(msg, sizeof(msg), "%s: No peers present", cmd);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
+ }
+ cli_err("%s", msg);
+ ret = 0;
+ goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
- goto out;
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
}
- gf_log ("cli", GF_LOG_INFO, "Received resp to list: %d",
- rsp.op_ret);
+ ret = dict_unserialize(rsp.friends.friends_val, rsp.friends.friends_len,
+ &dict);
- ret = rsp.op_ret;
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
- if (!rsp.op_ret) {
-
- if (!rsp.friends.friends_len) {
- snprintf (msg, sizeof (msg),
- "No peers present");
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_peer_status (dict,
- rsp.op_ret,
- rsp.op_errno,
- msg);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- cli_err ("%s", msg);
- ret = 0;
- goto out;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- dict = dict_new ();
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ goto out;
+ }
- if (!dict) {
- ret = -1;
- goto out;
- }
+ ret = friend_output_fn(dict, count);
+ if (ret) {
+ goto out;
+ }
+ } else {
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
+ NULL);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ } else {
+ ret = -1;
+ }
+ goto out;
+ }
- ret = dict_unserialize (rsp.friends.friends_val,
- rsp.friends.friends_len,
- &dict);
+ ret = 0;
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- }
+out:
+ if (ret)
+ cli_err("%s: failed", cmd);
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_peer_status (dict, rsp.op_ret,
- rsp.op_errno, msg);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- ret = dict_get_int32 (dict, "count", &count);
+ cli_cmd_broadcast_response(ret);
- if (ret) {
- goto out;
- }
+ if (dict)
+ dict_unref(dict);
- cli_out ("Number of Peers: %d", count);
-
- while ( i <= count) {
- snprintf (key, 256, "friend%d.uuid", i);
- ret = dict_get_str (dict, key, &uuid_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.connected", i);
- ret = dict_get_int32 (dict, key, &connected);
- if (ret)
- goto out;
- if (connected)
- connected_str = "Connected";
- else
- connected_str = "Disconnected";
-
- snprintf (key, 256, "friend%d.port", i);
- ret = dict_get_int32 (dict, key, &port);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.state", i);
- ret = dict_get_str (dict, key, &state);
- if (ret)
- goto out;
-
- if (!port) {
- cli_out ("\nHostname: %s\nUuid: %s\nState: %s "
- "(%s)",
- hostname_buf, uuid_buf, state,
- connected_str);
- } else {
- cli_out ("\nHostname: %s\nPort: %d\nUuid: %s\n"
- "State: %s (%s)", hostname_buf, port,
- uuid_buf, state, connected_str);
- }
- i++;
- }
- } else {
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_peer_status (dict, rsp.op_ret,
- rsp.op_errno, NULL);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- } else
-#endif
- {
- ret = -1;
- }
- goto out;
- }
+ if (rsp.friends.friends_val) {
+ free(rsp.friends.friends_val);
+ }
+ return ret;
+}
+static int
+gf_cli_get_state_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *daemon_name = NULL;
+ char *ofilepath = NULL;
+
+ GF_VALIDATE_OR_GOTO("cli", myframe, out);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("Failed to get daemon state: %s", rsp.op_errstr);
+ else
+ cli_err(
+ "Failed to get daemon state. Check glusterd"
+ " log file for more details");
+ } else {
+ ret = dict_get_str_sizen(dict, "daemon", &daemon_name);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Couldn't get daemon name");
- ret = 0;
+ ret = dict_get_str_sizen(dict, "ofilepath", &ofilepath);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Couldn't get filepath");
+
+ if (daemon_name && ofilepath)
+ cli_out("%s state dumped to %s", daemon_name, ofilepath);
+ }
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (ret)
- cli_err ("Peer status unsuccessful");
+ if (dict)
+ dict_unref(dict);
- if (dict)
- dict_destroy (dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- return ret;
+ return ret;
}
-void
-cli_out_options ( char *substr, char *optstr, char *valstr)
+static void
+cli_out_options(char *substr, char *optstr, char *valstr)
{
- char *ptr1 = NULL;
- char *ptr2 = NULL;
+ char *ptr1 = NULL;
+ char *ptr2 = NULL;
+
+ ptr1 = substr;
+ ptr2 = optstr;
+
+ while (ptr1) {
+ /* Avoiding segmentation fault. */
+ if (!ptr2)
+ return;
+ if (*ptr1 != *ptr2)
+ break;
+ ptr1++;
+ ptr2++;
+ }
+
+ if (*ptr2 == '\0')
+ return;
+ cli_out("%s: %s", ptr2, valstr);
+}
- ptr1 = substr;
- ptr2 = optstr;
+static int
+_gf_cli_output_volinfo_opts(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ int ret = 0;
+ char *key = NULL;
+ char *ptr = NULL;
+ data_t *value = NULL;
+
+ key = tmp;
+
+ ptr = strstr(k, "option.");
+ if (ptr) {
+ value = v;
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ cli_out_options(key, k, v->data);
+ }
+out:
+ return ret;
+}
- while (ptr1)
- {
- if (*ptr1 != *ptr2)
- break;
- ptr1++;
- ptr2++;
- if (!ptr1)
- return;
- if (!ptr2)
- return;
- }
+static int
+print_brick_details(dict_t *dict, int volcount, int start_index, int end_index,
+ int replica_count)
+{
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int index = start_index;
+ int isArbiter = 0;
+ int ret = -1;
+ char *brick = NULL;
+
+ while (index <= end_index) {
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d", volcount,
+ index);
+ ret = dict_get_strn(dict, key, keylen, &brick);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter",
+ volcount, index);
+ if (dict_getn(dict, key, keylen))
+ isArbiter = 1;
+ else
+ isArbiter = 0;
- if (*ptr2 == '\0')
- return;
- cli_out ("%s: %s",ptr2 , valstr);
+ if (isArbiter)
+ cli_out("Brick%d: %s (arbiter)", index, brick);
+ else
+ cli_out("Brick%d: %s", index, brick);
+ index++;
+ }
+ ret = 0;
+out:
+ return ret;
}
+static void
+gf_cli_print_number_of_bricks(int type, int brick_count, int dist_count,
+ int stripe_count, int replica_count,
+ int disperse_count, int redundancy_count,
+ int arbiter_count)
+{
+ if (type == GF_CLUSTER_TYPE_NONE) {
+ cli_out("Number of Bricks: %d", brick_count);
+ } else if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ cli_out("Number of Bricks: %d x (%d + %d) = %d",
+ (brick_count / dist_count), disperse_count - redundancy_count,
+ redundancy_count, brick_count);
+ } else {
+ /* For both replicate and stripe, dist_count is
+ good enough */
+ if (arbiter_count == 0) {
+ cli_out("Number of Bricks: %d x %d = %d",
+ (brick_count / dist_count), dist_count, brick_count);
+ } else {
+ cli_out("Number of Bricks: %d x (%d + %d) = %d",
+ (brick_count / dist_count), dist_count - arbiter_count,
+ arbiter_count, brick_count);
+ }
+ }
+}
-int
-gf_cli3_1_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- int opt_count = 0;
- int k = 0;
- int32_t i = 0;
- int32_t j = 1;
- int32_t status = 0;
- int32_t type = 0;
- int32_t brick_count = 0;
- int32_t dist_count = 0;
- int32_t stripe_count = 0;
- int32_t replica_count = 0;
- int32_t vol_type = 0;
- int32_t transport = 0;
- char *ptr = NULL;
- char *volume_id_str = NULL;
- char *brick = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
- data_pair_t *pairs = NULL;
- data_t *value = NULL;
- cli_local_t *local = NULL;
- char key[1024] = {0};
- char err_str[2048] = {0};
- gf_cli_rsp rsp = {0};
-
- if (-1 == req->rpc_status)
- goto out;
+static int
+gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ int opt_count = 0;
+ int32_t i = 0;
+ int32_t j = 1;
+ int32_t status = 0;
+ int32_t type = 0;
+ int32_t brick_count = 0;
+ int32_t dist_count = 0;
+ int32_t stripe_count = 0;
+ int32_t replica_count = 0;
+ int32_t disperse_count = 0;
+ int32_t redundancy_count = 0;
+ int32_t arbiter_count = 0;
+ int32_t snap_count = 0;
+ int32_t thin_arbiter_count = 0;
+ int32_t vol_type = 0;
+ int32_t transport = 0;
+ char *volume_id_str = NULL;
+ char *volname = NULL;
+ char *ta_brick = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ char key[64] = {0};
+ int keylen;
+ char err_str[2048] = {0};
+ gf_cli_rsp rsp = {0};
+ char *caps __attribute__((unused)) = NULL;
+ int k __attribute__((unused)) = 0;
+ call_frame_t *frame = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to get vol: %d", rsp.op_ret);
+
+ if (!rsp.dict.dict_len) {
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+ cli_err("No volumes present");
+ ret = 0;
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "error");
- goto out;
- }
+ dict = dict_new();
- gf_log ("cli", GF_LOG_INFO, "Received resp to get vol: %d",
- rsp.op_ret);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- if (rsp.op_ret) {
- ret = -1;
- goto out;
- }
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (!rsp.dict.dict_len) {
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML)
- goto xml_output;
-#endif
- cli_out ("No volumes present");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret)
+ goto out;
+
+ if (!count) {
+ switch (local->get_vol.flags) {
+ case GF_CLI_GET_NEXT_VOLUME:
+ GF_FREE(local->get_vol.volname);
+ local->get_vol.volname = NULL;
ret = 0;
goto out;
- }
- dict = dict_new ();
-
- if (!dict) {
+ case GF_CLI_GET_VOLUME:
+ snprintf(err_str, sizeof(err_str), "Volume %s does not exist",
+ local->get_vol.volname);
ret = -1;
- goto out;
+ if (!(global_state->mode & GLUSTER_MODE_XML))
+ goto out;
}
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ if (rsp.op_ret) {
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Unable to allocate memory");
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ /* For GET_NEXT_VOLUME output is already begun in
+ * and will also end in gf_cli_get_next_volume()
+ */
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_begin(local, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
+ }
}
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
+ if (dict) {
+ ret = cli_xml_output_vol_info(local, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
+ }
+ }
- local = ((call_frame_t *)myframe)->local;
-
- if (!count) {
- switch (local->get_vol.flags) {
-
- case GF_CLI_GET_NEXT_VOLUME:
- GF_FREE (local->get_vol.volname);
- local->get_vol.volname = NULL;
- ret = 0;
- goto out;
-
- case GF_CLI_GET_VOLUME:
- memset (err_str, 0, sizeof (err_str));
- snprintf (err_str, sizeof (err_str),
- "Volume %s does not exist",
- local->get_vol.volname);
- ret = -1;
-#if (HAVE_LIB_XML)
- if (!(global_state->mode & GLUSTER_MODE_XML))
-#endif
- {
- goto out;
- }
- }
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_end(local);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
+ goto out;
+ }
-#if (HAVE_LIB_XML)
-xml_output:
- if (global_state->mode & GLUSTER_MODE_XML) {
- /* For GET_NEXT_VOLUME output is already begun in
- * and will also end in gf_cli3_1_get_next_volume()
- */
- if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
- ret = cli_xml_output_vol_info_begin
- (local, rsp.op_ret, rsp.op_errno,
- rsp.op_errstr);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
- }
+ while (i < count) {
+ cli_out(" ");
+ keylen = snprintf(key, sizeof(key), "volume%d.name", i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret)
+ goto out;
- if (dict) {
- ret = cli_xml_output_vol_info (local, dict);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
- }
+ keylen = snprintf(key, sizeof(key), "volume%d.type", i);
+ ret = dict_get_int32n(dict, key, keylen, &type);
+ if (ret)
+ goto out;
- if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
- ret = cli_xml_output_vol_info_end (local);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- }
- goto out;
- }
-#endif
+ keylen = snprintf(key, sizeof(key), "volume%d.status", i);
+ ret = dict_get_int32n(dict, key, keylen, &status);
+ if (ret)
+ goto out;
- while ( i < count) {
- cli_out (" ");
- snprintf (key, 256, "volume%d.name", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.brick_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &brick_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.type", i);
- ret = dict_get_int32 (dict, key, &type);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.dist_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &dist_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.status", i);
- ret = dict_get_int32 (dict, key, &status);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &stripe_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.brick_count", i);
- ret = dict_get_int32 (dict, key, &brick_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.replica_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &replica_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.dist_count", i);
- ret = dict_get_int32 (dict, key, &dist_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &disperse_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.stripe_count", i);
- ret = dict_get_int32 (dict, key, &stripe_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &redundancy_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.replica_count", i);
- ret = dict_get_int32 (dict, key, &replica_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &arbiter_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.transport", i);
- ret = dict_get_int32 (dict, key, &transport);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.transport", i);
+ ret = dict_get_int32n(dict, key, keylen, &transport);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.volume_id", i);
- ret = dict_get_str (dict, key, &volume_id_str);
- if (ret)
- goto out;
-
- vol_type = type;
-
- // Distributed (stripe/replicate/stripe-replica) setups
- if ((type > 0) && ( dist_count < brick_count))
- vol_type = type + 3;
-
- cli_out ("Volume Name: %s", volname);
- cli_out ("Type: %s", cli_volume_type[vol_type]);
- cli_out ("Volume ID: %s", volume_id_str);
- cli_out ("Status: %s", cli_volume_status[status]);
-
- if (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
- cli_out ("Number of Bricks: %d x %d x %d = %d",
- (brick_count / dist_count),
- stripe_count,
- replica_count,
- brick_count);
-
- } else if (type == GF_CLUSTER_TYPE_NONE) {
- cli_out ("Number of Bricks: %d", brick_count);
-
- } else {
- /* For both replicate and stripe, dist_count is
- good enough */
- cli_out ("Number of Bricks: %d x %d = %d",
- (brick_count / dist_count),
- dist_count, brick_count);
- }
+ keylen = snprintf(key, sizeof(key), "volume%d.volume_id", i);
+ ret = dict_get_strn(dict, key, keylen, &volume_id_str);
+ if (ret)
+ goto out;
- cli_out ("Transport-type: %s",
- ((transport == 0)?"tcp":
- (transport == 1)?"rdma":
- "tcp,rdma"));
- j = 1;
+ keylen = snprintf(key, sizeof(key), "volume%d.snap_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &snap_count);
+ if (ret)
+ goto out;
- GF_FREE (local->get_vol.volname);
- local->get_vol.volname = gf_strdup (volname);
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &thin_arbiter_count);
+ if (ret)
+ goto out;
- if (brick_count)
- cli_out ("Bricks:");
+ // Distributed (stripe/replicate/stripe-replica) setups
+ vol_type = get_vol_type(type, dist_count, brick_count);
- while (j <= brick_count) {
- snprintf (key, 1024, "volume%d.brick%d", i, j);
- ret = dict_get_str (dict, key, &brick);
- if (ret)
- goto out;
+ cli_out("Volume Name: %s", volname);
+ cli_out("Type: %s", vol_type_str[vol_type]);
+ cli_out("Volume ID: %s", volume_id_str);
+ cli_out("Status: %s", cli_vol_status_str[status]);
+ cli_out("Snapshot Count: %d", snap_count);
- cli_out ("Brick%d: %s", j, brick);
- j++;
- }
+ gf_cli_print_number_of_bricks(
+ type, brick_count, dist_count, stripe_count, replica_count,
+ disperse_count, redundancy_count, arbiter_count);
- pairs = dict->members_list;
- if (!pairs) {
- ret = -1;
- goto out;
- }
+ cli_out("Transport-type: %s",
+ ((transport == 0) ? "tcp"
+ : (transport == 1) ? "rdma" : "tcp,rdma"));
+ j = 1;
- snprintf (key, 256, "volume%d.opt_count",i);
- ret = dict_get_int32 (dict, key, &opt_count);
- if (ret)
- goto out;
-
- if (!opt_count)
- goto out;
-
- cli_out ("Options Reconfigured:");
- k = 0;
-
- while (k < opt_count) {
-
- snprintf (key, 256, "volume%d.option.",i);
- while (pairs) {
- ptr = strstr (pairs->key, "option.");
- if (ptr) {
- value = pairs->value;
- if (!value) {
- ret = -1;
- goto out;
- }
- cli_out_options (key, pairs->key,
- value->data);
- }
- pairs = pairs->next;
- }
- k++;
- }
+ GF_FREE(local->get_vol.volname);
+ local->get_vol.volname = gf_strdup(volname);
+
+ cli_out("Bricks:");
+ ret = print_brick_details(dict, i, j, brick_count, replica_count);
+ if (ret)
+ goto out;
- i++;
+ if (thin_arbiter_count) {
+ snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick", i);
+ ret = dict_get_str(dict, key, &ta_brick);
+ if (ret)
+ goto out;
+ cli_out("Thin-arbiter-path: %s", ta_brick);
}
+ snprintf(key, sizeof(key), "volume%d.opt_count", i);
+ ret = dict_get_int32(dict, key, &opt_count);
+ if (ret)
+ goto out;
- ret = 0;
-out:
- cli_cmd_broadcast_response (ret);
+ if (!opt_count)
+ goto out;
+
+ cli_out("Options Reconfigured:");
+
+ snprintf(key, sizeof(key), "volume%d.option.", i);
+
+ ret = dict_foreach(dict, _gf_cli_output_volinfo_opts, key);
if (ret)
- cli_err ("%s", err_str);
+ goto out;
+
+ i++;
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ cli_err("%s", err_str);
- if (dict)
- dict_destroy (dict);
+ cli_cmd_broadcast_response(ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ if (dict)
+ dict_unref(dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ gf_free_xdr_cli_rsp(rsp);
- gf_log ("cli", GF_LOG_INFO, "Returning: %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_create_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ dict_t *rsp_dict = NULL;
+ call_frame_t *frame = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- local = ((call_frame_t *) (myframe))->local;
- ((call_frame_t *) (myframe))->local = NULL;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ frame = myframe;
- dict = local->dict;
+ GF_ASSERT(frame->local);
- ret = dict_get_str (dict, "volname", &volname);
+ local = frame->local;
- gf_log ("cli", GF_LOG_INFO, "Received resp to create volume");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volCreate", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ gf_log("cli", GF_LOG_INFO, "Received resp to create volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
-#endif
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Creation of volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful":
- "successful. Please start the volume to "
- "access data.");
- ret = rsp.op_ret;
+ ret = cli_xml_output_vol_create(rsp_dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume create: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume create: %s: failed", volname);
+ else
+ cli_out(
+ "volume create: %s: success: "
+ "please start the volume to access data",
+ volname);
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (dict)
- dict_unref (dict);
- if (local)
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return ret;
}
-int
-gf_cli3_1_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *rsp_dict = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ GF_ASSERT(myframe);
+ frame = myframe;
- frame = myframe;
- local = frame->local;
- frame->local = NULL;
+ GF_ASSERT(frame->local);
- if (local)
- dict = local->dict;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "dict get failed");
- goto out;
- }
+ local = frame->local;
- gf_log ("cli", GF_LOG_INFO, "Received resp to delete volume");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volDelete", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ gf_log("cli", GF_LOG_INFO, "Received resp to delete volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
-#endif
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Deleting volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- ret = rsp.op_ret;
+ ret = cli_xml_output_generic_volume("volDelete", rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "dict get failed");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume delete: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume delete: %s: failed", volname);
+ else
+ cli_out("volume delete: %s: success", volname);
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (dict)
- dict_unref (dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- gf_log ("", GF_LOG_INFO, "Returning with %d", ret);
- return ret;
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli3_1_uuid_get_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
+ char *uuid_str = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (-1 == req->rpc_status)
+ goto out;
- frame = myframe;
+ frame = myframe;
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ GF_ASSERT(frame->local);
- if (local)
- dict = local->dict;
+ local = frame->local;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "dict get failed");
- goto out;
- }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to start volume");
+ frame->local = NULL;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volStart", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ gf_log("cli", GF_LOG_INFO, "Received resp to uuid get");
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict("uuidGenerate", dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, "") == 0)
+ cli_err("Get uuid was unsuccessful");
else
- cli_out ("Starting volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ cli_err("%s", rsp.op_errstr);
- ret = rsp.op_ret;
+ } else {
+ ret = dict_get_str_sizen(dict, "uuid", &uuid_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get uuid from dictionary");
+ goto out;
+ }
+ cli_out("UUID: %s", uuid_str);
+ }
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (dict)
- dict_unref (dict);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ cli_local_wipe(local);
+ gf_free_xdr_cli_rsp(rsp);
+
+ if (dict)
+ dict_unref(dict);
+
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli3_1_uuid_reset_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- frame = myframe;
+ frame = myframe;
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ GF_ASSERT(frame->local);
- if (local) {
- dict = local->dict;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Unable to get volname from dict");
- goto out;
- }
- }
+ local = frame->local;
- gf_log ("cli", GF_LOG_INFO, "Received resp to stop volume");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volStop", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ frame->local = NULL;
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Stopping volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- ret = rsp.op_ret;
+ gf_log("cli", GF_LOG_INFO, "Received resp to uuid reset");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict("uuidReset", NULL, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("%s", rsp.op_errstr);
+ else
+ cli_out("resetting the peer uuid has been %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (local)
- cli_local_wipe (local);
+ cli_cmd_broadcast_response(ret);
+ cli_local_wipe(local);
+ gf_free_xdr_cli_rsp(rsp);
- return ret;
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- char *status = "unknown";
- int cmd = 0;
- int ret = -1;
- dict_t *dict = NULL;
- dict_t *local_dict = NULL;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- char msg[1024] = {0,};
- gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
- int32_t counter = 0;
- char *node_uuid = NULL;
- char key[256] = {0,};
- int32_t i = 1;
- uint64_t failures = 0;
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+static int
+gf_cli_start_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *rsp_dict = NULL;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ GF_ASSERT(myframe);
- frame = myframe;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ frame = myframe;
- if (local) {
- local_dict = local->dict;
- }
+ GF_ASSERT(frame->local);
- ret = dict_get_str (local_dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to get volname");
- goto out;
- }
+ local = frame->local;
- ret = dict_get_int32 (local_dict, "rebalance-command", (int32_t*)&cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to get command");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to start volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ ret = cli_xml_output_generic_volume("volStart", rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "dict get failed");
+ goto out;
+ }
- if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS))) {
- /* All other possibility is about starting a volume */
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg),
- "Starting rebalance on volume %s has been %s",
- volname, (rsp.op_ret) ? "unsuccessful":
- "successful");
- goto done;
- }
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume start: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume start: %s: failed", volname);
+ else
+ cli_out("volume start: %s: success", volname);
- ret = dict_get_int32 (dict, "count", &counter);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "count not set");
- goto out;
- }
+ ret = rsp.op_ret;
- if (cmd == GF_DEFRAG_CMD_STOP) {
- if (rsp.op_ret == -1) {
- if (strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg),
- "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg),
- "rebalance volume %s stop failed",
- volname);
- goto done;
- } else {
- snprintf (msg, sizeof (msg),
- "Stopped rebalance process on volume %s \n",
- volname);
- }
- }
- if (cmd == GF_DEFRAG_CMD_STATUS) {
- if (rsp.op_ret == -1) {
- if (strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg),
- "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg),
- "Failed to get the status of "
- "rebalance process");
- goto done;
- }
- }
- cli_out ("%40s %16s %13s %13s %13s %14s", "Node", "Rebalanced-files",
- "size", "scanned", "failures","status");
- cli_out ("%40s %16s %13s %13s %13s %14s", "---------", "-----------",
- "-----------", "-----------", "-----------", "------------");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- do {
- snprintf (key, 256, "node-uuid-%d", i);
- ret = dict_get_str (dict, key, &node_uuid);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get node-uuid");
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return ret;
+}
- memset (key, 0, 256);
- snprintf (key, 256, "files-%d", i);
- ret = dict_get_uint64 (dict, key, &files);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get file count");
+static int
+gf_cli_stop_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *rsp_dict = NULL;
- memset (key, 0, 256);
- snprintf (key, 256, "size-%d", i);
- ret = dict_get_uint64 (dict, key, &size);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get size of xfer");
+ GF_ASSERT(myframe);
- memset (key, 0, 256);
- snprintf (key, 256, "lookups-%d", i);
- ret = dict_get_uint64 (dict, key, &lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get lookedup file count");
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- memset (key, 0, 256);
- snprintf (key, 256, "status-%d", i);
- ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get status");
+ frame = myframe;
- memset (key, 0, 256);
- snprintf (key, 256, "failures-%d", i);
- ret = dict_get_uint64 (dict, key, &failures);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get failures count");
-
- switch (status_rcd) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
- case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- }
- cli_out ("%40s %16"PRId64 "%13"PRId64 "%13"PRId64 "%13"PRId64
- " %14s", node_uuid, files, size, lookup, failures,
- status);
- i++;
- } while (i <= counter);
+ GF_ASSERT(frame->local);
+ local = frame->local;
-done:
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRebalance", msg, rsp.op_ret,
- status_rcd, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to stop volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
-#endif
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+
+ ret = cli_xml_output_generic_volume("volStop", rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Unable to get volname from dict");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume stop: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume stop: %s: failed", volname);
+ else
+ cli_out("volume stop: %s: success", volname);
+
+ ret = rsp.op_ret;
out:
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- if (dict)
- dict_unref (dict);
- if (local_dict)
- dict_unref (local_dict);
- if (local)
- cli_local_wipe (local);
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ return ret;
}
-int
-gf_cli3_1_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ int ret = -1;
+ int count = 0;
+ int i = 1;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ char *node_name = NULL;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ double elapsed = 0;
+ char *status_str = NULL;
+ char *size_str = NULL;
+ int32_t hrs = 0;
+ uint32_t min = 0;
+ uint32_t sec = 0;
+ gf_boolean_t fix_layout = _gf_false;
+ uint64_t max_time = 0;
+ uint64_t max_elapsed = 0;
+ uint64_t time_left = 0;
+ gf_boolean_t show_estimates = _gf_false;
+
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "count not set");
+ goto out;
+ }
+
+ for (i = 1; i <= count; i++) {
+ keylen = snprintf(key, sizeof(key), "status-%d", i);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status_rcd);
+ /* If information from a node is missing we should skip
+ * the node and try to fetch information of other nodes.
+ * If information is not found for all nodes, we should
+ * error out.
+ */
+ if (!ret)
+ break;
+ if (ret && i == count) {
+ gf_log("cli", GF_LOG_TRACE, "failed to get status");
+ goto out;
+ }
+ }
+
+ /* Fix layout will be sent to all nodes for the volume
+ so every status should be of type
+ GF_DEFRAG_STATUS_LAYOUT_FIX*
+ */
+
+ if ((task_type == GF_TASK_TYPE_REBALANCE) &&
+ (status_rcd >= GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED)) {
+ fix_layout = _gf_true;
+ }
+
+ if (fix_layout) {
+ cli_out("%35s %41s %27s", "Node", "status", "run time in h:m:s");
+ cli_out("%35s %41s %27s", "---------", "-----------", "------------");
+ } else {
+ cli_out("%40s %16s %13s %13s %13s %13s %20s %18s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status",
+ "run time in"
+ " h:m:s");
+ cli_out("%40s %16s %13s %13s %13s %13s %20s %18s", "---------",
+ "-----------", "-----------", "-----------", "-----------",
+ "-----------", "------------", "--------------");
+ }
+
+ for (i = 1; i <= count; i++) {
+ /* Reset the variables to prevent carryover of values */
+ node_name = NULL;
+ files = 0;
+ size = 0;
+ lookup = 0;
+ skipped = 0;
+ status_str = NULL;
+ elapsed = 0;
+ time_left = 0;
+
+ /* Check if status is NOT_STARTED, and continue early */
+ keylen = snprintf(key, sizeof(key), "status-%d", i);
+
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status_rcd);
+ if (ret == -ENOENT) {
+ gf_log("cli", GF_LOG_TRACE, "count %d %d", count, i);
+ gf_log("cli", GF_LOG_TRACE, "failed to get status");
+ gf_log("cli", GF_LOG_ERROR, "node down and has failed to set dict");
+ continue;
+ /* skip this node if value not available*/
+ } else if (ret) {
+ gf_log("cli", GF_LOG_TRACE, "count %d %d", count, i);
+ gf_log("cli", GF_LOG_TRACE, "failed to get status");
+ continue;
+ /* skip this node if value not available*/
+ }
+
+ if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
+ continue;
+
+ if (GF_DEFRAG_STATUS_STARTED == status_rcd)
+ show_estimates = _gf_true;
+
+ keylen = snprintf(key, sizeof(key), "node-name-%d", i);
+ ret = dict_get_strn(dict, key, keylen, &node_name);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get node-name");
- if (-1 == req->rpc_status) {
- goto out;
- }
+ snprintf(key, sizeof(key), "files-%d", i);
+ ret = dict_get_uint64(dict, key, &files);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get file count");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ snprintf(key, sizeof(key), "size-%d", i);
+ ret = dict_get_uint64(dict, key, &size);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get size of xfer");
+ snprintf(key, sizeof(key), "lookups-%d", i);
+ ret = dict_get_uint64(dict, key, &lookup);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get lookedup file count");
- gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
- snprintf (msg, sizeof (msg), "Rename volume %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ snprintf(key, sizeof(key), "failures-%d", i);
+ ret = dict_get_uint64(dict, key, &failures);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get failures count");
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRename", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ snprintf(key, sizeof(key), "skipped-%d", i);
+ ret = dict_get_uint64(dict, key, &skipped);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get skipped count");
+
+ /* For remove-brick include skipped count into failure count*/
+ if (task_type != GF_TASK_TYPE_REBALANCE) {
+ failures += skipped;
+ skipped = 0;
}
-#endif
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ snprintf(key, sizeof(key), "run-time-%d", i);
+ ret = dict_get_double(dict, key, &elapsed);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get run-time");
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ snprintf(key, sizeof(key), "time-left-%d", i);
+ ret = dict_get_uint64(dict, key, &time_left);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get time left");
-int
-gf_cli3_1_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ if (elapsed > max_elapsed)
+ max_elapsed = elapsed;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if (time_left > max_time)
+ max_time = time_left;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ /* Check for array bound */
+ if (status_rcd >= GF_DEFRAG_STATUS_MAX)
+ status_rcd = GF_DEFRAG_STATUS_MAX;
- gf_log ("cli", GF_LOG_INFO, "Received resp to reset");
+ status_str = cli_vol_task_status_str[status_rcd];
+ size_str = gf_uint64_2human_readable(size);
+ hrs = elapsed / 3600;
+ min = ((uint64_t)elapsed % 3600) / 60;
+ sec = ((uint64_t)elapsed % 3600) % 60;
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "reset volume %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volReset", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ if (fix_layout) {
+ cli_out("%35s %50s %8d:%d:%d", node_name, status_str, hrs, min,
+ sec);
+ } else {
+ if (size_str) {
+ cli_out("%40s %16" PRIu64
+ " %13s"
+ " %13" PRIu64 " %13" PRIu64 " %13" PRIu64
+ " %20s "
+ "%8d:%02d:%02d",
+ node_name, files, size_str, lookup, failures, skipped,
+ status_str, hrs, min, sec);
+ } else {
+ cli_out("%40s %16" PRIu64 " %13" PRIu64 " %13" PRIu64
+ " %13" PRIu64 " %13" PRIu64
+ " %20s"
+ " %8d:%02d:%02d",
+ node_name, files, size, lookup, failures, skipped,
+ status_str, hrs, min, sec);
+ }
+ }
+ GF_FREE(size_str);
+ }
+
+ /* Max time will be non-zero if rebalance is still running */
+ if (max_time) {
+ hrs = max_time / 3600;
+ min = (max_time % 3600) / 60;
+ sec = (max_time % 3600) % 60;
+
+ if (hrs < REBAL_ESTIMATE_SEC_UPPER_LIMIT) {
+ cli_out(
+ "Estimated time left for rebalance to "
+ "complete : %8d:%02d:%02d",
+ hrs, min, sec);
+ } else {
+ cli_out(
+ "Estimated time left for rebalance to "
+ "complete : > 2 months. Please try again "
+ "later.");
+ }
+ } else {
+ /* Rebalance will return 0 if it could not calculate the
+ * estimates or if it is complete.
+ */
+ if (!show_estimates) {
+ goto out;
+ }
+ if (max_elapsed <= REBAL_ESTIMATE_START_TIME) {
+ cli_out(
+ "The estimated time for rebalance to complete "
+ "will be unavailable for the first 10 "
+ "minutes.");
+ } else {
+ cli_out(
+ "Rebalance estimated time unavailable. Please "
+ "try again later.");
}
-#endif
-
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
-
+ }
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int
-gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- char *help_str = NULL;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ int cmd = 0;
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *task_id_str = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "Failed to get volname");
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "rebalance-command",
+ (int32_t *)&cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log("glusterd", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ ret = dict_get_str_sizen(dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "failed to get %s from dict",
+ GF_REBALANCE_TID_KEY);
+ }
+ if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ } else {
+ if (!rsp.op_ret) {
+ /* append errstr in the cli msg for successful
+ * case since unlock failures can be highlighted
+ * event though rebalance command was successful
+ */
+ snprintf(msg, sizeof(msg),
+ "Rebalance on %s has been "
+ "started successfully. Use "
+ "rebalance status command to"
+ " check status of the "
+ "rebalance process.\nID: %s",
+ volname, task_id_str);
+ } else {
+ snprintf(msg, sizeof(msg),
+ "Starting rebalance on volume %s has "
+ "been unsuccessful.",
+ volname);
+ }
+ }
+ goto done;
+ }
+
+ if (cmd == GF_DEFRAG_CMD_STOP) {
+ if (rsp.op_ret == -1) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "rebalance volume %s stop failed",
+ volname);
+ goto done;
+ } else {
+ /* append errstr in the cli msg for successful case
+ * since unlock failures can be highlighted event though
+ * rebalance command was successful */
+ snprintf(msg, sizeof(msg),
+ "rebalance process may be in the middle of a "
+ "file migration.\nThe process will be fully "
+ "stopped once the migration of the file is "
+ "complete.\nPlease check rebalance process "
+ "for completion before doing any further "
+ "brick related tasks on the volume.\n%s",
+ rsp.op_errstr);
+ }
+ }
+ if (cmd == GF_DEFRAG_CMD_STATUS) {
+ if (rsp.op_ret == -1) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg),
+ "Failed to get the status of rebalance process");
+ goto done;
+ } else {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to set");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_rebalance(cmd, dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ goto out;
+ }
- dict = dict_new ();
+ ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REBALANCE);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Failed to print rebalance status");
- if (!dict) {
- ret = -1;
- goto out;
- }
+done:
+ if (global_state->mode & GLUSTER_MODE_XML)
+ cli_xml_output_str("volRebalance", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ else {
+ if (rsp.op_ret)
+ cli_err("volume rebalance: %s: failed%s%s", volname,
+ strlen(msg) ? ": " : "", msg);
+ else
+ cli_out("volume rebalance: %s: success%s%s", volname,
+ strlen(msg) ? ": " : "", msg);
+ }
+ ret = rsp.op_ret;
- ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+out:
+ gf_free_xdr_cli_rsp(rsp);
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ return ret;
+}
+static int
+gf_cli_rename_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to probe");
+ snprintf(msg, sizeof(msg), "Rename volume %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volRename", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
- goto out;
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (dict_get_str (dict, "help-str", &help_str) && !msg[0])
- snprintf (msg, sizeof (msg), "Set volume %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ if (rsp.op_ret)
+ cli_err("volume rename: failed");
+ else
+ cli_out("volume rename: success");
-#if (HAVE_LIB_XML)
- if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
- ret = cli_xml_output_str ("volSet", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ ret = rsp.op_ret;
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", ((help_str == NULL) ? msg : help_str));
+static int
+gf_cli_reset_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to reset");
+
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "reset volume %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volReset", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = rsp.op_ret;
+ if (rsp.op_ret)
+ cli_err("volume reset: failed: %s", msg);
+ else
+ cli_out("volume reset: success: %s", msg);
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int
-gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_ganesha_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to add brick");
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to ganesha");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Add Brick %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ dict = dict_new();
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volAddBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- if (rsp.op_ret)
- cli_err ("%s", msg);
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("nfs-ganesha: failed: %s", rsp.op_errstr);
else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ cli_err("nfs-ganesha: failed");
+ }
-out:
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- return ret;
-}
+ else {
+ cli_out("nfs-ganesha : success ");
+ }
-int
-gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- char *status = "unknown";
- int ret = -1;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- dict_t *dict = NULL;
- //char msg[1024] = {0,};
- char key[256] = {0,};
- int32_t i = 1;
- int32_t counter = 0;
- char *node_uuid = 0;
- gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
- uint64_t failures = 0;
-
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+ ret = rsp.op_ret;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ return ret;
+}
- ret = rsp.op_ret;
- if (rsp.op_ret == -1) {
- if (strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_err ("failed to get the status of "
- "remove-brick process");
- goto out;
+static char *
+is_server_debug_xlator(void *myframe)
+{
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char **words = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *debug_xlator = NULL;
+
+ frame = myframe;
+ local = frame->local;
+ words = (char **)local->words;
+
+ while (*words != NULL) {
+ if (strstr(*words, "trace") == NULL &&
+ strstr(*words, "error-gen") == NULL) {
+ words++;
+ continue;
+ }
+
+ key = *words;
+ words++;
+ value = *words;
+ if (value == NULL)
+ break;
+ if (strstr(value, "client")) {
+ words++;
+ continue;
+ } else {
+ if (!(strstr(value, "posix") || strstr(value, "acl") ||
+ strstr(value, "locks") || strstr(value, "io-threads") ||
+ strstr(value, "marker") || strstr(value, "index"))) {
+ words++;
+ continue;
+ } else {
+ debug_xlator = gf_strdup(key);
+ break;
+ }
}
+ }
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ return debug_xlator;
+}
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
+static int
+gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *help_str = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *debug_xlator = NULL;
+ char tmp_str[512] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to set");
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ /* For brick processes graph change does not happen on the fly.
+ * The process has to be restarted. So this is a check from the
+ * volume set option such that if debug xlators such as trace/errorgen
+ * are provided in the set command, warn the user.
+ */
+ debug_xlator = is_server_debug_xlator(myframe);
+
+ if (dict_get_str_sizen(dict, "help-str", &help_str) && !msg[0])
+ snprintf(msg, sizeof(msg), "Set volume %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ if (rsp.op_ret == 0 && debug_xlator) {
+ snprintf(tmp_str, sizeof(tmp_str),
+ "\n%s translator has been "
+ "added to the server volume file. Please restart the"
+ " volume for enabling the translator",
+ debug_xlator);
+ }
+
+ if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
+ ret = cli_xml_output_str("volSet", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = dict_get_int32 (dict, "count", &counter);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "count not set");
- goto out;
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("volume set: failed: %s", rsp.op_errstr);
+ else
+ cli_err("volume set: failed");
+ } else {
+ if (help_str == NULL) {
+ if (debug_xlator == NULL)
+ cli_out("volume set: success");
+ else
+ cli_out("volume set: success%s", tmp_str);
+ } else {
+ cli_out("%s", help_str);
}
+ }
+ ret = rsp.op_ret;
- cli_out ("%40s %16s %13s %13s %13s %14s", "Node", "Rebalanced-files",
- "size", "scanned", "failures", "status");
- cli_out ("%40s %16s %13s %13s %13s %14s", "---------", "-----------",
- "-----------", "-----------", "-----------", "------------");
+out:
+ if (dict)
+ dict_unref(dict);
+ GF_FREE(debug_xlator);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- do {
- snprintf (key, 256, "node-uuid-%d", i);
- ret = dict_get_str (dict, key, &node_uuid);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get node-uuid");
+int
+gf_cli_add_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to add brick");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "Add Brick %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volAddBrick", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- memset (key, 0, 256);
- snprintf (key, 256, "files-%d", i);
- ret = dict_get_uint64 (dict, key, &files);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get file count");
+ if (rsp.op_ret)
+ cli_err("volume add-brick: failed: %s", rsp.op_errstr);
+ else
+ cli_out("volume add-brick: success");
+ ret = rsp.op_ret;
- memset (key, 0, 256);
- snprintf (key, 256, "size-%d", i);
- ret = dict_get_uint64 (dict, key, &size);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get size of xfer");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- memset (key, 0, 256);
- snprintf (key, 256, "lookups-%d", i);
- ret = dict_get_uint64 (dict, key, &lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get lookedup file count");
+static int
+gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ int32_t command = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ const char *cmd_str;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "command", &command);
+ if (ret)
+ goto out;
+
+ cmd = command;
+
+ switch (cmd) {
+ case GF_OP_CMD_STOP:
+ cmd_str = "stop";
+ break;
+ case GF_OP_CMD_STATUS:
+ cmd_str = "status";
+ break;
+ default:
+ cmd_str = "unknown";
+ break;
+ }
+
+ ret = rsp.op_ret;
+ if (rsp.op_ret == -1) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "volume remove-brick %s: failed: %s",
+ cmd_str, rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "volume remove-brick %s: failed",
+ cmd_str);
- memset (key, 0, 256);
- snprintf (key, 256, "status-%d", i);
- ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get status");
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
- snprintf (key, 256, "failures-%d", i);
- ret = dict_get_uint64 (dict, key, &failures);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "Failed to get failure on files");
-
- switch (status_rcd) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
- case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- }
- cli_out ("%40s %16"PRId64 "%13"PRId64 "%13"PRId64 "%13"PRId64
- " %14s", node_uuid, files, size, lookup, failures,
- status);
- i++;
- } while (i <= counter);
+ cli_err("%s", msg);
+ goto out;
+ }
- //TODO: Do proper xml output
- /*
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRemoveBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
- cli_out ("%s", msg);
- */
-out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- if (dict)
- dict_unref (dict);
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret < 0) {
+ strncpy(msg, DICT_UNSERIALIZE_FAIL, sizeof(msg));
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ rsp.op_ret = -1;
+ goto xml_output;
+ }
-int
-gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
- gf1_op_commands cmd = GF_OP_CMD_NONE;
- char *cmd_str = "unknown";
- cli_local_t *local = NULL;
- call_frame_t *frame = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
+ gf_log("cli", GF_LOG_ERROR, "%s", msg);
+ goto out;
}
+ }
- frame = myframe;
- local = frame->local;
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (strcmp(rsp.op_errstr, "")) {
+ ret = cli_xml_output_vol_remove_brick(_gf_true, dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr,
+ "volRemoveBrick");
+ } else {
+ ret = cli_xml_output_vol_remove_brick(_gf_true, dict, rsp.op_ret,
+ rsp.op_errno, msg,
+ "volRemoveBrick");
+ }
+ goto out;
+ }
+
+ ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REMOVE_BRICK);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to print remove-brick rebalance status");
+ goto out;
+ }
+
+ if ((cmd == GF_OP_CMD_STOP) && (rsp.op_ret == 0)) {
+ cli_out(
+ "'remove-brick' process may be in the middle of a "
+ "file migration.\nThe process will be fully stopped "
+ "once the migration of the file is complete.\nPlease "
+ "check remove-brick process for completion before "
+ "doing any further brick related tasks on the "
+ "volume.");
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- ret = dict_get_int32 (local->dict, "command", (int32_t *)&cmd);
+static int
+gf_cli_remove_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *cmd_str = "unknown";
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *task_id_str = NULL;
+ dict_t *rsp_dict = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "command", (int32_t *)&cmd);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to get command");
- goto out;
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- switch (cmd) {
-
+ switch (cmd) {
+ case GF_OP_CMD_DETACH_START:
case GF_OP_CMD_START:
- cmd_str = "start";
- break;
+ cmd_str = "start";
+
+ ret = dict_get_str_sizen(rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "remove-brick-id is not present in dict");
+ }
+ break;
case GF_OP_CMD_COMMIT:
- cmd_str = "commit";
- break;
+ cmd_str = "commit";
+ break;
case GF_OP_CMD_COMMIT_FORCE:
- cmd_str = "commit force";
- break;
+ cmd_str = "commit force";
+ break;
default:
- cmd_str = "unknown";
- break;
- }
-
- gf_log ("cli", GF_LOG_INFO, "Received resp to remove brick");
-
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Remove Brick %s %s", cmd_str,
- (rsp.op_ret) ? "unsuccessful": "successful");
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRemoveBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ cmd_str = "unknown";
+ break;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to remove brick");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "Remove Brick %s %s", cmd_str,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_remove_brick(_gf_false, rsp_dict, rsp.op_ret,
+ rsp.op_errno, msg,
+ "volRemoveBrick");
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ cli_err("volume remove-brick %s: failed: %s", cmd_str, msg);
+ } else {
+ cli_out("volume remove-brick %s: success", cmd_str);
+ if (GF_OP_CMD_START == cmd && task_id_str != NULL)
+ cli_out("ID: %s", task_id_str);
+ if (GF_OP_CMD_COMMIT == cmd)
+ cli_out(
+ "Check the removed bricks to ensure all files "
+ "are migrated.\nIf files with data are "
+ "found on the brick path, copy them via a "
+ "gluster mount point before re-purposing the "
+ "removed brick. ");
+ }
+
+ ret = rsp.op_ret;
out:
- if (frame)
- frame->local = NULL;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- if (local) {
- dict_unref (local->dict);
- cli_local_wipe (local);
- }
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
-
- return ret;
+ return ret;
}
+static int
+gf_cli_reset_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ const char *rb_operation_str = NULL;
+ dict_t *rsp_dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *reset_op = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "operation", &reset_op);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "dict_get on operation failed");
+ goto out;
+ }
+
+ if (strcmp(reset_op, "GF_RESET_OP_START") &&
+ strcmp(reset_op, "GF_RESET_OP_COMMIT") &&
+ strcmp(reset_op, "GF_RESET_OP_COMMIT_FORCE")) {
+ ret = -1;
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+ }
+
+ if (rsp.op_ret && (strcmp(rsp.op_errstr, ""))) {
+ rb_operation_str = rsp.op_errstr;
+ } else {
+ if (!strcmp(reset_op, "GF_RESET_OP_START")) {
+ if (rsp.op_ret)
+ rb_operation_str = "reset-brick start operation failed";
+ else
+ rb_operation_str = "reset-brick start operation successful";
+ } else if (!strcmp(reset_op, "GF_RESET_OP_COMMIT")) {
+ if (rsp.op_ret)
+ rb_operation_str = "reset-brick commit operation failed";
+ else
+ rb_operation_str = "reset-brick commit operation successful";
+ } else if (!strcmp(reset_op, "GF_RESET_OP_COMMIT_FORCE")) {
+ if (rsp.op_ret)
+ rb_operation_str = "reset-brick commit force operation failed";
+ else
+ rb_operation_str =
+ "reset-brick commit force operation successful";
+ }
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to reset brick");
+ snprintf(msg, sizeof(msg), "%s",
+ rb_operation_str ? rb_operation_str : "Unknown operation");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_replace_brick(rsp_dict, rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+ if (rsp.op_ret)
+ cli_err("volume reset-brick: failed: %s", msg);
+ else
+ cli_out("volume reset-brick: success: %s", msg);
+ ret = rsp.op_ret;
-int
-gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *status_reply = NULL;
- gf1_cli_replace_op replace_op = 0;
- char *rb_operation_str = NULL;
- dict_t *rsp_dict = NULL;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- frame = (call_frame_t *) myframe;
+out:
+ if (frame)
+ frame->local = NULL;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (local)
+ cli_local_wipe(local);
- local = frame->local;
- GF_ASSERT (local);
- dict = local->dict;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on operation failed");
- goto out;
+ return ret;
+}
+static int
+gf_cli_replace_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ const char *rb_operation_str = NULL;
+ dict_t *rsp_dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *replace_op = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "operation", &replace_op);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "dict_get on operation failed");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- switch (replace_op) {
- case GF_REPLACE_OP_START:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick failed to start";
- else
- rb_operation_str = "replace-brick started successfully";
- break;
+ if (!strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+ if (rsp.op_ret || ret)
+ rb_operation_str = "replace-brick commit force operation failed";
+ else
+ rb_operation_str =
+ "replace-brick commit force operation successful";
+ } else {
+ gf_log(frame->this->name, GF_LOG_DEBUG, "Unknown operation");
+ }
+
+ if (rsp.op_ret && (strcmp(rsp.op_errstr, ""))) {
+ rb_operation_str = rsp.op_errstr;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to replace brick");
+ snprintf(msg, sizeof(msg), "%s",
+ rb_operation_str ? rb_operation_str : "Unknown operation");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_replace_brick(rsp_dict, rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- case GF_REPLACE_OP_STATUS:
-
- if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick status unknown";
- else {
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- rsp_dict = dict_new ();
-
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &rsp_dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
- ret = dict_get_str (rsp_dict, "status-reply",
- &status_reply);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to"
- "get status");
- goto out;
- }
-
- rb_operation_str = status_reply;
- }
+ if (rsp.op_ret)
+ cli_err("volume replace-brick: failed: %s", msg);
+ else
+ cli_out("volume replace-brick: success: %s", msg);
+ ret = rsp.op_ret;
- break;
+out:
+ if (frame)
+ frame->local = NULL;
- case GF_REPLACE_OP_PAUSE:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick pause failed";
- else
- rb_operation_str = "replace-brick paused successfully";
- break;
+ if (local)
+ cli_local_wipe(local);
- case GF_REPLACE_OP_ABORT:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick abort failed";
- else
- rb_operation_str = "replace-brick aborted successfully";
- break;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- case GF_REPLACE_OP_COMMIT:
- case GF_REPLACE_OP_COMMIT_FORCE:
- ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on src-brick failed");
- goto out;
- }
+ return ret;
+}
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on dst-brick failed");
- goto out;
- }
+static int
+gf_cli_log_rotate_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to log rotate");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "log rotate %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volLogRotate", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+ if (rsp.op_ret)
+ cli_err("volume log-rotate: failed: %s", msg);
+ else
+ cli_out("volume log-rotate: success");
+ ret = rsp.op_ret;
- if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick commit failed";
- else
- rb_operation_str = "replace-brick commit successful";
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- break;
+ return ret;
+}
- default:
- gf_log ("", GF_LOG_DEBUG,
- "Unknown operation");
- break;
- }
+static int
+gf_cli_sync_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to sync");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "volume sync: failed: %s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "volume sync: %s",
+ (rsp.op_ret) ? "failed" : "success");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volSync", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (rsp.op_ret && (strcmp (rsp.op_errstr, ""))) {
- rb_operation_str = rsp.op_errstr;
- }
+ if (rsp.op_ret)
+ cli_err("%s", msg);
+ else
+ cli_out("%s", msg);
+ ret = rsp.op_ret;
- gf_log ("cli", GF_LOG_INFO, "Received resp to replace brick");
- snprintf (msg,sizeof (msg), "%s",
- rb_operation_str ? rb_operation_str : "Unknown operation");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volReplaceBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+static int
+print_quota_list_usage_output(cli_local_t *local, char *path, int64_t avail,
+ char *sl_str, quota_limits_t *limits,
+ quota_meta_t *used_space, gf_boolean_t sl,
+ gf_boolean_t hl, double sl_num,
+ gf_boolean_t limit_set)
+{
+ int32_t ret = -1;
+ char *used_str = NULL;
+ char *avail_str = NULL;
+ char *hl_str = NULL;
+ char *sl_val = NULL;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_quota_xml_output(local, path, limits->hl, sl_str, sl_num,
+ used_space->size, avail, sl ? "Yes" : "No",
+ hl ? "Yes" : "No", limit_set);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output in xml format for quota list command");
}
-#endif
+ goto out;
+ }
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ used_str = gf_uint64_2human_readable(used_space->size);
-out:
- if (frame)
- frame->local = NULL;
+ if (limit_set) {
+ hl_str = gf_uint64_2human_readable(limits->hl);
+ sl_val = gf_uint64_2human_readable(sl_num);
- if (local) {
- dict_unref (local->dict);
- cli_local_wipe (local);
+ if (!used_str) {
+ cli_out("%-40s %7s %7s(%s) %8" PRIu64 "%9" PRIu64
+ ""
+ "%15s %18s",
+ path, hl_str, sl_str, sl_val, used_space->size, avail,
+ sl ? "Yes" : "No", hl ? "Yes" : "No");
+ } else {
+ avail_str = gf_uint64_2human_readable(avail);
+ cli_out("%-40s %7s %7s(%s) %8s %7s %15s %20s", path, hl_str, sl_str,
+ sl_val, used_str, avail_str, sl ? "Yes" : "No",
+ hl ? "Yes" : "No");
}
+ } else {
+ cli_out("%-36s %10s %10s %14s %9s %15s %18s", path, "N/A", "N/A",
+ used_str, "N/A", "N/A", "N/A");
+ }
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp_dict)
- dict_unref (rsp_dict);
+ ret = 0;
+out:
+ GF_FREE(hl_str);
+ GF_FREE(used_str);
+ GF_FREE(avail_str);
+ GF_FREE(sl_val);
- return ret;
+ return ret;
}
-
static int
-gf_cli3_1_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+print_quota_list_object_output(cli_local_t *local, char *path, int64_t avail,
+ char *sl_str, quota_limits_t *limits,
+ quota_meta_t *used_space, gf_boolean_t sl,
+ gf_boolean_t hl, double sl_num,
+ gf_boolean_t limit_set)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ int32_t ret = -1;
+ int64_t sl_val = sl_num;
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_quota_object_xml_output(local, path, sl_str, sl_val, limits,
+ used_space, avail, sl ? "Yes" : "No",
+ hl ? "Yes" : "No", limit_set);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output in xml format for quota list command");
+ }
+ goto out;
+ }
+
+ if (limit_set) {
+ cli_out("%-40s %9" PRIu64 " %9s(%" PRId64 ") %10" PRIu64
+ ""
+ "%10" PRIu64 " %11" PRIu64 " %15s %20s",
+ path, limits->hl, sl_str, sl_val, used_space->file_count,
+ used_space->dir_count, avail, sl ? "Yes" : "No",
+ hl ? "Yes" : "No");
+ } else {
+ cli_out("%-40s %9s %9s %10" PRIu64 " %10" PRIu64 " %11s %15s %20s",
+ path, "N/A", "N/A", used_space->file_count,
+ used_space->dir_count, "N/A", "N/A", "N/A");
+ }
+ ret = 0;
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to log rotate");
+out:
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "log rotate %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ return ret;
+}
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volLogRotate", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+static int
+print_quota_list_output(cli_local_t *local, char *path, char *default_sl,
+ quota_limits_t *limits, quota_meta_t *used_space,
+ int type, gf_boolean_t limit_set)
+{
+ int64_t avail = 0;
+ char percent_str[20] = {0};
+ char *sl_final = NULL;
+ int ret = -1;
+ double sl_num = 0;
+ gf_boolean_t sl = _gf_false;
+ gf_boolean_t hl = _gf_false;
+ int64_t used_size = 0;
+
+ GF_ASSERT(local);
+ GF_ASSERT(path);
+
+ if (limit_set) {
+ if (limits->sl < 0) {
+ ret = gf_string2percent(default_sl, &sl_num);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "could not convert default soft limit to percent");
+ goto out;
+ }
+ sl_num = (sl_num * limits->hl) / 100;
+ sl_final = default_sl;
+ } else {
+ sl_num = (limits->sl * limits->hl) / 100;
+ ret = snprintf(percent_str, sizeof(percent_str), "%" PRIu64 "%%",
+ limits->sl);
+ if (ret < 0)
goto out;
+ sl_final = percent_str;
}
-#endif
-
- if (rsp.op_ret)
- cli_err ("%s", msg);
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ used_size = used_space->size;
else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
-
+ used_size = used_space->file_count + used_space->dir_count;
+
+ if (limits->hl > used_size) {
+ avail = limits->hl - used_size;
+ hl = _gf_false;
+ if (used_size > sl_num)
+ sl = _gf_true;
+ else
+ sl = _gf_false;
+ } else {
+ avail = 0;
+ hl = sl = _gf_true;
+ }
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ ret = print_quota_list_usage_output(local, path, avail, sl_final,
+ limits, used_space, sl, hl, sl_num,
+ limit_set);
+ else
+ ret = print_quota_list_object_output(local, path, avail, sl_final,
+ limits, used_space, sl, hl, sl_num,
+ limit_set);
out:
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
-
- return ret;
+ return ret;
}
static int
-gf_cli3_1_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+print_quota_list_from_mountdir(cli_local_t *local, char *mountdir,
+ char *default_sl, char *path, int type)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ int ret = -1;
+ ssize_t xattr_size = 0;
+ quota_limits_t limits = {
+ 0,
+ };
+ quota_meta_t used_space = {
+ 0,
+ };
+ char *key = NULL;
+ gf_boolean_t limit_set = _gf_true;
+
+ GF_ASSERT(local);
+ GF_ASSERT(mountdir);
+ GF_ASSERT(path);
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ key = QUOTA_LIMIT_KEY;
+ else
+ key = QUOTA_LIMIT_OBJECTS_KEY;
+
+ ret = sys_lgetxattr(mountdir, key, (void *)&limits, sizeof(limits));
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get the xattr %s on %s. Reason : %s", key, mountdir,
+ strerror(errno));
+
+ switch (errno) {
+#if defined(ENODATA)
+ case ENODATA:
+#endif
+#if defined(ENOATTR) && (ENOATTR != ENODATA)
+ case ENOATTR:
+#endif
+ /* If it's an ENOATTR, quota/inode-quota is
+ * configured(limit is set at least for one directory).
+ * The user is trying to issue 'list/list-objects'
+ * command for a directory on which quota limit is
+ * not set and we are showing the used-space in case
+ * of list-usage and showing (dir_count, file_count)
+ * in case of list-objects. Other labels are
+ * shown "N/A".
+ */
- if (-1 == req->rpc_status) {
- goto out;
- }
+ limit_set = _gf_false;
+ goto enoattr;
+ break;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ default:
+ cli_err("%-40s %s", path, strerror(errno));
+ break;
}
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to sync");
+ goto out;
+ }
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "volume sync: %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volSync", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ limits.hl = ntoh64(limits.hl);
+ limits.sl = ntoh64(limits.sl);
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+enoattr:
+ xattr_size = sys_lgetxattr(mountdir, QUOTA_SIZE_KEY, NULL, 0);
+ if (xattr_size < (sizeof(int64_t) * 2) &&
+ type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS) {
+ ret = -1;
+ /* This can happen when glusterfs is upgraded from 3.6 to 3.7
+ * and the xattr healing is not completed.
+ */
+ } else if (xattr_size > (sizeof(int64_t) * 2)) {
+ ret = sys_lgetxattr(mountdir, QUOTA_SIZE_KEY, &used_space,
+ sizeof(used_space));
+ } else if (xattr_size > 0) {
+ /* This is for compatibility.
+ * Older version had only file usage
+ */
+ ret = sys_lgetxattr(mountdir, QUOTA_SIZE_KEY, &(used_space.size),
+ sizeof(used_space.size));
+ used_space.file_count = 0;
+ used_space.dir_count = 0;
+ } else {
+ ret = -1;
+ }
+
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get quota size on path %s: %s",
+ mountdir, strerror(errno));
+ print_quota_list_empty(path, type);
+ goto out;
+ }
+
+ used_space.size = ntoh64(used_space.size);
+ used_space.file_count = ntoh64(used_space.file_count);
+ used_space.dir_count = ntoh64(used_space.dir_count);
+
+ ret = print_quota_list_output(local, path, default_sl, &limits, &used_space,
+ type, limit_set);
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_print_limit_list (char *volname, char *limit_list,
- char *op_errstr)
-{
- int64_t size = 0;
- int64_t limit_value = 0;
- int32_t i, j, k;
- int32_t len = 0, ret = -1;
- char *size_str = NULL;
- char path [PATH_MAX] = {0, };
- char ret_str [1024] = {0, };
- char value [1024] = {0, };
- char mountdir [] = "/tmp/mntXXXXXX";
- char abspath [PATH_MAX] = {0, };
- runner_t runner = {0,};
-
- GF_VALIDATE_OR_GOTO ("cli", volname, out);
- GF_VALIDATE_OR_GOTO ("cli", limit_list, out);
-
- if (!connected)
- goto out;
+static int
+gluster_remove_auxiliary_mount(char *volname)
+{
+ int ret = -1;
+ char mountdir[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(mountdir, volname, "/");
+ ret = gf_umount_lazy(this->name, mountdir, 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "umount on %s failed, reason : %s",
+ mountdir, strerror(errno));
+ }
+
+ return ret;
+}
- len = strlen (limit_list);
- if (len == 0) {
- cli_err ("%s", op_errstr?op_errstr:"quota limit not set ");
- goto out;
+static int
+gf_cli_print_limit_list_from_dict(cli_local_t *local, char *volname,
+ dict_t *dict, char *default_sl, int count,
+ int op_ret, int op_errno, char *op_errstr)
+{
+ int ret = -1;
+ int i = 0;
+ char key[32] = {
+ 0,
+ };
+ int keylen;
+ char mountdir[PATH_MAX] = {
+ 0,
+ };
+ char *path = NULL;
+ int type = -1;
+
+ if (!dict || count <= 0)
+ goto out;
+
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get quota type");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list_begin(local, op_ret, op_errno,
+ op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
}
+ } else {
+ print_quota_list_header(type);
+ }
- if (mkdtemp (mountdir) == NULL) {
- gf_log ("cli", GF_LOG_WARNING, "failed to create a temporary "
- "mount directory");
- ret = -1;
- goto out;
+ while (count--) {
+ keylen = snprintf(key, sizeof(key), "path%d", i++);
+
+ ret = dict_get_strn(dict, key, keylen, &path);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_DEBUG, "Path not present in limit list");
+ continue;
}
- /* Mount a temporary client to fetch the disk usage
- * of the directory on which the limit is set.
- */
- ret = runcmd (SBIN_DIR"/glusterfs", "-s",
- "localhost", "--volfile-id", volname, "-l",
- DEFAULT_LOG_FILE_DIRECTORY"/quota-list.log",
- mountdir, NULL);
+ ret = gf_canonicalize_path(path);
+ if (ret)
+ goto out;
+ GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(mountdir, volname, path);
+ ret = print_quota_list_from_mountdir(local, mountdir, default_sl, path,
+ type);
+ }
+
+out:
+ return ret;
+}
+
+static int
+print_quota_list_from_quotad(call_frame_t *frame, dict_t *rsp_dict)
+{
+ char *path = NULL;
+ char *default_sl = NULL;
+ int ret = -1;
+ cli_local_t *local = NULL;
+ dict_t *gd_rsp_dict = NULL;
+ quota_meta_t used_space = {
+ 0,
+ };
+ quota_limits_t limits = {
+ 0,
+ };
+ quota_limits_t *size_limits = NULL;
+ int32_t type = 0;
+ int32_t success_count = 0;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ gd_rsp_dict = local->dict;
+
+ ret = dict_get_int32_sizen(rsp_dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get type");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(rsp_dict, GET_ANCESTRY_PATH_KEY, &path);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "path key is not present in dict");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(gd_rsp_dict, "default-soft-limit", &default_sl);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get default soft limit");
+ goto out;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST) {
+ ret = dict_get_bin(rsp_dict, QUOTA_LIMIT_KEY, (void **)&size_limits);
if (ret) {
- gf_log ("cli", GF_LOG_WARNING, "failed to mount glusterfs client");
- ret = -1;
- goto rm_dir;
+ gf_log("cli", GF_LOG_WARNING, "limit key not present in dict on %s",
+ path);
+ goto out;
}
-
- len = strlen (limit_list);
- if (len == 0) {
- cli_out ("quota limit not set ");
- goto unmount;
+ } else {
+ ret = dict_get_bin(rsp_dict, QUOTA_LIMIT_OBJECTS_KEY,
+ (void **)&size_limits);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING,
+ "object limit key not present in dict on %s", path);
+ goto out;
+ }
+ }
+
+ limits.hl = ntoh64(size_limits->hl);
+ limits.sl = ntoh64(size_limits->sl);
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ ret = quota_dict_get_meta(rsp_dict, QUOTA_SIZE_KEY,
+ SLEN(QUOTA_SIZE_KEY), &used_space);
+ else
+ ret = quota_dict_get_inode_meta(rsp_dict, QUOTA_SIZE_KEY,
+ SLEN(QUOTA_SIZE_KEY), &used_space);
+
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_WARNING, "size key not present in dict");
+ print_quota_list_empty(path, type);
+ goto out;
+ }
+
+ LOCK(&local->lock);
+ {
+ ret = dict_get_int32_sizen(gd_rsp_dict, "quota-list-success-count",
+ &success_count);
+ if (ret)
+ success_count = 0;
+
+ ret = dict_set_int32_sizen(gd_rsp_dict, "quota-list-success-count",
+ success_count + 1);
+ }
+ UNLOCK(&local->lock);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set quota-list-success-count in dict");
+ goto out;
+ }
+
+ if (success_count == 0) {
+ if (!(global_state->mode & GLUSTER_MODE_XML)) {
+ print_quota_list_header(type);
+ } else {
+ ret = cli_xml_output_vol_quota_limit_list_begin(local, 0, 0, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
}
+ }
- i = 0;
+ ret = print_quota_list_output(local, path, default_sl, &limits, &used_space,
+ type, _gf_true);
+out:
+ return ret;
+}
- cli_out ("\tpath\t\t limit_set\t size");
- cli_out ("-----------------------------------------------------------"
- "-----------------------");
- while (i < len) {
- j = 0;
- k = 0;
+static void *
+cli_cmd_broadcast_response_detached(void *opaque)
+{
+ int32_t ret = 0;
- while (limit_list [i] != ':') {
- path [k++] = limit_list [i++];
- }
- path [k] = '\0';
+ ret = (intptr_t)opaque;
+ cli_cmd_broadcast_response(ret);
- i++; //skip ':'
+ return NULL;
+}
- while (limit_list [i] != ',' && limit_list [i] != '\0') {
- value [j++] = limit_list[i++];
- }
- value [j] = '\0';
-
- snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
-
- ret = sys_lgetxattr (abspath, "trusted.limit.list", (void *) ret_str, 4096);
- if (ret < 0) {
- cli_out ("%-20s %10s", path, value);
- } else {
- sscanf (ret_str, "%"PRId64",%"PRId64, &size,
- &limit_value);
- size_str = gf_uint64_2human_readable ((uint64_t) size);
- if (size_str == NULL) {
- cli_out ("%-20s %10s %20"PRId64, path,
- value, size);
- } else {
- cli_out ("%-20s %10s %20s", path,
- value, size_str);
- GF_FREE (size_str);
- }
- }
- i++;
- }
+static int32_t
+cli_quota_compare_path(struct list_head *list1, struct list_head *list2)
+{
+ struct list_node *node1 = NULL;
+ struct list_node *node2 = NULL;
+ dict_t *dict1 = NULL;
+ dict_t *dict2 = NULL;
+ char *path1 = NULL;
+ char *path2 = NULL;
+ int ret = 0;
-unmount:
+ node1 = list_entry(list1, struct list_node, list);
+ node2 = list_entry(list2, struct list_node, list);
- runinit (&runner);
- runner_add_args (&runner, "umount",
-#if GF_LINUX_HOST_OS
- "-l",
-#endif
- mountdir, NULL);
- ret = runner_run_reuse (&runner);
- if (ret)
- runner_log (&runner, "cli", GF_LOG_WARNING, "error executing");
- runner_end (&runner);
+ dict1 = node1->ptr;
+ dict2 = node2->ptr;
-rm_dir:
- rmdir (mountdir);
-out:
- return ret;
+ ret = dict_get_str_sizen(dict1, GET_ANCESTRY_PATH_KEY, &path1);
+ if (ret < 0)
+ return 0;
+
+ ret = dict_get_str_sizen(dict2, GET_ANCESTRY_PATH_KEY, &path2);
+ if (ret < 0)
+ return 0;
+
+ return strcmp(path1, path2);
}
-int
-gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- char *volname = NULL;
- char *limit_list = NULL;
- int32_t type = 0;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+static int
+cli_quotad_getlimit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ /*TODO: we need to gather the path, hard-limit, soft-limit and used space*/
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ struct list_node *node = NULL;
+ struct list_node *tmpnode = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ int32_t list_count = 0;
+ pthread_t th_id = {
+ 0,
+ };
+ int32_t max_count = 0;
+
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ LOCK(&local->lock);
+ {
+ ret = dict_get_int32_sizen(local->dict, "quota-list-count",
+ &list_count);
+ if (ret)
+ list_count = 0;
+
+ list_count++;
+ ret = dict_set_int32_sizen(local->dict, "quota-list-count", list_count);
+ }
+ UNLOCK(&local->lock);
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set quota-list-count in dict");
+ goto out;
+ }
+
+ if (-1 == req->rpc_status) {
+ if (list_count == 0)
+ cli_err(
+ "Connection failed. Please check if quota "
+ "daemon is operational.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ ret = -1;
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("quota command failed : %s", rsp.op_errstr);
+ else
+ cli_err("quota command : failed");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
- if (rsp.op_ret &&
- strcmp (rsp.op_errstr, "") == 0) {
- snprintf (msg, sizeof (msg), "command unsuccessful %s",
- rsp.op_errstr);
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML)
- goto xml_output;
-#endif
- goto out;
+ node = list_node_add_order(dict, &local->dict_list,
+ cli_quota_compare_path);
+ if (node == NULL) {
+ gf_log("cli", GF_LOG_ERROR, "failed to add node to the list");
+ dict_unref(dict);
+ ret = -1;
+ goto out;
}
+ }
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ ret = dict_get_int32_sizen(local->dict, "max_count", &max_count);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR, "failed to get max_count");
+ goto out;
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
+ if (list_count == max_count) {
+ list_for_each_entry_safe(node, tmpnode, &local->dict_list, list)
+ {
+ dict = node->ptr;
+ print_quota_list_from_quotad(frame, dict);
+ list_node_del(node);
+ dict_unref(dict);
}
+ }
- ret = dict_get_str (dict, "volname", &volname);
+out:
+ /* Bad Fix: CLI holds the lock to process a command.
+ * When processing quota list command, below sequence of steps executed
+ * in the same thread and causing deadlock
+ *
+ * 1) CLI holds the lock
+ * 2) Send rpc_clnt_submit request to quotad for quota usage
+ * 3) If quotad is down, rpc_clnt_submit invokes cbk function with error
+ * 4) cbk function cli_quotad_getlimit_cbk invokes
+ * cli_cmd_broadcast_response which tries to hold lock to broadcast
+ * the results and hangs, because same thread has already holding
+ * the lock
+ *
+ * Broadcasting response in a separate thread which is not a
+ * good fix. This needs to be re-visted with better solution
+ */
+ if (ret == -1) {
+ ret = pthread_create(&th_id, NULL, cli_cmd_broadcast_response_detached,
+ (void *)-1);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get volname");
+ gf_log("cli", GF_LOG_ERROR, "pthread_create failed: %s",
+ strerror(errno));
+ } else {
+ cli_cmd_broadcast_response(ret);
+ }
+ gf_free_xdr_cli_rsp(rsp);
+
+ return ret;
+}
- ret = dict_get_str (dict, "limit_list", &limit_list);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get limit_list");
+static int
+cli_quotad_getlimit(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = dict_get_int32 (dict, "type", &type);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get type");
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
- if (type == GF_QUOTA_OPTION_TYPE_LIST) {
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_quota_limit_list
- (volname, limit_list, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ dict = data;
+ ret = add_cli_cmd_timeout_to_dict(dict);
- }
-#endif
- if (limit_list) {
- gf_cli3_1_print_limit_list (volname,
- limit_list,
- rsp.op_errstr);
- } else {
- gf_log ("cli", GF_LOG_INFO, "Received resp to quota "
- "command ");
- if (rsp.op_errstr)
- snprintf (msg, sizeof (msg), "%s",
- rsp.op_errstr);
- }
- } else {
- gf_log ("cli", GF_LOG_INFO, "Received resp to quota command ");
- if (rsp.op_errstr)
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "successful");
- }
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
-#if (HAVE_LIB_XML)
-xml_output:
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volQuota", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ goto out;
+ }
- if (strlen (msg) > 0) {
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- }
+ ret = cli_cmd_submit(global_quotad_rpc, &req, frame, &cli_quotad_clnt,
+ GF_AGGREGATOR_GETLIMIT, NULL, this,
+ cli_quotad_getlimit_cbk, (xdrproc_t)xdr_gf_cli_req);
- ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+static void
+gf_cli_quota_list(cli_local_t *local, char *volname, dict_t *dict,
+ char *default_sl, int count, int op_ret, int op_errno,
+ char *op_errstr)
+{
+ if (!cli_cmd_connected())
+ goto out;
- return ret;
+ if (count > 0) {
+ GF_VALIDATE_OR_GOTO("cli", volname, out);
+
+ gf_cli_print_limit_list_from_dict(local, volname, dict, default_sl,
+ count, op_ret, op_errno, op_errstr);
+ }
+out:
+ return;
}
-int
-gf_cli3_1_getspec_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_getspec_rsp rsp = {0,};
- int ret = -1;
- char *spec = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ char *default_sl = NULL;
+ cli_local_t *local = NULL;
+ char *default_sl_dup = NULL;
+ int32_t entry_count = 0;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ ret = -1;
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+
+ if (strcmp(rsp.op_errstr, "")) {
+ cli_err("quota command failed : %s", rsp.op_errstr);
+ if (rsp.op_ret == -ENOENT)
+ cli_err("please enter the path relative to the volume");
+ } else {
+ cli_err("quota command : failed");
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to getspec");
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to quota command");
- spec = GF_MALLOC (rsp.op_ret + 1, cli_mt_char);
- if (!spec) {
- gf_log("", GF_LOG_ERROR, "out of memory");
- goto out;
+ ret = dict_get_str_sizen(dict, "default-soft-limit", &default_sl);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_TRACE,
+ "failed to get default soft limit");
+
+ // default-soft-limit is part of rsp_dict only iff we sent
+ // GLUSTER_CLI_QUOTA with type being GF_QUOTA_OPTION_TYPE_LIST
+ if (default_sl) {
+ default_sl_dup = gf_strdup(default_sl);
+ if (!default_sl_dup) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr_sizen(local->dict, "default-soft-limit",
+ default_sl_dup);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_TRACE,
+ "failed to set default soft limit");
+ GF_FREE(default_sl_dup);
}
- memcpy (spec, rsp.spec, rsp.op_ret);
- spec[rsp.op_ret] = '\0';
- cli_out ("%s", spec);
- GF_FREE (spec);
+ }
- ret = 0;
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to get volname");
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_TRACE, "failed to get type");
-int
-gf_cli3_1_pmap_b2p_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- pmap_port_by_brick_rsp rsp = {0,};
- int ret = -1;
- char *spec = NULL;
+ ret = dict_get_int32_sizen(dict, "count", &entry_count);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_TRACE, "failed to get count");
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS)) {
+ gf_cli_quota_list(local, volname, dict, default_sl, entry_count,
+ rsp.op_ret, rsp.op_errno, rsp.op_errstr);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list_end(local);
+ if (ret < 0) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+ goto out;
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to pmap b2p");
+xml_output:
- cli_out ("%d", rsp.port);
- GF_FREE (spec);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volQuota", NULL, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = rsp.op_ret;
+ if (!rsp.op_ret && type != GF_QUOTA_OPTION_TYPE_LIST &&
+ type != GF_QUOTA_OPTION_TYPE_LIST_OBJECTS)
+ cli_out("volume quota : success");
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS)) {
+ gluster_remove_auxiliary_mount(volname);
+ }
+
+ cli_cmd_broadcast_response(ret);
+ if (dict)
+ dict_unref(dict);
+
+ gf_free_xdr_cli_rsp(rsp);
+
+ return ret;
}
+static int
+gf_cli_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char *spec = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ "getspec failed");
+ ret = -1;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to getspec");
+
+ spec = GF_MALLOC(rsp.op_ret + 1, cli_mt_char);
+ if (!spec) {
+ gf_log("", GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+ memcpy(spec, rsp.spec, rsp.op_ret);
+ spec[rsp.op_ret] = '\0';
+ cli_out("%s", spec);
+ GF_FREE(spec);
+
+ ret = 0;
-int32_t
-gf_cli3_1_probe (call_frame_t *frame, xlator_t *this,
- void *data)
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_getspec_rsp(rsp);
+ return ret;
+}
+
+static int
+gf_cli_pmap_b2p_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf1_cli_probe_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
- char *hostname = NULL;
- int port = 0;
+ pmap_port_by_brick_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char *spec = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT(myframe);
- dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- ret = dict_get_int32 (dict, "port", &port);
- if (ret)
- port = CLI_GLUSTERD_PORT;
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ "pump_b2p failed");
+ ret = -1;
+ goto out;
+ }
- req.hostname = hostname;
- req.port = port;
+ gf_log("cli", GF_LOG_INFO, "Received resp to pmap b2p");
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROBE, NULL,
- this, gf_cli3_1_probe_cbk,
- (xdrproc_t)xdr_gf1_cli_probe_req);
+ cli_out("%d", rsp.port);
+ GF_FREE(spec);
+
+ ret = rsp.op_ret;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ return ret;
}
-int32_t
-gf_cli3_1_deprobe (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_probe(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_deprobe_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
- char *hostname = NULL;
- int port = 0;
- int flags = 0;
+ gf_cli_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
+ int port = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32_sizen(dict, "port", &port);
+ if (ret) {
+ ret = dict_set_int32_sizen(dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_probe_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_PROBE,
+ this, cli_rpc_prog, NULL);
- dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ return ret;
+}
- ret = dict_get_int32 (dict, "port", &port);
+static int32_t
+gf_cli_deprobe(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
+ int port = 0;
+ int flags = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+ ret = dict_get_int32_sizen(dict, "port", &port);
+ if (ret) {
+ ret = dict_set_int32_sizen(dict, "port", CLI_GLUSTERD_PORT);
if (ret)
- port = CLI_GLUSTERD_PORT;
+ goto out;
+ }
- ret = dict_get_int32 (dict, "flags", &flags);
+ ret = dict_get_int32_sizen(dict, "flags", &flags);
+ if (ret) {
+ ret = dict_set_int32_sizen(dict, "flags", 0);
if (ret)
- flags = 0;
+ goto out;
+ }
- req.hostname = hostname;
- req.port = port;
- req.flags = flags;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEPROBE, NULL,
- this, gf_cli3_1_deprobe_cbk,
- (xdrproc_t)xdr_gf1_cli_deprobe_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_deprobe_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_DEPROBE,
+ this, cli_rpc_prog, NULL);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ return ret;
}
-int32_t
-gf_cli3_1_list_friends (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_list_friends(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_peer_list_req req = {0,};
- int ret = 0;
+ gf1_cli_peer_list_req req = {
+ 0,
+ };
+ int ret = 0;
+ unsigned long flags = 0;
+
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_ASSERT(frame->local == NULL);
+
+ flags = (long)data;
+ req.flags = flags;
+ frame->local = (void *)flags;
+ ret = cli_cmd_submit(
+ NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_LIST_FRIENDS, NULL, this,
+ gf_cli_list_friends_cbk, (xdrproc_t)xdr_gf1_cli_peer_list_req);
- if (!frame || !this) {
- ret = -1;
- goto out;
- }
+out:
+ if (ret && frame) {
+ /*
+ * If everything goes fine, gf_cli_list_friends_cbk()
+ * [invoked through cli_cmd_submit()]resets the
+ * frame->local to NULL. In case cli_cmd_submit()
+ * fails in between, RESET frame->local here.
+ */
+ frame->local = NULL;
+ }
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- req.flags = GF_CLI_LIST_ALL;
+static int32_t
+gf_cli_get_state(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LIST_FRIENDS, NULL,
- this, gf_cli3_1_list_friends_cbk,
- (xdrproc_t) xdr_gf1_cli_peer_list_req);
+ dict = data;
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ ret = cli_to_glusterd(&req, frame, gf_cli_get_state_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_GET_STATE, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
}
-int32_t
-gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_get_next_volume(call_frame_t *frame, xlator_t *this, void *data)
{
+ int ret = 0;
+ cli_cmd_volume_get_ctx_t *ctx = NULL;
+ cli_local_t *local = NULL;
- int ret = 0;
- cli_cmd_volume_get_ctx_t *ctx = NULL;
- cli_local_t *local = NULL;
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
- ctx = data;
- local = frame->local;
+ ctx = data;
+ local = frame->local;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_info_begin (local, 0, 0, "");
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
- goto out;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_begin(local, 0, 0, "");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
}
-#endif
+ }
- ret = gf_cli3_1_get_volume (frame, this, data);
+ ret = gf_cli_get_volume(frame, this, data);
+ if (!local || !local->get_vol.volname) {
+ if ((global_state->mode & GLUSTER_MODE_XML))
+ goto end_xml;
- if (!local || !local->get_vol.volname) {
-#if (HAVE_LIB_XML)
- if ((global_state->mode & GLUSTER_MODE_XML))
- goto end_xml;
-#endif
- cli_out ("No volumes present");
- goto out;
- }
+ cli_err("No volumes present");
+ goto out;
+ }
+ ctx->volname = local->get_vol.volname;
+ while (ctx->volname) {
+ ret = gf_cli_get_volume(frame, this, ctx);
+ if (ret)
+ goto out;
ctx->volname = local->get_vol.volname;
+ }
- while (ctx->volname) {
- ret = gf_cli3_1_get_volume (frame, this, ctx);
- if (ret)
- goto out;
- ctx->volname = local->get_vol.volname;
- }
-
-#if (HAVE_LIB_XML)
end_xml:
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_info_end (local);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
- }
-#endif
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_end(local);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_cmd_volume_get_ctx_t *ctx = NULL;
- dict_t *dict = NULL;
- int32_t flags = 0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ cli_cmd_volume_get_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+ int32_t flags = 0;
+
+ if (!this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ ctx = data;
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to create the dict");
+ ret = -1;
+ goto out;
+ }
+
+ if (ctx->volname) {
+ ret = dict_set_str_sizen(dict, "volname", ctx->volname);
+ if (ret)
+ goto out;
+ }
+
+ flags = ctx->flags;
+ ret = dict_set_int32_sizen(dict, "flags", flags);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to set flags");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog,
+ GLUSTER_CLI_GET_VOLUME, NULL, this,
+ gf_cli_get_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
- ctx = data;
+ GF_FREE(req.dict.dict_val);
- dict = dict_new ();
- if (!dict)
- goto out;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- if (ctx->volname) {
- ret = dict_set_str (dict, "volname", ctx->volname);
- if (ret)
- goto out;
- }
+static int32_t
+gf_cli3_1_uuid_get(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli3_1_uuid_get_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_UUID_GET,
+ this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- flags = ctx->flags;
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set flags");
- goto out;
- }
+static int32_t
+gf_cli3_1_uuid_reset(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli3_1_uuid_reset_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_RESET, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+static int32_t
+gf_cli_create_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GET_VOLUME, NULL,
- this, gf_cli3_1_get_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = data;
-out:
- if (dict)
- dict_unref (dict);
+ ret = cli_to_glusterd(&req, frame, gf_cli_create_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_CREATE_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE(req.dict.dict_val);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
+static int32_t
+gf_cli_delete_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
-int32_t
-gf_cli3_1_create_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+ ret = cli_to_glusterd(&req, frame, gf_cli_delete_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DELETE_VOLUME, this, cli_rpc_prog, NULL);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ return ret;
+}
+
+static int32_t
+gf_cli_start_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
- cli_local_t *local = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = data;
- dict = dict_ref ((dict_t *)data);
+ ret = cli_to_glusterd(&req, frame, gf_cli_start_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_START_VOLUME, this, cli_rpc_prog, NULL);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- local = cli_local_get ();
+ return ret;
+}
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+static int32_t
+gf_cli_stop_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_CREATE_VOLUME, NULL,
- this, gf_cli3_1_create_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli_stop_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STOP_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+static int32_t
+gf_cli_defrag_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (dict)
- dict_unref (dict);
+ dict = data;
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_defrag_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_delete_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_rename_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
- local = cli_local_get ();
+ dict = data;
- dict = dict_new ();
- ret = dict_set_str (dict, "volname", data);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "dict set failed");
- goto out;
- }
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialize dict");
- goto out;
- }
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DELETE_VOLUME, NULL,
- this, gf_cli3_1_delete_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog,
+ GLUSTER_CLI_RENAME_VOLUME, NULL, this,
+ gf_cli_rename_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
out:
- if (dict)
- dict_unref (dict);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_start_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_reset_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_reset_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_RESET_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+static int32_t
+gf_cli_ganesha(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- dict = data;
- local = cli_local_get ();
+ dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize dict");
- goto out;
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_ganesha_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_GANESHA,
+ this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+static int32_t
+gf_cli_set_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_START_VOLUME, NULL,
- this, gf_cli3_1_start_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = data;
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ ret = cli_to_glusterd(&req, frame, gf_cli_set_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SET_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- return ret;
+ return ret;
}
int32_t
-gf_cli3_1_stop_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_add_brick(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = data;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = data;
- local = cli_local_get ();
- dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli_add_brick_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_ADD_BRICK, this, cli_rpc_prog, NULL);
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ GF_FREE(req.dict.dict_val);
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STOP_VOLUME, NULL,
- this, gf_cli3_1_stop_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
-
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- char *volname = NULL;
- char *cmd_str = NULL;
- dict_t *dict = NULL;
- gf_cli_defrag_type cmd = 0;
- dict_t *req_dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
- dict = data;
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- gf_log ("", GF_LOG_DEBUG, "error");
+static int32_t
+gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ ;
+ gf_cli_req status_req = {{
+ 0,
+ }};
+ ;
+ int ret = 0;
+ dict_t *dict = NULL;
+ int32_t command = 0;
+ int32_t cmd = 0;
+
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32_sizen(dict, "command", &command);
+ if (ret)
+ goto out;
+
+ if ((command != GF_OP_CMD_STATUS) && (command != GF_OP_CMD_STOP)) {
+ ret = cli_to_glusterd(
+ &req, frame, gf_cli_remove_brick_cbk, (xdrproc_t)xdr_gf_cli_req,
+ dict, GLUSTER_CLI_REMOVE_BRICK, this, cli_rpc_prog, NULL);
+ } else {
+ /* Need rebalance status to be sent :-) */
+ if (command == GF_OP_CMD_STATUS)
+ cmd |= GF_DEFRAG_CMD_STATUS;
+ else
+ cmd |= GF_DEFRAG_CMD_STOP;
- ret = dict_get_str (dict, "command", &cmd_str);
+ ret = dict_set_int32_sizen(dict, "rebalance-command", (int32_t)cmd);
if (ret) {
- gf_log ("", GF_LOG_DEBUG, "error");
- goto out;
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set dict");
+ goto out;
}
- if (strcmp (cmd_str, "start") == 0) {
- cmd = GF_DEFRAG_CMD_START;
- ret = dict_get_str (dict, "option", &cmd_str);
- if (!ret) {
- if (strcmp (cmd_str, "force") == 0) {
- cmd = GF_DEFRAG_CMD_START_FORCE;
- }
- }
- goto done;
- }
+ ret = cli_to_glusterd(
+ &status_req, frame, gf_cli3_remove_brick_status_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_DEFRAG_VOLUME, this,
+ cli_rpc_prog, NULL);
+ }
- if (strcmp (cmd_str, "fix-layout") == 0) {
- cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
- goto done;
- }
- if (strcmp (cmd_str, "stop") == 0) {
- cmd = GF_DEFRAG_CMD_STOP;
- goto done;
- }
- if (strcmp (cmd_str, "status") == 0) {
- cmd = GF_DEFRAG_CMD_STATUS;
- }
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
-done:
- local = cli_local_get ();
+ GF_FREE(req.dict.dict_val);
- req_dict = dict_new ();
- if (!req_dict) {
- ret = -1;
- goto out;
- }
+ GF_FREE(status_req.dict.dict_val);
- ret = dict_set_str (req_dict, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
+ return ret;
+}
- ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
+static int32_t
+gf_cli_reset_brick(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+ char *dst_brick = NULL;
+ char *op = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str_sizen(dict, "operation", &op);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "dict_get on operation failed");
+ goto out;
+ }
+
+ if (!strcmp(op, "GF_RESET_OP_COMMIT") ||
+ !strcmp(op, "GF_RESET_OP_COMMIT_FORCE")) {
+ ret = dict_get_str_sizen(dict, "dst-brick", &dst_brick);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
+ gf_log(this->name, GF_LOG_DEBUG, "dict_get on dst-brick failed");
+ goto out;
}
+ }
- if (local) {
- local->dict = dict_ref (req_dict);
- frame->local = local;
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_reset_brick_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_RESET_BRICK, this, cli_rpc_prog, NULL);
- ret = dict_allocate_and_serialize (req_dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- goto out;
- }
+ GF_FREE(req.dict.dict_val);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_1_defrag_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ return ret;
+}
+
+static int32_t
+gf_cli_replace_brick(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+ int32_t op = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32_sizen(dict, "operation", &op);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "dict_get on operation failed");
+ goto out;
+ }
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_replace_brick_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REPLACE_BRICK, this, cli_rpc_prog, NULL);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return ret;
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
}
-int32_t
-gf_cli3_1_rename_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_log_rotate(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = data;
- dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli_log_rotate_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_LOG_ROTATE, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- goto out;
- }
+static int32_t
+gf_cli_sync_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli_sync_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYNC_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_RENAME_VOLUME, NULL,
- this, gf_cli3_1_rename_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ return ret;
+}
+
+static int32_t
+gf_cli_getspec(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_getspec_req req = {
+ 0,
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
+ dict_t *op_dict = NULL;
+
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str_sizen(dict, "volid", &req.key);
+ if (ret)
+ goto out;
+
+ op_dict = dict_new();
+ if (!op_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32_sizen(op_dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set min-op-version in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(op_dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set max-op-version in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(op_dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, &req, frame, &cli_handshake_prog,
+ GF_HNDSK_GETSPEC, NULL, this, gf_cli_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ if (op_dict) {
+ dict_unref(op_dict);
+ }
+ GF_FREE(req.xdata.xdata_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_reset_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_quota(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_quota_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_QUOTA,
+ this, cli_rpc_prog, NULL);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+static int32_t
+gf_cli_pmap_b2p(call_frame_t *frame, xlator_t *this, void *data)
+{
+ pmap_port_by_brick_req req = {
+ 0,
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
- dict = data;
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
+ dict = data;
+
+ ret = dict_get_str_sizen(dict, "brick", &req.brick);
+ if (ret)
+ goto out;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_RESET_VOLUME, NULL,
- this, gf_cli3_1_reset_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_cmd_submit(NULL, &req, frame, &cli_pmap_prog, GF_PMAP_PORTBYBRICK,
+ NULL, this, gf_cli_pmap_b2p_cbk,
+ (xdrproc_t)xdr_pmap_port_by_brick_req);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_set_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int
+gf_cli_fsm_log_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
+ gf1_cli_fsm_log_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ int tr_count = 0;
+ char key[64] = {0};
+ int keylen;
+ int i = 0;
+ char *old_state = NULL;
+ char *new_state = NULL;
+ char *event = NULL;
+ char *time = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("%s", rsp.op_errstr);
+ cli_err("fsm log unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.fsm_log.fsm_log_val, rsp.fsm_log.fsm_log_len,
+ &dict);
+
+ if (ret) {
+ cli_err(DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "count", &tr_count);
+ if (!ret && tr_count)
+ cli_out("number of transitions: %d", tr_count);
+ else
+ cli_err("No transitions");
+ for (i = 0; i < tr_count; i++) {
+ keylen = snprintf(key, sizeof(key), "log%d-old-state", i);
+ ret = dict_get_strn(dict, key, keylen, &old_state);
+ if (ret)
+ goto out;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ keylen = snprintf(key, sizeof(key), "log%d-event", i);
+ ret = dict_get_strn(dict, key, keylen, &event);
+ if (ret)
+ goto out;
- dict = data;
+ keylen = snprintf(key, sizeof(key), "log%d-new-state", i);
+ ret = dict_get_strn(dict, key, keylen, &new_state);
+ if (ret)
+ goto out;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ keylen = snprintf(key, sizeof(key), "log%d-time", i);
+ ret = dict_get_strn(dict, key, keylen, &time);
+ if (ret)
+ goto out;
+ cli_out(
+ "Old State: [%s]\n"
+ "New State: [%s]\n"
+ "Event : [%s]\n"
+ "timestamp: [%s]\n",
+ old_state, new_state, event, time);
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_SET_VOLUME, NULL,
- this, gf_cli3_1_set_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = rsp.op_ret;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ cli_cmd_broadcast_response(ret);
+ if (dict) {
+ dict_unref(dict);
+ }
+ gf_free_xdr_fsm_log_rsp(rsp);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_add_brick (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_fsm_log(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf1_cli_fsm_log_req req = {
+ 0,
+ };
+
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
+ GF_ASSERT(data);
+
+ if (!frame || !this || !data)
+ goto out;
+ req.name = data;
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_FSM_LOG,
+ NULL, this, gf_cli_fsm_log_cbk,
+ (xdrproc_t)xdr_gf1_cli_fsm_log_req);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ return ret;
+}
+
+static int
+gf_cli_gsync_config_command(dict_t *dict)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
- char *volname = NULL;
- int32_t count = 0;
+ runner_t runner = {
+ 0,
+ };
+ char *subop = NULL;
+ char *gwd = NULL;
+ char *slave = NULL;
+ char *confpath = NULL;
+ char *master = NULL;
+ char *op_name = NULL;
+ int ret = -1;
+ char conf_path[PATH_MAX] = "";
+
+ if (dict_get_str_sizen(dict, "subop", &subop) != 0)
+ return -1;
+
+ if (strcmp(subop, "get") != 0 && strcmp(subop, "get-all") != 0) {
+ cli_out(GEOREP " config updated successfully");
+ return 0;
+ }
+
+ if (dict_get_str_sizen(dict, "glusterd_workdir", &gwd) != 0 ||
+ dict_get_str_sizen(dict, "slave", &slave) != 0)
+ return -1;
+
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = NULL;
+ if (dict_get_str_sizen(dict, "op_name", &op_name) != 0)
+ op_name = NULL;
+
+ ret = dict_get_str_sizen(dict, "conf_path", &confpath);
+ if (ret || !confpath) {
+ ret = snprintf(conf_path, sizeof(conf_path) - 1,
+ "%s/" GEOREP "/gsyncd_template.conf", gwd);
+ conf_path[ret] = '\0';
+ confpath = conf_path;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", confpath);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ if (master)
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_arg(&runner, slave);
+ runner_argprintf(&runner, "--config-%s", subop);
+ if (op_name)
+ runner_add_arg(&runner, op_name);
+
+ return runner_run(&runner);
+}
- if (!frame || !this || !data) {
+static int
+gf_cli_print_status(char **title_values, gf_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count, int number_of_fields,
+ int is_detail)
+{
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int status_fields = 8; /* Indexed at 0 */
+ int total_spacing = 0;
+ char **output_values = NULL;
+ char *tmp = NULL;
+ char *hyphens = NULL;
+
+ /* calculating spacing for hyphens */
+ for (i = 0; i < number_of_fields; i++) {
+ /* Suppressing detail output for status */
+ if ((!is_detail) && (i > status_fields)) {
+ /* Suppressing detailed output for
+ * status */
+ continue;
+ }
+ spacing[i] += 3; /* Adding extra space to
+ distinguish between fields */
+ total_spacing += spacing[i];
+ }
+ total_spacing += 4; /* For the spacing between the fields */
+
+ /* char pointers for each field */
+ output_values = GF_MALLOC(number_of_fields * sizeof(char *),
+ gf_common_mt_char);
+ if (!output_values) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < number_of_fields; i++) {
+ output_values[i] = GF_CALLOC(spacing[i] + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!output_values[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ cli_out(" ");
+
+ /* setting the title "NODE", "MASTER", etc. from title_values[]
+ and printing the same */
+ for (j = 0; j < number_of_fields; j++) {
+ if ((!is_detail) && (j > status_fields)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ memset(output_values[j], ' ', spacing[j]);
+ memcpy(output_values[j], title_values[j], strlen(title_values[j]));
+ output_values[j][spacing[j]] = '\0';
+ }
+ cli_out("%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8], output_values[9],
+ output_values[10], output_values[11], output_values[12],
+ output_values[13], output_values[14], output_values[15]);
+
+ hyphens = GF_MALLOC((total_spacing + 1) * sizeof(char), gf_common_mt_char);
+ if (!hyphens) {
+ ret = -1;
+ goto out;
+ }
+
+ /* setting and printing the hyphens */
+ memset(hyphens, '-', total_spacing);
+ hyphens[total_spacing] = '\0';
+ cli_out("%s", hyphens);
+
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 0; j < number_of_fields; j++) {
+ if ((!is_detail) && (j > status_fields)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log("", GF_LOG_ERROR, "struct member empty.");
ret = -1;
goto out;
+ }
+ memset(output_values[j], ' ', spacing[j]);
+ memcpy(output_values[j], tmp, strlen(tmp));
+ output_values[j][spacing[j]] = '\0';
}
- dict = data;
+ cli_out("%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
+ output_values[0], output_values[1], output_values[2],
+ output_values[3], output_values[4], output_values[5],
+ output_values[6], output_values[7], output_values[8],
+ output_values[9], output_values[10], output_values[11],
+ output_values[12], output_values[13], output_values[14],
+ output_values[15]);
+ }
- ret = dict_get_str (dict, "volname", &volname);
+out:
+ if (output_values) {
+ for (i = 0; i < number_of_fields; i++) {
+ if (output_values[i])
+ GF_FREE(output_values[i]);
+ }
+ GF_FREE(output_values);
+ }
- if (ret)
- goto out;
+ if (hyphens)
+ GF_FREE(hyphens);
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
+ return ret;
+}
+
+int
+gf_gsync_status_t_comparator(const void *p, const void *q)
+{
+ char *slavekey1 = NULL;
+ char *slavekey2 = NULL;
+ slavekey1 = get_struct_variable(20, (*(gf_gsync_status_t **)p));
+ slavekey2 = get_struct_variable(20, (*(gf_gsync_status_t **)q));
+ if (!slavekey1 || !slavekey2) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ return 0;
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
+ return strcmp(slavekey1, slavekey2);
+}
+
+static int
+gf_cli_read_status_data(dict_t *dict, gf_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count, int number_of_fields)
+{
+ char *tmp = NULL;
+ char sts_val_name[PATH_MAX] = "";
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+
+ /* Storing per node status info in each object */
+ for (i = 0; i < gsync_count; i++) {
+ snprintf(sts_val_name, sizeof(sts_val_name), "status_value%d", i);
+
+ /* Fetching the values from dict, and calculating
+ the max length for each field */
+ ret = dict_get_bin(dict, sts_val_name, (void **)&(sts_vals[i]));
+ if (ret)
+ goto out;
+
+ for (j = 0; j < number_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log("", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
+ }
+ if (strlen(tmp) > spacing[j])
+ spacing[j] = strlen(tmp);
}
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_ADD_BRICK, NULL,
- this, gf_cli3_1_add_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ /* Sort based on Session Slave */
+ qsort(sts_vals, gsync_count, sizeof(gf_gsync_status_t *),
+ gf_gsync_status_t_comparator);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+static int
+gf_cli_gsync_status_output(dict_t *dict, gf_boolean_t is_detail)
+{
+ int gsync_count = 0;
+ int i = 0;
+ int ret = 0;
+ int spacing[16] = {0};
+ int num_of_fields = 16;
+ char errmsg[1024] = "";
+ char *master = NULL;
+ char *slave = NULL;
+ static char *title_values[] = {"MASTER NODE",
+ "MASTER VOL",
+ "MASTER BRICK",
+ "SLAVE USER",
+ "SLAVE",
+ "SLAVE NODE",
+ "STATUS",
+ "CRAWL STATUS",
+ "LAST_SYNCED",
+ "ENTRY",
+ "DATA",
+ "META",
+ "FAILURES",
+ "CHECKPOINT TIME",
+ "CHECKPOINT COMPLETED",
+ "CHECKPOINT COMPLETION TIME"};
+ gf_gsync_status_t **sts_vals = NULL;
+
+ /* Checks if any session is active or not */
+ ret = dict_get_int32_sizen(dict, "gsync-count", &gsync_count);
+ if (ret) {
+ ret = dict_get_str_sizen(dict, "master", &master);
+
+ ret = dict_get_str_sizen(dict, "slave", &slave);
+
+ if (master) {
+ if (slave)
+ snprintf(errmsg, sizeof(errmsg),
+ "No active geo-replication sessions between %s"
+ " and %s",
+ master, slave);
+ else
+ snprintf(errmsg, sizeof(errmsg),
+ "No active geo-replication sessions for %s", master);
+ } else
+ snprintf(errmsg, sizeof(errmsg),
+ "No active geo-replication sessions");
+
+ gf_log("cli", GF_LOG_INFO, "%s", errmsg);
+ cli_out("%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < num_of_fields; i++)
+ spacing[i] = strlen(title_values[i]);
+
+ /* gsync_count = number of nodes reporting output.
+ each sts_val object will store output of each
+ node */
+ sts_vals = GF_MALLOC(gsync_count * sizeof(gf_gsync_status_t *),
+ gf_common_mt_char);
+ if (!sts_vals) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_cli_read_status_data(dict, sts_vals, spacing, gsync_count,
+ num_of_fields);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to read status data");
+ goto out;
+ }
+
+ ret = gf_cli_print_status(title_values, sts_vals, spacing, gsync_count,
+ num_of_fields, is_detail);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to print status output");
+ goto out;
+ }
- return ret;
+out:
+ if (sts_vals)
+ GF_FREE(sts_vals);
+
+ return ret;
}
-int32_t
-gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_cli_req req = {{0,}};;
- gf_cli_req status_req = {{0,}};;
- int ret = 0;
- dict_t *dict = NULL;
- int32_t command = 0;
- char *volname = NULL;
- dict_t *req_dict = NULL;
- int32_t cmd = 0;
- cli_local_t *local = NULL;
-
- if (!frame || !this || !data) {
+static int32_t
+write_contents_to_common_pem_file(dict_t *dict, int output_count)
+{
+ char *workdir = NULL;
+ char common_pem_file[PATH_MAX] = "";
+ char *output = NULL;
+ char output_name[32] = "";
+ int bytes_written = 0;
+ int fd = -1;
+ int ret = -1;
+ int i = -1;
+
+ ret = dict_get_str_sizen(dict, "glusterd_workdir", &workdir);
+ if (ret || !workdir) {
+ gf_log("", GF_LOG_ERROR, "Unable to fetch workdir");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(common_pem_file, sizeof(common_pem_file),
+ "%s/geo-replication/common_secret.pem.pub", workdir);
+
+ sys_unlink(common_pem_file);
+
+ fd = open(common_pem_file, O_WRONLY | O_CREAT, 0600);
+ if (fd == -1) {
+ gf_log("", GF_LOG_ERROR, "Failed to open %s Error : %s",
+ common_pem_file, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ ret = snprintf(output_name, sizeof(output_name), "output_%d", i);
+ ret = dict_get_strn(dict, output_name, ret, &output);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Failed to get %s.", output_name);
+ cli_out("Unable to fetch output.");
+ }
+ if (output) {
+ bytes_written = sys_write(fd, output, strlen(output));
+ if (bytes_written != strlen(output)) {
+ gf_log("", GF_LOG_ERROR, "Failed to write to %s",
+ common_pem_file);
ret = -1;
goto out;
- }
-
- local = cli_local_get ();
- if (!local) {
+ }
+ /* Adding the new line character */
+ bytes_written = sys_write(fd, "\n", 1);
+ if (bytes_written != 1) {
+ gf_log("", GF_LOG_ERROR, "Failed to add new line char");
ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
goto out;
+ }
+ output = NULL;
}
+ }
- frame->local = local;
-
- dict = data;
+ cli_out("Common secret pub file present at %s", common_pem_file);
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
- local->dict = dict_ref (dict);
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
+static int
+gf_cli_sys_exec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ int output_count = -1;
+ int i = -1;
+ char *output = NULL;
+ char *command = NULL;
+ char output_name[32] = "";
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err("%s", rsp.op_errstr ? rsp.op_errstr : "Command failed.");
+ ret = rsp.op_ret;
+ goto out;
+ }
- ret = dict_get_int32 (dict, "command", &command);
- if (ret)
- goto out;
+ ret = dict_get_int32_sizen(dict, "output_count", &output_count);
+ if (ret) {
+ cli_out("Command executed successfully.");
+ ret = 0;
+ goto out;
+ }
- if ((command != GF_OP_CMD_STATUS) &&
- (command != GF_OP_CMD_STOP)) {
+ ret = dict_get_str_sizen(dict, "command", &command);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to get command from dict");
+ goto out;
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ if (!strcmp(command, "gsec_create")) {
+ ret = write_contents_to_common_pem_file(dict, output_count);
+ if (!ret)
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REMOVE_BRICK, NULL,
- this, gf_cli3_1_remove_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
- } else {
- /* Need rebalance status to e sent :-) */
- req_dict = dict_new ();
- if (!req_dict) {
- ret = -1;
- goto out;
- }
+ for (i = 1; i <= output_count; i++) {
+ ret = snprintf(output_name, sizeof(output_name), "output_%d", i);
+ ret = dict_get_strn(dict, output_name, ret, &output);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Failed to get %s.", output_name);
+ cli_out("Unable to fetch output.");
+ }
+ if (output) {
+ cli_out("%s", output);
+ output = NULL;
+ }
+ }
- ret = dict_set_str (req_dict, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
+ ret = 0;
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- if (command == GF_OP_CMD_STATUS)
- cmd |= GF_DEFRAG_CMD_STATUS;
- else
- cmd |= GF_DEFRAG_CMD_STOP;
+static int
+gf_cli_copy_file_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
- ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
+ GF_ASSERT(myframe);
- ret = dict_allocate_and_serialize (req_dict,
- &status_req.dict.dict_val,
- (size_t *) &status_req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ if (req->rpc_status == -1) {
+ goto out;
+ }
- goto out;
- }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- ret = cli_cmd_submit (&status_req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_remove_brick_status_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = dict_new();
- }
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+ if (ret)
+ goto out;
- if (status_req.dict.dict_val)
- GF_FREE (status_req.dict.dict_val);
+ if (rsp.op_ret) {
+ cli_err("%s", rsp.op_errstr ? rsp.op_errstr : "Copy unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
- if (req_dict)
- dict_unref (req_dict);
+ cli_out("Successfully copied file.");
- return ret;
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *volname = NULL;
- int32_t op = 0;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+static int
+gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *gsync_status = NULL;
+ char *master = NULL;
+ char *slave = NULL;
+ int32_t type = 0;
+ gf_boolean_t status_detail = _gf_false;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_gsync(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- dict = data;
+ if (rsp.op_ret) {
+ cli_err("%s",
+ rsp.op_errstr ? rsp.op_errstr : GEOREP " command unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "gsync-status", &gsync_status);
+ if (!ret)
+ cli_out("%s", gsync_status);
+
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ "failed to get type");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+
+ cli_out(
+ "%s " GEOREP " session between %s & %s has been successful",
+ type == GF_GSYNC_OPTION_TYPE_START ? "Starting" : "Stopping",
+ master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+
+ cli_out("%s " GEOREP " session between %s & %s has been successful",
+ type == GF_GSYNC_OPTION_TYPE_PAUSE ? "Pausing" : "Resuming",
+ master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ ret = gf_cli_gsync_config_command(dict);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ status_detail = dict_get_str_boolean(dict, "status-detail",
+ _gf_false);
+ ret = gf_cli_gsync_status_output(dict, status_detail);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out("Deleting " GEOREP
+ " session between %s & %s has been successful",
+ master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out("Creating " GEOREP
+ " session between %s & %s has been successful",
+ master, slave);
+ break;
- local = cli_local_get ();
- if (!local) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ default:
+ cli_out(GEOREP " command executed successfully");
+ }
- local->dict = dict_ref (dict);
- frame->local = local;
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- ret = dict_get_int32 (dict, "operation", &op);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on operation failed");
- goto out;
- }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on volname failed");
- goto out;
+static int32_t
+gf_cli_sys_exec(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_sys_exec_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_SYS_EXEC,
+ this, cli_rpc_prog, NULL);
+ if (ret)
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid data");
}
- ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on src-brick failed");
- goto out;
- }
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on dst-brick failed");
- goto out;
+static int32_t
+gf_cli_copy_file(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_copy_file_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_COPY_FILE, this, cli_rpc_prog, NULL);
+ if (ret)
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid data");
}
- gf_log (this->name, GF_LOG_DEBUG,
- "Received command replace-brick %s with "
- "%s with operation=%d", src_brick,
- dst_brick, op);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
+static int32_t
+gf_cli_gsync_set(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REPLACE_BRICK, NULL,
- this, gf_cli3_1_replace_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_gsync_set_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_GSYNC_SET, this, cli_rpc_prog, NULL);
+ GF_FREE(req.dict.dict_val);
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+int
+cli_profile_info_percentage_cmp(void *a, void *b)
+{
+ cli_profile_info_t *ia = NULL;
+ cli_profile_info_t *ib = NULL;
- return ret;
-}
+ ia = a;
+ ib = b;
+ if (ia->percentage_avg_latency < ib->percentage_avg_latency)
+ return -1;
+ else if (ia->percentage_avg_latency > ib->percentage_avg_latency)
+ return 1;
+ return 0;
+}
-int32_t
-gf_cli3_1_log_rotate (call_frame_t *frame, xlator_t *this,
- void *data)
+static void
+cmd_profile_volume_brick_out(dict_t *dict, int count, int interval)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ char key[256] = {0};
+ int i = 0;
+ uint64_t sec = 0;
+ uint64_t r_count = 0;
+ uint64_t w_count = 0;
+ uint64_t rb_counts[32] = {0};
+ uint64_t wb_counts[32] = {0};
+ cli_profile_info_t profile_info[GF_FOP_MAXVALUE] = {{0}};
+ cli_profile_info_t upcall_info[GF_UPCALL_FLAGS_MAXVALUE] = {
+ {0},
+ };
+ char output[128] = {0};
+ int per_line = 0;
+ char read_blocks[128] = {0};
+ char write_blocks[128] = {0};
+ int index = 0;
+ int is_header_printed = 0;
+ int ret = 0;
+ double total_percentage_latency = 0;
+
+ for (i = 0; i < 32; i++) {
+ snprintf(key, sizeof(key), "%d-%d-read-%" PRIu32, count, interval,
+ (1U << i));
+ ret = dict_get_uint64(dict, key, &rb_counts[i]);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
}
+ }
- dict = data;
+ for (i = 0; i < 32; i++) {
+ snprintf(key, sizeof(key), "%d-%d-write-%" PRIu32, count, interval,
+ (1U << i));
+ ret = dict_get_uint64(dict, key, &wb_counts[i]);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ snprintf(key, sizeof(key), "%d-%d-%d-upcall-hits", count, interval, i);
+ ret = dict_get_uint64(dict, key, &upcall_info[i].fop_hits);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ upcall_info[i].fop_name = (char *)gf_upcall_list[i];
+ }
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to serialize dict");
- goto out;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ snprintf(key, sizeof(key), "%d-%d-%d-hits", count, interval, i);
+ ret = dict_get_uint64(dict, key, &profile_info[i].fop_hits);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LOG_ROTATE, NULL,
- this, gf_cli3_1_log_rotate_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ snprintf(key, sizeof(key), "%d-%d-%d-avglatency", count, interval, i);
+ ret = dict_get_double(dict, key, &profile_info[i].avg_latency);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ snprintf(key, sizeof(key), "%d-%d-%d-minlatency", count, interval, i);
+ ret = dict_get_double(dict, key, &profile_info[i].min_latency);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ snprintf(key, sizeof(key), "%d-%d-%d-maxlatency", count, interval, i);
+ ret = dict_get_double(dict, key, &profile_info[i].max_latency);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ profile_info[i].fop_name = (char *)gf_fop_list[i];
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
+ total_percentage_latency += (profile_info[i].fop_hits *
+ profile_info[i].avg_latency);
+ }
+ if (total_percentage_latency) {
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ profile_info[i]
+ .percentage_avg_latency = 100 * ((profile_info[i].avg_latency *
+ profile_info[i].fop_hits) /
+ total_percentage_latency);
+ }
+ gf_array_insertionsort(profile_info, 1, GF_FOP_MAXVALUE - 1,
+ sizeof(cli_profile_info_t),
+ cli_profile_info_percentage_cmp);
+ }
+ snprintf(key, sizeof(key), "%d-%d-duration", count, interval);
+ ret = dict_get_uint64(dict, key, &sec);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+
+ snprintf(key, sizeof(key), "%d-%d-total-read", count, interval);
+ ret = dict_get_uint64(dict, key, &r_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+
+ snprintf(key, sizeof(key), "%d-%d-total-write", count, interval);
+ ret = dict_get_uint64(dict, key, &w_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+
+ if (interval == -1)
+ cli_out("Cumulative Stats:");
+ else
+ cli_out("Interval %d Stats:", interval);
+ snprintf(output, sizeof(output), "%14s", "Block Size:");
+ snprintf(read_blocks, sizeof(read_blocks), "%14s", "No. of Reads:");
+ snprintf(write_blocks, sizeof(write_blocks), "%14s", "No. of Writes:");
+ index = 14;
+ for (i = 0; i < 32; i++) {
+ if ((rb_counts[i] == 0) && (wb_counts[i] == 0))
+ continue;
+ per_line++;
+ snprintf(output + index, sizeof(output) - index, "%19" PRIu32 "b+ ",
+ (1U << i));
+ if (rb_counts[i]) {
+ snprintf(read_blocks + index, sizeof(read_blocks) - index,
+ "%21" PRId64 " ", rb_counts[i]);
+ } else {
+ snprintf(read_blocks + index, sizeof(read_blocks) - index, "%21s ",
+ "0");
+ }
+ if (wb_counts[i]) {
+ snprintf(write_blocks + index, sizeof(write_blocks) - index,
+ "%21" PRId64 " ", wb_counts[i]);
+ } else {
+ snprintf(write_blocks + index, sizeof(write_blocks) - index,
+ "%21s ", "0");
+ }
+ index += 22;
+ if (per_line == 3) {
+ cli_out("%s", output);
+ cli_out("%s", read_blocks);
+ cli_out("%s", write_blocks);
+ cli_out(" ");
+ per_line = 0;
+ snprintf(output, sizeof(output), "%14s", "Block Size:");
+ snprintf(read_blocks, sizeof(read_blocks), "%14s", "No. of Reads:");
+ snprintf(write_blocks, sizeof(write_blocks), "%14s",
+ "No. of Writes:");
+ index = 14;
+ }
+ }
+
+ if (per_line != 0) {
+ cli_out("%s", output);
+ cli_out("%s", read_blocks);
+ cli_out("%s", write_blocks);
+ }
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (profile_info[i].fop_hits == 0)
+ continue;
+ if (is_header_printed == 0) {
+ cli_out("%10s %13s %13s %13s %14s %11s", "%-latency", "Avg-latency",
+ "Min-Latency", "Max-Latency", "No. of calls", "Fop");
+ cli_out("%10s %13s %13s %13s %14s %11s", "---------", "-----------",
+ "-----------", "-----------", "------------", "----");
+ is_header_printed = 1;
+ }
+ if (profile_info[i].fop_hits) {
+ cli_out(
+ "%10.2lf %10.2lf us %10.2lf us %10.2lf us"
+ " %14" PRId64 " %11s",
+ profile_info[i].percentage_avg_latency,
+ profile_info[i].avg_latency, profile_info[i].min_latency,
+ profile_info[i].max_latency, profile_info[i].fop_hits,
+ profile_info[i].fop_name);
+ }
+ }
+
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ if (upcall_info[i].fop_hits == 0)
+ continue;
+ if (upcall_info[i].fop_hits) {
+ cli_out(
+ "%10.2lf %10.2lf us %10.2lf us %10.2lf us"
+ " %14" PRId64 " %11s",
+ upcall_info[i].percentage_avg_latency,
+ upcall_info[i].avg_latency, upcall_info[i].min_latency,
+ upcall_info[i].max_latency, upcall_info[i].fop_hits,
+ upcall_info[i].fop_name);
+ }
+ }
+
+ cli_out(" ");
+ cli_out("%12s: %" PRId64 " seconds", "Duration", sec);
+ cli_out("%12s: %" PRId64 " bytes", "Data Read", r_count);
+ cli_out("%12s: %" PRId64 " bytes", "Data Written", w_count);
+ cli_out(" ");
}
-int32_t
-gf_cli3_1_sync_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int ret = 0;
- gf_cli_req req = {{0,}};
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ char key[64] = {0};
+ int len;
+ int interval = 0;
+ int i = 1;
+ int32_t brick_count = 0;
+ char *volname = NULL;
+ char *brick = NULL;
+ char str[1024] = {
+ 0,
+ };
+ int stats_cleared = 0;
+ gf1_cli_info_op info_op = GF_CLI_INFO_NONE;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to profile");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_profile(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "op", (int32_t *)&op);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
+ cli_err("%s", rsp.op_errstr);
+ } else {
+ switch (op) {
+ case GF_CLI_STATS_START:
+ cli_out("Starting volume profile on %s has been %s ", volname,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ break;
+ case GF_CLI_STATS_STOP:
+ cli_out("Stopping volume profile on %s has been %s ", volname,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ break;
+ case GF_CLI_STATS_INFO:
+ break;
+ default:
+ cli_out("volume profile on %s has been %s ", volname,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ break;
}
+ }
- dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
-
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to serialize dict");
- goto out;
- }
+ if (rsp.op_ret) {
+ ret = rsp.op_ret;
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame,
- cli_rpc_prog, GLUSTER_CLI_SYNC_VOLUME,
- NULL, this, gf_cli3_1_sync_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ if (GF_CLI_STATS_INFO != op) {
+ ret = 0;
+ goto out;
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
+ if (ret)
+ goto out;
- return ret;
-}
+ if (!brick_count) {
+ cli_err("All bricks of volume %s are down.", volname);
+ goto out;
+ }
-int32_t
-gf_cli3_1_getspec (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_getspec_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
+ ret = dict_get_int32_sizen(dict, "info-op", (int32_t *)&info_op);
+ if (ret)
+ goto out;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ while (i <= brick_count) {
+ len = snprintf(key, sizeof(key), "%d-brick", i);
+ ret = dict_get_strn(dict, key, len, &brick);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Couldn't get brick name");
+ goto out;
}
- dict = data;
+ ret = dict_get_str_boolean(dict, "nfs", _gf_false);
- ret = dict_get_str (dict, "volid", &req.key);
if (ret)
- goto out;
+ len = snprintf(str, sizeof(str), "NFS Server : %s", brick);
+ else
+ len = snprintf(str, sizeof(str), "Brick: %s", brick);
+ cli_out("%s", str);
+ memset(str, '-', len);
+ cli_out("%s", str);
+
+ if (GF_CLI_INFO_CLEAR == info_op) {
+ len = snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32n(dict, key, len, &stats_cleared);
+ if (ret)
+ goto out;
+ cli_out(stats_cleared ? "Cleared stats."
+ : "Failed to clear stats.");
+ } else {
+ len = snprintf(key, sizeof(key), "%d-cumulative", i);
+ ret = dict_get_int32n(dict, key, len, &interval);
+ if (ret == 0)
+ cmd_profile_volume_brick_out(dict, i, interval);
- ret = cli_cmd_submit (&req, frame, &cli_handshake_prog,
- GF_HNDSK_GETSPEC, NULL,
- this, gf_cli3_1_getspec_cbk,
- (xdrproc_t) xdr_gf_getspec_req);
+ len = snprintf(key, sizeof(key), "%d-interval", i);
+ ret = dict_get_int32n(dict, key, len, &interval);
+ if (ret == 0)
+ cmd_profile_volume_brick_out(dict, i, interval);
+ }
+ i++;
+ }
+ ret = rsp.op_ret;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_quota (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_profile_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
- dict = data;
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
+ GF_ASSERT(data);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
+ dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_QUOTA, NULL,
- this, gf_cli3_1_quota_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_profile_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- GF_FREE (req.dict.dict_val);
-out:
- return ret;
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
-int32_t
-gf_cli3_1_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- pmap_port_by_brick_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ char key[256] = {0};
+ int keylen;
+ int i = 0;
+ int32_t brick_count = 0;
+ char brick[1024];
+ int32_t members = 0;
+ char *filename;
+ char *bricks;
+ uint64_t value = 0;
+ int32_t j = 0;
+ gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
+ uint64_t nr_open = 0;
+ uint64_t max_nr_open = 0;
+ double throughput = 0;
+ double time = 0;
+ int32_t time_sec = 0;
+ long int time_usec = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char *openfd_str = NULL;
+ gf_boolean_t nfs = _gf_false;
+ gf_boolean_t clear_stats = _gf_false;
+ int stats_cleared = 0;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to top");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("%s", rsp.op_errstr);
+ cli_err("volume top unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = dict_new();
- dict = data;
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_get_str (dict, "brick", &req.brick);
- if (ret)
- goto out;
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- ret = cli_cmd_submit (&req, frame, &cli_pmap_prog,
- GF_PMAP_PORTBYBRICK, NULL,
- this, gf_cli3_1_pmap_b2p_cbk,
- (xdrproc_t) xdr_pmap_port_by_brick_req);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ ret = dict_get_int32_sizen(dict, "op", (int32_t *)&op);
- return ret;
-}
+ if (op != GF_CLI_STATS_TOP) {
+ ret = 0;
+ goto out;
+ }
-static int
-gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_fsm_log_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- int tr_count = 0;
- char key[256] = {0};
- int i = 0;
- char *old_state = NULL;
- char *new_state = NULL;
- char *event = NULL;
- char *time = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_top(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "%d-top-op", 1);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&top_op);
+ if (ret)
+ goto out;
- if (rsp.op_ret) {
- if (strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- cli_err ("fsm log unsuccessful");
- ret = rsp.op_ret;
- goto out;
- }
+ clear_stats = dict_get_str_boolean(dict, "clear-stats", _gf_false);
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
+ while (i < brick_count) {
+ i++;
+ keylen = snprintf(brick, sizeof(brick), "%d-brick", i);
+ ret = dict_get_strn(dict, brick, keylen, &bricks);
+ if (ret)
+ goto out;
- ret = dict_unserialize (rsp.fsm_log.fsm_log_val,
- rsp.fsm_log.fsm_log_len,
- &dict);
+ nfs = dict_get_str_boolean(dict, "nfs", _gf_false);
- if (ret) {
- cli_err ("bad response");
+ if (clear_stats) {
+ keylen = snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32n(dict, key, keylen, &stats_cleared);
+ if (ret)
goto out;
+ cli_out(stats_cleared ? "Cleared stats for %s %s"
+ : "Failed to clear stats for %s %s",
+ nfs ? "NFS server on" : "brick", bricks);
+ continue;
}
- ret = dict_get_int32 (dict, "count", &tr_count);
- if (tr_count)
- cli_out("number of transitions: %d", tr_count);
+ if (nfs)
+ cli_out("NFS Server : %s", bricks);
else
- cli_out("No transitions");
- for (i = 0; i < tr_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-old-state", i);
- ret = dict_get_str (dict, key, &old_state);
- if (ret)
- goto out;
+ cli_out("Brick: %s", bricks);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-event", i);
- ret = dict_get_str (dict, key, &event);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "%d-members", i);
+ ret = dict_get_int32n(dict, key, keylen, &members);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-new-state", i);
- ret = dict_get_str (dict, key, &new_state);
+ switch (top_op) {
+ case GF_CLI_TOP_OPEN:
+ snprintf(key, sizeof(key), "%d-current-open", i);
+ ret = dict_get_uint64(dict, key, &nr_open);
if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-time", i);
- ret = dict_get_str (dict, key, &time);
+ break;
+ snprintf(key, sizeof(key), "%d-max-open", i);
+ ret = dict_get_uint64(dict, key, &max_nr_open);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "%d-max-openfd-time", i);
+ ret = dict_get_strn(dict, key, keylen, &openfd_str);
if (ret)
- goto out;
- cli_out ("Old State: [%s]\n"
- "New State: [%s]\n"
- "Event : [%s]\n"
- "timestamp: [%s]\n", old_state, new_state, event, time);
+ goto out;
+ cli_out("Current open fds: %" PRIu64 ", Max open fds: %" PRIu64
+ ", Max openfd time: %s",
+ nr_open, max_nr_open, openfd_str);
+ case GF_CLI_TOP_READ:
+ case GF_CLI_TOP_WRITE:
+ case GF_CLI_TOP_OPENDIR:
+ case GF_CLI_TOP_READDIR:
+ if (!members) {
+ continue;
+ }
+ cli_out("Count\t\tfilename\n=======================");
+ break;
+ case GF_CLI_TOP_READ_PERF:
+ case GF_CLI_TOP_WRITE_PERF:
+ snprintf(key, sizeof(key), "%d-throughput", i);
+ ret = dict_get_double(dict, key, &throughput);
+ if (!ret) {
+ snprintf(key, sizeof(key), "%d-time", i);
+ ret = dict_get_double(dict, key, &time);
+ }
+ if (!ret)
+ cli_out("Throughput %.2f MBps time %.4f secs", throughput,
+ time / 1e6);
+
+ if (!members) {
+ continue;
+ }
+ cli_out("%*s %-*s %-*s", VOL_TOP_PERF_SPEED_WIDTH, "MBps",
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH, "Filename",
+ VOL_TOP_PERF_TIME_WIDTH, "Time");
+ cli_out("%*s %-*s %-*s", VOL_TOP_PERF_SPEED_WIDTH,
+ "====", VOL_TOP_PERF_FILENAME_DEF_WIDTH,
+ "========", VOL_TOP_PERF_TIME_WIDTH, "====");
+ break;
+ default:
+ goto out;
}
- ret = rsp.op_ret;
+ for (j = 1; j <= members; j++) {
+ keylen = snprintf(key, sizeof(key), "%d-filename-%d", i, j);
+ ret = dict_get_strn(dict, key, keylen, &filename);
+ if (ret)
+ break;
+ snprintf(key, sizeof(key), "%d-value-%d", i, j);
+ ret = dict_get_uint64(dict, key, &value);
+ if (ret)
+ goto out;
+ if (top_op == GF_CLI_TOP_READ_PERF ||
+ top_op == GF_CLI_TOP_WRITE_PERF) {
+ keylen = snprintf(key, sizeof(key), "%d-time-sec-%d", i, j);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&time_sec);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "%d-time-usec-%d", i, j);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&time_usec);
+ if (ret)
+ goto out;
+ gf_time_fmt(timestr, sizeof timestr, time_sec, gf_timefmt_FT);
+ snprintf(timestr + strlen(timestr),
+ sizeof timestr - strlen(timestr), ".%ld", time_usec);
+ if (strlen(filename) < VOL_TOP_PERF_FILENAME_DEF_WIDTH)
+ cli_out("%*" PRIu64 " %-*s %-*s", VOL_TOP_PERF_SPEED_WIDTH,
+ value, VOL_TOP_PERF_FILENAME_DEF_WIDTH, filename,
+ VOL_TOP_PERF_TIME_WIDTH, timestr);
+ else
+ cli_out("%*" PRIu64 " ...%-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH, value,
+ VOL_TOP_PERF_FILENAME_ALT_WIDTH,
+ filename + strlen(filename) -
+ VOL_TOP_PERF_FILENAME_ALT_WIDTH,
+ VOL_TOP_PERF_TIME_WIDTH, timestr);
+ } else {
+ cli_out("%" PRIu64 "\t\t%s", value, filename);
+ }
+ }
+ }
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+
+ if (dict)
+ dict_unref(dict);
+
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_top_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
+ GF_ASSERT(data);
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_top_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
+
+static int
+gf_cli_getwd_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int ret = -1;
- gf1_cli_fsm_log_req req = {0,};
+ gf1_cli_getwd_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ cli_err("getwd failed");
+ ret = rsp.op_ret;
+ goto out;
+ }
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (data);
+ gf_log("cli", GF_LOG_INFO, "Received resp to getwd");
- if (!frame || !this || !data)
- goto out;
- req.name = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_FSM_LOG, NULL,
- this, gf_cli3_1_fsm_log_cbk,
- (xdrproc_t) xdr_gf1_cli_fsm_log_req);
+ cli_out("%s", rsp.wd);
+
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ cli_cmd_broadcast_response(ret);
+ if (rsp.wd) {
+ free(rsp.wd);
+ }
- return ret;
+ return ret;
}
-int
-gf_cli3_1_gsync_config_command (dict_t *dict)
+static int32_t
+gf_cli_getwd(call_frame_t *frame, xlator_t *this, void *data)
{
- runner_t runner = {0,};
- char *subop = NULL;
- char *gwd = NULL;
- char *slave = NULL;
- char *master = NULL;
- char *op_name = NULL;
+ int ret = -1;
+ gf1_cli_getwd_req req = {
+ 0,
+ };
- if (dict_get_str (dict, "subop", &subop) != 0)
- return -1;
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
- if (strcmp (subop, "get") != 0 && strcmp (subop, "get-all") != 0) {
- cli_out (GEOREP" config updated successfully");
- return 0;
- }
+ if (!frame || !this)
+ goto out;
- if (dict_get_str (dict, "glusterd_workdir", &gwd) != 0 ||
- dict_get_str (dict, "slave", &slave) != 0)
- return -1;
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_GETWD,
+ NULL, this, gf_cli_getwd_cbk,
+ (xdrproc_t)xdr_gf1_cli_getwd_req);
- if (dict_get_str (dict, "master", &master) != 0)
- master = NULL;
- if (dict_get_str (dict, "op_name", &op_name) != 0)
- op_name = NULL;
-
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gwd);
- if (master)
- runner_argprintf (&runner, ":%s", master);
- runner_add_arg (&runner, slave);
- runner_argprintf (&runner, "--config-%s", subop);
- if (op_name)
- runner_add_arg (&runner, op_name);
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return runner_run (&runner);
+ return ret;
}
-int
-gf_cli3_1_gsync_out_status (dict_t *dict)
+static void
+cli_print_volume_status_mempool(dict_t *dict, char *prefix)
{
- int gsync_count = 0;
- int i = 0;
- int ret = 0;
- char mst[PATH_MAX] = {0, };
- char slv[PATH_MAX]= {0, };
- char sts[PATH_MAX] = {0, };
- char hyphens[81] = {0, };
- char *mst_val = NULL;
- char *slv_val = NULL;
- char *sts_val = NULL;
+ int ret = -1;
+ int32_t mempool_count = 0;
+ char *name = NULL;
+ int32_t hotcount = 0;
+ int32_t coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int32_t maxalloc = 0;
+ uint64_t pool_misses = 0;
+ int32_t maxstdalloc = 0;
+ char key[128] = {
+ /* prefix is really small 'brick%d' really */
+ 0,
+ };
+ int keylen;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ keylen = snprintf(key, sizeof(key), "%s.mempool-count", prefix);
+ ret = dict_get_int32n(dict, key, keylen, &mempool_count);
+ if (ret)
+ goto out;
+
+ cli_out("Mempool Stats\n-------------");
+ cli_out("%-30s %9s %9s %12s %10s %8s %8s %12s", "Name", "HotCount",
+ "ColdCount", "PaddedSizeof", "AllocCount", "MaxAlloc", "Misses",
+ "Max-StdAlloc");
+ cli_out("%-30s %9s %9s %12s %10s %8s %8s %12s", "----", "--------",
+ "---------", "------------", "----------", "--------", "--------",
+ "------------");
+
+ for (i = 0; i < mempool_count; i++) {
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_strn(dict, key, keylen, &name);
+ if (ret)
+ goto out;
- cli_out ("%-20s %-50s %-10s", "MASTER", "SLAVE", "STATUS");
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &hotcount);
+ if (ret)
+ goto out;
- for (i=0; i<sizeof(hyphens)-1; i++)
- hyphens[i] = '-';
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &coldcount);
+ if (ret)
+ goto out;
- cli_out ("%s", hyphens);
+ snprintf(key, sizeof(key), "%s.pool%d.paddedsizeof", prefix, i);
+ ret = dict_get_uint64(dict, key, &paddedsizeof);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64(dict, key, &alloccount);
+ if (ret)
+ goto out;
- ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
- if (ret) {
- gf_log ("cli", GF_LOG_INFO, "No active geo-replication sessions"
- "present for the selected");
- ret = 0;
- goto out;
- }
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &maxalloc);
+ if (ret)
+ goto out;
- for (i = 1; i <= gsync_count; i++) {
- snprintf (mst, sizeof(mst), "master%d", i);
- snprintf (slv, sizeof(slv), "slave%d", i);
- snprintf (sts, sizeof(sts), "status%d", i);
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.max-stdalloc", prefix,
+ i);
+ ret = dict_get_int32n(dict, key, keylen, &maxstdalloc);
+ if (ret)
+ goto out;
- ret = dict_get_str (dict, mst, &mst_val);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64(dict, key, &pool_misses);
+ if (ret)
+ goto out;
- ret = dict_get_str (dict, slv, &slv_val);
- if (ret)
- goto out;
+ cli_out("%-30s %9d %9d %12" PRIu64 " %10" PRIu64 " %8d %8" PRIu64
+ " %12d",
+ name, hotcount, coldcount, paddedsizeof, alloccount, maxalloc,
+ pool_misses, maxstdalloc);
+ }
- ret = dict_get_str (dict, sts, &sts_val);
- if (ret)
- goto out;
+out:
+ return;
+}
- cli_out ("%-20s %-50s %-10s", mst_val,
- slv_val, sts_val);
+static void
+cli_print_volume_status_mem(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ char key[64] = {
+ 0,
+ };
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int val = 0;
+ int i = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Memory status for volume : %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ continue;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ continue;
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- out:
- return ret;
+ cli_out("Mallinfo\n--------");
-}
+ snprintf(key, sizeof(key), "brick%d.mallinfo.arena", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Arena", val);
-int
-gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- gf_cli_rsp rsp = {0, };
- dict_t *dict = NULL;
- char *gsync_status = NULL;
- char *master = NULL;
- char *slave = NULL;
- int32_t type = 0;
-
- if (req->rpc_status == -1) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.ordblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Ordblks", val);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to get response structure");
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.smblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Smblks", val);
- dict = dict_new ();
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Hblks", val);
- if (!dict) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblkhd", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Hblkhd", val);
- ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ snprintf(key, sizeof(key), "brick%d.mallinfo.usmblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Usmblks", val);
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fsmblks", i);
+ ret = dict_get_int32(dict, key, &val);
if (ret)
- goto out;
+ goto out;
+ cli_out("%-8s : %d", "Fsmblks", val);
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volGeoRep", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ snprintf(key, sizeof(key), "brick%d.mallinfo.uordblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Uordblks", val);
- if (rsp.op_ret) {
- cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
- GEOREP" command unsuccessful");
- ret = rsp.op_ret;
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fordblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Fordblks", val);
- ret = dict_get_str (dict, "gsync-status", &gsync_status);
- if (!ret)
- cli_out ("%s", gsync_status);
- else
- ret = 0;
+ snprintf(key, sizeof(key), "brick%d.mallinfo.keepcost", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Keepcost", val);
- ret = dict_get_int32 (dict, "type", &type);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get type");
- goto out;
+ cli_out(" ");
+ snprintf(key, sizeof(key), "brick%d", i);
+ cli_print_volume_status_mempool(dict, key);
+ }
+out:
+ cli_out("----------------------------------------------\n");
+ return;
+}
+
+static void
+cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int client_count = 0;
+ int current_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int total = 0;
+ char *name = NULL;
+ gf_boolean_t is_fuse_done = _gf_false;
+ gf_boolean_t is_gfapi_done = _gf_false;
+ gf_boolean_t is_rebalance_done = _gf_false;
+ gf_boolean_t is_glustershd_done = _gf_false;
+ gf_boolean_t is_quotad_done = _gf_false;
+ gf_boolean_t is_snapd_done = _gf_false;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Client connections for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "client-count", &client_count);
+ if (ret)
+ goto out;
+
+ cli_out("%-48s %15s", "Name", "count");
+ cli_out("%-48s %15s", "-----", "------");
+ for (i = 0; i < client_count; i++) {
+ name = NULL;
+ snprintf(key, sizeof(key), "client%d.name", i);
+ ret = dict_get_str(dict, key, &name);
+
+ if (!strncmp(name, "fuse", 4)) {
+ if (!is_fuse_done) {
+ is_fuse_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "fuse-count", &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strncmp(name, "gfapi", 5)) {
+ if (!is_gfapi_done) {
+ is_gfapi_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "gfapi-count", &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "rebalance")) {
+ if (!is_rebalance_done) {
+ is_rebalance_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "rebalance-count",
+ &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "glustershd")) {
+ if (!is_glustershd_done) {
+ is_glustershd_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "glustershd-count",
+ &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "quotad")) {
+ if (!is_quotad_done) {
+ is_quotad_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "quotad-count",
+ &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "snapd")) {
+ if (!is_snapd_done) {
+ is_snapd_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "snapd-count", &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
}
- switch (type) {
- case GF_GSYNC_OPTION_TYPE_START:
- case GF_GSYNC_OPTION_TYPE_STOP:
- if (dict_get_str (dict, "master", &master) != 0)
- master = "???";
- if (dict_get_str (dict, "slave", &slave) != 0)
- slave = "???";
+ print:
+ cli_out("%-48s %15d", name, current_count);
+ }
+out:
+ cli_out("\ntotal clients for volume %s : %d ", volname, total);
+ cli_out(
+ "-----------------------------------------------------------------\n");
+ return;
+}
- cli_out ("%s " GEOREP " session between %s & %s"
- " has been successful",
- type == GF_GSYNC_OPTION_TYPE_START ?
- "Starting" : "Stopping",
- master, slave);
- break;
+static void
+cli_print_volume_status_clients(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int client_count = 0;
+ char *clientname = NULL;
+ uint64_t bytesread = 0;
+ uint64_t byteswrite = 0;
+ uint32_t opversion = 0;
+ char key[128] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Client connections for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ hostname = NULL;
+ path = NULL;
+ online = -1;
+ client_count = 0;
+ clientname = NULL;
+ bytesread = 0;
+ byteswrite = 0;
+
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+
+ if (hostname && path) {
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
+ }
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
+ }
+
+ snprintf(key, sizeof(key), "brick%d.clientcount", i);
+ ret = dict_get_int32(dict, key, &client_count);
+
+ if (hostname && path)
+ cli_out("Clients connected : %d", client_count);
+ if (client_count == 0)
+ continue;
+
+ cli_out("%-48s %15s %15s %15s", "Hostname", "BytesRead", "BytesWritten",
+ "OpVersion");
+ cli_out("%-48s %15s %15s %15s", "--------", "---------", "------------",
+ "---------");
+ for (j = 0; j < client_count; j++) {
+ snprintf(key, sizeof(key), "brick%d.client%d.hostname", i, j);
+ ret = dict_get_str(dict, key, &clientname);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.bytesread", i, j);
+ ret = dict_get_uint64(dict, key, &bytesread);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.byteswrite", i, j);
+ ret = dict_get_uint64(dict, key, &byteswrite);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.opversion", i, j);
+ ret = dict_get_uint32(dict, key, &opversion);
+
+ cli_out("%-48s %15" PRIu64 " %15" PRIu64 " %15" PRIu32, clientname,
+ bytesread, byteswrite, opversion);
+ }
+ }
+out:
+ cli_out("----------------------------------------------\n");
+ return;
+}
- case GF_GSYNC_OPTION_TYPE_CONFIG:
- ret = gf_cli3_1_gsync_config_command (dict);
- break;
+#ifdef DEBUG /* this function is only used in debug */
+static void
+cli_print_volume_status_inode_entry(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char inode_type;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ snprintf(key, sizeof(key), "%s.gfid", prefix);
+ ret = dict_get_str(dict, key, &gfid);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.nlookup", prefix);
+ ret = dict_get_uint64(dict, key, &nlookup);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.ref", prefix);
+ ret = dict_get_uint32(dict, key, &ref);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.ia_type", prefix);
+ ret = dict_get_int32(dict, key, &ia_type);
+ if (ret)
+ goto out;
+
+ switch (ia_type) {
+ case IA_IFREG:
+ inode_type = 'R';
+ break;
+ case IA_IFDIR:
+ inode_type = 'D';
+ break;
+ case IA_IFLNK:
+ inode_type = 'L';
+ break;
+ case IA_IFBLK:
+ inode_type = 'B';
+ break;
+ case IA_IFCHR:
+ inode_type = 'C';
+ break;
+ case IA_IFIFO:
+ inode_type = 'F';
+ break;
+ case IA_IFSOCK:
+ inode_type = 'S';
+ break;
+ default:
+ inode_type = 'I';
+ break;
+ }
- case GF_GSYNC_OPTION_TYPE_STATUS:
- ret = gf_cli3_1_gsync_out_status (dict);
- goto out;
- default:
- cli_out (GEOREP" command executed successfully");
- }
+ cli_out("%-40s %14" PRIu64 " %14" PRIu32 " %9c", gfid, nlookup, ref,
+ inode_type);
out:
+ return;
+}
+#endif
+
+static void
+cli_print_volume_status_itables(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+ uint32_t lru_limit = 0;
+#ifdef DEBUG
+ int i = 0;
+#endif
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ snprintf(key, sizeof(key), "%s.lru_limit", prefix);
+ ret = dict_get_uint32(dict, key, &lru_limit);
+ if (ret)
+ goto out;
+ cli_out("LRU limit : %u", lru_limit);
+
+ snprintf(key, sizeof(key), "%s.active_size", prefix);
+ ret = dict_get_uint32(dict, key, &active_size);
+ if (ret)
+ goto out;
+
+#ifdef DEBUG
+ if (active_size != 0) {
+ cli_out("Active inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+ cli_out("%-40s %14s %14s %9s", "----", "-------", "---", "-------");
+ }
+ for (i = 0; i < active_size; i++) {
+ snprintf(key, sizeof(key), "%s.active%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+ cli_out(" ");
+#else
+ cli_out("Active Inodes : %u", active_size);
+
+#endif
- cli_cmd_broadcast_response (ret);
+ snprintf(key, sizeof(key), "%s.lru_size", prefix);
+ ret = dict_get_uint32(dict, key, &lru_size);
+ if (ret)
+ goto out;
+
+#ifdef DEBUG
+ if (lru_size != 0) {
+ cli_out("LRU inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+ cli_out("%-40s %14s %14s %9s", "----", "-------", "---", "-------");
+ }
+ for (i = 0; i < lru_size; i++) {
+ snprintf(key, sizeof(key), "%s.lru%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+ cli_out(" ");
+#else
+ cli_out("LRU Inodes : %u", lru_size);
+#endif
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ snprintf(key, sizeof(key), "%s.purge_size", prefix);
+ ret = dict_get_uint32(dict, key, &purge_size);
+ if (ret)
+ goto out;
+#ifdef DEBUG
+ if (purge_size != 0) {
+ cli_out("Purged inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+ cli_out("%-40s %14s %14s %9s", "----", "-------", "---", "-------");
+ }
+ for (i = 0; i < purge_size; i++) {
+ snprintf(key, sizeof(key), "%s.purge%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+#else
+ cli_out("Purge Inodes : %u", purge_size);
+#endif
- return ret;
+out:
+ return;
}
-int32_t
-gf_cli3_1_gsync_set (call_frame_t *frame, xlator_t *this,
- void *data)
+static void
+cli_print_volume_status_inode(dict_t *dict, gf_boolean_t notbrick)
{
- int ret = 0;
- dict_t *dict = NULL;
- gf_cli_req req = {{0,}};
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Inode tables for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- dict = data;
+ snprintf(key, sizeof(key), "brick%d.conncount", i);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ for (j = 0; j < conn_count; j++) {
+ if (conn_count > 1)
+ cli_out("Connection %d:", j + 1);
- goto out;
+ snprintf(key, sizeof(key), "brick%d.conn%d.itable", i, j);
+ cli_print_volume_status_itables(dict, key);
+ cli_out(" ");
}
+ }
+out:
+ cli_out("----------------------------------------------");
+ return;
+}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GSYNC_SET, NULL,
- this, gf_cli3_1_gsync_set_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+void
+cli_print_volume_status_fdtable(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[256] = {
+ 0,
+ };
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &refcount);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.maxfds", prefix);
+ ret = dict_get_uint32(dict, key, &maxfds);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.firstfree", prefix);
+ ret = dict_get_int32(dict, key, &firstfree);
+ if (ret)
+ goto out;
+
+ cli_out("RefCount = %d MaxFDs = %d FirstFree = %d", refcount, maxfds,
+ firstfree);
+
+ snprintf(key, sizeof(key), "%s.openfds", prefix);
+ ret = dict_get_int32(dict, key, &openfds);
+ if (ret)
+ goto out;
+ if (0 == openfds) {
+ cli_err("No open fds");
+ goto out;
+ }
+
+ cli_out("%-19s %-19s %-19s %-19s", "FD Entry", "PID", "RefCount", "Flags");
+ cli_out("%-19s %-19s %-19s %-19s", "--------", "---", "--------", "-----");
+
+ for (i = 0; i < maxfds; i++) {
+ snprintf(key, sizeof(key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_pid);
+ if (ret)
+ continue;
-out:
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ snprintf(key, sizeof(key), "%s.fdentry%d.refcount", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_refcount);
+ if (ret)
+ continue;
- return ret;
-}
+ snprintf(key, sizeof(key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_flags);
+ if (ret)
+ continue;
+ cli_out("%-19d %-19d %-19d %-19d", i, fd_pid, fd_refcount, fd_flags);
+ }
-int
-cli_profile_info_percentage_cmp (void *a, void *b)
+out:
+ return;
+}
+
+static void
+cli_print_volume_status_fd(dict_t *dict, gf_boolean_t notbrick)
{
- cli_profile_info_t *ia = NULL;
- cli_profile_info_t *ib = NULL;
- int ret = 0;
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("FD tables for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
- ia = a;
- ib = b;
- if (ia->percentage_avg_latency < ib->percentage_avg_latency)
- ret = -1;
- else if (ia->percentage_avg_latency > ib->percentage_avg_latency)
- ret = 1;
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
else
- ret = 0;
- return ret;
-}
-
+ cli_out("Brick : %s:%s", hostname, path);
-void
-cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
-{
- char key[256] = {0};
- int i = 0;
- uint64_t sec = 0;
- uint64_t r_count = 0;
- uint64_t w_count = 0;
- uint64_t rb_counts[32] = {0};
- uint64_t wb_counts[32] = {0};
- cli_profile_info_t profile_info[GF_FOP_MAXVALUE] = {{0}};
- char output[128] = {0};
- int per_line = 0;
- char read_blocks[128] = {0};
- char write_blocks[128] = {0};
- int index = 0;
- int is_header_printed = 0;
- int ret = 0;
- double total_percentage_latency = 0;
-
- for (i = 0; i < 32; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-read-%d", count,
- interval, (1 << i));
- ret = dict_get_uint64 (dict, key, &rb_counts[i]);
- }
-
- for (i = 0; i < 32; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-write-%d", count, interval,
- (1<<i));
- ret = dict_get_uint64 (dict, key, &wb_counts[i]);
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-hits", count,
- interval, i);
- ret = dict_get_uint64 (dict, key, &profile_info[i].fop_hits);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-avglatency", count,
- interval, i);
- ret = dict_get_double (dict, key, &profile_info[i].avg_latency);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-minlatency", count,
- interval, i);
- ret = dict_get_double (dict, key, &profile_info[i].min_latency);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", count,
- interval, i);
- ret = dict_get_double (dict, key, &profile_info[i].max_latency);
- profile_info[i].fop_name = gf_fop_list[i];
-
- total_percentage_latency +=
- (profile_info[i].fop_hits * profile_info[i].avg_latency);
- }
- if (total_percentage_latency) {
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- profile_info[i].percentage_avg_latency = 100 * (
- (profile_info[i].avg_latency* profile_info[i].fop_hits) /
- total_percentage_latency);
- }
- gf_array_insertionsort (profile_info, 1, GF_FOP_MAXVALUE - 1,
- sizeof (cli_profile_info_t),
- cli_profile_info_percentage_cmp);
+ snprintf(key, sizeof(key), "brick%d.conncount", i);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < conn_count; j++) {
+ cli_out("Connection %d:", j + 1);
+
+ snprintf(key, sizeof(key), "brick%d.conn%d.fdtable", i, j);
+ cli_print_volume_status_fdtable(dict, key);
+ cli_out(" ");
}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-duration", count, interval);
- ret = dict_get_uint64 (dict, key, &sec);
+ }
+out:
+ cli_out("----------------------------------------------");
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-read", count, interval);
- ret = dict_get_uint64 (dict, key, &r_count);
+static void
+cli_print_volume_status_call_frame(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ int ref_count = 0;
+ char *translator = 0;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+
+ if (!dict || !prefix)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-write", count, interval);
- ret = dict_get_uint64 (dict, key, &w_count);
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &ref_count);
+ if (ret)
+ return;
- if (ret == 0) {
- }
+ snprintf(key, sizeof(key), "%s.translator", prefix);
+ ret = dict_get_str(dict, key, &translator);
+ if (ret)
+ return;
- if (interval == -1)
- cli_out ("Cumulative Stats:");
- else
- cli_out ("Interval %d Stats:", interval);
- snprintf (output, sizeof (output), "%14s", "Block Size:");
- snprintf (read_blocks, sizeof (read_blocks), "%14s", "No. of Reads:");
- snprintf (write_blocks, sizeof (write_blocks), "%14s", "No. of Writes:");
- index = 14;
- for (i = 0; i < 32; i++) {
- if ((rb_counts[i] == 0) && (wb_counts[i] == 0))
- continue;
- per_line++;
- snprintf (output+index, sizeof (output)-index, "%19db+ ", (1<<i));
- if (rb_counts[i]) {
- snprintf (read_blocks+index, sizeof (read_blocks)-index,
- "%21"PRId64" ", rb_counts[i]);
- } else {
- snprintf (read_blocks+index, sizeof (read_blocks)-index,
- "%21s ", "0");
- }
- if (wb_counts[i]) {
- snprintf (write_blocks+index, sizeof (write_blocks)-index,
- "%21"PRId64" ", wb_counts[i]);
- } else {
- snprintf (write_blocks+index, sizeof (write_blocks)-index,
- "%21s ", "0");
- }
- index += 22;
- if (per_line == 3) {
- cli_out ("%s", output);
- cli_out ("%s", read_blocks);
- cli_out ("%s", write_blocks);
- cli_out (" ");
- per_line = 0;
- memset (output, 0, sizeof (output));
- memset (read_blocks, 0, sizeof (read_blocks));
- memset (write_blocks, 0, sizeof (write_blocks));
- snprintf (output, sizeof (output), "%14s", "Block Size:");
- snprintf (read_blocks, sizeof (read_blocks), "%14s",
- "No. of Reads:");
- snprintf (write_blocks, sizeof (write_blocks), "%14s",
- "No. of Writes:");
- index = 14;
- }
- }
+ snprintf(key, sizeof(key), "%s.complete", prefix);
+ ret = dict_get_int32(dict, key, &complete);
+ if (ret)
+ return;
- if (per_line != 0) {
- cli_out ("%s", output);
- cli_out ("%s", read_blocks);
- cli_out ("%s", write_blocks);
- }
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (profile_info[i].fop_hits == 0)
- continue;
- if (is_header_printed == 0) {
- cli_out ("%10s %13s %13s %13s %14s %11s", "%-latency",
- "Avg-latency", "Min-Latency", "Max-Latency",
- "No. of calls", "Fop");
- cli_out ("%10s %13s %13s %13s %14s %11s", "---------",
- "-----------", "-----------", "-----------",
- "------------", "----");
- is_header_printed = 1;
- }
- if (profile_info[i].fop_hits) {
- cli_out ("%10.2lf %10.2lf us %10.2lf us %10.2lf us"
- " %14"PRId64" %11s",
- profile_info[i].percentage_avg_latency,
- profile_info[i].avg_latency,
- profile_info[i].min_latency,
- profile_info[i].max_latency,
- profile_info[i].fop_hits,
- profile_info[i].fop_name);
- }
- }
- cli_out (" ");
- cli_out ("%12s: %"PRId64" seconds", "Duration", sec);
- cli_out ("%12s: %"PRId64" bytes", "Data Read", r_count);
- cli_out ("%12s: %"PRId64" bytes", "Data Written", w_count);
- cli_out (" ");
+ cli_out(" Ref Count = %d", ref_count);
+ cli_out(" Translator = %s", translator);
+ cli_out(" Completed = %s", (complete ? "Yes" : "No"));
+
+ snprintf(key, sizeof(key), "%s.parent", prefix);
+ ret = dict_get_str(dict, key, &parent);
+ if (!ret)
+ cli_out(" Parent = %s", parent);
+
+ snprintf(key, sizeof(key), "%s.windfrom", prefix);
+ ret = dict_get_str(dict, key, &wind_from);
+ if (!ret)
+ cli_out(" Wind From = %s", wind_from);
+
+ snprintf(key, sizeof(key), "%s.windto", prefix);
+ ret = dict_get_str(dict, key, &wind_to);
+ if (!ret)
+ cli_out(" Wind To = %s", wind_to);
+
+ snprintf(key, sizeof(key), "%s.unwindfrom", prefix);
+ ret = dict_get_str(dict, key, &unwind_from);
+ if (!ret)
+ cli_out(" Unwind From = %s", unwind_from);
+
+ snprintf(key, sizeof(key), "%s.unwindto", prefix);
+ ret = dict_get_str(dict, key, &unwind_to);
+ if (!ret)
+ cli_out(" Unwind To = %s", unwind_to);
}
-int32_t
-gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- char key[256] = {0};
- int interval = 0;
- int i = 1;
- int32_t brick_count = 0;
- char *volname = NULL;
- char *brick = NULL;
- char str[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+static void
+cli_print_volume_status_call_stack(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[256] = {
+ 0,
+ };
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ // char *op = NULL;
+ int count = 0;
+ int i = 0;
+
+ if (!prefix)
+ return;
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to profile");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ snprintf(key, sizeof(key), "%s.uid", prefix);
+ ret = dict_get_int32(dict, key, &uid);
+ if (ret)
+ return;
- dict = dict_new ();
+ snprintf(key, sizeof(key), "%s.gid", prefix);
+ ret = dict_get_int32(dict, key, &gid);
+ if (ret)
+ return;
- if (!dict) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ return;
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ snprintf(key, sizeof(key), "%s.unique", prefix);
+ ret = dict_get_uint64(dict, key, &unique);
+ if (ret)
+ return;
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- } else {
- dict->extra_stdfree = rsp.dict.dict_val;
- }
+ /*
+ snprintf (key, sizeof (key), "%s.op", prefix);
+ ret = dict_get_str (dict, key, &op);
+ if (ret)
+ return;
+ */
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_profile (dict, rsp.op_ret,
- rsp.op_errno,
- rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_get_int32(dict, key, &count);
+ if (ret)
+ return;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
+ cli_out(" UID : %d", uid);
+ cli_out(" GID : %d", gid);
+ cli_out(" PID : %d", pid);
+ cli_out(" Unique : %" PRIu64, unique);
+ // cli_out ("\tOp : %s", op);
+ cli_out(" Frames : %d", count);
- ret = dict_get_int32 (dict, "op", (int32_t*)&op);
- if (ret)
- goto out;
+ for (i = 0; i < count; i++) {
+ cli_out(" Frame %d", i + 1);
- if (rsp.op_ret && strcmp (rsp.op_errstr, "")) {
- cli_err ("%s", rsp.op_errstr);
- } else {
- switch (op) {
- case GF_CLI_STATS_START:
- cli_out ("Starting volume profile on %s has been %s ",
- volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- break;
- case GF_CLI_STATS_STOP:
- cli_out ("Stopping volume profile on %s has been %s ",
- volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- break;
- case GF_CLI_STATS_INFO:
- break;
- default:
- cli_out ("volume profile on %s has been %s ",
- volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- break;
- }
- }
+ snprintf(key, sizeof(key), "%s.frame%d", prefix, i);
+ cli_print_volume_status_call_frame(dict, key);
+ }
- if (rsp.op_ret) {
- ret = rsp.op_ret;
- goto out;
- }
+ cli_out(" ");
+}
- if (op != GF_CLI_STATS_INFO) {
- ret = 0;
- goto out;
+static void
+cli_print_volume_status_callpool(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int call_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Pending calls for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
+
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- ret = dict_get_int32 (dict, "count", &brick_count);
+ snprintf(key, sizeof(key), "brick%d.callpool.count", i);
+ ret = dict_get_int32(dict, key, &call_count);
if (ret)
- goto out;
+ goto out;
+ cli_out("Pending calls: %d", call_count);
- if (!brick_count) {
- cli_out ("All bricks of volume %s are down.", volname);
- goto out;
- }
+ if (0 == call_count)
+ continue;
- while (i <= brick_count) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-brick", i);
- ret = dict_get_str (dict, key, &brick);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Couldn't get brick name");
- goto out;
- }
+ for (j = 0; j < call_count; j++) {
+ cli_out("Call Stack%d", j + 1);
- ret = dict_get_str_boolean (dict, "nfs", _gf_false);
- if (ret)
- snprintf (str, sizeof (str), "NFS Server : %s", brick);
- else
- snprintf (str, sizeof (str), "Brick: %s", brick);
- cli_out ("%s", str);
- memset (str, '-', strlen (str));
- cli_out ("%s", str);
-
- snprintf (key, sizeof (key), "%d-cumulative", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- cmd_profile_volume_brick_out (dict, i, interval);
- }
- snprintf (key, sizeof (key), "%d-interval", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- cmd_profile_volume_brick_out (dict, i, interval);
- }
- i++;
+ snprintf(key, sizeof(key), "brick%d.callpool.stack%d", i, j);
+ cli_print_volume_status_call_stack(dict, key);
}
- ret = rsp.op_ret;
+ }
out:
- if (dict)
- dict_unref (dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_out("----------------------------------------------");
+ return;
}
-int32_t
-gf_cli3_1_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
+static void
+cli_print_volume_status_tasks(dict_t *dict)
{
- int ret = -1;
- gf_cli_req req = {{0,}};
- dict_t *dict = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int count = 0;
+ int task_count = 0;
+ int status = 0;
+ char *op = NULL;
+ char *task_id_str = NULL;
+ char *volname = NULL;
+ char key[64] = {
+ 0,
+ };
+ char task[32] = {
+ 0,
+ };
+ char *brick = NULL;
+
+ ret = dict_get_int32_sizen(dict, "tasks", &task_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get tasks count");
+ return;
+ }
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (data);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
- if (!frame || !this || !data)
- goto out;
- dict = data;
+ cli_out("Task Status of Volume %s", volname);
+ cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ if (task_count == 0) {
+ cli_out("There are no active volume tasks");
+ cli_out(" ");
+ return;
+ }
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ for (i = 0; i < task_count; i++) {
+ snprintf(key, sizeof(key), "task%d.type", i);
+ ret = dict_get_str(dict, key, &op);
+ if (ret)
+ return;
+ cli_out("%-20s : %-20s", "Task", op);
+
+ snprintf(key, sizeof(key), "task%d.id", i);
+ ret = dict_get_str(dict, key, &task_id_str);
+ if (ret)
+ return;
+ cli_out("%-20s : %-20s", "ID", task_id_str);
+
+ snprintf(key, sizeof(key), "task%d.status", i);
+ ret = dict_get_int32(dict, key, &status);
+ if (ret)
+ return;
+
+ snprintf(task, sizeof(task), "task%d", i);
+ if (!strcmp(op, "Remove brick")) {
+ snprintf(key, sizeof(key), "%s.count", task);
+ ret = dict_get_int32(dict, key, &count);
+ if (ret)
goto out;
- }
+ cli_out("%-20s", "Removed bricks:");
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_profile_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ for (j = 1; j <= count; j++) {
+ snprintf(key, sizeof(key), "%s.brick%d", task, j);
+ ret = dict_get_str(dict, key, &brick);
+ if (ret)
+ goto out;
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ cli_out("%-20s", brick);
+ }
+ }
+ cli_out("%-20s : %-20s", "Status", cli_vol_task_status_str[status]);
+ cli_out(" ");
+ }
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
+out:
+ return;
}
-int32_t
-gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- char key[256] = {0};
- int i = 0;
- int32_t brick_count = 0;
- char brick[1024];
- int32_t members = 0;
- char *filename;
- char *bricks;
- uint64_t value = 0;
- int32_t j = 0;
- gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
- uint64_t nr_open = 0;
- uint64_t max_nr_open = 0;
- double throughput = 0;
- double time = 0;
- long int time_sec = 0;
- long int time_usec = 0;
- struct tm *tm = NULL;
- char timestr[256] = {0, };
- char *openfd_str = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
+static int
+gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int i = 0;
+ int type = -1;
+ int hot_brick_count = -1;
+ int pid = -1;
+ uint32_t cmd = 0;
+ gf_boolean_t notbrick = _gf_false;
+ char key[64] = {
+ 0,
+ };
+ char *hostname = NULL;
+ char *path = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ cli_volume_status_t status = {0};
+ cli_local_t *local = NULL;
+ gf_boolean_t wipe_local = _gf_false;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received response to status cmd");
+
+ local = ((call_frame_t *)myframe)->local;
+ if (!local) {
+ local = cli_local_get();
+ if (!local) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get local");
+ goto out;
}
+ wipe_local = _gf_true;
+ }
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to top");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "Unable to decode response");
- goto out;
- }
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg),
+ "Unable to obtain volume status information.");
- if (rsp.op_ret) {
- if (strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- cli_err ("volume top unsuccessful");
- ret = rsp.op_ret;
- goto out;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all)
+ cli_xml_output_str("volStatus", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ ret = 0;
+ goto out;
+ }
+
+ cli_err("%s", msg);
+ if (local && local->all) {
+ ret = 0;
+ cli_out(" ");
+ } else
+ ret = -1;
+
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to create the dict");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ if (local && local->dict) {
+ dict_ref(dict);
+ ret = dict_set_static_ptr(local->dict, "rsp-dict", dict);
+ ret = 0;
+ } else {
+ gf_log("cli", GF_LOG_ERROR, "local not found");
+ ret = -1;
+ }
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_begin(local, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto xml_end;
+ }
+ }
+ if (cmd & GF_CLI_STATUS_TASKS) {
+ ret = cli_xml_output_vol_status_tasks_detail(local, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ goto xml_end;
+ }
+ } else {
+ ret = cli_xml_output_vol_status(local, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto xml_end;
+ }
+ }
+
+ xml_end:
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_end(local);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+ }
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD) ||
+ (cmd & GF_CLI_STATUS_QUOTAD) || (cmd & GF_CLI_STATUS_SNAPD) ||
+ (cmd & GF_CLI_STATUS_BITD) || (cmd & GF_CLI_STATUS_SCRUB))
+ notbrick = _gf_true;
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ cli_print_volume_status_mem(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CLIENTS:
+ cli_print_volume_status_clients(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CLIENT_LIST:
+ cli_print_volume_status_client_list(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_INODE:
+ cli_print_volume_status_inode(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_FD:
+ cli_print_volume_status_fd(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CALLPOOL:
+ cli_print_volume_status_callpool(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_TASKS:
+ cli_print_volume_status_tasks(dict);
+ goto cont;
+ break;
+ default:
+ break;
+ }
- dict = dict_new ();
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
- if (!dict) {
- ret = -1;
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- }
+ index_max = brick_index_max + other_count;
- ret = dict_get_int32 (dict, "op", (int32_t*)&op);
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret)
+ goto out;
- if (op != GF_CLI_STATS_TOP) {
- ret = 0;
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "hot_brick_count", &hot_brick_count);
+ if (ret)
+ goto out;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_top (dict, rsp.op_ret,
- rsp.op_errno,
- rsp.op_errstr);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- }
- goto out;
+ cli_out("Status of volume: %s", volname);
+
+ if ((cmd & GF_CLI_STATUS_DETAIL) == 0) {
+ cli_out("%-*s %s %s %s %s", CLI_VOL_STATUS_BRICK_LEN,
+ "Gluster process", "TCP Port", "RDMA Port", "Online", "Pid");
+ cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
+ }
+
+ status.brick = GF_MALLOC(PATH_MAX + 256, gf_common_mt_strdup);
+ if (!status.brick) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i <= index_max; i++) {
+ status.rdma_port = 0;
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ continue;
+
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ continue;
+
+ /* Brick/not-brick is handled separately here as all
+ * types of nodes are contained in the default output
+ */
+ status.brick[0] = '\0';
+ if (!strcmp(hostname, "NFS Server") ||
+ !strcmp(hostname, "Self-heal Daemon") ||
+ !strcmp(hostname, "Quota Daemon") ||
+ !strcmp(hostname, "Snapshot Daemon") ||
+ !strcmp(hostname, "Scrubber Daemon") ||
+ !strcmp(hostname, "Bitrot Daemon"))
+ snprintf(status.brick, PATH_MAX + 255, "%s on %s", hostname, path);
+ else {
+ snprintf(key, sizeof(key), "brick%d.rdma_port", i);
+ ret = dict_get_int32(dict, key, &(status.rdma_port));
+ if (ret)
+ continue;
+ snprintf(status.brick, PATH_MAX + 255, "Brick %s:%s", hostname,
+ path);
}
-#endif
- ret = dict_get_int32 (dict, "count", &brick_count);
+ snprintf(key, sizeof(key), "brick%d.port", i);
+ ret = dict_get_int32(dict, key, &(status.port));
if (ret)
- goto out;
- snprintf (key, sizeof (key), "%d-top-op", 1);
- ret = dict_get_int32 (dict, key, (int32_t*)&top_op);
+ continue;
+
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &(status.online));
if (ret)
- goto out;
- while (i < brick_count) {
- i++;
- snprintf (brick, sizeof (brick), "%d-brick", i);
- ret = dict_get_str (dict, brick, &bricks);
- if (ret)
- goto out;
- ret = dict_get_str_boolean (dict, "nfs", _gf_false);
- if (ret)
- cli_out ("NFS Server : %s", bricks);
- else
- cli_out ("Brick: %s", bricks);
-
- snprintf(key, sizeof (key), "%d-members", i);
- ret = dict_get_int32 (dict, key, &members);
-
- switch (top_op) {
- case GF_CLI_TOP_OPEN:
- snprintf (key, sizeof (key), "%d-current-open", i);
- ret = dict_get_uint64 (dict, key, &nr_open);
- if (ret)
- break;
- snprintf (key, sizeof (key), "%d-max-open", i);
- ret = dict_get_uint64 (dict, key, &max_nr_open);
- if (ret)
- goto out;
- snprintf (key, sizeof (key), "%d-max-openfd-time", i);
- ret = dict_get_str (dict, key, &openfd_str);
- if (ret)
- goto out;
- cli_out ("Current open fds: %"PRIu64", Max open"
- " fds: %"PRIu64", Max openfd time: %s", nr_open,
- max_nr_open, openfd_str);
- case GF_CLI_TOP_READ:
- case GF_CLI_TOP_WRITE:
- case GF_CLI_TOP_OPENDIR:
- case GF_CLI_TOP_READDIR:
- if (!members) {
- continue;
- }
- cli_out ("Count\t\tfilename\n=======================");
- break;
- case GF_CLI_TOP_READ_PERF:
- case GF_CLI_TOP_WRITE_PERF:
- snprintf (key, sizeof (key), "%d-throughput", i);
- ret = dict_get_double (dict, key, &throughput);
- if (!ret) {
- snprintf (key, sizeof (key), "%d-time", i);
- ret = dict_get_double (dict, key, &time);
- }
- if (!ret)
- cli_out ("Throughput %.2f MBps time %.4f secs", throughput,
- time / 1e6);
-
- if (!members) {
- continue;
- }
- cli_out ("%*s %-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH, "MBps",
- VOL_TOP_PERF_FILENAME_DEF_WIDTH, "Filename",
- VOL_TOP_PERF_TIME_WIDTH, "Time");
- cli_out ("%*s %-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH, "====",
- VOL_TOP_PERF_FILENAME_DEF_WIDTH, "========",
- VOL_TOP_PERF_TIME_WIDTH, "====");
- break;
- default:
- goto out;
- }
+ continue;
- for (j = 1; j <= members; j++) {
- snprintf (key, sizeof (key), "%d-filename-%d", i, j);
- ret = dict_get_str (dict, key, &filename);
- if (ret)
- break;
- snprintf (key, sizeof (key), "%d-value-%d", i, j);
- ret = dict_get_uint64 (dict, key, &value);
- if (ret)
- goto out;
- if ( top_op == GF_CLI_TOP_READ_PERF ||
- top_op == GF_CLI_TOP_WRITE_PERF) {
- snprintf (key, sizeof (key), "%d-time-sec-%d", i, j);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_sec);
- if (ret)
- goto out;
- snprintf (key, sizeof (key), "%d-time-usec-%d", i, j);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
- if (ret)
- goto out;
- tm = localtime (&time_sec);
- if (!tm)
- goto out;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, time_usec);
- if (strlen (filename) < VOL_TOP_PERF_FILENAME_DEF_WIDTH)
- cli_out ("%*"PRIu64" %-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH,
- value,
- VOL_TOP_PERF_FILENAME_DEF_WIDTH,
- filename,
- VOL_TOP_PERF_TIME_WIDTH,
- timestr);
- else
- cli_out ("%*"PRIu64" ...%-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH,
- value,
- VOL_TOP_PERF_FILENAME_ALT_WIDTH ,
- filename + strlen (filename) -
- VOL_TOP_PERF_FILENAME_ALT_WIDTH,
- VOL_TOP_PERF_TIME_WIDTH,
- timestr);
- } else {
- cli_out ("%"PRIu64"\t\t%s", value, filename);
- }
- }
+ snprintf(key, sizeof(key), "brick%d.pid", i);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ continue;
+ if (pid == -1)
+ ret = gf_asprintf(&(status.pid_str), "%s", "N/A");
+ else
+ ret = gf_asprintf(&(status.pid_str), "%d", pid);
+
+ if (ret == -1)
+ goto out;
+
+ if ((cmd & GF_CLI_STATUS_DETAIL)) {
+ ret = cli_get_detail_status(dict, i, &status);
+ if (ret)
+ goto out;
+ cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
+ cli_print_detailed_status(&status);
+ } else {
+ cli_print_brick_status(&status);
}
- ret = rsp.op_ret;
-out:
- cli_cmd_broadcast_response (ret);
+ /* Allocatated memory using gf_asprintf*/
+ GF_FREE(status.pid_str);
+ }
+ cli_out(" ");
- if (dict)
- dict_unref (dict);
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)
+ cli_print_volume_status_tasks(dict);
+cont:
+ ret = rsp.op_ret;
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- return ret;
+out:
+ if (dict)
+ dict_unref(dict);
+ GF_FREE(status.brick);
+ if (local && wipe_local) {
+ cli_local_wipe(local);
+ }
+
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_top_volume (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_status_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- gf_cli_req req = {{0,}};
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_status_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STATUS_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (data);
+static int
+gf_cli_status_volume_all(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int i = 0;
+ int ret = -1;
+ int vol_count = -1;
+ uint32_t cmd = 0;
+ char key[1024] = {0};
+ char *volname = NULL;
+ void *vol_dict = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
- if (!frame || !this || !data)
- goto out;
- dict = data;
+ if (!frame)
+ goto out;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ if (!frame->local)
+ goto out;
- goto out;
- }
+ local = frame->local;
+ ret = dict_get_uint32(local->dict, "cmd", &cmd);
+ if (ret)
+ goto out;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_top_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ local->all = _gf_true;
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
-}
+ ret = gf_cli_status_volume(frame, this, data);
+ if (ret)
+ goto out;
-int
-gf_cli3_1_getwd_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_getwd_rsp rsp = {0,};
- int ret = -1;
+ ret = dict_get_ptr(local->dict, "rsp-dict", &vol_dict);
+ if (ret)
+ goto out;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ ret = dict_get_int32_sizen((dict_t *)vol_dict, "vol_count", &vol_count);
+ if (ret) {
+ cli_err("Failed to get names of volumes");
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ /* remove the "all" flag in cmd */
+ cmd &= ~GF_CLI_STATUS_ALL;
+ cmd |= GF_CLI_STATUS_VOL;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ // TODO: Pass proper op_* values
+ ret = cli_xml_output_vol_status_begin(local, 0, 0, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto xml_end;
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to getwd");
+ if (vol_count == 0 && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_err("No volumes present");
+ ret = 0;
+ goto out;
+ }
- cli_out ("%s", rsp.wd);
+ for (i = 0; i < vol_count; i++) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
- ret = 0;
+ ret = snprintf(key, sizeof(key), "vol%d", i);
+ ret = dict_get_strn(vol_dict, key, ret, &volname);
+ if (ret)
+ goto out;
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ ret = dict_set_str_sizen(dict, "volname", volname);
+ if (ret)
+ goto out;
-int32_t
-gf_cli3_1_getwd (call_frame_t *frame, xlator_t *this, void *data)
-{
- int ret = -1;
- gf1_cli_getwd_req req = {0,};
+ ret = dict_set_uint32(dict, "cmd", cmd);
+ if (ret)
+ goto out;
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ ret = gf_cli_status_volume(frame, this, dict);
+ if (ret)
+ goto out;
- if (!frame || !this)
- goto out;
+ dict_unref(dict);
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GETWD, NULL,
- this, gf_cli3_1_getwd_cbk,
- (xdrproc_t) xdr_gf1_cli_getwd_req);
+xml_end:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_status_end(local);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "status all failed");
- return ret;
-}
+ if (vol_dict)
+ dict_unref(vol_dict);
-void
-cli_print_volume_status_mempool (dict_t *dict, char *prefix)
-{
- int ret = -1;
- int32_t mempool_count = 0;
- char *name = NULL;
- int32_t hotcount = 0;
- int32_t coldcount = 0;
- uint64_t paddedsizeof = 0;
- uint64_t alloccount = 0;
- int32_t maxalloc = 0;
- uint64_t pool_misses = 0;
- int32_t maxstdalloc = 0;
- char key[1024] = {0,};
- int i = 0;
-
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.mempool-count",prefix);
- ret = dict_get_int32 (dict, key, &mempool_count);
- if (ret)
- goto out;
+ if (ret && dict)
+ dict_unref(dict);
- cli_out ("Mempool Stats\n-------------");
- cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "Name", "HotCount",
- "ColdCount", "PaddedSizeof", "AllocCount", "MaxAlloc",
- "Misses", "Max-StdAlloc");
- cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "----", "--------",
- "---------", "------------", "----------",
- "--------", "--------", "------------");
-
- for (i = 0; i < mempool_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
- ret = dict_get_str (dict, key, &name);
- if (ret)
- goto out;
+ if (local)
+ cli_local_wipe(local);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
- ret = dict_get_int32 (dict, key, &hotcount);
- if (ret)
- goto out;
+ if (frame)
+ frame->local = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
- ret = dict_get_int32 (dict, key, &coldcount);
- if (ret)
- goto out;
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
- prefix, i);
- ret = dict_get_uint64 (dict, key, &paddedsizeof);
- if (ret)
- goto out;
+static int
+gf_cli_mount_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf1_cli_mount_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
- ret = dict_get_uint64 (dict, key, &alloccount);
- if (ret)
- goto out;
+ GF_ASSERT(myframe);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxalloc);
- if (ret)
- goto out;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxstdalloc);
- if (ret)
- goto out;
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
- ret = dict_get_uint64 (dict, key, &pool_misses);
- if (ret)
- goto out;
+ gf_log("cli", GF_LOG_INFO, "Received resp to mount");
- cli_out ("%-30s %9d %9d %12"PRIu64" %10"PRIu64" %8d %8"PRIu64
- " %12d", name, hotcount, coldcount, paddedsizeof,
- alloccount, maxalloc, pool_misses, maxstdalloc);
- }
+ if (rsp.op_ret == 0) {
+ ret = 0;
+ cli_out("%s", rsp.path);
+ } else {
+ /* weird sounding but easy to parse... */
+ cli_err("%d : failed with this errno (%s)", rsp.op_errno,
+ strerror(rsp.op_errno));
+ ret = -1;
+ }
out:
- return;
-
+ cli_cmd_broadcast_response(ret);
+ if (rsp.path) {
+ free(rsp.path);
+ }
+ return ret;
}
-void
-cli_print_volume_status_mem (dict_t *dict, gf_boolean_t notbrick)
+static int32_t
+gf_cli_mount(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- char *volname = NULL;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- char key[1024] = {0,};
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- int val = 0;
- int i = 0;
+ gf1_cli_mount_req req = {
+ 0,
+ };
+ int ret = -1;
+ void **dataa = data;
+ char *label = NULL;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ label = dataa[0];
+ dict = dataa[1];
+
+ req.label = label;
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_MOUNT,
+ NULL, this, gf_cli_mount_cbk,
+ (xdrproc_t)xdr_gf1_cli_mount_req);
- GF_ASSERT (dict);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- cli_out ("Memory status for volume : %s", volname);
+static int
+gf_cli_umount_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf1_cli_umount_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
- goto out;
+ GF_ASSERT(myframe);
- index_max = brick_index_max + other_count;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- continue;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- continue;
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ gf_log("cli", GF_LOG_INFO, "Received resp to mount");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+ if (rsp.op_ret == 0)
+ ret = 0;
+ else {
+ cli_err("umount failed");
+ ret = -1;
+ }
- cli_out ("Mallinfo\n--------");
+out:
+ cli_cmd_broadcast_response(ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.arena", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d","Arena", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if(ret)
- goto out;
- cli_out ("%-8s : %d","Ordblks", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if(ret)
- goto out;
- cli_out ("%-8s : %d","Smblks", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if(ret)
- goto out;
- cli_out ("%-8s : %d", "Hblks", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Hblkhd", val);
+static int32_t
+gf_cli_umount(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf1_cli_umount_req req = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Usmblks", val);
+ if (!frame || !this || !data)
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Fsmblks", val);
+ dict = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Uordblks", val);
+ ret = dict_get_str_sizen(dict, "path", &req.path);
+ if (ret == 0)
+ ret = dict_get_int32_sizen(dict, "lazy", &req.lazy);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Fordblks", val);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Keepcost", val);
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_UMOUNT,
+ NULL, this, gf_cli_umount_cbk,
+ (xdrproc_t)xdr_gf1_cli_umount_req);
- cli_out (" ");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d", i);
- cli_print_volume_status_mempool (dict, key);
- }
out:
- cli_out ("----------------------------------------------\n");
- return;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-void
-cli_print_volume_status_clients (dict_t *dict, gf_boolean_t notbrick)
-{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int client_count = 0;
- char *clientname = NULL;
- uint64_t bytesread = 0;
- uint64_t byteswrite = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
-
- GF_ASSERT (dict);
-
- ret = dict_get_str (dict, "volname", &volname);
+static void
+cmd_heal_volume_statistics_out(dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ uint64_t i = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ snprintf(key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ cli_out("------------------------------------------------");
+ cli_out("\nCrawl statistics for brick no %d", brick);
+ cli_out("Hostname of brick %s", hostname);
+
+ snprintf(key, sizeof key, "statistics-%d-count", brick);
+ ret = dict_get_uint64(dict, key, &num_entries);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < num_entries; i++) {
+ snprintf(key, sizeof key, "statistics_crawl_type-%d-%" PRIu64, brick,
+ i);
+ ret = dict_get_str(dict, key, &crawl_type);
if (ret)
- goto out;
- cli_out ("Client connections for volume %s", volname);
+ goto out;
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ snprintf(key, sizeof key, "statistics_healed_cnt-%d-%" PRIu64, brick,
+ i);
+ ret = dict_get_uint64(dict, key, &healed_count);
if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
- goto out;
+ goto out;
- index_max = brick_index_max + other_count;
+ snprintf(key, sizeof key, "statistics_sb_cnt-%d-%" PRIu64, brick, i);
+ ret = dict_get_uint64(dict, key, &split_brain_count);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_heal_failed_cnt-%d-%" PRIu64,
+ brick, i);
+ ret = dict_get_uint64(dict, key, &heal_failed_count);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_strt_time-%d-%" PRIu64, brick, i);
+ ret = dict_get_str(dict, key, &start_time_str);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_end_time-%d-%" PRIu64, brick, i);
+ ret = dict_get_str(dict, key, &end_time_str);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_inprogress-%d-%" PRIu64, brick,
+ i);
+ ret = dict_get_int32(dict, key, &progress);
+ if (ret)
+ goto out;
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ cli_out("\nStarting time of crawl: %s", start_time_str);
+ if (progress == 1)
+ cli_out("Crawl is in progress");
+ else
+ cli_out("Ending time of crawl: %s", end_time_str);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
+ cli_out("Type of crawl: %s", crawl_type);
+ cli_out("No. of entries healed: %" PRIu64, healed_count);
+ cli_out("No. of entries in split-brain: %" PRIu64, split_brain_count);
+ cli_out("No. of heal failed entries: %" PRIu64, heal_failed_count);
+ }
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+out:
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+static void
+cmd_heal_volume_brick_out(dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[64] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ uint64_t i = 0;
+ uint32_t time = 0;
+ char timestr[GF_TIMESTR_SIZE] = {0};
+ char *shd_status = NULL;
+
+ snprintf(key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "%d-path", brick);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out("\nBrick %s:%s", hostname, path);
+
+ snprintf(key, sizeof key, "%d-status", brick);
+ ret = dict_get_str(dict, key, &status);
+ if (status && status[0] != '\0')
+ cli_out("Status: %s", status);
+
+ snprintf(key, sizeof key, "%d-shd-status", brick);
+ ret = dict_get_str(dict, key, &shd_status);
+
+ if (!shd_status) {
+ snprintf(key, sizeof key, "%d-count", brick);
+ ret = dict_get_uint64(dict, key, &num_entries);
+ cli_out("Number of entries: %" PRIu64, num_entries);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.clientcount", i);
- ret = dict_get_int32 (dict, key, &client_count);
- if (ret)
- goto out;
-
- cli_out ("Clients connected : %d", client_count);
- if (client_count == 0)
- continue;
-
- cli_out ("%-48s %15s %15s", "Hostname", "BytesRead",
- "BytesWritten");
- cli_out ("%-48s %15s %15s", "--------", "---------",
- "------------");
- for (j =0; j < client_count; j++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.client%d.hostname", i, j);
- ret = dict_get_str (dict, key, &clientname);
- if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.client%d.bytesread", i, j);
- ret = dict_get_uint64 (dict, key, &bytesread);
- if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.client%d.byteswrite", i, j);
- ret = dict_get_uint64 (dict, key, &byteswrite);
- if (ret)
- goto out;
-
- cli_out ("%-48s %15"PRIu64" %15"PRIu64,
- clientname, bytesread, byteswrite);
+ for (i = 0; i < num_entries; i++) {
+ snprintf(key, sizeof key, "%d-%" PRIu64, brick, i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ continue;
+ time = 0;
+ snprintf(key, sizeof key, "%d-%" PRIu64 "-time", brick, i);
+ ret = dict_get_uint32(dict, key, &time);
+ if (ret || !time) {
+ cli_out("%s", path);
+ } else {
+ gf_time_fmt(timestr, sizeof timestr, time, gf_timefmt_FT);
+ if (i == 0) {
+ cli_out("at path on brick");
+ cli_out("-----------------------------------");
}
+ cli_out("%s %s", timestr, path);
+ }
}
+ }
+
out:
- cli_out ("----------------------------------------------\n");
- return;
+ return;
}
-void
-cli_print_volume_status_inode_entry (dict_t *dict, char *prefix)
+static void
+cmd_heal_volume_statistics_heal_count_out(dict_t *dict, int brick)
{
- int ret = -1;
- char key[1024] = {0,};
- char *gfid = NULL;
- uint64_t nlookup = 0;
- uint32_t ref = 0;
- int ia_type = 0;
- char inode_type;
-
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gfid", prefix);
- ret = dict_get_str (dict, key, &gfid);
- if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.nlookup", prefix);
- ret = dict_get_uint64 (dict, key, &nlookup);
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[64] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ char *shd_status = NULL;
+
+ snprintf(key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "%d-path", brick);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out("\nBrick %s:%s", hostname, path);
+
+ snprintf(key, sizeof key, "%d-status", brick);
+ ret = dict_get_str(dict, key, &status);
+ if (status && strlen(status))
+ cli_out("Status: %s", status);
+
+ snprintf(key, sizeof key, "%d-shd-status", brick);
+ ret = dict_get_str(dict, key, &shd_status);
+
+ if (!shd_status) {
+ snprintf(key, sizeof key, "%d-hardlinks", brick);
+ ret = dict_get_uint64(dict, key, &num_entries);
if (ret)
- goto out;
+ cli_out("No gathered input for this brick");
+ else
+ cli_out("Number of entries: %" PRIu64, num_entries);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ref", prefix);
- ret = dict_get_uint32 (dict, key, &ref);
- if (ret)
- goto out;
+out:
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ia_type", prefix);
- ret = dict_get_int32 (dict, key, &ia_type);
- if (ret)
- goto out;
+static int
+gf_is_cli_heal_get_command(gf_xl_afr_op_t heal_op)
+{
+ /* If the command is get command value is 1 otherwise 0, for
+ invalid commands -1 */
+ static int get_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
+ [GF_SHD_OP_INVALID] = -1,
+ [GF_SHD_OP_HEAL_INDEX] = 0,
+ [GF_SHD_OP_HEAL_FULL] = 0,
+ [GF_SHD_OP_INDEX_SUMMARY] = 1,
+ [GF_SHD_OP_SPLIT_BRAIN_FILES] = 1,
+ [GF_SHD_OP_STATISTICS] = 1,
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT] = 1,
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = 1,
+ [GF_SHD_OP_HEAL_ENABLE] = 0,
+ [GF_SHD_OP_HEAL_DISABLE] = 0,
+ };
+
+ if (heal_op > GF_SHD_OP_INVALID && heal_op <= GF_SHD_OP_HEAL_DISABLE)
+ return get_cmds[heal_op] == 1;
+ return _gf_false;
+}
- switch (ia_type) {
- case IA_IFREG:
- inode_type = 'R';
- break;
- case IA_IFDIR:
- inode_type = 'D';
- break;
- case IA_IFLNK:
- inode_type = 'L';
- break;
- case IA_IFBLK:
- inode_type = 'B';
- break;
- case IA_IFCHR:
- inode_type = 'C';
- break;
- case IA_IFIFO:
- inode_type = 'F';
- break;
- case IA_IFSOCK:
- inode_type = 'S';
- break;
- default:
- inode_type = 'I';
- break;
+int
+gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int brick_count = 0;
+ int i = 0;
+ gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
+ const char *operation = NULL;
+ const char *substr = NULL;
+ const char *heal_op_str = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "heal-op", (int32_t *)&heal_op);
+ // TODO: Proper XML output
+ //#if (HAVE_LIB_XML)
+ // if (global_state->mode & GLUSTER_MODE_XML) {
+ // ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
+ // rsp.op_errno, rsp.op_errstr);
+ // if (ret)
+ // gf_log ("cli", GF_LOG_ERROR, XML_ERROR);
+ // goto out;
+ // }
+ //#endif
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to get volname");
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to heal volume");
+
+ operation = "Gathering ";
+ substr = "";
+ switch (heal_op) {
+ case GF_SHD_OP_HEAL_INDEX:
+ operation = "Launching heal operation ";
+ heal_op_str = "to perform index self heal";
+ substr = "\nUse heal info commands to check status.";
+ break;
+ case GF_SHD_OP_HEAL_FULL:
+ operation = "Launching heal operation ";
+ heal_op_str = "to perform full self heal";
+ substr = "\nUse heal info commands to check status.";
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ heal_op_str = "list of entries to be healed";
+ break;
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ heal_op_str = "list of split brain entries";
+ break;
+ case GF_SHD_OP_STATISTICS:
+ heal_op_str = "crawl statistics";
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ heal_op_str = "count of entries to be healed";
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ heal_op_str = "count of entries to be healed per replica";
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ case GF_SHD_OP_HEAL_SUMMARY:
+ case GF_SHD_OP_HEALED_FILES:
+ case GF_SHD_OP_HEAL_FAILED_FILES:
+ /* These cases are never hit; they're coded just to silence the
+ * compiler warnings.*/
+ break;
+
+ case GF_SHD_OP_INVALID:
+ heal_op_str = "invalid heal op";
+ break;
+ case GF_SHD_OP_HEAL_ENABLE:
+ operation = "";
+ heal_op_str = "Enable heal";
+ break;
+ case GF_SHD_OP_HEAL_DISABLE:
+ operation = "";
+ heal_op_str = "Disable heal";
+ break;
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
+ operation = "";
+ heal_op_str = "Enable granular entry heal";
+ break;
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE:
+ operation = "";
+ heal_op_str = "Disable granular entry heal";
+ break;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, "")) {
+ cli_err("%s%s on volume %s has been unsuccessful:", operation,
+ heal_op_str, volname);
+ cli_err("%s", rsp.op_errstr);
}
+ ret = rsp.op_ret;
+ goto out;
+ } else {
+ cli_out("%s%s on volume %s has been successful %s", operation,
+ heal_op_str, volname, substr);
+ }
+
+ ret = rsp.op_ret;
+ if (!gf_is_cli_heal_get_command(heal_op))
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+
+ if (!brick_count) {
+ cli_err("All bricks of volume %s are down.", volname);
+ ret = -1;
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_SHD_OP_STATISTICS:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_out(dict, i);
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_heal_count_out(dict, i);
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_brick_out(dict, i);
+ break;
+ default:
+ break;
+ }
- cli_out ("%-40s %14"PRIu64" %14"PRIu32" %9c",
- gfid, nlookup, ref, inode_type);
+ ret = rsp.op_ret;
out:
- return;
-
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ if (dict)
+ dict_unref(dict);
+ return ret;
}
-void
-cli_print_volume_status_itables (dict_t *dict, char *prefix)
+static int32_t
+gf_cli_heal_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- char key[1024] = {0,};
- uint32_t active_size = 0;
- uint32_t lru_size = 0;
- uint32_t purge_size = 0;
- int i =0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
+ dict = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.active_size", prefix);
- ret = dict_get_uint32 (dict, key, &active_size);
- if (ret)
- goto out;
- if (active_size != 0) {
- cli_out ("Active inodes:");
- cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
- "IA type");
- cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
- "-------");
- }
- for (i = 0; i < active_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.active%d", prefix, i);
- cli_print_volume_status_inode_entry (dict, key);
- }
- cli_out (" ");
+ ret = cli_to_glusterd(&req, frame, gf_cli_heal_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_HEAL_VOLUME, this, cli_rpc_prog, NULL);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru_size", prefix);
- ret = dict_get_uint32 (dict, key, &lru_size);
- if (ret)
- goto out;
- if (lru_size != 0) {
- cli_out ("LRU inodes:");
- cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
- "IA type");
- cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
- "-------");
- }
- for (i = 0; i < lru_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
- cli_print_volume_status_inode_entry (dict, key);
- }
- cli_out (" ");
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge_size", prefix);
- ret = dict_get_uint32 (dict, key, &purge_size);
+static int32_t
+gf_cli_statedump_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = "Volume statedump successful";
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to statedump");
+ if (rsp.op_ret)
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volStatedump", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
- goto out;
- if (purge_size != 0) {
- cli_out ("Purged inodes:");
- cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
- "IA type");
- cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
- "-------");
- }
- for (i = 0; i < purge_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
- cli_print_volume_status_inode_entry (dict, key);
- }
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err("volume statedump: failed: %s", msg);
+ else
+ cli_out("volume statedump: success");
+ ret = rsp.op_ret;
out:
- return;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-void
-cli_print_volume_status_inode (dict_t *dict, gf_boolean_t notbrick)
+static int32_t
+gf_cli_statedump_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
- GF_ASSERT (dict);
+ options = data;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- cli_out ("Inode tables for volume %s", volname);
+ ret = cli_to_glusterd(
+ &req, frame, gf_cli_statedump_volume_cbk, (xdrproc_t)xdr_gf_cli_req,
+ options, GLUSTER_CLI_STATEDUMP_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+static int32_t
+gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ int vol_count = 0;
+ ;
+ char *volname = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_list(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err("%s", rsp.op_errstr);
+ else {
+ ret = dict_get_int32_sizen(dict, "count", &vol_count);
if (ret)
+ goto out;
+
+ if (vol_count == 0) {
+ cli_err("No volumes present in cluster");
+ goto out;
+ }
+ for (i = 0; i < vol_count; i++) {
+ ret = snprintf(key, sizeof(key), "volume%d", i);
+ ret = dict_get_strn(dict, key, ret, &volname);
+ if (ret)
goto out;
+ cli_out("%s", volname);
+ }
+ }
- index_max = brick_index_max + other_count;
+ ret = rsp.op_ret;
- for ( i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+static int32_t
+gf_cli_list_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog,
+ GLUSTER_CLI_LIST_VOLUME, NULL, this,
+ gf_cli_list_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conncount", i);
- ret = dict_get_int32 (dict, key, &conn_count);
- if (ret)
- goto out;
+static int32_t
+gf_cli_clearlocks_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char *lk_summary = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to clear-locks");
+
+ if (rsp.op_ret) {
+ cli_err("Volume clear-locks unsuccessful");
+ cli_err("%s", rsp.op_errstr);
+
+ } else {
+ if (!rsp.dict.dict_len) {
+ cli_err("Possibly no locks cleared");
+ ret = 0;
+ goto out;
+ }
- for (j = 0; j < conn_count; j++) {
- if (conn_count > 1)
- cli_out ("Connection %d:", j+1);
+ dict = dict_new();
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.itable",
- i, j);
- cli_print_volume_status_itables (dict, key);
- cli_out (" ");
- }
+ if (!dict) {
+ ret = -1;
+ goto out;
}
-out:
- cli_out ("----------------------------------------------");
- return;
-}
-void
-cli_print_volume_status_fdtable (dict_t *dict, char *prefix)
-{
- int ret = -1;
- char key[1024] = {0,};
- int refcount = 0;
- uint32_t maxfds = 0;
- int firstfree = 0;
- int openfds = 0;
- int fd_pid = 0;
- int fd_refcount = 0;
- int fd_flags = 0;
- int i = 0;
-
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &refcount);
- if (ret)
- goto out;
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.maxfds", prefix);
- ret = dict_get_uint32 (dict, key, &maxfds);
- if (ret)
- goto out;
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
- memset (key, 0 ,sizeof (key));
- snprintf (key, sizeof (key), "%s.firstfree", prefix);
- ret = dict_get_int32 (dict, key, &firstfree);
- if (ret)
- goto out;
+ ret = dict_get_str(dict, "lk-summary", &lk_summary);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to get lock summary from dictionary");
+ goto out;
+ }
+ cli_out("Volume clear-locks successful");
+ cli_out("%s", lk_summary);
+ }
- cli_out ("RefCount = %d MaxFDs = %d FirstFree = %d",
- refcount, maxfds, firstfree);
+ ret = rsp.op_ret;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.openfds", prefix);
- ret = dict_get_int32 (dict, key, &openfds);
- if (ret)
- goto out;
- if (0 == openfds) {
- cli_out ("No open fds");
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- cli_out ("%-19s %-19s %-19s %-19s", "FD Entry", "PID",
- "RefCount", "Flags");
- cli_out ("%-19s %-19s %-19s %-19s", "--------", "---",
- "--------", "-----");
+static int32_t
+gf_cli_clearlocks_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
- for (i = 0; i < maxfds ; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_pid);
- if (ret)
- continue;
+ options = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
- prefix, i);
- ret = dict_get_int32 (dict, key, &fd_refcount);
- if (ret)
- continue;
+ ret = cli_to_glusterd(
+ &req, frame, gf_cli_clearlocks_volume_cbk, (xdrproc_t)xdr_gf_cli_req,
+ options, GLUSTER_CLI_CLRLOCKS_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_flags);
- if (ret)
- continue;
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
+
+static int32_t
+cli_snapshot_remove_reply(gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
+{
+ int32_t ret = -1;
+ char *snap_name = NULL;
+ int32_t delete_cmd = -1;
+ cli_local_t *local = NULL;
+
+ GF_ASSERT(frame);
+ GF_ASSERT(rsp);
+ GF_ASSERT(dict);
+
+ local = frame->local;
+
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &delete_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get sub-cmd");
+ goto end;
+ }
+
+ if ((global_state->mode & GLUSTER_MODE_XML) &&
+ (delete_cmd == GF_SNAP_DELETE_TYPE_SNAP)) {
+ ret = cli_xml_output_snap_delete_begin(local, rsp->op_ret,
+ rsp->op_errno, rsp->op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml output for delete");
+ goto end;
+ }
+ }
+
+ if (rsp->op_ret && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_err("snapshot delete: failed: %s",
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ if (delete_cmd == GF_SNAP_DELETE_TYPE_ALL ||
+ delete_cmd == GF_SNAP_DELETE_TYPE_VOL) {
+ local = ((call_frame_t *)frame)->local;
+ if (!local) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "frame->local is NULL");
+ goto out;
+ }
+
+ /* During first call back of snapshot delete of type
+ * ALL and VOL, We will get the snapcount and snapnames.
+ * Hence to make the subsequent rpc calls for individual
+ * snapshot delete, We need to save it in local dictionary.
+ */
+ dict_copy(dict, local->dict);
+ ret = 0;
+ goto out;
+ }
- cli_out ("%-19d %-19d %-19d %-19d", i, fd_pid, fd_refcount,
- fd_flags);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_delete(local, dict, rsp);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml output for snapshot delete command");
+ goto out;
+ }
+ /* Error out in case of the op already failed */
+ if (rsp->op_ret) {
+ ret = rsp->op_ret;
+ goto out;
+ }
+ } else {
+ ret = dict_get_str_sizen(dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
}
+ cli_out("snapshot delete: %s: snap removed successfully", snap_name);
+ }
+ ret = 0;
+
out:
- return;
+ if ((global_state->mode & GLUSTER_MODE_XML) &&
+ (delete_cmd == GF_SNAP_DELETE_TYPE_SNAP)) {
+ ret = cli_xml_output_snap_delete_end(local);
+ }
+end:
+ return ret;
}
-void
-cli_print_volume_status_fd (dict_t *dict, gf_boolean_t notbrick)
+static int
+cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
-
- GF_ASSERT (dict);
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
+ char buf[PATH_MAX] = "";
+ char *volname = NULL;
+ int ret = -1;
+ int config_command = 0;
+ uint64_t value = 0;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ uint64_t i = 0;
+ uint64_t voldisplaycount = 0;
+ char *auto_delete = NULL;
+ char *snap_activate = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp);
+
+ if (rsp->op_ret) {
+ cli_err("Snapshot Config : failed: %s",
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "config-command", &config_command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
+
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
+ /* Ignore the error, as the key specified is optional */
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+
+ ret = dict_get_str_sizen(dict, "auto-delete", &auto_delete);
+
+ ret = dict_get_str_sizen(dict, "snap-activate-on-create", &snap_activate);
+
+ if (!hard_limit && !soft_limit &&
+ config_command != GF_SNAP_CONFIG_DISPLAY && !auto_delete &&
+ !snap_activate) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "Could not fetch config-key");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ /* Ignore the error, as volname is optional */
+
+ if (!volname) {
+ volname = "System";
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (hard_limit && soft_limit) {
+ cli_out(
+ "snapshot config: snap-max-hard-limit "
+ "& snap-max-soft-limit for system set successfully");
+ } else if (hard_limit) {
+ cli_out(
+ "snapshot config: snap-max-hard-limit "
+ "for %s set successfully",
+ volname);
+ } else if (soft_limit) {
+ cli_out(
+ "snapshot config: snap-max-soft-limit "
+ "for %s set successfully",
+ volname);
+ } else if (auto_delete) {
+ cli_out("snapshot config: auto-delete successfully set");
+ } else if (snap_activate) {
+ cli_out("snapshot config: activate-on-create successfully set");
+ }
+ break;
+
+ case GF_SNAP_CONFIG_DISPLAY:
+ cli_out("\nSnapshot System Configuration:");
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch snap_max_hard_limit for %s", volname);
+ ret = -1;
goto out;
- cli_out ("FD tables for volume %s", volname);
+ }
+ cli_out("snap-max-hard-limit : %" PRIu64, value);
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch snap-max-soft-limit for %s", volname);
+ ret = -1;
goto out;
+ }
+ cli_out("snap-max-soft-limit : %" PRIu64 "%%", soft_limit);
- index_max = brick_index_max + other_count;
+ cli_out("auto-delete : %s", auto_delete);
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ cli_out("activate-on-create : %s\n", snap_activate);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
+ cli_out("Snapshot Volume Configuration:");
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ ret = dict_get_uint64(dict, "voldisplaycount", &voldisplaycount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch voldisplaycount");
+ ret = -1;
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
+ for (i = 0; i < voldisplaycount; i++) {
+ ret = snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname",
+ i);
+ ret = dict_get_strn(dict, buf, ret, &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ ret = -1;
+ goto out;
}
+ cli_out("\nVolume : %s", volname);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conncount", i);
- ret = dict_get_int32 (dict, key, &conn_count);
- if (ret)
- goto out;
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-snap-max-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out("snap-max-hard-limit : %" PRIu64, value);
- for (j = 0; j < conn_count; j++) {
- cli_out ("Connection %d:", j+1);
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-active-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch"
+ " effective snap_max_hard_limit for %s",
+ volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out("Effective snap-max-hard-limit : %" PRIu64, value);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
- i, j);
- cli_print_volume_status_fdtable (dict, key);
- cli_out (" ");
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-snap-max-soft-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ ret = -1;
+ goto out;
}
- }
+ cli_out("Effective snap-max-soft-limit : %" PRIu64
+ " "
+ "(%" PRIu64 "%%)",
+ value, soft_limit);
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
out:
- cli_out ("----------------------------------------------");
- return;
+ return ret;
}
-void
-cli_print_volume_status_call_frame (dict_t *dict, char *prefix)
+/* This function is used to print the volume related information
+ * of a snap.
+ *
+ * arg - 0, dict : Response Dictionary.
+ * arg - 1, prefix str : snaplist.snap{0..}.vol{0..}.*
+ */
+static int
+cli_get_each_volinfo_in_snap(dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven)
{
- int ret = -1;
- char key[1024] = {0,};
- int ref_count = 0;
- char *translator = 0;
- int complete = 0;
- char *parent = NULL;
- char *wind_from = NULL;
- char *wind_to = NULL;
- char *unwind_from = NULL;
- char *unwind_to = NULL;
-
- if (!dict || !prefix)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &ref_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.translator", prefix);
- ret = dict_get_str (dict, key, &translator);
- if (ret)
- return;
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int value = 0;
+ int ret = -1;
+ char indent[5] = "\t";
+ char *volname = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ if (snap_driven) {
+ ret = snprintf(key, sizeof(key), "%s.volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.complete", prefix);
- ret = dict_get_int32 (dict, key, &complete);
- if (ret)
- return;
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Snap Volume Name", ":",
+ get_buffer);
- cli_out (" Ref Count = %d", ref_count);
- cli_out (" Translator = %s", translator);
- cli_out (" Completed = %s", (complete ? "Yes" : "No"));
+ ret = snprintf(key, sizeof(key), "%s.origin-volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.parent", prefix);
- ret = dict_get_str (dict, key, &parent);
- if (!ret)
- cli_out (" Parent = %s", parent);
+ ret = dict_get_str(dict, key, &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "Failed to get %s", key);
+ cli_out("%-12s", "Origin:");
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Origin Volume name", ":",
+ volname);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windfrom", prefix);
- ret = dict_get_str (dict, key, &wind_from);
- if (!ret)
- cli_out (" Wind From = %s", wind_from);
+ ret = snprintf(key, sizeof(key), "%s.snapcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windto", prefix);
- ret = dict_get_str (dict, key, &wind_to);
- if (!ret)
- cli_out (" Wind To = %s", wind_to);
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s%s %s %s %d", indent, "Snaps taken for", volname, ":",
+ value);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
- ret = dict_get_str (dict, key, &unwind_from);
- if (!ret)
- cli_out (" Unwind From = %s", unwind_from);
+ ret = snprintf(key, sizeof(key), "%s.snaps-available", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindto", prefix);
- ret = dict_get_str (dict, key, &unwind_to);
- if (!ret)
- cli_out (" Unwind To = %s", unwind_to);
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s%s %s %s %d", indent, "Snaps available for", volname, ":",
+ value);
+ }
+
+ ret = snprintf(key, sizeof(key), "%s.vol-status", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Status", ":", get_buffer);
+out:
+ return ret;
}
-void
-cli_print_volume_status_call_stack (dict_t *dict, char *prefix)
+/* This function is used to print snap related information
+ * arg - 0, dict : Response dictionary.
+ * arg - 1, prefix_str : snaplist.snap{0..}.*
+ */
+static int
+cli_get_volinfo_in_snap(dict_t *dict, char *keyprefix)
{
- int ret = -1;
- char key[1024] = {0,};
- int uid = 0;
- int gid = 0;
- int pid = 0;
- uint64_t unique = 0;
- //char *op = NULL;
- int count = 0;
- int i = 0;
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ int ret = -1;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = snprintf(key, sizeof(key), "%s.vol-count", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, key, &volcount);
+ for (i = 1; i <= volcount; i++) {
+ ret = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap(dict, key, _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not list details of volume in a snap");
+ goto out;
+ }
+ cli_out(" ");
+ }
- if (!dict || !prefix)
- return;
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.uid", prefix);
- ret = dict_get_int32 (dict, key, &uid);
- if (ret)
- return;
+static int
+cli_get_each_snap_info(dict_t *dict, char *prefix_str, gf_boolean_t snap_driven)
+{
+ char key_buffer[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int ret = -1;
+ char indent[5] = "";
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix_str);
+
+ if (!snap_driven)
+ strcat(indent, "\t");
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snapname", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snapname %s ", key_buffer);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Snapshot", ":", get_buffer);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-id", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-id %s ", key_buffer);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Snap UUID", ":", get_buffer);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-desc", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (!ret) {
+ /* Ignore error for description */
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Description", ":",
+ get_buffer);
+ }
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-time", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-time %s ",
+ prefix_str);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Created", ":", get_buffer);
+
+ if (snap_driven) {
+ cli_out("%-12s", "Snap Volumes:\n");
+ ret = cli_get_volinfo_in_snap(dict, prefix_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to list details of the snaps");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gid", prefix);
- ret = dict_get_int32 (dict, key, &gid);
- if (ret)
- return;
+/* This is a generic function to print snap related information.
+ * arg - 0, dict : Response Dictionary
+ */
+static int
+cli_call_snapshot_info(dict_t *dict, gf_boolean_t bool_snap_driven)
+{
+ int snap_count = 0;
+ char key[32] = "";
+ int ret = -1;
+ int i = 0;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_get_int32 (dict, key, &pid);
- if (ret)
- return;
+ GF_ASSERT(dict);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unique", prefix);
- ret = dict_get_uint64 (dict, key, &unique);
- if (ret)
- return;
+ ret = dict_get_int32_sizen(dict, "snapcount", &snap_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snapcount");
+ goto out;
+ }
- /*
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.op", prefix);
- ret = dict_get_str (dict, key, &op);
- if (ret)
- return;
- */
+ if (snap_count == 0) {
+ cli_out("No snapshots present");
+ }
+ for (i = 1; i <= snap_count; i++) {
+ ret = snprintf(key, sizeof(key), "snap%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_snap_info(dict, key, bool_snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to print snap details");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.count", prefix);
- ret = dict_get_int32 (dict, key, &count);
- if (ret)
- return;
+static int
+cli_get_snaps_in_volume(dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int count = 0;
+ int avail = 0;
+ char key[32] = "";
+ char *get_buffer = NULL;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "origin-volname", &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch origin-volname");
+ goto out;
+ }
+ cli_out(INDENT_MAIN_HEAD "%s", "Volume Name", ":", get_buffer);
+
+ ret = dict_get_int32_sizen(dict, "snapcount", &avail);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch snapcount");
+ goto out;
+ }
+ cli_out(INDENT_MAIN_HEAD "%d", "Snaps Taken", ":", avail);
+
+ ret = dict_get_int32_sizen(dict, "snaps-available", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch snaps-available");
+ goto out;
+ }
+ cli_out(INDENT_MAIN_HEAD "%d", "Snaps Available", ":", count);
+
+ for (i = 1; i <= avail; i++) {
+ snprintf(key, sizeof(key), "snap%d", i);
+ ret = cli_get_each_snap_info(dict, key, _gf_false);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to print snap details");
+ goto out;
+ }
- cli_out (" UID : %d", uid);
- cli_out (" GID : %d", gid);
- cli_out (" PID : %d", pid);
- cli_out (" Unique : %"PRIu64, unique);
- //cli_out ("\tOp : %s", op);
- cli_out (" Frames : %d", count);
+ ret = snprintf(key, sizeof(key), "snap%d.vol1", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap(dict, key, _gf_false);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not get volume related information");
+ goto out;
+ }
- for (i = 0; i < count; i++) {
- cli_out (" Frame %d", i+1);
+ cli_out(" ");
+ }
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
- cli_print_volume_status_call_frame (dict, key);
+static int
+cli_snapshot_list(dict_t *dict)
+{
+ int snapcount = 0;
+ char key[32] = "";
+ int ret = -1;
+ int i = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_int32_sizen(dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch snap count");
+ goto out;
+ }
+
+ if (snapcount == 0) {
+ cli_out("No snapshots present");
+ }
+
+ for (i = 1; i <= snapcount; i++) {
+ ret = snprintf(key, sizeof(key), "snapname%d", i);
+ if (ret < 0) {
+ goto out;
}
- cli_out (" ");
+ ret = dict_get_strn(dict, key, ret, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
+ } else {
+ cli_out("%s", get_buffer);
+ }
+ }
+out:
+ return ret;
}
-void
-cli_print_volume_status_callpool (dict_t *dict, gf_boolean_t notbrick)
+static int
+cli_get_snap_volume_status(dict_t *dict, char *key_prefix)
{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int call_count = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+ int brickcount = 0;
+ int i = 0;
+ int pid = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(key_prefix);
+
+ ret = snprintf(key, sizeof(key), "%s.brickcount", key_prefix);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_int32(dict, key, &brickcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch brickcount");
+ goto out;
+ }
+
+ for (i = 0; i < brickcount; i++) {
+ ret = snprintf(key, sizeof(key), "%s.brick%d.path", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- GF_ASSERT (dict);
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Brick Path");
+ continue;
+ }
+ cli_out("\n\t%-17s %s %s", "Brick Path", ":", buffer);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- cli_out ("Pending calls for volume %s", volname);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.vgname", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
- goto out;
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Volume Group");
+ cli_out("\t%-17s %s %s", "Volume Group", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "Volume Group", ":", buffer);
- index_max = brick_index_max + other_count;
+ ret = snprintf(key, sizeof(key), "%s.brick%d.status", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Brick Running");
+ cli_out("\t%-17s %s %s", "Brick Running", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "Brick Running", ":", buffer);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
+ ret = snprintf(key, sizeof(key), "%s.brick%d.pid", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get pid");
+ cli_out("\t%-17s %s %s", "Brick PID", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %d", "Brick PID", ":", pid);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+ ret = snprintf(key, sizeof(key), "%s.brick%d.data", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.callpool.count", i);
- ret = dict_get_int32 (dict, key, &call_count);
- if (ret)
- goto out;
- cli_out ("Pending calls: %d", call_count);
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Data Percent");
+ cli_out("\t%-17s %s %s", "Data Percentage", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "Data Percentage", ":", buffer);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.lvsize", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get LV Size");
+ cli_out("\t%-17s %s %s", "LV Size", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "LV Size", ":", buffer);
+ }
- if (0 == call_count)
- continue;
+ ret = 0;
+out:
+ return ret;
+}
- for (j = 0; j < call_count; j++) {
- cli_out ("Call Stack%d", j+1);
+static int
+cli_get_single_snap_status(dict_t *dict, char *keyprefix)
+{
+ int ret = -1;
+ char key[64] = ""; /* keyprefix is ""status.snap0" */
+ int i = 0;
+ int volcount = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = snprintf(key, sizeof(key), "%s.snapname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snapname");
+ goto out;
+ }
+ cli_out("\nSnap Name : %s", get_buffer);
+
+ ret = snprintf(key, sizeof(key), "%s.uuid", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snap UUID");
+ goto out;
+ }
+ cli_out("Snap UUID : %s", get_buffer);
+
+ ret = snprintf(key, sizeof(key), "%s.volcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, key, &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get volume count");
+ goto out;
+ }
+
+ for (i = 0; i < volcount; i++) {
+ ret = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.callpool.stack%d", i, j);
- cli_print_volume_status_call_stack (dict, key);
- }
+ ret = cli_get_snap_volume_status(dict, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snap volume status");
+ goto out;
}
+ }
+out:
+ return ret;
+}
+static int32_t
+cli_populate_req_dict_for_delete(dict_t *snap_dict, dict_t *dict, size_t index)
+{
+ int32_t ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+
+ GF_ASSERT(snap_dict);
+ GF_ASSERT(dict);
+
+ ret = dict_set_int32_sizen(snap_dict, "sub-cmd", GF_SNAP_DELETE_TYPE_ITER);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save command type in snap dictionary");
+ goto out;
+ }
+
+ ret = snprintf(key, sizeof(key), "snapname%zu", index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_dict, "snapname", buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to save snapname");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(snap_dict, "type", GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to save command type");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_dict, "cmd-str", "snapshot delete");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command string as delete");
+ goto out;
+ }
out:
- cli_out ("----------------------------------------------");
- return;
+ return ret;
}
static int
-gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- int i = 0;
- int pid = -1;
- uint32_t cmd = 0;
- gf_boolean_t notbrick = _gf_false;
- char key[1024] = {0,};
- char *hostname = NULL;
- char *path = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
- gf_cli_rsp rsp = {0,};
- cli_volume_status_t status = {0};
- cli_local_t *local = NULL;
- char msg[1024] = {0,};
-
- if (req->rpc_status == -1)
- goto out;
+cli_populate_req_dict_for_status(dict_t *snap_dict, dict_t *dict, int index)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+
+ GF_ASSERT(snap_dict);
+ GF_ASSERT(dict);
+
+ ret = dict_set_uint32(snap_dict, "sub-cmd", GF_SNAP_STATUS_TYPE_ITER);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command type in snap dict");
+ goto out;
+ }
+
+ ret = snprintf(key, sizeof(key), "status.snap%d.snapname", index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, key, ret, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapname");
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(snap_dict, "snapname", buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save snapname in snap dict");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(snap_dict, "type", GF_SNAP_OPTION_TYPE_STATUS);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command type");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_dict, "cmd-str", "snapshot status");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command string as status");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(snap_dict, "hold_vol_locks", _gf_false);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Setting volume lock flag failed");
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "Volume status response error");
- goto out;
+out:
+ return ret;
+}
+
+static int
+cli_snapshot_status(dict_t *dict, gf_cli_rsp *rsp, call_frame_t *frame)
+{
+ int ret = -1;
+ int status_cmd = -1;
+ cli_local_t *local = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp);
+ GF_ASSERT(frame);
+
+ local = ((call_frame_t *)frame)->local;
+ if (!local) {
+ gf_log("cli", GF_LOG_ERROR, "frame->local is NULL");
+ goto out;
+ }
+
+ if (rsp->op_ret) {
+ if (rsp->op_errstr) {
+ ret = dict_set_dynstr_with_alloc(local->dict, "op_err_str",
+ rsp->op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set op_errstr in local dictionary");
+ goto out;
+ }
+ }
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &status_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch status type");
+ goto out;
+ }
+
+ if ((status_cmd != GF_SNAP_STATUS_TYPE_SNAP) &&
+ (status_cmd != GF_SNAP_STATUS_TYPE_ITER)) {
+ dict_copy(dict, local->dict);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_status_single_snap(local, dict, "status.snap0");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml output for snapshot status");
+ }
+ } else {
+ ret = cli_get_single_snap_status(dict, "status.snap0");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch status of snap");
}
+ }
+
+out:
+ return ret;
+}
- gf_log ("cli", GF_LOG_DEBUG, "Received response to status cmd");
+static int
+gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
+ char *snap_name, char *volname, char *snap_uuid,
+ char *clone_name)
+{
+ int ret = -1;
+ int config_command = 0;
+ int32_t delete_cmd = -1;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ char *auto_delete = NULL;
+ char *snap_activate = NULL;
+ char msg[PATH_MAX] = {
+ 0,
+ };
+ char option[512] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("cli", dict, out);
+ GF_VALIDATE_OR_GOTO("cli", rsp, out);
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ if (!volname) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
+
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_CREATE_FAILED,
+ "snapshot_name=%s;volume_name=%s;error=%s", snap_name,
+ volname,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- local = ((call_frame_t *)myframe)->local;
+ if (!snap_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
- if (rsp.op_ret) {
- if (strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Unable to obtain volume "
- "status information.");
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- cli_xml_output_str ("volStatus", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- ret = 0;
- goto out;
- }
-#endif
- cli_err ("%s", msg);
- if (local && local->all) {
- ret = 0;
- cli_out (" ");
- } else
- ret = -1;
+ gf_event(EVENT_SNAPSHOT_CREATED,
+ "snapshot_name=%s;volume_name=%s;snapshot_uuid=%s",
+ snap_name, volname, snap_uuid);
+
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
- }
+ }
- dict = dict_new ();
- if (!dict)
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_ACTIVATE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
+
+ if (!snap_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret)
+ gf_event(EVENT_SNAPSHOT_ACTIVATED,
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
+ }
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_DEACTIVATE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
+
+ if (!snap_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
+ }
- if ((cmd & GF_CLI_STATUS_ALL)) {
- if (local) {
- local->dict = dict;
- ret = 0;
- } else {
- gf_log ("cli", GF_LOG_ERROR, "local not found");
- ret = -1;
- }
+ gf_event(EVENT_SNAPSHOT_DEACTIVATED,
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
- }
+ }
- if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD))
- notbrick = _gf_true;
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_RESTORE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
+ if (!snap_uuid) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
- if (count == 0) {
+ }
+
+ if (!volname) {
ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volname");
goto out;
- }
+ }
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
+ gf_event(EVENT_SNAPSHOT_RESTORED,
+ "snapshot_name=%s;snapshot_uuid=%s;volume_name=%s",
+ snap_name, snap_uuid, volname);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &delete_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get sub-cmd");
goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
+ }
+
+ /*
+ * Need not generate any event (success or failure) for delete *
+ * all, as it will trigger individual delete for all snapshots *
+ */
+ if (delete_cmd == GF_SNAP_DELETE_TYPE_ALL) {
+ ret = 0;
+ break;
+ }
+
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
+ }
- index_max = brick_index_max + other_count;
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_DELETE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_status (dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- }
+ if (!snap_uuid) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
- }
-#endif
+ }
- status.brick = GF_CALLOC (1, PATH_MAX + 256, gf_common_mt_strdup);
-
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_MEM:
- cli_print_volume_status_mem (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_CLIENTS:
- cli_print_volume_status_clients (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_INODE:
- cli_print_volume_status_inode (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_FD:
- cli_print_volume_status_fd (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_CALLPOOL:
- cli_print_volume_status_callpool (dict, notbrick);
- goto cont;
- break;
- default:
- break;
- }
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
+ gf_event(EVENT_SNAPSHOT_DELETED,
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ if (!clone_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone name");
goto out;
+ }
- cli_out ("Status of volume: %s", volname);
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname name");
+ goto out;
+ }
- if ((cmd & GF_CLI_STATUS_DETAIL) == 0) {
- cli_out ("Gluster process\t\t\t\t\t\tPort\tOnline\tPid");
- cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
- }
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_CLONE_FAILED,
+ "snapshot_name=%s;clone_name=%s;error=%s", snap_name,
+ clone_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- for (i = 0; i <= index_max; i++) {
+ if (!snap_uuid) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+ gf_event(EVENT_SNAPSHOT_CLONED,
+ "snapshot_name=%s;clone_name=%s;clone_uuid=%s", snap_name,
+ clone_name, snap_uuid);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- continue;
+ ret = 0;
+ break;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- continue;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_CONFIG_UPDATE_FAILED, "error=%s",
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- /* Brick/not-brick is handled seperately here as all
- * types of nodes are contained in the default output
- */
- memset (status.brick, 0, PATH_MAX + 255);
- if (!strcmp (hostname, "NFS Server") ||
- !strcmp (hostname, "Self-heal Daemon"))
- snprintf (status.brick, PATH_MAX + 255, "%s on %s",
- hostname, path);
- else
- snprintf (status.brick, PATH_MAX + 255, "Brick %s:%s",
- hostname, path);
+ ret = dict_get_int32_sizen(dict, "config-command", &config_command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.port", i);
- ret = dict_get_int32 (dict, key, &(status.port));
- if (ret)
- continue;
+ if (config_command == GF_SNAP_CONFIG_DISPLAY) {
+ ret = 0;
+ break;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &(status.online));
- if (ret)
- continue;
+ /* These are optional parameters therefore ignore the error */
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+ ret = dict_get_str_sizen(dict, "auto-delete", &auto_delete);
+ ret = dict_get_str_sizen(dict, "snap-activate-on-create",
+ &snap_activate);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.pid", i);
- ret = dict_get_int32 (dict, key, &pid);
- if (ret)
- continue;
- if (pid == -1)
- ret = gf_asprintf (&(status.pid_str), "%s", "N/A");
- else
- ret = gf_asprintf (&(status.pid_str), "%d", pid);
+ if (!hard_limit && !soft_limit && !auto_delete && !snap_activate) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "At least one option from "
+ "snap-max-hard-limit, snap-max-soft-limit, "
+ "auto-delete and snap-activate-on-create "
+ "should be set");
+ goto out;
+ }
- if (ret == -1)
- goto out;
+ if (hard_limit || soft_limit) {
+ snprintf(option, sizeof(option), "%s=%" PRIu64,
+ hard_limit ? "hard_limit" : "soft_limit",
+ hard_limit ? hard_limit : soft_limit);
+ } else if (auto_delete || snap_activate) {
+ snprintf(option, sizeof(option), "%s=%s",
+ auto_delete ? "auto-delete" : "snap-activate",
+ auto_delete ? auto_delete : snap_activate);
+ }
- if ((cmd & GF_CLI_STATUS_DETAIL)) {
- ret = cli_get_detail_status (dict, i, &status);
- if (ret)
- goto out;
- cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
- cli_print_detailed_status (&status);
+ volname = NULL;
+ ret = dict_get_str_sizen(dict, "volname", &volname);
- } else {
- cli_print_brick_status (&status);
- }
- }
- cli_out (" ");
-cont:
- ret = rsp.op_ret;
+ snprintf(msg, sizeof(msg), "config_type=%s;%s",
+ volname ? "volume_config" : "system_config", option);
-out:
- if (status.brick)
- GF_FREE (status.brick);
+ gf_event(EVENT_SNAPSHOT_CONFIG_UPDATED, "%s", msg);
- cli_cmd_broadcast_response (ret);
- return ret;
+ ret = 0;
+ break;
+
+ default:
+ gf_log("cli", GF_LOG_WARNING,
+ "Cannot generate event for unknown type.");
+ ret = 0;
+ goto out;
+ }
+
+out:
+ return ret;
}
-int32_t
-gf_cli3_1_status_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+/*
+ * Fetch necessary data from dict at one place instead of *
+ * repeating the same code again and again. *
+ */
+static int
+gf_cli_snapshot_get_data_from_dict(dict_t *dict, char **snap_name,
+ char **volname, char **snap_uuid,
+ int8_t *soft_limit_flag, char **clone_name)
{
- gf_cli_req req = {{0,}};
- int ret = -1;
- dict_t *dict = NULL;
+ int ret = -1;
- if (!frame || !this || !data)
- goto out;
+ GF_VALIDATE_OR_GOTO("cli", dict, out);
- dict = data;
+ if (snap_name) {
+ ret = dict_get_str_sizen(dict, "snapname", snap_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get snapname from dict");
+ }
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to serialize the data");
+ if (volname) {
+ ret = dict_get_str_sizen(dict, "volname1", volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get volname1 from dict");
+ }
+ }
- goto out;
+ if (snap_uuid) {
+ ret = dict_get_str_sizen(dict, "snapuuid", snap_uuid);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get snapuuid from dict");
+ }
+ }
+
+ if (soft_limit_flag) {
+ ret = dict_get_int8(dict, "soft-limit-reach", soft_limit_flag);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG,
+ "failed to get soft-limit-reach from dict");
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATUS_VOLUME, NULL,
- this, gf_cli3_1_status_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ }
- out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning: %d", ret);
- return ret;
+ if (clone_name) {
+ ret = dict_get_str_sizen(dict, "clonename", clone_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get clonename from dict");
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
}
-int
-gf_cli_status_volume_all (call_frame_t *frame, xlator_t *this, void *data)
+static int
+gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int i = 0;
- int ret = -1;
- int vol_count = -1;
- uint32_t cmd = 0;
- char key[1024] = {0};
- char *volname = NULL;
- dict_t *vol_dict = NULL;
- dict_t *dict = NULL;
- cli_local_t *local = NULL;
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *snap_name = NULL;
+ char *clone_name = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t snap_driven = _gf_false;
+ int8_t soft_limit_flag = -1;
+ char *volname = NULL;
+ char *snap_uuid = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to get type");
+ goto out;
+ }
+
+ ret = gf_cli_snapshot_get_data_from_dict(
+ dict, &snap_name, &volname, &snap_uuid, &soft_limit_flag, &clone_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch data from dict.");
+ goto out;
+ }
+
+#if (USE_EVENTS)
+ ret = gf_cli_generate_snapshot_event(&rsp, dict, type, snap_name, volname,
+ snap_uuid, clone_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to generate snapshot event");
+ goto out;
+ }
+#endif
- dict = (dict_t *)data;
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
+ /* Snapshot status and delete command is handled separately */
+ if (global_state->mode & GLUSTER_MODE_XML &&
+ GF_SNAP_OPTION_TYPE_STATUS != type &&
+ GF_SNAP_OPTION_TYPE_DELETE != type) {
+ ret = cli_xml_output_snapshot(type, dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+
+ goto out;
+ }
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot create: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
+ }
- local = cli_local_get ();
- if (!local) {
+ if (!snap_name) {
ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Failed to allocate local");
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
- }
- frame->local = local;
- local->all = _gf_true;
+ }
- ret = gf_cli3_1_status_volume (frame, this, data);
- if (ret)
+ if (!volname) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
+
+ cli_out("snapshot create: success: Snap %s created successfully",
+ snap_name);
+
+ if (soft_limit_flag == 1) {
+ cli_out(
+ "Warning: Soft-limit of volume (%s) is "
+ "reached. Snapshot creation is not possible "
+ "once hard-limit is reached.",
+ volname);
+ }
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ if (rsp.op_ret) {
+ cli_err("snapshot clone: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
+ }
- vol_dict = local->dict;
+ if (!clone_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone name");
+ goto out;
+ }
- ret = dict_get_int32 (vol_dict, "vol_count", &vol_count);
- if (ret) {
- cli_err ("Failed to get names of volumes");
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname name");
goto out;
- }
+ }
- if (vol_count == 0) {
- cli_out ("No volumes present");
- ret = 0;
+ cli_out("snapshot clone: success: Clone %s created successfully",
+ clone_name);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ if (rsp.op_ret) {
+ cli_err("snapshot restore: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
- }
+ }
- /* remove the "all" flag in cmd */
- cmd &= ~GF_CLI_STATUS_ALL;
- cmd |= GF_CLI_STATUS_VOL;
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
- for (i = 0; i < vol_count; i++) {
+ cli_out("Snapshot restore: %s: Snap restored successfully",
+ snap_name);
- dict = dict_new ();
- if (!dict)
- goto out;
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot activate: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "vol%d", i);
- ret = dict_get_str (vol_dict, key, &volname);
- if (ret)
- goto out;
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
- ret = dict_set_dynstr (dict, "volname", volname);
- if (ret)
- goto out;
+ cli_out("Snapshot activate: %s: Snap activated successfully",
+ snap_name);
- ret = dict_set_uint32 (dict, "cmd", cmd);
- if (ret)
- goto out;
+ ret = 0;
+ break;
- ret = gf_cli3_1_status_volume (frame, this, dict);
- if (ret)
- goto out;
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot deactivate: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
- dict_unref (dict);
- }
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
- out:
- if (ret)
- gf_log ("cli", GF_LOG_ERROR, "status all failed");
- if (ret && dict)
- dict_unref (dict);
- if (frame)
- frame->local = NULL;
- return ret;
-}
+ cli_out("Snapshot deactivate: %s: Snap deactivated successfully",
+ snap_name);
-static int
-gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_mount_rsp rsp = {0,};
- int ret = -1;
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_INFO:
+ if (rsp.op_ret) {
+ cli_err("Snapshot info : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
- if (-1 == req->rpc_status) {
+ snap_driven = dict_get_str_boolean(dict, "snap-driven", _gf_false);
+ if (snap_driven == _gf_true) {
+ ret = cli_call_snapshot_info(dict, snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Snapshot info failed");
+ goto out;
+ }
+ } else if (snap_driven == _gf_false) {
+ ret = cli_get_snaps_in_volume(dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Snapshot info failed");
+ goto out;
+ }
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = cli_snapshot_config_display(dict, &rsp);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to display snapshot config output.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ if (rsp.op_ret) {
+ cli_err("Snapshot list : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
- }
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ ret = cli_snapshot_list(dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to display snapshot list");
goto out;
- }
+ }
+ break;
- gf_log ("cli", GF_LOG_INFO, "Received resp to mount");
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = cli_snapshot_remove_reply(&rsp, dict, frame);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to delete snap");
+ goto out;
+ }
+ break;
- if (rsp.op_ret == 0) {
- ret = 0;
- cli_out ("%s", rsp.path);
- } else {
- /* weird sounding but easy to parse... */
- cli_err ("%d : failed with this errno (%s)",
- rsp.op_errno, strerror (rsp.op_errno));
- ret = 1;
- }
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = cli_snapshot_status(dict, &rsp, frame);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to display snapshot status output.");
+ goto out;
+ }
+ break;
+ default:
+ cli_err("Unknown command executed");
+ ret = -1;
+ goto out;
+ }
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+
+ free(rsp.dict.dict_val);
+ free(rsp.op_errstr);
+
+ return ret;
}
int32_t
-gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_mount_req req = {0,};
- int ret = -1;
- void **dataa = data;
- char *label = NULL;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int32_t ret = -1;
+ int32_t cmd = -1;
+ cli_local_t *local = NULL;
+ dict_t *snap_dict = NULL;
+ int32_t snapcount = 0;
+ int i = 0;
+ char question[PATH_MAX] = "";
+ char *volname = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_VALIDATE_OR_GOTO("cli", frame, out);
+ GF_VALIDATE_OR_GOTO("cli", frame->local, out);
+ GF_VALIDATE_OR_GOTO("cli", this, out);
+ GF_VALIDATE_OR_GOTO("cli", data, out);
+
+ local = frame->local;
+
+ ret = dict_get_int32_sizen(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
+ goto out;
+ }
+
+ /* No need multiple RPCs for individual snapshot delete*/
+ if (cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+#ifdef HAVE_LIB_XML
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"snapCount", "%d", snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to write xml element \"snapCount\"");
+ goto out;
+ }
+#endif /* HAVE_LIB_XML */
+ } else if (snapcount == 0) {
+ cli_out("No snapshots present");
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_DELETE_TYPE_ALL) {
+ snprintf(question, sizeof(question),
+ "System contains %d snapshot(s).\nDo you still "
+ "want to continue and delete them? ",
+ snapcount);
+ } else {
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to fetch volname from local dictionary");
+ goto out;
+ }
- if (!frame || !this || !data)
- goto out;
+ snprintf(question, sizeof(question),
+ "Volume (%s) contains %d snapshot(s).\nDo you still want to "
+ "continue and delete them? ",
+ volname, snapcount);
+ }
- label = dataa[0];
- dict = dataa[1];
+ answer = cli_cmd_get_confirmation(global_state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled snapshot delete operation for snap delete");
+ goto out;
+ }
+
+ for (i = 1; i <= snapcount; i++) {
+ ret = -1;
- req.label = label;
- ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ snap_dict = dict_new();
+ if (!snap_dict)
+ goto out;
+
+ ret = cli_populate_req_dict_for_delete(snap_dict, local->dict, i);
if (ret) {
- ret = -1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not populate snap request dictionary");
+ goto out;
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_MOUNT, NULL,
- this, gf_cli3_1_mount_cbk,
- (xdrproc_t)xdr_gf1_cli_mount_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, snap_dict,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog, NULL);
+ if (ret) {
+ /* Fail the operation if deleting one of the
+ * snapshots is failed
+ */
+ gf_log("cli", GF_LOG_ERROR,
+ "cli_to_glusterd for snapshot delete failed");
+ goto out;
+ }
+ dict_unref(snap_dict);
+ snap_dict = NULL;
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (snap_dict)
+ dict_unref(snap_dict);
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
}
-static int
-gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int32_t
+gf_cli_snapshot_for_status(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_umount_rsp rsp = {0,};
- int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = -1;
+ int32_t cmd = -1;
+ cli_local_t *local = NULL;
+ dict_t *snap_dict = NULL;
+ int snapcount = 0;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("cli", frame, out);
+ GF_VALIDATE_OR_GOTO("cli", frame->local, out);
+ GF_VALIDATE_OR_GOTO("cli", this, out);
+ GF_VALIDATE_OR_GOTO("cli", data, out);
+
+ local = frame->local;
+
+ ret = dict_get_int32_sizen(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
+ goto out;
+ }
+
+ /* Snapshot status of single snap (i.e. GF_SNAP_STATUS_TYPE_SNAP)
+ * is already handled. Therefore we can return from here.
+ * If want to get status of all snaps in the system or volume then
+ * we should get them one by one.*/
+ if ((cmd == GF_SNAP_STATUS_TYPE_SNAP) ||
+ (cmd == GF_SNAP_STATUS_TYPE_ITER)) {
+ ret = 0;
+ goto out;
+ }
- if (-1 == req->rpc_status) {
- goto out;
- }
+ ret = dict_get_int32_sizen(local->dict, "status.snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (snapcount == 0 && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_out("No snapshots present");
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to mount");
+ for (i = 0; i < snapcount; i++) {
+ ret = -1;
- if (rsp.op_ret == 0)
- ret = 0;
- else {
- cli_err ("umount failed");
- ret = 1;
+ snap_dict = dict_new();
+ if (!snap_dict)
+ goto out;
+
+ ret = cli_populate_req_dict_for_status(snap_dict, local->dict, i);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not populate snap request dictionary");
+ goto out;
+ }
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, snap_dict,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog, NULL);
+
+ /* Ignore the return value and error for snapshot
+ * status of type "ALL" or "VOL"
+ *
+ * Scenario : There might be case where status command
+ * and delete command might be issued at the same time.
+ * In that case when status tried to fetch detail of
+ * snap which has been deleted by concurrent command,
+ * then it will show snapshot not present. Which will
+ * not be appropriate.
+ */
+ if (ret && (cmd != GF_SNAP_STATUS_TYPE_ALL &&
+ cmd != GF_SNAP_STATUS_TYPE_VOL)) {
+ gf_log("cli", GF_LOG_ERROR,
+ "cli_to_glusterd for snapshot status failed");
+ goto out;
}
+ dict_unref(snap_dict);
+ snap_dict = NULL;
+ }
+ ret = 0;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ if (snap_dict)
+ dict_unref(snap_dict);
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
}
-int32_t
-gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_umount_req req = {0,};
- int ret = -1;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
+ int tmp_ret = -1;
+ cli_local_t *local = NULL;
+ char *err_str = NULL;
+ int type = -1;
- if (!frame || !this || !data)
- goto out;
+ if (!frame || !frame->local || !this || !data)
+ goto out;
- dict = data;
+ local = frame->local;
- ret = dict_get_str (dict, "path", &req.path);
- if (ret == 0)
- ret = dict_get_int32 (dict, "lazy", &req.lazy);
+ options = data;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_begin_composite_op(local);
if (ret) {
- ret = -1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to begin snapshot xml composite op");
+ goto out;
}
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_UMOUNT, NULL,
- this, gf_cli3_1_umount_cbk,
- (xdrproc_t)xdr_gf1_cli_umount_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options, GLUSTER_CLI_SNAP,
+ this, cli_rpc_prog, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "cli_to_glusterd for snapshot failed");
+ goto xmlend;
+ }
- out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
+ ret = dict_get_int32_sizen(local->dict, "type", &type);
-void
-cmd_heal_volume_brick_out (dict_t *dict, int brick)
-{
- uint64_t num_entries = 0;
- int ret = 0;
- char key[256] = {0};
- char *hostname = NULL;
- char *path = NULL;
- char *status = NULL;
- uint64_t i = 0;
- uint32_t time = 0;
- char timestr[256] = {0};
- struct tm tm = {0};
- time_t ltime = 0;
-
- snprintf (key, sizeof (key), "%d-hostname", brick);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- snprintf (key, sizeof (key), "%d-path", brick);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
- cli_out ("\nBrick %s:%s", hostname, path);
- snprintf (key, sizeof (key), "%d-count", brick);
- ret = dict_get_uint64 (dict, key, &num_entries);
- cli_out ("Number of entries: %"PRIu64, num_entries);
- snprintf (key, sizeof (key), "%d-status", brick);
- ret = dict_get_str (dict, key, &status);
- if (status && strlen (status))
- cli_out ("Status: %s", status);
- for (i = 0; i < num_entries; i++) {
- snprintf (key, sizeof (key), "%d-%"PRIu64, brick, i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- continue;
- time = 0;
- snprintf (key, sizeof (key), "%d-%"PRIu64"-time", brick, i);
- ret = dict_get_uint32 (dict, key, &time);
- if (!time) {
- cli_out ("%s", path);
- } else {
- ltime = time;
- memset (&tm, 0, sizeof (tm));
- if (!localtime_r (&ltime, &tm)) {
- snprintf (timestr, sizeof (timestr),
- "Invalid time");
- } else {
- strftime (timestr, sizeof (timestr),
- "%Y-%m-%d %H:%M:%S", &tm);
- }
- if (i ==0) {
- cli_out ("at path on brick");
- cli_out ("-----------------------------------");
- }
- cli_out ("%s %s", timestr, path);
- }
+ if (GF_SNAP_OPTION_TYPE_STATUS == type) {
+ ret = gf_cli_snapshot_for_status(frame, this, data);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "cli to glusterd for snapshot status command failed");
}
-out:
- return;
-}
-int
-gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *input_dict = NULL;
- dict_t *dict = NULL;
- int brick_count = 0;
- int i = 0;
- gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+ goto xmlend;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ if (GF_SNAP_OPTION_TYPE_DELETE == type) {
+ ret = gf_cli_snapshot_for_delete(frame, this, data);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "cli to glusterd for snapshot delete command failed");
}
- frame = myframe;
+ goto xmlend;
+ }
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ ret = 0;
- if (local) {
- input_dict = local->dict;
- ret = dict_get_int32 (input_dict, "heal-op",
- (int32_t*)&heal_op);
- }
-//TODO: Proper XML output
-//#if (HAVE_LIB_XML)
-// if (global_state->mode & GLUSTER_MODE_XML) {
-// ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
-// rsp.op_errno, rsp.op_errstr);
-// if (ret)
-// gf_log ("cli", GF_LOG_ERROR,
-// "Error outputting to xml");
-// goto out;
-// }
-//#endif
-
- ret = dict_get_str (input_dict, "volname", &volname);
+xmlend:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_end_composite_op(local);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to end snapshot xml composite op");
+ goto out;
}
+ }
+out:
+ if (ret && local && GF_SNAP_OPTION_TYPE_STATUS == type) {
+ tmp_ret = dict_get_str_sizen(local->dict, "op_err_str", &err_str);
+ if (tmp_ret || !err_str) {
+ cli_err("Snapshot Status : failed: %s",
+ "Please check log file for details");
+ } else {
+ cli_err("Snapshot Status : failed: %s", err_str);
+ dict_del_sizen(local->dict, "op_err_str");
+ }
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume");
-
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Heal operation on volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- ret = rsp.op_ret;
- if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
- (heal_op == GF_AFR_OP_HEAL_INDEX))
- goto out;
+ GF_FREE(req.dict.dict_val);
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ /* XML mode handles its own error */
+ ret = 0;
+ }
+ return ret;
+}
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
+static int32_t
+gf_cli_barrier_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to barrier");
+
+ if (rsp.op_ret) {
+ if (rsp.op_errstr && (strlen(rsp.op_errstr) > 1)) {
+ cli_err("volume barrier: command unsuccessful : %s", rsp.op_errstr);
} else {
- dict->extra_stdfree = rsp.dict.dict_val;
- }
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret)
- goto out;
-
- if (!brick_count) {
- cli_out ("All bricks of volume %s are down.", volname);
- goto out;
+ cli_err("volume barrier: command unsuccessful");
}
-
- for (i = 0; i < brick_count; i++)
- cmd_heal_volume_brick_out (dict, i);
- ret = rsp.op_ret;
+ } else {
+ cli_out("volume barrier: command successful");
+ }
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (dict)
- dict_unref (dict);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int
+gf_cli_barrier_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ options = data;
- dict = data;
- local = cli_local_get ();
+ ret = cli_to_glusterd(&req, frame, gf_cli_barrier_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options,
+ GLUSTER_CLI_BARRIER_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+static int32_t
+gf_cli_get_vol_opt_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ int i = 0;
+ char dict_key[50] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to get volume option");
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "volume get option: failed: %s",
+ rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "volume get option: failed");
- goto out;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volGetopts", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+ } else {
+ cli_err("%s", msg);
}
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_HEAL_VOLUME, NULL,
- this, gf_cli3_1_heal_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = rsp.op_ret;
+ goto out_nolog;
+ }
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_getopts(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ ret = 0;
+ }
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "warning", &value);
+ if (!ret) {
+ cli_out("%s", value);
+ }
+
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve count from the dictionary");
+ goto out;
+ }
+
+ if (count <= 0) {
+ gf_log("cli", GF_LOG_ERROR, "Value of count :%d is invalid", count);
+ ret = -1;
+ goto out;
+ }
+
+ cli_out("%-40s%-40s", "Option", "Value");
+ cli_out("%-40s%-40s", "------", "-----");
+ for (i = 1; i <= count; i++) {
+ ret = snprintf(dict_key, sizeof dict_key, "key%d", i);
+ ret = dict_get_strn(dict, dict_key, ret, &key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve %s from the dictionary", dict_key);
+ goto out;
+ }
+ ret = snprintf(dict_key, sizeof dict_key, "value%d", i);
+ ret = dict_get_strn(dict, dict_key, ret, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve key value for %s from the dictionary",
+ dict_key);
+ goto out;
+ }
+ cli_out("%-40s%-40s", key, value);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret) {
+ cli_out(
+ "volume get option failed. Check the cli/glusterd log "
+ "file for more details");
+ }
+
+out_nolog:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+static int
+gf_cli_get_vol_opt(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ options = data;
- return ret;
+ ret = cli_to_glusterd(&req, frame, gf_cli_get_vol_opt_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options,
+ GLUSTER_CLI_GET_VOL_OPT, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
-int32_t
-gf_cli3_1_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+add_cli_cmd_timeout_to_dict(dict_t *dict)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status)
- goto out;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
- goto out;
- }
- gf_log ("cli", GF_LOG_DEBUG, "Received response to statedump");
- if (rsp.op_ret)
- snprintf (msg, sizeof(msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Volume statedump successful");
+ int ret = 0;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volStatedump", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ if (cli_default_conn_timeout > 120) {
+ ret = dict_set_uint32(dict, "timeout", cli_default_conn_timeout);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Failed to save timeout to dict");
}
-#endif
-
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ }
+ return ret;
+}
+static int
+cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
+ rpc_clnt_prog_t *prog, struct iobref *iobref)
+{
+ int ret = 0;
+ size_t len = 0;
+ char *cmd = NULL;
+ int i = 0;
+ const char **words = NULL;
+ cli_local_t *local = NULL;
+
+ if (!this || !frame || !frame->local || !dict) {
+ ret = -1;
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (!local->words) {
+ ret = -1;
+ goto out;
+ }
+
+ words = local->words;
+
+ while (words[i])
+ len += strlen(words[i++]) + 1;
+
+ cmd = GF_MALLOC(len + 1, gf_common_mt_char);
+ if (!cmd) {
+ ret = -1;
+ goto out;
+ }
+ cmd[0] = '\0';
+
+ for (i = 0; words[i]; i++) {
+ strncat(cmd, words[i], len - 1);
+ if (words[i + 1] != NULL)
+ strncat(cmd, " ", len - 1);
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "cmd-str", cmd);
+ if (ret)
+ goto out;
+
+ ret = add_cli_cmd_timeout_to_dict(dict);
+
+ ret = dict_allocate_and_serialize(dict, &(req->dict).dict_val,
+ &(req->dict).dict_len);
+
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, req, frame, prog, procnum, iobref, this, cbkfn,
+ (xdrproc_t)xdrproc);
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_statedump_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int
+gf_cli_print_bitrot_scrub_status(dict_t *dict)
{
- gf_cli_req req = {{0,}};
- dict_t *options = NULL;
- int ret = -1;
+ int i = 1;
+ int j = 0;
+ int ret = -1;
+ int count = 0;
+ char key[64] = {
+ 0,
+ };
+ char *volname = NULL;
+ char *node_name = NULL;
+ char *scrub_freq = NULL;
+ char *state_scrub = NULL;
+ char *scrub_impact = NULL;
+ char *bad_file_str = NULL;
+ char *scrub_log_file = NULL;
+ char *bitrot_log_file = NULL;
+ uint64_t scrub_files = 0;
+ uint64_t unsigned_files = 0;
+ uint64_t scrub_time = 0;
+ uint64_t days = 0;
+ uint64_t hours = 0;
+ uint64_t minutes = 0;
+ uint64_t seconds = 0;
+ char *last_scrub = NULL;
+ uint64_t error_count = 0;
+ int8_t scrub_running = 0;
+ char *scrub_state_op = NULL;
+
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "failed to get count value from dictionary");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get volume name");
+
+ ret = dict_get_str_sizen(dict, "features.scrub", &state_scrub);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrub state value");
+
+ ret = dict_get_str_sizen(dict, "features.scrub-throttle", &scrub_impact);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrub impact value");
+
+ ret = dict_get_str_sizen(dict, "features.scrub-freq", &scrub_freq);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrub -freq value");
+
+ ret = dict_get_str_sizen(dict, "bitrot_log_file", &bitrot_log_file);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get bitrot log file location");
+
+ ret = dict_get_str_sizen(dict, "scrub_log_file", &scrub_log_file);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubber log file location");
+
+ for (i = 1; i <= count; i++) {
+ snprintf(key, sizeof(key), "scrub-running-%d", i);
+ ret = dict_get_int8(dict, key, &scrub_running);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubbed files");
+ if (scrub_running)
+ break;
+ }
- if (!frame || !this || !data)
- goto out;
+ if (scrub_running)
+ gf_asprintf(&scrub_state_op, "%s (In Progress)", state_scrub);
+ else
+ gf_asprintf(&scrub_state_op, "%s (Idle)", state_scrub);
- options = data;
+ cli_out("\n%s: %s\n", "Volume name ", volname);
- ret = dict_allocate_and_serialize (options,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ cli_out("%s: %s\n", "State of scrub", scrub_state_op);
- goto out;
- }
+ cli_out("%s: %s\n", "Scrub impact", scrub_impact);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATEDUMP_VOLUME, NULL,
- this, gf_cli3_1_statedump_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ cli_out("%s: %s\n", "Scrub frequency", scrub_freq);
-out:
- if (options)
- dict_destroy (options);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ cli_out("%s: %s\n", "Bitrot error log location", bitrot_log_file);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
-}
+ cli_out("%s: %s\n", "Scrubber error log location", scrub_log_file);
-int32_t
-gf_cli3_1_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- gf_cli_rsp rsp = {0,};
- dict_t *dict = NULL;
- int vol_count = 0;;
- char *volname = NULL;
- char key[1024] = {0,};
- int i = 0;
+ for (i = 1; i <= count; i++) {
+ /* Reset the variables to prevent carryover of values */
+ node_name = NULL;
+ last_scrub = NULL;
+ scrub_time = 0;
+ error_count = 0;
+ scrub_files = 0;
+ unsigned_files = 0;
- if (-1 == req->rpc_status)
- goto out;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
- goto out;
- }
+ snprintf(key, sizeof(key), "node-name-%d", i);
+ ret = dict_get_str(dict, key, &node_name);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get node-name");
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "scrubbed-files-%d", i);
+ ret = dict_get_uint64(dict, key, &scrub_files);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubbed files");
- ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
+ snprintf(key, sizeof(key), "unsigned-files-%d", i);
+ ret = dict_get_uint64(dict, key, &unsigned_files);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get unsigned files");
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_list (dict, rsp.op_ret, rsp.op_errno,
- rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- if (rsp.op_ret)
- cli_err ("%s", rsp.op_errstr);
- else {
- ret = dict_get_int32 (dict, "count", &vol_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "scrub-duration-%d", i);
+ ret = dict_get_uint64(dict, key, &scrub_time);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get last scrub duration");
- if (vol_count == 0) {
- cli_out ("No volumes present in cluster");
- goto out;
- }
- for (i = 0; i < vol_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
- cli_out ("%s", volname);
- }
- }
+ snprintf(key, sizeof(key), "last-scrub-time-%d", i);
+ ret = dict_get_str(dict, key, &last_scrub);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get last scrub time");
+ snprintf(key, sizeof(key), "error-count-%d", i);
+ ret = dict_get_uint64(dict, key, &error_count);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get error count");
- ret = rsp.op_ret;
+ cli_out("\n%s\n",
+ "=========================================================");
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ cli_out("%s: %s\n", "Node", node_name);
-int32_t
-gf_cli3_1_list_volume (call_frame_t *frame, xlator_t *this, void *data)
-{
- int ret = -1;
- gf_cli_req req = {{0,}};
+ cli_out("%s: %" PRIu64 "\n", "Number of Scrubbed files", scrub_files);
- if (!frame || !this)
- goto out;
+ cli_out("%s: %" PRIu64 "\n", "Number of Skipped files", unsigned_files);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LIST_VOLUME, NULL,
- this, gf_cli3_1_list_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ if ((!last_scrub) || !strcmp(last_scrub, ""))
+ cli_out("%s: %s\n", "Last completed scrub time",
+ "Scrubber pending to complete.");
+ else
+ cli_out("%s: %s\n", "Last completed scrub time", last_scrub);
+
+ /* Printing last scrub duration time in human readable form*/
+ seconds = scrub_time % 60;
+ minutes = (scrub_time / 60) % 60;
+ hours = (scrub_time / 3600) % 24;
+ days = scrub_time / 86400;
+ cli_out("%s: %" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 "\n",
+ "Duration of last scrub (D:M:H:M:S)", days, hours, minutes,
+ seconds);
+
+ cli_out("%s: %" PRIu64 "\n", "Error count", error_count);
+
+ if (error_count) {
+ cli_out("%s:\n", "Corrupted object's [GFID]");
+ /* Printing list of bad file's (Corrupted object's)*/
+ for (j = 0; j < error_count; j++) {
+ snprintf(key, sizeof(key), "quarantine-%d-%d", j, i);
+ ret = dict_get_str(dict, key, &bad_file_str);
+ if (!ret) {
+ cli_out("%s\n", bad_file_str);
+ }
+ }
+ }
+ }
+ cli_out("%s\n",
+ "=========================================================");
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(scrub_state_op);
+ return 0;
}
-int32_t
-gf_cli3_1_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_bitrot_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char *lk_summary = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
-
- if (-1 == req->rpc_status)
- goto out;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
+ int ret = -1;
+ int type = 0;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *scrub_cmd = NULL;
+ char *volname = NULL;
+ char *cmd_str = NULL;
+ char *cmd_op = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ ret = -1;
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("Bitrot command failed : %s", rsp.op_errstr);
+ else
+ cli_err("Bitrot command : failed");
- gf_log ("cli", GF_LOG_ERROR, "XDR decoding failed");
- goto out;
- }
- gf_log ("cli", GF_LOG_DEBUG, "Received response to clear-locks");
+ goto out;
+ }
- if (rsp.op_ret) {
- cli_err ("Volume clear-locks unsuccessful");
- cli_err ("%s", rsp.op_errstr);
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
- } else {
- if (!rsp.dict.dict_len) {
- cli_out ("Possibly no locks cleared");
- ret = 0;
- goto out;
- }
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- dict = dict_new ();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (!dict) {
- ret = -1;
- goto out;
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to bit rot command");
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Unable to serialize response dictionary");
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get command type");
+ goto out;
+ }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to get volname "
- "from dictionary");
- goto out;
- }
+ if ((type == GF_BITROT_CMD_SCRUB_STATUS) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ ret = gf_cli_print_bitrot_scrub_status(dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to print bitrot scrub status");
+ }
+ goto out;
+ }
+
+ /* Ignoring the error, as using dict val for cli output only */
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get volume name");
+
+ ret = dict_get_str_sizen(dict, "scrub-value", &scrub_cmd);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "Failed to get scrub command");
+
+ ret = dict_get_str_sizen(dict, "cmd-str", &cmd_str);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get command string");
+
+ if (cmd_str)
+ cmd_op = strrchr(cmd_str, ' ') + 1;
+
+ switch (type) {
+ case GF_BITROT_OPTION_TYPE_ENABLE:
+ cli_out("volume bitrot: success bitrot enabled for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_DISABLE:
+ cli_out("volume bitrot: success bitrot disabled for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_CMD_SCRUB_ONDEMAND:
+ cli_out("volume bitrot: scrubber started ondemand for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_SCRUB:
+ if (!strncmp("pause", scrub_cmd, sizeof("pause")))
+ cli_out("volume bitrot: scrubber paused for volume %s",
+ volname);
+ if (!strncmp("resume", scrub_cmd, sizeof("resume")))
+ cli_out("volume bitrot: scrubber resumed for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
+ cli_out(
+ "volume bitrot: scrub-frequency is set to %s "
+ "successfully for volume %s",
+ cmd_op, volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE:
+ cli_out(
+ "volume bitrot: scrub-throttle is set to %s "
+ "successfully for volume %s",
+ cmd_op, volname);
+ ret = 0;
+ goto out;
+ }
- ret = dict_get_str (dict, "lk-summary", &lk_summary);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to get lock "
- "summary from dictionary");
- goto out;
- }
- cli_out ("Volume clear-locks successful");
- cli_out ("%s", lk_summary);
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_profile(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- }
+ if (!rsp.op_ret)
+ cli_out("volume bitrot: success");
- ret = rsp.op_ret;
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ gf_free_xdr_cli_rsp(rsp);
+ cli_cmd_broadcast_response(ret);
+ return ret;
}
-int32_t
-gf_cli3_1_clearlocks_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_bitrot(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- dict_t *options = NULL;
- int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ options = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_bitrot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options,
+ GLUSTER_CLI_BITROT, this, cli_rpc_prog, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "cli_to_glusterd for bitrot failed");
+ goto out;
+ }
- if (!frame || !this || !data)
- goto out;
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- options = data;
+ GF_FREE(req.dict.dict_val);
- ret = dict_allocate_and_serialize (options,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to serialize the data");
+ return ret;
+}
- goto out;
- }
+static struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_NULL] = {"NULL", NULL},
+ [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli_probe},
+ [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli_deprobe},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli_list_friends},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", gf_cli3_1_uuid_reset},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", gf_cli3_1_uuid_get},
+ [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli_create_volume},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli_delete_volume},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli_start_volume},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli_stop_volume},
+ [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli_rename_volume},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli_defrag_volume},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli_get_volume},
+ [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli_get_next_volume},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli_set_volume},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli_add_brick},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli_remove_brick},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli_replace_brick},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli_log_rotate},
+ [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli_getspec},
+ [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli_pmap_b2p},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli_sync_volume},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli_reset_volume},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli_fsm_log},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli_gsync_set},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli_profile_volume},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli_quota},
+ [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli_top_volume},
+ [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli_getwd},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli_status_volume},
+ [GLUSTER_CLI_STATUS_ALL] = {"STATUS_ALL", gf_cli_status_volume_all},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli_mount},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli_umount},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli_heal_volume},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME",
+ gf_cli_statedump_volume},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli_list_volume},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME",
+ gf_cli_clearlocks_volume},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", gf_cli_copy_file},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec},
+ [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot},
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt},
+ [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot},
+ [GLUSTER_CLI_GET_STATE] = {"GET_STATE", gf_cli_get_state},
+ [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", gf_cli_reset_brick},
+ [GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha},
+};
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_CLRLOCKS_VOLUME, NULL,
- this, gf_cli3_1_clearlocks_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+struct rpc_clnt_program cli_prog = {
+ .progname = "Gluster CLI",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numproc = GLUSTER_CLI_MAXVALUE,
+ .proctable = gluster_cli_actors,
+};
-out:
- if (options)
- dict_destroy (options);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
-}
-
-struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
- [GLUSTER_CLI_NULL] = {"NULL", NULL },
- [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli3_1_probe},
- [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli3_1_deprobe},
- [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli3_1_list_friends},
- [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli3_1_create_volume},
- [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli3_1_delete_volume},
- [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli3_1_start_volume},
- [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli3_1_stop_volume},
- [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli3_1_rename_volume},
- [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli3_1_defrag_volume},
- [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli3_1_get_volume},
- [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli3_1_get_next_volume},
- [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli3_1_set_volume},
- [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli3_1_add_brick},
- [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli3_1_remove_brick},
- [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli3_1_replace_brick},
- [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli3_1_log_rotate},
- [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli3_1_getspec},
- [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli3_1_pmap_b2p},
- [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli3_1_sync_volume},
- [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli3_1_reset_volume},
- [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli3_1_fsm_log},
- [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli3_1_gsync_set},
- [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli3_1_profile_volume},
- [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli3_1_quota},
- [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli3_1_top_volume},
- [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli3_1_getwd},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli3_1_status_volume},
- [GLUSTER_CLI_STATUS_ALL] = {"STATUS_ALL", gf_cli_status_volume_all},
- [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli3_1_mount},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount},
- [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli3_1_heal_volume},
- [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", gf_cli3_1_statedump_volume},
- [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli3_1_list_volume},
- [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", gf_cli3_1_clearlocks_volume},
+static struct rpc_clnt_procedure cli_quotad_procs[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL},
+ [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", cli_quotad_getlimit},
};
-struct rpc_clnt_program cli_prog = {
- .progname = "Gluster CLI",
- .prognum = GLUSTER_CLI_PROGRAM,
- .progver = GLUSTER_CLI_VERSION,
- .numproc = GLUSTER_CLI_MAXVALUE,
- .proctable = gluster_cli_actors,
+struct rpc_clnt_program cli_quotad_clnt = {
+ .progname = "CLI Quotad client",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numproc = GF_AGGREGATOR_MAXVALUE,
+ .proctable = cli_quotad_procs,
};
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index eb45a1fa04b..069de75801c 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -1,2634 +1,5839 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdlib.h>
#include "cli.h"
#include "cli1-xdr.h"
-#include "run.h"
-#include "compat.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/upcall-utils.h>
+enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK };
+
+/*
+ * IMPORTANT NOTE:
+ * All exported functions in this file which use libxml need use a
+ * #if (HAVE_LIB_XML), #else, #endif
+ * For eg,
+ * int exported_func () {
+ * #if (HAVE_LIB_XML)
+ * <Stuff using libxml>
+ * #else
+ * return 0;
+ * #endif
+ * }
+ *
+ * All other functions, which are called internally within this file need to be
+ * within #if (HAVE_LIB_XML), #endif statements
+ * For eg,
+ * #if (HAVE_LIB_XML)
+ * int internal_func ()
+ * {
+ * }
+ * #endif
+ *
+ * Following the above format ensures that all xml related code is compiled
+ * only when libxml2 is present, and also keeps the rest of the codebase free
+ * of #if (HAVE_LIB_XML)
+ */
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
-#define XML_RET_CHECK_AND_GOTO(ret, label) do { \
- if (ret < 0) { \
- ret = -1; \
- goto label; \
- } \
- else \
- ret = 0; \
- }while (0) \
+#define XML_RET_CHECK_AND_GOTO(ret, label) \
+ do { \
+ if (ret < 0) { \
+ ret = -1; \
+ goto label; \
+ } else \
+ ret = 0; \
+ } while (0)
int
-cli_begin_xml_output (xmlTextWriterPtr *writer, xmlBufferPtr *buf)
+cli_begin_xml_output(xmlTextWriterPtr *writer, xmlDocPtr *doc)
{
- int ret = -1;
-
- *buf = xmlBufferCreateSize (8192);
- if (buf == NULL) {
- ret = -1;
- goto out;
- }
- xmlBufferSetAllocationScheme (*buf, XML_BUFFER_ALLOC_DOUBLEIT);
+ int ret = -1;
- *writer = xmlNewTextWriterMemory (*buf, 0);
- if (writer == NULL) {
- ret = -1;
- goto out;
- }
+ *writer = xmlNewTextWriterDoc(doc, 0);
+ if (*writer == NULL) {
+ ret = -1;
+ goto out;
+ }
- ret = xmlTextWriterStartDocument (*writer, "1.0", "UTF-8", "yes");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterStartDocument(*writer, "1.0", "UTF-8", "yes");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <cliOutput> */
- ret = xmlTextWriterStartElement (*writer, (xmlChar *)"cliOutput");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <cliOutput> */
+ ret = xmlTextWriterStartElement(*writer, (xmlChar *)"cliOutput");
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_end_xml_output (xmlTextWriterPtr writer, xmlBufferPtr buf)
+cli_end_xml_output(xmlTextWriterPtr writer, xmlDocPtr doc)
{
- int ret = -1;
+ int ret = -1;
- /* </cliOutput> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </cliOutput> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterEndDocument (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterEndDocument(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- cli_out ("%s", (const char *)buf->content);
+ /* Dump xml document to stdout and pretty format it */
+ xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
- xmlFreeTextWriter (writer);
- xmlBufferFree (buf);
+ xmlFreeTextWriter(writer);
+ xmlFreeDoc(doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_common (xmlTextWriterPtr writer, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_common(xmlTextWriterPtr writer, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
+ int ret = -1;
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opRet", "%d",
+ op_ret);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opRet",
- "%d", op_ret);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opErrno", "%d",
+ op_errno);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrno",
- "%d", op_errno);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (op_errstr)
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opErrstr",
+ "%s", op_errstr);
+ else
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opErrstr",
+ "%s", "");
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrstr",
- "%s", op_errstr);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_str(char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"cliOp",
- "%s", op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (op) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"cliOp", "%s",
+ op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"output",
- "%s", str);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (str) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"output", "%s",
+ str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
-void
-cli_xml_output_data_pair (dict_t *this, char *key, data_t *value,
- void *data)
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_data_pair(dict_t *this, char *key, data_t *value, void *data)
{
- int ret = -1;
- xmlTextWriterPtr *writer = NULL;
+ int ret = -1;
+ xmlTextWriterPtr *writer = NULL;
- writer = (xmlTextWriterPtr *)data;
+ writer = (xmlTextWriterPtr *)data;
- ret = xmlTextWriterWriteFormatElement (*writer, (xmlChar *)key,
- "%s", value->data);
+ ret = xmlTextWriterWriteFormatElement(*writer, (xmlChar *)key, "%s",
+ value->data);
- return;
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ return ret;
}
+#endif
int
-cli_xml_output_dict ( char *op, dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_dict(char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
- /* <"op"> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
- dict_foreach (dict, cli_xml_output_data_pair, &writer);
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
- /* </"op"> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <"op"> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ if (dict)
+ dict_foreach(dict, cli_xml_output_data_pair, &writer);
+
+ /* </"op"> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int *online,
- gf_boolean_t *node_present)
-{
- int ret = -1;
- char *hostname = NULL;
- char *path = NULL;
- int port = 0;
- int status = 0;
- int pid = 0;
- char key[1024] = {0,};
-
- snprintf (key, sizeof (key), "brick%d.hostname", brick_index);
- ret = dict_get_str (dict, key, &hostname);
- if (ret) {
- *node_present = _gf_false;
- goto out;
+cli_xml_output_vol_status_common(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int *online,
+ gf_boolean_t *node_present)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *path = NULL;
+ char *uuid = NULL;
+ int port = 0;
+ int rdma_port = 0;
+ int status = 0;
+ int pid = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ snprintf(key, sizeof(key), "brick%d.hostname", brick_index);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret) {
+ *node_present = _gf_false;
+ goto out;
+ }
+ *node_present = _gf_true;
+
+ /* <node>
+ * will be closed in the calling function cli_xml_output_vol_status()*/
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname", "%s",
+ hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.path", brick_index);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"path", "%s",
+ path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.peerid", brick_index);
+ ret = dict_get_str(dict, key, &uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"peerid", "%s",
+ uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.status", brick_index);
+ ret = dict_get_int32(dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%d",
+ status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ *online = status;
+
+ snprintf(key, sizeof(key), "brick%d.port", brick_index);
+ ret = dict_get_int32(dict, key, &port);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "brick%d.rdma_port", brick_index);
+ ret = dict_get_int32(dict, key, &rdma_port);
+
+ /* If the process is either offline or doesn't provide a port (shd)
+ * port = "N/A"
+ * else print the port number of the process.
+ */
+
+ /*
+ * Tag 'port' can be removed once console management is started
+ * to support new tag ports.
+ */
+
+ if (*online == 1 && port != 0)
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"port", "%d",
+ port);
+ else
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"port", "%s",
+ "N/A");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"ports");
+ if (*online == 1 && (port != 0 || rdma_port != 0)) {
+ if (port) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"tcp",
+ "%d", port);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"tcp",
+ "%s", "N/A");
}
- *node_present = _gf_true;
-
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hostname",
- "%s", hostname);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", brick_index);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"path",
- "%s", path);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.port", brick_index);
- ret = dict_get_int32 (dict, key, &port);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"port",
- "%d", port);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (rdma_port) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"rdma",
+ "%d", rdma_port);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"rdma",
+ "%s", "N/A");
+ }
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"tcp", "%s",
+ "N/A");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"rdma", "%s",
+ "N/A");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.pid", brick_index);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid", "%d", pid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", brick_index);
- ret = dict_get_int32 (dict, key, &status);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"status",
- "%d", status);
- XML_RET_CHECK_AND_GOTO (ret, out);
- *online = status;
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.pid", brick_index);
- ret = dict_get_int32 (dict, key, &pid);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
- "%d", pid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_status_detail(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ uint64_t size_total = 0;
+ uint64_t size_free = 0;
+ char *device = NULL;
+ uint64_t block_size = 0;
+ char *mnt_options = NULL;
+ char *fs_name = NULL;
+ char *inode_size = NULL;
+ uint64_t inodes_total = 0;
+ uint64_t inodes_free = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ snprintf(key, sizeof(key), "brick%d.total", brick_index);
+ ret = dict_get_uint64(dict, key, &size_total);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"sizeTotal",
+ "%" PRIu64, size_total);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.free", brick_index);
+ ret = dict_get_uint64(dict, key, &size_free);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"sizeFree",
+ "%" PRIu64, size_free);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.device", brick_index);
+ ret = dict_get_str(dict, key, &device);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"device", "%s",
+ device);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.block_size", brick_index);
+ ret = dict_get_uint64(dict, key, &block_size);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"blockSize",
+ "%" PRIu64, block_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.mnt_options", brick_index);
+ ret = dict_get_str(dict, key, &mnt_options);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"mntOptions",
+ "%s", mnt_options);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.fs_name", brick_index);
+ ret = dict_get_str(dict, key, &fs_name);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"fsName", "%s",
+ fs_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.inode_size", brick_index);
+ ret = dict_get_str(dict, key, &inode_size);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"inodeSize",
+ "%s", fs_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.total_inodes", brick_index);
+ ret = dict_get_uint64(dict, key, &inodes_total);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"inodesTotal",
+ "%" PRIu64, inodes_total);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.free_inodes", brick_index);
+ ret = dict_get_uint64(dict, key, &inodes_free);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"inodesFree",
+ "%" PRIu64, inodes_free);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else {
+ ret = 0;
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_detail (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_status_mempool(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- uint64_t size_total = 0;
- uint64_t size_free = 0;
- char *device = NULL;
- uint64_t block_size = 0;
- char *mnt_options = NULL;
- char *fs_name = NULL;
- char *inode_size = NULL;
- uint64_t inodes_total = 0;
- uint64_t inodes_free = 0;
- char key[1024] = {0,};
-
- snprintf (key, sizeof (key), "brick%d.total", brick_index);
- ret = dict_get_uint64 (dict, key, &size_total);
+ int ret = -1;
+ int mempool_count = 0;
+ char *name = NULL;
+ int hotcount = 0;
+ int coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int maxalloc = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <mempool> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"mempool");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.mempool-count", prefix);
+ ret = dict_get_int32(dict, key, &mempool_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ mempool_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < mempool_count; i++) {
+ /* <pool> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"pool");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_str(dict, key, &name);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeTotal",
- "%"PRIu64, size_total);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free", brick_index);
- ret = dict_get_uint64 (dict, key, &size_free);
+ snprintf(key, sizeof(key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32(dict, key, &hotcount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeFree",
- "%"PRIu64, size_free);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hotCount",
+ "%d", hotcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.device", brick_index);
- ret = dict_get_str (dict, key, &device);
+ snprintf(key, sizeof(key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32(dict, key, &coldcount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"device",
- "%s", device);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"coldCount",
+ "%d", coldcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.block_size", brick_index);
- ret = dict_get_uint64 (dict, key, &block_size);
+ snprintf(key, sizeof(key), "%s.pool%d.paddedsizeof", prefix, i);
+ ret = dict_get_uint64(dict, key, &paddedsizeof);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"blockSize",
- "%"PRIu64, block_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"padddedSizeOf", "%" PRIu64, paddedsizeof);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mnt_options", brick_index);
- ret = dict_get_str (dict, key, &mnt_options);
+ snprintf(key, sizeof(key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64(dict, key, &alloccount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"mntOptions",
- "%s", mnt_options);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"allocCount",
+ "%" PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.fs_name", brick_index);
- ret = dict_get_str (dict, key, &fs_name);
+ snprintf(key, sizeof(key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32(dict, key, &maxalloc);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsName",
- "%s", fs_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* inode details are only available for ext 2/3/4 & xfs */
- if (!IS_EXT_FS(fs_name) || strcmp (fs_name, "xfs")) {
- ret = 0;
- goto out;
- }
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.inode_size", brick_index);
- ret = dict_get_str (dict, key, &inode_size);
+ snprintf(key, sizeof(key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64(dict, key, &alloccount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodeSize",
- "%s", fs_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"poolMisses",
+ "%" PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.total_inodes", brick_index);
- ret = dict_get_uint64 (dict, key, &inodes_total);
+ snprintf(key, sizeof(key), "%s.pool%d.max-stdalloc", prefix, i);
+ ret = dict_get_int32(dict, key, &maxalloc);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesTotal",
- "%"PRIu64, inodes_total);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxStdAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free_inodes", brick_index);
- ret = dict_get_uint64 (dict, key, &inodes_free);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesFree",
- "%"PRIu64, inodes_free);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </pool> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </mempool> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_mempool (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
-{
- int ret = -1;
- int mempool_count = 0;
- char *name = NULL;
- int hotcount = 0;
- int coldcount = 0;
- uint64_t paddedsizeof = 0;
- uint64_t alloccount = 0;
- int maxalloc = 0;
- char key[1024] = {0,};
- int i = 0;
-
- /* <mempool> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"mempool");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%s.mempool-count", prefix);
- ret = dict_get_int32 (dict, key, &mempool_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%d", mempool_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < mempool_count; i++) {
- /* <pool> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"pool");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
- ret = dict_get_str (dict, key, &name);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"name",
- "%s", name);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
- ret = dict_get_int32 (dict, key, &hotcount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"hotCount",
- "%d", hotcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
- ret = dict_get_int32 (dict, key, &coldcount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"coldCount",
- "%d", coldcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
- prefix, i);
- ret = dict_get_uint64 (dict, key, &paddedsizeof);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"padddedSizeOf", "%"PRIu64,
- paddedsizeof);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
- ret = dict_get_uint64 (dict, key, &alloccount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"allocCount",
- "%"PRIu64, alloccount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxalloc);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"maxAlloc",
- "%d", maxalloc);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
- ret = dict_get_uint64 (dict, key, &alloccount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"poolMisses",
- "%"PRIu64, alloccount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxalloc);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"maxStdAlloc",
- "%d", maxalloc);
- XML_RET_CHECK_AND_GOTO (ret, out);
+cli_xml_output_vol_status_mem(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int arena = 0;
+ int ordblks = 0;
+ int smblks = 0;
+ int hblks = 0;
+ int hblkhd = 0;
+ int usmblks = 0;
+ int fsmblks = 0;
+ int uordblks = 0;
+ int fordblks = 0;
+ int keepcost = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <memStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"memStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <mallinfo> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"mallinfo");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.arena", brick_index);
+ ret = dict_get_int32(dict, key, &arena);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"arena", "%d",
+ arena);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.ordblks", brick_index);
+ ret = dict_get_int32(dict, key, &ordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"ordblks", "%d",
+ ordblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.smblks", brick_index);
+ ret = dict_get_int32(dict, key, &smblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"smblks", "%d",
+ smblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblks", brick_index);
+ ret = dict_get_int32(dict, key, &hblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hblks", "%d",
+ hblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblkhd", brick_index);
+ ret = dict_get_int32(dict, key, &hblkhd);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hblkhd", "%d",
+ hblkhd);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.usmblks", brick_index);
+ ret = dict_get_int32(dict, key, &usmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"usmblks", "%d",
+ usmblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fsmblks", brick_index);
+ ret = dict_get_int32(dict, key, &fsmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"fsmblks", "%d",
+ fsmblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.uordblks", brick_index);
+ ret = dict_get_int32(dict, key, &uordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uordblks", "%d",
+ uordblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fordblks", brick_index);
+ ret = dict_get_int32(dict, key, &fordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"fordblks", "%d",
+ fordblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.keepcost", brick_index);
+ ret = dict_get_int32(dict, key, &keepcost);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"keepcost", "%d",
+ keepcost);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </mallinfo> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d", brick_index);
+ ret = cli_xml_output_vol_status_mempool(writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </memStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- /* </pool> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+int
+cli_xml_output_vol_status_clients(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int client_count = 0;
+ char *hostname = NULL;
+ uint64_t bytes_read = 0;
+ uint64_t bytes_write = 0;
+ uint32_t opversion = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <clientsStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"clientsStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.clientcount", brick_index);
+ ret = dict_get_int32(dict, key, &client_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"clientCount",
+ "%d", client_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < client_count; i++) {
+ /* <client> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"client");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.hostname", brick_index, i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.bytesread", brick_index,
+ i);
+ ret = dict_get_uint64(dict, key, &bytes_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"bytesRead",
+ "%" PRIu64, bytes_read);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.byteswrite", brick_index,
+ i);
+ ret = dict_get_uint64(dict, key, &bytes_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"bytesWrite",
+ "%" PRIu64, bytes_write);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.opversion", brick_index,
+ i);
+ ret = dict_get_uint32(dict, key, &opversion);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opVersion",
+ "%" PRIu32, opversion);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </client> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </clientsStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- /* </mempool> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_status_inode_entry(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <inode> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"inode");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.gfid", prefix);
+ ret = dict_get_str(dict, key, &gfid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"gfid", "%s",
+ gfid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.nlookup", prefix);
+ ret = dict_get_uint64(dict, key, &nlookup);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nLookup",
+ "%" PRIu64, nlookup);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.ref", prefix);
+ ret = dict_get_uint32(dict, key, &ref);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"ref", "%" PRIu32,
+ ref);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.ia_type", prefix);
+ ret = dict_get_int32(dict, key, &ia_type);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"iaType", "%d",
+ ia_type);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </inode> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_mem (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
-{
- int ret = -1;
- int arena = 0;
- int ordblks = 0;
- int smblks = 0;
- int hblks = 0;
- int hblkhd = 0;
- int usmblks = 0;
- int fsmblks = 0;
- int uordblks = 0;
- int fordblks = 0;
- int keepcost = 0;
- char key[1024] = {0,};
-
- /* <memStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"memStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <mallinfo> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"mallinfo");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "brick%d.mallinfo.arena", brick_index);
- ret = dict_get_int32 (dict, key, &arena);
- if (ret)
+cli_xml_output_vol_status_itable(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ snprintf(key, sizeof(key), "%s.active_size", prefix);
+ ret = dict_get_uint32(dict, key, &active_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"activeSize",
+ "%" PRIu32, active_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (active_size != 0) {
+ /* <active> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"active");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < active_size; i++) {
+ snprintf(key, sizeof(key), "%s.active%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry(writer, dict, key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"arena",
- "%d", arena);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", brick_index);
- ret = dict_get_int32 (dict, key, &ordblks);
- if (ret)
+ }
+ /* </active> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.lru_size", prefix);
+ ret = dict_get_uint32(dict, key, &lru_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lruSize",
+ "%" PRIu32, lru_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (lru_size != 0) {
+ /* <lru> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"lru");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < lru_size; i++) {
+ snprintf(key, sizeof(key), "%s.lru%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry(writer, dict, key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ordblks",
- "%d", ordblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", brick_index);
- ret = dict_get_int32 (dict, key, &smblks);
- if (ret)
+ }
+ /* </lru> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.purge_size", prefix);
+ ret = dict_get_uint32(dict, key, &purge_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"purgeSize",
+ "%" PRIu32, purge_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (purge_size != 0) {
+ /* <purge> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"purge");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < purge_size; i++) {
+ snprintf(key, sizeof(key), "%s.purge%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry(writer, dict, key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"smblks",
- "%d", smblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ /* </purge> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", brick_index);
- ret = dict_get_int32 (dict, key, &hblks);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblks",
- "%d", hblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", brick_index);
- ret = dict_get_int32 (dict, key, &hblkhd);
+int
+cli_xml_output_vol_status_inode(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <inodeStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"inodeStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conn%d.itable", brick_index, i);
+ ret = cli_xml_output_vol_status_itable(writer, dict, key);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblkhd",
- "%d", hblkhd);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", brick_index);
- ret = dict_get_int32 (dict, key, &usmblks);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"usmblks",
- "%d", usmblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </connection> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", brick_index);
- ret = dict_get_int32 (dict, key, &fsmblks);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsmblks",
- "%d", fsmblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </inodeStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", brick_index);
- ret = dict_get_int32 (dict, key, &uordblks);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fdtable(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <fdTable> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fdTable");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &refcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"refCount", "%d",
+ refcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.maxfds", prefix);
+ ret = dict_get_uint32(dict, key, &maxfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxFds",
+ "%" PRIu32, maxfds);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.firstfree", prefix);
+ ret = dict_get_int32(dict, key, &firstfree);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"firstFree", "%d",
+ firstfree);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.openfds", prefix);
+ ret = dict_get_int32(dict, key, &openfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"openFds", "%d",
+ openfds);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < maxfds; i++) {
+ snprintf(key, sizeof(key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_pid);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uordblks",
- "%d", uordblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ continue;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", brick_index);
- ret = dict_get_int32 (dict, key, &fordblks);
+ snprintf(key, sizeof(key), "%s.fdentry%d.refcount", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_refcount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fordblks",
- "%d", fordblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ continue;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", brick_index);
- ret = dict_get_int32 (dict, key, &keepcost);
+ snprintf(key, sizeof(key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_flags);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"keepcost",
- "%d", keepcost);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ continue;
+
+ /* <fd> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fd");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"entry", "%d",
+ i + 1);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid", "%d",
+ fd_pid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </mallinfo> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"refCount",
+ "%d", fd_refcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d", brick_index);
- ret = cli_xml_output_vol_status_mempool (writer, dict, key);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"flags", "%d",
+ fd_flags);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </fd> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </fdTable> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fd(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <fdStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fdStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conn%d.fdtable", brick_index, i);
+ ret = cli_xml_output_vol_status_fdtable(writer, dict, key);
if (ret)
- goto out;
+ goto out;
- /* </memStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </connection> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </fdStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_clients (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_status_callframe(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- int client_count = 0;
- char *hostname = NULL;
- uint64_t bytes_read = 0;
- uint64_t bytes_write = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int ref_count = 0;
+ char *translator = NULL;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <callFrame> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"callFrame");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &ref_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"refCount", "%d",
+ ref_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.translator", prefix);
+ ret = dict_get_str(dict, key, &translator);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"translator", "%s",
+ translator);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.complete", prefix);
+ ret = dict_get_int32(dict, key, &complete);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"complete", "%d",
+ complete);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.parent", prefix);
+ ret = dict_get_str(dict, key, &parent);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"parent", "%s",
+ parent);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.windfrom", prefix);
+ ret = dict_get_str(dict, key, &wind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"windFrom",
+ "%s", wind_from);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.windto", prefix);
+ ret = dict_get_str(dict, key, &wind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"windTo", "%s",
+ wind_to);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.unwindfrom", prefix);
+ ret = dict_get_str(dict, key, &unwind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"unwindFrom",
+ "%s", unwind_from);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.unwindto", prefix);
+ ret = dict_get_str(dict, key, &unwind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"unwindTo",
+ "%s", unwind_to);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </callFrame> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- /* <clientsStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"clientsStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_status_callstack(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ int frame_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <callStack> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"callStack");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.uid", prefix);
+ ret = dict_get_int32(dict, key, &uid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uid", "%d", uid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.gid", prefix);
+ ret = dict_get_int32(dict, key, &gid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"gid", "%d", gid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid", "%d", pid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.unique", prefix);
+ ret = dict_get_uint64(dict, key, &unique);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"unique",
+ "%" PRIu64, unique);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_get_int32(dict, key, &frame_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"frameCount", "%d",
+ frame_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < frame_count; i++) {
+ snprintf(key, sizeof(key), "%s.frame%d", prefix, i);
+ ret = cli_xml_output_vol_status_callframe(writer, dict, key);
+ if (ret)
+ goto out;
+ }
- snprintf (key, sizeof (key), "brick%d.clientcount", brick_index);
- ret = dict_get_int32 (dict, key, &client_count);
+ /* </callStack> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callpool(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int call_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <callpoolStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"callpoolStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.callpool.count", brick_index);
+ ret = dict_get_int32(dict, key, &call_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ call_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < call_count; i++) {
+ snprintf(key, sizeof(key), "brick%d.callpool.stack%d", brick_index, i);
+ ret = cli_xml_output_vol_status_callstack(writer, dict, key);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"clientCount",
- "%d", client_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < client_count; i++) {
- /* <client> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"client");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.client%d.hostname",
- brick_index, i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"hostname",
- "%s", hostname);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.client%d.bytesread",
- brick_index, i);
- ret = dict_get_uint64 (dict, key, &bytes_read);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"bytesRead",
- "%"PRIu64, bytes_read);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.client%d.byteswrite",
- brick_index, i);
- ret = dict_get_uint64 (dict, key, &bytes_write);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"bytesWrite",
- "%"PRIu64, bytes_write);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* </client> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ goto out;
+ }
+
+ /* </callpoolStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </clientsStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_vol_status_inode_entry (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+cli_xml_output_vol_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- char *gfid = NULL;
- uint64_t nlookup = 0;
- uint32_t ref = 0;
- int ia_type = 0;
- char key[1024] = {0,};
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <inode> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"inode");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%s.gfid", prefix);
- ret = dict_get_str (dict, key, &gfid);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gfid",
- "%s", gfid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key,0, sizeof (key));
- snprintf (key, sizeof (key), "%s.nlookup", prefix);
- ret = dict_get_uint64 (dict, key, &nlookup);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nLookup",
- "%"PRIu64, nlookup);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <volStatus> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key,0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ref", prefix);
- ret = dict_get_uint32 (dict, key, &ref);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ref",
- "%"PRIu32, ref);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <volumes> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key,0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ia_type", prefix);
- ret = dict_get_int32 (dict, key, &ia_type);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"iaType",
- "%d", ia_type);
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_status_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* </inode> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </volumes> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ /* </volStatus> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(local->writer, local->doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_itable (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+cli_xml_output_remove_brick_task_params(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- uint32_t active_size = 0;
- uint32_t lru_size = 0;
- uint32_t purge_size = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ int count = 0;
+ int i = 0;
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_get_int32(dict, key, &count);
+ if (ret)
+ goto out;
+
+ for (i = 1; i <= count; i++) {
+ snprintf(key, sizeof(key), "%s.brick%d", prefix, i);
+ ret = dict_get_str(dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brick", "%s",
+ brick);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ brick = NULL;
+ }
+
+ /* </param> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- snprintf (key, sizeof (key), "%s.active_size", prefix);
- ret = dict_get_uint32 (dict, key, &active_size);
+int
+cli_xml_output_vol_status_tasks(cli_local_t *local, dict_t *dict)
+{
+ int ret = -1;
+ char *task_type = NULL;
+ char *task_id_str = NULL;
+ int status = 0;
+ int tasks = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <tasks> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"tasks");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "tasks", &tasks);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < tasks; i++) {
+ /* <task> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"task");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "task%d.type", i);
+ ret = dict_get_str(dict, key, &task_type);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"activeSize",
- "%"PRIu32, active_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
- if (active_size != 0) {
- /* <active> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"active");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < active_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.active%d", prefix, i);
- ret = cli_xml_output_vol_status_inode_entry
- (writer, dict, key);
- if (ret)
- goto out;
- }
- /* </active> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"type",
+ "%s", task_type);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru_size", prefix);
- ret = dict_get_uint32 (dict, key, &lru_size);
+ snprintf(key, sizeof(key), "task%d.id", i);
+ ret = dict_get_str(dict, key, &task_id_str);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"lruSize",
- "%"PRIu32, lru_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
- if (lru_size != 0) {
- /* <lru> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"lru");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < lru_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
- ret = cli_xml_output_vol_status_inode_entry
- (writer, dict, key);
- if (ret)
- goto out;
- }
- /* </lru> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge_size", prefix);
- ret = dict_get_uint32 (dict, key, &purge_size);
+ snprintf(key, sizeof(key), "task%d.status", i);
+ ret = dict_get_int32(dict, key, &status);
if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"status", "%d", status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"statusStr", "%s",
+ cli_vol_task_status_str[status]);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "task%d", i);
+ if (!strcmp(task_type, "Remove brick")) {
+ ret = cli_xml_output_remove_brick_task_params(local->writer, dict,
+ key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"purgeSize",
- "%"PRIu32, purge_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
- if (purge_size != 0) {
- /* <purge> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"purge");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < purge_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
- ret = cli_xml_output_vol_status_inode_entry
- (writer, dict, key);
- if (ret)
- goto out;
- }
- /* </purge> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
}
+ /* </task> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </tasks> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
+
int
-cli_xml_output_vol_status_inode (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_status_tasks_detail(cli_local_t *local, dict_t *dict)
{
- int ret = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ char *volname = NULL;
- /* <inodeStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"inodeStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /*<volume>*/
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
- ret = dict_get_int32 (dict, key, &conn_count);
- if (ret)
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"volName",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_xml_output_vol_status_tasks(local, dict);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_status(cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ char *volname = NULL;
+ int brick_count = 0;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ uint32_t cmd = GF_CLI_STATUS_NONE;
+ int online = 0;
+ gf_boolean_t node_present = _gf_true;
+ int i;
+ int type = -1;
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"volName",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"nodeCount",
+ "%d", brick_count);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret)
+ goto out;
+
+ for (i = 0; i <= index_max; i++) {
+ ret = cli_xml_output_vol_status_common(local->writer, dict, i, &online,
+ &node_present);
+ if (ret) {
+ if (node_present)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
- "%d", conn_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < conn_count; i++) {
- /* <connection> */
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"connection");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.itable",
- brick_index, i);
- ret = cli_xml_output_vol_status_itable (writer, dict, key);
+ else
+ continue;
+ }
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_DETAIL:
+ ret = cli_xml_output_vol_status_detail(local->writer, dict, i);
if (ret)
+ goto out;
+ break;
+
+ case GF_CLI_STATUS_MEM:
+ if (online) {
+ ret = cli_xml_output_vol_status_mem(local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ if (online) {
+ ret = cli_xml_output_vol_status_clients(local->writer, dict,
+ i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ if (online) {
+ ret = cli_xml_output_vol_status_inode(local->writer, dict,
+ i);
+ if (ret)
goto out;
+ }
+ break;
- /* </connection> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ case GF_CLI_STATUS_FD:
+ if (online) {
+ ret = cli_xml_output_vol_status_fd(local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_CALLPOOL:
+ if (online) {
+ ret = cli_xml_output_vol_status_callpool(local->writer,
+ dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+ default:
+ break;
}
- /* </inodeStatus> */
- ret= xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </node> was opened in cli_xml_output_vol_status_common()*/
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* Tasks are only present when a normal volume status call is done on a
+ * single volume or on all volumes
+ */
+ if (((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) &&
+ (cmd & (GF_CLI_STATUS_VOL | GF_CLI_STATUS_ALL))) {
+ ret = cli_xml_output_vol_status_tasks(local, dict);
+ if (ret)
+ goto out;
+ }
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_fdtable (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
-{
- int ret = -1;
- int refcount = 0;
- uint32_t maxfds = 0;
- int firstfree = 0;
- int openfds = 0;
- int fd_pid = 0;
- int fd_refcount = 0;
- int fd_flags = 0;
- char key[1024] = {0,};
- int i = 0;
-
- /* <fdTable> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdTable");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &refcount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
- "%d", refcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
+cli_xml_output_vol_top_rw_perf(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t throughput = 0;
+ struct timeval tv = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char key[1024] = {
+ 0,
+ };
+
+ /* <file> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-filename-%d", brick_index, member_index);
+ ret = dict_get_str(dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"filename", "%s",
+ filename);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64(dict, key, &throughput);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count",
+ "%" PRIu64, throughput);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-time-sec-%d", brick_index, member_index);
+ ret = dict_get_int32(dict, key, (int32_t *)&tv.tv_sec);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%d-time-usec-%d", brick_index, member_index);
+ ret = dict_get_int32(dict, key, (int32_t *)&tv.tv_usec);
+ if (ret)
+ goto out;
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"time", "%s",
+ timestr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.maxfds", prefix);
- ret = dict_get_uint32 (dict, key, &maxfds);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"maxFds",
- "%"PRIu32, maxfds);
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_top_other(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t count = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <file> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-filename-%d", brick_index, member_index);
+ ret = dict_get_str(dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"filename", "%s",
+ filename);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64(dict, key, &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count",
+ "%" PRIu64, count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.firstfree", prefix);
- ret = dict_get_int32 (dict, key, &firstfree);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_top(dict_t *dict, int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int brick_count = 0;
+ int top_op = GF_CLI_TOP_NONE;
+ char *brick_name = NULL;
+ int members = 0;
+ uint64_t current_open = 0;
+ uint64_t max_open = 0;
+ char *max_open_time = NULL;
+ double throughput = 0.0;
+ double time_taken = 0.0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volTop> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volTop");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickCount", "%d",
+ brick_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "1-top-op", &top_op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"topOp", "%d",
+ top_op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-brick", i);
+ ret = dict_get_str(dict, key, &brick_name);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"firstFree",
- "%d", firstfree);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ brick_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.openfds", prefix);
- ret = dict_get_int32 (dict, key, &openfds);
+ snprintf(key, sizeof(key), "%d-members", i);
+ ret = dict_get_int32(dict, key, &members);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"openFds",
- "%d", openfds);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < maxfds; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_pid);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"members",
+ "%d", members);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ switch (top_op) {
+ case GF_CLI_TOP_OPEN:
+ snprintf(key, sizeof(key), "%d-current-open", i);
+ ret = dict_get_uint64(dict, key, &current_open);
if (ret)
- continue;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"currentOpen", "%" PRIu64, current_open);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
- prefix, i);
- ret = dict_get_int32 (dict, key, &fd_refcount);
+ snprintf(key, sizeof(key), "%d-max-open", i);
+ ret = dict_get_uint64(dict, key, &max_open);
if (ret)
- continue;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"maxOpen", "%" PRIu64, max_open);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_flags);
+ snprintf(key, sizeof(key), "%d-max-openfd-time", i);
+ ret = dict_get_str(dict, key, &max_open_time);
if (ret)
- continue;
-
- /* <fd> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fd");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"entry",
- "%d", i+1);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"pid",
- "%d", fd_pid);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"refCount",
- "%d", fd_refcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"flags",
- "%d", fd_flags);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* </fd> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"maxOpenTime", "%s", max_open_time);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ case GF_CLI_TOP_READ:
+ case GF_CLI_TOP_WRITE:
+ case GF_CLI_TOP_OPENDIR:
+ case GF_CLI_TOP_READDIR:
+
+ break;
+
+ case GF_CLI_TOP_READ_PERF:
+ case GF_CLI_TOP_WRITE_PERF:
+ snprintf(key, sizeof(key), "%d-throughput", i);
+ ret = dict_get_double(dict, key, &throughput);
+ if (!ret) {
+ snprintf(key, sizeof(key), "%d-time", i);
+ ret = dict_get_double(dict, key, &time_taken);
+ }
+
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"throughput", "%f", throughput);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"timeTaken", "%f", time_taken);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ break;
+
+ default:
+ ret = -1;
+ goto out;
}
- /* </fdTable> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ for (j = 1; j <= members; j++) {
+ if (top_op == GF_CLI_TOP_READ_PERF ||
+ top_op == GF_CLI_TOP_WRITE_PERF) {
+ ret = cli_xml_output_vol_top_rw_perf(writer, dict, i, j);
+ } else {
+ ret = cli_xml_output_vol_top_other(writer, dict, i, j);
+ }
+ if (ret)
+ goto out;
+ }
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </volTop> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_fd (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_profile_stats(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int interval)
{
- int ret = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ uint64_t read_count = 0;
+ uint64_t write_count = 0;
+ uint64_t hits = 0;
+ double avg_latency = 0.0;
+ double max_latency = 0.0;
+ double min_latency = 0.0;
+ uint64_t duration = 0;
+ uint64_t total_read = 0;
+ uint64_t total_write = 0;
+ char key[1024] = {0};
+ int i = 0;
+
+ /* <cumulativeStats> || <intervalStats> */
+ if (interval == -1)
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"cumulativeStats");
+ else
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"intervalStats");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <blockStats> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"blockStats");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < 32; i++) {
+ /* <block> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"block");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"size",
+ "%" PRIu32, (1U << i));
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-read-%" PRIu32, brick_index, interval,
+ (1U << i));
+ ret = dict_get_uint64(dict, key, &read_count);
+ if (ret)
+ read_count = 0;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"reads",
+ "%" PRIu64, read_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-write-%" PRIu32, brick_index,
+ interval, (1U << i));
+ ret = dict_get_uint64(dict, key, &write_count);
+ if (ret)
+ write_count = 0;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"writes",
+ "%" PRIu64, write_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </block> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </blockStats> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <fopStats> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fopStats");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ snprintf(key, sizeof(key), "%d-%d-%d-hits", brick_index, interval, i);
+ ret = dict_get_uint64(dict, key, &hits);
+ if (ret)
+ goto cont;
- /* <fdStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf(key, sizeof(key), "%d-%d-%d-avglatency", brick_index, interval,
+ i);
+ ret = dict_get_double(dict, key, &avg_latency);
+ if (ret)
+ goto cont;
+
+ snprintf(key, sizeof(key), "%d-%d-%d-minlatency", brick_index, interval,
+ i);
+ ret = dict_get_double(dict, key, &min_latency);
+ if (ret)
+ goto cont;
+
+ snprintf(key, sizeof(key), "%d-%d-%d-maxlatency", brick_index, interval,
+ i);
+ ret = dict_get_double(dict, key, &max_latency);
+ if (ret)
+ goto cont;
+
+ /* <fop> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fop");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ gf_fop_list[i]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hits",
+ "%" PRIu64, hits);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"avgLatency",
+ "%f", avg_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"minLatency",
+ "%f", min_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxLatency",
+ "%f", max_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </fop> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ cont:
+ hits = 0;
+ avg_latency = 0.0;
+ min_latency = 0.0;
+ max_latency = 0.0;
+ }
+
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ hits = 0;
+ avg_latency = 0.0;
+ min_latency = 0.0;
+ max_latency = 0.0;
+
+ snprintf(key, sizeof(key), "%d-%d-%d-upcall-hits", brick_index,
+ interval, i);
+ ret = dict_get_uint64(dict, key, &hits);
+ if (ret)
+ continue;
+
+ /* <fop> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fop");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ gf_fop_list[i]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hits",
+ "%" PRIu64, hits);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"avgLatency",
+ "%f", avg_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"minLatency",
+ "%f", min_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxLatency",
+ "%f", max_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </fop> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </fopStats> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-duration", brick_index, interval);
+ ret = dict_get_uint64(dict, key, &duration);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"duration",
+ "%" PRIu64, duration);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-total-read", brick_index, interval);
+ ret = dict_get_uint64(dict, key, &total_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"totalRead",
+ "%" PRIu64, total_read);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-total-write", brick_index, interval);
+ ret = dict_get_uint64(dict, key, &total_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"totalWrite",
+ "%" PRIu64, total_write);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </cumulativeStats> || </intervalStats> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
- snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
- ret = dict_get_int32 (dict, key, &conn_count);
+int
+cli_xml_output_vol_profile(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ int op = GF_CLI_STATS_NONE;
+ int info_op = GF_CLI_INFO_NONE;
+ int brick_count = 0;
+ char *brick_name = NULL;
+ int interval = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int stats_cleared = 0;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volProfile> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volProfile");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volname", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "op", &op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"profileOp", "%d",
+ op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (GF_CLI_STATS_INFO != op)
+ goto cont;
+
+ ret = dict_get_int32(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickCount", "%d",
+ brick_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "info-op", &info_op);
+ if (ret)
+ goto out;
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-brick", i);
+ ret = dict_get_str(dict, key, &brick_name);
if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickName",
+ "%s", brick_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (GF_CLI_INFO_CLEAR == info_op) {
+ snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32(dict, key, &stats_cleared);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
- "%d", conn_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < conn_count; i++) {
- /* <connection> */
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"connection");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
- brick_index, i);
- ret = cli_xml_output_vol_status_fdtable (writer, dict, key);
- if (ret)
- goto out;
- /* </connection> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"clearStats", "%s",
+ stats_cleared ? "Cleared stats." : "Failed to clear stats.");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else {
+ snprintf(key, sizeof(key), "%d-cumulative", i);
+ ret = dict_get_int32(dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats(writer, dict, i,
+ interval);
+ if (ret)
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%d-interval", i);
+ ret = dict_get_int32(dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats(writer, dict, i,
+ interval);
+ if (ret)
+ goto out;
+ }
}
- /* </fdStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </brick> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+cont:
+ /* </volProfile> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_status_callframe (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
-{
- int ret = -1;
- int ref_count = 0;
- char *translator = NULL;
- int complete = 0;
- char *parent = NULL;
- char *wind_from = NULL;
- char *wind_to = NULL;
- char *unwind_from = NULL;
- char *unwind_to = NULL;
- char key[1024] = {0,};
-
- /* <callFrame> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"callFrame");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &ref_count);
+cli_xml_output_vol_list(dict_t *dict, int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *volname = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volList> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volList");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < count; i++) {
+ snprintf(key, sizeof(key), "volume%d", i);
+ ret = dict_get_str(dict, key, &volname);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
- "%d", ref_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volume", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.translator", prefix);
- ret = dict_get_str (dict, key, &translator);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"translator",
- "%s", translator);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </volList> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.complete", prefix);
- ret = dict_get_int32 (dict, key, &complete);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"complete",
- "%d", complete);
- XML_RET_CHECK_AND_GOTO (ret ,out);
+ ret = cli_end_xml_output(writer, doc);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.parent", prefix);
- ret = dict_get_str (dict, key, &parent);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"parent",
- "%s", parent);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_info_option(xmlTextWriterPtr writer, char *substr,
+ char *optstr, char *valstr)
+{
+ int ret = -1;
+ char *ptr1 = NULL;
+ char *ptr2 = NULL;
+
+ ptr1 = substr;
+ ptr2 = optstr;
+
+ while (ptr1) {
+ if (*ptr1 != *ptr2)
+ break;
+ ptr1++;
+ ptr2++;
+ if (!*ptr1)
+ break;
+ if (!*ptr2)
+ break;
+ }
+ if (*ptr2 == '\0')
+ goto out;
+
+ /* <option> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"option");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ ptr2);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"value", "%s",
+ valstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </option> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windfrom", prefix);
- ret = dict_get_str (dict, key, &wind_from);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"windFrom",
- "%s", wind_from);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+struct tmp_xml_option_logger {
+ char *key;
+ xmlTextWriterPtr writer;
+};
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windto", prefix);
- ret = dict_get_str (dict, key, &wind_to);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"windTo",
- "%s", wind_to);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+static int
+_output_vol_info_option(dict_t *d, char *k, data_t *v, void *data)
+{
+ int ret = 0;
+ char *ptr = NULL;
+ struct tmp_xml_option_logger *tmp = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
- ret = dict_get_str (dict, key, &unwind_from);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"unwindFrom",
- "%s", unwind_from);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ tmp = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindto", prefix);
- ret = dict_get_str (dict, key, &unwind_to);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"unwindTo",
- "%s", unwind_to);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ ptr = strstr(k, "option.");
+ if (!ptr)
+ goto out;
+
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ ret = cli_xml_output_vol_info_option(tmp->writer, tmp->key, k, v->data);
- /* </callFrame> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
int
-cli_xml_output_vol_status_callstack (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+cli_xml_output_vol_info_options(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- int uid = 0;
- int gid = 0;
- int pid = 0;
- uint64_t unique = 0;
- int frame_count = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int opt_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ struct tmp_xml_option_logger tmp = {
+ 0,
+ };
+
+ snprintf(key, sizeof(key), "%s.opt_count", prefix);
+ ret = dict_get_int32(dict, key, &opt_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"optCount", "%d",
+ opt_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <options> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"options");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ snprintf(key, sizeof(key), "%s.option.", prefix);
+
+ tmp.key = key;
+ tmp.writer = writer;
+ ret = dict_foreach(dict, _output_vol_info_option, &tmp);
+ if (ret)
+ goto out;
+
+ /* </options> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = 0;
+ int count = 0;
+ char *volname = NULL;
+ char *volume_id = NULL;
+ char *uuid = NULL;
+ int type = 0;
+ int status = 0;
+ int brick_count = 0;
+ int dist_count = 0;
+ int stripe_count = 0;
+ int replica_count = 0;
+ int arbiter_count = 0;
+ int snap_count = 0;
+ int isArbiter = 0;
+ int disperse_count = 0;
+ int redundancy_count = 0;
+ int transport = 0;
+ char *brick = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int j = 1;
+ char *caps __attribute__((unused)) = NULL;
+ int k __attribute__((unused)) = 0;
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.name", i);
+ ret = dict_get_str(dict, key, &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <callStack> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"callStack");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf(key, sizeof(key), "volume%d.volume_id", i);
+ ret = dict_get_str(dict, key, &volume_id);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"id",
+ "%s", volume_id);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%s.uid", prefix);
- ret = dict_get_int32 (dict, key, &uid);
+ snprintf(key, sizeof(key), "volume%d.status", i);
+ ret = dict_get_int32(dict, key, &status);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uid",
- "%d", uid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"status", "%d", status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"statusStr", "%s",
+ cli_vol_status_str[status]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.snap_count", i);
+ ret = dict_get_int32(dict, key, &snap_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"snapshotCount", "%d", snap_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gid", prefix);
- ret = dict_get_int32 (dict, key, &gid);
+ snprintf(key, sizeof(key), "volume%d.brick_count", i);
+ ret = dict_get_int32(dict, key, &brick_count);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gid",
- "%d", gid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"brickCount", "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_get_int32 (dict, key, &pid);
+ snprintf(key, sizeof(key), "volume%d.dist_count", i);
+ ret = dict_get_int32(dict, key, &dist_count);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
- "%d", pid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"distCount", "%d",
+ (brick_count / dist_count));
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.stripe_count", i);
+ ret = dict_get_int32(dict, key, &stripe_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"stripeCount", "%d", stripe_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unique", prefix);
- ret = dict_get_uint64 (dict, key, &unique);
+ snprintf(key, sizeof(key), "volume%d.replica_count", i);
+ ret = dict_get_int32(dict, key, &replica_count);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"unique",
- "%"PRIu64, unique);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"replicaCount", "%d", replica_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.arbiter_count", i);
+ ret = dict_get_int32(dict, key, &arbiter_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"arbiterCount", "%d", arbiter_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.count", prefix);
- ret = dict_get_int32 (dict, key, &frame_count);
+ snprintf(key, sizeof(key), "volume%d.disperse_count", i);
+ ret = dict_get_int32(dict, key, &disperse_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"disperseCount", "%d", disperse_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.redundancy_count", i);
+ ret = dict_get_int32(dict, key, &redundancy_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"redundancyCount",
+ "%d", redundancy_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.type", i);
+ ret = dict_get_int32(dict, key, &type);
+ if (ret)
+ goto out;
+ /* For Distributed-(stripe,replicate,stipe-replicate,disperse)
+ types
+ */
+ type = get_vol_type(type, dist_count, brick_count);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"type",
+ "%d", type);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"typeStr", "%s", vol_type_str[type]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.transport", i);
+ ret = dict_get_int32(dict, key, &transport);
if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"transport", "%d", transport);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ j = 1;
+
+ /* <bricks> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"bricks");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ while (j <= brick_count) {
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.brick%d.uuid", i, j);
+ ret = dict_get_str(dict, key, &uuid);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"frameCount",
- "%d", frame_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < frame_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
- ret = cli_xml_output_vol_status_callframe (writer, dict,
- key);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatAttribute(
+ local->writer, (xmlChar *)"uuid", "%s", uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.brick%d", i, j);
+ ret = dict_get_str(dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatString(local->writer, "%s", brick);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"name", "%s", brick);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hostUuid", "%s", uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", i, j);
+ if (dict_get(dict, key))
+ isArbiter = 1;
+ else
+ isArbiter = 0;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"isArbiter", "%d", isArbiter);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ j++;
}
+ /* </bricks> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d", i);
+ ret = cli_xml_output_vol_info_options(local->writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- /* </callStack> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (volname) {
+ GF_FREE(local->get_vol.volname);
+ local->get_vol.volname = gf_strdup(volname);
+ local->vol_count += count;
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_status_callpool (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_info_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- int call_count = 0;
- char key[1024] = {0,};
- int i = 0;
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <callpoolStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"callpoolStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ GF_ASSERT(local);
- snprintf (key, sizeof (key), "brick%d.callpool.count", brick_index);
- ret = dict_get_int32 (dict, key, &call_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%d", call_count);
-
- for (i = 0; i < call_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.callpool.stack%d",
- brick_index, i);
- ret = cli_xml_output_vol_status_callstack (writer, dict,
- key);
- if (ret)
- goto out;
- }
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volInfo> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volInfo");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </callpoolStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <volumes> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* Init vol count */
+ local->vol_count = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_vol_info_end(cli_local_t *local)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- char *volname = NULL;
- int brick_count = 0;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- uint32_t cmd = GF_CLI_STATUS_NONE;
- int online = 0;
- gf_boolean_t node_present = _gf_true;
- int i;
-
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+#if (HAVE_LIB_XML)
+ int ret = -1;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
+ GF_ASSERT(local);
- /* <volStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"count",
+ "%d", local->vol_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ /* </volumes> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"volName",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </volInfo> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nodeCount",
- "%d", brick_count);
+ ret = cli_end_xml_output(local->writer, local->doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_quota_limit_list_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(local->writer, local->doc);
+
+out:
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_quota_limit_list_begin(cli_local_t *local, int op_ret,
+ int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volQuota> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volQuota");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+static int
+cli_xml_output_peer_hostnames(xmlTextWriterPtr writer, dict_t *dict,
+ const char *prefix, int count)
+{
+ int ret = -1;
+ int i = 0;
+ char *hostname = NULL;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <hostnames> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"hostnames");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < count; i++) {
+ ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_str(dict, key, &hostname);
if (ret)
- goto out;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ hostname = NULL;
+ }
+
+ /* </hostnames> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
- ret = dict_get_uint32 (dict, "cmd", &cmd);
+int
+cli_xml_output_peer_status(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *uuid = NULL;
+ char *hostname = NULL;
+ int connected = 0;
+ int state_id = 0;
+ char *state_str = NULL;
+ int hostname_count = 0;
+ int i = 1;
+ char key[1024] = {
+ 0,
+ };
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <peerStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"peerStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (!dict)
+ goto cont;
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+
+ while (i <= count) {
+ /* <peer> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"peer");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_str(dict, key, &uuid);
if (ret)
- goto out;
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "friend%d.hostname_count", i);
+ ret = dict_get_int32(dict, key, &hostname_count);
+ if ((ret == 0) && (hostname_count > 0)) {
+ snprintf(key, sizeof(key), "friend%d", i);
+ ret = cli_xml_output_peer_hostnames(writer, dict, key,
+ hostname_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32(dict, key, &connected);
if (ret)
- goto out;
+ goto out;
- index_max = brick_index_max + other_count;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"connected",
+ "%d", connected);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- for (i = 0; i <= index_max; i++) {
- /* <node> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf(key, sizeof(key), "friend%d.stateId", i);
+ ret = dict_get_int32(dict, key, &state_id);
+ if (!ret) {
+ /* ignore */
- ret = cli_xml_output_vol_status_common (writer, dict, i,
- &online, &node_present);
- if (ret) {
- if (node_present)
- goto out;
- else
- continue;
- }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"state",
+ "%d", state_id);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_DETAIL:
- ret = cli_xml_output_vol_status_detail (writer,
- dict, i);
- if (ret)
- goto out;
- break;
-
- case GF_CLI_STATUS_MEM:
- if (online) {
- ret = cli_xml_output_vol_status_mem
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_CLIENTS:
- if (online) {
- ret = cli_xml_output_vol_status_clients
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_INODE:
- if (online) {
- ret = cli_xml_output_vol_status_inode
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_FD:
- if (online) {
- ret = cli_xml_output_vol_status_fd
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_CALLPOOL:
- if (online) {
- ret = cli_xml_output_vol_status_callpool
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- default:
- break;
+ snprintf(key, sizeof(key), "friend%d.state", i);
+ ret = dict_get_str(dict, key, &state_str);
+ if (!ret) {
+ /* ignore */
- }
- /* </node> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"stateStr",
+ "%s", state_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
}
- /* </volStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </peer> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- ret = cli_end_xml_output (writer, buf);
- if (ret)
- goto out;
+ i++;
+ }
+
+cont:
+ /* </peerStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
+/* Used for rebalance stop/status, remove-brick status */
int
-cli_xml_output_vol_top_rw_perf (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int member_index)
-{
- int ret = -1;
- char *filename = NULL;
- uint64_t throughput = 0;
- long int time_sec = 0;
- long int time_usec = 0;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- char key[1024] = {0,};
-
- /* <file> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
- member_index);
- ret = dict_get_str (dict, key, &filename);
+cli_xml_output_vol_rebalance_status(xmlTextWriterPtr writer, dict_t *dict,
+ enum gf_task_types task_type)
+{
+ int ret = -1;
+ int count = 0;
+ char *node_name = NULL;
+ char *node_uuid = NULL;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookups = 0;
+ int status_rcd = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ uint64_t total_files = 0;
+ uint64_t total_size = 0;
+ uint64_t total_lookups = 0;
+ uint64_t total_failures = 0;
+ uint64_t total_skipped = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int overall_status = -1;
+ double elapsed = 0;
+ double overall_elapsed = 0;
+
+ if (!dict) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nodeCount", "%d",
+ count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ while (i < count) {
+ i++;
+ /* Getting status early, to skip nodes that don't have the
+ * rebalance process started
+ */
+ snprintf(key, sizeof(key), "status-%d", i);
+ ret = dict_get_int32(dict, key, &status_rcd);
+
+ /* If glusterd is down it fails to get the status, try
+ getting status from other nodes */
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
- "%s", filename);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ continue;
+ if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
+ continue;
+
+ /* <node> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
- ret = dict_get_uint64 (dict, key, &throughput);
+ snprintf(key, sizeof(key), "node-name-%d", i);
+ ret = dict_get_str(dict, key, &node_name);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%"PRIu64, throughput);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nodeName",
+ "%s", node_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-time-sec-%d", brick_index,
- member_index);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_sec);
+ snprintf(key, sizeof(key), "node-uuid-%d", i);
+ ret = dict_get_str(dict, key, &node_uuid);
if (ret)
- goto out;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"id", "%s",
+ node_uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-time-usec-%d", brick_index,
- member_index);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
+ snprintf(key, sizeof(key), "files-%d", i);
+ ret = dict_get_uint64(dict, key, &files);
if (ret)
- goto out;
+ goto out;
+ total_files += files;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"files",
+ "%" PRIu64, files);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "size-%d", i);
+ ret = dict_get_uint64(dict, key, &size);
+ if (ret)
+ goto out;
+ total_size += size;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"size",
+ "%" PRIu64, size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "lookups-%d", i);
+ ret = dict_get_uint64(dict, key, &lookups);
+ if (ret)
+ goto out;
+ total_lookups += lookups;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lookups",
+ "%" PRIu64, lookups);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "failures-%d", i);
+ ret = dict_get_uint64(dict, key, &failures);
+ if (ret)
+ goto out;
- tm = localtime (&time_sec);
- if (!tm) {
- ret = -1;
- goto out;
+ snprintf(key, sizeof(key), "skipped-%d", i);
+
+ ret = dict_get_uint64(dict, key, &skipped);
+ if (ret)
+ goto out;
+
+ if (task_type == GF_TASK_TYPE_REMOVE_BRICK) {
+ failures += skipped;
+ skipped = 0;
+ }
+
+ total_failures += failures;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"failures",
+ "%" PRIu64, failures);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ total_skipped += skipped;
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"skipped",
+ "%" PRIu64, skipped);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%d",
+ status_rcd);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"statusStr", "%s",
+ cli_vol_task_status_str[status_rcd]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "run-time-%d", i);
+ ret = dict_get_double(dict, key, &elapsed);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"runtime",
+ "%.2f", elapsed);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (elapsed > overall_elapsed) {
+ overall_elapsed = elapsed;
}
- strftime (timestr, sizeof (timestr), "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr),
- sizeof (timestr) - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, time_usec);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"time",
- "%s", timestr);
- XML_RET_CHECK_AND_GOTO (ret, out);
- /* </file> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* Rebalance has 5 states,
+ * NOT_STARTED, STARTED, STOPPED, COMPLETE, FAILED
+ * The precedence used to determine the aggregate status is as
+ * below,
+ * STARTED > FAILED > STOPPED > COMPLETE > NOT_STARTED
+ */
+ /* TODO: Move this to a common place utilities that both CLI and
+ * glusterd need.
+ * Till then if the below algorithm is changed, change it in
+ * glusterd_volume_status_aggregate_tasks_status in
+ * glusterd-utils.c
+ */
+
+ if (-1 == overall_status)
+ overall_status = status_rcd;
+ int rank[] = {[GF_DEFRAG_STATUS_STARTED] = 1,
+ [GF_DEFRAG_STATUS_FAILED] = 2,
+ [GF_DEFRAG_STATUS_STOPPED] = 3,
+ [GF_DEFRAG_STATUS_COMPLETE] = 4,
+ [GF_DEFRAG_STATUS_NOT_STARTED] = 5};
+ if (rank[status_rcd] <= rank[overall_status])
+ overall_status = status_rcd;
+
+ /* </node> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* Aggregate status */
+ /* <aggregate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"aggregate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"files",
+ "%" PRIu64, total_files);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"size", "%" PRIu64,
+ total_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lookups",
+ "%" PRIu64, total_lookups);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"failures",
+ "%" PRIu64, total_failures);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"skipped",
+ "%" PRIu64, total_skipped);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (overall_status == -1) {
+ overall_status = status_rcd;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%d",
+ overall_status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"statusStr", "%s",
+ cli_vol_task_status_str[overall_status]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"runtime", "%.2f",
+ overall_elapsed);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </aggregate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_vol_top_other (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int member_index)
+cli_xml_output_vol_rebalance(gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr)
{
- int ret = -1;
- char *filename = NULL;
- uint64_t count = 0;
- char key[1024] = {0,};
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRebalance> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volRebalance");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"op", "%d", op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if ((GF_DEFRAG_CMD_STOP == op) || (GF_DEFRAG_CMD_STATUS == op)) {
+ ret = cli_xml_output_vol_rebalance_status(writer, dict,
+ GF_TASK_TYPE_REBALANCE);
+ if (ret)
+ goto out;
+ }
- /* <file> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </volRebalance> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
- member_index);
- ret = dict_get_str (dict, key, &filename);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
- "%s", filename);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_end_xml_output(writer, doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
- ret = dict_get_uint64 (dict, key, &count);
+int
+cli_xml_output_vol_remove_brick(gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr,
+ const char *op)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (status_op) {
+ ret = cli_xml_output_vol_rebalance_status(writer, dict,
+ GF_TASK_TYPE_REMOVE_BRICK);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%"PRIu64, count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ }
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </file> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_vol_replace_brick(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = cli_end_xml_output(writer, doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_create(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- int brick_count = 0;
- int top_op = GF_CLI_TOP_NONE;
- char *brick_name = NULL;
- int members = 0;
- uint64_t current_open = 0;
- uint64_t max_open = 0;
- char *max_open_time = NULL;
- double throughput = 0.0;
- double time_taken = 0.0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
-
- ret = cli_begin_xml_output (&writer, &buf);
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <volCreate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volCreate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
if (ret)
- goto out;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ ret = dict_get_str(dict, "volume-id", &volid);
if (ret)
- goto out;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"id", "%s",
+ volid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volCreate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = cli_end_xml_output(writer, doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_generic_volume(char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
- /* <volTop> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volTop");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ GF_ASSERT(op);
- ret = dict_get_int32 (dict, "count", &brick_count);
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <"op"> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
- "%d", brick_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "1-top-op", &top_op);
+ ret = dict_get_str(dict, "vol-id", &volid);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"topOp",
- "%d", top_op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"id", "%s",
+ volid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- while (i < brick_count) {
- i++;
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <brick> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </"op"> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-brick", i);
- ret = dict_get_str (dict, key, &brick_name);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"name",
- "%s", brick_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key , sizeof (key), "%d-members", i);
- ret = dict_get_int32 (dict, key, &members);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"members",
- "%d", members);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- switch (top_op) {
- case GF_CLI_TOP_OPEN:
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-current-open", i);
- ret = dict_get_uint64 (dict, key, &current_open);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"currentOpen", "%"PRIu64,
- current_open);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-max-open", i);
- ret = dict_get_uint64 (dict, key, &max_open);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"maxOpen", "%"PRIu64,
- max_open);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-max-openfd-time", i);
- ret = dict_get_str (dict, key, &max_open_time);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"maxOpenTime", "%s",
- max_open_time);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- case GF_CLI_TOP_READ:
- case GF_CLI_TOP_WRITE:
- case GF_CLI_TOP_OPENDIR:
- case GF_CLI_TOP_READDIR:
- if (!members)
- continue;
-
- break;
-
- case GF_CLI_TOP_READ_PERF:
- case GF_CLI_TOP_WRITE_PERF:
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-throughput", i);
- ret = dict_get_double (dict, key, &throughput);
- if (!ret) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-time", i);
- ret = dict_get_double (dict, key, &time_taken);
- }
-
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"throughput",
- "%f", throughput);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"timeTaken",
- "%f", time_taken);
- }
-
- if (!members)
- continue;
-
- break;
-
- default:
- ret = -1;
- goto out;
- }
+ ret = cli_end_xml_output(writer, doc);
- for (j = 1; j <= members; j++) {
- if (top_op == GF_CLI_TOP_READ_PERF ||
- top_op == GF_CLI_TOP_WRITE_PERF) {
- ret = cli_xml_output_vol_top_rw_perf
- (writer, dict, i, j);
- } else {
- ret = cli_xml_output_vol_top_other
- (writer, dict, i, j);
- }
- if (ret)
- goto out;
- }
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+#if (HAVE_LIB_XML)
+int
+_output_gsync_config(FILE *fp, xmlTextWriterPtr writer, char *op_name)
+{
+ char resbuf[256 + PATH_MAX] = {
+ 0,
+ };
+ char *ptr = NULL;
+ char *v = NULL;
+ int blen = sizeof(resbuf);
+ int ret = 0;
+
+ for (;;) {
+ ptr = fgets(resbuf, blen, fp);
+ if (!ptr)
+ break;
+
+ v = resbuf + strlen(resbuf) - 1;
+ while (isspace(*v)) {
+ /* strip trailing space */
+ *v-- = '\0';
+ }
+ if (v == resbuf) {
+ /* skip empty line */
+ continue;
+ }
- /* </brick> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (op_name != NULL) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)op_name,
+ "%s", resbuf);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ goto out;
}
- /* </volTop> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (writer, buf);
+ v = strchr(resbuf, ':');
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ *v++ = '\0';
+ while (isspace(*v))
+ v++;
+ v = gf_strdup(v);
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)resbuf, "%s",
+ v);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_profile_stats (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int interval)
-{
- int ret = -1;
- uint64_t read_count = 0;
- uint64_t write_count = 0;
- uint64_t hits = 0;
- double avg_latency = 0.0;
- double max_latency = 0.0;
- double min_latency = 0.0;
- uint64_t duration = 0;
- uint64_t total_read = 0;
- uint64_t total_write = 0;
- char key[1024] = {0};
- int i = 0;
-
- /* <cumulativeStats> || <intervalStats> */
- if (interval == -1)
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"cumulativeStats");
- else
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"intervalStats");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <blockStats> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"blockStats");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < 32; i++) {
- /* <block> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"block");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"size", "%"PRIu32, (1 << i));
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-read-%d", brick_index,
- interval, (1 << i));
- ret = dict_get_uint64 (dict, key, &read_count);
- if (ret)
- read_count = 0;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"reads", "%"PRIu64, read_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-write-%d", brick_index,
- interval, (1 << i));
- ret = dict_get_uint64 (dict, key, &write_count);
- if (ret)
- write_count = 0;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"writes", "%"PRIu64, write_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* </block> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+get_gsync_config(runner_t *runner,
+ int (*op_conf)(FILE *fp, xmlTextWriterPtr writer,
+ char *op_name),
+ xmlTextWriterPtr writer, char *op_name)
+{
+ int ret = 0;
- /* </blockStats> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ runner_redir(runner, STDOUT_FILENO, RUN_PIPE);
+ if (runner_start(runner) != 0) {
+ gf_log("cli", GF_LOG_ERROR, "spawning child failed");
+ return -1;
+ }
- /* <fopStats> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fopStats");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = op_conf(runner_chio(runner, STDOUT_FILENO), writer, op_name);
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-hits", brick_index,
- interval, i);
- ret = dict_get_uint64 (dict, key, &hits);
- if (ret)
- goto cont;
+ ret |= runner_end(runner);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "reading data from child failed");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-avglatency", brick_index,
- interval, i);
- ret = dict_get_double (dict, key, &avg_latency);
- if (ret)
- goto cont;
+ return ret ? -1 : 0;
+}
+#endif
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-minlatency", brick_index,
- interval, i);
- ret = dict_get_double (dict, key, &min_latency);
- if (ret)
- goto cont;
+#if (HAVE_LIB_XML)
+int
+cli_xml_generate_gsync_config(dict_t *dict, xmlTextWriterPtr writer)
+{
+ runner_t runner = {
+ 0,
+ };
+ char *subop = NULL;
+ char *gwd = NULL;
+ char *slave = NULL;
+ char *confpath = NULL;
+ char *master = NULL;
+ char *op_name = NULL;
+ int ret = -1;
+ char conf_path[PATH_MAX] = "";
+
+ if (dict_get_str(dict, "subop", &subop) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(subop, "get") != 0 && strcmp(subop, "get-all") != 0) {
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"message", "%s",
+ GEOREP " config updated successfully");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = 0;
+ goto out;
+ }
+
+ if (dict_get_str(dict, "glusterd_workdir", &gwd) != 0 ||
+ dict_get_str(dict, "slave", &slave) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (dict_get_str(dict, "master", &master) != 0)
+ master = NULL;
+
+ if (dict_get_str(dict, "op_name", &op_name) != 0)
+ op_name = NULL;
+
+ ret = dict_get_str(dict, "conf_path", &confpath);
+ if (!confpath) {
+ ret = snprintf(conf_path, sizeof(conf_path) - 1,
+ "%s/" GEOREP "/gsyncd_template.conf", gwd);
+ conf_path[ret] = '\0';
+ confpath = conf_path;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", confpath);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+
+ if (master)
+ runner_argprintf(&runner, ":%s", master);
+
+ runner_add_arg(&runner, slave);
+ runner_argprintf(&runner, "--config-%s", subop);
+
+ if (op_name)
+ runner_add_arg(&runner, op_name);
+
+ ret = get_gsync_config(&runner, _output_gsync_config, writer, op_name);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", brick_index,
- interval, i);
- ret = dict_get_double (dict, key, &max_latency);
- if (ret)
- goto cont;
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
- /* <fop> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fop");
- XML_RET_CHECK_AND_GOTO (ret, out);
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_gsync_status(dict_t *dict, xmlTextWriterPtr writer)
+{
+ int ret = -1;
+ int i = 1;
+ int j = 0;
+ int count = 0;
+ const int number_of_fields = 20;
+ int closed = 1;
+ int session_closed = 1;
+ gf_gsync_status_t **status_values = NULL;
+ char status_value_name[PATH_MAX] = "";
+ char *tmp = NULL;
+ char *volume = NULL;
+ char *volume_next = NULL;
+ char *slave = NULL;
+ char *slave_next = NULL;
+ static const char *title_values[] = {
+ "master_node", "", "master_brick", "slave_user", "slave", "slave_node",
+ "status", "crawl_status",
+ /* last_synced */
+ "", "entry", "data", "meta", "failures",
+ /* checkpoint_time */
+ "", "checkpoint_completed",
+ /* checkpoint_completion_time */
+ "", "master_node_uuid",
+ /* last_synced_utc */
+ "last_synced",
+ /* checkpoint_time_utc */
+ "checkpoint_time",
+ /* checkpoint_completion_time_utc */
+ "checkpoint_completion_time"};
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_int32(dict, "gsync-count", &count);
+ if (ret)
+ goto out;
+
+ status_values = GF_MALLOC(count * sizeof(gf_gsync_status_t *),
+ gf_common_mt_char);
+ if (!status_values) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++) {
+ status_values[i] = GF_CALLOC(1, sizeof(gf_gsync_status_t),
+ gf_common_mt_char);
+ if (!status_values[i]) {
+ ret = -1;
+ goto out;
+ }
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"name","%s", gf_fop_list[i]);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf(status_value_name, sizeof(status_value_name), "status_value%d",
+ i);
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"hits", "%"PRIu64, hits);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = dict_get_bin(dict, status_value_name,
+ (void **)&(status_values[i]));
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ goto out;
+ }
+ }
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"avgLatency", "%f", avg_latency);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ qsort(status_values, count, sizeof(gf_gsync_status_t *),
+ gf_gsync_status_t_comparator);
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"minLatency", "%f", min_latency);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ for (i = 0; i < count; i++) {
+ if (closed) {
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"maxLatency", "%f", max_latency);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ tmp = get_struct_variable(1, status_values[i]);
+ if (!tmp) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
+ goto out;
+ }
- /* </fop> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name",
+ "%s", tmp);
+ XML_RET_CHECK_AND_GOTO(ret, out);
-cont:
- hits = 0;
- avg_latency = 0.0;
- min_latency = 0.0;
- max_latency = 0.0;
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"sessions");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ closed = 0;
}
- /* </fopStats> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (session_closed) {
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"session");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-duration", brick_index, interval);
- ret = dict_get_uint64 (dict, key, &duration);
- if (ret)
+ session_closed = 0;
+
+ tmp = get_struct_variable(21, status_values[i]);
+ if (!tmp) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"duration",
- "%"PRIu64, duration);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-read", brick_index, interval);
- ret = dict_get_uint64 (dict, key, &total_read);
- if (ret)
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"session_slave", "%s", tmp);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"pair");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (j = 0; j < number_of_fields; j++) {
+ /* XML ignore fields */
+ if (strcmp(title_values[j], "") == 0)
+ continue;
+
+ tmp = get_struct_variable(j, status_values[i]);
+ if (!tmp) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalRead",
- "%"PRIu64, total_read);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-write", brick_index, interval);
- ret = dict_get_uint64 (dict, key, &total_write);
- if (ret)
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)title_values[j], "%s", tmp);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (i + 1 < count) {
+ slave = get_struct_variable(20, status_values[i]);
+ slave_next = get_struct_variable(20, status_values[i + 1]);
+ volume = get_struct_variable(1, status_values[i]);
+ volume_next = get_struct_variable(1, status_values[i + 1]);
+ if (!slave || !slave_next || !volume || !volume_next) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalWrite",
- "%"PRIu64, total_write);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (strcmp(volume, volume_next) != 0) {
+ closed = 1;
+ session_closed = 1;
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </cumulativeStats> || </intervalStats> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else if (strcmp(slave, slave_next) != 0) {
+ session_closed = 1;
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ } else {
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (status_values)
+ GF_FREE(status_values);
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
-{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- char *volname = NULL;
- int op = GF_CLI_STATS_NONE;
- int brick_count = 0;
- char *brick_name = NULL;
- int interval = 0;
- char key[1024] = {0,};
- int i = 0;
-
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+cli_xml_output_vol_gsync(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *master = NULL;
+ char *slave = NULL;
+ int type = 0;
+
+ GF_ASSERT(dict);
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <geoRep> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"geoRep");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get type");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ if (dict_get_str(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str(dict, "slave", &slave) != 0)
+ slave = "???";
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"master",
+ "%s", master);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"slave",
+ "%s", slave);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ if (op_ret == 0) {
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"config");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_xml_generate_gsync_config(dict, writer);
+ if (ret)
+ goto out;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ break;
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ ret = cli_xml_output_vol_gsync_status(dict, writer);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "Failed to get gsync status");
goto out;
+ }
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ /* </geoRep> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(writer, doc);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
- /* <volProfile> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volProfile");
- XML_RET_CHECK_AND_GOTO (ret, out);
+#if (HAVE_LIB_XML)
+/* This function will generate snapshot create output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing create output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_create(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ char *str_value = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapCreate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapCreate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapCreate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"volname",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
+/* This function will generate snapshot clone output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing create output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_clone(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ char *str_value = NULL;
+
+ GF_VALIDATE_OR_GOTO("cli", writer, out);
+ GF_VALIDATE_OR_GOTO("cli", doc, out);
+ GF_VALIDATE_OR_GOTO("cli", dict, out);
+
+ /* <CloneCreate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"CloneCreate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "clonename", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone name");
+ goto out;
+ }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </CloneCreate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
- ret = dict_get_int32 (dict, "op", &op);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"profileOp",
- "%d", op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+/* This function will generate snapshot restore output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing restore output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_restore(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ char *str_value = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapRestore> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapRestore");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get vol name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume id");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapRestore> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
- if (op != GF_CLI_STATS_INFO)
- goto cont;
+ return ret;
+}
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
- "%d", brick_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+/* This function will generate snapshot list output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing list output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_list(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int snapcount = 0;
+ char *str_value = NULL;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapList> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapList");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapcount");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ snapcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 1; i <= snapcount; ++i) {
+ ret = snprintf(key, sizeof(key), "snapname%d", i);
+ if (ret < 0) {
+ goto out;
+ }
- while (i < brick_count) {
- i++;
+ ret = dict_get_str(dict, key, &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"snapshot",
+ "%s", str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ }
- /* <brick> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapList> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%d-brick", i);
- ret = dict_get_str (dict, key, &brick_name);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"brickName", "%s", brick_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%d-cumulative", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- ret = cli_xml_output_vol_profile_stats
- (writer, dict, i, interval);
- if (ret)
- goto out;
- }
+ ret = 0;
+out:
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-interval", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- ret = cli_xml_output_vol_profile_stats
- (writer, dict, i, interval);
- if (ret)
- goto out;
- }
+ return ret;
+}
+
+/* This function will generate xml output for origin volume
+ * of the given snapshot.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ * @param keyprefix prefix for dictionary key
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info_orig_vol(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, char *keyprefix)
+{
+ int ret = -1;
+ int value = 0;
+ char *buffer = NULL;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+
+ /* <originVolume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"originVolume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%sorigin-volname", keyprefix);
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%ssnapcount", keyprefix);
+
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"snapCount", "%d",
+ value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%ssnaps-available", keyprefix);
+
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"snapRemaining",
+ "%d", value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </brick> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </originVolume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function will generate xml output of snapshot volume info.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ * @param keyprefix key prefix for dictionary
+ * @param snap_driven boolean to check if output is based of volume
+ * or snapshot
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info_snap_vol(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven)
+{
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+ int ret = -1;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+
+ /* <snapVolume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapVolume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.volname", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.vol-status", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* If the command is snap_driven then we need to show origin volume
+ * info. Else this is shown in the start of info display.*/
+ if (snap_driven) {
+ ret = snprintf(key, sizeof(key), "%s.", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = cli_xml_snapshot_info_orig_vol(writer, doc, dict, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot's origin volume");
+ goto out;
}
+ }
-cont:
- /* </volProfile> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapVolume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
+}
+/* This function will generate snapshot info of individual snapshot
+ * in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ * @param keyprefix key prefix for dictionary
+ * @param snap_driven boolean to check if output is based of volume
+ * or snapshot
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven)
+{
+ char key_buffer[PATH_MAX] = "";
+ char *buffer = NULL;
+ int volcount = 0;
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snapname", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snapname %s ", key_buffer);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-id", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-id %s ", key_buffer);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-desc", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"description",
+ "%s", buffer);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"description",
+ "%s", "");
+ }
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-time", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-time %s ", keyprefix);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"createTime", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.vol-count", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_int32(dict, key_buffer, &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Fail to get snap vol count");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volCount", "%d",
+ volcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, key_buffer, &volcount);
+ /* Display info of each snapshot volume */
+ for (i = 1; i <= volcount; i++) {
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.vol%d", keyprefix,
+ i);
+ if (ret < 0)
+ goto out;
+
+ ret = cli_xml_snapshot_info_snap_vol(writer, doc, dict, key_buffer,
+ snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not list "
+ "details of volume in a snap");
+ goto out;
+ }
+ }
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ return ret;
}
-int
-cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+/* This function will generate snapshot info output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- int count = 0;
- char *volname = NULL;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int i = 0;
+ int snapcount = 0;
+ char key[PATH_MAX] = "";
+ gf_boolean_t snap_driven = _gf_false;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapInfo> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapInfo");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snap_driven = dict_get_str_boolean(dict, "snap-driven", _gf_false);
+
+ /* If the approach is volume based then we should display origin volume
+ * information first followed by per snap info*/
+ if (!snap_driven) {
+ ret = cli_xml_snapshot_info_orig_vol(writer, doc, dict, "");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot's origin volume");
+ goto out;
+ }
+ }
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+ ret = dict_get_int32(dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapcount");
+ goto out;
+ }
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ snapcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <volList> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volList");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <snapshots> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshots");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%d", count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"volume",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* Get snapshot info of individual snapshots */
+ for (i = 1; i <= snapcount; ++i) {
+ snprintf(key, sizeof(key), "snap%d", i);
+
+ ret = cli_xml_snapshot_info_per_snap(writer, doc, dict, key,
+ snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
}
+ }
+
+ /* </snapshots> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </volList> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapInfo> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+
+ return ret;
}
-int
-cli_xml_output_vol_info_option (xmlTextWriterPtr writer, char *substr,
- char *optstr, char *valstr)
-{
- int ret = -1;
- char *ptr1 = NULL;
- char *ptr2 = NULL;
-
- ptr1 = substr;
- ptr2 = optstr;
-
- while (ptr1) {
- if (*ptr1 != *ptr2)
- break;
- ptr1++;
- ptr2++;
- if (!ptr1)
- goto out;
- if (!ptr2)
- goto out;
+/* This function will generate snapshot status of individual
+ * snapshot volume in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ * @param keyprefix key prefix for dictionary
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, const char *keyprefix)
+{
+ int ret = -1;
+ int brickcount = 0;
+ int i = 0;
+ int pid = 0;
+ char *buffer = NULL;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = snprintf(key, sizeof(key), "%s.brickcount", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_int32(dict, key, &brickcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch brickcount");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickCount", "%d",
+ brickcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* Get status of every brick belonging to the snapshot volume */
+ for (i = 0; i < brickcount; i++) {
+ /* <snapInfo> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get Brick Path");
+ /*
+ * If path itself is not present, then end *
+ * this brick's status and continue to the *
+ * brick *
+ */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ continue;
}
- if (*ptr2 == '\0')
- goto out;
- /* <option> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"option");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"path", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get Volume Group");
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"volumeGroup", "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"volumeGroup", "%s", buffer);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.status", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Brick Running");
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"brick_running", "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"brick_running", "%s", buffer);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.pid", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get pid");
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid",
+ "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid",
+ "%d", pid);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.data", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get Data Percent");
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"data_percentage", "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"data_percentage", "%s", buffer);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.lvsize", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get LV Size");
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lvSize",
+ "N/A");
+ } else {
+ /* Truncate any newline character */
+ buffer = strtok(buffer, "\n");
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lvSize",
+ "%s", buffer);
+ }
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"name",
- "%s", ptr2);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"value",
- "%s", valstr);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </brick> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- /* </option> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
-int
-cli_xml_output_vol_info_options (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+/* This function will generate snapshot status of individual
+ * snapshot in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_status_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, const char *keyprefix)
{
- int ret = -1;
- int opt_count = 0;
- data_pair_t *pairs = 0;
- data_t *value = 0;
- char *ptr = NULL;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int volcount = 0;
+ int i = 0;
+ char *buffer = NULL;
+ char key[PATH_MAX] = "";
- pairs = dict->members_list;
- if (!pairs) {
- ret = -1;
- goto out;
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.snapname", keyprefix);
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snapname");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.uuid", keyprefix);
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snap UUID");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.volcount", keyprefix);
+
+ ret = dict_get_int32(dict, key, &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get volume count");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volCount", "%d",
+ volcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* Get snapshot status of individual snapshot volume */
+ for (i = 0; i < volcount; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.vol%d", keyprefix, i);
+
+ ret = cli_xml_snapshot_volume_status(writer, doc, dict, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snap volume status");
+ goto out;
}
- snprintf (key, sizeof (key), "%s.opt_count", prefix);
- ret = dict_get_int32 (dict, key, &opt_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"optCount",
- "%d", opt_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <options> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"options");
- XML_RET_CHECK_AND_GOTO (ret, out);
- while (i < opt_count) {
- snprintf (key, sizeof (key), "%s.option.", prefix);
- while (pairs) {
- ptr = strstr (pairs->key, "option.");
- if (ptr) {
- value = pairs->value;
- if (!value) {
- ret = -1;
- goto out;
- }
- ret = cli_xml_output_vol_info_option
- (writer, key, pairs->key, value->data);
- if (ret)
- goto out;
- }
- pairs = pairs->next;
- }
- i++;
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function will generate snapshot status output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_status(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ int snapcount = 0;
+ int i = 0;
+ int status_cmd = 0;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "sub-cmd", &status_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch status type");
+ goto out;
+ }
+
+ if ((GF_SNAP_STATUS_TYPE_SNAP == status_cmd) ||
+ (GF_SNAP_STATUS_TYPE_ITER == status_cmd)) {
+ snapcount = 1;
+ } else {
+ ret = dict_get_int32(dict, "status.snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
}
- /* </options> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ snapcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ for (i = 0; i < snapcount; i++) {
+ snprintf(key, sizeof(key), "status.snap%d", i);
+
+ ret = cli_xml_snapshot_status_per_snap(writer, doc, dict, key);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "failed to create xml "
+ "output for snapshot status");
+ goto out;
+ }
+ }
+
+ /* </snapStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+
+ return ret;
}
-int
-cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
-{
- int ret = 0;
- int count = 0;
- char *volname = NULL;
- char *volume_id = NULL;
- int type = 0;
- int status = 0;
- int brick_count = 0;
- int dist_count = 0;
- int stripe_count = 0;
- int replica_count = 0;
- int transport = 0;
- char *brick = NULL;
- char key[1024] = {0,};
- int i = 0;
- int j = 1;
-
-
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
+/* This function will generate snapshot config show output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_config_show(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict)
+{
+ int ret = -1;
+ uint64_t i = 0;
+ uint64_t value = 0;
+ uint64_t volcount = 0;
+ char buf[PATH_MAX] = "";
+ char *str_value = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <systemConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"systemConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "snap-max-hard-limit");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hardLimit",
+ "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "snap-max-soft-limit");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"softLimit",
+ "%" PRIu64 "%%", value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "auto-delete", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch auto-delete");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"autoDelete", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snap-activate-on-create", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch snap-activate-on-create-delete");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"activateOnCreate",
+ "%s", str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </systemConfig> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volumeConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volumeConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint64(dict, "voldisplaycount", &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch volcount");
+ goto out;
+ }
+
+ /* Get config of all the volumes */
+ for (i = 0; i < volcount; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname", i);
+ ret = dict_get_str(dict, buf, &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ goto out;
+ }
- for (i = 0; i < count; i++) {
- /* <volume> */
- ret = xmlTextWriterStartElement (local->writer,
- (xmlChar *)"volume");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.name", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"name",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.volume_id", i);
- ret = dict_get_str (dict, key, &volume_id);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"id",
- "%s", volume_id);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.type", i);
- ret = dict_get_int32 (dict, key, &type);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"type",
- "%d", type);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.status", i);
- ret = dict_get_int32 (dict, key, &status);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"status",
- "%d", status);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.brick_count", i);
- ret = dict_get_int32 (dict, key, &brick_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"brickCount",
- "%d", brick_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.dist_count", i);
- ret = dict_get_int32 (dict, key, &dist_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"distCount",
- "%d", dist_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.stripe_count", i);
- ret = dict_get_int32 (dict, key, &stripe_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"stripeCount",
- "%d", stripe_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.replica_count", i);
- ret = dict_get_int32 (dict, key, &replica_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"replicaCount",
- "%d", replica_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.transport", i);
- ret = dict_get_int32 (dict, key, &transport);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"transport",
- "%d", transport);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <bricks> */
- ret = xmlTextWriterStartElement (local->writer,
- (xmlChar *)"bricks");
- XML_RET_CHECK_AND_GOTO (ret, out);
- while (j <= brick_count) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.brick%d", i, j);
- ret = dict_get_str (dict, key, &brick);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (local->writer, (xmlChar *)"brick", "%s",
- brick);
- XML_RET_CHECK_AND_GOTO (ret, out);
- j++;
- }
- /* </bricks> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d", i);
- ret = cli_xml_output_vol_info_options (local->writer, dict,
- key);
- if (ret)
- goto out;
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-snap-max-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ goto out;
+ }
- /* </volume> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hardLimit",
+ "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-active-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch"
+ " effective snap_max_hard_limit for "
+ "%s",
+ str_value);
+ goto out;
}
- GF_FREE (local->get_vol.volname);
- local->get_vol.volname = gf_strdup (volname);
- local->vol_count += count;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"effectiveHardLimit", "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-snap-max-soft-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"softLimit",
+ "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function will generate snapshot config set output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_config_set(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict)
+{
+ int ret = -1;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ char *volname = NULL;
+ char *auto_delete = NULL;
+ char *snap_activate = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* This is optional parameter therefore ignore the error */
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
+ /* This is optional parameter therefore ignore the error */
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+ ret = dict_get_str(dict, "auto-delete", &auto_delete);
+ ret = dict_get_str(dict, "snap-activate-on-create", &snap_activate);
+
+ if (!hard_limit && !soft_limit && !auto_delete && !snap_activate) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "At least one option from "
+ "snap-max-hard-limit, snap-max-soft-limit, auto-delete"
+ " and snap-activate-on-create should be set");
+ goto out;
+ }
+
+ /* Ignore the error, as volname is optional */
+ ret = dict_get_str(dict, "volname", &volname);
+
+ if (NULL == volname) {
+ /* <systemConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"systemConfig");
+ } else {
+ /* <volumeConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volumeConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ volname);
+ }
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (hard_limit) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"newHardLimit",
+ "%" PRIu64, hard_limit);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (soft_limit) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"newSoftLimit",
+ "%" PRIu64, soft_limit);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (auto_delete) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"autoDelete",
+ "%s", auto_delete);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (snap_activate) {
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"activateOnCreate", "%s", snap_activate);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </volumeConfig> or </systemConfig> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
+/* This function will generate snapshot config output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing config output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_config(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ int config_command = 0;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "config-command", &config_command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ ret = cli_xml_snapshot_config_set(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml "
+ "output for snapshot config set command");
+ goto out;
+ }
+
+ break;
+ case GF_SNAP_CONFIG_DISPLAY:
+ ret = cli_xml_snapshot_config_show(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml "
+ "output for snapshot config show command");
+ goto out;
+ }
+ break;
+ default:
+ gf_log("cli", GF_LOG_ERROR, "Unknown config command :%d",
+ config_command);
+ ret = -1;
+ goto out;
+ }
+
+ /* </snapConfig> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+/* This function will generate snapshot activate or
+ * deactivate output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing activate or deactivate output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_activate_deactivate(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, int cmd)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ char *tag = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ if (GF_SNAP_OPTION_TYPE_ACTIVATE == cmd) {
+ tag = "snapActivate";
+ } else if (GF_SNAP_OPTION_TYPE_DEACTIVATE == cmd) {
+ tag = "snapDeactivate";
+ } else {
+ gf_log("cli", GF_LOG_ERROR, "invalid command %d", cmd);
+ goto out;
+ }
+
+ /* <snapActivate> or <snapDeactivate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)tag);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapActivate> or </snapDeactivate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+
+ return ret;
+}
+#endif /* HAVE_LIB_XML */
+
+/* This function will generate snapshot delete output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing delete output
+ * @param rsp cli response
+ *
+ * @return 0 on success and -1 on failure
+ */
int
-cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_snapshot_delete(cli_local_t *local, dict_t *dict, gf_cli_rsp *rsp)
{
- int ret = -1;
+ int ret = -1;
+#ifdef HAVE_LIB_XML
+ char *str_value = NULL;
+ xmlTextWriterPtr writer = local->writer;
+ xmlDocPtr doc = local->doc;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto xmlend;
+ }
+
+ if (!rsp->op_ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status",
+ "Success");
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status",
+ "Failure");
+ XML_RET_CHECK_AND_GOTO(ret, xmlend);
+
+ ret = cli_xml_output_common(writer, rsp->op_ret, rsp->op_errno,
+ rsp->op_errstr);
+ }
+ XML_RET_CHECK_AND_GOTO(ret, xmlend);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, xmlend);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto xmlend;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+xmlend:
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+#endif /* HAVE_LIB_XML */
+ ret = 0;
+out:
- GF_ASSERT (local);
+ return ret;
+}
- ret = cli_begin_xml_output (&(local->writer), &(local->buf));
- if (ret)
- goto out;
+int
+cli_xml_output_snap_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- ret = cli_xml_output_common (local->writer, op_ret, op_errno,
- op_errstr);
- if (ret)
- goto out;
+ GF_ASSERT(local);
+
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapStatus> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshots> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapshots");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_snap_status_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <volInfo> */
- ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volInfo");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ GF_ASSERT(local);
- /* <volumes> */
- ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volumes");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapshots> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* Init vol count */
- local->vol_count = 0;
+ /* </snapStatus> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = cli_end_xml_output(local->writer, local->doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_info_end (cli_local_t *local)
+cli_xml_output_snap_delete_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ int delete_cmd = -1;
- GF_ASSERT (local);
+ GF_ASSERT(local);
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"count",
- "%d", local->vol_count);
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </volumes> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = dict_get_int32(local->dict, "sub-cmd", &delete_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
+ goto out;
+ }
- /* </volInfo> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (local->writer, local->buf);
+ /* <snapStatus> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapDelete");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshots> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapshots");
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_ERROR, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
- int op_ret, int op_errno,
- char *op_errstr)
-{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- int64_t size = 0;
- int64_t limit_value = 0;
- int i = 0;
- int j = 0;
- int k = 0;
- int len = 0;
- char *size_str = NULL;
- char path[PATH_MAX] = {0,};
- char ret_str[1024] = {0,};
- char value[1024] = {0,};
- char mountdir[] = "/tmp/mountXXXXXX";
- char abspath[PATH_MAX] = {0,};
- runner_t runner = {0,};
-
- GF_ASSERT (volname);
- GF_ASSERT (limit_list);
-
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+cli_xml_output_snap_delete_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
+ GF_ASSERT(local);
+
+ /* </snapshots> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapDelete> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(local->writer, local->doc);
+out:
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+/* This function will generate xml output for all the snapshot commands
+ *
+ * @param cmd_type command type
+ * @param dict dict containing snapshot command output
+ * @param op_ret return value of the snapshot command
+ * @param op_errno errno for the snapshot command
+ * @param op_errstr error string for the snapshot command
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ GF_ASSERT(dict);
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output "
+ "xml begin block");
+ goto out;
+ }
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output "
+ "xml common block");
+ goto out;
+ }
+
+ /* In case of command failure just printing the error message is good
+ * enough */
+ if (0 != op_ret) {
+ goto end;
+ }
+
+ switch (cmd_type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = cli_xml_snapshot_create(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot create command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ ret = cli_xml_snapshot_clone(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot clone command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = cli_xml_snapshot_restore(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot restore command");
goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_LIST:
+ ret = cli_xml_snapshot_list(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot list command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = cli_xml_snapshot_status(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create"
+ "xml output for snapshot status command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_INFO:
+ ret = cli_xml_snapshot_info(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot info command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ ret = cli_xml_snapshot_activate_deactivate(writer, doc, dict,
+ cmd_type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot config command");
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = cli_xml_snapshot_config(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot config command");
+ }
+ break;
+ default:
+ gf_log("cli", GF_LOG_ERROR, "Unexpected snapshot command: %d",
+ cmd_type);
+ goto out;
+ }
+
+end:
+ ret = cli_end_xml_output(writer, doc);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output "
+ "xml end block");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
- /* <volQuota> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volQuota");
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_snapshot_begin_composite_op(cli_local_t *local)
+{
+ int ret = -1;
+#ifdef HAVE_LIB_XML
+ int cmd = -1;
+ int type = -1;
+
+ ret = dict_get_int32(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "sub-cmd");
+ ret = 0;
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_STATUS_TYPE_ITER || cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32(local->dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get snapshot "
+ "command type from dictionary");
+ goto out;
+ }
+
+ if (GF_SNAP_OPTION_TYPE_STATUS == type)
+ ret = cli_xml_output_snap_status_begin(local, 0, 0, NULL);
+ else if (GF_SNAP_OPTION_TYPE_DELETE == type)
+ ret = cli_xml_output_snap_delete_begin(local, 0, 0, NULL);
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Error creating xml output");
+ goto out;
+ }
+
+#endif /* HAVE_LIB_XML */
+ ret = 0;
+out:
+ return ret;
+}
- if (!limit_list)
- goto cont;
+int
+cli_xml_snapshot_end_composite_op(cli_local_t *local)
+{
+ int ret = -1;
+#ifdef HAVE_LIB_XML
+ int cmd = -1;
+ int type = -1;
+
+ ret = dict_get_int32(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "sub-cmd");
+ ret = 0;
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_STATUS_TYPE_ITER || cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32(local->dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get snapshot "
+ "command type from dictionary");
+ goto out;
+ }
+
+ if (GF_SNAP_OPTION_TYPE_STATUS == type)
+ ret = cli_xml_output_snap_status_end(local);
+ else if (GF_SNAP_OPTION_TYPE_DELETE == type)
+ ret = cli_xml_output_snap_delete_end(local);
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Error creating xml "
+ "output");
+ goto out;
+ }
+#endif /* HAVE_LIB_XML */
+ ret = 0;
+out:
+ return ret;
+}
- len = strlen (limit_list);
- if (len == 0)
- goto cont;
+int
+cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict, char *key)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- if (mkdtemp (mountdir) == NULL) {
- gf_log ("cli", GF_LOG_ERROR, "failed to create a temporary"
- " mount directory");
- ret = -1;
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("cli", (local != NULL), out);
+ GF_VALIDATE_OR_GOTO("cli", (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO("cli", (key != NULL), out);
+
+ ret = cli_xml_snapshot_status_per_snap(local->writer, local->doc, dict,
+ key);
+out:
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
- ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost",
- "--volfile-id", volname, "-l",
- DEFAULT_LOG_FILE_DIRECTORY"/quota-list-xml.log",
- mountdir, NULL);
+int
+cli_xml_output_vol_getopts(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int i = 0;
+ int ret = -1;
+ int count = 0;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char dict_key[50] = {
+ 0,
+ };
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volGetopts");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve count "
+ "from the dictionary");
+ goto out;
+ }
+ if (count <= 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Value of count :%d is "
+ "invalid",
+ count);
+ ret = -1;
+ goto out;
+ }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ count);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 1; i <= count; i++) {
+ sprintf(dict_key, "key%d", i);
+ ret = dict_get_str(dict, dict_key, &key);
if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to mount glusterfs client");
- ret = -1;
- goto rm_dir;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to"
+ " retrieve %s from the "
+ "dictionary",
+ dict_key);
+ goto out;
}
+ sprintf(dict_key, "value%d", i);
+ ret = dict_get_str(dict, dict_key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to "
+ "retrieve key value for %s from"
+ "the dictionary",
+ dict_key);
+ goto out;
+ }
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"Opt");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- while (i < len) {
- j = 0;
- k = 0;
- size = 0;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"Option", "%s",
+ key);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- while (limit_list[i] != ':')
- path[k++] = limit_list[i++];
- path[k] = '\0';
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"Value", "%s",
+ value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- i++;
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ ret = cli_end_xml_output(writer, doc);
- while (limit_list[i] != ',' && limit_list[i] != '\0')
- value[j++] = limit_list[i++];
- i++;
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
- snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
- ret = sys_lgetxattr (abspath, "trusted.limit.list",
- (void *)ret_str, 4096);
- if (ret >= 0) {
- sscanf (ret_str, "%"SCNd64",%"SCNd64, &size,
- &limit_value);
- size_str = gf_uint64_2human_readable ((uint64_t)size);
- }
+int
+cli_quota_list_xml_error(cli_local_t *local, char *path, char *errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <quota> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"quota");
- XML_RET_CHECK_AND_GOTO (ret, unmount);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"path", "%s", path);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"limit", "%s", value);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
-
- if (size_str) {
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"size", "%s", size_str);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
- GF_FREE (size_str);
- } else {
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"size", "%"PRId64, size);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
- }
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"limit");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </quota> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- }
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"errstr",
+ "%s", errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
-unmount:
- runinit (&runner);
- runner_add_args (&runner, "umount",
-#if GF_LINUX_HOST_OS
- "-l",
+out:
+ return ret;
+#else
+ return 0;
#endif
- mountdir, NULL);
- ret = runner_run_reuse (&runner);
- if (ret)
- runner_log (&runner, "cli", GF_LOG_WARNING, "error executing");
- runner_end (&runner);
+}
-rm_dir:
- rmdir (mountdir);
+int
+cli_quota_xml_output(cli_local_t *local, char *path, int64_t hl_str,
+ char *sl_final, int64_t sl_num, int64_t used,
+ int64_t avail, char *sl, char *hl, gf_boolean_t limit_set)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
-cont:
- /* </volQuota> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"limit");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
-out:
- if (size_str)
- GF_FREE (size_str);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hard_limit", !limit_set ? "N/A" : "%" PRId64,
+ hl_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
-int
-cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
-{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- int count = 0;
- char *uuid = NULL;
- char *hostname = NULL;
- int connected = 0;
- int state_id = 0;
- char *state_str = NULL;
- int port = 0;
- int i = 1;
- char key[1024] = {0,};
-
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"soft_limit_percent",
+ !limit_set ? "N/A" : "%s", sl_final);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"soft_limit_value",
+ !limit_set ? "N/A" : "%" PRId64, sl_num);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <peerStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"peerStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"used_space", "%" PRId64, used);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- if (!dict)
- goto cont;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"avail_space",
+ !limit_set ? "N/A" : "%" PRId64, avail);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"sl_exceeded", !limit_set ? "N/A" : "%s", sl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- while (i <= count) {
- /* <peer> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"peer");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hl_exceeded", !limit_set ? "N/A" : "%s", hl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "friend%d.uuid", i);
- ret = dict_get_str (dict, key, &uuid);
- if (ret)
- goto out;
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"uuid",
- "%s", uuid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "friend%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
+int
+cli_quota_object_xml_output(cli_local_t *local, char *path, char *sl_str,
+ int64_t sl_val, quota_limits_t *limits,
+ quota_meta_t *used_space, int64_t avail, char *sl,
+ char *hl, gf_boolean_t limit_set)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"hostname",
- "%s", hostname);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"limit");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "friend%d.connected", i);
- ret = dict_get_int32 (dict, key, &connected);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"connected",
- "%d", connected);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hard_limit", !limit_set ? "N/A" : "%" PRId64,
+ limits->hl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "friend%d.stateId", i);
- ret = dict_get_int32 (dict, key, &state_id);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"soft_limit_percent",
+ !limit_set ? "N/A" : "%s", sl_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"state",
- "%d", state_id);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"soft_limit_value",
+ !limit_set ? "N/A" : "%" PRIu64, sl_val);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "friend%d.state", i);
- ret = dict_get_str (dict, key, &state_str);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"file_count", "%" PRId64,
+ used_space->file_count);
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"stateStr",
- "%s", state_str);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "friend%d.port", i);
- ret = dict_get_int32 (dict, key, &port);
- if (port != 0) {
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"port", "%d", port);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"dir_count",
+ "%" PRIu64, used_space->dir_count);
- port = 0;
- }
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </peer> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"available",
+ !limit_set ? "N/A" : "%" PRId64,
+ avail);
- i++;
- }
+ XML_RET_CHECK_AND_GOTO(ret, out);
-cont:
- /* </peerStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"sl_exceeded", !limit_set ? "N/A" : "%s", sl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hl_exceeded", !limit_set ? "N/A" : "%s", hl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
}
-
-#endif
diff --git a/cli/src/cli.c b/cli/src/cli.c
index 6a5ecf698c9..a52b39c5fb8 100644
--- a/cli/src/cli.c
+++ b/cli/src/cli.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -39,675 +29,884 @@
#include <semaphore.h>
#include <errno.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
#include "cli.h"
+#include "cli-quotad-client.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "xlator.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
-#include "dict.h"
-#include "list.h"
-#include "timer.h"
-#include "stack.h"
-#include "revision.h"
-#include "common-utils.h"
-#include "event.h"
-#include "globals.h"
-#include "syscall.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/revision.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/call-stub.h>
#include <fnmatch.h>
#include "xdr-generic.h"
-extern int connected;
/* using argp for command line parsing */
-const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__ \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2011 Gluster Inc. " \
- "<http://www.gluster.com>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "You may redistribute copies of GlusterFS under the terms of "\
- "the GNU General Public License.";
-
+const char *argp_program_version =
+ "" PACKAGE_NAME " " PACKAGE_VERSION
+ "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION
+ "\n"
+ "Copyright (c) 2006-2016 Red Hat, Inc. "
+ "<https://www.gluster.org/>\n"
+ "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+ "It is licensed to you under your choice of the GNU Lesser\n"
+ "General Public License, version 3 or any later version (LGPLv3\n"
+ "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+ "in all cases as published by the Free Software Foundation.";
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
-
+struct rpc_clnt *global_quotad_rpc;
struct rpc_clnt *global_rpc;
rpc_clnt_prog_t *cli_rpc_prog;
-
extern struct rpc_clnt_program cli_prog;
+int cli_default_conn_timeout = 120;
+int cli_ten_minutes_timeout = 600;
-
-
-static char *
-generate_uuid ()
+static int
+glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (gethostname (hostname, 256) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {
+ 0,
+ };
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ if (!ctx)
+ return ret;
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%"
-#ifdef GF_DARWIN_HOST_OS
- PRId32,
-#else
- "ld",
-#endif
- hostname, getpid(), now_str, tv.tv_usec);
+ ret = xlator_mem_acct_init(THIS, cli_mt_end);
+ if (ret != 0) {
+ gf_log("cli", GF_LOG_ERROR, "Memory accounting init failed.");
+ return ret;
+ }
+
+ /* Resetting ret to -1 to so in case of failure
+ * we can relese allocated resource.
+ */
+ ret = -1;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id();
+ if (!ctx->process_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to generate uuid.");
+ goto out;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new();
+ if (!ctx->iobuf_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create iobuf pool.");
+ goto out;
+ }
+
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
+ if (!ctx->event_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create event pool.");
+ goto out;
+ }
+
+ pool = GF_CALLOC(1, sizeof(call_pool_t), cli_mt_call_pool_t);
+ if (!pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create call pool.");
+ goto out;
+ }
+
+ /* frame_mem_pool size 112 * 64 */
+ pool->frame_mem_pool = mem_pool_new(call_frame_t, 32);
+ if (!pool->frame_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create frame mem pool.");
+ goto out;
+ }
+
+ /* stack_mem_pool size 256 * 128 */
+ pool->stack_mem_pool = mem_pool_new(call_stack_t, 16);
+
+ if (!pool->stack_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create stack mem pool.");
+ goto out;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new(call_stub_t, 16);
+ if (!ctx->stub_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to stub mem pool.");
+ goto out;
+ }
+
+ ctx->dict_pool = mem_pool_new(dict_t, 32);
+ if (!ctx->dict_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict pool.");
+ goto out;
+ }
+
+ ctx->dict_pair_pool = mem_pool_new(data_pair_t, 512);
+ if (!ctx->dict_pair_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict pair pool.");
+ goto out;
+ }
+
+ ctx->dict_data_pool = mem_pool_new(data_t, 512);
+ if (!ctx->dict_data_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict data pool.");
+ goto out;
+ }
+
+ ctx->logbuf_pool = mem_pool_new(log_buf_t, 256);
+ if (!ctx->logbuf_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create logbuf pool.");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&pool->all_frames);
+ LOCK_INIT(&pool->lock);
+ ctx->pool = pool;
+
+ cmd_args = &ctx->cmd_args;
+
+ INIT_LIST_HEAD(&cmd_args->xlator_options);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit(RLIMIT_CORE, &lim);
+
+ ret = 0;
- return gf_strdup (tmp_str);
+out:
+ if (ret != 0) {
+ if (pool) {
+ mem_pool_destroy(pool->frame_mem_pool);
+ mem_pool_destroy(pool->stack_mem_pool);
+ }
+ GF_FREE(pool);
+ pool = NULL;
+ GF_FREE(ctx->process_uuid);
+ mem_pool_destroy(ctx->stub_mem_pool);
+ mem_pool_destroy(ctx->dict_pool);
+ mem_pool_destroy(ctx->dict_pair_pool);
+ mem_pool_destroy(ctx->dict_data_pool);
+ mem_pool_destroy(ctx->logbuf_pool);
+ }
+
+ return ret;
}
static int
-glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+logging_init(glusterfs_ctx_t *ctx, struct cli_state *state)
{
- cmd_args_t *cmd_args = NULL;
- struct rlimit lim = {0, };
- call_pool_t *pool = NULL;
-
- xlator_mem_acct_init (THIS, cli_mt_end);
-
- ctx->process_uuid = generate_uuid ();
- if (!ctx->process_uuid)
- return -1;
-
- ctx->page_size = 128 * GF_UNIT_KB;
-
- ctx->iobuf_pool = iobuf_pool_new ();
- if (!ctx->iobuf_pool)
- return -1;
-
- ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
- if (!ctx->event_pool)
- return -1;
-
- pool = GF_CALLOC (1, sizeof (call_pool_t),
- cli_mt_call_pool_t);
- if (!pool)
- return -1;
-
- /* frame_mem_pool size 112 * 64 */
- pool->frame_mem_pool = mem_pool_new (call_frame_t, 32);
- if (!pool->frame_mem_pool)
- return -1;
-
- /* stack_mem_pool size 256 * 128 */
- pool->stack_mem_pool = mem_pool_new (call_stack_t, 16);
-
- if (!pool->stack_mem_pool)
- return -1;
-
- ctx->stub_mem_pool = mem_pool_new (call_stub_t, 16);
- if (!ctx->stub_mem_pool)
- return -1;
-
- ctx->dict_pool = mem_pool_new (dict_t, 32);
- if (!ctx->dict_pool)
- return -1;
-
- ctx->dict_pair_pool = mem_pool_new (data_pair_t, 512);
- if (!ctx->dict_pair_pool)
- return -1;
-
- ctx->dict_data_pool = mem_pool_new (data_t, 512);
- if (!ctx->dict_data_pool)
- return -1;
-
- INIT_LIST_HEAD (&pool->all_frames);
- LOCK_INIT (&pool->lock);
- ctx->pool = pool;
-
- pthread_mutex_init (&(ctx->lock), NULL);
-
- cmd_args = &ctx->cmd_args;
-
- INIT_LIST_HEAD (&cmd_args->xlator_options);
-
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
-
- return 0;
+ char *log_file = state->log_file ? state->log_file
+ : DEFAULT_CLI_LOG_FILE_DIRECTORY
+ "/cli.log";
+
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init(ctx, log_file, NULL) == -1) {
+ fprintf(stderr, "ERROR: failed to open logfile %s\n", log_file);
+ }
+
+ /* CLI should not have something to DEBUG after the release,
+ hence defaulting to INFO loglevel */
+ gf_log_set_loglevel(ctx, (state->log_level == GF_LOG_NONE)
+ ? GF_LOG_INFO
+ : state->log_level);
+
+ return 0;
}
-
-static int
-logging_init (struct cli_state *state)
+int
+cli_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
{
- char *log_file = state->log_file ? state->log_file :
- DEFAULT_CLI_LOG_FILE_DIRECTORY "/cli.log";
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+
+ GF_ASSERT(this);
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
- if (gf_log_init (log_file) == -1) {
- fprintf (stderr, "ERROR: failed to open logfile %s\n",
- log_file);
- return -1;
+ new_iobref = 1;
}
- /* CLI should not have something to DEBUG after the release,
- hence defaulting to INFO loglevel */
- gf_log_set_loglevel ((state->log_level == -1) ? GF_LOG_INFO :
- state->log_level);
+ iobref_add(iobref, iobuf);
- return 0;
-}
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size(iobuf);
-int
-cli_submit_request (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
-{
- int ret = -1;
- int count = 0;
- struct iovec iov = {0, };
- struct iobuf *iobuf = NULL;
- char new_iobref = 0;
- ssize_t xdr_size = 0;
-
- GF_ASSERT (this);
-
- if (req) {
- xdr_size = xdr_sizeof (xdrproc, req);
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
- if (!iobuf) {
- goto out;
- };
-
- if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- goto out;
- }
-
- new_iobref = 1;
- }
-
- iobref_add (iobref, iobuf);
-
- iov.iov_base = iobuf->ptr;
- iov.iov_len = iobuf_size (iobuf);
-
-
- /* Create the xdr payload */
- ret = xdr_serialize_generic (iov, req, xdrproc);
- if (ret == -1) {
- goto out;
- }
- iov.iov_len = ret;
- count = 1;
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
}
+ iov.iov_len = ret;
+ count = 1;
+ }
- /* Send the msg */
- ret = rpc_clnt_submit (global_rpc, prog, procnum, cbkfn,
- &iov, count,
- NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
- ret = 0;
+ if (!rpc)
+ rpc = global_rpc;
+ /* Send the msg */
+ ret = rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+ ret = 0;
out:
- if (new_iobref)
- iobref_unref (iobref);
- if (iobuf)
- iobuf_unref (iobuf);
- return ret;
+ if (new_iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
}
int
-cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
- void *data)
+cli_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
{
- xlator_t *this = NULL;
- int ret = 0;
+ xlator_t *this = NULL;
+ int ret = 0;
- this = mydata;
+ this = mydata;
- switch (event) {
- case RPC_CLNT_CONNECT:
- {
-
- cli_cmd_broadcast_connected ();
- gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
- break;
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
+ cli_cmd_broadcast_connected(_gf_true);
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
+ break;
}
- case RPC_CLNT_DISCONNECT:
- {
- gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
- connected = 0;
- if (!global_state->prompt && global_state->await_connected) {
- ret = 1;
- cli_out ("Connection failed. Please check if gluster "
- "daemon is operational.");
- exit (ret);
- }
- break;
+ case RPC_CLNT_DISCONNECT: {
+ cli_cmd_broadcast_connected(_gf_false);
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
+ if (!global_state->prompt && global_state->await_connected) {
+ ret = 1;
+ cli_out(
+ "Connection failed. Please check if gluster "
+ "daemon is operational.");
+ exit(ret);
+ }
+ break;
}
default:
- gf_log (this->name, GF_LOG_TRACE,
- "got some other RPC event %d", event);
- ret = 0;
- break;
- }
+ gf_log(this->name, GF_LOG_TRACE, "got some other RPC event %d",
+ event);
+ ret = 0;
+ break;
+ }
- return ret;
+ return ret;
+}
+
+static gf_boolean_t
+is_valid_int(char *str)
+{
+ if (*str == '-')
+ ++str;
+
+ /* Handle empty string or just "-".*/
+ if (!*str)
+ return _gf_false;
+
+ /* Check for non-digit chars in the rest of the string */
+ while (*str) {
+ if (!isdigit(*str))
+ return _gf_false;
+ else
+ ++str;
+ }
+ return _gf_true;
}
+/*
+ * ret: 0: option successfully processed
+ * 1: signalling end of option list
+ * -1: unknown option
+ * -2: parsing issue (avoid unknown option error)
+ */
int
-cli_opt_parse (char *opt, struct cli_state *state)
+cli_opt_parse(char *opt, struct cli_state *state)
{
- char *oarg;
+ char *oarg = NULL;
+ gf_boolean_t secure_mgmt_tmp = 0;
+
+ if (strcmp(opt, "") == 0)
+ return 1;
+ if (strcmp(opt, "help") == 0) {
+ cli_out(
+ " peer help - display help for peer commands\n"
+ " volume help - display help for volume commands\n"
+ " volume bitrot help - display help for volume"
+ " bitrot commands\n"
+ " volume quota help - display help for volume"
+ " quota commands\n"
+ " snapshot help - display help for snapshot commands\n"
+ " global help - list global commands\n");
+ exit(0);
+ }
+
+ if (strcmp(opt, "version") == 0) {
+ cli_out("%s", argp_program_version);
+ exit(0);
+ }
+
+ if (strcmp(opt, "print-logdir") == 0) {
+ cli_out("%s", DEFAULT_LOG_FILE_DIRECTORY);
+ exit(0);
+ }
+
+ if (strcmp(opt, "print-statedumpdir") == 0) {
+ cli_out("%s", DEFAULT_VAR_RUN_DIRECTORY);
+ exit(0);
+ }
+
+ if (strcmp(opt, "xml") == 0) {
+#if (HAVE_LIB_XML)
+ state->mode |= GLUSTER_MODE_XML;
+#else
+ cli_err("XML output not supported. Ignoring '--xml' option");
+#endif
+ return 0;
+ }
- if (strcmp (opt, "version") == 0) {
- puts (argp_program_version);
- exit (0);
- }
+ if (strcmp(opt, "nolog") == 0) {
+ state->mode |= GLUSTER_MODE_GLFSHEAL_NOLOG;
+ return 0;
+ }
- if (strcmp (opt, "xml") == 0) {
- state->mode |= GLUSTER_MODE_XML;
- return 0;
- }
+ if (strcmp(opt, "wignore-partition") == 0) {
+ state->mode |= GLUSTER_MODE_WIGNORE_PARTITION;
+ return 0;
+ }
- oarg = strtail (opt, "mode=");
- if (oarg) {
- if (strcmp (oarg, "script") == 0) {
- state->mode |= GLUSTER_MODE_SCRIPT;
- return 0;
- }
- if (strcmp (oarg, "interactive") == 0)
- return 0;
- return -1;
- }
+ if (strcmp(opt, "wignore") == 0) {
+ state->mode |= GLUSTER_MODE_WIGNORE;
+ return 0;
+ }
- oarg = strtail (opt, "remote-host=");
- if (oarg) {
- state->remote_host = oarg;
- return 0;
+ oarg = strtail(opt, "mode=");
+ if (oarg) {
+ if (strcmp(oarg, "script") == 0) {
+ state->mode |= GLUSTER_MODE_SCRIPT;
+ return 0;
}
- oarg = strtail (opt, "log-file=");
- if (oarg) {
- state->log_file = oarg;
- return 0;
+ if (strcmp(oarg, "interactive") == 0)
+ return 0;
+
+ return -1;
+ }
+
+ oarg = strtail(opt, "remote-host=");
+ if (oarg) {
+ state->remote_host = oarg;
+ return 0;
+ }
+
+ oarg = strtail(opt, "inet6");
+ if (oarg) {
+ state->address_family = "inet6";
+ return 0;
+ }
+
+ oarg = strtail(opt, "log-file=");
+ if (oarg) {
+ state->log_file = oarg;
+ return 0;
+ }
+ oarg = strtail(opt, "timeout=");
+ if (oarg) {
+ if (!is_valid_int(oarg) || atoi(oarg) <= 0) {
+ cli_err("timeout value should be a positive integer");
+ return -2; /* -2 instead of -1 to avoid unknown option
+ error */
}
+ cli_default_conn_timeout = atoi(oarg);
+ return 0;
+ }
+
+ oarg = strtail(opt, "log-level=");
+ if (oarg) {
+ int log_level = glusterd_check_log_level(oarg);
+ if (log_level == -1)
+ return -1;
+ state->log_level = (gf_loglevel_t)log_level;
+ return 0;
+ }
- oarg = strtail (opt, "log-level=");
- if (oarg) {
- state->log_level = glusterd_check_log_level(oarg);
- if (state->log_level == -1)
- return -1;
- return 0;
+ oarg = strtail(opt, "glusterd-sock=");
+ if (oarg) {
+ state->glusterd_sock = oarg;
+ return 0;
+ }
+
+ oarg = strtail(opt, "secure-mgmt=");
+ if (oarg) {
+ if (gf_string2boolean(oarg, &secure_mgmt_tmp) == 0) {
+ if (secure_mgmt_tmp) {
+ /* See declaration for why this is an int. */
+ state->ctx->secure_mgmt = 1;
+ }
+ } else {
+ cli_err("invalid secure-mgmt value (ignored)");
}
+ return 0;
+ }
- return -1;
+ return -1;
}
int
-parse_cmdline (int argc, char *argv[], struct cli_state *state)
+parse_cmdline(int argc, char *argv[], struct cli_state *state)
{
- int ret = 0;
- int i = 0;
- int j = 0;
- char *opt = NULL;
-
- state->argc=argc-1;
- state->argv=&argv[1];
-
- for (i = 0; i < state->argc; i++) {
- opt = strtail (state->argv[i], "--");
- if (opt) {
- ret = cli_opt_parse (opt, state);
- if (ret == -1) {
- cli_out ("unrecognized option --%s", opt);
- return ret;
- }
- for (j = i; j < state->argc - 1; j++)
- state->argv[j] = state->argv[j + 1];
- state->argc--;
- /* argv shifted, next check should be at i again */
- i--;
- }
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ char *opt = NULL;
+
+ state->argc = argc - 1;
+ state->argv = &argv[1];
+
+ /* Do this first so that an option can override. */
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ state->ctx->secure_mgmt = 1;
+ state->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
+ if (state->argc > GEO_REP_CMD_CONFIG_INDEX &&
+ strtail(state->argv[GEO_REP_CMD_INDEX], "geo") &&
+ strtail(state->argv[GEO_REP_CMD_CONFIG_INDEX], "co"))
+ goto done;
+
+ for (i = 0; i < state->argc; i++) {
+ opt = strtail(state->argv[i], "--");
+ if (!opt)
+ continue;
+ ret = cli_opt_parse(opt, state);
+ if (ret == -1) {
+ cli_out("unrecognized option --%s\n", opt);
+ usage();
+ return ret;
+ } else if (ret == -2) {
+ return ret;
}
+ for (j = i; j < state->argc - 1; j++)
+ state->argv[j] = state->argv[j + 1];
+ state->argc--;
+ /* argv shifted, next check should be at i again */
+ i--;
+ if (ret == 1) {
+ /* end of cli options */
+ ret = 0;
+ break;
+ }
+ }
- return ret;
-}
+done:
+ state->argv[state->argc] = NULL;
+ return ret;
+}
int
-cli_cmd_tree_init (struct cli_cmd_tree *tree)
+cli_cmd_tree_init(struct cli_cmd_tree *tree)
{
- struct cli_cmd_word *root = NULL;
- int ret = 0;
+ struct cli_cmd_word *root = NULL;
+ int ret = 0;
- root = &tree->root;
- root->tree = tree;
+ root = &tree->root;
+ root->tree = tree;
- return ret;
+ return ret;
}
-
int
-cli_state_init (struct cli_state *state)
+cli_state_init(struct cli_state *state)
{
- struct cli_cmd_tree *tree = NULL;
- int ret = 0;
+ struct cli_cmd_tree *tree = NULL;
+ int ret = 0;
+ state->log_level = GF_LOG_NONE;
- state->remote_host = "localhost";
- state->log_level = -1;
+ tree = &state->tree;
+ tree->state = state;
- tree = &state->tree;
- tree->state = state;
+ ret = cli_cmd_tree_init(tree);
- ret = cli_cmd_tree_init (tree);
-
- return ret;
+ return ret;
}
int
-cli_usage_out (const char *usage)
+cli_usage_out(const char *usage)
{
- GF_ASSERT (usage);
- GF_ASSERT (usage[0] != '\0');
+ GF_ASSERT(usage);
- if (!usage || usage[0] == '\0')
- return -1;
+ if (!usage || usage[0] == '\0')
+ return -1;
- cli_err ("Usage: %s", usage);
- return 0;
+ cli_err("\nUsage:\n%s\n", usage);
+ return 0;
}
int
-_cli_err (const char *fmt, ...)
+_cli_err(const char *fmt, ...)
{
- struct cli_state *state = NULL;
- va_list ap;
- int ret = 0;
-
- state = global_state;
+ va_list ap;
+ int ret = 0;
+#ifdef HAVE_READLINE
+ struct cli_state *state = global_state;
+#endif
- va_start (ap, fmt);
+ va_start(ap, fmt);
#ifdef HAVE_READLINE
- if (state->rl_enabled && !state->rl_processing)
- return cli_rl_err(state, fmt, ap);
+ if (state->rl_enabled && !state->rl_processing) {
+ ret = cli_rl_err(state, fmt, ap);
+ va_end(ap);
+ return ret;
+ }
#endif
- ret = vfprintf (stderr, fmt, ap);
- fprintf (stderr, "\n");
- va_end (ap);
+ ret = vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
- return ret;
+ return ret;
}
-
int
-_cli_out (const char *fmt, ...)
+_cli_out(const char *fmt, ...)
{
- struct cli_state *state = NULL;
- va_list ap;
- int ret = 0;
-
- state = global_state;
-
- va_start (ap, fmt);
+ va_list ap;
+ int ret = 0;
+#ifdef HAVE_READLINE
+ struct cli_state *state = global_state;
+#endif
+ va_start(ap, fmt);
#ifdef HAVE_READLINE
- if (state->rl_enabled && !state->rl_processing)
- return cli_rl_out(state, fmt, ap);
+ if (state->rl_enabled && !state->rl_processing) {
+ ret = cli_rl_out(state, fmt, ap);
+ va_end(ap);
+ return ret;
+ }
#endif
- ret = vprintf (fmt, ap);
- printf ("\n");
- va_end (ap);
+ ret = vprintf(fmt, ap);
+ printf("\n");
+ va_end(ap);
- return ret;
+ return ret;
}
struct rpc_clnt *
-cli_rpc_init (struct cli_state *state)
+cli_quotad_clnt_rpc_init(void)
{
- struct rpc_clnt *rpc = NULL;
- dict_t *options = NULL;
- int ret = -1;
- int port = CLI_GLUSTERD_PORT;
- xlator_t *this = NULL;
-
+ struct rpc_clnt *rpc = NULL;
+ dict_t *rpc_opts = NULL;
+ int ret = -1;
+
+ rpc_opts = dict_new();
+ if (!rpc_opts) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(rpc_opts, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str(rpc_opts, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str(rpc_opts, "transport.socket.connect-path",
+ "/var/run/gluster/quotad.socket");
+ if (ret)
+ goto out;
+
+ rpc = cli_quotad_clnt_init(THIS, rpc_opts);
+ if (!rpc)
+ goto out;
+
+ global_quotad_rpc = rpc;
+out:
+ if (rpc_opts) {
+ dict_unref(rpc_opts);
+ }
+ return rpc;
+}
- this = THIS;
- cli_rpc_prog = &cli_prog;
- options = dict_new ();
- if (!options)
- goto out;
+struct rpc_clnt *
+cli_rpc_init(struct cli_state *state)
+{
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = CLI_GLUSTERD_PORT;
+ xlator_t *this = NULL;
+#ifdef IPV6_DEFAULT
+ char *addr_family = "inet6";
+#else
+ char *addr_family = "inet";
+#endif
- ret = dict_set_str (options, "remote-host", state->remote_host);
+ this = THIS;
+ cli_rpc_prog = &cli_prog;
+
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ /* If address family specified in CLI */
+ if (state->address_family) {
+ addr_family = state->address_family;
+ }
+
+ /* Connect to glusterd using the specified method, giving preference
+ * to a unix socket connection. If nothing is specified, connect to
+ * the default glusterd socket.
+ */
+ if (state->glusterd_sock) {
+ gf_log("cli", GF_LOG_INFO,
+ "Connecting to glusterd using "
+ "sockfile %s",
+ state->glusterd_sock);
+ ret = rpc_transport_unix_options_build(options, state->glusterd_sock,
+ 0);
if (ret)
- goto out;
+ goto out;
+ } else if (state->remote_host) {
+ gf_log("cli", GF_LOG_INFO,
+ "Connecting to remote glusterd at "
+ "%s",
+ state->remote_host);
+
+ ret = dict_set_str(options, "remote-host", state->remote_host);
+ if (ret)
+ goto out;
if (state->remote_port)
- port = state->remote_port;
+ port = state->remote_port;
- ret = dict_set_int32 (options, "remote-port", port);
+ ret = dict_set_int32(options, "remote-port", port);
if (ret)
- goto out;
+ goto out;
- ret = dict_set_str (options, "transport.address-family", "inet/inet6");
+ ret = dict_set_str(options, "transport.address-family", addr_family);
if (ret)
- goto out;
-
- rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
+ goto out;
+ } else {
+ gf_log("cli", GF_LOG_DEBUG,
+ "Connecting to glusterd using "
+ "default socket");
+ ret = rpc_transport_unix_options_build(options,
+ DEFAULT_GLUSTERD_SOCKFILE, 0);
+ if (ret)
+ goto out;
+ }
- if (!rpc)
- goto out;
+ rpc = rpc_clnt_new(options, this, this->name, 16);
+ if (!rpc)
+ goto out;
- ret = rpc_clnt_register_notify (rpc, cli_rpc_notify, this);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "failed to register notify");
- goto out;
- }
+ ret = rpc_clnt_register_notify(rpc, cli_rpc_notify, this);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "failed to register notify");
+ goto out;
+ }
- rpc_clnt_start (rpc);
+ ret = rpc_clnt_start(rpc);
out:
- if (ret) {
- if (rpc)
- rpc_clnt_unref (rpc);
- rpc = NULL;
- }
- return rpc;
+ if (options)
+ dict_unref(options);
+
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref(rpc);
+ rpc = NULL;
+ }
+ return rpc;
}
cli_local_t *
-cli_local_get ()
+cli_local_get()
{
- cli_local_t *local = NULL;
+ cli_local_t *local = NULL;
- local = GF_CALLOC (1, sizeof (*local), cli_mt_cli_local_t);
+ local = GF_CALLOC(1, sizeof(*local), cli_mt_cli_local_t);
+ LOCK_INIT(&local->lock);
+ INIT_LIST_HEAD(&local->dict_list);
- return local;
+ return local;
}
void
-cli_local_wipe (cli_local_t *local)
+cli_local_wipe(cli_local_t *local)
{
- if (local) {
- if (local->get_vol.volname)
- GF_FREE (local->get_vol.volname);
- if (local->dict)
- dict_unref (local->dict);
- GF_FREE (local);
- }
-
- return;
+ if (local) {
+ GF_FREE(local->get_vol.volname);
+ if (local->dict)
+ dict_unref(local->dict);
+ GF_FREE(local);
+ }
+
+ return;
}
-/* If the path exists use realpath(3) to handle extra slashes and to resolve
- * symlinks else strip the extra slashes in the path and return */
+struct cli_state *global_state;
int
-cli_canonicalize_path (char *path)
+main(int argc, char *argv[])
{
- struct stat sb = {0};
- int ret = -1;
- char *tmppath = NULL;
- char *dir = NULL;
- char *tmpstr = NULL;
- int path_len = 0;
+ struct cli_state state = {
+ 0,
+ };
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
- if (!path)
- return ret;
+ mem_pools_init();
- ret = stat (path, &sb);
- if (ret == -1) {
- /* Strip the extra slashes and return */
- tmppath = gf_strdup (path);
- if (tmppath == NULL) {
- ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Out of memory.");
- goto out;
- }
- bzero (path, strlen(path));
- path[0] = '/';
- dir = strtok_r(tmppath, "/", &tmpstr);
- while (dir) {
- strncpy ((path + path_len + 1), dir, strlen(dir));
- path_len = strlen (path);
- dir = strtok_r(NULL, "/", &tmpstr);
- if (dir)
- strncpy((path + path_len), "/", 1);
- }
- if (path_len == 0)
- path[1] = '\0';
- else
- path[path_len] = '\0';
- ret = 0;
- goto out;
- } else {
- tmppath = gf_strdup(path);
- if (tmppath == NULL) {
- ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Out of memory.");
- goto out;
- }
- if (realpath (tmppath, path) == NULL) {
- cli_out ("Path manipulation failed: %s",
- strerror(errno));
- gf_log ("cli", GF_LOG_ERROR, "Path manipulation "
- "failed: %s", strerror(errno));
- ret = -1;
- goto out;
- }
- ret = 0;
- }
-out:
- if (tmppath)
- GF_FREE(tmppath);
- return ret;
-}
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ return ENOMEM;
-struct cli_state *global_state;
+#ifdef DEBUG
+ gf_mem_acct_enable_set(ctx);
+#endif
-int
-main (int argc, char *argv[])
-{
- struct cli_state state = {0, };
- int ret = -1;
- glusterfs_ctx_t *ctx = NULL;
+ ret = glusterfs_globals_init(ctx);
+ if (ret)
+ return ret;
- ret = glusterfs_globals_init ();
- if (ret)
- return ret;
+ THIS->ctx = ctx;
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- return ENOMEM;
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret)
+ goto out;
- ret = glusterfs_ctx_defaults_init (ctx);
- if (ret)
- goto out;
+ cli_default_conn_timeout = 120;
+ cli_ten_minutes_timeout = 600;
- ret = cli_state_init (&state);
- if (ret)
- goto out;
+ ret = cli_state_init(&state);
+ if (ret)
+ goto out;
- state.ctx = ctx;
- global_state = &state;
+ state.ctx = ctx;
+ global_state = &state;
- ret = parse_cmdline (argc, argv, &state);
- if (ret)
- goto out;
+ ret = parse_cmdline(argc, argv, &state);
+ if (ret)
+ goto out;
- ret = logging_init (&state);
- if (ret)
- goto out;
+ ret = logging_init(ctx, &state);
+ if (ret)
+ goto out;
- global_rpc = cli_rpc_init (&state);
- if (!global_rpc)
- goto out;
+ gf_log("cli", GF_LOG_INFO, "Started running %s with version %s", argv[0],
+ PACKAGE_VERSION);
- ret = cli_cmds_register (&state);
- if (ret)
- goto out;
+ global_rpc = cli_rpc_init(&state);
+ if (!global_rpc)
+ goto out;
- ret = cli_cmd_cond_init ();
- if (ret)
- goto out;
+ global_quotad_rpc = cli_quotad_clnt_rpc_init();
+ if (!global_quotad_rpc)
+ goto out;
- ret = cli_input_init (&state);
- if (ret)
- goto out;
+ ret = cli_cmds_register(&state);
+ if (ret)
+ goto out;
+
+ ret = cli_input_init(&state);
+ if (ret)
+ goto out;
- ret = event_dispatch (ctx->event_pool);
+ ret = gf_event_dispatch(ctx->event_pool);
out:
-// glusterfs_ctx_destroy (ctx);
+ // glusterfs_ctx_destroy (ctx);
- return ret;
+ mem_pools_fini();
+
+ return ret;
}
void
-cli_print_line (int len)
+cli_print_line(int len)
{
- GF_ASSERT (len > 0);
+ GF_ASSERT(len > 0);
+
+ while (len--)
+ printf("-");
+
+ printf("\n");
+}
- while (len--)
- printf ("-");
+void
+print_quota_list_header(int type)
+{
+ if (type == GF_QUOTA_OPTION_TYPE_LIST) {
+ cli_out(
+ " Path Hard-limit "
+ " Soft-limit Used Available Soft-limit "
+ "exceeded? Hard-limit exceeded?");
+ cli_out(
+ "-----------------------------------------------------"
+ "-----------------------------------------------------"
+ "---------------------");
+ } else {
+ cli_out(
+ " Path Hard-limit "
+ " Soft-limit Files Dirs Available "
+ "Soft-limit exceeded? Hard-limit exceeded?");
+ cli_out(
+ "-----------------------------------------------------"
+ "-----------------------------------------------------"
+ "-------------------------------------");
+ }
+}
- printf ("\n");
+void
+print_quota_list_empty(char *path, int type)
+{
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ cli_out("%-40s %7s %9s %10s %7s %15s %20s", path, "N/A", "N/A", "N/A",
+ "N/A", "N/A", "N/A");
+ else
+ cli_out("%-40s %9s %9s %12s %10s %10s %15s %20s", path, "N/A", "N/A",
+ "N/A", "N/A", "N/A", "N/A", "N/A");
}
diff --git a/cli/src/cli.h b/cli/src/cli.h
index 894b6f934ba..c0d933e8f8a 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -1,344 +1,516 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_H__
#define __CLI_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpc-clnt.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "protocol-common.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/quota-common-utils.h>
+
+#include "cli1-xdr.h"
+#include "gd-common-utils.h"
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
#endif
-#define DEFAULT_EVENT_POOL_SIZE 16384
-#define CLI_GLUSTERD_PORT 24007
-#define CLI_DEFAULT_CONN_TIMEOUT 120
-#define CLI_DEFAULT_CMD_TIMEOUT 120
-#define CLI_TOP_CMD_TIMEOUT 600 //Longer timeout for volume top
-#define DEFAULT_CLI_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define CLI_VOL_STATUS_BRICK_LEN 55
-#define CLI_TAB_LENGTH 8
-#define CLI_BRICK_STATUS_LINE_LEN 78
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define CLI_GLUSTERD_PORT 24007
+#define DEFAULT_CLI_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define CLI_VOL_STATUS_BRICK_LEN 43
+#define CLI_TAB_LENGTH 8
+#define CLI_BRICK_STATUS_LINE_LEN 78
+
+/* Geo-rep command positional arguments' index */
+#define GEO_REP_CMD_INDEX 1
+#define GEO_REP_CMD_CONFIG_INDEX 4
enum argp_option_keys {
- ARGP_DEBUG_KEY = 133,
- ARGP_PORT_KEY = 'p',
+ ARGP_DEBUG_KEY = 133,
+ ARGP_PORT_KEY = 'p',
};
-#define GLUSTER_MODE_SCRIPT (1 << 0)
+extern int cli_default_conn_timeout;
+extern int cli_ten_minutes_timeout;
+
+typedef enum {
+ COLD_BRICK_COUNT,
+ COLD_TYPE,
+ COLD_DIST_COUNT,
+ COLD_REPLICA_COUNT,
+ COLD_ARBITER_COUNT,
+ COLD_DISPERSE_COUNT,
+ COLD_REDUNDANCY_COUNT,
+ HOT_BRICK_COUNT,
+ HOT_TYPE,
+ HOT_REPLICA_COUNT,
+ MAX
+} values;
+
+#define GLUSTER_MODE_SCRIPT (1 << 0)
#define GLUSTER_MODE_ERR_FATAL (1 << 1)
-#define GLUSTER_MODE_XML (1 << 2)
+#define GLUSTER_MODE_XML (1 << 2)
+#define GLUSTER_MODE_WIGNORE (1 << 3)
+#define GLUSTER_MODE_WIGNORE_PARTITION (1 << 4)
+#define GLUSTER_MODE_GLFSHEAL_NOLOG (1 << 5)
+
+#define GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(abspath, volname, path) \
+ do { \
+ snprintf(abspath, sizeof(abspath) - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list%s", volname, path); \
+ } while (0)
+
struct cli_state;
struct cli_cmd_word;
struct cli_cmd_tree;
struct cli_cmd;
-typedef int (cli_cmd_cbk_t)(struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words,
- int wordcount);
-typedef void (cli_cmd_reg_cbk_t)( struct cli_cmd *this);
+extern char *cli_vol_status_str[];
+extern char *cli_vol_task_status_str[];
+
+typedef int(cli_cmd_cbk_t)(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+typedef void(cli_cmd_reg_cbk_t)(struct cli_cmd *this);
-typedef int (cli_cmd_match_t)(struct cli_cmd_word *word);
-typedef int (cli_cmd_filler_t)(struct cli_cmd_word *word);
+typedef int(cli_cmd_match_t)(struct cli_cmd_word *word);
+typedef int(cli_cmd_filler_t)(struct cli_cmd_word *word);
struct cli_cmd_word {
- struct cli_cmd_tree *tree;
- const char *word;
- cli_cmd_filler_t *filler;
- cli_cmd_match_t *match;
- cli_cmd_cbk_t *cbkfn;
- const char *desc;
- const char *pattern;
- int nextwords_cnt;
- struct cli_cmd_word **nextwords;
+ struct cli_cmd_tree *tree;
+ const char *word;
+ cli_cmd_filler_t *filler;
+ cli_cmd_match_t *match;
+ cli_cmd_cbk_t *cbkfn;
+ const char *desc;
+ const char *pattern;
+ int nextwords_cnt;
+ struct cli_cmd_word **nextwords;
};
-
struct cli_cmd_tree {
- struct cli_state *state;
- struct cli_cmd_word root;
+ struct cli_state *state;
+ struct cli_cmd_word root;
};
-
struct cli_state {
- int argc;
- char **argv;
+ int argc;
+ char **argv;
+
+ char debug;
- char debug;
+ /* for events dispatching */
+ glusterfs_ctx_t *ctx;
- /* for events dispatching */
- glusterfs_ctx_t *ctx;
+ /* registry of known commands */
+ struct cli_cmd_tree tree;
- /* registry of known commands */
- struct cli_cmd_tree tree;
+ /* the thread which "executes" the command in non-interactive mode */
+ /* also the thread which reads from stdin in non-readline mode */
+ pthread_t input;
- /* the thread which "executes" the command in non-interactive mode */
- /* also the thread which reads from stdin in non-readline mode */
- pthread_t input;
+ /* terminal I/O */
+ const char *prompt;
+ int rl_enabled;
+ int rl_async;
+ int rl_processing;
- /* terminal I/O */
- const char *prompt;
- int rl_enabled;
- int rl_async;
- int rl_processing;
+ /* autocompletion state */
+ char **matches;
+ char **matchesp;
- /* autocompletion state */
- char **matches;
- char **matchesp;
+ char *remote_host;
+ int remote_port;
+ int mode;
+ int await_connected;
- char *remote_host;
- int remote_port;
- int mode;
- int await_connected;
+ char *log_file;
+ gf_loglevel_t log_level;
- char *log_file;
- gf_loglevel_t log_level;
+ char *glusterd_sock;
+ char *address_family;
};
struct cli_local {
- struct {
- char *volname;
- int flags;
- } get_vol;
-
- dict_t *dict;
- /* Marker for volume status all */
- gf_boolean_t all;
+ struct {
+ char *volname;
+ int flags;
+ } get_vol;
+
+ dict_t *dict;
+ const char **words;
+ /* Marker for volume status all */
+ gf_boolean_t all;
#if (HAVE_LIB_XML)
- xmlTextWriterPtr writer;
- xmlBufferPtr buf;
- int vol_count;
+ xmlTextWriterPtr writer;
+ xmlDocPtr doc;
+ int vol_count;
#endif
+ gf_lock_t lock;
+ struct list_head dict_list;
};
struct cli_volume_status {
- int port;
- int online;
- uint64_t block_size;
- uint64_t total_inodes;
- uint64_t free_inodes;
- char *brick;
- char *pid_str;
- char *free;
- char *total;
-#ifdef GF_LINUX_HOST_OS
- char *fs_name;
- char *mount_options;
- char *device;
- char *inode_size;
-#endif
+ int port;
+ int rdma_port;
+ int online;
+ uint64_t block_size;
+ uint64_t total_inodes;
+ uint64_t free_inodes;
+ char *brick;
+ char *pid_str;
+ char *free;
+ char *total;
+ char *fs_name;
+ char *mount_options;
+ char *device;
+ char *inode_size;
+};
+
+struct snap_config_opt_vals_ {
+ char *op_name;
+ char *question;
};
typedef struct cli_volume_status cli_volume_status_t;
typedef struct cli_local cli_local_t;
-typedef ssize_t (*cli_serialize_t) (struct iovec outmsg, void *args);
+typedef ssize_t (*cli_serialize_t)(struct iovec outmsg, void *args);
extern struct cli_state *global_state; /* use only in readline callback */
-typedef const char *(*cli_selector_t) (void *wcon);
+extern struct rpc_clnt *global_quotad_rpc;
-void *cli_getunamb (const char *tok, void **choices, cli_selector_t sel);
+extern struct rpc_clnt *global_rpc;
-int cli_cmd_register (struct cli_cmd_tree *tree, struct cli_cmd *cmd);
-int cli_cmds_register (struct cli_state *state);
+extern rpc_clnt_prog_t *cli_rpc_prog;
-int cli_input_init (struct cli_state *state);
+typedef const char *(*cli_selector_t)(void *wcon);
-int cli_cmd_process (struct cli_state *state, int argc, char *argv[]);
-int cli_cmd_process_line (struct cli_state *state, const char *line);
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val);
-int cli_rl_enable (struct cli_state *state);
-int cli_rl_out (struct cli_state *state, const char *fmt, va_list ap);
-int cli_rl_err (struct cli_state *state, const char *fmt, va_list ap);
+void *
+cli_getunamb(const char *tok, void **choices, cli_selector_t sel);
-int cli_usage_out (const char *usage);
+int
+cli_cmd_register(struct cli_cmd_tree *tree, struct cli_cmd *cmd);
+int
+cli_cmds_register(struct cli_state *state);
-int _cli_out (const char *fmt, ...);
-int _cli_err (const char *fmt, ...);
+int
+cli_input_init(struct cli_state *state);
-#define cli_out(fmt...) do { \
- FMT_WARN (fmt); \
- \
- _cli_out(fmt); \
- \
- } while (0)
+int
+cli_cmd_process(struct cli_state *state, int argc, char *argv[]);
+int
+cli_cmd_process_line(struct cli_state *state, const char *line);
-#define cli_err(fmt...) do { \
- FMT_WARN (fmt); \
- \
- _cli_err(fmt); \
- \
- } while (0)
+int
+cli_rl_enable(struct cli_state *state);
+int
+cli_rl_out(struct cli_state *state, const char *fmt, va_list ap);
+int
+cli_rl_err(struct cli_state *state, const char *fmt, va_list ap);
int
-cli_submit_request (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+cli_usage_out(const char *usage);
+
+int
+_cli_out(const char *fmt, ...);
+int
+_cli_err(const char *fmt, ...);
+
+#define cli_out(fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ \
+ _cli_out(fmt); \
+ \
+ } while (0)
+
+#define cli_err(fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ \
+ _cli_err(fmt); \
+ \
+ } while (0)
+
+#define usage() \
+ do { \
+ cli_out( \
+ " Usage: gluster [options] <help> <peer>" \
+ " <pool> <volume>\n" \
+ " Options:\n" \
+ " --help Shows the help information\n" \
+ " --version Shows the version\n" \
+ " --print-logdir Shows the log directory\n" \
+ " --print-statedumpdir Shows the state dump directory\n"); \
+ \
+ } while (0)
+
+int
+cli_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
int32_t
-cli_cmd_volume_create_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **bricks);
int32_t
-cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **opt);
+cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **opt);
+cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **opt, char **errstr);
int32_t
-cli_cmd_quota_parse (const char **words, int wordcount, dict_t **opt);
+cli_cmd_quota_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_volume_set_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_inode_quota_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
- dict_t **options, int *question);
+cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
int32_t
-cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
+cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
+
+int32_t
+cli_cmd_get_state_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
+
+int32_t
+cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, int *type);
+
+int32_t
+cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options,
+ int *question, int *brick_count,
+ int32_t *command);
+
+int32_t
+cli_cmd_volume_replace_brick_parse(const char **words, int wordcount,
dict_t **options);
int32_t
-cli_cmd_log_rotate_parse (const char **words, int wordcount, dict_t **options);
+cli_cmd_volume_reset_brick_parse(const char **words, int wordcount,
+ dict_t **options);
+
int32_t
-cli_cmd_log_locate_parse (const char **words, int wordcount, dict_t **options);
+cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options);
+cli_cmd_log_locate_parse(const char **words, int wordcount, dict_t **options);
+int32_t
+cli_cmd_log_filename_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_statedump_options_parse(const char **words, int wordcount,
+ dict_t **options);
int32_t
-cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_clrlks_opts_parse(const char **words, int wordcount,
+ dict_t **options);
-cli_local_t * cli_local_get ();
+cli_local_t *
+cli_local_get();
void
-cli_local_wipe (cli_local_t *local);
+cli_local_wipe(cli_local_t *local);
-int32_t
-cli_cmd_await_connected ();
+gf_boolean_t
+cli_cmd_connected();
int32_t
-cli_cmd_broadcast_connected ();
+cli_cmd_await_connected(unsigned timeout);
-int
-cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
- void *data);
+int32_t
+cli_cmd_broadcast_connected(gf_boolean_t status);
int
-cli_canonicalize_path (char *path);
+cli_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data);
int32_t
-cli_cmd_volume_profile_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_profile_parse(const char **words, int wordcount,
+ dict_t **options);
int32_t
-cli_cmd_volume_top_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_top_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_log_level_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_log_level_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_volume_status_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_status_parse(const char **words, int wordcount,
+ dict_t **options);
int
-cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_heal_options_parse(const char **words, int wordcount,
+ dict_t **options);
int
-cli_print_brick_status (cli_volume_status_t *status);
+cli_cmd_volume_defrag_parse(const char **words, int wordcount,
+ dict_t **options);
+
+int
+cli_print_brick_status(cli_volume_status_t *status);
void
-cli_print_detailed_status (cli_volume_status_t *status);
+cli_print_detailed_status(cli_volume_status_t *status);
int
-cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status);
+cli_get_detail_status(dict_t *dict, int i, cli_volume_status_t *status);
void
-cli_print_line (int len);
+cli_print_line(int len);
-#if (HAVE_LIB_XML)
int
-cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
+cli_xml_output_str(char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_dict(char *op, dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
int
-cli_xml_output_dict (char *op, dict_t *dict, int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_output_vol_top(dict_t *dict, int op_ret, int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_profile(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
int
-cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
+cli_xml_output_vol_status_end(cli_local_t *local);
+
+int
+cli_xml_output_vol_status(cli_local_t *local, dict_t *dict);
+
+int
+cli_xml_output_vol_list(dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
int
-cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_output_vol_info_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_info_end(cli_local_t *local);
int
-cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
+cli_xml_output_vol_info(cli_local_t *local, dict_t *dict);
+
+int
+cli_xml_output_vol_quota_limit_list_begin(cli_local_t *local, int op_ret,
+ int op_errno, char *op_errstr);
+int
+cli_xml_output_vol_quota_limit_list_end(cli_local_t *local);
+
+int
+cli_quota_list_xml_error(cli_local_t *local, char *path, char *errstr);
+
+int
+cli_quota_xml_output(cli_local_t *local, char *path, int64_t hl_str,
+ char *sl_final, int64_t sl_num, int64_t used,
+ int64_t avail, char *sl, char *hl, gf_boolean_t limit_set);
+
+int
+cli_quota_object_xml_output(cli_local_t *local, char *path, char *sl_str,
+ int64_t sl_val, quota_limits_t *limits,
+ quota_meta_t *used_space, int64_t avail, char *sl,
+ char *hl, gf_boolean_t limit_set);
+
+int
+cli_xml_output_peer_status(dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
int
-cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
+cli_xml_output_vol_rebalance(gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_remove_brick(gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr,
+ const char *op);
+
+int
+cli_xml_output_vol_replace_brick(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_create(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_generic_volume(char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_gsync(dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
+int
+cli_xml_output_vol_status_tasks_detail(cli_local_t *local, dict_t *dict);
int
-cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_snapshot_delete(cli_local_t *local, dict_t *dict, gf_cli_rsp *rsp);
int
-cli_xml_output_vol_info_end (cli_local_t *local);
+cli_xml_snapshot_begin_composite_op(cli_local_t *local);
int
-cli_xml_output_vol_info (cli_local_t *local, dict_t *dict);
+cli_xml_snapshot_end_composite_op(cli_local_t *local);
int
-cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
- int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_output_snap_delete_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_output_snap_delete_end(cli_local_t *local);
int
-cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr);
-#endif
+cli_xml_output_snap_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_output_snap_status_end(cli_local_t *local);
+int
+cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict,
+ char *key);
+int32_t
+cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options,
+ struct cli_state *state);
+
+int
+cli_xml_output_vol_getopts(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+void
+print_quota_list_header(int type);
+void
+print_quota_list_empty(char *path, int type);
+
+int
+gf_gsync_status_t_comparator(const void *p, const void *q);
#endif /* __CLI_H__ */
diff --git a/cli/src/input.c b/cli/src/input.c
index a88d358745a..5ac1a20edb1 100644
--- a/cli/src/input.c
+++ b/cli/src/input.c
@@ -1,107 +1,93 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-mem-types.h"
#define CMDBUFSIZ 1024
void *
-cli_batch (void *d)
+cli_batch(void *d)
{
- struct cli_state *state = NULL;
- int ret = 0;
+ struct cli_state *state = NULL;
+ int ret = 0;
- state = d;
+ state = d;
- ret = cli_cmd_process (state, state->argc, state->argv);
+ ret = cli_cmd_process(state, state->argc, state->argv);
- gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
- exit (ret);
+ gf_log("", GF_LOG_INFO, "Exiting with: %d", ret);
+ exit(-ret);
- return NULL;
+ return NULL;
}
-
void *
-cli_input (void *d)
+cli_input(void *d)
{
- struct cli_state *state = NULL;
- int ret = 0;
- char cmdbuf[CMDBUFSIZ];
- char *cmd = NULL;
- size_t len = 0;
-
- state = d;
-
- for (;;) {
- printf ("%s", state->prompt);
-
- cmd = fgets (cmdbuf, CMDBUFSIZ, stdin);
- if (!cmd)
- break;
- len = strlen(cmd);
- if (len > 0 && cmd[len - 1] == '\n') //strip trailing \n
- cmd[len - 1] = '\0';
- ret = cli_cmd_process_line (state, cmd);
- if (ret == -1 && state->mode & GLUSTER_MODE_ERR_FATAL)
- break;
- }
-
- exit (ret);
-
- return NULL;
+ struct cli_state *state = NULL;
+ int ret = 0;
+ char cmdbuf[CMDBUFSIZ];
+ char *cmd = NULL;
+ size_t len = 0;
+
+ state = d;
+
+ fprintf(stderr,
+ "Welcome to gluster prompt, type 'help' to see the available "
+ "commands.\n");
+ for (;;) {
+ printf("%s", state->prompt);
+
+ cmd = fgets(cmdbuf, CMDBUFSIZ, stdin);
+ if (!cmd)
+ break;
+ len = strlen(cmd);
+ if (len > 0 && cmd[len - 1] == '\n') // strip trailing \n
+ cmd[len - 1] = '\0';
+ ret = cli_cmd_process_line(state, cmd);
+ if (ret != 0 && state->mode & GLUSTER_MODE_ERR_FATAL)
+ break;
+ }
+
+ exit(-ret);
+
+ return NULL;
}
-
int
-cli_input_init (struct cli_state *state)
+cli_input_init(struct cli_state *state)
{
- int ret = 0;
+ int ret = 0;
- if (state->argc) {
- ret = pthread_create (&state->input, NULL, cli_batch, state);
- return ret;
- }
+ if (state->argc) {
+ ret = pthread_create(&state->input, NULL, cli_batch, state);
+ return ret;
+ }
- if (isatty (STDIN_FILENO)) {
- state->prompt = "gluster> ";
+ if (isatty(STDIN_FILENO)) {
+ state->prompt = "gluster> ";
- cli_rl_enable (state);
- } else {
- state->prompt = "";
- state->mode = GLUSTER_MODE_SCRIPT | GLUSTER_MODE_ERR_FATAL;
- }
+ cli_rl_enable(state);
+ } else {
+ state->prompt = "";
+ state->mode |= GLUSTER_MODE_SCRIPT | GLUSTER_MODE_ERR_FATAL;
+ }
- if (!state->rl_enabled)
- ret = pthread_create (&state->input, NULL, cli_input, state);
+ if (!state->rl_enabled)
+ ret = pthread_create(&state->input, NULL, cli_input, state);
- return ret;
+ return ret;
}
diff --git a/cli/src/registry.c b/cli/src/registry.c
index fc3b2decdf1..85f7686ade1 100644
--- a/cli/src/registry.c
+++ b/cli/src/registry.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
@@ -30,20 +15,18 @@
#include "cli.h"
#include "cli-cmd.h"
-
static int
-__is_spc (int ch)
+__is_spc(int ch)
{
- if (ch == ' ')
- return 1;
- return 0;
+ if (ch == ' ')
+ return 1;
+ return 0;
}
-
static int
-__is_div (int ch)
+__is_div(int ch)
{
- switch (ch) {
+ switch (ch) {
case '(':
case ')':
case '<':
@@ -53,369 +36,356 @@ __is_div (int ch)
case '{':
case '}':
case '|':
- return 1;
- }
+ return 1;
+ }
- return 0;
+ return 0;
}
-
static int
-__is_word (const char *word)
+__is_word(const char *word)
{
- return (!__is_div (*word) && !__is_spc (*word));
+ return (!__is_div(*word) && !__is_spc(*word));
}
-
int
-counter_char (int ch)
+counter_char(int ch)
{
- switch (ch) {
+ switch (ch) {
case '(':
- return ')';
+ return ')';
case '<':
- return '>';
+ return '>';
case '[':
- return ']';
+ return ']';
case '{':
- return '}';
- }
+ return '}';
+ }
- return -1;
+ return -1;
}
-
const char *
-__is_template_balanced (const char *template)
+__is_template_balanced(const char *template)
{
- const char *trav = NULL;
- int ch = 0;
-
- trav = template;
-
- while (*trav) {
- ch = *trav;
-
- switch (ch) {
- case '<':
- case '(':
- case '[':
- trav = __is_template_balanced (trav+1);
- if (!trav)
- return NULL;
- if (*trav != counter_char (ch))
- return NULL;
- break;
- case '>':
- case ')':
- case ']':
- return trav;
- }
+ const char *trav = NULL;
+ int ch = 0;
- trav++;
+ trav = template;
+
+ while (*trav) {
+ ch = *trav;
+
+ switch (ch) {
+ case '<':
+ case '(':
+ case '[':
+ trav = __is_template_balanced(trav + 1);
+ if (!trav)
+ return NULL;
+ if (*trav != counter_char(ch))
+ return NULL;
+ break;
+ case '>':
+ case ')':
+ case ']':
+ return trav;
}
- return trav;
-}
+ trav++;
+ }
+ return trav;
+}
int
-is_template_balanced (const char *template)
+is_template_balanced(const char *template)
{
- const char *trav = NULL;
+ const char *trav = NULL;
- trav = __is_template_balanced (template);
- if (!trav || *trav)
- return -1;
+ trav = __is_template_balanced(template);
+ if (!trav || *trav)
+ return -1;
- return 0;
+ return 0;
}
-
int
-cli_cmd_token_count (const char *template)
+cli_cmd_token_count(const char *template)
{
- int count = 0;
- const char *trav = NULL;
- int is_alnum = 0;
-
- for (trav = template; *trav; trav++) {
- switch (*trav) {
- case '<':
- case '>':
- case '(':
- case ')':
- case '[':
- case ']':
- case '{':
- case '}':
- case '|':
- count++;
- /* fall through */
- case ' ':
- is_alnum = 0;
- break;
- default:
- if (!is_alnum) {
- is_alnum = 1;
- count++;
- }
+ int count = 0;
+ const char *trav = NULL;
+ int is_alnum = 0;
+
+ for (trav = template; *trav; trav++) {
+ switch (*trav) {
+ case '<':
+ case '>':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '|':
+ count++;
+ /* fall through */
+ case ' ':
+ is_alnum = 0;
+ break;
+ default:
+ if (!is_alnum) {
+ is_alnum = 1;
+ count++;
}
}
+ }
- return count + 1;
+ return count + 1;
}
-
void
-cli_cmd_tokens_destroy (char **tokens)
+cli_cmd_tokens_destroy(char **tokens)
{
- char **tokenp = NULL;
+ char **tokenp = NULL;
- if (!tokens)
- return;
+ if (!tokens)
+ return;
- tokenp = tokens;
- while (*tokenp) {
- free (*tokenp);
- tokenp++;
- }
+ tokenp = tokens;
+ while (*tokenp) {
+ free(*tokenp);
+ tokenp++;
+ }
- free (tokens);
+ free(tokens);
}
-
int
-cli_cmd_tokens_fill (char **tokens, const char *template)
+cli_cmd_tokens_fill(char **tokens, const char *template)
{
- const char *trav = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- int ret = 0;
- int ch = 0;
+ const char *trav = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ int ret = 0;
+ int ch = 0;
- tokenp = tokens;
+ tokenp = tokens;
- for (trav = template; *trav; trav++) {
- ch = *trav;
+ for (trav = template; *trav; trav++) {
+ ch = *trav;
- if (__is_spc (ch))
- continue;
+ if (__is_spc(ch))
+ continue;
- if (__is_div (ch)) {
- token = calloc (2, 1);
- if (!token)
- return -1;
- token[0] = ch;
+ if (__is_div(ch)) {
+ token = calloc(2, 1);
+ if (!token)
+ return -1;
+ token[0] = ch;
- *tokenp = token;
- tokenp++;
+ *tokenp = token;
+ tokenp++;
- continue;
- }
+ continue;
+ }
- token = strdup (trav);
- *tokenp = token;
- tokenp++;
+ token = strdup(trav);
+ *tokenp = token;
+ tokenp++;
- for (token++; *token; token++) {
- if (__is_spc (*token) || __is_div (*token)) {
- *token = 0;
- break;
- }
- trav++;
- }
+ for (token++; *token; token++) {
+ if (__is_spc(*token) || __is_div(*token)) {
+ *token = 0;
+ break;
+ }
+ trav++;
}
+ }
- return ret;
+ return ret;
}
-
char **
-cli_cmd_tokenize (const char *template)
+cli_cmd_tokenize(const char *template)
{
- char **tokens = NULL;
- int ret = 0;
- int count = 0;
+ char **tokens = NULL;
+ int ret = 0;
+ int count = 0;
- ret = is_template_balanced (template);
- if (ret)
- return NULL;
+ ret = is_template_balanced(template);
+ if (ret)
+ return NULL;
- count = cli_cmd_token_count (template);
- if (count <= 0)
- return NULL;
+ count = cli_cmd_token_count(template);
+ if (count <= 0)
+ return NULL;
- tokens = calloc (count + 1, sizeof (char *));
- if (!tokens)
- return NULL;
+ tokens = calloc(count + 1, sizeof(char *));
+ if (!tokens)
+ return NULL;
- ret = cli_cmd_tokens_fill (tokens, template);
- if (ret)
- goto err;
+ ret = cli_cmd_tokens_fill(tokens, template);
+ if (ret)
+ goto err;
- return tokens;
+ return tokens;
err:
- cli_cmd_tokens_destroy (tokens);
- return NULL;
+ cli_cmd_tokens_destroy(tokens);
+ return NULL;
}
void *
-cli_getunamb (const char *tok, void **choices, cli_selector_t sel)
+cli_getunamb(const char *tok, void **choices, cli_selector_t sel)
{
- void **wcon = NULL;
- char *w = NULL;
- unsigned mn = 0;
- void *ret = NULL;
-
- if (!choices || !tok || !*tok)
- return NULL;
-
- for (wcon = choices; *wcon; wcon++) {
- w = strtail ((char *)sel (*wcon), tok);
- if (!w)
- /* no match */
- continue;
- if (!*w)
- /* exact match */
- return *wcon;
-
- ret = *wcon;
- mn++;
- }
+ void **wcon = NULL;
+ char *w = NULL;
+ unsigned mn = 0;
+ void *ret = NULL;
-#ifdef FORCE_MATCH_EXACT
+ if (!choices || !tok || !*tok)
return NULL;
+
+ for (wcon = choices; *wcon; wcon++) {
+ w = strtail((char *)sel(*wcon), tok);
+ if (!w)
+ /* no match */
+ continue;
+ if (!*w)
+ /* exact match */
+ return *wcon;
+
+ ret = *wcon;
+ mn++;
+ }
+
+#ifdef FORCE_MATCH_EXACT
+ return NULL;
#else
- return (mn == 1) ? ret : NULL;
+ return (mn == 1) ? ret : NULL;
#endif
}
static const char *
-sel_cmd_word (void *wcon)
+sel_cmd_word(void *wcon)
{
- return ((struct cli_cmd_word *)wcon)->word;
+ return ((struct cli_cmd_word *)wcon)->word;
}
struct cli_cmd_word *
-cli_cmd_nextword (struct cli_cmd_word *word, const char *token)
+cli_cmd_nextword(struct cli_cmd_word *word, const char *token)
{
- return (struct cli_cmd_word *)cli_getunamb (token,
- (void **)word->nextwords,
- sel_cmd_word);
+ return (struct cli_cmd_word *)cli_getunamb(token, (void **)word->nextwords,
+ sel_cmd_word);
}
-
struct cli_cmd_word *
-cli_cmd_newword (struct cli_cmd_word *word, const char *token)
+cli_cmd_newword(struct cli_cmd_word *word, const char *token)
{
- struct cli_cmd_word **nextwords = NULL;
- struct cli_cmd_word *nextword = NULL;
+ struct cli_cmd_word **nextwords = NULL;
+ struct cli_cmd_word *nextword = NULL;
- nextwords = realloc (word->nextwords,
- (word->nextwords_cnt + 2) * sizeof (*nextwords));
- if (!nextwords)
- return NULL;
+ nextwords = realloc(word->nextwords,
+ (word->nextwords_cnt + 2) * sizeof(*nextwords));
+ if (!nextwords)
+ return NULL;
- word->nextwords = nextwords;
+ word->nextwords = nextwords;
- nextword = calloc (1, sizeof (*nextword));
- if (!nextword)
- return NULL;
+ nextword = calloc(1, sizeof(*nextword));
+ if (!nextword)
+ return NULL;
- nextword->word = strdup (token);
- if (!nextword->word) {
- free (nextword);
- return NULL;
- }
+ nextword->word = strdup(token);
+ if (!nextword->word) {
+ free(nextword);
+ return NULL;
+ }
- nextword->tree = word->tree;
- nextwords[word->nextwords_cnt++] = nextword;
- nextwords[word->nextwords_cnt] = NULL;
+ nextword->tree = word->tree;
+ nextwords[word->nextwords_cnt++] = nextword;
+ nextwords[word->nextwords_cnt] = NULL;
- return nextword;
+ return nextword;
}
-
int
-cli_cmd_ingest (struct cli_cmd_tree *tree, char **tokens, cli_cmd_cbk_t *cbkfn,
- const char *desc, const char *pattern)
+cli_cmd_ingest(struct cli_cmd_tree *tree, char **tokens, cli_cmd_cbk_t *cbkfn,
+ const char *desc, const char *pattern)
{
- int ret = 0;
- char **tokenp = NULL;
- char *token = NULL;
- struct cli_cmd_word *word = NULL;
- struct cli_cmd_word *next = NULL;
+ int ret = 0;
+ char **tokenp = NULL;
+ char *token = NULL;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
- word = &tree->root;
+ word = &tree->root;
- for (tokenp = tokens; (token = *tokenp); tokenp++) {
- if (!__is_word (token))
- break;
+ for (tokenp = tokens; (token = *tokenp); tokenp++) {
+ if (!__is_word(token))
+ break;
- next = cli_cmd_nextword (word, token);
- if (!next)
- next = cli_cmd_newword (word, token);
-
- word = next;
- if (!word)
- break;
- }
+ next = cli_cmd_nextword(word, token);
+ if (!next)
+ next = cli_cmd_newword(word, token);
+ word = next;
if (!word)
- return -1;
+ break;
+ }
- if (word->cbkfn) {
- /* warning - command already registered */
- }
+ if (!word)
+ return -1;
- word->cbkfn = cbkfn;
- word->desc = desc;
- word->pattern = pattern;
+ if (word->cbkfn) {
+ /* warning - command already registered */
+ }
- /* end of static strings in command template */
+ word->cbkfn = cbkfn;
+ word->desc = desc;
+ word->pattern = pattern;
- /* TODO: autocompletion beyond this point is just "nice to have" */
+ /* end of static strings in command template */
- return ret;
-}
+ /* TODO: autocompletion beyond this point is just "nice to have" */
+ return ret;
+}
int
-cli_cmd_register (struct cli_cmd_tree *tree, struct cli_cmd *cmd)
+cli_cmd_register(struct cli_cmd_tree *tree, struct cli_cmd *cmd)
{
- char **tokens = NULL;
- int ret = 0;
+ char **tokens = NULL;
+ int ret = 0;
- GF_ASSERT (cmd);
+ GF_ASSERT(cmd);
- if (cmd->reg_cbk)
- cmd->reg_cbk (cmd);
+ if (cmd->reg_cbk)
+ cmd->reg_cbk(cmd);
- if (cmd->disable) {
- ret = 0;
- goto out;
- }
+ if (cmd->disable) {
+ ret = 0;
+ goto out;
+ }
- tokens = cli_cmd_tokenize (cmd->pattern);
- if (!tokens) {
- ret = -1;
- goto out;
- }
+ tokens = cli_cmd_tokenize(cmd->pattern);
+ if (!tokens) {
+ ret = -1;
+ goto out;
+ }
- ret = cli_cmd_ingest (tree, tokens, cmd->cbk, cmd->desc, cmd->pattern);
- if (ret) {
- ret = -1;
- goto out;
- }
+ ret = cli_cmd_ingest(tree, tokens, cmd->cbk, cmd->desc, cmd->pattern);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- if (tokens)
- cli_cmd_tokens_destroy (tokens);
+ if (tokens)
+ cli_cmd_tokens_destroy(tokens);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
-
diff --git a/configure.ac b/configure.ac
index 7825d8a24ee..e2d6fd66cec 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,157 +1,455 @@
-dnl Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-
-dnl This file is part of GlusterFS.
-dnl
-dnl GlusterFS is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 3 of the License, or
-dnl (at your option) any later version.
+dnl Copyright (c) 2006-2016 Red Hat, Inc. <http://www.redhat.com>
+dnl This file is part of GlusterFS.
dnl
-dnl GlusterFS is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-dnl GNU General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-AC_INIT([glusterfs],[3.3git],[gluster-users@gluster.org],,[https://github.com/gluster/glusterfs.git])
+dnl This file is licensed to you under your choice of the GNU Lesser
+dnl General Public License, version 3 or any later version (LGPLv3 or
+dnl later), or the GNU General Public License, version 2 (GPLv2), in all
+dnl cases as published by the Free Software Foundation.
-AM_INIT_AUTOMAKE
+AC_INIT([glusterfs],
+ [m4_esyscmd([build-aux/pkg-version --version])],
+ [gluster-users@gluster.org],,[https://github.com/gluster/glusterfs.git])
-m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
+AC_SUBST([PACKAGE_RELEASE],
+ [m4_esyscmd([build-aux/pkg-version --release])])
-if make --help 2>&1 | grep -q no-print-directory; then
- AM_MAKEFLAGS="$AM_MAKEFLAGS --no-print-directory";
-fi
+AM_INIT_AUTOMAKE([tar-pax foreign])
-if make --help 2>&1 | grep -q quiet; then
- AM_MAKEFLAGS="$AM_MAKEFLAGS --quiet"
-fi
+# Removes warnings when using automake 1.14 around (...but option 'subdir-objects' is disabled )
+#but libglusterfs fails to build with contrib (Then are not set up that way?)
+#AM_INIT_AUTOMAKE([subdir-objects])
-if libtool --help 2>&1 | grep -q quiet; then
- AM_LIBTOOLFLAGS="--quiet";
-fi
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
-AM_CONFIG_HEADER([config.h])
+AC_CONFIG_HEADERS([config.h site.h])
AC_CONFIG_FILES([Makefile
- libglusterfs/Makefile
- libglusterfs/src/Makefile
- glusterfsd/Makefile
- glusterfsd/src/Makefile
+ libglusterfs/Makefile
+ libglusterfs/src/Makefile
+ libglusterd/Makefile
+ libglusterd/src/Makefile
+ geo-replication/src/peer_gsec_create
+ geo-replication/src/peer_mountbroker
+ geo-replication/src/peer_mountbroker.py
+ geo-replication/src/peer_georep-sshkey.py
+ extras/peer_add_secret_pub
+ geo-replication/syncdaemon/conf.py
+ geo-replication/gsyncd.conf
+ extras/snap_scheduler/conf.py
+ glusterfsd/Makefile
+ glusterfsd/src/Makefile
rpc/Makefile
rpc/rpc-lib/Makefile
rpc/rpc-lib/src/Makefile
rpc/rpc-transport/Makefile
rpc/rpc-transport/socket/Makefile
rpc/rpc-transport/socket/src/Makefile
- rpc/rpc-transport/rdma/Makefile
- rpc/rpc-transport/rdma/src/Makefile
rpc/xdr/Makefile
rpc/xdr/src/Makefile
- xlators/Makefile
- xlators/mount/Makefile
- xlators/mount/fuse/Makefile
- xlators/mount/fuse/src/Makefile
- xlators/mount/fuse/utils/mount.glusterfs
- xlators/mount/fuse/utils/mount_glusterfs
- xlators/mount/fuse/utils/Makefile
- xlators/storage/Makefile
- xlators/storage/posix/Makefile
- xlators/storage/posix/src/Makefile
- xlators/cluster/Makefile
- xlators/cluster/afr/Makefile
- xlators/cluster/afr/src/Makefile
- xlators/cluster/stripe/Makefile
- xlators/cluster/stripe/src/Makefile
- xlators/cluster/dht/Makefile
- xlators/cluster/dht/src/Makefile
- xlators/performance/Makefile
- xlators/performance/write-behind/Makefile
- xlators/performance/write-behind/src/Makefile
- xlators/performance/read-ahead/Makefile
- xlators/performance/read-ahead/src/Makefile
- xlators/performance/io-threads/Makefile
- xlators/performance/io-threads/src/Makefile
- xlators/performance/io-cache/Makefile
- xlators/performance/io-cache/src/Makefile
- xlators/performance/symlink-cache/Makefile
- xlators/performance/symlink-cache/src/Makefile
- xlators/performance/quick-read/Makefile
- xlators/performance/quick-read/src/Makefile
+ xlators/Makefile
+ xlators/meta/Makefile
+ xlators/meta/src/Makefile
+ xlators/mount/Makefile
+ xlators/mount/fuse/Makefile
+ xlators/mount/fuse/src/Makefile
+ xlators/mount/fuse/utils/mount.glusterfs
+ xlators/mount/fuse/utils/mount_glusterfs
+ xlators/mount/fuse/utils/Makefile
+ xlators/storage/Makefile
+ xlators/storage/posix/Makefile
+ xlators/storage/posix/src/Makefile
+ xlators/cluster/Makefile
+ xlators/cluster/afr/Makefile
+ xlators/cluster/afr/src/Makefile
+ xlators/cluster/dht/Makefile
+ xlators/cluster/dht/src/Makefile
+ xlators/cluster/ec/Makefile
+ xlators/cluster/ec/src/Makefile
+ xlators/performance/Makefile
+ xlators/performance/write-behind/Makefile
+ xlators/performance/write-behind/src/Makefile
+ xlators/performance/read-ahead/Makefile
+ xlators/performance/read-ahead/src/Makefile
+ xlators/performance/readdir-ahead/Makefile
+ xlators/performance/readdir-ahead/src/Makefile
+ xlators/performance/io-threads/Makefile
+ xlators/performance/io-threads/src/Makefile
+ xlators/performance/io-cache/Makefile
+ xlators/performance/io-cache/src/Makefile
+ xlators/performance/quick-read/Makefile
+ xlators/performance/quick-read/src/Makefile
+ xlators/performance/open-behind/Makefile
+ xlators/performance/open-behind/src/Makefile
xlators/performance/md-cache/Makefile
xlators/performance/md-cache/src/Makefile
- xlators/debug/Makefile
- xlators/debug/trace/Makefile
- xlators/debug/trace/src/Makefile
- xlators/debug/error-gen/Makefile
- xlators/debug/error-gen/src/Makefile
- xlators/debug/io-stats/Makefile
- xlators/debug/io-stats/src/Makefile
- xlators/protocol/Makefile
- xlators/protocol/auth/Makefile
- xlators/protocol/auth/addr/Makefile
- xlators/protocol/auth/addr/src/Makefile
- xlators/protocol/auth/login/Makefile
- xlators/protocol/auth/login/src/Makefile
- xlators/protocol/client/Makefile
- xlators/protocol/client/src/Makefile
- xlators/protocol/server/Makefile
- xlators/protocol/server/src/Makefile
- xlators/features/Makefile
- xlators/features/locks/Makefile
- xlators/features/locks/src/Makefile
- xlators/features/quota/Makefile
- xlators/features/quota/src/Makefile
+ xlators/performance/nl-cache/Makefile
+ xlators/performance/nl-cache/src/Makefile
+ xlators/debug/Makefile
+ xlators/debug/sink/Makefile
+ xlators/debug/sink/src/Makefile
+ xlators/debug/trace/Makefile
+ xlators/debug/trace/src/Makefile
+ xlators/debug/error-gen/Makefile
+ xlators/debug/error-gen/src/Makefile
+ xlators/debug/delay-gen/Makefile
+ xlators/debug/delay-gen/src/Makefile
+ xlators/debug/io-stats/Makefile
+ xlators/debug/io-stats/src/Makefile
+ xlators/protocol/Makefile
+ xlators/protocol/auth/Makefile
+ xlators/protocol/auth/addr/Makefile
+ xlators/protocol/auth/addr/src/Makefile
+ xlators/protocol/auth/login/Makefile
+ xlators/protocol/auth/login/src/Makefile
+ xlators/protocol/client/Makefile
+ xlators/protocol/client/src/Makefile
+ xlators/protocol/server/Makefile
+ xlators/protocol/server/src/Makefile
+ xlators/features/Makefile
+ xlators/features/arbiter/Makefile
+ xlators/features/arbiter/src/Makefile
+ xlators/features/thin-arbiter/Makefile
+ xlators/features/thin-arbiter/src/Makefile
+ xlators/features/changelog/Makefile
+ xlators/features/changelog/src/Makefile
+ xlators/features/changelog/lib/Makefile
+ xlators/features/changelog/lib/src/Makefile
+ xlators/features/locks/Makefile
+ xlators/features/locks/src/Makefile
+ xlators/features/quota/Makefile
+ xlators/features/quota/src/Makefile
xlators/features/marker/Makefile
xlators/features/marker/src/Makefile
- xlators/features/marker/utils/Makefile
- xlators/features/marker/utils/src/Makefile
- xlators/features/marker/utils/syncdaemon/Makefile
- xlators/features/read-only/Makefile
- xlators/features/read-only/src/Makefile
- xlators/features/mac-compat/Makefile
- xlators/features/mac-compat/src/Makefile
- xlators/features/quiesce/Makefile
- xlators/features/quiesce/src/Makefile
+ xlators/features/selinux/Makefile
+ xlators/features/selinux/src/Makefile
+ xlators/features/sdfs/Makefile
+ xlators/features/sdfs/src/Makefile
+ xlators/features/read-only/Makefile
+ xlators/features/read-only/src/Makefile
+ xlators/features/compress/Makefile
+ xlators/features/compress/src/Makefile
+ xlators/features/namespace/Makefile
+ xlators/features/namespace/src/Makefile
+ xlators/features/quiesce/Makefile
+ xlators/features/quiesce/src/Makefile
+ xlators/features/barrier/Makefile
+ xlators/features/barrier/src/Makefile
xlators/features/index/Makefile
xlators/features/index/src/Makefile
- xlators/encryption/Makefile
- xlators/encryption/rot-13/Makefile
- xlators/encryption/rot-13/src/Makefile
+ xlators/features/gfid-access/Makefile
+ xlators/features/gfid-access/src/Makefile
+ xlators/features/trash/Makefile
+ xlators/features/trash/src/Makefile
+ xlators/features/snapview-server/Makefile
+ xlators/features/snapview-server/src/Makefile
+ xlators/features/snapview-client/Makefile
+ xlators/features/snapview-client/src/Makefile
+ xlators/features/upcall/Makefile
+ xlators/features/upcall/src/Makefile
+ xlators/features/shard/Makefile
+ xlators/features/shard/src/Makefile
+ xlators/features/bit-rot/Makefile
+ xlators/features/bit-rot/src/Makefile
+ xlators/features/bit-rot/src/stub/Makefile
+ xlators/features/bit-rot/src/bitd/Makefile
+ xlators/features/leases/Makefile
+ xlators/features/leases/src/Makefile
+ xlators/features/cloudsync/Makefile
+ xlators/features/cloudsync/src/Makefile
+ xlators/features/utime/Makefile
+ xlators/features/utime/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile
+ xlators/features/metadisp/Makefile
+ xlators/features/metadisp/src/Makefile
+ xlators/playground/Makefile
+ xlators/playground/template/Makefile
+ xlators/playground/template/src/Makefile
xlators/system/Makefile
xlators/system/posix-acl/Makefile
xlators/system/posix-acl/src/Makefile
+ xlators/nfs/Makefile
+ xlators/nfs/server/Makefile
+ xlators/nfs/server/src/Makefile
+ xlators/mgmt/Makefile
+ xlators/mgmt/glusterd/Makefile
+ xlators/mgmt/glusterd/src/Makefile
cli/Makefile
cli/src/Makefile
- doc/Makefile
- extras/Makefile
- extras/init.d/Makefile
- extras/init.d/glusterd.plist
+ doc/Makefile
+ extras/Makefile
+ extras/glusterd.vol
+ extras/cliutils/Makefile
+ extras/init.d/Makefile
+ extras/init.d/glusterd.plist
extras/init.d/glusterd-Debian
extras/init.d/glusterd-Redhat
+ extras/init.d/glusterd-FreeBSD
extras/init.d/glusterd-SuSE
- extras/benchmarking/Makefile
+ extras/init.d/glustereventsd-Debian
+ extras/init.d/glustereventsd-Redhat
+ extras/init.d/glustereventsd-FreeBSD
+ extras/ganesha/Makefile
+ extras/ganesha/config/Makefile
+ extras/ganesha/scripts/Makefile
+ extras/ganesha/ocf/Makefile
+ extras/systemd/Makefile
+ extras/systemd/glusterd.service
+ extras/systemd/glustereventsd.service
+ extras/systemd/glusterfssharedstorage.service
+ extras/systemd/gluster-ta-volume.service
+ extras/run-gluster.tmpfiles
+ extras/benchmarking/Makefile
extras/hook-scripts/Makefile
+ extras/ocf/Makefile
+ extras/ocf/glusterd
+ extras/ocf/volume
+ extras/LinuxRPM/Makefile
+ extras/geo-rep/Makefile
+ extras/geo-rep/schedule_georep.py
+ extras/firewalld/Makefile
+ extras/hook-scripts/add-brick/Makefile
+ extras/hook-scripts/add-brick/pre/Makefile
+ extras/hook-scripts/add-brick/post/Makefile
+ extras/hook-scripts/create/Makefile
+ extras/hook-scripts/create/post/Makefile
+ extras/hook-scripts/delete/Makefile
+ extras/hook-scripts/delete/pre/Makefile
extras/hook-scripts/start/Makefile
extras/hook-scripts/start/post/Makefile
+ extras/hook-scripts/set/Makefile
+ extras/hook-scripts/set/post/Makefile
extras/hook-scripts/stop/Makefile
extras/hook-scripts/stop/pre/Makefile
- contrib/fuse-util/Makefile
- contrib/uuid/uuid_types.h
- xlators/nfs/Makefile
- xlators/nfs/server/Makefile
- xlators/nfs/server/src/Makefile
- xlators/mgmt/Makefile
- xlators/mgmt/glusterd/Makefile
- xlators/mgmt/glusterd/src/Makefile
- glusterfs.spec])
+ extras/hook-scripts/reset/Makefile
+ extras/hook-scripts/reset/post/Makefile
+ extras/hook-scripts/reset/pre/Makefile
+ extras/python/Makefile
+ extras/snap_scheduler/Makefile
+ events/Makefile
+ events/src/Makefile
+ events/src/eventsapiconf.py
+ events/tools/Makefile
+ contrib/fuse-util/Makefile
+ contrib/umountd/Makefile
+ glusterfs-api.pc
+ libgfchangelog.pc
+ api/Makefile
+ api/src/Makefile
+ api/examples/Makefile
+ geo-replication/Makefile
+ geo-replication/src/Makefile
+ geo-replication/syncdaemon/Makefile
+ tools/Makefile
+ tools/gfind_missing_files/Makefile
+ heal/Makefile
+ heal/src/Makefile
+ glusterfs.spec
+ tools/glusterfind/src/tool.conf
+ tools/glusterfind/glusterfind
+ tools/glusterfind/Makefile
+ tools/glusterfind/src/Makefile
+ tools/setgfid2path/Makefile
+ tools/setgfid2path/src/Makefile])
AC_CANONICAL_HOST
AC_PROG_CC
+AC_DISABLE_STATIC
AC_PROG_LIBTOOL
+AC_SUBST([shrext_cmds])
+
+AC_CHECK_PROG([RPCGEN], [rpcgen], [yes], [no])
+
+if test "x$RPCGEN" = "xno"; then
+ AC_MSG_ERROR([`rpcgen` not found, glusterfs needs `rpcgen` exiting..])
+fi
+
+# Initialize CFLAGS before usage
+AC_ARG_ENABLE([debug],
+ AC_HELP_STRING([--enable-debug],
+ [Enable debug build options.]))
+if test "x$enable_debug" = "xyes"; then
+ BUILD_DEBUG=yes
+ GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG"
+else
+ BUILD_DEBUG=no
+fi
+
+SANITIZER=none
+
+AC_ARG_ENABLE([asan],
+ AC_HELP_STRING([--enable-asan],
+ [Enable Address Sanitizer support]))
+if test "x$enable_asan" = "xyes"; then
+ SANITIZER=asan
+ AC_CHECK_LIB([asan], [__asan_init], ,
+ [AC_MSG_ERROR([--enable-asan requires libasan.so, exiting])])
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=address -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -lasan"
+fi
+
+AC_ARG_ENABLE([tsan],
+ AC_HELP_STRING([--enable-tsan],
+ [Enable Thread Sanitizer support]))
+if test "x$enable_tsan" = "xyes"; then
+ if test "x$SANITIZER" != "xnone"; then
+ AC_MSG_ERROR([only one sanitizer can be enabled at once])
+ fi
+ SANITIZER=tsan
+ AC_CHECK_LIB([tsan], [__tsan_init], ,
+ [AC_MSG_ERROR([--enable-tsan requires libtsan.so, exiting])])
+ if test "x$ac_cv_lib_tsan___tsan_init" = xyes; then
+ AC_MSG_CHECKING([whether tsan API can be used])
+ saved_CFLAGS=${CFLAGS}
+ CFLAGS="${CFLAGS} -fsanitize=thread"
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM([
+ [#include <sanitizer/tsan_interface.h>]],
+ [[__tsan_create_fiber(0)]])],
+ [TSAN_API=yes], [TSAN_API=no])
+ AC_MSG_RESULT([$TSAN_API])
+ if test x$TSAN_API = "xyes"; then
+ AC_DEFINE(HAVE_TSAN_API, 1, [Define if tsan API can be used.])
+ fi
+ CFLAGS=${saved_CFLAGS}
+ fi
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=thread -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -ltsan"
+fi
+
+AC_ARG_ENABLE([ubsan],
+ AC_HELP_STRING([--enable-ubsan],
+ [Enable Undefined Behavior Sanitizer support]))
+if test "x$enable_ubsan" = "xyes"; then
+ if test "x$SANITIZER" != "xnone"; then
+ AC_MSG_ERROR([only one sanitizer can be enabled at once])
+ fi
+ SANITIZER=ubsan
+ AC_CHECK_LIB([ubsan], [__ubsan_default_options], ,
+ [AC_MSG_ERROR([--enable-ubsan requires libubsan.so, exiting])])
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=undefined -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -lubsan"
+fi
+
+# Initialize CFLAGS before usage
+BUILD_TCMALLOC=no
+AC_ARG_ENABLE([tcmalloc],
+ AC_HELP_STRING([--enable-tcmalloc],
+ [Enable linking with tcmalloc library.]))
+if test "x$enable_tcmalloc" = "xyes"; then
+ BUILD_TCMALLOC=yes
+ GF_CFLAGS="${GF_CFLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free"
+ AC_CHECK_LIB([tcmalloc], [malloc], [],
+ [AC_MSG_ERROR([when --enable-tcmalloc is used, tcmalloc library needs to be present])])
+ GF_LDFLAGS="-ltcmalloc ${GF_LDFLAGS}"
+fi
+
+
+dnl When possible, prefer libtirpc over glibc rpc.
+dnl
+dnl On newer linux with only libtirpc, use libtirpc. (Specifying
+dnl --without-libtirpc is an error.)
+dnl
+dnl on older linux with glibc rpc and WITH libtirpc, use libtirpc
+dnl by default except when configured with --without-libtirpc.
+dnl
+dnl on old linux with glibc rpc and WITHOUT libtirpc, default to
+dnl use glibc rpc.
+dnl
+AC_ARG_WITH([libtirpc],
+ [AC_HELP_STRING([--without-libtirpc], [Use legacy glibc RPC.])],
+ [with_libtirpc="no"], [with_libtirpc="yes"])
+
+dnl ipv6-default is off by default
+dnl
+dnl ipv6-default requires libtirpc. (glibc rpc does not support IPv6.)
+dnl ipv6-default can only be enabled if libtipc is enabled.
+dnl
+AC_ARG_WITH([ipv6-default],
+ AC_HELP_STRING([--with-ipv6-default], [Set IPv6 as default.]),
+ [with_ipv6_default=${with_libtirpc}], [with_ipv6_default="no"])
+
+AC_CHECK_FILE([/etc/centos-release])
+if test "x$ac_cv_file__etc_centos_release" = "xyes"; then
+ if grep "release 6" /etc/centos-release; then
+ with_ipv6_default="no"
+ fi
+fi
+
+dnl On some distributions '-ldl' isn't automatically added to LIBS
+AC_CHECK_LIB([dl], [dlopen], [LIB_DL=-ldl])
+AC_SUBST(LIB_DL)
+
+AC_ARG_ENABLE([privport_tracking],
+ AC_HELP_STRING([--disable-privport_tracking],
+ [Disable internal tracking of privileged ports.]))
+TRACK_PRIVPORTS="yes"
+if test x"$enable_privport_tracking" = x"no"; then
+ TRACK_PRIVPORTS="no"
+ AC_DEFINE(GF_DISABLE_PRIVPORT_TRACKING, 1,
+ [Disable internal tracking of privileged ports.])
+fi
+
+case $host_os in
+ darwin*)
+ if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 7; then
+ AC_MSG_ERROR([You need at least OS X 10.7 (Lion) to build Glusterfs])
+ fi
+ # OSX version lesser than 9 has llvm/clang optimization issues which leads to various segfaults
+ if test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -lt 9; then
+ GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG"
+ fi
+ ;;
+esac
+
+# --enable-valgrind prevents calling dlclose(), this leaks memory
+AC_ARG_ENABLE([valgrind],
+ AC_HELP_STRING([--enable-valgrind@<:@=memcheck,drd@:>@],
+ [Enable valgrind for resource leak (memcheck, which is
+ the default) or thread synchronization (drd) debugging.]))
+case x$enable_valgrind in
+ xmemcheck|xyes)
+ AC_DEFINE(RUN_WITH_MEMCHECK, 1,
+ [Define if all processes should run under 'valgrind --tool=memcheck'.])
+ VALGRIND_TOOL=memcheck
+ ;;
+ xdrd)
+ AC_DEFINE(RUN_WITH_DRD, 1,
+ [Define if all processes should run under 'valgrind --tool=drd'.])
+ VALGRIND_TOOL=drd
+ ;;
+ x|xno)
+ VALGRIND_TOOL=no
+ ;;
+ *)
+ AC_MSG_ERROR([Please specify --enable-valgrind@<:@=memcheck,drd@:>@])
+ ;;
+esac
+
+AC_ARG_WITH([previous-options],
+ [AS_HELP_STRING([--with-previous-options],
+ [read config.status for configure options])
+ ],
+ [ if test -r ./config.status && \
+ args=$(grep 'ac_cs_config=' config.status | \
+ sed -e 's/.*"\(.*\)".*/\1/' -e "s/'//g" -e "s/--with-previous-options//g") ; then
+ echo "###"
+ echo "### Rerunning as '$0 $args'"
+ echo "###"
+ exec $0 $args
+ fi
+ ])
+
+AC_ARG_WITH(pkgconfigdir,
+ [ --with-pkgconfigdir=DIR pkgconfig file in DIR @<:@LIBDIR/pkgconfig@:>@],
+ [pkgconfigdir=$withval],
+ [pkgconfigdir='${libdir}/pkgconfig'])
+AC_SUBST(pkgconfigdir)
AC_ARG_WITH(mountutildir,
[ --with-mountutildir=DIR mount helper utility in DIR @<:@/sbin@:>@],
@@ -159,6 +457,13 @@ AC_ARG_WITH(mountutildir,
[mountutildir='/sbin'])
AC_SUBST(mountutildir)
+AC_ARG_WITH(systemddir,
+ [ --with-systemddir=DIR systemd service files in DIR @<:@PREFIX/lib/systemd/system@:>@],
+ [systemddir=$withval],
+ [systemddir='${prefix}/lib/systemd/system'])
+AC_SUBST(systemddir)
+AM_CONDITIONAL([USE_SYSTEMD], test [ -d '/usr/lib/systemd/system' ])
+
AC_ARG_WITH(initdir,
[ --with-initdir=DIR init.d scripts in DIR @<:@/etc/init.d@:>@],
[initdir=$withval],
@@ -171,6 +476,27 @@ AC_ARG_WITH(launchddir,
[launchddir='/Library/LaunchDaemons'])
AC_SUBST(launchddir)
+AC_ARG_WITH(tmpfilesdir,
+ AC_HELP_STRING([--with-tmpfilesdir=DIR],
+ [tmpfiles config in DIR, disabled by default]),
+ [tmpfilesdir=$withval],
+ [tmpfilesdir=''])
+AC_SUBST(tmpfilesdir)
+
+AC_ARG_WITH([ocf],
+ [AS_HELP_STRING([--without-ocf], [build OCF-compliant cluster resource agents])],
+ ,
+ [OCF_SUBDIR='ocf'],
+ )
+AC_SUBST(OCF_SUBDIR)
+
+AC_ARG_WITH([server],
+ [AS_HELP_STRING([--without-server], [do not build server components])],
+ [with_server='no'],
+ [with_server='yes'],
+ )
+AM_CONDITIONAL([WITH_SERVER], [test x$with_server = xyes])
+
# LEX needs a check
AC_PROG_LEX
if test "x${LEX}" != "xflex" -a "x${FLEX}" != "xlex"; then
@@ -207,20 +533,110 @@ AC_CHECK_LIB([pthread], [pthread_mutex_init], , AC_MSG_ERROR([Posix threads libr
AC_CHECK_FUNC([dlopen], [has_dlopen=yes], AC_CHECK_LIB([dl], [dlopen], , AC_MSG_ERROR([Dynamic linking library required to build glusterfs])))
+AC_CHECK_LIB([readline], [rl_do_undo], [RL_UNDO="yes"], [RL_UNDO="no"])
+
+AC_CHECK_LIB([intl], [gettext])
AC_CHECK_HEADERS([sys/xattr.h])
+AC_CHECK_HEADERS([sys/ioctl.h], AC_DEFINE(HAVE_IOCTL_IN_SYS_IOCTL_H, 1, [have sys/ioctl.h]))
+
AC_CHECK_HEADERS([sys/extattr.h])
-AC_CHECK_HEADERS([openssl/md5.h])
+AC_CHECK_HEADERS([openssl/dh.h])
+
+AC_CHECK_HEADERS([openssl/ecdh.h])
+
+AC_CHECK_LIB([ssl], [SSL_CTX_get0_param], [AC_DEFINE([HAVE_SSL_CTX_GET0_PARAM], [1], [define if found OpenSSL SSL_CTX_get0_param])])
+
+dnl Math library
+AC_CHECK_LIB([m], [pow], [MATH_LIB='-lm'], [MATH_LIB=''])
+AC_SUBST(MATH_LIB)
+
+dnl depend on libuuid.so
+PKG_CHECK_MODULES([UUID], [uuid],
+ [have_uuid=yes
+ AC_DEFINE(HAVE_LIBUUID, 1, [have libuuid.so])
+ PKGCONFIG_UUID=uuid],
+ [have_uuid=no])
+AM_CONDITIONAL([HAVE_LIBUUID], [test x$have_uuid = xyes])
+
+dnl older version of libuuid (from e2fsprogs) require including uuid/uuid.h
+saved_CFLAGS=${CFLAGS}
+CFLAGS="${CFLAGS} ${UUID_CFLAGS}"
+AC_CHECK_HEADER([uuid.h], [], [AC_CHECK_HEADER([uuid/uuid.h])],
+ [[#if HAVE_UUID_H
+ #include <uuid.h>
+ #endif
+ ]])
+CFLAGS=${saved_CFLAGS}
+if test "x$ac_cv_header_uuid_uuid_h" = "xyes"; then
+ UUID_CFLAGS="${UUID_CFLAGS} -I$(pkg-config --variable=includedir uuid)/uuid"
+ have_uuid=yes
+fi
+if test "x$have_uuid" != "xyes"; then
+ case $host_os in
+ *freebsd*)
+ AC_MSG_ERROR([e2fsprogs-libuuid is required to build glusterfs])
+ ;;
+ linux*)
+ AC_MSG_ERROR([libuuid is required to build glusterfs])
+ ;;
+ *)
+ AC_MSG_ERROR([a Linux compatible libuuid is required to build glusterfs])
+ ;;
+ esac
+fi
+
+dnl libglusterfs needs uuid.h, practically everything depends on it
+GF_CFLAGS="${GF_CFLAGS} ${UUID_CFLAGS}"
+dnl PKGCONFIG_UUID is used for the dependency in *.pc.in files
+AC_SUBST(PKGCONFIG_UUID)
+
+dnl NetBSD does not support POSIX ACLs :-(
case $host_os in
- darwin*)
- if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 5; then
- AC_MSG_ERROR([You need at least OS X 10.5 (Leopard) to build Glusterfs])
- fi
- ;;
+ *netbsd* | darwin*)
+ AC_MSG_WARN([platform does not support POSIX ACLs... disabling them])
+ ACL_LIBS=''
+ USE_POSIX_ACLS='0'
+ BUILD_POSIX_ACLS='no'
+ ;;
+ *)
+ AC_CHECK_HEADERS([sys/acl.h], ,
+ AC_MSG_ERROR([Support for POSIX ACLs is required]))
+ USE_POSIX_ACLS='1'
+ BUILD_POSIX_ACLS='yes'
+ case $host_os in
+ linux*)
+ ACL_LIBS='-lacl'
+ ;;
+ solaris*)
+ ACL_LIBS='-lsec'
+ ;;
+ *freebsd*)
+ ACL_LIBS='-lc'
+ ;;
+ darwin*)
+ ACL_LIBS='-lc'
+ ;;
+ esac
+ if test "x${ACL_LIBS}" = "x-lacl"; then
+ AC_CHECK_HEADERS([acl/libacl.h], , AC_MSG_ERROR([libacl is required for building on ${host_os}]))
+ fi
+ ;;
esac
+AC_SUBST(ACL_LIBS)
+AC_SUBST(USE_POSIX_ACLS)
+
+# libglusterfs/checksum
+AC_CHECK_HEADERS([openssl/md5.h])
+AC_CHECK_LIB([z], [adler32], [ZLIB_LIBS="-lz"], AC_MSG_ERROR([zlib is required to build glusterfs]))
+AC_SUBST(ZLIB_LIBS)
+
+AC_CHECK_HEADERS([linux/falloc.h])
+
+AC_CHECK_HEADERS([linux/oom.h], AC_DEFINE(HAVE_LINUX_OOM_H, 1, [have linux/oom.h]))
dnl Mac OS X does not have spinlocks
AC_CHECK_FUNC([pthread_spin_init], [have_spinlock=yes])
@@ -244,11 +660,68 @@ if test "x${have_setfsuid}" = "xyes" -a "x${have_setfsgid}" = "xyes"; then
AC_DEFINE(HAVE_SET_FSID, 1, [define if found setfsuid setfsgid])
fi
+dnl test umount2 function
+AC_CHECK_FUNC([umount2], [have_umount2=yes])
+
+if test "x${have_umount2}" = "xyes"; then
+ AC_DEFINE(HAVE_UMOUNT2, 1, [define if found umount2])
+fi
+
+dnl Check Python Availability
+have_python=no
+dnl if the user has not specified a python, pick one
+if test -z "${PYTHON}"; then
+ case $host_os in
+ freebsd*)
+ if test -x /usr/local/bin/python3; then
+ PYTHON=/usr/local/bin/python3
+ else
+ PYTHON=/usr/local/bin/python2
+ fi
+ ;;
+ *)
+ if test -x /usr/bin/python3; then
+ PYTHON=/usr/bin/python3
+ else
+ PYTHON=/usr/bin/python2
+ fi
+ ;;
+ esac
+fi
+AM_PATH_PYTHON([2.6],,[:])
+if test -n "${PYTHON}"; then
+ have_python=yes
+fi
+AM_CONDITIONAL(HAVE_PYTHON, test "x$have_python" = "xyes")
+
+dnl Use pkg-config to get runtime search path missing from ${PYTHON}-config
+dnl Just do "true" on failure so that configure does not bail out
+dnl Note: python 2.6's devel pkg (e.g. in CentOS/RHEL 6) does not have
+dnl pkg-config files, so this work-around instead
+if test "x${PYTHON_VERSION}" = "x2.6"; then
+ PYTHON_CFLAGS=$(python-config --includes)
+ PYTHON_LIBS=$(python-config --libs)
+else
+ PKG_CHECK_MODULES([PYTHON], "python-${PYTHON_VERSION}",,true)
+fi
+
+PYTHON_CFLAGS=$(echo ${PYTHON_CFLAGS} | sed -e 's|-I|-isystem |')
+
+BUILD_PYTHON_SITE_PACKAGES=${pythondir}
+AC_SUBST(BUILD_PYTHON_SITE_PACKAGES)
+
+# Eval two times to expand fully. First eval replaces $exec_prefix into $prefix
+# Second eval will expand $prefix
+build_python_site_packages_temp="${pythondir}"
+eval build_python_site_packages_temp=\"${build_python_site_packages_temp}\"
+eval build_python_site_packages_temp=\"${build_python_site_packages_temp}\"
+BUILD_PYTHON_SITE_PACKAGES_EXPANDED=${build_python_site_packages_temp}
+AC_SUBST(BUILD_PYTHON_SITE_PACKAGES_EXPANDED)
# FUSE section
AC_ARG_ENABLE([fuse-client],
- AC_HELP_STRING([--disable-fuse-client],
- [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
+ AC_HELP_STRING([--disable-fuse-client],
+ [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
BUILD_FUSE_CLIENT=no
if test "x$enable_fuse_client" != "xno"; then
@@ -257,77 +730,64 @@ if test "x$enable_fuse_client" != "xno"; then
fi
AC_SUBST(FUSE_CLIENT_SUBDIR)
+
+AC_ARG_ENABLE([fuse-notifications],
+ AS_HELP_STRING([--disable-fuse-notifications], [Disable FUSE notifications]))
+
+AS_IF([test "x$enable_fuse_notifications" != "xno"], [
+ AC_DEFINE([HAVE_FUSE_NOTIFICATIONS], [1], [Use FUSE notifications])
+])
+
# end FUSE section
+AC_CHECK_LIB([ssl], TLS_method, [HAVE_OPENSSL_1_1="yes"], [HAVE_OPENSSL_1_1="no"])
+if test "x$HAVE_OPENSSL_1_1" = "xyes"; then
+ AC_DEFINE([HAVE_TLS_METHOD], [1], [Using OpenSSL-1.1 TLS_method])
+else
+ AC_CHECK_LIB([ssl], TLSv1_2_method, [AC_DEFINE([HAVE_TLSV1_2_METHOD], [1], [Using OpenSSL-1.0 TLSv1_2_method])])
+fi
+
+
# FUSERMOUNT section
AC_ARG_ENABLE([fusermount],
- AC_HELP_STRING([--enable-fusermount],
- [Build fusermount]))
-
-BUILD_FUSERMOUNT="no"
-if test "x$enable_fusermount" = "xyes"; then
- FUSERMOUNT_SUBDIR="contrib/fuse-util"
- BUILD_FUSERMOUNT="yes"
- AC_DEFINE(GF_FUSERMOUNT, 1, [Use our own fusermount])
+ AC_HELP_STRING([--disable-fusermount],
+ [Use system's fusermount]))
+
+BUILD_FUSERMOUNT="yes"
+if test "x$enable_fusermount" = "xno"; then
+ BUILD_FUSERMOUNT="no"
+else
+ AC_DEFINE(GF_FUSERMOUNT, 1, [Use our own fusermount])
+ FUSERMOUNT_SUBDIR="contrib/fuse-util"
fi
AC_SUBST(FUSERMOUNT_SUBDIR)
#end FUSERMOUNT section
-
# EPOLL section
AC_ARG_ENABLE([epoll],
- AC_HELP_STRING([--disable-epoll],
- [Use poll instead of epoll.]))
+ AC_HELP_STRING([--disable-epoll],
+ [Use poll instead of epoll.]))
BUILD_EPOLL=no
if test "x$enable_epoll" != "xno"; then
AC_CHECK_HEADERS([sys/epoll.h],
[BUILD_EPOLL=yes],
- [BUILD_EPOLL=no])
+ [BUILD_EPOLL=no])
fi
# end EPOLL section
-
-# IBVERBS section
-AC_ARG_ENABLE([ibverbs],
- AC_HELP_STRING([--disable-ibverbs],
- [Do not build the ibverbs transport]))
-
-if test "x$enable_ibverbs" != "xno"; then
- AC_CHECK_LIB([ibverbs],
- [ibv_get_device_list],
- [HAVE_LIBIBVERBS="yes"],
- [HAVE_LIBIBVERBS="no"])
-fi
-
-if test "x$enable_ibverbs" = "xyes" -a "x$HAVE_LIBIBVERBS" = "xno"; then
- echo "ibverbs requested but not found."
- exit 1
-fi
-
-BUILD_RDMA=no
-BUILD_IBVERBS=no
-if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes"; then
- IBVERBS_SUBDIR=ib-verbs
- BUILD_IBVERBS=yes
- RDMA_SUBDIR=rdma
- BUILD_RDMA=yes
-fi
-
-AC_SUBST(IBVERBS_SUBDIR)
-AC_SUBST(RDMA_SUBDIR)
-# end IBVERBS section
-
-
# SYNCDAEMON section
AC_ARG_ENABLE([georeplication],
- AC_HELP_STRING([--disable-georeplication],
- [Do not install georeplication components]))
+ AC_HELP_STRING([--disable-georeplication],
+ [Do not install georeplication components]))
BUILD_SYNCDAEMON=no
case $host_os in
+ freebsd*)
+#do nothing
+ ;;
linux*)
#do nothing
;;
@@ -336,72 +796,226 @@ case $host_os in
;;
*)
#disabling geo replication for non-linux platforms
- enable_georeplication=no
+ enable_georeplication=no
;;
esac
SYNCDAEMON_COMPILE=0
-if test "x$enable_georeplication" != "xno"; then
- SYNCDAEMON_SUBDIR=utils
- SYNCDAEMON_COMPILE=1
-
- BUILD_SYNCDAEMON="yes"
- AM_PATH_PYTHON([2.4])
- echo -n "checking if python is python 2.x... "
- if echo $PYTHON_VERSION | grep ^2; then
- :
+if test "x${with_server}" = "xyes" -a "x${enable_georeplication}" != "xno"; then
+ if test "x${have_python}" = "xno" ; then
+ AC_MSG_ERROR([only python 2 and 3 are supported])
else
- echo no
- AC_MSG_ERROR([only python 2.x is supported])
- fi
- echo -n "checking if python has ctypes support... "
- if "$PYTHON" -c 'import ctypes' 2>/dev/null; then
- echo yes
- else
- echo no
- AC_MSG_ERROR([python does not have ctypes support])
+ SYNCDAEMON_SUBDIR=geo-replication
+ SYNCDAEMON_COMPILE=1
+
+ BUILD_SYNCDAEMON="yes"
+ AC_MSG_CHECKING([if python has ctypes support...])
+ if "${PYTHON}" -c 'import ctypes' 2>/dev/null; then
+ AC_MSG_RESULT("yes")
+ else
+ AC_MSG_ERROR([python does not have ctypes support])
+ fi
fi
fi
AC_SUBST(SYNCDAEMON_COMPILE)
AC_SUBST(SYNCDAEMON_SUBDIR)
# end SYNCDAEMON section
-#check if libxml is present if so enable HAVE_LIB_XML
-echo -n "checking if libxml2 is present... "
-
-PKG_CHECK_MODULES([LIBXML2], [libxml-2.0 >= 2.6.19],
- [echo "yes (features requiring libxml2 enabled)" AC_DEFINE(HAVE_LIB_XML, 1, [define if libxml2 is present])],
- [echo "no"] )
+# only install scripts from extras/geo-rep when enabled
+if test "x${with_server}" = "xyes" -a "x$enable_georeplication" != "xno"; then
+ GEOREP_EXTRAS_SUBDIR=geo-rep
+fi
+AC_SUBST(GEOREP_EXTRAS_SUBDIR)
+AM_CONDITIONAL(USE_GEOREP, test "x$enable_georeplication" != "xno")
+
+# METADISP section
+AC_ARG_ENABLE([metadisp],
+ AC_HELP_STRING([--enable-metadisp],
+ [Enable the metadata dispersal xlator]))
+BUILD_METADISP=no
+if test "x${enable_metadisp}" = "xyes"; then
+ BUILD_METADISP=yes
+fi
+AM_CONDITIONAL([BUILD_METADISP], [test "x$BUILD_METADISP" = "xyes"])
+# end METADISP section
+
+# Events section
+AC_ARG_ENABLE([events],
+ AC_HELP_STRING([--disable-events],
+ [Do not install Events components]))
+
+BUILD_EVENTS=no
+EVENTS_ENABLED=0
+EVENTS_SUBDIR=
+if test "x$enable_events" != "xno"; then
+ EVENTS_SUBDIR=events
+ EVENTS_ENABLED=1
+
+ BUILD_EVENTS="yes"
+
+ if test "x${have_python}" = "xno"; then
+ if test "x${enable_events}" = "xyes"; then
+ AC_MSG_ERROR([python 2 or 3 required. exiting.])
+ fi
+ AC_MSG_WARN([python not found, disabling events])
+ EVENTS_SUBDIR=
+ EVENTS_ENABLED=0
+ BUILD_EVENTS="no"
+ else
+ AC_DEFINE(USE_EVENTS, 1, [define if events enabled])
+ fi
+fi
+AC_SUBST(EVENTS_ENABLED)
+AC_SUBST(EVENTS_SUBDIR)
+AM_CONDITIONAL([BUILD_EVENTS], [test "x${BUILD_EVENTS}" = "xyes"])
+# end Events section
+
+# CDC xlator - check if libz is present if so enable HAVE_LIB_Z
+BUILD_CDC=yes
+PKG_CHECK_MODULES([ZLIB], [zlib >= 1.2.0],,
+ [AC_CHECK_LIB([z], [deflate], [ZLIB_LIBS="-lz"],
+ [BUILD_CDC=no])])
+echo -n "features requiring zlib enabled: "
+if test "x$BUILD_CDC" = "xyes" ; then
+ echo "yes"
+ AC_DEFINE(HAVE_LIB_Z, 1, [define if zlib is present])
+else
+ echo "no"
+fi
+AC_SUBST(ZLIB_CFLAGS)
+AC_SUBST(ZLIB_LIBS)
+# end CDC xlator secion
+
+#start firewalld section
+BUILD_FIREWALLD="no"
+AC_ARG_ENABLE([firewalld],
+ AC_HELP_STRING([--enable-firewalld],
+ [enable installation configuration for firewalld]),
+ [BUILD_FIREWALLD="${enableval}"], [BUILD_FIREWALLD="no"])
+
+if test "x${with_server}" = "xyes" -a "x${BUILD_FIREWALLD}" = "xyes"; then
+ if !(test -d /usr/lib/firewalld/services 1>/dev/null 2>&1) ; then
+ BUILD_FIREWALLD="no (firewalld not installed)"
+ fi
+fi
+AM_CONDITIONAL([USE_FIREWALLD],test ["x${BUILD_FIREWALLD}" = "xyes"])
+
+#endof firewald section
+
+# xml-output
+AC_ARG_ENABLE([xml-output],
+ AC_HELP_STRING([--disable-xml-output],
+ [Disable the xml output]))
+BUILD_XML_OUTPUT="yes"
+if test "x$enable_xml_output" != "xno"; then
+ PKG_CHECK_MODULES([XML], [libxml-2.0], [], [no_xml="yes"])
+ if test "x${no_xml}" = "x"; then
+ AC_DEFINE([HAVE_LIB_XML], [1], [Define to 1 if using libxml2.])
+ else
+ if test "x$enable_georeplication" != "xno"; then
+ AC_MSG_ERROR([libxml2 devel libraries not found])
+ else
+ AC_MSG_WARN([libxml2 devel libraries not found disabling XML support])
+ BUILD_XML_OUTPUT="no"
+ fi
-AC_SUBST(LIBXML2_CFLAGS)
-AC_SUBST(LIBXML2_LIBS)
+ fi
+else
+ if test "x$enable_georeplication" != "xno"; then
+ AC_MSG_ERROR([geo-replication requires xml output])
+ fi
+ BUILD_XML_OUTPUT="no"
+fi
+# end of xml-output
-dnl FreeBSD > 5 has execinfo as a Ported library for giving a workaround
-dnl solution to GCC backtrace functionality
+dnl cloudsync section
+BUILD_CLOUDSYNC="no"
+AC_CHECK_LIB([curl], [curl_easy_setopt], [LIBCURL="-lcurl"])
+if test -n "$LIBCURL";then
+ HAVE_LIBCURL="yes"
+fi
+AC_CHECK_HEADERS([openssl/hmac.h openssl/evp.h openssl/bio.h openssl/buffer.h], [HAVE_OPENSSL="yes"])
+if test "x$HAVE_LIBCURL" = "xyes" -a "x$HAVE_OPENSSL" = "xyes";then
+ HAVE_AMAZONS3="yes"
+fi
+AM_CONDITIONAL([BUILD_AMAZONS3_PLUGIN], [test "x$HAVE_AMAZONS3" = "xyes"])
+if test "x$HAVE_AMAZONS3" = "xyes";then
+ BUILD_CLOUDSYNC="yes"
+fi
+BUILD_CVLT_PLUGIN="no"
+case $host_os in
+#enable cvlt plugin only for linux platforms
+ linux*)
+ BUILD_CVLT_PLUGIN="yes"
+ BUILD_CLOUDSYNC="yes"
+ ;;
+ *)
+ ;;
+esac
+AM_CONDITIONAL([BUILD_CVLT_PLUGIN], [test "x$BUILD_CVLT_PLUGIN" = "xyes"])
+AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"])
+dnl end cloudsync section
-AC_CHECK_HEADERS([execinfo.h], [have_backtrace=yes],
- AC_CHECK_LIB([execinfo], [backtrace], [have_backtrace=yes]))
-dnl AC_MSG_ERROR([libexecinfo not found libexecinfo required.])))
+dnl SELinux feature enablement
+case $host_os in
+ linux*)
+ AC_ARG_ENABLE([selinux],
+ AC_HELP_STRING([--disable-selinux],
+ [Disable SELinux features]),
+ [USE_SELINUX="${enableval}"], [USE_SELINUX="yes"])
+ ;;
+ *)
+ USE_SELINUX=no
+ ;;
+esac
+AM_CONDITIONAL(USE_SELINUX, test "x${USE_SELINUX}" = "xyes")
+dnl end of SELinux feature enablement
+AC_CHECK_HEADERS([execinfo.h], [have_backtrace=yes])
if test "x${have_backtrace}" = "xyes"; then
AC_DEFINE(HAVE_BACKTRACE, 1, [define if found backtrace])
fi
AC_SUBST(HAVE_BACKTRACE)
+dnl Old (before C11) compiler can compile (but not link) this:
+dnl
+dnl int main () {
+dnl _Static_assert(1, "True");
+dnl return 0;
+dnl }
+dnl
+dnl assuming that _Static_assert is an implicitly declared function. So
+dnl we're trying to link just to make sure that this is not the case.
+
+AC_MSG_CHECKING([whether $CC supports C11 _Static_assert])
+AC_TRY_LINK([], [_Static_assert(1, "True");],
+ [STATIC_ASSERT=yes], [STATIC_ASSERT=no])
+
+AC_MSG_RESULT([$STATIC_ASSERT])
+if test x$STATIC_ASSERT = "xyes"; then
+ AC_DEFINE(HAVE_STATIC_ASSERT, 1, [Define if C11 _Static_assert is supported.])
+fi
+
+if test "x${have_backtrace}" != "xyes"; then
+AC_TRY_COMPILE([#include <math.h>], [double x=0.0; x=ceil(0.0);],
+ [],
+ AC_MSG_ERROR([need math library for libexecinfo]))
+fi
+
dnl glusterfs prints memory usage to stderr by sending it SIGUSR1
-AC_CHECK_FUNC([malloc_stats], [have_malloc_stats=yes])
-if test "x${have_malloc_stats}" = "xyes"; then
- AC_DEFINE(HAVE_MALLOC_STATS, 1, [define if found malloc_stats])
+AC_CHECK_FUNC([mallinfo], [have_mallinfo=yes])
+if test "x${have_mallinfo}" = "xyes"; then
+ AC_DEFINE(HAVE_MALLINFO, 1, [define if found mallinfo])
fi
-AC_SUBST(HAVE_MALLOC_STATS)
+AC_SUBST(HAVE_MALLINFO)
dnl Linux, Solaris, Cygwin
AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec])
dnl FreeBSD, NetBSD
AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec])
case $host_os in
- *netbsd*)
- CFLAGS=-D_INCOMPLETE_XOPEN_C063
- ;;
+ *netbsd*)
+ GF_CFLAGS="${GF_CFLAGS} -D_INCOMPLETE_XOPEN_C063 -DCONFIG_MACHINE_BSWAP_H"
+ ;;
esac
AC_CHECK_FUNC([linkat], [have_linkat=yes])
if test "x${have_linkat}" = "xyes"; then
@@ -409,18 +1023,46 @@ if test "x${have_linkat}" = "xyes"; then
fi
AC_SUBST(HAVE_LINKAT)
-dnl Check for argp
+dnl check for Monotonic clock
+AC_CHECK_LIB([rt], [clock_gettime], ,
+ AC_MSG_WARN([System doesn't have monotonic clock using contrib]))
+
+dnl check for argp, FreeBSD has the header in /usr/local/include
+case $host_os in
+ *freebsd*)
+ CFLAGS="${CFLAGS} -isystem /usr/local/include"
+ ARGP_LDADD=-largp
+ ;;
+ *netbsd*)
+ ARGP_LDADD=-largp
+ ;;
+esac
+dnl argp-standalone does not provide a pkg-config file
AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
-AC_CONFIG_SUBDIRS(argp-standalone)
-BUILD_ARGP_STANDALONE=no
-if test "x${ac_cv_header_argp_h}" = "xno"; then
- BUILD_ARGP_STANDALONE=yes
- ARGP_STANDALONE_CPPFLAGS='-I${top_srcdir}/argp-standalone'
- ARGP_STANDALONE_LDADD='${top_builddir}/argp-standalone/libargp.a'
+if test "x$ac_cv_header_argp_h" != "xyes"; then
+ AC_MSG_ERROR([argp.h not found, install libargp or argp-standalone])
fi
-
-AC_SUBST(ARGP_STANDALONE_CPPFLAGS)
-AC_SUBST(ARGP_STANDALONE_LDADD)
+AC_SUBST(ARGP_LDADD)
+
+dnl Check for atomic operation support
+AC_MSG_CHECKING([for gcc __atomic builtins])
+AC_TRY_LINK([], [int v; __atomic_load_n(&v, __ATOMIC_ACQUIRE);],
+ [have_atomic_builtins=yes], [have_atomic_builtins=no])
+if test "x${have_atomic_builtins}" = "xyes"; then
+ AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, [define if __atomic_*() builtins are available])
+fi
+AC_SUBST(HAVE_ATOMIC_BUILTINS)
+AC_MSG_RESULT([$have_atomic_builtins])
+
+dnl __sync_*() will not be needed if __atomic_*() is available
+AC_MSG_CHECKING([for gcc __sync builtins])
+AC_TRY_LINK([], [__sync_synchronize();],
+ [have_sync_builtins=yes], [have_sync_builtins=no])
+if test "x${have_sync_builtins}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNC_BUILTINS, 1, [define if __sync_*() builtins are available])
+fi
+AC_SUBST(HAVE_SYNC_BUILTINS)
+AC_MSG_RESULT([$have_sync_builtins])
AC_CHECK_HEADER([malloc.h], AC_DEFINE(HAVE_MALLOC_H, 1, [have malloc.h]))
@@ -429,12 +1071,79 @@ if test "x${have_llistxattr}" = "xyes"; then
AC_DEFINE(HAVE_LLISTXATTR, 1, [define if llistxattr exists])
fi
-AC_CHECK_FUNC([fdatasync], [have_fdatasync=yes])
+AC_CHECK_FUNC([fdatasync], [have_fdatasync=no])
if test "x${have_fdatasync}" = "xyes"; then
AC_DEFINE(HAVE_FDATASYNC, 1, [define if fdatasync exists])
fi
-# Check the distribution where you are compiling glusterfs on
+AC_CHECK_FUNC([fallocate], [have_fallocate=yes])
+if test "x${have_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_FALLOCATE, 1, [define if fallocate exists])
+fi
+
+AC_CHECK_FUNC([posix_fallocate], [have_posix_fallocate=yes])
+if test "x${have_posix_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])
+fi
+
+# On fedora-29, copy_file_range syscall and the libc API both are present.
+# Whereas, on some machines such as centos-7, RHEL-7, the API is not there.
+# Only the system call is present. So, this change is to determine whether
+# the API is present or not. If not, then check whether the system call is
+# present or not. Accordingly sys_copy_file_range function will first call
+# the API if it is there. Otherwise it will call syscall(SYS_copy_file_range).
+AC_CHECK_FUNC([copy_file_range], [have_copy_file_range=yes])
+if test "x${have_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE, 1, [define if copy_file_range exists])
+else
+ OLD_CFLAGS=${CFLAGS}
+ CFLAGS="-D_GNU_SOURCE"
+ AC_CHECK_DECL([SYS_copy_file_range], , , [#include <sys/syscall.h>])
+ if test "x${ac_cv_have_decl_SYS_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE_SYS, 1, [define if SYS_copy_file_range is available])
+ fi
+ CFLAGS=${OLD_CFLAGS}
+fi
+
+AC_CHECK_FUNC([syncfs], [have_syncfs=yes])
+if test "x${have_syncfs}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNCFS, 1, [define if syncfs exists])
+else
+ OLD_CFLAGS=${CFLAGS}
+ CFLAGS="-D_GNU_SOURCE"
+ AC_CHECK_DECL([SYS_syncfs], , , [#include <sys/syscall.h>])
+ if test "x${ac_cv_have_decl_SYS_syncfs}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNCFS_SYS, 1, [define if SYS_syncfs is available])
+ fi
+ CFLAGS=${OLD_CFLAGS}
+fi
+
+BUILD_NANOSECOND_TIMESTAMPS=no
+AC_CHECK_FUNC([utimensat], [have_utimensat=yes])
+if test "x${have_utimensat}" = "xyes"; then
+ BUILD_NANOSECOND_TIMESTAMPS=yes
+ AC_DEFINE(HAVE_UTIMENSAT, 1, [define if utimensat exists])
+fi
+
+OLD_CFLAGS=${CFLAGS}
+CFLAGS="-D_GNU_SOURCE"
+AC_CHECK_DECL([SEEK_HOLE], , , [#include <unistd.h>])
+if test "x${ac_cv_have_decl_SEEK_HOLE}" = "xyes"; then
+ AC_DEFINE(HAVE_SEEK_HOLE, 1, [define if SEEK_HOLE is available])
+fi
+CFLAGS=${OLD_CFLAGS}
+
+AC_CHECK_FUNC([accept4], [have_accept4=yes])
+if test "x${have_accept4}" = "xyes"; then
+ AC_DEFINE(HAVE_ACCEPT4, 1, [define if accept4 exists])
+fi
+
+AC_CHECK_FUNC([paccept], [have_paccept=yes])
+if test "x${have_paccept}" = "xyes"; then
+AC_DEFINE(HAVE_PACCEPT, 1, [define if paccept exists])
+fi
+
+# Check the distribution where you are compiling glusterfs on
GF_DISTRIBUTION=
AC_CHECK_FILE([/etc/debian_version])
@@ -454,81 +1163,438 @@ fi
AC_SUBST(GF_DISTRIBUTION)
GF_HOST_OS=""
-GF_LDFLAGS="-rdynamic"
+GF_LDFLAGS="${GF_LDFLAGS} -rdynamic"
+
+dnl see --with-libtirpc option check above, libtirpc(-devel) is required for
+dnl ipv6-default
+if test "x${with_libtirpc}" = "xyes" || test "x${with_ipv6_default}" = "xyes" ; then
+ PKG_CHECK_MODULES([TIRPC], [libtirpc],
+ [with_libtirpc="yes"; GF_CFLAGS="$GF_CFLAGS $TIRPC_CFLAGS"; GF_LDFLAGS="$GF_LDFLAGS $TIRPC_LIBS";],
+ [with_libtirpc="missing"; with_ipv6_default="no"])
+fi
+if test "x${with_libtirpc}" = "xmissing" ; then
+ AC_CHECK_HEADERS([rpc/rpc.h],[
+ AC_MSG_WARN([
+ ---------------------------------------------------------------------------------
+ libtirpc (and/or ipv6-default) were enabled but libtirpc-devel is not installed.
+ Disabling libtirpc and ipv6-default and falling back to legacy glibc rpc headers.
+ This is a transitional warning message. Eventually it will be an error message.
+ ---------------------------------------------------------------------------------])],[
+ AC_MSG_ERROR([
+ ---------------------------------------------------------------------------------
+ libtirpc (and/or ipv6-default) were enabled but libtirpc-devel is not installed
+ and there were no legacy glibc rpc headers and library to fall back to.
+ ---------------------------------------------------------------------------------])])
+fi
+
+if test "x$with_ipv6_default" = "xyes" ; then
+ GF_CFLAGS="$GF_CFLAGS -DIPV6_DEFAULT"
+fi
+
+dnl check for gcc -Werror=format-security
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Wformat -Werror=format-security"
+AC_MSG_CHECKING([whether $CC accepts -Werror=format-security])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_format_security=yes], [cc_werror_format_security=no])
+echo $cc_werror_format_security
+if test "x$cc_werror_format_security" = "xyes"; then
+ GF_CFLAGS="$GF_CFLAGS ${CFLAGS}"
+fi
+CFLAGS="$saved_CFLAGS"
+
+dnl check for gcc -Werror=implicit-function-declaration
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Werror=implicit-function-declaration"
+AC_MSG_CHECKING([whether $CC accepts -Werror=implicit-function-declaration])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_implicit=yes], [cc_werror_implicit=no])
+echo $cc_werror_implicit
+if test "x$cc_werror_implicit" = "xyes"; then
+ GF_CFLAGS="${GF_CFLAGS} ${CFLAGS}"
+fi
+CFLAGS="$saved_CFLAGS"
+
+dnl clang is mostly GCC-compatible, but its version is much lower,
+dnl so we have to check for it.
+AC_MSG_CHECKING([if compiling with clang])
+
+AC_COMPILE_IFELSE(
+[AC_LANG_PROGRAM([], [[
+#ifndef __clang__
+ not clang
+#endif
+]])],
+[CLANG=yes], [CLANG=no])
+
+AC_MSG_RESULT([$CLANG])
+
+if test "x$CLANG" = "xyes"; then
+ GF_CFLAGS="${GF_CFLAGS} -Wno-gnu"
+fi
+
+if test "x$ac_cv_header_execinfo_h" = "xno"; then
+ # The reason is that __builtin_frame_address(n) for n > 0 seems
+ # to just crash on most platforms when -fomit-stack-pointer is
+ # specified, which seems to be the default for many platforms on
+ # -O2. The documentation says that __builtin_frame_address()
+ # should return NULL in case it can't get the frame, but it
+ # seems to crash instead.
+
+ # execinfo.c in ./contrib/libexecinfo uses __builtin_frame_address(n)
+ # for providing cross platform backtrace*() functions.
+ if test "x$CLANG" = "xno"; then
+ GF_CFLAGS="${GF_CFLAGS} -fno-omit-frame-pointer"
+ fi
+fi
+
+old_prefix=$prefix
+if test "x$prefix" = xNONE; then
+ prefix=$ac_default_prefix
+fi
+old_exec_prefix=$exec_prefix
+if test "x$exec_prefix" = xNONE; then
+ exec_prefix="$(eval echo $prefix)"
+fi
+GLUSTERFS_LIBEXECDIR="$(eval echo $libexecdir)/glusterfs"
+prefix=$old_prefix
+exec_prefix=$old_exec_prefix
+
+### Dirty hacky stuff to make LOCALSTATEDIR work
+if test "x$prefix" = xNONE; then
+ test $localstatedir = '${prefix}/var' && localstatedir=$ac_default_prefix/var
+ localstatedir=/var
+fi
+localstatedir="$(eval echo ${localstatedir})"
+LOCALSTATEDIR=$localstatedir
+
+GLUSTERFSD_MISCDIR="$(eval echo ${localstatedir})/lib/misc/glusterfsd"
+
+old_prefix=$prefix
+if test "x$prefix" = xNONE; then
+ prefix=$ac_default_prefix
+fi
+GLUSTERD_VOLFILE="$(eval echo ${sysconfdir})/glusterfs/glusterd.vol"
+prefix=$old_prefix
+
+
+GFAPI_EXTRA_LDFLAGS='-Wl,--version-script=$(top_srcdir)/api/src/gfapi.map'
case $host_os in
linux*)
- dnl GF_LINUX_HOST_OS=1
GF_HOST_OS="GF_LINUX_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
- ;;
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/lib/glusterd"
+ ;;
solaris*)
GF_HOST_OS="GF_SOLARIS_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -O0 -m64"
- GF_LDFLAGS=""
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_GLUSTERFS_LDFLAGS="-lnsl -lresolv -lsocket"
+ GF_CFLAGS="${GF_CFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -m64"
BUILD_FUSE_CLIENT=no
FUSE_CLIENT_SUBDIR=""
- ;;
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/lib/glusterd"
+ ;;
*netbsd*)
- GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_INCOMPLETE_XOPEN_C063"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- GF_FUSE_LDADD="-lperfuse"
- BUILD_FUSE_CLIENT=yes
- LEXLIB=""
- ;;
- *bsd*)
GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- BUILD_FUSE_CLIENT=no
- ;;
+ GF_CFLAGS="${GF_CFLAGS} -D_INCOMPLETE_XOPEN_C063"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(sbindir)\\\""
+ GF_LDADD="${ARGP_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_LDFLAGS="${GF_LDFLAGS} -lexecinfo"
+ fi
+ GF_FUSE_LDADD="-lperfuse"
+ BUILD_FUSE_CLIENT=yes
+ LEXLIB=""
+ BUILD_FUSERMOUNT=no
+ FUSERMOUNT_SUBDIR=""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/db/glusterd"
+ ;;
+ *freebsd*)
+ GF_HOST_OS="GF_BSD_HOST_OS"
+ GF_CFLAGS="${GF_CFLAGS} -DO_DSYNC=0"
+ GF_CFLAGS="${GF_CFLAGS} -Dxdr_quad_t=xdr_longlong_t"
+ GF_CFLAGS="${GF_CFLAGS} -Dxdr_u_quad_t=xdr_u_longlong_t"
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(sbindir)\\\""
+ GF_LDADD="${ARGP_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_LDFLAGS="${GF_LDFLAGS} -lexecinfo"
+ fi
+ BUILD_FUSE_CLIENT=yes
+ BUILD_FUSERMOUNT=no
+ FUSERMOUNT_SUBDIR=""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/db/glusterd"
+ ;;
darwin*)
GF_HOST_OS="GF_DARWIN_HOST_OS"
- LIBTOOL=glibtool
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace -D_XOPEN_SOURCE -O0"
- GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace -O0"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
- ;;
+ LIBTOOL=glibtool
+ GF_CFLAGS="${GF_CFLAGS} -D_REENTRANT -D_XOPEN_SOURCE "
+ GF_CFLAGS="${GF_CFLAGS} -D_DARWIN_USE_64_BIT_INODE "
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_LDADD="${ARGP_LDADD}"
+ GF_LDFLAGS="${GF_LDFLAGS}"
+ GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
+ BUILD_FUSERMOUNT="no"
+ FUSERMOUNT_SUBDIR=""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/db/glusterd"
+ GFAPI_EXTRA_LDFLAGS='-Wl,-alias_list,$(top_srcdir)/api/src/gfapi.aliases'
+ ;;
esac
+# Default value for sbindir
+prefix_temp=$prefix
+exec_prefix_temp=$exec_prefix
+
+test "${prefix}" = "NONE" && prefix="${ac_default_prefix}"
+test "${exec_prefix}" = "NONE" && exec_prefix='${prefix}'
+sbintemp="${sbindir}"
+eval sbintemp=\"${sbintemp}\"
+eval sbintemp=\"${sbintemp}\"
+SBIN_DIR=${sbintemp}
+
+sysconfdirtemp="${sysconfdir}"
+eval sysconfdirtemp=\"${sysconfdirtemp}\"
+SYSCONF_DIR=${sysconfdirtemp}
+
+prefix=$prefix_temp
+exec_prefix=$exec_prefix_temp
+
+AC_SUBST(SBIN_DIR)
+AC_SUBST(SYSCONF_DIR)
+
+# lazy umount emulation
+UMOUNTD_SUBDIR=""
+if test "x${GF_HOST_OS}" != "xGF_LINUX_HOST_OS" ; then
+ UMOUNTD_SUBDIR="contrib/umountd"
+fi
+AC_SUBST(UMOUNTD_SUBDIR)
+
+
+# enable debug section
+AC_ARG_ENABLE([debug],
+ AC_HELP_STRING([--enable-debug],
+ [Enable debug build options.]))
+
+AC_ARG_ENABLE([mempool],
+ AC_HELP_STRING([--disable-mempool],
+ [Disable the Gluster memory pooler.]))
+
+USE_MEMPOOL="yes"
+if test "x$enable_mempool" = "xno"; then
+ USE_MEMPOOL="no"
+ AC_DEFINE(GF_DISABLE_MEMPOOL, 1, [Disable the Gluster memory pooler.])
+fi
+
+# syslog section
+AC_ARG_ENABLE([syslog],
+ AC_HELP_STRING([--disable-syslog],
+ [Disable syslog for logging]))
+
+USE_SYSLOG="yes"
+if test "x$enable_syslog" != "xno"; then
+ AC_DEFINE(GF_USE_SYSLOG, 1, [Use syslog for logging])
+else
+ USE_SYSLOG="no"
+fi
+AM_CONDITIONAL([ENABLE_SYSLOG], [test x$USE_SYSLOG = xyes])
+#end syslog section
+
BUILD_READLINE=no
AC_CHECK_LIB([readline -lcurses],[readline],[RLLIBS="-lreadline -lcurses"])
AC_CHECK_LIB([readline -ltermcap],[readline],[RLLIBS="-lreadline -ltermcap"])
AC_CHECK_LIB([readline -lncurses],[readline],[RLLIBS="-lreadline -lncurses"])
-if test "x$RLLIBS" != "x"; then
- AC_DEFINE(HAVE_READLINE, 1, [readline enabled CLI])
- BUILD_READLINE=yes
+if test -n "$RLLIBS"; then
+ if test "x$RL_UNDO" = "xyes"; then
+ AC_DEFINE(HAVE_READLINE, 1, [readline enabled CLI])
+ BUILD_READLINE=yes
+ else
+ BUILD_READLINE="no (present but missing undo)"
+ fi
+
fi
BUILD_LIBAIO=no
AC_CHECK_LIB([aio],[io_setup],[LIBAIO="-laio"])
-if test "x$LIBAIO" != "x"; then
+if test -n "$LIBAIO"; then
AC_DEFINE(HAVE_LIBAIO, 1, [libaio based POSIX enabled])
BUILD_LIBAIO=yes
fi
+dnl gnfs section
+BUILD_GNFS="no"
+RPCBIND_SERVICE=""
+AC_ARG_ENABLE([gnfs],
+ AC_HELP_STRING([--enable-gnfs],
+ [Enable legacy gnfs server xlator.]))
+if test "x${with_server}" = "xyes" -a "x$enable_gnfs" = "xyes"; then
+ BUILD_GNFS="yes"
+ GF_CFLAGS="$GF_CFLAGS -DBUILD_GNFS"
+ RPCBIND_SERVICE="rpcbind.service"
+fi
+AM_CONDITIONAL([BUILD_GNFS], [test x$BUILD_GNFS = xyes])
+AC_SUBST(BUILD_GNFS)
+AC_SUBST(RPCBIND_SERVICE)
+dnl end gnfs section
+
+dnl Check for userspace-rcu
+PKG_CHECK_MODULES([URCU], [liburcu-bp], [],
+ [AC_CHECK_HEADERS([urcu-bp.h],
+ [URCU_LIBS='-lurcu-bp'],
+ AC_MSG_ERROR([liburcu-bp not found]))])
+PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.8], [],
+ [PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.7],
+ [AC_DEFINE(URCU_OLD, 1, [Define if liburcu 0.6 or 0.7 is found])
+ USE_CONTRIB_URCU='yes'],
+ [AC_CHECK_HEADERS([urcu/cds.h],
+ [AC_DEFINE(URCU_OLD, 1, [Define if liburcu 0.6 or 0.7 is found])
+ URCU_CDS_LIBS='-lurcu-cds'
+ USE_CONTRIB_URCU='yes'],
+ [AC_MSG_ERROR([liburcu-cds not found])])])])
+
+BUILD_UNITTEST="no"
+AC_ARG_ENABLE([cmocka],
+ AC_HELP_STRING([--enable-cmocka],
+ [Enable cmocka build options.]))
+if test "x$enable_cmocka" = "xyes"; then
+ BUILD_UNITTEST="yes"
+ PKG_CHECK_MODULES([UNITTEST], [cmocka >= 1.0.1], [BUILD_UNITTEST="yes"],
+ [AC_MSG_ERROR([cmocka library is required to build glusterfs])]
+ )
+fi
+AM_CONDITIONAL([UNITTEST], [test x$BUILD_UNITTEST = xyes])
+
+dnl Define UNIT_TESTING only for building cmocka binaries.
+UNITTEST_CFLAGS="${UNITTEST_CFLAGS} -DUNIT_TESTING=1"
+
+dnl Add cmocka for unit tests
+case $host_os in
+ freebsd*)
+ dnl remove --coverage on FreeBSD due to a known llvm packaging bug
+ UNITTEST_CFLAGS="${UNITTEST_CPPFLAGS} ${UNITTEST_CFLAGS} -g -DDEBUG -O0"
+ UNITTEST_LDFLAGS="${UNITTEST_LIBS} ${UNITTEST_LDFLAGS}"
+ ;;
+ *)
+ UNITTEST_CFLAGS="${UNITTEST_CPPFLAGS} ${UNITTEST_CFLAGS} -g -DDEBUG -O0 --coverage"
+ UNITTEST_LDFLAGS="${UNITTEST_LIBS} ${UNITTEST_LDFLAGS}"
+ ;;
+esac
+
+AC_SUBST(UNITTEST_CFLAGS)
+AC_SUBST(UNITTEST_LDFLAGS)
+
+AC_SUBST(CFLAGS)
+# end enable debug section
+
+# EC dynamic code generation section
+
+EC_DYNAMIC_SUPPORT="none"
+EC_DYNAMIC_ARCH="none"
+
+AC_ARG_ENABLE([ec-dynamic],
+ AC_HELP_STRING([--disable-ec-dynamic],
+ [Disable all dynamic code generation extensions for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-intel],
+ AC_HELP_STRING([--disable-ec-dynamic-intel],
+ [Disable all INTEL dynamic code generation extensions for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-arm],
+ AC_HELP_STRING([--disable-ec-dynamic-arm],
+ [Disable all ARM dynamic code generation extensions for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-x64],
+ AC_HELP_STRING([--disable-ec-dynamic-x64],
+ [Disable dynamic INTEL x64 code generation for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-sse],
+ AC_HELP_STRING([--disable-ec-dynamic-sse],
+ [Disable dynamic INTEL SSE code generation for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-avx],
+ AC_HELP_STRING([--disable-ec-dynamic-avx],
+ [Disable dynamic INTEL AVX code generation for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-neon],
+ AC_HELP_STRING([--disable-ec-dynamic-neon],
+ [Disable dynamic ARM NEON code generation for EC module]))
+
+if test "x$enable_ec_dynamic" != "xno"; then
+ case $host in
+ x86_64*)
+ if test "x$enable_ec_dynamic_intel" != "xno"; then
+ if test "x$enable_ec_dynamic_x64" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT x64"
+ AC_DEFINE(USE_EC_DYNAMIC_X64, 1, [Defined if using dynamic INTEL x64 code])
+ fi
+ if test "x$enable_ec_dynamic_sse" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT sse"
+ AC_DEFINE(USE_EC_DYNAMIC_SSE, 1, [Defined if using dynamic INTEL SSE code])
+ fi
+ if test "x$enable_ec_dynamic_avx" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT avx"
+ AC_DEFINE(USE_EC_DYNAMIC_AVX, 1, [Defined if using dynamic INTEL AVX code])
+ fi
+
+ if test "x$EC_DYNAMIC_SUPPORT" != "xnone"; then
+ EC_DYNAMIC_ARCH="intel"
+ fi
+ fi
+ ;;
+ arm*)
+ if test "x$enable_ec_dynamic_arm" != "xno"; then
+ if test "x$enable_ec_dynamic_neon" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT neon"
+ AC_DEFINE(USE_EC_DYNAMIC_NEON, 1, [Defined if using dynamic ARM NEON code])
+ fi
+
+ if test "x$EC_DYNAMIC_SUPPORT" != "xnone"; then
+ EC_DYNAMIC_ARCH="arm"
+ fi
+ fi
+ ;;
+ esac
+
+ EC_DYNAMIC_SUPPORT="${EC_DYNAMIC_SUPPORT#none }"
+fi
+
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_INTEL], [test "x$EC_DYNAMIC_ARCH" = "xintel"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_ARM], [test "x$EC_DYNAMIC_ARCH" = "xarm"])
+
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_X64], [test "x${EC_DYNAMIC_SUPPORT##*x64*}" = "x"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_SSE], [test "x${EC_DYNAMIC_SUPPORT##*sse*}" = "x"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_AVX], [test "x${EC_DYNAMIC_SUPPORT##*avx*}" = "x"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_NEON], [test "x${EC_DYNAMIC_SUPPORT##*neon*}" = "x"])
+
+AC_SUBST(USE_EC_DYNAMIC_X64)
+AC_SUBST(USE_EC_DYNAMIC_SSE)
+AC_SUBST(USE_EC_DYNAMIC_AVX)
+AC_SUBST(USE_EC_DYNAMIC_NEON)
+
+# end EC dynamic code generation section
+
+dnl libglusterfs.so uses math functions
+GF_LDADD="${GF_LDADD} ${MATH_LIB}"
+
+case $host_os in
+ dnl Can't use libtool's portable "-no-undefined" as it seems to be ignored on Linux
+ linux*)
+ GF_NO_UNDEFINED='-Wl,--no-undefined'
+ ;;
+ darwin*)
+ GF_NO_UNDEFINED='-Wl,-undefined'
+ ;;
+ *)
+ dnl There's an issue on FreeBSD with reference to __progname used in some parts of code
+ GF_NO_UNDEFINED=''
+ ;;
+esac
+dnl GF_XLATOR_DEFAULT_LDFLAGS is for most xlators that expose a common set of symbols
+GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
+dnl GF_XLATOR_LDFLAGS is for xlators that expose extra symbols, e.g. dht
+GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
AC_SUBST(GF_HOST_OS)
-AC_SUBST(GF_GLUSTERFS_LDFLAGS)
-AC_SUBST(GF_GLUSTERFS_CFLAGS)
AC_SUBST(GF_CFLAGS)
AC_SUBST(GF_LDFLAGS)
AC_SUBST(GF_LDADD)
@@ -538,28 +1604,114 @@ AC_SUBST(RLLIBS)
AC_SUBST(LIBAIO)
AC_SUBST(AM_MAKEFLAGS)
AC_SUBST(AM_LIBTOOLFLAGS)
+AC_SUBST(GF_NO_UNDEFINED)
+AC_SUBST(GF_XLATOR_DEFAULT_LDFLAGS)
+AC_SUBST(GF_XLATOR_LDFLAGS)
+AC_SUBST(GF_XLATOR_MGNT_LIBADD)
+
+case $host_os in
+ *freebsd*)
+ GF_XLATOR_MGNT_LIBADD="-lutil -lprocstat"
+ ;;
+esac
CONTRIBDIR='$(top_srcdir)/contrib'
AC_SUBST(CONTRIBDIR)
-INCLUDES='-I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/uuid'
-AC_SUBST(INCLUDES)
+GF_CPPDEFINES='-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)'
+GF_CPPINCLUDES='-include $(top_builddir)/config.h -include $(top_builddir)/site.h -I$(top_srcdir)/libglusterfs/src -I$(top_builddir)/libglusterfs/src'
+if test "x${USE_CONTRIB_URCU}" = "xyes"; then
+ GF_CPPINCLUDES="${GF_CPPINCLUDES} -I\$(CONTRIBDIR)/userspace-rcu"
+fi
+GF_CPPFLAGS="$GF_CPPFLAGS $GF_CPPDEFINES $GF_CPPINCLUDES"
+AC_SUBST([GF_CPPFLAGS])
+AM_CONDITIONAL([GF_LINUX_HOST_OS], test "${GF_HOST_OS}" = "GF_LINUX_HOST_OS")
AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS")
+AM_CONDITIONAL([GF_BSD_HOST_OS], test "${GF_HOST_OS}" = "GF_BSD_HOST_OS")
+if test "${GF_HOST_OS}" = "GF_BSD_HOST_OS"; then
+ AC_DEFINE(GF_BSD_HOST_OS, 1, [This is a BSD compatible OS.])
+fi
-AM_CONDITIONAL([GF_INSTALL_VAR_LIB_GLUSTERD], test ! -d /var/lib/glusterd && test -d /etc/glusterd )
+AC_SUBST(GLUSTERD_WORKDIR)
+AM_CONDITIONAL([GF_INSTALL_GLUSTERD_WORKDIR], test ! -d ${GLUSTERD_WORKDIR} && test -d ${sysconfdir}/glusterd )
+AC_SUBST(GLUSTERD_VOLFILE)
+AC_SUBST(GLUSTERFS_LIBEXECDIR)
+AC_SUBST(GLUSTERFSD_MISCDIR)
+
+dnl pkg-config versioning
+dnl
+dnl Once we released gluster-api.pc with version=7. Since then we undid the
+dnl library versioning and replaced it with symbol-versioning. The current
+dnl libgfapi.so has version 0, but the symbols have the version from the main
+dnl package at the time they were added.
+dnl
+dnl Because other packages (like samba) use the pkg-config version, we can not
+dnl drop it, or decrease the version easily. The simplest solution is to keep
+dnl the version=7 and add sub-digits for the actual package/symbol versions.
+GFAPI_VERSION="7."${PACKAGE_VERSION}
+LIBGFCHANGELOG_VERSION="0.0.1"
+AC_SUBST(GFAPI_VERSION)
+AC_SUBST(LIBGFCHANGELOG_VERSION)
+
+dnl libtool versioning
+LIBGFXDR_LT_VERSION="0:1:0"
+LIBGFRPC_LT_VERSION="0:1:0"
+LIBGLUSTERFS_LT_VERSION="0:1:0"
+LIBGFCHANGELOG_LT_VERSION="0:1:0"
+GFAPI_LT_VERSION="0:0:0"
+AC_SUBST(LIBGFXDR_LT_VERSION)
+AC_SUBST(LIBGFRPC_LT_VERSION)
+AC_SUBST(LIBGLUSTERFS_LT_VERSION)
+AC_SUBST(LIBGFCHANGELOG_LT_VERSION)
+AC_SUBST(GFAPI_LT_VERSION)
+AC_SUBST(GFAPI_EXTRA_LDFLAGS)
+
+GFAPI_LIBS="${ACL_LIBS}"
+AC_SUBST(GFAPI_LIBS)
+
+dnl this change necessary for run-tests.sh
+AC_CONFIG_FILES([tests/env.rc],[ln -s ${ac_abs_builddir}/env.rc ${ac_abs_srcdir}/env.rc 2>/dev/null])
AC_OUTPUT
echo
echo "GlusterFS configure summary"
echo "==========================="
-echo "FUSE client : $BUILD_FUSE_CLIENT"
-echo "Infiniband verbs : $BUILD_IBVERBS"
-echo "epoll IO multiplex : $BUILD_EPOLL"
-echo "argp-standalone : $BUILD_ARGP_STANDALONE"
-echo "fusermount : $BUILD_FUSERMOUNT"
-echo "readline : $BUILD_READLINE"
-echo "georeplication : $BUILD_SYNCDAEMON"
-echo "Linux-AIO : $BUILD_LIBAIO"
+echo "FUSE client : $BUILD_FUSE_CLIENT"
+echo "epoll IO multiplex : $BUILD_EPOLL"
+echo "fusermount : $BUILD_FUSERMOUNT"
+echo "readline : $BUILD_READLINE"
+echo "georeplication : $BUILD_SYNCDAEMON"
+echo "Linux-AIO : $BUILD_LIBAIO"
+echo "Enable Debug : $BUILD_DEBUG"
+echo "Run with Valgrind : $VALGRIND_TOOL"
+echo "Sanitizer enabled : $SANITIZER"
+echo "Use syslog : $USE_SYSLOG"
+echo "XML output : $BUILD_XML_OUTPUT"
+echo "Unit Tests : $BUILD_UNITTEST"
+echo "Track priv ports : $TRACK_PRIVPORTS"
+echo "POSIX ACLs : $BUILD_POSIX_ACLS"
+echo "SELinux features : $USE_SELINUX"
+echo "firewalld-config : $BUILD_FIREWALLD"
+echo "Events : $BUILD_EVENTS"
+echo "EC dynamic support : $EC_DYNAMIC_SUPPORT"
+echo "Use memory pools : $USE_MEMPOOL"
+echo "Nanosecond m/atimes : $BUILD_NANOSECOND_TIMESTAMPS"
+echo "Server components : $with_server"
+echo "Legacy gNFS server : $BUILD_GNFS"
+echo "IPV6 default : $with_ipv6_default"
+echo "Use TIRPC : $with_libtirpc"
+echo "With Python : ${PYTHON_VERSION}"
+echo "Cloudsync : $BUILD_CLOUDSYNC"
+echo "Metadata dispersal : $BUILD_METADISP"
+echo "Link with TCMALLOC : $BUILD_TCMALLOC"
echo
+
+# dnl Note: ${X^^} capitalization assumes bash >= 4.x
+if test "x$SANITIZER" != "xnone"; then
+ echo "Note: since glusterfs processes are daemon processes, use"
+ echo "'export ${SANITIZER^^}_OPTIONS=log_path=/path/to/xxx.log' to collect"
+ echo "sanitizer output. Further details and more options can be"
+ echo "found at https://github.com/google/sanitizers."
+fi
diff --git a/contrib/aclocal/mkdirp.m4 b/contrib/aclocal/mkdirp.m4
new file mode 100644
index 00000000000..d2f7edd5ccd
--- /dev/null
+++ b/contrib/aclocal/mkdirp.m4
@@ -0,0 +1,146 @@
+# Excerpt from autoconf/autoconf/programs.m4
+# This file is part of Autoconf. -*- Autoconf -*-
+# Checking for programs.
+
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+# Foundation, Inc.
+
+# This file is part of Autoconf. This program is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the Autoconf Configure Script Exception,
+# version 3.0, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License
+# and a copy of the Autoconf Configure Script Exception along with
+# this program; see the files COPYINGv3 and COPYING.EXCEPTION
+# respectively. If not, see <http://www.gnu.org/licenses/>.
+
+# Written by David MacKenzie, with help from
+# Franc,ois Pinard, Karl Berry, Richard Pixley, Ian Lance Taylor,
+# Roland McGrath, Noah Friedman, david d zuhn, and many others.
+
+# AC_PROG_MKDIR_P
+# ---------------
+# Check whether `mkdir -p' is known to be thread-safe, and fall back to
+# install-sh -d otherwise.
+#
+# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories
+# created by `make install' are always world readable, even if the
+# installer happens to have an overly restrictive umask (e.g. 077).
+# This was a mistake. There are at least two reasons why we must not
+# use `-m 0755':
+# - it causes special bits like SGID to be ignored,
+# - it may be too restrictive (some setups expect 775 directories).
+#
+# Do not use -m 0755 and let people choose whatever they expect by
+# setting umask.
+#
+# We cannot accept any implementation of `mkdir' that recognizes `-p'.
+# Some implementations (such as Solaris 8's) are vulnerable to race conditions:
+# if a parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c'
+# concurrently, both version can detect that a/ is missing, but only
+# one can create it and the other will error out. Consequently we
+# restrict ourselves to known race-free implementations.
+#
+# Automake used to define mkdir_p as `mkdir -p .', in order to
+# allow $(mkdir_p) to be used without argument. As in
+# $(mkdir_p) $(somedir)
+# where $(somedir) is conditionally defined. However we don't do
+# that for MKDIR_P.
+# 1. before we restricted the check to GNU mkdir, `mkdir -p .' was
+# reported to fail in read-only directories. The system where this
+# happened has been forgotten.
+# 2. in practice we call $(MKDIR_P) on directories such as
+# $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# and we don't want to create $(DESTDIR) if $(somedir) is empty.
+# To support the latter case, we have to write
+# test -z "$(somedir)" || $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# so $(MKDIR_P) always has an argument.
+# We will have better chances of detecting a missing test if
+# $(MKDIR_P) complains about missing arguments.
+# 3. $(MKDIR_P) is named after `mkdir -p' and we don't expect this
+# to accept no argument.
+# 4. having something like `mkdir .' in the output is unsightly.
+#
+# On NextStep and OpenStep, the `mkdir' command does not
+# recognize any option. It will interpret all options as
+# directories to create.
+AN_MAKEVAR([MKDIR_P], [AC_PROG_MKDIR_P])
+AC_DEFUN_ONCE([AC_PROG_MKDIR_P],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+
+AC_MSG_CHECKING([for a thread-safe mkdir -p])
+if test -z "$MKDIR_P"; then
+ AC_CACHE_VAL([ac_cv_path_mkdir],
+ [_AS_PATH_WALK([$PATH$PATH_SEPARATOR/opt/sfw/bin],
+ [for ac_prog in mkdir gmkdir; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ AS_EXECUTABLE_P(["$as_dir/$ac_prog$ac_exec_ext"]) || continue
+ case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+ 'mkdir (GNU coreutils) '* | \
+ 'mkdir (coreutils) '* | \
+ 'mkdir (fileutils) '4.1*)
+ ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+ break 3;;
+ esac
+ done
+ done])])
+ test -d ./--version && rmdir ./--version
+ if test "${ac_cv_path_mkdir+set}" = set; then
+ MKDIR_P="$ac_cv_path_mkdir -p"
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for MKDIR_P within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ MKDIR_P="$ac_install_sh -d"
+ fi
+fi
+dnl status.m4 does special magic for MKDIR_P instead of AC_SUBST,
+dnl to get relative names right. However, also AC_SUBST here so
+dnl that Automake versions before 1.10 will pick it up (they do not
+dnl trace AC_SUBST_TRACE).
+dnl FIXME: Remove this once we drop support for Automake < 1.10.
+AC_SUBST([MKDIR_P])dnl
+AC_MSG_RESULT([$MKDIR_P])
+])# AC_PROG_MKDIR_P
+
+
+# From automake/m4/mkdirp.m4
+## -*- Autoconf -*-
+# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_MKDIR_P
+# ---------------
+# Check for `mkdir -p'.
+AC_DEFUN([AM_PROG_MKDIR_P],
+[
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
+dnl while keeping a definition of mkdir_p for backward compatibility.
+dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
+dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
+dnl Makefile.ins that do not define MKDIR_P, so we do our own
+dnl adjustment using top_builddir (which is defined more often than
+dnl MKDIR_P).
+AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
+case $mkdir_p in
+ [[\\/$]]* | ?:[[\\/]]*) ;;
+ */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
+esac
+])
diff --git a/contrib/aclocal/python.m4 b/contrib/aclocal/python.m4
new file mode 100644
index 00000000000..a39a9009046
--- /dev/null
+++ b/contrib/aclocal/python.m4
@@ -0,0 +1,209 @@
+## ------------------------ -*- Autoconf -*-
+## Python file handling
+## From Andrew Dalke
+## Updated by James Henstridge
+## ------------------------
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PATH_PYTHON([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# ---------------------------------------------------------------------------
+# Adds support for distributing Python modules and packages. To
+# install modules, copy them to $(pythondir), using the python_PYTHON
+# automake variable. To install a package with the same name as the
+# automake package, install to $(pkgpythondir), or use the
+# pkgpython_PYTHON automake variable.
+#
+# The variables $(pyexecdir) and $(pkgpyexecdir) are provided as
+# locations to install python extension modules (shared libraries).
+# Another macro is required to find the appropriate flags to compile
+# extension modules.
+#
+# If your package is configured with a different prefix to python,
+# users will have to add the install directory to the PYTHONPATH
+# environment variable, or create a .pth file (see the python
+# documentation for details).
+#
+# If the MINIMUM-VERSION argument is passed, AM_PATH_PYTHON will
+# cause an error if the version of python installed on the system
+# doesn't meet the requirement. MINIMUM-VERSION should consist of
+# numbers and dots only.
+AC_DEFUN([AM_PATH_PYTHON],
+ [
+ dnl Find a Python interpreter. Python versions prior to 2.0 are not
+ dnl supported. (2.0 was released on October 16, 2000).
+ m4_define_default([_AM_PYTHON_INTERPRETER_LIST],
+ [python python2 python3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 dnl
+python2.1 python2.0])
+
+ m4_if([$1],[],[
+ dnl No version check is needed.
+ # Find any Python interpreter.
+ if test -z "$PYTHON"; then
+ AC_PATH_PROGS([PYTHON], _AM_PYTHON_INTERPRETER_LIST, :)
+ fi
+ am_display_PYTHON=python
+ ], [
+ dnl A version check is needed.
+ if test -n "$PYTHON"; then
+ # If the user set $PYTHON, use it and don't search something else.
+ AC_MSG_CHECKING([whether $PYTHON version >= $1])
+ AM_PYTHON_CHECK_VERSION([$PYTHON], [$1],
+ [AC_MSG_RESULT(yes)],
+ [AC_MSG_ERROR(too old)])
+ am_display_PYTHON=$PYTHON
+ else
+ # Otherwise, try each interpreter until we find one that satisfies
+ # VERSION.
+ AC_CACHE_CHECK([for a Python interpreter with version >= $1],
+ [am_cv_pathless_PYTHON],[
+ for am_cv_pathless_PYTHON in _AM_PYTHON_INTERPRETER_LIST none; do
+ test "$am_cv_pathless_PYTHON" = none && break
+ AM_PYTHON_CHECK_VERSION([$am_cv_pathless_PYTHON], [$1], [break])
+ done])
+ # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON.
+ if test "$am_cv_pathless_PYTHON" = none; then
+ PYTHON=:
+ else
+ AC_PATH_PROG([PYTHON], [$am_cv_pathless_PYTHON])
+ fi
+ am_display_PYTHON=$am_cv_pathless_PYTHON
+ fi
+ ])
+
+ if test "$PYTHON" = :; then
+ dnl Run any user-specified action, or abort.
+ m4_default([$3], [AC_MSG_ERROR([no suitable Python interpreter found])])
+ else
+
+ dnl Query Python for its version number. Getting [:3] seems to be
+ dnl the best way to do this; it's what "site.py" does in the standard
+ dnl library.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON version], [am_cv_python_version],
+ [am_cv_python_version=`$PYTHON -c "import sys; sys.stdout.write(sys.version[[:3]])"`])
+ AC_SUBST([PYTHON_VERSION], [$am_cv_python_version])
+
+ dnl Use the values of $prefix and $exec_prefix for the corresponding
+ dnl values of PYTHON_PREFIX and PYTHON_EXEC_PREFIX. These are made
+ dnl distinct variables so they can be overridden if need be. However,
+ dnl general consensus is that you shouldn't need this ability.
+
+ AC_SUBST([PYTHON_PREFIX], ['${prefix}'])
+ AC_SUBST([PYTHON_EXEC_PREFIX], ['${exec_prefix}'])
+
+ dnl At times (like when building shared libraries) you may want
+ dnl to know which OS platform Python thinks this is.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON platform], [am_cv_python_platform],
+ [am_cv_python_platform=`$PYTHON -c "import sys; sys.stdout.write(sys.platform)"`])
+ AC_SUBST([PYTHON_PLATFORM], [$am_cv_python_platform])
+
+
+ dnl Set up 4 directories:
+
+ dnl pythondir -- where to install python scripts. This is the
+ dnl site-packages directory, not the python standard library
+ dnl directory like in previous automake betas. This behavior
+ dnl is more consistent with lispdir.m4 for example.
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON script directory],
+ [am_cv_python_pythondir],
+ [if test "x$prefix" = xNONE
+ then
+ am_py_prefix=$ac_default_prefix
+ else
+ am_py_prefix=$prefix
+ fi
+ am_cv_python_pythondir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(0,0,prefix='$am_py_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pythondir in
+ $am_py_prefix*)
+ am__strip_prefix=`echo "$am_py_prefix" | sed 's|.|.|g'`
+ am_cv_python_pythondir=`echo "$am_cv_python_pythondir" | sed "s,^$am__strip_prefix,$PYTHON_PREFIX,"`
+ ;;
+ *)
+ case $am_py_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pythondir=$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pythondir], [$am_cv_python_pythondir])
+
+ dnl pkgpythondir -- $PACKAGE directory under pythondir. Was
+ dnl PYTHON_SITE_PACKAGE in previous betas, but this naming is
+ dnl more consistent with the rest of automake.
+
+ AC_SUBST([pkgpythondir], [\${pythondir}/$PACKAGE])
+
+ dnl pyexecdir -- directory for installing python extension modules
+ dnl (shared libraries)
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON extension module directory],
+ [am_cv_python_pyexecdir],
+ [if test "x$exec_prefix" = xNONE
+ then
+ am_py_exec_prefix=$am_py_prefix
+ else
+ am_py_exec_prefix=$exec_prefix
+ fi
+ am_cv_python_pyexecdir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(1,0,prefix='$am_py_exec_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pyexecdir in
+ $am_py_exec_prefix*)
+ am__strip_prefix=`echo "$am_py_exec_prefix" | sed 's|.|.|g'`
+ am_cv_python_pyexecdir=`echo "$am_cv_python_pyexecdir" | sed "s,^$am__strip_prefix,$PYTHON_EXEC_PREFIX,"`
+ ;;
+ *)
+ case $am_py_exec_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pyexecdir=$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pyexecdir], [$am_cv_python_pyexecdir])
+
+ dnl pkgpyexecdir -- $(pyexecdir)/$(PACKAGE)
+
+ AC_SUBST([pkgpyexecdir], [\${pyexecdir}/$PACKAGE])
+
+ dnl Run any user-specified action.
+ $2
+ fi
+
+])
+
+
+# AM_PYTHON_CHECK_VERSION(PROG, VERSION, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# ---------------------------------------------------------------------------
+# Run ACTION-IF-TRUE if the Python interpreter PROG has version >= VERSION.
+# Run ACTION-IF-FALSE otherwise.
+# This test uses sys.hexversion instead of the string equivalent (first
+# word of sys.version), in order to cope with versions such as 2.2c1.
+# This supports Python 2.0 or higher. (2.0 was released on October 16, 2000).
+AC_DEFUN([AM_PYTHON_CHECK_VERSION],
+ [prog="import sys
+# split strings by '.' and convert to numeric. Append some zeros
+# because we need at least 4 digits for the hex conversion.
+# map returns an iterator in Python 3.0 and a list in 2.x
+minver = list(map(int, '$2'.split('.'))) + [[0, 0, 0]]
+minverhex = 0
+# xrange is not present in Python 3.0 and range returns an iterator
+for i in list(range(0, 4)): minverhex = (minverhex << 8) + minver[[i]]
+sys.exit(sys.hexversion < minverhex)"
+ AS_IF([AM_RUN_LOG([$1 -c "$prog"])], [$3], [$4])])
diff --git a/contrib/fuse-include/fuse-mount.h b/contrib/fuse-include/fuse-mount.h
index 7a3756d92b8..7d28462de47 100644
--- a/contrib/fuse-include/fuse-mount.h
+++ b/contrib/fuse-include/fuse-mount.h
@@ -8,5 +8,6 @@
*/
void gf_fuse_unmount (const char *mountpoint, int fd);
+int gf_fuse_unmount_daemon (const char *mountpoint, int fd);
int gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
pid_t *mtab_pid, int status_fd);
diff --git a/contrib/fuse-include/fuse_kernel.h b/contrib/fuse-include/fuse_kernel.h
index 9ae25d6f9c0..1e41e237e6f 100644
--- a/contrib/fuse-include/fuse_kernel.h
+++ b/contrib/fuse-include/fuse_kernel.h
@@ -60,23 +60,87 @@
* 7.13
* - make max number of background requests and congestion threshold
* tunables
+ *
+ * 7.14
+ * - add splice support to fuse device
+ *
+ * 7.15
+ * - add store notify
+ * - add retrieve notify
+ *
+ * 7.16
+ * - add BATCH_FORGET request
+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ * - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ * - add FUSE_IOCTL_DIR flag
+ * - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ * - add FUSE_FALLOCATE
+ *
+ * 7.20
+ * - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ * - add FUSE_READDIRPLUS
+ * - send the requested events in POLL request
+ *
+ * 7.22
+ * - add FUSE_ASYNC_DIO
+ *
+ * 7.23
+ * - add FUSE_WRITEBACK_CACHE
+ * - add time_gran to fuse_init_out
+ * - add reserved space to fuse_init_out
+ * - add FATTR_CTIME
+ * - add ctime and ctimensec to fuse_setattr_in
+ * - add FUSE_RENAME2 request
+ * - add FUSE_NO_OPEN_SUPPORT flag
+ *
+ * 7.24
+ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
*/
#ifndef _LINUX_FUSE_H
#define _LINUX_FUSE_H
-#include <sys/types.h>
-#define __u64 uint64_t
-#define __s64 int64_t
-#define __u32 uint32_t
-#define __s32 int32_t
-#define __u16 uint16_t
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
/** Version number of this interface */
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 13
+#define FUSE_KERNEL_MINOR_VERSION 24
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -85,42 +149,42 @@
userspace works under 64bit kernels */
struct fuse_attr {
- __u64 ino;
- __u64 size;
- __u64 blocks;
- __u64 atime;
- __u64 mtime;
- __u64 ctime;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 ctimensec;
- __u32 mode;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 rdev;
- __u32 blksize;
- __u32 padding;
+ uint64_t ino;
+ uint64_t size;
+ uint64_t blocks;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t nlink;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t rdev;
+ uint32_t blksize;
+ uint32_t padding;
};
struct fuse_kstatfs {
- __u64 blocks;
- __u64 bfree;
- __u64 bavail;
- __u64 files;
- __u64 ffree;
- __u32 bsize;
- __u32 namelen;
- __u32 frsize;
- __u32 padding;
- __u32 spare[6];
+ uint64_t blocks;
+ uint64_t bfree;
+ uint64_t bavail;
+ uint64_t files;
+ uint64_t ffree;
+ uint32_t bsize;
+ uint32_t namelen;
+ uint32_t frsize;
+ uint32_t padding;
+ uint32_t spare[6];
};
struct fuse_file_lock {
- __u64 start;
- __u64 end;
- __u32 type;
- __u32 pid; /* tgid */
+ uint64_t start;
+ uint64_t end;
+ uint32_t type;
+ uint32_t pid; /* tgid */
};
/**
@@ -136,6 +200,7 @@ struct fuse_file_lock {
#define FATTR_ATIME_NOW (1 << 7)
#define FATTR_MTIME_NOW (1 << 8)
#define FATTR_LOCKOWNER (1 << 9)
+#define FATTR_CTIME (1 << 10)
/**
* Flags returned by the OPEN request
@@ -151,8 +216,24 @@ struct fuse_file_lock {
/**
* INIT request/reply flags
*
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
* FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
* FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -161,6 +242,17 @@ struct fuse_file_lock {
#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
#define FUSE_DONT_MASK (1 << 6)
+#define FUSE_SPLICE_WRITE (1 << 7)
+#define FUSE_SPLICE_MOVE (1 << 8)
+#define FUSE_SPLICE_READ (1 << 9)
+#define FUSE_FLOCK_LOCKS (1 << 10)
+#define FUSE_HAS_IOCTL_DIR (1 << 11)
+#define FUSE_AUTO_INVAL_DATA (1 << 12)
+#define FUSE_DO_READDIRPLUS (1 << 13)
+#define FUSE_READDIRPLUS_AUTO (1 << 14)
+#define FUSE_ASYNC_DIO (1 << 15)
+#define FUSE_WRITEBACK_CACHE (1 << 16)
+#define FUSE_NO_OPEN_SUPPORT (1 << 17)
/**
* CUSE INIT request/reply flags
@@ -173,6 +265,7 @@ struct fuse_file_lock {
* Release flags
*/
#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
/**
* Getattr flags
@@ -204,12 +297,16 @@ struct fuse_file_lock {
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
* FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
* FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
+#define FUSE_IOCTL_32BIT (1 << 3)
+#define FUSE_IOCTL_DIR (1 << 4)
#define FUSE_IOCTL_MAX_IOV 256
@@ -259,6 +356,12 @@ enum fuse_opcode {
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ FUSE_RENAME2 = 45,
+ FUSE_LSEEK = 46,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -268,6 +371,9 @@ enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
FUSE_NOTIFY_INVAL_INODE = 2,
FUSE_NOTIFY_INVAL_ENTRY = 3,
+ FUSE_NOTIFY_STORE = 4,
+ FUSE_NOTIFY_RETRIEVE = 5,
+ FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_CODE_MAX,
};
@@ -277,133 +383,149 @@ enum fuse_notify_code {
#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
struct fuse_entry_out {
- __u64 nodeid; /* Inode ID */
- __u64 generation; /* Inode generation: nodeid:gen must
- be unique for the fs's lifetime */
- __u64 entry_valid; /* Cache timeout for the name */
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 entry_valid_nsec;
- __u32 attr_valid_nsec;
+ uint64_t nodeid; /* Inode ID */
+ uint64_t generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ uint64_t entry_valid; /* Cache timeout for the name */
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t entry_valid_nsec;
+ uint32_t attr_valid_nsec;
struct fuse_attr attr;
};
struct fuse_forget_in {
- __u64 nlookup;
+ uint64_t nlookup;
+};
+
+struct fuse_forget_one {
+ uint64_t nodeid;
+ uint64_t nlookup;
+};
+
+struct fuse_batch_forget_in {
+ uint32_t count;
+ uint32_t dummy;
};
struct fuse_getattr_in {
- __u32 getattr_flags;
- __u32 dummy;
- __u64 fh;
+ uint32_t getattr_flags;
+ uint32_t dummy;
+ uint64_t fh;
};
#define FUSE_COMPAT_ATTR_OUT_SIZE 96
struct fuse_attr_out {
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 attr_valid_nsec;
- __u32 dummy;
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t attr_valid_nsec;
+ uint32_t dummy;
struct fuse_attr attr;
};
#define FUSE_COMPAT_MKNOD_IN_SIZE 8
struct fuse_mknod_in {
- __u32 mode;
- __u32 rdev;
- __u32 umask;
- __u32 padding;
+ uint32_t mode;
+ uint32_t rdev;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_mkdir_in {
- __u32 mode;
- __u32 umask;
+ uint32_t mode;
+ uint32_t umask;
};
struct fuse_rename_in {
- __u64 newdir;
+ uint64_t newdir;
+};
+
+struct fuse_rename2_in {
+ uint64_t newdir;
+ uint32_t flags;
+ uint32_t padding;
};
struct fuse_link_in {
- __u64 oldnodeid;
+ uint64_t oldnodeid;
};
struct fuse_setattr_in {
- __u32 valid;
- __u32 padding;
- __u64 fh;
- __u64 size;
- __u64 lock_owner;
- __u64 atime;
- __u64 mtime;
- __u64 unused2;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 unused3;
- __u32 mode;
- __u32 unused4;
- __u32 uid;
- __u32 gid;
- __u32 unused5;
+ uint32_t valid;
+ uint32_t padding;
+ uint64_t fh;
+ uint64_t size;
+ uint64_t lock_owner;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t unused4;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t unused5;
};
struct fuse_open_in {
- __u32 flags;
- __u32 unused;
+ uint32_t flags;
+ uint32_t unused;
};
struct fuse_create_in {
- __u32 flags;
- __u32 mode;
- __u32 umask;
- __u32 padding;
+ uint32_t flags;
+ uint32_t mode;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_open_out {
- __u64 fh;
- __u32 open_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t open_flags;
+ uint32_t padding;
};
struct fuse_release_in {
- __u64 fh;
- __u32 flags;
- __u32 release_flags;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t release_flags;
+ uint64_t lock_owner;
};
struct fuse_flush_in {
- __u64 fh;
- __u32 unused;
- __u32 padding;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t unused;
+ uint32_t padding;
+ uint64_t lock_owner;
};
struct fuse_read_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 read_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t read_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
#define FUSE_COMPAT_WRITE_IN_SIZE 24
struct fuse_write_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 write_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t write_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
struct fuse_write_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
@@ -413,32 +535,32 @@ struct fuse_statfs_out {
};
struct fuse_fsync_in {
- __u64 fh;
- __u32 fsync_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t fsync_flags;
+ uint32_t padding;
};
struct fuse_setxattr_in {
- __u32 size;
- __u32 flags;
+ uint32_t size;
+ uint32_t flags;
};
struct fuse_getxattr_in {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_getxattr_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_lk_in {
- __u64 fh;
- __u64 owner;
+ uint64_t fh;
+ uint64_t owner;
struct fuse_file_lock lk;
- __u32 lk_flags;
- __u32 padding;
+ uint32_t lk_flags;
+ uint32_t padding;
};
struct fuse_lk_out {
@@ -446,134 +568,206 @@ struct fuse_lk_out {
};
struct fuse_access_in {
- __u32 mask;
- __u32 padding;
+ uint32_t mask;
+ uint32_t padding;
};
struct fuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
};
+#define FUSE_COMPAT_INIT_OUT_SIZE 8
+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24
+
struct fuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
- __u16 max_background;
- __u16 congestion_threshold;
- __u32 max_write;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
+ uint16_t max_background;
+ uint16_t congestion_threshold;
+ uint32_t max_write;
+ uint32_t time_gran;
+ uint32_t unused[9];
};
#define CUSE_INIT_INFO_MAX 4096
struct cuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
};
struct cuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
- __u32 max_read;
- __u32 max_write;
- __u32 dev_major; /* chardev major */
- __u32 dev_minor; /* chardev minor */
- __u32 spare[10];
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
+ uint32_t max_read;
+ uint32_t max_write;
+ uint32_t dev_major; /* chardev major */
+ uint32_t dev_minor; /* chardev minor */
+ uint32_t spare[10];
};
struct fuse_interrupt_in {
- __u64 unique;
+ uint64_t unique;
};
struct fuse_bmap_in {
- __u64 block;
- __u32 blocksize;
- __u32 padding;
+ uint64_t block;
+ uint32_t blocksize;
+ uint32_t padding;
};
struct fuse_bmap_out {
- __u64 block;
+ uint64_t block;
};
struct fuse_ioctl_in {
- __u64 fh;
- __u32 flags;
- __u32 cmd;
- __u64 arg;
- __u32 in_size;
- __u32 out_size;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t cmd;
+ uint64_t arg;
+ uint32_t in_size;
+ uint32_t out_size;
+};
+
+struct fuse_ioctl_iovec {
+ uint64_t base;
+ uint64_t len;
};
struct fuse_ioctl_out {
- __s32 result;
- __u32 flags;
- __u32 in_iovs;
- __u32 out_iovs;
+ int32_t result;
+ uint32_t flags;
+ uint32_t in_iovs;
+ uint32_t out_iovs;
};
struct fuse_poll_in {
- __u64 fh;
- __u64 kh;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t kh;
+ uint32_t flags;
+ uint32_t events;
};
struct fuse_poll_out {
- __u32 revents;
- __u32 padding;
+ uint32_t revents;
+ uint32_t padding;
};
struct fuse_notify_poll_wakeup_out {
- __u64 kh;
+ uint64_t kh;
+};
+
+struct fuse_fallocate_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint64_t length;
+ uint32_t mode;
+ uint32_t padding;
};
struct fuse_in_header {
- __u32 len;
- __u32 opcode;
- __u64 unique;
- __u64 nodeid;
- __u32 uid;
- __u32 gid;
- __u32 pid;
- __u32 padding;
+ uint32_t len;
+ uint32_t opcode;
+ uint64_t unique;
+ uint64_t nodeid;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t pid;
+ uint32_t padding;
};
struct fuse_out_header {
- __u32 len;
- __s32 error;
- __u64 unique;
+ uint32_t len;
+ int32_t error;
+ uint64_t unique;
};
struct fuse_dirent {
- __u64 ino;
- __u64 off;
- __u32 namelen;
- __u32 type;
- char name[0];
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+struct fuse_direntplus {
+ struct fuse_entry_out entry_out;
+ struct fuse_dirent dirent;
+};
+
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+ offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
struct fuse_notify_inval_inode_out {
- __u64 ino;
- __s64 off;
- __s64 len;
+ uint64_t ino;
+ int64_t off;
+ int64_t len;
};
struct fuse_notify_inval_entry_out {
- __u64 parent;
- __u32 namelen;
- __u32 padding;
+ uint64_t parent;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_delete_out {
+ uint64_t parent;
+ uint64_t child;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_store_out {
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_notify_retrieve_out {
+ uint64_t notify_unique;
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+ uint64_t dummy1;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t dummy2;
+ uint64_t dummy3;
+ uint64_t dummy4;
+};
+
+struct fuse_lseek_in {
+ uint64_t fh;
+ uint64_t offset;
+ int32_t whence;
+ uint32_t padding;
+};
+
+struct fuse_lseek_out {
+ uint64_t offset;
};
#endif /* _LINUX_FUSE_H */
diff --git a/contrib/fuse-include/fuse_kernel_macfuse.h b/contrib/fuse-include/fuse_kernel_macfuse.h
index 3fbf24f70aa..31bc495a552 100644
--- a/contrib/fuse-include/fuse_kernel_macfuse.h
+++ b/contrib/fuse-include/fuse_kernel_macfuse.h
@@ -61,67 +61,59 @@
userspace works under 64bit kernels */
struct fuse_attr {
- __u64 ino;
- __u64 size;
- __u64 blocks;
- __u64 atime;
- __u64 mtime;
- __u64 ctime;
-#if (__FreeBSD__ >= 10)
- __u64 crtime;
-#endif /* __FreeBSD__ >= 10 */
- __u32 atimensec;
- __u32 mtimensec;
- __u32 ctimensec;
-#if (__FreeBSD__ >= 10)
- __u32 crtimensec;
-#endif /* __FreeBSD__ >= 10 */
- __u32 mode;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 rdev;
-#if (__FreeBSD__ >= 10)
- __u32 flags; /* file flags; see chflags(2) */
-#endif /* __FreeBSD__ >= 10 */
+ __u64 ino;
+ __u64 size;
+ __u64 blocks;
+ __u64 atime;
+ __u64 mtime;
+ __u64 ctime;
+ __u64 crtime;
+ __u32 atimensec;
+ __u32 mtimensec;
+ __u32 ctimensec;
+ __u32 crtimensec;
+ __u32 mode;
+ __u32 nlink;
+ __u32 uid;
+ __u32 gid;
+ __u32 rdev;
+ __u32 flags; /* file flags; see chflags(2) */
};
struct fuse_kstatfs {
- __u64 blocks;
- __u64 bfree;
- __u64 bavail;
- __u64 files;
- __u64 ffree;
- __u32 bsize;
- __u32 namelen;
- __u32 frsize;
- __u32 padding;
- __u32 spare[6];
+ __u64 blocks;
+ __u64 bfree;
+ __u64 bavail;
+ __u64 files;
+ __u64 ffree;
+ __u32 bsize;
+ __u32 namelen;
+ __u32 frsize;
+ __u32 padding;
+ __u32 spare[6];
};
struct fuse_file_lock {
- __u64 start;
- __u64 end;
- __u32 type;
- __u32 pid; /* tgid */
+ __u64 start;
+ __u64 end;
+ __u32 type;
+ __u32 pid; /* tgid */
};
/**
* Bitmasks for fuse_setattr_in.valid
*/
-#define FATTR_MODE (1 << 0)
-#define FATTR_UID (1 << 1)
-#define FATTR_GID (1 << 2)
-#define FATTR_SIZE (1 << 3)
-#define FATTR_ATIME (1 << 4)
-#define FATTR_MTIME (1 << 5)
-#define FATTR_FH (1 << 6)
-#if (__FreeBSD__ >= 10)
-#define FATTR_CRTIME (1 << 28)
-#define FATTR_CHGTIME (1 << 29)
-#define FATTR_BKUPTIME (1 << 30)
-#define FATTR_FLAGS (1 << 31)
-#endif /* __FreeBSD__ >= 10 */
+#define FATTR_MODE (1 << 0)
+#define FATTR_UID (1 << 1)
+#define FATTR_GID (1 << 2)
+#define FATTR_SIZE (1 << 3)
+#define FATTR_ATIME (1 << 4)
+#define FATTR_MTIME (1 << 5)
+#define FATTR_FH (1 << 6)
+#define FATTR_CRTIME (1 << 28)
+#define FATTR_CHGTIME (1 << 29)
+#define FATTR_BKUPTIME (1 << 30)
+#define FATTR_FLAGS (1 << 31)
/**
* Flags returned by the OPEN request
@@ -129,311 +121,304 @@ struct fuse_file_lock {
* FOPEN_DIRECT_IO: bypass page cache for this open file
* FOPEN_KEEP_CACHE: don't invalidate the data cache on open
*/
-#define FOPEN_DIRECT_IO (1 << 0)
-#define FOPEN_KEEP_CACHE (1 << 1)
-#if (__FreeBSD__ >= 10)
-#define FOPEN_PURGE_ATTR (1 << 30)
-#define FOPEN_PURGE_UBC (1 << 31)
-#endif
+#define FOPEN_DIRECT_IO (1 << 0)
+#define FOPEN_KEEP_CACHE (1 << 1)
+#define FOPEN_PURGE_ATTR (1 << 30)
+#define FOPEN_PURGE_UBC (1 << 31)
/**
* INIT request/reply flags
*/
-#define FUSE_ASYNC_READ (1 << 0)
-#define FUSE_POSIX_LOCKS (1 << 1)
-#if (__FreeBSD__ >= 10)
-#define FUSE_CASE_INSENSITIVE (1 << 29)
-#define FUSE_VOL_RENAME (1 << 30)
-#define FUSE_XTIMES (1 << 31)
-#endif /* __FreeBSD__ >= 10 */
+#define FUSE_ASYNC_READ (1 << 0)
+#define FUSE_POSIX_LOCKS (1 << 1)
+#define FUSE_CASE_INSENSITIVE (1 << 29)
+#define FUSE_VOL_RENAME (1 << 30)
+#define FUSE_XTIMES (1 << 31)
/**
* Release flags
*/
-#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLUSH (1 << 0)
enum fuse_opcode {
- FUSE_LOOKUP = 1,
- FUSE_FORGET = 2, /* no reply */
- FUSE_GETATTR = 3,
- FUSE_SETATTR = 4,
- FUSE_READLINK = 5,
- FUSE_SYMLINK = 6,
- FUSE_MKNOD = 8,
- FUSE_MKDIR = 9,
- FUSE_UNLINK = 10,
- FUSE_RMDIR = 11,
- FUSE_RENAME = 12,
- FUSE_LINK = 13,
- FUSE_OPEN = 14,
- FUSE_READ = 15,
- FUSE_WRITE = 16,
- FUSE_STATFS = 17,
- FUSE_RELEASE = 18,
- FUSE_FSYNC = 20,
- FUSE_SETXATTR = 21,
- FUSE_GETXATTR = 22,
- FUSE_LISTXATTR = 23,
- FUSE_REMOVEXATTR = 24,
- FUSE_FLUSH = 25,
- FUSE_INIT = 26,
- FUSE_OPENDIR = 27,
- FUSE_READDIR = 28,
- FUSE_RELEASEDIR = 29,
- FUSE_FSYNCDIR = 30,
- FUSE_GETLK = 31,
- FUSE_SETLK = 32,
- FUSE_SETLKW = 33,
- FUSE_ACCESS = 34,
- FUSE_CREATE = 35,
- FUSE_INTERRUPT = 36,
- FUSE_BMAP = 37,
- FUSE_DESTROY = 38,
-#if (__FreeBSD__ >= 10)
+ FUSE_LOOKUP = 1,
+ FUSE_FORGET = 2, /* no reply */
+ FUSE_GETATTR = 3,
+ FUSE_SETATTR = 4,
+ FUSE_READLINK = 5,
+ FUSE_SYMLINK = 6,
+ FUSE_MKNOD = 8,
+ FUSE_MKDIR = 9,
+ FUSE_UNLINK = 10,
+ FUSE_RMDIR = 11,
+ FUSE_RENAME = 12,
+ FUSE_LINK = 13,
+ FUSE_OPEN = 14,
+ FUSE_READ = 15,
+ FUSE_WRITE = 16,
+ FUSE_STATFS = 17,
+ FUSE_RELEASE = 18,
+ FUSE_FSYNC = 20,
+ FUSE_SETXATTR = 21,
+ FUSE_GETXATTR = 22,
+ FUSE_LISTXATTR = 23,
+ FUSE_REMOVEXATTR = 24,
+ FUSE_FLUSH = 25,
+ FUSE_INIT = 26,
+ FUSE_OPENDIR = 27,
+ FUSE_READDIR = 28,
+ FUSE_RELEASEDIR = 29,
+ FUSE_FSYNCDIR = 30,
+ FUSE_GETLK = 31,
+ FUSE_SETLK = 32,
+ FUSE_SETLKW = 33,
+ FUSE_ACCESS = 34,
+ FUSE_CREATE = 35,
+ FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
+ FUSE_DESTROY = 38,
+ /*
+ FUSE_IOCTL = 39,
+ FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ */
+
FUSE_SETVOLNAME = 61,
- FUSE_GETXTIMES = 62,
- FUSE_EXCHANGE = 63,
-#endif /* __FreeBSD__ >= 10 */
+ FUSE_GETXTIMES = 62,
+ FUSE_EXCHANGE = 63,
};
/* The read buffer is required to be at least 8k, but may be much larger */
#define FUSE_MIN_READ_BUFFER 8192
struct fuse_entry_out {
- __u64 nodeid; /* Inode ID */
- __u64 generation; /* Inode generation: nodeid:gen must
- be unique for the fs's lifetime */
- __u64 entry_valid; /* Cache timeout for the name */
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 entry_valid_nsec;
- __u32 attr_valid_nsec;
- struct fuse_attr attr;
+ __u64 nodeid; /* Inode ID */
+ __u64 generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ __u64 entry_valid; /* Cache timeout for the name */
+ __u64 attr_valid; /* Cache timeout for the attributes */
+ __u32 entry_valid_nsec;
+ __u32 attr_valid_nsec;
+ struct fuse_attr attr;
};
struct fuse_forget_in {
- __u64 nlookup;
+ __u64 nlookup;
};
struct fuse_attr_out {
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 attr_valid_nsec;
- __u32 dummy;
- struct fuse_attr attr;
+ __u64 attr_valid; /* Cache timeout for the attributes */
+ __u32 attr_valid_nsec;
+ __u32 dummy;
+ struct fuse_attr attr;
};
-#if (__FreeBSD__ >= 10)
struct fuse_getxtimes_out {
- __u64 bkuptime;
- __u64 crtime;
- __u32 bkuptimensec;
- __u32 crtimensec;
+ __u64 bkuptime;
+ __u64 crtime;
+ __u32 bkuptimensec;
+ __u32 crtimensec;
};
-#endif /* __FreeBSD__ >= 10 */
struct fuse_mknod_in {
- __u32 mode;
- __u32 rdev;
+ __u32 mode;
+ __u32 rdev;
};
struct fuse_mkdir_in {
- __u32 mode;
- __u32 padding;
+ __u32 mode;
+ __u32 padding;
};
struct fuse_rename_in {
- __u64 newdir;
+ __u64 newdir;
};
-#if (__FreeBSD__ >= 10)
struct fuse_exchange_in {
- __u64 olddir;
- __u64 newdir;
- __u64 options;
+ __u64 olddir;
+ __u64 newdir;
+ __u64 options;
};
-#endif /* __FreeBSD__ >= 10 */
struct fuse_link_in {
- __u64 oldnodeid;
+ __u64 oldnodeid;
};
struct fuse_setattr_in {
- __u32 valid;
- __u32 padding;
- __u64 fh;
- __u64 size;
- __u64 unused1;
- __u64 atime;
- __u64 mtime;
- __u64 unused2;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 unused3;
- __u32 mode;
- __u32 unused4;
- __u32 uid;
- __u32 gid;
- __u32 unused5;
-#if (__FreeBSD__ >= 10)
- __u64 bkuptime;
- __u64 chgtime;
- __u64 crtime;
- __u32 bkuptimensec;
- __u32 chgtimensec;
- __u32 crtimensec;
- __u32 flags; /* file flags; see chflags(2) */
-#endif /* __FreeBSD__ >= 10 */
+ __u32 valid;
+ __u32 padding;
+ __u64 fh;
+ __u64 size;
+ __u64 unused1;
+ __u64 atime;
+ __u64 mtime;
+ __u64 unused2;
+ __u32 atimensec;
+ __u32 mtimensec;
+ __u32 unused3;
+ __u32 mode;
+ __u32 unused4;
+ __u32 uid;
+ __u32 gid;
+ __u32 unused5;
+ __u64 bkuptime;
+ __u64 chgtime;
+ __u64 crtime;
+ __u32 bkuptimensec;
+ __u32 chgtimensec;
+ __u32 crtimensec;
+ __u32 flags; /* file flags; see chflags(2) */
};
struct fuse_open_in {
- __u32 flags;
- __u32 mode;
+ __u32 flags;
+ __u32 mode;
};
struct fuse_open_out {
- __u64 fh;
- __u32 open_flags;
- __u32 padding;
+ __u64 fh;
+ __u32 open_flags;
+ __u32 padding;
};
struct fuse_release_in {
- __u64 fh;
- __u32 flags;
- __u32 release_flags;
- __u64 lock_owner;
+ __u64 fh;
+ __u32 flags;
+ __u32 release_flags;
+ __u64 lock_owner;
};
struct fuse_flush_in {
- __u64 fh;
- __u32 unused;
- __u32 padding;
- __u64 lock_owner;
+ __u64 fh;
+ __u32 unused;
+ __u32 padding;
+ __u64 lock_owner;
};
struct fuse_read_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 padding;
+ __u64 fh;
+ __u64 offset;
+ __u32 size;
+ __u32 padding;
};
struct fuse_write_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 write_flags;
+ __u64 fh;
+ __u64 offset;
+ __u32 size;
+ __u32 write_flags;
};
struct fuse_write_out {
- __u32 size;
- __u32 padding;
+ __u32 size;
+ __u32 padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
struct fuse_statfs_out {
- struct fuse_kstatfs st;
+ struct fuse_kstatfs st;
};
struct fuse_fsync_in {
- __u64 fh;
- __u32 fsync_flags;
- __u32 padding;
+ __u64 fh;
+ __u32 fsync_flags;
+ __u32 padding;
};
struct fuse_setxattr_in {
- __u32 size;
- __u32 flags;
-#if (__FreeBSD__ >= 10)
- __u32 position;
- __u32 padding;
-#endif /* __FreeBSD__ >= 10 */
+ __u32 size;
+ __u32 flags;
+ __u32 position;
+ __u32 padding;
};
struct fuse_getxattr_in {
- __u32 size;
- __u32 padding;
-#if (__FreeBSD__ >= 10)
- __u32 position;
- __u32 padding2;
-#endif /* __FreeBSD__ >= 10 */
+ __u32 size;
+ __u32 padding;
+ __u32 position;
+ __u32 padding2;
};
struct fuse_getxattr_out {
- __u32 size;
- __u32 padding;
+ __u32 size;
+ __u32 padding;
};
struct fuse_lk_in {
- __u64 fh;
- __u64 owner;
- struct fuse_file_lock lk;
+ __u64 fh;
+ __u64 owner;
+ struct fuse_file_lock lk;
};
struct fuse_lk_out {
- struct fuse_file_lock lk;
+ struct fuse_file_lock lk;
};
struct fuse_access_in {
- __u32 mask;
- __u32 padding;
+ __u32 mask;
+ __u32 padding;
};
struct fuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
+ __u32 major;
+ __u32 minor;
+ __u32 max_readahead;
+ __u32 flags;
};
struct fuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
- __u32 unused;
- __u32 max_write;
+ __u32 major;
+ __u32 minor;
+ __u32 max_readahead;
+ __u32 flags;
+ __u32 unused;
+ __u32 max_write;
};
struct fuse_interrupt_in {
- __u64 unique;
+ __u64 unique;
};
struct fuse_bmap_in {
- __u64 block;
- __u32 blocksize;
- __u32 padding;
+ __u64 block;
+ __u32 blocksize;
+ __u32 padding;
};
struct fuse_bmap_out {
- __u64 block;
+ __u64 block;
};
struct fuse_in_header {
- __u32 len;
- __u32 opcode;
- __u64 unique;
- __u64 nodeid;
- __u32 uid;
- __u32 gid;
- __u32 pid;
- __u32 padding;
+ __u32 len;
+ __u32 opcode;
+ __u64 unique;
+ __u64 nodeid;
+ __u32 uid;
+ __u32 gid;
+ __u32 pid;
+ __u32 padding;
};
struct fuse_out_header {
- __u32 len;
- __s32 error;
- __u64 unique;
+ __u32 len;
+ __s32 error;
+ __u64 unique;
};
struct fuse_dirent {
- __u64 ino;
- __u64 off;
- __u32 namelen;
- __u32 type;
- char name[0];
+ __u64 ino;
+ __u64 off;
+ __u32 namelen;
+ __u32 type;
+ char name[0];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
#define FUSE_DIRENT_SIZE(d) \
- FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
diff --git a/contrib/fuse-util/mount_util.h b/contrib/fuse-include/mount_util.h
index f392f99f17a..f392f99f17a 100644
--- a/contrib/fuse-util/mount_util.h
+++ b/contrib/fuse-include/mount_util.h
diff --git a/contrib/fuse-lib/misc.c b/contrib/fuse-lib/misc.c
index 0c41b1a1917..1a9b418e511 100644
--- a/contrib/fuse-lib/misc.c
+++ b/contrib/fuse-lib/misc.c
@@ -10,7 +10,7 @@
#include <string.h>
#include <limits.h>
#include <fcntl.h>
-#include "glusterfs.h"
+#include "glusterfs/glusterfs.h"
#include "fuse_kernel.h"
#include "fuse-misc.h"
@@ -41,7 +41,6 @@ void
convert_fuse_file_lock (struct fuse_file_lock *fl, struct gf_flock *flock,
uint64_t lk_owner)
{
- memset (flock, 0, sizeof (struct flock));
flock->l_type = fl->type;
flock->l_whence = SEEK_SET;
flock->l_start = fl->start;
diff --git a/contrib/fuse-lib/mount-common.c b/contrib/fuse-lib/mount-common.c
new file mode 100644
index 00000000000..cffd4c01ed5
--- /dev/null
+++ b/contrib/fuse-lib/mount-common.c
@@ -0,0 +1,282 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#include "mount-gluster-compat.h"
+
+/*
+ * These functions (and gf_fuse_umount() in mount.c)
+ * were originally taken from libfuse as of commit 7960e99e
+ * (http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=commit;h=7960e99e)
+ * almost verbatim. What has been changed upon adoption:
+ * - style adopted to that of glusterfs
+ * - s/fprintf/gf_log/
+ * - s/free/FREE/, s/malloc/MALLOC/
+ * - there are some other minor things
+ *
+ * For changes that were made later and syncs with upstream,
+ * see the commit log and per-function comments.
+ */
+
+#ifdef GF_LINUX_HOST_OS
+/* FUSE: cherry-picked bd99f9cf */
+static int
+mtab_needs_update (const char *mnt)
+{
+ int res;
+ struct stat stbuf;
+
+ /* If mtab is within new mount, don't touch it */
+ if (strncmp (mnt, _PATH_MOUNTED, sizeof (_PATH_MOUNTED) - 1) == 0 &&
+ _PATH_MOUNTED[strlen (mnt)] == '/')
+ return 0;
+
+ /*
+ * Skip mtab update if /etc/mtab:
+ *
+ * - doesn't exist,
+ * - is a symlink,
+ * - is on a read-only filesystem.
+ */
+ res = lstat (_PATH_MOUNTED, &stbuf);
+ if (res == -1) {
+ if (errno == ENOENT)
+ return 0;
+ } else {
+ uid_t ruid;
+ int err;
+
+ if (S_ISLNK (stbuf.st_mode))
+ return 0;
+
+ ruid = getuid ();
+ if (ruid != 0)
+ setreuid (0, -1);
+
+ res = access (_PATH_MOUNTED, W_OK);
+ err = (res == -1) ? errno : 0;
+ if (ruid != 0)
+ setreuid (ruid, -1);
+
+ if (err == EROFS)
+ return 0;
+ }
+
+ return 1;
+}
+#else /* GF_LINUX_HOST_OS */
+#define mtab_needs_update(x) 1
+#endif /* GF_LINUX_HOST_OS */
+
+/* FUSE: called add_mount_legacy(); R.I.P. as of cbd3a2a8 */
+int
+fuse_mnt_add_mount (const char *progname, const char *fsname,
+ const char *mnt, const char *type, const char *opts)
+{
+ int res;
+ int status;
+ sigset_t blockmask;
+ sigset_t oldmask;
+
+ if (!mtab_needs_update (mnt))
+ return 0;
+
+ sigemptyset (&blockmask);
+ sigaddset (&blockmask, SIGCHLD);
+ res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: sigprocmask: %s",
+ progname, strerror (errno));
+ return -1;
+ }
+
+ res = fork ();
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
+ goto out_restore;
+ }
+ if (res == 0) {
+ char templ[] = "/tmp/fusermountXXXXXX";
+ char *tmp;
+
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ res = setuid (geteuid ());
+ if (res != 0) {
+ GFFUSE_LOGERR ("%s: setuid: %s", progname, strerror (errno));
+ exit (1);
+ }
+
+ /*
+ * hide in a directory, where mount isn't able to resolve
+ * fsname as a valid path
+ */
+ tmp = mkdtemp (templ);
+ if (!tmp) {
+ GFFUSE_LOGERR ("%s: failed to create temporary directory",
+ progname);
+ exit (1);
+ }
+ if (chdir (tmp)) {
+ GFFUSE_LOGERR ("%s: failed to chdir to %s: %s",
+ progname, tmp, strerror (errno));
+ exit (1);
+ }
+ rmdir (tmp);
+ execl (_PATH_MOUNT, _PATH_MOUNT, "-i", "-f", "-t", type,
+ "-o", opts, fsname, mnt, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute %s: %s",
+ progname, _PATH_MOUNT, strerror (errno));
+ exit (1);
+ }
+
+ res = waitpid (res, &status, 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
+ res = (res != -1 && status == 0) ? 0 : -1;
+
+ out_restore:
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ return res;
+}
+
+char *
+fuse_mnt_resolve_path (const char *progname, const char *orig)
+{
+ char buf[PATH_MAX];
+ char *copy;
+ char *dst;
+ char *end;
+ char *lastcomp;
+ const char *toresolv;
+
+ if (!orig[0]) {
+ GFFUSE_LOGERR ("%s: invalid mountpoint '%s'", progname, orig);
+ return NULL;
+ }
+
+ copy = strdup (orig);
+ if (copy == NULL) {
+ GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
+ return NULL;
+ }
+
+ toresolv = copy;
+ lastcomp = NULL;
+ for (end = copy + strlen (copy) - 1; end > copy && *end == '/'; end --);
+ if (end[0] != '/') {
+ char *tmp;
+ end[1] = '\0';
+ tmp = strrchr (copy, '/');
+ if (tmp == NULL) {
+ lastcomp = copy;
+ toresolv = ".";
+ } else {
+ lastcomp = tmp + 1;
+ if (tmp == copy)
+ toresolv = "/";
+ }
+ if (strcmp (lastcomp, ".") == 0 || strcmp (lastcomp, "..") == 0) {
+ lastcomp = NULL;
+ toresolv = copy;
+ }
+ else if (tmp)
+ tmp[0] = '\0';
+ }
+ if (realpath (toresolv, buf) == NULL) {
+ GFFUSE_LOGERR ("%s: bad mount point %s: %s", progname, orig,
+ strerror (errno));
+ FREE (copy);
+ return NULL;
+ }
+ if (lastcomp == NULL)
+ dst = strdup (buf);
+ else {
+ dst = (char *) MALLOC (strlen (buf) + 1 + strlen (lastcomp) + 1);
+ if (dst) {
+ unsigned buflen = strlen (buf);
+ if (buflen && buf[buflen-1] == '/')
+ sprintf (dst, "%s%s", buf, lastcomp);
+ else
+ sprintf (dst, "%s/%s", buf, lastcomp);
+ }
+ }
+ FREE (copy);
+ if (dst == NULL)
+ GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
+ return dst;
+}
+
+/* FUSE: to support some changes that were reverted since
+ * then, it was split in two (fuse_mnt_umount() and
+ * exec_umount()); however the actual code is same as here
+ * since 0197ce40
+ */
+int
+fuse_mnt_umount (const char *progname, const char *abs_mnt,
+ const char *rel_mnt, int lazy)
+{
+ int res;
+ int status;
+ sigset_t blockmask;
+ sigset_t oldmask;
+
+ if (!mtab_needs_update (abs_mnt)) {
+ res = umount2 (rel_mnt, lazy ? 2 : 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: failed to unmount %s: %s",
+ progname, abs_mnt, strerror (errno));
+ return res;
+ }
+
+ sigemptyset (&blockmask);
+ sigaddset (&blockmask, SIGCHLD);
+ res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: sigprocmask: %s", progname,
+ strerror (errno));
+ return -1;
+ }
+
+ res = fork ();
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
+ goto out_restore;
+ }
+ if (res == 0) {
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ res = setuid (geteuid ());
+ if (res != 0) {
+ GFFUSE_LOGERR ("%s: setuid: %s", progname, strerror (errno));
+ exit (1);
+ }
+#ifdef GF_LINUX_HOST_OS
+ execl ("umount", "umount", "-i", rel_mnt,
+ lazy ? "-l" : NULL, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute umount: %s",
+ progname, strerror (errno));
+#elif __NetBSD__
+ /* exitting the filesystem causes the umount */
+ exit (0);
+#else
+ execl ("umount", "umount", "-f", rel_mnt, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute umount: %s",
+ progname, strerror (errno));
+#endif /* GF_LINUX_HOST_OS */
+ exit (1);
+ }
+ res = waitpid (res, &status, 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
+
+ if (status != 0)
+ res = -1;
+
+ out_restore:
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ return res;
+}
diff --git a/contrib/fuse-lib/mount-gluster-compat.h b/contrib/fuse-lib/mount-gluster-compat.h
new file mode 100644
index 00000000000..d3646d08d8e
--- /dev/null
+++ b/contrib/fuse-lib/mount-gluster-compat.h
@@ -0,0 +1,105 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <dirent.h>
+#include <signal.h>
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#endif /* GF_LINUX_HOST_OS */
+#include <sys/stat.h>
+#include <sys/poll.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+
+#ifdef GF_LINUX_HOST_OS
+typedef unsigned long mount_flag_t;
+#endif
+
+#if defined(__NetBSD__)
+#include <perfuse.h>
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#define MS_RDONLY MNT_RDONLY
+#define MS_NOSUID MNT_NOSUID
+#define MS_NODEV MNT_NODEV
+#define MS_NOATIME MNT_NOATIME
+#define MS_NOEXEC MNT_NOEXEC
+typedef int mount_flag_t;
+#endif
+
+#if defined(GF_DARWIN_HOST_OS) || defined(__FreeBSD__)
+#include <sys/param.h>
+#include <sys/mount.h>
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#endif
+
+#if defined(__FreeBSD__)
+#define MS_RDONLY MNT_RDONLY
+#define MS_NOSUID MNT_NOSUID
+/* "nodev"/MNT_NODEV was removed from FreBSD, as it became unneeded because "As
+ * of FreeBSD 6.0 device nodes may be created in regular file systems but such
+ * nodes cannot be used to access devices." (See
+ * https://freebsd.org/cgi/man.cgi?query=mknod&sektion=8 .
+ * Also see:
+ * - https://github.com/freebsd/freebsd/commit/266790a
+ * - https://github.com/freebsd/freebsd/commit/a5e716d
+ * - 700008 in
+ * https://www.freebsd.org/doc/en/books/porters-handbook/versions-7.html .)
+ */
+#if __FreeBSD_version < 700008
+#define MS_NODEV MNT_NODEV
+#else
+#define MS_NODEV 0
+#endif
+#define MS_NOATIME MNT_NOATIME
+#define MS_NOEXEC MNT_NOEXEC
+#if __FreeBSD_version < 1000715
+typedef int mount_flag_t;
+#else
+/* __FreeBSD_version was not bumped for this type change. Anyway, see
+ * https://github.com/freebsd/freebsd/commit/e8d76f8
+ * and respective __FreeBSD_version:
+ * https://github.com/freebsd/freebsd/blob/e8d76f8/sys/sys/param.h#L61 .
+ * We use the subsequent value, 1000715, to switch. (Also see:
+ * https://www.freebsd.org/doc/en/books/porters-handbook/versions-10.html .)
+ */
+typedef long long mount_flag_t;
+#endif
+#endif
+
+#ifdef GF_LINUX_HOST_OS
+#define _PATH_MOUNT "/bin/mount"
+#else /* FreeBSD, NetBSD, MacOS X */
+#define _PATH_MOUNT "/sbin/mount"
+#endif
+
+#ifdef FUSE_UTIL
+#define MALLOC(size) malloc (size)
+#define FREE(ptr) free (ptr)
+#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
+#else /* FUSE_UTIL */
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
+
+#define GFFUSE_LOGERR(...) \
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
+#endif /* !FUSE_UTIL */
diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c
index bbc05f28683..06ff191f542 100644
--- a/contrib/fuse-lib/mount.c
+++ b/contrib/fuse-lib/mount.c
@@ -7,48 +7,8 @@
See the file COPYING.LIB.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stddef.h>
-#include <limits.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <dirent.h>
-#include <signal.h>
-#ifndef __NetBSD__
-#include <mntent.h>
-#endif /* __NetBSD__ */
-#include <sys/stat.h>
-#include <sys/poll.h>
-#include <sys/un.h>
-#include <sys/wait.h>
-#include <sys/mount.h>
-
-#ifdef __NetBSD__
-#include <perfuse.h>
-#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
-#endif
-
-#ifdef linux
-#define _PATH_MOUNT "/bin/mount"
-#else /* NetBSD, MacOS X */
-#define _PATH_MOUNT "/sbin/mount"
-#endif
-
-#ifdef FUSE_UTIL
-#define MALLOC(size) malloc (size)
-#define FREE(ptr) free (ptr)
-#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
-#else /* FUSE_UTIL */
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "mount_util.h"
+#include "mount-gluster-compat.h"
#ifdef GF_FUSERMOUNT
#define FUSERMOUNT_PROG FUSERMOUNT_DIR "/fusermount-glusterfs"
@@ -57,198 +17,118 @@
#endif
#define FUSE_DEVFD_ENV "_FUSE_DEVFD"
-#define GFFUSE_LOGERR(...) \
- gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
-#endif /* !FUSE_UTIL */
-
-/*
- * Functions below, until following note, were taken from libfuse
- * (http://git.gluster.com/?p=users/csaba/fuse.git;a=commit;h=b988bbf9)
- * almost verbatim. What has been changed:
- * - style adopted to that of glusterfs
- * - s/fprintf/gf_log/
- * - s/free/FREE/, s/malloc/MALLOC/
- * - there are some other minor things
- */
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#endif /* __FreeBSD__ */
-#ifndef __NetBSD__
-static int
-mtab_needs_update (const char *mnt)
+/* FUSE: function is called fuse_kern_unmount() */
+void
+gf_fuse_unmount (const char *mountpoint, int fd)
{
int res;
- struct stat stbuf;
-
- /* If mtab is within new mount, don't touch it */
- if (strncmp (mnt, _PATH_MOUNTED, strlen (mnt)) == 0 &&
- _PATH_MOUNTED[strlen (mnt)] == '/')
- return 0;
-
- /*
- * Skip mtab update if /etc/mtab:
- *
- * - doesn't exist,
- * - is a symlink,
- * - is on a read-only filesystem.
- */
- res = lstat (_PATH_MOUNTED, &stbuf);
- if (res == -1) {
- if (errno == ENOENT)
- return 0;
- } else {
- if (S_ISLNK (stbuf.st_mode))
- return 0;
+ int pid;
+
+ if (!mountpoint)
+ return;
+
+ if (fd != -1) {
+ struct pollfd pfd;
- res = access (_PATH_MOUNTED, W_OK);
- if (res == -1 && errno == EROFS)
- return 0;
+ pfd.fd = fd;
+ pfd.events = 0;
+ res = poll (&pfd, 1, 0);
+ /* If file poll returns POLLERR on the device file descriptor,
+ then the filesystem is already unmounted */
+ if (res == 1 && (pfd.revents & POLLERR))
+ return;
+
+ /* Need to close file descriptor, otherwise synchronous umount
+ would recurse into filesystem, and deadlock */
+ close (fd);
}
- return 1;
+ if (geteuid () == 0) {
+ fuse_mnt_umount ("fuse", mountpoint, mountpoint, 1);
+ return;
+ } else {
+ GFFUSE_LOGERR ("fuse: Effective-uid: %d", geteuid());
+ }
+
+ res = umount2 (mountpoint, 2);
+ if (res == 0)
+ return;
+
+ GFFUSE_LOGERR ("fuse: failed to unmount %s: %s",
+ mountpoint, strerror (errno));
+ pid = fork ();
+ if (pid == -1)
+ return;
+
+ if (pid == 0) {
+ const char *argv[] = { FUSERMOUNT_PROG, "-u", "-q", "-z",
+ "--", mountpoint, NULL };
+
+ execvp (FUSERMOUNT_PROG, (char **)argv);
+ GFFUSE_LOGERR ("fuse: failed to execute fuserumount: %s",
+ strerror (errno));
+ _exit (1);
+ }
+ waitpid (pid, NULL, 0);
}
-#else /* __NetBSD__ */
-#define mtab_needs_update(x) 1
-#endif /* __NetBSD__ */
-#ifndef FUSE_UTIL
-static
-#endif
+
+/* gluster-specific routines */
+
+/* Unmounting in a daemon that lurks 'till main process exits */
int
-fuse_mnt_add_mount (const char *progname, const char *fsname,
- const char *mnt, const char *type, const char *opts)
+gf_fuse_unmount_daemon (const char *mountpoint, int fd)
{
- int res;
- int status;
- sigset_t blockmask;
- sigset_t oldmask;
-
- if (!mtab_needs_update (mnt))
- return 0;
-
- sigemptyset (&blockmask);
- sigaddset (&blockmask, SIGCHLD);
- res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
- if (res == -1) {
- GFFUSE_LOGERR ("%s: sigprocmask: %s",
- progname, strerror (errno));
+ int ret = -1;
+ pid_t pid = -1;
+
+ if (fd == -1)
return -1;
- }
- res = fork ();
- if (res == -1) {
- GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
- goto out_restore;
- }
- if (res == 0) {
- char templ[] = "/tmp/fusermountXXXXXX";
- char *tmp;
-
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
-
- /*
- * hide in a directory, where mount isn't able to resolve
- * fsname as a valid path
- */
- tmp = mkdtemp (templ);
- if (!tmp) {
- GFFUSE_LOGERR ("%s: failed to create temporary directory",
- progname);
- exit (1);
- }
- if (chdir (tmp)) {
- GFFUSE_LOGERR ("%s: failed to chdir to %s: %s",
- progname, tmp, strerror (errno));
- exit (1);
- }
- rmdir (tmp);
- execl (_PATH_MOUNT, _PATH_MOUNT, "-i", "-f", "-t", type,
- "-o", opts, fsname, mnt, NULL);
- GFFUSE_LOGERR ("%s: failed to execute %s: %s",
- progname, _PATH_MOUNT, strerror (errno));
- exit (1);
+ int ump[2] = {0,};
+
+ ret = pipe(ump);
+ if (ret == -1) {
+ close (fd);
+ return -1;
}
- res = waitpid (res, &status, 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
- res = (res != -1 && status == 0) ? 0 : -1;
+ pid = fork ();
+ switch (pid) {
+ case 0:
+ {
+ char c = 0;
+ sigset_t sigset;
- out_restore:
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- return res;
-}
+ close_fds_except (ump, 1);
-#ifndef FUSE_UTIL
-static
-#endif
-char
-*fuse_mnt_resolve_path (const char *progname, const char *orig)
-{
- char buf[PATH_MAX];
- char *copy;
- char *dst;
- char *end;
- char *lastcomp;
- const char *toresolv;
-
- if (!orig[0]) {
- GFFUSE_LOGERR ("%s: invalid mountpoint '%s'", progname, orig);
- return NULL;
- }
+ setsid();
+ (void)chdir("/");
+ sigfillset(&sigset);
+ sigprocmask(SIG_BLOCK, &sigset, NULL);
- copy = strdup (orig);
- if (copy == NULL) {
- GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
- return NULL;
- }
+ read (ump[0], &c, 1);
- toresolv = copy;
- lastcomp = NULL;
- for (end = copy + strlen (copy) - 1; end > copy && *end == '/'; end --);
- if (end[0] != '/') {
- char *tmp;
- end[1] = '\0';
- tmp = strrchr (copy, '/');
- if (tmp == NULL) {
- lastcomp = copy;
- toresolv = ".";
- } else {
- lastcomp = tmp + 1;
- if (tmp == copy)
- toresolv = "/";
- }
- if (strcmp (lastcomp, ".") == 0 || strcmp (lastcomp, "..") == 0) {
- lastcomp = NULL;
- toresolv = copy;
- }
- else if (tmp)
- tmp[0] = '\0';
- }
- if (realpath (toresolv, buf) == NULL) {
- GFFUSE_LOGERR ("%s: bad mount point %s: %s", progname, orig,
- strerror (errno));
- FREE (copy);
- return NULL;
+ gf_fuse_unmount (mountpoint, fd);
+ exit (0);
}
- if (lastcomp == NULL)
- dst = strdup (buf);
- else {
- dst = (char *) MALLOC (strlen (buf) + 1 + strlen (lastcomp) + 1);
- if (dst) {
- unsigned buflen = strlen (buf);
- if (buflen && buf[buflen-1] == '/')
- sprintf (dst, "%s%s", buf, lastcomp);
- else
- sprintf (dst, "%s/%s", buf, lastcomp);
- }
+ case -1:
+ close (fd);
+ fd = -1;
+ ret = -1;
+ close (ump[1]);
}
- FREE (copy);
- if (dst == NULL)
- GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
- return dst;
+ close (ump[0]);
+
+ return ret;
}
-#ifndef FUSE_UTIL
static char *
escape (char *s)
{
@@ -279,8 +159,8 @@ escape (char *s)
}
static int
-fuse_mount_fusermount (const char *mountpoint, char *fsname, char *mnt_param,
- int fd)
+fuse_mount_fusermount (const char *mountpoint, char *fsname,
+ char *mnt_param, int fd)
{
int pid = -1;
int res = 0;
@@ -346,192 +226,177 @@ fuse_mount_fusermount (const char *mountpoint, char *fsname, char *mnt_param,
FREE (fm_mnt_params);
return ret;
}
-#endif
-#ifndef FUSE_UTIL
-static
-#endif
-int
-fuse_mnt_umount (const char *progname, const char *abs_mnt,
- const char *rel_mnt, int lazy)
+#if defined(__FreeBSD__)
+void
+build_iovec(struct iovec **iov, int *iovlen, const char *name, void *val,
+ size_t len)
{
- int res;
- int status;
- sigset_t blockmask;
- sigset_t oldmask;
-
- if (!mtab_needs_update (abs_mnt)) {
- res = umount2 (rel_mnt, lazy ? 2 : 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: failed to unmount %s: %s",
- progname, abs_mnt, strerror (errno));
- return res;
- }
-
- sigemptyset (&blockmask);
- sigaddset (&blockmask, SIGCHLD);
- res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
- if (res == -1) {
- GFFUSE_LOGERR ("%s: sigprocmask: %s", progname,
- strerror (errno));
- return -1;
- }
+ int i;
+ if (*iovlen < 0)
+ return;
+ i = *iovlen;
- res = fork ();
- if (res == -1) {
- GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
- goto out_restore;
- }
- if (res == 0) {
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
- execl ("/bin/umount", "/bin/umount", "-i", rel_mnt,
- lazy ? "-l" : NULL, NULL);
- GFFUSE_LOGERR ("%s: failed to execute /bin/umount: %s",
- progname, strerror (errno));
- exit (1);
+ *iov = realloc(*iov, sizeof **iov * (i + 2));
+ if (*iov == NULL) {
+ *iovlen = -1;
+ return;
}
- res = waitpid (res, &status, 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
-
- if (status != 0)
- res = -1;
- out_restore:
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- return res;
-}
+ (*iov)[i].iov_base = strdup(name);
+ (*iov)[i].iov_len = strlen(name) + 1;
-#ifdef FUSE_UTIL
-int
-fuse_mnt_check_empty (const char *progname, const char *mnt,
- mode_t rootmode, off_t rootsize)
-{
- int isempty = 1;
-
- if (S_ISDIR (rootmode)) {
- struct dirent *ent;
- DIR *dp = opendir (mnt);
- if (dp == NULL) {
- fprintf (stderr,
- "%s: failed to open mountpoint for reading: %s\n",
- progname, strerror (errno));
- return -1;
- }
- while ((ent = readdir (dp)) != NULL) {
- if (strcmp (ent->d_name, ".") != 0 &&
- strcmp (ent->d_name, "..") != 0) {
- isempty = 0;
- break;
- }
- }
- closedir (dp);
- } else if (rootsize)
- isempty = 0;
-
- if (!isempty) {
- fprintf (stderr, "%s: mountpoint is not empty\n", progname);
- fprintf (stderr, "%s: if you are sure this is safe, "
- "use the 'nonempty' mount option\n", progname);
- return -1;
+ i++;
+ (*iov)[i].iov_base = val;
+ if (len == (size_t) -1) {
+ if (val != NULL)
+ len = strlen(val) + 1;
+ else
+ len = 0;
}
- return 0;
+ (*iov)[i].iov_len = (int)len;
+ *iovlen = ++i;
}
-int
-fuse_mnt_check_fuseblk (void)
+/*
+ * This function is needed for compatibility with parameters
+ * which used to use the mount_argf() command for the old mount() syscall.
+ */
+void
+build_iovec_argf(struct iovec **iov, int *iovlen, const char *name,
+ const char *fmt, ...)
{
- char buf[256];
- FILE *f = fopen ("/proc/filesystems", "r");
- if (!f)
- return 1;
-
- while (fgets (buf, sizeof (buf), f))
- if (strstr (buf, "fuseblk\n")) {
- fclose (f);
- return 1;
- }
+ va_list ap;
+ char val[255] = { 0 };
- fclose (f);
- return 0;
+ va_start(ap, fmt);
+ vsnprintf(val, sizeof(val), fmt, ap);
+ va_end(ap);
+ build_iovec(iov, iovlen, name, strdup(val), (size_t)-1);
}
+#endif /* __FreeBSD__ */
+
+struct mount_flags {
+ const char *opt;
+ mount_flag_t flag;
+ int on;
+} mount_flags[] = {
+ /* We provide best effort cross platform support for mount flags by
+ * defining the ones which are commonly used in Unix-like OS-es.
+ */
+ {"ro", MS_RDONLY, 1},
+ {"nosuid", MS_NOSUID, 1},
+ {"nodev", MS_NODEV, 1},
+ {"noatime", MS_NOATIME, 1},
+ {"noexec", MS_NOEXEC, 1},
+#ifdef GF_LINUX_HOST_OS
+ {"rw", MS_RDONLY, 0},
+ {"suid", MS_NOSUID, 0},
+ {"dev", MS_NODEV, 0},
+ {"exec", MS_NOEXEC, 0},
+ {"async", MS_SYNCHRONOUS, 0},
+ {"sync", MS_SYNCHRONOUS, 1},
+ {"atime", MS_NOATIME, 0},
+ {"dirsync", MS_DIRSYNC, 1},
#endif
+ {NULL, 0, 0}
+};
-#ifndef FUSE_UTIL
-void
-gf_fuse_unmount (const char *mountpoint, int fd)
+static int
+mount_param_to_flag (char *mnt_param, mount_flag_t *mntflags,
+ char **mnt_param_new)
{
- int res;
- int pid;
-
- if (!mountpoint)
- return;
+ gf_boolean_t found = _gf_false;
+ struct mount_flags *flag = NULL;
+ char *param_tok = NULL;
+ token_iter_t tit = {0,};
+ gf_boolean_t iter_end = _gf_false;
+
+ /* Allocate a buffer that will hold the mount parameters remaining
+ * after the ones corresponding to mount flags are processed and
+ * removed.The length of the original params are a good upper bound
+ * of the size needed.
+ */
+ *mnt_param_new = strdup (mnt_param);
+ if (!*mnt_param_new)
+ return -1;
- if (fd != -1) {
- struct pollfd pfd;
+ for (param_tok = token_iter_init (*mnt_param_new, ',', &tit) ;;) {
+ iter_end = next_token (&param_tok, &tit);
- pfd.fd = fd;
- pfd.events = 0;
- res = poll (&pfd, 1, 0);
- /* If file poll returns POLLERR on the device file descriptor,
- then the filesystem is already unmounted */
- if (res == 1 && (pfd.revents & POLLERR))
- return;
+ found = _gf_false;
+ for (flag = mount_flags; flag->opt; flag++) {
+ /* Compare the mount flag name to the param
+ * name at hand.
+ */
+ if (strcmp (flag->opt, param_tok) == 0) {
+ /* If there is a match, adjust mntflags
+ * accordingly and break.
+ */
+ if (flag->on) {
+ *mntflags |= flag->flag;
+ } else {
+ *mntflags &= ~flag->flag;
+ }
+ found = _gf_true;
+ break;
+ }
+ }
+ /* Exclude flag names from new parameter list. */
+ if (found)
+ drop_token (param_tok, &tit);
- /* Need to close file descriptor, otherwise synchronous umount
- would recurse into filesystem, and deadlock */
- close (fd);
+ if (iter_end)
+ break;
}
- if (geteuid () == 0) {
- fuse_mnt_umount ("fuse", mountpoint, mountpoint, 1);
- return;
- }
-
- res = umount2 (mountpoint, 2);
- if (res == 0)
- return;
-
- pid = fork ();
- if (pid == -1)
- return;
-
- if (pid == 0) {
- const char *argv[] = { FUSERMOUNT_PROG, "-u", "-q", "-z",
- "--", mountpoint, NULL };
-
- execvp (FUSERMOUNT_PROG, (char **)argv);
- _exit (1);
- }
- waitpid (pid, NULL, 0);
+ return 0;
}
-#endif
-
-/*
- * Functions below are loosely modelled after similar functions of libfuse
- */
-#ifndef FUSE_UTIL
static int
-fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
+fuse_mount_sys (const char *mountpoint, char *fsname,
+ char *mnt_param, int fd)
{
int ret = -1;
unsigned mounted = 0;
char *mnt_param_mnt = NULL;
char *fstype = "fuse.glusterfs";
char *source = fsname;
-
- ret = asprintf (&mnt_param_mnt,
- "%s,fd=%i,rootmode=%o,user_id=%i,group_id=%i",
- mnt_param, fd, S_IFDIR, getuid (), getgid ());
+ mount_flag_t mountflags = 0;
+ char *mnt_param_new = NULL;
+
+ ret = mount_param_to_flag (mnt_param, &mountflags, &mnt_param_new);
+ if (ret == 0)
+ ret = asprintf (&mnt_param_mnt,
+ "%s,fd=%i,rootmode=%o,user_id=%i,group_id=%i",
+ mnt_param_new, fd, S_IFDIR, getuid (),
+ getgid ());
if (ret == -1) {
GFFUSE_LOGERR ("Out of memory");
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+
+#ifdef __FreeBSD__
+ struct iovec *iov = NULL;
+ int iovlen = 0;
+ char fdstr[15];
+ sprintf (fdstr, "%d", fd);
+
+ build_iovec (&iov, &iovlen, "fstype", "fusefs", -1);
+ build_iovec (&iov, &iovlen, "subtype", "glusterfs", -1);
+ build_iovec (&iov, &iovlen, "fspath", __DECONST(void *, mountpoint),
+ -1);
+ build_iovec (&iov, &iovlen, "from", "/dev/fuse", -1);
+ build_iovec (&iov, &iovlen, "volname", source, -1);
+ build_iovec (&iov, &iovlen, "fd", fdstr, -1);
+ build_iovec (&iov, &iovlen, "allow_other", NULL, -1);
+ ret = nmount (iov, iovlen, mountflags);
+#else
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
+#endif /* __FreeBSD__ */
+#ifdef GF_LINUX_HOST_OS
if (ret == -1 && errno == ENODEV) {
/* fs subtype support was added by 79c0b2df aka
v2.6.21-3159-g79c0b2d. Probably we have an
@@ -543,16 +408,19 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
}
+#endif /* GF_LINUX_HOST_OS */
if (ret == -1)
goto out;
else
mounted = 1;
+#ifdef GF_LINUX_HOST_OS
if (geteuid () == 0) {
char *newmnt = fuse_mnt_resolve_path ("fuse", mountpoint);
+ char *mnt_param_mtab = NULL;
if (!newmnt) {
ret = -1;
@@ -560,8 +428,17 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
- ret = fuse_mnt_add_mount ("fuse", source, newmnt, fstype,
- mnt_param);
+ ret = asprintf (&mnt_param_mtab, "%s%s",
+ mountflags & MS_RDONLY ? "ro," : "",
+ mnt_param_new);
+ if (ret == -1)
+ GFFUSE_LOGERR ("Out of memory");
+ else {
+ ret = fuse_mnt_add_mount ("fuse", source, newmnt,
+ fstype, mnt_param_mtab);
+ FREE (mnt_param_mtab);
+ }
+
FREE (newmnt);
if (ret == -1) {
GFFUSE_LOGERR ("failed to add mtab entry");
@@ -569,13 +446,16 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
}
+#endif /* GF_LINUX_HOST_OS */
out:
if (ret == -1) {
+ GFFUSE_LOGERR("ret = -1\n");
if (mounted)
umount2 (mountpoint, 2); /* lazy umount */
}
FREE (mnt_param_mnt);
+ FREE (mnt_param_new);
if (source != fsname)
FREE (source);
@@ -583,8 +463,8 @@ out:
}
int
-gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
- pid_t *mnt_pid, int status_fd)
+gf_fuse_mount (const char *mountpoint, char *fsname,
+ char *mnt_param, pid_t *mnt_pid, int status_fd)
{
int fd = -1;
pid_t pid = -1;
@@ -614,16 +494,21 @@ gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
ret = fuse_mount_sys (mountpoint, fsname, mnt_param, fd);
if (ret == -1) {
gf_log ("glusterfs-fuse", GF_LOG_INFO,
- "direct mount failed (%s), "
- "retry to mount via fusermount",
- strerror (errno));
+ "direct mount failed (%s) errno %d",
+ strerror (errno), errno);
+
+ if (errno == EPERM) {
+ gf_log ("glusterfs-fuse", GF_LOG_INFO,
+ "retry to mount via fusermount");
- ret = fuse_mount_fusermount (mountpoint, fsname,
- mnt_param, fd);
+ ret = fuse_mount_fusermount (mountpoint, fsname,
+ mnt_param, fd);
+ }
}
if (ret == -1)
- GFFUSE_LOGERR ("mount failed");
+ GFFUSE_LOGERR ("mount of %s to %s (%s) failed",
+ fsname, mountpoint, mnt_param);
if (status_fd >= 0)
(void)write (status_fd, &ret, sizeof (ret));
@@ -639,4 +524,3 @@ gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
return fd;
}
-#endif
diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am
index a72e3832b16..abbc10eb6d9 100644
--- a/contrib/fuse-util/Makefile.am
+++ b/contrib/fuse-util/Makefile.am
@@ -1,9 +1,11 @@
bin_PROGRAMS = fusermount-glusterfs
-fusermount_glusterfs_SOURCES = fusermount.c $(CONTRIBDIR)/fuse-lib/mount.c
-noinst_HEADERS = mount_util.h
+fusermount_glusterfs_SOURCES = fusermount.c mount_util.c $(CONTRIBDIR)/fuse-lib/mount-common.c
+noinst_HEADERS = $(CONTRIBDIR)/fuse-include/mount_util.h
-AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -DFUSE_UTIL $(GF_CFLAGS) -D_GNU_SOURCE
+AM_CPPFLAGS = $(GF_CPPFLAGS) -DFUSE_UTIL -I$(CONTRIBDIR)/fuse-include -I$(CONTRIBDIR)/fuse-lib
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
install-exec-hook:
-chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
diff --git a/contrib/fuse-util/fusermount.c b/contrib/fuse-util/fusermount.c
index 4e7e4f9501c..ff743f75a21 100644
--- a/contrib/fuse-util/fusermount.c
+++ b/contrib/fuse-util/fusermount.c
@@ -10,6 +10,11 @@
#include <config.h>
#include "mount_util.h"
+
+#ifndef HAVE_UMOUNT2
+#include "mount-gluster-compat.h"
+#endif
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -20,11 +25,18 @@
#include <fcntl.h>
#include <pwd.h>
#include <limits.h>
+#if !defined(__NetBSD__) && !defined(GF_DARWIN_HOST_OS)
#include <mntent.h>
+#endif /* __NetBSD__ */
#include <sys/wait.h>
#include <sys/stat.h>
-#include <sys/mount.h>
+#ifdef HAVE_SET_FSID
#include <sys/fsuid.h>
+#endif
+#ifdef GF_DARWIN_HOST_OS
+#include <sys/param.h>
+#endif
+#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <sched.h>
@@ -63,22 +75,32 @@ static const char *get_user_name(void)
}
}
+#ifdef HAVE_SET_FSID
static uid_t oldfsuid;
static gid_t oldfsgid;
+#endif
static void drop_privs(void)
{
if (getuid() != 0) {
+#ifdef HAVE_SET_FSID
oldfsuid = setfsuid(getuid());
oldfsgid = setfsgid(getgid());
+#else
+ fprintf(stderr, "%s: Implement alternative setfsuid/gid \n", progname);
+#endif
}
}
static void restore_privs(void)
{
if (getuid() != 0) {
+#ifdef HAVE_SET_FSID
setfsuid(oldfsuid);
setfsgid(oldfsgid);
+#else
+ fprintf(stderr, "%s: Implement alternative setfsuid/gid \n", progname);
+#endif
}
}
@@ -116,8 +138,16 @@ static int lock_umount(void)
static void unlock_umount(int mtablock)
{
- lockf(mtablock, F_ULOCK, 0);
- close(mtablock);
+ if (mtablock >= 0) {
+ int res;
+
+ res = lockf(mtablock, F_ULOCK, 0);
+ if (res < 0) {
+ fprintf(stderr, "%s: error releasing lock: %s\n",
+ progname, strerror(errno));
+ }
+ close(mtablock);
+ }
}
static int add_mount(const char *source, const char *mnt, const char *type,
@@ -240,7 +270,7 @@ static int check_is_mount_child(void *p)
}
count = 0;
- while ((entp = getmntent(fp)) != NULL)
+ while (getmntent(fp) != NULL)
count++;
endmntent(fp);
@@ -327,7 +357,7 @@ static int check_is_mount(const char *last, const char *mnt)
return 0;
}
-static int chdir_to_parent(char *copy, const char **lastp, int *currdir_fd)
+static int chdir_to_parent(char *copy, const char **lastp)
{
char *tmp;
const char *parent;
@@ -352,14 +382,6 @@ static int chdir_to_parent(char *copy, const char **lastp, int *currdir_fd)
parent = "/";
}
- *currdir_fd = open(".", O_RDONLY);
- if (*currdir_fd == -1) {
- fprintf(stderr,
- "%s: failed to open current directory: %s\n",
- progname, strerror(errno));
- return -1;
- }
-
res = chdir(parent);
if (res == -1) {
fprintf(stderr, "%s: failed to chdir to %s: %s\n",
@@ -384,7 +406,6 @@ static int chdir_to_parent(char *copy, const char **lastp, int *currdir_fd)
static int unmount_fuse_locked(const char *mnt, int quiet, int lazy)
{
- int currdir_fd = -1;
char *copy;
const char *last;
int res;
@@ -401,7 +422,7 @@ static int unmount_fuse_locked(const char *mnt, int quiet, int lazy)
return -1;
}
- res = chdir_to_parent(copy, &last, &currdir_fd);
+ res = chdir_to_parent(copy, &last);
if (res == -1)
goto out;
@@ -413,10 +434,6 @@ static int unmount_fuse_locked(const char *mnt, int quiet, int lazy)
out:
free(copy);
- if (currdir_fd != -1) {
- fchdir(currdir_fd);
- close(currdir_fd);
- }
return res;
}
@@ -503,20 +520,22 @@ static void parse_line(char *line, int linenum)
static void read_conf(void)
{
+ int len;
FILE *fp = fopen(FUSE_CONF, "r");
if (fp != NULL) {
int linenum = 1;
char line[256];
int isnewline = 1;
while (fgets(line, sizeof(line), fp) != NULL) {
+ len = strlen (line);
if (isnewline) {
- if (line[strlen(line)-1] == '\n') {
+ if (len && line[len-1] == '\n') {
strip_line(line);
parse_line(line, linenum);
} else {
isnewline = 0;
}
- } else if(line[strlen(line)-1] == '\n') {
+ } else if (len && line[len-1] == '\n') {
fprintf(stderr, "%s: reading %s: line %i too long\n", progname, FUSE_CONF, linenum);
isnewline = 1;
@@ -611,7 +630,7 @@ static int add_option(char **optsp, const char *opt, unsigned expand)
static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
{
int i;
- int l;
+ size_t l;
if (!(flags & MS_RDONLY) && add_option(mnt_optsp, "rw", 0) == -1)
return -1;
@@ -626,7 +645,7 @@ static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
return -1;
/* remove comma from end of opts*/
l = strlen(*mnt_optsp);
- if ((*mnt_optsp)[l-1] == ',')
+ if (l && (*mnt_optsp)[l-1] == ',')
(*mnt_optsp)[l-1] = '\0';
if (getuid() != 0) {
const char *user = get_user_name();
@@ -655,8 +674,7 @@ static int get_string_opt(const char *s, unsigned len, const char *opt,
unsigned opt_len = strlen(opt);
char *d;
- if (*val)
- free(*val);
+ free(*val);
*val = (char *) malloc(len - opt_len + 1);
if (!*val) {
fprintf(stderr, "%s: failed to allocate memory\n", progname);
@@ -825,15 +843,14 @@ static int do_mount(const char *mnt, char **typep, mode_t rootmode,
fprintf(stderr, "%s: mount failed: %s\n", progname,
strerror(errno_save));
goto err;
- } else {
- *sourcep = source;
- *typep = type;
- *mnt_optsp = mnt_opts;
}
+ *sourcep = source;
+ *typep = type;
+ *mnt_optsp = mnt_opts;
free(fsname);
free(optbuf);
- return res;
+ return 0;
err:
free(fsname);
@@ -876,8 +893,7 @@ static int check_version(const char *dev)
return 0;
}
-static int check_perm(const char **mntp, struct stat *stbuf, int *currdir_fd,
- int *mountpoint_fd)
+static int check_perm(const char **mntp, struct stat *stbuf, int *mountpoint_fd)
{
int res;
const char *mnt = *mntp;
@@ -895,13 +911,6 @@ static int check_perm(const char **mntp, struct stat *stbuf, int *currdir_fd,
return 0;
if (S_ISDIR(stbuf->st_mode)) {
- *currdir_fd = open(".", O_RDONLY);
- if (*currdir_fd == -1) {
- fprintf(stderr,
- "%s: failed to open current directory: %s\n",
- progname, strerror(errno));
- return -1;
- }
res = chdir(mnt);
if (res == -1) {
fprintf(stderr,
@@ -1057,7 +1066,6 @@ static int mount_fuse(const char *mnt, const char *opts, char *devfd)
char *source = NULL;
char *mnt_opts = NULL;
const char *real_mnt = mnt;
- int currdir_fd = -1;
int mountpoint_fd = -1;
fd = devfd ? check_fuse_device(devfd, &dev) : open_fuse_device(&dev);
@@ -1071,15 +1079,13 @@ static int mount_fuse(const char *mnt, const char *opts, char *devfd)
int mount_count = count_fuse_fs();
if (mount_count >= mount_max) {
fprintf(stderr, "%s: too many FUSE filesystems mounted; mount_max=N can be set in /etc/fuse.conf\n", progname);
- close(fd);
- return -1;
+ goto fail_close_fd;
}
}
res = check_version(dev);
if (res != -1) {
- res = check_perm(&real_mnt, &stbuf, &currdir_fd,
- &mountpoint_fd);
+ res = check_perm(&real_mnt, &stbuf, &mountpoint_fd);
restore_privs();
if (res != -1)
res = do_mount(real_mnt, &type, stbuf.st_mode & S_IFMT,
@@ -1088,33 +1094,38 @@ static int mount_fuse(const char *mnt, const char *opts, char *devfd)
} else
restore_privs();
- if (currdir_fd != -1) {
- fchdir(currdir_fd);
- close(currdir_fd);
- }
if (mountpoint_fd != -1)
close(mountpoint_fd);
+ if (res == -1)
+ goto fail_close_fd;
+
+ res = chdir("/");
if (res == -1) {
- close(fd);
- return -1;
+ fprintf(stderr, "%s: failed to chdir to '/'\n", progname);
+ goto fail_close_fd;
}
if (geteuid() == 0) {
res = add_mount(source, mnt, type, mnt_opts);
if (res == -1) {
- umount2(mnt, 2); /* lazy umount */
- close(fd);
- return -1;
+ /* Can't clean up mount in a non-racy way */
+ goto fail_close_fd;
}
}
+out_free:
free(source);
free(type);
free(mnt_opts);
free(dev);
return fd;
+
+fail_close_fd:
+ close(fd);
+ fd = -1;
+ goto out_free;
}
static int send_fd(int sock_fd, int fd)
@@ -1253,6 +1264,13 @@ int main(int argc, char *argv[])
drop_privs();
mnt = fuse_mnt_resolve_path(progname, origmnt);
+ if (mnt != NULL) {
+ res = chdir("/");
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to chdir to '/'\n", progname);
+ exit(1);
+ }
+ }
restore_privs();
if (mnt == NULL)
exit(1);
diff --git a/contrib/fuse-util/mount_util.c b/contrib/fuse-util/mount_util.c
new file mode 100644
index 00000000000..911b84445e9
--- /dev/null
+++ b/contrib/fuse-util/mount_util.c
@@ -0,0 +1,64 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#include <dirent.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int fuse_mnt_check_empty(const char *progname, const char *mnt,
+ mode_t rootmode, off_t rootsize)
+{
+ int isempty = 1;
+
+ if (S_ISDIR(rootmode)) {
+ struct dirent *ent;
+ DIR *dp = opendir(mnt);
+ if (dp == NULL) {
+ fprintf(stderr,
+ "%s: failed to open mountpoint for reading: %s\n",
+ progname, strerror(errno));
+ return -1;
+ }
+ while ((ent = readdir(dp)) != NULL) {
+ if (strcmp(ent->d_name, ".") != 0 &&
+ strcmp(ent->d_name, "..") != 0) {
+ isempty = 0;
+ break;
+ }
+ }
+ closedir(dp);
+ } else if (rootsize)
+ isempty = 0;
+
+ if (!isempty) {
+ fprintf(stderr, "%s: mountpoint is not empty\n", progname);
+ fprintf(stderr, "%s: if you are sure this is safe, use the 'nonempty' mount option\n", progname);
+ return -1;
+ }
+ return 0;
+}
+
+int fuse_mnt_check_fuseblk(void)
+{
+ char buf[256];
+ FILE *f = fopen("/proc/filesystems", "r");
+ if (!f)
+ return 1;
+
+ while (fgets(buf, sizeof(buf), f))
+ if (strstr(buf, "fuseblk\n")) {
+ fclose(f);
+ return 1;
+ }
+
+ fclose(f);
+ return 0;
+}
diff --git a/contrib/ipaddr-py/COPYING b/contrib/ipaddr-py/COPYING
deleted file mode 100644
index d6456956733..00000000000
--- a/contrib/ipaddr-py/COPYING
+++ /dev/null
@@ -1,202 +0,0 @@
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/contrib/ipaddr-py/MANIFEST.in b/contrib/ipaddr-py/MANIFEST.in
deleted file mode 100644
index f572804441b..00000000000
--- a/contrib/ipaddr-py/MANIFEST.in
+++ /dev/null
@@ -1,3 +0,0 @@
-include COPYING
-include ipaddr_test.py
-include RELEASENOTES
diff --git a/contrib/ipaddr-py/OWNERS b/contrib/ipaddr-py/OWNERS
deleted file mode 100644
index 501673e0395..00000000000
--- a/contrib/ipaddr-py/OWNERS
+++ /dev/null
@@ -1,4 +0,0 @@
-pmoody
-harro
-mshields
-smart
diff --git a/contrib/ipaddr-py/README b/contrib/ipaddr-py/README
deleted file mode 100644
index 1b54294bb10..00000000000
--- a/contrib/ipaddr-py/README
+++ /dev/null
@@ -1,8 +0,0 @@
-ipaddr.py is a library for working with IP addresses, both IPv4 and IPv6.
-It was developed by Google for internal use, and is now open source.
-
-Project home page: http://code.google.com/p/ipaddr-py/
-
-Please send contributions to ipaddr-py-dev@googlegroups.com. Code should
-include unit tests and follow the Google Python style guide:
-http://code.google.com/p/soc/wiki/PythonStyleGuide
diff --git a/contrib/ipaddr-py/ipaddr.py b/contrib/ipaddr-py/ipaddr.py
deleted file mode 100644
index a89298a315d..00000000000
--- a/contrib/ipaddr-py/ipaddr.py
+++ /dev/null
@@ -1,1907 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2007 Google Inc.
-# Licensed to PSF under a Contributor Agreement.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied. See the License for the specific language governing
-# permissions and limitations under the License.
-
-"""A fast, lightweight IPv4/IPv6 manipulation library in Python.
-
-This library is used to create/poke/manipulate IPv4 and IPv6 addresses
-and networks.
-
-"""
-
-__version__ = 'trunk'
-
-import struct
-
-IPV4LENGTH = 32
-IPV6LENGTH = 128
-
-
-class AddressValueError(ValueError):
- """A Value Error related to the address."""
-
-
-class NetmaskValueError(ValueError):
- """A Value Error related to the netmask."""
-
-
-def IPAddress(address, version=None):
- """Take an IP string/int and return an object of the correct type.
-
- Args:
- address: A string or integer, the IP address. Either IPv4 or
- IPv6 addresses may be supplied; integers less than 2**32 will
- be considered to be IPv4 by default.
- version: An Integer, 4 or 6. If set, don't try to automatically
- determine what the IP address type is. important for things
- like IPAddress(1), which could be IPv4, '0.0.0.1', or IPv6,
- '::1'.
-
- Returns:
- An IPv4Address or IPv6Address object.
-
- Raises:
- ValueError: if the string passed isn't either a v4 or a v6
- address.
-
- """
- if version:
- if version == 4:
- return IPv4Address(address)
- elif version == 6:
- return IPv6Address(address)
-
- try:
- return IPv4Address(address)
- except (AddressValueError, NetmaskValueError):
- pass
-
- try:
- return IPv6Address(address)
- except (AddressValueError, NetmaskValueError):
- pass
-
- raise ValueError('%r does not appear to be an IPv4 or IPv6 address' %
- address)
-
-
-def IPNetwork(address, version=None, strict=False):
- """Take an IP string/int and return an object of the correct type.
-
- Args:
- address: A string or integer, the IP address. Either IPv4 or
- IPv6 addresses may be supplied; integers less than 2**32 will
- be considered to be IPv4 by default.
- version: An Integer, if set, don't try to automatically
- determine what the IP address type is. important for things
- like IPNetwork(1), which could be IPv4, '0.0.0.1/32', or IPv6,
- '::1/128'.
-
- Returns:
- An IPv4Network or IPv6Network object.
-
- Raises:
- ValueError: if the string passed isn't either a v4 or a v6
- address. Or if a strict network was requested and a strict
- network wasn't given.
-
- """
- if version:
- if version == 4:
- return IPv4Network(address, strict)
- elif version == 6:
- return IPv6Network(address, strict)
-
- try:
- return IPv4Network(address, strict)
- except (AddressValueError, NetmaskValueError):
- pass
-
- try:
- return IPv6Network(address, strict)
- except (AddressValueError, NetmaskValueError):
- pass
-
- raise ValueError('%r does not appear to be an IPv4 or IPv6 network' %
- address)
-
-
-def v4_int_to_packed(address):
- """The binary representation of this address.
-
- Args:
- address: An integer representation of an IPv4 IP address.
-
- Returns:
- The binary representation of this address.
-
- Raises:
- ValueError: If the integer is too large to be an IPv4 IP
- address.
- """
- if address > _BaseV4._ALL_ONES:
- raise ValueError('Address too large for IPv4')
- return struct.pack('!I', address)
-
-
-def v6_int_to_packed(address):
- """The binary representation of this address.
-
- Args:
- address: An integer representation of an IPv4 IP address.
-
- Returns:
- The binary representation of this address.
- """
- return struct.pack('!QQ', address >> 64, address & (2**64 - 1))
-
-
-def _find_address_range(addresses):
- """Find a sequence of addresses.
-
- Args:
- addresses: a list of IPv4 or IPv6 addresses.
-
- Returns:
- A tuple containing the first and last IP addresses in the sequence.
-
- """
- first = last = addresses[0]
- for ip in addresses[1:]:
- if ip._ip == last._ip + 1:
- last = ip
- else:
- break
- return (first, last)
-
-def _get_prefix_length(number1, number2, bits):
- """Get the number of leading bits that are same for two numbers.
-
- Args:
- number1: an integer.
- number2: another integer.
- bits: the maximum number of bits to compare.
-
- Returns:
- The number of leading bits that are the same for two numbers.
-
- """
- for i in range(bits):
- if number1 >> i == number2 >> i:
- return bits - i
- return 0
-
-def _count_righthand_zero_bits(number, bits):
- """Count the number of zero bits on the right hand side.
-
- Args:
- number: an integer.
- bits: maximum number of bits to count.
-
- Returns:
- The number of zero bits on the right hand side of the number.
-
- """
- if number == 0:
- return bits
- for i in range(bits):
- if (number >> i) % 2:
- return i
-
-def summarize_address_range(first, last):
- """Summarize a network range given the first and last IP addresses.
-
- Example:
- >>> summarize_address_range(IPv4Address('1.1.1.0'),
- IPv4Address('1.1.1.130'))
- [IPv4Network('1.1.1.0/25'), IPv4Network('1.1.1.128/31'),
- IPv4Network('1.1.1.130/32')]
-
- Args:
- first: the first IPv4Address or IPv6Address in the range.
- last: the last IPv4Address or IPv6Address in the range.
-
- Returns:
- The address range collapsed to a list of IPv4Network's or
- IPv6Network's.
-
- Raise:
- TypeError:
- If the first and last objects are not IP addresses.
- If the first and last objects are not the same version.
- ValueError:
- If the last object is not greater than the first.
- If the version is not 4 or 6.
-
- """
- if not (isinstance(first, _BaseIP) and isinstance(last, _BaseIP)):
- raise TypeError('first and last must be IP addresses, not networks')
- if first.version != last.version:
- raise TypeError("%s and %s are not of the same version" % (
- str(first), str(last)))
- if first > last:
- raise ValueError('last IP address must be greater than first')
-
- networks = []
-
- if first.version == 4:
- ip = IPv4Network
- elif first.version == 6:
- ip = IPv6Network
- else:
- raise ValueError('unknown IP version')
-
- ip_bits = first._max_prefixlen
- first_int = first._ip
- last_int = last._ip
- while first_int <= last_int:
- nbits = _count_righthand_zero_bits(first_int, ip_bits)
- current = None
- while nbits >= 0:
- addend = 2**nbits - 1
- current = first_int + addend
- nbits -= 1
- if current <= last_int:
- break
- prefix = _get_prefix_length(first_int, current, ip_bits)
- net = ip('%s/%d' % (str(first), prefix))
- networks.append(net)
- if current == ip._ALL_ONES:
- break
- first_int = current + 1
- first = IPAddress(first_int, version=first._version)
- return networks
-
-def _collapse_address_list_recursive(addresses):
- """Loops through the addresses, collapsing concurrent netblocks.
-
- Example:
-
- ip1 = IPv4Network('1.1.0.0/24')
- ip2 = IPv4Network('1.1.1.0/24')
- ip3 = IPv4Network('1.1.2.0/24')
- ip4 = IPv4Network('1.1.3.0/24')
- ip5 = IPv4Network('1.1.4.0/24')
- ip6 = IPv4Network('1.1.0.1/22')
-
- _collapse_address_list_recursive([ip1, ip2, ip3, ip4, ip5, ip6]) ->
- [IPv4Network('1.1.0.0/22'), IPv4Network('1.1.4.0/24')]
-
- This shouldn't be called directly; it is called via
- collapse_address_list([]).
-
- Args:
- addresses: A list of IPv4Network's or IPv6Network's
-
- Returns:
- A list of IPv4Network's or IPv6Network's depending on what we were
- passed.
-
- """
- ret_array = []
- optimized = False
-
- for cur_addr in addresses:
- if not ret_array:
- ret_array.append(cur_addr)
- continue
- if cur_addr in ret_array[-1]:
- optimized = True
- elif cur_addr == ret_array[-1].supernet().subnet()[1]:
- ret_array.append(ret_array.pop().supernet())
- optimized = True
- else:
- ret_array.append(cur_addr)
-
- if optimized:
- return _collapse_address_list_recursive(ret_array)
-
- return ret_array
-
-
-def collapse_address_list(addresses):
- """Collapse a list of IP objects.
-
- Example:
- collapse_address_list([IPv4('1.1.0.0/24'), IPv4('1.1.1.0/24')]) ->
- [IPv4('1.1.0.0/23')]
-
- Args:
- addresses: A list of IPv4Network or IPv6Network objects.
-
- Returns:
- A list of IPv4Network or IPv6Network objects depending on what we
- were passed.
-
- Raises:
- TypeError: If passed a list of mixed version objects.
-
- """
- i = 0
- addrs = []
- ips = []
- nets = []
-
- # split IP addresses and networks
- for ip in addresses:
- if isinstance(ip, _BaseIP):
- if ips and ips[-1]._version != ip._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(ip), str(ips[-1])))
- ips.append(ip)
- elif ip._prefixlen == ip._max_prefixlen:
- if ips and ips[-1]._version != ip._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(ip), str(ips[-1])))
- ips.append(ip.ip)
- else:
- if nets and nets[-1]._version != ip._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(ip), str(ips[-1])))
- nets.append(ip)
-
- # sort and dedup
- ips = sorted(set(ips))
- nets = sorted(set(nets))
-
- while i < len(ips):
- (first, last) = _find_address_range(ips[i:])
- i = ips.index(last) + 1
- addrs.extend(summarize_address_range(first, last))
-
- return _collapse_address_list_recursive(sorted(
- addrs + nets, key=_BaseNet._get_networks_key))
-
-# backwards compatibility
-CollapseAddrList = collapse_address_list
-
-# Test whether this Python implementation supports byte objects that
-# are not identical to str ones.
-# We need to exclude platforms where bytes == str so that we can
-# distinguish between packed representations and strings, for example
-# b'12::' (the IPv4 address 49.50.58.58) and '12::' (an IPv6 address).
-try:
- _compat_has_real_bytes = bytes is not str
-except NameError: # <Python2.6
- _compat_has_real_bytes = False
-
-def get_mixed_type_key(obj):
- """Return a key suitable for sorting between networks and addresses.
-
- Address and Network objects are not sortable by default; they're
- fundamentally different so the expression
-
- IPv4Address('1.1.1.1') <= IPv4Network('1.1.1.1/24')
-
- doesn't make any sense. There are some times however, where you may wish
- to have ipaddr sort these for you anyway. If you need to do this, you
- can use this function as the key= argument to sorted().
-
- Args:
- obj: either a Network or Address object.
- Returns:
- appropriate key.
-
- """
- if isinstance(obj, _BaseNet):
- return obj._get_networks_key()
- elif isinstance(obj, _BaseIP):
- return obj._get_address_key()
- return NotImplemented
-
-class _IPAddrBase(object):
-
- """The mother class."""
-
- def __index__(self):
- return self._ip
-
- def __int__(self):
- return self._ip
-
- def __hex__(self):
- return hex(self._ip)
-
- @property
- def exploded(self):
- """Return the longhand version of the IP address as a string."""
- return self._explode_shorthand_ip_string()
-
- @property
- def compressed(self):
- """Return the shorthand version of the IP address as a string."""
- return str(self)
-
-
-class _BaseIP(_IPAddrBase):
-
- """A generic IP object.
-
- This IP class contains the version independent methods which are
- used by single IP addresses.
-
- """
-
- def __init__(self, address):
- if (not (_compat_has_real_bytes and isinstance(address, bytes))
- and '/' in str(address)):
- raise AddressValueError(address)
-
- def __eq__(self, other):
- try:
- return (self._ip == other._ip
- and self._version == other._version)
- except AttributeError:
- return NotImplemented
-
- def __ne__(self, other):
- eq = self.__eq__(other)
- if eq is NotImplemented:
- return NotImplemented
- return not eq
-
- def __le__(self, other):
- gt = self.__gt__(other)
- if gt is NotImplemented:
- return NotImplemented
- return not gt
-
- def __ge__(self, other):
- lt = self.__lt__(other)
- if lt is NotImplemented:
- return NotImplemented
- return not lt
-
- def __lt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseIP):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self._ip != other._ip:
- return self._ip < other._ip
- return False
-
- def __gt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseIP):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self._ip != other._ip:
- return self._ip > other._ip
- return False
-
- # Shorthand for Integer addition and subtraction. This is not
- # meant to ever support addition/subtraction of addresses.
- def __add__(self, other):
- if not isinstance(other, int):
- return NotImplemented
- return IPAddress(int(self) + other, version=self._version)
-
- def __sub__(self, other):
- if not isinstance(other, int):
- return NotImplemented
- return IPAddress(int(self) - other, version=self._version)
-
- def __repr__(self):
- return '%s(%r)' % (self.__class__.__name__, str(self))
-
- def __str__(self):
- return '%s' % self._string_from_ip_int(self._ip)
-
- def __hash__(self):
- return hash(hex(long(self._ip)))
-
- def _get_address_key(self):
- return (self._version, self)
-
- @property
- def version(self):
- raise NotImplementedError('BaseIP has no version')
-
-
-class _BaseNet(_IPAddrBase):
-
- """A generic IP object.
-
- This IP class contains the version independent methods which are
- used by networks.
-
- """
-
- def __init__(self, address):
- self._cache = {}
-
- def __repr__(self):
- return '%s(%r)' % (self.__class__.__name__, str(self))
-
- def iterhosts(self):
- """Generate Iterator over usable hosts in a network.
-
- This is like __iter__ except it doesn't return the network
- or broadcast addresses.
-
- """
- cur = int(self.network) + 1
- bcast = int(self.broadcast) - 1
- while cur <= bcast:
- cur += 1
- yield IPAddress(cur - 1, version=self._version)
-
- def __iter__(self):
- cur = int(self.network)
- bcast = int(self.broadcast)
- while cur <= bcast:
- cur += 1
- yield IPAddress(cur - 1, version=self._version)
-
- def __getitem__(self, n):
- network = int(self.network)
- broadcast = int(self.broadcast)
- if n >= 0:
- if network + n > broadcast:
- raise IndexError
- return IPAddress(network + n, version=self._version)
- else:
- n += 1
- if broadcast + n < network:
- raise IndexError
- return IPAddress(broadcast + n, version=self._version)
-
- def __lt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseNet):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self.network != other.network:
- return self.network < other.network
- if self.netmask != other.netmask:
- return self.netmask < other.netmask
- return False
-
- def __gt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseNet):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self.network != other.network:
- return self.network > other.network
- if self.netmask != other.netmask:
- return self.netmask > other.netmask
- return False
-
- def __le__(self, other):
- gt = self.__gt__(other)
- if gt is NotImplemented:
- return NotImplemented
- return not gt
-
- def __ge__(self, other):
- lt = self.__lt__(other)
- if lt is NotImplemented:
- return NotImplemented
- return not lt
-
- def __eq__(self, other):
- try:
- return (self._version == other._version
- and self.network == other.network
- and int(self.netmask) == int(other.netmask))
- except AttributeError:
- if isinstance(other, _BaseIP):
- return (self._version == other._version
- and self._ip == other._ip)
-
- def __ne__(self, other):
- eq = self.__eq__(other)
- if eq is NotImplemented:
- return NotImplemented
- return not eq
-
- def __str__(self):
- return '%s/%s' % (str(self.ip),
- str(self._prefixlen))
-
- def __hash__(self):
- return hash(int(self.network) ^ int(self.netmask))
-
- def __contains__(self, other):
- # always false if one is v4 and the other is v6.
- if self._version != other._version:
- return False
- # dealing with another network.
- if isinstance(other, _BaseNet):
- return (self.network <= other.network and
- self.broadcast >= other.broadcast)
- # dealing with another address
- else:
- return (int(self.network) <= int(other._ip) <=
- int(self.broadcast))
-
- def overlaps(self, other):
- """Tell if self is partly contained in other."""
- return self.network in other or self.broadcast in other or (
- other.network in self or other.broadcast in self)
-
- @property
- def network(self):
- x = self._cache.get('network')
- if x is None:
- x = IPAddress(self._ip & int(self.netmask), version=self._version)
- self._cache['network'] = x
- return x
-
- @property
- def broadcast(self):
- x = self._cache.get('broadcast')
- if x is None:
- x = IPAddress(self._ip | int(self.hostmask), version=self._version)
- self._cache['broadcast'] = x
- return x
-
- @property
- def hostmask(self):
- x = self._cache.get('hostmask')
- if x is None:
- x = IPAddress(int(self.netmask) ^ self._ALL_ONES,
- version=self._version)
- self._cache['hostmask'] = x
- return x
-
- @property
- def with_prefixlen(self):
- return '%s/%d' % (str(self.ip), self._prefixlen)
-
- @property
- def with_netmask(self):
- return '%s/%s' % (str(self.ip), str(self.netmask))
-
- @property
- def with_hostmask(self):
- return '%s/%s' % (str(self.ip), str(self.hostmask))
-
- @property
- def numhosts(self):
- """Number of hosts in the current subnet."""
- return int(self.broadcast) - int(self.network) + 1
-
- @property
- def version(self):
- raise NotImplementedError('BaseNet has no version')
-
- @property
- def prefixlen(self):
- return self._prefixlen
-
- def address_exclude(self, other):
- """Remove an address from a larger block.
-
- For example:
-
- addr1 = IPNetwork('10.1.1.0/24')
- addr2 = IPNetwork('10.1.1.0/26')
- addr1.address_exclude(addr2) =
- [IPNetwork('10.1.1.64/26'), IPNetwork('10.1.1.128/25')]
-
- or IPv6:
-
- addr1 = IPNetwork('::1/32')
- addr2 = IPNetwork('::1/128')
- addr1.address_exclude(addr2) = [IPNetwork('::0/128'),
- IPNetwork('::2/127'),
- IPNetwork('::4/126'),
- IPNetwork('::8/125'),
- ...
- IPNetwork('0:0:8000::/33')]
-
- Args:
- other: An IPvXNetwork object of the same type.
-
- Returns:
- A sorted list of IPvXNetwork objects addresses which is self
- minus other.
-
- Raises:
- TypeError: If self and other are of difffering address
- versions, or if other is not a network object.
- ValueError: If other is not completely contained by self.
-
- """
- if not self._version == other._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(self), str(other)))
-
- if not isinstance(other, _BaseNet):
- raise TypeError("%s is not a network object" % str(other))
-
- if other not in self:
- raise ValueError('%s not contained in %s' % (str(other),
- str(self)))
- if other == self:
- return []
-
- ret_addrs = []
-
- # Make sure we're comparing the network of other.
- other = IPNetwork('%s/%s' % (str(other.network), str(other.prefixlen)),
- version=other._version)
-
- s1, s2 = self.subnet()
- while s1 != other and s2 != other:
- if other in s1:
- ret_addrs.append(s2)
- s1, s2 = s1.subnet()
- elif other in s2:
- ret_addrs.append(s1)
- s1, s2 = s2.subnet()
- else:
- # If we got here, there's a bug somewhere.
- assert True == False, ('Error performing exclusion: '
- 's1: %s s2: %s other: %s' %
- (str(s1), str(s2), str(other)))
- if s1 == other:
- ret_addrs.append(s2)
- elif s2 == other:
- ret_addrs.append(s1)
- else:
- # If we got here, there's a bug somewhere.
- assert True == False, ('Error performing exclusion: '
- 's1: %s s2: %s other: %s' %
- (str(s1), str(s2), str(other)))
-
- return sorted(ret_addrs, key=_BaseNet._get_networks_key)
-
- def compare_networks(self, other):
- """Compare two IP objects.
-
- This is only concerned about the comparison of the integer
- representation of the network addresses. This means that the
- host bits aren't considered at all in this method. If you want
- to compare host bits, you can easily enough do a
- 'HostA._ip < HostB._ip'
-
- Args:
- other: An IP object.
-
- Returns:
- If the IP versions of self and other are the same, returns:
-
- -1 if self < other:
- eg: IPv4('1.1.1.0/24') < IPv4('1.1.2.0/24')
- IPv6('1080::200C:417A') < IPv6('1080::200B:417B')
- 0 if self == other
- eg: IPv4('1.1.1.1/24') == IPv4('1.1.1.2/24')
- IPv6('1080::200C:417A/96') == IPv6('1080::200C:417B/96')
- 1 if self > other
- eg: IPv4('1.1.1.0/24') > IPv4('1.1.0.0/24')
- IPv6('1080::1:200C:417A/112') >
- IPv6('1080::0:200C:417A/112')
-
- If the IP versions of self and other are different, returns:
-
- -1 if self._version < other._version
- eg: IPv4('10.0.0.1/24') < IPv6('::1/128')
- 1 if self._version > other._version
- eg: IPv6('::1/128') > IPv4('255.255.255.0/24')
-
- """
- if self._version < other._version:
- return -1
- if self._version > other._version:
- return 1
- # self._version == other._version below here:
- if self.network < other.network:
- return -1
- if self.network > other.network:
- return 1
- # self.network == other.network below here:
- if self.netmask < other.netmask:
- return -1
- if self.netmask > other.netmask:
- return 1
- # self.network == other.network and self.netmask == other.netmask
- return 0
-
- def _get_networks_key(self):
- """Network-only key function.
-
- Returns an object that identifies this address' network and
- netmask. This function is a suitable "key" argument for sorted()
- and list.sort().
-
- """
- return (self._version, self.network, self.netmask)
-
- def _ip_int_from_prefix(self, prefixlen=None):
- """Turn the prefix length netmask into a int for comparison.
-
- Args:
- prefixlen: An integer, the prefix length.
-
- Returns:
- An integer.
-
- """
- if not prefixlen and prefixlen != 0:
- prefixlen = self._prefixlen
- return self._ALL_ONES ^ (self._ALL_ONES >> prefixlen)
-
- def _prefix_from_ip_int(self, ip_int, mask=32):
- """Return prefix length from the decimal netmask.
-
- Args:
- ip_int: An integer, the IP address.
- mask: The netmask. Defaults to 32.
-
- Returns:
- An integer, the prefix length.
-
- """
- while mask:
- if ip_int & 1 == 1:
- break
- ip_int >>= 1
- mask -= 1
-
- return mask
-
- def _ip_string_from_prefix(self, prefixlen=None):
- """Turn a prefix length into a dotted decimal string.
-
- Args:
- prefixlen: An integer, the netmask prefix length.
-
- Returns:
- A string, the dotted decimal netmask string.
-
- """
- if not prefixlen:
- prefixlen = self._prefixlen
- return self._string_from_ip_int(self._ip_int_from_prefix(prefixlen))
-
- def iter_subnets(self, prefixlen_diff=1, new_prefix=None):
- """The subnets which join to make the current subnet.
-
- In the case that self contains only one IP
- (self._prefixlen == 32 for IPv4 or self._prefixlen == 128
- for IPv6), return a list with just ourself.
-
- Args:
- prefixlen_diff: An integer, the amount the prefix length
- should be increased by. This should not be set if
- new_prefix is also set.
- new_prefix: The desired new prefix length. This must be a
- larger number (smaller prefix) than the existing prefix.
- This should not be set if prefixlen_diff is also set.
-
- Returns:
- An iterator of IPv(4|6) objects.
-
- Raises:
- ValueError: The prefixlen_diff is too small or too large.
- OR
- prefixlen_diff and new_prefix are both set or new_prefix
- is a smaller number than the current prefix (smaller
- number means a larger network)
-
- """
- if self._prefixlen == self._max_prefixlen:
- yield self
- return
-
- if new_prefix is not None:
- if new_prefix < self._prefixlen:
- raise ValueError('new prefix must be longer')
- if prefixlen_diff != 1:
- raise ValueError('cannot set prefixlen_diff and new_prefix')
- prefixlen_diff = new_prefix - self._prefixlen
-
- if prefixlen_diff < 0:
- raise ValueError('prefix length diff must be > 0')
- new_prefixlen = self._prefixlen + prefixlen_diff
-
- if not self._is_valid_netmask(str(new_prefixlen)):
- raise ValueError(
- 'prefix length diff %d is invalid for netblock %s' % (
- new_prefixlen, str(self)))
-
- first = IPNetwork('%s/%s' % (str(self.network),
- str(self._prefixlen + prefixlen_diff)),
- version=self._version)
-
- yield first
- current = first
- while True:
- broadcast = current.broadcast
- if broadcast == self.broadcast:
- return
- new_addr = IPAddress(int(broadcast) + 1, version=self._version)
- current = IPNetwork('%s/%s' % (str(new_addr), str(new_prefixlen)),
- version=self._version)
-
- yield current
-
- def masked(self):
- """Return the network object with the host bits masked out."""
- return IPNetwork('%s/%d' % (self.network, self._prefixlen),
- version=self._version)
-
- def subnet(self, prefixlen_diff=1, new_prefix=None):
- """Return a list of subnets, rather than an iterator."""
- return list(self.iter_subnets(prefixlen_diff, new_prefix))
-
- def supernet(self, prefixlen_diff=1, new_prefix=None):
- """The supernet containing the current network.
-
- Args:
- prefixlen_diff: An integer, the amount the prefix length of
- the network should be decreased by. For example, given a
- /24 network and a prefixlen_diff of 3, a supernet with a
- /21 netmask is returned.
-
- Returns:
- An IPv4 network object.
-
- Raises:
- ValueError: If self.prefixlen - prefixlen_diff < 0. I.e., you have a
- negative prefix length.
- OR
- If prefixlen_diff and new_prefix are both set or new_prefix is a
- larger number than the current prefix (larger number means a
- smaller network)
-
- """
- if self._prefixlen == 0:
- return self
-
- if new_prefix is not None:
- if new_prefix > self._prefixlen:
- raise ValueError('new prefix must be shorter')
- if prefixlen_diff != 1:
- raise ValueError('cannot set prefixlen_diff and new_prefix')
- prefixlen_diff = self._prefixlen - new_prefix
-
-
- if self.prefixlen - prefixlen_diff < 0:
- raise ValueError(
- 'current prefixlen is %d, cannot have a prefixlen_diff of %d' %
- (self.prefixlen, prefixlen_diff))
- return IPNetwork('%s/%s' % (str(self.network),
- str(self.prefixlen - prefixlen_diff)),
- version=self._version)
-
- # backwards compatibility
- Subnet = subnet
- Supernet = supernet
- AddressExclude = address_exclude
- CompareNetworks = compare_networks
- Contains = __contains__
-
-
-class _BaseV4(object):
-
- """Base IPv4 object.
-
- The following methods are used by IPv4 objects in both single IP
- addresses and networks.
-
- """
-
- # Equivalent to 255.255.255.255 or 32 bits of 1's.
- _ALL_ONES = (2**IPV4LENGTH) - 1
- _DECIMAL_DIGITS = frozenset('0123456789')
-
- def __init__(self, address):
- self._version = 4
- self._max_prefixlen = IPV4LENGTH
-
- def _explode_shorthand_ip_string(self, ip_str=None):
- if not ip_str:
- ip_str = str(self)
- return ip_str
-
- def _ip_int_from_string(self, ip_str):
- """Turn the given IP string into an integer for comparison.
-
- Args:
- ip_str: A string, the IP ip_str.
-
- Returns:
- The IP ip_str as an integer.
-
- Raises:
- AddressValueError: if ip_str isn't a valid IPv4 Address.
-
- """
- octets = ip_str.split('.')
- if len(octets) != 4:
- raise AddressValueError(ip_str)
-
- packed_ip = 0
- for oc in octets:
- try:
- packed_ip = (packed_ip << 8) | self._parse_octet(oc)
- except ValueError:
- raise AddressValueError(ip_str)
- return packed_ip
-
- def _parse_octet(self, octet_str):
- """Convert a decimal octet into an integer.
-
- Args:
- octet_str: A string, the number to parse.
-
- Returns:
- The octet as an integer.
-
- Raises:
- ValueError: if the octet isn't strictly a decimal from [0..255].
-
- """
- # Whitelist the characters, since int() allows a lot of bizarre stuff.
- if not self._DECIMAL_DIGITS.issuperset(octet_str):
- raise ValueError
- octet_int = int(octet_str, 10)
- # Disallow leading zeroes, because no clear standard exists on
- # whether these should be interpreted as decimal or octal.
- if octet_int > 255 or (octet_str[0] == '0' and len(octet_str) > 1):
- raise ValueError
- return octet_int
-
- def _string_from_ip_int(self, ip_int):
- """Turns a 32-bit integer into dotted decimal notation.
-
- Args:
- ip_int: An integer, the IP address.
-
- Returns:
- The IP address as a string in dotted decimal notation.
-
- """
- octets = []
- for _ in xrange(4):
- octets.insert(0, str(ip_int & 0xFF))
- ip_int >>= 8
- return '.'.join(octets)
-
- @property
- def max_prefixlen(self):
- return self._max_prefixlen
-
- @property
- def packed(self):
- """The binary representation of this address."""
- return v4_int_to_packed(self._ip)
-
- @property
- def version(self):
- return self._version
-
- @property
- def is_reserved(self):
- """Test if the address is otherwise IETF reserved.
-
- Returns:
- A boolean, True if the address is within the
- reserved IPv4 Network range.
-
- """
- return self in IPv4Network('240.0.0.0/4')
-
- @property
- def is_private(self):
- """Test if this address is allocated for private networks.
-
- Returns:
- A boolean, True if the address is reserved per RFC 1918.
-
- """
- return (self in IPv4Network('10.0.0.0/8') or
- self in IPv4Network('172.16.0.0/12') or
- self in IPv4Network('192.168.0.0/16'))
-
- @property
- def is_multicast(self):
- """Test if the address is reserved for multicast use.
-
- Returns:
- A boolean, True if the address is multicast.
- See RFC 3171 for details.
-
- """
- return self in IPv4Network('224.0.0.0/4')
-
- @property
- def is_unspecified(self):
- """Test if the address is unspecified.
-
- Returns:
- A boolean, True if this is the unspecified address as defined in
- RFC 5735 3.
-
- """
- return self in IPv4Network('0.0.0.0')
-
- @property
- def is_loopback(self):
- """Test if the address is a loopback address.
-
- Returns:
- A boolean, True if the address is a loopback per RFC 3330.
-
- """
- return self in IPv4Network('127.0.0.0/8')
-
- @property
- def is_link_local(self):
- """Test if the address is reserved for link-local.
-
- Returns:
- A boolean, True if the address is link-local per RFC 3927.
-
- """
- return self in IPv4Network('169.254.0.0/16')
-
-
-class IPv4Address(_BaseV4, _BaseIP):
-
- """Represent and manipulate single IPv4 Addresses."""
-
- def __init__(self, address):
-
- """
- Args:
- address: A string or integer representing the IP
- '192.168.1.1'
-
- Additionally, an integer can be passed, so
- IPv4Address('192.168.1.1') == IPv4Address(3232235777).
- or, more generally
- IPv4Address(int(IPv4Address('192.168.1.1'))) ==
- IPv4Address('192.168.1.1')
-
- Raises:
- AddressValueError: If ipaddr isn't a valid IPv4 address.
-
- """
- _BaseIP.__init__(self, address)
- _BaseV4.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 4:
- self._ip = struct.unpack('!I', address)[0]
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP string.
- addr_str = str(address)
- self._ip = self._ip_int_from_string(addr_str)
-
-
-class IPv4Network(_BaseV4, _BaseNet):
-
- """This class represents and manipulates 32-bit IPv4 networks.
-
- Attributes: [examples for IPv4Network('1.2.3.4/27')]
- ._ip: 16909060
- .ip: IPv4Address('1.2.3.4')
- .network: IPv4Address('1.2.3.0')
- .hostmask: IPv4Address('0.0.0.31')
- .broadcast: IPv4Address('1.2.3.31')
- .netmask: IPv4Address('255.255.255.224')
- .prefixlen: 27
-
- """
-
- # the valid octets for host and netmasks. only useful for IPv4.
- _valid_mask_octets = set((255, 254, 252, 248, 240, 224, 192, 128, 0))
-
- def __init__(self, address, strict=False):
- """Instantiate a new IPv4 network object.
-
- Args:
- address: A string or integer representing the IP [& network].
- '192.168.1.1/24'
- '192.168.1.1/255.255.255.0'
- '192.168.1.1/0.0.0.255'
- are all functionally the same in IPv4. Similarly,
- '192.168.1.1'
- '192.168.1.1/255.255.255.255'
- '192.168.1.1/32'
- are also functionaly equivalent. That is to say, failing to
- provide a subnetmask will create an object with a mask of /32.
-
- If the mask (portion after the / in the argument) is given in
- dotted quad form, it is treated as a netmask if it starts with a
- non-zero field (e.g. /255.0.0.0 == /8) and as a hostmask if it
- starts with a zero field (e.g. 0.255.255.255 == /8), with the
- single exception of an all-zero mask which is treated as a
- netmask == /0. If no mask is given, a default of /32 is used.
-
- Additionally, an integer can be passed, so
- IPv4Network('192.168.1.1') == IPv4Network(3232235777).
- or, more generally
- IPv4Network(int(IPv4Network('192.168.1.1'))) ==
- IPv4Network('192.168.1.1')
-
- strict: A boolean. If true, ensure that we have been passed
- A true network address, eg, 192.168.1.0/24 and not an
- IP address on a network, eg, 192.168.1.1/24.
-
- Raises:
- AddressValueError: If ipaddr isn't a valid IPv4 address.
- NetmaskValueError: If the netmask isn't valid for
- an IPv4 address.
- ValueError: If strict was True and a network address was not
- supplied.
-
- """
- _BaseNet.__init__(self, address)
- _BaseV4.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- self.ip = IPv4Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv4Address(self._ALL_ONES)
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 4:
- self._ip = struct.unpack('!I', address)[0]
- self.ip = IPv4Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv4Address(self._ALL_ONES)
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP prefix string.
- addr = str(address).split('/')
-
- if len(addr) > 2:
- raise AddressValueError(address)
-
- self._ip = self._ip_int_from_string(addr[0])
- self.ip = IPv4Address(self._ip)
-
- if len(addr) == 2:
- mask = addr[1].split('.')
- if len(mask) == 4:
- # We have dotted decimal netmask.
- if self._is_valid_netmask(addr[1]):
- self.netmask = IPv4Address(self._ip_int_from_string(
- addr[1]))
- elif self._is_hostmask(addr[1]):
- self.netmask = IPv4Address(
- self._ip_int_from_string(addr[1]) ^ self._ALL_ONES)
- else:
- raise NetmaskValueError('%s is not a valid netmask'
- % addr[1])
-
- self._prefixlen = self._prefix_from_ip_int(int(self.netmask))
- else:
- # We have a netmask in prefix length form.
- if not self._is_valid_netmask(addr[1]):
- raise NetmaskValueError(addr[1])
- self._prefixlen = int(addr[1])
- self.netmask = IPv4Address(self._ip_int_from_prefix(
- self._prefixlen))
- else:
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv4Address(self._ip_int_from_prefix(
- self._prefixlen))
- if strict:
- if self.ip != self.network:
- raise ValueError('%s has host bits set' %
- self.ip)
-
- def _is_hostmask(self, ip_str):
- """Test if the IP string is a hostmask (rather than a netmask).
-
- Args:
- ip_str: A string, the potential hostmask.
-
- Returns:
- A boolean, True if the IP string is a hostmask.
-
- """
- bits = ip_str.split('.')
- try:
- parts = [int(x) for x in bits if int(x) in self._valid_mask_octets]
- except ValueError:
- return False
- if len(parts) != len(bits):
- return False
- if parts[0] < parts[-1]:
- return True
- return False
-
- def _is_valid_netmask(self, netmask):
- """Verify that the netmask is valid.
-
- Args:
- netmask: A string, either a prefix or dotted decimal
- netmask.
-
- Returns:
- A boolean, True if the prefix represents a valid IPv4
- netmask.
-
- """
- mask = netmask.split('.')
- if len(mask) == 4:
- if [x for x in mask if int(x) not in self._valid_mask_octets]:
- return False
- if [y for idx, y in enumerate(mask) if idx > 0 and
- y > mask[idx - 1]]:
- return False
- return True
- try:
- netmask = int(netmask)
- except ValueError:
- return False
- return 0 <= netmask <= self._max_prefixlen
-
- # backwards compatibility
- IsRFC1918 = lambda self: self.is_private
- IsMulticast = lambda self: self.is_multicast
- IsLoopback = lambda self: self.is_loopback
- IsLinkLocal = lambda self: self.is_link_local
-
-
-class _BaseV6(object):
-
- """Base IPv6 object.
-
- The following methods are used by IPv6 objects in both single IP
- addresses and networks.
-
- """
-
- _ALL_ONES = (2**IPV6LENGTH) - 1
- _HEXTET_COUNT = 8
- _HEX_DIGITS = frozenset('0123456789ABCDEFabcdef')
-
- def __init__(self, address):
- self._version = 6
- self._max_prefixlen = IPV6LENGTH
-
- def _ip_int_from_string(self, ip_str):
- """Turn an IPv6 ip_str into an integer.
-
- Args:
- ip_str: A string, the IPv6 ip_str.
-
- Returns:
- A long, the IPv6 ip_str.
-
- Raises:
- AddressValueError: if ip_str isn't a valid IPv6 Address.
-
- """
- parts = ip_str.split(':')
-
- # An IPv6 address needs at least 2 colons (3 parts).
- if len(parts) < 3:
- raise AddressValueError(ip_str)
-
- # If the address has an IPv4-style suffix, convert it to hexadecimal.
- if '.' in parts[-1]:
- ipv4_int = IPv4Address(parts.pop())._ip
- parts.append('%x' % ((ipv4_int >> 16) & 0xFFFF))
- parts.append('%x' % (ipv4_int & 0xFFFF))
-
- # An IPv6 address can't have more than 8 colons (9 parts).
- if len(parts) > self._HEXTET_COUNT + 1:
- raise AddressValueError(ip_str)
-
- # Disregarding the endpoints, find '::' with nothing in between.
- # This indicates that a run of zeroes has been skipped.
- try:
- skip_index, = (
- [i for i in xrange(1, len(parts) - 1) if not parts[i]] or
- [None])
- except ValueError:
- # Can't have more than one '::'
- raise AddressValueError(ip_str)
-
- # parts_hi is the number of parts to copy from above/before the '::'
- # parts_lo is the number of parts to copy from below/after the '::'
- if skip_index is not None:
- # If we found a '::', then check if it also covers the endpoints.
- parts_hi = skip_index
- parts_lo = len(parts) - skip_index - 1
- if not parts[0]:
- parts_hi -= 1
- if parts_hi:
- raise AddressValueError(ip_str) # ^: requires ^::
- if not parts[-1]:
- parts_lo -= 1
- if parts_lo:
- raise AddressValueError(ip_str) # :$ requires ::$
- parts_skipped = self._HEXTET_COUNT - (parts_hi + parts_lo)
- if parts_skipped < 1:
- raise AddressValueError(ip_str)
- else:
- # Otherwise, allocate the entire address to parts_hi. The endpoints
- # could still be empty, but _parse_hextet() will check for that.
- if len(parts) != self._HEXTET_COUNT:
- raise AddressValueError(ip_str)
- parts_hi = len(parts)
- parts_lo = 0
- parts_skipped = 0
-
- try:
- # Now, parse the hextets into a 128-bit integer.
- ip_int = 0L
- for i in xrange(parts_hi):
- ip_int <<= 16
- ip_int |= self._parse_hextet(parts[i])
- ip_int <<= 16 * parts_skipped
- for i in xrange(-parts_lo, 0):
- ip_int <<= 16
- ip_int |= self._parse_hextet(parts[i])
- return ip_int
- except ValueError:
- raise AddressValueError(ip_str)
-
- def _parse_hextet(self, hextet_str):
- """Convert an IPv6 hextet string into an integer.
-
- Args:
- hextet_str: A string, the number to parse.
-
- Returns:
- The hextet as an integer.
-
- Raises:
- ValueError: if the input isn't strictly a hex number from [0..FFFF].
-
- """
- # Whitelist the characters, since int() allows a lot of bizarre stuff.
- if not self._HEX_DIGITS.issuperset(hextet_str):
- raise ValueError
- hextet_int = int(hextet_str, 16)
- if hextet_int > 0xFFFF:
- raise ValueError
- return hextet_int
-
- def _compress_hextets(self, hextets):
- """Compresses a list of hextets.
-
- Compresses a list of strings, replacing the longest continuous
- sequence of "0" in the list with "" and adding empty strings at
- the beginning or at the end of the string such that subsequently
- calling ":".join(hextets) will produce the compressed version of
- the IPv6 address.
-
- Args:
- hextets: A list of strings, the hextets to compress.
-
- Returns:
- A list of strings.
-
- """
- best_doublecolon_start = -1
- best_doublecolon_len = 0
- doublecolon_start = -1
- doublecolon_len = 0
- for index in range(len(hextets)):
- if hextets[index] == '0':
- doublecolon_len += 1
- if doublecolon_start == -1:
- # Start of a sequence of zeros.
- doublecolon_start = index
- if doublecolon_len > best_doublecolon_len:
- # This is the longest sequence of zeros so far.
- best_doublecolon_len = doublecolon_len
- best_doublecolon_start = doublecolon_start
- else:
- doublecolon_len = 0
- doublecolon_start = -1
-
- if best_doublecolon_len > 1:
- best_doublecolon_end = (best_doublecolon_start +
- best_doublecolon_len)
- # For zeros at the end of the address.
- if best_doublecolon_end == len(hextets):
- hextets += ['']
- hextets[best_doublecolon_start:best_doublecolon_end] = ['']
- # For zeros at the beginning of the address.
- if best_doublecolon_start == 0:
- hextets = [''] + hextets
-
- return hextets
-
- def _string_from_ip_int(self, ip_int=None):
- """Turns a 128-bit integer into hexadecimal notation.
-
- Args:
- ip_int: An integer, the IP address.
-
- Returns:
- A string, the hexadecimal representation of the address.
-
- Raises:
- ValueError: The address is bigger than 128 bits of all ones.
-
- """
- if not ip_int and ip_int != 0:
- ip_int = int(self._ip)
-
- if ip_int > self._ALL_ONES:
- raise ValueError('IPv6 address is too large')
-
- hex_str = '%032x' % ip_int
- hextets = []
- for x in range(0, 32, 4):
- hextets.append('%x' % int(hex_str[x:x+4], 16))
-
- hextets = self._compress_hextets(hextets)
- return ':'.join(hextets)
-
- def _explode_shorthand_ip_string(self, ip_str=None):
- """Expand a shortened IPv6 address.
-
- Args:
- ip_str: A string, the IPv6 address.
-
- Returns:
- A string, the expanded IPv6 address.
-
- """
- if not ip_str:
- ip_str = str(self)
- if isinstance(self, _BaseNet):
- ip_str = str(self.ip)
-
- ip_int = self._ip_int_from_string(ip_str)
- parts = []
- for i in xrange(self._HEXTET_COUNT):
- parts.append('%04x' % (ip_int & 0xFFFF))
- ip_int >>= 16
- parts.reverse()
- return ':'.join(parts)
-
- @property
- def max_prefixlen(self):
- return self._max_prefixlen
-
- @property
- def packed(self):
- """The binary representation of this address."""
- return v6_int_to_packed(self._ip)
-
- @property
- def version(self):
- return self._version
-
- @property
- def is_multicast(self):
- """Test if the address is reserved for multicast use.
-
- Returns:
- A boolean, True if the address is a multicast address.
- See RFC 2373 2.7 for details.
-
- """
- return self in IPv6Network('ff00::/8')
-
- @property
- def is_reserved(self):
- """Test if the address is otherwise IETF reserved.
-
- Returns:
- A boolean, True if the address is within one of the
- reserved IPv6 Network ranges.
-
- """
- return (self in IPv6Network('::/8') or
- self in IPv6Network('100::/8') or
- self in IPv6Network('200::/7') or
- self in IPv6Network('400::/6') or
- self in IPv6Network('800::/5') or
- self in IPv6Network('1000::/4') or
- self in IPv6Network('4000::/3') or
- self in IPv6Network('6000::/3') or
- self in IPv6Network('8000::/3') or
- self in IPv6Network('A000::/3') or
- self in IPv6Network('C000::/3') or
- self in IPv6Network('E000::/4') or
- self in IPv6Network('F000::/5') or
- self in IPv6Network('F800::/6') or
- self in IPv6Network('FE00::/9'))
-
- @property
- def is_unspecified(self):
- """Test if the address is unspecified.
-
- Returns:
- A boolean, True if this is the unspecified address as defined in
- RFC 2373 2.5.2.
-
- """
- return self._ip == 0 and getattr(self, '_prefixlen', 128) == 128
-
- @property
- def is_loopback(self):
- """Test if the address is a loopback address.
-
- Returns:
- A boolean, True if the address is a loopback address as defined in
- RFC 2373 2.5.3.
-
- """
- return self._ip == 1 and getattr(self, '_prefixlen', 128) == 128
-
- @property
- def is_link_local(self):
- """Test if the address is reserved for link-local.
-
- Returns:
- A boolean, True if the address is reserved per RFC 4291.
-
- """
- return self in IPv6Network('fe80::/10')
-
- @property
- def is_site_local(self):
- """Test if the address is reserved for site-local.
-
- Note that the site-local address space has been deprecated by RFC 3879.
- Use is_private to test if this address is in the space of unique local
- addresses as defined by RFC 4193.
-
- Returns:
- A boolean, True if the address is reserved per RFC 3513 2.5.6.
-
- """
- return self in IPv6Network('fec0::/10')
-
- @property
- def is_private(self):
- """Test if this address is allocated for private networks.
-
- Returns:
- A boolean, True if the address is reserved per RFC 4193.
-
- """
- return self in IPv6Network('fc00::/7')
-
- @property
- def ipv4_mapped(self):
- """Return the IPv4 mapped address.
-
- Returns:
- If the IPv6 address is a v4 mapped address, return the
- IPv4 mapped address. Return None otherwise.
-
- """
- if (self._ip >> 32) != 0xFFFF:
- return None
- return IPv4Address(self._ip & 0xFFFFFFFF)
-
- @property
- def teredo(self):
- """Tuple of embedded teredo IPs.
-
- Returns:
- Tuple of the (server, client) IPs or None if the address
- doesn't appear to be a teredo address (doesn't start with
- 2001::/32)
-
- """
- if (self._ip >> 96) != 0x20010000:
- return None
- return (IPv4Address((self._ip >> 64) & 0xFFFFFFFF),
- IPv4Address(~self._ip & 0xFFFFFFFF))
-
- @property
- def sixtofour(self):
- """Return the IPv4 6to4 embedded address.
-
- Returns:
- The IPv4 6to4-embedded address if present or None if the
- address doesn't appear to contain a 6to4 embedded address.
-
- """
- if (self._ip >> 112) != 0x2002:
- return None
- return IPv4Address((self._ip >> 80) & 0xFFFFFFFF)
-
-
-class IPv6Address(_BaseV6, _BaseIP):
-
- """Represent and manipulate single IPv6 Addresses.
- """
-
- def __init__(self, address):
- """Instantiate a new IPv6 address object.
-
- Args:
- address: A string or integer representing the IP
-
- Additionally, an integer can be passed, so
- IPv6Address('2001:4860::') ==
- IPv6Address(42541956101370907050197289607612071936L).
- or, more generally
- IPv6Address(IPv6Address('2001:4860::')._ip) ==
- IPv6Address('2001:4860::')
-
- Raises:
- AddressValueError: If address isn't a valid IPv6 address.
-
- """
- _BaseIP.__init__(self, address)
- _BaseV6.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 16:
- tmp = struct.unpack('!QQ', address)
- self._ip = (tmp[0] << 64) | tmp[1]
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP string.
- addr_str = str(address)
- if not addr_str:
- raise AddressValueError('')
-
- self._ip = self._ip_int_from_string(addr_str)
-
-
-class IPv6Network(_BaseV6, _BaseNet):
-
- """This class represents and manipulates 128-bit IPv6 networks.
-
- Attributes: [examples for IPv6('2001:658:22A:CAFE:200::1/64')]
- .ip: IPv6Address('2001:658:22a:cafe:200::1')
- .network: IPv6Address('2001:658:22a:cafe::')
- .hostmask: IPv6Address('::ffff:ffff:ffff:ffff')
- .broadcast: IPv6Address('2001:658:22a:cafe:ffff:ffff:ffff:ffff')
- .netmask: IPv6Address('ffff:ffff:ffff:ffff::')
- .prefixlen: 64
-
- """
-
-
- def __init__(self, address, strict=False):
- """Instantiate a new IPv6 Network object.
-
- Args:
- address: A string or integer representing the IPv6 network or the IP
- and prefix/netmask.
- '2001:4860::/128'
- '2001:4860:0000:0000:0000:0000:0000:0000/128'
- '2001:4860::'
- are all functionally the same in IPv6. That is to say,
- failing to provide a subnetmask will create an object with
- a mask of /128.
-
- Additionally, an integer can be passed, so
- IPv6Network('2001:4860::') ==
- IPv6Network(42541956101370907050197289607612071936L).
- or, more generally
- IPv6Network(IPv6Network('2001:4860::')._ip) ==
- IPv6Network('2001:4860::')
-
- strict: A boolean. If true, ensure that we have been passed
- A true network address, eg, 192.168.1.0/24 and not an
- IP address on a network, eg, 192.168.1.1/24.
-
- Raises:
- AddressValueError: If address isn't a valid IPv6 address.
- NetmaskValueError: If the netmask isn't valid for
- an IPv6 address.
- ValueError: If strict was True and a network address was not
- supplied.
-
- """
- _BaseNet.__init__(self, address)
- _BaseV6.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- self.ip = IPv6Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv6Address(self._ALL_ONES)
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 16:
- tmp = struct.unpack('!QQ', address)
- self._ip = (tmp[0] << 64) | tmp[1]
- self.ip = IPv6Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv6Address(self._ALL_ONES)
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP prefix string.
- addr = str(address).split('/')
-
- if len(addr) > 2:
- raise AddressValueError(address)
-
- self._ip = self._ip_int_from_string(addr[0])
- self.ip = IPv6Address(self._ip)
-
- if len(addr) == 2:
- if self._is_valid_netmask(addr[1]):
- self._prefixlen = int(addr[1])
- else:
- raise NetmaskValueError(addr[1])
- else:
- self._prefixlen = self._max_prefixlen
-
- self.netmask = IPv6Address(self._ip_int_from_prefix(self._prefixlen))
-
- if strict:
- if self.ip != self.network:
- raise ValueError('%s has host bits set' %
- self.ip)
-
- def _is_valid_netmask(self, prefixlen):
- """Verify that the netmask/prefixlen is valid.
-
- Args:
- prefixlen: A string, the netmask in prefix length format.
-
- Returns:
- A boolean, True if the prefix represents a valid IPv6
- netmask.
-
- """
- try:
- prefixlen = int(prefixlen)
- except ValueError:
- return False
- return 0 <= prefixlen <= self._max_prefixlen
-
- @property
- def with_netmask(self):
- return self.with_prefixlen
diff --git a/contrib/ipaddr-py/ipaddr_test.py b/contrib/ipaddr-py/ipaddr_test.py
deleted file mode 100755
index 09bece0e751..00000000000
--- a/contrib/ipaddr-py/ipaddr_test.py
+++ /dev/null
@@ -1,1099 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2007 Google Inc.
-# Licensed to PSF under a Contributor Agreement.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Unittest for ipaddr module."""
-
-
-import unittest
-import time
-import ipaddr
-
-# Compatibility function to cast str to bytes objects
-if ipaddr._compat_has_real_bytes:
- _cb = lambda bytestr: bytes(bytestr, 'charmap')
-else:
- _cb = str
-
-class IpaddrUnitTest(unittest.TestCase):
-
- def setUp(self):
- self.ipv4 = ipaddr.IPv4Network('1.2.3.4/24')
- self.ipv4_hostmask = ipaddr.IPv4Network('10.0.0.1/0.255.255.255')
- self.ipv6 = ipaddr.IPv6Network('2001:658:22a:cafe:200:0:0:1/64')
-
- def tearDown(self):
- del(self.ipv4)
- del(self.ipv4_hostmask)
- del(self.ipv6)
- del(self)
-
- def testRepr(self):
- self.assertEqual("IPv4Network('1.2.3.4/32')",
- repr(ipaddr.IPv4Network('1.2.3.4')))
- self.assertEqual("IPv6Network('::1/128')",
- repr(ipaddr.IPv6Network('::1')))
-
- def testAutoMasking(self):
- addr1 = ipaddr.IPv4Network('1.1.1.255/24')
- addr1_masked = ipaddr.IPv4Network('1.1.1.0/24')
- self.assertEqual(addr1_masked, addr1.masked())
-
- addr2 = ipaddr.IPv6Network('2000:cafe::efac:100/96')
- addr2_masked = ipaddr.IPv6Network('2000:cafe::/96')
- self.assertEqual(addr2_masked, addr2.masked())
-
- # issue57
- def testAddressIntMath(self):
- self.assertEqual(ipaddr.IPv4Address('1.1.1.1') + 255,
- ipaddr.IPv4Address('1.1.2.0'))
- self.assertEqual(ipaddr.IPv4Address('1.1.1.1') - 256,
- ipaddr.IPv4Address('1.1.0.1'))
- self.assertEqual(ipaddr.IPv6Address('::1') + (2**16 - 2),
- ipaddr.IPv6Address('::ffff'))
- self.assertEqual(ipaddr.IPv6Address('::ffff') - (2**16 - 2),
- ipaddr.IPv6Address('::1'))
-
- def testInvalidStrings(self):
- def AssertInvalidIP(ip_str):
- self.assertRaises(ValueError, ipaddr.IPAddress, ip_str)
- AssertInvalidIP("")
- AssertInvalidIP("016.016.016.016")
- AssertInvalidIP("016.016.016")
- AssertInvalidIP("016.016")
- AssertInvalidIP("016")
- AssertInvalidIP("000.000.000.000")
- AssertInvalidIP("000")
- AssertInvalidIP("0x0a.0x0a.0x0a.0x0a")
- AssertInvalidIP("0x0a.0x0a.0x0a")
- AssertInvalidIP("0x0a.0x0a")
- AssertInvalidIP("0x0a")
- AssertInvalidIP("42.42.42.42.42")
- AssertInvalidIP("42.42.42")
- AssertInvalidIP("42.42")
- AssertInvalidIP("42")
- AssertInvalidIP("42..42.42")
- AssertInvalidIP("42..42.42.42")
- AssertInvalidIP("42.42.42.42.")
- AssertInvalidIP("42.42.42.42...")
- AssertInvalidIP(".42.42.42.42")
- AssertInvalidIP("...42.42.42.42")
- AssertInvalidIP("42.42.42.-0")
- AssertInvalidIP("42.42.42.+0")
- AssertInvalidIP(".")
- AssertInvalidIP("...")
- AssertInvalidIP("bogus")
- AssertInvalidIP("bogus.com")
- AssertInvalidIP("192.168.0.1.com")
- AssertInvalidIP("12345.67899.-54321.-98765")
- AssertInvalidIP("257.0.0.0")
- AssertInvalidIP("42.42.42.-42")
- AssertInvalidIP("3ffe::1.net")
- AssertInvalidIP("3ffe::1::1")
- AssertInvalidIP("1::2::3::4:5")
- AssertInvalidIP("::7:6:5:4:3:2:")
- AssertInvalidIP(":6:5:4:3:2:1::")
- AssertInvalidIP("2001::db:::1")
- AssertInvalidIP("FEDC:9878")
- AssertInvalidIP("+1.+2.+3.4")
- AssertInvalidIP("1.2.3.4e0")
- AssertInvalidIP("::7:6:5:4:3:2:1:0")
- AssertInvalidIP("7:6:5:4:3:2:1:0::")
- AssertInvalidIP("9:8:7:6:5:4:3::2:1")
- AssertInvalidIP("0:1:2:3::4:5:6:7")
- AssertInvalidIP("3ffe:0:0:0:0:0:0:0:1")
- AssertInvalidIP("3ffe::10000")
- AssertInvalidIP("3ffe::goog")
- AssertInvalidIP("3ffe::-0")
- AssertInvalidIP("3ffe::+0")
- AssertInvalidIP("3ffe::-1")
- AssertInvalidIP(":")
- AssertInvalidIP(":::")
- AssertInvalidIP("::1.2.3")
- AssertInvalidIP("::1.2.3.4.5")
- AssertInvalidIP("::1.2.3.4:")
- AssertInvalidIP("1.2.3.4::")
- AssertInvalidIP("2001:db8::1:")
- AssertInvalidIP(":2001:db8::1")
- AssertInvalidIP(":1:2:3:4:5:6:7")
- AssertInvalidIP("1:2:3:4:5:6:7:")
- AssertInvalidIP(":1:2:3:4:5:6:")
-
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network, '')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
- 'google.com')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
- '::1.2.3.4')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network, '')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- 'google.com')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- '1.2.3.4')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- 'cafe:cafe::/128/190')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- '1234:axy::b')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
- '1234:axy::b')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
- '2001:db8:::1')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
- '2001:888888::1')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Address(1)._ip_int_from_string,
- '1.a.2.3')
- self.assertEqual(False, ipaddr.IPv4Network(1)._is_hostmask('1.a.2.3'))
-
- def testGetNetwork(self):
- self.assertEqual(int(self.ipv4.network), 16909056)
- self.assertEqual(str(self.ipv4.network), '1.2.3.0')
- self.assertEqual(str(self.ipv4_hostmask.network), '10.0.0.0')
-
- self.assertEqual(int(self.ipv6.network),
- 42540616829182469433403647294022090752)
- self.assertEqual(str(self.ipv6.network),
- '2001:658:22a:cafe::')
- self.assertEqual(str(self.ipv6.hostmask),
- '::ffff:ffff:ffff:ffff')
-
- def testBadVersionComparison(self):
- # These should always raise TypeError
- v4addr = ipaddr.IPAddress('1.1.1.1')
- v4net = ipaddr.IPNetwork('1.1.1.1')
- v6addr = ipaddr.IPAddress('::1')
- v6net = ipaddr.IPAddress('::1')
-
- self.assertRaises(TypeError, v4addr.__lt__, v6addr)
- self.assertRaises(TypeError, v4addr.__gt__, v6addr)
- self.assertRaises(TypeError, v4net.__lt__, v6net)
- self.assertRaises(TypeError, v4net.__gt__, v6net)
-
- self.assertRaises(TypeError, v6addr.__lt__, v4addr)
- self.assertRaises(TypeError, v6addr.__gt__, v4addr)
- self.assertRaises(TypeError, v6net.__lt__, v4net)
- self.assertRaises(TypeError, v6net.__gt__, v4net)
-
- def testMixedTypeComparison(self):
- v4addr = ipaddr.IPAddress('1.1.1.1')
- v4net = ipaddr.IPNetwork('1.1.1.1/32')
- v6addr = ipaddr.IPAddress('::1')
- v6net = ipaddr.IPNetwork('::1/128')
-
- self.assertFalse(v4net.__contains__(v6net))
- self.assertFalse(v6net.__contains__(v4net))
-
- self.assertRaises(TypeError, lambda: v4addr < v4net)
- self.assertRaises(TypeError, lambda: v4addr > v4net)
- self.assertRaises(TypeError, lambda: v4net < v4addr)
- self.assertRaises(TypeError, lambda: v4net > v4addr)
-
- self.assertRaises(TypeError, lambda: v6addr < v6net)
- self.assertRaises(TypeError, lambda: v6addr > v6net)
- self.assertRaises(TypeError, lambda: v6net < v6addr)
- self.assertRaises(TypeError, lambda: v6net > v6addr)
-
- # with get_mixed_type_key, you can sort addresses and network.
- self.assertEqual([v4addr, v4net], sorted([v4net, v4addr],
- key=ipaddr.get_mixed_type_key))
- self.assertEqual([v6addr, v6net], sorted([v6net, v6addr],
- key=ipaddr.get_mixed_type_key))
-
- def testIpFromInt(self):
- self.assertEqual(self.ipv4.ip, ipaddr.IPv4Network(16909060).ip)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, 2**32)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, -1)
-
- ipv4 = ipaddr.IPNetwork('1.2.3.4')
- ipv6 = ipaddr.IPNetwork('2001:658:22a:cafe:200:0:0:1')
- self.assertEqual(ipv4, ipaddr.IPNetwork(int(ipv4)))
- self.assertEqual(ipv6, ipaddr.IPNetwork(int(ipv6)))
-
- v6_int = 42540616829182469433547762482097946625
- self.assertEqual(self.ipv6.ip, ipaddr.IPv6Network(v6_int).ip)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv6Network, 2**128)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv6Network, -1)
-
- self.assertEqual(ipaddr.IPNetwork(self.ipv4.ip).version, 4)
- self.assertEqual(ipaddr.IPNetwork(self.ipv6.ip).version, 6)
-
- if ipaddr._compat_has_real_bytes: # on python3+
- def testIpFromPacked(self):
- ip = ipaddr.IPNetwork
-
- self.assertEqual(self.ipv4.ip,
- ip(_cb('\x01\x02\x03\x04')).ip)
- self.assertEqual(ip('255.254.253.252'),
- ip(_cb('\xff\xfe\xfd\xfc')))
- self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 3))
- self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 5))
- self.assertEqual(self.ipv6.ip,
- ip(_cb('\x20\x01\x06\x58\x02\x2a\xca\xfe'
- '\x02\x00\x00\x00\x00\x00\x00\x01')).ip)
- self.assertEqual(ip('ffff:2:3:4:ffff::'),
- ip(_cb('\xff\xff\x00\x02\x00\x03\x00\x04' +
- '\xff\xff' + '\x00' * 6)))
- self.assertEqual(ip('::'),
- ip(_cb('\x00' * 16)))
- self.assertRaises(ValueError, ip, _cb('\x00' * 15))
- self.assertRaises(ValueError, ip, _cb('\x00' * 17))
-
- def testGetIp(self):
- self.assertEqual(int(self.ipv4.ip), 16909060)
- self.assertEqual(str(self.ipv4.ip), '1.2.3.4')
- self.assertEqual(str(self.ipv4_hostmask.ip), '10.0.0.1')
-
- self.assertEqual(int(self.ipv6.ip),
- 42540616829182469433547762482097946625)
- self.assertEqual(str(self.ipv6.ip),
- '2001:658:22a:cafe:200::1')
-
- def testGetNetmask(self):
- self.assertEqual(int(self.ipv4.netmask), 4294967040L)
- self.assertEqual(str(self.ipv4.netmask), '255.255.255.0')
- self.assertEqual(str(self.ipv4_hostmask.netmask), '255.0.0.0')
- self.assertEqual(int(self.ipv6.netmask),
- 340282366920938463444927863358058659840)
- self.assertEqual(self.ipv6.prefixlen, 64)
-
- def testZeroNetmask(self):
- ipv4_zero_netmask = ipaddr.IPv4Network('1.2.3.4/0')
- self.assertEqual(int(ipv4_zero_netmask.netmask), 0)
- self.assertTrue(ipv4_zero_netmask._is_valid_netmask(str(0)))
-
- ipv6_zero_netmask = ipaddr.IPv6Network('::1/0')
- self.assertEqual(int(ipv6_zero_netmask.netmask), 0)
- self.assertTrue(ipv6_zero_netmask._is_valid_netmask(str(0)))
-
- def testGetBroadcast(self):
- self.assertEqual(int(self.ipv4.broadcast), 16909311L)
- self.assertEqual(str(self.ipv4.broadcast), '1.2.3.255')
-
- self.assertEqual(int(self.ipv6.broadcast),
- 42540616829182469451850391367731642367)
- self.assertEqual(str(self.ipv6.broadcast),
- '2001:658:22a:cafe:ffff:ffff:ffff:ffff')
-
- def testGetPrefixlen(self):
- self.assertEqual(self.ipv4.prefixlen, 24)
-
- self.assertEqual(self.ipv6.prefixlen, 64)
-
- def testGetSupernet(self):
- self.assertEqual(self.ipv4.supernet().prefixlen, 23)
- self.assertEqual(str(self.ipv4.supernet().network), '1.2.2.0')
- self.assertEqual(ipaddr.IPv4Network('0.0.0.0/0').supernet(),
- ipaddr.IPv4Network('0.0.0.0/0'))
-
- self.assertEqual(self.ipv6.supernet().prefixlen, 63)
- self.assertEqual(str(self.ipv6.supernet().network),
- '2001:658:22a:cafe::')
- self.assertEqual(ipaddr.IPv6Network('::0/0').supernet(),
- ipaddr.IPv6Network('::0/0'))
-
- def testGetSupernet3(self):
- self.assertEqual(self.ipv4.supernet(3).prefixlen, 21)
- self.assertEqual(str(self.ipv4.supernet(3).network), '1.2.0.0')
-
- self.assertEqual(self.ipv6.supernet(3).prefixlen, 61)
- self.assertEqual(str(self.ipv6.supernet(3).network),
- '2001:658:22a:caf8::')
-
- def testGetSupernet4(self):
- self.assertRaises(ValueError, self.ipv4.supernet, prefixlen_diff=2,
- new_prefix=1)
- self.assertRaises(ValueError, self.ipv4.supernet, new_prefix=25)
- self.assertEqual(self.ipv4.supernet(prefixlen_diff=2),
- self.ipv4.supernet(new_prefix=22))
-
- self.assertRaises(ValueError, self.ipv6.supernet, prefixlen_diff=2,
- new_prefix=1)
- self.assertRaises(ValueError, self.ipv6.supernet, new_prefix=65)
- self.assertEqual(self.ipv6.supernet(prefixlen_diff=2),
- self.ipv6.supernet(new_prefix=62))
-
- def testIterSubnets(self):
- self.assertEqual(self.ipv4.subnet(), list(self.ipv4.iter_subnets()))
- self.assertEqual(self.ipv6.subnet(), list(self.ipv6.iter_subnets()))
-
- def testFancySubnetting(self):
- self.assertEqual(sorted(self.ipv4.subnet(prefixlen_diff=3)),
- sorted(self.ipv4.subnet(new_prefix=27)))
- self.assertRaises(ValueError, self.ipv4.subnet, new_prefix=23)
- self.assertRaises(ValueError, self.ipv4.subnet,
- prefixlen_diff=3, new_prefix=27)
- self.assertEqual(sorted(self.ipv6.subnet(prefixlen_diff=4)),
- sorted(self.ipv6.subnet(new_prefix=68)))
- self.assertRaises(ValueError, self.ipv6.subnet, new_prefix=63)
- self.assertRaises(ValueError, self.ipv6.subnet,
- prefixlen_diff=4, new_prefix=68)
-
- def testGetSubnet(self):
- self.assertEqual(self.ipv4.subnet()[0].prefixlen, 25)
- self.assertEqual(str(self.ipv4.subnet()[0].network), '1.2.3.0')
- self.assertEqual(str(self.ipv4.subnet()[1].network), '1.2.3.128')
-
- self.assertEqual(self.ipv6.subnet()[0].prefixlen, 65)
-
- def testGetSubnetForSingle32(self):
- ip = ipaddr.IPv4Network('1.2.3.4/32')
- subnets1 = [str(x) for x in ip.subnet()]
- subnets2 = [str(x) for x in ip.subnet(2)]
- self.assertEqual(subnets1, ['1.2.3.4/32'])
- self.assertEqual(subnets1, subnets2)
-
- def testGetSubnetForSingle128(self):
- ip = ipaddr.IPv6Network('::1/128')
- subnets1 = [str(x) for x in ip.subnet()]
- subnets2 = [str(x) for x in ip.subnet(2)]
- self.assertEqual(subnets1, ['::1/128'])
- self.assertEqual(subnets1, subnets2)
-
- def testSubnet2(self):
- ips = [str(x) for x in self.ipv4.subnet(2)]
- self.assertEqual(
- ips,
- ['1.2.3.0/26', '1.2.3.64/26', '1.2.3.128/26', '1.2.3.192/26'])
-
- ipsv6 = [str(x) for x in self.ipv6.subnet(2)]
- self.assertEqual(
- ipsv6,
- ['2001:658:22a:cafe::/66',
- '2001:658:22a:cafe:4000::/66',
- '2001:658:22a:cafe:8000::/66',
- '2001:658:22a:cafe:c000::/66'])
-
- def testSubnetFailsForLargeCidrDiff(self):
- self.assertRaises(ValueError, self.ipv4.subnet, 9)
- self.assertRaises(ValueError, self.ipv6.subnet, 65)
-
- def testSupernetFailsForLargeCidrDiff(self):
- self.assertRaises(ValueError, self.ipv4.supernet, 25)
- self.assertRaises(ValueError, self.ipv6.supernet, 65)
-
- def testSubnetFailsForNegativeCidrDiff(self):
- self.assertRaises(ValueError, self.ipv4.subnet, -1)
- self.assertRaises(ValueError, self.ipv6.subnet, -1)
-
- def testGetNumHosts(self):
- self.assertEqual(self.ipv4.numhosts, 256)
- self.assertEqual(self.ipv4.subnet()[0].numhosts, 128)
- self.assertEqual(self.ipv4.supernet().numhosts, 512)
-
- self.assertEqual(self.ipv6.numhosts, 18446744073709551616)
- self.assertEqual(self.ipv6.subnet()[0].numhosts, 9223372036854775808)
- self.assertEqual(self.ipv6.supernet().numhosts, 36893488147419103232)
-
- def testContains(self):
- self.assertTrue(ipaddr.IPv4Network('1.2.3.128/25') in self.ipv4)
- self.assertFalse(ipaddr.IPv4Network('1.2.4.1/24') in self.ipv4)
- self.assertTrue(self.ipv4 in self.ipv4)
- self.assertTrue(self.ipv6 in self.ipv6)
- # We can test addresses and string as well.
- addr1 = ipaddr.IPv4Address('1.2.3.37')
- self.assertTrue(addr1 in self.ipv4)
- # issue 61, bad network comparison on like-ip'd network objects
- # with identical broadcast addresses.
- self.assertFalse(ipaddr.IPv4Network('1.1.0.0/16').__contains__(
- ipaddr.IPv4Network('1.0.0.0/15')))
-
- def testBadAddress(self):
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
- 'poop')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, '1.2.3.256')
-
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- 'poopv6')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, '1.2.3.4/32/24')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, '10/8')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv6Network, '10/8')
-
-
- def testBadNetMask(self):
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.2.3.4/')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.2.3.4/33')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.2.3.4/254.254.255.256')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.1.1.1/240.255.0.0')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv6Network, '::1/')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv6Network, '::1/129')
-
- def testNth(self):
- self.assertEqual(str(self.ipv4[5]), '1.2.3.5')
- self.assertRaises(IndexError, self.ipv4.__getitem__, 256)
-
- self.assertEqual(str(self.ipv6[5]),
- '2001:658:22a:cafe::5')
-
- def testGetitem(self):
- # http://code.google.com/p/ipaddr-py/issues/detail?id=15
- addr = ipaddr.IPv4Network('172.31.255.128/255.255.255.240')
- self.assertEqual(28, addr.prefixlen)
- addr_list = list(addr)
- self.assertEqual('172.31.255.128', str(addr_list[0]))
- self.assertEqual('172.31.255.128', str(addr[0]))
- self.assertEqual('172.31.255.143', str(addr_list[-1]))
- self.assertEqual('172.31.255.143', str(addr[-1]))
- self.assertEqual(addr_list[-1], addr[-1])
-
- def testEqual(self):
- self.assertTrue(self.ipv4 == ipaddr.IPv4Network('1.2.3.4/24'))
- self.assertFalse(self.ipv4 == ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertFalse(self.ipv4 == ipaddr.IPv6Network('::1.2.3.4/24'))
- self.assertFalse(self.ipv4 == '')
- self.assertFalse(self.ipv4 == [])
- self.assertFalse(self.ipv4 == 2)
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1/32') ==
- ipaddr.IPAddress('1.1.1.1'))
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1/24') ==
- ipaddr.IPAddress('1.1.1.1'))
- self.assertFalse(ipaddr.IPNetwork('1.1.1.0/24') ==
- ipaddr.IPAddress('1.1.1.1'))
-
- self.assertTrue(self.ipv6 ==
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/64'))
- self.assertTrue(ipaddr.IPNetwork('::1/128') ==
- ipaddr.IPAddress('::1'))
- self.assertTrue(ipaddr.IPNetwork('::1/127') ==
- ipaddr.IPAddress('::1'))
- self.assertFalse(ipaddr.IPNetwork('::0/127') ==
- ipaddr.IPAddress('::1'))
- self.assertFalse(self.ipv6 ==
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/63'))
- self.assertFalse(self.ipv6 == ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertFalse(self.ipv6 == '')
- self.assertFalse(self.ipv6 == [])
- self.assertFalse(self.ipv6 == 2)
-
- def testNotEqual(self):
- self.assertFalse(self.ipv4 != ipaddr.IPv4Network('1.2.3.4/24'))
- self.assertTrue(self.ipv4 != ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertTrue(self.ipv4 != ipaddr.IPv6Network('::1.2.3.4/24'))
- self.assertTrue(self.ipv4 != '')
- self.assertTrue(self.ipv4 != [])
- self.assertTrue(self.ipv4 != 2)
-
- addr2 = ipaddr.IPAddress('2001:658:22a:cafe:200::1')
- self.assertFalse(self.ipv6 !=
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/64'))
- self.assertTrue(self.ipv6 !=
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/63'))
- self.assertTrue(self.ipv6 != ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertTrue(self.ipv6 != '')
- self.assertTrue(self.ipv6 != [])
- self.assertTrue(self.ipv6 != 2)
-
- def testSlash32Constructor(self):
- self.assertEqual(str(ipaddr.IPv4Network('1.2.3.4/255.255.255.255')),
- '1.2.3.4/32')
-
- def testSlash128Constructor(self):
- self.assertEqual(str(ipaddr.IPv6Network('::1/128')),
- '::1/128')
-
- def testSlash0Constructor(self):
- self.assertEqual(str(ipaddr.IPv4Network('1.2.3.4/0.0.0.0')),
- '1.2.3.4/0')
-
- def testCollapsing(self):
- # test only IP addresses including some duplicates
- ip1 = ipaddr.IPv4Address('1.1.1.0')
- ip2 = ipaddr.IPv4Address('1.1.1.1')
- ip3 = ipaddr.IPv4Address('1.1.1.2')
- ip4 = ipaddr.IPv4Address('1.1.1.3')
- ip5 = ipaddr.IPv4Address('1.1.1.4')
- ip6 = ipaddr.IPv4Address('1.1.1.0')
- # check that addreses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3, ip4, ip5, ip6])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.1.0/30'),
- ipaddr.IPv4Network('1.1.1.4/32')])
-
- # test a mix of IP addresses and networks including some duplicates
- ip1 = ipaddr.IPv4Address('1.1.1.0')
- ip2 = ipaddr.IPv4Address('1.1.1.1')
- ip3 = ipaddr.IPv4Address('1.1.1.2')
- ip4 = ipaddr.IPv4Address('1.1.1.3')
- ip5 = ipaddr.IPv4Network('1.1.1.4/30')
- ip6 = ipaddr.IPv4Network('1.1.1.4/30')
- # check that addreses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip5, ip1, ip2, ip3, ip4, ip6])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.1.0/29')])
-
- # test only IP networks
- ip1 = ipaddr.IPv4Network('1.1.0.0/24')
- ip2 = ipaddr.IPv4Network('1.1.1.0/24')
- ip3 = ipaddr.IPv4Network('1.1.2.0/24')
- ip4 = ipaddr.IPv4Network('1.1.3.0/24')
- ip5 = ipaddr.IPv4Network('1.1.4.0/24')
- # stored in no particular order b/c we want CollapseAddr to call [].sort
- ip6 = ipaddr.IPv4Network('1.1.0.0/22')
- # check that addreses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3, ip4, ip5, ip6])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.0.0/22'),
- ipaddr.IPv4Network('1.1.4.0/24')])
-
- # test that two addresses are supernet'ed properly
- collapsed = ipaddr.collapse_address_list([ip1, ip2])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.0.0/23')])
-
- # test same IP networks
- ip_same1 = ip_same2 = ipaddr.IPv4Network('1.1.1.1/32')
- self.assertEqual(ipaddr.collapse_address_list([ip_same1, ip_same2]),
- [ip_same1])
-
- # test same IP addresses
- ip_same1 = ip_same2 = ipaddr.IPv4Address('1.1.1.1')
- self.assertEqual(ipaddr.collapse_address_list([ip_same1, ip_same2]),
- [ipaddr.IPNetwork('1.1.1.1/32')])
- ip1 = ipaddr.IPv6Network('::2001:1/100')
- ip2 = ipaddr.IPv6Network('::2002:1/120')
- ip3 = ipaddr.IPv6Network('::2001:1/96')
- # test that ipv6 addresses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3])
- self.assertEqual(collapsed, [ip3])
-
- # the toejam test
- ip1 = ipaddr.IPAddress('1.1.1.1')
- ip2 = ipaddr.IPAddress('::1')
- self.assertRaises(TypeError, ipaddr.collapse_address_list,
- [ip1, ip2])
-
- def testSummarizing(self):
- #ip = ipaddr.IPAddress
- #ipnet = ipaddr.IPNetwork
- summarize = ipaddr.summarize_address_range
- ip1 = ipaddr.IPAddress('1.1.1.0')
- ip2 = ipaddr.IPAddress('1.1.1.255')
- # test a /24 is sumamrized properly
- self.assertEqual(summarize(ip1, ip2)[0], ipaddr.IPNetwork('1.1.1.0/24'))
- # test an IPv4 range that isn't on a network byte boundary
- ip2 = ipaddr.IPAddress('1.1.1.8')
- self.assertEqual(summarize(ip1, ip2), [ipaddr.IPNetwork('1.1.1.0/29'),
- ipaddr.IPNetwork('1.1.1.8')])
-
- ip1 = ipaddr.IPAddress('1::')
- ip2 = ipaddr.IPAddress('1:ffff:ffff:ffff:ffff:ffff:ffff:ffff')
- # test a IPv6 is sumamrized properly
- self.assertEqual(summarize(ip1, ip2)[0], ipaddr.IPNetwork('1::/16'))
- # test an IPv6 range that isn't on a network byte boundary
- ip2 = ipaddr.IPAddress('2::')
- self.assertEqual(summarize(ip1, ip2), [ipaddr.IPNetwork('1::/16'),
- ipaddr.IPNetwork('2::/128')])
-
- # test exception raised when first is greater than last
- self.assertRaises(ValueError, summarize, ipaddr.IPAddress('1.1.1.0'),
- ipaddr.IPAddress('1.1.0.0'))
- # test exception raised when first and last aren't IP addresses
- self.assertRaises(TypeError, summarize,
- ipaddr.IPNetwork('1.1.1.0'),
- ipaddr.IPNetwork('1.1.0.0'))
- self.assertRaises(TypeError, summarize,
- ipaddr.IPNetwork('1.1.1.0'), ipaddr.IPNetwork('1.1.0.0'))
- # test exception raised when first and last are not same version
- self.assertRaises(TypeError, summarize, ipaddr.IPAddress('::'),
- ipaddr.IPNetwork('1.1.0.0'))
-
- def testAddressComparison(self):
- self.assertTrue(ipaddr.IPAddress('1.1.1.1') <=
- ipaddr.IPAddress('1.1.1.1'))
- self.assertTrue(ipaddr.IPAddress('1.1.1.1') <=
- ipaddr.IPAddress('1.1.1.2'))
- self.assertTrue(ipaddr.IPAddress('::1') <= ipaddr.IPAddress('::1'))
- self.assertTrue(ipaddr.IPAddress('::1') <= ipaddr.IPAddress('::2'))
-
- def testNetworkComparison(self):
- # ip1 and ip2 have the same network address
- ip1 = ipaddr.IPv4Network('1.1.1.0/24')
- ip2 = ipaddr.IPv4Network('1.1.1.1/24')
- ip3 = ipaddr.IPv4Network('1.1.2.0/24')
-
- self.assertTrue(ip1 < ip3)
- self.assertTrue(ip3 > ip2)
-
- self.assertEqual(ip1.compare_networks(ip2), 0)
- self.assertTrue(ip1._get_networks_key() == ip2._get_networks_key())
- self.assertEqual(ip1.compare_networks(ip3), -1)
- self.assertTrue(ip1._get_networks_key() < ip3._get_networks_key())
-
- ip1 = ipaddr.IPv6Network('2001::2000/96')
- ip2 = ipaddr.IPv6Network('2001::2001/96')
- ip3 = ipaddr.IPv6Network('2001:ffff::2000/96')
-
- self.assertTrue(ip1 < ip3)
- self.assertTrue(ip3 > ip2)
- self.assertEqual(ip1.compare_networks(ip2), 0)
- self.assertTrue(ip1._get_networks_key() == ip2._get_networks_key())
- self.assertEqual(ip1.compare_networks(ip3), -1)
- self.assertTrue(ip1._get_networks_key() < ip3._get_networks_key())
-
- # Test comparing different protocols.
- # Should always raise a TypeError.
- ipv6 = ipaddr.IPv6Network('::/0')
- ipv4 = ipaddr.IPv4Network('0.0.0.0/0')
- self.assertRaises(TypeError, ipv4.__lt__, ipv6)
- self.assertRaises(TypeError, ipv4.__gt__, ipv6)
- self.assertRaises(TypeError, ipv6.__lt__, ipv4)
- self.assertRaises(TypeError, ipv6.__gt__, ipv4)
-
- # Regression test for issue 19.
- ip1 = ipaddr.IPNetwork('10.1.2.128/25')
- self.assertFalse(ip1 < ip1)
- self.assertFalse(ip1 > ip1)
- ip2 = ipaddr.IPNetwork('10.1.3.0/24')
- self.assertTrue(ip1 < ip2)
- self.assertFalse(ip2 < ip1)
- self.assertFalse(ip1 > ip2)
- self.assertTrue(ip2 > ip1)
- ip3 = ipaddr.IPNetwork('10.1.3.0/25')
- self.assertTrue(ip2 < ip3)
- self.assertFalse(ip3 < ip2)
- self.assertFalse(ip2 > ip3)
- self.assertTrue(ip3 > ip2)
-
- # Regression test for issue 28.
- ip1 = ipaddr.IPNetwork('10.10.10.0/31')
- ip2 = ipaddr.IPNetwork('10.10.10.0')
- ip3 = ipaddr.IPNetwork('10.10.10.2/31')
- ip4 = ipaddr.IPNetwork('10.10.10.2')
- sorted = [ip1, ip2, ip3, ip4]
- unsorted = [ip2, ip4, ip1, ip3]
- unsorted.sort()
- self.assertEqual(sorted, unsorted)
- unsorted = [ip4, ip1, ip3, ip2]
- unsorted.sort()
- self.assertEqual(sorted, unsorted)
- self.assertRaises(TypeError, ip1.__lt__, ipaddr.IPAddress('10.10.10.0'))
- self.assertRaises(TypeError, ip2.__lt__, ipaddr.IPAddress('10.10.10.0'))
-
- # <=, >=
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1') <=
- ipaddr.IPNetwork('1.1.1.1'))
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1') <=
- ipaddr.IPNetwork('1.1.1.2'))
- self.assertFalse(ipaddr.IPNetwork('1.1.1.2') <=
- ipaddr.IPNetwork('1.1.1.1'))
- self.assertTrue(ipaddr.IPNetwork('::1') <= ipaddr.IPNetwork('::1'))
- self.assertTrue(ipaddr.IPNetwork('::1') <= ipaddr.IPNetwork('::2'))
- self.assertFalse(ipaddr.IPNetwork('::2') <= ipaddr.IPNetwork('::1'))
-
- def testStrictNetworks(self):
- self.assertRaises(ValueError, ipaddr.IPNetwork, '192.168.1.1/24',
- strict=True)
- self.assertRaises(ValueError, ipaddr.IPNetwork, '::1/120', strict=True)
-
- def testOverlaps(self):
- other = ipaddr.IPv4Network('1.2.3.0/30')
- other2 = ipaddr.IPv4Network('1.2.2.0/24')
- other3 = ipaddr.IPv4Network('1.2.2.64/26')
- self.assertTrue(self.ipv4.overlaps(other))
- self.assertFalse(self.ipv4.overlaps(other2))
- self.assertTrue(other2.overlaps(other3))
-
- def testEmbeddedIpv4(self):
- ipv4_string = '192.168.0.1'
- ipv4 = ipaddr.IPv4Network(ipv4_string)
- v4compat_ipv6 = ipaddr.IPv6Network('::%s' % ipv4_string)
- self.assertEqual(int(v4compat_ipv6.ip), int(ipv4.ip))
- v4mapped_ipv6 = ipaddr.IPv6Network('::ffff:%s' % ipv4_string)
- self.assertNotEqual(v4mapped_ipv6.ip, ipv4.ip)
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- '2001:1.1.1.1:1.1.1.1')
-
- # Issue 67: IPv6 with embedded IPv4 address not recognized.
- def testIPv6AddressTooLarge(self):
- # RFC4291 2.5.5.2
- self.assertEqual(ipaddr.IPAddress('::FFFF:192.0.2.1'),
- ipaddr.IPAddress('::FFFF:c000:201'))
- # RFC4291 2.2 (part 3) x::d.d.d.d
- self.assertEqual(ipaddr.IPAddress('FFFF::192.0.2.1'),
- ipaddr.IPAddress('FFFF::c000:201'))
-
- def testIPVersion(self):
- self.assertEqual(self.ipv4.version, 4)
- self.assertEqual(self.ipv6.version, 6)
-
- def testMaxPrefixLength(self):
- self.assertEqual(self.ipv4.max_prefixlen, 32)
- self.assertEqual(self.ipv6.max_prefixlen, 128)
-
- def testPacked(self):
- self.assertEqual(self.ipv4.packed,
- _cb('\x01\x02\x03\x04'))
- self.assertEqual(ipaddr.IPv4Network('255.254.253.252').packed,
- _cb('\xff\xfe\xfd\xfc'))
- self.assertEqual(self.ipv6.packed,
- _cb('\x20\x01\x06\x58\x02\x2a\xca\xfe'
- '\x02\x00\x00\x00\x00\x00\x00\x01'))
- self.assertEqual(ipaddr.IPv6Network('ffff:2:3:4:ffff::').packed,
- _cb('\xff\xff\x00\x02\x00\x03\x00\x04\xff\xff'
- + '\x00' * 6))
- self.assertEqual(ipaddr.IPv6Network('::1:0:0:0:0').packed,
- _cb('\x00' * 6 + '\x00\x01' + '\x00' * 8))
-
- def testIpStrFromPrefixlen(self):
- ipv4 = ipaddr.IPv4Network('1.2.3.4/24')
- self.assertEqual(ipv4._ip_string_from_prefix(), '255.255.255.0')
- self.assertEqual(ipv4._ip_string_from_prefix(28), '255.255.255.240')
-
- def testIpType(self):
- ipv4net = ipaddr.IPNetwork('1.2.3.4')
- ipv4addr = ipaddr.IPAddress('1.2.3.4')
- ipv6net = ipaddr.IPNetwork('::1.2.3.4')
- ipv6addr = ipaddr.IPAddress('::1.2.3.4')
- self.assertEqual(ipaddr.IPv4Network, type(ipv4net))
- self.assertEqual(ipaddr.IPv4Address, type(ipv4addr))
- self.assertEqual(ipaddr.IPv6Network, type(ipv6net))
- self.assertEqual(ipaddr.IPv6Address, type(ipv6addr))
-
- def testReservedIpv4(self):
- # test networks
- self.assertEqual(True, ipaddr.IPNetwork('224.1.1.1/31').is_multicast)
- self.assertEqual(False, ipaddr.IPNetwork('240.0.0.0').is_multicast)
-
- self.assertEqual(True, ipaddr.IPNetwork('192.168.1.1/17').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('192.169.0.0').is_private)
- self.assertEqual(True, ipaddr.IPNetwork('10.255.255.255').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('11.0.0.0').is_private)
- self.assertEqual(True, ipaddr.IPNetwork('172.31.255.255').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('172.32.0.0').is_private)
-
- self.assertEqual(True,
- ipaddr.IPNetwork('169.254.100.200/24').is_link_local)
- self.assertEqual(False,
- ipaddr.IPNetwork('169.255.100.200/24').is_link_local)
-
- self.assertEqual(True,
- ipaddr.IPNetwork('127.100.200.254/32').is_loopback)
- self.assertEqual(True, ipaddr.IPNetwork('127.42.0.0/16').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('128.0.0.0').is_loopback)
-
- # test addresses
- self.assertEqual(True, ipaddr.IPAddress('224.1.1.1').is_multicast)
- self.assertEqual(False, ipaddr.IPAddress('240.0.0.0').is_multicast)
-
- self.assertEqual(True, ipaddr.IPAddress('192.168.1.1').is_private)
- self.assertEqual(False, ipaddr.IPAddress('192.169.0.0').is_private)
- self.assertEqual(True, ipaddr.IPAddress('10.255.255.255').is_private)
- self.assertEqual(False, ipaddr.IPAddress('11.0.0.0').is_private)
- self.assertEqual(True, ipaddr.IPAddress('172.31.255.255').is_private)
- self.assertEqual(False, ipaddr.IPAddress('172.32.0.0').is_private)
-
- self.assertEqual(True,
- ipaddr.IPAddress('169.254.100.200').is_link_local)
- self.assertEqual(False,
- ipaddr.IPAddress('169.255.100.200').is_link_local)
-
- self.assertEqual(True,
- ipaddr.IPAddress('127.100.200.254').is_loopback)
- self.assertEqual(True, ipaddr.IPAddress('127.42.0.0').is_loopback)
- self.assertEqual(False, ipaddr.IPAddress('128.0.0.0').is_loopback)
- self.assertEqual(True, ipaddr.IPNetwork('0.0.0.0').is_unspecified)
-
- def testReservedIpv6(self):
-
- self.assertEqual(True, ipaddr.IPNetwork('ffff::').is_multicast)
- self.assertEqual(True, ipaddr.IPNetwork(2**128-1).is_multicast)
- self.assertEqual(True, ipaddr.IPNetwork('ff00::').is_multicast)
- self.assertEqual(False, ipaddr.IPNetwork('fdff::').is_multicast)
-
- self.assertEqual(True, ipaddr.IPNetwork('fecf::').is_site_local)
- self.assertEqual(True, ipaddr.IPNetwork(
- 'feff:ffff:ffff:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPNetwork('fbf:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPNetwork('ff00::').is_site_local)
-
- self.assertEqual(True, ipaddr.IPNetwork('fc00::').is_private)
- self.assertEqual(True, ipaddr.IPNetwork(
- 'fc00:ffff:ffff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('fbff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('fe00::').is_private)
-
- self.assertEqual(True, ipaddr.IPNetwork('fea0::').is_link_local)
- self.assertEqual(True, ipaddr.IPNetwork('febf:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPNetwork('fe7f:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPNetwork('fec0::').is_link_local)
-
- self.assertEqual(True, ipaddr.IPNetwork('0:0::0:01').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('::1/127').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('::').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('::2').is_loopback)
-
- self.assertEqual(True, ipaddr.IPNetwork('0::0').is_unspecified)
- self.assertEqual(False, ipaddr.IPNetwork('::1').is_unspecified)
- self.assertEqual(False, ipaddr.IPNetwork('::/127').is_unspecified)
-
- # test addresses
- self.assertEqual(True, ipaddr.IPAddress('ffff::').is_multicast)
- self.assertEqual(True, ipaddr.IPAddress(2**128-1).is_multicast)
- self.assertEqual(True, ipaddr.IPAddress('ff00::').is_multicast)
- self.assertEqual(False, ipaddr.IPAddress('fdff::').is_multicast)
-
- self.assertEqual(True, ipaddr.IPAddress('fecf::').is_site_local)
- self.assertEqual(True, ipaddr.IPAddress(
- 'feff:ffff:ffff:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPAddress('fbf:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPAddress('ff00::').is_site_local)
-
- self.assertEqual(True, ipaddr.IPAddress('fc00::').is_private)
- self.assertEqual(True, ipaddr.IPAddress(
- 'fc00:ffff:ffff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPAddress('fbff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPAddress('fe00::').is_private)
-
- self.assertEqual(True, ipaddr.IPAddress('fea0::').is_link_local)
- self.assertEqual(True, ipaddr.IPAddress('febf:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPAddress('fe7f:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPAddress('fec0::').is_link_local)
-
- self.assertEqual(True, ipaddr.IPAddress('0:0::0:01').is_loopback)
- self.assertEqual(True, ipaddr.IPAddress('::1').is_loopback)
- self.assertEqual(False, ipaddr.IPAddress('::2').is_loopback)
-
- self.assertEqual(True, ipaddr.IPAddress('0::0').is_unspecified)
- self.assertEqual(False, ipaddr.IPAddress('::1').is_unspecified)
-
- # some generic IETF reserved addresses
- self.assertEqual(True, ipaddr.IPAddress('100::').is_reserved)
- self.assertEqual(True, ipaddr.IPNetwork('4000::1/128').is_reserved)
-
- def testIpv4Mapped(self):
- self.assertEqual(ipaddr.IPAddress('::ffff:192.168.1.1').ipv4_mapped,
- ipaddr.IPAddress('192.168.1.1'))
- self.assertEqual(ipaddr.IPAddress('::c0a8:101').ipv4_mapped, None)
- self.assertEqual(ipaddr.IPAddress('::ffff:c0a8:101').ipv4_mapped,
- ipaddr.IPAddress('192.168.1.1'))
-
- def testAddrExclude(self):
- addr1 = ipaddr.IPNetwork('10.1.1.0/24')
- addr2 = ipaddr.IPNetwork('10.1.1.0/26')
- addr3 = ipaddr.IPNetwork('10.2.1.0/24')
- addr4 = ipaddr.IPAddress('10.1.1.0')
- self.assertEqual(addr1.address_exclude(addr2),
- [ipaddr.IPNetwork('10.1.1.64/26'),
- ipaddr.IPNetwork('10.1.1.128/25')])
- self.assertRaises(ValueError, addr1.address_exclude, addr3)
- self.assertRaises(TypeError, addr1.address_exclude, addr4)
- self.assertEqual(addr1.address_exclude(addr1), [])
-
- def testHash(self):
- self.assertEqual(hash(ipaddr.IPNetwork('10.1.1.0/24')),
- hash(ipaddr.IPNetwork('10.1.1.0/24')))
- self.assertEqual(hash(ipaddr.IPAddress('10.1.1.0')),
- hash(ipaddr.IPAddress('10.1.1.0')))
- # i70
- self.assertEqual(hash(ipaddr.IPAddress('1.2.3.4')),
- hash(ipaddr.IPAddress(
- long(ipaddr.IPAddress('1.2.3.4')._ip))))
- ip1 = ipaddr.IPAddress('10.1.1.0')
- ip2 = ipaddr.IPAddress('1::')
- dummy = {}
- dummy[self.ipv4] = None
- dummy[self.ipv6] = None
- dummy[ip1] = None
- dummy[ip2] = None
- self.assertTrue(self.ipv4 in dummy)
- self.assertTrue(ip2 in dummy)
-
- def testCopyConstructor(self):
- addr1 = ipaddr.IPNetwork('10.1.1.0/24')
- addr2 = ipaddr.IPNetwork(addr1)
- addr3 = ipaddr.IPNetwork('2001:658:22a:cafe:200::1/64')
- addr4 = ipaddr.IPNetwork(addr3)
- addr5 = ipaddr.IPv4Address('1.1.1.1')
- addr6 = ipaddr.IPv6Address('2001:658:22a:cafe:200::1')
-
- self.assertEqual(addr1, addr2)
- self.assertEqual(addr3, addr4)
- self.assertEqual(addr5, ipaddr.IPv4Address(addr5))
- self.assertEqual(addr6, ipaddr.IPv6Address(addr6))
-
- def testCompressIPv6Address(self):
- test_addresses = {
- '1:2:3:4:5:6:7:8': '1:2:3:4:5:6:7:8/128',
- '2001:0:0:4:0:0:0:8': '2001:0:0:4::8/128',
- '2001:0:0:4:5:6:7:8': '2001::4:5:6:7:8/128',
- '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128',
- '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128',
- '0:0:3:0:0:0:0:ffff': '0:0:3::ffff/128',
- '0:0:0:4:0:0:0:ffff': '::4:0:0:0:ffff/128',
- '0:0:0:0:5:0:0:ffff': '::5:0:0:ffff/128',
- '1:0:0:4:0:0:7:8': '1::4:0:0:7:8/128',
- '0:0:0:0:0:0:0:0': '::/128',
- '0:0:0:0:0:0:0:0/0': '::/0',
- '0:0:0:0:0:0:0:1': '::1/128',
- '2001:0658:022a:cafe:0000:0000:0000:0000/66':
- '2001:658:22a:cafe::/66',
- '::1.2.3.4': '::102:304/128',
- '1:2:3:4:5:ffff:1.2.3.4': '1:2:3:4:5:ffff:102:304/128',
- '::7:6:5:4:3:2:1': '0:7:6:5:4:3:2:1/128',
- '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128',
- '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128',
- '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128',
- }
- for uncompressed, compressed in test_addresses.items():
- self.assertEqual(compressed, str(ipaddr.IPv6Network(uncompressed)))
-
- def testExplodeShortHandIpStr(self):
- addr1 = ipaddr.IPv6Network('2001::1')
- addr2 = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1')
- self.assertEqual('2001:0000:0000:0000:0000:0000:0000:0001',
- addr1._explode_shorthand_ip_string(str(addr1.ip)))
- self.assertEqual('0000:0000:0000:0000:0000:0000:0000:0001',
- ipaddr.IPv6Network('::1/128').exploded)
- # issue 77
- self.assertEqual('2001:0000:5ef5:79fd:0000:059d:a0e5:0ba1',
- addr2.exploded)
-
- def testIntRepresentation(self):
- self.assertEqual(16909060, int(self.ipv4))
- self.assertEqual(42540616829182469433547762482097946625, int(self.ipv6))
-
- def testHexRepresentation(self):
- self.assertEqual(hex(0x1020304),
- hex(self.ipv4))
-
- self.assertEqual(hex(0x20010658022ACAFE0200000000000001),
- hex(self.ipv6))
-
- # backwards compatibility
- def testBackwardsCompability(self):
- self.assertEqual(ipaddr.CollapseAddrList(
- [ipaddr.IPNetwork('1.1.0.0/24'), ipaddr.IPNetwork('1.1.1.0/24')]),
- [ipaddr.IPNetwork('1.1.0.0/23')])
-
- self.assertEqual(ipaddr.IPNetwork('::42:0/112').AddressExclude(
- ipaddr.IPNetwork('::42:8000/113')),
- [ipaddr.IPNetwork('::42:0/113')])
-
- self.assertTrue(ipaddr.IPNetwork('1::/8').CompareNetworks(
- ipaddr.IPNetwork('2::/9')) < 0)
-
- self.assertEqual(ipaddr.IPNetwork('1::/16').Contains(
- ipaddr.IPNetwork('2::/16')), False)
-
- self.assertEqual(ipaddr.IPNetwork('0.0.0.0/0').Subnet(),
- [ipaddr.IPNetwork('0.0.0.0/1'),
- ipaddr.IPNetwork('128.0.0.0/1')])
- self.assertEqual(ipaddr.IPNetwork('::/127').Subnet(),
- [ipaddr.IPNetwork('::/128'),
- ipaddr.IPNetwork('::1/128')])
-
- self.assertEqual(ipaddr.IPNetwork('1.0.0.0/32').Supernet(),
- ipaddr.IPNetwork('1.0.0.0/31'))
- self.assertEqual(ipaddr.IPNetwork('::/121').Supernet(),
- ipaddr.IPNetwork('::/120'))
-
- self.assertEqual(ipaddr.IPNetwork('10.0.0.2').IsRFC1918(), True)
- self.assertEqual(ipaddr.IPNetwork('10.0.0.0').IsMulticast(), False)
- self.assertEqual(ipaddr.IPNetwork('127.255.255.255').IsLoopback(), True)
- self.assertEqual(ipaddr.IPNetwork('169.255.255.255').IsLinkLocal(),
- False)
-
- def testForceVersion(self):
- self.assertEqual(ipaddr.IPNetwork(1).version, 4)
- self.assertEqual(ipaddr.IPNetwork(1, version=6).version, 6)
-
- def testWithStar(self):
- self.assertEqual(str(self.ipv4.with_prefixlen), "1.2.3.4/24")
- self.assertEqual(str(self.ipv4.with_netmask), "1.2.3.4/255.255.255.0")
- self.assertEqual(str(self.ipv4.with_hostmask), "1.2.3.4/0.0.0.255")
-
- self.assertEqual(str(self.ipv6.with_prefixlen),
- '2001:658:22a:cafe:200::1/64')
- # rfc3513 sec 2.3 says that ipv6 only uses cidr notation for
- # subnets
- self.assertEqual(str(self.ipv6.with_netmask),
- '2001:658:22a:cafe:200::1/64')
- # this probably don't make much sense, but it's included for
- # compatibility with ipv4
- self.assertEqual(str(self.ipv6.with_hostmask),
- '2001:658:22a:cafe:200::1/::ffff:ffff:ffff:ffff')
-
- def testNetworkElementCaching(self):
- # V4 - make sure we're empty
- self.assertFalse(self.ipv4._cache.has_key('network'))
- self.assertFalse(self.ipv4._cache.has_key('broadcast'))
- self.assertFalse(self.ipv4._cache.has_key('hostmask'))
-
- # V4 - populate and test
- self.assertEqual(self.ipv4.network, ipaddr.IPv4Address('1.2.3.0'))
- self.assertEqual(self.ipv4.broadcast, ipaddr.IPv4Address('1.2.3.255'))
- self.assertEqual(self.ipv4.hostmask, ipaddr.IPv4Address('0.0.0.255'))
-
- # V4 - check we're cached
- self.assertTrue(self.ipv4._cache.has_key('network'))
- self.assertTrue(self.ipv4._cache.has_key('broadcast'))
- self.assertTrue(self.ipv4._cache.has_key('hostmask'))
-
- # V6 - make sure we're empty
- self.assertFalse(self.ipv6._cache.has_key('network'))
- self.assertFalse(self.ipv6._cache.has_key('broadcast'))
- self.assertFalse(self.ipv6._cache.has_key('hostmask'))
-
- # V6 - populate and test
- self.assertEqual(self.ipv6.network,
- ipaddr.IPv6Address('2001:658:22a:cafe::'))
- self.assertEqual(self.ipv6.broadcast, ipaddr.IPv6Address(
- '2001:658:22a:cafe:ffff:ffff:ffff:ffff'))
- self.assertEqual(self.ipv6.hostmask,
- ipaddr.IPv6Address('::ffff:ffff:ffff:ffff'))
-
- # V6 - check we're cached
- self.assertTrue(self.ipv6._cache.has_key('network'))
- self.assertTrue(self.ipv6._cache.has_key('broadcast'))
- self.assertTrue(self.ipv6._cache.has_key('hostmask'))
-
- def testTeredo(self):
- # stolen from wikipedia
- server = ipaddr.IPv4Address('65.54.227.120')
- client = ipaddr.IPv4Address('192.0.2.45')
- teredo_addr = '2001:0000:4136:e378:8000:63bf:3fff:fdd2'
- self.assertEqual((server, client),
- ipaddr.IPAddress(teredo_addr).teredo)
- bad_addr = '2000::4136:e378:8000:63bf:3fff:fdd2'
- self.assertFalse(ipaddr.IPAddress(bad_addr).teredo)
- bad_addr = '2001:0001:4136:e378:8000:63bf:3fff:fdd2'
- self.assertFalse(ipaddr.IPAddress(bad_addr).teredo)
-
- # i77
- teredo_addr = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1')
- self.assertEqual((ipaddr.IPv4Address('94.245.121.253'),
- ipaddr.IPv4Address('95.26.244.94')),
- teredo_addr.teredo)
-
-
- def testsixtofour(self):
- sixtofouraddr = ipaddr.IPAddress('2002:ac1d:2d64::1')
- bad_addr = ipaddr.IPAddress('2000:ac1d:2d64::1')
- self.assertEqual(ipaddr.IPv4Address('172.29.45.100'),
- sixtofouraddr.sixtofour)
- self.assertFalse(bad_addr.sixtofour)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/contrib/ipaddr-py/setup.py b/contrib/ipaddr-py/setup.py
deleted file mode 100755
index 33564320e45..00000000000
--- a/contrib/ipaddr-py/setup.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from distutils.core import setup
-
-import ipaddr
-
-
-setup(name='ipaddr',
- maintainer='Google',
- maintainer_email='ipaddr-py-dev@googlegroups.com',
- version=ipaddr.__version__,
- url='http://code.google.com/p/ipaddr-py/',
- license='Apache License, Version 2.0',
- classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: Apache Software License',
- 'Operating System :: OS Independent',
- 'Topic :: Internet',
- 'Topic :: Software Development :: Libraries',
- 'Topic :: System :: Networking'],
- py_modules=['ipaddr'])
diff --git a/contrib/ipaddr-py/test-2to3.sh b/contrib/ipaddr-py/test-2to3.sh
deleted file mode 100755
index 408d665bcc2..00000000000
--- a/contrib/ipaddr-py/test-2to3.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-
-# Converts the python2 ipaddr files to python3 and runs the unit tests
-# with both python versions.
-
-mkdir -p 2to3output && \
-cp -f *.py 2to3output && \
-( cd 2to3output && 2to3 . | patch -p0 ) && \
-py3version=$(python3 --version 2>&1) && \
-echo -e "\nTesting with ${py3version}" && \
-python3 2to3output/ipaddr_test.py && \
-rm -r 2to3output && \
-pyversion=$(python --version 2>&1) && \
-echo -e "\nTesting with ${pyversion}" && \
-./ipaddr_test.py
diff --git a/contrib/libexecinfo/execinfo.c b/contrib/libexecinfo/execinfo.c
new file mode 100644
index 00000000000..03500257788
--- /dev/null
+++ b/contrib/libexecinfo/execinfo.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2003 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: execinfo.c,v 1.3 2004/07/19 05:21:09 sobomax Exp $
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef HAVE_BACKTRACE
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <dlfcn.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stddef.h>
+
+#include "execinfo_compat.h"
+
+#define D10(x) ceil(log10(((x) == 0) ? 2 : ((x) + 1)))
+
+static void *
+getreturnaddr(int level)
+{
+ switch (level) {
+ case 0: return __builtin_return_address(1);
+ case 1: return __builtin_return_address(2);
+ case 2: return __builtin_return_address(3);
+ case 3: return __builtin_return_address(4);
+ case 4: return __builtin_return_address(5);
+ case 5: return __builtin_return_address(6);
+ case 6: return __builtin_return_address(7);
+ case 7: return __builtin_return_address(8);
+ case 8: return __builtin_return_address(9);
+ case 9: return __builtin_return_address(10);
+ case 10: return __builtin_return_address(11);
+ case 11: return __builtin_return_address(12);
+ case 12: return __builtin_return_address(13);
+ case 13: return __builtin_return_address(14);
+ case 14: return __builtin_return_address(15);
+ case 15: return __builtin_return_address(16);
+ case 16: return __builtin_return_address(17);
+ case 17: return __builtin_return_address(18);
+ case 18: return __builtin_return_address(19);
+ case 19: return __builtin_return_address(20);
+ case 20: return __builtin_return_address(21);
+ case 21: return __builtin_return_address(22);
+ case 22: return __builtin_return_address(23);
+ case 23: return __builtin_return_address(24);
+ case 24: return __builtin_return_address(25);
+ case 25: return __builtin_return_address(26);
+ case 26: return __builtin_return_address(27);
+ case 27: return __builtin_return_address(28);
+ case 28: return __builtin_return_address(29);
+ case 29: return __builtin_return_address(30);
+ case 30: return __builtin_return_address(31);
+ case 31: return __builtin_return_address(32);
+ case 32: return __builtin_return_address(33);
+ case 33: return __builtin_return_address(34);
+ case 34: return __builtin_return_address(35);
+ case 35: return __builtin_return_address(36);
+ case 36: return __builtin_return_address(37);
+ case 37: return __builtin_return_address(38);
+ case 38: return __builtin_return_address(39);
+ case 39: return __builtin_return_address(40);
+ case 40: return __builtin_return_address(41);
+ case 41: return __builtin_return_address(42);
+ case 42: return __builtin_return_address(43);
+ case 43: return __builtin_return_address(44);
+ case 44: return __builtin_return_address(45);
+ case 45: return __builtin_return_address(46);
+ case 46: return __builtin_return_address(47);
+ case 47: return __builtin_return_address(48);
+ case 48: return __builtin_return_address(49);
+ case 49: return __builtin_return_address(50);
+ case 50: return __builtin_return_address(51);
+ case 51: return __builtin_return_address(52);
+ case 52: return __builtin_return_address(53);
+ case 53: return __builtin_return_address(54);
+ case 54: return __builtin_return_address(55);
+ case 55: return __builtin_return_address(56);
+ case 56: return __builtin_return_address(57);
+ case 57: return __builtin_return_address(58);
+ case 58: return __builtin_return_address(59);
+ case 59: return __builtin_return_address(60);
+ case 60: return __builtin_return_address(61);
+ case 61: return __builtin_return_address(62);
+ case 62: return __builtin_return_address(63);
+ case 63: return __builtin_return_address(64);
+ case 64: return __builtin_return_address(65);
+ case 65: return __builtin_return_address(66);
+ case 66: return __builtin_return_address(67);
+ case 67: return __builtin_return_address(68);
+ case 68: return __builtin_return_address(69);
+ case 69: return __builtin_return_address(70);
+ case 70: return __builtin_return_address(71);
+ case 71: return __builtin_return_address(72);
+ case 72: return __builtin_return_address(73);
+ case 73: return __builtin_return_address(74);
+ case 74: return __builtin_return_address(75);
+ case 75: return __builtin_return_address(76);
+ case 76: return __builtin_return_address(77);
+ case 77: return __builtin_return_address(78);
+ case 78: return __builtin_return_address(79);
+ case 79: return __builtin_return_address(80);
+ case 80: return __builtin_return_address(81);
+ case 81: return __builtin_return_address(82);
+ case 82: return __builtin_return_address(83);
+ case 83: return __builtin_return_address(84);
+ case 84: return __builtin_return_address(85);
+ case 85: return __builtin_return_address(86);
+ case 86: return __builtin_return_address(87);
+ case 87: return __builtin_return_address(88);
+ case 88: return __builtin_return_address(89);
+ case 89: return __builtin_return_address(90);
+ case 90: return __builtin_return_address(91);
+ case 91: return __builtin_return_address(92);
+ case 92: return __builtin_return_address(93);
+ case 93: return __builtin_return_address(94);
+ case 94: return __builtin_return_address(95);
+ case 95: return __builtin_return_address(96);
+ case 96: return __builtin_return_address(97);
+ case 97: return __builtin_return_address(98);
+ case 98: return __builtin_return_address(99);
+ case 99: return __builtin_return_address(100);
+ case 100: return __builtin_return_address(101);
+ case 101: return __builtin_return_address(102);
+ case 102: return __builtin_return_address(103);
+ case 103: return __builtin_return_address(104);
+ case 104: return __builtin_return_address(105);
+ case 105: return __builtin_return_address(106);
+ case 106: return __builtin_return_address(107);
+ case 107: return __builtin_return_address(108);
+ case 108: return __builtin_return_address(109);
+ case 109: return __builtin_return_address(110);
+ case 110: return __builtin_return_address(111);
+ case 111: return __builtin_return_address(112);
+ case 112: return __builtin_return_address(113);
+ case 113: return __builtin_return_address(114);
+ case 114: return __builtin_return_address(115);
+ case 115: return __builtin_return_address(116);
+ case 116: return __builtin_return_address(117);
+ case 117: return __builtin_return_address(118);
+ case 118: return __builtin_return_address(119);
+ case 119: return __builtin_return_address(120);
+ case 120: return __builtin_return_address(121);
+ case 121: return __builtin_return_address(122);
+ case 122: return __builtin_return_address(123);
+ case 123: return __builtin_return_address(124);
+ case 124: return __builtin_return_address(125);
+ case 125: return __builtin_return_address(126);
+ case 126: return __builtin_return_address(127);
+ case 127: return __builtin_return_address(128);
+ default: return NULL;
+ }
+}
+
+static void *
+getframeaddr(int level)
+{
+
+ switch (level) {
+ case 0: return __builtin_frame_address(1);
+ case 1: return __builtin_frame_address(2);
+ case 2: return __builtin_frame_address(3);
+ case 3: return __builtin_frame_address(4);
+ case 4: return __builtin_frame_address(5);
+ case 5: return __builtin_frame_address(6);
+ case 6: return __builtin_frame_address(7);
+ case 7: return __builtin_frame_address(8);
+ case 8: return __builtin_frame_address(9);
+ case 9: return __builtin_frame_address(10);
+ case 10: return __builtin_frame_address(11);
+ case 11: return __builtin_frame_address(12);
+ case 12: return __builtin_frame_address(13);
+ case 13: return __builtin_frame_address(14);
+ case 14: return __builtin_frame_address(15);
+ case 15: return __builtin_frame_address(16);
+ case 16: return __builtin_frame_address(17);
+ case 17: return __builtin_frame_address(18);
+ case 18: return __builtin_frame_address(19);
+ case 19: return __builtin_frame_address(20);
+ case 20: return __builtin_frame_address(21);
+ case 21: return __builtin_frame_address(22);
+ case 22: return __builtin_frame_address(23);
+ case 23: return __builtin_frame_address(24);
+ case 24: return __builtin_frame_address(25);
+ case 25: return __builtin_frame_address(26);
+ case 26: return __builtin_frame_address(27);
+ case 27: return __builtin_frame_address(28);
+ case 28: return __builtin_frame_address(29);
+ case 29: return __builtin_frame_address(30);
+ case 30: return __builtin_frame_address(31);
+ case 31: return __builtin_frame_address(32);
+ case 32: return __builtin_frame_address(33);
+ case 33: return __builtin_frame_address(34);
+ case 34: return __builtin_frame_address(35);
+ case 35: return __builtin_frame_address(36);
+ case 36: return __builtin_frame_address(37);
+ case 37: return __builtin_frame_address(38);
+ case 38: return __builtin_frame_address(39);
+ case 39: return __builtin_frame_address(40);
+ case 40: return __builtin_frame_address(41);
+ case 41: return __builtin_frame_address(42);
+ case 42: return __builtin_frame_address(43);
+ case 43: return __builtin_frame_address(44);
+ case 44: return __builtin_frame_address(45);
+ case 45: return __builtin_frame_address(46);
+ case 46: return __builtin_frame_address(47);
+ case 47: return __builtin_frame_address(48);
+ case 48: return __builtin_frame_address(49);
+ case 49: return __builtin_frame_address(50);
+ case 50: return __builtin_frame_address(51);
+ case 51: return __builtin_frame_address(52);
+ case 52: return __builtin_frame_address(53);
+ case 53: return __builtin_frame_address(54);
+ case 54: return __builtin_frame_address(55);
+ case 55: return __builtin_frame_address(56);
+ case 56: return __builtin_frame_address(57);
+ case 57: return __builtin_frame_address(58);
+ case 58: return __builtin_frame_address(59);
+ case 59: return __builtin_frame_address(60);
+ case 60: return __builtin_frame_address(61);
+ case 61: return __builtin_frame_address(62);
+ case 62: return __builtin_frame_address(63);
+ case 63: return __builtin_frame_address(64);
+ case 64: return __builtin_frame_address(65);
+ case 65: return __builtin_frame_address(66);
+ case 66: return __builtin_frame_address(67);
+ case 67: return __builtin_frame_address(68);
+ case 68: return __builtin_frame_address(69);
+ case 69: return __builtin_frame_address(70);
+ case 70: return __builtin_frame_address(71);
+ case 71: return __builtin_frame_address(72);
+ case 72: return __builtin_frame_address(73);
+ case 73: return __builtin_frame_address(74);
+ case 74: return __builtin_frame_address(75);
+ case 75: return __builtin_frame_address(76);
+ case 76: return __builtin_frame_address(77);
+ case 77: return __builtin_frame_address(78);
+ case 78: return __builtin_frame_address(79);
+ case 79: return __builtin_frame_address(80);
+ case 80: return __builtin_frame_address(81);
+ case 81: return __builtin_frame_address(82);
+ case 82: return __builtin_frame_address(83);
+ case 83: return __builtin_frame_address(84);
+ case 84: return __builtin_frame_address(85);
+ case 85: return __builtin_frame_address(86);
+ case 86: return __builtin_frame_address(87);
+ case 87: return __builtin_frame_address(88);
+ case 88: return __builtin_frame_address(89);
+ case 89: return __builtin_frame_address(90);
+ case 90: return __builtin_frame_address(91);
+ case 91: return __builtin_frame_address(92);
+ case 92: return __builtin_frame_address(93);
+ case 93: return __builtin_frame_address(94);
+ case 94: return __builtin_frame_address(95);
+ case 95: return __builtin_frame_address(96);
+ case 96: return __builtin_frame_address(97);
+ case 97: return __builtin_frame_address(98);
+ case 98: return __builtin_frame_address(99);
+ case 99: return __builtin_frame_address(100);
+ case 100: return __builtin_frame_address(101);
+ case 101: return __builtin_frame_address(102);
+ case 102: return __builtin_frame_address(103);
+ case 103: return __builtin_frame_address(104);
+ case 104: return __builtin_frame_address(105);
+ case 105: return __builtin_frame_address(106);
+ case 106: return __builtin_frame_address(107);
+ case 107: return __builtin_frame_address(108);
+ case 108: return __builtin_frame_address(109);
+ case 109: return __builtin_frame_address(110);
+ case 110: return __builtin_frame_address(111);
+ case 111: return __builtin_frame_address(112);
+ case 112: return __builtin_frame_address(113);
+ case 113: return __builtin_frame_address(114);
+ case 114: return __builtin_frame_address(115);
+ case 115: return __builtin_frame_address(116);
+ case 116: return __builtin_frame_address(117);
+ case 117: return __builtin_frame_address(118);
+ case 118: return __builtin_frame_address(119);
+ case 119: return __builtin_frame_address(120);
+ case 120: return __builtin_frame_address(121);
+ case 121: return __builtin_frame_address(122);
+ case 122: return __builtin_frame_address(123);
+ case 123: return __builtin_frame_address(124);
+ case 124: return __builtin_frame_address(125);
+ case 125: return __builtin_frame_address(126);
+ case 126: return __builtin_frame_address(127);
+ case 127: return __builtin_frame_address(128);
+ default: return NULL;
+ }
+}
+
+static inline void *
+realloc_safe(void *ptr, size_t size)
+{
+ void *nptr;
+
+ nptr = realloc (ptr, size);
+ if (nptr == NULL)
+ free (ptr);
+ return nptr;
+}
+
+int
+backtrace(void **buffer, int size)
+{
+ int i;
+
+ for (i = 1; getframeaddr(i + 1) != NULL && i != size + 1; i++) {
+ buffer[i - 1] = getreturnaddr(i);
+ if (buffer[i - 1] == NULL)
+ break;
+ }
+ return i - 1;
+}
+
+char **
+backtrace_symbols(void *const *buffer, int size)
+{
+ size_t clen, alen;
+ int i, offset;
+ char **rval;
+ Dl_info info;
+
+ clen = size * sizeof(char *);
+ rval = malloc(clen);
+ if (rval == NULL)
+ return NULL;
+ for (i = 0; i < size; i++) {
+ if (dladdr(buffer[i], &info) != 0) {
+ if (info.dli_sname == NULL)
+ info.dli_sname = "???";
+ if (info.dli_saddr == NULL)
+ info.dli_saddr = buffer[i];
+ offset = buffer[i] - info.dli_saddr;
+ /* "0x01234567 <function+offset> at filename" */
+ alen = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 2 + /* " <" */
+ strlen(info.dli_sname) + /* "function" */
+ 1 + /* "+" */
+ 10 + /* "offset */
+ 5 + /* "> at " */
+ strlen(info.dli_fname) + /* "filename" */
+ 1; /* "\0" */
+ rval = realloc_safe(rval, clen + alen);
+ if (rval == NULL)
+ return NULL;
+ snprintf((char *) rval + clen, alen, "%p <%s+%d> at %s",
+ buffer[i], info.dli_sname, offset, info.dli_fname);
+ } else {
+ alen = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 1; /* "\0" */
+ rval = realloc_safe(rval, clen + alen);
+ if (rval == NULL)
+ return NULL;
+ snprintf((char *) rval + clen, alen, "%p", buffer[i]);
+ }
+ rval[i] = (char *) clen;
+ clen += alen;
+ }
+
+ for (i = 0; i < size; i++)
+ rval[i] += (long) rval;
+
+ return rval;
+}
+
+void
+backtrace_symbols_fd(void *const *buffer, int size, int fd)
+{
+ int i, len, offset;
+ char *buf;
+ Dl_info info;
+
+ for (i = 0; i < size; i++) {
+ if (dladdr(buffer[i], &info) != 0) {
+ if (info.dli_sname == NULL)
+ info.dli_sname = "???";
+ if (info.dli_saddr == NULL)
+ info.dli_saddr = buffer[i];
+ offset = buffer[i] - info.dli_saddr;
+ /* "0x01234567 <function+offset> at filename" */
+ len = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 2 + /* " <" */
+ strlen(info.dli_sname) + /* "function" */
+ 1 + /* "+" */
+ D10(offset) + /* "offset */
+ 5 + /* "> at " */
+ strlen(info.dli_fname) + /* "filename" */
+ 2; /* "\n\0" */
+ buf = alloca(len);
+ if (buf == NULL)
+ return;
+ snprintf(buf, len, "%p <%s+%d> at %s\n",
+ buffer[i], info.dli_sname, offset, info.dli_fname);
+ } else {
+ len = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 2; /* "\n\0" */
+ buf = alloca(len);
+ if (buf == NULL)
+ return;
+ snprintf(buf, len, "%p\n", buffer[i]);
+ }
+ if (write(fd, buf, strlen(buf)) == -1)
+ return;
+ }
+}
+#endif
diff --git a/contrib/libexecinfo/execinfo_compat.h b/contrib/libexecinfo/execinfo_compat.h
new file mode 100644
index 00000000000..ae84cfb1f35
--- /dev/null
+++ b/contrib/libexecinfo/execinfo_compat.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2003 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: execinfo.h,v 1.2 2004/07/19 05:20:29 sobomax Exp $
+ */
+
+#ifndef _EXECINFO_H_
+#define _EXECINFO_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef HAVE_BACKTRACE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int backtrace(void **, int);
+extern char **backtrace_symbols(void *const *, int);
+extern void backtrace_symbols_fd(void *const *, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
+#endif /* _EXECINFO_H_ */
diff --git a/contrib/libgen/basename_r.c b/contrib/libgen/basename_r.c
new file mode 100644
index 00000000000..2c3a87afe1c
--- /dev/null
+++ b/contrib/libgen/basename_r.c
@@ -0,0 +1,40 @@
+/*
+ * borrowed from glibc-2.12.1/string/basename.c
+ * Modified to return "." for NULL or "", as required for SUSv2.
+ */
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_BASENAME
+
+/* Return the name-within-directory of a file name.
+ Copyright (C) 1996,97,98,2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+char *
+basename_r (filename)
+ const char *filename;
+{
+ char *p;
+
+ if ((filename == NULL) || (*filename == '\0'))
+ return ".";
+
+ p = strrchr (filename, '/');
+ return p ? p + 1 : (char *) filename;
+}
+#endif /* THREAD_UNSAFE_BASENAME */
diff --git a/contrib/libgen/dirname_r.c b/contrib/libgen/dirname_r.c
new file mode 100644
index 00000000000..131cbcf2a96
--- /dev/null
+++ b/contrib/libgen/dirname_r.c
@@ -0,0 +1,243 @@
+/*
+ * Borrowed from glibc-2.12.1/string/memrchr.c
+ * Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ * Removed code for long bigger than 32 bytes, renamed __ptr_t as void *
+ * changed reg_char type to char.
+ */
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_DIRNAME
+
+/* memrchr -- find the last occurrence of a byte in a memory block
+ Copyright (C) 1991, 93, 96, 97, 99, 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+void *
+__memrchr (s, c_in, n)
+ const void * s;
+ int c_in;
+ size_t n;
+{
+ const unsigned char *char_ptr;
+ const unsigned long int *longword_ptr;
+ unsigned long int longword, magic_bits, charmask;
+ unsigned char c;
+
+ c = (unsigned char) c_in;
+
+ /* Handle the last few characters by reading one character at a time.
+ Do this until CHAR_PTR is aligned on a longword boundary. */
+ for (char_ptr = (const unsigned char *) s + n;
+ n > 0 && ((unsigned long int) char_ptr
+ & (sizeof (longword) - 1)) != 0;
+ --n)
+ if (*--char_ptr == c)
+ return (void *) char_ptr;
+
+ /* All these elucidatory comments refer to 4-byte longwords,
+ but the theory applies equally well to 8-byte longwords. */
+
+ longword_ptr = (const unsigned long int *) char_ptr;
+
+ /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
+ the "holes." Note that there is a hole just to the left of
+ each byte, with an extra at the end:
+
+ bits: 01111110 11111110 11111110 11111111
+ bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+ The 1-bits make sure that carries propagate to the next 0-bit.
+ The 0-bits provide holes for carries to fall into. */
+
+ if (sizeof (longword) != 4 && sizeof (longword) != 8)
+ abort ();
+
+ magic_bits = 0x7efefeff;
+
+ /* Set up a longword, each of whose bytes is C. */
+ charmask = c | (c << 8);
+ charmask |= charmask << 16;
+
+ /* Instead of the traditional loop which tests each character,
+ we will test a longword at a time. The tricky part is testing
+ if *any of the four* bytes in the longword in question are zero. */
+ while (n >= sizeof (longword))
+ {
+ /* We tentatively exit the loop if adding MAGIC_BITS to
+ LONGWORD fails to change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-30 is set, there will be a carry
+ into bit 31, so all of the hole bits will be changed.
+
+ The one misfire occurs when bits 24-30 are clear and bit
+ 31 is set; in this case, the hole at bit 31 is not
+ changed. If we had access to the processor carry flag,
+ we could close this loophole by putting the fourth hole
+ at bit 32!
+
+ So it ignores everything except 128's, when they're aligned
+ properly.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+ longword = *--longword_ptr ^ charmask;
+
+ /* Add MAGIC_BITS to LONGWORD. */
+ if ((((longword + magic_bits)
+
+ /* Set those bits that were unchanged by the addition. */
+ ^ ~longword)
+
+ /* Look at only the hole bits. If any of the hole bits
+ are unchanged, most likely one of the bytes was a
+ zero. */
+ & ~magic_bits) != 0)
+ {
+ /* Which of the bytes was C? If none of them were, it was
+ a misfire; continue the search. */
+
+ const unsigned char *cp = (const unsigned char *) longword_ptr;
+
+ if (cp[3] == c)
+ return (void *) &cp[3];
+ if (cp[2] == c)
+ return (void *) &cp[2];
+ if (cp[1] == c)
+ return (void *) &cp[1];
+ if (cp[0] == c)
+ return (void *) cp;
+ }
+
+ n -= sizeof (longword);
+ }
+
+ char_ptr = (const unsigned char *) longword_ptr;
+
+ while (n-- > 0)
+ {
+ if (*--char_ptr == c)
+ return (void *) char_ptr;
+ }
+
+ return 0;
+}
+
+/*
+ * Borrowed from glibc-2.12.1/misc/dirname.c
+ */
+
+/* dirname - return directory part of PATH.
+ Copyright (C) 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+char *
+dirname_r (char *path)
+{
+ static const char dot[] = ".";
+ char *last_slash;
+
+ /* Find last '/'. */
+ last_slash = path != NULL ? strrchr (path, '/') : NULL;
+
+ if (last_slash != NULL && last_slash != path && last_slash[1] == '\0')
+ {
+ /* Determine whether all remaining characters are slashes. */
+ char *runp;
+
+ for (runp = last_slash; runp != path; --runp)
+ if (runp[-1] != '/')
+ break;
+
+ /* The '/' is the last character, we have to look further. */
+ if (runp != path)
+ last_slash = __memrchr (path, '/', runp - path);
+ }
+
+ if (last_slash != NULL)
+ {
+ /* Determine whether all remaining characters are slashes. */
+ char *runp;
+
+ for (runp = last_slash; runp != path; --runp)
+ if (runp[-1] != '/')
+ break;
+
+ /* Terminate the path. */
+ if (runp == path)
+ {
+ /* The last slash is the first character in the string. We have to
+ return "/". As a special case we have to return "//" if there
+ are exactly two slashes at the beginning of the string. See
+ XBD 4.10 Path Name Resolution for more information. */
+ if (last_slash == path + 1)
+ ++last_slash;
+ else
+ last_slash = path + 1;
+ }
+ else
+ last_slash = runp;
+
+ last_slash[0] = '\0';
+ }
+ else
+ /* This assignment is ill-designed but the XPG specs require to
+ return a string containing "." in any case no directory part is
+ found and so a static and constant string is required. */
+ path = (char *) dot;
+
+ return path;
+}
+#endif /* THREAD_UNSAFE_DIRNAME */
diff --git a/contrib/macfuse/fuse_param.h b/contrib/macfuse/fuse_param.h
index 81d753c6cd7..347db9464bc 100644
--- a/contrib/macfuse/fuse_param.h
+++ b/contrib/macfuse/fuse_param.h
@@ -1,4 +1,9 @@
/*
+ * 'rebel' branch modifications:
+ * Copyright (C) 2010 Tuxera. All Rights Reserved.
+ */
+
+/*
* Copyright (C) 2006-2008 Google. All Rights Reserved.
* Amit Singh <singh@>
*/
@@ -6,69 +11,81 @@
#ifndef _FUSE_PARAM_H_
#define _FUSE_PARAM_H_
-/* Compile-time tunables (M_MACFUSE*) */
-
-#define M_MACFUSE_ENABLE_FIFOFS 0
-#define M_MACFUSE_ENABLE_INTERRUPT 1
-#define M_MACFUSE_ENABLE_SPECFS 0
-#define M_MACFUSE_ENABLE_TSLOCKING 0
-#define M_MACFUSE_ENABLE_UNSUPPORTED 1
-#define M_MACFUSE_ENABLE_XATTR 1
-
-#if M_MACFUSE_ENABLE_UNSUPPORTED
- #define M_MACFUSE_ENABLE_DSELECT 0
- #define M_MACFUSE_ENABLE_EXCHANGE 1
- #define M_MACFUSE_ENABLE_KQUEUE 1
- #define M_MACFUSE_ENABLE_KUNC 0
-#if __LP64__
- #define M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK 1
-#endif /* __LP64__ */
-#endif /* M_MACFUSE_ENABLE_UNSUPPORTED */
-
-#if M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK
-#define FUSE_VNOP_EXPORT __private_extern__
+#include <AvailabilityMacros.h>
+
+/* Compile-time tunables (M_OSXFUSE*) */
+
+#define M_OSXFUSE_ENABLE_FIFOFS 0
+#define M_OSXFUSE_ENABLE_INTERRUPT 1
+#define M_OSXFUSE_ENABLE_SPECFS 0
+#define M_OSXFUSE_ENABLE_TSLOCKING 1
+#define M_OSXFUSE_ENABLE_UNSUPPORTED 1
+#define M_OSXFUSE_ENABLE_XATTR 1
+#define M_OSXFUSE_ENABLE_DSELECT 1
+
+#if M_OSXFUSE_ENABLE_UNSUPPORTED
+# define M_OSXFUSE_ENABLE_EXCHANGE 1
+# define M_OSXFUSE_ENABLE_KUNC 0
+# define M_OSXFUSE_ENABLE_INTERIM_FSNODE_LOCK 1
+#endif /* M_OSXFUSE_ENABLE_UNSUPPORTED */
+
+#if MAC_OS_X_VERSION_MIN_REQUIRED < 1060
+# if M_OSXFUSE_ENABLE_UNSUPPORTED
+ /*
+ * In Mac OS X 10.5 the file system implementation is responsible for
+ * posting kqueue events. Starting with Mac OS X 10.6 VFS took over that
+ * job.
+ */
+# define M_OSXFUSE_ENABLE_KQUEUE 1
+# endif
+#endif /* MAC_OS_X_VERSION_MIN_REQUIRED < 1060 */
+
+#if M_OSXFUSE_ENABLE_INTERIM_FSNODE_LOCK
+# define M_OSXFUSE_ENABLE_HUGE_LOCK 0
+# define M_OSXFUSE_ENABLE_LOCK_LOGGING 0
+# define FUSE_VNOP_EXPORT __private_extern__
#else
-#define FUSE_VNOP_EXPORT static
-#endif /* M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK */
+# define FUSE_VNOP_EXPORT static
+#endif /* M_OSXFUSE_ENABLE_INTERIM_FSNODE_LOCK */
/* User Control */
-#define MACFUSE_POSTUNMOUNT_SIGNAL SIGKILL
+#define OSXFUSE_POSTUNMOUNT_SIGNAL SIGKILL
#define MACOSX_ADMIN_GROUP_NAME "admin"
-#define SYSCTL_MACFUSE_TUNABLES_ADMIN "macfuse.tunables.admin_group"
-#define SYSCTL_MACFUSE_VERSION_NUMBER "macfuse.version.number"
+#define SYSCTL_OSXFUSE_TUNABLES_ADMIN "osxfuse.tunables.admin_group"
+#define SYSCTL_OSXFUSE_VERSION_NUMBER "osxfuse.version.number"
/* Paths */
-#define MACFUSE_BUNDLE_PATH "/Library/Filesystems/fusefs.fs"
-#define MACFUSE_KEXT MACFUSE_BUNDLE_PATH "/Support/fusefs.kext"
-#define MACFUSE_LOAD_PROG MACFUSE_BUNDLE_PATH "/Support/load_fusefs"
-#define MACFUSE_MOUNT_PROG MACFUSE_BUNDLE_PATH "/Support/mount_fusefs"
+#define OSXFUSE_BUNDLE_PATH "/Library/Filesystems/osxfusefs.fs"
+#define OSXFUSE_KEXT OSXFUSE_BUNDLE_PATH "/Support/osxfusefs.kext"
+#define OSXFUSE_LOAD_PROG OSXFUSE_BUNDLE_PATH "/Support/load_osxfusefs"
+#define OSXFUSE_MOUNT_PROG OSXFUSE_BUNDLE_PATH "/Support/mount_osxfusefs"
#define SYSTEM_KEXTLOAD "/sbin/kextload"
#define SYSTEM_KEXTUNLOAD "/sbin/kextunload"
/* Compatible API version */
-#define MACFUSE_MIN_USER_VERSION_MAJOR 7
-#define MACFUSE_MIN_USER_VERSION_MINOR 5
+#define OSXFUSE_MIN_USER_VERSION_MAJOR 7
+#define OSXFUSE_MIN_USER_VERSION_MINOR 5
/* Device Interface */
/*
- * This is the prefix ("fuse" by default) of the name of a FUSE device node
- * in devfs. The suffix is the device number. "/dev/fuse0" is the first FUSE
+ * This is the prefix ("osxfuse" by default) of the name of a FUSE device node
+ * in devfs. The suffix is the device number. "/dev/osxfuse0" is the first FUSE
* device by default. If you change the prefix from the default to something
* else, the user-space FUSE library will need to know about it too.
*/
-#define MACFUSE_DEVICE_BASENAME "fuse"
+#define OSXFUSE_DEVICE_BASENAME "osxfuse"
/*
- * This is the number of /dev/fuse<n> nodes we will create. <n> goes from
- * 0 to (FUSE_NDEVICES - 1).
+ * This is the number of /dev/osxfuse<n> nodes we will create. <n> goes from
+ * 0 to (OSXFUSE_NDEVICES - 1).
*/
-#define MACFUSE_NDEVICES 24
+#define OSXFUSE_NDEVICES 24
/*
* This is the default block size of the virtual storage devices that are
@@ -131,13 +148,13 @@
/* User-Kernel IPC Buffer */
#define FUSE_MIN_USERKERNEL_BUFSIZE (128 * 1024)
-#define FUSE_MAX_USERKERNEL_BUFSIZE (4096 * 1024)
+#define FUSE_MAX_USERKERNEL_BUFSIZE (16 * 1024 * 1024)
#define FUSE_REASONABLE_XATTRSIZE FUSE_MIN_USERKERNEL_BUFSIZE
#endif /* KERNEL */
-#define FUSE_DEFAULT_USERKERNEL_BUFSIZE (4096 * 1024)
+#define FUSE_DEFAULT_USERKERNEL_BUFSIZE (16 * 1024 * 1024)
#define FUSE_LINK_MAX LINK_MAX
#define FUSE_UIO_BACKUP_MAX 8
diff --git a/contrib/macfuse/mount_darwin.c b/contrib/macfuse/mount_darwin.c
index c485583e96b..d1d1c34e761 100644
--- a/contrib/macfuse/mount_darwin.c
+++ b/contrib/macfuse/mount_darwin.c
@@ -1,5 +1,5 @@
/*
- * Derived from mount_bsd.c from the fuse distribution.
+ * Derived from mount_bsd.c from the fuse distribution.
*
* FUSE: Filesystem in Userspace
* Copyright (C) 2005-2006 Csaba Henk <csaba.henk@creo.hu>
@@ -34,324 +34,231 @@
#include "fuse_param.h"
#include "fuse_ioctl.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
#define GFFUSE_LOGERR(...) \
gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
-static long
-fuse_os_version_major_np(void)
-{
- int ret = 0;
- long major = 0;
- char *c = NULL;
- struct utsname u;
- size_t oldlen;
-
- oldlen = sizeof(u.release);
-
- ret = sysctlbyname("kern.osrelease", u.release, &oldlen, NULL, 0);
- if (ret != 0) {
- return -1;
- }
-
- c = strchr(u.release, '.');
- if (c == NULL) {
- return -1;
- }
-
- *c = '\0';
-
- errno = 0;
- major = strtol(u.release, NULL, 10);
- if ((errno == EINVAL) || (errno == ERANGE)) {
- return -1;
- }
-
- return major;
-}
-
-static int
-fuse_running_under_rosetta(void)
-{
- int result = 0;
- int is_native = 1;
- size_t sz = sizeof(result);
-
- int ret = sysctlbyname("sysctl.proc_native", &result, &sz, NULL, (size_t)0);
- if ((ret == 0) && !result) {
- is_native = 0;
- }
-
- return !is_native;
-}
-
-static int
-loadkmod(void)
-{
- int result = -1;
- int pid, terminated_pid;
- union wait status;
- long major;
-
- major = fuse_os_version_major_np();
-
- if (major < 9) { /* not Mac OS X 10.5+ */
- return EINVAL;
- }
-
- pid = fork();
-
- if (pid == 0) {
- execl(MACFUSE_LOAD_PROG, MACFUSE_LOAD_PROG, NULL);
-
- /* exec failed */
- exit(ENOENT);
- }
-
- require_action(pid != -1, Return, result = errno);
-
- while ((terminated_pid = wait4(pid, (int *)&status, 0, NULL)) < 0) {
- /* retry if EINTR, else break out with error */
- if (errno != EINTR) {
- break;
- }
- }
-
- if ((terminated_pid == pid) && (WIFEXITED(status))) {
- result = WEXITSTATUS(status);
- } else {
- result = -1;
- }
-
-Return:
- check_noerr_string(result, strerror(errno));
-
- return result;
-}
-
int
gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
- pid_t *mtab_pid /* not used on OS X */)
+ pid_t *mnt_pid, int status_fd) /* Not used on OS X */
{
- int fd, pid;
- int result;
- char *fdnam, *dev;
- const char *mountprog = MACFUSE_MOUNT_PROG;
- sig_t chldf;
+ int fd = 0;
+ int pid = 0;
+ int ret = 0;
+ char *fdnam = NULL;
+ char *dev = NULL;
+ char vstr[4];
+ unsigned vval = 0;
+ int i = 0;
+
+ const char *mountprog = OSXFUSE_MOUNT_PROG;
+ sig_t chldf = SIG_ERR;
+ char version[MAXHOSTNAMELEN + 1] = { 0 };
+ size_t version_len = MAXHOSTNAMELEN;
+ size_t version_len_desired = 0;
+ int r = 0;
+ char devpath[MAXPATHLEN] = { 0 };;
+
+ if (!mountpoint) {
+ gf_log ("glustefs-fuse", GF_LOG_ERROR,
+ "missing or invalid mount point");
+ goto err;
+ }
- /* mount_fusefs should not try to spawn the daemon */
- setenv("MOUNT_FUSEFS_SAFE", "1", 1);
+ /* mount_fusefs should not try to spawn the daemon */
+ setenv("MOUNT_FUSEFS_SAFE", "1", 1);
- /* to notify mount_fusefs it's called from lib */
- setenv("MOUNT_FUSEFS_CALL_BY_LIB", "1", 1);
+ /* to notify mount_fusefs it's called from lib */
+ setenv("MOUNT_FUSEFS_CALL_BY_LIB", "1", 1);
- if (!mountpoint) {
- fprintf(stderr, "missing or invalid mount point\n");
- return -1;
- }
+ chldf = signal(SIGCHLD, SIG_DFL); /* So that we can wait4() below. */
- if (fuse_running_under_rosetta()) {
- fprintf(stderr, "MacFUSE does not work under Rosetta\n");
- return -1;
- }
+ if (chldf == SIG_ERR) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "signal() returned SIG_ERR: %s",
+ strerror(errno));
+ goto err;
+ }
- chldf = signal(SIGCHLD, SIG_DFL); /* So that we can wait4() below. */
+ /* check for user<->kernel match. */
+ ret = sysctlbyname(SYSCTL_OSXFUSE_VERSION_NUMBER, version,
+ &version_len, NULL, (size_t)0);
+ if (ret != 0) {
+ gf_log ("glustefs-fuse", GF_LOG_ERROR,
+ "sysctlbyname() returned error: %s",
+ strerror(errno));
+ goto err;
+ }
- result = loadkmod();
- if (result == EINVAL)
- GFFUSE_LOGERR("OS X >= 10.5 (at least Leopard) required");
- else if (result == 0 || result == ENOENT || result == EBUSY) {
- /* Module loaded, but now need to check for user<->kernel match. */
+ /* sysctlbyname() includes the trailing '\0' in version_len */
+ version_len_desired = sizeof ("2.x.y");
- char version[MAXHOSTNAMELEN + 1] = { 0 };
- size_t version_len = MAXHOSTNAMELEN;
- size_t version_len_desired = 0;
+ if (version_len != version_len_desired) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "version length mismatch for OSXFUSE %s",
+ version);
+ ret = -1;
+ goto err;
+ }
- result = sysctlbyname(SYSCTL_MACFUSE_VERSION_NUMBER, version,
- &version_len, NULL, (size_t)0);
- if (result == 0) {
- /* sysctlbyname() includes the trailing '\0' in version_len */
- version_len_desired = strlen("2.x.y") + 1;
-
- if (version_len != version_len_desired)
- result = -1;
- } else
- strcpy(version, "?.?.?");
- if (result == 0) {
- char *ep;
- char vstr[4];
- unsigned vval;
- int i;
-
- for (i = 0; i < 3; i++)
+ for (i = 0; i < 3; i++)
vstr[i] = version[2*i];
- vstr[3] = '\0';
-
- vval = strtoul(vstr, &ep, 10);
- if (*ep || vval < 203 || vval > 217)
- result = -1;
- else
- gf_log("glusterfs-fuse", GF_LOG_INFO,
- "MacFUSE kext version %s", version);
- }
- if (result != 0)
- GFFUSE_LOGERR("MacFUSE version %s is not supported", version);
- } else
- GFFUSE_LOGERR("cannot load MacFUSE kext");
- if (result != 0)
- return -1;
-
- fdnam = getenv("FUSE_DEV_FD");
-
- if (fdnam) {
- char *ep;
-
- fd = strtol(fdnam, &ep, 10);
- if (*ep != '\0' || fd < 0) {
- GFFUSE_LOGERR("invalid value given in FUSE_DEV_FD");
- return -1;
+ vstr[3] = '\0';
+
+ vval = strtoul(vstr, NULL, 10);
+ if (vval < 264) {
+ GFFUSE_LOGERR("OSXFUSE version %s is not supported", version);
+ ret = -1;
+ goto err;
}
- goto mount;
- }
+ gf_log("glusterfs-fuse", GF_LOG_INFO,
+ "OSXFUSE kext version supported %s", version);
- dev = getenv("FUSE_DEV_NAME");
- if (dev) {
- if ((fd = open(dev, O_RDWR)) < 0) {
- GFFUSE_LOGERR("failed to open device (%s)", strerror(errno));
- return -1;
+ fdnam = getenv("FUSE_DEV_FD");
+ if (fdnam) {
+ fd = strtol(fdnam, NULL, 10);
+ if (fd < 0) {
+ GFFUSE_LOGERR("invalid value given in FUSE_DEV_FD");
+ ret = -1;
+ goto err;
+ }
+ goto mount;
}
- } else {
- int r, devidx = -1;
- char devpath[MAXPATHLEN];
-
- for (r = 0; r < MACFUSE_NDEVICES; r++) {
- snprintf(devpath, MAXPATHLEN - 1,
- _PATH_DEV MACFUSE_DEVICE_BASENAME "%d", r);
- fd = open(devpath, O_RDWR);
- if (fd >= 0) {
- dev = devpath;
- devidx = r;
- break;
- }
+
+ dev = getenv("FUSE_DEV_NAME");
+ if (!dev) {
+ for (r = 0; r < OSXFUSE_NDEVICES; r++) {
+ snprintf(devpath, MAXPATHLEN - 1,
+ _PATH_DEV OSXFUSE_DEVICE_BASENAME "%d", r);
+ if ((fd = open(devpath, O_RDWR)) < 0) {
+ GFFUSE_LOGERR("failed to open device %s (%s)",
+ devpath,
+ strerror(errno));
+ goto err;
+ }
+ dev = devpath;
+ goto mount;
+ }
}
- if (devidx == -1) {
- GFFUSE_LOGERR("failed to open device (%s)", strerror(errno));
- return -1;
+
+ fd = open(dev, O_RDWR);
+ if (fd < 0) {
+ GFFUSE_LOGERR("failed to open device %s (%s)", dev,
+ strerror(errno));
+ ret = -1;
+ goto err;
}
- }
mount:
- if (getenv("FUSE_NO_MOUNT") || ! mountpoint)
- goto out;
-
- signal(SIGCHLD, chldf);
-
- pid = fork();
-
- if (pid == -1) {
- GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
- close(fd);
- return -1;
- }
-
- if (pid == 0) {
+ signal(SIGCHLD, chldf);
pid = fork();
if (pid == -1) {
- GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
- close(fd);
- exit(1);
+ GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
+ ret = -1;
+ goto err;
}
if (pid == 0) {
- const char *argv[32];
- int a = 0;
- char *opts = NULL;
-
- if (asprintf(&opts, "%s,fssubtype=glusterfs", mnt_param) == -1) {
- GFFUSE_LOGERR("Out of memory");
- exit(1);
- }
-
- if (! fdnam)
- asprintf(&fdnam, "%d", fd);
-
- argv[a++] = mountprog;
- if (opts) {
- argv[a++] = "-o";
- argv[a++] = opts;
- }
- argv[a++] = fdnam;
- argv[a++] = mountpoint;
- argv[a++] = NULL;
-
- {
- char title[MAXPATHLEN + 1] = { 0 };
- u_int32_t len = MAXPATHLEN;
- int ret = proc_pidpath(getpid(), title, len);
- if (ret) {
- setenv("MOUNT_FUSEFS_DAEMON_PATH", title, 1);
+ pid = fork();
+ if (pid == -1) {
+ GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
+ ret = -1;
+ goto err;
}
- }
- execvp(mountprog, (char **) argv);
- GFFUSE_LOGERR("MacFUSE: failed to exec mount program (%s)", strerror(errno));
- exit(1);
- }
- _exit(0);
- }
-
-out:
- return fd;
+ if (pid == 0) {
+ const char *argv[32];
+ int a = 0;
+ char *opts = NULL;
+
+ if (asprintf(&opts, "%s,fssubtype=glusterfs",
+ mnt_param) == -1) {
+ GFFUSE_LOGERR("asprintf() error: %s",
+ strerror(errno));
+ ret = -1;
+ goto err;
+ }
+
+ if (!fdnam)
+ asprintf(&fdnam, "%d", fd);
+
+ argv[a++] = mountprog;
+ if (opts) {
+ argv[a++] = "-o";
+ argv[a++] = opts;
+ }
+ argv[a++] = fdnam;
+ argv[a++] = mountpoint;
+ argv[a++] = NULL;
+
+ {
+ char title[MAXPATHLEN + 1] = { 0 };
+ u_int32_t len = MAXPATHLEN;
+ int ret = proc_pidpath(getpid(), title, len);
+ if (ret) {
+ setenv("MOUNT_FUSEFS_DAEMON_PATH",
+ title, 1);
+ }
+ }
+ execvp(mountprog, (char **) argv);
+ GFFUSE_LOGERR("OSXFUSE: failed to exec mount"
+ " program (%s)", strerror(errno));
+ _exit(1);
+ }
+ _exit(0);
+ }
+ ret = fd;
+err:
+ if (ret == -1) {
+ if (fd > 0) {
+ close(fd);
+ }
+ }
+ return ret;
}
void
gf_fuse_unmount(const char *mountpoint, int fd)
{
- int ret;
- struct stat sbuf;
- char dev[128];
- char resolved_path[PATH_MAX];
- char *ep, *rp = NULL;
-
- unsigned int hs_complete = 0;
+ int ret;
+ struct stat sbuf;
+ char dev[128];
+ char resolved_path[PATH_MAX];
+ char *ep, *rp = NULL;
- ret = ioctl(fd, FUSEDEVIOCGETHANDSHAKECOMPLETE, &hs_complete);
- if (ret || !hs_complete) {
- return;
- }
- /* XXX does this have any use here? */
- ret = ioctl(fd, FUSEDEVIOCSETDAEMONDEAD, &fd);
- if (ret) {
- return;
- }
+ unsigned int hs_complete = 0;
- if (fstat(fd, &sbuf) == -1) {
- return;
- }
+ ret = ioctl(fd, FUSEDEVIOCGETHANDSHAKECOMPLETE, &hs_complete);
+ if (ret || !hs_complete) {
+ return;
+ }
- devname_r(sbuf.st_rdev, S_IFCHR, dev, 128);
+ if (fstat(fd, &sbuf) == -1) {
+ return;
+ }
- if (strncmp(dev, MACFUSE_DEVICE_BASENAME,
- sizeof(MACFUSE_DEVICE_BASENAME) - 1)) {
- return;
- }
+ devname_r(sbuf.st_rdev, S_IFCHR, dev, 128);
- strtol(dev + 4, &ep, 10);
- if (*ep != '\0') {
- return;
- }
+ if (strncmp(dev, OSXFUSE_DEVICE_BASENAME,
+ sizeof(OSXFUSE_DEVICE_BASENAME) - 1)) {
+ return;
+ }
- rp = realpath(mountpoint, resolved_path);
- if (rp) {
- ret = unmount(resolved_path, 0);
- }
+ strtol(dev + sizeof(OSXFUSE_DEVICE_BASENAME) - 1, &ep, 10);
+ if (*ep != '\0') {
+ return;
+ }
- close(fd);
+ rp = realpath(mountpoint, resolved_path);
+ if (rp) {
+ ret = unmount(resolved_path, 0);
+ }
- return;
+ close(fd);
+ return;
}
diff --git a/contrib/mount/mntent.c b/contrib/mount/mntent.c
new file mode 100644
index 00000000000..9a7e5f39bdb
--- /dev/null
+++ b/contrib/mount/mntent.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1980, 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2001
+ * David Rufino <daverufino@btinternet.com>
+ * Copyright (c) 2014
+ * Red Hat, Inc. <http://www.redhat.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(GF_LINUX_HOST_OS)
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <sys/ucred.h>
+#include <sys/mount.h>
+#include "mntent_compat.h"
+
+#ifdef __NetBSD__
+typedef struct statvfs gf_statfs_t;
+#else
+typedef struct statfs gf_statfs_t;
+#endif
+
+typedef struct _mntent_state {
+ struct mntent mntent;
+ gf_statfs_t *statfs;
+ int count;
+ int pos;
+ /* A buffer big enough to store all defined flags as a string.
+ * Increase it if necessary when more flags are defined. */
+ char buf[256];
+} mntent_state_t;
+
+typedef struct _mntflag {
+ unsigned long value;
+ const char *on;
+ const char *off;
+} mntflag_t;
+
+static mntflag_t mntflags[] = {
+ { MNT_RDONLY, "ro", "rw" },
+ { MNT_SYNCHRONOUS, "sync", NULL },
+ { MNT_NOEXEC, "noexec", NULL },
+ { MNT_NOSUID, "nosuid", NULL },
+#if !defined(__FreeBSD__)
+ { MNT_NODEV, "nodev", NULL },
+#endif /* __FreeBSD__ */
+ { MNT_UNION, "union", NULL },
+ { MNT_ASYNC, "async", NULL },
+#if !defined(GF_DARWIN_HOST_OS)
+ { MNT_NOATIME, "noatime", NULL },
+#if !defined(__NetBSD__)
+ { MNT_NOCLUSTERR, "noclusterr", NULL },
+ { MNT_NOCLUSTERW, "noclusterw", NULL },
+ { MNT_NOSYMFOLLOW, "nosymfollow", NULL },
+ { MNT_SUIDDIR, "suiddir", NULL },
+#endif /* !__NetBSD__ */
+#endif /* !GF_DARWIN_HOST_OS */
+ { 0, NULL, NULL }
+};
+
+char *
+hasmntopt (const struct mntent *mnt, const char *option)
+{
+ char *opt, *optbuf;
+ int len;
+
+ optbuf = strdup(mnt->mnt_opts);
+ if (optbuf == NULL) {
+ return NULL;
+ }
+
+ opt = optbuf;
+ len = 0;
+ while (*opt) {
+ while (opt[len] != 0) {
+ if (opt[len] == ' ') {
+ opt[len++] = 0;
+ break;
+ }
+ len++;
+ }
+ if ((*opt != 0) && (strcasecmp(opt, option) == 0)) {
+ break;
+ }
+ opt += len;
+ len = 0;
+ }
+ free(optbuf);
+ if (len == 0) {
+ return NULL;
+ }
+
+ return opt - optbuf + mnt->mnt_opts;
+}
+
+static int
+writeopt(const char *text, char *buf, int buflen, int pos)
+{
+ int len;
+
+ /* buflen must be > 0 */
+
+ if (text == NULL) {
+ return pos;
+ }
+
+ buf += pos;
+ if (pos > 0) {
+ /* We are sure we have at least one byte to store the space.
+ * We don't need to check buflen here. */
+ *buf++ = ' ';
+ pos++;
+ }
+ len = strlen(text) + 1;
+ pos += len;
+ if (pos >= buflen) {
+ /* There won't be enough space for the text and the
+ * terminating null character. We copy as much as we can
+ * of the text and mark the end of the string with '...' */
+ memcpy(buf, text, buflen - pos + len);
+ if (buflen > 3) {
+ strcpy(buf + buflen - 4, "...");
+ } else {
+ strncpy(buf, "...", buflen - 1);
+ buf[buflen - 1] = 0;
+ }
+ pos = buflen;
+ } else {
+ memcpy(buf, text, len);
+ }
+
+ return pos;
+}
+
+static char *
+flags2opts (int flags, char *buf, int buflen)
+{
+ char other[16];
+ mntflag_t *flg;
+ int pos;
+
+ if (buflen == 0) {
+ return NULL;
+ }
+
+ pos = 0;
+ for (flg = mntflags; flg->value != 0; flg++) {
+ pos = writeopt((flags & flg->value) == 0 ? flg->off : flg->on,
+ buf, buflen, pos);
+ flags &= ~flg->value;
+ }
+
+ if (flags != 0) {
+ sprintf(other, "[0x%x]", flags);
+ writeopt(other, buf, buflen, pos);
+ }
+
+ return buf;
+}
+
+static void
+statfs_to_mntent (struct mntent *mntent, gf_statfs_t *mntbuf, char *buf,
+ int buflen)
+{
+ int f_flags;
+
+ mntent->mnt_fsname = mntbuf->f_mntfromname;
+ mntent->mnt_dir = mntbuf->f_mntonname;
+ mntent->mnt_type = mntbuf->f_fstypename;
+
+#ifdef __NetBSD__
+ f_flags = mntbuf->f_flag;
+#else
+ f_flags = mntbuf->f_flags;
+#endif
+ mntent->mnt_opts = flags2opts (f_flags, buf, buflen);
+
+ mntent->mnt_freq = mntent->mnt_passno = 0;
+}
+
+struct mntent *
+getmntent_r (FILE *fp, struct mntent *mntent, char *buf, int buflen)
+{
+ mntent_state_t *state = (mntent_state_t *)fp;
+
+ if (state->pos >= state->count) {
+ return NULL;
+ }
+
+ statfs_to_mntent(mntent, &state->statfs[state->pos++], buf, buflen);
+
+ return mntent;
+}
+
+struct mntent *
+getmntent (FILE *fp)
+{
+ mntent_state_t *state = (mntent_state_t *)fp;
+
+ return getmntent_r(fp, &state->mntent, state->buf,
+ sizeof(state->buf));
+}
+
+FILE *
+setmntent (const char *filename, const char *type)
+{
+ mntent_state_t *state;
+
+ /* We don't really need to access any file so we'll use the FILE* as
+ * a fake file to store state information.
+ */
+
+ state = malloc(sizeof(mntent_state_t));
+ if (state != NULL) {
+ state->pos = 0;
+ state->count = getmntinfo(&state->statfs, MNT_NOWAIT);
+ }
+
+ return (FILE *)state;
+}
+
+int
+endmntent (FILE *fp)
+{
+ free(fp);
+
+ return 1; /* endmntent() always returns 1 */
+}
+
+#endif /* !GF_LINUX_HOST_OS */
diff --git a/contrib/mount/mntent_compat.h b/contrib/mount/mntent_compat.h
new file mode 100644
index 00000000000..ca82e9aa60f
--- /dev/null
+++ b/contrib/mount/mntent_compat.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MNTENT_H
+#define _MNTENT_H
+
+#if !defined(GF_LINUX_HOST_OS)
+#include <stdio.h>
+
+struct mntent {
+ char *mnt_fsname;
+ char *mnt_dir;
+ char *mnt_type;
+ char *mnt_opts;
+ int mnt_freq;
+ int mnt_passno;
+};
+
+struct mntent *getmntent (FILE *fp);
+struct mntent *getmntent_r (FILE *fp, struct mntent *result,
+ char *buffer, int bufsize);
+FILE *setmntent (const char *filename, const char *type);
+int endmntent(FILE *fp);
+char * hasmntopt (const struct mntent *mnt, const char *option);
+
+/* Dummy - /etc/mtab has no meaning on OSX platform */
+#define _PATH_MOUNTED "/etc/mtab"
+
+#endif /* GF_DARWIN_HOST_OS || __NetBSD__ */
+#endif /* _MNTENT_H */
diff --git a/contrib/stdlib/gf_mkostemp.c b/contrib/stdlib/gf_mkostemp.c
deleted file mode 100644
index 988300adb1c..00000000000
--- a/contrib/stdlib/gf_mkostemp.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Borrowed from glibc-2.16/sysdeps/posix/tempname.c */
-
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/time.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <time.h>
-#include <inttypes.h>
-
-/* Copyright (C) 1991-2001, 2006, 2007, 2009 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-static const char letters[] =
-"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
-
-/* Generate a temporary file name based on TMPL. TMPL must match the
- rules for mk[s]temp (i.e. end in "XXXXXX", possibly with a suffix).
-*/
-
-int
-gf_mkostemp (char *tmpl, int suffixlen, int flags)
-{
- int len;
- char *XXXXXX;
- static uint64_t value;
- uint64_t random_time_bits;
- unsigned int count;
- int fd = -1;
-
- /* A lower bound on the number of temporary files to attempt to
- generate. The maximum total number of temporary file names that
- can exist for a given template is 62**6. It should never be
- necessary to try all these combinations. Instead if a reasonable
- number of names is tried (we define reasonable as 62**3) fail to
- give the system administrator the chance to remove the problems. */
-#define ATTEMPTS_MIN (62 * 62 * 62)
-
- /* The number of times to attempt to generate a temporary file. To
- conform to POSIX, this must be no smaller than TMP_MAX. */
-#if ATTEMPTS_MIN < TMP_MAX
- unsigned int attempts = TMP_MAX;
-#else
- unsigned int attempts = ATTEMPTS_MIN;
-#endif
-
- len = strlen (tmpl);
- if (len < 6 + suffixlen || memcmp (&tmpl[len - 6 - suffixlen],
- "XXXXXX", 6))
- return -1;
-
- /* This is where the Xs start. */
- XXXXXX = &tmpl[len - 6 - suffixlen];
-
- /* Get some more or less random data. */
-# if HAVE_GETTIMEOFDAY
- struct timeval tv;
- gettimeofday (&tv, NULL);
- random_time_bits = ((uint64_t) tv.tv_usec << 16) ^ tv.tv_sec;
-# else
- random_time_bits = time (NULL);
-# endif
-
- value += random_time_bits ^ getpid ();
-
- for (count = 0; count < attempts; value += 7777, ++count) {
- uint64_t v = value;
-
- /* Fill in the random bits. */
- XXXXXX[0] = letters[v % 62];
- v /= 62;
- XXXXXX[1] = letters[v % 62];
- v /= 62;
- XXXXXX[2] = letters[v % 62];
- v /= 62;
- XXXXXX[3] = letters[v % 62];
- v /= 62;
- XXXXXX[4] = letters[v % 62];
- v /= 62;
- XXXXXX[5] = letters[v % 62];
-
- fd = open (tmpl, (flags & ~O_ACCMODE)
- | O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
-
- if (fd >= 0)
- return fd;
- else if (errno != EEXIST)
- return -1;
- }
-
- /* We got out of the loop because we ran out of combinations to try. */
- return -1;
-}
diff --git a/contrib/timer-wheel/find_last_bit.c b/contrib/timer-wheel/find_last_bit.c
new file mode 100644
index 00000000000..192fee802a8
--- /dev/null
+++ b/contrib/timer-wheel/find_last_bit.c
@@ -0,0 +1,61 @@
+/* bit search implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/**
+ * @find_last_bit
+ * optimized implementation of find last bit in
+ */
+
+#ifndef BITS_PER_LONG
+#ifdef __LP64__
+#define BITS_PER_LONG 64
+#else
+#define BITS_PER_LONG 32
+#endif
+#endif
+
+unsigned long gw_tw_fls (unsigned long word)
+{
+ int num = BITS_PER_LONG;
+
+#if BITS_PER_LONG == 64
+ if (!(word & (~0ul << 32))) {
+ num -= 32;
+ word <<= 32;
+ }
+#endif
+ if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+ num -= 16;
+ word <<= 16;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+ num -= 8;
+ word <<= 8;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+ num -= 4;
+ word <<= 4;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+ num -= 2;
+ word <<= 2;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-1))))
+ num -= 1;
+ return num;
+}
diff --git a/contrib/timer-wheel/timer-wheel.c b/contrib/timer-wheel/timer-wheel.c
new file mode 100644
index 00000000000..58e0607bf0c
--- /dev/null
+++ b/contrib/timer-wheel/timer-wheel.c
@@ -0,0 +1,374 @@
+/*
+ * linux/kernel/timer.c
+ *
+ * Kernel internal timers
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ */
+/*
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/select.h>
+
+#include "timer-wheel.h"
+
+static inline void
+__gf_tw_add_timer (struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ int i;
+ unsigned long idx;
+ unsigned long expires;
+ struct list_head *vec;
+
+ expires = timer->expires;
+
+ idx = expires - base->timer_sec;
+
+ if (idx < TVR_SIZE) {
+ i = expires & TVR_MASK;
+ vec = base->tv1.vec + i;
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ i = (expires >> TVR_BITS) & TVN_MASK;
+ vec = base->tv2.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 2*TVN_BITS)) {
+ i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ vec = base->tv3.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 3*TVN_BITS)) {
+ i = (expires >> (TVR_BITS + 2*TVN_BITS)) & TVN_MASK;
+ vec = base->tv4.vec + i;
+ } else if (idx < 0) {
+ vec = base->tv1.vec + (base->timer_sec & TVR_MASK);
+ } else {
+ i = (expires >> (TVR_BITS + 3*TVN_BITS)) & TVN_MASK;
+ vec = base->tv5.vec + i;
+ }
+
+ list_add_tail (&timer->entry, vec);
+}
+
+unsigned long gf_tw_find_last_bit(const unsigned long *, unsigned long);
+
+#if defined(__GNUC__) || defined(__clang__)
+static inline unsigned long gf_tw_fls (unsigned long word)
+{
+ return BITS_PER_LONG - __builtin_clzl(word);
+}
+#else
+extern unsigned long gf_tw_fls (unsigned long);
+#endif
+
+static inline unsigned long
+apply_slack(struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ long delta;
+ unsigned long mask, expires, expires_limit;
+
+ expires = timer->expires;
+
+ delta = expires - base->timer_sec;
+ if (delta < 256)
+ return expires;
+
+ expires_limit = expires + delta / 256;
+ mask = expires ^ expires_limit;
+ if (mask == 0)
+ return expires;
+
+ int bit = gf_tw_fls (mask);
+ mask = (1UL << bit) - 1;
+
+ expires_limit = expires_limit & ~(mask);
+ return expires_limit;
+}
+
+static inline void
+__gf_tw_detach_timer (struct gf_tw_timer_list *timer)
+{
+ struct list_head *entry = &timer->entry;
+
+ list_del (entry);
+ entry->next = NULL;
+}
+
+static inline int
+cascade (struct tvec_base *base, struct tvec *tv, int index)
+{
+ struct gf_tw_timer_list *timer, *tmp;
+ struct list_head tv_list;
+
+ list_replace_init (tv->vec + index, &tv_list);
+
+ list_for_each_entry_safe (timer, tmp, &tv_list, entry) {
+ __gf_tw_add_timer (base, timer);
+ }
+
+ return index;
+}
+
+#define INDEX(N) ((base->timer_sec >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK)
+
+/**
+ * run expired timers
+ */
+static inline void
+run_timers (struct tvec_base *base)
+{
+ unsigned long index, call_time;
+ struct gf_tw_timer_list *timer;
+
+ struct list_head work_list;
+ struct list_head *head = &work_list;
+
+ pthread_spin_lock (&base->lock);
+ {
+ index = base->timer_sec & TVR_MASK;
+
+ if (!index &&
+ (!cascade (base, &base->tv2, INDEX(0))) &&
+ (!cascade (base, &base->tv3, INDEX(1))) &&
+ (!cascade (base, &base->tv4, INDEX(2))))
+ cascade (base, &base->tv5, INDEX(3));
+
+ call_time = base->timer_sec++;
+ list_replace_init (base->tv1.vec + index, head);
+ while (!list_empty(head)) {
+ void (*fn)(struct gf_tw_timer_list *, void *, unsigned long);
+ void *data;
+
+ timer = list_first_entry (head, struct gf_tw_timer_list, entry);
+ fn = timer->function;
+ data = timer->data;
+
+ __gf_tw_detach_timer (timer);
+ pthread_spin_unlock(&base->lock);
+ {
+ /* It is required to run the actual function outside
+ of the locked zone, so we don't bother about
+ locked operations inside that function */
+ fn(timer, data, call_time);
+ }
+ pthread_spin_lock(&base->lock);
+ }
+ }
+ pthread_spin_unlock (&base->lock);
+
+}
+
+void *runner (void *arg)
+{
+ struct timeval tv = {0,};
+ struct tvec_base *base = arg;
+
+ while (1) {
+ run_timers (base);
+
+ tv.tv_sec = 1;
+ tv.tv_usec = 0;
+ (void) select (0, NULL, NULL, NULL, &tv);
+ }
+
+ return NULL;
+
+}
+
+static inline int timer_pending (struct gf_tw_timer_list *timer)
+{
+ struct list_head *entry = &timer->entry;
+
+ return (entry->next != NULL);
+}
+
+static inline int __detach_if_pending (struct gf_tw_timer_list *timer)
+{
+ if (!timer_pending (timer))
+ return 0;
+
+ __gf_tw_detach_timer (timer);
+ return 1;
+}
+
+static inline int __mod_timer (struct tvec_base *base,
+ struct gf_tw_timer_list *timer, int pending_only)
+{
+ int ret = 0;
+
+ ret = __detach_if_pending (timer);
+ if (!ret && pending_only)
+ goto done;
+
+ ret = 1;
+ __gf_tw_add_timer (base, timer);
+
+ done:
+ return ret;
+}
+
+/* interface */
+
+/**
+ * Add a timer in the timer wheel
+ */
+void gf_tw_add_timer (struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ pthread_spin_lock (&base->lock);
+ {
+ timer->expires += base->timer_sec;
+ timer->expires = apply_slack (base, timer);
+ __gf_tw_add_timer (base, timer);
+ }
+ pthread_spin_unlock (&base->lock);
+}
+
+/**
+ * Remove a timer from the timer wheel
+ */
+int gf_tw_del_timer (struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ int ret = 0;
+
+ pthread_spin_lock (&base->lock);
+ {
+ if (timer_pending (timer)) {
+ ret = 1;
+ __gf_tw_detach_timer (timer);
+ }
+ }
+ pthread_spin_unlock (&base->lock);
+
+ return ret;
+}
+
+int gf_tw_mod_timer_pending (struct tvec_base *base,
+ struct gf_tw_timer_list *timer,
+ unsigned long expires)
+{
+ int ret = 1;
+
+ pthread_spin_lock (&base->lock);
+ {
+ timer->expires = expires + base->timer_sec;
+ timer->expires = apply_slack (base, timer);
+
+ ret = __mod_timer (base, timer, 1);
+ }
+ pthread_spin_unlock (&base->lock);
+
+ return ret;
+}
+
+int gf_tw_mod_timer (struct tvec_base *base,
+ struct gf_tw_timer_list *timer, unsigned long expires)
+{
+ int ret = 1;
+
+ pthread_spin_lock (&base->lock);
+ {
+ /* fast path optimization */
+ if (timer_pending (timer) && timer->expires == expires)
+ goto unblock;
+
+ timer->expires = expires + base->timer_sec;
+ timer->expires = apply_slack (base, timer);
+
+ ret = __mod_timer (base, timer, 0);
+ }
+ unblock:
+ pthread_spin_unlock (&base->lock);
+
+ return ret;
+}
+
+int gf_tw_cleanup_timers (struct tvec_base *base)
+{
+ int ret = 0;
+ void *res = NULL;
+
+ /* terminate runner */
+ ret = pthread_cancel (base->runner);
+ if (ret != 0)
+ goto error_return;
+ ret = pthread_join (base->runner, &res);
+ if (ret != 0)
+ goto error_return;
+ if (res != PTHREAD_CANCELED)
+ goto error_return;
+
+ /* destroy lock */
+ ret = pthread_spin_destroy (&base->lock);
+ if (ret != 0)
+ goto error_return;
+
+ /* deallocated timer base */
+ free (base);
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/**
+ * Initialize various timer wheel lists and spawn a thread that
+ * invokes run_timers()
+ */
+struct tvec_base *gf_tw_init_timers ()
+{
+ int j = 0;
+ int ret = 0;
+ struct timeval tv = {0,};
+ struct tvec_base *base = NULL;
+
+ base = malloc (sizeof (*base));
+ if (!base)
+ goto error_return;
+
+ ret = pthread_spin_init (&base->lock, 0);
+ if (ret != 0)
+ goto error_dealloc;
+
+ for (j = 0; j < TVN_SIZE; j++) {
+ INIT_LIST_HEAD (base->tv5.vec + j);
+ INIT_LIST_HEAD (base->tv4.vec + j);
+ INIT_LIST_HEAD (base->tv3.vec + j);
+ INIT_LIST_HEAD (base->tv2.vec + j);
+ }
+
+ for (j = 0; j < TVR_SIZE; j++) {
+ INIT_LIST_HEAD (base->tv1.vec + j);
+ }
+
+ ret = gettimeofday (&tv, 0);
+ if (ret < 0)
+ goto destroy_lock;
+ base->timer_sec = tv.tv_sec;
+
+ ret = pthread_create (&base->runner, NULL, runner, base);
+ if (ret != 0)
+ goto destroy_lock;
+ return base;
+
+ destroy_lock:
+ (void) pthread_spin_destroy (&base->lock);
+ error_dealloc:
+ free (base);
+ error_return:
+ return NULL;
+}
diff --git a/contrib/timer-wheel/timer-wheel.h b/contrib/timer-wheel/timer-wheel.h
new file mode 100644
index 00000000000..5637735ec22
--- /dev/null
+++ b/contrib/timer-wheel/timer-wheel.h
@@ -0,0 +1,77 @@
+/*
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#ifndef __TIMER_WHEEL_H
+#define __TIMER_WHEEL_H
+
+#include "glusterfs/locking.h"
+
+#include "glusterfs/list.h"
+
+#define TVR_BITS 8
+#define TVN_BITS 6
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_MASK (TVR_SIZE - 1)
+#define TVN_MASK (TVN_SIZE - 1)
+
+#define BITS_PER_LONG 64
+
+struct tvec {
+ struct list_head vec[TVN_SIZE];
+};
+
+struct tvec_root {
+ struct list_head vec[TVR_SIZE];
+};
+
+struct tvec_base {
+ pthread_spinlock_t lock; /* base lock */
+
+ pthread_t runner; /* run_timer() */
+
+ unsigned long timer_sec; /* time counter */
+
+ struct tvec_root tv1;
+ struct tvec tv2;
+ struct tvec tv3;
+ struct tvec tv4;
+ struct tvec tv5;
+};
+
+struct gf_tw_timer_list {
+ void *data;
+ unsigned long expires;
+
+ /** callback routine */
+ void (*function)(struct gf_tw_timer_list *, void *, unsigned long);
+
+ struct list_head entry;
+};
+
+/** The API! */
+struct tvec_base *gf_tw_init_timers ();
+int gf_tw_cleanup_timers (struct tvec_base *);
+void gf_tw_add_timer (struct tvec_base *, struct gf_tw_timer_list *);
+int gf_tw_del_timer (struct tvec_base *, struct gf_tw_timer_list *);
+
+int gf_tw_mod_timer_pending (struct tvec_base *,
+ struct gf_tw_timer_list *, unsigned long);
+
+int gf_tw_mod_timer (struct tvec_base *,
+ struct gf_tw_timer_list *, unsigned long);
+
+#endif
diff --git a/contrib/umountd/Makefile.am b/contrib/umountd/Makefile.am
new file mode 100644
index 00000000000..b39e000f5aa
--- /dev/null
+++ b/contrib/umountd/Makefile.am
@@ -0,0 +1,11 @@
+sbin_PROGRAMS = umountd
+umountd_SOURCES = umountd.c
+umountd_CFLAGS = $(GF_CFLAGS) -DDATADIR=\"$(localstatedir)\"
+umountd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la ${GF_LDADD}
+umountd_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/contrib/umountd/umountd.c b/contrib/umountd/umountd.c
new file mode 100644
index 00000000000..3f933ecb554
--- /dev/null
+++ b/contrib/umountd/umountd.c
@@ -0,0 +1,255 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <dirent.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-types.h"
+
+static void
+usage (void)
+{
+ fprintf (stderr, "Usage: umountd [-d dev] [-t timeout] [-r] path\n");
+ exit (EXIT_FAILURE);
+}
+
+
+static int
+sanity_check (char *path, dev_t *devp)
+{
+ struct stat st;
+ struct stat parent_st;
+ int ret;
+ char pathtmp[PATH_MAX];
+ char *parent;
+
+ if (path == NULL)
+ usage ();
+
+ if ((ret = stat (path, &st)) != 0) {
+ switch (errno) {
+ case ENOTCONN:
+ /* volume is stopped */
+ break;
+ default:
+ gf_log ("umountd", GF_LOG_ERROR,
+ "Cannot access %s: %s\n",
+ path, strerror (errno));
+ goto out;
+ }
+ }
+
+ /* If dev was not specified, get it from path */
+ if (*devp == -1 && ret == 0)
+ *devp = st.st_dev;
+
+ snprintf (pathtmp, PATH_MAX, "%s", path);
+ parent = dirname (pathtmp);
+
+ if (stat (parent, &parent_st) != 0) {
+ gf_log ("umountd", GF_LOG_ERROR,
+ "Cannot access %s: %s\n",
+ parent, strerror (errno));
+ goto out;
+ }
+
+ if (st.st_dev == parent_st.st_dev) {
+ gf_log ("umountd", GF_LOG_ERROR,
+ "No filesystem mounted on %s\n", path);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void
+log_rotate (int signum)
+{
+ gf_log_logrotate (1);
+
+ if (signal (SIGHUP, *log_rotate) == SIG_ERR) {
+ gf_log ("umountd", GF_LOG_ERROR, "signal () failed");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+static int
+logging_init (void)
+{
+ glusterfs_ctx_t *ctx;
+ char log_file[PATH_MAX];
+ int ret = -1;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx) {
+ fprintf (stderr, "glusterfs_ctx_new failed\n");
+ goto out;
+ }
+
+ ret = glusterfs_globals_init (ctx);
+ if (ret) {
+ fprintf (stderr, "glusterfs_globals_init failed\n");
+ goto out;
+ }
+
+ THIS->ctx = ctx;
+ xlator_mem_acct_init (THIS, gf_common_mt_end);
+
+ snprintf (log_file, PATH_MAX,
+ "%s/umountd.log", DEFAULT_LOG_FILE_DIRECTORY);
+
+ ret = gf_log_init (ctx, log_file, "umountd");
+ if (ret) {
+ fprintf (stderr, "gf_log_init failed\n");
+ goto out;
+ }
+
+ if (signal (SIGHUP, *log_rotate) == SIG_ERR) {
+ gf_log ("umountd", GF_LOG_ERROR, "signal () failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+umountd_async (char *path, dev_t dev, int frmdir, int timeout)
+{
+ int ret = -1;
+ struct stat stbuf = {0, };
+ int unmount_ret = 0;
+
+ do {
+ unmount_ret = unmount (path, 0);
+ if (unmount_ret == 0)
+ gf_log ("umountd", GF_LOG_INFO, "Unmounted %s", path);
+
+ if (unmount_ret != 0 && errno != EBUSY) {
+ gf_log ("umountd", GF_LOG_WARNING,
+ "umount %s failed: %s",
+ path, strerror (errno));
+ }
+
+ ret = sys_lstat (path, &stbuf);
+ if (ret != 0) {
+ gf_log ("umountd", GF_LOG_WARNING,
+ "Cannot stat device from %s",
+ path, strerror (errno));
+ break;
+ }
+
+ if (stbuf.st_dev != dev) {
+ if (unmount_ret != 0)
+ gf_log ("umountd", GF_LOG_INFO,
+ "device mismatch "
+ "(expect %lld, found %lld), "
+ "someone else unmounted %s",
+ dev, stbuf.st_dev, path);
+ ret = 0;
+ break;
+ }
+
+ sleep (timeout);
+ } while (1/*CONSTCOND*/);
+
+ if (ret) {
+ gf_log ("umountd", GF_LOG_ERROR,
+ "Asynchronous unmount of %s failed: %s",
+ path, strerror (errno));
+ } else {
+ if (frmdir) {
+ ret = rmdir (path);
+ if (ret)
+ gf_log ("umountd", GF_LOG_WARNING,
+ "rmdir %s failed: %s",
+ path, strerror (errno));
+ else
+ gf_log ("umountd", GF_LOG_INFO,
+ "Removed %s", path);
+ }
+ }
+
+ return ret;
+}
+
+int
+main (int argc, char **argv)
+{
+ char *path = NULL;
+ dev_t dev = -1;
+ int frmdir = 0;
+ int timeout = 30;
+ int f;
+
+ while ((f = getopt (argc, argv, "d:rt:")) != -1) {
+ switch (f) {
+ case 'p':
+ path = optarg;
+ break;
+ case 'd':
+ dev = strtoll (optarg, NULL, 10);
+ break;
+ case 't':
+ timeout = atoi (optarg);
+ break;
+ case 'r':
+ frmdir = 1;
+ break;
+ default:
+ usage ();
+ break;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1)
+ usage ();
+
+ path = argv[0];
+
+ if (logging_init () != 0)
+ exit (EXIT_FAILURE);
+
+ if (sanity_check (path, &dev) != 0)
+ exit (EXIT_FAILURE);
+
+ if (daemon (0, 0) != 0)
+ exit (EXIT_FAILURE);
+
+ if (umountd_async (path, dev, frmdir, timeout) != 0)
+ exit (EXIT_FAILURE);
+
+ return EXIT_SUCCESS;
+}
diff --git a/contrib/userspace-rcu/rculist-extra.h b/contrib/userspace-rcu/rculist-extra.h
new file mode 100644
index 00000000000..274cf9f9d7a
--- /dev/null
+++ b/contrib/userspace-rcu/rculist-extra.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2002 Free Software Foundation, Inc.
+ * (originally part of the GNU C Library)
+ * Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+ *
+ * Copyright (C) 2009 Pierre-Marc Fournier
+ * Conversion to RCU list.
+ * Copyright (C) 2010 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef URCU_RCULIST_EXTRA_H
+#define URCU_RCULIST_EXTRA_H
+/* Copying this definition from liburcu-0.8 as liburcu-0.7 does not have this
+ * particular list api
+ */
+/* Add new element at the tail of the list. */
+
+static inline
+void cds_list_add_tail_rcu(struct cds_list_head *newp,
+ struct cds_list_head *head)
+{
+ newp->next = head;
+ newp->prev = head->prev;
+ rcu_assign_pointer(head->prev->next, newp);
+ head->prev = newp;
+}
+
+#endif
diff --git a/contrib/userspace-rcu/static-wfcqueue.h b/contrib/userspace-rcu/static-wfcqueue.h
new file mode 100644
index 00000000000..37d14ad674b
--- /dev/null
+++ b/contrib/userspace-rcu/static-wfcqueue.h
@@ -0,0 +1,685 @@
+#ifndef _URCU_WFCQUEUE_STATIC_H
+#define _URCU_WFCQUEUE_STATIC_H
+
+/*
+ * urcu/static/wfcqueue.h
+ *
+ * Userspace RCU library - Concurrent Queue with Wait-Free Enqueue/Blocking Dequeue
+ *
+ * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu/wfcqueue.h for
+ * linking dynamically with the userspace rcu library.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright 2011-2012 - Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Copied from userspace-rcu 0.10 because version 0.7 doesn't contain it. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Concurrent queue with wait-free enqueue/blocking dequeue.
+ *
+ * This queue has been designed and implemented collaboratively by
+ * Mathieu Desnoyers and Lai Jiangshan. Inspired from
+ * half-wait-free/half-blocking queue implementation done by Paul E.
+ * McKenney.
+ *
+ * Mutual exclusion of cds_wfcq_* / __cds_wfcq_* API
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * Legend:
+ * [1] cds_wfcq_enqueue
+ * [2] __cds_wfcq_splice (destination queue)
+ * [3] __cds_wfcq_dequeue
+ * [4] __cds_wfcq_splice (source queue)
+ * [5] __cds_wfcq_first
+ * [6] __cds_wfcq_next
+ *
+ * [1] [2] [3] [4] [5] [6]
+ * [1] - - - - - -
+ * [2] - - - - - -
+ * [3] - - X X X X
+ * [4] - - X - X X
+ * [5] - - X X - -
+ * [6] - - X X - -
+ *
+ * Mutual exclusion can be ensured by holding cds_wfcq_dequeue_lock().
+ *
+ * For convenience, cds_wfcq_dequeue_blocking() and
+ * cds_wfcq_splice_blocking() hold the dequeue lock.
+ *
+ * Besides locking, mutual exclusion of dequeue, splice and iteration
+ * can be ensured by performing all of those operations from a single
+ * thread, without requiring any lock.
+ */
+
+#define WFCQ_ADAPT_ATTEMPTS 10 /* Retry if being set */
+#define WFCQ_WAIT 10 /* Wait 10 ms if being set */
+
+/*
+ * cds_wfcq_node_init: initialize wait-free queue node.
+ */
+static inline void _cds_wfcq_node_init(struct cds_wfcq_node *node)
+{
+ node->next = NULL;
+}
+
+/*
+ * cds_wfcq_init: initialize wait-free queue (with lock). Pair with
+ * cds_wfcq_destroy().
+ */
+static inline void _cds_wfcq_init(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ /* Set queue head and tail */
+ _cds_wfcq_node_init(&head->node);
+ tail->p = &head->node;
+ ret = pthread_mutex_init(&head->lock, NULL);
+ assert(!ret);
+}
+
+/*
+ * cds_wfcq_destroy: destroy wait-free queue (with lock). Pair with
+ * cds_wfcq_init().
+ */
+static inline void _cds_wfcq_destroy(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret = pthread_mutex_destroy(&head->lock);
+ assert(!ret);
+}
+
+/*
+ * __cds_wfcq_init: initialize wait-free queue (without lock). Don't
+ * pair with any destroy function.
+ */
+static inline void ___cds_wfcq_init(struct __cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ /* Set queue head and tail */
+ _cds_wfcq_node_init(&head->node);
+ tail->p = &head->node;
+}
+
+/*
+ * cds_wfcq_empty: return whether wait-free queue is empty.
+ *
+ * No memory barrier is issued. No mutual exclusion is required.
+ *
+ * We perform the test on head->node.next to check if the queue is
+ * possibly empty, but we confirm this by checking if the tail pointer
+ * points to the head node because the tail pointer is the linearisation
+ * point of the enqueuers. Just checking the head next pointer could
+ * make a queue appear empty if an enqueuer is preempted for a long time
+ * between xchg() and setting the previous node's next pointer.
+ */
+static inline bool _cds_wfcq_empty(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ /*
+ * Queue is empty if no node is pointed by head->node.next nor
+ * tail->p. Even though the tail->p check is sufficient to find
+ * out of the queue is empty, we first check head->node.next as a
+ * common case to ensure that dequeuers do not frequently access
+ * enqueuer's tail->p cache line.
+ */
+ return CMM_LOAD_SHARED(head->node.next) == NULL
+ && CMM_LOAD_SHARED(tail->p) == &head->node;
+}
+
+static inline void _cds_wfcq_dequeue_lock(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ ret = pthread_mutex_lock(&head->lock);
+ assert(!ret);
+}
+
+static inline void _cds_wfcq_dequeue_unlock(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&head->lock);
+ assert(!ret);
+}
+
+static inline bool ___cds_wfcq_append(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *new_head,
+ struct cds_wfcq_node *new_tail)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *old_tail;
+
+ /*
+ * Implicit memory barrier before uatomic_xchg() orders earlier
+ * stores to data structure containing node and setting
+ * node->next to NULL before publication.
+ */
+ old_tail = uatomic_xchg(&tail->p, new_tail);
+
+ /*
+ * Implicit memory barrier after uatomic_xchg() orders store to
+ * q->tail before store to old_tail->next.
+ *
+ * At this point, dequeuers see a NULL tail->p->next, which
+ * indicates that the queue is being appended to. The following
+ * store will append "node" to the queue from a dequeuer
+ * perspective.
+ */
+ CMM_STORE_SHARED(old_tail->next, new_head);
+ /*
+ * Return false if queue was empty prior to adding the node,
+ * else return true.
+ */
+ return old_tail != &head->node;
+}
+
+/*
+ * cds_wfcq_enqueue: enqueue a node into a wait-free queue.
+ *
+ * Issues a full memory barrier before enqueue. No mutual exclusion is
+ * required.
+ *
+ * Returns false if the queue was empty prior to adding the node.
+ * Returns true otherwise.
+ */
+static inline bool _cds_wfcq_enqueue(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *new_tail)
+{
+ return ___cds_wfcq_append(head, tail, new_tail, new_tail);
+}
+
+/*
+ * CDS_WFCQ_WAIT_SLEEP:
+ *
+ * By default, this sleeps for the given @msec milliseconds.
+ * This is a macro which LGPL users may #define themselves before
+ * including wfcqueue.h to override the default behavior (e.g.
+ * to log a warning or perform other background work).
+ */
+#ifndef CDS_WFCQ_WAIT_SLEEP
+#define CDS_WFCQ_WAIT_SLEEP(msec) ___cds_wfcq_wait_sleep(msec)
+#endif
+
+static inline void ___cds_wfcq_wait_sleep(int msec)
+{
+ (void) poll(NULL, 0, msec);
+}
+
+/*
+ * ___cds_wfcq_busy_wait: adaptative busy-wait.
+ *
+ * Returns 1 if nonblocking and needs to block, 0 otherwise.
+ */
+static inline bool
+___cds_wfcq_busy_wait(int *attempt, int blocking)
+{
+ if (!blocking)
+ return 1;
+ if (++(*attempt) >= WFCQ_ADAPT_ATTEMPTS) {
+ CDS_WFCQ_WAIT_SLEEP(WFCQ_WAIT); /* Wait for 10ms */
+ *attempt = 0;
+ } else {
+ caa_cpu_relax();
+ }
+ return 0;
+}
+
+/*
+ * Waiting for enqueuer to complete enqueue and return the next node.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_node_sync_next(struct cds_wfcq_node *node, int blocking)
+{
+ struct cds_wfcq_node *next;
+ int attempt = 0;
+
+ /*
+ * Adaptative busy-looping waiting for enqueuer to complete enqueue.
+ */
+ while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ if (___cds_wfcq_busy_wait(&attempt, blocking))
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+
+ return next;
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_first(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ int blocking)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *node;
+
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(head), tail))
+ return NULL;
+ node = ___cds_wfcq_node_sync_next(&head->node, blocking);
+ /* Load head->node.next before loading node's content */
+ cmm_smp_read_barrier_depends();
+ return node;
+}
+
+/*
+ * __cds_wfcq_first_blocking: get first node of a queue, without dequeuing.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ *
+ * Used by for-like iteration macros in urcu/wfqueue.h:
+ * __cds_wfcq_for_each_blocking()
+ * __cds_wfcq_for_each_blocking_safe()
+ *
+ * Returns NULL if queue is empty, first node otherwise.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_first_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_first(head, tail, 1);
+}
+
+
+/*
+ * __cds_wfcq_first_nonblocking: get first node of a queue, without dequeuing.
+ *
+ * Same as __cds_wfcq_first_blocking, but returns CDS_WFCQ_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_first_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_first(head, tail, 0);
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_next(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node,
+ int blocking)
+{
+ struct cds_wfcq_node *next;
+
+ /*
+ * Even though the following tail->p check is sufficient to find
+ * out if we reached the end of the queue, we first check
+ * node->next as a common case to ensure that iteration on nodes
+ * do not frequently access enqueuer's tail->p cache line.
+ */
+ if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ /* Load node->next before tail->p */
+ cmm_smp_rmb();
+ if (CMM_LOAD_SHARED(tail->p) == node)
+ return NULL;
+ next = ___cds_wfcq_node_sync_next(node, blocking);
+ }
+ /* Load node->next before loading next's content */
+ cmm_smp_read_barrier_depends();
+ return next;
+}
+
+/*
+ * __cds_wfcq_next_blocking: get next node of a queue, without dequeuing.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ *
+ * Used by for-like iteration macros in urcu/wfqueue.h:
+ * __cds_wfcq_for_each_blocking()
+ * __cds_wfcq_for_each_blocking_safe()
+ *
+ * Returns NULL if reached end of queue, non-NULL next queue node
+ * otherwise.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_next_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node)
+{
+ return ___cds_wfcq_next(head, tail, node, 1);
+}
+
+/*
+ * __cds_wfcq_next_blocking: get next node of a queue, without dequeuing.
+ *
+ * Same as __cds_wfcq_next_blocking, but returns CDS_WFCQ_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_next_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node)
+{
+ return ___cds_wfcq_next(head, tail, node, 0);
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ int *state,
+ int blocking)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *node, *next;
+
+ if (state)
+ *state = 0;
+
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(head), tail)) {
+ return NULL;
+ }
+
+ node = ___cds_wfcq_node_sync_next(&head->node, blocking);
+ if (!blocking && node == CDS_WFCQ_WOULDBLOCK) {
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+
+ if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ /*
+ * @node is probably the only node in the queue.
+ * Try to move the tail to &q->head.
+ * q->head.next is set to NULL here, and stays
+ * NULL if the cmpxchg succeeds. Should the
+ * cmpxchg fail due to a concurrent enqueue, the
+ * q->head.next will be set to the next node.
+ * The implicit memory barrier before
+ * uatomic_cmpxchg() orders load node->next
+ * before loading q->tail.
+ * The implicit memory barrier before uatomic_cmpxchg
+ * orders load q->head.next before loading node's
+ * content.
+ */
+ _cds_wfcq_node_init(&head->node);
+ if (uatomic_cmpxchg(&tail->p, node, &head->node) == node) {
+ if (state)
+ *state |= CDS_WFCQ_STATE_LAST;
+ return node;
+ }
+ next = ___cds_wfcq_node_sync_next(node, blocking);
+ /*
+ * In nonblocking mode, if we would need to block to
+ * get node's next, set the head next node pointer
+ * (currently NULL) back to its original value.
+ */
+ if (!blocking && next == CDS_WFCQ_WOULDBLOCK) {
+ head->node.next = node;
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+ }
+
+ /*
+ * Move queue head forward.
+ */
+ head->node.next = next;
+
+ /* Load q->head.next before loading node's content */
+ cmm_smp_read_barrier_depends();
+ return node;
+}
+
+/*
+ * __cds_wfcq_dequeue_with_state_blocking: dequeue node from queue, with state.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * It is valid to reuse and free a dequeued node immediately.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ return ___cds_wfcq_dequeue_with_state(head, tail, state, 1);
+}
+
+/*
+ * ___cds_wfcq_dequeue_blocking: dequeue node from queue.
+ *
+ * Same as __cds_wfcq_dequeue_with_state_blocking, but without saving
+ * state.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_dequeue_with_state_blocking(head, tail, NULL);
+}
+
+/*
+ * __cds_wfcq_dequeue_with_state_nonblocking: dequeue node, with state.
+ *
+ * Same as __cds_wfcq_dequeue_blocking, but returns CDS_WFCQ_WOULDBLOCK
+ * if it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ return ___cds_wfcq_dequeue_with_state(head, tail, state, 0);
+}
+
+/*
+ * ___cds_wfcq_dequeue_nonblocking: dequeue node from queue.
+ *
+ * Same as __cds_wfcq_dequeue_with_state_nonblocking, but without saving
+ * state.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_dequeue_with_state_nonblocking(head, tail, NULL);
+}
+
+/*
+ * __cds_wfcq_splice: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Mutual exclusion for src_q should be ensured by the caller as
+ * specified in the "Synchronisation table".
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice(
+ cds_wfcq_head_ptr_t u_dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t u_src_q_head,
+ struct cds_wfcq_tail *src_q_tail,
+ int blocking)
+{
+ struct __cds_wfcq_head *dest_q_head = u_dest_q_head._h;
+ struct __cds_wfcq_head *src_q_head = u_src_q_head._h;
+ struct cds_wfcq_node *head, *tail;
+ int attempt = 0;
+
+ /*
+ * Initial emptiness check to speed up cases where queue is
+ * empty: only require loads to check if queue is empty.
+ */
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(src_q_head), src_q_tail))
+ return CDS_WFCQ_RET_SRC_EMPTY;
+
+ for (;;) {
+ /*
+ * Open-coded _cds_wfcq_empty() by testing result of
+ * uatomic_xchg, as well as tail pointer vs head node
+ * address.
+ */
+ head = uatomic_xchg(&src_q_head->node.next, NULL);
+ if (head)
+ break; /* non-empty */
+ if (CMM_LOAD_SHARED(src_q_tail->p) == &src_q_head->node)
+ return CDS_WFCQ_RET_SRC_EMPTY;
+ if (___cds_wfcq_busy_wait(&attempt, blocking))
+ return CDS_WFCQ_RET_WOULDBLOCK;
+ }
+
+ /*
+ * Memory barrier implied before uatomic_xchg() orders store to
+ * src_q->head before store to src_q->tail. This is required by
+ * concurrent enqueue on src_q, which exchanges the tail before
+ * updating the previous tail's next pointer.
+ */
+ tail = uatomic_xchg(&src_q_tail->p, &src_q_head->node);
+
+ /*
+ * Append the spliced content of src_q into dest_q. Does not
+ * require mutual exclusion on dest_q (wait-free).
+ */
+ if (___cds_wfcq_append(__cds_wfcq_head_cast(dest_q_head), dest_q_tail,
+ head, tail))
+ return CDS_WFCQ_RET_DEST_NON_EMPTY;
+ else
+ return CDS_WFCQ_RET_DEST_EMPTY;
+}
+
+/*
+ * __cds_wfcq_splice_blocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Mutual exclusion for src_q should be ensured by the caller as
+ * specified in the "Synchronisation table".
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue. Never returns CDS_WFCQ_RET_WOULDBLOCK.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice_blocking(
+ cds_wfcq_head_ptr_t dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ return ___cds_wfcq_splice(dest_q_head, dest_q_tail,
+ src_q_head, src_q_tail, 1);
+}
+
+/*
+ * __cds_wfcq_splice_nonblocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Same as __cds_wfcq_splice_blocking, but returns
+ * CDS_WFCQ_RET_WOULDBLOCK if it needs to block.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice_nonblocking(
+ cds_wfcq_head_ptr_t dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ return ___cds_wfcq_splice(dest_q_head, dest_q_tail,
+ src_q_head, src_q_tail, 0);
+}
+
+/*
+ * cds_wfcq_dequeue_with_state_blocking: dequeue a node from a wait-free queue.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Mutual exclusion with cds_wfcq_splice_blocking and dequeue lock is
+ * ensured.
+ * It is valid to reuse and free a dequeued node immediately.
+ */
+static inline struct cds_wfcq_node *
+_cds_wfcq_dequeue_with_state_blocking(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ struct cds_wfcq_node *retval;
+
+ _cds_wfcq_dequeue_lock(head, tail);
+ retval = ___cds_wfcq_dequeue_with_state_blocking(cds_wfcq_head_cast(head),
+ tail, state);
+ _cds_wfcq_dequeue_unlock(head, tail);
+ return retval;
+}
+
+/*
+ * cds_wfcq_dequeue_blocking: dequeue node from queue.
+ *
+ * Same as cds_wfcq_dequeue_blocking, but without saving state.
+ */
+static inline struct cds_wfcq_node *
+_cds_wfcq_dequeue_blocking(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ return _cds_wfcq_dequeue_with_state_blocking(head, tail, NULL);
+}
+
+/*
+ * cds_wfcq_splice_blocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Mutual exclusion with cds_wfcq_dequeue_blocking and dequeue lock is
+ * ensured.
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue. Never returns CDS_WFCQ_RET_WOULDBLOCK.
+ */
+static inline enum cds_wfcq_ret
+_cds_wfcq_splice_blocking(
+ struct cds_wfcq_head *dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ struct cds_wfcq_head *src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ enum cds_wfcq_ret ret;
+
+ _cds_wfcq_dequeue_lock(src_q_head, src_q_tail);
+ ret = ___cds_wfcq_splice_blocking(cds_wfcq_head_cast(dest_q_head), dest_q_tail,
+ cds_wfcq_head_cast(src_q_head), src_q_tail);
+ _cds_wfcq_dequeue_unlock(src_q_head, src_q_tail);
+ return ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_WFCQUEUE_STATIC_H */
diff --git a/contrib/userspace-rcu/static-wfstack.h b/contrib/userspace-rcu/static-wfstack.h
new file mode 100644
index 00000000000..29b81c3aac3
--- /dev/null
+++ b/contrib/userspace-rcu/static-wfstack.h
@@ -0,0 +1,455 @@
+#ifndef _URCU_STATIC_WFSTACK_H
+#define _URCU_STATIC_WFSTACK_H
+
+/*
+ * urcu/static/wfstack.h
+ *
+ * Userspace RCU library - Stack with with wait-free push, blocking traversal.
+ *
+ * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu/wfstack.h for
+ * linking dynamically with the userspace rcu library.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't support a stack
+ * without mutex. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CDS_WFS_END ((void *) 0x1UL)
+#define CDS_WFS_ADAPT_ATTEMPTS 10 /* Retry if being set */
+#define CDS_WFS_WAIT 10 /* Wait 10 ms if being set */
+
+/*
+ * Stack with wait-free push, blocking traversal.
+ *
+ * Stack implementing push, pop, pop_all operations, as well as iterator
+ * on the stack head returned by pop_all.
+ *
+ * Wait-free operations: cds_wfs_push, __cds_wfs_pop_all, cds_wfs_empty,
+ * cds_wfs_first.
+ * Blocking operations: cds_wfs_pop, cds_wfs_pop_all, cds_wfs_next,
+ * iteration on stack head returned by pop_all.
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * cds_wfs_push __cds_wfs_pop __cds_wfs_pop_all
+ * cds_wfs_push - - -
+ * __cds_wfs_pop - X X
+ * __cds_wfs_pop_all - X -
+ *
+ * cds_wfs_pop and cds_wfs_pop_all use an internal mutex to provide
+ * synchronization.
+ */
+
+/*
+ * cds_wfs_node_init: initialize wait-free stack node.
+ */
+static inline
+void _cds_wfs_node_init(struct cds_wfs_node *node)
+{
+ node->next = NULL;
+}
+
+/*
+ * __cds_wfs_init: initialize wait-free stack. Don't pair with
+ * any destroy function.
+ */
+static inline void ___cds_wfs_init(struct __cds_wfs_stack *s)
+{
+ s->head = CDS_WFS_END;
+}
+
+/*
+ * cds_wfs_init: initialize wait-free stack. Pair with
+ * cds_wfs_destroy().
+ */
+static inline
+void _cds_wfs_init(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ s->head = CDS_WFS_END;
+ ret = pthread_mutex_init(&s->lock, NULL);
+ assert(!ret);
+}
+
+/*
+ * cds_wfs_destroy: destroy wait-free stack. Pair with
+ * cds_wfs_init().
+ */
+static inline
+void _cds_wfs_destroy(struct cds_wfs_stack *s)
+{
+ int ret = pthread_mutex_destroy(&s->lock);
+ assert(!ret);
+}
+
+static inline bool ___cds_wfs_end(void *node)
+{
+ return node == CDS_WFS_END;
+}
+
+/*
+ * cds_wfs_empty: return whether wait-free stack is empty.
+ *
+ * No memory barrier is issued. No mutual exclusion is required.
+ */
+static inline bool _cds_wfs_empty(cds_wfs_stack_ptr_t u_stack)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+
+ return ___cds_wfs_end(CMM_LOAD_SHARED(s->head));
+}
+
+/*
+ * cds_wfs_push: push a node into the stack.
+ *
+ * Issues a full memory barrier before push. No mutual exclusion is
+ * required.
+ *
+ * Returns 0 if the stack was empty prior to adding the node.
+ * Returns non-zero otherwise.
+ */
+static inline
+int _cds_wfs_push(cds_wfs_stack_ptr_t u_stack, struct cds_wfs_node *node)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+ struct cds_wfs_head *old_head, *new_head;
+
+ assert(node->next == NULL);
+ new_head = caa_container_of(node, struct cds_wfs_head, node);
+ /*
+ * uatomic_xchg() implicit memory barrier orders earlier stores
+ * to node (setting it to NULL) before publication.
+ */
+ old_head = uatomic_xchg(&s->head, new_head);
+ /*
+ * At this point, dequeuers see a NULL node->next, they should
+ * busy-wait until node->next is set to old_head.
+ */
+ CMM_STORE_SHARED(node->next, &old_head->node);
+ return !___cds_wfs_end(old_head);
+}
+
+/*
+ * Waiting for push to complete enqueue and return the next node.
+ */
+static inline struct cds_wfs_node *
+___cds_wfs_node_sync_next(struct cds_wfs_node *node, int blocking)
+{
+ struct cds_wfs_node *next;
+ int attempt = 0;
+
+ /*
+ * Adaptative busy-looping waiting for push to complete.
+ */
+ while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ if (!blocking)
+ return CDS_WFS_WOULDBLOCK;
+ if (++attempt >= CDS_WFS_ADAPT_ATTEMPTS) {
+ (void) poll(NULL, 0, CDS_WFS_WAIT); /* Wait for 10ms */
+ attempt = 0;
+ } else {
+ caa_cpu_relax();
+ }
+ }
+
+ return next;
+}
+
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop(cds_wfs_stack_ptr_t u_stack, int *state, int blocking)
+{
+ struct cds_wfs_head *head, *new_head;
+ struct cds_wfs_node *next;
+ struct __cds_wfs_stack *s = u_stack._s;
+
+ if (state)
+ *state = 0;
+ for (;;) {
+ head = CMM_LOAD_SHARED(s->head);
+ if (___cds_wfs_end(head)) {
+ return NULL;
+ }
+ next = ___cds_wfs_node_sync_next(&head->node, blocking);
+ if (!blocking && next == CDS_WFS_WOULDBLOCK) {
+ return CDS_WFS_WOULDBLOCK;
+ }
+ new_head = caa_container_of(next, struct cds_wfs_head, node);
+ if (uatomic_cmpxchg(&s->head, head, new_head) == head) {
+ if (state && ___cds_wfs_end(new_head))
+ *state |= CDS_WFS_STATE_LAST;
+ return &head->node;
+ }
+ if (!blocking) {
+ return CDS_WFS_WOULDBLOCK;
+ }
+ /* busy-loop if head changed under us */
+ }
+}
+
+/*
+ * __cds_wfs_pop_with_state_blocking: pop a node from the stack, with state.
+ *
+ * Returns NULL if stack is empty.
+ *
+ * __cds_wfs_pop_blocking needs to be synchronized using one of the
+ * following techniques:
+ *
+ * 1) Calling __cds_wfs_pop_blocking under rcu read lock critical
+ * section. The caller must wait for a grace period to pass before
+ * freeing the returned node or modifying the cds_wfs_node structure.
+ * 2) Using mutual exclusion (e.g. mutexes) to protect
+ * __cds_wfs_pop_blocking and __cds_wfs_pop_all callers.
+ * 3) Ensuring that only ONE thread can call __cds_wfs_pop_blocking()
+ * and __cds_wfs_pop_all(). (multi-provider/single-consumer scheme).
+ *
+ * "state" saves state flags atomically sampled with pop operation.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_with_state_blocking(cds_wfs_stack_ptr_t u_stack, int *state)
+{
+ return ___cds_wfs_pop(u_stack, state, 1);
+}
+
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_blocking(cds_wfs_stack_ptr_t u_stack)
+{
+ return ___cds_wfs_pop_with_state_blocking(u_stack, NULL);
+}
+
+/*
+ * __cds_wfs_pop_with_state_nonblocking: pop a node from the stack.
+ *
+ * Same as __cds_wfs_pop_with_state_blocking, but returns
+ * CDS_WFS_WOULDBLOCK if it needs to block.
+ *
+ * "state" saves state flags atomically sampled with pop operation.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_with_state_nonblocking(cds_wfs_stack_ptr_t u_stack, int *state)
+{
+ return ___cds_wfs_pop(u_stack, state, 0);
+}
+
+/*
+ * __cds_wfs_pop_nonblocking: pop a node from the stack.
+ *
+ * Same as __cds_wfs_pop_blocking, but returns CDS_WFS_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_nonblocking(cds_wfs_stack_ptr_t u_stack)
+{
+ return ___cds_wfs_pop_with_state_nonblocking(u_stack, NULL);
+}
+
+/*
+ * __cds_wfs_pop_all: pop all nodes from a stack.
+ *
+ * __cds_wfs_pop_all does not require any synchronization with other
+ * push, nor with other __cds_wfs_pop_all, but requires synchronization
+ * matching the technique used to synchronize __cds_wfs_pop_blocking:
+ *
+ * 1) If __cds_wfs_pop_blocking is called under rcu read lock critical
+ * section, both __cds_wfs_pop_blocking and cds_wfs_pop_all callers
+ * must wait for a grace period to pass before freeing the returned
+ * node or modifying the cds_wfs_node structure. However, no RCU
+ * read-side critical section is needed around __cds_wfs_pop_all.
+ * 2) Using mutual exclusion (e.g. mutexes) to protect
+ * __cds_wfs_pop_blocking and __cds_wfs_pop_all callers.
+ * 3) Ensuring that only ONE thread can call __cds_wfs_pop_blocking()
+ * and __cds_wfs_pop_all(). (multi-provider/single-consumer scheme).
+ */
+static inline
+struct cds_wfs_head *
+___cds_wfs_pop_all(cds_wfs_stack_ptr_t u_stack)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+ struct cds_wfs_head *head;
+
+ /*
+ * Implicit memory barrier after uatomic_xchg() matches implicit
+ * memory barrier before uatomic_xchg() in cds_wfs_push. It
+ * ensures that all nodes of the returned list are consistent.
+ * There is no need to issue memory barriers when iterating on
+ * the returned list, because the full memory barrier issued
+ * prior to each uatomic_cmpxchg, which each write to head, are
+ * taking care to order writes to each node prior to the full
+ * memory barrier after this uatomic_xchg().
+ */
+ head = uatomic_xchg(&s->head, CDS_WFS_END);
+ if (___cds_wfs_end(head))
+ return NULL;
+ return head;
+}
+
+/*
+ * cds_wfs_pop_lock: lock stack pop-protection mutex.
+ */
+static inline void _cds_wfs_pop_lock(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ ret = pthread_mutex_lock(&s->lock);
+ assert(!ret);
+}
+
+/*
+ * cds_wfs_pop_unlock: unlock stack pop-protection mutex.
+ */
+static inline void _cds_wfs_pop_unlock(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&s->lock);
+ assert(!ret);
+}
+
+/*
+ * Call __cds_wfs_pop_with_state_blocking with an internal pop mutex held.
+ */
+static inline
+struct cds_wfs_node *
+_cds_wfs_pop_with_state_blocking(struct cds_wfs_stack *s, int *state)
+{
+ struct cds_wfs_node *retnode;
+
+ _cds_wfs_pop_lock(s);
+ retnode = ___cds_wfs_pop_with_state_blocking(s, state);
+ _cds_wfs_pop_unlock(s);
+ return retnode;
+}
+
+/*
+ * Call _cds_wfs_pop_with_state_blocking without saving any state.
+ */
+static inline
+struct cds_wfs_node *
+_cds_wfs_pop_blocking(struct cds_wfs_stack *s)
+{
+ return _cds_wfs_pop_with_state_blocking(s, NULL);
+}
+
+/*
+ * Call __cds_wfs_pop_all with an internal pop mutex held.
+ */
+static inline
+struct cds_wfs_head *
+_cds_wfs_pop_all_blocking(struct cds_wfs_stack *s)
+{
+ struct cds_wfs_head *rethead;
+
+ _cds_wfs_pop_lock(s);
+ rethead = ___cds_wfs_pop_all(s);
+ _cds_wfs_pop_unlock(s);
+ return rethead;
+}
+
+/*
+ * cds_wfs_first: get first node of a popped stack.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ *
+ * Used by for-like iteration macros in urcu/wfstack.h:
+ * cds_wfs_for_each_blocking()
+ * cds_wfs_for_each_blocking_safe()
+ *
+ * Returns NULL if popped stack is empty, top stack node otherwise.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_first(struct cds_wfs_head *head)
+{
+ if (___cds_wfs_end(head))
+ return NULL;
+ return &head->node;
+}
+
+static inline struct cds_wfs_node *
+___cds_wfs_next(struct cds_wfs_node *node, int blocking)
+{
+ struct cds_wfs_node *next;
+
+ next = ___cds_wfs_node_sync_next(node, blocking);
+ /*
+ * CDS_WFS_WOULDBLOCK != CSD_WFS_END, so we can check for end
+ * even if ___cds_wfs_node_sync_next returns CDS_WFS_WOULDBLOCK,
+ * and still return CDS_WFS_WOULDBLOCK.
+ */
+ if (___cds_wfs_end(next))
+ return NULL;
+ return next;
+}
+
+/*
+ * cds_wfs_next_blocking: get next node of a popped stack.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ *
+ * Used by for-like iteration macros in urcu/wfstack.h:
+ * cds_wfs_for_each_blocking()
+ * cds_wfs_for_each_blocking_safe()
+ *
+ * Returns NULL if reached end of popped stack, non-NULL next stack
+ * node otherwise.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_next_blocking(struct cds_wfs_node *node)
+{
+ return ___cds_wfs_next(node, 1);
+}
+
+
+/*
+ * cds_wfs_next_nonblocking: get next node of a popped stack.
+ *
+ * Same as cds_wfs_next_blocking, but returns CDS_WFS_WOULDBLOCK if it
+ * needs to block.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_next_nonblocking(struct cds_wfs_node *node)
+{
+ return ___cds_wfs_next(node, 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_STATIC_WFSTACK_H */
diff --git a/contrib/userspace-rcu/wfcqueue.h b/contrib/userspace-rcu/wfcqueue.h
new file mode 100644
index 00000000000..0292585ac79
--- /dev/null
+++ b/contrib/userspace-rcu/wfcqueue.h
@@ -0,0 +1,216 @@
+#ifndef _URCU_WFCQUEUE_H
+#define _URCU_WFCQUEUE_H
+
+/*
+ * urcu/wfcqueue.h
+ *
+ * Userspace RCU library - Concurrent Queue with Wait-Free Enqueue/Blocking Dequeue
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright 2011-2012 - Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't contain it.
+ * The non-LGPL section has been removed. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Concurrent queue with wait-free enqueue/blocking dequeue.
+ *
+ * This queue has been designed and implemented collaboratively by
+ * Mathieu Desnoyers and Lai Jiangshan. Inspired from
+ * half-wait-free/half-blocking queue implementation done by Paul E.
+ * McKenney.
+ */
+
+#define CDS_WFCQ_WOULDBLOCK ((struct cds_wfcq_node *) -1UL)
+
+enum cds_wfcq_ret {
+ CDS_WFCQ_RET_WOULDBLOCK = -1,
+ CDS_WFCQ_RET_DEST_EMPTY = 0,
+ CDS_WFCQ_RET_DEST_NON_EMPTY = 1,
+ CDS_WFCQ_RET_SRC_EMPTY = 2,
+};
+
+enum cds_wfcq_state {
+ CDS_WFCQ_STATE_LAST = (1U << 0),
+};
+
+struct cds_wfcq_node {
+ struct cds_wfcq_node *next;
+};
+
+/*
+ * Do not put head and tail on the same cache-line if concurrent
+ * enqueue/dequeue are expected from many CPUs. This eliminates
+ * false-sharing between enqueue and dequeue.
+ */
+struct __cds_wfcq_head {
+ struct cds_wfcq_node node;
+};
+
+struct cds_wfcq_head {
+ struct cds_wfcq_node node;
+ pthread_mutex_t lock;
+};
+
+#ifndef __cplusplus
+/*
+ * The transparent union allows calling functions that work on both
+ * struct cds_wfcq_head and struct __cds_wfcq_head on any of those two
+ * types.
+ */
+typedef union {
+ struct __cds_wfcq_head *_h;
+ struct cds_wfcq_head *h;
+} __attribute__((__transparent_union__)) cds_wfcq_head_ptr_t;
+
+/*
+ * This static inline is only present for compatibility with C++. It is
+ * effect-less in C.
+ */
+static inline struct __cds_wfcq_head *__cds_wfcq_head_cast(struct __cds_wfcq_head *head)
+{
+ return head;
+}
+
+/*
+ * This static inline is only present for compatibility with C++. It is
+ * effect-less in C.
+ */
+static inline struct cds_wfcq_head *cds_wfcq_head_cast(struct cds_wfcq_head *head)
+{
+ return head;
+}
+#else /* #ifndef __cplusplus */
+
+/* C++ ignores transparent union. */
+typedef union {
+ struct __cds_wfcq_head *_h;
+ struct cds_wfcq_head *h;
+} cds_wfcq_head_ptr_t;
+
+/* C++ ignores transparent union. Requires an explicit conversion. */
+static inline cds_wfcq_head_ptr_t __cds_wfcq_head_cast(struct __cds_wfcq_head *head)
+{
+ cds_wfcq_head_ptr_t ret = { ._h = head };
+ return ret;
+}
+/* C++ ignores transparent union. Requires an explicit conversion. */
+static inline cds_wfcq_head_ptr_t cds_wfcq_head_cast(struct cds_wfcq_head *head)
+{
+ cds_wfcq_head_ptr_t ret = { .h = head };
+ return ret;
+}
+#endif /* #else #ifndef __cplusplus */
+
+struct cds_wfcq_tail {
+ struct cds_wfcq_node *p;
+};
+
+#include "static-wfcqueue.h"
+
+#define cds_wfcq_node_init _cds_wfcq_node_init
+#define cds_wfcq_init _cds_wfcq_init
+#define __cds_wfcq_init ___cds_wfcq_init
+#define cds_wfcq_destroy _cds_wfcq_destroy
+#define cds_wfcq_empty _cds_wfcq_empty
+#define cds_wfcq_enqueue _cds_wfcq_enqueue
+
+/* Dequeue locking */
+#define cds_wfcq_dequeue_lock _cds_wfcq_dequeue_lock
+#define cds_wfcq_dequeue_unlock _cds_wfcq_dequeue_unlock
+
+/* Locking performed within cds_wfcq calls. */
+#define cds_wfcq_dequeue_blocking _cds_wfcq_dequeue_blocking
+#define cds_wfcq_dequeue_with_state_blocking \
+ _cds_wfcq_dequeue_with_state_blocking
+#define cds_wfcq_splice_blocking _cds_wfcq_splice_blocking
+#define cds_wfcq_first_blocking _cds_wfcq_first_blocking
+#define cds_wfcq_next_blocking _cds_wfcq_next_blocking
+
+/* Locking ensured by caller by holding cds_wfcq_dequeue_lock() */
+#define __cds_wfcq_dequeue_blocking ___cds_wfcq_dequeue_blocking
+#define __cds_wfcq_dequeue_with_state_blocking \
+ ___cds_wfcq_dequeue_with_state_blocking
+#define __cds_wfcq_splice_blocking ___cds_wfcq_splice_blocking
+#define __cds_wfcq_first_blocking ___cds_wfcq_first_blocking
+#define __cds_wfcq_next_blocking ___cds_wfcq_next_blocking
+
+/*
+ * Locking ensured by caller by holding cds_wfcq_dequeue_lock().
+ * Non-blocking: deque, first, next return CDS_WFCQ_WOULDBLOCK if they
+ * need to block. splice returns nonzero if it needs to block.
+ */
+#define __cds_wfcq_dequeue_nonblocking ___cds_wfcq_dequeue_nonblocking
+#define __cds_wfcq_dequeue_with_state_nonblocking \
+ ___cds_wfcq_dequeue_with_state_nonblocking
+#define __cds_wfcq_splice_nonblocking ___cds_wfcq_splice_nonblocking
+#define __cds_wfcq_first_nonblocking ___cds_wfcq_first_nonblocking
+#define __cds_wfcq_next_nonblocking ___cds_wfcq_next_nonblocking
+
+/*
+ * __cds_wfcq_for_each_blocking: Iterate over all nodes in a queue,
+ * without dequeuing them.
+ * @head: head of the queue (struct cds_wfcq_head or __cds_wfcq_head pointer).
+ * @tail: tail of the queue (struct cds_wfcq_tail pointer).
+ * @node: iterator on the queue (struct cds_wfcq_node pointer).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+#define __cds_wfcq_for_each_blocking(head, tail, node) \
+ for (node = __cds_wfcq_first_blocking(head, tail); \
+ node != NULL; \
+ node = __cds_wfcq_next_blocking(head, tail, node))
+
+/*
+ * __cds_wfcq_for_each_blocking_safe: Iterate over all nodes in a queue,
+ * without dequeuing them. Safe against deletion.
+ * @head: head of the queue (struct cds_wfcq_head or __cds_wfcq_head pointer).
+ * @tail: tail of the queue (struct cds_wfcq_tail pointer).
+ * @node: iterator on the queue (struct cds_wfcq_node pointer).
+ * @n: struct cds_wfcq_node pointer holding the next pointer (used
+ * internally).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+#define __cds_wfcq_for_each_blocking_safe(head, tail, node, n) \
+ for (node = __cds_wfcq_first_blocking(head, tail), \
+ n = (node ? __cds_wfcq_next_blocking(head, tail, node) : NULL); \
+ node != NULL; \
+ node = n, n = (node ? __cds_wfcq_next_blocking(head, tail, node) : NULL))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_WFCQUEUE_H */
diff --git a/contrib/userspace-rcu/wfstack.h b/contrib/userspace-rcu/wfstack.h
new file mode 100644
index 00000000000..738fd1cfd33
--- /dev/null
+++ b/contrib/userspace-rcu/wfstack.h
@@ -0,0 +1,178 @@
+#ifndef _URCU_WFSTACK_H
+#define _URCU_WFSTACK_H
+
+/*
+ * urcu/wfstack.h
+ *
+ * Userspace RCU library - Stack with wait-free push, blocking traversal.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't support a stack
+ * without mutex. The non-LGPL section has been removed. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Stack with wait-free push, blocking traversal.
+ *
+ * Stack implementing push, pop, pop_all operations, as well as iterator
+ * on the stack head returned by pop_all.
+ *
+ * Wait-free operations: cds_wfs_push, __cds_wfs_pop_all, cds_wfs_empty,
+ * cds_wfs_first.
+ * Blocking operations: cds_wfs_pop, cds_wfs_pop_all, cds_wfs_next,
+ * iteration on stack head returned by pop_all.
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * cds_wfs_push __cds_wfs_pop __cds_wfs_pop_all
+ * cds_wfs_push - - -
+ * __cds_wfs_pop - X X
+ * __cds_wfs_pop_all - X -
+ *
+ * cds_wfs_pop and cds_wfs_pop_all use an internal mutex to provide
+ * synchronization.
+ */
+
+#define CDS_WFS_WOULDBLOCK ((void *) -1UL)
+
+enum cds_wfs_state {
+ CDS_WFS_STATE_LAST = (1U << 0),
+};
+
+/*
+ * struct cds_wfs_node is returned by __cds_wfs_pop, and also used as
+ * iterator on stack. It is not safe to dereference the node next
+ * pointer when returned by __cds_wfs_pop_blocking.
+ */
+struct cds_wfs_node {
+ struct cds_wfs_node *next;
+};
+
+/*
+ * struct cds_wfs_head is returned by __cds_wfs_pop_all, and can be used
+ * to begin iteration on the stack. "node" needs to be the first field of
+ * cds_wfs_head, so the end-of-stack pointer value can be used for both
+ * types.
+ */
+struct cds_wfs_head {
+ struct cds_wfs_node node;
+};
+
+struct __cds_wfs_stack {
+ struct cds_wfs_head *head;
+};
+
+struct cds_wfs_stack {
+ struct cds_wfs_head *head;
+ pthread_mutex_t lock;
+};
+
+/*
+ * The transparent union allows calling functions that work on both
+ * struct cds_wfs_stack and struct __cds_wfs_stack on any of those two
+ * types.
+ */
+typedef union {
+ struct __cds_wfs_stack *_s;
+ struct cds_wfs_stack *s;
+} __attribute__((__transparent_union__)) cds_wfs_stack_ptr_t;
+
+#include "static-wfstack.h"
+
+#define cds_wfs_node_init _cds_wfs_node_init
+#define cds_wfs_init _cds_wfs_init
+#define cds_wfs_destroy _cds_wfs_destroy
+#define __cds_wfs_init ___cds_wfs_init
+#define cds_wfs_empty _cds_wfs_empty
+#define cds_wfs_push _cds_wfs_push
+
+/* Locking performed internally */
+#define cds_wfs_pop_blocking _cds_wfs_pop_blocking
+#define cds_wfs_pop_with_state_blocking _cds_wfs_pop_with_state_blocking
+#define cds_wfs_pop_all_blocking _cds_wfs_pop_all_blocking
+
+/*
+ * For iteration on cds_wfs_head returned by __cds_wfs_pop_all or
+ * cds_wfs_pop_all_blocking.
+ */
+#define cds_wfs_first _cds_wfs_first
+#define cds_wfs_next_blocking _cds_wfs_next_blocking
+#define cds_wfs_next_nonblocking _cds_wfs_next_nonblocking
+
+/* Pop locking with internal mutex */
+#define cds_wfs_pop_lock _cds_wfs_pop_lock
+#define cds_wfs_pop_unlock _cds_wfs_pop_unlock
+
+/* Synchronization ensured by the caller. See synchronization table. */
+#define __cds_wfs_pop_blocking ___cds_wfs_pop_blocking
+#define __cds_wfs_pop_with_state_blocking \
+ ___cds_wfs_pop_with_state_blocking
+#define __cds_wfs_pop_nonblocking ___cds_wfs_pop_nonblocking
+#define __cds_wfs_pop_with_state_nonblocking \
+ ___cds_wfs_pop_with_state_nonblocking
+#define __cds_wfs_pop_all ___cds_wfs_pop_all
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+ * cds_wfs_for_each_blocking: Iterate over all nodes returned by
+ * __cds_wfs_pop_all().
+ * @head: head of the queue (struct cds_wfs_head pointer).
+ * @node: iterator (struct cds_wfs_node pointer).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ */
+#define cds_wfs_for_each_blocking(head, node) \
+ for (node = cds_wfs_first(head); \
+ node != NULL; \
+ node = cds_wfs_next_blocking(node))
+
+/*
+ * cds_wfs_for_each_blocking_safe: Iterate over all nodes returned by
+ * __cds_wfs_pop_all(). Safe against deletion.
+ * @head: head of the queue (struct cds_wfs_head pointer).
+ * @node: iterator (struct cds_wfs_node pointer).
+ * @n: struct cds_wfs_node pointer holding the next pointer (used
+ * internally).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ */
+#define cds_wfs_for_each_blocking_safe(head, node, n) \
+ for (node = cds_wfs_first(head), \
+ n = (node ? cds_wfs_next_blocking(node) : NULL); \
+ node != NULL; \
+ node = n, n = (node ? cds_wfs_next_blocking(node) : NULL))
+
+#endif /* _URCU_WFSTACK_H */
diff --git a/contrib/uuid/clear.c b/contrib/uuid/clear.c
deleted file mode 100644
index 2d91fee9399..00000000000
--- a/contrib/uuid/clear.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * clear.c -- Clear a UUID
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "string.h"
-
-#include "uuidP.h"
-
-void uuid_clear(uuid_t uu)
-{
- memset(uu, 0, 16);
-}
-
diff --git a/contrib/uuid/compare.c b/contrib/uuid/compare.c
deleted file mode 100644
index f28a72678cf..00000000000
--- a/contrib/uuid/compare.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * compare.c --- compare whether or not two UUID's are the same
- *
- * Returns 0 if the two UUID's are different, and 1 if they are the same.
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuidP.h"
-#include <string.h>
-
-#define UUCMP(u1,u2) if (u1 != u2) return((u1 < u2) ? -1 : 1);
-
-int uuid_compare(const uuid_t uu1, const uuid_t uu2)
-{
- struct uuid uuid1, uuid2;
-
- uuid_unpack(uu1, &uuid1);
- uuid_unpack(uu2, &uuid2);
-
- UUCMP(uuid1.time_low, uuid2.time_low);
- UUCMP(uuid1.time_mid, uuid2.time_mid);
- UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
- UUCMP(uuid1.clock_seq, uuid2.clock_seq);
- return memcmp(uuid1.node, uuid2.node, 6);
-}
-
diff --git a/contrib/uuid/copy.c b/contrib/uuid/copy.c
deleted file mode 100644
index ead33aa26e8..00000000000
--- a/contrib/uuid/copy.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * copy.c --- copy UUIDs
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuidP.h"
-
-void uuid_copy(uuid_t dst, const uuid_t src)
-{
- unsigned char *cp1;
- const unsigned char *cp2;
- int i;
-
- for (i=0, cp1 = dst, cp2 = src; i < 16; i++)
- *cp1++ = *cp2++;
-}
diff --git a/contrib/uuid/gen_uuid.c b/contrib/uuid/gen_uuid.c
deleted file mode 100644
index b3eda9de387..00000000000
--- a/contrib/uuid/gen_uuid.c
+++ /dev/null
@@ -1,679 +0,0 @@
-/*
- * gen_uuid.c --- generate a DCE-compatible uuid
- *
- * Copyright (C) 1996, 1997, 1998, 1999 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-/*
- * Force inclusion of SVID stuff since we need it if we're compiling in
- * gcc-wall wall mode
- */
-#define _SVID_SOURCE
-
-#include "config.h"
-#ifdef _WIN32
-#define _WIN32_WINNT 0x0500
-#include <windows.h>
-#define UUID MYUUID
-#endif
-#include <stdio.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-#include <string.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/types.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#include <sys/wait.h>
-#include <sys/stat.h>
-#ifdef HAVE_SYS_FILE_H
-#include <sys/file.h>
-#endif
-#ifdef HAVE_SYS_IOCTL_H
-#include <sys/ioctl.h>
-#endif
-#ifdef HAVE_SYS_SOCKET_H
-#include <sys/socket.h>
-#endif
-#ifdef HAVE_SYS_UN_H
-#include <sys/un.h>
-#endif
-#ifdef HAVE_SYS_SOCKIO_H
-#include <sys/sockio.h>
-#endif
-#ifdef HAVE_NET_IF_H
-#include <net/if.h>
-#endif
-#ifdef HAVE_NETINET_IN_H
-#include <netinet/in.h>
-#endif
-#ifdef HAVE_NET_IF_DL_H
-#include <net/if_dl.h>
-#endif
-#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
-#include <sys/syscall.h>
-#endif
-#ifdef HAVE_SYS_RESOURCE_H
-#include <sys/resource.h>
-#endif
-#include <limits.h>
-
-#include "uuidP.h"
-#include "uuidd.h"
-
-#ifdef HAVE_SRANDOM
-#define srand(x) srandom(x)
-#define rand() random()
-#endif
-
-#ifdef TLS
-#define THREAD_LOCAL static TLS
-#else
-#define THREAD_LOCAL static
-#endif
-
-#if defined(__linux__) && defined(__NR_gettid) && defined(HAVE_JRAND48)
-#define DO_JRAND_MIX
-THREAD_LOCAL unsigned short jrand_seed[3];
-#endif
-
-#ifndef OPEN_MAX
-#define OPEN_MAX 1024
-#endif
-
-#ifdef _WIN32
-static void gettimeofday (struct timeval *tv, void *dummy)
-{
- FILETIME ftime;
- uint64_t n;
-
- GetSystemTimeAsFileTime (&ftime);
- n = (((uint64_t) ftime.dwHighDateTime << 32)
- + (uint64_t) ftime.dwLowDateTime);
- if (n) {
- n /= 10;
- n -= ((369 * 365 + 89) * (uint64_t) 86400) * 1000000;
- }
-
- tv->tv_sec = n / 1000000;
- tv->tv_usec = n % 1000000;
-}
-
-static int getuid (void)
-{
- return 1;
-}
-#endif
-
-static int get_random_fd(void)
-{
- struct timeval tv;
- static int fd = -2;
- int i;
-
- if (fd == -2) {
- gettimeofday(&tv, 0);
-#ifndef _WIN32
- fd = open("/dev/urandom", O_RDONLY);
- if (fd == -1)
- fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
- if (fd >= 0) {
- i = fcntl(fd, F_GETFD);
- if (i >= 0)
- fcntl(fd, F_SETFD, i | FD_CLOEXEC);
- }
-#endif
- srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec);
-#ifdef DO_JRAND_MIX
- jrand_seed[0] = getpid() ^ (tv.tv_sec & 0xFFFF);
- jrand_seed[1] = getppid() ^ (tv.tv_usec & 0xFFFF);
- jrand_seed[2] = (tv.tv_sec ^ tv.tv_usec) >> 16;
-#endif
- }
- /* Crank the random number generator a few times */
- gettimeofday(&tv, 0);
- for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--)
- rand();
- return fd;
-}
-
-
-/*
- * Generate a series of random bytes. Use /dev/urandom if possible,
- * and if not, use srandom/random.
- */
-static void get_random_bytes(void *buf, int nbytes)
-{
- int i, n = nbytes, fd = get_random_fd();
- int lose_counter = 0;
- unsigned char *cp = (unsigned char *) buf;
-#ifdef DO_JRAND_MIX
- unsigned short tmp_seed[3];
-#endif
- if (fd >= 0) {
- while (n > 0) {
- i = read(fd, cp, n);
- if (i <= 0) {
- if (lose_counter++ > 16)
- break;
- continue;
- }
- n -= i;
- cp += i;
- lose_counter = 0;
- }
- }
-
- /*
- * We do this all the time, but this is the only source of
- * randomness if /dev/random/urandom is out to lunch.
- */
- for (cp = buf, i = 0; i < nbytes; i++)
- *cp++ ^= (rand() >> 7) & 0xFF;
-#ifdef DO_JRAND_MIX
- memcpy(tmp_seed, jrand_seed, sizeof(tmp_seed));
- jrand_seed[2] = jrand_seed[2] ^ syscall(__NR_gettid);
- for (cp = buf, i = 0; i < nbytes; i++)
- *cp++ ^= (jrand48(tmp_seed) >> 7) & 0xFF;
- memcpy(jrand_seed, tmp_seed,
- sizeof(jrand_seed)-sizeof(unsigned short));
-#endif
-
- return;
-}
-
-/*
- * Get the ethernet hardware address, if we can find it...
- *
- * XXX for a windows version, probably should use GetAdaptersInfo:
- * http://www.codeguru.com/cpp/i-n/network/networkinformation/article.php/c5451
- * commenting out get_node_id just to get gen_uuid to compile under windows
- * is not the right way to go!
- */
-static int get_node_id(unsigned char *node_id)
-{
-#ifdef HAVE_NET_IF_H
- int sd;
- struct ifreq ifr, *ifrp;
- struct ifconf ifc;
- char buf[1024];
- int n, i;
- unsigned char *a;
-#ifdef HAVE_NET_IF_DL_H
- struct sockaddr_dl *sdlp;
-#endif
-
-/*
- * BSD 4.4 defines the size of an ifreq to be
- * max(sizeof(ifreq), sizeof(ifreq.ifr_name)+ifreq.ifr_addr.sa_len
- * However, under earlier systems, sa_len isn't present, so the size is
- * just sizeof(struct ifreq)
- */
-#ifdef HAVE_SA_LEN
-#ifndef max
-#define max(a,b) ((a) > (b) ? (a) : (b))
-#endif
-#define ifreq_size(i) max(sizeof(struct ifreq),\
- sizeof((i).ifr_name)+(i).ifr_addr.sa_len)
-#else
-#define ifreq_size(i) sizeof(struct ifreq)
-#endif /* HAVE_SA_LEN*/
-
- sd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
- if (sd < 0) {
- return -1;
- }
- memset(buf, 0, sizeof(buf));
- ifc.ifc_len = sizeof(buf);
- ifc.ifc_buf = buf;
- if (ioctl (sd, SIOCGIFCONF, (char *)&ifc) < 0) {
- close(sd);
- return -1;
- }
- n = ifc.ifc_len;
- for (i = 0; i < n; i+= ifreq_size(*ifrp) ) {
- ifrp = (struct ifreq *)((char *) ifc.ifc_buf+i);
- strncpy(ifr.ifr_name, ifrp->ifr_name, IFNAMSIZ);
-#ifdef SIOCGIFHWADDR
- if (ioctl(sd, SIOCGIFHWADDR, &ifr) < 0)
- continue;
- a = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
-#else
-#ifdef SIOCGENADDR
- if (ioctl(sd, SIOCGENADDR, &ifr) < 0)
- continue;
- a = (unsigned char *) ifr.ifr_enaddr;
-#else
-#ifdef HAVE_NET_IF_DL_H
- sdlp = (struct sockaddr_dl *) &ifrp->ifr_addr;
- if ((sdlp->sdl_family != AF_LINK) || (sdlp->sdl_alen != 6))
- continue;
- a = (unsigned char *) &sdlp->sdl_data[sdlp->sdl_nlen];
-#else
- /*
- * XXX we don't have a way of getting the hardware
- * address
- */
- close(sd);
- return 0;
-#endif /* HAVE_NET_IF_DL_H */
-#endif /* SIOCGENADDR */
-#endif /* SIOCGIFHWADDR */
- if (!a[0] && !a[1] && !a[2] && !a[3] && !a[4] && !a[5])
- continue;
- if (node_id) {
- memcpy(node_id, a, 6);
- close(sd);
- return 1;
- }
- }
- close(sd);
-#endif
- return 0;
-}
-
-/* Assume that the gettimeofday() has microsecond granularity */
-#define MAX_ADJUSTMENT 10
-
-static int get_clock(uint32_t *clock_high, uint32_t *clock_low,
- uint16_t *ret_clock_seq, int *num)
-{
- THREAD_LOCAL int adjustment = 0;
- THREAD_LOCAL struct timeval last = {0, 0};
- THREAD_LOCAL int state_fd = -2;
- THREAD_LOCAL FILE *state_f;
- THREAD_LOCAL uint16_t clock_seq;
- struct timeval tv;
- struct flock fl;
- uint64_t clock_reg;
- mode_t save_umask;
- int len;
-
- if (state_fd == -2) {
- save_umask = umask(0);
- state_fd = open("/var/lib/libuuid/clock.txt",
- O_RDWR|O_CREAT, 0660);
- (void) umask(save_umask);
- state_f = fdopen(state_fd, "r+");
- if (!state_f) {
- close(state_fd);
- state_fd = -1;
- }
- }
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
- fl.l_pid = 0;
- if (state_fd >= 0) {
- rewind(state_f);
- while (fcntl(state_fd, F_SETLKW, &fl) < 0) {
- if ((errno == EAGAIN) || (errno == EINTR))
- continue;
- fclose(state_f);
- close(state_fd);
- state_fd = -1;
- break;
- }
- }
- if (state_fd >= 0) {
- unsigned int cl;
- unsigned long tv1, tv2;
- int a;
-
- if (fscanf(state_f, "clock: %04x tv: %lu %lu adj: %d\n",
- &cl, &tv1, &tv2, &a) == 4) {
- clock_seq = cl & 0x3FFF;
- last.tv_sec = tv1;
- last.tv_usec = tv2;
- adjustment = a;
- }
- }
-
- if ((last.tv_sec == 0) && (last.tv_usec == 0)) {
- get_random_bytes(&clock_seq, sizeof(clock_seq));
- clock_seq &= 0x3FFF;
- gettimeofday(&last, 0);
- last.tv_sec--;
- }
-
-try_again:
- gettimeofday(&tv, 0);
- if ((tv.tv_sec < last.tv_sec) ||
- ((tv.tv_sec == last.tv_sec) &&
- (tv.tv_usec < last.tv_usec))) {
- clock_seq = (clock_seq+1) & 0x3FFF;
- adjustment = 0;
- last = tv;
- } else if ((tv.tv_sec == last.tv_sec) &&
- (tv.tv_usec == last.tv_usec)) {
- if (adjustment >= MAX_ADJUSTMENT)
- goto try_again;
- adjustment++;
- } else {
- adjustment = 0;
- last = tv;
- }
-
- clock_reg = tv.tv_usec*10 + adjustment;
- clock_reg += ((uint64_t) tv.tv_sec)*10000000;
- clock_reg += (((uint64_t) 0x01B21DD2) << 32) + 0x13814000;
-
- if (num && (*num > 1)) {
- adjustment += *num - 1;
- last.tv_usec += adjustment / 10;
- adjustment = adjustment % 10;
- last.tv_sec += last.tv_usec / 1000000;
- last.tv_usec = last.tv_usec % 1000000;
- }
-
- if (state_fd > 0) {
- rewind(state_f);
- len = fprintf(state_f,
- "clock: %04x tv: %016lu %08lu adj: %08d\n",
- clock_seq, last.tv_sec, last.tv_usec, adjustment);
- fflush(state_f);
- if (ftruncate(state_fd, len) < 0) {
- fprintf(state_f, " \n");
- fflush(state_f);
- }
- rewind(state_f);
- fl.l_type = F_UNLCK;
- fcntl(state_fd, F_SETLK, &fl);
- }
-
- *clock_high = clock_reg >> 32;
- *clock_low = clock_reg;
- *ret_clock_seq = clock_seq;
- return 0;
-}
-
-#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
-static ssize_t read_all(int fd, char *buf, size_t count)
-{
- ssize_t ret;
- ssize_t c = 0;
- int tries = 0;
-
- memset(buf, 0, count);
- while (count > 0) {
- ret = read(fd, buf, count);
- if (ret <= 0) {
- if ((errno == EAGAIN || errno == EINTR || ret == 0) &&
- (tries++ < 5))
- continue;
- return c ? c : -1;
- }
- if (ret > 0)
- tries = 0;
- count -= ret;
- buf += ret;
- c += ret;
- }
- return c;
-}
-#endif
-
-/*
- * Close all file descriptors
- */
-#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
-static void close_all_fds(void)
-{
- int i, max;
-
-#if defined(HAVE_SYSCONF) && defined(_SC_OPEN_MAX)
- max = sysconf(_SC_OPEN_MAX);
-#elif defined(HAVE_GETDTABLESIZE)
- max = getdtablesize();
-#elif defined(HAVE_GETRLIMIT) && defined(RLIMIT_NOFILE)
- struct rlimit rl;
-
- getrlimit(RLIMIT_NOFILE, &rl);
- max = rl.rlim_cur;
-#else
- max = OPEN_MAX;
-#endif
-
- for (i=0; i < max; i++) {
- close(i);
- if (i <= 2)
- open("/dev/null", O_RDWR);
- }
-}
-#endif
-
-
-/*
- * Try using the uuidd daemon to generate the UUID
- *
- * Returns 0 on success, non-zero on failure.
- */
-static int get_uuid_via_daemon(int op, uuid_t out, int *num)
-{
-#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
- char op_buf[64];
- int op_len;
- int s;
- ssize_t ret;
- int32_t reply_len = 0, expected = 16;
- struct sockaddr_un srv_addr;
- struct stat st;
- pid_t pid;
- static const char *uuidd_path = UUIDD_PATH;
- static int access_ret = -2;
- static int start_attempts = 0;
-
- if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
- return -1;
-
- srv_addr.sun_family = AF_UNIX;
- strcpy(srv_addr.sun_path, UUIDD_SOCKET_PATH);
-
- if (connect(s, (const struct sockaddr *) &srv_addr,
- sizeof(struct sockaddr_un)) < 0) {
- if (access_ret == -2)
- access_ret = access(uuidd_path, X_OK);
- if (access_ret == 0)
- access_ret = stat(uuidd_path, &st);
- if (access_ret == 0 && (st.st_mode & (S_ISUID | S_ISGID)) == 0)
- access_ret = access(UUIDD_DIR, W_OK);
- if (access_ret == 0 && start_attempts++ < 5) {
- if ((pid = fork()) == 0) {
- close_all_fds();
- execl(uuidd_path, "uuidd", "-qT", "300",
- (char *) NULL);
- exit(1);
- }
- (void) waitpid(pid, 0, 0);
- if (connect(s, (const struct sockaddr *) &srv_addr,
- sizeof(struct sockaddr_un)) < 0)
- goto fail;
- } else
- goto fail;
- }
- op_buf[0] = op;
- op_len = 1;
- if (op == UUIDD_OP_BULK_TIME_UUID) {
- memcpy(op_buf+1, num, sizeof(*num));
- op_len += sizeof(*num);
- expected += sizeof(*num);
- }
-
- ret = write(s, op_buf, op_len);
- if (ret < 1)
- goto fail;
-
- ret = read_all(s, (char *) &reply_len, sizeof(reply_len));
- if (ret < 0)
- goto fail;
-
- if (reply_len != expected)
- goto fail;
-
- ret = read_all(s, op_buf, reply_len);
-
- if (op == UUIDD_OP_BULK_TIME_UUID)
- memcpy(op_buf+16, num, sizeof(int));
-
- memcpy(out, op_buf, 16);
-
- close(s);
- return ((ret == expected) ? 0 : -1);
-
-fail:
- close(s);
-#endif
- return -1;
-}
-
-void uuid__generate_time(uuid_t out, int *num)
-{
- static unsigned char node_id[6];
- static int has_init = 0;
- struct uuid uu;
- uint32_t clock_mid;
-
- if (!has_init) {
- if (get_node_id(node_id) <= 0) {
- get_random_bytes(node_id, 6);
- /*
- * Set multicast bit, to prevent conflicts
- * with IEEE 802 addresses obtained from
- * network cards
- */
- node_id[0] |= 0x01;
- }
- has_init = 1;
- }
- get_clock(&clock_mid, &uu.time_low, &uu.clock_seq, num);
- uu.clock_seq |= 0x8000;
- uu.time_mid = (uint16_t) clock_mid;
- uu.time_hi_and_version = ((clock_mid >> 16) & 0x0FFF) | 0x1000;
- memcpy(uu.node, node_id, 6);
- uuid_pack(&uu, out);
-}
-
-void uuid_generate_time(uuid_t out)
-{
-#ifdef TLS
- THREAD_LOCAL int num = 0;
- THREAD_LOCAL struct uuid uu;
- THREAD_LOCAL time_t last_time = 0;
- time_t now;
-
- if (num > 0) {
- now = time(0);
- if (now > last_time+1)
- num = 0;
- }
- if (num <= 0) {
- num = 1000;
- if (get_uuid_via_daemon(UUIDD_OP_BULK_TIME_UUID,
- out, &num) == 0) {
- last_time = time(0);
- uuid_unpack(out, &uu);
- num--;
- return;
- }
- num = 0;
- }
- if (num > 0) {
- uu.time_low++;
- if (uu.time_low == 0) {
- uu.time_mid++;
- if (uu.time_mid == 0)
- uu.time_hi_and_version++;
- }
- num--;
- uuid_pack(&uu, out);
- return;
- }
-#else
- if (get_uuid_via_daemon(UUIDD_OP_TIME_UUID, out, 0) == 0)
- return;
-#endif
-
- uuid__generate_time(out, 0);
-}
-
-
-void uuid__generate_random(uuid_t out, int *num)
-{
- uuid_t buf;
- struct uuid uu;
- int i, n;
-
- if (!num || !*num)
- n = 1;
- else
- n = *num;
-
- for (i = 0; i < n; i++) {
- get_random_bytes(buf, sizeof(buf));
- uuid_unpack(buf, &uu);
-
- uu.clock_seq = (uu.clock_seq & 0x3FFF) | 0x8000;
- uu.time_hi_and_version = (uu.time_hi_and_version & 0x0FFF)
- | 0x4000;
- uuid_pack(&uu, out);
- out += sizeof(uuid_t);
- }
-}
-
-void uuid_generate_random(uuid_t out)
-{
- int num = 1;
- /* No real reason to use the daemon for random uuid's -- yet */
-
- uuid__generate_random(out, &num);
-}
-
-
-/*
- * This is the generic front-end to uuid_generate_random and
- * uuid_generate_time. It uses uuid_generate_random only if
- * /dev/urandom is available, since otherwise we won't have
- * high-quality randomness.
- */
-void uuid_generate(uuid_t out)
-{
- if (get_random_fd() >= 0)
- uuid_generate_random(out);
- else
- uuid_generate_time(out);
-}
diff --git a/contrib/uuid/gen_uuid_nt.c b/contrib/uuid/gen_uuid_nt.c
deleted file mode 100644
index aa44bfd3d7d..00000000000
--- a/contrib/uuid/gen_uuid_nt.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * gen_uuid_nt.c -- Use NT api to generate uuid
- *
- * Written by Andrey Shedel (andreys@ns.cr.cyco.com)
- */
-
-
-#include "uuidP.h"
-
-#pragma warning(push,4)
-
-#pragma comment(lib, "ntdll.lib")
-
-//
-// Here is a nice example why it's not a good idea
-// to use native API in ordinary applications.
-// Number of parameters in function below was changed from 3 to 4
-// for NT5.
-//
-//
-// NTSYSAPI
-// NTSTATUS
-// NTAPI
-// NtAllocateUuids(
-// OUT PULONG p1,
-// OUT PULONG p2,
-// OUT PULONG p3,
-// OUT PUCHAR Seed // 6 bytes
-// );
-//
-//
-
-unsigned long
-__stdcall
-NtAllocateUuids(
- void* p1, // 8 bytes
- void* p2, // 4 bytes
- void* p3 // 4 bytes
- );
-
-typedef
-unsigned long
-(__stdcall*
-NtAllocateUuids_2000)(
- void* p1, // 8 bytes
- void* p2, // 4 bytes
- void* p3, // 4 bytes
- void* seed // 6 bytes
- );
-
-
-
-//
-// Nice, but instead of including ntddk.h ot winnt.h
-// I should define it here because they MISSED __stdcall in those headers.
-//
-
-__declspec(dllimport)
-struct _TEB*
-__stdcall
-NtCurrentTeb(void);
-
-
-//
-// The only way to get version information from the system is to examine
-// one stored in PEB. But it's pretty dangerouse because this value could
-// be altered in image header.
-//
-
-static
-int
-Nt5(void)
-{
- //return NtCuttentTeb()->Peb->OSMajorVersion >= 5;
- return (int)*(int*)((char*)(int)(*(int*)((char*)NtCurrentTeb() + 0x30)) + 0xA4) >= 5;
-}
-
-
-
-
-void uuid_generate(uuid_t out)
-{
- if(Nt5())
- {
- unsigned char seed[6];
- ((NtAllocateUuids_2000)NtAllocateUuids)(out, ((char*)out)+8, ((char*)out)+12, &seed[0] );
- }
- else
- {
- NtAllocateUuids(out, ((char*)out)+8, ((char*)out)+12);
- }
-}
diff --git a/contrib/uuid/isnull.c b/contrib/uuid/isnull.c
deleted file mode 100644
index 931e7e7dba2..00000000000
--- a/contrib/uuid/isnull.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * isnull.c --- Check whether or not the UUID is null
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuidP.h"
-
-/* Returns 1 if the uuid is the NULL uuid */
-int uuid_is_null(const uuid_t uu)
-{
- const unsigned char *cp;
- int i;
-
- for (i=0, cp = uu; i < 16; i++)
- if (*cp++)
- return 0;
- return 1;
-}
-
diff --git a/contrib/uuid/pack.c b/contrib/uuid/pack.c
deleted file mode 100644
index 097516d2e2f..00000000000
--- a/contrib/uuid/pack.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Internal routine for packing UUID's
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <string.h>
-#include "uuidP.h"
-
-void uuid_pack(const struct uuid *uu, uuid_t ptr)
-{
- uint32_t tmp;
- unsigned char *out = ptr;
-
- tmp = uu->time_low;
- out[3] = (unsigned char) tmp;
- tmp >>= 8;
- out[2] = (unsigned char) tmp;
- tmp >>= 8;
- out[1] = (unsigned char) tmp;
- tmp >>= 8;
- out[0] = (unsigned char) tmp;
-
- tmp = uu->time_mid;
- out[5] = (unsigned char) tmp;
- tmp >>= 8;
- out[4] = (unsigned char) tmp;
-
- tmp = uu->time_hi_and_version;
- out[7] = (unsigned char) tmp;
- tmp >>= 8;
- out[6] = (unsigned char) tmp;
-
- tmp = uu->clock_seq;
- out[9] = (unsigned char) tmp;
- tmp >>= 8;
- out[8] = (unsigned char) tmp;
-
- memcpy(out+10, uu->node, 6);
-}
-
diff --git a/contrib/uuid/parse.c b/contrib/uuid/parse.c
deleted file mode 100644
index 074383efae7..00000000000
--- a/contrib/uuid/parse.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * parse.c --- UUID parsing
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-
-#include "uuidP.h"
-
-int uuid_parse(const char *in, uuid_t uu)
-{
- struct uuid uuid;
- int i;
- const char *cp;
- char buf[3];
-
- if (strlen(in) != 36)
- return -1;
- for (i=0, cp = in; i <= 36; i++,cp++) {
- if ((i == 8) || (i == 13) || (i == 18) ||
- (i == 23)) {
- if (*cp == '-')
- continue;
- else
- return -1;
- }
- if (i== 36)
- if (*cp == 0)
- continue;
- if (!isxdigit(*cp))
- return -1;
- }
- uuid.time_low = strtoul(in, NULL, 16);
- uuid.time_mid = strtoul(in+9, NULL, 16);
- uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
- uuid.clock_seq = strtoul(in+19, NULL, 16);
- cp = in+24;
- buf[2] = 0;
- for (i=0; i < 6; i++) {
- buf[0] = *cp++;
- buf[1] = *cp++;
- uuid.node[i] = strtoul(buf, NULL, 16);
- }
-
- uuid_pack(&uuid, uu);
- return 0;
-}
diff --git a/contrib/uuid/tst_uuid.c b/contrib/uuid/tst_uuid.c
deleted file mode 100644
index e03138f7d18..00000000000
--- a/contrib/uuid/tst_uuid.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * tst_uuid.c --- test program from the UUID library
- *
- * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifdef _WIN32
-#define _WIN32_WINNT 0x0500
-#include <windows.h>
-#define UUID MYUUID
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "uuid.h"
-
-static int test_uuid(const char * uuid, int isValid)
-{
- static const char * validStr[2] = {"invalid", "valid"};
- uuid_t uuidBits;
- int parsedOk;
-
- parsedOk = uuid_parse(uuid, uuidBits) == 0;
-
- printf("%s is %s", uuid, validStr[isValid]);
- if (parsedOk != isValid) {
- printf(" but uuid_parse says %s\n", validStr[parsedOk]);
- return 1;
- }
- printf(", OK\n");
- return 0;
-}
-
-#ifdef __GNUC__
-#define ATTR(x) __attribute__(x)
-#else
-#define ATTR(x)
-#endif
-
-int
-main(int argc ATTR((unused)) , char **argv ATTR((unused)))
-{
- uuid_t buf, tst;
- char str[100];
- struct timeval tv;
- time_t time_reg;
- unsigned char *cp;
- int i;
- int failed = 0;
- int type, variant;
-
- uuid_generate(buf);
- uuid_unparse(buf, str);
- printf("UUID generate = %s\n", str);
- printf("UUID: ");
- for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
- printf("%02x", *cp++);
- }
- printf("\n");
- type = uuid_type(buf); variant = uuid_variant(buf);
- printf("UUID type = %d, UUID variant = %d\n", type, variant);
- if (variant != UUID_VARIANT_DCE) {
- printf("Incorrect UUID Variant; was expecting DCE!\n");
- failed++;
- }
- printf("\n");
-
- uuid_generate_random(buf);
- uuid_unparse(buf, str);
- printf("UUID random string = %s\n", str);
- printf("UUID: ");
- for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
- printf("%02x", *cp++);
- }
- printf("\n");
- type = uuid_type(buf); variant = uuid_variant(buf);
- printf("UUID type = %d, UUID variant = %d\n", type, variant);
- if (variant != UUID_VARIANT_DCE) {
- printf("Incorrect UUID Variant; was expecting DCE!\n");
- failed++;
- }
- if (type != 4) {
- printf("Incorrect UUID type; was expecting "
- "4 (random type)!\n");
- failed++;
- }
- printf("\n");
-
- uuid_generate_time(buf);
- uuid_unparse(buf, str);
- printf("UUID string = %s\n", str);
- printf("UUID time: ");
- for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
- printf("%02x", *cp++);
- }
- printf("\n");
- type = uuid_type(buf); variant = uuid_variant(buf);
- printf("UUID type = %d, UUID variant = %d\n", type, variant);
- if (variant != UUID_VARIANT_DCE) {
- printf("Incorrect UUID Variant; was expecting DCE!\n");
- failed++;
- }
- if (type != 1) {
- printf("Incorrect UUID type; was expecting "
- "1 (time-based type)!\\n");
- failed++;
- }
- tv.tv_sec = 0;
- tv.tv_usec = 0;
- time_reg = uuid_time(buf, &tv);
- printf("UUID time is: (%ld, %ld): %s\n", tv.tv_sec, tv.tv_usec,
- ctime(&time_reg));
- uuid_parse(str, tst);
- if (!uuid_compare(buf, tst))
- printf("UUID parse and compare succeeded.\n");
- else {
- printf("UUID parse and compare failed!\n");
- failed++;
- }
- uuid_clear(tst);
- if (uuid_is_null(tst))
- printf("UUID clear and is null succeeded.\n");
- else {
- printf("UUID clear and is null failed!\n");
- failed++;
- }
- uuid_copy(buf, tst);
- if (!uuid_compare(buf, tst))
- printf("UUID copy and compare succeeded.\n");
- else {
- printf("UUID copy and compare failed!\n");
- failed++;
- }
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981b", 1);
- failed += test_uuid("84949CC5-4701-4A84-895B-354C584A981B", 1);
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981bc", 0);
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981", 0);
- failed += test_uuid("84949cc5x4701-4a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc504701-4a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-470104a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-4701-4a840895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-4701-4a84-895b0354c584a981b", 0);
- failed += test_uuid("g4949cc5-4701-4a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981g", 0);
-
- if (failed) {
- printf("%d failures.\n", failed);
- exit(1);
- }
- return 0;
-}
diff --git a/contrib/uuid/unpack.c b/contrib/uuid/unpack.c
deleted file mode 100644
index beaaff3ca8a..00000000000
--- a/contrib/uuid/unpack.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Internal routine for unpacking UUID
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <string.h>
-#include "uuidP.h"
-
-void uuid_unpack(const uuid_t in, struct uuid *uu)
-{
- const uint8_t *ptr = in;
- uint32_t tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- tmp = (tmp << 8) | *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_low = tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_mid = tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_hi_and_version = tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->clock_seq = tmp;
-
- memcpy(uu->node, ptr, 6);
-}
-
diff --git a/contrib/uuid/unparse.c b/contrib/uuid/unparse.c
deleted file mode 100644
index a95bbb04258..00000000000
--- a/contrib/uuid/unparse.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * unparse.c -- convert a UUID to string
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <stdio.h>
-
-#include "uuidP.h"
-
-static const char *fmt_lower =
- "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x";
-
-static const char *fmt_upper =
- "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X";
-
-#ifdef UUID_UNPARSE_DEFAULT_UPPER
-#define FMT_DEFAULT fmt_upper
-#else
-#define FMT_DEFAULT fmt_lower
-#endif
-
-static void uuid_unparse_x(const uuid_t uu, char *out, const char *fmt)
-{
- struct uuid uuid;
-
- uuid_unpack(uu, &uuid);
- sprintf(out, fmt,
- uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
- uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
- uuid.node[0], uuid.node[1], uuid.node[2],
- uuid.node[3], uuid.node[4], uuid.node[5]);
-}
-
-void uuid_unparse_lower(const uuid_t uu, char *out)
-{
- uuid_unparse_x(uu, out, fmt_lower);
-}
-
-void uuid_unparse_upper(const uuid_t uu, char *out)
-{
- uuid_unparse_x(uu, out, fmt_upper);
-}
-
-void uuid_unparse(const uuid_t uu, char *out)
-{
- uuid_unparse_x(uu, out, FMT_DEFAULT);
-}
diff --git a/contrib/uuid/uuid.h b/contrib/uuid/uuid.h
deleted file mode 100644
index ab006652fc0..00000000000
--- a/contrib/uuid/uuid.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Public include file for the UUID library
- *
- * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifndef _UUID_UUID_H
-#define _UUID_UUID_H
-
-#include "config.h"
-#include <sys/types.h>
-#ifndef _WIN32
-#include <sys/time.h>
-#endif
-#include <time.h>
-
-typedef unsigned char uuid_t[16];
-
-/* UUID Variant definitions */
-#define UUID_VARIANT_NCS 0
-#define UUID_VARIANT_DCE 1
-#define UUID_VARIANT_MICROSOFT 2
-#define UUID_VARIANT_OTHER 3
-
-/* UUID Type definitions */
-#define UUID_TYPE_DCE_TIME 1
-#define UUID_TYPE_DCE_RANDOM 4
-
-/* Allow UUID constants to be defined */
-#ifdef __GNUC__
-#define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \
- static const uuid_t name __attribute__ ((unused)) = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15}
-#else
-#define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \
- static const uuid_t name = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15}
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* clear.c */
-void uuid_clear(uuid_t uu);
-
-/* compare.c */
-int uuid_compare(const uuid_t uu1, const uuid_t uu2);
-
-/* copy.c */
-void uuid_copy(uuid_t dst, const uuid_t src);
-
-/* gen_uuid.c */
-void uuid_generate(uuid_t out);
-void uuid_generate_random(uuid_t out);
-void uuid_generate_time(uuid_t out);
-
-/* isnull.c */
-int uuid_is_null(const uuid_t uu);
-
-/* parse.c */
-int uuid_parse(const char *in, uuid_t uu);
-
-/* unparse.c */
-void uuid_unparse(const uuid_t uu, char *out);
-void uuid_unparse_lower(const uuid_t uu, char *out);
-void uuid_unparse_upper(const uuid_t uu, char *out);
-
-/* uuid_time.c */
-time_t uuid_time(const uuid_t uu, struct timeval *ret_tv);
-int uuid_type(const uuid_t uu);
-int uuid_variant(const uuid_t uu);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UUID_UUID_H */
diff --git a/contrib/uuid/uuidP.h b/contrib/uuid/uuidP.h
deleted file mode 100644
index 9a2de6132fe..00000000000
--- a/contrib/uuid/uuidP.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * uuid.h -- private header file for uuids
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuid.h"
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include "uuid_types.h"
-#endif
-#include <sys/types.h>
-
-
-/*
- * Offset between 15-Oct-1582 and 1-Jan-70
- */
-#define TIME_OFFSET_HIGH 0x01B21DD2
-#define TIME_OFFSET_LOW 0x13814000
-
-struct uuid {
- uint32_t time_low;
- uint16_t time_mid;
- uint16_t time_hi_and_version;
- uint16_t clock_seq;
- uint8_t node[6];
-};
-
-
-/*
- * prototypes
- */
-void uuid_pack(const struct uuid *uu, uuid_t ptr);
-void uuid_unpack(const uuid_t in, struct uuid *uu);
diff --git a/contrib/uuid/uuid_time.c b/contrib/uuid/uuid_time.c
deleted file mode 100644
index f25f5c90fe5..00000000000
--- a/contrib/uuid/uuid_time.c
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * uuid_time.c --- Interpret the time field from a uuid. This program
- * violates the UUID abstraction barrier by reaching into the guts
- * of a UUID and interpreting it.
- *
- * Copyright (C) 1998, 1999 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifdef _WIN32
-#define _WIN32_WINNT 0x0500
-#include <windows.h>
-#define UUID MYUUID
-#endif
-
-#include <stdio.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <stdlib.h>
-#include <sys/types.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#include <time.h>
-
-#include "uuidP.h"
-
-time_t uuid_time(const uuid_t uu, struct timeval *ret_tv)
-{
- struct timeval tv;
- struct uuid uuid;
- uint32_t high;
- uint64_t clock_reg;
-
- uuid_unpack(uu, &uuid);
-
- high = uuid.time_mid | ((uuid.time_hi_and_version & 0xFFF) << 16);
- clock_reg = uuid.time_low | ((uint64_t) high << 32);
-
- clock_reg -= (((uint64_t) 0x01B21DD2) << 32) + 0x13814000;
- tv.tv_sec = clock_reg / 10000000;
- tv.tv_usec = (clock_reg % 10000000) / 10;
-
- if (ret_tv)
- *ret_tv = tv;
-
- return tv.tv_sec;
-}
-
-int uuid_type(const uuid_t uu)
-{
- struct uuid uuid;
-
- uuid_unpack(uu, &uuid);
- return ((uuid.time_hi_and_version >> 12) & 0xF);
-}
-
-int uuid_variant(const uuid_t uu)
-{
- struct uuid uuid;
- int var;
-
- uuid_unpack(uu, &uuid);
- var = uuid.clock_seq;
-
- if ((var & 0x8000) == 0)
- return UUID_VARIANT_NCS;
- if ((var & 0x4000) == 0)
- return UUID_VARIANT_DCE;
- if ((var & 0x2000) == 0)
- return UUID_VARIANT_MICROSOFT;
- return UUID_VARIANT_OTHER;
-}
-
-#ifdef DEBUG
-static const char *variant_string(int variant)
-{
- switch (variant) {
- case UUID_VARIANT_NCS:
- return "NCS";
- case UUID_VARIANT_DCE:
- return "DCE";
- case UUID_VARIANT_MICROSOFT:
- return "Microsoft";
- default:
- return "Other";
- }
-}
-
-
-int
-main(int argc, char **argv)
-{
- uuid_t buf;
- time_t time_reg;
- struct timeval tv;
- int type, variant;
-
- if (argc != 2) {
- fprintf(stderr, "Usage: %s uuid\n", argv[0]);
- exit(1);
- }
- if (uuid_parse(argv[1], buf)) {
- fprintf(stderr, "Invalid UUID: %s\n", argv[1]);
- exit(1);
- }
- variant = uuid_variant(buf);
- type = uuid_type(buf);
- time_reg = uuid_time(buf, &tv);
-
- printf("UUID variant is %d (%s)\n", variant, variant_string(variant));
- if (variant != UUID_VARIANT_DCE) {
- printf("Warning: This program only knows how to interpret "
- "DCE UUIDs.\n\tThe rest of the output is likely "
- "to be incorrect!!\n");
- }
- printf("UUID type is %d", type);
- switch (type) {
- case 1:
- printf(" (time based)\n");
- break;
- case 2:
- printf(" (DCE)\n");
- break;
- case 3:
- printf(" (name-based)\n");
- break;
- case 4:
- printf(" (random)\n");
- break;
- default:
- printf("\n");
- }
- if (type != 1) {
- printf("Warning: not a time-based UUID, so UUID time "
- "decoding will likely not work!\n");
- }
- printf("UUID time is: (%ld, %ld): %s\n", tv.tv_sec, tv.tv_usec,
- ctime(&time_reg));
-
- return 0;
-}
-#endif
diff --git a/contrib/uuid/uuid_types.h.in b/contrib/uuid/uuid_types.h.in
deleted file mode 100644
index f21ff4ee183..00000000000
--- a/contrib/uuid/uuid_types.h.in
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * If linux/types.h is already been included, assume it has defined
- * everything we need. (cross fingers) Other header files may have
- * also defined the types that we need.
- */
-#if (!defined(_STDINT_H) && !defined(_UUID_STDINT_H))
-#define _UUID_STDINT_H
-
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-
-#if (@SIZEOF_INT@ == 8)
-typedef int int64_t;
-typedef unsigned int uint64_t;
-#elif (@SIZEOF_LONG@ == 8)
-typedef long int64_t;
-typedef unsigned long uint64_t;
-#elif (@SIZEOF_LONG_LONG@ == 8)
-#if defined(__GNUC__)
-typedef __signed__ long long int64_t;
-#else
-typedef signed long long int64_t;
-#endif
-typedef unsigned long long uint64_t;
-#endif
-
-#if (@SIZEOF_INT@ == 2)
-typedef int int16_t;
-typedef unsigned int uint16_t;
-#elif (@SIZEOF_SHORT@ == 2)
-typedef short int16_t;
-typedef unsigned short uint16_t;
-#else
- ?==error: undefined 16 bit type
-#endif
-
-#if (@SIZEOF_INT@ == 4)
-typedef int int32_t;
-typedef unsigned int uint32_t;
-#elif (@SIZEOF_LONG@ == 4)
-typedef long int32_t;
-typedef unsigned long uint32_t;
-#elif (@SIZEOF_SHORT@ == 4)
-typedef short int32_t;
-typedef unsigned short uint32_t;
-#else
- ?== error: undefined 32 bit type
-#endif
-
-#endif
diff --git a/contrib/uuid/uuidd.h b/contrib/uuid/uuidd.h
deleted file mode 100644
index c71f4b78835..00000000000
--- a/contrib/uuid/uuidd.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Definitions used by the uuidd daemon
- *
- * Copyright (C) 2007 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifndef _UUID_UUIDD_H
-#define _UUID_UUIDD_H
-
-#define UUIDD_DIR "/var/lib/libuuid"
-#define UUIDD_SOCKET_PATH UUIDD_DIR "/request"
-#define UUIDD_PIDFILE_PATH UUIDD_DIR "/uuidd.pid"
-#define UUIDD_PATH "/usr/sbin/uuidd"
-
-#define UUIDD_OP_GETPID 0
-#define UUIDD_OP_GET_MAXOP 1
-#define UUIDD_OP_TIME_UUID 2
-#define UUIDD_OP_RANDOM_UUID 3
-#define UUIDD_OP_BULK_TIME_UUID 4
-#define UUIDD_OP_BULK_RANDOM_UUID 5
-#define UUIDD_MAX_OP UUIDD_OP_BULK_RANDOM_UUID
-
-extern void uuid__generate_time(uuid_t out, int *num);
-extern void uuid__generate_random(uuid_t out, int *num);
-
-#endif /* _UUID_UUID_H */
diff --git a/contrib/xxhash/xxhash.c b/contrib/xxhash/xxhash.c
new file mode 100644
index 00000000000..56f80f8811d
--- /dev/null
+++ b/contrib/xxhash/xxhash.c
@@ -0,0 +1,1029 @@
+/*
+* xxHash - Fast Hash algorithm
+* Copyright (C) 2012-2016, Yann Collet
+*
+* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following disclaimer
+* in the documentation and/or other materials provided with the
+* distribution.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* You can contact the author at :
+* - xxHash homepage: http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/* *************************************
+* Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
+ * It can generate buggy code on targets which do not support unaligned memory accesses.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define XXH_FORCE_MEMORY_ACCESS 2
+# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7S__) ))
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks input for null pointer.
+ * It it is, result for null input pointers is the same as a null-length input.
+ */
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
+# define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void XXH_free (void* p) { free(p); }
+/*! and for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#include <assert.h> /* assert */
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# define FORCE_INLINE static __forceinline
+#else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+# else
+# define FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+# endif
+#endif
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; } __attribute__((packed)) unalign;
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static U32 XXH_read32(const void* memPtr)
+{
+ U32 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static U32 XXH_swap32 (U32 x)
+{
+ return ((x << 24) & 0xff000000 ) |
+ ((x << 8) & 0x00ff0000 ) |
+ ((x >> 8) & 0x0000ff00 ) |
+ ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* *************************************
+* Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+static int XXH_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+static U32 XXH_readBE32(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+
+/* *************************************
+* Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* *******************************************************************
+* 32-bit hash functions
+*********************************************************************/
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 = 668265263U;
+static const U32 PRIME32_5 = 374761393U;
+
+static U32 XXH32_round(U32 seed, U32 input)
+{
+ seed += input * PRIME32_2;
+ seed = XXH_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+/* mix all bits */
+static U32 XXH32_avalanche(U32 h32)
+{
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+ return(h32);
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+static U32
+XXH32_finalize(U32 h32, const void* ptr, size_t len,
+ XXH_endianess endian, XXH_alignment align)
+
+{
+ const BYTE* p = (const BYTE*)ptr;
+#define PROCESS1 \
+ h32 += (*p) * PRIME32_5; \
+ p++; \
+ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+
+#define PROCESS4 \
+ h32 += XXH_get32bits(p) * PRIME32_3; \
+ p+=4; \
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
+
+ switch(len&15) /* or switch(bEnd - p) */
+ {
+ case 12: PROCESS4;
+ /* fallthrough */
+ case 8: PROCESS4;
+ /* fallthrough */
+ case 4: PROCESS4;
+ return XXH32_avalanche(h32);
+
+ case 13: PROCESS4;
+ /* fallthrough */
+ case 9: PROCESS4;
+ /* fallthrough */
+ case 5: PROCESS4;
+ PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 14: PROCESS4;
+ /* fallthrough */
+ case 10: PROCESS4;
+ /* fallthrough */
+ case 6: PROCESS4;
+ PROCESS1;
+ PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 15: PROCESS4;
+ /* fallthrough */
+ case 11: PROCESS4;
+ /* fallthrough */
+ case 7: PROCESS4;
+ /* fallthrough */
+ case 3: PROCESS1;
+ /* fallthrough */
+ case 2: PROCESS1;
+ /* fallthrough */
+ case 1: PROCESS1;
+ /* fallthrough */
+ case 0: return XXH32_avalanche(h32);
+ }
+ assert(0);
+ return h32; /* reaching this point is deemed impossible */
+}
+
+
+FORCE_INLINE U32
+XXH32_endian_align(const void* input, size_t len, U32 seed,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U32 h32;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)16;
+ }
+#endif
+
+ if (len>=16) {
+ const BYTE* const limit = bEnd - 15;
+ U32 v1 = seed + PRIME32_1 + PRIME32_2;
+ U32 v2 = seed + PRIME32_2;
+ U32 v3 = seed + 0;
+ U32 v4 = seed - PRIME32_1;
+
+ do {
+ v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+ v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+ v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+ v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+ } while (p < limit);
+
+ h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
+ + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (U32)len;
+
+ return XXH32_finalize(h32, p, len&15, endian, align);
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH32_state_t state;
+ XXH32_reset(&state, seed);
+ XXH32_update(&state, input, len);
+ return XXH32_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+
+/*====== Hash streaming ======*/
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+ return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
+{
+ XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME32_1 + PRIME32_2;
+ state.v2 = seed + PRIME32_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME32_1;
+ /* do not write into reserved, planned to be removed in a future version */
+ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+ return XXH_OK;
+}
+
+
+FORCE_INLINE
+XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ state->total_len_32 += (unsigned)len;
+ state->large_len |= (len>=16) | (state->total_len_32>=16);
+
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+ state->memsize += (unsigned)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* some data left from previous update */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+ { const U32* p32 = state->mem32;
+ state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+ state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+ state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+ state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian));
+ }
+ p += 16-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p <= bEnd-16) {
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = state->v1;
+ U32 v2 = state->v2;
+ U32 v3 = state->v3;
+ U32 v4 = state->v4;
+
+ do {
+ v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+ v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+ v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+ v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+FORCE_INLINE U32
+XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{
+ U32 h32;
+
+ if (state->large_len) {
+ h32 = XXH_rotl32(state->v1, 1)
+ + XXH_rotl32(state->v2, 7)
+ + XXH_rotl32(state->v3, 12)
+ + XXH_rotl32(state->v4, 18);
+ } else {
+ h32 = state->v3 /* == seed */ + PRIME32_5;
+ }
+
+ h32 += state->total_len_32;
+
+ return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned);
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+/*! Default XXH result types are basic unsigned 32 and 64 bits.
+* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
+* These functions allow transformation of hash result into and from its canonical format.
+* This way, hash values can be written into a file or buffer, remaining comparable across different systems.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+ return XXH_readBE32(src);
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+* 64-bit hash functions
+*********************************************************************/
+
+/*====== Memory access ======*/
+
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint64_t U64;
+# else
+ /* if compiler doesn't support unsigned long long, replace by another 64-bit type */
+ typedef unsigned long long U64;
+# endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
+static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U64 XXH_read64(const void* memPtr)
+{
+ U64 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+# define XXH_swap64 __builtin_bswap64
+#else
+static U64 XXH_swap64 (U64 x)
+{
+ return ((x << 56) & 0xff00000000000000ULL) |
+ ((x << 40) & 0x00ff000000000000ULL) |
+ ((x << 24) & 0x0000ff0000000000ULL) |
+ ((x << 8) & 0x000000ff00000000ULL) |
+ ((x >> 8) & 0x00000000ff000000ULL) |
+ ((x >> 24) & 0x0000000000ff0000ULL) |
+ ((x >> 40) & 0x000000000000ff00ULL) |
+ ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/*====== xxh64 ======*/
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 = 1609587929392839161ULL;
+static const U64 PRIME64_4 = 9650029242287828579ULL;
+static const U64 PRIME64_5 = 2870177450012600261ULL;
+
+static U64 XXH64_round(U64 acc, U64 input)
+{
+ acc += input * PRIME64_2;
+ acc = XXH_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+ val = XXH64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+static U64 XXH64_avalanche(U64 h64)
+{
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+ return h64;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+static U64
+XXH64_finalize(U64 h64, const void* ptr, size_t len,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)ptr;
+
+#define PROCESS1_64 \
+ h64 ^= (*p) * PRIME64_5; \
+ p++; \
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+
+#define PROCESS4_64 \
+ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \
+ p+=4; \
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+
+#define PROCESS8_64 { \
+ U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \
+ p+=8; \
+ h64 ^= k1; \
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
+}
+
+ switch(len&31) {
+ case 24: PROCESS8_64;
+ /* fallthrough */
+ case 16: PROCESS8_64;
+ /* fallthrough */
+ case 8: PROCESS8_64;
+ return XXH64_avalanche(h64);
+
+ case 28: PROCESS8_64;
+ /* fallthrough */
+ case 20: PROCESS8_64;
+ /* fallthrough */
+ case 12: PROCESS8_64;
+ /* fallthrough */
+ case 4: PROCESS4_64;
+ return XXH64_avalanche(h64);
+
+ case 25: PROCESS8_64;
+ /* fallthrough */
+ case 17: PROCESS8_64;
+ /* fallthrough */
+ case 9: PROCESS8_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 29: PROCESS8_64;
+ /* fallthrough */
+ case 21: PROCESS8_64;
+ /* fallthrough */
+ case 13: PROCESS8_64;
+ /* fallthrough */
+ case 5: PROCESS4_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 26: PROCESS8_64;
+ /* fallthrough */
+ case 18: PROCESS8_64;
+ /* fallthrough */
+ case 10: PROCESS8_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 30: PROCESS8_64;
+ /* fallthrough */
+ case 22: PROCESS8_64;
+ /* fallthrough */
+ case 14: PROCESS8_64;
+ /* fallthrough */
+ case 6: PROCESS4_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 27: PROCESS8_64;
+ /* fallthrough */
+ case 19: PROCESS8_64;
+ /* fallthrough */
+ case 11: PROCESS8_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 31: PROCESS8_64;
+ /* fallthrough */
+ case 23: PROCESS8_64;
+ /* fallthrough */
+ case 15: PROCESS8_64;
+ /* fallthrough */
+ case 7: PROCESS4_64;
+ /* fallthrough */
+ case 3: PROCESS1_64;
+ /* fallthrough */
+ case 2: PROCESS1_64;
+ /* fallthrough */
+ case 1: PROCESS1_64;
+ /* fallthrough */
+ case 0: return XXH64_avalanche(h64);
+ }
+
+ /* impossible to reach */
+ assert(0);
+ return 0; /* unreachable, but some compilers complain without it */
+}
+
+FORCE_INLINE U64
+XXH64_endian_align(const void* input, size_t len, U64 seed,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U64 h64;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)32;
+ }
+#endif
+
+ if (len>=32) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = seed + PRIME64_1 + PRIME64_2;
+ U64 v2 = seed + PRIME64_2;
+ U64 v3 = seed + 0;
+ U64 v4 = seed - PRIME64_1;
+
+ do {
+ v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+ v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+ v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+ v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+ } while (p<=limit);
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (U64) len;
+
+ return XXH64_finalize(h64, p, len, endian, align);
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH64_state_t state;
+ XXH64_reset(&state, seed);
+ XXH64_update(&state, input, len);
+ return XXH64_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/*====== Hash Streaming ======*/
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+ return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{
+ XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ /* do not write into reserved, planned to be removed in a future version */
+ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+ return XXH_OK;
+}
+
+FORCE_INLINE
+XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ state->total_len += len;
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+ state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+ state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+ state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+ state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+ p += 32-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p+32 <= bEnd) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = state->v1;
+ U64 v2 = state->v2;
+ U64 v3 = state->v3;
+ U64 v4 = state->v4;
+
+ do {
+ v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+ v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+ v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+ v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{
+ U64 h64;
+
+ if (state->total_len >= 32) {
+ U64 const v1 = state->v1;
+ U64 const v2 = state->v2;
+ U64 const v3 = state->v3;
+ U64 const v4 = state->v4;
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+ } else {
+ h64 = state->v3 /*seed*/ + PRIME64_5;
+ }
+
+ h64 += (U64) state->total_len;
+
+ return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned);
+}
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+ return XXH_readBE64(src);
+}
+
+#endif /* XXH_NO_LONG_LONG */
diff --git a/contrib/xxhash/xxhash.h b/contrib/xxhash/xxhash.h
new file mode 100644
index 00000000000..d6bad943358
--- /dev/null
+++ b/contrib/xxhash/xxhash.h
@@ -0,0 +1,328 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Header File
+ Copyright (C) 2012-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name Speed Q.Score Author
+xxHash 5.4 GB/s 10
+CrapWow 3.2 GB/s 2 Andrew
+MumurHash 3a 2.7 GB/s 10 Austin Appleby
+SpookyHash 2.0 GB/s 10 Bob Jenkins
+SBox 1.4 GB/s 9 Bret Mulvey
+Lookup3 1.2 GB/s 9 Bob Jenkins
+SuperFastHash 1.2 GB/s 1 Paul Hsieh
+CityHash64 1.05 GB/s 10 Pike & Alakuijala
+FNV 0.55 GB/s 5 Fowler, Noll, Vo
+CRC32 0.43 GB/s 9
+MD5-32 0.33 GB/s 10 Ronald L. Rivest
+SHA1-32 0.28 GB/s 10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name Speed on 64 bits Speed on 32 bits
+XXH64 13.8 GB/s 1.9 GB/s
+XXH32 6.8 GB/s 6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+ * API modifier
+ ******************************/
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * This is useful to include xxhash functions in `static` mode
+ * in order to inline them, and remove their symbol from the public list.
+ * Inlining can offer dramatic performance improvement on small keys.
+ * Methodology :
+ * #define XXH_INLINE_ALL
+ * #include "xxhash.h"
+ * `xxhash.c` is automatically included.
+ * It's not useful to compile and link it as a separate module.
+ */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+# ifndef XXH_STATIC_LINKING_ONLY
+# define XXH_STATIC_LINKING_ONLY
+# endif
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+ /* this version may generate warnings for unused static functions */
+# define XXH_PUBLIC_API static
+# endif
+#else
+# define XXH_PUBLIC_API /* do nothing */
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 6
+#define XXH_VERSION_RELEASE 5
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+typedef unsigned int XXH32_hash_t;
+
+/*! XXH32() :
+ Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
+ The memory between input & input+length must be valid (allocated and read-accessible).
+ "seed" can be used to alter the result predictably.
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+
+/*====== Streaming ======*/
+typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+/*
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
+ * Note that, for small input, they are slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * XXH state must first be allocated, using XXH*_createState() .
+ *
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a digest,
+ * and generate some new hashes later on, by calling again XXH*_digest().
+ *
+ * When done, free XXH state space if it was allocated dynamically.
+ */
+
+/*====== Canonical representation ======*/
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+ * These functions allow transformation of hash result into and from its canonical format.
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ */
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+typedef unsigned long long XXH64_hash_t;
+
+/*! XXH64() :
+ Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
+ "seed" can be used to alter the result predictably.
+ This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
+*/
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*====== Streaming ======*/
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
+
+/*====== Canonical representation ======*/
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+#endif /* XXH_NO_LONG_LONG */
+
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+ This section contains declarations which are not guaranteed to remain stable.
+ They may change in future versions, becoming incompatible with a different version of the library.
+ These declarations should only be used with static linking.
+ Never use them in association with dynamic linking !
+=================================================================================================== */
+
+/* These definitions are only present to allow
+ * static allocation of XXH state, on stack or in a struct for example.
+ * Never **ever** use members directly. */
+
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+
+struct XXH32_state_s {
+ uint32_t total_len_32;
+ uint32_t large_len;
+ uint32_t v1;
+ uint32_t v2;
+ uint32_t v3;
+ uint32_t v4;
+ uint32_t mem32[4];
+ uint32_t memsize;
+ uint32_t reserved; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH32_state_t */
+
+struct XXH64_state_s {
+ uint64_t total_len;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ uint64_t v4;
+ uint64_t mem64[4];
+ uint32_t memsize;
+ uint32_t reserved[2]; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH64_state_t */
+
+# else
+
+struct XXH32_state_s {
+ unsigned total_len_32;
+ unsigned large_len;
+ unsigned v1;
+ unsigned v2;
+ unsigned v3;
+ unsigned v4;
+ unsigned mem32[4];
+ unsigned memsize;
+ unsigned reserved; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH32_state_t */
+
+# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
+struct XXH64_state_s {
+ unsigned long long total_len;
+ unsigned long long v1;
+ unsigned long long v2;
+ unsigned long long v3;
+ unsigned long long v4;
+ unsigned long long mem64[4];
+ unsigned memsize;
+ unsigned reserved[2]; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH64_state_t */
+# endif
+
+# endif
+
+
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
+#endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */
diff --git a/contrib/xxhash/xxhsum.c b/contrib/xxhash/xxhsum.c
new file mode 100644
index 00000000000..69931f727f0
--- /dev/null
+++ b/contrib/xxhash/xxhsum.c
@@ -0,0 +1,1301 @@
+/*
+* xxhsum - Command line interface for xxhash algorithms
+* Copyright (C) Yann Collet 2012-2016
+*
+* GPL v2 License
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License along
+* with this program; if not, write to the Free Software Foundation, Inc.,
+* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* You can contact the author at :
+* - xxHash homepage : http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* xxhsum :
+ * Provides hash value of a file content, or a list of files, or stdin
+ * Display convention is Big Endian, for both 32 and 64 bits algorithms
+ */
+
+#ifndef XXHASH_C_2097394837
+#define XXHASH_C_2097394837
+
+/* ************************************
+ * Compiler Options
+ **************************************/
+/* MS Visual */
+#if defined(_MSC_VER) || defined(_WIN32)
+# define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */
+#endif
+
+/* Under Linux at least, pull in the *64 commands */
+#ifndef _LARGEFILE64_SOURCE
+# define _LARGEFILE64_SOURCE
+#endif
+
+
+/* ************************************
+ * Includes
+ **************************************/
+#include <stdlib.h> /* malloc, calloc, free, exit */
+#include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
+#include <string.h> /* strcmp */
+#include <sys/types.h> /* stat, stat64, _stat64 */
+#include <sys/stat.h> /* stat, stat64, _stat64 */
+#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
+#include <assert.h> /* assert */
+
+#define XXH_STATIC_LINKING_ONLY /* *_state_t */
+#include "xxhash.h"
+
+
+/* ************************************
+ * OS-Specific Includes
+ **************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+# include <fcntl.h> /* _O_BINARY */
+# include <io.h> /* _setmode, _isatty */
+# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
+# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#else
+# include <unistd.h> /* isatty, STDIN_FILENO */
+# define SET_BINARY_MODE(file)
+# define IS_CONSOLE(stdStream) isatty(STDIN_FILENO)
+#endif
+
+#if !defined(S_ISREG)
+# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/* ************************************
+* Basic Types
+**************************************/
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+# endif
+#endif
+
+static unsigned BMK_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+
+/* *************************************
+ * Constants
+ ***************************************/
+#define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE
+#define QUOTE(str) #str
+#define EXPAND_AND_QUOTE(str) QUOTE(str)
+#define PROGRAM_VERSION EXPAND_AND_QUOTE(LIB_VERSION)
+static const int g_nbBits = (int)(sizeof(void*)*8);
+static const char g_lename[] = "little endian";
+static const char g_bename[] = "big endian";
+#define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename)
+static const char author[] = "Yann Collet";
+#define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s), by %s \n", \
+ exename, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author
+
+#define KB *( 1<<10)
+#define MB *( 1<<20)
+#define GB *(1U<<30)
+
+static size_t XXH_DEFAULT_SAMPLE_SIZE = 100 KB;
+#define NBLOOPS 3 /* Default number of benchmark iterations */
+#define TIMELOOP_S 1
+#define TIMELOOP (TIMELOOP_S * CLOCKS_PER_SEC) /* Minimum timing per iteration */
+#define XXHSUM32_DEFAULT_SEED 0 /* Default seed for algo_xxh32 */
+#define XXHSUM64_DEFAULT_SEED 0 /* Default seed for algo_xxh64 */
+
+#define MAX_MEM (2 GB - 64 MB)
+
+static const char stdinName[] = "-";
+typedef enum { algo_xxh32, algo_xxh64 } algoType;
+static const algoType g_defaultAlgo = algo_xxh64; /* required within main() & usage() */
+
+/* <16 hex char> <SPC> <SPC> <filename> <'\0'>
+ * '4096' is typical Linux PATH_MAX configuration. */
+#define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1)
+
+/* Maximum acceptable line length. */
+#define MAX_LINE_LENGTH (32 KB)
+
+
+/* ************************************
+ * Display macros
+ **************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define DISPLAYRESULT(...) fprintf(stdout, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0)
+static int g_displayLevel = 2;
+
+
+/* ************************************
+ * Local variables
+ **************************************/
+static U32 g_nbIterations = NBLOOPS;
+
+
+/* ************************************
+ * Benchmark Functions
+ **************************************/
+static clock_t BMK_clockSpan( clock_t start )
+{
+ return clock() - start; /* works even if overflow; Typical max span ~ 30 mn */
+}
+
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{
+ size_t const step = 64 MB;
+ void* testmem = NULL;
+
+ requiredMem = (((requiredMem >> 26) + 1) << 26);
+ requiredMem += 2*step;
+ if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
+
+ while (!testmem) {
+ if (requiredMem > step) requiredMem -= step;
+ else requiredMem >>= 1;
+ testmem = malloc ((size_t)requiredMem);
+ }
+ free (testmem);
+
+ /* keep some space available */
+ if (requiredMem > step) requiredMem -= step;
+ else requiredMem >>= 1;
+
+ return (size_t)requiredMem;
+}
+
+
+static U64 BMK_GetFileSize(const char* infilename)
+{
+ int r;
+#if defined(_MSC_VER)
+ struct _stat64 statbuf;
+ r = _stat64(infilename, &statbuf);
+#else
+ struct stat statbuf;
+ r = stat(infilename, &statbuf);
+#endif
+ if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
+ return (U64)statbuf.st_size;
+}
+
+typedef U32 (*hashFunction)(const void* buffer, size_t bufferSize, U32 seed);
+
+static U32 localXXH32(const void* buffer, size_t bufferSize, U32 seed) { return XXH32(buffer, bufferSize, seed); }
+
+static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed) { return (U32)XXH64(buffer, bufferSize, seed); }
+
+static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize)
+{
+ U32 nbh_perIteration = ((300 MB) / (bufferSize+1)) + 1; /* first loop conservatively aims for 300 MB/s */
+ U32 iterationNb;
+ double fastestH = 100000000.;
+
+ DISPLAYLEVEL(2, "\r%70s\r", ""); /* Clean display line */
+ if (g_nbIterations<1) g_nbIterations=1;
+ for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++) {
+ U32 r=0;
+ clock_t cStart;
+
+ DISPLAYLEVEL(2, "%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize);
+ cStart = clock();
+ while (clock() == cStart); /* starts clock() at its exact beginning */
+ cStart = clock();
+
+ { U32 i;
+ for (i=0; i<nbh_perIteration; i++)
+ r += h(buffer, bufferSize, i);
+ }
+ if (r==0) DISPLAYLEVEL(3,".\r"); /* do something with r to avoid compiler "optimizing" away hash function */
+ { double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbh_perIteration;
+ if (timeS < fastestH) fastestH = timeS;
+ DISPLAYLEVEL(2, "%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
+ iterationNb, hName, (U32)bufferSize,
+ (double)1 / fastestH,
+ ((double)bufferSize / (1<<20)) / fastestH );
+ }
+ assert(fastestH > 1./2000000000); /* avoid U32 overflow */
+ nbh_perIteration = (U32)(1 / fastestH) + 1; /* adjust nbh_perIteration to last roughtly one second */
+ }
+ DISPLAYLEVEL(1, "%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,
+ (double)1 / fastestH,
+ ((double)bufferSize / (1<<20)) / fastestH);
+ if (g_displayLevel<1)
+ DISPLAYLEVEL(0, "%u, ", (U32)((double)1 / fastestH));
+}
+
+
+/* BMK_benchMem():
+ * specificTest : 0 == run all tests, 1+ run only specific test
+ * buffer : is supposed 8-bytes aligned (if malloc'ed, it should be)
+ * the real allocated size of buffer is supposed to be >= (bufferSize+3).
+ * @return : 0 on success, 1 if error (invalid mode selected) */
+static int BMK_benchMem(const void* buffer, size_t bufferSize, U32 specificTest)
+{
+ assert((((size_t)buffer) & 8) == 0); /* ensure alignment */
+
+ /* XXH32 bench */
+ if ((specificTest==0) | (specificTest==1))
+ BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize);
+
+ /* Bench XXH32 on Unaligned input */
+ if ((specificTest==0) | (specificTest==2))
+ BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize);
+
+ /* Bench XXH64 */
+ if ((specificTest==0) | (specificTest==3))
+ BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize);
+
+ /* Bench XXH64 on Unaligned input */
+ if ((specificTest==0) | (specificTest==4))
+ BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+3, bufferSize);
+
+ if (specificTest > 4) {
+ DISPLAY("benchmark mode invalid \n");
+ return 1;
+ }
+ return 0;
+}
+
+
+static size_t BMK_selectBenchedSize(const char* fileName)
+{ U64 const inFileSize = BMK_GetFileSize(fileName);
+ size_t benchedSize = (size_t) BMK_findMaxMem(inFileSize);
+ if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+ if (benchedSize < inFileSize) {
+ DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
+ }
+ return benchedSize;
+}
+
+
+static int BMK_benchFiles(const char** fileNamesTable, int nbFiles, U32 specificTest)
+{
+ int result = 0;
+ int fileIdx;
+
+ for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
+ const char* const inFileName = fileNamesTable[fileIdx];
+ FILE* const inFile = fopen( inFileName, "rb" );
+ size_t const benchedSize = BMK_selectBenchedSize(inFileName);
+ char* const buffer = (char*)calloc(benchedSize+16+3, 1);
+ void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes */
+
+ /* Checks */
+ if ((inFile==NULL) || (inFileName==NULL)) {
+ DISPLAY("Pb opening %s\n", inFileName);
+ free(buffer);
+ return 11;
+ }
+ if(!buffer) {
+ DISPLAY("\nError: not enough memory!\n");
+ fclose(inFile);
+ return 12;
+ }
+
+ /* Fill input buffer */
+ DISPLAYLEVEL(1, "\rLoading %s... \n", inFileName);
+ { size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
+ fclose(inFile);
+ if(readSize != benchedSize) {
+ DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
+ free(buffer);
+ return 13;
+ } }
+
+ /* bench */
+ result |= BMK_benchMem(alignedBuffer, benchedSize, specificTest);
+
+ free(buffer);
+ }
+
+ return result;
+}
+
+
+
+static int BMK_benchInternal(size_t keySize, int specificTest)
+{
+ void* const buffer = calloc(keySize+16+3, 1);
+ void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
+ if(!buffer) {
+ DISPLAY("\nError: not enough memory!\n");
+ return 12;
+ }
+
+ /* bench */
+ DISPLAYLEVEL(1, "Sample of ");
+ if (keySize > 10 KB) {
+ DISPLAYLEVEL(1, "%u KB", (U32)(keySize >> 10));
+ } else {
+ DISPLAYLEVEL(1, "%u bytes", (U32)keySize);
+ }
+ DISPLAYLEVEL(1, "... \n");
+
+ { int const result = BMK_benchMem(alignedBuffer, keySize, specificTest);
+ free(buffer);
+ return result;
+ }
+}
+
+
+static void BMK_checkResult(U32 r1, U32 r2)
+{
+ static int nbTests = 1;
+ if (r1==r2) {
+ DISPLAYLEVEL(3, "\rTest%3i : %08X == %08X ok ", nbTests, r1, r2);
+ } else {
+ DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2);
+ exit(1);
+ }
+ nbTests++;
+}
+
+
+static void BMK_checkResult64(U64 r1, U64 r2)
+{
+ static int nbTests = 1;
+ if (r1!=r2) {
+ DISPLAY("\rERROR : Test%3i : 64-bit values non equals !!!!! \n", nbTests);
+ DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2);
+ exit(1);
+ }
+ nbTests++;
+}
+
+
+static void BMK_testSequence64(void* sentence, size_t len, U64 seed, U64 Nresult)
+{
+ XXH64_state_t state;
+ U64 Dresult;
+ size_t pos;
+
+ Dresult = XXH64(sentence, len, seed);
+ BMK_checkResult64(Dresult, Nresult);
+
+ XXH64_reset(&state, seed);
+ XXH64_update(&state, sentence, len);
+ Dresult = XXH64_digest(&state);
+ BMK_checkResult64(Dresult, Nresult);
+
+ XXH64_reset(&state, seed);
+ for (pos=0; pos<len; pos++)
+ XXH64_update(&state, ((char*)sentence)+pos, 1);
+ Dresult = XXH64_digest(&state);
+ BMK_checkResult64(Dresult, Nresult);
+}
+
+
+static void BMK_testSequence(const void* sequence, size_t len, U32 seed, U32 Nresult)
+{
+ XXH32_state_t state;
+ U32 Dresult;
+ size_t pos;
+
+ Dresult = XXH32(sequence, len, seed);
+ BMK_checkResult(Dresult, Nresult);
+
+ XXH32_reset(&state, seed);
+ XXH32_update(&state, sequence, len);
+ Dresult = XXH32_digest(&state);
+ BMK_checkResult(Dresult, Nresult);
+
+ XXH32_reset(&state, seed);
+ for (pos=0; pos<len; pos++)
+ XXH32_update(&state, ((const char*)sequence)+pos, 1);
+ Dresult = XXH32_digest(&state);
+ BMK_checkResult(Dresult, Nresult);
+}
+
+
+#define SANITY_BUFFER_SIZE 101
+static void BMK_sanityCheck(void)
+{
+ static const U32 prime = 2654435761U;
+ BYTE sanityBuffer[SANITY_BUFFER_SIZE];
+ U32 byteGen = prime;
+
+ int i;
+ for (i=0; i<SANITY_BUFFER_SIZE; i++) {
+ sanityBuffer[i] = (BYTE)(byteGen>>24);
+ byteGen *= byteGen;
+ }
+
+ BMK_testSequence(NULL, 0, 0, 0x02CC5D05);
+ BMK_testSequence(NULL, 0, prime, 0x36B78AE7);
+ BMK_testSequence(sanityBuffer, 1, 0, 0xB85CBEE5);
+ BMK_testSequence(sanityBuffer, 1, prime, 0xD5845D64);
+ BMK_testSequence(sanityBuffer, 14, 0, 0xE5AA0AB4);
+ BMK_testSequence(sanityBuffer, 14, prime, 0x4481951D);
+ BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x1F1AA412);
+ BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0x498EC8E2);
+
+ BMK_testSequence64(NULL , 0, 0, 0xEF46DB3751D8E999ULL);
+ BMK_testSequence64(NULL , 0, prime, 0xAC75FDA2929B17EFULL);
+ BMK_testSequence64(sanityBuffer, 1, 0, 0x4FCE394CC88952D8ULL);
+ BMK_testSequence64(sanityBuffer, 1, prime, 0x739840CB819FA723ULL);
+ BMK_testSequence64(sanityBuffer, 14, 0, 0xCFFA8DB881BC3A3DULL);
+ BMK_testSequence64(sanityBuffer, 14, prime, 0x5B9611585EFCC9CBULL);
+ BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x0EAB543384F878ADULL);
+ BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0xCAA65939306F1E21ULL);
+
+ DISPLAYLEVEL(3, "\r%70s\r", ""); /* Clean display line */
+ DISPLAYLEVEL(3, "Sanity check -- all tests ok\n");
+}
+
+
+/* ********************************************************
+* File Hashing
+**********************************************************/
+
+static void BMK_display_LittleEndian(const void* ptr, size_t length)
+{
+ const BYTE* p = (const BYTE*)ptr;
+ size_t idx;
+ for (idx=length-1; idx<length; idx--) /* intentional underflow to negative to detect end */
+ DISPLAYRESULT("%02x", p[idx]);
+}
+
+static void BMK_display_BigEndian(const void* ptr, size_t length)
+{
+ const BYTE* p = (const BYTE*)ptr;
+ size_t idx;
+ for (idx=0; idx<length; idx++)
+ DISPLAYRESULT("%02x", p[idx]);
+}
+
+static void BMK_hashStream(void* xxhHashValue, const algoType hashType, FILE* inFile, void* buffer, size_t blockSize)
+{
+ XXH64_state_t state64;
+ XXH32_state_t state32;
+ size_t readSize;
+
+ /* Init */
+ XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
+ XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
+
+ /* Load file & update hash */
+ readSize = 1;
+ while (readSize) {
+ readSize = fread(buffer, 1, blockSize, inFile);
+ switch(hashType)
+ {
+ case algo_xxh32:
+ XXH32_update(&state32, buffer, readSize);
+ break;
+ case algo_xxh64:
+ XXH64_update(&state64, buffer, readSize);
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch(hashType)
+ {
+ case algo_xxh32:
+ { U32 const h32 = XXH32_digest(&state32);
+ memcpy(xxhHashValue, &h32, sizeof(h32));
+ break;
+ }
+ case algo_xxh64:
+ { U64 const h64 = XXH64_digest(&state64);
+ memcpy(xxhHashValue, &h64, sizeof(h64));
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+
+typedef enum { big_endian, little_endian} endianess;
+
+static int BMK_hash(const char* fileName,
+ const algoType hashType,
+ const endianess displayEndianess)
+{
+ FILE* inFile;
+ size_t const blockSize = 64 KB;
+ void* buffer;
+ U32 h32 = 0;
+ U64 h64 = 0;
+
+ /* Check file existence */
+ if (fileName == stdinName) {
+ inFile = stdin;
+ SET_BINARY_MODE(stdin);
+ }
+ else
+ inFile = fopen( fileName, "rb" );
+ if (inFile==NULL) {
+ DISPLAY( "Pb opening %s\n", fileName);
+ return 1;
+ }
+
+ /* Memory allocation & restrictions */
+ buffer = malloc(blockSize);
+ if(!buffer) {
+ DISPLAY("\nError: not enough memory!\n");
+ fclose(inFile);
+ return 1;
+ }
+
+ /* loading notification */
+ { const size_t fileNameSize = strlen(fileName);
+ const char* const fileNameEnd = fileName + fileNameSize;
+ const int maxInfoFilenameSize = (int)(fileNameSize > 30 ? 30 : fileNameSize);
+ int infoFilenameSize = 1;
+ while ((infoFilenameSize < maxInfoFilenameSize)
+ && (fileNameEnd[-1-infoFilenameSize] != '/')
+ && (fileNameEnd[-1-infoFilenameSize] != '\\') )
+ infoFilenameSize++;
+ DISPLAY("\rLoading %s... \r", fileNameEnd - infoFilenameSize);
+
+ /* Load file & update hash */
+ switch(hashType)
+ {
+ case algo_xxh32:
+ BMK_hashStream(&h32, algo_xxh32, inFile, buffer, blockSize);
+ break;
+ case algo_xxh64:
+ BMK_hashStream(&h64, algo_xxh64, inFile, buffer, blockSize);
+ break;
+ default:
+ break;
+ }
+
+ fclose(inFile);
+ free(buffer);
+ DISPLAY("%s \r", fileNameEnd - infoFilenameSize); /* erase line */
+ }
+
+ /* display Hash */
+ switch(hashType)
+ {
+ case algo_xxh32:
+ { XXH32_canonical_t hcbe32;
+ XXH32_canonicalFromHash(&hcbe32, h32);
+ displayEndianess==big_endian ?
+ BMK_display_BigEndian(&hcbe32, sizeof(hcbe32)) : BMK_display_LittleEndian(&hcbe32, sizeof(hcbe32));
+ DISPLAYRESULT(" %s\n", fileName);
+ break;
+ }
+ case algo_xxh64:
+ { XXH64_canonical_t hcbe64;
+ XXH64_canonicalFromHash(&hcbe64, h64);
+ displayEndianess==big_endian ?
+ BMK_display_BigEndian(&hcbe64, sizeof(hcbe64)) : BMK_display_LittleEndian(&hcbe64, sizeof(hcbe64));
+ DISPLAYRESULT(" %s\n", fileName);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+static int BMK_hashFiles(const char** fnList, int fnTotal,
+ algoType hashType, endianess displayEndianess)
+{
+ int fnNb;
+ int result = 0;
+
+ if (fnTotal==0)
+ return BMK_hash(stdinName, hashType, displayEndianess);
+
+ for (fnNb=0; fnNb<fnTotal; fnNb++)
+ result += BMK_hash(fnList[fnNb], hashType, displayEndianess);
+ DISPLAY("\r%70s\r", "");
+ return result;
+}
+
+
+typedef enum {
+ GetLine_ok,
+ GetLine_eof,
+ GetLine_exceedMaxLineLength,
+ GetLine_outOfMemory,
+} GetLineResult;
+
+typedef enum {
+ CanonicalFromString_ok,
+ CanonicalFromString_invalidFormat,
+} CanonicalFromStringResult;
+
+typedef enum {
+ ParseLine_ok,
+ ParseLine_invalidFormat,
+} ParseLineResult;
+
+typedef enum {
+ LineStatus_hashOk,
+ LineStatus_hashFailed,
+ LineStatus_failedToOpen,
+} LineStatus;
+
+typedef union {
+ XXH32_canonical_t xxh32;
+ XXH64_canonical_t xxh64;
+} Canonical;
+
+typedef struct {
+ Canonical canonical;
+ const char* filename;
+ int xxhBits; /* canonical type : 32:xxh32, 64:xxh64 */
+} ParsedLine;
+
+typedef struct {
+ unsigned long nProperlyFormattedLines;
+ unsigned long nImproperlyFormattedLines;
+ unsigned long nMismatchedChecksums;
+ unsigned long nOpenOrReadFailures;
+ unsigned long nMixedFormatLines;
+ int xxhBits;
+ int quit;
+} ParseFileReport;
+
+typedef struct {
+ const char* inFileName;
+ FILE* inFile;
+ int lineMax;
+ char* lineBuf;
+ size_t blockSize;
+ char* blockBuf;
+ int strictMode;
+ int statusOnly;
+ int warn;
+ int quiet;
+ ParseFileReport report;
+} ParseFileArg;
+
+
+/* Read line from stream.
+ Returns GetLine_ok, if it reads line successfully.
+ Returns GetLine_eof, if stream reaches EOF.
+ Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH.
+ Returns GetLine_outOfMemory, if line buffer memory allocation failed.
+ */
+static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile)
+{
+ GetLineResult result = GetLine_ok;
+ int len = 0;
+
+ if ((*lineBuf == NULL) || (*lineMax<1)) {
+ free(*lineBuf); /* in case it's != NULL */
+ *lineMax = 0;
+ *lineBuf = (char*)malloc(DEFAULT_LINE_LENGTH);
+ if(*lineBuf == NULL) return GetLine_outOfMemory;
+ *lineMax = DEFAULT_LINE_LENGTH;
+ }
+
+ for (;;) {
+ const int c = fgetc(inFile);
+ if (c == EOF) {
+ /* If we meet EOF before first character, returns GetLine_eof,
+ * otherwise GetLine_ok.
+ */
+ if (len == 0) result = GetLine_eof;
+ break;
+ }
+
+ /* Make enough space for len+1 (for final NUL) bytes. */
+ if (len+1 >= *lineMax) {
+ char* newLineBuf = NULL;
+ int newBufSize = *lineMax;
+
+ newBufSize += (newBufSize/2) + 1; /* x 1.5 */
+ if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH;
+ if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength;
+
+ newLineBuf = (char*) realloc(*lineBuf, newBufSize);
+ if (newLineBuf == NULL) return GetLine_outOfMemory;
+
+ *lineBuf = newLineBuf;
+ *lineMax = newBufSize;
+ }
+
+ if (c == '\n') break;
+ (*lineBuf)[len++] = (char) c;
+ }
+
+ (*lineBuf)[len] = '\0';
+ return result;
+}
+
+
+/* Converts one hexadecimal character to integer.
+ * Returns -1, if given character is not hexadecimal.
+ */
+static int charToHex(char c)
+{
+ int result = -1;
+ if (c >= '0' && c <= '9') {
+ result = (int) (c - '0');
+ } else if (c >= 'A' && c <= 'F') {
+ result = (int) (c - 'A') + 0x0a;
+ } else if (c >= 'a' && c <= 'f') {
+ result = (int) (c - 'a') + 0x0a;
+ }
+ return result;
+}
+
+
+/* Converts XXH32 canonical hexadecimal string hashStr to big endian unsigned char array dst.
+ * Returns CANONICAL_FROM_STRING_INVALID_FORMAT, if hashStr is not well formatted.
+ * Returns CANONICAL_FROM_STRING_OK, if hashStr is parsed successfully.
+ */
+static CanonicalFromStringResult canonicalFromString(unsigned char* dst,
+ size_t dstSize,
+ const char* hashStr)
+{
+ size_t i;
+ for (i = 0; i < dstSize; ++i) {
+ int h0, h1;
+
+ h0 = charToHex(hashStr[i*2 + 0]);
+ if (h0 < 0) return CanonicalFromString_invalidFormat;
+
+ h1 = charToHex(hashStr[i*2 + 1]);
+ if (h1 < 0) return CanonicalFromString_invalidFormat;
+
+ dst[i] = (unsigned char) ((h0 << 4) | h1);
+ }
+ return CanonicalFromString_ok;
+}
+
+
+/* Parse single line of xxHash checksum file.
+ * Returns PARSE_LINE_ERROR_INVALID_FORMAT, if line is not well formatted.
+ * Returns PARSE_LINE_OK if line is parsed successfully.
+ * And members of parseLine will be filled by parsed values.
+ *
+ * - line must be ended with '\0'.
+ * - Since parsedLine.filename will point within given argument `line`,
+ * users must keep `line`s content during they are using parsedLine.
+ *
+ * Given xxHash checksum line should have the following format:
+ *
+ * <8 or 16 hexadecimal char> <space> <space> <filename...> <'\0'>
+ */
+static ParseLineResult parseLine(ParsedLine* parsedLine, const char* line)
+{
+ const char* const firstSpace = strchr(line, ' ');
+ const char* const secondSpace = firstSpace + 1;
+
+ parsedLine->filename = NULL;
+ parsedLine->xxhBits = 0;
+
+ if (firstSpace == NULL || *secondSpace != ' ') return ParseLine_invalidFormat;
+
+ switch (firstSpace - line)
+ {
+ case 8:
+ { XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
+ if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), line)
+ != CanonicalFromString_ok) {
+ return ParseLine_invalidFormat;
+ }
+ parsedLine->xxhBits = 32;
+ break;
+ }
+
+ case 16:
+ { XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
+ if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), line)
+ != CanonicalFromString_ok) {
+ return ParseLine_invalidFormat;
+ }
+ parsedLine->xxhBits = 64;
+ break;
+ }
+
+ default:
+ return ParseLine_invalidFormat;
+ break;
+ }
+
+ parsedLine->filename = secondSpace + 1;
+ return ParseLine_ok;
+}
+
+
+/*! Parse xxHash checksum file.
+ */
+static void parseFile1(ParseFileArg* parseFileArg)
+{
+ const char* const inFileName = parseFileArg->inFileName;
+ ParseFileReport* const report = &parseFileArg->report;
+
+ unsigned long lineNumber = 0;
+ memset(report, 0, sizeof(*report));
+
+ while (!report->quit) {
+ FILE* fp = NULL;
+ LineStatus lineStatus = LineStatus_hashFailed;
+ GetLineResult getLineResult;
+ ParsedLine parsedLine;
+ memset(&parsedLine, 0, sizeof(parsedLine));
+
+ lineNumber++;
+ if (lineNumber == 0) {
+ /* This is unlikely happen, but md5sum.c has this
+ * error check. */
+ DISPLAY("%s : too many checksum lines\n", inFileName);
+ report->quit = 1;
+ break;
+ }
+
+ getLineResult = getLine(&parseFileArg->lineBuf, &parseFileArg->lineMax,
+ parseFileArg->inFile);
+ if (getLineResult != GetLine_ok) {
+ if (getLineResult == GetLine_eof) break;
+
+ switch (getLineResult)
+ {
+ case GetLine_ok:
+ case GetLine_eof:
+ /* These cases never happen. See above getLineResult related "if"s.
+ They exist just for make gcc's -Wswitch-enum happy. */
+ break;
+
+ default:
+ DISPLAY("%s : %lu: unknown error\n", inFileName, lineNumber);
+ break;
+
+ case GetLine_exceedMaxLineLength:
+ DISPLAY("%s : %lu: too long line\n", inFileName, lineNumber);
+ break;
+
+ case GetLine_outOfMemory:
+ DISPLAY("%s : %lu: out of memory\n", inFileName, lineNumber);
+ break;
+ }
+ report->quit = 1;
+ break;
+ }
+
+ if (parseLine(&parsedLine, parseFileArg->lineBuf) != ParseLine_ok) {
+ report->nImproperlyFormattedLines++;
+ if (parseFileArg->warn) {
+ DISPLAY("%s : %lu: improperly formatted XXHASH checksum line\n"
+ , inFileName, lineNumber);
+ }
+ continue;
+ }
+
+ if (report->xxhBits != 0 && report->xxhBits != parsedLine.xxhBits) {
+ /* Don't accept xxh32/xxh64 mixed file */
+ report->nImproperlyFormattedLines++;
+ report->nMixedFormatLines++;
+ if (parseFileArg->warn) {
+ DISPLAY("%s : %lu: improperly formatted XXHASH checksum line (XXH32/64)\n"
+ , inFileName, lineNumber);
+ }
+ continue;
+ }
+
+ report->nProperlyFormattedLines++;
+ if (report->xxhBits == 0) {
+ report->xxhBits = parsedLine.xxhBits;
+ }
+
+ fp = fopen(parsedLine.filename, "rb");
+ if (fp == NULL) {
+ lineStatus = LineStatus_failedToOpen;
+ } else {
+ lineStatus = LineStatus_hashFailed;
+ switch (parsedLine.xxhBits)
+ {
+ case 32:
+ { XXH32_hash_t xxh;
+ BMK_hashStream(&xxh, algo_xxh32, fp, parseFileArg->blockBuf, parseFileArg->blockSize);
+ if (xxh == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) {
+ lineStatus = LineStatus_hashOk;
+ } }
+ break;
+
+ case 64:
+ { XXH64_hash_t xxh;
+ BMK_hashStream(&xxh, algo_xxh64, fp, parseFileArg->blockBuf, parseFileArg->blockSize);
+ if (xxh == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) {
+ lineStatus = LineStatus_hashOk;
+ } }
+ break;
+
+ default:
+ break;
+ }
+ fclose(fp);
+ }
+
+ switch (lineStatus)
+ {
+ default:
+ DISPLAY("%s : unknown error\n", inFileName);
+ report->quit = 1;
+ break;
+
+ case LineStatus_failedToOpen:
+ report->nOpenOrReadFailures++;
+ if (!parseFileArg->statusOnly) {
+ DISPLAYRESULT("%s : %lu: FAILED open or read %s\n"
+ , inFileName, lineNumber, parsedLine.filename);
+ }
+ break;
+
+ case LineStatus_hashOk:
+ case LineStatus_hashFailed:
+ { int b = 1;
+ if (lineStatus == LineStatus_hashOk) {
+ /* If --quiet is specified, don't display "OK" */
+ if (parseFileArg->quiet) b = 0;
+ } else {
+ report->nMismatchedChecksums++;
+ }
+
+ if (b && !parseFileArg->statusOnly) {
+ DISPLAYRESULT("%s: %s\n", parsedLine.filename
+ , lineStatus == LineStatus_hashOk ? "OK" : "FAILED");
+ } }
+ break;
+ }
+ } /* while (!report->quit) */
+}
+
+
+/* Parse xxHash checksum file.
+ * Returns 1, if all procedures were succeeded.
+ * Returns 0, if any procedures was failed.
+ *
+ * If strictMode != 0, return error code if any line is invalid.
+ * If statusOnly != 0, don't generate any output.
+ * If warn != 0, print a warning message to stderr.
+ * If quiet != 0, suppress "OK" line.
+ *
+ * "All procedures are succeeded" means:
+ * - Checksum file contains at least one line and less than SIZE_T_MAX lines.
+ * - All files are properly opened and read.
+ * - All hash values match with its content.
+ * - (strict mode) All lines in checksum file are consistent and well formatted.
+ *
+ */
+static int checkFile(const char* inFileName,
+ const endianess displayEndianess,
+ U32 strictMode,
+ U32 statusOnly,
+ U32 warn,
+ U32 quiet)
+{
+ int result = 0;
+ FILE* inFile = NULL;
+ ParseFileArg parseFileArgBody;
+ ParseFileArg* const parseFileArg = &parseFileArgBody;
+ ParseFileReport* const report = &parseFileArg->report;
+
+ if (displayEndianess != big_endian) {
+ /* Don't accept little endian */
+ DISPLAY( "Check file mode doesn't support little endian\n" );
+ return 0;
+ }
+
+ /* note : stdinName is special constant pointer. It is not a string. */
+ if (inFileName == stdinName) {
+ /* note : Since we expect text input for xxhash -c mode,
+ * Don't set binary mode for stdin */
+ inFile = stdin;
+ } else {
+ inFile = fopen( inFileName, "rt" );
+ }
+
+ if (inFile == NULL) {
+ DISPLAY( "Pb opening %s\n", inFileName);
+ return 0;
+ }
+
+ parseFileArg->inFileName = inFileName;
+ parseFileArg->inFile = inFile;
+ parseFileArg->lineMax = DEFAULT_LINE_LENGTH;
+ parseFileArg->lineBuf = (char*) malloc((size_t) parseFileArg->lineMax);
+ parseFileArg->blockSize = 64 * 1024;
+ parseFileArg->blockBuf = (char*) malloc(parseFileArg->blockSize);
+ parseFileArg->strictMode = strictMode;
+ parseFileArg->statusOnly = statusOnly;
+ parseFileArg->warn = warn;
+ parseFileArg->quiet = quiet;
+
+ parseFile1(parseFileArg);
+
+ free(parseFileArg->blockBuf);
+ free(parseFileArg->lineBuf);
+
+ if (inFile != stdin) fclose(inFile);
+
+ /* Show error/warning messages. All messages are copied from md5sum.c
+ */
+ if (report->nProperlyFormattedLines == 0) {
+ DISPLAY("%s: no properly formatted XXHASH checksum lines found\n", inFileName);
+ } else if (!statusOnly) {
+ if (report->nImproperlyFormattedLines) {
+ DISPLAYRESULT("%lu lines are improperly formatted\n"
+ , report->nImproperlyFormattedLines);
+ }
+ if (report->nOpenOrReadFailures) {
+ DISPLAYRESULT("%lu listed files could not be read\n"
+ , report->nOpenOrReadFailures);
+ }
+ if (report->nMismatchedChecksums) {
+ DISPLAYRESULT("%lu computed checksums did NOT match\n"
+ , report->nMismatchedChecksums);
+ } }
+
+ /* Result (exit) code logic is copied from
+ * gnu coreutils/src/md5sum.c digest_check() */
+ result = report->nProperlyFormattedLines != 0
+ && report->nMismatchedChecksums == 0
+ && report->nOpenOrReadFailures == 0
+ && (!strictMode || report->nImproperlyFormattedLines == 0)
+ && report->quit == 0;
+ return result;
+}
+
+
+static int checkFiles(const char** fnList, int fnTotal,
+ const endianess displayEndianess,
+ U32 strictMode,
+ U32 statusOnly,
+ U32 warn,
+ U32 quiet)
+{
+ int ok = 1;
+
+ /* Special case for stdinName "-",
+ * note: stdinName is not a string. It's special pointer. */
+ if (fnTotal==0) {
+ ok &= checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet);
+ } else {
+ int fnNb;
+ for (fnNb=0; fnNb<fnTotal; fnNb++)
+ ok &= checkFile(fnList[fnNb], displayEndianess, strictMode, statusOnly, warn, quiet);
+ }
+ return ok ? 0 : 1;
+}
+
+
+/* ********************************************************
+* Main
+**********************************************************/
+
+static int usage(const char* exename)
+{
+ DISPLAY( WELCOME_MESSAGE(exename) );
+ DISPLAY( "Usage :\n");
+ DISPLAY( " %s [arg] [filenames]\n", exename);
+ DISPLAY( "When no filename provided, or - provided : use stdin as input\n");
+ DISPLAY( "Arguments :\n");
+ DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits (default: %i)\n", (int)g_defaultAlgo);
+ DISPLAY( " -c : read xxHash sums from the [filenames] and check them\n");
+ DISPLAY( " -h : help \n");
+ return 0;
+}
+
+
+static int usage_advanced(const char* exename)
+{
+ usage(exename);
+ DISPLAY( "Advanced :\n");
+ DISPLAY( " --little-endian : hash printed using little endian convention (default: big endian)\n");
+ DISPLAY( " -V, --version : display version\n");
+ DISPLAY( " -h, --help : display long help and exit\n");
+ DISPLAY( " -b : benchmark mode \n");
+ DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations);
+ DISPLAY( "\n");
+ DISPLAY( "The following four options are useful only when verifying checksums (-c):\n");
+ DISPLAY( "--strict : don't print OK for each successfully verified file\n");
+ DISPLAY( "--status : don't output anything, status code shows success\n");
+ DISPLAY( "--quiet : exit non-zero for improperly formatted checksum lines\n");
+ DISPLAY( "--warn : warn about improperly formatted checksum lines\n");
+ return 0;
+}
+
+static int badusage(const char* exename)
+{
+ DISPLAY("Wrong parameters\n");
+ usage(exename);
+ return 1;
+}
+
+/*! readU32FromChar() :
+ @return : unsigned integer value read from input in `char` format,
+ 0 is no figure at *stringPtr position.
+ Interprets K, KB, KiB, M, MB and MiB suffix.
+ Modifies `*stringPtr`, advancing it to position where reading stopped.
+ Note : function result can overflow if digit string > MAX_UINT */
+static unsigned readU32FromChar(const char** stringPtr)
+{
+ unsigned result = 0;
+ while ((**stringPtr >='0') && (**stringPtr <='9'))
+ result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+ if ((**stringPtr=='K') || (**stringPtr=='M')) {
+ result <<= 10;
+ if (**stringPtr=='M') result <<= 10;
+ (*stringPtr)++ ;
+ if (**stringPtr=='i') (*stringPtr)++;
+ if (**stringPtr=='B') (*stringPtr)++;
+ }
+ return result;
+}
+
+int main(int argc, const char** argv)
+{
+ int i, filenamesStart = 0;
+ const char* const exename = argv[0];
+ U32 benchmarkMode = 0;
+ U32 fileCheckMode = 0;
+ U32 strictMode = 0;
+ U32 statusOnly = 0;
+ U32 warn = 0;
+ U32 quiet = 0;
+ U32 specificTest = 0;
+ size_t keySize = XXH_DEFAULT_SAMPLE_SIZE;
+ algoType algo = g_defaultAlgo;
+ endianess displayEndianess = big_endian;
+
+ /* special case : xxh32sum default to 32 bits checksum */
+ if (strstr(exename, "xxh32sum") != NULL) algo = algo_xxh32;
+
+ for(i=1; i<argc; i++) {
+ const char* argument = argv[i];
+
+ if(!argument) continue; /* Protection, if argument empty */
+
+ if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; }
+ if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; }
+ if (!strcmp(argument, "--strict")) { strictMode = 1; continue; }
+ if (!strcmp(argument, "--status")) { statusOnly = 1; continue; }
+ if (!strcmp(argument, "--quiet")) { quiet = 1; continue; }
+ if (!strcmp(argument, "--warn")) { warn = 1; continue; }
+ if (!strcmp(argument, "--help")) { return usage_advanced(exename); }
+ if (!strcmp(argument, "--version")) { DISPLAY(WELCOME_MESSAGE(exename)); return 0; }
+
+ if (*argument!='-') {
+ if (filenamesStart==0) filenamesStart=i; /* only supports a continuous list of filenames */
+ continue;
+ }
+
+ /* command selection */
+ argument++; /* note : *argument=='-' */
+
+ while (*argument!=0) {
+ switch(*argument)
+ {
+ /* Display version */
+ case 'V':
+ DISPLAY(WELCOME_MESSAGE(exename)); return 0;
+
+ /* Display help on usage */
+ case 'h':
+ return usage_advanced(exename);
+
+ /* select hash algorithm */
+ case 'H':
+ algo = (algoType)(argument[1] - '0');
+ argument+=2;
+ break;
+
+ /* File check mode */
+ case 'c':
+ fileCheckMode=1;
+ argument++;
+ break;
+
+ /* Warning mode (file check mode only, alias of "--warning") */
+ case 'w':
+ warn=1;
+ argument++;
+ break;
+
+ /* Trigger benchmark mode */
+ case 'b':
+ argument++;
+ benchmarkMode = 1;
+ specificTest = readU32FromChar(&argument); /* select one specific test (hidden option) */
+ break;
+
+ /* Modify Nb Iterations (benchmark only) */
+ case 'i':
+ argument++;
+ g_nbIterations = readU32FromChar(&argument);
+ break;
+
+ /* Modify Block size (benchmark only) */
+ case 'B':
+ argument++;
+ keySize = readU32FromChar(&argument);
+ break;
+
+ /* Modify verbosity of benchmark output (hidden option) */
+ case 'q':
+ argument++;
+ g_displayLevel--;
+ break;
+
+ default:
+ return badusage(exename);
+ }
+ }
+ } /* for(i=1; i<argc; i++) */
+
+ /* Check benchmark mode */
+ if (benchmarkMode) {
+ DISPLAYLEVEL(2, WELCOME_MESSAGE(exename) );
+ BMK_sanityCheck();
+ if (filenamesStart==0) return BMK_benchInternal(keySize, specificTest);
+ return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart, specificTest);
+ }
+
+ /* Check if input is defined as console; trigger an error in this case */
+ if ( (filenamesStart==0) && IS_CONSOLE(stdin) ) return badusage(exename);
+
+ if (filenamesStart==0) filenamesStart = argc;
+ if (fileCheckMode) {
+ return checkFiles(argv+filenamesStart, argc-filenamesStart,
+ displayEndianess, strictMode, statusOnly, warn, quiet);
+ } else {
+ return BMK_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess);
+ }
+}
+
+#endif /* XXHASH_C_2097394837 */
diff --git a/doc/Makefile.am b/doc/Makefile.am
index aee7b0b61e4..de68c20b4d7 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,10 +1,9 @@
-EXTRA_DIST = glusterfs.vol.sample glusterfsd.vol.sample glusterfs.8 mount.glusterfs.8\
- glusterd.vol gluster.8 glusterd.8 glusterfsd.8
+EXTRA_DIST = glusterfs.8 mount.glusterfs.8 gluster.8 \
+ glusterd.8 glusterfsd.8
-voldir = $(sysconfdir)/glusterfs
-vol_DATA = glusterd.vol
-
-# TODO: update man pages and uncomment this section
-#man8_MANS = glusterfs.8 mount.glusterfs.8 gluster.8 glusterd.8 glusterfsd.8
+man8_MANS = glusterfs.8 mount.glusterfs.8 gluster.8
+if WITH_SERVER
+man8_MANS += glusterd.8 glusterfsd.8
+endif
CLEANFILES =
diff --git a/doc/README.md b/doc/README.md
new file mode 100644
index 00000000000..6aa28642ef4
--- /dev/null
+++ b/doc/README.md
@@ -0,0 +1,26 @@
+## Developer Guide
+
+Gluster's contributors can check about the internals by visiting [Developer Guide Section](developer-guide). While it is not 'comprehensive', it can help you to get started.
+
+Also while coding, keep [Coding Standard](developer-guide/coding-standard.md) in mind.
+
+When you are ready to commit the changes, make sure you meet our [Commit message standard](developer-guide/commit-guidelines.md).
+
+## Admin Guide ##
+
+The gluster administration guide is maintained at [github](https://github.com/gluster/glusterdocs). The browsable admin guide can be found [here](http://docs.gluster.org/en/latest/Administrator%20Guide/).
+
+The doc patch has to be sent against the above mentioned repository.
+
+## Features/Spec ##
+
+The Gluster features which are 'in progress' or implemented can be found at [github](https://github.com/gluster/glusterfs-specs).
+
+## Upgrade Guide ##
+
+The gluster upgrade guide is maintained at [github](https://github.com/gluster/glusterdocs). The browsable upgrade guide can be found [here](http://docs.gluster.org/en/latest/Upgrade-Guide)
+
+The doc patch has to be sent against the above mentioned repository.
+
+
+For more details about the docuemntation workflow please refer [this discussion](https://www.mail-archive.com/gluster-users@gluster.org/msg21168.html)
diff --git a/doc/admin-guide/en-US/Administration_Guide.ent b/doc/admin-guide/en-US/Administration_Guide.ent
deleted file mode 100644
index 3381b2bfec1..00000000000
--- a/doc/admin-guide/en-US/Administration_Guide.ent
+++ /dev/null
@@ -1,4 +0,0 @@
-<!ENTITY PRODUCT "Documentation">
-<!ENTITY BOOKID "Administration_Guide">
-<!ENTITY YEAR "2012">
-<!ENTITY HOLDER "Red Hat Inc">
diff --git a/doc/admin-guide/en-US/Administration_Guide.xml b/doc/admin-guide/en-US/Administration_Guide.xml
deleted file mode 100644
index 483855b1a02..00000000000
--- a/doc/admin-guide/en-US/Administration_Guide.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<book>
- <xi:include href="Book_Info.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Preface.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="gfs_introduction.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_start_stop_daemon.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_console.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_storage_pools.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_setting_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_settingup_clients.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_managing_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_geo-replication.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_directory_Quota.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_monitoring_workload.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_ACLs.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_UFO.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_Hadoop.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_troubleshooting.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_commandref.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="glossary.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Revision_History.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
-</book>
-
diff --git a/doc/admin-guide/en-US/Author_Group.xml b/doc/admin-guide/en-US/Author_Group.xml
deleted file mode 100644
index f3fa3174037..00000000000
--- a/doc/admin-guide/en-US/Author_Group.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE authorgroup PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<authorgroup>
- <author>
- <firstname>Divya</firstname>
- <surname>Muntimadugu</surname>
- <affiliation>
- <orgname>Red Hat</orgname>
- <orgdiv>Engineering Content Services</orgdiv>
- </affiliation>
- <email>divya@redhat.com</email>
- </author>
-</authorgroup>
-
diff --git a/doc/admin-guide/en-US/Book_Info.xml b/doc/admin-guide/en-US/Book_Info.xml
deleted file mode 100644
index 19fb40a2f34..00000000000
--- a/doc/admin-guide/en-US/Book_Info.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE bookinfo PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<bookinfo id="book-Administration_Guide-Administration_Guide">
- <title>Administration Guide</title>
- <subtitle>Using Gluster File System </subtitle>
- <productname>Gluster File System</productname>
- <productnumber>3.3.0</productnumber>
- <edition>1</edition>
- <pubsnumber>1</pubsnumber>
- <abstract>
- <para>
- This guide describes Gluster File System (GlusterFS) and provides information on how to configure, operate, and manage GlusterFS.
- </para>
- </abstract>
- <corpauthor>
- <inlinemediaobject>
- <imageobject>
- <imagedata fileref="Common_Content/images/title_logo.svg" format="SVG" />
- </imageobject>
- </inlinemediaobject>
- </corpauthor>
- <xi:include href="Common_Content/Legal_Notice.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Author_Group.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
-</bookinfo>
-
diff --git a/doc/admin-guide/en-US/Chapter.xml b/doc/admin-guide/en-US/Chapter.xml
deleted file mode 100644
index 4a1cef872c8..00000000000
--- a/doc/admin-guide/en-US/Chapter.xml
+++ /dev/null
@@ -1,33 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Test_Chapter">
- <title>Test Chapter</title>
- <para>
- This is a test paragraph
- </para>
- <section id="sect-Administration_Guide-Test_Chapter-Test_Section_1">
- <title>Test Section 1</title>
- <para>
- This is a test paragraph in a section
- </para>
- </section>
-
- <section id="sect-Administration_Guide-Test_Chapter-Test_Section_2">
- <title>Test Section 2</title>
- <para>
- This is a test paragraph in Section 2
- <orderedlist>
- <listitem>
- <para>
- listitem text
- </para>
- </listitem>
- </orderedlist>
- </para>
- </section>
-
-</chapter>
-
diff --git a/doc/admin-guide/en-US/Preface.xml b/doc/admin-guide/en-US/Preface.xml
deleted file mode 100644
index 320311906d0..00000000000
--- a/doc/admin-guide/en-US/Preface.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. -->
-<!DOCTYPE preface PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<preface id="pref-Administration_Guide-Preface">
- <title>Preface</title>
- <para>This guide describes how to configure, operate, and manage Gluster File System (GlusterFS).</para>
- <section>
- <title>Audience</title>
- <para>This guide is intended for Systems Administrators interested in configuring and managing GlusterFS.</para>
- <para>This guide assumes that you are familiar with the Linux operating system, concepts of File System, GlusterFS concepts, and GlusterFS Installation</para>
- </section>
- <section>
- <title>License</title>
- <para>The License information is available at <ulink url="http://www.redhat.com/licenses/rhel_rha_eula.html"/>.</para>
- </section>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Conventions.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Feedback.xml">
- <xi:fallback xmlns:xi="http://www.w3.org/2001/XInclude"> <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Feedback.xml"/>
- </xi:fallback>
- </xi:include>
-</preface>
diff --git a/doc/admin-guide/en-US/Revision_History.xml b/doc/admin-guide/en-US/Revision_History.xml
deleted file mode 100644
index 09320821fb0..00000000000
--- a/doc/admin-guide/en-US/Revision_History.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<appendix id="appe-Administration_Guide-Revision_History">
- <title>Revision History</title>
- <simpara>
- <revhistory>
- <revision>
- <revnumber>1-0</revnumber>
- <date>Thu Apr 5 2012</date>
- <author>
- <firstname>Divya</firstname>
- <surname>Muntimadugu</surname>
- <email>divya@redhat.com</email>
- </author>
- <revdescription>
- <simplelist>
- <member>Draft </member>
- </simplelist>
- </revdescription>
- </revision>
- </revhistory>
- </simpara>
-</appendix>
-
diff --git a/doc/admin-guide/en-US/admin_ACLs.xml b/doc/admin-guide/en-US/admin_ACLs.xml
deleted file mode 100644
index edad2d67d60..00000000000
--- a/doc/admin-guide/en-US/admin_ACLs.xml
+++ /dev/null
@@ -1,211 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-ACLs">
- <title>POSIX Access Control Lists </title>
- <para>POSIX Access Control Lists (ACLs) allows you to assign different permissions for different users or
-groups even though they do not correspond to the original owner or the owning group.
- </para>
- <para>For example: User john creates a file but does not want to allow anyone to do anything with this
-file, except another user, antony (even though there are other users that belong to the group john).
-</para>
- <para>This means, in addition to the file owner, the file group, and others, additional users and groups can
-be granted or denied access by using POSIX ACLs.
-</para>
- <section id="sect-Administration_Guide-ACLs-Activating_ACLs">
- <title>Activating POSIX ACLs Support </title>
- <para>To use POSIX ACLs for a file or directory, the mount point where the file or directory exists, must be mounted with
-POSIX ACLs support.
-</para>
- <section id="sect-Administration_Guide-ACLs-Activating_ACLs-Server">
- <title>Activating POSIX ACLs Support on Sever </title>
- <para>To mount the backend export directories for POSIX ACLs support, use the following command:
-</para>
- <para><command># mount -o acl <replaceable>device-name</replaceable><replaceable>partition</replaceable></command>
-</para>
- <para>If the backend export directory is already mounted, use the following command:
-</para>
- <para><command># mount -oremount,acl <replaceable>device-name</replaceable><replaceable>partition</replaceable></command>
-</para>
-
- <para>For example:
-</para>
- <para><command># mount -o acl /dev/sda1 /export1 </command></para>
- <para>Alternatively, if the partition is listed in the /etc/fstab file, add the following entry for the partition
-to include the POSIX ACLs option:
-</para>
- <para><command>LABEL=/work /export1 xfs rw,acl 1 4 </command></para>
- </section>
- <section>
- <title>Activating POSIX ACLs Support on Client </title>
- <para>To mount the glusterfs volume with POSIX ACLs support, use the following command:
-</para>
- <para><command># mount –t glusterfs -o acl <replaceable>severname:/volume-id</replaceable><replaceable>mount point</replaceable></command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -t glusterfs -o acl 198.192.198.234:/glustervolume /mnt/gluster</command>
-</para>
- </section>
- </section>
- <section>
- <title>Setting POSIX ACLs </title>
- <para>You can set two types of POSIX ACLs, that is, access ACLs and default ACLs. You can use
-access ACLs to grant permission for a specific file or directory. You can use default ACLs only
-on a directory but if a file inside that directory does not have an ACLs, it inherits the permissions of
-the default ACLs of the directory.
-</para>
- <para>You can set ACLs for per user, per group, for users not in the user group for the file, and via the
-effective right mask.
-</para>
- <section>
- <title>Setting Access ACLs </title>
- <para>You can apply access ACLs to grant permission for both files and directories.
-</para>
- <para><emphasis role="bold">To set or modify Access ACLs</emphasis>
-</para>
- <para>You can set or modify access ACLs use the following command:
-</para>
- <para><command># setfacl –m <replaceable>entry type</replaceable> file </command></para>
- <para>The ACL entry types are the POSIX ACLs representations of owner, group, and other.
-</para>
- <para>Permissions must be a combination of the characters <command>r</command> (read), <command>w</command> (write), and <command>x</command> (execute). You must
-specify the ACL entry in the following format and can specify multiple entry types separated by
-commas.
-</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <thead>
- <row>
- <entry>ACL Entry</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>u:uid:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for a user. You can specify user name or UID </entry>
- </row>
- <row>
- <entry>g:gid:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for a group. You can specify group name or GID. </entry>
- </row>
- <row>
- <entry>m:&lt;permission&gt; </entry>
- <entry>Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries. </entry>
- </row>
- <row>
- <entry>o:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for users other than the ones in the group for the file. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <para>If a file or directory already has an POSIX ACLs, and the setfacl command is used, the additional
-permissions are added to the existing POSIX ACLs or the existing rule is modified.
-</para>
- <para>For example, to give read and write permissions to user antony:
-</para>
- <para><command># setfacl -m u:antony:rw /mnt/gluster/data/testfile </command></para>
- </section>
- <section>
- <title>Setting Default ACLs </title>
- <para>You can apply default ACLs only to directories. They determine the permissions of a file system
-objects that inherits from its parent directory when it is created.
-</para>
- <para>To set default ACLs
-</para>
- <para>You can set default ACLs for files and directories using the following command:
-</para>
- <para><command># setfacl –m –-set <replaceable>entry type directory</replaceable></command>
-</para>
- <para>For example, to set the default ACLs for the /data directory to read for users not in the user group:
-</para>
- <para><command># setfacl –m --set o::r /mnt/gluster/data </command></para>
- <para><note>
- <para>An access ACLs set for an individual file can override the default ACLs permissions.
-</para>
- </note></para>
- <para><emphasis role="bold">Effects of a Default ACLs </emphasis></para>
- <para>The following are the ways in which the permissions of a directory&apos;s default ACLs are passed to the
-files and subdirectories in it:
-</para>
- <itemizedlist>
- <listitem>
- <para>A subdirectory inherits the default ACLs of the parent directory both as its default ACLs and as an
-access ACLs.
-</para>
- </listitem>
- <listitem>
- <para>A file inherits the default ACLs as its access ACLs.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Retrieving POSIX ACLs </title>
- <para>You can view the existing POSIX ACLs for a file or directory.
-</para>
- <para><emphasis role="bold">To view existing POSIX ACLs </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>View the existing access ACLs of a file using the following command:
-</para>
- <para><command># getfacl <replaceable>path/filename</replaceable></command>
-</para>
- <para>For example, to view the existing POSIX ACLs for sample.jpg
-</para>
- <programlisting># getfacl /mnt/gluster/data/test/sample.jpg
-# owner: antony
-# group: antony
-user::rw-
-group::rw-
-other::r--</programlisting>
- </listitem>
- <listitem>
- <para>View the default ACLs of a directory using the following command:
-</para>
- <para><command># getfacl <replaceable>directory name</replaceable></command></para>
- <para>For example, to view the existing ACLs for /data/doc
-</para>
- <programlisting># getfacl /mnt/gluster/data/doc
-# owner: antony
-# group: antony
-user::rw-
-user:john:r--
-group::r--
-mask::r--
-other::r--
-default:user::rwx
-default:user:antony:rwx
-default:group::r-x
-default:mask::rwx
-default:other::r-x</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Removing POSIX ACLs </title>
- <para>To remove all the permissions for a user, groups, or others, use the following command:
-</para>
- <para><command># setfacl -x <replaceable>ACL entry type file</replaceable></command></para>
- <para>For example, to remove all permissions from the user antony:
-</para>
- <para><command># setfacl -x u:antony /mnt/gluster/data/test-file</command></para>
- </section>
- <section>
- <title>Samba and ACLs </title>
- <para>If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs are enabled by default.
-Samba has been compiled with the <command>--with-acl-support</command> option, so no special flags are required
-when accessing or mounting a Samba share.
-</para>
- </section>
- <section>
- <title>NFS and ACLs </title>
- <para>Currently we do not support ACLs configuration through NFS, i.e. setfacl and getfacl commands do
-not work. However, ACLs permissions set using Gluster Native Client applies on NFS mounts.
-</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_Hadoop.xml b/doc/admin-guide/en-US/admin_Hadoop.xml
deleted file mode 100644
index 31eaaa84f17..00000000000
--- a/doc/admin-guide/en-US/admin_Hadoop.xml
+++ /dev/null
@@ -1,248 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Hadoop">
- <title>Managing Hadoop Compatible Storage </title>
- <para>GlusterFS provides compatibility for Apache Hadoop and it uses the standard file system
-APIs available in Hadoop to provide a new storage option for Hadoop deployments. Existing
-MapReduce based applications can use GlusterFS seamlessly. This new functionality opens up data
-within Hadoop deployments to any file-based or object-based application.
-
- </para>
- <section id="sect-Administration_Guide-Hadoop-Introduction-Architecture_Overview">
- <title>Architecture Overview </title>
- <para>The following diagram illustrates Hadoop integration with GlusterFS:
-<mediaobject>
- <imageobject>
- <imagedata fileref="images/Hadoop_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </para>
- </section>
- <section id="sect-Administration_Guide-Hadoop-Introduction-Advantages">
- <title>Advantages </title>
- <para>
-The following are the advantages of Hadoop Compatible Storage with GlusterFS:
-
-
- </para>
- <itemizedlist>
- <listitem>
- <para>Provides simultaneous file-based and object-based access within Hadoop.
-</para>
- </listitem>
- <listitem>
- <para>Eliminates the centralized metadata server.
-</para>
- </listitem>
- <listitem>
- <para>Provides compatibility with MapReduce applications and rewrite is not required.
-</para>
- </listitem>
- <listitem>
- <para>Provides a fault tolerant file system.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Preparing to Install Hadoop Compatible Storage</title>
- <para>This section provides information on pre-requisites and list of dependencies that will be installed
-during installation of Hadoop compatible storage.
-
-</para>
- <section id="sect-Administration_Guide-Hadoop-Preparation">
- <title>Pre-requisites </title>
- <para>The following are the pre-requisites to install Hadoop Compatible
-Storage :
-
- </para>
- <itemizedlist>
- <listitem>
- <para>Hadoop 0.20.2 is installed, configured, and is running on all the machines in the cluster.
-</para>
- </listitem>
- <listitem>
- <para>Java Runtime Environment
-</para>
- </listitem>
- <listitem>
- <para>Maven (mandatory only if you are building the plugin from the source)
-</para>
- </listitem>
- <listitem>
- <para>JDK (mandatory only if you are building the plugin from the source)
-</para>
- </listitem>
- <listitem>
- <para>getfattr
-- command line utility</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Installing, and Configuring Hadoop Compatible Storage</title>
- <para>This section describes how to install and configure Hadoop Compatible Storage in your storage
-environment and verify that it is functioning correctly.
-
-</para>
- <orderedlist>
- <para>To install and configure Hadoop compatible storage:</para>
- <listitem>
- <para>Download
- <filename>glusterfs-hadoop-plugin-0.20.2-0.1.x86_64.rpm</filename>
- file to each server on your cluster. You can download the file from
- <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/Hadoop/glusterfs-hadoop-plugin-0.20.2_0.1-beta.noarch.rpm"/>.
-
-</para>
- </listitem>
- <listitem>
- <para>To install Hadoop Compatible Storage on all servers in your cluster, run the following command:
-</para>
- <para><command># rpm –ivh --nodeps glusterfs-hadoop-plugin-0.20.2-0.1.x86_64.rpm</command>
-</para>
- <para>The following files will be extracted:
- </para>
- <itemizedlist>
- <listitem>
- <para>/usr/local/lib/glusterfs-<replaceable>Hadoop-version-gluster_plugin_version</replaceable>.jar </para>
- </listitem>
- <listitem>
- <para> /usr/local/lib/conf/core-site.xml</para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>(Optional) To install Hadoop Compatible Storage in a different location, run the following
-command:
-</para>
- <para><command># rpm –ivh --nodeps –prefix /usr/local/glusterfs/hadoop
- glusterfs-hadoop-plugin-0.20.2-0.1.x86_64.rpm</command>
-</para>
- </listitem>
- <listitem>
- <para>Edit the <filename>conf/core-site.xml</filename> file. The following is the sample <filename>conf/core-site.xml</filename> file:
-</para>
- <para><programlisting>&lt;configuration&gt;
- &lt;property&gt;
- &lt;name&gt;fs.glusterfs.impl&lt;/name&gt;
- &lt;value&gt;org.apache.hadoop.fs.glusterfs.Gluster FileSystem&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.default.name&lt;/name&gt;
- &lt;value&gt;glusterfs://fedora1:9000&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.volname&lt;/name&gt;
- &lt;value&gt;hadoopvol&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.mount&lt;/name&gt;
- &lt;value&gt;/mnt/glusterfs&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.server&lt;/name&gt;
- &lt;value&gt;fedora2&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;quick.slave.io&lt;/name&gt;
- &lt;value&gt;Off&lt;/value&gt;
-&lt;/property&gt;
-&lt;/configuration&gt;
-</programlisting></para>
- <para>The following are the configurable fields:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="3">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <colspec colnum="3" colname="c2" colsep="0"/>
- <thead>
- <row>
- <entry>Property Name </entry>
- <entry>Default Value </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>fs.default.name </entry>
- <entry>glusterfs://fedora1:9000</entry>
- <entry>Any hostname in the cluster as the server and any port number. </entry>
- </row>
- <row>
- <entry>fs.glusterfs.volname </entry>
- <entry>hadoopvol </entry>
- <entry>GlusterFS volume to mount. </entry>
- </row>
- <row>
- <entry>fs.glusterfs.mount </entry>
- <entry>/mnt/glusterfs</entry>
- <entry>The directory used to fuse mount the volume.</entry>
- </row>
- <row>
- <entry>fs.glusterfs.server </entry>
- <entry>fedora2</entry>
- <entry>Any hostname or IP address on the cluster except the client/master. </entry>
- </row>
- <row>
- <entry>quick.slave.io </entry>
- <entry>Off </entry>
- <entry>Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster. <note>
- <para>This option is not tested widely</para>
- </note></entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- </listitem>
- <listitem>
- <para>Create a soft link in Hadoop’s library and configuration directory for the downloaded files (in
-Step 3) using the following commands:
-</para>
- <para><command># ln -s <replaceable>&lt;target location&gt; &lt;source location</replaceable>&gt;</command>
-</para>
- <para>For example,
-</para>
- <para><command># ln –s /usr/local/lib/glusterfs-0.20.2-0.1.jar <replaceable>$HADOOP_HOME</replaceable>/lib/glusterfs-0.20.2-0.1.jar</command>
-</para>
- <para><command># ln –s /usr/local/lib/conf/core-site.xml <replaceable>$HADOOP_HOME</replaceable>/conf/core-site.xml </command></para>
- </listitem>
- <listitem>
- <para> (Optional) You can run the following command on Hadoop master to build the plugin and deploy
-it along with core-site.xml file, instead of repeating the above steps:
-</para>
- <para><command># build-deploy-jar.py -d <replaceable>$HADOOP_HOME</replaceable> -c </command></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Starting and Stopping the Hadoop MapReduce Daemon</title>
- <para>To start and stop MapReduce daemon</para>
- <itemizedlist>
- <listitem>
- <para>To start MapReduce daemon manually, enter the following command:
-</para>
- <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/start-mapred.sh</command>
-</para>
- </listitem>
- <listitem>
- <para>To stop MapReduce daemon manually, enter the following command:
-</para>
- <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/stop-mapred.sh </command></para>
- </listitem>
- </itemizedlist>
- <para><note>
- <para>You must start Hadoop MapReduce daemon on all servers.
-</para>
- </note></para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_UFO.xml b/doc/admin-guide/en-US/admin_UFO.xml
deleted file mode 100644
index 9669c59f854..00000000000
--- a/doc/admin-guide/en-US/admin_UFO.xml
+++ /dev/null
@@ -1,1580 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-UFO">
- <title>Managing Unified File and Object Storage</title>
- <para>Unified File and Object Storage (UFO) unifies NAS and object storage technology. It
-provides a system for data storage that enables users to access the same data, both as an object and as a
-file, thus simplifying management and controlling storage costs.
-
-</para>
- <para>Unified File and Object Storage is built upon Openstack&apos;s Object Storage Swift. Open Stack Object Storage allows users to store and retrieve files and content through a simple Web Service (REST: Representational State Transfer) interface as objects and GlusterFS, allows users to store and retrieve files using Native Fuse and NFS mounts. It uses GlusterFS as a backend file system for Open Stack Swift. It also leverages on Open Stack Swift&apos;s web interface for storing and retrieving files over the web combined with GlusterFS features like scalability and high availability, replication, elastic volume management for data management at disk level.</para>
- <para>Unified File and Object Storage technology enables enterprises to adopt and deploy
-cloud storage solutions. It allows users to access and modify data as objects from a
-REST interface along with the ability to access and modify files from NAS interfaces including NFS
-and CIFS. In addition to decreasing cost and making it faster and easier to access object data,
-it also delivers massive scalability, high availability and replication of object storage.
-Infrastructure as a Service (IaaS) providers can utilize GlusterFS Unified File and Object Storage technology to enable their own cloud
-storage service. Enterprises can use this technology to accelerate the process of preparing file-based
-applications for the cloud and simplify new application development for cloud computing
-environments.
-
-</para>
- <para>OpenStack Object Storage is scalable object storage system and it is not a traditional file system. You will not be able to mount this system like traditional SAN or NAS
-volumes and perform POSIX compliant operations. </para>
- <para><figure>
- <title>Unified File and Object Storage Architecture</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/UFO_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </figure></para>
- <section>
- <title>Components of Object Storage</title>
- <para>The major components of Object Storage are:
- </para>
- <para><emphasis role="bold">Proxy Server</emphasis>
-
-</para>
- <para>All REST requests to the UFO are routed through the Proxy Server.
-
-
-</para>
- <para><emphasis role="bold">Objects and Containers </emphasis></para>
- <para>An object is the basic storage entity and any optional metadata that represents the data
-you store. When you upload data, the data is stored as-is (with no compression or encryption).
-
-</para>
- <para>A container is a storage compartment for your data and provides a way for you to organize
-your data. Containers can be visualized as directories in a Linux system. Data must be stored in a container and hence objects are created within a container.
-
-</para>
- <para>It implements objects as files and directories under the container. The object name is a &apos;/&apos; separated path and UFO maps it to directories until the last name in the path, which is marked as a file. With this approach, objects can be accessed as files and directories from native GlusterFS (FUSE) or NFS mounts by providing the &apos;/&apos; separated path.</para>
- <para><emphasis role="bold">Accounts and Account Servers</emphasis></para>
- <para>The OpenStack Object Storage system is designed to be used by many different storage
-consumers. Each user is associated with one or more accounts and must identify themselves using an authentication system. While authenticating, users must provide the name of the account for which the authentication is requested.
-
-</para>
- <para>UFO implements accounts as GlusterFS volumes. So, when a user is granted read/write permission on an account, it means that that user has access to all the data available on that GlusterFS volume.
-
-
-
-
-</para>
- <para><emphasis role="bold">Authentication and Access Permissions</emphasis>
-
-</para>
- <para>You must authenticate against an authentication service to receive OpenStack Object
-Storage connection parameters and an authentication token. The token must be passed
-in for all subsequent container or object operations. One authentication service that you
-can use as a middleware example is called <literal>tempauth</literal>.</para>
- <para>By default, each user has their own storage account and has full access to that
-account. Users must authenticate with their credentials as described above, but once
-authenticated they can manage containers and objects within that account. If a user wants to access the content from another account, they must have API access key or a session token provided by their authentication system.</para>
- </section>
- <section>
- <title>Advantages of using GlusterFS Unified File and Object Storage</title>
- <para>The following are the advantages of using GlusterFS UFO:</para>
- <itemizedlist>
- <listitem>
- <para>No limit on upload and download files sizes as compared to Open Stack Swift which limits the object size to 5GB.</para>
- </listitem>
- <listitem>
- <para>A unified view of data across NAS and Object Storage technologies.</para>
- </listitem>
- <listitem>
- <para>Using GlusterFS&apos;s UFO has other advantages like the following: </para>
- <para><itemizedlist>
- <listitem>
- <para>High availability</para>
- </listitem>
- <listitem>
- <para>Scalability</para>
- </listitem>
- <listitem>
- <para>Replication</para>
- </listitem>
- <listitem>
- <para>Elastic Volume management</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Preparing to Deploy Unified File and Object Storage</title>
- <para>This section provides information on pre-requisites and list of dependencies that will be installed
-during the installation of Unified File and Object Storage.
-</para>
- <section>
- <title>Pre-requisites </title>
- <para>GlusterFS&apos;s Unified File and Object Storage needs <literal>user_xattr</literal> support from the underlying disk file system.
-Use the following command to enable <literal>user_xattr</literal> for GlusterFS bricks backend:
-</para>
- <para><command># mount –o remount,user_xattr <replaceable>device name</replaceable></command></para>
- <para>For example,
-</para>
- <para><command># mount –o remount,user_xattr /dev/hda1 </command>
-</para>
- </section>
- <section>
- <title>Dependencies </title>
- <para>The following packages are installed on GlusterFS when you install Unified File and Object
-Storage:
-
-</para>
- <itemizedlist>
- <listitem>
- <para>curl
-
-
-
-
-
-
-
-
-
-
-
-
-
-</para>
- </listitem>
- <listitem>
- <para>memcached</para>
- </listitem>
- <listitem>
- <para>openssl</para>
- </listitem>
- <listitem>
- <para>xfsprogs</para>
- </listitem>
- <listitem>
- <para>python2.6</para>
- </listitem>
- <listitem>
- <para>pyxattr</para>
- </listitem>
- <listitem>
- <para>python-configobj
-</para>
- </listitem>
- <listitem>
- <para>python-setuptools
-
-</para>
- </listitem>
- <listitem>
- <para>python-simplejson
-
-</para>
- </listitem>
- <listitem>
- <para>python-webob
-
-</para>
- </listitem>
- <listitem>
- <para>python-eventlet
-
-</para>
- </listitem>
- <listitem>
- <para>python-greenlet
-
-</para>
- </listitem>
- <listitem>
- <para>python-pastedeploy
-
-</para>
- </listitem>
- <listitem>
- <para>python-netifaces
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Installing and Configuring Unified File and Object Storage</title>
- <para>This section provides instructions on how to install and configure Unified File and Object Storage in your storage
-environment.</para>
- <section id="chap-ation_Guide-Dir_Quota-Enable">
- <title>Installing Unified File and Object Storage</title>
- <para>To install Unified File and Object Storage:</para>
- <orderedlist>
- <listitem>
- <para>Download <filename>rhel_install.sh</filename> install script from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/"/> .
-</para>
- </listitem>
- <listitem>
- <para>Run
- <filename>rhel_install.sh</filename> script using the following command:
-</para>
- <para><command># sh rhel_install.sh</command></para>
- </listitem>
- <para><note>
- <para>You must repeat the above steps on all the machines on which you want to install Unified File and Object Storage. If you install the Unified File and Object Storage on multiple servers, you can use a load balancer like pound, nginx, and so on to distribute the request across the machines.</para>
- </note></para>
- </orderedlist>
- </section>
- <section>
- <title>Adding Users</title>
- <para>The authentication system allows the administrator to grant different levels of access to different users based on the requirement. The following are the types of user permissions:
- </para>
- <itemizedlist>
- <listitem>
- <para>admin user
- </para>
- </listitem>
- <listitem>
- <para>normal user</para>
- </listitem>
- </itemizedlist>
- <para>Admin user has read and write permissions on the account. By default, a normal user has no read or write permissions. A normal user can only authenticate itself to get a Auth-Token. Read or write permission are provided through ACLs by the admin users.</para>
- <para>Add a new user by adding the following entry in <filename>/etc/swift/proxy-server.conf</filename> file:</para>
- <para><command>user_&lt;account-name&gt;_&lt;user-name&gt; = &lt;password&gt; [.admin]</command></para>
- <para>For example, </para>
- <para><command>user_test_tester = testing .admin</command>
-</para>
- <note>
- <para>During installation, the installation script adds few sample users to the <filename>proxy-server.conf</filename> file. It is highly recommended that you remove all the default sample user entries from the configuration file.
-</para>
- </note>
- <para>For more information on setting ACLs, see <xref linkend="chap-Administration_Guide-Working_UFO-Setting_ACLs"/>.</para>
- </section>
- <section>
- <title>Configuring Proxy Server</title>
- <para>The Proxy Server is responsible for connecting to the rest of the OpenStack Object Storage architecture. For each request, it looks up the location of the account, container, or object in the ring and route the request accordingly. The public API is also exposed through the proxy server. When objects are streamed to or from an object server, they are streamed directly through the proxy server to or from the user – the proxy server does not spool them.
-</para>
- <para>The configurable options pertaining to proxy server are stored in <filename>/etc/swift/proxy-server.conf</filename>. The following is the sample <filename>proxy-server.conf</filename> file:</para>
- <para><programlisting>[app:proxy-server]
-use = egg:swift#proxy
-allow_account_management=true
-account_autocreate=true
-
-[filter:tempauth]
-use = egg:swift#tempauth user_admin_admin=admin.admin.reseller_admin
-user_test_tester=testing.admin
-user_test2_tester2=testing2.admin
-user_test_tester3=testing3
-
-[filter:healthcheck]
-use = egg:swift#healthcheck
-
-[filter:cache]
-use = egg:swift#memcache</programlisting></para>
- <para>By default, GlusterFS&apos;s Unified File and Object Storage is configured to support HTTP protocol and uses temporary authentication to authenticate the HTTP requests.</para>
- </section>
- <section>
- <title>Configuring Authentication System</title>
- <para>Proxy server must be configured to authenticate using <literal>
- <literal>tempauth</literal>
- </literal>. </para>
- </section>
- <section>
- <title>Configuring Proxy Server for HTTPS</title>
- <para>By default, proxy server only handles HTTP request. To configure the proxy server to process HTTPS requests, perform the following steps:</para>
- <orderedlist>
- <listitem>
- <para>Create self-signed cert for SSL using the following commands:</para>
- <para><programlisting>cd /etc/swift
-openssl req -new -x509 -nodes -out cert.crt -keyout cert.key</programlisting></para>
- </listitem>
- <listitem>
- <para>Add the following lines to <filename>/etc/swift/proxy-server.conf </filename>under <replaceable>[DEFAULT]</replaceable></para>
- <para><programlisting>bind_port = 443
- cert_file = /etc/swift/cert.crt
- key_file = /etc/swift/cert.key</programlisting></para>
- </listitem>
- <listitem>
- <para>Restart the servers using the following commands:</para>
- <para><programlisting>swift-init main stop
-swift-init main start</programlisting></para>
- </listitem>
- </orderedlist>
- <para>The following are the configurable options:
-</para>
- <table frame="all">
- <title>proxy-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>80 </entry>
- <entry>Port for server to bind </entry>
- </row>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1</entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>swift user</entry>
- </row>
- <row>
- <entry>cert_file </entry>
- <entry/>
- <entry>Path to the ssl .crt </entry>
- </row>
- <row>
- <entry>key_file </entry>
- <entry/>
- <entry>Path to the ssl .key </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>proxy-server.conf Server Options in the [proxy-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>proxy-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Log level </entry>
- </row>
- <row>
- <entry>log_headers </entry>
- <entry>True </entry>
- <entry>If True, log headers in each request </entry>
- </row>
- <row>
- <entry>recheck_account_existence </entry>
- <entry>60 </entry>
- <entry>Cache timeout in seconds to send memcached for account existence </entry>
- </row>
- <row>
- <entry>recheck_container_existence </entry>
- <entry>60 </entry>
- <entry>Cache timeout in seconds to send memcached for container existence </entry>
- </row>
- <row>
- <entry>object_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Chunk size to read from object servers </entry>
- </row>
- <row>
- <entry>client_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Chunk size to read from clients </entry>
- </row>
- <row>
- <entry>memcache_servers </entry>
- <entry>127.0.0.1:11211 </entry>
- <entry>Comma separated list of memcached servers ip:port </entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>10 </entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>client_timeout </entry>
- <entry>60 </entry>
- <entry>Timeout to read one chunk from a client </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5 </entry>
- <entry>Connection timeout to external services </entry>
- </row>
- <row>
- <entry>error_suppression_interval </entry>
- <entry>60 </entry>
- <entry>Time in seconds that must elapse since the last error for a node to be considered no longer error limited </entry>
- </row>
- <row>
- <entry>error_suppression_limit </entry>
- <entry>10 </entry>
- <entry>Error count to consider a node error limited </entry>
- </row>
- <row>
- <entry>allow_account_management </entry>
- <entry>false </entry>
- <entry>Whether account <literal>PUT</literal>s and <literal>DELETE</literal>s are even callable </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Object Server</title>
- <para>The Object Server is a very simple blob storage server that can store, retrieve, and delete objects stored on local devices. Objects are stored as binary files on the file system with metadata stored in the file’s extended attributes (xattrs). This requires that the underlying file system choice for object servers support xattrs on files.
-
-</para>
- <para>The configurable options pertaining Object Server are stored in the file <filename>/etc/swift/object-server/1.conf</filename>. The following is the sample <filename>object-server/1.conf</filename> file:</para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6010
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster object-server
-
-[app:object-server]
-use = egg:swift#object
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[object-replicator]
-vm_test_mode = yes
-
-[object-updater]
-[object-auditor]</programlisting></para>
- <para>The following are the configurable options:
-</para>
- <table frame="all">
- <title>object-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>Mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6000 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>object-server.conf Server Options in the [object-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the object server. For most cases, this should be <literal>egg:swift#object</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>object-server </entry>
- <entry>log name used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- <row>
- <entry>log_requests </entry>
- <entry>True </entry>
- <entry>Whether or not to log each request </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>swift user</entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>3</entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5</entry>
- <entry>Connection timeout to external services </entry>
- </row>
- <row>
- <entry>network_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Size of chunks to read or write over the network </entry>
- </row>
- <row>
- <entry>disk_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Size of chunks to read or write to disk </entry>
- </row>
- <row>
- <entry>max_upload_time </entry>
- <entry>65536 </entry>
- <entry>Maximum time allowed to upload an object </entry>
- </row>
- <row>
- <entry>slow </entry>
- <entry>0</entry>
- <entry>If &gt; 0, Minimum time in seconds for a <literal>PUT</literal> or <literal>DELETE</literal> request to complete </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Container Server</title>
- <para>The Container Server’s primary job is to handle listings of objects. The listing is done by querying the GlusterFS mount point with path. This query returns a list of all files and directories present under that container.
-</para>
- <para>The configurable options pertaining to container server are stored in <filename>/etc/swift/container-server/1.conf</filename> file. The following is the sample <filename>container-server/1.conf</filename> file:</para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6011
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster container-server
-
-[app:container-server]
-use = egg:swift#container
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[container-replicator]
-[container-updater]
-[container-auditor]</programlisting></para>
- <para>The following are the configurable options:</para>
- <table frame="all">
- <title>container-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>Mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6001 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>Swift user</entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>container-server.conf Server Options in the [container-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>container-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>3 </entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5 </entry>
- <entry>Connection timeout to external services </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Account Server</title>
- <para>The Account Server is very similar to the Container Server, except that it is responsible for listing of containers rather than objects. In UFO, each gluster volume is an account.
-</para>
- <para>The configurable options pertaining to account server are stored in <filename>/etc/swift/account-server/1.conf</filename> file. The following is the sample <filename>account-server/1.conf</filename> file: </para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6012
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster account-server
-
-[app:account-server]
-use = egg:swift#account
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[account-replicator]
-vm_test_mode = yes
-
-[account-auditor]
-[account-reaper]</programlisting></para>
- <para>The following are the configurable options:</para>
- <table frame="all">
- <title>account-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6002 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>Swift user</entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>account-server.conf Server Options in the [account-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>account-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Starting and Stopping Server</title>
- <para>You must start the server manually when system reboots and whenever you update/modify the configuration files.</para>
- <itemizedlist>
- <listitem>
- <para>To start the server, enter the following command:</para>
- <para><command># swift_init main start</command></para>
- </listitem>
- <listitem>
- <para>To stop the server, enter the following command:</para>
- <para><command># swift_init main stop</command></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Working with Unified File and Object Storage</title>
- <para>This section describes the REST API for administering and managing Object Storage. All requests will
-be directed to the host and URL described in the <filename>X-Storage-URL HTTP</filename> header obtained during
-successful authentication.
-</para>
- <section>
- <title>Configuring Authenticated Access </title>
- <para>Authentication is the process of proving identity to the system. To use the REST interface, you must
-obtain an authorization token using GET method and supply it with v1.0 as the path.
-</para>
- <para>Each REST request against the Object Storage system requires the addition of a specific authorization
-token HTTP x-header, defined as X-Auth-Token. The storage URL and authentication token are
-returned in the headers of the response.
-</para>
- <itemizedlist>
- <listitem>
- <para>To authenticate, run the following command:
-</para>
- <programlisting>GET auth/v1.0 HTTP/1.1
-Host: &lt;auth URL&gt;
-X-Auth-User: &lt;account name&gt;:&lt;user name&gt;
-X-Auth-Key: &lt;user-Password&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>GET auth/v1.0 HTTP/1.1
-Host: auth.example.com
-X-Auth-User: test:tester
-X-Auth-Key: testing
-
-HTTP/1.1 200 OK
-X-Storage-Url: https:/example.storage.com:443/v1/AUTH_test
-X-Storage-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
-X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
-Content-Length: 0
-Date: Wed, 10 jul 2011 06:11:51 GMT</programlisting>
- <para>To authenticate access using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -H &apos;X-Storage-User: test:tester&apos; -H &apos;X-Storage-Pass:testing&apos; -k
-https://auth.example.com:443/auth/v1.0</programlisting>
- <para>The X-Auth-Url has to be parsed and used in the connection and request line of all subsequent
-requests to the server. In the example output, users connecting to server will send most
-container/object requests with a host header of example.storage.com and the request line&apos;s version
-and account as v1/AUTH_test.
-
-</para>
- </listitem>
- </itemizedlist>
- <note>
- <para>The authentication tokens are valid for a 24 hour period.
-</para>
- </note>
- </section>
- <section>
- <title>Working with Accounts </title>
- <para>This section describes the list of operations you can perform at the account level of the URL.
-</para>
- <section>
- <title>Displaying Container Information </title>
- <para>You can list the objects of a specific container, or all containers, as needed using GET command. You
-can use the following optional parameters with GET request to refine the results:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Parameter </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>limit </entry>
- <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry>
- </row>
- <row>
- <entry>marker </entry>
- <entry>Returns object names greater in value than the specified marker. </entry>
- </row>
- <row>
- <entry>format </entry>
- <entry>Specify either json or xml to return the respective serialized response. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">To display container information </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>List all the containers of an account using the following command:
-</para>
- <para><programlisting>GET /&lt;apiversion&gt;/&lt;account&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <programlisting>GET /v1/AUTH_test HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 16:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8
-Content-Length: 39
-
-songs
-movies
-documents
-reports</programlisting>
- </listitem>
- </itemizedlist>
- <para>To display container information using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X GET -H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test -k</programlisting></para>
- </section>
- <section>
- <title>Displaying Account Metadata Information </title>
- <para>You can issue HEAD command to the storage service to view the number of containers and the total
-bytes stored in the account.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display containers and storage used, run the following command:
-</para>
- <programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>HEAD /v1/AUTH_test HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 16:52:21 GMT
-Server: Apache
-X-Account-Container-Count: 4
-X-Account-Total-Bytes-Used: 394792</programlisting>
- <para>To display account metadata information using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test -k</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Working with Containers </title>
- <para>This section describes the list of operations you can perform at the container level of the URL.
-</para>
- <section>
- <title> Creating Containers </title>
- <para>You can use PUT command to create containers. Containers are the storage folders for your data.
-The URL encoded name must be less than 256 bytes and cannot contain a forward slash &apos;/&apos; character.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create a container, run the following command:
-</para>
- <programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/ HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>PUT /v1/AUTH_test/pictures/ HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-HTTP/1.1 201 Created
-
-Date: Wed, 13 Jul 2011 17:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting>
- <para>To create container using cURL (for the above example), run the following command:
-</para>
- <programlisting>curl -v -X PUT -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting>
- <para>The status code of 201 (Created) indicates that you have successfully created the container. If a
-container with same is already existed, the status code of 202 is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Objects of a Container </title>
- <para>You can list the objects of a container using GET command. You can use the following optional
-parameters with GET request to refine the results:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Parameter </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>limit </entry>
- <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry>
- </row>
- <row>
- <entry>marker </entry>
- <entry>Returns object names greater in value than the specified marker. </entry>
- </row>
- <row>
- <entry>prefix </entry>
- <entry>Displays the results limited to object names beginning with the substring x. beginning with the substring x. </entry>
- </row>
- <row>
- <entry>path </entry>
- <entry>Returns the object names nested in the pseudo path. </entry>
- </row>
- <row>
- <entry>format </entry>
- <entry>Specify either json or xml to return the respective serialized response. </entry>
- </row>
- <row>
- <entry>delimiter </entry>
- <entry>Returns all the object names nested in the container. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para>To display objects of a container
-</para>
- <itemizedlist>
- <listitem>
- <para>List objects of a specific container using the following command:
-</para>
- </listitem>
- </itemizedlist>
- <programlisting>GET /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;[parm=value] HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>GET /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 15:42:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8
-Content-Length: 139
-
-sample file.jpg
-test-file.pdf
-You and Me.pdf
-Puddle of Mudd.mp3
-Test Reports.doc</programlisting>
- <para>To display objects of a container using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -X GET-H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -k</programlisting>
- </section>
- <section>
- <title>Displaying Container Metadata Information </title>
- <para>You can issue HEAD command to the storage service to view the number of objects in a container and
-the total bytes of all the objects stored in the container.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display list of objects and storage used, run the following command:
-</para>
- <programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,</para>
- <programlisting>HEAD /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 19:52:21 GMT
-Server: Apache
-X-Account-Object-Count: 8
-X-Container-Bytes-Used: 472</programlisting>
- <para>To display list of objects and storage used in a container using cURL (for the above example), run
-the following command:
-</para>
- <programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -k</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Deleting Container </title>
- <para>You can use DELETE command to permanently delete containers. The container must be empty
-before it can be deleted.
-</para>
- <para>You can issue HEAD command to determine if it contains any objects.
-</para>
- <itemizedlist>
- <listitem>
- <para>To delete a container, run the following command:
-</para>
- <programlisting>DELETE /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/ HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,</para>
- <programlisting>DELETE /v1/AUTH_test/pictures HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 17:52:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting>
- <para>To delete a container using cURL (for the above example), run the following command:
-</para>
- <programlisting>curl -v -X DELETE -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting>
- <para>The status code of 204 (No Content) indicates that you have successfully deleted the container. If
-that container does not exist, the status code 404 (Not Found) is displayed, and if the container is
-not empty, the status code 409 (Conflict) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Updating Container Metadata </title>
- <para>You can update the metadata of container using POST operation, metadata keys should be prefixed
-with &apos;x-container-meta&apos;.
-</para>
- <itemizedlist>
- <listitem>
- <para>To update the metadata of the object, run the following command:
-</para>
- <programlisting>POST /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;
-X-Container-Meta-&lt;key&gt;: &lt;new value&gt;
-X-Container-Meta-&lt;key&gt;: &lt;new value&gt;</programlisting>
- <para>For example,
-</para>
- <para><programlisting>POST /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Container-Meta-Zoo: Lion
-X-Container-Meta-Home: Dog
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 20:52:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To update the metadata of the object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -H &apos; X-Container-Meta-Zoo: Lion&apos; -H &apos;X-Container-Meta-Home: Dog&apos; -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates the container&apos;s metadata is updated successfully. If
-that object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Working_UFO-Setting_ACLs">
- <title> Setting ACLs on Container </title>
- <para>You can set the container access control list by using POST command on container with <command>x- container-read</command> and<command> x-container-write</command> keys.
-</para>
- <para>The ACL format is <command>[item[,item...]]</command>. Each item can be a group name to give access to or a
-referrer designation to grant or deny based on the HTTP Referer header.
-</para>
- <para>The referrer designation format is:<command> .r:[-]value</command>.
-</para>
- <para>The .r can also be <command>.ref, .referer, </command>or .<command>referrer</command>; though it will be shortened to.r for
-decreased character count usage. The value can be <command>*</command> to specify any referrer host is allowed access. The leading minus sign (-)
-indicates referrer hosts that should be denied access.
-</para>
- <para>Examples of valid ACLs:
-</para>
- <para><programlisting>.r:*
-.r:*,bobs_account,sues_account:sue
-bobs_account,sues_account:sue</programlisting></para>
- <para>Examples of invalid ACLs:</para>
- <para><programlisting>.r:
-.r:-</programlisting></para>
- <para>By default, allowing read access via <command><command>.</command>r </command>will not allow listing objects in the container but allows
-retrieving objects from the container. To turn on listings, use the .<command>rlistings</command> directive. Also, <command>.r</command>
-designations are not allowed in headers whose names include the word write.
-</para>
- <para>For example, to set all the objects access rights to &quot;public‟ inside the container using cURL (for the
-above example), run the following command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images
--H &apos;X-Container-Read: .r:*&apos; -k</programlisting></para>
- </section>
- </section>
- <section>
- <title> Working with Objects </title>
- <para>An object represents the data and any metadata for the files stored in the system. Through the REST
-interface, metadata for an object can be included by adding custom HTTP headers to the request
-and the data payload as the request body. Objects name should not exceed 1024 bytes after URL
-encoding.
-</para>
- <para>This section describes the list of operations you can perform at the object level of the URL.
-</para>
- <section>
- <title>Creating or Updating Object </title>
- <para>You can use PUT command to write or update an object&apos;s content and metadata.
-</para>
- <para>You can verify the data integrity by including an MD5checksum for the object&apos;s data in the ETag
-header. ETag header is optional and can be used to ensure that the object&apos;s contents are stored
-successfully in the storage system.
-</para>
- <para>You can assign custom metadata to objects by including additional HTTP headers on the PUT request.
-The objects created with custom metadata via HTTP headers are identified with the<command>X-Object- Meta</command>- prefix.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create or update an object, run the following command:
-</para>
- <para><programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;
-ETag: da1e100dc9e7becc810986e37875ae38
-Content-Length: 342909
-X-Object-Meta-PIN: 2343</programlisting></para>
- <para>For example,</para>
- <para><programlisting>PUT /v1/AUTH_test/pictures/dog HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-ETag: da1e100dc9e7becc810986e37875ae38
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-ETag: da1e100dc9e7becc810986e37875ae38
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To create or update an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X PUT -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures/dog -H &apos;Content-
-Length: 0&apos; -k</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully created or updated the object.
-If there is a missing content-Length or Content-Type header in the request, the status code of 412
-(Length Required) is displayed. (Optionally) If the MD5 checksum of the data written to the storage
-system does not match the ETag value, the status code of 422 (Unprocessable Entity) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Chunked Transfer Encoding </title>
- <para>You can upload data without knowing the size of the data to be uploaded. You can do this by
-specifying an HTTP header of Transfer-Encoding: chunked and without using a Content-Length
-header.
-</para>
- <para>You can use this feature while doing a DB dump, piping the output through gzip, and then piping the
-data directly into Object Storage without having to buffer the data to disk to compute the file size.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create or update an object, run the following command:
- </para>
- <para><programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;
-Transfer-Encoding: chunked
-X-Object-Meta-PIN: 2343</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>PUT /v1/AUTH_test/pictures/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-Transfer-Encoding: chunked
-X-Object-Meta-PIN: 2343
-19
-A bunch of data broken up
-D
-into chunks.
-0</programlisting>
-
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Copying Object </title>
- <para>You can copy object from one container to another or add a new object and then add reference to
-designate the source of the data from another container.
-</para>
- <para><emphasis role="bold">To copy object from one container to another </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To add a new object and designate the source of the data from another container, run the
-following command:
-</para>
- <para><programlisting>COPY /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;sourceobject&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt; authentication-token-key&gt;
-Destination: /&lt;container&gt;/&lt;destinationobject&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>COPY /v1/AUTH_test/images/dogs HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-Destination: /photos/cats
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To copy an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X COPY -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos; -H &apos;Destination: /photos/cats&apos; -k https://example.storage.com:443/v1/AUTH_test/images/dogs</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully copied the object. If there is a
-missing content-Length or Content-Type header in the request, the status code of 412 (Length
-Required) is displayed.
-</para>
- <para>You can also use PUT command to copy object by using additional header <command>X-Copy-From: container/obj</command>.
-</para>
- </listitem>
- <listitem>
- <para>To use PUT command to copy an object, run the following command:
-</para>
- <para><programlisting>PUT /v1/AUTH_test/photos/cats HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Copy-From: /images/dogs
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To copy an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X PUT -H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
--H &apos;X-Copy-From: /images/dogs&apos; –k
-https://example.storage.com:443/v1/AUTH_test/images/cats</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully copied the object.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Object Information </title>
- <para>You can issue GET command on an object to view the object data of the object.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display the content of an object run the following command:</para>
- <para><programlisting>GET /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>GET /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 23:52:21 GMT
-Server: Apache
-Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
-ETag: 8a964ee2a5e88be344f36c22562a6486
-Content-Length: 534210
-[.........]</programlisting></para>
- <para>To display the content of an object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X GET -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para>
- <para>The status code of 200 (Ok) indicates the object‟s data is displayed successfully. If that object does
-not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Object Metadata </title>
- <para>You can issue HEAD command on an object to view the object metadata and other standard HTTP
-headers. You must send only authorization token as header.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display the metadata of the object, run the following command:
-</para>
- </listitem>
- </itemizedlist>
- <para><programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>HEAD /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 21:52:21 GMT
-Server: Apache
-Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
-ETag: 8a964ee2a5e88be344f36c22562a6486
-Content-Length: 512000
-Content-Type: text/plain; charset=UTF-8
-X-Object-Meta-House: Cat
-X-Object-Meta-Zoo: Cat
-X-Object-Meta-Home: Cat
-X-Object-Meta-Park: Cat</programlisting></para>
- <para>To display the metadata of the object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates the object‟s metadata is displayed successfully. If that
-object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </section>
- <section>
- <title>Updating Object Metadata </title>
- <para>You can issue POST command on an object name only to set or overwrite arbitrary key metadata. You
-cannot change the object‟s other headers such as Content-Type, ETag and others using POST
-operation. The POST command will delete all the existing metadata and replace it with the new
-arbitrary key metadata.
-</para>
- <para>You must prefix <emphasis role="bold">X-Object-Meta-</emphasis> to the key names.
-</para>
- <itemizedlist>
- <listitem>
- <para>To update the metadata of an object, run the following command:</para>
- <para><programlisting>POST /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;
-X-Object-Meta-&lt;key&gt;: &lt;new value&gt;
-X-Object-Meta-&lt;key&gt;: &lt;new value&gt;</programlisting>
-</para>
- <para>For example,
-</para>
- <para><programlisting>POST /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Object-Meta-Zoo: Lion
-X-Object-Meta-Home: Dog
-
-HTTP/1.1 202 Accepted
-Date: Wed, 13 Jul 2011 22:52:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To update the metadata of an object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -H &apos; X-Object-
-Meta-Zoo: Lion&apos; -H &apos;X-Object-Meta-Home: Dog&apos; -k</programlisting></para>
- <para>The status code of 202 (Accepted) indicates that you have successfully updated the object‟s
-metadata. If that object does not exist, the status code 404 (Not Found) is displayed.
-
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Deleting Object </title>
- <para>You can use DELETE command to permanently delete the object.
-</para>
- <para>The DELETE command on an object will be processed immediately and any subsequent operations
-like GET, HEAD, POST, or DELETE on the object will display 404 (Not Found) error.
-</para>
- <itemizedlist>
- <listitem>
- <para>To delete an object, run the following command:
-</para>
- <para><programlisting>DELETE /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>DELETE /v1/AUTH_test/pictures/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 20:52:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To delete an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X DELETE -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures/cat -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates that you have successfully deleted the object. If that
-object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_commandref.xml b/doc/admin-guide/en-US/admin_commandref.xml
deleted file mode 100644
index 05196b77326..00000000000
--- a/doc/admin-guide/en-US/admin_commandref.xml
+++ /dev/null
@@ -1,336 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Com_Ref">
- <title>Command Reference </title>
- <para>This section describes the available commands and includes thefollowing section:
-</para>
- <itemizedlist>
- <listitem>
- <para>gluster Command
-</para>
- <para>Gluster Console Manager (command line interpreter)
-</para>
- </listitem>
- <listitem>
- <para>glusterd Daemon
-</para>
- <para>Gluster elastic volume management daemon
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>gluster Command </title>
- <para><emphasis role="bold">NAME</emphasis>
-</para>
- <para>gluster - Gluster Console Manager (command line interpreter)
-</para>
- <para><emphasis role="bold">SYNOPSIS</emphasis>
-</para>
- <para>To run the program and display the gluster prompt:
-</para>
- <para><emphasis role="bold">gluster</emphasis>
-</para>
- <para>To specify a command directly:
-gluster [COMMANDS] [OPTIONS]
-</para>
- <para><emphasis role="bold">DESCRIPTION</emphasis>
-</para>
- <para>The Gluster Console Manager is a command line utility for elastic volume management. 'gluster' command enables administrators to perform cloud
-operations such as creating, expanding, shrinking, rebalancing, and migrating volumes without needing to schedule server downtime.
-</para>
- <para><emphasis role="bold">COMMANDS</emphasis>
-</para>
- <para><informaltable frame="none">
- <tgroup cols="3">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="cgen1" colsep="0"/>
- <colspec colnum="3" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Command</entry>
- <entry namest="cgen1" nameend="c1">Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Volume</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume info [all | VOLNAME]</entry>
- <entry namest="cgen1" nameend="c1">Displays information about all volumes, or the specified volume.</entry>
- </row>
- <row>
- <entry>volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ...</entry>
- <entry namest="cgen1" nameend="c1">Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
- NOTE: with 3.3.0 release, transport type 'rdma' and 'tcp,rdma' are not fully supported.</entry>
- </row>
- <row>
- <entry>volume delete VOLNAME</entry>
- <entry namest="cgen1" nameend="c1">Deletes the specified volume.</entry>
- </row>
- <row>
- <entry>volume start VOLNAME </entry>
- <entry namest="cgen1" nameend="c1">Starts the specified volume.</entry>
- </row>
- <row>
- <entry>volume stop VOLNAME [force] </entry>
- <entry namest="cgen1" nameend="c1">Stops the specified volume. </entry>
- </row>
- <row>
- <entry>volume help </entry>
- <entry namest="cgen1" nameend="c1">Displays help for the volume command.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Brick</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume add-brick VOLNAME [replica N] [stripe N] NEW-BRICK1 ... </entry>
- <entry namest="cgen1" nameend="c1">Adds the specified brick(s) to the given VOLUME. Using add-brick, users can increase the replica/stripe count of the volume, or increase the volume capacity by adding the brick(s) without changing volume type. </entry>
- </row>
- <row>
- <entry>volume replace-brick VOLNAME (BRICK NEW-BRICK) [start | start force | abort | status | commit | commit force] </entry>
- <entry namest="cgen1" nameend="c1">Used to replace BRICK with NEW-BRICK in a given VOLUME. After replace-brick is complete, the changes to get reflected in volume information, replace-brick 'commit' command is neccessary.</entry>
- </row>
- <row>
- <entry>volume remove-brick VOLNAME [replica N] BRICK1 ... [start | stop | status | commit | force ] </entry>
- <entry namest="cgen1" nameend="c1">Removes the specified brick(s) from the specified volume. 'remove-brick' command can be used to reduce the replica count of the volume when 'replica N' option is given. To ensure data migration from the removed brick to existing bricks, give 'start' sub-command at the end of the command. After the 'status' command says remove-brick operation is complete, user can 'commit' the changes to volume file. Using 'remove-brick' without 'start' option works similar to 'force' command, which makes the changes to volume configuration without migrating the data.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Rebalance</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME start</entry>
- <entry namest="cgen1" nameend="c1">Starts rebalancing of the data on specified volume.</entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME stop </entry>
- <entry namest="cgen1" nameend="c1">Stops rebalancing the specified volume. </entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME status </entry>
- <entry namest="cgen1" nameend="c1">Displays the rebalance status of the specified volume.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Log</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume log rotate VOLNAME [BRICK] </entry>
- <entry namest="cgen1" nameend="c1">Rotates the log file for corresponding volume/brick.</entry>
- </row>
-
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Debugging</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume top VOLNAME {[open|read|write|opendir|readdir [nfs]] |[read-perf|write-perf [nfs|{bs COUNT count COUNT}]]|[clear [nfs]]} [BRICK] [list-cnt COUNT]</entry>
- <entry namest="cgen1" nameend="c1">Shows the operation details on the volume depending on the arguments given. </entry>
- </row>
-
- <row>
- <entry>volume profile VOLNAME {start|info|stop} [nfs]</entry>
- <entry namest="cgen1" nameend="c1">Shows the file operation details on each bricks of the volume.</entry>
- </row>
- <row>
- <entry>volume status [all | VOLNAME] [nfs|shd|BRICK] [detail|clients|mem|inode|fd|callpool]</entry>
- <entry namest="cgen1" nameend="c1">Show details of activity, internal data of the processes (nfs/shd/BRICK) corresponding to one of the next argument given. If now argument is given, this command outputs bare minimum details of the current status (include PID of brick process etc) of volume's bricks.</entry>
- </row>
- <row>
- <entry>statedump VOLNAME [nfs] [all|mem|iobuf|callpool|priv|fd|inode|history]</entry>
- <entry namest="cgen1" nameend="c1">Command is used to take the statedump of the process, which is used captures most of the internal details. </entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Peer</emphasis>
- </entry>
- </row>
- <row>
- <entry>peer probe HOSTNAME </entry>
- <entry namest="cgen1" nameend="c1">Probes the specified peer. </entry>
- </row>
- <row>
- <entry>peer detach HOSTNAME </entry>
- <entry namest="cgen1" nameend="c1">Detaches the specified peer. </entry>
- </row>
- <row>
- <entry>peer status </entry>
- <entry namest="cgen1" nameend="c1">Displays the status of peers. </entry>
- </row>
- <row>
- <entry>peer help </entry>
- <entry namest="cgen1" nameend="c1">Displays help for the peer command.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Geo-replication</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume geo-replication MASTER SLAVE start</entry>
- <entry namest="cgen1" nameend="c1">
- <para>Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.</para>
- <para>You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.</para>
- </entry>
- </row>
- <row>
- <entry>volume geo-replication MASTER SLAVE stop</entry>
- <entry namest="cgen1" nameend="c1">
- <para>Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.</para>
- <para>You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
-</para>
- </entry>
- </row>
- <row>
- <entry morerows="10">volume geo-replication MASTER SLAVE config [options]</entry>
- <entry>Configure geo-replication options between the hosts specified by MASTER and SLAVE. </entry>
- </row>
- <row>
- <entry>gluster-command COMMAND</entry>
- <entry>The path where the gluster command is installed.</entry>
- </row>
- <row>
- <entry>gluster-log-level LOGFILELEVEL</entry>
- <entry>The log level for gluster processes.</entry>
- </row>
- <row>
- <entry>log-file LOGFILE</entry>
- <entry>The path to the geo-replication log file.</entry>
- </row>
- <row>
- <entry>log-level LOGFILELEVEL</entry>
- <entry>The log level for geo-replication.</entry>
- </row>
- <row>
- <entry>remote-gsyncd COMMAND</entry>
- <entry>The path where the gsyncd binary is installed on the remote machine.</entry>
- </row>
- <row>
- <entry>ssh-command COMMAND</entry>
- <entry>The ssh command to use to connect to the remote machine (the default is ssh).</entry>
- </row>
- <row>
- <entry>rsync-command COMMAND</entry>
- <entry>The rsync command to use for synchronizing the files (the default is rsync).</entry>
- </row>
- <row>
- <entry>volume_id= UID</entry>
- <entry>The command to delete the existing master UID for the intermediate/slave node.</entry>
- </row>
- <row>
- <entry>timeout SECONDS</entry>
- <entry>The timeout period.</entry>
- </row>
- <row>
- <entry>sync-jobs N</entry>
- <entry>The number of simultaneous files/directories that can be synchronized.</entry>
- </row>
- <row>
- <entry>ignore-deletes</entry>
- <entry>If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Other</emphasis>
- </entry>
- </row>
- <row>
- <entry>help</entry>
- <entry>Display the command options.</entry>
- </row>
- <row>
- <entry>quit</entry>
- <entry/>
- <entry>Exit the gluster command line interface.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">FILES</emphasis>
-
-</para>
- <para>/var/lib/glusterd/*
-</para>
- </section>
- <section>
- <title>glusterd Daemon </title>
- <para><emphasis role="bold">NAME</emphasis>
-</para>
- <para>glusterd - Gluster elastic volume management daemon</para>
- <para><emphasis role="bold">SYNOPSIS</emphasis>
-</para>
- <para>glusterd [OPTION...]
-</para>
- <para><emphasis role="bold">DESCRIPTION</emphasis>
-</para>
- <para>The glusterd daemon is used for elastic volume management. The daemon must be run on all export servers.
-</para>
- <para><emphasis role="bold">OPTIONS</emphasis>
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Option</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Basic</emphasis>
- </entry>
- </row>
- <row>
- <entry>-l=LOGFILE, --log-file=LOGFILE</entry>
- <entry>Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).</entry>
- </row>
- <row>
- <entry>-L=LOGLEVEL, --log-level=LOGLEVEL</entry>
- <entry>Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO). </entry>
- </row>
- <row>
- <entry>--debug</entry>
- <entry>Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.</entry>
- </row>
- <row>
- <entry>-N, --no-daemon</entry>
- <entry>Runs the program in the foreground.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Miscellaneous</emphasis>
- </entry>
- </row>
- <row>
- <entry>-?, --help</entry>
- <entry>Displays this help.</entry>
- </row>
- <row>
- <entry>--usage</entry>
- <entry>Displays a short usage message.</entry>
- </row>
- <row>
- <entry>-V, --version</entry>
- <entry>Prints the program version.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">FILES</emphasis>
-
-</para>
- <para>/var/lib/glusterd/*
-</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_console.xml b/doc/admin-guide/en-US/admin_console.xml
deleted file mode 100644
index 74d12b965d9..00000000000
--- a/doc/admin-guide/en-US/admin_console.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Using the Gluster Console Manager – Command Line Utility</title>
- <para>The Gluster Console Manager is a single command line utility that simplifies configuration and management of your storage environment. The Gluster Console Manager is similar to the LVM (Logical Volume Manager) CLI or ZFS Command Line Interface, but it works in sync with multiple storage servers. You can use the Gluster Console Manager while volumes are mounted and active too. Gluster automatically synchronizes volume configuration information across all Gluster servers.</para>
- <para>Using the Gluster Console Manager, you can create new volumes, start volumes, and stop volumes, as required. You can also add bricks to volumes, remove bricks from existing volumes, as well as change volume settings (such as some translator specific options), among other operations.</para>
- <para>You can also use these CLI commands to create scripts for automation, as well as use the commands as an API to allow integration with third-party applications. </para>
- <para><emphasis role="bold">Running the Gluster Console Manager</emphasis></para>
- <para>You can run the Gluster Console Manager on any GlusterFS server either by invoking the commands or by running the Gluster CLI in interactive mode. You can also use the gluster command remotely using SSH. </para>
- <itemizedlist>
- <listitem>
- <para>To run commands directly: </para>
- <para><command> # gluster peer <replaceable>command</replaceable></command></para>
- <para>For example:</para>
- <para><command> # gluster peer status </command></para>
- </listitem>
- <listitem>
- <para>To run the Gluster Console Manager in interactive mode </para>
- <para><command># gluster</command></para>
- <para>You can execute gluster commands from the Console Manager prompt:</para>
- <para><command> gluster&gt; <replaceable>command</replaceable></command> </para>
- <para>For example, to view the status of the peer server:</para>
- <para># <command>gluster </command></para>
- <para><command>gluster &gt; peer status </command></para>
- <para>Display the status of the peer.</para>
- </listitem>
- </itemizedlist>
- <para> With any 'gluster' installation, to check all the supported CLI commands, use <command> 'gluster help' </command>.</para>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_directory_Quota.xml b/doc/admin-guide/en-US/admin_directory_Quota.xml
deleted file mode 100644
index 83c4ff451fb..00000000000
--- a/doc/admin-guide/en-US/admin_directory_Quota.xml
+++ /dev/null
@@ -1,180 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Dir_Quota">
- <title>Managing Directory Quota </title>
- <para>Directory quotas in GlusterFS allow you to set limits on usage of disk space of a given directory.
-The storage administrators can control the disk space utilization at the directory level in GlusterFS by setting quota limits on the given directory. If
-admin sets the quota limit on the '/' of the volume, it can be treated as 'volume level quota'. GlusterFS's quota implementation can have different quota
-limit set on any directory and it can be nested. This is particularly useful in cloud deployments to facilitate utility billing model.
- </para>
- <para> <note>
- <para>For now, only Hard limit is supported. Here, the limit cannot be exceeded and attempts to use
-more disk space beyond the set limit will be denied.
-</para>
- </note></para>
- <para>System administrators can also monitor the resource utilization to limit the storage for the users
-depending on their role in the organization.
-</para>
- <para>You can set the quota at the following levels:
- </para>
- <itemizedlist>
- <listitem>
- <para>Directory level – limits the usage at the directory level
- </para>
- </listitem>
- <listitem>
- <para>Volume level – limits the usage at the volume level
- </para>
- </listitem>
- </itemizedlist>
- <note>
- <para>You can set the disk limit on the directory even if it is not created. The disk limit is enforced
-immediately after creating that directory. For more information on setting disk limit, see <xref linkend="chap-Administration_Guide-Dir_Quota-Set_Replace"/>.
-</para>
- </note>
- <section id="chap-Administration_Guide-Dir_Quota-Enable">
- <title>Enabling Quota </title>
- <para>You must enable Quota to set disk limits.
-</para>
- <para><emphasis role="bold">To enable quota</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Enable the quota using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> enable </command></para>
- <para>For example, to enable quota on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume enable
-Quota is enabled on /test-volume</programlisting> <!-- why '/' here before test-volume? its volume name, not directory -->
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Disable">
- <title>Disabling Quota </title>
- <para>You can disable Quota, if needed.
-</para>
- <para><emphasis role="bold">To disable quota:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Disable the quota using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> disable </command></para>
- <para>For example, to disable quota on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume disable
-Quota translator is disabled on /test-volume</programlisting> <!-- why '/' here before test-volume? its volume name, not directory -->
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Set_Replace">
- <title>Setting or Replacing Disk Limit </title>
- <para>You can create new directories in your storage environment and set the disk limit or set disk limit for
-the existing directories. The directory name should be relative to the volume with the export
-directory/mount being treated as &quot;/&quot;.
-</para>
- <para><emphasis role="bold">To set or replace disk limit</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Set the disk limit using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> limit-usage /<replaceable>directory</replaceable><replaceable>limit-value</replaceable></command></para>
- <para>For example, to set limit on data directory on test-volume where data is a directory under the
-export directory:
-</para>
- <programlisting># gluster volume quota test-volume limit-usage /data 10GB
-Usage limit has been set on /data</programlisting>
- <para><note>
- <para>In a multi-level directory hierarchy, the minimum of disk limit in entire hierarchy will be considered for enforcement.
-</para>
- </note></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Display">
- <title>Displaying Disk Limit Information </title>
- <para>You can display disk limit information on all the directories on which the limit is set.
-</para>
- <para><emphasis role="bold">To display disk limit information</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Display disk limit information of all the directories on which limit is set, using the following
-command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list</command>
-</para>
- <para>For example, to see the set disks limit on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume list
-
-<emphasis role="underline">
- <emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis>
- </emphasis>
-/Test/data 10 GB 6 GB
-/Test/data1 10 GB 4 GB</programlisting>
-<para> NOTE that, the directory listed here is not absolute directory name, but relative path to the volume's root ('/'). For example, if 'test-volume' is mounted on '/mnt/glusterfs', then for the above example, '/Test/data' means, '/mnt/glusterfs/Test/data' </para>
- </listitem>
- <listitem>
- <para>Display disk limit information on a particular directory on which limit is set, using the following
-command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list <replaceable>/directory name</replaceable></command>
-</para>
- <para>For example, to see the set limit on /data directory of test-volume:</para>
- <programlisting># gluster volume quota test-volume list /data
-
-<emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis>
-/Test/data 10 GB 6 GB</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Update">
- <title> Updating Memory Cache Size </title>
- <para>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating
-the maximum valid duration of directory sizes in cache, from the time they are populated.
-</para>
- <para>For example: If there are multiple clients writing to a single directory, there are chances that some
-other client might write till the quota limit is exceeded. However, this new file-size may not get
-reflected in the client till size entry in cache has become stale because of timeout. If writes happen
-on this client during this duration, they are allowed even though they would lead to exceeding of
-quota-limits, since size in cache is not in sync with the actual size. When timeout happens, the size
-in cache is updated from servers and will be in sync and no further writes will be allowed. A timeout
-of zero will force fetching of directory sizes from server for every operation that modifies file data
-and will effectively disables directory size caching on client side.
-</para>
- <para><emphasis role="bold">To update the memory cache size</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Update the memory cache size using the following command:
-</para>
- <para><command># gluster volume set <replaceable>VOLNAME</replaceable> features.quota-timeout<replaceable> value</replaceable></command></para>
- <para>For example, to update the memory cache size for every 5 seconds on test-volume:
-</para>
- <programlisting># gluster volume set test-volume features.quota-timeout 5
-Set volume successful</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Remove">
- <title>Removing Disk Limit </title>
- <para>You can remove set disk limit, if you do not want quota anymore.
-</para>
- <para><emphasis role="bold">To remove disk limit </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Remove disk limit set on a particular directory using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> remove <replaceable>/directory name</replaceable></command>
-</para>
- <para>For example, to remove the disk limit on /data directory of test-volume:
-</para>
- <programlisting># gluster volume quota test-volume remove /data
-Usage limit set on /data is removed</programlisting>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_geo-replication.xml b/doc/admin-guide/en-US/admin_geo-replication.xml
deleted file mode 100644
index 4691116acb8..00000000000
--- a/doc/admin-guide/en-US/admin_geo-replication.xml
+++ /dev/null
@@ -1,730 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Geo_Rep">
- <title>Managing Geo-replication</title>
- <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from one site to another over Local Area Networks (LANs), Wide Area Network (WANs), and across the Internet. </para>
- <para>Geo-replication uses a master–slave model, whereby replication and mirroring occurs between the following partners:</para>
- <itemizedlist>
- <listitem>
- <para>Master – a GlusterFS volume </para>
- </listitem>
- <listitem>
- <para>Slave – a slave which can be of the following types: </para>
- <itemizedlist>
- <listitem>
- <para>A local directory which can be represented as file URL like <filename>file:///path/to/dir</filename>. You can use shortened form, for example, <filename> /path/to/dir</filename>.</para>
- </listitem>
- <listitem>
- <para>A GlusterFS Volume - Slave volume can be either a local volume like <filename>gluster://localhost:volname</filename> (shortened form - <filename>:volname</filename>) or a volume served by different host like <filename>gluster://host:volname</filename> (shortened form - <filename>host:volname</filename>).</para>
- </listitem>
- </itemizedlist>
- <note>
- <para> Both of the above types can be accessed remotely using SSH tunnel. To use SSH, add an SSH prefix to either a file URL or gluster type URL. For example, <literal> ssh://root@remote-host:/path/to/dir</literal> (shortened form - <literal>root@remote-host:/path/to/dir</literal>) or <literal>ssh://root@remote-host:gluster://localhost:volname</literal> (shortened from - <literal>root@remote-host::volname</literal>). </para>
- </note>
- </listitem>
- </itemizedlist>
- <para>This section introduces Geo-replication, illustrates the various deployment scenarios, and explains how to configure the system to provide replication and mirroring in your environment. </para>
- <section id="chap-Administration_Guide-Geo_Rep-Replicated_volumes">
- <title>Replicated Volumes vs Geo-replication</title>
- <para>The following table lists the difference between replicated volumes and geo-replication:</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <thead>
- <row>
- <entry>Replicated Volumes</entry>
- <entry>Geo-replication</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Mirrors data across nodes in a cluster</entry>
- <entry>Mirrors data across geographically distributed clusters </entry>
- </row>
- <row>
- <entry>Provides high-availability</entry>
- <entry>Ensures backing up of data for disaster recovery</entry>
- </row>
- <row>
- <entry>Synchronous replication (each and every file modify operation is sent across all the bricks)</entry>
- <entry>Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences) </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation">
- <title>Preparing to Deploy Geo-replication</title>
- <para>This section provides an overview of the Geo-replication deployment scenarios, describes how you can check the minimum system requirements, and explores common deployment scenarios.</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options">
- <title>Exploring Geo-replication Deployment Scenarios</title>
- <para>Geo-replication provides an incremental replication service over Local Area Networks (LANs), Wide Area Network (WANs), and across the Internet. This section illustrates the most common deployment scenarios for Geo-replication, including the following: </para>
- <itemizedlist>
- <listitem>
- <para>Geo-replication over LAN
-</para>
- </listitem>
- <listitem>
- <para>Geo-replication over WAN
-</para>
- </listitem>
- <listitem>
- <para>Geo-replication over the Internet</para>
- </listitem>
- <listitem>
- <para>Multi-site cascading Geo-replication</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Geo-replication over LAN</emphasis></para>
- <para>You can configure Geo-replication to mirror data over a Local Area Network. </para>
- <mediaobject>
- <textobject>
- <phrase>Geo-replication over LAN</phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep_LAN.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Geo-replication over WAN</emphasis></para>
- <para>You can configure Geo-replication to replicate data over a Wide Area Network.</para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Geo-replication over WAN</phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep_WAN.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Geo-replication over Internet</emphasis></para>
- <para>You can configure Geo-replication to mirror data over the Internet.</para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Geo-replication over Internet</phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep03_Internet.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Multi-site cascading Geo-replication</emphasis> </para>
- <para>You can configure Geo-replication to mirror data in a cascading fashion across multiple sites. </para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Multi-site cascading Geo-replication </phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep04_Cascading.png"/>
- </imageobject>
- </mediaobject>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview">
- <title>Geo-replication Deployment Overview</title>
- <para>Deploying Geo-replication involves the following steps:</para>
- <orderedlist>
- <listitem>
- <para>Verify that your environment matches the minimum system requirements. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/>.</para>
- </listitem>
- <listitem>
- <para>Determine the appropriate deployment scenario. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/>.</para>
- </listitem>
- <listitem>
- <para>Start Geo-replication on master and slave systems, as required. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs">
- <title>Checking Geo-replication Minimum Requirements</title>
- <para condition="gfs">Before deploying GlusterFS Geo-replication, verify that your systems match the minimum requirements. </para>
- <para condition="gfs">The following table outlines the minimum requirements for both master and slave nodes within your environment:</para>
- <informaltable frame="all" condition="gfs">
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Component</entry>
- <entry>Master</entry>
- <entry>Slave</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Operating System </entry>
- <entry>GNU/Linux</entry>
- <entry>GNU/Linux</entry>
- </row>
- <row>
- <entry>Filesystem</entry>
- <entry>GlusterFS 3.2 or higher</entry>
- <entry>GlusterFS 3.2 or higher, ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively) </entry>
- </row>
- <row>
- <entry>Python </entry>
- <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry>
- <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry>
- </row>
- <row>
- <entry>Secure shell </entry>
- <entry>OpenSSH version 4.0 (or higher)</entry>
- <entry>SSH2-compliant daemon </entry>
- </row>
- <row>
- <entry>Remote synchronization</entry>
- <entry>rsync 3.0.0 or higher </entry>
- <entry>rsync 3.0.0 or higher </entry>
- </row>
- <row>
- <entry>FUSE </entry>
- <entry>GlusterFS supported versions </entry>
- <entry>GlusterFS supported versions </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment">
- <title>Setting Up the Environment for Geo-replication</title>
- <para><emphasis role="bold">Time Synchronization</emphasis> </para>
- <itemizedlist>
- <listitem>
- <para>All servers that are part of a geo-replication master volume need to have their clocks in sync. You are recommended to set up NTP (Network Time Protocol) daemon service to keep the clocks in sync.</para>
- <para>For example: In a Replicated volume where brick1 of the master is at 12.20 hrs and brick 2 of the master is at 12.10 hrs with 10 minutes time lag, all the changes in brick2 between this period may go unnoticed during synchronization of files with Slave.</para>
- <para>For more information on setting up NTP daemon, see <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html"/>.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To setup Geo-replication for SSH </emphasis></para>
- <para>Password-less login has to be set up between the host machine (where geo-replication start command will be issued) and the remote machine (where slave process should be launched through SSH).</para>
- <orderedlist>
- <listitem>
- <para>On the node where geo-replication start commands are to be issued, run the following command:</para>
- <para><command># ssh-keygen -f /var/lib/glusterd/geo-replication/secret.pem</command>
-</para>
- <para>Press Enter twice to avoid passphrase.
-</para>
- </listitem>
- <listitem>
- <para>Run the following command on master for all the slave hosts: </para>
- <para><command># ssh-copy-id -i /var/lib/glusterd/geo-replication/secret.pem.pub <varname>user</varname>@<varname>slavehost</varname></command></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave">
- <title>Setting Up the Environment for a Secure Geo-replication Slave</title>
- <para>You can configure a secure slave using SSH so that master is granted
-restricted access. With GlusterFS 3.3, you need not specify slave
-configuration parameters on the master-side. For example, the master does not require the location of
-the rsync program on slave but the slave must ensure that rsync is in
-the PATH of the user which the master connects using SSH. The only
-information that master and slave have to negotiate are the slave-side
-user account, slave&apos;s resources and
-the master&apos;s public key. Secure access to the slave can be established
-using the following options:</para>
- <itemizedlist>
- <listitem>
- <para>Restricting Remote Command Execution</para>
- </listitem>
- <listitem>
- <para>Using <filename>Mountbroker</filename> for Slaves</para>
- </listitem>
- <listitem>
- <para>Using IP based Access Control</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Backward Compatibility</emphasis> </para>
- <para>Your existing Geo-replication environment will work with GlusterFS
- 3.3, except for the following:</para>
- <itemizedlist>
- <listitem>
- <para>The process of secure reconfiguration affects only the GlusterFS
-instance on slave. The changes are transparent to master with the
-exception that you may have to change the SSH target to an unprivileged
- account on the slave.</para>
- </listitem>
- <listitem>
- <para>The following are some exceptions where backward compatibility cannot be provided:</para>
- <para><itemizedlist>
- <listitem>
- <para>Geo-replication URLs which specify the slave resource include the following special characters: space, *, ?, [;</para>
- </listitem>
- <listitem>
- <para>Slave does not have glusterd running.</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Restricting Remote Command Execution</title>
- <para>If you restrict remote command execution, then the slave audits commands
-coming from the master and only the pre-configured commands are allowed. The slave also provides access only
-to the files which are pre-configured to be read or manipulated by the master.</para>
- <para>To restrict remote command execution:</para>
- <orderedlist>
- <listitem>
- <para>Identify the location of the gsyncd helper utility on Slave. This utility is installed in <filename>PREFIX/libexec/glusterfs/gsyncd</filename>, where PREFIX is a compile-time parameter of glusterfs. For example, <filename>--prefix=PREFIX</filename> to the configure script with the following common values<filename> /usr, /usr/local, and /opt/glusterfs/glusterfs_version</filename>.</para>
- </listitem>
- <listitem>
- <para>Ensure that command invoked from master to slave is passed through the slave&apos;s gsyncd utility. </para>
- <para>You can use either of the following two options:</para>
- <itemizedlist>
- <listitem>
- <para>Set gsyncd with an absolute path as the shell for the account
-which the master connects through SSH. If you need to use a privileged
-account, then set it up by creating a new user with UID 0. </para>
- </listitem>
- <listitem>
- <para>Setup key authentication with command enforcement to gsyncd. You must prefix the copy of master&apos;s public key in the Slave account&apos;s <filename>authorized_keys</filename> file with the following command:</para>
- <para><filename>command=&lt;path to gsyncd&gt;</filename>. </para>
- <para>For example, <command>command=&quot;PREFIX/glusterfs/gsyncd&quot; ssh-rsa AAAAB3Nza....</command></para>
- </listitem>
- </itemizedlist>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Using Mountbroker for Slaves </title>
- <para><filename>mountbroker</filename> is a new service of glusterd. This service allows an
-unprivileged process to own a GlusterFS mount. This is accomplished by registering a label
-(and DSL (Domain-specific language) options ) with glusterd through the
-glusterd volfile. Using CLI, you can send a mount request to glusterd and
-receive an alias (symlink) to the mounted volume.</para>
- <para>The unprivileged process/agent uses the
-mountbroker service of glusterd to set up an auxiliary gluster mount. The mount
-is setup so as to allow only that agent to provide administrative
-level access to the particular volume.</para>
- <para><emphasis role="bold">To setup an auxiliary gluster mount for the agent</emphasis>:</para>
- <orderedlist>
- <listitem>
- <para>Create a new group. For example, <filename>geogroup</filename>.</para>
- </listitem>
- <listitem>
- <para>Create a unprivileged account. For example, <filename> geoaccount</filename>. Make it a member of <filename> geogroup</filename>.</para>
- </listitem>
- <listitem>
- <para>Create a new directory as superuser to be used as mountbroker's root. </para>
- </listitem>
- <listitem>
- <para> Change the permission of the directory to <emphasis role="italic">0711.</emphasis> </para>
- </listitem>
- <listitem>
- <para>Add the following options to the glusterd volfile, located at /etc/glusterfs/glusterd.vol, assuming the name of the slave gluster volume as <filename>slavevol</filename>:</para>
- <para><command>option mountbroker-root /var/mountbroker-root </command></para>
- <para><command>option mountbroker-geo-replication.geoaccount slavevol</command></para>
- <para><command>option geo-replication-log-group geogroup</command></para>
- <para>A sample glusterd volfile along with default options:</para>
- <para><screen>volume management
- type mgmt/glusterd
- option working-directory /etc/glusterd
- option transport-type socket,rdma
- option transport.socket.keepalive-time 10
- option transport.socket.keepalive-interval 2
- option transport.socket.read-fail-log off
-
- option mountbroker-root /var/mountbroker-root
- option mountbroker-geo-replication.geoaccount slavevol
- option geo-replication-log-group geogroup
-end-volume</screen></para>
- <para>If you host multiple slave volumes, you can repeat step 2. for each of the slave volumes and add the following options to the <filename>volfile</filename>:</para>
- <para><screen>option mountbroker-geo-replication.geoaccount2 slavevol2
-option mountbroker-geo-replication.geoaccount3 slavevol3</screen></para>
- </listitem>
- <listitem>
- <para>Setup Master to access Slave as <filename>geoaccount@Slave</filename>.</para>
- <para>You can add multiple slave volumes within the same account (geoaccount) by providing comma-separated list of slave
- volumes (without spaces) as the argument of <command>mountbroker-geo-replication.geogroup</command>. You can also have multiple options of the form <command>mountbroker-geo-replication.*</command>. It is recommended to use one service account per Master machine. For example, if there are multiple slave volumes on Slave for the master machines Master1, Master2, and Master3, then create a dedicated service user on Slave for them by repeating Step 2. for each (like geogroup1, geogroup2, and geogroup3), and then add the following corresponding options to the volfile:
-</para>
- <para><command>option mountbroker-geo-replication.geoaccount1 slavevol11,slavevol12,slavevol13</command></para>
- <para><command>option mountbroker-geo-replication.geoaccount2 slavevol21,slavevol22</command></para>
- <para><command>option mountbroker-geo-replication.geoaccount3 slavevol31</command></para>
- <para>
-Now set up Master1 to ssh to geoaccount1@Slave, etc.
-</para>
- <para>You must restart glusterd to make the configuration changes effective. </para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Using IP based Access Control</title>
- <para>You can provide access control for the slave resources using IP
- addresses. You can use method for both Gluster volume and and
- file tree slaves, but in this section, we are focusing on file tree slaves.</para>
- <para>To set IP address based access control for file tree slaves:</para>
- <orderedlist>
- <listitem>
- <para>Set a general restriction for accessibility of file tree resources:
-</para>
- <para><command># gluster volume geo-replication &apos;/*&apos; config allow-network ::1,127.0.0.1 </command></para>
- <para>This will refuse all requests for spawning slave agents except for
-requests initiated locally.</para>
- </listitem>
- <listitem>
- <para>If you want the to lease file tree at <filename>/data/slave-tree</filename> to Master, enter the following command:</para>
- <para><command># gluster volume geo-replication<varname> /data/slave-tree </varname>config allow-network <varname>MasterIP</varname></command></para>
- <para><varname>MasterIP</varname> is the IP address of Master. The slave agent spawn request from
-master will be accepted if it is executed at <filename>/data/slave-tree</filename>.</para>
- </listitem>
- </orderedlist>
- <para>If the Master side network configuration does not enable the Slave to
-recognize the exact IP address of Master, you can use CIDR notation to
-specify a subnet instead of a single IP address as MasterIP or even
-comma-separated lists of CIDR subnets.</para>
- <para>If you want to extend IP based access control to gluster slaves, use the following command:</para>
- <para><command># gluster volume geo-replication &apos;*&apos; config allow-network ::1,127.0.0.1</command></para>
- </section>
- </section>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting">
- <title>Starting Geo-replication</title>
- <para>This section describes how to configure and start Gluster Geo-replication in your storage environment, and verify that it is functioning correctly. </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Start"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Verify"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Display"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Stop"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Start">
- <title>Starting Geo-replication</title>
- <para>To start Gluster Geo-replication </para>
- <itemizedlist>
- <listitem>
- <para>Start geo-replication between the hosts using the following command:
- </para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir start
-Starting geo-replication session between Volume1
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para><note>
- <para>You may need to configure the Geo-replication service before
- starting it. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/>.</para>
- </note></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Verify">
- <title>Verifying Successful Deployment</title>
- <para>You can use the gluster command to verify the status of Gluster Geo-replication in your environment.</para>
- <para><emphasis role="bold">To verify the status Gluster Geo-replication</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Verify the status by issuing the following command on host:</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command>
-</para>
- <para>For example:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command>
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Starting....</programlisting>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Display">
- <title>Displaying Geo-replication Status Information</title>
- <para>You can display status information about a specific geo-replication master session, or a particular master-slave session, or all geo-replication sessions, as needed.</para>
- <para><emphasis role="bold">To display geo-replication status information</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information of all geo-replication sessions using the following command:</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Starting....</programlisting></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Display information of a particular master slave session using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command>
-</para>
- <para>For example, to display information of Volume1 and example.com:/data/remote_dir
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command>
-</para>
- <para>The status of the geo-replication between Volume1 and example.com:/data/remote_dir is displayed.</para>
- </listitem>
- <listitem>
- <para>Display information of all geo-replication sessions belonging to a master</para>
- <para><command># gluster volume geo-replication MASTER status</command>
-</para>
- <para>For example, to display information of Volume1</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 ssh://example.com:gluster://127.0.0.1:remove_volume OK
-
-Volume1 ssh://example.com:file:///data/remote_dir OK</programlisting></para>
- <para>The status of a session could be one of the following four:</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Starting</emphasis>: This is the initial phase of the Geo-replication session; it remains in this state for a minute, to make sure no abnormalities are present.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">OK</emphasis>: The geo-replication session is in a stable state.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Faulty</emphasis>: The geo-replication session has witnessed some abnormality and the situation has to be investigated further. For further information, see <xref linkend="chap-Administration_Guide-Troubleshooting"/> section.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Corrupt</emphasis>: The monitor thread which is monitoring the geo-replication session has died. This situation should not occur normally, if it persists contact Red Hat Support<ulink url="www.redhat.com/support/"/>.</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Configure">
- <title>Configuring Geo-replication</title>
- <para>To configure Gluster Geo-replication </para>
- <itemizedlist>
- <listitem>
- <para>Use the following command at the Gluster command line:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> config [options]</command>
-</para>
- <para>For more information about the options, see <xref linkend="chap-Administration_Guide-Com_Ref"/>.
-</para>
- <para>For example:
-</para>
- <para>To view list of all option/value pair, use the following command:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config</command>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Stop">
- <title>Stopping Geo-replication</title>
- <para>You can use the gluster command to stop Gluster Geo-replication (syncing of data from Master to Slave) in your environment. </para>
- <para><emphasis role="bold">To stop Gluster Geo-replication</emphasis> </para>
- <itemizedlist>
- <listitem>
- <para>Stop geo-replication between the hosts using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command></para>
- <para>For example:
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir stop
-Stopping geo-replication session between Volume1 and
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para>See <xref linkend="chap-Administration_Guide-Com_Ref"/> for more information about the gluster command.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Restoring_Data">
- <title>Restoring Data from the Slave</title>
- <para>You can restore data from the slave to the master volume, whenever the master volume becomes faulty for reasons like hardware failure.
-</para>
- <para>The example in this section assumes that you are using the Master Volume (Volume1) with the following configuration:
-</para>
- <para><programlisting>machine1# gluster volume info
-Type: Distribute
-Status: Started
-Number of Bricks: 2
-Transport-type: tcp
-Bricks:
-Brick1: machine1:/export/dir16
-Brick2: machine2:/export/dir16
-Options Reconfigured:
-geo-replication.indexing: on</programlisting></para>
- <para>The data is syncing from master volume (Volume1) to slave directory (example.com:/data/remote_dir). To view the status of this geo-replication session run the following command on Master: </para>
- <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir OK</programlisting>
- <para><emphasis role="bold">Before Failure</emphasis>
-</para>
- <para>Assume that the Master volume had 100 files and was mounted at /mnt/gluster on one of the client machines (client). Run the following command on Client machine to view the list of files:
-</para>
- <para><programlisting>client# ls /mnt/gluster | wc –l
-100</programlisting></para>
- <para>The slave directory (example.com) will have same data as in the master volume and same can be viewed by running the following command on slave:
-</para>
- <para><programlisting>example.com# ls /data/remote_dir/ | wc –l
-100</programlisting></para>
- <para><emphasis role="bold">After Failure</emphasis>
-</para>
- <para>If one of the bricks (machine2) fails, then the status of Geo-replication session is changed from &quot;OK&quot; to &quot;Faulty&quot;. To view the status of this geo-replication session run the following command on Master:
-</para>
- <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Faulty</programlisting>
- <para>Machine2 is failed and now you can see discrepancy in number of files between master and slave. Few files will be missing from the master volume but they will be available only on slave as shown below.
-</para>
- <para>Run the following command on Client:
- </para>
- <para><programlisting>client # ls /mnt/gluster | wc –l
-52</programlisting></para>
- <para>Run the following command on slave (example.com):
-</para>
- <para><programlisting>Example.com# # ls /data/remote_dir/ | wc –l
-100</programlisting></para>
- <para><emphasis role="bold">To restore data from the slave machine</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Stop all Master&apos;s geo-replication sessions using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume geo-replication Volume1
-example.com:/data/remote_dir stop
-
-Stopping geo-replication session between Volume1 &amp;
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para><note>
- <para>Repeat <command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command>command on all active geo-replication sessions of master volume.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>Replace the faulty brick in the master by using the following command:
-</para>
- <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> start</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 start
-Replace-brick started successfully</programlisting></para>
- </listitem>
- <listitem>
- <para>Commit the migration of data using the following command:
-</para>
- <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> commit force </command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 commit force
-Replace-brick commit successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the migration of brick by viewing the volume info using the following command:
-</para>
- <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume info
-Volume Name: Volume1
-Type: Distribute
-Status: Started
-Number of Bricks: 2
-Transport-type: tcp
-Bricks:
-Brick1: machine1:/export/dir16
-Brick2: machine3:/export/dir16
-Options Reconfigured:
-geo-replication.indexing: on</programlisting></para>
- </listitem>
- <listitem>
- <para>Run rsync command manually to sync data from slave to master volume&apos;s client (mount point).
-</para>
- <para>For example:
-</para>
- <para><command>example.com# rsync -PavhS --xattrs --ignore-existing /data/remote_dir/ client:/mnt/gluster</command></para>
- <para>Verify that the data is synced by using the following command:
-</para>
- <para>On master volume, run the following command:
-</para>
- <para><programlisting>Client # ls | wc –l
-100</programlisting></para>
- <para>On the Slave run the following command:
-</para>
- <para><programlisting>example.com# ls /data/remote_dir/ | wc –l
-100</programlisting></para>
- <para>Now Master volume and Slave directory is synced.
-</para>
- </listitem>
- <listitem>
- <para>Restart geo-replication session from master to slave using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start </command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume geo-replication Volume1
-example.com:/data/remote_dir start
-Starting geo-replication session between Volume1 &amp;
-example.com:/data/remote_dir has been successful</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Best_Practices">
- <title>Best Practices</title>
- <para><emphasis role="bold">Manually Setting Time </emphasis></para>
- <para>If you have to change the time on your bricks manually, then you must set uniform time on all bricks. This avoids the out-of-time sync issue described in <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/>. Setting time backward corrupts the geo-replication index, so the recommended way to set the time manually is:
-</para>
- <orderedlist>
- <listitem>
- <para>Stop geo-replication between the master and slave using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> sto</command>p
-</para>
- </listitem>
- <listitem>
- <para>Stop the geo-replication indexing using the following command:
-</para>
- <para><command># gluster volume set <replaceable>MASTER</replaceable> geo-replication.indexing of</command>f</para>
- </listitem>
- <listitem>
- <para>Set uniform time on
- all bricks.s</para>
- </listitem>
- <listitem>
- <para>Restart your geo-replication sessions by using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE </replaceable>start </command></para>
- </listitem>
- </orderedlist>
- <para><emphasis role="bold">Running Geo-replication commands in one system</emphasis>
-</para>
- <para>It is advisable to run the geo-replication commands in one of the bricks in the trusted storage pool. This is because, the log files for the geo-replication session would be stored in the *Server* where the Geo-replication start is initiated. Hence it would be easier to locate the log-files when required.
-</para>
- <para><emphasis role="bold">Isolation </emphasis></para>
- <para>Geo-replication slave operation is not sandboxed as of now and is ran as a privileged service. So for the security reason, it is advised to create a sandbox environment (dedicated machine / dedicated virtual machine / chroot/container type solution) by the administrator to run the geo-replication slave in it. Enhancement in this regard will be available in follow-up minor release.
-</para>
- </section>
-</chapter>
-
diff --git a/doc/admin-guide/en-US/admin_managing_volumes.xml b/doc/admin-guide/en-US/admin_managing_volumes.xml
deleted file mode 100644
index 333d46cd6e4..00000000000
--- a/doc/admin-guide/en-US/admin_managing_volumes.xml
+++ /dev/null
@@ -1,742 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Managing_Volumes">
- <title>Managing GlusterFS Volumes</title>
- <para>This section describes how to perform common GlusterFS management operations, including the following: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Expanding"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Shrinking"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Migrating"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Stop"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Self_heal"/></para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Managing_Volumes-Tuning">
- <title>Tuning Volume Options</title>
- <para>You can tune volume options, as needed, while the cluster is online and available. </para>
- <para><note>
- <para>It is recommend to set server.allow-insecure option to ON if there are too many bricks in each volume or if there are too many services which have already utilized all the privileged ports in the system. Turning this option ON allows ports to accept/reject messages from insecure ports. So, use this option only if your deployment requires it. </para>
- </note></para>
- <para>To tune volume options </para>
- <itemizedlist>
- <listitem>
- <para>Tune volume options using the following command:</para>
- <para><command># gluster volume set <replaceable>VOLNAME OPTION PARAMETER</replaceable></command></para>
- <para>For example, to specify the performance cache size for test-volume:</para>
- <para><programlisting># gluster volume set test-volume performance.cache-size 256MB
-Set volume successful</programlisting></para>
- <para>The following table lists the Volume options along with its description and default value: </para>
- <para><note>
- <para>The default options given here are subject to modification at any given time and may not be the same for all versions.</para>
- </note></para>
- <informaltable frame="all">
- <tgroup cols="4">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <colspec colname="c4"/>
- <thead>
- <row>
- <entry>Option</entry>
- <entry>Description</entry>
- <entry>Default Value</entry>
- <entry>Available Options</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>auth.allow</entry>
- <entry>IP addresses of the clients which should be allowed to access the volume. </entry>
- <entry>* (allow all)</entry>
- <entry>Valid IP address which includes wild card patterns including *, such as 192.168.1.*</entry>
- </row>
- <row>
- <entry>auth.reject</entry>
- <entry>IP addresses of the clients which should be denied to access the volume. </entry>
- <entry>NONE (reject none) </entry>
- <entry>Valid IP address which includes wild card patterns including *, such as 192.168.2.*</entry>
- </row>
- <row>
- <entry>client.grace-timeout</entry>
- <entry>Specifies the duration for the lock state to be maintained on the client after a network disconnection.</entry>
- <entry>10 </entry>
- <entry>10 - 1800 secs</entry>
- </row>
- <row>
- <entry>cluster.self-heal-window-size</entry>
- <entry>Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. </entry>
- <entry>16 </entry>
- <entry>0 - 1025 blocks</entry>
- </row>
- <row>
- <entry>cluster.data-self-heal-algorithm</entry>
- <entry>Specifies the type of self-heal. If you set the option as &quot;full&quot;, the entire file is copied from source to destinations. If the option is set to &quot;diff&quot; the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the &quot;diff&quot; algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than &quot;diff&quot; which has to read checksums and then read and write. </entry>
- <entry>reset</entry>
- <entry>full | diff | reset</entry>
- </row>
- <row>
- <entry>cluster.min-free-disk</entry>
- <entry>Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. </entry>
- <entry>10%</entry>
- <entry>Percentage of required minimum free disk space</entry>
- </row>
- <row>
- <entry>cluster.stripe-block-size</entry>
- <entry>Specifies the size of the stripe unit that will be read from or written to. </entry>
- <entry>128 KB (for all files)</entry>
- <entry>size in bytes</entry>
- </row>
- <row>
- <entry>cluster.self-heal-daemon</entry>
- <entry>Allows you to turn-off proactive self-heal on replicated volumes.</entry>
- <entry>on</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>diagnostics.brick-log-level</entry>
- <entry>Changes the log-level of the bricks. </entry>
- <entry>INFO </entry>
- <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry>
- </row>
- <row>
- <entry>diagnostics.client-log-level</entry>
- <entry>Changes the log-level of the clients. </entry>
- <entry>INFO </entry>
- <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry>
- </row>
- <row>
- <entry>diagnostics.latency-measurement</entry>
- <entry>Statistics related to the latency of each operation would be tracked. </entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>diagnostics.dump-fd-stats</entry>
- <entry>Statistics related to file-operations would be tracked.</entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>feature.read-only</entry>
- <entry>Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it.</entry>
- <entry>off</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>features.lock-heal</entry>
- <entry>Enables self-healing of locks when the network disconnects.</entry>
- <entry>on</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>features.quota-timeout</entry>
- <entry>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. </entry>
- <entry>0</entry>
- <entry>0 - 3600 secs</entry>
- </row>
- <row>
- <entry>geo-replication.indexing</entry>
- <entry>Use this option to automatically sync the changes in the filesystem from Master to Slave.</entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>network.frame-timeout</entry>
- <entry>The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. </entry>
- <entry>1800 (30 mins) </entry>
- <entry>1800 secs</entry>
- </row>
- <row>
- <entry>network.ping-timeout</entry>
- <entry>The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. <para>This reconnect is a very expensive operation and should be avoided.
-</para></entry>
- <entry>42 Secs</entry>
- <entry>42 Secs</entry>
- </row>
- <row>
- <entry>nfs.enable-ino32</entry>
- <entry>For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. Applications that will benefit are those that were either: <para>* Built 32-bit and run on 32-bit machines.</para><para>* Built 32-bit on 64-bit systems.</para><para>* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.</para><para>Either of the conditions above can lead to application on Linux NFS clients failing with &quot;Invalid argument&quot; or &quot;Value too large for defined data type&quot; errors.</para></entry>
- <entry>off</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>nfs.volume-access </entry>
- <entry>Set the access type for the specified sub-volume. </entry>
- <entry>read-write </entry>
- <entry>read-write|read-only </entry>
- </row>
- <row>
- <entry>nfs.trusted-write </entry>
- <entry>If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. <para>In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.</para></entry>
- <entry> off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>nfs.trusted-sync</entry>
- <entry> All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. </entry>
- <entry>off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>nfs.export-dir </entry>
- <entry>By default, all sub-volumes of NFS are exported as individual exports. Now, this option allows you to export only the specified subdirectory or subdirectories in the volume. This option can also be used in conjunction with nfs3.export-volumes option to restrict exports only to the subdirectories specified through this option. You must provide an absolute path.</entry>
- <entry>Enabled for all sub directories.</entry>
- <entry>Enable | Disable </entry>
- </row>
- <row>
- <entry>nfs.export-volumes </entry>
- <entry>Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports. </entry>
- <entry>on</entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-unix </entry>
- <entry>Enable/Disable the AUTH_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required.</entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-null </entry>
- <entry>Enable/Disable the AUTH_NULL authentication type. It is not recommended to change the default value for this option. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-allow&lt;IP- Addresses&gt; </entry>
- <entry>Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes.</entry>
- <entry>Reject All </entry>
- <entry>IP address or Host name </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-reject IP- Addresses </entry>
- <entry>Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes.</entry>
- <entry>Reject All </entry>
- <entry>IP address or Host name </entry>
- </row>
- <row>
- <entry>nfs.ports-insecure </entry>
- <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry>
- <entry>off</entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.addr-namelookup </entry>
- <entry>Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.* filters. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.register-with- portmap </entry>
- <entry>For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.port &lt;PORT- NUMBER&gt; </entry>
- <entry>Use this option on systems that need Gluster NFS to be associated with a non-default port number. </entry>
- <entry>38465- 38467 </entry>
- <entry/>
- </row>
- <row>
- <entry>nfs.disable</entry>
- <entry>Turn-off volume being exported by NFS</entry>
- <entry> off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>performance.write-behind-window-size </entry>
- <entry>Size of the per-file write-behind buffer.</entry>
- <entry>1 MB </entry>
- <entry>Write-behind cache size </entry>
- </row>
- <row>
- <entry>performance.io-thread-count </entry>
- <entry>The number of threads in IO threads translator. </entry>
- <entry>16</entry>
- <entry>0 - 65 </entry>
- </row>
- <row>
- <entry>performance.flush-behind </entry>
- <entry>If this option is set ON, instructs write-behind translator to perform flush in background, by returning success (or any errors, if any of previous writes were failed) to application even before flush is sent to backend filesystem. </entry>
- <entry>On </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>performance.cache-max-file-size </entry>
- <entry>Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. </entry>
- <entry>2 ^ 64 -1 bytes </entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>performance.cache-min-file-size </entry>
- <entry> Sets the minimum file size cached by the io-cache translator. Values same as &quot;max&quot; above.</entry>
- <entry>0B</entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>performance.cache-refresh-timeout </entry>
- <entry>The cached data for a file will be retained till &apos;cache-refresh-timeout&apos; seconds, after which data re-validation is performed. </entry>
- <entry>1 sec </entry>
- <entry>0 - 61 </entry>
- </row>
- <row>
- <entry>performance.cache-size </entry>
- <entry>Size of the read cache.</entry>
- <entry> 32 MB </entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>server.allow-insecure </entry>
- <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry>
- <entry>on </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>server.grace-timeout</entry>
- <entry>Specifies the duration for the lock state to be maintained on the server after a network disconnection.</entry>
- <entry>10</entry>
- <entry>10 - 1800 secs</entry>
- </row>
- <row>
- <entry>server.statedump-path </entry>
- <entry>Location of the state dump file. </entry>
- <entry>/tmp directory of the brick </entry>
- <entry>New directory path</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <para>You can view the changed volume options using the<command> # gluster volume info <replaceable>VOLNAME</replaceable></command> command. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/>.</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Expanding">
- <title>Expanding Volumes</title>
- <para>You can expand volumes, as needed, while the cluster is online and available. For example, you might want to add a brick to a distributed volume, thereby increasing the distribution and adding to the capacity of the GlusterFS volume. </para>
- <para>Similarly, you might want to add a group of bricks to a distributed replicated volume, increasing the capacity of the GlusterFS volume. </para>
- <para><note>
- <para>When expanding distributed replicated and distributed striped volumes, you need to add a number of bricks that is a multiple of the replica or stripe count. For example, to expand a distributed replicated volume with a replica count of 2, you need to add bricks in multiples of 2 (such as 4, 6, 8, etc.). </para>
- </note></para>
- <para><emphasis role="bold">To expand a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>On the first server in the cluster, probe the server to which you want to add the new brick using the following command:</para>
- <para><command># gluster peer probe <replaceable>HOSTNAME</replaceable></command></para>
- <para>For example:</para>
- <para><programlisting># gluster peer probe server4
-Probe successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Add the brick using the following command: </para>
- <para><command># gluster volume add-brick <replaceable>VOLNAME NEW-BRICK</replaceable></command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume add-brick test-volume server4:/exp4
-Add Brick successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Check the volume information using the following command: </para>
- <para><command># gluster volume info </command></para>
- <para>The command displays information similar to the following:</para>
- <para><programlisting>Volume Name: test-volume
-Type: Distribute
-Status: Started
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Rebalance the volume to ensure that all files are distributed to the new brick.</para>
- <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Shrinking">
- <title>Shrinking Volumes</title>
- <para>You can shrink volumes, as needed, while the cluster is online and available. For example, you might need to remove a brick that has become inaccessible in a distributed volume due to hardware or network failure. </para>
- <para><note>
- <para>Data residing on the brick that you are removing will no longer be accessible at the Gluster mount point. Note however that only the configuration information is removed - you can continue to access the data directly from the brick, as necessary. </para>
- </note></para>
- <para>When shrinking distributed replicated and distributed striped volumes, you need to remove a number of bricks that is a multiple of the replica or stripe count. For example, to shrink a distributed striped volume with a stripe count of 2, you need to remove bricks in multiples of 2 (such as 4, 6, 8, etc.). In addition, the bricks you are trying to remove must be from the same sub-volume (the same replica or stripe set). </para>
- <para><emphasis role="bold">To shrink a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>Remove the brick using the following command:</para>
- <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command> <command>start</command></para>
- <para>For example, to remove server2:/exp2:</para>
- <para><programlisting># gluster volume remove-brick test-volume server2:/exp2 start
-
-Removing brick(s) can result in data loss. Do you want to Continue? (y/n)</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter &quot;y&quot; to confirm the operation. The command displays the following message indicating that the remove brick operation is successfully started: </para>
- <para><programlisting>Remove Brick successful </programlisting></para>
- </listitem>
- <listitem>
- <para>(Optional) View the status of the remove brick operation using the following command:</para>
- <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command><command> status</command></para>
- <para>For example, to view the status of remove brick operation on server2:/exp2 brick:</para>
- <para><screen># gluster volume remove-brick test-volume server2:/exp2 status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 34 340 162 in progress</screen></para>
- </listitem>
- <listitem>
- <para>Commit the remove brick operation using the following command:</para>
- <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command><command> commit</command></para>
- <para>For example, to view the status of remove brick operation on server2:/exp2 brick:</para>
- <para><screen># gluster volume remove-brick test-volume server2:/exp2 commit</screen></para>
- <para><programlisting>Remove Brick successful </programlisting></para>
- </listitem>
- <listitem>
- <para>Check the volume information using the following command: </para>
- <para><command># gluster volume info </command></para>
- <para>The command displays information similar to the following:</para>
- <para><programlisting># gluster volume info
-Volume Name: test-volume
-Type: Distribute
-Status: Started
-Number of Bricks: 3
-Bricks:
-Brick1: server1:/exp1
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Rebalance the volume to ensure that all files are distributed to the new brick.</para>
- <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Migrating">
- <title>Migrating Volumes</title>
- <para>You can migrate the data from one brick to another, as needed, while the cluster is online and available. </para>
- <para><emphasis role="bold">To migrate a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>Make sure the new brick, server5 in this example, is successfully added to the cluster.</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Migrate the data from one brick to another using the following command:</para>
- <para><command> # gluster volume replace-brick <code>VOLNAME</code><code> BRICK</code><code>NEW-BRICK</code> start</command></para>
- <para>For example, to migrate the data in server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:exp5 start
-Replace brick start operation successful</programlisting></para>
- <para><note>
- <para>You need to have the FUSE package installed on the server on which you are running the replace-brick command for the command to work.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>To pause the migration operation, if needed, use the following command: </para>
- <para><command># gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname> pause </command></para>
- <para>For example, to pause the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:exp5 pause
-Replace brick pause operation successful</programlisting></para>
- </listitem>
- <listitem>
- <para>To abort the migration operation, if needed, use the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>abort </command></para>
- <para>For example, to abort the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:exp5 abort
-Replace brick abort operation successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Check the status of the migration operation using the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>status </command></para>
- <para>For example, to check the data migration status from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 status
-Current File = /usr/src/linux-headers-2.6.31-14/block/Makefile
-Number of files migrated = 10567
-Migration complete</programlisting></para>
- <para>The status command shows the current file being migrated along with the current total number of files migrated. After completion of migration, it displays Migration complete.</para>
- </listitem>
- <listitem>
- <para>Commit the migration of data from one brick to another using the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>commit </command></para>
- <para>For example, to commit the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 commit
-replace-brick commit successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the migration of brick by viewing the volume info using the following command: </para>
- <para><command># gluster volume info <code>VOLNAME</code></command></para>
- <para>For example, to check the volume information of new brick server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume info test-volume
-Volume Name: testvolume
-Type: Replicate
-Status: Started
-Number of Bricks: 4
-Transport-type: tcp
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server4:/exp4
-Brick4: server5:/exp5
-
-The new volume details are displayed.
-</programlisting></para>
- <para>The new volume details are displayed.</para>
- <para>In the above example, previously, there were bricks; 1,2,3, and 4 and now brick 3 is replaced by brick 5.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing">
- <title>Rebalancing Volumes</title>
- <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. New directories created after expanding or shrinking of the volume will be evenly distributed automatically. For all the existing directories, the distribution can be fixed by rebalancing the layout and/or data. </para>
- <para>This section describes how to rebalance GlusterFS volumes in your storage environment, using the following common scenarios: </para>
- <itemizedlist>
- <listitem>
- <para>Fix Layout - Fixes the layout changes so that the files can actually go to newly added nodes. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout"/>. </para>
- </listitem>
- <listitem>
- <para>Fix Layout and Migrate Data - Rebalances volume by fixing the layout changes and migrating the existing data. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate"/>.</para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout">
- <title>Rebalancing Volume to Fix Layout Changes</title>
- <para>Fixing the layout is necessary because the layout structure is static for a given directory. In a scenario where new bricks have been added to the existing volume, newly created files in existing directories will still be distributed only among the old bricks. The <command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start </command>command will fix the layout information so that the files can also go to newly added nodes. When this command is issued, all the file stat information which is already cached will get revalidated. </para>
- <para>A fix-layout rebalance will only fix the layout changes and does not migrate data. If you want to migrate the existing data, use<command># gluster volume rebalance <varname>VOLNAME</varname> start </command> command to rebalance data among the servers. </para>
- <para><emphasis role="bold">To rebalance a volume to fix layout changes</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start the rebalance operation on any one of the server using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume fix-layout start
-Starting rebalance on volume test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate">
- <title>Rebalancing Volume to Fix Layout and Migrate Data</title>
- <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. </para>
- <para><emphasis role="bold">To rebalance a volume to fix layout and migrate the existing data</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start the rebalance operation on any one of the server using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> start</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume start
-Starting rebalancing on volume test-volume has been successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Start the migration operation forcefully on any one of the server using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> start force</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume start force
-Starting rebalancing on volume test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Status of Rebalance Operation</title>
- <para>You can display the status information about rebalance volume operation, as needed. </para>
- <para><emphasis role="bold">To view status of rebalance volume</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Check the status of the rebalance operation, using the following command:</para>
- <para><command># gluster volume rebalance <replaceable>VOLNAME</replaceable> status</command></para>
- <para>For example:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 416 1463 312 in progress</screen></para>
- <para>The time to complete the rebalance operation depends on the number of files on the volume along with the corresponding file sizes. Continue checking the rebalance status, verifying that the number of files rebalanced or total files scanned keeps increasing.</para>
- <para>For example, running the status command again might display a result similar to the following:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 498 1783 378 in progress</screen></para>
- <para>The rebalance status displays the following when the rebalance is complete:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed</screen></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Stopping Rebalance Operation</title>
- <para>You can stop the rebalance operation, as needed.</para>
- <para><emphasis role="bold">To stop rebalance</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Stop the rebalance operation using the following command:</para>
- <para><command># gluster volume rebalance <replaceable>VOLNAME</replaceable> stop</command></para>
- <para>For example:</para>
- <para><screen># gluster volume rebalance test-volume stop
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped
-Stopped rebalance process on volume test-volume </screen></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Stop">
- <title>Stopping Volumes</title>
- <para>To stop a volume</para>
- <orderedlist>
- <listitem>
- <para>Stop the volume using the following command:
-
-</para>
- <para><command># gluster volume stop <varname>VOLNAME </varname></command></para>
- <para>For example, to stop test-volume:</para>
- <para><programlisting># gluster volume stop test-volume
-Stopping volume will make its data inaccessible. Do you want to continue? (y/n)
-</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter <userinput>y</userinput> to confirm the operation. The output of the command displays the following:
-
-</para>
- <programlisting>Stopping volume test-volume has been successful</programlisting>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Delete">
- <title>Deleting Volumes</title>
- <para>To delete a volume </para>
- <orderedlist>
- <listitem>
- <para>Delete the volume using the following command:</para>
- <para><command># gluster volume delete <varname>VOLNAME</varname></command></para>
- <para>For example, to delete test-volume:</para>
- <para><programlisting># gluster volume delete test-volume
-Deleting volume will erase all information about the volume. Do you want to continue? (y/n)</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter <userinput role="bold">y</userinput> to confirm the operation. The command displays the following:</para>
- <para><programlisting>Deleting volume test-volume has been successful</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Self_heal">
- <title>Triggering Self-Heal on Replicate</title>
- <para>In replicate module, previously you had to manually trigger a self-heal when a brick goes offline and comes back online, to bring all the replicas in sync. Now the pro-active self-heal daemon runs in the background, diagnoses issues and automatically initiates self-healing every 10 minutes on the files which requires<emphasis role="italic"> healing</emphasis>. </para>
- <para>You can view the list of files that need <emphasis role="italic">healing</emphasis>, the list of files which are currently/previously <emphasis role="italic">healed</emphasis>, list of files which are in split-brain state, and you can manually trigger self-heal on the entire volume or only on the files which need <emphasis role="italic">healing</emphasis>.</para>
- <itemizedlist>
- <listitem>
- <para>Trigger self-heal only on the files which requires <emphasis role="italic">healing</emphasis>:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to trigger self-heal on files which requires <emphasis role="italic">healing</emphasis> of test-volume:</para>
- <para><screen># gluster volume heal test-volume
-Heal operation on volume test-volume has been successful</screen></para>
- </listitem>
- <listitem>
- <para>Trigger self-heal on all the files of a volume:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>full</command></para>
- <para>For example, to trigger self-heal on all the files of of test-volume:</para>
- <para><screen># gluster volume heal test-volume full
-Heal operation on volume test-volume has been successful</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files that needs <emphasis role="italic">healing</emphasis>:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info</command></para>
- <para>For example, to view the list of files on test-volume that needs <emphasis role="italic">healing</emphasis>:</para>
- <para><screen># gluster volume heal test-volume info
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1
-Number of entries: 101
-/95.txt
-/32.txt
-/66.txt
-/35.txt
-/18.txt
-/26.txt
-/47.txt
-/55.txt
-/85.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files that are self-healed:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info healed</command> </para>
- <para>For example, to view the list of files on test-volume that are self-healed:</para>
- <para><screen># gluster volume heal test-volume info healed
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1
-Number of entries: 69
-/99.txt
-/93.txt
-/76.txt
-/11.txt
-/27.txt
-/64.txt
-/80.txt
-/19.txt
-/41.txt
-/29.txt
-/37.txt
-/46.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files of a particular volume on which the self-heal failed:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info failed</command> </para>
- <para>For example, to view the list of files of test-volume that are not self-healed:</para>
- <para><screen># gluster volume heal test-volume info failed
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick server2:/gfs/test-volume_3
-Number of entries: 72
-/90.txt
-/95.txt
-/77.txt
-/71.txt
-/87.txt
-/24.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files of a particular volume which are in split-brain state:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info split-brain</command> </para>
- <para>For example, to view the list of files of test-volume which are in split-brain state:</para>
- <para><screen># gluster volume heal test-volume info split-brain
-Brick server1:/gfs/test-volume_2
-Number of entries: 12
-/83.txt
-/28.txt
-/69.txt
-...
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_2
-Number of entries: 12
-/83.txt
-/28.txt
-/69.txt
-...</screen></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_monitoring_workload.xml b/doc/admin-guide/en-US/admin_monitoring_workload.xml
deleted file mode 100644
index e85bc51d896..00000000000
--- a/doc/admin-guide/en-US/admin_monitoring_workload.xml
+++ /dev/null
@@ -1,878 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Monitor_Workload">
- <title>Monitoring your GlusterFS Workload</title>
- <para>You can monitor the GlusterFS volumes on different parameters. Monitoring volumes helps in capacity planning and performance tuning tasks of the GlusterFS volume. Using these information, you can identify and troubleshoot issues. </para>
- <para>You can use Volume Top and Profile commands to view the performance and identify bottlenecks/hotspots of each brick of a volume. This helps system administrators to get vital performance information whenever performance needs to be probed. </para>
- <para>You can also perform statedump of the brick processes and nfs server process of a volume, and also view volume status and volume information. </para>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile">
- <title>Running GlusterFS Volume Profile Command</title>
- <para>GlusterFS Volume Profile command provides an interface to get the per-brick I/O information for each File Operation (FOP) of a volume. The per brick information helps in identifying bottlenecks in the storage system.
-</para>
- <para>This section describes how to run GlusterFS Volume Profile command by performing the following operations:
-</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Start"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Display"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Stop"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Start">
- <title>Start Profiling</title>
- <para>You must start the Profiling to view the File Operation information for each brick.
-</para>
- <para><emphasis role="bold">To start profiling: </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start profiling using the following command:
- </para>
- </listitem>
- </itemizedlist>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> start </command></para>
- <para>For example, to start profiling on test-volume:
-</para>
- <para><programlisting># gluster volume profile test-volume start
-Profiling started on test-volume</programlisting></para>
- <para>When profiling on the volume is started, the following additional options are displayed in the Volume Info:
-</para>
- <para><programlisting>diagnostics.count-fop-hits: on
-
-diagnostics.latency-measurement: on</programlisting></para>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Display">
- <title>Displaying the I/0 Information</title>
- <para>You can view the I/O information of each brick.
-</para>
- <para>To display I/O information:
-</para>
- <itemizedlist>
- <listitem>
- <para>Display the I/O information using the following command:
-</para>
- </listitem>
- </itemizedlist>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> info</command>
-
-</para>
- <para>For example, to see the I/O information on test-volume:
-
-</para>
- <para><programlisting># gluster volume profile test-volume info
-Brick: Test:/export/2
-Cumulative Stats:
-
-Block 1b+ 32b+ 64b+
-Size:
- Read: 0 0 0
- Write: 908 28 8
-
-Block 128b+ 256b+ 512b+
-Size:
- Read: 0 6 4
- Write: 5 23 16
-
-Block 1024b+ 2048b+ 4096b+
-Size:
- Read: 0 52 17
- Write: 15 120 846
-
-Block 8192b+ 16384b+ 32768b+
-Size:
- Read: 52 8 34
- Write: 234 134 286
-
-Block 65536b+ 131072b+
-Size:
- Read: 118 622
- Write: 1341 594
-
-
-%-latency Avg- Min- Max- calls Fop
- latency Latency Latency
-___________________________________________________________
-4.82 1132.28 21.00 800970.00 4575 WRITE
-5.70 156.47 9.00 665085.00 39163 READDIRP
-11.35 315.02 9.00 1433947.00 38698 LOOKUP
-11.88 1729.34 21.00 2569638.00 7382 FXATTROP
-47.35 104235.02 2485.00 7789367.00 488 FSYNC
-
-------------------
-
-------------------
-
-Duration : 335
-
-BytesRead : 94505058
-
-BytesWritten : 195571980</programlisting></para>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Stop">
- <title>Stop Profiling</title>
- <para>You can stop profiling the volume, if you do not need profiling information anymore.
-</para>
- <para><emphasis role="bold">To stop profiling</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Stop profiling using the following command:
-</para>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> stop</command>
-</para>
- <para>For example, to stop profiling on test-volume:</para>
- <para><command># gluster volume profile <replaceable>test-volume</replaceable> stop</command> </para>
- <para><computeroutput>Profiling stopped on test-volume</computeroutput></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top">
- <title> Running GlusterFS Volume TOP Command </title>
- <para>GlusterFS Volume Top command allows you to view the glusterfs bricks’ performance metrics like
-read, write, file open calls, file read calls, file write calls, directory open calls, and directory real
-calls. The top command displays up to 100 results.
-</para>
- <para>This section describes how to run and view the results for the following GlusterFS Top commands:
-</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Read"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Write"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count">
- <title>Viewing Open fd Count and Maximum fd Count </title>
- <para>You can view both current open fd count (list of files that are currently the most opened and the
-count) on the brick and the maximum open fd count (count of files that are the currently open and
-the count of maximum number of files opened at any given point of time, since the servers are up
-and running). If the brick name is not specified, then open fd metrics of all the bricks belonging to
-the volume will be displayed.
-</para>
- <para><emphasis role="bold">To view open fd count and maximum fd count: </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>View open fd count and maximum fd count using the following command:</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> open [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command>
-</para>
- <para>For example, to view open fd count and maximum fd count on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable> and list top 10 open calls:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> open brick <replaceable>server:/export/</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick: server:/export/dir1 </computeroutput></para>
- <para><computeroutput>Current open fd&apos;s: 34 Max open fd&apos;s: 209 </computeroutput><programlisting> ==========Open file stats========
-
-open file name
-call count
-
-2 /clients/client0/~dmtmp/PARADOX/
- COURSES.DB
-
-11 /clients/client0/~dmtmp/PARADOX/
- ENROLL.DB
-
-11 /clients/client0/~dmtmp/PARADOX/
- STUDENTS.DB
-
-10 /clients/client0/~dmtmp/PWRPNT/
- TIPS.PPT
-
-10 /clients/client0/~dmtmp/PWRPNT/
- PCBENCHM.PPT
-
-9 /clients/client7/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client1/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client2/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client0/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client8/~dmtmp/PARADOX/
- STUDENTS.DB</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Read">
- <title>Viewing Highest File Read Calls </title>
- <para>You can view highest read calls on each brick. If brick name is not specified, then by default, list of
-100 files will be displayed.
-</para>
- <para><emphasis role="bold">To view highest file Read calls:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View highest file Read calls using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest Read calls on brick server:/export of test-volume:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> read brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick:</computeroutput> <replaceable>server:/export/dir1</replaceable><programlisting> ==========Read file stats========
-
-read filename
-call count
-
-116 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-64 /clients/client0/~dmtmp/SEED/MEDIUM.FIL
-
-54 /clients/client2/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client6/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client5/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client3/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client4/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client9/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client8/~dmtmp/SEED/LARGE.FIL</programlisting> </para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Write">
- <title>Viewing Highest File Write Calls </title>
- <para>You can view list of files which has highest file write calls on each brick. If brick name is not
-specified, then by default, list of 100 files will be displayed.
-</para>
- <para><emphasis role="bold">To view highest file Write calls:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View highest file Write calls using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest Write calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> write brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Write file stats========
-write call count filename
-
-83 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client7/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client1/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client2/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client8/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client5/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client4/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client6/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client3/~dmtmp/SEED/LARGE.FIL</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls">
- <title>Viewing Highest Open Calls on Directories </title>
- <para>You can view list of files which has highest open calls on directories of each brick. If brick name is
-not specified, then the metrics of all the bricks belonging to that volume will be displayed.
-</para>
- <para>To view list of open calls on each directory</para>
- <itemizedlist>
- <listitem>
- <para>View list of open calls on each directory using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> opendir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view open calls on brick server:/export/ of test-volume:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> opendir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Directory open stats========
-
-Opendir count directory name
-
-1001 /clients/client0/~dmtmp
-
-454 /clients/client8/~dmtmp
-
-454 /clients/client2/~dmtmp
-
-454 /clients/client6/~dmtmp
-
-454 /clients/client5/~dmtmp
-
-454 /clients/client9/~dmtmp
-
-443 /clients/client0/~dmtmp/PARADOX
-
-408 /clients/client1/~dmtmp
-
-408 /clients/client7/~dmtmp
-
-402 /clients/client4/~dmtmp</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls">
- <title>Viewing Highest Read Calls on Directory </title>
- <para>You can view list of files which has highest directory read calls on each brick. If brick name is not
-specified, then the metrics of all the bricks belonging to that volume will be displayed.
-</para>
- <para><emphasis role="bold">To view list of highest directory read calls on each brick</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of highest directory read calls on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> readdir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest directory read calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>:</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> readdir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command> </para>
- <para><code>Brick: <replaceable>server:/export/dir1</replaceable></code><programlisting>==========Directory readdirp stats========
-
-readdirp count directory name
-
-1996 /clients/client0/~dmtmp
-
-1083 /clients/client0/~dmtmp/PARADOX
-
-904 /clients/client8/~dmtmp
-
-904 /clients/client2/~dmtmp
-
-904 /clients/client6/~dmtmp
-
-904 /clients/client5/~dmtmp
-
-904 /clients/client9/~dmtmp
-
-812 /clients/client1/~dmtmp
-
-812 /clients/client7/~dmtmp
-
-800 /clients/client4/~dmtmp</programlisting>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf">
- <title>Viewing List of Read Performance on each Brick </title>
- <para>You can view the read throughput of files on each brick. If brick name is not specified, then the
-metrics of all the bricks belonging to that volume will be displayed. The output will be the read
-throughput.
-</para>
- <para><programlisting> ==========Read throughput file stats========
-
-read filename Time
-through
-put(MBp
-s)
-
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- TRIDOTS.POT 15:38:36.894610
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- PCBENCHM.PPT 15:38:39.815310
-2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:52:53.631499
-
-2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.926198
-
-2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.930445
-
-2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- COURSES.X04 15:38:40.549919
-
-2221.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- STUDENTS.VAL 15:52:53.298766
-
-2221.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- COURSES.DB 15:39:11.776780
-
-2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:10.251764
-
-2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
- BASEMACH.DOC 15:39:09.336572 </programlisting>This command will initiate a dd for the specified count and block size and measures the
-corresponding throughput.
-</para>
- <para><emphasis role="bold">To view list of read performance on each brick</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of read performance on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command>
-</para>
- <para>For example, to view read performance on brick server:/export/ of test-volume, 256 block size
-of count 1, and list count 10:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> read-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick: server:/export/dir1 256 bytes (256 B) copied, Throughput: 4.1 MB/s </computeroutput></para>
- <programlisting> ==========Read throughput file stats========
-
-read filename Time
-through
-put(MBp
-s)
-
-2912.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- TRIDOTS.POT 15:38:36.896486
-
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- PCBENCHM.PPT 15:38:39.815310
-
-2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:52:53.631499
-
-2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.926198
-
-2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.930445
-
-2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- COURSES.X04 15:38:40.549919
-
-2221.00 /clients/client9/~dmtmp/PARADOX/ -2011-01-31
- STUDENTS.VAL 15:52:53.298766
-
-2221.00 /clients/client8/~dmtmp/PARADOX/ -2011-01-31
- COURSES.DB 15:39:11.776780
-
-2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:10.251764
-
-2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
- BASEMACH.DOC 15:39:09.336572
- </programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf">
- <title>Viewing List of Write Performance on each Brick </title>
- <para>You can view list of write throughput of files on each brick. If brick name is not specified, then the
-metrics of all the bricks belonging to that volume will be displayed. The output will be the write
-throughput.
-</para>
- <para>This command will initiate a dd for the specified count and block size and measures the
-corresponding throughput.
-To view list of write performance on each brick:
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of write performance on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view write performance on brick <replaceable>server:/export/</replaceable> of <replaceable>test-volume</replaceable>, 256 block size
-of count 1, and list count 10:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> write-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick</code>: <replaceable>server:/export/dir1</replaceable>
-</para>
- <para><code>256 bytes (256 B) copied, Throughput: 2.8 MB/s </code><programlisting> ==========Write throughput file stats========
-
-write filename Time
-throughput
-(MBps)
-
-1170.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- SMALL.FIL 15:39:09.171494
-
-1008.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:39:09.73189
-
-949.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.927426
-
-936.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.933177
-897.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:09.33628
-
-897.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:09.27713
-
-885.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- SMALL.FIL 15:38:36.924271
-
-528.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:39:09.81893
-
-516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31
- FASTENER.MDB 15:39:01.797317
-</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Information">
- <title>Displaying Volume Information </title>
- <para>You can display information about a specific volume, or all volumes, as needed.</para>
- <para><emphasis role="bold">To display volume information </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information about a specific volume using the following command:</para>
- <para><command># gluster volume info </command><varname>VOLNAME</varname></para>
- <para>For example, to display information about test-volume:</para>
- <para><programlisting># gluster volume info test-volume
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Display information about all volumes using the following command:</para>
- <para><command># gluster volume info all</command></para>
- <para><programlisting># gluster volume info all
-
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4
-
-Volume Name: mirror
-Type: Distributed-Replicate
-Status: Started
-Number of Bricks: 2 X 2 = 4
-Bricks:
-Brick1: server1:/brick1
-Brick2: server2:/brick2
-Brick3: server3:/brick3
-Brick4: server4:/brick4
-
-Volume Name: Vol
-Type: Distribute
-Status: Started
-Number of Bricks: 1
-Bricks:
-Brick: server:/brick6
-
-</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Performing_Statedump">
- <title>Performing Statedump on a Volume </title>
- <para>Statedump is a mechanism through which you can get details of all internal variables and state of the glusterfs process at the time of issuing the command.You can perform statedumps of the brick processes and nfs server process of a volume using the statedump command. The following options can be used to determine what information is to be dumped:</para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">mem</emphasis> - Dumps the memory usage and memory pool details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">iobuf</emphasis> - Dumps iobuf details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">priv</emphasis> - Dumps private information of loaded translators.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">callpool</emphasis> - Dumps the pending calls of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">fd</emphasis> - Dumps the open fd tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">inode</emphasis> - Dumps the inode tables of the volume.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To display volume statedump </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display statedump of a volume or NFS server using the following command:</para>
- <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> [nfs] [all|mem|iobuf|callpool|priv|fd|inode]</command></para>
- <para>For example, to display statedump of test-volume:</para>
- <para><programlisting># gluster volume statedump test-volume
-Volume statedump successful</programlisting></para>
- <para>The statedump files are created on the brick servers in the<filename> /tmp</filename> directory or in the directory set using <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para>
- </listitem>
- <listitem>
- <para>By defult, the output of the statedump is stored at <filename> /tmp/&lt;brickname.PID.dump&gt;</filename> file on that particular server. Change the directory of the statedump file using the following command:</para>
- <para><command># gluster volume set <replaceable>VOLNAME</replaceable> server.statedump-path <replaceable>path</replaceable></command></para>
- <para>For example, to change the location of the statedump file of test-volume:</para>
- <para><programlisting># gluster volume set test-volume server.statedump-path /usr/local/var/log/glusterfs/dumps/
-Set volume successful</programlisting></para>
- <para>You can view the changed path of the statedump file using the following command:</para>
- <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Status">
- <title>Displaying Volume Status </title>
- <para>You can display the status information about a specific volume, brick or all volumes, as needed. Status information can be used to understand the current status of the brick, nfs processes, and overall file system. Status information can also be used to monitor and debug the volume information. You can view status of the volume along with the following details:</para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">detail</emphasis> - Displays additional information about the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">clients</emphasis> - Displays the list of clients connected to the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">mem</emphasis> - Displays the memory usage and memory pool details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">inode</emphasis> - Displays the inode tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">fd</emphasis> - Displays the open fd (file descriptors) tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">callpool</emphasis> - Displays the pending calls of the volume.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To display volume status </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information about a specific volume using the following command:</para>
- <para><command># gluster volume status [all|<replaceable>VOLNAME</replaceable> [<replaceable>BRICKNAME</replaceable>]] [detail|clients|mem|inode|fd|callpool]</command> </para>
- <para>For example, to display information about test-volume:</para>
- <para><programlisting># gluster volume status test-volume
-STATUS OF VOLUME: test-volume
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/1 24009 Y 22445
---------------------------------------------------------
-arch:/export/2 24010 Y 22450</programlisting></para>
- </listitem>
- <listitem>
- <para>Display information about all volumes using the following command:</para>
- <para><command># gluster volume status all</command>
-</para>
- <para><programlisting># gluster volume status all
-STATUS OF VOLUME: volume-test
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/4 24010 Y 22455
-
-STATUS OF VOLUME: test-volume
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/1 24009 Y 22445
---------------------------------------------------------
-arch:/export/2 24010 Y 22450</programlisting></para>
- </listitem>
- <listitem>
- <para>Display additional information about the bricks using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> detail</command>
-</para>
- <para>For example, to display additional information about the bricks of test-volume:</para>
- <para><programlisting># gluster volume status test-volume details
-STATUS OF VOLUME: test-volume
--------------------------------------------
-Brick : arch:/export/1
-Port : 24009
-Online : Y
-Pid : 16977
-File System : rootfs
-Device : rootfs
-Mount Options : rw
-Disk Space Free : 13.8GB
-Total Disk Space : 46.5GB
-Inode Size : N/A
-Inode Count : N/A
-Free Inodes : N/A
-
-Number of Bricks: 1
-Bricks:
-Brick: server:/brick6</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the list of clients accessing the volumes using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> clients</command>
-</para>
- <para>For example, to display the list of clients connected to test-volume:</para>
- <para><programlisting># gluster volume status test-volume clients
-Brick : arch:/export/1
-Clients connected : 2
-Hostname Bytes Read BytesWritten
--------- --------- ------------
-127.0.0.1:1013 776 676
-127.0.0.1:1012 50440 51200</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the memory usage and memory pool details of the bricks using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> mem</command>
-</para>
- <para>For example, to display the memory usage and memory pool details of the bricks of test-volume:</para>
- <screen>Memory status for volume : test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Mallinfo
---------
-Arena : 434176
-Ordblks : 2
-Smblks : 0
-Hblks : 12
-Hblkhd : 40861696
-Usmblks : 0
-Fsmblks : 0
-Uordblks : 332416
-Fordblks : 101760
-Keepcost : 100400
-
-Mempool Stats
--------------
-Name HotCount ColdCount PaddedSizeof AllocCount MaxAlloc
----- -------- --------- ------------ ---------- --------
-test-volume-server:fd_t 0 16384 92 57 5
-test-volume-server:dentry_t 59 965 84 59 59
-test-volume-server:inode_t 60 964 148 60 60
-test-volume-server:rpcsvc_request_t 0 525 6372 351 2
-glusterfs:struct saved_frame 0 4096 124 2 2
-glusterfs:struct rpc_req 0 4096 2236 2 2
-glusterfs:rpcsvc_request_t 1 524 6372 2 1
-glusterfs:call_stub_t 0 1024 1220 288 1
-glusterfs:call_stack_t 0 8192 2084 290 2
-glusterfs:call_frame_t 0 16384 172 1728 6</screen>
- </listitem>
- <listitem>
- <para>Display the inode tables of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> inode</command>
-</para>
- <para>For example, to display the inode tables of the test-volume:</para>
- <para><programlisting># gluster volume status test-volume inode
-inode tables for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Active inodes:
-GFID Lookups Ref IA type
----- ------- --- -------
-6f3fe173-e07a-4209-abb6-484091d75499 1 9 2
-370d35d7-657e-44dc-bac4-d6dd800ec3d3 1 1 2
-
-LRU inodes:
-GFID Lookups Ref IA type
----- ------- --- -------
-80f98abe-cdcf-4c1d-b917-ae564cf55763 1 0 1
-3a58973d-d549-4ea6-9977-9aa218f233de 1 0 1
-2ce0197d-87a9-451b-9094-9baa38121155 1 0 2</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the open fd tables of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> fd</command>
-</para>
- <para>For example, to display the open fd tables of the test-volume:</para>
- <para><screen># gluster volume status test-volume fd
-
-FD tables for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Connection 1:
-RefCount = 0 MaxFDs = 128 FirstFree = 4
-FD Entry PID RefCount Flags
--------- --- -------- -----
-0 26311 1 2
-1 26310 3 2
-2 26310 1 2
-3 26311 3 2
-
-Connection 2:
-RefCount = 0 MaxFDs = 128 FirstFree = 0
-No open fds
-
-Connection 3:
-RefCount = 0 MaxFDs = 128 FirstFree = 0
-No open fds</screen></para>
- </listitem>
- <listitem>
- <para>Display the pending calls of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> callpool</command>
-</para>
- <para>Each call has a call stack containing call frames.</para>
- <para>For example, to display the pending calls of test-volume:</para>
- <para><programlisting># gluster volume status test-volume
-
-Pending calls for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Pending calls: 2
-Call Stack1
- UID : 0
- GID : 0
- PID : 26338
- Unique : 192138
- Frames : 7
- Frame 1
- Ref Count = 1
- Translator = test-volume-server
- Completed = No
- Frame 2
- Ref Count = 0
- Translator = test-volume-posix
- Completed = No
- Parent = test-volume-access-control
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 3
- Ref Count = 1
- Translator = test-volume-access-control
- Completed = No
- Parent = repl-locks
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 4
- Ref Count = 1
- Translator = test-volume-locks
- Completed = No
- Parent = test-volume-io-threads
- Wind From = iot_fsync_wrapper
- Wind To = FIRST_CHILD (this)-&gt;fops-&gt;fsync
- Frame 5
- Ref Count = 1
- Translator = test-volume-io-threads
- Completed = No
- Parent = test-volume-marker
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 6
- Ref Count = 1
- Translator = test-volume-marker
- Completed = No
- Parent = /export/1
- Wind From = io_stats_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 7
- Ref Count = 1
- Translator = /export/1
- Completed = No
- Parent = test-volume-server
- Wind From = server_fsync_resume
- Wind To = bound_xl-&gt;fops-&gt;fsync</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_setting_volumes.xml b/doc/admin-guide/en-US/admin_setting_volumes.xml
deleted file mode 100644
index 051fb723a04..00000000000
--- a/doc/admin-guide/en-US/admin_setting_volumes.xml
+++ /dev/null
@@ -1,325 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Setting_Volumes">
- <title>Setting up GlusterFS Server Volumes</title>
- <para>A volume is a logical collection of bricks where each brick is an export directory on a server in the trusted storage pool. Most of the gluster management operations are performed on the volume. </para>
- <para>To create a new volume in your storage environment, specify the bricks that comprise the volume. After you have created a new volume, you must start it before attempting to mount it. </para>
- <itemizedlist>
- <listitem>
- <para>Volumes of the following types can be created in your storage environment: </para>
- <itemizedlist>
- <listitem>
- <para>Distributed - Distributed volumes distributes files throughout the bricks in the volume. You can use distributed volumes where the requirement is to scale storage and the redundancy is either not important or is provided by other hardware/software layers. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed"/> .</para>
- </listitem>
- <listitem>
- <para>Replicated – Replicated volumes replicates files across bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Replicated"/>.</para>
- </listitem>
- <listitem>
- <para>Striped – Striped volumes stripes data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped"/>.</para>
- </listitem>
- <listitem>
- <para>Distributed Striped - Distributed striped volumes stripe data across two or more nodes in the cluster. You should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped"/>.</para>
- </listitem>
- <listitem>
- <para>Distributed Replicated - Distributed replicated volumes distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated"/>. </para>
- </listitem>
- <listitem>
- <para>Distributed Striped Replicated – Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated"/>.
-</para>
- </listitem>
- <listitem>
- <para>Striped Replicated – Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more
-information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped_Replicated"/>.</para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To create a new volume </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Create a new volume :</para>
- <para><command># gluster volume create<replaceable> NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable> | replica <replaceable>COUNT</replaceable>] [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK1 NEW-BRICK2 NEW-BRICK3...</replaceable></command></para>
- <para>For example, to create a volume called test-volume consisting of server3:/exp3 and server4:/exp4:</para>
- <para><programlisting># gluster volume create test-volume server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed">
- <title>Creating Distributed Volumes</title>
- <para>In a distributed volumes files are spread randomly across the bricks in the volume. Use distributed volumes where you need to scale storage and redundancy is either not important or is provided by other hardware/software layers. </para>
- <para><note>
- <para>Disk/server failure in distributed volumes can result in a serious loss of data because directory contents are spread randomly across the bricks in the volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed volume</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed volume with four storage servers using tcp:</para>
- <para><programlisting># gluster volume create test-volume server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>(Optional) You can display the volume information:</para>
- <para><programlisting># gluster volume info
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Transport-type: tcp
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- <para>For example, to create a distributed volume with four storage servers over InfiniBand:</para>
- <para><programlisting># gluster volume create test-volume transport rdma server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Replicated">
- <title>Creating Replicated Volumes </title>
- <para>Replicated volumes create copies of files across multiple bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. </para>
- <para><note>
- <para>The number of bricks should be equal to of the replica count for a replicated volume.
-To protect against server and disk failures, it is recommended that the bricks of the volume are from different servers. </para>
- </note></para>
- <figure>
- <title>Illustration of a Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a replicated volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the replicated volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a replicated volume with two storage servers:</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Striped">
- <title>Creating Striped Volumes</title>
- <para>Striped volumes stripes data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files.</para>
- <para><note>
- <para>The number of bricks should be a equal to the stripe count for a striped volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Striped Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Striped_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a striped volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the striped volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a striped volume across two storage servers:</para>
- <para><programlisting># gluster volume create test-volume stripe 2 transport tcp server1:/exp1 server2:/exp2
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped">
- <title>Creating Distributed Striped Volumes </title>
- <para>Distributed striped volumes stripes files across two or more nodes in the cluster. For best results, you should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the stripe count for a distributed striped volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Striped Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Striped_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed striped volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed striped volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed striped volume across eight storage servers:</para>
- <para><programlisting># gluster volume create test-volume stripe 4 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated">
- <title>Creating Distributed Replicated Volumes </title>
- <para>Distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the replica count for a distributed replicated volume. Also, the order in which bricks are specified has a great effect on data protection. Each replica_count consecutive bricks in the list you give will form a replica set, with all replica sets combined into a volume-wide distribute set. To make sure that replica-set members are not placed on the same node, list the first brick on every server, then the second brick on every server in the same order, and so on. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed replicated volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed replicated volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, four node distributed (replicated) volume with a two-way mirror:
-</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>For example, to create a six node distributed (replicated) volume with a two-way mirror:</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated">
- <title>Creating Distributed Striped Replicated Volumes </title>
- <para>Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. </para>
- <para><note>
- <para>The number of bricks should be a multiples of number of stripe count and replica count for
-a distributed striped replicated volume.
- </para>
- </note></para>
- <para><emphasis role="bold">To create a distributed striped replicated volume</emphasis>
-</para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create a distributed striped replicated volume using the following command:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed replicated striped volume across eight storage servers:
-</para>
- <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Striped_Replicated">
- <title>Creating Striped Replicated Volumes </title>
- <para>Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the replicate count and stripe count for a
-striped replicated volume.
-</para>
- </note></para>
- <figure>
- <title>Illustration of a Striped Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Striped_Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a striped replicated volume</emphasis>
-</para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool consisting of the storage servers that will comprise the volume.</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create a striped replicated volume :</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport [tcp | rdma | tcp,rdma]] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a striped replicated volume across four storage servers:
-
-</para>
- <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>To create a striped replicated volume across six storage servers:
-</para>
- <para><programlisting># gluster volume create test-volume stripe 3 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Starting">
- <title>Starting Volumes </title>
- <para>You must start your volumes before you try to mount them. </para>
- <para><emphasis role="bold">To start a volume </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start a volume:</para>
- <para><command># gluster volume start <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to start test-volume:</para>
- <para><programlisting># gluster volume start test-volume
-Starting test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_settingup_clients.xml b/doc/admin-guide/en-US/admin_settingup_clients.xml
deleted file mode 100644
index 233aac4389c..00000000000
--- a/doc/admin-guide/en-US/admin_settingup_clients.xml
+++ /dev/null
@@ -1,411 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-GlusterFS_Client">
- <title>Accessing Data - Setting Up GlusterFS Client</title>
- <para>Gluster volumes can be accessed in multiple ways. One can use Gluster Native Client method for high concurrency, performance and transparent failover in GNU/Linux clients. Gluster exports volumes using NFS v3 protocol too.</para>
- <para>CIFS can also be used to access volumes by exporting the Gluster Native mount point as a samba export.</para>
- <section id="sect-Administration_Guide-GlusterFS_Client-Native">
- <title>Gluster Native Client</title>
- <para>The Gluster Native Client is a FUSE-based client running in user space. Gluster Native Client is the recommended method for accessing volumes if all the clustered features of GlusterFS has to be utilized.</para>
- <para>This section introduces the Gluster Native Client and explains how to install the software on client machines. This section also describes how to mount volumes on clients (both manually and automatically). </para>
- <section>
- <title>Installing the Gluster Native Client</title>
- <para>Gluster Native Client has a dependancy on FUSE module. To make sure FUSE module is loaded, execute below commands: </para>
- <orderedlist>
- <listitem>
- <para>Add the FUSE loadable kernel module (LKM) to the Linux kernel:</para>
- <para><command># modprobe fuse</command></para>
- </listitem>
- <listitem>
- <para>Verify that the FUSE module is loaded:</para>
- <para><command># dmesg | grep -i fuse </command></para>
- <para><command>fuse init (API version 7.13)</command></para>
- </listitem>
- </orderedlist>
- <section id="sect-Administration_Guide-GlusterFS_Client-Native-RPM">
- <title>Installing on RPM Based Distributions </title>
- <para>To install Gluster Native Client on RPM distribution-based systems</para>
- <orderedlist>
- <listitem>
- <para>Install required prerequisites on the client using the following command:</para>
- <para><command>$ sudo yum -y install fuse fuse-libs</command></para>
- </listitem>
- <listitem>
- <para>Download the latest glusterfs, glusterfs-fuse RPM files on each client. The glusterfs package contains the GlusterFS Binary and required libraries. The glusterfs-fuse package contains the FUSE plugin (in GlusterFS terms, its called Translator) required for mounting.</para>
- <para><note><para>Install 'glusterfs-rdma' RPM if RDMA support is required. 'glusterfs-rdma' contains RDMA transport module for Infiniband interconnect.</para></note></para>
- <para>You can download the software at <ulink url="http://bits.gluster.com/gluster/glusterfs/3.3.0/x86_64/"/>.</para>
- </listitem>
- <listitem>
- <para>Install Gluster Native Client on the client.</para>
- <para><command>$ sudo rpm -i glusterfs-3.3.0-1.x86_64.rpm </command></para>
- <para><command>$ sudo rpm -i glusterfs-fuse-3.3.0-1.x86_64.rpm </command></para>
- <para><command>$ sudo rpm -i glusterfs-rdma-3.3.0-1.x86_64.rpm</command></para>
- </listitem>
- </orderedlist>
- </section>
- <section condition="gfs">
- <title>Installing on Debian-based Distributions</title>
- <para>To install Gluster Native Client on Debian-based distributions</para>
- <orderedlist>
- <listitem>
- <para>Download the latest GlusterFS .deb file.</para>
- <para>You can download the software at <ulink url="http://www.gluster.org/download/"/>.</para>
- </listitem>
- <listitem>
- <para>Uninstall GlusterFS v3.1.x/v3.2.x (or an earlier version) from the client using the following command:
-</para>
- <para><command>$ sudo dpkg -r glusterfs </command></para>
- <para>(Optional) Run <command>$ sudo dpkg -purge glusterfs </command>to purge the configuration files.</para>
- </listitem>
- <listitem>
- <para>Install Gluster Native Client on the client using the following command:
-</para>
- <para><command>$ sudo dpkg -i glusterfs-$version.deb </command></para>
- <para>For example:
-</para>
- <para><command>$ sudo dpkg -i glusterfs-3.3.0.deb </command></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Performing a Source Installation</title>
- <para>To build and install Gluster Native Client from the source code</para>
- <orderedlist>
- <listitem>
- <para>Create a new directory using the following commands:</para>
- <para><command># mkdir glusterfs </command></para>
- <para><command># cd glusterfs</command></para>
- </listitem>
- <listitem>
- <para>Download the source code.</para>
- <para>You can download the source at <ulink url="http://www.gluster.org/download/"/>.</para>
- </listitem>
- <listitem>
- <para>Extract the source code using the following command:
-</para>
- <para><command># tar -xvzf glusterfs-3.3.0.tar.gz </command></para>
- </listitem>
- <listitem>
- <para>Run the configuration utility using the following command:
-</para>
- <para><code># ./configure </code></para>
- <para><code>...</code></para>
- <para><code>GlusterFS configure summary </code></para>
- <para><code>=========================== </code></para>
- <para><code>FUSE client : yes </code></para>
- <para><code>Infiniband verbs : yes </code></para>
- <para><code>epoll IO multiplex : yes </code></para>
- <para><code>argp-standalone : no </code></para>
- <para><code>fusermount : no </code></para>
- <para><code>readline : yes</code></para>
- <para><note><para>The configuration summary shown above is sample, it can vary depending on other packages.</para></note></para>
- </listitem>
- <listitem>
- <para>Build the Gluster Native Client software using the following commands:
-</para>
- <para><command># make </command></para>
- <para><command># make install</command></para>
- </listitem>
- <listitem>
- <para>Verify that the correct version of Gluster Native Client is installed, using the following command:
-</para>
- <para><command># glusterfs –-version</command></para>
- </listitem>
- </orderedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-Mounting_Volumes">
- <title>Mounting Volumes</title>
- <para>After installing the Gluster Native Client, you need to mount Gluster volumes to access data. There are two methods you can choose: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-Manuall"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-Automatic"/></para>
- </listitem>
- </itemizedlist>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-Testing"/>. </para>
- <para><note>
- <para>Server names selected during creation of Volumes should be resolvable in the client machine. You can use appropriate /etc/hosts entries or DNS server to resolve server names to IP addresses. </para>
- </note></para>
- <section id="sect-Administration_Guide-GlusterFS_Client-Manuall">
- <title>Manually Mounting Volumes</title>
- <para>To manually mount a Gluster volume </para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, use the following command:
-</para>
- <para><command># mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -t glusterfs server1:/test-volume /mnt/glusterfs</command></para>
- <note>
- <para>The server specified in the mount command is only used to fetch the gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount). </para>
- </note>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-Automatic" dir="lro">
- <title>Automatically Mounting Volumes</title>
- <para><emphasis role="bold">To automatically mount a Gluster volume</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, edit the /etc/fstab file and add the following line:
-</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev 0 0 </command></para>
- <para>For example:
-</para>
- <para><code>server1:/test-volume /mnt/glusterfs glusterfs defaults,_netdev 0 0</code></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Mounting Options</emphasis></para>
- <para>You can specify the following options when using the <command>mount -t glusterfs</command> command. Note that you need to separate all options with commas.</para>
- <para>backupvolfile-server=server-name</para>
- <para>fetch-attempts=N (where N is number of attempts)</para>
- <para>log-level=loglevel</para>
- <para>log-file=logfile</para>
- <para>direct-io-mode=[enable|disable]</para>
- <para>ro (for readonly mounts)</para>
- <para>acl (for enabling posix-ACLs)</para>
- <para>worm (making the mount WORM - Write Once, Read Many type)</para>
- <para>selinux (enable selinux on GlusterFS mount</para>
- <para></para>
- <para>For example: </para>
- <para><code># mount -t glusterfs -o backupvolfile-server=volfile_server2,fetch-attempts=2,log-level=WARNING,log-file=/var/log/gluster.log server1:/test-volume /mnt/glusterfs</code></para>
- <para>Using /etc/fstab, options would look like below:</para>
- <para><code>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev,log-level=WARNING,log-file=/var/log/gluster.log 0 0 </code></para>
- <para>If <option>backupvolfile-server</option> option is added while mounting fuse client, when the first
- volfile server fails, then the server specified in <option>backupvolfile-server</option> option is used as volfile server to mount the client.</para>
- <para>In <code>fetch-attempts=N</code> option, specify the number of attempts to fetch volume files while mounting a volume. This option will be useful when round-robin DNS is configured for the server-name. </para>
- </section>
- </section>
- </section>
-
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS">
- <title>NFS</title>
- <para>You can use NFS v3 to access to gluster volumes.</para>
- <para>GlusterFS 3.3.0, now includes network lock manager (NLM) v4 feature too. NLM enables applications on NFSv3 clients to do record locking on files. NLM program is started automatically with the NFS server process.</para>
- <para>This section describes how to use NFS to mount Gluster volumes (both manually and automatically). </para>
-
- <section>
- <title>Using NFS to Mount Volumes</title>
- <para>You can use either of the following methods to mount Gluster volumes: </para>
- <para><itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Manual"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic"/></para>
- </listitem>
- </itemizedlist></para>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-Testing"/>. </para>
-
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Manual">
- <title>Manually Mounting Volumes Using NFS </title>
- <para>To manually mount a Gluster volume using NFS </para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, use the following command:
-</para>
- <para><command># mount -t nfs -o vers=3 HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command>
-</para>
- <para>For example:</para>
- <para><command># mount -t nfs -o vers=3 server1:/test-volume /mnt/glusterfs</command></para>
- <para><note>
- <para> Gluster NFS server does not support UDP. If the NFS client you are using defaults to connecting using UDP, the following message appears: </para>
- <para><code>requested NFS version or transport protocol is not supported</code>. </para>
- </note></para>
- <para><emphasis role="bold">To connect using TCP</emphasis></para>
- </listitem>
- <listitem>
- <para>Add the following option to the mount command:
-</para>
- <para><command>-o mountproto=tcp </command></para>
- <para>For example:
-</para>
- <para><command># mount -o mountproto=tcp,vers=3 -t nfs server1:/test-volume /mnt/glusterfs</command></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To mount Gluster NFS server from a Solaris client </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Use the following command:
-</para>
- <para><command># mount -o proto=tcp,vers=3 nfs://HOSTNAME-OR-IPADDRESS:38467/VOLNAME MOUNTDIR</command></para>
- <para>
-For example:</para>
- <para><command> # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs</command></para>
- </listitem>
- </itemizedlist>
- </section>
-
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic">
- <title>Automatically Mounting Volumes Using NFS</title>
- <para>You can configure your system to automatically mount Gluster volumes using NFS each time the system starts.</para>
- <para><emphasis role="bold">To automatically mount a Gluster volume using NFS </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, edit the /etc/fstab file and add the following line:</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,vers=3 0 0</command></para>
- <para>For example,</para>
- <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,vers=3 0 0</command></para>
- <para>If default transport to mount NFS is UDP, use below line in fstab</para>
- <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,mountproto=tcp 0 0</command></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To automount NFS mounts</emphasis></para>
- <para>Gluster supports *nix standard method of automounting NFS mounts. Update the /etc/auto.master and /etc/auto.misc and restart the autofs service. After that, whenever a user or process attempts to access the directory it will be mounted in the background. </para>
- </section>
-
- </section>
-
- </section>
-
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS">
- <title>CIFS</title>
- <para>You can use CIFS to access to volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. You can export glusterfs mount point as the samba export, and then mount it using CIFS protocol.</para>
- <para>This section describes how to mount CIFS shares on Microsoft Windows-based clients (both manually and automatically) and how to verify that the volume has mounted successfully.</para>
- <para><note>
- <para> CIFS access using the Mac OS X Finder is not supported, however, you can use the Mac OS X command line to access Gluster volumes using CIFS.</para>
- </note></para>
-
- <section>
- <title>Using CIFS to Mount Volumes</title>
- <para>You can use either of the following methods to mount Gluster volumes: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic"/></para>
- </listitem>
- </itemizedlist>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-Testing"/>.</para>
- <para>You can also use Samba for exporting Gluster Volumes through CIFS protocol.</para>
-
- <section>
- <title>Exporting Gluster Volumes Through Samba</title>
- <para>We recommend you to use Samba for exporting Gluster volumes through the CIFS protocol. </para>
- <para><emphasis role="bold">To export volumes through CIFS protocol </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Mount a Gluster volume. For more information on mounting volumes, see <xref linkend="sect-Administration_Guide-GlusterFS_Client-Mounting_Volumes"/>.</para>
- </listitem>
- <listitem>
- <para>Setup Samba configuration to export the mount point of the Gluster volume.</para>
- <para>For example, if a Gluster volume is mounted on /mnt/gluster, you must edit smb.conf file to enable exporting this through CIFS. Open smb.conf file in an editor and add the following lines for a simple configuration:</para>
- <para>[glustertest]
- </para>
- <para> comment = For testing a Gluster volume exported through CIFS
- </para>
- <para> path = /mnt/glusterfs
- </para>
- <para> read only = no
- </para>
- <para> guest ok = yes</para>
- </listitem>
- </orderedlist>
- <para>Save the changes and start the smb service using your systems init scripts (/etc/init.d/smb [re]start).</para>
- <para><note>
- <para>To be able mount from any server in the trusted storage pool, you must repeat these steps on each Gluster node. For more advanced configurations, see Samba documentation. </para>
- </note></para>
- </section>
-
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual">
- <title>Manually Mounting Volumes Using CIFS </title>
- <para>You can manually mount Gluster volumes using CIFS on Microsoft Windows-based client machines. </para>
- <para><emphasis role="bold">To manually mount a Gluster volume using CIFS </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Using Windows Explorer, choose <emphasis role="bold">Tools &gt; Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para>
- </listitem>
- <listitem>
- <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Finish.</emphasis></para>
- </listitem>
- </orderedlist>
- <para>The network drive (mapped to the volume) appears in the Computer window.</para>
- <para><emphasis role="bold">Alternatively, to manually mount a Gluster volume using CIFS.</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Click <emphasis role="bold">Start &gt; Run</emphasis> and enter the following:</para>
- <para><command>
- <code>\\SERVERNAME\VOLNAME</code>
- </command></para>
- <para>For example:</para>
- <para><command>
- <code>\\server1\test-volume</code>
- </command></para>
- </listitem>
- </itemizedlist>
- </section>
-
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic">
- <title>Automatically Mounting Volumes Using CIFS</title>
- <para>You can configure your system to automatically mount Gluster volumes using CIFS on Microsoft Windows-based clients each time the system starts.</para>
- <para><emphasis role="bold">To automatically mount a Gluster volume using CIFS</emphasis></para>
- <para>The network drive (mapped to the volume) appears in the Computer window and is reconnected each time the system starts.</para>
- <orderedlist>
- <listitem>
- <para>Using Windows Explorer, choose <emphasis role="bold">Tools &gt; Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para>
- </listitem>
- <listitem>
- <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. </para>
- </listitem>
- <listitem>
- <para>Click the <emphasis role="bold">Reconnect</emphasis> at logon checkbox.</para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Finish.</emphasis></para>
- </listitem>
- </orderedlist>
- </section>
-
- </section>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-Testing">
- <title>Testing Mounted Volumes</title>
- <para>To test mounted volumes</para>
- <itemizedlist>
- <listitem>
- <para>Use the following command:</para>
- <para><command># mount </command></para>
- <para>If the gluster volume was successfully mounted, the output of the mount command on the client will be similar to this example:</para>
- <para><code>server1:/test-volume on /mnt/glusterfs type fuse.glusterfs (rw,allow_other,default_permissions,max_read=131072</code></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Use the following command:</para>
- <para><command># df -h </command></para>
- <para>The output of df command on the client will display the aggregated storage space from all the bricks in a volume similar to this example:</para>
- <para><code># df -h /mnt/glusterfs</code></para>
- <para><code>Filesystem Size Used Avail Use% Mounted on </code></para>
- <para><code>server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs</code></para>
- </listitem>
- <listitem>
- <para>Change to the directory and list the contents by entering the following:</para>
- <para><command># cd MOUNTDIR </command></para>
- <para><command># ls</command></para>
- </listitem>
- <listitem>
- <para>For example,</para>
- <para><code># cd /mnt/glusterfs </code></para>
- <para><code># ls</code></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_start_stop_daemon.xml b/doc/admin-guide/en-US/admin_start_stop_daemon.xml
deleted file mode 100644
index bdab0b8b608..00000000000
--- a/doc/admin-guide/en-US/admin_start_stop_daemon.xml
+++ /dev/null
@@ -1,56 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Start_Stop_Daemon">
- <title id="chap-Administration_Guide-Stop_Start_Daemon">Managing the glusterd Service</title>
- <para>After installing GlusterFS, you must start glusterd service. The glusterd service serves as the Gluster elastic volume manager, overseeing glusterfs processes, and co-ordinating dynamic volume operations, such as adding and removing volumes across multiple storage servers non-disruptively.</para>
- <para>This section describes how to start the glusterd service in the following ways: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Manually"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Automatically"/></para>
- </listitem>
- </itemizedlist>
- <note>
- <para>You must start glusterd on all GlusterFS servers.</para>
- </note>
- <section id="sect-Administration_Guide-Start_Stop_Daemon-Manually">
- <title>Starting and Stopping glusterd Manually</title>
- <para>This section describes how to start and stop glusterd manually</para>
- <itemizedlist>
- <listitem>
- <para>To start glusterd manually, enter the following command:</para>
- <para><command># /etc/init.d/glusterd start </command></para>
- </listitem>
- <listitem>
- <para>To stop glusterd manually, enter the following command: </para>
- <para><command># /etc/init.d/glusterd stop</command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Start_Stop_Daemon-Automatically">
- <title>Starting glusterd Automatically</title>
- <para condition="gfs">This section describes how to configure the system to automatically start the glusterd service every time the system boots. </para>
- <para condition="appliance">To automatically start the glusterd service every time the system boots, enter the following from the command line: </para>
- <para condition="appliance"><command># chkconfig glusterd on </command></para>
- <section condition="gfs">
- <title condition="gfs">Red Hat-based Systems</title>
- <para>To configure Red Hat-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line: </para>
- <para><command># chkconfig glusterd on </command></para>
- </section>
- <section condition="gfs">
- <title condition="gfs">Debian-based Systems</title>
- <para>To configure Debian-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line:</para>
- <para><command># update-rc.d glusterd defaults</command></para>
- </section>
- <section condition="gfs">
- <title condition="gfs">Systems Other than Red Hat and Debain</title>
- <para>To configure systems other than Red Hat or Debian to automatically start the glusterd service every time the system boots, enter the following entry to the<emphasis role="italic"> /etc/rc.local</emphasis> file: </para>
- <para><command># echo &quot;glusterd&quot; &gt;&gt; /etc/rc.local </command></para>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_storage_pools.xml b/doc/admin-guide/en-US/admin_storage_pools.xml
deleted file mode 100644
index 2c4a5cabe1d..00000000000
--- a/doc/admin-guide/en-US/admin_storage_pools.xml
+++ /dev/null
@@ -1,57 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Storage-pool">
- <title>Setting up Trusted Storage Pools</title>
- <para>Before you can configure a GlusterFS volume, you must create a trusted storage pool consisting of the storage servers that provides bricks to a volume. </para>
- <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone. To add additional storage servers to the storage pool, you can use the probe command from a storage server that is already part of the trusted storage pool. </para>
- <para><note>
- <para>Do not self-probe the first server/localhost.</para>
- </note></para>
- <para>The glusterd service must be running on all storage servers that you want to add to the storage pool. See <xref linkend="chap-Administration_Guide-Start_Stop_Daemon"/> for more information.</para>
- <section id="sect-Administration_Guide-Storage_Pools-Adding_Servers">
- <title>Adding Servers to Trusted Storage Pool</title>
- <para>To create a trusted storage pool, add servers to the trusted storage pool</para>
- <orderedlist>
- <listitem>
- <para>The hostnames used to create the storage pool must be resolvable by DNS. Also make sure that firewall is not blocking the probe requests/replies. (iptables -F)</para>
- <para>To add a server to the storage pool:</para>
- <para><command># gluster peer probe <replaceable>server</replaceable></command></para>
- <para>For example, to create a trusted storage pool of four servers, add three servers to the storage pool from server1:</para>
- <para><programlisting># gluster peer probe server2
-Probe successful
-
-# gluster peer probe server3
-Probe successful
-
-# gluster peer probe server4
-Probe successful
-</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the peer status from the first server using the following commands:</para>
- <para><programlisting># gluster peer status
-Number of Peers: 3
-
-Hostname: server2
-Uuid: 5e987bda-16dd-43c2-835b-08b7d55e94e5
-State: Peer in Cluster (Connected)
-
-Hostname: server3
-Uuid: 1e0ca3aa-9ef7-4f66-8f15-cbc348f29ff7
-State: Peer in Cluster (Connected)
-
-Hostname: server4
-Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7
-State: Peer in Cluster (Connected)</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Removing Servers from the Trusted Storage Pool</title>
- <para>To remove a server from the storage pool:</para>
- <para><command># gluster peer detach<replaceable> server</replaceable></command></para>
- <para> For example, to remove server4 from the trusted storage pool:</para>
- <para><programlisting># gluster peer detach server4
-Detach successful</programlisting></para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_troubleshooting.xml b/doc/admin-guide/en-US/admin_troubleshooting.xml
deleted file mode 100644
index 1c6866d8ba7..00000000000
--- a/doc/admin-guide/en-US/admin_troubleshooting.xml
+++ /dev/null
@@ -1,508 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Troubleshooting">
- <title>Troubleshooting GlusterFS </title>
- <para>This section describes how to manage GlusterFS logs and most common troubleshooting scenarios
-related to GlusterFS.
-</para>
- <section>
- <title>Managing GlusterFS Logs </title>
- <para>This section describes how to manage GlusterFS logs by performing the following operation:
-
-</para>
- <itemizedlist>
- <listitem>
- <para>Rotating Logs
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Rotating Logs </title>
- <para>Administrators can rotate the log file in a volume, as needed.
-</para>
- <para><emphasis role="bold">To rotate a log file </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Rotate the log file using the following command:
-</para>
- <para><command># gluster volume log rotate <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to rotate the log file on test-volume:
-</para>
- <programlisting># gluster volume log rotate test-volume
-log rotate successful
-</programlisting>
- <note>
- <para>When a log file is rotated, the contents of the current log file are moved to log-file-
-name.epoch-time-stamp.
-</para>
- </note>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Troubleshooting Geo-replication </title>
- <para>This section describes the most common troubleshooting scenarios related to GlusterFS Geo-replication.
-</para>
- <section>
- <title>Locating Log Files </title>
- <para>For every Geo-replication session, the following three log files are associated to it (four, if the slave is a
-gluster volume):
-</para>
- <itemizedlist>
- <listitem>
- <para>Master-log-file - log file for the process which monitors the Master volume
-</para>
- </listitem>
- <listitem>
- <para>Slave-log-file - log file for process which initiates the changes in slave
-</para>
- </listitem>
- <listitem>
- <para>Master-gluster-log-file - log file for the maintenance mount point that Geo-replication module
-uses to monitor the master volume
-</para>
- </listitem>
- <listitem>
- <para>Slave-gluster-log-file - is the slave&apos;s counterpart of it
-</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Master Log File</emphasis>
-</para>
- <para>To get the Master-log-file for geo-replication, use the following command:
-</para>
- <para><command>gluster volume geo-replication <code>MASTER SLAVE</code> config log-file</command>
-</para>
- <para>For example:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config log-file </command></para>
- <para><emphasis role="bold">Slave Log File </emphasis></para>
- <para>To get the log file for Geo-replication on slave (glusterd must be running on slave machine), use the
-following commands:
-</para>
- <orderedlist>
- <listitem>
- <para>On master, run the following command:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config session-owner 5f6e5200-756f-11e0-a1f0-0800200c9a66 </command></para>
- <para>Displays the session owner details.
-</para>
- </listitem>
- <listitem>
- <para>On slave, run the following command:
-</para>
- <para><command># gluster volume geo-replication /data/remote_dir config log-file /var/log/gluster/${session-owner}:remote-mirror.log </command></para>
- </listitem>
- <listitem>
- <para>Replace the session owner details (output of Step 1) to the output of the Step 2 to get the
-location of the log file.
-</para>
- <para><command>/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log</command>
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Rotating Geo-replication Logs</title>
- <para>Administrators can rotate the log file of a particular master-slave session, as needed.
-When you run geo-replication&apos;s <command> log-rotate</command> command, the log file
-is backed up with the current timestamp suffixed to the file
-name and signal is sent to gsyncd to start logging to a new
-log file.</para>
- <para><emphasis role="bold">To rotate a geo-replication log file </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Rotate log file for a particular master-slave session using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>master slave</replaceable> log-rotate</command>
-</para>
- <para>For example, to rotate the log file of master <filename>Volume1</filename> and slave <filename>example.com:/data/remote_dir</filename>
-:
-</para>
- <programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir log rotate
-log rotate successful</programlisting>
- </listitem>
- <listitem>
- <para>Rotate log file for all sessions for a master volume using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>master</replaceable> log-rotate</command>
-</para>
- <para>For example, to rotate the log file of master <filename>Volume1</filename>:
-</para>
- <programlisting># gluster volume geo-replication Volume1 log rotate
-log rotate successful</programlisting>
- </listitem>
- <listitem>
- <para>Rotate log file for all sessions using the following command:
-</para>
- <para><command># gluster volume geo-replication log-rotate</command>
-</para>
- <para>For example, to rotate the log file for all sessions:</para>
- <programlisting># gluster volume geo-replication log-rotate
-log rotate successful</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Synchronization is not complete </title>
- <para><emphasis role="bold">Description</emphasis>: GlusterFS Geo-replication did not synchronize the data completely but still the geo-
-replication status displayed is OK.
-</para>
- <para><emphasis role="bold">Solution</emphasis>: You can enforce a full sync of the data by erasing the index and restarting GlusterFS Geo-
-replication. After restarting, GlusterFS Geo-replication begins synchronizing all the data. All files are compared using checksum, which can be a lengthy and high resource utilization operation on large
-data sets. If the error situation persists, contact Red Hat Support.
-</para>
- <para>For more information about erasing index, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/>.
-</para>
- </section>
- <section>
- <title>Issues in Data Synchronization </title>
- <para><emphasis role="bold">Description</emphasis>: Geo-replication display status as OK, but the files do not get synced, only
-directories and symlink gets synced with the following error message in the log:
-</para>
- <para><errortext>[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to sync ./some_file` </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: Geo-replication invokes rsync v3.0.0 or higher on the host and the remote machine. You must verify if
-you have installed the required version.
-</para>
- </section>
- <section>
- <title>Geo-replication status displays Faulty very often </title>
- <para><emphasis role="bold">Description</emphasis>: Geo-replication displays status as faulty very often with a backtrace similar to
-the following:
-</para>
- <para><errortext>2011-04-28 14:06:18.378859] E [syncdutils:131:log_raise_exception] &lt;top&gt;: FAIL: Traceback (most recent call last): File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py&quot;, line 152, in twraptf(*aa) File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/repce.py&quot;, line 118, in listen rid, exc, res = recv(self.inf) File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/repce.py&quot;, line 42, in recv return pickle.load(inf) EOFError </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: This error indicates that the RPC communication between the master geo-replication module and slave
-geo-replication module is broken and this can happen for various reasons. Check if it satisfies all the following
-pre-requisites:
-</para>
- <itemizedlist>
- <listitem>
- <para>Password-less SSH is set up properly between the host where geo-replication command is executed and the remote machine where the slave geo-replication is located.
-</para>
- </listitem>
- <listitem>
- <para>If FUSE is installed in the machine where the geo-replication command is executed, because geo-replication module mounts the GlusterFS volume using FUSE to sync data.
-</para>
- </listitem>
- <listitem>
- <para>If the <emphasis role="bold">Slave</emphasis> is a volume, check if that volume is started.
-</para>
- </listitem>
- <listitem>
- <para>If the Slave is a plain directory, verify if the directory has been created already with the
-required permissions.
-</para>
- </listitem>
- <listitem>
- <para>If GlusterFS 3.3 or higher is not installed in the default location (in Master) and has been prefixed to be
-installed in a custom location, configure the <command>gluster-command</command> for it to point to the exact
-location.
-</para>
- </listitem>
- <listitem>
- <para>If GlusterFS 3.3 or higher is not installed in the default location (in slave) and has been prefixed to be
-installed in a custom location, configure the <command>remote-gsyncd-command</command> for it to point to the
-exact place where geo-replication is located.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Intermediate Master goes to Faulty State </title>
- <para><emphasis role="bold">Description</emphasis>: In a cascading set-up, the intermediate master goes to faulty state with the following
-log:
-</para>
- <para><errortext>raise RuntimeError (&quot;aborting on uuid change from %s to %s&quot; % \ RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f- 4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154 </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: In a cascading set-up the Intermediate master is loyal to the original primary master. The
-above log means that the geo-replication module has detected change in primary master.
-If this is the desired behavior, delete the config option volume-id in the session initiated from the
-intermediate master.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting POSIX ACLs </title>
- <para>This section describes the most common troubleshooting issues related to POSIX ACLs.
-</para>
- <section>
- <title>setfacl command fails with “setfacl: &lt;file or directory name&gt;: Operation not supported†error </title>
- <para>You may face this error when the backend file systems in one of the servers is not mounted with
-the &quot;-o acl&quot; option. The same can be confirmed by viewing the following error message in the log file
-of the server &quot;Posix access control list is not supported&quot;.
-</para>
- <para><emphasis role="bold">Solution</emphasis>: Remount the backend file system with &quot;-o acl&quot; option. For more information, see <xref linkend="sect-Administration_Guide-ACLs-Activating_ACLs-Server"/>.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting Hadoop Compatible Storage </title>
- <para>This section describes the most common troubleshooting issues related to Hadoop Compatible
-Storage.
-
- </para>
- <section id="sect-Administration_Guide-Troubleshooting-Test_Section_1">
- <title>Time Sync</title>
- <para>Running MapReduce job may throw exceptions if the clocks are out-of-sync on the hosts in the cluster.
-
- </para>
- <para><emphasis role="bold">Solution</emphasis>: Sync the time on all hosts using ntpd program.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting NFS </title>
- <para>This section describes the most common troubleshooting issues related to NFS .
-</para>
- <section>
- <title>mount command on NFS client fails with “RPC Error: Program not registered†</title>
- <para>Start portmap or rpcbind service on the machine where NFS server is running.
-</para>
- <para>This error is encountered when the server has not started correctly.
-</para>
- <para>On most Linux distributions this is fixed by starting portmap:
-</para>
- <para><command>$ /etc/init.d/portmap start</command>
-</para>
- <para>On some distributions where portmap has been replaced by rpcbind, the following command is
-required:
-</para>
- <para><command>$ /etc/init.d/rpcbind start </command></para>
- <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted.
-</para>
- </section>
- <section>
- <title>NFS server start-up fails with “Port is already in use†error in the log file.&quot; </title>
- <para>Another Gluster NFS server is running on the same machine.
-</para>
- <para>This error can arise in case there is already a Gluster NFS server running on the same machine.
-This situation can be confirmed from the log file, if the following error lines exist:
-</para>
- <para><screen>[2010-05-26 23:40:49] E [rpc-socket.c:126:rpcsvc_socket_listen] rpc-socket: binding socket failed:Address already in use
-[2010-05-26 23:40:49] E [rpc-socket.c:129:rpcsvc_socket_listen] rpc-socket: Port is already in use
-[2010-05-26 23:40:49] E [rpcsvc.c:2636:rpcsvc_stage_program_register] rpc-service: could not create listening connection
-[2010-05-26 23:40:49] E [rpcsvc.c:2675:rpcsvc_program_register] rpc-service: stage registration of program failed
-[2010-05-26 23:40:49] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
-[2010-05-26 23:40:49] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
-[2010-05-26 23:40:49] C [nfs.c:531:notify] nfs: Failed to initialize protocols</screen></para>
- <para>To resolve this error one of the Gluster NFS servers will have to be shutdown. At this time,
-Gluster NFS server does not support running multiple NFS servers on the same machine.
-</para>
- </section>
- <section>
- <title>mount command fails with “rpc.statd†related error message </title>
- <para>If the mount command fails with the following error message:
-</para>
- <para><errortext>mount.nfs: rpc.statd is not running but is required for remote locking. mount.nfs: Either use &apos;-o nolock&apos; to keep locks local, or start statd. </errortext></para>
- <para><errortext>Start rpc.statd </errortext></para>
- <para>For NFS clients to mount the NFS server, rpc.statd service must be running on the client machine. </para>
- <para>Start
-rpc.statd service by running the following command:
-</para>
- <para><command>$ rpc.statd </command></para>
- </section>
- <section>
- <title>mount command takes too long to finish. </title>
- <para>Start rpcbind service on the NFS client.
-</para>
- <para>The problem is that the rpcbind or portmap service is not running on the NFS client. The
-resolution for this is to start either of these services by running the following command:
-</para>
- <para><command>$ /etc/init.d/portmap start</command>
-</para>
- <para>On some distributions where portmap has been replaced by rpcbind, the following command is
-required:
-</para>
- <para><command>$ /etc/init.d/rpcbind start</command></para>
- </section>
- <section>
- <title>NFS server, glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed†error message in the log. </title>
- <para>NFS start-up can succeed but the initialization of the NFS service can still fail preventing clients
-from accessing the mount points. Such a situation can be confirmed from the following error
-messages in the log file:
-</para>
- <para><screen>[2010-05-26 23:33:47] E [rpcsvc.c:2598:rpcsvc_program_register_portmap] rpc-service: Could notregister with portmap
-[2010-05-26 23:33:47] E [rpcsvc.c:2682:rpcsvc_program_register] rpc-service: portmap registration of program failed
-[2010-05-26 23:33:47] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
-[2010-05-26 23:33:47] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
-[2010-05-26 23:33:47] C [nfs.c:531:notify] nfs: Failed to initialize protocols
-[2010-05-26 23:33:49] E [rpcsvc.c:2614:rpcsvc_program_unregister_portmap] rpc-service: Could not unregister with portmap
-[2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed
-[2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465</screen></para>
- <orderedlist>
- <listitem>
- <para>Start portmap or rpcbind service on the NFS server.
-</para>
- <para>On most Linux distributions, portmap can be started using the following command:
-</para>
- <para><command>$ /etc/init.d/portmap start </command></para>
- <para>On some distributions where portmap has been replaced by rpcbind, run the following command:
-</para>
- <para><command>$ /etc/init.d/rpcbind start </command></para>
- <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted.
-</para>
- </listitem>
- <listitem>
- <para>Stop another NFS server running on the same machine.
-</para>
- <para>Such an error is also seen when there is another NFS server running on the same machine but it is
-not the Gluster NFS server. On Linux systems, this could be the kernel NFS server. Resolution
-involves stopping the other NFS server or not running the Gluster NFS server on the machine.
-Before stopping the kernel NFS server, ensure that no critical service depends on access to that
-NFS server&apos;s exports.
-</para>
- <para>On Linux, kernel NFS servers can be stopped by using either of the following commands
-depending on the distribution in use:
-</para>
- <para><command>$ /etc/init.d/nfs-kernel-server stop</command>
-</para>
- <para><command>$ /etc/init.d/nfs stop</command></para>
- </listitem>
- <listitem>
- <para>Restart Gluster NFS server.
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>mount command fails with NFS server failed error. </title>
- <para>mount command fails with following error
-</para>
- <para><emphasis role="italic">mount: mount to NFS server &apos;10.1.10.11&apos; failed: timed out (retrying).</emphasis></para>
- <para>Perform one of the following to resolve this issue:
-</para>
- <orderedlist>
- <listitem>
- <para>Disable name lookup requests from NFS server to a DNS server.
-</para>
- <para>The NFS server attempts to authenticate NFS clients by performing a reverse DNS lookup to
-match hostnames in the volume file with the client IP addresses. There can be a situation where
-the NFS server either is not able to connect to the DNS server or the DNS server is taking too long
-to responsd to DNS request. These delays can result in delayed replies from the NFS server to the
-NFS client resulting in the timeout error seen above.
-</para>
- <para>NFS server provides a work-around that disables DNS requests, instead relying only on the client
-IP addresses for authentication. The following option can be added for successful mounting in
-such situations:
-</para>
- <para><command>option rpc-auth.addr.namelookup off </command></para>
- <para><note>
- <para>Note: Remember that disabling the NFS server forces authentication of clients to use only IP
-addresses and if the authentication rules in the volume file use hostnames, those authentication
-rules will fail and disallow mounting for those clients.
-</para>
- </note></para>
- <para>or</para>
- </listitem>
- <listitem>
- <para>NFS version used by the NFS client is other than version 3.
-</para>
- <para>Gluster NFS server supports version 3 of NFS protocol. In recent Linux kernels, the default NFS
-version has been changed from 3 to 4. It is possible that the client machine is unable to connect
-to the Gluster NFS server because it is using version 4 messages which are not understood by
-Gluster NFS server. The timeout can be resolved by forcing the NFS client to use version 3. The
-<emphasis role="bold">vers</emphasis> option to mount command is used for this purpose:
-</para>
- <para><command>$ mount <replaceable>nfsserver</replaceable><replaceable>:export</replaceable> -o vers=3 <replaceable>mount-point</replaceable></command>
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>showmount fails with clnt_create: RPC: Unable to receive </title>
- <para>Check your firewall setting to open ports 111 for portmap requests/replies and Gluster NFS
-server requests/replies. Gluster NFS server operates over the following port numbers: 38465,
-38466, and 38467.
-</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-GlusterFS_Client-Native-RPM"/>.
-</para>
- </section>
- <section>
- <title>Application fails with &quot;Invalid argument&quot; or &quot;Value too large for defined data type&quot; error. </title>
- <para>These two errors generally happen for 32-bit nfs clients or applications that do not support 64-bit
-inode numbers or large files.
-Use the following option from the CLI to make Gluster NFS server return 32-bit inode numbers instead:
-nfs.enable-ino32 &lt;on|off&gt;
-</para>
- <para>Applications that will benefit are those that were either:
-</para>
- <itemizedlist>
- <listitem>
- <para>built 32-bit and run on 32-bit machines such that they do not support large files by default</para>
- </listitem>
- <listitem>
- <para>built 32-bit on 64-bit systems
-</para>
- </listitem>
- </itemizedlist>
- <para>This option is disabled by default. So Gluster NFS server returns 64-bit inode numbers by default.
-</para>
- <para>Applications which can be rebuilt from source are recommended to rebuild using the following
-flag with gcc:</para>
- <para><command> -D_FILE_OFFSET_BITS=64</command>
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting File Locks</title>
- <para>In GlusterFS 3.3 you can use <command>statedump</command> command to list the locks held on files. The statedump output also provides information on each lock with its range, basename, PID of the application holding the lock, and so on. You can analyze the output to know about the locks whose owner/application is no longer running or interested in that lock. After ensuring that the no application is using the file, you can clear the lock using the following <command>clear lock</command> command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind {blocked | granted | all}{inode [range] | entry [basename] | posix [range]}</command></command></para>
- <para>For more information on performing <command>statedump</command>, see <xref linkend="sect-Administration_Guide-Monitor_Workload-Performing_Statedump"/></para>
- <para><emphasis role="bold">To identify locked file and clear locks</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Perform statedump on the volume to view the files that are locked using the following command:</para>
- <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> inode</command></para>
- <para>For example, to display statedump of test-volume:</para>
- <para><programlisting># gluster volume statedump test-volume
-Volume statedump successful</programlisting></para>
- <para>The statedump files are created on the brick servers in the<filename> /tmp</filename> directory or in the directory set using <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para>
- <para>The following are the sample contents of the statedump file. It indicates that GlusterFS has entered into a state where there is an entry lock (entrylk) and an inode lock (inodelk). Ensure that those are stale locks and no resources own them before clearing. </para>
- <para><screen>[xlator.features.locks.vol-locks.inode]
-path=/
-mandatory=0
-entrylk-count=1
-lock-dump.domain.domain=vol-replicate-0
-xlator.feature.locks.lock-dump.domain.entrylk.entrylk[0](ACTIVE)=type=ENTRYLK_WRLCK on basename=file1, pid = 714782904, owner=ffffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
-
-conn.2.bound_xl./gfs/brick1.hashsize=14057
-conn.2.bound_xl./gfs/brick1.name=/gfs/brick1/inode
-conn.2.bound_xl./gfs/brick1.lru_limit=16384
-conn.2.bound_xl./gfs/brick1.active_size=2
-conn.2.bound_xl./gfs/brick1.lru_size=0
-conn.2.bound_xl./gfs/brick1.purge_size=0
-
-[conn.2.bound_xl./gfs/brick1.active.1]
-gfid=538a3d4a-01b0-4d03-9dc9-843cd8704d07
-nlookup=1
-ref=2
-ia_type=1
-[xlator.features.locks.vol-locks.inode]
-path=/file1
-mandatory=0
-inodelk-count=1
-lock-dump.domain.domain=vol-replicate-0
-inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012</screen></para>
- </listitem>
- <listitem>
- <para>Clear the lock using the following command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted entry basename</command></command></para>
- <para>For example, to clear the entry lock on <filename>file1</filename> of test-volume:
-</para>
- <para><screen># gluster volume clear-locks test-volume / kind granted entry file1
-Volume clear-locks successful
-vol-locks: entry blocked locks=0 granted locks=1</screen></para>
- </listitem>
- <listitem>
- <para>Clear the inode lock using the following command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted inode range </command></command></para>
- <para>For example, to clear the inode lock on <filename>file1</filename> of test-volume:
-</para>
- <para><screen># gluster volume clear-locks test-volume /file1 kind granted inode 0,0-0
-Volume clear-locks successful
-vol-locks: inode blocked locks=0 granted locks=1</screen></para>
- <para>You can perform statedump on test-volume again to verify that the above inode and entry locks are cleared.</para>
- </listitem>
- </orderedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/gfs_introduction.xml b/doc/admin-guide/en-US/gfs_introduction.xml
deleted file mode 100644
index 64b1c077930..00000000000
--- a/doc/admin-guide/en-US/gfs_introduction.xml
+++ /dev/null
@@ -1,54 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Introducing Gluster File System</title>
- <para>GlusterFS is an open source, clustered file system capable of scaling to several petabytes and handling thousands of clients. GlusterFS can be flexibly combined with commodity physical, virtual, and cloud resources to deliver highly available and performant enterprise storage at a fraction of the cost of traditional solutions.</para>
- <para>GlusterFS clusters together storage building blocks over Infiniband RDMA and/or TCP/IP interconnect, aggregating disk and memory resources and managing data in a single global namespace. GlusterFS is based on a stackable user space design, delivering exceptional performance for diverse workloads.
-</para>
- <figure>
- <title>Virtualized Cloud Environments</title>
- <mediaobject>
- <textobject>
- <phrase>Virtualized Cloud Environments</phrase>
- </textobject>
- <imageobject>
- <imagedata align="center" fileref="images/640px-GlusterFS_3.2_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para>GlusterFS is designed for today&apos;s high-performance, virtualized cloud environments. Unlike traditional data centers, cloud environments require multi-tenancy along with the ability to grow or shrink resources on demand. Enterprises can scale capacity, performance, and availability on demand, with no vendor lock-in, across on-premise, public cloud, and hybrid environments. </para>
- <para>GlusterFS is in production at thousands of enterprises spanning media, healthcare, government, education, web 2.0, and financial services. The following table lists the commercial offerings and its documentation location:
-</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1" colwidth="16%"/>
- <colspec colname="c2" colwidth="84%"/>
- <thead>
- <row>
- <entry>Product</entry>
- <entry>Documentation Location</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Red Hat Storage Software Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Virtual Storage Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Storage </entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html"/>
- </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
-</chapter>
diff --git a/doc/admin-guide/en-US/glossary.xml b/doc/admin-guide/en-US/glossary.xml
deleted file mode 100644
index 8c314feaad4..00000000000
--- a/doc/admin-guide/en-US/glossary.xml
+++ /dev/null
@@ -1,126 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Glossary</title>
- <glosslist>
- <glossentry>
- <glossterm>Brick</glossterm>
- <glossdef>
- <para>A Brick is the GlusterFS basic unit of storage, represented by an export directory on a server in the trusted storage pool. A Brick is expressed by combining a server with an export directory in the following format:</para>
- <para><code>SERVER:EXPORT</code></para>
- <para>For example:</para>
- <para><filename>myhostname:/exports/myexportdir/</filename></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Cluster</glossterm>
- <glossdef>
- <para>A cluster is a group of linked computers, working together closely thus in many respects forming a single computer.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Distributed File System</glossterm>
- <glossdef>
- <para>A file system that allows multiple clients to concurrently access data over a computer network.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Filesystem</glossterm>
- <glossdef>
- <para>A method of storing and organizing computer files and their data. Essentially, it organizes these files into a database for the storage, organization, manipulation, and retrieval by the computer&apos;s operating system.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>FUSE</glossterm>
- <glossdef>
- <para>Filesystem in Userspace (<acronym>FUSE</acronym>) is a loadable kernel module for Unix-like computer operating systems that lets non-privileged users create their own file systems without editing kernel code. This is achieved by running file system code in user space while the <acronym>FUSE</acronym> module provides only a &quot;bridge&quot; to the actual kernel interfaces.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem_in_Userspace">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Geo-Replication</glossterm>
- <glossdef>
- <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from site to another over Local Area Networks (<acronym>LAN</acronym>), Wide Area Network (<acronym>WAN</acronym>), and across the Internet.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>glusterd</glossterm>
- <glossdef>
- <para>The Gluster management daemon that needs to run on all servers in the trusted storage pool.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Metadata</glossterm>
- <glossdef>
- <para>Metadata is data providing information about one or more other pieces of data.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Namespace</glossterm>
- <glossdef>
- <para>Namespace is an abstract container or environment created to hold a logical grouping of unique identifiers or symbols. Each Gluster volume exposes a single namespace as a POSIX mount point that contains every file in the cluster.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Open Source</glossterm>
- <glossdef>
- <para>Open source describes practices in production and development that promote access to the end product&apos;s source materials. Some consider open source a philosophy, others consider it a pragmatic methodology.</para>
- <para>Before the term open source became widely adopted, developers and producers used a variety of phrases to describe the concept; open source gained hold with the rise of the Internet, and the attendant need for massive retooling of the computing source code.</para>
- <para>Opening the source code enabled a self-enhancing diversity of production models, communication paths, and interactive communities. Subsequently, a new, three-word phrase &quot;open source software&quot; was born to describe the environment that the new copyright, licensing, domain, and consumer issues created.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Open_source">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Petabyte</glossterm>
- <glossdef>
- <para>A petabyte (derived from the SI prefix peta- ) is a unit of information equal to one quadrillion (short scale) bytes, or 1000 terabytes. The unit symbol for the petabyte is PB. The prefix peta- (P) indicates a power of 1000:</para>
- <para>1 PB = 1,000,000,000,000,000 B = 10005 B = 1015 B.</para>
- <para>The term &quot;pebibyte&quot; (<acronym>PiB</acronym>), using a binary prefix, is used for the corresponding power of 1024.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Petabyte">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>POSIX</glossterm>
- <glossdef>
- <para>Portable Operating System Interface (for Unix) is the name of a family of related standards specified by the IEEE to define the application programming interface (<acronym>API</acronym>), along with shell and utilities interfaces for software compatible with variants of the Unix operating system. Gluster exports a fully <acronym>POSIX</acronym> compliant file system.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>RAID</glossterm>
- <glossdef>
- <para>Redundant Array of Inexpensive Disks (<acronym>RAID</acronym>) is a technology that provides increased storage reliability through redundancy, combining multiple low-cost, less-reliable disk drives components into a logical unit where all drives in the array are interdependent.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>RRDNS</glossterm>
- <glossdef>
- <para>Round Robin Domain Name Service (<acronym>RRDNS</acronym>) is a method to distribute load across application servers. <acronym>RRDNS</acronym> is implemented by creating multiple A records with the same name and different IP addresses in the zone file of a DNS server.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Trusted Storage Pool</glossterm>
- <glossdef>
- <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Userspace</glossterm>
- <glossdef>
- <para>Applications running in user space don’t directly interact with hardware, instead using the kernel to moderate access. Userspace applications are generally more portable than applications in kernel space. Gluster is a user space application.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Volfile</glossterm>
- <glossdef>
- <para>Volfile is a configuration file used by glusterfs process. Volfile will be usually located at <filename>/etc/glusterd/vols/VOLNAME</filename>.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Volume</glossterm>
- <glossdef>
- <para>A volume is a logical collection of bricks. Most of the gluster management operations happen on the volume.</para>
- </glossdef>
- </glossentry>
- </glosslist>
-</chapter>
diff --git a/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png b/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png
deleted file mode 100644
index 95f89ec8286..00000000000
--- a/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png b/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png
deleted file mode 100644
index dfc0a2c5600..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png b/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png
deleted file mode 100644
index d286fa99e94..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png b/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png
deleted file mode 100644
index 752fa982fa6..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Volume.png b/doc/admin-guide/en-US/images/Distributed_Volume.png
deleted file mode 100644
index 4386ca935b9..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png b/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png
deleted file mode 100644
index 3cd0eaded02..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png b/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png
deleted file mode 100644
index 54bf9f05cff..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep_LAN.png b/doc/admin-guide/en-US/images/Geo-Rep_LAN.png
deleted file mode 100644
index a74f6dbb50a..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep_LAN.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep_WAN.png b/doc/admin-guide/en-US/images/Geo-Rep_WAN.png
deleted file mode 100644
index d72d72768bc..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep_WAN.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png b/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png
deleted file mode 100644
index b506db1f4e7..00000000000
--- a/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Hadoop_Architecture.png b/doc/admin-guide/en-US/images/Hadoop_Architecture.png
deleted file mode 100644
index 8725bd330bb..00000000000
--- a/doc/admin-guide/en-US/images/Hadoop_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Replicated_Volume.png b/doc/admin-guide/en-US/images/Replicated_Volume.png
deleted file mode 100644
index 135a63f345a..00000000000
--- a/doc/admin-guide/en-US/images/Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png b/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png
deleted file mode 100644
index ee88af73134..00000000000
--- a/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Striped_Volume.png b/doc/admin-guide/en-US/images/Striped_Volume.png
deleted file mode 100644
index 63a84b242ab..00000000000
--- a/doc/admin-guide/en-US/images/Striped_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/UFO_Architecture.png b/doc/admin-guide/en-US/images/UFO_Architecture.png
deleted file mode 100644
index be85d7b2825..00000000000
--- a/doc/admin-guide/en-US/images/UFO_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/VSA_Architecture.png b/doc/admin-guide/en-US/images/VSA_Architecture.png
deleted file mode 100644
index c3ab80cf3e8..00000000000
--- a/doc/admin-guide/en-US/images/VSA_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/arhitecture.png b/doc/admin-guide/en-US/images/arhitecture.png
deleted file mode 100644
index 4e5188bf8dd..00000000000
--- a/doc/admin-guide/en-US/images/arhitecture.png
+++ /dev/null
@@ -1,13 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<title>HTTP Error 403</title>
-</head>
-<body>
-<h1>Error 403</h1>
-<p>We're sorry, but we could not fulfill your request for
-/community/documentation/index.php/Image:GlusterFS_3.2_Architecture.png on this server.</p>
-<p>An invalid request was received from your browser. This may be caused by a malfunctioning proxy server or browser privacy software.</p>
-<p>Your technical support key is: <strong>7ab5-0b6a-1756-6707</strong></p>
-<p>You can use this key to <a href="http://www.ioerror.us/bb2-support-key?key=7ab5-0b6a-1756-6707">fix this problem yourself</a>.</p>
-<p>If you are unable to fix the problem yourself, please contact <a href="mailto:webmaster+nospam@nospam.gluster.com">webmaster at gluster.com</a> and be sure to provide the technical support key shown above.</p>
diff --git a/doc/admin-guide/en-US/images/icon.svg b/doc/admin-guide/en-US/images/icon.svg
deleted file mode 100644
index b2f16d0f61d..00000000000
--- a/doc/admin-guide/en-US/images/icon.svg
+++ /dev/null
@@ -1,19 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" width="32" height="32" id="svg3017">
- <defs id="defs3019">
- <linearGradient id="linearGradient2381">
- <stop id="stop2383" style="stop-color:#ffffff;stop-opacity:1" offset="0"/>
- <stop id="stop2385" style="stop-color:#ffffff;stop-opacity:0" offset="1"/>
- </linearGradient>
- <linearGradient x1="296.4996" y1="188.81061" x2="317.32471" y2="209.69398" id="linearGradient2371" xlink:href="#linearGradient2381" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.90776,0,0,0.90776,24.35648,49.24131)"/>
- </defs>
- <g transform="matrix(0.437808,-0.437808,0.437808,0.437808,-220.8237,43.55311)" id="g5089">
- <path d="m 8.4382985,-6.28125 c -0.6073916,0 -4.3132985,5.94886271 -4.3132985,8.25 l 0,26.71875 c 0,0.846384 0.5818159,1.125 1.15625,1.125 l 25.5625,0 c 0.632342,0 1.125001,-0.492658 1.125,-1.125 l 0,-5.21875 0.28125,0 c 0.49684,0 0.906249,-0.409411 0.90625,-0.90625 l 0,-27.9375 c 0,-0.4968398 -0.40941,-0.90625 -0.90625,-0.90625 l -23.8117015,0 z" transform="translate(282.8327,227.1903)" id="path5091" style="fill:#5c5c4f;stroke:#000000;stroke-width:3.23021388;stroke-miterlimit:4;stroke-dasharray:none"/>
- <rect width="27.85074" height="29.369793" rx="1.1414107" ry="1.1414107" x="286.96509" y="227.63805" id="rect5093" style="fill:#032c87"/>
- <path d="m 288.43262,225.43675 25.2418,0 0,29.3698 -26.37615,0.0241 1.13435,-29.39394 z" id="rect5095" style="fill:#ffffff"/>
- <path d="m 302.44536,251.73726 c 1.38691,7.85917 -0.69311,11.28365 -0.69311,11.28365 2.24384,-1.60762 3.96426,-3.47694 4.90522,-5.736 0.96708,2.19264 1.83294,4.42866 4.27443,5.98941 0,0 -1.59504,-7.2004 -1.71143,-11.53706 l -6.77511,0 z" id="path5097" style="fill:#a70000;fill-opacity:1;stroke-width:2"/>
- <rect width="25.241802" height="29.736675" rx="0.89682275" ry="0.89682275" x="290.73544" y="220.92249" id="rect5099" style="fill:#809cc9"/>
- <path d="m 576.47347,725.93939 6.37084,0.41502 0.4069,29.51809 c -1.89202,-1.31785 -6.85427,-3.7608 -8.26232,-1.68101 l 0,-26.76752 c 0,-0.82246 0.66212,-1.48458 1.48458,-1.48458 z" transform="matrix(0.499065,-0.866565,0,1,0,0)" id="rect5101" style="fill:#4573b3;fill-opacity:1"/>
- <path d="m 293.2599,221.89363 20.73918,0 c 0.45101,0 0.8141,0.3631 0.8141,0.81411 0.21547,6.32836 -19.36824,21.7635 -22.36739,17.59717 l 0,-17.59717 c 0,-0.45101 0.3631,-0.81411 0.81411,-0.81411 z" id="path5103" style="opacity:0.65536726;fill:url(#linearGradient2371);fill-opacity:1"/>
- </g>
-</svg>
diff --git a/doc/admin-guide/publican.cfg b/doc/admin-guide/publican.cfg
deleted file mode 100644
index e42fa1b3dc8..00000000000
--- a/doc/admin-guide/publican.cfg
+++ /dev/null
@@ -1,12 +0,0 @@
-# Config::Simple 4.59
-# Thu Apr 5 11:09:15 2012
-
-xml_lang: "en-US"
-type: Book
-brand: Gluster_Brand
-prod_url: http://www.gluster.org
-doc_url: http://www.gluster.com/community/documentation/index.php/Main_Page
-condition: gfs
-show_remarks: 1
-
-
diff --git a/doc/debugging/analyzing-regression-cores.md b/doc/debugging/analyzing-regression-cores.md
new file mode 100644
index 00000000000..5e10f41c6eb
--- /dev/null
+++ b/doc/debugging/analyzing-regression-cores.md
@@ -0,0 +1,54 @@
+# Analyzing Regression Cores
+This document explains how to analyze core-dumps obtained from regression machines, with examples.
+1. Download the core-tarball and extract it.
+2. `cd` into directory where the tarball is extracted.
+```
+[sh]# pwd
+/home/user/Downloads
+[sh]# ls
+build build-install-20150625_05_42_39.tar.bz2 lib64 usr
+```
+3. Determine the core file you need to examine. There can be more than one core file. You can list them from './build/install/cores' directory.
+```
+[sh]# ls build/install/cores/
+core.9341 liblist.txt liblist.txt.tmp
+```
+In case you are unsure which binary generated the core-file, executing 'file' command on it will help.
+```
+[sh]# file ./build/install/cores/core.9341
+./build/install/cores/core.9341: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from '/build/install/sbin/glusterfsd -s slave26.cloud.gluster.org --volfile-id patchy'
+```
+As seen, the core file was generated by glusterfsd binary, and path to it is provided (/build/install/sbin/glusterfsd).
+
+4. Now, run the following command on the core:
+```
+gdb -ex 'set sysroot ./' -ex 'core-file ./build/install/cores/core.xxx' <target, say ./build/install/sbin/glusterd>
+In this case,
+gdb -ex 'set sysroot ./' -ex 'core-file ./build/install/cores/core.9341' ./build/install/sbin/glusterfsd
+```
+5. You can cross check if all shared libraries are available and loaded by using 'info sharedlibrary' command from inside gdb.
+6. Once verified, usual gdb commands based on requirement can be used to debug the core.
+ `bt` or `backtrace` from gdb of core used in examples:
+```
+Core was generated by `/build/install/sbin/glusterfsd -s slave26.cloud.gluster.org --volfile-id patchy'.
+Program terminated with signal SIGABRT, Aborted.
+#0 0x00007f512a54e625 in raise () from ./lib64/libc.so.6
+(gdb) bt
+#0 0x00007f512a54e625 in raise () from ./lib64/libc.so.6
+#1 0x00007f512a54fe05 in abort () from ./lib64/libc.so.6
+#2 0x00007f512a54774e in __assert_fail_base () from ./lib64/libc.so.6
+#3 0x00007f512a547810 in __assert_fail () from ./lib64/libc.so.6
+#4 0x00007f512b9fc434 in __gf_free (free_ptr=0x7f50f4000e50) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/libglusterfs/src/mem-pool.c:304
+#5 0x00007f512b9b6657 in loc_wipe (loc=0x7f510c20d1a0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/libglusterfs/src/xlator.c:685
+#6 0x00007f511cb8201d in mq_start_quota_txn_v2 (this=0x7f5118019b60, loc=0x7f510c20d2b8, ctx=0x7f50f4000bf0, contri=0x7f50f4000d60)
+ at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:2921
+#7 0x00007f511cb82c55 in mq_initiate_quota_task (opaque=0x7f510c20d2b0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:3199
+#8 0x00007f511cb81820 in mq_synctask (this=0x7f5118019b60, task=0x7f511cb829fa <mq_initiate_quota_task>, spawn=_gf_false, loc=0x7f510c20d430, dict=0x0, buf=0x0, contri=0)
+ at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:2789
+#9 0x00007f511cb82f82 in mq_initiate_quota_blocking_txn (this=0x7f5118019b60, loc=0x7f510c20d430) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:3230
+#10 0x00007f511cb82844 in mq_reduce_parent_size_task (opaque=0x7f510c000df0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:3117
+#11 0x00007f512ba0f9dc in synctask_wrap (old_task=0x7f510c0053e0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/libglusterfs/src/syncop.c:370
+#12 0x00007f512a55f8f0 in ?? () from ./lib64/libc.so.6
+#13 0x0000000000000000 in ?? ()
+(gdb)
+```
diff --git a/doc/debugging/gfid-to-path.md b/doc/debugging/gfid-to-path.md
new file mode 100644
index 00000000000..1917bf2cca1
--- /dev/null
+++ b/doc/debugging/gfid-to-path.md
@@ -0,0 +1,68 @@
+# Convert GFID to Path
+
+GlusterFS internal file identifier (GFID) is a uuid that is unique to each
+file across the entire cluster. This is analogous to inode number in a
+normal filesystem. The GFID of a file is stored in its xattr named
+`trusted.gfid`.
+
+#### Special mount using [gfid-access translator][1]:
+```
+mount -t glusterfs -o aux-gfid-mount vm1:test /mnt/testvol
+```
+
+Assuming, you have `GFID` of a file from changelog (or somewhere else).
+For trying this out, you can get `GFID` of a file from mountpoint:
+```
+getfattr -n glusterfs.gfid.string /mnt/testvol/dir/file
+```
+
+
+---
+### Get file path from GFID (Method 1):
+**(Lists hardlinks delimited by `:`, returns path as seen from mountpoint)**
+
+#### Turn on build-pgfid option
+```
+gluster volume set test build-pgfid on
+```
+Read virtual xattr `glusterfs.ancestry.path` which contains the file path
+```
+getfattr -n glusterfs.ancestry.path -e text /mnt/testvol/.gfid/<GFID>
+```
+
+**Example:**
+```
+[root@vm1 glusterfs]# ls -il /mnt/testvol/dir/
+total 1
+10610563327990022372 -rw-r--r--. 2 root root 3 Jul 17 18:05 file
+10610563327990022372 -rw-r--r--. 2 root root 3 Jul 17 18:05 file3
+
+[root@vm1 glusterfs]# getfattr -n glusterfs.gfid.string /mnt/testvol/dir/file
+getfattr: Removing leading '/' from absolute path names
+# file: mnt/testvol/dir/file
+glusterfs.gfid.string="11118443-1894-4273-9340-4b212fa1c0e4"
+
+[root@vm1 glusterfs]# getfattr -n glusterfs.ancestry.path -e text /mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+getfattr: Removing leading '/' from absolute path names
+# file: mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+glusterfs.ancestry.path="/dir/file:/dir/file3"
+```
+
+---
+### Get file path from GFID (Method 2):
+**(Does not list all hardlinks, returns backend brick path)**
+```
+getfattr -n trusted.glusterfs.pathinfo -e text /mnt/testvol/.gfid/<GFID>
+```
+
+**Example:**
+```
+[root@vm1 glusterfs]# getfattr -n trusted.glusterfs.pathinfo -e text /mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+getfattr: Removing leading '/' from absolute path names
+# file: mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+trusted.glusterfs.pathinfo="(<DISTRIBUTE:test-dht> <POSIX(/mnt/brick-test/b):vm1:/mnt/brick-test/b/dir//file3>)"
+```
+
+---
+#### References and links:
+[posix: placeholders for GFID to path conversion](http://review.gluster.org/5951)
diff --git a/doc/debugging/mem-alloc-list.md b/doc/debugging/mem-alloc-list.md
new file mode 100644
index 00000000000..1c68e65d323
--- /dev/null
+++ b/doc/debugging/mem-alloc-list.md
@@ -0,0 +1,19 @@
+## Viewing Memory Allocations
+
+While statedumps provide stats of the number of allocations, size etc for a
+particular mem type, there is no easy way to examine all the allocated objects of that type
+in memory.Being able to view this information could help with determining how an object is used,
+and if there are any memory leaks.
+
+The mem_acct_rec structures have been updated to include lists to which the allocated object is
+added. These can be examined in gdb using simple scripts.
+
+`gdb> plist xl->mem_acct.rec[$type]->obj_list`
+
+will print out the pointers of all allocations of $type.
+
+These changes are primarily targeted at developers and need to enabled
+at compile-time using `configure --enable-debug`.
+
+
+
diff --git a/doc/debugging/split-brain.md b/doc/debugging/split-brain.md
new file mode 100644
index 00000000000..6b122c40551
--- /dev/null
+++ b/doc/debugging/split-brain.md
@@ -0,0 +1,264 @@
+# Steps to recover from File split-brain
+This document contains steps to recover from a file split-brain.
+## Quick Start:
+### Step 1. Get the path of the file that is in split-brain:
+It can be obtained either by
+1. The command `gluster volume heal info split-brain`.
+2. Identify the files for which file operations performed from the client keep failing with Input/Output error.
+
+### Step 2. Close the applications that opened this file from the mount point.
+In case of VMs, they need to be powered-off.
+
+### Step 3. Decide on the correct copy:
+This is done by observing the afr changelog extended attributes of the file on
+the bricks using the getfattr command; then identifying the type of split-brain
+(data split-brain, metadata split-brain, entry split-brain or split-brain due to
+gfid-mismatch); and finally determining which of the bricks contains the 'good copy'
+of the file.
+```
+getfattr -d -m . -e hex <file-path-on-brick>
+```
+
+It is also possible that one brick might contain the correct data while the
+other might contain the correct metadata.
+
+### Step 4. Reset the relevant extended attribute on the brick(s) that contains the 'bad copy' of the file data/metadata using the setfattr command.
+```
+setfattr -n <attribute-name> -v <attribute-value> <file-path-on-brick>
+```
+
+### Step 5. Trigger self-heal on the file by performing lookup from the client:
+```
+ls -l <file-path-on-gluster-mount>
+```
+
+Detailed Instructions for steps 3 through 5:
+===========================================
+To understand how to resolve split-brain we need to know how to interpret the
+afr changelog extended attributes.
+
+Execute `getfattr -d -m . -e hex <file-path-on-brick>`
+
+Example:
+```
+[root@store3 ~]# getfattr -d -e hex -m. brick-a/file.txt
+\#file: brick-a/file.txt
+security.selinux=0x726f6f743a6f626a6563745f723a66696c655f743a733000
+trusted.afr.vol-client-2=0x000000000000000000000000
+trusted.afr.vol-client-3=0x000000000200000000000000
+trusted.gfid=0x307a5c9efddd4e7c96e94fd4bcdcbd1b
+```
+
+The extended attributes with `trusted.afr.<volname>-client-<subvolume-index>`
+are used by afr to maintain changelog of the file.The values of the
+`trusted.afr.<volname>-client-<subvolume-index>` are calculated by the glusterfs
+client (fuse or nfs-server) processes. When the glusterfs client modifies a file
+or directory, the client contacts each brick and updates the changelog extended
+attribute according to the response of the brick.
+
+`subvolume-index` is nothing but (brick number - 1) in
+`gluster volume info <volname>` output.
+
+Example:
+```
+[root@pranithk-laptop ~]# gluster volume info vol
+ Volume Name: vol
+ Type: Distributed-Replicate
+ Volume ID: 4f2d7849-fbd6-40a2-b346-d13420978a01
+ Status: Created
+ Number of Bricks: 4 x 2 = 8
+ Transport-type: tcp
+ Bricks:
+ brick-a: pranithk-laptop:/gfs/brick-a
+ brick-b: pranithk-laptop:/gfs/brick-b
+ brick-c: pranithk-laptop:/gfs/brick-c
+ brick-d: pranithk-laptop:/gfs/brick-d
+ brick-e: pranithk-laptop:/gfs/brick-e
+ brick-f: pranithk-laptop:/gfs/brick-f
+ brick-g: pranithk-laptop:/gfs/brick-g
+ brick-h: pranithk-laptop:/gfs/brick-h
+```
+
+In the example above:
+```
+Brick | Replica set | Brick subvolume index
+----------------------------------------------------------------------------
+-/gfs/brick-a | 0 | 0
+-/gfs/brick-b | 0 | 1
+-/gfs/brick-c | 1 | 2
+-/gfs/brick-d | 1 | 3
+-/gfs/brick-e | 2 | 4
+-/gfs/brick-f | 2 | 5
+-/gfs/brick-g | 3 | 6
+-/gfs/brick-h | 3 | 7
+```
+
+Each file in a brick maintains the changelog of itself and that of the files
+present in all the other bricks in it's replica set as seen by that brick.
+
+In the example volume given above, all files in brick-a will have 2 entries,
+one for itself and the other for the file present in it's replica pair, i.e.brick-b:
+```
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for itself (brick-a)
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for brick-b as seen by brick-a
+```
+Likewise, all files in brick-b will have:
+```
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for brick-a as seen by brick-b
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for itself (brick-b)
+```
+
+The same can be extended for other replica pairs.
+
+Interpreting Changelog (roughly pending operation count) Value:
+Each extended attribute has a value which is 24 hexa decimal digits.
+First 8 digits represent changelog of data. Second 8 digits represent changelog
+of metadata. Last 8 digits represent Changelog of directory entries.
+
+Pictorially representing the same, we have:
+```
+0x 000003d7 00000001 00000000
+ | | |
+ | | \_ changelog of directory entries
+ | \_ changelog of metadata
+ \ _ changelog of data
+```
+
+
+For Directories metadata and entry changelogs are valid.
+For regular files data and metadata changelogs are valid.
+For special files like device files etc metadata changelog is valid.
+When a file split-brain happens it could be either data split-brain or
+meta-data split-brain or both. When a split-brain happens the changelog of the
+file would be something like this:
+
+Example:(Lets consider both data, metadata split-brain on same file).
+```
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000100000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000003b00000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+```
+
+### Observations:
+
+#### According to changelog extended attributes on file /gfs/brick-a/a:
+The first 8 digits of trusted.afr.vol-client-0 are all
+zeros (0x00000000................), and the first 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x000003d7................).
+So the changelog on /gfs/brick-a/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are
+all zeros (0x........00000000........), and the second 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x........00000001........).
+So the changelog on /gfs/brick-a/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+#### According to Changelog extended attributes on file /gfs/brick-b/a:
+The first 8 digits of trusted.afr.vol-client-0 are not all
+zeros (0x000003b0................), and the first 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x00000000................).
+So the changelog on /gfs/brick-b/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are not
+all zeros (0x........00000001........), and the second 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x........00000000........).
+So the changelog on /gfs/brick-b/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+Since both the copies have data, metadata changes that are not on the other
+file, it is in both data and metadata split-brain.
+
+Deciding on the correct copy:
+-----------------------------
+The user may have to inspect stat,getfattr output of the files to decide which
+metadata to retain and contents of the file to decide which data to retain.
+Continuing with the example above, lets say we want to retain the data
+of /gfs/brick-a/a and metadata of /gfs/brick-b/a.
+
+Resetting the relevant changelogs to resolve the split-brain:
+-------------------------------------------------------------
+For resolving data-split-brain:
+We need to change the changelog extended attributes on the files as if some data
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a. But
+/gfs/brick-b/a should NOT have any changelog which says some data operations
+succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a. We need to reset the
+data part of the changelog on trusted.afr.vol-client-0 of /gfs/brick-b/a.
+
+For resolving metadata-split-brain:
+We need to change the changelog extended attributes on the files as if some
+metadata operations succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a.
+But /gfs/brick-a/a should NOT have any changelog which says some metadata
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a.
+We need to reset metadata part of the changelog on
+trusted.afr.vol-client-1 of /gfs/brick-a/a
+
+So, the intended changes are:
+On /gfs/brick-b/a:
+For trusted.afr.vol-client-0
+0x000003b00000000100000000 to 0x000000000000000100000000
+(Note that the metadata part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-0 -v 0x000000000000000100000000 /gfs/brick-b/a`
+
+On /gfs/brick-a/a:
+For trusted.afr.vol-client-1
+0x0000000000000000ffffffff to 0x000003d70000000000000000
+(Note that the data part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-1 -v 0x000003d70000000000000000 /gfs/brick-a/a`
+
+Thus after the above operations are done, the changelogs look like this:
+```
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000000000000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+```
+
+Triggering Self-heal:
+---------------------
+Perform `ls -l <file-path-on-gluster-mount>` to trigger healing.
+
+Fixing Directory entry split-brain:
+----------------------------------
+Afr has the ability to conservatively merge different entries in the directories
+when there is a split-brain on directory.
+If on one brick directory 'd' has entries '1', '2' and has entries '3', '4' on
+the other brick then afr will merge all of the entries in the directory to have
+'1', '2', '3', '4' entries in the same directory.
+(Note: this may result in deleted files to re-appear in case the split-brain
+happens because of deletion of files on the directory)
+Split-brain resolution needs human intervention when there is at least one entry
+which has same file name but different gfid in that directory.
+Example:
+On brick-a the directory has entries '1' (with gfid g1), '2' and on brick-b
+directory has entries '1' (with gfid g2) and '3'.
+These kinds of directory split-brains need human intervention to resolve.
+The user needs to remove either file '1' on brick-a or the file '1' on brick-b
+to resolve the split-brain. In addition, the corresponding gfid-link file also
+needs to be removed.The gfid-link files are present in the .glusterfs folder
+in the top-level directory of the brick. If the gfid of the file is
+0x307a5c9efddd4e7c96e94fd4bcdcbd1b (the trusted.gfid extended attribute got
+from the getfattr command earlier),the gfid-link file can be found at
+`/gfs/brick-a/.glusterfs/30/7a/307a5c9efddd4e7c96e94fd4bcdcbd1b`
+
+#### Word of caution:
+Before deleting the gfid-link, we have to ensure that there are no hard links
+to the file present on that brick. If hard-links exist,they must be deleted as
+well.
diff --git a/doc/debugging/statedump.md b/doc/debugging/statedump.md
new file mode 100644
index 00000000000..9dfdce15fad
--- /dev/null
+++ b/doc/debugging/statedump.md
@@ -0,0 +1,414 @@
+# Statedump
+Statedump is a file generated by glusterfs process with different data structure state which may contain the active inodes, fds, mempools, iobufs, memory allocation stats of different types of datastructures per xlator etc.
+
+## How to generate statedump
+We can find the directory where statedump files are created using `gluster --print-statedumpdir` command.
+Create that directory if not already present based on the type of installation.
+Lets call this directory `statedump-directory`.
+
+We can generate statedump using `kill -USR1 <pid-of-gluster-process>`.
+gluster-process is nothing but glusterd/glusterfs/glusterfsd process.
+
+There are also commands to generate statedumps for brick processes/nfs server/quotad
+
+For bricks:
+```
+gluster volume statedump <volname>
+```
+
+For nfs server:
+```
+gluster volume statedump <volname> nfs
+```
+
+For quotad:
+```
+gluster volume statedump <volname> quotad
+```
+
+For brick-processes files will be created in `statedump-directory` with name of the file as `hyphenated-brick-path.<pid>.dump.timestamp`. For all other processes it will be `glusterdump.<pid>.dump.timestamp`.
+
+For applications using libgfapi, `SIGUSR1` cannot be used, eg: smbd/libvirtd
+processes could have used the `SIGUSR1` signal already for other purposes.
+To generate statedump for the processes, using libgfapi, below command can be
+executed from one of the nodes in the gluster cluster to which the libgfapi
+application is connected to.
+```
+gluster volume statedump <volname> client <hostname>:<process id>
+```
+The statedumps can be found in the `statedump-directory`, the name of the
+statedumps being `glusterdump.<pid>.dump.timestamp`. For a process there can be
+multiple such files created depending on the number of times the volume is
+accessed by the process (related to the number of `glfs_init()` calls).
+
+## How to read statedump
+We shall see snippets of each type of statedump.
+
+First and last lines of the file have starting and ending time of writing the statedump file. Times will be in UTC timezone.
+
+mallinfo return status is printed in the following format. Please read man mallinfo for more information about what each field means.
+### Mallinfo
+```
+[mallinfo]
+mallinfo_arena=100020224 /* Non-mmapped space allocated (bytes) */
+mallinfo_ordblks=69467 /* Number of free chunks */
+mallinfo_smblks=449 /* Number of free fastbin blocks */
+mallinfo_hblks=13 /* Number of mmapped regions */
+mallinfo_hblkhd=20144128 /* Space allocated in mmapped regions (bytes) */
+mallinfo_usmblks=0 /* Maximum total allocated space (bytes) */
+mallinfo_fsmblks=39264 /* Space in freed fastbin blocks (bytes) */
+mallinfo_uordblks=96710112 /* Total allocated space (bytes) */
+mallinfo_fordblks=3310112 /* Total free space (bytes) */
+mallinfo_keepcost=133712 /* Top-most, releasable space (bytes) */
+```
+
+### Data structure allocation stats
+For every xlator data structure memory per translator loaded in the call-graph is displayed in the following format:
+
+For xlator with name: glusterfs
+```
+[global.glusterfs - Memory usage] #[global.xlator-name - Memory usage]
+num_types=119 #It shows the number of data types it is using
+```
+
+Now for each data-type it prints the memory usage.
+
+```
+[global.glusterfs - usage-type gf_common_mt_gf_timer_t memusage]
+#[global.xlator-name - usage-type <tag associated with the data-type> memusage]
+size=112 #num_allocs times the sizeof(data-type) i.e. num_allocs * sizeof (data-type)
+num_allocs=2 #Number of allocations of the data-type which are active at the time of taking statedump.
+max_size=168 #max_num_allocs times the sizeof(data-type) i.e. max_num_allocs * sizeof (data-type)
+max_num_allocs=3 #Maximum number of active allocations at any point in the life of the process.
+total_allocs=7 #Number of times this data is allocated in the life of the process.
+```
+
+### Mempools
+
+Mempools are optimization to reduce the number of allocations of a data type. If we create a mem-pool of lets say 1024 elements for a data-type, new elements will be allocated from heap using syscalls like calloc, only if all the 1024 elements in the pool are in active use.
+
+Memory pool allocated by each xlator is displayed in the following format:
+
+```
+[mempool] #Section name
+-----=-----
+pool-name=fuse:fd_t #pool-name=<xlator-name>:<data-type>
+hot-count=1 #number of mempool elements that are in active use. i.e. for this pool it is the number of 'fd_t' s in active use.
+cold-count=1023 #number of mempool elements that are not in use. If a new allocation is required it will be served from here until all the elements in the pool are in use i.e. cold-count becomes 0.
+padded_sizeof=108 #Each mempool element is padded with a doubly-linked-list + ptr of mempool + is-in-use info to operate the pool of elements, this size is the element-size after padding
+pool-misses=0 #Number of times the element had to be allocated from heap because all elements from the pool are in active use.
+alloc-count=314 #Number of times this type of data is allocated through out the life of this process. This may include pool-misses as well.
+max-alloc=3 #Maximum number of elements from the pool in active use at any point in the life of the process. This does *not* include pool-misses.
+cur-stdalloc=0 #Denotes the number of allocations made from heap once cold-count reaches 0, that are yet to be released via mem_put().
+max-stdalloc=0 #Maximum number of allocations from heap that are in active use at any point in the life of the process.
+```
+
+### Iobufs
+```
+[iobuf.global]
+iobuf_pool=0x1f0d970 #The memory pool for iobufs
+iobuf_pool.default_page_size=131072 #The default size of iobuf (if no iobuf size is specified the default size is allocated)
+#iobuf_arena: One arena represents a group of iobufs of a particular size
+iobuf_pool.arena_size=12976128 # The initial size of the iobuf pool (doesn't include the stdalloc'd memory or the newly added arenas)
+iobuf_pool.arena_cnt=8 #Total number of arenas in the pool
+iobuf_pool.request_misses=0 #The number of iobufs that were stdalloc'd (as they exceeded the default max page size provided by iobuf_pool).
+```
+
+There are 3 lists of arenas:
+
+1. Arena list: arenas allocated during iobuf pool creation and the arenas that are in use(active_cnt != 0) will be part of this list.
+2. Purge list: arenas that can be purged(no active iobufs, active_cnt == 0).
+3. Filled list: arenas without free iobufs.
+
+```
+[purge.1] #purge.<S.No.>
+purge.1.mem_base=0x7fc47b35f000 #The address of the arena structure
+purge.1.active_cnt=0 #The number of iobufs active in that arena
+purge.1.passive_cnt=1024 #The number of unused iobufs in the arena
+purge.1.alloc_cnt=22853 #Total allocs in this pool(number of times the iobuf was allocated from this arena)
+purge.1.max_active=7 #Max active iobufs from this arena, at any point in the life of this process.
+purge.1.page_size=128 #Size of all the iobufs in this arena.
+
+[arena.5] #arena.<S.No.>
+arena.5.mem_base=0x7fc47af1f000
+arena.5.active_cnt=0
+arena.5.passive_cnt=64
+arena.5.alloc_cnt=0
+arena.5.max_active=0
+arena.5.page_size=32768
+```
+
+If the active_cnt of any arena is non zero, then the statedump will also have the iobuf list.
+```
+[arena.6.active_iobuf.1] #arena.<S.No>.active_iobuf.<iobuf.S.No.>
+arena.6.active_iobuf.1.ref=1 #refcount of the iobuf
+arena.6.active_iobuf.1.ptr=0x7fdb921a9000 #address of the iobuf
+
+[arena.6.active_iobuf.2]
+arena.6.active_iobuf.2.ref=1
+arena.6.active_iobuf.2.ptr=0x7fdb92189000
+```
+
+At any given point in time if there are lots of filled arenas then that could be a sign of iobuf leaks.
+
+### Call stack
+All the fops received by gluster are handled using call-stacks. Call stack contains the information about uid/gid/pid etc of the process that is executing the fop. Each call-stack contains different call-frames per xlator which handles that fop.
+
+```
+[global.callpool.stack.3] #global.callpool.stack.<Serial-Number>
+stack=0x7fc47a44bbe0 #Stack address
+uid=0 #Uid of the process which is executing the fop
+gid=0 #Gid of the process which is executing the fop
+pid=6223 #Pid of the process which is executing the fop
+unique=2778 #Xlators like afr do copy_frame and perform the operation in a different stack, this id is useful to find out the stacks that are inter-related because of copy-frame
+lk-owner=0000000000000000 #Some of the fuse fops have lk-owner.
+op=LOOKUP #Fop
+type=1 #Type of the op i.e. FOP/MGMT-OP
+cnt=9 #Number of frames in this stack.
+```
+### Call-frame
+Each frame will have information about which xlator the frame belongs to, what is the function it wound to/from and will be unwind to. It also mentions if the unwind happened or not. If we observe hangs in the system and want to find out which xlator is causing it. Take a statedump and see what is the final xlator which is yet to be unwound.
+
+```
+[global.callpool.stack.3.frame.2]#global.callpool.stack.<stack-serial-number>.frame.<frame-serial-number>
+frame=0x7fc47a611dbc #Frame address
+ref_count=0 #Incremented at the time of wind and decremented at the time of unwind.
+translator=r2-client-1 #Xlator this frame belongs to
+complete=0 #if this value is 1 that means this frame is already unwound. 0 if it is yet to unwind.
+parent=r2-replicate-0 #Parent xlator of this frame
+wind_from=afr_lookup #Parent xlator function from which the wind happened
+wind_to=priv->children[i]->fops->lookup
+unwind_to=afr_lookup_cbk #Parent xlator function to which unwind happened
+```
+
+### History of operations in Fuse
+
+Fuse maintains history of operations that happened in fuse.
+
+```
+[xlator.mount.fuse.history]
+TIME=2014-07-09 16:44:57.523364
+message=[0] fuse_release: RELEASE(): 4590:, fd: 0x1fef0d8, gfid: 3afb4968-5100-478d-91e9-76264e634c9f
+
+TIME=2014-07-09 16:44:57.523373
+message=[0] send_fuse_err: Sending Success for operation 18 on inode 3afb4968-5100-478d-91e9-76264e634c9f
+
+TIME=2014-07-09 16:44:57.523394
+message=[0] fuse_getattr_resume: 4591, STAT, path: (/iozone.tmp), gfid: (3afb4968-5100-478d-91e9-76264e634c9f)
+```
+
+### Xlator configuration
+```
+[cluster/replicate.r2-replicate-0] #Xlator type, name information
+child_count=2 #Number of children to the xlator
+#Xlator specific configuration below
+child_up[0]=1
+pending_key[0]=trusted.afr.r2-client-0
+child_up[1]=1
+pending_key[1]=trusted.afr.r2-client-1
+data_self_heal=on
+metadata_self_heal=1
+entry_self_heal=1
+data_change_log=1
+metadata_change_log=1
+entry-change_log=1
+read_child=1
+favorite_child=-1
+wait_count=1
+```
+
+### Graph/inode table
+```
+[active graph - 1]
+
+conn.1.bound_xl./data/brick01a/homegfs.hashsize=14057
+conn.1.bound_xl./data/brick01a/homegfs.name=/data/brick01a/homegfs/inode
+conn.1.bound_xl./data/brick01a/homegfs.lru_limit=16384 #Least recently used size limit
+conn.1.bound_xl./data/brick01a/homegfs.active_size=690 #Number of inodes undergoing some kind of fop to be precise on which there is at least one ref.
+conn.1.bound_xl./data/brick01a/homegfs.lru_size=183 #Number of inodes present in lru list
+conn.1.bound_xl./data/brick01a/homegfs.purge_size=0 #Number of inodes present in purge list
+```
+
+### Inode
+```
+[conn.1.bound_xl./data/brick01a/homegfs.active.324] #324th inode in active inode list
+gfid=e6d337cf-97eb-44b3-9492-379ba3f6ad42 #Gfid of the inode
+nlookup=13 #Number of times lookups happened from the client or from fuse kernel
+fd-count=4 #Number of fds opened on the inode
+ref=11 #Number of refs taken on the inode
+ia_type=1 #Type of the inode. This should be changed to some string :-(
+Ref by xl:.patchy-md-cache=11 #Further this there will be a list of xlators, and the ref count taken by each of them on this inode at the time of statedump
+
+[conn.1.bound_xl./data/brick01a/homegfs.lru.1] #1st inode in lru list. Note that ref count is zero for these inodes.
+gfid=5114574e-69bc-412b-9e52-f13ff087c6fc
+nlookup=5
+fd-count=0
+ref=0
+ia_type=2
+Ref by xl:.fuse=1
+Ref by xl:.patchy-client-0=-1
+```
+### Inode context
+For each inode per xlator some context could be stored. This context can also be printed in the statedump. Here is the inode ctx of locks xlator
+```
+[xlator.features.locks.homegfs-locks.inode]
+path=/homegfs/users/dfrobins/gfstest/r4/SCRATCH/fort.5102 - path of the file
+mandatory=0
+inodelk-count=5 #Number of inode locks
+lock-dump.domain.domain=homegfs-replicate-0:self-heal #Domain name where self-heals take locks to prevent more than one heal on the same file
+inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 18446744073709551615, owner=080b1ada117f0000, client=0xb7fc30, connection-id=compute-30-029.com-3505-2014/06/29-14:46:12:477358-homegfs-client-0-0-1, granted at Sun Jun 29 11:01:00 2014 #Active lock information
+
+inodelk.inodelk[1](BLOCKED)=type=WRITE, whence=0, start=0, len=0, pid = 18446744073709551615, owner=c0cb091a277f0000, client=0xad4f10, connection-id=gfs01a.com-4080-2014/06/29-14:41:36:917768-homegfs-client-0-0-0, blocked at Sun Jun 29 11:04:44 2014 #Blocked lock information
+
+lock-dump.domain.domain=homegfs-replicate-0:metadata #Domain name where metadata operations take locks to maintain replication consistency
+lock-dump.domain.domain=homegfs-replicate-0 #Domain name where entry/data operations take locks to maintain replication consistency
+inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=11141120, len=131072, pid = 18446744073709551615, owner=080b1ada117f0000, client=0xb7fc30, connection-id=compute-30-029.com-3505-2014/06/29-14:46:12:477358-homegfs-client-0-0-1, granted at Sun Jun 29 11:10:36 2014 #Active lock information
+```
+
+## FAQ
+### How to debug Memory leaks using statedump?
+
+#### Using memory accounting feature:
+
+[Bug 1120151](https://bugzilla.redhat.com/show_bug.cgi?id=1120151) is one of the bugs which was debugged using statedump to see which data-structure is leaking. Here is the process used to find what the leak is using statedump. According to the bug the observation is that the process memory usage is increasing whenever one of the bricks is wiped in a replicate volume and a `full` self-heal is invoked to heal the contents. Statedump of the process is taken using `kill -USR1 <pid-of-gluster-self-heal-daemon>`.
+```
+grep -w num_allocs glusterdump.5225.dump.1405493251
+num_allocs=77078
+num_allocs=87070
+num_allocs=117376
+....
+
+grep hot-count glusterdump.5225.dump.1405493251
+hot-count=16384
+hot-count=16384
+hot-count=4095
+....
+
+Find the occurrences in the statedump file to figure out the tags.
+```
+grep of the statedump revealed too many allocations for the following data-types under replicate,
+
+1. gf_common_mt_asprintf
+2. gf_common_mt_char
+3. gf_common_mt_mem_pool.
+
+After checking afr-code for allocations with tag `gf_common_mt_char` found `data-self-heal` code path does not free one such allocated memory. `gf_common_mt_mem_pool` suggests that there is a leak in pool memory. `replicate-0:dict_t`, `glusterfs:data_t` and `glusterfs:data_pair_t` pools are using lot of memory, i.e. cold_count is `0` and too many allocations. Checking source code of dict.c revealed that `key` in `dict` is allocated with `gf_common_mt_char` i.e. `2.` tag and value is created using gf_asprintf which in-turn uses `gf_common_mt_asprintf` i.e. `1.`. Browsing the code for leak in self-heal code paths lead to a line which over-writes a variable with new dictionary even when it was already holding a reference to another dictionary. After fixing these leaks, ran the same test to verify that none of the `num_allocs` are increasing even after healing 10,000 files directory hierarchy in statedump of self-heal daemon.
+Please check this [patch](http://review.gluster.org/8316) for more info about the fix.
+
+#### Debugging leaks in memory pools:
+Statedump output of memory pools was used to test and verify the fixes to [Bug 1134221](https://bugzilla.redhat.com/show_bug.cgi?id=1134221). On code analysis, dict_t objects were found to be leaking (in terms of not being unref'd enough number of times, during name self-heal. The test involved creating 100 files on plain replicate volume, removing them from one of the brick's backend, and then triggering lookup on them from the mount point. Statedump of the mount process was taken before executing the test case and after it, after compiling glusterfs with -DDEBUG flags (to have cold count set to 0 by default).
+
+Statedump output of the fuse mount process before the test case was executed:
+
+```
+
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=33
+max-alloc=0
+pool-misses=33
+cur-stdalloc=14
+max-stdalloc=18
+
+```
+Statedump output of the fuse mount process after the test case was executed:
+
+```
+
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=2841
+max-alloc=0
+pool-misses=2841
+cur-stdalloc=214
+max-stdalloc=220
+
+```
+Here, with cold count being 0 by default, `cur-stdalloc` indicated the number of `dict_t` objects that were allocated in heap using `mem_get()`, and yet to be freed using `mem_put()` (refer to this [page](../developer-guide/datastructure-mem-pool.md) for more details on how mempool works). After the test case (name selfheal of 100 files), there was a rise in the cur-stdalloc value (from 14 to 214) for `dict_t`.
+
+After these leaks were fixed, glusterfs was again compiled with -DDEBUG flags, and the same steps were performed again and statedump was taken before and after executing the test case, of the mount. This was done to ascertain the validity of the fix. And the following are the results:
+
+Statedump output of the fuse mount process before executing the test case:
+
+```
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=33
+max-alloc=0
+pool-misses=33
+cur-stdalloc=14
+max-stdalloc=18
+
+```
+Statedump output of the fuse mount process after executing the test case:
+
+```
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=2837
+max-alloc=0
+pool-misses=2837
+cur-stdalloc=14
+max-stdalloc=119
+
+```
+The value of cur-stdalloc remained 14 before and after the test, indicating that the fix indeed does what it's supposed to do.
+
+### How to debug hangs because of frame-loss?
+[Bug 994959](https://bugzilla.redhat.com/show_bug.cgi?id=994959) is one of the bugs where statedump was helpful in finding where the frame was lost. Here is the process used to find where the hang is using statedump.
+When the hang was observed, statedumps are taken for all the processes. On mount's statedump the following stack is shown:
+```
+[global.callpool.stack.1.frame.1]
+ref_count=1
+translator=fuse
+complete=0
+
+[global.callpool.stack.1.frame.2]
+ref_count=0
+translator=r2-client-1
+complete=1 <<----- Client xlator completed the readdirp call and unwound to afr
+parent=r2-replicate-0
+wind_from=afr_do_readdir
+wind_to=children[call_child]->fops->readdirp
+unwind_from=client3_3_readdirp_cbk
+unwind_to=afr_readdirp_cbk
+
+[global.callpool.stack.1.frame.3]
+ref_count=0
+translator=r2-replicate-0
+complete=0 <<---- Afr xlator is not unwinding for some reason.
+parent=r2-dht
+wind_from=dht_do_readdir
+wind_to=xvol->fops->readdirp
+unwind_to=dht_readdirp_cbk
+
+[global.callpool.stack.1.frame.4]
+ref_count=1
+translator=r2-dht
+complete=0
+parent=r2-io-cache
+wind_from=ioc_readdirp
+wind_to=FIRST_CHILD(this)->fops->readdirp
+unwind_to=ioc_readdirp_cbk
+
+[global.callpool.stack.1.frame.5]
+ref_count=1
+translator=r2-io-cache
+complete=0
+parent=r2-quick-read
+wind_from=qr_readdirp
+wind_to=FIRST_CHILD (this)->fops->readdirp
+unwind_to=qr_readdirp_cbk
+
+```
+`unwind_to` shows that call was unwound to `afr_readdirp_cbk` from client xlator.
+Inspecting that function revealed that afr is not unwinding the stack when fop failed.
+Check this [patch](http://review.gluster.org/5531) for more info about the fix.
diff --git a/doc/developer-guide/Language-Bindings.md b/doc/developer-guide/Language-Bindings.md
new file mode 100644
index 00000000000..951f5fae2f6
--- /dev/null
+++ b/doc/developer-guide/Language-Bindings.md
@@ -0,0 +1,45 @@
+# Language Bindings
+GlusterFS 3.4 introduced the libgfapi client API for C programs. This
+page lists bindings to the libgfapi C library from other languages.
+
+Go
+--
+
+- [gogfapi](https://github.com/gluster/gogfapi) - Go language bindings
+ for libgfapi, aiming to provide an api consistent with the default
+ Go file apis.
+
+Java
+----
+
+- [libgfapi-jni](https://github.com/semiosis/libgfapi-jni/) - Low
+ level JNI binding for libgfapi
+- [glusterfs-java-filesystem](https://github.com/semiosis/glusterfs-java-filesystem)
+ - High level NIO.2 FileSystem Provider implementation for the Java
+ platform
+- [libgfapi-java-io](https://github.com/gluster/libgfapi-java-io) -
+ Java bindings for libgfapi, similar to java.io
+
+Python
+------
+
+- [libgfapi-python](https://github.com/gluster/libgfapi-python) -
+ Libgfapi bindings for Python
+
+Ruby
+----
+
+- [libgfapi-ruby](https://github.com/spajus/libgfapi-ruby) - Libgfapi
+ bindings for Ruby using FFI
+
+Rust
+----
+
+- [gfapi-sys](https://github.com/cholcombe973/Gfapi-sys) - Libgfapi
+ bindings for Rust using FFI
+
+Perl
+----
+
+- [libgfapi-perl](https://github.com/gluster/libgfapi-perl) - Libgfapi
+ bindings for Perl using FFI
diff --git a/doc/developer-guide/README.md b/doc/developer-guide/README.md
new file mode 100644
index 00000000000..aaf9c7476b0
--- /dev/null
+++ b/doc/developer-guide/README.md
@@ -0,0 +1,81 @@
+Developers
+==========
+
+### From GlusterDocumentation
+
+Contributing to the Gluster community
+-------------------------------------
+
+Are you itching to send in patches and participate as a developer in the
+Gluster community? Here are a number of starting points for getting
+involved. We don't require a signed contributor license agreement or
+copyright assignment, but we do require a "signed-off-by" line on each
+code check-in.
+
+- [License
+ Change](http://www.gluster.org/2012/05/glusterfs-license-change/) -
+ we recently changed the client library code to a dual license under
+ the GPL v2 and the LGPL v3 or later
+- [GlusterFS Coding Standards](./coding-standard.md)
+
+- If you are not sure of where to start, and what to do, we have a small
+ write-up on what you can pick. [Check it out](./options-to-contribute.md)
+
+
+Adding File operations
+----------------------
+
+- [Steps to be followed when adding a new FOP to GlusterFS ](./adding-fops.md)
+
+Automatic File Replication
+--------------------------
+
+- [Cluster/afr translator](./afr.md)
+- [History of Locking in AFR](./afr-locks-evolution.md)
+- [Self heal Daemon](./afr-self-heal-daemon.md)
+
+Data Structures
+---------------
+
+- [inode data structure](./datastructure-inode.md)
+- [iobuf data structure](./datastructure-iobuf.md)
+- [mem-pool data structure](./datastructure-mem-pool.md)
+
+Find the gfapi symbol versions [here](./gfapi-symbol-versions.md)
+
+Daemon Management Framework
+---------------------------
+
+- [How to introduce new daemons using daemon management framework](./daemon-management-framework.md)
+
+Translators
+-----------
+
+- [Performance/write-Behind Translator](./write-behind.md)
+- [Translator Development](./translator-development.md)
+- [Storage/posix Translator](./posix.md)
+
+
+Brick multiplex
+---------------
+
+- [Brick mux resource reduction](./brickmux-thread-reduction.md)
+
+Fuse
+----
+
+- [Interrupt handling](./fuse-interrupt.md)
+
+Testing/Debugging
+-----------------
+
+- [Unit Tests in GlusterFS](./unittest.md)
+- [Using the Gluster Test
+ Framework](./Using-Gluster-Test-Framework.md) - Step by
+ step instructions for running the Gluster Test Framework
+- [Coredump Analysis](../debugging/analyzing-regression-cores.md) - Steps to analize coredumps generated by regression machines.
+- [Identifying Resource Leaks](./identifying-resource-leaks.md)
+
+Release Process
+---------------
+- [Versioning](./versioning.md)
diff --git a/doc/developer-guide/Using-Gluster-Test-Framework.md b/doc/developer-guide/Using-Gluster-Test-Framework.md
new file mode 100644
index 00000000000..d2bb1c391da
--- /dev/null
+++ b/doc/developer-guide/Using-Gluster-Test-Framework.md
@@ -0,0 +1,271 @@
+# USing Gluster Test Framwork
+Description
+-----------
+
+The Gluster Test Framework, is a suite of scripts used for regression
+testing of Gluster.
+
+It runs well on RHEL and CentOS (possibly Fedora too, presently being
+tested), and is automatically run against every patch submitted to
+Gluster [for review](http://review.gluster.org).
+
+The Gluster Test Framework is part of the main Gluster code base, living
+under the "tests" subdirectory:
+
+ http://git.gluster.org/?p=glusterfs.git;a=summary
+
+WARNING
+-------
+
+Running the Gluster Test Framework deletes “/var/lib/glusterd/\*â€.
+
+**DO NOT run it on a server with any data.**
+
+Preparation steps for Ubuntu 14.04 LTS
+--------------------------------------
+
+​1. \# apt-get install dbench git libacl1-dev mock nfs-common
+nfs-kernel-server libtest-harness-perl libyajl-dev xfsprogs psmisc attr
+acl lvm2 rpm
+
+​2. \# apt-get install python-webob python-paste python-sphinx
+
+​3. \# apt-get install autoconf automake bison dos2unix flex libfuse-dev
+libaio-dev libibverbs-dev librdmacm-dev libtool libxml2-dev
+libxml2-utils liblvm2-dev make libssl-dev pkg-config libpython-dev
+python-eventlet python-netifaces python-simplejson python-pyxattr
+libreadline-dev tar
+
+​4) Install cmockery2 from github (https://github.com/lpabon/cmockery2)
+and compile and make install as in Readme
+
+5)
+
+ sudo groupadd mock
+ sudo useradd -g mock mock
+
+​6) mkdir /var/run/gluster
+
+**Note**: redhat-rpm-config package is not found in ubuntu
+
+Preparation steps for CentOS 7 (only)
+-------------------------------------
+
+​1. Install EPEL:
+
+ $ sudo yum install -y http://epel.mirror.net.in/epel/7/x86_64/e/epel-release-7-1.noarch.rpm
+
+​2. Install the CentOS 7.x dependencies:
+
+ $ sudo yum install -y --enablerepo=epel cmockery2-devel dbench git libacl-devel mock nfs-utils perl-Test-Harness yajl xfsprogs psmisc
+
+ $ sudo yum install -y --enablerepo=epel python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+
+==\> Despite below missing packages it worked for me
+
+ No package python-webob1.0 available.
+ No package python-paste-deploy1.5 available.
+ No package python-sphinx10 available.
+
+ $ sudo yum install -y --enablerepo=epel autoconf automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+  librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+  python-devel python-eventlet python-netifaces python-paste-deploy \
+  python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+  tar
+
+​3. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Preparation steps for CentOS 6.3+ (only)
+----------------------------------------
+
+​1. Install EPEL:
+
+ $ sudo yum install -y http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
+
+​2. Install the CentOS 6.x dependencies:
+
+ $ sudo yum install -y --enablerepo=epel cmockery2-devel dbench git libacl-devel mock nfs-utils perl-Test-Harness yajl xfsprogs
+ $ sudo yum install -y --enablerepo=epel python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+ $ sudo yum install -y --enablerepo=epel autoconf automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+   librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+   python-devel python-eventlet python-netifaces python-paste-deploy \
+   python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+   tar
+
+​3. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Preparation steps for RHEL 6.3+ (only)
+--------------------------------------
+
+​1. Ensure you have the "Scalable Filesystem Support" group installed
+
+This provides the xfsprogs package, which is required by the test
+framework.
+
+​2. Install EPEL:
+
+ $ sudo yum install -y http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
+
+​3. Install the CentOS 6.x dependencies:
+
+ $ sudo yum install -y --enablerepo=epel cmockery2-devel dbench git libacl-devel mock nfs-utils yajl perl-Test-Harness
+ $ sudo yum install -y --enablerepo=rhel-6-server-optional-rpms python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+ $ sudo yum install -y --disablerepo=rhs* --enablerepo=*optional-rpms autoconf \
+   automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+   librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+   python-devel python-eventlet python-netifaces python-paste-deploy \
+   python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+   tar
+
+​4. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Preparation steps for Fedora 16-19 (only)
+-----------------------------------------
+
+**Still in development**
+
+​1. Install the Fedora dependencies:
+
+ $ sudo yum install -y attr cmockery2-devel dbench git mock nfs-utils perl-Test-Harness psmisc xfsprogs
+ $ sudo yum install -y python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+ $ sudo yum install -y autoconf automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+   librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+   python-devel python-eventlet python-netifaces python-paste-deploy \
+   python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+   tar
+
+​3. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Common steps
+------------
+
+​1. Ensure DNS for your server is working
+
+The Gluster Test Framework fails miserably if the full domain name for
+your server doesn't resolve back to itself.
+
+If you don't have a working DNS infrastructure in place, adding an entry
+for your server to its /etc/hosts file will work.
+
+​2. Install the version of Gluster you are testing
+
+Either install an existing set of rpms:
+
+ $ sudo yum install [your gluster rpms here]
+
+Or compile your own ones (fairly easy):
+
+ http://www.gluster.org/community/documentation/index.php/CompilingRPMS
+
+​3. Clone the GlusterFS git repository
+
+ $ git clone git://git.gluster.org/glusterfs
+ $ cd glusterfs
+
+Ensure mock can access the directory
+------------------------------------
+
+Some tests run as the user "mock". If the mock user can't access the
+tests subdirectory directory, these tests fail. (rpm.t is one such test)
+
+This is a known gotcha when the git repo is cloned to your home
+directory. Home directories generally don't have world readable
+permissions. You can fix this by adjusting your home directory
+permissions, or placing the git repo somewhere else (with access for the
+mock user).
+
+Running the tests
+-----------------
+
+The tests need to run as root, so they can mount volumes and manage
+gluster processes as needed.
+
+It's also best to run them directly as the root user, instead of through
+sudo. Strange things sporadicly happen (for me) when using the full test
+framework through sudo, that haven't happened (yet) when running
+directly as root. Hangs in dbench particularly, which are part of at
+least one test.
+
+ # ./run-tests.sh
+
+The test framework takes just over 45 minutes to run in a VM here (4
+cpu's assigned, 8GB ram, SSD storage). It may take significantly more or
+less time for you, depending on the hardware and software you're using.
+
+Showing debug information
+-------------------------
+
+To display verbose information while the tests are running, set the
+DEBUG environment variable to 1 prior to running the tests.
+
+ # DEBUG=1 ./run-tests.sh
+
+Log files
+---------
+
+Verbose output from the rpm.t test goes into "rpmbuild-mock.log",
+located in the same directory the test is run from.
+
+Reporting bugs
+--------------
+
+If you hit a bug when running the test framework, **please** create a
+bug report for it on Bugzilla so it gets fixed:
+
+ https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS&component=tests
+
+Creating your own tests
+-----------------------
+
+The test scripts are written in bash, with their filenames ending in .t
+instead of .sh.
+
+When creating your own test scripts, create them in an appropriate
+subdirectory under "tests" (eg "bugs" or "features") and use descriptive
+names like "bug-XXXXXXX-checking-feature-X.t"
+
+Also include the "include.rc" file, which defines the test types and
+host/brick/volume defaults:
+
+ . $(dirname $0)/../include.rc
+
+There are 5 test types available at present, but feel free to add more
+if you need something that doesn't yet exist. The test types are
+explained in more detail below.
+
+Also essential is the "cleanup" command, which removes any existing
+Gluster configuration (**without backing it up**), and also kills any
+running gluster processes.
+
+There is a basic test template you can copy, named bug-000000.t in the
+bugs subdirectory:
+
+ $ cp bugs/bug-000000.t somedir/descriptive-name.t
+
+### TEST
+
+- Example of usage in basic/volume.t
+
+### TEST\_IN\_LOOP
+
+- Example of usage in basic/rpm.t
+
+### EXPECT
+
+- Example of usage in basic/volume.t
+
+### EXPECT\_WITHIN
+
+- Example of usage in basic/volume-status.t
+
+### EXPECT\_KEYWORD
+
+- Defined in include.rc, but seems to be unused?
diff --git a/doc/developer-guide/adding-fops.md b/doc/developer-guide/adding-fops.md
new file mode 100644
index 00000000000..3f72ed3e23a
--- /dev/null
+++ b/doc/developer-guide/adding-fops.md
@@ -0,0 +1,18 @@
+Adding a new FOP
+================
+
+Steps to be followed when adding a new FOP to GlusterFS:
+
+1. Edit `glusterfs.h` and add a `GF_FOP_*` constant.
+2. Edit `xlator.[ch]` and:
+ * add the new prototype for fop and callback.
+ * edit `xlator_fops` structure.
+3. Edit `xlator.c` and add to fill_defaults.
+4. Edit `protocol.h` and add struct necessary for the new FOP.
+5. Edit `defaults.[ch]` and provide default implementation.
+6. Edit `call-stub.[ch]` and provide stub implementation.
+7. Edit `common-utils.c` and add to gf_global_variable_init().
+8. Edit client-protocol and add your FOP.
+9. Edit server-protocol and add your FOP.
+10. Implement your FOP in any translator for which the default implementation
+ is not sufficient.
diff --git a/doc/developer-guide/afr-locks-evolution.md b/doc/developer-guide/afr-locks-evolution.md
new file mode 100644
index 00000000000..2dabbcfeb13
--- /dev/null
+++ b/doc/developer-guide/afr-locks-evolution.md
@@ -0,0 +1,91 @@
+History of locking in AFR
+--------------------------
+
+GlusterFS has **locks** translator which provides the following internal locking operations called `inodelk`, `entrylk` which are used by afr to achieve synchronization of operations on files or directories that conflict with each other.
+
+`Inodelk` gives the facility for translators in GlusterFS to obtain range (denoted by tuple with **offset**, **length**) locks in a given **domain** for an inode.
+Full file lock is denoted by the tuple (offset: `0`, length: `0`) i.e. length `0` is considered as infinity.
+
+`Entrylk` enables translators of GlusterFS to obtain locks on `name` in a given **domain** for an inode, typically a directory.
+
+The **locks** translator provides both *blocking* and *nonblocking* variants and of these locks.
+
+
+AFR makes use of locks xlator extensively:
+
+1)For FOPS (from clients)
+-----------------------
+* Data transactions take inode locks on data domain, Let's refer to this domain name as DATA_DOMAIN.
+
+ So locking for writes would be something like this:`inodelk(offset,length, DATA_DOMAIN)`
+ For truncating a file to zero, it would be `inodelk(0,0,DATA_DOMAIN)`
+
+* Metadata transactions (chown/chmod) also take inode locks but on a special range on metadata domain,
+ i.e.`(LLONG_MAX-1 , 0, METADATA_DOMAIN).`
+
+* Entry transactions (create, mkdir, rmdir,unlink, symlink, link,rename) take entrylk on `(name, parent inode)`.
+
+
+2)For self heal:
+-------------
+* For Metadata self-heal, it is the same. i.e.`inodelk(LLONG_MAX-1 , 0, METADATA_DOMAIN)`.
+* For Entry self-heal, it is `entrylk(NULL name, parent inode)`. Specifying NULL for the name takes full lock on the directory referred to by the inode.
+* For data self-heal, there is a bit of history as to how locks evolved:
+
+### Initial version (say version 1) :
+There was no concept of selfheal daemon (shd). Only client lookups triggered heals. so AFR always took `inodelk(0,0,DATA_DOMAIN)` for healing. The issue with this approach was that when heal was in progress, I/O from clients was blocked .
+
+### version 2:
+shd was introduced. We needed to allow I/O to go through when heal was going,provided the ranges did not overlap. To that extent, the following approach was adopted:
+
++ 1.shd takes (full inodelk in DATA_DOMAIN). Thus client FOPS are blocked and cannot modify changelog-xattrs
++ 2.shd inspects xattrs to determine source/sink
++ 3.shd takes a chunk inodelk(0-128kb) again in DATA_DOMAIN (locks xlator allows overlapping locks if lock owner is the same).
++ 4.unlock full lock
++ 5.heal
++ 6.take next chunk lock(129-256kb)
++ 7.unlock 1st chunk lock, heal the second chunk and so on.
+
+
+Thus after 4, any client FOP could write to regions that was not currently under heal. The exception was truncate (to size 0) because it needs full file lock and will always block because some chunk is always under lock by the shd until heal completes.
+
+Another issue was that 2 shds could run in parallel. Say SHD1 and SHD2 compete for step 1. Let SHD1 win. It proceeds and completes step 4. Now SHD2 also succeeds in step 1, continues all steps. Thus at the end both shds will decrement the changelog leading to negative values in it)
+
+### version 3
+To prevent parallel self heals, another domain was introduced, let us call it SELF_HEAL_DOMAIN. With this domain, the following approach was adopted and is **the approach currently in use**:
+
++ 1.shd takes (full inodelk on SELF_HEAL_DOMAIN)
++ 2.shd takes (full inodelk on DATA_DOMAIN)
++ 3.shd inspects xattrs to determine source/sink
++ 4.unlock full lock on DATA_DOMAIN
++ 5.take chunk lock(0-128kb) on DATA_DOMAIN
++ 6.heal
++ 7.take next chunk lock(129-256kb) on DATA_DOMAIN
++ 8.unlock 1st chunk lock, heal and so on.
++ 9.Finally release full lock on SELF_HEAL_DOMAIN
+
+Thus until one shd completes step 9, another shd cannot start step 1, solving the problem of simultaneous heals.
+Note that the issue of truncate (to zero) FOP hanging still remains.
+Also there are multiple network calls involved in this scheme. (lock,heal(ie read+write), unlock) per chunk. i.e 4 calls per chunk.
+
+### version 4 (ToDo)
+Some improvements that need to be made in version 3:
+* Reduce network calls using piggy backing.
+* After taking chunk lock and healing, we need to unlock the lock before locking the next chunk. This gives a window for any pending truncate FOPs to succeed. If truncate succeeds, the heal of next chunk will fail (read returns zero)
+and heal is stopped. *BUT* there is **yet another** issue:
+
+* shd does steps 1 to 4. Let's assume source is brick b1, sink is brick b2 . i.e xattrs are (0,1) and (0,0) on b1 and b2 respectively. Now before shd takes (0-128kb) lock, a client FOP takes it.
+It modifies data but the FOP succeeds only on brick 2. writev returns success, and the attrs now read (0,1) (1,0). SHD takes over and heals. It had observed (0,1),(0,0) earlier
+and thus goes ahead and copies stale 128Kb from brick 1 to brick2. Thus as far as application is concerned, `writev` returned success but bricks have stale data.
+What needs to be done is `writev` must return success only if it succeeded on atleast one source brick (brick b1 in this case). Otherwise The heal still happens in reverse direction but as far as the application is concerned, it received an error.
+
+### Note on lock **domains**
+We have used conceptual names in this document like DATA_DOMAIN/ METADATA_DOMAIN/ SELF_HEAL_DOMAIN. In the code, these are mapped to strings that are based on the AFR xlator name like so:
+
+DATA_DOMAIN --->"vol_name-replicate-n"
+
+METADATA_DOMAIN --->"vol_name-replicate-n:metadata"
+
+SELF_HEAL_DOMAIN -->"vol_name-replicate-n:self-heal"
+
+where vol_name is the name of the volume and 'n' is the replica subvolume index (starting from 0).
diff --git a/doc/developer-guide/afr-self-heal-daemon.md b/doc/developer-guide/afr-self-heal-daemon.md
new file mode 100644
index 00000000000..65940d420b7
--- /dev/null
+++ b/doc/developer-guide/afr-self-heal-daemon.md
@@ -0,0 +1,92 @@
+Self-Heal Daemon
+================
+The self-heal daemon (shd) is a glusterfs process that is responsible for healing files in a replicate/ disperse gluster volume.
+Every server (brick) node of the volume runs one instance of the shd. So even if one node contains replicate/ disperse bricks of
+multiple volumes, it would be healed by the same shd.
+
+This document only describes how the shd works for replicate (AFR) volumes.
+
+The shd is launched by glusterd when the volume starts (only if the volume includes a replicate configuration). The graph
+of the shd process in every node contains the following: The io-stats which is the top most xlator, its children being the
+replicate xlators (subvolumes) of *only* the bricks present in that particular node, and finally *all* the client xlators that are the children of the replicate xlators.
+
+The shd does two types of self-heal crawls: Index heal and Full heal. For both these types of crawls, the basic idea is the same:
+For each file encountered while crawling, perform metadata, data and entry heals under appropriate locks.
+* An overview of how each of these heals is performed is detailed in the 'Self-healing' section of *doc/features/afr-v1.md*
+* The different file locks which the shd takes for each of these heals is detailed in *doc/developer
+-guide/afr-locks-evolution.md*
+
+Metadata heal refers to healing extended attributes, mode and permissions of a file or directory.
+Data heal refers to healing the file contents.
+Entry self-heal refers to healing entries inside a directory.
+
+Index heal
+==========
+The index heal is done:
+ a) Every 600 seconds (can be changed via the `cluster.heal-timeout` volume option)
+ b) When it is explicitly triggered via the `gluster vol heal <VOLNAME>` command
+ c) Whenever a replica brick that was down comes back up.
+
+Only one heal can be in progress at one time, irrespective of reason why it was triggered. If another heal is triggered before the first one completes, it will be queued.
+Only one heal can be queued while the first one is running. If an Index heal is queued, it can be overridden by queuing a Full heal and not vice-versa. Also, before processing
+each entry in index heal, a check is made if a full heal is queued. If it is, then the index heal is aborted so that the full heal can proceed.
+
+In index heal, each shd reads the entries present inside .glusterfs/indices/xattrop/ folder and triggers heal on each entry with appropriate locks.
+The .glusterfs/indices/xattrop/ directory contains a base entry of the name "xattrop-<virtual-gfid-string>". All other entries are hardlinks to the base entry. The
+*names* of the hardlinks are the gfid strings of the files that may need heal.
+
+When a client (mount) performs an operation on the file, the index xlator present in each brick process adds the hardlinks in the pre-op phase of the FOP's transaction
+and removes it in post-op phase if the operation is successful. Thus if an entry is present inside the .glusterfs/indices/xattrop/ directory when there is no I/O
+happening on the file, it means the file needs healing (or atleast an examination if the brick crashed after the post-op completed but just before the removal of the hardlink).
+
+#### Index heal steps:
+<pre><code>
+In shd process of *each node* {
+ opendir +readdir (.glusterfs/indices/xattrop/)
+ for each entry inside it {
+ self_heal_entry() //Explained below.
+ }
+}
+</code></pre>
+
+<pre><code>
+self_heal_entry() {
+ Call syncop_lookup(replicae subvolume) which eventually does {
+ take appropriate locks
+ determine source and sinks from AFR changelog xattrs
+ perform whatever heal is needed (any of metadata, data and entry heal in that order)
+ clear changelog xattrs and hardlink inside .glusterfs/indices/xattrop/
+ }
+}
+</code></pre>
+
+Note:
+* If the gfid hardlink is present in the .glusterfs/indices/xattrop/ of both replica bricks, then each shd will try to heal the file but only one of them will be able to proceed due to the self-heal domain lock.
+
+* While processing entries inside .glusterfs/indices/xattrop/, if shd encounters an entry whose parent is yet to be healed, it will skip it and it will be picked up in the next crawl.
+
+* If a file is in data/ metadata split-brain, it will not be healed.
+
+* If a directory is in entry split-brain, a conservative merge will be performed, wherein after the merge, the entries of the directory will be a union of the entries in the replica pairs.
+
+Full heal
+=========
+A full heal is triggered by running `gluster vol heal <VOLNAME> full`. This command is usually run in disk replacement scenarios where the entire data is to be copied from one of the healthy bricks of the replica to the brick that was just replaced.
+
+Unlike the index heal which runs on the shd of every node in a replicate subvolume, the full heal is run only on the shd of one node per replicate subvolume: the node having the highest UUID.
+i.e In a 2x2 volume made of 4 nodes N1, N2, N3 and N4, If UUID of N1>N2 and UUID N4 >N3, then the full crawl is carried out by the shds of N1 and N4.(Node UUID can be found in `/var/lib/glusterd/glusterd.info`)
+
+The full heal steps are almost identical to the index heal, except the heal is performed on each replica starting from the root of the volume:
+<pre><code>
+In shd process of *highest UUID node per replica* {
+ opendir +readdir ("/")
+ for each entry inside it {
+ self_heal_entry()
+ if (entry == directory) {
+ /* Recurse*/
+ again opendir+readdir (directory) followed by self_heal_entry() of each entry.
+ }
+
+ }
+}
+</code></pre>
diff --git a/doc/developer-guide/afr.md b/doc/developer-guide/afr.md
new file mode 100644
index 00000000000..566573a4e26
--- /dev/null
+++ b/doc/developer-guide/afr.md
@@ -0,0 +1,191 @@
+cluster/afr translator
+======================
+
+Locking
+-------
+
+Before understanding replicate, one must understand two internal FOPs:
+
+### `GF_FILE_LK`
+
+This is exactly like `fcntl(2)` locking, except the locks are in a
+separate domain from locks held by applications.
+
+### `GF_DIR_LK (loc_t *loc, char *basename)`
+
+This allows one to lock a name under a directory. For example,
+to lock /mnt/glusterfs/foo, one would use the call:
+
+```
+GF_DIR_LK ({loc_t for "/mnt/glusterfs"}, "foo")
+```
+
+If one wishes to lock *all* the names under a particular directory,
+supply the basename argument as `NULL`.
+
+The locks can either be read locks or write locks; consult the
+function prototype for more details.
+
+Both these operations are implemented by the features/locks (earlier
+known as posix-locks) translator.
+
+Basic design
+------------
+
+All FOPs can be classified into four major groups:
+
+### inode-read
+
+Operations that read an inode's data (file contents) or metadata (perms, etc.).
+
+access, getxattr, fstat, readlink, readv, stat.
+
+### inode-write
+
+Operations that modify an inode's data or metadata.
+
+chmod, chown, truncate, writev, utimens.
+
+### dir-read
+
+Operations that read a directory's contents or metadata.
+
+readdir, getdents, checksum.
+
+### dir-write
+
+Operations that modify a directory's contents or metadata.
+
+create, link, mkdir, mknod, rename, rmdir, symlink, unlink.
+
+Some of these make a subgroup in that they modify *two* different entries:
+link, rename, symlink.
+
+### Others
+
+Other operations.
+
+flush, lookup, open, opendir, statfs.
+
+Algorithms
+----------
+
+Each of the four major groups has its own algorithm:
+
+### inode-read, dir-read
+
+1. Send a request to the first child that is up:
+ * if it fails:
+ * try the next available child
+ * if we have exhausted all children:
+ * return failure
+
+### inode-write
+
+ All operations are done in parallel unless specified otherwise.
+
+1. Send a ``GF_FILE_LK`` request on all children for a write lock on the
+ appropriate region
+ (for metadata operations: entire file (0, 0) for writev:
+ (offset, offset+size of buffer))
+ * If a lock request fails on a child:
+ * unlock all children
+ * try to acquire a blocking lock (`F_SETLKW`) on each child, serially.
+ If this fails (due to `ENOTCONN` or `EINVAL`):
+ Consider this child as dead for rest of transaction.
+2. Mark all children as "pending" on all (alive) children (see below for
+meaning of "pending").
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+3. Perform operation on all (alive) children.
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+4. Unmark all successful children as not "pending" on all nodes.
+5. Unlock region on all (alive) children.
+
+### dir-write
+
+ The algorithm for dir-write is same as above except instead of holding
+ `GF_FILE_LK` locks we hold a GF_DIR_LK lock on the name being operated upon.
+ In case of link-type calls, we hold locks on both the operand names.
+
+"pending"
+---------
+
+The "pending" number is like a journal entry. A pending entry is an
+array of 32-bit integers stored in network byte-order as the extended
+attribute of an inode (which can be a directory as well).
+
+There are three keys corresponding to three types of pending operations:
+
+### `AFR_METADATA_PENDING`
+
+There are some metadata operations pending on this inode (perms, ctime/mtime,
+xattr, etc.).
+
+### `AFR_DATA_PENDING`
+
+There is some data pending on this inode (writev).
+
+### `AFR_ENTRY_PENDING`
+
+There are some directory operations pending on this directory
+(create, unlink, etc.).
+
+Self heal
+---------
+
+* On lookup, gather extended attribute data:
+ * If entry is a regular file:
+ * If an entry is present on one child and not on others:
+ * create entry on others.
+ * If entries exist but have different metadata (perms, etc.):
+ * consider the entry with the highest `AFR_METADATA_PENDING` number as
+ definitive and replicate its attributes on children.
+ * If entry is a directory:
+ * Consider the entry with the highest `AFR_ENTRY_PENDING` number as
+ definitive and replicate its contents on all children.
+ * If any two entries have non-matching types (i.e., one is file and
+ other is directory):
+ * Announce to the user via log that a split-brain situation has been
+ detected, and do nothing.
+* On open, gather extended attribute data:
+ * Consider the file with the highest `AFR_DATA_PENDING` number as
+ the definitive one and replicate its contents on all other
+ children.
+
+During all self heal operations, appropriate locks must be held on all
+regions/entries being affected.
+
+Inode scaling
+-------------
+
+Inode scaling is necessary because if a situation arises where an inode number
+is returned for a directory (by lookup) which was previously the inode number
+of a file (as per FUSE's table), then FUSE gets horribly confused (consult a
+FUSE expert for more details).
+
+To avoid such a situation, we distribute the 64-bit inode space equally
+among all children of replicate.
+
+To illustrate:
+
+If c1, c2, c3 are children of replicate, they each get 1/3 of the available
+inode space:
+
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+Child: c1 c2 c3 c1 c2 c3 c1 c2 c3 c1 c2 ...
+Inode number: 1 2 3 4 5 6 7 8 9 10 11 ...
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+
+Thus, if lookup on c1 returns an inode number "2", it is scaled to "4"
+(which is the second inode number in c1's space).
+
+This way we ensure that there is never a collision of inode numbers from
+two different children.
+
+This reduction of inode space doesn't really reduce the usability of
+replicate since even if we assume replicate has 1024 children (which would be a
+highly unusual scenario), each child still has a 54-bit inode space:
+$2^{54} \sim 1.8 \times 10^{16}$, which is much larger than any real
+world requirement.
diff --git a/doc/developer-guide/brickmux-thread-reduction.md b/doc/developer-guide/brickmux-thread-reduction.md
new file mode 100644
index 00000000000..7d76e8ff579
--- /dev/null
+++ b/doc/developer-guide/brickmux-thread-reduction.md
@@ -0,0 +1,64 @@
+# Resource usage reduction in brick multiplexing
+
+Each brick is regresented with a graph of translators in a brick process.
+Each translator in the graph has its own set of threads and mem pools
+and other system resources allocations. Most of the times all these
+resources are not put to full use. Reducing the resource consumption
+of each brick is a problem in itself that needs to be addressed. The other
+aspect to it is, sharing of resources across brick graph, this becomes
+critical in brick multiplexing scenario. In this document we will be discussing
+only about the threads.
+
+If a brick mux process hosts 50 bricks there are atleast 600+ threads created
+in that process. Some of these are global threads that are shared by all the
+brick graphs, and others are per translator threads. The global threads like
+synctask threads, timer threads, sigwaiter, poller etc. are configurable and
+do not needs to be reduced. The per translator threads keeps growing as the
+number of bricks in the process increases. Each brick spawns atleast 10+
+threads:
+- io-threads
+- posix threads:
+ 1. Janitor
+ 2. Fsyncer
+ 3. Helper
+ 4. aio-thread
+- changelog and bitrot threads(even when the features are not enabled)
+
+## io-threads
+
+io-threads should be made global to the process, having 16+ threads for
+each brick does not make sense. But io-thread translator is loaded in
+the graph, and the position of io-thread translator decides from when
+the fops will be parallelised across threads. We cannot entirely move
+the io-threads to libglusterfs and say the multiplexing happens from
+the master translator or so. Hence, the io-thread orchestrator code
+is moved to libglusterfs, which ensures there is only one set of
+io-threads that is shared among the io-threads translator in each brick.
+This poses performance issues due to lock-contention in the io-threds
+layer. This also shall be addressed by having multiple locks instead of
+one global lock for io-threads.
+
+## Posix threads
+Most of the posix threads execute tasks in a timely manner, hence it can be
+replaced with a timer whose handler register a task to synctask framework, once
+the task is complete, the timer is registered again. With this we can eliminate
+the need of one thread for each task. The problem with using synctasks is
+the performance impact it will have due to make/swapcontext. For task that
+does not involve network wait, we need not do makecontext, instead the task
+function with arg can be stored and executed when a synctask thread is free.
+We need to implement an api in synctask to execute atomic tasks(no network wait)
+without the overhead of make/swapcontext. This will solve the performance
+impact associated with using synctask framework.
+
+And the other challenge, is to cancel all the tasks pending from a translator.
+This is important to cleanly detach brick. For this, we need to implement an
+api in synctask that can cancel all the tasks from a given translator.
+
+For future, this will be replced to use global thread-pool(once implemented).
+
+## Changelog and bitrot threads
+
+In the initial implementation, the threads are not created if the feature is
+not enabled. We need to share threads across changelog instances if we plan
+to enable these features in brick mux scenario.
+
diff --git a/doc/developer-guide/coding-standard.md b/doc/developer-guide/coding-standard.md
new file mode 100644
index 00000000000..031c6c0da99
--- /dev/null
+++ b/doc/developer-guide/coding-standard.md
@@ -0,0 +1,697 @@
+GlusterFS Coding Standards
+==========================
+
+Before you get started
+----------------------
+Before starting with other part of coding standard, install `clang-format`
+
+On Fedora:
+```
+$ dnf install clang
+```
+On debian/Ubuntu:
+```
+$ apt-get install clang
+```
+Once you are done with all the local changes, you need to run below set of commands,
+before submitting the patch for review.
+```
+$ git add $file # if any
+$ git commit -a -s -m "commit message"
+$ git show --pretty="format:" --name-only | grep -v "contrib/" | egrep "*\.[ch]$" | xargs clang-format -i
+$ git diff # see if there are any changes
+$ git commit -a --amend # get the format changes done
+$ ./submit-for-review.sh
+```
+
+
+Structure definitions should have a comment per member
+------------------------------------------------------
+
+Every member in a structure definition must have a comment about its purpose.
+The comment should be descriptive without being overly verbose. For pointer
+members, lifecycle concerns for the pointed-to object should be noted. For lock
+members, the relationship between the lock member and the other members it
+protects should be explicit.
+
+*Bad:*
+
+```
+gf_lock_t lock; /* lock */
+```
+
+*Good:*
+
+```
+DBTYPE access_mode; /* access mode for accessing
+ * the databases, can be
+ * DB_HASH, DB_BTREE
+ * (option access-mode <mode>)
+ */
+```
+
+Structure members should be aligned based on the padding requirements
+---------------------------------------------------------------------
+
+The compiler will make sure that structure members have optimum alignment,
+but at the expense of suboptimal padding. More important is to optimize the
+padding. The compiler won't do that for you.
+
+This also will help utilize the memory better
+
+*Bad:*
+```
+struct bad {
+ bool b; /* 0 */
+ /* 1..7 pad */
+ void *p; /* 8..15 */
+ char c; /* 16 */
+ char a[16]; /* 17..33 */
+ /* 34..39 pad */
+ int64_t ii; /* 40..47 */
+ int32_t i; /* 48..51 */
+ /* 52..55 pad */
+ int64_t iii; /* 56..63 */
+};
+```
+
+*Good:*
+```
+struct good {
+ int64_t ii; /* explicit 64-bit types */
+ void *p; /* may be 64- or 32-bit */
+ long l; /* may be 64- or 32-bit */
+ int i; /* 32-bit */
+ short s; /* 16-bit */
+ char c; /* 8-bit */
+ bool b; /* 8-bit */
+ char a[1024];
+);
+```
+Make sure the items with the most stringent alignment requirements will need
+to come earliest (ie, pointers and perhaps uint64_t etc), and those with less
+stringent alignment requirements at the end (uint16/uint8 and char). Also note
+that the long array (if any) should be at the end of the structure, regardless
+of the type.
+
+Also note, if your structure's overall size is crossing 1k-4k limit, it is
+recommended to mention the reason why the particular structure needs so much
+memory as a comment at the top.
+
+Use \_typename for struct tags and typename\_t for typedefs
+---------------------------------------------------------
+
+Being consistent here makes it possible to automate navigation from use of a
+type to its true definition (not just the typedef).
+
+*Bad:*
+
+```
+struct thing {...};
+struct thing_t {...};
+typedef struct _thing thing;
+```
+
+*Good:*
+
+```
+typedef struct _thing {...} thing_t;
+```
+
+No double underscores
+---------------------
+
+Identifiers beginning with double underscores are supposed to reserved for the
+compiler.
+
+http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf
+
+When you need to define inner/outer functions, use a different prefix/suffix.
+
+*Bad:*
+
+```
+void __do_something (void);
+
+void
+do_something (void)
+{
+ LOCK ();
+ __do_something ();
+ UNLOCK ();
+}
+```
+
+*Good:*
+
+```
+void do_something_locked (void);
+```
+
+Only use safe pointers in initializers
+----------------------------------------------------------
+
+Some pointers, such as `this` in a fop function, can be assumed to be non-NULL.
+However, other parameters and further-derived values might be NULL.
+
+*Good:*
+
+```
+pid_t pid = frame->root->pid;
+```
+
+
+*Bad:*
+
+```
+data_t *my_data = dict_get (xdata, "fubar");
+```
+
+No giant stack allocations
+--------------------------
+
+Synctasks have small finite stacks. To avoid overflowing these stacks, avoid
+allocating any large data structures on the stack. Use dynamic allocation
+instead.
+
+*Bad:*
+
+```
+gf_boolean_t port_inuse[65536]; /* 256KB, this actually happened */
+```
+
+NOTE: Ideal is to limit the stack array to less than 256 bytes.
+
+
+Character array initializing
+----------------------------
+
+It is recommended to keep the character array initializing to empty string.
+
+*Good:*
+```
+char msg[1024] = "";
+```
+
+Not so much recommended, even though it means the same.
+
+```
+char msg[1024] = {0,};
+```
+
+We recommend above to structure initialization.
+
+
+
+Validate all arguments to a function
+------------------------------------
+
+All pointer arguments to a function must be checked for `NULL`.
+A macro named `GF_VALIDATE_OR_GOTO` (in `common-utils.h`)
+takes two arguments; if the first is `NULL`, it writes a log message and
+jumps to a label specified by the second aergument after setting errno
+appropriately. There are several variants of this function for more
+specific purposes, and their use is recommended.
+
+*Bad:*
+
+```
+/* top of function */
+ret = dict_get (xdata, ...)
+```
+
+*Good:*
+
+```
+/* top of function */
+GF_VALIDATE_OR_GOTO(xdata,out);
+ret = dict_get (xdata, ...)
+```
+
+Never rely on precedence of operators
+-------------------------------------
+
+Never write code that relies on the precedence of operators to execute
+correctly. Such code can be hard to read and someone else might not
+know the precedence of operators as accurately as you do. This includes
+precedence of increment/decrement vs. field/subscript. The only exceptions are
+arithmetic operators (which have had defined precedence since before computers
+even existed) and boolean negation.
+
+*Bad:*
+
+```
+if (op_ret == -1 && errno != ENOENT)
+++foo->bar /* incrementing foo, or incrementing foo->bar? */
+a && b || !c
+```
+
+*Good:*
+
+```
+if ((op_ret == -1) && (errno != ENOENT))
+(++foo)->bar
+++(foo->bar)
+(a && b) || !c
+a && (b || !c)
+```
+
+Use exactly matching types
+--------------------------
+
+Use a variable of the exact type declared in the manual to hold the
+return value of a function. Do not use an 'equivalent' type.
+
+
+*Bad:*
+
+```
+int len = strlen (path);
+```
+
+*Good:*
+
+```
+size_t len = strlen (path);
+```
+
+Avoid code such as `foo->bar->baz`; check every pointer
+-------------------------------------------------------------
+
+Do not write code that blindly follows a chain of pointer references. Any
+pointer in the chain may be `NULL` and thus cause a crash. Verify that each
+pointer is non-null before following it. Even if `foo->bar` has been checked
+and is known safe, repeating it can make code more verbose and less clear.
+
+This rule includes `[]` as well as `->` because both dereference pointers.
+
+*Bad:*
+
+```
+foo->bar->field1 = value1;
+xyz = foo->bar->field2 + foo->bar->field3 * foo->bar->field4;
+foo->bar[5].baz
+```
+
+*Good:*
+
+```
+my_bar = foo->bar;
+if (!my_bar) ... return;
+my_bar->field1 = value1;
+xyz = my_bar->field2 + my_bar->field3 * my_bar->field4;
+```
+
+Document unchecked return values
+----------------------------------------------------
+
+In general, return values should be checked. If a function is being called
+for its side effects and the return value really doesn't matter, an explicit
+cast to void is required (to keep static analyzers happy) and a comment is
+recommended.
+
+*Bad:*
+
+```
+close (fd);
+do_important_thing ();
+```
+
+*Good (or at least OK):*
+
+```
+(void) sleep (1);
+```
+
+Gracefully handle failure of malloc (and other allocation functions)
+--------------------------------------------------------------------
+
+GlusterFS should never crash or exit due to lack of memory. If a
+memory allocation fails, the call should be unwound and an error
+returned to the user.
+
+*Use result args and reserve the return value to indicate success or failure:*
+
+The return value of every functions must indicate success or failure (unless
+it is impossible for the function to fail --- e.g., boolean functions). If
+the function needs to return additional data, it must be returned using a
+result (pointer) argument.
+
+*Bad:*
+
+```
+int32_t dict_get_int32 (dict_t *this, char *key);
+```
+
+*Good:*
+
+```
+int dict_get_int32 (dict_t *this, char *key, int32_t *val);
+```
+
+Always use the 'n' versions of string functions
+-----------------------------------------------
+
+Unless impossible, use the length-limited versions of the string functions.
+
+*Bad:*
+
+```
+strcpy (entry_path, real_path);
+```
+
+*Good:*
+
+```
+strncpy (entry_path, real_path, entry_path_len);
+```
+
+Do not use memset prior to sprintf/snprintf/vsnprintf etc...
+------------------------------------------------------------
+snprintf(and other similar string functions) terminates the buffer with a
+'\0'(null character). Hence, there is no need to do a memset before using
+snprintf. (Of course you need to account one extra byte for the null character
+in your allocation).
+
+Note: Similarly if you are doing pre-memory allocation for the buffer, use
+GF_MALLOC instead of GF_CALLOC, since the later is bit costlier.
+
+*Bad:*
+
+```
+char buffer[x];
+memset (buffer, 0, x);
+bytes_read = snprintf (buffer, sizeof buffer, "bad standard");
+```
+
+*Good:*
+```
+char buffer[x];
+bytes_read = snprintf (buffer, sizeof (buffer), "good standard");
+```
+
+And it is always to good initialize the char array if the string is static.
+
+E.g.
+```
+char buffer[] = "good standard";
+```
+
+No dead or commented code
+-------------------------
+
+There must be no dead code (code to which control can never be passed) or
+commented out code in the codebase.
+
+Function length or Keep functions small
+---------------------------------------
+
+We live in the UNIX-world where modules do one thing and do it well.
+This rule should apply to our functions also. If a function is very long, try splitting it
+into many little helper functions. The question is, in a coding
+spree, how do we know a function is long and unreadable. One rule of
+thumb given by Linus Torvalds is that, a function should be broken-up
+if you have 4 or more levels of indentation going on for more than 3-4
+lines.
+
+*Example for a helper function:*
+```
+static int
+same_owner (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return ((l1->client_pid == l2->client_pid) &&
+ (l1->transport == l2->transport));
+}
+```
+
+Define functions as static
+--------------------------
+
+Declare functions as static unless they're exposed via a module-level API for
+use from other modules.
+
+No nested functions
+-------------------
+
+Nested functions have proven unreliable, e.g. as callbacks in code that uses
+ucontext (green) threads,
+
+Use inline functions instead of macros whenever possible
+--------------------------------------------------------
+
+Inline functions enforce type safety; macros do not. Use macros only for things
+that explicitly need to be type-agnostic (e.g. cases where one might use
+generics or templates in other languages), or that use other preprocessor
+features such as `#` for stringification or `##` for token pasting. In general,
+"static inline" is the preferred form.
+
+Avoid copypasta
+---------------
+
+Code that is copied and then pasted into multiple functions often creates
+maintenance problems later, e.g. updating all but one instance for a subsequent
+change. If you find yourself copying the same "boilerplate" many places,
+consider refactoring to use helper functions (including inline) or macros, or
+code generation.
+
+Ensure function calls wrap around after 80-columns
+--------------------------------------------------
+
+Place remaining arguments on the next line if needed.
+
+Functions arguments and function definition
+-------------------------------------------
+
+Place all the arguments of a function definition on the same line
+until the line goes beyond 80-cols. Arguments that extend beyind
+80-cols should be placed on the next line.
+
+Style issues
+------------
+
+### Brace placement
+
+Use K&R/Linux style of brace placement for blocks.
+
+*Good:*
+
+```
+int some_function (...)
+{
+ if (...) {
+ /* ... */
+ } else if (...) {
+ /* ... */
+ } else {
+ /* ... */
+ }
+
+ do {
+ /* ... */
+ } while (cond);
+}
+```
+
+### Indentation
+
+Use *eight* spaces for indenting blocks. Ensure that your
+file contains only spaces and not tab characters. You can do this
+in Emacs by selecting the entire file (`C-x h`) and
+running `M-x untabify`.
+
+To make Emacs indent lines automatically by eight spaces, add this
+line to your `.emacs`:
+
+```
+(add-hook 'c-mode-hook (lambda () (c-set-style "linux")))
+```
+
+### Comments
+
+Write a comment before every function describing its purpose (one-line),
+its arguments, and its return value. Mention whether it is an internal
+function or an exported function.
+
+Write a comment before every structure describing its purpose, and
+write comments about each of its members.
+
+Follow the style shown below for comments, since such comments
+can then be automatically extracted by doxygen to generate
+documentation.
+
+*Good:*
+
+```
+/**
+* hash_name -hash function for filenames
+* @par: parent inode number
+* @name: basename of inode
+* @mod: number of buckets in the hashtable
+*
+* @return: success: bucket number
+* failure: -1
+*
+* Not for external use.
+*/
+```
+
+### Indicating critical sections
+
+To clearly show regions of code which execute with locks held, use
+the following format:
+
+```
+pthread_mutex_lock (&mutex);
+{
+ /* code */
+}
+pthread_mutex_unlock (&mutex);
+```
+
+### Always use braces
+
+Even around single statements.
+
+*Bad:*
+
+```
+if (condition) action ();
+
+if (condition)
+ action ();
+```
+
+*Good:*
+
+```
+if (condition) {
+ action ();
+}
+```
+
+### Avoid multi-line conditionals
+
+These can be hard to read and even harder to modify later. Predicate functions
+and helper variables are always better for maintainability.
+
+*Bad:*
+
+```
+if ((thing1 && other_complex_condition (thing1, lots, of, args))
+ || (!thing2 || even_more_complex_condition (thing2))
+ || all_sorts_of_stuff_with_thing3) {
+ return;
+}
+
+```
+
+*Better:*
+
+```
+thing1_ok = predicate1 (thing1, lots, of, args
+thing2_ok = predicate2 (thing2);
+thing3_ok = predicate3 (thing3);
+
+if (!thing1_ok || !thing2_ok || !thing3_ok) {
+ return;
+}
+```
+
+*Best:*
+
+```
+if (thing1 && other_complex_condition (thing1, lots, of, args)) {
+ return;
+}
+if (!thing2 || even_more_complex_condition (thing2)) {
+ /* Note potential for a different message here. */
+ return;
+}
+if (all_sorts_of_stuff_with_thing3) {
+ /* And here too. */
+ return;
+}
+```
+
+### Use 'const' liberally
+
+If a value isn't supposed/expected to change, there's no cost to adding a
+'const' keyword and it will help prevent violation of expectations.
+
+### Avoid global variables (including 'static' auto variables)
+Almost all state in Gluster is contextual and should be contained in the
+appropriate structure reflecting its scope (e.g. `call\_frame\_t`, `call\_stack\_t`,
+`xlator\_t`, `glusterfs\_ctx\_t`). With dynamic loading and graph switches in play,
+each global requires careful consideration of when it should be initialized or
+reinitialized, when it might _accidentally_ be reinitialized, when its value
+might become stale, and so on. A few global variables are needed to serve as
+'anchor points' for these structures, and more exceptions to the rule might be
+approved in the future, but new globals should not be added to the codebase
+without explicit approval.
+
+## A skeleton fop function
+
+This is the recommended template for any fop. In the beginning come the
+initializations. After that, the 'success' control flow should be linear. Any
+error conditions should cause a `goto` to a label at the end. By convention
+this is 'out' if there is only one such label, but a cascade of such labels is
+allowable to support multi-stage cleanup. At that point, the code should detect
+the error that has occurred and do appropriate cleanup.
+
+```
+int32_t
+sample_fop (call_frame_t *frame, xlator_t *this, ...)
+{
+ char * var1 = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ DIR * dir = NULL;
+ struct posix_fd * pfd = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+
+ /* other validations */
+
+ dir = opendir (...);
+
+ if (dir == NULL) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s (%s)", loc->path,
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* another system call */
+ if (...) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory :(");
+ goto out;
+ }
+
+ /* ... */
+
+ out:
+ if (op_ret == -1) {
+
+ /* check for all the cleanup that needs to be
+ done */
+
+ if (dir) {
+ closedir (dir);
+ dir = NULL;
+ }
+
+ if (pfd) {
+ FREE (pfd->path);
+ FREE (pfd);
+ pfd = NULL;
+ }
+ }
+
+ STACK_UNWIND (frame, op_ret, op_errno, fd);
+ return 0;
+}
+```
diff --git a/doc/developer-guide/commit-guidelines.md b/doc/developer-guide/commit-guidelines.md
new file mode 100644
index 00000000000..38bbe525cbd
--- /dev/null
+++ b/doc/developer-guide/commit-guidelines.md
@@ -0,0 +1,136 @@
+## Git Commit Good Practice
+
+The following document is based on experience doing code development, bug troubleshooting and code review across a number of projects using Git. The document is mostly borrowed from [Open Stack](https://wiki.openstack.org/wiki/GitCommitMessages), but made more meaningful in the context of GlusterFS project.
+
+This topic can be split into two areas of concern
+
+* The structured set/split of the code changes
+* The information provided in the commit message
+
+### Executive Summary
+The points and examples that will be raised in this document ought to clearly demonstrate the value in splitting up changes into a sequence of individual commits, and the importance in writing good commit messages to go along with them. If these guidelines were widely applied it would result in a significant improvement in the quality of the GlusterFS Git history. Both a carrot & stick will be required to effect changes. This document intends to be the carrot by alerting people to the benefits, while anyone doing Gerrit code review can act as the stick ;-P
+
+In other words, when reviewing a change in Gerrit:
+* Do not simply look at the correctness of the code.
+* Review the commit message itself and request improvements to its content.
+* Look out for commits which are mixing multiple logical changes and require the submitter to split them into separate commits.
+* Ensure whitespace changes are not mixed in with functional changes.
+* Ensure no-op code refactoring is done separately from functional changes.
+
+And so on.
+
+It might be mentioned that Gerrit's handling of patch series is not entirely perfect. Let that not become a valid reason to avoid creating patch series. The tools being used should be subservient to developers needs, and since they are open source they can be fixed / improved. Software source code is "read mostly, write occassionally" and thus the most important criteria is to improve the long term maintainability by the large pool of developers in the community, and not to sacrifice too much for the sake of the single author who may never touch the code again.
+
+And now the long detailed guidelines & examples of good & bad practice
+
+### Structural split of changes
+The cardinal rule for creating good commits is to ensure there is only one "logical change" per commit. There are many reasons why this is an important rule:
+
+* The smaller the amount of code being changed, the quicker & easier it is to review & identify potential flaws.
+* If a change is found to be flawed later, it may be necessary to revert the broken commit. This is much easier to do if there are not other unrelated code changes entangled with the original commit.
+* When troubleshooting problems using Git's bisect capability, small well defined changes will aid in isolating exactly where the code problem was introduced.
+* When browsing history using Git annotate/blame, small well defined changes also aid in isolating exactly where & why a piece of code came from.
+
+#### Things to avoid when creating commits
+With the above points in mind, there are some commonly encountered examples of bad things to avoid
+
+* Mixing whitespace changes with functional code changes.
+
+The whitespace changes will obscure the important functional changes, making it harder for a reviewer to correctly determine whether the change is correct. Solution: Create 2 commits, one with the whitespace changes, one with the functional changes. Typically the whitespace change would be done first, but that need not be a hard rule.
+
+* Mixing two unrelated functional changes.
+
+Again the reviewer will find it harder to identify flaws if two unrelated changes are mixed together. If it becomes necessary to later revert a broken commit, the two unrelated changes will need to be untangled, with further risk of bug creation.
+
+* Sending large new features in a single giant commit.
+
+It may well be the case that the code for a new feature is only useful when all of it is present. This does not, however, imply that the entire feature should be provided in a single commit. New features often entail refactoring existing code. It is highly desirable that any refactoring is done in commits which are separate from those implementing the new feature. This helps reviewers and test suites validate that the refactoring has no unintentional functional changes.
+
+Even the newly written code can often be split up into multiple pieces that can be independently reviewed. For example, changes which add new internal fops or library functions, can be in self-contained commits. Again this leads to easier code review. It also allows other developers to cherry-pick small parts of the work, if the entire new feature is not immediately ready for merge. This will encourage the author & reviewers to think about the generic library functions' design, and not simply pick a design that is easier for their currently chosen internal implementation.
+
+The basic rule to follow is
+
+If a code change can be split into a sequence of patches/commits, then it should be split. Less is not more. More is more.
+
+##### Examples of bad practice
+
+TODO: Pick glusterfs specific example.
+
+
+##### Examples of good practice
+
+
+### Information in commit messages
+As important as the content of the change, is the content of the commit message describing it. When writing a commit message there are some important things to remember
+
+* Do not assume the reviewer understands what the original problem was.
+
+When reading bug reports, after a number of back & forth comments, it is often as clear as mud, what the root cause problem is. The commit message should have a clear statement as to what the original problem is. The bug is merely interesting historical background on /how/ the problem was identified. It should be possible to review a proposed patch for correctness without needing to read the bug ticket.
+
+* Do not assume the reviewer has access to external web services/site.
+
+In 6 months time when someone is on a train/plane/coach/beach/pub troubleshooting a problem & browsing Git history, there is no guarantee they will have access to the online bug tracker, or online blueprint documents. The great step forward with distributed SCM is that you no longer need to be "online" to have access to all information about the code repository. The commit message should be totally self-contained, to maintain that benefit.
+
+* Do not assume the code is self-evident/self-documenting.
+
+What is self-evident to one person, might be clear as mud to another person. Always document what the original problem was and how it is being fixed, for any change except the most obvious typos, or whitespace only commits.
+
+* Describe why a change is being made.
+
+A common mistake is to just document how the code has been written, without describing /why/ the developer chose to do it that way. By all means describe the overall code structure, particularly for large changes, but more importantly describe the intent/motivation behind the changes.
+
+* Read the commit message to see if it hints at improved code structure.
+
+Often when describing a large commit message, it becomes obvious that a commit should have in fact been split into 2 or more parts. Don't be afraid to go back and rebase the change to split it up into separate commits.
+
+* Ensure sufficient information to decide whether to review.
+
+When Gerrit sends out email alerts for new patch submissions there is minimal information included, principally the commit message and the list of files changes. Given the high volume of patches, it is not reasonable to expect all reviewers to examine the patches in detail. The commit message must thus contain sufficient information to alert the potential reviewers to the fact that this is a patch they need to look at.
+
+* The first commit line is the most important.
+
+In Git commits the first line of the commit message has special significance. It is used as email subject line, git annotate messages, gitk viewer annotations, merge commit messages and many more places where space is at a premium. As well as summarizing the change itself, it should take care to detail what part of the code is affected. eg if it is 'afr', 'dht' or any translator. Or in some cases, it can be touching all these components, but the commit message can be 'coverity:', 'txn-framework:', 'new-fop: ', etc.
+
+* Describe any limitations of the current code.
+
+If the code being changed still has future scope for improvements, or any known limitations then mention these in the commit message. This demonstrates to the reviewer that the broader picture has been considered and what tradeoffs have been done in terms of short term goals vs. long term wishes.
+
+* Do not include patch set-specific comments.
+
+In other words, if you rebase your change please don't add "Patch set 2: rebased" to your commit message. That isn't going to be relevant once your change has merged. Please do make a note of that in Gerrit as a comment on your change, however. It helps reviewers know what changed between patch sets. This also applies to comments such as "Added unit tests", "Fixed localization problems", or any other such patch set to patch set changes that don't affect the overall intent of your commit.
+
+**The main rule to follow is:**
+
+The commit message must contain all the information required to fully understand & review the patch for correctness. Less is not more. More is more.
+
+
+#### Including external references
+
+The commit message is primarily targeted towards human interpretation, but there is always some metadata provided for machine use. In the case of GlusterFS this includes at least the 'Change-id', "bug"/"feature" ID references and "Signed-off-by" tag (generated by 'git commit -s').
+
+The 'Change-id' line is a unique hash describing the change, which is generated by a Git commit hook. This should not be changed when rebasing a commit following review feedback, since it is used by Gerrit, to track versions of a patch.
+
+The 'bug' line can reference a bug in a few ways. Gerrit creates a link to the bug when viewing the patch on review.gluster.org so that reviewers can quickly access the bug/issue on Bugzilla or Github.
+
+**Fixes: bz#1601166** -- use 'Fixes: bz#NNNNN' if the commit is intended to fully fix and close the bug being referenced.
+**Fixes: #411** -- use 'Fixes: #NNN' if the patch fixes the github issue completely.
+
+**Updates: bz#1193929** -- use 'Updates: bz#NNNN' if the commit is only a partial fix and more work is needed.
+**Updates: #175** -- use 'Updates: #NNNN' if the commit is only a partial fix and more work is needed for the feature completion.
+
+We encourage the use of `Co-Authored-By: name <name@example.com>` in commit messages to indicate people who worked on a particular patch. It's a convention for recognizing multiple authors, and our projects would encourage the stats tools to observe it when collecting statistics.
+
+### Summary of Git commit message structure
+
+* Provide a brief description of the change in the first line.
+* The first line should be limited to 50 characters and should not end with a period.
+
+* Insert a single blank line after the first line.
+
+* Provide a detailed description of the change in the following lines, breaking paragraphs where needed.
+
+* Subsequent lines should be wrapped at 72 characters.
+
+Put the 'Change-id', 'Fixes bz#NNNNN' and 'Signed-off-by: <>' lines at the very end.
+
+TODO: Add good examples
diff --git a/doc/developer-guide/daemon-management-framework.md b/doc/developer-guide/daemon-management-framework.md
new file mode 100644
index 00000000000..592192e665d
--- /dev/null
+++ b/doc/developer-guide/daemon-management-framework.md
@@ -0,0 +1,38 @@
+
+How to introduce new daemons using daemon management framework
+==============================================================
+Glusterd manages GlusterFS daemons providing services like NFS, Proactive
+self-heal, Quota, User servicable snapshots etc. Following are some of the
+aspects that come under daemon management.
+
+Data members & functions of different management objects
+
+- **Connection Management**
+ - unix domain sockets based channel for internal communication
+ - rpc connection for the communication
+ - frame timeout value for UDS
+ - Methods - notify
+ - init, connect, termination, disconnect APIs can be invoked using the
+ connection management object
+
+- **Process Management**
+ - Name of the process
+ - pidfile to detect if the daemon is running
+ - loggging directory, log file, volfile, volfileserver & volfileid
+ - init, stop APIs can be invoked using the process management object
+
+- **Service Management**
+ - connection object
+ - process object
+ - online status
+ - Methods - manager, start, stop which can be abstracted as a common methods
+ or specific to service requirements
+ - init API can be invoked using the service management object
+
+ The above structures defines the skeleton of the daemon management framework.
+ Introduction of new daemons in GlusterFS needs to inherit these properties. Any
+ requirement specific to a daemon needs to be implemented in its own service
+ (for eg : snapd defines its own type glusterd_snapdsvc_t using glusterd_svc_t
+ and snapd specific data). New daemons will need to have its own service specific
+ code written in glusterd-<feature>-svc.h{c} and need to reuse the existing
+ framework.
diff --git a/doc/developer-guide/datastructure-inode.md b/doc/developer-guide/datastructure-inode.md
new file mode 100644
index 00000000000..45d7a941e5f
--- /dev/null
+++ b/doc/developer-guide/datastructure-inode.md
@@ -0,0 +1,221 @@
+# Inode and dentry management in GlusterFS:
+
+## Background
+Filesystems internally refer to files and directories via inodes. Inodes
+are unique identifiers of the entities stored in a filesystem. Whenever an
+application has to operate on a file/directory (read/modify), the filesystem
+maps that file/directory to the right inode and start referring to that inode
+whenever an operation has to be performed on the file/directory.
+
+In GlusterFS a new inode gets created whenever a new file/directory is created
+OR when a successful lookup is done on a file/directory for the first time.
+Inodes in GlusterFS are maintained by the inode table which gets initiated when
+the filesystem daemon is started (both for the brick process as well as the
+mount process). Below are some important data structures for inode management.
+
+## Data-structure (inode-table)
+```
+struct _inode_table {
+ pthread_mutex_t lock;
+ size_t hashsize; /* bucket size of inode hash and dentry hash */
+ char *name; /* name of the inode table, just for gf_log() */
+ inode_t *root; /* root directory inode, with inode
+ number and gfid 1 */
+ xlator_t *xl; /* xlator to be called to do purge and
+ the xlator which maintains the inode table*/
+ uint32_t lru_limit; /* maximum LRU cache size */
+ struct list_head *inode_hash; /* buckets for inode hash table */
+ struct list_head *name_hash; /* buckets for dentry hash table */
+ struct list_head active; /* list of inodes currently active (in an fop) */
+ uint32_t active_size; /* count of inodes in active list */
+ struct list_head lru; /* list of inodes recently used.
+ lru.next most recent */
+ uint32_t lru_size; /* count of inodes in lru list */
+ struct list_head purge; /* list of inodes to be purged soon */
+ uint32_t purge_size; /* count of inodes in purge list */
+
+ struct mem_pool *inode_pool; /* memory pool for inodes */
+ struct mem_pool *dentry_pool; /* memory pool for dentrys */
+ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
+ int ctxcount; /* number of slots in inode->ctx */
+};
+```
+
+# Life-cycle
+```
+inode_table_new (size_t lru_limit, xlator_t *xl)
+```
+This is a function which allocates a new inode table. Usually the top xlators in
+the graph such as protocol/server (for bricks), fuse and nfs (for fuse and nfs
+mounts) and libgfapi do inode managements. Hence they are the ones which will
+allocate a new inode table by calling the above function.
+
+Each xlator graph in glusterfs maintains an inode table. So in fuse clients,
+whenever there is a graph change due to add brick/remove brick or
+addition/removal of some other xlators, a new graph is created which creates a
+new inode table.
+
+Thus an allocated inode table is destroyed only when the filesystem daemon is
+killed or unmounted.
+
+
+# what it contains.
+Inode table in glusterfs mainly contains a hash table for maintaining inodes.
+In general a file/directory is considered to be existing if there is a
+corresponding inode present in the inode table. If a inode for a file/directory
+cannot be found in the inode table, glusterfs tries to resolve it by sending a
+lookup on the entry for which the inode is needed. If lookup is successful, then
+a new inode correponding to the entry is added to the hash table present in the
+inode table. Thus an inode present in the hash-table means, its an existing
+file/directory within the filesystem. The inode table also contains the hash
+size of the hash table (as of now it is hard coded to 14057. The hash value of
+a inode is calculated using its gfid).
+
+Apart from the hash table, inode table also maintains 3 important list of inodes
+1. Active list:
+Active list contains all the active inodes (i.e inodes which are currently part
+of some fop).
+2. Lru list:
+Least recently used inodes list. A limit can be set for the size of the lru
+list. For bricks it is 16384 and for clients it is infinity.
+3. Purge list:
+List of all the inodes which have to be purged (i.e inodes which have to be
+deleted from the inode table due to unlink/rmdir/forget).
+
+And at last it also contains the mem-pool for allocating inodes, dentries so
+that frequent malloc/calloc and free of the data structures can be avoided.
+
+
+# Data structure (inode)
+```
+struct _inode {
+ inode_table_t *table; /* the table this inode belongs to */
+ uuid_t gfid; /* unique identifier of the inode */
+ gf_lock_t lock;
+ uint64_t nlookup;
+ uint32_t fd_count; /* Open fd count */
+ uint32_t ref; /* reference count on this inode */
+ ia_type_t ia_type; /* what kind of file */
+ struct list_head fd_list; /* list of open files on this inode */
+ struct list_head dentry_list; /* list of directory entries for this inode */
+ struct list_head hash; /* hash table pointers */
+ struct list_head list; /* active/lru/purge */
+
+ struct _inode_ctx *_ctx; /* place holder for keeping the
+ information about the inode by different xlators */
+};
+```
+As said above, inodes are internal way of identifying the files/directories. A
+inode uniquely represents a file/directory. A new inode is created whenever a
+create/mkdir/symlink/mknod operations are performed. Apart from that a new inode
+is created upon the successful fresh lookup of a file/directory. Say the
+filesystem contained some file "a" within root and the filesystem was
+unmounted. Now when glusterfs is mounted and some operation is perfomed on "/a",
+glusterfs tries to get the inode for the entry "a" with parent inode as
+root. But, since glusterfs just came up, it will not be able to find the inode
+for "a" and will send a lookup on "/a". If the lookup operation succeeds (i.e.
+the root of glusterfs contains an entry called "a"), then a new inode for "/a"
+is created and added to the inode table.
+
+Depending upon the situation, an inode can be in one of the 3 lists maintained
+by the inode table. If some fop is happening on the inode, then the inode will
+be present in the active inodes list maintained by the inode table. Active
+inodes are those inodes whose refcount is greater than zero. Whenever some
+operation comes on a file/directory, and the resolver tries to find the inode
+for it, it increments the refcount of the inode before returning the inode. The
+refcount of an inode can be incremented by calling the below function
+```
+inode_ref (inode_t *inode)
+```
+Any xlator which wants to operate on a inode as part of some fop (or wants the
+inode in the callback), should hold a ref on the inode.
+Once the fop is completed before sending the reply of the fop to the above
+layers , the inode has to be unrefed. When the refcount of an inode becomes
+zero, it is removed from the active inodes list and put into LRU list maintained
+by the inode table. Thus in short if some fop is happening on a file/directory,
+the corresponding inode will be in the active list or it will be in the LRU
+list.
+
+
+# Life Cycle
+
+A new inode is created whenever a new file/directory/symlink is created OR a
+successful lookup of an existing entry is done. The xlators which does inode
+management (as of now protocol/server, fuse, nfs, gfapi) will perform inode_link
+operation upon successful lookup or successful creation of a new entry.
+```
+inode_link (inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *buf);
+```
+inode_link actually adds the inode to the inode table (to be precise it adds
+the inode to the hash table maintained by the inode table. The hash value is
+calculated based on the gfid). Copies the gfid to the inode (the gfid is
+present in the iatt structure). Creates a dentry with the new name.
+
+A inode is removed from the inode table and eventually destroyed when unlink
+or rmdir operation is performed on a file/directory, or the the lru limit of
+the inode table has been exceeded.
+
+# Data structure (dentry)
+```
+
+struct _dentry {
+ struct list_head inode_list; /* list of dentries of inode */
+ struct list_head hash; /* hash table pointers */
+ inode_t *inode; /* inode of this directory entry */
+ char *name; /* name of the directory entry */
+ inode_t *parent; /* directory of the entry */
+};
+```
+A dentry is the presence of an entry for a file/directory within its parent
+directory. A dentry usually points to the inode to which it belongs to. In
+glusterfs a dentry contains the following fields.
+1. a hook using which it can add itself to the list of
+the dentries maintained by the inode to which it points to.
+2. A hash table pointer.
+3. Pointer to the inode to which it belongs to.
+4. Name of the dentry
+5. Pointer to the inode of the parent directory in which the dentry is present
+
+A new dentry is created when a new file/directory/symlink is created or a hard
+link to an existing file is created.
+```
+__dentry_create (inode_t *inode, inode_t *parent, const char *name);
+```
+A dentry holds a refcount on the parent
+directory so that the parent inode is never removed from the active inode's list
+and put to the lru list (If the lru limit of the lru list is exceeded, there is
+a chance of parent inode being destroyed. To avoid it, the dentries hold a
+reference to the parent inode). A dentry is removed whenevern a unlink/rmdir
+is perfomed on a file/directory. Or when the lru limit has been exceeded, the
+oldest inodes are purged out of the inode table, during which all the dentries
+of the inode are removed.
+
+Whenever a unlink/rmdir comes on a file/directory, the corresponding inode
+should be removed from the inode table. So upon unlink/rmdir, the inode will
+be moved to the purge list maintained by the inode table and from there it is
+destroyed. To be more specific, if a inode has to be destroyed, its refcount
+and nlookup count both should become 0. For refcount to become 0, the inode
+should not be part of any fop (there should not be any open fds). Or if the
+inode belongs to a directory, then there should not be any fop happening on the
+directory and it should not contain any dentries within it. For nlookup count to
+become zero, a forget has to be sent on the inode with nlookup count set to 0 as
+an argument. For fuse clients, forget is sent by the kernel itself whenever a
+unlink/rmdir is performed. But for brick processes, upon unlink/rmdir, the
+protocol/server itself has to do inode_forget. Whenever the inode has to be
+deleted due to file removal or lru limit being exceeded the inode is retired
+(i.e. all the dentries of the inode are deleted and the inode is moved to the
+purge list maintained by the inode table), the nlookup count is set to 0 via
+inode_forget api. The inode table, then prunes all the inodes from the purge
+list by destroying the inode contexts maintained by each xlator.
+```
+unlinking of the dentry is done via inode_unlink;
+
+void
+inode_unlink (inode_t *inode, inode_t *parent, const char *name);
+```
+If the inode has multiple hard links, then the unlink operation performed by
+the application results just in the removal of the dentry with the name provided
+by the application. For the inode to be removed, all the dentries of the inode
+should be unlinked.
+
diff --git a/doc/developer-guide/datastructure-iobuf.md b/doc/developer-guide/datastructure-iobuf.md
new file mode 100644
index 00000000000..03604e3672c
--- /dev/null
+++ b/doc/developer-guide/datastructure-iobuf.md
@@ -0,0 +1,259 @@
+# Iobuf-pool
+## Datastructures
+### iobuf
+Short for IO Buffer. It is one allocatable unit for the consumers of the IOBUF
+API, each unit hosts @page_size(defined in arena structure) bytes of memory. As
+initial step of processing a fop, the IO buffer passed onto GlusterFS by the
+other applications (FUSE VFS/ Applications using gfapi) is copied into GlusterFS
+space i.e. iobufs. Hence Iobufs are mostly allocated/deallocated in Fuse, gfapi,
+protocol xlators, and also in performance xlators to cache the IO buffers etc.
+```
+struct iobuf {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf *next;
+ struct iobuf *prev;
+ };
+ };
+ struct iobuf_arena *iobuf_arena;
+
+ gf_lock_t lock; /* for ->ptr and ->ref */
+ int ref; /* 0 == passive, >0 == active */
+
+ void *ptr; /* usable memory region by the consumer */
+
+ void *free_ptr; /* in case of stdalloc, this is the
+ one to be freed not the *ptr */
+};
+```
+
+### iobref
+There may be need of multiple iobufs for a single fop, like in vectored read/write.
+Hence multiple iobufs(default 16) are encapsulated under one iobref.
+```
+struct iobref {
+ gf_lock_t lock;
+ int ref;
+ struct iobuf **iobrefs; /* list of iobufs */
+ int alloced; /* 16 by default, grows as required */
+ int used; /* number of iobufs added to this iobref */
+};
+```
+### iobuf_arenas
+One region of memory MMAPed from the operating system. Each region MMAPs
+@arena_size bytes of memory, and hosts @arena_size / @page_size IOBUFs.
+The same sized iobufs are grouped into one arena, for sanity of access.
+
+```
+struct iobuf_arena {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf_arena *next;
+ struct iobuf_arena *prev;
+ };
+ };
+
+ size_t page_size; /* size of all iobufs in this arena */
+ size_t arena_size; /* this is equal to
+ (iobuf_pool->arena_size / page_size)
+ * page_size */
+ size_t page_count;
+
+ struct iobuf_pool *iobuf_pool;
+
+ void *mem_base;
+ struct iobuf *iobufs; /* allocated iobufs list */
+
+ int active_cnt;
+ struct iobuf active; /* head node iobuf
+ (unused by itself) */
+ int passive_cnt;
+ struct iobuf passive; /* head node iobuf
+ (unused by itself) */
+ uint64_t alloc_cnt; /* total allocs in this pool */
+ int max_active; /* max active buffers at a given time */
+};
+
+```
+### iobuf_pool
+Pool of Iobufs. As there may be many Io buffers required by the filesystem,
+a pool of iobufs are preallocated and kept, if these preallocated ones are
+exhausted only then the standard malloc/free is called, thus improving the
+performance. Iobuf pool is generally one per process, allocated during
+glusterfs_ctx_t init (glusterfs_ctx_defaults_init), currently the preallocated
+iobuf pool memory is freed on process exit. Iobuf pool is globally accessible
+across GlusterFs, hence iobufs allocated by any xlator can be accessed by any
+other xlators(unless iobuf is not passed).
+```
+struct iobuf_pool {
+ pthread_mutex_t mutex;
+ size_t arena_size; /* size of memory region in
+ arena */
+ size_t default_page_size; /* default size of iobuf */
+
+ int arena_cnt;
+ struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas. Each element of the array is a list of arenas
+ holding iobufs of particular page_size */
+
+ struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas without free iobufs */
+
+ struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
+ /* array of of arenas which can be purged */
+
+ uint64_t request_misses; /* mostly the requests for higher
+ value of iobufs */
+};
+```
+~~~
+The default size of the iobuf_pool(as of yet):
+1024 iobufs of 128Bytes = 128KB
+512 iobufs of 512Bytes = 256KB
+512 iobufs of 2KB = 1MB
+128 iobufs of 8KB = 1MB
+64 iobufs of 32KB = 2MB
+32 iobufs of 128KB = 4MB
+8 iobufs of 256KB = 2MB
+2 iobufs of 1MB = 2MB
+Total ~13MB
+~~~
+As seen in the datastructure iobuf_pool has 3 arena lists.
+
+- arenas:
+The arenas allocated during iobuf_pool create, are part of this list. This list
+also contains arenas that are partially filled i.e. contain few active and few
+passive iobufs (passive_cnt !=0, active_cnt!=0 except for initially allocated
+arenas). There will be by default 8 arenas of the sizes mentioned above.
+- filled:
+If all the iobufs in the arena are filled(passive_cnt = 0), the arena is moved
+to the filled list. If any of the iobufs from the filled arena is iobuf_put,
+then the arena moves back to the 'arenas' list.
+- purge:
+If there are no active iobufs in the arena(active_cnt = 0), the arena is moved
+to purge list. iobuf_put() triggers destruction of the arenas in this list. The
+arenas in the purge list are destroyed only if there is atleast one arena in
+'arenas' list, that way there won't be spurious mmap/unmap of buffers.
+(e.g: If there is an arena (page_size=128KB, count=32) in purge list, this arena
+is destroyed(munmap) only if there is an arena in 'arenas' list with page_size=128KB).
+
+## APIs
+### iobuf_get
+
+```
+struct iobuf *iobuf_get (struct iobuf_pool *iobuf_pool);
+```
+Creates a new iobuf of the default page size(128KB hard coded as of yet).
+Also takes a reference(increments ref count), hence no need of doing it
+explicitly after getting iobuf.
+
+### iobuf_get2
+
+```
+struct iobuf * iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size);
+```
+Creates a new iobuf of a specified page size, if page_size=0 default page size
+is considered.
+```
+if (requested iobuf size > Max iobuf size in the pool(1MB as of yet))
+ {
+ Perform standard allocation(CALLOC) of the requested size and
+ add it to the list iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX].
+ }
+ else
+ {
+ -Round the page size to match the stndard sizes in iobuf pool.
+ (eg: if 3KB is requested, it is rounded to 8KB).
+ -Select the arena list corresponding to the rounded size
+ (eg: select 8KB arena)
+ If the selected arena has passive count > 0, then return the
+ iobuf from this arena, set the counters(passive/active/etc.)
+ appropriately.
+ else the arena is full, allocate new arena with rounded size
+ and standard page numbers and add to the arena list
+ (eg: 128 iobufs of 8KB is allocated).
+ }
+```
+Also takes a reference(increments ref count), hence no need of doing it
+explicitly after getting iobuf.
+
+### iobuf_ref
+
+```
+struct iobuf *iobuf_ref (struct iobuf *iobuf);
+```
+ Take a reference on the iobuf. If using an iobuf allocated by some other
+xlator/function/, its a good practice to take a reference so that iobuf is not
+deleted by the allocator.
+
+### iobuf_unref
+```
+void iobuf_unref (struct iobuf *iobuf);
+```
+Unreference the iobuf, if the ref count is zero iobuf is considered free.
+
+```
+ -Delete the iobuf, if allocated from standard alloc and return.
+ -set the active/passive count appropriately.
+ -if passive count > 0 then add the arena to 'arena' list.
+ -if active count = 0 then add the arena to 'purge' list.
+```
+Every iobuf_ref should have a corresponding iobuf_unref, and also every
+iobuf_get/2 should have a correspondning iobuf_unref.
+
+### iobref_new
+```
+struct iobref *iobref_new ();
+```
+Creates a new iobref structure and returns its pointer.
+
+### iobref_ref
+```
+struct iobref *iobref_ref (struct iobref *iobref);
+```
+Take a reference on the iobref.
+
+### iobref_unref
+```
+void iobref_unref (struct iobref *iobref);
+```
+Decrements the reference count of the iobref. If the ref count is 0, then unref
+all the iobufs(iobuf_unref) in the iobref, and destroy the iobref.
+
+### iobref_add
+```
+int iobref_add (struct iobref *iobref, struct iobuf *iobuf);
+```
+Adds the given iobuf into the iobref, it takes a ref on the iobuf before adding
+it, hence explicit iobuf_ref is not required if adding to the iobref.
+
+### iobref_merge
+```
+int iobref_merge (struct iobref *to, struct iobref *from);
+```
+Adds all the iobufs in the 'from' iobref to the 'to' iobref. Merge will not
+cause the delete of the 'from' iobref, therefore it will result in another ref
+on all the iobufs added to the 'to' iobref. Hence iobref_unref should be
+performed both on 'from' and 'to' iobrefs (performing iobref_unref only on 'to'
+will not free the iobufs and may result in leak).
+
+### iobref_clear
+```
+void iobref_clear (struct iobref *iobref);
+```
+Unreference all the iobufs in the iobref, and also unref the iobref.
+
+## Iobuf Leaks
+If all iobuf_refs/iobuf_new do not have correspondning iobuf_unref, then the
+iobufs are not freed and recurring execution of such code path may lead to huge
+memory leaks. The easiest way to identify if a memory leak is caused by iobufs
+is to take a statedump. If the statedump shows a lot of filled arenas then it is
+a sure sign of leak. Refer doc/debugging/statedump.md for more details.
+
+If iobufs are leaking, the next step is to find where the iobuf_unref went
+missing. There is no standard/easy way of debugging this, code reading and logs
+are the only ways. If there is a liberty to reproduce the memory leak at will,
+then logs(gf_callinginfo) in iobuf_ref/unref might help.
+TODO: A easier way to debug iobuf leaks.
diff --git a/doc/developer-guide/datastructure-mem-pool.md b/doc/developer-guide/datastructure-mem-pool.md
new file mode 100644
index 00000000000..225567cbf9f
--- /dev/null
+++ b/doc/developer-guide/datastructure-mem-pool.md
@@ -0,0 +1,124 @@
+# Mem-pool
+## Background
+There was a time when every fop in glusterfs used to incur cost of allocations/de-allocations for every stack wind/unwind between xlators because stack/frame/*_localt_t in every wind/unwind was allocated and de-allocated. Because of all these system calls in the fop path there was lot of latency and the worst part is that most of the times the number of frames/stacks active at any time wouldn't cross a threshold. So it was decided that this threshold number of frames/stacks would be allocated in the beginning of the process only once. Get one of them from the pool of stacks/frames whenever `STACK_WIND` is performed and put it back into the pool in `STACK_UNWIND`/`STACK_DESTROY` without incurring any extra system calls. The data structures are allocated only when threshold number of such items are in active use i.e. pool is in complete use.% increase in the performance once this was added to all the common data structures (inode/fd/dict etc) in xlators throughout the stack was tremendous.
+
+## Data structure
+```
+struct mem_pool {
+ struct list_head list; /*Each member in the mempool is element padded with a doubly-linked-list + ptr of mempool + is-in
+-use info. This list is used to add the element to the list of free members in the mem-pool*/
+ int hot_count;/*number of mempool elements that are in active use*/
+ int cold_count;/*number of mempool elements that are not in use. If a new allocation is required it
+will be served from here until all the elements in the pool are in use i.e. cold-count becomes 0.*/
+ gf_lock_t lock;/*synchronization mechanism*/
+ unsigned long padded_sizeof_type;/*Each mempool element is padded with a doubly-linked-list + ptr of mempool + is-in
+-use info to operate the pool of elements, this size is the element-size after padding*/
+ void *pool;/*Starting address of pool*/
+ void *pool_end;/*Ending address of pool*/
+/* If an element address is in the range between pool, pool_end addresses then it is alloced from the pool otherwise it is 'calloced' this is very useful for functions like 'mem_put'*/
+ int real_sizeof_type;/* size of just the element without any padding*/
+ uint64_t alloc_count; /*Number of times this type of data is allocated through out the life of this process. This may include calloced elements as well*/
+ uint64_t pool_misses; /*Number of times the element had to be allocated from heap because all elements from the pool are in active use.*/
+ int max_alloc; /*Maximum number of elements from the pool in active use at any point in the life of the process. This does *not* include calloced elements*/
+ int curr_stdalloc;/*Number of elements that are allocated from heap at the moment because the pool is in completed use. It should be '0' when pool is not in complete use*/
+ int max_stdalloc;/*Maximum number of allocations from heap after the pool is completely used that are in active use at any point in the life of the process.*/
+ char *name; /*Contains xlator-name:data-type as a string
+ struct list_head global_list;/*This is used to insert it into the global_list of mempools maintained in 'glusterfs-ctx'
+};
+```
+
+## Life-cycle
+```
+mem_pool_new (data_type, unsigned long count)
+
+This is a macro which expands to mem_pool_new_fn (sizeof (data_type), count, string-rep-of-data_type)
+
+struct mem_pool *
+mem_pool_new_fn (unsigned long sizeof_type, unsigned long count, char *name)
+
+Padded-element:
+ ----------------------------------------
+|list-ptr|mem-pool-address|in-use|Element|
+ ----------------------------------------
+ ```
+
+This function allocates the `mem-pool` structure and sets up the pool for use.
+`name` parameter above is the `string` containing type of the datatype. This `name` is appended to `xlator-name + ':'` so that it can be easily identified in things like statedump. `count` is the number of elements that need to be allocated. `sizeof_type` is the size of each element. Ideally `('sizeof_type'*'count')` should be the size of the total pool. But to manage the pool using `mem_get`/`mem_put` (will be explained after this section) each element needs to be padded in the front with a `('list', 'mem-pool-address', 'in_use')`. So the actual size of the pool it allocates will be `('padded_sizeof_type'*'count')`. Why these extra elements are needed will be evident after understanding how `mem_get` and `mem_put` are implemented. In this function it just initializes all the `list` structures in front of each element and adds them to the `mem_pool->list` which represent the list of `cold` elements which can be allocated whenever `mem_get` is called on this mem_pool. It remembers mem_pool's start and end addresses in `mem_pool->pool`, `mem_pool->pool_end` respectively. Initializes `mem_pool->cold_count` to `count` and `mem_pool->hot_count` to `0`. This mem-pool will be added to the list of `global_list` maintained in `glusterfs-ctx`
+
+
+```
+void* mem_get (struct mem_pool *mem_pool)
+
+Initial-list before mem-get
+----------------
+| Pool |
+| ----------- | ---------------------------------------- ----------------------------------------
+| | pool-list | |<---> |list-ptr|mem-pool-address|in-use|Element|<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ---------------------------------------- ----------------------------------------
+----------------
+
+list after mem-get from the pool
+----------------
+| Pool |
+| ----------- | ----------------------------------------
+| | pool-list | |<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ----------------------------------------
+----------------
+
+List when the pool is full:
+ ----------------
+| Pool | extra element that is allocated
+| ----------- | ----------------------------------------
+| | pool-list | | |list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ----------------------------------------
+ ----------------
+```
+
+This function is similar to `malloc()` but it gives memory of type `element` of this pool. When this function is called it increments `mem_pool->alloc_count`, checks if there are any free elements in the pool that can be returned by inspecting `mem_pool->cold_count`. If `mem_pool->cold_count` is non-zero then it means there are elements in the pool which are not in active use. It deletes one element from the list of free elements and decrements `mem_pool->cold_count` and increments `mem_pool->hot_count` to indicate there is one more element in active use. Updates `mem_pool->max_alloc` accordingly. Sets `element->in_use` in the padded memory to `1`. Sets `element->mem_pool` address to this mem_pool also in the padded memory(It is useful for mem_put). Returns the address of the memory after the padded boundary to the caller of this function. In the cases where all the elements in the pool are in active use it `callocs` the element with padded size and sets mem_pool address in the padded memory. To indicate the pool-miss and give useful accounting information of the pool-usage it increments `mem_pool->pool_misses`, `mem_pool->curr_stdalloc`. Updates `mem_pool->max_stdalloc` accordingly.
+
+```
+void* mem_get0 (struct mem_pool *mem_pool)
+```
+Just like `calloc` is to `malloc`, `mem_get0` is to `mem_get`. It memsets the memory to all '0' before returning the element.
+
+
+```
+void mem_put (void *ptr)
+
+list before mem-put from the pool
+ ----------------
+| Pool |
+| ----------- | ----------------------------------------
+| | pool-list | |<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ----------------------------------------
+ ----------------
+
+list after mem-put to the pool
+ ----------------
+| Pool |
+| ----------- | ---------------------------------------- ----------------------------------------
+| | pool-list | |<---> |list-ptr|mem-pool-address|in-use|Element|<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ---------------------------------------- ----------------------------------------
+ ----------------
+
+If mem_put is putting an element not from pool then it is just freed so
+no change to the pool
+ ----------------
+| Pool |
+| ----------- |
+| | pool-list | |
+| ----------- |
+ ----------------
+```
+
+This function is similar to `free()`. Remember that ptr passed to this function is the address of the element, so this function gets the ptr to its head of the padding in front of it. If this memory falls in bettween `mem_pool->pool`, `mem_pool->pool_end` then the memory is part of the 'pool' memory that is allocated so it does some sanity checks to see if the memory is indeed head of the element by checking if `in_use` is set to `1`. It resets `in_use` to `0`. It gets the mem_pool address stored in the padded region and adds this element to the list of free elements. Decreases `mem_pool->hot_count` increases `mem_pool->cold_count`. In the case where padded-element address does not fall in the range of `mem_pool->pool`, `mem_pool->pool_end` it just frees the element and decreases `mem_pool->curr_stdalloc`.
+
+```
+void
+mem_pool_destroy (struct mem_pool *pool)
+```
+Deletes this pool from the `global_list` maintained by `glusterfs-ctx` and frees all the memory allocated in `mem_pool_new`.
+
+
+### How to pick pool-size
+This varies from work-load to work-load. Create the mem-pool with some random size and run the work-load. Take the statedump after the work-load is complete. In the statedump if `max_alloc` is always less than `cold_count` may be reduce the size of the pool closer to `max_alloc`. On the otherhand if there are lots of `pool-misses` then increase the `pool_size` by `max_stdalloc` to achieve better 'hit-rate' of the pool.
diff --git a/doc/developer-guide/dirops-transactions-in-dht.md b/doc/developer-guide/dirops-transactions-in-dht.md
new file mode 100644
index 00000000000..909a97001aa
--- /dev/null
+++ b/doc/developer-guide/dirops-transactions-in-dht.md
@@ -0,0 +1,273 @@
+# dirops transactions in dht
+Need for transactions during operations on directories arise from two
+basic design elements of DHT:
+
+ 1. A directory is created on all subvolumes of dht. Since glusterfs
+ associates each file-system object with an unique gfid, every
+ subvolume should have the same unique mapping of (path of directory,
+ gfid). To elaborate,
+ * Each subvolume should've same gfid associated with a path to
+ directory.
+ * A gfid should not be associated with more than one path in any
+ subvolume.
+
+ So, entire operations like mkdir, renamedir, rmdir and creation of
+ directories during self-heal need to be atomic in dht. In other words,
+ any of these operations shouldn't begin on an inode if one of them is
+ already in progress on the same inode, till it completes on all
+ subvolumes of dht. If not, more than one of these operations
+ happening in parallel can break any or all of the two requirements
+ listed above. This is referred in the rest of the document by the
+ name _Atomicity during namespace operations_.
+
+ 2. Each directory has an independent layout persisted on
+ subvolumes. Each subvolume contains only part of the layout relevant
+ to it. For performance reasons _and_ since _only_ dht has aggregated
+ view, this layout is cached in memory of client. To make sure dht
+ reads or modifies a single complete layout while parallel modifications of the layout are in progress, we need atomicity during layout modification and reading. This is referred in the rest of the document as _Atomicity during layout modification and reading_.
+
+Rest of the document explains how atomicity is achieved for each of
+the case above.
+
+**Atomicity during layout modification and reading**
+File operations a.k.a fops can be classified into two categories based on how they consume layout.
+
+ - Layout writer. Setting of layout during selfheal of a directory is
+ layout writer of _that_ directory.
+ - Layout reader.
+ * Any entry fop like create, unlink, rename, link, symlink,
+ unlink, mknod, rename, mkdir, rmdir, renamedir which needs layout of the parent directory. Each of these fops are readers of layout on parent directory.
+ * setting of layout during mkdir of a directory is considered as
+ a reader of the same directory's layout. The reason for this is that
+ only a parallel lookup on that directory can be a competing fop that modifies the layout (Other fops need gfid of the directory which can be got only after either lookup or mkdir completes). However, healing of layout is considered as a writer and a single writer blocks all readers.
+
+*Algorithm*
+Atomicity is achieved by locking on the inode of directory whose
+layout is being modified or read. The fop used is inodelk.
+ - Writer acquires blocking inodelk (directory-inode, write-lock) on
+ all subvolumes serially. The order of subvols in which they are
+ locked by different clients remains constant for a directory. If locking fails on any subvolume, layout modification is abandoned.
+ - Reader acquires an inodelk (directory-inode, read-lock) on _any_
+ one subvolume. If locking fails on a subvolume (say with
+ ESTALE/ENOTCONN error), locking can be tried on other subvolumes till
+ we get one lock. If we cannot get lock on at least one subvolume,
+ consistency of layout is not guaranteed. Based on the consistency
+ requirements of fops, they can be failed or continued.
+
+Reasons why writer has to lock on _all_ subvols:
+
+ - DHT don't have a metadata server and locking is implemented by brick. So, there is no well-defined subvol/brick that can be used as an arbitrator by different clients while acquiring locks.
+ - readers should acquire as minimum number of locks as possible. In
+ other words, the algorithm aims to have less synchronization cost to
+ readers.
+ - The subvolume to which a directory hashes could be used as a
+ lock server. However, in the case of an entry fop like create
+ (/a/b/c) where we want to block modification of layout of b for the
+ duration of create, we would be required to acquire lock on the
+ subvol to which /a/b hashes. To find out the hashed-subvol of
+ /a/b, we would need layout of /a. Note that how there is a dependency
+ of locking the layouts of ancestors all the way to root. So this
+ locking is not preferred. Also, note that only the immediate parent
+ inode is available in arguments of a fop like create.
+
+**Atomicity during namespace operations**
+
+ - We use locks on inode of parent directory in the namespace of
+ _"basename"_ during mkdir, rmdir, renamedir and directory
+ creation phase of self-heal. The exact fop we use is _entrylk
+ (parent-inode, "basename")_.
+ - refresh in-memory layout of parent-inode from values stored on backend
+ - _entrylk (parent-inode, "basename")_ is done on subvolume to which
+ _"basename" hashes_. So, this operation is a _reader_ of the
+ layout on _parent-inode_. Which means an _inodelk (parent-inode,
+ read-lock)_ has to be completed before _entrylk (parent-inode,
+ "basename")_ is issued. Both the locks have to be held till the
+ operation is tried on all subvolumes. If acquiring of any/all of
+ these locks fail, the operation should be failed.
+
+With the above details, algorithms for mkdir, rmdir, renamedir,
+self-heal of directory are explicitly detailed below.
+
+**Self-heal of a directory**
+
+ - creation of directories on subvolumes is done only during
+ _named-lookup_ of a directory as we need < parent-inode,
+ "basename" >.
+ - If a directory is missing on one or more subvolumes,
+ * acquire _inodelk (parent-inode, read-lock)_ on _any one_ of the
+ subvolumes.
+ * refresh the in-memory layout of parent-inode from values stored on backend
+ * acquire _entrylk (parent-inode, "basename")_ on the subvolume
+ to which _"basename"_ hashes.
+ * If any/all of the locks fail, self-heal is aborted.
+ * create directories on missing subvolumes.
+ * release _entrylk (parent-inode, "basename")_.
+ * release _inodelk (parent-inode, read-lock)_.
+
+ - If layout of a directory needs healing
+ * acquire _inodelk (directory-inode, write-lock)_ on _all_ the
+ subvolumes. If locking fails on any of the subvolumes,
+ self-heal is aborted. Blocking Locks are acquired serially across subvolumes in a _well-defined_ order which is _constant_ across all the healers of a directory. One order could be the order in which subvolumes are stored in the array _children_ of dht xlator.
+ * heal the layout.
+ * release _inodelk (directory-inode, write-lock)_ on _all_ the
+ subvolumes in parallel.
+ * Note that healing of layout can be done in both _named_ and
+ _nameless_ lookups of a directory as _only directory-inode_ is needed
+ for healing and it is available during both.
+
+**mkdir (parent-inode, "basename")**
+
+* while creating directory across subvolumes,
+
+ - acquire _inodelk (parent-inode, read-lock)_ on _any one_ of the
+ subvolumes.
+ - refresh in-memory layout of parent-inode from values stored on backend
+ - acquire _entrylk (parent-inode, "basename")_ on the subvolume to
+ which _"basename"_ hashes.
+ - If any/all of the above two locks fail, release the locks that
+ were acquired successfully and mkdir should be failed (as perceived by application).
+ - do _mkdir (parent-inode, "basename")_ on the subvolume to which
+ _"basename"_ hashes. If this mkdir fails, mkdir is failed.
+ - do _mkdir (parent-inode, "basename")_ on the remaining subvolumes.
+ - release _entrylk (parent-inode, "basename")_.
+ - release _inodelk (parent-inode, "read-lock")_.
+* while setting the layout of a directory,
+ - acquire _inodelk (directory-inode, read-lock)_ on _any one_ of the
+ subvolumes.
+ - If locking fails, cleanup the locks that were acquired
+ successfully and abort layout setting. Note that we'll have a
+ directory without a layout till a lookup happens on the
+ directory. This means entry operations within this directory fail
+ in this time window. We can also consider failing mkdir. The
+ problem of dealing with a directory without layout is out of the
+ scope of this document.
+ - set the layout on _directory-inode_.
+ - release _inodelk (directory-inode, read-lock)_.
+* Note that during layout setting we consider mkdir as a _reader_ not
+ _writer_, though it is setting the layout. Reasons are:
+ - Before any of other readers like create, link etc that operate on
+ this directory to happen, _gfid_ of this directory has to be
+ resolved. But _gfid_ is only available only if either of following
+ conditions are true:
+ * after mkdir is complete.
+ * a lookup on the same path happens parallel to in-progress
+ mkdir.
+
+ But, on completion of any of the above two operations, layout
+ will be healed. So, none of the _readers_ will happen on a
+ directory with partial layout.
+
+* Note that since we've an _entrylk (parent-inode, "basename")_ for
+ the entire duration of (attempting) creating directories, parallel
+ mkdirs will no longer contend on _mkdir_ on subvolume _to which "basename" hashes_. But instead, contend on _entrylk (parent-inode, "basename")_ on the subvolume _to which "basename" hashes_. So, we can attempt the _mkdir_ in _parallel_ on all subvolumes instead of two stage mkdir on hashed first and the rest of them in parallel later. However, we need to make sure that mkdir is successful on the subvolume _to which "basename" hashes_ for mkdir to be successful (as perceived by application). In the case of failed mkdir (as perceived by application), a cleanup should be performed on all the subvolumes before _entrylk (parent-inode, "basename")_ is released.
+
+**rmdir (parent-inode, "basename", directory-inode)**
+
+ - acquire _inodelk (parent-inode, read-lock)_ on _any one_
+ subvolume.
+ - refresh in-memory layout of parent-inode from values stored on backend
+ - acquire _entrylk (parent-inode, "basename")_ on the subvolume to
+ which _"basename" hashes_.
+ - If any/all of the above locks fail, rmdir is failed after cleanup
+ of the locks that were acquired successfully.
+ - do _rmdir (parent-inode, "basename")_ on the subvolumes to which
+ _"basename" doesn't hash to_.
+ * If successful, continue.
+ * Else,
+ * recreate directories on those subvolumes where rmdir
+ succeeded.
+ * heal the layout of _directory-inode_. Note that this will have
+ same synchronization requirements as discussed during layout
+ healing part of the section "Directoy self-heal" above.
+ * release _entrylk (parent-inode, "basename")_.
+ * release _inodelk (parent-inode, read-lock)_.
+ * fail _rmdir (parent-inode, "basename")_ to application.
+ - do _rmdir (parent-inode, "basename")_ on the subvolume to which
+ _"basename" hashes_.
+ - If successful, continue.
+ - Else, Go to the failure part of _rmdir (parent-inode, "basename")_
+ on subvolumes to which "basename" _doesn't hash to_.
+ - release _entrylk (parent-inode, "basename")_.
+ - release _inodelk (parent-inode, read-lock)_.
+ - return success to application.
+
+**renamedir (src-parent-inode, "src-basename", src-directory-inode, dst-parent-inode, "dst-basename", dst-directory-inode)**
+
+ - requirement is to prevent any operation in both _src-namespace_
+ and _dst-namespace_. So, we need to acquire locks on both
+ namespaces.We also need to have constant ordering while acquiring
+ locks during parallel renames of the form _rename (src, dst)_ and
+ _rename (dst, src)_ to prevent deadlocks. We can sort gfids of
+ _src-parent-inode_ and _dst-parent-inode_ and use that order to
+ acquire locks. For the sake of explanation lets say we ended up
+ with order of _src_ followed by _dst_.
+ - acquire _inodelk (src-parent-inode, read-lock)_.
+ - refresh in-memory layout of src-parent-inode from values stored on backend
+ - acquire _entrylk (src-parent-inode, "src-basename")_.
+ - acquire _inodelk (dst-parent-inode, read-lock)_.
+ - refresh in-memory layout of dst-parent-inode from values stored on backend
+ - acquire _entrylk (dst-parent-inode, "dst-basename")_.
+ - If acquiring any/all of the locks above fail,
+ * release the locks that were successfully acquired.
+ * fail the renamedir operation to application
+ * done
+ - do _renamedir ("src", "dst")_ on the subvolume _to which "dst-basename" hashes_.
+ * If failure, Goto point _If acquiring any/all of the locks above fail_.
+ * else, continue.
+ - do _renamedir ("src", "dst")_ on rest of the subvolumes.
+ * If there is any failure,
+ * revert the successful renames.
+ * Goto to point _If acquiring any/all of the locks above fail_.
+ * else,
+ - release all the locks acquired.
+ - return renamedir as success to application.
+
+**Some examples of races**
+This section gives concrete examples of races that can result in inconsistencies explained in the beginning of the document.
+
+Some assumptions are:
+
+* We consider an example distribute of three subvols s1, s2 and s3.
+* For examples of renamedir ("src", "dst"), _src_ hashes to s1 and _dst_ hashes to s2. _src_ and _dst_ are associated with _gfid-src_ and _gfid-dst_ respectively
+* For non renamedir examples, _dir_ is the name of directory and it hashes to s1.
+
+And the examples are:
+
+ - mkdir vs rmdir - inconsistency in namespace.
+ * mkdir ("dir", gfid1) is complete on s1
+ * rmdir is issued on same directory. Note that, since rmdir needs a gfid, a lookup should be complete before rmdir. lookup creates the directory on rest of the subvols as part of self-heal.
+ * rmdir (gfid1) deletes directory from all subvols.
+ * A new mkdir ("dir", gfid2) is issued. It is successful on s1 associating "dir" with gfid2.
+ * mkdir ("dir", gfid1) resumes and creates directory on s2 and s3 associating "dir" with gfid1.
+ * mkdir ("dir", gfid2) fails with EEXIST on s2 and s3. Since, EEXIST errors are ignored, mkdir is considered successful to application.
+ * In this example we have multiple inconsitencies
+ * "dir" is associated with gfid1 on s2, s3 and with gfid2 on s1
+ * Even if mkdir ("dir", gfid2) was not issued, we would've a case of a directory magically reappearing after a successful rmdir.
+ - lookup heal vs rmdir
+ * rmdir ("dir", gfid1) is issued. It is successful on s2 and s3 (non-hashed subvols for name "dir")
+ * lookup ("dir") is issued. Since directory is present on s1 yet, it is created on s2 and s3 associating with gfid1 as part of self-heal
+ * rmdir ("dir", gfid1) is complete on s1 and it is successful
+ * Another lookup ("dir") creates the directory on s1 too
+ * "dir" magically reappears after a successful rmdir
+ - lookup heal (src) vs renamedir ("src", "dst")
+ * renamedir ("src", "dst") complete on s2
+ * lookup ("src") recreates _src_ with _gfid-src_ on s2
+ * renamedir ("src", "dst") completes on s1, s3. After rename is complete path _dst_ will be associated with gfid _gfid-src_
+ * Another lookup ("src") recreates _src_ on subvols s1 and s3, associating it with gfid _gfid-src_
+ * Inconsistencies are
+ * after a successful renamedir ("src", "dst"), both src and dst exist
+ * Two directories - src and dst - are associated with same gfid. One common symptom is that some entries (of the earlier _src_ and current _dst_ directory) being missed out in readdir listing as the gfid handle might be pointing to the empty healed directory than the actual directory containing entries
+ - lookup heal (dst) vs renamedir ("src", "dst")
+ * dst exists and empty when renamdir started
+ * dst doesn't exist when renamedir started
+ - renamedir ("src", "dst") complete on s2 and s3
+ - lookup ("dst") creates _dst_ associating it with _gfid-src_ on s1
+ - An entry is created in _dst_ on either s1
+ - renamedir ("src", "dst") on s1 will result in a directory _dst/dst_ as _dst_ is no longer empty and _man 2 rename_ states that if _dst_ is not empty, _src_ is renamed _as a subdirectory of dst_
+ - A lookup ( _dst/dst_) creates _dst/dst_ on s2 and s3 associating with _gfid-src_ as part of self-heal
+ - Inconsistencies are:
+ * Two directories - _dst_ and _dst/dst_ - exist even though both of them didn't exist at the beginning of renamedir
+ * Both _dst_ and _dst/dst_ have same gfid - _gfid-src_. As observed earlier, symptom might be directory listing being incomplete
+ - mkdir (dst) vs renamedir ("src", "dst")
+ - rmdir (src) vs renamedir ("src", "dst")
+ - rmdir (dst) vs renamedir ("src", "dst")
diff --git a/doc/developer-guide/ec-implementation.md b/doc/developer-guide/ec-implementation.md
new file mode 100644
index 00000000000..77e62583caa
--- /dev/null
+++ b/doc/developer-guide/ec-implementation.md
@@ -0,0 +1,588 @@
+Erasure coding implementation
+=============================
+
+This document provides information about how [erasure code][1] has
+been implemented into ec translator. It describes the algorithm used
+and the optimizations made, but it doesn't contain a full description
+of the mathematical background needed to understand erasure coding in
+general. It either describes the other parts of ec not directly
+related to the encoding/decoding procedure, like synchronization or
+fop management.
+
+
+Introduction
+------------
+
+EC is based on [Reed-Solomon][2] erasure code. It's a very old code.
+It's not considered the best one nowadays, but is good enough and it's
+one of the few codes that is not covered by any patent and can be
+freely used.
+
+To define the Reed-Solomon code we use 3 parameters:
+
+ * __Key fragments (K)__
+ It represents the minimum number of healthy fragments that will be
+ needed to be able to recover the original data. Any subset of K
+ out of the total number of fragments will serve.
+
+ * __Redundancy fragments (R)__
+ It represents the number of extra fragments to compute for each
+ original data block. This value determines how many fragments can
+ be lost before being unable to recover the original data.
+
+ * __Fragment size (S)__
+ This determines the size of each fragment. The original data
+ block size is computed as S * K. Currently this values is fixed
+ to 512 bytes.
+
+ * __Total number of fragments (N = K + R)__
+ This isn't a real parameter but it will be useful to simplify
+ the following descriptions.
+
+From the point of view of the implementation, it only consists on
+matrix multiplications. There are two kinds of matrices to use for
+Reed-Solomon:
+
+ * __[Systematic][3]__
+ This kind of matrix has the particularity that K of the encoded
+ fragments are simply a copy of the original data, divided into K
+ pieces. Thus no real encoding needs to be done for them and only
+ the R redundancy fragments need to be computed.
+
+ This kind of matrices contain one KxK submatrix that is the
+ [identity matrix][4].
+
+ * __Non-systematic__
+ This kind of matrix doesn't contain an identity submatrix. This
+ means that all of the N fragments need to be encoded, requiring
+ more computation. On the other hand, these matrices have some nice
+ properties that allow faster implementations of some algorithms,
+ like the matrix inversion used to decode the data.
+
+ Another advantage of non-systematic matrices is that the decoding
+ time is constant, independently of how many fragments are lost,
+ while systematic approach can suffer from performance degradation
+ when one fragment is lost.
+
+All non-systematic matrices can be converted to systematic ones, but
+then we lose the good properties of the non-systematic. We have to
+choose betwee best peek performance (systematic) and performance
+stability (non-systematic).
+
+
+Encoding procedure
+------------------
+
+To encode a block of data we need a KxN matrix where each subset of K
+rows is [linearly independent][5]. In other words, the determinant of
+each KxK submatrix is not 0.
+
+There are some known ways to obtain this kind of matrices. EC uses a
+small variation of a matrix known as [Vandermonde Matrix][6] where
+each element of the matrix is defined as:
+
+ a(i, j) = i ^ (K - j)
+
+ where i is the row from 1 to N, and j is the column from 1 to K.
+
+This is exactly the Vandermonde Matrix but with the elements of each
+row in reverse order. This change is made to be able to implement a
+small optimization in the matrix multiplication.
+
+Once we have the matrix, we only need to compute the multiplication
+of this matrix by a vector composed of K elements of data coming from
+the original data block.
+
+ / \ / \
+ | 1 1 1 1 1 | / \ | a + b + c + d + e = t |
+ | 16 8 4 2 1 | | a | | 16a + 8b + 4c + 2d + e = u |
+ | 81 27 9 3 1 | | b | = | 81a + 27b + 9c + 3d + e = v |
+ | 256 64 16 4 1 | * | c | | 256a + 64b + 16c + 4d + e = w |
+ | 625 125 25 5 1 | | d | | 625a + 125b + 25c + 5d + e = x |
+ | 1296 216 36 6 1 | | e | | 1296a + 216b + 36c + 6d + e = y |
+ | 2401 343 49 7 1 | \ / | 2401a + 343b + 49c + 7d + e = z |
+ \ / \ /
+
+The optimization that can be done here is this:
+
+ 16a + 8b + 4c + 2d + e = 2(2(2(2a + b) + c) + d) + e
+
+So all the multiplications are always by the number of the row (2 in
+this case) and we don't need temporal storage for intermediate
+results:
+
+ a *= 2
+ a += b
+ a *= 2
+ a += c
+ a *= 2
+ a += d
+ a *= 2
+ a += e
+
+Once we have the result vector, each element is a fragment that needs
+to be stored in a separate place.
+
+
+Decoding procedure
+------------------
+
+To recover the data we need exactly K of the fragments. We need to
+know which K fragments we have (i.e. the original row number from
+which each fragment was calculated). Once we have this data we build
+a square KxK matrix composed by the rows corresponding to the given
+fragments and invert it.
+
+With the inverted matrix, we can recover the original data by
+multiplying it with the vector composed by the K fragments.
+
+In our previous example, if we consider that we have recovered
+fragments t, u, v, x and z, corresponding to rows 1, 2, 3, 5 and 7,
+we can build the following matrix:
+
+ / \
+ | 1 1 1 1 1 |
+ | 16 8 4 2 1 |
+ | 81 27 9 3 1 |
+ | 625 125 25 5 1 |
+ | 2401 343 49 7 1 |
+ \ /
+
+And invert it:
+
+ / \
+ | 1/48 -1/15 1/16 -1/48 1/240 |
+ | -17/48 16/15 -15/16 13/48 -11/240 |
+ | 101/48 -86/15 73/16 -53/48 41/240 |
+ | -247/48 176/15 -129/16 83/48 -61/240 |
+ | 35/8 -7 35/8 -7/8 1/8 |
+ \ /
+
+Multiplying it by the vector (t, u, v, x, z) we recover the original
+data (a, b, c, d, e):
+
+ / \ / \ / \
+ | 1/48 -1/15 1/16 -1/48 1/240 | | t | | a |
+ | -17/48 16/15 -15/16 13/48 -11/240 | | u | | b |
+ | 101/48 -86/15 73/16 -53/48 41/240 | * | v | = | c |
+ | -247/48 176/15 -129/16 83/48 -61/240 | | x | | d |
+ | 35/8 -7 35/8 -7/8 1/8 | | z | | e |
+ \ / \ / \ /
+
+
+Galois Field
+------------
+
+This encoding/decoding procedure is quite complex to compute using
+regular mathematical operations and it's not well suited for what
+we want to do (note that matrix elements can grow unboundly).
+
+To solve this problem, exactly the same procedure is done inside a
+[Galois Field][7] of characteristic 2, which is a finite field with
+some interesting properties that make it specially useful for fast
+operations using computers.
+
+There are two main differences when we use this specific Galois Field:
+
+ * __All regular additions are replaced by bitwise xor's__
+ For todays computers it's not really faster to execute an xor
+ compared to an addition, however replacing additions by xor's
+ inside a multiplication has many advantages (we will make use of
+ this to optimize the multiplication).
+
+ Another consequence of this change is that additions and
+ substractions are really the same xor operation.
+
+ * __The elements of the matrix are bounded__
+ The field uses a modulus that keep all possible elements inside
+ a delimited region, avoiding really big numbers and fixing the
+ number of bits needed to represent each value.
+
+ In the current implementation EC uses 8 bits per field element.
+
+It's very important to understand how multiplications are computed
+inside a Galois Field to be able to understand how has it been
+optimized.
+
+We'll start with a simple 'old school' multiplication but in base 2.
+For example, if we want to multiply 7 * 5 (111b * 101b in binary), we
+do the following:
+
+ 1 1 1 (= 7)
+ * 1 0 1 (= 5)
+ -----------
+ 1 1 1 (= 7)
+ + 0 0 0 (= 0)
+ + 1 1 1 (= 7)
+ -----------
+ 1 0 0 0 1 1 (= 35)
+
+This is quite simple. Note that the addition of the third column
+generates a carry that is propagated to all the other left columns.
+
+The next step is to define the modulus of the field. Suppose we use
+11 as the modulus. Then we convert the result into an element of the
+field by dividing by the modulus and taking the residue. We also use
+the 'old school' method in binary:
+
+
+ 1 0 0 0 1 1 (= 35) | 1 0 1 1 (= 11)
+ - 0 0 0 0 ----------------
+ --------- 0 1 1 (= 3)
+ 1 0 0 0 1
+ - 1 0 1 1
+ -----------
+ 0 0 1 1 0 1
+ - 1 0 1 1
+ -------------
+ 0 0 1 0 (= 2)
+
+So, 7 * 5 in a field with modulus 11 is 2. Note that the main
+objective in each iteration of the division is to make higher bits
+equal to 0 when possible (if it's not possible in one iteration, it
+will be zeroed on the next).
+
+If we do the same but changing additions with xors we get this:
+
+ 1 1 1 (= 7)
+ * 1 0 1 (= 5)
+ -----------
+ 1 1 1 (= 7)
+ x 0 0 0 (= 0)
+ x 1 1 1 (= 7)
+ -----------
+ 1 1 0 1 1 (= 27)
+
+In this case, the xor of the third column doesn't generate any carry.
+
+Now we need to divide by the modulus. We can also use 11 as the
+modulus since it still satisfies the needed conditions to work on a
+Galois Field of characteristic 2 with 3 bits:
+
+ 1 1 0 1 1 (= 27) | 1 0 1 1 (= 11)
+ x 1 0 1 1 ----------------
+ --------- 1 1 1 (= 7)
+ 0 1 1 0 1
+ x 1 0 1 1
+ -----------
+ 0 1 1 0 1
+ x 1 0 1 1
+ -------------
+ 0 1 1 0 (= 6)
+
+Note that, in this case, to make zero the higher bit we need to
+consider the result of the xor operation, not the addition operation.
+
+So, 7 * 5 in a Galois Field of 3 bits with modulus 11 is 6.
+
+
+Optimization
+------------
+
+To compute all these operations in a fast way some methods have been
+traditionally used. Maybe the most common is the [lookup table][8].
+
+The problem with this method is that it requires 3 lookups for each
+byte multiplication, greatly amplifying the needed memory bandwidth
+and making it difficult to take advantage of any SIMD support on the
+processor.
+
+What EC does to improve the performance is based on the following
+property (using the 3 bits Galois Field of the last example):
+
+ A * B mod N = (A * b{2} * 4 mod N) x
+ (A * b{1} * 2 mod N) x
+ (A * b{0} mod N)
+
+This is basically a rewrite of the steps made in the previous example
+to multiply two numbers but moving the modulus calculation inside each
+intermediate result. What we can see here is that each term of the
+xor can be zeroed if the corresponding bit of B is 0, so we can ignore
+that factor. If the bit is 1, we need to compute A multiplied by a
+power of two and take the residue of the division by the modulus. We
+can precompute these values:
+
+ A0 = A (we don't need to compute the modulus here)
+ A1 = A0 * 2 mod N
+ A2 = A1 * 2 mod N
+
+Having these values we only need to add those corresponding to bits
+set to 1 in B. Using our previous example:
+
+ A = 1 1 1 (= 7)
+ B = 1 0 1 (= 5)
+
+ A0 = 1 1 1 (= 7)
+ A1 = 1 1 1 * 1 0 mod 1 0 1 1 = 1 0 1 (= 5)
+ A2 = 1 0 1 * 1 0 mod 1 0 1 1 = 0 0 1 (= 1)
+
+ Since only bits 0 and 2 are 1 in B, we add A0 and A2:
+
+ A0 + A2 = 1 1 1 x 0 0 1 = 1 1 0 (= 6)
+
+If we carefully look at what we are doing when computing each Ax, we
+see that we do two basic things:
+
+ - Shift the original value one bit to the left
+ - If the highest bit is 1, xor with the modulus
+
+Let's write this in a detailed way (representing each bit):
+
+ Original value: a{2} a{1} a{0}
+ Shift 1 bit: a{2} a{1} a{0} 0
+
+ If a{2} is 0 we already have the result:
+ a{1} a{0} 0
+
+ If a{2} is 1 we need to xor with the modulus:
+ 1 a{1} a{0} 0 x 1 0 1 1 = a{1} (a{0} x 1) 1
+
+An important thing to see here is that if a{2} is 0, we can get the
+same result by xoring with all 0 instead of the modulus. For this
+reason we can rewrite the modulus as this:
+
+ Modulus: a{2} 0 a{2} a{2}
+
+This means that the modulus will be 0 0 0 0 is a{2} is 0, so the value
+won't change, and it will be 1 0 1 1 if a{2} is 1, giving the correct
+result. So, the computation is simply:
+
+ Original value: a{2} a{1} a{0}
+ Shift 1 bit: a{2} a{1} a{0} 0
+ Apply modulus: a{1} (a{0} x a{2}) a{2}
+
+We can compute all Ax using this method. We'll get this:
+
+ A0 = a{2} a{1} a{0}
+ A1 = a{1} (a{0} x a{2}) a{2}
+ A2 = (a{0} x a{2}) (a{1} x a{2}) a{1}
+
+Once we have all terms, we xor the ones corresponding to the bits set
+to 1 in B. In out example this will be A0 and A2:
+
+ Result: (a{2} x a{0} x a{2}) (a{1} x a{1} x a{2}) (a{0} x a{1})
+
+We can easily see that we can remove some redundant factors:
+
+ Result: a{0} a{2} (a{0} x a{1})
+
+This way we have come up with a simply set of equations to compute the
+multiplication of any number by 5. If A is 1 1 1 (= 7), the result
+must be 1 1 0 (= 6) using the equations, as we expected. If we try
+another numbe for A, like 0 1 0 (= 2), the result must be 0 0 1 (= 1).
+
+This seems a really fast way to compute the multiplication without
+using any table lookup. The problem is that this is only valid for
+B = 5. For other values of B another set of equations will be found.
+To solve this problem we can pregenerate the equations for all
+possible values of B. Since the Galois Field we use is small, this is
+feasible.
+
+One thing to be aware of is that, in general, two equations for
+different bits of the same B can share common subexpressions. This
+gives space for further optimizations to reduce the total number of
+xors used in the final equations for a given B. However this is not
+easy to find, since finding the smallest number of xors that give the
+correct result is an NP-Problem. For EC an exhaustive search has been
+made to find the best combinations for each possible value.
+
+
+Implementation
+--------------
+
+All this seems great from the hardware point of view, but implementing
+this using normal processor instructions is not so easy because we
+would need a lot of shifts, ands, xors and ors to move the bits of
+each number to the correct position to compute the equation and then
+another shift to put each bit back to its final place.
+
+For example, to implement the functions to multiply by 5, we would
+need something like this:
+
+ Bit 2: T2 = (A & 1) << 2
+ Bit 1: T1 = (A & 4) >> 1
+ Bit 0: T0 = ((A >> 1) x A) & 1
+ Result: T2 + T1 + T0
+
+This doesn't look good. So here we make a change in the way we get
+and process the data: instead of reading full numbers into variables
+and operate with them afterwards, we use a single independent variable
+for each bit of the number.
+
+Assume that we can read and write independent bits from memory (later
+we'll see how to solve this problem when this is not possible). In
+this case, the code would look something like this:
+
+ Bit 2: T2 = Mem[2]
+ Bit 1: T1 = Mem[1]
+ Bit 0: T0 = Mem[0]
+ Computation: T1 ^= T0
+ Store result: Mem[2] = T0
+ Mem[1] = T2
+ Mem[0] = T1
+
+Note that in this case we handle the final reordering of bits simply
+by storing the right variable to the right place, without any shifts,
+ands nor ors. In fact we only have memory loads, memory stores and
+xors. Note also that we can do all the computations directly using the
+variables themselves, without additional storage. This true for most
+of the values, but in some cases an additional one or two temporal
+variables will be needed to store intermediate results.
+
+The drawback of this approach is that additions, that are simply a
+xor of two numbers will need as many xors as bits are in each number.
+
+
+SIMD optimization
+-----------------
+
+So we have a good way to compute the multiplications, but even using
+this we'll need several operations for each byte of the original data.
+We can improve this by doing multiple multiplications using the same
+set of instructions.
+
+With the approach taken in the implementation section, we can see that
+in fact it's really easy to add SIMD support to this method. We only
+need to store in each variable one bit from multiple numbers. For
+example, when we load T2 from memory, instead of reading the bit 2 of
+the first number, we can read the bit 2 of the first, second, third,
+fourth, ... numbers. The same can be done when loading T1 and T0.
+
+Obviously this needs to have a special encoding of the numbers into
+memory to be able to do that in a single operation, but since we can
+choose whatever encoding we want for EC, we have chosen to have
+exactly that. We interpret the original data as a stream of bits, and
+we split it into subsequences of length L, each containing one bit of
+a number. Every S subsequences form a set of numbers of S bits that
+are encoded and decoded as a single group. This repeats for any
+remaining data.
+
+For example, in a simple case with L = 8 and S = 3, the original data
+would contain something like this (interpreted as a sequence of bits,
+offsets are also bit-based):
+
+ Offset 0: a{0} b{0} c{0} d{0} e{0} f{0} g{0} h{0}
+ Offset 8: a{1} b{1} c{1} d{1} e{1} f{1} g{1} h{1}
+ Offset 16: a{2} b{2} c{2} d{2} e{2} f{2} g{2} h{2}
+ Offset 24: i{0} j{0} k{0} l{0} m{0} n{0} o{0} p{0}
+ Offset 32: i{1} j{1} k{1} l{1} m{1} n{1} o{1} p{1}
+ Offset 40: i{2} j{2} k{2} l{2} m{2} n{2} o{2} p{2}
+
+Note: If the input file is not a multiple of S * L, 0-padding is done.
+
+Here we have 16 numbers encoded, from A to P. This way we can easily
+see that reading the first byte of the file will read all bits 0 of
+number A, B, C, D, E, F, G and H. The same happens with bits 1 and 2
+when we read the second and third bytes respectively. Using this
+encoding and the implementation described above, we can see that the
+same set of instructions will be computing the multiplication of 8
+numbers at the same time.
+
+This can be further improved if we use L = 64 with 64 bits variables
+on 64-bits processor. It's even faster if we use L = 128 using SSE
+registers or L = 256 using AVX registers on Intel processors.
+
+Currently EC uses L = 512 and S = 8. This means that numbers are
+packed in blocks of 512 bytes and gives space for even bigger
+processor registers up to 512 bits.
+
+
+Conclusions
+-----------
+
+This method requires a single variable/processor register for each
+bit. This can be challenging if we want to avoid additional memory
+accesses, even if we use modern processors that have many registers.
+However, the implementation we chose for the Vandermonde Matrix
+doesn't require temporary storage, so we don't need a full set of 8
+new registers (one for each bit) to store partial computations.
+Additionally, the computation of the multiplications requires, at
+most, 2 extra registers, but this is afordable.
+
+Xors are a really fast operation in modern processors. Intel CPU's
+can dispatch up to 3 xors per CPU cycle if there are no dependencies
+with ongoing previous instructions. Worst case is 1 xor per cycle. So,
+in some configurations, this method could be very near to the memory
+speed.
+
+Another interesting thing of this method is that all data it needs to
+operate is packed in small sequential blocks of memory, meaning that
+it can take advantage of the faster internal CPU caches.
+
+
+Results
+-------
+
+For the particular case of 8 bits, EC can compute each multiplication
+using 12.8 xors on average (without counting 0 and 1 that do not
+require any xor). Some numbers require less, like 2 that only requires
+3 xors.
+
+Having all this, we can check some numbers to see the performance of
+this method.
+
+Maybe the most interesting thing is the average number of xors needed
+to encode a single byte of data. To compute this we'll need to define
+some variables:
+
+ * K: Number of data fragments
+ * R: Number of redundancy fragments
+ * N: K + R
+ * B: Number of bits per number
+ * A: Average number of xors per number
+ * Z: Bits per CPU register (can be up to 256 for AVX registers)
+ * X: Average number of xors per CPU cycle
+ * L: Average cycles per load
+ * S: Average cycles per store
+ * G: Core speed in Hz
+
+_Total number of bytes processed for a single matrix multiplication_:
+
+ * __Read__: K * B * Z / 8
+ * __Written__: N * B * Z / 8
+
+_Total number of memory accesses_:
+
+ * __Loads__: K * B * N
+ * __Stores__: B * N
+
+> We need to read the same K * B * Z bits, in registers of Z bits, N
+> times, one for each row of the matrix. However the last N - 1 reads
+> could be made from the internal CPU caches if conditions are good.
+
+_Total number of operations_:
+
+ * __Additions__: (K - 1) * N
+ * __Multiplications__: K * N
+
+__Total number of xors__: B * (K - 1) * N + A * K * N =
+ N * ((A + B) * K - B)
+
+__Xors per byte__: 8 * N * ((A + B) * K - B) / (K * B * Z)
+
+__CPU cycles per byte__: 8 * N * ((A + B) * K - B) / (K * B * Z * X) +
+ 8 * L * N / Z + (loads)
+ 8 * S * N / (K * Z) (stores)
+
+__Bytes per second__: G / {CPU cycles per byte}
+
+Some xors per byte numbers for specific configurations (B=8):
+
+ Z=64 Z=128 Z=256
+ K=2/R=1 0.79 0.39 0.20
+ K=4/R=2 1.76 0.88 0.44
+ K=4/R=3 2.06 1.03 0.51
+ K=8/R=3 3.40 1.70 0.85
+ K=8/R=4 3.71 1.86 0.93
+ K=16/R=4 6.34 3.17 1.59
+
+
+
+[1]: https://en.wikipedia.org/wiki/Erasure_code
+[2]: https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction
+[3]: https://en.wikipedia.org/wiki/Systematic_code
+[4]: https://en.wikipedia.org/wiki/Identity_matrix
+[5]: https://en.wikipedia.org/wiki/Linear_independence
+[6]: https://en.wikipedia.org/wiki/Vandermonde_matrix
+[7]: https://en.wikipedia.org/wiki/Finite_field
+[8]: https://en.wikipedia.org/wiki/Finite_field_arithmetic#Implementation_tricks
diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md
new file mode 100644
index 00000000000..ec991b81ec5
--- /dev/null
+++ b/doc/developer-guide/fuse-interrupt.md
@@ -0,0 +1,211 @@
+# Fuse interrupt handling
+
+## Conventions followed
+
+- *FUSE* refers to the "wire protocol" between kernel and userspace and
+ related specifications.
+- *fuse* refers to the kernel subsystem and also to the GlusterFs translator.
+
+## FUSE interrupt handling spec
+
+The [Linux kernel FUSE documentation](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148)
+desrcibes how interrupt handling happens in fuse.
+
+## Interrupt handling in the fuse translator
+
+### Declarations
+
+This document describes the internal API in the fuse translator with which
+interrupt can be handled.
+
+The API being internal (to be used only in fuse-bridge.c; the functions are
+not exported to a header file).
+
+```
+enum fuse_interrupt_state {
+ /* ... */
+ INTERRUPT_SQUELCHED,
+ INTERRUPT_HANDLED,
+ /* ... */
+};
+typedef enum fuse_interrupt_state fuse_interrupt_state_t;
+struct fuse_interrupt_record;
+typedef struct fuse_interrupt_record fuse_interrupt_record_t;
+typedef void (*fuse_interrupt_handler_t)(xlator_t *this,
+ fuse_interrupt_record_t *);
+struct fuse_interrupt_record {
+ fuse_in_header_t fuse_in_header;
+ void *data;
+ /*
+ ...
+ */
+};
+
+fuse_interrupt_record_t *
+fuse_interrupt_record_new(fuse_in_header_t *finh,
+ fuse_interrupt_handler_t handler);
+
+void
+fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir);
+
+gf_boolean_t
+fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this,
+ gf_boolean_t sync, void **datap);
+
+void
+fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir,
+ fuse_interrupt_state_t intstat,
+ gf_boolean_t sync, void **datap);
+```
+
+The code demonstrates the usage of the API through `fuse_flush()`. (It's a
+dummy implementation only for demonstration purposes.) Flush is chosen
+because a `FLUSH` interrupt is easy to trigger (see
+*tests/features/interrupt.t*). Interrupt handling for flush is switched on
+by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag).
+The implementation of flush interrupt is contained in the
+`fuse_flush_interrupt_handler()` function and blocks guarded by the
+
+```
+if (priv->flush_handle_interrupt) { ...
+```
+
+conditional (where `priv` is a `*fuse_private_t`).
+
+### Overview
+
+"Regular" fuse fops and interrupt handlers interact via a list containing
+interrupt records.
+
+If a fop wishes to have its interrupts handled, it needs to set up an
+interrupt record and insert it into the list; also when it's to finish
+(ie. in its "cbk" stage) it needs to delete the record from the list.
+
+If no interrupt happens, basically that's all to it - a list insertion
+and deletion.
+
+However, if an interrupt comes for the fop, the interrupt FUSE request
+will carry the data identifying an ongoing fop (that is, its `unique`),
+and based on that, the interrupt record will be looked up in the list, and
+the specific interrupt handler (a member of the interrupt record) will be
+called.
+
+Usually the fop needs to share some data with the interrupt handler to
+enable it to perform its task (also shared via the interrupt record).
+The interrupt API offers two approaches to manage shared data:
+- _Async or reference-counting strategy_: from the point on when the interrupt
+ record is inserted to the list, it's owned jointly by the regular fop and
+ the prospective interrupt handler. Both of them need to check before they
+ return if the other is still holding a reference; if not, then they are
+ responsible for reclaiming the shared data.
+- _Sync or borrow strategy_: the interrupt handler is considered a borrower
+ of the shared data. The interrupt handler should not reclaim the shared
+ data. The fop will wait for the interrupt handler to finish (ie., the borrow
+ to be returned), then it has to reclaim the shared data.
+
+The user of the interrupt API need to call the following functions to
+instrument this control flow:
+- `fuse_interrupt_record_insert()` in the fop to insert the interrupt record to
+ the list;
+- `fuse_interrupt_finish_fop()`in the fop (cbk) and
+- `fuse_interrupt_finish_interrupt()`in the interrupt handler
+
+to perform needed synchronization at the end their tenure. The data management
+strategies are implemented by the `fuse_interrupt_finish_*()` functions (which
+have an argument to specify which strategy to use); these routines take care
+of freeing the interrupt record itself, while the reclamation of the shared data
+is left to the API user.
+
+### Usage
+
+A given FUSE fop can be enabled to handle interrupts via the following
+steps:
+
+- Define a handler function (of type `fuse_interrupt_handler_t`).
+ It should implement the interrupt handling logic and in the end
+ call (directly or as async callback) `fuse_interrupt_finish_interrupt()`.
+ The `intstat` argument to `fuse_interrupt_finish_interrupt` should be
+ either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`.
+ - `INTERRUPT_SQUELCHED` means that the interrupt could not be delivered
+ and the fop is going on uninterrupted.
+ - `INTERRUPT_HANDLED` means that the interrupt was actually handled. In
+ this case the fop will be answered from interrupt context with errno
+ `EINTR` (that is, the fop should not send a response to the kernel).
+
+ (the enum `fuse_interrupt_state` includes further members, which are reserved
+ for internal use).
+
+ We return to the `sync` and `datap` arguments later.
+- In the `fuse_<FOP>` function create an interrupt record using
+ `fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and
+ the above handler function to it.
+ - Arbitrary further data can be referred to via the `data` member of the
+ interrupt record that is to be passed on from fop context to
+ interrupt context.
+- When it's set up, pass the interrupt record to
+ `fuse_interrupt_record_insert()`.
+- In `fuse_<FOP>_cbk` call `fuse_interrupt_finish_fop()`.
+ - `fuse_interrupt_finish_fop()` returns a Boolean according to whether the
+ interrupt was handled. If it was, then the FUSE request is already
+ answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so
+ `fuse_<FOP>_cbk()` can just return (zero). Otherwise follow the standard
+ cbk logic (answer the FUSE request and destroy the stack -- these are
+ typically accomplished by `fuse_err_cbk()`).
+- The last two argument of `fuse_interrupt_finish_fop()` and
+ `fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and
+ `void **datap`.
+ - `sync` represents the strategy for freeing the interrupt record. The
+ interrupt handler and the fop handler are in race to get at the interrupt
+ record first (interrupt handler for purposes of doing the interrupt
+ handling, fop handler for purposes of deactivating the interrupt record
+ upon completion of the fop handling).
+ - If `sync` is true, then the fop handler will wait for the interrupt
+ handler to finish and it takes care of freeing.
+ - If `sync` is false, the loser of the above race will perform freeing.
+
+ Freeing is done within the respective interrupt finish routines, except
+ for the `data` field of the interrupt record; with respect to that, see
+ the discussion of the `datap` parameter below. The strategy has to be
+ consensual, that is, `fuse_interrupt_finish_fop()` and
+ `fuse_interrupt_finish_interrupt()` must pass the same value for `sync`.
+ If dismantling the resources associated with the interrupt record is
+ simple, `sync = _gf_false` is the suggested choice; `sync = _gf_true` can
+ be useful in the opposite case, when dismantling those resources would
+ be inconvenient to implement in two places or to enact in non-fop context.
+ - If `datap` is `NULL`, the `data` member of the interrupt record will be
+ freed within the interrupt finish routine. If it points to a valid
+ `void *` pointer, and if caller is doing the cleanup (see `sync` above),
+ then that pointer will be directed to the `data` member of the interrupt
+ record and it's up to the caller what it's doing with it.
+ - If `sync` is true, interrupt handler can use `datap = NULL`, and
+ fop handler will have `datap` point to a valid pointer.
+ - If `sync` is false, and handlers pass a pointer to a pointer for
+ `datap`, they should check if the pointed pointer is NULL before
+ attempting to deal with the data.
+
+### FUSE answer for the interrupted fop
+
+The kernel acknowledges a successful interruption for a given FUSE request
+if the filesystem daemon answers it with errno EINTR; upon that, the syscall
+which induced the request will be abruptly terminated with an interrupt, rather
+than returning a value.
+
+In glusterfs, this can be arranged in two ways.
+
+- If the interrupt handler wins the race for the interrupt record, ie.
+ `fuse_interrupt_finish_fop()` returns true to `fuse_<FOP>_cbk()`, then, as
+ said above, `fuse_<FOP>_cbk()` does not need to answer the FUSE request.
+ That's because then the interrupt handler will take care about answering
+ it (with errno EINTR).
+- If `fuse_interrupt_finish_fop()` returns false to `fuse_<FOP>_cbk()`, then
+ this return value does not inform the fop handler whether there was an interrupt
+ or not. This return value occurs both when fop handler won the race for the
+ interrupt record against the interrupt handler, and when there was no interrupt
+ at all.
+
+ However, the internal logic of the fop handler might detect from other
+ circumstances that an interrupt was delivered. For example, the fop handler
+ might be sleeping, waiting for some data to arrive, so that a premature
+ wakeup (with no data present) occurs if the interrupt handler intervenes. In
+ such cases it's the responsibility of the fop handler to reply the FUSE
+ request with errro EINTR.
diff --git a/doc/developer-guide/gfapi-symbol-versions.md b/doc/developer-guide/gfapi-symbol-versions.md
new file mode 100644
index 00000000000..e4f4fe9f052
--- /dev/null
+++ b/doc/developer-guide/gfapi-symbol-versions.md
@@ -0,0 +1,270 @@
+
+## Symbol Versions and SO_NAMEs
+
+ In general, adding new APIs to a shared library does not require that
+symbol versions be used or the the SO_NAME be "bumped." These actions
+are usually reserved for when a major change is introduced, e.g. many
+APIs change or a signficant change in the functionality occurs.
+
+ Over the normal lifetime of a When a new API is added, the library is
+recompiled, consumers of the new API are able to do so, and existing,
+legacy consumers of the original API continue as before. If by some
+chance an old copy of the library is installed on a system, it's unlikely
+that most applications will be affected. New applications that use the
+new API will incur a run-time error terminate.
+
+ Bumping the SO_NAME, i.e. changing the shared lib's file name, e.g.
+from libfoo.so.0 to libfoo.so.1, which also changes the ELF SO_NAME
+attribute inside the file, works a little differently. libfoo.so.0
+contains only the old APIs. libfoo.so.1 contains both the old and new
+APIs. Legacy software that was linked with libfoo.so.0 continues to work
+as libfoo.so.0 is usually left installed on the system. New software that
+uses the new APIs is linked with libfoo.so.1, and works as long as
+long as libfoo.so.1 is installed on the system. Accidentally (re)installing
+libfoo.so.0 doesn't break new software as long as reinstalling doesn't
+erase libfoo.so.1.
+
+ Using symbol versions is somewhere in the middle. The shared library
+file remains libfoo.so.0 forever. Legacy APIs may or may not have an
+associated symbol version. New APIs may or may not have an associated
+symbol version either. In general symbol versions are reserved for APIs
+that have changed. Either the function's signature has changed, i.e. the
+return time or the number of paramaters, and/or the parameter types have
+changed. Another reason for using symbol versions on an API is when the
+behaviour or functionality of the API changes dramatically. As with a
+library that doesn't use versioned symbols, old and new applications
+either find or don't find the versioned symbols they need. If the versioned
+symbol doesn't exist in the installed library, the application incurs a
+run-time error and terminates.
+
+ GlusterFS wanted to keep tight control over the APIs in libgfapi.
+Originally bumping the SO_NAME was considered, and GlusterFS-3.6.0 was
+released with libgfapi.so.7. Not only was "7" a mistake (it should have
+been "6"), but it was quickly pointed out that many dependent packages
+that use libgfapi would be forced to be recompiled/relinked. Thus no
+packages of 3.6.0 were ever released and 3.6.1 was quickly released with
+libgfapi.so.0, but with symbol versions. There's no strong technical
+reason for either; the APIs have not changed, only new APIs have been
+added. It's merely being done in anticipation that some APIs might change
+sometime in the future.
+
+ Enough about that now, let's get into the nitty gritty——
+
+## Adding new APIs
+
+### Adding a public API.
+
+ This is the default, and the easiest thing to do. Public APIs have
+declarations in either glfs.h, glfs-handles.h, or, at your discretion,
+in a new header file intended for consumption by other developers.
+
+Here's what you need to do to add a new public API:
+
++ Write the declaration, e.g. in glfs.h:
+
+```C
+ int glfs_dtrt (const char *volname, void *stuff) __THROW
+```
+
++ Write the definition, e.g. in glfs-dtrt.c:
+
+```C
+ int
+ pub_glfs_dtrt (const char *volname, void *stuff)
+ {
+ ...
+ return 0;
+ }
+```
+
++ Add the symbol version magic for ELF, gnu toolchain to the definition.
+
+ following the definition of your new function in glfs-dtrtops.c, add a
+ line like this:
+
+```C
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dtrt, 3.7.0)
+```
+
+ The whole thing should look like:
+
+```C
+ int
+ pub_glfs_dtrt (const char *volname, void *stuff)
+ {
+ ...
+ }
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dtrt, 3.7.0);
+```
+
+ In this example, 3.7.0 refers to the Version the symbol will first
+ appear in. There's nothing magic about it, it's just a string token.
+ The current versions we have are 3.4.0, 3.4.2, 3.5.0, 3.5.1, and 3.6.0.
+ They are to be considered locked or closed. You can not, must not add
+ any new APIs and use these versions. Most new APIs will use 3.7.0. If
+ you add a new API appearing in 3.6.2 (and mainline) then you would use
+ 3.6.2.
+
++ Add the symbol version magic for OS X to the declaration.
+
+ following the declaration in glfs.h, add a line like this:
+
+```C
+ GFAPI_PUBLIC(glfs_dtrt, 3.7.0)
+```
+
+ The whole thing should look like:
+
+```C
+ int glfs_dtrt (const char *volname, void *stuff) __THROW
+ GFAPI_PUBLIC(glfs_dtrt, 3.7.0);
+```
+
+ The version here must match the version associated with the definition.
+
++ Add the new API to the ELF, gnu toolchain link map file, gfapi.map
+
+ Most new public APIs will probably be added to a new section that
+ looks like this:
+
+```
+ GFAPI_3.7.0 {
+ global:
+ glfs_dtrt;
+ } GFAPI_PRIVATE_3.7.0;
+```
+
+ if you're adding your new API to, e.g. 3.6.2, it'll look like this:
+
+```
+ GFAPI_3.6.2 {
+ global:
+ glfs_dtrt;
+ } GFAPI_3.6.0;
+```
+
+ and you must change the
+```
+ GFAPI_PRIVATE_3.7.0 { ...} GFAPI_3.6.0;
+```
+ section to:
+```
+ GFAPI_PRIVATE_3.7.0 { ...} GFAPI_3.6.2;
+```
+
++ Add the new API to the OS X alias list file, gfapi.aliases.
+
+ Most new APIs will use a line that looks like this:
+
+```C
+ _pub_glfs_dtrt _glfs_dtrt$GFAPI_3.7.0
+```
+
+ if you're adding your new API to, e.g. 3.6.2, it'll look like this:
+
+```C
+ _pub_glfs_dtrt _glfs_dtrt$GFAPI_3.6.2
+```
+
+And that's it.
+
+
+### Adding a private API.
+
+ If you're thinking about adding a private API that isn't declared in
+one of the header files, then you should seriously rethink what you're
+doing and figure out how to put it in libglusterfs instead.
+
+If that hasn't convinced you, follow the instructions above, but use the
+_PRIVATE versions of macros, symbol versions, and aliases. If you're 1337
+enough to ignore this advice, then you're 1337 enough to figure out how
+to do it.
+
+
+## Changing an API.
+
+### Changing a public API.
+
+ There are two ways an API might change, 1) its signature has changed, or
+2) its new functionality or behavior is substantially different than the
+old. An APIs signature consists of the function return type, and the number
+and/or type of its parameters. E.g. the original API:
+
+```C
+ int glfs_dtrt (const char *volname, void *stuff);
+```
+
+and the changed API:
+
+```C
+ void *glfs_dtrt (const char *volname, glfs_t *ctx, void *stuff);
+```
+
+ One way to avoid a change like this, and which is preferable in many
+ways, is to leave the legacy glfs_dtrt() function alone, document it as
+deprecated, and simply add a new API, e.g. glfs_dtrt2(). Practically
+speaking, that's effectively what we'll be doing anyway, the difference
+is only that we'll use a versioned symbol to do it.
+
+ On the assumption that adding a new API is undesirable for some reason,
+perhaps the use of glfs_gnu() is just so pervasive that we really don't
+want to add glfs_gnu2().
+
++ change the declaration in glfs.h:
+
+```C
+ glfs_t *glfs_gnu (const char *volname, void *stuff) __THROW
+ GFAPI_PUBLIC(glfs_gnu, 3.7.0);
+````
+
+Note that there is only the single, new declaration.
+
++ change the old definition of glfs_gnu() in glfs.c:
+
+```C
+ struct glfs *
+ pub_glfs_gnu340 (const char * volname)
+ {
+ ...
+ }
+ GFAPI_SYMVER_PUBLIC(glfs_gnu340, glfs_gnu, 3.4.0);
+```
+
++ create the new definition of glfs_gnu in glfs.c:
+
+```C
+ struct glfs *
+ pub_glfs_gnu (const char * volname, void *stuff)
+ {
+ ...
+ }
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_gnu, 3.7.0);
+```
+
++ Add the new API to the ELF, gnu toolchain link map file, gfapi.map
+
+```
+ GFAPI_3.7.0 {
+ global:
+ glfs_gnu;
+ } GFAPI_PRIVATE_3.7.0;
+```
+
++ Update the OS X alias list file, gfapi.aliases, for both versions:
+
+Change the old line:
+```C
+ _pub_glfs_gnu _glfs_gnu$GFAPI_3.4.0
+```
+to:
+```C
+ _pub_glfs_gnu340 _glfs_gnu$GFAPI_3.4.0
+```
+
+Add a new line:
+```C
+ _pub_glfs_gnu _glfs_gnu$GFAPI_3.7.0
+```
+
++ Lastly, change all gfapi internal calls glfs_gnu to the new API.
+
diff --git a/doc/developer-guide/identifying-resource-leaks.md b/doc/developer-guide/identifying-resource-leaks.md
new file mode 100644
index 00000000000..950cae79b0a
--- /dev/null
+++ b/doc/developer-guide/identifying-resource-leaks.md
@@ -0,0 +1,200 @@
+# Identifying Resource Leaks
+
+Like most other pieces of software, GlusterFS is not perfect in how it manages
+its resources like memory, threads and the like. Gluster developers try hard to
+prevent leaking resources but releasing and unallocating the used structures.
+Unfortunately every now and then some resource leaks are unintentionally added.
+
+This document tries to explain a few helpful tricks to identify resource leaks
+so that they can be addressed.
+
+
+## Debug Builds
+
+There are certain techniques used in GlusterFS that make it difficult to use
+tools like Valgrind for memory leak detection. There are some build options
+that make it more practical to use Valgrind and other tools. When running
+Valgrind, it is important to have GlusterFS builds that contain the
+debuginfo/symbols. Some distributions (try to) strip the debuginfo to get
+smaller executables. Fedora and RHEL based distributions have sub-packages
+called ...-debuginfo that need to be installed for symbol resolving.
+
+
+### Memory Pools
+
+By using memory pools, there are no allocation/freeing of single structures
+needed. This improves performance, but also makes it impossible to track the
+allocation and freeing of srtuctures.
+
+It is possible to disable the use of memory pools, and use standard `malloc()`
+and `free()` functions provided by the C library. Valgrind is then able to
+track the allocated areas and verify if they have been free'd. In order to
+disable memory pools, the Gluster sources needs to be configured with the
+`--enable-debug` option:
+
+```shell
+./configure --enable-debug
+```
+
+When building RPMs, the `.spec` handles the `--with=debug` option too:
+
+```shell
+make dist
+rpmbuild -ta --with=debug glusterfs-....tar.gz
+```
+
+### Dynamically Loaded xlators
+
+Valgrind tracks the call chain of functions that do memory allocations. The
+addresses of the functions are stored and before Valgrind exits the addresses
+are resolved into human readable function names and offsets (line numbers in
+source files). Because Gluster loads xlators dynamically, and unloads then
+before exiting, Valgrind is not able to resolve the function addresses into
+symbols anymore. Whenever this happend, Valgrind shows `???` in the output,
+like
+
+```
+ ==25170== 344 bytes in 1 blocks are definitely lost in loss record 233 of 324
+ ==25170== at 0x4C29975: calloc (vg_replace_malloc.c:711)
+ ==25170== by 0x52C7C0B: __gf_calloc (mem-pool.c:117)
+ ==25170== by 0x12B0638A: ???
+ ==25170== by 0x528FCE6: __xlator_init (xlator.c:472)
+ ==25170== by 0x528FE16: xlator_init (xlator.c:498)
+ ...
+```
+
+These `???` can be prevented by not calling `dlclose()` for unloading the
+xlator. This will cause a small leak of the handle that was returned with
+`dlopen()`, but for improved debugging this can be acceptible. For this and
+other Valgrind features, a `--enable-valgrind` option is available to
+`./configure`. When GlusterFS is built with this option, Valgrind will be able
+to resolve the symbol names of the functions that do memory allocations inside
+xlators.
+
+```shell
+./configure --enable-valgrind
+```
+
+When building RPMs, the `.spec` handles the `--with=valgrind` option too:
+
+```shell
+make dist
+rpmbuild -ta --with=valgrind glusterfs-....tar.gz
+```
+
+## Running Valgrind against a single xlator
+
+Debugging a single xlator is not trivial. But there are some tools to make it
+easier. The `sink` xlator does not do any memory allocations itself, but
+contains just enough functionality to mount a volume with only the `sink`
+xlator. There is a little gfapi application under `tests/basic/gfapi/` in the
+GlusterFS sources that can be used to run only gfapi and the core GlusterFS
+infrastructure with the `sink` xlator. By extending the `.vol` file to load
+more xlators, each xlator can be debugged pretty much separately (as long as
+the xlators have no dependencies on each other). A basic Valgrind run with the
+suitable configure options looks like this:
+
+```shell
+./autogen.sh
+./configure --enable-debug --enable-valgrind
+make && make install
+cd tests/basic/gfapi/
+make gfapi-load-volfile
+valgrind ./gfapi-load-volfile sink.vol
+```
+
+Combined with other very useful options to Valgrind, the following execution
+shows many more useful details:
+
+```shell
+valgrind \
+ --fullpath-after= --leak-check=full --show-leak-kinds=all \
+ ./gfapi-load-volfile sink.vol
+```
+
+Note that the `--fullpath-after=` option is left empty, this makes Valgrind
+print the full path and filename that contains the functions:
+
+```
+==2450== 80 bytes in 1 blocks are definitely lost in loss record 8 of 60
+==2450== at 0x4C29975: calloc (/builddir/build/BUILD/valgrind-3.11.0/coregrind/m_replacemalloc/vg_replace_malloc.c:711)
+==2450== by 0x52C6F73: __gf_calloc (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/mem-pool.c:117)
+==2450== by 0x12F10CDA: init (/usr/src/debug/glusterfs-3.11dev/xlators/meta/src/meta.c:231)
+==2450== by 0x528EFD5: __xlator_init (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/xlator.c:472)
+==2450== by 0x528F105: xlator_init (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/xlator.c:498)
+==2450== by 0x52D9D8B: glusterfs_graph_init (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/graph.c:321)
+...
+```
+
+In the above example, the `init` function in `xlators/meta/src/meta.c` does a
+memory allocation on line 231. This memory is never free'd again, and hence
+Valgrind logs this call stack. When looking in the code, it seems that the
+allocation of `priv` is assigned to the `this->private` member of the
+`xlator_t` structure. Because the allocation is done in `init()`, free'ing is
+expected to happen in `fini()`. Both functions are shown below, with the
+inclusion of the empty `fini()`:
+
+
+```
+226 int
+227 init (xlator_t *this)
+228 {
+229 meta_priv_t *priv = NULL;
+230
+231 priv = GF_CALLOC (sizeof(*priv), 1, gf_meta_mt_priv_t);
+232 if (!priv)
+233 return -1;
+234
+235 GF_OPTION_INIT ("meta-dir-name", priv->meta_dir_name, str, out);
+236
+237 this->private = priv;
+238 out:
+239 return 0;
+240 }
+241
+242
+243 int
+244 fini (xlator_t *this)
+245 {
+246 return 0;
+247 }
+```
+
+In this case, the resource leak can be addressed by adding a single line to the
+`fini()` function:
+
+```
+243 int
+244 fini (xlator_t *this)
+245 {
+246 GF_FREE (this->private);
+247 return 0;
+248 }
+```
+
+Running the same Valgrind command and comparing the output will show that the
+memory leak in `xlators/meta/src/meta.c:init` is not reported anymore.
+
+### Running DRD, the Valgrind thread error detector
+
+When configuring GlusterFS with:
+
+```shell
+./configure --enable-valgrind
+```
+
+the default Valgrind tool (Memcheck) is enabled. But it's also possble to select
+one of Memcheck or DRD by using:
+
+```shell
+./configure --enable-valgrind=memcheck
+```
+
+or:
+
+```shell
+./configure --enable-valgrind=drd
+```
+
+respectively. When using DRD, it's recommended to consult
+https://valgrind.org/docs/manual/drd-manual.html before running.
diff --git a/doc/developer-guide/logging-guidelines.md b/doc/developer-guide/logging-guidelines.md
new file mode 100644
index 00000000000..0e6b2588535
--- /dev/null
+++ b/doc/developer-guide/logging-guidelines.md
@@ -0,0 +1,132 @@
+Guidelines on using the logging framework within a component
+============================================================
+Gluster library libglusterfs.so provides message logging abstractions that
+are intended to be used across all code/components within gluster.
+
+There could be potentially 2 major cases how the logging infrastructure is
+used,
+ - A new gluster service daemon or end point is created
+ - The service daemon infrastructure itself initlializes the logging
+ infrastructure (i.e calling gf_log_init and related set functions)
+ - See, glusterfsd.c:logging_init
+ - Alternatively there could be a case where an end point service (say
+ gfapi) may need to do the required initialization
+ - This document does not (yet?) cover guidelines for these cases. Best
+ bet would be to look at code in glusterfsd.c:logging_init (or equivalent)
+ in case a need arises and you reach this document.
+ - A new xlator or subcomponent is written as a part of the stack
+ - Primarily in this case, the consumer of the logging APIs would only
+ invoke an API to *log* a particular message at a certain severity
+ - This document elaborates on this use of the message logging framework
+ in this context
+
+There are 2 interfaces provided to log messages,
+1. The older gf_log* interface
+2. The newer gf_msg* interface
+
+1. Older _to_be_deprecated_ gf_log* interface
+ - Not much documentation is provided for this interface here, as these are
+ meant to be deprecated and all code should be moved to the newer gf_msg*
+ interface
+
+2. New gf_msg* interface
+ - The set of interfaces provided by gf_msg are,
+ - NOTE: It is best to consult logging.h for the latest interfaces, as
+ this document may get out of sync with the code
+
+ *gf_msg(dom, levl, errnum, msgid, fmt...)*
+ - Used predominantly to log a message above TRACE and DEBUG levels
+ - See further guidelines below
+
+ *gf_msg_debug(dom, errnum, fmt...)*
+ *gf_msg_trace(dom, errnum, fmt...)*
+ - Above are handy shortcuts for TRACE and DEBUG level messages
+ - See further guidelines below
+
+ *gf_msg_callingfn(dom, levl, errnum, msgid, fmt...)*
+ - Useful function when a backtrace to detect calling routines that
+ reached this execution point needs to be logged in addition to the
+ message
+ - This is very handy for framework libraries or code, as there could
+ be many callers to the same, and the real failure would need the call
+ stack to determine who the caller actually was
+ - A good example is the dict interfaces using this function to log who
+ called, when a particular error/warning needs to be displayed
+ - See further guidelines below
+
+ *gf_msg_plain(levl, fmt...)*
+ *gf_msg_plain_nomem(levl, msg)*
+ *gf_msg_vplain(levl, fmt, va)*
+ *gf_msg_backtrace_nomem*
+ - The above interfaces are provided to log messages without any typical
+ headers (like the time stamp, dom, errnum etc.). The primary users of
+ the above interfaces are, when printing the final graph, or printing
+ the configuration when a process is about dump core or abort, or
+ printing the backtrace when a process receives a critical signal
+ - These interfaces should not be used outside the scope of the users
+ above, unless you know what you are doing
+
+ *gf_msg_nomem(dom, levl, size)*
+ - Very crisp messages, throwing out file, function, line numbers, in
+ addition to the passed in size
+ - These are used in the memory allocation abstractions interfaces in
+ gluster, when the allocation fails (hence a no-mem log message, so that
+ further allocation is not attempted by the logging infrastructure)
+ - If you are contemplating using these, then you know why and how
+
+ - Guidelines for the various arguments to the interfaces
+ **dom**
+ - The domain from which this message appears, IOW for xlators it should
+ be the name of the xlator (as in this->name)
+
+ - The intention of this string is to distinguish from which
+ component/xlator the message is being printed
+
+ - This information can also be gleaned from the FILE:LINE:FUNCTION
+ information printed in the message anyway, but is retained to provide
+ backward compatability to the messages as seen by admins earlier
+
+ **levl**
+ - Consult logging.h:gf_loglevel_t for logging levels (they pretty much
+ map to syslog levels)
+
+ **errnum**
+ - errno should be passed in here, if available, 0 otherwise. This auto
+ enables the logging infrastructure to print (man 3) strerror form of the
+ same at the end of the message in a consistent format
+
+ - If any message is already adding the strerror as a parameter to the
+ message, it is suggested/encouraged to remove the same and pass it as
+ errnum
+
+ - The intention is that, if all messages did this using errnum and not
+ as a part of their own argument list, the output would look consistent
+ for the admin and cross component developers when reading logs
+
+ **msgid**
+ - This is a unique message ID per message (which now is more a message
+ ID per group of messages in the implementation)
+
+ - Rules for generating this ID can be found in dht-messages.h (it used
+ to be template-common-messages.h, but that template is not updated,
+ this comment should be changed once that is done) and glfs-message-id.h
+
+ - Every message that is *above* TRACE and DEBUG should get a message
+ ID, as this helps generating message catalogs that can help admins to
+ understand the context of messages better. Another intention is that
+ automated message parsers could detect a class of message IDs and send
+ out notifications on the same, rather than parse the message string
+ itself (which if it changes, the input to the parser has to change, etc.
+ and hence is better to retain message IDs)
+
+ - Ok so if intention is not yet clear, look at journald MESSAGE ID
+ motivation, as that coupled with ident/dom above is expected to provide
+ us with similar advantages
+
+ - Bottomline: Every message gets its own ID, in case a message is
+ *not* DEBUG or TRACE and still is developer centric, a generic message
+ ID per component *maybe* assigned to the same to provide ease of use
+ of the API
+
+ **fmt**
+ - As in format argument of (man 3) printf
diff --git a/doc/developer-guide/network_compression.md b/doc/developer-guide/network_compression.md
new file mode 100644
index 00000000000..1222a765276
--- /dev/null
+++ b/doc/developer-guide/network_compression.md
@@ -0,0 +1,71 @@
+# On-Wire Compression + Decompression
+
+The 'compression translator' compresses and decompresses data in-flight
+between client and bricks.
+
+### Working
+When a writev call occurs, the client compresses the data before sending it to
+brick. On the brick, compressed data is decompressed. Similarly, when a readv
+call occurs, the brick compresses the data before sending it to client. On the
+client, the compressed data is decompressed. Thus, the amount of data sent over
+the wire is minimized. Compression/Decompression is done using Zlib library.
+
+During normal operation, this is the format of data sent over wire:
+
+~~~
+<compressed-data> + trailer(8 bytes)
+~~~
+
+The trailer contains the CRC32 checksum and length of original uncompressed
+data. This is used for validation.
+
+### Usage
+
+Turning on compression xlator:
+
+~~~
+gluster volume set <vol_name> network.compression on
+~~~
+
+### Configurable parameters (optional)
+
+**Compression level**
+~~~
+gluster volume set <vol_name> network.compression.compression-level 8
+~~~
+
+~~~
+ 0 : no compression
+ 1 : best speed
+ 9 : best compression
+-1 : default compression
+~~~
+
+**Minimum file size**
+
+~~~
+gluster volume set <vol_name> network.compression.min-size 50
+~~~
+
+Data is compressed only when its size exceeds the above value in bytes.
+
+**Other paramaters**
+
+Other less frequently used parameters include `network.compression.mem-level`
+and `network.compression.window-size`. More details can about these options
+can be found by running `gluster volume set help` command.
+
+### Known Issues and Limitations
+
+* Compression translator cannot work with striped volumes.
+* Mount point hangs when writing a file with write-behind xlator turned on. To
+overcome this, turn off `performance.write-behind` entirely OR
+set`performance.strict-write-ordering` to on.
+* For glusterfs versions <= 3.5, compression traslator can ONLY work with pure
+distribute volumes. This limitation is caused by AFR not being able to
+propagate xdata. This issue has been fixed in glusterfs versions > 3.5
+
+### TODO
+Although zlib offers high compression ratio, it is very slow. We can make the
+translator pluggable to add support for other compression methods such as
+[lz4 compression](https://code.google.com/p/lz4/)
diff --git a/doc/developer-guide/options-to-contribute.md b/doc/developer-guide/options-to-contribute.md
new file mode 100644
index 00000000000..3f0d84e7645
--- /dev/null
+++ b/doc/developer-guide/options-to-contribute.md
@@ -0,0 +1,212 @@
+# A guide for contributors
+
+While you have gone through 'how to contribute' guides, if you are
+not sure what to work on, but really want to help the project, you
+have now landed on the right document :-)
+
+### Basic
+
+Instead of planning to fix **all** the below issues in one patch,
+we recommend you to have a a constant, continuous flow of improvements
+for the project. We recommend you to pick 1 file (or just few files) at
+a time to address below issues.
+Pick any `.c` (or `.h`) file, and you can send a patch which fixes **any**
+of the below themes. Ideally, fix all such occurrences in the file, even
+though, the reviewers would review even a single line change patch
+from you.
+
+1. Check for variable definitions, and if there is an array definition,
+which is very large at the top of the function, see if you can re-scope
+the variable to relevant sections (if it helps).
+
+Most of the time, some of these arrays may be used for 'error' handling,
+and it is possible to use them only in that scope.
+
+Reference: https://review.gluster.org/20846/
+
+
+2. Check for complete string initialization at the beginning of a function.
+Ideally, there is no reason to initialize a string. Fix it across the file.
+
+Example:
+
+`char new_path_name[PATH_MAX] = {0};` to `char new_path_name[PATH_MAX];`
+
+
+3. Change `calloc()` to `malloc()` wherever it makes sense.
+
+In a case of allocating a structures, where you expect certain (or most of)
+variables to be 0 (or NULL), it makes sense to use calloc(). But otherwise,
+there is an extra cost to `memset()` the whole object after allocating it.
+While it is not a significant improvement in performance, code which gets
+hit 1000s of times in a second, it would add some value.
+
+Reference: https://review.gluster.org/20878/
+
+
+4. You can consider using `snprintf()`, instead of `strncpy()` while dealing
+with strings.
+
+strncpy() won't null terminate if the dest buffer isn't big enough; snprintf()
+does. While most of the string operations in the code is on array, and larger
+size than required, strncpy() does an extra copy of 0s at the end of
+string till the size of the array. It makes sense to use `snprintf()`,
+which doesn't suffer from that behavior.
+
+Also check the return value from snprintf() for buffer overflow and handle
+accordingly
+
+Reference: https://review.gluster.org/20925/
+
+
+5. Now, pick a `.h` file, and see if a structure is very large, and see
+if re-aligning them as per [coding-standard](./coding-standard.md) gives any size benefit,
+if yes, go ahead and change it. Make sure you check all the structures
+in the file for similar pattern.
+
+Reference: [Check this section](https://github.com/gluster/glusterfs/blob/master/doc/developer-guide/coding-standard.md#structure-members-should-be-aligned-based-on-the-padding-requirements
+
+
+### If you are up for more :-)
+
+Good progress! Glad you are interested to know more. We are surely interested
+in next level of contributions from you!
+
+#### Coverity
+
+Visit [Coverity Dashboard](https://scan.coverity.com/projects/gluster-glusterfs?tab=overview).
+
+Now, if the number of defect is not 0, you have an opportunity to contribute.
+
+You get all the detail on why the particular defect is mentioned there, and
+most probable hint on how to fix it. Do it!
+
+Reference: https://review.gluster.org/21394/
+
+Use the same reference Id (789278) as the patch, so we can capture it is in
+single bugzilla.
+
+#### Clang-Scan
+
+Clang-Scan is a tool which scans the .c files and reports the possible issues,
+similar to coverity, but a different tool. Over the years we have seen, they
+both report very different set of issues, and hence there is a value in fixing it.
+
+GlusterFS project gets tested with clang-scan job every night, and the report is
+posted in the [job details page](https://build.gluster.org/job/clang-scan/lastCompletedBuild/clangScanBuildBugs/).
+As long as the number is not 0 in the report here, you have an opportunity to
+contribute! Similar to coverity dashboard, click on 'Details' to find out the
+reason behind that report, and send a patch.
+
+Reference: https://review.gluster.org/21025
+
+Again, you can use reference Id (1622665) for these patches!
+
+
+### I am good with programming, I would like to do more than above!
+
+#### Locked regions / Critical sections
+
+In the file you open, see if the lock is taken only to increment or decrement
+a flag, counter etc. If yes, then recommend you to convert it to ATOMIC locks.
+It is simple activity, but, if you know programing, you would know the benefit
+here.
+
+NOTE: There may not always a possibility to do this! You may have to check
+with developers first before going ahead.
+
+Reference: https://review.gluster.org/21221/
+
+
+#### ASan (address sanitizer)
+
+[The job](https://build.gluster.org/job/asan/) runs regression with asan builds,
+and you can also run glusterfs with asan on your workload to identify the leaks.
+If there are any leaks reported, feel free to check it, and send us patch.
+
+You can also run `valgrind` and let us know what it reports.
+
+Reference: https://review.gluster.org/21397
+
+
+#### Porting to different architecture
+
+This is something which we are not focusing right now, happy to collaborate!
+
+Reference: https://review.gluster.org/21276
+
+
+#### Fix 'TODO/FIXME' in codebase
+
+There are few cases of pending features, or pending validations, which are
+pending from sometime. You can pick them in the given file, and choose to
+fix it.
+
+
+### I don't know C, but I am interested to contribute in some way!
+
+You are most welcome! Our community is open for your contribution! First thing
+which comes to our mind is **documentation**. Next is, **testing** or validation.
+
+If you have some hardware, and want to run some performance comparisons with
+different version, or options, and help us to tune better is also a great help.
+
+
+#### Documentation
+
+1. We have some documentation in [glusterfs repo](../), go through these, and
+see if you can help us to keep up-to-date.
+
+2. The https://docs.gluster.org is powered by https://github.com/gluster/glusterdocs
+repo. You can check out the repo, and help in keeping that up-to-date.
+
+3. [Our website](https://gluster.org) is maintained by https://github.com/gluster/glusterweb
+repo. Help us to keep this up-to-date, and add content there.
+
+4. Write blogs about Gluster, and your experience, and make world know little
+more about Gluster, and your use-case, and how it helped to solve the problem.
+
+
+#### Testing
+
+1. There is a regression test suite in glusterfs, which runs with every patch, and is
+triggered by just running `./run-tests.sh` from the root of the project repo.
+
+You can add more test case to match your use-case, and send it as a patch, so you
+can make sure all future patches in glusterfs would keep your usecase intact.
+
+2. [Glusto-Tests](https://github.com/gluster/glusto-tests): This is another testing
+framework written for gluster, and makes use of clustered setup to test different
+use-cases, and helps to validate many bugs.
+
+
+#### Ansible
+
+Gluster Organization has rich set of ansible roles, which are actively maintained.
+Feel free to check them out here - https://github.com/gluster/gluster-ansible
+
+
+#### Monitoring
+
+We have prometheus repo, and are actively working on adding more metrics. Add what
+you need @ https://github.com/gluster/gluster-prometheus
+
+
+#### Health-Report
+
+This is a project, where at any given point in time, you want to run some set of
+commands locally, and get an output to analyze the status, it can be added.
+Contribute @ https://github.com/gluster/gluster-health-report
+
+
+### All these C/bash/python is old-school, I want something in containers.
+
+We have something for you too :-)
+
+Please visit our https://github.com/gluster/gcs repo for checking how you can help,
+and how gluster can help you in container world.
+
+
+### Note
+
+For any queries, best way is to contact us through mailing-list, <mailto:gluster-devel@gluster.org>
diff --git a/doc/developer-guide/posix.md b/doc/developer-guide/posix.md
new file mode 100644
index 00000000000..84c813e55a2
--- /dev/null
+++ b/doc/developer-guide/posix.md
@@ -0,0 +1,59 @@
+storage/posix translator
+========================
+
+Notes
+-----
+
+### `SET_FS_ID`
+
+This is so that all filesystem checks are done with the user's
+uid/gid and not GlusterFS's uid/gid.
+
+### `MAKE_REAL_PATH`
+
+This macro concatenates the base directory of the posix volume
+('option directory') with the given path.
+
+### `need_xattr` in lookup
+
+If this flag is passed, lookup returns a xattr dictionary that contains
+the file's create time, the file's contents, and the version number
+of the file.
+
+This is a hack to increase small file performance. If an application
+wants to read a small file, it can finish its job with just a lookup
+call instead of a lookup followed by read.
+
+### `getdents`/`setdents`
+
+These are used by unify to set and get directory entries.
+
+### `ALIGN_BUF`
+
+Macro to align an address to a page boundary (4K).
+
+### `priv->export_statfs`
+
+In some cases, two exported volumes may reside on the same
+partition on the server. Sending statvfs info for both
+the volumes will lead to erroneous df output at the client,
+since free space on the partition will be counted twice.
+
+In such cases, user can disable exporting statvfs info
+on one of the volumes by setting this option.
+
+### `xattrop`
+
+This fop is used by replicate to set version numbers on files.
+
+### `getxattr`/`setxattr` hack to read/write files
+
+A key, `GLUSTERFS_FILE_CONTENT_STRING`, is handled in a special way by
+`getxattr`/`setxattr`. A getxattr with the key will return the entire
+content of the file as the value. A `setxattr` with the key will write
+the value as the entire content of the file.
+
+### `posix_checksum`
+
+This calculates a simple XOR checksum on all entry names in a
+directory that is used by unify to compare directory contents.
diff --git a/doc/legacy/rpc-for-glusterfs.changes-done.txt b/doc/developer-guide/rpc-for-glusterfs.changes-done.txt
index 6bbbca78826..6bbbca78826 100644
--- a/doc/legacy/rpc-for-glusterfs.changes-done.txt
+++ b/doc/developer-guide/rpc-for-glusterfs.changes-done.txt
diff --git a/doc/developer-guide/rpc-for-glusterfs.new-versions.md b/doc/developer-guide/rpc-for-glusterfs.new-versions.md
new file mode 100644
index 00000000000..e3da5efa4a2
--- /dev/null
+++ b/doc/developer-guide/rpc-for-glusterfs.new-versions.md
@@ -0,0 +1,32 @@
+# GlusterFS RPC program versions
+
+## Compatibility
+
+RPC layer of glusterfs is implemented with possible changes over the protocol layers in mind. If there are any changes in the FOPs from what is assumed to be client side, and whats in serverside, they are to be added as a separate program table.
+
+### Program tables and Versions
+
+A given RPC program has a specific Task, and Version along with actors belonging to the program. In any of the programs, if a new actor is added, it is very important to define one more program with different version, and then keep both, if both are supported. Or else, it is important to handle the 'handshake' properly.
+
+#### Server details
+
+More info on RPC program is at `rpc/rpc-lib/src/rpcsvc.h` and check for structure `rpcsvc_actor_t` and `struct rpcsvc_program`. For usage, check `xlators/protocol/server/src/server-rpc-fops.c`
+
+#### Client details
+
+For details on client structures check `rpc/rpc-lib/src/rpc-clnt.h` for `rpc_clnt_procedure_t` and `rpc_clnt_program_t`. For usage, check `xlators/protocol/client/src/client-rpc-fops.c`
+
+## Protocol
+
+A protocol is what is agreed between two parties. In glusterfs, a RPC protocol is defined as .x file, which then gets converted to .c/.h file using `rpcgen`. There are different protocols defined for communication between `xlators/mgmt/glusterd <==> glusterfsd`, `gluster CLI <==> glusterd`, and `client-protocol <==> server-protocol`
+
+Once a protocol is defined and a release is made with that protocol, make sure no one changes it. Any edits to a given structure there should be a new version of the structure, and also it should get used in new actor, and thus new program version.
+
+## Server and Client Handshake
+
+When a client succeeds to establish a connect (it can be any transport, socket, ib-verbs or unix), client sends a dump (GF_DUMP_DUMP) request to server, which will respond back with all the supported versions of the server RPC (the supported programs which are registered with `rpcsvc_program_register()`).
+
+A client which expects certain programs to be present in server, it should be taking care of looking for it in the handshake methods, and take appropriate action depending on what to do next. In general a compatibility issue should be handled at handshake level itself, thus we can clearly let user/admin know of any 'in-compatibilities'.
+As a developer of GlusterFS protocol layer, one just has to make sure *never to make changes to existing program structures*, but they have to add new programs if required. New programs can have the same actors as present in existing, and also little more. Or it can even have same actor behave differently, take different parameter.
+
+If this is followed properly, there would be smooth upgrade / downgrade of versions. If not, technically, it is 100% guarantee of getting compatibility related issues.
diff --git a/doc/developer-guide/syncop.md b/doc/developer-guide/syncop.md
new file mode 100644
index 00000000000..bcc8bd08e01
--- /dev/null
+++ b/doc/developer-guide/syncop.md
@@ -0,0 +1,72 @@
+# syncop framework
+A coroutines-based, cooperative multi-tasking framework.
+
+## Topics
+
+- Glossary
+- Lifecycle of a synctask
+- Existing usage
+
+
+## Glossary
+
+### syncenv
+
+syncenv is an object that provides access to a pool of worker threads.
+synctasks execute in a syncenv.
+
+### synctask
+
+synctask can be informally defined as a pair of function pointers, namely _the
+call_ and _the callback_ (see syncop.h for more details).
+
+ synctask_fn_t - 'the call'
+ synctask_cbk_t - 'the callback'
+
+synctask has two modes of operation,
+
+1. The calling thread waits for the synctask to complete.
+2. The calling thread schedules the synctask and continues.
+
+synctask guarantees that the callback is called _after_ the call completes.
+
+### Lifecycle of a synctask
+
+A synctask could go into the following stages while in execution.
+
+- CREATED - On calling synctask_create/synctask_new.
+
+- RUNNABLE - synctask is queued in env->runq.
+
+- RUNNING - When one of syncenv's worker threads calls synctask_switch_to.
+
+- WAITING - When a synctask calls synctask_yield.
+
+- DONE - When a synctask has run to completion.
+
+
+ +-------------------------------+
+ | CREATED |
+ +-------------------------------+
+ |
+ | synctask_new/synctask_create
+ v
+ +-------------------------------+
+ | RUNNABLE (in env->runq) | <+
+ +-------------------------------+ |
+ | |
+ | synctask_switch_to |
+ v |
+ +------+ on task completion +-------------------------------+ |
+ | DONE | <-------------------- | RUNNING | | synctask_wake/wake
+ +------+ +-------------------------------+ |
+ | |
+ | synctask_yield/yield |
+ v |
+ +-------------------------------+ |
+ | WAITING (in env->waitq) | -+
+ +-------------------------------+
+
+Note: A synctask is not guaranteed to run on the same thread throughout its
+lifetime. Every time a synctask yields, it is possible for it to run on a
+different thread.
diff --git a/doc/developer-guide/thread-naming.md b/doc/developer-guide/thread-naming.md
new file mode 100644
index 00000000000..513140d4437
--- /dev/null
+++ b/doc/developer-guide/thread-naming.md
@@ -0,0 +1,104 @@
+Thread Naming
+================
+Gluster processes spawn many threads; some threads are created by libglusterfs
+library, while others are created by xlators. When gfapi library is used in an
+application, some threads belong to the application and some are spawned by
+gluster libraries. We also have features where n number of threads are spawned
+to act as worker threads for same operation.
+
+In all the above cases, it is useful to be able to determine the list of threads
+that exist in runtime. Naming threads when you create them is the easiest way to
+provide that information to kernel so that it can then be queried by any means.
+
+How to name threads
+-------------------
+We have two wrapper functions in libglusterfs for creating threads. They take
+name as an argument and set thread name after its creation.
+
+```C
+gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name)
+```
+
+```C
+gf_thread_create_detached (pthread_t *thread,
+ void *(*start_routine)(void *), void *arg,
+ const char *name)
+```
+
+As max name length for a thread in POSIX is only 16 characters including the
+'\0' character, you have to be a little creative with naming. Also, it is
+important that all Gluster threads have common prefix. Considering these
+conditions, we have "glfs_" as prefix for all the threads created by these
+wrapper functions. It is responsibility of the owner of thread to provide the
+suffix part of the name. It does not have to be a descriptive name, as it has
+only 10 letters to work with. However, it should be unique enough such that it
+can be matched with a table which describes it.
+
+If n number of threads are spwaned to perform same function, it is must that the
+threads are numbered.
+
+Table of thread names
+---------------------
+Thread names don't have to be a descriptive; however, it should be unique enough
+such that it can be matched with a table below without ambiguity.
+
+- bdaio - block device aio
+- brfsscan - bit rot fs scanner
+- brhevent - bit rot event handler
+- brmon - bit rot monitor
+- brosign - bit rot one shot signer
+- brpobj - bit rot object processor
+- brsproc - bit rot scrubber
+- brssign - bit rot stub signer
+- brswrker - bit rot worker
+- clogc - changelog consumer
+- clogcbki - changelog callback invoker
+- clogd - changelog dispatcher
+- clogecon - changelog reverse connection
+- clogfsyn - changelog fsync
+- cloghcon - changelog history consumer
+- clogjan - changelog janitor
+- clogpoll - changelog poller
+- clogproc - changelog process
+- clogro - changelog rollover
+- ctrcomp - change time recorder compaction
+- dhtdf - dht defrag task
+- dhtdg - dht defrag start
+- dhtfcnt - dht rebalance file counter
+- ecshd - ec heal daemon
+- epollN - epoll thread
+- fdlwrker - fdl worker
+- fusenoti - fuse notify
+- fuseproc - fuse main thread
+- gdhooks - glusterd hooks
+- glfspoll - gfapi poller thread
+- idxwrker - index worker
+- iosdump - io stats dump
+- iotwr - io thread worker
+- jbrflush - jbr flush
+- leasercl - lease recall
+- memsweep - sweeper thread for mem pools
+- nfsauth - nfs auth
+- nfsnsm - nfs nsm
+- nfsudp - nfs udp mount
+- nlmmon - nfs nlm/nsm mon
+- posixaio - posix aio
+- posixfsy - posix fsync
+- posixhc - posix heal
+- posixjan - posix janitor
+- posixrsv - posix reserve
+- quiesce - quiesce dequeue
+- rdmaAsyn - rdma async event handler
+- rdmaehan - rdma completion handler
+- rdmarcom - rdma receive completion handler
+- rdmascom - rdma send completion handler
+- rpcsvcrh - rpcsvc request handler
+- scleanup - socket cleanup
+- shdheal - self heal daemon
+- sigwait - glusterfsd sigwaiter
+- spoller - socket poller
+- sprocN - syncop worker thread
+- tbfclock - token bucket filter token generator thread
+- timer - timer thread
+- upreaper - upcall reaper
diff --git a/doc/developer-guide/translator-development.md b/doc/developer-guide/translator-development.md
new file mode 100644
index 00000000000..f75935519f6
--- /dev/null
+++ b/doc/developer-guide/translator-development.md
@@ -0,0 +1,683 @@
+Translator development
+======================
+
+Setting the Stage
+-----------------
+
+This is the first post in a series that will explain some of the details of
+writing a GlusterFS translator, using some actual code to illustrate.
+
+Before we begin, a word about environments. GlusterFS is over 300K lines of
+code spread across a few hundred files. That's no Linux kernel or anything, but
+ you're still going to be navigating through a lot of code in every
+code-editing session, so some kind of cross-referencing is *essential*. I use
+cscope with the vim bindings, and if I couldn't do Crtl+G and such to jump
+between definitions all the time my productivity would be cut in half. You may
+prefer different tools, but as I go through these examples you'll need
+something functionally similar to follow on. OK, on with the show.
+
+The first thing you need to know is that translators are not just bags of
+functions and variables. They need to have a very definite internal structure
+so that the translator-loading code can figure out where all the pieces are.
+The way it does this is to use dlsym to look for specific names within your
+shared-object file, as follow (from `xlator.c`):
+
+```
+if (!(xl->fops = dlsym (handle, "fops"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->cbks = dlsym (handle, "cbks"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->init = dlsym (handle, "init"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->fini = dlsym (handle, "fini"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
+ dlerror ());
+ goto out;
+}
+```
+
+In this example, `xl` is a pointer to the in-memory object for the translator
+we're loading. As you can see, it's looking up various symbols *by name* in the
+ shared object it just loaded, and storing pointers to those symbols. Some of
+them (e.g. init) are functions, while others (e.g. fops) are dispatch tables
+containing pointers to many functions. Together, these make up the translator's
+ public interface.
+
+Most of this glue or boilerplate can easily be found at the bottom of one of
+the source files that make up each translator. We're going to use the `rot-13`
+translator just for fun, so in this case you'd look in `rot-13.c` to see this:
+
+```
+struct xlator_fops fops = {
+ .readv = rot13_readv,
+ .writev = rot13_writev
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+{ .key = {"encrypt-write"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {NULL} },
+};
+```
+
+The `fops` table, defined in `xlator.h`, is one of the most important pieces.
+This table contains a pointer to each of the filesystem functions that your
+translator might implement -- `open`, `read`, `stat`, `chmod`, and so on. There
+are 82 such functions in all, but don't worry; any that you don't specify here
+will be see as null and filled with defaults from `defaults.c` when your
+translator is loaded. In this particular example, since `rot-13` is an
+exceptionally simple translator, we only fill in two entries for `readv` and
+`writev`.
+
+There are actually two other tables, also required to have predefined names,
+that are also used to find translator functions: `cbks` (which is empty in this
+ snippet) and `dumpops` (which is missing entirely). The first of these specify
+ entry points for when inodes are forgotten or file descriptors are released.
+In other words, they're destructors for objects in which your translator might
+ have an interest. Mostly you can ignore them, because the default behavior
+handles even the simpler cases of translator-specific inode/fd context
+automatically. However, if the context you attach is a complex structure
+requiring complex cleanup, you'll need to supply these functions. As for
+dumpops, that's just used if you want to provide functions to pretty-print
+various structures in logs. I've never used it myself, though I probably
+should. What's noteworthy here is that we don't even define dumpops. That's
+because all of the functions that might use these dispatch functions will check
+ for `xl->dumpops` being `NULL` before calling through it. This is in sharp
+contrast to the behavior for `fops` and `cbks`, which *must* be present. If
+they're not, translator loading will fail because these pointers are not
+checked every time and if they're `NULL` then we'll segfault. That's why we
+provide an empty definition for cbks; it's OK for the individual function
+pointers to be NULL, but not for the whole table to be absent.
+
+The last piece I'll cover today is options. As you can see, this is a table of
+translator-specific option names and some information about their types.
+GlusterFS actually provides a pretty rich set of types (`volume_option_type_t`
+in `options.`h) which includes paths, translator names, percentages, and times
+in addition to the obvious integers and strings. Also, the `volume_option_t`
+structure can include information about alternate names, min/max/default
+values, enumerated string values, and descriptions. We don't see any of these
+here, so let's take a quick look at some more complex examples from afr.c and
+then come back to `rot-13`.
+
+```
+{ .key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source.",
+ .value = { "diff", "full", "" }
+},
+{ .key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks per file for which "
+ "self-heal process would be applied simultaneously."
+},
+```
+
+When your translator is loaded, all of this information is used to parse the
+options actually provided in the volfile, and then the result is turned into a
+dictionary and stored as `xl->options`. This dictionary is then processed by
+your init function, which you can see being looked up in the first code
+fragment above. We're only going to look at a small part of the `rot-13`'s
+init for now.
+
+```
+priv->decrypt_read = 1;
+priv->encrypt_write = 1;
+
+data = dict_get (this->options, "encrypt-write");
+if (data) {
+ if (gf_string2boolean (data->data, &priv->encrypt_write
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "encrypt-write takes only boolean options");
+ return -1;
+ }
+}
+```
+
+What we can see here is that we're setting some defaults in our priv structure,
+then looking to see if an `encrypt-write` option was actually provided. If so,
+we convert and store it. This is a pretty classic use of dict_get to fetch a
+field from a dictionary, and of using one of many conversion functions in
+`common-utils.c` to convert `data->data` into something we can use.
+
+So far we've covered the basic of how a translator gets loaded, how we find its
+various parts, and how we process its options. In my next Translator 101 post,
+we'll go a little deeper into other things that init and its companion fini
+might do, and how some other fields in our `xlator_t` structure (commonly
+referred to as this) are commonly used.
+
+`init`, `fini`, and private context
+-----------------------------------
+
+In the previous Translator 101 post, we looked at some of the dispatch tables
+and options processing in a translator. This time we're going to cover the rest
+ of the "shell" of a translator -- i.e. the other global parts not specific to
+handling a particular request.
+
+Let's start by looking at the relationship between a translator and its shared
+library. At a first approximation, this is the relationship between an object
+and a class in just about any object-oriented programming language. The class
+defines behaviors, but has to be instantiated as an object to have any kind of
+existence. In our case the object is an `xlator_t`. Several of these might be
+created within the same daemon, sharing all of the same code through init/fini
+and dispatch tables, but sharing *no data*. You could implement shared data (as
+ static variables in your shared libraries) but that's strongly discouraged.
+Every function in your shared library will get an `xlator_t` as an argument,
+and should use it. This lack of class-level data is one of the points where
+the analogy to common OOP systems starts to break down. Another place is the
+complete lack of inheritance. Translators inherit behavior (code) from exactly
+one shared library -- looked up and loaded using the `type` field in a volfile
+`volume ... end-volume` block -- and that's it -- not even single inheritance,
+no subclasses or superclasses, no mixins or prototypes, just the relationship
+between an object and its class. With that in mind, let's turn to the init
+function that we just barely touched on last time.
+
+```
+int32_t
+init (xlator_t *this)
+{
+ data_t *data = NULL;
+ rot_13_private_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log ("rot13", GF_LOG_ERROR,
+ "FATAL: rot13 should have exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (sizeof (rot_13_private_t), 1, 0);
+ if (!priv)
+ return -1;
+```
+
+At the very top, we see the function signature -- we get a pointer to the
+`xlator_t` object that we're initializing, and we return an `int32_t` status.
+As with most functions in the translator API, this should be zero to indicate
+success. In this case it's safe to return -1 for failure, but watch out: in
+dispatch-table functions, the return value means the status of the *function
+call* rather than the *request*. A request error should be reflected as a
+callback with a non-zero `op_re`t value, but the dispatch function itself
+should still return zero. In fact, the handling of a non-zero return from a
+dispatch function is not all that robust (we recently had a bug report in
+HekaFS related to this) so it's something you should probably avoid
+altogether. This only underscores the difference between dispatch functions
+and `init`/`fini` functions, where non-zero returns *are* expected and handled
+logically by aborting the translator setup. We can see that down at the
+bottom, where we return -1 to indicate that we couldn't allocate our
+private-data area (more about that later).
+
+The first thing this init function does is check that the translator is being
+set up in the right kind of environment. Translators are called by parents and
+in turn call children. Some translators are "initial" translators that inject
+requests into the system from elsewhere -- e.g. mount/fuse injecting requests
+from the kernel, protocol/server injecting requests from the network. Those
+translators don't need parents, but `rot-13` does and so we check for that.
+Similarly, some translators are "final" translators that (from the perspective
+of the current process) terminate requests instead of passing them on -- e.g.
+`protocol/client` passing them to another node, `storage/posix` passing them to
+a local filesystem. Other translators "multiplex" between multiple children --
+ passing each parent request on to one (`cluster/dht`), some
+(`cluster/stripe`), or all (`cluster/afr`) of those children. `rot-13` fits
+into none of those categories either, so it checks that it has *exactly one*
+child. It might be more convenient or robust if translator shared libraries
+had standard variables describing these requirements, to be checked in a
+consistent way by the translator-loading infrastructure itself instead of by
+each separate init function, but this is the way translators work today.
+
+The last thing we see in this fragment is allocating our private data area.
+This can literally be anything we want; the infrastructure just provides the
+priv pointer as a convenience but takes no responsibility for how it's used. In
+ this case we're using `GF_CALLOC` to allocate our own `rot_13_private_t`
+structure. This gets us all the benefits of GlusterFS's memory-leak detection
+infrastructure, but the way we're calling it is not quite ideal. For one thing,
+ the first two arguments -- from `calloc(3)` -- are kind of reversed. For
+another, notice how the last argument is zero. That can actually be an
+enumerated value, to tell the GlusterFS allocator *what* type we're
+allocating. This can be very useful information for memory profiling and leak
+detection, so it's recommended that you follow the example of any
+x`xx-mem-types.h` file elsewhere in the source tree instead of just passing
+zero here (even though that works).
+
+To finish our tour of standard initialization/termination, let's look at the
+end of `init` and the beginning of `fini`:
+
+```
+ this->private = priv;
+ gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
+ return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ rot_13_private_t *priv = this->private;
+
+ if (!priv)
+ return;
+ this->private = NULL;
+ GF_FREE (priv);
+```
+
+At the end of init we're just storing our private-data pointer in the `priv`
+field of our `xlator_t`, then returning zero to indicate that initialization
+succeeded. As is usually the case, our fini is even simpler. All it really has
+to do is `GF_FREE` our private-data pointer, which we do in a slightly
+roundabout way here. Notice how we don't even have a return value here, since
+there's nothing obvious and useful that the infrastructure could do if `fini`
+failed.
+
+That's practically everything we need to know to get our translator through
+loading, initialization, options processing, and termination. If we had defined
+ no dispatch functions, we could actually configure a daemon to use our
+translator and it would work as a basic pass-through from its parent to a
+single child. In the next post I'll cover how to build the translator and
+configure a daemon to use it, so that we can actually step through it in a
+debugger and see how it all fits together before we actually start adding
+functionality.
+
+This Time For Real
+------------------
+
+In the first two parts of this series, we learned how to write a basic
+translator skeleton that can get through loading, initialization, and option
+processing. This time we'll cover how to build that translator, configure a
+volume to use it, and run the glusterfs daemon in debug mode.
+
+Unfortunately, there's not much direct support for writing new translators. You
+can check out a GlusterFS tree and splice in your own translator directory, but
+ that's a bit painful because you'll have to update multiple makefiles plus a
+bunch of autoconf garbage. As part of the HekaFS project, I basically reverse
+engineered the truly necessary parts of the translator-building process and
+then pestered one of the Fedora glusterfs package maintainers (thanks
+daMaestro!) to add a `glusterfs-devel` package with the required headers. Since
+ then the complexity level in the HekaFS tree has crept back up a bit, but I
+still remember the simple method and still consider it the easiest way to get
+started on a new translator. For the sake of those not using Fedora, I'm going
+to describe a method that doesn't depend on that header package. What it does
+depend on is a GlusterFS source tree, much as you might have cloned from GitHub
+ or the Gluster review site. This tree doesn't have to be fully built, but you
+do need to run `autogen.sh` and configure in it. Then you can take the
+following simple makefile and put it in a directory with your actual source.
+
+```
+# Change these to match your source code.
+TARGET = rot-13.so
+OBJECTS = rot-13.o
+
+# Change these to match your environment.
+GLFS_SRC = /srv/glusterfs
+GLFS_LIB = /usr/lib64
+HOST_OS = GF_LINUX_HOST_OS
+
+# You shouldn't need to change anything below here.
+
+CFLAGS = -fPIC -Wall -O0 -g \
+ -DHAVE_CONFIG_H -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
+ -D$(HOST_OS) -I$(GLFS_SRC) -I$(GLFS_SRC)/contrib/uuid \
+ -I$(GLFS_SRC)/libglusterfs/src
+LDFLAGS = -shared -nostartfiles -L$(GLFS_LIB)
+LIBS = -lglusterfs -lpthread
+
+$(TARGET): $(OBJECTS)
+ $(CC) $(OBJECTS) $(LDFLAGS) -o $(TARGET) $(OBJECTS) $(LIBS)
+```
+
+Yes, it's still Linux-specific. Mea culpa. As you can see, we're sticking with
+the `rot-13` example, so you can just copy the files from
+`xlators/encryption/rot-13/src` in your GlusterFS tree to follow on. Type
+`make` and you should be rewarded with a nice little `.so` file.
+
+```
+xlator_example$ ls -l rot-13.so
+-rwxr-xr-x. 1 jeff jeff 40784 Nov 16 16:41 rot-13.so
+```
+
+Notice that we've built with optimization level zero and debugging symbols
+included, which would not typically be the case for a packaged version of
+GlusterFS. Let's put our version of `rot-13.so` into a slightly different file
+on our system, so that it doesn't stomp on the installed version (not that
+you'd ever want to use that anyway).
+
+```
+xlator_example# ls /usr/lib64/glusterfs/3git/xlator/encryption/
+crypt.so crypt.so.0 crypt.so.0.0.0 rot-13.so rot-13.so.0
+rot-13.so.0.0.0
+xlator_example# cp rot-13.so \
+ /usr/lib64/glusterfs/3git/xlator/encryption/my-rot-13.so
+```
+
+These paths represent the current Gluster filesystem layout, which is likely to
+be deprecated in favor of the Fedora layout; your paths may vary. At this point
+ we're ready to configure a volume using our new translator. To do that, I'm
+going to suggest something that's strongly discouraged except during
+development (the Gluster guys are going to hate me for this): write our own
+volfile. Here's just about the simplest volfile you'll ever see.
+
+```
+volume my-posix
+ type storage/posix
+ option directory /srv/export
+end-volume
+
+volume my-rot13
+ type encryption/my-rot-13
+ subvolumes my-posix
+end-volume
+```
+
+All we have here is a basic brick using `/srv/export` for its data, and then
+an instance of our translator layered on top -- no client or server is
+necessary for what we're doing, and the system will automatically push a
+mount/fuse translator on top if there's no server translator. To try this out,
+all we need is the following command (assuming the directories involved already
+ exist).
+
+```
+xlator_example$ glusterfs --debug -f my.vol /srv/import
+```
+
+You should be rewarded with a whole lot of log output, including the text of
+the volfile (this is very useful for debugging problems in the field). If you
+go to another window on the same machine, you can see that you have a new
+filesystem mounted.
+
+```
+~$ df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/srv/xlator_example/my.vol
+ 114506240 2706176 105983488 3% /srv/import
+```
+
+Just for fun, write something into a file in `/srv/import`, then look at the
+corresponding file in `/srv/export` to see it all `rot-13`'ed for you.
+
+```
+~$ echo hello > /srv/import/a_file
+~$ cat /srv/export/a_file
+uryyb
+```
+
+There you have it -- functionality you control, implemented easily, layered on
+top of local storage. Now you could start adding functionality -- real
+encryption, perhaps -- and inevitably having to debug it. You could do that the
+ old-school way, with `gf_log` (preferred) or even plain old `printf`, or you
+could run daemons under `gdb` instead. Alternatively, you could wait for the
+next Translator 101 post, where we'll be doing exactly that.
+
+Debugging a Translator
+----------------------
+
+Now that we've learned what a translator looks like and how to build one, it's
+time to run one and actually watch it work. The best way to do this is good
+old-fashioned `gdb`, as follows (using some of the examples from last time).
+
+```
+xlator_example# gdb glusterfs
+GNU gdb (GDB) Red Hat Enterprise Linux (7.2-50.el6)
+...
+(gdb) r --debug -f my.vol /srv/import
+Starting program: /usr/sbin/glusterfs --debug -f my.vol /srv/import
+...
+[2011-11-23 11:23:16.495516] I [fuse-bridge.c:2971:fuse_init]
+ 0-glusterfs-fuse: FUSE inited with protocol versions:
+ glusterfs 7.13 kernel 7.13
+```
+
+If you get to this point, your glusterfs client process is already running. You
+can go to another window to see the mountpoint, do file operations, etc.
+
+```
+~# df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/root/xlator_example/my.vol
+ 114506240 2643968 106045568 3% /srv/import
+~# ls /srv/import
+a_file
+~# cat /srv/import/a_file
+hello
+```
+
+Now let's interrupt the process and see where we are.
+
+```
+
+Program received signal SIGINT, Interrupt.
+0x0000003a0060b3dc in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+(gdb) info threads
+ 5 Thread 0x7fffeffff700 (LWP 27206) 0x0000003a002dd8c7
+ in readv ()
+ from /lib64/libc.so.6
+ 4 Thread 0x7ffff50e3700 (LWP 27205) 0x0000003a0060b75b
+ in pthread_cond_timedwait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 3 Thread 0x7ffff5f02700 (LWP 27204) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 2 Thread 0x7ffff6903700 (LWP 27203) 0x0000003a0060f245
+ in sigwait ()
+ from /lib64/libpthread.so.0
+* 1 Thread 0x7ffff7957700 (LWP 27196) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+```
+
+Like any non-toy server, this one has multiple threads. What are they all
+doing? Honestly, even I don't know. Thread 1 turns out to be in
+`event_dispatch_epoll`, which means it's the one handling all of our network
+I/O. Note that with socket multi-threading patch this will change, with one
+thread in `socket_poller` per connection. Thread 2 is in `glusterfs_sigwaiter`
+which means signals will be isolated to that thread. Thread 3 is in
+`syncenv_task`, so it's a worker process for synchronous requests such as
+those used by the rebalance and repair code. Thread 4 is in
+`janitor_get_next_fd`, so it's waiting for a chance to close no-longer-needed
+file descriptors on the local filesystem. (I admit I had to look that one up,
+BTW.) Lastly, thread 5 is in `fuse_thread_proc`, so it's the one fetching
+requests from our FUSE interface. You'll often see many more threads than
+this, but it's a pretty good basic set. Now, let's set a breakpoint so we can
+actually watch a request.
+
+```
+(gdb) b rot13_writev
+Breakpoint 1 at 0x7ffff50e4f0b: file rot-13.c, line 119.
+(gdb) c
+Continuing.
+```
+
+At this point we go into our other window and do something that will involve a write.
+
+```
+~# echo goodbye > /srv/import/another_file
+(back to the first window)
+[Switching to Thread 0x7fffeffff700 (LWP 27206)]
+
+Breakpoint 1, rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1, offset=0,
+ iobref=0x7fffe8001070) at rot-13.c:119
+119 rot_13_private_t *priv = (rot_13_private_t *)this->private;
+```
+
+Remember how we built with debugging symbols enabled and no optimization? That
+will be pretty important for the next few steps. As you can see, we're in
+`rot13_writev`, with several parameters.
+
+* `frame` is our always-present frame pointer for this request. Also,
+ `frame->local` will point to any local data we created and attached to the
+ request ourselves.
+* `this` is a pointer to our instance of the `rot-13` translator. You can examine
+ it if you like to see the name, type, options, parent/children, inode table,
+ and other stuff associated with it.
+* `fd` is a pointer to a file-descriptor *object* (`fd_t`, not just a
+ file-descriptor index which is what most people use "fd" for). This in turn
+ points to an inode object (`inode_t`) and we can associate our own
+ `rot-13`-specific data with either of these.
+* `vector` and `count` together describe the data buffers for this write, which
+ we'll get to in a moment.
+* `offset` is the offset into the file at which we're writing.
+* `iobref` is a buffer-reference object, which is used to track the life cycle
+ of buffers containing read/write data. If you look closely, you'll notice that
+ `vector[0].iov_base` points to the same address as `iobref->iobrefs[0].ptr`, which
+ should give you some idea of the inter-relationships between vector and iobref.
+
+OK, now what about that `vector`? We can use it to examine the data being
+written, like this.
+
+```
+(gdb) p vector[0]
+$2 = {iov_base = 0x7ffff7936000, iov_len = 8}
+(gdb) x/s 0x7ffff7936000
+0x7ffff7936000: "goodbye\n"
+```
+
+It's not always safe to view this data as a string, because it might just as
+well be binary data, but since we're generating the write this time it's safe
+and convenient. With that knowledge, let's step through things a bit.
+
+```
+(gdb) s
+120 if (priv->encrypt_write)
+(gdb)
+121 rot13_iovec (vector, count);
+(gdb)
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb)
+58 rot13 (vector[i].iov_base, vector[i].iov_len);
+(gdb)
+rot13 (buf=0x7ffff7936000 "goodbye\n", len=8) at rot-13.c:45
+45 for (i = 0; i < len; i++) {
+(gdb)
+46 if (buf[i] >= 'a' && buf[i] <= 'z')
+(gdb)
+47 buf[i] = 'a' + ((buf[i] - 'a' + 13) % 26);
+```
+
+Here we've stepped into `rot13_iovec`, which iterates through our vector
+calling `rot13`, which in turn iterates through the characters in that chunk
+doing the `rot-13` operation if/as appropriate. This is pretty straightforward
+stuff, so let's skip to the next interesting bit.
+
+```
+(gdb) fin
+Run till exit from #0 rot13 (buf=0x7ffff7936000 "goodbye\n",
+ len=8) at rot-13.c:47
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb) fin
+Run till exit from #0 rot13_iovec (vector=0x7fffe8000cd8,
+ count=1) at rot-13.c:57
+rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1,
+ offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+123 STACK_WIND (frame,
+(gdb) b 129
+Breakpoint 2 at 0x7ffff50e4f35: file rot-13.c, line 129.
+(gdb) b rot13_writev_cbk
+Breakpoint 3 at 0x7ffff50e4db3: file rot-13.c, line 106.
+(gdb) c
+```
+
+So we've set breakpoints on both the callback and the statement following the
+`STACK_WIND`. Which one will we hit first?
+
+```
+Breakpoint 3, rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+106 STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+ prebuf, postbuf);
+(gdb) bt
+#0 rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+#1 0x00007ffff52f1b37 in posix_writev (frame=0x7ffff6e440d8,
+ this=<value optimized out>, fd=<value optimized out>,
+ vector=<value optimized out>, count=1,
+ offset=<value optimized out>, iobref=0x7fffe8001070)
+ at posix.c:2217
+#2 0x00007ffff50e513e in rot13_writev (frame=0x7ffff6e4402c,
+ this=0x638440, fd=0x7ffff409802c, vector=0x7fffe8000cd8,
+ count=1, offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+```
+
+Surprise! We're in `rot13_writev_cbk` now, called (indirectly) while we're
+still in `rot13_writev` before `STACK_WIND` returns (still at rot-13.c:123). If
+ you did any request cleanup here, then you need to be careful about what you
+do in the remainder of `rot13_writev` because data may have been freed etc.
+It's tempting to say you should just do the cleanup in `rot13_writev` after
+the `STACK_WIND,` but that's not valid because it's also possible that some
+other translator returned without calling `STACK_UNWIND` -- i.e. before
+`rot13_writev` is called, so then it would be the one getting null-pointer
+errors instead. To put it another way, the callback and the return from
+`STACK_WIND` can occur in either order or even simultaneously on different
+threads. Even if you were to use reference counts, you'd have to make sure to
+use locking or atomic operations to avoid races, and it's not worth it. Unless
+you *really* understand the possible flows of control and know what you're
+doing, it's better to do cleanup in the callback and nothing after
+`STACK_WIND.`
+
+At this point all that's left is a `STACK_UNWIND` and a return. The
+`STACK_UNWIND` invokes our parent's completion callback, and in this case our
+parent is FUSE so at that point the VFS layer is notified of the write being
+complete. Finally, we return through several levels of normal function calls
+until we come back to fuse_thread_proc, which waits for the next request.
+
+So that's it. For extra fun, you might want to repeat this exercise by stepping
+through some other call -- stat or setxattr might be good choices -- but you'll
+ have to use a translator that actually implements those calls to see much
+that's interesting. Then you'll pretty much know everything I knew when I
+started writing my first for-real translators, and probably even a bit more. I
+hope you've enjoyed this series, or at least found it useful, and if you have
+any suggestions for other topics I should cover please let me know (via
+comments or email, IRC or Twitter).
+
+Other versions
+--------------
+
+Original author's site:
+
+ * [Translator 101 - Setting the Stage](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-class-1-setting-the-stage/)
+
+ * [Translator 101 - Init, Fini and Private Context](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-lesson-2-init-fini-and-private-context/)
+
+ * [Translator 101 - This Time for Real](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-lesson-3-this-time-for-real/)
+
+ * [Translator 101 - Debugging a Translator](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-lesson-4-debugging-a-translator/)
+
+Gluster community site:
+
+ * [Translators](https://docs.gluster.org/en/latest/Quick-Start-Guide/Architecture/#translators)
diff --git a/doc/developer-guide/unittest.md b/doc/developer-guide/unittest.md
new file mode 100644
index 00000000000..5c6c0a8a039
--- /dev/null
+++ b/doc/developer-guide/unittest.md
@@ -0,0 +1,228 @@
+# Unit Tests in GlusterFS
+
+## Overview
+[Art-of-unittesting][definitionofunittest] provides a good definition for unit tests. A good unit test is:
+
+* Able to be fully automated
+* Has full control over all the pieces running (Use mocks or stubs to achieve this isolation when needed)
+* Can be run in any order if part of many other tests
+* Runs in memory (no DB or File access, for example)
+* Consistently returns the same result (You always run the same test, so no random numbers, for example. save those for integration or range tests)
+* Runs fast
+* Tests a single logical concept in the system
+* Readable
+* Maintainable
+* Trustworthy (when you see its result, you don’t need to debug the code just to be sure)
+
+## cmocka
+GlusterFS unit test framework is based on [cmocka][]. cmocka provides
+developers with methods to isolate and test modules written in C language. It
+also provides integration with Jenkins by providing JUnit XML compliant unit
+test results.
+
+cmocka
+
+## Running Unit Tests
+To execute the unit tests, all you need is to type `make check`. Here is a step-by-step example assuming you just cloned a GlusterFS tree:
+
+```
+$ ./autogen.sh
+$ ./configure --enable-debug
+$ make check
+```
+
+Sample output:
+
+```
+PASS: mem_pool_unittest
+============================================================================
+Testsuite summary for glusterfs 3git
+============================================================================
+# TOTAL: 1
+# PASS: 1
+# SKIP: 0
+# XFAIL: 0
+# FAIL: 0
+# XPASS: 0
+# ERROR: 0
+============================================================================
+```
+
+In this example, `mem_pool_unittest` has multiple tests inside, but `make check` assumes that the program itself is the test, and that is why it only shows one test. Here is the output when we run `mem_pool_unittest` directly:
+
+```
+$ ./libglusterfs/src/mem_pool_unittest
+[==========] Running 10 test(s).
+[ RUN ] test_gf_mem_acct_enable_set
+Expected assertion data != ((void *)0) occurred
+[ OK ] test_gf_mem_acct_enable_set
+[ RUN ] test_gf_mem_set_acct_info_asserts
+Expected assertion xl != ((void *)0) occurred
+Expected assertion size > ((4 + sizeof (size_t) + sizeof (xlator_t *) + 4 + 8) + 8) occurred
+Expected assertion type <= xl->mem_acct.num_types occurred
+[ OK ] test_gf_mem_set_acct_info_asserts
+[ RUN ] test_gf_mem_set_acct_info_memory
+[ OK ] test_gf_mem_set_acct_info_memory
+[ RUN ] test_gf_calloc_default_calloc
+[ OK ] test_gf_calloc_default_calloc
+[ RUN ] test_gf_calloc_mem_acct_enabled
+[ OK ] test_gf_calloc_mem_acct_enabled
+[ RUN ] test_gf_malloc_default_malloc
+[ OK ] test_gf_malloc_default_malloc
+[ RUN ] test_gf_malloc_mem_acct_enabled
+[ OK ] test_gf_malloc_mem_acct_enabled
+[ RUN ] test_gf_realloc_default_realloc
+[ OK ] test_gf_realloc_default_realloc
+[ RUN ] test_gf_realloc_mem_acct_enabled
+[ OK ] test_gf_realloc_mem_acct_enabled
+[ RUN ] test_gf_realloc_ptr
+Expected assertion ((void *)0) != ptr occurred
+[ OK ] test_gf_realloc_ptr
+[==========] 10 test(s) run.
+[ PASSED ] 10 test(s).
+[ FAILED ] 0 test(s).
+[ REPORT ] Created libglusterfs_mem_pool_xunit.xml report
+```
+
+
+## Writing Unit Tests
+
+### Enhancing your C functions
+
+#### Programming by Contract
+Add the following to your C file:
+
+```c
+#include <cmocka_pbc.h>
+```
+
+```c
+/*
+ * Programming by Contract is a programming methodology
+ * which binds the caller and the function called to a
+ * contract. The contract is represented using Hoare Triple:
+ * {P} C {Q}
+ * where {P} is the precondition before executing command C,
+ * and {Q} is the postcondition.
+ *
+ * See also:
+ * http://en.wikipedia.org/wiki/Design_by_contract
+ * http://en.wikipedia.org/wiki/Hoare_logic
+ * http://dlang.org/dbc.html
+ */
+ #ifndef CMOCKERY_PBC_H_
+#define CMOCKERY_PBC_H_
+
+#if defined(UNIT_TESTING) || defined (DEBUG)
+
+#include <assert.h>
+
+/*
+ * Checks caller responsibility against contract
+ */
+#define REQUIRE(cond) assert(cond)
+
+/*
+ * Checks function reponsability against contract.
+ */
+#define ENSURE(cond) assert(cond)
+
+/*
+ * While REQUIRE and ENSURE apply to functions, INVARIANT
+ * applies to classes/structs. It ensures that intances
+ * of the class/struct are consistent. In other words,
+ * that the instance has not been corrupted.
+ */
+#define INVARIANT(invariant_fnc) do{ (invariant_fnc) } while (0);
+
+#else
+#define REQUIRE(cond) do { } while (0);
+#define ENSURE(cond) do { } while (0);
+#define INVARIANT(invariant_fnc) do{ } while (0);
+
+#endif /* defined(UNIT_TESTING) || defined (DEBUG) */
+#endif /* CMOCKERY_PBC_H_ */
+```
+
+##### Example
+This is an _extremely_ simple example:
+
+```c
+int divide (int n, int d)
+{
+ int ans;
+
+ REQUIRE(d != 0);
+
+ ans = n / d;
+
+ // As code is added to this function throughout its lifetime,
+ // ENSURE will assert that data will be returned
+ // according to the contract. Again this is an
+ // extremely simple example. :-D
+ ENSURE( ans == (n / d) );
+
+ return ans;
+}
+
+```
+
+##### Important Note
+`REQUIRE`, `ENSURE`, and `INVARIANT` are only available when `DEBUG` or `UNIT_TESTING` are set in the CFLAGS. You must pass `--enable-debug` to `./configure` to enable PBC on your non-unittest builds.
+
+#### Overriding functions
+Cmockery2 provides its own memory allocation functions which check for buffer overrun and memory leaks. The following header file must be included **last** to be able to override any of the memory allocation functions:
+
+```c
+#include <cmocka.h>
+```
+
+This file will only take effect with the `UNIT_TESTING` CFLAG is set.
+
+### Creating a unit test
+Once you identify the C file you would like to test, first create a `unittest` directory under the directory where the C file is located. This will isolate the unittests to a different directory.
+
+Next, you need to edit the `Makefile.am` file in the directory where your C file is located. Initialize the
+`Makefile.am` if it does not already have the following sections:
+
+```
+#### UNIT TESTS #####
+CLEANFILES += *.gcda *.gcno *_xunit.xml
+noinst_PROGRAMS =
+TESTS =
+```
+
+Now you can add the following for each of the unit tests that you would like to build:
+
+```
+### UNIT TEST xxx_unittest ###
+xxx_unittest_CPPFLAGS = $(xxx_CPPFLAGS)
+xxx_unittest_SOURCES = xxx.c \
+ unittest/xxx_unittest.c
+xxx_unittest_CFLAGS = $(UNITTEST_CFLAGS)
+xxx_unittest_LDFLAGS = $(UNITTEST_LDFLAGS)
+noinst_PROGRAMS += xxx_unittest
+TESTS += xxx_unittest
+```
+
+Where `xxx` is the name of your C file. For example, look at `libglusterfs/src/Makefile.am`.
+
+Copy the simple unit test from the [cmocka API][cmockaapi] to `unittest/xxx_unittest.c`. If you would like to see an example of a unit test, please refer to `libglusterfs/src/unittest/mem_pool_unittest.c`.
+
+#### Mocking
+You may see that the linker will complain about missing functions needed by the C file you would like to test. Identify the required functions, then place their stubs in a file called `unittest/xxx_mock.c`, then include this file in `Makefile.am` in `xxx_unittest_SOURCES`. This will allow you to you Cmockery2's mocking functions.
+
+#### Running the unit test
+You can type `make` in the directory where the C file is located. Once you built it and there are no errors, you can execute the test either by directly executing the program (in our example above it is called `xxx_unittest` ), or by running `make check`.
+
+#### Debugging
+Sometimes you may need to debug your unit test. To do that, you will have to point `gdb` to the binary which is located in the same directory as the source. For example, you can do the following from the root of the source tree to debug `mem_pool_unittest`:
+
+```
+$ gdb libglusterfs/src/mem_pool_unittest
+```
+
+
+[cmocka]: https://cmocka.org
+[definitionofunittest]: http://artofunittesting.com/definition-of-a-unit-test/
+[cmockapi]: https://api.cmocka.org
diff --git a/doc/developer-guide/versioning.md b/doc/developer-guide/versioning.md
new file mode 100644
index 00000000000..10c1511b79b
--- /dev/null
+++ b/doc/developer-guide/versioning.md
@@ -0,0 +1,44 @@
+Versioning
+==========
+
+### current
+
+The number of the current interface exported by the library. A current value
+of '1', means that you are calling the interface exported by this library
+interface 1.
+
+### revision
+
+The implementation number of the most recent interface exported by this library.
+In this case, a revision value of `0` means that this is the first
+implementation of the interface.
+
+If the next release of this library exports the same interface, but has a
+different implementation (perhaps some bugs have been fixed), the revision
+number will be higher, but current number will be the same. In that case, when
+given a choice, the library with the highest revision will always be used by
+the runtime loader.
+
+### age
+
+The number of previous additional interfaces supported by this library. If age
+were '2', then this library can be linked into executables which were built with
+a release of this library that exported the current interface number, current,
+or any of the previous two interfaces. By definition age must be less than or
+equal to current. At the outset, only the first ever interface is implemented,
+so age can only be `0'.
+
+For every release of the library `-version-info` argument needs to be set
+correctly depending on any interface changes you have made.
+
+This is quite straightforward when you understand what the three numbers mean:
+
+If you have changed any of the sources for this library, the revision number
+must be incremented. This is a new revision of the current interface. If the
+interface has changed, then current must be incremented, and revision reset
+to '0'.
+
+This is the first revision of a new interface. If the new interface is a
+superset of the previous interface (that is, if the previous interface has not
+been broken by the changes in this new release), then age must be incremented.
+This release is backwards compatible with the previous release.
diff --git a/doc/developer-guide/write-behind.md b/doc/developer-guide/write-behind.md
new file mode 100644
index 00000000000..0d78964fa20
--- /dev/null
+++ b/doc/developer-guide/write-behind.md
@@ -0,0 +1,56 @@
+performance/write-behind translator
+===================================
+
+Basic working
+--------------
+
+Write behind is basically a translator to lie to the application that the
+write-requests are finished, even before it is actually finished.
+
+On a regular translator tree without write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed up to client-protocol or storage translator.
+5. client-protocol, on receiving reply from server, starts `STACK_UNWIND()` towards the fuse-bridge.
+
+On a translator tree with write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed up to write-behind translator.
+5. write-behind adds the write buffer to its internal queue and does a `STACK_UNWIND()` towards the fuse-bridge.
+
+write call is completed in application's percepective. after
+`STACK_UNWIND()`ing towards the fuse-bridge, write-behind initiates a fresh
+writev() call to its child translator, whose replies will be consumed by
+write-behind itself. Write-behind _doesn't_ cache the write buffer, unless
+`option flush-behind on` is specified in volume specification file.
+
+Windowing
+---------
+
+With respect to write-behind, each write-buffer has three flags: `stack_wound`, `write_behind` and `got_reply`.
+
+* `stack_wound`: if set, indicates that write-behind has initiated `STACK_WIND()` towards child translator.
+* `write_behind`: if set, indicates that write-behind has done `STACK_UNWIND()` towards fuse-bridge.
+* `got_reply`: if set, indicates that write-behind has received reply from child translator for a `writev()` `STACK_WIND()`. a request will be destroyed by write-behind only if this flag is set.
+
+Currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
+
+window size limits the aggregate size of currently pending write requests. once
+the pending requests' size has reached the window size, write-behind blocks
+writev() calls from fuse-bridge. Blocking is only from application's
+perspective. Write-behind does `STACK_WIND()` to child translator
+straight-away, but hold behind the `STACK_UNWIND()` towards fuse-bridge.
+`STACK_UNWIND()` is done only once write-behind gets enough replies to
+accommodate for currently blocked request.
+
+Flush behind
+------------
+
+If `option flush-behind on` is specified in volume specification file, then
+write-behind sends aggregate write requests to child translator, instead of
+regular per request `STACK_WIND()`s.
diff --git a/doc/developer-guide/writing-a-cloudsync-plugin.md b/doc/developer-guide/writing-a-cloudsync-plugin.md
new file mode 100644
index 00000000000..907860aaed8
--- /dev/null
+++ b/doc/developer-guide/writing-a-cloudsync-plugin.md
@@ -0,0 +1,164 @@
+## How to write your Cloudsync Plugin
+
+### Background
+
+Cloudsync translator is part of the archival feature in Gluster. This translator
+does the retrieval/download part. Each cold file will be archived to a remote
+storage (public or private cloud). On future access to the file, it will be
+retrieved from the remote storage by Cloudsync translator. Each remote storage
+would need a unique plugin. Cloudsync translator will load this plugin and
+call the necessary plugin functions.
+
+Upload can be done by a script or program. There are some basic mandatory steps
+for uploading the data. There is a sample script for crawl and upload given at
+the end of this guide.
+
+### Necessary changes to create a plugin
+
+1. Define store_methods:
+
+* This structure is the container of basic functions that will be called by
+ cloudsync xlator.
+
+ typedef struct store_methodds {
+ int (*fop_download) (call_frame_t *frame, void *config);
+ /* return type should be the store config */
+ void *(*fop_init) (xlator_t *this);
+ int (*fop_reconfigure) (xlator_t *this, dict_t *options);
+ void (*fop_fini) (void *config);
+ } store_methods_t;
+
+
+ Member details:
+ fop_download:
+ This is the download function pointer.
+
+ frame: This will have the fd to write data downloaded from
+ cloud to GlusterFS.(frame->local->fd)
+
+ config: This is the plugin configuration variable.
+
+ Note: Structure cs_local_t has member dlfd and dloffset which
+ can be used to manage the writes to Glusterfs.
+ Include cloudsync-common.h to access these structures.
+
+ fop_init:
+ This is similar to xlator init. But here the return value is
+ the plugin configuration pointer. This pointer will be stored
+ in the cloudsync private object (priv->stores->config). And
+ the cloudsync private object can be accessed by "this->private"
+ where "this" is of type xlator_t.
+
+ fop_reconfigure:
+ This is similar to xlator_reconfigure.
+
+ fop_fini:
+ Free plugin resources.
+
+ Note: Store_methods_t is part of cs_private_t which in turn part of
+ xlator_t. Create a store_methods_t object named "store_ops" in
+ your plugin. For example
+
+ store_methods_t store_ops = {
+ .fop_download = aws_download_s3,
+ .fop_init = aws_init,
+ .fop_reconfigure = aws_reconfigure,
+ .fop_fini = aws_fini,
+ };
+
+
+2 - Making Cloudsync xlator aware of the plugin:
+
+ Add an entry in to the cs_plugin structure. For example
+ struct cs_plugin plugins[] = {
+ {
+ .name = "amazons3",
+ .library = "libamazons3.so",
+ .description = "amazon s3 store."
+ },
+
+ {.name = NULL},
+ };
+
+ Description about individual members:
+ name: name of the plugin
+ library: This is the shared object created. Cloudsync will load
+ this library during init.
+ description: Describe about the plugin.
+
+3- Makefile Changes in Cloudsync:
+
+ Add <plugin.la> to cloudsync_la_LIBADD variable.
+
+4 - Configure.ac changes:
+
+ In cloudsync section add the necessary dependency checks for
+ the plugin.
+
+5 - Export symbols:
+
+ Cloudsync needs "store_ops" to resolve all plugin functions.
+ Create a file <plugin>.sym and add write "store_ops" to it.
+
+
+### Sample script for upload
+This script assumes amazon s3 is the target cloud and bucket name is
+gluster-bucket. User can do necessary aws configuration using command
+"aws configure". Currently for amazons3 there are four gluster settings
+available.
+1- features.s3plugin-seckey -> s3 secret key
+2- features.s3plugin-keyid -> s3 key id
+3- features.s3plugin-bucketid -> bucketid
+4- features.s3plugin-hostname -> hostname e.g. s3.amazonaws.com
+
+Additionally set cloudsync storetype to amazons3.
+
+gluster v set <VOLNAME> cloudsync-storetype amazons3
+
+Now create a mount dedicated for this upload task.
+
+That covers necessary configurations needed.
+
+Below is the sample script for upload. The script will crawl directly on the
+brick and will upload those files which are not modified for last one month.
+It needs two arguments.
+1st arguement - Gluster Brick path
+2nd arguement - coldness that is how many days since the file was modified.
+3rd argument - dedicated gluster mount point created for uploading.
+
+Once the cloud setup is done, run the following script on individual bricks.
+Note: For an AFR volume, pick only the fully synchronized brick among the
+replica bricks.
+
+```
+target_folder=$1
+coldness=$2
+mnt=$3
+
+cd $target_folder
+for i in `find . -type f | grep -v "glusterfs" | sed 's/..//'`
+do
+ echo "processing $mnt/$i"
+
+ #check whether the file is already archived
+ getfattr -n trusted.glusterfs.cs.remote $i &> /dev/null
+ if [ $? -eq 0 ]
+ then
+ echo "file $mnt/$i is already archived"
+ else
+ #upload to cloud
+ aws s3 cp $mnt/$i s3://gluster-bucket/
+ mtime=`stat -c "%Y" $mnt/$i`
+
+ #post processing of upload
+ setfattr -n trusted.glusterfs.csou.complete -v $mtime $mnt/$i
+ if [ $? -ne 0 ]
+ then
+ echo "archiving of file $mnt/$i failed"
+ else
+ echo "archiving of file $mnt/$i succeeded"
+ fi
+
+ fi
+done
+```
diff --git a/doc/developer-guide/xlator-classification.md b/doc/developer-guide/xlator-classification.md
new file mode 100644
index 00000000000..6073df9375f
--- /dev/null
+++ b/doc/developer-guide/xlator-classification.md
@@ -0,0 +1,221 @@
+# xlator categories and expectations
+
+The purpose of the document is to define a category for various xlators
+and expectations around what each category means from a perspective of
+health and maintenance of a xlator.
+
+The need to do this is to ensure certain categories are kept in good
+health, and helps the community and contributors focus their efforts around the
+same.
+
+This document also provides implementation details for xlator developers to
+declare a category for any xlator.
+
+## Table of contents
+1. Audience
+2. Categories (and expectations of each category)
+3. Implementation and usage details
+
+## Audience
+
+This document is intended for the following community participants,
+- New xlator contributors
+- Existing xlator maintainers
+- Packaging and gluster management stack maintainers
+
+For a more user facing understanding it is recommended to read section (TBD)
+in the gluster documentation.
+
+## Categories
+1. Experimental (E)
+2. TechPreview (TP)
+3. Maintained (M)
+4. Deprecated (D)
+5. Obsolete (O)
+
+### Experimental (E)
+
+Developed in the experimental branch, for exploring new features. These xlators
+are NEVER packaged as a part of releases, interested users and contributors can
+build and work with these from sources. In the future, these maybe available as
+an package based on a weekly build of the same.
+
+#### Quality expectations
+- Compiles or passes smoke tests
+- Does not break nightly experimental regressions
+ - NOTE: If a nightly is broken, then all patches that were merged are reverted
+ till the errant patch is found and subsequently fixed
+
+### TechPreview (TP)
+
+Xlators in master or release branches that are not deemed fit to be in
+production deployments, but are feature complete to invite feedback and host
+user data.
+
+These xlators will be worked upon with priority by maintainers/authors who are
+involved in making them more stable than xlators in the Experimental/Deprecated/
+Obsolete categories.
+
+There is no guarantee that these xlators will move to the Maintained state, and
+may just get Obsoleted based on feedback, or other project goals or technical
+alternatives.
+
+#### Quality expectations
+- Same as Maintained, minus
+ - Performance, Scale, other(?)
+ - *TBD* *NOTE* Need inputs, Intention is all quality goals as in Maintained,
+ other than the list above (which for now has scale and performance)
+
+### Maintained (M)
+
+These xltors are part of the core Gluster functionality and are maintained
+actively. These are part of master and release branches and are higher in
+priority of maintainers and other interested contributors.
+
+#### Quality expectations
+
+NOTE: A short note on what each of these mean are added here, details to follow.
+
+NOTE: Out of the gate all of the following are not mandated, consider the
+following a desirable state to reach as we progress on each
+
+- Bug backlog: Actively address bug backlog
+- Enhancement backlog: Actively maintain outstanding enhancement backlog (need
+ not be acted on, but should be visible to all)
+- Review backlog: Actively keep this below desired counts and states
+- Static code health: Actively meet near-zero issues in this regard
+ - Coverity, spellcheck and other checks
+- Runtime code health: Actively meet defined coverage levels in this regard
+ - Coverage, others?
+ - Per-patch regressions
+ - Glusto runs
+ - Performance
+ - Scalability
+- Technical specifications: Implementation details should be documented and
+ updated at regular cadence (even per patch that change assumptions in
+ here)
+- User documentation: User facing details should be maintained to current
+ status in the documentation
+- Debuggability: Steps, tools, procedures should be documented and maintained
+ each release/patch as applicable
+- Troubleshooting: Steps, tools, procedures should be documented and maintained
+ each release/patch as applicable
+ - Steps/guides for self service
+ - Knowledge base for problems
+- Other common criteria that will apply: Required metrics/desired states to be
+ defined per criteria
+ - Monitoring, usability, statedump, and other such xlator expectations
+
+### Deprecated (D)
+
+Xlators on master or release branches that would be obsoleted and/or replaced
+with similar or other functionality in the next major release.
+
+#### Quality expectations
+- Retain status-quo when moved to this state, till it is moved to obsoleted
+- Provide migration steps if feature provided by the xlator is replaced with
+other xlators
+
+### Obsolete (O)
+
+Xlator/code still in tree, but not packaged or shipped or maintained in any
+form. This is noted as a category till the code is removed from the tree.
+
+These xlators and their corresponding code and test health will not be executed.
+
+#### Quality expectations
+- None
+
+## Implementation and usage details
+
+### How to specify an xlators category
+
+While defining 'xlator_api_t' structure for the corresponding xlator, add a
+flag like below:
+
+```
+diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c
+index 0f0e53bac2..8267d6897c 100644
+--- a/xlators/performance/nl-cache/src/nl-cache.c
++++ b/xlators/performance/nl-cache/src/nl-cache.c
+@@ -869,4 +869,5 @@ xlator_api_t xlator_api = {
+ .cbks = &nlc_cbks,
+ .options = nlc_options,
+ .identifier = "nl-cache",
++ .category = GF_TECH_PREVIEW,
+ };
+diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
+index 8d39720e7f..235de27c19 100644
+--- a/xlators/performance/quick-read/src/quick-read.c
++++ b/xlators/performance/quick-read/src/quick-read.c
+@@ -1702,4 +1702,5 @@ xlator_api_t xlator_api = {
+ .cbks = &qr_cbks,
+ .options = qr_options,
+ .identifier = "quick-read",
++ .category = GF_MAINTAINED,
+ };
+```
+
+Similarly, if a particular option is in different state other than
+the xlator state, one can add the same flag in options structure too.
+
+```
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index 0e86e33d03..81996743d1 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -772,6 +772,7 @@ struct volume_options options[] = {
+ .description = "Maximum latency for shd halo replication in msec."
+ },
+ { .key = {"halo-enabled"},
++ .category = GF_TECH_PREVIEW,
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+
+```
+
+
+### User experience using the categories
+
+#### Ability to use a category
+
+This section details which category of xlators can be used when and specifics
+around when each category is enabled.
+
+1. Maintained category xlators can be used by default, this implies, volumes
+created with these xlators enabled will throw no warnings, or need no user
+intervention to use the xlator.
+
+2. Tech Preview category xlators needs cluster configuration changes to allow
+these xlatorss to be used in volumes, further, logs will contain a message
+stating TP xlators are in use. Without the cluster configured to allow TP
+xlators, volumes created or edited to use such xlators would result in errors.
+ - (TBD) Cluster configuration option
+ - (TBD) Warning message
+ - (TBD) Code mechanics on how this is achieved
+
+3. Deprecated category xlators can be used by default, but will throw a warning
+in the logs that such are in use and will be deprecated in the future.
+ - (TBD) Warning message
+
+4. Obsolete category xlators will not be packaged and hence cannot be used from
+release builds.
+
+5. Experimental category xlators will not be packaged and hence cannot be used
+from release builds, if running experimental (weekly or other such) builds,
+these will throw a warning in the logs stating experimental xlators are in use.
+ - (TBD) Warning message
+
+#### Ability to query xlator category
+
+(TBD) Need to provide the ability to query xlator categories, or list xlators
+and their respective categories.
+
+#### User facing changes
+
+User facing changes that are expected due to this change include the following,
+- Cluster wide option to enable TP xlators, or more generically a category
+level of xlators
+- Errors in commands that fail due to invalid categories
+- Warning messages in logs to denote certain categories of xlators are in use
+- (TBD) Ability to query xlators and their respective categories
diff --git a/doc/examples/legacy/Makefile.am b/doc/examples/legacy/Makefile.am
deleted file mode 100644
index 49c9701efb3..00000000000
--- a/doc/examples/legacy/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-EXTRA = README replicate.vol stripe.vol protocol-client.vol protocol-server.vol posix-locks.vol trash.vol write-behind.vol io-threads.vol io-cache.vol read-ahead.vol filter.vol trace.vol
-EXTRA_DIST = $(EXTRA)
-
-docdir = $(datadir)/doc/$(PACKAGE_NAME)
-Examplesdir = $(docdir)/examples
-Examples_DATA = $(EXTRA)
-
-CLEANFILES =
diff --git a/doc/examples/legacy/README b/doc/examples/legacy/README
deleted file mode 100644
index 73275157160..00000000000
--- a/doc/examples/legacy/README
+++ /dev/null
@@ -1,13 +0,0 @@
-GlusterFS's translator feature is very flexible and there are quite a lot of
-ways one can configure their filesystem to behave like.
-
-Volume Specification is a way in which GlusterFS understands how it has to work,
-based on what is written there.
-
-Going through the following URLs may give you more idea about all these.
-
-* http://www.gluster.org/docs/index.php/GlusterFS
-* http://www.gluster.org/docs/index.php/GlusterFS_Volume_Specification
-* http://www.gluster.org/docs/index.php/GlusterFS_Translators
-
-Mail us any doubts, suggestions on 'gluster-devel(at)nongnu.org'
diff --git a/doc/examples/legacy/filter.vol b/doc/examples/legacy/filter.vol
deleted file mode 100644
index 59bb23ecf2c..00000000000
--- a/doc/examples/legacy/filter.vol
+++ /dev/null
@@ -1,23 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, afr, or unify over afr)
-#
-
-### 'Filter' translator is used on client side (or server side according to needs). This traslator makes all the below translators, (or say volumes) as read-only. Hence if one wants a 'read-only' filesystem, using filter as the top most volume will make it really fast as the fops are returned from this level itself.
-
-volume filter-ro
- type features/filter
- option root-squashing enable
-# option completely-read-only yes
-# translate-uid 1-99=0
- subvolumes client
-end-volume
diff --git a/doc/examples/legacy/io-cache.vol b/doc/examples/legacy/io-cache.vol
deleted file mode 100644
index a71745017a2..00000000000
--- a/doc/examples/legacy/io-cache.vol
+++ /dev/null
@@ -1,31 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come
-# here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be distribute, replicate, or distribute
-# over replicate)
-#
-
-### 'IO-Cache' translator is best used on client side when a filesystem has file
-# which are not modified frequently but read several times. For example, while
-# compiling a kernel, *.h files are read while compiling every *.c file, in
-# these case, io-cache translator comes very handy, as it keeps the whole file
-# content in the cache, and serves from the cache.
-# One can provide the priority of the cache too.
-
-volume ioc
- type performance/io-cache
- subvolumes client # In this example it is 'client' you may have to
- # change it according to your spec file.
- option cache-size 64MB # 32MB is default
- option force-revalidate-timeout 5 # 1second is default
- option priority *.html:2,*:1 # default is *:0
-end-volume
diff --git a/doc/examples/legacy/io-threads.vol b/doc/examples/legacy/io-threads.vol
deleted file mode 100644
index 236f5b8b188..00000000000
--- a/doc/examples/legacy/io-threads.vol
+++ /dev/null
@@ -1,22 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'IO-threads' translator gives a threading behaviour to File I/O calls. All
-# other normal fops are having default behaviour. Loading this on server side
-# helps to reduce the contension of network. (Which is assumed as a GlusterFS
-# hang).
-
-volume iot
- type performance/io-threads
- subvolumes brick
- option thread-count 4 # default value is 1
-end-volume
-
-volume server
- type protocol/server
- subvolumes iot
- option transport-type tcp # For TCP/IP transport
- option auth.addr.iot.allow 192.168.*
-end-volume
diff --git a/doc/examples/legacy/posix-locks.vol b/doc/examples/legacy/posix-locks.vol
deleted file mode 100644
index 673afa3f8ae..00000000000
--- a/doc/examples/legacy/posix-locks.vol
+++ /dev/null
@@ -1,19 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-# 'Posix-locks' feature should be added on the server side.
-
-volume p-locks
- type features/posix-locks
- subvolumes brick
- option mandatory on
-end-volume
-
-volume server
- type protocol/server
- subvolumes p-locks
- option transport-type tcp
- option auth.addr.p-locks.allow 192.168.* # Allow access to "p-locks" volume
-end-volume
diff --git a/doc/examples/legacy/protocol-client.vol b/doc/examples/legacy/protocol-client.vol
deleted file mode 100644
index c34ef790d0e..00000000000
--- a/doc/examples/legacy/protocol-client.vol
+++ /dev/null
@@ -1,12 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
-# option transport.socket.remote-port 24016
-
-# option transport-type rdma # for Infiniband verbs transport
-# option transport.rdma.work-request-send-count 16
-# option transport.rdma.work-request-recv-count 16
-# option transport.rdma.remote-port 24016
- option remote-subvolume brick # name of the remote volume
-end-volume
diff --git a/doc/examples/legacy/protocol-server.vol b/doc/examples/legacy/protocol-server.vol
deleted file mode 100644
index 195e4965782..00000000000
--- a/doc/examples/legacy/protocol-server.vol
+++ /dev/null
@@ -1,21 +0,0 @@
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
-# option transport.socket.listen-port 24016
-
-# option transport-type rdma
-# option transport.rdma.work-request-send-count 64
-# option transport.rdma.work-request-recv-count 64
-# option transport.rdma.listen-port 24016
-
-# option bind-address 192.168.1.10 # Default is to listen on all interfaces
- subvolumes brick
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
-end-volume
diff --git a/doc/examples/legacy/read-ahead.vol b/doc/examples/legacy/read-ahead.vol
deleted file mode 100644
index 9e4dba55627..00000000000
--- a/doc/examples/legacy/read-ahead.vol
+++ /dev/null
@@ -1,24 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be distribute, replicate, or distribute
-# over replicate)
-#
-
-# 'Read-Ahead' translator is best utilized on client side, as it prefetches
-# the file contents when the first read() call is issued.
-
-volume ra
- type performance/read-ahead
- subvolumes client
- option page-count 4 # default is 2
- option force-atime-update no # defalut is 'no'
-end-volume
diff --git a/doc/examples/legacy/replicate.vol b/doc/examples/legacy/replicate.vol
deleted file mode 100644
index 10626d46f05..00000000000
--- a/doc/examples/legacy/replicate.vol
+++ /dev/null
@@ -1,118 +0,0 @@
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of
-# replicate spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24016
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24017
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24018
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24016
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24017
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24018
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add replicate feature.
-volume replicate
- type cluster/replicate
- subvolumes client1 client2 client3
-end-volume
diff --git a/doc/examples/legacy/stripe.vol b/doc/examples/legacy/stripe.vol
deleted file mode 100644
index 9524e8198bf..00000000000
--- a/doc/examples/legacy/stripe.vol
+++ /dev/null
@@ -1,120 +0,0 @@
-
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of
-# stripe's spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24016
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24017
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24018
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24016
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24017
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24018
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add Stripe Feature.
-volume stripe
- type cluster/stripe
- subvolumes client1 client2 client3
- option block-size 1MB
-end-volume
diff --git a/doc/examples/legacy/trace.vol b/doc/examples/legacy/trace.vol
deleted file mode 100644
index 59830f26a9d..00000000000
--- a/doc/examples/legacy/trace.vol
+++ /dev/null
@@ -1,21 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### 'Trace' translator is a very handy debug tool for GlusterFS, as it can be
-# loaded between any of the two volumes without changing the behaviour of the
-# filesystem.
-# On client side it can be the top most volume in spec (like now) to understand
-# what calls are made on FUSE filesystem, when a mounted filesystem is
-# accessed.
-
-volume trace
- type debug/trace
- subvolumes client
-end-volume
-
-# 'NOTE:' By loading 'debug/trace' translator, filesystem will be very slow as
-# it logs each and every calls to the log file.
diff --git a/doc/examples/legacy/trash.vol b/doc/examples/legacy/trash.vol
deleted file mode 100644
index 3fcf315af08..00000000000
--- a/doc/examples/legacy/trash.vol
+++ /dev/null
@@ -1,20 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'Trash' translator is best used on server side as it just renames the
-# deleted file inside 'trash-dir', and it makes 4 seperate fops for one unlink
-# call.
-volume trashcan
- type features/trash
- subvolumes brick
- option trash-dir /.trashcan
-end-volume
-
-volume server
- type protocol/server
- subvolumes trashcan
- option transport-type tcp # For TCP/IP transport
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
-end-volume
diff --git a/doc/examples/legacy/write-behind.vol b/doc/examples/legacy/write-behind.vol
deleted file mode 100644
index 2b5ed413921..00000000000
--- a/doc/examples/legacy/write-behind.vol
+++ /dev/null
@@ -1,27 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, replicate, or unify over replicate)
-#
-
-
-# 'Write-behind' translator is a performance booster for write operation. Best
-# used on client side, as its main intension is to reduce the network latency
-# caused for each write operation.
-
-volume wb
- type performance/write-behind
- subvolumes client
- option flush-behind on # default value is 'off'
- option window-size 2MB
- option enable-O_SYNC no # default is no
- option disable-for-first-nbytes 128KB #default is 1
-end-volume
diff --git a/doc/features/ctime.md b/doc/features/ctime.md
new file mode 100644
index 00000000000..74a77abed4b
--- /dev/null
+++ b/doc/features/ctime.md
@@ -0,0 +1,68 @@
+# Consistent time attributes in gluster across replica/distribute
+
+
+#### Problem:
+Traditionally gluster has been using time attributes (ctime, atime, mtime) of files/dirs from bricks. The problem with this approach is that, it is not consisteant across replica and distribute bricks. And applications which depend on it breaks as replica might not always return time attributes from same brick.
+
+Tar especially gives "file changed as we read it" whenever it detects ctime differences when stat is served from different bricks. The way we have been trying to solve it is to serve the stat structures from same brick in afr, max-time in dht. But it doesn't avoid the problem completely. Because there is no way to change ctime at the moment(lutimes() only allows mtime, atime), there is little we can do to make sure ctimes match after self-heals/xattr updates/rebalance.
+
+#### Solution Proposed:
+Store time attribues (ctime, mtime, atime) as an xattr of the file. The xattr is updated based
+on the fop. If a filesystem fop changes only mtime and ctime, update only those in xattr for
+that file.
+
+#### Design Overview:
+1) As part of each fop, top layer will generate a time stamp and pass it to the down along
+ with other information
+ - This will bring a dependency for NTP synced clients along with servers
+ - There can be a diff in time if the fop stuck in the xlator for various reason,
+for ex: because of locks.
+
+ 2) On the server, posix layer stores the value in the memory (inode ctx) and will sync the data periodically to the disk as an extended attr
+ - Of course sync call also will force it. And fop comes for an inode which is not linked, we do the sync immediately.
+
+ 3) Each time when inodes are created or initialized it read the data from disk and store in inode ctx.
+
+ 4) Before setting to inode_ctx we compare the timestamp stored and the timestamp received, and only store if the stored value is lesser than the current value.
+
+ 5) So in best case data will be stored and retrieved from the memory. We replace the values in iatt with the values in inode_ctx.
+
+ 6) File ops that changes the parent directory attr time need to be consistent across all the distributed directories across the subvolumes. (for eg: a create call will change ctime and mtime of parent dir)
+
+ - This has to handle separately because we only send the fop to the hashed subvolume.
+ - We can asynchronously send the timeupdate setattr fop to the other subvoumes and change the values for parent directory if the file fops is successful on hashed subvolume.
+ - This will have a window where the times are inconsistent across dht subvolume (Please provide your suggestions)
+
+7) Currently we have couple of mount options for time attributes like noatime, relatime , nodiratime etc. But we are not explicitly handled those options even if it is given as mount option when gluster mount.
+
+
+#### Implementation Overview:
+This features involves changes in following xlators.
+ - utime xlator
+ - posix xlator
+
+##### utime xlator:
+This is a new client side xlator which does following tasks.
+
+1. It will generate a time stamp and passes it down in frame->root->ctime and over the network.
+2. Based on fop, it also decides the time attributes to be updated and this passed using "frame->root->flags"
+
+ Patches:
+ 1. https://review.gluster.org/#/c/19857/
+
+##### posix xlator:
+Following tasks are done in posix xlator:
+
+1. Provides APIs to set and get the xattr from backend. It also caches the xattr in inode context. During get, it updates time attributes stored in xattr into iatt structure.
+2. Based on the flags from utime xlator, relevant fops update the time attributes in the xattr.
+
+ Patches:
+ 1. https://review.gluster.org/#/c/19267/
+ 2. https://review.gluster.org/#/c/19795/
+ 3. https://review.gluster.org/#/c/19796/
+
+#### Pending Work:
+1. Handling of time related mount options (noatime, realatime,etc)
+2. flag based create (depending on flags in open, create behaviour might change)
+3. Changes in dht for direcotory sync acrosss multiple subvolumes
+4. readdirp stat need to be worked on.
diff --git a/doc/features/ganesha-ha.md b/doc/features/ganesha-ha.md
new file mode 100644
index 00000000000..4b226a22ccf
--- /dev/null
+++ b/doc/features/ganesha-ha.md
@@ -0,0 +1,43 @@
+# Overview of Ganesha HA Resource Agents in GlusterFS 3.7
+
+The ganesha_mon RA monitors its ganesha.nfsd daemon. While the
+daemon is running, it creates two attributes: ganesha-active and
+grace-active. When the daemon stops for any reason, the attributes
+are deleted. Deleting the ganesha-active attribute triggers the
+failover of the virtual IP (the IPaddr RA) to another node —
+according to constraint location rules — where ganesha.nfsd is
+still running.
+
+The ganesha_grace RA monitors the grace-active attribute. When
+the grace-active attibute is deleted, the ganesha_grace RA stops,
+and will not restart. This triggers pacemaker to invoke the notify
+action in the ganesha_grace RAs on the other nodes in the cluster;
+which send a DBUS message to their respective ganesha.nfsd.
+
+(N.B. grace-active is a bit of a misnomer. while the grace-active
+attribute exists, everything is normal and healthy. Deleting the
+attribute triggers putting the surviving ganesha.nfsds into GRACE.)
+
+To ensure that the remaining/surviving ganesha.nfsds are put into
+ NFS-GRACE before the IPaddr (virtual IP) fails over there is a
+short delay (sleep) between deleting the grace-active attribute
+and the ganesha-active attribute. To summarize, e.g. in a four
+node cluster:
+
+1. on node 2 ganesha_mon::monitor notices that ganesha.nfsd has died
+
+2. on node 2 ganesha_mon::monitor deletes its grace-active attribute
+
+3. on node 2 ganesha_grace::monitor notices that grace-active is gone
+and returns OCF_ERR_GENERIC, a.k.a. new error. When pacemaker tries
+to (re)start ganesha_grace, its start action will return
+OCF_NOT_RUNNING, a.k.a. known error, don't attempt further restarts.
+
+4. on nodes 1, 3, and 4, ganesha_grace::notify receives a post-stop
+notification indicating that node 2 is gone, and sends a DBUS message
+to its ganesha.nfsd, putting it into NFS-GRACE.
+
+5. on node 2 ganesha_mon::monitor waits a short period, then deletes
+its ganesha-active attribute. This triggers the IPaddr (virt IP)
+failover according to constraint location rules.
+
diff --git a/doc/gluster.8 b/doc/gluster.8
index 87c81208175..ba595edca15 100644
--- a/doc/gluster.8
+++ b/doc/gluster.8
@@ -1,18 +1,11 @@
-.\" Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" along with this program. If not, see " <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.TH Gluster 8 "Gluster command line utility" "07 March 2011" "Gluster Inc."
@@ -23,15 +16,14 @@ gluster - Gluster Console Manager (command line utility)
.PP
To run the program and display gluster prompt:
.PP
-.B gluster
+.B gluster [--remote-host=<gluster_node>] [--mode=script] [--xml]
.PP
(or)
.PP
To specify a command directly:
.PP
.B gluster
-.I [commands] [options]
-
+.I [commands] [options] [--remote-host=<gluster_node>] [--mode=script] [--xml]
.SH DESCRIPTION
The Gluster Console Manager is a command line utility for elastic volume management. You can run the gluster command on any export server. The command enables administrators to perform cloud operations, such as creating, expanding, shrinking, rebalancing, and migrating volumes without needing to schedule server downtime.
.SH COMMANDS
@@ -43,7 +35,13 @@ The Gluster Console Manager is a command line utility for elastic volume managem
\fB\ volume info [all|<VOLNAME>] \fR
Display information about all volumes, or the specified volume.
.TP
-\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR
+\fB\ volume list \fR
+List all volumes in cluster
+.TP
+\fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR
+Display status of all or specified volume(s)/brick
+.TP
+\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR
Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option.
.TP
@@ -56,12 +54,21 @@ Start the specified volume.
\fB\ volume stop <VOLNAME> [force] \fR
Stop the specified volume.
.TP
-\fB\ volume rename <VOLNAME> <NEW-VOLNAME> \fR
-Rename the specified volume.
-.TP
\fB\ volume set <VOLNAME> <OPTION> <PARAMETER> [<OPTION> <PARAMETER>] ... \fR
Set the volume options.
.TP
+\fB\ volume get <VOLNAME/all> <OPTION/all> \fR
+Get the value of the all options or given option for volume <VOLNAME> or all option. gluster volume get all all is to get all global options
+.TP
+\fB\ volume reset <VOLNAME> [option] [force] \fR
+Reset all the reconfigured options
+.TP
+\fB\ volume barrier <VOLNAME> {enable|disable} \fR
+Barrier/unbarrier file operations on a volume
+.TP
+\fB\ volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}{inode [range]|entry [basename]|posix [range]} \fR
+Clear locks held on path
+.TP
\fB\ volume help \fR
Display help for the volume command.
.SS "Brick Commands"
@@ -78,7 +85,13 @@ If you remove the brick, the data stored in that brick will not be available. Yo
.B replace-brick
option.
.TP
-\fB\ volume rebalance-brick <VOLNAME>(<BRICK> <NEW-BRICK>) start \fR
+\fB\ volume reset-brick <VOLNAME> <SOURCE-BRICK> {{start} | {<NEW-BRICK> commit}} \fR
+Brings down or replaces the specified source brick with the new brick.
+.TP
+\fB\ volume replace-brick <VOLNAME> <SOURCE-BRICK> <NEW-BRICK> commit force \fR
+Replace the specified source brick with a new brick.
+.TP
+\fB\ volume rebalance <VOLNAME> start \fR
Start rebalancing the specified volume.
.TP
\fB\ volume rebalance <VOLNAME> stop \fR
@@ -86,9 +99,6 @@ Stop rebalancing the specified volume.
.TP
\fB\ volume rebalance <VOLNAME> status \fR
Display the rebalance status of the specified volume.
-.TP
-\fB\ volume replace-brick <VOLNAME> (<BRICK> <NEW-BRICK>) start|pause|abort|status|commit \fR
-Replace the specified brick.
.SS "Log Commands"
.TP
\fB\ volume log filename <VOLNAME> [BRICK] <DIRECTORY> \fB
@@ -99,10 +109,22 @@ Locate the log file for corresponding volume/brick.
.TP
\fB\ volume log rotate <VOLNAME> [BRICK] \fB
Rotate the log file for corresponding volume/brick.
+.TP
+\fB\ volume profile <VOLNAME> {start|info [peek|incremental [peek]|cumulative|clear]|stop} [nfs] \fR
+Profile operations on the volume. Once started, volume profile <volname> info provides cumulative statistics of the FOPs performed.
+.TP
+\fB\ volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>] \fR
+Generates a profile of a volume representing the performance and bottlenecks/hotspots of each brick.
+.TP
+\fB\ volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|priv|fd|inode|history]... | [client <hostname:process-id>]] \fR
+Dumps the in memory state of the specified process or the bricks of the volume.
+.TP
+\fB\ volume sync <HOSTNAME> [all|<VOLNAME>] \fR
+Sync the volume information from a peer
.SS "Peer Commands"
.TP
\fB\ peer probe <HOSTNAME> \fR
-Probe the specified peer.
+Probe the specified peer. In case the <HOSTNAME> given belongs to an already probed peer, the peer probe command will add the hostname to the peer if required.
.TP
\fB\ peer detach <HOSTNAME> \fR
Detach the specified peer.
@@ -110,10 +132,216 @@ Detach the specified peer.
\fB\ peer status \fR
Display the status of peers.
.TP
+\fB\ pool list \fR
+List all the nodes in the pool (including localhost)
+.TP
\fB\ peer help \fR
Display help for the peer command.
+.SS "Quota Commands"
+.TP
+\fB\ volume quota <VOLNAME> enable \fR
+Enable quota on the specified volume. This will cause all the directories in the filesystem hierarchy to be accounted and updated thereafter on each operation in the the filesystem. To kick start this accounting, a crawl is done over the hierarchy with an auxiliary client.
+.TP
+\fB\ volume quota <VOLNAME> disable \fR
+Disable quota on the volume. This will disable enforcement and accounting in the filesystem. Any configured limits will be lost.
+.TP
+\fB\ volume quota <VOLNAME> limit-usage <PATH> <SIZE> [<PERCENT>] \fR
+Set a usage limit on the given path. Any previously set limit is overridden to the new value. The soft limit can optionally be specified (as a percentage of hard limit). If soft limit percentage is not provided the default soft limit value for the volume is used to decide the soft limit.
+.TP
+\fB\ volume quota <VOLNAME> limit-objects <PATH> <SIZE> [<PERCENT>] \fR
+Set an inode limit on the given path. Any previously set limit is overridden to the new value. The soft limit can optionally be specified (as a percentage of hard limit). If soft limit percentage is not provided the default soft limit value for the volume is used to decide the soft limit.
+.TP
+NOTE: valid units of SIZE are : B, KB, MB, GB, TB, PB. If no unit is specified, the unit defaults to bytes.
+.TP
+\fB\ volume quota <VOLNAME> remove <PATH> \fR
+Remove any usage limit configured on the specified directory. Note that if any limit is configured on the ancestors of this directory (previous directories along the path), they will still be honored and enforced.
+.TP
+\fB\ volume quota <VOLNAME> remove-objects <PATH> \fR
+Remove any inode limit configured on the specified directory. Note that if any limit is configured on the ancestors of this directory (previous directories along the path), they will still be honored and enforced.
+.TP
+\fB\ volume quota <VOLNAME> list <PATH> \fR
+Lists the usage and limits configured on directory(s). If a path is given only the limit that has been configured on the directory(if any) is displayed along with the directory's usage. If no path is given, usage and limits are displayed for all directories that has limits configured.
+.TP
+\fB\ volume quota <VOLNAME> list-objects <PATH> \fR
+Lists the inode usage and inode limits configured on directory(s). If a path is given only the limit that has been configured on the directory(if any) is displayed along with the directory's inode usage. If no path is given, usage and limits are displayed for all directories that has limits configured.
+.TP
+\fB\ volume quota <VOLNAME> default-soft-limit <PERCENT> \fR
+Set the percentage value for default soft limit for the volume.
+.TP
+\fB\ volume quota <VOLNAME> soft-timeout <TIME> \fR
+Set the soft timeout for the volume. The interval in which limits are retested before the soft limit is breached.
+.TP
+\fB\ volume quota <VOLNAME> hard-timeout <TIME> \fR
+Set the hard timeout for the volume. The interval in which limits are retested after the soft limit is breached.
+.TP
+\fB\ volume quota <VOLNAME> alert-time <TIME> \fR
+Set the frequency in which warning messages need to be logged (in the brick logs) once soft limit is breached.
+.TP
+\fB\ volume inode-quota <VOLNAME> enable/disable \fR
+Enable/disable inode-quota for <VOLNAME>
+.TP
+\fB\ volume quota help \fR
+Display help for volume quota commands
+.TP
+NOTE: valid units of time and their symbols are : hours(h/hr), minutes(m/min), seconds(s/sec), weeks(w/wk), Days(d/days).
+.SS "Geo-replication Commands"
+.TP
+\fI\ Note\fR: password-less ssh, from the master node (where these commands are executed) to the slave node <SLAVE_HOST>, is a prerequisite for the geo-replication commands.
+.TP
+\fB\ system:: execute gsec_create\fR
+Generates pem keys which are required for push-pem
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> create [[ssh-port n][[no-verify]|[push-pem]]] [force]\fR
+Create a new geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+Use ssh-port n if custom SSH port is configured in slave nodes.
+Use no-verify if the rsa-keys of nodes in master volume is distributed to slave nodes through an external agent.
+Use push-pem to push the keys automatically.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> {start|stop} [force] \fR
+Start/stop the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+.TP
+\fB\ volume geo-replication [<MASTER_VOL> [<SLAVE_HOST>::<SLAVE_VOL>]] status [detail] \fR
+Query status of the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> {pause|resume} [force] \fR
+Pause/resume the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> delete [reset-sync-time]\fR
+Delete the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+Optionally you can also reset the sync time in case you need to resync the entire volume on session recreate.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> config [[!]<options> [<value>]] \fR
+View (when no option provided) or set configuration for this geo-replication session.
+Use "!<OPTION>" to reset option <OPTION> to default value.
+.SS "Bitrot Commands"
+.TP
+\fB\ volume bitrot <VOLNAME> {enable|disable} \fR
+Enable/disable bitrot for volume <VOLNAME>
+.TP
+\fB\ volume bitrot <VOLNAME> signing-time <time-in-secs> \fR
+Waiting time for an object after last fd is closed to start signing process.
+.TP
+\fB\ volume bitrot <VOLNAME> signer-threads <count> \fR
+Number of signing process threads. Usually set to number of available cores.
+.TP
+\fB\ volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive} \fR
+Scrub-throttle value is a measure of how fast or slow the scrubber scrubs the filesystem for volume <VOLNAME>
+.TP
+\fB\ volume bitrot <VOLNAME> scrub-frequency {hourly|daily|weekly|biweekly|monthly} \fR
+Scrub frequency for volume <VOLNAME>
+.TP
+\fB\ volume bitrot <VOLNAME> scrub {pause|resume|status|ondemand} \fR
+Pause/Resume scrub. Upon resume, scrubber continues where it left off. status option shows the statistics of scrubber. ondemand option starts the scrubbing immediately if the scrubber is not paused or already running.
+.TP
+\fB\ volume bitrot help \fR
+Display help for volume bitrot commands
+.TP
+.SS "Snapshot Commands"
+.TP
+\fB\ snapshot create <snapname> <volname> [no-timestamp] [description <description>] [force] \fR
+Creates a snapshot of a GlusterFS volume. User can provide a snap-name and a description to identify the snap. Snap will be created by appending timestamp in GMT. User can override this behaviour using "no-timestamp" option. The description cannot be more than 1024 characters. To be able to take a snapshot, volume should be present and it should be in started state.
+.TP
+\fB\ snapshot restore <snapname> \fR
+Restores an already taken snapshot of a GlusterFS volume. Snapshot restore is an offline activity therefore if the volume is online (in started state) then the restore operation will fail. Once the snapshot is restored it will not be available in the list of snapshots.
+.TP
+\fB\ snapshot clone <clonename> <snapname> \fR
+Create a clone of a snapshot volume, the resulting volume will be GlusterFS volume. User can provide a clone-name. To be able to take a clone, snapshot should be present and it should be in activated state.
+.TP
+\fB\ snapshot delete ( all | <snapname> | volume <volname> ) \fR
+If snapname is specified then mentioned snapshot is deleted. If volname is specified then all snapshots belonging to that particular volume is deleted. If keyword *all* is used then all snapshots belonging to the system is deleted.
+.TP
+\fB\ snapshot list [volname] \fR
+Lists all snapshots taken. If volname is provided, then only the snapshots belonging to that particular volume is listed.
+.TP
+\fB\ snapshot info [snapname | (volume <volname>)] \fR
+This command gives information such as snapshot name, snapshot UUID, time at which snapshot was created, and it lists down the snap-volume-name, number of snapshots already taken and number of snapshots still available for that particular volume, and the state of the snapshot. If snapname is specified then info of the mentioned snapshot is displayed. If volname is specified then info of all snapshots belonging to that volume is displayed. If both snapname and volname is not specified then info of all the snapshots present in the system are displayed.
+.TP
+\fB\ snapshot status [snapname | (volume <volname>)] \fR
+This command gives status of the snapshot. The details included are snapshot brick path, volume group(LVM details), status of the snapshot bricks, PID of the bricks, data percentage filled for that particular volume group to which the snapshots belong to, and total size of the logical volume.
+
+If snapname is specified then status of the mentioned snapshot is displayed. If volname is specified then status of all snapshots belonging to that volume is displayed. If both snapname and volname is not specified then status of all the snapshots present in the system are displayed.
+.TP
+\fB\ snapshot config [volname] ([snap-max-hard-limit <count>] [snap-max-soft-limit <percent>]) | ([auto-delete <enable|disable>]) | ([activate-on-create <enable|disable>])
+Displays and sets the snapshot config values.
+
+snapshot config without any keywords displays the snapshot config values of all volumes in the system. If volname is provided, then the snapshot config values of that volume is displayed.
+
+Snapshot config command along with keywords can be used to change the existing config values. If volname is provided then config value of that volume is changed, else it will set/change the system limit.
+
+snap-max-soft-limit and auto-delete are global options, that will be inherited by all volumes in the system and cannot be set to individual volumes.
+
+snap-max-hard-limit can be set globally, as well as per volume. The lowest limit between the global system limit and the volume specific limit, becomes the
+"Effective snap-max-hard-limit" for a volume.
+
+snap-max-soft-limit is a percentage value, which is applied on the "Effective snap-max-hard-limit" to get the "Effective snap-max-soft-limit".
+
+When auto-delete feature is enabled, then upon reaching the "Effective snap-max-soft-limit", with every successful snapshot creation, the oldest snapshot will be deleted.
+
+When auto-delete feature is disabled, then upon reaching the "Effective snap-max-soft-limit", the user gets a warning with every successful snapshot creation.
+
+When auto-delete feature is disabled, then upon reaching the "Effective snap-max-hard-limit", further snapshot creations will not be allowed.
+
+activate-on-create is disabled by default. If you enable activate-on-create, then further snapshot will be activated during the time of snapshot creation.
+.TP
+\fB\ snapshot activate <snapname> \fR
+Activates the mentioned snapshot.
+
+Note : By default the snapshot is activated during snapshot creation.
+.TP
+\fB\ snapshot deactivate <snapname> \fR
+Deactivates the mentioned snapshot.
+.TP
+\fB\ snapshot help \fR
+Display help for the snapshot commands.
+.SS "Self-heal Commands"
+.TP
+\fB\ volume heal <VOLNAME>\fR
+Triggers index self heal for the files that need healing.
+
+.TP
+\fB\ volume heal <VOLNAME> [enable | disable]\fR
+Enable/disable self-heal-daemon for volume <VOLNAME>.
+
+.TP
+\fB\ volume heal <VOLNAME> full\fR
+Triggers self heal on all the files.
+
+.TP
+\fB\ volume heal <VOLNAME> info \fR
+Lists the files that need healing.
+
+.TP
+\fB\ volume heal <VOLNAME> info split-brain \fR
+Lists the files which are in split-brain state.
+
+.TP
+\fB\ volume heal <VOLNAME> statistics \fR
+Lists the crawl statistics.
+
+.TP
+\fB\ volume heal <VOLNAME> statistics heal-count \fR
+Displays the count of files to be healed.
+
+.TP
+\fB\ volume heal <VOLNAME> statistics heal-count replica <HOSTNAME:BRICKNAME> \fR
+Displays the number of files to be healed from a particular replica subvolume to which the brick <HOSTNAME:BRICKNAME> belongs.
+
+.TP
+\fB\ volume heal <VOLNAME> split-brain bigger-file <FILE> \fR
+Performs healing of <FILE> which is in split-brain by choosing the bigger file in the replica as source.
+
+.TP
+\fB\ volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME> \fR
+Selects <HOSTNAME:BRICKNAME> as the source for all the files that are in split-brain in that replica and heals them.
+
+.TP
+\fB\ volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME> <FILE> \fR
+Selects the split-brained <FILE> present in <HOSTNAME:BRICKNAME> as source and completes heal.
.SS "Other Commands"
.TP
+\fB\ get-state [<daemon>] [[odir </path/to/output/dir/>] [file <filename>]] [detail|volumeoptions] \fR
+Get local state representation of mentioned daemon and store data in provided path information
+.TP
\fB\ help \fR
Display the command options.
.TP
@@ -121,7 +349,7 @@ Display the command options.
Exit the gluster command line interface.
.SH FILES
-/etc/glusterd/*
+/var/lib/glusterd/*
.SH SEE ALSO
.nf
\fBfusermount\fR(1), \fBmount.glusterfs\fR(8), \fBglusterfs\fR(8), \fBglusterd\fR(8)
diff --git a/doc/glusterd.8 b/doc/glusterd.8
index 267a7e00f48..e3768c78761 100644
--- a/doc/glusterd.8
+++ b/doc/glusterd.8
@@ -1,20 +1,11 @@
.\"
-.\" Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" along with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
@@ -39,6 +30,9 @@ File to use for logging.
\fB\-L <LOGLEVEL>, \fB\-\-log\-level=<LOGLEVEL>\fR
Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
.TP
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
+.TP
\fB\-\-debug\fR
Run the program in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
and \fB\-\-log\-file\fR to console.
@@ -59,7 +53,7 @@ Print the program version.
.PP
.SH FILES
-/etc/glusterd/*
+/var/lib/glusterd/*
.SH SEE ALSO
.nf
diff --git a/doc/glusterd.vol b/doc/glusterd.vol
deleted file mode 100644
index de17d8fd8f9..00000000000
--- a/doc/glusterd.vol
+++ /dev/null
@@ -1,8 +0,0 @@
-volume management
- type mgmt/glusterd
- option working-directory /var/lib/glusterd
- option transport-type socket,rdma
- option transport.socket.keepalive-time 10
- option transport.socket.keepalive-interval 2
- option transport.socket.read-fail-log off
-end-volume
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8
index 37bf67d1821..3d359ea85e4 100644
--- a/doc/glusterfs.8
+++ b/doc/glusterfs.8
@@ -1,19 +1,10 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.\"
@@ -32,7 +23,8 @@ be made of any commodity hardware, such as x86-64 server with SATA-II RAID and
Infiniband HBA.
GlusterFS is fully POSIX compliant file system. On client side, it has dependency
-on FUSE package, on server side, it works seemlessly on different operating systems. Currently supported on GNU/Linux and Solaris.
+on FUSE package, on server side, it works seemlessly on different operating systems.
+Currently supported on GNU/Linux and Solaris.
.SH OPTIONS
@@ -40,33 +32,58 @@ on FUSE package, on server side, it works seemlessly on different operating syst
.PP
.TP
\fB\-f, \fB\-\-volfile=VOLUME-FILE\fR
-File to use as VOLUME-FILE (the default is /etc/glusterfs/glusterfs.vol).
+File to use as VOLUME-FILE.
.TP
\fB\-l, \fB\-\-log\-file=LOGFILE\fR
-File to use for logging.
+File to use for logging (the default is <INSTALL-DIR>/var/log/glusterfs/<MOUNT-POINT>.log).
.TP
\fB\-L, \fB\-\-log\-level=LOGLEVEL\fR
-Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is WARNING).
+Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
.TP
\fB\-s, \fB\-\-volfile\-server=SERVER\fR
Server to get the volume from. This option overrides \fB\-\-volfile \fR option.
+.TP
+\fB\-\-volfile\-max\-fetch\-attempts=MAX\-ATTEMPTS\fR
+Maximum number of connect attempts to server. This option should be provided with
+\fB\-\-volfile\-server\fR option (the default is 1).
.SS "Advanced options"
.PP
.TP
+\fB\-\-acl\fR
+Mount the filesystem with POSIX ACL support.
+.TP
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
+.TP
\fB\-\-debug\fR
Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG,
and \fB\-\-log\-file\fR to console.
.TP
+\fB\-\-enable\-ino32=BOOL\fR
+Use 32-bit inodes when mounting to workaround application that doesn't support 64-bit inodes.
+.TP
+\fB\-\-fopen\-keep\-cache[=BOOL]\fR
+Do not purge the cache on file open (default: false).
+.TP
+\fB\-\-mac\-compat=BOOL\fR
+Provide stubs for attributes needed for seamless operation on Macs (the default is off).
+.TP
\fB\-N, \fB\-\-no\-daemon\fR
Run in the foreground.
.TP
-\fB\-\-read\-only\fR
-Make the file system read-only.
-.TP
\fB\-p, \fB\-\-pid\-file=PIDFILE\fR
File to use as PID file.
.TP
+\fB\-\-read\-only\fR
+Mount the file system in 'read-only' mode.
+.TP
+\fB\-\-selinux\fR
+Enable SELinux label (extended attributes) support on inodes.
+.TP
+\fB\-S, \fB\-\-socket\-file=SOCKFILE\fR
+File to use as unix-socket.
+.TP
\fB\-\-volfile\-id=KEY\fR
Key of the volume file to be fetched from the server.
.TP
@@ -74,26 +91,61 @@ Key of the volume file to be fetched from the server.
Port number of volfile server.
.TP
\fB\-\-volfile\-server\-transport=TRANSPORT\fR
-Transport type to get volume file from server (the default is socket).
+Transport type to get volume file from server (the default is tcp).
.TP
\fB\-\-volume\-name=VOLUME\-NAME\fR
Volume name to be used for MOUNT-POINT (the default is top most volume in VOLUME-FILE).
.TP
+\fB\-\-worm\fR
+Mount the filesystem in 'worm' mode.
+.TP
\fB\-\-xlator\-option=VOLUME\-NAME.OPTION=VALUE\fR
Add/Override a translator option for a volume with the specified value.
+.TP
+\fB\-\-subdir\-mount=SUBDIR\-MOUNT\-PATH\fR
+Mount subdirectory instead of the '/' of volume.
.SS "Fuse options"
.PP
.TP
+\fB\-\-attr\-times\-granularity=NANOSECONDS\fR
+Declare supported granularity of file attribute times (default is 0 which kernel handles as unspecified; valid real values are between 1 and 1000000000).
+.TP
\fB\-\-attribute\-timeout=SECONDS\fR
Set attribute timeout to SECONDS for inodes in fuse kernel module (the default is 1).
.TP
+\fB\-\-background\-qlen=N\fR
+Set fuse module's background queue length to N (the default is 64).
+.TP
+\fB\-\-congestion\-threshold=N\fR
+Set fuse module's congestion threshold to N (the default is 48).
+.TP
+\fB\-\-direct\-io\-mode=BOOL|auto\fR
+Specify fuse direct I/O strategy (the default is auto).
+.TP
+\fB\-\-dump-fuse=PATH\f\R
+Dump fuse traffic to PATH
+.TP
\fB\-\-entry\-timeout=SECONDS\fR
Set entry timeout to SECONDS in fuse kernel module (the default is 1).
.TP
-\fB\-\-direct\-io\-mode=BOOL\fR
-Enable/Disable the direct-I/O mode in fuse module (the default is enable).
+\fB\-\-gid\-timeout=SECONDS\fR
+Set auxiliary group list timeout to SECONDS for fuse translator (the default is 0).
+.TP
+\fB\-\-kernel-writeback-cache=BOOL\fR
+Enable fuse in-kernel writeback cache.
+.TP
+\fB\-\-negative\-timeout=SECONDS\fR
+Set negative timeout to SECONDS in fuse kernel module (the default is 0).
+.TP
+\fB\-\-auto\-invalidation=BOOL\fR
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on].
+.TP
+\fB\-\-volfile-check\fR
+Enable strict volume file checking.
.SS "Miscellaneous Options"
.PP
@@ -110,7 +162,7 @@ Print the program version.
.PP
.SH FILES
-/etc/glusterfs/*.vol, /etc/glusterd/vols/*/*.vol
+/var/lib/glusterd/vols/*/*.vol
.SH EXAMPLES
mount a volume named foo on server bar with log level DEBUG on mount point
/mnt/foo
@@ -124,6 +176,6 @@ mount a volume named foo on server bar with log level DEBUG on mount point
.fi
.SH COPYRIGHT
.nf
-Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+Copyright(c) 2006-2011 Red Hat, Inc. <http://www.redhat.com>
\fR
.fi
diff --git a/doc/glusterfs.vol.sample b/doc/glusterfs.vol.sample
deleted file mode 100644
index 977363b921b..00000000000
--- a/doc/glusterfs.vol.sample
+++ /dev/null
@@ -1,53 +0,0 @@
-### file: client-volume.vol.sample
-
-#####################################
-### GlusterFS Client Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Add client feature and attach to remote subvolume
-volume client
- type protocol/client
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
- option remote-host 127.0.0.1 # IP address of the remote brick
-# option transport.socket.remote-port 24016
-
-# option transport-type rdma
-# option transport.rdma.remote-port 24016
-# option transport.rdma.work-request-send-count 16
-# option transport.rdma.work-request-recv-count 16
-
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### Add readahead feature
-#volume readahead
-# type performance/read-ahead
-# option page-count 2 # cache per file = (page-count x page-size)
-# subvolumes client
-#end-volume
-
-### Add IO-Cache feature
-#volume iocache
-# type performance/io-cache
-# subvolumes readahead
-#end-volume
-
-### Add writeback feature
-#volume writeback
-# type performance/write-behind
-# option window-size 2MB
-# option flush-behind off
-# subvolumes iocache
-#end-volume
diff --git a/doc/glusterfsd.8 b/doc/glusterfsd.8
index 17d053a5c07..bc1de2a8c80 100644
--- a/doc/glusterfsd.8
+++ b/doc/glusterfsd.8
@@ -1,19 +1,10 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 20088888888-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.\"
@@ -60,6 +51,9 @@ Server to get the volume from. This option overrides \fB\-\-volfile option
.PP
.TP
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
+.TP
\fB\-\-debug\fR
Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
and \fB\-\-log\-file\fR to console
@@ -89,7 +83,7 @@ KEY of the volume file to be fetched from server
Port number of volfile server
.TP
\fB\-\-volfile\-server\-transport=TRANSPORT\fR
-Transport type to get volume file from server [default: socket]
+Transport type to get volume file from server [default: tcp]
.TP
\fB\-\-volume\-name=VOLUME\-NAME\fR
Volume name to be used for MOUNT-POINT [default: top most volume in
@@ -110,6 +104,14 @@ Set entry timeout to SECONDS in fuse kernel module [default: 1]
.TP
\fB\-\-direct\-io\-mode=BOOL\fR
Enable/Disable direct-io mode in fuse module [default: enable]
+.TP
+\fB\-\-resolve-gids\fR
+Resolve all auxiliary groups in fuse translator (max 32 otherwise)
+.TP
+\fB\-\-auto\-invalidation=BOOL\fR
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on]
.SS "Miscellaneous Options"
.PP
@@ -131,7 +133,7 @@ Print program version
.SH EXAMPLES
Start a GlusterFS server on localhost with volume name foo
-glusterfsd \-s localhost \-\-volfile\-id foo.server.media-disk\-1 \-p /etc/glusterd/vols/foo/run/server\-media\-disk\-1.pid \-S /tmp/<uniqueid>.socket \-\-brick-name /media/disk\-1 \-l /var/log/glusterfs/bricks/media\-disk\-1.log \-\-brick\-port 24009 \-\-xlator\-option foo\-server.listen-port=24009
+glusterfsd \-s localhost \-\-volfile\-id foo.server.media-disk\-1 \-p /var/lib/glusterd/vols/foo/run/server\-media\-disk\-1.pid \-S /tmp/<uniqueid>.socket \-\-brick-name /media/disk\-1 \-l /var/log/glusterfs/bricks/media\-disk\-1.log \-\-brick\-port 24009 \-\-xlator\-option foo\-server.listen-port=24009
.SH SEE ALSO
.nf
diff --git a/doc/glusterfsd.vol.sample b/doc/glusterfsd.vol.sample
deleted file mode 100644
index ec2fd341ef0..00000000000
--- a/doc/glusterfsd.vol.sample
+++ /dev/null
@@ -1,44 +0,0 @@
-### file: server-volume.vol.sample
-
-#####################################
-### GlusterFS Server Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Multiple values to options will be : delimited.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
-# option transport.socket.bind-address 192.168.1.10 # Default is to listen
- # on all interfaces
-# option transport.socket.listen-port 24016
-
-# option transport-type rdma
-# option transport.rdma.listen-port 24016
-# option transport.rdma.work-request-send-count 64
-# option transport.rdma.work-request-recv-count 64
-
- subvolumes brick
-# NOTE: Access to any volume through protocol/server is denied by
-# default. You need to explicitly grant access through # "auth"
-# option.
- option auth.addr.brick.allow * # Allow access to "brick" volume
-end-volume
diff --git a/doc/legacy/authentication.txt b/doc/legacy/authentication.txt
deleted file mode 100644
index 73cb21d73ba..00000000000
--- a/doc/legacy/authentication.txt
+++ /dev/null
@@ -1,112 +0,0 @@
-
-* Authentication is provided by two modules addr and login. Login based authentication uses username/password from client for authentication. Each module returns either ACCEPT, REJCET or DONT_CARE. DONT_CARE is returned if the input authentication information to the module is not concerned to its working. The theory behind authentication is that "none of the auth modules should return REJECT and atleast one of them should return ACCEPT"
-
-* Currently all the authentication related information is passed un-encrypted over the network from client to server.
-
-----------------------------------------------------------------------------------------------------
-* options provided in protocol/client:
- * for username/password based authentication:
- option username <username>
- option password <password>
- * client can have only one set of username/password
- * for addr based authentication:
- * no options required in protocol/client. Client has to bind to privileged port (port < 1024 ) which means the process in which protocol/client is loaded has to be run as root.
-
-----------------------------------------------------------------------------------------------------
-* options provided in protocol/server:
- * for username/password based authentication:
- option auth.login.<brick>.allow [comma seperated list of usernames using which clients can connect to volume <brick>]
- option auth.login.<username>.password <password> #specify password <password> for username <username>
- * for addr based authentication:
- option auth.addr.<brick>.allow [comma seperated list of ip-addresses/unix-paths from which clients are allowed to connect to volume <brick>]
- option auth.addr.<brick>.reject [comma seperated list of ip-addresses/unix-paths from which clients are not allowed to connect to volume <brick>]
- * negation operator '!' is used to invert the sense of matching.
- Eg., option auth.addr.brick.allow !a.b.c.d #do not allow client from a.b.c.d to connect to volume brick
- option auth.addr.brick.reject !w.x.y.z #allow client from w.x.y.z to connect to volume brick
- * wildcard '*' can be used to match any ip-address/unix-path
-
-----------------------------------------------------------------------------------------------------
-
-* Usecases:
-
-* username/password based authentication only
- protocol/client:
- option username foo
- option password foo-password
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo,who #,other users allowed to connect to foo-brick
- option auth.login.foo.password foo-password
- option auth.login.who.password who-password
-
- * in protocol/server, dont specify ip from which client is connecting in auth.addr.foo-brick.reject list
-
-****************************************************************************************************
-
-* ip based authentication only
- protocol/client:
- option remote-subvolume foo-brick
- * Client is connecting from a.b.c.d
-
- protocol/server:
- option auth.addr.foo-brick.allow a.b.c.d,e.f.g.h,i.j.k.l #, other ip addresses from which clients are allowed to connect to foo-brick
-
-****************************************************************************************************
-* ip and username/password based authentication
- * allow only "user foo from a.b.c.d"
- protocol/client:
- option username foo
- option password foo-password
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo
- option auth.login.foo.password foo-password
- option auth.addr.foo-brick.reject !a.b.c.d
-
- * allow only "user foo" from a.b.c.d i.e., only user foo is allowed from a.b.c.d, but anyone is allowed from ip addresses other than a.b.c.d
- protocol/client:
- option username foo
- option password foo-password
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo
- option auth.login.foo.password foo-password
- option auth.addr.foo-brick.allow !a.b.c.d
-
- * reject only "user shoo from a.b.c.d"
- protcol/client:
- option remote-subvolume shoo-brick
-
- protocol/server:
- # observe that no "option auth.login.shoo-brick.allow shoo" given
- # Also other users from a.b.c.d have to be explicitly allowed using auth.login.shoo-brick.allow ...
- option auth.addr.shoo-brick.allow !a.b.c.d
-
- * reject only "user shoo" from a.b.c.d i.e., user shoo from a.b.c.d has to be rejected.
- * same as reject only "user shoo from a.b.c.d" above, but rules have to be added whether to allow ip addresses (and users from those ips) other than a.b.c.d
-
-****************************************************************************************************
-
-* ip or username/password based authentication
-
- * allow user foo or clients from a.b.c.d
- protocol/client:
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo
- option auth.login.foo.password foo-password
- option auth.addr.foo-brick.allow a.b.c.d
-
- * reject user shoo or clients from a.b.c.d
- protocol/client:
- option remote-subvolume shoo-brick
-
- protocol/server:
- option auth.login.shoo-brick.allow <usernames other than shoo>
- #for each username mentioned in the above <usernames other than shoo> list, specify password as below
- option auth.login.<username other than shoo>.password password
- option auth.addr.shoo-brick.reject a.b.c.d
diff --git a/doc/legacy/booster.txt b/doc/legacy/booster.txt
deleted file mode 100644
index 6f215c63848..00000000000
--- a/doc/legacy/booster.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Introduction
-============
-* booster is a LD_PRELOADable library which boosts read/write performance by bypassing fuse for
- read() and write() calls.
-
-Requirements
-============
-* fetch volfile from glusterfs.
-* identify whether multiple files are from the same mount point. If so, use only one context.
-
-Design
-======
-* for a getxattr, along with other attributes, fuse returns following attributes.
- * contents of client volume-file.
- * mount point.
-
-* LD_PRELOADed booster.so maintains an hash table storing mount-points and libglusterfsclient handles
- so that handles are reused for files from same mount point.
-
-* it also maintains a fdtable. fdtable maps the fd (integer) returned to application to fd (pointer to fd struct)
- used by libglusterfsclient. application is returned the same fd as the one returned from libc apis.
-
-* During fork, these tables are overwritten to enable creation of fresh glusterfs context in child.
-
-Working
-=======
-* application willing to use booster LD_PRELOADs booster.so which is a wrapper library implementing
- open, read and write.
-
-* application should specify the path to logfile through the environment variable GLFS_BOOSTER_LOGFILE. If
- not specified, logging is done to /dev/stderr.
-
-* open call does,
- * real_open on the file.
- * fgetxattr(fd).
- * store the volume-file content got in the dictionary to a temparory file.
- * look in the hashtable for the mount-point, if already present get the libglusterfsclient handle from the
- hashtable. Otherwise get a new handle from libglusterfsclient (be careful about mount point not present in
- the hashtable and multiple glusterfs_inits running simultaneously for the same mount-point there by using
- multiple handles for the same mount point).
- * real_close (fd).
- * delete temporary volume-volfile.
- * glusterfs_open (handle, path, mode).
- * store the fd returned by glusterfs_open in the fdtable at the same index as the fd returned by real_open.
- * return the index as fd.
-
-* read/write calls do,
- * get the libglusterfsclient fd from fdtable.
- * if found use glusterfs_read/glusterfs_write, else use real_read/real_write.
-
-* close call does,
- * remove the fd from the fdtable.
-
-* other calls use real_calls.
diff --git a/doc/legacy/coding-standard.pdf b/doc/legacy/coding-standard.pdf
deleted file mode 100644
index bc9cb56202c..00000000000
--- a/doc/legacy/coding-standard.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/legacy/coding-standard.tex b/doc/legacy/coding-standard.tex
deleted file mode 100644
index abaedb69c3c..00000000000
--- a/doc/legacy/coding-standard.tex
+++ /dev/null
@@ -1,385 +0,0 @@
-\documentclass{article}[12pt]
-\usepackage{color}
-
-\begin{document}
-
-
-\hrule
-\begin{center}\textbf{\Large{GlusterFS Coding Standards}}\end{center}
-\begin{center}\textbf{\large{\textcolor{red}{Z} Research}}\end{center}
-\begin{center}{July 14, 2008}\end{center}
-\hrule
-
-\vspace{8ex}
-
-\section*{$\bullet$ Structure definitions should have a comment per member}
-
-Every member in a structure definition must have a comment about its
-purpose. The comment should be descriptive without being overly verbose.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- gf_lock_t lock; /* lock */
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- DBTYPE access_mode; /* access mode for accessing
- * the databases, can be
- * DB_HASH, DB_BTREE
- * (option access-mode <mode>)
- */
-\end{verbatim}
-
-\section*{$\bullet$ Declare all variables at the beginning of the function}
-All local variables in a function must be declared immediately after the
-opening brace. This makes it easy to keep track of memory that needs to be freed
-during exit. It also helps debugging, since gdb cannot handle variables
-declared inside loops or other such blocks.
-
-\section*{$\bullet$ Always initialize local variables}
-Every local variable should be initialized to a sensible default value
-at the point of its declaration. All pointers should be initialized to NULL,
-and all integers should be zero or (if it makes sense) an error value.
-
-\vspace{2ex}
-
-\textsl{Good}:
-
-\begin{verbatim}
- int ret = 0;
- char *databuf = NULL;
- int _fd = -1;
-\end{verbatim}
-
-\section*{$\bullet$ Initialization should always be done with a constant value}
-Never use a non-constant expression as the initialization value for a variable.
-
-\vspace{2ex}
-
-\textsl{Bad}:
-
-\begin{verbatim}
- pid_t pid = frame->root->pid;
- char *databuf = malloc (1024);
-\end{verbatim}
-
-\section*{$\bullet$ Validate all arguments to a function}
-All pointer arguments to a function must be checked for \texttt{NULL}.
-A macro named \texttt{VALIDATE} (in \texttt{common-utils.h})
-takes one argument, and if it is \texttt{NULL}, writes a log message and
-jumps to a label called \texttt{err} after setting op\_ret and op\_errno
-appropriately. It is recommended to use this template.
-
-\vspace{2ex}
-
-\textsl{Good}:
-
-\begin{verbatim}
- VALIDATE(frame);
- VALIDATE(this);
- VALIDATE(inode);
-\end{verbatim}
-
-\section*{$\bullet$ Never rely on precedence of operators}
-Never write code that relies on the precedence of operators to execute
-correctly. Such code can be hard to read and someone else might not
-know the precedence of operators as accurately as you do.
-\vspace{2ex}
-
-\textsl{Bad}:
-
-\begin{verbatim}
- if (op_ret == -1 && errno != ENOENT)
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- if ((op_ret == -1) && (errno != ENOENT))
-\end{verbatim}
-
-\section*{$\bullet$ Use exactly matching types}
-Use a variable of the exact type declared in the manual to hold the
-return value of a function. Do not use an ``equivalent'' type.
-
-\vspace{2ex}
-
-\textsl{Bad}:
-
-\begin{verbatim}
- int len = strlen (path);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- size_t len = strlen (path);
-\end{verbatim}
-
-\section*{$\bullet$ Never write code such as \texttt{foo->bar->baz}; check every pointer}
-Do not write code that blindly follows a chain of pointer
-references. Any pointer in the chain may be \texttt{NULL} and thus
-cause a crash. Verify that each pointer is non-null before following
-it.
-
-\section*{$\bullet$ Check return value of all functions and system calls}
-The return value of all system calls and API functions must be checked
-for success or failure.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- close (fd);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- op_ret = close (_fd);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "close on file %s failed (%s)", real_path,
- strerror (errno));
- op_errno = errno;
- goto out;
- }
-\end{verbatim}
-
-
-\section*{$\bullet$ Gracefully handle failure of malloc}
-GlusterFS should never crash or exit due to lack of memory. If a
-memory allocation fails, the call should be unwound and an error
-returned to the user.
-
-\section*{$\bullet$ Use result args and reserve the return value to indicate success or failure}
-The return value of every functions must indicate success or failure (unless
-it is impossible for the function to fail --- e.g., boolean functions). If
-the function needs to return additional data, it must be returned using a
-result (pointer) argument.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- int32_t dict_get_int32 (dict_t *this, char *key);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- int dict_get_int32 (dict_t *this, char *key, int32_t *val);
-\end{verbatim}
-
-\section*{$\bullet$ Always use the `n' versions of string functions}
-Unless impossible, use the length-limited versions of the string functions.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- strcpy (entry_path, real_path);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- strncpy (entry_path, real_path, entry_path_len);
-\end{verbatim}
-
-\section*{$\bullet$ No dead or commented code}
-There must be no dead code (code to which control can never be passed) or
-commented out code in the codebase.
-
-\section*{$\bullet$ Only one unwind and return per function}
-There must be only one exit out of a function. \texttt{UNWIND} and return
-should happen at only point in the function.
-
-\section*{$\bullet$ Function length or Keep functions small}
-We live in the UNIX-world where modules do one thing and do it well.
-This rule should apply to our functions also. If a function is very long, try splitting it
-into many little helper functions. The question is, in a coding
-spree, how do we know a function is long and unreadable. One rule of
-thumb given by Linus Torvalds is that, a function should be broken-up
-if you have 4 or more levels of indentation going on for more than 3-4
-lines.
-
-\vspace{2ex}
-\textsl{Example for a helper function}:
-\begin{verbatim}
- static int
- same_owner (posix_lock_t *l1, posix_lock_t *l2)
- {
- return ((l1->client_pid == l2->client_pid) &&
- (l1->transport == l2->transport));
- }
-\end{verbatim}
-
-\section*{$\bullet$ Defining functions as static}
-Define internal functions as static only if you're
-very sure that there will not be a crash(..of any kind..) emanating in
-that function. If there is even a remote possibility, perhaps due to
-pointer derefering, etc, declare the function as non-static. This
-ensures that when a crash does happen, the function name shows up the
-in the back-trace generated by libc. However, doing so has potential
-for polluting the function namespace, so to avoid conflicts with other
-components in other parts, ensure that the function names are
-prepended with a prefix that identify the component to which it
-belongs. For eg. non-static functions in io-threads translator start
-with iot\_.
-
-\section*{$\bullet$ Ensure function calls wrap around after
-80-columns.}
-Place remaining arguments on the next line if needed.
-
-\section*{$\bullet$ Functions arguments and function definition}
-Place all the arguments of a function definition on the same line
-until the line goes beyond 80-cols. Arguments that extend beyind
-80-cols should be placed on the next line.
-
-\section*{Style issues}
-
-\subsection*{Brace placement}
-Use K\&R/Linux style of brace placement for blocks.
-
-\textsl{Example}:
-\begin{verbatim}
- int some_function (...)
- {
- if (...) {
- /* ... */
- } else if (...) {
- /* ... */
- } else {
- /* ... */
- }
-
- do {
- /* ... */
- } while (cond);
- }
-\end{verbatim}
-
-\subsection*{Indentation}
-Use \textbf{eight} spaces for indenting blocks. Ensure that your
-file contains only spaces and not tab characters. You can do this
-in Emacs by selecting the entire file (\texttt{C-x h}) and
-running \texttt{M-x untabify}.
-
-To make Emacs indent lines automatically by eight spaces, add this
-line to your \texttt{.emacs}:
-
-\begin{verbatim}
- (add-hook 'c-mode-hook (lambda () (c-set-style "linux")))
-\end{verbatim}
-
-\subsection*{Comments}
-Write a comment before every function describing its purpose (one-line),
-its arguments, and its return value. Mention whether it is an internal
-function or an exported function.
-
-Write a comment before every structure describing its purpose, and
-write comments about each of its members.
-
-Follow the style shown below for comments, since such comments
-can then be automatically extracted by doxygen to generate
-documentation.
-
-\textsl{Example}:
-\begin{verbatim}
-/**
- * hash_name -hash function for filenames
- * @par: parent inode number
- * @name: basename of inode
- * @mod: number of buckets in the hashtable
- *
- * @return: success: bucket number
- * failure: -1
- *
- * Not for external use.
- */
-\end{verbatim}
-
-\subsection*{Indicating critical sections}
-To clearly show regions of code which execute with locks held, use
-the following format:
-
-\begin{verbatim}
- pthread_mutex_lock (&mutex);
- {
- /* code */
- }
- pthread_mutex_unlock (&mutex);
-\end{verbatim}
-
-\section*{A skeleton fop function}
-This is the recommended template for any fop. In the beginning come
-the initializations. After that, the `success' control flow should be
-linear. Any error conditions should cause a \texttt{goto} to a single
-point, \texttt{out}. At that point, the code should detect the error
-that has occured and do appropriate cleanup.
-
-\begin{verbatim}
-int32_t
-sample_fop (call_frame_t *frame, xlator_t *this, ...)
-{
- char * var1 = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- DIR * dir = NULL;
- struct posix_fd * pfd = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
-
- /* other validations */
-
- dir = opendir (...);
-
- if (dir == NULL) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s (%s)", loc->path,
- strerror (op_errno));
- goto out;
- }
-
- /* another system call */
- if (...) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- goto out;
- }
-
- /* ... */
-
- out:
- if (op_ret == -1) {
-
- /* check for all the cleanup that needs to be
- done */
-
- if (dir) {
- closedir (dir);
- dir = NULL;
- }
-
- if (pfd) {
- if (pfd->path)
- FREE (pfd->path);
- FREE (pfd);
- pfd = NULL;
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-\end{verbatim}
-
-\end{document}
diff --git a/doc/legacy/errno.list.bsd.txt b/doc/legacy/errno.list.bsd.txt
deleted file mode 100644
index 350af25e4ab..00000000000
--- a/doc/legacy/errno.list.bsd.txt
+++ /dev/null
@@ -1,376 +0,0 @@
-/*-
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)errno.h 8.5 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/sys/errno.h,v 1.28 2005/04/02 12:33:28 das Exp $
- */
-
-#ifndef _SYS_ERRNO_H_
-#define _SYS_ERRNO_H_
-
-#ifndef _KERNEL
-#include <sys/cdefs.h>
-__BEGIN_DECLS
-int * __error(void);
-__END_DECLS
-#define errno (* __error())
-#endif
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* Input/output error */
-#define ENXIO 6 /* Device not configured */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file descriptor */
-#define ECHILD 10 /* No child processes */
-#define EDEADLK 11 /* Resource deadlock avoided */
- /* 11 was EAGAIN */
-#define ENOMEM 12 /* Cannot allocate memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#ifndef _POSIX_SOURCE
-#define ENOTBLK 15 /* Block device required */
-#endif
-#define EBUSY 16 /* Device busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* Operation not supported by device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* Too many open files in system */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#ifndef _POSIX_SOURCE
-#define ETXTBSY 26 /* Text file busy */
-#endif
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only filesystem */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-
-/* math software */
-#define EDOM 33 /* Numerical argument out of domain */
-#define ERANGE 34 /* Result too large */
-
-/* non-blocking and interrupt i/o */
-#define EAGAIN 35 /* Resource temporarily unavailable */
-#ifndef _POSIX_SOURCE
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define EINPROGRESS 36 /* Operation now in progress */
-#define EALREADY 37 /* Operation already in progress */
-
-/* ipc/network software -- argument errors */
-#define ENOTSOCK 38 /* Socket operation on non-socket */
-#define EDESTADDRREQ 39 /* Destination address required */
-#define EMSGSIZE 40 /* Message too long */
-#define EPROTOTYPE 41 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 42 /* Protocol not available */
-#define EPROTONOSUPPORT 43 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
-#define EOPNOTSUPP 45 /* Operation not supported */
-#define ENOTSUP EOPNOTSUPP /* Operation not supported */
-#define EPFNOSUPPORT 46 /* Protocol family not supported */
-#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */
-#define EADDRINUSE 48 /* Address already in use */
-#define EADDRNOTAVAIL 49 /* Can't assign requested address */
-
-/* ipc/network software -- operational errors */
-#define ENETDOWN 50 /* Network is down */
-#define ENETUNREACH 51 /* Network is unreachable */
-#define ENETRESET 52 /* Network dropped connection on reset */
-#define ECONNABORTED 53 /* Software caused connection abort */
-#define ECONNRESET 54 /* Connection reset by peer */
-#define ENOBUFS 55 /* No buffer space available */
-#define EISCONN 56 /* Socket is already connected */
-#define ENOTCONN 57 /* Socket is not connected */
-#define ESHUTDOWN 58 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 59 /* Too many references: can't splice */
-#define ETIMEDOUT 60 /* Operation timed out */
-#define ECONNREFUSED 61 /* Connection refused */
-
-#define ELOOP 62 /* Too many levels of symbolic links */
-#endif /* _POSIX_SOURCE */
-#define ENAMETOOLONG 63 /* File name too long */
-
-/* should be rearranged */
-#ifndef _POSIX_SOURCE
-#define EHOSTDOWN 64 /* Host is down */
-#define EHOSTUNREACH 65 /* No route to host */
-#endif /* _POSIX_SOURCE */
-#define ENOTEMPTY 66 /* Directory not empty */
-
-/* quotas & mush */
-#ifndef _POSIX_SOURCE
-#define EPROCLIM 67 /* Too many processes */
-#define EUSERS 68 /* Too many users */
-#define EDQUOT 69 /* Disc quota exceeded */
-
-/* Network File System */
-#define ESTALE 70 /* Stale NFS file handle */
-#define EREMOTE 71 /* Too many levels of remote in path */
-#define EBADRPC 72 /* RPC struct is bad */
-#define ERPCMISMATCH 73 /* RPC version wrong */
-#define EPROGUNAVAIL 74 /* RPC prog. not avail */
-#define EPROGMISMATCH 75 /* Program version wrong */
-#define EPROCUNAVAIL 76 /* Bad procedure for program */
-#endif /* _POSIX_SOURCE */
-
-#define ENOLCK 77 /* No locks available */
-#define ENOSYS 78 /* Function not implemented */
-
-#ifndef _POSIX_SOURCE
-#define EFTYPE 79 /* Inappropriate file type or format */
-#define EAUTH 80 /* Authentication error */
-#define ENEEDAUTH 81 /* Need authenticator */
-#define EIDRM 82 /* Identifier removed */
-#define ENOMSG 83 /* No message of desired type */
-#define EOVERFLOW 84 /* Value too large to be stored in data type */
-#define ECANCELED 85 /* Operation canceled */
-#define EILSEQ 86 /* Illegal byte sequence */
-#define ENOATTR 87 /* Attribute not found */
-
-#define EDOOFUS 88 /* Programming error */
-#endif /* _POSIX_SOURCE */
-
-#define EBADMSG 89 /* Bad message */
-#define EMULTIHOP 90 /* Multihop attempted */
-#define ENOLINK 91 /* Link has been severed */
-#define EPROTO 92 /* Protocol error */
-
-#ifndef _POSIX_SOURCE
-#define ELAST 92 /* Must be equal largest errno */
-#endif /* _POSIX_SOURCE */
-
-#ifdef _KERNEL
-/* pseudo-errors returned inside kernel to modify return to process */
-#define ERESTART (-1) /* restart syscall */
-#define EJUSTRETURN (-2) /* don't modify regs, just return */
-#define ENOIOCTL (-3) /* ioctl not handled by this layer */
-#define EDIRIOCTL (-4) /* do direct ioctl in GEOM */
-#endif
-
-#endif
-/*-
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)errno.h 8.5 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/sys/errno.h,v 1.28 2005/04/02 12:33:28 das Exp $
- */
-
-#ifndef _SYS_ERRNO_H_
-#define _SYS_ERRNO_H_
-
-#ifndef _KERNEL
-#include <sys/cdefs.h>
-__BEGIN_DECLS
-int * __error(void);
-__END_DECLS
-#define errno (* __error())
-#endif
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* Input/output error */
-#define ENXIO 6 /* Device not configured */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file descriptor */
-#define ECHILD 10 /* No child processes */
-#define EDEADLK 11 /* Resource deadlock avoided */
- /* 11 was EAGAIN */
-#define ENOMEM 12 /* Cannot allocate memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#ifndef _POSIX_SOURCE
-#define ENOTBLK 15 /* Block device required */
-#endif
-#define EBUSY 16 /* Device busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* Operation not supported by device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* Too many open files in system */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#ifndef _POSIX_SOURCE
-#define ETXTBSY 26 /* Text file busy */
-#endif
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only filesystem */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-
-/* math software */
-#define EDOM 33 /* Numerical argument out of domain */
-#define ERANGE 34 /* Result too large */
-
-/* non-blocking and interrupt i/o */
-#define EAGAIN 35 /* Resource temporarily unavailable */
-#ifndef _POSIX_SOURCE
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define EINPROGRESS 36 /* Operation now in progress */
-#define EALREADY 37 /* Operation already in progress */
-
-/* ipc/network software -- argument errors */
-#define ENOTSOCK 38 /* Socket operation on non-socket */
-#define EDESTADDRREQ 39 /* Destination address required */
-#define EMSGSIZE 40 /* Message too long */
-#define EPROTOTYPE 41 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 42 /* Protocol not available */
-#define EPROTONOSUPPORT 43 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
-#define EOPNOTSUPP 45 /* Operation not supported */
-#define ENOTSUP EOPNOTSUPP /* Operation not supported */
-#define EPFNOSUPPORT 46 /* Protocol family not supported */
-#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */
-#define EADDRINUSE 48 /* Address already in use */
-#define EADDRNOTAVAIL 49 /* Can't assign requested address */
-
-/* ipc/network software -- operational errors */
-#define ENETDOWN 50 /* Network is down */
-#define ENETUNREACH 51 /* Network is unreachable */
-#define ENETRESET 52 /* Network dropped connection on reset */
-#define ECONNABORTED 53 /* Software caused connection abort */
-#define ECONNRESET 54 /* Connection reset by peer */
-#define ENOBUFS 55 /* No buffer space available */
-#define EISCONN 56 /* Socket is already connected */
-#define ENOTCONN 57 /* Socket is not connected */
-#define ESHUTDOWN 58 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 59 /* Too many references: can't splice */
-#define ETIMEDOUT 60 /* Operation timed out */
-#define ECONNREFUSED 61 /* Connection refused */
-
-#define ELOOP 62 /* Too many levels of symbolic links */
-#endif /* _POSIX_SOURCE */
-#define ENAMETOOLONG 63 /* File name too long */
-
-/* should be rearranged */
-#ifndef _POSIX_SOURCE
-#define EHOSTDOWN 64 /* Host is down */
-#define EHOSTUNREACH 65 /* No route to host */
-#endif /* _POSIX_SOURCE */
-#define ENOTEMPTY 66 /* Directory not empty */
-
-/* quotas & mush */
-#ifndef _POSIX_SOURCE
-#define EPROCLIM 67 /* Too many processes */
-#define EUSERS 68 /* Too many users */
-#define EDQUOT 69 /* Disc quota exceeded */
-
-/* Network File System */
-#define ESTALE 70 /* Stale NFS file handle */
-#define EREMOTE 71 /* Too many levels of remote in path */
-#define EBADRPC 72 /* RPC struct is bad */
-#define ERPCMISMATCH 73 /* RPC version wrong */
-#define EPROGUNAVAIL 74 /* RPC prog. not avail */
-#define EPROGMISMATCH 75 /* Program version wrong */
-#define EPROCUNAVAIL 76 /* Bad procedure for program */
-#endif /* _POSIX_SOURCE */
-
-#define ENOLCK 77 /* No locks available */
-#define ENOSYS 78 /* Function not implemented */
-
-#ifndef _POSIX_SOURCE
-#define EFTYPE 79 /* Inappropriate file type or format */
-#define EAUTH 80 /* Authentication error */
-#define ENEEDAUTH 81 /* Need authenticator */
-#define EIDRM 82 /* Identifier removed */
-#define ENOMSG 83 /* No message of desired type */
-#define EOVERFLOW 84 /* Value too large to be stored in data type */
-#define ECANCELED 85 /* Operation canceled */
-#define EILSEQ 86 /* Illegal byte sequence */
-#define ENOATTR 87 /* Attribute not found */
-
-#define EDOOFUS 88 /* Programming error */
-#endif /* _POSIX_SOURCE */
-
-#define EBADMSG 89 /* Bad message */
-#define EMULTIHOP 90 /* Multihop attempted */
-#define ENOLINK 91 /* Link has been severed */
-#define EPROTO 92 /* Protocol error */
-
-#ifndef _POSIX_SOURCE
-#define ELAST 92 /* Must be equal largest errno */
-#endif /* _POSIX_SOURCE */
-
-#ifdef _KERNEL
-/* pseudo-errors returned inside kernel to modify return to process */
-#define ERESTART (-1) /* restart syscall */
-#define EJUSTRETURN (-2) /* don't modify regs, just return */
-#define ENOIOCTL (-3) /* ioctl not handled by this layer */
-#define EDIRIOCTL (-4) /* do direct ioctl in GEOM */
-#endif
-
-#endif
diff --git a/doc/legacy/errno.list.linux.txt b/doc/legacy/errno.list.linux.txt
deleted file mode 100644
index cc868644b36..00000000000
--- a/doc/legacy/errno.list.linux.txt
+++ /dev/null
@@ -1,1586 +0,0 @@
-#define ICONV_SUPPORTS_ERRNO 1
-#include <errno.h>
-/* Error constants. Linux specific version.
- Copyright (C) 1996, 1997, 1998, 1999, 2005 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifdef _ERRNO_H
-
-# undef EDOM
-# undef EILSEQ
-# undef ERANGE
-# include <linux/errno.h>
-
-/* Linux has no ENOTSUP error code. */
-# define ENOTSUP EOPNOTSUPP
-
-/* Older Linux versions also had no ECANCELED error code. */
-# ifndef ECANCELED
-# define ECANCELED 125
-# endif
-
-/* Support for error codes to support robust mutexes was added later, too. */
-# ifndef EOWNERDEAD
-# define EOWNERDEAD 130
-# define ENOTRECOVERABLE 131
-# endif
-
-# ifndef __ASSEMBLER__
-/* Function to get address of global `errno' variable. */
-extern int *__errno_location (void) __THROW __attribute__ ((__const__));
-
-# if !defined _LIBC || defined _LIBC_REENTRANT
-/* When using threads, errno is a per-thread value. */
-# define errno (*__errno_location ())
-# endif
-# endif /* !__ASSEMBLER__ */
-#endif /* _ERRNO_H */
-
-#if !defined _ERRNO_H && defined __need_Emath
-/* This is ugly but the kernel header is not clean enough. We must
- define only the values EDOM, EILSEQ and ERANGE in case __need_Emath is
- defined. */
-# define EDOM 33 /* Math argument out of domain of function. */
-# define EILSEQ 84 /* Illegal byte sequence. */
-# define ERANGE 34 /* Math result not representable. */
-#endif /* !_ERRNO_H && __need_Emath */
-/* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef APR_ERRNO_H
-#define APR_ERRNO_H
-
-/**
- * @file apr_errno.h
- * @brief APR Error Codes
- */
-
-#include "apr.h"
-
-#if APR_HAVE_ERRNO_H
-#include <errno.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @defgroup apr_errno Error Codes
- * @ingroup APR
- * @{
- */
-
-/**
- * Type for specifying an error or status code.
- */
-typedef int apr_status_t;
-
-/**
- * Return a human readable string describing the specified error.
- * @param statcode The error code the get a string for.
- * @param buf A buffer to hold the error string.
- * @param bufsize Size of the buffer to hold the string.
- */
-APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
- apr_size_t bufsize);
-
-#if defined(DOXYGEN)
-/**
- * @def APR_FROM_OS_ERROR(os_err_type syserr)
- * Fold a platform specific error into an apr_status_t code.
- * @return apr_status_t
- * @param e The platform os error code.
- * @warning macro implementation; the syserr argument may be evaluated
- * multiple times.
- */
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-
-/**
- * @def APR_TO_OS_ERROR(apr_status_t statcode)
- * @return os_err_type
- * Fold an apr_status_t code back to the native platform defined error.
- * @param e The apr_status_t folded platform os error code.
- * @warning macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined.
- */
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-/** @def apr_get_os_error()
- * @return apr_status_t the last platform error, folded into apr_status_t, on most platforms
- * @remark This retrieves errno, or calls a GetLastError() style function, and
- * folds it with APR_FROM_OS_ERROR. Some platforms (such as OS2) have no
- * such mechanism, so this call may be unsupported. Do NOT use this
- * call for socket errors from socket, send, recv etc!
- */
-
-/** @def apr_set_os_error(e)
- * Reset the last platform error, unfolded from an apr_status_t, on some platforms
- * @param e The OS error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a SetLastError() style function, unfolding statcode
- * with APR_TO_OS_ERROR. Some platforms (such as OS2) have no such
- * mechanism, so this call may be unsupported.
- */
-
-/** @def apr_get_netos_error()
- * Return the last socket error, folded into apr_status_t, on all platforms
- * @remark This retrieves errno or calls a GetLastSocketError() style function,
- * and folds it with APR_FROM_OS_ERROR.
- */
-
-/** @def apr_set_netos_error(e)
- * Reset the last socket error, unfolded from an apr_status_t
- * @param e The socket error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a WSASetLastError() style function, unfolding
- * socketcode with APR_TO_OS_ERROR.
- */
-
-#endif /* defined(DOXYGEN) */
-
-/**
- * APR_OS_START_ERROR is where the APR specific error values start.
- */
-#define APR_OS_START_ERROR 20000
-/**
- * APR_OS_ERRSPACE_SIZE is the maximum number of errors you can fit
- * into one of the error/status ranges below -- except for
- * APR_OS_START_USERERR, which see.
- */
-#define APR_OS_ERRSPACE_SIZE 50000
-/**
- * APR_OS_START_STATUS is where the APR specific status codes start.
- */
-#define APR_OS_START_STATUS (APR_OS_START_ERROR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USERERR are reserved for applications that use APR that
- * layer their own error codes along with APR's. Note that the
- * error immediately following this one is set ten times farther
- * away than usual, so that users of apr have a lot of room in
- * which to declare custom error codes.
- */
-#define APR_OS_START_USERERR (APR_OS_START_STATUS + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USEERR is obsolete, defined for compatibility only.
- * Use APR_OS_START_USERERR instead.
- */
-#define APR_OS_START_USEERR APR_OS_START_USERERR
-/**
- * APR_OS_START_CANONERR is where APR versions of errno values are defined
- * on systems which don't have the corresponding errno.
- */
-#define APR_OS_START_CANONERR (APR_OS_START_USERERR \
- + (APR_OS_ERRSPACE_SIZE * 10))
-/**
- * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
- * apr_status_t values.
- */
-#define APR_OS_START_EAIERR (APR_OS_START_CANONERR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_SYSERR folds platform-specific system error values into
- * apr_status_t values.
- */
-#define APR_OS_START_SYSERR (APR_OS_START_EAIERR + APR_OS_ERRSPACE_SIZE)
-
-/** no error. */
-#define APR_SUCCESS 0
-
-/**
- * @defgroup APR_Error APR Error Values
- * <PRE>
- * <b>APR ERROR VALUES</b>
- * APR_ENOSTAT APR was unable to perform a stat on the file
- * APR_ENOPOOL APR was not provided a pool with which to allocate memory
- * APR_EBADDATE APR was given an invalid date
- * APR_EINVALSOCK APR was given an invalid socket
- * APR_ENOPROC APR was not given a process structure
- * APR_ENOTIME APR was not given a time structure
- * APR_ENODIR APR was not given a directory structure
- * APR_ENOLOCK APR was not given a lock structure
- * APR_ENOPOLL APR was not given a poll structure
- * APR_ENOSOCKET APR was not given a socket
- * APR_ENOTHREAD APR was not given a thread structure
- * APR_ENOTHDKEY APR was not given a thread key structure
- * APR_ENOSHMAVAIL There is no more shared memory available
- * APR_EDSOOPEN APR was unable to open the dso object. For more
- * information call apr_dso_error().
- * APR_EGENERAL General failure (specific information not available)
- * APR_EBADIP The specified IP address is invalid
- * APR_EBADMASK The specified netmask is invalid
- * APR_ESYMNOTFOUND Could not find the requested symbol
- * </PRE>
- *
- * <PRE>
- * <b>APR STATUS VALUES</b>
- * APR_INCHILD Program is currently executing in the child
- * APR_INPARENT Program is currently executing in the parent
- * APR_DETACH The thread is detached
- * APR_NOTDETACH The thread is not detached
- * APR_CHILD_DONE The child has finished executing
- * APR_CHILD_NOTDONE The child has not finished executing
- * APR_TIMEUP The operation did not finish before the timeout
- * APR_INCOMPLETE The operation was incomplete although some processing
- * was performed and the results are partially valid
- * APR_BADCH Getopt found an option not in the option string
- * APR_BADARG Getopt found an option that is missing an argument
- * and an argument was specified in the option string
- * APR_EOF APR has encountered the end of the file
- * APR_NOTFOUND APR was unable to find the socket in the poll structure
- * APR_ANONYMOUS APR is using anonymous shared memory
- * APR_FILEBASED APR is using a file name as the key to the shared memory
- * APR_KEYBASED APR is using a shared key as the key to the shared memory
- * APR_EINIT Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * APR_ENOTIMPL The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * APR_EMISMATCH Two passwords do not match.
- * APR_EABSOLUTE The given path was absolute.
- * APR_ERELATIVE The given path was relative.
- * APR_EINCOMPLETE The given path was neither relative nor absolute.
- * APR_EABOVEROOT The given path was above the root path.
- * APR_EBUSY The given lock was busy.
- * APR_EPROC_UNKNOWN The given process wasn't recognized by APR
- * </PRE>
- * @{
- */
-/** @see APR_STATUS_IS_ENOSTAT */
-#define APR_ENOSTAT (APR_OS_START_ERROR + 1)
-/** @see APR_STATUS_IS_ENOPOOL */
-#define APR_ENOPOOL (APR_OS_START_ERROR + 2)
-/* empty slot: +3 */
-/** @see APR_STATUS_IS_EBADDATE */
-#define APR_EBADDATE (APR_OS_START_ERROR + 4)
-/** @see APR_STATUS_IS_EINVALSOCK */
-#define APR_EINVALSOCK (APR_OS_START_ERROR + 5)
-/** @see APR_STATUS_IS_ENOPROC */
-#define APR_ENOPROC (APR_OS_START_ERROR + 6)
-/** @see APR_STATUS_IS_ENOTIME */
-#define APR_ENOTIME (APR_OS_START_ERROR + 7)
-/** @see APR_STATUS_IS_ENODIR */
-#define APR_ENODIR (APR_OS_START_ERROR + 8)
-/** @see APR_STATUS_IS_ENOLOCK */
-#define APR_ENOLOCK (APR_OS_START_ERROR + 9)
-/** @see APR_STATUS_IS_ENOPOLL */
-#define APR_ENOPOLL (APR_OS_START_ERROR + 10)
-/** @see APR_STATUS_IS_ENOSOCKET */
-#define APR_ENOSOCKET (APR_OS_START_ERROR + 11)
-/** @see APR_STATUS_IS_ENOTHREAD */
-#define APR_ENOTHREAD (APR_OS_START_ERROR + 12)
-/** @see APR_STATUS_IS_ENOTHDKEY */
-#define APR_ENOTHDKEY (APR_OS_START_ERROR + 13)
-/** @see APR_STATUS_IS_EGENERAL */
-#define APR_EGENERAL (APR_OS_START_ERROR + 14)
-/** @see APR_STATUS_IS_ENOSHMAVAIL */
-#define APR_ENOSHMAVAIL (APR_OS_START_ERROR + 15)
-/** @see APR_STATUS_IS_EBADIP */
-#define APR_EBADIP (APR_OS_START_ERROR + 16)
-/** @see APR_STATUS_IS_EBADMASK */
-#define APR_EBADMASK (APR_OS_START_ERROR + 17)
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_EDSOPEN */
-#define APR_EDSOOPEN (APR_OS_START_ERROR + 19)
-/** @see APR_STATUS_IS_EABSOLUTE */
-#define APR_EABSOLUTE (APR_OS_START_ERROR + 20)
-/** @see APR_STATUS_IS_ERELATIVE */
-#define APR_ERELATIVE (APR_OS_START_ERROR + 21)
-/** @see APR_STATUS_IS_EINCOMPLETE */
-#define APR_EINCOMPLETE (APR_OS_START_ERROR + 22)
-/** @see APR_STATUS_IS_EABOVEROOT */
-#define APR_EABOVEROOT (APR_OS_START_ERROR + 23)
-/** @see APR_STATUS_IS_EBADPATH */
-#define APR_EBADPATH (APR_OS_START_ERROR + 24)
-/** @see APR_STATUS_IS_EPATHWILD */
-#define APR_EPATHWILD (APR_OS_START_ERROR + 25)
-/** @see APR_STATUS_IS_ESYMNOTFOUND */
-#define APR_ESYMNOTFOUND (APR_OS_START_ERROR + 26)
-/** @see APR_STATUS_IS_EPROC_UNKNOWN */
-#define APR_EPROC_UNKNOWN (APR_OS_START_ERROR + 27)
-/** @see APR_STATUS_IS_ENOTENOUGHENTROPY */
-#define APR_ENOTENOUGHENTROPY (APR_OS_START_ERROR + 28)
-/** @} */
-
-/**
- * @defgroup APR_STATUS_IS Status Value Tests
- * @warning For any particular error condition, more than one of these tests
- * may match. This is because platform-specific error codes may not
- * always match the semantics of the POSIX codes these tests (and the
- * corresponding APR error codes) are named after. A notable example
- * are the APR_STATUS_IS_ENOENT and APR_STATUS_IS_ENOTDIR tests on
- * Win32 platforms. The programmer should always be aware of this and
- * adjust the order of the tests accordingly.
- * @{
- */
-/**
- * APR was unable to perform a stat on the file
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOSTAT(s) ((s) == APR_ENOSTAT)
-/**
- * APR was not provided a pool with which to allocate memory
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOPOOL(s) ((s) == APR_ENOPOOL)
-/** APR was given an invalid date */
-#define APR_STATUS_IS_EBADDATE(s) ((s) == APR_EBADDATE)
-/** APR was given an invalid socket */
-#define APR_STATUS_IS_EINVALSOCK(s) ((s) == APR_EINVALSOCK)
-/** APR was not given a process structure */
-#define APR_STATUS_IS_ENOPROC(s) ((s) == APR_ENOPROC)
-/** APR was not given a time structure */
-#define APR_STATUS_IS_ENOTIME(s) ((s) == APR_ENOTIME)
-/** APR was not given a directory structure */
-#define APR_STATUS_IS_ENODIR(s) ((s) == APR_ENODIR)
-/** APR was not given a lock structure */
-#define APR_STATUS_IS_ENOLOCK(s) ((s) == APR_ENOLOCK)
-/** APR was not given a poll structure */
-#define APR_STATUS_IS_ENOPOLL(s) ((s) == APR_ENOPOLL)
-/** APR was not given a socket */
-#define APR_STATUS_IS_ENOSOCKET(s) ((s) == APR_ENOSOCKET)
-/** APR was not given a thread structure */
-#define APR_STATUS_IS_ENOTHREAD(s) ((s) == APR_ENOTHREAD)
-/** APR was not given a thread key structure */
-#define APR_STATUS_IS_ENOTHDKEY(s) ((s) == APR_ENOTHDKEY)
-/** Generic Error which can not be put into another spot */
-#define APR_STATUS_IS_EGENERAL(s) ((s) == APR_EGENERAL)
-/** There is no more shared memory available */
-#define APR_STATUS_IS_ENOSHMAVAIL(s) ((s) == APR_ENOSHMAVAIL)
-/** The specified IP address is invalid */
-#define APR_STATUS_IS_EBADIP(s) ((s) == APR_EBADIP)
-/** The specified netmask is invalid */
-#define APR_STATUS_IS_EBADMASK(s) ((s) == APR_EBADMASK)
-/* empty slot: +18 */
-/**
- * APR was unable to open the dso object.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN \
- || APR_TO_OS_ERROR(s) == ERROR_MOD_NOT_FOUND)
-#else
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN)
-#endif
-/** The given path was absolute. */
-#define APR_STATUS_IS_EABSOLUTE(s) ((s) == APR_EABSOLUTE)
-/** The given path was relative. */
-#define APR_STATUS_IS_ERELATIVE(s) ((s) == APR_ERELATIVE)
-/** The given path was neither relative nor absolute. */
-#define APR_STATUS_IS_EINCOMPLETE(s) ((s) == APR_EINCOMPLETE)
-/** The given path was above the root path. */
-#define APR_STATUS_IS_EABOVEROOT(s) ((s) == APR_EABOVEROOT)
-/** The given path was bad. */
-#define APR_STATUS_IS_EBADPATH(s) ((s) == APR_EBADPATH)
-/** The given path contained wildcards. */
-#define APR_STATUS_IS_EPATHWILD(s) ((s) == APR_EPATHWILD)
-/** Could not find the requested symbol.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND \
- || APR_TO_OS_ERROR(s) == ERROR_PROC_NOT_FOUND)
-#else
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND)
-#endif
-/** The given process was not recognized by APR. */
-#define APR_STATUS_IS_EPROC_UNKNOWN(s) ((s) == APR_EPROC_UNKNOWN)
-
-/** APR could not gather enough entropy to continue. */
-#define APR_STATUS_IS_ENOTENOUGHENTROPY(s) ((s) == APR_ENOTENOUGHENTROPY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error
- * @{
- */
-/** @see APR_STATUS_IS_INCHILD */
-#define APR_INCHILD (APR_OS_START_STATUS + 1)
-/** @see APR_STATUS_IS_INPARENT */
-#define APR_INPARENT (APR_OS_START_STATUS + 2)
-/** @see APR_STATUS_IS_DETACH */
-#define APR_DETACH (APR_OS_START_STATUS + 3)
-/** @see APR_STATUS_IS_NOTDETACH */
-#define APR_NOTDETACH (APR_OS_START_STATUS + 4)
-/** @see APR_STATUS_IS_CHILD_DONE */
-#define APR_CHILD_DONE (APR_OS_START_STATUS + 5)
-/** @see APR_STATUS_IS_CHILD_NOTDONE */
-#define APR_CHILD_NOTDONE (APR_OS_START_STATUS + 6)
-/** @see APR_STATUS_IS_TIMEUP */
-#define APR_TIMEUP (APR_OS_START_STATUS + 7)
-/** @see APR_STATUS_IS_INCOMPLETE */
-#define APR_INCOMPLETE (APR_OS_START_STATUS + 8)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/** @see APR_STATUS_IS_BADCH */
-#define APR_BADCH (APR_OS_START_STATUS + 12)
-/** @see APR_STATUS_IS_BADARG */
-#define APR_BADARG (APR_OS_START_STATUS + 13)
-/** @see APR_STATUS_IS_EOF */
-#define APR_EOF (APR_OS_START_STATUS + 14)
-/** @see APR_STATUS_IS_NOTFOUND */
-#define APR_NOTFOUND (APR_OS_START_STATUS + 15)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_ANONYMOUS */
-#define APR_ANONYMOUS (APR_OS_START_STATUS + 19)
-/** @see APR_STATUS_IS_FILEBASED */
-#define APR_FILEBASED (APR_OS_START_STATUS + 20)
-/** @see APR_STATUS_IS_KEYBASED */
-#define APR_KEYBASED (APR_OS_START_STATUS + 21)
-/** @see APR_STATUS_IS_EINIT */
-#define APR_EINIT (APR_OS_START_STATUS + 22)
-/** @see APR_STATUS_IS_ENOTIMPL */
-#define APR_ENOTIMPL (APR_OS_START_STATUS + 23)
-/** @see APR_STATUS_IS_EMISMATCH */
-#define APR_EMISMATCH (APR_OS_START_STATUS + 24)
-/** @see APR_STATUS_IS_EBUSY */
-#define APR_EBUSY (APR_OS_START_STATUS + 25)
-/** @} */
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-/**
- * Program is currently executing in the child
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code */
-#define APR_STATUS_IS_INCHILD(s) ((s) == APR_INCHILD)
-/**
- * Program is currently executing in the parent
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INPARENT(s) ((s) == APR_INPARENT)
-/**
- * The thread is detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_DETACH(s) ((s) == APR_DETACH)
-/**
- * The thread is not detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTDETACH(s) ((s) == APR_NOTDETACH)
-/**
- * The child has finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_DONE(s) ((s) == APR_CHILD_DONE)
-/**
- * The child has not finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_NOTDONE(s) ((s) == APR_CHILD_NOTDONE)
-/**
- * The operation did not finish before the timeout
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP)
-/**
- * The operation was incomplete although some processing was performed
- * and the results are partially valid.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INCOMPLETE(s) ((s) == APR_INCOMPLETE)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/**
- * Getopt found an option not in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADCH(s) ((s) == APR_BADCH)
-/**
- * Getopt found an option not in the option string and an argument was
- * specified in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADARG(s) ((s) == APR_BADARG)
-/**
- * APR has encountered the end of the file
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EOF(s) ((s) == APR_EOF)
-/**
- * APR was unable to find the socket in the poll structure
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTFOUND(s) ((s) == APR_NOTFOUND)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/**
- * APR is using anonymous shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ANONYMOUS(s) ((s) == APR_ANONYMOUS)
-/**
- * APR is using a file name as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_FILEBASED(s) ((s) == APR_FILEBASED)
-/**
- * APR is using a shared key as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_KEYBASED(s) ((s) == APR_KEYBASED)
-/**
- * Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EINIT(s) ((s) == APR_EINIT)
-/**
- * The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOTIMPL(s) ((s) == APR_ENOTIMPL)
-/**
- * Two passwords do not match.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EMISMATCH(s) ((s) == APR_EMISMATCH)
-/**
- * The given lock was busy
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EBUSY(s) ((s) == APR_EBUSY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error APR Error Values
- * @{
- */
-/* APR CANONICAL ERROR VALUES */
-/** @see APR_STATUS_IS_EACCES */
-#ifdef EACCES
-#define APR_EACCES EACCES
-#else
-#define APR_EACCES (APR_OS_START_CANONERR + 1)
-#endif
-
-/** @see APR_STATUS_IS_EXIST */
-#ifdef EEXIST
-#define APR_EEXIST EEXIST
-#else
-#define APR_EEXIST (APR_OS_START_CANONERR + 2)
-#endif
-
-/** @see APR_STATUS_IS_ENAMETOOLONG */
-#ifdef ENAMETOOLONG
-#define APR_ENAMETOOLONG ENAMETOOLONG
-#else
-#define APR_ENAMETOOLONG (APR_OS_START_CANONERR + 3)
-#endif
-
-/** @see APR_STATUS_IS_ENOENT */
-#ifdef ENOENT
-#define APR_ENOENT ENOENT
-#else
-#define APR_ENOENT (APR_OS_START_CANONERR + 4)
-#endif
-
-/** @see APR_STATUS_IS_ENOTDIR */
-#ifdef ENOTDIR
-#define APR_ENOTDIR ENOTDIR
-#else
-#define APR_ENOTDIR (APR_OS_START_CANONERR + 5)
-#endif
-
-/** @see APR_STATUS_IS_ENOSPC */
-#ifdef ENOSPC
-#define APR_ENOSPC ENOSPC
-#else
-#define APR_ENOSPC (APR_OS_START_CANONERR + 6)
-#endif
-
-/** @see APR_STATUS_IS_ENOMEM */
-#ifdef ENOMEM
-#define APR_ENOMEM ENOMEM
-#else
-#define APR_ENOMEM (APR_OS_START_CANONERR + 7)
-#endif
-
-/** @see APR_STATUS_IS_EMFILE */
-#ifdef EMFILE
-#define APR_EMFILE EMFILE
-#else
-#define APR_EMFILE (APR_OS_START_CANONERR + 8)
-#endif
-
-/** @see APR_STATUS_IS_ENFILE */
-#ifdef ENFILE
-#define APR_ENFILE ENFILE
-#else
-#define APR_ENFILE (APR_OS_START_CANONERR + 9)
-#endif
-
-/** @see APR_STATUS_IS_EBADF */
-#ifdef EBADF
-#define APR_EBADF EBADF
-#else
-#define APR_EBADF (APR_OS_START_CANONERR + 10)
-#endif
-
-/** @see APR_STATUS_IS_EINVAL */
-#ifdef EINVAL
-#define APR_EINVAL EINVAL
-#else
-#define APR_EINVAL (APR_OS_START_CANONERR + 11)
-#endif
-
-/** @see APR_STATUS_IS_ESPIPE */
-#ifdef ESPIPE
-#define APR_ESPIPE ESPIPE
-#else
-#define APR_ESPIPE (APR_OS_START_CANONERR + 12)
-#endif
-
-/**
- * @see APR_STATUS_IS_EAGAIN
- * @warning use APR_STATUS_IS_EAGAIN instead of just testing this value
- */
-#ifdef EAGAIN
-#define APR_EAGAIN EAGAIN
-#elif defined(EWOULDBLOCK)
-#define APR_EAGAIN EWOULDBLOCK
-#else
-#define APR_EAGAIN (APR_OS_START_CANONERR + 13)
-#endif
-
-/** @see APR_STATUS_IS_EINTR */
-#ifdef EINTR
-#define APR_EINTR EINTR
-#else
-#define APR_EINTR (APR_OS_START_CANONERR + 14)
-#endif
-
-/** @see APR_STATUS_IS_ENOTSOCK */
-#ifdef ENOTSOCK
-#define APR_ENOTSOCK ENOTSOCK
-#else
-#define APR_ENOTSOCK (APR_OS_START_CANONERR + 15)
-#endif
-
-/** @see APR_STATUS_IS_ECONNREFUSED */
-#ifdef ECONNREFUSED
-#define APR_ECONNREFUSED ECONNREFUSED
-#else
-#define APR_ECONNREFUSED (APR_OS_START_CANONERR + 16)
-#endif
-
-/** @see APR_STATUS_IS_EINPROGRESS */
-#ifdef EINPROGRESS
-#define APR_EINPROGRESS EINPROGRESS
-#else
-#define APR_EINPROGRESS (APR_OS_START_CANONERR + 17)
-#endif
-
-/**
- * @see APR_STATUS_IS_ECONNABORTED
- * @warning use APR_STATUS_IS_ECONNABORTED instead of just testing this value
- */
-
-#ifdef ECONNABORTED
-#define APR_ECONNABORTED ECONNABORTED
-#else
-#define APR_ECONNABORTED (APR_OS_START_CANONERR + 18)
-#endif
-
-/** @see APR_STATUS_IS_ECONNRESET */
-#ifdef ECONNRESET
-#define APR_ECONNRESET ECONNRESET
-#else
-#define APR_ECONNRESET (APR_OS_START_CANONERR + 19)
-#endif
-
-/** @see APR_STATUS_IS_ETIMEDOUT
- * @deprecated */
-#ifdef ETIMEDOUT
-#define APR_ETIMEDOUT ETIMEDOUT
-#else
-#define APR_ETIMEDOUT (APR_OS_START_CANONERR + 20)
-#endif
-
-/** @see APR_STATUS_IS_EHOSTUNREACH */
-#ifdef EHOSTUNREACH
-#define APR_EHOSTUNREACH EHOSTUNREACH
-#else
-#define APR_EHOSTUNREACH (APR_OS_START_CANONERR + 21)
-#endif
-
-/** @see APR_STATUS_IS_ENETUNREACH */
-#ifdef ENETUNREACH
-#define APR_ENETUNREACH ENETUNREACH
-#else
-#define APR_ENETUNREACH (APR_OS_START_CANONERR + 22)
-#endif
-
-/** @see APR_STATUS_IS_EFTYPE */
-#ifdef EFTYPE
-#define APR_EFTYPE EFTYPE
-#else
-#define APR_EFTYPE (APR_OS_START_CANONERR + 23)
-#endif
-
-/** @see APR_STATUS_IS_EPIPE */
-#ifdef EPIPE
-#define APR_EPIPE EPIPE
-#else
-#define APR_EPIPE (APR_OS_START_CANONERR + 24)
-#endif
-
-/** @see APR_STATUS_IS_EXDEV */
-#ifdef EXDEV
-#define APR_EXDEV EXDEV
-#else
-#define APR_EXDEV (APR_OS_START_CANONERR + 25)
-#endif
-
-/** @see APR_STATUS_IS_ENOTEMPTY */
-#ifdef ENOTEMPTY
-#define APR_ENOTEMPTY ENOTEMPTY
-#else
-#define APR_ENOTEMPTY (APR_OS_START_CANONERR + 26)
-#endif
-
-/** @} */
-
-#if defined(OS2) && !defined(DOXYGEN)
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define INCL_DOSERRORS
-#define INCL_DOS
-
-/* Leave these undefined.
- * OS2 doesn't rely on the errno concept.
- * The API calls always return a result codes which
- * should be filtered through APR_FROM_OS_ERROR().
- *
- * #define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
- * #define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
- */
-
-/* A special case, only socket calls require this;
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(errno))
-#define apr_set_netos_error(e) (errno = APR_TO_OS_ERROR(e))
-
-/* And this needs to be greped away for good:
- */
-#define APR_OS2_STATUS(e) (APR_FROM_OS_ERROR(e))
-
-/* These can't sit in a private header, so in spite of the extra size,
- * they need to be made available here.
- */
-#define SOCBASEERR 10000
-#define SOCEPERM (SOCBASEERR+1) /* Not owner */
-#define SOCESRCH (SOCBASEERR+3) /* No such process */
-#define SOCEINTR (SOCBASEERR+4) /* Interrupted system call */
-#define SOCENXIO (SOCBASEERR+6) /* No such device or address */
-#define SOCEBADF (SOCBASEERR+9) /* Bad file number */
-#define SOCEACCES (SOCBASEERR+13) /* Permission denied */
-#define SOCEFAULT (SOCBASEERR+14) /* Bad address */
-#define SOCEINVAL (SOCBASEERR+22) /* Invalid argument */
-#define SOCEMFILE (SOCBASEERR+24) /* Too many open files */
-#define SOCEPIPE (SOCBASEERR+32) /* Broken pipe */
-#define SOCEOS2ERR (SOCBASEERR+100) /* OS/2 Error */
-#define SOCEWOULDBLOCK (SOCBASEERR+35) /* Operation would block */
-#define SOCEINPROGRESS (SOCBASEERR+36) /* Operation now in progress */
-#define SOCEALREADY (SOCBASEERR+37) /* Operation already in progress */
-#define SOCENOTSOCK (SOCBASEERR+38) /* Socket operation on non-socket */
-#define SOCEDESTADDRREQ (SOCBASEERR+39) /* Destination address required */
-#define SOCEMSGSIZE (SOCBASEERR+40) /* Message too long */
-#define SOCEPROTOTYPE (SOCBASEERR+41) /* Protocol wrong type for socket */
-#define SOCENOPROTOOPT (SOCBASEERR+42) /* Protocol not available */
-#define SOCEPROTONOSUPPORT (SOCBASEERR+43) /* Protocol not supported */
-#define SOCESOCKTNOSUPPORT (SOCBASEERR+44) /* Socket type not supported */
-#define SOCEOPNOTSUPP (SOCBASEERR+45) /* Operation not supported on socket */
-#define SOCEPFNOSUPPORT (SOCBASEERR+46) /* Protocol family not supported */
-#define SOCEAFNOSUPPORT (SOCBASEERR+47) /* Address family not supported by protocol family */
-#define SOCEADDRINUSE (SOCBASEERR+48) /* Address already in use */
-#define SOCEADDRNOTAVAIL (SOCBASEERR+49) /* Can't assign requested address */
-#define SOCENETDOWN (SOCBASEERR+50) /* Network is down */
-#define SOCENETUNREACH (SOCBASEERR+51) /* Network is unreachable */
-#define SOCENETRESET (SOCBASEERR+52) /* Network dropped connection on reset */
-#define SOCECONNABORTED (SOCBASEERR+53) /* Software caused connection abort */
-#define SOCECONNRESET (SOCBASEERR+54) /* Connection reset by peer */
-#define SOCENOBUFS (SOCBASEERR+55) /* No buffer space available */
-#define SOCEISCONN (SOCBASEERR+56) /* Socket is already connected */
-#define SOCENOTCONN (SOCBASEERR+57) /* Socket is not connected */
-#define SOCESHUTDOWN (SOCBASEERR+58) /* Can't send after socket shutdown */
-#define SOCETOOMANYREFS (SOCBASEERR+59) /* Too many references: can't splice */
-#define SOCETIMEDOUT (SOCBASEERR+60) /* Connection timed out */
-#define SOCECONNREFUSED (SOCBASEERR+61) /* Connection refused */
-#define SOCELOOP (SOCBASEERR+62) /* Too many levels of symbolic links */
-#define SOCENAMETOOLONG (SOCBASEERR+63) /* File name too long */
-#define SOCEHOSTDOWN (SOCBASEERR+64) /* Host is down */
-#define SOCEHOSTUNREACH (SOCBASEERR+65) /* No route to host */
-#define SOCENOTEMPTY (SOCBASEERR+66) /* Directory not empty */
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + SOCENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + SOCEWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + SOCEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + SOCENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + SOCECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + SOCEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + SOCECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + SOCECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE \
- || (s) == APR_OS_START_SYSERR + SOCEPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-
-/*
- Sorry, too tired to wrap this up for OS2... feel free to
- fit the following into their best matches.
-
- { ERROR_NO_SIGNAL_SENT, ESRCH },
- { SOCEALREADY, EALREADY },
- { SOCEDESTADDRREQ, EDESTADDRREQ },
- { SOCEMSGSIZE, EMSGSIZE },
- { SOCEPROTOTYPE, EPROTOTYPE },
- { SOCENOPROTOOPT, ENOPROTOOPT },
- { SOCEPROTONOSUPPORT, EPROTONOSUPPORT },
- { SOCESOCKTNOSUPPORT, ESOCKTNOSUPPORT },
- { SOCEOPNOTSUPP, EOPNOTSUPP },
- { SOCEPFNOSUPPORT, EPFNOSUPPORT },
- { SOCEAFNOSUPPORT, EAFNOSUPPORT },
- { SOCEADDRINUSE, EADDRINUSE },
- { SOCEADDRNOTAVAIL, EADDRNOTAVAIL },
- { SOCENETDOWN, ENETDOWN },
- { SOCENETRESET, ENETRESET },
- { SOCENOBUFS, ENOBUFS },
- { SOCEISCONN, EISCONN },
- { SOCENOTCONN, ENOTCONN },
- { SOCESHUTDOWN, ESHUTDOWN },
- { SOCETOOMANYREFS, ETOOMANYREFS },
- { SOCELOOP, ELOOP },
- { SOCEHOSTDOWN, EHOSTDOWN },
- { SOCENOTEMPTY, ENOTEMPTY },
- { SOCEPIPE, EPIPE }
-*/
-
-#elif defined(WIN32) && !defined(DOXYGEN) /* !defined(OS2) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
-#define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_CANNOT_MAKE \
- || (s) == APR_OS_START_SYSERR + ERROR_CURRENT_DIRECTORY \
- || (s) == APR_OS_START_SYSERR + ERROR_DRIVE_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_FAIL_I24 \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_NETWORK_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + WSAENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NETPATH \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NET_NAME \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_PATHNAME \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DRIVE)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM \
- || (s) == APR_OS_START_SYSERR + ERROR_ARENA_TRASHED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_MEMORY \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_BLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_QUOTA \
- || (s) == APR_OS_START_SYSERR + ERROR_OUTOFMEMORY)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_TARGET_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_ACCESS \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_SEEK_ON_DEVICE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_PROC_SLOTS \
- || (s) == APR_OS_START_SYSERR + ERROR_NESTING_NOT_ALLOWED \
- || (s) == APR_OS_START_SYSERR + ERROR_MAX_THRDS_REACHED \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + ERROR_NETNAME_DELETED \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_EXE_MACHINE_TYPE_MISMATCH \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DLL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_MODULETYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_EXE_FORMAT \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_EXE_SIGNATURE \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_CORRUPT \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_FORMAT)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY)
-
-#elif defined(NETWARE) && defined(USE_WINSOCK) && !defined(DOXYGEN) /* !defined(OS2) && !defined(WIN32) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this: */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_ENETDOWN(s) ((s) == APR_OS_START_SYSERR + WSAENETDOWN)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY)
-
-#else /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/*
- * os error codes are clib error codes
- */
-#define APR_FROM_OS_ERROR(e) (e)
-#define APR_TO_OS_ERROR(e) (e)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (errno)
-#define apr_set_netos_error(e) (errno = (e))
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-
-/** permission denied */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-/** file exists */
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-/** path name is too long */
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-/**
- * no such file or directory
- * @remark
- * EMVSCATLG can be returned by the automounter on z/OS for
- * paths which do not exist.
- */
-#ifdef EMVSCATLG
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == EMVSCATLG)
-#else
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#endif
-/** not a directory */
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-/** no space left on device */
-#ifdef EDQUOT
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == EDQUOT)
-#else
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#endif
-/** not enough memory */
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-/** too many open files */
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-/** file table overflow */
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-/** bad file # */
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-/** invalid argument */
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-/** illegal seek */
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-/** operation would block */
-#if !defined(EWOULDBLOCK) || !defined(EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#elif (EWOULDBLOCK == EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#else
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK)
-#endif
-
-/** interrupted system call */
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR)
-/** socket operation on a non-socket */
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK)
-/** Connection Refused */
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED)
-/** operation now in progress */
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS)
-
-/**
- * Software caused connection abort
- * @remark
- * EPROTO on certain older kernels really means ECONNABORTED, so we need to
- * ignore it for them. See discussion in new-httpd archives nh.9701 & nh.9603
- *
- * There is potentially a bug in Solaris 2.x x<6, and other boxes that
- * implement tcp sockets in userland (i.e. on top of STREAMS). On these
- * systems, EPROTO can actually result in a fatal loop. See PR#981 for
- * example. It's hard to handle both uses of EPROTO.
- */
-#ifdef EPROTO
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == EPROTO)
-#else
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED)
-#endif
-
-/** Connection Reset by peer */
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET)
-/** Operation timed out
- * @deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT)
-/** no route to host */
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH)
-/** network is unreachable */
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH)
-/** inappropiate file type or format */
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-/** broken pipe */
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-/** cross device link */
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-/** Directory Not Empty */
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY || \
- (s) == APR_EEXIST)
-/** @} */
-
-#endif /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/** @} */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ! APR_ERRNO_H */
-#ifndef _LINUX_ERRNO_H
-#define _LINUX_ERRNO_H
-
-#include <asm/errno.h>
-
-#ifdef __KERNEL__
-
-/* Should never be seen by user programs */
-#define ERESTARTSYS 512
-#define ERESTARTNOINTR 513
-#define ERESTARTNOHAND 514 /* restart if no handler.. */
-#define ENOIOCTLCMD 515 /* No ioctl command */
-#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
-
-/* Defined for the NFSv3 protocol */
-#define EBADHANDLE 521 /* Illegal NFS file handle */
-#define ENOTSYNC 522 /* Update synchronization mismatch */
-#define EBADCOOKIE 523 /* Cookie is stale */
-#define ENOTSUPP 524 /* Operation is not supported */
-#define ETOOSMALL 525 /* Buffer or request is too small */
-#define ESERVERFAULT 526 /* An untranslatable error occurred */
-#define EBADTYPE 527 /* Type not supported by server */
-#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
-#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
-#define EIOCBRETRY 530 /* iocb queued, will trigger a retry */
-
-#endif
-
-#endif
-// Copyright (c) 1994 James Clark
-// See the file COPYING for copying permission.
-
-#ifndef ErrnoMessageArg_INCLUDED
-#define ErrnoMessageArg_INCLUDED 1
-
-#include "MessageArg.h"
-#include "rtti.h"
-
-#ifdef SP_NAMESPACE
-namespace SP_NAMESPACE {
-#endif
-
-class SP_API ErrnoMessageArg : public OtherMessageArg {
- RTTI_CLASS
-public:
- ErrnoMessageArg(int errnum) : errno_(errnum) { }
- MessageArg *copy() const;
- // errno might be a macro so we must use a different name
- int errnum() const;
-private:
- int errno_;
-};
-
-inline
-int ErrnoMessageArg::errnum() const
-{
- return errno_;
-}
-
-#ifdef SP_NAMESPACE
-}
-#endif
-
-#endif /* not ErrnoMessageArg_INCLUDED */
-/* Copyright (C) 1991,92,93,94,95,96,97,2002 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-/*
- * ISO C99 Standard: 7.5 Errors <errno.h>
- */
-
-#ifndef _ERRNO_H
-
-/* The includer defined __need_Emath if he wants only the definitions
- of EDOM and ERANGE, and not everything else. */
-#ifndef __need_Emath
-# define _ERRNO_H 1
-# include <features.h>
-#endif
-
-__BEGIN_DECLS
-
-/* Get the error number constants from the system-specific file.
- This file will test __need_Emath and _ERRNO_H. */
-#include <bits/errno.h>
-#undef __need_Emath
-
-#ifdef _ERRNO_H
-
-/* Declare the `errno' variable, unless it's defined as a macro by
- bits/errno.h. This is the case in GNU, where it is a per-thread
- variable. This redeclaration using the macro still works, but it
- will be a function declaration without a prototype and may trigger
- a -Wstrict-prototypes warning. */
-#ifndef errno
-extern int errno;
-#endif
-
-#ifdef __USE_GNU
-
-/* The full and simple forms of the name with which the program was
- invoked. These variables are set up automatically at startup based on
- the value of ARGV[0] (this works only if you use GNU ld). */
-extern char *program_invocation_name, *program_invocation_short_name;
-#endif /* __USE_GNU */
-#endif /* _ERRNO_H */
-
-__END_DECLS
-
-#endif /* _ERRNO_H */
-
-/* The Hurd <bits/errno.h> defines `error_t' as an enumerated type so
- that printing `error_t' values in the debugger shows the names. We
- might need this definition sometimes even if this file was included
- before. */
-#if defined __USE_GNU || defined __need_error_t
-# ifndef __error_t_defined
-typedef int error_t;
-# define __error_t_defined 1
-# endif
-# undef __need_error_t
-#endif
-#ifndef _I386_ERRNO_H
-#define _I386_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif
-#ifndef _ASM_GENERIC_ERRNO_BASE_H
-#define _ASM_GENERIC_ERRNO_BASE_H
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* I/O error */
-#define ENXIO 6 /* No such device or address */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file number */
-#define ECHILD 10 /* No child processes */
-#define EAGAIN 11 /* Try again */
-#define ENOMEM 12 /* Out of memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#define ENOTBLK 15 /* Block device required */
-#define EBUSY 16 /* Device or resource busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* No such device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* File table overflow */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Not a typewriter */
-#define ETXTBSY 26 /* Text file busy */
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only file system */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-#define EDOM 33 /* Math argument out of domain of func */
-#define ERANGE 34 /* Math result not representable */
-
-#endif
-#ifndef _ASM_GENERIC_ERRNO_H
-#define _ASM_GENERIC_ERRNO_H
-
-#include <asm-generic/errno-base.h>
-
-#define EDEADLK 35 /* Resource deadlock would occur */
-#define ENAMETOOLONG 36 /* File name too long */
-#define ENOLCK 37 /* No record locks available */
-#define ENOSYS 38 /* Function not implemented */
-#define ENOTEMPTY 39 /* Directory not empty */
-#define ELOOP 40 /* Too many symbolic links encountered */
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define ENOMSG 42 /* No message of desired type */
-#define EIDRM 43 /* Identifier removed */
-#define ECHRNG 44 /* Channel number out of range */
-#define EL2NSYNC 45 /* Level 2 not synchronized */
-#define EL3HLT 46 /* Level 3 halted */
-#define EL3RST 47 /* Level 3 reset */
-#define ELNRNG 48 /* Link number out of range */
-#define EUNATCH 49 /* Protocol driver not attached */
-#define ENOCSI 50 /* No CSI structure available */
-#define EL2HLT 51 /* Level 2 halted */
-#define EBADE 52 /* Invalid exchange */
-#define EBADR 53 /* Invalid request descriptor */
-#define EXFULL 54 /* Exchange full */
-#define ENOANO 55 /* No anode */
-#define EBADRQC 56 /* Invalid request code */
-#define EBADSLT 57 /* Invalid slot */
-
-#define EDEADLOCK EDEADLK
-
-#define EBFONT 59 /* Bad font file format */
-#define ENOSTR 60 /* Device not a stream */
-#define ENODATA 61 /* No data available */
-#define ETIME 62 /* Timer expired */
-#define ENOSR 63 /* Out of streams resources */
-#define ENONET 64 /* Machine is not on the network */
-#define ENOPKG 65 /* Package not installed */
-#define EREMOTE 66 /* Object is remote */
-#define ENOLINK 67 /* Link has been severed */
-#define EADV 68 /* Advertise error */
-#define ESRMNT 69 /* Srmount error */
-#define ECOMM 70 /* Communication error on send */
-#define EPROTO 71 /* Protocol error */
-#define EMULTIHOP 72 /* Multihop attempted */
-#define EDOTDOT 73 /* RFS specific error */
-#define EBADMSG 74 /* Not a data message */
-#define EOVERFLOW 75 /* Value too large for defined data type */
-#define ENOTUNIQ 76 /* Name not unique on network */
-#define EBADFD 77 /* File descriptor in bad state */
-#define EREMCHG 78 /* Remote address changed */
-#define ELIBACC 79 /* Can not access a needed shared library */
-#define ELIBBAD 80 /* Accessing a corrupted shared library */
-#define ELIBSCN 81 /* .lib section in a.out corrupted */
-#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
-#define ELIBEXEC 83 /* Cannot exec a shared library directly */
-#define EILSEQ 84 /* Illegal byte sequence */
-#define ERESTART 85 /* Interrupted system call should be restarted */
-#define ESTRPIPE 86 /* Streams pipe error */
-#define EUSERS 87 /* Too many users */
-#define ENOTSOCK 88 /* Socket operation on non-socket */
-#define EDESTADDRREQ 89 /* Destination address required */
-#define EMSGSIZE 90 /* Message too long */
-#define EPROTOTYPE 91 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 92 /* Protocol not available */
-#define EPROTONOSUPPORT 93 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
-#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
-#define EPFNOSUPPORT 96 /* Protocol family not supported */
-#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
-#define EADDRINUSE 98 /* Address already in use */
-#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
-#define ENETDOWN 100 /* Network is down */
-#define ENETUNREACH 101 /* Network is unreachable */
-#define ENETRESET 102 /* Network dropped connection because of reset */
-#define ECONNABORTED 103 /* Software caused connection abort */
-#define ECONNRESET 104 /* Connection reset by peer */
-#define ENOBUFS 105 /* No buffer space available */
-#define EISCONN 106 /* Transport endpoint is already connected */
-#define ENOTCONN 107 /* Transport endpoint is not connected */
-#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
-#define ETOOMANYREFS 109 /* Too many references: cannot splice */
-#define ETIMEDOUT 110 /* Connection timed out */
-#define ECONNREFUSED 111 /* Connection refused */
-#define EHOSTDOWN 112 /* Host is down */
-#define EHOSTUNREACH 113 /* No route to host */
-#define EALREADY 114 /* Operation already in progress */
-#define EINPROGRESS 115 /* Operation now in progress */
-#define ESTALE 116 /* Stale NFS file handle */
-#define EUCLEAN 117 /* Structure needs cleaning */
-#define ENOTNAM 118 /* Not a XENIX named type file */
-#define ENAVAIL 119 /* No XENIX semaphores available */
-#define EISNAM 120 /* Is a named type file */
-#define EREMOTEIO 121 /* Remote I/O error */
-#define EDQUOT 122 /* Quota exceeded */
-
-#define ENOMEDIUM 123 /* No medium found */
-#define EMEDIUMTYPE 124 /* Wrong medium type */
-#define ECANCELED 125 /* Operation Canceled */
-#define ENOKEY 126 /* Required key not available */
-#define EKEYEXPIRED 127 /* Key has expired */
-#define EKEYREVOKED 128 /* Key has been revoked */
-#define EKEYREJECTED 129 /* Key was rejected by service */
-
-/* for robust mutexes */
-#define EOWNERDEAD 130 /* Owner died */
-#define ENOTRECOVERABLE 131 /* State not recoverable */
-
-#endif
diff --git a/doc/legacy/errno.list.macosx.txt b/doc/legacy/errno.list.macosx.txt
deleted file mode 100644
index 4954e03d855..00000000000
--- a/doc/legacy/errno.list.macosx.txt
+++ /dev/null
@@ -1,1513 +0,0 @@
-/* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
- * applicable.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef APR_ERRNO_H
-#define APR_ERRNO_H
-
-/**
- * @file apr_errno.h
- * @brief APR Error Codes
- */
-
-#include "apr.h"
-
-#if APR_HAVE_ERRNO_H
-#include <errno.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @defgroup apr_errno Error Codes
- * @ingroup APR
- * @{
- */
-
-/**
- * Type for specifying an error or status code.
- */
-typedef int apr_status_t;
-
-/**
- * Return a human readable string describing the specified error.
- * @param statcode The error code the get a string for.
- * @param buf A buffer to hold the error string.
- * @param bufsize Size of the buffer to hold the string.
- */
-APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
- apr_size_t bufsize);
-
-#if defined(DOXYGEN)
-/**
- * @def APR_FROM_OS_ERROR(os_err_type syserr)
- * Fold a platform specific error into an apr_status_t code.
- * @return apr_status_t
- * @param e The platform os error code.
- * @warning macro implementation; the syserr argument may be evaluated
- * multiple times.
- */
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-
-/**
- * @def APR_TO_OS_ERROR(apr_status_t statcode)
- * @return os_err_type
- * Fold an apr_status_t code back to the native platform defined error.
- * @param e The apr_status_t folded platform os error code.
- * @warning macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined.
- */
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-/** @def apr_get_os_error()
- * @return apr_status_t the last platform error, folded into apr_status_t, on most platforms
- * @remark This retrieves errno, or calls a GetLastError() style function, and
- * folds it with APR_FROM_OS_ERROR. Some platforms (such as OS2) have no
- * such mechanism, so this call may be unsupported. Do NOT use this
- * call for socket errors from socket, send, recv etc!
- */
-
-/** @def apr_set_os_error(e)
- * Reset the last platform error, unfolded from an apr_status_t, on some platforms
- * @param e The OS error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a SetLastError() style function, unfolding statcode
- * with APR_TO_OS_ERROR. Some platforms (such as OS2) have no such
- * mechanism, so this call may be unsupported.
- */
-
-/** @def apr_get_netos_error()
- * Return the last socket error, folded into apr_status_t, on all platforms
- * @remark This retrieves errno or calls a GetLastSocketError() style function,
- * and folds it with APR_FROM_OS_ERROR.
- */
-
-/** @def apr_set_netos_error(e)
- * Reset the last socket error, unfolded from an apr_status_t
- * @param e The socket error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a WSASetLastError() style function, unfolding
- * socketcode with APR_TO_OS_ERROR.
- */
-
-#endif /* defined(DOXYGEN) */
-
-/**
- * APR_OS_START_ERROR is where the APR specific error values start.
- */
-#define APR_OS_START_ERROR 20000
-/**
- * APR_OS_ERRSPACE_SIZE is the maximum number of errors you can fit
- * into one of the error/status ranges below -- except for
- * APR_OS_START_USERERR, which see.
- */
-#define APR_OS_ERRSPACE_SIZE 50000
-/**
- * APR_OS_START_STATUS is where the APR specific status codes start.
- */
-#define APR_OS_START_STATUS (APR_OS_START_ERROR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USERERR are reserved for applications that use APR that
- * layer their own error codes along with APR's. Note that the
- * error immediately following this one is set ten times farther
- * away than usual, so that users of apr have a lot of room in
- * which to declare custom error codes.
- */
-#define APR_OS_START_USERERR (APR_OS_START_STATUS + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USEERR is obsolete, defined for compatibility only.
- * Use APR_OS_START_USERERR instead.
- */
-#define APR_OS_START_USEERR APR_OS_START_USERERR
-/**
- * APR_OS_START_CANONERR is where APR versions of errno values are defined
- * on systems which don't have the corresponding errno.
- */
-#define APR_OS_START_CANONERR (APR_OS_START_USERERR \
- + (APR_OS_ERRSPACE_SIZE * 10))
-/**
- * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
- * apr_status_t values.
- */
-#define APR_OS_START_EAIERR (APR_OS_START_CANONERR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_SYSERR folds platform-specific system error values into
- * apr_status_t values.
- */
-#define APR_OS_START_SYSERR (APR_OS_START_EAIERR + APR_OS_ERRSPACE_SIZE)
-
-/** no error. */
-#define APR_SUCCESS 0
-
-/**
- * @defgroup APR_Error APR Error Values
- * <PRE>
- * <b>APR ERROR VALUES</b>
- * APR_ENOSTAT APR was unable to perform a stat on the file
- * APR_ENOPOOL APR was not provided a pool with which to allocate memory
- * APR_EBADDATE APR was given an invalid date
- * APR_EINVALSOCK APR was given an invalid socket
- * APR_ENOPROC APR was not given a process structure
- * APR_ENOTIME APR was not given a time structure
- * APR_ENODIR APR was not given a directory structure
- * APR_ENOLOCK APR was not given a lock structure
- * APR_ENOPOLL APR was not given a poll structure
- * APR_ENOSOCKET APR was not given a socket
- * APR_ENOTHREAD APR was not given a thread structure
- * APR_ENOTHDKEY APR was not given a thread key structure
- * APR_ENOSHMAVAIL There is no more shared memory available
- * APR_EDSOOPEN APR was unable to open the dso object. For more
- * information call apr_dso_error().
- * APR_EGENERAL General failure (specific information not available)
- * APR_EBADIP The specified IP address is invalid
- * APR_EBADMASK The specified netmask is invalid
- * APR_ESYMNOTFOUND Could not find the requested symbol
- * </PRE>
- *
- * <PRE>
- * <b>APR STATUS VALUES</b>
- * APR_INCHILD Program is currently executing in the child
- * APR_INPARENT Program is currently executing in the parent
- * APR_DETACH The thread is detached
- * APR_NOTDETACH The thread is not detached
- * APR_CHILD_DONE The child has finished executing
- * APR_CHILD_NOTDONE The child has not finished executing
- * APR_TIMEUP The operation did not finish before the timeout
- * APR_INCOMPLETE The operation was incomplete although some processing
- * was performed and the results are partially valid
- * APR_BADCH Getopt found an option not in the option string
- * APR_BADARG Getopt found an option that is missing an argument
- * and an argument was specified in the option string
- * APR_EOF APR has encountered the end of the file
- * APR_NOTFOUND APR was unable to find the socket in the poll structure
- * APR_ANONYMOUS APR is using anonymous shared memory
- * APR_FILEBASED APR is using a file name as the key to the shared memory
- * APR_KEYBASED APR is using a shared key as the key to the shared memory
- * APR_EINIT Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * APR_ENOTIMPL The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * APR_EMISMATCH Two passwords do not match.
- * APR_EABSOLUTE The given path was absolute.
- * APR_ERELATIVE The given path was relative.
- * APR_EINCOMPLETE The given path was neither relative nor absolute.
- * APR_EABOVEROOT The given path was above the root path.
- * APR_EBUSY The given lock was busy.
- * APR_EPROC_UNKNOWN The given process wasn't recognized by APR
- * </PRE>
- * @{
- */
-/** @see APR_STATUS_IS_ENOSTAT */
-#define APR_ENOSTAT (APR_OS_START_ERROR + 1)
-/** @see APR_STATUS_IS_ENOPOOL */
-#define APR_ENOPOOL (APR_OS_START_ERROR + 2)
-/* empty slot: +3 */
-/** @see APR_STATUS_IS_EBADDATE */
-#define APR_EBADDATE (APR_OS_START_ERROR + 4)
-/** @see APR_STATUS_IS_EINVALSOCK */
-#define APR_EINVALSOCK (APR_OS_START_ERROR + 5)
-/** @see APR_STATUS_IS_ENOPROC */
-#define APR_ENOPROC (APR_OS_START_ERROR + 6)
-/** @see APR_STATUS_IS_ENOTIME */
-#define APR_ENOTIME (APR_OS_START_ERROR + 7)
-/** @see APR_STATUS_IS_ENODIR */
-#define APR_ENODIR (APR_OS_START_ERROR + 8)
-/** @see APR_STATUS_IS_ENOLOCK */
-#define APR_ENOLOCK (APR_OS_START_ERROR + 9)
-/** @see APR_STATUS_IS_ENOPOLL */
-#define APR_ENOPOLL (APR_OS_START_ERROR + 10)
-/** @see APR_STATUS_IS_ENOSOCKET */
-#define APR_ENOSOCKET (APR_OS_START_ERROR + 11)
-/** @see APR_STATUS_IS_ENOTHREAD */
-#define APR_ENOTHREAD (APR_OS_START_ERROR + 12)
-/** @see APR_STATUS_IS_ENOTHDKEY */
-#define APR_ENOTHDKEY (APR_OS_START_ERROR + 13)
-/** @see APR_STATUS_IS_EGENERAL */
-#define APR_EGENERAL (APR_OS_START_ERROR + 14)
-/** @see APR_STATUS_IS_ENOSHMAVAIL */
-#define APR_ENOSHMAVAIL (APR_OS_START_ERROR + 15)
-/** @see APR_STATUS_IS_EBADIP */
-#define APR_EBADIP (APR_OS_START_ERROR + 16)
-/** @see APR_STATUS_IS_EBADMASK */
-#define APR_EBADMASK (APR_OS_START_ERROR + 17)
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_EDSOPEN */
-#define APR_EDSOOPEN (APR_OS_START_ERROR + 19)
-/** @see APR_STATUS_IS_EABSOLUTE */
-#define APR_EABSOLUTE (APR_OS_START_ERROR + 20)
-/** @see APR_STATUS_IS_ERELATIVE */
-#define APR_ERELATIVE (APR_OS_START_ERROR + 21)
-/** @see APR_STATUS_IS_EINCOMPLETE */
-#define APR_EINCOMPLETE (APR_OS_START_ERROR + 22)
-/** @see APR_STATUS_IS_EABOVEROOT */
-#define APR_EABOVEROOT (APR_OS_START_ERROR + 23)
-/** @see APR_STATUS_IS_EBADPATH */
-#define APR_EBADPATH (APR_OS_START_ERROR + 24)
-/** @see APR_STATUS_IS_EPATHWILD */
-#define APR_EPATHWILD (APR_OS_START_ERROR + 25)
-/** @see APR_STATUS_IS_ESYMNOTFOUND */
-#define APR_ESYMNOTFOUND (APR_OS_START_ERROR + 26)
-/** @see APR_STATUS_IS_EPROC_UNKNOWN */
-#define APR_EPROC_UNKNOWN (APR_OS_START_ERROR + 27)
-/** @see APR_STATUS_IS_ENOTENOUGHENTROPY */
-#define APR_ENOTENOUGHENTROPY (APR_OS_START_ERROR + 28)
-/** @} */
-
-/**
- * @defgroup APR_STATUS_IS Status Value Tests
- * @warning For any particular error condition, more than one of these tests
- * may match. This is because platform-specific error codes may not
- * always match the semantics of the POSIX codes these tests (and the
- * corresponding APR error codes) are named after. A notable example
- * are the APR_STATUS_IS_ENOENT and APR_STATUS_IS_ENOTDIR tests on
- * Win32 platforms. The programmer should always be aware of this and
- * adjust the order of the tests accordingly.
- * @{
- */
-/**
- * APR was unable to perform a stat on the file
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOSTAT(s) ((s) == APR_ENOSTAT)
-/**
- * APR was not provided a pool with which to allocate memory
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOPOOL(s) ((s) == APR_ENOPOOL)
-/** APR was given an invalid date */
-#define APR_STATUS_IS_EBADDATE(s) ((s) == APR_EBADDATE)
-/** APR was given an invalid socket */
-#define APR_STATUS_IS_EINVALSOCK(s) ((s) == APR_EINVALSOCK)
-/** APR was not given a process structure */
-#define APR_STATUS_IS_ENOPROC(s) ((s) == APR_ENOPROC)
-/** APR was not given a time structure */
-#define APR_STATUS_IS_ENOTIME(s) ((s) == APR_ENOTIME)
-/** APR was not given a directory structure */
-#define APR_STATUS_IS_ENODIR(s) ((s) == APR_ENODIR)
-/** APR was not given a lock structure */
-#define APR_STATUS_IS_ENOLOCK(s) ((s) == APR_ENOLOCK)
-/** APR was not given a poll structure */
-#define APR_STATUS_IS_ENOPOLL(s) ((s) == APR_ENOPOLL)
-/** APR was not given a socket */
-#define APR_STATUS_IS_ENOSOCKET(s) ((s) == APR_ENOSOCKET)
-/** APR was not given a thread structure */
-#define APR_STATUS_IS_ENOTHREAD(s) ((s) == APR_ENOTHREAD)
-/** APR was not given a thread key structure */
-#define APR_STATUS_IS_ENOTHDKEY(s) ((s) == APR_ENOTHDKEY)
-/** Generic Error which can not be put into another spot */
-#define APR_STATUS_IS_EGENERAL(s) ((s) == APR_EGENERAL)
-/** There is no more shared memory available */
-#define APR_STATUS_IS_ENOSHMAVAIL(s) ((s) == APR_ENOSHMAVAIL)
-/** The specified IP address is invalid */
-#define APR_STATUS_IS_EBADIP(s) ((s) == APR_EBADIP)
-/** The specified netmask is invalid */
-#define APR_STATUS_IS_EBADMASK(s) ((s) == APR_EBADMASK)
-/* empty slot: +18 */
-/**
- * APR was unable to open the dso object.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN \
- || APR_TO_OS_ERROR(s) == ERROR_MOD_NOT_FOUND)
-#else
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN)
-#endif
-/** The given path was absolute. */
-#define APR_STATUS_IS_EABSOLUTE(s) ((s) == APR_EABSOLUTE)
-/** The given path was relative. */
-#define APR_STATUS_IS_ERELATIVE(s) ((s) == APR_ERELATIVE)
-/** The given path was neither relative nor absolute. */
-#define APR_STATUS_IS_EINCOMPLETE(s) ((s) == APR_EINCOMPLETE)
-/** The given path was above the root path. */
-#define APR_STATUS_IS_EABOVEROOT(s) ((s) == APR_EABOVEROOT)
-/** The given path was bad. */
-#define APR_STATUS_IS_EBADPATH(s) ((s) == APR_EBADPATH)
-/** The given path contained wildcards. */
-#define APR_STATUS_IS_EPATHWILD(s) ((s) == APR_EPATHWILD)
-/** Could not find the requested symbol.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND \
- || APR_TO_OS_ERROR(s) == ERROR_PROC_NOT_FOUND)
-#else
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND)
-#endif
-/** The given process was not recognized by APR. */
-#define APR_STATUS_IS_EPROC_UNKNOWN(s) ((s) == APR_EPROC_UNKNOWN)
-
-/** APR could not gather enough entropy to continue. */
-#define APR_STATUS_IS_ENOTENOUGHENTROPY(s) ((s) == APR_ENOTENOUGHENTROPY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error
- * @{
- */
-/** @see APR_STATUS_IS_INCHILD */
-#define APR_INCHILD (APR_OS_START_STATUS + 1)
-/** @see APR_STATUS_IS_INPARENT */
-#define APR_INPARENT (APR_OS_START_STATUS + 2)
-/** @see APR_STATUS_IS_DETACH */
-#define APR_DETACH (APR_OS_START_STATUS + 3)
-/** @see APR_STATUS_IS_NOTDETACH */
-#define APR_NOTDETACH (APR_OS_START_STATUS + 4)
-/** @see APR_STATUS_IS_CHILD_DONE */
-#define APR_CHILD_DONE (APR_OS_START_STATUS + 5)
-/** @see APR_STATUS_IS_CHILD_NOTDONE */
-#define APR_CHILD_NOTDONE (APR_OS_START_STATUS + 6)
-/** @see APR_STATUS_IS_TIMEUP */
-#define APR_TIMEUP (APR_OS_START_STATUS + 7)
-/** @see APR_STATUS_IS_INCOMPLETE */
-#define APR_INCOMPLETE (APR_OS_START_STATUS + 8)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/** @see APR_STATUS_IS_BADCH */
-#define APR_BADCH (APR_OS_START_STATUS + 12)
-/** @see APR_STATUS_IS_BADARG */
-#define APR_BADARG (APR_OS_START_STATUS + 13)
-/** @see APR_STATUS_IS_EOF */
-#define APR_EOF (APR_OS_START_STATUS + 14)
-/** @see APR_STATUS_IS_NOTFOUND */
-#define APR_NOTFOUND (APR_OS_START_STATUS + 15)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_ANONYMOUS */
-#define APR_ANONYMOUS (APR_OS_START_STATUS + 19)
-/** @see APR_STATUS_IS_FILEBASED */
-#define APR_FILEBASED (APR_OS_START_STATUS + 20)
-/** @see APR_STATUS_IS_KEYBASED */
-#define APR_KEYBASED (APR_OS_START_STATUS + 21)
-/** @see APR_STATUS_IS_EINIT */
-#define APR_EINIT (APR_OS_START_STATUS + 22)
-/** @see APR_STATUS_IS_ENOTIMPL */
-#define APR_ENOTIMPL (APR_OS_START_STATUS + 23)
-/** @see APR_STATUS_IS_EMISMATCH */
-#define APR_EMISMATCH (APR_OS_START_STATUS + 24)
-/** @see APR_STATUS_IS_EBUSY */
-#define APR_EBUSY (APR_OS_START_STATUS + 25)
-/** @} */
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-/**
- * Program is currently executing in the child
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code */
-#define APR_STATUS_IS_INCHILD(s) ((s) == APR_INCHILD)
-/**
- * Program is currently executing in the parent
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INPARENT(s) ((s) == APR_INPARENT)
-/**
- * The thread is detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_DETACH(s) ((s) == APR_DETACH)
-/**
- * The thread is not detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTDETACH(s) ((s) == APR_NOTDETACH)
-/**
- * The child has finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_DONE(s) ((s) == APR_CHILD_DONE)
-/**
- * The child has not finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_NOTDONE(s) ((s) == APR_CHILD_NOTDONE)
-/**
- * The operation did not finish before the timeout
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP)
-/**
- * The operation was incomplete although some processing was performed
- * and the results are partially valid.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INCOMPLETE(s) ((s) == APR_INCOMPLETE)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/**
- * Getopt found an option not in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADCH(s) ((s) == APR_BADCH)
-/**
- * Getopt found an option not in the option string and an argument was
- * specified in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADARG(s) ((s) == APR_BADARG)
-/**
- * APR has encountered the end of the file
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EOF(s) ((s) == APR_EOF)
-/**
- * APR was unable to find the socket in the poll structure
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTFOUND(s) ((s) == APR_NOTFOUND)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/**
- * APR is using anonymous shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ANONYMOUS(s) ((s) == APR_ANONYMOUS)
-/**
- * APR is using a file name as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_FILEBASED(s) ((s) == APR_FILEBASED)
-/**
- * APR is using a shared key as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_KEYBASED(s) ((s) == APR_KEYBASED)
-/**
- * Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EINIT(s) ((s) == APR_EINIT)
-/**
- * The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOTIMPL(s) ((s) == APR_ENOTIMPL)
-/**
- * Two passwords do not match.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EMISMATCH(s) ((s) == APR_EMISMATCH)
-/**
- * The given lock was busy
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EBUSY(s) ((s) == APR_EBUSY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error APR Error Values
- * @{
- */
-/* APR CANONICAL ERROR VALUES */
-/** @see APR_STATUS_IS_EACCES */
-#ifdef EACCES
-#define APR_EACCES EACCES
-#else
-#define APR_EACCES (APR_OS_START_CANONERR + 1)
-#endif
-
-/** @see APR_STATUS_IS_EXIST */
-#ifdef EEXIST
-#define APR_EEXIST EEXIST
-#else
-#define APR_EEXIST (APR_OS_START_CANONERR + 2)
-#endif
-
-/** @see APR_STATUS_IS_ENAMETOOLONG */
-#ifdef ENAMETOOLONG
-#define APR_ENAMETOOLONG ENAMETOOLONG
-#else
-#define APR_ENAMETOOLONG (APR_OS_START_CANONERR + 3)
-#endif
-
-/** @see APR_STATUS_IS_ENOENT */
-#ifdef ENOENT
-#define APR_ENOENT ENOENT
-#else
-#define APR_ENOENT (APR_OS_START_CANONERR + 4)
-#endif
-
-/** @see APR_STATUS_IS_ENOTDIR */
-#ifdef ENOTDIR
-#define APR_ENOTDIR ENOTDIR
-#else
-#define APR_ENOTDIR (APR_OS_START_CANONERR + 5)
-#endif
-
-/** @see APR_STATUS_IS_ENOSPC */
-#ifdef ENOSPC
-#define APR_ENOSPC ENOSPC
-#else
-#define APR_ENOSPC (APR_OS_START_CANONERR + 6)
-#endif
-
-/** @see APR_STATUS_IS_ENOMEM */
-#ifdef ENOMEM
-#define APR_ENOMEM ENOMEM
-#else
-#define APR_ENOMEM (APR_OS_START_CANONERR + 7)
-#endif
-
-/** @see APR_STATUS_IS_EMFILE */
-#ifdef EMFILE
-#define APR_EMFILE EMFILE
-#else
-#define APR_EMFILE (APR_OS_START_CANONERR + 8)
-#endif
-
-/** @see APR_STATUS_IS_ENFILE */
-#ifdef ENFILE
-#define APR_ENFILE ENFILE
-#else
-#define APR_ENFILE (APR_OS_START_CANONERR + 9)
-#endif
-
-/** @see APR_STATUS_IS_EBADF */
-#ifdef EBADF
-#define APR_EBADF EBADF
-#else
-#define APR_EBADF (APR_OS_START_CANONERR + 10)
-#endif
-
-/** @see APR_STATUS_IS_EINVAL */
-#ifdef EINVAL
-#define APR_EINVAL EINVAL
-#else
-#define APR_EINVAL (APR_OS_START_CANONERR + 11)
-#endif
-
-/** @see APR_STATUS_IS_ESPIPE */
-#ifdef ESPIPE
-#define APR_ESPIPE ESPIPE
-#else
-#define APR_ESPIPE (APR_OS_START_CANONERR + 12)
-#endif
-
-/**
- * @see APR_STATUS_IS_EAGAIN
- * @warning use APR_STATUS_IS_EAGAIN instead of just testing this value
- */
-#ifdef EAGAIN
-#define APR_EAGAIN EAGAIN
-#elif defined(EWOULDBLOCK)
-#define APR_EAGAIN EWOULDBLOCK
-#else
-#define APR_EAGAIN (APR_OS_START_CANONERR + 13)
-#endif
-
-/** @see APR_STATUS_IS_EINTR */
-#ifdef EINTR
-#define APR_EINTR EINTR
-#else
-#define APR_EINTR (APR_OS_START_CANONERR + 14)
-#endif
-
-/** @see APR_STATUS_IS_ENOTSOCK */
-#ifdef ENOTSOCK
-#define APR_ENOTSOCK ENOTSOCK
-#else
-#define APR_ENOTSOCK (APR_OS_START_CANONERR + 15)
-#endif
-
-/** @see APR_STATUS_IS_ECONNREFUSED */
-#ifdef ECONNREFUSED
-#define APR_ECONNREFUSED ECONNREFUSED
-#else
-#define APR_ECONNREFUSED (APR_OS_START_CANONERR + 16)
-#endif
-
-/** @see APR_STATUS_IS_EINPROGRESS */
-#ifdef EINPROGRESS
-#define APR_EINPROGRESS EINPROGRESS
-#else
-#define APR_EINPROGRESS (APR_OS_START_CANONERR + 17)
-#endif
-
-/**
- * @see APR_STATUS_IS_ECONNABORTED
- * @warning use APR_STATUS_IS_ECONNABORTED instead of just testing this value
- */
-
-#ifdef ECONNABORTED
-#define APR_ECONNABORTED ECONNABORTED
-#else
-#define APR_ECONNABORTED (APR_OS_START_CANONERR + 18)
-#endif
-
-/** @see APR_STATUS_IS_ECONNRESET */
-#ifdef ECONNRESET
-#define APR_ECONNRESET ECONNRESET
-#else
-#define APR_ECONNRESET (APR_OS_START_CANONERR + 19)
-#endif
-
-/** @see APR_STATUS_IS_ETIMEDOUT
- * @deprecated */
-#ifdef ETIMEDOUT
-#define APR_ETIMEDOUT ETIMEDOUT
-#else
-#define APR_ETIMEDOUT (APR_OS_START_CANONERR + 20)
-#endif
-
-/** @see APR_STATUS_IS_EHOSTUNREACH */
-#ifdef EHOSTUNREACH
-#define APR_EHOSTUNREACH EHOSTUNREACH
-#else
-#define APR_EHOSTUNREACH (APR_OS_START_CANONERR + 21)
-#endif
-
-/** @see APR_STATUS_IS_ENETUNREACH */
-#ifdef ENETUNREACH
-#define APR_ENETUNREACH ENETUNREACH
-#else
-#define APR_ENETUNREACH (APR_OS_START_CANONERR + 22)
-#endif
-
-/** @see APR_STATUS_IS_EFTYPE */
-#ifdef EFTYPE
-#define APR_EFTYPE EFTYPE
-#else
-#define APR_EFTYPE (APR_OS_START_CANONERR + 23)
-#endif
-
-/** @see APR_STATUS_IS_EPIPE */
-#ifdef EPIPE
-#define APR_EPIPE EPIPE
-#else
-#define APR_EPIPE (APR_OS_START_CANONERR + 24)
-#endif
-
-/** @see APR_STATUS_IS_EXDEV */
-#ifdef EXDEV
-#define APR_EXDEV EXDEV
-#else
-#define APR_EXDEV (APR_OS_START_CANONERR + 25)
-#endif
-
-/** @see APR_STATUS_IS_ENOTEMPTY */
-#ifdef ENOTEMPTY
-#define APR_ENOTEMPTY ENOTEMPTY
-#else
-#define APR_ENOTEMPTY (APR_OS_START_CANONERR + 26)
-#endif
-
-/** @} */
-
-#if defined(OS2) && !defined(DOXYGEN)
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define INCL_DOSERRORS
-#define INCL_DOS
-
-/* Leave these undefined.
- * OS2 doesn't rely on the errno concept.
- * The API calls always return a result codes which
- * should be filtered through APR_FROM_OS_ERROR().
- *
- * #define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
- * #define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
- */
-
-/* A special case, only socket calls require this;
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(errno))
-#define apr_set_netos_error(e) (errno = APR_TO_OS_ERROR(e))
-
-/* And this needs to be greped away for good:
- */
-#define APR_OS2_STATUS(e) (APR_FROM_OS_ERROR(e))
-
-/* These can't sit in a private header, so in spite of the extra size,
- * they need to be made available here.
- */
-#define SOCBASEERR 10000
-#define SOCEPERM (SOCBASEERR+1) /* Not owner */
-#define SOCESRCH (SOCBASEERR+3) /* No such process */
-#define SOCEINTR (SOCBASEERR+4) /* Interrupted system call */
-#define SOCENXIO (SOCBASEERR+6) /* No such device or address */
-#define SOCEBADF (SOCBASEERR+9) /* Bad file number */
-#define SOCEACCES (SOCBASEERR+13) /* Permission denied */
-#define SOCEFAULT (SOCBASEERR+14) /* Bad address */
-#define SOCEINVAL (SOCBASEERR+22) /* Invalid argument */
-#define SOCEMFILE (SOCBASEERR+24) /* Too many open files */
-#define SOCEPIPE (SOCBASEERR+32) /* Broken pipe */
-#define SOCEOS2ERR (SOCBASEERR+100) /* OS/2 Error */
-#define SOCEWOULDBLOCK (SOCBASEERR+35) /* Operation would block */
-#define SOCEINPROGRESS (SOCBASEERR+36) /* Operation now in progress */
-#define SOCEALREADY (SOCBASEERR+37) /* Operation already in progress */
-#define SOCENOTSOCK (SOCBASEERR+38) /* Socket operation on non-socket */
-#define SOCEDESTADDRREQ (SOCBASEERR+39) /* Destination address required */
-#define SOCEMSGSIZE (SOCBASEERR+40) /* Message too long */
-#define SOCEPROTOTYPE (SOCBASEERR+41) /* Protocol wrong type for socket */
-#define SOCENOPROTOOPT (SOCBASEERR+42) /* Protocol not available */
-#define SOCEPROTONOSUPPORT (SOCBASEERR+43) /* Protocol not supported */
-#define SOCESOCKTNOSUPPORT (SOCBASEERR+44) /* Socket type not supported */
-#define SOCEOPNOTSUPP (SOCBASEERR+45) /* Operation not supported on socket */
-#define SOCEPFNOSUPPORT (SOCBASEERR+46) /* Protocol family not supported */
-#define SOCEAFNOSUPPORT (SOCBASEERR+47) /* Address family not supported by protocol family */
-#define SOCEADDRINUSE (SOCBASEERR+48) /* Address already in use */
-#define SOCEADDRNOTAVAIL (SOCBASEERR+49) /* Can't assign requested address */
-#define SOCENETDOWN (SOCBASEERR+50) /* Network is down */
-#define SOCENETUNREACH (SOCBASEERR+51) /* Network is unreachable */
-#define SOCENETRESET (SOCBASEERR+52) /* Network dropped connection on reset */
-#define SOCECONNABORTED (SOCBASEERR+53) /* Software caused connection abort */
-#define SOCECONNRESET (SOCBASEERR+54) /* Connection reset by peer */
-#define SOCENOBUFS (SOCBASEERR+55) /* No buffer space available */
-#define SOCEISCONN (SOCBASEERR+56) /* Socket is already connected */
-#define SOCENOTCONN (SOCBASEERR+57) /* Socket is not connected */
-#define SOCESHUTDOWN (SOCBASEERR+58) /* Can't send after socket shutdown */
-#define SOCETOOMANYREFS (SOCBASEERR+59) /* Too many references: can't splice */
-#define SOCETIMEDOUT (SOCBASEERR+60) /* Connection timed out */
-#define SOCECONNREFUSED (SOCBASEERR+61) /* Connection refused */
-#define SOCELOOP (SOCBASEERR+62) /* Too many levels of symbolic links */
-#define SOCENAMETOOLONG (SOCBASEERR+63) /* File name too long */
-#define SOCEHOSTDOWN (SOCBASEERR+64) /* Host is down */
-#define SOCEHOSTUNREACH (SOCBASEERR+65) /* No route to host */
-#define SOCENOTEMPTY (SOCBASEERR+66) /* Directory not empty */
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + SOCENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + SOCEWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + SOCEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + SOCENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + SOCECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + SOCEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + SOCECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + SOCECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE \
- || (s) == APR_OS_START_SYSERR + SOCEPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-
-/*
- Sorry, too tired to wrap this up for OS2... feel free to
- fit the following into their best matches.
-
- { ERROR_NO_SIGNAL_SENT, ESRCH },
- { SOCEALREADY, EALREADY },
- { SOCEDESTADDRREQ, EDESTADDRREQ },
- { SOCEMSGSIZE, EMSGSIZE },
- { SOCEPROTOTYPE, EPROTOTYPE },
- { SOCENOPROTOOPT, ENOPROTOOPT },
- { SOCEPROTONOSUPPORT, EPROTONOSUPPORT },
- { SOCESOCKTNOSUPPORT, ESOCKTNOSUPPORT },
- { SOCEOPNOTSUPP, EOPNOTSUPP },
- { SOCEPFNOSUPPORT, EPFNOSUPPORT },
- { SOCEAFNOSUPPORT, EAFNOSUPPORT },
- { SOCEADDRINUSE, EADDRINUSE },
- { SOCEADDRNOTAVAIL, EADDRNOTAVAIL },
- { SOCENETDOWN, ENETDOWN },
- { SOCENETRESET, ENETRESET },
- { SOCENOBUFS, ENOBUFS },
- { SOCEISCONN, EISCONN },
- { SOCENOTCONN, ENOTCONN },
- { SOCESHUTDOWN, ESHUTDOWN },
- { SOCETOOMANYREFS, ETOOMANYREFS },
- { SOCELOOP, ELOOP },
- { SOCEHOSTDOWN, EHOSTDOWN },
- { SOCENOTEMPTY, ENOTEMPTY },
- { SOCEPIPE, EPIPE }
-*/
-
-#elif defined(WIN32) && !defined(DOXYGEN) /* !defined(OS2) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
-#define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_CANNOT_MAKE \
- || (s) == APR_OS_START_SYSERR + ERROR_CURRENT_DIRECTORY \
- || (s) == APR_OS_START_SYSERR + ERROR_DRIVE_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_FAIL_I24 \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_NETWORK_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + WSAENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NETPATH \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NET_NAME \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_PATHNAME \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DRIVE)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM \
- || (s) == APR_OS_START_SYSERR + ERROR_ARENA_TRASHED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_MEMORY \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_BLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_QUOTA \
- || (s) == APR_OS_START_SYSERR + ERROR_OUTOFMEMORY)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_TARGET_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_ACCESS \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_SEEK_ON_DEVICE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_PROC_SLOTS \
- || (s) == APR_OS_START_SYSERR + ERROR_NESTING_NOT_ALLOWED \
- || (s) == APR_OS_START_SYSERR + ERROR_MAX_THRDS_REACHED \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + ERROR_NETNAME_DELETED \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_EXE_MACHINE_TYPE_MISMATCH \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DLL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_MODULETYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_EXE_FORMAT \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_EXE_SIGNATURE \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_CORRUPT \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_FORMAT)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY)
-
-#elif defined(NETWARE) && defined(USE_WINSOCK) && !defined(DOXYGEN) /* !defined(OS2) && !defined(WIN32) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this: */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_ENETDOWN(s) ((s) == APR_OS_START_SYSERR + WSAENETDOWN)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY)
-
-#else /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/*
- * os error codes are clib error codes
- */
-#define APR_FROM_OS_ERROR(e) (e)
-#define APR_TO_OS_ERROR(e) (e)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (errno)
-#define apr_set_netos_error(e) (errno = (e))
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-
-/** permission denied */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-/** file exists */
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-/** path name is too long */
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-/**
- * no such file or directory
- * @remark
- * EMVSCATLG can be returned by the automounter on z/OS for
- * paths which do not exist.
- */
-#ifdef EMVSCATLG
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == EMVSCATLG)
-#else
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#endif
-/** not a directory */
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-/** no space left on device */
-#ifdef EDQUOT
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == EDQUOT)
-#else
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#endif
-/** not enough memory */
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-/** too many open files */
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-/** file table overflow */
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-/** bad file # */
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-/** invalid argument */
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-/** illegal seek */
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-/** operation would block */
-#if !defined(EWOULDBLOCK) || !defined(EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#elif (EWOULDBLOCK == EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#else
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK)
-#endif
-
-/** interrupted system call */
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR)
-/** socket operation on a non-socket */
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK)
-/** Connection Refused */
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED)
-/** operation now in progress */
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS)
-
-/**
- * Software caused connection abort
- * @remark
- * EPROTO on certain older kernels really means ECONNABORTED, so we need to
- * ignore it for them. See discussion in new-httpd archives nh.9701 & nh.9603
- *
- * There is potentially a bug in Solaris 2.x x<6, and other boxes that
- * implement tcp sockets in userland (i.e. on top of STREAMS). On these
- * systems, EPROTO can actually result in a fatal loop. See PR#981 for
- * example. It's hard to handle both uses of EPROTO.
- */
-#ifdef EPROTO
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == EPROTO)
-#else
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED)
-#endif
-
-/** Connection Reset by peer */
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET)
-/** Operation timed out
- * @deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT)
-/** no route to host */
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH)
-/** network is unreachable */
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH)
-/** inappropiate file type or format */
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-/** broken pipe */
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-/** cross device link */
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-/** Directory Not Empty */
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY || \
- (s) == APR_EEXIST)
-/** @} */
-
-#endif /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/** @} */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ! APR_ERRNO_H */
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
- */
-#include <sys/errno.h>
-
-
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)errno.h 8.5 (Berkeley) 1/21/94
- */
-
-#ifndef _SYS_ERRNO_H_
-#define _SYS_ERRNO_H_
-
-#include <sys/cdefs.h>
-__BEGIN_DECLS
-extern int * __error(void);
-#define errno (*__error())
-__END_DECLS
-
-/*
- * Error codes
- */
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* Input/output error */
-#define ENXIO 6 /* Device not configured */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file descriptor */
-#define ECHILD 10 /* No child processes */
-#define EDEADLK 11 /* Resource deadlock avoided */
- /* 11 was EAGAIN */
-#define ENOMEM 12 /* Cannot allocate memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ENOTBLK 15 /* Block device required */
-#endif
-#define EBUSY 16 /* Device / Resource busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* Operation not supported by device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* Too many open files in system */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#define ETXTBSY 26 /* Text file busy */
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only file system */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-
-/* math software */
-#define EDOM 33 /* Numerical argument out of domain */
-#define ERANGE 34 /* Result too large */
-
-/* non-blocking and interrupt i/o */
-#define EAGAIN 35 /* Resource temporarily unavailable */
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define EINPROGRESS 36 /* Operation now in progress */
-#define EALREADY 37 /* Operation already in progress */
-
-/* ipc/network software -- argument errors */
-#define ENOTSOCK 38 /* Socket operation on non-socket */
-#define EDESTADDRREQ 39 /* Destination address required */
-#define EMSGSIZE 40 /* Message too long */
-#define EPROTOTYPE 41 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 42 /* Protocol not available */
-#define EPROTONOSUPPORT 43 /* Protocol not supported */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
-#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
-#define ENOTSUP 45 /* Operation not supported */
-#if !__DARWIN_UNIX03 && !defined(KERNEL)
-/*
- * This is the same for binary and source copmpatability, unless compiling
- * the kernel itself, or compiling __DARWIN_UNIX03; if compiling for the
- * kernel, the correct value will be returned. If compiling non-POSIX
- * source, the kernel return value will be converted by a stub in libc, and
- * if compiling source with __DARWIN_UNIX03, the conversion in libc is not
- * done, and the caller gets the expected (discrete) value.
- */
-#define EOPNOTSUPP ENOTSUP /* Operation not supported on socket */
-#endif /* !__DARWIN_UNIX03 && !KERNEL */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EPFNOSUPPORT 46 /* Protocol family not supported */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */
-#define EADDRINUSE 48 /* Address already in use */
-#define EADDRNOTAVAIL 49 /* Can't assign requested address */
-
-/* ipc/network software -- operational errors */
-#define ENETDOWN 50 /* Network is down */
-#define ENETUNREACH 51 /* Network is unreachable */
-#define ENETRESET 52 /* Network dropped connection on reset */
-#define ECONNABORTED 53 /* Software caused connection abort */
-#define ECONNRESET 54 /* Connection reset by peer */
-#define ENOBUFS 55 /* No buffer space available */
-#define EISCONN 56 /* Socket is already connected */
-#define ENOTCONN 57 /* Socket is not connected */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ESHUTDOWN 58 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 59 /* Too many references: can't splice */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define ETIMEDOUT 60 /* Operation timed out */
-#define ECONNREFUSED 61 /* Connection refused */
-
-#define ELOOP 62 /* Too many levels of symbolic links */
-#define ENAMETOOLONG 63 /* File name too long */
-
-/* should be rearranged */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EHOSTDOWN 64 /* Host is down */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define EHOSTUNREACH 65 /* No route to host */
-#define ENOTEMPTY 66 /* Directory not empty */
-
-/* quotas & mush */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EPROCLIM 67 /* Too many processes */
-#define EUSERS 68 /* Too many users */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define EDQUOT 69 /* Disc quota exceeded */
-
-/* Network File System */
-#define ESTALE 70 /* Stale NFS file handle */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EREMOTE 71 /* Too many levels of remote in path */
-#define EBADRPC 72 /* RPC struct is bad */
-#define ERPCMISMATCH 73 /* RPC version wrong */
-#define EPROGUNAVAIL 74 /* RPC prog. not avail */
-#define EPROGMISMATCH 75 /* Program version wrong */
-#define EPROCUNAVAIL 76 /* Bad procedure for program */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define ENOLCK 77 /* No locks available */
-#define ENOSYS 78 /* Function not implemented */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EFTYPE 79 /* Inappropriate file type or format */
-#define EAUTH 80 /* Authentication error */
-#define ENEEDAUTH 81 /* Need authenticator */
-
-/* Intelligent device errors */
-#define EPWROFF 82 /* Device power is off */
-#define EDEVERR 83 /* Device error, e.g. paper out */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define EOVERFLOW 84 /* Value too large to be stored in data type */
-
-/* Program loading errors */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EBADEXEC 85 /* Bad executable */
-#define EBADARCH 86 /* Bad CPU type in executable */
-#define ESHLIBVERS 87 /* Shared library version mismatch */
-#define EBADMACHO 88 /* Malformed Macho file */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define ECANCELED 89 /* Operation canceled */
-
-#define EIDRM 90 /* Identifier removed */
-#define ENOMSG 91 /* No message of desired type */
-#define EILSEQ 92 /* Illegal byte sequence */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ENOATTR 93 /* Attribute not found */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define EBADMSG 94 /* Bad message */
-#define EMULTIHOP 95 /* Reserved */
-#define ENODATA 96 /* No message available on STREAM */
-#define ENOLINK 97 /* Reserved */
-#define ENOSR 98 /* No STREAM resources */
-#define ENOSTR 99 /* Not a STREAM */
-#define EPROTO 100 /* Protocol error */
-#define ETIME 101 /* STREAM ioctl timeout */
-
-#if __DARWIN_UNIX03 || defined(KERNEL)
-/* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */
-#define EOPNOTSUPP 102 /* Operation not supported on socket */
-#endif /* __DARWIN_UNIX03 || KERNEL */
-
-#define ENOPOLICY 103 /* No such policy registered */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ELAST 103 /* Must be equal largest errno */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#endif /* _SYS_ERRNO_H_ */
diff --git a/doc/legacy/errno.list.solaris.txt b/doc/legacy/errno.list.solaris.txt
deleted file mode 100644
index 23601e9d374..00000000000
--- a/doc/legacy/errno.list.solaris.txt
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2000 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-
-#ifndef _SYS_ERRNO_H
-#define _SYS_ERRNO_H
-
-#pragma ident "@(#)errno.h 1.22 05/06/08 SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Error codes
- */
-
-#define EPERM 1 /* Not super-user */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* interrupted system call */
-#define EIO 5 /* I/O error */
-#define ENXIO 6 /* No such device or address */
-#define E2BIG 7 /* Arg list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file number */
-#define ECHILD 10 /* No children */
-#define EAGAIN 11 /* Resource temporarily unavailable */
-#define ENOMEM 12 /* Not enough core */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#define ENOTBLK 15 /* Block device required */
-#define EBUSY 16 /* Mount device busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* No such device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* File table overflow */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#define ETXTBSY 26 /* Text file busy */
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read only file system */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-#define EDOM 33 /* Math arg out of domain of func */
-#define ERANGE 34 /* Math result not representable */
-#define ENOMSG 35 /* No message of desired type */
-#define EIDRM 36 /* Identifier removed */
-#define ECHRNG 37 /* Channel number out of range */
-#define EL2NSYNC 38 /* Level 2 not synchronized */
-#define EL3HLT 39 /* Level 3 halted */
-#define EL3RST 40 /* Level 3 reset */
-#define ELNRNG 41 /* Link number out of range */
-#define EUNATCH 42 /* Protocol driver not attached */
-#define ENOCSI 43 /* No CSI structure available */
-#define EL2HLT 44 /* Level 2 halted */
-#define EDEADLK 45 /* Deadlock condition. */
-#define ENOLCK 46 /* No record locks available. */
-#define ECANCELED 47 /* Operation canceled */
-#define ENOTSUP 48 /* Operation not supported */
-
-/* Filesystem Quotas */
-#define EDQUOT 49 /* Disc quota exceeded */
-
-/* Convergent Error Returns */
-#define EBADE 50 /* invalid exchange */
-#define EBADR 51 /* invalid request descriptor */
-#define EXFULL 52 /* exchange full */
-#define ENOANO 53 /* no anode */
-#define EBADRQC 54 /* invalid request code */
-#define EBADSLT 55 /* invalid slot */
-#define EDEADLOCK 56 /* file locking deadlock error */
-
-#define EBFONT 57 /* bad font file fmt */
-
-/* Interprocess Robust Locks */
-#define EOWNERDEAD 58 /* process died with the lock */
-#define ENOTRECOVERABLE 59 /* lock is not recoverable */
-
-/* stream problems */
-#define ENOSTR 60 /* Device not a stream */
-#define ENODATA 61 /* no data (for no delay io) */
-#define ETIME 62 /* timer expired */
-#define ENOSR 63 /* out of streams resources */
-
-#define ENONET 64 /* Machine is not on the network */
-#define ENOPKG 65 /* Package not installed */
-#define EREMOTE 66 /* The object is remote */
-#define ENOLINK 67 /* the link has been severed */
-#define EADV 68 /* advertise error */
-#define ESRMNT 69 /* srmount error */
-
-#define ECOMM 70 /* Communication error on send */
-#define EPROTO 71 /* Protocol error */
-
-/* Interprocess Robust Locks */
-#define ELOCKUNMAPPED 72 /* locked lock was unmapped */
-
-#define ENOTACTIVE 73 /* Facility is not active */
-#define EMULTIHOP 74 /* multihop attempted */
-#define EBADMSG 77 /* trying to read unreadable message */
-#define ENAMETOOLONG 78 /* path name is too long */
-#define EOVERFLOW 79 /* value too large to be stored in data type */
-#define ENOTUNIQ 80 /* given log. name not unique */
-#define EBADFD 81 /* f.d. invalid for this operation */
-#define EREMCHG 82 /* Remote address changed */
-
-/* shared library problems */
-#define ELIBACC 83 /* Can't access a needed shared lib. */
-#define ELIBBAD 84 /* Accessing a corrupted shared lib. */
-#define ELIBSCN 85 /* .lib section in a.out corrupted. */
-#define ELIBMAX 86 /* Attempting to link in too many libs. */
-#define ELIBEXEC 87 /* Attempting to exec a shared library. */
-#define EILSEQ 88 /* Illegal byte sequence. */
-#define ENOSYS 89 /* Unsupported file system operation */
-#define ELOOP 90 /* Symbolic link loop */
-#define ERESTART 91 /* Restartable system call */
-#define ESTRPIPE 92 /* if pipe/FIFO, don't sleep in stream head */
-#define ENOTEMPTY 93 /* directory not empty */
-#define EUSERS 94 /* Too many users (for UFS) */
-
-/* BSD Networking Software */
- /* argument errors */
-#define ENOTSOCK 95 /* Socket operation on non-socket */
-#define EDESTADDRREQ 96 /* Destination address required */
-#define EMSGSIZE 97 /* Message too long */
-#define EPROTOTYPE 98 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 99 /* Protocol not available */
-#define EPROTONOSUPPORT 120 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 121 /* Socket type not supported */
-#define EOPNOTSUPP 122 /* Operation not supported on socket */
-#define EPFNOSUPPORT 123 /* Protocol family not supported */
-#define EAFNOSUPPORT 124 /* Address family not supported by */
- /* protocol family */
-#define EADDRINUSE 125 /* Address already in use */
-#define EADDRNOTAVAIL 126 /* Can't assign requested address */
- /* operational errors */
-#define ENETDOWN 127 /* Network is down */
-#define ENETUNREACH 128 /* Network is unreachable */
-#define ENETRESET 129 /* Network dropped connection because */
- /* of reset */
-#define ECONNABORTED 130 /* Software caused connection abort */
-#define ECONNRESET 131 /* Connection reset by peer */
-#define ENOBUFS 132 /* No buffer space available */
-#define EISCONN 133 /* Socket is already connected */
-#define ENOTCONN 134 /* Socket is not connected */
-/* XENIX has 135 - 142 */
-#define ESHUTDOWN 143 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 144 /* Too many references: can't splice */
-#define ETIMEDOUT 145 /* Connection timed out */
-#define ECONNREFUSED 146 /* Connection refused */
-#define EHOSTDOWN 147 /* Host is down */
-#define EHOSTUNREACH 148 /* No route to host */
-#define EWOULDBLOCK EAGAIN
-#define EALREADY 149 /* operation already in progress */
-#define EINPROGRESS 150 /* operation now in progress */
-
-/* SUN Network File System */
-#define ESTALE 151 /* Stale NFS file handle */
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ERRNO_H */
diff --git a/doc/legacy/get_put_api_using_xattr.txt b/doc/legacy/get_put_api_using_xattr.txt
deleted file mode 100644
index 243f9f1aec2..00000000000
--- a/doc/legacy/get_put_api_using_xattr.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-GlusterFS get/put API interface provided through extended attributes:
-
-API usage:
- int put(dirpath/filename, data): setfattr -n glusterfs.file.<filename> -v <data> <dirpath>
- void *get(dirpath/filename): getfattr -n glusterfs.file.<filename> <dirpath>
-
-
-internals:
-* unify handling setxattr/getxattr
- - setxattr
- unify's setxattr forwards setxattr call to all the child nodes with XATTR_REPLACE flag, except namespace. setxattr will succeeds only on the child node on which the file already exists. if the setxattr operation fails on all child nodes, it indicates that the file does not already exist on any of the child nodes. unify follows the same rules as it follows for create, but using setxattr call itself with XATTR_CREATE flag. unify sends a setxattr to namespace first, with zero length data. if namespace setxattr succeeds, unify schedules setxattr to one of the child nodes.
-
- - getxattr
- unify's getxattr forwards getxattr call to all the child nodes. wait for completion of operation on all the child nodes, and returns success if getxattr succeeded one child node.
-
-* posix handling setxattr/getxattr
- - setxattr
- posix setxattr does a open with O_CREAT|O_TRUNC on the <path>/<name>, writes value of the setxattr as data into the file and closes the file. when data is null, posix setxattr avoids doing write. file is closed after write.
-
- - getxattr
- posix getxattr does open with O_RDONLY on the <path>/<name>, reads the complete content of the file. file is closed after read.
-
diff --git a/doc/legacy/hacker-guide/Makefile.am b/doc/legacy/hacker-guide/Makefile.am
deleted file mode 100644
index 65c92ac235e..00000000000
--- a/doc/legacy/hacker-guide/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-EXTRA_DIST = replicate.txt bdb.txt posix.txt call-stub.txt write-behind.txt
-
-#EXTRA_DIST = hacker-guide.tex afr.txt bdb.txt posix.txt call-stub.txt write-behind.txt
-#hacker_guidedir = $(docdir)
-#hacker_guide_DATA = hacker-guide.pdf
-
-#hacker-guide.pdf: $(EXTRA_DIST)
-# pdflatex $(srcdir)/hacker-guide.tex
diff --git a/doc/legacy/hacker-guide/adding-fops.txt b/doc/legacy/hacker-guide/adding-fops.txt
deleted file mode 100644
index e70dbbdc829..00000000000
--- a/doc/legacy/hacker-guide/adding-fops.txt
+++ /dev/null
@@ -1,33 +0,0 @@
- HOW TO ADD A NEW FOP TO GlusterFS
- =================================
-
-Steps to be followed when adding a new FOP to GlusterFS:
-
-1. Edit glusterfs.h and add a GF_FOP_* constant.
-
-2. Edit xlator.[ch] and:
- 2a. add the new prototype for fop and callback.
- 2b. edit xlator_fops structure.
-
-3. Edit xlator.c and add to fill_defaults.
-
-4. Edit protocol.h and add struct necessary for the new FOP.
-
-5. Edit defaults.[ch] and provide default implementation.
-
-6. Edit call-stub.[ch] and provide stub implementation.
-
-7. Edit common-utils.c and add to gf_global_variable_init().
-
-8. Edit client-protocol and add your FOP.
-
-9. Edit server-protocol and add your FOP.
-
-10. Implement your FOP in any translator for which the default implementation
- is not sufficient.
-
-==========================================
-Last updated: Mon Oct 27 21:35:49 IST 2008
-
-Author: Vikas Gorur <vikas@gluster.com>
-==========================================
diff --git a/doc/legacy/hacker-guide/bdb.txt b/doc/legacy/hacker-guide/bdb.txt
deleted file mode 100644
index 1a80be813f6..00000000000
--- a/doc/legacy/hacker-guide/bdb.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-
-* How does file translates to key/value pair?
----------------------------------------------
-
- in bdb a file is identified by key (obtained by taking basename() of the path of
-the file) and file contents are stored as value corresponding to the key in database
-file (defaults to glusterfs_storage.db under dirname() directory).
-
-* symlinks, directories
------------------------
-
- symlinks and directories are stored as is.
-
-* db (database) files
----------------------
-
- every directory, including root directory, contains a database file called
-glusterfs_storage.db. all the regular files contained in the directory are stored
-as key/value pair inside the glusterfs_storage.db.
-
-* internal data cache
----------------------
-
- db does not provide a way to find out the size of the value corresponding to a key.
-so, bdb makes DB->get() call for key and takes the length of the value returned.
-since DB->get() also returns file contents for key, bdb maintains an internal cache and
-stores the file contents in the cache.
- every directory maintains a seperate cache.
-
-* inode number transformation
------------------------------
-
- bdb allocates a inode number to each file and directory on its own. bdb maintains a
-global counter and increments it after allocating inode number for each file
-(regular, symlink or directory). NOTE: bdb does not guarantee persistent inode numbers.
-
-* checkpoint thread
--------------------
-
- bdb creates a checkpoint thread at the time of init(). checkpoint thread does a
-periodic checkpoint on the DB_ENV. checkpoint is the mechanism, provided by db, to
-forcefully commit the logged transactions to the storage.
-
-NOTES ABOUT FOPS:
------------------
-
-lookup() -
- 1> do lstat() on the path, if lstat fails, we assume that the file being looked up
- is either a regular file or doesn't exist.
- 2> lookup in the DB of parent directory for key corresponding to path. if key exists,
- return key, with.
- NOTE: 'struct stat' stat()ed from DB file is used as a container for 'struct stat'
- of the regular file. st_ino, st_size, st_blocks are updated with file's values.
-
-readv() -
- 1> do a lookup in bctx cache. if successful, return the requested data from cache.
- 2> if cache missed, do a DB->get() the entire file content and insert to cache.
-
-writev():
- 1> flush any cached content of this file.
- 2> do a DB->put(), with DB_DBT_PARTIAL flag.
- NOTE: DB_DBT_PARTIAL is used to do partial update of a value in DB.
-
-readdir():
- 1> regular readdir() in a loop, and vomit all DB_ENV log files and DB files that
- we encounter.
- 2> if the readdir() buffer still has space, open a DB cursor and do a sequential
- DBC->get() to fill the reaadir buffer.
-
-
diff --git a/doc/legacy/hacker-guide/call-stub.txt b/doc/legacy/hacker-guide/call-stub.txt
deleted file mode 100644
index 021037a3512..00000000000
--- a/doc/legacy/hacker-guide/call-stub.txt
+++ /dev/null
@@ -1,1033 +0,0 @@
-creating a call stub and pausing a call
----------------------------------------
-libglusterfs provides seperate API to pause each of the fop. parameters to each API is
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
- NOTE: @fn should exactly take the same type and number of parameters that
- the corresponding regular fop takes.
-rest will be the regular parameters to corresponding fop.
-
-NOTE: @frame can never be NULL. fop_<operation>_stub() fails with errno
- set to EINVAL, if @frame is NULL. also wherever @loc is applicable,
- @loc cannot be NULL.
-
-refer to individual stub creation API to know about call-stub creation's behaviour with
-specific parameters.
-
-here is the list of stub creation APIs for xlator fops.
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@need_xattr - flag to specify if xattr should be returned or not.
-call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
- int32_t need_xattr);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
- fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
- @loc->parent, if not NULL. also @loc->path will be copied to a different location.
-@mode - mode parameter to chmod.
-call_stub_t *
-fop_chmod_stub (call_frame_t *frame,
- fop_chmod_t fn,
- loc_t *loc,
- mode_t mode);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@mode - mode parameter for fchmod fop.
-call_stub_t *
-fop_fchmod_stub (call_frame_t *frame,
- fop_fchmod_t fn,
- fd_t *fd,
- mode_t mode);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
- @loc->parent, if not NULL. also @loc->path will be copied to a different location.
-@uid - uid parameter to chown.
-@gid - gid parameter to chown.
-call_stub_t *
-fop_chown_stub (call_frame_t *frame,
- fop_chown_t fn,
- loc_t *loc,
- uid_t uid,
- gid_t gid);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@uid - uid parameter to fchown.
-@gid - gid parameter to fchown.
-call_stub_t *
-fop_fchown_stub (call_frame_t *frame,
- fop_fchown_t fn,
- fd_t *fd,
- uid_t uid,
- gid_t gid);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location, if not NULL.
-@off - offset parameter to truncate fop.
-call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@off - offset parameter to ftruncate fop.
-call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@tv - tv parameter to utimens fop.
-call_stub_t *
-fop_utimens_stub (call_frame_t *frame,
- fop_utimens_t fn,
- loc_t *loc,
- struct timespec tv[2]);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@mask - mask parameter for access fop.
-call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@size - size parameter to readlink fop.
-call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@mode - mode parameter to mknod fop.
-@rdev - rdev parameter to mknod fop.
-call_stub_t *
-fop_mknod_stub (call_frame_t *frame,
- fop_mknod_t fn,
- loc_t *loc,
- mode_t mode,
- dev_t rdev);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@mode - mode parameter to mkdir fop.
-call_stub_t *
-fop_mkdir_stub (call_frame_t *frame,
- fop_mkdir_t fn,
- loc_t *loc,
- mode_t mode);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_unlink_stub (call_frame_t *frame,
- fop_unlink_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_rmdir_stub (call_frame_t *frame,
- fop_rmdir_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@linkname - linkname parameter to symlink fop.
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_symlink_stub (call_frame_t *frame,
- fop_symlink_t fn,
- const char *linkname,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@oldloc - pointer to location structure.
- NOTE: @oldloc will be copied to a different location, with inode_ref() to
- @oldloc->inode and @oldloc->parent, if not NULL. also @oldloc->path will
- be copied to a different location, if not NULL.
-@newloc - pointer to location structure.
- NOTE: @newloc will be copied to a different location, with inode_ref() to
- @newloc->inode and @newloc->parent, if not NULL. also @newloc->path will
- be copied to a different location, if not NULL.
-call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@newpath - newpath parameter to link fop.
-call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- const char *newpath);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@flags - flags parameter to create fop.
-@mode - mode parameter to create fop.
-@fd - file descriptor parameter to create fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_create_stub (call_frame_t *frame,
- fop_create_t fn,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@flags - flags parameter to open fop.
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags,
- fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@size - size parameter to readv fop.
-@off - offset parameter to readv fop.
-call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@vector - vector parameter to writev fop.
- NOTE: @vector is iov_dup()ed while creating stub. and frame->root->req_refs
- dictionary is dict_ref()ed.
-@count - count parameter to writev fop.
-@off - off parameter to writev fop.
-call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to flush fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
- fd_t *fd);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@datasync - datasync parameter to fsync fop.
-call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
- @loc->parent, if not NULL. also @loc->path will be copied to a different location.
-@fd - file descriptor parameter to opendir fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
- loc_t *loc,
- fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to getdents fop.
- NOTE: @fd is stored with a fd_ref().
-@size - size parameter to getdents fop.
-@off - off parameter to getdents fop.
-@flags - flags parameter to getdents fop.
-call_stub_t *
-fop_getdents_stub (call_frame_t *frame,
- fop_getdents_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- int32_t flag);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to setdents fop.
- NOTE: @fd is stored with a fd_ref().
-@flags - flags parameter to setdents fop.
-@entries - entries parameter to setdents fop.
-call_stub_t *
-fop_setdents_stub (call_frame_t *frame,
- fop_setdents_t fn,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to setdents fop.
- NOTE: @fd is stored with a fd_ref().
-@datasync - datasync parameter to fsyncdir fop.
-call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@dict - dict parameter to setxattr fop.
- NOTE: stub creation procedure stores @dict pointer with dict_ref() to it.
-call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
- int32_t flags);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@name - name parameter to getxattr fop.
-call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
- const char *name);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@name - name parameter to removexattr fop.
- NOTE: name string will be copied to a different location while creating stub.
-call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@cmd - command parameter to lk fop.
-@lock - lock parameter to lk fop.
- NOTE: lock will be copied to a different location while creating stub.
-call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - fd parameter to gf_lk fop.
- NOTE: @fd is fd_ref()ed while creating stub, if not NULL.
-@cmd - cmd parameter to gf_lk fop.
-@lock - lock paramater to gf_lk fop.
- NOTE: @lock is copied to a different memory location while creating
- stub.
-call_stub_t *
-fop_gf_lk_stub (call_frame_t *frame,
- fop_gf_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to readdir fop.
- NOTE: @fd is stored with a fd_ref().
-@size - size parameter to readdir fop.
-@off - offset parameter to readdir fop.
-call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@flags - flags parameter to checksum fop.
-call_stub_t *
-fop_checksum_stub (call_frame_t *frame,
- fop_checksum_t fn,
- loc_t *loc,
- int32_t flags);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-@dict - dict parameter to @fn.
- NOTE: @dict pointer is stored with dict_ref().
-call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict);
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_chmod_cbk_stub (call_frame_t *frame,
- fop_chmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_fchmod_cbk_stub (call_frame_t *frame,
- fop_fchmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_chown_cbk_stub (call_frame_t *frame,
- fop_chown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_fchown_cbk_stub (call_frame_t *frame,
- fop_fchown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_utimens_cbk_stub (call_frame_t *frame,
- fop_utimens_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@path - path parameter to @fn.
- NOTE: @path is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@fd - fd parameter to @fn.
- NOTE: @fd pointer is stored with a fd_ref().
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@fd - fd parameter to @fn.
- NOTE: @fd pointer is stored with a fd_ref().
-call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@vector - vector parameter to @fn.
- NOTE: @vector is copied to a different memory location, if not NULL. also
- frame->root->rsp_refs is dict_ref()ed.
-@stbuf - stbuf parameter to @fn.
- NOTE: @stbuf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@stbuf - stbuf parameter to @fn.
- NOTE: @stbuf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@fd - fd parameter to @fn.
- NOTE: @fd pointer is stored with a fd_ref().
-call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@entries - entries parameter to @fn.
-@count - count parameter to @fn.
-call_stub_t *
-fop_getdents_cbk_stub (call_frame_t *frame,
- fop_getdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_setdents_cbk_stub (call_frame_t *frame,
- fop_setdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_setxattr_cbk_stub (call_frame_t *frame,
- fop_setxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@value - value dictionary parameter to @fn.
- NOTE: @value pointer is stored with a dict_ref().
-call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@lock - lock parameter to @fn.
- NOTE: @lock is copied to a different memory location while creating
- stub.
-call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@lock - lock parameter to @fn.
- NOTE: @lock is copied to a different memory location while creating
- stub.
-call_stub_t *
-fop_gf_lk_cbk_stub (call_frame_t *frame,
- fop_gf_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@entries - entries parameter to @fn.
-call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@file_checksum - file_checksum parameter to @fn.
- NOTE: file_checksum will be copied to a different memory location
- while creating stub.
-@dir_checksum - dir_checksum parameter to @fn.
- NOTE: file_checksum will be copied to a different memory location
- while creating stub.
-call_stub_t *
-fop_checksum_cbk_stub (call_frame_t *frame,
- fop_checksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum);
-
-resuming a call:
----------------
- call can be resumed using call stub through call_resume API.
-
- void call_resume (call_stub_t *stub);
-
- stub - call stub created during pausing a call.
-
- NOTE: call_resume() will decrease reference count of any fd_t, dict_t and inode_t that it finds
- in stub->args.<operation>.<fd_t-or-inode_t-or-dict_t>. so, if any fd_t, dict_t or
- inode_t pointers are assigned at stub->args.<operation>.<fd_t-or-inode_t-or-dict_t> after
- fop_<operation>_stub() call, they must be <fd_t-or-inode_t-or-dict_t>_ref()ed.
-
- call_resume does not STACK_DESTROY() for any fop.
-
- if stub->fn is NULL, call_resume does STACK_WIND() or STACK_UNWIND() using the stub->frame.
-
- return - call resume fails only if stub is NULL. call resume fails with errno set to EINVAL.
diff --git a/doc/legacy/hacker-guide/hacker-guide.tex b/doc/legacy/hacker-guide/hacker-guide.tex
deleted file mode 100644
index 11101e7a87a..00000000000
--- a/doc/legacy/hacker-guide/hacker-guide.tex
+++ /dev/null
@@ -1,309 +0,0 @@
-\documentclass{book}[12pt]
-\usepackage{graphicx}
-% \usepackage{fancyhdr}
-
-% \pagestyle{fancy}
-\begin{document}
-
-% \headheight 117pt
-% \rhead{\includegraphics{zr-logo.eps}}
-
-\author{Gluster}
-\title{GlusterFS 1.3 Hacker's Guide}
-\date{June 1, 2007}
-
-\maketitle
-\frontmatter
-\tableofcontents
-
-\mainmatter
-\chapter{Introduction}
-
-\section{Coding guidelines}
-GlusterFS uses Git for version control. To get the latest source do:
-\begin{verbatim}
- $ git clone git://git.gluster.com/glusterfs.git glusterfs
-\end{verbatim}
-\noindent
-GlusterFS follows the GNU coding
-standards\footnote{http://www.gnu.org/prep/standards\_toc.html} for the
-most part.
-
-\chapter{Major components}
-\section{libglusterfs}
-\texttt{libglusterfs} contains supporting code used by all the other components.
-The important files here are:
-
-\texttt{dict.c}: This is an implementation of a serializable dictionary type. It is
-used by the protocol code to send requests and replies. It is also used to pass options
-to translators.
-
-\texttt{logging.c}: This is a thread-safe logging library. The log messages go to a
-file (default \texttt{/usr/local/var/log/glusterfs/*}).
-
-\texttt{protocol.c}: This file implements the GlusterFS on-the-wire
-protocol. The protocol itself is a simple ASCII protocol, designed to
-be easy to parse and be human readable.
-
-A sample GlusterFS protocol block looks like this:
-\begin{verbatim}
- Block Start header
- 0000000000000023 callid
- 00000001 type
- 00000016 op
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx human-readable name
- 00000000000000000000000000000ac3 block size
- <...> block
- Block End
-\end{verbatim}
-
-\texttt{stack.h}: This file defines the \texttt{STACK\_WIND} and
-\texttt{STACK\_UNWIND} macros which are used to implement the parallel
-stack that is maintained for inter-xlator calls. See the \textsl{Taking control
-of the stack} section below for more details.
-
-\texttt{spec.y}: This contains the Yacc grammar for the GlusterFS
-specification file, and the parsing code.
-
-
-Draw diagrams of trees
-Two rules:
-(1) directory structure is same
-(2) file can exist only on one node
-
-\section{glusterfs-fuse}
-\section{glusterfsd}
-\section{transport}
-\section{scheduler}
-\section{xlator}
-
-\chapter{xlators}
-\section{Taking control of the stack}
-One can think of STACK\_WIND/UNWIND as a very specific RPC mechanism.
-
-% \includegraphics{stack.eps}
-
-\section{Overview of xlators}
-
-\flushleft{\LARGE\texttt{cluster/}}
-\vskip 2ex
-\flushleft{\Large\texttt{afr}}
-\vskip 2ex
-\flushleft{\Large\texttt{stripe}}
-\vskip 2ex
-\flushleft{\Large\texttt{unify}}
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{debug/}}
-\vskip 2ex
-\flushleft{\Large\texttt{trace}}
-\vskip 2ex
-The trace xlator simply logs all fops and mops, and passes them through to its child.
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{features/}}
-\flushleft{\Large\texttt{posix-locks}}
-\vskip 2ex
-This xlator implements \textsc{posix} record locking semantics over
-any kind of storage.
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{performance/}}
-
-\flushleft{\Large\texttt{io-threads}}
-\vskip 2ex
-\flushleft{\Large\texttt{read-ahead}}
-\vskip 2ex
-\flushleft{\Large\texttt{stat-prefetch}}
-\vskip 2ex
-\flushleft{\Large\texttt{write-behind}}
-\vskip 2ex
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{protocol/}}
-\vskip 2ex
-
-\flushleft{\Large\texttt{client}}
-\vskip 2ex
-
-\flushleft{\Large\texttt{server}}
-\vskip 2ex
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{storage/}}
-\flushleft{\Large\texttt{posix}}
-\vskip 2ex
-The \texttt{posix} xlator is the one which actually makes calls to the
-on-disk filesystem. Currently this is the only storage xlator available. However,
-plans to develop other storage xlators, such as one for Amazon's S3 service, are
-on the roadmap.
-
-\chapter{Writing a simple xlator}
-\noindent
-In this section we're going to write a rot13 xlator. ``Rot13'' is a
-simple substitution cipher which obscures a text by replacing each
-letter with the letter thirteen places down the alphabet. So `a' (0)
-would become `n' (12), `b' would be 'm', and so on. Rot13 applied to
-a piece of ciphertext yields the plaintext again, because rot13 is its
-own inverse, since:
-
-\[
-x_c = x + 13\; (mod\; 26)
-\]
-\[
-x_c + 13\; (mod\; 26) = x + 13 + 13\; (mod\; 26) = x
-\]
-
-First we include the requisite headers.
-
-\begin{verbatim}
-#include <ctype.h>
-#include <sys/uio.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-
-/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
- * This xlator is meant as an example, not for production
- * use ;) (hence no error-checking)
- */
-
-\end{verbatim}
-
-Then we write the rot13 function itself. For simplicity, we only transform lower case
-letters. Any other byte is passed through as it is.
-
-\begin{verbatim}
-/* We only handle lower case letters for simplicity */
-static void
-rot13 (char *buf, int len)
-{
- int i;
- for (i = 0; i < len; i++) {
- if (isalpha (buf[i]))
- buf[i] = (buf[i] - 'a' + 13) % 26;
- else if (buf[i] <= 26)
- buf[i] = (buf[i] + 13) % 26 + 'a';
- }
-}
-\end{verbatim}
-
-Next comes a utility function whose purpose will be clear after looking at the code
-below.
-
-\begin{verbatim}
-static void
-rot13_iovec (struct iovec *vector, int count)
-{
- int i;
- for (i = 0; i < count; i++) {
- rot13 (vector[i].iov_base, vector[i].iov_len);
- }
-}
-\end{verbatim}
-
-\begin{verbatim}
-static int32_t
-rot13_readv_cbk (call_frame_t *frame,
- call_frame_t *prev_frame,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count)
-{
- rot13_iovec (vector, count);
-
- STACK_UNWIND (frame, op_ret, op_errno, vector, count);
- return 0;
-}
-
-static int32_t
-rot13_readv (call_frame_t *frame,
- xlator_t *this,
- dict_t *ctx,
- size_t size,
- off_t offset)
-{
- STACK_WIND (frame,
- rot13_readv_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv,
- ctx, size, offset);
- return 0;
-}
-
-static int32_t
-rot13_writev_cbk (call_frame_t *frame,
- call_frame_t *prev_frame,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-static int32_t
-rot13_writev (call_frame_t *frame,
- xlator_t *this,
- dict_t *ctx,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- rot13_iovec (vector, count);
-
- STACK_WIND (frame,
- rot13_writev_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev,
- ctx, vector, count, offset);
- return 0;
-}
-
-\end{verbatim}
-
-Every xlator must define two functions and two external symbols. The functions are
-\texttt{init} and \texttt{fini}, and the symbols are \texttt{fops} and \texttt{mops}.
-The \texttt{init} function is called when the xlator is loaded by GlusterFS, and
-contains code for the xlator to initialize itself. Note that if an xlator is present
-multiple times in the spec tree, the \texttt{init} function will be called each time
-the xlator is loaded.
-
-\begin{verbatim}
-int32_t
-init (xlator_t *this)
-{
- if (!this->children) {
- gf_log ("rot13", GF_LOG_ERROR,
- "FATAL: rot13 should have exactly one child");
- return -1;
- }
-
- gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
- return 0;
-}
-\end{verbatim}
-
-\begin{verbatim}
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-struct xlator_fops fops = {
- .readv = rot13_readv,
- .writev = rot13_writev
-};
-
-
-\end{verbatim}
-
-\end{document}
-
diff --git a/doc/legacy/hacker-guide/lock-ahead.txt b/doc/legacy/hacker-guide/lock-ahead.txt
deleted file mode 100644
index 70aa452d3de..00000000000
--- a/doc/legacy/hacker-guide/lock-ahead.txt
+++ /dev/null
@@ -1,80 +0,0 @@
- Lock-ahead translator
- ---------------------
-
-The objective of the lock-ahead translator is to speculatively
-hold locks (inodelk and entrylk) on the universal set (0 - infinity
-in case of inodelk and all basenames in case of entrylk) even
-when a lock is requested only on a subset, in anticipation that
-further locks will be requested within the same universal set.
-
-So, for example, when cluster/replicate locks a region before
-writing to it, lock-ahead would instead lock the entire file.
-On further writes, lock-ahead can immediately return success for
-the lock requests, since the entire file has been previously locked.
-
-To avoid starvation of other clients/mountpoints, we employ a
-notify mechanism, described below.
-
-typedef struct {
- struct list_head subset_locks;
-} la_universal_lock_t;
-
-Universal lock structure is stored in the inode context.
-
-typedef struct {
- enum {LOCK_AHEAD_ENTRYLK, LOCK_AHEAD_FENTRYLK,
- LOCK_AHEAD_INODELK, LOCK_AHEAD_FINODELK};
-
- union {
- fd_t *fd;
- loc_t loc;
- };
-
- off_t l_start;
- off_t l_len;
-
- const char *basename;
-
- struct list_head universal_lock;
-} la_subset_lock_t;
-
-
-fops implemented:
-
-* inodelk/finodelk/entrylk/fentrylk:
-
-lock:
- if universal lock held:
- add subset to it (save loc_t or fd) and return success
- else:
- send lock-notify fop
- hold universal lock and return
- (set inode context, add subset to it, save loc_t or fd)
-
- if this fails:
- forward the lock request
-
-unlock:
- if subset exists in universal lock:
- delete subset lock from list
- else:
- forward it
-
-* release:
- hold subset locks (each subset lock using the saved loc_t or fd)
- and release universal lock
-
-* lock-notify (on unwind) (new fop)
- hold subset locks and release universal lock
-
-
-lock-notify in locks translator:
-
-if a subset lock in entrylk/inodelk cannot be satisfied
-because of a universal lock held by someone else:
- unwind the lock-notify fop
-
-==============================================
-$ Last updated: Tue Feb 17 11:31:18 IST 2009 $
-$ Author: Vikas Gorur <vikas@gluster.com> $
-==============================================
diff --git a/doc/legacy/hacker-guide/posix.txt b/doc/legacy/hacker-guide/posix.txt
deleted file mode 100644
index 7958af2ea7d..00000000000
--- a/doc/legacy/hacker-guide/posix.txt
+++ /dev/null
@@ -1,59 +0,0 @@
----------------
-* storage/posix
----------------
-
-- SET_FS_ID
-
- This is so that all filesystem checks are done with the user's
- uid/gid and not GlusterFS's uid/gid.
-
-- MAKE_REAL_PATH
-
- This macro concatenates the base directory of the posix volume
- ('option directory') with the given path.
-
-- need_xattr in lookup
-
- If this flag is passed, lookup returns a xattr dictionary that contains
- the file's create time, the file's contents, and the version number
- of the file.
-
- This is a hack to increase small file performance. If an application
- wants to read a small file, it can finish its job with just a lookup
- call instead of a lookup followed by read.
-
-- getdents/setdents
-
- These are used by unify to set and get directory entries.
-
-- ALIGN_BUF
-
- Macro to align an address to a page boundary (4K).
-
-- priv->export_statfs
-
- In some cases, two exported volumes may reside on the same
- partition on the server. Sending statvfs info for both
- the volumes will lead to erroneous df output at the client,
- since free space on the partition will be counted twice.
-
- In such cases, user can disable exporting statvfs info
- on one of the volumes by setting this option.
-
-- xattrop
-
- This fop is used by replicate to set version numbers on files.
-
-- getxattr/setxattr hack to read/write files
-
- A key, GLUSTERFS_FILE_CONTENT_STRING, is handled in a special way by
- getxattr/setxattr. A getxattr with the key will return the entire
- content of the file as the value. A setxattr with the key will write
- the value as the entire content of the file.
-
-- posix_checksum
-
- This calculates a simple XOR checksum on all entry names in a
- directory that is used by unify to compare directory contents.
-
-
diff --git a/doc/legacy/hacker-guide/replicate.txt b/doc/legacy/hacker-guide/replicate.txt
deleted file mode 100644
index 133c72afa91..00000000000
--- a/doc/legacy/hacker-guide/replicate.txt
+++ /dev/null
@@ -1,206 +0,0 @@
----------------
-* cluster/replicate
----------------
-
-Before understanding replicate, one must understand two internal FOPs:
-
-GF_FILE_LK:
- This is exactly like fcntl(2) locking, except the locks are in a
- separate domain from locks held by applications.
-
-GF_DIR_LK (loc_t *loc, char *basename):
- This allows one to lock a name under a directory. For example,
- to lock /mnt/glusterfs/foo, one would use the call:
-
- GF_DIR_LK ({loc_t for "/mnt/glusterfs"}, "foo")
-
- If one wishes to lock *all* the names under a particular directory,
- supply the basename argument as NULL.
-
- The locks can either be read locks or write locks; consult the
- function prototype for more details.
-
-Both these operations are implemented by the features/locks (earlier
-known as posix-locks) translator.
-
---------------
-* Basic design
---------------
-
-All FOPs can be classified into four major groups:
-
- - inode-read
- Operations that read an inode's data (file contents) or metadata (perms, etc.).
-
- access, getxattr, fstat, readlink, readv, stat.
-
- - inode-write
- Operations that modify an inode's data or metadata.
-
- chmod, chown, truncate, writev, utimens.
-
- - dir-read
- Operations that read a directory's contents or metadata.
-
- readdir, getdents, checksum.
-
- - dir-write
- Operations that modify a directory's contents or metadata.
-
- create, link, mkdir, mknod, rename, rmdir, symlink, unlink.
-
- Some of these make a subgroup in that they modify *two* different entries:
- link, rename, symlink.
-
- - Others
- Other operations.
-
- flush, lookup, open, opendir, statfs.
-
-------------
-* Algorithms
-------------
-
-Each of the four major groups has its own algorithm:
-
- ----------------------
- - inode-read, dir-read
- ----------------------
-
- = Send a request to the first child that is up:
- - if it fails:
- try the next available child
- - if we have exhausted all children:
- return failure
-
- -------------
- - inode-write
- -------------
-
- All operations are done in parallel unless specified otherwise.
-
- (1) Send a GF_FILE_LK request on all children for a write lock on
- the appropriate region
- (for metadata operations: entire file (0, 0)
- for writev: (offset, offset+size of buffer))
-
- - If a lock request fails on a child:
- unlock all children
- try to acquire a blocking lock (F_SETLKW) on each child, serially.
-
- If this fails (due to ENOTCONN or EINVAL):
- Consider this child as dead for rest of transaction.
-
- (2) Mark all children as "pending" on all (alive) children
- (see below for meaning of "pending").
-
- - If it fails on any child:
- mark it as dead (in transaction local state).
-
- (3) Perform operation on all (alive) children.
-
- - If it fails on any child:
- mark it as dead (in transaction local state).
-
- (4) Unmark all successful children as not "pending" on all nodes.
-
- (5) Unlock region on all (alive) children.
-
- -----------
- - dir-write
- -----------
-
- The algorithm for dir-write is same as above except instead of holding
- GF_FILE_LK locks we hold a GF_DIR_LK lock on the name being operated upon.
- In case of link-type calls, we hold locks on both the operand names.
-
------------
-* "pending"
------------
-
- The "pending" number is like a journal entry. A pending entry is an
- array of 32-bit integers stored in network byte-order as the extended
- attribute of an inode (which can be a directory as well).
-
- There are three keys corresponding to three types of pending operations:
-
- - AFR_METADATA_PENDING
- There are some metadata operations pending on this inode (perms, ctime/mtime,
- xattr, etc.).
-
- - AFR_DATA_PENDING
- There is some data pending on this inode (writev).
-
- - AFR_ENTRY_PENDING
- There are some directory operations pending on this directory
- (create, unlink, etc.).
-
------------
-* Self heal
------------
-
- - On lookup, gather extended attribute data:
- - If entry is a regular file:
- - If an entry is present on one child and not on others:
- - create entry on others.
- - If entries exist but have different metadata (perms, etc.):
- - consider the entry with the highest AFR_METADATA_PENDING number as
- definitive and replicate its attributes on children.
-
- - If entry is a directory:
- - Consider the entry with the higest AFR_ENTRY_PENDING number as
- definitive and replicate its contents on all children.
-
- - If any two entries have non-matching types (i.e., one is file and
- other is directory):
- - Announce to the user via log that a split-brain situation has been
- detected, and do nothing.
-
- - On open, gather extended attribute data:
- - Consider the file with the highest AFR_DATA_PENDING number as
- the definitive one and replicate its contents on all other
- children.
-
- During all self heal operations, appropriate locks must be held on all
- regions/entries being affected.
-
----------------
-* Inode scaling
----------------
-
-Inode scaling is necessary because if a situation arises where:
- - An inode number is returned for a directory (by lookup) which was
- previously the inode number of a file (as per FUSE's table), then
- FUSE gets horribly confused (consult a FUSE expert for more details).
-
-To avoid such a situation, we distribute the 64-bit inode space equally
-among all children of replicate.
-
-To illustrate:
-
-If c1, c2, c3 are children of replicate, they each get 1/3 of the available
-inode space:
-
-Child: c1 c2 c3 c1 c2 c3 c1 c2 c3 c1 c2 ...
-Inode number: 1 2 3 4 5 6 7 8 9 10 11 ...
-
-Thus, if lookup on c1 returns an inode number "2", it is scaled to "4"
-(which is the second inode number in c1's space).
-
-This way we ensure that there is never a collision of inode numbers from
-two different children.
-
-This reduction of inode space doesn't really reduce the usability of
-replicate since even if we assume replicate has 1024 children (which would be a
-highly unusual scenario), each child still has a 54-bit inode space.
-
-2^54 ~ 1.8 * 10^16
-
-which is much larger than any real world requirement.
-
-
-==============================================
-$ Last updated: Sun Oct 12 23:17:01 IST 2008 $
-$ Author: Vikas Gorur <vikas@gluster.com> $
-==============================================
-
diff --git a/doc/legacy/hacker-guide/write-behind.txt b/doc/legacy/hacker-guide/write-behind.txt
deleted file mode 100644
index 50b7d2a1d07..00000000000
--- a/doc/legacy/hacker-guide/write-behind.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-basic working
---------------
-
- write behind is basically a translator to lie to the application that the write-requests are finished, even before it is actually finished.
-
- on a regular translator tree without write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto client-protocol or storage translator.
- 5. client-protocol, on receiving reply from server, starts STACK_UNWIND() towards the fuse-bridge.
-
- on a translator tree with write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto write-behind translator.
- 5. write-behind adds the write buffer to its internal queue and does a STACK_UNWIND() towards the fuse-bridge.
-
- write call is completed in application's percepective. after STACK_UNWIND()ing towards the fuse-bridge, write-behind initiates a fresh writev() call to its child translator, whose replies will be consumed by write-behind itself. write-behind _doesn't_ cache the write buffer, unless 'option flush-behind on' is specified in volume specification file.
-
-windowing
----------
-
- write respect to write-behind, each write-buffer has three flags: 'stack_wound', 'write_behind' and 'got_reply'.
-
- stack_wound: if set, indicates that write-behind has initiated STACK_WIND() towards child translator.
-
- write_behind: if set, indicates that write-behind has done STACK_UNWIND() towards fuse-bridge.
-
- got_reply: if set, indicates that write-behind has received reply from child translator for a writev() STACK_WIND(). a request will be destroyed by write-behind only if this flag is set.
-
- currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
-
- window size limits the aggregate size of currently pending write requests. once the pending requests' size has reached the window size, write-behind blocks writev() calls from fuse-bridge.
- blocking is only from application's perspective. write-behind does STACK_WIND() to child translator straight-away, but hold behind the STACK_UNWIND() towards fuse-bridge. STACK_UNWIND() is done only once write-behind gets enough replies to accomodate for currently blocked request.
-
-flush behind
-------------
-
- if 'option flush-behind on' is specified in volume specification file, then write-behind sends aggregate write requests to child translator, instead of regular per request STACK_WIND()s.
-
-
diff --git a/doc/legacy/handling-options.txt b/doc/legacy/handling-options.txt
deleted file mode 100644
index 9a3b2510acb..00000000000
--- a/doc/legacy/handling-options.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-
-How to add a new option to a given volume ?
-===========================================
-
-* Add a entry in 'struct volume_options options[]' with your key, what is
- the type of the 'key', etc.
-
-* The 'key' and corresponding 'value' given for the same by user are validated
- before calling init() of the translator/transport/scheduler/auth-module.
-
-* Once the complete init() is successful, user will get a warning if he has
- given a 'key' which is not defined in these modules.
-
diff --git a/doc/legacy/mac-related-xattrs.txt b/doc/legacy/mac-related-xattrs.txt
deleted file mode 100644
index 92bb2ceef2d..00000000000
--- a/doc/legacy/mac-related-xattrs.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-
-This document is intended to briefly explain how the Extended Attributes on
-Darwin 10.5.x releases works
-----
-
-On Darwin other than all the normal filesystem operations, 'Finder' (like
-Explorer in Windows but a little more) keeps its information in two extended
-attributes named 'com.apple.FinderInfo' and 'com.apple.ResourceFork'. If these
-xattrs are not implemented the filesystem won't be shown on Finder, and if they
-are not implemented properly there may be issues when some of the file operations
-are done through GUI of Finder. But when a filesystem is used over mountpoint in a
-terminal, everything is fine and these xattrs are not required.
-
-Currently the way these xattrs are implemented is simple. All the xattr calls
-(getxattr, setxattr, listxattr, removexattr) are passed down to underlaying filesystem,
-most of the cases when exported FS is on MacOS X itself, these keys are supported, hence
-the fops succeed. But in the case of using exports of different OS on Darwin the issue is
-extended attribute prefix like 'com.apple.' may not be supported, hence the problem with
-Finder. To solve this issue, GlusterFS returns virtual default values to these keys, which
-works fine on most of the cases.
-
diff --git a/doc/legacy/porting_guide.txt b/doc/legacy/porting_guide.txt
deleted file mode 100644
index 5705cd96461..00000000000
--- a/doc/legacy/porting_guide.txt
+++ /dev/null
@@ -1,45 +0,0 @@
- GlusterFS Porting Guide
- -----------------------
-
-* General setup
-
-The configure script will detect the target platform for the build.
-All platform-specific CFLAGS, macro definitions should be done
-in configure.ac
-
-Platform-specific code can be written like this:
-
-#ifdef GF_DARWIN_HOST_OS
- /* some code specific to Darwin */
-#endif
-
-* Coding guidelines
-
-In general, avoid glibc extensions. For example, nested functions don't work
-on Mac OS X. It is best to stick to C99.
-
-When using library calls and system calls, pay attention to the
-portability notes. As far as possible stick to POSIX-specified behavior.
-Do not use anything expressly permitted by the specification. For example,
-some fields in structures may be present only on certain platforms. Avoid
-use of such things.
-
-Do not pass values of constants such as F_*, O_*, errno values, etc. across
-platforms.
-
-Please refer compat-errno.h for more details about errno handling inside
-glusterfs for cross platform.
-
-* Specific issues
-
-- The argp library is available only on Linux through glibc, but for other
- platforms glusterfs has already included argp-standalone library which will
- statically linked during the glusterfs build.
-
-- Extended attribute calls (setxattr, listxattr, etc.) have differing prototypes
- on different platforms. See compat.h for macro definitions to resolve this, also
- read out the specific extended attribute documentation for your platforms.
-
-------------------------------------------
-Last revised: Thu Feb 28 13:58:07 IST 2008
-------------------------------------------
diff --git a/doc/legacy/replicate.lyx b/doc/legacy/replicate.lyx
deleted file mode 100644
index 58ba6b2e00e..00000000000
--- a/doc/legacy/replicate.lyx
+++ /dev/null
@@ -1,797 +0,0 @@
-#LyX 1.4.2 created this file. For more info see http://www.lyx.org/
-\lyxformat 245
-\begin_document
-\begin_header
-\textclass article
-\language english
-\inputencoding auto
-\fontscheme default
-\graphics default
-\paperfontsize default
-\spacing single
-\papersize default
-\use_geometry false
-\use_amsmath 1
-\cite_engine basic
-\use_bibtopic false
-\paperorientation portrait
-\secnumdepth 3
-\tocdepth 3
-\paragraph_separation skip
-\defskip medskip
-\quotes_language english
-\papercolumns 1
-\papersides 1
-\paperpagestyle default
-\tracking_changes false
-\output_changes false
-\end_header
-
-\begin_body
-
-\begin_layout Title
-
-\size larger
-Automatic File Replication (replicate) in GlusterFS
-\end_layout
-
-\begin_layout Author
-Vikas Gorur
-\family typewriter
-\size larger
-<vikas@gluster.com>
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-
-\backslash
-hrule
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Section*
-Overview
-\end_layout
-
-\begin_layout Standard
-This document describes the design and usage of the replicate translator in GlusterFS.
- This document is valid for the 1.4.x releases, and not earlier ones.
-\end_layout
-
-\begin_layout Standard
-The replicate translator of GlusterFS aims to keep identical copies of a file
- on all its subvolumes, as far as possible.
- It tries to do this by performing all filesystem mutation operations (writing
- data, creating files, changing ownership, etc.) on all its subvolumes in
- such a way that if an operation succeeds on atleast one subvolume, all
- other subvolumes can later be brought up to date.
-\end_layout
-
-\begin_layout Standard
-In the rest of the document the terms
-\begin_inset Quotes eld
-\end_inset
-
-subvolume
-\begin_inset Quotes erd
-\end_inset
-
- and
-\begin_inset Quotes eld
-\end_inset
-
-server
-\begin_inset Quotes erd
-\end_inset
-
- are used interchangeably, trusting that it will cause no confusion to the
- reader.
-\end_layout
-
-\begin_layout Section*
-Usage
-\end_layout
-
-\begin_layout Standard
-A sample volume declaration for replicate looks like this:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-
-\backslash
-begin{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-volume replicate
-\end_layout
-
-\begin_layout Standard
-
- type cluster/replicate
-\end_layout
-
-\begin_layout Standard
-
- # options, see below for description
-\end_layout
-
-\begin_layout Standard
-
- subvolumes brick1 brick2
-\end_layout
-
-\begin_layout Standard
-
-end-volume
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-end{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-This defines an replicate volume with two subvolumes, brick1, and brick2.
- For replicate to work properly, it is essential that its subvolumes support
-\series bold
-extended attributes
-\series default
-.
- This means that you should choose a backend filesystem that supports extended
- attributes, like XFS, ReiserFS, or Ext3.
-\end_layout
-
-\begin_layout Standard
-The storage volumes used as backend for replicate
-\emph on
-must
-\emph default
- have a posix-locks volume loaded above them.
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-
-\backslash
-begin{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-volume brick1
-\end_layout
-
-\begin_layout Standard
-
- type features/posix-locks
-\end_layout
-
-\begin_layout Standard
-
- subvolumes brick1-ds
-\end_layout
-
-\begin_layout Standard
-
-end-volume
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-end{verbatim}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Section*
-Design
-\end_layout
-
-\begin_layout Subsection*
-Read algorithm
-\end_layout
-
-\begin_layout Standard
-All operations that do not modify the file or directory are sent to all
- the subvolumes and the first successful reply is returned to the application.
-\end_layout
-
-\begin_layout Standard
-The read() system call (reading data from a file) is an exception.
- For read() calls, replicate tries to do load balancing by sending all reads from
- a particular file to a particular server.
-\end_layout
-
-\begin_layout Standard
-The read algorithm is also affected by the option read-subvolume; see below
- for details.
-\end_layout
-
-\begin_layout Subsection*
-Classes of file operations
-\end_layout
-
-\begin_layout Standard
-replicate divides all filesystem write operations into three classes:
-\end_layout
-
-\begin_layout Itemize
-
-\series bold
-data:
-\series default
-Operations that modify the contents of a file (write, truncate).
-\end_layout
-
-\begin_layout Itemize
-
-\series bold
-metadata:
-\series default
-Operations that modify attributes of a file or directory (permissions, ownership
-, etc.).
-\end_layout
-
-\begin_layout Itemize
-
-\series bold
-entry:
-\series default
-Operations that create or delete directory entries (mkdir, create, rename,
- rmdir, unlink, etc.).
-\end_layout
-
-\begin_layout Subsection*
-Locking and Change Log
-\end_layout
-
-\begin_layout Standard
-To ensure consistency across subvolumes, replicate holds a lock whenever a modificatio
-n is being made to a file or directory.
- By default, replicate considers the first subvolume as the sole lock server.
- However, the number of lock servers can be increased upto the total number
- of subvolumes.
-\end_layout
-
-\begin_layout Standard
-The change log is a set of extended attributes associated with files and
- directories that replicate maintains.
- The change log keeps track of the changes made to files and directories
- (data, metadata, entry) so that the self-heal algorithm knows which copy
- of a file or directory is the most recent one.
-\end_layout
-
-\begin_layout Subsection*
-Write algorithm
-\end_layout
-
-\begin_layout Standard
-The algorithm for all write operations (data, metadata, entry) is:
-\end_layout
-
-\begin_layout Enumerate
-Lock the file (or directory) on all of the lock servers (see options below).
-\end_layout
-
-\begin_layout Enumerate
-Write change log entries on all servers.
-\end_layout
-
-\begin_layout Enumerate
-Perform the operation.
-\end_layout
-
-\begin_layout Enumerate
-Erase change log entries.
-\end_layout
-
-\begin_layout Enumerate
-Unlock the file (or directory) on all of the lock servers.
-\end_layout
-
-\begin_layout Standard
-The above algorithm is a simplified version intended for general users.
- Please refer to the source code for the full details.
-\end_layout
-
-\begin_layout Subsection*
-Self-Heal
-\end_layout
-
-\begin_layout Standard
-replicate automatically tries to fix any inconsistencies it detects among different
- copies of a file.
- It uses information in the change log to determine which copy is the
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- version.
-\end_layout
-
-\begin_layout Standard
-Self-heal is triggered when a file or directory is first
-\begin_inset Quotes eld
-\end_inset
-
-accessed
-\begin_inset Quotes erd
-\end_inset
-
-, that is, the first time any operation is attempted on it.
- The self-heal algorithm does the following things:
-\end_layout
-
-\begin_layout Standard
-If the entry being accessed is a directory:
-\end_layout
-
-\begin_layout Itemize
-The contents of the
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- version is replicated on all subvolumes, by deleting entries and creating
- entries as necessary.
-\end_layout
-
-\begin_layout Standard
-If the entry being accessed is a file:
-\end_layout
-
-\begin_layout Itemize
-If the file does not exist on some subvolumes, it is created.
-\end_layout
-
-\begin_layout Itemize
-If there is a mismatch in the size of the file, or ownership, or permission,
- it is fixed.
-\end_layout
-
-\begin_layout Itemize
-If the change log indicates that some copies need updating, they are updated.
-\end_layout
-
-\begin_layout Subsection*
-Split-brain
-\end_layout
-
-\begin_layout Standard
-It may happen that one replicate client can access only some of the servers in
- a cluster and another replicate client can access the remaining servers.
- Or it may happen that in a cluster of two servers, one server goes down
- and comes back up, but the other goes down immediately.
- Both these scenarios result in a
-\begin_inset Quotes eld
-\end_inset
-
-split-brain
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-In a split-brain situation, there will be two or more copies of a file,
- all of which are
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- in some sense.
- replicate without manual intervention has no way of knowing what to do, since
- it cannot consider any single copy as definitive, nor does it know of any
- meaningful way to merge the copies.
-\end_layout
-
-\begin_layout Standard
-If replicate detects that a split-brain has happened on a file, it disallows opening
- of that file.
- You will have to manually resolve the conflict by deleting all but one
- copy of the file.
- Alternatively you can set an automatic split-brain resolution policy by
- using the `favorite-child' option (see below).
-\end_layout
-
-\begin_layout Section*
-Translator Options
-\end_layout
-
-\begin_layout Standard
-replicate accepts the following options:
-\end_layout
-
-\begin_layout Subsection*
-read-subvolume (default: none)
-\end_layout
-
-\begin_layout Standard
-The value of this option must be the name of a subvolume.
- If given, all read operations are sent to only the specified subvolume,
- instead of being balanced across all subvolumes.
-\end_layout
-
-\begin_layout Subsection*
-favorite-child (default: none)
-\end_layout
-
-\begin_layout Standard
-The value of this option must be the name of a subvolume.
- If given, the specified subvolume will be preferentially used in resolving
- conflicts (
-\begin_inset Quotes eld
-\end_inset
-
-split-brain
-\begin_inset Quotes erd
-\end_inset
-
-).
- This means if a discrepancy is noticed in the attributes or content of
- a file, the copy on the `favorite-child' will be considered the definitive
- version and its contents will
-\emph on
-overwrite
-\emph default
-the contents of all other copies.
- Use this option with caution! It is possible to
-\emph on
-lose data
-\emph default
- with this option.
- If you are in doubt, do not specify this option.
-\end_layout
-
-\begin_layout Subsection*
-Self-heal options
-\end_layout
-
-\begin_layout Standard
-Setting any of these options to
-\begin_inset Quotes eld
-\end_inset
-
-off
-\begin_inset Quotes erd
-\end_inset
-
- prevents that kind of self-heal from being done on a file or directory.
- For example, if metadata self-heal is turned off, permissions and ownership
- are no longer fixed automatically.
-\end_layout
-
-\begin_layout Subsubsection*
-data-self-heal (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable self-healing of file contents.
-\end_layout
-
-\begin_layout Subsubsection*
-metadata-self-heal (default: off)
-\end_layout
-
-\begin_layout Standard
-Enable/disable self-healing of metadata (permissions, ownership, modification
- times).
-\end_layout
-
-\begin_layout Subsubsection*
-entry-self-heal (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable self-healing of directory entries.
-\end_layout
-
-\begin_layout Subsection*
-Change Log options
-\end_layout
-
-\begin_layout Standard
-If any of these options is turned off, it disables writing of change log
- entries for that class of file operations.
- That is, steps 2 and 4 of the write algorithm (see above) are not done.
- Note that if the change log is not written, the self-heal algorithm cannot
- determine the
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- version of a file and hence self-heal will only be able to fix
-\begin_inset Quotes eld
-\end_inset
-
-obviously
-\begin_inset Quotes erd
-\end_inset
-
- wrong things (such as a file existing on only one node).
-\end_layout
-
-\begin_layout Subsubsection*
-data-change-log (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable writing of change log for data operations.
-\end_layout
-
-\begin_layout Subsubsection*
-metadata-change-log (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable writing of change log for metadata operations.
-\end_layout
-
-\begin_layout Subsubsection*
-entry-change-log (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable writing of change log for entry operations.
-\end_layout
-
-\begin_layout Subsection*
-Locking options
-\end_layout
-
-\begin_layout Standard
-These options let you specify the number of lock servers to use for each
- class of file operations.
- The default values are satisfactory in most cases.
- If you are extra paranoid, you may want to increase the values.
- However, be very cautious if you set the data- or entry- lock server counts
- to zero, since this can result in
-\emph on
-lost data.
-
-\emph default
- For example, if you set the data-lock-server-count to zero, and two application
-s write to the same region of a file, there is a possibility that none of
- your servers will have all the data.
- In other words, the copies will be
-\emph on
-inconsistent
-\emph default
-, and
-\emph on
-incomplete
-\emph default
-.
- Do not set data- and entry- lock server counts to zero unless you absolutely
- know what you are doing and agree to not hold GlusterFS responsible for
- any lost data.
-\end_layout
-
-\begin_layout Subsubsection*
-data-lock-server-count (default: 1)
-\end_layout
-
-\begin_layout Standard
-Number of lock servers to use for data operations.
-\end_layout
-
-\begin_layout Subsubsection*
-metadata-lock-server-count (default: 0)
-\end_layout
-
-\begin_layout Standard
-Number of lock servers to use for metadata operations.
-\end_layout
-
-\begin_layout Subsubsection*
-entry-lock-server-count (default: 1)
-\end_layout
-
-\begin_layout Standard
-Number of lock servers to use for entry operations.
-\end_layout
-
-\begin_layout Section*
-Known Issues
-\end_layout
-
-\begin_layout Subsection*
-Self-heal of file with more than one link (hard links):
-\end_layout
-
-\begin_layout Standard
-Consider two servers, A and B.
- Assume A is down, and the user creates a file `new' as a hard link to a
- file `old'.
- When A comes back up, replicate will see that the file `new' does not exist on
- A, and self-heal will create the file and copy the contents from B.
- However, now on server A the file `new' is not a link to the file `old'
- but an entirely different file.
-\end_layout
-
-\begin_layout Standard
-We know of no easy way to fix this problem, but we will try to fix it in
- forthcoming releases.
-\end_layout
-
-\begin_layout Subsection*
-File re-opening after a server comes back up:
-\end_layout
-
-\begin_layout Standard
-If a server A goes down and comes back up, any files which were opened while
- A was down and are still open will not have their writes replicated on
- A.
- In other words, data replication only happens on those servers which were
- alive when the file was opened.
-\end_layout
-
-\begin_layout Standard
-This is a rather tricky issue but we hope to fix it very soon.
-\end_layout
-
-\begin_layout Section*
-Frequently Asked Questions
-\end_layout
-
-\begin_layout Subsection*
-1.
- How can I force self-heal to happen?
-\end_layout
-
-\begin_layout Standard
-You can force self-heal to happen on your cluster by running a script or
- a command that accesses every file.
- A simple way to do it would be:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-begin{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-$ ls -lR
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-end{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Run the command in all directories which you want to forcibly self-heal.
-\end_layout
-
-\begin_layout Subsection*
-2.
- Which backend filesystem should I use for replicate?
-\end_layout
-
-\begin_layout Standard
-You can use any backend filesystem that supports extended attributes.
- We know of users successfully using XFR, ReiserFS, and Ext3.
-\end_layout
-
-\begin_layout Subsection*
-3.
- What can I do to improve replicate performance?
-\end_layout
-
-\begin_layout Standard
-Try loading performance translators such as io-threads, write-behind, io-cache,
- and read-ahead depending on your workload.
- If you are willing to sacrifice correctness in corner cases, you can experiment
- with the lock-server-count and the change-log options (see above).
- As warned earlier, be very careful!
-\end_layout
-
-\begin_layout Subsection*
-4.
- How can I selectively replicate files?
-\end_layout
-
-\begin_layout Standard
-There is no support for selective replication in replicate itself.
- You can achieve selective replication by loading the unify translator over
- replicate, and using the switch scheduler.
- Configure unify with two subvolumes, one of them being replicate.
- Using the switch scheduler, schedule all files for which you need replication
- to the replicate subvolume.
- Consult unify and switch documentation for more details.
-\end_layout
-
-\begin_layout Section*
-Contact
-\end_layout
-
-\begin_layout Standard
-If you need more assistance on replicate, contact us on the mailing list <gluster-user
-s@gluster.org> (visit gluster.org for details on how to subscribe).
-\end_layout
-
-\begin_layout Standard
-Send you comments and suggestions about this document to <vikas@gluster.com>.
-\end_layout
-
-\end_body
-\end_document
diff --git a/doc/legacy/replicate.pdf b/doc/legacy/replicate.pdf
deleted file mode 100644
index b7212af2b4e..00000000000
--- a/doc/legacy/replicate.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/legacy/solaris-related-xattrs.txt b/doc/legacy/solaris-related-xattrs.txt
deleted file mode 100644
index 3a4643948c0..00000000000
--- a/doc/legacy/solaris-related-xattrs.txt
+++ /dev/null
@@ -1,44 +0,0 @@
- Solaris Extended Attributes
-
-In solaris extended attributes are logically supported as files
-within the filesystem. The file system is therefore augmented
-with an orthogonal namespace of file attributes. Attribute values
-are accessed by file descriptors obtained through a special attribute
-interface. This type of logical view of "attributes as files" allows
-the leveraging of existing file system interface functionality to
-support the construction, deletion and manipulation of attributes.
-
-But as we have tested through this functionality provided by Solaris
-we have come accross two major issues as written below.
-
-1. Symlink XATTR_NOFOLLOW not present for creating extended attributes
- directly on the symlinks like other platforms Linux,MAC-OSX,BSD etc.
- An implementation is present for O_NOFOLLOW for "openat()" call sets
- up errno ELOOP whenever encountered with a symlink and also another
- implementation AT_SYMLINK_NOFOLLOW which is not present for calls like
- "attropen(), openat()"
-
- a snippet of test code which helped us understand this behaviour
- --------------------------------------
- attrfd = attropen (path, key,
- flags|AT_SYMLINK_NOFOLLOW|O_CREAT|O_WRONLY|O_NOFOLLOW, 0777);
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- } else {
- fprintf (stderr, "Couldn't set extended attribute for %s (%d)\n",
- path, errno);
- }
- --------------------------------------
-
-2. Extended attribute support for special files like device files, fifo files
- is not supported under solaris.
-
-Apart from these glitches almost everything regarding porting functionality
-for extended attribute calls has been properly implemented in compat.c
-with writing wrapper around functions over
-"attropen()", "openat()", "unlinkat()"
-
-
-
diff --git a/doc/legacy/stat-prefetch-design.txt b/doc/legacy/stat-prefetch-design.txt
deleted file mode 100644
index 68ed423d3dd..00000000000
--- a/doc/legacy/stat-prefetch-design.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-what is stat-prefetch?
-======================
-It is a translator which caches the dentries read in readdir. This dentry
-list is stored in the context of fd. Later when lookup happens on
-[parent-inode, basename (path)] combination, this list is searched for the
-basename. The dentry thus searched is used to fill up the stat corresponding
-to path being looked upon, thereby short-cutting lookup calls. This cache is
-preserved till closedir is called on the fd. The purpose of this translator
-is to optimize operations like 'ls -l', where a readdir is followed by
-lookup (stat) calls on each directory entry.
-
-1. stat-prefetch harnesses the efficiency of short lookup calls
- (saves network roundtrip time for lookup calls from being accounted to
- the stat call).
-2. To maintain the correctness, it does lookup-behind - lookup is winded to
- underlying translators after it is unwound to upper translators.
- lookup-behind is necessary as inode gets populated in server inode table
- only in lookup-cbk and also because various translators store their
- contexts in inode contexts during lookup calls.
-
-fops to be implemented:
-=======================
-* lookup
- 1. check the dentry cache stored in context of fds opened by the same process
- on parent inode for basename. If found unwind with cached stat, else wind
- the lookup call to underlying translators.
- 2. stat is stored in the context of inode if the path being looked upon
- happens to be directory. This stat will be used to fill postparent stat
- when lookup happens on any of the directory contents.
-
-* readdir
- 1. cache the direntries returned in readdir_cbk in the context of fd.
- 2. if the readdir is happening on non-expected offsets (means a seekdir/rewinddir
- has happened), cache has to be flushed.
- 3. delete the entry corresponding to basename of path on which fd is opened
- from cache stored in parent.
-
-* chmod/fchmod
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_mode and st_ctime of
- stat.
-
-* chown/fchown
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_uid/st_gid and
- st_ctime of stat.
-
-* truncate/ftruncate
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_size/st_mtime of stat.
-
-* utimens
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since this call changes st_atime/st_mtime of stat.
-
-* readlink
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since this call changes st_atime of stat.
-
-* unlink
- 1. delete the entry corresponding to basename from cache stored in context of
- fds, opened on parent directory containing the file being unlinked.
- 2. delete the entry corresponding to basename of parent directory from cache
- of grand-parent.
-
-* rmdir
- 1. delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode.
- 2. remove the entire cache from all fds opened on inode corresponding to
- directory being removed.
- 3. delete the entry correspondig to basename of parent from cache stored in
- grand-parent.
-
-* readv
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since readv changes st_atime of file.
-
-* writev
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since writev can possibly change st_size and definitely
- changes st_mtime of file.
-
-* fsync
- there is a confusion here as to whether fsync updates mtime/ctimes. Disk based
- filesystems (atleast ext2) just writes the times stored in inode to disk
- during fsync and not the time at which fsync is being done. But in glusterfs,
- a translator like write-behind actually sends writes during fsync which will
- change mtime/ctime. Hence stat-prefetch implements fsync to delete the entry
- corresponding to basename from cache stored in context of fds opened on parent
- inode.
-
-* rename
- 1. remove entry corresponding to oldname from cache stored in fd contexts of
- oldparent.
- 2. remove entry corresponding to newname from cache stored in fd contexts of
- newparent.
- 3. remove entry corresponding to oldparent from cache stored in
- old-grand-parent, since removing oldname changes st_mtime and st_ctime
- of oldparent stat.
- 4. remove entry corresponding to newparent from cache stored in
- new-grand-parent, since adding newname changes st_mtime and st_ctime
- of newparent stat.
- 5. if oldname happens to be a directory, remove entire cache from all fds
- opened on it.
-
-* create/mknod/mkdir/symlink/link
- delete entry corresponding to basename of parent directory in which these
- operations are happening, from cache stored in context of fds opened on
- grand-parent, since adding a new entry to a directory changes st_mtime
- and st_ctime of parent directory.
-
-* setxattr/removexattr
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since setxattr changes st_ctime of file.
-
-* setdents
- 1. remove entry corresponding to basename of path on which fd is opened from
- cache stored in context of fds opened on parent.
- 2. for each of the entry in the direntry list, delete from cache stored in
- context of fd, the entry corresponding to basename of path being passed.
-
-* getdents
- 1. remove entry corresponding to basename of path on which fd is opened from
- cache stored in parent, since getdents changes st_atime.
- 2. remove entries corresponding to symbolic links from cache, since readlink
- would've changed st_atime.
-
-* checksum
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since st_atime is changed during this call.
-
-* xattrop/fxattrop
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since these calls modify st_ctime of file.
-
-callbacks to be implemented:
-============================
-* releasedir
- free the context stored in fd.
-
-* forget
- dree the stat if the inode corresponds to a directory.
-
-limitations:
-============
-* since a readdir does not return extended attributes of file, if need_xattr is
- set, short-cutting of lookup does not happen and lookup is passed to
- underlying translators.
-
-* posix_readdir does not check whether the dentries are spanning across multiple
- mount points. Hence it is not transforming inode numbers in stat buffers if
- posix is configured to allow export directory spanning on multiple mountpoints.
- This is a bug which needs to be fixed. posix_readdir should treat dentries the
- same way as if lookup is happening on dentries.
diff --git a/doc/legacy/translator-options.txt b/doc/legacy/translator-options.txt
deleted file mode 100644
index 3422c058a5d..00000000000
--- a/doc/legacy/translator-options.txt
+++ /dev/null
@@ -1,224 +0,0 @@
-mount/fuse:
- * direct-io-mode GF_OPTION_TYPE_BOOL on|off|yes|no
- * mount-point (mountpoint) GF_OPTION_TYPE_PATH <any-posix-valid-path>
- * attribute-timeout GF_OPTION_TYPE_DOUBLE 0.0
- * entry-timeout GF_OPTION_TYPE_DOUBLE 0.0
-
-protocol/server:
- * transport-type GF_OPTION_TYPE_STR tcp|socket|ib-verbs|unix|ib-sdp|
- tcp/client|ib-verbs/client
- * volume-filename.* GF_OPTION_TYPE_PATH
- * inode-lru-limit GF_OPTION_TYPE_INT 0-(1 * GF_UNIT_MB)
- * client-volume-filename GF_OPTION_TYPE_PATH
-
-protocol/client:
- * username GF_OPTION_TYPE_ANY
- * password GF_OPTION_TYPE_ANY
- * transport-type GF_OPTION_TYPE_STR tcp|socket|ib-verbs|unix|ib-sdp|
- tcp/client|ib-verbs/client
- * remote-host GF_OPTION_TYPE_ANY
- * remote-subvolume GF_OPTION_TYPE_ANY
- * transport-timeout GF_OPTION_TYPE_TIME 5-1013
-
-cluster/replicate:
- * read-subvolume GF_OPTION_TYPE_XLATOR
- * favorite-child GF_OPTION_TYPE_XLATOR
- * data-self-heal GF_OPTION_TYPE_BOOL
- * metadata-self-heal GF_OPTION_TYPE_BOOL
- * entry-self-heal GF_OPTION_TYPE_BOOL
- * data-change-log GF_OPTION_TYPE_BOOL
- * metadata-change-log GF_OPTION_TYPE_BOOL
- * entry-change-log GF_OPTION_TYPE_BOOL
- * data-lock-server-count GF_OPTION_TYPE_INT 0
- * metadata-lock-server-count GF_OPTION_TYPE_INT 0
- * entry-lock-server-count GF_OPTION_TYPE_INT 0
-
-cluster/distribute:
- * lookup-unhashed GF_OPTION_TYPE_BOOL
-
-cluster/unify:
- * namespace GF_OPTION_TYPE_XLATOR
- * scheduler GF_OPTION_TYPE_STR alu|rr|random|nufa|switch
- * self-heal GF_OPTION_TYPE_STR foreground|background|off
- * optimist GF_OPTION_TYPE_BOOL
-
-cluster/nufa:
- local-volume-name GF_OPTION_TYPE_XLATOR
-
-cluster/stripe:
- * block-size GF_OPTION_TYPE_ANY
- * use-xattr GF_OPTION_TYPE_BOOL
-
-debug/trace:
- * include-ops (include) GF_OPTION_TYPE_STR
- * exclude-ops (exclude) GF_OPTION_TYPE_STR
-
-encryption/rot-13:
- * encrypt-write GF_OPTION_TYPE_BOOL
- * decrypt-read GF_OPTION_TYPE_BOOL
-
-features/path-convertor:
- * start-offset GF_OPTION_TYPE_INT 0-4095
- * end-offset GF_OPTION_TYPE_INT 1-4096
- * replace-with GF_OPTION_TYPE_ANY
-
-features/trash:
- * trash-dir GF_OPTION_TYPE_PATH
-
-features/locks:
- * mandatory-locks (mandatory) GF_OPTION_TYPE_BOOL
-
-features/filter:
- * root-squashing GF_OPTION_TYPE_BOOL
- * read-only GF_OPTION_TYPE_BOOL
- * fixed-uid GF_OPTION_TYPE_INT
- * fixed-gid GF_OPTION_TYPE_INT
- * translate-uid GF_OPTION_TYPE_ANY
- * translate-gid GF_OPTION_TYPE_ANY
- * filter-uid GF_OPTION_TYPE_ANY
- * filter-gid GF_OPTION_TYPE_ANY
-
-features/quota:
- * min-free-disk-limit GF_OPTION_TYPE_PERCENT
- * refresh-interval GF_OPTION_TYPE_TIME
- * disk-usage-limit GF_OPTION_TYPE_SIZET
-
-storage/posix:
- * o-direct GF_OPTION_TYPE_BOOL
- * directory GF_OPTION_TYPE_PATH
- * export-statfs-size GF_OPTION_TYPE_BOOL
- * mandate-attribute GF_OPTION_TYPE_BOOL
-
-storage/bdb:
- * directory GF_OPTION_TYPE_PATH
- * logdir GF_OPTION_TYPE_PATH
- * errfile GF_OPTION_TYPE_PATH
- * dir-mode GF_OPTION_TYPE_ANY
- * file-mode GF_OPTION_TYPE_ANY
- * page-size GF_OPTION_TYPE_SIZET
- * lru-limit GF_OPTION_TYPE_INT
- * lock-timeout GF_OPTION_TYPE_TIME
- * checkpoint-timeout GF_OPTION_TYPE_TIME
- * transaction-timeout GF_OPTION_TYPE_TIME
- * mode GF_OPTION_TYPE_BOOL
- * access-mode GF_OPTION_TYPE_STR
-
-performance/read-ahead:
- * force-atime-update GF_OPTION_TYPE_BOOL
- * page-size GF_OPTION_TYPE_SIZET (64 * GF_UNIT_KB)-(2 * GF_UNIT_MB)
- * page-count GF_OPTION_TYPE_INT 1-16
-
-performance/write-behind:
- * flush-behind GF_OPTION_TYPE_BOOL
- * aggregate-size GF_OPTION_TYPE_SIZET (128 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
- * window-size GF_OPTION_TYPE_SIZET (512 * GF_UNIT_KB)-(1 * GF_UNIT_GB)
- * enable-O_SYNC GF_OPTION_TYPE_BOOL
- * disable-for-first-nbytes GF_OPTION_TYPE_SIZET 1 - (1 * GF_UNIT_MB)
-
-performance/symlink-cache:
-
-performance/io-threads:
- * thread-count GF_OPTION_TYPE_INT 1-32
-
-performance/io-cache:
- * priority GF_OPTION_TYPE_ANY
- * cache-timeout (force-revalidate-timeout) GF_OPTION_TYPE_INT 0-60
- * page-size GF_OPTION_TYPE_SIZET (16 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
- * cache-size GF_OPTION_TYPE_SIZET (4 * GF_UNIT_MB)-(6 * GF_UNIT_GB)
-
-performance/quick-read:
- * cache-timeout GF_OPTION_TYPE_INT 1-60
- * max-file-size GF_OPTION_TYPE_SIZET 0-(1000 * GF_UNIT_KB)
-
-auth:
-- addr:
- * auth.addr.*.allow GF_OPTION_TYPE_ANY
- * auth.addr.*.reject GF_OPTION_TYPE_ANY
-
-- login:
- * auth.login.*.allow GF_OPTION_TYPE_ANY
- * auth.login.*.password GF_OPTION_TYPE_ANY
-
-scheduler/alu:
- * scheduler.alu.order (alu.order)
- GF_OPTION_TYPE_ANY
- * scheduler.alu.disk-usage.entry-threshold (alu.disk-usage.entry-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.disk-usage.exit-threshold (alu.disk-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.write-usage.entry-threshold (alu.write-usage.entry-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.write-usage.exit-threshold (alu.write-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.read-usage.entry-threshold (alu.read-usage.entry-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.read-usage.exit-threshold (alu.read-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.open-files-usage.entry-threshold (alu.open-files-usage.entry-threshold)
- GF_OPTION_TYPE_INT
- * scheduler.alu.open-files-usage.exit-threshold (alu.open-files-usage.exit-threshold)
- GF_OPTION_TYPE_INT
- * scheduler.read-only-subvolumes (alu.read-only-subvolumes)
- GF_OPTION_TYPE_ANY
- * scheduler.refresh-interval (alu.refresh-interval)
- GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (alu.limits.min-free-disk)
- GF_OPTION_TYPE_PERCENT
- * scheduler.alu.stat-refresh.num-file-create (alu.stat-refresh.num-file-create)
- GF_OPTION_TYPE_INT
-
-scheduler/nufa:
- * scheduler.refresh-interval (nufa.refresh-interval)
- GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (nufa.limits.min-free-disk)
- GF_OPTION_TYPE_PERCENT
- * scheduler.local-volume-name (nufa.local-volume-name)
- GF_OPTION_TYPE_XLATOR
-
-scheduler/random:
- * scheduler.refresh-interval (random.refresh-interval) GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (random.limits.min-free-disk) GF_OPTION_TYPE_PERCENT
-
-scheduler/rr:
- * scheduler.refresh-interval (rr.refresh-interval) GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (rr.limits.min-free-disk) GF_OPTION_TYPE_PERCENT
- * scheduler.read-only-subvolumes (rr.read-only-subvolumes) GF_OPTION_TYPE_ANY
-
-scheduler/switch:
- * scheduler.read-only-subvolumes (switch.read-only-subvolumes) GF_OPTION_TYPE_ANY
- * scheduler.local-volume-name (switch.nufa.local-volume-name) GF_OPTION_TYPE_XLATOR
- * scheduler.switch.case (switch.case) GF_OPTION_TYPE_ANY
-
-transport/ib-verbs:
- * transport.ib-verbs.port (ib-verbs-port) GF_OPTION_TYPE_INT 1-4
- check the option by 'ibv_devinfo'
- * transport.ib-verbs.mtu (ib-verbs-mtu) GF_OPTION_TYPE_INT
- * transport.ib-verbs.device-name (ib-verbs-device-name) GF_OPTION_TYPE_ANY,
- check by 'ibv_devinfo'
- * transport.ib-verbs.work-request-send-size (ib-verbs-work-request-send-size)
- GF_OPTION_TYPE_INT,
- * transport.ib-verbs.work-request-recv-size (ib-verbs-work-request-recv-size)
- GF_OPTION_TYPE_INT
- * transport.ib-verbs.work-request-send-count (ib-verbs-work-request-send-count)
- GF_OPTION_TYPE_INT
- * transport.ib-verbs.work-request-recv-count (ib-verbs-work-request-recv-count)
- GF_OPTION_TYPE_INT
- * remote-port (transport.remote-port,transport.ib-verbs.remote-port)
- GF_OPTION_TYPE_INT
- * transport.ib-verbs.listen-port GF_OPTION_TYPE_INT
- * transport.ib-verbs.connect-path (connect-path) GF_OPTION_TYPE_ANY
- * transport.ib-verbs.bind-path (bind-path) GF_OPTION_TYPE_ANY
- * transport.ib-verbs.listen-path (listen-path) GF_OPTION_TYPE_ANY
- * transport.address-family (address-family) GF_OPTION_TYPE_STR inet|inet6|inet/inet6|
- inet6/inet|unix|inet-sdp
-
-transport/socket:
- * transport.remote-port (remote-port,transport.socket.remote-port) GF_OPTION_TYPE_INT
- * transport.socket.listen-port (listen-port) GF_OPTION_TYPE_INT
- * transport.socket.bind-address (bind-address) GF_OPTION_TYPE_ANY
- * transport.socket.connect-path (connect-path) GF_OPTION_TYPE_ANY
- * transport.socket.bind-path (bind-path) GF_OPTION_TYPE_ANY
- * transport.socket.listen-path (listen-path) GF_OPTION_TYPE_ANY
- * transport.address-family (address-family) GF_OPTION_TYPE_STR inet|inet6|
- inet/inet6|inet6/inet|
- unix|inet-sdp
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 10e1d59f055..ce16e9e40b7 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -1,37 +1,31 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
.\"
-.\"
-.\"
-.TH GlusterFS 8 "Cluster Filesystem" "18 March 2010" "Gluster Inc."
+.TH GlusterFS 8 "Cluster Filesystem" "14 September 2013" "Red Hat, Inc."
.SH NAME
-mount.glusterfs - script to mount native GlusterFS volume
+.B mount.glusterfs - script to mount native GlusterFS volume
.SH SYNOPSIS
-.B mount -t glusterfs
-.I [-o <options>] <volumeserver>:<volumeid> <mountpoint>
+.B mount -t glusterfs [-o <options>] <volumeserver>:/<volume>[/<subdir>]
+.B <mountpoint>
+.TP
+.B mount -t glusterfs [-o <options>] <server1>,<server2>,
+.B <server3>,..<serverN>:/<volname>[/<subdir>] <mount_point>
.TP
-.B mount -t glusterfs
-.I [-o <options>] <path/to/volumefile> <mountpoint>
+.TP
+.B mount -t glusterfs [-o <options>] <path/to/volumefile> <mountpoint>
.PP
.SH DESCRIPTION
This tool is part of \fBglusterfs\fR(8) package, which is used to mount using
GlusterFS native binary.
-\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting
+\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting
native GlusterFS client. This subcommand, however, can also be used as a
standalone command with limited functionality.
@@ -47,41 +41,137 @@ File to use for logging [default:/var/log/glusterfs/glusterfs.log]
Logging severity. Valid options are TRACE, DEBUG, WARNING, ERROR, CRITICAL
INFO and NONE [default: INFO]
.TP
+\fBacl
+Mount the filesystem with POSIX ACL support
+.TP
+\fBfopen\-keep\-cache[=BOOL]
+Do not purge the cache on file open (default: false)
+.TP
+\fBworm
+Mount the filesystem in 'worm' mode
+.TP
+\fBaux\-gfid\-mount
+Enable access to filesystem through gfid directly
+.TP
\fBro\fR
Mount the filesystem read-only
+.TP
+\fBenable\-ino32=\fRBOOL
+Use 32-bit inodes when mounting to workaround broken applications that don't
+support 64-bit inodes
+.TP
+\fBmem\-accounting
+Enable internal memory accounting
+.TP
+\fBcapability
+Enable file capability setting and retrival
+.TP
+\fBthin-client
+Enables thin mount and connects via gfproxyd daemon
+
.PP
.SS "Advanced options"
.PP
.TP
-\fBvolfile\-id=\fRKEY
-Volume key or name of the volume file to be fetched from server
+\fBattribute\-timeout=\fRSECONDS
+Set attribute timeout to SECONDS for inodes in fuse kernel module [default: 1]
+.TP
+\fBentry\-timeout=\fRSECONDS
+Set entry timeout to SECONDS in fuse kernel module [default: 1]
+.TP
+\fBbackground\-qlen=\fRN
+Set fuse module's background queue length to N [default: 64]
.TP
-\fBtransport=\fRTRANSPORT-TYPE
-Transport type to get volume file from server [default: socket]
+\fBgid\-timeout=\fRSECONDS
+Set auxiliary group list timeout to SECONDS for fuse translator [default: 0]
+.TP
+\fBnegative\-timeout=\fRSECONDS
+Set negative timeout to SECONDS in fuse kernel module [default: 0]
.TP
\fBvolume\-name=\fRVOLUME-NAME
Volume name to be used for MOUNT-POINT [default: top most volume in
VOLUME-FILE]
.TP
-\fBdirect\-io\-mode=\fRdisable
-Disable direct I/O mode in fuse kernel module
+\fBdirect\-io\-mode=\fRBOOL|auto
+Specify fuse direct I/O strategy [default: auto]
+.TP
+\fBcongestion\-threshold=\fRN
+Set fuse module's congestion threshold to N [default: 48]
+.TP
+\fsubdir\-mount=\fRN
+Set the subdirectory mount option [default: NULL, ie, no subdirectory mount]
+.TP
+.TP
+\fBbackup\-volfile\-servers=\fRSERVERLIST
+Provide list of backup volfile servers in the following format [default: None]
+
+\fB$ mount \-t glusterfs \-obackup\-volfile\-servers=<server2>:\fR
+\fB <server3>:...:<serverN> <server1>:/<volname> <mount_point>\fR
+
+.TP
+.TP
+\fBbackupvolfile\-server=\fRSERVER
+Provide list of backup volfile servers in the following format [default: None]
+
+\fB $ mount \-t glusterfs \-obackupvolfile\-server=<server2>
+\fB <server1>:/<volname> <mount_point>
+
+.TP
+.TP
+\fBfetch-attempts=\fRN
+\fBDeprecated\fR option - placed here for backward compatibility [default: 1]
+.TP
+.TP
+\fBlru-limit=\fRN
+Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536]
+.TP
+.TP
+\fBinvalidate-limit=\fRN
+Suspend fuse invalidations implied by 'lru-limit' if number of outstanding
+invalidations reaches N
+.TP
+.TP
+\fBbackground-qlen=\fRN
+Set fuse module's background queue length to N [default: 64]
+.TP
+\fBno\-root\-squash=\fRBOOL
+disable root squashing for the trusted client [default: off]
+.TP
+\fBroot\-squash=\fRBOOL
+enable root squashing for the trusted client [default: on]
+.TP
+\fBuse\-readdirp=\fRBOOL
+Use readdirp() mode in fuse kernel module [default: on]
+.TP
+\fBdump\-fuse=\fRPATH
+Dump fuse traffic to PATH
+.TP
+\fBkernel\-writeback\-cache=\fRBOOL
+Enable fuse in-kernel writeback cache [default: off]
+.TP
+\fBattr\-times\-granularity=\fRNS
+Declare supported granularity of file attribute [default: 0]
.TP
+\fBauto\-invalidation=\fRBOOL
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on]
.PP
.SH FILES
.TP
.I /etc/fstab
A typical GlusterFS entry in /etc/fstab looks like below
-server1.gluster.com:mirror /mnt/mirror glusterfs log-file=/var/log/mirror.vol,ro,defaults 0 0
+\fBserver1:/mirror /mnt/mirror glusterfs log-file=/var/log/mirror.log,acl 0 0\fR
.TP
-.I /etc/mtab
-An example entry of a GlusterFS mountpoint in /etc/mtab looks like below
+.I /proc/mounts
+An example entry of a GlusterFS mountpoint in /proc/mounts looks like below
-mirror.vol /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0
+\fBserver1:/mirror /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0\fR
.SH SEE ALSO
\fBglusterfs\fR(8), \fBmount\fR(8), \fBgluster\fR(8)
.SH COPYRIGHT
-Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+Copyright(c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
diff --git a/doc/qa/legacy/qa-client.vol b/doc/qa/legacy/qa-client.vol
deleted file mode 100644
index bcf242347d3..00000000000
--- a/doc/qa/legacy/qa-client.vol
+++ /dev/null
@@ -1,170 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# 1st client
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
-# option transport.ib-verbs.work-request-send-size 131072
-# option transport.ib-verbs.work-request-send-count 64
-# option transport.ib-verbs.work-request-recv-size 131072
-# option transport.ib-verbs.work-request-recv-count 64
- option remote-host 127.0.0.1
- option remote-subvolume ra1
-end-volume
-
-# 2nd client
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra2
-end-volume
-
-# 3rd client
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra3
-end-volume
-
-# 4th client
-volume client4
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra4
-end-volume
-
-# 5th client
-volume client5
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra5
-end-volume
-
-# 6th client
-volume client6
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra6
-end-volume
-
-# 7th client
-volume client7
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra7
-end-volume
-
-# 8th client
-volume client8
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra8
-end-volume
-
-# 1st Stripe (client1 client2)
-volume stripe1
- type cluster/stripe
- subvolumes client1 client2
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 2st Stripe (client3 client4)
-volume stripe2
- type cluster/stripe
- subvolumes client3 client4
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 3st Stripe (client5 client6)
-volume stripe3
- type cluster/stripe
- subvolumes client5 client6
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 4st Stripe (client7 client8)
-volume stripe4
- type cluster/stripe
- subvolumes client7 client8
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-
-# 1st replicate
-volume replicate1
- type cluster/replicate
- subvolumes stripe1 stripe2
-end-volume
-
-# 2nd replicate
-volume replicate2
- type cluster/replicate
- subvolumes stripe3 stripe4
-end-volume
-
-volume ns
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option remote-subvolume brick-ns
-end-volume
-
-# Unify
-volume unify0
- type cluster/unify
- subvolumes replicate1 replicate2
-# subvolumes stripe1 stripe3
- option namespace ns
- option scheduler rr # random # alu # nufa
- option rr.limits.min-free-disk 1GB
-# option alu.order x
-# option alu.x.entry-threshold
-# option alu.x.exit-threshold
-end-volume
-
-
-# ==== Performance Translators ====
-# The default options for performance translators should be the best for 90+% of the cases
-volume iot
- type performance/io-threads
- subvolumes unify0
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes iot
-end-volume
-
-volume ioc
- type performance/io-cache
- subvolumes wb
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes ioc
-end-volume
diff --git a/doc/qa/legacy/qa-high-avail-client.vol b/doc/qa/legacy/qa-high-avail-client.vol
deleted file mode 100644
index 69cb8dd30f1..00000000000
--- a/doc/qa/legacy/qa-high-avail-client.vol
+++ /dev/null
@@ -1,17 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost
- option transport.socket.remote-port 7001
- option remote-subvolume server1-iot
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes client
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes ra
-end-volume
diff --git a/doc/qa/legacy/qa-high-avail-server.vol b/doc/qa/legacy/qa-high-avail-server.vol
deleted file mode 100644
index 784e8d208e4..00000000000
--- a/doc/qa/legacy/qa-high-avail-server.vol
+++ /dev/null
@@ -1,344 +0,0 @@
-
-# -- server 1 --
-volume server1-posix1
- type storage/posix
- option directory /tmp/ha-export1/
-end-volume
-
-volume server1-ns1
- type storage/posix
- option directory /tmp/ha-export-ns1/
-end-volume
-
-volume server1-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server1-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server1-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server1-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server1-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-posix1
-end-volume
-
-
-volume server1-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client2
-end-volume
-
-volume server1-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client3
-end-volume
-
-volume server1-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns1
-end-volume
-
-volume server1-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns2
-end-volume
-
-volume server1-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns3
-end-volume
-
-volume server1-ns-replicate
- type cluster/replicate
- subvolumes server1-ns-io1 server1-ns-io2 server1-ns-io3
-end-volume
-
-volume server1-storage-replicate
- type cluster/replicate
- subvolumes server1-io1 server1-io2 server1-io3
-end-volume
-
-volume server1-unify
- type cluster/unify
- #option self-heal off
- subvolumes server1-storage-replicate
- option namespace server1-ns-replicate
- option scheduler rr
-end-volume
-
-volume server1-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server1-unify
-end-volume
-
-volume server1
- type protocol/server
- option transport-type tcp
- subvolumes server1-iot
- option transport.socket.listen-port 7001
- option auth.addr.server1-posix1.allow *
- option auth.addr.server1-ns1.allow *
- option auth.addr.server1-iot.allow *
-end-volume
-
-
-# == Server2 ==
-volume server2-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server2-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server2-posix2
- type storage/posix
- option directory /tmp/ha-export2/
-end-volume
-
-volume server2-ns2
- type storage/posix
- option directory /tmp/ha-export-ns2/
-end-volume
-
-volume server2-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server2-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server2-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client1
-end-volume
-
-
-volume server2-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-posix2
-end-volume
-
-volume server2-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client3
-end-volume
-
-volume server2-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns1
-end-volume
-
-volume server2-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns2
-end-volume
-
-volume server2-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns3
-end-volume
-
-volume server2-ns-replicate
- type cluster/replicate
- subvolumes server2-ns-io1 server2-ns-io2 server2-ns-io3
-end-volume
-
-volume server2-storage-replicate
- type cluster/replicate
- subvolumes server2-io2 server2-io3 server2-io1
-end-volume
-
-volume server2-unify
- type cluster/unify
- option self-heal off
- subvolumes server2-storage-replicate
- option namespace server2-ns-replicate
- option scheduler rr
-end-volume
-
-volume server2-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server2-unify
-end-volume
-
-volume server2
- type protocol/server
- option transport-type tcp
- subvolumes server2-iot
- option transport.socket.listen-port 7002
- option auth.addr.server2-posix2.allow *
- option auth.addr.server2-ns2.allow *
- option auth.addr.server2-iot.allow *
-end-volume
-
-# == server 3 ==
-volume server3-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server3-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server3-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server3-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server3-posix3
- type storage/posix
- option directory /tmp/ha-export3/
-end-volume
-
-volume server3-ns3
- type storage/posix
- option directory /tmp/ha-export-ns3/
-end-volume
-
-volume server3-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client1
-end-volume
-
-
-volume server3-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client2
-end-volume
-
-volume server3-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-posix3
-end-volume
-
-volume server3-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns1
-end-volume
-
-volume server3-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns2
-end-volume
-
-volume server3-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns3
-end-volume
-
-volume server3-ns-replicate
- type cluster/replicate
- subvolumes server3-ns-io1 server3-ns-io2 server3-ns-io3
-end-volume
-
-volume server3-storage-replicate
- type cluster/replicate
- subvolumes server3-io3 server3-io2 server3-io1
-end-volume
-
-volume server3-unify
- type cluster/unify
- option self-heal off
- subvolumes server3-storage-replicate
- option namespace server3-ns-replicate
- option scheduler rr
-end-volume
-
-volume server3-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server3-unify
-end-volume
-
-volume server3
- type protocol/server
- option transport-type tcp
- subvolumes server3-iot
- option transport.socket.listen-port 7003
- option auth.addr.server3-posix3.allow *
- option auth.addr.server3-ns3.allow *
- option auth.addr.server3-iot.allow *
-end-volume
-
diff --git a/doc/qa/legacy/qa-server.vol b/doc/qa/legacy/qa-server.vol
deleted file mode 100644
index d948f701f1b..00000000000
--- a/doc/qa/legacy/qa-server.vol
+++ /dev/null
@@ -1,284 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# Namespace posix
-volume brick-ns
- type storage/posix # POSIX FS translator
- option directory /tmp/export-ns # Export this directory
-end-volume
-
-# 1st server
-
-volume brick1
- type storage/posix # POSIX FS translator
- option directory /tmp/export1 # Export this directory
-end-volume
-
-# == Posix-Locks ==
- volume plocks1
- type features/posix-locks
-# option mandatory on
- subvolumes brick1
- end-volume
-
-volume iot1
- type performance/io-threads
- subvolumes plocks1 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb1
- type performance/write-behind
- subvolumes iot1
-# option <key> <value>
-end-volume
-
-volume ra1
- type performance/read-ahead
- subvolumes wb1
-# option <key> <value>
-end-volume
-
-volume brick2
- type storage/posix # POSIX FS translator
- option directory /tmp/export2 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash2
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick2
-# end-volume
-
-# == Posix-Locks ==
-volume plocks2
- type features/posix-locks
-# option <something> <something>
- subvolumes brick2
-end-volume
-
-volume iot2
- type performance/io-threads
- subvolumes plocks2 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb2
- type performance/write-behind
- subvolumes iot2
-# option <key> <value>
-end-volume
-
-volume ra2
- type performance/read-ahead
- subvolumes wb2
-# option <key> <value>
-end-volume
-
-volume brick3
- type storage/posix # POSIX FS translator
- option directory /tmp/export3 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash3
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick3
-# end-volume
-
-# == Posix-Locks ==
-volume plocks3
- type features/posix-locks
-# option <something> <something>
- subvolumes brick3
-end-volume
-
-volume iot3
- type performance/io-threads
- subvolumes plocks3 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb3
- type performance/write-behind
- subvolumes iot3
-# option <key> <value>
-end-volume
-
-volume ra3
- type performance/read-ahead
- subvolumes wb3
-# option <key> <value>
-end-volume
-
-volume brick4
- type storage/posix # POSIX FS translator
- option directory /tmp/export4 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks4
- type features/posix-locks
-# option <something> <something>
- subvolumes brick4
-end-volume
-
-volume iot4
- type performance/io-threads
- subvolumes plocks4 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb4
- type performance/write-behind
- subvolumes iot4
-# option <key> <value>
-end-volume
-
-volume ra4
- type performance/read-ahead
- subvolumes wb4
-# option <key> <value>
-end-volume
-
-volume brick5
- type storage/posix # POSIX FS translator
- option directory /tmp/export5 # Export this directory
-end-volume
-
-
-# == Posix-Locks ==
-volume plocks5
- type features/posix-locks
-# option <something> <something>
- subvolumes brick5
-end-volume
-
-volume iot5
- type performance/io-threads
- subvolumes plocks5 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb5
- type performance/write-behind
- subvolumes iot5
-# option <key> <value>
-end-volume
-
-volume ra5
- type performance/read-ahead
- subvolumes wb5
-# option <key> <value>
-end-volume
-
-volume brick6
- type storage/posix # POSIX FS translator
- option directory /tmp/export6 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks6
- type features/posix-locks
-# option <something> <something>
- subvolumes brick6
-end-volume
-
-volume iot6
- type performance/io-threads
- subvolumes plocks6 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb6
- type performance/write-behind
- subvolumes iot6
-# option <key> <value>
-end-volume
-
-volume ra6
- type performance/read-ahead
- subvolumes wb6
-# option <key> <value>
-end-volume
-
-volume brick7
- type storage/posix # POSIX FS translator
- option directory /tmp/export7 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks7
- type features/posix-locks
-# option <something> <something>
- subvolumes brick7
-end-volume
-
-volume iot7
- type performance/io-threads
- subvolumes plocks7 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb7
- type performance/write-behind
- subvolumes iot7
-# option <key> <value>
-end-volume
-
-volume ra7
- type performance/read-ahead
- subvolumes wb7
-# option <key> <value>
-end-volume
-
-volume brick8
- type storage/posix # POSIX FS translator
- option directory /tmp/export8 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks8
- type features/posix-locks
-# option <something> <something>
- subvolumes brick8
-end-volume
-
-volume iot8
- type performance/io-threads
- subvolumes plocks8 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb8
- type performance/write-behind
- subvolumes iot8
-# option <key> <value>
-end-volume
-
-volume ra8
- type performance/read-ahead
- subvolumes wb8
-# option <key> <value>
-end-volume
-
-volume server8
- type protocol/server
- subvolumes ra8 ra1 ra2 ra3 ra4 ra5 ra6 ra7 brick-ns
- option transport-type tcp # For TCP/IP transport
-# option transport-type ib-sdp # For Infiniband transport
-# option transport-type ib-verbs # For ib-verbs transport
- option client-volume-filename /examples/qa-client.vol
- option auth.addr.ra1.allow * # Allow access to "stat8" volume
- option auth.addr.ra2.allow * # Allow access to "stat8" volume
- option auth.addr.ra3.allow * # Allow access to "stat8" volume
- option auth.addr.ra4.allow * # Allow access to "stat8" volume
- option auth.addr.ra5.allow * # Allow access to "stat8" volume
- option auth.addr.ra6.allow * # Allow access to "stat8" volume
- option auth.addr.ra7.allow * # Allow access to "stat8" volume
- option auth.addr.ra8.allow * # Allow access to "stat8" volume
- option auth.addr.brick-ns.allow * # Allow access to "stat8" volume
-end-volume
-
diff --git a/doc/release-notes/en-US/Author_Group.xml b/doc/release-notes/en-US/Author_Group.xml
deleted file mode 100644
index 43a491ea9c8..00000000000
--- a/doc/release-notes/en-US/Author_Group.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE authorgroup PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<authorgroup>
- <author>
- <firstname>GlusterFS</firstname>
- <surname>Developers</surname>
- <affiliation>
- <orgname>Red Hat</orgname>
- <orgdiv>Storage</orgdiv>
- </affiliation>
- <email>gluster-devel@nongnu.org</email>
- </author>
-</authorgroup>
-
diff --git a/doc/release-notes/en-US/Book_Info.xml b/doc/release-notes/en-US/Book_Info.xml
deleted file mode 100644
index d3bbb9f68e6..00000000000
--- a/doc/release-notes/en-US/Book_Info.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. -->
-<!DOCTYPE bookinfo PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<bookinfo id="book-Release_Notes-Release_Notes">
- <title>Release Notes</title>
- <subtitle>Release Notes for GlusterFS<remark> 3.3.0 </remark></subtitle>
- <productname>GlusterFS</productname>
- <productnumber>3.3</productnumber>
- <edition>1</edition>
- <pubsnumber>1</pubsnumber>
- <abstract>
- <para>
- This Release Notes introduces GlusterFS and provides information including key features and managing the software.
- </para>
- </abstract>
- <corpauthor>
- <inlinemediaobject>
- <imageobject>
- <imagedata fileref="Common_Content/images/title_logo.svg" format="SVG"/>
- </imageobject>
- </inlinemediaobject>
- </corpauthor>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Legal_Notice.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Author_Group.xml"/>
-</bookinfo>
diff --git a/doc/release-notes/en-US/Chapter.xml b/doc/release-notes/en-US/Chapter.xml
deleted file mode 100644
index 8a2957971af..00000000000
--- a/doc/release-notes/en-US/Chapter.xml
+++ /dev/null
@@ -1,33 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Release_Notes-Test_Chapter">
- <title>Test Chapter</title>
- <para>
- This is a test paragraph
- </para>
- <section id="sect-Release_Notes-Test_Chapter-Test_Section_1">
- <title>Test Section 1</title>
- <para>
- This is a test paragraph in a section
- </para>
- </section>
-
- <section id="sect-Release_Notes-Test_Chapter-Test_Section_2">
- <title>Test Section 2</title>
- <para>
- This is a test paragraph in Section 2
- <orderedlist>
- <listitem>
- <para>
- listitem text
- </para>
- </listitem>
- </orderedlist>
- </para>
- </section>
-
-</chapter>
-
diff --git a/doc/release-notes/en-US/Download_Install.xml b/doc/release-notes/en-US/Download_Install.xml
deleted file mode 100644
index 10bc5663b6f..00000000000
--- a/doc/release-notes/en-US/Download_Install.xml
+++ /dev/null
@@ -1,107 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Release_Notes-Launch">
- <title>Downloading and Installing GlusterFS </title>
- <para>You can download and install the GlusterFS 3.3.0 or upgrade to latest version
- </para>
- <section id="sect-Release_Notes-Download_and_Install-Test_Section_1">
- <title>Downloading GlusterFS 3.3 </title>
- <para>You can download the latest software to each server in your cluster from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.0/3.3.0/ "/>.
- </para>
- </section>
- <section id="sect-Release_Notes-Download_and_Install-Test_Section_2">
- <title>New Installation </title>
- <para>
-The installation process for GlusterFS server is available at:
-
- <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.2/Documentation/IG/html/chap-Installation_Guide-Installing.html"/>
- </para>
- </section>
- <section>
- <title>Compatibility </title>
- <para>Release 3.3 of GlusterFS is not compatible with 2.0.x, 3.0.x, 3.1.x, and 3.2.x releases of GlusterFS.
-</para>
- </section>
- <section>
- <title>Upgrade </title>
- <para>Red Hat recommends that you back up your data before upgrading to GlusterFS 3.3.
-</para>
- <note>
- <para>Configurations generated outside the scope of gluster CLI are neither recommended nor
-supported by Red Hat.
-</para>
- </note>
- <section>
- <title>Upgrade from GlusterFS v3.2.x or 3.1.x to GlusterFS v3.3 </title>
- <para>In an environment with replicated bricks it is recommended that you upgrade a single storage server,
-confirm stability, and then upgrade the replica.
-</para>
- <para>Use the same installation method for the upgrade as the original glusterfs installation.
-</para>
- <para><emphasis role="bold">Using RPMs on RHEL, CentOS, Fedora</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Download the 3.3 RPMs from the following location:
-</para>
- <para><emphasis role="bold">RHEL:</emphasis> <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.0/3.3.0/RHEL/ "/></para>
- <para><emphasis role="bold">Fedora:</emphasis> <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.0/3.3.0/Fedora/ "/></para>
- </listitem>
- <listitem>
- <para>Run rpm using the following command:
-</para>
- <para><command># rpm -U glusterfs*</command>
-</para>
- </listitem>
- </orderedlist>
- <para><emphasis role="bold">Using dpkg on Debian, Ubuntu</emphasis>
-</para>
- <orderedlist>
- <listitem>
- <para>Download the 3.3 packages from
-</para>
- <para><emphasis role="bold">Debian:</emphasis> <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.0/3.3.0/Debian/ "/></para>
- <para><emphasis role="bold">Ubuntu:</emphasis> <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.0/3.3.0/Ubuntu/ "/></para>
- </listitem>
- <listitem>
- <para>Run dpkg using the following command:
-</para>
- <para><command># dpkg -i glusterfs* </command></para>
- </listitem>
- </orderedlist>
- <para><emphasis role="bold">Building from source </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Download the 3.3 source code from
-</para>
- <para><ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.0/3.3.0/glusterf3-3.0.tar.gz"/></para>
- </listitem>
- <listitem>
- <para>Unpack and install GlusterFS using the following commands:
-</para>
- <screen># gunzip glusterfs-3.3.0.tar.gz
-# tar xvf glusterfs-3.3.0.tar
-# cd glusterfs-3.3.0
-# ./configure
-# make
-# make install</screen>
- </listitem>
- <listitem>
- <para>Stop and start GlusterFS using the following commands, this step will disconnect Gluster Native
-clients.
-</para>
- <para><command># killall glusterfsd </command></para>
- <para><command># killall glusterfs </command></para>
- <para><command># killall glusterd </command></para>
- </listitem>
- <listitem>
- <para>Start GlusterFS using the following command:
-</para>
- <para><command># /etc/init.d/glusterd start </command></para>
- </listitem>
- </orderedlist>
- </section>
- </section>
-</chapter>
diff --git a/doc/release-notes/en-US/Key_Features.xml b/doc/release-notes/en-US/Key_Features.xml
deleted file mode 100644
index 4e11bec8429..00000000000
--- a/doc/release-notes/en-US/Key_Features.xml
+++ /dev/null
@@ -1,72 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Release_Notes-Key_Features">
- <title>Key Features</title>
- <para>This section describes the key features available in Red Hat Storage. The following is a list of feature highlights of this new version of the Red Hat Storage software: </para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">High Availability</emphasis></para>
- <para>The Red Hat Storage provides both synchronous and asynchronous n-way file replication to assure data availability:</para>
- <para><itemizedlist>
- <listitem>
- <para><emphasis role="bold">Synchronous replication</emphasis> provides redundancy and protection within a single data center or multiple data centers and availability zones in a region.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Asynchronous geo-replication</emphasis>- Red Hat Storage supports Geo-Rep long distance replication. Customers can configure storage server nodes and Red Hat Storage to asynchronously replicate data
-over vast geographical distances.
-.</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Deploy in Minutes</emphasis></para>
- <para>The Red Hat Storage S can be deployed in minutes, providing one of the fastest ways to create an on-demand, high-performance, petabyte-scale storage environment. </para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">No Application Rewrites</emphasis></para>
- <para>Red Hat Storage Appliance provides full support for the semantics of a normal Linux file system like ext4 so there is no need to rewrite applications when moving data to the cloud as with cloud-based object storage. </para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Flexibility</emphasis>
-</para>
- <para>Runs in userspace, eliminating the need for complex
-kernel patches or dependencies.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Scalability</emphasis>
-</para>
- <para>Elastic volume management enables storage volumes
-to be abstracted from the hardware so data and hardware can be managed independently. Storage can be
-added while data continues to be available, with no
-application interruption. Volumes can grow across
-machines in the system and can be migrated within the
-system to rebalance capacity. Storage server nodes
-can be added on the fly.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Simple Management</emphasis>
-</para>
- <para>Simple, single command for storage management. It also includes performance monitoring and analysis tools like Top and Profile. Top provides visibility into the workload pattern and Profile provides performance
-statistics over a user-defined
-time period for metrics including latency and amount of
-data read or written.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">No Metadata Server </emphasis></para>
- <para>Rather than using a centralized or
-distributed metadata server, Red Hat Storage software
- uses an elastic hashing algorithm to locate
-data in the storage pool removing this common source
-of I/O bottlenecks and vulnerability to failure. Data
-access is fully parallelized and performance scales
-linearly.
-</para>
- </listitem>
- </itemizedlist>
-</chapter>
diff --git a/doc/release-notes/en-US/Known_Issues.xml b/doc/release-notes/en-US/Known_Issues.xml
deleted file mode 100644
index 834ed53363c..00000000000
--- a/doc/release-notes/en-US/Known_Issues.xml
+++ /dev/null
@@ -1,164 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Release_Notes-Known_Issues">
- <title>Known Issues</title>
- <para>
- The following are the known issues:
-
- </para>
- <para><itemizedlist>
- <listitem>
- <para>Issues related to Distributed Replicated Volumes:
-</para>
- <itemizedlist>
- <listitem>
- <para>When process has done <emphasis role="italic">
- <emphasis role="italic">
- <command>cd</command>
- </emphasis>
- </emphasis> into a directory, stat of deleted file recreates it (directory self-
-heal not triggered).
-</para>
- <para>In GlusterFS replicated setup, if you are inside a directory (for example, <filename>Test</filename> directory) of
-replicated volume. From another node, you will delete a file inside <filename>Test</filename> directory. Then if you
-perform <command>stat</command> operation on the same file name, the file will be automatically created. (that is, a
-proper directory self-heal is not triggered when process has done <command>cd</command> into a path).
-</para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>Issues related to Distributed Volumes:
-</para>
- <itemizedlist>
- <listitem>
- <para>Rebalance does not happen if bricks are down.
-
-</para>
- <para>Currently while running rebalance, make sure all the bricks are in operating or connected state.
-
-</para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>glusterfsd - Error return code is not proper after daemonizing the process.
-</para>
- <para>Due to this, scripts that mount glusterfs or start glusterfs process must not depend on its return
-value.
-</para>
- </listitem>
- <listitem>
- <para>After <command># gluster volume replace-brick <replaceable>VOLNAME Brick New-Brick</replaceable> commit</command> command
-is issued, the file system operations on that particular volume, which are in transit will fail.
-</para>
- </listitem>
- <listitem>
- <para>Command <command># gluster volume replace-brick ...</command> will fail in a RDMA set up.
-</para>
- </listitem>
- <listitem>
- <para>If files and directories have different GFIDs on different backends, GlusterFS client may hang or
-display errors.
-</para>
- <para><emphasis role="bold">Work Around</emphasis>: The workaround for this issue is explained at <ulink url="http://gluster.org/pipermail/gluster-users/2011-July/008215.html"/>
-.
-</para>
- </listitem>
- <listitem>
- <para>Issues related to Directory Quota:
-</para>
- <itemizedlist>
- <listitem>
- <para>Some writes can appear to pass even though the quota limit is exceeded (write returns
-success). This is because they could be cached in write-behind. However disk-space would
-not exceed the quota limit, since when writes to backend happen, quota does not allow
-them. Hence it is advised that applications should check for return value of close call.
-</para>
- </listitem>
- <listitem>
- <para>If a user has done <command>cd</command> into a directory on which the administrator is setting the limit, even
- though the command succeeds and the new limit value will be applicable to all the users
-except for those users’ who has done <command>cd</command> in to that particular directory. The old limit value
- will be applicable until the user has <command>cd</command> out of that directory.
-</para>
- </listitem>
- <listitem>
- <para>Rename operation (that is, removing oldpath and creating newpath) requires additional disk
-space equal to file size. This is because, during rename, it subtracts the size on oldpath after
-rename operation is performed, but it checks whether quota limit is exceeded on parents of
-newfile before rename operation.
-</para>
- </listitem>
- <listitem>
- <para>With striped volumes, Quota feature is not available.</para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>Issues related to POSIX ACLs:
-</para>
- <itemizedlist>
- <listitem>
- <para>Even though POSIX ACLs are set on the file or directory, the <computeroutput>+</computeroutput> (plus) sign in the file
- permissions will not be displayed. This is for performance optimization and will be fixed in a
- future release.
-</para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>When glusterfs is mounted with <command>-o acl</command>, directory read performance can be bad. Commands
-like recursive directory listing can be slower than normal.
-</para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>When POSIX ACLs are set and multiple NFS clients are used, there could be inconsistency in
-the way ACLs are applied due to attribute caching in NFS. For a consistent view of POSIX ACLs
-in a multiple client setup, use <command>-o noac</command> option on NFS mount to switch off attribute caching.
- This could have a performance impact on operations involving attributes.
-</para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>If you have enabled Gluster NLM, you cannot mount kernel NFS client on your storage nodes.
-
-</para>
- </listitem>
- <listitem condition="gfs">
- <para>Error with lost and found directory while using multiple disks.</para>
- <para><emphasis role="bold">Work Around</emphasis>: You must ensure that the brick directories are one of the extra directories in the back-end mount point. For Example, if <filename>/dev/sda1</filename> is mounted on <filename>/export1</filename>, use <filename>/export1/volume</filename> as the glusterfs&apos;s export directory. </para>
- </listitem>
- <listitem>
- <para>Due to enhancements in Graphs, you may experience excessive memory usage with this release.
-
-</para>
- </listitem>
- <listitem>
- <para>After you restart the NFS server, the unlock within the grace-period may fail and previously held locks may not be reclaimed.
-
-</para>
- </listitem>
- <listitem>
- <para>After a rebalancing a volume, if you run <command>rm -rf</command> command at the mount point to remove all contents of the current working directory recursively without prompting, you may get &quot;Directory not Empty&quot; error message.
-
-</para>
- </listitem>
- <listitem>
- <para>The following is a known missing (minor) feature:
-</para>
- <itemizedlist>
- <listitem>
- <para>locks - <emphasis role="italic">mandatory</emphasis> locking is not supported.
-</para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist></para>
-</chapter>
diff --git a/doc/release-notes/en-US/Preface.xml b/doc/release-notes/en-US/Preface.xml
deleted file mode 100644
index 597dc5da08a..00000000000
--- a/doc/release-notes/en-US/Preface.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. -->
-<!DOCTYPE preface PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<preface id="pref-Release_Notes-Preface">
- <title>Preface</title>
- <para>This guide describes how to configure, operate, and manage Gluster File System (GlusterFS).</para>
- <section>
- <title>Audience</title>
- <para>This guide is intended for Systems Administrators interested in configuring and managing GlusterFS.</para>
- <para>This guide assumes that you are familiar with the Linux operating system, concepts of File System, GlusterFS concepts, and GlusterFS Installation</para>
- </section>
- <section>
- <title>License</title>
- <para>The License information is available at <ulink url="http://www.redhat.com/licenses/rhel_rha_eula.html"/>.</para>
- </section>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Conventions.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Feedback.xml">
- <xi:fallback xmlns:xi="http://www.w3.org/2001/XInclude"> <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Feedback.xml"/>
- </xi:fallback>
- </xi:include>
-</preface>
diff --git a/doc/release-notes/en-US/Product_Documentation.xml b/doc/release-notes/en-US/Product_Documentation.xml
deleted file mode 100644
index 77329ab0192..00000000000
--- a/doc/release-notes/en-US/Product_Documentation.xml
+++ /dev/null
@@ -1,12 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Release_Notes-Product_Documentation">
- <title>Product Documentation</title>
- <para>Product documentation of GlusterFS
-
- is available at <ulink url="http://www.gluster.org/community/documentation/index.php/Main_Page"/> .
-</para>
-</chapter>
diff --git a/doc/release-notes/en-US/Product_Support.xml b/doc/release-notes/en-US/Product_Support.xml
deleted file mode 100644
index ab44e518208..00000000000
--- a/doc/release-notes/en-US/Product_Support.xml
+++ /dev/null
@@ -1,12 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Release_Notes-Product_Support">
- <title>Product Support</title>
- <para>
-
- You can reach support at <ulink url="http://www.redhat.com/support"/>.
-</para>
-</chapter>
diff --git a/doc/release-notes/en-US/Release_Notes.ent b/doc/release-notes/en-US/Release_Notes.ent
deleted file mode 100644
index 9275f166d51..00000000000
--- a/doc/release-notes/en-US/Release_Notes.ent
+++ /dev/null
@@ -1,4 +0,0 @@
-<!ENTITY PRODUCT "Documentation">
-<!ENTITY BOOKID "Release_Notes">
-<!ENTITY YEAR "2012">
-<!ENTITY HOLDER "Red Hat Inc">
diff --git a/doc/release-notes/en-US/Release_Notes.xml b/doc/release-notes/en-US/Release_Notes.xml
deleted file mode 100644
index d23fb4d7f2f..00000000000
--- a/doc/release-notes/en-US/Release_Notes.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<book>
- <xi:include href="Book_Info.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Preface.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="gfs_introduction.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Whats_New.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Download_Install.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Known_Issues.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Product_Support.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Product_Documentation.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Revision_History.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
-</book>
-
diff --git a/doc/release-notes/en-US/Revision_History.xml b/doc/release-notes/en-US/Revision_History.xml
deleted file mode 100644
index f473d0a299c..00000000000
--- a/doc/release-notes/en-US/Revision_History.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. -->
-<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<appendix id="appe-Release_Notes-Revision_History">
- <title>Revision History</title>
- <simpara>
- <revhistory>
- <revision>
- <revnumber> 1-0</revnumber>
- <date>Mon Apr 9 2012</date>
- <author>
- <firstname>Divya </firstname>
- <surname>Muntimadugu</surname>
- <email>divya@redhat.com</email>
- </author>
- <revdescription>
- <simplelist>
- <member>Draft</member>
- </simplelist>
- </revdescription>
- </revision>
- </revhistory>
- </simpara>
-</appendix>
diff --git a/doc/release-notes/en-US/Whats_New.xml b/doc/release-notes/en-US/Whats_New.xml
deleted file mode 100644
index c320c1aa3ec..00000000000
--- a/doc/release-notes/en-US/Whats_New.xml
+++ /dev/null
@@ -1,90 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Release_Notes.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-2.0_Release_Notes-Whats_New">
- <title>What is New in this Release?</title>
- <para>This section describes the key features available in GlusterFS. The following is a list of feature highlights of this new version of the GlusterFS software: </para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">Unified File and Object Storage</emphasis></para>
- <para>Unified File and Object Storage (UFO) unifies NAS and object storage technology. It provides a system for data storage that enables users to access the same data, both as an object and as a file, thus simplifying management and controlling storage costs.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Replicate Improvements (Pro-active Self-heal)</emphasis></para>
- <para>In replicate module, previously you had to manually trigger a self-heal when a brick goes offline and comes back online, to bring all the replicas in sync. Now the pro-active self-heal daemon runs in the background, diagnoses issues and automatically initiates self-healing when the brick comes on-line. You can view the list of files that need healing, the list of files which are recently healed, list of files which are in split-brain state, and you can manually trigger self-heal on the entire volume or only on the files which need healing. </para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Network Lock Manager</emphasis>
-</para>
- <para>GlusterFS 3.3 includes network lock manager (NLM) v4. NLM is a standard and an extension to NFSv3 which allows NFSv3 clients to lock on files across the network. NLM is required to make applications running on top of NFSv3 mount points to use the standard fcntl() (POSIX) and flock() (BSD) lock system calls to synchronize access across clients.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Volume Statedump</emphasis>
-</para>
- <para>Statedump is a mechanism through which you can get details of all internal variables and state of the glusterfs process at the time of issuing the command.You can perform statedumps of the brick processes and nfs server process of a volume using the statedump command. The statedump information is useful while debugging.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Volume Status and Brick Information</emphasis>
-</para>
- <para>You can display the status information about a specific volume, brick or all volumes, as needed. Volume status information includes memory usage, memory pool details of the bricks, inode tables of the volume, pending calls of the volume and other statistics. This information can be used to understand the current status of the brick, nfs processes, and overall file
-system. Status information can also be used to monitor and debug the volume information. </para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Geo-replication Enhancements </emphasis></para>
- <para>Now you can configure a secure slave using SSH so that master is granted a restricted access. You need not specify configuration parameters regarding the slave on the master-side configuration. You can also rotate the log file of a particular master-slave session, all sessions of a mater volume, and all geo-replication sessions, as needed. You can also set ignore-deletes option to 1 so that the file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Mount Server Fail-over </emphasis></para>
- <para>Now there is an option to add backup volfile server while mounting fuse client. When the first volfile server fails, then the server specified in backupvolfile-server option is used as volfile server to mount the client. You can also specify the number of attempts to fetch while mounting glusterFS server. This option is useful when you mount a server with multiple IPs.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Debugging Locks</emphasis></para>
- <para>You can use statedump command to list the locks held on files. The statedump output also provides information on each lock with its range, basename, PID of the application holding the lock, and so on. You can analyze and know which locks are valid and relevant at a point of time. After ensuring that the no application is using the file, you can clear the lock using the clear lock command.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Change in Working Directory </emphasis></para>
- <para>The working directory of glusterd is changed to <emphasis role="italic">/var/lib/glusterd </emphasis>from <emphasis role="italic">/etc/glusterd</emphasis>.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Hadoop Compatible Storage </emphasis></para>
- <para>GlusterFS provides compatibility for Apache Hadoop and it uses the standard file system APIs available in Hadoop to provide a new storage option for Hadoop deployments. Existing MapReduce based applications can use GlusterFS seamlessly. This new functionality opens up data within Hadoop deployments to any file-based or object-based application.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Granual Locking for Large Files </emphasis></para>
- <para>Enables using GlusterFS as a backing store for preserving large files like virtual machine images. Granualar locking enables internal file operations (like self-heal) without blocking user level file operations. The latency for user I/O is reduced during self-heal operation.
-</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Configuration Enhancements</emphasis></para>
- <para><itemizedlist>
- <listitem>
- <para><emphasis role="bold">Remove Brick Enhancements</emphasis></para>
- <para>Previously, remove-bick command was used to remove a brick that is inaccessible due to hardware or network failure. And as a clean-up operation to remove dead server details from the volume configuration. Now remove-brick command can migrate data to existing bricks before deleting given brick.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Rebalance Enhancements </emphasis></para>
- <para>GlusterFS 3.3 supports open file rebalance and files that have hardlinks. Rebalance has been enchanced to be more efficient with respect to network usage, completion time, and amount of data movement and starts migration of data immediately without waiting for directory layout to be fixed.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Dynamic Alteration of Volume Type </emphasis></para>
- <para>You can now the change type of the volume from Distributed volume to Distributed Replicated Volume when performing add-brick and remove-brick operation. You must specify the replica count paramenter to increase the number of replicas to change it to distributed replicated volume.<note>
- <para>Currently, changing of <emphasis role="italic">stripe</emphasis> count while changing volume configurations is not supported.</para>
- </note></para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Read-only Volume </emphasis></para>
- <para>GlusterFS 3.3 enables you to mount volumes as read-only. While mounting the client, you can mount it as read-only for the volumes and you can also make the entire volume as read-only for all the clients (including NFS clients) using volume set option.
-</para>
- </listitem>
- </itemizedlist>
-</chapter>
diff --git a/doc/release-notes/en-US/gfs_introduction.xml b/doc/release-notes/en-US/gfs_introduction.xml
deleted file mode 100644
index 5fd88730556..00000000000
--- a/doc/release-notes/en-US/gfs_introduction.xml
+++ /dev/null
@@ -1,54 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Introducing Gluster File System</title>
- <para>GlusterFS is an open source, clustered file system capable of scaling to several petabytes and handling thousands of clients. GlusterFS can be flexibly combined with commodity physical, virtual, and cloud resources to deliver highly available and performant enterprise storage at a fraction of the cost of traditional solutions.</para>
- <para>GlusterFS clusters together storage building blocks over Infiniband RDMA and/or TCP/IP interconnect, aggregating disk and memory resources and managing data in a single global namespace. GlusterFS is based on a stackable user space design, delivering exceptional performance for diverse workloads.
-</para>
- <figure>
- <title>Virtualized Cloud Environments</title>
- <mediaobject>
- <textobject>
- <phrase>Virtualized Cloud Environments</phrase>
- </textobject>
- <imageobject>
- <imagedata align="center" fileref="images/640px-GlusterFS_3.2_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para>GlusterFS is designed for today&apos;s high-performance, virtualized cloud environments. Unlike traditional data centers, cloud environments require multi-tenancy along with the ability to grow or shrink resources on demand. Enterprises can scale capacity, performance, and availability on demand, with no vendor lock-in, across on-premise, public cloud, and hybrid environments. </para>
- <para>GlusterFS is in production at thousands of enterprises spanning media, healthcare, government, education, web 2.0, and financial services. The following table lists the commercial offerings and its documentation location:
-</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1" colwidth="16%"/>
- <colspec colname="c2" colwidth="84%"/>
- <thead>
- <row>
- <entry>Product</entry>
- <entry>Documentation Location</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Red Hat Storage Software Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Virtual Storage Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Storage </entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html"/>
- </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
-</chapter>
diff --git a/doc/release-notes/en-US/images/640px-GlusterFS_3.2_Architecture.png b/doc/release-notes/en-US/images/640px-GlusterFS_3.2_Architecture.png
deleted file mode 100644
index 95f89ec8286..00000000000
--- a/doc/release-notes/en-US/images/640px-GlusterFS_3.2_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/release-notes/en-US/images/icon.svg b/doc/release-notes/en-US/images/icon.svg
deleted file mode 100644
index b2f16d0f61d..00000000000
--- a/doc/release-notes/en-US/images/icon.svg
+++ /dev/null
@@ -1,19 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" width="32" height="32" id="svg3017">
- <defs id="defs3019">
- <linearGradient id="linearGradient2381">
- <stop id="stop2383" style="stop-color:#ffffff;stop-opacity:1" offset="0"/>
- <stop id="stop2385" style="stop-color:#ffffff;stop-opacity:0" offset="1"/>
- </linearGradient>
- <linearGradient x1="296.4996" y1="188.81061" x2="317.32471" y2="209.69398" id="linearGradient2371" xlink:href="#linearGradient2381" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.90776,0,0,0.90776,24.35648,49.24131)"/>
- </defs>
- <g transform="matrix(0.437808,-0.437808,0.437808,0.437808,-220.8237,43.55311)" id="g5089">
- <path d="m 8.4382985,-6.28125 c -0.6073916,0 -4.3132985,5.94886271 -4.3132985,8.25 l 0,26.71875 c 0,0.846384 0.5818159,1.125 1.15625,1.125 l 25.5625,0 c 0.632342,0 1.125001,-0.492658 1.125,-1.125 l 0,-5.21875 0.28125,0 c 0.49684,0 0.906249,-0.409411 0.90625,-0.90625 l 0,-27.9375 c 0,-0.4968398 -0.40941,-0.90625 -0.90625,-0.90625 l -23.8117015,0 z" transform="translate(282.8327,227.1903)" id="path5091" style="fill:#5c5c4f;stroke:#000000;stroke-width:3.23021388;stroke-miterlimit:4;stroke-dasharray:none"/>
- <rect width="27.85074" height="29.369793" rx="1.1414107" ry="1.1414107" x="286.96509" y="227.63805" id="rect5093" style="fill:#032c87"/>
- <path d="m 288.43262,225.43675 25.2418,0 0,29.3698 -26.37615,0.0241 1.13435,-29.39394 z" id="rect5095" style="fill:#ffffff"/>
- <path d="m 302.44536,251.73726 c 1.38691,7.85917 -0.69311,11.28365 -0.69311,11.28365 2.24384,-1.60762 3.96426,-3.47694 4.90522,-5.736 0.96708,2.19264 1.83294,4.42866 4.27443,5.98941 0,0 -1.59504,-7.2004 -1.71143,-11.53706 l -6.77511,0 z" id="path5097" style="fill:#a70000;fill-opacity:1;stroke-width:2"/>
- <rect width="25.241802" height="29.736675" rx="0.89682275" ry="0.89682275" x="290.73544" y="220.92249" id="rect5099" style="fill:#809cc9"/>
- <path d="m 576.47347,725.93939 6.37084,0.41502 0.4069,29.51809 c -1.89202,-1.31785 -6.85427,-3.7608 -8.26232,-1.68101 l 0,-26.76752 c 0,-0.82246 0.66212,-1.48458 1.48458,-1.48458 z" transform="matrix(0.499065,-0.866565,0,1,0,0)" id="rect5101" style="fill:#4573b3;fill-opacity:1"/>
- <path d="m 293.2599,221.89363 20.73918,0 c 0.45101,0 0.8141,0.3631 0.8141,0.81411 0.21547,6.32836 -19.36824,21.7635 -22.36739,17.59717 l 0,-17.59717 c 0,-0.45101 0.3631,-0.81411 0.81411,-0.81411 z" id="path5103" style="opacity:0.65536726;fill:url(#linearGradient2371);fill-opacity:1"/>
- </g>
-</svg>
diff --git a/doc/release-notes/publican.cfg b/doc/release-notes/publican.cfg
deleted file mode 100644
index 8afb0d03b19..00000000000
--- a/doc/release-notes/publican.cfg
+++ /dev/null
@@ -1,12 +0,0 @@
-# Config::Simple 4.59
-# Thu Apr 5 16:25:43 2012
-
-xml_lang: "en-US"
-type: Book
-brand: Gluster_Brand
-prod_url: http://www.gluster.org
-doc_url: http://www.gluster.com/community/documentation/index.php/Main_Page
-condition: gfs
-show_remarks: 1
-
-
diff --git a/doc/user-guide/legacy/Makefile.am b/doc/user-guide/legacy/Makefile.am
deleted file mode 100644
index b2caabaa2f3..00000000000
--- a/doc/user-guide/legacy/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-info_TEXINFOS = user-guide.texi
-CLEANFILES = *~
-DISTCLEANFILES = .deps/*.P *.info *vti
diff --git a/doc/user-guide/legacy/advanced-stripe.odg b/doc/user-guide/legacy/advanced-stripe.odg
deleted file mode 100644
index 7686d7091b2..00000000000
--- a/doc/user-guide/legacy/advanced-stripe.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/advanced-stripe.pdf b/doc/user-guide/legacy/advanced-stripe.pdf
deleted file mode 100644
index ec8b03dcfbb..00000000000
--- a/doc/user-guide/legacy/advanced-stripe.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/colonO-icon.jpg b/doc/user-guide/legacy/colonO-icon.jpg
deleted file mode 100644
index 3e66f7a2775..00000000000
--- a/doc/user-guide/legacy/colonO-icon.jpg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/fdl.texi b/doc/user-guide/legacy/fdl.texi
deleted file mode 100644
index e33c687cdfb..00000000000
--- a/doc/user-guide/legacy/fdl.texi
+++ /dev/null
@@ -1,454 +0,0 @@
-
-@c @node GNU Free Documentation License
-@c @appendixsec GNU Free Documentation License
-
-@cindex FDL, GNU Free Documentation License
-@center Version 1.2, November 2002
-
-@display
-Copyright @copyright{} 2000,2001,2002 Free Software Foundation, Inc.
-59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
-
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
-@end display
-
-@enumerate 0
-@item
-PREAMBLE
-
-The purpose of this License is to make a manual, textbook, or other
-functional and useful document @dfn{free} in the sense of freedom: to
-assure everyone the effective freedom to copy and redistribute it,
-with or without modifying it, either commercially or noncommercially.
-Secondarily, this License preserves for the author and publisher a way
-to get credit for their work, while not being considered responsible
-for modifications made by others.
-
-This License is a kind of ``copyleft'', which means that derivative
-works of the document must themselves be free in the same sense. It
-complements the GNU General Public License, which is a copyleft
-license designed for free software.
-
-We have designed this License in order to use it for manuals for free
-software, because free software needs free documentation: a free
-program should come with manuals providing the same freedoms that the
-software does. But this License is not limited to software manuals;
-it can be used for any textual work, regardless of subject matter or
-whether it is published as a printed book. We recommend this License
-principally for works whose purpose is instruction or reference.
-
-@item
-APPLICABILITY AND DEFINITIONS
-
-This License applies to any manual or other work, in any medium, that
-contains a notice placed by the copyright holder saying it can be
-distributed under the terms of this License. Such a notice grants a
-world-wide, royalty-free license, unlimited in duration, to use that
-work under the conditions stated herein. The ``Document'', below,
-refers to any such manual or work. Any member of the public is a
-licensee, and is addressed as ``you''. You accept the license if you
-copy, modify or distribute the work in a way requiring permission
-under copyright law.
-
-A ``Modified Version'' of the Document means any work containing the
-Document or a portion of it, either copied verbatim, or with
-modifications and/or translated into another language.
-
-A ``Secondary Section'' is a named appendix or a front-matter section
-of the Document that deals exclusively with the relationship of the
-publishers or authors of the Document to the Document's overall
-subject (or to related matters) and contains nothing that could fall
-directly within that overall subject. (Thus, if the Document is in
-part a textbook of mathematics, a Secondary Section may not explain
-any mathematics.) The relationship could be a matter of historical
-connection with the subject or with related matters, or of legal,
-commercial, philosophical, ethical or political position regarding
-them.
-
-The ``Invariant Sections'' are certain Secondary Sections whose titles
-are designated, as being those of Invariant Sections, in the notice
-that says that the Document is released under this License. If a
-section does not fit the above definition of Secondary then it is not
-allowed to be designated as Invariant. The Document may contain zero
-Invariant Sections. If the Document does not identify any Invariant
-Sections then there are none.
-
-The ``Cover Texts'' are certain short passages of text that are listed,
-as Front-Cover Texts or Back-Cover Texts, in the notice that says that
-the Document is released under this License. A Front-Cover Text may
-be at most 5 words, and a Back-Cover Text may be at most 25 words.
-
-A ``Transparent'' copy of the Document means a machine-readable copy,
-represented in a format whose specification is available to the
-general public, that is suitable for revising the document
-straightforwardly with generic text editors or (for images composed of
-pixels) generic paint programs or (for drawings) some widely available
-drawing editor, and that is suitable for input to text formatters or
-for automatic translation to a variety of formats suitable for input
-to text formatters. A copy made in an otherwise Transparent file
-format whose markup, or absence of markup, has been arranged to thwart
-or discourage subsequent modification by readers is not Transparent.
-An image format is not Transparent if used for any substantial amount
-of text. A copy that is not ``Transparent'' is called ``Opaque''.
-
-Examples of suitable formats for Transparent copies include plain
-@sc{ascii} without markup, Texinfo input format, La@TeX{} input
-format, @acronym{SGML} or @acronym{XML} using a publicly available
-@acronym{DTD}, and standard-conforming simple @acronym{HTML},
-PostScript or @acronym{PDF} designed for human modification. Examples
-of transparent image formats include @acronym{PNG}, @acronym{XCF} and
-@acronym{JPG}. Opaque formats include proprietary formats that can be
-read and edited only by proprietary word processors, @acronym{SGML} or
-@acronym{XML} for which the @acronym{DTD} and/or processing tools are
-not generally available, and the machine-generated @acronym{HTML},
-PostScript or @acronym{PDF} produced by some word processors for
-output purposes only.
-
-The ``Title Page'' means, for a printed book, the title page itself,
-plus such following pages as are needed to hold, legibly, the material
-this License requires to appear in the title page. For works in
-formats which do not have any title page as such, ``Title Page'' means
-the text near the most prominent appearance of the work's title,
-preceding the beginning of the body of the text.
-
-A section ``Entitled XYZ'' means a named subunit of the Document whose
-title either is precisely XYZ or contains XYZ in parentheses following
-text that translates XYZ in another language. (Here XYZ stands for a
-specific section name mentioned below, such as ``Acknowledgements'',
-``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title''
-of such a section when you modify the Document means that it remains a
-section ``Entitled XYZ'' according to this definition.
-
-The Document may include Warranty Disclaimers next to the notice which
-states that this License applies to the Document. These Warranty
-Disclaimers are considered to be included by reference in this
-License, but only as regards disclaiming warranties: any other
-implication that these Warranty Disclaimers may have is void and has
-no effect on the meaning of this License.
-
-@item
-VERBATIM COPYING
-
-You may copy and distribute the Document in any medium, either
-commercially or noncommercially, provided that this License, the
-copyright notices, and the license notice saying this License applies
-to the Document are reproduced in all copies, and that you add no other
-conditions whatsoever to those of this License. You may not use
-technical measures to obstruct or control the reading or further
-copying of the copies you make or distribute. However, you may accept
-compensation in exchange for copies. If you distribute a large enough
-number of copies you must also follow the conditions in section 3.
-
-You may also lend copies, under the same conditions stated above, and
-you may publicly display copies.
-
-@item
-COPYING IN QUANTITY
-
-If you publish printed copies (or copies in media that commonly have
-printed covers) of the Document, numbering more than 100, and the
-Document's license notice requires Cover Texts, you must enclose the
-copies in covers that carry, clearly and legibly, all these Cover
-Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
-the back cover. Both covers must also clearly and legibly identify
-you as the publisher of these copies. The front cover must present
-the full title with all words of the title equally prominent and
-visible. You may add other material on the covers in addition.
-Copying with changes limited to the covers, as long as they preserve
-the title of the Document and satisfy these conditions, can be treated
-as verbatim copying in other respects.
-
-If the required texts for either cover are too voluminous to fit
-legibly, you should put the first ones listed (as many as fit
-reasonably) on the actual cover, and continue the rest onto adjacent
-pages.
-
-If you publish or distribute Opaque copies of the Document numbering
-more than 100, you must either include a machine-readable Transparent
-copy along with each Opaque copy, or state in or with each Opaque copy
-a computer-network location from which the general network-using
-public has access to download using public-standard network protocols
-a complete Transparent copy of the Document, free of added material.
-If you use the latter option, you must take reasonably prudent steps,
-when you begin distribution of Opaque copies in quantity, to ensure
-that this Transparent copy will remain thus accessible at the stated
-location until at least one year after the last time you distribute an
-Opaque copy (directly or through your agents or retailers) of that
-edition to the public.
-
-It is requested, but not required, that you contact the authors of the
-Document well before redistributing any large number of copies, to give
-them a chance to provide you with an updated version of the Document.
-
-@item
-MODIFICATIONS
-
-You may copy and distribute a Modified Version of the Document under
-the conditions of sections 2 and 3 above, provided that you release
-the Modified Version under precisely this License, with the Modified
-Version filling the role of the Document, thus licensing distribution
-and modification of the Modified Version to whoever possesses a copy
-of it. In addition, you must do these things in the Modified Version:
-
-@enumerate A
-@item
-Use in the Title Page (and on the covers, if any) a title distinct
-from that of the Document, and from those of previous versions
-(which should, if there were any, be listed in the History section
-of the Document). You may use the same title as a previous version
-if the original publisher of that version gives permission.
-
-@item
-List on the Title Page, as authors, one or more persons or entities
-responsible for authorship of the modifications in the Modified
-Version, together with at least five of the principal authors of the
-Document (all of its principal authors, if it has fewer than five),
-unless they release you from this requirement.
-
-@item
-State on the Title page the name of the publisher of the
-Modified Version, as the publisher.
-
-@item
-Preserve all the copyright notices of the Document.
-
-@item
-Add an appropriate copyright notice for your modifications
-adjacent to the other copyright notices.
-
-@item
-Include, immediately after the copyright notices, a license notice
-giving the public permission to use the Modified Version under the
-terms of this License, in the form shown in the Addendum below.
-
-@item
-Preserve in that license notice the full lists of Invariant Sections
-and required Cover Texts given in the Document's license notice.
-
-@item
-Include an unaltered copy of this License.
-
-@item
-Preserve the section Entitled ``History'', Preserve its Title, and add
-to it an item stating at least the title, year, new authors, and
-publisher of the Modified Version as given on the Title Page. If
-there is no section Entitled ``History'' in the Document, create one
-stating the title, year, authors, and publisher of the Document as
-given on its Title Page, then add an item describing the Modified
-Version as stated in the previous sentence.
-
-@item
-Preserve the network location, if any, given in the Document for
-public access to a Transparent copy of the Document, and likewise
-the network locations given in the Document for previous versions
-it was based on. These may be placed in the ``History'' section.
-You may omit a network location for a work that was published at
-least four years before the Document itself, or if the original
-publisher of the version it refers to gives permission.
-
-@item
-For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve
-the Title of the section, and preserve in the section all the
-substance and tone of each of the contributor acknowledgements and/or
-dedications given therein.
-
-@item
-Preserve all the Invariant Sections of the Document,
-unaltered in their text and in their titles. Section numbers
-or the equivalent are not considered part of the section titles.
-
-@item
-Delete any section Entitled ``Endorsements''. Such a section
-may not be included in the Modified Version.
-
-@item
-Do not retitle any existing section to be Entitled ``Endorsements'' or
-to conflict in title with any Invariant Section.
-
-@item
-Preserve any Warranty Disclaimers.
-@end enumerate
-
-If the Modified Version includes new front-matter sections or
-appendices that qualify as Secondary Sections and contain no material
-copied from the Document, you may at your option designate some or all
-of these sections as invariant. To do this, add their titles to the
-list of Invariant Sections in the Modified Version's license notice.
-These titles must be distinct from any other section titles.
-
-You may add a section Entitled ``Endorsements'', provided it contains
-nothing but endorsements of your Modified Version by various
-parties---for example, statements of peer review or that the text has
-been approved by an organization as the authoritative definition of a
-standard.
-
-You may add a passage of up to five words as a Front-Cover Text, and a
-passage of up to 25 words as a Back-Cover Text, to the end of the list
-of Cover Texts in the Modified Version. Only one passage of
-Front-Cover Text and one of Back-Cover Text may be added by (or
-through arrangements made by) any one entity. If the Document already
-includes a cover text for the same cover, previously added by you or
-by arrangement made by the same entity you are acting on behalf of,
-you may not add another; but you may replace the old one, on explicit
-permission from the previous publisher that added the old one.
-
-The author(s) and publisher(s) of the Document do not by this License
-give permission to use their names for publicity for or to assert or
-imply endorsement of any Modified Version.
-
-@item
-COMBINING DOCUMENTS
-
-You may combine the Document with other documents released under this
-License, under the terms defined in section 4 above for modified
-versions, provided that you include in the combination all of the
-Invariant Sections of all of the original documents, unmodified, and
-list them all as Invariant Sections of your combined work in its
-license notice, and that you preserve all their Warranty Disclaimers.
-
-The combined work need only contain one copy of this License, and
-multiple identical Invariant Sections may be replaced with a single
-copy. If there are multiple Invariant Sections with the same name but
-different contents, make the title of each such section unique by
-adding at the end of it, in parentheses, the name of the original
-author or publisher of that section if known, or else a unique number.
-Make the same adjustment to the section titles in the list of
-Invariant Sections in the license notice of the combined work.
-
-In the combination, you must combine any sections Entitled ``History''
-in the various original documents, forming one section Entitled
-``History''; likewise combine any sections Entitled ``Acknowledgements'',
-and any sections Entitled ``Dedications''. You must delete all
-sections Entitled ``Endorsements.''
-
-@item
-COLLECTIONS OF DOCUMENTS
-
-You may make a collection consisting of the Document and other documents
-released under this License, and replace the individual copies of this
-License in the various documents with a single copy that is included in
-the collection, provided that you follow the rules of this License for
-verbatim copying of each of the documents in all other respects.
-
-You may extract a single document from such a collection, and distribute
-it individually under this License, provided you insert a copy of this
-License into the extracted document, and follow this License in all
-other respects regarding verbatim copying of that document.
-
-@item
-AGGREGATION WITH INDEPENDENT WORKS
-
-A compilation of the Document or its derivatives with other separate
-and independent documents or works, in or on a volume of a storage or
-distribution medium, is called an ``aggregate'' if the copyright
-resulting from the compilation is not used to limit the legal rights
-of the compilation's users beyond what the individual works permit.
-When the Document is included in an aggregate, this License does not
-apply to the other works in the aggregate which are not themselves
-derivative works of the Document.
-
-If the Cover Text requirement of section 3 is applicable to these
-copies of the Document, then if the Document is less than one half of
-the entire aggregate, the Document's Cover Texts may be placed on
-covers that bracket the Document within the aggregate, or the
-electronic equivalent of covers if the Document is in electronic form.
-Otherwise they must appear on printed covers that bracket the whole
-aggregate.
-
-@item
-TRANSLATION
-
-Translation is considered a kind of modification, so you may
-distribute translations of the Document under the terms of section 4.
-Replacing Invariant Sections with translations requires special
-permission from their copyright holders, but you may include
-translations of some or all Invariant Sections in addition to the
-original versions of these Invariant Sections. You may include a
-translation of this License, and all the license notices in the
-Document, and any Warranty Disclaimers, provided that you also include
-the original English version of this License and the original versions
-of those notices and disclaimers. In case of a disagreement between
-the translation and the original version of this License or a notice
-or disclaimer, the original version will prevail.
-
-If a section in the Document is Entitled ``Acknowledgements'',
-``Dedications'', or ``History'', the requirement (section 4) to Preserve
-its Title (section 1) will typically require changing the actual
-title.
-
-@item
-TERMINATION
-
-You may not copy, modify, sublicense, or distribute the Document except
-as expressly provided for under this License. Any other attempt to
-copy, modify, sublicense or distribute the Document is void, and will
-automatically terminate your rights under this License. However,
-parties who have received copies, or rights, from you under this
-License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-@item
-FUTURE REVISIONS OF THIS LICENSE
-
-The Free Software Foundation may publish new, revised versions
-of the GNU Free Documentation License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns. See
-@uref{http://www.gnu.org/copyleft/}.
-
-Each version of the License is given a distinguishing version number.
-If the Document specifies that a particular numbered version of this
-License ``or any later version'' applies to it, you have the option of
-following the terms and conditions either of that specified version or
-of any later version that has been published (not as a draft) by the
-Free Software Foundation. If the Document does not specify a version
-number of this License, you may choose any version ever published (not
-as a draft) by the Free Software Foundation.
-@end enumerate
-
-@page
-@c @appendixsubsec ADDENDUM: How to use this License for your
-@c documents
-@subsection ADDENDUM: How to use this License for your documents
-
-To use this License in a document you have written, include a copy of
-the License in the document and put the following copyright and
-license notices just after the title page:
-
-@smallexample
-@group
- Copyright (C) @var{year} @var{your name}.
- Permission is granted to copy, distribute and/or modify this document
- under the terms of the GNU Free Documentation License, Version 1.2
- or any later version published by the Free Software Foundation;
- with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
- Texts. A copy of the license is included in the section entitled ``GNU
- Free Documentation License''.
-@end group
-@end smallexample
-
-If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
-replace the ``with...Texts.'' line with this:
-
-@smallexample
-@group
- with the Invariant Sections being @var{list their titles}, with
- the Front-Cover Texts being @var{list}, and with the Back-Cover Texts
- being @var{list}.
-@end group
-@end smallexample
-
-If you have Invariant Sections without Cover Texts, or some other
-combination of the three, merge those two alternatives to suit the
-situation.
-
-If your document contains nontrivial examples of program code, we
-recommend releasing these examples in parallel under your choice of
-free software license, such as the GNU General Public License,
-to permit their use in free software.
-
-@c Local Variables:
-@c ispell-local-pdict: "ispell-dict"
-@c End:
-
diff --git a/doc/user-guide/legacy/fuse.odg b/doc/user-guide/legacy/fuse.odg
deleted file mode 100644
index 61bd103c78b..00000000000
--- a/doc/user-guide/legacy/fuse.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/fuse.pdf b/doc/user-guide/legacy/fuse.pdf
deleted file mode 100644
index a7d13faff56..00000000000
--- a/doc/user-guide/legacy/fuse.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/ha.odg b/doc/user-guide/legacy/ha.odg
deleted file mode 100644
index e4b8b72d08b..00000000000
--- a/doc/user-guide/legacy/ha.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/ha.pdf b/doc/user-guide/legacy/ha.pdf
deleted file mode 100644
index e372c0ab03e..00000000000
--- a/doc/user-guide/legacy/ha.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/stripe.odg b/doc/user-guide/legacy/stripe.odg
deleted file mode 100644
index 79441bf1452..00000000000
--- a/doc/user-guide/legacy/stripe.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/stripe.pdf b/doc/user-guide/legacy/stripe.pdf
deleted file mode 100644
index b94446feb56..00000000000
--- a/doc/user-guide/legacy/stripe.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/unify.odg b/doc/user-guide/legacy/unify.odg
deleted file mode 100644
index ccaa9bf16f9..00000000000
--- a/doc/user-guide/legacy/unify.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/unify.pdf b/doc/user-guide/legacy/unify.pdf
deleted file mode 100644
index c22027f66e7..00000000000
--- a/doc/user-guide/legacy/unify.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/user-guide.info b/doc/user-guide/legacy/user-guide.info
deleted file mode 100644
index 6a49d078d64..00000000000
--- a/doc/user-guide/legacy/user-guide.info
+++ /dev/null
@@ -1,2697 +0,0 @@
-This is ../../../doc/user-guide/user-guide.info, produced by makeinfo version 4.13 from ../../../doc/user-guide/user-guide.texi.
-
-START-INFO-DIR-ENTRY
-* GlusterFS: (user-guide). GlusterFS distributed filesystem user guide
-END-INFO-DIR-ENTRY
-
- This is the user manual for GlusterFS 2.0.
-
- Copyright (c) 2007-2011 Gluster, Inc. Permission is granted to
-copy, distribute and/or modify this document under the terms of the GNU
-Free Documentation License, Version 1.2 or any later version published
-by the Free Software Foundation; with no Invariant Sections, no
-Front-Cover Texts, and no Back-Cover Texts. A copy of the license is
-included in the chapter entitled "GNU Free Documentation License".
-
-
-File: user-guide.info, Node: Top, Next: Acknowledgements, Up: (dir)
-
-GlusterFS 2.0 User Guide
-************************
-
-This is the user manual for GlusterFS 2.0.
-
- Copyright (c) 2007-2011 Gluster, Inc. Permission is granted to
-copy, distribute and/or modify this document under the terms of the GNU
-Free Documentation License, Version 1.2 or any later version published
-by the Free Software Foundation; with no Invariant Sections, no
-Front-Cover Texts, and no Back-Cover Texts. A copy of the license is
-included in the chapter entitled "GNU Free Documentation License".
-
-* Menu:
-
-* Acknowledgements::
-* Introduction::
-* Installation and Invocation::
-* Concepts::
-* Translators::
-* Usage Scenarios::
-* Troubleshooting::
-* GNU Free Documentation Licence::
-* Index::
-
- --- The Detailed Node Listing ---
-
-Installation and Invocation
-
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-
-Running GlusterFS
-
-* Server::
-* Client::
-
-Concepts
-
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-
-Translators
-
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-
-Storage Translators
-
-* POSIX::
-
-Client and Server Translators
-
-* Transport modules::
-* Client protocol::
-* Server protocol::
-
-Clustering Translators
-
-* Unify::
-* Replicate::
-* Stripe::
-
-Performance Translators
-
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-
-Features Translators
-
-* POSIX Locks::
-* Fixed ID::
-
-Miscellaneous Translators
-
-* ROT-13::
-* Trace::
-
-
-File: user-guide.info, Node: Acknowledgements, Next: Introduction, Prev: Top, Up: Top
-
-Acknowledgements
-****************
-
-GlusterFS continues to be a wonderful and enriching experience for all
-of us involved.
-
- GlusterFS development would not have been possible at this pace if
-not for our enthusiastic users. People from around the world have
-helped us with bug reports, performance numbers, and feature
-suggestions. A huge thanks to them all.
-
- Matthew Paine - for RPMs & general enthu
-
- Leonardo Rodrigues de Mello - for DEBs
-
- Julian Perez & Adam D'Auria - for multi-server tutorial
-
- Paul England - for HA spec
-
- Brent Nelson - for many bug reports
-
- Jacques Mattheij - for Europe mirror.
-
- Patrick Negri - for TCP non-blocking connect.
- http://gluster.org/core-team.php (<list-hacking@gluster.com>)
- Gluster
-
-
-File: user-guide.info, Node: Introduction, Next: Installation and Invocation, Prev: Acknowledgements, Up: Top
-
-1 Introduction
-**************
-
-GlusterFS is a distributed filesystem. It works at the file level, not
-block level.
-
- A network filesystem is one which allows us to access remote files. A
-distributed filesystem is one that stores data on multiple machines and
-makes them all appear to be a part of the same filesystem.
-
- Need for distributed filesystems
-
- * Scalability: A distributed filesystem allows us to store more data
- than what can be stored on a single machine.
-
- * Redundancy: We might want to replicate crucial data on to several
- machines.
-
- * Uniform access: One can mount a remote volume (for example your
- home directory) from any machine and access the same data.
-
-1.1 Contacting us
-=================
-
-You can reach us through the mailing list *gluster-devel*
-(<gluster-devel@nongnu.org>).
-
- You can also find many of the developers on IRC, on the `#gluster'
-channel on Freenode (<irc.freenode.net>).
-
- The GlusterFS documentation wiki is also useful:
-<http://gluster.org/docs/index.php/GlusterFS>
-
- For commercial support, you can contact Gluster at:
-
- 3194 Winding Vista Common
- Fremont, CA 94539
- USA.
-
- Phone: +1 (510) 354 6801
- Toll free: +1 (888) 813 6309
- Fax: +1 (510) 372 0604
-
- You can also email us at <support@gluster.com>.
-
-
-File: user-guide.info, Node: Installation and Invocation, Next: Concepts, Prev: Introduction, Up: Top
-
-2 Installation and Invocation
-*****************************
-
-* Menu:
-
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-
-
-File: user-guide.info, Node: Pre requisites, Next: Getting GlusterFS, Up: Installation and Invocation
-
-2.1 Pre requisites
-==================
-
-Before installing GlusterFS make sure you have the following components
-installed.
-
-2.1.1 FUSE
-----------
-
-You'll need FUSE version 2.6.0 or higher to use GlusterFS. You can omit
-installing FUSE if you want to build _only_ the server. Note that you
-won't be able to mount a GlusterFS filesystem on a machine that does
-not have FUSE installed.
-
- FUSE can be downloaded from: <http://fuse.sourceforge.net/>
-
- To get the best performance from GlusterFS, however, it is
-recommended that you use our patched version of FUSE. See Patched FUSE
-for details.
-
-2.1.2 Patched FUSE
-------------------
-
-The GlusterFS project maintains a patched version of FUSE meant to be
-used with GlusterFS. The patches increase GlusterFS performance. It is
-recommended that all users use the patched FUSE.
-
- The patched FUSE tarball can be downloaded from:
-
- <ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/>
-
- The specific changes made to FUSE are:
-
- * The communication channel size between FUSE kernel module and
- GlusterFS has been increased to 1MB, permitting large reads and
- writes to be sent in bigger chunks.
-
- * The kernel's read-ahead boundry has been extended upto 1MB.
-
- * Block size returned in the `stat()'/`fstat()' calls tuned to 1MB,
- to make cp and similar commands perform I/O using that block size.
-
- * `flock()' locking support has been added (although some rework in
- GlusterFS is needed for perfect compliance).
-
-2.1.3 libibverbs (optional)
----------------------------
-
-This is only needed if you want GlusterFS to use InfiniBand as the
-interconnect mechanism between server and client. You can get it from:
-
- <http://www.openfabrics.org/downloads.htm>.
-
-2.1.4 Bison and Flex
---------------------
-
-These should be already installed on most Linux systems. If not, use
-your distribution's normal software installation procedures to install
-them. Make sure you install the relevant developer packages also.
-
-
-File: user-guide.info, Node: Getting GlusterFS, Next: Building, Prev: Pre requisites, Up: Installation and Invocation
-
-2.2 Getting GlusterFS
-=====================
-
-There are many ways to get hold of GlusterFS. For a production
-deployment, the recommended method is to download the latest release
-tarball. Release tarballs are available at:
-<http://gluster.org/download.php>.
-
- If you want the bleeding edge development source, you can get them
-from the GNU Arch(1) repository. First you must install GNU Arch
-itself. Then register the GlusterFS archive by doing:
-
- $ tla register-archive http://arch.sv.gnu.org/archives/gluster
-
- Now you can check out the source itself:
-
- $ tla get -A gluster@sv.gnu.org glusterfs--mainline--3.0
-
- ---------- Footnotes ----------
-
- (1) <http://www.gnu.org/software/gnu-arch/>
-
-
-File: user-guide.info, Node: Building, Next: Running GlusterFS, Prev: Getting GlusterFS, Up: Installation and Invocation
-
-2.3 Building
-============
-
-You can skip this section if you're installing from RPMs or DEBs.
-
- GlusterFS uses the Autotools mechanism to build. As such, the
-procedure is straight-forward. First, change into the GlusterFS source
-directory.
-
- $ cd glusterfs-<version>
-
- If you checked out the source from the Arch repository, you'll need
-to run `./autogen.sh' first. Note that you'll need to have Autoconf and
-Automake installed for this.
-
- Run `configure'.
-
- $ ./configure
-
- The configure script accepts the following options:
-
-`--disable-ibverbs'
- Disable the InfiniBand transport mechanism.
-
-`--disable-fuse-client'
- Disable the FUSE client.
-
-`--disable-server'
- Disable building of the GlusterFS server.
-
-`--disable-bdb'
- Disable building of Berkeley DB based storage translator.
-
-`--disable-mod_glusterfs'
- Disable building of Apache/lighttpd glusterfs plugins.
-
-`--disable-epoll'
- Use poll instead of epoll.
-
-`--disable-libglusterfsclient'
- Disable building of libglusterfsclient
-
-
- Build and install GlusterFS.
-
- # make install
-
- The binaries (`glusterfsd' and `glusterfs') will be by default
-installed in `/usr/local/sbin/'. Translator, scheduler, and transport
-shared libraries will be installed in
-`/usr/local/lib/glusterfs/<version>/'. Sample volume specification
-files will be in `/usr/local/etc/glusterfs/'. This document itself can
-be found in `/usr/local/share/doc/glusterfs/'. If you passed the
-`--prefix' argument to the configure script, then replace `/usr/local'
-in the preceding paths with the prefix.
-
-
-File: user-guide.info, Node: Running GlusterFS, Next: A Tutorial Introduction, Prev: Building, Up: Installation and Invocation
-
-2.4 Running GlusterFS
-=====================
-
-* Menu:
-
-* Server::
-* Client::
-
-
-File: user-guide.info, Node: Server, Next: Client, Up: Running GlusterFS
-
-2.4.1 Server
-------------
-
-The GlusterFS server is necessary to export storage volumes to remote
-clients (See *note Server protocol:: for more info). This section
-documents the invocation of the GlusterFS server program and all the
-command-line options accepted by it.
-
- Basic Options
-
-`-f, --volfile=<path>'
- Use the volume file as the volume specification.
-
-`-s, --volfile-server=<hostname>'
- Server to get volume file from. This option overrides -volfile
- option.
-
-`-l, --log-file=<path>'
- Specify the path for the log file.
-
-`-L, --log-level=<level>'
- Set the log level for the server. Log level should be one of DEBUG,
- WARNING, ERROR, CRITICAL, or NONE.
-
- Advanced Options
-
-`--debug'
- Run in debug mode. This option sets -no-daemon, -log-level to
- DEBUG and -log-file to console.
-
-`-N, --no-daemon'
- Run glusterfsd as a foreground process.
-
-`-p, --pid-file=<path>'
- Path for the PID file.
-
-`--volfile-id=<key>'
- 'key' of the volfile to be fetched from server.
-
-`--volfile-server-port=<port-number>'
- Listening port number of volfile server.
-
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
-
-`--xlator-options=<volume-name.option=value>'
- Add/override a translator option for a volume with specified value.
-
- Miscellaneous Options
-
-`-?, --help'
- Show this help text.
-
-`--usage'
- Display a short usage message.
-
-`-V, --version'
- Show version information.
-
-
-File: user-guide.info, Node: Client, Prev: Server, Up: Running GlusterFS
-
-2.4.2 Client
-------------
-
-The GlusterFS client process is necessary to access remote storage
-volumes and mount them locally using FUSE. This section documents the
-invocation of the client process and all its command-line arguments.
-
- # glusterfs [options] <mountpoint>
-
- The `mountpoint' is the directory where you want the GlusterFS
-filesystem to appear. Example:
-
- # glusterfs -f /usr/local/etc/glusterfs-client.vol /mnt
-
- The command-line options are detailed below.
-
- Basic Options
-
-`-f, --volfile=<path>'
- Use the volume file as the volume specification.
-
-`-s, --volfile-server=<hostname>'
- Server to get volume file from. This option overrides -volfile
- option.
-
-`-l, --log-file=<path>'
- Specify the path for the log file.
-
-`-L, --log-level=<level>'
- Set the log level for the server. Log level should be one of DEBUG,
- WARNING, ERROR, CRITICAL, or NONE.
-
- Advanced Options
-
-`--debug'
- Run in debug mode. This option sets -no-daemon, -log-level to
- DEBUG and -log-file to console.
-
-`-N, --no-daemon'
- Run `glusterfs' as a foreground process.
-
-`-p, --pid-file=<path>'
- Path for the PID file.
-
-`--volfile-id=<key>'
- 'key' of the volfile to be fetched from server.
-
-`--volfile-server-port=<port-number>'
- Listening port number of volfile server.
-
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
-
-`--xlator-options=<volume-name.option=value>'
- Add/override a translator option for a volume with specified value.
-
-`--volume-name=<volume name>'
- Volume name in client spec to use. Defaults to the root volume.
-
- FUSE Options
-
-`--attribute-timeout=<n>'
- Attribute timeout for inodes in the kernel, in seconds. Defaults
- to 1 second.
-
-`--disable-direct-io-mode'
- Disable direct I/O mode in FUSE kernel module.
-
-`-e, --entry-timeout=<n>'
- Entry timeout for directory entries in the kernel, in seconds.
- Defaults to 1 second.
-
- Missellaneous Options
-
-`-?, --help'
- Show this help information.
-
-`-V, --version'
- Show version information.
-
-
-File: user-guide.info, Node: A Tutorial Introduction, Prev: Running GlusterFS, Up: Installation and Invocation
-
-2.5 A Tutorial Introduction
-===========================
-
-This section will show you how to quickly get GlusterFS up and running.
-We'll configure GlusterFS as a simple network filesystem, with one
-server and one client. In this mode of usage, GlusterFS can serve as a
-replacement for NFS.
-
- We'll make use of two machines; call them _server_ and _client_ (If
-you don't want to setup two machines, just run everything that follows
-on the same machine). In the examples that follow, the shell prompts
-will use these names to clarify the machine on which the command is
-being run. For example, a command that should be run on the server will
-be shown with the prompt:
-
- [root@server]#
-
- Our goal is to make a directory on the _server_ (say, `/export')
-accessible to the _client_.
-
- First of all, get GlusterFS installed on both the machines, as
-described in the previous sections. Make sure you have the FUSE kernel
-module loaded. You can ensure this by running:
-
- [root@server]# modprobe fuse
-
- Before we can run the GlusterFS client or server programs, we need
-to write two files called _volume specifications_ (equivalently refered
-to as _volfiles_). The volfile describes the _translator tree_ on a
-node. The next chapter will explain the concepts of `translator' and
-`volume specification' in detail. For now, just assume that the volfile
-is like an NFS `/etc/export' file.
-
- On the server, create a text file somewhere (we'll assume the path
-`/tmp/glusterfsd.vol') with the following contents.
-
- volume colon-o
- type storage/posix
- option directory /export
- end-volume
-
- volume server
- type protocol/server
- subvolumes colon-o
- option transport-type tcp
- option auth.addr.colon-o.allow *
- end-volume
-
- A brief explanation of the file's contents. The first section
-defines a storage volume, named "colon-o" (the volume names are
-arbitrary), which exports the `/export' directory. The second section
-defines options for the translator which will make the storage volume
-accessible remotely. It specifies `colon-o' as a subvolume. This
-defines the _translator tree_, about which more will be said in the
-next chapter. The two options specify that the TCP protocol is to be
-used (as opposed to InfiniBand, for example), and that access to the
-storage volume is to be provided to clients with any IP address at all.
-If you wanted to restrict access to this server to only your subnet for
-example, you'd specify something like `192.168.1.*' in the second
-option line.
-
- On the client machine, create the following text file (again, we'll
-assume the path to be `/tmp/glusterfs-client.vol'). Replace
-_server-ip-address_ with the IP address of your server machine. If you
-are doing all this on a single machine, use `127.0.0.1'.
-
- volume client
- type protocol/client
- option transport-type tcp
- option remote-host _server-ip-address_
- option remote-subvolume colon-o
- end-volume
-
- Now we need to start both the server and client programs. To start
-the server:
-
- [root@server]# glusterfsd -f /tmp/glusterfs-server.vol
-
- To start the client:
-
- [root@client]# glusterfs -f /tmp/glusterfs-client.vol /mnt/glusterfs
-
- You should now be able to see the files under the server's `/export'
-directory in the `/mnt/glusterfs' directory on the client. That's it;
-GlusterFS is now working as a network file system.
-
-
-File: user-guide.info, Node: Concepts, Next: Translators, Prev: Installation and Invocation, Up: Top
-
-3 Concepts
-**********
-
-* Menu:
-
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-
-
-File: user-guide.info, Node: Filesystems in Userspace, Next: Translator, Up: Concepts
-
-3.1 Filesystems in Userspace
-============================
-
-A filesystem is usually implemented in kernel space. Kernel space
-development is much harder than userspace development. FUSE is a kernel
-module/library that allows us to write a filesystem completely in
-userspace.
-
- FUSE consists of a kernel module which interacts with the userspace
-implementation using a device file `/dev/fuse'. When a process makes a
-syscall on a FUSE filesystem, VFS hands the request to the FUSE module,
-which writes the request to `/dev/fuse'. The userspace implementation
-polls `/dev/fuse', and when a request arrives, processes it and writes
-the result back to `/dev/fuse'. The kernel then reads from the device
-file and returns the result to the user process.
-
- In case of GlusterFS, the userspace program is the GlusterFS client.
-The control flow is shown in the diagram below. The GlusterFS client
-services the request by sending it to the server, which in turn hands
-it to the local POSIX filesystem.
-
-
- Fig 1. Control flow in GlusterFS
-
-
-File: user-guide.info, Node: Translator, Next: Volume specification file, Prev: Filesystems in Userspace, Up: Concepts
-
-3.2 Translator
-==============
-
-The _translator_ is the most important concept in GlusterFS. In fact,
-GlusterFS is nothing but a collection of translators working together,
-forming a translator _tree_.
-
- The idea of a translator is perhaps best understood using an
-analogy. Consider the VFS in the Linux kernel. The VFS abstracts the
-various filesystem implementations (such as EXT3, ReiserFS, XFS, etc.)
-supported by the kernel. When an application calls the kernel to
-perform an operation on a file, the kernel passes the request on to the
-appropriate filesystem implementation.
-
- For example, let's say there are two partitions on a Linux machine:
-`/', which is an EXT3 partition, and `/usr', which is a ReiserFS
-partition. Now if an application wants to open a file called, say,
-`/etc/fstab', then the kernel will internally pass the request to the
-EXT3 implementation. If on the other hand, an application wants to
-read a file called `/usr/src/linux/CREDITS', then the kernel will call
-upon the ReiserFS implementation to do the job.
-
- The "filesystem implementation" objects are analogous to GlusterFS
-translators. A GlusterFS translator implements all the filesystem
-operations. Whereas in VFS there is a two-level tree (with the kernel
-at the root and all the filesystem implementation as its children), in
-GlusterFS there exists a more elaborate tree structure.
-
- We can now define translators more precisely. A GlusterFS translator
-is a shared object (`.so') that implements every filesystem call.
-GlusterFS translators can be arranged in an arbitrary tree structure
-(subject to constraints imposed by the translators). When GlusterFS
-receives a filesystem call, it passes it on to the translator at the
-root of the translator tree. The root translator may in turn pass it on
-to any or all of its children, and so on, until the leaf nodes are
-reached. The result of a filesystem call is communicated in the reverse
-fashion, from the leaf nodes up to the root node, and then on to the
-application.
-
- So what might a translator tree look like?
-
-
- Fig 2. A sample translator tree
-
- The diagram depicts three servers and one GlusterFS client. It is
-important to note that conceptually, the translator tree spans machine
-boundaries. Thus, the client machine in the diagram, `10.0.0.1', can
-access the aggregated storage of the filesystems on the server machines
-`10.0.0.2', `10.0.0.3', and `10.0.0.4'. The translator diagram will
-make more sense once you've read the next chapter and understood the
-functions of the various translators.
-
-
-File: user-guide.info, Node: Volume specification file, Prev: Translator, Up: Concepts
-
-3.3 Volume specification file
-=============================
-
-The volume specification file describes the translator tree for both the
-server and client programs.
-
- A volume specification file is a sequence of volume definitions.
-The syntax of a volume definition is explained below:
-
- *volume* _volume-name_
- *type* _translator-name_
- *option* _option-name_ _option-value_
- ...
- *subvolumes* _subvolume1_ _subvolume2_ ...
- *end-volume*
-
- ...
-
-_volume-name_
- An identifier for the volume. This is just a human-readable name,
- and can contain any alphanumeric character. For instance,
- "storage-1", "colon-o", or "forty-two".
-
-_translator-name_
- Name of one of the available translators. Example:
- `protocol/client', `cluster/unify'.
-
-_option-name_
- Name of a valid option for the translator.
-
-_option-value_
- Value for the option. Everything following the "option" keyword to
- the end of the line is considered the value; it is up to the
- translator to parse it.
-
-_subvolume1_, _subvolume2_, ...
- Volume names of sub-volumes. The sub-volumes must already have
- been defined earlier in the file.
-
- There are a few rules you must follow when writing a volume
-specification file:
-
- * Everything following a ``#'' is considered a comment and is
- ignored. Blank lines are also ignored.
-
- * All names and keywords are case-sensitive.
-
- * The order of options inside a volume definition does not matter.
-
- * An option value may not span multiple lines.
-
- * If an option is not specified, it will assume its default value.
-
- * A sub-volume must have already been defined before it can be
- referenced. This means you have to write the specification file
- "bottom-up", starting from the leaf nodes of the translator tree
- and moving up to the root.
-
- A simple example volume specification file is shown below:
-
- # This is a comment line
- volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost # Also a comment
- option remote-subvolume brick
- # The subvolumes line may be absent
- end-volume
-
- volume iot
- type performance/io-threads
- option thread-count 4
- subvolumes client
- end-volume
-
- volume wb
- type performance/write-behind
- subvolumes iot
- end-volume
-
-
-File: user-guide.info, Node: Translators, Next: Usage Scenarios, Prev: Concepts, Up: Top
-
-4 Translators
-*************
-
-* Menu:
-
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-* Miscellaneous Translators::
-
- This chapter documents all the available GlusterFS translators in
-detail. Each translator section will show its name (for example,
-`cluster/unify'), briefly describe its purpose and workings, and list
-every option accepted by that translator and their meaning.
-
-
-File: user-guide.info, Node: Storage Translators, Next: Client and Server Translators, Up: Translators
-
-4.1 Storage Translators
-=======================
-
-The storage translators form the "backend" for GlusterFS. Currently,
-the only available storage translator is the POSIX translator, which
-stores files on a normal POSIX filesystem. A pleasant consequence of
-this is that your data will still be accessible if GlusterFS crashes or
-cannot be started.
-
- Other storage backends are planned for the future. One of the
-possibilities is an Amazon S3 translator. Amazon S3 is an unlimited
-online storage service accessible through a web services API. The S3
-translator will allow you to access the storage as a normal POSIX
-filesystem. (1)
-
-* Menu:
-
-* POSIX::
-* BDB::
-
- ---------- Footnotes ----------
-
- (1) Some more discussion about this can be found at:
-
-http://developer.amazonwebservices.com/connect/message.jspa?messageID=52873
-
-
-File: user-guide.info, Node: POSIX, Next: BDB, Up: Storage Translators
-
-4.1.1 POSIX
------------
-
- type storage/posix
-
- The `posix' translator uses a normal POSIX filesystem as its
-"backend" to actually store files and directories. This can be any
-filesystem that supports extended attributes (EXT3, ReiserFS, XFS,
-...). Extended attributes are used by some translators to store
-metadata, for example, by the replicate and stripe translators. See
-*note Replicate:: and *note Stripe::, respectively for details.
-
-`directory <path>'
- The directory on the local filesystem which is to be used for
- storage.
-
-
-File: user-guide.info, Node: BDB, Prev: POSIX, Up: Storage Translators
-
-4.1.2 BDB
----------
-
- type storage/bdb
-
- The `BDB' translator uses a Berkeley DB database as its "backend" to
-actually store files as key-value pair in the database and directories
-as regular POSIX directories. Note that BDB does not provide extended
-attribute support for regular files. Do not use BDB as storage
-translator while using any translator that demands extended attributes
-on "backend".
-
-`directory <path>'
- The directory on the local filesystem which is to be used for
- storage.
-
-`mode [cache|persistent] (cache)'
- When BDB is run in `cache' mode, recovery of back-end is not
- completely guaranteed. `persistent' guarantees that BDB can
- recover back-end from Berkeley DB even if GlusterFS crashes.
-
-`errfile <path>'
- The path of the file to be used as `errfile' for Berkeley DB to
- report detailed error messages, if any. Note that all the contents
- of this file will be written by Berkeley DB, not GlusterFS.
-
-`logdir <path>'
-
-
-File: user-guide.info, Node: Client and Server Translators, Next: Clustering Translators, Prev: Storage Translators, Up: Translators
-
-4.2 Client and Server Translators
-=================================
-
-The client and server translator enable GlusterFS to export a
-translator tree over the network or access a remote GlusterFS server.
-These two translators implement GlusterFS's network protocol.
-
-* Menu:
-
-* Transport modules::
-* Client protocol::
-* Server protocol::
-
-
-File: user-guide.info, Node: Transport modules, Next: Client protocol, Up: Client and Server Translators
-
-4.2.1 Transport modules
------------------------
-
-The client and server translators are capable of using any of the
-pluggable transport modules. Currently available transport modules are
-`tcp', which uses a TCP connection between client and server to
-communicate; `ib-sdp', which uses a TCP connection over InfiniBand, and
-`ibverbs', which uses high-speed InfiniBand connections.
-
- Each transport module comes in two different versions, one to be
-used on the server side and the other on the client side.
-
-4.2.1.1 TCP
-...........
-
-The TCP transport module uses a TCP/IP connection between the server
-and the client.
-
- option transport-type tcp
-
- The TCP client module accepts the following options:
-
-`non-blocking-connect [no|off|on|yes] (on)'
- Whether to make the connection attempt asynchronous.
-
-`remote-port <n> (24007)'
- Server port to connect to.
-
-`remote-host <hostname> *'
- Hostname or IP address of the server. If the host name resolves to
- multiple IP addresses, all of them will be tried in a round-robin
- fashion. This feature can be used to implement fail-over.
-
- The TCP server module accepts the following options:
-
-`bind-address <address> (0.0.0.0)'
- The local interface on which the server should listen to requests.
- Default is to listen on all interfaces.
-
-`listen-port <n> (24007)'
- The local port to listen on.
-
-4.2.1.2 IB-SDP
-..............
-
- option transport-type ib-sdp
-
- kernel implements socket interface for ib hardware. SDP is over
-ib-verbs. This module accepts the same options as `tcp'
-
-4.2.1.3 ibverbs
-...............
-
- option transport-type tcp
-
- InfiniBand is a scalable switched fabric interconnect mechanism
-primarily used in high-performance computing. InfiniBand can deliver
-data throughput of the order of 10 Gbit/s, with latencies of 4-5 ms.
-
- The `ib-verbs' transport accesses the InfiniBand hardware through
-the "verbs" API, which is the lowest level of software access possible
-and which gives the highest performance. On InfiniBand hardware, it is
-always best to use `ib-verbs'. Use `ib-sdp' only if you cannot get
-`ib-verbs' working for some reason.
-
- The `ib-verbs' client module accepts the following options:
-
-`non-blocking-connect [no|off|on|yes] (on)'
- Whether to make the connection attempt asynchronous.
-
-`remote-port <n> (24007)'
- Server port to connect to.
-
-`remote-host <hostname> *'
- Hostname or IP address of the server. If the host name resolves to
- multiple IP addresses, all of them will be tried in a round-robin
- fashion. This feature can be used to implement fail-over.
-
- The `ib-verbs' server module accepts the following options:
-
-`bind-address <address> (0.0.0.0)'
- The local interface on which the server should listen to requests.
- Default is to listen on all interfaces.
-
-`listen-port <n> (24007)'
- The local port to listen on.
-
- The following options are common to both the client and server
-modules:
-
- If you are familiar with InfiniBand jargon, the mode is used by
-GlusterFS is "reliable connection-oriented channel transfer".
-
-`ib-verbs-work-request-send-count <n> (64)'
- Length of the send queue in datagrams. [Reason to
- increase/decrease?]
-
-`ib-verbs-work-request-recv-count <n> (64)'
- Length of the receive queue in datagrams. [Reason to
- increase/decrease?]
-
-`ib-verbs-work-request-send-size <size> (128KB)'
- Size of each datagram that is sent. [Reason to increase/decrease?]
-
-`ib-verbs-work-request-recv-size <size> (128KB)'
- Size of each datagram that is received. [Reason to
- increase/decrease?]
-
-`ib-verbs-port <n> (1)'
- Port number for ib-verbs.
-
-`ib-verbs-mtu [256|512|1024|2048|4096] (2048)'
- The Maximum Transmission Unit [Reason to increase/decrease?]
-
-`ib-verbs-device-name <device-name> (first device in the list)'
- InfiniBand device to be used.
-
- For maximum performance, you should ensure that the send/receive
-counts on both the client and server are the same.
-
- ib-verbs is preferred over ib-sdp.
-
-
-File: user-guide.info, Node: Client protocol, Next: Server protocol, Prev: Transport modules, Up: Client and Server Translators
-
-4.2.2 Client
-------------
-
- type procotol/client
-
- The client translator enables the GlusterFS client to access a
-remote server's translator tree.
-
-`transport-type [tcp,ib-sdp,ib-verbs] (tcp)'
- The transport type to use. You should use the client versions of
- all the transport modules (`tcp', `ib-sdp', `ib-verbs').
-
-`remote-subvolume <volume_name> *'
- The name of the volume on the remote host to attach to. Note that
- this is _not_ the name of the `protocol/server' volume on the
- server. It should be any volume under the server.
-
-`transport-timeout <n> (120- seconds)'
- Inactivity timeout. If a reply is expected and no activity takes
- place on the connection within this time, the transport connection
- will be broken, and a new connection will be attempted.
-
-
-File: user-guide.info, Node: Server protocol, Prev: Client protocol, Up: Client and Server Translators
-
-4.2.3 Server
-------------
-
- type protocol/server
-
- The server translator exports a translator tree and makes it
-accessible to remote GlusterFS clients.
-
-`client-volume-filename <path> (<CONFDIR>/glusterfs-client.vol)'
- The volume specification file to use for the client. This is the
- file the client will receive when it is invoked with the
- `--server' option (*note Client::).
-
-`transport-type [tcp,ib-verbs,ib-sdp] (tcp)'
- The transport to use. You should use the server versions of all
- the transport modules (`tcp', `ib-sdp', `ib-verbs').
-
-`auth.addr.<volume name>.allow <IP address wildcard pattern>'
- IP addresses of the clients that are allowed to attach to the
- specified volume. This can be a wildcard. For example, a wildcard
- of the form `192.168.*.*' allows any host in the `192.168.x.x'
- subnet to connect to the server.
-
-
-
-File: user-guide.info, Node: Clustering Translators, Next: Performance Translators, Prev: Client and Server Translators, Up: Translators
-
-4.3 Clustering Translators
-==========================
-
-The clustering translators are the most important GlusterFS
-translators, since it is these that make GlusterFS a cluster
-filesystem. These translators together enable GlusterFS to access an
-arbitrarily large amount of storage, and provide RAID-like redundancy
-and distribution over the entire cluster.
-
- There are three clustering translators: *unify*, *replicate*, and
-*stripe*. The unify translator aggregates storage from many server
-nodes. The replicate translator provides file replication. The stripe
-translator allows a file to be spread across many server nodes. The
-following sections look at each of these translators in detail.
-
-* Menu:
-
-* Unify::
-* Replicate::
-* Stripe::
-
-
-File: user-guide.info, Node: Unify, Next: Replicate, Up: Clustering Translators
-
-4.3.1 Unify
------------
-
- type cluster/unify
-
- The unify translator presents a `unified' view of all its
-sub-volumes. That is, it makes the union of all its sub-volumes appear
-as a single volume. It is the unify translator that gives GlusterFS the
-ability to access an arbitrarily large amount of storage.
-
- For unify to work correctly, certain invariants need to be
-maintained across the entire network. These are:
-
- * The directory structure of all the sub-volumes must be identical.
-
- * A particular file can exist on only one of the sub-volumes.
- Phrasing it in another way, a pathname such as
- `/home/calvin/homework.txt') is unique across the entire cluster.
-
-
-
-Looking at the second requirement, you might wonder how one can
-accomplish storing redundant copies of a file, if no file can exist
-multiple times. To answer, we must remember that these invariants are
-from _unify's perspective_. A translator such as replicate at a lower
-level in the translator tree than unify may subvert this picture.
-
- The first invariant might seem quite tedious to ensure. We shall see
-later that this is not so, since unify's _self-heal_ mechanism takes
-care of maintaining it.
-
- The second invariant implies that unify needs some way to decide
-which file goes where. Unify makes use of _scheduler_ modules for this
-purpose.
-
- When a file needs to be created, unify's scheduler decides upon the
-sub-volume to be used to store the file. There are many schedulers
-available, each using a different algorithm and suitable for different
-purposes.
-
- The various schedulers are described in detail in the sections that
-follow.
-
-4.3.1.1 ALU
-...........
-
- option scheduler alu
-
- ALU stands for "Adaptive Least Usage". It is the most advanced
-scheduler available in GlusterFS. It balances the load across volumes
-taking several factors in account. It adapts itself to changing I/O
-patterns according to its configuration. When properly configured, it
-can eliminate the need for regular tuning of the filesystem to keep
-volume load nicely balanced.
-
- The ALU scheduler is composed of multiple least-usage
-sub-schedulers. Each sub-scheduler keeps track of a certain type of
-load, for each of the sub-volumes, getting statistics from the
-sub-volumes themselves. The sub-schedulers are these:
-
- * disk-usage: The used and free disk space on the volume.
-
- * read-usage: The amount of reading done from this volume.
-
- * write-usage: The amount of writing done to this volume.
-
- * open-files-usage: The number of files currently open from this
- volume.
-
- * disk-speed-usage: The speed at which the disks are spinning. This
- is a constant value and therefore not very useful.
-
- The ALU scheduler needs to know which of these sub-schedulers to use,
-and in which order to evaluate them. This is done through the `option
-alu.order' configuration directive.
-
- Each sub-scheduler needs to know two things: when to kick in (the
-entry-threshold), and how long to stay in control (the exit-threshold).
-For example: when unifying three disks of 100GB, keeping an exact
-balance of disk-usage is not necesary. Instead, there could be a 1GB
-margin, which can be used to nicely balance other factors, such as
-read-usage. The disk-usage scheduler can be told to kick in only when a
-certain threshold of discrepancy is passed, such as 1GB. When it
-assumes control under this condition, it will write all subsequent data
-to the least-used volume. If it is doing so, it is unwise to stop right
-after the values are below the entry-threshold again, since that would
-make it very likely that the situation will occur again very soon. Such
-a situation would cause the ALU to spend most of its time disk-usage
-scheduling, which is unfair to the other sub-schedulers. The
-exit-threshold therefore defines the amount of data that needs to be
-written to the least-used disk, before control is relinquished again.
-
- In addition to the sub-schedulers, the ALU scheduler also has
-"limits" options. These can stop the creation of new files on a volume
-once values drop below a certain threshold. For example, setting
-`option alu.limits.min-free-disk 5GB' will stop the scheduling of files
-to volumes that have less than 5GB of free disk space, leaving the
-files on that disk some room to grow.
-
- The actual values you assign to the thresholds for sub-schedulers and
-limits depend on your situation. If you have fast-growing files, you'll
-want to stop file-creation on a disk much earlier than when hardly any
-of your files are growing. If you care less about disk-usage balance
-than about read-usage balance, you'll want a bigger disk-usage
-scheduler entry-threshold and a smaller read-usage scheduler
-entry-threshold.
-
- For thresholds defining a size, values specifying "KB", "MB" and "GB"
-are allowed. For example: `option alu.limits.min-free-disk 5GB'.
-
-`alu.order <order> * ("disk-usage:write-usage:read-usage:open-files-usage:disk-speed")'
-
-`alu.disk-usage.entry-threshold <size> (1GB)'
-
-`alu.disk-usage.exit-threshold <size> (512MB)'
-
-`alu.write-usage.entry-threshold <%> (25)'
-
-`alu.write-usage.exit-threshold <%> (5)'
-
-`alu.read-usage.entry-threshold <%> (25)'
-
-`alu.read-usage.exit-threshold <%> (5)'
-
-`alu.open-files-usage.entry-threshold <n> (1000)'
-
-`alu.open-files-usage.exit-threshold <n> (100)'
-
-`alu.limits.min-free-disk <%>'
-
-`alu.limits.max-open-files <n>'
-
-4.3.1.2 Round Robin (RR)
-........................
-
- option scheduler rr
-
- Round-Robin (RR) scheduler creates files in a round-robin fashion.
-Each client will have its own round-robin loop. When your files are
-mostly similar in size and I/O access pattern, this scheduler is a good
-choice. RR scheduler checks for free disk space on the server before
-scheduling, so you can know when to add another server node. The
-default value of min-free-disk is 5% and is checked on file creation
-calls, with atleast 10 seconds (by default) elapsing between two checks.
-
- Options:
-`rr.limits.min-free-disk <%> (5)'
- Minimum free disk space a node must have for RR to schedule a file
- to it.
-
-`rr.refresh-interval <t> (10 seconds)'
- Time between two successive free disk space checks.
-
-4.3.1.3 Random
-..............
-
- option scheduler random
-
- The random scheduler schedules file creation randomly among its
-child nodes. Like the round-robin scheduler, it also checks for a
-minimum amount of free disk space before scheduling a file to a node.
-
-`random.limits.min-free-disk <%> (5)'
- Minimum free disk space a node must have for random to schedule a
- file to it.
-
-`random.refresh-interval <t> (10 seconds)'
- Time between two successive free disk space checks.
-
-4.3.1.4 NUFA
-............
-
- option scheduler nufa
-
- It is common in many GlusterFS computing environments for all
-deployed machines to act as both servers and clients. For example, a
-research lab may have 40 workstations each with its own storage. All of
-these workstations might act as servers exporting a volume as well as
-clients accessing the entire cluster's storage. In such a situation,
-it makes sense to store locally created files on the local workstation
-itself (assuming files are accessed most by the workstation that
-created them). The Non-Uniform File Allocation (NUFA) scheduler
-accomplishes that.
-
- NUFA gives the local system first priority for file creation over
-other nodes. If the local volume does not have more free disk space
-than a specified amount (5% by default) then NUFA schedules files among
-the other child volumes in a round-robin fashion.
-
- NUFA is named after the similar strategy used for memory access,
-NUMA(1).
-
-`nufa.limits.min-free-disk <%> (5)'
- Minimum disk space that must be free (local or remote) for NUFA to
- schedule a file to it.
-
-`nufa.refresh-interval <t> (10 seconds)'
- Time between two successive free disk space checks.
-
-`nufa.local-volume-name <volume>'
- The name of the volume corresponding to the local system. This
- volume must be one of the children of the unify volume. This
- option is mandatory.
-
-4.3.1.5 Namespace
-.................
-
-Namespace volume needed because: - persistent inode numbers. - file
-exists even when node is down.
-
- namespace files are simply touched. on every lookup it is checked.
-
-`namespace <volume> *'
- Name of the namespace volume (which should be one of the unify
- volume's children).
-
-`self-heal [on|off] (on)'
- Enable/disable self-heal. Unless you know what you are doing, do
- not disable self-heal.
-
-4.3.1.6 Self Heal
-.................
-
-* When a 'lookup()/stat()' call is made on directory for the first
-time, a self-heal call is made, which checks for the consistancy of its
-child nodes. If an entry is present in storage node, but not in
-namespace, that entry is created in namespace, and vica-versa. There is
-an writedir() API introduced which is used for the same. It also checks
-for permissions, and uid/gid consistencies.
-
- * This check is also done when an server goes down and comes up.
-
- * If one starts with an empty namespace export, but has data in
-storage nodes, a 'find .>/dev/null' or 'ls -lR >/dev/null' should help
-to build namespace in one shot. Even otherwise, namespace is built on
-demand when a file is looked up for the first time.
-
- NOTE: There are some issues (Kernel 'Oops' msgs) seen with
-fuse-2.6.3, when someone deletes namespace in backend, when glusterfs is
-running. But with fuse-2.6.5, this issue is not there.
-
- ---------- Footnotes ----------
-
- (1) Non-Uniform Memory Access:
-<http://en.wikipedia.org/wiki/Non-Uniform_Memory_Access>
-
-
-File: user-guide.info, Node: Replicate, Next: Stripe, Prev: Unify, Up: Clustering Translators
-
-4.3.2 Replicate (formerly AFR)
-------------------------------
-
- type cluster/replicate
-
- Replicate provides RAID-1 like functionality for GlusterFS.
-Replicate replicates files and directories across the subvolumes. Hence
-if Replicate has four subvolumes, there will be four copies of all
-files and directories. Replicate provides high-availability, i.e., in
-case one of the subvolumes go down (e. g. server crash, network
-disconnection) Replicate will still service the requests using the
-redundant copies.
-
- Replicate also provides self-heal functionality, i.e., in case the
-crashed servers come up, the outdated files and directories will be
-updated with the latest versions. Replicate uses extended attributes of
-the backend file system to track the versioning of files and
-directories and provide the self-heal feature.
-
- volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2 brick3
- end-volume
-
- This sample configuration will replicate all directories and files on
-brick1, brick2 and brick3.
-
- All the read operations happen from the first alive child. If all the
-three sub-volumes are up, reads will be done from brick1; if brick1 is
-down read will be done from brick2. In case read() was being done on
-brick1 and it goes down, replicate transparently falls back to brick2.
-
- The next release of GlusterFS will add the following features:
- * Ability to specify the sub-volume from which read operations are
- to be done (this will help users who have one of the sub-volumes
- as a local storage volume).
-
- * Allow scheduling of read operations amongst the sub-volumes in a
- round-robin fashion.
-
- The order of the subvolumes list should be same across all the
-'replicate's as they will be used for locking purposes.
-
-4.3.2.1 Self Heal
-.................
-
-Replicate has self-heal feature, which updates the outdated file and
-directory copies by the most recent versions. For example consider the
-following config:
-
- volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2
- end-volume
-
-4.3.2.2 File self-heal
-......................
-
-Now if we create a file foo.txt on replicate-example, the file will be
-created on brick1 and brick2. The file will have two extended
-attributes associated with it in the backend filesystem. One is
-trusted.afr.createtime and the other is trusted.afr.version. The
-trusted.afr.createtime xattr has the create time (in terms of seconds
-since epoch) and trusted.afr.version is a number that is incremented
-each time a file is modified. This increment happens during close
-(incase any write was done before close).
-
- If brick1 goes down, we edit foo.txt the version gets incremented.
-Now the brick1 comes back up, when we open() on foo.txt replicate will
-check if their versions are same. If they are not same, the outdated
-copy is replaced by the latest copy and its version is updated. After
-the sync the open() proceeds in the usual manner and the application
-calling open() can continue on its access to the file.
-
- If brick1 goes down, we delete foo.txt and create a file with the
-same name again i.e foo.txt. Now brick1 comes back up, clearly there is
-a chance that the version on brick1 being more than the version on
-brick2, this is where createtime extended attribute helps in deciding
-which the outdated copy is. Hence we need to consider both createtime
-and version to decide on the latest copy.
-
- The version attribute is incremented during the close() call. Version
-will not be incremented in case there was no write() done. In case the
-fd that the close() gets was got by create() call, we also create the
-createtime extended attribute.
-
-4.3.2.3 Directory self-heal
-...........................
-
-Suppose brick1 goes down, we delete foo.txt, brick1 comes back up, now
-we should not create foo.txt on brick2 but we should delete foo.txt on
-brick1. We handle this situation by having the createtime and version
-attribute on the directory similar to the file. when lookup() is done
-on the directory, we compare the createtime/version attributes of the
-copies and see which files needs to be deleted and delete those files
-and update the extended attributes of the outdated directory copy.
-Each time a directory is modified (a file or a subdirectory is created
-or deleted inside the directory) and one of the subvols is down, we
-increment the directory's version.
-
- lookup() is a call initiated by the kernel on a file or directory
-just before any access to that file or directory. In glusterfs, by
-default, lookup() will not be called in case it was called in the past
-one second on that particular file or directory.
-
- The extended attributes can be seen in the backend filesystem using
-the `getfattr' command. (`getfattr -n trusted.afr.version <file>')
-
-`debug [on|off] (off)'
-
-`self-heal [on|off] (on)'
-
-`replicate <pattern> (*:1)'
-
-`lock-node <child_volume> (first child is used by default)'
-
-
-File: user-guide.info, Node: Stripe, Prev: Replicate, Up: Clustering Translators
-
-4.3.3 Stripe
-------------
-
- type cluster/stripe
-
- The stripe translator distributes the contents of a file over its
-sub-volumes. It does this by creating a file equal in size to the
-total size of the file on each of its sub-volumes. It then writes only
-a part of the file to each sub-volume, leaving the rest of it empty.
-These empty regions are called `holes' in Unix terminology. The holes
-do not consume any disk space.
-
- The diagram below makes this clear.
-
-
-
-You can configure stripe so that only filenames matching a pattern are
-striped. You can also configure the size of the data to be stored on
-each sub-volume.
-
-`block-size <pattern>:<size> (*:0 no striping)'
- Distribute files matching `<pattern>' over the sub-volumes,
- storing at least `<size>' on each sub-volume. For example,
-
- option block-size *.mpg:1M
-
- distributes all files ending in `.mpg', storing at least 1 MB on
- each sub-volume.
-
- Any number of `block-size' option lines may be present, specifying
- different sizes for different file name patterns.
-
-
-File: user-guide.info, Node: Performance Translators, Next: Features Translators, Prev: Clustering Translators, Up: Translators
-
-4.4 Performance Translators
-===========================
-
-* Menu:
-
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-* Booster::
-
-
-File: user-guide.info, Node: Read Ahead, Next: Write Behind, Up: Performance Translators
-
-4.4.1 Read Ahead
-----------------
-
- type performance/read-ahead
-
- The read-ahead translator pre-fetches data in advance on every read.
-This benefits applications that mostly process files in sequential
-order, since the next block of data will already be available by the
-time the application is done with the current one.
-
- Additionally, the read-ahead translator also behaves as a
-read-aggregator. Many small read operations are combined and issued as
-fewer, larger read requests to the server.
-
- Read-ahead deals in "pages" as the unit of data fetched. The page
-size is configurable, as is the "page count", which is the number of
-pages that are pre-fetched.
-
- Read-ahead is best used with InfiniBand (using the ib-verbs
-transport). On FastEthernet and Gigabit Ethernet networks, GlusterFS
-can achieve the link-maximum throughput even without read-ahead, making
-it quite superflous.
-
- Note that read-ahead only happens if the reads are perfectly
-sequential. If your application accesses data in a random fashion,
-using read-ahead might actually lead to a performance loss, since
-read-ahead will pointlessly fetch pages which won't be used by the
-application.
-
- Options:
-`page-size <n> (256KB)'
- The unit of data that is pre-fetched.
-
-`page-count <n> (2)'
- The number of pages that are pre-fetched.
-
-`force-atime-update [on|off|yes|no] (off|no)'
- Whether to force an access time (atime) update on the file on
- every read. Without this, the atime will be slightly imprecise, as
- it will reflect the time when the read-ahead translator read the
- data, not when the application actually read it.
-
-
-File: user-guide.info, Node: Write Behind, Next: IO Threads, Prev: Read Ahead, Up: Performance Translators
-
-4.4.2 Write Behind
-------------------
-
- type performance/write-behind
-
- The write-behind translator improves the latency of a write
-operation. It does this by relegating the write operation to the
-background and returning to the application even as the write is in
-progress. Using the write-behind translator, successive write requests
-can be pipelined. This mode of write-behind operation is best used on
-the client side, to enable decreased write latency for the application.
-
- The write-behind translator can also aggregate write requests. If the
-`aggregate-size' option is specified, then successive writes upto that
-size are accumulated and written in a single operation. This mode of
-operation is best used on the server side, as this will decrease the
-disk's head movement when multiple files are being written to in
-parallel.
-
- The `aggregate-size' option has a default value of 128KB. Although
-this works well for most users, you should always experiment with
-different values to determine the one that will deliver maximum
-performance. This is because the performance of write-behind depends on
-your interconnect, size of RAM, and the work load.
-
-`aggregate-size <n> (128KB)'
- Amount of data to accumulate before doing a write
-
-`flush-behind [on|yes|off|no] (off|no)'
-
-
-File: user-guide.info, Node: IO Threads, Next: IO Cache, Prev: Write Behind, Up: Performance Translators
-
-4.4.3 IO Threads
-----------------
-
- type performance/io-threads
-
- The IO threads translator is intended to increase the responsiveness
-of the server to metadata operations by doing file I/O (read, write) in
-a background thread. Since the GlusterFS server is single-threaded,
-using the IO threads translator can significantly improve performance.
-This translator is best used on the server side, loaded just below the
-server protocol translator.
-
- IO threads operates by handing out read and write requests to a
-separate thread. The total number of threads in existence at a time is
-constant, and configurable.
-
-`thread-count <n> (1)'
- Number of threads to use.
-
-
-File: user-guide.info, Node: IO Cache, Next: Booster, Prev: IO Threads, Up: Performance Translators
-
-4.4.4 IO Cache
---------------
-
- type performance/io-cache
-
- The IO cache translator caches data that has been read. This is
-useful if many applications read the same data multiple times, and if
-reads are much more frequent than writes (for example, IO caching may be
-useful in a web hosting environment, where most clients will simply
-read some files and only a few will write to them).
-
- The IO cache translator reads data from its child in `page-size'
-chunks. It caches data upto `cache-size' bytes. The cache is
-maintained as a prioritized least-recently-used (LRU) list, with
-priorities determined by user-specified patterns to match filenames.
-
- When the IO cache translator detects a write operation, the cache
-for that file is flushed.
-
- The IO cache translator periodically verifies the consistency of
-cached data, using the modification times on the files. The
-verification timeout is configurable.
-
-`page-size <n> (128KB)'
- Size of a page.
-
-`cache-size (n) (32MB)'
- Total amount of data to be cached.
-
-`force-revalidate-timeout <n> (1)'
- Timeout to force a cache consistency verification, in seconds.
-
-`priority <pattern> (*:0)'
- Filename patterns listed in order of priority.
-
-
-File: user-guide.info, Node: Booster, Prev: IO Cache, Up: Performance Translators
-
-4.4.5 Booster
--------------
-
- type performance/booster
-
- The booster translator gives applications a faster path to
-communicate read and write requests to GlusterFS. Normally, all
-requests to GlusterFS from applications go through FUSE, as indicated
-in *note Filesystems in Userspace::. Using the booster translator in
-conjunction with the GlusterFS booster shared library, an application
-can bypass the FUSE path and send read/write requests directly to the
-GlusterFS client process.
-
- The booster mechanism consists of two parts: the booster translator,
-and the booster shared library. The booster translator is meant to be
-loaded on the client side, usually at the root of the translator tree.
-The booster shared library should be `LD_PRELOAD'ed with the
-application.
-
- The booster translator when loaded opens a Unix domain socket and
-listens for read/write requests on it. The booster shared library
-intercepts read and write system calls and sends the requests to the
-GlusterFS process directly using the Unix domain socket, bypassing FUSE.
-This leads to superior performance.
-
- Once you've loaded the booster translator in your volume
-specification file, you can start your application as:
-
- $ LD_PRELOAD=/usr/local/bin/glusterfs-booster.so your_app
-
- The booster translator accepts no options.
-
-
-File: user-guide.info, Node: Features Translators, Next: Miscellaneous Translators, Prev: Performance Translators, Up: Translators
-
-4.5 Features Translators
-========================
-
-* Menu:
-
-* POSIX Locks::
-* Fixed ID::
-
-
-File: user-guide.info, Node: POSIX Locks, Next: Fixed ID, Up: Features Translators
-
-4.5.1 POSIX Locks
------------------
-
- type features/posix-locks
-
- This translator provides storage independent POSIX record locking
-support (`fcntl' locking). Typically you'll want to load this on the
-server side, just above the POSIX storage translator. Using this
-translator you can get both advisory locking and mandatory locking
-support. It also handles `flock()' locks properly.
-
- Caveat: Consider a file that does not have its mandatory locking bits
-(+setgid, -group execution) turned on. Assume that this file is now
-opened by a process on a client that has the write-behind xlator
-loaded. The write-behind xlator does not cache anything for files which
-have mandatory locking enabled, to avoid incoherence. Let's say that
-mandatory locking is now enabled on this file through another client.
-The former client will not know about this change, and write-behind may
-erroneously report a write as being successful when in fact it would
-fail due to the region it is writing to being locked.
-
- There seems to be no easy way to fix this. To work around this
-problem, it is recommended that you never enable the mandatory bits on
-a file while it is open.
-
-`mandatory [on|off] (on)'
- Turns mandatory locking on.
-
-
-File: user-guide.info, Node: Fixed ID, Prev: POSIX Locks, Up: Features Translators
-
-4.5.2 Fixed ID
---------------
-
- type features/fixed-id
-
- The fixed ID translator makes all filesystem requests from the client
-to appear to be coming from a fixed, specified UID/GID, regardless of
-which user actually initiated the request.
-
-`fixed-uid <n> [if not set, not used]'
- The UID to send to the server
-
-`fixed-gid <n> [if not set, not used]'
- The GID to send to the server
-
-
-File: user-guide.info, Node: Miscellaneous Translators, Prev: Features Translators, Up: Translators
-
-4.6 Miscellaneous Translators
-=============================
-
-* Menu:
-
-* ROT-13::
-* Trace::
-
-
-File: user-guide.info, Node: ROT-13, Next: Trace, Up: Miscellaneous Translators
-
-4.6.1 ROT-13
-------------
-
- type encryption/rot-13
-
- ROT-13 is a toy translator that can "encrypt" and "decrypt" file
-contents using the ROT-13 algorithm. ROT-13 is a trivial algorithm that
-rotates each alphabet by thirteen places. Thus, 'A' becomes 'N', 'B'
-becomes 'O', and 'Z' becomes 'M'.
-
- It goes without saying that you shouldn't use this translator if you
-need _real_ encryption (a future release of GlusterFS will have real
-encryption translators).
-
-`encrypt-write [on|off] (on)'
- Whether to encrypt on write
-
-`decrypt-read [on|off] (on)'
- Whether to decrypt on read
-
-
-File: user-guide.info, Node: Trace, Prev: ROT-13, Up: Miscellaneous Translators
-
-4.6.2 Trace
------------
-
- type debug/trace
-
- The trace translator is intended for debugging purposes. When
-loaded, it logs all the system calls received by the server or client
-(wherever trace is loaded), their arguments, and the results. You must
-use a GlusterFS log level of DEBUG (See *note Running GlusterFS::) for
-trace to work.
-
- Sample trace output (lines have been wrapped for readability):
- 2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68
- (*this=0x8059e40, loc=0x8091984 {path=/iozone3_283, inode=0x8091f00},
- fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace:
- (*this=0x8059e40, op_ret=4, op_errno=1, fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69
- (*this=0x8059e40, size=4096, offset=0 fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace:
- (*this=0x8059e40, op_ret=0, op_errno=0, count=4)
-
- 2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71
- (*this=0x8059e40, *fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace:
- (*this=0x8059e40, op_ret=0, op_errno=1)
-
-
-File: user-guide.info, Node: Usage Scenarios, Next: Troubleshooting, Prev: Translators, Up: Top
-
-5 Usage Scenarios
-*****************
-
-5.1 Advanced Striping
-=====================
-
-This section is based on the Advanced Striping tutorial written by
-Anand Avati on the GlusterFS wiki (1).
-
-5.1.1 Mixed Storage Requirements
---------------------------------
-
-There are two ways of scheduling the I/O. One at file level (using
-unify translator) and other at block level (using stripe translator).
-Striped I/O is good for files that are potentially large and require
-high parallel throughput (for example, a single file of 400GB being
-accessed by 100s and 1000s of systems simultaneously and randomly). For
-most of the cases, file level scheduling works best.
-
- In the real world, it is desirable to mix file level and block level
-scheduling on a single storage volume. Alternatively users can choose
-to have two separate volumes and hence two mount points, but the
-applications may demand a single storage system to host both.
-
- This document explains how to mix file level scheduling with stripe.
-
-5.1.2 Configuration Brief
--------------------------
-
-This setup demonstrates how users can configure unify translator with
-appropriate I/O scheduler for file level scheduling and strip for only
-matching patterns. This way, GlusterFS chooses appropriate I/O profile
-and knows how to efficiently handle both the types of data.
-
- A simple technique to achieve this effect is to create a stripe set
-of unify and stripe blocks, where unify is the first sub-volume. Files
-that do not match the stripe policy passed on to first unify sub-volume
-and inturn scheduled arcoss the cluster using its file level I/O
-scheduler.
-
- 5.1.3 Preparing GlusterFS Envoronment
--------------------------------------
-
-Create the directories /export/namespace, /export/unify and
-/export/stripe on all the storage bricks.
-
- Place the following server and client volume spec file under
-/etc/glusterfs (or appropriate installed path) and replace the IP
-addresses / access control fields to match your environment.
-
- ## file: /etc/glusterfs/glusterfsd.vol
- volume posix-unify
- type storage/posix
- option directory /export/for-unify
- end-volume
-
- volume posix-stripe
- type storage/posix
- option directory /export/for-stripe
- end-volume
-
- volume posix-namespace
- type storage/posix
- option directory /export/for-namespace
- end-volume
-
- volume server
- type protocol/server
- option transport-type tcp
- option auth.addr.posix-unify.allow 192.168.1.*
- option auth.addr.posix-stripe.allow 192.168.1.*
- option auth.addr.posix-namespace.allow 192.168.1.*
- subvolumes posix-unify posix-stripe posix-namespace
- end-volume
-
- ## file: /etc/glusterfs/glusterfs.vol
- volume client-namespace
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-namespace
- end-volume
-
- volume client-unify-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-unify
- end-volume
-
- volume client-stripe-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-stripe
- end-volume
-
- volume unify
- type cluster/unify
- option scheduler rr
- subvolumes cluster-unify-1 cluster-unify-2 cluster-unify-3 cluster-unify-4
- end-volume
-
- volume stripe
- type cluster/stripe
- option block-size *.img:2MB # All files ending with .img are striped with 2MB stripe block size.
- subvolumes unify cluster-stripe-1 cluster-stripe-2 cluster-stripe-3 cluster-stripe-4
- end-volume
-
- Bring up the Storage
-
- Starting GlusterFS Server: If you have installed through binary
-package, you can start the service through init.d startup script. If
-not:
-
- [root@server]# glusterfsd
-
- Mounting GlusterFS Volumes:
-
- [root@client]# glusterfs -s [BRICK-IP-ADDRESS] /mnt/cluster
-
- Improving upon this Setup
-
- Infiniband Verbs RDMA transport is much faster than TCP/IP GigE
-transport.
-
- Use of performance translators such as read-ahead, write-behind,
-io-cache, io-threads, booster is recommended.
-
- Replace round-robin (rr) scheduler with ALU to handle more dynamic
-storage environments.
-
- ---------- Footnotes ----------
-
- (1)
-http://gluster.org/docs/index.php/Mixing_Striped_and_Regular_Files
-
-
-File: user-guide.info, Node: Troubleshooting, Next: GNU Free Documentation Licence, Prev: Usage Scenarios, Up: Top
-
-6 Troubleshooting
-*****************
-
-This chapter is a general troubleshooting guide to GlusterFS. It lists
-common GlusterFS server and client error messages, debugging hints, and
-concludes with the suggested procedure to report bugs in GlusterFS.
-
-6.1 GlusterFS error messages
-============================
-
-6.1.1 Server errors
--------------------
-
- glusterfsd: FATAL: could not open specfile:
- '/etc/glusterfs/glusterfsd.vol'
-
- The GlusterFS server expects the volume specification file to be at
-`/etc/glusterfs/glusterfsd.vol'. The example specification file will be
-installed as `/etc/glusterfs/glusterfsd.vol.sample'. You need to edit
-it and rename it, or provide a different specification file using the
-`--spec-file' command line option (See *note Server::).
-
- gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log"
- (Permission denied)
-
- You don't have permission to create files in the
-`/usr/var/log/glusterfs' directory. Make sure you are running GlusterFS
-as root. Alternatively, specify a different path for the log file using
-the `--log-file' option (See *note Server::).
-
-6.1.2 Client errors
--------------------
-
- fusermount: failed to access mountpoint /mnt:
- Transport endpoint is not connected
-
- A previous failed (or hung) mount of GlusterFS is preventing it from
-being mounted again in the same location. The fix is to do:
-
- # umount /mnt
-
- and try mounting again.
-
- *"Transport endpoint is not connected".*
-
- If you get this error when you try a command such as `ls' or `cat',
-it means the GlusterFS mount did not succeed. Try running GlusterFS in
-`DEBUG' logging level and study the log messages to discover the cause.
-
- *"Connect to server failed", "SERVER-ADDRESS: Connection refused".*
-
- GluserFS Server is not running or dead. Check your network
-connections and firewall settings. To check if the server is reachable,
-try:
-
- telnet IP-ADDRESS 24007
-
- If the server is accessible, your `telnet' command should connect and
-block. If not you will see an error message such as `telnet: Unable to
-connect to remote host: Connection refused'. 24007 is the default
-GlusterFS port. If you have changed it, then use the corresponding port
-instead.
-
- gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log"
- (Permission denied)
-
- You don't have permission to create files in the
-`/usr/var/log/glusterfs' directory. Make sure you are running GlusterFS
-as root. Alternatively, specify a different path for the log file using
-the `--log-file' option (See *note Client::).
-
-6.2 FUSE error messages
-=======================
-
-`modprobe fuse' fails with: "Unknown symbol in module, or unknown
-parameter".
-
- If you are using fuse-2.6.x on Redhat Enterprise Linux Work Station 4
-and Advanced Server 4 with 2.6.9-42.ELlargesmp, 2.6.9-42.ELsmp,
-2.6.9-42.EL kernels and get this error while loading FUSE kernel
-module, you need to apply the following patch.
-
- For fuse-2.6.2:
-
-<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch>
-
- For fuse-2.6.3:
-
-<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch>
-
-6.3 AppArmour and GlusterFS
-===========================
-
-Under OpenSuSE GNU/Linux, the AppArmour security feature does not allow
-GlusterFS to create temporary files or network socket connections even
-while running as root. You will see error messages like `Unable to open
-log file: Operation not permitted' or `Connection refused'. Disabling
-AppArmour using YaST or properly configuring AppArmour to recognize
-`glusterfsd' or `glusterfs'/`fusermount' should solve the problem.
-
-6.4 Reporting a bug
-===================
-
-If you encounter a bug in GlusterFS, please follow the below guidelines
-when you report it to the mailing list. Be sure to report it! User
-feedback is crucial to the health of the project and we value it highly.
-
-6.4.1 General instructions
---------------------------
-
-When running GlusterFS in a non-production environment, be sure to
-build it with the following command:
-
- $ make CFLAGS='-g -O0 -DDEBUG'
-
- This includes debugging information which will be helpful in getting
-backtraces (see below) and also disable optimization. Enabling
-optimization can result in incorrect line numbers being reported to gdb.
-
-6.4.2 Volume specification files
---------------------------------
-
-Attach all relevant server and client spec files you were using when
-you encountered the bug. Also tell us details of your setup, i.e., how
-many clients and how many servers.
-
-6.4.3 Log files
----------------
-
-Set the loglevel of your client and server programs to DEBUG (by
-passing the -L DEBUG option) and attach the log files with your bug
-report. Obviously, if only the client is failing (for example), you
-only need to send us the client log file.
-
-6.4.4 Backtrace
----------------
-
-If GlusterFS has encountered a segmentation fault or has crashed for
-some other reason, include the backtrace with the bug report. You can
-get the backtrace using the following procedure.
-
- Run the GlusterFS client or server inside gdb.
-
- $ gdb ./glusterfs
- (gdb) set args -f client.spec -N -l/path/to/log/file -LDEBUG /mnt/point
- (gdb) run
-
- Now when the process segfaults, you can get the backtrace by typing:
-
- (gdb) bt
-
- If the GlusterFS process has crashed and dumped a core file (you can
-find this in / if running as a daemon and in the current directory
-otherwise), you can do:
-
- $ gdb /path/to/glusterfs /path/to/core.<pid>
-
- and then get the backtrace.
-
- If the GlusterFS server or client seems to be hung, then you can get
-the backtrace by attaching gdb to the process. First get the `PID' of
-the process (using ps), and then do:
-
- $ gdb ./glusterfs <pid>
-
- Press Ctrl-C to interrupt the process and then generate the
-backtrace.
-
-6.4.5 Reproducing the bug
--------------------------
-
-If the bug is reproducible, please include the steps necessary to do
-so. If the bug is not reproducible, send us the bug report anyway.
-
-6.4.6 Other information
------------------------
-
-If you think it is relevant, send us also the version of FUSE you're
-using, the kernel version, platform.
-
-
-File: user-guide.info, Node: GNU Free Documentation Licence, Next: Index, Prev: Troubleshooting, Up: Top
-
-Appendix A GNU Free Documentation Licence
-*****************************************
-
- Version 1.2, November 2002
-
- Copyright (C) 2000,2001,2002 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
-
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- 0. PREAMBLE
-
- The purpose of this License is to make a manual, textbook, or other
- functional and useful document "free" in the sense of freedom: to
- assure everyone the effective freedom to copy and redistribute it,
- with or without modifying it, either commercially or
- noncommercially. Secondarily, this License preserves for the
- author and publisher a way to get credit for their work, while not
- being considered responsible for modifications made by others.
-
- This License is a kind of "copyleft", which means that derivative
- works of the document must themselves be free in the same sense.
- It complements the GNU General Public License, which is a copyleft
- license designed for free software.
-
- We have designed this License in order to use it for manuals for
- free software, because free software needs free documentation: a
- free program should come with manuals providing the same freedoms
- that the software does. But this License is not limited to
- software manuals; it can be used for any textual work, regardless
- of subject matter or whether it is published as a printed book.
- We recommend this License principally for works whose purpose is
- instruction or reference.
-
- 1. APPLICABILITY AND DEFINITIONS
-
- This License applies to any manual or other work, in any medium,
- that contains a notice placed by the copyright holder saying it
- can be distributed under the terms of this License. Such a notice
- grants a world-wide, royalty-free license, unlimited in duration,
- to use that work under the conditions stated herein. The
- "Document", below, refers to any such manual or work. Any member
- of the public is a licensee, and is addressed as "you". You
- accept the license if you copy, modify or distribute the work in a
- way requiring permission under copyright law.
-
- A "Modified Version" of the Document means any work containing the
- Document or a portion of it, either copied verbatim, or with
- modifications and/or translated into another language.
-
- A "Secondary Section" is a named appendix or a front-matter section
- of the Document that deals exclusively with the relationship of the
- publishers or authors of the Document to the Document's overall
- subject (or to related matters) and contains nothing that could
- fall directly within that overall subject. (Thus, if the Document
- is in part a textbook of mathematics, a Secondary Section may not
- explain any mathematics.) The relationship could be a matter of
- historical connection with the subject or with related matters, or
- of legal, commercial, philosophical, ethical or political position
- regarding them.
-
- The "Invariant Sections" are certain Secondary Sections whose
- titles are designated, as being those of Invariant Sections, in
- the notice that says that the Document is released under this
- License. If a section does not fit the above definition of
- Secondary then it is not allowed to be designated as Invariant.
- The Document may contain zero Invariant Sections. If the Document
- does not identify any Invariant Sections then there are none.
-
- The "Cover Texts" are certain short passages of text that are
- listed, as Front-Cover Texts or Back-Cover Texts, in the notice
- that says that the Document is released under this License. A
- Front-Cover Text may be at most 5 words, and a Back-Cover Text may
- be at most 25 words.
-
- A "Transparent" copy of the Document means a machine-readable copy,
- represented in a format whose specification is available to the
- general public, that is suitable for revising the document
- straightforwardly with generic text editors or (for images
- composed of pixels) generic paint programs or (for drawings) some
- widely available drawing editor, and that is suitable for input to
- text formatters or for automatic translation to a variety of
- formats suitable for input to text formatters. A copy made in an
- otherwise Transparent file format whose markup, or absence of
- markup, has been arranged to thwart or discourage subsequent
- modification by readers is not Transparent. An image format is
- not Transparent if used for any substantial amount of text. A
- copy that is not "Transparent" is called "Opaque".
-
- Examples of suitable formats for Transparent copies include plain
- ASCII without markup, Texinfo input format, LaTeX input format,
- SGML or XML using a publicly available DTD, and
- standard-conforming simple HTML, PostScript or PDF designed for
- human modification. Examples of transparent image formats include
- PNG, XCF and JPG. Opaque formats include proprietary formats that
- can be read and edited only by proprietary word processors, SGML or
- XML for which the DTD and/or processing tools are not generally
- available, and the machine-generated HTML, PostScript or PDF
- produced by some word processors for output purposes only.
-
- The "Title Page" means, for a printed book, the title page itself,
- plus such following pages as are needed to hold, legibly, the
- material this License requires to appear in the title page. For
- works in formats which do not have any title page as such, "Title
- Page" means the text near the most prominent appearance of the
- work's title, preceding the beginning of the body of the text.
-
- A section "Entitled XYZ" means a named subunit of the Document
- whose title either is precisely XYZ or contains XYZ in parentheses
- following text that translates XYZ in another language. (Here XYZ
- stands for a specific section name mentioned below, such as
- "Acknowledgements", "Dedications", "Endorsements", or "History".)
- To "Preserve the Title" of such a section when you modify the
- Document means that it remains a section "Entitled XYZ" according
- to this definition.
-
- The Document may include Warranty Disclaimers next to the notice
- which states that this License applies to the Document. These
- Warranty Disclaimers are considered to be included by reference in
- this License, but only as regards disclaiming warranties: any other
- implication that these Warranty Disclaimers may have is void and
- has no effect on the meaning of this License.
-
- 2. VERBATIM COPYING
-
- You may copy and distribute the Document in any medium, either
- commercially or noncommercially, provided that this License, the
- copyright notices, and the license notice saying this License
- applies to the Document are reproduced in all copies, and that you
- add no other conditions whatsoever to those of this License. You
- may not use technical measures to obstruct or control the reading
- or further copying of the copies you make or distribute. However,
- you may accept compensation in exchange for copies. If you
- distribute a large enough number of copies you must also follow
- the conditions in section 3.
-
- You may also lend copies, under the same conditions stated above,
- and you may publicly display copies.
-
- 3. COPYING IN QUANTITY
-
- If you publish printed copies (or copies in media that commonly
- have printed covers) of the Document, numbering more than 100, and
- the Document's license notice requires Cover Texts, you must
- enclose the copies in covers that carry, clearly and legibly, all
- these Cover Texts: Front-Cover Texts on the front cover, and
- Back-Cover Texts on the back cover. Both covers must also clearly
- and legibly identify you as the publisher of these copies. The
- front cover must present the full title with all words of the
- title equally prominent and visible. You may add other material
- on the covers in addition. Copying with changes limited to the
- covers, as long as they preserve the title of the Document and
- satisfy these conditions, can be treated as verbatim copying in
- other respects.
-
- If the required texts for either cover are too voluminous to fit
- legibly, you should put the first ones listed (as many as fit
- reasonably) on the actual cover, and continue the rest onto
- adjacent pages.
-
- If you publish or distribute Opaque copies of the Document
- numbering more than 100, you must either include a
- machine-readable Transparent copy along with each Opaque copy, or
- state in or with each Opaque copy a computer-network location from
- which the general network-using public has access to download
- using public-standard network protocols a complete Transparent
- copy of the Document, free of added material. If you use the
- latter option, you must take reasonably prudent steps, when you
- begin distribution of Opaque copies in quantity, to ensure that
- this Transparent copy will remain thus accessible at the stated
- location until at least one year after the last time you
- distribute an Opaque copy (directly or through your agents or
- retailers) of that edition to the public.
-
- It is requested, but not required, that you contact the authors of
- the Document well before redistributing any large number of
- copies, to give them a chance to provide you with an updated
- version of the Document.
-
- 4. MODIFICATIONS
-
- You may copy and distribute a Modified Version of the Document
- under the conditions of sections 2 and 3 above, provided that you
- release the Modified Version under precisely this License, with
- the Modified Version filling the role of the Document, thus
- licensing distribution and modification of the Modified Version to
- whoever possesses a copy of it. In addition, you must do these
- things in the Modified Version:
-
- A. Use in the Title Page (and on the covers, if any) a title
- distinct from that of the Document, and from those of
- previous versions (which should, if there were any, be listed
- in the History section of the Document). You may use the
- same title as a previous version if the original publisher of
- that version gives permission.
-
- B. List on the Title Page, as authors, one or more persons or
- entities responsible for authorship of the modifications in
- the Modified Version, together with at least five of the
- principal authors of the Document (all of its principal
- authors, if it has fewer than five), unless they release you
- from this requirement.
-
- C. State on the Title page the name of the publisher of the
- Modified Version, as the publisher.
-
- D. Preserve all the copyright notices of the Document.
-
- E. Add an appropriate copyright notice for your modifications
- adjacent to the other copyright notices.
-
- F. Include, immediately after the copyright notices, a license
- notice giving the public permission to use the Modified
- Version under the terms of this License, in the form shown in
- the Addendum below.
-
- G. Preserve in that license notice the full lists of Invariant
- Sections and required Cover Texts given in the Document's
- license notice.
-
- H. Include an unaltered copy of this License.
-
- I. Preserve the section Entitled "History", Preserve its Title,
- and add to it an item stating at least the title, year, new
- authors, and publisher of the Modified Version as given on
- the Title Page. If there is no section Entitled "History" in
- the Document, create one stating the title, year, authors,
- and publisher of the Document as given on its Title Page,
- then add an item describing the Modified Version as stated in
- the previous sentence.
-
- J. Preserve the network location, if any, given in the Document
- for public access to a Transparent copy of the Document, and
- likewise the network locations given in the Document for
- previous versions it was based on. These may be placed in
- the "History" section. You may omit a network location for a
- work that was published at least four years before the
- Document itself, or if the original publisher of the version
- it refers to gives permission.
-
- K. For any section Entitled "Acknowledgements" or "Dedications",
- Preserve the Title of the section, and preserve in the
- section all the substance and tone of each of the contributor
- acknowledgements and/or dedications given therein.
-
- L. Preserve all the Invariant Sections of the Document,
- unaltered in their text and in their titles. Section numbers
- or the equivalent are not considered part of the section
- titles.
-
- M. Delete any section Entitled "Endorsements". Such a section
- may not be included in the Modified Version.
-
- N. Do not retitle any existing section to be Entitled
- "Endorsements" or to conflict in title with any Invariant
- Section.
-
- O. Preserve any Warranty Disclaimers.
-
- If the Modified Version includes new front-matter sections or
- appendices that qualify as Secondary Sections and contain no
- material copied from the Document, you may at your option
- designate some or all of these sections as invariant. To do this,
- add their titles to the list of Invariant Sections in the Modified
- Version's license notice. These titles must be distinct from any
- other section titles.
-
- You may add a section Entitled "Endorsements", provided it contains
- nothing but endorsements of your Modified Version by various
- parties--for example, statements of peer review or that the text
- has been approved by an organization as the authoritative
- definition of a standard.
-
- You may add a passage of up to five words as a Front-Cover Text,
- and a passage of up to 25 words as a Back-Cover Text, to the end
- of the list of Cover Texts in the Modified Version. Only one
- passage of Front-Cover Text and one of Back-Cover Text may be
- added by (or through arrangements made by) any one entity. If the
- Document already includes a cover text for the same cover,
- previously added by you or by arrangement made by the same entity
- you are acting on behalf of, you may not add another; but you may
- replace the old one, on explicit permission from the previous
- publisher that added the old one.
-
- The author(s) and publisher(s) of the Document do not by this
- License give permission to use their names for publicity for or to
- assert or imply endorsement of any Modified Version.
-
- 5. COMBINING DOCUMENTS
-
- You may combine the Document with other documents released under
- this License, under the terms defined in section 4 above for
- modified versions, provided that you include in the combination
- all of the Invariant Sections of all of the original documents,
- unmodified, and list them all as Invariant Sections of your
- combined work in its license notice, and that you preserve all
- their Warranty Disclaimers.
-
- The combined work need only contain one copy of this License, and
- multiple identical Invariant Sections may be replaced with a single
- copy. If there are multiple Invariant Sections with the same name
- but different contents, make the title of each such section unique
- by adding at the end of it, in parentheses, the name of the
- original author or publisher of that section if known, or else a
- unique number. Make the same adjustment to the section titles in
- the list of Invariant Sections in the license notice of the
- combined work.
-
- In the combination, you must combine any sections Entitled
- "History" in the various original documents, forming one section
- Entitled "History"; likewise combine any sections Entitled
- "Acknowledgements", and any sections Entitled "Dedications". You
- must delete all sections Entitled "Endorsements."
-
- 6. COLLECTIONS OF DOCUMENTS
-
- You may make a collection consisting of the Document and other
- documents released under this License, and replace the individual
- copies of this License in the various documents with a single copy
- that is included in the collection, provided that you follow the
- rules of this License for verbatim copying of each of the
- documents in all other respects.
-
- You may extract a single document from such a collection, and
- distribute it individually under this License, provided you insert
- a copy of this License into the extracted document, and follow
- this License in all other respects regarding verbatim copying of
- that document.
-
- 7. AGGREGATION WITH INDEPENDENT WORKS
-
- A compilation of the Document or its derivatives with other
- separate and independent documents or works, in or on a volume of
- a storage or distribution medium, is called an "aggregate" if the
- copyright resulting from the compilation is not used to limit the
- legal rights of the compilation's users beyond what the individual
- works permit. When the Document is included in an aggregate, this
- License does not apply to the other works in the aggregate which
- are not themselves derivative works of the Document.
-
- If the Cover Text requirement of section 3 is applicable to these
- copies of the Document, then if the Document is less than one half
- of the entire aggregate, the Document's Cover Texts may be placed
- on covers that bracket the Document within the aggregate, or the
- electronic equivalent of covers if the Document is in electronic
- form. Otherwise they must appear on printed covers that bracket
- the whole aggregate.
-
- 8. TRANSLATION
-
- Translation is considered a kind of modification, so you may
- distribute translations of the Document under the terms of section
- 4. Replacing Invariant Sections with translations requires special
- permission from their copyright holders, but you may include
- translations of some or all Invariant Sections in addition to the
- original versions of these Invariant Sections. You may include a
- translation of this License, and all the license notices in the
- Document, and any Warranty Disclaimers, provided that you also
- include the original English version of this License and the
- original versions of those notices and disclaimers. In case of a
- disagreement between the translation and the original version of
- this License or a notice or disclaimer, the original version will
- prevail.
-
- If a section in the Document is Entitled "Acknowledgements",
- "Dedications", or "History", the requirement (section 4) to
- Preserve its Title (section 1) will typically require changing the
- actual title.
-
- 9. TERMINATION
-
- You may not copy, modify, sublicense, or distribute the Document
- except as expressly provided for under this License. Any other
- attempt to copy, modify, sublicense or distribute the Document is
- void, and will automatically terminate your rights under this
- License. However, parties who have received copies, or rights,
- from you under this License will not have their licenses
- terminated so long as such parties remain in full compliance.
-
- 10. FUTURE REVISIONS OF THIS LICENSE
-
- The Free Software Foundation may publish new, revised versions of
- the GNU Free Documentation License from time to time. Such new
- versions will be similar in spirit to the present version, but may
- differ in detail to address new problems or concerns. See
- `http://www.gnu.org/copyleft/'.
-
- Each version of the License is given a distinguishing version
- number. If the Document specifies that a particular numbered
- version of this License "or any later version" applies to it, you
- have the option of following the terms and conditions either of
- that specified version or of any later version that has been
- published (not as a draft) by the Free Software Foundation. If
- the Document does not specify a version number of this License,
- you may choose any version ever published (not as a draft) by the
- Free Software Foundation.
-
-A.0.1 ADDENDUM: How to use this License for your documents
-----------------------------------------------------------
-
-To use this License in a document you have written, include a copy of
-the License in the document and put the following copyright and license
-notices just after the title page:
-
- Copyright (C) YEAR YOUR NAME.
- Permission is granted to copy, distribute and/or modify this document
- under the terms of the GNU Free Documentation License, Version 1.2
- or any later version published by the Free Software Foundation;
- with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
- Texts. A copy of the license is included in the section entitled ``GNU
- Free Documentation License''.
-
- If you have Invariant Sections, Front-Cover Texts and Back-Cover
-Texts, replace the "with...Texts." line with this:
-
- with the Invariant Sections being LIST THEIR TITLES, with
- the Front-Cover Texts being LIST, and with the Back-Cover Texts
- being LIST.
-
- If you have Invariant Sections without Cover Texts, or some other
-combination of the three, merge those two alternatives to suit the
-situation.
-
- If your document contains nontrivial examples of program code, we
-recommend releasing these examples in parallel under your choice of
-free software license, such as the GNU General Public License, to
-permit their use in free software.
-
-
-File: user-guide.info, Node: Index, Prev: GNU Free Documentation Licence, Up: Top
-
-Index
-*****
-
-
-* Menu:
-
-* alu (scheduler): Unify. (line 49)
-* AppArmour: Troubleshooting. (line 96)
-* arch: Getting GlusterFS. (line 6)
-* booster: Booster. (line 6)
-* commercial support: Introduction. (line 36)
-* DNS round robin: Transport modules. (line 29)
-* fcntl: POSIX Locks. (line 6)
-* FDL, GNU Free Documentation License: GNU Free Documentation Licence.
- (line 6)
-* fixed-id (translator): Fixed ID. (line 6)
-* GlusterFS client: Client. (line 6)
-* GlusterFS mailing list: Introduction. (line 28)
-* GlusterFS server: Server. (line 6)
-* infiniband transport: Transport modules. (line 58)
-* InfiniBand, installation: Pre requisites. (line 51)
-* io-cache (translator): IO Cache. (line 6)
-* io-threads (translator): IO Threads. (line 6)
-* IRC channel, #gluster: Introduction. (line 31)
-* libibverbs: Pre requisites. (line 51)
-* namespace: Unify. (line 207)
-* nufa (scheduler): Unify. (line 175)
-* OpenSuSE: Troubleshooting. (line 96)
-* posix-locks (translator): POSIX Locks. (line 6)
-* random (scheduler): Unify. (line 159)
-* read-ahead (translator): Read Ahead. (line 6)
-* record locking: POSIX Locks. (line 6)
-* Redhat Enterprise Linux: Troubleshooting. (line 78)
-* Replicate: Replicate. (line 6)
-* rot-13 (translator): ROT-13. (line 6)
-* rr (scheduler): Unify. (line 138)
-* scheduler (unify): Unify. (line 6)
-* self heal (replicate): Replicate. (line 46)
-* self heal (unify): Unify. (line 223)
-* stripe (translator): Stripe. (line 6)
-* trace (translator): Trace. (line 6)
-* unify (translator): Unify. (line 6)
-* unify invariants: Unify. (line 16)
-* write-behind (translator): Write Behind. (line 6)
-* Gluster, Inc.: Introduction. (line 36)
-
-
-
-Tag Table:
-Node: Top704
-Node: Acknowledgements2304
-Node: Introduction3214
-Node: Installation and Invocation4649
-Node: Pre requisites4933
-Node: Getting GlusterFS7023
-Ref: Getting GlusterFS-Footnote-17809
-Node: Building7857
-Node: Running GlusterFS9559
-Node: Server9770
-Node: Client11358
-Node: A Tutorial Introduction13564
-Node: Concepts17101
-Node: Filesystems in Userspace17316
-Node: Translator18457
-Node: Volume specification file21160
-Node: Translators23632
-Node: Storage Translators24201
-Ref: Storage Translators-Footnote-125008
-Node: POSIX25142
-Node: BDB25765
-Node: Client and Server Translators26822
-Node: Transport modules27298
-Node: Client protocol31445
-Node: Server protocol32384
-Node: Clustering Translators33373
-Node: Unify34260
-Ref: Unify-Footnote-143859
-Node: Replicate43951
-Node: Stripe49006
-Node: Performance Translators50164
-Node: Read Ahead50438
-Node: Write Behind52170
-Node: IO Threads53579
-Node: IO Cache54367
-Node: Booster55691
-Node: Features Translators57105
-Node: POSIX Locks57333
-Node: Fixed ID58650
-Node: Miscellaneous Translators59136
-Node: ROT-1359334
-Node: Trace60013
-Node: Usage Scenarios61282
-Ref: Usage Scenarios-Footnote-167215
-Node: Troubleshooting67290
-Node: GNU Free Documentation Licence73638
-Node: Index96087
-
-End Tag Table
diff --git a/doc/user-guide/legacy/user-guide.pdf b/doc/user-guide/legacy/user-guide.pdf
deleted file mode 100644
index ed7bd2a9907..00000000000
--- a/doc/user-guide/legacy/user-guide.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/user-guide.texi b/doc/user-guide/legacy/user-guide.texi
deleted file mode 100644
index 2d51da022cf..00000000000
--- a/doc/user-guide/legacy/user-guide.texi
+++ /dev/null
@@ -1,2246 +0,0 @@
-\input texinfo
-@setfilename user-guide.info
-@settitle GlusterFS 2.0 User Guide
-@afourpaper
-
-@direntry
-* GlusterFS: (user-guide). GlusterFS distributed filesystem user guide
-@end direntry
-
-@copying
-This is the user manual for GlusterFS 2.0.
-
-Copyright @copyright{} 2007-2011 @email{@b{Gluster}} , Inc. Permission is granted to
-copy, distribute and/or modify this document under the terms of the
-@acronym{GNU} Free Documentation License, Version 1.2 or any later
-version published by the Free Software Foundation; with no Invariant
-Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the
-license is included in the chapter entitled ``@acronym{GNU} Free
-Documentation License''.
-@end copying
-
-@titlepage
-@title GlusterFS 2.0 User Guide [DRAFT]
-@subtitle January 15, 2008
-@author http://gluster.org/core-team.php
-@author @email{@b{Gluster}}
-@page
-@vskip 0pt plus 1filll
-@insertcopying
-@end titlepage
-
-@c Info stuff
-@ifnottex
-@node Top
-@top GlusterFS 2.0 User Guide
-
-@insertcopying
-@menu
-* Acknowledgements::
-* Introduction::
-* Installation and Invocation::
-* Concepts::
-* Translators::
-* Usage Scenarios::
-* Troubleshooting::
-* GNU Free Documentation Licence::
-* Index::
-
-@detailmenu
- --- The Detailed Node Listing ---
-
-Installation and Invocation
-
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-
-Running GlusterFS
-
-* Server::
-* Client::
-
-Concepts
-
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-
-Translators
-
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-
-Storage Translators
-
-* POSIX::
-
-Client and Server Translators
-
-* Transport modules::
-* Client protocol::
-* Server protocol::
-
-Clustering Translators
-
-* Unify::
-* Replicate::
-* Stripe::
-
-Performance Translators
-
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-
-Features Translators
-
-* POSIX Locks::
-* Fixed ID::
-
-Miscellaneous Translators
-
-* ROT-13::
-* Trace::
-
-@end detailmenu
-@end menu
-
-@end ifnottex
-@c Info stuff end
-
-@contents
-
-@node Acknowledgements
-@unnumbered Acknowledgements
-GlusterFS continues to be a wonderful and enriching experience for all
-of us involved.
-
-GlusterFS development would not have been possible at this pace if
-not for our enthusiastic users. People from around the world have
-helped us with bug reports, performance numbers, and feature suggestions.
-A huge thanks to them all.
-
-Matthew Paine - for RPMs & general enthu
-
-Leonardo Rodrigues de Mello - for DEBs
-
-Julian Perez & Adam D'Auria - for multi-server tutorial
-
-Paul England - for HA spec
-
-Brent Nelson - for many bug reports
-
-Jacques Mattheij - for Europe mirror.
-
-Patrick Negri - for TCP non-blocking connect.
-@flushright
-http://gluster.org/core-team.php (@email{list-hacking@@gluster.com})
-@email{@b{Gluster}}
-@end flushright
-
-@node Introduction
-@chapter Introduction
-
-GlusterFS is a distributed filesystem. It works at the file level,
-not block level.
-
-A network filesystem is one which allows us to access remote files. A
-distributed filesystem is one that stores data on multiple machines
-and makes them all appear to be a part of the same filesystem.
-
-Need for distributed filesystems
-
-@itemize @bullet
-@item Scalability: A distributed filesystem allows us to store more data than what can be stored on a single machine.
-
-@item Redundancy: We might want to replicate crucial data on to several machines.
-
-@item Uniform access: One can mount a remote volume (for example your home directory) from any machine and access the same data.
-@end itemize
-
-@section Contacting us
-You can reach us through the mailing list @strong{gluster-devel}
-(@email{gluster-devel@@nongnu.org}).
-@cindex GlusterFS mailing list
-
-You can also find many of the developers on @acronym{IRC}, on the @code{#gluster}
-channel on Freenode (@indicateurl{irc.freenode.net}).
-@cindex IRC channel, #gluster
-
-The GlusterFS documentation wiki is also useful: @*
-@indicateurl{http://gluster.org/docs/index.php/GlusterFS}
-
-For commercial support, you can contact @email{@b{Gluster}} at:
-@cindex commercial support
-@cindex Gluster, Inc.
-
-@display
-3194 Winding Vista Common
-Fremont, CA 94539
-USA.
-
-Phone: +1 (510) 354 6801
-Toll free: +1 (888) 813 6309
-Fax: +1 (510) 372 0604
-@end display
-
-You can also email us at @email{support@@gluster.com}.
-
-@node Installation and Invocation
-@chapter Installation and Invocation
-
-@menu
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-@end menu
-
-@node Pre requisites
-@section Pre requisites
-
-Before installing GlusterFS make sure you have the
-following components installed.
-
-@subsection @acronym{FUSE}
-GlusterFS has now built-in support for the @acronym{FUSE} protocol.
-You need a kernel with @acronym{FUSE} support to mount GlusterFS.
-You do not need the @acronym{FUSE} package (library and utilities),
-but be aware of the following issues:
-
-@itemize
-@item If you want unprivileged users to be able to mount GlusterFS filesystems,
-you need a recent version of the @command{fusermount} utility. You already have
-it if you have @acronym{FUSE} version 2.7.0 or higher installed; if that's not
-the case, one will be compiled along with GlusterFS if you pass
-@command{--enable-fusermount} to the @command{configure} script. @item You
-need to ensure @acronym{FUSE} support is configured properly on your system. In
-details:
-@itemize
-@item If your kernel has @acronym{FUSE} as a loadable module, make sure it's
-loaded.
-@item Create @command{/dev/fuse} (major 10, minor 229) either by means of udev
-rules or by hand.
-@item Optionally, if you want runtime control over your @acronym{FUSE} mounts,
-mount the fusectl auxiliary filesystem:
-
-@example
-# mount -t fusectl none /sys/fs/fuse/connections
-@end example
-@end itemize
-
-The @acronym{FUSE} packages shipped by the various distributions usually take care
-about these things, so the easiest way to get the above tasks handled is still
-installing the @acronym{FUSE} package(s).
-@end itemize
-
-To get the best performance from GlusterFS,it is recommended that you use
-our patched version of the @acronym{FUSE} kernel module. See Patched FUSE for details.
-
-@subsection Patched FUSE
-
-The GlusterFS project maintains a patched version of @acronym{FUSE} meant to be used
-with GlusterFS. The patches increase GlusterFS performance. It is recommended that
-all users use the patched @acronym{FUSE}.
-
-The patched @acronym{FUSE} tarball can be downloaded from:
-
-@indicateurl{ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/}
-
-The specific changes made to @acronym{FUSE} are:
-
-@itemize
-@item The communication channel size between @acronym{FUSE} kernel module and GlusterFS has been increased to 1MB, permitting large reads and writes to be sent in bigger chunks.
-
-@item The kernel's read-ahead boundry has been extended upto 1MB.
-
-@item Block size returned in the @command{stat()}/@command{fstat()} calls tuned to 1MB, to make cp and similar commands perform I/O using that block size.
-
-@item @command{flock()} locking support has been added (although some rework in GlusterFS is needed for perfect compliance).
-@end itemize
-
-@subsection libibverbs (optional)
-@cindex InfiniBand, installation
-@cindex libibverbs
-This is only needed if you want GlusterFS to use InfiniBand as the
-interconnect mechanism between server and client. You can get it from:
-
-@indicateurl{http://www.openfabrics.org/downloads.htm}.
-
-@subsection Bison and Flex
-These should be already installed on most Linux systems. If not, use your distribution's
-normal software installation procedures to install them. Make sure you install the
-relevant developer packages also.
-
-@node Getting GlusterFS
-@section Getting GlusterFS
-@cindex arch
-There are many ways to get hold of GlusterFS. For a production deployment,
-the recommended method is to download the latest release tarball.
-Release tarballs are available at: @indicateurl{http://gluster.org/download.php}.
-
-If you want the bleeding edge development source, you can get them
-from the Git
-@footnote{@indicateurl{http://git-scm.com}}
-repository. First you must install Git itself. Then
-you can check out the source
-
-@example
-$ git clone git://git.sv.gnu.org/gluster.git glusterfs
-@end example
-
-@node Building
-@section Building
-You can skip this section if you're installing from @acronym{RPM}s
-or @acronym{DEB}s.
-
-GlusterFS uses the Autotools mechanism to build. As such, the procedure
-is straight-forward. First, change into the GlusterFS source directory.
-
-@example
-$ cd glusterfs-<version>
-@end example
-
-If you checked out the source from the Arch repository, you'll need
-to run @command{./autogen.sh} first. Note that you'll need to have
-Autoconf and Automake installed for this.
-
-Run @command{configure}.
-
-@example
-$ ./configure
-@end example
-
-The configure script accepts the following options:
-
-@cartouche
-@table @code
-
-@item --disable-ibverbs
-Disable the InfiniBand transport mechanism.
-
-@item --disable-fuse-client
-Disable the @acronym{FUSE} client.
-
-@item --disable-server
-Disable building of the GlusterFS server.
-
-@item --disable-bdb
-Disable building of Berkeley DB based storage translator.
-
-@item --disable-mod_glusterfs
-Disable building of Apache/lighttpd glusterfs plugins.
-
-@item --disable-epoll
-Use poll instead of epoll.
-
-@item --disable-libglusterfsclient
-Disable building of libglusterfsclient
-
-@item --enable-fusermount
-Build fusermount
-
-@end table
-@end cartouche
-
-Build and install GlusterFS.
-
-@example
-# make install
-@end example
-
-The binaries (@command{glusterfsd} and @command{glusterfs}) will be by
-default installed in @command{/usr/local/sbin/}. Translator,
-scheduler, and transport shared libraries will be installed in
-@command{/usr/local/lib/glusterfs/<version>/}. Sample volume
-specification files will be in @command{/usr/local/etc/glusterfs/}.
-This document itself can be found in
-@command{/usr/local/share/doc/glusterfs/}. If you passed the @command{--prefix}
-argument to the configure script, then replace @command{/usr/local} in the preceding
-paths with the prefix.
-
-@node Running GlusterFS
-@section Running GlusterFS
-
-@menu
-* Server::
-* Client::
-@end menu
-
-@node Server
-@subsection Server
-@cindex GlusterFS server
-
-The GlusterFS server is necessary to export storage volumes to remote clients
-(See @ref{Server protocol} for more info). This section documents the invocation
-of the GlusterFS server program and all the command-line options accepted by it.
-
-@cartouche
-@table @code
-Basic Options
-@item -f, --volfile=<path>
- Use the volume file as the volume specification.
-
-@item -s, --volfile-server=<hostname>
- Server to get volume file from. This option overrides --volfile option.
-
-@item -l, --log-file=<path>
- Specify the path for the log file.
-
-@item -L, --log-level=<level>
- Set the log level for the server. Log level should be one of @acronym{DEBUG},
-@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}.
-
-Advanced Options
-@item --debug
- Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and
- --log-file to console.
-
-@item -N, --no-daemon
- Run glusterfsd as a foreground process.
-
-@item -p, --pid-file=<path>
- Path for the @acronym{PID} file.
-
-@item --volfile-id=<key>
- 'key' of the volfile to be fetched from server.
-
-@item --volfile-server-port=<port-number>
- Listening port number of volfile server.
-
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
-
-@item --xlator-options=<volume-name.option=value>
- Add/override a translator option for a volume with specified value.
-
-Miscellaneous Options
-@item -?, --help
- Show this help text.
-
-@item --usage
- Display a short usage message.
-
-@item -V, --version
- Show version information.
-@end table
-@end cartouche
-
-@node Client
-@subsection Client
-@cindex GlusterFS client
-
-The GlusterFS client process is necessary to access remote storage volumes and
-mount them locally using @acronym{FUSE}. This section documents the invocation of the
-client process and all its command-line arguments.
-
-@example
- # glusterfs [options] <mountpoint>
-@end example
-
-The @command{mountpoint} is the directory where you want the GlusterFS
-filesystem to appear. Example:
-
-@example
- # glusterfs -f /usr/local/etc/glusterfs-client.vol /mnt
-@end example
-
-The command-line options are detailed below.
-
-@tex
-\vfill
-@end tex
-@page
-
-@cartouche
-@table @code
-
-Basic Options
-@item -f, --volfile=<path>
- Use the volume file as the volume specification.
-
-@item -s, --volfile-server=<hostname>
- Server to get volume file from. This option overrides --volfile option.
-
-@item -l, --log-file=<path>
- Specify the path for the log file.
-
-@item -L, --log-level=<level>
- Set the log level for the server. Log level should be one of @acronym{DEBUG},
-@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}.
-
-Advanced Options
-@item --debug
- Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and
- --log-file to console.
-
-@item -N, --no-daemon
- Run @command{glusterfs} as a foreground process.
-
-@item -p, --pid-file=<path>
- Path for the @acronym{PID} file.
-
-@item --volfile-id=<key>
- 'key' of the volfile to be fetched from server.
-
-@item --volfile-server-port=<port-number>
- Listening port number of volfile server.
-
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
-
-@item --xlator-options=<volume-name.option=value>
- Add/override a translator option for a volume with specified value.
-
-@item --volume-name=<volume name>
- Volume name in client spec to use. Defaults to the root volume.
-
-@acronym{FUSE} Options
-@item --attribute-timeout=<n>
- Attribute timeout for inodes in the kernel, in seconds. Defaults to 1 second.
-
-@item --disable-direct-io-mode
- Disable direct @acronym{I/O} mode in @acronym{FUSE} kernel module. This is set
- automatically if kernel supports big writes (>= 2.6.26).
-
-@item -e, --entry-timeout=<n>
- Entry timeout for directory entries in the kernel, in seconds.
- Defaults to 1 second.
-
-Missellaneous Options
-@item -?, --help
- Show this help information.
-
-@item -V, --version
- Show version information.
-@end table
-@end cartouche
-
-@node A Tutorial Introduction
-@section A Tutorial Introduction
-
-This section will show you how to quickly get GlusterFS up and running. We'll
-configure GlusterFS as a simple network filesystem, with one server and one client.
-In this mode of usage, GlusterFS can serve as a replacement for NFS.
-
-We'll make use of two machines; call them @emph{server} and
-@emph{client} (If you don't want to setup two machines, just run
-everything that follows on the same machine). In the examples that
-follow, the shell prompts will use these names to clarify the machine
-on which the command is being run. For example, a command that should
-be run on the server will be shown with the prompt:
-
-@example
-[root@@server]#
-@end example
-
-Our goal is to make a directory on the @emph{server} (say, @command{/export})
-accessible to the @emph{client}.
-
-First of all, get GlusterFS installed on both the machines, as described in the
-previous sections. Make sure you have the @acronym{FUSE} kernel module loaded. You
-can ensure this by running:
-
-@example
-[root@@server]# modprobe fuse
-@end example
-
-Before we can run the GlusterFS client or server programs, we need to write
-two files called @emph{volume specifications} (equivalently refered to as @emph{volfiles}).
-The volfile describes the @emph{translator tree} on a node. The next chapter will
-explain the concepts of `translator' and `volume specification' in detail. For now,
-just assume that the volfile is like an NFS @command{/etc/export} file.
-
-On the server, create a text file somewhere (we'll assume the path
-@command{/tmp/glusterfsd.vol}) with the following contents.
-
-@cartouche
-@example
-volume colon-o
- type storage/posix
- option directory /export
-end-volume
-
-volume server
- type protocol/server
- subvolumes colon-o
- option transport-type tcp
- option auth.addr.colon-o.allow *
-end-volume
-@end example
-@end cartouche
-
-A brief explanation of the file's contents. The first section defines a storage
-volume, named ``colon-o'' (the volume names are arbitrary), which exports the
-@command{/export} directory. The second section defines options for the translator
-which will make the storage volume accessible remotely. It specifies @command{colon-o} as
-a subvolume. This defines the @emph{translator tree}, about which more will be said
-in the next chapter. The two options specify that the @acronym{TCP} protocol is to be
-used (as opposed to InfiniBand, for example), and that access to the storage volume
-is to be provided to clients with any @acronym{IP} address at all. If you wanted to
-restrict access to this server to only your subnet for example, you'd specify
-something like @command{192.168.1.*} in the second option line.
-
-On the client machine, create the following text file (again, we'll assume
-the path to be @command{/tmp/glusterfs-client.vol}). Replace
-@emph{server-ip-address} with the @acronym{IP} address of your server machine. If you
-are doing all this on a single machine, use @command{127.0.0.1}.
-
-@cartouche
-@example
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host @emph{server-ip-address}
- option remote-subvolume colon-o
-end-volume
-@end example
-@end cartouche
-
-Now we need to start both the server and client programs. To start the server:
-
-@example
-[root@@server]# glusterfsd -f /tmp/glusterfs-server.vol
-@end example
-
-To start the client:
-
-@example
-[root@@client]# glusterfs -f /tmp/glusterfs-client.vol /mnt/glusterfs
-@end example
-
-You should now be able to see the files under the server's @command{/export} directory
-in the @command{/mnt/glusterfs} directory on the client. That's it; GlusterFS is now
-working as a network file system.
-
-@node Concepts
-@chapter Concepts
-
-@menu
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-@end menu
-
-@node Filesystems in Userspace
-@section Filesystems in Userspace
-
-A filesystem is usually implemented in kernel space. Kernel space
-development is much harder than userspace development. @acronym{FUSE}
-is a kernel module/library that allows us to write a filesystem
-completely in userspace.
-
-@acronym{FUSE} consists of a kernel module which interacts with the userspace
-implementation using a device file @code{/dev/fuse}. When a process
-makes a syscall on a @acronym{FUSE} filesystem, @acronym{VFS} hands the request to the
-@acronym{FUSE} module, which writes the request to @code{/dev/fuse}. The
-userspace implementation polls @code{/dev/fuse}, and when a request arrives,
-processes it and writes the result back to @code{/dev/fuse}. The kernel then
-reads from the device file and returns the result to the user process.
-
-In case of GlusterFS, the userspace program is the GlusterFS client.
-The control flow is shown in the diagram below. The GlusterFS client
-services the request by sending it to the server, which in turn
-hands it to the local @acronym{POSIX} filesystem.
-
-@center @image{fuse,44pc,,,.pdf}
-@center Fig 1. Control flow in GlusterFS
-
-@node Translator
-@section Translator
-
-The @emph{translator} is the most important concept in GlusterFS. In
-fact, GlusterFS is nothing but a collection of translators working
-together, forming a translator @emph{tree}.
-
-The idea of a translator is perhaps best understood using an
-analogy. Consider the @acronym{VFS} in the Linux kernel. The
-@acronym{VFS} abstracts the various filesystem implementations (such
-as @acronym{EXT3}, ReiserFS, @acronym{XFS}, etc.) supported by the
-kernel. When an application calls the kernel to perform an operation
-on a file, the kernel passes the request on to the appropriate
-filesystem implementation.
-
-For example, let's say there are two partitions on a Linux machine:
-@command{/}, which is an @acronym{EXT3} partition, and @command{/usr},
-which is a ReiserFS partition. Now if an application wants to open a
-file called, say, @command{/etc/fstab}, then the kernel will
-internally pass the request to the @acronym{EXT3} implementation. If
-on the other hand, an application wants to read a file called
-@command{/usr/src/linux/CREDITS}, then the kernel will call upon the
-ReiserFS implementation to do the job.
-
-The ``filesystem implementation'' objects are analogous to GlusterFS
-translators. A GlusterFS translator implements all the filesystem
-operations. Whereas in @acronym{VFS} there is a two-level tree (with
-the kernel at the root and all the filesystem implementation as its
-children), in GlusterFS there exists a more elaborate tree structure.
-
-We can now define translators more precisely. A GlusterFS translator
-is a shared object (@command{.so}) that implements every filesystem
-call. GlusterFS translators can be arranged in an arbitrary tree
-structure (subject to constraints imposed by the translators). When
-GlusterFS receives a filesystem call, it passes it on to the
-translator at the root of the translator tree. The root translator may
-in turn pass it on to any or all of its children, and so on, until the
-leaf nodes are reached. The result of a filesystem call is
-communicated in the reverse fashion, from the leaf nodes up to the
-root node, and then on to the application.
-
-So what might a translator tree look like?
-
-@tex
-\vfill
-@end tex
-@page
-
-@center @image{xlator,44pc,,,.pdf}
-@center Fig 2. A sample translator tree
-
-The diagram depicts three servers and one GlusterFS client. It is important
-to note that conceptually, the translator tree spans machine boundaries.
-Thus, the client machine in the diagram, @command{10.0.0.1}, can access
-the aggregated storage of the filesystems on the server machines @command{10.0.0.2},
-@command{10.0.0.3}, and @command{10.0.0.4}. The translator diagram will make more
-sense once you've read the next chapter and understood the functions of the
-various translators.
-
-@node Volume specification file
-@section Volume specification file
-The volume specification file describes the translator tree for both the
-server and client programs.
-
-A volume specification file is a sequence of volume definitions.
-The syntax of a volume definition is explained below:
-
-@cartouche
-@example
-@strong{volume} @emph{volume-name}
- @strong{type} @emph{translator-name}
- @strong{option} @emph{option-name} @emph{option-value}
- @dots{}
- @strong{subvolumes} @emph{subvolume1} @emph{subvolume2} @dots{}
-@strong{end-volume}
-@end example
-
-@dots{}
-@end cartouche
-
-@table @asis
-@item @emph{volume-name}
- An identifier for the volume. This is just a human-readable name,
-and can contain any alphanumeric character. For instance, ``storage-1'', ``colon-o'',
-or ``forty-two''.
-
-@item @emph{translator-name}
- Name of one of the available translators. Example: @command{protocol/client},
-@command{cluster/unify}.
-
-@item @emph{option-name}
- Name of a valid option for the translator.
-
-@item @emph{option-value}
- Value for the option. Everything following the ``option'' keyword to the end of the
-line is considered the value; it is up to the translator to parse it.
-
-@item @emph{subvolume1}, @emph{subvolume2}, @dots{}
- Volume names of sub-volumes. The sub-volumes must already have been defined earlier
-in the file.
-@end table
-
-There are a few rules you must follow when writing a volume specification file:
-
-@itemize
-@item Everything following a `@command{#}' is considered a comment and is ignored. Blank lines are also ignored.
-@item All names and keywords are case-sensitive.
-@item The order of options inside a volume definition does not matter.
-@item An option value may not span multiple lines.
-@item If an option is not specified, it will assume its default value.
-@item A sub-volume must have already been defined before it can be referenced. This means you have to write the specification file ``bottom-up'', starting from the leaf nodes of the translator tree and moving up to the root.
-@end itemize
-
-A simple example volume specification file is shown below:
-
-@cartouche
-@example
-# This is a comment line
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost # Also a comment
- option remote-subvolume brick
-# The subvolumes line may be absent
-end-volume
-
-volume iot
- type performance/io-threads
- option thread-count 4
- subvolumes client
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes iot
-end-volume
-@end example
-@end cartouche
-
-@node Translators
-@chapter Translators
-
-@menu
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-* Miscellaneous Translators::
-@end menu
-
-This chapter documents all the available GlusterFS translators in detail.
-Each translator section will show its name (for example, @command{cluster/unify}),
-briefly describe its purpose and workings, and list every option accepted by
-that translator and their meaning.
-
-@node Storage Translators
-@section Storage Translators
-
-The storage translators form the ``backend'' for GlusterFS. Currently,
-the only available storage translator is the @acronym{POSIX}
-translator, which stores files on a normal @acronym{POSIX}
-filesystem. A pleasant consequence of this is that your data will
-still be accessible if GlusterFS crashes or cannot be started.
-
-Other storage backends are planned for the future. One of the possibilities is an
-Amazon S3 translator. Amazon S3 is an unlimited online storage service accessible
-through a web services @acronym{API}. The S3 translator will allow you to access
-the storage as a normal @acronym{POSIX} filesystem.
-@footnote{Some more discussion about this can be found at:
-
-http://developer.amazonwebservices.com/connect/message.jspa?messageID=52873}
-
-@menu
-* POSIX::
-* BDB::
-@end menu
-
-@node POSIX
-@subsection POSIX
-@example
-type storage/posix
-@end example
-
-The @command{posix} translator uses a normal @acronym{POSIX}
-filesystem as its ``backend'' to actually store files and
-directories. This can be any filesystem that supports extended
-attributes (@acronym{EXT3}, ReiserFS, @acronym{XFS}, ...). Extended
-attributes are used by some translators to store metadata, for
-example, by the replicate and stripe translators. See
-@ref{Replicate} and @ref{Stripe}, respectively for details.
-
-@cartouche
-@table @code
-@item directory <path>
-The directory on the local filesystem which is to be used for storage.
-@end table
-@end cartouche
-
-@node BDB
-@subsection BDB
-@example
-type storage/bdb
-@end example
-
-The @command{BDB} translator uses a @acronym{Berkeley DB} database as its
-``backend'' to actually store files as key-value pair in the database and
-directories as regular @acronym{POSIX} directories. Note that @acronym{BDB}
-does not provide extended attribute support for regular files. Do not use
-@acronym{BDB} as storage translator while using any translator that demands
-extended attributes on ``backend''.
-
-@cartouche
-@table @code
-@item directory <path>
-The directory on the local filesystem which is to be used for storage.
-@item mode [cache|persistent] (cache)
-When @acronym{BDB} is run in @command{cache} mode, recovery of back-end is not completely
-guaranteed. @command{persistent} guarantees that @acronym{BDB} can recover back-end from
-@acronym{Berkeley DB} even if GlusterFS crashes.
-@item errfile <path>
-The path of the file to be used as @command{errfile} for @acronym{Berkeley DB} to report
-detailed error messages, if any. Note that all the contents of this file will be written
-by @acronym{Berkeley DB}, not GlusterFS.
-@item logdir <path>
-
-
-@end table
-@end cartouche
-
-@node Client and Server Translators, Clustering Translators, Storage Translators, Translators
-@section Client and Server Translators
-
-The client and server translator enable GlusterFS to export a
-translator tree over the network or access a remote GlusterFS
-server. These two translators implement GlusterFS's network protocol.
-
-@menu
-* Transport modules::
-* Client protocol::
-* Server protocol::
-@end menu
-
-@node Transport modules
-@subsection Transport modules
-The client and server translators are capable of using any of the
-pluggable transport modules. Currently available transport modules are
-@command{tcp}, which uses a @acronym{TCP} connection between client
-and server to communicate; @command{ib-sdp}, which uses a
-@acronym{TCP} connection over InfiniBand, and @command{ibverbs}, which
-uses high-speed InfiniBand connections.
-
-Each transport module comes in two different versions, one to be used on
-the server side and the other on the client side.
-
-@subsubsection TCP
-
-The @acronym{TCP} transport module uses a @acronym{TCP/IP} connection between
-the server and the client.
-
-@example
- option transport-type tcp
-@end example
-
-The @acronym{TCP} client module accepts the following options:
-
-@cartouche
-@table @code
-@item non-blocking-connect [no|off|on|yes] (on)
-Whether to make the connection attempt asynchronous.
-@item remote-port <n> (24007)
-Server port to connect to.
-@cindex DNS round robin
-@item remote-host <hostname> *
-Hostname or @acronym{IP} address of the server. If the host name resolves to
-multiple IP addresses, all of them will be tried in a round-robin fashion. This
-feature can be used to implement fail-over.
-@end table
-@end cartouche
-
-The @acronym{TCP} server module accepts the following options:
-
-@cartouche
-@table @code
-@item bind-address <address> (0.0.0.0)
-The local interface on which the server should listen to requests. Default is to
-listen on all interfaces.
-@item listen-port <n> (24007)
-The local port to listen on.
-@end table
-@end cartouche
-
-@subsubsection IB-SDP
-@example
- option transport-type ib-sdp
-@end example
-
-kernel implements socket interface for ib hardware. SDP is over ib-verbs.
-This module accepts the same options as @command{tcp}
-
-@subsubsection ibverbs
-
-@example
- option transport-type tcp
-@end example
-
-@cindex infiniband transport
-
-InfiniBand is a scalable switched fabric interconnect mechanism
-primarily used in high-performance computing. InfiniBand can deliver
-data throughput of the order of 10 Gbit/s, with latencies of 4-5 ms.
-
-The @command{ib-verbs} transport accesses the InfiniBand hardware through
-the ``verbs'' @acronym{API}, which is the lowest level of software access possible
-and which gives the highest performance. On InfiniBand hardware, it is always
-best to use @command{ib-verbs}. Use @command{ib-sdp} only if you cannot get
-@command{ib-verbs} working for some reason.
-
-The @command{ib-verbs} client module accepts the following options:
-
-@cartouche
-@table @code
-@item non-blocking-connect [no|off|on|yes] (on)
-Whether to make the connection attempt asynchronous.
-@item remote-port <n> (24007)
-Server port to connect to.
-@cindex DNS round robin
-@item remote-host <hostname> *
-Hostname or @acronym{IP} address of the server. If the host name resolves to
-multiple IP addresses, all of them will be tried in a round-robin fashion. This
-feature can be used to implement fail-over.
-@end table
-@end cartouche
-
-The @command{ib-verbs} server module accepts the following options:
-
-@cartouche
-@table @code
-@item bind-address <address> (0.0.0.0)
-The local interface on which the server should listen to requests. Default is to
-listen on all interfaces.
-@item listen-port <n> (24007)
-The local port to listen on.
-@end table
-@end cartouche
-
-The following options are common to both the client and server modules:
-
-If you are familiar with InfiniBand jargon,
-the mode is used by GlusterFS is ``reliable connection-oriented channel transfer''.
-
-@cartouche
-@table @code
-@item ib-verbs-work-request-send-count <n> (64)
-Length of the send queue in datagrams. [Reason to increase/decrease?]
-
-@item ib-verbs-work-request-recv-count <n> (64)
-Length of the receive queue in datagrams. [Reason to increase/decrease?]
-
-@item ib-verbs-work-request-send-size <size> (128KB)
-Size of each datagram that is sent. [Reason to increase/decrease?]
-
-@item ib-verbs-work-request-recv-size <size> (128KB)
-Size of each datagram that is received. [Reason to increase/decrease?]
-
-@item ib-verbs-port <n> (1)
-Port number for ib-verbs.
-
-@item ib-verbs-mtu [256|512|1024|2048|4096] (2048)
-The Maximum Transmission Unit [Reason to increase/decrease?]
-
-@item ib-verbs-device-name <device-name> (first device in the list)
-InfiniBand device to be used.
-@end table
-@end cartouche
-
-For maximum performance, you should ensure that the send/receive counts on both
-the client and server are the same.
-
-ib-verbs is preferred over ib-sdp.
-
-@node Client protocol
-@subsection Client
-@example
-type procotol/client
-@end example
-
-The client translator enables the GlusterFS client to access a remote server's
-translator tree.
-
-@cartouche
-@table @code
-
-@item transport-type [tcp,ib-sdp,ib-verbs] (tcp)
-The transport type to use. You should use the client versions of all the
-transport modules (@command{tcp}, @command{ib-sdp},
-@command{ib-verbs}).
-@item remote-subvolume <volume_name> *
-The name of the volume on the remote host to attach to. Note that
-this is @emph{not} the name of the @command{protocol/server} volume on the
-server. It should be any volume under the server.
-@item transport-timeout <n> (120- seconds)
-Inactivity timeout. If a reply is expected and no activity takes place
-on the connection within this time, the transport connection will be
-broken, and a new connection will be attempted.
-@end table
-@end cartouche
-
-@node Server protocol
-@subsection Server
-@example
-type protocol/server
-@end example
-
-The server translator exports a translator tree and makes it accessible to
-remote GlusterFS clients.
-
-@cartouche
-@table @code
-@item client-volume-filename <path> (<CONFDIR>/glusterfs-client.vol)
-The volume specification file to use for the client. This is the file the
-client will receive when it is invoked with the @command{--server} option
-(@ref{Client}).
-
-@item transport-type [tcp,ib-verbs,ib-sdp] (tcp)
-The transport to use. You should use the server versions of all the transport
-modules (@command{tcp}, @command{ib-sdp}, @command{ib-verbs}).
-
-@item auth.addr.<volume name>.allow <IP address wildcard pattern>
-IP addresses of the clients that are allowed to attach to the specified volume.
-This can be a wildcard. For example, a wildcard of the form @command{192.168.*.*}
-allows any host in the @command{192.168.x.x} subnet to connect to the server.
-
-@end table
-@end cartouche
-
-@node Clustering Translators
-@section Clustering Translators
-
-The clustering translators are the most important GlusterFS
-translators, since it is these that make GlusterFS a cluster
-filesystem. These translators together enable GlusterFS to access an
-arbitrarily large amount of storage, and provide @acronym{RAID}-like
-redundancy and distribution over the entire cluster.
-
-There are three clustering translators: @strong{unify}, @strong{replicate},
-and @strong{stripe}. The unify translator aggregates storage from
-many server nodes. The replicate translator provides file replication. The stripe
-translator allows a file to be spread across many server nodes. The following sections
-look at each of these translators in detail.
-
-@menu
-* Unify::
-* Replicate::
-* Stripe::
-@end menu
-
-@node Unify
-@subsection Unify
-@cindex unify (translator)
-@cindex scheduler (unify)
-@example
-type cluster/unify
-@end example
-
-The unify translator presents a `unified' view of all its sub-volumes. That is,
-it makes the union of all its sub-volumes appear as a single volume. It is the
-unify translator that gives GlusterFS the ability to access an arbitrarily
-large amount of storage.
-
-For unify to work correctly, certain invariants need to be maintained across
-the entire network. These are:
-
-@cindex unify invariants
-@itemize
-@item The directory structure of all the sub-volumes must be identical.
-@item A particular file can exist on only one of the sub-volumes. Phrasing it in another way, a pathname such as @command{/home/calvin/homework.txt}) is unique across the entire cluster.
-@end itemize
-
-@tex
-\vfill
-@end tex
-@page
-
-@center @image{unify,44pc,,,.pdf}
-
-Looking at the second requirement, you might wonder how one can
-accomplish storing redundant copies of a file, if no file can exist
-multiple times. To answer, we must remember that these invariants are
-from @emph{unify's perspective}. A translator such as replicate at a lower
-level in the translator tree than unify may subvert this picture.
-
-The first invariant might seem quite tedious to ensure. We shall see
-later that this is not so, since unify's @emph{self-heal} mechanism
-takes care of maintaining it.
-
-The second invariant implies that unify needs some way to decide which file goes where.
-Unify makes use of @emph{scheduler} modules for this purpose.
-
-When a file needs to be created, unify's scheduler decides upon the
-sub-volume to be used to store the file. There are many schedulers
-available, each using a different algorithm and suitable for different
-purposes.
-
-The various schedulers are described in detail in the sections that follow.
-
-@subsubsection ALU
-@cindex alu (scheduler)
-
-@example
- option scheduler alu
-@end example
-
-ALU stands for "Adaptive Least Usage". It is the most advanced
-scheduler available in GlusterFS. It balances the load across volumes
-taking several factors in account. It adapts itself to changing I/O
-patterns according to its configuration. When properly configured, it
-can eliminate the need for regular tuning of the filesystem to keep
-volume load nicely balanced.
-
-The ALU scheduler is composed of multiple least-usage
-sub-schedulers. Each sub-scheduler keeps track of a certain type of
-load, for each of the sub-volumes, getting statistics from
-the sub-volumes themselves. The sub-schedulers are these:
-
-@itemize
-@item disk-usage: The used and free disk space on the volume.
-
-@item read-usage: The amount of reading done from this volume.
-
-@item write-usage: The amount of writing done to this volume.
-
-@item open-files-usage: The number of files currently open from this volume.
-
-@item disk-speed-usage: The speed at which the disks are spinning. This is a constant value and therefore not very useful.
-@end itemize
-
-The ALU scheduler needs to know which of these sub-schedulers to use,
-and in which order to evaluate them. This is done through the
-@command{option alu.order} configuration directive.
-
-Each sub-scheduler needs to know two things: when to kick in (the
-entry-threshold), and how long to stay in control (the
-exit-threshold). For example: when unifying three disks of 100GB,
-keeping an exact balance of disk-usage is not necesary. Instead, there
-could be a 1GB margin, which can be used to nicely balance other
-factors, such as read-usage. The disk-usage scheduler can be told to
-kick in only when a certain threshold of discrepancy is passed, such
-as 1GB. When it assumes control under this condition, it will write
-all subsequent data to the least-used volume. If it is doing so, it is
-unwise to stop right after the values are below the entry-threshold
-again, since that would make it very likely that the situation will
-occur again very soon. Such a situation would cause the ALU to spend
-most of its time disk-usage scheduling, which is unfair to the other
-sub-schedulers. The exit-threshold therefore defines the amount of
-data that needs to be written to the least-used disk, before control
-is relinquished again.
-
-In addition to the sub-schedulers, the ALU scheduler also has "limits"
-options. These can stop the creation of new files on a volume once
-values drop below a certain threshold. For example, setting
-@command{option alu.limits.min-free-disk 5GB} will stop the scheduling
-of files to volumes that have less than 5GB of free disk space,
-leaving the files on that disk some room to grow.
-
-The actual values you assign to the thresholds for sub-schedulers and
-limits depend on your situation. If you have fast-growing files,
-you'll want to stop file-creation on a disk much earlier than when
-hardly any of your files are growing. If you care less about
-disk-usage balance than about read-usage balance, you'll want a bigger
-disk-usage scheduler entry-threshold and a smaller read-usage
-scheduler entry-threshold.
-
-For thresholds defining a size, values specifying "KB", "MB" and "GB"
-are allowed. For example: @command{option alu.limits.min-free-disk 5GB}.
-
-@cartouche
-@table @code
-@item alu.order <order> * ("disk-usage:write-usage:read-usage:open-files-usage:disk-speed")
-@item alu.disk-usage.entry-threshold <size> (1GB)
-@item alu.disk-usage.exit-threshold <size> (512MB)
-@item alu.write-usage.entry-threshold <%> (25)
-@item alu.write-usage.exit-threshold <%> (5)
-@item alu.read-usage.entry-threshold <%> (25)
-@item alu.read-usage.exit-threshold <%> (5)
-@item alu.open-files-usage.entry-threshold <n> (1000)
-@item alu.open-files-usage.exit-threshold <n> (100)
-@item alu.limits.min-free-disk <%>
-@item alu.limits.max-open-files <n>
-@end table
-@end cartouche
-
-@subsubsection Round Robin (RR)
-@cindex rr (scheduler)
-
-@example
- option scheduler rr
-@end example
-
-Round-Robin (RR) scheduler creates files in a round-robin
-fashion. Each client will have its own round-robin loop. When your
-files are mostly similar in size and I/O access pattern, this
-scheduler is a good choice. RR scheduler checks for free disk space
-on the server before scheduling, so you can know when to add
-another server node. The default value of min-free-disk is 5% and is
-checked on file creation calls, with atleast 10 seconds (by default)
-elapsing between two checks.
-
-Options:
-@cartouche
-@table @code
-@item rr.limits.min-free-disk <%> (5)
-Minimum free disk space a node must have for RR to schedule a file to it.
-@item rr.refresh-interval <t> (10 seconds)
-Time between two successive free disk space checks.
-@end table
-@end cartouche
-
-@subsubsection Random
-@cindex random (scheduler)
-
-@example
- option scheduler random
-@end example
-
-The random scheduler schedules file creation randomly among its child nodes.
-Like the round-robin scheduler, it also checks for a minimum amount of free disk
-space before scheduling a file to a node.
-
-@cartouche
-@table @code
-@item random.limits.min-free-disk <%> (5)
-Minimum free disk space a node must have for random to schedule a file to it.
-@item random.refresh-interval <t> (10 seconds)
-Time between two successive free disk space checks.
-@end table
-@end cartouche
-
-@subsubsection NUFA
-@cindex nufa (scheduler)
-
-@example
- option scheduler nufa
-@end example
-
-It is common in many GlusterFS computing environments for all deployed
-machines to act as both servers and clients. For example, a
-research lab may have 40 workstations each with its own storage. All
-of these workstations might act as servers exporting a volume as well
-as clients accessing the entire cluster's storage. In such a
-situation, it makes sense to store locally created files on the local
-workstation itself (assuming files are accessed most by the
-workstation that created them). The Non-Uniform File Allocation (@acronym{NUFA})
-scheduler accomplishes that.
-
-@acronym{NUFA} gives the local system first priority for file creation
-over other nodes. If the local volume does not have more free disk space
-than a specified amount (5% by default) then @acronym{NUFA} schedules files
-among the other child volumes in a round-robin fashion.
-
-@acronym{NUFA} is named after the similar strategy used for memory access,
-@acronym{NUMA}@footnote{Non-Uniform Memory Access:
-@indicateurl{http://en.wikipedia.org/wiki/Non-Uniform_Memory_Access}}.
-
-@cartouche
-@table @code
-@item nufa.limits.min-free-disk <%> (5)
-Minimum disk space that must be free (local or remote) for @acronym{NUFA} to schedule a
-file to it.
-@item nufa.refresh-interval <t> (10 seconds)
-Time between two successive free disk space checks.
-@item nufa.local-volume-name <volume>
-The name of the volume corresponding to the local system. This volume must be
-one of the children of the unify volume. This option is mandatory.
-@end table
-@end cartouche
-
-@cindex namespace
-@subsubsection Namespace
-Namespace volume needed because:
- - persistent inode numbers.
- - file exists even when node is down.
-
-namespace files are simply touched. on every lookup it is checked.
-
-@cartouche
-@table @code
-@item namespace <volume> *
-Name of the namespace volume (which should be one of the unify volume's children).
-@item self-heal [on|off] (on)
-Enable/disable self-heal. Unless you know what you are doing, do not disable self-heal.
-@end table
-@end cartouche
-
-@cindex self heal (unify)
-@subsubsection Self Heal
- * When a 'lookup()/stat()' call is made on directory for the first
-time, a self-heal call is made, which checks for the consistancy of
-its child nodes. If an entry is present in storage node, but not in
-namespace, that entry is created in namespace, and vica-versa. There
-is an writedir() API introduced which is used for the same. It also
-checks for permissions, and uid/gid consistencies.
-
- * This check is also done when an server goes down and comes up.
-
- * If one starts with an empty namespace export, but has data in
-storage nodes, a 'find .>/dev/null' or 'ls -lR >/dev/null' should help
-to build namespace in one shot. Even otherwise, namespace is built on
-demand when a file is looked up for the first time.
-
-NOTE: There are some issues (Kernel 'Oops' msgs) seen with fuse-2.6.3,
-when someone deletes namespace in backend, when glusterfs is
-running. But with fuse-2.6.5, this issue is not there.
-
-@node Replicate
-@subsection Replicate (formerly AFR)
-@cindex Replicate
-@example
-type cluster/replicate
-@end example
-
-Replicate provides @acronym{RAID}-1 like functionality for
-GlusterFS. Replicate replicates files and directories across the
-subvolumes. Hence if Replicate has four subvolumes, there will be
-four copies of all files and directories. Replicate provides
-high-availability, i.e., in case one of the subvolumes go down
-(e. g. server crash, network disconnection) Replicate will still
-service the requests using the redundant copies.
-
-Replicate also provides self-heal functionality, i.e., in case the
-crashed servers come up, the outdated files and directories will be
-updated with the latest versions. Replicate uses extended
-attributes of the backend file system to track the versioning of files
-and directories and provide the self-heal feature.
-
-@example
-volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2 brick3
-end-volume
-@end example
-
-This sample configuration will replicate all directories and files on
-brick1, brick2 and brick3.
-
-All the read operations happen from the first alive child. If all the
-three sub-volumes are up, reads will be done from brick1; if brick1 is
-down read will be done from brick2. In case read() was being done on
-brick1 and it goes down, replicate transparently falls back to
-brick2.
-
-The next release of GlusterFS will add the following features:
-@itemize
-@item Ability to specify the sub-volume from which read operations are to be done (this will help users who have one of the sub-volumes as a local storage volume).
-@item Allow scheduling of read operations amongst the sub-volumes in a round-robin fashion.
-@end itemize
-
-The order of the subvolumes list should be same across all the 'replicate's as
-they will be used for locking purposes.
-
-@cindex self heal (replicate)
-@subsubsection Self Heal
-Replicate has self-heal feature, which updates the outdated file and
-directory copies by the most recent versions. For example consider the
-following config:
-
-@example
-volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2
-end-volume
-@end example
-
-@subsubsection File self-heal
-
-Now if we create a file foo.txt on replicate-example, the file will be created
-on brick1 and brick2. The file will have two extended attributes associated
-with it in the backend filesystem. One is trusted.afr.createtime and the
-other is trusted.afr.version. The trusted.afr.createtime xattr has the
-create time (in terms of seconds since epoch) and trusted.afr.version
-is a number that is incremented each time a file is modified. This increment
-happens during close (incase any write was done before close).
-
-If brick1 goes down, we edit foo.txt the version gets incremented. Now
-the brick1 comes back up, when we open() on foo.txt replicate will check if
-their versions are same. If they are not same, the outdated copy is
-replaced by the latest copy and its version is updated. After the sync
-the open() proceeds in the usual manner and the application calling open()
-can continue on its access to the file.
-
-If brick1 goes down, we delete foo.txt and create a file with the same
-name again i.e foo.txt. Now brick1 comes back up, clearly there is a
-chance that the version on brick1 being more than the version on brick2,
-this is where createtime extended attribute helps in deciding which
-the outdated copy is. Hence we need to consider both createtime and
-version to decide on the latest copy.
-
-The version attribute is incremented during the close() call. Version
-will not be incremented in case there was no write() done. In case the
-fd that the close() gets was got by create() call, we also create
-the createtime extended attribute.
-
-@subsubsection Directory self-heal
-
-Suppose brick1 goes down, we delete foo.txt, brick1 comes back up, now
-we should not create foo.txt on brick2 but we should delete foo.txt
-on brick1. We handle this situation by having the createtime and version
-attribute on the directory similar to the file. when lookup() is done
-on the directory, we compare the createtime/version attributes of the
-copies and see which files needs to be deleted and delete those files
-and update the extended attributes of the outdated directory copy.
-Each time a directory is modified (a file or a subdirectory is created
-or deleted inside the directory) and one of the subvols is down, we
-increment the directory's version.
-
-lookup() is a call initiated by the kernel on a file or directory
-just before any access to that file or directory. In glusterfs, by
-default, lookup() will not be called in case it was called in the
-past one second on that particular file or directory.
-
-The extended attributes can be seen in the backend filesystem using
-the @command{getfattr} command. (@command{getfattr -n trusted.afr.version <file>})
-
-@cartouche
-@table @code
-@item debug [on|off] (off)
-@item self-heal [on|off] (on)
-@item replicate <pattern> (*:1)
-@item lock-node <child_volume> (first child is used by default)
-@end table
-@end cartouche
-
-@node Stripe
-@subsection Stripe
-@cindex stripe (translator)
-@example
-type cluster/stripe
-@end example
-
-The stripe translator distributes the contents of a file over its
-sub-volumes. It does this by creating a file equal in size to the
-total size of the file on each of its sub-volumes. It then writes only
-a part of the file to each sub-volume, leaving the rest of it empty.
-These empty regions are called `holes' in Unix terminology. The holes
-do not consume any disk space.
-
-The diagram below makes this clear.
-
-@center @image{stripe,44pc,,,.pdf}
-
-You can configure stripe so that only filenames matching a pattern
-are striped. You can also configure the size of the data to be stored
-on each sub-volume.
-
-@cartouche
-@table @code
-@item block-size <pattern>:<size> (*:0 no striping)
-Distribute files matching @command{<pattern>} over the sub-volumes,
-storing at least @command{<size>} on each sub-volume. For example,
-
-@example
- option block-size *.mpg:1M
-@end example
-
-distributes all files ending in @command{.mpg}, storing at least 1 MB on
-each sub-volume.
-
-Any number of @command{block-size} option lines may be present, specifying
-different sizes for different file name patterns.
-@end table
-@end cartouche
-
-@node Performance Translators
-@section Performance Translators
-
-@menu
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-* Booster::
-@end menu
-
-@node Read Ahead
-@subsection Read Ahead
-@cindex read-ahead (translator)
-@example
-type performance/read-ahead
-@end example
-
-The read-ahead translator pre-fetches data in advance on every read.
-This benefits applications that mostly process files in sequential order,
-since the next block of data will already be available by the time the
-application is done with the current one.
-
-Additionally, the read-ahead translator also behaves as a read-aggregator.
-Many small read operations are combined and issued as fewer, larger read
-requests to the server.
-
-Read-ahead deals in ``pages'' as the unit of data fetched. The page size
-is configurable, as is the ``page count'', which is the number of pages
-that are pre-fetched.
-
-Read-ahead is best used with InfiniBand (using the ib-verbs transport).
-On FastEthernet and Gigabit Ethernet networks,
-GlusterFS can achieve the link-maximum throughput even without
-read-ahead, making it quite superflous.
-
-Note that read-ahead only happens if the reads are perfectly
-sequential. If your application accesses data in a random fashion,
-using read-ahead might actually lead to a performance loss, since
-read-ahead will pointlessly fetch pages which won't be used by the
-application.
-
-@cartouche
-Options:
-@table @code
-@item page-size <n> (256KB)
-The unit of data that is pre-fetched.
-@item page-count <n> (2)
-The number of pages that are pre-fetched.
-@item force-atime-update [on|off|yes|no] (off|no)
-Whether to force an access time (atime) update on the file on every read. Without
-this, the atime will be slightly imprecise, as it will reflect the time when
-the read-ahead translator read the data, not when the application actually read it.
-@end table
-@end cartouche
-
-@node Write Behind
-@subsection Write Behind
-@cindex write-behind (translator)
-@example
-type performance/write-behind
-@end example
-
-The write-behind translator improves the latency of a write operation.
-It does this by relegating the write operation to the background and
-returning to the application even as the write is in progress. Using the
-write-behind translator, successive write requests can be pipelined.
-This mode of write-behind operation is best used on the client side, to
-enable decreased write latency for the application.
-
-The write-behind translator can also aggregate write requests. If the
-@command{aggregate-size} option is specified, then successive writes upto that
-size are accumulated and written in a single operation. This mode of operation
-is best used on the server side, as this will decrease the disk's head movement
-when multiple files are being written to in parallel.
-
-The @command{aggregate-size} option has a default value of 128KB. Although
-this works well for most users, you should always experiment with different values
-to determine the one that will deliver maximum performance. This is because the
-performance of write-behind depends on your interconnect, size of RAM, and the
-work load.
-
-@cartouche
-@table @code
-@item aggregate-size <n> (128KB)
-Amount of data to accumulate before doing a write
-@item flush-behind [on|yes|off|no] (off|no)
-
-@end table
-@end cartouche
-
-@node IO Threads
-@subsection IO Threads
-@cindex io-threads (translator)
-@example
-type performance/io-threads
-@end example
-
-The IO threads translator is intended to increase the responsiveness
-of the server to metadata operations by doing file I/O (read, write)
-in a background thread. Since the GlusterFS server is
-single-threaded, using the IO threads translator can significantly
-improve performance. This translator is best used on the server side,
-loaded just below the server protocol translator.
-
-IO threads operates by handing out read and write requests to a separate thread.
-The total number of threads in existence at a time is constant, and configurable.
-
-@cartouche
-@table @code
-@item thread-count <n> (1)
-Number of threads to use.
-@end table
-@end cartouche
-
-@node IO Cache
-@subsection IO Cache
-@cindex io-cache (translator)
-@example
-type performance/io-cache
-@end example
-
-The IO cache translator caches data that has been read. This is useful
-if many applications read the same data multiple times, and if reads
-are much more frequent than writes (for example, IO caching may be
-useful in a web hosting environment, where most clients will simply
-read some files and only a few will write to them).
-
-The IO cache translator reads data from its child in @command{page-size} chunks.
-It caches data upto @command{cache-size} bytes. The cache is maintained as
-a prioritized least-recently-used (@acronym{LRU}) list, with priorities determined
-by user-specified patterns to match filenames.
-
-When the IO cache translator detects a write operation, the
-cache for that file is flushed.
-
-The IO cache translator periodically verifies the consistency of
-cached data, using the modification times on the files. The verification timeout
-is configurable.
-
-@cartouche
-@table @code
-@item page-size <n> (128KB)
-Size of a page.
-@item cache-size (n) (32MB)
-Total amount of data to be cached.
-@item force-revalidate-timeout <n> (1)
-Timeout to force a cache consistency verification, in seconds.
-@item priority <pattern> (*:0)
-Filename patterns listed in order of priority.
-@end table
-@end cartouche
-
-@node Booster
-@subsection Booster
-@cindex booster
-@example
- type performance/booster
-@end example
-
-The booster translator gives applications a faster path to communicate
-read and write requests to GlusterFS. Normally, all requests to GlusterFS from
-applications go through FUSE, as indicated in @ref{Filesystems in Userspace}.
-Using the booster translator in conjunction with the GlusterFS booster shared
-library, an application can bypass the FUSE path and send read/write requests
-directly to the GlusterFS client process.
-
-The booster mechanism consists of two parts: the booster translator,
-and the booster shared library. The booster translator is meant to be
-loaded on the client side, usually at the root of the translator tree.
-The booster shared library should be @command{LD_PRELOAD}ed with the
-application.
-
-The booster translator when loaded opens a Unix domain socket and
-listens for read/write requests on it. The booster shared library
-intercepts read and write system calls and sends the requests to the
-GlusterFS process directly using the Unix domain socket, bypassing FUSE.
-This leads to superior performance.
-
-Once you've loaded the booster translator in your volume specification file, you
-can start your application as:
-
-@example
- $ LD_PRELOAD=/usr/local/bin/glusterfs-booster.so your_app
-@end example
-
-The booster translator accepts no options.
-
-@node Features Translators
-@section Features Translators
-
-@menu
-* POSIX Locks::
-* Fixed ID::
-@end menu
-
-@node POSIX Locks
-@subsection POSIX Locks
-@cindex record locking
-@cindex fcntl
-@cindex posix-locks (translator)
-@example
-type features/posix-locks
-@end example
-
-This translator provides storage independent POSIX record locking
-support (@command{fcntl} locking). Typically you'll want to load this on the
-server side, just above the @acronym{POSIX} storage translator. Using this
-translator you can get both advisory locking and mandatory locking
-support. It also handles @command{flock()} locks properly.
-
-Caveat: Consider a file that does not have its mandatory locking bits
-(+setgid, -group execution) turned on. Assume that this file is now
-opened by a process on a client that has the write-behind xlator
-loaded. The write-behind xlator does not cache anything for files
-which have mandatory locking enabled, to avoid incoherence. Let's say
-that mandatory locking is now enabled on this file through another
-client. The former client will not know about this change, and
-write-behind may erroneously report a write as being successful when
-in fact it would fail due to the region it is writing to being locked.
-
-There seems to be no easy way to fix this. To work around this
-problem, it is recommended that you never enable the mandatory bits on
-a file while it is open.
-
-@cartouche
-@table @code
-@item mandatory [on|off] (on)
-Turns mandatory locking on.
-@end table
-@end cartouche
-
-@node Fixed ID
-@subsection Fixed ID
-@cindex fixed-id (translator)
-@example
-type features/fixed-id
-@end example
-
-The fixed ID translator makes all filesystem requests from the client
-to appear to be coming from a fixed, specified
-@acronym{UID}/@acronym{GID}, regardless of which user actually
-initiated the request.
-
-@cartouche
-@table @code
-@item fixed-uid <n> [if not set, not used]
-The @acronym{UID} to send to the server
-@item fixed-gid <n> [if not set, not used]
-The @acronym{GID} to send to the server
-@end table
-@end cartouche
-
-@node Miscellaneous Translators
-@section Miscellaneous Translators
-
-@menu
-* ROT-13::
-* Trace::
-@end menu
-
-@node ROT-13
-@subsection ROT-13
-@cindex rot-13 (translator)
-@example
-type encryption/rot-13
-@end example
-
-@acronym{ROT-13} is a toy translator that can ``encrypt'' and ``decrypt'' file
-contents using the @acronym{ROT-13} algorithm. @acronym{ROT-13} is a trivial
-algorithm that rotates each alphabet by thirteen places. Thus, 'A' becomes 'N',
-'B' becomes 'O', and 'Z' becomes 'M'.
-
-It goes without saying that you shouldn't use this translator if you need
-@emph{real} encryption (a future release of GlusterFS will have real encryption
-translators).
-
-@cartouche
-@table @code
-@item encrypt-write [on|off] (on)
-Whether to encrypt on write
-@item decrypt-read [on|off] (on)
-Whether to decrypt on read
-@end table
-@end cartouche
-
-@node Trace
-@subsection Trace
-@cindex trace (translator)
-@example
-type debug/trace
-@end example
-
-The trace translator is intended for debugging purposes. When loaded, it
-logs all the system calls received by the server or client (wherever
-trace is loaded), their arguments, and the results. You must use a GlusterFS log
-level of DEBUG (See @ref{Running GlusterFS}) for trace to work.
-
-Sample trace output (lines have been wrapped for readability):
-@cartouche
-@example
-2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68
-(*this=0x8059e40, loc=0x8091984 @{path=/iozone3_283, inode=0x8091f00@},
- fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace:
-(*this=0x8059e40, op_ret=4, op_errno=1, fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69
-(*this=0x8059e40, size=4096, offset=0 fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace:
-(*this=0x8059e40, op_ret=0, op_errno=0, count=4)
-
-2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71
-(*this=0x8059e40, *fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace:
-(*this=0x8059e40, op_ret=0, op_errno=1)
-@end example
-@end cartouche
-
-@node Usage Scenarios
-@chapter Usage Scenarios
-
-@section Advanced Striping
-
-This section is based on the Advanced Striping tutorial written by
-Anand Avati on the GlusterFS wiki
-@footnote{http://gluster.org/docs/index.php/Mixing_Striped_and_Regular_Files}.
-
-@subsection Mixed Storage Requirements
-
-There are two ways of scheduling the I/O. One at file level (using
-unify translator) and other at block level (using stripe
-translator). Striped I/O is good for files that are potentially large
-and require high parallel throughput (for example, a single file of
-400GB being accessed by 100s and 1000s of systems simultaneously and
-randomly). For most of the cases, file level scheduling works best.
-
-In the real world, it is desirable to mix file level and block level
-scheduling on a single storage volume. Alternatively users can choose
-to have two separate volumes and hence two mount points, but the
-applications may demand a single storage system to host both.
-
-This document explains how to mix file level scheduling with stripe.
-
-@subsection Configuration Brief
-
-This setup demonstrates how users can configure unify translator with
-appropriate I/O scheduler for file level scheduling and strip for only
-matching patterns. This way, GlusterFS chooses appropriate I/O profile
-and knows how to efficiently handle both the types of data.
-
-A simple technique to achieve this effect is to create a stripe set of
-unify and stripe blocks, where unify is the first sub-volume. Files
-that do not match the stripe policy passed on to first unify
-sub-volume and inturn scheduled arcoss the cluster using its file
-level I/O scheduler.
-
-@image{advanced-stripe,44pc,,,.pdf}
-
-@subsection Preparing GlusterFS Envoronment
-
-Create the directories /export/namespace, /export/unify and
-/export/stripe on all the storage bricks.
-
- Place the following server and client volume spec file under
-/etc/glusterfs (or appropriate installed path) and replace the IP
-addresses / access control fields to match your environment.
-
-@cartouche
-@example
- ## file: /etc/glusterfs/glusterfsd.vol
- volume posix-unify
- type storage/posix
- option directory /export/for-unify
- end-volume
-
- volume posix-stripe
- type storage/posix
- option directory /export/for-stripe
- end-volume
-
- volume posix-namespace
- type storage/posix
- option directory /export/for-namespace
- end-volume
-
- volume server
- type protocol/server
- option transport-type tcp
- option auth.addr.posix-unify.allow 192.168.1.*
- option auth.addr.posix-stripe.allow 192.168.1.*
- option auth.addr.posix-namespace.allow 192.168.1.*
- subvolumes posix-unify posix-stripe posix-namespace
- end-volume
-@end example
-@end cartouche
-
-@cartouche
-@example
- ## file: /etc/glusterfs/glusterfs.vol
- volume client-namespace
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-namespace
- end-volume
-
- volume client-unify-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-unify
- end-volume
-
- volume client-stripe-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-stripe
- end-volume
-
- volume unify
- type cluster/unify
- option scheduler rr
- subvolumes cluster-unify-1 cluster-unify-2 cluster-unify-3 cluster-unify-4
- end-volume
-
- volume stripe
- type cluster/stripe
- option block-size *.img:2MB # All files ending with .img are striped with 2MB stripe block size.
- subvolumes unify cluster-stripe-1 cluster-stripe-2 cluster-stripe-3 cluster-stripe-4
- end-volume
-@end example
-@end cartouche
-
-
-Bring up the Storage
-
-Starting GlusterFS Server: If you have installed through binary
-package, you can start the service through init.d startup script. If
-not:
-
-@example
-[root@@server]# glusterfsd
-@end example
-
-Mounting GlusterFS Volumes:
-
-@example
-[root@@client]# glusterfs -s [BRICK-IP-ADDRESS] /mnt/cluster
-@end example
-
-Improving upon this Setup
-
-Infiniband Verbs RDMA transport is much faster than TCP/IP GigE
-transport.
-
-Use of performance translators such as read-ahead, write-behind,
-io-cache, io-threads, booster is recommended.
-
-Replace round-robin (rr) scheduler with ALU to handle more dynamic
-storage environments.
-
-@node Troubleshooting
-@chapter Troubleshooting
-
-This chapter is a general troubleshooting guide to GlusterFS. It lists
-common GlusterFS server and client error messages, debugging hints, and
-concludes with the suggested procedure to report bugs in GlusterFS.
-
-@section GlusterFS error messages
-
-@subsection Server errors
-
-@example
-glusterfsd: FATAL: could not open specfile:
-'/etc/glusterfs/glusterfsd.vol'
-@end example
-
-The GlusterFS server expects the volume specification file to be
-at @command{/etc/glusterfs/glusterfsd.vol}. The example
-specification file will be installed as
-@command{/etc/glusterfs/glusterfsd.vol.sample}. You need to edit
-it and rename it, or provide a different specification file using
-the @command{--spec-file} command line option (See @ref{Server}).
-
-@vskip 4ex
-
-@example
-gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log"
- (Permission denied)
-@end example
-
-You don't have permission to create files in the
-@command{/usr/var/log/glusterfs} directory. Make sure you are running
-GlusterFS as root. Alternatively, specify a different path for the log
-file using the @command{--log-file} option (See @ref{Server}).
-
-@subsection Client errors
-
-@example
-fusermount: failed to access mountpoint /mnt:
- Transport endpoint is not connected
-@end example
-
-A previous failed (or hung) mount of GlusterFS is preventing it from being
-mounted again in the same location. The fix is to do:
-
-@example
-# umount /mnt
-@end example
-
-and try mounting again.
-
-@vskip 4ex
-
-@strong{``Transport endpoint is not connected''.}
-
-If you get this error when you try a command such as @command{ls} or @command{cat},
-it means the GlusterFS mount did not succeed. Try running GlusterFS in @command{DEBUG}
-logging level and study the log messages to discover the cause.
-
-@vskip 4ex
-
-@strong{``Connect to server failed'', ``SERVER-ADDRESS: Connection refused''.}
-
-GluserFS Server is not running or dead. Check your network
-connections and firewall settings. To check if the server is reachable,
-try:
-
-@example
-telnet IP-ADDRESS 24007
-@end example
-
-If the server is accessible, your `telnet' command should connect and
-block. If not you will see an error message such as @command{telnet: Unable to
-connect to remote host: Connection refused}. 24007 is the default
-GlusterFS port. If you have changed it, then use the corresponding
-port instead.
-
-@vskip 4ex
-
-@example
-gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log"
- (Permission denied)
-@end example
-
-You don't have permission to create files in the
-@command{/usr/var/log/glusterfs} directory. Make sure you are running
-GlusterFS as root. Alternatively, specify a different path for the log
-file using the @command{--log-file} option (See @ref{Client}).
-
-@section FUSE error messages
-@command{modprobe fuse} fails with: ``Unknown symbol in module, or unknown parameter''.
-@cindex Redhat Enterprise Linux
-
-If you are using fuse-2.6.x on Redhat Enterprise Linux Work Station 4
-and Advanced Server 4 with 2.6.9-42.ELlargesmp, 2.6.9-42.ELsmp,
-2.6.9-42.EL kernels and get this error while loading @acronym{FUSE} kernel
-module, you need to apply the following patch.
-
-For fuse-2.6.2:
-
-@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch}
-
-For fuse-2.6.3:
-
-@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch}
-
-@section AppArmour and GlusterFS
-@cindex AppArmour
-@cindex OpenSuSE
-Under OpenSuSE GNU/Linux, the AppArmour security feature does not
-allow GlusterFS to create temporary files or network socket
-connections even while running as root. You will see error messages
-like `Unable to open log file: Operation not permitted' or `Connection
-refused'. Disabling AppArmour using YaST or properly configuring
-AppArmour to recognize @command{glusterfsd} or @command{glusterfs}/@command{fusermount}
-should solve the problem.
-
-@section Reporting a bug
-
-If you encounter a bug in GlusterFS, please follow the below
-guidelines when you report it to the mailing list. Be sure to report
-it! User feedback is crucial to the health of the project and we value
-it highly.
-
-@subsection General instructions
-
-When running GlusterFS in a non-production environment, be sure to
-build it with the following command:
-
-@example
- $ make CFLAGS='-g -O0 -DDEBUG'
-@end example
-
-This includes debugging information which will be helpful in getting
-backtraces (see below) and also disable optimization. Enabling
-optimization can result in incorrect line numbers being reported to
-gdb.
-
-@subsection Volume specification files
-
-Attach all relevant server and client spec files you were using when
-you encountered the bug. Also tell us details of your setup, i.e., how
-many clients and how many servers.
-
-@subsection Log files
-
-Set the loglevel of your client and server programs to @acronym{DEBUG} (by
-passing the -L @acronym{DEBUG} option) and attach the log files with your bug
-report. Obviously, if only the client is failing (for example), you
-only need to send us the client log file.
-
-@subsection Backtrace
-
-If GlusterFS has encountered a segmentation fault or has crashed for
-some other reason, include the backtrace with the bug report. You can
-get the backtrace using the following procedure.
-
-Run the GlusterFS client or server inside gdb.
-
-@example
- $ gdb ./glusterfs
- (gdb) set args -f client.spec -N -l/path/to/log/file -LDEBUG /mnt/point
- (gdb) run
-@end example
-
-Now when the process segfaults, you can get the backtrace by typing:
-
-@example
- (gdb) bt
-@end example
-
-If the GlusterFS process has crashed and dumped a core file (you can
-find this in / if running as a daemon and in the current directory
-otherwise), you can do:
-
-@example
- $ gdb /path/to/glusterfs /path/to/core.<pid>
-@end example
-
-and then get the backtrace.
-
-If the GlusterFS server or client seems to be hung, then you can get
-the backtrace by attaching gdb to the process. First get the @command{PID} of
-the process (using ps), and then do:
-
-@example
- $ gdb ./glusterfs <pid>
-@end example
-
-Press Ctrl-C to interrupt the process and then generate the backtrace.
-
-@subsection Reproducing the bug
-
-If the bug is reproducible, please include the steps necessary to do
-so. If the bug is not reproducible, send us the bug report anyway.
-
-@subsection Other information
-
-If you think it is relevant, send us also the version of @acronym{FUSE} you're
-using, the kernel version, platform.
-
-@node GNU Free Documentation Licence
-@appendix GNU Free Documentation Licence
-@include fdl.texi
-
-@node Index
-@unnumbered Index
-@printindex cp
-
-@bye
diff --git a/doc/user-guide/legacy/xlator.odg b/doc/user-guide/legacy/xlator.odg
deleted file mode 100644
index 179a65f6e26..00000000000
--- a/doc/user-guide/legacy/xlator.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/xlator.pdf b/doc/user-guide/legacy/xlator.pdf
deleted file mode 100644
index a07e14d67d2..00000000000
--- a/doc/user-guide/legacy/xlator.pdf
+++ /dev/null
Binary files differ
diff --git a/events/Makefile.am b/events/Makefile.am
new file mode 100644
index 00000000000..264bb742a80
--- /dev/null
+++ b/events/Makefile.am
@@ -0,0 +1,8 @@
+SUBDIRS = src tools
+EXTRA_DIST = eventskeygen.py
+noinst_PYTHON = eventskeygen.py
+
+if BUILD_EVENTS
+install-data-hook:
+ $(INSTALL) -d -m 755 $(DESTDIR)@GLUSTERD_WORKDIR@/events
+endif
diff --git a/events/eventskeygen.py b/events/eventskeygen.py
new file mode 100644
index 00000000000..e28ebe9b7e6
--- /dev/null
+++ b/events/eventskeygen.py
@@ -0,0 +1,243 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+
+GLUSTER_SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+eventtypes_h = os.path.join(GLUSTER_SRC_ROOT, "libglusterfs/src/eventtypes.h")
+eventtypes_py = os.path.join(GLUSTER_SRC_ROOT, "events/src/eventtypes.py")
+
+gen_header_type = sys.argv[1]
+
+# When adding new keys add it to the END
+keys = (
+ # user driven events
+ #peer and volume management events
+ "EVENT_PEER_ATTACH",
+ "EVENT_PEER_DETACH",
+ "EVENT_VOLUME_CREATE",
+ "EVENT_VOLUME_START",
+ "EVENT_VOLUME_STOP",
+ "EVENT_VOLUME_DELETE",
+ "EVENT_VOLUME_SET",
+ "EVENT_VOLUME_RESET",
+ "EVENT_BRICK_RESET_START",
+ "EVENT_BRICK_RESET_COMMIT",
+ "EVENT_BRICK_REPLACE",
+
+ #geo-rep events
+ "EVENT_GEOREP_CREATE",
+ "EVENT_GEOREP_START",
+ "EVENT_GEOREP_STOP",
+ "EVENT_GEOREP_PAUSE",
+ "EVENT_GEOREP_RESUME",
+ "EVENT_GEOREP_DELETE",
+ "EVENT_GEOREP_CONFIG_SET",
+ "EVENT_GEOREP_CONFIG_RESET",
+
+ #bitrot events
+ "EVENT_BITROT_ENABLE",
+ "EVENT_BITROT_DISABLE",
+ "EVENT_BITROT_SCRUB_THROTTLE",
+ "EVENT_BITROT_SCRUB_FREQ",
+ "EVENT_BITROT_SCRUB_OPTION",
+ "EVENT_BITROT_SCRUB_ONDEMAND",
+
+ #quota events
+ "EVENT_QUOTA_ENABLE",
+ "EVENT_QUOTA_DISABLE",
+ "EVENT_QUOTA_SET_USAGE_LIMIT",
+ "EVENT_QUOTA_SET_OBJECTS_LIMIT",
+ "EVENT_QUOTA_REMOVE_USAGE_LIMIT",
+ "EVENT_QUOTA_REMOVE_OBJECTS_LIMIT",
+ "EVENT_QUOTA_ALERT_TIME",
+ "EVENT_QUOTA_SOFT_TIMEOUT",
+ "EVENT_QUOTA_HARD_TIMEOUT",
+ "EVENT_QUOTA_DEFAULT_SOFT_LIMIT",
+
+ #snapshot events
+ "EVENT_SNAPSHOT_CREATED",
+ "EVENT_SNAPSHOT_CREATE_FAILED",
+ "EVENT_SNAPSHOT_ACTIVATED",
+ "EVENT_SNAPSHOT_ACTIVATE_FAILED",
+ "EVENT_SNAPSHOT_DEACTIVATED",
+ "EVENT_SNAPSHOT_DEACTIVATE_FAILED",
+ "EVENT_SNAPSHOT_SOFT_LIMIT_REACHED",
+ "EVENT_SNAPSHOT_HARD_LIMIT_REACHED",
+ "EVENT_SNAPSHOT_RESTORED",
+ "EVENT_SNAPSHOT_RESTORE_FAILED",
+ "EVENT_SNAPSHOT_DELETED",
+ "EVENT_SNAPSHOT_DELETE_FAILED",
+ "EVENT_SNAPSHOT_CLONED",
+ "EVENT_SNAPSHOT_CLONE_FAILED",
+ "EVENT_SNAPSHOT_CONFIG_UPDATED",
+ "EVENT_SNAPSHOT_CONFIG_UPDATE_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_INITIALISED",
+ "EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_ENABLED",
+ "EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_DISABLED",
+ "EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED",
+
+ #async events
+ #glusterd events
+ "EVENT_SVC_MANAGER_FAILED",
+ "EVENT_SVC_RECONFIGURE_FAILED",
+ "EVENT_SVC_CONNECTED",
+ "EVENT_SVC_DISCONNECTED",
+ "EVENT_PEER_STORE_FAILURE",
+ "EVENT_PEER_RPC_CREATE_FAILED",
+ "EVENT_PEER_REJECT",
+ "EVENT_PEER_CONNECT",
+ "EVENT_PEER_DISCONNECT",
+ "EVENT_PEER_NOT_FOUND",
+ "EVENT_UNKNOWN_PEER",
+ "EVENT_BRICK_START_FAILED",
+ "EVENT_BRICK_STOP_FAILED",
+ "EVENT_BRICK_DISCONNECTED",
+ "EVENT_BRICK_CONNECTED",
+ "EVENT_BRICKPATH_RESOLVE_FAILED",
+ "EVENT_NOTIFY_UNKNOWN_OP",
+ "EVENT_QUORUM_LOST",
+ "EVENT_QUORUM_REGAINED",
+ "EVENT_REBALANCE_START_FAILED",
+ "EVENT_REBALANCE_STATUS_UPDATE_FAILED",
+ "EVENT_IMPORT_QUOTA_CONF_FAILED",
+ "EVENT_IMPORT_VOLUME_FAILED",
+ "EVENT_IMPORT_BRICK_FAILED",
+ "EVENT_COMPARE_FRIEND_VOLUME_FAILED",
+ "EVENT_NFS_GANESHA_EXPORT_FAILED",
+ #ec events
+ "EVENT_EC_MIN_BRICKS_NOT_UP",
+ "EVENT_EC_MIN_BRICKS_UP",
+ #georep async events
+ "EVENT_GEOREP_FAULTY",
+ "EVENT_GEOREP_CHECKPOINT_COMPLETED",
+ "EVENT_GEOREP_ACTIVE",
+ "EVENT_GEOREP_PASSIVE",
+
+ #quota async events
+ "EVENT_QUOTA_CROSSED_SOFT_LIMIT",
+ #bitrot async events
+ "EVENT_BITROT_BAD_FILE",
+ #protocol-server events
+ "EVENT_CLIENT_CONNECT",
+ "EVENT_CLIENT_AUTH_REJECT",
+ "EVENT_CLIENT_DISCONNECT",
+ #posix events
+ "EVENT_POSIX_SAME_GFID",
+ "EVENT_POSIX_ALREADY_PART_OF_VOLUME",
+ "EVENT_POSIX_BRICK_NOT_IN_VOLUME",
+ "EVENT_POSIX_BRICK_VERIFICATION_FAILED",
+ "EVENT_POSIX_ACL_NOT_SUPPORTED",
+ "EVENT_POSIX_HEALTH_CHECK_FAILED",
+ #afr events
+ "EVENT_AFR_QUORUM_MET",
+ "EVENT_AFR_QUORUM_FAIL",
+ "EVENT_AFR_SUBVOL_UP",
+ "EVENT_AFR_SUBVOLS_DOWN",
+ "EVENT_AFR_SPLIT_BRAIN",
+
+ #tier events
+ "EVENT_TIER_ATTACH",
+ "EVENT_TIER_ATTACH_FORCE",
+ "EVENT_TIER_DETACH_START",
+ "EVENT_TIER_DETACH_STOP",
+ "EVENT_TIER_DETACH_COMMIT",
+ "EVENT_TIER_DETACH_FORCE",
+ "EVENT_TIER_PAUSE",
+ "EVENT_TIER_RESUME",
+ "EVENT_TIER_WATERMARK_HI",
+ "EVENT_TIER_WATERMARK_DROPPED_TO_MID",
+ "EVENT_TIER_WATERMARK_RAISED_TO_MID",
+ "EVENT_TIER_WATERMARK_DROPPED_TO_LOW",
+
+ #dht events
+ #add/remove brick events
+ "EVENT_VOLUME_ADD_BRICK",
+ "EVENT_VOLUME_ADD_BRICK_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_START",
+ "EVENT_VOLUME_REMOVE_BRICK_START_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_COMMIT",
+ "EVENT_VOLUME_REMOVE_BRICK_COMMIT_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_STOP",
+ "EVENT_VOLUME_REMOVE_BRICK_STOP_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_FORCE",
+ "EVENT_VOLUME_REMOVE_BRICK_FORCE_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_FAILED",
+
+ #rebalance events
+ "EVENT_VOLUME_REBALANCE_START",
+ "EVENT_VOLUME_REBALANCE_STOP",
+ "EVENT_VOLUME_REBALANCE_FAILED",
+ "EVENT_VOLUME_REBALANCE_COMPLETE",
+
+ #tier events
+ "EVENT_TIER_START",
+ "EVENT_TIER_START_FORCE",
+
+ #brick/inodes events
+ "EVENT_DHT_DISK_USAGE",
+ "EVENT_DHT_INODES_USAGE",
+)
+
+LAST_EVENT = "EVENT_LAST"
+
+ERRORS = (
+ "EVENT_SEND_OK",
+ "EVENT_ERROR_INVALID_INPUTS",
+ "EVENT_ERROR_SOCKET",
+ "EVENT_ERROR_CONNECT",
+ "EVENT_ERROR_SEND",
+ "EVENT_ERROR_RESOLVE",
+ "EVENT_ERROR_MSG_FORMAT",
+)
+
+if gen_header_type == "C_HEADER":
+ # Generate eventtypes.h
+ with open(eventtypes_h, "w") as f:
+ f.write("#ifndef __EVENTTYPES_H__\n")
+ f.write("#define __EVENTTYPES_H__\n\n")
+ f.write("typedef enum {\n")
+ for k in ERRORS:
+ f.write(" {0},\n".format(k))
+ f.write("} event_errors_t;\n")
+
+ f.write("\n")
+
+ f.write("typedef enum {\n")
+ for k in keys:
+ f.write(" {0},\n".format(k))
+
+ f.write(" {0}\n".format(LAST_EVENT))
+ f.write("} eventtypes_t;\n")
+ f.write("\n#endif /* __EVENTTYPES_H__ */\n")
+
+if gen_header_type == "PY_HEADER":
+ # Generate eventtypes.py
+ with open(eventtypes_py, "w") as f:
+ f.write("# -*- coding: utf-8 -*-\n")
+ f.write("all_events = [\n")
+ for ev in keys:
+ f.write(' "{0}",\n'.format(ev))
+
+ f.write("]\n\n")
+
+ for idx, ev in enumerate(keys):
+ f.write("{0} = {1}\n".format(ev.replace("EVENT_", ""), idx))
diff --git a/events/src/Makefile.am b/events/src/Makefile.am
new file mode 100644
index 00000000000..3b229691897
--- /dev/null
+++ b/events/src/Makefile.am
@@ -0,0 +1,41 @@
+noinst_PYTHON = $(top_srcdir)/events/eventskeygen.py
+EXTRA_DIST = glustereventsd.py __init__.py eventsapiconf.py.in \
+ handlers.py utils.py peer_eventsapi.py eventsconfig.json gf_event.py
+
+BUILT_SOURCES = eventtypes.py
+CLEANFILES = eventtypes.py
+
+eventsdir = $(GLUSTERFS_LIBEXECDIR)/gfevents
+if BUILD_EVENTS
+events_PYTHON = __init__.py gf_event.py eventsapiconf.py eventtypes.py \
+ utils.py
+endif
+# this does not work, see the Makefile.am in the root for a workaround
+#nodist_events_PYTHON = eventtypes.py
+
+eventtypes.py: $(top_srcdir)/events/eventskeygen.py
+ $(PYTHON) $(top_srcdir)/events/eventskeygen.py PY_HEADER
+
+if BUILD_EVENTS
+eventspeerscriptdir = $(GLUSTERFS_LIBEXECDIR)
+eventsconfdir = $(sysconfdir)/glusterfs
+eventsconf_DATA = eventsconfig.json
+
+events_PYTHON += handlers.py
+events_SCRIPTS = glustereventsd.py
+eventspeerscript_SCRIPTS = peer_eventsapi.py
+
+install-exec-hook:
+ $(mkdir_p) $(DESTDIR)$(sbindir)
+ rm -f $(DESTDIR)$(sbindir)/glustereventsd
+ ln -s $(GLUSTERFS_LIBEXECDIR)/gfevents/glustereventsd.py \
+ $(DESTDIR)$(sbindir)/glustereventsd
+ rm -f $(DESTDIR)$(sbindir)/gluster-eventsapi
+ ln -s $(GLUSTERFS_LIBEXECDIR)/peer_eventsapi.py \
+ $(DESTDIR)$(sbindir)/gluster-eventsapi
+
+uninstall-hook:
+ rm -f $(DESTDIR)$(sbindir)/glustereventsd
+ rm -f $(DESTDIR)$(sbindir)/gluster-eventsapi
+
+endif
diff --git a/events/src/__init__.py b/events/src/__init__.py
new file mode 100644
index 00000000000..f27c53a4df4
--- /dev/null
+++ b/events/src/__init__.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in
new file mode 100644
index 00000000000..700093bee60
--- /dev/null
+++ b/events/src/eventsapiconf.py.in
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import subprocess
+glusterd_workdir = None
+
+# Methods
+def get_glusterd_workdir():
+ global glusterd_workdir
+ if glusterd_workdir is not None:
+ return glusterd_workdir
+ proc = subprocess.Popen(["gluster", "system::", "getwd"],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines = True)
+ out, err = proc.communicate()
+ if proc.returncode == 0:
+ glusterd_workdir = out.strip()
+ else:
+ glusterd_workdir = "@GLUSTERD_WORKDIR@"
+ return glusterd_workdir
+
+SERVER_ADDRESS = "0.0.0.0"
+SERVER_ADDRESSv4 = "0.0.0.0"
+SERVER_ADDRESSv6 = "::1"
+DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json"
+CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json"
+CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC
+WEBHOOKS_FILE_TO_SYNC = "/events/webhooks.json"
+WEBHOOKS_FILE = get_glusterd_workdir() + WEBHOOKS_FILE_TO_SYNC
+LOG_FILE = "@localstatedir@/log/glusterfs/events.log"
+EVENTSD = "glustereventsd"
+CONFIG_KEYS = ["log-level", "port", "disable-events-log"]
+BOOL_CONFIGS = ["disable-events-log"]
+INT_CONFIGS = ["port"]
+RESTART_CONFIGS = ["port"]
+EVENTS_ENABLED = @EVENTS_ENABLED@
+UUID_FILE = get_glusterd_workdir() + "/glusterd.info"
+PID_FILE = "@localstatedir@/run/glustereventsd.pid"
+AUTO_BOOL_ATTRIBUTES = ["force", "push-pem", "no-verify"]
+AUTO_INT_ATTRIBUTES = ["ssh-port"]
+CERTS_DIR = get_glusterd_workdir() + "/events"
+
+# Errors
+ERROR_SAME_CONFIG = 2
+ERROR_ALL_NODES_STATUS_NOT_OK = 3
+ERROR_PARTIAL_SUCCESS = 4
+ERROR_WEBHOOK_ALREADY_EXISTS = 5
+ERROR_WEBHOOK_NOT_EXISTS = 6
+ERROR_INVALID_CONFIG = 7
+ERROR_WEBHOOK_SYNC_FAILED = 8
+ERROR_CONFIG_SYNC_FAILED = 9
diff --git a/events/src/eventsconfig.json b/events/src/eventsconfig.json
new file mode 100644
index 00000000000..89e5b9c1d68
--- /dev/null
+++ b/events/src/eventsconfig.json
@@ -0,0 +1,5 @@
+{
+ "log-level": "INFO",
+ "port": 24009,
+ "disable-events-log": false
+}
diff --git a/events/src/gf_event.py b/events/src/gf_event.py
new file mode 100644
index 00000000000..260b0d9aa48
--- /dev/null
+++ b/events/src/gf_event.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import socket
+import time
+
+from gfevents.eventsapiconf import SERVER_ADDRESS, EVENTS_ENABLED
+from gfevents.eventtypes import all_events
+
+from gfevents.utils import logger, setup_logger, get_config
+
+# Run this when this lib loads
+setup_logger()
+
+
+def gf_event(event_type, **kwargs):
+ if EVENTS_ENABLED == 0:
+ return
+
+ if not isinstance(event_type, int) or event_type >= len(all_events):
+ logger.error("Invalid Event Type: {0}".format(event_type))
+ return
+
+ try:
+ client = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ except socket.error as e:
+ logger.error("Unable to connect to events Server: {0}".format(e))
+ return
+
+ port = get_config("port")
+ if port is None:
+ logger.error("Unable to get eventsd port details")
+ return
+
+ # Convert key value args into KEY1=VALUE1;KEY2=VALUE2;..
+ msg = ""
+ for k, v in kwargs.items():
+ msg += "{0}={1};".format(k, v)
+
+ # <TIMESTAMP> <EVENT_TYPE> <MSG>
+ msg = "{0} {1} {2}".format(int(time.time()), event_type, msg.strip(";")).encode()
+
+ try:
+ sent = client.sendto(msg, (SERVER_ADDRESS, port))
+ assert sent == len(msg)
+ except socket.error as e:
+ logger.error("Unable to Send message: {0}".format(e))
+ except AssertionError:
+ logger.error("Unable to send message. Sent: {0}, Actual: {1}".format(
+ sent, len(msg)))
+ finally:
+ client.close()
diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py
new file mode 100644
index 00000000000..341a3b60947
--- /dev/null
+++ b/events/src/glustereventsd.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import sys
+import signal
+import threading
+try:
+ import socketserver
+except ImportError:
+ import SocketServer as socketserver
+import socket
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+from eventtypes import all_events
+import handlers
+import utils
+from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE
+from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES
+from utils import logger, PidFile, PidFileLockFailed, boolify
+
+# Subclass so that specifically IPv4 packets are captured
+class UDPServerv4(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET
+
+# Subclass so that specifically IPv6 packets are captured
+class UDPServerv6(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET6
+
+class GlusterEventsRequestHandler(socketserver.BaseRequestHandler):
+
+ def handle(self):
+ data = self.request[0].strip()
+ if sys.version_info >= (3,):
+ data = self.request[0].strip().decode("utf-8")
+
+ logger.debug("EVENT: {0} from {1}".format(repr(data),
+ self.client_address[0]))
+ try:
+ # Event Format <TIMESTAMP> <TYPE> <DETAIL>
+ ts, key, value = data.split(" ", 2)
+ except ValueError:
+ logger.warn("Invalid Event Format {0}".format(data))
+ return
+
+ data_dict = {}
+ try:
+ # Format key=value;key=value
+ data_dict = dict(x.split('=') for x in value.split(';'))
+ except ValueError:
+ logger.warn("Unable to parse Event {0}".format(data))
+ return
+
+ for k, v in data_dict.items():
+ try:
+ if k in AUTO_BOOL_ATTRIBUTES:
+ data_dict[k] = boolify(v)
+ if k in AUTO_INT_ATTRIBUTES:
+ data_dict[k] = int(v)
+ except ValueError:
+ # Auto Conversion failed, Retain the old value
+ continue
+
+ try:
+ # Event Type to Function Map, Received event data will be in
+ # the form <TIMESTAMP> <TYPE> <DETAIL>, Get Event name for the
+ # received Type/Key and construct a function name starting with
+ # handle_ For example: handle_event_volume_create
+ func_name = "handle_" + all_events[int(key)].lower()
+ except IndexError:
+ # This type of Event is not handled?
+ logger.warn("Unhandled Event: {0}".format(key))
+ func_name = None
+
+ if func_name is not None:
+ # Get function from handlers module
+ func = getattr(handlers, func_name, None)
+ # If func is None, then handler unimplemented for that event.
+ if func is not None:
+ func(ts, int(key), data_dict)
+ else:
+ # Generic handler, broadcast whatever received
+ handlers.generic_handler(ts, int(key), data_dict)
+
+
+def signal_handler_sigusr2(sig, frame):
+ utils.load_all()
+ utils.restart_webhook_pool()
+
+
+def UDP_server_thread(sock):
+ sock.serve_forever()
+
+
+def init_event_server():
+ utils.setup_logger()
+ utils.load_all()
+ utils.init_webhook_pool()
+
+ port = utils.get_config("port")
+ if port is None:
+ sys.stderr.write("Unable to get Port details from Config\n")
+ sys.exit(1)
+
+ # Creating the Eventing Server, UDP Server for IPv4 packets
+ try:
+ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port),
+ GlusterEventsRequestHandler)
+ except socket.error as e:
+ sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e))
+ sys.exit(1)
+ # Creating the Eventing Server, UDP Server for IPv6 packets
+ try:
+ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port),
+ GlusterEventsRequestHandler)
+ except socket.error as e:
+ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e))
+ sys.exit(1)
+ server_thread1 = threading.Thread(target=UDP_server_thread,
+ args=(serverv4,))
+ server_thread2 = threading.Thread(target=UDP_server_thread,
+ args=(serverv6,))
+ server_thread1.start()
+ server_thread2.start()
+
+
+def get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+ parser.add_argument("-p", "--pid-file", help="PID File",
+ default=PID_FILE)
+
+ return parser.parse_args()
+
+
+def main():
+ args = get_args()
+ try:
+ with PidFile(args.pid_file):
+ init_event_server()
+ except PidFileLockFailed as e:
+ sys.stderr.write("Failed to get lock for pid file({0}): {1}".format(
+ args.pid_file, e))
+ except KeyboardInterrupt:
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ signal.signal(signal.SIGUSR2, signal_handler_sigusr2)
+ main()
diff --git a/events/src/handlers.py b/events/src/handlers.py
new file mode 100644
index 00000000000..7746d488bf3
--- /dev/null
+++ b/events/src/handlers.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import utils
+
+
+def generic_handler(ts, key, data):
+ """
+ Generic handler to broadcast message to all peers, custom handlers
+ can be created by func name handler_<event_name>
+ Ex: handle_event_volume_create(ts, key, data)
+ """
+ utils.publish(ts, key, data)
+
+
+def handle_event_volume_set(ts, key, data):
+ """
+ Received data will have all the options as one string, split into
+ list of options. "key1,value1,key2,value2" into
+ [[key1, value1], [key2, value2]]
+ """
+ opts = data.get("options", "").strip(",").split(",")
+ data["options"] = []
+ for i, opt in enumerate(opts):
+ if i % 2 == 0:
+ # Add new array with key
+ data["options"].append([opt])
+ else:
+ # Add to the last added array
+ data["options"][-1].append(opt)
+
+ utils.publish(ts, key, data)
diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py
new file mode 100644
index 00000000000..4d2e5f35b1c
--- /dev/null
+++ b/events/src/peer_eventsapi.py
@@ -0,0 +1,669 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import os
+import json
+from errno import EEXIST
+import fcntl
+from errno import EACCES, EAGAIN
+import signal
+import sys
+import time
+
+import requests
+from prettytable import PrettyTable
+
+from gluster.cliutils import (Cmd, node_output_ok, node_output_notok,
+ sync_file_to_peers, GlusterCmdException,
+ output_error, execute_in_peers, runcli,
+ set_common_args_func)
+from gfevents.utils import LockedOpen, get_jwt_token, save_https_cert
+
+from gfevents.eventsapiconf import (WEBHOOKS_FILE_TO_SYNC,
+ WEBHOOKS_FILE,
+ DEFAULT_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE_TO_SYNC,
+ EVENTSD,
+ CONFIG_KEYS,
+ BOOL_CONFIGS,
+ INT_CONFIGS,
+ PID_FILE,
+ RESTART_CONFIGS,
+ ERROR_INVALID_CONFIG,
+ ERROR_WEBHOOK_NOT_EXISTS,
+ ERROR_CONFIG_SYNC_FAILED,
+ ERROR_WEBHOOK_ALREADY_EXISTS,
+ ERROR_PARTIAL_SUCCESS,
+ ERROR_ALL_NODES_STATUS_NOT_OK,
+ ERROR_SAME_CONFIG,
+ ERROR_WEBHOOK_SYNC_FAILED,
+ CERTS_DIR)
+
+
+def handle_output_error(err, errcode=1, json_output=False):
+ if json_output:
+ print (json.dumps({
+ "output": "",
+ "error": err
+ }))
+ sys.exit(errcode)
+ else:
+ output_error(err, errcode)
+
+
+def file_content_overwrite(fname, data):
+ with open(fname + ".tmp", "w") as f:
+ f.write(json.dumps(data))
+
+ os.rename(fname + ".tmp", fname)
+
+
+def create_custom_config_file_if_not_exists(args):
+ try:
+ config_dir = os.path.dirname(CUSTOM_CONFIG_FILE)
+ mkdirp(config_dir)
+ except OSError as e:
+ handle_output_error("Failed to create dir %s: %s" % (config_dir, e),
+ json_output=args.json)
+
+ if not os.path.exists(CUSTOM_CONFIG_FILE):
+ with open(CUSTOM_CONFIG_FILE, "w") as f:
+ f.write("{}")
+
+
+def create_webhooks_file_if_not_exists(args):
+ try:
+ webhooks_dir = os.path.dirname(WEBHOOKS_FILE)
+ mkdirp(webhooks_dir)
+ except OSError as e:
+ handle_output_error("Failed to create dir %s: %s" % (webhooks_dir, e),
+ json_output=args.json)
+
+ if not os.path.exists(WEBHOOKS_FILE):
+ with open(WEBHOOKS_FILE, "w") as f:
+ f.write("{}")
+
+
+def boolify(value):
+ val = False
+ if value.lower() in ["enabled", "true", "on", "yes"]:
+ val = True
+ return val
+
+
+def mkdirp(path, exit_on_err=False, logger=None):
+ """
+ Try creating required directory structure
+ ignore EEXIST and raise exception for rest of the errors.
+ Print error in stderr and exit
+ """
+ try:
+ os.makedirs(path)
+ except OSError as e:
+ if e.errno != EEXIST or not os.path.isdir(path):
+ raise
+
+
+def is_active():
+ state = False
+ try:
+ with open(PID_FILE, "a+") as f:
+ fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+ state = False
+ except (IOError, OSError) as e:
+ if e.errno in (EACCES, EAGAIN):
+ # cannot grab. so, process still running..move on
+ state = True
+ else:
+ state = False
+ return state
+
+
+def reload_service():
+ pid = None
+ if is_active():
+ with open(PID_FILE) as f:
+ try:
+ pid = int(f.read().strip())
+ except ValueError:
+ pid = None
+ if pid is not None:
+ os.kill(pid, signal.SIGUSR2)
+
+ return (0, "", "")
+
+
+def rows_to_json(json_out, column_name, rows):
+ num_ok_rows = 0
+ for row in rows:
+ num_ok_rows += 1 if row.ok else 0
+ json_out.append({
+ "node": row.hostname,
+ "node_status": "UP" if row.node_up else "DOWN",
+ column_name: "OK" if row.ok else "NOT OK",
+ "error": row.error
+ })
+ return num_ok_rows
+
+
+def rows_to_table(table, rows):
+ num_ok_rows = 0
+ for row in rows:
+ num_ok_rows += 1 if row.ok else 0
+ table.add_row([row.hostname,
+ "UP" if row.node_up else "DOWN",
+ "OK" if row.ok else "NOT OK: {0}".format(
+ row.error)])
+ return num_ok_rows
+
+
+def sync_to_peers(args):
+ if os.path.exists(WEBHOOKS_FILE):
+ try:
+ sync_file_to_peers(WEBHOOKS_FILE_TO_SYNC)
+ except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+ errmsg = e.message[2] if e.message[2] else e.message[1]
+ handle_output_error("Failed to sync Webhooks file: [Error: {0}]"
+ "{1}".format(e.message[0], errmsg),
+ errcode=ERROR_WEBHOOK_SYNC_FAILED,
+ json_output=args.json)
+
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ try:
+ sync_file_to_peers(CUSTOM_CONFIG_FILE_TO_SYNC)
+ except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+ errmsg = e.message[2] if e.message[2] else e.message[1]
+ handle_output_error("Failed to sync Config file: [Error: {0}]"
+ "{1}".format(e.message[0], errmsg),
+ errcode=ERROR_CONFIG_SYNC_FAILED,
+ json_output=args.json)
+
+ out = execute_in_peers("node-reload")
+ if not args.json:
+ table = PrettyTable(["NODE", "NODE STATUS", "SYNC STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["SYNC STATUS"] = "r"
+
+ json_out = []
+ if args.json:
+ num_ok_rows = rows_to_json(json_out, "sync_status", out)
+ else:
+ num_ok_rows = rows_to_table(table, out)
+
+ ret = 0
+ if num_ok_rows == 0:
+ ret = ERROR_ALL_NODES_STATUS_NOT_OK
+ elif num_ok_rows != len(out):
+ ret = ERROR_PARTIAL_SUCCESS
+
+ if args.json:
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ print (table)
+
+ # If sync status is not ok for any node set error code as partial success
+ sys.exit(ret)
+
+
+def node_output_handle(resp):
+ rc, out, err = resp
+ if rc == 0:
+ node_output_ok(out)
+ else:
+ node_output_notok(err)
+
+
+def action_handle(action, json_output=False):
+ out = execute_in_peers("node-" + action)
+ column_name = action.upper()
+ if action == "status":
+ column_name = EVENTSD.upper()
+
+ if not json_output:
+ table = PrettyTable(["NODE", "NODE STATUS", column_name + " STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align[column_name + " STATUS"] = "r"
+
+ json_out = []
+ if json_output:
+ rows_to_json(json_out, column_name.lower() + "_status", out)
+ else:
+ rows_to_table(table, out)
+
+ return json_out if json_output else table
+
+
+class NodeReload(Cmd):
+ name = "node-reload"
+
+ def run(self, args):
+ node_output_handle(reload_service())
+
+
+class ReloadCmd(Cmd):
+ name = "reload"
+
+ def run(self, args):
+ out = action_handle("reload", args.json)
+ if args.json:
+ print (json.dumps({
+ "output": out,
+ "error": ""
+ }))
+ else:
+ print (out)
+
+
+class NodeStatus(Cmd):
+ name = "node-status"
+
+ def run(self, args):
+ node_output_ok("UP" if is_active() else "DOWN")
+
+
+class StatusCmd(Cmd):
+ name = "status"
+
+ def run(self, args):
+ webhooks = {}
+ if os.path.exists(WEBHOOKS_FILE):
+ webhooks = json.load(open(WEBHOOKS_FILE))
+
+ json_out = {"webhooks": [], "data": []}
+ if args.json:
+ json_out["webhooks"] = webhooks.keys()
+ else:
+ print ("Webhooks: " + ("" if webhooks else "None"))
+ for w in webhooks:
+ print (w)
+
+ print ()
+
+ out = action_handle("status", args.json)
+ if args.json:
+ json_out["data"] = out
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ print (out)
+
+
+class WebhookAddCmd(Cmd):
+ name = "webhook-add"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+ parser.add_argument("--bearer_token", "-t", help="Bearer Token",
+ default="")
+ parser.add_argument("--secret", "-s",
+ help="Secret to add JWT Bearer Token", default="")
+
+ def run(self, args):
+ create_webhooks_file_if_not_exists(args)
+
+ with LockedOpen(WEBHOOKS_FILE, 'r+'):
+ data = json.load(open(WEBHOOKS_FILE))
+ if data.get(args.url, None) is not None:
+ handle_output_error("Webhook already exists",
+ errcode=ERROR_WEBHOOK_ALREADY_EXISTS,
+ json_output=args.json)
+
+ data[args.url] = {"token": args.bearer_token,
+ "secret": args.secret}
+ file_content_overwrite(WEBHOOKS_FILE, data)
+
+ sync_to_peers(args)
+
+
+class WebhookModCmd(Cmd):
+ name = "webhook-mod"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+ parser.add_argument("--bearer_token", "-t", help="Bearer Token",
+ default="")
+ parser.add_argument("--secret", "-s",
+ help="Secret to add JWT Bearer Token", default="")
+
+ def run(self, args):
+ create_webhooks_file_if_not_exists(args)
+
+ with LockedOpen(WEBHOOKS_FILE, 'r+'):
+ data = json.load(open(WEBHOOKS_FILE))
+ if data.get(args.url, None) is None:
+ handle_output_error("Webhook does not exists",
+ errcode=ERROR_WEBHOOK_NOT_EXISTS,
+ json_output=args.json)
+
+ if isinstance(data[args.url], str):
+ data[args.url]["token"] = data[args.url]
+
+ if args.bearer_token != "":
+ data[args.url]["token"] = args.bearer_token
+
+ if args.secret != "":
+ data[args.url]["secret"] = args.secret
+
+ file_content_overwrite(WEBHOOKS_FILE, data)
+
+ sync_to_peers(args)
+
+
+class WebhookDelCmd(Cmd):
+ name = "webhook-del"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+
+ def run(self, args):
+ create_webhooks_file_if_not_exists(args)
+
+ with LockedOpen(WEBHOOKS_FILE, 'r+'):
+ data = json.load(open(WEBHOOKS_FILE))
+ if data.get(args.url, None) is None:
+ handle_output_error("Webhook does not exists",
+ errcode=ERROR_WEBHOOK_NOT_EXISTS,
+ json_output=args.json)
+
+ del data[args.url]
+ file_content_overwrite(WEBHOOKS_FILE, data)
+
+ sync_to_peers(args)
+
+
+class NodeWebhookTestCmd(Cmd):
+ name = "node-webhook-test"
+
+ def args(self, parser):
+ parser.add_argument("url")
+ parser.add_argument("bearer_token")
+ parser.add_argument("secret")
+
+ def run(self, args):
+ http_headers = {}
+ hashval = ""
+ if args.bearer_token != ".":
+ hashval = args.bearer_token
+
+ if args.secret != ".":
+ hashval = get_jwt_token(args.secret, "TEST", int(time.time()))
+
+ if hashval:
+ http_headers["Authorization"] = "Bearer " + hashval
+
+ urldata = requests.utils.urlparse(args.url)
+ parts = urldata.netloc.split(":")
+ domain = parts[0]
+ # Default https port if not specified
+ port = 443
+ if len(parts) == 2:
+ port = int(parts[1])
+
+ cert_path = os.path.join(CERTS_DIR, args.url.replace("/", "_").strip())
+ verify = True
+ while True:
+ try:
+ resp = requests.post(args.url, headers=http_headers,
+ verify=verify)
+ # Successful webhook push
+ break
+ except requests.exceptions.SSLError as e:
+ # If verify is equal to cert path, but still failed with
+ # SSLError, Looks like some issue with custom downloaded
+ # certificate, Try with verify = false
+ if verify == cert_path:
+ verify = False
+ continue
+
+ # If verify is instance of bool and True, then custom cert
+ # is required, download the cert and retry
+ try:
+ save_https_cert(domain, port, cert_path)
+ verify = cert_path
+ except Exception:
+ verify = False
+
+ # Done with collecting cert, continue
+ continue
+ except Exception as e:
+ node_output_notok("{0}".format(e))
+ break
+
+ if resp.status_code != 200:
+ node_output_notok("{0}".format(resp.status_code))
+
+ node_output_ok()
+
+
+class WebhookTestCmd(Cmd):
+ name = "webhook-test"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+ parser.add_argument("--bearer_token", "-t", help="Bearer Token")
+ parser.add_argument("--secret", "-s",
+ help="Secret to generate Bearer Token")
+
+ def run(self, args):
+ url = args.url
+ bearer_token = args.bearer_token
+ secret = args.secret
+
+ if not args.url:
+ url = "."
+ if not args.bearer_token:
+ bearer_token = "."
+ if not args.secret:
+ secret = "."
+
+ out = execute_in_peers("node-webhook-test", [url, bearer_token,
+ secret])
+
+ if not args.json:
+ table = PrettyTable(["NODE", "NODE STATUS", "WEBHOOK STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["WEBHOOK STATUS"] = "r"
+
+ num_ok_rows = 0
+ json_out = []
+ if args.json:
+ num_ok_rows = rows_to_json(json_out, "webhook_status", out)
+ else:
+ num_ok_rows = rows_to_table(table, out)
+
+ ret = 0
+ if num_ok_rows == 0:
+ ret = ERROR_ALL_NODES_STATUS_NOT_OK
+ elif num_ok_rows != len(out):
+ ret = ERROR_PARTIAL_SUCCESS
+
+ if args.json:
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ print (table)
+
+ sys.exit(ret)
+
+
+class ConfigGetCmd(Cmd):
+ name = "config-get"
+
+ def args(self, parser):
+ parser.add_argument("--name", help="Config Name")
+
+ def run(self, args):
+ data = json.load(open(DEFAULT_CONFIG_FILE))
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ data.update(json.load(open(CUSTOM_CONFIG_FILE)))
+
+ if args.name is not None and args.name not in CONFIG_KEYS:
+ handle_output_error("Invalid Config item",
+ errcode=ERROR_INVALID_CONFIG,
+ json_output=args.json)
+
+ if args.json:
+ json_out = {}
+ if args.name is None:
+ json_out = data
+ else:
+ json_out[args.name] = data[args.name]
+
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ table = PrettyTable(["NAME", "VALUE"])
+ if args.name is None:
+ for k, v in data.items():
+ table.add_row([k, v])
+ else:
+ table.add_row([args.name, data[args.name]])
+
+ print (table)
+
+
+def read_file_content_json(fname):
+ content = "{}"
+ with open(fname) as f:
+ content = f.read()
+ if content.strip() == "":
+ content = "{}"
+
+ return json.loads(content)
+
+
+class ConfigSetCmd(Cmd):
+ name = "config-set"
+
+ def args(self, parser):
+ parser.add_argument("name", help="Config Name")
+ parser.add_argument("value", help="Config Value")
+
+ def run(self, args):
+ if args.name not in CONFIG_KEYS:
+ handle_output_error("Invalid Config item",
+ errcode=ERROR_INVALID_CONFIG,
+ json_output=args.json)
+
+ create_custom_config_file_if_not_exists(args)
+
+ with LockedOpen(CUSTOM_CONFIG_FILE, 'r+'):
+ data = json.load(open(DEFAULT_CONFIG_FILE))
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ config_json = read_file_content_json(CUSTOM_CONFIG_FILE)
+ data.update(config_json)
+
+ # Do Nothing if same as previous value
+ if data[args.name] == args.value:
+ handle_output_error("Config value not changed. Same config",
+ errcode=ERROR_SAME_CONFIG,
+ json_output=args.json)
+
+ # TODO: Validate Value
+ new_data = read_file_content_json(CUSTOM_CONFIG_FILE)
+
+ v = args.value
+ if args.name in BOOL_CONFIGS:
+ v = boolify(args.value)
+
+ if args.name in INT_CONFIGS:
+ v = int(args.value)
+
+ new_data[args.name] = v
+ file_content_overwrite(CUSTOM_CONFIG_FILE, new_data)
+
+ # If any value changed which requires restart of REST server
+ restart = False
+ if args.name in RESTART_CONFIGS:
+ restart = True
+
+ if restart:
+ print ("\nRestart glustereventsd in all nodes")
+
+ sync_to_peers(args)
+
+
+class ConfigResetCmd(Cmd):
+ name = "config-reset"
+
+ def args(self, parser):
+ parser.add_argument("name", help="Config Name or all")
+
+ def run(self, args):
+ create_custom_config_file_if_not_exists(args)
+
+ with LockedOpen(CUSTOM_CONFIG_FILE, 'r+'):
+ changed_keys = []
+ data = {}
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ data = read_file_content_json(CUSTOM_CONFIG_FILE)
+
+ # If No data available in custom config or, the specific config
+ # item is not available in custom config
+ if not data or \
+ (args.name != "all" and data.get(args.name, None) is None):
+ handle_output_error("Config value not reset. Already "
+ "set to default value",
+ errcode=ERROR_SAME_CONFIG,
+ json_output=args.json)
+
+ if args.name.lower() == "all":
+ for k, v in data.items():
+ changed_keys.append(k)
+
+ # Reset all keys
+ file_content_overwrite(CUSTOM_CONFIG_FILE, {})
+ else:
+ changed_keys.append(args.name)
+ del data[args.name]
+ file_content_overwrite(CUSTOM_CONFIG_FILE, data)
+
+ # If any value changed which requires restart of REST server
+ restart = False
+ for key in changed_keys:
+ if key in RESTART_CONFIGS:
+ restart = True
+ break
+
+ if restart:
+ print ("\nRestart glustereventsd in all nodes")
+
+ sync_to_peers(args)
+
+
+class SyncCmd(Cmd):
+ name = "sync"
+
+ def run(self, args):
+ sync_to_peers(args)
+
+
+def common_args(parser):
+ parser.add_argument("--json", help="JSON Output", action="store_true")
+
+
+if __name__ == "__main__":
+ set_common_args_func(common_args)
+ runcli()
diff --git a/events/src/utils.py b/events/src/utils.py
new file mode 100644
index 00000000000..6d4e0791a2b
--- /dev/null
+++ b/events/src/utils.py
@@ -0,0 +1,445 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import sys
+import json
+import os
+import logging
+import logging.handlers
+import fcntl
+from errno import EBADF
+from threading import Thread
+import multiprocessing
+try:
+ from queue import Queue
+except ImportError:
+ from Queue import Queue
+from datetime import datetime, timedelta
+import base64
+import hmac
+from hashlib import sha256
+from calendar import timegm
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from gfevents.eventsapiconf import (LOG_FILE,
+ WEBHOOKS_FILE,
+ DEFAULT_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE,
+ UUID_FILE,
+ CERTS_DIR)
+from gfevents import eventtypes
+
+
+# Webhooks list
+_webhooks = {}
+_webhooks_file_mtime = 0
+# Default Log Level
+_log_level = "INFO"
+# Config Object
+_config = {}
+
+# Init Logger instance
+logger = logging.getLogger(__name__)
+NodeID = None
+webhooks_pool = None
+
+
+def boolify(value):
+ value = str(value)
+ if value.lower() in ["1", "on", "true", "yes"]:
+ return True
+ else:
+ return False
+
+
+def log_event(data):
+ # Log all published events unless it is disabled
+ if not _config.get("disable-events-log", False):
+ logger.info(repr(data))
+
+
+def get_node_uuid():
+ val = None
+ with open(UUID_FILE) as f:
+ for line in f:
+ if line.startswith("UUID="):
+ val = line.strip().split("=")[-1]
+ break
+ return val
+
+
+def get_config(key, default_value=None):
+ if not _config:
+ load_config()
+ return _config.get(key, default_value)
+
+
+def get_event_type_name(idx):
+ """
+ Returns Event Type text from the index. For example, VOLUME_CREATE
+ """
+ return eventtypes.all_events[idx].replace("EVENT_", "")
+
+
+def setup_logger():
+ """
+ Logging initialization, Log level by default will be INFO, once config
+ file is read, respective log_level will be set.
+ """
+ global logger
+ logger.setLevel(logging.INFO)
+
+ # create the logging file handler
+ fh = logging.handlers.WatchedFileHandler(LOG_FILE)
+
+ formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
+ "[%(module)s - %(lineno)s:%(funcName)s] "
+ "- %(message)s")
+
+ fh.setFormatter(formatter)
+
+ # add handler to logger object
+ logger.addHandler(fh)
+
+
+def load_config():
+ """
+ Load/Reload the config from REST Config files. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ global _config
+ _config = {}
+ if os.path.exists(DEFAULT_CONFIG_FILE):
+ _config = json.load(open(DEFAULT_CONFIG_FILE))
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ _config.update(json.load(open(CUSTOM_CONFIG_FILE)))
+
+
+def load_log_level():
+ """
+ Reads log_level from Config file and sets accordingly. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ global logger, _log_level
+ new_log_level = _config.get("log-level", "INFO")
+ if _log_level != new_log_level:
+ logger.setLevel(getattr(logging, new_log_level.upper()))
+ _log_level = new_log_level.upper()
+
+
+def load_webhooks():
+ """
+ Load/Reload the webhooks list. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ global _webhooks, _webhooks_file_mtime
+ _webhooks = {}
+ if os.path.exists(WEBHOOKS_FILE):
+ _webhooks = json.load(open(WEBHOOKS_FILE))
+ st = os.lstat(WEBHOOKS_FILE)
+ _webhooks_file_mtime = st.st_mtime
+
+
+def load_all():
+ """
+ Wrapper function to call all load/reload functions. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ load_config()
+ load_webhooks()
+ load_log_level()
+
+
+def publish(ts, event_key, data):
+ global NodeID
+ if NodeID is None:
+ NodeID = get_node_uuid()
+
+ autoload_webhooks()
+
+ message = {
+ "nodeid": NodeID,
+ "ts": int(ts),
+ "event": get_event_type_name(event_key),
+ "message": data
+ }
+
+ log_event(message)
+
+ if _webhooks:
+ plugin_webhook(message)
+ else:
+ # TODO: Default action?
+ pass
+
+
+def autoload_webhooks():
+ global _webhooks_file_mtime
+ try:
+ st = os.lstat(WEBHOOKS_FILE)
+ except OSError:
+ st = None
+
+ if st is not None:
+ # If Stat is available and mtime is not matching with
+ # previously recorded mtime, reload the webhooks file
+ if st.st_mtime != _webhooks_file_mtime:
+ load_webhooks()
+
+
+def base64_urlencode(inp):
+ return base64.urlsafe_b64encode(inp).replace("=", "").strip()
+
+
+def get_jwt_token(secret, event_type, event_ts, jwt_expiry_time_seconds=60):
+ exp = datetime.utcnow() + timedelta(seconds=jwt_expiry_time_seconds)
+ payload = {
+ "exp": timegm(exp.utctimetuple()),
+ "iss": "gluster",
+ "sub": event_type,
+ "iat": event_ts
+ }
+ header = '{"alg":"HS256","typ":"JWT"}'
+ payload = json.dumps(payload, separators=(',', ':'), sort_keys=True)
+ msg = base64_urlencode(header) + "." + base64_urlencode(payload)
+ return "%s.%s" % (
+ msg,
+ base64_urlencode(hmac.HMAC(str(secret), msg, sha256).digest())
+ )
+
+
+def save_https_cert(domain, port, cert_path):
+ import ssl
+
+ # Cert file already available for this URL
+ if os.path.exists(cert_path):
+ return
+
+ cert_data = ssl.get_server_certificate((domain, port))
+ with open(cert_path, "w") as f:
+ f.write(cert_data)
+
+
+def publish_to_webhook(url, token, secret, message_queue):
+ # Import requests here since not used in any other place
+ import requests
+
+ http_headers = {"Content-Type": "application/json"}
+ urldata = requests.utils.urlparse(url)
+ parts = urldata.netloc.split(":")
+ domain = parts[0]
+ # Default https port if not specified
+ port = 443
+ if len(parts) == 2:
+ port = int(parts[1])
+
+ cert_path = os.path.join(CERTS_DIR, url.replace("/", "_").strip())
+
+ while True:
+ hashval = ""
+ event_type, event_ts, message_json = message_queue.get()
+ if token != "" and token is not None:
+ hashval = token
+
+ if secret != "" and secret is not None:
+ hashval = get_jwt_token(secret, event_type, event_ts)
+
+ if hashval:
+ http_headers["Authorization"] = "Bearer " + hashval
+
+ verify = True
+ while True:
+ try:
+ resp = requests.post(url, headers=http_headers,
+ data=message_json,
+ verify=verify)
+ # Successful webhook push
+ message_queue.task_done()
+ if resp.status_code != 200:
+ logger.warn("Event push failed to URL: {url}, "
+ "Event: {event}, "
+ "Status Code: {status_code}".format(
+ url=url,
+ event=message_json,
+ status_code=resp.status_code))
+ break
+ except requests.exceptions.SSLError as e:
+ # If verify is equal to cert path, but still failed with
+ # SSLError, Looks like some issue with custom downloaded
+ # certificate, Try with verify = false
+ if verify == cert_path:
+ logger.warn("Event push failed with certificate, "
+ "ignoring verification url={0} "
+ "Error={1}".format(url, e))
+ verify = False
+ continue
+
+ # If verify is instance of bool and True, then custom cert
+ # is required, download the cert and retry
+ try:
+ save_https_cert(domain, port, cert_path)
+ verify = cert_path
+ except Exception as ex:
+ verify = False
+ logger.warn("Unable to get Server certificate, "
+ "ignoring verification url={0} "
+ "Error={1}".format(url, ex))
+
+ # Done with collecting cert, continue
+ continue
+ except Exception as e:
+ logger.warn("Event push failed to URL: {url}, "
+ "Event: {event}, "
+ "Status: {error}".format(
+ url=url,
+ event=message_json,
+ error=e))
+ message_queue.task_done()
+ break
+
+
+def plugin_webhook(message):
+ message_json = json.dumps(message, sort_keys=True)
+ logger.debug("EVENT: {0}".format(message_json))
+ webhooks_pool.send(message["event"], message["ts"], message_json)
+
+
+class LockedOpen(object):
+
+ def __init__(self, filename, *args, **kwargs):
+ self.filename = filename
+ self.open_args = args
+ self.open_kwargs = kwargs
+ self.fileobj = None
+
+ def __enter__(self):
+ """
+ If two processes compete to update a file, The first process
+ gets the lock and the second process is blocked in the fcntl.flock()
+ call. When first process replaces the file and releases the lock,
+ the already open file descriptor in the second process now points
+ to a "ghost" file(not reachable by any path name) with old contents.
+ To avoid that conflict, check the fd already opened is same or
+ not. Open new one if not same
+ """
+ f = open(self.filename, *self.open_args, **self.open_kwargs)
+ while True:
+ fcntl.flock(f, fcntl.LOCK_EX)
+ fnew = open(self.filename, *self.open_args, **self.open_kwargs)
+ if os.path.sameopenfile(f.fileno(), fnew.fileno()):
+ fnew.close()
+ break
+ else:
+ f.close()
+ f = fnew
+ self.fileobj = f
+ return f
+
+ def __exit__(self, _exc_type, _exc_value, _traceback):
+ self.fileobj.close()
+
+
+class PidFileLockFailed(Exception):
+ pass
+
+
+class PidFile(object):
+ def __init__(self, filename):
+ self.filename = filename
+ self.pid = os.getpid()
+ self.fh = None
+
+ def cleanup(self, remove_file=True):
+ try:
+ if self.fh is not None:
+ self.fh.close()
+ except IOError as exc:
+ if exc.errno != EBADF:
+ raise
+ finally:
+ if os.path.isfile(self.filename) and remove_file:
+ os.remove(self.filename)
+
+ def __enter__(self):
+ self.fh = open(self.filename, 'a+')
+ try:
+ fcntl.flock(self.fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError as exc:
+ self.cleanup(remove_file=False)
+ raise PidFileLockFailed(exc)
+
+ self.fh.seek(0)
+ self.fh.truncate()
+ self.fh.write("%d\n" % self.pid)
+ self.fh.flush()
+ self.fh.seek(0)
+ return self
+
+ def __exit__(self, _exc_type, _exc_value, _traceback):
+ self.cleanup()
+
+
+def webhook_monitor(proc_queue, webhooks):
+ queues = {}
+ for url, data in webhooks.items():
+ if isinstance(data, str):
+ token = data
+ secret = None
+ else:
+ token = data["token"]
+ secret = data["secret"]
+
+ queues[url] = Queue()
+ t = Thread(target=publish_to_webhook, args=(url, token, secret,
+ queues[url]))
+ t.start()
+
+ # Get the message sent to Process queue and distribute to all thread queues
+ while True:
+ message = proc_queue.get()
+ for _, q in queues.items():
+ q.put(message)
+
+
+class WebhookThreadPool(object):
+ def start(self):
+ # Separate process to emit messages to webhooks
+ # which maintains one thread per webhook. Separate
+ # process is required since on reload we need to stop
+ # and start the thread pool. In Python Threads can't be stopped
+ # so terminate the process and start again. Note: In transit
+ # events will be missed during reload
+ self.queue = multiprocessing.Queue()
+ self.proc = multiprocessing.Process(target=webhook_monitor,
+ args=(self.queue, _webhooks))
+ self.proc.start()
+
+ def restart(self):
+ # In transit messages are skipped, since webhooks monitor process
+ # is terminated.
+ self.proc.terminate()
+ self.start()
+
+ def send(self, event_type, event_ts, message):
+ self.queue.put((event_type, event_ts, message))
+
+
+def init_webhook_pool():
+ global webhooks_pool
+ webhooks_pool = WebhookThreadPool()
+ webhooks_pool.start()
+
+
+def restart_webhook_pool():
+ global webhooks_pool
+ if webhooks_pool is not None:
+ webhooks_pool.restart()
diff --git a/events/tools/Makefile.am b/events/tools/Makefile.am
new file mode 100644
index 00000000000..fb6770cf070
--- /dev/null
+++ b/events/tools/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = eventsdash.py
+
+if BUILD_EVENTS
+scriptsdir = $(datadir)/glusterfs/scripts
+scripts_SCRIPTS = eventsdash.py
+endif
diff --git a/events/tools/eventsdash.py b/events/tools/eventsdash.py
new file mode 100644
index 00000000000..6479ea59da6
--- /dev/null
+++ b/events/tools/eventsdash.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import logging
+from datetime import datetime
+
+from flask import Flask, request
+
+app = Flask(__name__)
+app.logger.disabled = True
+log = logging.getLogger('werkzeug')
+log.disabled = True
+
+
+def human_time(ts):
+ return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+@app.route("/")
+def home():
+ return "OK"
+
+
+@app.route("/listen", methods=["POST"])
+def listen():
+ data = request.json
+ if data is None:
+ return "OK"
+
+ message = []
+ for k, v in data.get("message", {}).items():
+ message.append("{0}={1}".format(k, v))
+
+ print(("{0:20s} {1:20s} {2:36} {3}".format(
+ human_time(data.get("ts")),
+ data.get("event"),
+ data.get("nodeid"),
+ " ".join(message))))
+
+ return "OK"
+
+
+def main():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+ parser.add_argument("--port", type=int, help="Port", default=9000)
+ parser.add_argument("--debug", help="Run Server in debug mode",
+ action="store_true")
+ args = parser.parse_args()
+
+ print(("{0:20s} {1:20s} {2:36} {3}".format(
+ "TIMESTAMP", "EVENT", "NODE ID", "MESSAGE"
+ )))
+ print(("{0:20s} {1:20s} {2:36} {3}".format(
+ "-"*20, "-"*20, "-"*36, "-"*20
+ )))
+ if args.debug:
+ app.debug = True
+
+ app.run(host="0.0.0.0", port=args.port)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/extras/LinuxRPM/Makefile.am b/extras/LinuxRPM/Makefile.am
new file mode 100644
index 00000000000..f02853798c0
--- /dev/null
+++ b/extras/LinuxRPM/Makefile.am
@@ -0,0 +1,55 @@
+
+GFS_TAR = ../../glusterfs-$(VERSION).tar.gz
+
+.PHONY: all
+
+all:
+ @echo "To build RPMS run 'make glusterrpms'"
+
+.PHONY: glusterrpms glusterrpms_without_autogen
+.PHONY: autogen prep srcrpm testsrpm clean
+
+glusterrpms: autogen glusterrpms_without_autogen
+
+glusterrpms_without_autogen: prep srcrpm rpms
+ -rm -rf rpmbuild
+
+autogen:
+ cd ../.. && \
+ rm -rf autom4te.cache && \
+ ./autogen.sh && \
+ ./configure --enable-gnfs --with-previous-options
+
+prep:
+ $(MAKE) -C ../.. dist;
+ -mkdir -p rpmbuild/BUILD
+ -mkdir -p rpmbuild/SPECS
+ -mkdir -p rpmbuild/RPMS
+ -mkdir -p rpmbuild/SRPMS
+ -mkdir -p rpmbuild/SOURCES
+ -rm -rf rpmbuild/SOURCES/*
+ cp ../../*.tar.gz ./rpmbuild/SOURCES
+ cp ../../glusterfs.spec ./rpmbuild/SPECS
+
+srcrpm:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* .
+
+rpms:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' --with gnfs -bb rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/RPMS/*/* .
+
+# EPEL-5 does not like new versions of rpmbuild and requires some
+# _source_* defines
+
+testsrpm: prep
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' \
+ --define '_source_payload w9.gzdio' \
+ --define '_source_filedigest_algorithm 1' \
+ -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* ../..
+ -rm -rf rpmbuild
+
+clean:
+ -rm -rf rpmbuild
+ -rm -f *.rpm
diff --git a/extras/LinuxRPM/make_glusterrpms b/extras/LinuxRPM/make_glusterrpms
new file mode 100755
index 00000000000..3156af97870
--- /dev/null
+++ b/extras/LinuxRPM/make_glusterrpms
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+(
+ cd $(realpath $(dirname $0))/../.. || exit 1
+ rm -rf autom4te.cache
+ ./autogen.sh || exit 1
+ ./configure --with-previous-options || exit 1
+)
+make -C $(realpath $(dirname $0)) glusterrpms_without_autogen
diff --git a/extras/MacOSX/Portfile b/extras/MacOSX/Portfile
deleted file mode 100644
index 1262a44daea..00000000000
--- a/extras/MacOSX/Portfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# $Id$
-
-PortSystem 1.0
-
-name glusterfs
-version 2.0.0rc8
-categories fuse
-maintainers amar@gluster.com
-description GlusterFS
-long_description GlusterFS is a cluster file system, flexible to tune it for your needs.
-homepage http://www.gluster.org/
-platforms darwin
-master_sites http://ftp.gluster.com/pub/gluster/glusterfs/2.0/2.0.0
-
-configure.args --disable-bdb
-checksums md5 33c2d02344d4fab422e80cfb637e0b48
-
-post-destroot {
- file mkdir ${destroot}/Library/LaunchDaemons/
- file copy ${worksrcpath}/extras/glusterfs-server.plist \
- ${destroot}/Library/LaunchDaemons/com.gluster.glusterfs.plist
-
- file mkdir ${destroot}/sbin/
- file copy ${worksrcpath}/xlators/mount/fuse/utils/mount_glusterfs \
- ${destroot}/sbin/
-} \ No newline at end of file
diff --git a/extras/MacOSX/README.MacOSX b/extras/MacOSX/README.MacOSX
deleted file mode 100644
index ec7abc2cc78..00000000000
--- a/extras/MacOSX/README.MacOSX
+++ /dev/null
@@ -1,84 +0,0 @@
-
-Mostly the usage is over OS-X.
-
-Important links
-
-GlusterFS
-=========
-* http://www.gluster.org
-* http://gluster.org/docs/index.php/GlusterFS
-* http://gluster.org/docs/index.php/GlusterFS_on_MAC_OS_X
-
-MacFUSE
-=======
-* http://code.google.com/p/macfuse/
-* http://code.google.com/p/macfuse/wiki/FAQ
-
-
-Important steps:
-================
-
-Make sure that there is no previous installation of MacFUSE exists in the
-system. Run,
-
- bash# sudo /Library/Filesystems/fusefs.fs/Support/uninstall-macfuse-core.sh
-
-After this, install MacFUSE (mostly through .dmg available in macfuse homepage
-or if Gluster Inc provides any custom built .dmg)
-
-Make sure the .dmg of glusterfs is built against the installed MacFUSE version
-(if not, any operations over mountpoint gives EIO ie, Input/Output Error). If
-glusterfs tarball is used then compile it only after the MacFUSE installation
-is complete.
-
-To make an entry in /etc/fstab for glusterfs mount, use 'vifs' command
-
- bash# sudo vifs
-
-after the entry is added in /etc/fstab, it can be mounted by 'mount' command.
-
-To start the server process one can use the 'launchd' mechanism. Follow below
-steps after installation
-
- bash# launchctl load /Library/LaunchDaemons/com.gluster.glusterfs.plist
-
-No need to run the command if the machine reboots, as it will be loaded
-automatically by launchd process.
-
-Now copy the server volume file in the proper path
-
- bash# sudo vi /opt/local/etc/glusterfs/server.vol
-
-NOTE: (If glusterfs is installed in different path other than '/opt/local'
- update the volume file at the corresponding path, and also need to
- update the /Library/LaunchDaemons/com.gluster.glusterfs.plist with
- the proper path)
-
-Once the volume file is updated, administrator can start the server process by
-running,
-
- bash# launchctl start com.gluster.glusterfs
-
-and stop like
-
- bash# launchctl stop com.gluster.glusterfs
-
-NOTE: To start the process by default when the loaded, one need to add the
-following lines to .plist file
- -----
- <key>KeepAlive</key>
- <true/>
- -----
-
-
-
-
-Install using .dmg in Terminal
-=================================
-
- bash# hdiutil attach <package>.dmg
- bash# cd /Volumes/<package>/
- bash# installer -pkg <package>.pkg -installer /
- bash# cd
- bash# hdiutil detach /Volumes/<package>/
-
diff --git a/extras/Makefile.am b/extras/Makefile.am
index 565f93ea7b1..983f014cca6 100644
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -1,14 +1,77 @@
+addonexecdir = $(GLUSTERFS_LIBEXECDIR)
+addonexec_SCRIPTS =
+if WITH_SERVER
+addonexec_SCRIPTS += peer_add_secret_pub
+if USE_SYSTEMD
+addonexec_SCRIPTS += mount-shared-storage.sh
+endif
+endif
-docdir = $(datadir)/doc/glusterfs/
-EditorModedir = $(docdir)/
+EditorModedir = $(docdir)
EditorMode_DATA = glusterfs-mode.el glusterfs.vim
-SUBDIRS = init.d benchmarking hook-scripts
+SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \
+ $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \
+ ganesha
confdir = $(sysconfdir)/glusterfs
-conf_DATA = glusterfs-logrotate group-virt.example
+if WITH_SERVER
+conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
+ logger.conf.example glusterfs-georep-logrotate group-virt.example \
+ group-metadata-cache group-gluster-block group-nl-cache \
+ group-db-workload group-distributed-virt group-samba
+endif
-EXTRA_DIST = specgen.scm MacOSX/Portfile glusterfs-mode.el glusterfs.vim \
- migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \
- backend-cleanup.sh disk_usage_sync.sh quota-remove-xattr.sh \
- quota-metadata-cleanup.sh glusterfs-logrotate group-virt.example
+voldir = $(sysconfdir)/glusterfs
+vol_DATA = thin-arbiter/thin-arbiter.vol
+if WITH_SERVER
+vol_DATA += glusterd.vol
+endif
+
+scriptsdir = $(datadir)/glusterfs/scripts
+scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh
+if WITH_SERVER
+scripts_SCRIPTS += post-upgrade-script-for-quota.sh \
+ pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
+if USE_SYSTEMD
+scripts_SCRIPTS += control-cpu-load.sh
+scripts_SCRIPTS += control-mem.sh
+endif
+endif
+
+EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
+ logger.conf.example glusterfs-georep-logrotate group-virt.example \
+ group-metadata-cache group-gluster-block group-nl-cache \
+ group-db-workload group-samba specgen.scm glusterfs-mode.el glusterfs.vim \
+ migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \
+ backend-cleanup.sh disk_usage_sync.sh clear_xattrs.sh \
+ glusterd-sysconfig glusterd.vol post-upgrade-script-for-quota.sh \
+ pre-upgrade-script-for-quota.sh command-completion/gluster.bash \
+ command-completion/Makefile command-completion/README \
+ stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \
+ control-cpu-load.sh control-mem.sh group-distributed-virt \
+ thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh
+
+if WITH_SERVER
+install-data-local:
+ if [ -n "$(tmpfilesdir)" ]; then \
+ $(mkdir_p) $(DESTDIR)$(tmpfilesdir); \
+ $(INSTALL_DATA) run-gluster.tmpfiles \
+ $(DESTDIR)$(tmpfilesdir)/gluster.conf; \
+ fi
+ $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/groups
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-virt.example \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/virt
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-metadata-cache \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/metadata-cache
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-gluster-block \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/gluster-block
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-nl-cache \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/nl-cache
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-db-workload \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/db-workload
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-distributed-virt \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/distributed-virt
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-samba \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/samba
+endif
diff --git a/extras/Ubuntu/README.Ubuntu b/extras/Ubuntu/README.Ubuntu
index 0c5b7828d12..890da3ca614 100644
--- a/extras/Ubuntu/README.Ubuntu
+++ b/extras/Ubuntu/README.Ubuntu
@@ -1,5 +1,6 @@
Bug 765014 - Mounting from localhost in fstab fails at boot on ubuntu
-(https://bugzilla.redhat.com/show_bug.cgi?id=765014)
+(original bug: https://bugzilla.redhat.com/show_bug.cgi?id=765014)
+(updated in: https://bugzilla.redhat.com/show_bug.cgi?id=1047007)
(https://bugs.launchpad.net/ubuntu/+source/glusterfs/+bug/876648)
Ubuntu uses upstart instead of init to bootstrap the system and it has a unique
@@ -10,15 +11,16 @@ and the volume is mounted from localhost, the mount fails at boot time. To
correct this we need to launch glusterd using upstart and block the glusterfs
mounting event until glusterd is started.
-The glusterd.conf file contains the necessary configuration for upstart to
-manage the glusterd service. It should be placed in /etc/init/glusterd.conf
+The glusterfs-server.conf file contains the necessary configuration for upstart to
+manage the glusterd service. It should be placed in /etc/init/glusterfs-server.conf
on Ubuntu systems, and then the old initscript /etc/init.d/glusterd can be
removed. An additional upstart job, mounting-glusterfs.conf, is also required
-to block mounting glusterfs volumes until the glusterd service is available.
+to block mounting glusterfs volumes until the network interfaces are available.
Both of these upstart jobs need to be placed in /etc/init to resolve the issue.
-Starting with Ubuntu 12.04, Precise Pangolin, these upstart jobs will be
-included with the glusterfs-server package in the Ubuntu repository.
+Starting with Ubuntu 14.04, Trusty Tahr, these upstart jobs will be included
+with the glusterfs-server and glusterfs-client packages in the Ubuntu
+universe repository.
This affects all versions of glusterfs on the Ubuntu platform since at least
10.04, Lucid Lynx.
diff --git a/extras/Ubuntu/glusterd.conf b/extras/Ubuntu/glusterfs-server.conf
index aa99502b0a7..aa99502b0a7 100644
--- a/extras/Ubuntu/glusterd.conf
+++ b/extras/Ubuntu/glusterfs-server.conf
diff --git a/extras/Ubuntu/mounting-glusterfs.conf b/extras/Ubuntu/mounting-glusterfs.conf
index 3c59c0f635e..786ef16df04 100644
--- a/extras/Ubuntu/mounting-glusterfs.conf
+++ b/extras/Ubuntu/mounting-glusterfs.conf
@@ -1,7 +1,6 @@
author "Louis Zuckerman <me@louiszuckerman.com>"
-description "Block the mounting event for glusterfs filesystems until glusterd is running"
+description "Block the mounting event for glusterfs filesystems until the network interfaces are running"
start on mounting TYPE=glusterfs
task
-exec start wait-for-state WAIT_FOR=glusterd WAITER=mounting-glusterfs
-
+exec start wait-for-state WAIT_FOR=static-network-up WAITER=mounting-glusterfs
diff --git a/extras/backend-cleanup.sh b/extras/backend-cleanup.sh
index dc504860b73..cb0a1d8871f 100644
--- a/extras/backend-cleanup.sh
+++ b/extras/backend-cleanup.sh
@@ -17,7 +17,7 @@ export_directory="/export/glusterfs"
clean_dir()
{
# Clean the 'link' files on backend
- find "${export_directory}" -type f -perm +01000 -exec rm -v '{}' \;
+ find "${export_directory}" -type f -perm /01000 -exec rm -v '{}' \;
}
main()
diff --git a/extras/benchmarking/Makefile.am b/extras/benchmarking/Makefile.am
index 04cc06182ab..bfcc59277a5 100644
--- a/extras/benchmarking/Makefile.am
+++ b/extras/benchmarking/Makefile.am
@@ -1,7 +1,5 @@
-docdir = $(datadir)/doc/$(PACKAGE_NAME)/benchmarking
-
-benchmarkingdir = $(docdir)
+benchmarkingdir = $(docdir)/benchmarking
benchmarking_DATA = rdd.c glfs-bm.c README launch-script.sh local-script.sh
diff --git a/extras/benchmarking/glfs-bm.c b/extras/benchmarking/glfs-bm.c
index 035d055dfcd..f7f5873f84d 100644
--- a/extras/benchmarking/glfs-bm.c
+++ b/extras/benchmarking/glfs-bm.c
@@ -1,21 +1,12 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#define _GNU_SOURCE
#define __USE_FILE_OFFSET64
#define _FILE_OFFSET_BITS 64
@@ -34,365 +25,338 @@
#include <sys/time.h>
struct state {
- char need_op_write:1;
- char need_op_read:1;
+ char need_op_write : 1;
+ char need_op_read : 1;
- char need_iface_fileio:1;
- char need_iface_xattr:1;
+ char need_iface_fileio : 1;
+ char need_iface_xattr : 1;
- char need_mode_posix:1;
+ char need_mode_posix : 1;
- char prefix[512];
- long int count;
+ char prefix[512];
+ long int count;
- size_t block_size;
+ size_t block_size;
- char *specfile;
+ char *specfile;
- long int io_size;
+ long int io_size;
};
-
-#define MEASURE(func, arg) measure (func, #func, arg)
-
+#define MEASURE(func, arg) measure(func, #func, arg)
void
-tv_difference (struct timeval *tv_stop,
- struct timeval *tv_start,
- struct timeval *tv_diff)
+tv_difference(struct timeval *tv_stop, struct timeval *tv_start,
+ struct timeval *tv_diff)
{
- if (tv_stop->tv_usec < tv_start->tv_usec) {
- tv_diff->tv_usec = (tv_stop->tv_usec + 1000000) - tv_start->tv_usec;
- tv_diff->tv_sec = (tv_stop->tv_sec - 1 - tv_start->tv_sec);
- } else {
- tv_diff->tv_usec = tv_stop->tv_usec - tv_start->tv_usec;
- tv_diff->tv_sec = tv_stop->tv_sec - tv_start->tv_sec;
- }
+ if (tv_stop->tv_usec < tv_start->tv_usec) {
+ tv_diff->tv_usec = (tv_stop->tv_usec + 1000000) - tv_start->tv_usec;
+ tv_diff->tv_sec = (tv_stop->tv_sec - 1 - tv_start->tv_sec);
+ } else {
+ tv_diff->tv_usec = tv_stop->tv_usec - tv_start->tv_usec;
+ tv_diff->tv_sec = tv_stop->tv_sec - tv_start->tv_sec;
+ }
}
-
void
-measure (int (*func)(struct state *state),
- char *func_name, struct state *state)
+measure(int (*func)(struct state *state), char *func_name, struct state *state)
{
- struct timeval tv_start, tv_stop, tv_diff;
- state->io_size = 0;
- long int count;
+ struct timeval tv_start, tv_stop, tv_diff;
+ state->io_size = 0;
+ long int count;
- gettimeofday (&tv_start, NULL);
- count = func (state);
- gettimeofday (&tv_stop, NULL);
+ gettimeofday(&tv_start, NULL);
+ count = func(state);
+ gettimeofday(&tv_stop, NULL);
- tv_difference (&tv_stop, &tv_start, &tv_diff);
+ tv_difference(&tv_stop, &tv_start, &tv_diff);
- fprintf (stdout, "%s: count=%ld, size=%ld, time=%ld:%ld\n",
- func_name, count, state->io_size,
- tv_diff.tv_sec, tv_diff.tv_usec);
+ fprintf(stdout, "%s: count=%ld, size=%ld, time=%ld:%ld\n", func_name, count,
+ state->io_size, tv_diff.tv_sec, tv_diff.tv_usec);
}
-
static error_t
-parse_opts (int key, char *arg,
- struct argp_state *_state)
+parse_opts(int key, char *arg, struct argp_state *_state)
{
- struct state *state = _state->input;
+ struct state *state = _state->input;
- switch (key)
- {
+ switch (key) {
case 'o':
- if (strcasecmp (arg, "read") == 0) {
- state->need_op_write = 0;
- state->need_op_read = 1;
- } else if (strcasecmp (arg, "write") == 0) {
- state->need_op_write = 1;
- state->need_op_read = 0;
- } else if (strcasecmp (arg, "both") == 0) {
- state->need_op_write = 1;
- state->need_op_read = 1;
- } else {
- fprintf (stderr, "unknown op: %s\n", arg);
- return -1;
- }
- break;
+ if (strcasecmp(arg, "read") == 0) {
+ state->need_op_write = 0;
+ state->need_op_read = 1;
+ } else if (strcasecmp(arg, "write") == 0) {
+ state->need_op_write = 1;
+ state->need_op_read = 0;
+ } else if (strcasecmp(arg, "both") == 0) {
+ state->need_op_write = 1;
+ state->need_op_read = 1;
+ } else {
+ fprintf(stderr, "unknown op: %s\n", arg);
+ return -1;
+ }
+ break;
case 'i':
- if (strcasecmp (arg, "fileio") == 0) {
- state->need_iface_fileio = 1;
- state->need_iface_xattr = 0;
- } else if (strcasecmp (arg, "xattr") == 0) {
- state->need_iface_fileio = 0;
- state->need_iface_xattr = 1;
- } else if (strcasecmp (arg, "both") == 0) {
- state->need_iface_fileio = 1;
- state->need_iface_xattr = 1;
- } else {
- fprintf (stderr, "unknown interface: %s\n", arg);
- return -1;
- }
- break;
- case 'b':
- {
- size_t block_size = atoi (arg);
- if (!block_size) {
- fprintf (stderr, "incorrect size: %s\n", arg);
- return -1;
- }
- state->block_size = block_size;
- }
- break;
+ if (strcasecmp(arg, "fileio") == 0) {
+ state->need_iface_fileio = 1;
+ state->need_iface_xattr = 0;
+ } else if (strcasecmp(arg, "xattr") == 0) {
+ state->need_iface_fileio = 0;
+ state->need_iface_xattr = 1;
+ } else if (strcasecmp(arg, "both") == 0) {
+ state->need_iface_fileio = 1;
+ state->need_iface_xattr = 1;
+ } else {
+ fprintf(stderr, "unknown interface: %s\n", arg);
+ return -1;
+ }
+ break;
+ case 'b': {
+ size_t block_size = atoi(arg);
+ if (!block_size) {
+ fprintf(stderr, "incorrect size: %s\n", arg);
+ return -1;
+ }
+ state->block_size = block_size;
+ } break;
case 's':
- state->specfile = strdup (arg);
- break;
+ state->specfile = strdup(arg);
+ break;
case 'p':
- fprintf (stderr, "using prefix: %s\n", arg);
- strncpy (state->prefix, arg, 512);
- break;
- case 'c':
- {
- long count = atol (arg);
- if (!count) {
- fprintf (stderr, "incorrect count: %s\n", arg);
- return -1;
- }
- state->count = count;
- }
- break;
+ fprintf(stderr, "using prefix: %s\n", arg);
+ strncpy(state->prefix, arg, 512);
+ break;
+ case 'c': {
+ long count = atol(arg);
+ if (!count) {
+ fprintf(stderr, "incorrect count: %s\n", arg);
+ return -1;
+ }
+ state->count = count;
+ } break;
case ARGP_KEY_NO_ARGS:
- break;
+ break;
case ARGP_KEY_ARG:
- break;
- }
+ break;
+ }
- return 0;
+ return 0;
}
int
-do_mode_posix_iface_fileio_write (struct state *state)
+do_mode_posix_iface_fileio_write(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
-
- for (i=0; i<state->count; i++) {
- int fd = -1;
- char filename[512];
-
- sprintf (filename, "%s.%06ld", state->prefix, i);
-
- fd = open (filename, O_CREAT|O_WRONLY, 00600);
- if (fd == -1) {
- fprintf (stderr, "open(%s) => %s\n", filename, strerror (errno));
- break;
- }
- ret = write (fd, block, state->block_size);
- if (ret != state->block_size) {
- fprintf (stderr, "write (%s) => %d/%s\n", filename, ret,
- strerror (errno));
- close (fd);
- break;
- }
- close (fd);
- state->io_size += ret;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+
+ for (i = 0; i < state->count; i++) {
+ int fd = -1;
+ char filename[512];
+
+ sprintf(filename, "%s.%06ld", state->prefix, i);
+
+ fd = open(filename, O_CREAT | O_WRONLY, 00600);
+ if (fd == -1) {
+ fprintf(stderr, "open(%s) => %s\n", filename, strerror(errno));
+ break;
}
+ ret = write(fd, block, state->block_size);
+ if (ret != state->block_size) {
+ fprintf(stderr, "write (%s) => %d/%s\n", filename, ret,
+ strerror(errno));
+ close(fd);
+ break;
+ }
+ close(fd);
+ state->io_size += ret;
+ }
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_fileio_read (struct state *state)
+do_mode_posix_iface_fileio_read(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
-
- for (i=0; i<state->count; i++) {
- int fd = -1;
- char filename[512];
-
- sprintf (filename, "%s.%06ld", state->prefix, i);
-
- fd = open (filename, O_RDONLY);
- if (fd == -1) {
- fprintf (stderr, "open(%s) => %s\n", filename, strerror (errno));
- break;
- }
- ret = read (fd, block, state->block_size);
- if (ret == -1) {
- fprintf (stderr, "read(%s) => %d/%s\n", filename, ret, strerror (errno));
- close (fd);
- break;
- }
- close (fd);
- state->io_size += ret;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+
+ for (i = 0; i < state->count; i++) {
+ int fd = -1;
+ char filename[512];
+
+ sprintf(filename, "%s.%06ld", state->prefix, i);
+
+ fd = open(filename, O_RDONLY);
+ if (fd == -1) {
+ fprintf(stderr, "open(%s) => %s\n", filename, strerror(errno));
+ break;
+ }
+ ret = read(fd, block, state->block_size);
+ if (ret == -1) {
+ fprintf(stderr, "read(%s) => %d/%s\n", filename, ret,
+ strerror(errno));
+ close(fd);
+ break;
}
+ close(fd);
+ state->io_size += ret;
+ }
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_fileio (struct state *state)
+do_mode_posix_iface_fileio(struct state *state)
{
- if (state->need_op_write)
- MEASURE (do_mode_posix_iface_fileio_write, state);
+ if (state->need_op_write)
+ MEASURE(do_mode_posix_iface_fileio_write, state);
- if (state->need_op_read)
- MEASURE (do_mode_posix_iface_fileio_read, state);
+ if (state->need_op_read)
+ MEASURE(do_mode_posix_iface_fileio_read, state);
- return 0;
+ return 0;
}
-
int
-do_mode_posix_iface_xattr_write (struct state *state)
+do_mode_posix_iface_xattr_write(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
- char *dname = NULL, *dirc = NULL;
- char *bname = NULL, *basec = NULL;
-
- dirc = strdup (state->prefix);
- basec = strdup (state->prefix);
- dname = dirname (dirc);
- bname = basename (basec);
-
- for (i=0; i<state->count; i++) {
- char key[512];
-
- sprintf (key, "glusterfs.file.%s.%06ld", bname, i);
-
- ret = lsetxattr (dname, key, block, state->block_size, 0);
-
- if (ret != 0) {
- fprintf (stderr, "lsetxattr (%s, %s, %p) => %s\n",
- dname, key, block, strerror (errno));
- break;
- }
- state->io_size += state->block_size;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+ char *dname = NULL, *dirc = NULL;
+ char *bname = NULL, *basec = NULL;
+
+ dirc = strdup(state->prefix);
+ basec = strdup(state->prefix);
+ dname = dirname(dirc);
+ bname = basename(basec);
+
+ for (i = 0; i < state->count; i++) {
+ char key[512];
+
+ sprintf(key, "glusterfs.file.%s.%06ld", bname, i);
+
+ ret = lsetxattr(dname, key, block, state->block_size, 0);
+
+ if (ret != 0) {
+ fprintf(stderr, "lsetxattr (%s, %s, %p) => %s\n", dname, key, block,
+ strerror(errno));
+ break;
}
+ state->io_size += state->block_size;
+ }
- free (dirc);
- free (basec);
+ free(dirc);
+ free(basec);
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_xattr_read (struct state *state)
+do_mode_posix_iface_xattr_read(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
- char *dname = NULL, *dirc = NULL;
- char *bname = NULL, *basec = NULL;
-
- dirc = strdup (state->prefix);
- basec = strdup (state->prefix);
- dname = dirname (dirc);
- bname = basename (basec);
-
- for (i=0; i<state->count; i++) {
- char key[512];
-
- sprintf (key, "glusterfs.file.%s.%06ld", bname, i);
-
- ret = lgetxattr (dname, key, block, state->block_size);
-
- if (ret < 0) {
- fprintf (stderr, "lgetxattr (%s, %s, %p) => %s\n",
- dname, key, block, strerror (errno));
- break;
- }
- state->io_size += ret;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+ char *dname = NULL, *dirc = NULL;
+ char *bname = NULL, *basec = NULL;
+
+ dirc = strdup(state->prefix);
+ basec = strdup(state->prefix);
+ dname = dirname(dirc);
+ bname = basename(basec);
+
+ for (i = 0; i < state->count; i++) {
+ char key[512];
+
+ sprintf(key, "glusterfs.file.%s.%06ld", bname, i);
+
+ ret = lgetxattr(dname, key, block, state->block_size);
+
+ if (ret < 0) {
+ fprintf(stderr, "lgetxattr (%s, %s, %p) => %s\n", dname, key, block,
+ strerror(errno));
+ break;
}
+ state->io_size += ret;
+ }
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_xattr (struct state *state)
+do_mode_posix_iface_xattr(struct state *state)
{
- if (state->need_op_write)
- MEASURE (do_mode_posix_iface_xattr_write, state);
+ if (state->need_op_write)
+ MEASURE(do_mode_posix_iface_xattr_write, state);
- if (state->need_op_read)
- MEASURE (do_mode_posix_iface_xattr_read, state);
+ if (state->need_op_read)
+ MEASURE(do_mode_posix_iface_xattr_read, state);
- return 0;
+ return 0;
}
int
-do_mode_posix (struct state *state)
+do_mode_posix(struct state *state)
{
- if (state->need_iface_fileio)
- do_mode_posix_iface_fileio (state);
+ if (state->need_iface_fileio)
+ do_mode_posix_iface_fileio(state);
- if (state->need_iface_xattr)
- do_mode_posix_iface_xattr (state);
+ if (state->need_iface_xattr)
+ do_mode_posix_iface_xattr(state);
- return 0;
+ return 0;
}
-
int
-do_actions (struct state *state)
+do_actions(struct state *state)
{
- if (state->need_mode_posix)
- do_mode_posix (state);
+ if (state->need_mode_posix)
+ do_mode_posix(state);
- return 0;
+ return 0;
}
static struct argp_option options[] = {
- {"op", 'o', "OPERATIONS", 0,
- "WRITE|READ|BOTH - defaults to BOTH"},
- {"iface", 'i', "INTERFACE", 0,
- "FILEIO|XATTR|BOTH - defaults to FILEIO"},
- {"block", 'b', "BLOCKSIZE", 0,
- "<NUM> - defaults to 4096"},
- {"specfile", 's', "SPECFILE", 0,
- "absolute path to specfile"},
- {"prefix", 'p', "PREFIX", 0,
- "filename prefix"},
- {"count", 'c', "COUNT", 0,
- "number of files"},
- {0, 0, 0, 0, 0}
-};
+ {"op", 'o', "OPERATIONS", 0, "WRITE|READ|BOTH - defaults to BOTH"},
+ {"iface", 'i', "INTERFACE", 0, "FILEIO|XATTR|BOTH - defaults to FILEIO"},
+ {"block", 'b', "BLOCKSIZE", 0, "<NUM> - defaults to 4096"},
+ {"specfile", 's', "SPECFILE", 0, "absolute path to specfile"},
+ {"prefix", 'p', "PREFIX", 0, "filename prefix"},
+ {"count", 'c', "COUNT", 0, "number of files"},
+ {0, 0, 0, 0, 0}};
-static struct argp argp = {
- options,
- parse_opts,
- "tool",
- "tool to benchmark small file performance"
-};
+static struct argp argp = {options, parse_opts, "tool",
+ "tool to benchmark small file performance"};
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- struct state state = {0, };
+ struct state state = {
+ 0,
+ };
- state.need_op_write = 1;
- state.need_op_read = 1;
+ state.need_op_write = 1;
+ state.need_op_read = 1;
- state.need_iface_fileio = 1;
- state.need_iface_xattr = 0;
+ state.need_iface_fileio = 1;
+ state.need_iface_xattr = 0;
- state.need_mode_posix = 1;
+ state.need_mode_posix = 1;
- state.block_size = 4096;
+ state.block_size = 4096;
- strcpy (state.prefix, "tmpfile");
- state.count = 1048576;
+ strcpy(state.prefix, "tmpfile");
+ state.count = 1048576;
- if (argp_parse (&argp, argc, argv, 0, 0, &state) != 0) {
- fprintf (stderr, "argp_parse() failed\n");
- return 1;
- }
+ if (argp_parse(&argp, argc, argv, 0, 0, &state) != 0) {
+ fprintf(stderr, "argp_parse() failed\n");
+ return 1;
+ }
- do_actions (&state);
+ do_actions(&state);
- return 0;
+ return 0;
}
diff --git a/extras/benchmarking/rdd.c b/extras/benchmarking/rdd.c
index b06333370ae..efc9d342a37 100644
--- a/extras/benchmarking/rdd.c
+++ b/extras/benchmarking/rdd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -30,633 +20,586 @@
#define TWO_POWER(power) (2UL << (power))
-#define RDD_INTEGER_VALUE ((TWO_POWER ((sizeof (int) * 8))) - 1)
+#define RDD_INTEGER_VALUE ((TWO_POWER((sizeof(int) * 8))) - 1)
#ifndef UNIX_PATH_MAX
#define UNIX_PATH_MAX 108
#endif
#define UNIT_KB 1024ULL
-#define UNIT_MB UNIT_KB*1024ULL
-#define UNIT_GB UNIT_MB*1024ULL
-#define UNIT_TB UNIT_GB*1024ULL
-#define UNIT_PB UNIT_TB*1024ULL
+#define UNIT_MB UNIT_KB * 1024ULL
+#define UNIT_GB UNIT_MB * 1024ULL
+#define UNIT_TB UNIT_GB * 1024ULL
+#define UNIT_PB UNIT_TB * 1024ULL
-#define UNIT_KB_STRING "KB"
-#define UNIT_MB_STRING "MB"
-#define UNIT_GB_STRING "GB"
-#define UNIT_TB_STRING "TB"
-#define UNIT_PB_STRING "PB"
+#define UNIT_KB_STRING "KB"
+#define UNIT_MB_STRING "MB"
+#define UNIT_GB_STRING "GB"
+#define UNIT_TB_STRING "TB"
+#define UNIT_PB_STRING "PB"
struct rdd_file {
- char path[UNIX_PATH_MAX];
- struct stat st;
- int fd;
+ char path[UNIX_PATH_MAX];
+ struct stat st;
+ int fd;
};
struct rdd_config {
- long iters;
- long max_ops_per_seq;
- size_t max_bs;
- size_t min_bs;
- int thread_count;
- pthread_t *threads;
- pthread_barrier_t barrier;
- pthread_mutex_t lock;
- struct rdd_file in_file;
- struct rdd_file out_file;
- ssize_t file_size;
+ long iters;
+ long max_ops_per_seq;
+ size_t max_bs;
+ size_t min_bs;
+ int thread_count;
+ pthread_t *threads;
+ pthread_barrier_t barrier;
+ pthread_mutex_t lock;
+ struct rdd_file in_file;
+ struct rdd_file out_file;
+ ssize_t file_size;
};
static struct rdd_config rdd_config;
enum rdd_keys {
- RDD_MIN_BS_KEY = 1,
- RDD_MAX_BS_KEY,
+ RDD_MIN_BS_KEY = 1,
+ RDD_MAX_BS_KEY,
};
static error_t
-rdd_parse_opts (int key, char *arg,
- struct argp_state *_state)
+rdd_parse_opts(int key, char *arg, struct argp_state *_state)
{
- switch (key) {
- case 'o':
- {
- int len = 0;
- len = strlen (arg);
- if (len > UNIX_PATH_MAX) {
- fprintf (stderr, "output file name too long (%s)\n",
- arg);
- return -1;
- }
-
- strncpy (rdd_config.out_file.path, arg, len);
- }
- break;
-
- case 'i':
- {
- int len = 0;
- len = strlen (arg);
- if (len > UNIX_PATH_MAX) {
- fprintf (stderr, "input file name too long (%s)\n",
- arg);
- return -1;
- }
-
- strncpy (rdd_config.in_file.path, arg, len);
- rdd_config.in_file.path[len] = '\0';
- }
- break;
-
- case 'f':
- {
- char *tmp = NULL;
- unsigned long long fs = 0;
- if (string2bytesize (arg, &fs) == -1) {
- fprintf (stderr, "invalid argument for file size "
- "(%s)\n", arg);
- return -1;
- }
-
- rdd_config.file_size = fs;
- }
- break;
-
- case RDD_MIN_BS_KEY:
- {
- char *tmp = NULL;
- long bs = 0;
- bs = strtol (arg, &tmp, 10);
- if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for minimum block"
- "size (%s)\n", arg);
- return -1;
- }
-
- rdd_config.min_bs = bs;
- }
- break;
-
- case RDD_MAX_BS_KEY:
- {
- char *tmp = NULL;
- long bs = 0;
- bs = strtol (arg, &tmp, 10);
- if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for maximum block"
- "size (%s)\n", arg);
- return -1;
- }
-
- rdd_config.max_bs = bs;
- }
- break;
-
- case 'r':
- {
- char *tmp = NULL;
- long iters = 0;
- iters = strtol (arg, &tmp, 10);
- if ((iters == LONG_MAX) ||
- (iters == LONG_MIN) ||
- (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for iterations"
- "(%s)\n", arg);
- return -1;
- }
-
- rdd_config.iters = iters;
- }
- break;
-
- case 'm':
- {
- char *tmp = NULL;
- long max_ops = 0;
- max_ops = strtol (arg, &tmp, 10);
- if ((max_ops == LONG_MAX) ||
- (max_ops == LONG_MIN) ||
- (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for max-ops"
- "(%s)\n", arg);
- return -1;
- }
+ switch (key) {
+ case 'o': {
+ int len = 0;
+ len = strlen(arg);
+ if (len > UNIX_PATH_MAX) {
+ fprintf(stderr, "output file name too long (%s)\n", arg);
+ return -1;
+ }
- rdd_config.max_ops_per_seq = max_ops;
- }
- break;
+ strncpy(rdd_config.out_file.path, arg, len);
+ } break;
- case 't':
- {
- char *tmp = NULL;
- long threads = 0;
- threads = strtol (arg, &tmp, 10);
- if ((threads == LONG_MAX) ||
- (threads == LONG_MIN) ||
- (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for thread count"
- "(%s)\n", arg);
- return -1;
- }
+ case 'i': {
+ int len = 0;
+ len = strlen(arg);
+ if (len > UNIX_PATH_MAX) {
+ fprintf(stderr, "input file name too long (%s)\n", arg);
+ return -1;
+ }
+
+ strncpy(rdd_config.in_file.path, arg, len);
+ rdd_config.in_file.path[len] = '\0';
+ } break;
+
+ case 'f': {
+ char *tmp = NULL;
+ unsigned long long fs = 0;
+ if (string2bytesize(arg, &fs) == -1) {
+ fprintf(stderr,
+ "invalid argument for file size "
+ "(%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.file_size = fs;
+ } break;
+
+ case RDD_MIN_BS_KEY: {
+ char *tmp = NULL;
+ long bs = 0;
+ bs = strtol(arg, &tmp, 10);
+ if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for minimum block"
+ "size (%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.min_bs = bs;
+ } break;
+
+ case RDD_MAX_BS_KEY: {
+ char *tmp = NULL;
+ long bs = 0;
+ bs = strtol(arg, &tmp, 10);
+ if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for maximum block"
+ "size (%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.max_bs = bs;
+ } break;
+
+ case 'r': {
+ char *tmp = NULL;
+ long iters = 0;
+ iters = strtol(arg, &tmp, 10);
+ if ((iters == LONG_MAX) || (iters == LONG_MIN) || (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for iterations"
+ "(%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.iters = iters;
+ } break;
+
+ case 'm': {
+ char *tmp = NULL;
+ long max_ops = 0;
+ max_ops = strtol(arg, &tmp, 10);
+ if ((max_ops == LONG_MAX) || (max_ops == LONG_MIN) ||
+ (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for max-ops"
+ "(%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.max_ops_per_seq = max_ops;
+ } break;
+
+ case 't': {
+ char *tmp = NULL;
+ long threads = 0;
+ threads = strtol(arg, &tmp, 10);
+ if ((threads == LONG_MAX) || (threads == LONG_MIN) ||
+ (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for thread count"
+ "(%s)\n",
+ arg);
+ return -1;
+ }
- rdd_config.thread_count = threads;
- }
- break;
+ rdd_config.thread_count = threads;
+ } break;
case ARGP_KEY_NO_ARGS:
- break;
+ break;
case ARGP_KEY_ARG:
- break;
+ break;
case ARGP_KEY_END:
- if (_state->argc == 1) {
- argp_usage (_state);
- }
-
- }
+ if (_state->argc == 1) {
+ argp_usage(_state);
+ }
+ }
- return 0;
+ return 0;
}
int
-string2bytesize (const char *str, unsigned long long *n)
+string2bytesize(const char *str, unsigned long long *n)
{
- unsigned long long value = 0ULL;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
+ unsigned long long value = 0ULL;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s)) {
+ continue;
}
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- return -1;
- }
- break;
+ if (*s == '-') {
+ return -1;
}
-
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, 10);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoull(str, &tail, 10);
+
+ if (errno == ERANGE || errno == EINVAL) {
+ return -1;
+ }
+
+ if (errno == 0) {
+ errno = old_errno;
+ }
+
+ if (tail[0] != '\0') {
+ if (strcasecmp(tail, UNIT_KB_STRING) == 0) {
+ value *= UNIT_KB;
+ } else if (strcasecmp(tail, UNIT_MB_STRING) == 0) {
+ value *= UNIT_MB;
+ } else if (strcasecmp(tail, UNIT_GB_STRING) == 0) {
+ value *= UNIT_GB;
+ } else if (strcasecmp(tail, UNIT_TB_STRING) == 0) {
+ value *= UNIT_TB;
+ } else if (strcasecmp(tail, UNIT_PB_STRING) == 0) {
+ value *= UNIT_PB;
}
- if (tail[0] != '\0')
- {
- if (strcasecmp (tail, UNIT_KB_STRING) == 0)
- {
- value *= UNIT_KB;
- }
- else if (strcasecmp (tail, UNIT_MB_STRING) == 0)
- {
- value *= UNIT_MB;
- }
- else if (strcasecmp (tail, UNIT_GB_STRING) == 0)
- {
- value *= UNIT_GB;
- }
- else if (strcasecmp (tail, UNIT_TB_STRING) == 0)
- {
- value *= UNIT_TB;
- }
- else if (strcasecmp (tail, UNIT_PB_STRING) == 0)
- {
- value *= UNIT_PB;
- }
-
- else
- {
- return -1;
- }
+ else {
+ return -1;
}
+ }
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
static struct argp_option rdd_options[] = {
- {"if", 'i', "INPUT_FILE", 0, "input-file"},
- {"of", 'o', "OUTPUT_FILE", 0, "output-file"},
- {"threads", 't', "COUNT", 0, "number of threads to spawn (defaults to 2)"},
- {"min-bs", RDD_MIN_BS_KEY, "MIN_BLOCK_SIZE", 0,
- "Minimum block size in bytes (defaults to 1024)"},
- {"max-bs", RDD_MAX_BS_KEY, "MAX_BLOCK_SIZE", 0,
- "Maximum block size in bytes (defaults to 4096)"},
- {"iters", 'r', "ITERS", 0,
- "Number of read-write sequences (defaults to 1000000)"},
- {"max-ops", 'm', "MAXOPS", 0,
- "maximum number of read-writes to be performed in a sequence (defaults to 1)"},
- {"file-size", 'f', "FILESIZE", 0,
- "the size of the file which will be created and upon it I/O will be done"
- " (defaults to 100MB"},
- {0, 0, 0, 0, 0}
-};
+ {"if", 'i', "INPUT_FILE", 0, "input-file"},
+ {"of", 'o', "OUTPUT_FILE", 0, "output-file"},
+ {"threads", 't', "COUNT", 0, "number of threads to spawn (defaults to 2)"},
+ {"min-bs", RDD_MIN_BS_KEY, "MIN_BLOCK_SIZE", 0,
+ "Minimum block size in bytes (defaults to 1024)"},
+ {"max-bs", RDD_MAX_BS_KEY, "MAX_BLOCK_SIZE", 0,
+ "Maximum block size in bytes (defaults to 4096)"},
+ {"iters", 'r', "ITERS", 0,
+ "Number of read-write sequences (defaults to 1000000)"},
+ {"max-ops", 'm', "MAXOPS", 0,
+ "maximum number of read-writes to be performed in a sequence (defaults to "
+ "1)"},
+ {"file-size", 'f', "FILESIZE", 0,
+ "the size of the file which will be created and upon it I/O will be done"
+ " (defaults to 100MB"},
+ {0, 0, 0, 0, 0}};
static struct argp argp = {
- rdd_options,
- rdd_parse_opts,
- "",
- "random dd - tool to do a sequence of random block-sized continuous"
- "read writes starting at a random offset"
-};
-
+ rdd_options, rdd_parse_opts, "",
+ "random dd - tool to do a sequence of random block-sized continuous"
+ "read writes starting at a random offset"};
static void
-rdd_default_config (void)
+rdd_default_config(void)
{
- char *tmp_path = "rdd.in";
-
- rdd_config.thread_count = 2;
- rdd_config.iters = 1000000;
- rdd_config.max_bs = 4096;
- rdd_config.min_bs = 1024;
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
- rdd_config.max_ops_per_seq = 1;
- strncpy (rdd_config.in_file.path, tmp_path, strlen (tmp_path));
- rdd_config.file_size = 104857600;
-
- return;
+ char *tmp_path = "rdd.in";
+
+ rdd_config.thread_count = 2;
+ rdd_config.iters = 1000000;
+ rdd_config.max_bs = 4096;
+ rdd_config.min_bs = 1024;
+ rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
+ rdd_config.max_ops_per_seq = 1;
+ strncpy(rdd_config.in_file.path, tmp_path, strlen(tmp_path));
+ rdd_config.file_size = 104857600;
+
+ return;
}
-
static char
-rdd_valid_config (void)
+rdd_valid_config(void)
{
- char ret = 1;
- int fd = -1;
-
- fd = open (rdd_config.in_file.path, O_RDONLY);
- if (fd == -1 && (errno != ENOENT)) {
- fprintf (stderr, "open: (%s)", strerror (errno));
- ret = 0;
- goto out;
- }
- close (fd);
+ char ret = 1;
+ int fd = -1;
- if (rdd_config.min_bs > rdd_config.max_bs) {
- fprintf (stderr, "minimum blocksize %ld is greater than the "
- "maximum blocksize %ld", rdd_config.min_bs,
- rdd_config.max_bs);
- ret = 0;
- goto out;
- }
+ fd = open(rdd_config.in_file.path, O_RDONLY);
+ if (fd == -1 && (errno != ENOENT)) {
+ fprintf(stderr, "open: (%s)", strerror(errno));
+ ret = 0;
+ goto out;
+ }
+ close(fd);
+
+ if (rdd_config.min_bs > rdd_config.max_bs) {
+ fprintf(stderr,
+ "minimum blocksize %ld is greater than the "
+ "maximum blocksize %ld",
+ rdd_config.min_bs, rdd_config.max_bs);
+ ret = 0;
+ goto out;
+ }
- if (strlen (rdd_config.out_file.path) == 0) {
- sprintf (rdd_config.out_file.path, "%s.rddout",
- rdd_config.in_file.path);
- }
+ if (strlen(rdd_config.out_file.path) == 0) {
+ sprintf(rdd_config.out_file.path, "%s.rddout", rdd_config.in_file.path);
+ }
out:
- return ret;
+ return ret;
}
-
static void *
-rdd_read_write (void *arg)
+rdd_read_write(void *arg)
{
- int i = 0, ret = 0;
- size_t bs = 0;
- off_t offset = 0;
- long rand = 0;
- long max_ops = 0;
- char *buf = NULL;
-
- buf = calloc (1, rdd_config.max_bs);
- if (!buf) {
- fprintf (stderr, "calloc failed (%s)\n", strerror (errno));
+ int i = 0, ret = 0;
+ size_t bs = 0;
+ off_t offset = 0;
+ long rand = 0;
+ long max_ops = 0;
+ char *buf = NULL;
+
+ buf = calloc(1, rdd_config.max_bs);
+ if (!buf) {
+ fprintf(stderr, "calloc failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < rdd_config.iters; i++) {
+ pthread_mutex_lock(&rdd_config.lock);
+ {
+ int bytes = 0;
+ rand = random();
+
+ if (rdd_config.min_bs == rdd_config.max_bs) {
+ bs = rdd_config.max_bs;
+ } else {
+ bs = rdd_config.min_bs +
+ (rand % (rdd_config.max_bs - rdd_config.min_bs));
+ }
+
+ offset = rand % rdd_config.in_file.st.st_size;
+ max_ops = rand % rdd_config.max_ops_per_seq;
+ if (!max_ops) {
+ max_ops++;
+ }
+
+ ret = lseek(rdd_config.in_file.fd, offset, SEEK_SET);
+ if (ret != offset) {
+ fprintf(stderr, "lseek failed (%s)\n", strerror(errno));
ret = -1;
- goto out;
- }
+ goto unlock;
+ }
- for (i = 0; i < rdd_config.iters; i++)
- {
- pthread_mutex_lock (&rdd_config.lock);
- {
- int bytes = 0;
- rand = random ();
-
- if (rdd_config.min_bs == rdd_config.max_bs) {
- bs = rdd_config.max_bs;
- } else {
- bs = rdd_config.min_bs +
- (rand %
- (rdd_config.max_bs -
- rdd_config.min_bs));
- }
-
- offset = rand % rdd_config.in_file.st.st_size;
- max_ops = rand % rdd_config.max_ops_per_seq;
- if (!max_ops) {
- max_ops ++;
- }
-
- ret = lseek (rdd_config.in_file.fd, offset, SEEK_SET);
- if (ret != offset) {
- fprintf (stderr, "lseek failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
-
- ret = lseek (rdd_config.out_file.fd, offset, SEEK_SET);
- if (ret != offset) {
- fprintf (stderr, "lseek failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
-
- while (max_ops--)
- {
- bytes = read (rdd_config.in_file.fd, buf, bs);
- if (!bytes) {
- break;
- }
-
- if (bytes == -1) {
- fprintf (stderr, "read failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
-
- if (write (rdd_config.out_file.fd, buf, bytes)
- != bytes) {
- fprintf (stderr, "write failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
- }
+ ret = lseek(rdd_config.out_file.fd, offset, SEEK_SET);
+ if (ret != offset) {
+ fprintf(stderr, "lseek failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto unlock;
+ }
+
+ while (max_ops--) {
+ bytes = read(rdd_config.in_file.fd, buf, bs);
+ if (!bytes) {
+ break;
}
- unlock:
- pthread_mutex_unlock (&rdd_config.lock);
- if (ret == -1) {
- goto out;
+
+ if (bytes == -1) {
+ fprintf(stderr, "read failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto unlock;
+ }
+
+ if (write(rdd_config.out_file.fd, buf, bytes) != bytes) {
+ fprintf(stderr, "write failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto unlock;
}
- ret = 0;
+ }
}
+ unlock:
+ pthread_mutex_unlock(&rdd_config.lock);
+ if (ret == -1) {
+ goto out;
+ }
+ ret = 0;
+ }
out:
- free (buf);
- pthread_barrier_wait (&rdd_config.barrier);
+ free(buf);
+ pthread_barrier_wait(&rdd_config.barrier);
- return NULL;
+ return NULL;
}
static void
-cleanup (void)
+cleanup(void)
{
- close (rdd_config.in_file.fd);
- close (rdd_config.out_file.fd);
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
+ close(rdd_config.in_file.fd);
+ close(rdd_config.out_file.fd);
+ rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
}
static int
-check_and_create (void)
+check_and_create(void)
{
- int ret = -1;
- char buf[4096] = {0,};
- struct stat stbuf = {0,};
- int fd[2] = {-1,};
- size_t total_size = -1;
-
- total_size = rdd_config.file_size;
-
- ret = stat (rdd_config.in_file.path, &stbuf);
- if (ret == -1 && (errno != ENOENT))
+ int ret = -1;
+ char buf[4096] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+ int fd[2] = {
+ -1,
+ };
+ size_t total_size = -1;
+
+ total_size = rdd_config.file_size;
+
+ ret = stat(rdd_config.in_file.path, &stbuf);
+ if (ret == -1 && (errno != ENOENT))
+ goto out;
+
+ fd[1] = open(rdd_config.in_file.path, O_CREAT | O_WRONLY | O_TRUNC);
+ if (fd[1] == -1)
+ goto out;
+
+ fd[0] = open("/dev/urandom", O_RDONLY);
+ if (fd[0] == -1)
+ goto out;
+
+ while (total_size > 0) {
+ if (total_size >= 4096) {
+ ret = read(fd[0], buf, 4096);
+ if (ret == -1)
goto out;
-
- fd[1] = open (rdd_config.in_file.path, O_CREAT | O_WRONLY | O_TRUNC);
- if (fd[1] == -1)
+ ret = write(fd[1], buf, 4096);
+ if (ret == -1)
goto out;
-
- fd[0] = open ("/dev/urandom", O_RDONLY);
- if (fd[0] == -1)
+ total_size = total_size - 4096;
+ } else {
+ ret = read(fd[0], buf, total_size);
+ if (ret == -1)
goto out;
-
- while (total_size > 0) {
- if (total_size >= 4096) {
- ret = read (fd[0], buf, 4096);
- if (ret == -1)
- goto out;
- ret = write (fd[1], buf, 4096);
- if (ret == -1)
- goto out;
- total_size = total_size - 4096;
- } else {
- ret = read (fd[0], buf, total_size);
- if (ret == -1)
- goto out;
- ret = write (fd[1], buf, total_size);
- if (ret == -1)
- goto out;
- total_size = total_size - total_size;
- }
-
+ ret = write(fd[1], buf, total_size);
+ if (ret == -1)
+ goto out;
+ total_size = total_size - total_size;
}
+ }
- ret = 0;
+ ret = 0;
out:
- if (fd[0] > 0)
- close (fd[0]);
- if (fd[1] > 0)
- close (fd[1]);
- return ret;
+ if (fd[0] > 0)
+ close(fd[0]);
+ if (fd[1] > 0)
+ close(fd[1]);
+ return ret;
}
static int
-rdd_spawn_threads (void)
+rdd_spawn_threads(void)
{
- int i = 0, ret = -1, fd = -1;
- char buf[4096];
-
- ret = check_and_create ();
- if (ret == -1)
- goto out;
-
- fd = open (rdd_config.in_file.path, O_RDONLY);
- if (fd < 0) {
- fprintf (stderr, "cannot open %s (%s)\n",
- rdd_config.in_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- ret = fstat (fd, &rdd_config.in_file.st);
- if (ret != 0) {
- close (fd);
- fprintf (stderr, "cannot stat %s (%s)\n",
- rdd_config.in_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- rdd_config.in_file.fd = fd;
-
- fd = open (rdd_config.out_file.path, O_WRONLY | O_CREAT | O_TRUNC,
- S_IRWXU | S_IROTH);
- if (fd < 0) {
- close (rdd_config.in_file.fd);
- rdd_config.in_file.fd = -1;
- fprintf (stderr, "cannot open %s (%s)\n",
- rdd_config.out_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- rdd_config.out_file.fd = fd;
-
- while ((ret = read (rdd_config.in_file.fd, buf, 4096)) > 0) {
- if (write (rdd_config.out_file.fd, buf, ret) != ret) {
- fprintf (stderr, "write failed (%s)\n",
- strerror (errno));
- cleanup ();
- ret = -1;
- goto out;
- }
- }
-
- rdd_config.threads = calloc (rdd_config.thread_count,
- sizeof (pthread_t));
- if (rdd_config.threads == NULL) {
- fprintf (stderr, "calloc() failed (%s)\n", strerror (errno));
-
- ret = -1;
- cleanup ();
- goto out;
- }
-
- ret = pthread_barrier_init (&rdd_config.barrier, NULL,
- rdd_config.thread_count + 1);
- if (ret != 0) {
- fprintf (stderr, "pthread_barrier_init() failed (%s)\n",
- strerror (ret));
-
- free (rdd_config.threads);
- cleanup ();
- ret = -1;
- goto out;
+ int i = 0, ret = -1, fd = -1;
+ char buf[4096];
+
+ ret = check_and_create();
+ if (ret == -1)
+ goto out;
+
+ fd = open(rdd_config.in_file.path, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "cannot open %s (%s)\n", rdd_config.in_file.path,
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ ret = fstat(fd, &rdd_config.in_file.st);
+ if (ret != 0) {
+ close(fd);
+ fprintf(stderr, "cannot stat %s (%s)\n", rdd_config.in_file.path,
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ rdd_config.in_file.fd = fd;
+
+ fd = open(rdd_config.out_file.path, O_WRONLY | O_CREAT | O_TRUNC,
+ S_IRWXU | S_IROTH);
+ if (fd < 0) {
+ close(rdd_config.in_file.fd);
+ rdd_config.in_file.fd = -1;
+ fprintf(stderr, "cannot open %s (%s)\n", rdd_config.out_file.path,
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ rdd_config.out_file.fd = fd;
+
+ while ((ret = read(rdd_config.in_file.fd, buf, 4096)) > 0) {
+ if (write(rdd_config.out_file.fd, buf, ret) != ret) {
+ fprintf(stderr, "write failed (%s)\n", strerror(errno));
+ cleanup();
+ ret = -1;
+ goto out;
}
-
- ret = pthread_mutex_init (&rdd_config.lock, NULL);
+ }
+
+ rdd_config.threads = calloc(rdd_config.thread_count, sizeof(pthread_t));
+ if (rdd_config.threads == NULL) {
+ fprintf(stderr, "calloc() failed (%s)\n", strerror(errno));
+
+ ret = -1;
+ cleanup();
+ goto out;
+ }
+
+ ret = pthread_barrier_init(&rdd_config.barrier, NULL,
+ rdd_config.thread_count + 1);
+ if (ret != 0) {
+ fprintf(stderr, "pthread_barrier_init() failed (%s)\n", strerror(ret));
+
+ free(rdd_config.threads);
+ cleanup();
+ ret = -1;
+ goto out;
+ }
+
+ ret = pthread_mutex_init(&rdd_config.lock, NULL);
+ if (ret != 0) {
+ fprintf(stderr, "pthread_mutex_init() failed (%s)\n", strerror(ret));
+
+ free(rdd_config.threads);
+ pthread_barrier_destroy(&rdd_config.barrier);
+ cleanup();
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < rdd_config.thread_count; i++) {
+ ret = pthread_create(&rdd_config.threads[i], NULL, rdd_read_write,
+ NULL);
if (ret != 0) {
- fprintf (stderr, "pthread_mutex_init() failed (%s)\n",
- strerror (ret));
-
- free (rdd_config.threads);
- pthread_barrier_destroy (&rdd_config.barrier);
- cleanup ();
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < rdd_config.thread_count; i++)
- {
- ret = pthread_create (&rdd_config.threads[i], NULL,
- rdd_read_write, NULL);
- if (ret != 0) {
- fprintf (stderr, "pthread_create failed (%s)\n",
- strerror (errno));
- exit (1);
- }
+ fprintf(stderr, "pthread_create failed (%s)\n", strerror(errno));
+ exit(1);
}
+ }
out:
- return ret;
+ return ret;
}
static void
-rdd_wait_for_completion (void)
+rdd_wait_for_completion(void)
{
- pthread_barrier_wait (&rdd_config.barrier);
+ pthread_barrier_wait(&rdd_config.barrier);
}
-
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- int ret = -1;
+ int ret = -1;
- rdd_default_config ();
+ rdd_default_config();
- ret = argp_parse (&argp, argc, argv, 0, 0, NULL);
- if (ret != 0) {
- ret = -1;
- fprintf (stderr, "%s: argp_parse() failed\n", argv[0]);
- goto err;
- }
+ ret = argp_parse(&argp, argc, argv, 0, 0, NULL);
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "%s: argp_parse() failed\n", argv[0]);
+ goto err;
+ }
- if (!rdd_valid_config ()) {
- ret = -1;
- fprintf (stderr, "%s: configuration validation failed\n",
- argv[0]);
- goto err;
- }
+ if (!rdd_valid_config()) {
+ ret = -1;
+ fprintf(stderr, "%s: configuration validation failed\n", argv[0]);
+ goto err;
+ }
- ret = rdd_spawn_threads ();
- if (ret != 0) {
- fprintf (stderr, "%s: spawning threads failed\n", argv[0]);
- goto err;
- }
+ ret = rdd_spawn_threads();
+ if (ret != 0) {
+ fprintf(stderr, "%s: spawning threads failed\n", argv[0]);
+ goto err;
+ }
- rdd_wait_for_completion ();
+ rdd_wait_for_completion();
err:
- return ret;
+ return ret;
}
diff --git a/extras/check_goto.pl b/extras/check_goto.pl
new file mode 100755
index 00000000000..fa71bfc6683
--- /dev/null
+++ b/extras/check_goto.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+my @ignore_labels = qw (TODO retry fetch_data again try_again sp_state_read_proghdr redo disabled_loop fd_alloc_try_again);
+my @ignore_files = qw (y.tab.c lex.c);
+my @c_files;
+my $line;
+my @labels;
+my $in_comments;
+
+{
+ local $" = "|";
+ my $cmd = "find . -type f -name '*.c' | grep -vE '(@ignore_files)'";
+ @c_files = `$cmd`;
+}
+
+foreach my $file (@c_files) {
+ chomp ($file);
+ open FD, $file or die ("Failed to read file $file: $!");
+ @labels = ();
+ $in_comments = 0;
+ while ($line = <FD>) {
+ chomp ($line);
+
+ next if $line =~ /^\s*(#|\/\/)/;
+ $in_comments = 1 if ($line =~ /\/\*/);
+ $in_comments = 0 if ($line =~ /\*\//);
+
+ next if $in_comments;
+ if ($line =~ /^\s*(([a-zA-Z]|_)\w*)\s*:/) {
+ push (@labels, $1) unless grep (/$1/, @ignore_labels);
+ }
+ @labels = () if $line =~ /^}/;
+
+ next unless @labels;
+ if ($line =~ /^\s*goto\s*(\w+)/) {
+ print "$file:$.: $line\n" if grep /^$1$/, @labels;
+ }
+ }
+
+ close FD;
+}
+
diff --git a/extras/clang-checker.sh b/extras/clang-checker.sh
new file mode 100755
index 00000000000..4909d3adfcd
--- /dev/null
+++ b/extras/clang-checker.sh
@@ -0,0 +1,301 @@
+#!/usr/bin/env bash
+#*******************************************************************************
+# *
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> *
+# This file is part of GlusterFS. *
+# *
+# This file is licensed to you under your choice of the GNU Lesser *
+# General Public License, version 3 or any later version (LGPLv3 or *
+# later), or the GNU General Public License, version 2 (GPLv2), in all *
+# cases as published by the Free Software Foundation. *
+#------------------------------------------------------------------------------*
+# *
+# clang-checker.sh: This script runs clang static analyzer using 'scan-build' *
+# a perl wrapper. After you commit your patch i.e. right *
+# before executing rfc.sh in order to push the patch to *
+# repository, it is recommended that you execute *
+# clang-checker.sh to perform static analysis inorder to *
+# check if there are any possible bugs in the code. *
+# *
+# This script performs the static analysis with and *
+# without HEAD commit, it runs the analyzer only in the *
+# directory where changes have been made and finally diff's *
+# the number of bugs using both cases (i.e. with and *
+# without your commit) and gives a summary, which explain's *
+# about the eligibility of your patch. *
+# *
+# Usage: $ cd $PATH_TO_GLUSTERFS *
+# $ extras/clang-checker.sh (or) $ make clang-check *
+# *
+# Author: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> *
+# *
+#*******************************************************************************
+
+REPORTS_DIR=$(pwd)
+BASELINE_DIR=${REPORTS_DIR}/baseline
+BRESULTS_DIR=${BASELINE_DIR}/results
+BBACKUP_DIR=${BASELINE_DIR}/backup
+TARGET_DIR=${REPORTS_DIR}/target
+TRESULTS_DIR=${TARGET_DIR}/results
+TBACKUP_DIR=${TARGET_DIR}/backup
+
+declare -A DICT_B
+declare -A DICT_T
+declare -A ARR
+declare -A FILES
+
+function identify_changes () {
+ MODIFIED_DATA=$(git show --name-status --oneline | tail -n +2)
+ FLAG=0
+ for i in ${MODIFIED_DATA}; do
+ if [ $FLAG -eq 1 ]; then
+ ARR+="$(dirname $i) ";
+ FLAG=0;
+ fi
+ if [ $i = 'M' ] || [ $i = 'A' ]; then
+ FLAG=1;
+ fi
+ done
+
+ MODIFIED_DIR=$(echo "${ARR[@]}" | tr ' ' '\n' | sort -u | tr '\n' ' ')
+ for i in $MODIFIED_DIR; do
+ # run only in directories which has Makefile
+ if [ $(find ./$i -iname "makefile*" | wc -c) -gt 0 ]; then
+ # skip 'doc' and '.'(top) directory
+ if [ "xx$i" != "xxdoc" ] && [ "xx$i" != "xx." ]; then
+ FILES+="$i "
+ fi
+ fi
+ done
+ if [ -z $FILES ]; then
+ echo "Probably no changes made to 'c' files"
+ exit;
+ fi
+}
+
+function check_prerequisites () {
+ if ! type "clang" 2> /dev/null; then
+ echo -e "\ntry after installing clang and scan-build..."
+ echo "useful info at http://clang-analyzer.llvm.org/installation.html\n"
+ echo -e "hint: 'dnf -y install clang-analyzer.noarch'\n"
+ exit 1;
+ elif ! type "scan-build" 2> /dev/null; then
+ echo -e "\ntry after installing scan-build..."
+ echo "useful info at http://clang-analyzer.llvm.org/installation.html"
+ echo -e "hint: 'dnf -y install clang-analyzer.noarch'\n"
+ exit 1;
+ fi
+}
+
+function force_terminate () {
+ echo -e "\nreceived a signal to force terminate ..\n"
+ git am --abort 2> /dev/null
+ git am ${PATCH_NAME}
+ rm -f ${REPORTS_DIR}/${PATCH_NAME}
+ exit 1;
+}
+
+function run_scanbuild () {
+ local CLANG=$(which clang)
+ local SCAN_BUILD=$(which scan-build)
+ local ORIG_COMMIT=$(git rev-parse --verify HEAD^)
+ PATCH_NAME=$(git format-patch $ORIG_COMMIT)
+
+ echo -e "\n| Performing clang analysis on:" \
+ "$(git log --pretty=format:"%h - '%s' by %an" -1) ... |\n"
+ echo -e "Changes are identified in '${FILES[@]}' directorie[s]\n"
+
+ if [ -d "${BRESULTS_DIR}" ]; then
+ mkdir -p ${BBACKUP_DIR} ${TBACKUP_DIR}
+ mv ${BRESULTS_DIR} \
+ ${BBACKUP_DIR}/results_$(ls -l ${BBACKUP_DIR} | wc -l)
+ mv ${TRESULTS_DIR} \
+ ${TBACKUP_DIR}/results_$(ls -l ${TBACKUP_DIR} | wc -l)
+ fi
+ for DIR in ${FILES[@]}; do
+ mkdir -p ${BRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g')
+ mkdir -p ${TRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g')
+ done
+ # get nproc info
+ case $(uname -s) in
+ 'Linux')
+ local NPROC=$(getconf _NPROCESSORS_ONLN)
+ ;;
+ 'NetBSD')
+ local NPROC=$(getconf NPROCESSORS_ONLN)
+ ;;
+ esac
+
+ trap force_terminate INT TERM QUIT EXIT
+
+ git reset --hard HEAD^
+
+ # build complete source code for sake of dependencies
+ echo -e "\n# make -j${NPROC} ..."
+ make -j${NPROC} 1>/dev/null
+
+ for DIR in ${FILES[@]}; do
+ if [ $(find ./$i -iname "makefile*" | wc -c) -gt 0 ]; then
+ make clean -C ${DIR} 1>/dev/null
+ echo -e "\n| Analyzing ${DIR} without commit ... |\n"
+ # run only in directory where changes are made
+ ${SCAN_BUILD} -o ${BRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g') \
+ --use-analyzer=${CLANG} make -j${NPROC} -C ${DIR}
+ fi
+ done
+
+ echo -e "\n| Analyzing without commit complete ... |\n"
+
+ git am ${PATCH_NAME}
+ trap - INT TERM QUIT EXIT
+
+ # In case commit has changes to configure stuff ?
+ echo -e "\n# make clean ..."
+ make clean 1>/dev/null
+ echo -e "\n# ./autogen.sh && ./configure --with-previous-options ..."
+ ${REPORTS_DIR}/autogen.sh 2>/dev/null
+ ${REPORTS_DIR}/configure --with-previous-options 1>/dev/null
+ echo -e "\n# make -j${NPROC} ..."
+ make -j${NPROC} 1>/dev/null
+
+ for DIR in ${FILES[@]}; do
+ if [ $(find ./$i -iname "makefile*" | wc -c) -gt 0 ]; then
+ make clean -C ${DIR} 1>/dev/null
+ echo -e "\n| Analyzing ${DIR} with commit ... |\n"
+ # run only in directory where changes are made
+ ${SCAN_BUILD} -o ${TRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g') \
+ --use-analyzer=${CLANG} make -j${NPROC} -C ${DIR}
+ fi
+ done
+
+ echo -e "\n| Analyzing with commit complete ... |\n"
+
+ rm -f ${REPORTS_DIR}/${PATCH_NAME}
+}
+
+function count_for_baseline () {
+ for DIR in ${FILES[@]}; do
+ HTMLS_DIR=${BRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/$(ls ${BRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/);
+
+ local NAMES_OF_BUGS_B=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f3 | cut -d">" -f2 |
+ sed 's/[^a-zA-Z0]/_/g' | tr '\n' ' ')
+ local NO_OF_BUGS_B=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f5 | cut -d">" -f2 | tr '\n' ' ')
+ local count_B=0;
+
+ read -a BUG_NAME_B <<<$NAMES_OF_BUGS_B
+ read -a BUG_COUNT_B <<<$NO_OF_BUGS_B
+ for i in ${BUG_NAME_B[@]};
+ do
+ if [ ! -z ${DICT_B[$i]} ]; then
+ DICT_B[$i]=$(expr ${BUG_COUNT_B[count_B]} + ${DICT_B[$i]});
+ else
+ DICT_B+=([$i]=${BUG_COUNT_B[count_B]});
+ fi
+ count_B=$(expr $count_B + 1)
+ done
+ done
+
+ echo -e "\nBASELINE BUGS LIST (before applying patch):"
+ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ for key_B in ${!DICT_B[@]}; do
+ echo "${key_B} --> ${DICT_B[${key_B}]}" | sed 's/_/ /g' | tr -s ' '
+ done
+}
+
+function count_for_target () {
+ for DIR in ${FILES[@]}; do
+ HTMLS_DIR=${TRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/$(ls ${TRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/);
+
+ local NAME_OF_BUGS_T=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f3 | cut -d">" -f2 |
+ sed 's/[^a-zA-Z0]/_/g'| tr '\n' ' ')
+ local NO_OF_BUGS_T=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f5 | cut -d">" -f2 | tr '\n' ' ')
+ local count_T=0;
+
+ read -a BUG_NAME_T <<<$NAME_OF_BUGS_T
+ read -a BUG_COUNT_T <<<$NO_OF_BUGS_T
+
+ for i in ${BUG_NAME_T[@]};
+ do
+ if [ ! -z ${DICT_T[$i]} ]; then
+ DICT_T[$i]=$(expr ${BUG_COUNT_T[count_T]} + ${DICT_T[$i]});
+ else
+ DICT_T+=([$i]=${BUG_COUNT_T[count_T]});
+ fi
+ count_T=$(expr $count_T + 1)
+ done
+ done
+
+ echo -e "\nTARGET BUGS LIST (after applying patch):"
+ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ for key_T in ${!DICT_T[@]}; do
+ echo "${key_T} --> ${DICT_T[${key_T}]}" | sed 's/_/ /g' | tr -s ' '
+ done
+}
+
+function array_contains () {
+ local SEEKING=$1; shift
+ local IN=1
+ for ELEMENT; do
+ if [[ $ELEMENT == $SEEKING ]]; then
+ IN=0
+ break
+ fi
+ done
+ return $IN
+}
+
+function main () {
+ echo -e "\n================ Clang analyzer in progress ================\n"
+ check_prerequisites
+ identify_changes
+ run_scanbuild
+ clear
+ count_for_baseline
+ count_for_target
+ echo -e "\nSUMMARY OF CLANG-ANALYZER:"
+ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~"
+
+ FLAG=0
+ for BUG in ${!DICT_T[@]}; do
+ array_contains $BUG "${!DICT_B[@]}"
+ if [ $? -eq 1 ]; then
+ echo "New ${DICT_T[${BUG}]} Bug[s] introduced: $(echo $BUG |
+ sed 's/_/ /g' |
+ tr -s ' ')"
+ FLAG=1
+ else
+ if [ ${BUG} != "All_Bugs" ]; then
+ if [ ${DICT_B[${BUG}]} -lt \
+ ${DICT_T[${BUG}]} ]; then
+ echo "Extra $(expr ${DICT_T[${BUG}]} - \
+ ${DICT_B[${BUG}]}) Bug[s] Introduced in: $(echo $BUG |
+ sed 's/_/ /g' | tr -s ' ')"
+ FLAG=1
+ fi
+ fi
+ fi
+ done
+
+ echo
+ if [ $FLAG -eq 0 ]; then
+ echo -e "Patch Value given by Clang analyzer '+1'\n"
+ else
+ echo -e "Patch Value given by Clang analyzer '-1'\n"
+ fi
+ echo -e "\nExplore complete results at:"
+ find ${BRESULTS_DIR}/ -iname "index.html"
+ find ${TRESULTS_DIR}/ -iname "index.html"
+ echo -e "\n================= Done with Clang Analysis =================\n"
+
+ exit ${FLAG}
+}
+
+main
diff --git a/extras/cliutils/Makefile.am b/extras/cliutils/Makefile.am
new file mode 100644
index 00000000000..7039703e275
--- /dev/null
+++ b/extras/cliutils/Makefile.am
@@ -0,0 +1,4 @@
+EXTRA_DIST= cliutils.py __init__.py
+
+cliutilsdir = @BUILD_PYTHON_SITE_PACKAGES@/gluster/cliutils
+cliutils_PYTHON = cliutils.py __init__.py
diff --git a/extras/cliutils/README.md b/extras/cliutils/README.md
new file mode 100644
index 00000000000..309beb1ca25
--- /dev/null
+++ b/extras/cliutils/README.md
@@ -0,0 +1,233 @@
+# CLI utility for creating Cluster aware CLI tools for Gluster
+cliutils is a Python library which provides wrapper around `gluster system::
+execute` command to extend the functionalities of Gluster.
+
+Example use cases:
+- Start a service in all peer nodes of Cluster
+- Collect the status of a service from all peer nodes
+- Collect the config values from each peer nodes and display latest
+ config based on version.
+- Copy a file present in GLUSTERD_WORKDIR from one peer node to all
+ other peer nodes.(Geo-replication create push-pem is using this to
+ distribute the SSH public keys from all master nodes to all slave
+ nodes)
+- Generate pem keys in all peer nodes and collect all the public keys
+ to one place(Geo-replication gsec_create is doing this)
+- Provide Config sync CLIs for new features like `gluster-eventsapi`,
+ `gluster-restapi`, `gluster-mountbroker` etc.
+
+## Introduction
+
+If a executable file present in `$GLUSTER_LIBEXEC` directory in all
+peer nodes(Filename startswith `peer_`) then it can be executed by
+running `gluster system:: execute` command from any one peer node.
+
+- This command will not copy any executables to peer nodes, Script
+ should exist in all peer nodes to use this infrastructure. Raises
+ error in case script not exists in any one of the peer node.
+- Filename should start with `peer_` and should exist in
+ `$GLUSTER_LIBEXEC` directory.
+- This command can not be called from outside the cluster.
+
+To understand the functionality, create a executable file `peer_hello`
+under $GLUSTER_LIBEXEC directory and copy to all peer nodes.
+
+ #!/usr/bin/env bash
+ echo "Hello from $(gluster system:: uuid get)"
+
+Now run the following command from any one gluster node,
+
+ gluster system:: execute hello
+
+**Note:** Gluster will not copy the executable script to all nodes,
+ copy `peer_hello` script to all peer nodes to use `gluster system::
+ execute` infrastructure.
+
+It will run `peer_hello` executable in all peer nodes and shows the
+output from each node(Below example shows output from my two nodes
+cluster)
+
+ Hello from UUID: e7a3c5c8-e7ad-47ad-aa9c-c13907c4da84
+ Hello from UUID: c680fc0a-01f9-4c93-a062-df91cc02e40f
+
+## cliutils
+A Python wrapper around `gluster system:: execute` command is created
+to address the following issues
+
+- If a node is down in the cluster, `system:: execute` just skips it
+ and runs only in up nodes.
+- `system:: execute` commands are not user friendly
+- It captures only stdout, so handling errors is tricky.
+
+**Advantages of cliutils:**
+
+- Single executable file will act as node component as well as User CLI.
+- `execute_in_peers` utility function will merge the `gluster system::
+ execute` output with `gluster peer status` to identify offline nodes.
+- Easy CLI Arguments handling.
+- If node component returns non zero return value then, `gluster
+ system:: execute` will fail to aggregate the output from other
+ nodes. `node_output_ok` or `node_output_notok` utility functions
+ returns zero both in case of success or error, but returns json
+ with ok: true or ok:false respectively.
+- Easy to iterate on the node outputs.
+- Better error handling - Geo-rep CLIs `gluster system:: execute
+ mountbroker`, `gluster system:: execute gsec_create` and `gluster
+ system:: add_secret_pub` are suffering from error handling. These
+ tools are not notifying user if any failures during execute or if a node
+ is down during execute.
+
+### Hello World
+Create a file in `$LIBEXEC/glusterfs/peer_message.py` with following
+content.
+
+ #!/usr/bin/python3
+ from gluster.cliutils import Cmd, runcli, execute_in_peers, node_output_ok
+
+ class NodeHello(Cmd):
+ name = "node-hello"
+
+ def run(self, args):
+ node_output_ok("Hello")
+
+ class Hello(Cmd):
+ name = "hello"
+
+ def run(self, args):
+ out = execute_in_peers("node-hello")
+ for row in out:
+ print ("{0} from {1}".format(row.output, row.hostname))
+
+ if __name__ == "__main__":
+ runcli()
+
+When we run `python peer_message.py`, it will have two subcommands,
+"node-hello" and "hello". This file should be copied to
+`$LIBEXEC/glusterfs` directory in all peer nodes. User will call
+subcommand "hello" from any one peer node, which internally call
+`gluster system:: execute message.py node-hello`(This runs in all peer
+nodes and collect the outputs)
+
+For node component do not print the output directly, use
+`node_output_ok` or `node_output_notok` functions. `node_output_ok`
+additionally collects the node UUID and prints in JSON
+format. `execute_in_peers` function will collect this output and
+merges with `peers list` so that we don't miss the node information if
+that node is offline.
+
+If you observed already, function `args` is optional, if you don't
+have arguments then no need to create a function. When we run the
+file, we will have two subcommands. For example,
+
+ python peer_message.py hello
+ python peer_message.py node-hello
+
+First subcommand calls second subcommand in all peer nodes. Basically
+`execute_in_peers(NAME, ARGS)` will be converted into
+
+ CMD_NAME = FILENAME without "peers_"
+ gluster system:: execute <CMD_NAME> <SUBCOMMAND> <ARGS>
+
+In our example,
+
+ filename = "peer_message.py"
+ cmd_name = "message.py"
+ gluster system:: execute ${cmd_name} node-hello
+
+Now create symlink in `/usr/bin` or `/usr/sbin` directory depending on
+the usecase.(Optional step for usability)
+
+ ln -s /usr/libexec/glusterfs/peer_message.py /usr/bin/gluster-message
+
+Now users can use `gluster-message` instead of calling
+`/usr/libexec/glusterfs/peer_message.py`
+
+ gluster-message hello
+
+### Showing CLI output as Table
+
+Following example uses prettytable library, which can be installed
+using `pip install prettytable` or `dnf install python-prettytable`
+
+ #!/usr/bin/python3
+ from prettytable import PrettyTable
+ from gluster.cliutils import Cmd, runcli, execute_in_peers, node_output_ok
+
+ class NodeHello(Cmd):
+ name = "node-hello"
+
+ def run(self, args):
+ node_output_ok("Hello")
+
+ class Hello(Cmd):
+ name = "hello"
+
+ def run(self, args):
+ out = execute_in_peers("node-hello")
+ # Initialize the CLI table
+ table = PrettyTable(["ID", "NODE", "NODE STATUS", "MESSAGE"])
+ table.align["NODE STATUS"] = "r"
+ for row in out:
+ table.add_row([row.nodeid,
+ row.hostname,
+ "UP" if row.node_up else "DOWN",
+ row.output if row.ok else row.error])
+
+ print table
+
+ if __name__ == "__main__":
+ runcli()
+
+
+Example output,
+
+ +--------------------------------------+-----------+-------------+---------+
+ | ID | NODE | NODE STATUS | MESSAGE |
+ +--------------------------------------+-----------+-------------+---------+
+ | e7a3c5c8-e7ad-47ad-aa9c-c13907c4da84 | localhost | UP | Hello |
+ | bb57a4c4-86eb-4af5-865d-932148c2759b | vm2 | UP | Hello |
+ | f69b918f-1ffa-4fe5-b554-ee10f051294e | vm3 | DOWN | N/A |
+ +--------------------------------------+-----------+-------------+---------+
+
+## How to package in Gluster
+If the project is created in `$GLUSTER_SRC/tools/message`
+
+Add "message" to SUBDIRS list in `$GLUSTER_SRC/tools/Makefile.am`
+
+and then create a `Makefile.am` in `$GLUSTER_SRC/tools/message`
+directory with following content.
+
+ EXTRA_DIST = peer_message.py
+
+ peertoolsdir = $(libexecdir)/glusterfs/
+ peertools_SCRIPTS = peer_message.py
+
+ install-exec-hook:
+ $(mkdir_p) $(DESTDIR)$(bindir)
+ rm -f $(DESTDIR)$(bindir)/gluster-message
+ ln -s $(libexecdir)/glusterfs/peer_message.py \
+ $(DESTDIR)$(bindir)/gluster-message
+
+ uninstall-hook:
+ rm -f $(DESTDIR)$(bindir)/gluster-message
+
+Thats all. Add following files in `glusterfs.spec.in` if packaging is
+required.(Under `%files` section)
+
+ %{_libexecdir}/glusterfs/peer_message.py*
+ %{_bindir}/gluster-message
+
+## Who is using cliutils
+- gluster-mountbroker http://review.gluster.org/14544
+- gluster-eventsapi http://review.gluster.org/14248
+- gluster-georep-sshkey http://review.gluster.org/14732
+- gluster-restapi https://github.com/gluster/restapi
+
+## Limitations/TODOs
+- Not yet possible to create CLI without any subcommand, For example
+ `gluster-message` without any arguments
+- Hiding node subcommands in `--help`(`gluster-message --help` will
+ show all subcommands including node subcommands)
+- Only positional arguments supported for node arguments, Optional
+ arguments can be used for other commands.
+- API documentation
diff --git a/extras/cliutils/__init__.py b/extras/cliutils/__init__.py
new file mode 100644
index 00000000000..8765cc85099
--- /dev/null
+++ b/extras/cliutils/__init__.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Reexporting the utility funcs and classes
+from .cliutils import (runcli,
+ sync_file_to_peers,
+ execute_in_peers,
+ execute,
+ node_output_ok,
+ node_output_notok,
+ output_error,
+ oknotok,
+ yesno,
+ get_node_uuid,
+ Cmd,
+ GlusterCmdException,
+ set_common_args_func)
+
+
+# This will be useful when `from cliutils import *`
+__all__ = ["runcli",
+ "sync_file_to_peers",
+ "execute_in_peers",
+ "execute",
+ "node_output_ok",
+ "node_output_notok",
+ "output_error",
+ "oknotok",
+ "yesno",
+ "get_node_uuid",
+ "Cmd",
+ "GlusterCmdException",
+ "set_common_args_func"]
diff --git a/extras/cliutils/cliutils.py b/extras/cliutils/cliutils.py
new file mode 100644
index 00000000000..55fbaf56704
--- /dev/null
+++ b/extras/cliutils/cliutils.py
@@ -0,0 +1,237 @@
+# -*- coding: utf-8 -*-
+from __future__ import print_function
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import inspect
+import subprocess
+import os
+import xml.etree.cElementTree as etree
+import json
+import sys
+
+MY_UUID = None
+parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+subparsers = parser.add_subparsers(dest="mode")
+
+subcommands = {}
+cache_data = {}
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+_common_args_func = lambda p: True
+
+
+class GlusterCmdException(Exception):
+ def __init__(self, message):
+ self.message = message
+ try:
+ # Python 3
+ super().__init__(message)
+ except TypeError:
+ # Python 2
+ super(GlusterCmdException, self).__init__(message)
+
+
+def get_node_uuid():
+ # Caches the Node UUID in global variable,
+ # Executes gluster system:: uuid get command only if
+ # calling this function for first time
+ global MY_UUID
+ if MY_UUID is not None:
+ return MY_UUID
+
+ cmd = ["gluster", "system::", "uuid", "get", "--xml"]
+ rc, out, err = execute(cmd)
+
+ if rc != 0:
+ return None
+
+ tree = etree.fromstring(out)
+ uuid_el = tree.find("uuidGenerate/uuid")
+ MY_UUID = uuid_el.text
+ return MY_UUID
+
+
+def yesno(flag):
+ return "Yes" if flag else "No"
+
+
+def oknotok(flag):
+ return "OK" if flag else "NOT OK"
+
+
+def output_error(message, errcode=1):
+ print (message, file=sys.stderr)
+ sys.exit(errcode)
+
+
+def node_output_ok(message=""):
+ # Prints Success JSON output and exits with returncode zero
+ out = {"ok": True, "nodeid": get_node_uuid(), "output": message}
+ print (json.dumps(out))
+ sys.exit(0)
+
+
+def node_output_notok(message):
+ # Prints Error JSON output and exits with returncode zero
+ out = {"ok": False, "nodeid": get_node_uuid(), "error": message}
+ print (json.dumps(out))
+ sys.exit(0)
+
+
+def execute(cmd):
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ return p.returncode, out, err
+
+
+def get_pool_list():
+ cmd = ["gluster", "--mode=script", "pool", "list", "--xml"]
+ rc, out, err = execute(cmd)
+ if rc != 0:
+ output_error("Failed to get Pool Info: {0}".format(err))
+
+ tree = etree.fromstring(out)
+
+ pool = []
+ try:
+ for p in tree.findall('peerStatus/peer'):
+ pool.append({"nodeid": p.find("uuid").text,
+ "hostname": p.find("hostname").text,
+ "connected": (True if p.find("connected").text == "1"
+ else False)})
+ except (ParseError, AttributeError, ValueError) as e:
+ output_error("Failed to parse Pool Info: {0}".format(e))
+
+ return pool
+
+
+class NodeOutput(object):
+ def __init__(self, **kwargs):
+ self.nodeid = kwargs.get("nodeid", "")
+ self.hostname = kwargs.get("hostname", "")
+ self.node_up = kwargs.get("node_up", False)
+ self.ok = kwargs.get("ok", False)
+ self.output = kwargs.get("output", "N/A")
+ self.error = kwargs.get("error", "N/A")
+
+
+def execute_in_peers(name, args=[]):
+ # Get the file name of Caller function, If the file name is peer_example.py
+ # then Gluster peer command will be gluster system:: execute example.py
+ # Command name is without peer_
+ frame = inspect.stack()[1]
+ module = inspect.getmodule(frame[0])
+ actual_file = module.__file__
+ # If file is symlink then find actual file
+ if os.path.islink(actual_file):
+ actual_file = os.readlink(actual_file)
+
+ # Get the name of file without peer_
+ cmd_name = os.path.basename(actual_file).replace("peer_", "")
+ cmd = ["gluster", "system::", "execute", cmd_name, name] + args
+ rc, out, err = execute(cmd)
+ if rc != 0:
+ raise GlusterCmdException((rc, out, err, " ".join(cmd)))
+
+ out = out.strip().splitlines()
+
+ # JSON decode each line and construct one object with node id as key
+ all_nodes_data = {}
+ for node_data in out:
+ data = json.loads(node_data)
+ all_nodes_data[data["nodeid"]] = {
+ "nodeid": data.get("nodeid"),
+ "ok": data.get("ok"),
+ "output": data.get("output", ""),
+ "error": data.get("error", "")}
+
+ # gluster pool list
+ pool_list = get_pool_list()
+
+ data_out = []
+ # Iterate pool_list and merge all_nodes_data collected above
+ # If a peer node is down then set node_up = False
+ for p in pool_list:
+ p_data = all_nodes_data.get(p.get("nodeid"), None)
+ row_data = NodeOutput(node_up=False,
+ hostname=p.get("hostname"),
+ nodeid=p.get("nodeid"),
+ ok=False)
+
+ if p_data is not None:
+ # Node is UP
+ row_data.node_up = True
+ row_data.ok = p_data.get("ok")
+ row_data.output = p_data.get("output")
+ row_data.error = p_data.get("error")
+
+ data_out.append(row_data)
+
+ return data_out
+
+
+def sync_file_to_peers(fname):
+ # Copy file from current node to all peer nodes, fname
+ # is path after GLUSTERD_WORKDIR
+ cmd = ["gluster", "system::", "copy", "file", fname]
+ rc, out, err = execute(cmd)
+ if rc != 0:
+ raise GlusterCmdException((rc, out, err))
+
+
+class Cmd(object):
+ name = ""
+
+ def run(self, args):
+ # Must required method. Raise NotImplementedError if derived class
+ # not implemented this method
+ raise NotImplementedError("\"run(self, args)\" method is "
+ "not implemented by \"{0}\"".format(
+ self.__class__.__name__))
+
+
+def runcli():
+ # Get list of Classes derived from class "Cmd" and create
+ # a subcommand as specified in the Class name. Call the args
+ # method by passing subcommand parser, Derived class can add
+ # arguments to the subcommand parser.
+ metavar_data = []
+ for c in Cmd.__subclasses__():
+ cls = c()
+ if getattr(cls, "name", "") == "":
+ raise NotImplementedError("\"name\" is not added "
+ "to \"{0}\"".format(
+ cls.__class__.__name__))
+
+ # Do not show in help message if subcommand starts with node-
+ if not cls.name.startswith("node-"):
+ metavar_data.append(cls.name)
+
+ p = subparsers.add_parser(cls.name)
+ args_func = getattr(cls, "args", None)
+ if args_func is not None:
+ args_func(p)
+
+ # Apply common args if any
+ _common_args_func(p)
+
+ # A dict to save subcommands, key is name of the subcommand
+ subcommands[cls.name] = cls
+
+ # Hide node commands in Help message
+ subparsers.metavar = "{" + ",".join(metavar_data) + "}"
+
+ # Get all parsed arguments
+ args = parser.parse_args()
+
+ # Get the subcommand to execute
+ cls = subcommands.get(args.mode, None)
+
+ # Run
+ if cls is not None:
+ cls.run(args)
+
+
+def set_common_args_func(func):
+ global _common_args_func
+ _common_args_func = func
diff --git a/extras/collect-system-stats.sh b/extras/collect-system-stats.sh
new file mode 100755
index 00000000000..865e70bbc11
--- /dev/null
+++ b/extras/collect-system-stats.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+################################################################################
+# Usage: collect-system-stats.sh <delay-in-seconds>
+# This script starts sar/top/iostat/vmstat processes which collect system stats
+# with the interval <delay-in-seconds> given as argument to the script. When
+# the script is stopped either by entering any input or Ctrl+C the list of
+# files where output is captured will be printed on the screen which can be
+# observed to find any problems/bottlenecks.
+###############################################################################
+
+function stop_processes {
+ echo "Stopping the monitoring processes"
+ echo "sar pid:$sar_pid", "top pid: $top_pid", "iostat pid: $iostat_pid", "vmstat pid: $vmstat_pid"
+ kill "$sar_pid" "$top_pid" "$iostat_pid" "$vmstat_pid"
+ echo "Files created: ${timestamp}-network.out, ${timestamp}-top.out, ${timestamp}-iostat.out, ${timestamp}-vmstat.out"
+}
+
+function check_dependent_commands_exist()
+{
+ declare -a arr=("sar" "top" "iostat" "vmstat")
+ for i in "${arr[@]}"
+ do
+ if ! command -v "$i" > /dev/null 2>&1
+ then
+ echo "ERROR: '$i' command is not found"
+ exit 1
+ fi
+ done
+
+}
+
+case "$1" in
+ ''|*[!0-9]*) echo "Usage: $0 <delay-between-successive-metrics-collection-in-seconds>"; exit 1 ;;
+ *) interval="$1" ;;
+esac
+
+timestamp=$(date +"%s")
+
+check_dependent_commands_exist
+sar -n DEV "$interval" > "${timestamp}"-network.out &
+sar_pid="$!"
+top -bHd "$interval" > "${timestamp}"-top.out &
+top_pid="$!"
+iostat -Ntkdx "$interval" > "${timestamp}"-iostat.out &
+iostat_pid="$!"
+vmstat -t "$interval" > "${timestamp}"-vmstat.out &
+vmstat_pid="$!"
+echo "Started sar, vmstat, iostat, top for collecting stats"
+
+
+trap stop_processes EXIT
+read -r -p "Press anything and ENTER to exit";
diff --git a/extras/command-completion/Makefile b/extras/command-completion/Makefile
new file mode 100644
index 00000000000..06cbfe0672f
--- /dev/null
+++ b/extras/command-completion/Makefile
@@ -0,0 +1,6 @@
+install:
+ mkdir -p /etc/bash_completion.d
+ cp gluster.bash /etc/bash_completion.d/gluster
+
+uninstall:
+ rm -f /etc/bash_completion.d/gluster
diff --git a/extras/command-completion/README b/extras/command-completion/README
new file mode 100644
index 00000000000..0acba38168f
--- /dev/null
+++ b/extras/command-completion/README
@@ -0,0 +1,5 @@
+This file is not moved to /etc/bash_completion.d/ with the source installation.
+Please execute make install explicity from here to move this file to
+/etc/bash_completion.d
+
+Similarly, use make uninstall to remove it from /etc/bash_completion.d
diff --git a/extras/command-completion/gluster.bash b/extras/command-completion/gluster.bash
new file mode 100644
index 00000000000..73d16098875
--- /dev/null
+++ b/extras/command-completion/gluster.bash
@@ -0,0 +1,492 @@
+#!/bin/bash
+
+if pidof glusterd > /dev/null 2>&1; then
+ GLUSTER_SET_OPTIONS="
+ $(for token in `gluster volume set help 2>/dev/null | grep "^Option:" | cut -d ' ' -f 2`
+ do
+ echo "{$token},"
+ done)
+ "
+ GLUSTER_RESET_OPTIONS="$GLUSTER_SET_OPTIONS"
+fi
+
+GLUSTER_TOP_SUBOPTIONS1="
+ {nfs},
+ {brick},
+ {list-cnt}
+"
+GLUSTER_TOP_SUBOPTIONS2="
+ {bs
+ {__SIZE
+ {count}
+ }
+ },
+ {brick},
+ {list-cnt}
+"
+GLUSTER_TOP_OPTIONS="
+ {open
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {read
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {write
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {opendir
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {readdir
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {clear
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {read-perf
+ [ $GLUSTER_TOP_SUBOPTIONS2 ]
+ },
+ {write-perf
+ [ $GLUSTER_TOP_SUBOPTIONS2 ]
+ }
+"
+
+GLUSTER_QUOTA_OPTIONS="
+ {enable},
+ {disable},
+ {list},
+ {remove},
+ {default-soft-limit},
+ {limit-usage},
+ {alert-time},
+ {soft-timeout},
+ {hard-timeout}
+"
+
+GLUSTER_PROFILE_OPTIONS="
+ {start},
+ {info [
+ {peek},
+ {incremental
+ {peek}
+ },
+ {cumulative},
+ {clear},
+ ]
+ },
+ {stop}
+"
+
+GLUSTER_BARRIER_OPTIONS="
+ {enable},
+ {disable}
+"
+
+GLUSTER_GEO_REPLICATION_SUBOPTIONS="
+"
+GLUSTER_GEO_REPLICATION_OPTIONS="
+ {__VOLNAME [
+ {__SLAVEURL [
+ {create [
+ {push-pem
+ {force}
+ },
+ {force}
+ ]
+ },
+ {start {force} },
+ {status {detail} },
+ {config},
+ {pause {force} },
+ {resume {force} },
+ {stop {force} },
+ {delete {force} }
+ ]
+ },
+ {status}
+ ]
+ },
+ {status}
+"
+
+GLUSTER_VOLUME_OPTIONS="
+ {volume [
+ {add-brick
+ {__VOLNAME}
+ },
+ {barrier
+ {__VOLNAME
+ [ $GLUSTER_BARRIER_OPTIONS ]
+ }
+ },
+ {clear-locks
+ {__VOLNAME}
+ },
+ {create},
+ {delete
+ {__VOLNAME}
+ },
+ {geo-replication
+ [ $GLUSTER_GEO_REPLICATION_OPTIONS ]
+ },
+ {heal
+ {__VOLNAME}
+ },
+ {help},
+ {info
+ {__VOLNAME}
+ },
+ {list},
+ {log
+ {__VOLNAME}
+ },
+ {profile
+ {__VOLNAME
+ [ $GLUSTER_PROFILE_OPTIONS ]
+ }
+ },
+ {quota
+ {__VOLNAME
+ [ $GLUSTER_QUOTA_OPTIONS ]
+ }
+ },
+ {rebalance
+ {__VOLNAME}
+ },
+ {remove-brick
+ {__VOLNAME}
+ },
+ {replace-brick
+ {__VOLNAME}
+ },
+ {reset
+ {__VOLNAME
+ [ $GLUSTER_RESET_OPTIONS ]
+ }
+ },
+ {set
+ {__VOLNAME
+ [ $GLUSTER_SET_OPTIONS ]
+ }
+ },
+ {start
+ {__VOLNAME
+ {force}
+ }
+ },
+ {statedump
+ {__VOLNAME}
+ },
+ {status
+ {__VOLNAME}
+ },
+ {stop
+ {__VOLNAME
+ {force}
+ }
+ },
+ {sync
+ {__HOSTNAME}
+ },
+ {top
+ {__VOLNAME
+ [ $GLUSTER_TOP_OPTIONS ]
+ }
+ }
+ ]
+ }
+"
+
+GLUSTER_COMMAND_TREE="
+{gluster [
+ $GLUSTER_VOLUME_OPTIONS ,
+ {peer [
+ {probe
+ {__HOSTNAME}
+ },
+ {detach
+ {__HOSTNAME
+ {force}
+ }
+ },
+ {status}
+ ]
+ },
+ {pool
+ {list}
+ },
+ {help}
+ ]
+}"
+
+__SIZE ()
+{
+ return 0
+}
+
+__SLAVEURL ()
+{
+ return 0
+}
+
+__HOSTNAME ()
+{
+ local zero=0
+ local ret=0
+ local cur_word="$2"
+
+ if [ "$1" == "X" ]; then
+ return
+
+ elif [ "$1" == "match" ]; then
+ return 0
+
+ elif [ "$1" == "complete" ]; then
+ COMPREPLY=($(compgen -A hostname -- $cur_word))
+ fi
+ return 0
+}
+
+__VOLNAME ()
+{
+ local zero=0
+ local ret=0
+ local cur_word="$2"
+ local list=""
+
+ if [ "X$1" == "X" ]; then
+ return
+
+ elif [ "$1" == "match" ]; then
+ return 0
+
+ elif [ "$1" == "complete" ]; then
+ if ! pidof glusterd > /dev/null 2>&1; then
+ list='';
+
+ else
+ list=`gluster volume list 2> /dev/null`
+ fi
+
+ else
+ return 0
+ fi
+
+ COMPREPLY=($(compgen -W "$list" -- $cur_word))
+ return 0
+}
+
+_gluster_throw () {
+#echo $1 >&2
+ COMPREPLY=''
+ exit
+}
+
+declare GLUSTER_FINAL_LIST=''
+declare GLUSTER_LIST=''
+declare -i GLUSTER_TOP=0
+_gluster_push () {
+ GLUSTER_TOP=$((GLUSTER_TOP + 1))
+ return $GLUSTER_TOP
+}
+_gluster_pop () {
+ GLUSTER_TOP=$((GLUSTER_TOP - 1))
+ return $GLUSTER_TOP
+}
+
+_gluster_goto_end ()
+{
+ local prev_top=$1
+ local top=$1
+ local token=''
+
+ while [ $top -ge $prev_top ]; do
+ read -r token
+ case $token in
+ '{' | '[')
+ _gluster_push
+ top=$?
+ ;;
+ '}' | ']')
+ _gluster_pop
+ top=$?
+ ;;
+ esac
+ done
+
+ return
+}
+
+_gluster_form_list ()
+{
+ local token=''
+ local top=0
+ local comma=''
+ local cur_word="$1"
+
+ read -r token
+ case $token in
+ ']')
+ ;;
+ '{')
+ _gluster_push
+ top=$?
+ read -r key
+ if [ "X$cur_word" == "X" -o "${cur_word:0:1}" == "${key:0:1}" -o "${key:0:1}" == "_" ]; then
+ GLUSTER_LIST="$GLUSTER_LIST $key"
+ fi
+
+ _gluster_goto_end $top
+ read -r comma
+ if [ "$comma" == "," ]; then
+ _gluster_form_list $cur_word
+ fi
+ ;;
+ *)
+ _gluster_throw "Expected '{' but received $token"
+ ;;
+ esac
+
+ return
+}
+
+_gluster_goto_child ()
+{
+ local match_string="$1"
+ local token=''
+ local top=0
+ local comma=''
+
+ read -r token
+ case $token in
+ '{')
+ _gluster_push
+ top=$?
+ ;;
+ *)
+ _gluster_throw "Expected '{' but received $token"
+ ;;
+ esac
+
+ read -r token
+ case `echo $token` in
+ '[' | ']' | '{' | '}')
+ _gluster_throw "Expected string but received $token"
+ ;;
+ _*)
+ $token "match" $match_string
+ ret=$?
+ if [ $ret -eq 0 ]; then
+ return
+ else
+ _gluster_goto_end $top
+
+ read -r comma
+ if [ "$comma" == "," ]; then
+ _gluster_goto_child $match_string
+ fi
+ fi
+ ;;
+
+ "$match_string")
+ return
+ ;;
+ *)
+ _gluster_goto_end $top
+
+ read -r comma
+ if [ "$comma" == "," ]; then
+ _gluster_goto_child $match_string
+ fi
+ ;;
+ esac
+
+ return
+}
+
+_gluster_does_match ()
+{
+ local token="$1"
+ local key="$2"
+
+ if [ "${token:0:1}" == "_" ]; then
+ $token $2
+ return $?
+ fi
+
+ [ "$token" == "$key" ] && return 0
+
+ return 1
+}
+
+_gluster_parse ()
+{
+ local i=0
+ local token=''
+ local tmp_token=''
+ local word=''
+
+ while [ $i -lt $COMP_CWORD ]; do
+ read -r token
+ case $token in
+ '[')
+ _gluster_push
+ _gluster_goto_child ${COMP_WORDS[$i]}
+ ;;
+ '{')
+ _gluster_push
+ read -r tmp_token
+ _gluster_does_match $tmp_token ${COMP_WORDS[$i]}
+ if [ $? -ne 0 ]; then
+ _gluster_throw "No match"
+ fi
+ ;;
+ esac
+ i=$((i+1))
+ done
+
+ read -r token
+ if [ "$token" == '[' ]; then
+ _gluster_push
+ _gluster_form_list ${COMP_WORDS[COMP_CWORD]}
+
+ elif [ "$token" == '{' ]; then
+ read -r tmp_token
+ GLUSTER_LIST="$tmp_token"
+ fi
+
+ echo $GLUSTER_LIST
+}
+
+_gluster_handle_list ()
+{
+ local list="${!1}"
+ local cur_word=$2
+ local count=0
+ local i=0
+
+ for i in `echo $list`; do
+ count=$((count + 1))
+ done
+
+ if [ $count -eq 1 ] && [ "${i:0:1}" == "_" ]; then
+ $i "complete" $cur_word
+ else
+ COMPREPLY=($(compgen -W "$list" -- $cur_word))
+ fi
+ return
+}
+
+_gluster_completion ()
+{
+ GLUSTER_FINAL_LIST=`echo $GLUSTER_COMMAND_TREE | \
+ egrep -ao --color=never "([A-Za-z0-9_.-]+)|[[:space:]]+|." | \
+ egrep -v --color=never "^[[:space:]]*$" | \
+ _gluster_parse`
+
+ ARG="GLUSTER_FINAL_LIST"
+ _gluster_handle_list $ARG ${COMP_WORDS[COMP_CWORD]}
+ return
+}
+
+complete -F _gluster_completion gluster
diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh
new file mode 100755
index 00000000000..52dcf62fd9f
--- /dev/null
+++ b/extras/control-cpu-load.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+USAGE="This script provides a utility to control CPU utilization for any
+gluster daemon.In this, we use cgroup framework to configure CPU quota
+for a process(like selfheal daemon). Before running this script, make
+sure that daemon is running.Every time daemon restarts, it is required
+to rerun this command to set CPU quota on new daemon process id.
+User can enter any value between 10 to 100 for CPU quota.
+Recommended value of quota period is 25. 25 means, kernel will allocate
+25 ms period to this group of tasks in every 100 ms period. This 25ms
+could be considered as the maximum percentage of CPU quota daemon can take.
+This value will be reflected on CPU usage of "top" command.If provided pid
+is the only process and no other process is in competition to get CPU, more
+ than 25% could be allocated to daemon to speed up the process."
+
+if [ $# -ge 1 ]; then
+ case $1 in
+ -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
+ exit 0;
+ ;;
+ *) echo "Please Provide correct input for script."
+ echo "For help correct options are -h or --help."
+ exit 1;
+ ;;
+ esac
+fi
+
+DIR_EXIST=0
+LOC="/sys/fs/cgroup/cpu,cpuacct/system.slice/glusterd.service"
+echo "Enter gluster daemon pid for which you want to control CPU."
+read daemon_pid
+
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
+ CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
+ if [ -z "${CHECK_PID}" ]; then
+ echo "No daemon is running or pid ${daemon_pid} does not match."
+ echo "with running gluster processes."
+ exit 1
+ fi
+else
+ echo "Entered daemon_pid is not numeric so Rerun the script."
+ exit 1
+fi
+
+
+if [ -f ${LOC}/tasks ];then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ echo "pid ${daemon_pid} is attached with glusterd.service cgroup."
+ fi
+fi
+
+cgroup_name=cgroup_gluster_${daemon_pid}
+if [ -f ${LOC}/${cgroup_name}/tasks ]; then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ val=`cat ${LOC}/${cgroup_name}/cpu.cfs_quota_us`
+ qval=$((val / 1000))
+ echo "pid ${daemon_pid} is already attached ${cgroup_name} with quota value ${qval}."
+ echo "Press n if you don't want to reassign ${daemon_pid} with new quota value."
+ DIR_EXIST=1
+ else
+ echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
+ fi
+fi
+
+read -p "If you want to continue the script to attach ${daemon_pid} with new ${cgroup_name} cgroup Press (y/n)?" choice
+case "$choice" in
+ y|Y ) echo "yes";;
+ n|N ) echo "no";exit;;
+ * ) echo "invalid";exit;;
+esac
+
+systemctl set-property glusterd.service CPUShares=1024
+
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
+ mkdir -p ${LOC}/${cgroup_name}
+ if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
+ echo "Not able to create ${cgroup_name} directory so exit."
+ exit 1
+ fi
+fi
+
+echo "Enter quota value in range [10,100]: "
+
+read quota_value
+if expr ${quota_value} + 0 > /dev/null 2>&1 ;then
+ if [ ${quota_value} -lt 10 ] || [ ${quota_value} -gt 100 ]; then
+ echo "Entered quota value is not correct,it should be in the range ."
+ echo "10-100. Ideal value is 25."
+ echo "Rerun the sript with correct value."
+ exit 1
+ else
+ echo "Entered quota value is $quota_value"
+ fi
+else
+ echo "Entered quota value is not numeric so Rerun the script."
+ exit 1
+fi
+
+quota_value=$((quota_value * 1000))
+echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
+echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
+
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+ if cat /proc/${daemon_pid}/cgroup | grep -w ${cgroup_name} > /dev/null; then
+ echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
+ else
+ echo "Tasks are not attached successfully."
+ fi
+fi
diff --git a/extras/control-mem.sh b/extras/control-mem.sh
new file mode 100755
index 00000000000..91b36f8107a
--- /dev/null
+++ b/extras/control-mem.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+USAGE="This commands provides a utility to control MEMORY utilization for any
+gluster daemon.In this, we use cgroup framework to configure MEMORY limit for
+a process. Before running this script, make sure that daemon is running.Every
+time daemon restarts, it is required to rerun this command to set memory limit
+(in bytes) on new daemon process id.User can enter any value between 100
+(in Mega bytes) to 8000000000000 for Memory limit in Mega bytes.
+Memory limit value is depends on how much maximum memory user wants to restrict
+for specific daemon process.If a process will try to consume memore more than
+configured value then cgroup will hang/sleep this task and to resume the task
+rerun the script with new increase memory limit value ."
+
+if [ $# -ge 1 ]; then
+ case $1 in
+ -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
+ exit 0;
+ ;;
+ *) echo "Please Provide correct input for script."
+ echo "For help correct options are -h of --help."
+ exit 1;
+ ;;
+ esac
+fi
+
+DIR_EXIST=0
+LOC="/sys/fs/cgroup/memory/system.slice/glusterd.service"
+echo "Enter Any gluster daemon pid for that you want to control MEMORY."
+read daemon_pid
+
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
+ CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
+ if [ -z "${CHECK_PID}" ]; then
+ echo "No daemon is running or pid ${daemon_pid} does not match."
+ echo "with running gluster processes."
+ exit 1
+ fi
+else
+ echo "Entered daemon_pid is not numeric so Rerun the script."
+ exit 1
+fi
+
+
+if [ -f ${LOC}/tasks ]; then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
+ if [ ${CHECK_CGROUP} ] ;then
+ echo "pid ${daemon_pid} is attached with default glusterd.service cgroup."
+ fi
+fi
+
+cgroup_name=cgroup_gluster_${daemon_pid}
+if [ -f ${LOC}/${cgroup_name}/tasks ];then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ val=`cat ${LOC}/${cgroup_name}/memory.limit_in_bytes`
+ mval=$((val / 1024 / 1024))
+ echo "pid ${daemon_pid} is already attached ${cgroup_name} with mem value ${mval}."
+ echo "Press n if you don't want to reassign ${daemon_pid} with new mem value."
+ DIR_EXIST=1
+ else
+ echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
+ fi
+fi
+
+read -p "If you want to continue the script to attach daeomon with new cgroup. Press (y/n)?" choice
+case "$choice" in
+ y|Y ) echo "yes";;
+ n|N ) echo "no";exit;;
+ * ) echo "invalid";exit;;
+esac
+
+systemctl set-property glusterd.service CPUShares=1024
+
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
+ mkdir -p ${LOC}/${cgroup_name}
+ if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
+ echo "Not able to create ${LOC}/${cgroup_name} directory so exit."
+ exit 1
+ fi
+fi
+
+echo "Enter Memory value in Mega bytes [100,8000000000000]: "
+
+read mem_value
+if expr ${mem_value} + 0 > /dev/null 2>&1 ;then
+ if [ ${mem_value} -lt 100 ] || [ ${mem_value} -gt 8000000000000 ]; then
+ echo "Entered memory value is not correct,it should be in the range ."
+ echo "100-8000000000000, Rerun the script with correct value ."
+ exit 1
+ else
+ echo "Entered memory limit value is ${mem_value}."
+ fi
+else
+ echo "Entered memory value is not numeric so Rerun the script."
+ exit 1
+fi
+
+mem_value=$(($mem_value * 1024 * 1024))
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Setting ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
+ #Set memory value to memory.memsw.limit_in_bytes
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
+ # disable oom_control so that kernel will not send kill signal to the
+ # task once limit has reached
+ echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
+else
+ #Increase mem_value to memory.memsw.limit_in_bytes
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
+ echo "Increase ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
+ # disable oom_control so that kernel will not send kill signal to the
+ # task once limit has reached
+ echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
+fi
+
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+ if cat /proc/${daemon_pid}/cgroup | grep -iw ${cgroup_name} > /dev/null; then
+ echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
+ else
+ echo "Tasks are not attached successfully."
+ fi
+fi
diff --git a/extras/create_new_xlator/README.md b/extras/create_new_xlator/README.md
new file mode 100644
index 00000000000..fdc1ba00812
--- /dev/null
+++ b/extras/create_new_xlator/README.md
@@ -0,0 +1,24 @@
+####This document explains how to create a template for your new xlator.
+
+`$ python ./generate_xlator.py <XLATOR_DIRECTORY> <XLATOR_NAME> <FOP_PREFIX>`
+ * XLATOR_DIRECTORY: Directory path where the new xlator folder will reside
+ * XLATOR_NAME: Name of the xlator you wish to create
+ * FOP_PREFIX: This is the fop prefix that you wish to prefix every fop definition in your xlator, fop prefix is generally different than xlator name, if the xlator name is too long.
+
+Eg: `python ./generate_xlator.py /home/u1/glusterfs/xlators/features compression cmpr`
+This command will create the following files with some initial contents like copyright, fops definition etc.
+Note that there shouldn't be a "/" specified at the end of the <XLATOR_DIRECTORY>
+ `* /home/u1/glusterfs/xlators/features/compression/Makefile.am
+ * /home/u1/glusterfs/xlators/features/compression/src/Makefile.am
+ * /home/u1/glusterfs/xlators/features/compression/src/compression.c
+ * /home/u1/glusterfs/xlators/features/compression/src/compression.h
+ * /home/u1/glusterfs/xlators/features/compression/src/compression-mem-types.h
+ * /home/u1/glusterfs/xlators/features/compression/src/compression-messages.h`
+
+By default all the fops and functions are generated, if you wish to not implement certain fops and functions, comment those lines (by adding '#' at the start of the line) in libglusterfs/src/generate_xlator.py
+
+Few other manual steps required to get the new xlator completely functional:
+* Change configure.ac
+* Change `<XLATOR_DIRECTORY>/Makefile.am` to include the new xlator directory.
+ Eg: `/home/u1/glusterfs/xlators/features/Makefile.am`
+* Change vol file or glusterd volgen to include the new xlator in volfile
diff --git a/extras/create_new_xlator/generate_xlator.py b/extras/create_new_xlator/generate_xlator.py
new file mode 100755
index 00000000000..983868c04db
--- /dev/null
+++ b/extras/create_new_xlator/generate_xlator.py
@@ -0,0 +1,208 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import os
+import re
+import sys
+import string
+import time
+path = os.path.abspath(os.path.dirname(__file__)) + '/../../libglusterfs/src'
+sys.path.append(path)
+from generator import ops, xlator_cbks, xlator_dumpops
+
+MAKEFILE_FMT = """
+xlator_LTLIBRARIES = @XL_NAME@.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/@XL_TYPE@
+@XL_NAME_NO_HYPHEN@_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+@XL_NAME_NO_HYPHEN@_la_SOURCES = @XL_NAME@.c
+@XL_NAME_NO_HYPHEN@_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+noinst_HEADERS = @XL_NAME@.h @XL_NAME@-mem-types.h @XL_NAME@-messages.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+CLEANFILES =
+"""
+
+fop_subs = {}
+cbk_subs = {}
+fn_subs = {}
+
+
+def get_error_arg(type_str):
+ if type_str.find(" *") != -1:
+ return "NULL"
+ return "-1"
+
+
+def get_param(names, types):
+ # Convert two separate tuples to one of (name, type) sub-tuples.
+ as_tuples = list(zip(types, names))
+ # Convert each sub-tuple into a "type name" string.
+ as_strings = [' '.join(item) for item in as_tuples]
+ # Join all of those into one big string.
+ return ',\n\t'.join(as_strings)
+
+
+def generate(tmpl, name, table):
+ w_arg_names = [a[1] for a in table[name] if a[0] == 'fop-arg']
+ w_arg_types = [a[2] for a in table[name] if a[0] == 'fop-arg']
+ u_arg_names = [a[1] for a in table[name] if a[0] == 'cbk-arg']
+ u_arg_types = [a[2] for a in table[name] if a[0] == 'cbk-arg']
+ fn_arg_names = [a[1] for a in table[name] if a[0] == 'fn-arg']
+ fn_arg_types = [a[2] for a in table[name] if a[0] == 'fn-arg']
+ ret_type = [a[1] for a in table[name] if a[0] == 'ret-val']
+ ret_var = [a[2] for a in table[name] if a[0] == 'ret-val']
+
+ sdict = {}
+ #Parameters are (t1, var1), (t2, var2)...
+ #Args are (var1, var2,...)
+ sdict["@WIND_ARGS@"] = ', '.join(w_arg_names)
+ sdict["@UNWIND_ARGS@"] = ', '.join(u_arg_names)
+ sdict["@ERROR_ARGS@"] = ', '.join(list(map(get_error_arg, u_arg_types)))
+ sdict["@WIND_PARAMS@"] = get_param(w_arg_names, w_arg_types)
+ sdict["@UNWIND_PARAMS@"] = get_param(u_arg_names, u_arg_types)
+ sdict["@FUNC_PARAMS@"] = get_param(fn_arg_names, fn_arg_types)
+ sdict["@NAME@"] = name
+ sdict["@FOP_PREFIX@"] = fop_prefix
+ sdict["@RET_TYPE@"] = ''.join(ret_type)
+ sdict["@RET_VAR@"] = ''.join(ret_var)
+
+ for old, new in sdict.items():
+ tmpl = tmpl.replace(old, new)
+ # TBD: reindent/reformat the result for maximum readability.
+ return tmpl
+
+
+def gen_xlator():
+ xl = open(src_dir_path+"/"+xl_name+".c", 'w+')
+
+ print(COPYRIGHT, file=xl)
+ print(fragments["INCLUDE_IN_SRC_FILE"].replace("@XL_NAME@",
+ xl_name), file=xl)
+
+ #Generate cbks and fops
+ for fop in ops:
+ print(generate(fragments["CBK_TEMPLATE"], fop, ops), file=xl)
+ print(generate(fragments["FOP_TEMPLATE"], fop, ops), file=xl)
+
+ for cbk in xlator_cbks:
+ print(generate(fragments["FUNC_TEMPLATE"], cbk,
+ xlator_cbks), file=xl)
+
+ for dops in xlator_dumpops:
+ print(generate(fragments["FUNC_TEMPLATE"], dops,
+ xlator_dumpops), file=xl)
+
+ #Generate fop table
+ print("struct xlator_fops fops = {", file=xl)
+ for fop in ops:
+ print(" .{0:20} = {1}_{2},".format(fop, fop_prefix, fop), file=xl)
+ print("};", file=xl)
+
+ #Generate xlator_cbks table
+ print("struct xlator_cbks cbks = {", file=xl)
+ for cbk in xlator_cbks:
+ print(" .{0:20} = {1}_{2},".format(cbk, fop_prefix, cbk), file=xl)
+ print("};", file=xl)
+
+ #Generate xlator_dumpops table
+ print("struct xlator_dumpops dumpops = {", file=xl)
+ for dops in xlator_dumpops:
+ print(" .{0:20} = {1}_{2},".format(dops, fop_prefix, dops), file=xl)
+ print("};", file=xl)
+
+ xlator_methods = fragments["XLATOR_METHODS"].replace("@XL_NAME@", xl_name)
+ xlator_methods = xlator_methods.replace("@FOP_PREFIX@", fop_prefix)
+ print(xlator_methods, file=xl)
+
+ xl.close()
+
+
+def create_dir_struct():
+ if not os.path.exists(dir_path+"/src"):
+ os.makedirs(dir_path+"/src")
+
+
+def gen_header_files():
+ upname = xl_name_no_hyphen.upper()
+ h = open(src_dir_path+"/"+xl_name+".h", 'w+')
+ print(COPYRIGHT, file=h)
+ txt = fragments["HEADER_FMT"].replace("@HFL_NAME@", upname)
+ txt = txt.replace("@XL_NAME@", xl_name)
+ print(txt, file=h)
+ h.close()
+
+ h = open(src_dir_path+"/"+xl_name+"-mem-types.h", 'w+')
+ print(COPYRIGHT, file=h)
+ txt = fragments["MEM_HEADER_FMT"].replace("@HFL_NAME@", upname+"_MEM_TYPES")
+ txt = txt.replace("@FOP_PREFIX@", fop_prefix)
+ print(txt, file=h)
+ h.close()
+
+ h = open(src_dir_path+"/"+xl_name+"-messages.h", 'w+')
+ print(COPYRIGHT, file=h)
+ txt = fragments["MSG_HEADER_FMT"].replace("@HFL_NAME@", upname+"_MESSAGES")
+ txt = txt.replace("@FOP_PREFIX@", fop_prefix.upper())
+ print(txt, file=h)
+ h.close()
+
+
+def gen_makefiles():
+ m = open(dir_path+"/Makefile.am", 'w+')
+ print("SUBDIRS = src\n\nCLEANFILES =", file=m)
+ m.close()
+
+ m = open(src_dir_path+"/Makefile.am", 'w+')
+ txt = MAKEFILE_FMT.replace("@XL_NAME@", xl_name)
+ txt = txt.replace("@XL_NAME_NO_HYPHEN@", xl_name_no_hyphen)
+ txt = txt.replace("@XL_TYPE@", xlator_type)
+ print(txt, file=m)
+ m.close()
+
+def get_copyright ():
+ return fragments["CP"].replace("@CURRENT_YEAR@",
+ time.strftime("%Y"))
+
+def load_fragments ():
+ pragma_re = re.compile('pragma fragment (.*)')
+ cur_symbol = None
+ cur_value = ""
+ result = {}
+ basepath = os.path.abspath(os.path.dirname(__file__))
+ fragpath = basepath + "/new-xlator.c.tmpl"
+ for line in open(fragpath, "r").readlines():
+ m = pragma_re.search(line)
+ if m:
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ cur_symbol = m.group(1)
+ cur_value = ""
+ else:
+ cur_value += line
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ return result
+
+if __name__ == '__main__':
+
+ if len(sys.argv) < 3:
+ print("USAGE: ./gen_xlator <XLATOR_DIR> <XLATOR_NAME> <FOP_PREFIX>")
+ sys.exit(0)
+
+ xl_name = sys.argv[2]
+ xl_name_no_hyphen = xl_name.replace("-", "_")
+ if sys.argv[1].endswith('/'):
+ dir_path = sys.argv[1] + xl_name
+ else:
+ dir_path = sys.argv[1] + "/" + xl_name
+ xlator_type = os.path.basename(sys.argv[1])
+ fop_prefix = sys.argv[3]
+ src_dir_path = dir_path + "/src"
+
+ fragments = load_fragments()
+
+ COPYRIGHT = get_copyright()
+ create_dir_struct()
+ gen_xlator()
+ gen_header_files()
+ gen_makefiles()
diff --git a/extras/create_new_xlator/new-xlator.c.tmpl b/extras/create_new_xlator/new-xlator.c.tmpl
new file mode 100644
index 00000000000..fe9735bfcf1
--- /dev/null
+++ b/extras/create_new_xlator/new-xlator.c.tmpl
@@ -0,0 +1,151 @@
+#pragma fragment CBK_TEMPLATE
+int32_t @FOP_PREFIX@_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, @UNWIND_PARAMS@)
+{
+ STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @UNWIND_ARGS@);
+ return 0;
+}
+
+#pragma fragment COMMENT
+If you are generating the leaf xlators, remove the STACK_WIND and replace the
+ @ERROR_ARGS@ to @UNWIND_ARGS@ if necessary
+
+#pragma fragment FOP_TEMPLATE
+ int32_t @FOP_PREFIX@_@NAME@(call_frame_t *frame, xlator_t *this, @WIND_PARAMS@)
+{
+ STACK_WIND(frame, @FOP_PREFIX@_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @WIND_ARGS@);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(@NAME@, frame, -1, errno, @ERROR_ARGS@);
+ return 0;
+}
+
+#pragma fragment FUNC_TEMPLATE
+@RET_TYPE@ @FOP_PREFIX@_@NAME@(@FUNC_PARAMS@)
+{
+ return @RET_VAR@;
+}
+
+#pragma fragment CP
+/*
+ * Copyright (c) @CURRENT_YEAR@ Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#pragma fragment INCLUDE_IN_SRC_FILE
+#include "@XL_NAME@.h"
+
+#pragma fragment XLATOR_METHODS
+
+static int32_t @FOP_PREFIX@_init(xlator_t *this)
+{
+ return 0;
+}
+
+static void @FOP_PREFIX@_fini(xlator_t *this)
+{
+ return;
+}
+
+static int32_t @FOP_PREFIX@_reconfigure(xlator_t *this, dict_t *dict)
+{
+ return 0;
+}
+
+static int @FOP_PREFIX@_notify(xlator_t *this, int event, void *data, ...)
+{
+ return default_notify(this, event, data);
+}
+
+static int32_t @FOP_PREFIX@_mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_@FOP_PREFIX@_mt_end + 1);
+ return ret;
+}
+
+static int32_t @FOP_PREFIX@_dump_metrics(xlator_t *this, int fd)
+{
+ return 0;
+}
+
+struct volume_options @FOP_PREFIX@_options[] = {
+ /*{ .key = {""},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "",
+ .op_version = {GD_OP_VERSION_},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {""},
+ .description = "",
+ .category = GF_EXPERIMENTAL,
+ },
+ { .key = {NULL} },
+ */
+};
+
+xlator_api_t xlator_api = {
+ .init = @FOP_PREFIX@_init,
+ .fini = @FOP_PREFIX@_fini,
+ .notify = @FOP_PREFIX@_notify,
+ .reconfigure = @FOP_PREFIX@_reconfigure,
+ .mem_acct_init = @FOP_PREFIX@_mem_acct_init,
+ .dump_metrics = @FOP_PREFIX@_dump_metrics,
+ .op_version = {GD_OP_VERSION_},
+ .dumpops = &@FOP_PREFIX@_dumpops,
+ .fops = &@FOP_PREFIX@_fops,
+ .cbks = &@FOP_PREFIX @_cbks,
+ .options = @FOP_PREFIX@_options,
+ .identifier = "@XL_NAME@",
+ .category = GF_EXPERIMENTAL,
+};
+#pragma fragment HEADER_FMT
+#ifndef __ @HFL_NAME@_H__
+#define __ @HFL_NAME@_H__
+
+#include "@XL_NAME@-mem-types.h"
+#include "@XL_NAME@-messages.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#endif /* __@HFL_NAME@_H__ */
+
+#pragma fragment MEM_HEADER_FMT
+#ifndef __ @HFL_NAME@_H__
+#define __ @HFL_NAME@_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_mdc_mem_types_ {
+ gf_@FOP_PREFIX@_mt_ = gf_common_mt_end + 1,
+ gf_@FOP_PREFIX@_mt_end
+};
+
+#endif /* __@HFL_NAME@_H__ */
+
+#pragma fragment MSG_HEADER_FMT
+#ifndef __@HFL_NAME@_H__
+#define __@HFL_NAME@_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(@FOP_PREFIX@, @FOP_PREFIX@_MSG_NO_MEMORY);
+
+#endif /* __@HFL_NAME@_H__ */
diff --git a/extras/devel-tools/devel-vagrant/Vagrantfile b/extras/devel-tools/devel-vagrant/Vagrantfile
new file mode 100644
index 00000000000..78dc29bdc68
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/Vagrantfile
@@ -0,0 +1,165 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+#
+# Author: Rajesh Joseph (rjoseph@redhat.com)
+# Author: Christopher Blum (cblum@redhat.com)
+#
+
+
+#Variables
+box_name = "gluster-dev-fedora"
+box_url = "http://download.gluster.org/pub/gluster/glusterfs/vagrant/gluster-dev-fedora/boxes/gluster-dev-fedora.json"
+node_count = 0
+disk_count = -1
+node_name = "Node"
+ipbase="192.168.99."
+source_path = "/source/glusterfs"
+target_path = "/mnt/src"
+
+if ARGV[0] == "up"
+ environment = open('vagrant_env.conf', 'w')
+
+ print "\n\e[1;37mEnter Node (or VM) Count? Default: 1 \e[32m"
+ while node_count < 1 or node_count > 99
+ node_count = $stdin.gets.strip.to_i
+ if node_count == 0 # The user pressed enter without input or we cannot parse the input to a number
+ node_count = 1
+ elsif node_count < 1
+ print "\e[31mWe need at least 1 VM ;) Try again \e[32m"
+ elsif node_count > 99
+ print "\e[31mWe don't support more than 99 VMs - Try again \e[32m"
+ end
+ end
+
+ print "\e[1;37mEnter per Node Disc (Brick) Count? Default: 2 \e[32m"
+
+ while disk_count < 1
+ disk_count = $stdin.gets.strip.to_i
+ if disk_count == 0 # The user pressed enter without input or we cannot parse the input to a number
+ disk_count = 2
+ elsif disk_count < 1
+ print "\e[31mWe need at least 1 disk ;) Try again \e[32m"
+ end
+ end
+
+ print "\e[1;37mEnter GlusterFS source location? Default: \"#{source_path}\" : \e[32m"
+ tmploc = $stdin.gets.strip.to_s
+ if tmploc != ""
+ source_path = "#{tmploc}"
+ end
+
+ environment.puts("# BEWARE: Do NOT modify ANY settings in here or your vagrant environment will be messed up")
+ environment.puts(node_count.to_s)
+ environment.puts(disk_count.to_s)
+ environment.puts(source_path)
+
+ print "\e[32m\nOK I will provision #{node_count} VMs for you and each one will have #{disk_count} disks for bricks\e[37m\n\n"
+ system "sleep 1"
+else # So that we destroy and can connect to all VMs...
+ environment = open('vagrant_env.conf', 'r')
+
+ environment.readline # Skip the comment on top
+ node_count = environment.readline.to_i
+ disk_count = environment.readline.to_i
+ source_path = environment.readline.gsub(/\s+/, "")
+
+ if ARGV[0] != "ssh-config"
+ puts "Detected settings from previous vagrant up:"
+ puts " We deployed #{node_count} VMs with each #{disk_count} disks"
+ puts ""
+ end
+end
+
+environment.close
+
+$ansivar = Hash.new{ |hash,key| hash[key] = [] }
+$devnamecreated = false
+
+#
+# Function to create and attach disks
+#
+def attachDisks(numDisk, provider)
+ suffix = "bcdefghijklmn".split("")
+ for i in 1..numDisk.to_i
+ devname = "vd" + (suffix[i-1]).to_s
+ if $devnamecreated == false
+ $ansivar["device"].push "#{devname}"
+ end
+ provider.storage :file,
+ :size => '1G',
+ :device => "vd" + (suffix[i-1]).to_s,
+ :type => "qcow2",
+ :bus => "virtio",
+ :cache => "default"
+ end
+ $devnamecreated = true
+end
+
+
+$ansivar["src_path"].push "#{source_path}"
+$ansivar["trg_path"].push "#{target_path}"
+
+groups = Hash.new{ |hash,key| hash[key] = [] }
+
+groups["origin"].push "#{node_name}1"
+groups["all"].push "#{node_name}1"
+
+(2..node_count).each do |num|
+ $ansivar["peer_nodes"].push "#{node_name}#{num}"
+ groups["all"].push "#{node_name}#{num}"
+end
+
+hostsFile = "\n"
+(1..node_count).each do |num|
+ hostsFile += "#{ipbase}#{( 100 + num).to_s} #{node_name}#{num.to_s}\n"
+end
+
+Vagrant.configure("2") do |config|
+ (1..node_count).each do |num|
+ config.vm.define "#{node_name}#{num}" do |node|
+ ip_addr = "#{ipbase}#{(100 + num).to_s}"
+ node.vm.network "private_network", ip: "#{ip_addr}"
+ node.vm.box = box_name
+ node.vm.box_url = box_url
+ node.vm.hostname = "#{node_name}#{num}"
+ node.ssh.insert_key = false
+ node.vm.synced_folder "#{source_path}", "#{target_path}", type: "nfs"
+
+ # Define basic config for VM, memory, cpu, storage pool
+ node.vm.provider "libvirt" do |virt|
+ virt.storage_pool_name = "default"
+ virt.memory = 1024
+ virt.cpus = 1
+
+ attachDisks( disk_count, virt )
+ end
+
+ node.vm.post_up_message = "\e[37mBuilding of this VM is finished \n"
+ "You can access it now with: \n"
+ "vagrant ssh #{node_name}#{num.to_s}\n\n"
+ "#{target_path} directory in VM #{node_name}#{num.to_s}"
+ "is synced with Host machine. \nSo any changes done in this"
+ "directory will be reflected in the host machine as well\n"
+ "Beware of this when you delete content from this directory\e[32m"
+
+ node.vm.provision :shell, path: "bootstrap.sh"
+
+ node.vm.provision "shell", inline: <<-SHELL
+ echo '#{hostsFile}' | sudo tee -a /etc/hosts
+ SHELL
+
+ if num == node_count
+ # Let's provision
+ node.vm.provision "ansible" do |setup|
+ setup.verbose = "v"
+ setup.playbook = "ansible/setup.yml"
+ setup.limit = "all"
+ setup.sudo = "true"
+ setup.groups = groups
+ setup.extra_vars = $ansivar
+ end
+ end
+
+ end
+ end
+end
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml
new file mode 100644
index 00000000000..3306c7a3dc2
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+- name: gluster peer probe
+ shell: gluster peer probe {{ item }}
+ with_items: "{{ peer_nodes | default([]) }}"
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml
new file mode 100644
index 00000000000..6ee258c7780
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml
@@ -0,0 +1,29 @@
+---
+- name: autogen.sh
+ shell: chdir={{ item }} ./autogen.sh
+ with_items: "{{ trg_path }}"
+
+- name: configure
+ shell: chdir={{ item }} CFLAGS="-g -O0 -Werror -Wall -Wno-error=cpp -Wno-error=maybe-uninitialized" \
+ ./configure \
+ --prefix=/usr \
+ --exec-prefix=/usr \
+ --bindir=/usr/bin \
+ --sbindir=/usr/sbin \
+ --sysconfdir=/etc \
+ --datadir=/usr/share \
+ --includedir=/usr/include \
+ --libdir=/usr/lib64 \
+ --libexecdir=/usr/libexec \
+ --localstatedir=/var \
+ --sharedstatedir=/var/lib \
+ --mandir=/usr/share/man \
+ --infodir=/usr/share/info \
+ --libdir=/usr/lib64 \
+ --enable-debug
+ with_items: "{{ trg_path }}"
+
+- name: make install
+ shell: chdir={{ item }} make install
+ with_items: "{{ trg_path }}"
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..3944054dd25
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml
@@ -0,0 +1,72 @@
+---
+- name: install deltarpm
+ dnf: name=deltarpm state=present
+
+- name: update system
+ shell: dnf update -y
+
+- name: install other packages
+ dnf: name={{ item }} state=present
+ with_items:
+ - attr
+ - autoconf
+ - automake
+ - bison
+ - cifs-utils
+ - cscope
+ - ctags
+ - dbench
+ - dos2unix
+ - e2fsprogs
+ - findutils
+ - flex
+ - fuse-devel
+ - fuse-libs
+ - gcc
+ - gdb
+ - git
+ - glib2-devel
+ - hostname
+ - libacl-devel
+ - libaio-devel
+ - libattr-devel
+ - libibverbs-devel
+ - librdmacm-devel
+ - libtool
+ - libxml2-devel
+ - lvm2-devel
+ - make
+ - man-db
+ - mock
+ - net-tools
+ - nfs-utils
+ - openssh-server
+ - openssl-devel
+ - perl-Test-Harness
+ - pkgconfig
+ - procps-ng
+ - psmisc
+ - python-devel
+ - python-eventlet
+ - python-netifaces
+ - python-paste-deploy
+ - python-setuptools
+ - python-simplejson
+ - python-sphinx
+ - python-webob
+ - pyxattr
+ - readline-devel
+ - rpm-build
+ - screen
+ - strace
+ - supervisor
+ - systemtap-sdt-devel
+ - sqlite-devel
+ - samba*
+ - userspace-rcu-devel
+ - vim
+ - wget
+ - which
+ - xfsprogs
+ - yajl-devel
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml
new file mode 100644
index 00000000000..768cb0e8668
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: disable iptables, need to add specific rules later
+ shell: iptables -F
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml
new file mode 100644
index 00000000000..a3a6c463468
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml
@@ -0,0 +1,30 @@
+---
+- name: Create physical device
+ shell: pvcreate /dev/{{ item }}
+ with_items: "{{ device }}"
+
+- name: Create volume group
+ shell: vgcreate vg{{ item }} /dev/{{ item }}
+ with_items: "{{ device }}"
+
+- name: Create thin pool
+ shell: lvcreate -L 950M -T vg{{ item }}/thinpool
+ with_items: "{{ device }}"
+
+- name: Create thin volume
+ shell: lvcreate -V900M -T vg{{ item }}/thinpool -n thinp1
+ with_items: "{{ device }}"
+
+- name: Format backend
+ filesystem: fstype=xfs dev=/dev/vg{{ item }}/thinp1
+ with_items: "{{ device }}"
+
+- name: Create mount directory
+ file: path=/bricks/br{{ item }} state=directory recurse=yes
+ with_items: "{{ device }}"
+
+- name: Add entry to fstab and mount
+ mount: name=/bricks/br{{ item }} src=/dev/vg{{ item }}/thinp1 fstype=xfs state=mounted
+ with_items: "{{ device }}"
+
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml
new file mode 100644
index 00000000000..c9ba9618428
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Allow gfapi in Samba to bind to other ports than well known smb ports
+ seboolean: name=samba_load_libgfapi state=yes persistent=yes
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml
new file mode 100644
index 00000000000..ef78a125ae8
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml
@@ -0,0 +1,21 @@
+---
+- name: disable kernel nfs
+ service: name=nfs-server enabled=no
+
+- name: stop kernel nfs
+ service: name=nfs-server state=stopped
+
+- name: enable rpcbind
+ service: name=rpcbind enabled=yes
+
+- name: start rpcbind
+ service: name=rpcbind state=started
+
+- name: enable glusterd
+ service: name=glusterd enabled=yes
+
+- name: start glusterd
+ service: name=glusterd state=started
+
+
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/setup.yml b/extras/devel-tools/devel-vagrant/ansible/setup.yml
new file mode 100644
index 00000000000..c26bd7d6051
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/setup.yml
@@ -0,0 +1,23 @@
+---
+- hosts: all
+ become: yes
+ become_method: sudo
+ roles:
+ - install-pkgs
+ - prepare-brick
+ - selinux
+ - iptables
+
+- hosts: all
+ become: yes
+ become_method: sudo
+ serial: 1
+ roles:
+ - compile-gluster
+ - service
+
+- hosts: origin
+ become: yes
+ become_method: sudo
+ roles:
+ - cluster
diff --git a/extras/devel-tools/devel-vagrant/bootstrap.sh b/extras/devel-tools/devel-vagrant/bootstrap.sh
new file mode 100644
index 00000000000..bbf9fa2c063
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/bootstrap.sh
@@ -0,0 +1,3 @@
+dnf install -y nfs-utils
+dnf install -y vim
+dnf install -y python2 python2-dnf libselinux-python libsemanage-python
diff --git a/extras/devel-tools/devel-vagrant/up.sh b/extras/devel-tools/devel-vagrant/up.sh
new file mode 100755
index 00000000000..35cbe79d835
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/up.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+vagrant up --no-provision $@
+vagrant provision
diff --git a/extras/devel-tools/gdb_macros b/extras/devel-tools/gdb_macros
new file mode 100644
index 00000000000..aae4ccb3b37
--- /dev/null
+++ b/extras/devel-tools/gdb_macros
@@ -0,0 +1,84 @@
+
+#
+# gdb_macros is a gdb script file which can assist
+# developer in debugging. This script provides
+# following functions for debugging gluster processes
+# effectively:
+#
+# pdict : This function will iterate through all the
+# dictionary (dict_t) members and print the
+# key-value pair.
+#
+# plist : This function will print address of each member
+# of gluster list (list_head).
+#
+# gdb script should be loaded in gdb before using these
+# functions. gdb script can be loaded on-demand or on gdb
+# start-up. Use the following command on gdb prompt for
+# loading it on-demand.
+# source <script filename>
+# e.g.
+# (gdb) source /mnt/gdb_macros
+#
+# To automatically load gdb script on startup use .gdbinit
+# file. This file is automatically loaded by gdb on start.
+# Edit (or create) ~/.gdbinit file and add the following
+# entry:
+# source /mnt/gdb_macros
+#
+
+
+
+# This is an internal helper function
+define _print_dict_data
+ set $data = ($arg0)
+ set $len = $data->len - 1
+ set $i = 0
+ set $ishex = 0
+ while ($i < $len)
+ set $ch = $data->data [$i]
+
+ # Print only alpha-numeric values as char
+ # and remaining as hex value. This is done
+ # in this way because using %s screws up
+ # the display if the string has non-displayable
+ # characters
+ if ($ishex == 0) && ($ch > 31) && ($ch < 127)
+ printf "%c", $ch
+ else
+ printf "%x", ($ch & 0xFF)
+ set $ishex = 1
+ end
+ set $i = $i + 1
+ end
+end
+
+
+define pdict
+ set $cnt = 1
+ set $temp = *($arg0->members)
+ while ($temp)
+ printf "[%d](%s::",$cnt, $temp->key
+ _print_dict_data ($temp)->value
+ printf ")\n"
+ set $temp = $temp->next
+ set $cnt = $cnt + 1
+ end
+ printf "Done\n"
+end
+
+
+define plist
+ set $node = &($arg0)
+ set $head = $node
+
+ printf "[0x%lx]", $node
+ set $node = $node->next
+
+ while ($node != $head)
+ printf "--> [0x%lx]", $node
+ set $node = $node->next
+ end
+ printf "\n"
+end
+
diff --git a/extras/devel-tools/print-backtrace.sh b/extras/devel-tools/print-backtrace.sh
new file mode 100755
index 00000000000..33fbae288bc
--- /dev/null
+++ b/extras/devel-tools/print-backtrace.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# sample unresolved backtrace lines picked up from a brick log that should go
+# into a backtrace file eg. bt-file.txt:
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3ec81)[0x7fe4bc271c81]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3eecd)[0x7fe4bc271ecd]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x404cb)[0x7fe4bc2734cb]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d2b6)[0x7fe4bc2702b6]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d323)[0x7fe4bc270323]
+#
+# following is the output of the script for the above backtrace lines:
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3ec81)[0x7fe4bc271c81] __afr_selfheal_data_finalize_source inlined at /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:684 in __afr_selfheal_data_prepare /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:603
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3eecd)[0x7fe4bc271ecd] __afr_selfheal_data /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:740
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x404cb)[0x7fe4bc2734cb] afr_selfheal_data /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:883
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d2b6)[0x7fe4bc2702b6] afr_selfheal_do /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-common.c:1968
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d323)[0x7fe4bc270323] afr_selfheal /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-common.c:2015
+#
+# Usage with debuginfo RPM:
+# print-backtrace.sh $HOME/Downloads/glusterfs-debuginfo-3.8.4-10.el7.x86_64.rpm bt-file.txt
+#
+# Usage with source install:
+# print-packtrace.sh none bt-file.txt
+
+function version_compare() { test $(echo $1|awk -F '.' '{print $1 $2 $3}') -gt $(echo $2|awk -F '.' '{print $1 $2 $3}'); }
+
+function Usage()
+{
+ echo -e "Usage:\n\t$0 { none | <debuginfo-rpm> } <backtrace-file>"
+ echo "none: implies we don't have a debuginfo rpm but want to resolve"
+ echo " against a source install which already has the debuginfo"
+ echo " NOTE: in this case you should have configured the build"
+ echo " with --enable-debug and the linker options should"
+ echo " have the option -rdynamic"
+}
+
+debuginfo_rpm=$1
+backtrace_file=$2
+
+if [ ! $debuginfo_rpm ] || [ ! $backtrace_file ]; then
+ Usage
+ exit 1
+fi
+
+if [ $debuginfo_rpm != "none" ]; then
+ if [ ! -f $debuginfo_rpm ]; then
+ echo "no such rpm file: $debuginfo_rpm"
+ exit 1
+ fi
+fi
+
+if [ ! -f $backtrace_file ]; then
+ echo "no such backtrace file: $backtrace_file"
+ exit 1
+fi
+
+if [ "$debuginfo_rpm" != "none" ]; then
+ if ! file $debuginfo_rpm | grep RPM >/dev/null 2>&1 ; then
+ echo "file does not look like an rpm: $debuginfo_rpm"
+ exit 1
+ fi
+fi
+
+cpio_version=$(cpio --version|grep cpio|cut -f 2 -d ')'|sed -e 's/^[[:space:]]*//')
+rpm_name=""
+debuginfo_path=""
+debuginfo_extension=""
+
+if [ $debuginfo_rpm != "none" ]; then
+ # extract the gluster debuginfo rpm to resolve the symbols against
+ rpm_name=$(basename $debuginfo_rpm '.rpm')
+ if [ -d $rpm_name ]; then
+ echo "directory already exists: $rpm_name"
+ echo "please remove/move it and reattempt"
+ exit 1
+ fi
+ mkdir -p $rpm_name
+ if version_compare $cpio_version "2.11"; then
+ rpm2cpio $debuginfo_rpm | cpio --quiet --extract --make-directories --preserve-modification-time --directory=$rpm_name
+ ret=$?
+ else
+ current_dir="$PWD"
+ cd $rpm_name
+ rpm2cpio $debuginfo_rpm | cpio --quiet --extract --make-directories --preserve-modification-time
+ ret=$?
+ cd $current_dir
+ fi
+ if [ $ret -eq 1 ]; then
+ echo "failed to extract rpm $debuginfo_rpm to $PWD/$rpm_name directory"
+ rm -rf $rpm_name
+ exit 1
+ fi
+ debuginfo_path="$PWD/$rpm_name/usr/lib/debug"
+ debuginfo_extension=".debug"
+else
+ debuginfo_path=""
+ debuginfo_extension=""
+fi
+
+# NOTE: backtrace file should contain only the lines which need to be resolved
+for bt in $(cat $backtrace_file)
+do
+ libname=$(echo $bt | cut -f 1 -d '(')
+ addr=$(echo $bt | cut -f 2 -d '(' | cut -f 1 -d ')')
+ libpath=${debuginfo_path}${libname}${debuginfo_extension}
+ if [ ! -f $libpath ]; then
+ continue
+ fi
+ newbt=( $(eu-addr2line --functions --exe=$libpath $addr) )
+ echo "$bt ${newbt[*]}"
+done
+
+# remove the temporary directory
+if [ -d $rpm_name ]; then
+ rm -rf $rpm_name
+fi
+
diff --git a/extras/devel-tools/strace-brick.sh b/extras/devel-tools/strace-brick.sh
new file mode 100755
index 00000000000..a140729111c
--- /dev/null
+++ b/extras/devel-tools/strace-brick.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Usage:
+# nice -n -19 strace-brick.sh glusterfsd 50
+
+brick_process_name=$1
+min_watch_cpu=$2
+if [ ! $brick_process_name ]; then
+ brick_process_name=glusterfsd
+fi
+
+if [ ! $min_watch_cpu ]; then
+ min_watch_cpu=50
+fi
+
+echo "min_watch_cpu: $min_watch_cpu"
+
+break=false
+
+while ! $break;
+do
+ mypids=( $(pgrep $brick_process_name) )
+ echo "mypids: ${mypids[*]}"
+
+ pid_args=$(echo ${mypids[*]} | sed -e 's/ / -p /g;s/^/-p /')
+ echo "pid_args: $pid_args"
+
+ pcpu=( $(ps $pid_args -o pcpu -h ) )
+ echo "pcpu: ${pcpu[*]}"
+
+ wait_longer=false
+
+ for i in $( seq 0 $((${#pcpu[*]} - 1)) )
+ do
+ echo "i: $i"
+ echo "mypids[$i]: ${mypids[$i]}"
+
+ int_pcpu=$(echo ${pcpu[$i]} | cut -f 1 -d '.')
+ echo "int_pcpu: $int_pcpu"
+ if [ ! $int_pcpu ] || [ ! $min_watch_cpu ]; then
+ break=true
+ echo "breaking"
+ fi
+ if [ $int_pcpu -ge $min_watch_cpu ]; then
+ wait_longer=true
+ mydirname="${brick_process_name}-${mypids[$i]}-$(date --utc +'%Y%m%d-%H%M%S.%N')"
+ $(mkdir $mydirname && cd $mydirname && timeout --kill-after=5 --signal=KILL 60 nice -n -19 strace -p ${mypids[$i]} -ff -tt -T -o $brick_process_name) &
+ fi
+ done
+
+ if $wait_longer; then
+ sleep 90
+ else
+ sleep 15
+ fi
+done
diff --git a/extras/distributed-testing/README b/extras/distributed-testing/README
new file mode 100644
index 00000000000..928d943f211
--- /dev/null
+++ b/extras/distributed-testing/README
@@ -0,0 +1,28 @@
+PROBLEM
+
+The testing methodology of Gluster is extremely slow. It takes a very long time (6+ hrs) to run the basic tests on a single machine. It takes about 20+ hours to run code analysis version of tests like valgrind, asan, tsan etc.
+
+SOLUTION
+
+The fundamental problem is that the tests cannot be parallelized on a single machine. The natural solution is to run these tests on a cluster of machines. In a nutshell, apply map-reduce to run unit tests.
+
+WORK @ Facebook
+
+At Facebook we have applied the map-reduce approach to testing and have observed 10X improvements.
+
+The solution supports the following
+
+Distribute tests across machines, collect results/logs
+Share worker pool across different testers
+Try failure 3 times on 3 different machines before calling it a failure
+Support running asan, valgrind, asan-noleaks
+Self management of worker pools. The clients will manage the worker pool including version update, no manual maintenance required
+WORK
+
+Port the code from gluster-fb-3.8 to gluster master
+
+HOW TO RUN
+
+./extras/distributed-testing/distributed-test.sh --hosts '<h1> <h2> <h3>'
+
+All hosts should have no password for ssh via root. This can be achieved with keys setup on the client and the server machines.
diff --git a/extras/distributed-testing/distributed-test-build-env b/extras/distributed-testing/distributed-test-build-env
new file mode 100644
index 00000000000..cd68ff717da
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-build-env
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+GF_CONF_OPTS="--localstatedir=/var --sysconfdir /var/lib --prefix /usr --libdir /usr/lib64 \
+ --enable-bd-xlator=yes --enable-debug --enable-gnfs"
+
+if [ -x /usr/lib/rpm/redhat/dist.sh ]; then
+ REDHAT_MAJOR=$(/usr/lib/rpm/redhat/dist.sh --distnum)
+else
+ REDHAT_MAJOR=0
+fi
+
+ASAN_ENABLED=${ASAN_ENABLED:=0}
+if [ "$ASAN_ENABLED" -eq "1" ]; then
+ GF_CONF_OPTS="$GF_CONF_OPTS --with-asan"
+fi
+
+GF_CONF_OPTS="$GF_CONF_OPTS --with-systemd"
+export GF_CONF_OPTS
+
+export CFLAGS="-O0 -ggdb -fPIC -Wall"
diff --git a/extras/distributed-testing/distributed-test-build.sh b/extras/distributed-testing/distributed-test-build.sh
new file mode 100755
index 00000000000..e8910d8425c
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-build.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+set -e
+
+EXTRA_CONFIGURE_ARGS="$@"
+ASAN_REQUESTED=false
+for arg in $EXTRA_CONFIGURE_ARGS; do
+ if [ $arg == "--with-asan" ]; then
+ echo "Requested ASAN, cleaning build first."
+ make -j distclean || true
+ touch .with_asan
+ ASAN_REQUESTED=true
+ fi
+done
+
+if [ $ASAN_REQUESTED == false ]; then
+ if [ -f .with_asan ]; then
+ echo "Previous build was with ASAN, cleaning build first."
+ make -j distclean || true
+ rm -v .with_asan
+ fi
+fi
+
+source extras/distributed-testing/distributed-test-build-env
+./autogen.sh
+./configure $GF_CONF_OPTS $EXTRA_CONFIGURE_ARGS
+make -j
diff --git a/extras/distributed-testing/distributed-test-env b/extras/distributed-testing/distributed-test-env
new file mode 100644
index 00000000000..36fdd82e5dd
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-env
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+SMOKE_TESTS="\
+ tests/basic/*.t\
+ tests/basic/afr/*.t\
+ tests/basic/distribute/*.t\
+ tests/bugs/fb*.t\
+ tests/features/brick-min-free-space.t\
+"
+
+KNOWN_FLAKY_TESTS="\
+"
+
+BROKEN_TESTS="\
+ tests/features/lock_revocation.t\
+ tests/features/recon.t\
+ tests/features/fdl-overflow.t\
+ tests/features/fdl.t\
+ tests/features/ipc.t\
+ tests/bugs/distribute/bug-1247563.t\
+ tests/bugs/distribute/bug-1543279.t\
+ tests/bugs/distribute/bug-1066798.t\
+ tests/bugs/ec/bug-1304988.t\
+ tests/bugs/unclassified/bug-1357397.t\
+ tests/bugs/quota/bug-1235182.t\
+ tests/bugs/fuse/bug-1309462.t\
+ tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t\
+ tests/bugs/stripe/bug-1002207.t\
+ tests/bugs/stripe/bug-1111454.t\
+ tests/bugs/snapshot/bug-1140162-file-snapshot-features-encrypt-opts-validation.t\
+ tests/bugs/write-behind/bug-1279730.t\
+ tests/bugs/gfapi/bug-1093594.t\
+ tests/bugs/replicate/bug-1473026.t\
+ tests/bugs/replicate/bug-802417.t\
+ tests/basic/inode-leak.t\
+ tests/basic/distribute/force-migration.t\
+ tests/basic/ec/heal-info.t\
+ tests/basic/ec/ec-seek.t\
+ tests/basic/jbr/jbr-volgen.t\
+ tests/basic/jbr/jbr.t\
+ tests/basic/afr/tarissue.t\
+ tests/basic/tier/tierd_check.t\
+ tests/basic/gfapi/bug1291259.t\
+"
+
+SMOKE_TESTS=$(echo $SMOKE_TESTS | tr -s ' ' ' ')
+KNOWN_FLAKY_TESTS=$(echo $KNOWN_FLAKY_TESTS | tr -s ' ' ' ')
+BROKEN_TESTS=$(echo $BROKEN_TESTS | tr -s ' ' ' ')
diff --git a/extras/distributed-testing/distributed-test-runner.py b/extras/distributed-testing/distributed-test-runner.py
new file mode 100755
index 00000000000..5a07e2feab1
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-runner.py
@@ -0,0 +1,859 @@
+#!/usr/bin/python3
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import unicode_literals
+from __future__ import print_function
+import re
+import sys
+import fcntl
+import base64
+import threading
+import socket
+import os
+import shlex
+import argparse
+import subprocess
+import time
+import SimpleXMLRPCServer
+import xmlrpclib
+import md5
+import httplib
+import uuid
+
+DEFAULT_PORT = 9999
+TEST_TIMEOUT_S = 15 * 60
+CLIENT_CONNECT_TIMEOUT_S = 10
+CLIENT_TIMEOUT_S = 60
+PATCH_FILE_UID = str(uuid.uuid4())
+SSH_TIMEOUT_S = 10
+MAX_ATTEMPTS = 3
+ADDRESS_FAMILY = 'IPv4'
+
+
+def socket_instance(address_family):
+ if address_family.upper() == 'ipv4'.upper():
+ return socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ elif address_family.upper() == 'ipv6'.upper():
+ return socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ else:
+ Log.error("Invalid IP address family")
+ sys.exit(1)
+
+
+def patch_file():
+ return "/tmp/%s-patch.tar.gz" % PATCH_FILE_UID
+
+# ..............................................................................
+# SimpleXMLRPCServer IPvX Wrapper
+# ..............................................................................
+
+
+class GeneralXMLRPCServer(SimpleXMLRPCServer.SimpleXMLRPCServer):
+ def __init__(self, addr):
+ SimpleXMLRPCServer.SimpleXMLRPCServer.__init__(self, addr)
+
+ def server_bind(self):
+ if self.socket:
+ self.socket.close()
+ self.socket = socket_instance(args.address_family)
+ self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ SimpleXMLRPCServer.SimpleXMLRPCServer.server_bind(self)
+
+
+class HTTPConnection(httplib.HTTPConnection):
+ def __init__(self, host):
+ self.host = host
+ httplib.HTTPConnection.__init__(self, host)
+
+ def connect(self):
+ old_timeout = socket.getdefaulttimeout()
+ self.sock = socket.create_connection((self.host, self.port),
+ timeout=CLIENT_CONNECT_TIMEOUT_S)
+ self.sock.settimeout(old_timeout)
+
+
+class IPTransport(xmlrpclib.Transport):
+ def __init__(self, *args, **kwargs):
+ xmlrpclib.Transport.__init__(self, *args, **kwargs)
+
+ def make_connection(self, host):
+ return HTTPConnection(host)
+
+
+# ..............................................................................
+# Common
+# ..............................................................................
+
+
+class Timer:
+ def __init__(self):
+ self.start = time.time()
+
+ def elapsed_s(self):
+ return int(time.time() - self.start)
+
+ def reset(self):
+ ret = self.elapsed_s()
+ self.start = time.time()
+ return ret
+
+
+def encode(buf):
+ return base64.b16encode(buf)
+
+
+def decode(buf):
+ return base64.b16decode(buf)
+
+
+def get_file_content(path):
+ with open(path, "r") as f:
+ return f.read()
+
+
+def write_to_file(path, data):
+ with open(path, "w") as f:
+ f.write(data)
+
+
+def failsafe(fn, args=()):
+ try:
+ return (True, fn(*args))
+ except (xmlrpclib.Fault, xmlrpclib.ProtocolError, xmlrpclib.ResponseError,
+ Exception) as err:
+ Log.debug(str(err))
+ return (False, None)
+
+
+class LogLevel:
+ DEBUG = 2
+ ERROR = 1
+ CLI = 0
+
+
+class Log:
+ LOGLEVEL = LogLevel.ERROR
+
+ @staticmethod
+ def _normalize(msg):
+ return msg[:100]
+
+ @staticmethod
+ def debug(msg):
+ if Log.LOGLEVEL >= LogLevel.DEBUG:
+ sys.stdout.write("<debug> %s\n" % Log._normalize(msg))
+ sys.stdout.flush()
+
+ @staticmethod
+ def error(msg):
+ sys.stderr.write("<error> %s\n" % Log._normalize(msg))
+
+ @staticmethod
+ def header(msg):
+ sys.stderr.write("* %s *\n" % Log._normalize(msg))
+
+ @staticmethod
+ def cli(msg):
+ sys.stderr.write("%s\n" % msg)
+
+
+class Shell:
+ def __init__(self, cwd=None, logpath=None):
+ self.cwd = cwd
+ self.shell = True
+ self.redirect = open(os.devnull if not logpath else logpath, "wr+")
+
+ def __del__(self):
+ self.redirect.close()
+
+ def cd(self, cwd):
+ Log.debug("cd %s" % cwd)
+ self.cwd = cwd
+
+ def truncate(self):
+ self.redirect.truncate(0)
+
+ def read_logs(self):
+ self.redirect.seek(0)
+ return self.redirect.read()
+
+ def check_call(self, cmd):
+ status = self.call(cmd)
+ if status:
+ raise Exception("Error running command %s. status=%s"
+ % (cmd, status))
+
+ def call(self, cmd):
+ if isinstance(cmd, list):
+ return self._calls(cmd)
+
+ return self._call(cmd)
+
+ def ssh(self, hostname, cmd, id_rsa=None):
+ flags = "" if not id_rsa else "-i " + id_rsa
+ return self.call("timeout %s ssh %s root@%s \"%s\"" %
+ (SSH_TIMEOUT_S, flags, hostname, cmd))
+
+ def scp(self, hostname, src, dest, id_rsa=None):
+ flags = "" if not id_rsa else "-i " + id_rsa
+ return self.call("timeout %s scp %s %s root@%s:%s" %
+ (SSH_TIMEOUT_S, flags, src, hostname, dest))
+
+ def output(self, cmd, cwd=None):
+ Log.debug("%s> %s" % (cwd, cmd))
+ return subprocess.check_output(shlex.split(cmd), cwd=self.cwd)
+
+ def _calls(self, cmds):
+ Log.debug("Running commands. %s" % cmds)
+ for c in cmds:
+ status = self.call(c)
+ if status:
+ Log.error("Commands failed with %s" % status)
+ return status
+ return 0
+
+ def _call(self, cmd):
+ if not self.shell:
+ cmd = shlex.split(cmd)
+
+ Log.debug("%s> %s" % (self.cwd, cmd))
+
+ status = subprocess.call(cmd, cwd=self.cwd, shell=self.shell,
+ stdout=self.redirect, stderr=self.redirect)
+
+ Log.debug("return %s" % status)
+ return status
+
+
+# ..............................................................................
+# Server role
+# ..............................................................................
+
+class TestServer:
+ def __init__(self, port, scratchdir):
+ self.port = port
+ self.scratchdir = scratchdir
+ self.shell = Shell()
+ self.rpc = None
+ self.pidf = None
+
+ self.shell.check_call("mkdir -p %s" % self.scratchdir)
+ self._process_lock()
+
+ def __del__(self):
+ if self.pidf:
+ self.pidf.close()
+
+ def init(self):
+ Log.debug("Starting xmlrpc server on port %s" % self.port)
+ self.rpc = GeneralXMLRPCServer(("", self.port))
+ self.rpc.register_instance(Handlers(self.scratchdir))
+
+ def serve(self):
+ (status, _) = failsafe(self.rpc.serve_forever)
+ Log.cli("== End ==")
+
+ def _process_lock(self):
+ pid_filename = os.path.basename(__file__).replace("/", "-")
+ pid_filepath = "%s/%s.pid" % (self.scratchdir, pid_filename)
+ self.pidf = open(pid_filepath, "w")
+ try:
+ fcntl.lockf(self.pidf, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ # We have the lock, kick anybody listening on this port
+ self.shell.call("kill $(lsof -t -i:%s)" % self.port)
+ except IOError:
+ Log.error("Another process instance is running")
+ sys.exit(0)
+
+#
+# Server Handler
+#
+
+
+handler_lock = threading.Lock()
+handler_serving_since = Timer()
+
+
+def synchronized(func):
+ def decorator(*args, **kws):
+ handler_lock.acquire()
+ h = args[0]
+ try:
+ h.shell.truncate()
+ ret = func(*args, **kws)
+ return ret
+ except Exception() as err:
+ Log.error(str(err))
+ Log.error(decode(h._log_content()))
+ raise
+ finally:
+ handler_lock.release()
+ handler_serving_since.reset()
+
+ return decorator
+
+
+class Handlers:
+ def __init__(self, scratchdir):
+ self.client_id = None
+ self.scratchdir = scratchdir
+ self.gluster_root = "%s/glusterfs" % self.scratchdir
+ self.shell = Shell(logpath="%s/test-handlers.log" % self.scratchdir)
+
+ def hello(self, id):
+ if not handler_lock.acquire(False):
+ return False
+ try:
+ return self._hello_locked(id)
+ finally:
+ handler_lock.release()
+
+ def _hello_locked(self, id):
+ if handler_serving_since.elapsed_s() > CLIENT_TIMEOUT_S:
+ Log.debug("Disconnected client %s" % self.client_id)
+ self.client_id = None
+
+ if not self.client_id:
+ self.client_id = id
+ handler_serving_since.reset()
+ return True
+
+ return (id == self.client_id)
+
+ @synchronized
+ def ping(self, id=None):
+ if id:
+ return id == self.client_id
+ return True
+
+ @synchronized
+ def bye(self, id):
+ assert id == self.client_id
+ self.client_id = None
+ handler_serving_since.reset()
+ return True
+
+ @synchronized
+ def cleanup(self, id):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ self.shell.check_call("PATH=.:$PATH; sudo ./clean_gfs_devserver.sh")
+ return True
+
+ @synchronized
+ def copy(self, id, name, content):
+ with open("%s/%s" % (self.scratchdir, name), "w+") as f:
+ f.write(decode(content))
+ return True
+
+ @synchronized
+ def copygzip(self, id, content):
+ assert id == self.client_id
+ gzipfile = "%s/tmp.tar.gz" % self.scratchdir
+ tarfile = "%s/tmp.tar" % self.scratchdir
+ self.shell.check_call("rm -f %s" % gzipfile)
+ self.shell.check_call("rm -f %s" % tarfile)
+ write_to_file(gzipfile, decode(content))
+
+ self.shell.cd(self.scratchdir)
+ self.shell.check_call("rm -r -f %s" % self.gluster_root)
+ self.shell.check_call("mkdir -p %s" % self.gluster_root)
+
+ self.shell.cd(self.gluster_root)
+ cmds = [
+ "gunzip -f -q %s" % gzipfile,
+ "tar -xvf %s" % tarfile
+ ]
+ return self.shell.call(cmds) == 0
+
+ @synchronized
+ def build(self, id, asan=False):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ self.shell.call("make clean")
+ env = "ASAN_ENABLED=1" if asan else ""
+ return self.shell.call(
+ "%s ./extras/distributed-testing/distributed-test-build.sh" % env) == 0
+
+ @synchronized
+ def install(self, id):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ return self.shell.call("make install") == 0
+
+ @synchronized
+ def prove(self, id, test, timeout, valgrind="no", asan_noleaks=True):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ env = "DEBUG=1 "
+ if valgrind == "memcheck" or valgrind == "yes":
+ cmd = "valgrind"
+ cmd += " --tool=memcheck --leak-check=full --track-origins=yes"
+ cmd += " --show-leak-kinds=all -v prove -v"
+ elif valgrind == "drd":
+ cmd = "valgrind"
+ cmd += " --tool=drd -v prove -v"
+ elif asan_noleaks:
+ cmd = "prove -v"
+ env += "ASAN_OPTIONS=detect_leaks=0 "
+ else:
+ cmd = "prove -v"
+
+ status = self.shell.call(
+ "%s timeout %s %s %s" % (env, timeout, cmd, test))
+
+ if status != 0:
+ return (False, self._log_content())
+ return (True, "")
+
+ def _log_content(self):
+ return encode(self.shell.read_logs())
+
+# ..............................................................................
+# Cli role
+# ..............................................................................
+
+
+class RPCConnection((threading.Thread)):
+ def __init__(self, host, port, path, cb):
+ threading.Thread.__init__(self)
+ self.host = host
+ self.port = port
+ self.path = path
+ self.shell = Shell()
+ self.cb = cb
+ self.stop = False
+ self.proxy = None
+ self.logid = "%s:%s" % (self.host, self.port)
+
+ def connect(self):
+ (status, ret) = failsafe(self._connect)
+ return (status and ret)
+
+ def _connect(self):
+ url = "http://%s:%s" % (self.host, self.port)
+ self.proxy = xmlrpclib.ServerProxy(url, transport=IPTransport())
+ return self.proxy.hello(self.cb.id)
+
+ def disconnect(self):
+ self.stop = True
+
+ def ping(self):
+ return self.proxy.ping()
+
+ def init(self):
+ return self._copy() and self._compile_and_install()
+
+ def run(self):
+ (status, ret) = failsafe(self.init)
+ if not status:
+ self.cb.note_lost_connection(self)
+ return
+ elif not ret:
+ self.cb.note_setup_failed(self)
+ return
+
+ while not self.stop:
+ (status, ret) = failsafe(self._run)
+ if not status or not ret:
+ self.cb.note_lost_connection(self)
+ break
+ time.sleep(0)
+
+ failsafe(self.proxy.bye, (self.cb.id,))
+ Log.debug("%s connection thread stopped" % self.host)
+
+ def _run(self):
+ test = self.cb.next_test()
+ (status, _) = failsafe(self._execute_next, (test,))
+ if not status:
+ self.cb.note_retry(test)
+ return False
+ return True
+
+ def _execute_next(self, test):
+ if not test:
+ time.sleep(1)
+ return
+
+ (status, error) = self.proxy.prove(self.cb.id, test,
+ self.cb.test_timeout,
+ self.cb.valgrind,
+ self.cb.asan_noleaks)
+ if status:
+ self.cb.note_done(test)
+ else:
+ self.cb.note_error(test, error)
+
+ def _compile_and_install(self):
+ Log.debug("<%s> Build " % self.logid)
+ asan = self.cb.asan or self.cb.asan_noleaks
+ return (self.proxy.build(self.cb.id, asan) and
+ self.proxy.install(self.cb.id))
+
+ def _copy(self):
+ return self._copy_gzip()
+
+ def _copy_gzip(self):
+ Log.cli("<%s> copying and compiling %s to remote" %
+ (self.logid, self.path))
+ data = encode(get_file_content(patch_file()))
+ Log.debug("GZIP size = %s B" % len(data))
+ return self.proxy.copygzip(self.cb.id, data)
+
+
+class RPCConnectionPool:
+ def __init__(self, gluster_path, hosts, n, id_rsa):
+ self.gluster_path = gluster_path
+ self.hosts = hosts
+ self.conns = []
+ self.faulty = []
+ self.n = int(len(hosts) / 2) + 1 if not n else n
+ self.id_rsa = id_rsa
+ self.stop = False
+ self.scanner = threading.Thread(target=self._scan_hosts_loop)
+ self.kicker = threading.Thread(target=self._kick_hosts_loop)
+ self.shell = Shell()
+ self.since_start = Timer()
+
+ self.shell.check_call("rm -f %s" % patch_file())
+ self.shell.check_call("tar -zcvf %s ." % patch_file())
+ self.id = md5.new(get_file_content(patch_file())).hexdigest()
+ Log.cli("client UID %s" % self.id)
+ Log.cli("patch UID %s" % PATCH_FILE_UID)
+
+ def __del__(self):
+ self.shell.check_call("rm -f %s" % patch_file())
+
+ def pool_status(self):
+ elapsed_m = int(self.since_start.elapsed_s() / 60)
+ return "%s/%s connected, %smin elapsed" % (len(self.conns), self.n,
+ elapsed_m)
+
+ def connect(self):
+ Log.debug("Starting scanner")
+ self.scanner.start()
+ self.kicker.start()
+
+ def disconnect(self):
+ self.stop = True
+ for conn in self.conns:
+ conn.disconnect()
+
+ def note_lost_connection(self, conn):
+ Log.cli("lost connection to %s" % conn.host)
+ self.conns.remove(conn)
+ self.hosts.append((conn.host, conn.port))
+
+ def note_setup_failed(self, conn):
+ Log.error("Setup failed on %s:%s" % (conn.host, conn.port))
+ self.conns.remove(conn)
+ self.faulty.append((conn.host, conn.port))
+
+ def _scan_hosts_loop(self):
+ Log.debug("Scanner thread started")
+ while not self.stop:
+ failsafe(self._scan_hosts)
+ time.sleep(5)
+
+ def _scan_hosts(self):
+ if len(self.hosts) == 0 and len(self.conns) == 0:
+ Log.error("no more hosts available to loadbalance")
+ sys.exit(1)
+
+ for (host, port) in self.hosts:
+ if (len(self.conns) >= self.n) or self.stop:
+ break
+ self._scan_host(host, port)
+
+ def _scan_host(self, host, port):
+ Log.debug("scanning %s:%s" % (host, port))
+ c = RPCConnection(host, port, self.gluster_path, self)
+ (status, result) = failsafe(c.connect)
+ if status and result:
+ self.hosts.remove((host, port))
+ Log.debug("Connected to %s:%s" % (host, port))
+ self.conns.append(c)
+ c.start()
+ Log.debug("%s / %s connected" % (len(self.conns), self.n))
+ else:
+ Log.debug("Failed to connect to %s:%s" % (host, port))
+
+ def _kick_hosts_loop(self):
+ Log.debug("Kick thread started")
+ while not self.stop:
+ time.sleep(10)
+ failsafe(self._kick_hosts)
+
+ Log.debug("Kick thread stopped")
+
+ def _is_pingable(self, host, port):
+ c = RPCConnection(host, port, self.gluster_path, self)
+ failsafe(c.connect)
+ (status, result) = failsafe(c.ping)
+ return status and result
+
+ def _kick_hosts(self):
+ # Do not kick hosts if we have the optimal number of connections
+ if (len(self.conns) >= self.n) or self.stop:
+ Log.debug("Skip kicking hosts")
+ return
+
+ # Check and if dead kick all hosts
+ for (host, port) in self.hosts:
+ if self.stop:
+ Log.debug("Break kicking hosts")
+ break
+
+ if self._is_pingable(host, port):
+ Log.debug("Host=%s is alive. Won't kick" % host)
+ continue
+
+ Log.debug("Kicking %s" % host)
+ mypath = sys.argv[0]
+ myname = os.path.basename(mypath)
+ destpath = "/tmp/%s" % myname
+ sh = Shell()
+ sh.scp(host, mypath, destpath, self.id_rsa)
+ sh.ssh(host, "nohup %s --server &>> %s.log &" %
+ (destpath, destpath), self.id_rsa)
+
+ def join(self):
+ self.scanner.join()
+ self.kicker.join()
+ for c in self.conns:
+ c.join()
+
+
+# ..............................................................................
+# test role
+# ..............................................................................
+
+class TestRunner(RPCConnectionPool):
+ def __init__(self, gluster_path, hosts, n, tests, flaky_tests, valgrind,
+ asan, asan_noleaks, id_rsa, test_timeout):
+ RPCConnectionPool.__init__(self, gluster_path, self._parse_hosts(hosts),
+ n, id_rsa)
+ self.flaky_tests = flaky_tests.split(" ")
+ self.pending = []
+ self.done = []
+ self.error = []
+ self.retry = {}
+ self.error_logs = []
+ self.stats_timer = Timer()
+ self.valgrind = valgrind
+ self.asan = asan
+ self.asan_noleaks = asan_noleaks
+ self.test_timeout = test_timeout
+
+ self.tests = self._get_tests(tests)
+
+ Log.debug("tests: %s" % self.tests)
+
+ def _get_tests(self, tests):
+ if not tests or tests == "all":
+ return self._not_flaky(self._all())
+ elif tests == "flaky":
+ return self.flaky_tests
+ else:
+ return self._not_flaky(tests.strip().split(" "))
+
+ def run(self):
+ self.connect()
+ self.join()
+ return len(self.error)
+
+ def _pretty_print(self, data):
+ if isinstance(data, list):
+ str = ""
+ for i in data:
+ str = "%s %s" % (str, i)
+ return str
+ return "%s" % data
+
+ def print_result(self):
+ Log.cli("== RESULTS ==")
+ Log.cli("SUCCESS : %s" % len(self.done))
+ Log.cli("ERRORS : %s" % len(self.error))
+ Log.cli("== ERRORS ==")
+ Log.cli(self._pretty_print(self.error))
+ Log.cli("== LOGS ==")
+ Log.cli(self._pretty_print(self.error_logs))
+ Log.cli("== END ==")
+
+ def next_test(self):
+ if len(self.tests):
+ test = self.tests.pop()
+ self.pending.append(test)
+ return test
+
+ if not len(self.pending):
+ self.disconnect()
+
+ return None
+
+ def _pct_completed(self):
+ total = len(self.tests) + len(self.pending) + len(self.done)
+ total += len(self.error)
+ completed = len(self.done) + len(self.error)
+ return 0 if not total else int(completed / total * 100)
+
+ def note_done(self, test):
+ Log.cli("%s PASS (%s%% done) (%s)" % (test, self._pct_completed(),
+ self.pool_status()))
+ self.pending.remove(test)
+ self.done.append(test)
+ if test in self.retry:
+ del self.retry[test]
+
+ def note_error(self, test, errstr):
+ Log.cli("%s FAIL" % test)
+ self.pending.remove(test)
+ if test not in self.retry:
+ self.retry[test] = 1
+
+ if errstr:
+ path = "%s/%s-%s.log" % ("/tmp", test.replace("/", "-"),
+ self.retry[test])
+ failsafe(write_to_file, (path, decode(errstr),))
+ self.error_logs.append(path)
+
+ if self.retry[test] < MAX_ATTEMPTS:
+ self.retry[test] += 1
+ Log.debug("retry test %s attempt %s" % (test, self.retry[test]))
+ self.tests.append(test)
+ else:
+ Log.debug("giveup attempt test %s" % test)
+ del self.retry[test]
+ self.error.append(test)
+
+ def note_retry(self, test):
+ Log.cli("retry %s on another host" % test)
+ self.pending.remove(test)
+ self.tests.append(test)
+
+ #
+ # test classifications
+ #
+ def _all(self):
+ return self._list_tests(["tests"], recursive=True)
+
+ def _not_flaky(self, tests):
+ for t in self.flaky_tests:
+ if t in tests:
+ tests.remove(t)
+ return tests
+
+ def _list_tests(self, prefixes, recursive=False, ignore_ifnotexist=False):
+ tests = []
+ for prefix in prefixes:
+ real_path = "%s/%s" % (self.gluster_path, prefix)
+ if not os.path.exists(real_path) and ignore_ifnotexist:
+ continue
+ for f in os.listdir(real_path):
+ if os.path.isdir(real_path + "/" + f):
+ if recursive:
+ tests += self._list_tests([prefix + "/" + f], recursive)
+ else:
+ if re.match(r".*\.t$", f):
+ tests += [prefix + "/" + f]
+ return tests
+
+ def _parse_hosts(self, hosts):
+ ret = []
+ for h in args.hosts.split(" "):
+ ret.append((h, DEFAULT_PORT))
+ Log.debug(ret)
+ return ret
+
+# ..............................................................................
+# Roles entry point
+# ..............................................................................
+
+
+def run_as_server(args):
+ if not args.server_path:
+ Log.error("please provide server path")
+ return 1
+
+ server = TestServer(args.port, args.server_path)
+ server.init()
+ server.serve()
+ return 0
+
+
+def run_as_tester(args):
+ Log.header("GLUSTER TEST CLI")
+
+ Log.debug("args = %s" % args)
+
+ tests = TestRunner(args.gluster_path, args.hosts, args.n, args.tests,
+ args.flaky_tests, valgrind=args.valgrind,
+ asan=args.asan, asan_noleaks=args.asan_noleaks,
+ id_rsa=args.id_rsa, test_timeout=args.test_timeout)
+ result = tests.run()
+ tests.print_result()
+ return result
+
+# ..............................................................................
+# main
+# ..............................................................................
+
+
+def main(args):
+ if args.v:
+ Log.LOGLEVEL = LogLevel.DEBUG
+
+ if args.server and args.tester:
+ Log.error("Invalid arguments. More than one role specified")
+ sys.exit(1)
+
+ if args.server:
+ sys.exit(run_as_server(args))
+ elif args.tester:
+ sys.exit(run_as_tester(args))
+ else:
+ Log.error("please specify a mode for CI")
+ parser.print_help()
+ sys.exit(1)
+
+
+parser = argparse.ArgumentParser(description="Gluster CI")
+
+# server role
+parser.add_argument("--server", help="start server", action="store_true")
+parser.add_argument("--server_path", help="server scratch space",
+ default="/tmp/gluster-test")
+parser.add_argument("--host", help="server address to listen", default="")
+parser.add_argument("--port", help="server port to listen",
+ type=int, default=DEFAULT_PORT)
+# test role
+parser.add_argument("--tester", help="start tester", action="store_true")
+parser.add_argument("--valgrind[=memcheck,drd]",
+ help="run tests with valgrind tool 'memcheck' or 'drd'",
+ default="no")
+parser.add_argument("--asan", help="test with asan enabled",
+ action="store_true")
+parser.add_argument("--asan-noleaks", help="test with asan but no mem leaks",
+ action="store_true")
+parser.add_argument("--tests", help="all/flaky/list of tests", default=None)
+parser.add_argument("--flaky_tests", help="list of flaky tests", default=None)
+parser.add_argument("--n", help="max number of machines to use", type=int,
+ default=0)
+parser.add_argument("--hosts", help="list of worker machines")
+parser.add_argument("--gluster_path", help="gluster path to test",
+ default=os.getcwd())
+parser.add_argument("--id-rsa", help="private key to use for ssh",
+ default=None)
+parser.add_argument("--test-timeout",
+ help="test timeout in sec (default 15min)",
+ default=TEST_TIMEOUT_S)
+# general
+parser.add_argument("-v", help="verbose", action="store_true")
+parser.add_argument("--address_family", help="IPv6 or IPv4 to use",
+ default=ADDRESS_FAMILY)
+
+args = parser.parse_args()
+
+main(args)
diff --git a/extras/distributed-testing/distributed-test.sh b/extras/distributed-testing/distributed-test.sh
new file mode 100755
index 00000000000..8f1e0310f33
--- /dev/null
+++ b/extras/distributed-testing/distributed-test.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+source ./extras/distributed-testing/distributed-test-env
+
+N=0
+TESTS='all'
+FLAKY=$KNOWN_FLAKY_TESTS
+BROKEN=$BROKEN_TESTS
+TEST_TIMEOUT_S=900
+ADDRESS_FAMILY='IPv4'
+
+FLAGS=""
+
+function print_env {
+ echo "Settings:"
+ echo "N=$N"
+ echo -e "-------\nHOSTS\n$HOSTS\n-------"
+ echo -e "TESTS\n$TESTS\n-------"
+ echo -e "SKIP\n$FLAKY $BROKEN\n-------"
+ echo -e "TEST_TIMEOUT_S=$TEST_TIMEOUT_S s\n"
+}
+
+function cleanup {
+ rm -f /tmp/test*.log
+}
+
+function usage {
+ echo "Usage: $0 [-h or --help] [-v or --verbose]
+ [--all] [--flaky] [--smoke] [--broken]
+ [--valgrind] [--asan] [--asan-noleaks]
+ [--hosts <hosts>] [-n <parallelism>]
+ [--tests <tests>]
+ [--id-rsa <ssh private key>]
+ [--address_family <IPv4 or IPv6>]
+ "
+}
+
+function parse_args () {
+ args=`getopt \
+ -o hvn: \
+ --long help,verbose,address_family:,valgrind,asan,asan-noleaks,all,\
+smoke,flaky,broken,hosts:,tests:,id-rsa:,test-timeout: \
+ -n 'fb-remote-test.sh' -- "$@"`
+
+ if [ $? != 0 ]; then
+ echo "Error parsing getopt"
+ exit 1
+ fi
+
+ eval set -- "$args"
+
+ while true; do
+ case "$1" in
+ -h | --help) usage ; exit 1 ;;
+ -v | --verbose) FLAGS="$FLAGS -v" ; shift ;;
+ --address_family) ADDRESS_FAMILY=$2; shift 2 ;;
+ --valgrind) FLAGS="$FLAGS --valgrind" ; shift ;;
+ --asan-noleaks) FLAGS="$FLAGS --asan-noleaks"; shift ;;
+ --asan) FLAGS="$FLAGS --asan" ; shift ;;
+ --hosts) HOSTS=$2; shift 2 ;;
+ --tests) TESTS=$2; FLAKY= ; BROKEN= ; shift 2 ;;
+ --test-timeout) TEST_TIMEOUT_S=$2; shift 2 ;;
+ --all) TESTS='all' ; shift 1 ;;
+ --flaky) TESTS=$FLAKY; FLAKY= ; shift 1 ;;
+ --smoke) TESTS=$SMOKE_TESTS; shift 1 ;;
+ --broken) TESTS=$BROKEN_TESTS; FLAKY= ; BROKEN= ; shift 1 ;;
+ --id-rsa) FLAGS="$FLAGS --id-rsa $2" ; shift 2 ;;
+ -n) N=$2; shift 2 ;;
+ *) break ;;
+ esac
+ done
+ run_tests_args="$@"
+}
+
+function main {
+ parse_args "$@"
+
+ if [ -z "$HOSTS" ]; then
+ echo "Please provide hosts to run the tests in"
+ exit -1
+ fi
+
+ print_env
+
+ cleanup
+
+ "extras/distributed-testing/distributed-test-runner.py" $FLAGS --tester \
+ --n "$N" --hosts "$HOSTS" --tests "$TESTS" \
+ --flaky_tests "$FLAKY $BROKEN" --test-timeout "$TEST_TIMEOUT_S" \
+ --address_family "$ADDRESS_FAMILY"
+
+ exit $?
+}
+
+main "$@"
diff --git a/extras/ec-heal-script/README.md b/extras/ec-heal-script/README.md
new file mode 100644
index 00000000000..aaefd6681f6
--- /dev/null
+++ b/extras/ec-heal-script/README.md
@@ -0,0 +1,69 @@
+# gluster-heal-scripts
+Scripts to correct extended attributes of fragments of files to make them healble.
+
+Following are the guidelines/suggestions to use these scripts.
+
+1 - Passwordless ssh should be setup for all the nodes of the cluster.
+
+2 - Scripts should be executed from one of these nodes.
+
+3 - Make sure NO "IO" is going on for the files for which we are running
+these two scripts.
+
+4 - There should be no heal going on for the file for which xattrs are being
+set by correct_pending_heals.sh. Disable the self heal while running this script.
+
+5 - All the bricks of the volume should be UP to identify good and bad fragments
+and to decide if an entry is healble or not.
+
+6 - If correct_pending_heals.sh is stopped in the middle while it was processing
+healble entries, it is suggested to re-run gfid_needing_heal_parallel.sh to create
+latest list of healble and non healble entries and "potential_heal" "can_not_heal" files.
+
+7 - Based on the number of entries, these files might take time to get and set the
+stats and xattrs of entries.
+
+8 - A backup of the fragments will be taken on <brick path>/.glusterfs/correct_pending_heals
+ directory with a file name same as gfid.
+
+9 - Once the correctness of the file gets verified by user, these backup should be removed.
+
+10 - Make sure we have enough space on bricks to take these backups.
+
+11 - At the end this will create two files -
+ 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed.
+ 2 - can_not_heal - Contains list of files which can not be healed.
+
+12 - It is suggested that the integrity of the data of files, which were modified and healed,
+ should be checked by the user.
+
+
+Usage:
+
+Following are the sequence of steps to use these scripts -
+
+1 - ./gfid_needing_heal_parallel.sh <volume name>
+
+ Execute gfid_needing_heal_parallel.sh with volume name to create list of files which could
+ be healed and can not be healed. It creates "potential_heal" and "can_not_heal" files.
+ During execution, it also displays the list of files on consol with the verdict.
+
+2 - ./correct_pending_heals.sh
+
+ Execute correct_pending_heals.sh without any argument. This script processes entries present
+ in "heal" file. It asks user to enter how many files we want to process in one attempt.
+ Once the count is provided, this script will fetch the entries one by one from "potential_heal" file and takes necessary action.
+ If at this point also a file can not be healed, it will be pushed to "can_not_heal" file.
+ If a file can be healed, this script will modify the xattrs of that file fragments and create an entry in "modified_and_backedup_files" file
+
+3 - At the end, all the entries of "potential_heal" will be processed and based on the processing only two files will be left.
+
+ 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed.
+ 2 - can_not_heal - Contains list of files which can not be healed.
+
+Logs and other files -
+
+1 - modified_and_backedup_files - It contains all the files which could be healed and the location of backup of each fragments.
+2 - can_not_heal - It contains all the files which can not be healed.
+3 - potential_heal - List of files which could be healed and should be processed by "correct_pending_heals.sh"
+4 - /var/log/glusterfs/ec-heal-script.log - It contains logs of both the files.
diff --git a/extras/ec-heal-script/correct_pending_heals.sh b/extras/ec-heal-script/correct_pending_heals.sh
new file mode 100755
index 00000000000..c9f19dd7c89
--- /dev/null
+++ b/extras/ec-heal-script/correct_pending_heals.sh
@@ -0,0 +1,415 @@
+#!/bin/bash
+# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# This script finally resets the xattrs of all the fragments of a file
+# which can be healed as per gfid_needing_heal_parallel.sh.
+# gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal.
+# This script takes potential_heal as input and resets xattrs of all the fragments
+# of those files present in this file and which could be healed as per
+# trusted.ec.size xattar of the file else it will place the entry in can_not_heal
+# file. Those entries which must be healed will be place in must_heal file
+# after setting xattrs so that user can track those files.
+
+
+MOD_BACKUP_FILES="modified_and_backedup_files"
+CAN_NOT_HEAL="can_not_heal"
+LOG_DIR="/var/log/glusterfs"
+LOG_FILE="$LOG_DIR/ec-heal-script.log"
+LINE_SEP="==================================================="
+
+function heal_log()
+{
+ echo "$1" >> "$LOG_FILE"
+}
+
+function desc ()
+{
+ echo ""
+ echo "This script finally resets the xattrs of all the fragments of a file
+which can be healed as per gfid_needing_heal_parallel.sh.
+gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal.
+This script takes potential_heal as input and resets xattrs of all the fragments
+of those files present in this file and which could be healed as per
+trusted.ec.size xattar of the file else it will place the entry in can_not_heal
+file. Those entries which must be healed will be place in must_heal file
+after setting xattrs so that user can track those files."
+}
+
+function _init ()
+{
+ if [ $# -ne 0 ]
+ then
+ echo "usage: $0"
+ desc
+ exit 2
+ fi
+
+ if [ ! -f "potential_heal" ]
+ then
+ echo "Nothing to correct. File "potential_heal" does not exist"
+ echo ""
+ desc
+ exit 2
+ fi
+}
+
+function total_file_size_in_hex()
+{
+ local frag_size=$1
+ local size=0
+ local hex_size=""
+
+ size=$((frag_size * 4))
+ hex_size=$(printf '0x%016x' $size)
+ echo "$hex_size"
+}
+
+function backup_file_fragment()
+{
+ local file_host=$1
+ local file_entry=$2
+ local gfid_actual_paths=$3
+ local brick_root=""
+ local temp=""
+ local backup_dir=""
+ local cmd=""
+ local gfid=""
+
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+ temp=$(echo "$(basename "$BASH_SOURCE")" | cut -d '.' -f 1)
+ backup_dir=$(echo "${brick_root}/.glusterfs/${temp}")
+ file_entry=${file_entry//#}
+
+ gfid=$(echo "${gfid_actual_paths}" | cut -d '|' -f 1 | cut -d '/' -f 5)
+ echo "${file_host}:${backup_dir}/${gfid}" >> "$MOD_BACKUP_FILES"
+
+ cmd="mkdir -p ${backup_dir} && yes | cp -af ${file_entry} ${backup_dir}/${gfid} 2>/dev/null"
+ ssh -n "${file_host}" "${cmd}"
+}
+
+function set_frag_xattr ()
+{
+ local file_host=$1
+ local file_entry=$2
+ local good=$3
+ local cmd1=""
+ local cmd2=""
+ local cmd=""
+ local version="0x00000000000000010000000000000001"
+ local dirty="0x00000000000000010000000000000001"
+
+ if [[ $good -eq 0 ]]
+ then
+ version="0x00000000000000000000000000000000"
+ fi
+
+ cmd1=" setfattr -n trusted.ec.version -v ${version} ${file_entry} &&"
+ cmd2=" setfattr -n trusted.ec.dirty -v ${dirty} ${file_entry}"
+ cmd=${cmd1}${cmd2}
+ ssh -n "${file_host}" "${cmd}"
+}
+
+function set_version_dirty_xattr ()
+{
+ local file_paths=$1
+ local good=$2
+ local gfid_actual_paths=$3
+ local file_entry=""
+ local file_host=""
+ local bpath=""
+
+ for bpath in ${file_paths//,/ }
+ do
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ backup_file_fragment "$file_host" "$file_entry" "$gfid_actual_paths"
+ file_entry=${file_entry//#}
+ set_frag_xattr "$file_host" "$file_entry" "$good"
+ done
+}
+
+function match_size_xattr_quorum ()
+{
+ local file_paths=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local size_xattr=""
+ local bpath=""
+ declare -A xattr_count
+
+ for bpath in ${file_paths//,/ }
+ do
+ size_xattr=""
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ file_entry=${file_entry//#}
+
+ cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2"
+ size_xattr=$(ssh -n "${file_host}" "${cmd}")
+ if [[ -n $size_xattr ]]
+ then
+ count=$((xattr_count["$size_xattr"] + 1))
+ xattr_count["$size_xattr"]=${count}
+ if [[ $count -ge 4 ]]
+ then
+ echo "${size_xattr}"
+ return
+ fi
+ fi
+ done
+ echo "False"
+}
+
+function match_version_xattr ()
+{
+ local file_paths=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local version=""
+ local bpath=""
+ declare -A ver_count
+
+ for bpath in ${file_paths//,/ }
+ do
+ version=""
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ file_entry=${file_entry//#}
+
+ cmd="getfattr -n trusted.ec.version -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.version" | cut -d '=' -f 2"
+ version=$(ssh -n "${file_host}" "${cmd}")
+ ver_count["$version"]=$((ver_count["$version"] + 1))
+ done
+ for key in "${ver_count[@]}"
+ do
+ if [[ $key -ge 4 ]]
+ then
+ echo "True"
+ return
+ else
+ echo "False"
+ return
+ fi
+ done
+}
+
+function match_stat_size_with_xattr ()
+{
+ local bpath=$1
+ local size=$2
+ local file_stat=$3
+ local xattr=$4
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local stat_output=""
+ local hex_size=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+
+ file_entry=${file_entry//#}
+ cmd="stat --format=%F:%B:%s $file_entry 2>/dev/null"
+ stat_output=$(ssh -n "${file_host}" "${cmd}")
+ echo "$stat_output" | grep -w "${file_stat}" > /dev/null
+
+ if [[ $? -eq 0 ]]
+ then
+ cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2"
+ hex_size=$(ssh -n "${file_host}" "${cmd}")
+
+ if [[ -z $hex_size || "$hex_size" != "$xattr" ]]
+ then
+ echo "False"
+ return
+ fi
+ size_diff=$(printf '%d' $(( size - hex_size )))
+ if [[ $size_diff -gt 2047 ]]
+ then
+ echo "False"
+ return
+ else
+ echo "True"
+ return
+ fi
+ else
+ echo "False"
+ return
+ fi
+}
+
+function find_file_paths ()
+{
+ local bpath=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local brick_root=""
+ local gfid=""
+ local actual_path=""
+ local gfid_path=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+
+ gfid=$(echo "${file_entry}" | grep ".glusterfs")
+ if [[ -n "$gfid" ]]
+ then
+ gfid_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' "
+ actual_path=$(ssh -n "${file_host}" "${cmd}")
+ #removing absolute path so that user can refer this from mount point
+ actual_path=${actual_path#"$brick_root"}
+ else
+ actual_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' "
+ gfid_path=$(ssh -n "${file_host}" "${cmd}")
+ gfid_path=${gfid_path#"$brick_root"}
+ fi
+
+ echo "${gfid_path}|${actual_path}"
+}
+
+function log_can_not_heal ()
+{
+ local gfid_actual_paths=$1
+ local file_paths=$2
+ file_paths=${file_paths//#}
+
+ echo "${LINE_SEP}" >> "$CAN_NOT_HEAL"
+ echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL"
+ for bpath in ${file_paths//,/ }
+ do
+ echo "${bpath}" >> "$CAN_NOT_HEAL"
+ done
+}
+
+function check_all_frag_and_set_xattr ()
+{
+ local file_paths=$1
+ local total_size=$2
+ local file_stat=$3
+ local bpath=""
+ local healthy_count=0
+ local match="False"
+ local matching_bricks=""
+ local bad_bricks=""
+ local gfid_actual_paths=""
+
+ for bpath in ${file_paths//,/ }
+ do
+ if [[ -n "$gfid_actual_paths" ]]
+ then
+ break
+ fi
+ gfid_actual_paths=$(find_file_paths "$bpath")
+ done
+
+ match=$(match_size_xattr_quorum "$file_paths")
+
+# echo "${match} : $bpath" >> "$MOD_BACKUP_FILES"
+
+ if [[ "$match" != "False" ]]
+ then
+ xattr="$match"
+ for bpath in ${file_paths//,/ }
+ do
+ match="False"
+ match=$(match_stat_size_with_xattr "$bpath" "$total_size" "$file_stat" "$xattr")
+ if [[ "$match" == "True" ]]
+ then
+ matching_bricks="${bpath},${matching_bricks}"
+ healthy_count=$((healthy_count + 1))
+ else
+ bad_bricks="${bpath},${bad_bricks}"
+ fi
+ done
+ fi
+
+ if [[ $healthy_count -ge 4 ]]
+ then
+ match="True"
+ echo "${LINE_SEP}" >> "$MOD_BACKUP_FILES"
+ echo "Modified : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$MOD_BACKUP_FILES"
+ set_version_dirty_xattr "$matching_bricks" 1 "$gfid_actual_paths"
+ set_version_dirty_xattr "$bad_bricks" 0 "$gfid_actual_paths"
+ else
+ log_can_not_heal "$gfid_actual_paths" "${file_paths}"
+ fi
+
+ echo "$match"
+}
+function set_xattr()
+{
+ local count=$1
+ local heal_entry=""
+ local file_stat=""
+ local frag_size=""
+ local total_size=""
+ local file_paths=""
+ local num=""
+ local can_heal_count=0
+
+ heal_log "Started $(basename $BASH_SOURCE) on $(date) "
+
+ while read -r heal_entry
+ do
+ heal_log "$LINE_SEP"
+ heal_log "${heal_entry}"
+
+ file_stat=$(echo "$heal_entry" | cut -d "|" -f 1)
+ frag_size=$(echo "$file_stat" | rev | cut -d ":" -f 1 | rev)
+ total_size="$(total_file_size_in_hex "$frag_size")"
+ file_paths=$(echo "$heal_entry" | cut -d "|" -f 2)
+ match=$(check_all_frag_and_set_xattr "$file_paths" "$total_size" "$file_stat")
+ if [[ "$match" == "True" ]]
+ then
+ can_heal_count=$((can_heal_count + 1))
+ fi
+
+ sed -i '1d' potential_heal
+ count=$((count - 1))
+ if [ $count == 0 ]
+ then
+ num=$(cat potential_heal | wc -l)
+ heal_log "$LINE_SEP"
+ heal_log "${1} : Processed"
+ heal_log "${can_heal_count} : Modified to Heal"
+ heal_log "$((${1} - can_heal_count)) : Moved to can_not_heal."
+ heal_log "${num} : Pending as Potential Heal"
+ exit 0
+ fi
+
+ done < potential_heal
+}
+
+function main ()
+{
+ local count=0
+
+ read -p "Number of files to correct: [choose between 1-1000] (0 for All):" count
+ if [[ $count -lt 0 || $count -gt 1000 ]]
+ then
+ echo "Provide correct value:"
+ exit 2
+ fi
+
+ if [[ $count -eq 0 ]]
+ then
+ count=$(cat potential_heal | wc -l)
+ fi
+ set_xattr "$count"
+}
+
+_init "$@" && main "$@"
diff --git a/extras/ec-heal-script/gfid_needing_heal_parallel.sh b/extras/ec-heal-script/gfid_needing_heal_parallel.sh
new file mode 100755
index 00000000000..d7f53c97c33
--- /dev/null
+++ b/extras/ec-heal-script/gfid_needing_heal_parallel.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# This script provides a list of all the files which can be healed or not healed.
+# It also generates two files, potential_heal and can_not_heal, which contains the information
+# of all theose files. These files could be used by correct_pending_heals.sh to correct
+# the fragmnets so that files could be healed by shd.
+
+CAN_NOT_HEAL="can_not_heal"
+CAN_HEAL="potential_heal"
+LINE_SEP="==================================================="
+LOG_DIR="/var/log/glusterfs"
+LOG_FILE="$LOG_DIR/ec-heal-script.log"
+
+function heal_log()
+{
+ echo "$1" >> "$LOG_FILE"
+}
+
+function _init ()
+{
+ if [ $# -ne 1 ]; then
+ echo "usage: $0 <gluster volume name>";
+ echo "This script provides a list of all the files which can be healed or not healed.
+It also generates two files, potential_heal and can_not_heal, which contains the information
+of all theose files. These files could be used by correct_pending_heals.sh to correct
+the fragmnets so that files could be healed by shd."
+ exit 2;
+ fi
+
+ volume=$1;
+}
+
+function get_pending_entries ()
+{
+ local volume_name=$1
+
+ gluster volume heal "$volume_name" info | grep -v ":/" | grep -v "Number of entries" | grep -v "Status:" | sort -u | sed '/^$/d'
+}
+
+function get_entry_path_on_brick()
+{
+ local path="$1"
+ local gfid_string=""
+ if [[ "${path:0:1}" == "/" ]];
+ then
+ echo "$path"
+ else
+ gfid_string="$(echo "$path" | cut -f2 -d':' | cut -f1 -d '>')"
+ echo "/.glusterfs/${gfid_string:0:2}/${gfid_string:2:2}/$gfid_string"
+ fi
+}
+
+function run_command_on_server()
+{
+ local subvolume="$1"
+ local host="$2"
+ local cmd="$3"
+ local output
+ output=$(ssh -n "${host}" "${cmd}")
+ if [ -n "$output" ]
+ then
+ echo "$subvolume:$output"
+ fi
+}
+
+function get_entry_path_all_bricks ()
+{
+ local entry="$1"
+ local bricks="$2"
+ local cmd=""
+ for brick in $bricks
+ do
+ echo "${brick}#$(get_entry_path_on_brick "$entry")"
+ done | tr '\n' ','
+}
+
+function get_stat_for_entry_from_all_bricks ()
+{
+ local entry="$1"
+ local bricks="$2"
+ local subvolume=0
+ local host=""
+ local bpath=""
+ local cmd=""
+
+ for brick in $bricks
+ do
+ if [[ "$((subvolume % 6))" == "0" ]]
+ then
+ subvolume=$((subvolume+1))
+ fi
+ host=$(echo "$brick" | cut -f1 -d':')
+ bpath=$(echo "$brick" | cut -f2 -d':')
+
+ cmd="stat --format=%F:%B:%s $bpath$(get_entry_path_on_brick "$entry") 2>/dev/null"
+ run_command_on_server "$subvolume" "${host}" "${cmd}" &
+ done | sort | uniq -c | sort -rnk1
+}
+
+function get_bricks_from_volume()
+{
+ local v=$1
+ gluster volume info "$v" | grep -E "^Brick[0-9][0-9]*:" | cut -f2- -d':'
+}
+
+function print_entry_gfid()
+{
+ local host="$1"
+ local dirpath="$2"
+ local entry="$3"
+ local gfid
+ gfid="$(ssh -n "${host}" "getfattr -d -m. -e hex $dirpath/$entry 2>/dev/null | grep trusted.gfid=|cut -f2 -d'='")"
+ echo "$entry" - "$gfid"
+}
+
+function print_brick_directory_info()
+{
+ local h="$1"
+ local dirpath="$2"
+ while read -r e
+ do
+ print_entry_gfid "${h}" "${dirpath}" "${e}"
+ done < <(ssh -n "${h}" "ls $dirpath 2>/dev/null")
+}
+
+function print_directory_info()
+{
+ local entry="$1"
+ local bricks="$2"
+ local h
+ local b
+ local gfid
+ for brick in $bricks;
+ do
+ h="$(echo "$brick" | cut -f1 -d':')"
+ b="$(echo "$brick" | cut -f2 -d':')"
+ dirpath="$b$(get_entry_path_on_brick "$entry")"
+ print_brick_directory_info "${h}" "${dirpath}" &
+ done | sort | uniq -c
+}
+
+function print_entries_needing_heal()
+{
+ local quorum=0
+ local entry="$1"
+ local bricks="$2"
+ while read -r line
+ do
+ quorum=$(echo "$line" | awk '{print $1}')
+ if [[ "$quorum" -lt 4 ]]
+ then
+ echo "$line - Not in Quorum"
+ else
+ echo "$line - In Quorum"
+ fi
+ done < <(print_directory_info "$entry" "$bricks")
+}
+
+function find_file_paths ()
+{
+ local bpath=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local brick_root=""
+ local gfid=""
+ local actual_path=""
+ local gfid_path=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+
+ gfid=$(echo "${file_entry}" | grep ".glusterfs")
+
+ if [[ -n "$gfid" ]]
+ then
+ gfid_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' "
+ actual_path=$(ssh -n "${file_host}" "${cmd}")
+ #removing absolute path so that user can refer this from mount point
+ actual_path=${actual_path#"$brick_root"}
+ else
+ actual_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' "
+ gfid_path=$(ssh -n "${file_host}" "${cmd}")
+ gfid_path=${gfid_path#"$brick_root"}
+ fi
+
+ echo "${gfid_path}|${actual_path}"
+}
+
+function log_can_not_heal ()
+{
+ local gfid_actual_paths=$1
+ local file_paths=$2
+ file_paths=${file_paths//#}
+
+ echo "${LINE_SEP}" >> "$CAN_NOT_HEAL"
+ echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL"
+ for bpath in ${file_paths//,/ }
+ do
+ echo "${bpath}" >> "$CAN_NOT_HEAL"
+ done
+}
+
+function main ()
+{
+ local bricks=""
+ local quorum=0
+ local stat_info=""
+ local file_type=""
+ local gfid_actual_paths=""
+ local bpath=""
+ local file_paths=""
+ local good=0
+ local bad=0
+ bricks=$(get_bricks_from_volume "$volume")
+ rm -f "$CAN_HEAL"
+ rm -f "$CAN_NOT_HEAL"
+ mkdir "$LOG_DIR" -p
+
+ heal_log "Started $(basename "$BASH_SOURCE") on $(date) "
+ while read -r heal_entry
+ do
+ heal_log "------------------------------------------------------------------"
+ heal_log "$heal_entry"
+
+ gfid_actual_paths=""
+ file_paths="$(get_entry_path_all_bricks "$heal_entry" "$bricks")"
+ stat_info="$(get_stat_for_entry_from_all_bricks "$heal_entry" "$bricks")"
+ heal_log "$stat_info"
+
+ quorum=$(echo "$stat_info" | head -1 | awk '{print $1}')
+ good_stat=$(echo "$stat_info" | head -1 | awk '{print $3}')
+ file_type="$(echo "$stat_info" | head -1 | cut -f2 -d':')"
+ if [[ "$file_type" == "directory" ]]
+ then
+ print_entries_needing_heal "$heal_entry" "$bricks"
+ else
+ if [[ "$quorum" -ge 4 ]]
+ then
+ good=$((good + 1))
+ heal_log "Verdict: Healable"
+
+ echo "${good_stat}|$file_paths" >> "$CAN_HEAL"
+ else
+ bad=$((bad + 1))
+ heal_log "Verdict: Not Healable"
+ for bpath in ${file_paths//,/ }
+ do
+ if [[ -z "$gfid_actual_paths" ]]
+ then
+ gfid_actual_paths=$(find_file_paths "$bpath")
+ else
+ break
+ fi
+ done
+ log_can_not_heal "$gfid_actual_paths" "${file_paths}"
+ fi
+ fi
+ done < <(get_pending_entries "$volume")
+ heal_log "========================================="
+ heal_log "Total number of potential heal : ${good}"
+ heal_log "Total number of can not heal : ${bad}"
+ heal_log "========================================="
+}
+
+_init "$@" && main "$@"
diff --git a/extras/failed-tests.py b/extras/failed-tests.py
new file mode 100755
index 00000000000..f7f110246b5
--- /dev/null
+++ b/extras/failed-tests.py
@@ -0,0 +1,180 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import blessings
+import requests
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+import re
+import argparse
+from collections import defaultdict
+from datetime import timedelta, datetime
+from pystache import render
+
+# This tool goes though the Gluster regression links and checks for failures
+
+BASE = 'https://build.gluster.org'
+TERM = blessings.Terminal()
+MAX_BUILDS = 1000
+summary = defaultdict(list)
+VERBOSE = None
+
+
+def process_failure(url, node):
+ text = requests.get(url, verify=False).text
+ accum = []
+ for t in text.split('\n'):
+ if t.find("Result: FAIL") != -1:
+ for t2 in accum:
+ if VERBOSE:
+ print(t2.encode('utf-8'))
+ if t2.find("Wstat") != -1:
+ test_case = re.search('\./tests/.*\.t', t2)
+ if test_case:
+ summary[test_case.group()].append((url, node))
+ accum = []
+ elif t.find("cur_cores=/") != -1:
+ summary["core"].append([t.split("/")[1]])
+ summary["core"].append(url)
+ else:
+ accum.append(t)
+
+
+def print_summary(failed_builds, total_builds, html=False):
+ # All the templates
+ count = [
+ '{{failed}} of {{total}} regressions failed',
+ '<p><b>{{failed}}</b> of <b>{{total}}</b> regressions failed</p>'
+ ]
+ regression_link = [
+ '\tRegression Link: {{link}}\n'
+ '\tNode: {{node}}',
+ '<p>&emsp;Regression Link: {{link}}</p>'
+ '<p>&emsp;Node: {{node}}</p>'
+ ]
+ component = [
+ '\tComponent: {{comp}}',
+ '<p>&emsp;Component: {{comp}}</p>'
+ ]
+ failure_count = [
+ ''.join([
+ TERM.red,
+ '{{test}} ; Failed {{count}} times',
+ TERM.normal
+ ]),
+ (
+ '<p><font color="red"><b>{{test}};</b> Failed <b>{{count}}'
+ '</b> times</font></p>'
+ )
+ ]
+
+ template = 0
+ if html:
+ template = 1
+ print(render(
+ count[template],
+ {'failed': failed_builds, 'total': total_builds}
+ ))
+ for k, v in summary.items():
+ if k == 'core':
+ print(''.join([TERM.red, "Found cores:", TERM.normal]))
+ for comp, link in zip(v[::2], v[1::2]):
+ print(render(component[template], {'comp': comp}))
+ print(render(
+ regression_link[template],
+ {'link': link[0], 'node': link[1]}
+ ))
+ else:
+ print(render(failure_count[template], {'test': k, 'count': len(v)}))
+ for link in v:
+ print(render(
+ regression_link[template],
+ {'link': link[0], 'node': link[1]}
+ ))
+
+
+def get_summary(cut_off_date, reg_link):
+ '''
+ Get links to the failed jobs
+ '''
+ success_count = 0
+ failure_count = 0
+ for page in range(0, MAX_BUILDS, 100):
+ build_info = requests.get(''.join([
+ BASE,
+ reg_link,
+ 'api/json?depth=1&tree=allBuilds'
+ '[url,result,timestamp,builtOn]',
+ '{{{0},{1}}}'.format(page, page+100)
+ ]), verify=False).json()
+ for build in build_info.get('allBuilds'):
+ if datetime.fromtimestamp(build['timestamp']/1000) < cut_off_date:
+ # stop when timestamp older than cut off date
+ return failure_count, failure_count + success_count
+ if build['result'] in [None, 'SUCCESS']:
+ # pass when build is a success or ongoing
+ success_count += 1
+ continue
+ if VERBOSE:
+ print(''.join([
+ TERM.red,
+ 'FAILURE on {0}'.format(build['url']),
+ TERM.normal
+ ]))
+ url = ''.join([build['url'], 'consoleText'])
+ failure_count += 1
+ process_failure(url, build['builtOn'])
+ return failure_count, failure_count + success_count
+
+
+def main(num_days, regression_link, html_report):
+ cut_off_date = datetime.today() - timedelta(days=num_days)
+ failure = 0
+ total = 0
+ for reg in regression_link:
+ if reg == 'centos':
+ reg_link = '/job/centos6-regression/'
+ elif reg == 'netbsd':
+ reg_link = '/job/netbsd7-regression/'
+ else:
+ reg_link = reg
+ counts = get_summary(cut_off_date, reg_link)
+ failure += counts[0]
+ total += counts[1]
+ print_summary(failure, total, html_report)
+
+
+if __name__ == '__main__':
+ requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+ parser = argparse.ArgumentParser()
+ parser.add_argument("get-summary")
+ parser.add_argument(
+ "last_no_of_days",
+ default=1,
+ type=int,
+ help="Regression summary of last number of days"
+ )
+ parser.add_argument(
+ "regression_link",
+ default="centos",
+ nargs='+',
+ help="\"centos\" | \"netbsd\" | any other regression link"
+ )
+ parser.add_argument(
+ "--verbose",
+ default=False,
+ action="store_true",
+ help="Print a detailed report of each test case that is failed"
+ )
+ parser.add_argument(
+ "--html-report",
+ default=False,
+ action="store_true",
+ help="Print a brief report of failed regressions in html format"
+ )
+ args = parser.parse_args()
+ VERBOSE = args.verbose
+ main(
+ num_days=args.last_no_of_days,
+ regression_link=args.regression_link,
+ html_report=args.html_report
+ )
diff --git a/extras/firewalld/Makefile.am b/extras/firewalld/Makefile.am
new file mode 100644
index 00000000000..530881fb8eb
--- /dev/null
+++ b/extras/firewalld/Makefile.am
@@ -0,0 +1,8 @@
+EXTRA_DIST = glusterfs.xml
+
+if USE_FIREWALLD
+if WITH_SERVER
+staticdir = /usr/lib/firewalld/services/
+static_DATA = glusterfs.xml
+endif
+endif
diff --git a/extras/firewalld/glusterfs.xml b/extras/firewalld/glusterfs.xml
new file mode 100644
index 00000000000..7e176442f5b
--- /dev/null
+++ b/extras/firewalld/glusterfs.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<service>
+<short>glusterfs-static</short>
+<description>Default ports for gluster-distributed storage</description>
+<port protocol="tcp" port="24007"/> <!--For glusterd -->
+<port protocol="tcp" port="24008"/> <!--For glusterd RDMA port management -->
+<port protocol="tcp" port="24009"/> <!--For glustereventsd -->
+<port protocol="tcp" port="38465"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38466"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38467"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38468"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38469"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="49152-49664"/> <!--512 ports for bricks -->
+</service>
diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am
new file mode 100644
index 00000000000..9eaa401b6c8
--- /dev/null
+++ b/extras/ganesha/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = scripts config ocf
+CLEANFILES =
diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am
new file mode 100644
index 00000000000..c729273096e
--- /dev/null
+++ b/extras/ganesha/config/Makefile.am
@@ -0,0 +1,4 @@
+EXTRA_DIST= ganesha-ha.conf.sample
+
+confdir = $(sysconfdir)/ganesha
+conf_DATA = ganesha-ha.conf.sample
diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample
new file mode 100644
index 00000000000..c22892bde56
--- /dev/null
+++ b/extras/ganesha/config/ganesha-ha.conf.sample
@@ -0,0 +1,19 @@
+# Name of the HA cluster created.
+# must be unique within the subnet
+HA_NAME="ganesha-ha-360"
+#
+# N.B. you may use short names or long names; you may not use IP addrs.
+# Once you select one, stay with it as it will be mildly unpleasant to
+# clean up if you switch later on. Ensure that all names - short and/or
+# long - are in DNS or /etc/hosts on all machines in the cluster.
+#
+# The subset of nodes of the Gluster Trusted Pool that form the ganesha
+# HA cluster. Hostname is specified.
+HA_CLUSTER_NODES="server1,server2,..."
+#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..."
+#
+# Virtual IPs for each of the nodes specified above.
+VIP_server1="10.0.2.1"
+VIP_server2="10.0.2.2"
+#VIP_server1_lab_redhat_com="10.0.2.1"
+#VIP_server2_lab_redhat_com="10.0.2.2"
diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am
new file mode 100644
index 00000000000..990a609f254
--- /dev/null
+++ b/extras/ganesha/ocf/Makefile.am
@@ -0,0 +1,11 @@
+EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd
+
+# The root of the OCF resource agent hierarchy
+# Per the OCF standard, it's always "lib",
+# not "lib64" (even on 64-bit platforms).
+ocfdir = $(prefix)/lib/ocf
+
+# The provider directory
+radir = $(ocfdir)/resource.d/heartbeat
+
+ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd
diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
new file mode 100644
index 00000000000..825f7164597
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_grace
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "$OCF_DEBUG_LIBRARY" ]; then
+ . $OCF_DEBUG_LIBRARY
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+OCF_RESKEY_grace_active_default="grace-active"
+: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_grace">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="grace_active">
+<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
+<content type="string" default="grace-active" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="40s" />
+<action name="stop" timeout="40s" />
+<action name="status" timeout="20s" interval="60s" />
+<action name="monitor" depth="0" timeout="10s" interval="5s" />
+<action name="notify" timeout="10s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_grace_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_grace_start()
+{
+ local rc=${OCF_ERR_GENERIC}
+ local host=$(hostname -s)
+
+ ocf_log debug "ganesha_grace_start()"
+ # give ganesha_mon RA a chance to set the crm_attr first
+ # I mislike the sleep, but it's not clear that looping
+ # with a small sleep is necessarily better
+ # start has a 40sec timeout, so a 5sec sleep here is okay
+ sleep 5
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
+ if [ $? -ne 0 ]; then
+ ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+ fi
+ fi
+
+ # Three possibilities:
+ # 1. There is no attribute at all and attr_updater returns
+ # a zero length string. This happens when
+ # ganesha_mon::monitor hasn't run at least once to set
+ # the attribute. The assumption here is that the system
+ # is coming up. We pretend, for now, that the node is
+ # healthy, to allow the system to continue coming up.
+ # It will cure itself in a few seconds
+ # 2. There is an attribute, and it has the value "1"; this
+ # node is healthy.
+ # 3. There is an attribute, but it has no value or the value
+ # "0"; this node is not healthy.
+
+ # case 1
+ if [[ -z "${attr}" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ # case 2
+ if [[ "${attr}" = *"value=1" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ # case 3
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_grace_stop()
+{
+
+ ocf_log debug "ganesha_grace_stop()"
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_notify()
+{
+ # since this is a clone RA we should only ever see pre-start
+ # or post-stop
+ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+ case "${mode}" in
+ pre-start | post-stop)
+ dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname}
+ if [ $? -ne 0 ]; then
+ ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed"
+ fi
+ ;;
+ esac
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_monitor()
+{
+ local host=$(hostname -s)
+
+ ocf_log debug "monitor"
+
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+ fi
+ fi
+
+ # if there is no attribute (yet), maybe it's because
+ # this RA started before ganesha_mon (nfs-mon) has had
+ # chance to create it. In which case we'll pretend
+ # everything is okay this time around
+ if [[ -z "${attr}" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ if [[ "${attr}" = *"value=1" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_grace_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_validate
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) ganesha_grace_start
+ ;;
+stop) ganesha_grace_stop
+ ;;
+status|monitor) ganesha_grace_monitor
+ ;;
+notify) ganesha_grace_notify
+ ;;
+*) ganesha_grace_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
new file mode 100644
index 00000000000..2b4a9d6da84
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_mon
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
+ . ${OCF_DEBUG_LIBRARY}
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+# Defaults
+OCF_RESKEY_ganesha_active_default="ganesha-active"
+OCF_RESKEY_grace_active_default="grace-active"
+OCF_RESKEY_grace_delay_default="5"
+
+: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}}
+: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
+: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_mon">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="ganesha_active">
+<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc>
+<content type="string" default="ganesha-active" />
+</parameter>
+<parameter name="grace_active">
+<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
+<content type="string" default="grace-active" />
+</parameter>
+<parameter name="grace_delay">
+<longdesc lang="en">
+NFS-Ganesha grace delay.
+When changing this, adjust the ganesha_grace RA's monitor interval to match.
+</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc>
+<content type="string" default="5" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="40s" />
+<action name="stop" timeout="40s" />
+<action name="status" timeout="20s" interval="60s" />
+<action name="monitor" depth="0" timeout="10s" interval="10s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_mon_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case ${__OCF_ACTION} in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_mon_start()
+{
+ ocf_log debug "ganesha_mon_start"
+ ganesha_mon_monitor
+ return $OCF_SUCCESS
+}
+
+ganesha_mon_stop()
+{
+ ocf_log debug "ganesha_mon_stop"
+ return $OCF_SUCCESS
+}
+
+ganesha_mon_monitor()
+{
+ local host=$(hostname -s)
+ local pid_file="/var/run/ganesha.pid"
+ local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
+ local proc_pid="/proc/"
+
+ # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid
+ # RHEL7 systemd does not. Would be nice if all distros used the
+ # same pid file.
+ if [ -e ${rhel6_pid_file} ]; then
+ pid_file=${rhel6_pid_file}
+ fi
+ if [ -e ${pid_file} ]; then
+ proc_pid="${proc_pid}$(cat ${pid_file})"
+ fi
+
+ if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
+
+ attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
+ fi
+
+ # ganesha_grace (nfs-grace) RA follows grace-active attr
+ # w/ constraint location
+ attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
+ fi
+
+ # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
+ # track grace-active crm_attr (attr != crm_attr)
+ # we can't just use the attr as there's no way to query
+ # its value in RHEL6 pacemaker
+
+ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+ if [ $? -ne 0 ]; then
+ ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+ fi
+
+ # VIP fail-over is triggered by clearing the
+ # ganesha-active node attribute on this node.
+ #
+ # Meanwhile the ganesha_grace notify() runs when its
+ # nfs-grace resource is disabled on a node; which
+ # is triggered by clearing the grace-active attribute
+ # on this node.
+ #
+ # We need to allow time for it to run and put
+ # the remaining ganesha.nfsds into grace before
+ # initiating the VIP fail-over.
+
+ attrd_updater -D -n ${OCF_RESKEY_grace_active}
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
+ fi
+
+ host=$(hostname -s)
+ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+ if [ $? -ne 0 ]; then
+ ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
+ fi
+ fi
+
+ sleep ${OCF_RESKEY_grace_delay}
+
+ attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_mon_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_mon_validate
+
+# Translate each action into the appropriate function call
+case ${__OCF_ACTION} in
+start) ganesha_mon_start
+ ;;
+stop) ganesha_mon_stop
+ ;;
+status|monitor) ganesha_mon_monitor
+ ;;
+*) ganesha_mon_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd
new file mode 100644
index 00000000000..f91e8b6b8f7
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_nfsd
@@ -0,0 +1,167 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
+ . ${OCF_DEBUG_LIBRARY}
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage"
+: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_nfsd">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="ha_vol_mnt">
+<longdesc lang="en">HA State Volume Mount Point</longdesc>
+<shortdesc lang="en">HA_State Volume Mount Point</shortdesc>
+<content type="string" default="" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="5s" />
+<action name="stop" timeout="5s" />
+<action name="status" depth="0" timeout="5s" interval="0" />
+<action name="monitor" depth="0" timeout="5s" interval="0" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_nfsd_start()
+{
+ local long_host=$(hostname)
+
+ if [[ -d /var/lib/nfs ]]; then
+ mv /var/lib/nfs /var/lib/nfs.backup
+ if [ $? -ne 0 ]; then
+ ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed"
+ fi
+ ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_stop()
+{
+
+ if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then
+ rm -f /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "rm -f /var/lib/nfs failed"
+ fi
+ mv /var/lib/nfs.backup /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_monitor()
+{
+ # pacemaker checks to see if RA is already running before starting it.
+ # if we return success, then it's presumed it's already running and
+ # doesn't need to be started, i.e. invoke the start action.
+ # return something other than success to make pacemaker invoke the
+ # start action
+ if [[ -L /var/lib/nfs ]]; then
+ return ${OCF_SUCCESS}
+ fi
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_nfsd_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_validate
+
+# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION"
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) ganesha_nfsd_start
+ ;;
+stop) ganesha_nfsd_stop
+ ;;
+status|monitor) ganesha_nfsd_monitor
+ ;;
+*) ganesha_nfsd_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am
new file mode 100644
index 00000000000..7e345fd5f19
--- /dev/null
+++ b/extras/ganesha/scripts/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \
+ ganesha-ha.sh
+
+scriptsdir = $(libexecdir)/ganesha
+scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \
+ ganesha-ha.sh
diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh
new file mode 100755
index 00000000000..3040e8138b0
--- /dev/null
+++ b/extras/ganesha/scripts/create-export-ganesha.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+#This script is called by glusterd when the user
+#tries to export a volume via NFS-Ganesha.
+#An export file specific to a volume
+#is created in GANESHA_DIR/exports.
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+declare -i EXPORT_ID
+
+function check_cmd_status()
+{
+ if [ "$1" != "0" ]
+ then
+ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+ sed -i /$VOL.conf/d $CONF
+ exit 1
+ fi
+}
+
+
+if [ ! -d "$GANESHA_DIR/exports" ];
+ then
+ mkdir $GANESHA_DIR/exports
+ check_cmd_status `echo $?`
+fi
+
+function write_conf()
+{
+echo -e "# WARNING : Using Gluster CLI will overwrite manual
+# changes made to this file. To avoid it, edit the
+# file and run ganesha-ha.sh --refresh-config."
+
+echo "EXPORT{"
+echo " Export_Id = 2;"
+echo " Path = \"/$VOL\";"
+echo " FSAL {"
+echo " name = "GLUSTER";"
+echo " hostname=\"localhost\";"
+echo " volume=\"$VOL\";"
+echo " }"
+echo " Access_type = RW;"
+echo " Disable_ACL = true;"
+echo ' Squash="No_root_squash";'
+echo " Pseudo=\"/$VOL\";"
+echo ' Protocols = "3", "4" ;'
+echo ' Transports = "UDP","TCP";'
+echo ' SecType = "sys";'
+echo ' Security_Label = False;'
+echo " }"
+}
+if [ "$OPTION" = "on" ];
+then
+ if ! (cat $CONF | grep $VOL.conf\"$ )
+ then
+ write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf
+ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF
+ count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l`
+ if [ "$count" = "1" ] ; then
+ EXPORT_ID=2
+ else
+ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
+ check_cmd_status `echo $?`
+ EXPORT_ID=EXPORT_ID+1
+ sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \
+ $GANESHA_DIR/exports/export.$VOL.conf
+ check_cmd_status `echo $?`
+ fi
+ echo $EXPORT_ID > $GANESHA_DIR/.export_added
+ fi
+else
+ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+ sed -i /$VOL.conf/d $CONF
+fi
diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh
new file mode 100755
index 00000000000..9d613a0e7ad
--- /dev/null
+++ b/extras/ganesha/scripts/dbus-send.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+
+function check_cmd_status()
+{
+ if [ "$1" != "0" ]
+ then
+ logger "dynamic export failed on node :${hostname -s}"
+ fi
+}
+
+#This function keeps track of export IDs and increments it with every new entry
+function dynamic_export_add()
+{
+ dbus-send --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \
+string:"EXPORT(Path=/$VOL)"
+ check_cmd_status `echo $?`
+}
+
+#This function removes an export dynamically(uses the export_id of the export)
+function dynamic_export_remove()
+{
+ # Below bash fetch all the export from ShowExport command and search
+ # export entry based on path and then get its export entry.
+ # There are two possiblities for path, either entire volume will be
+ # exported or subdir. It handles both cases. But it remove only first
+ # entry from the list based on assumption that entry exported via cli
+ # has lowest export id value
+ removed_id=$(dbus-send --type=method_call --print-reply --system \
+ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+ org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \
+ "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \
+ | head -1)
+
+ dbus-send --print-reply --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id
+ check_cmd_status `echo $?`
+}
+
+if [ "$OPTION" = "on" ];
+then
+ dynamic_export_add $@
+fi
+
+if [ "$OPTION" = "off" ];
+then
+ dynamic_export_remove $@
+fi
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
new file mode 100644
index 00000000000..9790a719e10
--- /dev/null
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -0,0 +1,1199 @@
+#!/bin/bash
+
+# Copyright 2015-2016 Red Hat Inc. All Rights Reserved
+#
+# Pacemaker+Corosync High Availability for NFS-Ganesha
+#
+# setup, teardown, add, delete, refresh-config, and status
+#
+# Each participating node in the cluster is assigned a virtual IP (VIP)
+# which fails over to another node when its associated ganesha.nfsd dies
+# for any reason. After the VIP is moved to another node all the
+# ganesha.nfsds are send a signal using DBUS to put them into NFS GRACE.
+#
+# There are six resource agent types used: ganesha_mon, ganesha_grace,
+# ganesha_nfsd, IPaddr, and Dummy. ganesha_mon is used to monitor the
+# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put
+# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start
+# and stop the ganesha.nfsd during setup and teardown. IPaddr manages
+# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to
+# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
+# the new host.
+
+GANESHA_HA_SH=$(realpath $0)
+HA_NUM_SERVERS=0
+HA_SERVERS=""
+HA_VOL_NAME="gluster_shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
+HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
+SERVICE_MAN="DISTRO_NOT_FOUND"
+
+# rhel, fedora id, version
+ID=""
+VERSION_ID=""
+
+PCS9OR10_PCS_CNAME_OPTION=""
+PCS9OR10_PCS_CLONE_OPTION="clone"
+SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
+
+# UNBLOCK RA uses shared_storage which may become unavailable
+# during any of the nodes reboot. Hence increase timeout value.
+PORTBLOCK_UNBLOCK_TIMEOUT="60s"
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_CONF=
+
+function find_rhel7_conf
+{
+ while [[ $# > 0 ]]
+ do
+ key="$1"
+ case $key in
+ -f)
+ CONFFILE="$2"
+ break;
+ ;;
+ *)
+ ;;
+ esac
+ shift
+ done
+}
+
+if [ -z ${CONFFILE} ]
+ then
+ find_rhel7_conf ${OPTIONS}
+
+fi
+
+GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf}
+
+usage() {
+
+ echo "Usage : add|delete|refresh-config|status"
+ echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \
+<NODE-HOSTNAME> <NODE-VIP>"
+ echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \
+<NODE-HOSTNAME>"
+ echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \
+<volume>"
+ echo "Status : ganesha-ha.sh --status <HA_CONFDIR>"
+}
+
+determine_service_manager () {
+
+ if [ -e "/bin/systemctl" ];
+ then
+ SERVICE_MAN="/bin/systemctl"
+ elif [ -e "/sbin/invoke-rc.d" ];
+ then
+ SERVICE_MAN="/sbin/invoke-rc.d"
+ elif [ -e "/sbin/service" ];
+ then
+ SERVICE_MAN="/sbin/service"
+ fi
+ if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]]
+ then
+ logger "Service manager not recognized, exiting"
+ exit 1
+ fi
+}
+
+manage_service ()
+{
+ local action=${1}
+ local new_node=${2}
+ local option=
+
+ if [[ "${action}" == "start" ]]; then
+ option="yes"
+ else
+ option="no"
+ fi
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
+
+ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]
+ then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha"
+ else
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}"
+ fi
+}
+
+
+check_cluster_exists()
+{
+ local name=${1}
+ local cluster_name=""
+
+ if [ -e /var/run/corosync.pid ]; then
+ cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
+ if [[ "${cluster_name}X" == "${name}X" ]]; then
+ logger "$name already exists, exiting"
+ exit 0
+ fi
+ fi
+}
+
+
+determine_servers()
+{
+ local cmd=${1}
+ local num_servers=0
+ local tmp_ifs=${IFS}
+ local ha_servers=""
+
+ if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then
+ ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
+ IFS=$' '
+ for server in ${ha_servers} ; do
+ num_servers=$(expr ${num_servers} + 1)
+ done
+ IFS=${tmp_ifs}
+ HA_NUM_SERVERS=${num_servers}
+ HA_SERVERS="${ha_servers}"
+ else
+ IFS=$','
+ for server in ${HA_CLUSTER_NODES} ; do
+ num_servers=$(expr ${num_servers} + 1)
+ done
+ IFS=${tmp_ifs}
+ HA_NUM_SERVERS=${num_servers}
+ HA_SERVERS="${HA_CLUSTER_NODES//,/ }"
+ fi
+}
+
+stop_ganesha_all()
+{
+ local serverlist=${1}
+ for node in ${serverlist} ; do
+ manage_service "stop" ${node}
+ done
+}
+
+setup_cluster()
+{
+ local name=${1}
+ local num_servers=${2}
+ local servers=${3}
+ local unclean=""
+ local quorum_policy="stop"
+
+ logger "setting up cluster ${name} with the following ${servers}"
+
+ # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
+ pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+ stop_ganesha_all "${servers}"
+ exit 1;
+ fi
+
+ # pcs cluster auth ${servers}
+ pcs cluster auth
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster auth failed"
+ fi
+
+ pcs cluster start --all
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster start failed"
+ exit 1;
+ fi
+
+ sleep 1
+ # wait for the cluster to elect a DC before querying or writing
+ # to the CIB. BZ 1334092
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ while [ $? -ne 0 ]; do
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ done
+
+ unclean=$(pcs status | grep -u "UNCLEAN")
+ while [[ "${unclean}X" == "UNCLEANX" ]]; do
+ sleep 1
+ unclean=$(pcs status | grep -u "UNCLEAN")
+ done
+ sleep 1
+
+ if [ ${num_servers} -lt 3 ]; then
+ quorum_policy="ignore"
+ fi
+ pcs property set no-quorum-policy=${quorum_policy}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
+ fi
+
+ pcs property set stonith-enabled=false
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set stonith-enabled=false failed"
+ fi
+}
+
+
+setup_finalize_ha()
+{
+ local cibfile=${1}
+ local stopped=""
+
+ stopped=$(pcs status | grep -u "Stopped")
+ while [[ "${stopped}X" == "StoppedX" ]]; do
+ sleep 1
+ stopped=$(pcs status | grep -u "Stopped")
+ done
+}
+
+
+refresh_config ()
+{
+ local short_host=$(hostname -s)
+ local VOL=${1}
+ local HA_CONFDIR=${2}
+ local short_host=$(hostname -s)
+
+ local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
+ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
+
+
+ if [ -e ${SECRET_PEM} ]; then
+ while [[ ${3} ]]; do
+ current_host=`echo ${3} | cut -d "." -f 1`
+ if [[ ${short_host} != ${current_host} ]]; then
+ output=$(ssh -oPasswordAuthentication=no \
+-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \
+"dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
+string:$HA_CONFDIR/exports/export.$VOL.conf \
+string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ]; then
+ echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}"
+ else
+ echo "Refresh-config completed on ${current_host}."
+ fi
+
+ fi
+ shift
+ done
+ else
+ echo "Error: refresh-config failed. Passwordless ssh is not enabled."
+ exit 1
+ fi
+
+ # Run the same command on the localhost,
+ output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
+string:$HA_CONFDIR/exports/export.$VOL.conf \
+string:"EXPORT(Export_Id=$export_id)" 2>&1)
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ] ; then
+ echo "Refresh-config failed on localhost."
+ else
+ echo "Success: refresh-config completed."
+ fi
+}
+
+
+teardown_cluster()
+{
+ local name=${1}
+
+ for server in ${HA_SERVERS} ; do
+ if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
+ logger "info: ${server} is not in config, removing"
+
+ pcs cluster stop ${server} --force
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster stop ${server} failed"
+ fi
+
+ pcs cluster node remove ${server}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node remove ${server} failed"
+ fi
+ fi
+ done
+
+ # BZ 1193433 - pcs doesn't reload cluster.conf after modification
+ # after teardown completes, a subsequent setup will appear to have
+ # 'remembered' the deleted node. You can work around this by
+ # issuing another `pcs cluster node remove $node`,
+ # `crm_node -f -R $server`, or
+ # `cibadmin --delete --xml-text '<node id="$server"
+ # uname="$server"/>'
+
+ pcs cluster stop --all
+ if [ $? -ne 0 ]; then
+ logger "warning pcs cluster stop --all failed"
+ fi
+
+ pcs cluster destroy
+ if [ $? -ne 0 ]; then
+ logger "error pcs cluster destroy failed"
+ exit 1
+ fi
+}
+
+
+cleanup_ganesha_config ()
+{
+ rm -f /etc/corosync/corosync.conf
+ rm -rf /etc/cluster/cluster.conf*
+ rm -rf /var/lib/pacemaker/cib/*
+ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf
+}
+
+do_create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+ local primary=${1}; shift
+ local weight="1000"
+
+ # first a constraint location rule that says the VIP must be where
+ # there's a ganesha.nfsd running
+ pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed"
+ fi
+
+ # then a set of constraint location prefers to set the prefered order
+ # for where a VIP should move
+ while [[ ${1} ]]; do
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed"
+ fi
+ weight=$(expr ${weight} + 1000)
+ shift
+ done
+ # and finally set the highest preference for the VIP to its home node
+ # default weight when created is/was 100.
+ # on Fedora setting appears to be additive, so to get the desired
+ # value we adjust the weight
+ # weight=$(expr ${weight} - 100)
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed"
+ fi
+}
+
+
+wrap_create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+ local primary=${1}; shift
+ local head=""
+ local tail=""
+
+ # build a list of peers, e.g. for a four node cluster, for node1,
+ # the result is "node2 node3 node4"; for node2, "node3 node4 node1"
+ # and so on.
+ while [[ ${1} ]]; do
+ if [[ ${1} == ${primary} ]]; then
+ shift
+ while [[ ${1} ]]; do
+ tail=${tail}" "${1}
+ shift
+ done
+ else
+ head=${head}" "${1}
+ fi
+ shift
+ done
+ do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head}
+}
+
+
+create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS}
+ shift
+ done
+}
+
+
+setup_create_resources()
+{
+ local cibfile=$(mktemp -u)
+
+ # fixup /var/lib/nfs
+ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}"
+ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace
+ # start method. Allow time for ganesha_mon to start and set the
+ # ganesha-active crm_attribute
+ sleep 5
+
+ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1"
+ fi
+
+ pcs cluster cib ${cibfile}
+
+ while [[ ${1} ]]; do
+
+ # this is variable indirection
+ # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"'
+ # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...)
+ # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1')
+ # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"')
+ # after the `eval ${clean_nvs}` there is a variable VIP_host_1
+ # with the value '10_7_6_5', and the following \$$ magic to
+ # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us
+ # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s
+ # to give us ipaddr="10.7.6.5". whew!
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
+ fi
+
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
+ fi
+
+
+ shift
+ done
+
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+}
+
+
+teardown_resources()
+{
+ # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2)
+
+ # restore /var/lib/nfs
+ logger "notice: pcs resource delete nfs_setup-clone"
+ pcs resource delete nfs_setup-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs_setup-clone failed"
+ fi
+
+ # delete -clone resource agents
+ # in particular delete the ganesha monitor so we don't try to
+ # trigger anything when we shut down ganesha next.
+ pcs resource delete nfs-mon-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs-mon-clone failed"
+ fi
+
+ pcs resource delete nfs-grace-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs-grace-clone failed"
+ fi
+
+ while [[ ${1} ]]; do
+ pcs resource delete ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete ${1}-group failed"
+ fi
+ shift
+ done
+
+}
+
+
+recreate_resources()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ # this is variable indirection
+ # see the comment on the same a few lines up
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
+ fi
+
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
+ fi
+
+ shift
+ done
+}
+
+
+addnode_recreate_resources()
+{
+ local cibfile=${1}; shift
+ local add_node=${1}; shift
+ local add_vip=${1}; shift
+
+ recreate_resources ${cibfile} ${HA_SERVERS}
+
+ pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \
+ --after ${add_node}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed"
+ fi
+ pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \
+ ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-nfs_unblock failed"
+ fi
+}
+
+
+clear_resources()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ pcs -f ${cibfile} resource delete ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs -f ${cibfile} resource delete ${1}-group"
+ fi
+
+ shift
+ done
+}
+
+
+addnode_create_resources()
+{
+ local add_node=${1}; shift
+ local add_vip=${1}; shift
+ local cibfile=$(mktemp -u)
+
+ # start HA on the new node
+ pcs cluster start ${add_node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster start ${add_node} failed"
+ fi
+
+ pcs cluster cib ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib ${cibfile} failed"
+ fi
+
+ # delete all the -cluster_ip-1 resources, clearing
+ # their constraints, then create them again so we can
+ # recompute their constraints
+ clear_resources ${cibfile} ${HA_SERVERS}
+ addnode_recreate_resources ${cibfile} ${add_node} ${add_vip}
+
+ HA_SERVERS="${HA_SERVERS} ${add_node}"
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+}
+
+
+deletenode_delete_resources()
+{
+ local node=${1}; shift
+ local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//)
+ local cibfile=$(mktemp -u)
+
+ pcs cluster cib ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib ${cibfile} failed"
+ fi
+
+ # delete all the -cluster_ip-1 and -trigger_ip-1 resources,
+ # clearing their constraints, then create them again so we can
+ # recompute their constraints
+ clear_resources ${cibfile} ${HA_SERVERS}
+ recreate_resources ${cibfile} ${ha_servers}
+ HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /")
+
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+
+}
+
+
+deletenode_update_haconfig()
+{
+ local name="VIP_${1}"
+ local clean_name=${name//[-.]/_}
+
+ ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/")
+ sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf
+}
+
+
+setup_state_volume()
+{
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local shortname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ while [[ ${1} ]]; do
+
+ if [[ ${1} == *${dname} ]]; then
+ dirname=${1}
+ else
+ dirname=${1}${dname}
+ fi
+
+ if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then
+ mkdir ${mnt}/nfs-ganesha/tickle_dir
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+ fi
+ for server in ${HA_SERVERS} ; do
+ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
+ fi
+ done
+ shift
+ done
+
+}
+
+
+enable_pacemaker()
+{
+ while [[ ${1} ]]; do
+ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
+ else
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable"
+ fi
+ shift
+ done
+}
+
+
+addnode_state_volume()
+{
+ local newnode=${1}; shift
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ if [[ ${newnode} == *${dname} ]]; then
+ dirname=${newnode}
+ else
+ dirname=${newnode}${dname}
+ fi
+
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+ fi
+
+ for server in ${HA_SERVERS} ; do
+
+ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
+
+ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
+ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
+ fi
+ done
+
+}
+
+
+delnode_state_volume()
+{
+ local delnode=${1}; shift
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ if [[ ${delnode} == *${dname} ]]; then
+ dirname=${delnode}
+ else
+ dirname=${delnode}${dname}
+ fi
+
+ rm -rf ${mnt}/nfs-ganesha/${dirname}
+
+ for server in ${HA_SERVERS} ; do
+ if [[ ${server} != ${dirname} ]]; then
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
+ fi
+ done
+}
+
+
+status()
+{
+ local scratch=$(mktemp)
+ local regex_str="^${1}-cluster_ip-1"
+ local healthy=0
+ local index=1
+ local nodes
+
+ # change tabs to spaces, strip leading spaces, including any
+ # new '*' at the beginning of a line introduced in pcs-0.10.x
+ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch}
+
+ nodes[0]=${1}; shift
+
+ # make a regex of the configured nodes
+ # and initalize the nodes array for later
+ while [[ ${1} ]]; do
+
+ regex_str="${regex_str}|^${1}-cluster_ip-1"
+ nodes[${index}]=${1}
+ ((index++))
+ shift
+ done
+
+ # print the nodes that are expected to be online
+ grep -E "Online:" ${scratch}
+
+ echo
+
+ # print the VIPs and which node they are on
+ grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4
+
+ echo
+
+ # check if the VIP and port block/unblock RAs are on the expected nodes
+ for n in ${nodes[*]}; do
+
+ grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ done
+
+ grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch}
+ result=$?
+
+ if [ ${result} -eq 0 ]; then
+ echo "Cluster HA Status: BAD"
+ elif [ ${healthy} -eq 0 ]; then
+ echo "Cluster HA Status: HEALTHY"
+ else
+ echo "Cluster HA Status: FAILOVER"
+ fi
+
+ rm -f ${scratch}
+}
+
+create_ganesha_conf_file()
+{
+ if [[ "$1" == "yes" ]];
+ then
+ if [ -e $GANESHA_CONF ];
+ then
+ rm -rf $GANESHA_CONF
+ fi
+ # The symlink /etc/ganesha/ganesha.conf need to be
+ # created using ganesha conf file mentioned in the
+ # shared storage. Every node will only have this
+ # link and actual file will stored in shared storage,
+ # so that ganesha conf editing of ganesha conf will
+ # be easy as well as it become more consistent.
+
+ ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF
+ else
+ # Restoring previous file
+ rm -rf $GANESHA_CONF
+ cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF
+ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF
+ fi
+}
+
+set_quorum_policy()
+{
+ local quorum_policy="stop"
+ local num_servers=${1}
+
+ if [ ${num_servers} -lt 3 ]; then
+ quorum_policy="ignore"
+ fi
+ pcs property set no-quorum-policy=${quorum_policy}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
+ fi
+}
+
+main()
+{
+
+ local cmd=${1}; shift
+ if [[ ${cmd} == *help ]]; then
+ usage
+ exit 0
+ fi
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --on
+ fi
+
+ local osid=""
+
+ osid=$(grep ^ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F ID=)
+ osid=$(grep ^VERSION_ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F VERSION_ID=)
+
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+ local node=""
+ local vip=""
+
+ # ignore any comment lines
+ cfgline=$(grep ^HA_NAME= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_NAME=)
+ cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=)
+
+ case "${cmd}" in
+
+ setup | --setup)
+ logger "setting up ${HA_NAME}"
+
+ check_cluster_exists ${HA_NAME}
+
+ determine_servers "setup"
+
+ # Fedora 29+ and rhel/centos 8 has PCS-0.10.x
+ # default is pcs-0.10.x options but check for
+ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
+ if [[ ! ${ID} =~ {rhel,centos} ]]; then
+ if [[ ${VERSION_ID} == 7.* ]]; then
+ PCS9OR10_PCS_CNAME_OPTION="--name"
+ PCS9OR10_PCS_CLONE_OPTION="--clone"
+ fi
+ fi
+
+ if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then
+
+ determine_service_manager
+
+ setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
+
+ setup_create_resources ${HA_SERVERS}
+
+ setup_finalize_ha
+
+ setup_state_volume ${HA_SERVERS}
+
+ enable_pacemaker ${HA_SERVERS}
+
+ else
+
+ logger "insufficient servers for HA, aborting"
+ fi
+ ;;
+
+ teardown | --teardown)
+ logger "tearing down ${HA_NAME}"
+
+ determine_servers "teardown"
+
+ teardown_resources ${HA_SERVERS}
+
+ teardown_cluster ${HA_NAME}
+
+ cleanup_ganesha_config ${HA_CONFDIR}
+ ;;
+
+ cleanup | --cleanup)
+ cleanup_ganesha_config ${HA_CONFDIR}
+ ;;
+
+ add | --add)
+ node=${1}; shift
+ vip=${1}; shift
+
+ logger "adding ${node} with ${vip} to ${HA_NAME}"
+
+ determine_service_manager
+
+ manage_service "start" ${node}
+
+ determine_servers "add"
+
+ pcs cluster node add ${node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node add ${node} failed"
+ fi
+
+ addnode_create_resources ${node} ${vip}
+ # Subsequent add-node recreates resources for all the nodes
+ # that already exist in the cluster. The nodes are picked up
+ # from the entries in the ganesha-ha.conf file. Adding the
+ # newly added node to the file so that the resources specfic
+ # to this node is correctly recreated in the future.
+ clean_node=${node//[-.]/_}
+ echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
+
+ NEW_NODES="$HA_CLUSTER_NODES,${node}"
+
+ sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \
+$HA_CONFDIR/ganesha-ha.conf
+
+ addnode_state_volume ${node}
+
+ # addnode_create_resources() already appended ${node} to
+ # HA_SERVERS, so only need to increment HA_NUM_SERVERS
+ # and set quorum policy
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
+ ;;
+
+ delete | --delete)
+ node=${1}; shift
+
+ logger "deleting ${node} from ${HA_NAME}"
+
+ determine_servers "delete"
+
+ deletenode_delete_resources ${node}
+
+ pcs cluster node remove ${node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node remove ${node} failed"
+ fi
+
+ deletenode_update_haconfig ${node}
+
+ delnode_state_volume ${node}
+
+ determine_service_manager
+
+ manage_service "stop" ${node}
+
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
+ ;;
+
+ status | --status)
+ determine_servers "status"
+
+ status ${HA_SERVERS}
+ ;;
+
+ refresh-config | --refresh-config)
+ VOL=${1}
+
+ determine_servers "refresh-config"
+
+ refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS}
+ ;;
+
+ setup-ganesha-conf-files | --setup-ganesha-conf-files)
+
+ create_ganesha_conf_file ${1}
+ ;;
+
+ *)
+ # setup and teardown are not intended to be used by a
+ # casual user
+ usage
+ logger "Usage: ganesha-ha.sh add|delete|status"
+ ;;
+
+ esac
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --off
+ fi
+}
+
+main $*
diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py
new file mode 100755
index 00000000000..77af014bab9
--- /dev/null
+++ b/extras/ganesha/scripts/generate-epoch.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+# Generates unique epoch value on each gluster node to be used by
+# nfs-ganesha service on that node.
+#
+# Configure 'EPOCH_EXEC' option to this script path in
+# '/etc/sysconfig/ganesha' file used by nfs-ganesha service.
+#
+# Construct epoch as follows -
+# first 32-bit contains the now() time
+# rest 32-bit value contains the local glusterd node uuid
+
+import time
+import binascii
+
+# Calculate the now() time into a 64-bit integer value
+def epoch_now():
+ epoch_time = int(time.mktime(time.localtime())) << 32
+ return epoch_time
+
+# Read glusterd UUID and extract first 32-bit of it
+def epoch_uuid():
+ file_name = '/var/lib/glusterd/glusterd.info'
+
+ for line in open(file_name):
+ if "UUID" in line:
+ glusterd_uuid = line.split('=')[1].strip()
+
+ uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-",""))
+
+ epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000
+ return epoch_uuid
+
+# Construct epoch as follows -
+# first 32-bit contains the now() time
+# rest 32-bit value contains the local glusterd node uuid
+epoch = (epoch_now() | epoch_uuid())
+print((str(epoch)))
+
+exit(0)
diff --git a/extras/generate-xdr-files.sh b/extras/generate-xdr-files.sh
deleted file mode 100755
index e52321cd362..00000000000
--- a/extras/generate-xdr-files.sh
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/bin/sh
-
-_init ()
-{
- xfile="$1";
- # TODO: check the validity of .x file
-
- cfile="${1%.x}.c";
- hfile="${1%.x}.h";
-
- tmp_cfile="$1.c";
-
- tmp1_hfile="$1.h.tmp";
- tmp1_cfile="$1.c.tmp";
-
-}
-
-append_licence_header ()
-{
- src_file=$1;
- dst_file=$2;
-
- cat >$dst_file <<EOF
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-EOF
-
- cat $src_file >> $dst_file;
-
-}
-
-main ()
-{
- if [ $# -ne 1 ]; then
- echo "wrong number of arguments given"
- echo " $0 <XDR-definition-file>.x"
- exit 1;
- fi
-
-
- echo -n "writing the XDR routine file ($tmp_cfile) ... ";
- rm -f $tmp_cfile;
- rpcgen -c -o $tmp_cfile $xfile;
-
- # get rid of warnings in xdr .c file due to "unused variable 'buf'"
- sed -i -e 's:buf;$:buf;\
- buf = NULL;:' $tmp_cfile;
-
- sed -i '/int i;/d' $tmp_cfile;
-
- echo "OK";
-
- # no need for a temporary file here as there are no changes from glusterfs
- echo -n "writing the XDR header file ($hfile) ... ";
- rm -f $hfile;
- rpcgen -h -o $hfile $xfile;
-
- # the '#ifdef' part of file should be fixed
- sed -i -e 's/-/_/g' $hfile;
-
- echo "OK";
-
- echo -n "writing licence header to the generated files ... ";
- # Write header to temp file and append generated file
- append_licence_header $hfile $tmp1_hfile;
- append_licence_header $tmp_cfile $tmp1_cfile;
- echo "OK"
-
- # now move the destination file to actual original file
- echo -n "updating existing files ... ";
- mv $tmp1_hfile $hfile;
- mv $tmp1_cfile $cfile;
-
- # remove unwanted temporary files (if any)
- rm -f $tmp_cfile $tmp1_cfile $tmp1_hfile
-
- echo "OK"
-
-}
-
-_init "$@" && main "$@";
diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am
new file mode 100644
index 00000000000..09eff308ac4
--- /dev/null
+++ b/extras/geo-rep/Makefile.am
@@ -0,0 +1,16 @@
+scriptsdir = $(libexecdir)/glusterfs/scripts
+scripts_SCRIPTS = gsync-upgrade.sh generate-gfid-file.sh get-gfid.sh \
+ slave-upgrade.sh schedule_georep.py
+
+scripts_PROGRAMS = gsync-sync-gfid
+gsync_sync_gfid_CFLAGS = $(GF_CFLAGS) -Wall -I$(top_srcdir)/libglusterfs/src
+gsync_sync_gfid_LDFLAGS = $(GF_LDFLAGS)
+gsync_sync_gfid_LDADD = $(GF_LDADD) $(top_builddir)/libglusterfs/src/libglusterfs.la
+gsync_sync_gfid_SOURCES = gsync-sync-gfid.c
+gsync_sync_gfid_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+EXTRA_DIST = gsync-sync-gfid.c gsync-upgrade.sh generate-gfid-file.sh \
+ get-gfid.sh slave-upgrade.sh schedule_georep.py.in
+
+CLEANFILES = schedule_georep.py
diff --git a/extras/geo-rep/generate-gfid-file.sh b/extras/geo-rep/generate-gfid-file.sh
new file mode 100644
index 00000000000..14f104b986d
--- /dev/null
+++ b/extras/geo-rep/generate-gfid-file.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#Usage: generate-gfid-file.sh <master-volfile-server:master-volume> <path-to-get-gfid.sh> <output-file> [dirs-list-file]
+
+function get_gfids()
+{
+ GET_GFID_CMD=$1
+ OUTPUT_FILE=$2
+ DIR_PATH=$3
+ find "$DIR_PATH" -exec $GET_GFID_CMD {} \; >> $OUTPUT_FILE
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+
+ VOLFILE_SERVER=$1;
+ VOLUME=$2;
+ GFID_CMD=$3;
+ OUTPUT=$4;
+
+ T=$(mktemp -d -t ${0##*/}.XXXXXX);
+
+ glusterfs -s $VOLFILE_SERVER --volfile-id $VOLUME $T;
+
+ i=$(stat -c '%i' $T);
+
+ [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T";
+
+ cd $T;
+ rm -f $OUTPUT;
+ touch $OUTPUT;
+
+ if [ "$DIRS_FILE" = "." ]
+ then
+ get_gfids $GFID_CMD $OUTPUT "."
+ else
+ while read line
+ do
+ get_gfids $GFID_CMD $OUTPUT "$line"
+ done < $DIRS_FILE
+ fi;
+
+ cd -;
+
+ umount $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+
+function main()
+{
+ SLAVE=$1
+ GET_GFID_CMD=$2
+ OUTPUT=$3
+
+ VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "$#" -lt 4 ]
+ then
+ DIRS_FILE="."
+ else
+ DIRS_FILE=$4
+ fi
+ mount_client $VOLFILE_SERVER $VOLUME_NAME $GET_GFID_CMD $OUTPUT $DIRS_FILE
+}
+
+main "$@";
diff --git a/extras/geo-rep/get-gfid.sh b/extras/geo-rep/get-gfid.sh
new file mode 100755
index 00000000000..a4d609b0bc5
--- /dev/null
+++ b/extras/geo-rep/get-gfid.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+ATTR_STR=`getfattr -h $1 -n glusterfs.gfid.string`
+GLFS_PATH=`echo $ATTR_STR | sed -e 's/# file: \(.*\) glusterfs.gfid.string*/\1/g'`
+GFID=`echo $ATTR_STR | sed -e 's/.*glusterfs.gfid.string="\(.*\)"/\1/g'`
+
+echo "$GFID $GLFS_PATH"
diff --git a/extras/geo-rep/gsync-sync-gfid.c b/extras/geo-rep/gsync-sync-gfid.c
new file mode 100644
index 00000000000..47dca0413e9
--- /dev/null
+++ b/extras/geo-rep/gsync-sync-gfid.c
@@ -0,0 +1,109 @@
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <libgen.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+
+#ifndef UUID_CANONICAL_FORM_LEN
+#define UUID_CANONICAL_FORM_LEN 36
+#endif
+
+#ifndef GF_FUSE_AUX_GFID_HEAL
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+#endif
+
+#define GLFS_LINE_MAX (PATH_MAX + (2 * UUID_CANONICAL_FORM_LEN))
+
+int
+main(int argc, char *argv[])
+{
+ char *file = NULL;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ char *parent_dir = NULL;
+ char *gfid = NULL;
+ char *bname = NULL;
+ int ret = -1;
+ int len = 0;
+ FILE *fp = NULL;
+ char line[GLFS_LINE_MAX] = {
+ 0,
+ };
+ char *path = NULL;
+ void *blob = NULL;
+ void *tmp_blob = NULL;
+
+ if (argc != 2) {
+ /* each line in the file has the following format
+ * uuid-in-canonical-form path-relative-to-gluster-mount.
+ * Both uuid and relative path are from master mount.
+ */
+ fprintf(stderr, "usage: %s <file-of-paths-to-be-synced>\n", argv[0]);
+ goto out;
+ }
+
+ file = argv[1];
+
+ fp = fopen(file, "r");
+ if (fp == NULL) {
+ fprintf(stderr, "cannot open %s for reading (%s)\n", file,
+ strerror(errno));
+ goto out;
+ }
+
+ while (fgets(line, GLFS_LINE_MAX, fp) != NULL) {
+ tmp = line;
+ path = gfid = line;
+
+ path += UUID_CANONICAL_FORM_LEN + 1;
+
+ while (isspace(*path))
+ path++;
+
+ len = strlen(line);
+ if ((len < GLFS_LINE_MAX) && (line[len - 1] == '\n'))
+ line[len - 1] = '\0';
+
+ line[UUID_CANONICAL_FORM_LEN] = '\0';
+
+ tmp = strdup(path);
+ tmp1 = strdup(path);
+ parent_dir = dirname(tmp);
+ bname = basename(tmp1);
+
+ /* gfid + '\0' + bname + '\0' */
+ len = UUID_CANONICAL_FORM_LEN + 1 + strlen(bname) + 1;
+
+ blob = malloc(len);
+
+ memcpy(blob, gfid, UUID_CANONICAL_FORM_LEN);
+
+ tmp_blob = blob + UUID_CANONICAL_FORM_LEN + 1;
+
+ memcpy(tmp_blob, bname, strlen(bname));
+
+ ret = sys_lsetxattr(parent_dir, GF_FUSE_AUX_GFID_HEAL, blob, len, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr on %s/%s failed (%s)\n", parent_dir,
+ bname, strerror(errno));
+ }
+ memset(line, 0, GLFS_LINE_MAX);
+
+ free(blob);
+ free(tmp);
+ free(tmp1);
+ blob = NULL;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+ return ret;
+}
diff --git a/extras/geo-rep/gsync-upgrade.sh b/extras/geo-rep/gsync-upgrade.sh
new file mode 100644
index 00000000000..0f73a33884b
--- /dev/null
+++ b/extras/geo-rep/gsync-upgrade.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+#usage: gsync-upgrade.sh <slave-volfile-server:slave-volume> <gfid-file>
+# <path-to-gsync-sync-gfid> <ssh-identity-file>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+#
+#<ssh-identity-file>: file from which the identity (private key) for public key authentication is read.
+
+SLAVE_MOUNT='/tmp/glfs_slave'
+
+function SSH()
+{
+ HOST=$1
+ SSHKEY=$2
+
+ shift 2
+
+ ssh -qi $SSHKEY \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ "$HOST" "$@";
+}
+
+function get_bricks()
+{
+ SSHKEY=$3
+
+ SSH $1 $SSHKEY "gluster volume info $2" | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+ HOST=$1
+ BRICK=$2
+ SSHKEY=$3
+
+ # TODO: write a C program to receive a list of files and does cleanup on
+ # them instead of spawning a new setfattr process for each file if
+ # performance is bad.
+ SSH -i $SSHKEY $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \;"
+}
+
+function cleanup_slave()
+{
+ SSHKEY=$2
+
+ VOLFILE_SERVER=`echo $1 | sed -e 's/\(.*\):.*/\1/'`
+ VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+ BRICKS=`get_bricks $VOLFILE_SERVER $VOLUME_NAME $SSHKEY`
+
+ for i in $BRICKS; do
+ HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+ BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+ cleanup_brick $HOST $BRICK $SSHKEY
+ done
+
+ SSH -i $SSHKEY $VOLFILE_SERVER "gluster --mode=script volume stop $VOLUME_NAME; gluster volume start $VOLUME_NAME";
+
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+ GFID_FILE=$3
+ SYNC_CMD=$4
+
+ T=$(mktemp -d -t ${0##*/}.XXXXXX);
+
+ glusterfs --aux-gfid-mount -s $1 --volfile-id $2 $T;
+
+ i=$(stat -c '%i' $T);
+
+ [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T";
+
+ cd $T;
+
+ $SYNC_CMD $GFID_FILE
+
+ cd -;
+
+ umount -l $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $3
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+ SSHKEY=$4
+
+ cleanup_slave $SLAVE $SSHKEY
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in
new file mode 100644
index 00000000000..48b2b507060
--- /dev/null
+++ b/extras/geo-rep/schedule_georep.py.in
@@ -0,0 +1,492 @@
+#!/usr/bin/python3
+"""
+Schedule Geo-replication
+------------------------
+A tool to run Geo-replication when required. This can be used to
+schedule the Geo-replication to run once in a day using
+
+ # Run daily at 08:30pm
+ 30 20 * * * root python /usr/share/glusterfs/scripts/schedule_georep.py \\
+ --no-color gv1 fvm1 gv2 >> /var/log/glusterfs/schedule_georep.log 2>&1
+
+This tool does the following,
+
+1. Stop Geo-replication if Started
+2. Start Geo-replication
+3. Set Checkpoint
+4. Check the Status and see Checkpoint is Complete.(LOOP)
+5. If checkpoint complete, Stop Geo-replication
+
+Usage:
+
+ python /usr/share/glusterfs/scripts/schedule_georep.py <MASTERVOL> \\
+ <SLAVEHOST> <SLAVEVOL>
+
+For example,
+
+ python /usr/share/glusterfs/scripts/schedule_georep.py gv1 fvm1 gv2
+
+"""
+import subprocess
+import time
+import xml.etree.cElementTree as etree
+import sys
+from contextlib import contextmanager
+import tempfile
+import os
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+cache_data = {}
+
+SESSION_MOUNT_LOG_FILE = ("/var/log/glusterfs/geo-replication"
+ "/schedule_georep.mount.log")
+
+USE_CLI_COLOR = True
+mnt_list = []
+
+class GlusterBadXmlFormat(Exception):
+ """
+ Exception class for XML Parse Errors
+ """
+ pass
+
+
+def output_notok(msg, err="", exitcode=1):
+ if USE_CLI_COLOR:
+ out = "\033[31m[NOT OK]\033[0m {0}\n{1}\n"
+ else:
+ out = "[NOT OK] {0}\n{1}\n"
+ sys.stderr.write(out.format(msg, err))
+ sys.exit(exitcode)
+
+
+def output_warning(msg):
+ if USE_CLI_COLOR:
+ out = "\033[33m[ WARN]\033[0m {0}\n"
+ else:
+ out = "[ WARN] {0}\n"
+ sys.stderr.write(out.format(msg))
+
+
+def output_ok(msg):
+ if USE_CLI_COLOR:
+ out = "\033[32m[ OK]\033[0m {0}\n"
+ else:
+ out = "[ OK] {0}\n"
+ sys.stderr.write(out.format(msg))
+
+
+def execute(cmd, success_msg="", failure_msg="", exitcode=-1):
+ """
+ Generic wrapper to execute the CLI commands. Returns Output if success.
+ On success it can print message in stdout if specified.
+ On failure, exits after writing to stderr.
+ """
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ if success_msg:
+ output_ok(success_msg)
+ return out
+ else:
+ if exitcode == 0:
+ return
+ err_msg = err if err else out
+ output_notok(failure_msg, err=err_msg, exitcode=exitcode)
+
+
+def cache_output_with_args(func):
+ """
+ Decorator function to remember the output of any function
+ """
+ def wrapper(*args, **kwargs):
+ global cache_data
+ key = "_".join([func.func_name] + list(args))
+ if cache_data.get(key, None) is None:
+ cache_data[key] = func(*args, **kwargs)
+
+ return cache_data[key]
+ return wrapper
+
+
+def cleanup(hostname, volname, mnt):
+ """
+ Unmount the Volume and Remove the temporary directory
+ """
+ execute(["umount", "-l", mnt],
+ failure_msg="Unable to Unmount Gluster Volume "
+ "{0}:{1}(Mounted at {2})".format(hostname, volname, mnt))
+ execute(["rmdir", mnt],
+ failure_msg="Unable to Remove temp directory "
+ "{0}".format(mnt), exitcode=0)
+
+
+@contextmanager
+def glustermount(hostname, volname):
+ """
+ Context manager for Mounting Gluster Volume
+ Use as
+ with glustermount(HOSTNAME, VOLNAME) as MNT:
+ # Do your stuff
+ Automatically unmounts it in case of Exceptions/out of context
+ """
+ mnt = tempfile.mkdtemp(prefix="georepsetup_")
+ mnt_list.append(mnt)
+ execute(["@SBIN_DIR@/glusterfs",
+ "--volfile-server", hostname,
+ "--volfile-id", volname,
+ "-l", SESSION_MOUNT_LOG_FILE,
+ mnt],
+ failure_msg="Unable to Mount Gluster Volume "
+ "{0}:{1}".format(hostname, volname))
+ if os.path.ismount(mnt):
+ yield mnt
+ else:
+ output_notok("Unable to Mount Gluster Volume "
+ "{0}:{1}".format(hostname, volname))
+ cleanup(hostname, volname, mnt)
+
+
+@cache_output_with_args
+def get_bricks(volname):
+ """
+ Returns Bricks list, caches the Bricks list for a volume once
+ parsed.
+ """
+ value = []
+ cmd = ["@SBIN_DIR@/gluster", "volume", "info", volname, "--xml"]
+ info = execute(cmd)
+ try:
+ tree = etree.fromstring(info)
+ volume_el = tree.find('volInfo/volumes/volume')
+ for b in volume_el.findall('bricks/brick'):
+ value.append({"name": b.find("name").text,
+ "hostUuid": b.find("hostUuid").text})
+ except ParseError:
+ raise GlusterBadXmlFormat("Bad XML Format: %s" % " ".join(cmd))
+
+ return value
+
+
+def get_georep_status(mastervol, slave):
+ session_keys = set()
+ out = {}
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication"]
+ if mastervol is not None:
+ cmd += [mastervol]
+ if slave:
+ cmd += [slave]
+
+ cmd += ["status", "--xml"]
+ info = execute(cmd)
+
+ try:
+ tree = etree.fromstring(info)
+ # Get All Sessions
+ for volume_el in tree.findall("geoRep/volume"):
+ sessions_el = volume_el.find("sessions")
+ # Master Volume name if multiple Volumes
+ mvol = volume_el.find("name").text
+
+ # For each session, collect the details
+ for session in sessions_el.findall("session"):
+ session_slave = "{0}:{1}".format(mvol, session.find(
+ "session_slave").text)
+ session_keys.add(session_slave)
+ out[session_slave] = {}
+
+ for pair in session.findall('pair'):
+ master_brick = "{0}:{1}".format(
+ pair.find("master_node").text,
+ pair.find("master_brick").text
+ )
+
+ out[session_slave][master_brick] = {
+ "mastervol": mvol,
+ "slavevol": pair.find("slave").text.split("::")[-1],
+ "master_node": pair.find("master_node").text,
+ "master_brick": pair.find("master_brick").text,
+ "slave_user": pair.find("slave_user").text,
+ "slave": pair.find("slave").text,
+ "slave_node": pair.find("slave_node").text,
+ "status": pair.find("status").text,
+ "crawl_status": pair.find("crawl_status").text,
+ "entry": pair.find("entry").text,
+ "data": pair.find("data").text,
+ "meta": pair.find("meta").text,
+ "failures": pair.find("failures").text,
+ "checkpoint_completed": pair.find(
+ "checkpoint_completed").text,
+ "master_node_uuid": pair.find("master_node_uuid").text,
+ "last_synced": pair.find("last_synced").text,
+ "checkpoint_time": pair.find("checkpoint_time").text,
+ "checkpoint_completion_time":
+ pair.find("checkpoint_completion_time").text
+ }
+ except ParseError:
+ raise GlusterBadXmlFormat("Bad XML Format: %s" % " ".join(cmd))
+
+ return session_keys, out
+
+
+def get_offline_status(volname, brick, node_uuid, slave):
+ node, brick = brick.split(":")
+ if "@" not in slave:
+ slave_user = "root"
+ else:
+ slave_user, _ = slave.split("@")
+
+ return {
+ "mastervol": volname,
+ "slavevol": slave.split("::")[-1],
+ "master_node": node,
+ "master_brick": brick,
+ "slave_user": slave_user,
+ "slave": slave,
+ "slave_node": "N/A",
+ "status": "Offline",
+ "crawl_status": "N/A",
+ "entry": "N/A",
+ "data": "N/A",
+ "meta": "N/A",
+ "failures": "N/A",
+ "checkpoint_completed": "N/A",
+ "master_node_uuid": node_uuid,
+ "last_synced": "N/A",
+ "checkpoint_time": "N/A",
+ "checkpoint_completion_time": "N/A"
+ }
+
+
+def get(mastervol=None, slave=None):
+ """
+ This function gets list of Bricks of Master Volume and collects
+ respective Geo-rep status. Output will be always ordered as the
+ bricks list in Master Volume. If Geo-rep status is not available
+ for any brick then it updates OFFLINE status.
+ """
+ out = []
+ session_keys, gstatus = get_georep_status(mastervol, slave)
+
+ for session in session_keys:
+ mvol, _, slave = session.split(":", 2)
+ slave = slave.replace("ssh://", "")
+ master_bricks = get_bricks(mvol)
+ out.append([])
+ for brick in master_bricks:
+ bname = brick["name"]
+ if gstatus.get(session) and gstatus[session].get(bname, None):
+ out[-1].append(gstatus[session][bname])
+ else:
+ out[-1].append(
+ get_offline_status(mvol, bname, brick["hostUuid"], slave))
+
+ return out
+
+
+def get_summary(mastervol, slave_url):
+ """
+ Wrapper function around Geo-rep Status and Gluster Volume Info
+ This combines the output from Bricks list and Geo-rep Status.
+ If a Master Brick node is down or Status is faulty then increments
+ the faulty counter. It also collects the checkpoint status from all
+ workers and compares with Number of Bricks.
+ """
+ down_rows = []
+ faulty_rows = []
+ out = []
+
+ status_data = get(mastervol, slave_url)
+
+ for session in status_data:
+ session_name = ""
+ summary = {
+ "active": 0,
+ "passive": 0,
+ "faulty": 0,
+ "initializing": 0,
+ "stopped": 0,
+ "created": 0,
+ "offline": 0,
+ "paused": 0,
+ "workers": 0,
+ "completed_checkpoints": 0,
+ "checkpoint": False,
+ "checkpoints_ok": False,
+ "ok": False
+ }
+
+ for row in session:
+ summary[row["status"].replace("...", "").lower()] += 1
+ summary["workers"] += 1
+ if row["checkpoint_completed"] == "Yes":
+ summary["completed_checkpoints"] += 1
+
+ session_name = "{0}=>{1}".format(
+ row["mastervol"],
+ row["slave"].replace("ssh://", "")
+ )
+
+ if row["status"] == "Faulty":
+ faulty_rows.append("{0}:{1}".format(row["master_node"],
+ row["master_brick"]))
+
+ if row["status"] == "Offline":
+ down_rows.append("{0}:{1}".format(row["master_node"],
+ row["master_brick"]))
+
+ if summary["active"] == summary["completed_checkpoints"] and \
+ summary["faulty"] == 0 and summary["offline"] == 0:
+ summary["checkpoints_ok"] = True
+
+ if summary["faulty"] == 0 and summary["offline"] == 0:
+ summary["ok"] = True
+
+ if session_name != "":
+ out.append([session_name, summary, faulty_rows, down_rows])
+
+ return out
+
+
+def touch_mount_root(mastervol):
+ # Create a Mount and Touch the Mount point root,
+ # Hack to make sure some event available after
+ # setting Checkpoint. Without this there is a chance of
+ # Checkpoint never completes.
+ with glustermount("localhost", mastervol) as mnt:
+ execute(["touch", mnt])
+
+
+def main(args):
+ turns = 1
+
+ # Stop Force
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication", args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "stop", "force"]
+ execute(cmd)
+ output_ok("Stopped Geo-replication")
+
+ # Set Checkpoint to NOW
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication", args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "config", "checkpoint",
+ "now"]
+ execute(cmd)
+ output_ok("Set Checkpoint")
+
+ # Start the Geo-replication
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication", args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "start"]
+ execute(cmd)
+ output_ok("Started Geo-replication and watching Status for "
+ "Checkpoint completion")
+
+ start_time = int(time.time())
+ duration = 0
+
+ # Sleep till Geo-rep initializes
+ time.sleep(60)
+
+ touch_mount_root(args.mastervol)
+
+ slave_url = "{0}::{1}".format(args.slave, args.slavevol)
+
+ # Loop to Check the Geo-replication Status and Checkpoint
+ # If All Status OK and all Checkpoints complete,
+ # Stop the Geo-replication and Log the Completeness
+ while True:
+ session_summary = get_summary(args.mastervol,
+ slave_url)
+ if len(session_summary) == 0:
+ # If Status command fails with another transaction error
+ # or any other error. Gluster cmd still produces XML output
+ # with different message
+ output_warning("Unable to get Geo-replication Status")
+ else:
+ session_name, summary, faulty_rows, down_rows = session_summary[0]
+ chkpt_status = "COMPLETE" if summary["checkpoints_ok"] else \
+ "NOT COMPLETE"
+ ok_status = "OK" if summary["ok"] else "NOT OK"
+
+ if summary["ok"]:
+ output_ok("All Checkpoints {1}, "
+ "All status {2} (Turns {0:>3})".format(
+ turns, chkpt_status, ok_status))
+ else:
+ output_warning("All Checkpoints {1}, "
+ "All status {2} (Turns {0:>3})".format(
+ turns, chkpt_status, ok_status))
+
+ output_warning("Geo-rep workers Faulty/Offline, "
+ "Faulty: {0} Offline: {1}".format(
+ repr(faulty_rows),
+ repr(down_rows)))
+
+ if summary["checkpoints_ok"]:
+ output_ok("Stopping Geo-replication session now")
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication",
+ args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "stop"]
+ execute(cmd)
+ break
+ else:
+ # If Checkpoint is not complete after a iteration means brick
+ # was down and came online now. SETATTR on mount is not
+ # recorded, So again issue touch on mount root So that
+ # Stime will increase and Checkpoint will complete.
+ touch_mount_root(args.mastervol)
+
+ # Increment the turns and Sleep for 10 sec
+ turns += 1
+ duration = int(time.time()) - start_time
+ if args.timeout > 0 and duration > (args.timeout * 60):
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication",
+ args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "stop", "force"]
+ execute(cmd)
+ output_notok("Timed out, Stopping Geo-replication("
+ "Duration: {0}sec)".format(duration))
+
+ time.sleep(args.interval)
+
+ for mnt in mnt_list:
+ execute(["rmdir", mnt],
+ failure_msg="Unable to Remove temp directory "
+ "{0}".format(mnt), exitcode=0)
+
+if __name__ == "__main__":
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+ parser.add_argument("mastervol", help="Master Volume Name")
+ parser.add_argument("slave",
+ help="Slave hostname "
+ "(<username>@SLAVEHOST or SLAVEHOST)",
+ metavar="SLAVE")
+ parser.add_argument("slavevol", help="Slave Volume Name")
+ parser.add_argument("--interval", help="Interval in Seconds. "
+ "Wait time before each status check",
+ type=int, default=10)
+ parser.add_argument("--timeout", help="Timeout in minutes. Script will "
+ "stop Geo-replication if Checkpoint is not complete "
+ "in the specified timeout time", type=int,
+ default=0)
+ parser.add_argument("--no-color", help="Don't use Color in CLI output",
+ action="store_true")
+ args = parser.parse_args()
+ if args.no_color:
+ USE_CLI_COLOR = False
+ try:
+ # Check for session existence
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication",
+ args.mastervol, "%s::%s" % (args.slave, args.slavevol), "status"]
+ execute(cmd)
+ main(args)
+ except KeyboardInterrupt:
+ for mnt in mnt_list:
+ execute(["umount", "-l", mnt],
+ failure_msg="Unable to Unmount Gluster Volume "
+ "Mounted at {0}".format(mnt), exitcode=0)
+ execute(["rmdir", mnt],
+ failure_msg="Unable to Remove temp directory "
+ "{0}".format(mnt), exitcode=0)
+ output_notok("Exiting...")
diff --git a/extras/geo-rep/slave-upgrade.sh b/extras/geo-rep/slave-upgrade.sh
new file mode 100644
index 00000000000..3a37f8e3579
--- /dev/null
+++ b/extras/geo-rep/slave-upgrade.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+#usage: slave-upgrade.sh <volfile-server:volname> <gfid-file>
+# <path-to-gsync-sync-gfid>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+
+function get_bricks()
+{
+ gluster volume info $1 | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+ HOST=$1
+ BRICK=$2
+
+ # TODO: write a C program to receive a list of files and does cleanup on
+ # them instead of spawning a new setfattr process for each file if
+ # performance is bad.
+ ssh $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \; 2>/dev/null"
+}
+
+function cleanup_slave()
+{
+ VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+ BRICKS=`get_bricks $VOLUME_NAME`
+
+ for i in $BRICKS; do
+ HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+ BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+ cleanup_brick $HOST $BRICK
+ done
+
+ # Now restart the volume
+ gluster --mode=script volume stop $VOLUME_NAME;
+ gluster volume start $VOLUME_NAME;
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+
+ VOLUME_NAME=$2;
+ GFID_FILE=$3
+ SYNC_CMD=$4
+
+ T=$(mktemp -d -t ${0##*/}.XXXXXX);
+
+ glusterfs --aux-gfid-mount -s $1 --volfile-id $VOLUME_NAME $T;
+
+ i=$(stat -c '%i' $T);
+
+ cd $T;
+
+ $SYNC_CMD $GFID_FILE
+
+ cd -;
+
+ umount $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $SYNC_CMD
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ cleanup_slave $SLAVE
+
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/gfid-to-dirname.sh b/extras/gfid-to-dirname.sh
new file mode 100755
index 00000000000..fd359fab58a
--- /dev/null
+++ b/extras/gfid-to-dirname.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+function read_symlink()
+{
+ DOT_GLUSTERFS_PATH=$BRICK_PATH/.glusterfs
+ gfid_string=$1
+ symlink_path="$DOT_GLUSTERFS_PATH/${gfid_string:0:2}/${gfid_string:2:2}/$gfid_string"
+ #remove trailing '/'
+ symlink_path=${symlink_path%/}
+ linkname=$(readlink $symlink_path)
+ if [ $? -ne 0 ]; then
+ echo "readlink of $symlink_path returned an error." >&2
+ exit -1
+ fi
+ echo $linkname
+}
+
+main()
+{
+ if [ $# -lt 2 ] ;then
+ echo "Usage: $0 <brick-path> <gfid-string-of-directory>"
+ echo "Example: $0 /bricks/brick1 1b835012-1ae5-4f0d-9db4-64de574d891c"
+ exit -1
+ fi
+
+ BRICK_PATH=$1
+ name=$(read_symlink $2)
+ if [ $? -ne 0 ]; then
+ exit -1
+ fi
+
+ while [ ${name:12:36} != "00000000-0000-0000-0000-000000000001" ]
+ do
+ LOCATION=`basename $name`/$LOCATION
+ GFID_STRING=${name:12:36}
+ name=$(read_symlink $GFID_STRING)
+ if [ $? -ne 0 ]; then
+ exit -1
+ fi
+ done
+
+ LOCATION=`basename $name`/$LOCATION
+ echo "Location of the directory corresponding to gfid:$2 is $BRICK_PATH/$LOCATION"
+}
+
+main "$@"
diff --git a/extras/git-branch-diff.py b/extras/git-branch-diff.py
new file mode 100755
index 00000000000..382513e069e
--- /dev/null
+++ b/extras/git-branch-diff.py
@@ -0,0 +1,285 @@
+#!/bin/python2
+
+"""
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
+
+"""
+ ABOUT:
+ This script helps in visualizing backported and missed commits between two
+ different branches, tags or commit ranges. In the list of missed commits,
+ it will help you identify patches which are posted for reviews on gerrit server.
+
+ USAGE:
+ $ ./extras/git-branch-diff.py --help
+ usage: git-branch-diff.py [-h] [-s SOURCE] -t TARGET [-a AUTHOR] [-p PATH]
+ [-o OPTIONS]
+
+ git wrapper to diff local or remote branches/tags/commit-ranges
+
+ optional arguments:
+ -h, --help show this help message and exit
+ -s SOURCE, --source SOURCE
+ source pattern, it could be a branch, tag or a commit
+ range
+ -t TARGET, --target TARGET
+ target pattern, it could be a branch, tag or a commit
+ range
+ -a AUTHOR, --author AUTHOR
+ default: git config name/email, to provide multiple
+ specify comma separated values
+ -p PATH, --path PATH show source and target diff w.r.t given path, to
+ provide multiple specify space in between them
+ -o OPTIONS, --options OPTIONS
+ add other git options such as --after=<>, --before=<>
+ etc. experts use;
+
+ SAMPLE EXECUTIONS:
+ $ ./extras/git-branch-diff.py -t origin/release-3.8
+
+ $ ./extras/git-branch-diff.py -s local_branch -t origin/release-3.7
+
+ $ ./extras/git-branch-diff.py -s 4517bf8..e66add8 -t origin/release-3.7
+ $ ./extras/git-branch-diff.py -s HEAD..c4efd39 -t origin/release-3.7
+
+ $ ./extras/git-branch-diff.py -t v3.7.11 --author="author@redhat.com"
+ $ ./extras/git-branch-diff.py -t v3.7.11 --author="authorX, authorY, authorZ"
+
+ $ ./extras/git-branch-diff.py -t origin/release-3.8 --path="xlators/"
+ $ ./extras/git-branch-diff.py -t origin/release-3.8 --path="./xlators ./rpc"
+
+ $ ./extras/git-branch-diff.py -t origin/release-3.6 --author="*"
+ $ ./extras/git-branch-diff.py -t origin/release-3.6 --author="All"
+ $ ./extras/git-branch-diff.py -t origin/release-3.6 --author="Null"
+
+ $ ./extras/git-branch-diff.py -t v3.7.11 --options="--after=2015-03-01 \
+ --before=2016-01-30"
+
+ DECLARATION:
+ While backporting commit to another branch only subject of the patch may
+ remain unchanged, all others such as commit message, commit Id, change Id,
+ bug Id, may be changed. This script works by taking commit subject as the
+ key value for comparing two git branches, which can be local or remote.
+
+ Note: This script may ignore commits which have altered their commit subjects
+ while backporting patches. Also this script doesn't have any intelligence to
+ detect squashed commits.
+
+ AUTHOR:
+ Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+"""
+
+from __future__ import print_function
+import os
+import sys
+import argparse
+import commands
+import subprocess
+import requests
+
+class GitBranchDiff:
+ def __init__ (self):
+ " color symbols"
+ self.tick = u'\033[1;32m[ \u2714 ]\033[0m'
+ self.cross = u'\033[1;31m[ \u2716 ]\033[0m'
+ self.green_set = u'\033[1;34m'
+ self.yello_set = u'\033[4;33m'
+ self.color_unset = '\033[0m'
+
+ self.parse_cmd_args()
+
+ " replace default values with actual values from command args"
+ self.g_author = self.argsdict['author']
+ self.s_pattern = self.argsdict['source']
+ self.t_pattern = self.argsdict['target']
+ self.r_path = self.argsdict['path']
+ self.options = ' '.join(self.argsdict['options'])
+
+ self.gerrit_server = "http://review.gluster.org"
+
+ def check_dir_exist (self, os_path):
+ " checks whether given path exist"
+ path_list = os_path.split()
+ for path in path_list:
+ if not os.path.exists(path):
+ raise argparse.ArgumentTypeError("'%s' path %s is not valid"
+ %(os_path, path))
+ return os_path
+
+ def check_pattern_exist (self):
+ " defend to check given branch[s] exit"
+ status_sbr, op = commands.getstatusoutput('git log ' +
+ self.s_pattern)
+ status_tbr, op = commands.getstatusoutput('git log ' +
+ self.t_pattern)
+ if status_sbr != 0:
+ print("Error: --source=" + self.s_pattern + " doesn't exit\n")
+ self.parser.print_help()
+ exit(status_sbr)
+ elif status_tbr != 0:
+ print("Error: --target=" + self.t_pattern + " doesn't exit\n")
+ self.parser.print_help()
+ exit(status_tbr)
+
+ def check_author_exist (self):
+ " defend to check given author exist, format in case of multiple"
+ contrib_list = ['', '*', 'all', 'All', 'ALL', 'null', 'Null', 'NULL']
+ if self.g_author in contrib_list:
+ self.g_author = ""
+ else:
+ ide_list = self.g_author.split(',')
+ for ide in ide_list:
+ cmd4 = 'git log ' + self.s_pattern + ' --author=' + ide
+ c_list = subprocess.check_output(cmd4, shell = True)
+ if len(c_list) is 0:
+ print("Error: --author=%s doesn't exit" %self.g_author)
+ print("see '%s --help'" %__file__)
+ exit(1)
+ if len(ide_list) > 1:
+ self.g_author = "\|".join(ide_list)
+
+ def connected_to_gerrit (self):
+ "check if gerrit server is reachable"
+ try:
+ r = requests.get(self.gerrit_server, timeout=3)
+ return True
+ except requests.Timeout as err:
+ " request timed out"
+ print("Warning: failed to get list of open review commits on " \
+ "gerrit.\n" \
+ "hint: Request timed out! gerrit server could possibly " \
+ "slow ...\n")
+ return False
+ except requests.RequestException as err:
+ " handle other errors"
+ print("Warning: failed to get list of open review commits on " \
+ "gerrit\n" \
+ "hint: check with internet connection ...\n")
+ return False
+
+ def parse_cmd_args (self):
+ " command line parser"
+ author = subprocess.check_output('git config user.email',
+ shell = True).rstrip('\n')
+ source = "remotes/origin/master"
+ options = [' --pretty=format:"%h %s" ']
+ path = subprocess.check_output('git rev-parse --show-toplevel',
+ shell = True).rstrip('\n')
+ self.parser = argparse.ArgumentParser(description = 'git wrapper to '
+ 'diff local or remote branches/'
+ 'tags/commit-ranges')
+ self.parser.add_argument('-s',
+ '--source',
+ help = 'source pattern, it could be a branch,'
+ ' tag or a commit range',
+ default = source,
+ dest = 'source')
+ self.parser.add_argument('-t',
+ '--target',
+ help = 'target pattern, it could be a branch,'
+ ' tag or a commit range',
+ required = True,
+ dest = 'target')
+ self.parser.add_argument('-a',
+ '--author',
+ help = 'default: git config name/email, '
+ 'to provide multiple specify comma'
+ ' separated values',
+ default = author,
+ dest = 'author')
+ self.parser.add_argument('-p',
+ '--path',
+ type = self.check_dir_exist,
+ help = 'show source and target diff w.r.t '
+ 'given path, to provide multiple '
+ 'specify space in between them',
+ default = path,
+ dest = 'path')
+ self.parser.add_argument('-o',
+ '--options',
+ help = 'add other git options such as '
+ '--after=<>, --before=<> etc. '
+ 'experts use;',
+ default = options,
+ dest = 'options',
+ action='append')
+ self.argsdict = vars(self.parser.parse_args())
+
+ def print_output (self):
+ " display the result list"
+ print("\n------------------------------------------------------------\n")
+ print(self.tick + " Successfully Backported changes:")
+ print(' {' + 'from: ' + self.s_pattern + \
+ ' to: '+ self.t_pattern + '}\n')
+ for key, value in self.s_dict.items():
+ if value in self.t_dict.itervalues():
+ print("[%s%s%s] %s" %(self.yello_set,
+ key,
+ self.color_unset,
+ value))
+ print("\n------------------------------------------------------------\n")
+ print(self.cross + " Missing patches in " + self.t_pattern + ':\n')
+ if self.connected_to_gerrit():
+ cmd3 = "git review -r origin -l"
+ review_list = subprocess.check_output(cmd3, shell = True).split('\n')
+ else:
+ review_list = []
+
+ for key, value in self.s_dict.items():
+ if value not in self.t_dict.itervalues():
+ if any(value in s for s in review_list):
+ print("[%s%s%s] %s %s(under review)%s" %(self.yello_set,
+ key,
+ self.color_unset,
+ value,
+ self.green_set,
+ self.color_unset))
+ else:
+ print("[%s%s%s] %s" %(self.yello_set,
+ key,
+ self.color_unset,
+ value))
+ print("\n------------------------------------------------------------\n")
+
+ def main (self):
+ self.check_pattern_exist()
+ self.check_author_exist()
+
+ " actual git commands"
+ cmd1 = 'git log' + self.options + ' ' + self.s_pattern + \
+ ' --author=\'' + self.g_author + '\' ' + self.r_path
+
+ " could be backported by anybody so --author doesn't apply here"
+ cmd2 = 'git log' + self.options + ' ' + self.t_pattern + \
+ ' ' + self.r_path
+
+ s_list = subprocess.check_output(cmd1, shell = True).split('\n')
+ t_list = subprocess.check_output(cmd2, shell = True)
+
+ if len(t_list) is 0:
+ print("No commits in the target: %s" %self.t_pattern)
+ print("see '%s --help'" %__file__)
+ exit()
+ else:
+ t_list = t_list.split('\n')
+
+ self.s_dict = dict()
+ self.t_dict = dict()
+
+ for item in s_list:
+ self.s_dict.update(dict([item.split(' ', 1)]))
+ for item in t_list:
+ self.t_dict.update(dict([item.split(' ', 1)]))
+
+ self.print_output()
+
+
+if __name__ == '__main__':
+ run = GitBranchDiff()
+ run.main()
diff --git a/extras/gluster-rsyslog-5.8.conf b/extras/gluster-rsyslog-5.8.conf
new file mode 100644
index 00000000000..2519999bcac
--- /dev/null
+++ b/extras/gluster-rsyslog-5.8.conf
@@ -0,0 +1,51 @@
+##### gluster.conf #####
+
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named '$msgid'
+#
+$ModLoad mmcount
+$mmcountKey gf_code # start counting value of gf_code
+
+$template Glusterfsd_dynLogFile,"/var/log/glusterfs/bricks/%app-name%.log"
+$template Gluster_dynLogFile,"/var/log/glusterfs/%app-name%.log"
+
+$template GLFS_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+
+#
+## Pass logs to mmcount if app-name is 'gluster'
+#
+if $app-name contains 'gluster' then :mmcount:
+
+if $app-name contains 'glusterfsd' then ?Glusterfsd_dynLogFile;GLFS_Template
+if $app-name contains 'gluster' and not ( $app-name contains 'glusterfsd' ) then ?Gluster_dynLogFile;GLFS_Template
+
+#
+## Sample configuration to send a email alert for every 50th mmcount
+#
+#$ModLoad ommail
+#$ActionMailSMTPServer smtp.example.com
+#$ActionMailFrom rsyslog@example.com
+#$ActionMailTo glusteradmin@example.com
+#$template mailSubject,"50th message of gf_code=9999 on %hostname%"
+#$template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+#$ActionMailSubject mailSubject
+#$ActionExecOnlyOnceEveryInterval 30
+#if $app-name == 'glusterfsd' and $msgid != 0 and $msgid % 50 == 0 \
+#then :ommail:;RSYSLOG_SyslogProtocol23Format
+#
+
+#
+## discard logs where app-name is 'gluster' as we processed already
+#
+if $app-name contains 'gluster' then ~
diff --git a/extras/gluster-rsyslog-7.2.conf b/extras/gluster-rsyslog-7.2.conf
new file mode 100644
index 00000000000..8b2841543f4
--- /dev/null
+++ b/extras/gluster-rsyslog-7.2.conf
@@ -0,0 +1,76 @@
+##### gluster.conf #####
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+$ModLoad mmjsonparse
+*.* :mmjsonparse:
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named 'mmcount'
+##
+## More info at http://www.rsyslog.com/doc/mmcount.html
+#
+#module(load="mmcount")
+#action(type="mmcount" appname="glusterd" key="!gf_code") # count each value of gf_code of appname glusterd
+#action(type="mmcount" appname="glusterfsd" key="!gf_code") # count each value of gf_code of appname glusterfsd
+#action(type="mmcount" appname="glusterfs" key="!gf_code") # count each value of gf_code of appname glusterfs
+
+template (name="Glusterfsd_dynLogFile" type="string" string="/var/log/glusterfs/bricks/%app-name%.log")
+template (name="Gluster_dynLogFile" type="string" string="/var/log/glusterfs/%app-name%.log")
+
+template(name="GLFS_template" type="list") {
+ property(name="$!mmcount")
+ constant(value="/")
+ property(name="syslogfacility-text" caseConversion="upper")
+ constant(value="/")
+ property(name="syslogseverity-text" caseConversion="upper")
+ constant(value=" ")
+ constant(value="[")
+ property(name="timereported" dateFormat="rfc3339")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_code")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_message")
+ constant(value="] ")
+ property(name="$!msg")
+ constant(value="\n")
+}
+
+if $app-name contains 'glusterfsd' then {
+ action(type="omfile"
+ DynaFile="Glusterfsd_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+if $app-name contains 'gluster' then {
+ action(type="omfile"
+ DynaFile="Gluster_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+#
+## send email for every 50th mmcount
+#$ModLoad ommail
+#if $app-name == 'glusterfsd' and $!mmcount <> 0 and $!mmcount % 50 == 0 then {
+# $ActionMailSMTPServer smtp.example.com
+# $ActionMailFrom rsyslog@example.com
+# $ActionMailTo glusteradmin@example.com
+# $template mailSubject,"50th message of gf_code=9999 on %hostname%"
+# $template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+# $ActionMailSubject mailSubject
+# $ActionExecOnlyOnceEveryInterval 30
+# :ommail:;RSYSLOG_SyslogProtocol23Format
+#}
+#
diff --git a/extras/glusterd-sysconfig b/extras/glusterd-sysconfig
new file mode 100644
index 00000000000..8237c571104
--- /dev/null
+++ b/extras/glusterd-sysconfig
@@ -0,0 +1,6 @@
+## Set custom log file and log level (bellow are defaults)
+# LOG_FILE='/var/log/glusterfs/glusterd.log'
+# LOG_LEVEL='INFO'
+
+## Set custom options for glusterd
+# GLUSTERD_OPTIONS=''
diff --git a/extras/glusterd.vol.in b/extras/glusterd.vol.in
new file mode 100644
index 00000000000..5d7bad0e4c8
--- /dev/null
+++ b/extras/glusterd.vol.in
@@ -0,0 +1,15 @@
+volume management
+ type mgmt/glusterd
+ option working-directory @GLUSTERD_WORKDIR@
+ option transport-type socket
+ option transport.socket.keepalive-time 10
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+ option transport.socket.listen-port 24007
+ option ping-timeout 0
+ option event-threads 1
+# option lock-timer 180
+# option transport.address-family inet6
+# option base-port 49152
+ option max-port 60999
+end-volume
diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate
new file mode 100644
index 00000000000..3e7ecf373a1
--- /dev/null
+++ b/extras/glusterfs-georep-logrotate
@@ -0,0 +1,61 @@
+/var/log/glusterfs/geo-replication/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
diff --git a/extras/glusterfs-georep-upgrade.py b/extras/glusterfs-georep-upgrade.py
new file mode 100755
index 00000000000..634576058d6
--- /dev/null
+++ b/extras/glusterfs-georep-upgrade.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python3
+"""
+
+Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+This file is part of GlusterFS.
+
+This file is licensed to you under your choice of the GNU Lesser
+General Public License, version 3 or any later version (LGPLv3 or
+later), or the GNU General Public License, version 2 (GPLv2), in all
+cases as published by the Free Software Foundation.
+
+"""
+
+import argparse
+import errno
+import os, sys
+import shutil
+from datetime import datetime
+
+def find_htime_path(brick_path):
+ dirs = []
+ htime_dir = os.path.join(brick_path, '.glusterfs/changelogs/htime')
+ for file in os.listdir(htime_dir):
+ if os.path.isfile(os.path.join(htime_dir,file)) and file.startswith("HTIME"):
+ dirs.append(os.path.join(htime_dir, file))
+ else:
+ raise FileNotFoundError("%s unavailable" % (os.path.join(htime_dir, file)))
+ return dirs
+
+def modify_htime_file(brick_path):
+ htime_file_path_list = find_htime_path(brick_path)
+
+ for htime_file_path in htime_file_path_list:
+ changelog_path = os.path.join(brick_path, '.glusterfs/changelogs')
+ temp_htime_path = os.path.join(changelog_path, 'htime/temp_htime_file')
+ with open(htime_file_path, 'r') as htime_file, open(temp_htime_path, 'w') as temp_htime_file:
+ #extract epoch times from htime file
+ paths = htime_file.read().split("\x00")
+
+ for pth in paths:
+ epoch_no = pth.split(".")[-1]
+ changelog = os.path.basename(pth)
+ #convert epoch time to year, month and day
+ if epoch_no != '':
+ date=(datetime.fromtimestamp(float(int(epoch_no))).strftime("%Y/%m/%d"))
+ #update paths in temp htime file
+ temp_htime_file.write("%s/%s/%s\x00" % (changelog_path, date, changelog))
+ #create directory in the format year/month/days
+ path = os.path.join(changelog_path, date)
+
+ if changelog.startswith("CHANGELOG."):
+ try:
+ os.makedirs(path, mode = 0o600);
+ except OSError as exc:
+ if exc.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+
+ #copy existing changelogs to new directory structure, delete old changelog files
+ shutil.copyfile(pth, os.path.join(path, changelog))
+ os.remove(pth)
+
+ #rename temp_htime_file with htime file
+ os.rename(htime_file_path, os.path.join('%s.bak'%htime_file_path))
+ os.rename(temp_htime_path, htime_file_path)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('brick_path', help="This upgrade script, which is to be run on\
+ server side, takes brick path as the argument, \
+ updates paths inside htime file and alters the directory structure \
+ above the changelog files inorder to support new optimised format \
+ of the directory structure as per \
+ https://review.gluster.org/#/c/glusterfs/+/23733/")
+ args = parser.parse_args()
+ modify_htime_file(args.brick_path)
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate
index 373ec2e0b92..6ba6ef18e9f 100644
--- a/extras/glusterfs-logrotate
+++ b/extras/glusterfs-logrotate
@@ -1,17 +1,17 @@
-# perform the log rotate every week
-weekly
-# keep the backup of 52 weeks
-rotate 52
-missingok
-
-# compress the logs, but from the .2 onwards
-compress
-delaycompress
-notifempty
-
# Rotate client logs
/var/log/glusterfs/*.log {
sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+# 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
postrotate
/usr/bin/killall -HUP glusterfs > /dev/null 2>&1 || true
/usr/bin/killall -HUP glusterd > /dev/null 2>&1 || true
@@ -21,7 +21,48 @@ notifempty
# Rotate server logs
/var/log/glusterfs/bricks/*.log {
sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+# 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
postrotate
/usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true
endscript
}
+
+/var/log/glusterfs/samples/*.samp {
+ daily
+ rotate 3
+ sharedscripts
+ missingok
+ compress
+ delaycompress
+}
+
+# Rotate snapd log
+/var/log/glusterfs/snaps/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "snapd" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
diff --git a/extras/glusterfs-mode.el b/extras/glusterfs-mode.el
index d4f6dc568b6..a9ed2335ab3 100644
--- a/extras/glusterfs-mode.el
+++ b/extras/glusterfs-mode.el
@@ -1,112 +1,113 @@
-;;; Copyright (C) 2007-2011 Gluster Inc. <http://www.gluster.com>
-;;;
-;;; This program is free software; you can redistribute it and/or modify
-;;; it under the terms of the GNU General Public License as published by
-;;; the Free Software Foundation; either version 2 of the License, or
-;;; (at your option) any later version.
-;;;
-;;; This program is distributed in the hope that it will be useful,
-;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;;; GNU General Public License for more details.
-;;;
-;;; You should have received a copy of the GNU General Public License
-;;; along with this program; if not, write to the Free Software
-;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-;;;
-
-(defvar glusterfs-mode-hook nil)
-
-;; (defvar glusterfs-mode-map
-;; (let ((glusterfs-mode-map (make-keymap)))
-;; (define-key glusterfs-mode-map "\C-j" 'newline-and-indent)
-;; glusterfs-mode-map)
-;; "Keymap for WPDL major mode")
-
-(add-to-list 'auto-mode-alist '("\\.vol\\'" . glusterfs-mode))
-
-(defconst glusterfs-font-lock-keywords-1
- (list
- ; "cluster/{unify,afr,stripe}"
- ; "performance/{io-cache,io-threads,write-behind,read-ahead,stat-prefetch}"
- ; "protocol/{client/server}"
- ; "features/{trash,posix-locks,fixed-id,filter}"
- ; "stroage/posix"
- ; "encryption/rot-13"
- ; "debug/trace"
- '("\\<\\(cluster/\\(unify\\|afr\\|replicate\\|stripe\\|ha\\|dht\\|distribute\\)\\|\\performance/\\(io-\\(cache\\|threads\\)\\|write-behind\\|read-ahead\\|symlink-cache\\)\\|protocol/\\(server\\|client\\)\\|features/\\(trash\\|posix-locks\\|locks\\|path-converter\\|filter\\)\\|storage/\\(posix\\|bdb\\)\\|encryption/rot-13\\|debug/trace\\)\\>" . font-lock-keyword-face))
-"Additional Keywords to highlight in GlusterFS mode.")
-
-(defconst glusterfs-font-lock-keywords-2
- (append glusterfs-font-lock-keywords-1
- (list
- ; "replicate" "namespace" "scheduler" "remote-subvolume" "remote-host"
- ; "auth.addr" "block-size" "remote-port" "listen-port" "transport-type"
- ; "limits.min-free-disk" "directory"
- ; TODO: add all the keys here.
- '("\\<\\(inode-lru-limit\\|replicate\\|namespace\\|scheduler\\|username\\|password\\|allow\\|reject\\|block-size\\|listen-port\\|transport-type\\|transport-timeout\\|directory\\|page-size\\|page-count\\|aggregate-size\\|non-blocking-io\\|client-volume-filename\\|bind-address\\|self-heal\\|read-only-subvolumes\\|read-subvolume\\|thread-count\\|cache-size\\|window-size\\|force-revalidate-timeout\\|priority\\|include\\|exclude\\|remote-\\(host\\|subvolume\\|port\\)\\|auth.\\(addr\\|login\\)\\|limits.\\(min-disk-free\\|transaction-size\\|ib-verbs-\\(work-request-\\(send-\\|recv-\\(count\\|size\\)\\)\\|port\\|mtu\\|device-name\\)\\)\\)\ \\>" . font-lock-constant-face)))
- "option keys in GlusterFS mode.")
-
-(defconst glusterfs-font-lock-keywords-3
- (append glusterfs-font-lock-keywords-2
- (list
- ; "option" "volume" "end-volume" "subvolumes" "type"
- '("\\<\\(option\ \\|volume\ \\|subvolumes\ \\|type\ \\|end-volume\\)\\>" . font-lock-builtin-face)))
- ;'((regexp-opt (" option " "^volume " "^end-volume" "subvolumes " " type ") t) . font-lock-builtin-face))
- "Minimal highlighting expressions for GlusterFS mode.")
-
-
-(defvar glusterfs-font-lock-keywords glusterfs-font-lock-keywords-3
- "Default highlighting expressions for GlusterFS mode.")
-
-(defvar glusterfs-mode-syntax-table
- (let ((glusterfs-mode-syntax-table (make-syntax-table)))
- (modify-syntax-entry ?\# "<" glusterfs-mode-syntax-table)
- (modify-syntax-entry ?* ". 23" glusterfs-mode-syntax-table)
- (modify-syntax-entry ?\n ">#" glusterfs-mode-syntax-table)
- glusterfs-mode-syntax-table)
- "Syntax table for glusterfs-mode")
-
-;; TODO: add an indentation table
-
-(defun glusterfs-indent-line ()
- "Indent current line as GlusterFS code"
- (interactive)
- (beginning-of-line)
- (if (bobp)
- (indent-line-to 0) ; First line is always non-indented
- (let ((not-indented t) cur-indent)
- (if (looking-at "^[ \t]*volume\ ")
- (progn
- (save-excursion
- (forward-line -1)
- (setq not-indented nil)
- (setq cur-indent 0))))
- (if (looking-at "^[ \t]*end-volume")
- (progn
- (save-excursion
- (forward-line -1)
- (setq cur-indent 0))
- (if (< cur-indent 0) ; We can't indent past the left margin
- (setq cur-indent 0)))
- (save-excursion
- (while not-indented ; Iterate backwards until we find an indentation hint
- (progn
- (setq cur-indent 2) ; Do the actual indenting
- (setq not-indented nil)))))
- (if cur-indent
- (indent-line-to cur-indent)
- (indent-line-to 0)))))
-
-(defun glusterfs-mode ()
- (interactive)
- (kill-all-local-variables)
- ;; (use-local-map glusterfs-mode-map)
- (set-syntax-table glusterfs-mode-syntax-table)
- (set (make-local-variable 'indent-line-function) 'glusterfs-indent-line)
- (set (make-local-variable 'font-lock-defaults) '(glusterfs-font-lock-keywords))
- (setq major-mode 'glusterfs-mode)
- (setq mode-name "GlusterFS")
- (run-hooks 'glusterfs-mode-hook))
-
-(provide 'glusterfs-mode)
+;;; Copyright (C) 2007-2017 Red Hat, Inc. <http://www.redhat.com>
+;;; Copyright (C) 2007-2011 Gluster Inc. <http://www.gluster.com>
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License
+;;; as published by the Free Software Foundation; either version 2
+;;; of the License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+;;;
+
+(defvar glusterfs-mode-hook nil)
+
+;; (defvar glusterfs-mode-map
+;; (let ((glusterfs-mode-map (make-keymap)))
+;; (define-key glusterfs-mode-map "\C-j" 'newline-and-indent)
+;; glusterfs-mode-map)
+;; "Keymap for WPDL major mode")
+
+(add-to-list 'auto-mode-alist '("\\.vol\\'" . glusterfs-mode))
+
+(defconst glusterfs-font-lock-keywords-1
+ (list
+ ; "cluster/{unify,afr,stripe}"
+ ; "performance/{io-cache,io-threads,write-behind,read-ahead,stat-prefetch}"
+ ; "protocol/{client/server}"
+ ; "features/{trash,posix-locks,fixed-id,filter}"
+ ; "storage/posix"
+ ; "encryption/rot-13"
+ ; "debug/trace"
+ '("\\<\\(cluster/\\(unify\\|afr\\|replicate\\|stripe\\|ha\\|dht\\|distribute\\)\\|\\performance/\\(io-\\(cache\\|threads\\)\\|write-behind\\|read-ahead\\|symlink-cache\\)\\|protocol/\\(server\\|client\\)\\|features/\\(trash\\|posix-locks\\|locks\\|path-converter\\|filter\\)\\|storage/\\(posix\\|bdb\\)\\|encryption/rot-13\\|debug/trace\\)\\>" . font-lock-keyword-face))
+"Additional Keywords to highlight in GlusterFS mode.")
+
+(defconst glusterfs-font-lock-keywords-2
+ (append glusterfs-font-lock-keywords-1
+ (list
+ ; "replicate" "namespace" "scheduler" "remote-subvolume" "remote-host"
+ ; "auth.addr" "block-size" "remote-port" "listen-port" "transport-type"
+ ; "limits.min-free-disk" "directory"
+ ; TODO: add all the keys here.
+ '("\\<\\(inode-lru-limit\\|replicate\\|namespace\\|scheduler\\|username\\|password\\|allow\\|reject\\|block-size\\|listen-port\\|transport-type\\|transport-timeout\\|directory\\|page-size\\|page-count\\|aggregate-size\\|non-blocking-io\\|client-volume-filename\\|bind-address\\|self-heal\\|read-only-subvolumes\\|read-subvolume\\|thread-count\\|cache-size\\|window-size\\|force-revalidate-timeout\\|priority\\|include\\|exclude\\|remote-\\(host\\|subvolume\\|port\\)\\|auth.\\(addr\\|login\\)\\|limits.\\(min-disk-free\\|transaction-size\\|ib-verbs-\\(work-request-\\(send-\\|recv-\\(count\\|size\\)\\)\\|port\\|mtu\\|device-name\\)\\)\\)\ \\>" . font-lock-constant-face)))
+ "option keys in GlusterFS mode.")
+
+(defconst glusterfs-font-lock-keywords-3
+ (append glusterfs-font-lock-keywords-2
+ (list
+ ; "option" "volume" "end-volume" "subvolumes" "type"
+ '("\\<\\(option\ \\|volume\ \\|subvolumes\ \\|type\ \\|end-volume\\)\\>" . font-lock-builtin-face)))
+ ;'((regexp-opt (" option " "^volume " "^end-volume" "subvolumes " " type ") t) . font-lock-builtin-face))
+ "Minimal highlighting expressions for GlusterFS mode.")
+
+
+(defvar glusterfs-font-lock-keywords glusterfs-font-lock-keywords-3
+ "Default highlighting expressions for GlusterFS mode.")
+
+(defvar glusterfs-mode-syntax-table
+ (let ((glusterfs-mode-syntax-table (make-syntax-table)))
+ (modify-syntax-entry ?\# "<" glusterfs-mode-syntax-table)
+ (modify-syntax-entry ?* ". 23" glusterfs-mode-syntax-table)
+ (modify-syntax-entry ?\n ">#" glusterfs-mode-syntax-table)
+ glusterfs-mode-syntax-table)
+ "Syntax table for glusterfs-mode")
+
+;; TODO: add an indentation table
+
+(defun glusterfs-indent-line ()
+ "Indent current line as GlusterFS code"
+ (interactive)
+ (beginning-of-line)
+ (if (bobp)
+ (indent-line-to 0) ; First line is always non-indented
+ (let ((not-indented t) cur-indent)
+ (if (looking-at "^[ \t]*volume\ ")
+ (progn
+ (save-excursion
+ (forward-line -1)
+ (setq not-indented nil)
+ (setq cur-indent 0))))
+ (if (looking-at "^[ \t]*end-volume")
+ (progn
+ (save-excursion
+ (forward-line -1)
+ (setq cur-indent 0))
+ (if (< cur-indent 0) ; We can't indent past the left margin
+ (setq cur-indent 0)))
+ (save-excursion
+ (while not-indented ; Iterate backwards until we find an indentation hint
+ (progn
+ (setq cur-indent 2) ; Do the actual indenting
+ (setq not-indented nil)))))
+ (if cur-indent
+ (indent-line-to cur-indent)
+ (indent-line-to 0)))))
+
+(defun glusterfs-mode ()
+ (interactive)
+ (kill-all-local-variables)
+ ;; (use-local-map glusterfs-mode-map)
+ (set-syntax-table glusterfs-mode-syntax-table)
+ (set (make-local-variable 'indent-line-function) 'glusterfs-indent-line)
+ (set (make-local-variable 'font-lock-defaults) '(glusterfs-font-lock-keywords))
+ (setq major-mode 'glusterfs-mode)
+ (setq mode-name "GlusterFS")
+ (run-hooks 'glusterfs-mode-hook))
+
+(provide 'glusterfs-mode)
diff --git a/extras/glusterfs.vim b/extras/glusterfs.vim
index 62102c1ee1b..899cc65511c 100644
--- a/extras/glusterfs.vim
+++ b/extras/glusterfs.vim
@@ -1,20 +1,11 @@
" glusterfs.vim: GNU Vim Syntax file for GlusterFS .vol specification
-" Copyright (c) 2017-2011 Gluster, Inc. <http://www.gluster.com>
-" This file is part of GlusterFS.
+" Copyright (c) 2007777777Red Hat, Inc. <http://www.redhat.com>
+" This file is part of GlusterFS.
"
-" GlusterFS is free software; you can redistribute it and/or modify
-" it under the terms of the GNU General Public License as published
-" by the Free Software Foundation; either version 3 of the License,
-" or (at your option) any later version.
-"
-" GlusterFS is distributed in the hope that it will be useful, but
-" WITHOUT ANY WARRANTY; without even the implied warranty of
-" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-" General Public License for more details.
-"
-" You should have received a copy of the GNU General Public License
-" along with this program. If not, see
-" <http://www.gnu.org/licenses/>.
+" This file is licensed to you under your choice of the GNU Lesser
+" General Public License, version 3 or any later version (LGPLv3 or
+" later), or the GNU General Public License, version 2 (GPLv2), in all
+" cases as published by the Free Software Foundation.
"
" Last Modified: Wed Aug 1 00:47:10 IST 2007
" Version: 0.8
diff --git a/extras/gnfs-loganalyse.py b/extras/gnfs-loganalyse.py
index 6b24e5a7192..6341d007188 100644..100755
--- a/extras/gnfs-loganalyse.py
+++ b/extras/gnfs-loganalyse.py
@@ -1,23 +1,16 @@
#!/bin/python
"""
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
"""
+from __future__ import print_function
import os
import string
import sys
@@ -80,7 +73,7 @@ class NFSRequest:
self.replygfid = tokens [gfididx + 1].strip(",")
def dump (self):
- print "ReqLine: " + str(self.calllinecount) + " TimeStamp: " + self.timestamp + ", XID: " + self.xid + " " + self.op + " ARGS: " + self.opdata + " RepLine: " + str(self.replylinecount) + " " + self.replydata
+ print("ReqLine: " + str(self.calllinecount) + " TimeStamp: " + self.timestamp + ", XID: " + self.xid + " " + self.op + " ARGS: " + self.opdata + " RepLine: " + str(self.replylinecount) + " " + self.replydata)
class NFSLogAnalyzer:
@@ -157,7 +150,7 @@ class NFSLogAnalyzer:
return
rcount = len (self.xid_request_map.keys ())
orphancount = len (self.orphan_replies.keys ())
- print "Requests: " + str(rcount) + ", Orphans: " + str(orphancount)
+ print("Requests: " + str(rcount) + ", Orphans: " + str(orphancount))
def dump (self):
self.getStats ()
diff --git a/extras/group-db-workload b/extras/group-db-workload
new file mode 100644
index 00000000000..9334d6fb942
--- /dev/null
+++ b/extras/group-db-workload
@@ -0,0 +1,12 @@
+performance.open-behind=on
+performance.write-behind=off
+performance.stat-prefetch=off
+performance.quick-read=off
+performance.strict-o-direct=on
+performance.read-ahead=off
+performance.io-cache=off
+performance.readdir-ahead=off
+performance.client-io-threads=on
+server.event-threads=4
+client.event-threads=4
+performance.read-after-open=yes
diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt
new file mode 100644
index 00000000000..a960b76c694
--- /dev/null
+++ b/extras/group-distributed-virt
@@ -0,0 +1,10 @@
+performance.quick-read=off
+performance.read-ahead=off
+performance.io-cache=off
+performance.low-prio-threads=32
+network.remote-dio=enable
+features.shard=on
+user.cifs=off
+client.event-threads=4
+server.event-threads=4
+performance.client-io-threads=on
diff --git a/extras/group-gluster-block b/extras/group-gluster-block
new file mode 100644
index 00000000000..1e398019e6b
--- /dev/null
+++ b/extras/group-gluster-block
@@ -0,0 +1,27 @@
+performance.quick-read=off
+performance.read-ahead=off
+performance.io-cache=off
+performance.stat-prefetch=off
+performance.open-behind=off
+performance.readdir-ahead=off
+performance.strict-o-direct=on
+performance.client-io-threads=on
+performance.io-thread-count=32
+performance.high-prio-threads=32
+performance.normal-prio-threads=32
+performance.low-prio-threads=32
+performance.least-prio-threads=4
+client.event-threads=8
+server.event-threads=8
+network.remote-dio=disable
+cluster.eager-lock=enable
+cluster.quorum-type=auto
+cluster.data-self-heal-algorithm=full
+cluster.locking-scheme=granular
+cluster.shd-max-threads=8
+cluster.shd-wait-qlength=10000
+features.shard=on
+features.shard-block-size=64MB
+user.cifs=off
+server.allow-insecure=on
+cluster.choose-local=off
diff --git a/extras/group-metadata-cache b/extras/group-metadata-cache
new file mode 100644
index 00000000000..b890b288fc7
--- /dev/null
+++ b/extras/group-metadata-cache
@@ -0,0 +1,6 @@
+features.cache-invalidation=on
+features.cache-invalidation-timeout=600
+performance.stat-prefetch=on
+performance.cache-invalidation=on
+performance.md-cache-timeout=600
+network.inode-lru-limit=200000
diff --git a/extras/group-nl-cache b/extras/group-nl-cache
new file mode 100644
index 00000000000..897807e8933
--- /dev/null
+++ b/extras/group-nl-cache
@@ -0,0 +1,5 @@
+features.cache-invalidation=on
+features.cache-invalidation-timeout=600
+performance.nl-cache=on
+performance.nl-cache-timeout=600
+network.inode-lru-limit=200000
diff --git a/extras/group-samba b/extras/group-samba
new file mode 100644
index 00000000000..eeee6e06031
--- /dev/null
+++ b/extras/group-samba
@@ -0,0 +1,11 @@
+features.cache-invalidation=on
+features.cache-invalidation-timeout=600
+performance.cache-samba-metadata=on
+performance.stat-prefetch=on
+performance.cache-invalidation=on
+performance.md-cache-timeout=600
+network.inode-lru-limit=200000
+performance.nl-cache=on
+performance.nl-cache-timeout=600
+performance.readdir-ahead=on
+performance.parallel-readdir=on
diff --git a/extras/group-virt.example b/extras/group-virt.example
index 75a5ef31426..cc37c98a25c 100644
--- a/extras/group-virt.example
+++ b/extras/group-virt.example
@@ -1,6 +1,24 @@
-quick-read=off
-read-ahead=off
-io-cache=off
-stat-prefetch=off
-linux-aio=enable
-eager-lock=enable
+performance.quick-read=off
+performance.read-ahead=off
+performance.io-cache=off
+performance.low-prio-threads=32
+network.remote-dio=disable
+performance.strict-o-direct=on
+cluster.eager-lock=enable
+cluster.quorum-type=auto
+cluster.server-quorum-type=server
+cluster.data-self-heal-algorithm=full
+cluster.locking-scheme=granular
+cluster.shd-max-threads=8
+cluster.shd-wait-qlength=10000
+features.shard=on
+user.cifs=off
+cluster.choose-local=off
+client.event-threads=4
+server.event-threads=4
+performance.client-io-threads=on
+network.ping-timeout=20
+server.tcp-user-timeout=20
+server.keepalive-time=10
+server.keepalive-interval=2
+server.keepalive-count=5
diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am
index 9994eae6b8d..26059d7dbb9 100644
--- a/extras/hook-scripts/Makefile.am
+++ b/extras/hook-scripts/Makefile.am
@@ -1,2 +1,7 @@
-SUBDIRS = start stop
-CLEANFILES =
+EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh
+SUBDIRS = add-brick create delete set start stop reset
+
+scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/
+if USE_GEOREP
+scripts_SCRIPTS = S56glusterd-geo-rep-create-post.sh
+endif
diff --git a/extras/hook-scripts/S40ufo-stop.py b/extras/hook-scripts/S40ufo-stop.py
new file mode 100755
index 00000000000..2c79eb1d54a
--- /dev/null
+++ b/extras/hook-scripts/S40ufo-stop.py
@@ -0,0 +1,24 @@
+#!/usr/bin/python3
+
+import os
+from optparse import OptionParser
+
+if __name__ == '__main__':
+ # check if swift is installed
+ try:
+ from gluster.swift.common.Glusterfs import get_mnt_point, unmount
+ except ImportError:
+ import sys
+ sys.exit("Openstack Swift does not appear to be installed properly");
+
+ op = OptionParser(usage="%prog [options...]")
+ op.add_option('--volname', dest='vol', type=str)
+ op.add_option('--last', dest='last', type=str)
+ (opts, args) = op.parse_args()
+
+
+ mnt_point = get_mnt_point(opts.vol)
+ if mnt_point:
+ unmount(mnt_point)
+ else:
+ sys.exit("get_mnt_point returned none for mount point")
diff --git a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
new file mode 100755
index 00000000000..7d6052315bb
--- /dev/null
+++ b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+#key_val_pair is the arguments passed to the script in the form of
+#key value pair
+
+key_val_pair1=`echo $2 | cut -d ',' -f 1`
+key_val_pair2=`echo $2 | cut -d ',' -f 2`
+key_val_pair3=`echo $2 | cut -d ',' -f 3`
+key_val_pair4=`echo $2 | cut -d ',' -f 4`
+key_val_pair5=`echo $2 | cut -d ',' -f 5`
+key_val_pair6=`echo $2 | cut -d ',' -f 6`
+
+mastervol=`echo $1 | cut -d '=' -f 2`
+if [ "$mastervol" == "" ]; then
+ exit;
+fi
+
+key=`echo $key_val_pair1 | cut -d '=' -f 1`
+val=`echo $key_val_pair1 | cut -d '=' -f 2`
+if [ "$key" != "is_push_pem" ]; then
+ exit;
+fi
+if [ "$val" != '1' ]; then
+ exit;
+fi
+
+key=`echo $key_val_pair2 | cut -d '=' -f 1`
+val=`echo $key_val_pair2 | cut -d '=' -f 2`
+if [ "$key" != "pub_file" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+
+pub_file=`echo $val`
+pub_file_bname="$(basename $pub_file)"
+pub_file_dname="$(dirname $pub_file)"
+pub_file_tmp=`echo $val`_tmp
+
+key=`echo $key_val_pair3 | cut -d '=' -f 1`
+val=`echo $key_val_pair3 | cut -d '=' -f 2`
+if [ "$key" != "slave_user" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slave_user=`echo $val`
+
+key=`echo $key_val_pair4 | cut -d '=' -f 1`
+val=`echo $key_val_pair4 | cut -d '=' -f 2`
+if [ "$key" != "slave_ip" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slave_ip=`echo $val`
+
+key=`echo $key_val_pair5 | cut -d '=' -f 1`
+val=`echo $key_val_pair5 | cut -d '=' -f 2`
+if [ "$key" != "slave_vol" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slavevol=`echo $val`
+
+key=`echo $key_val_pair6 | cut -d '=' -f 1`
+val=`echo $key_val_pair6 | cut -d '=' -f 2`
+if [ "$key" != "ssh_port" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+SSH_PORT=`echo $val`
+SSH_OPT="-oPasswordAuthentication=no -oStrictHostKeyChecking=no"
+
+if [ -f $pub_file ]; then
+ # For a non-root user copy the pub file to the user's home directory
+ # For a root user copy the pub files to priv_dir->geo-rep.
+ if [ "$slave_user" != "root" ]; then
+ slave_user_home_dir=`ssh -p ${SSH_PORT} ${SSH_OPT} $slave_user@$slave_ip "getent passwd $slave_user | cut -d ':' -f 6"`
+ scp -P ${SSH_PORT} ${SSH_OPT} $pub_file $slave_user@$slave_ip:$slave_user_home_dir/common_secret.pem.pub_tmp
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_user@$slave_ip "mv $slave_user_home_dir/common_secret.pem.pub_tmp $slave_user_home_dir/${mastervol}_${slavevol}_common_secret.pem.pub"
+ else
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ scp -P ${SSH_PORT} ${SSH_OPT} $pub_file $slave_ip:$pub_file_tmp
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}"
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on"
+ else
+ scp -P ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $pub_file $slave_ip:$pub_file_tmp
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}"
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on"
+ fi
+ fi
+fi
diff --git a/extras/hook-scripts/add-brick/Makefile.am b/extras/hook-scripts/add-brick/Makefile.am
new file mode 100644
index 00000000000..6e2701e909e
--- /dev/null
+++ b/extras/hook-scripts/add-brick/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = post pre
+CLEANFILES =
diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am
new file mode 100644
index 00000000000..9b236df096d
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/
+if WITH_SERVER
+hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
+endif
diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
new file mode 100755
index 00000000000..4a17c993a77
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Install to hooks/<HOOKS_VER>/add-brick/post
+#
+# Add an SELinux file context for each brick using the glusterd_brick_t type.
+# This ensures that the brick is relabeled correctly on an SELinux restart or
+# restore. Subsequently, run a restore on the brick path to set the selinux
+# labels.
+#
+###
+
+PROGNAME="Sselinux"
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+VOL=
+
+parse_args () {
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ ;;
+ --volume-op)
+ shift
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+set_brick_labels()
+{
+ local volname="${1}"
+ local fctx
+ local list=()
+
+ fctx="$(semanage fcontext --list -C)"
+
+ # wait for new brick path to be updated under
+ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/
+ sleep 5
+
+ # grab the path for each local brick
+ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/"
+ brickdirs=$(
+ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | \
+ sort -u
+ )
+
+ # create a list of bricks for which custom SELinux
+ # label doesn't exist
+ for b in ${brickdirs}; do
+ pattern="${b}(/.*)?"
+ echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+ if [[ $? -ne 0 ]]; then
+ list+=("${pattern}")
+ fi
+ done
+
+ # Add a file context for each brick path in the list and associate with the
+ # glusterd_brick_t SELinux type.
+ for p in ${list[@]}
+ do
+ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}"
+ done
+
+ # Set the labels for which SELinux label was added above
+ for b in ${brickdirs}
+ do
+ echo "${list[@]}" | grep "${b}" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ restorecon -R "${b}"
+ fi
+ done
+}
+
+SELINUX_STATE=$(which getenforce && getenforce)
+[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+parse_args "$@"
+[ -z "${VOL}" ] && exit 1
+
+set_brick_labels "${VOL}"
+
+exit 0
diff --git a/extras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh b/extras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh
new file mode 100755
index 00000000000..1a6923ee7aa
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+##---------------------------------------------------------------------------
+## This script runs the self-heal of the directories which are expected to
+## be present as they are mounted as subdirectory mounts.
+##---------------------------------------------------------------------------
+
+MOUNT_DIR=`mktemp -d -t ${0##*/}.XXXXXX`;
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+PROGNAME="add-brick-create-subdir"
+VOL_NAME=test
+GLUSTERD_WORKDIR="/var/lib/glusterd"
+
+cleanup_mountpoint ()
+{
+ umount -f $MOUNT_DIR;
+ if [ 0 -ne $? ]
+ then
+ return $?
+ fi
+
+ rmdir $MOUNT_DIR;
+ if [ 0 -ne $? ]
+ then
+ return $?
+ fi
+}
+
+##------------------------------------------
+## Parse the arguments
+##------------------------------------------
+ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+eval set -- "$ARGS"
+
+while true;
+do
+ case $1 in
+ --volname)
+ shift
+ VOL_NAME=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ ;;
+ --volume-op)
+ shift
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+done
+
+## See if we have any subdirs to be healed before going further
+subdirs=$(grep 'auth.allow' ${GLUSTERD_WORKDIR}/vols/${VOL_NAME}/info | cut -f2 -d'=' | tr ',' '\n' | cut -f1 -d'(');
+
+if [ -z ${subdirs} ]; then
+ rmdir $MOUNT_DIR;
+ exit 0;
+fi
+
+##----------------------------------------
+## Mount the volume in temp directory.
+## -----------------------------------
+glusterfs -s localhost --volfile-id=$VOL_NAME --client-pid=-50 $MOUNT_DIR;
+if [ 0 -ne $? ]
+then
+ exit $?;
+fi
+
+## -----------------------------------
+# Do the 'stat' on all the directory for now. Ideal fix is to look at subdir
+# list from 'auth.allow' option and only stat them.
+for subdir in ${subdirs}
+do
+ stat ${MOUNT_DIR}/${subdir} > /dev/null;
+done
+
+## Clean up and exit
+cleanup_mountpoint;
diff --git a/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
new file mode 100755
index 00000000000..ca17a903549
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
@@ -0,0 +1,145 @@
+#!/bin/sh
+
+##---------------------------------------------------------------------------
+## This script updates the 'limit-set' xattr on the newly added node. Please
+## refer hook-scripts/add-brick/pre/S28Quota-root-xattr-heal.sh for the complete
+## description.
+## Do the following only if limit configured on root.
+## 1. Do an auxiliary mount.
+## 2. Get 'limit-set' xattr on root
+## 3. Set xattrs with the same value on the root.
+## 4. Disable itself
+##---------------------------------------------------------------------------
+
+QUOTA_LIMIT_XATTR="trusted.glusterfs.quota.limit-set"
+QUOTA_OBJECT_LIMIT_XATTR="trusted.glusterfs.quota.limit-objects"
+MOUNT_DIR=$(mktemp -d -t "${0##*/}.XXXXXX");
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+PROGNAME="Quota-xattr-heal-add-brick"
+VOL_NAME=
+VERSION=
+VOLUME_OP=
+GLUSTERD_WORKDIR=
+ENABLED_NAME_PREFIX="S28"
+ENABLED_NAME="Quota-root-xattr-heal.sh"
+
+THIS_SCRIPT=$(echo "${0}" | awk -F'/' '{print $NF}')
+
+cleanup_mountpoint ()
+{
+
+ if umount -f "${MOUNT_DIR}"; then
+ return $?
+ fi
+
+ if rmdir "${MOUNT_DIR}"; then
+ return $?
+ fi
+}
+
+disable_and_exit ()
+{
+ if [ -e "${ENABLED_STATE}" ]
+ then
+ unlink "${ENABLED_STATE}";
+ exit $?
+ fi
+
+ exit 0
+}
+
+get_and_set_xattr ()
+{
+ XATTR=$1
+
+ VALUE=$(getfattr -n "${XATTR}" -e hex --absolute-names "${MOUNT_DIR}" 2>&1)
+ RET=$?
+ if [ 0 -eq ${RET} ]; then
+ VALUE=$(echo "${VALUE}" | grep "${XATTR}" | awk -F'=' '{print $NF}')
+ setfattr -n "${XATTR}" -v "${VALUE}" "${MOUNT_DIR}";
+ RET=$?
+ else
+ if echo "${VALUE}" | grep -iq "No such attribute" ; then
+ RET=0
+ fi
+ fi
+
+ return ${RET};
+}
+
+##------------------------------------------
+## Parse the arguments
+##------------------------------------------
+ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+eval set -- "$ARGS"
+
+while true;
+do
+ case $1 in
+ --volname)
+ shift
+ VOL_NAME=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+done
+##----------------------------------------
+
+# Avoid long lines
+ENABLED_STATE_1="${GLUSTERD_WORKDIR}/hooks/${VERSION}/${VOLUME_OP}/"
+ENABLED_STATE_2="post/${ENABLED_NAME_PREFIX}${VOL_NAME}-${ENABLED_NAME}"
+ENABLED_STATE="${ENABLED_STATE_1}${ENABLED_STATE_2}"
+
+if [ "${THIS_SCRIPT}" != *"${VOL_NAME}"* ]; then
+ exit 0
+fi
+
+## Is quota enabled?
+FLAG=$(grep "^features.quota=" "${GLUSTERD_WORKDIR}/vols/${VOL_NAME}/info" \
+| awk -F'=' '{print $NF}');
+if [ "${FLAG}" != "on" ]
+then
+ disable_and_exit
+fi
+
+## -----------------------------------
+## Mount the volume in temp directory.
+## -----------------------------------
+# Avoid long lines
+CMD_1="glusterfs -s localhost"
+CMD_2="--volfile-id=${VOL_NAME} client-pid=-42 ${MOUNT_DIR}"
+CMD="${CMD_1}${CMD_2}"
+
+if ${CMD}
+then
+ exit $?;
+fi
+## -----------------------------------
+
+RET1=$(get_and_set_xattr "${QUOTA_LIMIT_XATTR}")
+RET2=$(get_and_set_xattr "${QUOTA_OBJECT_LIMIT_XATTR}")
+
+## Clean up and exit
+cleanup_mountpoint;
+
+if [ "${RET1}" -ne 0 ] || [ "${RET2}" -ne 0 ]; then
+ exit 1
+fi
+
+disable_and_exit;
diff --git a/extras/hook-scripts/add-brick/pre/Makefile.am b/extras/hook-scripts/add-brick/pre/Makefile.am
new file mode 100644
index 00000000000..3288581aa57
--- /dev/null
+++ b/extras/hook-scripts/add-brick/pre/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = S28Quota-enable-root-xattr-heal.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/pre/
+if WITH_SERVER
+hook_SCRIPTS = S28Quota-enable-root-xattr-heal.sh
+endif
diff --git a/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
new file mode 100755
index 00000000000..27e85231f45
--- /dev/null
+++ b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+###############################################################################
+## ----------------------------------------------------------------------------
+## The scripts
+## I. add-brick/pre/S28Quota-root-xattr-heal.sh (itself)
+## II. add-brick/post/disabled-root-xattr-heal.sh AND
+## collectively archieves the job of healing the 'limit-set' xattr upon
+## add-brick to the gluster volume.
+##
+## This script is the 'controlling' script. Upon add-brick this script enables
+## the corresponding script based on the status of the volume.
+## If volume is started - enable add-brick/post script
+## else - enable start/post script.
+##
+## The enabling and disabling of a script is based on the glusterd's logic,
+## that it only runs the scripts which starts its name with 'S'. So,
+## Enable - symlink the file to 'S'*.
+## Disable- unlink symlink
+## ----------------------------------------------------------------------------
+###############################################################################
+
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+PROGNAME="Quota-xattr-heal-add-brick-pre"
+VOL_NAME=
+GLUSTERD_WORKDIR=
+VOLUME_OP=
+VERSION=
+ENABLED_NAME_PREFIX="S28"
+ENABLED_NAME="Quota-root-xattr-heal.sh"
+DISABLED_NAME="disabled-quota-root-xattr-heal.sh"
+
+activate ()
+{
+ ln -sf $DISABLED_STATE $1;
+}
+
+##------------------------------------------
+## Parse the arguments
+##------------------------------------------
+ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+eval set -- "$ARGS"
+
+while true;
+do
+ case $1 in
+ --volname)
+ shift
+ VOL_NAME=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+done
+##----------------------------------------
+
+DISABLED_STATE="$GLUSTERD_WORKDIR/hooks/$VERSION/add-brick/post/$DISABLED_NAME"
+ENABLED_STATE_START="$GLUSTERD_WORKDIR/hooks/$VERSION/start/post/""$ENABLED_NAME_PREFIX$VOL_NAME""-""$ENABLED_NAME"
+ENABLED_STATE_ADD_BRICK="$GLUSTERD_WORKDIR/hooks/$VERSION/add-brick/post/""$ENABLED_NAME_PREFIX""$VOL_NAME""-""$ENABLED_NAME";
+
+## Why to proceed if the required script itself is not present?
+ls $DISABLED_STATE;
+if [ 0 -ne $? ]
+then
+ exit $?;
+fi
+
+## Is quota enabled?
+FLAG=`cat $GLUSTERD_WORKDIR/vols/$VOL_NAME/info | grep "^features.quota=" \
+ | awk -F'=' '{print $NF}'`;
+if [ "$FLAG" != "on" ]
+then
+ exit $EXIT_SUCCESS;
+fi
+
+## Is volume started?
+FLAG=`cat $GLUSTERD_WORKDIR/vols/$VOL_NAME/info | grep "^status=" \
+ | awk -F'=' '{print $NF}'`;
+if [ "$FLAG" != "1" ]
+then
+ activate $ENABLED_STATE_START;
+ exit $?
+fi
+
+activate $ENABLED_STATE_ADD_BRICK;
+exit $?
diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am
new file mode 100644
index 00000000000..b083a9145d6
--- /dev/null
+++ b/extras/hook-scripts/create/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = post
diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am
new file mode 100644
index 00000000000..fd1892e9589
--- /dev/null
+++ b/extras/hook-scripts/create/post/Makefile.am
@@ -0,0 +1,8 @@
+EXTRA_DIST = S10selinux-label-brick.sh
+
+scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/
+if WITH_SERVER
+if USE_SELINUX
+scripts_SCRIPTS = S10selinux-label-brick.sh
+endif
+endif
diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
new file mode 100755
index 00000000000..f9b4b1a57e3
--- /dev/null
+++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Install to hooks/<HOOKS_VER>/create/post
+#
+# Add an SELinux file context for each brick using the glusterd_brick_t type.
+# This ensures that the brick is relabeled correctly on an SELinux restart or
+# restore. Subsequently, run a restore on the brick path to set the selinux
+# labels.
+#
+###
+
+PROGNAME="Sselinux"
+OPTSPEC="volname:"
+VOL=
+
+parse_args () {
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+set_brick_labels()
+{
+ volname="${1}"
+
+ # grab the path for each local brick
+ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+ brickdirs=$(
+ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | \
+ sort -u
+ )
+
+ for b in ${brickdirs}; do
+ # Add a file context for each brick path and associate with the
+ # glusterd_brick_t SELinux type.
+ pattern="${b}(/.*)?"
+ semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}"
+ # Set the labels on the new brick path.
+ restorecon -R "${b}"
+ done
+}
+
+SELINUX_STATE=$(which getenforce && getenforce)
+[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+parse_args "$@"
+[ -z "${VOL}" ] && exit 1
+
+set_brick_labels "${VOL}"
+
+exit 0
diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am
new file mode 100644
index 00000000000..c98a05d9205
--- /dev/null
+++ b/extras/hook-scripts/delete/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = pre
diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am
new file mode 100644
index 00000000000..4fbfbe7311f
--- /dev/null
+++ b/extras/hook-scripts/delete/pre/Makefile.am
@@ -0,0 +1,8 @@
+EXTRA_DIST = S10selinux-del-fcontext.sh
+
+scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/
+if WITH_SERVER
+if USE_SELINUX
+scripts_SCRIPTS = S10selinux-del-fcontext.sh
+endif
+endif
diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
new file mode 100755
index 00000000000..056b52afe76
--- /dev/null
+++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Install to hooks/<HOOKS_VER>/delete/pre
+#
+# Delete the file context associated with the brick path on volume deletion. The
+# associated file context was added during volume creation.
+#
+# We do not explicitly relabel the brick, as this could be time consuming and
+# unnecessary.
+#
+###
+
+PROGNAME="Sselinux"
+OPTSPEC="volname:"
+VOL=
+
+function parse_args () {
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+function delete_brick_fcontext()
+{
+ local volname=$1
+ local fctx
+ local list=()
+
+ fctx="$(semanage fcontext --list -C)"
+ # grab the path for each local brick
+ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+ brickdirs=$(find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | sort -u)
+ for b in ${brickdirs}
+ do
+ pattern="${b}(/.*)?"
+ echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ list+=("${pattern}")
+ fi
+ done
+ if [[ ${#list[@]} -gt 0 ]]; then
+ printf 'fcontext --delete %s\n' "${list[@]}" | semanage -i -
+ fi
+ for b in ${brickdirs}
+ do
+ restorecon -R "${b}"
+ done
+}
+
+SELINUX_STATE=$(which getenforce && getenforce)
+[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+parse_args "$@"
+[ -z "${VOL}" ] && exit 1
+
+delete_brick_fcontext "${VOL}"
+
+# failure to delete the fcontext is not fatal
+exit 0
diff --git a/extras/hook-scripts/reset/Makefile.am b/extras/hook-scripts/reset/Makefile.am
new file mode 100644
index 00000000000..6e2701e909e
--- /dev/null
+++ b/extras/hook-scripts/reset/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = post pre
+CLEANFILES =
diff --git a/extras/hook-scripts/reset/post/Makefile.am b/extras/hook-scripts/reset/post/Makefile.am
new file mode 100644
index 00000000000..1b336ac1a85
--- /dev/null
+++ b/extras/hook-scripts/reset/post/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST =
diff --git a/extras/hook-scripts/reset/pre/Makefile.am b/extras/hook-scripts/reset/pre/Makefile.am
new file mode 100644
index 00000000000..1b336ac1a85
--- /dev/null
+++ b/extras/hook-scripts/reset/pre/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST =
diff --git a/extras/hook-scripts/set/Makefile.am b/extras/hook-scripts/set/Makefile.am
new file mode 100644
index 00000000000..1fcade4b07f
--- /dev/null
+++ b/extras/hook-scripts/set/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = post
+CLEANFILES =
diff --git a/extras/hook-scripts/set/post/Makefile.am b/extras/hook-scripts/set/post/Makefile.am
new file mode 100644
index 00000000000..506a25a8666
--- /dev/null
+++ b/extras/hook-scripts/set/post/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = S30samba-set.sh S32gluster_enable_shared_storage.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/set/post/
+if WITH_SERVER
+hook_SCRIPTS = $(EXTRA_DIST)
+endif
diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh
index bec3ac270b3..854f131f6c8 100644..100755
--- a/extras/hook-scripts/set/post/S30samba-set.sh
+++ b/extras/hook-scripts/set/post/S30samba-set.sh
@@ -1,127 +1,161 @@
#!/bin/bash
+#Need to be copied to hooks/<HOOKS_VER>/set/post/
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script enables user to enable or disable smb share by volume set
+#option. Keys "user.cifs" and "user.smb" both are valid, but user.smb is
+#preferred.
+
+
PROGNAME="Ssamba-set"
-OPTSPEC="volname:"
+OPTSPEC="volname:,gd-workdir:"
VOL=
-MNT_PRE="/mnt/samba"
-
-enable_cifs=""
+CONFIGFILE=
+LOGFILEBASE=
+PIDDIR=
+GLUSTERD_WORKDIR=
+USERSMB_SET=""
+USERCIFS_SET=""
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@)
+ ARGS=$(getopt -o 'o:' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
case $1 in
- --volname)
- shift
- VOL=$1
- ;;
- *)
- shift
- for pair in $@; do
- read key value < <(echo "$pair" | tr "=" " ")
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --)
+ shift
+ break
+ ;;
+ -o)
+ shift
+ read key value < <(echo "$1" | tr "=" " ")
case "$key" in
- "user.cifs") enable_cifs=$value;;
- *) ;;
+ "user.cifs")
+ USERCIFS_SET="YES"
+ ;;
+ "user.smb")
+ USERSMB_SET="YES"
+ ;;
+ *)
+ ;;
esac
- done
-
- shift
- break
- ;;
+ ;;
+ *)
+ shift
+ break
+ ;;
esac
shift
done
}
-function add_samba_export () {
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ]; then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk '{print $2}'`
+ PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'`
+ LOGFILEBASE=`smbd -b | grep 'LOGFILEBASE' | awk '{print $2}'`
+}
+
+function add_samba_share () {
volname=$1
- mnt_pre=$2
- mkdir -p $mnt_pre/$volname && \
- printf "\n[gluster-$volname]\ncomment=For samba export of volume $volname\npath=$mnt_pre/$volname\nread only=no\nguest ok=yes\n" >> /etc/samba/smb.conf
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = $LOGFILEBASE/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> ${CONFIGFILE}
}
function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ "$pid" != "" ]
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
then
kill -HUP "$pid";
else
- /etc/init.d/smb start
+ service smb condrestart
fi
}
-function add_fstab_entry () {
+function deactivate_samba_share () {
volname=$1
- mntpt=$2
- mntent="`hostname`:/$volname $mntpt glusterfs defaults,transport=tcp 0 0"
- exists=`grep "$mntent" /etc/fstab`
- if [ "$exists" == "" ]
- then
- echo "$mntent" >> /etc/fstab
- fi
+ sed -i -e '/^\[gluster-'"$volname"'\]/{ :a' -e 'n; /available = no/H; /^$/!{$!ba;}; x; /./!{ s/^/available = no/; $!{G;x}; $H; }; s/.*//; x; };' ${CONFIGFILE}
}
-function del_samba_export () {
+function is_volume_started () {
volname=$1
- cp /etc/samba/smb.conf /tmp/smb.conf
- sed -i "/gluster-$volname/,/^$/d" /tmp/smb.conf &&\
- mv /tmp/smb.conf /etc/samba/smb.conf
+ echo "$(grep status $GLUSTERD_WORKDIR/vols/"$volname"/info |\
+ cut -d"=" -f2)"
}
-function umount_volume () {
+function get_smb () {
volname=$1
- mnt_pre=$2
- umount -l $mnt_pre/$volname
-}
+ uservalue=
-function remove_fstab_entry () {
- volname=$1
- mntpt=$2
- mntent="`hostname`:/$volname $mntpt glusterfs defaults,transport=tcp 0 0"
- esc_mntent=$(echo -e "$mntent" | sed 's/\//\\\//g')
- exists=`grep "$mntent" /etc/fstab`
- if [ "$exists" != " " ]
- then
- sed -i /"$esc_mntent"/d /etc/fstab
- fi
-}
+ usercifsvalue=$(grep user.cifs $GLUSTERD_WORKDIR/vols/"$volname"/info |\
+ cut -d"=" -f2)
+ usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\
+ cut -d"=" -f2)
-function is_volume_started () {
- volname=$1
- echo "$(grep status /var/lib/glusterd/vols/"$volname"/info | cut -d"=" -f2)"
-}
+ if [ -n "$usercifsvalue" ]; then
+ if [ "$usercifsvalue" = "disable" ] || [ "$usercifsvalue" = "off" ]; then
+ uservalue="disable"
+ fi
+ fi
-function get_cifs () {
- volname=$1
- echo "$(grep user.cifs /var/lib/glusterd/vols/"$volname"/info | cut -d"=" -f2)"
-}
+ if [ -n "$usersmbvalue" ]; then
+ if [ "$usersmbvalue" = "disable" ] || [ "$usersmbvalue" = "off" ]; then
+ uservalue="disable"
+ fi
+ fi
-function mount_volume () {
- volname=$1
- mntpt=$2
- if [ "$(cat /proc/mounts | grep "$mntpt")" == "" ]; then
- mount -t glusterfs `hostname`:$volname $mntpt && \
- add_fstab_entry $volname $mntpt
- fi
+ echo "$uservalue"
}
-parse_args $@
-if [ "0" = $(is_volume_started "$VOL") ]; then
+parse_args "$@"
+if [ "0" = "$(is_volume_started "$VOL")" ]; then
exit 0
fi
-if [ "$enable_cifs" = "enable" ]; then
- add_samba_export $VOL $MNT_PRE
- mkdir -p $MNT_PRE/$VOL
- sleep 5
- mount_volume $VOL $MNT_PRE/$VOL
- sighup_samba
-elif [ "$enable_cifs" = "disable" ]; then
- del_samba_export $VOL
- umount_volume $VOL $MNT_PRE
- remove_fstab_entry $VOL $MNT_PRE/$VOL
+if [ "$USERCIFS_SET" = "YES" ] || [ "$USERSMB_SET" = "YES" ]; then
+ #Find smb.conf, smbd pid directory and smbd logfile path
+ find_config_info
+
+ if [ "$(get_smb "$VOL")" = "disable" ]; then
+ deactivate_samba_share $VOL
+ else
+ if ! grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then
+ add_samba_share $VOL
+ else
+ sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' ${CONFIGFILE}
+ fi
+ fi
sighup_samba
fi
diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
new file mode 100755
index 00000000000..1f2564b44ff
--- /dev/null
+++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+key=`echo $3 | cut -d '=' -f 1`
+val=`echo $3 | cut -d '=' -f 2`
+if [ "$key" != "cluster.enable-shared-storage" ] && [ "$key" != "enable-shared-storage" ]; then
+ exit;
+fi
+if [ "$val" != 'enable' ]; then
+ if [ "$val" != 'disable' ]; then
+ exit;
+ fi
+fi
+
+option=$val
+
+key_val_pair1=`echo $4 | cut -d ',' -f 1`
+key_val_pair2=`echo $4 | cut -d ',' -f 2`
+
+key=`echo $key_val_pair1 | cut -d '=' -f 1`
+val=`echo $key_val_pair1 | cut -d '=' -f 2`
+if [ "$key" != "is_originator" ]; then
+ exit;
+fi
+is_originator=$val;
+
+key=`echo $key_val_pair2 | cut -d '=' -f 1`
+val=`echo $key_val_pair2 | cut -d '=' -f 2`
+if [ "$key" != "local_node_hostname" ]; then
+ exit;
+fi
+local_node_hostname=$val;
+
+# Read gluster peer status to find the peers
+# which are in 'Peer in Cluster' mode and
+# are connected.
+
+number_of_connected_peers=0
+while read -r line
+do
+ # Already got two connected peers. Including the current node
+ # we have 3 peers which is enough to create a shared storage
+ # with replica 3
+ if [ "$number_of_connected_peers" == "2" ]; then
+ break;
+ fi
+
+ key=`echo $line | cut -d ':' -f 1`
+ if [ "$key" == "Hostname" ]; then
+ hostname=`echo $line | cut -d ':' -f 2 | xargs`
+ fi
+
+ if [ "$key" == "State" ]; then
+ peer_state=`echo $line | cut -d ':' -f 2 | cut -d '(' -f 1 | xargs`
+ conn_state=`echo $line | cut -d '(' -f 2 | cut -d ')' -f 1 | xargs`
+
+ if [ "$peer_state" == "Peer in Cluster" ]; then
+ if [ "$conn_state" == "Connected" ]; then
+ ((number_of_connected_peers++))
+ connected_peer[$number_of_connected_peers]=$hostname
+ fi
+ fi
+ fi
+
+done < <(gluster peer status)
+
+# Include current node in connected peer list
+((number_of_connected_peers++))
+connected_peer[$number_of_connected_peers]=$local_node_hostname
+
+# forming the create vol command
+create_cmd="gluster --mode=script --wignore volume create \
+ gluster_shared_storage replica $number_of_connected_peers"
+
+# Adding the brick names in the command
+for i in "${connected_peer[@]}"
+do
+ create_cmd=$create_cmd" "$i:"$GLUSTERD_WORKDIR"/ss_brick
+done
+
+if [ "$option" == "disable" ]; then
+ # Unmount the volume on all the nodes
+ umount /run/gluster/shared_storage
+ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
+fi
+
+if [ "$is_originator" == 1 ]; then
+ if [ "$option" == "enable" ]; then
+ # Create and start the volume
+ $create_cmd
+ gluster --mode=script --wignore volume start gluster_shared_storage
+ fi
+
+ if [ "$option" == "disable" ]; then
+ # Stop and delete the volume
+ gluster --mode=script --wignore volume stop gluster_shared_storage
+ gluster --mode=script --wignore volume delete gluster_shared_storage
+ fi
+fi
+
+function check_volume_status()
+{
+ status=`gluster volume info gluster_shared_storage | grep Status | cut -d ':' -f 2 | xargs`
+ echo $status
+}
+
+key=`echo $5 | cut -d '=' -f 1`
+val=`echo $5 | cut -d '=' -f 2`
+if [ "$key" == "transport.address-family" ]; then
+ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \
+ $local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage"
+else
+ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
+ /run/gluster/shared_storage"
+fi
+
+if [ "$option" == "enable" ]; then
+ retry=0;
+ # Wait for volume to start before mounting
+ status=$(check_volume_status)
+ while [ "$status" != "Started" ]; do
+ sleep 5;
+ ((retry++))
+ if [ "$retry" == 3 ]; then
+ break;
+ fi
+ status=$(check_volume_status)
+ done
+ # Mount the volume on all the nodes
+ umount /run/gluster/shared_storage
+ mkdir -p /run/gluster/shared_storage
+ $mount_cmd
+ cp /etc/fstab /run/gluster/fstab.tmp
+ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
+fi
diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am
index d9cba93ed52..792019d3c9f 100644
--- a/extras/hook-scripts/start/post/Makefile.am
+++ b/extras/hook-scripts/start/post/Makefile.am
@@ -1 +1,6 @@
-EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh
+EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/
+if WITH_SERVER
+hook_SCRIPTS = $(EXTRA_DIST)
+endif
diff --git a/extras/hook-scripts/start/post/S29CTDBsetup.sh b/extras/hook-scripts/start/post/S29CTDBsetup.sh
index 2567cf8307e..69a0d89a3eb 100644..100755
--- a/extras/hook-scripts/start/post/S29CTDBsetup.sh
+++ b/extras/hook-scripts/start/post/S29CTDBsetup.sh
@@ -1,18 +1,22 @@
#! /bin/bash
-# RHS-2.0 only
+#
# - The script mounts the 'meta-vol' on start 'event' on a known
# directory (eg. /gluster/lock)
-# - Adds the necessary configuration changes for ctdb in smb.conf and
-# restarts smb service.
# - P.S: There are other 'tasks' that need to be done outside this script
# to get CTDB based failover up and running.
-SMB_CONF=/etc/samba/smb.conf
-
CTDB_MNT=/gluster/lock
+# Make sure ping-timeout is not default for CTDB volume
+PING_TIMEOUT_SECS=10
PROGNAME="ctdb"
-OPTSPEC="volname:"
+OPTSPEC="volname:,gd-workdir:,version:,volume-op:,first:"
+HOSTNAME=`hostname`
+MNTOPTS="_netdev,transport=tcp,xlator-option=*client*.ping-timeout=${PING_TIMEOUT_SECS}"
VOL=
+GLUSTERD_WORKDIR=
+VERSION=
+VOLUME_OP=
+FIRST=
# $META is the volume that will be used by CTDB as a shared filesystem.
# It is not desirable to use this volume for storing 'data' as well.
# META is set to 'all' (viz. a keyword and hence not a legal volume name)
@@ -21,7 +25,7 @@ VOL=
META="all"
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
@@ -29,49 +33,52 @@ function parse_args () {
--volname)
shift
VOL=$1
- ;;
-
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --first)
+ shift
+ FIRST=$1
+ ;;
*)
- shift
- break
- ;;
-
+ shift
+ break
+ ;;
esac
shift
done
}
-function add_glusterfs_ctdb_options () {
- PAT="Share Definitions"
- GLUSTER_CTDB_CONFIG="# ctdb config for glusterfs\n\tclustering = yes\n\tidmap backend = tdb2\n"
- exists=`grep "clustering = yes" "$SMB_CONF"`
- if [ "$exists" == "" ]
- then
- sed -i /"$PAT"/i\ "$GLUSTER_CTDB_CONFIG" "$SMB_CONF"
- fi
-}
-
function add_fstab_entry () {
volname=$1
mntpt=$2
- mntent="`hostname`:/$volname $mntpt glusterfs defaults,transport=tcp 0 0"
- exists=`grep "$mntent" /etc/fstab`
+ mntopts="${MNTOPTS}"
+
+ mntent="${HOSTNAME}:/${volname} ${mntpt} glusterfs ${mntopts} 0 0"
+ exists=`grep "${mntpt}" /etc/fstab`
if [ "$exists" == "" ]
then
- echo "$mntent" >> /etc/fstab
+ echo "${mntent}" >> /etc/fstab
fi
}
-parse_args $@
+parse_args "$@"
if [ "$META" = "$VOL" ]
then
- #expects ctdb service to manage smb
- service smb stop
- add_glusterfs_ctdb_options
- mkdir -p $CTDB_MNT
- sleep 5
- mount -t glusterfs `hostname`:$VOL "$CTDB_MNT" && \
- add_fstab_entry $VOL $CTDB_MNT
- chkconfig ctdb on
+ mkdir -p $CTDB_MNT
+ sleep 5
+ mount -t glusterfs -o${MNTOPTS} ${HOSTNAME}:/$VOL "$CTDB_MNT" && \
+ add_fstab_entry $VOL $CTDB_MNT
+ chkconfig ctdb on
fi
diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
index 70158e75590..cac0cbf1464 100644..100755
--- a/extras/hook-scripts/start/post/S30samba-start.sh
+++ b/extras/hook-scripts/start/post/S30samba-start.sh
@@ -1,78 +1,145 @@
#!/bin/bash
+#Need to be copied to hooks/<HOOKS_VER>/start/post
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically creates shares for volume on every volume start
+#event by adding the entries in smb.conf file and sending SIGHUP to samba.
+#
+#In smb.conf:
+#glusterfs vfs plugin has to be specified as required vfs object.
+#Path value is relative to the root of gluster volume;"/" signifies complete
+#volume.
+
PROGNAME="Ssamba-start"
-OPTSPEC="volname:"
+OPTSPEC="volname:,gd-workdir:,version:,volume-op:,first:"
VOL=
-MNT_PRE="/mnt/samba"
+CONFIGFILE=
+LOGFILEBASE=
+PIDDIR=
+GLUSTERD_WORKDIR=
+VERSION=
+VOLUME_OP=
+FIRST=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
- case $1 in
- --volname)
- shift
- VOL=$1
- ;;
- *)
- shift
- break
- ;;
- esac
- shift
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --first)
+ shift
+ FIRST=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+
+ shift
done
}
-function add_samba_export () {
+function find_config_info () {
+ cmdout=$(smbd -b 2> /dev/null)
+ CONFIGFILE=$(echo "$cmdout" | grep CONFIGFILE | awk '{print $2}')
+ if [ -z "$CONFIGFILE" ]; then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ PIDDIR=$(echo "$cmdout" | grep PIDDIR | awk '{print $2}')
+ LOGFILEBASE=$(echo "$cmdout" | grep 'LOGFILEBASE' | awk '{print $2}')
+}
+
+function add_samba_share () {
volname=$1
- mnt_pre=$2
- mkdir -p $mnt_pre/$volname && \
- printf "\n[gluster-$volname]\ncomment=For samba export of volume $volname\npath=$mnt_pre/$volname\nread only=no\nguest ok=yes\n" >> /etc/samba/smb.conf
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = $LOGFILEBASE/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> "${CONFIGFILE}"
}
function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ "$pid" != "" ]
+ pid=$(cat "${PIDDIR}/smbd.pid" 2> /dev/null)
+ if [ "x$pid" != "x" ]
then
kill -HUP "$pid";
else
- /etc/init.d/smb condrestart
+ service smb condrestart
fi
}
-function add_fstab_entry () {
+function get_smb () {
volname=$1
- mntpt=$2
- mntent="`hostname`:/$volname $mntpt glusterfs defaults,transport=tcp 0 0"
- exists=`grep "$mntent" /etc/fstab`
- if [ "$exists" == "" ]
- then
- echo "$mntent" >> /etc/fstab
+ uservalue=
+
+ usercifsvalue=$(grep user.cifs "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\
+ cut -d"=" -f2)
+ usersmbvalue=$(grep user.smb "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\
+ cut -d"=" -f2)
+
+ if [ -n "$usercifsvalue" ]; then
+ if [ "$usercifsvalue" = "enable" ] || [ "$usercifsvalue" = "on" ]; then
+ uservalue="enable"
+ fi
fi
-}
-function get_cifs () {
- volname=$1
- echo "$(grep user.cifs /var/lib/glusterd/vols/"$volname"/info | cut -d"=" -f2)"
-}
+ if [ -n "$usersmbvalue" ]; then
+ if [ "$usersmbvalue" = "enable" ] || [ "$usersmbvalue" = "on" ]; then
+ uservalue="enable"
+ fi
+ fi
-function mount_volume () {
- volname=$1
- mntpt=$2
- if [ "$(cat /proc/mounts | grep "$mntpt")" == "" ]; then
- mount -t glusterfs `hostname`:$volname $mntpt && \
- add_fstab_entry $volname $mntpt
- fi
+ echo "$uservalue"
}
-parse_args $@
-if [ $(get_cifs "$VOL") = "disable" ]; then
+parse_args "$@"
+
+value=$(get_smb "$VOL")
+
+if [ -z "$value" ] || [ "$value" != "enable" ]; then
exit 0
fi
-add_samba_export $VOL $MNT_PRE
-mkdir -p $MNT_PRE/$VOL
-sleep 5
-mount_volume $VOL $MNT_PRE/$VOL
+#Find smb.conf, smbd pid directory and smbd logfile path
+find_config_info
+
+if ! grep --quiet "\[gluster-$VOL\]" "${CONFIGFILE}" ; then
+ add_samba_share "$VOL"
+else
+ sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' "${CONFIGFILE}"
+fi
sighup_samba
diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh
new file mode 100755
index 00000000000..7ad6f23ad06
--- /dev/null
+++ b/extras/hook-scripts/start/post/S31ganesha-start.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+PROGNAME="Sganesha-start"
+OPTSPEC="volname:,gd-workdir:"
+VOL=
+declare -i EXPORT_ID
+ganesha_key="ganesha.enable"
+GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha"
+CONF1="$GANESHA_DIR/ganesha.conf"
+GLUSTERD_WORKDIR=
+
+function parse_args ()
+{
+ ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+
+
+#This function generates a new export entry as export.volume_name.conf
+function write_conf()
+{
+echo -e "# WARNING : Using Gluster CLI will overwrite manual
+# changes made to this file. To avoid it, edit the
+# file, copy it over to all the NFS-Ganesha nodes
+# and run ganesha-ha.sh --refresh-config."
+
+echo "EXPORT{"
+echo " Export_Id = 2;"
+echo " Path = \"/$VOL\";"
+echo " FSAL {"
+echo " name = \"GLUSTER\";"
+echo " hostname=\"localhost\";"
+echo " volume=\"$VOL\";"
+echo " }"
+echo " Access_type = RW;"
+echo " Disable_ACL = true;"
+echo " Squash=\"No_root_squash\";"
+echo " Pseudo=\"/$VOL\";"
+echo " Protocols = \"3\", \"4\" ;"
+echo " Transports = \"UDP\",\"TCP\";"
+echo " SecType = \"sys\";"
+echo "}"
+}
+
+#It adds the export dynamically by sending dbus signals
+function export_add()
+{
+ dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \
+string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)"
+
+}
+
+# based on src/scripts/ganeshactl/Ganesha/export_mgr.py
+function is_exported()
+{
+ local volume="${1}"
+
+ dbus-send --type=method_call --print-reply --system \
+ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+ org.ganesha.nfsd.exportmgr.ShowExports \
+ | grep -w -q "/${volume}"
+
+ return $?
+}
+
+# Check the info file (contains the volume options) to see if Ganesha is
+# enabled for this volume.
+function ganesha_enabled()
+{
+ local volume="${1}"
+ local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info"
+ local enabled="off"
+
+ enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2)
+
+ [ "${enabled}" == "on" ]
+
+ return $?
+}
+
+parse_args $@
+
+if ganesha_enabled ${VOL} && ! is_exported ${VOL}
+then
+ if [ ! -e ${GANESHA_DIR}/exports/export.${VOL}.conf ]
+ then
+ #Remove export entry from nfs-ganesha.conf
+ sed -i /$VOL.conf/d $CONF1
+ write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf
+ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
+ EXPORT_ID=EXPORT_ID+1
+ echo $EXPORT_ID > $GANESHA_DIR/.export_added
+ sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \
+ $GANESHA_DIR/exports/export.$VOL.conf
+ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1
+ else
+ EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\
+ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
+ fi
+ export_add $VOL
+fi
+
+exit 0
diff --git a/extras/hook-scripts/stop/pre/Makefile.am b/extras/hook-scripts/stop/pre/Makefile.am
index 51a139bad8d..9e8d1565e93 100644
--- a/extras/hook-scripts/stop/pre/Makefile.am
+++ b/extras/hook-scripts/stop/pre/Makefile.am
@@ -1 +1,6 @@
-EXTRA_DIST = S30samba-stop.sh S29CTDB-teardown.sh
+EXTRA_DIST = S29CTDB-teardown.sh S30samba-stop.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/stop/pre/
+if WITH_SERVER
+hook_SCRIPTS = $(EXTRA_DIST)
+endif
diff --git a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
index 30316f92f68..0975a00f18d 100644..100755
--- a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
+++ b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
@@ -1,11 +1,10 @@
#! /bin/bash
-#non-portable - RHS-2.0 only
-SMB_CONF=/etc/samba/smb.conf
CTDB_MNT=/gluster/lock
PROGNAME="ctdb"
-OPTSPEC="volname:"
+OPTSPEC="volname:,last:"
VOL=
+LAST=
# $META is the volume that will be used by CTDB as a shared filesystem.
# It is not desirable to use this volume for storing 'data' as well.
# META is set to 'all' (viz. a keyword and hence not a legal volume name)
@@ -13,18 +12,8 @@ VOL=
# User needs to set META to the volume that serves CTDB lockfile.
META="all"
-function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ "$pid" != "" ]
- then
- kill -HUP $pid;
- else
- /etc/init.d/smb start
- fi
-}
-
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
@@ -32,49 +21,42 @@ function parse_args () {
--volname)
shift
VOL=$1
- ;;
-
+ ;;
+ --last)
+ shift
+ LAST=$1
+ ;;
*)
- shift
- break
- ;;
-
+ shift
+ break
+ ;;
esac
-
shift
done
}
-function remove_ctdb_options () {
- IFS=$'\n'
- GLUSTER_CTDB_CONFIG=$'# ctdb config for glusterfs\n\tclustering = yes\n\tidmap backend = tdb2\n'
-
- for line in $GLUSTER_CTDB_CONFIG
- do
- sed -i /"$line"/d $SMB_CONF
- done
- unset IFS
-}
-
function remove_fstab_entry () {
- volname=$1
- mntpt=$2
- mntent="`hostname`:/$volname $mntpt glusterfs defaults,transport=tcp 0 0"
- esc_mntent=$(echo -e "$mntent" | sed 's/\//\\\//g')
- exists=`grep "$mntent" /etc/fstab`
- if [ "$exists" != " " ]
- then
- sed -i /"$esc_mntent"/d /etc/fstab
- fi
+ mntpt=$1
+ fstab="/etc/fstab"
+ exists=`grep "$mntpt" ${fstab}`
+ esc_mntpt=$(echo -e $mntpt | sed 's/\//\\\//g')
+ if [ "$exists" != " " ]
+ then
+ sed -i /"$esc_mntpt"/d $fstab
+ exists=`grep "$mntpt" ${fstab}`
+ if [ "$exists" != " " ]
+ then
+ echo "fstab entry cannot be removed for unknown reason"
+ exit 1
+ fi
+ fi
}
-parse_args $@
+parse_args "$@"
if [ "$META" = "$VOL" ]
then
umount "$CTDB_MNT"
chkconfig ctdb off
- remove_fstab_entry $VOL $CTDB_MNT
- remove_ctdb_options
- sighup_samba
+ remove_fstab_entry $CTDB_MNT
fi
diff --git a/extras/hook-scripts/stop/pre/S30samba-stop.sh b/extras/hook-scripts/stop/pre/S30samba-stop.sh
index 66efa3cc5ae..ea799381d62 100644..100755
--- a/extras/hook-scripts/stop/pre/S30samba-stop.sh
+++ b/extras/hook-scripts/stop/pre/S30samba-stop.sh
@@ -1,66 +1,77 @@
#! /bin/bash
+#Need to be copied to hooks/<HOOKS_VER>/stop/pre
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically removes shares for volume on every volume stop
+#event by removing the volume related entries(if any) in smb.conf file.
+
PROGNAME="Ssamba-stop"
-OPTSPEC="volname:"
+OPTSPEC="volname:,last:"
VOL=
-MNT_PRE="/mnt/samba"
+CONFIGFILE=
+PIDDIR=
+LAST=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
- case $1 in
- --volname)
- shift
- VOL=$1
- ;;
- *)
- shift
- break
- ;;
- esac
- shift
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --last)
+ shift
+ LAST=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+
+ shift
done
}
-function del_samba_export () {
- volname=$1
- cp /etc/samba/smb.conf /tmp/smb.conf
- sed -i "/gluster-$volname/,/^$/d" /tmp/smb.conf &&\
- mv /tmp/smb.conf /etc/samba/smb.conf
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ];then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk '{print $2}'`
+ PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'`
}
-function umount_volume () {
+function deactivate_samba_share () {
volname=$1
- mnt_pre=$2
- umount -l $mnt_pre/$volname
-}
-
-function remove_fstab_entry () {
- volname=$1
- mntpt=$2
- mntent="`hostname`:/$volname $mntpt glusterfs defaults,transport=tcp 0 0"
- esc_mntent=$(echo -e "$mntent" | sed 's/\//\\\//g')
- exists=`grep "$mntent" /etc/fstab`
- if [ "$exists" != " " ]
- then
- sed -i /"$esc_mntent"/d /etc/fstab
- fi
+ sed -i -e '/^\[gluster-'"$volname"'\]/{ :a' -e 'n; /available = no/H; /^$/!{$!ba;}; x; /./!{ s/^/available = no/; $!{G;x}; $H; }; s/.*//; x; };' ${CONFIGFILE}
}
function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ $pid != " " ]
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
then
kill -HUP $pid;
else
- /etc/init.d/smb condrestart
+ service smb condrestart
fi
}
-parse_args $@
-del_samba_export $VOL
-umount_volume $VOL $MNT_PRE
-remove_fstab_entry $VOL $MNT_PRE/$VOL
+parse_args "$@"
+find_config_info
+deactivate_samba_share $VOL
sighup_samba
diff --git a/extras/identify-hangs.sh b/extras/identify-hangs.sh
new file mode 100755
index 00000000000..ebc6bf144aa
--- /dev/null
+++ b/extras/identify-hangs.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+function get_statedump_fnames_without_timestamps
+{
+ ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u
+}
+
+function get_non_uniq_fields
+{
+ local statedump_fname_prefix=$1
+ print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}'
+}
+
+function print_stack_lkowner_unique_in_one_line
+{
+ local statedump_fname_prefix=$1
+ sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - -
+}
+
+function get_stacks_that_appear_in_multiple_statedumps
+{
+ #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps
+ #print the stack
+ local statedump_fname_prefix=$1
+ while read -r non_uniq_stack;
+ do
+ if [ -z "$printed" ];
+ then
+ printed="1"
+ fi
+ echo "$statedump_fname_prefix" "$non_uniq_stack"
+ done < <(get_non_uniq_fields "$statedump_fname_prefix")
+}
+
+statedumpdir=${1}
+if [ -z "$statedumpdir" ];
+then
+ echo "Usage: $0 <statedump-dir>"
+ exit 1
+fi
+
+if [ ! -d "$statedumpdir" ];
+then
+ echo "$statedumpdir: Is not a directory"
+ echo "Usage: $0 <statedump-dir>"
+ exit 1
+fi
+
+cd "$statedumpdir" || exit 1
+for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps);
+do
+ get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix"
+done | column -t
+echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2
diff --git a/extras/init.d/Makefile.am b/extras/init.d/Makefile.am
index 66715f4314a..8d8cc69571a 100644
--- a/extras/init.d/Makefile.am
+++ b/extras/init.d/Makefile.am
@@ -1,19 +1,32 @@
-EXTRA_DIST = glusterd-Debian glusterd-Redhat glusterd-SuSE glusterd.plist
+EXTRA_DIST = glusterd-Debian glusterd-FreeBSD glusterd-Redhat \
+ glusterd-SuSE glusterd.plist glustereventsd-FreeBSD \
+ glustereventsd-Redhat glustereventsd-Debian
-CLEANFILES =
+CLEANFILES =
-initdir = @initdir@
-launchddir = @launchddir@
+INIT_DIR = @initdir@
+SYSTEMD_DIR = @systemddir@
+LAUNCHD_DIR = @launchddir@
-$(GF_DISTRIBUTION):
- $(mkdir_p) $(DESTDIR)$(initdir)
- $(INSTALL_PROGRAM) glusterd-$(GF_DISTRIBUTION) $(DESTDIR)$(initdir)/glusterd
+$(GF_DISTRIBUTION):
+if WITH_SERVER
+ @if [ ! -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(INIT_DIR); \
+ $(INSTALL_PROGRAM) glusterd-$(GF_DISTRIBUTION) $(DESTDIR)$(INIT_DIR)/glusterd; \
+ fi
+endif
+if BUILD_EVENTS
+ @if [ ! -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(INIT_DIR); \
+ $(INSTALL_PROGRAM) glustereventsd-$(GF_DISTRIBUTION) $(DESTDIR)$(INIT_DIR)/glustereventsd; \
+ fi
+endif
install-exec-local: $(GF_DISTRIBUTION)
-install-data-local:
+install-data-local:
if GF_DARWIN_HOST_OS
- $(mkdir_p) $(DESTDIR)$(launchddir)
- $(INSTALL_PROGRAM) glusterd.plist $(DESTDIR)$(launchddir)/com.gluster.glusterd.plist
+ $(mkdir_p) $(DESTDIR)$(LAUNCHD_DIR)
+ $(INSTALL_PROGRAM) glusterd.plist $(DESTDIR)$(LAUNCHD_DIR)/org.gluster.glusterd.plist
endif
diff --git a/extras/init.d/glusterd-FreeBSD.in b/extras/init.d/glusterd-FreeBSD.in
new file mode 100644
index 00000000000..21c3da72624
--- /dev/null
+++ b/extras/init.d/glusterd-FreeBSD.in
@@ -0,0 +1,24 @@
+#!/bin/sh
+#
+# $FreeBSD$
+#
+
+# PROVIDE: glusterd
+
+. /etc/rc.subr
+
+name="glusterd"
+rcvar=`set_rcvar`
+command=@prefix@/sbin/${name}
+pidfile="/var/run/${name}.pid"
+glusterd_flags="-p /var/run/${name}.pid"
+start_precmd="glusterd_prestart"
+
+glusterd_prestart()
+{
+ mkdir -p @GLUSTERD_WORKDIR@ /var/log/glusterfs
+ return 0
+}
+
+load_rc_config $name
+run_rc_command "$1"
diff --git a/extras/init.d/glusterd-Redhat.in b/extras/init.d/glusterd-Redhat.in
index 18f3debc4ca..94801fe31a5 100755
--- a/extras/init.d/glusterd-Redhat.in
+++ b/extras/init.d/glusterd-Redhat.in
@@ -1,76 +1,143 @@
#!/bin/bash
#
-# chkconfig: 35 20 80
-# description: Gluster File System service for volume management
+# glusterd Startup script for the glusterfs server
#
+# chkconfig: - 20 80
+# description: Clustered file-system server
-# Get function from functions library
+### BEGIN INIT INFO
+# Provides: glusterd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Should-Start:
+# Should-Stop:
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: glusterfs server
+# Description: Clustered file-system server
+### END INIT INFO
+#
+
+# Source function library.
. /etc/rc.d/init.d/functions
BASE=glusterd
-PIDFILE=/var/run/$BASE.pid
+
+# Fedora File System Layout dictates /run
+[ -e /run ] && RUNDIR="/run"
+PIDFILE="${RUNDIR:-/var/run}/${BASE}.pid"
+
PID=`test -f $PIDFILE && cat $PIDFILE`
+
+# Overwriteable from sysconfig
+LOG_LEVEL=''
+LOG_FILE=''
+GLUSTERD_OPTIONS=''
+GLUSTERD_NOFILE='65536'
+
+[ -f /etc/sysconfig/${BASE} ] && . /etc/sysconfig/${BASE}
+
+[ ! -z $LOG_LEVEL ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-level ${LOG_LEVEL}"
+[ ! -z $LOG_FILE ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-file ${LOG_FILE}"
+
GLUSTERFSD=glusterfsd
GLUSTERFS=glusterfs
GLUSTERD_BIN=@prefix@/sbin/$BASE
-GLUSTERD_OPTS="--pid-file=$PIDFILE"
+GLUSTERD_OPTS="--pid-file=$PIDFILE ${GLUSTERD_OPTIONS}"
GLUSTERD="$GLUSTERD_BIN $GLUSTERD_OPTS"
RETVAL=0
+LOCKFILE=/var/lock/subsys/${BASE}
+
# Start the service $BASE
start()
{
- pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null
- status=$?
- if [ $status -eq 0 ]; then
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
echo "glusterd service is already running with pid $PID"
- exit 1
+ return 0
else
+ ulimit -n $GLUSTERD_NOFILE
echo -n $"Starting $BASE:"
daemon $GLUSTERD
RETVAL=$?
echo
- [ $RETVAL -ne 0 ] && exit $RETVAL
+ [ $RETVAL -eq 0 ] && touch $LOCKFILE
+ return $RETVAL
fi
-
}
# Stop the service $BASE
stop()
{
echo -n $"Stopping $BASE:"
- pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null
- status=$?
- if [ $status -eq 0 ]; then
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
killproc -p $PIDFILE $BASE
- [ -w $PIDFILE ] && rm -f $PIDFILE
else
killproc $BASE
fi
-
+ RETVAL=$?
echo
+ [ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+ return $RETVAL
+}
+
+restart()
+{
+ stop
+ start
+}
+reload()
+{
+ restart
+}
+
+force_reload()
+{
+ restart
+}
+
+rh_status()
+{
+ status $BASE
+}
+
+rh_status_q()
+{
+ rh_status &>/dev/null
}
### service arguments ###
case $1 in
start)
- start
+ rh_status_q && exit 0
+ $1
;;
stop)
- stop
+ rh_status_q || exit 0
+ $1
+ ;;
+ restart)
+ $1
+ ;;
+ reload)
+ rh_status_q || exit 7
+ $1
+ ;;
+ force-reload)
+ force_reload
;;
status)
- status $BASE
+ rh_status
;;
- restart)
- $0 stop
- $0 start
+ condrestart|try-restart)
+ rh_status_q || exit 0
+ restart
;;
*)
- echo $"Usage: $0 {start|stop|status|restart}."
+ echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
exit 1
esac
-exit 0
+exit $?
diff --git a/extras/init.d/glusterd-SuSE.in b/extras/init.d/glusterd-SuSE.in
index 16cf8de6a13..6259bab00b6 100755
--- a/extras/init.d/glusterd-SuSE.in
+++ b/extras/init.d/glusterd-SuSE.in
@@ -2,8 +2,8 @@
#
### BEGIN INIT INFO
# Provides: glusterd
-# Required-Start: $local_fs $network
-# Required-Stop:
+# Required-Start: $remote_fs $network
+# Required-Stop: $remote_fs $network
# Default-Start: 3 5
# Default-Stop:
# Short-Description: Gluster File System service for volume management
@@ -61,13 +61,17 @@ case $1 in
fi
rc_status -v
;;
+ reload)
+ rc_failed 3
+ rc_status -v
+ ;;
restart)
$0 stop
$0 start
rc_status
;;
*)
- echo $"Usage: $0 {start|stop|status|restart}."
+ echo $"Usage: $0 {start|stop|status|reload|restart}."
exit 1
esac
diff --git a/extras/init.d/glusterd.plist.in b/extras/init.d/glusterd.plist.in
index 7385fa486df..3ee9f60c5c5 100644
--- a/extras/init.d/glusterd.plist.in
+++ b/extras/init.d/glusterd.plist.in
@@ -3,7 +3,7 @@
<plist version="1.0">
<dict>
<key>Label</key>
- <string>com.gluster.glusterd</string>
+ <string>org.gluster.glusterd</string>
<key>ProgramArguments</key>
<array>
<string>@prefix@/sbin/glusterd</string>
diff --git a/extras/init.d/glustereventsd-Debian.in b/extras/init.d/glustereventsd-Debian.in
new file mode 100644
index 00000000000..6eebdb2b8d8
--- /dev/null
+++ b/extras/init.d/glustereventsd-Debian.in
@@ -0,0 +1,91 @@
+#!/bin/sh
+### BEGIN INIT INFO
+# Provides: glustereventsd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: Gluster Events Server
+# Description: Gluster Events Server
+### END INIT INFO
+
+# Author: Chris AtLee <chris@atlee.ca>
+# Patched by: Matthias Albert < matthias@linux4experts.de>
+
+PATH=/sbin:/usr/sbin:/bin:/usr/bin
+NAME=glustereventsd
+SCRIPTNAME=/etc/init.d/$NAME
+DAEMON=@prefix@/sbin/$NAME
+PIDFILE=/var/run/$NAME.pid
+GLUSTEREVENTSD_OPTS=""
+PID=`test -f $PIDFILE && cat $PIDFILE`
+
+
+# Gracefully exit if the package has been removed.
+test -x $DAEMON || exit 0
+
+# Load the VERBOSE setting and other rcS variables
+. /lib/init/vars.sh
+
+# Define LSB log_* functions.
+. /lib/lsb/init-functions
+
+
+do_start()
+{
+ pidofproc -p $PIDFILE $DAEMON >/dev/null
+ status=$?
+ if [ $status -eq 0 ]; then
+ log_success_msg "glustereventsd service is already running with pid $PID"
+ else
+ log_daemon_msg "Starting glustereventsd service" "glustereventsd"
+ start-stop-daemon --start --quiet --oknodo --pidfile $PIDFILE --startas $DAEMON -- -p $PIDFILE $GLUSTEREVENTSD_OPTS
+ log_end_msg $?
+ start_daemon -p $PIDFILE $DAEMON -f $CONFIGFILE
+ return $?
+ fi
+}
+
+do_stop()
+{
+ log_daemon_msg "Stopping glustereventsd service" "glustereventsd"
+ start-stop-daemon --stop --quiet --oknodo --pidfile $PIDFILE
+ log_end_msg $?
+ rm -f $PIDFILE
+ killproc -p $PIDFILE $DAEMON
+ return $?
+}
+
+do_status()
+{
+ pidofproc -p $PIDFILE $DAEMON >/dev/null
+ status=$?
+ if [ $status -eq 0 ]; then
+ log_success_msg "glustereventsd service is running with pid $PID"
+ else
+ log_failure_msg "glustereventsd service is not running."
+ fi
+ exit $status
+}
+
+case "$1" in
+ start)
+ do_start
+ ;;
+ stop)
+ do_stop
+ ;;
+ status)
+ do_status;
+ ;;
+ restart|force-reload)
+ do_stop
+ sleep 2
+ do_start
+ ;;
+ *)
+ echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2
+ exit 3
+ ;;
+esac
+
diff --git a/extras/init.d/glustereventsd-FreeBSD.in b/extras/init.d/glustereventsd-FreeBSD.in
new file mode 100644
index 00000000000..2e8303ec6c6
--- /dev/null
+++ b/extras/init.d/glustereventsd-FreeBSD.in
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# $FreeBSD$
+#
+
+# PROVIDE: glustereventsd
+
+. /etc/rc.subr
+
+name="glustereventsd"
+rcvar=`set_rcvar`
+command=@prefix@/sbin/${name}
+command_interpreter=/usr/local/bin/python
+pidfile="/var/run/${name}.pid"
+glustereventsd_flags="-p /var/run/${name}.pid"
+start_cmd="/usr/sbin/daemon $command ${glustereventsd_flags}"
+
+load_rc_config $name
+run_rc_command "$1"
diff --git a/extras/init.d/glustereventsd-Redhat.in b/extras/init.d/glustereventsd-Redhat.in
new file mode 100644
index 00000000000..d23ce4c244f
--- /dev/null
+++ b/extras/init.d/glustereventsd-Redhat.in
@@ -0,0 +1,129 @@
+#!/bin/bash
+#
+# glustereventsd Startup script for the glusterfs Events server
+#
+# chkconfig: - 20 80
+# description: Gluster Events Server
+
+### BEGIN INIT INFO
+# Provides: glustereventsd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Should-Start:
+# Should-Stop:
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: glusterfs Events server
+# Description: GlusterFS Events Server
+### END INIT INFO
+#
+
+# Source function library.
+. /etc/rc.d/init.d/functions
+
+BASE=glustereventsd
+
+# Fedora File System Layout dictates /run
+[ -e /run ] && RUNDIR="/run"
+PIDFILE="${RUNDIR:-/var/run}/${BASE}.pid"
+
+PID=`test -f $PIDFILE && cat $PIDFILE`
+
+GLUSTEREVENTSD_BIN=@prefix@/sbin/$BASE
+GLUSTEREVENTSD_OPTS="--pid-file=$PIDFILE"
+GLUSTEREVENTSD="$GLUSTEREVENTSD_BIN $GLUSTEREVENTSD_OPTS"
+RETVAL=0
+
+LOCKFILE=/var/lock/subsys/${BASE}
+
+# Start the service $BASE
+start()
+{
+ if pidofproc -p $PIDFILE $GLUSTEREVENTSD_BIN &> /dev/null; then
+ echo "glustereventsd service is already running with pid $PID"
+ return 0
+ else
+ echo -n $"Starting $BASE:"
+ daemon $GLUSTEREVENTSD &
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && touch $LOCKFILE
+ return $RETVAL
+ fi
+}
+
+# Stop the service $BASE
+stop()
+{
+ echo -n $"Stopping $BASE:"
+ if pidofproc -p $PIDFILE $GLUSTEREVENTSD_BIN &> /dev/null; then
+ killproc -p $PIDFILE $BASE
+ else
+ killproc $BASE
+ fi
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+ return $RETVAL
+}
+
+restart()
+{
+ stop
+ start
+}
+
+reload()
+{
+ restart
+}
+
+force_reload()
+{
+ restart
+}
+
+rh_status()
+{
+ status $BASE
+}
+
+rh_status_q()
+{
+ rh_status &>/dev/null
+}
+
+
+### service arguments ###
+case $1 in
+ start)
+ rh_status_q && exit 0
+ $1
+ ;;
+ stop)
+ rh_status_q || exit 0
+ $1
+ ;;
+ restart)
+ $1
+ ;;
+ reload)
+ rh_status_q || exit 7
+ $1
+ ;;
+ force-reload)
+ force_reload
+ ;;
+ status)
+ rh_status
+ ;;
+ condrestart|try-restart)
+ rh_status_q || exit 0
+ restart
+ ;;
+ *)
+ echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
+ exit 1
+esac
+
+exit $?
diff --git a/extras/logger.conf.example b/extras/logger.conf.example
new file mode 100644
index 00000000000..248be5bda69
--- /dev/null
+++ b/extras/logger.conf.example
@@ -0,0 +1,13 @@
+#
+# Sample logger.conf file to configure enhanced Logging in GlusterFS
+#
+# To enable enhanced logging capabilities,
+#
+# 1. rename this file to /etc/glusterfs/logger.conf
+#
+# 2. rename /etc/rsyslog.d/gluster.conf.example to
+# /etc/rsyslog.d/gluster.conf
+#
+# This change requires restart of all gluster services/volumes and
+# rsyslog.
+#
diff --git a/extras/mount-shared-storage.sh b/extras/mount-shared-storage.sh
new file mode 100755
index 00000000000..cc40e13c3e3
--- /dev/null
+++ b/extras/mount-shared-storage.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+#Post reboot there is a chance in which mounting of shared storage will fail
+#This will impact starting of features like NFS-Ganesha. So this script will
+#try to mount the shared storage if it fails
+
+exitStatus=0
+
+while IFS= read -r glm
+do
+ IFS=$' \t' read -r -a arr <<< "$glm"
+
+ #Validate storage type is glusterfs
+ if [ "${arr[2]}" == "glusterfs" ]
+ then
+
+ #check whether shared storage is mounted
+ #if it is mounted then mountpoint -q will return a 0 success code
+ if mountpoint -q "${arr[1]}"
+ then
+ echo "${arr[1]} is already mounted"
+ continue
+ fi
+
+ mount -t glusterfs -o "${arr[3]}" "${arr[0]}" "${arr[1]}"
+ #wait for few seconds
+ sleep 10
+
+ #recheck mount got succeed
+ if mountpoint -q "${arr[1]}"
+ then
+ echo "${arr[1]} has been mounted"
+ continue
+ else
+ echo "${arr[1]} failed to mount"
+ exitStatus=1
+ fi
+ fi
+done <<< "$(sed '/^#/ d' </etc/fstab | grep 'glusterfs')"
+exit $exitStatus
diff --git a/extras/ocf/Makefile.am b/extras/ocf/Makefile.am
new file mode 100644
index 00000000000..c49a835fbca
--- /dev/null
+++ b/extras/ocf/Makefile.am
@@ -0,0 +1,11 @@
+EXTRA_DIST = glusterd.in volume.in
+
+# The root of the OCF resource agent hierarchy
+# Per the OCF standard, it's always "lib",
+# not "lib64" (even on 64-bit platforms).
+ocfdir = $(prefix)/lib/ocf
+
+# The ceph provider directory
+radir = $(ocfdir)/resource.d/$(PACKAGE_NAME)
+
+ra_SCRIPTS = glusterd volume
diff --git a/extras/ocf/glusterd.in b/extras/ocf/glusterd.in
new file mode 100755
index 00000000000..c119a285d32
--- /dev/null
+++ b/extras/ocf/glusterd.in
@@ -0,0 +1,212 @@
+#!/bin/sh
+#
+# glusterd
+#
+# Description: Manages a glusterd server as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+#######################################################################
+
+
+OCF_RESKEY_binary_default="glusterd"
+OCF_RESKEY_pid_default="@localstatedir@/run/glusterd.pid"
+OCF_RESKEY_socket_default=""
+OCF_RESKEY_additional_parameters_default=""
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+glusterd_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="glusterd" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+ </longdesc>
+ <shortdesc lang="en">Manages a Gluster server</shortdesc>
+ <parameters>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the glusterd executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">glusterd executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ <parameter name="pid">
+ <longdesc lang="en">
+ Path to the glusterd PID file.
+ </longdesc>
+ <shortdesc lang="en">PID file</shortdesc>
+ <content type="string" default="$OCF_RESKEY_pid_default"/>
+ </parameter>
+ <parameter name="socket">
+ <longdesc lang="en">
+ Path to the glusterd UNIX socket file. If unspecified,
+ glusterd will not listen on any socket.
+ </longdesc>
+ <shortdesc lang="en">Socket file</shortdesc>
+ <content type="string"/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+glusterd_start() {
+ local glusterd_options
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if glusterd_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ glusterd_options="-p $OCF_RESKEY_pid"
+ if [ -n "$OCF_RESKEY_socket" ]; then
+ glusterd_options="$glusterd_options -S $OCF_RESKEY_socket"
+ fi
+ if [ -n "$OCF_RESKEY_additional_parameters" ]; then
+ glusterd_options="$glusterd_options $OCF_RESKEY_additional_parameters"
+ fi
+
+ ocf_run $OCF_RESKEY_binary $glusterd_options || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! glusterd_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+glusterd_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ glusterd_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s TERM $pid || exit OCF_ERR_GENERIC
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while glusterd_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+glusterd_monitor() {
+ local pid
+
+ [ -e $OCF_RESKEY_pid ] || return $OCF_NOT_RUNNING
+
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+
+ ocf_log debug "$OCF_RESKEY_binary running with PID $pid"
+ return $OCF_SUCCESS
+}
+
+glusterd_validate_all() {
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) glusterd_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) glusterd_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+glusterd_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) glusterd_start;;
+stop) glusterd_stop;;
+status|monitor) glusterd_monitor;;
+reload) ocf_log info "Reloading..."
+ glusterd_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) glusterd_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/ocf/volume.in b/extras/ocf/volume.in
new file mode 100755
index 00000000000..76cc649e55f
--- /dev/null
+++ b/extras/ocf/volume.in
@@ -0,0 +1,276 @@
+#!/bin/sh
+#
+# glusterd
+#
+# Description: Manages a glusterd server as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+# Jiri Lunacek (Hosting90 Systems s.r.o.)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+SHORTHOSTNAME=`hostname -s`
+#######################################################################
+
+OCF_RESKEY_binary_default="gluster"
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+
+volume_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="volume" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+Manages a GlusterFS volume and monitors its bricks. When a resource of
+this type is configured as a clone (as is commonly the case), then it
+must have clone ordering enabled.
+ </longdesc>
+ <shortdesc lang="en">Manages a GlusterFS volume</shortdesc>
+ <parameters>
+ <parameter name="volname" required="1">
+ <longdesc lang="en">
+ The name of the volume to manage.
+ </longdesc>
+ <shortdesc lang="en">volume name</shortdesc>
+ <content type="string"/>
+ </parameter>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the gluster executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">gluster executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ <parameter name="peer_map">
+ <longdesc lang="en">
+ Mapping of hostname - peer name in the gluster cluster
+ in format hostname1:peername1,hostname2:peername2,...
+ </longdesc>
+ <shortdesc lang="en">gluster peer map</shortdesc>
+ <content type="string" default=""/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+if [ -n "${OCF_RESKEY_peer_map}" ]; then
+ SHORTHOSTNAME=`echo "${OCF_RESKEY_peer_map}" | egrep -o "$SHORTHOSTNAME\:[^,]+" | awk -F: '{print $2}'`
+fi
+
+volume_getdir() {
+ local voldir
+ voldir="@GLUSTERD_WORKDIR@/vols/${OCF_RESKEY_volname}"
+
+ [ -d ${voldir} ] || return 1
+
+ echo "${voldir}"
+ return 0
+}
+
+volume_getpid_dir() {
+ local volpid_dir
+ volpid_dir="/var/run/gluster/vols/${OCF_RESKEY_volname}"
+
+ [ -d ${volpid_dir} ] || return 1
+
+ echo "${volpid_dir}"
+ return 0
+}
+
+volume_getbricks() {
+ local infofile
+ local voldir
+ voldir=`volume_getdir`
+ infofile="${voldir}/info"
+
+ [ -e ${infofile} ] || return 1
+
+ echo "`sed -n -e "s/^brick-.\+=${SHORTHOSTNAME}://p" < ${infofile}`"
+ return 0
+}
+
+volume_getpids() {
+ local bricks
+ local pidfile
+ local infofile
+ local volpid_dir
+
+ volpid_dir=`volume_getpid_dir`
+ bricks=`volume_getbricks`
+
+ if [ -z "$bricks" ]; then
+ return 1
+ fi
+
+ for brick in ${bricks}; do
+ pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid"
+ [ -e $pidfile ] || return 1
+ cat $pidfile
+ done
+
+ return 0
+}
+
+volume_start() {
+ local volume_options
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if volume_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here
+ ocf_run "$OCF_RESKEY_binary" \
+ volume start "$OCF_RESKEY_volname" force || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! volume_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+volume_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ volume_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ pids=`volume_getpids`
+ for pid in $pids; do
+ ocf_run kill -s TERM $pid
+ done
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while volume_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+volume_monitor() {
+ local pid
+
+ pids=`volume_getpids` || return $OCF_NOT_RUNNING
+
+ for pid in $pids; do
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+ done
+
+ ocf_log debug "Local bricks for volume ${OCF_RESKEY_volname} running with PIDs $pids"
+ return $OCF_SUCCESS
+}
+
+volume_validate_all() {
+ # Test for configuration errors first
+ if [ -z "${OCF_RESKEY_volname}" ]; then
+ ocf_log err 'Missing required parameter "volname"'
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ if [ -z "$SHORTHOSTNAME" ]; then
+ ocf_log err 'Unable to get host in node map'
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) volume_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) volume_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+volume_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) volume_start;;
+stop) volume_stop;;
+status|monitor) volume_monitor;;
+reload) ocf_log info "Reloading..."
+ volume_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) volume_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/peer_add_secret_pub.in b/extras/peer_add_secret_pub.in
new file mode 100644
index 00000000000..c9674af353d
--- /dev/null
+++ b/extras/peer_add_secret_pub.in
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+user=$1
+pub_file=$2
+
+if [ "$user" == "" ]; then
+ echo "Invalid User";
+ exit 1;
+fi
+
+if [ "$pub_file" == "" ]; then
+ echo "Invalid pub file";
+ exit 1;
+fi
+
+home_dir=`getent passwd $user | cut -d ':' -f 6`;
+
+if [ "$home_dir" == "" ]; then
+ echo "Invalid home dir";
+ exit 1;
+fi
+
+authorized_keys_file=$(cat /etc/ssh/sshd_config | \
+ grep -e "^AuthorizedKeysFile" | \
+ awk '{print $2}' | tail -1);
+
+# If not set, use default location
+if [ "x$authorized_keys_file" == "x" ]; then
+ authorized_keys_file="%h/.ssh/authorized_keys"
+fi
+
+# If default location
+if [ "$authorized_keys_file" == ".ssh/authorized_keys" ]; then
+ authorized_keys_file="%h/$authorized_keys_file"
+fi
+
+# Replace %u with user name (ex: /etc/ssh/keys/%u/authorized_keys)
+authorized_keys_file="${authorized_keys_file//%u/$user}";
+
+# Replace %h with home dir (ex: %h/.ssh/authorized_keys)
+authorized_keys_file="${authorized_keys_file//%h/$home_dir}";
+ssh_dir=$(dirname $authorized_keys_file);
+
+if [ ! -d $ssh_dir ]; then
+ mkdir $ssh_dir;
+ chmod 700 $ssh_dir;
+ chown $user: $ssh_dir;
+fi
+
+if [ ! -d $authorized_keys_file ]; then
+ touch $authorized_keys_file;
+ chmod 600 $authorized_keys_file;
+ chown $user: $authorized_keys_file;
+fi
+
+# Restore SELinux security contexts. This is required
+# for passwdless SSH to work.
+
+if type restorecon >/dev/null 2>&1; then
+ restorecon -F $ssh_dir $authorized_keys_file;
+fi
+
+# Add to authorized_keys file only if not exists already
+while read line
+do
+ grep -Fxq "$line" $authorized_keys_file;
+ [ $? -ne 0 ] && echo "$line" >> $authorized_keys_file;
+done < "$GLUSTERD_WORKDIR"/$pub_file;
+
+exit 0;
diff --git a/extras/post-upgrade-script-for-quota.sh b/extras/post-upgrade-script-for-quota.sh
new file mode 100755
index 00000000000..15652429056
--- /dev/null
+++ b/extras/post-upgrade-script-for-quota.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+#The post-upgrade script must be executed after all the nodes in the cluster
+#have upgraded.
+#Also, all the clients accessing the given volume must also be upgraded
+#before the script is run.
+#Make sure glusterd and the brick processes are running on all nodes in the
+#cluster post upgrade.
+#Execute this script on the node where the pre-upgrade script for quota was run.
+
+VOL=$1;
+
+BACKUP_DIR=/var/tmp/glusterfs/quota-config-backup
+
+function set_limits {
+ local var=$(gluster volume info $1 | grep 'features.quota'| cut -d" " -f2);
+
+ if [ -z "$var" ] || [ "$var" = "off" ]; then
+ if [ $2 -eq '0' ]; then
+ echo "Volume $1 does not have quota enabled. " \
+ "Exiting ..."
+ exit 1
+ fi
+ else
+ gluster volume set $1 default-soft-limit 80%
+ if [ $? -ne '0' ]; then
+ echo "Post-upgrade process failed." \
+ "Please address the error and run " \
+ "post-upgrade-script.sh on volume $VOL again."
+ exit 1
+ fi
+
+ gluster volume start $1 force
+ sleep 3;
+
+ local path_array=( $(cat $BACKUP_DIR/vol_$1 | tail -n +3 | awk '{print $1}') )
+ local limit_array=( $(cat $BACKUP_DIR/vol_$1 | tail -n +3 | awk '{print $2}') )
+ local len=${#path_array[@]}
+
+ for ((j=0; j<$len; j++))
+ do
+ gluster volume quota $1 limit-usage ${path_array[$j]} ${limit_array[j]};
+ if [ $? -eq 0 ]; then
+ echo "Setting limit (${limit_array[j]}) on " \
+ "path ${path_array[$j]} has been " \
+ "successful"
+ fi
+ done
+ fi;
+}
+
+if [ -z $1 ]; then
+ echo "Please provide volume name or 'all'";
+ exit 1;
+fi
+
+if [ "$1" = "all" ]; then
+ for VOL in `gluster volume list`;
+ do
+ set_limits $VOL '1';
+ done
+
+else
+ set_limits $1 '0';
+fi
diff --git a/extras/pre-upgrade-script-for-quota.sh b/extras/pre-upgrade-script-for-quota.sh
new file mode 100755
index 00000000000..9ff15f3b307
--- /dev/null
+++ b/extras/pre-upgrade-script-for-quota.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+#Make sure glusterd and the brick processes are running on all nodes in the
+#cluster.
+#This script must be run prior to upgradation, that too on
+#only one of the nodes in the cluster.
+
+BACKUP_DIR=/var/tmp/glusterfs/quota-config-backup
+
+mkdir -p $BACKUP_DIR
+for i in `gluster volume list`; do
+ var=$(gluster volume info $i | grep 'features.quota'| cut -d" " -f2);
+ if [ -z "$var" ] || [ "$var" = "off" ]; then
+ continue
+ else
+ gluster volume quota $i list > $BACKUP_DIR/vol_$i;
+ fi;
+done
diff --git a/extras/profiler/glusterfs-profiler b/extras/profiler/glusterfs-profiler
index 042eadcf2df..aaafd088648 100755
--- a/extras/profiler/glusterfs-profiler
+++ b/extras/profiler/glusterfs-profiler
@@ -1,22 +1,15 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
-# texttable - module for creating simple ASCII tables
-# Copyright (C) 2003-2010 Gerome Fournier <jefke(at)free.fr>
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# texttable - module for creating simple ASCII tables
# Incorporated from texttable.py downloaded from
# http://jefke.free.fr/stuff/python/texttable/texttable-0.7.0.tar.gz
@@ -298,7 +291,7 @@ class Texttable:
s = "%s%s%s" % (horiz, [horiz, self._char_corner][self._has_vlines()],
horiz)
# build the line
- l = string.join([horiz*n for n in self._width], s)
+ l = s.join([horiz*n for n in self._width])
# add border if needed
if self._has_border():
l = "%s%s%s%s%s\n" % (self._char_corner, horiz, l, horiz,
diff --git a/extras/python/Makefile.am b/extras/python/Makefile.am
new file mode 100644
index 00000000000..7d81fa0319b
--- /dev/null
+++ b/extras/python/Makefile.am
@@ -0,0 +1,7 @@
+if HAVE_PYTHON
+# Install __init__.py into the Python site-packages area
+pypkgdir = @BUILD_PYTHON_SITE_PACKAGES@/gluster
+pypkg_PYTHON = __init__.py
+endif
+
+EXTRA_DIST = __init__.py
diff --git a/extras/python/__init__.py b/extras/python/__init__.py
new file mode 100644
index 00000000000..3ad9513f40e
--- /dev/null
+++ b/extras/python/__init__.py
@@ -0,0 +1,2 @@
+from pkgutil import extend_path
+__path__ = extend_path(__path__, __name__)
diff --git a/extras/quota-metadata-cleanup.sh b/extras/quota-metadata-cleanup.sh
deleted file mode 100755
index 37ad8bc3137..00000000000
--- a/extras/quota-metadata-cleanup.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-# This script is used to cleanup xattrs setup by quota-translator in (a)
-# backend directory(ies). It takes a single or list of backend directories
-# as argument(s).
-
-usage ()
-{
- echo >&2 "usage: $0 <list-of-backend-directories>"
-}
-
-main ()
-{
- [ $# -lt 1 ] && usage
-
- INSTALL_DIR=`dirname $0`
-
- for i in $@; do
- find $i -exec $INSTALL_DIR/quota-remove-xattr.sh '{}' \;
- done
-
-}
-
-main $@
diff --git a/extras/quota-remove-xattr.sh b/extras/quota-remove-xattr.sh
deleted file mode 100755
index 7191f9bd443..00000000000
--- a/extras/quota-remove-xattr.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-# This script is used to remove xattrs set by quota translator on a path.
-# It is generally invoked from quota-metadata-cleanup.sh, but can
-# also be used stand-alone.
-
-usage ()
-{
- echo >&2 "usage: $0 <path>"
-}
-
-main ()
-{
- [ $# -ne 1 ] && usage $0
-
- XATTR_KEY_VALUE_PAIRS=`getfattr -h -d -m 'trusted.glusterfs.quota' $1 2>/dev/null | sed -e '/^# file/d'`
-
- for i in $XATTR_KEY_VALUE_PAIRS; do
- XATTR_KEY=`echo $i | sed -e 's/\([^=]*\).*/\1/g'`
- setfattr -h -x $XATTR_KEY $1
- done
-}
-
-main $@
diff --git a/extras/contri-add.sh b/extras/quota/contri-add.sh
index 7db5edd5d20..7db5edd5d20 100755
--- a/extras/contri-add.sh
+++ b/extras/quota/contri-add.sh
diff --git a/extras/quota/log_accounting.sh b/extras/quota/log_accounting.sh
new file mode 100755
index 00000000000..e2dd87b84d7
--- /dev/null
+++ b/extras/quota/log_accounting.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# The script does an accounting of all directories using command 'du' and
+# using gluster. We can then compare the two to identify accounting mismatch
+# THere can be minor mismatch because gluster only accounts for the size of
+# files. Direcotries can take up upto 4kB space on FS per directory. THis
+# size is accounted by du and not by gluster. However the difference would
+# not be significant.
+
+mountpoint=$1
+volname=$2
+
+usage ()
+{
+ echo >&2 "usage: $0 <mountpoint> <volume name>"
+ exit
+}
+
+[ $# -lt 2 ] && usage
+
+cd $mountpoint
+du -h | head -n -1 | tr -d '.' |awk '{ for (i = 2; i <= NF; i++) { printf("%s ", $i);} print "" }' > /tmp/gluster_quota_1
+cat /tmp/gluster_quota_1 | sed 's/ $//' | sed 's/ /\\ /g' | sed 's/(/\\(/g' | sed 's/)/\\)/g' |xargs gluster v quota $volname list > /tmp/gluster_quota_2
+du -h | head -n -1 |awk '{ for (i = 2; i <= NF; i++) { printf("%s %s", $i, $1);} print "" }' | tr -d '.' > /tmp/gluster_quota_3
+cat /tmp/gluster_quota_2 /tmp/gluster_quota_3 | sort > /tmp/gluster_quota_4
+find . -type d > /tmp/gluster_quota_5
+tar -cvf /tmp/gluster_quota_files.tar /tmp/gluster_quota_*
diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
new file mode 100755
index 00000000000..e62f7fc52a3
--- /dev/null
+++ b/extras/quota/quota_fsck.py
@@ -0,0 +1,377 @@
+#!/usr/bin/python3
+# The following script enables, Detecting, Reporting and Fixing
+# anomalies in quota accounting. Run this script with -h option
+# for further details.
+
+'''
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+'''
+from __future__ import print_function
+import os, sys, re
+from stat import *
+import subprocess
+import argparse
+import xattr
+
+aggr_size = {}
+verbose_mode = False
+mnt_path = None
+brick_path = None
+obj_fix_count = 0
+file_count = 0
+dir_count = 0
+
+#CONSTANTS
+KB = 1024
+MB = 1048576
+GB = 1048576 * 1024
+TB = 1048576 * 1048576
+
+QUOTA_VERBOSE = 0
+QUOTA_META_ABSENT = 1
+QUOTA_SIZE_MISMATCH = 2
+
+IS_DIRTY ='0x3100'
+IS_CLEAN ='0x3000'
+
+
+epilog_msg='''
+ The script attempts to find any gluster accounting issues in the
+ filesystem at the given subtree. The script crawls the given
+ subdirectory tree doing a stat for all files and compares the
+ size reported by gluster quota with the size reported by stat
+ calls. Any mismatch is reported. In addition integrity of marker
+ xattrs are verified.
+ '''
+
+def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
+ if log_type == QUOTA_VERBOSE:
+ print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
+ elif log_type == QUOTA_META_ABSENT:
+ print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
+ elif log_type == QUOTA_SIZE_MISMATCH:
+ print("mismatch")
+ if dir_size is not None:
+ print('%24s %60s %12s %12s' % ("Size Mismatch", path,
+ xattr_dict, dir_size))
+ else:
+ print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict,
+ stbuf.st_size))
+
+def size_differs_lot(s1, s2):
+ '''
+ There could be minor accounting differences between the stat based
+ accounting and gluster accounting. To avoid these from throwing lot
+ of false positives in our logs. using a threshold of 1M for now.
+ TODO: For a deeply nested directory, at higher levels in hierarchy
+ differences may not be significant, hence this check needs to be improved.
+ '''
+ if abs(s1-s2) > 0:
+ return True
+ else:
+ return False
+
+def fix_hardlink_accounting(curr_dict, accounted_dict, curr_size):
+ '''
+ Hard links are messy.. we have to account them for their parent
+ directory. But, stop accounting at the most common ancestor.
+ Eg:
+ say we have 3 hardlinks : /d1/d2/h1, /d1/d3/h2 and /d1/h3
+
+ suppose we encounter the hard links h1 first , then h2 and then h3.
+ while accounting for h1, we account the size until root(d2->d1->/)
+ while accounting for h2, we need to account only till d3. (as d1
+ and / are accounted for this inode).
+ while accounting for h3 we should not account at all.. as all
+ its ancestors are already accounted for same inode.
+
+ curr_dict : dict of hardlinks that were seen and
+ accounted by the current iteration.
+ accounted_dict : dict of hardlinks that has already been
+ accounted for.
+
+ size : size of the object as accounted by the
+ curr_iteration.
+
+ Return vale:
+ curr_size : size reduced by hardlink sizes for those
+ hardlinks that has already been accounted
+ in current subtree.
+ Also delete the duplicate link from curr_dict.
+ '''
+
+ dual_accounted_links = set(curr_dict.keys()) & set(accounted_dict.keys())
+ for link in dual_accounted_links:
+ curr_size = curr_size - curr_dict[link]
+ del curr_dict[link]
+ return curr_size
+
+
+def fix_xattr(file_name, mark_dirty):
+ global obj_fix_count
+ global mnt_path
+
+ if mnt_path is None:
+ return
+ if mark_dirty:
+ print("MARKING DIRTY: " + file_name)
+ out = subprocess.check_output (["/usr/bin/setfattr", "-n",
+ "trusted.glusterfs.quota.dirty",
+ "-v", IS_DIRTY, file_name])
+ rel_path = os.path.relpath(file_name, brick_path)
+ print("stat on " + mnt_path + "/" + rel_path)
+ stbuf = os.lstat(mnt_path + "/" + rel_path)
+
+ obj_fix_count += 1
+
+def get_quota_xattr_brick(dpath):
+ out = subprocess.check_output (["/usr/bin/getfattr", "--no-dereference",
+ "-d", "-m.", "-e", "hex", dpath])
+ pairs = out.splitlines()
+
+ '''
+ Sample output to be parsed:
+ [root@dhcp35-100 mnt]# getfattr -d -m. -e hex /export/b1/B0/d14/d13/
+ # file: export/b1/B0/d14/d13/
+ security.selinux=0x756e636f6e66696e65645f753a6f626a6563745f723a7573725f743a733000
+ trusted.gfid=0xbae5e0d2d05043de9fd851d91ecf63e8
+ trusted.glusterfs.dht=0x000000010000000000000000ffffffff
+ trusted.glusterfs.dht.mds=0x00000000
+ trusted.glusterfs.quota.6a7675a3-b85a-40c5-830b-de9229d702ce.contri.39=0x00000000000000000000000000000000000000000000000e
+ trusted.glusterfs.quota.dirty=0x3000
+ trusted.glusterfs.quota.size.39=0x00000000000000000000000000000000000000000000000e
+ '''
+
+ '''
+ xattr_dict dictionary holds quota related xattrs
+ eg:
+ '''
+
+ xattr_dict = {}
+ xattr_dict['parents'] = {}
+
+ for xattr in pairs[1:]:
+ xattr = xattr.decode("utf-8")
+ xattr_key = xattr.split("=")[0]
+ if xattr_key == "":
+ # skip any empty lines
+ continue
+ elif not re.search("quota", xattr_key):
+ # skip all non quota xattr.
+ continue
+
+ xattr_value = xattr.split("=")[1]
+ if re.search("contri", xattr_key):
+
+ xattr_version = xattr_key.split(".")[5]
+ if 'version' not in xattr_dict:
+ xattr_dict['version'] = xattr_version
+ else:
+ if xattr_version != xattr_dict['version']:
+ print("Multiple xattr version found")
+
+
+ cur_parent = xattr_key.split(".")[3]
+ if cur_parent not in xattr_dict['parents']:
+ xattr_dict['parents'][cur_parent] = {}
+
+ contri_dict = xattr_dict['parents'][cur_parent]
+ if len(xattr_value) == 34:
+ # 34 bytes implies file contri xattr
+ # contri format =0x< 16bytes file size><16bytes file count>
+ # size is obtained in iatt, file count = 1, dir count=0
+ contri_dict['contri_size'] = int(xattr_value[2:18], 16)
+ contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
+ contri_dict['contri_dir_count'] = 0
+ else:
+ # This is a directory contri.
+ contri_dict['contri_size'] = int(xattr_value[2:18], 16)
+ contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
+ contri_dict['contri_dir_count'] = int(xattr_value[34:], 16)
+
+ elif re.search("size", xattr_key):
+ xattr_dict['size'] = int(xattr_value[2:18], 16)
+ xattr_dict['file_count'] = int(xattr_value[18:34], 16)
+ xattr_dict['dir_count'] = int(xattr_value[34:], 16)
+ elif re.search("dirty", xattr_key):
+ if xattr_value == IS_CLEAN:
+ xattr_dict['dirty'] = False
+ elif xattr_value == IS_DIRTY:
+ xattr_dict['dirty'] = True
+ elif re.search("limit_objects", xattr_key):
+ xattr_dict['limit_objects'] = int(xattr_value[2:18], 16)
+ elif re.search("limit_set", xattr_key):
+ xattr_dict['limit_set'] = int(xattr_value[2:18], 16)
+
+ return xattr_dict
+
+def verify_file_xattr(path, stbuf = None):
+
+ global file_count
+ file_count += 1
+
+ if stbuf is None:
+ stbuf = os.lstat(path)
+
+ xattr_dict = get_quota_xattr_brick(path)
+
+ for parent in xattr_dict['parents']:
+ contri_dict = xattr_dict['parents'][parent]
+
+ if 'contri_size' not in contri_dict or \
+ 'contri_file_count' not in contri_dict or \
+ 'contri_dir_count' not in contri_dict:
+ print_msg(QUOTA_META_ABSENT, path, xattr_dict, stbuf)
+ fix_xattr(path, False)
+ return
+ elif size_differs_lot(contri_dict['contri_size'], stbuf.st_size):
+ print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf)
+ fix_xattr(path, False)
+ return
+
+ if verbose_mode is True:
+ print_msg(QUOTA_VERBOSE, path, xattr_dict, stbuf)
+
+
+def verify_dir_xattr(path, dir_size):
+
+ global dir_count
+ dir_count += 1
+ xattr_dict = get_quota_xattr_brick(path)
+
+ stbuf = os.lstat(path)
+
+ for parent in xattr_dict['parents']:
+ contri_dict = xattr_dict['parents'][parent]
+
+ if 'size' not in xattr_dict or 'contri_size' not in contri_dict:
+ print_msg(QUOTA_META_ABSENT, path)
+ fix_xattr(path, True)
+ return
+ elif size_differs_lot(dir_size, xattr_dict['size']) or \
+ size_differs_lot(contri_dict['contri_size'], xattr_dict['size']):
+ print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf, dir_size)
+ fix_xattr(path, True)
+ return
+
+ if verbose_mode is True:
+ print_msg("VERBOSE", path, xattr_dict, stbuf, dir_size)
+
+
+def walktree(t_dir, hard_link_dict):
+ '''recursively descend the directory tree rooted at dir,
+ aggregating the size
+ t_dir : directory to walk over.
+ hard_link_dict : dict of inodes with multiple hard_links under t_dir
+ '''
+ global aggr_size
+ aggr_size[t_dir] = 0
+
+ for entry in os.listdir(t_dir):
+ pathname = os.path.join(t_dir, entry)
+ stbuf = os.lstat(pathname)
+ if S_ISDIR(stbuf.st_mode):
+ # It's a directory, recurse into it
+ if entry == '.glusterfs':
+ print("skipping " + pathname)
+ continue
+ descendent_hardlinks = {}
+ subtree_size = walktree(pathname, descendent_hardlinks)
+
+ subtree_size = fix_hardlink_accounting(descendent_hardlinks,
+ hard_link_dict,
+ subtree_size)
+
+ aggr_size[t_dir] = aggr_size[t_dir] + subtree_size
+
+ elif S_ISREG(stbuf.st_mode) or S_ISLNK(stbuf.st_mode):
+ # Even a symbolic link file may have multiple hardlinks.
+
+ file_size = stbuf.st_size
+ if stbuf.st_nlink > 2:
+ # send a single element dict to check if file is accounted.
+ file_size = fix_hardlink_accounting({stbuf.st_ino:stbuf.st_size},
+ hard_link_dict,
+ stbuf.st_size)
+
+ if file_size == 0:
+ print_msg("HARD_LINK (skipped)", pathname, "",
+ stbuf)
+ else:
+ print_msg("HARD_LINK (accounted)", pathname, "",
+ stbuf)
+ hard_link_dict[stbuf.st_ino] = stbuf.st_size
+
+ if t_dir in aggr_size:
+ aggr_size[t_dir] = aggr_size[t_dir] + file_size
+ else:
+ aggr_size[t_dir] = file_size
+ verify_file_xattr(pathname, stbuf)
+
+ else:
+ # Unknown file type, print a message
+ print('Skipping %s, due to file mode' % (pathname))
+
+ if t_dir not in aggr_size:
+ aggr_size[t_dir] = 0
+
+ verify_dir_xattr(t_dir, aggr_size[t_dir])
+ # du also accounts for t_directory sizes
+ # aggr_size[t_dir] += 4096
+
+ #cleanup
+ ret = aggr_size[t_dir]
+ del aggr_size[t_dir]
+ return ret
+
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser(description='Diagnose quota accounting issues.', epilog=epilog_msg)
+ parser.add_argument('brick_path', nargs=1,
+ help='The brick path (or any descendent sub-directory of brick path)',
+ )
+ parser.add_argument('--full-logs', dest='verbose', action='store_true',
+ help='''
+ log all the xattr values and stat values reported
+ for analysis. [CAUTION: This can give lot of output
+ depending on FS depth. So one has to make sure enough
+ disk space exists if redirecting to file]
+ '''
+ )
+ parser.add_argument('--fix-issues', metavar='mount_path', dest='mnt', action='store',
+ help='''
+ fix accounting issues where the xattr values disagree
+ with stat sizes reported by gluster. A mount is also
+ required for this option to be used.
+ [CAUTION: This will directly modify backend xattr]
+ '''
+ )
+ parser.add_argument('--sub-dir', metavar='sub_dir', dest='sub_dir', action='store',
+ help='''
+ limit the crawling and accounting verification/correction
+ to a specific subdirectory.
+ '''
+ )
+
+ args = parser.parse_args()
+ verbose_mode = args.verbose
+ brick_path = args.brick_path[0]
+ sub_dir = args.sub_dir
+ mnt_path = args.mnt
+ hard_link_dict = {}
+ if sub_dir is not None:
+ walktree(os.path.join(brick_path, sub_dir), hard_link_dict)
+ else:
+ walktree(brick_path, hard_link_dict)
+
+ print("Files verified : " + str(file_count))
+ print("Directories verified : " + str(dir_count))
+ if mnt_path is not None:
+ print("Objects Fixed : " + str(obj_fix_count))
diff --git a/extras/quota/xattr_analysis.py b/extras/quota/xattr_analysis.py
new file mode 100755
index 00000000000..7bd7d96374c
--- /dev/null
+++ b/extras/quota/xattr_analysis.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python3
+# Below script has two purposes
+# 1. Display xattr of entire FS tree in a human readable form
+# 2. Display all the directory where contri and size mismatch.
+# (If there are any directory with contri and size mismatch that are not dirty
+# then that highlights a propagation issue)
+# The script takes only one input LOG _FILE generated from the command,
+# find <brick_path> | xargs getfattr -d -m. -e hex > log_gluster_xattr
+
+from __future__ import print_function
+import re
+import subprocess
+import sys
+from hurry.filesize import size
+
+if len(sys.argv) < 2:
+ sys.exit('Usage: %s log_gluster_xattr \n'
+ 'to generate log_gluster_xattr use: \n'
+ 'find <brick_path> | xargs getfattr -d -m. -e hex > log_gluster_xattr'
+ % sys.argv[0])
+LOG_FILE=sys.argv[1]
+
+def get_quota_xattr_brick():
+ out = subprocess.check_output (["/usr/bin/cat", LOG_FILE])
+ pairs = out.splitlines()
+
+ xdict = {}
+ mismatch_size = [('====contri_size===', '====size====')]
+ for xattr in pairs:
+ k = xattr.split("=")[0]
+ if re.search("# file:", k):
+ print(xdict)
+ filename=k
+ print("=====" + filename + "=======")
+ xdict = {}
+ elif k is "":
+ pass
+ else:
+ print(xattr)
+ v = xattr.split("=")[1]
+ if re.search("contri", k):
+ if len(v) == 34:
+ # for files size is obtained in iatt, file count should be 1, dir count=0
+ xdict['contri_file_count'] = int(v[18:34], 16)
+ xdict['contri_dir_count'] = 0
+ else:
+ xdict['contri_size'] = size(int(v[2:18], 16))
+ xdict['contri_file_count'] = int(v[18:34], 16)
+ xdict['contri_dir_count'] = int(v[34:], 16)
+ elif re.search("size", k):
+ xdict['size'] = size(int(v[2:18], 16))
+ xdict['file_count'] = int(v[18:34], 16)
+ xdict['dir_count'] = int(v[34:], 16)
+ elif re.search("dirty", k):
+ if v == '0x3000':
+ xdict['dirty'] = False
+ elif v == '0x3100':
+ xdict['dirty'] = True
+ elif re.search("limit_objects", k):
+ xdict['limit_objects'] = int(v[2:18], 16)
+ elif re.search("limit_set", k):
+ xdict['limit_set'] = size(int(v[2:18], 16))
+
+ if 'size' in xdict and 'contri_size' in xdict and xdict['size'] != xdict['contri_size']:
+ mismatch_size.append((xdict['contri_size'], xdict['size'], filename))
+
+ for values in mismatch_size:
+ print(values)
+
+
+if __name__ == '__main__':
+ get_quota_xattr_brick()
+
diff --git a/extras/rebalance.py b/extras/rebalance.py
new file mode 100755
index 00000000000..37c68ebbb42
--- /dev/null
+++ b/extras/rebalance.py
@@ -0,0 +1,309 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+
+import atexit
+import copy
+import optparse
+import os
+import pipes
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
+import volfilter
+import platform
+
+# It's just more convenient to have named fields.
+class Brick:
+ def __init__ (self, path, name):
+ self.path = path
+ self.sv_name = name
+ self.size = 0
+ self.curr_size = 0
+ self.good_size = 0
+ def set_size (self, size):
+ self.size = size
+ def set_range (self, rs, re):
+ self.r_start = rs
+ self.r_end = re
+ self.curr_size = self.r_end - self.r_start + 1
+ def __repr__ (self):
+ value = self.path[:]
+ value += "(%d," % self.size
+ if self.curr_size:
+ value += "0x%x,0x%x)" % (self.r_start, self.r_end)
+ else:
+ value += "-)"
+ return value
+
+def get_bricks (host, vol):
+ t = pipes.Template()
+ t.prepend("gluster --remote-host=%s system getspec %s"%(host, vol), ".-")
+ return t.open(None, "r")
+
+def generate_stanza (vf, all_xlators, cur_subvol):
+ sv_list = []
+ for sv in cur_subvol.subvols:
+ generate_stanza(vf, all_xlators, sv)
+ sv_list.append(sv.name)
+ vf.write("volume %s\n" % cur_subvol.name)
+ vf.write(" type %s\n" % cur_subvol.type)
+ for kvpair in cur_subvol.opts.items():
+ vf.write(" option %s %s\n" % kvpair)
+ if sv_list:
+ vf.write(" subvolumes %s\n" % ''.join(sv_list))
+ vf.write("end-volume\n\n")
+
+
+def mount_brick (localpath, all_xlators, dht_subvol):
+
+ # Generate a volfile.
+ vf_name = localpath + ".vol"
+ vf = open(vf_name, "w")
+ generate_stanza(vf, all_xlators, dht_subvol)
+ vf.flush()
+ vf.close()
+
+ # Create a brick directory and mount the brick there.
+ os.mkdir(localpath)
+ subprocess.call(["glusterfs", "-f", vf_name, localpath])
+
+# We use the command-line tools because there's no getxattr support in the
+# Python standard library (which is ridiculous IMO). Adding the xattr package
+# from PyPI would create a new and difficult dependency because the bits to
+# satisfy it don't seem to exist in Fedora. We already expect the command-line
+# tools to be there, so it's safer just to rely on them.
+#
+# We might have to revisit this if we get as far as actually issuing millions
+# of setxattr requests. Even then, it might be better to do that part with a C
+# program which has only a build-time dependency.
+def get_range (brick):
+ t = pipes.Template()
+ cmd = "getfattr -e hex -n trusted.glusterfs.dht %s 2> /dev/null"
+ t.prepend(cmd%brick, ".-")
+ t.append("grep ^trusted.glusterfs.dht=", "--")
+ f = t.open(None, "r")
+ try:
+ value = f.readline().rstrip().split('=')[1][2:]
+ except:
+ print("could not get layout for %s (might be OK)" % brick)
+ return None
+ v_start = int("0x"+value[16:24], 16)
+ v_end = int("0x"+value[24:32], 16)
+ return (v_start, v_end)
+
+def calc_sizes (bricks, total):
+ leftover = 1 << 32
+ for b in bricks:
+ if b.size:
+ b.good_size = (b.size << 32) / total
+ leftover -= b.good_size
+ else:
+ b.good_size = 0
+ if leftover:
+ # Add the leftover to an old brick if we can.
+ for b in bricks:
+ if b.good_size:
+ b.good_size += leftover
+ break
+ else:
+ # Fine, just add it wherever.
+ bricks[0].good_size += leftover
+
+# Normalization means sorting the bricks by r_start and (b) ensuring that there
+# are no gaps.
+def normalize (in_bricks):
+ out_bricks = []
+ curr_hash = 0
+ used = 0
+ while curr_hash < (1<<32):
+ curr_best = None
+ for b in in_bricks:
+ if b.r_start == curr_hash:
+ used += 1
+ out_bricks.append(b)
+ in_bricks.remove(b)
+ curr_hash = b.r_end + 1
+ break
+ else:
+ print("gap found at 0x%08x" % curr_hash)
+ sys.exit(1)
+ return out_bricks + in_bricks, used
+
+def get_score (bricks):
+ score = 0
+ curr_hash = 0
+ for b in bricks:
+ if not b.curr_size:
+ curr_hash += b.good_size
+ continue
+ new_start = curr_hash
+ curr_hash += b.good_size
+ new_end = curr_hash - 1
+ if new_start > b.r_start:
+ max_start = new_start
+ else:
+ max_start = b.r_start
+ if new_end < b.r_end:
+ min_end = new_end
+ else:
+ min_end = b.r_end
+ if max_start <= min_end:
+ score += (min_end - max_start + 1)
+ return score
+
+if __name__ == "__main__":
+
+ my_usage = "%prog [options] server volume [directory]"
+ parser = optparse.OptionParser(usage=my_usage)
+ parser.add_option("-f", "--free-space", dest="free_space",
+ default=False, action="store_true",
+ help="use free space instead of total space")
+ parser.add_option("-l", "--leave-mounted", dest="leave_mounted",
+ default=False, action="store_true",
+ help="leave subvolumes mounted")
+ parser.add_option("-v", "--verbose", dest="verbose",
+ default=False, action="store_true",
+ help="verbose output")
+ options, args = parser.parse_args()
+
+ if len(args) == 3:
+ fix_dir = args[2]
+ else:
+ if len(args) != 2:
+ parser.print_help()
+ sys.exit(1)
+ fix_dir = None
+ hostname, volname = args[:2]
+
+ # Make sure stuff gets cleaned up, even if there are exceptions.
+ orig_dir = os.getcwd()
+ work_dir = tempfile.mkdtemp()
+ bricks = []
+ def cleanup_workdir ():
+ os.chdir(orig_dir)
+ if options.verbose:
+ print("Cleaning up %s" % work_dir)
+ for b in bricks:
+ subprocess.call(["umount", b.path])
+ shutil.rmtree(work_dir)
+ if not options.leave_mounted:
+ atexit.register(cleanup_workdir)
+ os.chdir(work_dir)
+
+ # Mount each brick individually, so we can issue brick-specific calls.
+ if options.verbose:
+ print("Mounting subvolumes...")
+ index = 0
+ volfile_pipe = get_bricks(hostname, volname)
+ all_xlators, last_xlator = volfilter.load(volfile_pipe)
+ for dht_vol in all_xlators.itervalues():
+ if dht_vol.type == "cluster/distribute":
+ break
+ else:
+ print("no DHT volume found")
+ sys.exit(1)
+ for sv in dht_vol.subvols:
+ #print "found subvol %s" % sv.name
+ lpath = "%s/brick%s" % (work_dir, index)
+ index += 1
+ mount_brick(lpath, all_xlators, sv)
+ bricks.append(Brick(lpath, sv.name))
+ if index == 0:
+ print("no bricks")
+ sys.exit(1)
+
+ # Collect all of the sizes.
+ if options.verbose:
+ print("Collecting information...")
+ total = 0
+ for b in bricks:
+ info = os.statvfs(b.path)
+ # On FreeBSD f_bsize (info[0]) contains the optimal I/O size,
+ # not the block size as it's found on Linux. In this case we
+ # use f_frsize (info[1]).
+ if platform.system() == 'FreeBSD':
+ bsize = info[1]
+ else:
+ bsize = info[0]
+ # We want a standard unit even if different bricks use
+ # different block sizes. The size is chosen to avoid overflows
+ # for very large bricks with very small block sizes, but also
+ # accommodate filesystems which use very large block sizes to
+ # cheat on benchmarks.
+ blocksper100mb = 104857600 / bsize
+ if options.free_space:
+ size = info[3] / blocksper100mb
+ else:
+ size = info[2] / blocksper100mb
+ if size <= 0:
+ print("brick %s has invalid size %d" % (b.path, size))
+ sys.exit(1)
+ b.set_size(size)
+ total += size
+
+ # Collect all of the layout information.
+ for b in bricks:
+ hash_range = get_range(b.path)
+ if hash_range is not None:
+ rs, re = hash_range
+ if rs > re:
+ print("%s has backwards hash range" % b.path)
+ sys.exit(1)
+ b.set_range(hash_range[0], hash_range[1])
+
+ if options.verbose:
+ print("Calculating new layouts...")
+ calc_sizes(bricks, total)
+ bricks, used = normalize(bricks)
+
+ # We can't afford O(n!) here, but O(n^2) should be OK and the result
+ # should be almost as good.
+ while used < len(bricks):
+ best_place = used
+ best_score = get_score(bricks)
+ for i in range(used):
+ new_bricks = bricks[:]
+ del new_bricks[used]
+ new_bricks.insert(i, bricks[used])
+ new_score = get_score(new_bricks)
+ if new_score > best_score:
+ best_place = i
+ best_score = new_score
+ if best_place != used:
+ nb = bricks[used]
+ del bricks[used]
+ bricks.insert(best_place, nb)
+ used += 1
+
+ # Finalize whatever we decided on.
+ curr_hash = 0
+ for b in bricks:
+ b.r_start = curr_hash
+ curr_hash += b.good_size
+ b.r_end = curr_hash - 1
+
+ print("Here are the xattr values for your size-weighted layout:")
+ for b in bricks:
+ print(" %s: 0x0000000200000000%08x%08x" % (
+ b.sv_name, b.r_start, b.r_end))
+
+ if fix_dir:
+ if options.verbose:
+ print("Fixing layout for %s" % fix_dir)
+ for b in bricks:
+ value = "0x0000000200000000%08x%08x" % (
+ b.r_start, b.r_end)
+ path = "%s/%s" % (b.path, fix_dir)
+ cmd = "setfattr -n trusted.glusterfs.dht -v %s %s" % (
+ value, path)
+ print(cmd)
+
+ if options.leave_mounted:
+ print("The following subvolumes are still mounted:")
+ for b in bricks:
+ print("%s on %s" % (b.sv_name, b.path))
+ print("Don't forget to clean up when you're done.")
+
diff --git a/extras/rhel_install.sh b/extras/rhel_install.sh
deleted file mode 100644
index 3e10e6c2f91..00000000000
--- a/extras/rhel_install.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-rpm -ivh http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/gluster-swift-1.4.8-4.el6.noarch.rpm
-rpm -ivh http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/gluster-swift-account-1.4.8-4.el6.noarch.rpm
-rpm -ivh http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/gluster-swift-container-1.4.8-4.el6.noarch.rpm
-rpm -ivh http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/gluster-swift-object-1.4.8-4.el6.noarch.rpm
-rpm -ivh http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/gluster-swift-proxy-1.4.8-4.el6.noarch.rpm
-rpm -ivh http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/gluster-swift-plugin-1.0-2.noarch.rpm
-rpm -ivh http://download.gluster.com/pub/gluster/glusterfs/3.3/3.3.0/UFO/gluster-swift-doc-1.4.8-4.el6.noarch.rpm
diff --git a/extras/run-gluster.tmpfiles.in b/extras/run-gluster.tmpfiles.in
new file mode 100644
index 00000000000..329f2dde6db
--- /dev/null
+++ b/extras/run-gluster.tmpfiles.in
@@ -0,0 +1,2 @@
+# hardcoding /run for now, should be detected while building from source?
+d /run/gluster 0775 gluster gluster -
diff --git a/extras/snap_scheduler/Makefile.am b/extras/snap_scheduler/Makefile.am
new file mode 100644
index 00000000000..782f139016f
--- /dev/null
+++ b/extras/snap_scheduler/Makefile.am
@@ -0,0 +1,9 @@
+snap_schedulerdir = $(sbindir)/
+
+if WITH_SERVER
+snap_scheduler_SCRIPTS = gcron.py snap_scheduler.py conf.py
+endif
+
+EXTRA_DIST = gcron.py snap_scheduler.py conf.py
+
+CLEANFILES =
diff --git a/extras/snap_scheduler/README.md b/extras/snap_scheduler/README.md
new file mode 100644
index 00000000000..1316bb76469
--- /dev/null
+++ b/extras/snap_scheduler/README.md
@@ -0,0 +1,125 @@
+Snapshot Scheduler
+==============================
+
+SUMMARY
+-------
+
+GlusterFS volume snapshot provides point-in-time copy of a GlusterFS volume. Currently, GlusterFS volume snapshots can be easily scheduled by setting up cron jobs on one of the nodes in the GlusterFS trusted storage pool. This has a single point failure (SPOF), as scheduled jobs can be missed if the node running the cron jobs dies.
+
+We can avoid the SPOF by distributing the cron jobs to all nodes of the trusted storage pool.
+
+DETAILED DESCRIPTION
+--------------------
+
+The solution to the above problems involves the usage of:
+
+* A shared storage - This can be any shared storage (another gluster volume, a NFS mount, etc.) that will be used to share the schedule configuration and will help in the coordination of the jobs.
+* An agent - This agent will perform the actual snapshot commands, instead of cron. It will contain the logic to perform coordinated snapshots.
+* A helper script - This script will allow the user to initialise the scheduler on the local node, enable/disable scheduling, add/edit/list/delete snapshot schedules.
+* cronie - It is the default cron daemon shipped with RHEL. It invokes the agent at the appropriate intervals as mentioned by the user to perform the snapshot operation on the volume as mentioned by the user in the schedule.
+
+INITIAL SETUP
+-------------
+
+The administrator needs to create a shared storage that can be available to nodes across the cluster. A GlusterFS volume can also be used for the same. It is preferable that the *shared volume* be a replicate volume to avoid SPOF.
+
+Once the shared storage is created, it should be mounted on all nodes in the trusted storage pool which will be participating in the scheduling. The location where the shared_storage should be mounted (/var/run/gluster/snaps/shared_storage) in these nodes is fixed and is not configurable. Each node participating in the scheduling then needs to perform an initialisation of the snapshot scheduler by invoking the following:
+
+snap_scheduler.py init
+
+NOTE: This command needs to be run on all the nodes participating in the scheduling
+
+HELPER SCRIPT
+-------------
+
+The helper script(snap_scheduler.py) will initialise the scheduler on the local node, enable/disable scheduling, add/edit/list/delete snapshot schedules.
+
+a) snap_scheduler.py init
+
+This command initialises the snap_scheduler and interfaces it with the crond running on the local node. This is the first step, before executing any scheduling related commands from a node.
+
+NOTE: The helper script needs to be run with this option on all the nodes participating in the scheduling. Other options of the helper script can be run independently from any node, where initialisation has been successfully completed.
+
+b) snap_scheduler.py enable
+
+The snap scheduler is disabled by default after initialisation. This command enables the snap scheduler.
+
+c) snap_scheduler.py disable
+
+This command disables the snap scheduler.
+
+d) snap_scheduler.py status
+
+This command displays the current status(Enabled/Disabled) of the snap scheduler.
+
+e) snap_scheduler.py add "Job Name" "Schedule" "Volume Name"
+
+This command adds a new snapshot schedule. All the arguments must be provided within double-quotes(""). It takes three arguments:
+
+-> Job Name: This name uniquely identifies this particular schedule, and can be used to reference this schedule for future events like edit/delete. If a schedule already exists for the specified Job Name, the add command will fail.
+
+-> Schedule: The schedules are accepted in the format crond understands:-
+
+Example of job definition:
+.---------------- minute (0 - 59)
+| .------------- hour (0 - 23)
+| | .---------- day of month (1 - 31)
+| | | .------- month (1 - 12) OR jan,feb,mar,apr ...
+| | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
+| | | | |
+* * * * * user-name command to be executed
+Although we accept all valid cron schedules, currently we support granularity of snapshot schedules to a maximum of half-hourly snapshots.
+
+-> Volume Name: The name of the volume on which the scheduled snapshot operation will be performed.
+
+f) snap_scheduler.py edit "Job Name" "Schedule" "Volume Name"
+
+This command edits an existing snapshot schedule. It takes the same three arguments that the add option takes. All the arguments must be provided within double-quotes(""). If a schedule does not exists for the specified Job Name, the edit command will fail.
+
+g) snap_scheduler.py delete "Job Name"
+
+This command deletes an existing snapshot schedule. It takes the job name of the schedule as argument. The argument must be provided within double-quotes(""). If a schedule does not exists for the specified Job Name, the delete command will fail.
+
+h) snap_scheduler.py list
+
+This command lists the existing snapshot schedules in the following manner: Pseudocode:
+
+# snap_scheduler.py list
+JOB_NAME SCHEDULE OPERATION VOLUME NAME
+--------------------------------------------------------------------
+Job0 * * * * * Snapshot Create test_vol
+
+THE AGENT
+---------
+
+The snapshots scheduled with the help of the helper script, are read by crond which then invokes the agent(gcron.py) at the scheduled intervals to perform the snapshot operations on the specified volumes. It then performs the scheduled snapshots using the following algorithm to coordinate.
+
+start_time = get current time
+lock_file = job_name passed as an argument
+vol_name = volume name psased as an argument
+try POSIX locking the $lock_file
+ if lock is obtained, then
+ mod_time = Get modification time of $entry
+ if $mod_time < $start_time, then
+ Take snapshot of $entry.name (Volume name)
+ if snapshot failed, then
+ log the failure
+ Update modification time of $entry to current time
+ unlock the $entry
+
+The coordination with other scripts running on other nodes, is handled by the use of POSIX locks. All the instances of the script will attempt to lock the lock_file which is essentialy an empty file with the job name, and one which gets the lock will take the snapshot.
+
+To prevent redoing a done task, the script will make use of the mtime attribute of the entry. At the beginning execution, the script would have saved its start time. Once the script obtains the lock on the lock_file, before taking the snapshot, it compares the mtime of the entry with the start time. The snapshot will only be taken if the mtime is smaller than start time. Once the snapshot command completes, the script will update the mtime of the lock_file to the current time before unlocking.
+
+If a snapshot command fails, the script will log the failure (in syslog) and continue with its operation. It will not attempt to retry the failed snapshot in the current schedule, but will attempt it again in the next schedules. It is left to the administrator to monitor the logs and decide what to do after a failure.
+
+ASSUMPTIONS AND LIMITATIONS
+---------------------------
+
+It is assumed that all nodes in the have their times synced using NTP or any other mechanism. This is a hard requirement for this feature to work.
+
+The administrator needs to have python2.7 or higher installed, as well as the argparse module installed, to be able to use the helper script(snap_scheduler.py).
+
+There is a latency of one minute, between providing a command by the helper script and that command taking effect. Hence, currently we do not support snapshot schedules with per minute granularity.
+
+The administrator can however leverage the scheduler to schedule snapshots with granularity of half-hourly/hourly/daily/weekly/monthly/yearly periodic intervals. They can also schedule snapshots, which are customised mentioning which minute of the hour, which day of the week, which week of the month, and which month of the year, they want to schedule the snapshot operation.
diff --git a/extras/snap_scheduler/conf.py.in b/extras/snap_scheduler/conf.py.in
new file mode 100644
index 00000000000..6dcca0534a7
--- /dev/null
+++ b/extras/snap_scheduler/conf.py.in
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@'
diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py
new file mode 100755
index 00000000000..0e4df77d481
--- /dev/null
+++ b/extras/snap_scheduler/gcron.py
@@ -0,0 +1,190 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+from __future__ import print_function
+import subprocess
+import os
+import os.path
+import sys
+import time
+import logging
+import logging.handlers
+import fcntl
+
+
+GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
+GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"
+GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag"
+LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/"
+log = logging.getLogger("gcron-logger")
+start_time = 0.0
+
+
+def initLogger(script_name):
+ log.setLevel(logging.DEBUG)
+ logFormat = "[%(asctime)s %(filename)s:%(lineno)s %(funcName)s] "\
+ "%(levelname)s %(message)s"
+ formatter = logging.Formatter(logFormat)
+
+ sh = logging.handlers.SysLogHandler()
+ sh.setLevel(logging.ERROR)
+ sh.setFormatter(formatter)
+
+ process = subprocess.Popen(["gluster", "--print-logdir"],
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = process.communicate()
+ if process.returncode == 0:
+ logfile = os.path.join(out.strip(), script_name[:-3]+".log")
+
+ fh = logging.FileHandler(logfile)
+ fh.setLevel(logging.DEBUG)
+ fh.setFormatter(formatter)
+
+ log.addHandler(sh)
+ log.addHandler(fh)
+
+
+def takeSnap(volname="", snapname=""):
+ success = True
+ if volname == "":
+ log.debug("No volname given")
+ return False
+ if snapname == "":
+ log.debug("No snapname given")
+ return False
+
+ cli = ["gluster",
+ "snapshot",
+ "create",
+ snapname,
+ volname]
+ log.debug("Running command '%s'", " ".join(cli))
+
+ p = subprocess.Popen(cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ rv = p.returncode
+
+ log.debug("Command '%s' returned '%d'", " ".join(cli), rv)
+
+ if rv:
+ log.error("Snapshot of %s failed", volname)
+ log.error("Command output:")
+ log.error(err)
+ success = False
+ else:
+ log.info("Snapshot of %s successful", volname)
+
+ return success
+
+
+def doJob(name, lockFile, jobFunc, volname):
+ success = True
+ try:
+ f = os.open(lockFile, os.O_CREAT | os.O_RDWR | os.O_NONBLOCK)
+ try:
+ fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ mtime = os.path.getmtime(lockFile)
+ global start_time
+ log.debug("%s last modified at %s", lockFile, time.ctime(mtime))
+ if mtime < start_time:
+ log.debug("Processing job %s", name)
+ if jobFunc(volname, name):
+ log.info("Job %s succeeded", name)
+ else:
+ log.error("Job %s failed", name)
+ success = False
+ os.utime(lockFile, None)
+ else:
+ log.info("Job %s has been processed already", name)
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except (OSError, IOError):
+ log.info("Job %s is being processed by another agent", name)
+ os.close(f)
+ except (OSError, IOError) as e:
+ log.debug("Failed to open lock file %s : %s", lockFile, e)
+ log.error("Failed to process job %s", name)
+ success = False
+
+ return success
+
+
+def main():
+ script_name = os.path.basename(__file__)
+ initLogger(script_name)
+ global start_time
+ if sys.argv[1] == "--update":
+ if not os.path.exists(GCRON_TASKS):
+ # Create a flag in /var/run/gluster which indicates that this
+ # node doesn't have access to GCRON_TASKS right now, so that
+ # when the mount is available and GCRON_TASKS is available
+ # the flag will tell this routine to reload GCRON_CROND_TASK
+ try:
+ f = os.open(GCRON_RELOAD_FLAG,
+ os.O_CREAT | os.O_NONBLOCK, 0o644)
+ os.close(f)
+ except OSError as e:
+ if errno != EEXIST:
+ log.error("Failed to create %s : %s",
+ GCRON_RELOAD_FLAG, e)
+ output("Failed to create %s. Error: %s"
+ % (GCRON_RELOAD_FLAG, e))
+ return
+
+ if not os.path.exists(GCRON_CROND_TASK):
+ return
+
+ # As GCRON_TASKS exists now, we should check if GCRON_RELOAD_FLAG
+ # also exists. If so we should touch GCRON_CROND_TASK and remove
+ # the GCRON_RELOAD_FLAG
+ if os.path.exists(GCRON_RELOAD_FLAG):
+ try:
+ os.remove(GCRON_RELOAD_FLAG);
+ process = subprocess.Popen(["touch", "-h", GCRON_CROND_TASK],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = process.communicate()
+ if process.returncode != 0:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, err)
+ except (IOError, OSError) as e:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, e)
+ return
+ if os.lstat(GCRON_TASKS).st_mtime > \
+ os.lstat(GCRON_CROND_TASK).st_mtime:
+ try:
+ process = subprocess.Popen(["touch", "-h", GCRON_CROND_TASK],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = process.communicate()
+ if process.returncode != 0:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, err)
+ except IOError as e:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, e)
+ return
+
+ volname = sys.argv[1]
+ jobname = sys.argv[2]
+ locking_file = os.path.join(LOCK_FILE_DIR, jobname)
+ log.debug("locking_file = %s", locking_file)
+ log.debug("volname = %s", volname)
+ log.debug("jobname = %s", jobname)
+
+ start_time = int(time.time())
+
+ doJob("Scheduled-" + jobname + "-" + volname, locking_file, takeSnap, volname)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py
new file mode 100755
index 00000000000..e8fcc449a9b
--- /dev/null
+++ b/extras/snap_scheduler/snap_scheduler.py
@@ -0,0 +1,941 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+from __future__ import print_function
+import subprocess
+import os
+import os.path
+import logging
+import argparse
+import fcntl
+import logging.handlers
+import sys
+import shutil
+from errno import EEXIST
+from conf import GLUSTERFS_LIBEXECDIR
+sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
+
+EVENTS_ENABLED = True
+try:
+ from events.eventtypes import SNAPSHOT_SCHEDULER_INITIALISED \
+ as EVENT_SNAPSHOT_SCHEDULER_INITIALISED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_INIT_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_DISABLED \
+ as EVENT_SNAPSHOT_SCHEDULER_DISABLED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_DISABLE_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_ENABLED \
+ as EVENT_SNAPSHOT_SCHEDULER_ENABLED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_ENABLE_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_ADDED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_DELETED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_EDITED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED
+except ImportError:
+ # Events APIs not installed, dummy eventtypes with None
+ EVENTS_ENABLED = False
+ EVENT_SNAPSHOT_SCHEDULER_INITIALISED = None
+ EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_DISABLED = None
+ EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_ENABLED = None
+ EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED = None
+
+SCRIPT_NAME = "snap_scheduler"
+scheduler_enabled = False
+log = logging.getLogger(SCRIPT_NAME)
+SHARED_STORAGE_DIR="/run/gluster/shared_storage"
+GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled"
+GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled"
+GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks"
+GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"
+LOCK_FILE_DIR = SHARED_STORAGE_DIR+"/snaps/lock_files/"
+LOCK_FILE = LOCK_FILE_DIR+"lock_file"
+TMP_FILE = SHARED_STORAGE_DIR+"/snaps/tmp_file"
+GCRON_UPDATE_TASK = "/etc/cron.d/gcron_update_task"
+CURRENT_SCHEDULER = SHARED_STORAGE_DIR+"/snaps/current_scheduler"
+tasks = {}
+longest_field = 12
+current_scheduler = ""
+
+INTERNAL_ERROR = 2
+SHARED_STORAGE_DIR_DOESNT_EXIST = 3
+SHARED_STORAGE_NOT_MOUNTED = 4
+ANOTHER_TRANSACTION_IN_PROGRESS = 5
+INIT_FAILED = 6
+SCHEDULING_ALREADY_DISABLED = 7
+SCHEDULING_ALREADY_ENABLED = 8
+NODE_NOT_INITIALISED = 9
+ANOTHER_SCHEDULER_ACTIVE = 10
+JOB_ALREADY_EXISTS = 11
+JOB_NOT_FOUND = 12
+INVALID_JOBNAME = 13
+INVALID_VOLNAME = 14
+INVALID_SCHEDULE = 15
+INVALID_ARG = 16
+VOLUME_DOES_NOT_EXIST = 17
+
+def print_error (error_num):
+ if error_num == INTERNAL_ERROR:
+ return "Internal Error"
+ elif error_num == SHARED_STORAGE_DIR_DOESNT_EXIST:
+ return "The shared storage directory ("+SHARED_STORAGE_DIR+")" \
+ " does not exist."
+ elif error_num == SHARED_STORAGE_NOT_MOUNTED:
+ return "The shared storage directory ("+SHARED_STORAGE_DIR+")" \
+ " is not mounted."
+ elif error_num == ANOTHER_TRANSACTION_IN_PROGRESS:
+ return "Another transaction is in progress."
+ elif error_num == INIT_FAILED:
+ return "Initialisation failed."
+ elif error_num == SCHEDULING_ALREADY_DISABLED:
+ return "Snapshot scheduler is already disabled."
+ elif error_num == SCHEDULING_ALREADY_ENABLED:
+ return "Snapshot scheduler is already enabled."
+ elif error_num == NODE_NOT_INITIALISED:
+ return "The node is not initialised."
+ elif error_num == ANOTHER_SCHEDULER_ACTIVE:
+ return "Another scheduler is active."
+ elif error_num == JOB_ALREADY_EXISTS:
+ return "The job already exists."
+ elif error_num == JOB_NOT_FOUND:
+ return "The job cannot be found."
+ elif error_num == INVALID_JOBNAME:
+ return "The job name is invalid."
+ elif error_num == INVALID_VOLNAME:
+ return "The volume name is invalid."
+ elif error_num == INVALID_SCHEDULE:
+ return "The schedule is invalid."
+ elif error_num == INVALID_ARG:
+ return "The argument is invalid."
+ elif error_num == VOLUME_DOES_NOT_EXIST:
+ return "The volume does not exist."
+
+def output(msg):
+ print("%s: %s" % (SCRIPT_NAME, msg))
+
+
+def initLogger():
+ log.setLevel(logging.DEBUG)
+ logFormat = "[%(asctime)s %(filename)s:%(lineno)s %(funcName)s] "\
+ "%(levelname)s %(message)s"
+ formatter = logging.Formatter(logFormat)
+
+ sh = logging.handlers.SysLogHandler()
+ sh.setLevel(logging.ERROR)
+ sh.setFormatter(formatter)
+
+ process = subprocess.Popen(["gluster", "--print-logdir"],
+ stdout=subprocess.PIPE, universal_newlines=True)
+ logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log")
+
+ fh = logging.FileHandler(logfile)
+ fh.setLevel(logging.DEBUG)
+ fh.setFormatter(formatter)
+
+ log.addHandler(sh)
+ log.addHandler(fh)
+
+
+def scheduler_status():
+ ret = INTERNAL_ERROR
+ global scheduler_enabled
+ try:
+ f = os.path.realpath(GCRON_TASKS)
+ if f != os.path.realpath(GCRON_ENABLED) or not os.path.exists(GCRON_ENABLED):
+ log.info("Snapshot scheduler is currently disabled.")
+ scheduler_enabled = False
+ else:
+ log.info("Snapshot scheduler is currently enabled.")
+ scheduler_enabled = True
+ ret = 0
+ except:
+ log.error("Failed to enable snapshot scheduling. Error: "
+ "Failed to check the status of %s.", GCRON_DISABLED)
+
+ return ret
+
+def enable_scheduler():
+ ret = scheduler_status()
+ if ret == 0:
+ if not scheduler_enabled:
+
+ # Check if another scheduler is active.
+ ret = get_current_scheduler()
+ if ret == 0:
+ if (current_scheduler != "none"):
+ print_str = "Failed to enable snapshot scheduling. " \
+ "Error: Another scheduler is active."
+ log.error(print_str)
+ output(print_str)
+ ret = ANOTHER_SCHEDULER_ACTIVE
+ return ret
+ else:
+ print_str = "Failed to get current scheduler info."
+ log.error(print_str)
+ output(print_str)
+ return ret
+
+ log.info("Enabling snapshot scheduler.")
+ try:
+ if os.path.exists(GCRON_DISABLED):
+ os.remove(GCRON_DISABLED)
+ if os.path.lexists(GCRON_TASKS):
+ os.remove(GCRON_TASKS)
+ try:
+ f = os.open(GCRON_ENABLED, os.O_CREAT | os.O_NONBLOCK,
+ 0o644)
+ os.close(f)
+ except OSError as e:
+ log.error("Failed to open %s. Error: %s.",
+ GCRON_ENABLED, e)
+ ret = INTERNAL_ERROR
+ return ret
+ os.symlink(GCRON_ENABLED, GCRON_TASKS)
+ update_current_scheduler("cli")
+ log.info("Snapshot scheduling is enabled")
+ output("Snapshot scheduling is enabled")
+ ret = 0
+ except OSError as e:
+ print_str = ("Failed to enable snapshot scheduling."
+ "Error: {{}}" + e)
+ log.error(print_str)
+ output(print_str)
+ ret = INTERNAL_ERROR
+ else:
+ print_str = "Failed to enable snapshot scheduling. " \
+ "Error: Snapshot scheduling is already enabled."
+ log.error(print_str)
+ output(print_str)
+ ret = SCHEDULING_ALREADY_ENABLED
+ else:
+ print_str = "Failed to enable snapshot scheduling. " \
+ "Error: Failed to check scheduler status."
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def disable_scheduler():
+ ret = scheduler_status()
+ if ret == 0:
+ if scheduler_enabled:
+ log.info("Disabling snapshot scheduler.")
+ try:
+ # Check if another scheduler is active. If not, then
+ # update current scheduler to "none". Else do nothing.
+ ret = get_current_scheduler()
+ if ret == 0:
+ if (current_scheduler == "cli"):
+ update_current_scheduler("none")
+ else:
+ print_str = "Failed to disable snapshot scheduling. " \
+ "Error: Failed to get current scheduler info."
+ log.error(print_str)
+ output(print_str)
+ return ret
+
+ if os.path.exists(GCRON_DISABLED):
+ os.remove(GCRON_DISABLED)
+ if os.path.lexists(GCRON_TASKS):
+ os.remove(GCRON_TASKS)
+ f = os.open(GCRON_DISABLED, os.O_CREAT, 0o644)
+ os.close(f)
+ os.symlink(GCRON_DISABLED, GCRON_TASKS)
+ log.info("Snapshot scheduling is disabled")
+ output("Snapshot scheduling is disabled")
+ ret = 0
+ except OSError as e:
+ print_str = ("Failed to disable snapshot scheduling. Error: "
+ + e)
+ log.error(print_str)
+ output(print_str)
+ ret = INTERNAL_ERROR
+ else:
+ print_str = "Failed to disable scheduling. " \
+ "Error: Snapshot scheduling is already disabled."
+ log.error(print_str)
+ output(print_str)
+ ret = SCHEDULING_ALREADY_DISABLED
+ else:
+ print_str = "Failed to disable snapshot scheduling. " \
+ "Error: Failed to check scheduler status."
+ log.error(print_str)
+ output(print_str)
+ ret = INTERNAL_ERROR
+
+ return ret
+
+
+def load_tasks_from_file():
+ global tasks
+ global longest_field
+ try:
+ with open(GCRON_ENABLED, 'r') as f:
+ for line in f:
+ line = line.rstrip('\n')
+ if not line:
+ break
+ line = line.split("gcron.py")
+ schedule = line[0].split("root")[0].rstrip(' ')
+ line = line[1].split(" ")
+ volname = line[1]
+ jobname = line[2]
+ longest_field = max(longest_field, len(jobname), len(volname),
+ len(schedule))
+ tasks[jobname] = schedule+":"+volname
+ f.close()
+ ret = 0
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", GCRON_ENABLED, e)
+ ret = INTERNAL_ERROR
+
+ return ret
+
+
+def get_current_scheduler():
+ global current_scheduler
+ try:
+ with open(CURRENT_SCHEDULER, 'r') as f:
+ current_scheduler = f.readline().rstrip('\n')
+ f.close()
+ ret = 0
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", CURRENT_SCHEDULER, e)
+ ret = INTERNAL_ERROR
+
+ return ret
+
+
+def list_schedules():
+ log.info("Listing snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if len(tasks) == 0:
+ output("No snapshots scheduled")
+ else:
+ jobname = "JOB_NAME".ljust(longest_field+5)
+ schedule = "SCHEDULE".ljust(longest_field+5)
+ operation = "OPERATION".ljust(longest_field+5)
+ volname = "VOLUME NAME".ljust(longest_field+5)
+ hyphens = "".ljust((longest_field+5) * 4, '-')
+ print(jobname+schedule+operation+volname)
+ print(hyphens)
+ for key in sorted(tasks):
+ jobname = key.ljust(longest_field+5)
+ schedule = tasks[key].split(":")[0].ljust(
+ longest_field + 5)
+ volname = tasks[key].split(":")[1].ljust(
+ longest_field + 5)
+ operation = "Snapshot Create".ljust(longest_field+5)
+ print(jobname+schedule+operation+volname)
+ ret = 0
+ else:
+ print_str = "Failed to list snapshot schedules. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def write_tasks_to_file():
+ try:
+ with open(TMP_FILE, "w", 0o644) as f:
+ # If tasks is empty, just create an empty tmp file
+ if len(tasks) != 0:
+ for key in sorted(tasks):
+ jobname = key
+ schedule = tasks[key].split(":")[0]
+ volname = tasks[key].split(":")[1]
+ f.write("%s root PATH=$PATH:/usr/local/sbin:/usr/sbin "
+ "gcron.py %s %s\n" % (schedule, volname, jobname))
+ f.write("\n")
+ f.flush()
+ os.fsync(f.fileno())
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", TMP_FILE, e)
+ ret = INTERNAL_ERROR
+ return ret
+
+ shutil.move(TMP_FILE, GCRON_ENABLED)
+ ret = 0
+
+ return ret
+
+def update_current_scheduler(data):
+ try:
+ with open(TMP_FILE, "w", 0o644) as f:
+ f.write("%s" % data)
+ f.flush()
+ os.fsync(f.fileno())
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", TMP_FILE, e)
+ ret = INTERNAL_ERROR
+ return ret
+
+ shutil.move(TMP_FILE, CURRENT_SCHEDULER)
+ ret = 0
+
+ return ret
+
+
+def isVolumePresent(volname):
+ success = False
+ if volname == "":
+ log.debug("No volname given")
+ return success
+
+ cli = ["gluster",
+ "volume",
+ "info",
+ volname]
+ log.debug("Running command '%s'", " ".join(cli))
+
+ p = subprocess.Popen(cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ rv = p.returncode
+
+ log.debug("Command '%s' returned '%d'", " ".join(cli), rv)
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ else:
+ success = True;
+
+ return success
+
+
+def add_schedules(jobname, schedule, volname):
+ log.info("Adding snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if jobname in tasks:
+ print_str = ("%s already exists in schedule. Use "
+ "'edit' to modify %s" % (jobname, jobname))
+ log.error(print_str)
+ output(print_str)
+ ret = JOB_ALREADY_EXISTS
+ else:
+ if not isVolumePresent(volname):
+ print_str = ("Volume %s does not exist. Create %s and retry." %
+ (volname, volname))
+ log.error(print_str)
+ output(print_str)
+ ret = VOLUME_DOES_NOT_EXIST
+ else:
+ tasks[jobname] = schedule + ":" + volname
+ ret = write_tasks_to_file()
+ if ret == 0:
+ # Create a LOCK_FILE for the job
+ job_lockfile = LOCK_FILE_DIR + jobname
+ try:
+ f = os.open(job_lockfile, os.O_CREAT | os.O_NONBLOCK,
+ 0o644)
+ os.close(f)
+ except OSError as e:
+ log.error("Failed to open %s. Error: %s.",
+ job_lockfile, e)
+ ret = INTERNAL_ERROR
+ return ret
+ log.info("Successfully added snapshot schedule %s" %
+ jobname)
+ output("Successfully added snapshot schedule")
+ ret = 0
+ else:
+ print_str = "Failed to add snapshot schedule. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def delete_schedules(jobname):
+ log.info("Delete snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if jobname in tasks:
+ del tasks[jobname]
+ ret = write_tasks_to_file()
+ if ret == 0:
+ # Delete the LOCK_FILE for the job
+ job_lockfile = LOCK_FILE_DIR+jobname
+ try:
+ os.remove(job_lockfile)
+ except OSError as e:
+ log.error("Failed to open %s. Error: %s.",
+ job_lockfile, e)
+ ret = INTERNAL_ERROR
+ return ret
+ log.info("Successfully deleted snapshot schedule %s"
+ % jobname)
+ output("Successfully deleted snapshot schedule")
+ ret = 0
+ else:
+ print_str = ("Failed to delete %s. Error: No such "
+ "job scheduled" % jobname)
+ log.error(print_str)
+ output(print_str)
+ ret = JOB_NOT_FOUND
+ else:
+ print_str = "Failed to delete snapshot schedule. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def edit_schedules(jobname, schedule, volname):
+ log.info("Editing snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if jobname in tasks:
+ if not isVolumePresent(volname):
+ print_str = ("Volume %s does not exist. Create %s and retry." %
+ (volname, volname))
+ log.error(print_str)
+ output(print_str)
+ ret = VOLUME_DOES_NOT_EXIST
+ else:
+ tasks[jobname] = schedule+":"+volname
+ ret = write_tasks_to_file()
+ if ret == 0:
+ log.info("Successfully edited snapshot schedule %s" %
+ jobname)
+ output("Successfully edited snapshot schedule")
+ else:
+ print_str = ("Failed to edit %s. Error: No such "
+ "job scheduled" % jobname)
+ log.error(print_str)
+ output(print_str)
+ ret = JOB_NOT_FOUND
+ else:
+ print_str = "Failed to edit snapshot schedule. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+def get_bool_val():
+ getsebool_cli = ["getsebool",
+ "-a"]
+ p1 = subprocess.Popen(getsebool_cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ grep_cmd = ["grep",
+ "cron_system_cronjob_use_shares"]
+ p2 = subprocess.Popen(grep_cmd, stdin=p1.stdout,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ p1.stdout.close()
+ output, err = p2.communicate()
+ rv = p2.returncode
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ return -1
+
+ bool_val = output.split()[2]
+ log.debug("Bool value = '%s'", bool_val)
+
+ return bool_val
+
+def get_selinux_status():
+ getenforce_cli = ["getenforce"]
+ log.debug("Running command '%s'", " ".join(getenforce_cli))
+
+ try:
+ p1 = subprocess.Popen(getenforce_cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ except OSError as oserr:
+ log.error("Failed to run the command \"getenforce\". Error: %s" %\
+ oserr)
+ return -1
+
+ output, err = p1.communicate()
+ rv = p1.returncode
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ return -1
+ else:
+ selinux_status=output.rstrip()
+ log.debug("selinux status: %s", selinux_status)
+
+ return selinux_status
+
+def set_cronjob_user_share():
+ selinux_status = get_selinux_status()
+ if (selinux_status == -1):
+ log.error("Failed to get selinux status")
+ return -1
+ elif (selinux_status == "Disabled"):
+ return 0
+
+ bool_val = get_bool_val()
+ # In case of a failure (where the boolean value is not)
+ # present in the system, we should not proceed further
+ # We should only proceed when the value is "off"
+ if (bool_val == -1 or bool_val != "off"):
+ return 0
+
+ setsebool_cli = ["setsebool", "-P",
+ "cron_system_cronjob_use_shares",
+ "on"]
+ log.debug("Running command '%s'", " ".join(setsebool_cli))
+
+ p1 = subprocess.Popen(setsebool_cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ output, err = p1.communicate()
+ rv = p1.returncode
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ return rv
+
+ bool_val = get_bool_val()
+ if (bool_val == "on"):
+ return 0
+ else:
+ # In case of an error or if boolean is not on
+ # we return a failure here
+ return -1
+
+def initialise_scheduler():
+ ret = set_cronjob_user_share()
+ if ret:
+ log.error("Failed to set selinux boolean "
+ "cron_system_cronjob_use_shares to 'on'")
+ return ret
+
+ try:
+ with open(TMP_FILE, "w+", 0o644) as f:
+ updater = ("* * * * * root PATH=$PATH:/usr/local/sbin:"
+ "/usr/sbin gcron.py --update\n")
+ f.write("%s\n" % updater)
+ f.flush()
+ os.fsync(f.fileno())
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", TMP_FILE, e)
+ ret = INIT_FAILED
+ return ret
+
+ shutil.move(TMP_FILE, GCRON_UPDATE_TASK)
+
+ if not os.path.lexists(GCRON_TASKS):
+ try:
+ f = open(GCRON_TASKS, "w", 0o644)
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", GCRON_TASKS, e)
+ ret = INIT_FAILED
+ return ret
+
+ if os.path.lexists(GCRON_CROND_TASK):
+ os.remove(GCRON_CROND_TASK)
+
+ os.symlink(GCRON_TASKS, GCRON_CROND_TASK)
+
+ log.info("Successfully initialised snapshot scheduler for this node")
+ output("Successfully initialised snapshot scheduler for this node")
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_INITIALISED, status="Success")
+
+ ret = 0
+ return ret
+
+
+def syntax_checker(args):
+ if hasattr(args, 'jobname'):
+ if (len(args.jobname.split()) != 1):
+ output("Invalid Jobname. Jobname should not be empty and should not contain \" \" character.")
+ ret = INVALID_JOBNAME
+ return ret
+ args.jobname=args.jobname.strip()
+
+ if hasattr(args, 'volname'):
+ if (len(args.volname.split()) != 1):
+ output("Invalid Volname. Volname should not be empty and should not contain \" \" character.")
+ ret = INVALID_VOLNAME
+ return ret
+ args.volname=args.volname.strip()
+
+ if hasattr(args, 'schedule'):
+ if (len(args.schedule.split()) != 5):
+ output("Invalid Schedule. Please refer to the following for adding a valid cron schedule")
+ print ("* * * * *")
+ print ("| | | | |")
+ print ("| | | | +---- Day of the Week (range: 1-7, 1 standing for Monday)")
+ print ("| | | +------ Month of the Year (range: 1-12)")
+ print ("| | +-------- Day of the Month (range: 1-31)")
+ print ("| +---------- Hour (range: 0-23)")
+ print ("+------------ Minute (range: 0-59)")
+ ret = INVALID_SCHEDULE
+ return ret
+
+ ret = 0
+ return ret
+
+
+def perform_operation(args):
+ if not os.path.exists(CURRENT_SCHEDULER):
+ update_current_scheduler("none")
+
+ # Initialise snapshot scheduler on local node
+ if args.action == "init":
+ ret = initialise_scheduler()
+ if ret != 0:
+ output("Failed to initialise snapshot scheduling")
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # Disable snapshot scheduler
+ if args.action == "disable_force":
+ ret = disable_scheduler()
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLED,
+ status="Successfully Disabled")
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # Check if the symlink to GCRON_TASKS is properly set in the shared storage
+ if (not os.path.lexists(GCRON_UPDATE_TASK) or
+ not os.path.lexists(GCRON_CROND_TASK) or
+ os.readlink(GCRON_CROND_TASK) != GCRON_TASKS):
+ print_str = ("Please run 'snap_scheduler.py' init to initialise "
+ "the snap scheduler for the local node.")
+ log.error(print_str)
+ output(print_str)
+ ret = NODE_NOT_INITIALISED
+ return ret
+
+ # Check status of snapshot scheduler.
+ if args.action == "status":
+ ret = scheduler_status()
+ if ret == 0:
+ if scheduler_enabled:
+ output("Snapshot scheduling status: Enabled")
+ else:
+ output("Snapshot scheduling status: Disabled")
+ else:
+ output("Failed to check status of snapshot scheduler")
+ return ret
+
+ # Enable snapshot scheduler
+ if args.action == "enable":
+ ret = enable_scheduler()
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_ENABLED,
+ status="Successfully Enabled")
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # Disable snapshot scheduler
+ if args.action == "disable":
+ ret = disable_scheduler()
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLED,
+ status="Successfully Disabled")
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # List snapshot schedules
+ if args.action == "list":
+ ret = list_schedules()
+ return ret
+
+ # Add snapshot schedules
+ if args.action == "add":
+ ret = syntax_checker(args)
+ if ret != 0:
+ return ret
+ ret = add_schedules(args.jobname, args.schedule, args.volname)
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED,
+ status="Successfully added job "+args.jobname)
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED,
+ status="Failed to add job "+args.jobname,
+ error=print_error(ret))
+ return ret
+
+ # Delete snapshot schedules
+ if args.action == "delete":
+ ret = syntax_checker(args)
+ if ret != 0:
+ return ret
+ ret = delete_schedules(args.jobname)
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED,
+ status="Successfully deleted job "+args.jobname)
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED,
+ status="Failed to delete job "+args.jobname,
+ error=print_error(ret))
+ return ret
+
+ # Edit snapshot schedules
+ if args.action == "edit":
+ ret = syntax_checker(args)
+ if ret != 0:
+ return ret
+ ret = edit_schedules(args.jobname, args.schedule, args.volname)
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED,
+ status="Successfully edited job "+args.jobname)
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED,
+ status="Failed to edit job "+args.jobname,
+ error=print_error(ret))
+ return ret
+
+ ret = INVALID_ARG
+ return ret
+
+def gf_event(event_type, **kwargs):
+ if EVENTS_ENABLED:
+ from events.gf_event import gf_event as gfevent
+ gfevent(event_type, **kwargs)
+
+
+def main(argv):
+ initLogger()
+ ret = -1
+ parser = argparse.ArgumentParser()
+ subparsers = parser.add_subparsers(dest="action",
+ metavar=('{init, status, enable,'
+ ' disable, list, add,'
+ ' delete, edit}'))
+ subparsers.add_parser('init',
+ help="Initialise the node for snapshot scheduling")
+
+ subparsers.add_parser("status",
+ help="Check if snapshot scheduling is "
+ "enabled or disabled")
+ subparsers.add_parser("enable",
+ help="Enable snapshot scheduling")
+ subparsers.add_parser("disable",
+ help="Disable snapshot scheduling")
+ subparsers.add_parser("disable_force")
+ subparsers.add_parser("list",
+ help="List snapshot schedules")
+ parser_add = subparsers.add_parser("add",
+ help="Add snapshot schedules")
+ parser_add.add_argument("jobname", help="Job Name")
+ parser_add.add_argument("schedule", help="Schedule")
+ parser_add.add_argument("volname", help="Volume Name")
+
+ parser_delete = subparsers.add_parser("delete",
+ help="Delete snapshot schedules")
+ parser_delete.add_argument("jobname", help="Job Name")
+ parser_edit = subparsers.add_parser("edit",
+ help="Edit snapshot schedules")
+ parser_edit.add_argument("jobname", help="Job Name")
+ parser_edit.add_argument("schedule", help="Schedule")
+ parser_edit.add_argument("volname", help="Volume Name")
+
+ args = parser.parse_args(argv)
+
+ if not os.path.exists(SHARED_STORAGE_DIR):
+ output("Failed: "+SHARED_STORAGE_DIR+" does not exist.")
+ return SHARED_STORAGE_DIR_DOESNT_EXIST
+
+ if not os.path.ismount(SHARED_STORAGE_DIR):
+ output("Failed: Shared storage is not mounted at "+SHARED_STORAGE_DIR)
+ return SHARED_STORAGE_NOT_MOUNTED
+
+ if not os.path.exists(SHARED_STORAGE_DIR+"/snaps/"):
+ try:
+ os.makedirs(SHARED_STORAGE_DIR+"/snaps/")
+ except OSError as e:
+ if errno != EEXIST:
+ log.error("Failed to create %s : %s", SHARED_STORAGE_DIR+"/snaps/", e)
+ output("Failed to create %s. Error: %s"
+ % (SHARED_STORAGE_DIR+"/snaps/", e))
+ return INTERNAL_ERROR
+
+ if not os.path.exists(GCRON_ENABLED):
+ f = os.open(GCRON_ENABLED, os.O_CREAT | os.O_NONBLOCK, 0o644)
+ os.close(f)
+
+ if not os.path.exists(LOCK_FILE_DIR):
+ try:
+ os.makedirs(LOCK_FILE_DIR)
+ except OSError as e:
+ if errno != EEXIST:
+ log.error("Failed to create %s : %s", LOCK_FILE_DIR, e)
+ output("Failed to create %s. Error: %s"
+ % (LOCK_FILE_DIR, e))
+ return INTERNAL_ERROR
+
+ try:
+ f = os.open(LOCK_FILE, os.O_CREAT | os.O_RDWR | os.O_NONBLOCK, 0o644)
+ try:
+ fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ ret = perform_operation(args)
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except IOError:
+ log.info("%s is being processed by another agent.", LOCK_FILE)
+ output("Another snap_scheduler command is running. "
+ "Please try again after some time.")
+ return ANOTHER_TRANSACTION_IN_PROGRESS
+ os.close(f)
+ except OSError as e:
+ log.error("Failed to open %s : %s", LOCK_FILE, e)
+ output("Failed to open %s. Error: %s" % (LOCK_FILE, e))
+ return INTERNAL_ERROR
+
+ return ret
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
diff --git a/extras/statedumpparse.rb b/extras/statedumpparse.rb
new file mode 100755
index 00000000000..1aff43377db
--- /dev/null
+++ b/extras/statedumpparse.rb
@@ -0,0 +1,208 @@
+#!/usr/bin/env ruby
+
+require 'time'
+require 'optparse'
+
+unless Array.instance_methods.include? :to_h
+ class Array
+ def to_h
+ h = {}
+ each { |k,v| h[k]=v }
+ h
+ end
+ end
+end
+
+# statedump.c:gf_proc_dump_mempool_info uses a five-dash record separator,
+# client.c:client_fd_lk_ctx_dump uses a six-dash record separator.
+ARRSEP = /^(-{5,6}=-{5,6})?$/
+HEAD = /^\[(.*)\]$/
+INPUT_FORMATS = %w[statedump json]
+
+format = 'json'
+input_format = 'statedump'
+tz = '+0000'
+memstat_select,memstat_reject = //,/\Z./
+OptionParser.new do |op|
+ op.banner << " [<] <STATEDUMP>"
+ op.on("-f", "--format=F", "json/yaml/memstat(-[plain|human|json])") { |s| format = s }
+ op.on("--input-format=F", INPUT_FORMATS.join(?/)) { |s| input_format = s }
+ op.on("--timezone=T",
+ "time zone to apply to zoneless timestamps [default UTC]") { |s| tz = s }
+ op.on("--memstat-select=RX", "memstat: select memory types matching RX") { |s|
+ memstat_select = Regexp.new s
+ }
+ op.on("--memstat-reject=RX", "memstat: reject memory types matching RX") { |s|
+ memstat_reject = Regexp.new s
+ }
+end.parse!
+
+
+if format =~ /\Amemstat(?:-(.*))?/
+ memstat_type = $1 || 'plain'
+ unless %w[plain human json].include? memstat_type
+ raise "unknown memstat type #{memstat_type.dump}"
+ end
+ format = 'memstat'
+end
+
+repr, logsep = case format
+when 'yaml'
+ require 'yaml'
+
+ [proc { |e| e.to_yaml }, "\n"]
+when 'json', 'memstat'
+ require 'json'
+
+ [proc { |e| e.to_json }, " "]
+else
+ raise "unkonwn format '#{format}'"
+end
+formatter = proc { |e| puts repr.call(e) }
+
+INPUT_FORMATS.include? input_format or raise "unkwown input format '#{input_format}'"
+
+dumpinfo = {}
+
+# parse a statedump entry
+elem_cbk = proc { |s,&cbk|
+ arraylike = false
+ s.grep(/\S/).empty? and next
+ head = nil
+ while s.last =~ /^\s*$/
+ s.pop
+ end
+ body = catch { |misc2|
+ s[0] =~ HEAD ? (head = $1) : (throw misc2)
+ body = [[]]
+ s[1..-1].each { |l|
+ if l =~ ARRSEP
+ arraylike = true
+ body << []
+ next
+ end
+ body.last << l
+ }
+
+ body.reject(&:empty?).map { |e|
+ ea = e.map { |l|
+ k,v = l.split("=",2)
+ m = /\A(0|-?[1-9]\d*)(\.\d+)?\Z/.match v
+ [k, m ? (m[2] ? Float(v) : Integer(v)) : v]
+ }
+ begin
+ ea.to_h
+ rescue
+ throw misc2
+ end
+ }
+ }
+
+ if body
+ cbk.call [head, arraylike ? body : (body.empty? ? {} : body[0])]
+ else
+ STDERR.puts ["WARNING: failed to parse record:", repr.call(s)].join(logsep)
+ end
+}
+
+# aggregator routine
+aggr = case format
+when 'memstat'
+ meminfo = {}
+ # commit memory-related entries to meminfo
+ proc { |k,r|
+ case k
+ when /memusage/
+ (meminfo["GF_MALLOC"]||={})[k] ||= r["size"] if k =~ memstat_select and k !~ memstat_reject
+ when "mempool"
+ r.each {|e|
+ kk = "mempool:#{e['pool-name']}"
+ (meminfo["mempool"]||={})[kk] ||= e["size"] if kk =~ memstat_select and kk !~ memstat_reject
+ }
+ end
+ }
+else
+ # just format data, don't actually aggregate anything
+ proc { |pair| formatter.call pair }
+end
+
+# processing the data
+case input_format
+when 'statedump'
+ acc = []
+ $<.each { |l|
+ l = l.strip
+ if l =~ /^(DUMP-(?:START|END)-TIME):\s+(.*)/
+ dumpinfo["_meta"]||={}
+ (dumpinfo["_meta"]["date"]||={})[$1] = Time.parse([$2, tz].join " ")
+ next
+ end
+
+ if l =~ HEAD
+ elem_cbk.call(acc, &aggr)
+ acc = [l]
+ next
+ end
+
+ acc << l
+ }
+ elem_cbk.call(acc, &aggr)
+when 'json'
+ $<.each { |l|
+ r = JSON.load l
+ case r
+ when Array
+ aggr[r]
+ when Hash
+ dumpinfo.merge! r
+ end
+ }
+end
+
+# final actions: output aggregated data
+case format
+when 'memstat'
+ ma = meminfo.values.map(&:to_a).inject(:+)
+ totals = meminfo.map { |coll,h| [coll, h.values.inject(:+)] }.to_h
+ tt = ma.transpose[1].inject(:+)
+
+ summary_sep,showm = case memstat_type
+ when 'json'
+ ["", proc { |k,v| puts({type: k, value: v}.to_json) }]
+ when 'plain', 'human'
+ # human-friendly number representation
+ hr = proc { |n|
+ qa = %w[B kB MB GB]
+ q = ((1...qa.size).find {|i| n < (1 << i*10)} || qa.size) - 1
+ "%.2f%s" % [n.to_f / (1 << q*10), qa[q]]
+ }
+
+ templ = "%{val} %{key}"
+ tft = proc { |t| t }
+ nft = if memstat_type == 'human'
+ nw = [ma.transpose[1], totals.values, tt].flatten.map{|n| hr[n].size}.max
+ proc { |n|
+ hn = hr[n]
+ " " * (nw - hn.size) + hn
+ }
+ else
+ nw = tt.to_s.size
+ proc { |n| "%#{nw}d" % n }
+ end
+ ## Alternative template, key first:
+ # templ = "%{key} %{val}"
+ # tw = ma.transpose[0].map(&:size).max
+ # tft = proc { |t| t + " " * [tw - t.size, 0].max }
+ # nft = (memstat_type == 'human') ? hr : proc { |n| n }
+ ["\n", proc { |k,v| puts templ % {key: tft[k], val: nft[v]} }]
+ else
+ raise 'this should be impossible'
+ end
+
+ ma.sort_by { |k,v| v }.each(&showm)
+ print summary_sep
+ totals.each { |coll,t| showm.call "Total #{coll}", t }
+ showm.call "TOTAL", tt
+else
+ formatter.call dumpinfo
+end
diff --git a/extras/stop-all-gluster-processes.sh b/extras/stop-all-gluster-processes.sh
new file mode 100755
index 00000000000..710aaf5fd3c
--- /dev/null
+++ b/extras/stop-all-gluster-processes.sh
@@ -0,0 +1,193 @@
+#!/bin/bash
+#
+# Kill all the processes/services except glusterd
+#
+# Usage: ./extras/stop-all-gluster-processes.sh [-g] [-h]
+# options:
+# -g Terminate in graceful mode
+# -h Show this message, then exit
+#
+# eg:
+# 1. ./extras/stop-all-gluster-processes.sh
+# 2. ./extras/stop-all-gluster-processes.sh -g
+#
+# By default, this script executes in force mode, i.e. all of brick, gsyncd
+# and other glustershd services/processes are killed without checking for
+# ongoing tasks such as geo-rep, self-heal, rebalance and etc. which may lead
+# to inconsistency after the node is brought back.
+#
+# On specifying '-g' option this script works in graceful mode, to maintain
+# data consistency the script fails with a valid exit code incase if any of
+# the gluster processes are busy in doing their jobs.
+#
+# The author of page [1] proposes user-defined exit codes to the range 64 - 113
+# Find the better explanation behind the choice in the link
+#
+# The exit code returned by stop-all-gluster-processes.sh:
+# 0 No errors/Success
+# 64 Rebalance is in progress
+# 65 Self-Heal is in progress
+# 66 Tier daemon running on this node
+# 127 option not found
+#
+# [1] http://www.tldp.org/LDP/abs/html/exitcodes.html
+
+
+# global
+errors=0
+
+# find the mounts and return their pids
+get_mount_pids()
+{
+ local opts
+ local pid
+
+ for opts in $(grep -w fuse.glusterfs /proc/mounts| awk '{print $1":/"$2}');
+ do
+ IFS=' ' read -r -a volinfo <<< $(echo "${opts}" | sed 's/:\// /g')
+ pid+="$(ps -Ao pid,args | grep -w "volfile-server=${volinfo[0]}" |
+ grep -w "volfile-id=/${volinfo[1]}" | grep -w "${volinfo[2]}" |
+ awk '{print $1}') "
+ done
+ echo "${pid}"
+}
+
+# handle mount processes i.e. 'glusterfs'
+kill_mounts()
+{
+ local signal=${1}
+ local pid
+
+ for pid in $(get_mount_pids);
+ do
+ echo "sending SIG${signal} to mount process with pid: ${pid}";
+ kill -${signal} ${pid};
+ done
+}
+
+# handle brick processes and node services
+kill_bricks_and_services()
+{
+ local signal=${1}
+ local pidfile
+ local pid
+
+ for pidfile in $(find /var/run/gluster/ -name '*.pid');
+ do
+ local pid=$(cat ${pidfile});
+ echo "sending SIG${signal} to pid: ${pid}";
+ kill -${signal} ${pid};
+ done
+}
+
+# for geo-replication, only 'monitor' has pid file written, other
+# processes are not having a pid file, so get it through 'ps' and
+# handle these processes
+kill_georep_gsync()
+{
+ local signal=${1}
+
+ # FIXME: add strick/better check
+ local gsyncpid=$(ps -Ao pid,args | grep gluster | grep gsync |
+ awk '{print $1}');
+ if [ -n "${gsyncpid}" ]
+ then
+ echo "sending SIG${signal} to geo-rep gsync process ${gsyncpid}";
+ kill -${signal} ${gsyncpid} || errors=$((${errors} + 1));
+ fi
+}
+
+# check if all processes are ready to die
+check_background_tasks()
+{
+ volumes=$(gluster vol list)
+ quit=0
+ for volname in ${volumes};
+ do
+ # tiering
+ if [[ $(gluster volume tier ${volname} status 2> /dev/null |
+ grep "localhost" | grep -c "in progress") -gt 0 ]]
+ then
+ quit=66
+ break;
+ fi
+
+ # rebalance
+ if [[ $(gluster volume rebalance ${volname} status 2> /dev/null |
+ grep -c "in progress") -gt 0 ]]
+ then
+ quit=64
+ break;
+ fi
+
+ # self heal
+ if [[ $(gluster volume heal ${volname} info | grep "Number of entries" |
+ awk '{ sum+=$4} END {print sum}') -gt 0 ]];
+ then
+ quit=65
+ break;
+ fi
+
+ # geo-rep, snapshot and quota doesn't need grace checks,
+ # as they ensures the consistancy on force kills
+ done
+
+ echo ${quit}
+}
+
+usage()
+{
+ cat <<EOM
+Usage: $0 [-g] [-h]
+ options:
+ -g Terminate in graceful mode
+ -h Show this message, then exit
+
+eg:
+ 1. $0
+ 2. $0 -g
+EOM
+}
+
+main()
+{
+ while getopts "gh" opt; do
+ case $opt in
+ g)
+ # graceful mode
+ quit=$(check_background_tasks)
+ if [[ ${quit} -ne 0 ]]
+ then
+ exit ${quit};
+ fi
+ # else safe to kill
+ ;;
+ h)
+ usage
+ exit 0;
+ ;;
+ *)
+ usage
+ exit 127;
+ ;;
+ esac
+ done
+ # remove all the options that have been parsed by getopts
+ shift $((OPTIND-1))
+
+ kill_mounts TERM
+ kill_georep_gsync TERM
+ kill_bricks_and_services TERM
+
+ sleep 5;
+ echo ""
+
+ # still not Terminated? let's pass SIGKILL
+ kill_mounts KILL
+ kill_georep_gsync KILL
+ kill_bricks_and_services KILL
+
+ exit ${errors};
+}
+
+main "$@"
diff --git a/extras/stripe-merge.c b/extras/stripe-merge.c
index 32768badd36..e013a6e6e8a 100644
--- a/extras/stripe-merge.c
+++ b/extras/stripe-merge.c
@@ -28,7 +28,7 @@
#include <stdint.h>
#include <errno.h>
#include <string.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <fnmatch.h>
#define ATTRNAME_STRIPE_INDEX "trusted.*.stripe-index"
@@ -40,33 +40,33 @@
#define INVALID_MODE UINT32_MAX
struct file_stripe_info {
- int stripe_count;
- int stripe_size;
- int coalesce;
- mode_t mode;
- int fd[0];
+ int stripe_count;
+ int stripe_size;
+ int coalesce;
+ mode_t mode;
+ int fd[0];
};
-static int close_files(struct file_stripe_info *);
+static int
+close_files(struct file_stripe_info *);
-static struct
-file_stripe_info *alloc_file_stripe_info(int count)
+static struct file_stripe_info *
+alloc_file_stripe_info(int count)
{
- int i;
- struct file_stripe_info *finfo;
+ int i;
+ struct file_stripe_info *finfo;
- finfo = calloc(1, sizeof(struct file_stripe_info) +
- (sizeof(int) * count));
- if (!finfo)
- return NULL;
+ finfo = calloc(1, sizeof(struct file_stripe_info) + (sizeof(int) * count));
+ if (!finfo)
+ return NULL;
- for (i = 0; i < count; i++)
- finfo->fd[i] = INVALID_FD;
+ for (i = 0; i < count; i++)
+ finfo->fd[i] = INVALID_FD;
- finfo->mode = INVALID_MODE;
- finfo->coalesce = INVALID_FD;
+ finfo->mode = INVALID_MODE;
+ finfo->coalesce = INVALID_FD;
- return finfo;
+ return finfo;
}
/*
@@ -77,39 +77,39 @@ file_stripe_info *alloc_file_stripe_info(int count)
static int
get_stripe_attr_name(const char *path, const char *pattern, char **attrname)
{
- char attrbuf[4096];
- char *ptr, *match = NULL;
- int len, r, match_count = 0;
-
- if (!path || !pattern || !attrname)
- return -1;
-
- len = listxattr(path, attrbuf, sizeof(attrbuf));
- if (len < 0)
- return len;
-
- ptr = attrbuf;
- while (ptr) {
- r = fnmatch(pattern, ptr, 0);
- if (!r) {
- if (!match)
- match = ptr;
- match_count++;
- } else if (r != FNM_NOMATCH) {
- return -1;
- }
-
- len -= strlen(ptr) + 1;
- if (len > 0)
- ptr += strlen(ptr) + 1;
- else
- ptr = NULL;
- }
-
- if (match)
- *attrname = strdup(match);
-
- return match_count;
+ char attrbuf[4096];
+ char *ptr, *match = NULL;
+ int len, r, match_count = 0;
+
+ if (!path || !pattern || !attrname)
+ return -1;
+
+ len = listxattr(path, attrbuf, sizeof(attrbuf));
+ if (len < 0)
+ return len;
+
+ ptr = attrbuf;
+ while (ptr) {
+ r = fnmatch(pattern, ptr, 0);
+ if (!r) {
+ if (!match)
+ match = ptr;
+ match_count++;
+ } else if (r != FNM_NOMATCH) {
+ return -1;
+ }
+
+ len -= strlen(ptr) + 1;
+ if (len > 0)
+ ptr += strlen(ptr) + 1;
+ else
+ ptr = NULL;
+ }
+
+ if (match)
+ *attrname = strdup(match);
+
+ return match_count;
}
/*
@@ -118,19 +118,19 @@ get_stripe_attr_name(const char *path, const char *pattern, char **attrname)
static int
get_stripe_attr_val(const char *path, const char *attr, int *val)
{
- char attrbuf[4096];
- int len;
+ char attrbuf[4096];
+ int len;
- if (!path || !attr || !val)
- return -1;
+ if (!path || !attr || !val)
+ return -1;
- len = getxattr(path, attr, attrbuf, sizeof(attrbuf));
- if (len < 0)
- return len;
+ len = getxattr(path, attr, attrbuf, sizeof(attrbuf));
+ if (len < 0)
+ return len;
- *val = atoi(attrbuf);
+ *val = atoi(attrbuf);
- return 0;
+ return 0;
}
/*
@@ -145,29 +145,31 @@ get_stripe_attr_val(const char *path, const char *attr, int *val)
static int
get_attr(const char *path, const char *pattern, char **buf, int *val)
{
- int count = 1;
-
- if (!buf)
- return -1;
-
- if (!*buf) {
- count = get_stripe_attr_name(path, pattern, buf);
- if (count > 1) {
- /* pattern isn't good enough */
- fprintf(stderr, "ERROR: duplicate attributes found "
- "matching pattern: %s\n", pattern);
- free(*buf);
- *buf = NULL;
- return count;
- } else if (count < 1) {
- return count;
- }
- }
-
- if (get_stripe_attr_val(path, *buf, val) < 0)
- return -1;
-
- return count;
+ int count = 1;
+
+ if (!buf)
+ return -1;
+
+ if (!*buf) {
+ count = get_stripe_attr_name(path, pattern, buf);
+ if (count > 1) {
+ /* pattern isn't good enough */
+ fprintf(stderr,
+ "ERROR: duplicate attributes found "
+ "matching pattern: %s\n",
+ pattern);
+ free(*buf);
+ *buf = NULL;
+ return count;
+ } else if (count < 1) {
+ return count;
+ }
+ }
+
+ if (get_stripe_attr_val(path, *buf, val) < 0)
+ return -1;
+
+ return count;
}
/*
@@ -178,168 +180,168 @@ get_attr(const char *path, const char *pattern, char **buf, int *val)
* print a warning if any files are missing. We proceed without error in the
* latter case to support partial recovery.
*/
-static struct
-file_stripe_info *validate_and_open_files(char *paths[], int count)
+static struct file_stripe_info *
+validate_and_open_files(char *paths[], int count)
{
- int i, val, tmp;
- struct stat sbuf;
- char *stripe_count_attr = NULL;
- char *stripe_size_attr = NULL;
- char *stripe_index_attr = NULL;
- char *stripe_coalesce_attr = NULL;
- struct file_stripe_info *finfo = NULL;
-
- for (i = 0; i < count; i++) {
- if (!paths[i])
- goto err;
-
- /*
- * Check the stripe count first so we can allocate the info
- * struct with the appropriate number of fds.
- */
- if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT,
- &stripe_count_attr, &val) != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_COUNT);
- goto err;
- }
- if (!finfo) {
- finfo = alloc_file_stripe_info(val);
- if (!finfo)
- goto err;
-
- if (val != count)
- fprintf(stderr, "WARNING: %s: stripe-count "
- "(%d) != file count (%d). Result may "
- "be incomplete.\n", paths[i], val,
- count);
-
- finfo->stripe_count = val;
- } else if (val != finfo->stripe_count) {
- fprintf(stderr, "ERROR %s: invalid stripe count: %d "
- "(expected %d)\n", paths[i], val,
- finfo->stripe_count);
- goto err;
- }
-
- /*
- * Get and validate the chunk size.
- */
- if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr,
- &val) != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_SIZE);
- goto err;
- }
-
- if (!finfo->stripe_size) {
- finfo->stripe_size = val;
- } else if (val != finfo->stripe_size) {
- fprintf(stderr, "ERROR: %s: invalid stripe size: %d "
- "(expected %d)\n", paths[i], val,
- finfo->stripe_size);
- goto err;
- }
-
- /*
- * stripe-coalesce is a backward compatible attribute. If the
- * attribute does not exist, assume a value of zero for the
- * traditional stripe format.
- */
- tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE,
- &stripe_coalesce_attr, &val);
- if (!tmp) {
- val = 0;
- } else if (tmp != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_COALESCE);
- goto err;
- }
-
- if (finfo->coalesce == INVALID_FD) {
- finfo->coalesce = val;
- } else if (val != finfo->coalesce) {
- fprintf(stderr, "ERROR: %s: invalid coalesce flag\n",
- paths[i]);
- goto err;
- }
-
- /*
- * Get/validate the stripe index and open the file in the
- * appropriate fd slot.
- */
- if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX,
- &stripe_index_attr, &val) != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_INDEX);
- goto err;
- }
- if (finfo->fd[val] != INVALID_FD) {
- fprintf(stderr, "ERROR: %s: duplicate stripe index: "
- "%d\n", paths[i], val);
- goto err;
- }
-
- finfo->fd[val] = open(paths[i], O_RDONLY);
- if (finfo->fd[val] < 0)
- goto err;
-
- /*
- * Get the creation mode for the file.
- */
- if (fstat(finfo->fd[val], &sbuf) < 0)
- goto err;
- if (finfo->mode == INVALID_MODE) {
- finfo->mode = sbuf.st_mode;
- } else if (sbuf.st_mode != finfo->mode) {
- fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]);
- goto err;
- }
- }
-
- free(stripe_count_attr);
- free(stripe_size_attr);
- free(stripe_index_attr);
- free(stripe_coalesce_attr);
-
- return finfo;
+ int i, val, tmp;
+ struct stat sbuf;
+ char *stripe_count_attr = NULL;
+ char *stripe_size_attr = NULL;
+ char *stripe_index_attr = NULL;
+ char *stripe_coalesce_attr = NULL;
+ struct file_stripe_info *finfo = NULL;
+
+ for (i = 0; i < count; i++) {
+ if (!paths[i])
+ goto err;
+
+ /*
+ * Check the stripe count first so we can allocate the info
+ * struct with the appropriate number of fds.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT, &stripe_count_attr,
+ &val) != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_COUNT);
+ goto err;
+ }
+ if (!finfo) {
+ finfo = alloc_file_stripe_info(val);
+ if (!finfo)
+ goto err;
+
+ if (val != count)
+ fprintf(stderr,
+ "WARNING: %s: stripe-count "
+ "(%d) != file count (%d). Result may "
+ "be incomplete.\n",
+ paths[i], val, count);
+
+ finfo->stripe_count = val;
+ } else if (val != finfo->stripe_count) {
+ fprintf(stderr,
+ "ERROR %s: invalid stripe count: %d "
+ "(expected %d)\n",
+ paths[i], val, finfo->stripe_count);
+ goto err;
+ }
+
+ /*
+ * Get and validate the chunk size.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr, &val) !=
+ 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_SIZE);
+ goto err;
+ }
+
+ if (!finfo->stripe_size) {
+ finfo->stripe_size = val;
+ } else if (val != finfo->stripe_size) {
+ fprintf(stderr,
+ "ERROR: %s: invalid stripe size: %d "
+ "(expected %d)\n",
+ paths[i], val, finfo->stripe_size);
+ goto err;
+ }
+
+ /*
+ * stripe-coalesce is a backward compatible attribute. If the
+ * attribute does not exist, assume a value of zero for the
+ * traditional stripe format.
+ */
+ tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE,
+ &stripe_coalesce_attr, &val);
+ if (!tmp) {
+ val = 0;
+ } else if (tmp != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_COALESCE);
+ goto err;
+ }
+
+ if (finfo->coalesce == INVALID_FD) {
+ finfo->coalesce = val;
+ } else if (val != finfo->coalesce) {
+ fprintf(stderr, "ERROR: %s: invalid coalesce flag\n", paths[i]);
+ goto err;
+ }
+
+ /*
+ * Get/validate the stripe index and open the file in the
+ * appropriate fd slot.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX, &stripe_index_attr,
+ &val) != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_INDEX);
+ goto err;
+ }
+ if (finfo->fd[val] != INVALID_FD) {
+ fprintf(stderr,
+ "ERROR: %s: duplicate stripe index: "
+ "%d\n",
+ paths[i], val);
+ goto err;
+ }
+
+ finfo->fd[val] = open(paths[i], O_RDONLY);
+ if (finfo->fd[val] < 0)
+ goto err;
+
+ /*
+ * Get the creation mode for the file.
+ */
+ if (fstat(finfo->fd[val], &sbuf) < 0)
+ goto err;
+ if (finfo->mode == INVALID_MODE) {
+ finfo->mode = sbuf.st_mode;
+ } else if (sbuf.st_mode != finfo->mode) {
+ fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]);
+ goto err;
+ }
+ }
+
+ free(stripe_count_attr);
+ free(stripe_size_attr);
+ free(stripe_index_attr);
+ free(stripe_coalesce_attr);
+
+ return finfo;
err:
- if (stripe_count_attr)
- free(stripe_count_attr);
- if (stripe_size_attr)
- free(stripe_size_attr);
- if (stripe_index_attr)
- free(stripe_index_attr);
- if (stripe_coalesce_attr)
- free(stripe_coalesce_attr);
-
- if (finfo) {
- close_files(finfo);
- free(finfo);
- }
-
- return NULL;
+ free(stripe_count_attr);
+ free(stripe_size_attr);
+ free(stripe_index_attr);
+ free(stripe_coalesce_attr);
+
+ if (finfo) {
+ close_files(finfo);
+ free(finfo);
+ }
+
+ return NULL;
}
static int
close_files(struct file_stripe_info *finfo)
{
- int i, ret;
+ int i, ret;
- if (!finfo)
- return -1;
+ if (!finfo)
+ return -1;
- for (i = 0; i < finfo->stripe_count; i++) {
- if (finfo->fd[i] == INVALID_FD)
- continue;
+ for (i = 0; i < finfo->stripe_count; i++) {
+ if (finfo->fd[i] == INVALID_FD)
+ continue;
- ret = close(finfo->fd[i]);
- if (ret < 0)
- return ret;
- }
+ ret = close(finfo->fd[i]);
+ if (ret < 0)
+ return ret;
+ }
- return ret;
+ return ret;
}
/*
@@ -355,43 +357,43 @@ close_files(struct file_stripe_info *finfo)
static int
generate_file_coalesce(int target, struct file_stripe_info *finfo)
{
- char *buf;
- int ret = 0;
- int r, w, i;
-
- buf = malloc(finfo->stripe_size);
- if (!buf)
- return -1;
-
- i = 0;
- while (1) {
- if (finfo->fd[i] == INVALID_FD) {
- if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0)
- break;
-
- i = (i + 1) % finfo->stripe_count;
- continue;
- }
-
- r = read(finfo->fd[i], buf, finfo->stripe_size);
- if (r < 0) {
- ret = r;
- break;
- }
- if (!r)
- break;
-
- w = write(target, buf, r);
- if (w < 0) {
- ret = w;
- break;
- }
-
- i = (i + 1) % finfo->stripe_count;
- }
-
- free(buf);
- return ret;
+ char *buf;
+ int ret = 0;
+ int r, w, i;
+
+ buf = malloc(finfo->stripe_size);
+ if (!buf)
+ return -1;
+
+ i = 0;
+ while (1) {
+ if (finfo->fd[i] == INVALID_FD) {
+ if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0)
+ break;
+
+ i = (i + 1) % finfo->stripe_count;
+ continue;
+ }
+
+ r = read(finfo->fd[i], buf, finfo->stripe_size);
+ if (r < 0) {
+ ret = r;
+ break;
+ }
+ if (!r)
+ break;
+
+ w = write(target, buf, r);
+ if (w < 0) {
+ ret = w;
+ break;
+ }
+
+ i = (i + 1) % finfo->stripe_count;
+ }
+
+ free(buf);
+ return ret;
}
/*
@@ -402,97 +404,100 @@ generate_file_coalesce(int target, struct file_stripe_info *finfo)
static int
generate_file_traditional(int target, struct file_stripe_info *finfo)
{
- int i, j, max_ret, ret;
- char buf[finfo->stripe_count][4096];
-
- do {
- char newbuf[4096] = {0, };
-
- max_ret = 0;
- for (i = 0; i < finfo->stripe_count; i++) {
- memset(buf[i], 0, 4096);
- ret = read(finfo->fd[i], buf[i], 4096);
- if (ret > max_ret)
- max_ret = ret;
- }
- for (i = 0; i < max_ret; i++)
- for (j = 0; j < finfo->stripe_count; j++)
- newbuf[i] |= buf[j][i];
- write(target, newbuf, max_ret);
- } while (max_ret);
-
- return 0;
+ int i, j, max_ret, ret;
+ char buf[finfo->stripe_count][4096];
+
+ do {
+ char newbuf[4096] = {
+ 0,
+ };
+
+ max_ret = 0;
+ for (i = 0; i < finfo->stripe_count; i++) {
+ memset(buf[i], 0, 4096);
+ ret = read(finfo->fd[i], buf[i], 4096);
+ if (ret > max_ret)
+ max_ret = ret;
+ }
+ for (i = 0; i < max_ret; i++)
+ for (j = 0; j < finfo->stripe_count; j++)
+ newbuf[i] |= buf[j][i];
+ write(target, newbuf, max_ret);
+ } while (max_ret);
+
+ return 0;
}
static int
generate_file(int target, struct file_stripe_info *finfo)
{
- if (finfo->coalesce)
- return generate_file_coalesce(target, finfo);
+ if (finfo->coalesce)
+ return generate_file_coalesce(target, finfo);
- return generate_file_traditional(target, finfo);
+ return generate_file_traditional(target, finfo);
}
static void
usage(char *name)
{
- fprintf(stderr, "Usage: %s [-o <outputfile>] <inputfile1> "
- "<inputfile2> ...\n", name);
+ fprintf(stderr,
+ "Usage: %s [-o <outputfile>] <inputfile1> "
+ "<inputfile2> ...\n",
+ name);
}
int
main(int argc, char *argv[])
{
- int file_count, opt;
- char *opath = NULL;
- int targetfd;
- struct file_stripe_info *finfo;
-
- while ((opt = getopt(argc, argv, "o:")) != -1) {
- switch (opt) {
- case 'o':
- opath = optarg;
- break;
- default:
- usage(argv[0]);
- return -1;
- }
- }
-
- file_count = argc - optind;
-
- if (!opath || !file_count) {
- usage(argv[0]);
- return -1;
- }
-
- finfo = validate_and_open_files(&argv[optind], file_count);
- if (!finfo)
- goto err;
-
- targetfd = open(opath, O_RDWR|O_CREAT, finfo->mode);
- if (targetfd < 0)
- goto err;
-
- if (generate_file(targetfd, finfo) < 0)
- goto err;
-
- if (fsync(targetfd) < 0)
- fprintf(stderr, "ERROR: %s\n", strerror(errno));
- if (close(targetfd) < 0)
- fprintf(stderr, "ERROR: %s\n", strerror(errno));
-
- close_files(finfo);
- free(finfo);
-
- return 0;
+ int file_count, opt;
+ char *opath = NULL;
+ int targetfd;
+ struct file_stripe_info *finfo;
+
+ while ((opt = getopt(argc, argv, "o:")) != -1) {
+ switch (opt) {
+ case 'o':
+ opath = optarg;
+ break;
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ file_count = argc - optind;
+
+ if (!opath || !file_count) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ finfo = validate_and_open_files(&argv[optind], file_count);
+ if (!finfo)
+ goto err;
+
+ targetfd = open(opath, O_RDWR | O_CREAT, finfo->mode);
+ if (targetfd < 0)
+ goto err;
+
+ if (generate_file(targetfd, finfo) < 0)
+ goto err;
+
+ if (fsync(targetfd) < 0)
+ fprintf(stderr, "ERROR: %s\n", strerror(errno));
+ if (close(targetfd) < 0)
+ fprintf(stderr, "ERROR: %s\n", strerror(errno));
+
+ close_files(finfo);
+ free(finfo);
+
+ return 0;
err:
- if (finfo) {
- close_files(finfo);
- free(finfo);
- }
+ if (finfo) {
+ close_files(finfo);
+ free(finfo);
+ }
- return -1;
+ return -1;
}
-
diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am
new file mode 100644
index 00000000000..61446a9b84a
--- /dev/null
+++ b/extras/systemd/Makefile.am
@@ -0,0 +1,17 @@
+CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service gluster-ta-volume.service
+EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in gluster-ta-volume.service.in
+
+if USE_SYSTEMD
+systemd_DATA = gluster-ta-volume.service
+endif
+
+if WITH_SERVER
+if USE_SYSTEMD
+# systemddir is already defined through configure.ac
+systemd_DATA += glusterd.service glusterfssharedstorage.service
+
+if BUILD_EVENTS
+systemd_DATA += glustereventsd.service
+endif
+endif
+endif
diff --git a/extras/systemd/gluster-ta-volume.service.in b/extras/systemd/gluster-ta-volume.service.in
new file mode 100644
index 00000000000..2802bca05bf
--- /dev/null
+++ b/extras/systemd/gluster-ta-volume.service.in
@@ -0,0 +1,13 @@
+[Unit]
+Description=GlusterFS, Thin-arbiter process to maintain quorum for replica volume
+After=network.target
+
+[Service]
+Environment="LOG_LEVEL=WARNING"
+ExecStart=@prefix@/sbin/glusterfsd -N --volfile-id ta -f @GLUSTERD_WORKDIR@/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-server.transport.socket.listen-port=24007 -LWARNING
+Restart=always
+KillMode=process
+SuccessExitStatus=15
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
new file mode 100644
index 00000000000..abb0d82911f
--- /dev/null
+++ b/extras/systemd/glusterd.service.in
@@ -0,0 +1,26 @@
+[Unit]
+Description=GlusterFS, a clustered file-system server
+Documentation=man:glusterd(8)
+StartLimitBurst=6
+StartLimitIntervalSec=3600
+Requires=@RPCBIND_SERVICE@
+After=network.target @RPCBIND_SERVICE@
+Before=network-online.target
+
+[Service]
+Type=forking
+PIDFile=@localstatedir@/run/glusterd.pid
+LimitNOFILE=65536
+Environment="LOG_LEVEL=INFO"
+EnvironmentFile=-@SYSCONF_DIR@/sysconfig/glusterd
+ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS
+KillMode=process
+TimeoutSec=300
+SuccessExitStatus=15
+Restart=on-abnormal
+RestartSec=60
+StartLimitBurst=6
+StartLimitInterval=3600
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/systemd/glustereventsd.service.in b/extras/systemd/glustereventsd.service.in
new file mode 100644
index 00000000000..f80b78199f6
--- /dev/null
+++ b/extras/systemd/glustereventsd.service.in
@@ -0,0 +1,16 @@
+[Unit]
+Description=Gluster Events Notifier
+After=network.target
+Documentation=man:glustereventsd(8)
+
+
+[Service]
+Environment=PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES_EXPANDED@:$PYTHONPATH
+Type=simple
+ExecStart=@SBIN_DIR@/glustereventsd --pid-file @localstatedir@/run/glustereventsd.pid
+ExecReload=/bin/kill -SIGUSR2 $MAINPID
+KillMode=control-group
+PIDFile=@localstatedir@/run/glustereventsd.pid
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/systemd/glusterfssharedstorage.service.in b/extras/systemd/glusterfssharedstorage.service.in
new file mode 100644
index 00000000000..723ff49afb7
--- /dev/null
+++ b/extras/systemd/glusterfssharedstorage.service.in
@@ -0,0 +1,13 @@
+[Unit]
+Description=Mount glusterfs sharedstorage
+Requires=glusterd.service remote-fs-pre.target local-fs.target
+
+[Service]
+Type=forking
+ExecStart=@GLUSTERFS_LIBEXECDIR@/mount-shared-storage.sh
+Restart=on-failure
+RestartSec=3
+RestartForceExitStatus=1
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/test/bug-920583.t b/extras/test/bug-920583.t
new file mode 100755
index 00000000000..eedbb800ac0
--- /dev/null
+++ b/extras/test/bug-920583.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+mount -t glusterfs $H0:/$V0 $M0 -o "xlator-option=*dht.assert-no-child-down=yes,xlator-option=*dht.lookup-unhashed=yes"
+touch $M0/file1;
+
+new_lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+new_no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+EXPECT "1" expr $new_lookup_unhashed_count - $lookup_unhashed_count
+EXPECT "1" expr $new_no_child_down_count - $no_child_down_count
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/extras/test/gluster_commands.sh b/extras/test/gluster_commands.sh
index e1e396020af..cb2a55fd5b5 100755
--- a/extras/test/gluster_commands.sh
+++ b/extras/test/gluster_commands.sh
@@ -1,21 +1,13 @@
#!/bin/bash
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
# This script tests the basics gluster cli commands.
@@ -54,13 +46,13 @@ gluster volume start vol
gluster volume info
sleep 1
mount -t glusterfs `hostname`:vol /mnt/client
-sleep 1
+sleep 1
df -h
echo "adding-brick......."
gluster volume add-brick vol `hostname`:/exports/exp2
gluster volume info
-sleep 1
+sleep 1
umount /mnt/client
mount -t glusterfs `hostname`:vol /mnt/client
df -h
@@ -102,7 +94,7 @@ sleep 1
echo "removing brick......."
gluster --mode=script volume remove-brick vol `hostname`:/exports/exp2
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
@@ -127,7 +119,7 @@ sleep 1
echo "starting replicate volume......"
gluster volume start mirror
gluster volume info
-sleep 1
+sleep 1
mount -t glusterfs `hostname`:mirror /mnt/client
sleep 1
df -h
@@ -136,7 +128,7 @@ sleep 1
echo "adding-brick......."
gluster volume add-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
@@ -178,14 +170,14 @@ sleep 1
echo "removeing-brick....."
gluster --mode=script volume remove-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
echo "stopping replicate volume....."
gluster --mode=script volume stop mirror
gluster volume info
-sleep 1
+sleep 1
umount /mnt/client
df -h
@@ -206,14 +198,14 @@ gluster volume start str
gluster volume info
sleep 1
mount -t glusterfs `hostname`:str /mnt/client
-sleep 1
+sleep 1
df -h
sleep 1
echo "adding brick...."
gluster volume add-brick str `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
diff --git a/extras/test/ld-preload-test/README b/extras/test/ld-preload-test/README
index 725b94023c8..df80003844b 100644
--- a/extras/test/ld-preload-test/README
+++ b/extras/test/ld-preload-test/README
@@ -32,9 +32,9 @@ Instructions
$ make
(We've tested the tool on Debian and CentOS. If there are build errors or
-warnings, please do report them to glusterfs-devel@nongnu.org.)
+warnings, please do report them to gluster-devel@gluster.org.)
2. Run the test.
$ ./test-preload.sh > preload.log
-3. Mail the log to glusterfs-devel@nongnu.org.
+3. Mail the log to gluster-devel@gluster.org.
diff --git a/extras/test/ld-preload-test/ld-preload-lib.c b/extras/test/ld-preload-test/ld-preload-lib.c
index e17305a4a1d..d120c053a69 100644
--- a/extras/test/ld-preload-test/ld-preload-lib.c
+++ b/extras/test/ld-preload-test/ld-preload-lib.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/* LD PRELOAD'able library
* A very simple library that intercepts booster supported system calls
* and prints a log message to stdout.
@@ -44,594 +34,582 @@
#include <fcntl.h>
#include <sys/stat.h>
#include <dirent.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <sys/sendfile.h>
/* Err number that is assigned to errno so that test application can
* verify that the function was intercepted correctly.
*/
-#define PRELOAD_ERRNO_VERF 6449
-#define set_errno() (errno = PRELOAD_ERRNO_VERF)
+#define PRELOAD_ERRNO_VERF 6449
+#define set_errno() (errno = PRELOAD_ERRNO_VERF)
-inline void
-intercept (char *call, int tabs)
+void
+intercept(char *call, int tabs)
{
- while (tabs > 0) {
- fprintf (stdout, "\t");
- --tabs;
- }
+ while (tabs > 0) {
+ fprintf(stdout, "\t");
+ --tabs;
+ }
- fprintf (stdout, "Intercepted by %s", call);
+ fprintf(stdout, "Intercepted by %s", call);
}
int
-creat64 (const char *pathname, mode_t mode)
+creat64(const char *pathname, mode_t mode)
{
- intercept ("creat64", 2);
- set_errno ();
- return -1;
+ intercept("creat64", 2);
+ set_errno();
+ return -1;
}
int
-creat (const char *pathname, mode_t mode)
+creat(const char *pathname, mode_t mode)
{
- intercept ("creat", 2);
- set_errno ();
- return -1;
+ intercept("creat", 2);
+ set_errno();
+ return -1;
}
-
int
-close (int fd)
+close(int fd)
{
- intercept ("close", 2);
- set_errno ();
- return -1;
+ intercept("close", 2);
+ set_errno();
+ return -1;
}
int
-open64 (const char *pathname, int flags, ...)
+open64(const char *pathname, int flags, ...)
{
- intercept ("open64", 2);
- set_errno ();
- return -1;
+ intercept("open64", 2);
+ set_errno();
+ return -1;
}
-
int
-open (const char *pathname, int flags, ...)
+open(const char *pathname, int flags, ...)
{
- intercept ("open", 2);
- set_errno ();
- return -1;
+ intercept("open", 2);
+ set_errno();
+ return -1;
}
ssize_t
-read (int fd, void *buf, size_t count)
+read(int fd, void *buf, size_t count)
{
- intercept ("read", 2);
- set_errno ();
- return -1;
+ intercept("read", 2);
+ set_errno();
+ return -1;
}
ssize_t
-readv (int fd, const struct iovec *vector, int count)
+readv(int fd, const struct iovec *vector, int count)
{
- intercept ("readv", 2);
- set_errno ();
- return -1;
+ intercept("readv", 2);
+ set_errno();
+ return -1;
}
ssize_t
-pread (int fd, void *buf, size_t count, unsigned long offset)
+pread(int fd, void *buf, size_t count, unsigned long offset)
{
- intercept ("pread", 2);
- set_errno ();
- return -1;
+ intercept("pread", 2);
+ set_errno();
+ return -1;
}
-
ssize_t
-pread64 (int fd, void *buf, size_t count, uint64_t offset)
+pread64(int fd, void *buf, size_t count, uint64_t offset)
{
- intercept ("pread64", 2);
- set_errno ();
- return -1;
+ intercept("pread64", 2);
+ set_errno();
+ return -1;
}
ssize_t
-write (int fd, const void *buf, size_t count)
+write(int fd, const void *buf, size_t count)
{
- intercept ("write", 2);
- set_errno ();
- return -1;
+ intercept("write", 2);
+ set_errno();
+ return -1;
}
ssize_t
-writev (int fd, const struct iovec *vector, int count)
+writev(int fd, const struct iovec *vector, int count)
{
- intercept ("writev", 2);
- set_errno ();
- return -1;
+ intercept("writev", 2);
+ set_errno();
+ return -1;
}
ssize_t
-pwrite (int fd, const void *buf, size_t count, unsigned long offset)
+pwrite(int fd, const void *buf, size_t count, unsigned long offset)
{
- intercept ("pwrite", 2);
- set_errno ();
- return -1;
+ intercept("pwrite", 2);
+ set_errno();
+ return -1;
}
ssize_t
-pwrite64 (int fd, const void *buf, size_t count, uint64_t offset)
+pwrite64(int fd, const void *buf, size_t count, uint64_t offset)
{
- intercept ("pwrite64", 2);
- set_errno ();
- return -1;
+ intercept("pwrite64", 2);
+ set_errno();
+ return -1;
}
-
off_t
-lseek (int fildes, unsigned long offset, int whence)
+lseek(int fildes, unsigned long offset, int whence)
{
- intercept ("lseek", 2);
- set_errno ();
- return -1;
+ intercept("lseek", 2);
+ set_errno();
+ return -1;
}
off_t
-lseek64 (int fildes, uint64_t offset, int whence)
+lseek64(int fildes, uint64_t offset, int whence)
{
- intercept ("lseek64", 2);
- set_errno ();
- return -1;
+ intercept("lseek64", 2);
+ set_errno();
+ return -1;
}
-
int
-dup (int fd)
+dup(int fd)
{
- intercept ("dup", 2);
- set_errno ();
- return -1;
+ intercept("dup", 2);
+ set_errno();
+ return -1;
}
int
-dup2 (int oldfd, int newfd)
+dup2(int oldfd, int newfd)
{
- intercept ("dup2", 2);
- set_errno ();
- return -1;
+ intercept("dup2", 2);
+ set_errno();
+ return -1;
}
int
-mkdir (const char *pathname, mode_t mode)
+mkdir(const char *pathname, mode_t mode)
{
- intercept ("mkdir", 2);
- set_errno ();
- return -1;
+ intercept("mkdir", 2);
+ set_errno();
+ return -1;
}
int
-rmdir (const char *pathname)
+rmdir(const char *pathname)
{
- intercept ("rmdir", 2);
- set_errno ();
- return -1;
+ intercept("rmdir", 2);
+ set_errno();
+ return -1;
}
int
-chmod (const char *pathname, mode_t mode)
+chmod(const char *pathname, mode_t mode)
{
- intercept ("chmod", 2);
- set_errno ();
- return -1;
+ intercept("chmod", 2);
+ set_errno();
+ return -1;
}
int
-chown (const char *pathname, uid_t owner, gid_t group)
+chown(const char *pathname, uid_t owner, gid_t group)
{
- intercept ("chown", 2);
- set_errno ();
- return -1;
+ intercept("chown", 2);
+ set_errno();
+ return -1;
}
int
-fchmod (int fd, mode_t mode)
+fchmod(int fd, mode_t mode)
{
- intercept ("fchmod", 2);
- set_errno ();
- return -1;
+ intercept("fchmod", 2);
+ set_errno();
+ return -1;
}
int
-fchown (int fd, uid_t uid, gid_t gid)
+fchown(int fd, uid_t uid, gid_t gid)
{
- intercept ("fchown", 2);
- set_errno ();
- return -1;
+ intercept("fchown", 2);
+ set_errno();
+ return -1;
}
-int fsync (int fd)
+int
+fsync(int fd)
{
- intercept ("fsync", 2);
- set_errno ();
- return -1;
+ intercept("fsync", 2);
+ set_errno();
+ return -1;
}
-
int
-ftruncate (int fd, off_t length)
+ftruncate(int fd, off_t length)
{
- intercept ("ftruncate", 1);
- set_errno ();
- return -1;
+ intercept("ftruncate", 1);
+ set_errno();
+ return -1;
}
-
int
-ftruncate64 (int fd, off_t length)
+ftruncate64(int fd, off_t length)
{
- intercept ("ftruncate64", 1);
- set_errno ();
- return -1;
+ intercept("ftruncate64", 1);
+ set_errno();
+ return -1;
}
int
-link (const char *oldpath, const char *newname)
+link(const char *oldpath, const char *newname)
{
- intercept ("link", 2);
- set_errno ();
- return -1;
+ intercept("link", 2);
+ set_errno();
+ return -1;
}
int
-rename (const char *oldpath, const char *newpath)
+rename(const char *oldpath, const char *newpath)
{
- intercept ("rename", 2);
- set_errno ();
- return -1;
+ intercept("rename", 2);
+ set_errno();
+ return -1;
}
int
-utimes (const char *path, const struct timeval times[2])
+utimes(const char *path, const struct timeval times[2])
{
- intercept ("utimes", 2);
- set_errno ();
- return -1;
+ intercept("utimes", 2);
+ set_errno();
+ return -1;
}
int
-utime (const char *path, const struct utimbuf *buf)
+futimes(int fd, const struct timeval times[2])
{
- intercept ("utime", 2);
- set_errno ();
- return -1;
+ intercept("futimes", 2);
+ set_errno();
+ return -1;
}
-
int
-mknod (const char *path, mode_t mode, dev_t dev)
+utime(const char *path, const struct utimbuf *buf)
{
- intercept ("mknod", 2);
- set_errno ();
- return -1;
+ intercept("utime", 2);
+ set_errno();
+ return -1;
}
int
-__xmknod (int ver, const char *path, mode_t mode, dev_t *dev)
+mknod(const char *path, mode_t mode, dev_t dev)
{
- intercept ("__xmknod", 2);
- set_errno ();
- return -1;
+ intercept("mknod", 2);
+ set_errno();
+ return -1;
}
int
-mkfifo (const char *path, mode_t mode)
+__xmknod(int ver, const char *path, mode_t mode, dev_t *dev)
{
- intercept ("mkfifo", 2);
- set_errno ();
- return -1;
+ intercept("__xmknod", 2);
+ set_errno();
+ return -1;
}
int
-unlink (const char *path)
+mkfifo(const char *path, mode_t mode)
{
- intercept ("unlink", 2);
- set_errno ();
- return -1;
+ intercept("mkfifo", 2);
+ set_errno();
+ return -1;
}
-
int
-symlink (const char *oldpath, const char *newpath)
+unlink(const char *path)
{
- intercept ("symlink", 2);
- set_errno ();
- return -1;
+ intercept("unlink", 2);
+ set_errno();
+ return -1;
}
int
-readlink (const char *path, char *buf, size_t bufsize)
+symlink(const char *oldpath, const char *newpath)
{
- intercept ("readlink", 1);
- set_errno ();
- return -1;
+ intercept("symlink", 2);
+ set_errno();
+ return -1;
}
+int
+readlink(const char *path, char *buf, size_t bufsize)
+{
+ intercept("readlink", 1);
+ set_errno();
+ return -1;
+}
char *
-realpath (const char *path, char *resolved)
+realpath(const char *path, char *resolved)
{
- intercept ("realpath", 1);
- set_errno ();
- return NULL;
+ intercept("realpath", 1);
+ set_errno();
+ return NULL;
}
-
DIR *
-opendir (const char *path)
+opendir(const char *path)
{
- intercept ("opendir", 2);
- set_errno ();
- return NULL;
+ intercept("opendir", 2);
+ set_errno();
+ return NULL;
}
-
struct dirent *
-readdir (DIR *dir)
+readdir(DIR *dir)
{
- intercept ("readdir\t", 2);
- set_errno ();
- return NULL;
+ intercept("readdir\t", 2);
+ set_errno();
+ return NULL;
}
struct dirent *
-readdir64 (DIR *dir)
+readdir64(DIR *dir)
{
- intercept ("readdir64", 2);
- set_errno ();
- return NULL;
+ intercept("readdir64", 2);
+ set_errno();
+ return NULL;
}
-
int
-readdir_r (DIR *dir, struct dirent *entry, struct dirent **result)
+readdir_r(DIR *dir, struct dirent *entry, struct dirent **result)
{
- intercept ("readdir_r", 1);
- set_errno ();
- return -1;
+ intercept("readdir_r", 1);
+ set_errno();
+ return -1;
}
int
-readdir64_r (DIR *dir, struct dirent *entry, struct dirent **result)
+readdir64_r(DIR *dir, struct dirent *entry, struct dirent **result)
{
- intercept ("readdir64_r", 1);
- set_errno ();
- return -1;
+ intercept("readdir64_r", 1);
+ set_errno();
+ return -1;
}
-
int
-closedir (DIR *dh)
+closedir(DIR *dh)
{
- intercept ("closedir", 1);
- set_errno ();
- return -1;
+ intercept("closedir", 1);
+ set_errno();
+ return -1;
}
int
-__xstat (int ver, const char *path, struct stat *buf)
+__xstat(int ver, const char *path, struct stat *buf)
{
- intercept ("__xstat\t", 2);
- set_errno ();
- return -1;
+ intercept("__xstat\t", 2);
+ set_errno();
+ return -1;
}
-
int
-__xstat64 (int ver, const char *path, struct stat *buf)
+__xstat64(int ver, const char *path, struct stat *buf)
{
- intercept ("__xstat64", 2);
- set_errno ();
- return -1;
+ intercept("__xstat64", 2);
+ set_errno();
+ return -1;
}
int
-stat (const char *path, struct stat *buf)
+stat(const char *path, struct stat *buf)
{
- intercept ("stat", 2);
- set_errno ();
- return -1;
+ intercept("stat", 2);
+ set_errno();
+ return -1;
}
int
-stat64 (const char *path, struct stat *buf)
+stat64(const char *path, struct stat *buf)
{
- intercept ("stat64", 2);
- set_errno ();
- return -1;
+ intercept("stat64", 2);
+ set_errno();
+ return -1;
}
int
-__fxstat (int ver, int fd, struct stat *buf)
+__fxstat(int ver, int fd, struct stat *buf)
{
- intercept ("__fxstat\t", 2);
- set_errno ();
- return -1;
+ intercept("__fxstat\t", 2);
+ set_errno();
+ return -1;
}
-
int
-__fxstat64 (int ver, int fd, struct stat *buf)
+__fxstat64(int ver, int fd, struct stat *buf)
{
- intercept ("__fxstat64", 2);
- set_errno ();
- return -1;
+ intercept("__fxstat64", 2);
+ set_errno();
+ return -1;
}
int
-fstat (int fd, struct stat *buf)
+fstat(int fd, struct stat *buf)
{
- intercept ("fstat", 2);
- set_errno ();
- return -1;
+ intercept("fstat", 2);
+ set_errno();
+ return -1;
}
int
-fstat64 (int fd , struct stat *buf)
+fstat64(int fd, struct stat *buf)
{
- intercept ("fstat64", 2);
- set_errno ();
- return -1;
+ intercept("fstat64", 2);
+ set_errno();
+ return -1;
}
int
-__lxstat (int ver, const char *path, struct stat *buf)
+__lxstat(int ver, const char *path, struct stat *buf)
{
- intercept ("__lxstat\t", 2);
- set_errno ();
- return -1;
+ intercept("__lxstat\t", 2);
+ set_errno();
+ return -1;
}
int
-__lxstat64 (int ver, const char *path, struct stat *buf)
+__lxstat64(int ver, const char *path, struct stat *buf)
{
- intercept ("__lxstat64", 2);
- set_errno ();
- return -1;
+ intercept("__lxstat64", 2);
+ set_errno();
+ return -1;
}
int
-lstat (const char *path, struct stat *buf)
+lstat(const char *path, struct stat *buf)
{
- intercept ("lstat", 2);
- set_errno ();
- return -1;
+ intercept("lstat", 2);
+ set_errno();
+ return -1;
}
int
-lstat64 (const char *path, struct stat *buf)
+lstat64(const char *path, struct stat *buf)
{
- intercept ("lstat64", 2);
- set_errno ();
- return -1;
+ intercept("lstat64", 2);
+ set_errno();
+ return -1;
}
int
-statfs (const char *path, struct statfs *buf)
+statfs(const char *path, struct statfs *buf)
{
- intercept ("statfs", 2);
- set_errno ();
- return -1;
+ intercept("statfs", 2);
+ set_errno();
+ return -1;
}
-
int
-statfs64 (const char *path, struct statfs *buf)
+statfs64(const char *path, struct statfs *buf)
{
- intercept ("statfs64", 2);
- set_errno ();
- return -1;
+ intercept("statfs64", 2);
+ set_errno();
+ return -1;
}
int
-statvfs (const char *path, struct statvfs *buf)
+statvfs(const char *path, struct statvfs *buf)
{
- intercept ("statvfs\t", 2);
- set_errno ();
- return -1;
+ intercept("statvfs\t", 2);
+ set_errno();
+ return -1;
}
-
int
-statvfs64 (const char *path, struct statvfs *buf)
+statvfs64(const char *path, struct statvfs *buf)
{
- intercept ("statvfs64", 2);
- set_errno ();
- return -1;
+ intercept("statvfs64", 2);
+ set_errno();
+ return -1;
}
ssize_t
-getxattr (const char *path, const char *name, void *value, size_t size)
+getxattr(const char *path, const char *name, void *value, size_t size)
{
- intercept ("getxattr", 1);
- set_errno ();
- return -1;
+ intercept("getxattr", 1);
+ set_errno();
+ return -1;
}
ssize_t
-lgetxattr (const char *path, const char *name, void *value, size_t size)
+lgetxattr(const char *path, const char *name, void *value, size_t size)
{
- intercept ("lgetxattr", 1);
- set_errno ();
- return -1;
+ intercept("lgetxattr", 1);
+ set_errno();
+ return -1;
}
-
int
-remove (const char* path)
+remove(const char *path)
{
- intercept ("remove", 2);
- set_errno ();
- return -1;
+ intercept("remove", 2);
+ set_errno();
+ return -1;
}
int
-lchown (const char *path, uid_t owner, gid_t group)
+lchown(const char *path, uid_t owner, gid_t group)
{
- intercept ("lchown", 2);
- set_errno ();
- return -1;
+ intercept("lchown", 2);
+ set_errno();
+ return -1;
}
void
-rewinddir (DIR *dirp)
+rewinddir(DIR *dirp)
{
- intercept ("rewinddir", 1);
- set_errno ();
- return;
+ intercept("rewinddir", 1);
+ set_errno();
+ return;
}
void
-seekdir (DIR *dirp, off_t offset)
+seekdir(DIR *dirp, off_t offset)
{
- intercept ("seekdir", 2);
- set_errno ();
- return;
+ intercept("seekdir", 2);
+ set_errno();
+ return;
}
off_t
-telldir (DIR *dirp)
+telldir(DIR *dirp)
{
- intercept ("telldir", 2);
- set_errno ();
- return -1;
+ intercept("telldir", 2);
+ set_errno();
+ return -1;
}
ssize_t
-sendfile (int out_fd, int in_fd, off_t *offset, size_t count)
+sendfile(int out_fd, int in_fd, off_t *offset, size_t count)
{
- intercept ("sendfile\t", 1);
- set_errno ();
- return -1;
+ intercept("sendfile\t", 1);
+ set_errno();
+ return -1;
}
ssize_t
-sendfile64 (int out_fd, int in_fd, off_t *offset, size_t count)
+sendfile64(int out_fd, int in_fd, off_t *offset, size_t count)
{
- intercept ("sendfile64", 1);
- set_errno ();
- return -1;
+ intercept("sendfile64", 1);
+ set_errno();
+ return -1;
}
-
int
-fcntl (int fd, int cmd, ...)
+fcntl(int fd, int cmd, ...)
{
- intercept ("fcntl", 2);
- set_errno ();
- return -1;
+ intercept("fcntl", 2);
+ set_errno();
+ return -1;
}
-
diff --git a/extras/test/ld-preload-test/ld-preload-test.c b/extras/test/ld-preload-test/ld-preload-test.c
index 55dd98805b4..54dde8c7d54 100644
--- a/extras/test/ld-preload-test/ld-preload-test.c
+++ b/extras/test/ld-preload-test/ld-preload-test.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/*
* LD PRELOAD Test Tool
*
@@ -56,322 +46,313 @@
#include <sys/uio.h>
#include <utime.h>
#include <sys/time.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <sys/sendfile.h>
-
-#define PRELOAD_ERRNO_VERF 6449
-inline void
+#define PRELOAD_ERRNO_VERF 6449
+void
check_err(int ret, char *call, int tabs)
{
- while (tabs > 0) {
- fprintf (stdout, "\t");
- --tabs;
- }
- if (ret != -1) {
- fprintf (stdout, "Not intercepted: %s\n", call);
- return;
- }
-
- if (errno != PRELOAD_ERRNO_VERF) {
- fprintf (stdout, "Not intercepted: %s: err: %s\n", call,
- strerror (errno));
- return;
- }
+ while (tabs > 0) {
+ fprintf(stdout, "\t");
+ --tabs;
+ }
+ if (ret != -1) {
+ fprintf(stdout, "Not intercepted: %s\n", call);
+ return;
+ }
- fprintf (stdout, "Intercept verified: %s\n", call);
+ if (errno != PRELOAD_ERRNO_VERF) {
+ fprintf(stdout, "Not intercepted: %s: err: %s\n", call,
+ strerror(errno));
return;
+ }
+
+ fprintf(stdout, "Intercept verified: %s\n", call);
+ return;
}
void
-usage (FILE *fp)
+usage(FILE *fp)
{
- fprintf (fp, "Usage: ld-preload-test <Options>\n");
- fprintf (fp, "Options\n");
- fprintf (fp, "\t--path\t\tPathname is used as the file/directory"
- " created for the test.\n");
-
+ fprintf(fp, "Usage: ld-preload-test <Options>\n");
+ fprintf(fp, "Options\n");
+ fprintf(fp,
+ "\t--path\t\tPathname is used as the file/directory"
+ " created for the test.\n");
}
-
int
-run_file_tests (char *testfile)
+run_file_tests(char *testfile)
{
- int ret = -1;
- struct stat buf;
+ int ret = -1;
+ struct stat buf;
- assert (testfile);
- fprintf (stdout, "Testing creat");
- ret = creat (testfile, S_IRWXU);
- check_err (ret, "creat", 2);
+ assert(testfile);
+ fprintf(stdout, "Testing creat");
+ ret = creat(testfile, S_IRWXU);
+ check_err(ret, "creat", 2);
- fprintf (stdout, "Testing close");
- ret = close (ret);
- check_err (ret, "close", 2);
+ fprintf(stdout, "Testing close");
+ ret = close(ret);
+ check_err(ret, "close", 2);
- fprintf (stdout, "Testing open");
- ret = open (testfile, O_RDONLY);
- check_err (ret, "open", 2);
+ fprintf(stdout, "Testing open");
+ ret = open(testfile, O_RDONLY);
+ check_err(ret, "open", 2);
- fprintf (stdout, "Testing read");
- ret = read (0, NULL, 0);
- check_err (ret, "read", 2);
+ fprintf(stdout, "Testing read");
+ ret = read(0, NULL, 0);
+ check_err(ret, "read", 2);
- fprintf (stdout, "Testing readv");
- ret = readv (0, NULL, 0);
- check_err (ret, "readv", 2);
+ fprintf(stdout, "Testing readv");
+ ret = readv(0, NULL, 0);
+ check_err(ret, "readv", 2);
- fprintf (stdout, "Testing pread");
- ret = pread (0, NULL, 0, 0);
- check_err (ret, "pread", 2);
+ fprintf(stdout, "Testing pread");
+ ret = pread(0, NULL, 0, 0);
+ check_err(ret, "pread", 2);
- fprintf (stdout, "Testing write");
- ret = write (0, NULL, 0);
- check_err (ret, "write", 2);
+ fprintf(stdout, "Testing write");
+ ret = write(0, NULL, 0);
+ check_err(ret, "write", 2);
- fprintf (stdout, "Testing writev");
- ret = writev (0, NULL, 0);
- check_err (ret, "writev", 2);
+ fprintf(stdout, "Testing writev");
+ ret = writev(0, NULL, 0);
+ check_err(ret, "writev", 2);
- fprintf (stdout, "Testing pwrite");
- ret = pwrite (0, NULL, 0, 0);
- check_err (ret, "pwrite", 2);
+ fprintf(stdout, "Testing pwrite");
+ ret = pwrite(0, NULL, 0, 0);
+ check_err(ret, "pwrite", 2);
- fprintf (stdout, "Testing lseek");
- ret = lseek (0, 0, 0);
- check_err (ret, "lseek", 2);
+ fprintf(stdout, "Testing lseek");
+ ret = lseek(0, 0, 0);
+ check_err(ret, "lseek", 2);
- fprintf (stdout, "Testing dup");
- ret = dup (0);
- check_err (ret, "dup", 2);
+ fprintf(stdout, "Testing dup");
+ ret = dup(0);
+ check_err(ret, "dup", 2);
- fprintf (stdout, "Testing dup2");
- ret = dup2 (0, 0);
- check_err (ret, "dup2", 2);
+ fprintf(stdout, "Testing dup2");
+ ret = dup2(0, 0);
+ check_err(ret, "dup2", 2);
- fprintf (stdout, "Testing fchmod");
- ret = fchmod (0, 0);
- check_err (ret, "fchmod", 2);
+ fprintf(stdout, "Testing fchmod");
+ ret = fchmod(0, 0);
+ check_err(ret, "fchmod", 2);
- fprintf (stdout, "Testing fchown");
- ret = fchown (0, 0, 0);
- check_err (ret, "fchown", 2);
+ fprintf(stdout, "Testing fchown");
+ ret = fchown(0, 0, 0);
+ check_err(ret, "fchown", 2);
- fprintf (stdout, "Testing fsync");
- ret = fsync (0);
- check_err (ret, "fsync", 2);
+ fprintf(stdout, "Testing fsync");
+ ret = fsync(0);
+ check_err(ret, "fsync", 2);
- fprintf (stdout, "Testing ftruncate");
- ret = ftruncate (0, 0);
- check_err (ret, "ftruncate", 1);
+ fprintf(stdout, "Testing ftruncate");
+ ret = ftruncate(0, 0);
+ check_err(ret, "ftruncate", 1);
- fprintf (stdout, "Testing fstat");
- ret = fstat (0, &buf);
- check_err (ret, "fstat", 1);
+ fprintf(stdout, "Testing fstat");
+ ret = fstat(0, &buf);
+ check_err(ret, "fstat", 1);
- fprintf (stdout, "Testing sendfile");
- ret = sendfile (0, 0, NULL, 0);
- check_err (ret, "sendfile", 1);
+ fprintf(stdout, "Testing sendfile");
+ ret = sendfile(0, 0, NULL, 0);
+ check_err(ret, "sendfile", 1);
- fprintf (stdout, "Testing fcntl");
- ret = fcntl (0, 0, NULL);
- check_err (ret, "fcntl", 2);
+ fprintf(stdout, "Testing fcntl");
+ ret = fcntl(0, 0, NULL);
+ check_err(ret, "fcntl", 2);
- fprintf (stdout, "Testing close");
- ret = close (ret);
- check_err (ret, "close", 2);
+ fprintf(stdout, "Testing close");
+ ret = close(ret);
+ check_err(ret, "close", 2);
- fprintf (stdout, "Testing remove");
- ret = remove (testfile);
- check_err (ret, "remove", 2);
+ fprintf(stdout, "Testing remove");
+ ret = remove(testfile);
+ check_err(ret, "remove", 2);
- return ret;
+ return ret;
}
-
int
-run_attr_tests (char *testfile)
+run_attr_tests(char *testfile)
{
- int ret = -1;
- char *res = NULL;
- struct stat buf;
- struct statfs sbuf;
- struct statvfs svbuf;
-
- assert (testfile);
-
- fprintf (stdout, "Testing chmod");
- ret = chmod (testfile, 0);
- check_err (ret, "chmod", 2);
-
- fprintf (stdout, "Testing chown");
- ret = chown (testfile, 0, 0);
- check_err (ret, "chown", 2);
-
- fprintf (stdout, "Testing link");
- ret = link (testfile, testfile);
- check_err (ret, "link", 2);
-
- fprintf (stdout, "Testing rename");
- ret = rename (testfile, testfile);
- check_err (ret, "rename", 2);
-
- fprintf (stdout, "Testing utimes");
- ret = utimes (testfile, NULL);
- check_err (ret, "utimes", 2);
-
- fprintf (stdout, "Testing utime");
- ret = utime (testfile, NULL);
- check_err (ret, "utime", 2);
-
- fprintf (stdout, "Testing unlink");
- ret = unlink (testfile);
- check_err (ret, "unlink", 2);
-
- fprintf (stdout, "Testing symlink");
- ret = symlink (testfile, testfile);
- check_err (ret, "symlink", 2);
-
- fprintf (stdout, "Testing readlink");
- ret = readlink (testfile, testfile, 0);
- check_err (ret, "readlink", 2);
-
- fprintf (stdout, "Testing realpath");
- ret = 0;
- res = realpath ((const char *)testfile, testfile);
- if (!res)
- ret = -1;
- check_err (ret, "realpath", 2);
-
- fprintf (stdout, "Testing stat");
- ret = stat (testfile, &buf);
- check_err (ret, "stat", 1);
-
- fprintf (stdout, "Testing lstat");
- ret = lstat (testfile, &buf);
- check_err (ret, "lstat", 1);
-
- fprintf (stdout, "Testing statfs");
- ret = statfs (testfile, &sbuf);
- check_err (ret, "statfs", 2);
-
- fprintf (stdout, "Testing statvfs");
- ret = statvfs (testfile, &svbuf);
- check_err (ret, "statvfs", 1);
-
- fprintf (stdout, "Testing getxattr");
- ret = getxattr (testfile, NULL, NULL, 0);
- check_err (ret, "getxattr", 2);
-
- fprintf (stdout, "Testing lgetxattr");
- ret = lgetxattr (testfile, NULL, NULL, 0);
- check_err (ret, "lgetxattr", 1);
-
- fprintf (stdout, "Testing lchown");
- ret = lchown (testfile, 0, 0);
- check_err (ret, "lchown", 2);
- return 0;
+ int ret = -1;
+ char *res = NULL;
+ struct stat buf;
+ struct statfs sbuf;
+ struct statvfs svbuf;
+
+ assert(testfile);
+
+ fprintf(stdout, "Testing chmod");
+ ret = chmod(testfile, 0);
+ check_err(ret, "chmod", 2);
+
+ fprintf(stdout, "Testing chown");
+ ret = chown(testfile, 0, 0);
+ check_err(ret, "chown", 2);
+
+ fprintf(stdout, "Testing link");
+ ret = link(testfile, testfile);
+ check_err(ret, "link", 2);
+
+ fprintf(stdout, "Testing rename");
+ ret = rename(testfile, testfile);
+ check_err(ret, "rename", 2);
+
+ fprintf(stdout, "Testing utimes");
+ ret = utimes(testfile, NULL);
+ check_err(ret, "utimes", 2);
+
+ fprintf(stdout, "Testing utime");
+ ret = utime(testfile, NULL);
+ check_err(ret, "utime", 2);
+
+ fprintf(stdout, "Testing unlink");
+ ret = unlink(testfile);
+ check_err(ret, "unlink", 2);
+
+ fprintf(stdout, "Testing symlink");
+ ret = symlink(testfile, testfile);
+ check_err(ret, "symlink", 2);
+
+ fprintf(stdout, "Testing readlink");
+ ret = readlink(testfile, testfile, 0);
+ check_err(ret, "readlink", 2);
+
+ fprintf(stdout, "Testing realpath");
+ ret = 0;
+ res = realpath((const char *)testfile, testfile);
+ if (!res)
+ ret = -1;
+ check_err(ret, "realpath", 2);
+
+ fprintf(stdout, "Testing stat");
+ ret = stat(testfile, &buf);
+ check_err(ret, "stat", 1);
+
+ fprintf(stdout, "Testing lstat");
+ ret = lstat(testfile, &buf);
+ check_err(ret, "lstat", 1);
+
+ fprintf(stdout, "Testing statfs");
+ ret = statfs(testfile, &sbuf);
+ check_err(ret, "statfs", 2);
+
+ fprintf(stdout, "Testing statvfs");
+ ret = statvfs(testfile, &svbuf);
+ check_err(ret, "statvfs", 1);
+
+ fprintf(stdout, "Testing getxattr");
+ ret = getxattr(testfile, NULL, NULL, 0);
+ check_err(ret, "getxattr", 2);
+
+ fprintf(stdout, "Testing lgetxattr");
+ ret = lgetxattr(testfile, NULL, NULL, 0);
+ check_err(ret, "lgetxattr", 1);
+
+ fprintf(stdout, "Testing lchown");
+ ret = lchown(testfile, 0, 0);
+ check_err(ret, "lchown", 2);
+ return 0;
}
-
int
-run_dev_tests (char *testfile)
+run_dev_tests(char *testfile)
{
- int ret = -1;
+ int ret = -1;
- assert (testfile);
+ assert(testfile);
- fprintf (stdout, "Testing mknod");
- ret = mknod (testfile, 0, 0);
- check_err (ret, "mknod", 2);
+ fprintf(stdout, "Testing mknod");
+ ret = mknod(testfile, 0, 0);
+ check_err(ret, "mknod", 2);
- fprintf (stdout, "Testing mkfifo");
- ret = mkfifo (testfile, 0);
- check_err (ret, "mkfifo", 2);
- return 0;
+ fprintf(stdout, "Testing mkfifo");
+ ret = mkfifo(testfile, 0);
+ check_err(ret, "mkfifo", 2);
+ return 0;
}
int
-run_dir_tests (char *testpath)
+run_dir_tests(char *testpath)
{
- int ret = -1;
- DIR *dh = NULL;
- struct dirent *dire = NULL;
-
- assert (testpath);
-
- fprintf (stdout, "Testing mkdir");
- ret = mkdir (testpath, 0);
- check_err (ret, "mkdir", 2);
-
- fprintf (stdout, "Testing rmdir");
- ret = rmdir (testpath);
- check_err (ret, "rmdir", 2);
-
- fprintf (stdout, "Testing opendir");
- ret = 0;
- dh = opendir (testpath);
- if (!dh)
- ret = -1;
- check_err (ret, "opendir", 2);
-
- fprintf (stdout, "Testing readdir");
- ret = 0;
- dire = readdir (dh);
- if (!dire)
- ret = -1;
- check_err (ret, "readdir", 1);
-
- fprintf (stdout, "Testing readdir_r");
- ret = readdir_r (dh, dire, &dire);
- check_err (ret, "readdir_r", 1);
-
- fprintf (stdout, "Testing rewinddir");
- rewinddir (dh);
- check_err (-1, "rewinddir", 1);
-
- fprintf (stdout, "Testing seekdir");
- seekdir (dh, 0);
- check_err (-1, "seekdir", 2);
-
- fprintf (stdout, "Testing telldir");
- ret = telldir (dh);
- check_err (ret, "telldir", 2);
-
- fprintf (stdout, "Testing closedir");
- ret = closedir (dh);
- check_err (ret, "closedir", 2);
- return 0;
+ int ret = -1;
+ DIR *dh = NULL;
+ struct dirent *dire = NULL;
+
+ assert(testpath);
+
+ fprintf(stdout, "Testing mkdir");
+ ret = mkdir(testpath, 0);
+ check_err(ret, "mkdir", 2);
+
+ fprintf(stdout, "Testing rmdir");
+ ret = rmdir(testpath);
+ check_err(ret, "rmdir", 2);
+
+ fprintf(stdout, "Testing opendir");
+ ret = 0;
+ dh = opendir(testpath);
+ if (!dh)
+ ret = -1;
+ check_err(ret, "opendir", 2);
+
+ fprintf(stdout, "Testing readdir");
+ ret = 0;
+ dire = readdir(dh);
+ if (!dire)
+ ret = -1;
+ check_err(ret, "readdir", 1);
+
+ fprintf(stdout, "Testing readdir_r");
+ ret = readdir_r(dh, dire, &dire);
+ check_err(ret, "readdir_r", 1);
+
+ fprintf(stdout, "Testing rewinddir");
+ rewinddir(dh);
+ check_err(-1, "rewinddir", 1);
+
+ fprintf(stdout, "Testing seekdir");
+ seekdir(dh, 0);
+ check_err(-1, "seekdir", 2);
+
+ fprintf(stdout, "Testing telldir");
+ ret = telldir(dh);
+ check_err(ret, "telldir", 2);
+
+ fprintf(stdout, "Testing closedir");
+ ret = closedir(dh);
+ check_err(ret, "closedir", 2);
+ return 0;
}
-
-
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- char *testpath = NULL;
- int x = 0;
-
- for (;x < argc; ++x) {
- if (strcmp (argv[x], "--path") == 0) {
- testpath = argv[x+1];
- continue;
- }
+ char *testpath = NULL;
+ int x = 0;
+ for (; x < argc; ++x) {
+ if (strcmp(argv[x], "--path") == 0) {
+ testpath = argv[x + 1];
+ continue;
}
+ }
- if (!testpath) {
- fprintf (stderr, "--path not specified\n");
- usage (stderr);
- return -1;
- }
+ if (!testpath) {
+ fprintf(stderr, "--path not specified\n");
+ usage(stderr);
+ return -1;
+ }
- run_file_tests (testpath);
- run_dir_tests (testpath);
- run_attr_tests (testpath);
- run_dev_tests (testpath);
+ run_file_tests(testpath);
+ run_dir_tests(testpath);
+ run_attr_tests(testpath);
+ run_dev_tests(testpath);
- return 0;
+ return 0;
}
-
-
diff --git a/extras/test/open-fd-tests.c b/extras/test/open-fd-tests.c
index 4184079d043..509952b4180 100644
--- a/extras/test/open-fd-tests.c
+++ b/extras/test/open-fd-tests.c
@@ -4,61 +4,64 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <errno.h>
#include <string.h>
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- int ret = -1;
- int fd = 0;
- char *filename = NULL;
- int loop = 0;
- struct stat stbuf = {0,};
- char string[1024] = {0,};
+ int ret = -1;
+ int fd = 0;
+ char *filename = NULL;
+ int loop = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char string[1024] = {
+ 0,
+ };
- if (argc > 1)
- filename = argv[1];
+ if (argc > 1)
+ filename = argv[1];
- if (!filename)
- filename = "temp-fd-test-file";
+ if (!filename)
+ filename = "temp-fd-test-file";
- fd = open (filename, O_RDWR|O_CREAT|O_TRUNC);
- if (fd < 0) {
- fd = 0;
- fprintf (stderr, "open failed : %s\n", strerror (errno));
- goto out;
- }
-
- while (loop < 1000) {
- /* Use it as a mechanism to test time delays */
- memset (string, 0, 1024);
- scanf ("%s", string);
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
- ret = write (fd, string, strlen (string));
- if (ret != strlen (string)) {
- fprintf (stderr, "write failed : %s (%s %d)\n",
- strerror (errno), string, loop);
- goto out;
- }
+ while (loop < 1000) {
+ /* Use it as a mechanism to test time delays */
+ memset(string, 0, 1024);
+ scanf("%s", string);
- ret = write (fd, "\n", 1);
- if (ret != 1) {
- fprintf (stderr, "write failed : %s (%d)\n",
- strerror (errno), loop);
- goto out;
- }
+ ret = write(fd, string, strlen(string));
+ if (ret != strlen(string)) {
+ fprintf(stderr, "write failed : %s (%s %d)\n", strerror(errno),
+ string, loop);
+ goto out;
+ }
- loop++;
+ ret = write(fd, "\n", 1);
+ if (ret != 1) {
+ fprintf(stderr, "write failed : %s (%d)\n", strerror(errno), loop);
+ goto out;
}
- fprintf (stdout, "finishing the test after %d loops\n", loop);
+ loop++;
+ }
+
+ fprintf(stdout, "finishing the test after %d loops\n", loop);
- ret = 0;
+ ret = 0;
out:
- if (fd)
- close (fd);
+ if (fd)
+ close(fd);
- return ret;
+ return ret;
}
diff --git a/extras/test/run.sh b/extras/test/run.sh
index 2440af237a5..4b3839cf679 100755
--- a/extras/test/run.sh
+++ b/extras/test/run.sh
@@ -1,21 +1,12 @@
#!/bin/sh
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
# Running gluster sanity test which starts glusterd and runs gluster commands, and exit at the first failure.
$PWD/gluster_commands.sh
diff --git a/extras/test/stop_glusterd.sh b/extras/test/stop_glusterd.sh
index a84689beb48..a2db13f4241 100755
--- a/extras/test/stop_glusterd.sh
+++ b/extras/test/stop_glusterd.sh
@@ -1,21 +1,12 @@
#!/bin/bash
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
#This script stops the glusterd running on the machine. Helpful for gluster sanity script
diff --git a/extras/test/test-ffop.c b/extras/test/test-ffop.c
index 2c47ab0045b..1d9c125db67 100644
--- a/extras/test/test-ffop.c
+++ b/extras/test/test-ffop.c
@@ -3,103 +3,918 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <errno.h>
#include <string.h>
+#include <dirent.h>
int
-main (int argc, char *argv[])
+fd_based_fops_1(char *filename); // for fd based fops after unlink
+int
+fd_based_fops_2(char *filename); // for fd based fops before unlink
+int
+dup_fd_based_fops(char *filename); // fops based on fd after dup
+int
+path_based_fops(char *filename); // for fops based on path
+int
+dir_based_fops(char *filename); // for fops which operate on directory
+int
+link_based_fops(
+ char *filename); // for fops which operate in link files (symlinks)
+int
+test_open_modes(
+ char *filename); // to test open syscall with open modes available.
+int
+generic_open_read_write(
+ char *filename,
+ int flag); // generic function which does open write and read.
+
+int
+main(int argc, char *argv[])
{
- int ret = -1;
- int fd = 0;
- char *filename = NULL;
- struct stat stbuf = {0,};
-
- if (argc > 1)
- filename = argv[1];
-
- if (!filename)
- filename = "temp-xattr-test-file";
-
- fd = open (filename, O_RDWR|O_CREAT);
- if (fd < 0) {
- fd = 0;
- fprintf (stderr, "open failed : %s\n", strerror (errno));
- goto out;
- }
+ int ret = -1;
+ char filename[255] = {
+ 0,
+ };
- ret = unlink (filename);
- if (ret < 0) {
- fprintf (stderr, "unlink failed : %s\n", strerror (errno));
- goto out;
- }
+ if (argc > 1)
+ strcpy(filename, argv[1]);
+ else
+ strcpy(filename, "temp-xattr-test-file");
- ret = ftruncate (fd, 0);
- if (ret < 0) {
- fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = fd_based_fops_1(strcat(filename, "_1"));
+ if (ret < 0)
+ fprintf(stderr, "fd based file operation 1 failed\n");
+ else
+ fprintf(stdout, "fd based file operation 1 passed\n");
- ret = fstat (fd, &stbuf);
- if (ret < 0) {
- fprintf (stderr, "fstat failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = fd_based_fops_2(strcat(filename, "_2"));
+ if (ret < 0)
+ fprintf(stderr, "fd based file operation 2 failed\n");
+ else
+ fprintf(stdout, "fd based file operation 2 passed\n");
- ret = fchmod (fd, 0640);
- if (ret < 0) {
- fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = dup_fd_based_fops(strcat(filename, "_3"));
+ if (ret < 0)
+ fprintf(stderr, "dup fd based file operation failed\n");
+ else
+ fprintf(stdout, "dup fd based file operation passed\n");
- ret = fchown (fd, 10001, 10001);
- if (ret < 0) {
- fprintf (stderr, "fchown failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = path_based_fops(strcat(filename, "_4"));
+ if (ret < 0)
+ fprintf(stderr, "path based file operation failed\n");
+ else
+ fprintf(stdout, "path based file operation passed\n");
- ret = fsync (fd);
- if (ret < 0) {
- fprintf (stderr, "fsync failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = dir_based_fops(strcat(filename, "_5"));
+ if (ret < 0)
+ fprintf(stderr, "directory based file operation failed\n");
+ else
+ fprintf(stdout, "directory based file operation passed\n");
- ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
- if (ret < 0) {
- fprintf (stderr, "fsetxattr failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = link_based_fops(strcat(filename, "_5"));
+ if (ret < 0)
+ fprintf(stderr, "link based file operation failed\n");
+ else
+ fprintf(stdout, "link based file operation passed\n");
- ret = fdatasync (fd);
- if (ret < 0) {
- fprintf (stderr, "fdatasync failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = test_open_modes(strcat(filename, "_5"));
+ if (ret < 0)
+ fprintf(stderr, "testing modes of 'open' call failed\n");
+ else
+ fprintf(stdout, "testing modes of 'open' call passed\n");
+
+out:
+ return ret;
+}
+
+int
+fd_based_fops_1(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = ftruncate(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchmod(fd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchown(fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flistxattr(fd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fremovexattr(fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fd)
+ close(fd);
+
+ return ret;
+}
+
+int
+fd_based_fops_2(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = ftruncate(fd, 0);
+
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(wstr, "This is my second string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchmod(fd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchown(fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flistxattr(fd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fremovexattr(fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+out:
+ if (fd)
+ close(fd);
+ unlink(filename);
+
+ return ret;
+}
+
+int
+path_based_fops(char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char newfilename[255] = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = truncate(filename, 0);
+ if (ret < 0) {
+ fprintf(stderr, "truncate failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = stat(filename, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chmod(filename, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chown(filename, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chown failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = listxattr(filename, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = removexattr(filename, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = access(filename, R_OK | W_OK);
+ if (ret < 0) {
+ fprintf(stderr, "access failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newfilename, filename);
+ strcat(newfilename, "_new");
+ ret = rename(filename, newfilename);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ goto out;
+ }
+ unlink(newfilename);
+
+out:
+ if (fd)
+ close(fd);
+
+ unlink(filename);
+ return ret;
+}
- ret = flistxattr (fd, NULL, 0);
- if (ret <= 0) {
- ret = -1;
- fprintf (stderr, "flistxattr failed : %s\n", strerror (errno));
- goto out;
+int
+dup_fd_based_fops(char *filename)
+{
+ int fd = 0;
+ int newfd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ newfd = dup(fd);
+ if (newfd < 0) {
+ ret = -1;
+ fprintf(stderr, "dup failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ close(fd);
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(newfd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lseek(newfd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = read(newfd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = ftruncate(newfd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fstat(newfd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchmod(newfd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchown(newfd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fdatasync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flistxattr(newfd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fremovexattr(newfd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (newfd)
+ close(newfd);
+ ret = unlink(filename);
+ if (ret < 0)
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+
+ return ret;
+}
+
+int
+dir_based_fops(char *dirname)
+{
+ int ret = -1;
+ DIR *dp = NULL;
+ char buff[255] = {
+ 0,
+ };
+ struct dirent *dbuff = {
+ 0,
+ };
+ struct stat stbuff = {
+ 0,
+ };
+ char newdname[255] = {
+ 0,
+ };
+ char *cwd = NULL;
+
+ ret = mkdir(dirname, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ dp = opendir(dirname);
+ if (dp == NULL) {
+ fprintf(stderr, "opendir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ dbuff = readdir(dp);
+ if (NULL == dbuff) {
+ fprintf(stderr, "readdir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = closedir(dp);
+ if (ret < 0) {
+ fprintf(stderr, "closedir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = stat(dirname, &stbuff);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chmod(dirname, 0744);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chown(dirname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = listxattr(dirname, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = removexattr(dirname, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "/../");
+ ret = chdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "chdir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ cwd = getcwd(buff, 255);
+ if (NULL == cwd) {
+ fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "new");
+ ret = rename(dirname, newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = rmdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+out:
+ rmdir(dirname);
+ return ret;
+}
+
+int
+link_based_fops(char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ char newname[255] = {
+ 0,
+ };
+ char linkname[255] = {
+ 0,
+ };
+ struct stat lstbuf = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newname, filename);
+ strcat(newname, "_hlink");
+ ret = link(filename, newname);
+ if (ret < 0) {
+ fprintf(stderr, "link failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(linkname, filename);
+ strcat(linkname, "_slink");
+ ret = symlink(newname, linkname);
+ if (ret < 0) {
+ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lstat(linkname, &lstbuf);
+ if (ret < 0) {
+ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lchown(linkname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "lchown failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = llistxattr(linkname, NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lremovexattr(linkname, "trusted.lxattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+out:
+ if (fd)
+ close(fd);
+ unlink(linkname);
+ unlink(newname);
+}
+
+int
+test_open_modes(char *filename)
+{
+ int ret = -1;
+
+ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY);
+ if (3 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
+ goto out;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
+ goto out;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_WRONLY);
+ if (3 != ret) {
+ fprintf(stderr, "flag O_WRONLY failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDWR);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDWR failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDONLY);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDONLY failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY);
+ if (3 != ret) {
+ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
+ goto out;
+ }
+
+#if 0 /* undefined behaviour, unable to reliably test */
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_TRUNC|O_RDONLY);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_TRUNC|O_RDONLY failed\n");
+ goto out;
}
+#endif
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
+ goto out;
+ }
- ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
- if (ret <= 0) {
- ret = -1;
- fprintf (stderr, "fgetxattr failed : %s\n", strerror (errno));
- goto out;
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_CREAT | O_EXCL);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+generic_open_read_write(char *filename, int flag)
+{
+ int fd = 0;
+ int ret = -1;
+ char wstring[50] = {
+ 0,
+ };
+ char rstring[50] = {
+ 0,
+ };
+
+ fd = open(filename, flag);
+ if (fd < 0) {
+ if (flag == O_CREAT | O_EXCL && errno == EEXIST) {
+ unlink(filename);
+ return 0;
+ } else {
+ fd = 0;
+ fprintf(stderr, "open failed: %s\n", strerror(errno));
+ return 1;
}
+ }
- ret = fremovexattr (fd, "trusted.xattr-test");
- if (ret < 0) {
- fprintf (stderr, "fremovexattr failed : %s\n", strerror (errno));
- goto out;
+ strcpy(wstring, "My string to write\n");
+ ret = write(fd, wstring, strlen(wstring));
+ if (ret <= 0) {
+ if (errno != EBADF) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ close(fd);
+ unlink(filename);
+ return 2;
}
+ }
- ret = 0;
-out:
- if (fd)
- close (fd);
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ close(fd);
+ unlink(filename);
+ return 4;
+ }
- return ret;
+ ret = read(fd, rstring, strlen(wstring));
+ if (ret < 0) {
+ close(fd);
+ unlink(filename);
+ return 3;
+ }
+
+ /* Compare the rstring with wstring. But we do not want to return
+ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
+ * O_TRUNC|O_RDONLY. Because in that case we are not writing
+ * anything to the file.*/
+
+ ret = memcmp(wstring, rstring, strlen(wstring));
+ if (0 != ret && !(flag == O_CREAT | O_RDONLY || flag == O_RDONLY ||
+ flag == O_TRUNC | O_RDONLY)) {
+ fprintf(stderr, "read is returning junk\n");
+ close(fd);
+ unlink(filename);
+ return 4;
+ }
+
+ close(fd);
+ unlink(filename);
+ return 0;
}
diff --git a/extras/thin-arbiter/setup-thin-arbiter.sh b/extras/thin-arbiter/setup-thin-arbiter.sh
new file mode 100755
index 00000000000..0681b30ef3f
--- /dev/null
+++ b/extras/thin-arbiter/setup-thin-arbiter.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# Copyright (c) 2018-2019 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+
+# This tool has been developed to setup thin-arbiter process on a node.
+# Seting up a thin arbiter process involves following files -
+# 1 - thin-arbiter.vol
+# Thin-arbiter (TA) process will use the graph in this file to load the
+# required translators.
+# 2 - gluster-ta-volume.service (generated by gluster-ta-volume.service.in)
+# TA process would be running as systemd service.
+#
+# TA process uses a location to save TA id files for every subvolume.
+# This location can be taken as input from user. Once provided and the
+# TA process is started on a node, it can not be changed using this
+# script or by any other mean. The same location should be used in
+# the gluster CLI when creating thin-arbiter volumes.
+
+MYPATH=`dirname $0`
+
+volloc="/var/lib/glusterd/thin-arbiter"
+mkdir -p $volloc
+
+if [ -f /etc/glusterfs/thin-arbiter.vol ]; then
+ volfile=/etc/glusterfs/thin-arbiter.vol
+else
+ volfile=$MYPATH/thin-arbiter.vol
+fi
+
+tafile="$volloc/thin-arbiter.vol"
+
+
+help () {
+ echo " "
+ echo ' This tool helps to setup thin-arbiter (TA) process on a node.
+ TA process uses a location to save TA id files for every subvolume.
+ This location can be taken as input from user. Once provided and the
+ TA process is started on a node, it can not be changed using this script
+ or by any other mean. The same location should be used in gluster CLI
+ when creating thin-arbiter volumes.
+
+ usage: setup-thin-arbiter.sh [-s] [-h]
+ options:
+ -s - Setup thin-arbiter file path and start process
+ -h - Show this help message and exit
+'
+}
+
+volfile_set_brick_path () {
+ while read -r line
+ do
+ dir=`echo "$line" | cut -d' ' -f 2`
+ if [ "$dir" = "directory" ]; then
+ bpath=`echo "$line" | cut -d' ' -f 3`
+ sed -i -- 's?'$bpath'?'$1'?g' $tafile
+ return
+ fi
+ done < $tafile
+}
+
+check_ta_proc () {
+ pro=`ps aux | grep thin-arbiter.vol | grep "volfile-id"`
+ if [ "${pro}" = '' ]; then
+ echo ""
+ else
+ curr_loc=`cat $volloc/thin-arbiter.vol | grep option | grep directory`
+ loc=`echo "${curr_loc##* }"`
+ echo "******************************************************"
+ echo "Error:"
+ echo "Thin-arbiter process is running with thin-arbiter path = $loc"
+ echo "Can not change TA path on this host now."
+ echo "$pro"
+ echo "******************************************************"
+ exit 1
+ fi
+}
+
+getpath () {
+ check_ta_proc
+ echo "******************************************************"
+ echo "User will be required to enter a path/folder for arbiter volume."
+ echo "Please note that this path will be used for ALL VOLUMES using this"
+ echo "node to host thin-arbiter. After setting, if a volume"
+ echo "has been created using this host and path then path for"
+ echo "thin-arbiter can not be changed "
+ echo "******************************************************"
+ echo " "
+ while true;
+ do
+ echo -n "Enter brick path for thin arbiter volumes: "
+ echo " "
+ read tapath
+ if [ "${tapath}" = '' ]; then
+ echo "Please enter valid path"
+ continue
+ else
+ echo "Entered brick path : $tapath "
+ echo "Please note that this brick path will be used for ALL"
+ echo "VOLUMES using this node to host thin-arbiter brick"
+ echo -n "Want to continue? (y/N): "
+ echo " "
+ read cont
+
+ if [ "${cont}" = 'N' ] || [ "${cont}" = 'n' ]; then
+ exit 0
+ else
+ break
+ fi
+ fi
+ done
+}
+
+setup () {
+ getpath
+ mkdir -p $tapath/.glusterfs/indices
+ if [ -d $tapath/.glusterfs/indices ]; then
+ echo " "
+ else
+ echo "Could not create $tapath/.glusterfs/indices directory, check provided ta path."
+ exit 1
+ fi
+
+ cp -f --backup --suffix=_old $volfile $volloc/thin-arbiter.vol
+ volfile_set_brick_path "$tapath"
+
+ echo "Directory path to be used for thin-arbiter volume is: $tapath"
+ echo " "
+ echo "========================================================"
+
+ if [ -f /usr/lib/systemd/system/gluster-ta-volume.service ]; then
+ echo "Starting thin-arbiter process"
+ else
+ cp $MYPATH/../systemd/gluster-ta-volume.service /etc/systemd/system/
+ echo "Starting thin-arbiter process"
+ chmod 0644 /etc/systemd/system/gluster-ta-volume.service
+ fi
+
+ systemctl daemon-reload
+ systemctl enable gluster-ta-volume
+ systemctl stop gluster-ta-volume
+ systemctl start gluster-ta-volume
+
+ if [ $? == 0 ]; then
+ echo "thin-arbiter process has been setup and running"
+ else
+ echo "Failed to setup thin arbiter"
+ exit 1
+ fi
+
+}
+
+main()
+{
+
+ if [ "$#" -ne 1 ]; then
+ help
+ exit 0
+ fi
+
+ while getopts "sh" opt; do
+ case $opt in
+ h)
+ help
+ exit 0
+ ;;
+ s)
+ setup
+ exit 0
+ ;;
+ *)
+ help
+ exit 0
+ ;;
+ esac
+ done
+}
+
+main "$@"
diff --git a/extras/thin-arbiter/thin-arbiter.vol b/extras/thin-arbiter/thin-arbiter.vol
new file mode 100644
index 00000000000..c76babc7b3c
--- /dev/null
+++ b/extras/thin-arbiter/thin-arbiter.vol
@@ -0,0 +1,57 @@
+volume ta-posix
+ type storage/posix
+ option directory /mnt/thin-arbiter
+end-volume
+
+volume ta-thin-arbiter
+ type features/thin-arbiter
+ subvolumes ta-posix
+end-volume
+
+volume ta-locks
+ type features/locks
+ option notify-contention yes
+ subvolumes ta-thin-arbiter
+end-volume
+
+volume ta-upcall
+ type features/upcall
+ option cache-invalidation off
+ subvolumes ta-locks
+end-volume
+
+volume ta-io-threads
+ type performance/io-threads
+ subvolumes ta-upcall
+end-volume
+
+volume ta-index
+ type features/index
+ option xattrop-pending-watchlist trusted.afr.ta-
+ option xattrop-dirty-watchlist trusted.afr.dirty
+ option index-base /mnt/thin-arbiter/.glusterfs/indices
+ subvolumes ta-io-threads
+end-volume
+
+volume /mnt/thin-arbiter
+ type debug/io-stats
+ option count-fop-hits off
+ option latency-measurement off
+ option unique-id /mnt/thin-arbiter
+ subvolumes ta-index
+end-volume
+
+volume ta-server
+ type protocol/server
+ option transport.listen-backlog 10
+ option transport.socket.keepalive-count 9
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option auth.addr./mnt/thin-arbiter.allow *
+ option auth-path /mnt/thin-arbiter
+ option transport.address-family inet
+ option transport-type tcp
+ subvolumes /mnt/thin-arbiter
+end-volume
diff --git a/extras/volfilter.py b/extras/volfilter.py
new file mode 100644
index 00000000000..5558a1beff4
--- /dev/null
+++ b/extras/volfilter.py
@@ -0,0 +1,168 @@
+# Copyright (c) 2010-2011 Red Hat, Inc.
+#
+# This file is part of HekaFS.
+#
+# HekaFS is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License, version 3, as published by the Free
+# Software Foundation.
+#
+# HekaFS is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License * along
+# with HekaFS. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import print_function
+import copy
+import string
+import sys
+import types
+
+good_xlators = [
+ "cluster/afr",
+ "cluster/dht",
+ "cluster/distribute",
+ "cluster/replicate",
+ "cluster/stripe",
+ "debug/io-stats",
+ "features/access-control",
+ "features/locks",
+ "features/marker",
+ "features/uidmap",
+ "performance/io-threads",
+ "protocol/client",
+ "protocol/server",
+ "storage/posix",
+]
+
+def copy_stack (old_xl, suffix, recursive=False):
+ if recursive:
+ new_name = old_xl.name + "-" + suffix
+ else:
+ new_name = suffix
+ new_xl = Translator(new_name)
+ new_xl.type = old_xl.type
+ # The results with normal assignment here are . . . amusing.
+ new_xl.opts = copy.deepcopy(old_xl.opts)
+ for sv in old_xl.subvols:
+ new_xl.subvols.append(copy_stack(sv, suffix, True))
+ # Patch up the path at the bottom.
+ if new_xl.type == "storage/posix":
+ new_xl.opts["directory"] += ("/" + suffix)
+ return new_xl
+
+def cleanup (parent, graph):
+ if parent.type in good_xlators:
+ # Temporary fix so that HekaFS volumes can use the
+ # SSL-enabled multi-threaded socket transport.
+ if parent.type == "protocol/server":
+ parent.type = "protocol/server2"
+ parent.opts["transport-type"] = "ssl"
+ elif parent.type == "protocol/client":
+ parent.type = "protocol/client2"
+ parent.opts["transport-type"] = "ssl"
+ sv = []
+ for child in parent.subvols:
+ sv.append(cleanup(child, graph))
+ parent.subvols = sv
+ else:
+ parent = cleanup(parent.subvols[0], graph)
+ return parent
+
+class Translator:
+ def __init__ (self, name):
+ self.name = name
+ self.type = ""
+ self.opts = {}
+ self.subvols = []
+ self.dumped = False
+ def __repr__ (self):
+ return "<Translator %s>" % self.name
+
+def load (path):
+ # If it's a string, open it; otherwise, assume it's already a
+ # file-like object (most notably from urllib*).
+ if type(path) in (str,):
+ fp = file(path, "r")
+ else:
+ fp = path
+ all_xlators = {}
+ xlator = None
+ last_xlator = None
+ while True:
+ text = fp.readline()
+ if text == "":
+ break
+ text = text.split()
+ if not len(text):
+ continue
+ if text[0] == "volume":
+ if xlator:
+ raise RuntimeError("nested volume definition")
+ xlator = Translator(text[1])
+ continue
+ if not xlator:
+ raise RuntimeError("text outside volume definition")
+ if text[0] == "type":
+ xlator.type = text[1]
+ continue
+ if text[0] == "option":
+ xlator.opts[text[1]] = ''.join(text[2:])
+ continue
+ if text[0] == "subvolumes":
+ for sv in text[1:]:
+ xlator.subvols.append(all_xlators[sv])
+ continue
+ if text[0] == "end-volume":
+ all_xlators[xlator.name] = xlator
+ last_xlator = xlator
+ xlator = None
+ continue
+ raise RuntimeError("unrecognized keyword %s" % text[0])
+ if xlator:
+ raise RuntimeError("unclosed volume definition")
+ return all_xlators, last_xlator
+
+def generate (graph, last, stream=sys.stdout):
+ for sv in last.subvols:
+ if not sv.dumped:
+ generate(graph, sv, stream)
+ print("", file=stream)
+ sv.dumped = True
+ print("volume %s" % last.name, file=stream)
+ print(" type %s" % last.type, file=stream)
+ for k, v in last.opts.items():
+ print(" option %s %s" % (k, v), file=stream)
+ if last.subvols:
+ print(" subvolumes %s" % ''.join(
+ [ sv.name for sv in last.subvols ]), file=stream)
+ print("end-volume", file=stream)
+
+def push_filter (graph, old_xl, filt_type, opts={}):
+ suffix = "-" + old_xl.type.split("/")[1]
+ if len(old_xl.name) > len(suffix):
+ if old_xl.name[-len(suffix):] == suffix:
+ old_xl.name = old_xl.name[:-len(suffix)]
+ new_xl = Translator(old_xl.name+suffix)
+ new_xl.type = old_xl.type
+ new_xl.opts = old_xl.opts
+ new_xl.subvols = old_xl.subvols
+ graph[new_xl.name] = new_xl
+ old_xl.name += ("-" + filt_type.split("/")[1])
+ old_xl.type = filt_type
+ old_xl.opts = opts
+ old_xl.subvols = [new_xl]
+ graph[old_xl.name] = old_xl
+
+def delete (graph, victim):
+ if len(victim.subvols) != 1:
+ raise RuntimeError("attempt to delete non-unary translator")
+ for xl in graph.itervalues():
+ while xl.subvols.count(victim):
+ i = xl.subvols.index(victim)
+ xl.subvols[i] = victim.subvols[0]
+
+if __name__ == "__main__":
+ graph, last = load(sys.argv[1])
+ generate(graph, last)
diff --git a/extras/who-wrote-glusterfs/gitdm.aliases b/extras/who-wrote-glusterfs/gitdm.aliases
new file mode 100644
index 00000000000..901c12418e3
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.aliases
@@ -0,0 +1,58 @@
+#
+# This is the email aliases file, mapping secondary addresses onto a single,
+# canonical address. This file should probably match the contents of .mailmap
+# in the root of the git repository.
+#
+# Format: <alias> <real>
+
+amar@gluster.com amarts@redhat.com
+amar@del.gluster.com amarts@redhat.com
+avati@amp.gluster.com avati@redhat.com
+avati@blackhole.gluster.com avati@redhat.com
+avati@dev.gluster.com avati@redhat.com
+avati@gluster.com avati@redhat.com
+wheelear@gmail.com awheeler@redhat.com
+anush@gluster.com ashetty@redhat.com
+csaba@gluster.com csaba@redhat.com
+csaba@lowlife.hu csaba@redhat.com
+csaba@zresearch.com csaba@redhat.com
+gd@samba.org gd@redhat.com
+harsha@gluster.com fharshav@redhat.com
+harsha@zresearch.com fharshav@redhat.com
+harsha@dev.gluster.com fharshav@redhat.com
+harsha@harshavardhana.net fharshav@redhat.com
+jclift@redhat.com jclift@gluster.org
+kkeithle@linux.keithley.org kkeithle@redhat.com
+kkeithle@f16node1.kkeithle.usersys.redhat.com kkeithle@redhat.com
+kaushal@gluster.com kaushal@redhat.com
+kaushikbv@gluster.com kbudiger@redhat.com
+krishna@gluster.com ksriniva@redhat.com
+krishna@zresearch.com ksriniva@redhat.com
+krishna@guest-laptop ksriniva@redhat.com
+kp@gluster.com kparthas@redhat.com
+me@louiszuckerman.com louiszuckerman@gmail.com
+msvbhat@gmail.com vbhat@redhat.com
+nullpai@gmail.com ppai@redhat.com
+vishwanath@gluster.com vbhat@redhat.com
+obnox@samba.org madam@redhat.com
+oleksandr@natalenko.name o.natalenko@lanet.ua
+patrick@puiterwijk.org puiterwijk@fedoraproject.org
+pavan@dev.gluster.com pavan@gluster.com
+zaitcev@yahoo.com zaitcev@kotori.zaitcev.us
+pranithk@gluster.com pkarampu@redhat.com
+raghavendrabhat@gluster.com raghavendra@redhat.com
+raghavendra@gluster.com rgowdapp@redhat.com
+raghavendra@zresearch.com rgowdapp@redhat.com
+rahulcssjce@gmail.com rahulcs@redhat.com
+rajesh@gluster.com rajesh@redhat.com
+rajesh.amaravathi@gmail.com rajesh@redhat.com
+root@ravi2.(none) ravishankar@redhat.com
+sabansal@localhost.localdomain sabansal@redhat.com
+shehjart@zresearch.com shehjart@gluster.com
+venky@gluster.com vshankar@redhat.com
+vijay@gluster.com vbellur@redhat.com
+vijay@dev.gluster.com vbellur@redhat.com
+vijaykumar.koppad@gmail.com vkoppad@redhat.com
+vikas@zresearch.com vikas@gluster.com
+shishirng@gluster.com sgowda@redhat.com
+potatogim@potatogim.net potatogim@gluesys.com
diff --git a/extras/who-wrote-glusterfs/gitdm.config b/extras/who-wrote-glusterfs/gitdm.config
new file mode 100644
index 00000000000..e1ff2bd5bda
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.config
@@ -0,0 +1,8 @@
+#
+# This is the gitdm configuration file for GlusterFS.
+# See the gitdm.config in the gitdm repositofy for additional options and
+# comments.
+#
+
+EmailAliases gitdm.aliases
+EmailMap gitdm.domain-map
diff --git a/extras/who-wrote-glusterfs/gitdm.domain-map b/extras/who-wrote-glusterfs/gitdm.domain-map
new file mode 100644
index 00000000000..7cd2bbd605b
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.domain-map
@@ -0,0 +1,29 @@
+#
+# Here is a set of mappings of domain names onto employer names.
+#
+active.by ActiveCloud
+appeartv.com Appear TV
+cern.ch CERN
+cmss.chinamobile.com China Mobile(Suzhou) Software Technology
+datalab.es DataLab S.L.
+fb.com Facebook
+fedoraproject.org Fedora Project
+gluster.com Red Hat
+gmail.com (personal contributions)
+gooddata.com GoodData
+hastexo.com hastexo
+horde.com (personal contributions)
+ibm.com IBM
+io.com IO
+lanet.ua Lanet Network
+linbit.com LINBIT
+nectec.or.th NECTEC
+netbsd.org NetBSD
+netdirect.ca Net Direct
+nokia.com Nokia
+redhat.com Red Hat
+stepping-stone.ch stepping stone GmbH
+xtaotech.com XTAO Co.
+yahoo.in (personal contributions)
+zresearch.com Red Hat
+gluesys.com Gluesys
diff --git a/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
new file mode 100755
index 00000000000..70d5964c576
--- /dev/null
+++ b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Gather statistics on "Who wrote GlusterFS". The idea comes from the excellent
+# articles on http://lwn.net/ named "Who wrote <linux-version>?".
+#
+# gitdm comes from git://git.lwn.net/gitdm.git by Jonathan Corbet.
+#
+# Confguration files used:
+# - gitdm.config: main configuration file, pointing to the others
+# - gitdm.aliases: merge users with different emailaddresses into one
+# - gitdm.domain-map: map domain names from emailaddresses to companies
+#
+
+DIRNAME=$(dirname $0)
+
+GITDM_REPO=git://git.lwn.net/gitdm.git
+GITDM_DIR=${DIRNAME}/gitdm
+GITDM_CMD="python ${GITDM_DIR}/gitdm"
+
+error()
+{
+ local ret=${?}
+ printf "${@}\n" > /dev/stderr
+ return ${ret}
+}
+
+check_gitdm()
+{
+ if [ ! -e "${GITDM_DIR}/gitdm" ]
+ then
+ git clone --quiet ${GITDM_REPO} ${DIRNAME}/gitdm
+ fi
+}
+
+# The first argument is the revision-range (see 'git rev-list --help').
+# REV can be empty, and the statistics will be calculated over the whole
+# current branch.
+REV=${1}
+shift
+# all remaining options are passed to gitdm, see the gitdm script for an
+# explanation of the accepted options.
+GITDM_OPTS=${@}
+
+if ! check_gitdm
+then
+ error "Could not find 'gitdm', exiting..."
+ exit 1
+fi
+
+git log --numstat -M ${REV} | ${GITDM_CMD} -b ${DIRNAME} -n ${GITDM_OPTS}
diff --git a/geo-replication/Makefile.am b/geo-replication/Makefile.am
new file mode 100644
index 00000000000..591b23d0eaf
--- /dev/null
+++ b/geo-replication/Makefile.am
@@ -0,0 +1,8 @@
+SUBDIRS = syncdaemon src
+
+CLEANFILES =
+
+EXTRA_DIST = gsyncd.conf.in
+
+gsyncdconfdir = $(sysconfdir)/glusterfs/
+gsyncdconf_DATA = gsyncd.conf
diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
new file mode 100644
index 00000000000..9688c79fab7
--- /dev/null
+++ b/geo-replication/gsyncd.conf.in
@@ -0,0 +1,349 @@
+[__meta__]
+version = 4.0
+
+[master-bricks]
+configurable=false
+
+[slave-bricks]
+configurable=false
+
+[master-volume-id]
+configurable=false
+
+[slave-volume-id]
+configurable=false
+
+[master-replica-count]
+configurable=false
+type=int
+value=1
+
+[master-disperse-count]
+configurable=false
+type=int
+value=1
+
+[master-distribution-count]
+configurable=false
+type=int
+value=1
+
+[glusterd-workdir]
+value = @GLUSTERD_WORKDIR@
+
+[gluster-logdir]
+value = /var/log/glusterfs
+
+[gluster-rundir]
+value = /var/run/gluster
+
+[gsyncd-miscdir]
+value = /var/lib/misc/gluster/gsyncd
+
+[stime-xattr-prefix]
+value=
+
+[checkpoint]
+value=0
+help=Set Checkpoint
+validation=unixtime
+type=int
+
+[gluster-cli-options]
+value=
+help=Gluster CLI Options
+
+[pid-file]
+value=${gluster_rundir}/gsyncd-${master}-${primary_slave_host}-${slavevol}.pid
+configurable=false
+template = true
+help=PID file path
+
+[state-file]
+value=${glusterd_workdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/monitor.status
+configurable=false
+template=true
+help=Status File path
+
+[georep-session-working-dir]
+value=${glusterd_workdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/
+template=true
+help=Session Working directory
+configurable=false
+
+[access-mount]
+value=false
+type=bool
+validation=bool
+help=Do not lazy unmount the master volume. This allows admin to access the mount for debugging.
+
+[slave-access-mount]
+value=false
+type=bool
+validation=bool
+help=Do not lazy unmount the slave volume. This allows admin to access the mount for debugging.
+
+[isolated-slaves]
+value=
+help=List of Slave nodes which are isolated
+
+[changelog-batch-size]
+# Max size of Changelogs to process per batch, Changelogs Processing is
+# not limited by the number of changelogs but instead based on
+# size of the changelog file, One sample changelog file size was 145408
+# with ~1000 CREATE and ~1000 DATA. 5 such files in one batch is 727040
+# If geo-rep worker crashes while processing a batch, it has to retry only
+# that batch since stime will get updated after each batch.
+value=727040
+help=Max size of Changelogs to process per batch.
+type=int
+
+[slave-timeout]
+value=120
+type=int
+help=Timeout in seconds for Slave Gsyncd. If no activity from master for this timeout, Slave gsyncd will be disconnected. Set Timeout to zero to skip this check.
+
+[connection-timeout]
+value=60
+type=int
+help=Timeout for mounts
+
+[replica-failover-interval]
+value=1
+type=int
+help=Minimum time interval in seconds for passive worker to become Active
+
+[changelog-archive-format]
+value=%Y%m
+help=Processed changelogs will be archived in working directory. Pattern for archive file
+
+[use-meta-volume]
+value=false
+type=bool
+help=Use this to set Active Passive mode to meta-volume.
+
+[meta-volume-mnt]
+value=/run/gluster/shared_storage
+help=Meta Volume or Shared Volume mount path
+
+[allow-network]
+value=
+
+[change-interval]
+value=5
+type=int
+
+[sync-method]
+value=rsync
+help=Sync method for data sync. Available methods are tar over ssh and rsync. Default is rsync.
+validation=choice
+allowed_values=tarssh,rsync
+
+[remote-gsyncd]
+value =
+help=If SSH keys are not secured with gsyncd prefix then use this configuration to set the actual path of gsyncd(Usually /usr/libexec/glusterfs/gsyncd)
+
+[gluster-command-dir]
+value=@SBIN_DIR@
+help=Directory where Gluster binaries exist on master
+
+[slave-gluster-command-dir]
+value=@SBIN_DIR@
+help=Directory where Gluster binaries exist on slave
+
+[gluster-params]
+value = aux-gfid-mount acl
+help=Parameters for Gluster Geo-rep mount in Master
+
+[slave-gluster-params]
+value = aux-gfid-mount acl
+help=Parameters for Gluster Geo-rep mount in Slave
+
+[ignore-deletes]
+value = false
+type=bool
+help=Do not sync deletes in Slave
+
+[special-sync-mode]
+# tunables for failover/failback mechanism:
+# None - gsyncd behaves as normal
+# blind - gsyncd works with xtime pairs to identify
+# candidates for synchronization
+# wrapup - same as normal mode but does not assign
+# xtimes to orphaned files
+# see crawl() for usage of the above tunables
+value =
+help=
+
+[gfid-conflict-resolution]
+value = true
+validation=bool
+type=bool
+help=Disables automatic gfid conflict resolution while syncing
+
+[working-dir]
+value = ${gsyncd_miscdir}/${master}_${primary_slave_host}_${slavevol}/
+template=true
+configurable=false
+help=Working directory for storing Changelogs
+
+[change-detector]
+value=changelog
+help=Change detector
+validation=choice
+allowed_values=changelog,xsync
+
+[cli-log-file]
+value=${gluster_logdir}/geo-replication/cli.log
+template=true
+configurable=false
+
+[cli-log-level]
+value=INFO
+help=Set CLI Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[log-file]
+value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/gsyncd.log
+configurable=false
+template=true
+
+[changelog-log-file]
+value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/changes-${local_id}.log
+configurable=false
+template=true
+
+[gluster-log-file]
+value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/mnt-${local_id}.log
+template=true
+configurable=false
+
+[slave-log-file]
+value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/gsyncd.log
+template=true
+configurable=false
+
+[slave-gluster-log-file]
+value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/mnt-${master_node}-${master_brick_id}.log
+template=true
+configurable=false
+
+[slave-gluster-log-file-mbr]
+value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/mnt-mbr-${master_node}-${master_brick_id}.log
+template=true
+configurable=false
+
+[log-level]
+value=INFO
+help=Set Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[gluster-log-level]
+value=INFO
+help=Set Gluster mount Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[changelog-log-level]
+value=INFO
+help=Set Changelog Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[slave-log-level]
+value=INFO
+help=Set Slave Gsyncd Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[slave-gluster-log-level]
+value=INFO
+help=Set Slave Gluster mount Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[ssh-port]
+value=22
+validation=minmax
+min=1
+max=65535
+help=Set SSH port
+type=int
+
+[ssh-command]
+value=ssh
+help=Set ssh binary path
+validation=execpath
+
+[tar-command]
+value=tar
+help=Set tar command path
+validation=execpath
+
+[ssh-options]
+value = -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i ${glusterd_workdir}/geo-replication/secret.pem
+template=true
+
+[ssh-options-tar]
+value = -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i ${glusterd_workdir}/geo-replication/tar_ssh.pem
+template=true
+
+[gluster-command]
+value=gluster
+help=Set gluster binary path
+validation=execpath
+
+[sync-jobs]
+value=3
+help=Number of Syncer jobs
+validation=minmax
+min=1
+max=100
+type=int
+
+[rsync-command]
+value=rsync
+help=Set rsync command path
+validation=execpath
+
+[rsync-options]
+value=
+
+[rsync-ssh-options]
+value=
+
+[rsync-opt-ignore-missing-args]
+value=true
+type=bool
+
+[rsync-opt-existing]
+value=true
+type=bool
+
+[log-rsync-performance]
+value=false
+help=Log Rsync performance
+validation=bool
+type=bool
+
+[use-rsync-xattrs]
+value=false
+type=bool
+
+[sync-xattrs]
+value=true
+type=bool
+
+[sync-acls]
+value=true
+type=bool
+
+[max-rsync-retries]
+value=10
+type=int
+
+[state_socket_unencoded]
+# Unused, For backward compatibility
+value=
diff --git a/geo-replication/setup.py b/geo-replication/setup.py
new file mode 100644
index 00000000000..0eae469d2d6
--- /dev/null
+++ b/geo-replication/setup.py
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+"""
+This setup.py only used to run tests, since geo-replication will
+be installed in /usr/local/libexec/glusterfs or /usr/libexec/glusterfs
+"""
+from setuptools import setup
+
+name = 'syncdaemon'
+
+setup(
+ name=name,
+ version="",
+ description='GlusterFS Geo Replication',
+ license='GPLV2 and LGPLV3+',
+ author='Red Hat, Inc.',
+ author_email='gluster-devel@gluster.org',
+ url='http://www.gluster.org',
+ packages=[name, ],
+ test_suite='nose.collector',
+ install_requires=[],
+ scripts=[],
+ entry_points={},
+)
diff --git a/geo-replication/src/Makefile.am b/geo-replication/src/Makefile.am
new file mode 100644
index 00000000000..9937a0bd026
--- /dev/null
+++ b/geo-replication/src/Makefile.am
@@ -0,0 +1,48 @@
+gsyncddir = $(GLUSTERFS_LIBEXECDIR)
+
+gsyncd_SCRIPTS = gverify.sh peer_gsec_create \
+ set_geo_rep_pem_keys.sh peer_mountbroker peer_mountbroker.py \
+ peer_georep-sshkey.py
+
+# peer_gsec_create and peer_add_secret_pub are not added to
+# EXTRA_DIST as it's derived from a .in file
+EXTRA_DIST = gverify.sh set_geo_rep_pem_keys.sh peer_mountbroker.py.in \
+ peer_georep-sshkey.py.in
+
+gsyncd_PROGRAMS = gsyncd
+
+gsyncd_SOURCES = gsyncd.c procdiggy.c
+
+gsyncd_LDADD = $(GF_LDADD) $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+gsyncd_LDFLAGS = $(GF_LDFLAGS)
+
+noinst_HEADERS = procdiggy.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" -DUSE_LIBGLUSTERFS \
+ -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
+
+
+install-exec-hook:
+ $(mkdir_p) $(DESTDIR)$(sbindir)
+ rm -f $(DESTDIR)$(sbindir)/gluster-mountbroker
+ ln -s $(GLUSTERFS_LIBEXECDIR)/peer_mountbroker.py \
+ $(DESTDIR)$(sbindir)/gluster-mountbroker
+
+ rm -f $(DESTDIR)$(sbindir)/gluster-georep-sshkey
+ ln -s $(GLUSTERFS_LIBEXECDIR)/peer_georep-sshkey.py \
+ $(DESTDIR)$(sbindir)/gluster-georep-sshkey
+
+
+uninstall-hook:
+ rm -f $(DESTDIR)$(sbindir)/gluster-mountbroker
+ rm -f $(DESTDIR)$(sbindir)/gluster-georep-sshkey
diff --git a/geo-replication/src/gsyncd.c b/geo-replication/src/gsyncd.c
new file mode 100644
index 00000000000..b5aeec5bf33
--- /dev/null
+++ b/geo-replication/src/gsyncd.c
@@ -0,0 +1,402 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/compat.h>
+#include <glusterfs/syscall.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/param.h> /* for PATH_MAX */
+
+/* NOTE (USE_LIBGLUSTERFS):
+ * ------------------------
+ * When USE_LIBGLUSTERFS debugging sumbol is passed; perform
+ * glusterfs translator like initialization so that glusterfs
+ * globals, contexts are valid when glustefs api's are invoked.
+ * We unconditionally pass then while building gsyncd binary.
+ */
+#ifdef USE_LIBGLUSTERFS
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/defaults.h>
+#endif
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/run.h>
+#include "procdiggy.h"
+
+#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_"
+#define _GSYNCD_DISPATCHED_ "_GSYNCD_DISPATCHED_"
+#define GSYNCD_CONF_TEMPLATE "geo-replication/gsyncd_template.conf"
+#define GSYNCD_PY "gsyncd.py"
+#define RSYNC "rsync"
+
+int restricted = 0;
+
+static int
+duplexpand(void **buf, size_t tsiz, size_t *len)
+{
+ size_t osiz = tsiz * *len;
+ char *p = realloc(*buf, osiz << 1);
+ if (!p) {
+ return -1;
+ }
+
+ memset(p + osiz, 0, osiz);
+ *buf = p;
+ *len <<= 1;
+
+ return 0;
+}
+
+static int
+str2argv(char *str, char ***argv)
+{
+ char *p = NULL;
+ char *savetok = NULL;
+ char *temp = NULL;
+ char *temp1 = NULL;
+ int argc = 0;
+ size_t argv_len = 32;
+ int ret = 0;
+ int i = 0;
+
+ assert(str);
+ temp = str = strdup(str);
+ if (!str)
+ goto error;
+
+ *argv = calloc(argv_len, sizeof(**argv));
+ if (!*argv)
+ goto error;
+
+ while ((p = strtok_r(str, " ", &savetok))) {
+ str = NULL;
+
+ argc++;
+ if (argc == argv_len) {
+ ret = duplexpand((void *)argv, sizeof(**argv), &argv_len);
+ if (ret == -1)
+ goto error;
+ }
+ temp1 = strdup(p);
+ if (!temp1)
+ goto error;
+ (*argv)[argc - 1] = temp1;
+ }
+
+ free(temp);
+ return argc;
+
+error:
+ fprintf(stderr, "out of memory\n");
+ free(temp);
+ for (i = 0; i < argc - 1; i++)
+ free((*argv)[i]);
+ free(*argv);
+ return -1;
+}
+
+static int
+invoke_gsyncd(int argc, char **argv)
+{
+ int i = 0;
+ int j = 0;
+ char *nargv[argc + 4];
+ char *python = NULL;
+
+ if (chdir("/") == -1)
+ goto error;
+
+ j = 0;
+ python = getenv("PYTHON");
+ if (!python)
+ python = PYTHON;
+ nargv[j++] = python;
+ nargv[j++] = GSYNCD_PREFIX "/python/syncdaemon/" GSYNCD_PY;
+ for (i = 1; i < argc; i++)
+ nargv[j++] = argv[i];
+
+ nargv[j++] = NULL;
+
+ execvp(python, nargv);
+
+ fprintf(stderr, "exec of '%s' failed\n", python);
+ return 127;
+
+error:
+ fprintf(stderr, "gsyncd initializaion failed\n");
+ return 1;
+}
+
+static int
+find_gsyncd(pid_t pid, pid_t ppid, char *name, void *data)
+{
+ char buf[NAME_MAX * 2] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *p = NULL;
+ int zeros = 0;
+ int ret = 0;
+ int fd = -1;
+ pid_t *pida = (pid_t *)data;
+
+ if (ppid != pida[0])
+ return 0;
+
+ snprintf(path, sizeof path, PROC "/%d/cmdline", pid);
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+ ret = sys_read(fd, buf, sizeof(buf));
+ sys_close(fd);
+ if (ret == -1)
+ return 0;
+ for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++)
+ zeros += !*p;
+
+ ret = 0;
+ switch (zeros) {
+ case 2:
+ if ((strcmp(basename(buf), basename(PYTHON)) ||
+ strcmp(basename(buf + strlen(buf) + 1), GSYNCD_PY)) == 0) {
+ ret = 1;
+ break;
+ }
+ /* fallthrough */
+ case 1:
+ if (strcmp(basename(buf), GSYNCD_PY) == 0)
+ ret = 1;
+ }
+
+ if (ret == 1) {
+ if (pida[1] != -1) {
+ fprintf(stderr, GSYNCD_PY " sibling is not unique");
+ return -1;
+ }
+ pida[1] = pid;
+ }
+
+ return 0;
+}
+
+static int
+invoke_rsync(int argc, char **argv)
+{
+ int i = 0;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ pid_t pid = -1;
+ pid_t ppid = -1;
+ pid_t pida[] = {-1, -1};
+ char *name = NULL;
+ char buf[PATH_MAX + 1] = {
+ 0,
+ };
+ int ret = 0;
+
+ assert(argv[argc] == NULL);
+
+ if (argc < 2 || strcmp(argv[1], "--server") != 0)
+ goto error;
+
+ for (i = 2; i < argc && argv[i][0] == '-'; i++)
+ ;
+
+ if (!(i == argc - 2 && strcmp(argv[i], ".") == 0 &&
+ argv[i + 1][0] == '/')) {
+ fprintf(stderr, "need an rsync invocation without protected args\n");
+ goto error;
+ }
+
+ /* look up sshd we are spawned from */
+ for (pid = getpid();; pid = ppid) {
+ ppid = pidinfo(pid, &name);
+ if (ppid < 0) {
+ fprintf(stderr, "sshd ancestor not found\n");
+ goto error;
+ }
+ if (strcmp(name, "sshd") == 0) {
+ GF_FREE(name);
+ break;
+ }
+ GF_FREE(name);
+ }
+ /* look up "ssh-sibling" gsyncd */
+ pida[0] = pid;
+ ret = prociter(find_gsyncd, pida);
+ if (ret == -1 || pida[1] == -1) {
+ fprintf(stderr, "gsyncd sibling not found\n");
+ goto error;
+ }
+ /* check if rsync target matches gsyncd target */
+ snprintf(path, sizeof path, PROC "/%d/cwd", pida[1]);
+ ret = sys_readlink(path, buf, sizeof(buf));
+ if (ret == -1 || ret == sizeof(buf))
+ goto error;
+ if (strcmp(argv[argc - 1], "/") == 0 /* root dir cannot be a target */ ||
+ (strcmp(argv[argc - 1], path) /* match against gluster target */ &&
+ strcmp(argv[argc - 1], buf) /* match against file target */) != 0) {
+ fprintf(stderr, "rsync target does not match " GEOREP " session\n");
+ goto error;
+ }
+
+ argv[0] = RSYNC;
+
+ execvp(RSYNC, argv);
+
+ fprintf(stderr, "exec of " RSYNC " failed\n");
+ return 127;
+
+error:
+ fprintf(stderr, "disallowed " RSYNC " invocation\n");
+ return 1;
+}
+
+static int
+invoke_gluster(int argc, char **argv)
+{
+ int i = 0;
+ int j = 0;
+ int optsover = 0;
+ char *ov = NULL;
+
+ for (i = 1; i < argc; i++) {
+ ov = strtail(argv[i], "--");
+ if (ov && !optsover) {
+ if (*ov == '\0')
+ optsover = 1;
+ continue;
+ }
+ switch (++j) {
+ case 1:
+ if (strcmp(argv[i], "volume") != 0)
+ goto error;
+ break;
+ case 2:
+ if (strcmp(argv[i], "info") != 0)
+ goto error;
+ break;
+ case 3:
+ break;
+ default:
+ goto error;
+ }
+ }
+
+ argv[0] = "gluster";
+ execvp(SBIN_DIR "/gluster", argv);
+ fprintf(stderr, "exec of gluster failed\n");
+ return 127;
+
+error:
+ fprintf(stderr, "disallowed gluster invocation\n");
+ return 1;
+}
+
+struct invocable {
+ char *name;
+ int (*invoker)(int argc, char **argv);
+};
+
+struct invocable invocables[] = {{"rsync", invoke_rsync},
+ {"gsyncd", invoke_gsyncd},
+ {"gluster", invoke_gluster},
+ {NULL, NULL}};
+
+int
+main(int argc, char **argv)
+{
+ int ret = -1;
+ char *evas = NULL;
+ struct invocable *i = NULL;
+ char *b = NULL;
+ char *sargv = NULL;
+ int j = 0;
+
+#ifdef USE_LIBGLUSTERFS
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ return ENOMEM;
+
+ if (glusterfs_globals_init(ctx))
+ return 1;
+
+ THIS->ctx = ctx;
+ ret = default_mem_acct_init(THIS);
+ if (ret) {
+ fprintf(stderr, "internal error: mem accounting failed\n");
+ return 1;
+ }
+#endif
+
+ evas = getenv(_GLUSTERD_CALLED_);
+ if (evas && strcmp(evas, "1") == 0)
+ /* OK, we know glusterd called us, no need to look for further config
+ *...although this conclusion should not inherit to our children
+ */
+ unsetenv(_GLUSTERD_CALLED_);
+ else {
+ /* we regard all gsyncd invocations unsafe
+ * that do not come from glusterd and
+ * therefore restrict it
+ */
+ restricted = 1;
+
+ if (!getenv(_GSYNCD_DISPATCHED_)) {
+ evas = getenv("SSH_ORIGINAL_COMMAND");
+ if (evas)
+ sargv = evas;
+ else {
+ evas = getenv("SHELL");
+ if (evas && strcmp(basename(evas), "gsyncd") == 0 &&
+ argc == 3 && strcmp(argv[1], "-c") == 0)
+ sargv = argv[2];
+ }
+ }
+ }
+
+ if (!(sargv && restricted))
+ return invoke_gsyncd(argc, argv);
+
+ argc = str2argv(sargv, &argv);
+
+ if (argc == -1) {
+ fprintf(stderr, "internal error\n");
+ return 1;
+ }
+
+ if (setenv(_GSYNCD_DISPATCHED_, "1", 1) == -1) {
+ fprintf(stderr, "internal error\n");
+ goto out;
+ }
+
+ b = basename(argv[0]);
+ for (i = invocables; i->name; i++) {
+ if (strcmp(b, i->name) == 0)
+ return i->invoker(argc, argv);
+ }
+
+ fprintf(stderr, "invoking %s in restricted SSH session is not allowed\n",
+ b);
+
+out:
+ for (j = 1; j < argc; j++)
+ free(argv[j]);
+ free(argv);
+ return 1;
+}
diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
new file mode 100755
index 00000000000..f5f70d245e0
--- /dev/null
+++ b/geo-replication/src/gverify.sh
@@ -0,0 +1,276 @@
+#!/bin/bash
+
+# Script to verify the Master and Slave Gluster compatibility.
+# To use ./gverify <master volume> <slave user> <slave host> <slave volume> <ssh port> <log file>
+# Returns 0 if master and slave compatible.
+
+# Considering buffer_size 100MB
+BUFFER_SIZE=104857600;
+SSH_PORT=$5;
+master_log_file=`gluster --print-logdir`/geo-replication/gverify-mastermnt.log
+slave_log_file=`gluster --print-logdir`/geo-replication/gverify-slavemnt.log
+
+function SSHM()
+{
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ ssh -p ${SSH_PORT} -q \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ -oControlMaster=yes \
+ "$@";
+ else
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -q \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ -oControlMaster=yes \
+ "$@";
+ fi
+}
+
+function get_inode_num()
+{
+ local os
+ case `uname -s` in
+ NetBSD) os="NetBSD";;
+ Linux) os="Linux";;
+ *) os="Default";;
+ esac
+
+ if [[ "X$os" = "XNetBSD" ]]; then
+ echo $(stat -f "%i" "$1")
+ else
+ echo $(stat -c "%i" "$1")
+ fi
+}
+
+function umount_lazy()
+{
+ local os
+ case `uname -s` in
+ NetBSD) os="NetBSD";;
+ Linux) os="Linux";;
+ *) os="Default";;
+ esac
+
+ if [[ "X$os" = "XNetBSD" ]]; then
+ umount -f -R "$1"
+ else
+ umount -l "$1"
+ fi;
+}
+
+function disk_usage()
+{
+ local os
+ case `uname -s` in
+ NetBSD) os="NetBSD";;
+ Linux) os="Linux";;
+ *) os="Default";;
+ esac
+
+ if [[ "X$os" = "XNetBSD" ]]; then
+ echo $(df -P "$1" | tail -1)
+ else
+ echo $(df -P -B1 "$1" | tail -1)
+ fi;
+
+}
+
+function cmd_slave()
+{
+ local cmd_line;
+ cmd_line=$(cat <<EOF
+function do_verify() {
+ver=\$(gluster --version | head -1 | cut -f2 -d " ");
+echo \$ver;
+};
+source /etc/profile && do_verify;
+EOF
+);
+
+echo $cmd_line;
+}
+
+function master_stats()
+{
+ MASTERVOL=$1;
+ local inet6=$2;
+ local d;
+ local i;
+ local disk_size;
+ local used_size;
+ local ver;
+ local m_status;
+
+ d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
+ if [ "$inet6" = "inet6" ]; then
+ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-id $MASTERVOL -l $master_log_file $d;
+ else
+ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d;
+ fi
+
+ i=$(get_inode_num $d);
+ if [[ "$i" -ne "1" ]]; then
+ echo 0:0;
+ exit 1;
+ fi;
+ cd $d;
+ disk_size=$(disk_usage $d | awk "{print \$2}");
+ used_size=$(disk_usage $d | awk "{print \$3}");
+ umount_lazy $d;
+ rmdir $d;
+ ver=$(gluster --version | head -1 | cut -f2 -d " ");
+ m_status=$(echo "$disk_size:$used_size:$ver");
+ echo $m_status
+}
+
+
+function slave_stats()
+{
+ SLAVEUSER=$1;
+ SLAVEHOST=$2;
+ SLAVEVOL=$3;
+ local inet6=$4;
+ local cmd_line;
+ local ver;
+ local status;
+
+ d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
+ if [ "$inet6" = "inet6" ]; then
+ glusterfs --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
+ else
+ glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
+ fi
+
+ i=$(get_inode_num $d);
+ if [[ "$i" -ne "1" ]]; then
+ echo 0:0;
+ exit 1;
+ fi;
+ cd $d;
+ disk_size=$(disk_usage $d | awk "{print \$2}");
+ used_size=$(disk_usage $d | awk "{print \$3}");
+ no_of_files=$(find $d -maxdepth 1 -path "$d/.trashcan" -prune -o -path "$d" -o -print0 -quit);
+ umount_lazy $d;
+ rmdir $d;
+
+ cmd_line=$(cmd_slave);
+ ver=`SSHM $SLAVEUSER@$SLAVEHOST bash -c "'$cmd_line'"`;
+ status=$disk_size:$used_size:$ver:$no_of_files;
+ echo $status
+}
+
+function ping_host ()
+{
+ ### Use bash internal socket support
+ {
+ exec 100<>/dev/tcp/$1/$2
+ if [ $? -ne '0' ]; then
+ return 1;
+ else
+ exec 100>&-
+ return 0;
+ fi
+ } 1>&2 2>/dev/null
+}
+
+function main()
+{
+ log_file=$6
+ > $log_file
+
+ inet6=$7
+ local cmd_line
+ local ver
+
+ # Use FORCE_BLOCKER flag in the error message to differentiate
+ # between the errors which the force command should bypass
+
+ # Test tcp connection to port 22, this is necessary since `ping`
+ # does not work on all environments where 'ssh' is allowed but
+ # ICMP is filterd
+
+ ping_host $3 ${SSH_PORT}
+
+ if [ $? -ne 0 ]; then
+ echo "FORCE_BLOCKER|$3 not reachable." > $log_file
+ exit 1;
+ fi;
+
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "echo Testing_Passwordless_SSH";
+ else
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "echo Testing_Passwordless_SSH";
+ fi
+
+ if [ $? -ne 0 ]; then
+ echo "FORCE_BLOCKER|Passwordless ssh login has not been setup with $3 for user $2." > $log_file
+ exit 1;
+ fi;
+
+ cmd_line=$(cmd_slave);
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ ver=$(ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
+ else
+ ver=$(ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
+ fi
+
+ if [ -z "$ver" ]; then
+ echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file
+ exit 1;
+ fi;
+
+ ERRORS=0;
+ master_data=$(master_stats $1 ${inet6});
+ slave_data=$(slave_stats $2 $3 $4 ${inet6});
+ master_disk_size=$(echo $master_data | cut -f1 -d':');
+ slave_disk_size=$(echo $slave_data | cut -f1 -d':');
+ master_used_size=$(echo $master_data | cut -f2 -d':');
+ slave_used_size=$(echo $slave_data | cut -f2 -d':');
+ master_version=$(echo $master_data | cut -f3 -d':');
+ slave_version=$(echo $slave_data | cut -f3 -d':');
+ slave_no_of_files=$(echo $slave_data | cut -f4 -d':');
+
+ if [[ "x$master_disk_size" = "x" || "x$master_version" = "x" || "$master_disk_size" -eq "0" ]]; then
+ echo "FORCE_BLOCKER|Unable to mount and fetch master volume details. Please check the log: $master_log_file" > $log_file;
+ exit 1;
+ fi;
+
+ if [[ "x$slave_disk_size" = "x" || "x$slave_version" = "x" || "$slave_disk_size" -eq "0" ]]; then
+ echo "FORCE_BLOCKER|Unable to mount and fetch slave volume details. Please check the log: $slave_log_file" > $log_file;
+ exit 1;
+ fi;
+
+ # The above checks are mandatory and force command should be blocked
+ # if they fail. The checks below can be bypassed if force option is
+ # provided hence no FORCE_BLOCKER flag.
+
+ if [ "$slave_disk_size" -lt "$master_disk_size" ]; then
+ echo "Total disk size of master is greater than disk size of slave." >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi
+
+ effective_master_used_size=$(( $master_used_size + $BUFFER_SIZE ))
+ slave_available_size=$(( $slave_disk_size - $slave_used_size ))
+ master_available_size=$(( $master_disk_size - $effective_master_used_size ));
+
+ if [ "$slave_available_size" -lt "$master_available_size" ]; then
+ echo "Total available size of master is greater than available size of slave" >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi
+
+ if [ ! -z $slave_no_of_files ]; then
+ echo "$3::$4 is not empty. Please delete existing files in $3::$4 and retry, or use force to continue without deleting the existing files." >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi;
+
+ if [[ $master_version != $slave_version ]]; then
+ echo "Gluster version mismatch between master and slave. Master version: $master_version Slave version: $slave_version" >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi;
+
+ exit $ERRORS;
+}
+
+
+main "$@";
diff --git a/geo-replication/src/peer_georep-sshkey.py.in b/geo-replication/src/peer_georep-sshkey.py.in
new file mode 100644
index 00000000000..58696e9a616
--- /dev/null
+++ b/geo-replication/src/peer_georep-sshkey.py.in
@@ -0,0 +1,116 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+"""
+Usage:
+ gluster-georep-sshkey generate
+ or
+ gluster-georep-sshkey generate --no-prefix
+
+Generates two SSH keys(one for gsyncd access and other for tar) in all
+peer nodes and collects the public keys to the local node where it is
+initiated. Adds `command=` prefix to common_secret.pem.pub if `--no-prefix`
+argument is not passed.
+"""
+import os
+import glob
+
+from gluster.cliutils import (node_output_ok, execute, execute_in_peers,
+ Cmd, runcli)
+from prettytable import PrettyTable
+
+
+SECRET_PEM = "@GLUSTERD_WORKDIR@/geo-replication/secret.pem"
+TAR_SSH_PEM = "@GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"
+GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" '
+TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" '
+COMMON_SECRET_FILE = "@GLUSTERD_WORKDIR@/geo-replication/common_secret.pem.pub"
+
+
+class NodeGenCmd(Cmd):
+ name = "node-generate"
+
+ def args(self, parser):
+ parser.add_argument("no_prefix")
+
+ def run(self, args):
+ # Regenerate if secret.pem.pub not exists
+ if not os.path.exists(SECRET_PEM + ".pub"):
+ # Cleanup old files
+ for f in glob.glob(SECRET_PEM + "*"):
+ os.remove(f)
+
+ execute(["ssh-keygen", "-N", "", "-f", SECRET_PEM])
+
+ # Regenerate if ssh_tar.pem.pub not exists
+ if not os.path.exists(TAR_SSH_PEM + ".pub"):
+ # Cleanup old files
+ for f in glob.glob(TAR_SSH_PEM + "*"):
+ os.remove(f)
+
+ execute(["ssh-keygen", "-N", "", "-f", TAR_SSH_PEM])
+
+ # Add required prefixes if prefix is not "container"
+ prefix_secret_pem_pub = ""
+ prefix_tar_ssh_pem_pub = ""
+ if args.no_prefix != "no-prefix":
+ prefix_secret_pem_pub = GSYNCD_CMD
+ prefix_tar_ssh_pem_pub = TAR_CMD
+
+ data = {"default_pub": "", "tar_pub": ""}
+ with open(SECRET_PEM + ".pub") as f:
+ data["default_pub"] = prefix_secret_pem_pub + f.read().strip()
+
+ with open(TAR_SSH_PEM + ".pub") as f:
+ data["tar_pub"] = prefix_tar_ssh_pem_pub + f.read().strip()
+
+ node_output_ok(data)
+
+
+def color_status(value):
+ if value in ["UP", "OK"]:
+ return "green"
+ return "red"
+
+
+class GenCmd(Cmd):
+ name = "generate"
+
+ def args(self, parser):
+ parser.add_argument("--no-prefix", help="Do not use prefix in "
+ "generated pub keys", action="store_true")
+
+ def run(self, args):
+ prefix = "no-prefix" if args.no_prefix else "."
+ out = execute_in_peers("node-generate", [prefix])
+
+ common_secrets = []
+ table = PrettyTable(["NODE", "NODE STATUS", "KEYGEN STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["KEYGEN STATUS"] = "r"
+ for p in out:
+ if p.ok:
+ common_secrets.append(p.output["default_pub"])
+ common_secrets.append(p.output["tar_pub"])
+
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ with open(COMMON_SECRET_FILE, "w") as f:
+ f.write("\n".join(common_secrets) + "\n")
+
+ print (table)
+
+
+if __name__ == "__main__":
+ runcli()
diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in
new file mode 100755
index 00000000000..6d4a4847013
--- /dev/null
+++ b/geo-replication/src/peer_gsec_create.in
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libexecdir=@libexecdir@
+
+if [ ! -f "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub ]; then
+ \rm -rf "$GLUSTERD_WORKDIR"/geo-replication/secret.pem*
+ ssh-keygen -N '' -f "$GLUSTERD_WORKDIR"/geo-replication/secret.pem > /dev/null
+fi
+
+if [ ! -f "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub ]; then
+ \rm -rf "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem*
+ ssh-keygen -N '' -f "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem > /dev/null
+fi
+
+if [ "Xcontainer" = "X$1" ]; then
+ output1=`cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
+ output2=`cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
+else
+ output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
+ output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
+fi
+echo -e "$output1\n$output2"
diff --git a/geo-replication/src/peer_mountbroker.in b/geo-replication/src/peer_mountbroker.in
new file mode 100644
index 00000000000..8ecf38ded41
--- /dev/null
+++ b/geo-replication/src/peer_mountbroker.in
@@ -0,0 +1,211 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+
+import os
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import json
+import sys
+
+PROG_DESCRIPTION = """
+GlusterFS Mountbroker user management
+"""
+
+args = None
+
+
+def ok(message=""):
+ if (not args and "-j" in sys.argv) or (args and args.json):
+ print(json.dumps({"ok": True, "message": message}))
+ else:
+ if message:
+ print(message)
+
+ sys.exit(0)
+
+
+def notok(message=""):
+ if (not args and "-j" in sys.argv) or (args and args.json):
+ print(json.dumps({"ok": False, "message": message}))
+ else:
+ print("error: %s" % message)
+
+ # Always return zero due to limitation while executing
+ # as `gluster system:: execute`
+ sys.exit(0)
+
+
+class NoStdErrParser(ArgumentParser):
+ """
+ with gluster system:: execute, stderr gives
+ "Unable to end. Error : Bad file descriptor" error,
+ so deriving new class, prints error message and
+ exits with zero.
+ """
+ def error(self, message):
+ notok(message)
+
+
+class MountbrokerUserMgmt(object):
+ def __init__(self, volfile):
+ self.volfile = volfile
+ self._options = {}
+ self.commented_lines = []
+ self._parse()
+
+ def _parse(self):
+ with open(self.volfile, "r") as f:
+ for line in f:
+ line = line.strip()
+ if line.startswith("option "):
+ key, value = line.split(" ")[1:]
+ self._options[key] = value
+ if line.startswith("#"):
+ self.commented_lines.append(line)
+
+ def _get_write_data(self):
+ op = "volume management\n"
+ op += " type mgmt/glusterd\n"
+ for k, v in self._options.items():
+ op += " option %s %s\n" % (k, v)
+ for line in self.commented_lines:
+ op += " %s\n" % line
+ op += "end-volume"
+ return op
+
+ def save(self):
+ with open(self.volfile + "_tmp", "w") as f:
+ f.write(self._get_write_data())
+ f.flush()
+ os.fsync(f.fileno())
+ os.rename(self.volfile + "_tmp", self.volfile)
+
+ def set_opt(self, key, value):
+ self._options[key] = value.strip()
+
+ def remove_opt(self, key):
+ if key in self._options:
+ del(self._options[key])
+
+ def add_user(self, user, volumes):
+ vols = set()
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication.") \
+ and user == k.split(".")[-1]:
+ vols.update(v.split(","))
+
+ vols.update(volumes)
+ self.set_opt("mountbroker-geo-replication.%s" % user,
+ ",".join(vols))
+
+ def remove_volume(self, user, volumes):
+ vols = set()
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication.") \
+ and user == k.split(".")[-1]:
+ vols.update(v.split(","))
+
+ for v1 in volumes:
+ vols.discard(v1)
+
+ if vols:
+ self.set_opt("mountbroker-geo-replication.%s" % user,
+ ",".join(vols))
+ else:
+ self.remove_opt("mountbroker-geo-replication.%s" % user)
+
+ def remove_user(self, user):
+ self.remove_opt("mountbroker-geo-replication.%s" % user)
+
+ def info(self):
+ data = {"users": []}
+
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication."):
+ data["users"].append(
+ {"name": k.split(".")[-1], "volumes": v.split(",")}
+ )
+ else:
+ data[k] = v
+
+ return data
+
+
+def format_info(data):
+ op = "%s %s\n" % ("Option".ljust(50), "Value".ljust(50))
+ op += ("-" * 101) + "\n"
+ for key, value in data.items():
+ if key != "users":
+ op += "%s %s\n" % (key.ljust(50), value)
+
+ op += "\nUsers: %s\n" % ("None" if not data["users"] else "")
+ for user in data["users"]:
+ op += "%s: %s\n" % (user["name"], ", ".join(user["volumes"]))
+ op += "\n\n"
+ return op
+
+
+def _get_args():
+ parser = NoStdErrParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+
+ parser.add_argument('-j', dest="json", help="JSON output",
+ action="store_true")
+ subparsers = parser.add_subparsers(title='subcommands', dest='cmd')
+ parser_useradd = subparsers.add_parser('user')
+ parser_userdel = subparsers.add_parser('userdel')
+ parser_volumedel = subparsers.add_parser('volumedel')
+ subparsers.add_parser('info')
+ parser_opt = subparsers.add_parser('opt')
+ parser_optdel = subparsers.add_parser('optdel')
+
+ parser_useradd.add_argument('username', help="Username", type=str)
+ parser_useradd.add_argument('volumes', type=str, default='',
+ help="Volumes list. ',' separated")
+
+ parser_volumedel.add_argument('username', help="Username", type=str)
+ parser_volumedel.add_argument('volumes', type=str, default='',
+ help="Volumes list. ',' separated")
+
+ parser_userdel.add_argument('username', help="Username", type=str)
+
+ parser_opt.add_argument('opt_name', help="Name", type=str)
+ parser_opt.add_argument('opt_value', help="Value", type=str)
+
+ parser_optdel.add_argument('opt_name', help="Name", type=str)
+
+ return parser.parse_args()
+
+
+def main():
+ global args
+ args = _get_args()
+
+ m = MountbrokerUserMgmt("@GLUSTERD_VOLFILE@")
+
+ if args.cmd == "opt":
+ m.set_opt(args.opt_name, args.opt_value)
+ elif args.cmd == "optdel":
+ m.remove_opt(args.opt_name)
+ elif args.cmd == "userdel":
+ m.remove_user(args.username)
+ elif args.cmd == "user":
+ volumes = [v.strip() for v in args.volumes.split(",")
+ if v.strip() != ""]
+ m.add_user(args.username, volumes)
+ elif args.cmd == "volumedel":
+ volumes = [v.strip() for v in args.volumes.split(",")
+ if v.strip() != ""]
+ m.remove_volume(args.username, volumes)
+ elif args.cmd == "info":
+ info = m.info()
+ if not args.json:
+ info = format_info(info)
+ ok(info)
+
+ if args.cmd != "info":
+ m.save()
+ ok()
+
+if __name__ == "__main__":
+ main()
diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in
new file mode 100644
index 00000000000..40b90ffc560
--- /dev/null
+++ b/geo-replication/src/peer_mountbroker.py.in
@@ -0,0 +1,401 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+
+import os
+from errno import EEXIST, ENOENT
+
+from gluster.cliutils import (execute, Cmd, node_output_ok,
+ node_output_notok, execute_in_peers,
+ runcli, oknotok)
+from prettytable import PrettyTable
+
+LOG_DIR = "@localstatedir@/log/glusterfs/geo-replication-slaves"
+CLI_LOG = "@localstatedir@/log/glusterfs/cli.log"
+GEOREP_DIR = "@GLUSTERD_WORKDIR@/geo-replication"
+GLUSTERD_VOLFILE = "@GLUSTERD_VOLFILE@"
+
+
+class MountbrokerUserMgmt(object):
+ def __init__(self, volfile):
+ self.volfile = volfile
+ self._options = {}
+ self.commented_lines = []
+ self.user_volumes = {}
+ self._parse()
+
+ def _parse(self):
+ """ Example glusterd.vol
+ volume management
+ type mgmt/glusterd
+ option working-directory /var/lib/glusterd
+ option transport-type socket,rdma
+ option transport.socket.keepalive-time 10
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+ option rpc-auth-allow-insecure on
+ option ping-timeout 0
+ option event-threads 1
+ # option base-port 49152
+ option mountbroker-root /var/mountbroker-root
+ option mountbroker-geo-replication.user1 vol1,vol2,vol3
+ option geo-replication-log-group geogroup
+ option rpc-auth-allow-insecure on
+ end-volume
+ """
+ with open(self.volfile, "r") as f:
+ for line in f:
+ line = line.strip()
+ if line.startswith("option "):
+ key, value = line.split()[1:]
+ self._options[key] = value
+ if line.startswith("#"):
+ self.commented_lines.append(line)
+
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication."):
+ user = k.split(".")[-1]
+ self.user_volumes[user] = set(v.split(","))
+
+ def get_group(self):
+ return self._options.get("geo-replication-log-group", None)
+
+ def _get_write_data(self):
+ op = "volume management\n"
+ op += " type mgmt/glusterd\n"
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication."):
+ # Users will be added seperately
+ continue
+
+ op += " option %s %s\n" % (k, v)
+
+ for k, v in self.user_volumes.items():
+ if v:
+ op += (" option mountbroker-geo-replication."
+ "%s %s\n" % (k, ",".join(v)))
+
+ for line in self.commented_lines:
+ op += " %s\n" % line
+
+ op += "end-volume"
+ return op
+
+ def save(self):
+ with open(self.volfile + "_tmp", "w") as f:
+ f.write(self._get_write_data())
+ f.flush()
+ os.fsync(f.fileno())
+ os.rename(self.volfile + "_tmp", self.volfile)
+
+ def set_mount_root_and_group(self, mnt_root, group):
+ self._options["mountbroker-root"] = mnt_root
+ self._options["geo-replication-log-group"] = group
+
+ def add(self, volume, user):
+ user_volumes = self.user_volumes.get(user, None)
+
+ if user_volumes is not None and volume in user_volumes:
+ # User and Volume already exists
+ return
+
+ if user_volumes is None:
+ # User not exists
+ self.user_volumes[user] = set()
+
+ self.user_volumes[user].add(volume)
+
+ def remove(self, volume=None, user=None):
+ if user is not None:
+ if volume is None:
+ self.user_volumes[user] = set()
+ else:
+ try:
+ self.user_volumes.get(user, set()).remove(volume)
+ except KeyError:
+ pass
+ else:
+ if volume is None:
+ return
+
+ for k, v in self.user_volumes.items():
+ try:
+ self.user_volumes[k].remove(volume)
+ except KeyError:
+ pass
+
+ def info(self):
+ # Convert Volumes set into Volumes list
+ users = {}
+ for k, v in self.user_volumes.items():
+ users[k] = list(v)
+
+ data = {
+ "mountbroker-root": self._options.get("mountbroker-root", "None"),
+ "geo-replication-log-group": self._options.get(
+ "geo-replication-log-group", ""),
+ "users": users
+ }
+
+ return data
+
+
+class NodeSetup(Cmd):
+ # Test if group exists using `getent group <grp>`
+ # and then group add using `groupadd <grp>`
+ # chgrp -R <grp> /var/log/glusterfs/geo-replication-slaves
+ # chgrp -R <grp> /var/lib/glusterd/geo-replication
+ # chmod -R 770 /var/log/glusterfs/geo-replication-slaves
+ # chmod 770 /var/lib/glusterd/geo-replication
+ # mkdir -p <mnt_root>
+ # chmod 0711 <mnt_root>
+ # If selinux,
+ # semanage fcontext -a -e /home /var/mountbroker-root
+ # restorecon -Rv /var/mountbroker-root
+ name = "node-setup"
+
+ def args(self, parser):
+ parser.add_argument("mount_root")
+ parser.add_argument("group")
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+
+ try:
+ os.makedirs(args.mount_root)
+ except OSError as e:
+ if e.errno == EEXIST:
+ pass
+ else:
+ node_output_notok("Unable to Create {0}".format(
+ args.mount_root))
+
+ execute(["chmod", "0711", args.mount_root])
+ try:
+ execute(["semanage", "fcontext", "-a", "-e",
+ "/home", args.mount_root])
+ except OSError as e:
+ if e.errno == ENOENT:
+ pass
+ else:
+ node_output_notok(
+ "Unable to run semanage: {0}".format(e))
+
+ try:
+ execute(["restorecon", "-Rv", args.mount_root])
+ except OSError as e:
+ if e.errno == ENOENT:
+ pass
+ else:
+ node_output_notok(
+ "Unable to run restorecon: {0}".format(e))
+
+ rc, out, err = execute(["getent", "group", args.group])
+ if rc != 0:
+ node_output_notok("User Group not exists")
+
+ execute(["chgrp", "-R", args.group, GEOREP_DIR])
+ execute(["chgrp", "-R", args.group, LOG_DIR])
+ execute(["chgrp", args.group, CLI_LOG])
+ execute(["chmod", "770", GEOREP_DIR])
+ execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}",
+ "+"])
+ execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}",
+ "+"])
+ execute(["chmod", "660", CLI_LOG])
+
+ m.set_mount_root_and_group(args.mount_root, args.group)
+ m.save()
+
+ node_output_ok()
+
+
+def color_status(value):
+ if value.lower() in ("up", "ok", "yes"):
+ return "green"
+ else:
+ return "red"
+
+
+class CliSetup(Cmd):
+ # gluster-mountbroker setup <MOUNT_ROOT> <GROUP>
+ name = "setup"
+
+ def args(self, parser):
+ parser.add_argument("mount_root",
+ help="Path to the mountbroker-root directory.")
+ parser.add_argument("group",
+ help="Group to be used for setup.")
+
+ def run(self, args):
+ out = execute_in_peers("node-setup", [args.mount_root,
+ args.group])
+ table = PrettyTable(["NODE", "NODE STATUS", "SETUP STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["SETUP STATUS"] = "r"
+ for p in out:
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ print(table)
+
+
+class NodeStatus(Cmd):
+ # Check if Group exists
+ # Check if user exists
+ # Check directory permission /var/log/glusterfs/geo-replication-slaves
+ # and /var/lib/glusterd/geo-replication
+ # Check mount root and its permissions
+ # Check glusterd.vol file for user, group, dir existance
+ name = "node-status"
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+ data = m.info()
+ data["group_exists"] = False
+ data["path_exists"] = False
+
+ rc, out, err = execute(["getent", "group",
+ data["geo-replication-log-group"]])
+
+ if rc == 0:
+ data["group_exists"] = True
+
+ if os.path.exists(data["mountbroker-root"]):
+ data["path_exists"] = True
+
+ node_output_ok(data)
+
+
+class CliStatus(Cmd):
+ # gluster-mountbroker status
+ name = "status"
+
+ def run(self, args):
+ out = execute_in_peers("node-status")
+ table = PrettyTable(["NODE", "NODE STATUS", "MOUNT ROOT",
+ "GROUP", "USERS"])
+ table.align["NODE STATUS"] = "r"
+
+ for p in out:
+ node_data = p.output
+ if node_data == "" or node_data == "N/A":
+ node_data = {}
+
+ users_row_data = ""
+ for k, v in node_data.get("users", {}).items():
+ users_row_data += "{0}({1}) ".format(k, ", ".join(v))
+
+ if not users_row_data:
+ users_row_data = "None"
+
+ mount_root = node_data.get("mountbroker-root", "None")
+ if mount_root != "None":
+ mount_root += "({0})".format(oknotok(
+ node_data.get("path_exists", False)))
+
+ grp = node_data.get("geo-replication-log-group", "None")
+ if grp != "None":
+ grp += "({0})".format(oknotok(
+ node_data.get("group_exists", False)))
+
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ mount_root,
+ grp,
+ users_row_data])
+
+ print(table)
+
+
+class NodeAdd(Cmd):
+ # useradd -m -g <grp> <usr>
+ # useradd to glusterd.vol
+ name = "node-add"
+
+ def args(self, parser):
+ parser.add_argument("volume")
+ parser.add_argument("user")
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+ grp = m.get_group()
+ if grp is None:
+ node_output_notok("Group is not available")
+
+ m.add(args.volume, args.user)
+ m.save()
+ node_output_ok()
+
+
+class CliAdd(Cmd):
+ # gluster-mountbroker add <VOLUME> <USER>
+ name = "add"
+
+ def args(self, parser):
+ parser.add_argument("volume",
+ help="Volume to be added.")
+ parser.add_argument("user",
+ help="User for which volume is to be added.")
+
+ def run(self, args):
+ out = execute_in_peers("node-add", [args.volume,
+ args.user])
+ table = PrettyTable(["NODE", "NODE STATUS", "ADD STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["ADD STATUS"] = "r"
+
+ for p in out:
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ print(table)
+
+
+class NodeRemove(Cmd):
+ # userremove from glusterd.vol file
+ name = "node-remove"
+
+ def args(self, parser):
+ parser.add_argument("volume")
+ parser.add_argument("user")
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+ volume = None if args.volume == "." else args.volume
+ user = None if args.user == "." else args.user
+ m.remove(volume=volume, user=user)
+ m.save()
+ node_output_ok()
+
+
+class CliRemove(Cmd):
+ # gluster-mountbroker remove --volume <VOLUME> --user <USER>
+ name = "remove"
+
+ def args(self, parser):
+ parser.add_argument("--volume", default=".", help="Volume to be removed.")
+ parser.add_argument("--user", default=".",
+ help="User for which volume has to be removed.")
+
+ def run(self, args):
+ out = execute_in_peers("node-remove", [args.volume,
+ args.user])
+ table = PrettyTable(["NODE", "NODE STATUS", "REMOVE STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["REMOVE STATUS"] = "r"
+
+ for p in out:
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ print(table)
+
+if __name__ == "__main__":
+ runcli()
diff --git a/geo-replication/src/procdiggy.c b/geo-replication/src/procdiggy.c
new file mode 100644
index 00000000000..8068ef79a42
--- /dev/null
+++ b/geo-replication/src/procdiggy.c
@@ -0,0 +1,136 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/param.h> /* for PATH_MAX */
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
+#include "procdiggy.h"
+
+pid_t
+pidinfo(pid_t pid, char **name)
+{
+ char buf[NAME_MAX * 2] = {
+ 0,
+ };
+ FILE *f = NULL;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *p = NULL;
+ int ret = 0;
+ pid_t lpid = -1;
+
+ if (name)
+ *name = NULL;
+
+ snprintf(path, sizeof path, PROC "/%d/status", pid);
+
+ f = fopen(path, "r");
+ if (!f)
+ return -1;
+
+ for (;;) {
+ size_t len;
+ memset(buf, 0, sizeof(buf));
+ if (fgets(buf, sizeof(buf), f) == NULL || (len = strlen(buf)) == 0 ||
+ buf[len - 1] != '\n') {
+ lpid = -1;
+ goto out;
+ }
+ buf[len - 1] = '\0';
+
+ if (name && !*name) {
+ p = strtail(buf, "Name:");
+ if (p) {
+ while (isspace(*++p))
+ ;
+ *name = gf_strdup(p);
+ if (!*name) {
+ lpid = -2;
+ goto out;
+ }
+ continue;
+ }
+ }
+
+ p = strtail(buf, "PPid:");
+ if (p)
+ break;
+ }
+
+ while (isspace(*++p))
+ ;
+ ret = gf_string2int(p, &lpid);
+ if (ret == -1)
+ lpid = -1;
+
+out:
+ fclose(f);
+ if (lpid == -1 && name && *name)
+ GF_FREE(*name);
+ if (lpid == -2)
+ fprintf(stderr, "out of memory\n");
+ return lpid;
+}
+
+int
+prociter(int (*proch)(pid_t pid, pid_t ppid, char *tmpname, void *data),
+ void *data)
+{
+ char *name = NULL;
+ DIR *d = NULL;
+ struct dirent *de = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ pid_t pid = -1;
+ pid_t ppid = -1;
+ int ret = 0;
+
+ d = sys_opendir(PROC);
+ if (!d)
+ return -1;
+
+ for (;;) {
+ errno = 0;
+ de = sys_readdir(d, scratch);
+ if (!de || errno != 0)
+ break;
+
+ if (gf_string2int(de->d_name, &pid) != -1 && pid >= 0) {
+ ppid = pidinfo(pid, &name);
+ switch (ppid) {
+ case -1:
+ continue;
+ case -2:
+ break;
+ }
+ ret = proch(pid, ppid, name, data);
+ GF_FREE(name);
+ if (ret)
+ break;
+ }
+ }
+ sys_closedir(d);
+ if (!de && errno) {
+ fprintf(stderr, "failed to traverse " PROC " (%s)\n", strerror(errno));
+ ret = -1;
+ }
+
+ return ret;
+}
diff --git a/geo-replication/src/procdiggy.h b/geo-replication/src/procdiggy.h
new file mode 100644
index 00000000000..e17ccd31c89
--- /dev/null
+++ b/geo-replication/src/procdiggy.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifdef __NetBSD__
+#include <sys/syslimits.h>
+#endif /* __NetBSD__ */
+
+#define PROC "/proc"
+
+pid_t
+pidinfo(pid_t pid, char **name);
+
+int
+prociter(int (*proch)(pid_t pid, pid_t ppid, char *name, void *data),
+ void *data);
diff --git a/geo-replication/src/set_geo_rep_pem_keys.sh b/geo-replication/src/set_geo_rep_pem_keys.sh
new file mode 100755
index 00000000000..8a43fa39d1f
--- /dev/null
+++ b/geo-replication/src/set_geo_rep_pem_keys.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Script to copy the pem keys from the user's home directory
+# to $GLUSTERD_WORKDIR/geo-replication and then copy
+# the keys to other nodes in the cluster and add them to the
+# respective authorized keys. The script takes as argument the
+# user name and assumes that the user will be present in all
+# the nodes in the cluster. Not to be used for root user
+
+function main()
+{
+ user=$1
+ master_vol=$2
+ slave_vol=$3
+ GLUSTERD_WORKDIR=$(gluster system:: getwd)
+
+ if [ "$user" == "" ]; then
+ echo "Please enter the user's name"
+ exit 1;
+ fi
+
+ if [ "$master_vol" == "" ]; then
+ echo "Invalid master volume name"
+ exit 1;
+ fi
+
+ if [ "$slave_vol" == "" ]; then
+ echo "Invalid slave volume name"
+ exit 1;
+ fi
+
+ COMMON_SECRET_PEM_PUB=${master_vol}_${slave_vol}_common_secret.pem.pub
+
+ if [ "$user" == "root" ]; then
+ echo "This script is not needed for root"
+ exit 1;
+ fi
+
+ home_dir=`getent passwd $user | cut -d ':' -f 6`;
+
+ if [ "$home_dir" == "" ]; then
+ echo "No user $user found"
+ exit 1;
+ fi
+
+ if [ -f $home_dir/${COMMON_SECRET_PEM_PUB} ]; then
+ cp $home_dir/${COMMON_SECRET_PEM_PUB} ${GLUSTERD_WORKDIR}/geo-replication/
+ gluster system:: copy file /geo-replication/${COMMON_SECRET_PEM_PUB}
+ gluster system:: execute add_secret_pub $user geo-replication/${master_vol}_${slave_vol}_common_secret.pem.pub
+ gluster vol set ${slave_vol} features.read-only on
+ else
+ echo "$home_dir/common_secret.pem.pub not present. Please run geo-replication command on master with push-pem option to generate the file"
+ exit 1;
+ fi
+ exit 0;
+}
+
+main "$@";
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
new file mode 100644
index 00000000000..d70e3368faf
--- /dev/null
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -0,0 +1,8 @@
+syncdaemondir = $(GLUSTERFS_LIBEXECDIR)/python/syncdaemon
+
+syncdaemon_PYTHON = rconf.py gsyncd.py __init__.py master.py README.md repce.py \
+ resource.py syncdutils.py monitor.py libcxattr.py gsyncdconfig.py \
+ libgfchangelog.py gsyncdstatus.py conf.py logutils.py \
+ subcmds.py argsupgrade.py py2py3.py
+
+CLEANFILES =
diff --git a/xlators/features/marker/utils/syncdaemon/README.md b/geo-replication/syncdaemon/README.md
index d45006932d1..5ab785ae669 100644
--- a/xlators/features/marker/utils/syncdaemon/README.md
+++ b/geo-replication/syncdaemon/README.md
@@ -11,15 +11,14 @@ Requirements are categorized according to this.
* Python >= 2.5, or 2.4 with Ctypes (see below) (both)
* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave)
* rsync (both)
-* glusterfs with marker support (master); glusterfs (optional on slave)
-* FUSE; for supported versions consult glusterfs
+* glusterfs: with marker and changelog support (master & slave);
+* FUSE: glusterfs fuse module with auxiliary gfid based access support
INSTALLATION
------------
-As of now, the supported way of operation is running from the source directory.
+As of now, the supported way of operation is running from the source directory or using the RPMs given.
-If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
CONFIGURATION
-------------
@@ -39,41 +38,18 @@ The config file format matches the following syntax:
<option2>: <value2>
# comment
-By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd.conf_
+By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd_template.conf_
in the source tree.
USAGE
-----
-gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally.
-Assume we have a gluster volume _pop_ at localhost. We try to set up the following mirrors
-for it with gysncd:
+gsyncd is a utilitly for continuous mirroring, ie. it mirrors master to slave incrementally.
+Assume we have a gluster volume _pop_ at localhost. We try to set up the mirroring for volume
+_pop_ using gsyncd for gluster volume _moz_ on remote machine/cluster @ example.com. The
+respective gsyncd invocations are (demoing some syntax sugaring):
-1. _/data/mirror_
-2. local gluster volume _yow_
-3. _/data/far_mirror_ at example.com
-4. gluster volume _moz_ at example.com
-
-The respective gsyncd invocations are (demoing some syntax sugaring):
-
-1.
-
- gsyncd.py gluster://localhost:pop file:///data/mirror
-
- or short form
-
- gsyncd.py :pop /data/mirror
-
-2. `gsyncd :pop :yow`
-3.
-
- gsyncd.py :pop ssh://example.com:/data/far_mirror
-
- or short form
-
- gsyncd.py :pop example.com:/data/far_mirror
-
-4. `gsyncd.py :pop example.com::moz`
+`gsyncd.py :pop example.com::moz`
gsyncd has to be available on both sides; it's location on the remote side has to be specified
via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be
diff --git a/xlators/features/marker/utils/syncdaemon/__codecheck.py b/geo-replication/syncdaemon/__codecheck.py
index e3386afba8b..9437147f7d9 100644
--- a/xlators/features/marker/utils/syncdaemon/__codecheck.py
+++ b/geo-replication/syncdaemon/__codecheck.py
@@ -1,10 +1,21 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
import os
import os.path
import sys
import tempfile
import shutil
-ipd = tempfile.mkdtemp(prefix = 'codecheck-aux')
+ipd = tempfile.mkdtemp(prefix='codecheck-aux')
try:
# add a fake ipaddr module, we don't want to
@@ -25,7 +36,7 @@ class IPNetwork(list):
if f[-3:] != '.py' or f[0] == '_':
continue
m = f[:-3]
- sys.stdout.write('importing %s ...' % m)
+ sys.stdout.write('importing %s ...' % m)
__import__(m)
print(' OK.')
@@ -33,8 +44,9 @@ class IPNetwork(list):
sys.argv = sys.argv[:1] + a
gsyncd = sys.modules['gsyncd']
- for a in [['--help'], ['--version'], ['--canonicalize-escape-url', '/foo']]:
- print('>>> invoking program with args: %s' % ' '.join(a))
+ for a in [['--help'], ['--version'],
+ ['--canonicalize-escape-url', '/foo']]:
+ print(('>>> invoking program with args: %s' % ' '.join(a)))
pid = os.fork()
if not pid:
sys_argv_set(a)
diff --git a/geo-replication/syncdaemon/__init__.py b/geo-replication/syncdaemon/__init__.py
new file mode 100644
index 00000000000..b4648b69645
--- /dev/null
+++ b/geo-replication/syncdaemon/__init__.py
@@ -0,0 +1,9 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py
new file mode 100644
index 00000000000..7af40633ef8
--- /dev/null
+++ b/geo-replication/syncdaemon/argsupgrade.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+# Converts old style args into new style args
+
+from __future__ import print_function
+import sys
+from argparse import ArgumentParser
+import socket
+import os
+
+from syncdutils import GsyncdError
+from conf import GLUSTERD_WORKDIR
+
+
+def gethostbyname(hnam):
+ """gethostbyname wrapper"""
+ try:
+ return socket.gethostbyname(hnam)
+ except socket.gaierror:
+ ex = sys.exc_info()[1]
+ raise GsyncdError("failed to resolve %s: %s" %
+ (hnam, ex.strerror))
+
+
+def slave_url(urldata):
+ urldata = urldata.replace("ssh://", "")
+ host, vol = urldata.split("::")
+ vol = vol.split(":")[0]
+ return "%s::%s" % (host, vol)
+
+
+def init_gsyncd_template_conf():
+ path = GLUSTERD_WORKDIR + "/geo-replication/gsyncd_template.conf"
+ dname = os.path.dirname(path)
+ if not os.path.exists(dname):
+ try:
+ os.mkdir(dname)
+ except OSError:
+ pass
+
+ if not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def init_gsyncd_session_conf(master, slave):
+ slave = slave_url(slave)
+ master = master.strip(":")
+ slavehost, slavevol = slave.split("::")
+ slavehost = slavehost.split("@")[-1]
+
+ # Session Config File
+ path = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+ GLUSTERD_WORKDIR, master, slavehost, slavevol)
+
+ if os.path.exists(os.path.dirname(path)) and not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def init_gsyncd_conf(path):
+ dname = os.path.dirname(path)
+ if not os.path.exists(dname):
+ try:
+ os.mkdir(dname)
+ except OSError:
+ pass
+
+ if os.path.exists(dname) and not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def upgrade():
+ # Create dummy template conf(empty), hack to avoid glusterd
+ # fail when it does stat to check the existence.
+ init_gsyncd_template_conf()
+
+ inet6 = False
+ if "--inet6" in sys.argv:
+ inet6 = True
+
+ if "--monitor" in sys.argv:
+ # python gsyncd.py --path=/bricks/b1
+ # --monitor -c gsyncd.conf
+ # --iprefix=/var :gv1
+ # --glusterd-uuid=f26ac7a8-eb1b-4ea7-959c-80b27d3e43d0
+ # f241::gv2
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--glusterd-uuid")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ p.add_argument("--path", action="append")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # Overwrite the sys.argv after rearrange
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+ sys.argv = [
+ sys.argv[0],
+ "monitor",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--local-node-id",
+ pargs.glusterd_uuid
+ ]
+ elif "--status-get" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2
+ # --status-get --path /bricks/b1
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--path")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ sys.argv = [
+ sys.argv[0],
+ "status",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--local-path",
+ pargs.path
+ ]
+ elif "--canonicalize-url" in sys.argv:
+ # This can accept multiple URLs and converts each URL to the
+ # format ssh://USER@IP:gluster://127.0.0.1:VOLUME
+ # This format not used in gsyncd, but added for glusterd compatibility
+ p = ArgumentParser()
+ p.add_argument("--canonicalize-url", nargs="+")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ for url in pargs.canonicalize_url:
+ host, vol = url.split("::")
+ host = host.replace("ssh://", "")
+ remote_addr = host
+ if "@" not in remote_addr:
+ remote_addr = "root@" + remote_addr
+
+ user, hname = remote_addr.split("@")
+
+ if not inet6:
+ hname = gethostbyname(hname)
+
+ print(("ssh://%s@%s:gluster://127.0.0.1:%s" % (
+ user, hname, vol)))
+
+ sys.exit(0)
+ elif "--normalize-url" in sys.argv:
+ # Adds schema prefix as ssh://
+ # This format not used in gsyncd, but added for glusterd compatibility
+ p = ArgumentParser()
+ p.add_argument("--normalize-url")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+ print(("ssh://%s" % slave_url(pargs.normalize_url)))
+ sys.exit(0)
+ elif "--config-get-all" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get-all
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ sys.argv = [
+ sys.argv[0],
+ "config-get",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--show-defaults",
+ "--use-underscore"
+ ]
+ elif "--verify" in sys.argv and "spawning" in sys.argv:
+ # Just checks that able to spawn gsyncd or not
+ sys.exit(0)
+ elif "--slavevoluuid-get" in sys.argv:
+ # --slavevoluuid-get f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--slavevoluuid-get")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+ host, vol = pargs.slavevoluuid_get.split("::")
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "voluuidget",
+ host,
+ vol
+ ]
+ elif "--config-set-rx" in sys.argv:
+ # Not required since default conf is not generated
+ # and custom conf generated only when required
+ # -c gsyncd.conf --config-set-rx remote-gsyncd
+ # /usr/local/libexec/glusterfs/gsyncd . .
+ # Touch the gsyncd.conf file and create session
+ # directory if required
+ p = ArgumentParser()
+ p.add_argument("-c", dest="config_file")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # If not template conf then it is trying to create
+ # session config, create a empty file instead
+ if pargs.config_file.endswith("gsyncd.conf"):
+ init_gsyncd_conf(pargs.config_file)
+ sys.exit(0)
+ elif "--create" in sys.argv:
+ # To update monitor status file
+ # --create Created -c gsyncd.conf
+ # --iprefix=/var :gv1 f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--create")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "monitor-status",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ pargs.create
+ ]
+ elif "--config-get" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get pid-file
+ p = ArgumentParser()
+ p.add_argument("--config-get")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-get",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--only-value",
+ "--show-defaults",
+ "--name",
+ pargs.config_get.replace("_", "-")
+ ]
+ elif "--config-set" in sys.argv:
+ # ignore session-owner
+ if "session-owner" in sys.argv:
+ sys.exit(0)
+
+ # --path=/bricks/b1 -c gsyncd.conf :gv1 f241::gv2
+ # --config-set log_level DEBUG
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--config-set", action='store_true')
+ p.add_argument("name")
+ p.add_argument("--value")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-set",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--name=%s" % pargs.name,
+ "--value=%s" % pargs.value
+ ]
+ elif "--config-check" in sys.argv:
+ # --config-check georep_session_working_dir
+ p = ArgumentParser()
+ p.add_argument("--config-check")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-check",
+ pargs.config_check.replace("_", "-")
+ ]
+ elif "--config-del" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-del log_level
+ p = ArgumentParser()
+ p.add_argument("--config-del")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-reset",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ pargs.config_del.replace("_", "-")
+ ]
+ elif "--delete" in sys.argv:
+ # --delete -c gsyncd.conf --iprefix=/var
+ # --path-list=--path=/bricks/b1 :gv1 f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--reset-sync-time", action="store_true")
+ p.add_argument("--path-list")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--iprefix")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ paths = pargs.path_list.split("--path=")
+ paths = ["--path=%s" % x.strip() for x in paths if x.strip() != ""]
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "delete",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave)
+ ]
+ sys.argv += paths
+
+ if pargs.reset_sync_time:
+ sys.argv.append("--reset-sync-time")
+
+ if inet6:
+ # Add `--inet6` as first argument
+ sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:]
diff --git a/geo-replication/syncdaemon/conf.py.in b/geo-replication/syncdaemon/conf.py.in
new file mode 100644
index 00000000000..2042fa9cdfb
--- /dev/null
+++ b/geo-replication/syncdaemon/conf.py.in
@@ -0,0 +1,17 @@
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@'
+GLUSTERD_WORKDIR = "@GLUSTERD_WORKDIR@"
+
+LOCALSTATEDIR = "@localstatedir@"
+UUID_FILE = "@GLUSTERD_WORKDIR@/glusterd.info"
+GLUSTERFS_CONFDIR = "@SYSCONF_DIR@/glusterfs"
+GCONF_VERSION = 4.0
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
new file mode 100644
index 00000000000..257ed72c6ae
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -0,0 +1,325 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from argparse import ArgumentParser
+import time
+import os
+from errno import EEXIST
+import sys
+import logging
+
+from logutils import setup_logging
+import gsyncdconfig as gconf
+from rconf import rconf
+import subcmds
+from conf import GLUSTERD_WORKDIR, GLUSTERFS_CONFDIR, GCONF_VERSION
+from syncdutils import (set_term_handler, finalize, lf,
+ log_raise_exception, FreeObject, escape)
+import argsupgrade
+
+
+GSYNCD_VERSION = "gsyncd.py %s.0" % GCONF_VERSION
+
+
+def main():
+ rconf.starttime = time.time()
+
+ # If old Glusterd sends commands in old format, below function
+ # converts the sys.argv to new format. This conversion is added
+ # temporarily for backward compatibility. This can be removed
+ # once integrated with Glusterd2
+ # This modifies sys.argv globally, so rest of the code works as usual
+ argsupgrade.upgrade()
+
+ # Default argparse version handler prints to stderr, which is fixed in
+ # 3.x series but not in 2.x, using custom parser to fix this issue
+ if "--version" in sys.argv:
+ print(GSYNCD_VERSION)
+ sys.exit(0)
+
+ parser = ArgumentParser()
+ parser.add_argument("--inet6", action="store_true")
+ sp = parser.add_subparsers(dest="subcmd")
+
+ # Monitor Status File update
+ p = sp.add_parser("monitor-status")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("status", help="Update Monitor Status")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Monitor
+ p = sp.add_parser("monitor")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--pause-on-start",
+ action="store_true",
+ help="Start with Paused state")
+ p.add_argument("--local-node-id", help="Local Node ID")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--use-gconf-volinfo", action="store_true")
+
+ # Worker
+ p = sp.add_parser("worker")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("--local-path", help="Local Brick Path")
+ p.add_argument("--feedback-fd", type=int,
+ help="feedback fd between monitor and worker")
+ p.add_argument("--local-node", help="Local master node")
+ p.add_argument("--local-node-id", help="Local Node ID")
+ p.add_argument("--subvol-num", type=int, help="Subvolume number")
+ p.add_argument("--is-hottier", action="store_true",
+ help="Is this brick part of hot tier")
+ p.add_argument("--resource-remote",
+ help="Remote node to connect to Slave Volume")
+ p.add_argument("--resource-remote-id",
+ help="Remote node ID to connect to Slave Volume")
+ p.add_argument("--slave-id", help="Slave Volume ID")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Slave
+ p = sp.add_parser("slave")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("--session-owner")
+ p.add_argument("--master-brick",
+ help="Master brick which is connected to the Slave")
+ p.add_argument("--master-node",
+ help="Master node which is connected to the Slave")
+ p.add_argument("--master-node-id",
+ help="Master node ID which is connected to the Slave")
+ p.add_argument("--local-node", help="Local Slave node")
+ p.add_argument("--local-node-id", help="Local Slave ID")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # All configurations which are configured via "slave-" options
+ # DO NOT add default values for these configurations, default values
+ # will be picked from template config file
+ p.add_argument("--slave-timeout", type=int,
+ help="Timeout to end gsyncd at Slave side")
+ p.add_argument("--use-rsync-xattrs", action="store_true")
+ p.add_argument("--slave-log-level", help="Slave Gsyncd Log level")
+ p.add_argument("--slave-gluster-log-level",
+ help="Slave Gluster mount Log level")
+ p.add_argument("--slave-gluster-command-dir",
+ help="Directory where Gluster binaries exist on slave")
+ p.add_argument("--slave-access-mount", action="store_true",
+ help="Do not lazy umount the slave volume")
+ p.add_argument("--master-dist-count", type=int,
+ help="Master Distribution count")
+
+ # Status
+ p = sp.add_parser("status")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--local-path", help="Local Brick Path")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--json", action="store_true")
+
+ # Config-check
+ p = sp.add_parser("config-check")
+ p.add_argument("name", help="Config Name")
+ p.add_argument("--value", help="Config Value")
+ p.add_argument("--debug", action="store_true")
+
+ # Config-get
+ p = sp.add_parser("config-get")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("--name", help="Config Name")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--show-defaults", action="store_true")
+ p.add_argument("--only-value", action="store_true")
+ p.add_argument("--use-underscore", action="store_true")
+ p.add_argument("--json", action="store_true")
+
+ # Config-set
+ p = sp.add_parser("config-set")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-n", "--name", help="Config Name")
+ p.add_argument("-v", "--value", help="Config Value")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Config-reset
+ p = sp.add_parser("config-reset")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("name", help="Config Name")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # voluuidget
+ p = sp.add_parser("voluuidget")
+ p.add_argument("host", help="Hostname")
+ p.add_argument("volname", help="Volume Name")
+ p.add_argument("--debug", action="store_true")
+
+ # Delete
+ p = sp.add_parser("delete")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument('--path', dest='paths', action="append")
+ p.add_argument("--reset-sync-time", action="store_true",
+ help="Reset Sync Time")
+ p.add_argument("--debug", action="store_true")
+
+ # Parse arguments
+ args = parser.parse_args()
+
+ # Extra template values, All arguments are already part of template
+ # variables, use this for adding extra variables
+ extra_tmpl_args = {}
+
+ # Add First/Primary Slave host, user and volume
+ if getattr(args, "slave", None) is not None:
+ hostdata, slavevol = args.slave.split("::")
+ hostdata = hostdata.split("@")
+ slavehost = hostdata[-1]
+ slaveuser = "root"
+ if len(hostdata) == 2:
+ slaveuser = hostdata[0]
+ extra_tmpl_args["primary_slave_host"] = slavehost
+ extra_tmpl_args["slaveuser"] = slaveuser
+ extra_tmpl_args["slavevol"] = slavevol
+
+ # Add Bricks encoded path
+ if getattr(args, "local_path", None) is not None:
+ extra_tmpl_args["local_id"] = escape(args.local_path)
+
+ # Add Master Bricks encoded path(For Slave)
+ if getattr(args, "master_brick", None) is not None:
+ extra_tmpl_args["master_brick_id"] = escape(args.master_brick)
+
+ # Load configurations
+ config_file = getattr(args, "config_file", None)
+
+ # Subcmd accepts config file argument but not passed
+ # Set default path for config file in that case
+ # If an subcmd accepts config file then it also accepts
+ # master and Slave arguments.
+ if config_file is None and hasattr(args, "config_file") \
+ and args.subcmd != "slave":
+ config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+ GLUSTERD_WORKDIR,
+ args.master,
+ extra_tmpl_args["primary_slave_host"],
+ extra_tmpl_args["slavevol"])
+
+ # If Config file path not exists, log error and continue using default conf
+ config_file_error_msg = None
+ if config_file is not None and not os.path.exists(config_file):
+ # Logging not yet initialized, create the error message to
+ # log later and reset the config_file to None
+ config_file_error_msg = lf(
+ "Session config file not exists, using the default config",
+ path=config_file)
+ config_file = None
+
+ rconf.config_file = config_file
+
+ # Override gconf values from argument values only if it is slave gsyncd
+ override_from_args = False
+ if args.subcmd == "slave":
+ override_from_args = True
+
+ if config_file is not None and \
+ args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]:
+ ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"])
+ if ret is not None:
+ gconf.config_upgrade(config_file, ret)
+
+ # Load Config file
+ gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf",
+ config_file,
+ vars(args),
+ extra_tmpl_args,
+ override_from_args)
+
+ # Default label to print in log file
+ label = args.subcmd
+ if args.subcmd in ("worker"):
+ # If Worker, then add brick path also to label
+ label = "%s %s" % (args.subcmd, args.local_path)
+ elif args.subcmd == "slave":
+ # If Slave add Master node and Brick details
+ label = "%s %s%s" % (args.subcmd, args.master_node, args.master_brick)
+
+ # Setup Logger
+ # Default log file
+ log_file = gconf.get("cli-log-file")
+ log_level = gconf.get("cli-log-level")
+ if getattr(args, "master", None) is not None and \
+ getattr(args, "slave", None) is not None:
+ log_file = gconf.get("log-file")
+ log_level = gconf.get("log-level")
+
+ # Use different log file location for Slave log file
+ if args.subcmd == "slave":
+ log_file = gconf.get("slave-log-file")
+ log_level = gconf.get("slave-log-level")
+
+ if args.debug:
+ log_file = "-"
+ log_level = "DEBUG"
+
+ # Create Logdir if not exists
+ try:
+ if log_file != "-":
+ os.mkdir(os.path.dirname(log_file))
+ except OSError as e:
+ if e.errno != EEXIST:
+ raise
+
+ setup_logging(
+ log_file=log_file,
+ level=log_level,
+ label=label
+ )
+
+ if config_file_error_msg is not None:
+ logging.warn(config_file_error_msg)
+
+ # Log message for loaded config file
+ if config_file is not None:
+ logging.debug(lf("Using session config file", path=config_file))
+
+ set_term_handler()
+ excont = FreeObject(exval=0)
+
+ # Gets the function name based on the input argument. For example
+ # if subcommand passed as argument is monitor then it looks for
+ # function with name "subcmd_monitor" in subcmds file
+ func = getattr(subcmds, "subcmd_" + args.subcmd.replace("-", "_"), None)
+
+ try:
+ try:
+ if func is not None:
+ rconf.args = args
+ func(args)
+ except:
+ log_raise_exception(excont)
+ finally:
+ finalize(exval=excont.exval)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
new file mode 100644
index 00000000000..8848071997a
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncdconfig.py
@@ -0,0 +1,485 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+try:
+ from ConfigParser import RawConfigParser, NoSectionError
+except ImportError:
+ from configparser import RawConfigParser, NoSectionError
+import os
+import shutil
+from string import Template
+from datetime import datetime
+from threading import Lock
+
+
+# Global object which can be used in other modules
+# once load_config is called
+_gconf = {}
+
+
+class GconfNotConfigurable(Exception):
+ pass
+
+
+class GconfInvalidValue(Exception):
+ pass
+
+
+class Gconf(object):
+ def __init__(self, default_conf_file, custom_conf_file=None,
+ args={}, extra_tmpl_args={}, override_from_args=False):
+ self.lock = Lock()
+ self.default_conf_file = default_conf_file
+ self.custom_conf_file = custom_conf_file
+ self.tmp_conf_file = None
+ self.gconf = {}
+ self.gconfdata = {}
+ self.gconf_typecast = {}
+ self.template_conf = []
+ self.non_configurable_configs = []
+ self.prev_mtime = 0
+ if custom_conf_file is not None:
+ self.tmp_conf_file = custom_conf_file + ".tmp"
+
+ self.session_conf_items = []
+ self.args = args
+ self.extra_tmpl_args = extra_tmpl_args
+ self.override_from_args = override_from_args
+ # Store default values only if overwritten, Only for JSON/CLI output
+ self.default_values = {}
+ self._load()
+
+ def _tmpl_substitute(self):
+ tmpl_values = {}
+ for k, v in self.gconf.items():
+ tmpl_values[k.replace("-", "_")] = v
+
+ # override the config file values with the one user passed
+ for k, v in self.args.items():
+ # override the existing value only if set by user
+ if v is not None:
+ tmpl_values[k] = v
+
+ for k, v in self.extra_tmpl_args.items():
+ tmpl_values[k] = v
+
+ for k, v in self.gconf.items():
+ if k in self.template_conf and \
+ (isinstance(v, str) or isinstance(v, unicode)):
+ self.gconf[k] = Template(v).safe_substitute(tmpl_values)
+
+ def _do_typecast(self):
+ for k, v in self.gconf.items():
+ cast_func = globals().get(
+ "to_" + self.gconf_typecast.get(k, "string"), None)
+ if cast_func is not None:
+ self.gconf[k] = cast_func(v)
+ if self.default_values.get(k, None) is not None:
+ self.default_values[k] = cast_func(v)
+
+ def reset(self, name):
+ # If custom conf file is not set then it is only read only configs
+ if self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ # If a config can not be modified
+ if name != "all" and not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+ # Nothing to Reset, Not configured
+ if name != "all":
+ if not cnf.has_option("vars", name):
+ return True
+
+ # Remove option from custom conf file
+ cnf.remove_option("vars", name)
+ else:
+ # Remove and add empty section, do not disturb if config file
+ # already has any other section
+ try:
+ cnf.remove_section("vars")
+ except NoSectionError:
+ pass
+
+ cnf.add_section("vars")
+
+ with open(self.tmp_conf_file, "w") as fw:
+ cnf.write(fw)
+
+ os.rename(self.tmp_conf_file, self.custom_conf_file)
+
+ self.reload()
+
+ return True
+
+ def set(self, name, value):
+ if self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ if not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ if not self._is_valid_value(name, value):
+ raise GconfInvalidValue()
+
+ curr_val = self.gconf.get(name, None)
+ if curr_val == value:
+ return True
+
+ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+ if not cnf.has_section("vars"):
+ cnf.add_section("vars")
+
+ cnf.set("vars", name, value)
+ with open(self.tmp_conf_file, "w") as fw:
+ cnf.write(fw)
+
+ os.rename(self.tmp_conf_file, self.custom_conf_file)
+
+ self.reload()
+
+ return True
+
+ def check(self, name, value=None, with_conffile=True):
+ if with_conffile and self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ if not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ if value is not None and not self._is_valid_value(name, value):
+ raise GconfInvalidValue()
+
+
+ def _load_with_lock(self):
+ with self.lock:
+ self._load()
+
+ def _load(self):
+ self.gconf = {}
+ self.template_conf = []
+ self.gconf_typecast = {}
+ self.non_configurable_configs = []
+ self.session_conf_items = []
+ self.default_values = {}
+
+ conf = RawConfigParser()
+ # Default Template config file
+ with open(self.default_conf_file) as f:
+ conf.readfp(f)
+
+ # Custom Config file
+ if self.custom_conf_file is not None:
+ with open(self.custom_conf_file) as f:
+ conf.readfp(f)
+
+ # Get version from default conf file
+ self.version = conf.get("__meta__", "version")
+
+ # Populate default values
+ for sect in conf.sections():
+ if sect in ["__meta__", "vars"]:
+ continue
+
+ # Collect list of available options with help details
+ self.gconfdata[sect] = {}
+ for k, v in conf.items(sect):
+ self.gconfdata[sect][k] = v.strip()
+
+ # Collect the Type cast information
+ if conf.has_option(sect, "type"):
+ self.gconf_typecast[sect] = conf.get(sect, "type")
+
+ # Prepare list of configurable conf
+ if conf.has_option(sect, "configurable"):
+ if conf.get(sect, "configurable").lower() == "false":
+ self.non_configurable_configs.append(sect)
+
+ # if it is a template conf value which needs to be substituted
+ if conf.has_option(sect, "template"):
+ if conf.get(sect, "template").lower().strip() == "true":
+ self.template_conf.append(sect)
+
+ # Set default values
+ if conf.has_option(sect, "value"):
+ self.gconf[sect] = conf.get(sect, "value").strip()
+
+ # Load the custom conf elements and overwrite
+ if conf.has_section("vars"):
+ for k, v in conf.items("vars"):
+ self.session_conf_items.append(k)
+ self.default_values[k] = self.gconf.get(k, "")
+ self.gconf[k] = v.strip()
+
+ # Overwrite the Slave configurations which are sent as
+ # arguments to gsyncd slave
+ if self.override_from_args:
+ for k, v in self.args.items():
+ k = k.replace("_", "-")
+ if k.startswith("slave-") and k in self.gconf:
+ self.gconf[k] = v
+
+ self._tmpl_substitute()
+ self._do_typecast()
+
+ def reload(self, with_lock=True):
+ if self._is_config_changed():
+ if with_lock:
+ self._load_with_lock()
+ else:
+ self._load()
+
+ def get(self, name, default_value=None, with_lock=True):
+ if with_lock:
+ with self.lock:
+ return self.gconf.get(name, default_value)
+ else:
+ return self.gconf.get(name, default_value)
+
+ def getall(self, show_defaults=False, show_non_configurable=False):
+ cnf = {}
+ if not show_defaults:
+ for k in self.session_conf_items:
+ if k not in self.non_configurable_configs:
+ dv = self.default_values.get(k, "")
+ cnf[k] = {
+ "value": self.get(k),
+ "default": dv,
+ "configurable": True,
+ "modified": False if dv == "" else True
+ }
+ return cnf
+
+ # Show all configs including defaults
+ for k, v in self.gconf.items():
+ configurable = False if k in self.non_configurable_configs \
+ else True
+ dv = self.default_values.get(k, "")
+ modified = False if dv == "" else True
+ if show_non_configurable:
+ cnf[k] = {
+ "value": v,
+ "default": dv,
+ "configurable": configurable,
+ "modified": modified
+ }
+ else:
+ if k not in self.non_configurable_configs:
+ cnf[k] = {
+ "value": v,
+ "default": dv,
+ "configurable": configurable,
+ "modified": modified
+ }
+
+ return cnf
+
+ def getr(self, name, default_value=None):
+ with self.lock:
+ self.reload(with_lock=False)
+ return self.get(name, default_value, with_lock=False)
+
+ def get_help(self, name=None):
+ pass
+
+ def _is_configurable(self, name):
+ item = self.gconfdata.get(name, None)
+ if item is None:
+ return False
+
+ return item.get("configurable", True)
+
+ def _is_valid_value(self, name, value):
+ item = self.gconfdata.get(name, None)
+ if item is None:
+ return False
+
+ # If validation func not defined
+ if item.get("validation", None) is None:
+ return True
+
+ # minmax validation
+ if item["validation"] == "minmax":
+ return validate_minmax(value, item["min"], item["max"])
+
+ if item["validation"] == "choice":
+ return validate_choice(value, item["allowed_values"])
+
+ if item["validation"] == "bool":
+ return validate_bool(value)
+
+ if item["validation"] == "execpath":
+ return validate_execpath(value)
+
+ if item["validation"] == "unixtime":
+ return validate_unixtime(value)
+
+ if item["validation"] == "int":
+ return validate_int(value)
+
+ return False
+
+ def _is_config_changed(self):
+ if self.custom_conf_file is not None and \
+ os.path.exists(self.custom_conf_file):
+ st = os.lstat(self.custom_conf_file)
+ if st.st_mtime > self.prev_mtime:
+ self.prev_mtime = st.st_mtime
+ return True
+
+ return False
+
+def is_config_file_old(config_file, mastervol, slavevol):
+ cnf = RawConfigParser()
+ cnf.read(config_file)
+ session_section = "peers %s %s" % (mastervol, slavevol)
+ try:
+ return dict(cnf.items(session_section))
+ except NoSectionError:
+ return None
+
+def config_upgrade(config_file, ret):
+ config_file_backup = os.path.join(os.path.dirname(config_file), "gsyncd.conf.bkp")
+
+ #copy old config file in a backup file
+ shutil.copyfile(config_file, config_file_backup)
+
+ #write a new config file
+ config = RawConfigParser()
+ config.add_section('vars')
+
+ for key, value in ret.items():
+ #handle option name changes
+ if key == "use_tarssh":
+ new_key = "sync-method"
+ if value == "true":
+ new_value = "tarssh"
+ else:
+ new_value = "rsync"
+ config.set('vars', new_key, new_value)
+ elif key == "timeout":
+ new_key = "slave-timeout"
+ config.set('vars', new_key, value)
+ #for changes like: ignore_deletes to ignore-deletes
+ else:
+ new_key = key.replace("_", "-")
+ config.set('vars', new_key, value)
+
+ with open(config_file, 'w') as configfile:
+ config.write(configfile)
+
+
+def validate_int(value):
+ try:
+ _ = int(value)
+ return True
+ except ValueError:
+ return False
+
+
+def validate_unixtime(value):
+ try:
+ y = datetime.fromtimestamp(int(value)).strftime("%Y")
+ if y == "1970":
+ return False
+
+ return True
+ except ValueError:
+ return False
+
+
+def validate_minmax(value, minval, maxval):
+ try:
+ value = int(value)
+ minval = int(minval)
+ maxval = int(maxval)
+ return value >= minval and value <= maxval
+ except ValueError:
+ return False
+
+
+def validate_choice(value, allowed_values):
+ allowed_values = allowed_values.split(",")
+ allowed_values = [v.strip() for v in allowed_values]
+
+ return value in allowed_values
+
+
+def validate_bool(value):
+ return value in ["true", "false"]
+
+
+def validate_execpath(value):
+ return os.path.isfile(value) and os.access(value, os.X_OK)
+
+
+def validate_filepath(value):
+ return os.path.isfile(value)
+
+
+def validate_path(value):
+ return os.path.exists(value)
+
+
+def to_int(value):
+ return int(value)
+
+
+def to_float(value):
+ return float(value)
+
+
+def to_bool(value):
+ if isinstance(value, bool):
+ return value
+ return True if value in ["true", "True"] else False
+
+
+def get(name, default_value=None):
+ return _gconf.get(name, default_value)
+
+
+def getall(show_defaults=False, show_non_configurable=False):
+ return _gconf.getall(show_defaults=show_defaults,
+ show_non_configurable=show_non_configurable)
+
+
+def getr(name, default_value=None):
+ return _gconf.getr(name, default_value)
+
+
+def load(default_conf, custom_conf=None, args={}, extra_tmpl_args={},
+ override_from_args=False):
+ global _gconf
+ _gconf = Gconf(default_conf, custom_conf, args, extra_tmpl_args,
+ override_from_args)
+
+
+def setconfig(name, value):
+ global _gconf
+ _gconf.set(name, value)
+
+
+def resetconfig(name):
+ global _gconf
+ _gconf.reset(name)
+
+
+def check(name, value=None, with_conffile=True):
+ global _gconf
+ _gconf.check(name, value=value, with_conffile=with_conffile)
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
new file mode 100644
index 00000000000..1a655ff8887
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -0,0 +1,419 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import fcntl
+import os
+import tempfile
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import json
+import time
+from datetime import datetime
+from errno import EACCES, EAGAIN, ENOENT
+import logging
+
+from syncdutils import (EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event,
+ EVENT_GEOREP_CHECKPOINT_COMPLETED, lf)
+
+DEFAULT_STATUS = "N/A"
+MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped")
+STATUS_VALUES = (DEFAULT_STATUS,
+ "Initializing...",
+ "Active",
+ "Passive",
+ "Faulty")
+
+CRAWL_STATUS_VALUES = (DEFAULT_STATUS,
+ "Hybrid Crawl",
+ "History Crawl",
+ "Changelog Crawl")
+
+
+def human_time(ts):
+ try:
+ return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ return DEFAULT_STATUS
+
+
+def human_time_utc(ts):
+ try:
+ return datetime.utcfromtimestamp(
+ float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ return DEFAULT_STATUS
+
+
+def get_default_values():
+ return {
+ "slave_node": DEFAULT_STATUS,
+ "worker_status": DEFAULT_STATUS,
+ "last_synced": 0,
+ "last_synced_entry": 0,
+ "crawl_status": DEFAULT_STATUS,
+ "entry": 0,
+ "data": 0,
+ "meta": 0,
+ "failures": 0,
+ "checkpoint_completed": DEFAULT_STATUS,
+ "checkpoint_time": 0,
+ "checkpoint_completion_time": 0}
+
+
+class LockedOpen(object):
+
+ def __init__(self, filename, *args, **kwargs):
+ self.filename = filename
+ self.open_args = args
+ self.open_kwargs = kwargs
+ self.fileobj = None
+
+ def __enter__(self):
+ """
+ If two processes compete to update a file, The first process
+ gets the lock and the second process is blocked in the fcntl.flock()
+ call. When first process replaces the file and releases the lock,
+ the already open file descriptor in the second process now points
+ to a "ghost" file(not reachable by any path name) with old contents.
+ To avoid that conflict, check the fd already opened is same or
+ not. Open new one if not same
+ """
+ f = open(self.filename, *self.open_args, **self.open_kwargs)
+ while True:
+ fcntl.flock(f, fcntl.LOCK_EX)
+ fnew = open(self.filename, *self.open_args, **self.open_kwargs)
+ if os.path.sameopenfile(f.fileno(), fnew.fileno()):
+ fnew.close()
+ break
+ else:
+ f.close()
+ f = fnew
+ self.fileobj = f
+ return f
+
+ def __exit__(self, _exc_type, _exc_value, _traceback):
+ fcntl.flock(self.fileobj, fcntl.LOCK_UN)
+ self.fileobj.close()
+
+
+def set_monitor_status(status_file, status):
+ fd = os.open(status_file, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ with LockedOpen(status_file, 'r+'):
+ with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(status_file),
+ delete=False) as tf:
+ tf.write(status)
+ tempname = tf.name
+
+ os.rename(tempname, status_file)
+ dirfd = os.open(os.path.dirname(os.path.abspath(status_file)),
+ os.O_DIRECTORY)
+ os.fsync(dirfd)
+ os.close(dirfd)
+
+
+class GeorepStatus(object):
+ def __init__(self, monitor_status_file, master_node, brick, master_node_id,
+ master, slave, monitor_pid_file=None):
+ self.master = master
+ slv_data = slave.split("::")
+ self.slave_host = slv_data[0]
+ self.slave_volume = slv_data[1].split(":")[0] # Remove Slave UUID
+ self.work_dir = os.path.dirname(monitor_status_file)
+ self.monitor_status_file = monitor_status_file
+ self.filename = os.path.join(self.work_dir,
+ "brick_%s.status"
+ % urllib.quote_plus(brick))
+
+ fd = os.open(self.filename, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ fd = os.open(self.monitor_status_file, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ self.master_node = master_node
+ self.master_node_id = master_node_id
+ self.brick = brick
+ self.default_values = get_default_values()
+ self.monitor_pid_file = monitor_pid_file
+
+ def send_event(self, event_type, **kwargs):
+ gf_event(event_type,
+ master_volume=self.master,
+ master_node=self.master_node,
+ master_node_id=self.master_node_id,
+ slave_host=self.slave_host,
+ slave_volume=self.slave_volume,
+ brick_path=self.brick,
+ **kwargs)
+
+ def _update(self, mergerfunc):
+ data = self.default_values
+ with LockedOpen(self.filename, 'r+') as f:
+ try:
+ data.update(json.load(f))
+ except ValueError:
+ pass
+
+ data = mergerfunc(data)
+ # If Data is not changed by merger func
+ if not data:
+ return False
+
+ with tempfile.NamedTemporaryFile(
+ 'w',
+ dir=os.path.dirname(self.filename),
+ delete=False) as tf:
+ tf.write(data)
+ tempname = tf.name
+
+ os.rename(tempname, self.filename)
+ dirfd = os.open(os.path.dirname(os.path.abspath(self.filename)),
+ os.O_DIRECTORY)
+ os.fsync(dirfd)
+ os.close(dirfd)
+ return True
+
+ def reset_on_worker_start(self):
+ def merger(data):
+ data["slave_node"] = DEFAULT_STATUS
+ data["crawl_status"] = DEFAULT_STATUS
+ data["entry"] = 0
+ data["data"] = 0
+ data["meta"] = 0
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_field(self, key, value):
+ def merger(data):
+ # Current data and prev data is same
+ if data[key] == value:
+ return {}
+
+ data[key] = value
+ return json.dumps(data)
+
+ return self._update(merger)
+
+ def trigger_gf_event_checkpoint_completion(self, checkpoint_time,
+ checkpoint_completion_time):
+ self.send_event(EVENT_GEOREP_CHECKPOINT_COMPLETED,
+ checkpoint_time=checkpoint_time,
+ checkpoint_completion_time=checkpoint_completion_time)
+
+ def set_last_synced(self, value, checkpoint_time):
+ def merger(data):
+ data["last_synced"] = value[0]
+
+ # If checkpoint is not set or reset
+ # or if last set checkpoint is changed
+ if checkpoint_time == 0 or \
+ checkpoint_time != data["checkpoint_time"]:
+ data["checkpoint_time"] = 0
+ data["checkpoint_completion_time"] = 0
+ data["checkpoint_completed"] = "No"
+
+ # If checkpoint is completed and not marked as completed
+ # previously then update the checkpoint completed time
+ if checkpoint_time > 0 and checkpoint_time <= value[0]:
+ if data["checkpoint_completed"] == "No":
+ curr_time = int(time.time())
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = curr_time
+ data["checkpoint_completed"] = "Yes"
+ logging.info(lf("Checkpoint completed",
+ checkpoint_time=human_time_utc(
+ checkpoint_time),
+ completion_time=human_time_utc(curr_time)))
+ self.trigger_gf_event_checkpoint_completion(
+ checkpoint_time, curr_time)
+
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_worker_status(self, status):
+ if self.set_field("worker_status", status):
+ logging.info(lf("Worker Status Change",
+ status=status))
+
+ def set_worker_crawl_status(self, status):
+ if self.set_field("crawl_status", status):
+ logging.info(lf("Crawl Status Change",
+ status=status))
+
+ def set_slave_node(self, slave_node):
+ def merger(data):
+ data["slave_node"] = slave_node
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def inc_value(self, key, value):
+ def merger(data):
+ data[key] = data.get(key, 0) + value
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def dec_value(self, key, value):
+ def merger(data):
+ data[key] = data.get(key, 0) - value
+ if data[key] < 0:
+ data[key] = 0
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_active(self):
+ if self.set_field("worker_status", "Active"):
+ logging.info(lf("Worker Status Change",
+ status="Active"))
+ self.send_event(EVENT_GEOREP_ACTIVE)
+
+ def set_passive(self):
+ if self.set_field("worker_status", "Passive"):
+ logging.info(lf("Worker Status Change",
+ status="Passive"))
+ self.send_event(EVENT_GEOREP_PASSIVE)
+
+ def get_monitor_status(self):
+ data = ""
+ with open(self.monitor_status_file, "r") as f:
+ data = f.read().strip()
+ return data
+
+ def get_status(self, checkpoint_time=0):
+ """
+ Monitor Status ---> Created Started Paused Stopped
+ ----------------------------------------------------------------------
+ slave_node N/A VALUE VALUE N/A
+ status Created VALUE Paused Stopped
+ last_synced N/A VALUE VALUE VALUE
+ last_synced_entry N/A VALUE VALUE VALUE
+ crawl_status N/A VALUE N/A N/A
+ entry N/A VALUE N/A N/A
+ data N/A VALUE N/A N/A
+ meta N/A VALUE N/A N/A
+ failures N/A VALUE VALUE VALUE
+ checkpoint_completed N/A VALUE VALUE VALUE
+ checkpoint_time N/A VALUE VALUE VALUE
+ checkpoint_completed_time N/A VALUE VALUE VALUE
+ """
+ data = self.default_values
+ with open(self.filename) as f:
+ try:
+ data.update(json.load(f))
+ except ValueError:
+ pass
+ monitor_status = self.get_monitor_status()
+
+ # Verifying whether monitor process running and adjusting status
+ if monitor_status in ["Started", "Paused"]:
+ try:
+ with open(self.monitor_pid_file, "r+") as f:
+ fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ monitor_status = "Stopped"
+ except (IOError, OSError) as e:
+ # If pid file not exists, either monitor died or Geo-rep
+ # not even started once
+ if e.errno == ENOENT:
+ monitor_status = "Stopped"
+ elif e.errno in (EACCES, EAGAIN):
+ # cannot grab. so, monitor process still running..move on
+ pass
+ else:
+ raise
+
+ if monitor_status in ["Created", "Paused", "Stopped"]:
+ data["worker_status"] = monitor_status
+
+ if monitor_status == "":
+ data["worker_status"] = "Stopped"
+
+ # Checkpoint adjustments
+ if checkpoint_time == 0:
+ data["checkpoint_completed"] = DEFAULT_STATUS
+ data["checkpoint_time"] = DEFAULT_STATUS
+ data["checkpoint_completion_time"] = DEFAULT_STATUS
+ else:
+ if checkpoint_time != data["checkpoint_time"]:
+ if checkpoint_time <= data["last_synced"]:
+ data["checkpoint_completed"] = "Yes"
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = data["last_synced"]
+ else:
+ data["checkpoint_completed"] = "No"
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = DEFAULT_STATUS
+
+ if data["checkpoint_time"] not in [0, DEFAULT_STATUS]:
+ chkpt_time = data["checkpoint_time"]
+ data["checkpoint_time"] = human_time(chkpt_time)
+ data["checkpoint_time_utc"] = human_time_utc(chkpt_time)
+
+ if data["checkpoint_completion_time"] not in [0, DEFAULT_STATUS]:
+ chkpt_completion_time = data["checkpoint_completion_time"]
+ data["checkpoint_completion_time"] = human_time(
+ chkpt_completion_time)
+ data["checkpoint_completion_time_utc"] = human_time_utc(
+ chkpt_completion_time)
+
+ if data["last_synced"] == 0:
+ data["last_synced"] = DEFAULT_STATUS
+ data["last_synced_utc"] = DEFAULT_STATUS
+ else:
+ last_synced = data["last_synced"]
+ data["last_synced"] = human_time(last_synced)
+ data["last_synced_utc"] = human_time_utc(last_synced)
+
+ if data["worker_status"] != "Active":
+ data["last_synced"] = DEFAULT_STATUS
+ data["last_synced_utc"] = DEFAULT_STATUS
+ data["crawl_status"] = DEFAULT_STATUS
+ data["entry"] = DEFAULT_STATUS
+ data["data"] = DEFAULT_STATUS
+ data["meta"] = DEFAULT_STATUS
+ data["failures"] = DEFAULT_STATUS
+ data["checkpoint_completed"] = DEFAULT_STATUS
+ data["checkpoint_time"] = DEFAULT_STATUS
+ data["checkpoint_completed_time"] = DEFAULT_STATUS
+ data["checkpoint_time_utc"] = DEFAULT_STATUS
+ data["checkpoint_completion_time_utc"] = DEFAULT_STATUS
+
+ if data["worker_status"] not in ["Active", "Passive"]:
+ data["slave_node"] = DEFAULT_STATUS
+
+ if data.get("last_synced_utc", 0) == 0:
+ data["last_synced_utc"] = DEFAULT_STATUS
+
+ if data.get("checkpoint_completion_time_utc", 0) == 0:
+ data["checkpoint_completion_time_utc"] = DEFAULT_STATUS
+
+ if data.get("checkpoint_time_utc", 0) == 0:
+ data["checkpoint_time_utc"] = DEFAULT_STATUS
+
+ return data
+
+ def print_status(self, checkpoint_time=0, json_output=False):
+ status_out = self.get_status(checkpoint_time)
+ if json_output:
+ out = {}
+ # Convert all values as string
+ for k, v in status_out.items():
+ out[k] = str(v)
+ print(json.dumps(out))
+ return
+
+ for key, value in status_out.items():
+ print(("%s: %s" % (key, value)))
diff --git a/geo-replication/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py
new file mode 100644
index 00000000000..e6406c36bd7
--- /dev/null
+++ b/geo-replication/syncdaemon/libcxattr.py
@@ -0,0 +1,112 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+from ctypes import CDLL, get_errno
+from py2py3 import (bytearray_to_str, gr_create_string_buffer,
+ gr_query_xattr, gr_lsetxattr, gr_lremovexattr)
+
+
+class Xattr(object):
+
+ """singleton that wraps the extended attributes system
+ interface for python using ctypes
+
+ Just implement it to the degree we need it, in particular
+ - we need just the l*xattr variants, ie. we never want symlinks to be
+ followed
+ - don't need size discovery for getxattr, as we always know the exact
+ sizes we expect
+ """
+
+ libc = CDLL("libc.so.6", use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _query_xattr(cls, path, siz, syscall, *a):
+ if siz:
+ buf = gr_create_string_buffer(siz)
+ else:
+ buf = None
+ ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
+ if ret == -1:
+ cls.raise_oserr()
+ if siz:
+ # py2 and py3 compatibility. Convert bytes array
+ # to string
+ result = bytearray_to_str(buf.raw)
+ return result[:ret]
+ else:
+ return ret
+
+ @classmethod
+ def lgetxattr(cls, path, attr, siz=0):
+ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
+
+ @classmethod
+ def lgetxattr_buf(cls, path, attr):
+ """lgetxattr variant with size discovery"""
+ size = cls.lgetxattr(path, attr)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return ''
+ return cls.lgetxattr(path, attr, size)
+
+ @classmethod
+ def llistxattr(cls, path, siz=0):
+ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
+ if isinstance(ret, str):
+ ret = ret.strip('\0')
+ ret = ret.split('\0') if ret else []
+ return ret
+
+ @classmethod
+ def lsetxattr(cls, path, attr, val):
+ ret = gr_lsetxattr(cls, path, attr, val)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def lremovexattr(cls, path, attr):
+ ret = gr_lremovexattr(cls, path, attr)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def llistxattr_buf(cls, path):
+ """listxattr variant with size discovery"""
+ try:
+ # Assuming no more than 100 xattrs in a file/directory and
+ # each xattr key length will be less than 256 bytes
+ # llistxattr will be called with bigger size so that
+ # listxattr will not fail with ERANGE. OSError will be
+ # raised if fails even with the large size specified.
+ size = 256 * 100
+ return cls.llistxattr(path, size)
+ except OSError:
+ # If fixed length failed for getting list of xattrs then
+ # use the llistxattr call to get the size and use that
+ # size to get the list of xattrs.
+ size = cls.llistxattr(path)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return []
+
+ return cls.llistxattr(path, size)
diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py
new file mode 100644
index 00000000000..a3bda7282c0
--- /dev/null
+++ b/geo-replication/syncdaemon/libgfchangelog.py
@@ -0,0 +1,143 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong
+from ctypes.util import find_library
+from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
+from py2py3 import (gr_cl_history_changelog, gr_cl_done,
+ gr_create_string_buffer, gr_cl_register,
+ gr_cl_history_done, bytearray_to_str)
+
+
+libgfc = CDLL(
+ find_library("gfchangelog"),
+ mode=RTLD_GLOBAL,
+ use_errno=True
+)
+
+
+def _raise_changelog_err():
+ errn = get_errno()
+ raise ChangelogException(errn, os.strerror(errn))
+
+
+def _init():
+ if libgfc.gf_changelog_init(None) == -1:
+ _raise_changelog_err()
+
+
+def register(brick, path, log_file, log_level, retries=0):
+ _init()
+
+ ret = gr_cl_register(libgfc, brick, path, log_file, log_level, retries)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def scan():
+ ret = libgfc.gf_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def startfresh():
+ ret = libgfc.gf_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ # cleanup tracker
+ startfresh()
+
+ return sorted(changes, key=clsort)
+
+
+def done(clfile):
+ ret = gr_cl_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_scan():
+ ret = libgfc.gf_history_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+ return ret
+
+
+def history_changelog(changelog_path, start, end, num_parallel):
+ actual_end = c_ulong()
+ ret = gr_cl_history_changelog(libgfc, changelog_path, start, end,
+ num_parallel, byref(actual_end))
+ if ret == -1:
+ _raise_changelog_err()
+
+ if ret == -2:
+ raise ChangelogHistoryNotAvailable()
+
+ return (ret, actual_end.value)
+
+
+def history_startfresh():
+ ret = libgfc.gf_history_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_history_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ return sorted(changes, key=clsort)
+
+
+def history_done(clfile):
+ ret = gr_cl_history_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
diff --git a/geo-replication/syncdaemon/logutils.py b/geo-replication/syncdaemon/logutils.py
new file mode 100644
index 00000000000..01ae7852f23
--- /dev/null
+++ b/geo-replication/syncdaemon/logutils.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import logging
+from logging import Logger, handlers
+import sys
+import time
+
+
+class GLogger(Logger):
+
+ """Logger customizations for gsyncd.
+
+ It implements a log format similar to that of glusterfs.
+ """
+
+ def makeRecord(self, name, level, *a):
+ rv = Logger.makeRecord(self, name, level, *a)
+ rv.nsecs = (rv.created - int(rv.created)) * 1000000
+ fr = sys._getframe(4)
+ callee = fr.f_locals.get('self')
+ if callee:
+ ctx = str(type(callee)).split("'")[1].split('.')[-1]
+ else:
+ ctx = '<top>'
+ if not hasattr(rv, 'funcName'):
+ rv.funcName = fr.f_code.co_name
+ rv.lvlnam = logging.getLevelName(level)[0]
+ rv.ctx = ctx
+ return rv
+
+
+LOGFMT = ("[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s{0}"
+ ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s")
+
+
+def setup_logging(level="INFO", label="", log_file=""):
+ if label:
+ label = "(" + label + ")"
+
+ filename = None
+ stream = None
+ if log_file:
+ if log_file in ('-', '/dev/stderr'):
+ stream = sys.stderr
+ elif log_file == '/dev/stdout':
+ stream = sys.stdout
+ else:
+ filename = log_file
+
+ datefmt = "%Y-%m-%d %H:%M:%S"
+ fmt = LOGFMT.format(label)
+ logging.root = GLogger("root", level)
+ logging.setLoggerClass(GLogger)
+ logging.Formatter.converter = time.gmtime # Log in GMT/UTC time
+ logging.getLogger().handlers = []
+ logging.getLogger().setLevel(level)
+
+ if filename is not None:
+ logging_handler = handlers.WatchedFileHandler(filename)
+ formatter = logging.Formatter(fmt=fmt,
+ datefmt=datefmt)
+ logging_handler.setFormatter(formatter)
+ logging.getLogger().addHandler(logging_handler)
+ else:
+ logging.basicConfig(stream=stream,
+ format=fmt,
+ datefmt=datefmt,
+ level=level)
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
new file mode 100644
index 00000000000..9501aeae6b5
--- /dev/null
+++ b/geo-replication/syncdaemon/master.py
@@ -0,0 +1,2020 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+import time
+import stat
+import logging
+import fcntl
+import string
+import errno
+import tarfile
+from errno import ENOENT, ENODATA, EEXIST, EACCES, EAGAIN, ESTALE, EINTR
+from threading import Condition, Lock
+from datetime import datetime
+
+import gsyncdconfig as gconf
+import libgfchangelog
+from rconf import rconf
+from syncdutils import (Thread, GsyncdError, escape_space_newline,
+ unescape_space_newline, gauxpfx, escape,
+ lstat, errno_wrap, FreeObject, lf, matching_disk_gfid,
+ NoStimeAvailable, PartialHistoryAvailable,
+ host_brick_split)
+
+URXTIME = (-1, 0)
+
+# Default rollover time set in changelog translator
+# changelog rollover time is hardcoded here to avoid the
+# xsync usage when crawling switch happens from history
+# to changelog. If rollover time increased in translator
+# then geo-rep can enter into xsync crawl after history
+# crawl before starting live changelog crawl.
+CHANGELOG_ROLLOVER_TIME = 15
+
+# Utility functions to help us to get to closer proximity
+# of the DRY principle (no, don't look for elevated or
+# perspectivistic things here)
+
+
+def _xtime_now():
+ t = time.time()
+ sec = int(t)
+ nsec = int((t - sec) * 1000000)
+ return (sec, nsec)
+
+
+def _volinfo_hook_relax_foreign(self):
+ volinfo_sys = self.get_sys_volinfo()
+ fgn_vi = volinfo_sys[self.KFGN]
+ if fgn_vi:
+ expiry = fgn_vi['timeout'] - int(time.time()) + 1
+ logging.info(lf('foreign volume info found, waiting for expiry',
+ expiry=expiry))
+ time.sleep(expiry)
+ volinfo_sys = self.get_sys_volinfo()
+ return volinfo_sys
+
+
+def edct(op, **ed):
+ dct = {}
+ dct['op'] = op
+ # This is used in automatic gfid conflict resolution.
+ # When marked True, it's skipped during re-processing.
+ dct['skip_entry'] = False
+ for k in ed:
+ if k == 'stat':
+ st = ed[k]
+ dst = dct['stat'] = {}
+ if st:
+ dst['uid'] = st.st_uid
+ dst['gid'] = st.st_gid
+ dst['mode'] = st.st_mode
+ dst['atime'] = st.st_atime
+ dst['mtime'] = st.st_mtime
+ else:
+ dct[k] = ed[k]
+ return dct
+
+
+# The API!
+
+def gmaster_builder(excrawl=None):
+ """produce the GMaster class variant corresponding
+ to sync mode"""
+ this = sys.modules[__name__]
+ modemixin = gconf.get("special-sync-mode")
+ if not modemixin:
+ modemixin = 'normal'
+
+ if gconf.get("change-detector") == 'xsync':
+ changemixin = 'xsync'
+ elif excrawl:
+ changemixin = excrawl
+ else:
+ changemixin = gconf.get("change-detector")
+
+ logging.debug(lf('setting up change detection mode',
+ mode=changemixin))
+ modemixin = getattr(this, modemixin.capitalize() + 'Mixin')
+ crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
+
+ if gconf.get("use-rsync-xattrs"):
+ sendmarkmixin = SendmarkRsyncMixin
+ else:
+ sendmarkmixin = SendmarkNormalMixin
+
+ if gconf.get("ignore-deletes"):
+ purgemixin = PurgeNoopMixin
+ else:
+ purgemixin = PurgeNormalMixin
+
+ if gconf.get("sync-method") == "tarssh":
+ syncengine = TarSSHEngine
+ else:
+ syncengine = RsyncEngine
+
+ class _GMaster(crawlmixin, modemixin, sendmarkmixin,
+ purgemixin, syncengine):
+ pass
+
+ return _GMaster
+
+
+# Mixin classes that implement the data format
+# and logic particularities of the certain
+# sync modes
+
+class NormalMixin(object):
+
+ """normal geo-rep behavior"""
+
+ minus_infinity = URXTIME
+
+ # following staticmethods ideally would be
+ # methods of an xtime object (in particular,
+ # implementing the hooks needed for comparison
+ # operators), but at this point we don't yet
+ # have a dedicated xtime class
+
+ @staticmethod
+ def serialize_xtime(xt):
+ return "%d.%d" % tuple(xt)
+
+ @staticmethod
+ def deserialize_xtime(xt):
+ return tuple(int(x) for x in xt.split("."))
+
+ @staticmethod
+ def native_xtime(xt):
+ return xt
+
+ @staticmethod
+ def xtime_geq(xt0, xt1):
+ return xt0 >= xt1
+
+ def make_xtime_opts(self, is_master, opts):
+ if 'create' not in opts:
+ opts['create'] = is_master
+ if 'default_xtime' not in opts:
+ opts['default_xtime'] = URXTIME
+
+ def xtime_low(self, rsc, path, **opts):
+ if rsc == self.master:
+ xt = rsc.server.xtime(path, self.uuid)
+ else:
+ xt = rsc.server.stime(path, self.uuid)
+ if isinstance(xt, int) and xt == ENODATA:
+ xt = rsc.server.xtime(path, self.uuid)
+ if not isinstance(xt, int):
+ self.slave.server.set_stime(path, self.uuid, xt)
+ if isinstance(xt, int) and xt != ENODATA:
+ return xt
+ if xt == ENODATA or xt < self.volmark:
+ if opts['create']:
+ xt = _xtime_now()
+ rsc.server.aggregated.set_xtime(path, self.uuid, xt)
+ else:
+ zero_zero = (0, 0)
+ if xt != zero_zero:
+ xt = opts['default_xtime']
+ return xt
+
+ def keepalive_payload_hook(self, timo, gap):
+ # first grab a reference as self.volinfo
+ # can be changed in main thread
+ vi = self.volinfo
+ if vi:
+ # then have a private copy which we can mod
+ vi = vi.copy()
+ vi['timeout'] = int(time.time()) + timo
+ else:
+ # send keep-alive more frequently to
+ # avoid a delay in announcing our volume info
+ # to slave if it becomes established in the
+ # meantime
+ gap = min(10, gap)
+ return (vi, gap)
+
+ def volinfo_hook(self):
+ return self.get_sys_volinfo()
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ if xtr > xtl:
+ raise GsyncdError("timestamp corruption for " + path)
+
+ def need_sync(self, e, xte, xtrd):
+ return xte > xtrd
+
+ def set_slave_xtime(self, path, mark):
+ self.slave.server.set_stime(path, self.uuid, mark)
+ # self.slave.server.set_xtime_remote(path, self.uuid, mark)
+
+
+class PartialMixin(NormalMixin):
+
+ """a variant tuned towards operation with a master
+ that has partial info of the slave (brick typically)"""
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ pass
+
+
+class RecoverMixin(NormalMixin):
+
+ """a variant that differs from normal in terms
+ of ignoring non-indexed files"""
+
+ @staticmethod
+ def make_xtime_opts(is_master, opts):
+ if 'create' not in opts:
+ opts['create'] = False
+ if 'default_xtime' not in opts:
+ opts['default_xtime'] = URXTIME
+
+ def keepalive_payload_hook(self, timo, gap):
+ return (None, gap)
+
+ def volinfo_hook(self):
+ return _volinfo_hook_relax_foreign(self)
+
+# Further mixins for certain tunable behaviors
+
+
+class SendmarkNormalMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ return self.sendmark(*a, **kw)
+
+
+class SendmarkRsyncMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ pass
+
+
+class PurgeNormalMixin(object):
+
+ def purge_missing(self, path, names):
+ self.slave.server.purge(path, names)
+
+
+class PurgeNoopMixin(object):
+
+ def purge_missing(self, path, names):
+ pass
+
+
+class TarSSHEngine(object):
+
+ """Sync engine that uses tar(1) piped over ssh(1)
+ for data transfers. Good for lots of small files.
+ """
+
+ def a_syncdata(self, files):
+ logging.debug(lf("Files", files=files))
+
+ for f in files:
+ pb = self.syncer.add(f)
+
+ def regjob(se, xte, pb):
+ rv = pb.wait()
+ if rv[0]:
+ logging.debug(lf('synced', file=se))
+ return True
+ else:
+ # stat check for file presence
+ st = lstat(se)
+ if isinstance(st, int):
+ # file got unlinked in the interim
+ self.unlinked_gfids.add(se)
+ return True
+
+ self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
+
+ def syncdata_wait(self):
+ if self.wait(self.FLAT_DIR_HIERARCHY, None):
+ return True
+
+ def syncdata(self, files):
+ self.a_syncdata(files)
+ self.syncdata_wait()
+
+
+class RsyncEngine(object):
+
+ """Sync engine that uses rsync(1) for data transfers"""
+
+ def a_syncdata(self, files):
+ logging.debug(lf("files", files=files))
+
+ for f in files:
+ logging.debug(lf('candidate for syncing', file=f))
+ pb = self.syncer.add(f)
+
+ def regjob(se, xte, pb):
+ rv = pb.wait()
+ if rv[0]:
+ logging.debug(lf('synced', file=se))
+ return True
+ else:
+ # stat to check if the file exist
+ st = lstat(se)
+ if isinstance(st, int):
+ # file got unlinked in the interim
+ self.unlinked_gfids.add(se)
+ return True
+
+ self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
+
+ def syncdata_wait(self):
+ if self.wait(self.FLAT_DIR_HIERARCHY, None):
+ return True
+
+ def syncdata(self, files):
+ self.a_syncdata(files)
+ self.syncdata_wait()
+
+
+class GMasterCommon(object):
+
+ """abstract class impementling master role"""
+
+ KFGN = 0
+ KNAT = 1
+
+ def get_sys_volinfo(self):
+ """query volume marks on fs root
+
+ err out on multiple foreign masters
+ """
+ fgn_vis, nat_vi = (
+ self.master.server.aggregated.foreign_volume_infos(),
+ self.master.server.aggregated.native_volume_info())
+ fgn_vi = None
+ if fgn_vis:
+ if len(fgn_vis) > 1:
+ raise GsyncdError("cannot work with multiple foreign masters")
+ fgn_vi = fgn_vis[0]
+ return fgn_vi, nat_vi
+
+ @property
+ def uuid(self):
+ if self.volinfo:
+ return self.volinfo['uuid']
+
+ @property
+ def volmark(self):
+ if self.volinfo:
+ return self.volinfo['volume_mark']
+
+ def get_entry_stime(self):
+ data = self.slave.server.entry_stime(".", self.uuid)
+ if isinstance(data, int):
+ data = None
+ return data
+
+ def get_data_stime(self):
+ data = self.slave.server.stime(".", self.uuid)
+ if isinstance(data, int):
+ data = None
+ return data
+
+ def xtime(self, path, *a, **opts):
+ """get amended xtime
+
+ as of amending, we can create missing xtime, or
+ determine a valid value if what we get is expired
+ (as of the volume mark expiry); way of amendig
+ depends on @opts and on subject of query (master
+ or slave).
+ """
+ if a:
+ rsc = a[0]
+ else:
+ rsc = self.master
+ self.make_xtime_opts(rsc == self.master, opts)
+ return self.xtime_low(rsc, path, **opts)
+
+ def __init__(self, master, slave):
+ self.master = master
+ self.slave = slave
+ self.jobtab = {}
+ if gconf.get("sync-method") == "tarssh":
+ self.syncer = Syncer(slave, self.slave.tarssh, [2])
+ else:
+ # partial transfer (cf. rsync(1)), that's normal
+ self.syncer = Syncer(slave, self.slave.rsync, [23, 24])
+ # crawls vs. turns:
+ # - self.crawls is simply the number of crawl() invocations on root
+ # - one turn is a maximal consecutive sequence of crawls so that each
+ # crawl in it detects a change to be synced
+ # - self.turns is the number of turns since start
+ # - self.total_turns is a limit so that if self.turns reaches it, then
+ # we exit (for diagnostic purposes)
+ # so, eg., if the master fs changes unceasingly, self.turns will remain
+ # 0.
+ self.crawls = 0
+ self.turns = 0
+ self.total_turns = rconf.turns
+ self.crawl_start = datetime.now()
+ self.lastreport = {'crawls': 0, 'turns': 0, 'time': 0}
+ self.start = None
+ self.change_seen = None
+ # the actual volinfo we make use of
+ self.volinfo = None
+ self.terminate = False
+ self.sleep_interval = 1
+ self.unlinked_gfids = set()
+
+ def init_keep_alive(cls):
+ """start the keep-alive thread """
+ timo = gconf.get("slave-timeout", 0)
+ if timo > 0:
+ def keep_alive():
+ while True:
+ vi, gap = cls.keepalive_payload_hook(timo, timo * 0.5)
+ cls.slave.server.keep_alive(vi)
+ time.sleep(gap)
+ t = Thread(target=keep_alive)
+ t.start()
+
+ def mgmt_lock(self):
+
+ """Take management volume lock """
+ if rconf.mgmt_lock_fd:
+ try:
+ fcntl.lockf(rconf.mgmt_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ return True
+ except:
+ ex = sys.exc_info()[1]
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ return False
+ raise
+
+ fd = None
+ bname = str(self.uuid) + "_" + rconf.args.slave_id + "_subvol_" \
+ + str(rconf.args.subvol_num) + ".lock"
+ mgmt_lock_dir = os.path.join(gconf.get("meta-volume-mnt"), "geo-rep")
+ path = os.path.join(mgmt_lock_dir, bname)
+ logging.debug(lf("lock file path", path=path))
+ try:
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ logging.info("Creating geo-rep directory in meta volume...")
+ try:
+ os.makedirs(mgmt_lock_dir)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST:
+ pass
+ else:
+ raise
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ else:
+ raise
+ try:
+ fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ # Save latest FD for future use
+ rconf.mgmt_lock_fd = fd
+ except:
+ ex = sys.exc_info()[1]
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ # cannot grab, it's taken
+ rconf.mgmt_lock_fd = fd
+ return False
+ raise
+
+ return True
+
+ def should_crawl(self):
+ if not gconf.get("use-meta-volume"):
+ return rconf.args.local_node_id in self.master.server.node_uuid()
+
+ if not os.path.ismount(gconf.get("meta-volume-mnt")):
+ logging.error("Meta-volume is not mounted. Worker Exiting...")
+ sys.exit(1)
+ return self.mgmt_lock()
+
+ def register(self):
+ self.register()
+
+ def crawlwrap(self, oneshot=False, register_time=None):
+ if oneshot:
+ # it's important to do this during the oneshot crawl as
+ # for a passive gsyncd (ie. in a replicate scenario)
+ # the keepalive thread would keep the connection alive.
+ self.init_keep_alive()
+
+ # If crawlwrap is called when partial history available,
+ # then it sets register_time which is the time when geo-rep
+ # worker registered to changelog consumption. Since nsec is
+ # not considered in register time, there are chances of skipping
+ # changes detection in xsync crawl. This limit will be reset when
+ # crawlwrap is called again.
+ self.live_changelog_start_time = None
+ if register_time:
+ self.live_changelog_start_time = (register_time, 0)
+
+ # no need to maintain volinfo state machine.
+ # in a cascading setup, each geo-replication session is
+ # independent (ie. 'volume-mark' and 'xtime' are not
+ # propagated). This is because the slave's xtime is now
+ # stored on the master itself. 'volume-mark' just identifies
+ # that we are in a cascading setup and need to enable
+ # 'geo-replication.ignore-pid-check' option.
+ volinfo_sys = self.volinfo_hook()
+ self.volinfo = volinfo_sys[self.KNAT]
+ inter_master = volinfo_sys[self.KFGN]
+ logging.debug("%s master with volume id %s ..." %
+ (inter_master and "intermediate" or "primary",
+ self.uuid))
+ rconf.volume_id = self.uuid
+ if self.volinfo:
+ if self.volinfo['retval']:
+ logging.warn(lf("master cluster's info may not be valid",
+ error=self.volinfo['retval']))
+ else:
+ raise GsyncdError("master volinfo unavailable")
+ self.lastreport['time'] = time.time()
+
+ t0 = time.time()
+ crawl = self.should_crawl()
+ while not self.terminate:
+ if self.start:
+ logging.debug("... crawl #%d done, took %.6f seconds" %
+ (self.crawls, time.time() - self.start))
+ self.start = time.time()
+ should_display_info = self.start - self.lastreport['time'] >= 60
+ if should_display_info:
+ logging.debug("%d crawls, %d turns",
+ self.crawls - self.lastreport['crawls'],
+ self.turns - self.lastreport['turns'])
+ self.lastreport.update(crawls=self.crawls,
+ turns=self.turns,
+ time=self.start)
+ t1 = time.time()
+ if int(t1 - t0) >= gconf.get("replica-failover-interval"):
+ crawl = self.should_crawl()
+ t0 = t1
+ self.update_worker_remote_node()
+ if not crawl:
+ self.status.set_passive()
+ # bring up _this_ brick to the cluster stime
+ # which is min of cluster (but max of the replicas)
+ brick_stime = self.xtime('.', self.slave)
+ cluster_stime = self.master.server.aggregated.stime_mnt(
+ '.', '.'.join([str(self.uuid), rconf.args.slave_id]))
+ logging.debug(lf("Crawl info",
+ cluster_stime=cluster_stime,
+ brick_stime=brick_stime))
+
+ if not isinstance(cluster_stime, int):
+ if brick_stime < cluster_stime:
+ self.slave.server.set_stime(
+ self.FLAT_DIR_HIERARCHY, self.uuid, cluster_stime)
+ self.upd_stime(cluster_stime)
+ # Purge all changelogs available in processing dir
+ # less than cluster_stime
+ proc_dir = os.path.join(self.tempdir,
+ ".processing")
+
+ if os.path.exists(proc_dir):
+ to_purge = [f for f in os.listdir(proc_dir)
+ if (f.startswith("CHANGELOG.") and
+ int(f.split('.')[-1]) <
+ cluster_stime[0])]
+ for f in to_purge:
+ os.remove(os.path.join(proc_dir, f))
+
+ time.sleep(5)
+ continue
+
+ self.status.set_active()
+ self.crawl()
+
+ if oneshot:
+ return
+ time.sleep(self.sleep_interval)
+
+ @staticmethod
+ def humantime(*tpair):
+ """format xtime-like (sec, nsec) pair to human readable format"""
+ ts = datetime.fromtimestamp(float('.'.join(str(n) for n in tpair))).\
+ strftime("%Y-%m-%d %H:%M:%S")
+ if len(tpair) > 1:
+ ts += '.' + str(tpair[1])
+ return ts
+
+ def _crawl_time_format(self, crawl_time):
+ # Ex: 5 years, 4 days, 20:23:10
+ years, days = divmod(crawl_time.days, 365.25)
+ years = int(years)
+ days = int(days)
+
+ date = ""
+ m, s = divmod(crawl_time.seconds, 60)
+ h, m = divmod(m, 60)
+
+ if years != 0:
+ date += "%s %s " % (years, "year" if years == 1 else "years")
+ if days != 0:
+ date += "%s %s " % (days, "day" if days == 1 else "days")
+
+ date += "%s:%s:%s" % (string.zfill(h, 2),
+ string.zfill(m, 2), string.zfill(s, 2))
+ return date
+
+ def add_job(self, path, label, job, *a, **kw):
+ """insert @job function to job table at @path with @label"""
+ if self.jobtab.get(path) is None:
+ self.jobtab[path] = []
+ self.jobtab[path].append((label, a, lambda: job(*a, **kw)))
+
+ def add_failjob(self, path, label):
+ """invoke .add_job with a job that does nothing just fails"""
+ logging.debug('salvaged: ' + label)
+ self.add_job(path, label, lambda: False)
+
+ def wait(self, path, *args):
+ """perform jobs registered for @path
+
+ Reset jobtab entry for @path,
+ determine success as the conjunction of
+ success of all the jobs. In case of
+ success, call .sendmark on @path
+ """
+ jobs = self.jobtab.pop(path, [])
+ succeed = True
+ for j in jobs:
+ ret = j[-1]()
+ if not ret:
+ succeed = False
+ if succeed and not args[0] is None:
+ self.sendmark(path, *args)
+ return succeed
+
+ def sendmark(self, path, mark, adct=None):
+ """update slave side xtime for @path to master side xtime
+
+ also can send a setattr payload (see Server.setattr).
+ """
+ if adct:
+ self.slave.server.setattr(path, adct)
+ self.set_slave_xtime(path, mark)
+
+
+class XCrawlMetadata(object):
+ def __init__(self, st_uid, st_gid, st_mode, st_atime, st_mtime):
+ self.st_uid = int(st_uid)
+ self.st_gid = int(st_gid)
+ self.st_mode = int(st_mode)
+ self.st_atime = float(st_atime)
+ self.st_mtime = float(st_mtime)
+
+
+class GMasterChangelogMixin(GMasterCommon):
+
+ """ changelog based change detection and syncing """
+
+ # index for change type and entry
+ IDX_START = 0
+ IDX_END = 2
+ UNLINK_ENTRY = 2
+
+ POS_GFID = 0
+ POS_TYPE = 1
+ POS_ENTRY1 = -1
+
+ TYPE_META = "M "
+ TYPE_GFID = "D "
+ TYPE_ENTRY = "E "
+
+ MAX_EF_RETRIES = 10
+ MAX_OE_RETRIES = 10
+
+ # flat directory hierarchy for gfid based access
+ FLAT_DIR_HIERARCHY = '.'
+
+ CHANGELOG_CONN_RETRIES = 5
+
+ def init_fop_batch_stats(self):
+ self.batch_stats = {
+ "CREATE": 0,
+ "MKNOD": 0,
+ "UNLINK": 0,
+ "MKDIR": 0,
+ "RMDIR": 0,
+ "LINK": 0,
+ "SYMLINK": 0,
+ "RENAME": 0,
+ "SETATTR": 0,
+ "SETXATTR": 0,
+ "XATTROP": 0,
+ "DATA": 0,
+ "ENTRY_SYNC_TIME": 0,
+ "META_SYNC_TIME": 0,
+ "DATA_START_TIME": 0
+ }
+
+ def update_fop_batch_stats(self, ty):
+ if ty in ['FSETXATTR']:
+ ty = 'SETXATTR'
+ self.batch_stats[ty] = self.batch_stats.get(ty, 0) + 1
+
+ def archive_and_purge_changelogs(self, changelogs):
+ # Creates tar file instead of tar.gz, since changelogs will
+ # be appended to existing tar. archive name is
+ # archive_<YEAR><MONTH>.tar
+ archive_name = "archive_%s.tar" % datetime.today().strftime(
+ gconf.get("changelog-archive-format"))
+
+ try:
+ tar = tarfile.open(os.path.join(self.processed_changelogs_dir,
+ archive_name),
+ "a")
+ except tarfile.ReadError:
+ tar = tarfile.open(os.path.join(self.processed_changelogs_dir,
+ archive_name),
+ "w")
+
+ for f in changelogs:
+ try:
+ f = os.path.basename(f)
+ tar.add(os.path.join(self.processed_changelogs_dir, f),
+ arcname=os.path.basename(f))
+ except:
+ exc = sys.exc_info()[1]
+ if ((isinstance(exc, OSError) or
+ isinstance(exc, IOError)) and exc.errno == ENOENT):
+ continue
+ else:
+ tar.close()
+ raise
+ tar.close()
+
+ for f in changelogs:
+ try:
+ f = os.path.basename(f)
+ os.remove(os.path.join(self.processed_changelogs_dir, f))
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ continue
+ else:
+ raise
+
+ def setup_working_dir(self):
+ workdir = os.path.join(gconf.get("working-dir"),
+ escape(rconf.args.local_path))
+ logging.debug('changelog working dir %s' % workdir)
+ return workdir
+
+ def log_failures(self, failures, entry_key, gfid_prefix, log_prefix):
+ num_failures = 0
+ for failure in failures:
+ st = lstat(os.path.join(gfid_prefix, failure[0][entry_key]))
+ if not isinstance(st, int):
+ num_failures += 1
+ logging.error(lf('%s FAILED' % log_prefix,
+ data=failure))
+ if failure[0]['op'] == 'MKDIR':
+ raise GsyncdError("The above directory failed to sync."
+ " Please fix it to proceed further.")
+
+ self.status.inc_value("failures", num_failures)
+
+ def fix_possible_entry_failures(self, failures, retry_count, entries):
+ pfx = gauxpfx()
+ fix_entry_ops = []
+ failures1 = []
+ remove_gfids = set()
+ for failure in failures:
+ if failure[2]['name_mismatch']:
+ pbname = failure[2]['slave_entry']
+ elif failure[2]['dst']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ op = failure[0]['op']
+ # name exists but gfid is different
+ if failure[2]['gfid_mismatch'] or failure[2]['name_mismatch']:
+ slave_gfid = failure[2]['slave_gfid']
+ st = lstat(os.path.join(pfx, slave_gfid))
+ # Takes care of scenarios with no hardlinks
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug(lf('Entry not present on master. Fixing gfid '
+ 'mismatch in slave. Deleting the entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ # Add deletion to fix_entry_ops list
+ if failure[2]['slave_isdir']:
+ fix_entry_ops.append(
+ edct('RMDIR',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ else:
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ remove_gfids.add(slave_gfid)
+ if op in ['RENAME']:
+ # If renamed gfid doesn't exists on master, remove
+ # rename entry and unlink src on slave
+ st = lstat(os.path.join(pfx, failure[0]['gfid']))
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug("Unlink source %s" % repr(failure))
+ remove_gfids.add(failure[0]['gfid'])
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # Takes care of scenarios of hardlinks/renames on master
+ elif not isinstance(st, int):
+ if matching_disk_gfid(slave_gfid, pbname):
+ # Safe to ignore the failure as master contains same
+ # file with same gfid. Remove entry from entries list
+ logging.debug(lf('Fixing gfid mismatch in slave. '
+ ' Safe to ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ remove_gfids.add(failure[0]['gfid'])
+ if op == 'RENAME':
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # The file exists on master but with different name.
+ # Probably renamed and got missed during xsync crawl.
+ elif failure[2]['slave_isdir']:
+ realpath = os.readlink(os.path.join(
+ rconf.args.local_path,
+ ".glusterfs",
+ slave_gfid[0:2],
+ slave_gfid[2:4],
+ slave_gfid))
+ dst_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ src_entry = pbname
+ logging.debug(lf('Fixing dir name/gfid mismatch in '
+ 'slave', retry_count=retry_count,
+ entry=repr(failure)))
+ if src_entry == dst_entry:
+ # Safe to ignore the failure as master contains
+ # same directory as in slave with same gfid.
+ # Remove the failure entry from entries list
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Safe to ignore, '
+ 'take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ rename_dict = edct('RENAME', gfid=slave_gfid,
+ entry=src_entry,
+ entry1=dst_entry, stat=st,
+ link=None)
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Renaming',
+ retry_count=retry_count,
+ entry=repr(rename_dict)))
+ fix_entry_ops.append(rename_dict)
+ else:
+ # A hardlink file exists with different name or
+ # renamed file exists and we are sure from
+ # matching_disk_gfid check that the entry doesn't
+ # exist with same gfid so we can safely delete on slave
+ logging.debug(lf('Fixing file gfid mismatch in slave. '
+ 'Hardlink/Rename Case. Deleting entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ elif failure[1] == ENOENT:
+ if op in ['RENAME']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ pargfid = pbname.split('/')[1]
+ st = lstat(os.path.join(pfx, pargfid))
+ # Safe to ignore the failure as master doesn't contain
+ # parent directory.
+ if isinstance(st, int):
+ logging.debug(lf('Fixing ENOENT error in slave. Parent '
+ 'does not exist on master. Safe to '
+ 'ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ logging.debug(lf('Fixing ENOENT error in slave. Create '
+ 'parent directory on slave.',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ realpath = os.readlink(os.path.join(rconf.args.local_path,
+ ".glusterfs",
+ pargfid[0:2],
+ pargfid[2:4],
+ pargfid))
+ dir_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ fix_entry_ops.append(
+ edct('MKDIR', gfid=pargfid, entry=dir_entry,
+ mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
+
+ logging.debug("remove_gfids: %s" % repr(remove_gfids))
+ if remove_gfids:
+ for e in entries:
+ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
+ and e['gfid'] in remove_gfids:
+ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
+ e['skip_entry'] = True
+
+ if fix_entry_ops:
+ # Process deletions of entries whose gfids are mismatched
+ failures1 = self.slave.server.entry_ops(fix_entry_ops)
+
+ return (failures1, fix_entry_ops)
+
+ def handle_entry_failures(self, failures, entries):
+ retries = 0
+ pending_failures = False
+ failures1 = []
+ failures2 = []
+ entry_ops1 = []
+ entry_ops2 = []
+
+ if failures:
+ pending_failures = True
+ failures1 = failures
+ entry_ops1 = entries
+
+ while pending_failures and retries < self.MAX_EF_RETRIES:
+ retries += 1
+ (failures2, entry_ops2) = self.fix_possible_entry_failures(
+ failures1, retries, entry_ops1)
+ if not failures2:
+ pending_failures = False
+ logging.info(lf('Successfully fixed entry ops with gfid '
+ 'mismatch', retry_count=retries))
+ else:
+ pending_failures = True
+ failures1 = failures2
+ entry_ops1 = entry_ops2
+
+ if pending_failures:
+ for failure in failures1:
+ logging.error("Failed to fix entry ops %s", repr(failure))
+
+ def process_change(self, change, done, retry):
+ pfx = gauxpfx()
+ clist = []
+ entries = []
+ meta_gfid = set()
+ datas = set()
+
+ change_ts = change.split(".")[-1]
+
+ # Ignore entry ops which are already processed in Changelog modes
+ ignore_entry_ops = False
+ entry_stime = None
+ data_stime = None
+ if self.name in ["live_changelog", "history_changelog"]:
+ entry_stime = self.get_entry_stime()
+ data_stime = self.get_data_stime()
+
+ if entry_stime is not None and data_stime is not None:
+ # if entry_stime is not None but data_stime > entry_stime
+ # This situation is caused by the stime update of Passive worker
+ # Consider data_stime in this case.
+ if data_stime[0] > entry_stime[0]:
+ entry_stime = data_stime
+
+ # Compare the entry_stime with changelog file suffix
+ # if changelog time is less than entry_stime then ignore
+ if int(change_ts) <= entry_stime[0]:
+ ignore_entry_ops = True
+
+ try:
+ f = open(change, "r")
+ clist = f.readlines()
+ f.close()
+ except IOError:
+ raise
+
+ for e in clist:
+ e = e.strip()
+ et = e[self.IDX_START:self.IDX_END] # entry type
+ ec = e[self.IDX_END:].split(' ') # rest of the bits
+
+ # skip ENTRY operation if hot tier brick
+ if self.name == 'live_changelog' or \
+ self.name == 'history_changelog':
+ if rconf.args.is_hottier and et == self.TYPE_ENTRY:
+ logging.debug(lf('skip ENTRY op if hot tier brick',
+ op=ec[self.POS_TYPE]))
+ continue
+
+ # Data and Meta operations are decided while parsing
+ # UNLINK/RMDIR/MKNOD except that case ignore all the other
+ # entry ops if ignore_entry_ops is True.
+ # UNLINK/RMDIR/MKNOD entry_ops are ignored in the end
+ if ignore_entry_ops and et == self.TYPE_ENTRY and \
+ ec[self.POS_TYPE] not in ["UNLINK", "RMDIR", "MKNOD"]:
+ continue
+
+ if et == self.TYPE_ENTRY:
+ # extract information according to the type of
+ # the entry operation. create(), mkdir() and mknod()
+ # have mode, uid, gid information in the changelog
+ # itself, so no need to stat()...
+ ty = ec[self.POS_TYPE]
+
+ self.update_fop_batch_stats(ec[self.POS_TYPE])
+
+ # PARGFID/BNAME
+ en = unescape_space_newline(
+ os.path.join(pfx, ec[self.POS_ENTRY1]))
+ # GFID of the entry
+ gfid = ec[self.POS_GFID]
+
+ if ty in ['UNLINK', 'RMDIR']:
+ # The index of PARGFID/BNAME for UNLINK, RMDIR
+ # is no more the last index. It varies based on
+ # changelog.capture-del-path is enabled or not.
+ en = unescape_space_newline(
+ os.path.join(pfx, ec[self.UNLINK_ENTRY]))
+
+ # Remove from DATA list, so that rsync will
+ # not fail
+ pt = os.path.join(pfx, ec[0])
+ st = lstat(pt)
+ if pt in datas and isinstance(st, int):
+ # file got unlinked, May be historical Changelog
+ datas.remove(pt)
+
+ if ty in ['RMDIR'] and not isinstance(st, int):
+ logging.info(lf('Ignoring rmdir. Directory present in '
+ 'master', gfid=gfid, pgfid_bname=en))
+ continue
+
+ if not gconf.get("ignore-deletes"):
+ if not ignore_entry_ops:
+ entries.append(edct(ty, gfid=gfid, entry=en))
+ elif ty in ['CREATE', 'MKDIR', 'MKNOD']:
+ # Special case: record mknod as link
+ if ty in ['MKNOD']:
+ mode = int(ec[2])
+ if mode & 0o1000:
+ # Avoid stat'ing the file as it
+ # may be deleted in the interim
+ st = FreeObject(st_mode=int(ec[2]),
+ st_uid=int(ec[3]),
+ st_gid=int(ec[4]),
+ st_atime=0,
+ st_mtime=0)
+
+ # So, it may be deleted, but still we are
+ # append LINK? Because, the file will be
+ # CREATED if source not exists.
+ entries.append(edct('LINK', stat=st, entry=en,
+ gfid=gfid))
+
+ # Here, we have the assumption that only
+ # tier-gfid.linkto causes this mknod. Add data
+ datas.add(os.path.join(pfx, ec[0]))
+ continue
+
+ # stat info. present in the changelog itself
+ entries.append(edct(ty, gfid=gfid, entry=en,
+ mode=int(ec[2]),
+ uid=int(ec[3]), gid=int(ec[4])))
+ elif ty == "RENAME":
+ go = os.path.join(pfx, gfid)
+ st = lstat(go)
+ if isinstance(st, int):
+ st = {}
+
+ rl = None
+ if st and stat.S_ISLNK(st.st_mode):
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ rl = None
+
+ e1 = unescape_space_newline(
+ os.path.join(pfx, ec[self.POS_ENTRY1 - 1]))
+ entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en,
+ stat=st, link=rl))
+ # If src doesn't exist while doing rename, destination
+ # is created. If data is not followed by rename, this
+ # remains zero byte file on slave. Hence add data entry
+ # for renames
+ datas.add(os.path.join(pfx, gfid))
+ else:
+ # stat() to get mode and other information
+ if not matching_disk_gfid(gfid, en):
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
+ continue
+
+ go = os.path.join(pfx, gfid)
+ st = lstat(go)
+ if isinstance(st, int):
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
+ continue
+
+ if ty == 'LINK':
+ rl = None
+ if st and stat.S_ISLNK(st.st_mode):
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ rl = None
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
+ link=rl))
+ # If src doesn't exist while doing link, destination
+ # is created based on file type. If data is not
+ # followed by link, this remains zero byte file on
+ # slave. Hence add data entry for links
+ if rl is None:
+ datas.add(os.path.join(pfx, gfid))
+ elif ty == 'SYMLINK':
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ continue
+
+ entries.append(
+ edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
+ else:
+ logging.warn(lf('ignoring op',
+ gfid=gfid,
+ type=ty))
+ elif et == self.TYPE_GFID:
+ # If self.unlinked_gfids is available, then that means it is
+ # retrying the changelog second time. Do not add the GFID's
+ # to rsync job if failed previously but unlinked in master
+ if self.unlinked_gfids and \
+ os.path.join(pfx, ec[0]) in self.unlinked_gfids:
+ logging.debug("ignoring data, since file purged interim")
+ else:
+ datas.add(os.path.join(pfx, ec[0]))
+ elif et == self.TYPE_META:
+ self.update_fop_batch_stats(ec[self.POS_TYPE])
+ if ec[1] == 'SETATTR': # only setattr's for now...
+ if len(ec) == 5:
+ # In xsync crawl, we already have stat data
+ # avoid doing stat again
+ meta_gfid.add((os.path.join(pfx, ec[0]),
+ XCrawlMetadata(st_uid=ec[2],
+ st_gid=ec[3],
+ st_mode=ec[4],
+ st_atime=ec[5],
+ st_mtime=ec[6])))
+ else:
+ meta_gfid.add((os.path.join(pfx, ec[0]), ))
+ elif ec[1] in ['SETXATTR', 'XATTROP', 'FXATTROP']:
+ # To sync xattr/acls use rsync/tar, --xattrs and --acls
+ # switch to rsync and tar
+ if not gconf.get("sync-method") == "tarssh" and \
+ (gconf.get("sync-xattrs") or gconf.get("sync-acls")):
+ datas.add(os.path.join(pfx, ec[0]))
+ else:
+ logging.warn(lf('got invalid fop type',
+ type=et))
+ logging.debug('entries: %s' % repr(entries))
+
+ # Increment counters for Status
+ self.files_in_batch += len(datas)
+ self.status.inc_value("data", len(datas))
+
+ self.batch_stats["DATA"] += self.files_in_batch - \
+ self.batch_stats["SETXATTR"] - \
+ self.batch_stats["XATTROP"]
+
+ entry_start_time = time.time()
+ # sync namespace
+ if entries and not ignore_entry_ops:
+ # Increment counters for Status
+ self.status.inc_value("entry", len(entries))
+
+ failures = self.slave.server.entry_ops(entries)
+
+ if gconf.get("gfid-conflict-resolution"):
+ count = 0
+ if failures:
+ logging.info(lf('Entry ops failed with gfid mismatch',
+ count=len(failures)))
+ while failures and count < self.MAX_OE_RETRIES:
+ count += 1
+ self.handle_entry_failures(failures, entries)
+ logging.info(lf('Retry original entries', count=count))
+ failures = self.slave.server.entry_ops(entries)
+ if not failures:
+ logging.info("Successfully fixed all entry ops with "
+ "gfid mismatch")
+ break
+
+ self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
+ self.status.dec_value("entry", len(entries))
+
+ # Update Entry stime in Brick Root only in case of Changelog mode
+ if self.name in ["live_changelog", "history_changelog"]:
+ entry_stime_to_update = (int(change_ts) - 1, 0)
+ self.upd_entry_stime(entry_stime_to_update)
+ self.status.set_field("last_synced_entry",
+ entry_stime_to_update[0])
+
+ self.batch_stats["ENTRY_SYNC_TIME"] += time.time() - entry_start_time
+
+ if ignore_entry_ops:
+ # Book keeping, to show in logs the range of Changelogs skipped
+ self.num_skipped_entry_changelogs += 1
+ if self.skipped_entry_changelogs_first is None:
+ self.skipped_entry_changelogs_first = change_ts
+
+ self.skipped_entry_changelogs_last = change_ts
+
+ meta_start_time = time.time()
+ # sync metadata
+ if meta_gfid:
+ meta_entries = []
+ for go in meta_gfid:
+ if len(go) > 1:
+ st = go[1]
+ else:
+ st = lstat(go[0])
+ if isinstance(st, int):
+ logging.debug(lf('file got purged in the interim',
+ file=go[0]))
+ continue
+ meta_entries.append(edct('META', go=go[0], stat=st))
+ if meta_entries:
+ self.status.inc_value("meta", len(meta_entries))
+ failures = self.slave.server.meta_ops(meta_entries)
+ self.log_failures(failures, 'go', '', 'META')
+ self.status.dec_value("meta", len(meta_entries))
+
+ self.batch_stats["META_SYNC_TIME"] += time.time() - meta_start_time
+
+ if self.batch_stats["DATA_START_TIME"] == 0:
+ self.batch_stats["DATA_START_TIME"] = time.time()
+
+ # sync data
+ if datas:
+ self.a_syncdata(datas)
+ self.datas_in_batch.update(datas)
+
+ def process(self, changes, done=1):
+ tries = 0
+ retry = False
+ self.unlinked_gfids = set()
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+ # Error log disabled till the last round
+ self.syncer.disable_errorlog()
+ self.skipped_entry_changelogs_first = None
+ self.skipped_entry_changelogs_last = None
+ self.num_skipped_entry_changelogs = 0
+ self.batch_start_time = time.time()
+ self.init_fop_batch_stats()
+
+ while True:
+ # first, fire all changelog transfers in parallel. entry and
+ # metadata are performed synchronously, therefore in serial.
+ # However at the end of each changelog, data is synchronized
+ # with syncdata_async() - which means it is serial w.r.t
+ # entries/metadata of that changelog but happens in parallel
+ # with data of other changelogs.
+
+ if retry:
+ if tries == (gconf.get("max-rsync-retries") - 1):
+ # Enable Error logging if it is last retry
+ self.syncer.enable_errorlog()
+
+ # Remove Unlinked GFIDs from Queue
+ for unlinked_gfid in self.unlinked_gfids:
+ if unlinked_gfid in self.datas_in_batch:
+ self.datas_in_batch.remove(unlinked_gfid)
+
+ # Retry only Sync. Do not retry entry ops
+ if self.datas_in_batch:
+ self.a_syncdata(self.datas_in_batch)
+ else:
+ for change in changes:
+ logging.debug(lf('processing change',
+ changelog=change))
+ self.process_change(change, done, retry)
+ if not retry:
+ # number of changelogs processed in the batch
+ self.turns += 1
+
+ # Now we wait for all the data transfers fired off in the above
+ # step to complete. Note that this is not ideal either. Ideally
+ # we want to trigger the entry/meta-data transfer of the next
+ # batch while waiting for the data transfer of the current batch
+ # to finish.
+
+ # Note that the reason to wait for the data transfer (vs doing it
+ # completely in the background and call the changelog_done()
+ # asynchronously) is because this waiting acts as a "backpressure"
+ # and prevents a spiraling increase of wait stubs from consuming
+ # unbounded memory and resources.
+
+ # update the slave's time with the timestamp of the _last_
+ # changelog file time suffix. Since, the changelog prefix time
+ # is the time when the changelog was rolled over, introduce a
+ # tolerance of 1 second to counter the small delta b/w the
+ # marker update and gettimeofday().
+ # NOTE: this is only for changelog mode, not xsync.
+
+ # @change is the last changelog (therefore max time for this batch)
+ if self.syncdata_wait():
+ self.unlinked_gfids = set()
+ if done:
+ xtl = (int(change.split('.')[-1]) - 1, 0)
+ self.upd_stime(xtl)
+ list(map(self.changelog_done_func, changes))
+ self.archive_and_purge_changelogs(changes)
+
+ # Reset Data counter after sync
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+ break
+
+ # We do not know which changelog transfer failed, retry everything.
+ retry = True
+ tries += 1
+ if tries == gconf.get("max-rsync-retries"):
+ logging.error(lf('changelogs could not be processed '
+ 'completely - moving on...',
+ files=list(map(os.path.basename, changes))))
+
+ # Reset data counter on failure
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+
+ if done:
+ xtl = (int(change.split('.')[-1]) - 1, 0)
+ self.upd_stime(xtl)
+ list(map(self.changelog_done_func, changes))
+ self.archive_and_purge_changelogs(changes)
+ break
+ # it's either entry_ops() or Rsync that failed to do it's
+ # job. Mostly it's entry_ops() [which currently has a problem
+ # of failing to create an entry but failing to return an errno]
+ # Therefore we do not know if it's either Rsync or the freaking
+ # entry_ops() that failed... so we retry the _whole_ changelog
+ # again.
+ # TODO: remove entry retries when it's gets fixed.
+ logging.warn(lf('incomplete sync, retrying changelogs',
+ files=list(map(os.path.basename, changes))))
+
+ # Reset the Data counter before Retry
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.init_fop_batch_stats()
+ time.sleep(0.5)
+
+ # Log the Skipped Entry ops range if any
+ if self.skipped_entry_changelogs_first is not None and \
+ self.skipped_entry_changelogs_last is not None:
+ logging.info(lf("Skipping already processed entry ops",
+ from_changelog=self.skipped_entry_changelogs_first,
+ to_changelog=self.skipped_entry_changelogs_last,
+ num_changelogs=self.num_skipped_entry_changelogs))
+
+ # Log Current batch details
+ if changes:
+ logging.info(
+ lf("Entry Time Taken",
+ UNL=self.batch_stats["UNLINK"],
+ RMD=self.batch_stats["RMDIR"],
+ CRE=self.batch_stats["CREATE"],
+ MKN=self.batch_stats["MKNOD"],
+ MKD=self.batch_stats["MKDIR"],
+ REN=self.batch_stats["RENAME"],
+ LIN=self.batch_stats["LINK"],
+ SYM=self.batch_stats["SYMLINK"],
+ duration="%.4f" % self.batch_stats["ENTRY_SYNC_TIME"]))
+
+ logging.info(
+ lf("Data/Metadata Time Taken",
+ SETA=self.batch_stats["SETATTR"],
+ meta_duration="%.4f" % self.batch_stats["META_SYNC_TIME"],
+ SETX=self.batch_stats["SETXATTR"],
+ XATT=self.batch_stats["XATTROP"],
+ DATA=self.batch_stats["DATA"],
+ data_duration="%.4f" % (
+ time.time() - self.batch_stats["DATA_START_TIME"])))
+
+ logging.info(
+ lf("Batch Completed",
+ mode=self.name,
+ duration="%.4f" % (time.time() - self.batch_start_time),
+ changelog_start=changes[0].split(".")[-1],
+ changelog_end=changes[-1].split(".")[-1],
+ num_changelogs=len(changes),
+ stime=self.get_data_stime(),
+ entry_stime=self.get_entry_stime()))
+
+ def upd_entry_stime(self, stime):
+ self.slave.server.set_entry_stime(self.FLAT_DIR_HIERARCHY,
+ self.uuid,
+ stime)
+
+ def upd_stime(self, stime, path=None):
+ if not path:
+ path = self.FLAT_DIR_HIERARCHY
+ if not stime == URXTIME:
+ self.sendmark(path, stime)
+
+ # Update last_synced_time in status file based on stime
+ # only update stime if stime xattr set to Brick root
+ if path == self.FLAT_DIR_HIERARCHY:
+ chkpt_time = gconf.getr("checkpoint")
+ checkpoint_time = 0
+ if chkpt_time is not None:
+ checkpoint_time = int(chkpt_time)
+
+ self.status.set_last_synced(stime, checkpoint_time)
+
+ def update_worker_remote_node(self):
+ node = rconf.args.resource_remote
+ node_data = node.split("@")
+ node = node_data[-1]
+ remote_node_ip, _ = host_brick_split(node)
+ self.status.set_slave_node(remote_node_ip)
+
+ def changelogs_batch_process(self, changes):
+ changelogs_batches = []
+ current_size = 0
+ for c in changes:
+ si = os.lstat(c).st_size
+ if (si + current_size) > gconf.get("changelog-batch-size"):
+ # Create new batch if single Changelog file greater than
+ # Max Size! or current batch size exceeds Max size
+ changelogs_batches.append([c])
+ current_size = si
+ else:
+ # Append to last batch, if No batches available Create one
+ current_size += si
+ if not changelogs_batches:
+ changelogs_batches.append([c])
+ else:
+ changelogs_batches[-1].append(c)
+
+ for batch in changelogs_batches:
+ logging.debug(lf('processing changes',
+ batch=batch))
+ self.process(batch)
+
+ def crawl(self):
+ self.status.set_worker_crawl_status("Changelog Crawl")
+ changes = []
+ # get stime (from the brick) and purge changelogs
+ # that are _historical_ to that time.
+ data_stime = self.get_data_stime()
+
+ libgfchangelog.scan()
+ self.crawls += 1
+ changes = libgfchangelog.getchanges()
+ if changes:
+ if data_stime:
+ logging.info(lf("slave's time",
+ stime=data_stime))
+ processed = [x for x in changes
+ if int(x.split('.')[-1]) < data_stime[0]]
+ for pr in processed:
+ logging.debug(
+ lf('skipping already processed change',
+ changelog=os.path.basename(pr)))
+ self.changelog_done_func(pr)
+ changes.remove(pr)
+ self.archive_and_purge_changelogs(processed)
+
+ self.changelogs_batch_process(changes)
+
+ def register(self, register_time, status):
+ self.sleep_interval = gconf.get("change-interval")
+ self.changelog_done_func = libgfchangelog.done
+ self.tempdir = self.setup_working_dir()
+ self.processed_changelogs_dir = os.path.join(self.tempdir,
+ ".processed")
+ self.name = "live_changelog"
+ self.status = status
+
+
+class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
+ def register(self, register_time, status):
+ self.changelog_register_time = register_time
+ self.history_crawl_start_time = register_time
+ self.changelog_done_func = libgfchangelog.history_done
+ self.history_turns = 0
+ self.tempdir = self.setup_working_dir()
+ self.processed_changelogs_dir = os.path.join(self.tempdir,
+ ".history/.processed")
+ self.name = "history_changelog"
+ self.status = status
+
+ def crawl(self):
+ self.history_turns += 1
+ self.status.set_worker_crawl_status("History Crawl")
+ data_stime = self.get_data_stime()
+
+ end_time = int(time.time())
+
+ #as start of historical crawl marks Geo-rep worker restart
+ if gconf.get("ignore-deletes"):
+ logging.info(lf('ignore-deletes config option is set',
+ stime=data_stime))
+
+ logging.info(lf('starting history crawl',
+ turns=self.history_turns,
+ stime=data_stime,
+ etime=end_time,
+ entry_stime=self.get_entry_stime()))
+
+ if not data_stime or data_stime == URXTIME:
+ raise NoStimeAvailable()
+
+ # Changelogs backend path is hardcoded as
+ # <BRICK_PATH>/.glusterfs/changelogs, if user configured to different
+ # location then consuming history will not work(Known issue as of now)
+ changelog_path = os.path.join(rconf.args.local_path,
+ ".glusterfs/changelogs")
+ ret, actual_end = libgfchangelog.history_changelog(
+ changelog_path,
+ data_stime[0],
+ end_time,
+ gconf.get("sync-jobs"))
+
+ # scan followed by getchanges till scan returns zero.
+ # history_scan() is blocking call, till it gets the number
+ # of changelogs to process. Returns zero when no changelogs
+ # to be processed. returns positive value as number of changelogs
+ # to be processed, which will be fetched using
+ # history_getchanges()
+ while libgfchangelog.history_scan() > 0:
+ self.crawls += 1
+
+ changes = libgfchangelog.history_getchanges()
+ if changes:
+ if data_stime:
+ logging.info(lf("slave's time",
+ stime=data_stime))
+ processed = [x for x in changes
+ if int(x.split('.')[-1]) < data_stime[0]]
+ for pr in processed:
+ logging.debug(lf('skipping already processed change',
+ changelog=os.path.basename(pr)))
+ self.changelog_done_func(pr)
+ changes.remove(pr)
+
+ self.changelogs_batch_process(changes)
+
+ history_turn_time = int(time.time()) - self.history_crawl_start_time
+
+ logging.info(lf('finished history crawl',
+ endtime=actual_end,
+ stime=self.get_data_stime(),
+ entry_stime=self.get_entry_stime()))
+
+ # If TS returned from history_changelog is < register_time
+ # then FS crawl may be required, since history is only available
+ # till TS returned from history_changelog
+ if actual_end < self.changelog_register_time:
+ if self.history_turns < 2:
+ sleep_time = 1
+ if history_turn_time < CHANGELOG_ROLLOVER_TIME:
+ sleep_time = CHANGELOG_ROLLOVER_TIME - history_turn_time
+ time.sleep(sleep_time)
+ self.history_crawl_start_time = int(time.time())
+ self.crawl()
+ else:
+ # This exception will be caught in resource.py and
+ # fallback to xsync for the small gap.
+ raise PartialHistoryAvailable(str(actual_end))
+
+
+class GMasterXsyncMixin(GMasterChangelogMixin):
+
+ """
+ This crawl needs to be xtime based (as of now
+ it's not. this is because we generate CHANGELOG
+ file during each crawl which is then processed
+ by process_change()).
+ For now it's used as a one-shot initial sync
+ mechanism and only syncs directories, regular
+ files, hardlinks and symlinks.
+ """
+
+ XSYNC_MAX_ENTRIES = 1 << 13
+
+ def register(self, register_time=None, status=None):
+ self.status = status
+ self.counter = 0
+ self.comlist = []
+ self.stimes = []
+ self.sleep_interval = 60
+ self.tempdir = self.setup_working_dir()
+ logging.info(lf('Working dir',
+ path=self.tempdir))
+ self.tempdir = os.path.join(self.tempdir, 'xsync')
+ self.processed_changelogs_dir = self.tempdir
+ self.name = "xsync"
+ try:
+ os.makedirs(self.tempdir)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST and os.path.isdir(self.tempdir):
+ pass
+ else:
+ raise
+ # Purge stale unprocessed xsync changelogs
+ for f in os.listdir(self.tempdir):
+ if f.startswith("XSYNC-CHANGELOG"):
+ os.remove(os.path.join(self.tempdir, f))
+
+
+ def crawl(self):
+ """
+ event dispatcher thread
+
+ this thread dispatches either changelog or synchronizes stime.
+ additionally terminates itself on receiving a 'finale' event
+ """
+ def Xsyncer():
+ self.Xcrawl()
+ t = Thread(target=Xsyncer)
+ t.start()
+ logging.info(lf('starting hybrid crawl',
+ stime=self.get_data_stime()))
+ self.status.set_worker_crawl_status("Hybrid Crawl")
+ while True:
+ try:
+ item = self.comlist.pop(0)
+ if item[0] == 'finale':
+ logging.info(lf('finished hybrid crawl',
+ stime=self.get_data_stime()))
+ break
+ elif item[0] == 'xsync':
+ logging.info(lf('processing xsync changelog',
+ path=item[1]))
+ self.process([item[1]], 0)
+ self.archive_and_purge_changelogs([item[1]])
+ elif item[0] == 'stime':
+ logging.debug(lf('setting slave time',
+ time=item[1]))
+ self.upd_stime(item[1][1], item[1][0])
+ else:
+ logging.warn(lf('unknown tuple in comlist',
+ entry=item))
+ except IndexError:
+ time.sleep(1)
+
+ def write_entry_change(self, prefix, data=[]):
+ if not getattr(self, "fh", None):
+ self.open()
+
+ self.fh.write("%s %s\n" % (prefix, ' '.join(data)))
+
+ def open(self):
+ try:
+ self.xsync_change = os.path.join(
+ self.tempdir, 'XSYNC-CHANGELOG.' + str(int(time.time())))
+ self.fh = open(self.xsync_change, 'w')
+ except IOError:
+ raise
+
+ def close(self):
+ if getattr(self, "fh", None):
+ self.fh.flush()
+ os.fsync(self.fh.fileno())
+ self.fh.close()
+ self.fh = None
+
+ def fname(self):
+ return self.xsync_change
+
+ def put(self, mark, item):
+ self.comlist.append((mark, item))
+
+ def sync_xsync(self, last):
+ """schedule a processing of changelog"""
+ self.close()
+ if self.counter > 0:
+ self.put('xsync', self.fname())
+ self.counter = 0
+ if not last:
+ time.sleep(1) # make sure changelogs are 1 second apart
+
+ def sync_stime(self, stime=None, last=False):
+ """schedule a stime synchronization"""
+ if stime:
+ self.put('stime', stime)
+ if last:
+ self.put('finale', None)
+
+ def sync_done(self, stime=[], last=False):
+ self.sync_xsync(last)
+ if stime:
+ # Send last as True only for last stime entry
+ for st in stime[:-1]:
+ self.sync_stime(st, False)
+
+ if stime and stime[-1]:
+ self.sync_stime(stime[-1], last)
+
+ def is_sticky(self, path, mo):
+ """check for DHTs linkto sticky bit file"""
+ sticky = False
+ if mo & 0o1000:
+ sticky = self.master.server.linkto_check(path)
+ return sticky
+
+ def Xcrawl(self, path='.', xtr_root=None):
+ """
+ generate a CHANGELOG file consumable by process_change.
+
+ slave's xtime (stime) is _cached_ for comparisons across
+ the filesystem tree, but set after directory synchronization.
+ """
+ if path == '.':
+ self.crawls += 1
+ if not xtr_root:
+ # get the root stime and use it for all comparisons
+ xtr_root = self.xtime('.', self.slave)
+ if isinstance(xtr_root, int):
+ if xtr_root != ENOENT:
+ logging.warn(lf("slave cluster not returning the "
+ "xtime for root",
+ error=xtr_root))
+ xtr_root = self.minus_infinity
+ xtl = self.xtime(path)
+ if isinstance(xtl, int):
+ logging.warn("master cluster's xtime not found")
+ xtr = self.xtime(path, self.slave)
+ if isinstance(xtr, int):
+ if xtr != ENOENT:
+ logging.warn(lf("slave cluster not returning the "
+ "xtime for dir",
+ path=path,
+ error=xtr))
+ xtr = self.minus_infinity
+ xtr = max(xtr, xtr_root)
+ zero_zero = (0, 0)
+ if xtr_root == zero_zero:
+ xtr = self.minus_infinity
+ if not self.need_sync(path, xtl, xtr):
+ if path == '.':
+ self.sync_done([(path, xtl)], True)
+ return
+ self.xtime_reversion_hook(path, xtl, xtr)
+ logging.debug("entering " + path)
+ dem = self.master.server.entries(path)
+ pargfid = self.master.server.gfid(path)
+ if isinstance(pargfid, int):
+ logging.warn(lf('skipping directory',
+ path=path))
+ for e in dem:
+ bname = e
+ e = os.path.join(path, e)
+ xte = self.xtime(e)
+ if isinstance(xte, int):
+ logging.warn(lf("irregular xtime",
+ path=e,
+ error=errno.errorcode[xte]))
+ continue
+ if not self.need_sync(e, xte, xtr):
+ continue
+ st = self.master.server.lstat(e)
+ if isinstance(st, int):
+ logging.warn(lf('got purged in the interim',
+ path=e))
+ continue
+ if self.is_sticky(e, st.st_mode):
+ logging.debug(lf('ignoring sticky bit file',
+ path=e))
+ continue
+ gfid = self.master.server.gfid(e)
+ if isinstance(gfid, int):
+ logging.warn(lf('skipping entry',
+ path=e))
+ continue
+ mo = st.st_mode
+ self.counter += 1 if ((stat.S_ISDIR(mo) or
+ stat.S_ISLNK(mo) or
+ stat.S_ISREG(mo))) else 0
+ if self.counter == self.XSYNC_MAX_ENTRIES:
+ self.sync_done(self.stimes, False)
+ self.stimes = []
+ if stat.S_ISDIR(mo):
+ self.write_entry_change("E",
+ [gfid, 'MKDIR', str(mo),
+ str(0), str(0), escape_space_newline(
+ os.path.join(pargfid, bname))])
+ self.write_entry_change("M", [gfid, "SETATTR", str(st.st_uid),
+ str(st.st_gid), str(st.st_mode),
+ str(st.st_atime),
+ str(st.st_mtime)])
+ self.Xcrawl(e, xtr_root)
+ stime_to_update = xte
+ # Live Changelog Start time indicates that from that time
+ # onwards Live changelogs are available. If we update stime
+ # greater than live_changelog_start time then Geo-rep will
+ # skip those changelogs as already processed. But Xsync
+ # actually failed to sync the deletes and Renames. Update
+ # stime as min(Live_changelogs_time, Actual_stime) When it
+ # switches to Changelog mode, it syncs Deletes and Renames.
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xte)
+ self.stimes.append((e, stime_to_update))
+ elif stat.S_ISLNK(mo):
+ self.write_entry_change(
+ "E", [gfid, 'SYMLINK', escape_space_newline(
+ os.path.join(pargfid, bname))])
+ elif stat.S_ISREG(mo):
+ nlink = st.st_nlink
+ nlink -= 1 # fixup backend stat link count
+ # if a file has a hardlink, create a Changelog entry as
+ # 'LINK' so the slave side will decide if to create the
+ # new entry, or to create link.
+ if nlink == 1:
+ self.write_entry_change("E",
+ [gfid, 'MKNOD', str(mo),
+ str(0), str(0),
+ escape_space_newline(
+ os.path.join(
+ pargfid, bname))])
+ else:
+ self.write_entry_change(
+ "E", [gfid, 'LINK', escape_space_newline(
+ os.path.join(pargfid, bname))])
+ self.write_entry_change("D", [gfid])
+ if path == '.':
+ stime_to_update = xtl
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xtl)
+ self.stimes.append((path, stime_to_update))
+ self.sync_done(self.stimes, True)
+
+
+class BoxClosedErr(Exception):
+ pass
+
+
+class PostBox(list):
+
+ """synchronized collection for storing things thought of as "requests" """
+
+ def __init__(self, *a):
+ list.__init__(self, *a)
+ # too bad Python stdlib does not have read/write locks...
+ # it would suffivce to grab the lock in .append as reader, in .close as
+ # writer
+ self.lever = Condition()
+ self.open = True
+ self.done = False
+
+ def wait(self):
+ """wait on requests to be processed"""
+ self.lever.acquire()
+ if not self.done:
+ self.lever.wait()
+ self.lever.release()
+ return self.result
+
+ def wakeup(self, data):
+ """wake up requestors with the result"""
+ self.result = data
+ self.lever.acquire()
+ self.done = True
+ self.lever.notifyAll()
+ self.lever.release()
+
+ def append(self, e):
+ """post a request"""
+ self.lever.acquire()
+ if not self.open:
+ raise BoxClosedErr
+ list.append(self, e)
+ self.lever.release()
+
+ def close(self):
+ """prohibit the posting of further requests"""
+ self.lever.acquire()
+ self.open = False
+ self.lever.release()
+
+
+class Syncer(object):
+
+ """a staged queue to relay rsync requests to rsync workers
+
+ By "staged queue" its meant that when a consumer comes to the
+ queue, it takes _all_ entries, leaving the queue empty.
+ (I don't know if there is an official term for this pattern.)
+
+ The queue uses a PostBox to accumulate incoming items.
+ When a consumer (rsync worker) comes, a new PostBox is
+ set up and the old one is passed on to the consumer.
+
+ Instead of the simplistic scheme of having one big lock
+ which synchronizes both the addition of new items and
+ PostBox exchanges, use a separate lock to arbitrate consumers,
+ and rely on PostBox's synchronization mechanisms take
+ care about additions.
+
+ There is a corner case racy situation, producers vs. consumers,
+ which is not handled by this scheme: namely, when the PostBox
+ exchange occurs in between being passed to the producer for posting
+ and the post placement. But that's what Postbox.close is for:
+ such a posting will find the PostBox closed, in which case
+ the producer can re-try posting against the actual PostBox of
+ the queue.
+
+ To aid accumlation of items in the PostBoxen before grabbed
+ by an rsync worker, the worker goes to sleep a bit after
+ each completed syncjob.
+ """
+
+ def __init__(self, slave, sync_engine, resilient_errnos=[]):
+ """spawn worker threads"""
+ self.log_err = False
+ self.slave = slave
+ self.lock = Lock()
+ self.pb = PostBox()
+ self.sync_engine = sync_engine
+ self.errnos_ok = resilient_errnos
+ for i in range(gconf.get("sync-jobs")):
+ t = Thread(target=self.syncjob, args=(i + 1, ))
+ t.start()
+
+ def syncjob(self, job_id):
+ """the life of a worker"""
+ while True:
+ pb = None
+ while True:
+ self.lock.acquire()
+ if self.pb:
+ pb, self.pb = self.pb, PostBox()
+ self.lock.release()
+ if pb:
+ break
+ time.sleep(0.5)
+ pb.close()
+ start = time.time()
+ po = self.sync_engine(pb, self.log_err)
+ logging.info(lf("Sync Time Taken",
+ job=job_id,
+ num_files=len(pb),
+ return_code=po.returncode,
+ duration="%.4f" % (time.time() - start)))
+
+ if po.returncode == 0:
+ ret = (True, 0)
+ elif po.returncode in self.errnos_ok:
+ ret = (False, po.returncode)
+ else:
+ po.errfail()
+ pb.wakeup(ret)
+
+ def add(self, e):
+ while True:
+ pb = self.pb
+ try:
+ pb.append(e)
+ return pb
+ except BoxClosedErr:
+ pass
+
+ def enable_errorlog(self):
+ self.log_err = True
+
+ def disable_errorlog(self):
+ self.log_err = False
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
new file mode 100644
index 00000000000..6aa7b9dfc99
--- /dev/null
+++ b/geo-replication/syncdaemon/monitor.py
@@ -0,0 +1,395 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+import time
+import signal
+import logging
+import xml.etree.ElementTree as XET
+from threading import Lock
+from errno import ECHILD, ESRCH
+import random
+
+from resource import SSH
+import gsyncdconfig as gconf
+import libgfchangelog
+from rconf import rconf
+from syncdutils import (select, waitpid, errno_wrap, lf, grabpidfile,
+ set_term_handler, GsyncdError,
+ Thread, finalize, Volinfo, VolinfoFromGconf,
+ gf_event, EVENT_GEOREP_FAULTY, get_up_nodes,
+ unshare_propagation_supported)
+from gsyncdstatus import GeorepStatus, set_monitor_status
+import py2py3
+from py2py3 import pipe
+
+ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
+
+
+def get_subvol_num(brick_idx, vol, hot):
+ tier = vol.is_tier()
+ disperse_count = vol.disperse_count(tier, hot)
+ replica_count = vol.replica_count(tier, hot)
+ distribute_count = vol.distribution_count(tier, hot)
+ gconf.setconfig("master-distribution-count", distribute_count)
+
+ if (tier and not hot):
+ brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
+
+ subvol_size = disperse_count if disperse_count > 0 else replica_count
+ cnt = int((brick_idx + 1) / subvol_size)
+ rem = (brick_idx + 1) % subvol_size
+ if rem > 0:
+ cnt = cnt + 1
+
+ if (tier and hot):
+ return "hot_" + str(cnt)
+ elif (tier and not hot):
+ return "cold_" + str(cnt)
+ else:
+ return str(cnt)
+
+
+class Monitor(object):
+
+ """class which spawns and manages gsyncd workers"""
+
+ ST_INIT = 'Initializing...'
+ ST_STARTED = 'Started'
+ ST_STABLE = 'Active'
+ ST_FAULTY = 'Faulty'
+ ST_INCON = 'inconsistent'
+ _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON]
+
+ def __init__(self):
+ self.lock = Lock()
+ self.state = {}
+ self.status = {}
+
+ @staticmethod
+ def terminate():
+ # relax one SIGTERM by setting a handler that sets back
+ # standard handler
+ set_term_handler(lambda *a: set_term_handler())
+ # give a chance to graceful exit
+ errno_wrap(os.kill, [-os.getpid(), signal.SIGTERM], [ESRCH])
+
+ def monitor(self, w, argv, cpids, slave_vol, slave_host, master,
+ suuid, slavenodes):
+ """the monitor loop
+
+ Basic logic is a blantantly simple blunt heuristics:
+ if spawned client survives 60 secs, it's considered OK.
+ This servers us pretty well as it's not vulneralbe to
+ any kind of irregular behavior of the child...
+
+ ... well, except for one: if children is hung up on
+ waiting for some event, it can survive aeons, still
+ will be defunct. So we tweak the above logic to
+ expect the worker to send us a signal within 60 secs
+ (in the form of closing its end of a pipe). The worker
+ does this when it's done with the setup stage
+ ready to enter the service loop (note it's the setup
+ stage which is vulnerable to hangs -- the full
+ blown worker blows up on EPIPE if the net goes down,
+ due to the keep-alive thread)
+ """
+ if not self.status.get(w[0]['dir'], None):
+ self.status[w[0]['dir']] = GeorepStatus(gconf.get("state-file"),
+ w[0]['host'],
+ w[0]['dir'],
+ w[0]['uuid'],
+ master,
+ "%s::%s" % (slave_host,
+ slave_vol))
+ ret = 0
+
+ def nwait(p, o=0):
+ try:
+ p2, r = waitpid(p, o)
+ if not p2:
+ return
+ return r
+ except OSError as e:
+ # no child process, this happens if the child process
+ # already died and has been cleaned up
+ if e.errno == ECHILD:
+ return -1
+ else:
+ raise
+
+ def exit_signalled(s):
+ """ child terminated due to receipt of SIGUSR1 """
+ return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))
+
+ def exit_status(s):
+ if os.WIFEXITED(s):
+ return os.WEXITSTATUS(s)
+ return 1
+
+ conn_timeout = gconf.get("connection-timeout")
+ while ret in (0, 1):
+ remote_user, remote_host = w[1][0].split("@")
+ remote_id = w[1][1]
+ # Check the status of the connected slave node
+ # If the connected slave node is down then try to connect to
+ # different up node.
+ current_slave_host = remote_host
+ slave_up_hosts = get_up_nodes(slavenodes, gconf.get("ssh-port"))
+
+ if (current_slave_host, remote_id) not in slave_up_hosts:
+ if len(slave_up_hosts) > 0:
+ remote_new = random.choice(slave_up_hosts)
+ remote_host = "%s@%s" % (remote_user, remote_new[0])
+ remote_id = remote_new[1]
+
+ # Spawn the worker in lock to avoid fd leak
+ self.lock.acquire()
+
+ self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
+ logging.info(lf('starting gsyncd worker',
+ brick=w[0]['dir'],
+ slave_node=remote_host))
+
+ pr, pw = pipe()
+ cpid = os.fork()
+ if cpid == 0:
+ os.close(pr)
+
+ args_to_worker = argv + [
+ 'worker',
+ rconf.args.master,
+ rconf.args.slave,
+ '--feedback-fd', str(pw),
+ '--local-path', w[0]['dir'],
+ '--local-node', w[0]['host'],
+ '--local-node-id', w[0]['uuid'],
+ '--slave-id', suuid,
+ '--subvol-num', str(w[2]),
+ '--resource-remote', remote_host,
+ '--resource-remote-id', remote_id
+ ]
+
+ if rconf.args.config_file is not None:
+ args_to_worker += ['-c', rconf.args.config_file]
+
+ if w[3]:
+ args_to_worker.append("--is-hottier")
+
+ if rconf.args.debug:
+ args_to_worker.append("--debug")
+
+ access_mount = gconf.get("access-mount")
+ if access_mount:
+ os.execv(sys.executable, args_to_worker)
+ else:
+ if unshare_propagation_supported():
+ logging.debug("Worker would mount volume privately")
+ unshare_cmd = ['unshare', '-m', '--propagation',
+ 'private']
+ cmd = unshare_cmd + args_to_worker
+ os.execvp("unshare", cmd)
+ else:
+ logging.debug("Mount is not private. It would be lazy"
+ " umounted")
+ os.execv(sys.executable, args_to_worker)
+
+ cpids.add(cpid)
+ os.close(pw)
+
+ self.lock.release()
+
+ t0 = time.time()
+ so = select((pr,), (), (), conn_timeout)[0]
+ os.close(pr)
+
+ if so:
+ ret = nwait(cpid, os.WNOHANG)
+
+ if ret is not None:
+ logging.info(lf("worker died before establishing "
+ "connection",
+ brick=w[0]['dir']))
+ else:
+ logging.debug("worker(%s) connected" % w[0]['dir'])
+ while time.time() < t0 + conn_timeout:
+ ret = nwait(cpid, os.WNOHANG)
+
+ if ret is not None:
+ logging.info(lf("worker died in startup phase",
+ brick=w[0]['dir']))
+ break
+
+ time.sleep(1)
+ else:
+ logging.info(
+ lf("Worker not confirmed after wait, aborting it. "
+ "Gsyncd invocation on remote slave via SSH or "
+ "gluster master mount might have hung. Please "
+ "check the above logs for exact issue and check "
+ "master or slave volume for errors. Restarting "
+ "master/slave volume accordingly might help.",
+ brick=w[0]['dir'],
+ timeout=conn_timeout))
+ errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
+ ret = nwait(cpid)
+ if ret is None:
+ ret = nwait(cpid)
+ if exit_signalled(ret):
+ ret = 0
+ else:
+ ret = exit_status(ret)
+ if ret in (0, 1):
+ self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY)
+ gf_event(EVENT_GEOREP_FAULTY,
+ master_volume=master.volume,
+ master_node=w[0]['host'],
+ master_node_id=w[0]['uuid'],
+ slave_host=slave_host,
+ slave_volume=slave_vol,
+ current_slave_host=current_slave_host,
+ brick_path=w[0]['dir'])
+ time.sleep(10)
+ self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
+ return ret
+
+ def multiplex(self, wspx, suuid, slave_vol, slave_host, master, slavenodes):
+ argv = [os.path.basename(sys.executable), sys.argv[0]]
+
+ cpids = set()
+ ta = []
+ for wx in wspx:
+ def wmon(w):
+ cpid, _ = self.monitor(w, argv, cpids, slave_vol,
+ slave_host, master, suuid, slavenodes)
+ time.sleep(1)
+ self.lock.acquire()
+ for cpid in cpids:
+ errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
+ self.lock.release()
+ finalize(exval=1)
+ t = Thread(target=wmon, args=[wx])
+ t.start()
+ ta.append(t)
+
+ # monitor status was being updated in each monitor thread. It
+ # should not be done as it can cause deadlock for a worker start.
+ # set_monitor_status uses flock to synchronize multple instances
+ # updating the file. Since each monitor thread forks worker,
+ # these processes can hold the reference to fd of status
+ # file causing deadlock to workers which starts later as flock
+ # will not be release until all references to same fd is closed.
+ # It will also cause fd leaks.
+
+ self.lock.acquire()
+ set_monitor_status(gconf.get("state-file"), self.ST_STARTED)
+ self.lock.release()
+ for t in ta:
+ t.join()
+
+
+def distribute(master, slave):
+ if rconf.args.use_gconf_volinfo:
+ mvol = VolinfoFromGconf(master.volume, master=True)
+ else:
+ mvol = Volinfo(master.volume, master.host, master=True)
+ logging.debug('master bricks: ' + repr(mvol.bricks))
+ prelude = []
+ slave_host = None
+ slave_vol = None
+
+ prelude = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ [slave.remote_addr]
+
+ logging.debug('slave SSH gateway: ' + slave.remote_addr)
+
+ if rconf.args.use_gconf_volinfo:
+ svol = VolinfoFromGconf(slave.volume, master=False)
+ else:
+ svol = Volinfo(slave.volume, "localhost", prelude, master=False)
+
+ sbricks = svol.bricks
+ suuid = svol.uuid
+ slave_host = slave.remote_addr.split('@')[-1]
+ slave_vol = slave.volume
+
+ # save this xattr for the session delete command
+ old_stime_xattr_prefix = gconf.get("stime-xattr-prefix", None)
+ new_stime_xattr_prefix = "trusted.glusterfs." + mvol.uuid + "." + \
+ svol.uuid
+ if not old_stime_xattr_prefix or \
+ old_stime_xattr_prefix != new_stime_xattr_prefix:
+ gconf.setconfig("stime-xattr-prefix", new_stime_xattr_prefix)
+
+ logging.debug('slave bricks: ' + repr(sbricks))
+
+ slavenodes = set((b['host'], b["uuid"]) for b in sbricks)
+ rap = SSH.parse_ssh_address(slave)
+ slaves = [(rap['user'] + '@' + h[0], h[1]) for h in slavenodes]
+
+ workerspex = []
+ for idx, brick in enumerate(mvol.bricks):
+ if rconf.args.local_node_id == brick['uuid']:
+ is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']]))
+ workerspex.append((brick,
+ slaves[idx % len(slaves)],
+ get_subvol_num(idx, mvol, is_hot),
+ is_hot))
+ logging.debug('worker specs: ' + repr(workerspex))
+ return workerspex, suuid, slave_vol, slave_host, master, slavenodes
+
+
+def monitor(local, remote):
+ # Check if gsyncd restarted in pause state. If
+ # yes, send SIGSTOP to negative of monitor pid
+ # to go back to pause state.
+ if rconf.args.pause_on_start:
+ errno_wrap(os.kill, [-os.getpid(), signal.SIGSTOP], [ESRCH])
+
+ """oh yeah, actually Monitor is used as singleton, too"""
+ return Monitor().multiplex(*distribute(local, remote))
+
+
+def startup(go_daemon=True):
+ """set up logging, pidfile grabbing, daemonization"""
+ pid_file = gconf.get("pid-file")
+ if not grabpidfile():
+ sys.stderr.write("pidfile is taken, exiting.\n")
+ sys.exit(2)
+ rconf.pid_file_owned = True
+
+ if not go_daemon:
+ return
+
+ x, y = pipe()
+ cpid = os.fork()
+ if cpid:
+ os.close(x)
+ sys.exit()
+ os.close(y)
+ os.setsid()
+ dn = os.open(os.devnull, os.O_RDWR)
+ for f in (sys.stdin, sys.stdout, sys.stderr):
+ os.dup2(dn, f.fileno())
+
+ if not grabpidfile(pid_file + '.tmp'):
+ raise GsyncdError("cannot grab temporary pidfile")
+
+ os.rename(pid_file + '.tmp', pid_file)
+
+ # wait for parent to terminate
+ # so we can start up with
+ # no messing from the dirty
+ # ol' bustard
+ select((x,), (), ())
+ os.close(x)
diff --git a/geo-replication/syncdaemon/py2py3.py b/geo-replication/syncdaemon/py2py3.py
new file mode 100644
index 00000000000..f9c76e1b50a
--- /dev/null
+++ b/geo-replication/syncdaemon/py2py3.py
@@ -0,0 +1,184 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import sys
+import os
+import stat
+import struct
+from syncdutils import umask
+from ctypes import create_string_buffer
+
+if sys.version_info >= (3,):
+ def pipe():
+ (r, w) = os.pipe()
+ os.set_inheritable(r, True)
+ os.set_inheritable(w, True)
+ return (r, w)
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ # Raw conversion of string to bytes. This is required to convert
+ # back the string into bytearray(c char array) to use in struc
+ # pack/unpacking. Again encode/decode can't be used as it
+ # converts it alters size.
+ def str_to_bytearray(string):
+ return bytes([ord(c) for c in string])
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path.encode(), size, syscall,
+ attr.encode())
+ else:
+ return cls._query_xattr(path.encode(), size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
+ len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path.encode(), attr.encode())
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick.encode(),
+ path.encode(),
+ log_file.encode(),
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile.encode())
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path.encode(),
+ start, end, num_parallel,
+ actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile.encode())
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+else:
+ def pipe():
+ (r, w) = os.pipe()
+ return (r, w)
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ # Raw conversion of string to bytearray
+ def str_to_bytearray(string):
+ return string
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path, size, syscall, attr)
+ else:
+ return cls._query_xattr(path, size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path, attr)
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile)
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile)
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
diff --git a/geo-replication/syncdaemon/rconf.py b/geo-replication/syncdaemon/rconf.py
new file mode 100644
index 00000000000..ff716ee4d6d
--- /dev/null
+++ b/geo-replication/syncdaemon/rconf.py
@@ -0,0 +1,31 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+
+class RConf(object):
+
+ """singleton class to store runtime globals
+ shared between gsyncd modules"""
+
+ ssh_ctl_dir = None
+ ssh_ctl_args = None
+ cpid = None
+ pid_file_owned = False
+ log_exit = False
+ permanent_handles = []
+ log_metadata = {}
+ mgmt_lock_fd = None
+ args = None
+ turns = 0
+ mountbroker = False
+ mount_point = None
+ mbr_umount_cmd = []
+
+rconf = RConf()
diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
index 755fb61df48..c622afa6373 100644
--- a/xlators/features/marker/utils/syncdaemon/repce.py
+++ b/geo-replication/syncdaemon/repce.py
@@ -1,32 +1,40 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
import os
import sys
import time
import logging
from threading import Condition
try:
- import thread
-except ImportError:
- # py 3
import _thread as thread
-try:
- from Queue import Queue
except ImportError:
- # py 3
+ import thread
+try:
from queue import Queue
+except ImportError:
+ from Queue import Queue
try:
import cPickle as pickle
except ImportError:
- # py 3
import pickle
-from syncdutils import Thread, select
+from syncdutils import Thread, select, lf
-pickle_proto = -1
+pickle_proto = 2
repce_version = 1.0
+
def ioparse(i, o):
if isinstance(i, int):
- i = os.fdopen(i)
+ i = os.fdopen(i, 'rb')
# rely on duck typing for recognizing
# streams as that works uniformly
# in py2 and py3
@@ -34,6 +42,7 @@ def ioparse(i, o):
o = o.fileno()
return (i, o)
+
def send(out, *args):
"""pickle args and write out wholly in one syscall
@@ -43,12 +52,21 @@ def send(out, *args):
"""
os.write(out, pickle.dumps(args, pickle_proto))
+
def recv(inf):
- """load an object from input stream"""
- return pickle.load(inf)
+ """load an object from input stream
+ python2 and python3 compatibility, inf is sys.stdin
+ and is opened as text stream by default. Hence using the
+ buffer attribute in python3
+ """
+ if hasattr(inf, "buffer"):
+ return pickle.load(inf.buffer)
+ else:
+ return pickle.load(inf)
class RepceServer(object):
+
"""RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
... also our homebrewed RPC backend where the transport layer is
@@ -95,16 +113,17 @@ class RepceServer(object):
if rmeth == '__repce_version__':
res = repce_version
else:
- try:
- res = getattr(self.obj, rmeth)(*in_data[2:])
- except:
- res = sys.exc_info()[1]
- exc = True
- logging.exception("call failed: ")
+ try:
+ res = getattr(self.obj, rmeth)(*in_data[2:])
+ except:
+ res = sys.exc_info()[1]
+ exc = True
+ logging.exception("call failed: ")
send(self.out, rid, exc, res)
class RepceJob(object):
+
"""class representing message status we can use
for waiting on reply"""
@@ -137,6 +156,7 @@ class RepceJob(object):
class RepceClient(object):
+
"""RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
... also our homebrewed RPC backend where the transport layer is
@@ -148,7 +168,7 @@ class RepceClient(object):
def __init__(self, i, o):
self.inf, self.out = ioparse(i, o)
self.jtab = {}
- t = Thread(target = self.listen)
+ t = Thread(target=self.listen)
t.start()
def listen(self):
@@ -177,25 +197,33 @@ class RepceClient(object):
return rjob
def __call__(self, meth, *args):
- """RePCe client is callabe, calling it implements a synchronous remote call
+ """RePCe client is callabe, calling it implements a synchronous
+ remote call.
- We do a .push with a cbk which does a wakeup upon receiving anwser, then wait
- on the RepceJob.
+ We do a .push with a cbk which does a wakeup upon receiving answer,
+ then wait on the RepceJob.
"""
- rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
+ rjob = self.push(
+ meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
exc, res = rjob.wait()
if exc:
- logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__)))
+ logging.error(lf('call failed',
+ call=repr(rjob),
+ method=meth,
+ error=str(type(res).__name__)))
raise res
logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res)))
return res
class mprx(object):
- """method proxy, standard trick to implement rubyesque method_missing
- in Python
- A class is a closure factory, you know what I mean, or go read some SICP.
+ """method proxy, standard trick to implement rubyesque
+ method_missing in Python
+
+ A class is a closure factory, you know what I mean, or go read
+ some SICP.
"""
+
def __init__(self, ins, meth):
self.ins = ins
self.meth = meth
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
new file mode 100644
index 00000000000..f12c7ceaa36
--- /dev/null
+++ b/geo-replication/syncdaemon/resource.py
@@ -0,0 +1,1583 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import re
+import os
+import sys
+import stat
+import time
+import fcntl
+import types
+import struct
+import logging
+import tempfile
+import subprocess
+from errno import (EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EACCES,
+ EISDIR, ENOTEMPTY, ESTALE, EINVAL, EBUSY, EPERM)
+import errno
+
+from rconf import rconf
+import gsyncdconfig as gconf
+import libgfchangelog
+
+import repce
+from repce import RepceServer, RepceClient
+from master import gmaster_builder
+import syncdutils
+from syncdutils import (GsyncdError, select, privileged, funcode,
+ entry2pb, gauxpfx, errno_wrap, lstat,
+ NoStimeAvailable, PartialHistoryAvailable,
+ ChangelogException, ChangelogHistoryNotAvailable,
+ get_changelog_log_level, get_rsync_version,
+ GX_GFID_CANONICAL_LEN,
+ gf_mount_ready, lf, Popen, sup,
+ Xattr, matching_disk_gfid, get_gfid_from_mnt,
+ unshare_propagation_supported, get_slv_dir_path)
+from gsyncdstatus import GeorepStatus
+from py2py3 import (pipe, str_to_bytearray, entry_pack_reg,
+ entry_pack_reg_stat, entry_pack_mkdir,
+ entry_pack_symlink)
+
+
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
+
+slv_volume = None
+slv_host = None
+
+
+class Server(object):
+
+ """singleton implemening those filesystem access primitives
+ which are needed for geo-replication functionality
+
+ (Singleton in the sense it's a class which has only static
+ and classmethods and is used directly, without instantiation.)
+ """
+
+ GX_NSPACE_PFX = (privileged() and "trusted" or "system")
+ GX_NSPACE = GX_NSPACE_PFX + ".glusterfs"
+ NTV_FMTSTR = "!" + "B" * 19 + "II"
+ FRGN_XTRA_FMT = "I"
+ FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
+
+ # for backend gfid fetch, do not use GX_NSPACE_PFX
+ GFID_XATTR = 'trusted.gfid'
+ GFID_FMTSTR = "!" + "B" * 16
+
+ local_path = ''
+
+ @classmethod
+ def _fmt_mknod(cls, l):
+ return "!II%dsI%dsIII" % (GX_GFID_CANONICAL_LEN, l + 1)
+
+ @classmethod
+ def _fmt_mkdir(cls, l):
+ return "!II%dsI%dsII" % (GX_GFID_CANONICAL_LEN, l + 1)
+
+ @classmethod
+ def _fmt_symlink(cls, l1, l2):
+ return "!II%dsI%ds%ds" % (GX_GFID_CANONICAL_LEN, l1 + 1, l2 + 1)
+
+ def _pathguard(f):
+ """decorator method that checks
+ the path argument of the decorated
+ functions to make sure it does not
+ point out of the managed tree
+ """
+
+ fc = funcode(f)
+ pi = list(fc.co_varnames).index('path')
+
+ def ff(*args):
+ path = args[pi]
+ ps = path.split('/')
+ if path[0] == '/' or '..' in ps:
+ raise ValueError('unsafe path')
+ args = list(args)
+ args[pi] = os.path.join(args[0].local_path, path)
+ return f(*args)
+ return ff
+
+ @classmethod
+ @_pathguard
+ def entries(cls, path):
+ """directory entries in an array"""
+ # prevent symlinks being followed
+ if not stat.S_ISDIR(os.lstat(path).st_mode):
+ raise OSError(ENOTDIR, os.strerror(ENOTDIR))
+ return os.listdir(path)
+
+ @classmethod
+ @_pathguard
+ def lstat(cls, path):
+ try:
+ return os.lstat(path)
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def linkto_check(cls, path):
+ try:
+ return not (
+ Xattr.lgetxattr_buf(path,
+ 'trusted.glusterfs.dht.linkto') == '')
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA):
+ return False
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def gfid(cls, path):
+ buf = errno_wrap(Xattr.lgetxattr, [path, cls.GFID_XATTR, 16],
+ [ENOENT], [ESTALE, ENODATA])
+ if buf == ENOENT:
+ return buf
+ else:
+ buf = str_to_bytearray(buf)
+ m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(
+ ['%02x' % x for x in struct.unpack(cls.GFID_FMTSTR, buf)]))
+ return '-'.join(m.groups())
+
+ @classmethod
+ @_pathguard
+ def purge(cls, path, entries=None):
+ """force-delete subtrees
+
+ If @entries is not specified, delete
+ the whole subtree under @path (including
+ @path).
+
+ Otherwise, @entries should be a
+ a sequence of children of @path, and
+ the effect is identical with a joint
+ @entries-less purge on them, ie.
+
+ for e in entries:
+ cls.purge(os.path.join(path, e))
+ """
+ me_also = entries is None
+ if not entries:
+ try:
+ # if it's a symlink, prevent
+ # following it
+ try:
+ os.unlink(path)
+ return
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EISDIR:
+ entries = os.listdir(path)
+ else:
+ raise
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOTDIR, ENOENT, ELOOP):
+ try:
+ os.unlink(path)
+ return
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return
+ raise
+ else:
+ raise
+ for e in entries:
+ cls.purge(os.path.join(path, e))
+ if me_also:
+ os.rmdir(path)
+
+ @classmethod
+ @_pathguard
+ def _create(cls, path, ctor):
+ """path creation backend routine"""
+ try:
+ ctor(path)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST:
+ cls.purge(path)
+ return ctor(path)
+ raise
+
+ @classmethod
+ @_pathguard
+ def mkdir(cls, path):
+ cls._create(path, os.mkdir)
+
+ @classmethod
+ @_pathguard
+ def symlink(cls, lnk, path):
+ cls._create(path, lambda p: os.symlink(lnk, p))
+
+ @classmethod
+ @_pathguard
+ def xtime(cls, path, uuid):
+ """query xtime extended attribute
+
+ Return xtime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def stime_mnt(cls, path, uuid):
+ """query xtime extended attribute
+
+ Return xtime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid, 'stime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def stime(cls, path, uuid):
+ """query xtime extended attribute
+
+ Return xtime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid, 'stime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def entry_stime(cls, path, uuid):
+ """
+ entry_stime xattr to reduce the number of retry of Entry changes when
+ Geo-rep worker crashes and restarts. entry_stime is updated after
+ processing every changelog file. On failure and restart, worker only
+ have to reprocess the last changelog for Entry ops.
+ Xattr Key: <PFX>.<MASTERVOL_UUID>.<SLAVEVOL_UUID>.entry_stime
+ """
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid,
+ 'entry_stime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ def node_uuid(cls, path='.'):
+ try:
+ uuid_l = Xattr.lgetxattr_buf(
+ path, '.'.join([cls.GX_NSPACE, 'node-uuid']))
+ return uuid_l[:-1].split(' ')
+ except OSError:
+ raise
+
+ @classmethod
+ @_pathguard
+ def set_stime(cls, path, uuid, mark):
+ """set @mark as stime for @uuid on @path"""
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'stime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
+
+ @classmethod
+ @_pathguard
+ def set_entry_stime(cls, path, uuid, mark):
+ """set @mark as stime for @uuid on @path"""
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'entry_stime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
+
+ @classmethod
+ @_pathguard
+ def set_xtime(cls, path, uuid, mark):
+ """set @mark as xtime for @uuid on @path"""
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
+
+ @classmethod
+ @_pathguard
+ def set_xtime_remote(cls, path, uuid, mark):
+ """
+ set @mark as xtime for @uuid on @path
+ the difference b/w this and set_xtime() being
+ set_xtime() being overloaded to set the xtime
+ on the brick (this method sets xtime on the
+ remote slave)
+ """
+ Xattr.lsetxattr(
+ path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
+ struct.pack('!II', *mark))
+
+ @classmethod
+ def entry_ops(cls, entries):
+ pfx = gauxpfx()
+ logging.debug('entries: %s' % repr(entries))
+ dist_count = rconf.args.master_dist_count
+
+ def entry_purge(op, entry, gfid, e, uid, gid):
+ # This is an extremely racy code and needs to be fixed ASAP.
+ # The GFID check here is to be sure that the pargfid/bname
+ # to be purged is the GFID gotten from the changelog.
+ # (a stat(changelog_gfid) would also be valid here)
+ # The race here is between the GFID check and the purge.
+
+ # If the entry or the gfid of the file to be deleted is not present
+ # on slave, we can ignore the unlink/rmdir
+ if isinstance(lstat(entry), int) or \
+ isinstance(lstat(os.path.join(pfx, gfid)), int):
+ return
+
+ if not matching_disk_gfid(gfid, entry):
+ collect_failure(e, EEXIST, uid, gid)
+ return
+
+ if op == 'UNLINK':
+ er = errno_wrap(os.unlink, [entry], [ENOENT, ESTALE], [EBUSY])
+ # EISDIR is safe error, ignore. This can only happen when
+ # unlink is sent from master while fixing gfid conflicts.
+ if er != EISDIR:
+ return er
+
+ elif op == 'RMDIR':
+ er = errno_wrap(os.rmdir, [entry], [ENOENT, ESTALE,
+ ENOTEMPTY], [EBUSY])
+ if er == ENOTEMPTY:
+ return er
+
+ def collect_failure(e, cmd_ret, uid, gid, dst=False):
+ slv_entry_info = {}
+ slv_entry_info['gfid_mismatch'] = False
+ slv_entry_info['name_mismatch'] = False
+ slv_entry_info['dst'] = dst
+ slv_entry_info['slave_isdir'] = False
+ slv_entry_info['slave_name'] = None
+ slv_entry_info['slave_gfid'] = None
+ # We do this for failing fops on Slave
+ # Master should be logging this
+ if cmd_ret is None:
+ return False
+
+ if e.get("stat", {}):
+ # Copy actual UID/GID value back to entry stat
+ e['stat']['uid'] = uid
+ e['stat']['gid'] = gid
+
+ if cmd_ret in [EEXIST, ESTALE]:
+ if dst:
+ en = e['entry1']
+ else:
+ en = e['entry']
+ disk_gfid = get_gfid_from_mnt(en)
+ if isinstance(disk_gfid, str) and \
+ e['gfid'] != disk_gfid:
+ slv_entry_info['gfid_mismatch'] = True
+ st = lstat(en)
+ if not isinstance(st, int):
+ if st and stat.S_ISDIR(st.st_mode):
+ slv_entry_info['slave_isdir'] = True
+ dir_name = get_slv_dir_path(slv_host, slv_volume,
+ disk_gfid)
+ slv_entry_info['slave_name'] = dir_name
+ else:
+ slv_entry_info['slave_isdir'] = False
+ slv_entry_info['slave_gfid'] = disk_gfid
+ failures.append((e, cmd_ret, slv_entry_info))
+ else:
+ return False
+ else:
+ failures.append((e, cmd_ret, slv_entry_info))
+
+ return True
+
+ failures = []
+
+ def recursive_rmdir(gfid, entry, path):
+ """disk_gfid check added for original path for which
+ recursive_delete is called. This disk gfid check executed
+ before every Unlink/Rmdir. If disk gfid is not matching
+ with GFID from Changelog, that means other worker
+ deleted the directory. Even if the subdir/file present,
+ it belongs to different parent. Exit without performing
+ further deletes.
+ """
+ if not matching_disk_gfid(gfid, entry):
+ return
+
+ names = []
+ names = errno_wrap(os.listdir, [path], [ENOENT], [ESTALE, ENOTSUP])
+ if isinstance(names, int):
+ return
+
+ for name in names:
+ fullname = os.path.join(path, name)
+ if not matching_disk_gfid(gfid, entry):
+ return
+ er = errno_wrap(os.remove, [fullname], [ENOENT, ESTALE,
+ EISDIR], [EBUSY])
+
+ if er == EISDIR:
+ recursive_rmdir(gfid, entry, fullname)
+
+ if not matching_disk_gfid(gfid, entry):
+ return
+
+ errno_wrap(os.rmdir, [path], [ENOENT, ESTALE], [EBUSY])
+
+ def rename_with_disk_gfid_confirmation(gfid, entry, en, uid, gid):
+ if not matching_disk_gfid(gfid, entry):
+ logging.error(lf("RENAME ignored: source entry does not match "
+ "with on-disk gfid",
+ source=entry,
+ gfid=gfid,
+ disk_gfid=get_gfid_from_mnt(entry),
+ target=en))
+ collect_failure(e, EEXIST, uid, gid)
+ return
+
+ cmd_ret = errno_wrap(os.rename,
+ [entry, en],
+ [ENOENT, EEXIST], [ESTALE, EBUSY])
+ collect_failure(e, cmd_ret, uid, gid)
+
+ for e in entries:
+ blob = None
+ op = e['op']
+ gfid = e['gfid']
+ entry = e['entry']
+ uid = 0
+ gid = 0
+
+ # Skip entry processing if it's marked true during gfid
+ # conflict resolution
+ if e['skip_entry']:
+ continue
+
+ if e.get("stat", {}):
+ # Copy UID/GID value and then reset to zero. Copied UID/GID
+ # will be used to run chown once entry is created.
+ uid = e['stat']['uid']
+ gid = e['stat']['gid']
+ e['stat']['uid'] = 0
+ e['stat']['gid'] = 0
+
+ (pg, bname) = entry2pb(entry)
+ if op in ['RMDIR', 'UNLINK']:
+ # Try once, if rmdir failed with ENOTEMPTY
+ # then delete recursively.
+ er = entry_purge(op, entry, gfid, e, uid, gid)
+ if isinstance(er, int):
+ if er == ENOTEMPTY and op == 'RMDIR':
+ # Retry if ENOTEMPTY, ESTALE
+ er1 = errno_wrap(recursive_rmdir,
+ [gfid, entry,
+ os.path.join(pg, bname)],
+ [], [ENOTEMPTY, ESTALE, ENODATA])
+ if not isinstance(er1, int):
+ logging.debug("Removed %s => %s/%s recursively" %
+ (gfid, pg, bname))
+ else:
+ logging.warn(lf("Recursive remove failed",
+ gfid=gfid,
+ pgfid=pg,
+ bname=bname,
+ error=os.strerror(er1)))
+ else:
+ logging.warn(lf("Failed to remove",
+ gfid=gfid,
+ pgfid=pg,
+ bname=bname,
+ error=os.strerror(er)))
+ elif op in ['CREATE', 'MKNOD']:
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ blob = entry_pack_reg(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
+ # Self healed hardlinks are recorded as MKNOD.
+ # So if the gfid already exists, it should be
+ # processed as hard link not mknod.
+ elif op in ['MKNOD']:
+ cmd_ret = errno_wrap(os.link,
+ [slink, entry],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
+ elif op == 'MKDIR':
+ en = e['entry']
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ blob = entry_pack_mkdir(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
+ elif (isinstance(lstat(en), int) or
+ not matching_disk_gfid(gfid, en)):
+ # If gfid of a directory exists on slave but path based
+ # create is getting EEXIST. This means the directory is
+ # renamed in master but recorded as MKDIR during hybrid
+ # crawl. Get the directory path by reading the backend
+ # symlink and trying to rename to new name as said by
+ # master.
+ logging.info(lf("Special case: rename on mkdir",
+ gfid=gfid, entry=repr(entry)))
+ src_entry = get_slv_dir_path(slv_host, slv_volume, gfid)
+ if src_entry is None:
+ collect_failure(e, ENOENT, uid, gid)
+ if src_entry is not None and src_entry != entry:
+ slv_entry_info = {}
+ slv_entry_info['gfid_mismatch'] = False
+ slv_entry_info['name_mismatch'] = True
+ slv_entry_info['dst'] = False
+ slv_entry_info['slave_isdir'] = True
+ slv_entry_info['slave_gfid'] = gfid
+ slv_entry_info['slave_entry'] = src_entry
+
+ failures.append((e, EEXIST, slv_entry_info))
+ elif op == 'LINK':
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(entry)
+ if stat.S_ISREG(e['stat']['mode']):
+ blob = entry_pack_reg_stat(cls, gfid, bname, e['stat'])
+ elif stat.S_ISLNK(e['stat']['mode']):
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
+ e['stat'])
+ else:
+ cmd_ret = errno_wrap(os.link,
+ [slink, entry],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
+ elif op == 'SYMLINK':
+ en = e['entry']
+ st = lstat(entry)
+ if isinstance(st, int):
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, uid, gid)
+ elif op == 'RENAME':
+ en = e['entry1']
+ # The matching disk gfid check validates two things
+ # 1. Validates name is present, return false otherwise
+ # 2. Validates gfid is same, returns false otherwise
+ # So both validations are necessary to decide src doesn't
+ # exist. We can't rely on only gfid stat as hardlink could
+ # be present and we can't rely only on name as name could
+ # exist with different gfid.
+ if not matching_disk_gfid(gfid, entry):
+ if e['stat'] and not stat.S_ISDIR(e['stat']['mode']):
+ if stat.S_ISLNK(e['stat']['mode']):
+ # src is not present, so don't sync symlink as
+ # we don't know target. It's ok to ignore. If
+ # it's unliked, it's fine. If it's renamed to
+ # something else, it will be synced then.
+ if e['link'] is not None:
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_symlink(cls, gfid, bname,
+ e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, uid, gid, True)
+ else:
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_reg_stat(cls, gfid, bname,
+ e['stat'])
+ else:
+ cmd_ret = errno_wrap(os.link, [slink, en],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
+ else:
+ st = lstat(entry)
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
+ else:
+ if st.st_ino == st1.st_ino:
+ # we have a hard link, we can now unlink source
+ try:
+ errno_wrap(os.unlink, [entry],
+ [ENOENT, ESTALE], [EBUSY])
+ except OSError as e:
+ if e.errno == EISDIR:
+ try:
+ errno_wrap(os.rmdir, [entry],
+ [ENOENT, ESTALE], [EBUSY])
+ except OSError as e:
+ if e.errno == ENOTEMPTY:
+ logging.error(
+ lf("Directory Rename failed. "
+ "Both Old and New"
+ " directories exists",
+ old=entry,
+ new=en))
+ else:
+ raise
+ else:
+ raise
+ elif not matching_disk_gfid(gfid, en) and dist_count > 1:
+ collect_failure(e, EEXIST, uid, gid, True)
+ else:
+ # We are here which means matching_disk_gfid for
+ # both source and destination has returned false
+ # and distribution count for master vol is greater
+ # then one. Which basically says both the source and
+ # destination exist and not hardlinks.
+ # So we are safe to go ahead with rename here.
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
+ if blob:
+ cmd_ret = errno_wrap(Xattr.lsetxattr,
+ [pg, 'glusterfs.gfid.newfile', blob],
+ [EEXIST, ENOENT, ESTALE],
+ [ESTALE, EINVAL, EBUSY])
+ collect_failure(e, cmd_ret, uid, gid)
+
+ # If UID/GID is different than zero that means we are trying
+ # create Entry with different UID/GID. Create Entry with
+ # UID:0 and GID:0, and then call chown to set UID/GID
+ if uid != 0 or gid != 0:
+ path = os.path.join(pfx, gfid)
+ cmd_ret = errno_wrap(os.lchown, [path, uid, gid], [ENOENT],
+ [ESTALE, EINVAL])
+ collect_failure(e, cmd_ret, uid, gid)
+
+ return failures
+
+ @classmethod
+ def meta_ops(cls, meta_entries):
+ logging.debug('Meta-entries: %s' % repr(meta_entries))
+ failures = []
+ for e in meta_entries:
+ mode = e['stat']['mode']
+ uid = e['stat']['uid']
+ gid = e['stat']['gid']
+ atime = e['stat']['atime']
+ mtime = e['stat']['mtime']
+ go = e['go']
+ # Linux doesn't support chmod on symlink itself.
+ # It is always applied to the target file. So
+ # changelog would record target file's gfid
+ # and we are good. But 'chown' is supported on
+ # symlink file. So changelog would record symlink
+ # gfid in such cases. Since we do 'chown' 'chmod'
+ # 'utime' for each gfid recorded for metadata, and
+ # we know from changelog the metadata is on symlink's
+ # gfid or target file's gfid, we should be doing
+ # 'lchown' 'lchmod' 'utime with no-deference' blindly.
+ # But since 'lchmod' and 'utime with no de-reference' is
+ # not supported in python3, we have to rely on 'chmod'
+ # and 'utime with de-reference'. Hence avoiding 'chmod'
+ # and 'utime' if it's symlink file.
+
+ is_symlink = False
+ cmd_ret = errno_wrap(os.lchown, [go, uid, gid], [ENOENT],
+ [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ continue
+
+ is_symlink = os.path.islink(go)
+
+ if not is_symlink:
+ cmd_ret = errno_wrap(os.chmod, [go, mode],
+ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ failures.append((e, cmd_ret, "chmod"))
+
+ cmd_ret = errno_wrap(os.utime, [go, (atime, mtime)],
+ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ failures.append((e, cmd_ret, "utime"))
+ return failures
+
+ @classmethod
+ @_pathguard
+ def setattr(cls, path, adct):
+ """set file attributes
+
+ @adct is a dict, where 'own', 'mode' and 'times'
+ keys are looked for and values used to perform
+ chown, chmod or utimes on @path.
+ """
+ own = adct.get('own')
+ if own:
+ os.lchown(path, *own)
+ mode = adct.get('mode')
+ if mode:
+ os.chmod(path, stat.S_IMODE(mode))
+ times = adct.get('times')
+ if times:
+ os.utime(path, times)
+
+ @staticmethod
+ def pid():
+ return os.getpid()
+
+ last_keep_alive = 0
+
+ @classmethod
+ def keep_alive(cls, dct):
+ """process keepalive messages.
+
+ Return keep-alive counter (number of received keep-alive
+ messages).
+
+ Now the "keep-alive" message can also have a payload which is
+ used to set a foreign volume-mark on the underlying file system.
+ """
+ if dct:
+ key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']])
+ val = struct.pack(cls.FRGN_FMTSTR,
+ *(dct['version'] +
+ tuple(int(x, 16)
+ for x in re.findall('(?:[\da-f]){2}',
+ dct['uuid'])) +
+ (dct['retval'],) + dct['volume_mark'][0:2] + (
+ dct['timeout'],)))
+ Xattr.lsetxattr('.', key, val)
+ cls.last_keep_alive += 1
+ return cls.last_keep_alive
+
+ @staticmethod
+ def version():
+ """version used in handshake"""
+ return 1.0
+
+
+class Mounter(object):
+
+ """Abstract base class for mounter backends"""
+
+ def __init__(self, params):
+ self.params = params
+ self.mntpt = None
+ self.umount_cmd = []
+
+ @classmethod
+ def get_glusterprog(cls):
+ gluster_cmd_dir = gconf.get("gluster-command-dir")
+ if rconf.args.subcmd == "slave":
+ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
+ return os.path.join(gluster_cmd_dir, cls.glusterprog)
+
+ def umount_l(self, d):
+ """perform lazy umount"""
+ po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE,
+ universal_newlines=True)
+ po.wait()
+ return po
+
+ @classmethod
+ def make_umount_argv(cls, d):
+ raise NotImplementedError
+
+ def make_mount_argv(self, label=None):
+ raise NotImplementedError
+
+ def cleanup_mntpt(self, *a):
+ pass
+
+ def handle_mounter(self, po):
+ po.wait()
+
+ def inhibit(self, label):
+ """inhibit a gluster filesystem
+
+ Mount glusterfs over a temporary mountpoint,
+ change into the mount, and lazy unmount the
+ filesystem.
+ """
+ mpi, mpo = pipe()
+ mh = Popen.fork()
+ if mh:
+ # Parent
+ os.close(mpi)
+ fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ d = None
+ margv = self.make_mount_argv(label)
+ if self.mntpt:
+ # mntpt is determined pre-mount
+ d = self.mntpt
+ mnt_msg = d + '\0'
+ encoded_msg = mnt_msg.encode()
+ os.write(mpo, encoded_msg)
+ po = Popen(margv, **self.mountkw)
+ self.handle_mounter(po)
+ po.terminate_geterr()
+ logging.debug('auxiliary glusterfs mount in place')
+ if not d:
+ # mntpt is determined during mount
+ d = self.mntpt
+ mnt_msg = d + '\0'
+ encoded_msg = mnt_msg.encode()
+ os.write(mpo, encoded_msg)
+ encoded_msg = 'M'.encode()
+ os.write(mpo, encoded_msg)
+ t = syncdutils.Thread(target=lambda: os.chdir(d))
+ t.start()
+ tlim = rconf.starttime + gconf.get("connection-timeout")
+ while True:
+ if not t.isAlive():
+ break
+
+ if time.time() >= tlim:
+ syncdutils.finalize(exval=1)
+ time.sleep(1)
+ os.close(mpo)
+ _, rv = syncdutils.waitpid(mh, 0)
+ if rv:
+ rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \
+ (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0)
+ logging.warn(lf('stale mount possibly left behind',
+ path=d))
+ raise GsyncdError("cleaning up temp mountpoint %s "
+ "failed with status %d" %
+ (d, rv))
+ else:
+ rv = 0
+ try:
+ os.setsid()
+ os.close(mpo)
+ mntdata = ''
+ while True:
+ c = os.read(mpi, 1)
+ c = c.decode()
+ if not c:
+ break
+ mntdata += c
+ if mntdata:
+ mounted = False
+ if mntdata[-1] == 'M':
+ mntdata = mntdata[:-1]
+ assert(mntdata)
+ mounted = True
+ assert(mntdata[-1] == '\0')
+ mntpt = mntdata[:-1]
+ assert(mntpt)
+
+ umount_master = False
+ umount_slave = False
+ if rconf.args.subcmd == "worker" \
+ and not unshare_propagation_supported() \
+ and not gconf.get("access-mount"):
+ umount_master = True
+ if rconf.args.subcmd == "slave" \
+ and not gconf.get("slave-access-mount"):
+ umount_slave = True
+
+ if mounted and (umount_master or umount_slave):
+ po = self.umount_l(mntpt)
+ po.terminate_geterr(fail_on_err=False)
+ if po.returncode != 0:
+ po.errlog()
+ rv = po.returncode
+ logging.debug("Lazy umount done: %s" % mntpt)
+ if umount_master or umount_slave:
+ self.cleanup_mntpt(mntpt)
+ except:
+ logging.exception('mount cleanup failure:')
+ rv = 200
+ os._exit(rv)
+
+ #Polling the dht.subvol.status value.
+ RETRIES = 10
+ while not gf_mount_ready():
+ if RETRIES < 0:
+ logging.error('Subvols are not up')
+ break
+ RETRIES -= 1
+ time.sleep(0.2)
+
+ logging.debug('auxiliary glusterfs mount prepared')
+
+
+class DirectMounter(Mounter):
+
+ """mounter backend which calls mount(8), umount(8) directly"""
+
+ mountkw = {'stderr': subprocess.PIPE, 'universal_newlines': True}
+ glusterprog = 'glusterfs'
+
+ @staticmethod
+ def make_umount_argv(d):
+ return ['umount', '-l', d]
+
+ def make_mount_argv(self, label=None):
+ self.mntpt = tempfile.mkdtemp(prefix='gsyncd-aux-mount-')
+ rconf.mount_point = self.mntpt
+ return [self.get_glusterprog()] + \
+ ['--' + p for p in self.params] + [self.mntpt]
+
+ def cleanup_mntpt(self, mntpt=None):
+ if not mntpt:
+ mntpt = self.mntpt
+ errno_wrap(os.rmdir, [mntpt], [ENOENT, EBUSY])
+
+
+class MountbrokerMounter(Mounter):
+
+ """mounter backend using the mountbroker gluster service"""
+
+ mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE,
+ 'universal_newlines': True}
+ glusterprog = 'gluster'
+
+ @classmethod
+ def make_cli_argv(cls):
+ return [cls.get_glusterprog()] + ['--remote-host=localhost'] + \
+ gconf.get("gluster-cli-options").split() + ['system::']
+
+ @classmethod
+ def make_umount_argv(cls, d):
+ return cls.make_cli_argv() + ['umount', d, 'lazy']
+
+ def make_mount_argv(self, label):
+ return self.make_cli_argv() + \
+ ['mount', label, 'user-map-root=' +
+ syncdutils.getusername()] + self.params
+
+ def handle_mounter(self, po):
+ self.mntpt = po.stdout.readline()[:-1]
+ rconf.mount_point = self.mntpt
+ rconf.mountbroker = True
+ self.umount_cmd = self.make_cli_argv() + ['umount']
+ rconf.mbr_umount_cmd = self.umount_cmd
+ po.stdout.close()
+ sup(self, po)
+ if po.returncode != 0:
+ # if cli terminated with error due to being
+ # refused by glusterd, what it put
+ # out on stdout is a diagnostic message
+ logging.error(lf('glusterd answered', mnt=self.mntpt))
+
+
+class GLUSTERServer(Server):
+
+ "server enhancements for a glusterfs backend"""
+
+ @classmethod
+ def _attr_unpack_dict(cls, xattr, extra_fields=''):
+ """generic volume mark fetching/parsing backed"""
+ fmt_string = cls.NTV_FMTSTR + extra_fields
+ buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
+ buf = str_to_bytearray(buf)
+ vm = struct.unpack(fmt_string, buf)
+ m = re.match(
+ '(.{8})(.{4})(.{4})(.{4})(.{12})',
+ "".join(['%02x' % x for x in vm[2:18]]))
+ uuid = '-'.join(m.groups())
+ volinfo = {'version': vm[0:2],
+ 'uuid': uuid,
+ 'retval': vm[18],
+ 'volume_mark': vm[19:21],
+ }
+ if extra_fields:
+ return volinfo, vm[-len(extra_fields):]
+ else:
+ return volinfo
+
+ @classmethod
+ def foreign_volume_infos(cls):
+ """return list of valid (not expired) foreign volume marks"""
+ dict_list = []
+ xattr_list = Xattr.llistxattr_buf('.')
+ for ele in xattr_list:
+ if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
+ d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
+ now = int(time.time())
+ if x[0] > now:
+ logging.debug("volinfo[%s] expires: %d "
+ "(%d sec later)" %
+ (d['uuid'], x[0], x[0] - now))
+ d['timeout'] = x[0]
+ dict_list.append(d)
+ else:
+ try:
+ Xattr.lremovexattr('.', ele)
+ except OSError:
+ pass
+ return dict_list
+
+ @classmethod
+ def native_volume_info(cls):
+ """get the native volume mark of the underlying gluster volume"""
+ try:
+ return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE,
+ 'volume-mark']))
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno != ENODATA:
+ raise
+
+
+class GLUSTER(object):
+
+ """scheme class for gluster:// urls
+
+ can be used to represent a gluster slave server
+ on slave side, or interface to a remote gluster
+ slave on master side, or to represent master
+ (slave-ish features come from the mixins, master
+ functionality is outsourced to GMaster from master)
+ """
+ server = GLUSTERServer
+
+ def __init__(self, host, volume):
+ self.path = "%s:%s" % (host, volume)
+ self.host = host
+ self.volume = volume
+
+ global slv_volume
+ global slv_host
+ slv_volume = self.volume
+ slv_host = self.host
+
+ def connect(self):
+ """inhibit the resource beyond
+
+ Choose mounting backend (direct or mountbroker),
+ set up glusterfs parameters and perform the mount
+ with given backend
+ """
+
+ logging.info("Mounting gluster volume locally...")
+ t0 = time.time()
+ label = gconf.get('mountbroker', None)
+ if not label and not privileged():
+ label = syncdutils.getusername()
+ mounter = label and MountbrokerMounter or DirectMounter
+
+ log_file = gconf.get("gluster-log-file")
+ if rconf.args.subcmd == "slave":
+ log_file = gconf.get("slave-gluster-log-file")
+
+ log_level = gconf.get("gluster-log-level")
+ if rconf.args.subcmd == "slave":
+ log_level = gconf.get("slave-gluster-log-level")
+
+ params = gconf.get("gluster-params").split() + \
+ ['log-level=' + log_level] + \
+ ['log-file=' + log_file, 'volfile-server=' + self.host] + \
+ ['volfile-id=' + self.volume, 'client-pid=-1']
+
+ self.mounter = mounter(params)
+ self.mounter.inhibit(label)
+ logging.info(lf("Mounted gluster volume",
+ duration="%.4f" % (time.time() - t0)))
+
+ def gmaster_instantiate_tuple(self, slave):
+ """return a tuple of the 'one shot' and the 'main crawl'
+ class instance"""
+ return (gmaster_builder('xsync')(self, slave),
+ gmaster_builder()(self, slave),
+ gmaster_builder('changeloghistory')(self, slave))
+
+ def service_loop(self, slave=None):
+ """enter service loop
+
+ - if slave given, instantiate GMaster and
+ pass control to that instance, which implements
+ master behavior
+ - else do that's what's inherited
+ """
+ if rconf.args.subcmd == "slave":
+ if gconf.get("use-rsync-xattrs") and not privileged():
+ raise GsyncdError(
+ "using rsync for extended attributes is not supported")
+
+ repce = RepceServer(
+ self.server, sys.stdin, sys.stdout, gconf.get("sync-jobs"))
+ t = syncdutils.Thread(target=lambda: (repce.service_loop(),
+ syncdutils.finalize()))
+ t.start()
+ logging.info("slave listening")
+ if gconf.get("slave-timeout") and gconf.get("slave-timeout") > 0:
+ while True:
+ lp = self.server.last_keep_alive
+ time.sleep(gconf.get("slave-timeout"))
+ if lp == self.server.last_keep_alive:
+ logging.info(
+ lf("connection inactive, stopping",
+ timeout=gconf.get("slave-timeout")))
+ break
+ else:
+ select((), (), ())
+
+ return
+
+ class brickserver(Server):
+ local_path = rconf.args.local_path
+ aggregated = self.server
+
+ @classmethod
+ def entries(cls, path):
+ e = super(brickserver, cls).entries(path)
+ # on the brick don't mess with /.glusterfs
+ if path == '.':
+ try:
+ e.remove('.glusterfs')
+ e.remove('.trashcan')
+ except ValueError:
+ pass
+ return e
+
+ @classmethod
+ def lstat(cls, e):
+ """ path based backend stat """
+ return super(brickserver, cls).lstat(e)
+
+ @classmethod
+ def gfid(cls, e):
+ """ path based backend gfid fetch """
+ return super(brickserver, cls).gfid(e)
+
+ @classmethod
+ def linkto_check(cls, e):
+ return super(brickserver, cls).linkto_check(e)
+
+ # define {,set_}xtime in slave, thus preempting
+ # the call to remote, so that it takes data from
+ # the local brick
+ slave.server.xtime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.xtime(path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.stime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.stime(path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.entry_stime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.entry_stime(
+ path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.set_stime = types.MethodType(
+ lambda _self, path, uuid, mark: (
+ brickserver.set_stime(path,
+ uuid + '.' + rconf.args.slave_id,
+ mark)
+ ),
+ slave.server)
+ slave.server.set_entry_stime = types.MethodType(
+ lambda _self, path, uuid, mark: (
+ brickserver.set_entry_stime(
+ path,
+ uuid + '.' + rconf.args.slave_id,
+ mark)
+ ),
+ slave.server)
+
+ (g1, g2, g3) = self.gmaster_instantiate_tuple(slave)
+ g1.master.server = brickserver
+ g2.master.server = brickserver
+ g3.master.server = brickserver
+
+ # bad bad bad: bad way to do things like this
+ # need to make this elegant
+ # register the crawlers and start crawling
+ # g1 ==> Xsync, g2 ==> config.change_detector(changelog by default)
+ # g3 ==> changelog History
+ status = GeorepStatus(gconf.get("state-file"),
+ rconf.args.local_node,
+ rconf.args.local_path,
+ rconf.args.local_node_id,
+ rconf.args.master,
+ rconf.args.slave)
+ status.reset_on_worker_start()
+
+ try:
+ workdir = g2.setup_working_dir()
+ # Register only when change_detector is not set to
+ # xsync, else agent will generate changelog files
+ # in .processing directory of working dir
+ if gconf.get("change-detector") != 'xsync':
+ # register with the changelog library
+ # 9 == log level (DEBUG)
+ # 5 == connection retries
+ libgfchangelog.register(rconf.args.local_path,
+ workdir,
+ gconf.get("changelog-log-file"),
+ get_changelog_log_level(
+ gconf.get("changelog-log-level")),
+ g2.CHANGELOG_CONN_RETRIES)
+
+ register_time = int(time.time())
+ g2.register(register_time, status)
+ g3.register(register_time, status)
+ except ChangelogException as e:
+ logging.error(lf("Changelog register failed", error=e))
+ sys.exit(1)
+
+ g1.register(status=status)
+ logging.info(lf("Register time",
+ time=register_time))
+ # oneshot: Try to use changelog history api, if not
+ # available switch to FS crawl
+ # Note: if config.change_detector is xsync then
+ # it will not use changelog history api
+ try:
+ g3.crawlwrap(oneshot=True)
+ except PartialHistoryAvailable as e:
+ logging.info(lf('Partial history available, using xsync crawl'
+ ' after consuming history',
+ till=e))
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except ChangelogHistoryNotAvailable:
+ logging.info('Changelog history not available, using xsync')
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except NoStimeAvailable:
+ logging.info('No stime available, using xsync crawl')
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except ChangelogException as e:
+ logging.error(lf("Changelog History Crawl failed",
+ error=e))
+ sys.exit(1)
+
+ try:
+ g2.crawlwrap()
+ except ChangelogException as e:
+ logging.error(lf("Changelog crawl failed", error=e))
+ sys.exit(1)
+
+
+class SSH(object):
+
+ """scheme class for ssh:// urls
+
+ interface to remote slave on master side
+ implementing an ssh based proxy
+ """
+
+ def __init__(self, host, volume):
+ self.remote_addr = host
+ self.volume = volume
+
+ @staticmethod
+ def parse_ssh_address(self):
+ m = re.match('([^@]+)@(.+)', self.remote_addr)
+ if m:
+ u, h = m.groups()
+ else:
+ u, h = syncdutils.getusername(), self.remote_addr
+ self.remotehost = h
+ return {'user': u, 'host': h}
+
+ def start_fd_client(self, i, o):
+ """set up RePCe client, handshake with server
+
+ It's cut out as a separate method to let
+ subclasses hook into client startup
+ """
+ self.server = RepceClient(i, o)
+ rv = self.server.__version__()
+ exrv = {'proto': repce.repce_version, 'object': Server.version()}
+ da0 = (rv, exrv)
+ da1 = ({}, {})
+ for i in range(2):
+ for k, v in da0[i].items():
+ da1[i][k] = int(v)
+ if da1[0] != da1[1]:
+ raise GsyncdError(
+ "RePCe major version mismatch: local %s, remote %s" %
+ (exrv, rv))
+ slavepath = "/proc/%d/cwd" % self.server.pid()
+ self.slaveurl = ':'.join([self.remote_addr, slavepath])
+
+ def connect_remote(self):
+ """connect to inner slave url through outer ssh url
+
+ Wrap the connecting utility in ssh.
+
+ Much care is put into daemonizing: in that case
+ ssh is started before daemonization, but
+ RePCe client is to be created after that (as ssh
+ interactive password auth would be defeated by
+ a daemonized ssh, while client should be present
+ only in the final process). In that case the action
+ is taken apart to two parts, this method is ivoked
+ once pre-daemon, once post-daemon. Use @go_daemon
+ to deiced what part to perform.
+
+ [NB. ATM gluster product does not makes use of interactive
+ authentication.]
+ """
+ syncdutils.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'),
+ self.remote_addr,
+ self.volume)
+
+ logging.info("Initializing SSH connection between master and slave...")
+ t0 = time.time()
+
+ extra_opts = []
+ remote_gsyncd = gconf.get("remote-gsyncd")
+ if remote_gsyncd == "":
+ remote_gsyncd = "/nonexistent/gsyncd"
+
+ if gconf.get("use-rsync-xattrs"):
+ extra_opts.append('--use-rsync-xattrs')
+
+ args_to_slave = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ rconf.ssh_ctl_args + [self.remote_addr] + \
+ [remote_gsyncd, "slave"] + \
+ extra_opts + \
+ [rconf.args.master, rconf.args.slave] + \
+ [
+ '--master-node', rconf.args.local_node,
+ '--master-node-id', rconf.args.local_node_id,
+ '--master-brick', rconf.args.local_path,
+ '--local-node', rconf.args.resource_remote,
+ '--local-node-id', rconf.args.resource_remote_id] + \
+ [
+ # Add all config arguments here, slave gsyncd will not use
+ # config file in slave side, so all overriding options should
+ # be sent as arguments
+ '--slave-timeout', str(gconf.get("slave-timeout")),
+ '--slave-log-level', gconf.get("slave-log-level"),
+ '--slave-gluster-log-level',
+ gconf.get("slave-gluster-log-level"),
+ '--slave-gluster-command-dir',
+ gconf.get("slave-gluster-command-dir"),
+ '--master-dist-count',
+ str(gconf.get("master-distribution-count"))]
+
+ if gconf.get("slave-access-mount"):
+ args_to_slave.append('--slave-access-mount')
+
+ if rconf.args.debug:
+ args_to_slave.append('--debug')
+
+ po = Popen(args_to_slave,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ rconf.transport = po
+ self.start_fd_client(po.stdout, po.stdin)
+ logging.info(lf("SSH connection between master and slave established.",
+ duration="%.4f" % (time.time() - t0)))
+
+ def rsync(self, files, *args, **kw):
+ """invoke rsync"""
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+
+ extra_rsync_flags = []
+ # Performance flag, --ignore-missing-args, if rsync version is
+ # greater than 3.1.0 then include this flag.
+ if gconf.get("rsync-opt-ignore-missing-args") and \
+ get_rsync_version(gconf.get("rsync-command")) >= "3.1.0":
+ extra_rsync_flags = ["--ignore-missing-args"]
+
+ rsync_ssh_opts = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ rconf.ssh_ctl_args + \
+ gconf.get("rsync-ssh-options").split()
+
+ argv = [
+ gconf.get("rsync-command"),
+ '-aR0',
+ '--inplace',
+ '--files-from=-',
+ '--super',
+ '--stats',
+ '--numeric-ids',
+ '--no-implied-dirs'
+ ]
+
+ if gconf.get("rsync-opt-existing"):
+ argv += ["--existing"]
+
+ if gconf.get("sync-xattrs"):
+ argv += ['--xattrs']
+
+ if gconf.get("sync-acls"):
+ argv += ['--acls']
+
+ argv = argv + \
+ gconf.get("rsync-options").split() + \
+ extra_rsync_flags + ['.'] + \
+ ["-e", " ".join(rsync_ssh_opts)] + \
+ [self.slaveurl]
+
+ log_rsync_performance = gconf.getr("log-rsync-performance", False)
+
+ if log_rsync_performance:
+ # use stdout=PIPE only when log_rsync_performance enabled
+ # Else rsync will write to stdout and nobody is there
+ # to consume. If PIPE is full rsync hangs.
+ po = Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, universal_newlines=True)
+ else:
+ po = Popen(argv, stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+
+ for f in files:
+ po.stdin.write(f)
+ po.stdin.write('\0')
+
+ stdout, stderr = po.communicate()
+
+ if kw.get("log_err", False):
+ for errline in stderr.strip().split("\n")[:-1]:
+ logging.error(lf("SYNC Error",
+ sync_engine="Rsync",
+ error=errline))
+
+ if log_rsync_performance:
+ rsync_msg = []
+ for line in stdout.split("\n"):
+ if line.startswith("Number of files:") or \
+ line.startswith("Number of regular files transferred:") or \
+ line.startswith("Total file size:") or \
+ line.startswith("Total transferred file size:") or \
+ line.startswith("Literal data:") or \
+ line.startswith("Matched data:") or \
+ line.startswith("Total bytes sent:") or \
+ line.startswith("Total bytes received:") or \
+ line.startswith("sent "):
+ rsync_msg.append(line)
+ logging.info(lf("rsync performance",
+ data=", ".join(rsync_msg)))
+
+ return po
+
+ def tarssh(self, files, log_err=False):
+ """invoke tar+ssh
+ -z (compress) can be use if needed, but omitting it now
+ as it results in weird error (tar+ssh errors out (errcode: 2)
+ """
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+ (host, rdir) = self.slaveurl.split(':')
+
+ tar_cmd = ["tar"] + \
+ ["--sparse", "-cf", "-", "--files-from", "-"]
+ ssh_cmd = gconf.get("ssh-command").split() + \
+ gconf.get("ssh-options-tar").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ [host, "tar"] + \
+ ["--overwrite", "-xf", "-", "-C", rdir]
+ p0 = Popen(tar_cmd, stdout=subprocess.PIPE,
+ stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ p1 = Popen(ssh_cmd, stdin=p0.stdout, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ for f in files:
+ p0.stdin.write(f)
+ p0.stdin.write('\n')
+
+ p0.stdin.close()
+ p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits.
+
+ # stdin and stdout of p0 is already closed, Reset to None and
+ # wait for child process to complete
+ p0.stdin = None
+ p0.stdout = None
+
+ def wait_for_tar(p0):
+ _, stderr = p0.communicate()
+ if log_err:
+ for errline in stderr.strip().split("\n")[:-1]:
+ if "No such file or directory" not in errline:
+ logging.error(lf("SYNC Error",
+ sync_engine="Tarssh",
+ error=errline))
+
+ t = syncdutils.Thread(target=wait_for_tar, args=(p0, ))
+ # wait for tar to terminate, collecting any errors, further
+ # waiting for transfer to complete
+ t.start()
+
+ # wait for ssh process
+ _, stderr1 = p1.communicate()
+ t.join()
+
+ if log_err:
+ for errline in stderr1.strip().split("\n")[:-1]:
+ logging.error(lf("SYNC Error",
+ sync_engine="Tarssh",
+ error=errline))
+
+ return p1
diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
new file mode 100644
index 00000000000..b8508532e30
--- /dev/null
+++ b/geo-replication/syncdaemon/subcmds.py
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+from syncdutils import lf
+import logging
+import gsyncdconfig as gconf
+
+
+ERROR_CONFIG_INVALID = 2
+ERROR_CONFIG_INVALID_VALUE = 3
+ERROR_CONFIG_NOT_CONFIGURABLE = 4
+
+
+def subcmd_monitor_status(args):
+ from gsyncdstatus import set_monitor_status
+ from rconf import rconf
+
+ set_monitor_status(gconf.get("state-file"), args.status)
+ rconf.log_exit = False
+ logging.info(lf("Monitor Status Change", status=args.status))
+
+
+def subcmd_status(args):
+ from gsyncdstatus import GeorepStatus
+
+ master_name = args.master.replace(":", "")
+ slave_data = args.slave.replace("ssh://", "")
+
+ brick_status = GeorepStatus(gconf.get("state-file"),
+ "",
+ args.local_path,
+ "",
+ master_name,
+ slave_data,
+ gconf.get("pid-file"))
+ checkpoint_time = gconf.get("checkpoint", 0)
+ brick_status.print_status(checkpoint_time=checkpoint_time,
+ json_output=args.json)
+
+
+def subcmd_monitor(args):
+ import monitor
+ from resource import GLUSTER, SSH, Popen
+ go_daemon = False if args.debug else True
+
+ monitor.startup(go_daemon)
+ Popen.init_errhandler()
+ local = GLUSTER("localhost", args.master)
+ slavehost, slavevol = args.slave.split("::")
+ remote = SSH(slavehost, slavevol)
+ return monitor.monitor(local, remote)
+
+
+def subcmd_verify_spawning(args):
+ logging.info("Able to spawn gsyncd.py")
+
+
+def subcmd_worker(args):
+ import os
+ import fcntl
+
+ from resource import GLUSTER, SSH, Popen
+
+ Popen.init_errhandler()
+ fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ local = GLUSTER("localhost", args.master)
+ slave_url, slavevol = args.slave.split("::")
+ if "@" not in slave_url:
+ slavehost = args.resource_remote
+ else:
+ slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
+ remote = SSH(slavehost, slavevol)
+ remote.connect_remote()
+ local.connect()
+ logging.info("Worker spawn successful. Acknowledging back to monitor")
+ os.close(args.feedback_fd)
+ local.service_loop(remote)
+
+
+def subcmd_slave(args):
+ from resource import GLUSTER, Popen
+
+ Popen.init_errhandler()
+ slavevol = args.slave.split("::")[-1]
+ local = GLUSTER("localhost", slavevol)
+
+ local.connect()
+ local.service_loop()
+
+
+def subcmd_voluuidget(args):
+ from subprocess import Popen, PIPE
+ import xml.etree.ElementTree as XET
+
+ ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
+
+ cmd = ['gluster', '--xml', '--remote-host=' + args.host,
+ 'volume', 'info', args.volname]
+
+ if args.inet6:
+ cmd.append("--inet6")
+
+ po = Popen(cmd, bufsize=0,
+ stdin=None, stdout=PIPE, stderr=PIPE,
+ universal_newlines=True)
+
+ vix, err = po.communicate()
+ if po.returncode != 0:
+ logging.info(lf("Volume info failed, unable to get "
+ "volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=po.returncode))
+ return ""
+ vi = XET.fromstring(vix)
+ if vi.find('opRet').text != '0':
+ logging.info(lf("Unable to get volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=vi.find('opErrstr').text))
+ return ""
+
+ try:
+ voluuid = vi.find("volInfo/volumes/volume/id").text
+ except (ParseError, AttributeError, ValueError) as e:
+ logging.info(lf("Parsing failed to volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=e))
+ voluuid = ""
+
+ print(voluuid)
+
+
+def _unlink(path):
+ import os
+ from errno import ENOENT
+ from syncdutils import GsyncdError
+ import sys
+
+ try:
+ os.unlink(path)
+ except (OSError, IOError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError('Unlink error: %s' % path)
+
+
+def subcmd_delete(args):
+ import logging
+ import shutil
+ import glob
+ import sys
+ from errno import ENOENT, ENODATA
+ import struct
+
+ from syncdutils import GsyncdError, Xattr, errno_wrap
+ import gsyncdconfig as gconf
+
+ logging.info('geo-replication delete')
+ # remove the stime xattr from all the brick paths so that
+ # a re-create of a session will start sync all over again
+ stime_xattr_prefix = gconf.get('stime-xattr-prefix', None)
+
+ # Delete pid file, status file, socket file
+ cleanup_paths = []
+ cleanup_paths.append(gconf.get("pid-file"))
+
+ # Cleanup Session dir
+ try:
+ shutil.rmtree(gconf.get("georep-session-working-dir"))
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError(
+ 'Error while removing working dir: %s' %
+ gconf.get("georep-session-working-dir"))
+
+ # Cleanup changelog working dirs
+ try:
+ shutil.rmtree(gconf.get("working-dir"))
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError(
+ 'Error while removing working dir: %s' %
+ gconf.get("working-dir"))
+
+ for path in cleanup_paths:
+ # To delete temp files
+ for f in glob.glob(path + "*"):
+ _unlink(f)
+
+ if args.reset_sync_time and stime_xattr_prefix:
+ for p in args.paths:
+ if p != "":
+ # set stime to (0,0) to trigger full volume content resync
+ # to slave on session recreation
+ # look at master.py::Xcrawl hint: zero_zero
+ errno_wrap(Xattr.lsetxattr,
+ (p, stime_xattr_prefix + ".stime",
+ struct.pack("!II", 0, 0)),
+ [ENOENT, ENODATA])
+ errno_wrap(Xattr.lremovexattr,
+ (p, stime_xattr_prefix + ".entry_stime"),
+ [ENOENT, ENODATA])
+
+ return
+
+
+def print_config(name, value, only_value=False, use_underscore=False):
+ val = value
+ if isinstance(value, bool):
+ val = str(value).lower()
+
+ if only_value:
+ print(val)
+ else:
+ if use_underscore:
+ name = name.replace("-", "_")
+
+ print(("%s:%s" % (name, val)))
+
+
+def config_name_format(val):
+ return val.replace("_", "-")
+
+
+def subcmd_config_get(args):
+ import sys
+ import json
+
+ all_config = gconf.getall(show_defaults=args.show_defaults,
+ show_non_configurable=True)
+ if args.name is not None:
+ val = all_config.get(config_name_format(args.name), None)
+ if val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ print_config(args.name, val["value"], only_value=args.only_value,
+ use_underscore=args.use_underscore)
+ return
+
+ if args.json:
+ out = []
+ # Convert all values as string
+ for k in sorted(all_config):
+ v = all_config[k]
+ out.append({
+ "name": k,
+ "value": str(v["value"]),
+ "default": str(v["default"]),
+ "configurable": v["configurable"],
+ "modified": v["modified"]
+ })
+
+ print((json.dumps(out)))
+ return
+
+ for k in sorted(all_config):
+ print_config(k, all_config[k]["value"],
+ use_underscore=args.use_underscore)
+
+
+def subcmd_config_check(args):
+ import sys
+
+ try:
+ gconf.check(config_name_format(args.name), value=args.value,
+ with_conffile=False)
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
+ except gconf.GconfInvalidValue:
+ sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name,
+ args.value))
+ sys.exit(ERROR_CONFIG_INVALID_VALUE)
+
+
+def subcmd_config_set(args):
+ import sys
+
+ try:
+ gconf.setconfig(config_name_format(args.name), args.value)
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
+ except gconf.GconfInvalidValue:
+ sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name,
+ args.value))
+ sys.exit(ERROR_CONFIG_INVALID_VALUE)
+
+
+def subcmd_config_reset(args):
+ import sys
+
+ try:
+ gconf.resetconfig(config_name_format(args.name))
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
new file mode 100644
index 00000000000..a3df103e76c
--- /dev/null
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -0,0 +1,1115 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+import pwd
+import time
+import fcntl
+import shutil
+import logging
+import errno
+import threading
+import subprocess
+import socket
+from subprocess import PIPE
+from threading import Lock, Thread as baseThread
+from errno import (EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED,
+ EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO)
+from signal import signal, SIGTERM
+import select as oselect
+from os import waitpid as owaitpid
+import xml.etree.ElementTree as XET
+from select import error as SelectError
+try:
+ from cPickle import PickleError
+except ImportError:
+ from pickle import PickleError
+
+from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE
+sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
+EVENTS_ENABLED = True
+try:
+ from gfevents.eventtypes import GEOREP_FAULTY as EVENT_GEOREP_FAULTY
+ from gfevents.eventtypes import GEOREP_ACTIVE as EVENT_GEOREP_ACTIVE
+ from gfevents.eventtypes import GEOREP_PASSIVE as EVENT_GEOREP_PASSIVE
+ from gfevents.eventtypes import GEOREP_CHECKPOINT_COMPLETED \
+ as EVENT_GEOREP_CHECKPOINT_COMPLETED
+except ImportError:
+ # Events APIs not installed, dummy eventtypes with None
+ EVENTS_ENABLED = False
+ EVENT_GEOREP_FAULTY = None
+ EVENT_GEOREP_ACTIVE = None
+ EVENT_GEOREP_PASSIVE = None
+ EVENT_GEOREP_CHECKPOINT_COMPLETED = None
+
+import gsyncdconfig as gconf
+from rconf import rconf
+
+from hashlib import sha256 as sha256
+
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
+
+# auxiliary gfid based access prefix
+_CL_AUX_GFID_PFX = ".gfid/"
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+GF_OP_RETRIES = 10
+
+GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+
+NodeID = None
+rsync_version = None
+unshare_mnt_propagation = None
+slv_bricks = None
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
+
+final_lock = Lock()
+
+def sup(x, *a, **kw):
+ """a rubyesque "super" for python ;)
+
+ invoke caller method in parent class with given args.
+ """
+ return getattr(super(type(x), x),
+ sys._getframe(1).f_code.co_name)(*a, **kw)
+
+
+def escape(s):
+ """the chosen flavor of string escaping, used all over
+ to turn whatever data to creatable representation"""
+ return s.replace("/", "-").strip("-")
+
+
+def escape_space_newline(s):
+ return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+ .replace(" ", SPACE_ESCAPE_CHAR)\
+ .replace("\n", NEWLINE_ESCAPE_CHAR)
+
+
+def unescape_space_newline(s):
+ return s.replace(SPACE_ESCAPE_CHAR, " ")\
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+# gf_mount_ready() returns 1 if all subvols are up, else 0
+def gf_mount_ready():
+ ret = errno_wrap(Xattr.lgetxattr,
+ ['.', 'dht.subvol.status', 16],
+ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
+
+ if isinstance(ret, int):
+ logging.error("failed to get the xattr value")
+ return 1
+ ret = ret.rstrip('\x00')
+ if ret == "1":
+ return 1
+ return 0
+
+def norm(s):
+ if s:
+ return s.replace('-', '_')
+
+
+def update_file(path, updater, merger=lambda f: True):
+ """update a file in a transaction-like manner"""
+
+ fr = fw = None
+ try:
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ try:
+ fr = os.fdopen(fd, 'r+b')
+ except:
+ os.close(fd)
+ raise
+ fcntl.lockf(fr, fcntl.LOCK_EX)
+ if not merger(fr):
+ return
+
+ tmpp = path + '.tmp.' + str(os.getpid())
+ fd = os.open(tmpp, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
+ try:
+ fw = os.fdopen(fd, 'wb', 0)
+ except:
+ os.close(fd)
+ raise
+ updater(fw)
+ os.fsync(fd)
+ os.rename(tmpp, path)
+ finally:
+ for fx in (fr, fw):
+ if fx:
+ fx.close()
+
+
+def create_manifest(fname, content):
+ """
+ Create manifest file for SSH Control Path
+ """
+ fd = None
+ try:
+ fd = os.open(fname, os.O_CREAT | os.O_RDWR)
+ try:
+ os.write(fd, content)
+ except:
+ os.close(fd)
+ raise
+ finally:
+ if fd is not None:
+ os.close(fd)
+
+
+def setup_ssh_ctl(ctld, remote_addr, resource_url):
+ """
+ Setup GConf ssh control path parameters
+ """
+ rconf.ssh_ctl_dir = ctld
+ content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr,
+ resource_url)
+ encoded_content = content.encode()
+ content_sha256 = sha256hex(encoded_content)
+ """
+ The length of ctl_path for ssh connection should not be > 108.
+ ssh fails with ctl_path too long if it is so. But when rsync
+ is piped to ssh, it is not taking > 90. Hence using first 32
+ bytes of hash. Hash collision doesn't matter as only one sock
+ file is created per directory.
+ """
+ content_sha256 = content_sha256[:32]
+ fname = os.path.join(rconf.ssh_ctl_dir,
+ "%s.mft" % content_sha256)
+
+ create_manifest(fname, encoded_content)
+ ssh_ctl_path = os.path.join(rconf.ssh_ctl_dir,
+ "%s.sock" % content_sha256)
+ rconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path]
+
+
+def grabfile(fname, content=None):
+ """open @fname + contest for its fcntl lock
+
+ @content: if given, set the file content to it
+ """
+ # damn those messy open() mode codes
+ fd = os.open(fname, os.O_CREAT | os.O_RDWR)
+ f = os.fdopen(fd, 'r+')
+ try:
+ fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except:
+ ex = sys.exc_info()[1]
+ f.close()
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ # cannot grab, it's taken
+ return
+ raise
+ if content:
+ try:
+ f.truncate()
+ f.write(content)
+ f.flush()
+ except:
+ f.close()
+ raise
+ rconf.permanent_handles.append(f)
+ return f
+
+
+def grabpidfile(fname=None, setpid=True):
+ """.grabfile customization for pid files"""
+ if not fname:
+ fname = gconf.get("pid-file")
+ content = None
+ if setpid:
+ content = str(os.getpid()) + '\n'
+ return grabfile(fname, content=content)
+
+
+def finalize(*args, **kwargs):
+ """all those messy final steps we go trough upon termination
+
+ Do away with pidfile, ssh control dir and logging.
+ """
+
+ final_lock.acquire()
+ if gconf.get('pid_file'):
+ rm_pidf = rconf.pid_file_owned
+ if rconf.cpid:
+ # exit path from parent branch of daemonization
+ rm_pidf = False
+ while True:
+ f = grabpidfile(setpid=False)
+ if not f:
+ # child has already taken over pidfile
+ break
+ if os.waitpid(rconf.cpid, os.WNOHANG)[0] == rconf.cpid:
+ # child has terminated
+ rm_pidf = True
+ break
+ time.sleep(0.1)
+ if rm_pidf:
+ try:
+ os.unlink(rconf.pid_file)
+ except:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ pass
+ else:
+ raise
+ if rconf.ssh_ctl_dir and not rconf.cpid:
+ def handle_rm_error(func, path, exc_info):
+ if exc_info[1].errno == ENOENT:
+ return
+ raise exc_info[1]
+
+ shutil.rmtree(rconf.ssh_ctl_dir, onerror=handle_rm_error)
+
+ """ Unmount if not done """
+ if rconf.mount_point:
+ if rconf.mountbroker:
+ umount_cmd = rconf.mbr_umount_cmd + [rconf.mount_point, 'lazy']
+ else:
+ umount_cmd = ['umount', '-l', rconf.mount_point]
+ p0 = subprocess.Popen(umount_cmd, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ _, errdata = p0.communicate()
+ if p0.returncode == 0:
+ try:
+ os.rmdir(rconf.mount_point)
+ except OSError:
+ pass
+ else:
+ pass
+
+ if rconf.log_exit:
+ logging.info("exiting.")
+ sys.stdout.flush()
+ sys.stderr.flush()
+ os._exit(kwargs.get('exval', 0))
+
+
+def log_raise_exception(excont):
+ """top-level exception handler
+
+ Try to some fancy things to cover up we face with an error.
+ Translate some weird sounding but well understood exceptions
+ into human-friendly lingo
+ """
+
+ is_filelog = False
+ for h in logging.getLogger().handlers:
+ fno = getattr(getattr(h, 'stream', None), 'fileno', None)
+ if fno and not os.isatty(fno()):
+ is_filelog = True
+
+ exc = sys.exc_info()[1]
+ if isinstance(exc, SystemExit):
+ excont.exval = exc.code or 0
+ raise
+ else:
+ logtag = None
+ if isinstance(exc, GsyncdError):
+ if is_filelog:
+ logging.error(exc.args[0])
+ sys.stderr.write('failure: ' + exc.args[0] + '\n')
+ elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \
+ ((isinstance(exc, OSError) or isinstance(exc, IOError)) and
+ exc.errno == EPIPE):
+ logging.error('connection to peer is broken')
+ if hasattr(rconf, 'transport'):
+ rconf.transport.wait()
+ if rconf.transport.returncode == 127:
+ logging.error("getting \"No such file or directory\""
+ "errors is most likely due to "
+ "MISCONFIGURATION, please remove all "
+ "the public keys added by geo-replication "
+ "from authorized_keys file in slave nodes "
+ "and run Geo-replication create "
+ "command again.")
+ logging.error("If `gsec_create container` was used, then "
+ "run `gluster volume geo-replication "
+ "<MASTERVOL> [<SLAVEUSER>@]<SLAVEHOST>::"
+ "<SLAVEVOL> config remote-gsyncd "
+ "<GSYNCD_PATH> (Example GSYNCD_PATH: "
+ "`/usr/libexec/glusterfs/gsyncd`)")
+ rconf.transport.terminate_geterr()
+ elif isinstance(exc, OSError) and exc.errno in (ENOTCONN,
+ ECONNABORTED):
+ logging.error(lf('Gluster Mount process exited',
+ error=errorcode[exc.errno]))
+ elif isinstance(exc, OSError) and exc.errno == EIO:
+ logging.error("Getting \"Input/Output error\" "
+ "is most likely due to "
+ "a. Brick is down or "
+ "b. Split brain issue.")
+ logging.error("This is expected as per design to "
+ "keep the consistency of the file system. "
+ "Once the above issue is resolved "
+ "geo-replication would automatically "
+ "proceed further.")
+ logtag = "FAIL"
+ else:
+ logtag = "FAIL"
+ if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
+ logtag = "FULL EXCEPTION TRACE"
+ if logtag:
+ logging.exception(logtag + ": ")
+ sys.stderr.write("failed with %s: %s.\n" % (type(exc).__name__, exc))
+ excont.exval = 1
+ sys.exit(excont.exval)
+
+
+class FreeObject(object):
+
+ """wildcard class for which any attribute can be set"""
+
+ def __init__(self, **kw):
+ for k, v in kw.items():
+ setattr(self, k, v)
+
+
+class Thread(baseThread):
+
+ """thread class flavor for gsyncd
+
+ - always a daemon thread
+ - force exit for whole program if thread
+ function coughs up an exception
+ """
+
+ def __init__(self, *args, **kwargs):
+ tf = kwargs.get('target')
+ if tf:
+ def twrap(*aargs):
+ excont = FreeObject(exval=0)
+ try:
+ tf(*aargs)
+ except:
+ try:
+ log_raise_exception(excont)
+ finally:
+ finalize(exval=excont.exval)
+ kwargs['target'] = twrap
+ baseThread.__init__(self, *args, **kwargs)
+ self.setDaemon(True)
+
+
+class GsyncdError(Exception):
+ pass
+
+
+class _MetaXattr(object):
+
+ """singleton class, a lazy wrapper around the
+ libcxattr module
+
+ libcxattr (a heavy import due to ctypes) is
+ loaded only when when the single
+ instance is tried to be used.
+
+ This reduces runtime for those invocations
+ which do not need filesystem manipulation
+ (eg. for config, url parsing)
+ """
+
+ def __getattr__(self, meth):
+ from libcxattr import Xattr as LXattr
+ xmeth = [m for m in dir(LXattr) if m[0] != '_']
+ if meth not in xmeth:
+ return
+ for m in xmeth:
+ setattr(self, m, getattr(LXattr, m))
+ return getattr(self, meth)
+
+
+Xattr = _MetaXattr()
+
+
+def getusername(uid=None):
+ if uid is None:
+ uid = os.geteuid()
+ return pwd.getpwuid(uid).pw_name
+
+
+def privileged():
+ return os.geteuid() == 0
+
+
+def boolify(s):
+ """
+ Generic string to boolean converter
+
+ return
+ - Quick return if string 's' is of type bool
+ - True if it's in true_list
+ - False if it's in false_list
+ - Warn if it's not present in either and return False
+ """
+ true_list = ['true', 'yes', '1', 'on']
+ false_list = ['false', 'no', '0', 'off']
+
+ if isinstance(s, bool):
+ return s
+
+ rv = False
+ lstr = s.lower()
+ if lstr in true_list:
+ rv = True
+ elif lstr not in false_list:
+ logging.warn(lf("Unknown string in \"string to boolean\" conversion, "
+ "defaulting to False",
+ str=s))
+
+ return rv
+
+
+def eintr_wrap(func, exc, *args):
+ """
+ wrapper around syscalls resilient to interrupt caused
+ by signals
+ """
+ while True:
+ try:
+ return func(*args)
+ except exc:
+ ex = sys.exc_info()[1]
+ if not ex.args[0] == EINTR:
+ raise
+
+
+def select(*args):
+ return eintr_wrap(oselect.select, oselect.error, *args)
+
+
+def waitpid(*args):
+ return eintr_wrap(owaitpid, OSError, *args)
+
+
+def term_handler_default_hook(signum, frame):
+ finalize(signum, frame, exval=1)
+
+
+def set_term_handler(hook=term_handler_default_hook):
+ signal(SIGTERM, hook)
+
+
+def get_node_uuid():
+ global NodeID
+ if NodeID is not None:
+ return NodeID
+
+ NodeID = ""
+ with open(UUID_FILE) as f:
+ for line in f:
+ if line.startswith("UUID="):
+ NodeID = line.strip().split("=")[-1]
+ break
+
+ if NodeID == "":
+ raise GsyncdError("Failed to get Host UUID from %s" % UUID_FILE)
+ return NodeID
+
+
+def is_host_local(host_id):
+ return host_id == get_node_uuid()
+
+
+def funcode(f):
+ fc = getattr(f, 'func_code', None)
+ if not fc:
+ # python 3
+ fc = f.__code__
+ return fc
+
+
+def memoize(f):
+ fc = funcode(f)
+ fn = fc.co_name
+
+ def ff(self, *a, **kw):
+ rv = getattr(self, '_' + fn, None)
+ if rv is None:
+ rv = f(self, *a, **kw)
+ setattr(self, '_' + fn, rv)
+ return rv
+ return ff
+
+
+def umask():
+ return os.umask(0)
+
+
+def entry2pb(e):
+ return e.rsplit('/', 1)
+
+
+def gauxpfx():
+ return _CL_AUX_GFID_PFX
+
+
+def sha256hex(s):
+ return sha256(s).hexdigest()
+
+
+def selfkill(sig=SIGTERM):
+ os.kill(os.getpid(), sig)
+
+
+def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
+ """ wrapper around calls resilient to errnos.
+ """
+ nr_tries = 0
+ while True:
+ try:
+ return call(*arg)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in errnos:
+ return ex.errno
+ if ex.errno not in retry_errnos:
+ raise
+ nr_tries += 1
+ if nr_tries == GF_OP_RETRIES:
+ # probably a screwed state, cannot do much...
+ logging.warn(lf('reached maximum retries',
+ args=repr(arg),
+ error=ex))
+ raise
+ time.sleep(0.250) # retry the call
+
+
+def lstat(e):
+ return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
+
+def get_gfid_from_mnt(gfidpath):
+ return errno_wrap(Xattr.lgetxattr,
+ [gfidpath, 'glusterfs.gfid.string',
+ GX_GFID_CANONICAL_LEN], [ENOENT], [ESTALE])
+
+
+def matching_disk_gfid(gfid, entry):
+ disk_gfid = get_gfid_from_mnt(entry)
+ if isinstance(disk_gfid, int):
+ return False
+
+ if not gfid == disk_gfid:
+ return False
+
+ return True
+
+
+class NoStimeAvailable(Exception):
+ pass
+
+
+class PartialHistoryAvailable(Exception):
+ pass
+
+
+class ChangelogHistoryNotAvailable(Exception):
+ pass
+
+
+class ChangelogException(OSError):
+ pass
+
+
+def gf_event(event_type, **kwargs):
+ if EVENTS_ENABLED:
+ from gfevents.gf_event import gf_event as gfevent
+ gfevent(event_type, **kwargs)
+
+
+class GlusterLogLevel(object):
+ NONE = 0
+ EMERG = 1
+ ALERT = 2
+ CRITICAL = 3
+ ERROR = 4
+ WARNING = 5
+ NOTICE = 6
+ INFO = 7
+ DEBUG = 8
+ TRACE = 9
+
+
+def get_changelog_log_level(lvl):
+ return getattr(GlusterLogLevel, lvl, GlusterLogLevel.INFO)
+
+
+def get_master_and_slave_data_from_args(args):
+ master_name = None
+ slave_data = None
+ for arg in args:
+ if arg.startswith(":"):
+ master_name = arg.replace(":", "")
+ if "::" in arg:
+ slave_data = arg.replace("ssh://", "")
+
+ return (master_name, slave_data)
+
+def unshare_propagation_supported():
+ global unshare_mnt_propagation
+ if unshare_mnt_propagation is not None:
+ return unshare_mnt_propagation
+
+ unshare_mnt_propagation = False
+ p = subprocess.Popen(["unshare", "--help"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ if "propagation" in out:
+ unshare_mnt_propagation = True
+
+ return unshare_mnt_propagation
+
+
+def get_rsync_version(rsync_cmd):
+ global rsync_version
+ if rsync_version is not None:
+ return rsync_version
+
+ rsync_version = "0"
+ p = subprocess.Popen([rsync_cmd, "--version"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ rsync_version = out.split(" ", 4)[3]
+
+ return rsync_version
+
+
+def get_slv_dir_path(slv_host, slv_volume, gfid):
+ global slv_bricks
+
+ dir_path = ENOENT
+ pfx = gauxpfx()
+
+ if not slv_bricks:
+ slv_info = Volinfo(slv_volume, slv_host, master=False)
+ slv_bricks = slv_info.bricks
+ # Result of readlink would be of format as below.
+ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename"
+ for brick in slv_bricks:
+ dir_path = errno_wrap(os.path.join,
+ [brick['dir'],
+ ".glusterfs", gfid[0:2],
+ gfid[2:4],
+ gfid], [ENOENT], [ESTALE])
+ if dir_path != ENOENT:
+ try:
+ realpath = errno_wrap(os.readlink, [dir_path],
+ [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ realpath_parts = realpath.split('/')
+ pargfid = realpath_parts[-2]
+ basename = realpath_parts[-1]
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+ except OSError:
+ # .gfid/GFID
+ gfidpath = unescape_space_newline(os.path.join(pfx, gfid))
+ realpath = errno_wrap(Xattr.lgetxattr_buf,
+ [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ basename = os.path.basename(realpath).rstrip('\x00')
+ dirpath = os.path.dirname(realpath)
+ if dirpath == "/":
+ pargfid = ROOT_GFID
+ else:
+ dirpath = dirpath.strip("/")
+ pargfid = get_gfid_from_mnt(dirpath)
+ if isinstance(pargfid, int):
+ return None
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+
+ return None
+
+
+def lf(event, **kwargs):
+ """
+ Log Format helper function, log messages can be
+ easily modified to structured log format.
+ lf("Config Change", sync_jobs=4, brick=/bricks/b1) will be
+ converted as "Config Change [{brick=/bricks/b1}, {sync_jobs=4}]"
+ """
+ msgparts = []
+ for k, v in kwargs.items():
+ msgparts.append("{%s=%s}" % (k, v))
+ return "%s [%s]" % (event, ", ".join(msgparts))
+
+
+class Popen(subprocess.Popen):
+
+ """customized subclass of subprocess.Popen with a ring
+ buffer for children error output"""
+
+ @classmethod
+ def init_errhandler(cls):
+ """start the thread which handles children's error output"""
+ cls.errstore = {}
+
+ def tailer():
+ while True:
+ errstore = cls.errstore.copy()
+ try:
+ poe, _, _ = select(
+ [po.stderr for po in errstore], [], [], 1)
+ except (ValueError, SelectError):
+ # stderr is already closed wait for some time before
+ # checking next error
+ time.sleep(0.5)
+ continue
+ for po in errstore:
+ if po.stderr not in poe:
+ continue
+ po.lock.acquire()
+ try:
+ if po.on_death_row:
+ continue
+ la = errstore[po]
+ try:
+ fd = po.stderr.fileno()
+ except ValueError: # file is already closed
+ time.sleep(0.5)
+ continue
+
+ try:
+ l = os.read(fd, 1024)
+ except OSError:
+ time.sleep(0.5)
+ continue
+
+ if not l:
+ continue
+ tots = len(l)
+ for lx in la:
+ tots += len(lx)
+ while tots > 1 << 20 and la:
+ tots -= len(la.pop(0))
+ la.append(l)
+ finally:
+ po.lock.release()
+ t = Thread(target=tailer)
+ t.start()
+ cls.errhandler = t
+
+ @classmethod
+ def fork(cls):
+ """fork wrapper that restarts errhandler thread in child"""
+ pid = os.fork()
+ if not pid:
+ cls.init_errhandler()
+ return pid
+
+ def __init__(self, args, *a, **kw):
+ """customizations for subprocess.Popen instantiation
+
+ - 'close_fds' is taken to be the default
+ - if child's stderr is chosen to be managed,
+ register it with the error handler thread
+ """
+ self.args = args
+ if 'close_fds' not in kw:
+ kw['close_fds'] = True
+ self.lock = threading.Lock()
+ self.on_death_row = False
+ self.elines = []
+ try:
+ sup(self, args, *a, **kw)
+ except:
+ ex = sys.exc_info()[1]
+ if not isinstance(ex, OSError):
+ raise
+ raise GsyncdError("""execution of "%s" failed with %s (%s)""" %
+ (args[0], errno.errorcode[ex.errno],
+ os.strerror(ex.errno)))
+ if kw.get('stderr') == subprocess.PIPE:
+ assert(getattr(self, 'errhandler', None))
+ self.errstore[self] = []
+
+ def errlog(self):
+ """make a log about child's failure event"""
+ logging.error(lf("command returned error",
+ cmd=" ".join(self.args),
+ error=self.returncode))
+ lp = ''
+
+ def logerr(l):
+ logging.error(self.args[0] + "> " + l)
+ for l in self.elines:
+ ls = l.split('\n')
+ ls[0] = lp + ls[0]
+ lp = ls.pop()
+ for ll in ls:
+ logerr(ll)
+ if lp:
+ logerr(lp)
+
+ def errfail(self):
+ """fail nicely if child did not terminate with success"""
+ self.errlog()
+ finalize(exval=1)
+
+ def terminate_geterr(self, fail_on_err=True):
+ """kill child, finalize stderr harvesting (unregister
+ from errhandler, set up .elines), fail on error if
+ asked for
+ """
+ self.lock.acquire()
+ try:
+ self.on_death_row = True
+ finally:
+ self.lock.release()
+ elines = self.errstore.pop(self)
+ if self.poll() is None:
+ self.terminate()
+ if self.poll() is None:
+ time.sleep(0.1)
+ self.kill()
+ self.wait()
+ while True:
+ if not select([self.stderr], [], [], 0.1)[0]:
+ break
+ b = os.read(self.stderr.fileno(), 1024)
+ if b:
+ elines.append(b.decode())
+ else:
+ break
+ self.stderr.close()
+ self.elines = elines
+ if fail_on_err and self.returncode != 0:
+ self.errfail()
+
+
+def host_brick_split(value):
+ """
+ IPv6 compatible way to split and get the host
+ and brick information. Example inputs:
+ node1.example.com:/exports/bricks/brick1/brick
+ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick
+ """
+ parts = value.split(":")
+ brick = parts[-1]
+ hostparts = parts[0:-1]
+ return (":".join(hostparts), brick)
+
+
+class Volinfo(object):
+
+ def __init__(self, vol, host='localhost', prelude=[], master=True):
+ if master:
+ gluster_cmd_dir = gconf.get("gluster-command-dir")
+ else:
+ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
+
+ gluster_cmd = os.path.join(gluster_cmd_dir, 'gluster')
+ po = Popen(prelude + [gluster_cmd, '--xml', '--remote-host=' + host,
+ 'volume', 'info', vol],
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+ vix = po.stdout.read()
+ po.wait()
+ po.terminate_geterr()
+ vi = XET.fromstring(vix)
+ if vi.find('opRet').text != '0':
+ if prelude:
+ via = '(via %s) ' % prelude.join(' ')
+ else:
+ via = ' '
+ raise GsyncdError('getting volume info of %s%s '
+ 'failed with errorcode %s' %
+ (vol, via, vi.find('opErrno').text))
+ self.tree = vi
+ self.volume = vol
+ self.host = host
+
+ def get(self, elem):
+ return self.tree.findall('.//' + elem)
+
+ def is_tier(self):
+ return (self.get('typeStr')[0].text == 'Tier')
+
+ def is_hot(self, brickpath):
+ logging.debug('brickpath: ' + repr(brickpath))
+ return brickpath in self.hot_bricks
+
+ @property
+ @memoize
+ def bricks(self):
+ def bparse(b):
+ host, dirp = host_brick_split(b.find("name").text)
+ return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text}
+ return [bparse(b) for b in self.get('brick')]
+
+ @property
+ @memoize
+ def uuid(self):
+ ids = self.get('id')
+ if len(ids) != 1:
+ raise GsyncdError("volume info of %s obtained from %s: "
+ "ambiguous uuid" % (self.volume, self.host))
+ return ids[0].text
+
+ def replica_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotreplicaCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/coldreplicaCount')[0].text)
+ else:
+ return int(self.get('replicaCount')[0].text)
+
+ def disperse_count(self, tier, hot):
+ if (tier and hot):
+ # Tiering doesn't support disperse volume as hot brick,
+ # hence no xml output, so returning 0. In case, if it's
+ # supported later, we should change here.
+ return 0
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddisperseCount')[0].text)
+ else:
+ return int(self.get('disperseCount')[0].text)
+
+ def distribution_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotdistCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddistCount')[0].text)
+ else:
+ return int(self.get('distCount')[0].text)
+
+ @property
+ @memoize
+ def hot_bricks(self):
+ return [b.text for b in self.get('hotBricks/brick')]
+
+ def get_hot_bricks_count(self, tier):
+ if (tier):
+ return int(self.get('hotBricks/hotbrickCount')[0].text)
+ else:
+ return 0
+
+
+class VolinfoFromGconf(object):
+ # Glusterd will generate following config items before Geo-rep start
+ # So that Geo-rep need not run gluster commands from inside
+ # Volinfo object API/interface kept as is so that caller need not
+ # change anything except calling this instead of Volinfo()
+ #
+ # master-bricks=
+ # master-bricks=NODEID:HOSTNAME:PATH,..
+ # slave-bricks=NODEID:HOSTNAME,..
+ # master-volume-id=
+ # slave-volume-id=
+ # master-replica-count=
+ # master-disperse_count=
+ def __init__(self, vol, host='localhost', master=True):
+ self.volume = vol
+ self.host = host
+ self.master = master
+
+ def is_tier(self):
+ return False
+
+ def is_hot(self, brickpath):
+ return False
+
+ def is_uuid(self, value):
+ try:
+ uuid.UUID(value)
+ return True
+ except ValueError:
+ return False
+
+ def possible_path(self, value):
+ return "/" in value
+
+ @property
+ @memoize
+ def bricks(self):
+ pfx = "master-" if self.master else "slave-"
+ bricks_data = gconf.get(pfx + "bricks")
+ if bricks_data is None:
+ return []
+
+ bricks_data = bricks_data.split(",")
+ bricks_data = [b.strip() for b in bricks_data]
+ out = []
+ for b in bricks_data:
+ parts = b.split(":")
+ b_uuid = None
+ if self.is_uuid(parts[0]):
+ b_uuid = parts[0]
+ # Set all parts except first
+ parts = parts[1:]
+
+ if self.possible_path(parts[-1]):
+ bpath = parts[-1]
+ # Set all parts except last
+ parts = parts[0:-1]
+
+ out.append({
+ "host": ":".join(parts), # if remaining parts are IPv6 name
+ "dir": bpath,
+ "uuid": b_uuid
+ })
+
+ return out
+
+ @property
+ @memoize
+ def uuid(self):
+ if self.master:
+ return gconf.get("master-volume-id")
+ else:
+ return gconf.get("slave-volume-id")
+
+ def replica_count(self, tier, hot):
+ return gconf.get("master-replica-count")
+
+ def disperse_count(self, tier, hot):
+ return gconf.get("master-disperse-count")
+
+ def distribution_count(self, tier, hot):
+ return gconf.get("master-distribution-count")
+
+ @property
+ @memoize
+ def hot_bricks(self):
+ return []
+
+ def get_hot_bricks_count(self, tier):
+ return 0
+
+
+def can_ssh(host, port=22):
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ s.connect((host, port))
+ flag = True
+ except socket.error:
+ flag = False
+
+ s.close()
+ return flag
+
+
+def get_up_nodes(hosts, port):
+ # List of hosts with Hostname/IP and UUID
+ up_nodes = []
+ for h in hosts:
+ if can_ssh(h[0], port):
+ up_nodes.append(h)
+
+ return up_nodes
diff --git a/geo-replication/test-requirements.txt b/geo-replication/test-requirements.txt
new file mode 100644
index 00000000000..d6165640d3f
--- /dev/null
+++ b/geo-replication/test-requirements.txt
@@ -0,0 +1,7 @@
+# Hacking already pins down pep8, pyflakes and flake8
+flake8
+coverage
+nose
+nosexcover
+nosehtmloutput
+mock>=0.8.0
diff --git a/geo-replication/tests/__init__.py b/geo-replication/tests/__init__.py
new file mode 100644
index 00000000000..b4648b69645
--- /dev/null
+++ b/geo-replication/tests/__init__.py
@@ -0,0 +1,9 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/geo-replication/tests/unit/__init__.py b/geo-replication/tests/unit/__init__.py
new file mode 100644
index 00000000000..b4648b69645
--- /dev/null
+++ b/geo-replication/tests/unit/__init__.py
@@ -0,0 +1,9 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/geo-replication/tests/unit/test_gsyncdstatus.py b/geo-replication/tests/unit/test_gsyncdstatus.py
new file mode 100755
index 00000000000..9c1aa2ad4ad
--- /dev/null
+++ b/geo-replication/tests/unit/test_gsyncdstatus.py
@@ -0,0 +1,193 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import unittest
+import os
+import urllib
+
+from syncdaemon.gstatus import (GeorepStatus, set_monitor_status,
+ get_default_values,
+ MONITOR_STATUS, DEFAULT_STATUS,
+ STATUS_VALUES, CRAWL_STATUS_VALUES,
+ human_time, human_time_utc)
+
+
+class GeorepStatusTestCase(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.work_dir = os.path.dirname(os.path.abspath(__file__))
+ cls.monitor_status_file = os.path.join(cls.work_dir, "monitor.status")
+ cls.brick = "/exports/bricks/b1"
+ cls.status = GeorepStatus(cls.monitor_status_file, cls.brick)
+ cls.statusfile = os.path.join(cls.work_dir,
+ "brick_%s.status"
+ % urllib.quote_plus(cls.brick))
+
+ @classmethod
+ def tearDownClass(cls):
+ os.remove(cls.statusfile)
+ os.remove(cls.monitor_status_file)
+
+ def _filter_dict(self, inp, keys):
+ op = {}
+ for k in keys:
+ op[k] = inp.get(k, None)
+ return op
+
+ def test_monitor_status_file_created(self):
+ self.assertTrue(os.path.exists(self.monitor_status_file))
+
+ def test_status_file_created(self):
+ self.assertTrue(os.path.exists(self.statusfile))
+
+ def test_set_monitor_status(self):
+ for st in MONITOR_STATUS:
+ set_monitor_status(self.monitor_status_file, st)
+ self.assertTrue(self.status.get_monitor_status(), st)
+
+ def test_default_values_test(self):
+ self.assertTrue(get_default_values(), {
+ "slave_node": DEFAULT_STATUS,
+ "worker_status": DEFAULT_STATUS,
+ "last_synced": 0,
+ "last_synced_utc": 0,
+ "crawl_status": DEFAULT_STATUS,
+ "entry": 0,
+ "data": 0,
+ "metadata": 0,
+ "failures": 0,
+ "checkpoint_completed": False,
+ "checkpoint_time": 0,
+ "checkpoint_time_utc": 0,
+ "checkpoint_completion_time": 0,
+ "checkpoint_completion_time_utc": 0
+ })
+
+ def test_human_time(self):
+ self.assertTrue(human_time(1429174398), "2015-04-16 14:23:18")
+
+ def test_human_time_utc(self):
+ self.assertTrue(human_time_utc(1429174398), "2015-04-16 08:53:18")
+
+ def test_invalid_human_time(self):
+ self.assertTrue(human_time(142917439), DEFAULT_STATUS)
+ self.assertTrue(human_time("abcdef"), DEFAULT_STATUS)
+
+ def test_invalid_human_time_utc(self):
+ self.assertTrue(human_time_utc(142917439), DEFAULT_STATUS)
+ self.assertTrue(human_time_utc("abcdef"), DEFAULT_STATUS)
+
+ def test_worker_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"], st)
+
+ def test_crawl_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ for st in CRAWL_STATUS_VALUES:
+ self.status.set_worker_crawl_status(st)
+ self.assertTrue(self.status.get_status()["crawl_status"], st)
+
+ def test_slave_node(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.status.set_slave_node("fvm2")
+ self.assertTrue(self.status.get_status()["slave_node"], "fvm2")
+
+ self.status.set_worker_status("Passive")
+ self.status.set_slave_node("fvm2")
+ self.assertTrue(self.status.get_status()["slave_node"], "fvm2")
+
+ def test_active_worker_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.assertTrue(self.status.get_status()["worker_status"], "Active")
+
+ def test_passive_worker_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_passive()
+ self.assertTrue(self.status.get_status()["worker_status"], "Passive")
+
+ def test_set_field(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.status.set_field("entry", 42)
+ self.assertTrue(self.status.get_status()["entry"], 42)
+
+ def test_inc_value(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.status.set_field("entry", 0)
+ self.status.inc_value("entry", 2)
+ self.assertTrue(self.status.get_status()["entry"], 2)
+
+ self.status.set_field("data", 0)
+ self.status.inc_value("data", 2)
+ self.assertTrue(self.status.get_status()["data"], 2)
+
+ self.status.set_field("meta", 0)
+ self.status.inc_value("meta", 2)
+ self.assertTrue(self.status.get_status()["meta"], 2)
+
+ self.status.set_field("failures", 0)
+ self.status.inc_value("failures", 2)
+ self.assertTrue(self.status.get_status()["failures"], 2)
+
+ def test_dec_value(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+
+ self.status.set_field("entry", 4)
+ self.status.inc_value("entry", 2)
+ self.assertTrue(self.status.get_status()["entry"], 2)
+
+ self.status.set_field("data", 4)
+ self.status.inc_value("data", 2)
+ self.assertTrue(self.status.get_status()["data"], 2)
+
+ self.status.set_field("meta", 4)
+ self.status.inc_value("meta", 2)
+ self.assertTrue(self.status.get_status()["meta"], 2)
+
+ self.status.set_field("failures", 4)
+ self.status.inc_value("failures", 2)
+ self.assertTrue(self.status.get_status()["failures"], 2)
+
+ def test_worker_status_when_monitor_status_created(self):
+ set_monitor_status(self.monitor_status_file, "Created")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"],
+ "Created")
+
+ def test_worker_status_when_monitor_status_paused(self):
+ set_monitor_status(self.monitor_status_file, "Paused")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"],
+ "Paused")
+
+ def test_worker_status_when_monitor_status_stopped(self):
+ set_monitor_status(self.monitor_status_file, "Stopped")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"],
+ "Stopped")
+
+ def test_status_when_worker_status_active(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/geo-replication/tests/unit/test_syncdutils.py b/geo-replication/tests/unit/test_syncdutils.py
new file mode 100644
index 00000000000..ff537ab2660
--- /dev/null
+++ b/geo-replication/tests/unit/test_syncdutils.py
@@ -0,0 +1,29 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import unittest
+
+from syncdaemon import syncdutils
+
+
+class SyncdutilsTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_escape(self):
+ self.assertEqual(syncdutils.escape("http://gluster.org"),
+ "http%3A%2F%2Fgluster.org")
+
+ def test_unescape(self):
+ self.assertEqual(syncdutils.unescape("http%3A%2F%2Fgluster.org"),
+ "http://gluster.org")
diff --git a/geo-replication/tox.ini b/geo-replication/tox.ini
new file mode 100644
index 00000000000..57ff086b947
--- /dev/null
+++ b/geo-replication/tox.ini
@@ -0,0 +1,32 @@
+[tox]
+envlist = py26,py27,pep8
+
+[testenv]
+whitelist_externals=bash
+setenv = VIRTUAL_ENV={envdir}
+deps =
+ --download-cache={homedir}/.pipcache
+ -r{toxinidir}/test-requirements.txt
+changedir = {toxinidir}/tests/unit
+commands = nosetests -v --exe --with-xunit --with-coverage --cover-package syncdaemon --cover-erase --cover-xml --cover-html --cover-branches --with-html-output {posargs}
+
+[tox:jenkins]
+downloadcache = ~/cache/pip
+
+[testenv:pep8]
+changedir = {toxinidir}
+commands =
+ flake8
+ flake8 syncdaemon tests
+
+[testenv:cover]
+setenv = NOSE_WITH_COVERAGE=1
+
+[testenv:venv]
+commands = {posargs}
+
+[flake8]
+ignore = H
+builtins = _
+exclude = .venv,.tox,dist,doc,tests,*egg
+show-source = True \ No newline at end of file
diff --git a/geo-replication/unittests.sh b/geo-replication/unittests.sh
new file mode 100644
index 00000000000..d5dbd00bd4c
--- /dev/null
+++ b/geo-replication/unittests.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+cd $(dirname $0)/tests/unit
+nosetests -v --exe --with-coverage --cover-package \
+ syncdaemon --cover-erase --cover-html --cover-branches $@
+
+saved_status=$?
+rm -f .coverage
+exit $saved_status
diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in
new file mode 100644
index 00000000000..4a2edb7bf07
--- /dev/null
+++ b/glusterfs-api.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: glusterfs-api
+Description: GlusterFS API
+/* This is the API version, NOT package version */
+Version: @GFAPI_VERSION@
+Requires: @PKGCONFIG_UUID@
+Libs: -L${libdir} @GFAPI_LIBS@ -lgfapi -lglusterfs -lgfrpc -lgfxdr
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@
diff --git a/glusterfs-hadoop/0.20.2/conf/core-site.xml b/glusterfs-hadoop/0.20.2/conf/core-site.xml
deleted file mode 100644
index d7f75fca733..00000000000
--- a/glusterfs-hadoop/0.20.2/conf/core-site.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-
- <property>
- <name>fs.glusterfs.impl</name>
- <value>org.apache.hadoop.fs.glusterfs.GlusterFileSystem</value>
- </property>
-
- <property>
- <name>fs.default.name</name>
- <value>glusterfs://192.168.1.36:9000</value>
- </property>
-
- <property>
- <name>fs.glusterfs.volname</name>
- <value>volume-dist-rep</value>
- </property>
-
- <property>
- <name>fs.glusterfs.mount</name>
- <value>/mnt/glusterfs</value>
- </property>
-
- <property>
- <name>fs.glusterfs.server</name>
- <value>192.168.1.36</value>
- </property>
-
- <property>
- <name>quick.slave.io</name>
- <value>Off</value>
- </property>
-
-</configuration>
diff --git a/glusterfs-hadoop/0.20.2/pom.xml b/glusterfs-hadoop/0.20.2/pom.xml
deleted file mode 100644
index fe661d40fdc..00000000000
--- a/glusterfs-hadoop/0.20.2/pom.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>org.apache.hadoop.fs.glusterfs</groupId>
- <artifactId>glusterfs</artifactId>
- <packaging>jar</packaging>
- <version>0.20.2-0.1</version>
- <name>glusterfs</name>
- <url>http://maven.apache.org</url>
- <dependencies>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>3.8.1</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>2.3.2</version>
- <configuration>
- <source>1.5</source>
- <target>1.5</target>
- </configuration>
- </plugin>
- </plugins>
- </build>
-</project>
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
deleted file mode 100644
index e633b8aae80..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-
-public class GlusterFSBrickClass {
- String host;
- String exportedFile;
- long start;
- long end;
- boolean isChunked;
- int stripeSize; // Stripe size in bytes
- int nrStripes; // number of stripes
- int switchCount; // for SR, DSR - number of replicas of each stripe
- // -1 for others
-
- public GlusterFSBrickClass (String brick, long start, long len, boolean flag,
- int stripeSize, int nrStripes, int switchCount)
- throws IOException {
- this.host = brick2host(brick);
- this.exportedFile = brick2file(brick);
- this.start = start;
- this.end = start + len;
- this.isChunked = flag;
- this.stripeSize = stripeSize;
- this.nrStripes = nrStripes;
- this.switchCount = switchCount;
- }
-
- public boolean isChunked () {
- return isChunked;
- }
-
- public String brickIsLocal(String hostname) {
- String path = null;
- File f = null;
- if (host.equals(hostname))
- path = exportedFile;
-
- return path;
- }
-
- public int[] getBrickNumberInTree(long start, int len) {
- long end = len;
- int startNodeInTree = ((int) (start / stripeSize)) % nrStripes;
- int endNodeInTree = ((int) ((start + len) / stripeSize)) % nrStripes;
-
- if (startNodeInTree != endNodeInTree) {
- end = (start - (start % stripeSize)) + stripeSize;
- end -= start;
- }
-
- return new int[] {startNodeInTree, endNodeInTree, (int) end};
- }
-
- public boolean brickHasFilePart(int nodeInTree, int nodeLoc) {
- if (switchCount == -1)
- return (nodeInTree == nodeLoc);
-
- nodeInTree *= switchCount;
- for (int i = nodeInTree; i < (nodeInTree + switchCount); i++) {
- if (i == nodeLoc)
- return true;
- }
-
- return false;
- }
-
- public String brick2host (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2)
- throw new IOException("Error getting hostname from brick");
-
- return hf[0];
- }
-
- public String brick2file (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2)
- throw new IOException("Error getting hostname from brick");
-
- return hf[1];
- }
-
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
deleted file mode 100644
index 11454e6368a..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-
-public class GlusterFSBrickRepl {
- private String[] replHost;
- private long start;
- private long len;
- private int cnt;
-
- GlusterFSBrickRepl(int replCount, long start, long len) {
- this.replHost = new String[replCount];
- this.start = start;
- this.len = len;
- this.cnt = 0;
- }
-
- public void addHost (String host) {
- this.replHost[cnt++] = host;
- }
-
- public String[] getReplHosts () {
- return this.replHost;
- }
-
- public long getStartLen () {
- return this.start;
- }
-
- public long getOffLen () {
- return this.len;
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java
deleted file mode 100644
index 455dda97e9c..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java
+++ /dev/null
@@ -1,472 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.net.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.io.*;
-import java.util.HashMap;
-import java.util.TreeMap;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.BlockLocation;
-
-public class GlusterFSXattr {
-
- public enum LAYOUT { D, S, R, DS, DR, SR, DSR }
- public enum CMD { GET_HINTS, GET_REPLICATION, GET_BLOCK_SIZE, CHECK_FOR_QUICK_IO }
-
- private static String hostname;
-
- public GlusterFSXattr() { }
-
- public static String brick2host (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2) {
- System.out.println("brick not of format hostname:path");
- throw new IOException("Error getting hostname from brick");
- }
-
- return hf[0];
- }
-
- public static String brick2file (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2) {
- System.out.println("brick not of format hostname:path");
- throw new IOException("Error getting hostname from brick");
- }
-
- return hf[1];
- }
-
- public static BlockLocation[] getPathInfo (String filename, long start, long len)
- throws IOException {
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
-
- vol = execGetFattr(filename, meta, CMD.GET_HINTS);
-
- return getHints(vol, meta, start, len, null);
- }
-
- public static long getBlockSize (String filename)
- throws IOException {
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
-
- vol = execGetFattr(filename, meta, CMD.GET_BLOCK_SIZE);
-
- if (!meta.containsKey("block-size"))
- return 0;
-
- return (long) meta.get("block-size");
-
- }
-
- public static short getReplication (String filename)
- throws IOException {
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
-
- vol = execGetFattr(filename, meta, CMD.GET_REPLICATION);
-
- return (short) getReplicationFromLayout(vol, meta);
-
- }
-
- public static TreeMap<Integer, GlusterFSBrickClass> quickIOPossible (String filename, long start,
- long len)
- throws IOException {
- String realpath = null;
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
- TreeMap<Integer, GlusterFSBrickClass> hnts = new TreeMap<Integer, GlusterFSBrickClass>();
-
- vol = execGetFattr(filename, meta, CMD.GET_HINTS);
- getHints(vol, meta, start, len, hnts);
-
- if (hnts.size() == 0)
- return null; // BOOM !!
-
- // DEBUG - dump hnts here
- return hnts;
- }
-
- public static HashMap<String, ArrayList<String>> execGetFattr (String filename,
- HashMap<String, Integer> meta,
- CMD cmd)
- throws IOException {
- Process p = null;
- BufferedReader brInput = null;
- String s = null;
- String cmdOut = null;
- String getfattrCmd = null;
- String xlator = null;
- String enclosingXl = null;
- String enclosingXlVol = null;
- String key = null;
- String layout = "";
- int rcount = 0;
- int scount = 0;
- int dcount = 0;
- int count = 0;
-
- HashMap<String, ArrayList<String>> vol = new HashMap<String, ArrayList<String>>();
-
- getfattrCmd = "getfattr -m . -n trusted.glusterfs.pathinfo " + filename;
-
- p = Runtime.getRuntime().exec(getfattrCmd);
- brInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
-
- cmdOut = "";
- while ( (s = brInput.readLine()) != null )
- cmdOut += s;
-
- /**
- * TODO: Use a single regex for extracting posix paths as well
- * as xlator counts for layout matching.
- */
-
- Pattern pattern = Pattern.compile("<(.*?)[:\\(](.*?)>");
- Matcher matcher = pattern.matcher(cmdOut);
-
- Pattern p_px = Pattern.compile(".*?:(.*)");
- Matcher m_px;
- String gibberish_path;
-
- s = null;
- while (matcher.find()) {
- xlator = matcher.group(1);
- if (xlator.equalsIgnoreCase("posix")) {
- if (enclosingXl.equalsIgnoreCase("replicate"))
- count = rcount;
- else if (enclosingXl.equalsIgnoreCase("stripe"))
- count = scount;
- else if (enclosingXl.equalsIgnoreCase("distribute"))
- count = dcount;
- else
- throw new IOException("Unknown Translator: " + enclosingXl);
-
- key = enclosingXl + "-" + count;
-
- if (vol.get(key) == null)
- vol.put(key, new ArrayList<String>());
-
- gibberish_path = matcher.group(2);
-
- /* extract posix path from the gibberish string */
- m_px = p_px.matcher(gibberish_path);
- if (!m_px.find())
- throw new IOException("Cannot extract posix path");
-
- vol.get(key).add(m_px.group(1));
- continue;
- }
-
- enclosingXl = xlator;
- enclosingXlVol = matcher.group(2);
-
- if (xlator.equalsIgnoreCase("replicate"))
- if (rcount++ != 0)
- continue;
-
- if (xlator.equalsIgnoreCase("stripe")) {
- if (scount++ != 0)
- continue;
-
-
- Pattern ps = Pattern.compile("\\[(\\d+)\\]");
- Matcher ms = ps.matcher(enclosingXlVol);
-
- if (ms.find()) {
- if (((cmd == CMD.GET_BLOCK_SIZE) || (cmd == CMD.GET_HINTS))
- && (meta != null))
- meta.put("block-size", Integer.parseInt(ms.group(1)));
- } else
- throw new IOException("Cannot get stripe size");
- }
-
- if (xlator.equalsIgnoreCase("distribute"))
- if (dcount++ != 0)
- continue;
-
- layout += xlator.substring(0, 1);
- }
-
- if ((dcount == 0) && (scount == 0) && (rcount == 0))
- throw new IOException("Cannot get layout");
-
- if (meta != null) {
- meta.put("dcount", dcount);
- meta.put("scount", scount);
- meta.put("rcount", rcount);
- }
-
- vol.put("layout", new ArrayList<String>(1));
- vol.get("layout").add(layout);
-
- return vol;
- }
-
- static BlockLocation[] getHints (HashMap<String, ArrayList<String>> vol,
- HashMap<String, Integer> meta,
- long start, long len,
- TreeMap<Integer, GlusterFSBrickClass> hnts)
- throws IOException {
- String brick = null;
- String key = null;
- boolean done = false;
- int i = 0;
- int counter = 0;
- int stripeSize = 0;
- long stripeStart = 0;
- long stripeEnd = 0;
- int nrAllocs = 0;
- int allocCtr = 0;
- BlockLocation[] result = null;
- ArrayList<String> brickList = null;
- ArrayList<String> stripedBricks = null;
- Iterator<String> it = null;
-
- String[] blks = null;
- GlusterFSBrickRepl[] repl = null;
- int dcount, scount, rcount;
-
- LAYOUT l = LAYOUT.valueOf(vol.get("layout").get(0));
- dcount = meta.get("dcount");
- scount = meta.get("scount");
- rcount = meta.get("rcount");
-
- switch (l) {
- case D:
- key = "DISTRIBUTE-" + dcount;
- brick = vol.get(key).get(0);
-
- if (hnts == null) {
- result = new BlockLocation[1];
- result[0] = new BlockLocation(null, new String[] {brick2host(brick)}, start, len);
- } else
- hnts.put(0, new GlusterFSBrickClass(brick, start, len, false, -1, -1, -1));
- break;
-
- case R:
- case DR:
- /* just the name says it's striped - the volume isn't */
- stripedBricks = new ArrayList<String>();
-
- for (i = 1; i <= rcount; i++) {
- key = "REPLICATE-" + i;
- brickList = vol.get(key);
- it = brickList.iterator();
- while (it.hasNext()) {
- stripedBricks.add(it.next());
- }
- }
-
- nrAllocs = stripedBricks.size();
- if (hnts == null) {
- result = new BlockLocation[1];
- blks = new String[nrAllocs];
- }
-
- for (i = 0; i < nrAllocs; i++) {
- if (hnts == null)
- blks[i] = brick2host(stripedBricks.get(i));
- else
- hnts.put(i, new GlusterFSBrickClass(stripedBricks.get(i), start, len, false, -1, -1, -1));
- }
-
- if (hnts == null)
- result[0] = new BlockLocation(null, blks, start, len);
-
- break;
-
- case SR:
- case DSR:
- int rsize = 0;
- ArrayList<ArrayList<String>> replicas = new ArrayList<ArrayList<String>>();
-
- stripedBricks = new ArrayList<String>();
-
- if (rcount == 0)
- throw new IOException("got replicated volume with replication count 0");
-
- for (i = 1; i <= rcount; i++) {
- key = "REPLICATE-" + i;
- brickList = vol.get(key);
- it = brickList.iterator();
- replicas.add(i - 1, new ArrayList<String>());
- while (it.hasNext()) {
- replicas.get(i - 1).add(it.next());
- }
- }
-
- stripeSize = meta.get("block-size");
-
- nrAllocs = (int) (((len - start) / stripeSize) + 1);
- if (hnts == null) {
- result = new BlockLocation[nrAllocs];
- repl = new GlusterFSBrickRepl[nrAllocs];
- }
-
- // starting stripe position
- counter = (int) ((start / stripeSize) % rcount);
- stripeStart = start;
-
- key = null;
- int currAlloc = 0;
- boolean hntsDone = false;
- while ((stripeStart < len) && !done) {
- stripeEnd = (stripeStart - (stripeStart % stripeSize)) + stripeSize - 1;
- if (stripeEnd > start + len) {
- stripeEnd = start + len - 1;
- done = true;
- }
-
- rsize = replicas.get(counter).size();
-
- if (hnts == null)
- repl[allocCtr] = new GlusterFSBrickRepl(rsize, stripeStart, (stripeEnd - stripeStart));
-
- for (i = 0; i < rsize; i++) {
- brick = replicas.get(counter).get(i);
- currAlloc = (allocCtr * rsize) + i;
-
- if (hnts == null)
- repl[allocCtr].addHost(brick2host(brick));
- else
- if (currAlloc <= (rsize * rcount) - 1) {
- hnts.put(currAlloc, new GlusterFSBrickClass(brick, stripeStart,
- (stripeEnd - stripeStart),
- true, stripeSize, rcount, rsize));
- } else
- hntsDone = true;
- }
-
- if (hntsDone)
- break;
-
- stripeStart = stripeEnd + 1;
-
- allocCtr++;
- counter++;
-
- if (counter >= replicas.size())
- counter = 0;
- }
-
- if (hnts == null)
- for (int k = 0; k < nrAllocs; k++)
- result[k] = new BlockLocation(null, repl[k].getReplHosts(), repl[k].getStartLen(), repl[k].getOffLen());
-
- break;
-
- case S:
- case DS:
- if (scount == 0)
- throw new IOException("got striped volume with stripe count 0");
-
- stripedBricks = new ArrayList<String>();
- stripeSize = meta.get("block-size");
-
- key = "STRIPE-" + scount;
- brickList = vol.get(key);
- it = brickList.iterator();
- while (it.hasNext()) {
- stripedBricks.add(it.next());
- }
-
- nrAllocs = (int) ((len - start) / stripeSize) + 1;
- if (hnts == null)
- result = new BlockLocation[nrAllocs];
-
- // starting stripe position
- counter = (int) ((start / stripeSize) % stripedBricks.size());
- stripeStart = start;
-
- key = null;
- while ((stripeStart < len) && !done) {
- brick = stripedBricks.get(counter);
-
- stripeEnd = (stripeStart - (stripeStart % stripeSize)) + stripeSize - 1;
- if (stripeEnd > start + len) {
- stripeEnd = start + len - 1;
- done = true;
- }
-
- if (hnts == null)
- result[allocCtr] = new BlockLocation(null, new String[] {brick2host(brick)},
- stripeStart, (stripeEnd - stripeStart));
- else
- if (allocCtr <= stripedBricks.size()) {
- hnts.put(allocCtr, new GlusterFSBrickClass(brick, stripeStart, (stripeEnd - stripeStart),
- true, stripeSize, stripedBricks.size(), -1));
- } else
- break;
-
- stripeStart = stripeEnd + 1;
-
- counter++;
- allocCtr++;
-
- if (counter >= stripedBricks.size())
- counter = 0;
- }
-
- break;
- }
-
- return result;
- }
-
- /* TODO: use meta{dcount,scount,rcount} for checking */
- public static int getReplicationFromLayout (HashMap<String, ArrayList<String>> vol,
- HashMap<String, Integer> meta)
- throws IOException {
- int replication = 0;
- LAYOUT l = LAYOUT.valueOf(vol.get("layout").get(0));
-
- switch (l) {
- case D:
- case S:
- case DS:
- replication = 1;
- break;
-
- case R:
- case DR:
- case SR:
- case DSR:
- final String key = "REPLICATION-1";
- replication = vol.get(key).size();
- }
-
- return replication;
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java
deleted file mode 100644
index e92237aeea2..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-import java.util.TreeMap;
-
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FileSystem;
-
-
-public class GlusterFUSEInputStream extends FSInputStream {
- File f;
- boolean lastActive;
- long pos;
- boolean closed;
- String thisHost;
- RandomAccessFile fuseInputStream;
- RandomAccessFile fsInputStream;
- GlusterFSBrickClass thisBrick;
- int nodeLocation;
- TreeMap<Integer, GlusterFSBrickClass> hnts;
-
- public GlusterFUSEInputStream (File f, TreeMap<Integer, GlusterFSBrickClass> hnts,
- String hostname) throws IOException {
- this.f = f;
- this.pos = 0;
- this.closed = false;
- this.hnts = hnts;
- this.thisHost = hostname;
- this.fsInputStream = null;
- this.fuseInputStream = new RandomAccessFile(f.getPath(), "r");
-
- this.lastActive = true; // true == FUSE, false == backed file
-
- String directFilePath = null;
- if (this.hnts != null) {
- directFilePath = findLocalFile(f.getPath(), this.hnts);
- if (directFilePath != null) {
- this.fsInputStream = new RandomAccessFile(directFilePath, "r");
- this.lastActive = !this.lastActive;
- }
- }
- }
-
- public String findLocalFile (String path, TreeMap<Integer, GlusterFSBrickClass> hnts) {
- int i = 0;
- String actFilePath = null;
- GlusterFSBrickClass gfsBrick = null;
-
- gfsBrick = hnts.get(0);
-
- /* do a linear search for the matching host not worrying
- about file stripes */
- for (i = 0; i < hnts.size(); i++) {
- gfsBrick = hnts.get(i);
- actFilePath = gfsBrick.brickIsLocal(this.thisHost);
- if (actFilePath != null) {
- this.thisBrick = gfsBrick;
- this.nodeLocation = i;
- break;
- }
- }
-
- return actFilePath;
- }
-
- public long getPos () throws IOException {
- return pos;
- }
-
- public synchronized int available () throws IOException {
- return (int) ((f.length()) - getPos());
- }
-
- public void seek (long pos) throws IOException {
- fuseInputStream.seek(pos);
- if (fsInputStream != null)
- fsInputStream.seek(pos);
- }
-
- public boolean seekToNewSource (long pos) throws IOException {
- return false;
- }
-
- public RandomAccessFile chooseStream (long start, int[] nlen)
- throws IOException {
- GlusterFSBrickClass gfsBrick = null;
- RandomAccessFile in = fuseInputStream;
- boolean oldActiveStream = lastActive;
- lastActive = true;
-
- if ((hnts != null) && (fsInputStream != null)) {
- gfsBrick = hnts.get(0);
- if (!gfsBrick.isChunked()) {
- in = fsInputStream;
- lastActive = false;
- } else {
- // find the current location in the tree and the amount of data it can serve
- int[] nodeInTree = thisBrick.getBrickNumberInTree(start, nlen[0]);
-
- // does this node hold the byte ranges we have been requested for ?
- if ((nodeInTree[2] != 0) && thisBrick.brickHasFilePart(nodeInTree[0], nodeLocation)) {
- in = fsInputStream;
- nlen[0] = nodeInTree[2]; // the amount of data that can be read from the stripe
- lastActive = false;
- }
- }
- }
-
- return in;
- }
-
- public synchronized int read () throws IOException {
- int byteRead = 0;
- RandomAccessFile in = null;
-
- if (closed)
- throw new IOException("Stream Closed.");
-
- int[] nlen = { 1 };
-
- in = chooseStream(getPos(), nlen);
-
- byteRead = in.read();
- if (byteRead >= 0) {
- pos++;
- syncStreams(1);
- }
-
- return byteRead;
- }
-
- public synchronized int read (byte buff[], int off, int len) throws
- IOException {
- int result = 0;
- RandomAccessFile in = null;
-
- if (closed)
- throw new IOException("Stream Closed.");
-
- int[] nlen = {len}; // hack to make len mutable
- in = chooseStream(pos, nlen);
-
- result = in.read(buff, off, nlen[0]);
- if (result > 0) {
- pos += result;
- syncStreams(result);
- }
-
- return result;
- }
-
- /**
- * TODO: use seek() insted of skipBytes(); skipBytes does I/O
- */
- public void syncStreams (int bytes) throws IOException {
- if ((hnts != null) && (hnts.get(0).isChunked()) && (fsInputStream != null))
- if (!this.lastActive)
- fuseInputStream.skipBytes(bytes);
- else
- fsInputStream.skipBytes(bytes);
- }
-
- public synchronized void close () throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- super.close();
- if (fsInputStream != null)
- fsInputStream.close();
- fuseInputStream.close();
-
- closed = true;
- }
-
- // Not supported - mark () and reset ()
-
- public boolean markSupported () {
- return false;
- }
-
- public void mark (int readLimit) {}
-
- public void reset () throws IOException {
- throw new IOException("Mark/Reset not supported.");
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java
deleted file mode 100644
index 5192a0a56cb..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-
-import org.apache.hadoop.fs.FSOutputSummer;
-import org.apache.hadoop.fs.FileSystem;
-
-public class GlusterFUSEOutputStream extends OutputStream {
- File f;
- long pos;
- boolean closed;
- OutputStream fuseOutputStream;
-
- public GlusterFUSEOutputStream (String file, boolean append) throws
- IOException {
- this.f = new File(file); /* not needed ? */
- this.pos = 0;
- this.fuseOutputStream = new FileOutputStream(file, append);
- this.closed = false;
- }
-
- public long getPos () throws IOException {
- return pos;
- }
-
- public void write (int v) throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- byte[] b = new byte[1];
- b[0] = (byte) v;
-
- write(b, 0, 1);
- }
-
- public void write (byte b[]) throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- fuseOutputStream.write(b, 0, b.length);
- pos += (long) b.length;
- }
-
- public void write (byte b[], int off, int len) throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- fuseOutputStream.write(b, off, len);
- pos += (long) len;
- }
-
- public void flush () throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- fuseOutputStream.flush();
- }
-
- public void close () throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- flush();
- fuseOutputStream.close();
- closed = true;
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java
deleted file mode 100644
index b0501cced27..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java
+++ /dev/null
@@ -1,492 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-/**
- * Implements the Hadoop FileSystem Interface to allow applications to store
- * files on GlusterFS and run Map/Reduce jobs on the data.
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-import java.net.*;
-
-import java.util.regex.*;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.BlockLocation;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.util.Progressable;
-
-import java.util.TreeMap;
-
-/*
- * This package provides interface for hadoop jobs (incl. Map/Reduce)
- * to access files in GlusterFS backed file system via FUSE mount
- */
-public class GlusterFileSystem extends FileSystem {
-
- private FileSystem glusterFs = null;
- private URI uri = null;
- private Path workingDir = null;
- private String glusterMount = null;
- private boolean mounted = false;
-
- /* for quick IO */
- private boolean quickSlaveIO = false;
-
- /* extended attribute class */
- private GlusterFSXattr xattr = null;
-
- /* hostname of this machine */
- private static String hostname;
-
- public GlusterFileSystem () {
-
- }
-
- public URI getUri () {
- return uri;
- }
-
- public boolean FUSEMount (String volname, String server, String mount)
- throws IOException, InterruptedException {
- boolean ret = true;
- int retVal = 0;
- Process p = null;
- String s = null;
- String mountCmd = null;
-
- mountCmd = "mount -t glusterfs " + server + ":" + "/" + volname + " " + mount;
-
- try {
- p = Runtime.getRuntime().exec(mountCmd);
-
- retVal = p.waitFor();
- if (retVal != 0)
- ret = false;
-
- } catch (IOException e) {
- System.out.println ("Problem mounting FUSE mount on: " + mount);
- e.printStackTrace();
- System.exit(-1);
- }
-
- return ret;
- }
-
- public void initialize (URI uri, Configuration conf) throws IOException {
- boolean ret = false;
- String volName = null;
- String remoteGFSServer = null;
- String needQuickRead = null;
-
- if (this.mounted)
- return;
-
- System.out.println("Initializing GlusterFS");
-
- try {
- volName = conf.get("fs.glusterfs.volname", "");
- glusterMount = conf.get("fs.glusterfs.mount", "");
- remoteGFSServer = conf.get("fs.glusterfs.server", "");
- needQuickRead = conf.get("quick.slave.io", "");
-
- /*
- * bail out if we do not have enough information to do a FUSE
- * mount
- */
- if ( (volName.length() == 0) || (remoteGFSServer.length() == 0) ||
- (glusterMount.length() == 0) )
- System.exit (-1);
-
- ret = FUSEMount(volName, remoteGFSServer, glusterMount);
- if (!ret) {
- System.out.println("Failed to initialize GlusterFS");
- System.exit(-1);
- }
-
- if ((needQuickRead.length() != 0)
- && (needQuickRead.equalsIgnoreCase("yes")
- || needQuickRead.equalsIgnoreCase("on")
- || needQuickRead.equals("1")))
- this.quickSlaveIO = true;
-
- this.mounted = true;
- this.glusterFs = FileSystem.getLocal(conf);
- this.workingDir = new Path(glusterMount);
- this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
-
- this.xattr = new GlusterFSXattr();
-
- InetAddress addr = InetAddress.getLocalHost();
- this.hostname = addr.getHostName();
-
- setConf(conf);
-
- } catch (Exception e) {
- e.printStackTrace();
- System.out.println("Unable to initialize GlusterFS");
- System.exit(-1);
- }
- }
-
- @Deprecated
- public String getName () {
- return getUri().toString();
- }
-
- public Path getWorkingDirectory () {
- return this.workingDir;
- }
-
- public Path getHomeDirectory () {
- return this.workingDir;
- }
-
- public Path makeAbsolute (Path path) {
- String pth = path.toUri().getPath();
- if (pth.startsWith(workingDir.toUri().getPath())) {
- return path;
- }
-
- return new Path(workingDir + "/" + pth);
- }
-
- public void setWorkingDirectory (Path dir) {
- this.workingDir = makeAbsolute(dir);
- }
-
- public boolean exists (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- return f.exists();
- }
-
- public boolean mkdirs (Path path, FsPermission permission
- ) throws IOException {
- boolean created = false;
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (f.exists()) {
- System.out.println("Directory " + f.getPath() + " already exist");
- return true;
- }
-
- return f.mkdirs();
- }
-
- @Deprecated
- public boolean isDirectory (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- return f.isDirectory();
- }
-
- public boolean isFile (Path path) throws IOException {
- return !isDirectory(path);
- }
-
- public Path[] listPaths (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File (absolute.toUri().getPath());
- String relPath = path.toUri().getPath();
- String[] fileList = null;
- Path[] filePath = null;
- int fileCnt = 0;
-
- fileList = f.list();
-
- filePath = new Path[fileList.length];
-
- for (; fileCnt < fileList.length; fileCnt++) {
- filePath[fileCnt] = new Path(relPath + "/" + fileList[fileCnt]);
- }
-
- return filePath;
- }
-
- public FileStatus[] listStatus (Path path) throws IOException {
- int fileCnt = 0;
- Path absolute = makeAbsolute(path);
- String relpath = path.toUri().getPath();
- String[] strFileList = null;
- FileStatus[] fileStatus = null;
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists()) {
- return null;
- }
-
- if (f.isFile())
- return new FileStatus[] {
- getFileStatus(path)
- };
-
- if (relpath.charAt(relpath.length() - 1) != '/')
- relpath += "/";
-
- strFileList = f.list();
-
- fileStatus = new FileStatus[strFileList.length];
-
- for (; fileCnt < strFileList.length; fileCnt++) {
- fileStatus[fileCnt] = getFileStatusFromFileString(relpath + strFileList[fileCnt]);
- }
-
- return fileStatus;
- }
-
- public FileStatus getFileStatusFromFileString (String path)
- throws IOException {
- Path nPath = new Path(path);
- return getFileStatus(nPath);
- }
-
- public FileStatus getFileStatus (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists ())
- throw new FileNotFoundException("File " + f.getPath() + " does not exist.");
-
- if (f.isDirectory ())
- return new FileStatus(0, true, 1, 0, f.lastModified(), path.makeQualified(this));
- else
- return new FileStatus(f.length(), false, 0, getDefaultBlockSize(),
- f.lastModified(), path.makeQualified(this));
-
- }
-
- /*
- * creates a new file in glusterfs namespace. internally the file
- * descriptor is an instance of OutputStream class.
- */
- public FSDataOutputStream create (Path path, FsPermission permission,
- boolean overwrite, int bufferSize,
- short replication, long blockSize,
- Progressable progress)
- throws IOException {
- Path absolute = makeAbsolute(path);
- Path parent = null;
- File f = null;
- File fParent = null;
- FSDataOutputStream glusterFileStream = null;
-
- f = new File(absolute.toUri().getPath());
-
- if (f.exists ()) {
- if (overwrite)
- f.delete ();
- else
- throw new IOException(f.getPath() + " already exist");
- }
-
- parent = path.getParent();
- fParent = new File ((makeAbsolute(parent)).toUri().getPath());
- if ((parent != null) && (fParent != null) && (!fParent.exists()))
- if (!fParent.mkdirs())
- throw new IOException("cannot create parent directory: " + fParent.getPath());
-
- glusterFileStream = new FSDataOutputStream(new GlusterFUSEOutputStream
- (f.getPath(), false));
-
- return glusterFileStream;
- }
-
- /*
- * open the file in read mode (internally the file descriptor is an
- * instance of InputStream class).
- *
- * if quick read mode is set then read the file by by-passing FUSE
- * if we are on same slave where the file exist
- */
- public FSDataInputStream open (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
- FSDataInputStream glusterFileStream = null;
- TreeMap<Integer, GlusterFSBrickClass> hnts = null;
-
- if (!f.exists())
- throw new IOException("File " + f.getPath() + " does not exist.");
-
- if (quickSlaveIO)
- hnts = xattr.quickIOPossible(f.getPath(), 0, f.length());
-
- glusterFileStream = new FSDataInputStream(new GlusterFUSEInputStream(f, hnts, hostname));
- return glusterFileStream;
- }
-
- public FSDataInputStream open (Path path, int bufferSize) throws IOException {
- return open(path);
- }
-
- public FSDataOutputStream append (Path f, int bufferSize, Progressable progress)
- throws IOException {
- throw new IOException ("append not supported (as yet).");
- }
-
- public boolean rename (Path src, Path dst) throws IOException {
- Path absoluteSrc = makeAbsolute(src);
- Path absoluteDst = makeAbsolute(dst);
-
- File fSrc = new File(absoluteSrc.toUri().getPath());
- File fDst = new File(absoluteDst.toUri().getPath());
-
- if (fDst.isDirectory()) {
- fDst = null;
- String newPath = absoluteDst.toUri().getPath() + "/" + fSrc.getName();
- fDst = new File(newPath);
- }
- return fSrc.renameTo(fDst);
- }
-
- @Deprecated
- public boolean delete (Path path) throws IOException {
- return delete(path, true);
- }
-
- public boolean delete (Path path, boolean recursive) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (f.isFile())
- return f.delete();
-
- FileStatus[] dirEntries = listStatus(absolute);
- if ((!recursive) && (dirEntries != null) && (dirEntries.length != 0))
- throw new IOException ("Directory " + path.toString() + " is not empty");
-
- if (dirEntries != null)
- for (int i = 0; i < dirEntries.length; i++)
- delete(new Path(absolute, dirEntries[i].getPath()), recursive);
-
- return f.delete();
- }
-
- @Deprecated
- public long getLength (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists())
- throw new IOException(f.getPath() + " does not exist.");
-
- return f.length();
- }
-
- @Deprecated
- public short getReplication (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists())
- throw new IOException(f.getPath() + " does not exist.");
-
- return xattr.getReplication(f.getPath());
- }
-
- public short getDefaultReplication (Path path) throws IOException {
- return getReplication(path);
- }
-
- public boolean setReplication (Path path, short replication)
- throws IOException {
- return true;
- }
-
- public long getBlockSize (Path path) throws IOException {
- long blkSz;
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- blkSz = xattr.getBlockSize(f.getPath());
- if (blkSz == 0)
- blkSz = getLength(path);
-
- return blkSz;
- }
-
- public long getDefaultBlockSize () {
- return 1 << 26; /* default's from hdfs, kfs */
- }
-
- @Deprecated
- public void lock (Path path, boolean shared) throws IOException {
- }
-
- @Deprecated
- public void release (Path path) throws IOException {
- }
-
- public BlockLocation[] getFileBlockLocations (FileStatus file, long start, long len)
- throws IOException {
-
- Path absolute = makeAbsolute(file.getPath());
- File f = new File(absolute.toUri().getPath());
- BlockLocation[] result = null;
-
- if (file == null)
- return null;
-
- result = xattr.getPathInfo(f.getPath(), start, len);
- if (result == null) {
- System.out.println("Problem getting destination host for file "
- + f.getPath());
- return null;
- }
-
- return result;
- }
-
- // getFileBlockLocations (FileStatus, long, long) is called by hadoop
- public BlockLocation[] getFileBlockLocations (Path p, long start, long len)
- throws IOException {
- return null;
- }
-
- public void copyFromLocalFile (boolean delSrc, Path src, Path dst)
- throws IOException {
- FileUtil.copy(glusterFs, src, this, dst, delSrc, getConf());
- }
-
- public void copyToLocalFile (boolean delSrc, Path src, Path dst)
- throws IOException {
- FileUtil.copy(this, src, glusterFs, dst, delSrc, getConf());
- }
-
- public Path startLocalOutput (Path fsOutputFile, Path tmpLocalFile)
- throws IOException {
- return tmpLocalFile;
- }
-
- public void completeLocalOutput (Path fsOutputFile, Path tmpLocalFile)
- throws IOException {
- moveFromLocalFile(tmpLocalFile, fsOutputFile);
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java b/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java
deleted file mode 100644
index 21e188c52bc..00000000000
--- a/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package org.apache.hadoop.fs.glusterfs;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-/**
- * Unit test for simple App.
- */
-public class AppTest
- extends TestCase
-{
- /**
- * Create the test case
- *
- * @param testName name of the test case
- */
- public AppTest( String testName )
- {
- super( testName );
- }
-
- /**
- * @return the suite of tests being tested
- */
- public static Test suite()
- {
- return new TestSuite( AppTest.class );
- }
-
- /**
- * Rigourous Test :-)
- */
- public void testApp()
- {
- assertTrue( true );
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py b/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py
deleted file mode 100755
index c20e53b39b2..00000000000
--- a/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/python
-
-##
- #
- # Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- # This file is part of GlusterFS.
- #
- # Licensed under the Apache License, Version 2.0
- # (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- # implied. See the License for the specific language governing
- # permissions and limitations under the License.
- #
- ##
-
-import getopt
-import glob
-import sys, os
-import shutil
-import subprocess, shlex
-
-def usage():
- print "usage: python build-deploy-jar.py [-b/--build] -d/--dir <hadoop-home> [-c/--core] [-m/--mapred] [-h/--henv]"
-
-def addSlash(s):
- if not (s[-1] == '/'):
- s = s + '/'
-
- return s
-
-def whereis(program):
- abspath = None
- for path in (os.environ.get('PATH', '')).split(':'):
- abspath = os.path.join(path, program)
- if os.path.exists(abspath) and not os.path.isdir(abspath):
- return abspath
-
- return None
-
-def getLatestJar(targetdir):
- glusterfsJar = glob.glob(targetdir + "*.jar")
- if len(glusterfsJar) == 0:
- print "No GlusterFS jar file found in %s ... exiting" % (targetdir)
- return None
-
- # pick up the latest jar file - just in case ...
- stat = latestJar = None
- ctime = 0
-
- for jar in glusterfsJar:
- stat = os.stat(jar)
- if stat.st_ctime > ctime:
- latestJar = jar
- ctime = stat.st_ctime
-
- return latestJar
-
-# build the glusterfs hadoop plugin using maven
-def build_jar(targetdir):
- location = whereis('mvn')
-
- if location == None:
- print "Cannot find maven to build glusterfs hadoop jar"
- print "please install maven or if it's already installed then fix your PATH environ"
- return None
-
- # do a clean packaging
- if os.path.exists(targetdir) and os.path.isdir(targetdir):
- print "Cleaning up directories ... [ " + targetdir + " ]"
- shutil.rmtree(targetdir)
-
- print "Building glusterfs jar ..."
- process = subprocess.Popen(['package'], shell=True,
- executable=location, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
- process.wait()
- if not process.returncode == 0:
- print "Building glusterfs jar failed ... exiting"
- return None
-
- latestJar = getLatestJar(targetdir)
- return latestJar
-
-def rcopy(f, host, libdir):
- print " * doing remote copy to host %s" % (host)
- scpCmd = "scp %s %s:%s" % (f, host, libdir)
-
- os.system(scpCmd);
-
-def deployInSlave(f, confdir, libdir, cc, cm, he):
- slavefile = confdir + "slaves"
-
- ccFile = confdir + "core-site.xml"
- cmFile = confdir + "mapred-site.xml"
- heFile = confdir + "hadoop-env.sh"
-
- sf = open(slavefile, 'r')
- for host in sf:
- host = host.rstrip('\n')
- print " >>> Deploying %s on %s ..." % (os.path.basename(f), host)
- rcopy(f, host, libdir)
-
- if cc:
- print " >>> Deploying [%s] on %s ..." % (os.path.basename(ccFile), host)
- rcopy(ccFile, host, confdir)
-
- if cm:
- print " >>> Deploying [%s] on %s ..." % (os.path.basename(cmFile), host)
- rcopy(cmFile, host, confdir)
-
- if he:
- print " >>> Deploying [%s] on %s ..." % (os.path.basename(heFile), host)
- rcopy(heFile, host, confdir);
-
- print "<<< Done\n"
-
- sf.close()
-
-def deployInMaster(f, confdir, libdir):
- import socket
- masterfile = confdir + "masters"
-
- mf = open(masterfile, 'r')
- for host in mf:
- host = host.rstrip('\n')
- print " >>> Deploying %s on %s ..." % (os.path.basename(f), host)
- h = host
- try:
- socket.inet_aton(host)
- h = socket.getfqdn(host)
- except socket.error:
- pass
-
- if h == socket.gethostname() or h == 'localhost':
- # local cp
- print " * doing local copy"
- shutil.copy(f, libdir)
- else:
- # scp the file
- rcopy(f, h, libdir)
-
- print "<<< Done\n"
-
- mf.close()
-
-if __name__ == '__main__':
- opt = args = []
- try:
- opt, args = getopt.getopt(sys.argv[1:], "bd:cmh", ["build", "dir=", "core", "mapred", "henv"]);
- except getopt.GetoptError, err:
- print str(err)
- usage()
- sys.exit(1)
-
- needbuild = hadoop_dir = copyCore = copyMapred = copyHadoopEnv = None
-
- for k, v in opt:
- if k in ("-b", "--build"):
- needbuild = True
- elif k in ("-d", "--dir"):
- hadoop_dir = v
- elif k in ("-c", "--core"):
- copyCore = True
- elif k in ("-m", "--mapred"):
- copyMapred = True
- elif k in ("-h", "--henv"):
- copyHadoopEnv = True
- else:
- pass
-
- if hadoop_dir == None:
- print 'hadoop directory missing'
- usage()
- sys.exit(1)
-
- os.chdir(os.path.dirname(sys.argv[0]) + '/..')
- targetdir = './target/'
-
- if needbuild:
- jar = build_jar(targetdir)
- if jar == None:
- sys.exit(1)
- else:
- jar = getLatestJar(targetdir)
- if jar == None:
- print "Maybe you want to build it ? with -b option"
- sys.exit(1)
-
- print ""
- print "*** Deploying %s *** " % (jar)
-
- # copy jar to local hadoop distribution (master)
- hadoop_home = addSlash(hadoop_dir)
- if not (os.path.exists(hadoop_home) and os.path.isdir(hadoop_home)):
- print "path " + hadoop_home + " does not exist or is not adiretory";
- sys.exit(1);
-
- hadoop_conf = hadoop_home + "conf/"
- hadoop_lib = hadoop_home + "lib/"
-
- print " >>> Scanning hadoop master file for host(s) to deploy"
- deployInMaster(jar, hadoop_conf, hadoop_lib)
-
- print ""
- print " >>> Scanning hadoop slave file for host(s) to deploy"
- deployInSlave(jar, hadoop_conf, hadoop_lib, copyCore, copyMapred, copyHadoopEnv)
diff --git a/glusterfs-hadoop/COPYING b/glusterfs-hadoop/COPYING
deleted file mode 100644
index d6456956733..00000000000
--- a/glusterfs-hadoop/COPYING
+++ /dev/null
@@ -1,202 +0,0 @@
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/glusterfs-hadoop/README b/glusterfs-hadoop/README
deleted file mode 100644
index 3026f11c035..00000000000
--- a/glusterfs-hadoop/README
+++ /dev/null
@@ -1,182 +0,0 @@
-GlusterFS Hadoop Plugin
-=======================
-
-INTRODUCTION
-------------
-
-This document describes how to use GlusterFS (http://www.gluster.org/) as a backing store with Hadoop.
-
-
-REQUIREMENTS
-------------
-
-* Supported OS is GNU/Linux
-* GlusterFS and Hadoop installed on all machines in the cluster
-* Java Runtime Environment (JRE)
-* Maven (needed if you are building the plugin from source)
-* JDK (needed if you are building the plugin from source)
-
-NOTE: Plugin relies on two *nix command line utilities to function properly. They are:
-
-* mount: Used to mount GlusterFS volumes.
-* getfattr: Used to fetch Extended Attributes of a file
-
-Make sure they are installed on all hosts in the cluster and their locations are in $PATH
-environment variable.
-
-
-INSTALLATION
-------------
-
-** NOTE: Example below is for Hadoop version 0.20.2 ($GLUSTER_HOME/hdfs/0.20.2) **
-
-* Building the plugin from source [Maven (http://maven.apache.org/) and JDK is required to build the plugin]
-
- Change to glusterfs-hadoop directory in the GlusterFS source tree and build the plugin.
-
- # cd $GLUSTER_HOME/hdfs/0.20.2
- # mvn package
-
- On a successful build the plugin will be present in the `target` directory.
- (NOTE: version number will be a part of the plugin)
-
- # ls target/
- classes glusterfs-0.20.2-0.1.jar maven-archiver surefire-reports test-classes
- ^^^^^^^^^^^^^^^^^^
-
- Copy the plugin to lib/ directory in your $HADOOP_HOME dir.
-
- # cp target/glusterfs-0.20.2-0.1.jar $HADOOP_HOME/lib
-
- Copy the sample configuration file that ships with this source (conf/core-site.xml) to conf
- directory in your $HADOOP_HOME dir.
-
- # cp conf/core-site.xml $HADOOP_HOME/conf
-
-* Installing the plugin from RPM
-
- See the plugin documentation for installing from RPM.
-
-
-CLUSTER INSTALLATION
---------------------
-
- In case it is tedious to do the above steps(s) on all hosts in the cluster; use the build-and-deploy.py script to
- build the plugin in one place and deploy it (along with the configuration file on all other hosts).
-
- This should be run on the host which is that hadoop master [Job Tracker].
-
-* STEPS (You would have done Step 1 and 2 anyway while deploying Hadoop)
-
- 1. Edit conf/slaves file in your hadoop distribution; one line for each slave.
- 2. Setup password-less ssh b/w hadoop master and slave(s).
- 3. Edit conf/core-site.xml with all glusterfs related configurations (see CONFIGURATION)
- 4. Run the following
- # cd $GLUSTER_HOME/hdfs/0.20.2/tools
- # python ./build-and-deploy.py -b -d /path/to/hadoop/home -c
-
- This will build the plugin and copy it (and the config file) to all slaves (mentioned in $HADOOP_HOME/conf/slaves).
-
- Script options:
- -b : build the plugin
- -d : location of hadoop directory
- -c : deploy core-site.xml
- -m : deploy mapred-site.xml
- -h : deploy hadoop-env.sh
-
-
-CONFIGURATION
--------------
-
- All plugin configuration is done in a single XML file (core-site.xml) with <name><value> tags in each <property>
- block.
-
- Brief explanation of the tunables and the values they accept (change them where-ever needed) are mentioned below
-
- name: fs.glusterfs.impl
- value: org.apache.hadoop.fs.glusterfs.GlusterFileSystem
-
- The default FileSystem API to use (there is little reason to modify this).
-
- name: fs.default.name
- value: glusterfs://server:port
-
- The default name that hadoop uses to represent file as a URI (typically a server:port tuple). Use any host
- in the cluster as the server and any port number. This option has to be in server:port format for hadoop
- to create file URI; but is not used by plugin.
-
- name: fs.glusterfs.volname
- value: volume-dist-rep
-
- The volume to mount.
-
-
- name: fs.glusterfs.mount
- value: /mnt/glusterfs
-
- This is the directory that the plugin will use to mount (FUSE mount) the volume.
-
- name: fs.glusterfs.server
- value: 192.168.1.36, hackme.zugzug.org
-
- To mount a volume the plugin needs to know the hostname or the IP of a GlusterFS server in the cluster.
- Mention it here.
-
- name: quick.slave.io
- value: [On/Off], [Yes/No], [1/0]
-
- NOTE: This option is not tested as of now.
-
- This is a performance tunable option. Hadoop schedules jobs to hosts that contain the file data part. The job
- then does I/O on the file (via FUSE in case of GlusterFS). When this option is set, the plugin will try to
- do I/O directly from the backed filesystem (ext3, ext4 etc..) the file resides on. Hence read performance
- will improve and job would run faster.
-
-
-USAGE
------
-
- Once configured, start Hadoop Map/Reduce daemons
-
- # cd $HADOOP_HOME
- # ./bin/start-mapred.sh
-
- If the map/reduce job/task trackers are up, all I/O will be done to GlusterFS.
-
-
-FOR HACKERS
------------
-
-* Source Layout
-
-** version specific: hdfs/<version> **
-./src
-./src/main
-./src/main/java
-./src/main/java/org
-./src/main/java/org/apache
-./src/main/java/org/apache/hadoop
-./src/main/java/org/apache/hadoop/fs
-./src/main/java/org/apache/hadoop/fs/glusterfs
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java <--- Fetch/Parse Extended Attributes of a file
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java <--- Input Stream (instantiated during open() calls; quick read from backed FS)
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java <--- Output Stream (instantiated during creat() calls)
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java <--- Entry Point for the plugin (extends Hadoop FileSystem class)
-./src/test
-./src/test/java
-./src/test/java/org
-./src/test/java/org/apache
-./src/test/java/org/apache/hadoop
-./src/test/java/org/apache/hadoop/fs
-./src/test/java/org/apache/hadoop/fs/glusterfs
-./src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java <--- Your test cases go here (if any :-))
-./tools/build-deploy-jar.py <--- Build and Deployment Script
-./conf
-./conf/core-site.xml <--- Sample configuration file
-./pom.xml <--- build XML file (used by maven)
-
-** toplevel: hdfs/ **
-./COPYING <--- License
-./README <--- This file
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 2863e27e2d2..b6d63146e14 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1,389 +1,2049 @@
-# if you make changes, the it is advised to increment this number, and provide
-# a descriptive suffix to identify who owns or what the change represents
-# e.g. release_version 2.MSW
-%global release 1%{?dist}
-%global _sharedstatedir /var/lib
+%global _hardened_build 1
+%global _for_fedora_koji_builds 0
-# if you wish to compile an rpm without rdma support, compile like this...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without rdma
-%{?_without_rdma:%global _without_rdma --disable-ibverbs}
+# uncomment and add '%' to use the prereltag for pre-releases
+# %%global prereltag qa3
-# No RDMA Support on x390(x)
-%ifarch s390 s390x
-%global _without_rdma --disable-ibverbs
+##-----------------------------------------------------------------------------
+## All argument definitions should be placed here and keep them sorted
+##
+
+# asan
+# if you wish to compile an rpm with address sanitizer...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with asan
+%{?_with_asan:%global _with_asan --enable-asan}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+%global _with_asan %{nil}
%endif
+# cmocka
+# if you wish to compile an rpm with cmocka unit testing...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with cmocka
+%{?_with_cmocka:%global _with_cmocka --enable-cmocka}
+
+# debug
+# if you wish to compile an rpm with debugging...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with debug
+%{?_with_debug:%global _with_debug --enable-debug}
+
+# epoll
# if you wish to compile an rpm without epoll...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without epoll
%{?_without_epoll:%global _without_epoll --disable-epoll}
-# if you wish to compile an rpm with fusermount...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with fusermount
-%{?_with_fusermount:%global _with_fusermount --enable-fusermount}
-
-%global version @PACKAGE_VERSION@
-%if "%{version}" >= "3.2"
-%global _can_georeplicate 1
+# fusermount
+# if you wish to compile an rpm without fusermount...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without fusermount
+%{?_without_fusermount:%global _without_fusermount --disable-fusermount}
+# geo-rep
# if you wish to compile an rpm without geo-replication support, compile like this...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without georeplication
-%{?_without_georeplication:%global _without_georeplication --disable-geo-replication}
-%endif
-
-Summary: Cluster File System
-Name: @PACKAGE_NAME@
-Version: %{version}
-Release: %{release}
-License: GPLv2 or LGPLv3+
-Group: System Environment/Base
-Vendor: Red Hat Inc
-Packager: @PACKAGE_BUGREPORT@
-URL: http://www.gluster.org/docs/index.php/GlusterFS
-Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
-BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
-Requires(post): /sbin/chkconfig
-Requires(preun): /sbin/service, /sbin/chkconfig
-Requires(postun): /sbin/service
+%{?_without_georeplication:%global _without_georeplication --disable-georeplication}
+
+# gnfs
+# if you wish to compile an rpm with the legacy gNFS server xlator
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with gnfs
+%{?_with_gnfs:%global _with_gnfs --enable-gnfs}
+
+# ipv6default
+# if you wish to compile an rpm with IPv6 default...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with ipv6default
+%{?_with_ipv6default:%global _with_ipv6default --with-ipv6-default}
+
+# libtirpc
+# if you wish to compile an rpm without TIRPC (i.e. use legacy glibc rpc)
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without libtirpc
+%{?_without_libtirpc:%global _without_libtirpc --without-libtirpc}
+
+# Do not use libtirpc on EL6, it does not have xdr_uint64_t() and xdr_uint32_t
+# Do not use libtirpc on EL7, it does not have xdr_sizeof()
+%if ( 0%{?rhel} && 0%{?rhel} <= 7 )
+%global _without_libtirpc --without-libtirpc
+%endif
+
+
+# ocf
+# if you wish to compile an rpm without the OCF resource agents...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without ocf
+%{?_without_ocf:%global _without_ocf --without-ocf}
+
+# server
+# if you wish to build rpms without server components, compile like this
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without server
+%{?_without_server:%global _without_server --without-server}
+
+# disable server components forcefully as rhel <= 6
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%global _without_server --without-server
+%endif
+
+# syslog
+# if you wish to build rpms without syslog logging, compile like this
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without syslog
+%{?_without_syslog:%global _without_syslog --disable-syslog}
+
+# disable syslog forcefully as rhel <= 6 doesn't have rsyslog or rsyslog-mmcount
+# Fedora deprecated syslog, see
+# https://fedoraproject.org/wiki/Changes/NoDefaultSyslog
+# (And what about RHEL7?)
+%if ( 0%{?fedora} && 0%{?fedora} >= 20 ) || ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%global _without_syslog --disable-syslog
+%endif
+
+# tsan
+# if you wish to compile an rpm with thread sanitizer...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with tsan
+%{?_with_tsan:%global _with_tsan --enable-tsan}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+%global _with_tsan %{nil}
+%endif
+
+# valgrind
+# if you wish to compile an rpm to run all processes under valgrind...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with valgrind
+%{?_with_valgrind:%global _with_valgrind --enable-valgrind}
+
+##-----------------------------------------------------------------------------
+## All %%global definitions should be placed here and keep them sorted
+##
+
+# selinux booleans whose defalut value needs modification
+# these booleans will be consumed by "%%selinux_set_booleans" macro.
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%global selinuxbooleans rsync_full_access=1 rsync_client=1
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%global _with_systemd true
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 7 )
+%global _with_firewalld --enable-firewalld
+%endif
+
+%if 0%{?_tmpfilesdir:1}
+%global _with_tmpfilesdir --with-tmpfilesdir=%{_tmpfilesdir}
+%else
+%global _with_tmpfilesdir --without-tmpfilesdir
+%endif
+
+# without server should also disable some server-only components
+%if 0%{?_without_server:1}
+%global _without_events --disable-events
+%global _without_georeplication --disable-georeplication
+%global _with_gnfs %{nil}
+%global _without_ocf --without-ocf
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+%global _usepython3 1
+%global _pythonver 3
+%else
+%global _usepython3 0
+%global _pythonver 2
+%endif
+
+# From https://fedoraproject.org/wiki/Packaging:Python#Macros
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%{!?python2_sitelib: %global python2_sitelib %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
+%{!?python2_sitearch: %global python2_sitearch %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
+%global _rundir %{_localstatedir}/run
+%endif
+
+%if ( 0%{?_with_systemd:1} )
+%global service_start() /bin/systemctl --quiet start %1.service || : \
+%{nil}
+%global service_stop() /bin/systemctl --quiet stop %1.service || :\
+%{nil}
+%global service_install() install -D -p -m 0644 %1.service %{buildroot}%2 \
+%{nil}
+# can't seem to make a generic macro that works
+%global glusterd_svcfile %{_unitdir}/glusterd.service
+%global glusterfsd_svcfile %{_unitdir}/glusterfsd.service
+%global glusterta_svcfile %{_unitdir}/gluster-ta-volume.service
+%global glustereventsd_svcfile %{_unitdir}/glustereventsd.service
+%global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service
+%else
+%global systemd_post() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \
+%{nil}
+%global systemd_preun() /sbin/chkconfig --del %1 >/dev/null 2>&1 || : \
+%{nil}
+%global systemd_postun_with_restart() /sbin/service %1 condrestart >/dev/null 2>&1 || : \
+%{nil}
+%global service_start() /sbin/service %1 start >/dev/null 2>&1 || : \
+%{nil}
+%global service_stop() /sbin/service %1 stop >/dev/null 2>&1 || : \
+%{nil}
+%global service_install() install -D -p -m 0755 %1.init %{buildroot}%2 \
+%{nil}
+# can't seem to make a generic macro that works
+%global glusterd_svcfile %{_sysconfdir}/init.d/glusterd
+%global glusterfsd_svcfile %{_sysconfdir}/init.d/glusterfsd
+%global glustereventsd_svcfile %{_sysconfdir}/init.d/glustereventsd
+%endif
+
+%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}
+
+# We do not want to generate useless provides and requires for xlator
+# .so files to be set for glusterfs packages.
+# Filter all generated:
+#
+# TODO: RHEL5 does not have a convenient solution
+%if ( 0%{?rhel} == 6 )
+# filter_setup exists in RHEL6 only
+%filter_provides_in %{_libdir}/glusterfs/%{version}/
+%global __filter_from_req %{?__filter_from_req} | grep -v -P '^(?!lib).*\.so.*$'
+%filter_setup
+%else
+# modern rpm and current Fedora do not generate requires when the
+# provides are filtered
+%global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$
+%endif
+
+
+##-----------------------------------------------------------------------------
+## All package definitions should be placed here in alphabetical order
+##
+Summary: Distributed File System
+%if ( 0%{_for_fedora_koji_builds} )
+Name: glusterfs
+Version: 3.8.0
+Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
+%else
+Name: @PACKAGE_NAME@
+Version: @PACKAGE_VERSION@
+Release: 0.@PACKAGE_RELEASE@%{?dist}
+%endif
+License: GPLv2 or LGPLv3+
+URL: http://docs.gluster.org/
+%if ( 0%{_for_fedora_koji_builds} )
+Source0: http://bits.gluster.org/pub/gluster/glusterfs/src/glusterfs-%{version}%{?prereltag}.tar.gz
+Source1: glusterd.sysconfig
+Source2: glusterfsd.sysconfig
+Source7: glusterfsd.service
+Source8: glusterfsd.init
+%else
+Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
+%endif
+
+BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
+
+Requires(pre): shadow-utils
+%if ( 0%{?_with_systemd:1} )
+BuildRequires: systemd
+%endif
+
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libgfrpc0%{?_isa} = %{version}-%{release}
+Requires: libgfxdr0%{?_isa} = %{version}-%{release}
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
+%if 0%{?_with_asan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libasan
+%endif
+%if 0%{?_with_tsan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libtsan
+%endif
+BuildRequires: bison flex
+BuildRequires: gcc make libtool
+BuildRequires: ncurses-devel readline-devel
+BuildRequires: libxml2-devel openssl-devel
+BuildRequires: libaio-devel libacl-devel
+BuildRequires: python%{_pythonver}-devel
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+BuildRequires: python-ctypes
+%endif
+%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} )
+BuildRequires: libtirpc-devel
+%endif
+%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+BuildRequires: rpcgen
+%endif
+BuildRequires: userspace-rcu-devel >= 0.7
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+BuildRequires: automake
+%endif
+BuildRequires: libuuid-devel
+%if ( 0%{?_with_cmocka:1} )
+BuildRequires: libcmocka-devel >= 1.0.1
+%endif
+%if ( 0%{!?_without_georeplication:1} )
+BuildRequires: libattr-devel
+%endif
-BuildRequires: bison flex
-BuildRequires: gcc make automake libtool
-BuildRequires: ncurses-devel readline-devel openssl-devel
-BuildRequires: libxml2-devel
-BuildRequires: python-ctypes
-%if 0%{?suse_version}
-BuildRequires: python-devel
+%if (0%{?_with_firewalld:1})
+BuildRequires: firewalld
%endif
-Requires: openssl
-Obsoletes: %{name}-libs <= 2.0.0
Obsoletes: %{name}-common < %{version}-%{release}
Obsoletes: %{name}-core < %{version}-%{release}
-Provides: %{name}-libs = %{version}-%{release}
+Obsoletes: %{name}-ganesha
+Obsoletes: %{name}-rdma < %{version}-%{release}
+%if ( 0%{!?_with_gnfs:1} )
+Obsoletes: %{name}-gnfs < %{version}-%{release}
+%endif
Provides: %{name}-common = %{version}-%{release}
Provides: %{name}-core = %{version}-%{release}
%description
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package includes the glusterfs binary, the glusterfsd daemon and the
+libglusterfs and glusterfs translator modules common to both GlusterFS server
+and client framework.
+
+%package cli
+Summary: GlusterFS CLI
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libglusterd0%{?_isa} = %{version}-%{release}
+
+%description cli
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the GlusterFS CLI application and its man page
+
+%package cloudsync-plugins
+Summary: Cloudsync Plugins
+BuildRequires: libcurl-devel
+
+%description cloudsync-plugins
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides cloudsync plugins for archival feature.
+
+%package extra-xlators
+Summary: Extra Gluster filesystem Translators
+# We need python-gluster rpm for gluster module's __init__.py in Python
+# site-packages area
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+Requires: python%{_pythonver}
+
+%description extra-xlators
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides extra filesystem Translators, such as Glupy,
+for GlusterFS.
+
+%package fuse
+Summary: Fuse client
+BuildRequires: fuse-devel
+Requires: attr
+Requires: psmisc
+
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+
+Obsoletes: %{name}-client < %{version}-%{release}
+Provides: %{name}-client = %{version}-%{release}
-This package includes libraries and utility scripts.
+%description fuse
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides support to FUSE based clients and inlcudes the
+glusterfs(d) binary.
+
+%if ( 0%{!?_without_server:1} )
+%package ganesha
+Summary: NFS-Ganesha configuration
+Group: Applications/File
+
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: nfs-ganesha-selinux >= 2.7.6
+Requires: nfs-ganesha-gluster >= 2.7.6
+Requires: pcs >= 0.10.0
+Requires: resource-agents >= 4.2.0
+Requires: dbus
+
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+Requires: cman, pacemaker, corosync
+%endif
-%if 0%{!?_without_rdma:1}
-%package rdma
-Summary: GlusterFS rdma support for ib-verbs
-License: GPLv2 or LGPLv3+
-Group: Applications/File
-BuildRequires: libibverbs-devel
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 )
+# we need portblock resource-agent in 3.9.5 and later.
+Requires: net-tools
+%endif
-Requires: %{name} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: selinux-policy >= 3.13.1-160
+Requires(post): policycoreutils-python
+Requires(postun): policycoreutils-python
+%else
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+%endif
+%endif
-%description rdma
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+%description ganesha
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
-This package provides support to ib-verbs library.
+This package provides the configuration and related files for using
+NFS-Ganesha as the NFS server using GlusterFS
%endif
-%if 0%{?_can_georeplicate}
-%if 0%{!?_without_georeplication:1}
+%if ( 0%{!?_without_georeplication:1} )
%package geo-replication
-Summary: GlusterFS Geo-replication
-License: GPLv3+
-Group: Applications/File
-Requires: %{name} = %{version}-%{release} , python-ctypes , rsync >= 3.0.0
+Summary: GlusterFS Geo-replication
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver}
+Requires: python%{_pythonver}-prettytable
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+
+Requires: rsync
+Requires: util-linux
+# required for setting selinux bools
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+Requires: selinux-policy-targeted
+Requires(post): selinux-policy-targeted
+BuildRequires: selinux-policy-devel
+%endif
%description geo-replication
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides support to geo-replication.
%endif
-%endif
-%package fuse
-Summary: GlusterFS Fuse client
-License: GPLv2 or LGPLv3+
-Group: Applications/File
-
-Requires: %{name} >= %{version}-%{release}
+%if ( 0%{?_with_gnfs:1} )
+%package gnfs
+Summary: GlusterFS gNFS server
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+Requires: nfs-utils
+
+%description gnfs
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the glusterfs legacy gNFS server xlator
+%endif
-Obsoletes: %{name}-client < %{version}-%{release}
-Provides: %{name}-client = %{version}-%{release}
+%package -n libglusterfs0
+Summary: GlusterFS libglusterfs library
+Requires: libgfrpc0%{?_isa} = %{version}-%{release}
+Requires: libgfxdr0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+Provides: %{name}-libs = %{version}-%{release}
-%description fuse
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+%description -n libglusterfs0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the base libglusterfs library
+
+%package -n libglusterfs-devel
+Summary: GlusterFS libglusterfs library
+Requires: libgfrpc-devel%{?_isa} = %{version}-%{release}
+Requires: libgfxdr-devel%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+Provides: %{name}-devel = %{version}-%{release}
+
+%description -n libglusterfs-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libglusterfs.so and the gluster C header files.
+
+%package -n libgfapi0
+Summary: GlusterFS api library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-api <= %{version}-%{release}
+Provides: %{name}-api = %{version}-%{release}
+
+%description -n libgfapi0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the glusterfs libgfapi library.
+
+%package -n libgfapi-devel
+Summary: Development Libraries
+Requires: libglusterfs-devel%{?_isa} = %{version}-%{release}
+Requires: libacl-devel
+Obsoletes: %{name}-api-devel <= %{version}-%{release}
+Provides: %{name}-api-devel = %{version}-%{release}
+
+%description -n libgfapi-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfapi.so and the api C header files.
+
+%package -n libgfchangelog0
+Summary: GlusterFS libchangelog library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfchangelog0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfchangelog library
+
+%package -n libgfchangelog-devel
+Summary: GlusterFS libchangelog library
+Requires: libglusterfs-devel%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfchangelog-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfchangelog.so and changelog C header files.
+
+%package -n libgfrpc0
+Summary: GlusterFS libgfrpc0 library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfrpc0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfrpc library
+
+%package -n libgfrpc-devel
+Summary: GlusterFS libgfrpc library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfrpc-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfrpc.so and rpc C header files.
+
+%package -n libgfxdr0
+Summary: GlusterFS libgfxdr0 library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfxdr0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfxdr library
+
+%package -n libgfxdr-devel
+Summary: GlusterFS libgfxdr library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfxdr-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfxdr.so.
+
+%package -n libglusterd0
+Summary: GlusterFS libglusterd library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libglusterd0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libglusterd library
+
+%package -n python%{_pythonver}-gluster
+Summary: GlusterFS python library
+Requires: python%{_pythonver}
+%if ( ! %{_usepython3} )
+%{?python_provide:%python_provide python-gluster}
+Provides: python-gluster = %{version}-%{release}
+Obsoletes: python-gluster < 3.10
+%endif
-This package provides support to FUSE based clients.
+%description -n python%{_pythonver}-gluster
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package contains the python modules of GlusterFS and own gluster
+namespace.
+
+%package regression-tests
+Summary: Development Tools
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+## thin provisioning support
+Requires: lvm2 >= 2.02.89
+Requires: perl(App::Prove) perl(Test::Harness) gcc util-linux-ng
+Requires: python%{_pythonver}
+Requires: attr dbench file git libacl-devel net-tools
+Requires: nfs-utils xfsprogs yajl psmisc bc
+
+%description regression-tests
+The Gluster Test Framework, is a suite of scripts used for
+regression testing of Gluster.
+
+%if ( 0%{!?_without_ocf:1} )
+%package resource-agents
+Summary: OCF Resource Agents for GlusterFS
+License: GPLv3+
+BuildArch: noarch
+# this Group handling comes from the Fedora resource-agents package
+# for glusterd
+Requires: %{name}-server = %{version}-%{release}
+# depending on the distribution, we need pacemaker or resource-agents
+Requires: %{_prefix}/lib/ocf/resource.d
+
+%description resource-agents
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the resource agents which plug glusterd into
+Open Cluster Framework (OCF) compliant cluster resource managers,
+like Pacemaker.
+%endif
+%if ( 0%{!?_without_server:1} )
%package server
Summary: Clustered file-system server
-License: GPLv3+
-Group: System Environment/Daemons
-Requires: %{name} = %{version}-%{release}
-Requires: %{name}-fuse = %{version}-%{release}
-Requires: openssl
-BuildRequires: libaio-devel
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-cli%{?_isa} = %{version}-%{release}
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libgfchangelog0%{?_isa} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
+Requires: glusterfs-selinux >= 0.1.0-2
+%endif
+# some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+# self-heal daemon, rebalance, nfs-server etc. are actually clients
+Requires: libgfapi0%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+# lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server
+Requires: lvm2
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%else
+Requires(post): /sbin/chkconfig
+Requires(preun): /sbin/service
+Requires(preun): /sbin/chkconfig
+Requires(postun): /sbin/service
+%endif
+%if (0%{?_with_firewalld:1})
+# we install firewalld rules, so we need to have the directory owned
+%if ( 0%{!?rhel} )
+# not on RHEL because firewalld-filesystem appeared in 7.3
+# when EL7 rpm gets weak dependencies we can add a Suggests:
+Requires: firewalld-filesystem
+%endif
+%endif
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+Requires: rpcbind
+%else
+Requires: portmap
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+Requires: python-argparse
+%endif
+%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+Requires: python%{_pythonver}-pyxattr
+%else
+Requires: pyxattr
+%endif
+%if (0%{?_with_valgrind:1})
+Requires: valgrind
+%endif
%description server
-GlusterFS is a clustered file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
-
-This package provides the glusterfs server daemon and translators that
-are loaded on the server.
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the glusterfs server daemon.
+%endif
-%package devel
-Summary: Development Libraries
-License: GPLv2 or LGPLv3+
-Group: Development/Libraries
-Requires: %{name} = %{version}-%{release}
+%package thin-arbiter
+Summary: GlusterFS thin-arbiter module
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+
+%description thin-arbiter
+This package provides a tie-breaker functionality to GlusterFS
+replicate volume. It includes translators required to provide the
+functionality, and also few other scripts required for getting the setup done.
+
+This package provides the glusterfs thin-arbiter translator.
+
+%package client-xlators
+Summary: GlusterFS client-side translators
+
+%description client-xlators
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the translators needed on any GlusterFS client.
+
+%if ( 0%{!?_without_events:1} )
+%package events
+Summary: GlusterFS Events
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver} python%{_pythonver}-prettytable
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: python-requests
+%else
+Requires: python%{_pythonver}-requests
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+Requires: python-argparse
+%endif
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
-%description devel
-GlusterFS is a clustered file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+%description events
+GlusterFS Events
-This package provides the development libraries.
+%endif
%prep
-%setup -q -n %{name}-%{version}
+%setup -q -n %{name}-%{version}%{?prereltag}
+%if ( ! %{_usepython3} )
+echo "fixing python shebangs..."
+for f in api events extras geo-replication libglusterfs tools xlators; do
+find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
+done
+%endif
%build
+
+# RHEL6 and earlier need to manually replace config.guess and config.sub
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
./autogen.sh
-%configure %{?_without_rdma} %{?_without_epoll} %{?_with_fusermount} %{?_without_georeplication}
+%endif
-# Remove rpath
-sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
-sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
+%configure \
+ %{?_with_asan} \
+ %{?_with_cmocka} \
+ %{?_with_debug} \
+ %{?_with_firewalld} \
+ %{?_with_gnfs} \
+ %{?_with_tmpfilesdir} \
+ %{?_with_tsan} \
+ %{?_with_valgrind} \
+ %{?_without_epoll} \
+ %{?_without_events} \
+ %{?_without_fusermount} \
+ %{?_without_georeplication} \
+ %{?_without_ocf} \
+ %{?_without_server} \
+ %{?_without_syslog} \
+ %{?_with_ipv6default} \
+ %{?_without_libtirpc}
+
+# fix hardening and remove rpath in shlibs
+%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+sed -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
+%endif
+sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
+sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
-%{__make} %{?_smp_mflags}
+make %{?_smp_mflags}
+%check
+make check
%install
-%{__rm} -rf %{buildroot}
-%{__make} install DESTDIR=%{buildroot}
-# Install include directory
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs
-%{__install} -p -m 0644 libglusterfs/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/
-%{__install} -p -m 0644 contrib/uuid/*.h \
- %{buildroot}%{_includedir}/glusterfs/
-# Following needed by hekafs multi-tenant translator
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/rpc
-%{__install} -p -m 0644 rpc/rpc-lib/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/rpc/
-%{__install} -p -m 0644 rpc/xdr/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/rpc/
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/server
-%{__install} -p -m 0644 xlators/protocol/server/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/server/
+rm -rf %{buildroot}
+make install DESTDIR=%{buildroot}
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+install -D -p -m 0644 %{SOURCE1} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+install -D -p -m 0644 %{SOURCE2} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
+%else
+install -D -p -m 0644 extras/glusterd-sysconfig \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%endif
+%endif
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterd
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfs
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfsd
+mkdir -p %{buildroot}%{_rundir}/gluster
# Remove unwanted files from all the shared libraries
find %{buildroot}%{_libdir} -name '*.a' -delete
find %{buildroot}%{_libdir} -name '*.la' -delete
-# Remove installed docs, we include them ourselves as %%doc
-%{__rm} -rf %{buildroot}%{_datadir}/doc/glusterfs/
+# Remove installed docs, the ones we want are included by %%doc, in
+# /usr/share/doc/glusterfs or /usr/share/doc/glusterfs-x.y.z depending
+# on the distribution
+%if ( 0%{?fedora} && 0%{?fedora} > 19 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+rm -rf %{buildroot}%{_pkgdocdir}/*
+%else
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}
+mkdir -p %{buildroot}%{_pkgdocdir}
+%endif
+head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog
+cat << EOM >> ChangeLog
+
+More commit messages for this ChangeLog can be found at
+https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prereltag}
+EOM
-# Rename the samples, so we can include them as %%config
-#for file in %{buildroot}%{_sysconfdir}/glusterfs/*.sample; do
-# %{__mv} ${file} `dirname ${file}`/`basename ${file} .sample`
-#done
+# Remove benchmarking and other unpackaged files
+# make install always puts these in %%{_defaultdocdir}/%%{name} so don't
+# use %%{_pkgdocdir}; that will be wrong on later Fedora distributions
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}/benchmarking
+rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs-mode.el
+rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs.vim
+%if ( 0%{!?_without_server:1} )
# Create working directory
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd
# Update configuration file to /var/lib working directory
sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \
%{buildroot}%{_sysconfdir}/glusterfs/glusterd.vol
+%endif
+
+# Install glusterfsd .service or init.d file
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+%service_install glusterfsd %{glusterfsd_svcfile}
+%endif
+%endif
-# Following needed by the hooks interface
-subdirs=("add-brick" "create" "delete" "remove-brick" "set" "start" "stop")
-for dir in ${subdirs[@]}
-do
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/"$dir"/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/"$dir"/post
+install -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+
+# ganesha ghosts
+%if ( 0%{!?_without_server:1} )
+mkdir -p %{buildroot}%{_sysconfdir}/ganesha
+touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf
+mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+# geo-rep ghosts
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+install -D -p -m 0644 extras/glusterfs-georep-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+%endif
+
+%if ( 0%{!?_without_server:1} )
+# the rest of the ghosts
+touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+touch %{buildroot}%{_sharedstatedir}/glusterd/options
+subdirs=(add-brick create copy-file delete gsync-create remove-brick reset set start stop)
+for dir in ${subdirs[@]}; do
+ mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/"$dir"/{pre,post}
done
-%{__install} -p -m 0744 extras/hook-scripts/start/post/*.sh \
- %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/post
-%{__install} -p -m 0744 extras/hook-scripts/stop/pre/*.sh \
- %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/glustershd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/peers
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/vols
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/nfs/run
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/bitd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/quotad
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/scrub
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/snaps
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/ss_brick
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%endif
-# Clean up the examples we want to include as %%doc
-#%{__cp} -a doc/examples examples
-#%{__rm} -f examples/Makefile*
+find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
+
+## Install bash completion for cli
+install -p -m 0744 -D extras/command-completion/gluster.bash \
+ %{buildroot}%{_sysconfdir}/bash_completion.d/gluster
%clean
-%{__rm} -rf %{buildroot}
+rm -rf %{buildroot}
+##-----------------------------------------------------------------------------
+## All %%post should be placed here and keep them sorted
+##
%post
/sbin/ldconfig
-
-# Copy the 'glusterfs-logrotate' file at the right place
-if [ -d /etc/logrotate.d ]; then
- cp %{_sysconfdir}/glusterfs/glusterfs-logrotate /etc/logrotate.d/glusterfs
-fi
-
-%postun
-/sbin/ldconfig
-
-%files
-%defattr(-,root,root)
-%doc AUTHORS ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL NEWS README THANKS
-%config %{_sysconfdir}/glusterfs/glusterfs-logrotate
-%config %{_sysconfdir}/glusterfs/group-virt.example
-%{_libdir}/glusterfs
-%{_libdir}/*.so.*
-%{_sbindir}/glusterfs*
-#%{_mandir}/man8/*gluster*.8*
-%dir %{_localstatedir}/log/glusterfs
-%if 0%{!?_without_rdma:1}
-%exclude %{_libdir}/glusterfs/%{version}/rpc-transport/rdma*
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%systemd_postun_with_restart rsyslog
%endif
-%exclude %{_libdir}/glusterfs/%{version}/xlator/mount/fuse*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/storage*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/features/posix*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/protocol/server*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/mgmt*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/nfs*
-
-%if 0%{!?_without_rdma:1}
-%files rdma
-%defattr(-,root,root)
-%{_libdir}/glusterfs/%{version}/rpc-transport/rdma*
%endif
+exit 0
-%if 0%{?_can_georeplicate}
-%if 0%{!?_without_georeplication:1}
-%post geo-replication
-#restart glusterd.
-%{_sysconfdir}/init.d/glusterd restart &> /dev/null
+%if ( 0%{!?_without_events:1} )
+%post events
+%systemd_post glustereventsd
%endif
-%if 0%{!?_without_georeplication:1}
-%files geo-replication
-%defattr(-,root,root)
-%{_libexecdir}/glusterfs/gsyncd
-%{_libexecdir}/glusterfs/python/syncdaemon/*
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%post ganesha
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
%endif
%endif
-%files fuse
-%defattr(-,root,root)
-%{_libdir}/glusterfs/%{version}/xlator/mount/fuse*
-#%{_mandir}/man8/mount.glusterfs.8*
-/sbin/mount.glusterfs
-%if 0%{?_with_fusermount:1}
-%{_bindir}/fusermount-glusterfs
+%if ( 0%{!?_without_georeplication:1} )
+%post geo-replication
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%selinux_set_booleans %{selinuxbooleans}
+%endif
+if [ $1 -ge 1 ]; then
+ %systemd_postun_with_restart glusterd
+fi
+exit 0
%endif
+%post -n libglusterfs0
+/sbin/ldconfig
+
+%post -n libgfapi0
+/sbin/ldconfig
+
+%post -n libgfchangelog0
+/sbin/ldconfig
+
+%post -n libgfrpc0
+/sbin/ldconfig
+
+%post -n libgfxdr0
+/sbin/ldconfig
+
+%post -n libglusterd0
+/sbin/ldconfig
+
+%if ( 0%{!?_without_server:1} )
%post server
-/sbin/chkconfig --add glusterd
+# Legacy server
+%systemd_post glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%systemd_post glusterfsd
+%endif
+# ".cmd_log_history" is renamed to "cmd_history.log" in GlusterFS-3.7 .
+# While upgrading glusterfs-server package form GlusterFS version <= 3.6 to
+# GlusterFS version 3.7, ".cmd_log_history" should be renamed to
+# "cmd_history.log" to retain cli command history contents.
+if [ -f %{_localstatedir}/log/glusterfs/.cmd_log_history ]; then
+ mv %{_localstatedir}/log/glusterfs/.cmd_log_history \
+ %{_localstatedir}/log/glusterfs/cmd_history.log
+fi
-# Move legacy sysconf files to the correct sysconfdir
-if [ -d /etc/glusterd ]; then
- mkdir -p /var/lib
- mv /etc/glusterd /var/lib/
- ln -sf /var/lib/glusterd /etc/glusterd
+# Genuine Fedora (and EPEL) builds never put gluster files in /etc; if
+# there are any files in /etc from a prior gluster.org install, move them
+# to /var/lib. (N.B. Starting with 3.3.0 all gluster files are in /var/lib
+# in gluster.org RPMs.) Be careful to copy them on the off chance that
+# /etc and /var/lib are on separate file systems
+if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then
+ mkdir -p %{_sharedstatedir}/glusterd
+ cp -a /etc/glusterd %{_sharedstatedir}/glusterd
+ rm -rf /etc/glusterd
+ ln -sf %{_sharedstatedir}/glusterd /etc/glusterd
fi
-if [ -d /var/lib/glusterd/vols ]; then
- # Rename old volfiles in an RPM-standard way. These aren't actually
- # considered package config files, so %config doesn't work for them.
- for file in $(find /var/lib/glusterd/vols -name '*.vol'); do
+# Rename old volfiles in an RPM-standard way. These aren't actually
+# considered package config files, so %%config doesn't work for them.
+if [ -d %{_sharedstatedir}/glusterd/vols ]; then
+ for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do
newfile=${file}.rpmsave
echo "warning: ${file} saved as ${newfile}"
cp ${file} ${newfile}
done
fi
-# Need to copy the file.
+# add marker translator
+# but first make certain that there are no old libs around to bite us
+# BZ 834847
+if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then
+ rm -f /etc/ld.so.conf.d/glusterfs.conf
+ /sbin/ldconfig
+fi
-mkdir -p /var/lib/glusterd/groups
-cp %{_sysconfdir}/glusterfs/group-virt.example /var/lib/glusterd/groups/virt
+%if (0%{?_with_firewalld:1})
+ %firewalld_reload
+%endif
pidof -c -o %PPID -x glusterd &> /dev/null
if [ $? -eq 0 ]; then
kill -9 `pgrep -f gsyncd.py` &> /dev/null
- killall glusterd &> /dev/null
- #add marker translator
+ killall --wait glusterd &> /dev/null
glusterd --xlator-option *.upgrade=on -N
- glusterd
+
+ #Cleaning leftover glusterd socket file which is created by glusterd in
+ #rpm_script_t context.
+ rm -f %{_rundir}/glusterd.socket
+
+ # glusterd _was_ running, we killed it, it exited after *.upgrade=on,
+ # so start it again
+ %service_start glusterd
else
glusterd --xlator-option *.upgrade=on -N
+
+ #Cleaning leftover glusterd socket file which is created by glusterd in
+ #rpm_script_t context.
+ rm -f %{_rundir}/glusterd.socket
+fi
+exit 0
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%pre should be placed here and keep them sorted
+##
+%pre
+getent group gluster > /dev/null || groupadd -r gluster
+getent passwd gluster > /dev/null || useradd -r -g gluster -d %{_rundir}/gluster -s /sbin/nologin -c "GlusterFS daemons" gluster
+exit 0
+
+##-----------------------------------------------------------------------------
+## All %%preun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_events:1} )
+%preun events
+if [ $1 -eq 0 ]; then
+ if [ -f %glustereventsd_svcfile ]; then
+ %service_stop glustereventsd
+ %systemd_preun glustereventsd
+ fi
fi
+exit 0
+%endif
+%if ( 0%{!?_without_server:1} )
%preun server
if [ $1 -eq 0 ]; then
- /sbin/service glusterd stop &>/dev/null || :
- /sbin/chkconfig --del glusterd
+ if [ -f %glusterfsd_svcfile ]; then
+ %service_stop glusterfsd
+ fi
+ %service_stop glusterd
+ if [ -f %glusterfsd_svcfile ]; then
+ %systemd_preun glusterfsd
+ fi
+ %systemd_preun glusterd
fi
if [ $1 -ge 1 ]; then
- /sbin/service glusterd condrestart &>/dev/null || :
+ if [ -f %glusterfsd_svcfile ]; then
+ %systemd_postun_with_restart glusterfsd
+ fi
+ %systemd_postun_with_restart glusterd
fi
+exit 0
+%endif
-# Legacy server
+%preun thin-arbiter
if [ $1 -eq 0 ]; then
- /sbin/service glusterfsd stop &>/dev/null || :
- /sbin/chkconfig --del glusterfsd
-fi
-if [ $1 -ge 1 ]; then
- /sbin/service glusterfsd condrestart &>/dev/null || :
+ if [ -f %glusterta_svcfile ]; then
+ %service_stop gluster-ta-volume
+ %systemd_preun gluster-ta-volume
+ fi
fi
+##-----------------------------------------------------------------------------
+## All %%postun should be placed here and keep them sorted
+##
+%postun
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%systemd_postun_with_restart rsyslog
+%endif
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%postun server
+%if (0%{?_with_firewalld:1})
+ %firewalld_reload
+%endif
+exit 0
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%postun ganesha
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%trigger should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%trigger ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%triggerun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%triggerun ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%files should be placed here and keep them grouped
+##
+%files
+%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README.md THANKS COMMITMENT
+%{_mandir}/man8/*gluster*.8*
+%if ( 0%{!?_without_server:1} )
+%exclude %{_mandir}/man8/gluster.8*
+%endif
+%dir %{_localstatedir}/log/glusterfs
+%if 0%{?!_without_server:1}
+%dir %{_datadir}/glusterfs
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh
+ %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh
+%endif
+# xlators that are needed on the client- and on the server-side
+%dir %{_libdir}/glusterfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/auth
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/addr.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/login.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/socket.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/error-gen.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/delay-gen.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/io-stats.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/sink.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/trace.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/access-control.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/utime.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/namespace.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-client.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/worm.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cloudsync.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/meta.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-cache.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-threads.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/md-cache.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/open-behind.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/quick-read.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/read-ahead.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/readdir-ahead.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/stat-prefetch.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/write-behind.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/nl-cache.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system/posix-acl.so
+%dir %attr(0775,gluster,gluster) %{_rundir}/gluster
+%if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1}
+%{_tmpfilesdir}/gluster.conf
+%endif
+
+%if ( 0%{?_without_server:1} )
+#exclude ganesha related files
+%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%exclude %{_libexecdir}/ganesha/*
+%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
+%endif
+
+%files cli
+%{_sbindir}/gluster
+%{_mandir}/man8/gluster.8*
+%{_sysconfdir}/bash_completion.d/gluster
+
+%files cloudsync-plugins
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so
+
+%files -n libglusterfs-devel
+%dir %{_includedir}/glusterfs
+ %{_includedir}/glusterfs/*.h
+ %{_includedir}/glusterfs/server/*.h
+%{_libdir}/libglusterfs.so
+
+%files -n libgfapi-devel
+%dir %{_includedir}/glusterfs/api
+ %{_includedir}/glusterfs/api/*.h
+%{_libdir}/libgfapi.so
+%{_libdir}/pkgconfig/glusterfs-api.pc
+
+
+%files -n libgfchangelog-devel
+%dir %{_includedir}/glusterfs/gfchangelog
+ %{_includedir}/glusterfs/gfchangelog/*.h
+%{_libdir}/libgfchangelog.so
+%{_libdir}/pkgconfig/libgfchangelog.pc
+
+%files -n libgfrpc-devel
+%dir %{_includedir}/glusterfs/rpc
+ %{_includedir}/glusterfs/rpc/*.h
+%{_libdir}/libgfrpc.so
+
+%files -n libgfxdr-devel
+%{_libdir}/libgfxdr.so
+
+%files client-xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/*.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so
+
+%files extra-xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
+
+%files fuse
+# glusterfs is a symlink to glusterfsd, -server depends on -fuse.
+%{_sbindir}/glusterfs
+%{_sbindir}/glusterfsd
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse.so
+/sbin/mount.glusterfs
+%if ( 0%{!?_without_fusermount:1} )
+%{_bindir}/fusermount-glusterfs
+%endif
+
+%if ( 0%{?_with_gnfs:1} && 0%{!?_without_server:1} )
+%files gnfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/server.so
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%endif
+
+%files thin-arbiter
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh
+%config %{_sysconfdir}/glusterfs/thin-arbiter.vol
+
+%if ( 0%{?_with_systemd:1} )
+%{_unitdir}/gluster-ta-volume.service
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%files geo-replication
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep
+
+%{_sbindir}/gfind_missing_files
+%{_sbindir}/gluster-mountbroker
+%dir %{_libexecdir}/glusterfs
+%dir %{_libexecdir}/glusterfs/python
+%dir %{_libexecdir}/glusterfs/python/syncdaemon
+ %{_libexecdir}/glusterfs/gsyncd
+ %{_libexecdir}/glusterfs/python/syncdaemon/*
+%dir %{_libexecdir}/glusterfs/scripts
+ %{_libexecdir}/glusterfs/scripts/get-gfid.sh
+ %{_libexecdir}/glusterfs/scripts/slave-upgrade.sh
+ %{_libexecdir}/glusterfs/scripts/gsync-upgrade.sh
+ %{_libexecdir}/glusterfs/scripts/generate-gfid-file.sh
+ %{_libexecdir}/glusterfs/scripts/gsync-sync-gfid
+ %{_libexecdir}/glusterfs/scripts/schedule_georep.py*
+ %{_libexecdir}/glusterfs/gverify.sh
+ %{_libexecdir}/glusterfs/set_geo_rep_pem_keys.sh
+ %{_libexecdir}/glusterfs/peer_gsec_create
+ %{_libexecdir}/glusterfs/peer_mountbroker
+ %{_libexecdir}/glusterfs/peer_mountbroker.py*
+ %{_libexecdir}/glusterfs/gfind_missing_files
+ %{_libexecdir}/glusterfs/peer_georep-sshkey.py*
+%{_sbindir}/gluster-georep-sshkey
+
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/geo-replication
+%ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post/S56glusterd-geo-rep-create-post.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
+
+%endif
+
+%files -n libglusterfs0
+%{_libdir}/libglusterfs.so.*
+
+%files -n libgfapi0
+%{_libdir}/libgfapi.so.*
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api.so
+
+%files -n libgfchangelog0
+%{_libdir}/libgfchangelog.so.*
+
+%files -n libgfrpc0
+%{_libdir}/libgfrpc.so.*
+
+%files -n libgfxdr0
+%{_libdir}/libgfxdr.so.*
+
+%files -n libglusterd0
+%{_libdir}/libglusterd.so.*
+%exclude %{_libdir}/libglusterd.so
+
+%files -n python%{_pythonver}-gluster
+# introducing glusterfs module in site packages.
+# so that all other gluster submodules can reside in the same namespace.
+%if ( %{_usepython3} )
+%dir %{python3_sitelib}/gluster
+ %{python3_sitelib}/gluster/__init__.*
+ %{python3_sitelib}/gluster/__pycache__
+ %{python3_sitelib}/gluster/cliutils
+%else
+%dir %{python2_sitelib}/gluster
+ %{python2_sitelib}/gluster/__init__.*
+ %{python2_sitelib}/gluster/cliutils
+%endif
+
+%files regression-tests
+%dir %{_datadir}/glusterfs
+ %{_datadir}/glusterfs/run-tests.sh
+ %{_datadir}/glusterfs/tests
+%exclude %{_datadir}/glusterfs/tests/vagrant
+
+%if ( 0%{!?_without_server:1} )
+%files ganesha
+%dir %{_libexecdir}/ganesha
+%{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%{_libexecdir}/ganesha/*
+%{_prefix}/lib/ocf/resource.d/heartbeat/*
+%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf
+%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
+
+%if ( 0%{!?_without_ocf:1} )
+%files resource-agents
+# /usr/lib is the standard for OCF, also on x86_64
+%{_prefix}/lib/ocf/resource.d/glusterfs
+%endif
+
+%if ( 0%{!?_without_server:1} )
%files server
-%defattr(-,root,root,-)
-#%doc examples/ doc/glusterfs*.vol.sample
+%doc extras/clear_xattrs.sh
+# sysconf
%config(noreplace) %{_sysconfdir}/glusterfs
-%{_sharedstatedir}/glusterd
-%{_sysconfdir}/init.d/glusterd
-%{_sbindir}/gluster
+%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
+%exclude %{_sysconfdir}/glusterfs/eventsconfig.json
+%exclude %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%exclude %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%if ( 0%{?_with_gnfs:1} )
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/*
+%endif
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd
+%endif
+
+# init files
+%glusterd_svcfile
+%if ( 0%{_for_fedora_koji_builds} )
+%glusterfsd_svcfile
+%endif
+%if ( 0%{?_with_systemd:1} )
+%glusterfssharedstorage_svcfile
+%endif
+
+# binaries
%{_sbindir}/glusterd
-%{_libdir}/glusterfs/%{version}/xlator/storage*
-%{_libdir}/glusterfs/%{version}/xlator/features/posix*
-%{_libdir}/glusterfs/%{version}/xlator/protocol/server*
-%{_libdir}/glusterfs/%{version}/xlator/mgmt*
-%{_libdir}/glusterfs/%{version}/xlator/nfs*
-
-%files devel
-%defattr(-,root,root,-)
-%{_includedir}/glusterfs
-%exclude %{_includedir}/glusterfs/y.tab.h
-%{_libdir}/*.so
+%{_libexecdir}/glusterfs/glfsheal
+%{_sbindir}/gf_attach
+%{_sbindir}/gluster-setgfid2path
+# {_sbindir}/glusterfsd is the actual binary, but glusterfs (client) is a
+# symlink. The binary itself (and symlink) are part of the glusterfs-fuse
+# package, because glusterfs-server depends on that anyway.
+
+# Manpages
+%{_mandir}/man8/gluster-setgfid2path.8*
+
+# xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota*
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so
+
+# snap_scheduler
+%{_sbindir}/snap_scheduler.py
+%{_sbindir}/gcron.py
+%{_sbindir}/conf.py
+
+# /var/lib/glusterd, e.g. hookscripts, etc.
+%ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/bitd
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/virt
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/metadata-cache
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/gluster-block
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/samba
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind/.keys
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+ %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S30samba-set.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S32gluster_enable_shared_storage.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S30samba-stop.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S29CTDB-teardown.sh
+%config(noreplace) %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/snaps
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/ss_brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/vols
+
+# Extra utility script
+%dir %{_libexecdir}/glusterfs
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+%if ( 0%{?_with_systemd:1} )
+ %{_libexecdir}/glusterfs/mount-shared-storage.sh
+ %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+ %{_datadir}/glusterfs/scripts/control-mem.sh
+%endif
+
+# Incrementalapi
+ %{_libexecdir}/glusterfs/glusterfind
+%{_bindir}/glusterfind
+ %{_libexecdir}/glusterfs/peer_add_secret_pub
+
+%if ( 0%{?_with_firewalld:1} )
+%{_prefix}/lib/firewalld/services/glusterfs.xml
+%endif
+# end of server files
+%endif
+
+# Events
+%if ( 0%{!?_without_events:1} )
+%files events
+%config(noreplace) %{_sysconfdir}/glusterfs/eventsconfig.json
+%dir %{_sharedstatedir}/glusterd
+%dir %{_sharedstatedir}/glusterd/events
+%dir %{_libexecdir}/glusterfs
+ %{_libexecdir}/glusterfs/gfevents
+ %{_libexecdir}/glusterfs/peer_eventsapi.py*
+%{_sbindir}/glustereventsd
+%{_sbindir}/gluster-eventsapi
+%{_datadir}/glusterfs/scripts/eventsdash.py*
+%if ( 0%{?_with_systemd:1} )
+%{_unitdir}/glustereventsd.service
+%else
+%{_sysconfdir}/init.d/glustereventsd
+%endif
+%endif
%changelog
+* Thu May 14 2020 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- refactor, common practice, Issue #1126
+
+* Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
+- added requires policycoreutils-python-utils on rhel8 for geo-replication
+
+* Wed Oct 9 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- remove leftover bd xlator cruft
+
+* Fri Aug 23 2019 Shwetha K Acharya <sacharya@redhat.com>
+- removed {name}-ufs from Obsoletes
+- added "< version" for obsoletes {name}-gnfs and {name}-rdma
+
+* Mon Jul 15 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+- Adding ganesha ha bits back in gluster repository
+
+* Fri Jul 12 2019 Amar Tumballi <amarts@redhat.com>
+- Remove rdma package, and mark older rdma package as 'Obsoletes'
+
+* Fri Jun 14 2019 Niels de Vos <ndevos@redhat.com>
+- always build glusterfs-cli to allow monitoring/managing from clients
+
+* Wed Mar 6 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- remove unneeded ldconfig in scriptlets
+- reported by Igor Gnatenko in Fedora
+- https://src.fedoraproject.org/rpms/glusterfs/pull-request/5
+
+* Mon Mar 4 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- s390x has RDMA, since around Fedora 27 and in RHEL7 since June 2016.
+
+* Tue Feb 26 2019 Ashish Pandey <aspandey@redhat.com>
+- Add thin-arbiter package
+
+* Sun Feb 24 2019 Aravinda VK <avishwan@redhat.com>
+- Renamed events package to gfevents
+
+* Thu Feb 21 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+- Obsoleting gluster-gnfs package
+
+* Wed Nov 28 2018 Krutika Dhananjay <kdhananj@redhat.com>
+- Install /var/lib/glusterd/groups/distributed-virt by default
+
+* Tue Nov 13 2018 Niels de Vos <ndevos@redhat.com>
+- Add an option to build with ThreadSanitizer (TSAN)
+
+* Fri Sep 7 2018 Niels de Vos <ndevos@redhat.com>
+- Add an option to build with address sanitizer (ASAN)
+
+* Sun Jul 29 2018 Niels de Vos <ndevos@redhat.com>
+- Disable building glusterfs-resource-agents on el6 (#1609551)
+
+* Thu Feb 22 2018 Kotresh HR <khiremat@redhat.com>
+- Added util-linux as dependency to georeplication rpm (#1544382)
+
+* Thu Feb 1 2018 Niels de Vos <ndevos@redhat.com>
+- Add '--without server' option to facilitate el6 builds (#1074947)
+
+* Wed Jan 24 2018 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python-ctypes no long exists, now in python stdlib (#1538258)
+
+* Thu Jan 18 2018 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Fedora 28 glibc has removed rpc headers and rpcgen, use libtirpc
+
+* Mon Dec 25 2017 Niels de Vos <ndevos@redhat.com>
+- Fedora 28 has renamed pyxattr
+
+* Wed Sep 27 2017 Mohit Agrawal <moagrawa@redhat.com>
+- Added control-cpu-load.sh and control-mem.sh scripts to glusterfs-server section(#1496335)
+
+* Tue Aug 22 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- libibverbs-devel, librdmacm-devel -> rdma-core-devel #1483995
+
+* Thu Jul 20 2017 Aravinda VK <avishwan@redhat.com>
+- Added new tool/binary to set the gfid2path xattr on files
+
+* Thu Jul 13 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- various directories not owned by any package
+
+* Fri Jun 16 2017 Jiffin Tony Thottan <jthottan@redhat.com>
+- Add glusterfssharedstorage.service systemd file
+
+* Fri Jun 9 2017 Poornima G <pgurusid@redhat.com>
+- Install /var/lib/glusterd/groups/nl-cache by default
+
+* Wed May 10 2017 Pranith Kumar K <pkarampu@redhat.com>
+- Install /var/lib/glusterd/groups/gluster-block by default
+
+* Thu Apr 27 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- gnfs in an optional subpackage
+
+* Wed Apr 26 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- /var/run/gluster owner gluster:gluster(0775) for qemu(gfapi)
+ statedumps (#1445569)
+
+* Mon Apr 24 2017 Jiffin Tony Thottan <jhottan@redhat.com>
+- Install SELinux hook scripts that manage contexts for bricks (#1047975)
+
+* Thu Apr 20 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- firewalld-filesystem -> firewalld (#1443959)
+
+* Thu Apr 13 2017 Niels de Vos <ndevos@redhat.com>
+- the -regression-tests sub-package needs "bc" for some tests (#1442145)
+
+* Mon Mar 20 2017 Niels de Vos <ndevos@redhat.com>
+- Drop dependency on psmisc, pkill is used instead of killall (#1197308)
+
+* Thu Feb 16 2017 Niels de Vos <ndevos@redhat.com>
+- Obsolete and Provide python-gluster for upgrading from glusterfs < 3.10
+
+* Wed Feb 1 2017 Poornima G <pgurusid@redhat.com>
+- Install /var/lib/glusterd/groups/metadata-cache by default
+
+* Wed Jan 18 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python2 (versus python3) cleanup (#1414902)
+
+* Fri Jan 13 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- switch to storhaug HA
+
+* Fri Jan 6 2017 Niels de Vos <ndevos@redhat.com>
+- use macro provided by firewalld-filesystem to reload firewalld
+
+* Thu Nov 24 2016 Jiffin Tony Thottan <jhottan@redhat.com>
+- remove S31ganesha-reset.sh from hooks (#1397795)
+
+* Thu Sep 22 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python-ctypes no long exists, now in python stdlib (#1378436)
+
+* Wed Sep 14 2016 Aravinda VK <avishwan@redhat.com>
+- Changed attribute of eventsconfig.json file as same as other configs (#1375532)
+
+* Thu Sep 08 2016 Aravinda VK <avishwan@redhat.com>
+- Added init script for glustereventsd (#1365395)
+
+* Wed Aug 31 2016 Avra Sengupta <asengupt@redhat.com>
+- Added conf.py for snap scheduler
+
+* Wed Aug 31 2016 Aravinda VK <avishwan@redhat.com>
+- Added new Geo-replication utility "gluster-georep-sshkey" (#1356508)
+
+* Thu Aug 25 2016 Aravinda VK <avishwan@redhat.com>
+- Added gluster-mountbroker utility for geo-rep mountbroker setup (#1343333)
+
+* Mon Aug 22 2016 Milind Changire <mchangir@redhat.com>
+- Add psmisc as dependency for glusterfs-fuse for killall command (#1367665)
+
+* Thu Aug 4 2016 Jiffin Tony Thottan <jthottan@redhat.com>
+- Remove ganesha.so from client xlators
+
+* Sun Jul 31 2016 Soumya Koduri <skoduri@redhat.com>
+- Add dependency on portblock resource agent for ganesha package (#1354439)
+
+* Mon Jul 18 2016 Aravinda VK <avishwan@redhat.com>
+- Added new subpackage events(glusterfs-events) (#1334044)
+
+* Fri Jul 15 2016 Aravinda VK <avishwan@redhat.com>
+- Removed ".py" extension from symlink(S57glusterfind-delete-post)(#1356868)
+
+* Thu Jul 14 2016 Aravinda VK <avishwan@redhat.com>
+- Removed extra packaging line of cliutils(python-gluster)(#1342356)
+
+* Mon Jul 11 2016 Aravinda VK <avishwan@redhat.com>
+- Added Python subpackage "cliutils" under gluster
+
+* Tue May 31 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- broken brp-python-bytecompile in RHEL7 results in installed
+ but unpackaged files.
+
+* Fri May 6 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- additional dirs and files in /var/lib/glusterd/... (#1326410)
+
+* Tue Apr 26 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- %%postun libs w/o firewalld on RHEL6 (#1330583)
+
+* Tue Apr 12 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- additional dirs and files in /var/lib/glusterd/... (#1326410)
+
+* Mon Mar 7 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- %%pre, %%post etc. scriptlet cleanup, ... -p /sbin/ldconfig (#1315024)
+
+* Fri Jan 22 2016 Aravinda VK <avishwan@redhat.com>
+- Added schedule_georep.py script to the glusterfs-geo-replication (#1300956)
+
+* Sat Jan 16 2016 Niels de Vos <ndevos@redhat.com>
+- glusterfs-server depends on -api (#1296931)
+
+* Sun Jan 10 2016 Niels de Vos <ndevos@redhat.com>
+- build system got fixed so that special glupy build is not needed anymore
+
+* Mon Dec 28 2015 Niels de Vos <ndevos@redhat.com>
+- hook scripts in glusterfs-ganesha use dbus-send, add dependency (#1294446)
+
+* Tue Dec 22 2015 Niels de Vos <ndevos@redhat.com>
+- move hook scripts for nfs-ganesha to the -ganesha sub-package
+- use standard 'make' installation for the hook scripts (#1174765)
+
+* Tue Sep 1 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- erroneous ghost of ../hooks/1/delete causes install failure (#1258975)
+
+* Tue Aug 25 2015 Anand Nekkunti <anekkunt@redhat.com>
+- adding glusterfs-firewalld service (#1253967)
+
+* Tue Aug 18 2015 Niels de Vos <ndevos@redhat.com>
+- Include missing directories for glusterfind hooks scripts (#1225465)
+
+* Mon Jun 15 2015 Niels de Vos <ndevos@redhat.com>
+- Replace hook script S31ganesha-set.sh by S31ganesha-start.sh (#1231738)
+
+* Fri Jun 12 2015 Aravinda VK <avishwan@redhat.com>
+- Added rsync as dependency to georeplication rpm (#1231205)
+
+* Tue Jun 02 2015 Aravinda VK <avishwan@redhat.com>
+- Added post hook for volume delete as part of glusterfind (#1225465)
+
+* Wed May 27 2015 Aravinda VK <avishwan@redhat.com>
+- Added stop-all-gluster-processes.sh in glusterfs-server section (#1204641)
+
+* Wed May 20 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python-gluster should be 'noarch' (#1219954)
+
+* Wed May 20 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- move libgf{db,changelog}.pc from -api-devel to -devel (#1223385)
+
+* Wed May 20 2015 Anand Nekkunti <anekkunt@redhat.com>
+- glusterd.socket file cleanup during post run upgrade (#1210404)
+
+* Tue May 19 2015 Avra Sengupta <asengupt@redhat.com>
+- Added S32gluster_enable_shared_storage.sh as volume set hook script (#1222013)
+
+* Mon May 18 2015 Milind Changire <mchangir@redhat.com>
+- Move file peer_add_secret_pub to the server RPM to support glusterfind (#1221544)
+* Sun May 17 2015 Niels de Vos <ndevos@redhat.com>
+- Fix building on RHEL-5 based distributions (#1222317)
+
+* Tue May 05 2015 Niels de Vos <ndevos@redhat.com>
+- Introduce glusterfs-client-xlators to reduce dependencies (#1195947)
+
+* Wed Apr 15 2015 Humble Chirammal <hchiramm@redhat.com>
+- Introducing python-gluster package to own gluster namespace in sitelib (#1211848)
+
+* Sat Mar 28 2015 Mohammed Rafi KC <rkavunga@redhat.com>
+- Add dependency for librdmacm version >= 1.0.15 (#1206744)
+
+* Tue Mar 24 2015 Niels de Vos <ndevos@redhat.com>
+- move libgfdb (with sqlite dependency) to -server subpackage (#1194753)
+
+* Tue Mar 17 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- glusterfs-ganesha sub-package
+
+* Thu Mar 12 2015 Kotresh H R <khiremat@redhat.com>
+- gfind_missing_files tool is included (#1187140)
+
+* Tue Mar 03 2015 Aravinda VK <avishwan@redhat.com>
+- Included glusterfind files as part of server package.
+
+* Sun Mar 1 2015 Avra Sengupta <asengupt@redhat.com>
+- Added installation of snap-scheduler
+
+* Thu Feb 26 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- enable cmocka unittest support only when asked for (#1067059)
+
+* Tue Feb 24 2015 Niels de Vos <ndevos@redhat.com>
+- POSIX ACL conversion needs BuildRequires libacl-devel (#1185654)
+
+* Wed Feb 18 2015 Andreas Schneider <asn@redhat.com>
+- Change cmockery2 to cmocka.
+
+* Wed Feb 18 2015 Kaushal M <kaushal@redhat.com>
+- add userspace-rcu as a requirement
+
+* Fri Feb 13 2015 Gaurav Kumar Garg <ggarg@redhat.com>
+- .cmd_log_history file should be renamed to cmd_history.log post
+ upgrade (#1165996)
+
+* Fri Jan 30 2015 Nandaja Varma <nvarma@redhat.com>
+- remove checks for rpmbuild/mock from run-tests.sh (#178008)
+
+* Fri Jan 16 2015 Niels de Vos <ndevos@redhat.com>
+- add support for /run/gluster through a tmpfiles.d config file (#1182934)
+
+* Tue Jan 6 2015 Aravinda VK<avishwan@redhat.com>
+- Added new libexec script for mountbroker user management (peer_mountbroker)
+
+* Fri Dec 12 2014 Niels de Vos <ndevos@redhat.com>
+- do not package all /usr/share/glusterfs/* files in regression-tests (#1169005)
+
+* Fri Sep 26 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- smarter logic in %%post server (#1146426)
+
+* Wed Sep 24 2014 Balamurugan Arumugam <barumuga@redhat.com>
+- remove /sbin/ldconfig as interpreter (#1145989)
+
+* Fri Sep 5 2014 Lalatendu Mohanty <lmohanty@redhat.com>
+- Changed the description as "GlusterFS a distributed filesystem"
+
+* Tue Aug 5 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- logrotate files (#1126832)
+
+* Wed Jul 16 2014 Luis Pabon <lpabon@redhat.com>
+- Added cmockery2 dependency
+
+* Fri Jun 27 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- killall --wait in %%post server, (#1113543)
+
+* Thu Jun 19 2014 Humble Chirammal <hchiramm@redhat.com>
+- Added dynamic loading of fuse module with glusterfs-fuse package installation in el5.
+
+* Thu Jun 12 2014 Varun Shastry <vshastry@redhat.com>
+- Add bash completion config to the cli package
+
+* Tue Jun 03 2014 Vikhyat Umrao <vumrao@redhat.com>
+- add nfs-utils package dependency for server package (#1065654)
+
+* Thu May 22 2014 Poornima G <pgurusid@redhat.com>
+- Rename old hookscripts in an RPM-standard way.
+
+* Tue May 20 2014 Niels de Vos <ndevos@redhat.com>
+- Almost drop calling ./autogen.sh
+
+* Fri Apr 25 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora spec (#1091408, #1091392)
+
+* Fri Apr 25 2014 Arumugam Balamurugan <barumuga@redhat.com>
+- fix RHEL 7 build failure "Installed (but unpackaged) file(s) found" (#1058188)
+
+* Wed Apr 02 2014 Arumugam Balamurugan <barumuga@redhat.com>
+- cleanup to rearrange spec file elements
+
+* Wed Apr 02 2014 Arumugam Balamurugan <barumuga@redhat.com>
+- add version/release dynamically (#1074919)
+
+* Thu Mar 27 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- attr dependency (#1184626)
+
+* Wed Mar 26 2014 Poornima G <pgurusid@redhat.com>
+- Include the hook scripts of add-brick, volume start, stop and set
+
+* Wed Feb 26 2014 Niels de Vos <ndevos@redhat.com>
+- Drop glusterfs-devel dependency from glusterfs-api (#1065750)
+
+* Wed Feb 19 2014 Justin Clift <justin@gluster.org>
+- Rename gluster.py to glupy.py to avoid namespace conflict (#1018619)
+- Move the main Glupy files into glusterfs-extra-xlators rpm
+- Move the Glupy Translator examples into glusterfs-devel rpm
+
+* Thu Feb 06 2014 Aravinda VK <avishwan@redhat.com>
+- Include geo-replication upgrade scripts and hook scripts.
+
+* Wed Jan 15 2014 Niels de Vos <ndevos@redhat.com>
+- Install /var/lib/glusterd/groups/virt by default
+
+* Sat Jan 4 2014 Niels de Vos <ndevos@redhat.com>
+- The main glusterfs package should not provide glusterfs-libs (#1048489)
+
+* Tue Dec 10 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec 3.5.0-0.1.qa3
+
+* Fri Oct 11 2013 Harshavardhana <fharshav@redhat.com>
+- Add '_sharedstatedir' macro to `/var/lib` on <= RHEL5 (#1003184)
+
+* Wed Oct 9 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec 3.4.1-2+
+
+* Wed Oct 9 2013 Niels de Vos <ndevos@redhat.com>
+- glusterfs-api-devel requires glusterfs-devel (#1016938, #1017094)
+
+* Mon Sep 30 2013 Niels de Vos <ndevos@redhat.com>
+- Package gfapi.py into the Python site-packages path (#1005146)
+
+* Tue Sep 17 2013 Harshavardhana <fharshav@redhat.com>
+- Provide a new package called "glusterfs-regression-tests" for standalone
+ regression testing.
+
+* Thu Aug 22 2013 Niels de Vos <ndevos@redhat.com>
+- Correct the day/date for some entries in this changelog (#1000019)
+
+* Wed Aug 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+- add Requires
+- add -cli subpackage,
+- fix other minor differences with Fedora glusterfs.spec
+
+* Tue Jul 30 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, add glusterfs-libs RPM for oVirt/qemu-kvm
+
+* Thu Jul 25 2013 Csaba Henk <csaba@redhat.com>
+- Added peer_add_secret_pub and peer_gsec_create to %%{_libexecdir}/glusterfs
+
+* Thu Jul 25 2013 Aravinda VK <avishwan@redhat.com>
+- Added gverify.sh to %%{_libexecdir}/glusterfs directory.
+
+* Thu Jul 25 2013 Harshavardhana <fharshav@redhat.com>
+- Allow to build with '--without bd' to disable 'bd' xlator
+
+* Thu Jun 27 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- fix the hardening fix for shlibs, use %%sed macro, shorter ChangeLog
+
+* Wed Jun 26 2013 Niels de Vos <ndevos@redhat.com>
+- move the mount/api xlator to glusterfs-api
+
+* Fri Jun 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, remove G4S/UFO and Swift
+
+* Mon Mar 4 2013 Niels de Vos <ndevos@redhat.com>
+- Package /var/run/gluster so that statedumps can be created
+
+* Wed Feb 6 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+
+* Tue Dec 11 2012 Filip Pytloun <filip.pytloun@gooddata.com>
+- add sysconfig file
+
+* Thu Oct 25 2012 Niels de Vos <ndevos@redhat.com>
+- Add a sub-package for the OCF resource agents
+
+* Wed Sep 05 2012 Niels de Vos <ndevos@redhat.com>
+- Don't use python-ctypes on SLES (from Jörg Petersen)
+
+* Tue Jul 10 2012 Niels de Vos <ndevos@redhat.com>
+- Include extras/clear_xattrs.sh in the glusterfs-server sub-package
+
+* Thu Jun 07 2012 Niels de Vos <ndevos@redhat.com>
+- Mark /var/lib/glusterd as owned by glusterfs, subdirs belong to -server
+
* Wed May 9 2012 Kaleb S. KEITHLEY <kkeithle[at]redhat.com>
- Add BuildRequires: libxml2-devel so that configure will DTRT on for
- Fedora's Koji build system
@@ -403,7 +2063,7 @@ fi
- Fixed version reporting 3.2git
- Added nfs init script (disabled by default)
-* Fri Sep 1 2011 Joe Julian <me@joejulian.name> - 3.2.3-1
+* Thu Sep 1 2011 Joe Julian <me@joejulian.name> - 3.2.3-1
- Update to 3.2.3
* Tue Jul 19 2011 Joe Julian <me@joejulian.name> - 3.2.2-3
@@ -418,13 +2078,13 @@ fi
* Wed Jul 13 2011 Joe Julian <me@joejulian.name> - 3.2.1-2
- fix hardcoded path to gsyncd in source to match the actual file location
-* Mon Jun 21 2011 Joe Julian <me@joejulian.name> - 3.2.1
+* Tue Jun 21 2011 Joe Julian <me@joejulian.name> - 3.2.1
- Update to 3.2.1
* Mon Jun 20 2011 Joe Julian <me@joejulian.name> - 3.1.5
- Update to 3.1.5
-* Mon May 31 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.4
+* Tue May 31 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.4
- Current git
* Sun May 29 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.2
@@ -437,7 +2097,7 @@ fi
- Add patch to remove forced 64 bit compile
- Obsolete glusterfs-core to allow for upgrading from gluster packaging
-* Sun Mar 19 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.3-1
+* Sat Mar 19 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.3-1
- Update to 3.1.3
- Merge in more upstream SPEC changes
- Remove patches from GlusterFS bugzilla #2309 and #2311
@@ -446,7 +2106,7 @@ fi
* Sun Feb 06 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-3
- Add back in legacy SPEC elements to support older branches
-* Tue Feb 03 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-2
+* Thu Feb 03 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-2
- Add patches from CloudFS project
* Tue Jan 25 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-1
@@ -482,7 +2142,7 @@ fi
* Sat Jan 2 2010 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.9-1
- Update to 2.0.9
-* Sat Nov 8 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.8-1
+* Sun Nov 8 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.8-1
- Update to 2.0.8
- Remove install of glusterfs-volgen, it's properly added to
automake upstream now
@@ -550,10 +2210,10 @@ fi
* Fri May 9 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-1
- Update to 1.3.8 final.
-* Tue Apr 23 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.10
+* Wed Apr 23 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.10
- Include short patch to include fixes from latest TLA 751.
-* Mon Apr 22 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.9
+* Tue Apr 22 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.9
- Update to 1.3.8pre6.
- Include glusterfs binary in both the client and server packages, now that
glusterfsd is a symlink to it instead of a separate binary.
@@ -597,7 +2257,7 @@ fi
* Wed Nov 21 2007 Matthias Saou <http://freshrpms.net/> 1.3.7-1
- Major spec file cleanup.
-- Add misssing %%clean section.
+- Add missing %%clean section.
- Fix ldconfig calls (weren't set for the proper sub-package).
* Sat Aug 4 2007 Matt Paine <matt@mattsoftware.com> - 1.3.pre7
diff --git a/glusterfsd/src/Makefile.am b/glusterfsd/src/Makefile.am
index 17d7a4a8139..a0a778158d8 100644
--- a/glusterfsd/src/Makefile.am
+++ b/glusterfsd/src/Makefile.am
@@ -1,19 +1,38 @@
sbin_PROGRAMS = glusterfsd
+if WITH_SERVER
+sbin_PROGRAMS += glusterfsd gf_attach
+endif
glusterfsd_SOURCES = glusterfsd.c glusterfsd-mgmt.c
glusterfsd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(GF_LDADD)
-glusterfsd_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS)
-noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la $(GF_LDADD) $(LIB_DL)
+glusterfsd_LDFLAGS = $(GF_LDFLAGS)
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
+gf_attach_SOURCES = gf_attach.c
+gf_attach_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/api/src/libgfapi.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+gf_attach_LDFLAGS = $(GF_LDFLAGS)
+
+noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h glusterfsd-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
-DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS) \
- -I$(top_srcdir)/rpc/rpc-lib/src -I$(top_srcdir)/rpc/xdr/src
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -DLIBEXECDIR=\"$(GLUSTERFS_LIBEXECDIR)\"\
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/xlators/nfs/server/src \
+ -I$(top_srcdir)/xlators/protocol/server/src \
+ -I$(top_srcdir)/api/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
-CLEANFILES =
+CLEANFILES =
$(top_builddir)/libglusterfs/src/libglusterfs.la:
$(MAKE) -C $(top_builddir)/libglusterfs/src/ all
@@ -24,8 +43,12 @@ uninstall-local:
install-data-local:
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run
+ $(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run/gluster
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/log/glusterfs
+ $(INSTALL) -d -m 755 $(DESTDIR)$(sbindir)
rm -f $(DESTDIR)$(sbindir)/glusterfs
- rm -f $(DESTDIR)$(sbindir)/glusterd
ln -s glusterfsd $(DESTDIR)$(sbindir)/glusterfs
+if WITH_SERVER
+ rm -f $(DESTDIR)$(sbindir)/glusterd
ln -s glusterfsd $(DESTDIR)$(sbindir)/glusterd
+endif
diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
new file mode 100644
index 00000000000..c553b0b1f61
--- /dev/null
+++ b/glusterfsd/src/gf_attach.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <glusterfs/glusterfs.h>
+#include "glfs-internal.h"
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+
+/* In seconds */
+#define CONNECT_TIMEOUT 60
+#define REPLY_TIMEOUT 120
+
+int done = 0;
+int rpc_status;
+
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
+struct rpc_clnt_procedure gf_attach_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", NULL},
+ [GLUSTERD_BRICK_OP] = {"BRICK_OP", NULL},
+};
+
+struct rpc_clnt_program gf_attach_prog = {
+ .progname = "brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .proctable = gf_attach_actors,
+ .numproc = GLUSTERD_BRICK_MAXVALUE,
+};
+
+int32_t
+my_callback(struct rpc_req *req, struct iovec *iov, int count, void *frame)
+{
+ pthread_mutex_lock(&mutex);
+ rpc_status = req->rpc_status;
+ done = 1;
+ /* Signal main thread which is the only waiter */
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+ return 0;
+}
+
+/* copied from gd_syncop_submit_request */
+int
+send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
+{
+ int ret = -1;
+ struct timespec ts;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
+ void *req = &brick_req;
+
+ brick_req.op = op;
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+ brick_req.dict.dict_val = NULL;
+ brick_req.dict.dict_len = 0;
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+ if (!iobuf)
+ goto out;
+
+ iobref = iobref_new();
+ if (!iobref)
+ goto out;
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret == -1)
+ goto out;
+
+ iov.iov_len = ret;
+
+ /* Wait for connection */
+ timespec_now_realtime(&ts);
+ ts.tv_sec += CONNECT_TIMEOUT;
+ pthread_mutex_lock(&rpc->conn.lock);
+ {
+ while (!rpc->conn.connected)
+ if (pthread_cond_timedwait(&rpc->conn.cond, &rpc->conn.lock, &ts) ==
+ ETIMEDOUT) {
+ fprintf(stderr, "timeout waiting for RPC connection\n");
+ pthread_mutex_unlock(&rpc->conn.lock);
+ return EXIT_FAILURE;
+ }
+ }
+ pthread_mutex_unlock(&rpc->conn.lock);
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit(rpc, &gf_attach_prog, op, my_callback, &iov, 1, NULL,
+ 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ if (!ret) {
+ /* OK, wait for callback */
+ timespec_now_realtime(&ts);
+ ts.tv_sec += REPLY_TIMEOUT;
+ pthread_mutex_lock(&mutex);
+ {
+ while (!done)
+ if (pthread_cond_timedwait(&cond, &mutex, &ts) == ETIMEDOUT) {
+ fprintf(stderr, "timeout waiting for RPC reply\n");
+ pthread_mutex_unlock(&mutex);
+ return EXIT_FAILURE;
+ }
+ }
+ pthread_mutex_unlock(&mutex);
+ }
+
+out:
+
+ iobref_unref(iobref);
+ iobuf_unref(iobuf);
+ if (frame)
+ STACK_DESTROY(frame->root);
+
+ if (rpc_status != 0) {
+ fprintf(stderr, "got error %d on RPC\n", rpc_status);
+ return EXIT_FAILURE;
+ }
+
+ printf("OK\n");
+ return EXIT_SUCCESS;
+}
+
+int
+usage(char *prog)
+{
+ fprintf(stderr, "Usage: %s uds_path volfile_path (to attach)\n", prog);
+ fprintf(stderr, " %s -d uds_path brick_path (to detach)\n", prog);
+
+ return EXIT_FAILURE;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs;
+ struct rpc_clnt *rpc;
+ dict_t *options;
+ int ret;
+ int op = GLUSTERD_BRICK_ATTACH;
+
+ for (;;) {
+ switch (getopt(argc, argv, "d")) {
+ case 'd':
+ op = GLUSTERD_BRICK_TERMINATE;
+ break;
+ case -1:
+ goto done_parsing;
+ default:
+ return usage(argv[0]);
+ }
+ }
+done_parsing:
+ if (optind != (argc - 2)) {
+ return usage(argv[0]);
+ }
+
+ fs = glfs_new("gf-attach");
+ if (!fs) {
+ fprintf(stderr, "glfs_new failed\n");
+ return EXIT_FAILURE;
+ }
+
+ (void)glfs_set_logging(fs, "/dev/stderr", 7);
+ /*
+ * This will actually fail because we haven't defined a volume, but
+ * it will do enough initialization to get us going.
+ */
+ (void)glfs_init(fs);
+
+ options = dict_new();
+ if (!options) {
+ return EXIT_FAILURE;
+ }
+ ret = dict_set_str(options, "transport-type", "socket");
+ if (ret != 0) {
+ fprintf(stderr, "failed to set transport type\n");
+ return EXIT_FAILURE;
+ }
+ ret = dict_set_str(options, "transport.address-family", "unix");
+ if (ret != 0) {
+ fprintf(stderr, "failed to set address family\n");
+ return EXIT_FAILURE;
+ }
+ ret = dict_set_str(options, "transport.socket.connect-path", argv[optind]);
+ if (ret != 0) {
+ fprintf(stderr, "failed to set connect path\n");
+ return EXIT_FAILURE;
+ }
+
+ rpc = rpc_clnt_new(options, fs->ctx->master, "gf-attach-rpc", 0);
+ if (!rpc) {
+ fprintf(stderr, "rpc_clnt_new failed\n");
+ return EXIT_FAILURE;
+ }
+
+ if (rpc_clnt_register_notify(rpc, NULL, NULL) != 0) {
+ fprintf(stderr, "rpc_clnt_register_notify failed\n");
+ return EXIT_FAILURE;
+ }
+
+ if (rpc_clnt_start(rpc) != 0) {
+ fprintf(stderr, "rpc_clnt_start failed\n");
+ return EXIT_FAILURE;
+ }
+
+ return send_brick_req(fs->ctx->master, rpc, argv[optind + 1], op);
+}
diff --git a/glusterfsd/src/glusterfsd-mem-types.h b/glusterfsd/src/glusterfsd-mem-types.h
index a28a7b2e31b..e59b558deb0 100644
--- a/glusterfsd/src/glusterfsd-mem-types.h
+++ b/glusterfsd/src/glusterfsd-mem-types.h
@@ -1,37 +1,27 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERFSD_MEM_TYPES_H__
#define __GLUSTERFSD_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
enum gfd_mem_types_ {
- gfd_mt_xlator_list_t = GF_MEM_TYPE_START,
- gfd_mt_xlator_t,
- gfd_mt_xlator_cmdline_option_t,
- gfd_mt_char,
- gfd_mt_call_pool_t,
- gfd_mt_vol_top_priv_t,
- gfd_mt_end
+ gfd_mt_xlator_list_t = GF_MEM_TYPE_START,
+ gfd_mt_xlator_t,
+ gfd_mt_server_cmdline_t,
+ gfd_mt_xlator_cmdline_option_t,
+ gfd_mt_char,
+ gfd_mt_call_pool_t,
+ gfd_mt_end
};
#endif
diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
new file mode 100644
index 00000000000..0cdbffa71ea
--- /dev/null
+++ b/glusterfsd/src/glusterfsd-messages.h
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFSD_MESSAGES_H_
+#define _GLUSTERFSD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
+ glusterfsd_msg_4, glusterfsd_msg_5, glusterfsd_msg_6, glusterfsd_msg_7,
+ glusterfsd_msg_8, glusterfsd_msg_9, glusterfsd_msg_10, glusterfsd_msg_11,
+ glusterfsd_msg_12, glusterfsd_msg_13, glusterfsd_msg_14, glusterfsd_msg_15,
+ glusterfsd_msg_16, glusterfsd_msg_17, glusterfsd_msg_18, glusterfsd_msg_19,
+ glusterfsd_msg_20, glusterfsd_msg_21, glusterfsd_msg_22, glusterfsd_msg_23,
+ glusterfsd_msg_24, glusterfsd_msg_25, glusterfsd_msg_26, glusterfsd_msg_27,
+ glusterfsd_msg_28, glusterfsd_msg_29, glusterfsd_msg_30, glusterfsd_msg_31,
+ glusterfsd_msg_32, glusterfsd_msg_33, glusterfsd_msg_34, glusterfsd_msg_35,
+ glusterfsd_msg_36, glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39,
+ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43,
+ glusterfsd_msg_029, glusterfsd_msg_041, glusterfsd_msg_042);
+
+#define glusterfsd_msg_1_STR "Could not create absolute mountpoint path"
+#define glusterfsd_msg_2_STR "Could not get current working directory"
+#define glusterfsd_msg_4_STR "failed to set mount-point to options dictionary"
+#define glusterfsd_msg_3_STR "failed to set dict value for key"
+#define glusterfsd_msg_5_STR "failed to set disable for key"
+#define glusterfsd_msg_6_STR "failed to set enable for key"
+#define glusterfsd_msg_7_STR \
+ "Not a client process, not performing mount operation"
+#define glusterfsd_msg_8_STR "MOUNT_POINT initialization failed"
+#define glusterfsd_msg_9_STR "loading volume file failed"
+#define glusterfsd_msg_10_STR "xlator option is invalid"
+#define glusterfsd_msg_11_STR "Fetching the volume file from server..."
+#define glusterfsd_msg_12_STR "volume initialization failed"
+#define glusterfsd_msg_34_STR "memory init failed"
+#define glusterfsd_msg_13_STR "ERROR: glusterfs uuid generation failed"
+#define glusterfsd_msg_14_STR "ERROR: glusterfs pool creation failed"
+#define glusterfsd_msg_15_STR \
+ "ERROR: '--volfile-id' is mandatory if '-s' OR '--volfile-server' option " \
+ "is given"
+#define glusterfsd_msg_16_STR "ERROR: parsing the volfile failed"
+#define glusterfsd_msg_33_STR \
+ "obsolete option '--volfile-max-fecth-attempts or fetch-attempts' was " \
+ "provided"
+#define glusterfsd_msg_17_STR "pidfile open failed"
+#define glusterfsd_msg_18_STR "pidfile lock failed"
+#define glusterfsd_msg_20_STR "pidfile truncation failed"
+#define glusterfsd_msg_21_STR "pidfile write failed"
+#define glusterfsd_msg_22_STR "failed to exeute pthread_sigmask"
+#define glusterfsd_msg_23_STR "failed to create pthread"
+#define glusterfsd_msg_24_STR "daemonization failed"
+#define glusterfsd_msg_25_STR "mount failed"
+#define glusterfsd_msg_26_STR "failed to construct the graph"
+#define glusterfsd_msg_27_STR "fuse xlator cannot be specified in volume file"
+#define glusterfsd_msg_28_STR "Cannot reach volume specification file"
+#define glusterfsd_msg_29_STR "ERROR: glusterfsd context not initialized"
+#define glusterfsd_msg_43_STR \
+ "command line argument --brick-mux is valid only for brick process"
+#define glusterfsd_msg_029_STR "failed to create command line string"
+#define glusterfsd_msg_30_STR "Started running version"
+#define glusterfsd_msg_31_STR "Could not create new sync-environment"
+#define glusterfsd_msg_40_STR "No change in volfile, countinuing"
+#define glusterfsd_msg_39_STR "Unable to create/delete temporary file"
+#define glusterfsd_msg_38_STR \
+ "Not processing brick-op since volume graph is not yet active"
+#define glusterfsd_msg_35_STR "rpc req buffer unserialization failed"
+#define glusterfsd_msg_36_STR "problem in xlator loading"
+#define glusterfsd_msg_37_STR "failed to get dict value"
+#define glusterfsd_msg_41_STR "received attach request for volfile"
+#define glusterfsd_msg_42_STR "failed to unserialize xdata to dictionary"
+#define glusterfsd_msg_041_STR "can't detach. flie not found"
+#define glusterfsd_msg_042_STR \
+ "couldnot detach old graph. Aborting the reconfiguration operation"
+
+#endif /* !_GLUSTERFSD_MESSAGES_H_ */
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 5f4fc617b65..eaf6796e4c3 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -1,2169 +1,3055 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
-#include <pthread.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-#include "glusterfs.h"
-#include "stack.h"
-#include "dict.h"
-#include "event.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/defaults.h>
#include "rpc-clnt.h"
#include "protocol-common.h"
+#include "glusterfsd-messages.h"
#include "glusterfs3.h"
#include "portmap-xdr.h"
#include "xdr-generic.h"
#include "glusterfsd.h"
-#include "glusterfsd-mem-types.h"
#include "rpcsvc.h"
#include "cli1-xdr.h"
-#include "statedump.h"
-#include "syncop.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/monitoring.h>
+#include "server.h"
+
+static gf_boolean_t is_mgmt_rpc_reconnect = _gf_false;
+int need_emancipate = 0;
+
+int
+glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx);
+int
+glusterfs_volfile_fetch(glusterfs_ctx_t *ctx);
+int
+glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp);
+int
+emancipate(glusterfs_ctx_t *ctx, int ret);
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict);
+int
+glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ gf_volfile_t *volfile_obj, char *checksum,
+ dict_t *dict);
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict);
+int
+glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
-static char is_mgmt_rpc_reconnect;
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name);
-int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
-int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
-int glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp);
-int glusterfs_graph_unknown_options (glusterfs_graph_t *graph);
+static int
+glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
+ gf_boolean_t write_test);
int
-mgmt_cbk_spec (void *data)
+mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
{
- glusterfs_ctx_t *ctx = NULL;
+ glusterfs_ctx_t *ctx = NULL;
- ctx = glusterfs_ctx_get ();
- gf_log ("mgmt", GF_LOG_INFO, "Volume file changed");
+ ctx = glusterfsd_ctx;
+ gf_log("mgmt", GF_LOG_INFO, "Volume file changed");
- glusterfs_volfile_fetch (ctx);
- return 0;
+ glusterfs_volfile_fetch(ctx);
+ return 0;
}
-
int
-mgmt_cbk_event (void *data)
-{
- return 0;
-}
-struct iobuf *
-glusterfs_serialize_reply (rpcsvc_request_t *req, void *arg,
- struct iovec *outmsg, xdrproc_t xdrproc)
+mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id,
+ dict_t *dict)
{
- struct iobuf *iob = NULL;
- ssize_t retlen = -1;
- ssize_t xdr_size = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ FILE *tmpfp = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+ char sha256_hash[SHA256_DIGEST_LENGTH] = {
+ 0,
+ };
+ int tmp_fd = -1;
+ char template[] = "/tmp/glfs.volfile.XXXXXX";
+
+ glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash);
+ ctx = THIS->ctx;
+ LOCK(&ctx->volfile_lock);
+ {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(volfile_id, volfile_obj->vol_id)) {
+ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
+ sizeof(volfile_obj->volfile_checksum))) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40,
+ NULL);
+ goto out;
+ }
+ volfile_tmp = volfile_obj;
+ break;
+ }
+ }
- /* First, get the io buffer into which the reply in arg will
- * be serialized.
- */
- xdr_size = xdr_sizeof (xdrproc, arg);
- iob = iobuf_get2 (req->svc->ctx->iobuf_pool, xdr_size);
- if (!iob) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to get iobuf");
- goto ret;
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode */
+ tmp_fd = mkstemp(template);
+ if (-1 == tmp_fd) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
+ "create template=%s", template, NULL);
+ ret = -1;
+ goto out;
}
- iobuf_to_iovec (iob, outmsg);
- /* Use the given serializer to translate the give C structure in arg
- * to XDR format which will be written into the buffer in outmsg.
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
*/
- /* retlen is used to received the error since size_t is unsigned and we
- * need -1 for error notification during encoding.
- */
- retlen = xdr_serialize_generic (*outmsg, arg, xdrproc);
- if (retlen == -1) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to encode message");
- goto ret;
+ ret = sys_unlink(template);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
+ "delete template=%s", template, NULL);
+ ret = 0;
}
- outmsg->iov_len = retlen;
-ret:
- if (retlen == -1) {
- iob = NULL;
+ tmpfp = fdopen(tmp_fd, "w+b");
+ if (!tmpfp) {
+ ret = -1;
+ goto unlock;
+ }
+
+ fwrite(volfile, size, 1, tmpfp);
+ fflush(tmpfp);
+ if (ferror(tmpfp)) {
+ ret = -1;
+ goto unlock;
}
- return iob;
+ if (!volfile_tmp) {
+ /* There is no checksum in the list, which means simple attach
+ * the volfile
+ */
+ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
+ sha256_hash, dict);
+ goto unlock;
+ }
+ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
+ sha256_hash, dict);
+ if (ret < 0) {
+ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!");
+ }
+ }
+unlock:
+ UNLOCK(&ctx->volfile_lock);
+out:
+ if (tmpfp)
+ fclose(tmpfp);
+ else if (tmp_fd != -1)
+ sys_close(tmp_fd);
+ return ret;
}
int
-glusterfs_submit_reply (rpcsvc_request_t *req, void *arg,
- struct iovec *payload, int payloadcount,
- struct iobref *iobref, xdrproc_t xdrproc)
+mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data)
{
- struct iobuf *iob = NULL;
- int ret = -1;
- struct iovec rsp = {0,};
- char new_iobref = 0;
+ return 0;
+}
- if (!req) {
- GF_ASSERT (req);
- goto out;
- }
+struct iobuf *
+glusterfs_serialize_reply(rpcsvc_request_t *req, void *arg,
+ struct iovec *outmsg, xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ ssize_t retlen = -1;
+ ssize_t xdr_size = 0;
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ xdr_size = xdr_sizeof(xdrproc, arg);
+ iob = iobuf_get2(req->svc->ctx->iobuf_pool, xdr_size);
+ if (!iob) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec(iob, outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ /* retlen is used to received the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+ retlen = xdr_serialize_generic(*outmsg, arg, xdrproc);
+ if (retlen == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to encode message");
+ GF_FREE(iob);
+ goto ret;
+ }
+
+ outmsg->iov_len = retlen;
+ret:
+ if (retlen == -1) {
+ iob = NULL;
+ }
- if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- gf_log (THIS->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
+ return iob;
+}
- new_iobref = 1;
+int
+glusterfs_submit_reply(rpcsvc_request_t *req, void *arg, struct iovec *payload,
+ int payloadcount, struct iobref *iobref,
+ xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ struct iovec rsp = {
+ 0,
+ };
+ char new_iobref = 0;
+
+ if (!req) {
+ GF_ASSERT(req);
+ goto out;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ gf_log(THIS->name, GF_LOG_ERROR, "out of memory");
+ goto out;
}
- iob = glusterfs_serialize_reply (req, arg, &rsp, xdrproc);
- if (!iob) {
- gf_log_callingfn (THIS->name, GF_LOG_ERROR, "Failed to serialize reply");
- } else {
- iobref_add (iobref, iob);
- }
+ new_iobref = 1;
+ }
- ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
- iobref);
+ iob = glusterfs_serialize_reply(req, arg, &rsp, xdrproc);
+ if (!iob) {
+ gf_log_callingfn(THIS->name, GF_LOG_ERROR, "Failed to serialize reply");
+ } else {
+ iobref_add(iobref, iob);
+ }
- /* Now that we've done our job of handing the message to the RPC layer
- * we can safely unref the iob in the hope that RPC layer must have
- * ref'ed the iob on receiving into the txlist.
- */
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_ERROR, "Reply submission failed");
- goto out;
- }
+ ret = rpcsvc_submit_generic(req, &rsp, 1, payload, payloadcount, iobref);
- ret = 0;
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Reply submission failed");
+ goto out;
+ }
+
+ ret = 0;
out:
- if (iob)
- iobuf_unref (iob);
+ if (iob)
+ iobuf_unref(iob);
- if (new_iobref && iobref)
- iobref_unref (iobref);
+ if (new_iobref && iobref)
+ iobref_unref(iobref);
- return ret;
+ return ret;
}
int
-glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret)
+glusterfs_terminate_response_send(rpcsvc_request_t *req, int op_ret)
{
- gd1_mgmt_brick_op_rsp rsp = {0,};
- dict_t *dict = NULL;
- int ret = 0;
-
- rsp.op_ret = op_ret;
- rsp.op_errno = 0;
- rsp.op_errstr = "";
- dict = dict_new ();
-
- if (dict)
- ret = dict_allocate_and_serialize (dict, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
-
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ int ret = 0;
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = 0;
+ rsp.op_errstr = "";
+ dict = dict_new();
+
+ if (dict)
+ ret = dict_allocate_and_serialize(dict, &rsp.output.output_val,
+ &rsp.output.output_len);
+
+ if (ret == 0)
+ ret = glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- if (ret == 0)
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ GF_FREE(rsp.output.output_val);
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
- if (dict)
- dict_unref (dict);
- return ret;
+int
+glusterfs_handle_terminate(rpcsvc_request_t *req)
+{
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ ssize_t ret;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ xlator_t *tvictim = NULL;
+ xlator_list_t **trav_p = NULL;
+ gf_boolean_t lockflag = _gf_false;
+ gf_boolean_t still_bricks_attached = _gf_false;
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ctx = glusterfsd_ctx;
+
+ LOCK(&ctx->volfile_lock);
+ {
+ /* Find the xlator_list_t that points to our victim. */
+ if (glusterfsd_ctx->active) {
+ top = glusterfsd_ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (!victim->cleanup_starting &&
+ strcmp(victim->name, xlator_req.name) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (!top)
+ goto err;
+ }
+ if (!*trav_p) {
+ gf_log(THIS->name, GF_LOG_ERROR, "can't terminate %s - not found",
+ xlator_req.name);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
+ glusterfs_terminate_response_send(req, 0);
+ goto err;
+ }
+
+ glusterfs_terminate_response_send(req, 0);
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ tvictim = (*trav_p)->xlator;
+ if (!tvictim->cleanup_starting &&
+ !strcmp(tvictim->name, xlator_req.name)) {
+ continue;
+ }
+ if (!tvictim->cleanup_starting) {
+ still_bricks_attached = _gf_true;
+ break;
+ }
+ }
+ if (!still_bricks_attached) {
+ gf_log(THIS->name, GF_LOG_INFO,
+ "terminating after loss of last child %s", xlator_req.name);
+ rpc_clnt_mgmt_pmap_signout(glusterfsd_ctx, xlator_req.name);
+ kill(getpid(), SIGTERM);
+ } else {
+ /* TODO cleanup sequence needs to be done properly for
+ Quota and Changelog
+ */
+ if (victim->cleanup_starting)
+ goto err;
+
+ rpc_clnt_mgmt_pmap_signout(glusterfsd_ctx, xlator_req.name);
+ victim->cleanup_starting = 1;
+
+ UNLOCK(&ctx->volfile_lock);
+ lockflag = _gf_true;
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "detaching not-only"
+ " child %s",
+ xlator_req.name);
+ top->notify(top, GF_EVENT_CLEANUP, victim);
+ }
+err:
+ if (!lockflag)
+ UNLOCK(&ctx->volfile_lock);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ free(xlator_req.name);
+ xlator_req.name = NULL;
+ return 0;
}
int
-glusterfs_handle_terminate (rpcsvc_request_t *req)
+glusterfs_translator_info_response_send(rpcsvc_request_t *req, int ret,
+ char *msg, dict_t *output)
{
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ gf_boolean_t free_ptr = _gf_false;
+ GF_ASSERT(req);
+
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg && msg[0])
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- glusterfs_terminate_response_send (req, 0);
- cleanup_and_exit (SIGTERM);
- return 0;
+ ret = -1;
+ if (output) {
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ }
+ if (!ret)
+ free_ptr = _gf_true;
+
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
+ if (free_ptr)
+ GF_FREE(rsp.output.output_val);
+ return ret;
}
int
-glusterfs_translator_info_response_send (rpcsvc_request_t *req, int ret,
- char *msg, dict_t *output)
+glusterfs_xlator_op_response_send(rpcsvc_request_t *req, int op_ret, char *msg,
+ dict_t *output)
{
- gd1_mgmt_brick_op_rsp rsp = {0,};
- gf_boolean_t free_ptr = _gf_false;
- GF_ASSERT (req);
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ gf_boolean_t free_ptr = _gf_false;
+ GF_ASSERT(req);
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = 0;
+ if (op_ret && msg && msg[0])
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- if (ret && msg && msg[0])
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
+ if (output) {
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ }
+ if (!ret)
+ free_ptr = _gf_true;
- ret = -1;
- if (output) {
- ret = dict_allocate_and_serialize (output,
- &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- }
- if (!ret)
- free_ptr = _gf_true;
+ ret = glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- ret = 0;
- if (free_ptr)
- GF_FREE (rsp.output.output_val);
- return ret;
+ if (free_ptr)
+ GF_FREE(rsp.output.output_val);
+
+ return ret;
}
int
-glusterfs_handle_translator_info_get_cont (gfd_vol_top_priv_t *priv)
+glusterfs_handle_translator_info_get(rpcsvc_request_t *req)
{
- int ret = -1;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- glusterfs_graph_t *active = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char msg[2048] = {0,};
- dict_t *output = NULL;
- dict_t *dict = NULL;
-
- GF_ASSERT (priv);
-
- dict = dict_new ();
- ret = dict_unserialize (priv->xlator_req.input.input_val,
- priv->xlator_req.input.input_len, &dict);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to unserialize dict");
- goto cont;
- }
- ret = dict_set_double (dict, "time", priv->time);
- if (ret)
- goto cont;
- ret = dict_set_double (dict, "throughput", priv->throughput);
- if (ret)
- goto cont;
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ gf1_cli_top_op top_op = 0;
+ xlator_t *any = NULL;
+ xlator_t *xlator = NULL;
+ glusterfs_graph_t *active = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ dict_t *output = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "top-op", (int32_t *)&top_op);
+ if (ret)
+ goto cont;
+ if (GF_CLI_TOP_READ_PERF == top_op) {
+ ret = glusterfs_volume_top_perf(xlator_req.name, dict, _gf_false);
+ } else if (GF_CLI_TOP_WRITE_PERF == top_op) {
+ ret = glusterfs_volume_top_perf(xlator_req.name, dict, _gf_true);
+ }
cont:
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- active = ctx->active;
- any = active->first;
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ any = active->first;
- xlator = xlator_search_by_name (any, priv->xlator_req.name);
+ xlator = get_xlator_by_name(any, xlator_req.name);
+ if (!xlator) {
+ ret = -1;
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xlator_req.name);
+ goto out;
+ }
+
+ if (strcmp(xlator->type, "debug/io-stats")) {
+ xlator = get_xlator_by_type(xlator, "debug/io-stats");
if (!xlator) {
- snprintf (msg, sizeof (msg), "xlator %s is not loaded",
- priv->xlator_req.name);
- goto out;
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "xlator-type debug/io-stats is not loaded");
+ goto out;
}
+ }
- output = dict_new ();
- ret = xlator->notify (xlator, GF_EVENT_TRANSLATOR_INFO, dict, output);
+ output = dict_new();
+ ret = xlator->notify(xlator, GF_EVENT_TRANSLATOR_INFO, dict, output);
out:
- ret = glusterfs_translator_info_response_send (priv->req, ret,
- msg, output);
-
- if (priv->xlator_req.name)
- free (priv->xlator_req.name);
- if (priv->xlator_req.input.input_val)
- free (priv->xlator_req.input.input_val);
- if (dict)
- dict_unref (dict);
- if (output)
- dict_unref (output);
- GF_FREE (priv);
-
- return ret;
+ ret = glusterfs_translator_info_response_send(req, ret, msg, output);
+
+ free(xlator_req.name);
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ if (dict)
+ dict_unref(dict);
+ return ret;
}
-int
-glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret,
- char *msg, dict_t *output)
+static int
+glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
+ gf_boolean_t write_test)
{
- gd1_mgmt_brick_op_rsp rsp = {0,};
- int ret = -1;
- gf_boolean_t free_ptr = _gf_false;
- GF_ASSERT (req);
-
- rsp.op_ret = op_ret;
- rsp.op_errno = 0;
- if (op_ret && msg && msg[0])
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
-
- if (output) {
- ret = dict_allocate_and_serialize (output,
- &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- }
- if (!ret)
- free_ptr = _gf_true;
-
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
-
- if (free_ptr)
- GF_FREE (rsp.output.output_val);
+ int32_t fd = -1;
+ int32_t output_fd = -1;
+ char export_path[PATH_MAX] = {
+ 0,
+ };
+ char *buf = NULL;
+ int32_t iter = 0;
+ int32_t ret = -1;
+ uint64_t total_blks = 0;
+ uint32_t blk_size;
+ uint32_t blk_count;
+ double throughput = 0;
+ double time = 0;
+ struct timeval begin, end = {
+ 0,
+ };
+
+ GF_ASSERT(brick_path);
+
+ ret = dict_get_uint32(dict, "blk-size", &blk_size);
+ if (ret)
+ goto out;
+ ret = dict_get_uint32(dict, "blk-cnt", &blk_count);
+ if (ret)
+ goto out;
+
+ if (!(blk_size > 0) || !(blk_count > 0))
+ goto out;
+
+ buf = GF_CALLOC(1, blk_size * sizeof(*buf), gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_ERROR, "Could not allocate memory");
+ goto out;
+ }
+
+ snprintf(export_path, sizeof(export_path), "%s/%s", brick_path,
+ ".gf-tmp-stats-perf");
+ fd = open(export_path, O_CREAT | O_RDWR, S_IRWXU);
+ if (-1 == fd) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_ERROR, "Could not open tmp file");
+ goto out;
+ }
+
+ gettimeofday(&begin, NULL);
+ for (iter = 0; iter < blk_count; iter++) {
+ ret = sys_write(fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ total_blks += ret;
+ }
+ gettimeofday(&end, NULL);
+ if (total_blks != ((uint64_t)blk_size * blk_count)) {
+ gf_log("glusterd", GF_LOG_WARNING, "Error in write");
+ ret = -1;
+ goto out;
+ }
+
+ time = gf_tvdiff(&begin, &end);
+ throughput = total_blks / time;
+ gf_log("glusterd", GF_LOG_INFO,
+ "Throughput %.2f Mbps time %.2f secs "
+ "bytes written %" PRId64,
+ throughput, time, total_blks);
+
+ /* if it's a write test, we are done. Otherwise, we continue to the read
+ * part */
+ if (write_test == _gf_true) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = sys_fsync(fd);
+ if (ret) {
+ gf_log("glusterd", GF_LOG_ERROR, "could not flush cache");
+ goto out;
+ }
+ ret = sys_lseek(fd, 0L, 0);
+ if (ret != 0) {
+ gf_log("glusterd", GF_LOG_ERROR, "could not seek back to start");
+ ret = -1;
+ goto out;
+ }
- return ret;
+ output_fd = open("/dev/null", O_RDWR);
+ if (-1 == output_fd) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_ERROR, "Could not open output file");
+ goto out;
+ }
+
+ total_blks = 0;
+
+ gettimeofday(&begin, NULL);
+ for (iter = 0; iter < blk_count; iter++) {
+ ret = sys_read(fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ ret = sys_write(output_fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ total_blks += ret;
+ }
+ gettimeofday(&end, NULL);
+ if (total_blks != ((uint64_t)blk_size * blk_count)) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_WARNING, "Error in read");
+ goto out;
+ }
+
+ time = gf_tvdiff(&begin, &end);
+ throughput = total_blks / time;
+ gf_log("glusterd", GF_LOG_INFO,
+ "Throughput %.2f Mbps time %.2f secs "
+ "bytes read %" PRId64,
+ throughput, time, total_blks);
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
+ if (output_fd >= 0)
+ sys_close(output_fd);
+ GF_FREE(buf);
+ sys_unlink(export_path);
+ if (ret == 0) {
+ ret = dict_set_double(dict, "time", time);
+ if (ret)
+ goto end;
+ ret = dict_set_double(dict, "throughput", throughput);
+ if (ret)
+ goto end;
+ }
+end:
+ return ret;
}
int
-glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
+glusterfs_handle_translator_op(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *dict = NULL;
- xlator_t *this = NULL;
- gf1_cli_top_op top_op = 0;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- gfd_vol_top_priv_t *priv = NULL;
- pthread_t tid = -1;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
- dict = dict_new ();
-
- ret = dict_unserialize (xlator_req.input.input_val,
- xlator_req.input.input_len,
- &dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
+ int32_t ret = -1;
+ int32_t op_ret = 0;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *input = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char key[32] = {0};
+ int len;
+ char *xname = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ int i = 0;
+ int count = 0;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ active = ctx->active;
+ if (!active) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, glusterfsd_msg_38,
+ "brick-op_no.=%d", xlator_req.op, NULL);
+ goto out;
+ }
+ any = active->first;
+ input = dict_new();
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &input);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ input->extra_stdfree = xlator_req.input.input_val;
+ }
+
+ ret = dict_get_int32(input, "count", &count);
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
- priv = GF_MALLOC (sizeof (gfd_vol_top_priv_t), gfd_mt_vol_top_priv_t);
- if (!priv) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to allocate memory");
- goto out;
- }
- priv->xlator_req = xlator_req;
- priv->req = req;
-
- ret = dict_get_int32 (dict, "top-op", (int32_t *)&top_op);
- if ((!ret) && (GF_CLI_TOP_READ_PERF == top_op ||
- GF_CLI_TOP_WRITE_PERF == top_op)) {
- ret = dict_get_uint32 (dict, "blk-size", &blk_size);
- if (ret)
- goto cont;
- ret = dict_get_uint32 (dict, "blk-cnt", &blk_count);
- if (ret)
- goto cont;
- priv->blk_size = blk_size;
- priv->blk_count = blk_count;
- if (GF_CLI_TOP_READ_PERF == top_op) {
- ret = pthread_create (&tid, NULL,
- glusterfs_volume_top_read_perf,
- priv);
- } else if ( GF_CLI_TOP_WRITE_PERF == top_op) {
- ret = pthread_create (&tid, NULL,
- glusterfs_volume_top_write_perf,
- priv);
- }
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Thread create failed");
- goto cont;
- }
- gf_log ("glusterd", GF_LOG_DEBUG, "Created new thread with "
- "tid %u", (unsigned int)tid);
- goto out;
+ for (i = 0; i < count; i++) {
+ len = snprintf(key, sizeof(key), "xl-%d", i);
+ ret = dict_get_strn(input, key, len, &xname);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Couldn't get "
+ "xlator %s ",
+ key);
+ goto out;
}
-cont:
- priv->throughput = 0;
- priv->time = 0;
- ret = glusterfs_handle_translator_info_get_cont (priv);
+ xlator = xlator_search_by_name(any, xname);
+ if (!xlator) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "xlator %s is not "
+ "loaded",
+ xname);
+ goto out;
+ }
+ }
+ for (i = 0; i < count; i++) {
+ len = snprintf(key, sizeof(key), "xl-%d", i);
+ ret = dict_get_strn(input, key, len, &xname);
+ xlator = xlator_search_by_name(any, xname);
+ XLATOR_NOTIFY(ret, xlator, GF_EVENT_TRANSLATOR_OP, input, output);
+ /* If notify fails for an xlator we need to capture it but
+ * continue with the loop. */
+ if (ret)
+ op_ret = -1;
+ }
+ ret = op_ret;
out:
- if (dict)
- dict_unref (dict);
- return ret;
+ glusterfs_xlator_op_response_send(req, ret, "", output);
+ if (input)
+ dict_unref(input);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+
+ return 0;
}
-void *
-glusterfs_volume_top_write_perf (void *args)
+int
+glusterfs_handle_bitrot(rpcsvc_request_t *req)
{
- int32_t fd = -1;
- int32_t input_fd = -1;
- char export_path[PATH_MAX];
- char *buf = NULL;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- int32_t iter = 0;
- int32_t ret = -1;
- uint64_t total_blks = 0;
- struct timeval begin, end = {0,};
- double throughput = 0;
- double time = 0;
- gfd_vol_top_priv_t *priv = NULL;
-
- GF_ASSERT (args);
- priv = (gfd_vol_top_priv_t *)args;
-
- blk_size = priv->blk_size;
- blk_count = priv->blk_count;
-
- snprintf (export_path, sizeof (export_path), "%s/%s",
- priv->xlator_req.name, ".gf-tmp-stats-perf");
-
- fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
- if (-1 == fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open tmp file");
- goto out;
- }
-
- buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char);
- if (!buf) {
- ret = -1;
- goto out;
- }
-
- input_fd = open ("/dev/zero", O_RDONLY);
- if (-1 == input_fd) {
- ret = -1;
- gf_log ("glusterd",GF_LOG_ERROR, "Unable to open input file");
- goto out;
- }
-
- gettimeofday (&begin, NULL);
- for (iter = 0; iter < blk_count; iter++) {
- ret = read (input_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- ret = write (fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- total_blks += ret;
- }
- ret = 0;
- if (total_blks != ((uint64_t)blk_size * blk_count)) {
- gf_log ("glusterd", GF_LOG_WARNING, "Error in write");
- ret = -1;
- goto out;
- }
-
- gettimeofday (&end, NULL);
- time = (end.tv_sec - begin.tv_sec) * 1e6
- + (end.tv_usec - begin.tv_usec);
- throughput = total_blks / time;
- gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
- "bytes written %"PRId64, throughput, time, total_blks);
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ xlator_t *any = NULL;
+ xlator_t *this = NULL;
+ xlator_t *xlator = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ char xname[1024] = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ char *scrub_opt = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+
+ input = dict_new();
+ if (!input)
+ goto out;
+
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &input);
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_35, NULL);
+ goto out;
+ }
+
+ /* Send scrubber request to bitrot xlator */
+ snprintf(xname, sizeof(xname), "%s-bit-rot-0", xlator_req.name);
+ xlator = xlator_search_by_name(any, xname);
+ if (!xlator) {
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_36, NULL);
+ goto out;
+ }
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_str(input, "scrub-value", &scrub_opt);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get scrub value");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_37, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (!strncmp(scrub_opt, "status", SLEN("status"))) {
+ ret = xlator->notify(xlator, GF_EVENT_SCRUB_STATUS, input, output);
+ } else if (!strncmp(scrub_opt, "ondemand", SLEN("ondemand"))) {
+ ret = xlator->notify(xlator, GF_EVENT_SCRUB_ONDEMAND, input, output);
+ if (ret == -2) {
+ snprintf(msg, sizeof(msg),
+ "Scrubber is in "
+ "Pause/Inactive/Running state");
+ ret = -1;
+ goto out;
+ }
+ }
out:
- priv->throughput = throughput;
- priv->time = time;
-
- if (fd >= 0)
- close (fd);
- if (input_fd >= 0)
- close (input_fd);
- if (buf)
- GF_FREE (buf);
- unlink (export_path);
-
- (void)glusterfs_handle_translator_info_get_cont (priv);
-
- return NULL;
+ glusterfs_translator_info_response_send(req, ret, msg, output);
+
+ if (input)
+ dict_unref(input);
+ free(xlator_req.input.input_val); /*malloced by xdr*/
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name);
+
+ return 0;
}
-void *
-glusterfs_volume_top_read_perf (void *args)
+int
+glusterfs_handle_attach(rpcsvc_request_t *req)
{
- int32_t fd = -1;
- int32_t input_fd = -1;
- int32_t output_fd = -1;
- char export_path[PATH_MAX];
- char *buf = NULL;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- int32_t iter = 0;
- int32_t ret = -1;
- uint64_t total_blks = 0;
- struct timeval begin, end = {0,};
- double throughput = 0;
- double time = 0;
- gfd_vol_top_priv_t *priv = NULL;
-
- GF_ASSERT (args);
- priv = (gfd_vol_top_priv_t *)args;
-
- blk_size = priv->blk_size;
- blk_count = priv->blk_count;
-
- snprintf (export_path, sizeof (export_path), "%s/%s",
- priv->xlator_req.name, ".gf-tmp-stats-perf");
- fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
- if (-1 == fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open tmp file");
- goto out;
- }
-
- buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char);
- if (!buf) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not allocate memory");
- goto out;
- }
-
- input_fd = open ("/dev/zero", O_RDONLY);
- if (-1 == input_fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open input file");
- goto out;
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ xlator_t *nextchild = NULL;
+ glusterfs_graph_t *newgraph = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *srv_xl = NULL;
+ server_conf_t *srv_conf = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ctx = this->ctx;
+ if (!ctx->cmd_args.volfile_id) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "No volfile-id provided, erroring out");
+ return -1;
+ }
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ret = 0;
+
+ if (!this->ctx->active) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "got attach for %s but no active graph", xlator_req.name);
+ goto post_unlock;
+ }
+
+ gf_log(this->name, GF_LOG_INFO, "got attach for %s", xlator_req.name);
+
+ LOCK(&ctx->volfile_lock);
+ {
+ ret = glusterfs_graph_attach(this->ctx->active, xlator_req.name,
+ &newgraph);
+ if (!ret && (newgraph && newgraph->first)) {
+ nextchild = newgraph->first;
+ ret = xlator_notify(nextchild, GF_EVENT_PARENT_UP, nextchild);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "event=ParentUp", "name=%s", nextchild->name, NULL);
+ goto unlock;
+ }
+ /* we need a protocol/server xlator as
+ * nextchild
+ */
+ srv_xl = this->ctx->active->first;
+ srv_conf = (server_conf_t *)srv_xl->private;
+ rpcsvc_autoscale_threads(this->ctx, srv_conf->rpc, 1);
}
-
- output_fd = open ("/dev/null", O_RDWR);
- if (-1 == output_fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open output file");
- goto out;
- }
-
- for (iter = 0; iter < blk_count; iter++) {
- ret = read (input_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- ret = write (fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- }
-
- ret = fsync (fd);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "could not flush cache");
- goto out;
- }
- ret = lseek (fd, 0L, 0);
- if (ret != 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "could not seek back to start");
- ret = -1;
- goto out;
- }
- gettimeofday (&begin, NULL);
- for (iter = 0; iter < blk_count; iter++) {
- ret = read (fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- ret = write (output_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- total_blks += ret;
- }
- ret = 0;
- if (total_blks != ((uint64_t)blk_size * blk_count)) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_WARNING, "Error in read");
- goto out;
+ ret = -1;
}
+ ret = glusterfs_translator_info_response_send(req, ret, NULL, NULL);
+ if (ret) {
+ /* Response sent back to glusterd, req is already destroyed. So
+ * resetting the ret to 0. Otherwise another response will be
+ * send from rpcsvc_check_and_reply_error. Which will lead to
+ * double resource leak.
+ */
+ ret = 0;
+ }
+ unlock:
+ UNLOCK(&ctx->volfile_lock);
+ }
+post_unlock:
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ free(xlator_req.input.input_val);
+ free(xlator_req.name);
+
+ return ret;
+}
- gettimeofday (&end, NULL);
- time = (end.tv_sec - begin.tv_sec) * 1e6
- + (end.tv_usec - begin.tv_usec);
- throughput = total_blks / time;
- gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
- "bytes read %"PRId64, throughput, time, total_blks);
-
+int
+glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, "volfile-id=%s",
+ xlator_req.name, NULL);
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(xlator_req.dict.dict_val, xlator_req.dict.dict_len,
+ &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, NULL);
+ goto out;
+ }
+ dict->extra_stdfree = xlator_req.dict.dict_val;
+
+ ret = 0;
+
+ ret = mgmt_process_volfile(xlator_req.input.input_val,
+ xlator_req.input.input_len, xlator_req.name,
+ dict);
out:
- priv->throughput = throughput;
- priv->time = time;
-
- if (fd >= 0)
- close (fd);
- if (input_fd >= 0)
- close (input_fd);
- if (output_fd >= 0)
- close (output_fd);
- if (buf)
- GF_FREE (buf);
- unlink (export_path);
-
- (void)glusterfs_handle_translator_info_get_cont (priv);
-
- return NULL;
+ if (dict)
+ dict_unref(dict);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.name)
+ free(xlator_req.name);
+ glusterfs_translator_info_response_send(req, ret, NULL, NULL);
+ return 0;
}
int
-glusterfs_handle_translator_op (void *data)
+glusterfs_handle_svc_detach(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *input = NULL;
- xlator_t *xlator = NULL;
- xlator_t *any = NULL;
- dict_t *output = NULL;
- char key[2048] = {0};
- char *xname = NULL;
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *this = NULL;
- int i = 0;
- int count = 0;
- rpcsvc_request_t *req = data;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- ctx = glusterfs_ctx_get ();
- active = ctx->active;
- any = active->first;
- input = dict_new ();
- ret = dict_unserialize (xlator_req.input.input_val,
- xlator_req.input.input_len,
- &input);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- input->extra_stdfree = xlator_req.input.input_val;
- }
-
- ret = dict_get_int32 (input, "count", &count);
-
- output = dict_new ();
- if (!output) {
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < count; i++) {
- snprintf (key, sizeof (key), "xl-%d", i);
- ret = dict_get_str (input, key, &xname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Couldn't get "
- "xlator %s ", key);
- goto out;
- }
- xlator = xlator_search_by_name (any, xname);
- if (!xlator) {
- gf_log (this->name, GF_LOG_ERROR, "xlator %s is not "
- "loaded", xname);
- goto out;
- }
- }
- for (i = 0; i < count; i++) {
- snprintf (key, sizeof (key), "xl-%d", i);
- ret = dict_get_str (input, key, &xname);
- xlator = xlator_search_by_name (any, xname);
- XLATOR_NOTIFY (xlator, GF_EVENT_TRANSLATOR_OP, input, output);
- if (ret)
- break;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ ssize_t ret;
+ gf_volfile_t *volfile_obj = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ctx = glusterfsd_ctx;
+
+ LOCK(&ctx->volfile_lock);
+ {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(xlator_req.name, volfile_obj->vol_id)) {
+ volfile_tmp = volfile_obj;
+ break;
+ }
+ }
+
+ if (!volfile_tmp) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_041, "name=%s",
+ xlator_req.name, NULL);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
+ ret = 0;
+ goto out;
+ }
+ /* coverity[ORDER_REVERSAL] */
+ ret = glusterfs_process_svc_detach(ctx, volfile_tmp);
+ if (ret) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_042,
+ NULL);
+ goto out;
}
+ }
+ UNLOCK(&ctx->volfile_lock);
out:
- glusterfs_xlator_op_response_send (req, ret, "", output);
- if (input)
- dict_unref (input);
- if (output)
- dict_unref (output);
- if (xlator_req.name)
- free (xlator_req.name); //malloced by xdr
+ glusterfs_terminate_response_send(req, ret);
+ free(xlator_req.name);
+ xlator_req.name = NULL;
- return 0;
+ return 0;
}
-
int
-glusterfs_handle_defrag (rpcsvc_request_t *req)
+glusterfs_handle_dump_metrics(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *dict = NULL;
- xlator_t *xlator = NULL;
- xlator_t *any = NULL;
- dict_t *output = NULL;
- char msg[2048] = {0};
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *this = NULL;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
-
- active = ctx->active;
- if (!active) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char *filepath = NULL;
+ int fd = -1;
+ struct stat statbuf = {
+ 0,
+ };
+ char *msg = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ret = -1;
+ ctx = this->ctx;
+
+ /* Infra for monitoring */
+ filepath = gf_monitor_metrics(ctx);
+ if (!filepath)
+ goto out;
+
+ fd = sys_open(filepath, O_RDONLY, 0);
+ if (fd < 0)
+ goto out;
+
+ if (sys_fstat(fd, &statbuf) < 0)
+ goto out;
+
+ if (statbuf.st_size > GF_UNIT_MB) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
+ "reconsider logic (%" PRId64 ")", statbuf.st_size, NULL);
+ }
+ msg = GF_CALLOC(1, (statbuf.st_size + 1), gf_common_mt_char);
+ if (!msg)
+ goto out;
+
+ ret = sys_read(fd, msg, statbuf.st_size);
+ if (ret < 0)
+ goto out;
+
+ /* Send all the data in errstr, instead of dictionary for now */
+ glusterfs_translator_info_response_send(req, 0, msg, NULL);
+
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
- any = active->first;
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_FREE(msg);
+ GF_FREE(filepath);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
- ret = dict_unserialize (xlator_req.input.input_val,
- xlator_req.input.input_len,
- &dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- xlator = xlator_search_by_name (any, xlator_req.name);
- if (!xlator) {
- snprintf (msg, sizeof (msg), "xlator %s is not loaded",
- xlator_req.name);
- goto out;
- }
+ return ret;
+}
- output = dict_new ();
- if (!output) {
- ret = -1;
- goto out;
- }
+int
+glusterfs_handle_defrag(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ xlator = xlator_search_by_name(any, xlator_req.name);
+ if (!xlator) {
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
- ret = xlator->notify (xlator, GF_EVENT_VOLUME_DEFRAG, dict, output);
+ ret = xlator->notify(xlator, GF_EVENT_VOLUME_DEFRAG, dict, output);
- ret = glusterfs_translator_info_response_send (req, ret,
- msg, output);
+ ret = glusterfs_translator_info_response_send(req, ret, msg, output);
out:
- if (dict)
- dict_unref (dict);
- if (xlator_req.input.input_val)
- free (xlator_req.input.input_val); // malloced by xdr
- if (output)
- dict_unref (output);
- if (xlator_req.name)
- free (xlator_req.name); //malloced by xdr
-
- return ret;
-
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+
+ return ret;
}
int
-glusterfs_handle_brick_status (rpcsvc_request_t *req)
+glusterfs_handle_brick_status(rpcsvc_request_t *req)
{
- int ret = -1;
- gd1_mgmt_brick_op_req brick_req = {0,};
- gd1_mgmt_brick_op_rsp rsp = {0,};
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *this = NULL;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- dict_t *dict = NULL;
- dict_t *output = NULL;
- char *volname = NULL;
- char *xname = NULL;
- uint32_t cmd = 0;
- char *msg = NULL;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- if (!xdr_to_generic (req->msg[0], &brick_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- dict = dict_new ();
- ret = dict_unserialize (brick_req.input.input_val,
- brick_req.input.input_len, &dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to unserialize "
- "req-buffer to dictionary");
- goto out;
- }
-
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Couldn't get status op");
- goto out;
- }
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Couldn't get volname");
- goto out;
- }
-
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- active = ctx->active;
- any = active->first;
-
- ret = gf_asprintf (&xname, "%s-server", volname);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- xlator = xlator_search_by_name (any, xname);
- if (!xlator) {
- gf_log (this->name, GF_LOG_ERROR, "xlator %s is not loaded",
- xname);
- ret = -1;
- goto out;
- }
-
-
- output = dict_new ();
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_MEM:
- ret = 0;
- gf_proc_dump_mem_info_to_dict (output);
- gf_proc_dump_mempool_info_to_dict (ctx, output);
- break;
-
- case GF_CLI_STATUS_CLIENTS:
- ret = xlator->dumpops->priv_to_dict (xlator, output);
- break;
-
- case GF_CLI_STATUS_INODE:
- ret = xlator->dumpops->inode_to_dict (xlator, output);
- break;
+ int ret = -1;
+ gd1_mgmt_brick_op_req brick_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ xlator_t *server_xl = NULL;
+ xlator_t *brick_xl = NULL;
+ dict_t *dict = NULL;
+ dict_t *output = NULL;
+ uint32_t cmd = 0;
+ char *msg = NULL;
+ char *brickname = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &brick_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(brick_req.input.input_val, brick_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to unserialize "
+ "req-buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Couldn't get status op");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "brick-name", &brickname);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Couldn't get brickname from"
+ " dict");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "ctx returned NULL");
+ ret = -1;
+ goto out;
+ }
+ if (ctx->active == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ active = ctx->active;
+ if (ctx->active->first == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "ctx->active->first "
+ "returned NULL");
+ ret = -1;
+ goto out;
+ }
+ server_xl = active->first;
- case GF_CLI_STATUS_FD:
- ret = xlator->dumpops->fd_to_dict (xlator, output);
- break;
+ brick_xl = get_xlator_by_name(server_xl, brickname);
+ if (!brick_xl) {
+ gf_log(this->name, GF_LOG_ERROR, "xlator is not loaded");
+ ret = -1;
+ goto out;
+ }
+
+ output = dict_new();
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ ret = 0;
+ gf_proc_dump_mem_info_to_dict(output);
+ gf_proc_dump_mempool_info_to_dict(ctx, output);
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ case GF_CLI_STATUS_CLIENT_LIST:
+ ret = server_xl->dumpops->priv_to_dict(server_xl, output,
+ brickname);
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ ret = server_xl->dumpops->inode_to_dict(brick_xl, output);
+ break;
+
+ case GF_CLI_STATUS_FD:
+ ret = server_xl->dumpops->fd_to_dict(brick_xl, output);
+ break;
+
+ case GF_CLI_STATUS_CALLPOOL:
+ ret = 0;
+ gf_proc_dump_pending_frames_to_dict(ctx->pool, output);
+ break;
- case GF_CLI_STATUS_CALLPOOL:
- ret = 0;
- gf_proc_dump_pending_frames_to_dict (ctx->pool, output);
- break;
+ default:
+ ret = -1;
+ msg = gf_strdup("Unknown status op");
+ break;
+ }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg)
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- default:
- ret = -1;
- msg = gf_strdup ("Unknown status op");
- break;
- }
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- if (ret && msg)
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
-
- ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to serialize output dict to rsp");
- goto out;
- }
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
- glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- ret = 0;
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
- if (output)
- dict_unref (output);
- if (brick_req.input.input_val)
- free (brick_req.input.input_val);
- if (xname)
- GF_FREE (xname);
- if (msg)
- GF_FREE (msg);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
-
- return ret;
-}
-
-static int
-glusterfs_command_done (int ret, call_frame_t *sync_frame, void *data)
-{
- STACK_DESTROY (sync_frame->root);
- return 0;
+ if (dict)
+ dict_unref(dict);
+ if (output)
+ dict_unref(output);
+ free(brick_req.input.input_val);
+ if (brick_req.dict.dict_val)
+ free(brick_req.dict.dict_val);
+ free(brick_req.name);
+ GF_FREE(msg);
+ GF_FREE(rsp.output.output_val);
+
+ return ret;
}
int
-glusterfs_handle_node_status (rpcsvc_request_t *req)
+glusterfs_handle_node_status(rpcsvc_request_t *req)
{
- int ret = -1;
- gd1_mgmt_brick_op_req node_req = {0,};
- gd1_mgmt_brick_op_rsp rsp = {0,};
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *any = NULL;
- xlator_t *node = NULL;
- xlator_t *subvol = NULL;
- dict_t *dict = NULL;
- dict_t *output = NULL;
- char *volname = NULL;
- char *node_name = NULL;
- char *subvol_name = NULL;
- uint32_t cmd = 0;
- char *msg = NULL;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &node_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ int ret = -1;
+ gd1_mgmt_brick_op_req node_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *any = NULL;
+ xlator_t *node = NULL;
+ xlator_t *subvol = NULL;
+ dict_t *dict = NULL;
+ dict_t *output = NULL;
+ char *volname = NULL;
+ char *node_name = NULL;
+ char *subvol_name = NULL;
+ uint32_t cmd = 0;
+ char *msg = NULL;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &node_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(node_req.input.input_val, node_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to unserialize "
+ "req buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Couldn't get status op");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Couldn't get volname");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ any = active->first;
+
+ if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ ret = gf_asprintf(&node_name, "%s", "glustershd");
+#ifdef BUILD_GNFS
+ else if ((cmd & GF_CLI_STATUS_NFS) != 0)
+ ret = gf_asprintf(&node_name, "%s", "nfs-server");
+#endif
+ else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0)
+ ret = gf_asprintf(&node_name, "%s", "quotad");
+ else if ((cmd & GF_CLI_STATUS_BITD) != 0)
+ ret = gf_asprintf(&node_name, "%s", "bitd");
+ else if ((cmd & GF_CLI_STATUS_SCRUB) != 0)
+ ret = gf_asprintf(&node_name, "%s", "scrubber");
+
+ else {
+ ret = -1;
+ goto out;
+ }
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to set node xlator name");
+ goto out;
+ }
+
+ node = xlator_search_by_name(any, node_name);
+ if (!node) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded", node_name);
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_NFS) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ ret = gf_asprintf(&subvol_name, "%s-replicate-0", volname);
+ else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else if ((cmd & GF_CLI_STATUS_BITD) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else if ((cmd & GF_CLI_STATUS_SCRUB) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else {
+ ret = -1;
+ goto out;
+ }
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to set node xlator name");
+ goto out;
+ }
+
+ subvol = xlator_search_by_name(node, subvol_name);
+ if (!subvol) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ subvol_name);
+ goto out;
+ }
+
+ output = dict_new();
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ ret = 0;
+ gf_proc_dump_mem_info_to_dict(output);
+ gf_proc_dump_mempool_info_to_dict(ctx, output);
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ // clients not available for SHD
+ if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ break;
- dict = dict_new ();
- ret = dict_unserialize (node_req.input.input_val,
- node_req.input.input_len, &dict);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to unserialize "
- "req buffer to dictionary");
+ ret = dict_set_str(output, "volname", volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Error setting volname to dict");
goto out;
- }
+ }
+ ret = node->dumpops->priv_to_dict(node, output, NULL);
+ break;
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get status op");
- goto out;
- }
+ case GF_CLI_STATUS_INODE:
+ ret = 0;
+ inode_table_dump_to_dict(subvol->itable, "conn0", output);
+ ret = dict_set_int32(output, "conncount", 1);
+ break;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get volname");
- goto out;
- }
+ case GF_CLI_STATUS_FD:
+ // cannot find fd-tables in nfs-server graph
+ // TODO: finish once found
+ break;
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- active = ctx->active;
- any = active->first;
+ case GF_CLI_STATUS_CALLPOOL:
+ ret = 0;
+ gf_proc_dump_pending_frames_to_dict(ctx->pool, output);
+ break;
- if ((cmd & GF_CLI_STATUS_NFS) != 0)
- ret = gf_asprintf (&node_name, "%s", "nfs-server");
- else if ((cmd & GF_CLI_STATUS_SHD) != 0)
- ret = gf_asprintf (&node_name, "%s", "glustershd");
- else {
- ret = -1;
- goto out;
- }
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set node xlator name");
- goto out;
- }
-
- node = xlator_search_by_name (any, node_name);
- if (!node) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
- node_name);
- goto out;
- }
+ default:
+ ret = -1;
+ msg = gf_strdup("Unknown status op");
+ gf_log(THIS->name, GF_LOG_ERROR, "%s", msg);
+ break;
+ }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg)
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- if ((cmd & GF_CLI_STATUS_NFS) != 0)
- ret = gf_asprintf (&subvol_name, "%s", volname);
- else if ((cmd & GF_CLI_STATUS_SHD) != 0)
- ret = gf_asprintf (&subvol_name, "%s-replicate-0", volname);
- else {
- ret = -1;
- goto out;
- }
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set node xlator name");
- goto out;
- }
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
- subvol = xlator_search_by_name (node, subvol_name);
- if (!subvol) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
- subvol_name);
- goto out;
- }
-
- output = dict_new ();
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_MEM:
- ret = 0;
- gf_proc_dump_mem_info_to_dict (output);
- gf_proc_dump_mempool_info_to_dict (ctx, output);
- break;
-
- case GF_CLI_STATUS_CLIENTS:
- // clients not availbale for SHD
- if ((cmd & GF_CLI_STATUS_SHD) != 0)
- break;
-
- ret = dict_set_str (output, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error setting volname to dict");
- goto out;
- }
- ret = node->dumpops->priv_to_dict (node, output);
- break;
-
- case GF_CLI_STATUS_INODE:
- ret = 0;
- inode_table_dump_to_dict (subvol->itable, "conn0",
- output);
- ret = dict_set_int32 (output, "conncount", 1);
- break;
-
- case GF_CLI_STATUS_FD:
- // cannot find fd-tables in nfs-server graph
- // TODO: finish once found
- break;
-
- case GF_CLI_STATUS_CALLPOOL:
- ret = 0;
- gf_proc_dump_pending_frames_to_dict (ctx->pool, output);
- break;
-
- default:
- ret = -1;
- msg = gf_strdup ("Unknown status op");
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- break;
- }
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- if (ret && msg)
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
-
- ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to serialize output dict to rsp");
- goto out;
- }
-
- glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- ret = 0;
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
- if (node_req.input.input_val)
- free (node_req.input.input_val);
- if (msg)
- GF_FREE (msg);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
- if (node_name)
- GF_FREE (node_name);
- if (subvol_name)
- GF_FREE (subvol_name);
-
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+ free(node_req.input.input_val);
+ if (node_req.dict.dict_val)
+ free(node_req.dict.dict_val);
+ GF_FREE(msg);
+ GF_FREE(rsp.output.output_val);
+ GF_FREE(node_name);
+ GF_FREE(subvol_name);
+
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-glusterfs_handle_nfs_profile (rpcsvc_request_t *req)
+glusterfs_handle_nfs_profile(rpcsvc_request_t *req)
{
- int ret = -1;
- gd1_mgmt_brick_op_req nfs_req = {0,};
- gd1_mgmt_brick_op_rsp rsp = {0,};
- dict_t *dict = NULL;
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *any = NULL;
- xlator_t *nfs = NULL;
- xlator_t *subvol = NULL;
- char *volname = NULL;
- dict_t *output = NULL;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &nfs_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ int ret = -1;
+ gd1_mgmt_brick_op_req nfs_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *any = NULL;
+ xlator_t *nfs = NULL;
+ xlator_t *subvol = NULL;
+ char *volname = NULL;
+ dict_t *output = NULL;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &nfs_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(nfs_req.input.input_val, nfs_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to "
+ "unserialize req-buffer to dict");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Couldn't get volname");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ any = active->first;
+
+ // is this needed?
+ // are problems possible by searching for subvol directly from "any"?
+ nfs = xlator_search_by_name(any, "nfs-server");
+ if (!nfs) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "xlator nfs-server is "
+ "not loaded");
+ goto out;
+ }
+
+ subvol = xlator_search_by_name(nfs, volname);
+ if (!subvol) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "xlator %s is no loaded", volname);
+ goto out;
+ }
- dict = dict_new ();
- ret = dict_unserialize (nfs_req.input.input_val,
- nfs_req.input.input_len, &dict);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to "
- "unserialize req-buffer to dict");
- goto out;
- }
+ output = dict_new();
+ ret = subvol->notify(subvol, GF_EVENT_TRANSLATOR_INFO, dict, output);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get volname");
- goto out;
- }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ rsp.op_errstr = "";
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
- active = ctx->active;
- any = active->first;
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
- // is this needed?
- // are problems possible by searching for subvol directly from "any"?
- nfs = xlator_search_by_name (any, "nfs-server");
- if (!nfs) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "xlator nfs-server is "
- "not loaded");
- goto out;
- }
+out:
+ free(nfs_req.input.input_val);
+ if (nfs_req.dict.dict_val)
+ free(nfs_req.dict.dict_val);
+ if (dict)
+ dict_unref(dict);
+ if (output)
+ dict_unref(output);
+ GF_FREE(rsp.output.output_val);
+
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- subvol = xlator_search_by_name (nfs, volname);
- if (!subvol) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "xlator %s is no loaded",
- volname);
- goto out;
- }
+int
+glusterfs_handle_volume_barrier_op(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ xlator = xlator_search_by_name(any, xlator_req.name);
+ if (!xlator) {
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
- output = dict_new ();
- ret = subvol->notify (subvol, GF_EVENT_TRANSLATOR_INFO, dict, output);
+ ret = xlator->notify(xlator, GF_EVENT_VOLUME_BARRIER_OP, dict, output);
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- rsp.op_errstr = "";
+ ret = glusterfs_translator_info_response_send(req, ret, msg, output);
+out:
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+
+ return ret;
+}
- ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to serialize output dict to rsp");
- goto out;
+int
+glusterfs_handle_barrier(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req brick_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp brick_rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *top = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *old_THIS = NULL;
+ dict_t *dict = NULL;
+ gf_boolean_t barrier = _gf_true;
+ xlator_list_t *trav;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &brick_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ top = active->first;
+
+ for (trav = top->children; trav; trav = trav->next) {
+ if (strcmp(trav->xlator->name, brick_req.name) == 0) {
+ break;
}
+ }
+ if (!trav) {
+ ret = -1;
+ goto out;
+ }
+ top = trav->xlator;
- glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- ret = 0;
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(brick_req.input.input_val, brick_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to unserialize "
+ "request dictionary");
+ goto out;
+ }
+
+ brick_rsp.op_ret = 0;
+ brick_rsp.op_errstr = ""; // initing to prevent serilaztion failures
+ old_THIS = THIS;
+
+ /* Send barrier request to the barrier xlator */
+ xlator = get_xlator_by_type(top, "features/barrier");
+ if (!xlator) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ "features/barrier");
+ goto out;
+ }
+
+ THIS = xlator;
+ // TODO: Extend this to accept return of errnos
+ ret = xlator->notify(xlator, GF_EVENT_TRANSLATOR_OP, dict);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "barrier notify failed");
+ brick_rsp.op_ret = ret;
+ brick_rsp.op_errstr = gf_strdup(
+ "Failed to reconfigure "
+ "barrier.");
+ /* This is to invoke changelog-barrier disable if barrier
+ * disable fails and don't invoke if barrier enable fails.
+ */
+ barrier = dict_get_str_boolean(dict, "barrier", _gf_true);
+ if (barrier)
+ goto submit_reply;
+ }
+
+ /* Reset THIS so that we have it correct in case of an error below
+ */
+ THIS = old_THIS;
+
+ /* Send barrier request to changelog as well */
+ xlator = get_xlator_by_type(top, "features/changelog");
+ if (!xlator) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ "features/changelog");
+ goto out;
+ }
+
+ THIS = xlator;
+ ret = xlator->notify(xlator, GF_EVENT_TRANSLATOR_OP, dict);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "changelog notify failed");
+ brick_rsp.op_ret = ret;
+ brick_rsp.op_errstr = gf_strdup("changelog notify failed");
+ goto submit_reply;
+ }
+
+submit_reply:
+ THIS = old_THIS;
+
+ ret = glusterfs_submit_reply(req, &brick_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
out:
- if (nfs_req.input.input_val)
- free (nfs_req.input.input_val);
- if (dict)
- dict_unref (dict);
- if (output)
- dict_unref (output);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
-
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+ free(brick_req.input.input_val);
+ if (brick_req.dict.dict_val)
+ free(brick_req.dict.dict_val);
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
+glusterfs_handle_rpc_msg(rpcsvc_request_t *req)
{
- int ret = -1;
- xlator_t *this = THIS;
- call_frame_t *frame = NULL;
-
- GF_ASSERT (this);
- switch (req->procnum) {
- case GLUSTERD_BRICK_TERMINATE:
- ret = glusterfs_handle_terminate (req);
- break;
- case GLUSTERD_BRICK_XLATOR_INFO:
- ret = glusterfs_handle_translator_info_get (req);
- break;
- case GLUSTERD_BRICK_XLATOR_OP:
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
- ret = synctask_new (this->ctx->env,
- glusterfs_handle_translator_op,
- glusterfs_command_done, frame, req);
- break;
- case GLUSTERD_BRICK_STATUS:
- ret = glusterfs_handle_brick_status (req);
- break;
- case GLUSTERD_BRICK_XLATOR_DEFRAG:
- ret = glusterfs_handle_defrag (req);
- break;
- case GLUSTERD_NODE_PROFILE:
- ret = glusterfs_handle_nfs_profile (req);
- break;
- case GLUSTERD_NODE_STATUS:
- ret = glusterfs_handle_node_status (req);
- default:
- break;
- }
-out:
- return ret;
+ int ret = -1;
+ /* for now, nothing */
+ return ret;
}
-rpcclnt_cb_actor_t gluster_cbk_actors[] = {
- [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec },
- [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
- mgmt_cbk_event},
+static rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", mgmt_cbk_spec, GF_CBK_FETCHSPEC},
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", mgmt_cbk_event,
+ GF_CBK_EVENT_NOTIFY},
+ [GF_CBK_STATEDUMP] = {"STATEDUMP", mgmt_cbk_event, GF_CBK_STATEDUMP},
};
-
-struct rpcclnt_cb_program mgmt_cbk_prog = {
- .progname = "GlusterFS Callback",
- .prognum = GLUSTER_CBK_PROGRAM,
- .progver = GLUSTER_CBK_VERSION,
- .actors = gluster_cbk_actors,
- .numactors = GF_CBK_MAXVALUE,
+static struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
};
-char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
- [GF_PMAP_NULL] = "NULL",
- [GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
- [GF_PMAP_BRICKBYPORT] = "BRICKBYPORT",
- [GF_PMAP_SIGNIN] = "SIGNIN",
- [GF_PMAP_SIGNOUT] = "SIGNOUT",
- [GF_PMAP_SIGNUP] = "SIGNUP",
+static char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_NULL] = "NULL",
+ [GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
+ [GF_PMAP_BRICKBYPORT] = "BRICKBYPORT",
+ [GF_PMAP_SIGNIN] = "SIGNIN",
+ [GF_PMAP_SIGNOUT] = "SIGNOUT",
+ [GF_PMAP_SIGNUP] = "SIGNUP", /* DEPRECATED - DON'T USE! */
};
-
-rpc_clnt_prog_t clnt_pmap_prog = {
- .progname = "Gluster Portmap",
- .prognum = GLUSTER_PMAP_PROGRAM,
- .progver = GLUSTER_PMAP_VERSION,
- .procnames = clnt_pmap_procs,
+static rpc_clnt_prog_t clnt_pmap_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .procnames = clnt_pmap_procs,
};
-char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
- [GF_HNDSK_NULL] = "NULL",
- [GF_HNDSK_SETVOLUME] = "SETVOLUME",
- [GF_HNDSK_GETSPEC] = "GETSPEC",
- [GF_HNDSK_PING] = "PING",
- [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
};
-rpc_clnt_prog_t clnt_handshake_prog = {
- .progname = "GlusterFS Handshake",
- .prognum = GLUSTER_HNDSK_PROGRAM,
- .progver = GLUSTER_HNDSK_VERSION,
- .procnames = clnt_handshake_procs,
+static rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
};
-rpcsvc_actor_t glusterfs_actors[] = {
- [GLUSTERD_BRICK_NULL] = { "NULL", GLUSTERD_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_TERMINATE] = { "TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_OP] = { "TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_DEFRAG] = { "TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", GLUSTERD_NODE_PROFILE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_NODE_STATUS] = {"NFS STATUS", GLUSTERD_NODE_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0}
+static rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", glusterfs_handle_rpc_msg, NULL,
+ GLUSTERD_BRICK_NULL, DRC_NA, 0},
+ [GLUSTERD_BRICK_TERMINATE] = {"TERMINATE", glusterfs_handle_terminate, NULL,
+ GLUSTERD_BRICK_TERMINATE, DRC_NA, 0},
+ [GLUSTERD_BRICK_XLATOR_INFO] = {"TRANSLATOR INFO",
+ glusterfs_handle_translator_info_get, NULL,
+ GLUSTERD_BRICK_XLATOR_INFO, DRC_NA, 0},
+ [GLUSTERD_BRICK_XLATOR_OP] = {"TRANSLATOR OP",
+ glusterfs_handle_translator_op, NULL,
+ GLUSTERD_BRICK_XLATOR_OP, DRC_NA, 0},
+ [GLUSTERD_BRICK_STATUS] = {"STATUS", glusterfs_handle_brick_status, NULL,
+ GLUSTERD_BRICK_STATUS, DRC_NA, 0},
+ [GLUSTERD_BRICK_XLATOR_DEFRAG] = {"TRANSLATOR DEFRAG",
+ glusterfs_handle_defrag, NULL,
+ GLUSTERD_BRICK_XLATOR_DEFRAG, DRC_NA, 0},
+ [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", glusterfs_handle_nfs_profile,
+ NULL, GLUSTERD_NODE_PROFILE, DRC_NA, 0},
+ [GLUSTERD_NODE_STATUS] = {"NFS STATUS", glusterfs_handle_node_status, NULL,
+ GLUSTERD_NODE_STATUS, DRC_NA, 0},
+ [GLUSTERD_VOLUME_BARRIER_OP] = {"VOLUME BARRIER OP",
+ glusterfs_handle_volume_barrier_op, NULL,
+ GLUSTERD_VOLUME_BARRIER_OP, DRC_NA, 0},
+ [GLUSTERD_BRICK_BARRIER] = {"BARRIER", glusterfs_handle_barrier, NULL,
+ GLUSTERD_BRICK_BARRIER, DRC_NA, 0},
+ [GLUSTERD_NODE_BITROT] = {"BITROT", glusterfs_handle_bitrot, NULL,
+ GLUSTERD_NODE_BITROT, DRC_NA, 0},
+ [GLUSTERD_BRICK_ATTACH] = {"ATTACH", glusterfs_handle_attach, NULL,
+ GLUSTERD_BRICK_ATTACH, DRC_NA, 0},
+
+ [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", glusterfs_handle_dump_metrics,
+ NULL, GLUSTERD_DUMP_METRICS, DRC_NA, 0},
+
+ [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", glusterfs_handle_svc_attach, NULL,
+ GLUSTERD_SVC_ATTACH, DRC_NA, 0},
+
+ [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", glusterfs_handle_svc_detach, NULL,
+ GLUSTERD_SVC_DETACH, DRC_NA, 0},
+
};
-struct rpcsvc_program glusterfs_mop_prog = {
- .progname = "Gluster Brick operations",
- .prognum = GD_BRICK_PROGRAM,
- .progver = GD_BRICK_VERSION,
- .actors = glusterfs_actors,
- .numactors = GLUSTERD_BRICK_MAXVALUE,
+static struct rpcsvc_program glusterfs_mop_prog = {
+ .progname = "Gluster Brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .actors = glusterfs_actors,
+ .numactors = GLUSTERD_BRICK_MAXVALUE,
+ .synctask = _gf_true,
};
int
-mgmt_submit_request (void *req, call_frame_t *frame,
- glusterfs_ctx_t *ctx,
- rpc_clnt_prog_t *prog, int procnum,
- fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc)
{
- int ret = -1;
- int count = 0;
- struct iovec iov = {0, };
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- ssize_t xdr_size = 0;
-
- iobref = iobref_new ();
- if (!iobref) {
- goto out;
- }
-
- if (req) {
- xdr_size = xdr_sizeof (xdrproc, req);
-
- iobuf = iobuf_get2 (ctx->iobuf_pool, xdr_size);
- if (!iobuf) {
- goto out;
- };
-
- iobref_add (iobref, iobuf);
-
- iov.iov_base = iobuf->ptr;
- iov.iov_len = iobuf_pagesize (iobuf);
-
- /* Create the xdr payload */
- ret = xdr_serialize_generic (iov, req, xdrproc);
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
- goto out;
- }
- iov.iov_len = ret;
- count = 1;
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
+ goto out;
}
+ iov.iov_len = ret;
+ count = 1;
+ }
- /* Send the msg */
- ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn,
- &iov, count,
- NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ /* Send the msg */
+ ret = rpc_clnt_submit(ctx->mgmt, prog, procnum, cbkfn, &iov, count, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
out:
- if (iobref)
- iobref_unref (iobref);
+ if (iobref)
+ iobref_unref(iobref);
- if (iobuf)
- iobuf_unref (iobuf);
- return ret;
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
}
-
-/* XXX: move these into @ctx */
-static char oldvolfile[131072];
-static int oldvollen = 0;
-
-static int
-xlator_equal_rec (xlator_t *xl1, xlator_t *xl2)
+int
+mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- xlator_list_t *trav1 = NULL;
- xlator_list_t *trav2 = NULL;
- int ret = 0;
-
- if (xl1 == NULL || xl2 == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return -1;
- }
-
- trav1 = xl1->children;
- trav2 = xl2->children;
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0, locked = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ char *volfile_id = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+ char sha256_hash[SHA256_DIGEST_LENGTH] = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *servers_list = NULL;
+ int tmp_fd = -1;
+ char template[] = "/tmp/glfs.volfile.XXXXXX";
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ goto out;
+ }
- while (trav1 && trav2) {
- ret = xlator_equal_rec (trav1->xlator, trav2->xlator);
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "xlators children not equal");
- goto out;
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get the 'volume file' from server");
+ ret = rsp.op_errno;
+ goto out;
+ }
+
+ if (!rsp.xdata.xdata_len) {
+ goto volfile;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.xdata.xdata_val, rsp.xdata.xdata_len, &dict);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to unserialize xdata to dictionary");
+ goto out;
+ }
+ dict->extra_stdfree = rsp.xdata.xdata_val;
+
+ ret = dict_get_str(dict, "servers-list", &servers_list);
+ if (ret) {
+ /* Server list is set by glusterd at the time of getspec */
+ ret = dict_get_str(dict, GLUSTERD_BRICK_SERVERS, &servers_list);
+ if (ret)
+ goto volfile;
+ }
+
+ gf_log(frame->this->name, GF_LOG_INFO,
+ "Received list of available volfile servers: %s", servers_list);
+
+ ret = gf_process_getspec_servers_list(&ctx->cmd_args, servers_list);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Failed (%s) to process servers list: %s", strerror(errno),
+ servers_list);
+ }
+
+volfile:
+ size = rsp.op_ret;
+ volfile_id = frame->local;
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id,
+ dict);
+ goto post_graph_mgmt;
+ }
+
+ ret = 0;
+ glusterfs_compute_sha256((const unsigned char *)rsp.spec, size,
+ sha256_hash);
+
+ LOCK(&ctx->volfile_lock);
+ {
+ locked = 1;
+
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(volfile_id, volfile_obj->vol_id)) {
+ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
+ sizeof(volfile_obj->volfile_checksum))) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_log(frame->this->name, GF_LOG_INFO,
+ "No change in volfile,"
+ "continuing");
+ goto post_unlock;
}
-
- trav1 = trav1->next;
- trav2 = trav2->next;
+ volfile_tmp = volfile_obj;
+ break;
+ }
}
- if (trav1 || trav2) {
- ret = -1;
- goto out;
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode */
+ tmp_fd = mkstemp(template);
+ if (-1 == tmp_fd) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
+ "create template=%s", template, NULL);
+ ret = -1;
+ goto post_unlock;
}
- if (strcmp (xl1->name, xl2->name)) {
- ret = -1;
- goto out;
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
+ */
+ ret = sys_unlink(template);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
+ "delete template=%s", template, NULL);
+ ret = 0;
}
-out :
- return ret;
-}
-
-static gf_boolean_t
-is_graph_topology_equal (glusterfs_graph_t *graph1,
- glusterfs_graph_t *graph2)
-{
- xlator_t *trav1 = NULL;
- xlator_t *trav2 = NULL;
- gf_boolean_t ret = _gf_true;
-
- trav1 = graph1->first;
- trav2 = graph2->first;
- ret = xlator_equal_rec (trav1, trav2);
-
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "graphs are not equal");
- ret = _gf_false;
- goto out;
+ tmpfp = fdopen(tmp_fd, "w+b");
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
}
- ret = _gf_true;
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "graphs are equal");
-
-out:
- return ret;
-}
-
-/* Function has 3types of return value 0, -ve , 1
- * return 0 =======> reconfiguration of options has succeeded
- * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
- * return -1(or -ve) =======> Some Internal Error occurred during the operation
- */
-static int
-glusterfs_volfile_reconfigure (FILE *newvolfile_fp)
-{
- glusterfs_graph_t *oldvolfile_graph = NULL;
- glusterfs_graph_t *newvolfile_graph = NULL;
- FILE *oldvolfile_fp = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- int ret = -1;
-
- oldvolfile_fp = tmpfile ();
- if (!oldvolfile_fp)
- goto out;
-
- if (!oldvollen) {
- ret = 1; // Has to call INIT for the whole graph
- goto out;
+ fwrite(rsp.spec, size, 1, tmpfp);
+ fflush(tmpfp);
+ if (ferror(tmpfp)) {
+ ret = -1;
+ goto out;
}
- fwrite (oldvolfile, oldvollen, 1, oldvolfile_fp);
- fflush (oldvolfile_fp);
+ /* Check if only options have changed. No need to reload the
+ * volfile if topology hasn't changed.
+ * glusterfs_volfile_reconfigure returns 3 possible return states
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all
+ * the xlators should be inited return -1(or -ve) =======> Some Internal
+ * Error occurred during the operation
+ */
- oldvolfile_graph = glusterfs_graph_construct (oldvolfile_fp);
- if (!oldvolfile_graph) {
- goto out;
+ ret = glusterfs_volfile_reconfigure(tmpfp, ctx);
+ if (ret == 0) {
+ gf_log("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "No need to re-load volfile, reconfigure done");
+ if (!volfile_tmp) {
+ ret = -1;
+ UNLOCK(&ctx->volfile_lock);
+ gf_log("mgmt", GF_LOG_ERROR,
+ "Graph reconfigure succeeded with out having "
+ "checksum.");
+ goto post_unlock;
+ }
+ memcpy(volfile_tmp->volfile_checksum, sha256_hash,
+ sizeof(volfile_tmp->volfile_checksum));
+ goto out;
}
- newvolfile_graph = glusterfs_graph_construct (newvolfile_fp);
- if (!newvolfile_graph) {
- goto out;
+ if (ret < 0) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_log("glusterfsd-mgmt", GF_LOG_DEBUG, "Reconfigure failed !!");
+ goto post_unlock;
}
- if (!is_graph_topology_equal (oldvolfile_graph,
- newvolfile_graph)) {
+ ret = glusterfs_process_volfp(ctx, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ tmp_fd = -1;
+ if (ret)
+ goto out;
- ret = 1;
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Graph topology not equal(should call INIT)");
+ if (!volfile_tmp) {
+ volfile_tmp = GF_CALLOC(1, sizeof(gf_volfile_t),
+ gf_common_volfile_t);
+ if (!volfile_tmp) {
+ ret = -1;
goto out;
- }
+ }
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Only options have changed in the new "
- "graph");
-
- ctx = glusterfs_ctx_get ();
-
- if (!ctx) {
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "glusterfs_ctx_get() returned NULL");
- goto out;
+ INIT_LIST_HEAD(&volfile_tmp->volfile_list);
+ volfile_tmp->graph = ctx->active;
+ list_add(&volfile_tmp->volfile_list, &ctx->volfile_list);
+ snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s",
+ volfile_id);
}
+ memcpy(volfile_tmp->volfile_checksum, sha256_hash,
+ sizeof(volfile_tmp->volfile_checksum));
+ }
+ UNLOCK(&ctx->volfile_lock);
- oldvolfile_graph = ctx->active;
-
- if (!oldvolfile_graph) {
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "glusterfs_ctx->active is NULL");
- goto out;
- }
+ locked = 0;
- /* */
- ret = glusterfs_graph_reconfigure (oldvolfile_graph,
- newvolfile_graph);
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Could not reconfigure new options in old graph");
- goto out;
- }
+post_graph_mgmt:
+ if (!is_mgmt_rpc_reconnect) {
+ need_emancipate = 1;
+ glusterfs_mgmt_pmap_signin(ctx);
+ is_mgmt_rpc_reconnect = _gf_true;
+ }
- ret = 0;
out:
- return ret;
+
+ if (locked)
+ UNLOCK(&ctx->volfile_lock);
+post_unlock:
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ free(rsp.spec);
+
+ if (dict)
+ dict_unref(dict);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_log("mgmt", GF_LOG_ERROR,
+ "Server is operating at an "
+ "op-version which is not supported");
+ cleanup_and_exit(0);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_log("mgmt", GF_LOG_ERROR, "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ cleanup_and_exit(0);
+ }
+
+ if (tmpfp)
+ fclose(tmpfp);
+ else if (tmp_fd != -1)
+ sys_close(tmp_fd);
+
+ return 0;
}
-int
-mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
+static int
+glusterfs_volfile_fetch_one(glusterfs_ctx_t *ctx, char *volfile_id)
{
- gf_getspec_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
- ssize_t size = 0;
- FILE *tmpfp = NULL;
-
- frame = myframe;
- ctx = frame->this->ctx;
-
- if (-1 == req->rpc_status) {
- ret = -1;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
- ret = -1;
- goto out;
- }
-
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the 'volume file' from server");
- ret = -1;
- goto out;
- }
-
- ret = 0;
- size = rsp.op_ret;
-
- if (size == oldvollen && (memcmp (oldvolfile, rsp.spec, size) == 0)) {
- gf_log (frame->this->name, GF_LOG_INFO,
- "No change in volfile, continuing");
- goto out;
- }
-
- tmpfp = tmpfile ();
- if (!tmpfp) {
- ret = -1;
- goto out;
- }
-
- fwrite (rsp.spec, size, 1, tmpfp);
- fflush (tmpfp);
-
- /* Check if only options have changed. No need to reload the
- * volfile if topology hasn't changed.
- * glusterfs_volfile_reconfigure returns 3 possible return states
- * return 0 =======> reconfiguration of options has succeeded
- * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
- * return -1(or -ve) =======> Some Internal Error occurred during the operation
- */
-
- ret = glusterfs_volfile_reconfigure (tmpfp);
- if (ret == 0) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "No need to re-load volfile, reconfigure done");
- oldvollen = size;
- memcpy (oldvolfile, rsp.spec, size);
- goto out;
- }
+ cmd_args_t *cmd_args = NULL;
+ gf_getspec_req req = {
+ 0,
+ };
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ cmd_args = &ctx->cmd_args;
+ if (!volfile_id) {
+ volfile_id = ctx->cmd_args.volfile_id;
+ if (!volfile_id) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "No volfile-id provided, erroring out");
+ return -1;
+ }
+ }
+
+ frame = create_frame(THIS, ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ req.key = volfile_id;
+ req.flags = 0;
+ /*
+ * We are only storing one variable in local, hence using the same
+ * variable. If multiple local variable is required, create a struct.
+ */
+ frame->local = gf_strdup(volfile_id);
+ if (!frame->local) {
+ ret = -1;
+ goto out;
+ }
- if (ret < 0) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG, "Reconfigure failed !!");
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32(dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ /* Ask for a list of volfile (glusterd2 only) servers */
+ if (GF_CLIENT_PROCESS == ctx->process_mode) {
+ req.flags = req.flags | GF_GETSPEC_FLAG_SERVERS_LIST;
+ }
+
+ if (cmd_args->brick_name) {
+ ret = dict_set_dynstr_with_alloc(dict, "brick_name",
+ cmd_args->brick_name);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set brick_name in request dict");
+ goto out;
}
+ }
- ret = glusterfs_process_volfp (ctx, tmpfp);
- if (ret)
- goto out;
+ ret = dict_allocate_and_serialize(dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to serialize dictionary");
+ goto out;
+ }
- oldvollen = size;
- memcpy (oldvolfile, rsp.spec, size);
- if (!is_mgmt_rpc_reconnect) {
- glusterfs_mgmt_pmap_signin (ctx);
- is_mgmt_rpc_reconnect = 1;
- }
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
out:
- STACK_DESTROY (frame->root);
-
- if (rsp.spec)
- free (rsp.spec);
-
- if (ret && ctx && !ctx->active) {
- /* Do it only for the first time */
- /* Failed to get the volume file, something wrong,
- restart the process */
- gf_log ("mgmt", GF_LOG_ERROR,
- "failed to fetch volume file (key:%s)",
- ctx->cmd_args.volfile_id);
- cleanup_and_exit (0);
- }
- return 0;
-}
+ GF_FREE(req.xdata.xdata_val);
+ if (dict)
+ dict_unref(dict);
+ if (ret && frame) {
+ /* Free the frame->local fast, because we have not used memget
+ */
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ }
+ return ret;
+}
int
-glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
+glusterfs_volfile_fetch(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- gf_getspec_req req = {0, };
- int ret = 0;
- call_frame_t *frame = NULL;
-
- cmd_args = &ctx->cmd_args;
-
- frame = create_frame (THIS, ctx->pool);
-
- req.key = cmd_args->volfile_id;
- req.flags = 0;
+ xlator_t *server_xl = NULL;
+ xlator_list_t *trav;
+ gf_volfile_t *volfile_obj = NULL;
+ int ret = 0;
+
+ LOCK(&ctx->volfile_lock);
+ {
+ if (ctx->active &&
+ mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id);
+ }
+ UNLOCK(&ctx->volfile_lock);
+ return ret;
+ }
+
+ if (ctx->active) {
+ server_xl = ctx->active->first;
+ if (strcmp(server_xl->type, "protocol/server") != 0) {
+ server_xl = NULL;
+ }
+ }
+ if (!server_xl) {
+ /* Startup (ctx->active not set) or non-server. */
+ UNLOCK(&ctx->volfile_lock);
+ return glusterfs_volfile_fetch_one(ctx, ctx->cmd_args.volfile_id);
+ }
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
- GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
- (xdrproc_t)xdr_gf_getspec_req);
- return ret;
+ ret = 0;
+ for (trav = server_xl->children; trav; trav = trav->next) {
+ ret |= glusterfs_volfile_fetch_one(ctx, trav->xlator->volfile_id);
+ }
+ }
+ UNLOCK(&ctx->volfile_lock);
+ return ret;
}
int32_t
-mgmt_event_notify_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
+mgmt_event_notify_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_event_notify_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
+ gf_event_notify_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ int ret = 0;
- frame = myframe;
- ctx = frame->this->ctx;
+ frame = myframe;
- if (-1 == req->rpc_status) {
- ret = -1;
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
- ret = -1;
- goto out;
- }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the rsp from server");
- ret = -1;
- goto out;
- }
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get the rsp from server");
+ ret = -1;
+ goto out;
+ }
out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- return ret;
-
+ free(rsp.dict.dict_val); // malloced by xdr
+ return ret;
}
int32_t
-glusterfs_rebalance_event_notify_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+glusterfs_rebalance_event_notify_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
{
- gf_event_notify_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
+ gf_event_notify_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ int ret = 0;
- frame = myframe;
- ctx = frame->this->ctx;
+ frame = myframe;
- if (-1 == req->rpc_status) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the rsp from server");
- ret = -1;
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get the rsp from server");
+ ret = -1;
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
- ret = -1;
- goto out;
- }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "Recieved error (%s) from server",
- strerror (rsp.op_errno));
- ret = -1;
- goto out;
- }
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Received error (%s) from server", strerror(rsp.op_errno));
+ ret = -1;
+ goto out;
+ }
out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- return ret;
+ free(rsp.dict.dict_val); // malloced by xdr
+
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+ return ret;
}
int32_t
-glusterfs_rebalance_event_notify (dict_t *dict)
+glusterfs_rebalance_event_notify(dict_t *dict)
{
- glusterfs_ctx_t *ctx = NULL;
- gf_event_notify_req req = {0,};
- int32_t ret = -1;
- cmd_args_t *cmd_args = NULL;
- call_frame_t *frame = NULL;
-
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_event_notify_req req = {
+ 0,
+ };
+ int32_t ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ call_frame_t *frame = NULL;
- frame = create_frame (THIS, ctx->pool);
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
- req.op = GF_EN_DEFRAG_STATUS;
+ frame = create_frame(THIS, ctx->pool);
- if (dict) {
- ret = dict_set_str (dict, "volname", cmd_args->volfile_id);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "failed to set volname");
+ req.op = GF_EN_DEFRAG_STATUS;
- ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ if (dict) {
+ ret = dict_set_str(dict, "volname", cmd_args->volfile_id);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "failed to set volname");
}
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "failed to serialize dict");
+ }
+ }
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
- GF_HNDSK_EVENT_NOTIFY,
- glusterfs_rebalance_event_notify_cbk,
- (xdrproc_t)xdr_gf_event_notify_req);
-
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_EVENT_NOTIFY,
+ glusterfs_rebalance_event_notify_cbk,
+ (xdrproc_t)xdr_gf_event_notify_req);
- STACK_DESTROY (frame->root);
- return ret;
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
static int
-mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
- void *data)
+mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
{
- xlator_t *this = NULL;
- cmd_args_t *cmd_args = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
-
- this = mydata;
- ctx = this->ctx;
- cmd_args = &ctx->cmd_args;
- switch (event) {
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ server_cmdline_t *server = NULL;
+ rpc_transport_t *rpc_trans = NULL;
+ int need_term = 0;
+ int emval = 0;
+ static int log_ctr1;
+ static int log_ctr2;
+ struct dnscache6 *dnscache = NULL;
+
+ this = mydata;
+ rpc_trans = rpc->conn.trans;
+ ctx = this->ctx;
+
+ switch (event) {
case RPC_CLNT_DISCONNECT:
+ if (rpc_trans->connect_failed) {
+ GF_LOG_OCCASIONALLY(log_ctr1, "glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to connect to remote-"
+ "host: %s",
+ ctx->cmd_args.volfile_server);
+ } else {
+ GF_LOG_OCCASIONALLY(log_ctr1, "glusterfsd-mgmt", GF_LOG_INFO,
+ "disconnected from remote-"
+ "host: %s",
+ ctx->cmd_args.volfile_server);
+ }
+
+ if (!rpc->disabled) {
+ /*
+ * Check if dnscache is exhausted for current server
+ * and continue until cache is exhausted
+ */
+ dnscache = rpc_trans->dnscache;
+ if (dnscache && dnscache->next) {
+ break;
+ }
+ }
+ server = ctx->cmd_args.curr_server;
+ if (server->list.next == &ctx->cmd_args.volfile_servers) {
+ if (!ctx->active) {
+ need_term = 1;
+ }
+ emval = ENOTCONN;
+ GF_LOG_OCCASIONALLY(log_ctr2, "glusterfsd-mgmt", GF_LOG_INFO,
+ "Exhausted all volfile servers");
+ break;
+ }
+ server = list_entry(server->list.next, typeof(*server), list);
+ ctx->cmd_args.curr_server = server;
+ ctx->cmd_args.volfile_server = server->volfile_server;
+
+ ret = dict_set_str(rpc_trans->options, "remote-host",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_log("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to set remote-host: %s", server->volfile_server);
if (!ctx->active) {
- cmd_args->max_connect_attempts--;
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "failed to connect with remote-host: %s",
- strerror (errno));
- gf_log ("glusterfsd-mgmt", GF_LOG_INFO,
- "%d connect attempts left",
- cmd_args->max_connect_attempts);
- if (0 >= cmd_args->max_connect_attempts)
- cleanup_and_exit (1);
+ need_term = 1;
}
+ emval = ENOTCONN;
break;
+ }
+ gf_log("glusterfsd-mgmt", GF_LOG_INFO,
+ "connecting to next volfile server %s",
+ server->volfile_server);
+ break;
case RPC_CLNT_CONNECT:
- rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
-
- ret = glusterfs_volfile_fetch (ctx);
- if (ret && ctx && (ctx->active == NULL)) {
- /* Do it only for the first time */
- /* Exit the process.. there is some wrong options */
- gf_log ("mgmt", GF_LOG_ERROR,
- "failed to fetch volume file (key:%s)",
- ctx->cmd_args.volfile_id);
- cleanup_and_exit (0);
+ ret = glusterfs_volfile_fetch(ctx);
+ if (ret) {
+ emval = ret;
+ if (!ctx->active) {
+ need_term = 1;
+ gf_log("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ break;
}
+ }
- if (is_mgmt_rpc_reconnect)
- glusterfs_mgmt_pmap_signin (ctx);
- break;
+ if (is_mgmt_rpc_reconnect)
+ glusterfs_mgmt_pmap_signin(ctx);
+
+ break;
default:
- break;
- }
+ break;
+ }
- return 0;
+ if (need_term) {
+ emancipate(ctx, emval);
+ cleanup_and_exit(1);
+ }
+
+ return 0;
}
int
-glusterfs_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
- void *data)
+glusterfs_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
{
- if (!xl || !data) {
- goto out;
- }
+ if (!xl || !data) {
+ goto out;
+ }
- switch (event) {
- case RPCSVC_EVENT_ACCEPT:
- {
- break;
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT: {
+ break;
}
- case RPCSVC_EVENT_DISCONNECT:
- {
- break;
+ case RPCSVC_EVENT_DISCONNECT: {
+ break;
}
default:
- break;
- }
+ break;
+ }
out:
- return 0;
+ return 0;
}
int
-glusterfs_listener_init (glusterfs_ctx_t *ctx)
+glusterfs_listener_init(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- rpcsvc_t *rpc = NULL;
- dict_t *options = NULL;
- int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ rpcsvc_t *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (ctx->listener)
- return 0;
+ if (ctx->listener)
+ return 0;
- if (!cmd_args->sock_file)
- return 0;
+ if (!cmd_args->sock_file)
+ return 0;
- ret = rpcsvc_transport_unix_options_build (&options,
- cmd_args->sock_file);
- if (ret)
- goto out;
+ options = dict_new();
+ if (!options)
+ goto out;
- rpc = rpcsvc_init (THIS, ctx, options, 8);
- if (rpc == NULL) {
- goto out;
- }
+ ret = rpcsvc_transport_unix_options_build(options, cmd_args->sock_file);
+ if (ret)
+ goto out;
- ret = rpcsvc_register_notify (rpc, glusterfs_rpcsvc_notify, THIS);
- if (ret) {
- goto out;
- }
+ rpc = rpcsvc_init(THIS, ctx, options, 8);
+ if (rpc == NULL) {
+ goto out;
+ }
- ret = rpcsvc_create_listeners (rpc, options, "glusterfsd");
- if (ret < 1) {
- ret = -1;
- goto out;
- }
+ ret = rpcsvc_register_notify(rpc, glusterfs_rpcsvc_notify, THIS);
+ if (ret) {
+ goto out;
+ }
- ret = rpcsvc_program_register (rpc, &glusterfs_mop_prog);
- if (ret) {
- goto out;
- }
+ ret = rpcsvc_create_listeners(rpc, options, "glusterfsd");
+ if (ret < 1) {
+ goto out;
+ }
+
+ ret = rpcsvc_program_register(rpc, &glusterfs_mop_prog, _gf_false);
+ if (ret) {
+ goto out;
+ }
- ctx->listener = rpc;
+ ctx->listener = rpc;
out:
- return ret;
+ if (options)
+ dict_unref(options);
+ return ret;
}
int
-glusterfs_listener_stop (glusterfs_ctx_t *ctx)
+glusterfs_mgmt_notify(int32_t op, void *data, ...)
{
- cmd_args_t *cmd_args = NULL;
- rpcsvc_t *rpc = NULL;
- rpcsvc_listener_t *listener = NULL;
- rpcsvc_listener_t *next = NULL;
- int ret = 0;
- xlator_t *this = NULL;
-
- GF_ASSERT (ctx);
-
- rpc = ctx->listener;
- ctx->listener = NULL;
+ int ret = 0;
+ switch (op) {
+ case GF_EN_DEFRAG_STATUS:
+ ret = glusterfs_rebalance_event_notify((dict_t *)data);
+ break;
- (void) rpcsvc_program_unregister(rpc, &glusterfs_mop_prog);
-
- list_for_each_entry_safe (listener, next, &rpc->listeners, list) {
- rpcsvc_listener_destroy (listener);
- }
-
- (void) rpcsvc_unregister_notify (rpc, glusterfs_rpcsvc_notify, THIS);
-
- GF_FREE (rpc);
-
- cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = unlink (cmd_args->sock_file);
- if (ret && (ENOENT == errno)) {
- ret = 0;
- }
- }
+ default:
+ gf_log("", GF_LOG_ERROR, "Invalid op");
+ break;
+ }
- if (ret) {
- this = THIS;
- gf_log (this->name, GF_LOG_ERROR, "Failed to unlink linstener "
- "socket %s, error: %s", cmd_args->sock_file,
- strerror (errno));
- }
- return ret;
+ return ret;
}
int
-glusterfs_mgmt_init (glusterfs_ctx_t *ctx)
+glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- struct rpc_clnt *rpc = NULL;
- dict_t *options = NULL;
- int ret = -1;
- int port = GF_DEFAULT_BASE_PORT;
- char *host = NULL;
-
- cmd_args = &ctx->cmd_args;
-
- if (ctx->mgmt)
- return 0;
+ cmd_args_t *cmd_args = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+ xlator_cmdline_option_t *opt = NULL;
+
+ cmd_args = &ctx->cmd_args;
+ GF_VALIDATE_OR_GOTO(THIS->name, cmd_args->volfile_server, out);
+
+ if (ctx->mgmt)
+ return 0;
- if (cmd_args->volfile_server_port)
- port = cmd_args->volfile_server_port;
+ options = dict_new();
+ if (!options)
+ goto out;
- host = "localhost";
- if (cmd_args->volfile_server)
- host = cmd_args->volfile_server;
+ LOCK_INIT(&ctx->volfile_lock);
- ret = rpc_transport_inet_options_build (&options, host, port);
- if (ret)
- goto out;
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
- rpc = rpc_clnt_new (options, THIS->ctx, THIS->name, 8);
- if (!rpc) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_WARNING, "failed to create rpc clnt");
- goto out;
- }
+ host = cmd_args->volfile_server;
- ret = rpc_clnt_register_notify (rpc, mgmt_rpc_notify, THIS);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to register notify function");
- goto out;
- }
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+ ret = rpc_transport_unix_options_build(options, host, 0);
+ } else {
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
+ }
+ if (ret)
+ goto out;
- ret = rpcclnt_cbk_program_register (rpc, &mgmt_cbk_prog);
+ /* Explicitly turn on encrypted transport. */
+ if (ctx->secure_mgmt) {
+ ret = dict_set_dynstr_with_alloc(options,
+ "transport.socket.ssl-enabled", "yes");
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to register callback function");
- goto out;
- }
-
- /* This value should be set before doing the 'rpc_clnt_start()' as
- the notify function uses this variable */
- ctx->mgmt = rpc;
-
- ret = rpc_clnt_start (rpc);
-out:
- return ret;
-}
-
-static int
-mgmt_pmap_signin2_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- pmap_signin_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to set 'transport.socket.ssl-enabled' "
+ "in options dict");
+ goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decode error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
+ ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to register the port with glusterd");
- goto out;
- }
+ rpc = rpc_clnt_new(options, THIS, THIS->name, 8);
+ if (!rpc) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create rpc clnt");
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(rpc, mgmt_rpc_notify, THIS);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to register notify function");
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register(rpc, &mgmt_cbk_prog, THIS);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to register callback function");
+ goto out;
+ }
+
+ ctx->notify = glusterfs_mgmt_notify;
+
+ /* This value should be set before doing the 'rpc_clnt_start()' as
+ the notify function uses this variable */
+ ctx->mgmt = rpc;
+
+ ret = rpc_clnt_start(rpc);
out:
- STACK_DESTROY (frame->root);
- return 0;
-
+ if (options)
+ dict_unref(options);
+ return ret;
}
static int
-mgmt_pmap_signin_cbk (struct rpc_req *req, struct iovec *iov, int count,
+mgmt_pmap_signin2_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
- pmap_signin_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- int ret = 0;
- pmap_signin_req pmap_req = {0, };
- cmd_args_t *cmd_args = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char brick_name[PATH_MAX] = {0,};
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decode error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to register the port with glusterd");
- goto out;
- }
+ pmap_signin_rsp rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
+ ctx = glusterfsd_ctx;
+ frame = myframe;
- if (!cmd_args->brick_port2) {
- /* We are done with signin process */
- goto out;
- }
-
- snprintf (brick_name, PATH_MAX, "%s.rdma", cmd_args->brick_name);
- pmap_req.port = cmd_args->brick_port2;
- pmap_req.brick = brick_name;
-
- ret = mgmt_submit_request (&pmap_req, frame, ctx, &clnt_pmap_prog,
- GF_PMAP_SIGNIN, mgmt_pmap_signin2_cbk,
- (xdrproc_t)xdr_pmap_signin_req);
- if (ret)
- goto out;
-
- return 0;
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decode error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
out:
+ if (need_emancipate)
+ emancipate(ctx, ret);
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
-int
-glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx)
+static int
+mgmt_pmap_signin_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- call_frame_t *frame = NULL;
- pmap_signin_req req = {0, };
- int ret = -1;
- cmd_args_t *cmd_args = NULL;
+ pmap_signin_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ int emancipate_ret = -1;
+ pmap_signin_req pmap_req = {
+ 0,
+ };
+ cmd_args_t *cmd_args = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char brick_name[PATH_MAX] = {
+ 0,
+ };
+
+ frame = myframe;
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decode error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ ret = -1;
+ goto out;
+ }
- frame = create_frame (THIS, ctx->pool);
- cmd_args = &ctx->cmd_args;
+ if (!cmd_args->brick_port2) {
+ /* We are done with signin process */
+ emancipate_ret = 0;
+ goto out;
+ }
- if (!cmd_args->brick_port || !cmd_args->brick_name) {
- gf_log ("fsd-mgmt", GF_LOG_DEBUG,
- "portmapper signin arguments not given");
- goto out;
- }
+ snprintf(brick_name, PATH_MAX, "%s.rdma", cmd_args->brick_name);
+ pmap_req.port = cmd_args->brick_port2;
+ pmap_req.brick = brick_name;
- req.port = cmd_args->brick_port;
- req.brick = cmd_args->brick_name;
+ ret = mgmt_submit_request(&pmap_req, frame, ctx, &clnt_pmap_prog,
+ GF_PMAP_SIGNIN, mgmt_pmap_signin2_cbk,
+ (xdrproc_t)xdr_pmap_signin_req);
+ if (ret)
+ goto out;
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_pmap_prog,
- GF_PMAP_SIGNIN, mgmt_pmap_signin_cbk,
- (xdrproc_t)xdr_pmap_signin_req);
+ return 0;
out:
- return ret;
-}
+ if (need_emancipate && (ret < 0 || !cmd_args->brick_port2))
+ emancipate(ctx, emancipate_ret);
-
-static int
-mgmt_pmap_signout_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- pmap_signout_rsp rsp = {0,};
- int ret = 0;
- glusterfs_ctx_t *ctx = NULL;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- ctx = glusterfs_ctx_get ();
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signout_rsp);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 == rsp.op_ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to register the port with glusterd");
- goto out;
- }
-out:
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
-
int
-glusterfs_mgmt_pmap_signout (glusterfs_ctx_t *ctx)
+glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx)
{
- int ret = 0;
- pmap_signout_req req = {0, };
- call_frame_t *frame = NULL;
- cmd_args_t *cmd_args = NULL;
-
- frame = create_frame (THIS, ctx->pool);
- cmd_args = &ctx->cmd_args;
-
- if (!cmd_args->brick_port || !cmd_args->brick_name) {
- gf_log ("fsd-mgmt", GF_LOG_DEBUG,
- "portmapper signout arguments not given");
- goto out;
- }
-
- req.port = cmd_args->brick_port;
- req.brick = cmd_args->brick_name;
+ call_frame_t *frame = NULL;
+ xlator_list_t **trav_p;
+ xlator_t *top;
+ pmap_signin_req req = {
+ 0,
+ };
+ int ret = -1;
+ int emancipate_ret = -1;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->brick_port || !cmd_args->brick_name) {
+ gf_log("fsd-mgmt", GF_LOG_DEBUG,
+ "portmapper signin arguments not given");
+ emancipate_ret = 0;
+ goto out;
+ }
+
+ req.port = cmd_args->brick_port;
+ req.pid = (int)getpid(); /* only glusterd2 consumes this */
+
+ if (ctx->active) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ frame = create_frame(THIS, ctx->pool);
+ req.brick = (*trav_p)->xlator->name;
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_pmap_prog,
+ GF_PMAP_SIGNIN, mgmt_pmap_signin_cbk,
+ (xdrproc_t)xdr_pmap_signin_req);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to send sign in request; brick = %s", req.brick);
+ }
+ }
+ }
+
+ /* unfortunately, the caller doesn't care about the returned value */
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_pmap_prog,
- GF_PMAP_SIGNOUT, mgmt_pmap_signout_cbk,
- (xdrproc_t)xdr_pmap_signout_req);
out:
- return ret;
+ if (need_emancipate && ret < 0)
+ emancipate(ctx, emancipate_ret);
+ return ret;
}
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 44678bb1315..dae41f33fef 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -29,6 +19,7 @@
#include <netdb.h>
#include <signal.h>
#include <libgen.h>
+#include <dlfcn.h>
#include <sys/utsname.h>
@@ -41,909 +32,1447 @@
#include <errno.h>
#include <pwd.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#ifdef GF_LINUX_HOST_OS
+#ifdef HAVE_LINUX_OOM_H
+#include <linux/oom.h>
+#else
+#define OOM_SCORE_ADJ_MIN (-1000)
+#define OOM_SCORE_ADJ_MAX 1000
+#define OOM_DISABLE (-17)
+#define OOM_ADJUST_MAX 15
+#endif
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
-#include "xlator.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
-#include "dict.h"
-#include "list.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include "glusterfsd-messages.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
#include "glusterfsd.h"
-#include "stack.h"
-#include "revision.h"
-#include "common-utils.h"
-#include "event.h"
-#include "globals.h"
-#include "statedump.h"
-#include "latency.h"
+#include <glusterfs/revision.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/latency.h>
#include "glusterfsd-mem-types.h"
-#include "syscall.h"
-#include "call-stub.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/call-stub.h>
#include <fnmatch.h>
#include "rpc-clnt.h"
-#include "syncop.h"
-
-#include "daemon.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/client_t.h>
+#include "netgroups.h"
+#include "exports.h"
+#include <glusterfs/monitoring.h>
-/* process mode definitions */
-#define GF_SERVER_PROCESS 0
-#define GF_CLIENT_PROCESS 1
-#define GF_GLUSTERD_PROCESS 2
+#include <glusterfs/daemon.h>
/* using argp for command line parsing */
static char gf_doc[] = "";
-static char argp_doc[] = "--volfile-server=SERVER [MOUNT-POINT]\n" \
- "--volfile=VOLFILE [MOUNT-POINT]";
-const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__ \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2011 Gluster Inc. " \
- "<http://www.gluster.com>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "You may redistribute copies of GlusterFS under the terms of "\
- "the GNU General Public License.";
+static char argp_doc[] =
+ "--volfile-server=SERVER [MOUNT-POINT]\n"
+ "--volfile=VOLFILE [MOUNT-POINT]";
+const char *argp_program_version =
+ "" PACKAGE_NAME " " PACKAGE_VERSION
+ "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION
+ "\n"
+ "Copyright (c) 2006-2016 Red Hat, Inc. "
+ "<https://www.gluster.org/>\n"
+ "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+ "It is licensed to you under your choice of the GNU Lesser\n"
+ "General Public License, version 3 or any later version (LGPLv3\n"
+ "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+ "in all cases as published by the Free Software Foundation.";
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
-static error_t parse_opts (int32_t key, char *arg, struct argp_state *_state);
+static error_t
+parse_opts(int32_t key, char *arg, struct argp_state *_state);
static struct argp_option gf_options[] = {
- {0, 0, 0, 0, "Basic options:"},
- {"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0,
- "Server to get the volume file from. This option overrides "
- "--volfile option"},
- {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS,
- "MAX-ATTEMPTS", 0, "Maximum number of connect attempts to server. "
- "This option should be provided with --volfile-server option"
- "[default: 1]"},
- {"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0,
- "File to use as VOLUME_FILE"},
- {"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN,
- "File to use as VOLUME FILE"},
-
- {"log-level", ARGP_LOG_LEVEL_KEY, "LOGLEVEL", 0,
- "Logging severity. Valid options are DEBUG, INFO, WARNING, ERROR, "
- "CRITICAL and NONE [default: INFO]"},
- {"log-file", ARGP_LOG_FILE_KEY, "LOGFILE", 0,
- "File to use for logging [default: "
- DEFAULT_LOG_FILE_DIRECTORY "/" PACKAGE_NAME ".log" "]"},
-
- {0, 0, 0, 0, "Advanced Options:"},
- {"volfile-server-port", ARGP_VOLFILE_SERVER_PORT_KEY, "PORT", 0,
- "Listening port number of volfile server"},
- {"volfile-server-transport", ARGP_VOLFILE_SERVER_TRANSPORT_KEY,
- "TRANSPORT", 0,
- "Transport type to get volfile from server [default: socket]"},
- {"volfile-id", ARGP_VOLFILE_ID_KEY, "KEY", 0,
- "'key' of the volfile to be fetched from server"},
- {"pid-file", ARGP_PID_FILE_KEY, "PIDFILE", 0,
- "File to use as pid file"},
- {"socket-file", ARGP_SOCK_FILE_KEY, "SOCKFILE", 0,
- "File to use as unix-socket"},
- {"no-daemon", ARGP_NO_DAEMON_KEY, 0, 0,
- "Run in foreground"},
- {"run-id", ARGP_RUN_ID_KEY, "RUN-ID", OPTION_HIDDEN,
- "Run ID for the process, used by scripts to keep track of process "
- "they started, defaults to none"},
- {"debug", ARGP_DEBUG_KEY, 0, 0,
- "Run in debug mode. This option sets --no-daemon, --log-level "
- "to DEBUG and --log-file to console"},
- {"volume-name", ARGP_VOLUME_NAME_KEY, "XLATOR-NAME", 0,
- "Translator name to be used for MOUNT-POINT [default: top most volume "
- "definition in VOLFILE]"},
- {"xlator-option", ARGP_XLATOR_OPTION_KEY,"XLATOR-NAME.OPTION=VALUE", 0,
- "Add/override an option for a translator in volume file with specified"
- " value"},
- {"read-only", ARGP_READ_ONLY_KEY, 0, 0,
- "Mount the filesystem in 'read-only' mode"},
- {"acl", ARGP_ACL_KEY, 0, 0,
- "Mount the filesystem with POSIX ACL support"},
- {"selinux", ARGP_SELINUX_KEY, 0, 0,
- "Enable SELinux label (extened attributes) support on inodes"},
- {"enable-ino32", ARGP_INODE32_KEY, "BOOL", OPTION_ARG_OPTIONAL,
- "Use 32-bit inodes when mounting to workaround broken applications"
- "that don't support 64-bit inodes"},
- {"worm", ARGP_WORM_KEY, 0, 0,
- "Mount the filesystem in 'worm' mode"},
- {"mac-compat", ARGP_MAC_COMPAT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
- "Provide stubs for attributes needed for seamless operation on Macs "
+ {0, 0, 0, 0, "Basic options:"},
+ {"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0,
+ "Server to get the volume file from. Unix domain socket path when "
+ "transport type 'unix'. This option overrides --volfile option"},
+ {"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0,
+ "File to use as VOLUME_FILE"},
+ {"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN,
+ "File to use as VOLUME FILE"},
+
+ {"log-level", ARGP_LOG_LEVEL_KEY, "LOGLEVEL", 0,
+ "Logging severity. Valid options are DEBUG, INFO, WARNING, ERROR, "
+ "CRITICAL, TRACE and NONE [default: INFO]"},
+ {"log-file", ARGP_LOG_FILE_KEY, "LOGFILE", 0,
+ "File to use for logging [default: " DEFAULT_LOG_FILE_DIRECTORY
+ "/" PACKAGE_NAME ".log"
+ "]"},
+ {"logger", ARGP_LOGGER, "LOGGER", 0,
+ "Set which logging sub-system to "
+ "log to, valid options are: gluster-log and syslog, "
+ "[default: \"gluster-log\"]"},
+ {"log-format", ARGP_LOG_FORMAT, "LOG-FORMAT", 0,
+ "Set log format, valid"
+ " options are: no-msg-id and with-msg-id, [default: \"with-msg-id\"]"},
+ {"log-buf-size", ARGP_LOG_BUF_SIZE, "LOG-BUF-SIZE", 0,
+ "Set logging "
+ "buffer size, [default: 5]"},
+ {"log-flush-timeout", ARGP_LOG_FLUSH_TIMEOUT, "LOG-FLUSH-TIMEOUT", 0,
+ "Set log flush timeout, [default: 2 minutes]"},
+
+ {0, 0, 0, 0, "Advanced Options:"},
+ {"volfile-server-port", ARGP_VOLFILE_SERVER_PORT_KEY, "PORT", 0,
+ "Listening port number of volfile server"},
+ {"volfile-server-transport", ARGP_VOLFILE_SERVER_TRANSPORT_KEY, "TRANSPORT",
+ 0, "Transport type to get volfile from server [default: socket]"},
+ {"volfile-id", ARGP_VOLFILE_ID_KEY, "KEY", 0,
+ "'key' of the volfile to be fetched from server"},
+ {"pid-file", ARGP_PID_FILE_KEY, "PIDFILE", 0, "File to use as pid file"},
+ {"socket-file", ARGP_SOCK_FILE_KEY, "SOCKFILE", 0,
+ "File to use as unix-socket"},
+ {"no-daemon", ARGP_NO_DAEMON_KEY, 0, 0, "Run in foreground"},
+ {"run-id", ARGP_RUN_ID_KEY, "RUN-ID", OPTION_HIDDEN,
+ "Run ID for the process, used by scripts to keep track of process "
+ "they started, defaults to none"},
+ {"debug", ARGP_DEBUG_KEY, 0, 0,
+ "Run in debug mode. This option sets --no-daemon, --log-level "
+ "to DEBUG and --log-file to console"},
+ {"volume-name", ARGP_VOLUME_NAME_KEY, "XLATOR-NAME", 0,
+ "Translator name to be used for MOUNT-POINT [default: top most volume "
+ "definition in VOLFILE]"},
+ {"xlator-option", ARGP_XLATOR_OPTION_KEY, "XLATOR-NAME.OPTION=VALUE", 0,
+ "Add/override an option for a translator in volume file with specified"
+ " value"},
+ {"read-only", ARGP_READ_ONLY_KEY, 0, 0,
+ "Mount the filesystem in 'read-only' mode"},
+ {"acl", ARGP_ACL_KEY, 0, 0, "Mount the filesystem with POSIX ACL support"},
+ {"selinux", ARGP_SELINUX_KEY, 0, 0,
+ "Enable SELinux label (extended attributes) support on inodes"},
+ {"capability", ARGP_CAPABILITY_KEY, 0, 0,
+ "Enable Capability (extended attributes) support on inodes"},
+ {"subdir-mount", ARGP_SUBDIR_MOUNT_KEY, "SUBDIR-PATH", 0,
+ "Mount subdirectory given [default: NULL]"},
+
+ {"print-netgroups", ARGP_PRINT_NETGROUPS, "NETGROUP-FILE", 0,
+ "Validate the netgroups file and print it out"},
+ {"print-exports", ARGP_PRINT_EXPORTS, "EXPORTS-FILE", 0,
+ "Validate the exports file and print it out"},
+ {"print-xlatordir", ARGP_PRINT_XLATORDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print xlator directory path"},
+ {"print-statedumpdir", ARGP_PRINT_STATEDUMPDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print directory path in which statedumps shall be generated"},
+ {"print-logdir", ARGP_PRINT_LOGDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print path of default log directory"},
+ {"print-libexecdir", ARGP_PRINT_LIBEXECDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print path of default libexec directory"},
+
+ {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS, "0",
+ OPTION_HIDDEN, "Maximum number of attempts to fetch the volfile"},
+ {"aux-gfid-mount", ARGP_AUX_GFID_MOUNT_KEY, 0, 0,
+ "Enable access to filesystem through gfid directly"},
+ {"enable-ino32", ARGP_INODE32_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use 32-bit inodes when mounting to workaround broken applications"
+ "that don't support 64-bit inodes"},
+ {"worm", ARGP_WORM_KEY, 0, 0, "Mount the filesystem in 'worm' mode"},
+ {"mac-compat", ARGP_MAC_COMPAT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Provide stubs for attributes needed for seamless operation on Macs "
#ifdef GF_DARWIN_HOST_OS
- "[default: \"on\" on client side, else \"off\"]"
+ "[default: \"on\" on client side, else \"off\"]"
#else
- "[default: \"off\"]"
+ "[default: \"off\"]"
+#endif
+ },
+ {"brick-name", ARGP_BRICK_NAME_KEY, "BRICK-NAME", OPTION_HIDDEN,
+ "Brick name to be registered with Gluster portmapper"},
+ {"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
+ "Brick Port to be registered with Gluster portmapper"},
+ {"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Do not purge the cache on file open [default: false]"},
+ {"global-timer-wheel", ARGP_GLOBAL_TIMER_WHEEL, "BOOL", OPTION_ARG_OPTIONAL,
+ "Instantiate process global timer-wheel"},
+ {"thin-client", ARGP_THIN_CLIENT_KEY, 0, 0,
+ "Enables thin mount and connects via gfproxyd daemon"},
+ {"global-threading", ARGP_GLOBAL_THREADING_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use the global thread pool instead of io-threads"},
+ {0, 0, 0, 0, "Fuse options:"},
+ {"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL|auto",
+ OPTION_ARG_OPTIONAL, "Specify direct I/O strategy [default: \"auto\"]"},
+ {"entry-timeout", ARGP_ENTRY_TIMEOUT_KEY, "SECONDS", 0,
+ "Set entry timeout to SECONDS in fuse kernel module [default: 1]"},
+ {"negative-timeout", ARGP_NEGATIVE_TIMEOUT_KEY, "SECONDS", 0,
+ "Set negative timeout to SECONDS in fuse kernel module [default: 0]"},
+ {"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
+ "Set attribute timeout to SECONDS for inodes in fuse kernel module "
+ "[default: 1]"},
+ {"gid-timeout", ARGP_GID_TIMEOUT_KEY, "SECONDS", 0,
+ "Set auxiliary group list timeout to SECONDS for fuse translator "
+ "[default: 300]"},
+ {"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0,
+ "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"},
+ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+ "Set fuse module's limit for number of inodes kept in LRU list to N "
+ "[default: 65536]"},
+ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
+ "Suspend inode invalidations implied by 'lru-limit' if the number of "
+ "outstanding invalidations reaches N"},
+ {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
+ "Set fuse module's background queue length to N "
+ "[default: 64]"},
+ {"congestion-threshold", ARGP_FUSE_CONGESTION_THRESHOLD_KEY, "N", 0,
+ "Set fuse module's congestion threshold to N "
+ "[default: 48]"},
+#ifdef GF_LINUX_HOST_OS
+ {"oom-score-adj", ARGP_OOM_SCORE_ADJ_KEY, "INTEGER", 0,
+ "Set oom_score_adj value for process"
+ "[default: 0]"},
#endif
- },
- {"brick-name", ARGP_BRICK_NAME_KEY, "BRICK-NAME", OPTION_HIDDEN,
- "Brick name to be registered with Gluster portmapper" },
- {"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
- "Brick Port to be registered with Gluster portmapper" },
-
- {0, 0, 0, 0, "Fuse options:"},
- {"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
- "Use direct I/O mode in fuse kernel module"
- " [default: \"off\" if big writes are supported, else "
- "\"on\" for fds not opened with O_RDONLY]"},
- {"entry-timeout", ARGP_ENTRY_TIMEOUT_KEY, "SECONDS", 0,
- "Set entry timeout to SECONDS in fuse kernel module [default: 1]"},
- {"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
- "Set attribute timeout to SECONDS for inodes in fuse kernel module "
- "[default: 1]"},
- {"gid-timeout", ARGP_GID_TIMEOUT_KEY, "SECONDS", 0,
- "Set auxilary group list timeout to SECONDS for fuse translator "
- "[default: 0]"},
- {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
- "Set fuse module's background queue length to N "
- "[default: 64]"},
- {"congestion-threshold", ARGP_FUSE_CONGESTION_THRESHOLD_KEY, "N", 0,
- "Set fuse module's congestion threshold to N "
- "[default: 48]"},
- {"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN,
- "client will authenticate itself with process id PID to server"},
- {"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN,
- "replace USER with root in messages"},
- {"dump-fuse", ARGP_DUMP_FUSE_KEY, "PATH", 0,
- "Dump fuse traffic to PATH"},
- {"volfile-check", ARGP_VOLFILE_CHECK_KEY, 0, 0,
- "Enable strict volume file checking"},
- {"mem-accounting", ARGP_MEM_ACCOUNTING_KEY, 0, OPTION_HIDDEN,
- "Enable internal memory accounting"},
- {0, 0, 0, 0, "Miscellaneous Options:"},
- {0, }
-};
-
-
-static struct argp argp = { gf_options, parse_opts, argp_doc, gf_doc };
-
-int glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx);
-int glusterfs_volumes_init (glusterfs_ctx_t *ctx);
-int glusterfs_mgmt_init (glusterfs_ctx_t *ctx);
-int glusterfs_listener_init (glusterfs_ctx_t *ctx);
-int glusterfs_listener_stop (glusterfs_ctx_t *ctx);
+ {"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN,
+ "client will authenticate itself with process id PID to server"},
+ {"no-root-squash", ARGP_FUSE_NO_ROOT_SQUASH_KEY, "BOOL",
+ OPTION_ARG_OPTIONAL,
+ "disable/enable root squashing for the trusted "
+ "client"},
+ {"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN,
+ "replace USER with root in messages"},
+ {"dump-fuse", ARGP_DUMP_FUSE_KEY, "PATH", 0, "Dump fuse traffic to PATH"},
+ {"volfile-check", ARGP_VOLFILE_CHECK_KEY, 0, 0,
+ "Enable strict volume file checking"},
+ {"no-mem-accounting", ARGP_MEM_ACCOUNTING_KEY, 0, OPTION_HIDDEN,
+ "disable internal memory accounting"},
+ {"fuse-mountopts", ARGP_FUSE_MOUNTOPTS_KEY, "OPTIONS", OPTION_HIDDEN,
+ "Extra mount options to pass to FUSE"},
+ {"use-readdirp", ARGP_FUSE_USE_READDIRP_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use readdirp mode in fuse kernel module"
+ " [default: \"yes\"]"},
+ {"secure-mgmt", ARGP_SECURE_MGMT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Override default for secure (SSL) management connections"},
+ {"localtime-logging", ARGP_LOCALTIME_LOGGING_KEY, 0, 0,
+ "Enable localtime logging"},
+ {"process-name", ARGP_PROCESS_NAME_KEY, "PROCESS-NAME", OPTION_HIDDEN,
+ "option to specify the process type"},
+ {"event-history", ARGP_FUSE_EVENT_HISTORY_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "disable/enable fuse event-history"},
+ {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER",
+ OPTION_ARG_OPTIONAL, "set fuse reader thread count"},
+ {"kernel-writeback-cache", ARGP_KERNEL_WRITEBACK_CACHE_KEY, "BOOL",
+ OPTION_ARG_OPTIONAL, "enable fuse in-kernel writeback cache"},
+ {"attr-times-granularity", ARGP_ATTR_TIMES_GRANULARITY_KEY, "NS",
+ OPTION_ARG_OPTIONAL,
+ "declare supported granularity of file attribute"
+ " times in nanoseconds"},
+ {"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY,
+ "BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
+ "handle interrupt in fuse FLUSH handler"},
+ {"auto-invalidation", ARGP_FUSE_AUTO_INVAL_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "controls whether fuse-kernel can auto-invalidate "
+ "attribute, dentry and page-cache. "
+ "Disable this only if same files/directories are not accessed across "
+ "two different mounts concurrently [default: \"on\"]"},
+ {"fuse-dev-eperm-ratelimit-ns", ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY,
+ "OPTIONS", OPTION_HIDDEN,
+ "rate limit reading from fuse device upon EPERM failure"},
+ {"brick-mux", ARGP_BRICK_MUX_KEY, 0, 0, "Enable brick mux. "},
+ {0, 0, 0, 0, "Miscellaneous Options:"},
+ {
+ 0,
+ }};
+
+static struct argp argp = {gf_options, parse_opts, argp_doc, gf_doc};
+
+int
+glusterfs_pidfile_cleanup(glusterfs_ctx_t *ctx);
+int
+glusterfs_volumes_init(glusterfs_ctx_t *ctx);
+int
+glusterfs_mgmt_init(glusterfs_ctx_t *ctx);
+int
+glusterfs_listener_init(glusterfs_ctx_t *ctx);
+#define DICT_SET_VAL(method, dict, key, val, msgid) \
+ if (method(dict, key, val)) { \
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, msgid, "key=%s", key); \
+ goto err; \
+ }
static int
-set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
+set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
- char *mount_point = NULL;
- char cwd[PATH_MAX] = {0,};
-
- cmd_args = &ctx->cmd_args;
-
- /* Check if mount-point is absolute path,
- * if not convert to absolute path by concating with CWD
- */
- if (cmd_args->mount_point[0] != '/') {
- if (getcwd (cwd, PATH_MAX) != NULL) {
- ret = gf_asprintf (&mount_point, "%s/%s", cwd,
- cmd_args->mount_point);
- if (ret == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "Could not create absolute mountpoint "
- "path");
- goto err;
- }
- } else {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "Could not get current working directory");
- goto err;
- }
- } else
- mount_point = gf_strdup (cmd_args->mount_point);
-
- ret = dict_set_dynstr (options, ZR_MOUNTPOINT_OPT, mount_point);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set mount-point to options dictionary");
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ char *mount_point = NULL;
+ char cwd[PATH_MAX] = {
+ 0,
+ };
+
+ cmd_args = &ctx->cmd_args;
+
+ /* Check if mount-point is absolute path,
+ * if not convert to absolute path by concatenating with CWD
+ */
+ if (cmd_args->mount_point[0] != '/') {
+ if (getcwd(cwd, PATH_MAX) != NULL) {
+ ret = gf_asprintf(&mount_point, "%s/%s", cwd,
+ cmd_args->mount_point);
+ if (ret == -1) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_1,
+ "gf_asprintf failed", NULL);
goto err;
- }
-
- if (cmd_args->fuse_attribute_timeout >= 0) {
- ret = dict_set_double (options, ZR_ATTR_TIMEOUT_OPT,
- cmd_args->fuse_attribute_timeout);
-
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_ATTR_TIMEOUT_OPT);
- goto err;
- }
- }
-
- if (cmd_args->fuse_entry_timeout >= 0) {
- ret = dict_set_double (options, ZR_ENTRY_TIMEOUT_OPT,
- cmd_args->fuse_entry_timeout);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_ENTRY_TIMEOUT_OPT);
- goto err;
- }
- }
-
- if (cmd_args->client_pid_set) {
- ret = dict_set_int32 (options, "client-pid",
- cmd_args->client_pid);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- "client-pid");
- goto err;
- }
- }
-
- if (cmd_args->uid_map_root) {
- ret = dict_set_int32 (options, "uid-map-root",
- cmd_args->uid_map_root);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- "uid-map-root");
- goto err;
- }
- }
-
- if (cmd_args->volfile_check) {
- ret = dict_set_int32 (options, ZR_STRICT_VOLFILE_CHECK,
- cmd_args->volfile_check);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_STRICT_VOLFILE_CHECK);
- goto err;
- }
- }
-
- if (cmd_args->dump_fuse) {
- ret = dict_set_static_ptr (options, ZR_DUMP_FUSE,
- cmd_args->dump_fuse);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_DUMP_FUSE);
- goto err;
- }
- }
-
- if (cmd_args->acl) {
- ret = dict_set_static_ptr (options, "acl", "on");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key acl");
- goto err;
- }
- }
-
- if (cmd_args->selinux) {
- ret = dict_set_static_ptr (options, "selinux", "on");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key selinux");
- goto err;
- }
- }
-
- if (cmd_args->enable_ino32) {
- ret = dict_set_static_ptr (options, "enable-ino32",
- "on");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- "enable-ino32");
- goto err;
- }
- }
-
- if (cmd_args->read_only) {
- ret = dict_set_static_ptr (options, "read-only", "on");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key read-only");
- goto err;
- }
- }
-
- if (cmd_args->gid_timeout) {
- ret = dict_set_int32(options, "gid-timeout",
- cmd_args->gid_timeout);
- if (ret < 0) {
- gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
- "value for key gid-timeout");
- goto err;
- }
- }
- if (cmd_args->background_qlen) {
- ret = dict_set_int32 (options, "background-qlen",
- cmd_args->background_qlen);
- if (ret < 0) {
- gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
- "value for key background-qlen");
- goto err;
- }
- }
- if (cmd_args->congestion_threshold) {
- ret = dict_set_int32 (options, "congestion-threshold",
- cmd_args->congestion_threshold);
- if (ret < 0) {
- gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
- "value for key congestion-threshold");
- goto err;
- }
- }
-
- switch (cmd_args->fuse_direct_io_mode) {
+ }
+ } else {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_2,
+ "getcwd failed", NULL);
+ goto err;
+ }
+
+ } else {
+ mount_point = gf_strdup(cmd_args->mount_point);
+ }
+ DICT_SET_VAL(dict_set_dynstr_sizen, options, ZR_MOUNTPOINT_OPT, mount_point,
+ glusterfsd_msg_3);
+
+ if (cmd_args->fuse_attribute_timeout >= 0) {
+ DICT_SET_VAL(dict_set_double, options, ZR_ATTR_TIMEOUT_OPT,
+ cmd_args->fuse_attribute_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->fuse_entry_timeout >= 0) {
+ DICT_SET_VAL(dict_set_double, options, ZR_ENTRY_TIMEOUT_OPT,
+ cmd_args->fuse_entry_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->fuse_negative_timeout >= 0) {
+ DICT_SET_VAL(dict_set_double, options, ZR_NEGATIVE_TIMEOUT_OPT,
+ cmd_args->fuse_negative_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->client_pid_set) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "client-pid",
+ cmd_args->client_pid, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->uid_map_root) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "uid-map-root",
+ cmd_args->uid_map_root, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->volfile_check) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, ZR_STRICT_VOLFILE_CHECK,
+ cmd_args->volfile_check, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->dump_fuse) {
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DUMP_FUSE,
+ cmd_args->dump_fuse, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->acl) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "acl", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->selinux) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "selinux", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->capability) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "capability", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->aux_gfid_mount) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "virtual-gfid-access", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->enable_ino32) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "enable-ino32", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->read_only) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "read-only", "on",
+ glusterfsd_msg_3);
+ }
+
+ switch (cmd_args->fopen_keep_cache) {
+ case GF_OPTION_ENABLE:
+
+ DICT_SET_VAL(dict_set_static_ptr, options, "fopen-keep-cache", "on",
+ glusterfsd_msg_3);
+ break;
+ case GF_OPTION_DISABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "fopen-keep-cache",
+ "off", glusterfsd_msg_3);
+ break;
+ default:
+ gf_msg_debug("glusterfsd", 0, "fopen-keep-cache mode %d",
+ cmd_args->fopen_keep_cache);
+ break;
+ }
+
+ if (cmd_args->gid_timeout_set) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "gid-timeout",
+ cmd_args->gid_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->resolve_gids) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "resolve-gids", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->lru_limit >= 0) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "lru-limit",
+ cmd_args->lru_limit, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->invalidate_limit >= 0) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "invalidate-limit",
+ cmd_args->invalidate_limit, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->background_qlen) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "background-qlen",
+ cmd_args->background_qlen, glusterfsd_msg_3);
+ }
+ if (cmd_args->congestion_threshold) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "congestion-threshold",
+ cmd_args->congestion_threshold, glusterfsd_msg_3);
+ }
+
+ switch (cmd_args->fuse_direct_io_mode) {
case GF_OPTION_DISABLE: /* disable */
- ret = dict_set_static_ptr (options, ZR_DIRECT_IO_OPT,
- "disable");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set 'disable' for key %s",
- ZR_DIRECT_IO_OPT);
- goto err;
- }
- break;
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DIRECT_IO_OPT,
+ "disable", glusterfsd_msg_3);
+ break;
case GF_OPTION_ENABLE: /* enable */
- ret = dict_set_static_ptr (options, ZR_DIRECT_IO_OPT,
- "enable");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set 'enable' for key %s",
- ZR_DIRECT_IO_OPT);
- goto err;
- }
- break;
- case GF_OPTION_DEFERRED: /* default */
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DIRECT_IO_OPT,
+ "enable", glusterfsd_msg_3);
+ break;
default:
- gf_log ("", GF_LOG_DEBUG, "fuse direct io type %d",
- cmd_args->fuse_direct_io_mode);
- break;
- }
+ gf_msg_debug("glusterfsd", 0, "fuse direct io type %d",
+ cmd_args->fuse_direct_io_mode);
+ break;
+ }
- if (!cmd_args->no_daemon_mode) {
- ret = dict_set_static_ptr (options, "sync-to-mount",
- "enable");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key sync-mtab");
- goto err;
- }
- }
- ret = 0;
+ switch (cmd_args->no_root_squash) {
+ case GF_OPTION_ENABLE: /* enable */
+ DICT_SET_VAL(dict_set_static_ptr, options, "no-root-squash",
+ "enable", glusterfsd_msg_3);
+ break;
+ default:
+ DICT_SET_VAL(dict_set_static_ptr, options, "no-root-squash",
+ "disable", glusterfsd_msg_3);
+ gf_msg_debug("glusterfsd", 0, "fuse no-root-squash mode %d",
+ cmd_args->no_root_squash);
+ break;
+ }
+
+ if (!cmd_args->no_daemon_mode) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "sync-to-mount", "enable",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->use_readdirp) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "use-readdirp",
+ cmd_args->use_readdirp, glusterfsd_msg_3);
+ }
+ if (cmd_args->event_history) {
+ ret = dict_set_str(options, "event-history", cmd_args->event_history);
+ DICT_SET_VAL(dict_set_static_ptr, options, "event-history",
+ cmd_args->event_history, glusterfsd_msg_3);
+ }
+ if (cmd_args->thin_client) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "thin-client", "on",
+ glusterfsd_msg_3);
+ }
+ if (cmd_args->reader_thread_count) {
+ DICT_SET_VAL(dict_set_uint32, options, "reader-thread-count",
+ cmd_args->reader_thread_count, glusterfsd_msg_3);
+ }
+
+ DICT_SET_VAL(dict_set_uint32, options, "auto-invalidation",
+ cmd_args->fuse_auto_inval, glusterfsd_msg_3);
+
+ switch (cmd_args->kernel_writeback_cache) {
+ case GF_OPTION_ENABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "kernel-writeback-cache",
+ "on", glusterfsd_msg_3);
+ break;
+ case GF_OPTION_DISABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "kernel-writeback-cache",
+ "off", glusterfsd_msg_3);
+ break;
+ default:
+ gf_msg_debug("glusterfsd", 0, "kernel-writeback-cache mode %d",
+ cmd_args->kernel_writeback_cache);
+ break;
+ }
+ if (cmd_args->attr_times_granularity) {
+ DICT_SET_VAL(dict_set_uint32, options, "attr-times-granularity",
+ cmd_args->attr_times_granularity, glusterfsd_msg_3);
+ }
+ switch (cmd_args->fuse_flush_handle_interrupt) {
+ case GF_OPTION_ENABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "flush-handle-interrupt",
+ "on", glusterfsd_msg_3);
+ break;
+ case GF_OPTION_DISABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "flush-handle-interrupt",
+ "off", glusterfsd_msg_3);
+ break;
+ default:
+ gf_msg_debug("glusterfsd", 0, "fuse-flush-handle-interrupt mode %d",
+ cmd_args->fuse_flush_handle_interrupt);
+ break;
+ }
+ if (cmd_args->global_threading) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "global-threading", "on",
+ glusterfsd_msg_3);
+ }
+ if (cmd_args->fuse_dev_eperm_ratelimit_ns) {
+ DICT_SET_VAL(dict_set_uint32, options, "fuse-dev-eperm-ratelimit-ns",
+ cmd_args->fuse_dev_eperm_ratelimit_ns, glusterfsd_msg_3);
+ }
+
+ ret = 0;
err:
- return ret;
+ return ret;
}
int
-create_fuse_mount (glusterfs_ctx_t *ctx)
+create_fuse_mount(glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
- xlator_t *master = NULL;
-
- cmd_args = &ctx->cmd_args;
-
- if (!cmd_args->mount_point) {
- gf_log ("", GF_LOG_TRACE,
- "mount point not found, not a client process");
- return 0;
- }
-
- if (ctx->process_mode != GF_CLIENT_PROCESS) {
- gf_log("glusterfsd", GF_LOG_ERROR,
- "Not a client process, not performing mount operation");
- return -1;
- }
-
- master = GF_CALLOC (1, sizeof (*master),
- gfd_mt_xlator_t);
- if (!master)
- goto err;
-
- master->name = gf_strdup ("fuse");
- if (!master->name)
- goto err;
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ xlator_t *master = NULL;
+
+ cmd_args = &ctx->cmd_args;
+ if (!cmd_args->mount_point) {
+ gf_msg_trace("glusterfsd", 0,
+ "mount point not found, not a client process");
+ return 0;
+ }
- if (xlator_set_type (master, "mount/fuse") == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "MOUNT-POINT %s initialization failed",
- cmd_args->mount_point);
- goto err;
+ if (ctx->process_mode != GF_CLIENT_PROCESS) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_7, NULL);
+ return -1;
+ }
+
+ master = GF_CALLOC(1, sizeof(*master), gfd_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup("fuse");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type(master, "mount/fuse") == -1) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_8,
+ "MOUNT-POINT=%s", cmd_args->mount_point, NULL);
+ goto err;
+ }
+
+ master->ctx = ctx;
+ master->options = dict_new();
+ if (!master->options)
+ goto err;
+
+ ret = set_fuse_mount_options(ctx, master->options);
+ if (ret)
+ goto err;
+
+ if (cmd_args->fuse_mountopts) {
+ ret = dict_set_static_ptr(master->options, ZR_FUSE_MOUNTOPTS,
+ cmd_args->fuse_mountopts);
+ if (ret < 0) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_3,
+ ZR_FUSE_MOUNTOPTS, NULL);
+ goto err;
}
+ }
- master->ctx = ctx;
- master->options = get_new_dict ();
- if (!master->options)
- goto err;
-
- ret = set_fuse_mount_options (ctx, master->options);
- if (ret)
- goto err;
-
- ret = xlator_init (master);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "failed to initialize fuse translator");
- goto err;
- }
+ ret = xlator_init(master);
+ if (ret) {
+ gf_msg_debug("glusterfsd", 0, "failed to initialize fuse translator");
+ goto err;
+ }
- ctx->master = master;
+ ctx->master = master;
- return 0;
+ return 0;
err:
- if (master) {
- xlator_destroy (master);
- }
+ if (master) {
+ xlator_destroy(master);
+ }
- return -1;
+ return 1;
}
-
static FILE *
-get_volfp (glusterfs_ctx_t *ctx)
+get_volfp(glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
- FILE *specfp = NULL;
- struct stat statbuf;
+ cmd_args_t *cmd_args = NULL;
+ FILE *specfp = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- ret = sys_lstat (cmd_args->volfile, &statbuf);
- if (ret == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "%s: %s", cmd_args->volfile, strerror (errno));
- return NULL;
- }
+ if ((specfp = fopen(cmd_args->volfile, "r")) == NULL) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_9,
+ "volume_file=%s", cmd_args->volfile, NULL);
+ return NULL;
+ }
- if ((specfp = fopen (cmd_args->volfile, "r")) == NULL) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "volume file %s: %s",
- cmd_args->volfile,
- strerror (errno));
- return NULL;
- }
+ gf_msg_debug("glusterfsd", 0, "loading volume file %s", cmd_args->volfile);
- gf_log ("glusterfsd", GF_LOG_DEBUG,
- "loading volume file %s", cmd_args->volfile);
+ return specfp;
+}
- return specfp;
+static int
+gf_remember_backup_volfile_server(char *arg)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+
+ ctx = glusterfsd_ctx;
+ if (!ctx)
+ goto out;
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args)
+ goto out;
+
+ ret = gf_set_volfile_server_common(
+ cmd_args, arg, GF_DEFAULT_VOLFILE_TRANSPORT, GF_DEFAULT_BASE_PORT);
+ if (ret) {
+ gf_log("glusterfs", GF_LOG_ERROR, "failed to set volfile server: %s",
+ strerror(errno));
+ }
+out:
+ return ret;
}
static int
-gf_remember_xlator_option (struct list_head *options, char *arg)
+gf_remember_xlator_option(char *arg)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- xlator_cmdline_option_t *option = NULL;
- int ret = -1;
- char *dot = NULL;
- char *equals = NULL;
-
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
-
- option = GF_CALLOC (1, sizeof (xlator_cmdline_option_t),
- gfd_mt_xlator_cmdline_option_t);
- if (!option)
- goto out;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ xlator_cmdline_option_t *option = NULL;
+ int ret = -1;
+ char *dot = NULL;
+ char *equals = NULL;
- INIT_LIST_HEAD (&option->cmd_args);
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
- dot = strchr (arg, '.');
- if (!dot) {
- gf_log ("", GF_LOG_WARNING,
- "xlator option %s is invalid", arg);
- goto out;
- }
+ option = GF_CALLOC(1, sizeof(xlator_cmdline_option_t),
+ gfd_mt_xlator_cmdline_option_t);
+ if (!option)
+ goto out;
- option->volume = GF_CALLOC ((dot - arg) + 1, sizeof (char),
- gfd_mt_char);
- if (!option->volume)
- goto out;
+ INIT_LIST_HEAD(&option->cmd_args);
- strncpy (option->volume, arg, (dot - arg));
+ dot = strchr(arg, '.');
+ if (!dot) {
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
+ goto out;
+ }
- equals = strchr (arg, '=');
- if (!equals) {
- gf_log ("", GF_LOG_WARNING,
- "xlator option %s is invalid", arg);
- goto out;
- }
+ option->volume = GF_MALLOC((dot - arg) + 1, gfd_mt_char);
+ if (!option->volume)
+ goto out;
- option->key = GF_CALLOC ((equals - dot) + 1, sizeof (char),
- gfd_mt_char);
- if (!option->key)
- goto out;
+ strncpy(option->volume, arg, (dot - arg));
+ option->volume[(dot - arg)] = '\0';
- strncpy (option->key, dot + 1, (equals - dot - 1));
+ equals = strchr(arg, '=');
+ if (!equals) {
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
+ goto out;
+ }
- if (!*(equals + 1)) {
- gf_log ("", GF_LOG_WARNING,
- "xlator option %s is invalid", arg);
- goto out;
- }
+ option->key = GF_MALLOC((equals - dot) + 1, gfd_mt_char);
+ if (!option->key)
+ goto out;
- option->value = gf_strdup (equals + 1);
+ strncpy(option->key, dot + 1, (equals - dot - 1));
+ option->key[(equals - dot - 1)] = '\0';
- list_add (&option->cmd_args, &cmd_args->xlator_options);
+ if (!*(equals + 1)) {
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
+ goto out;
+ }
- ret = 0;
+ option->value = gf_strdup(equals + 1);
+
+ list_add(&option->cmd_args, &cmd_args->xlator_options);
+
+ ret = 0;
out:
- if (ret == -1) {
- if (option) {
- if (option->volume)
- GF_FREE (option->volume);
- if (option->key)
- GF_FREE (option->key);
- if (option->value)
- GF_FREE (option->value);
-
- GF_FREE (option);
- }
+ if (ret == -1) {
+ if (option) {
+ GF_FREE(option->volume);
+ GF_FREE(option->key);
+ GF_FREE(option->value);
+
+ GF_FREE(option);
}
+ }
- return ret;
+ return ret;
}
+#ifdef GF_LINUX_HOST_OS
+static struct oom_api_info {
+ char *oom_api_file;
+ int32_t oom_min;
+ int32_t oom_max;
+} oom_api_info[] = {
+ {"/proc/self/oom_score_adj", OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX},
+ {"/proc/self/oom_adj", OOM_DISABLE, OOM_ADJUST_MAX},
+ {NULL, 0, 0}};
+
+static struct oom_api_info *
+get_oom_api_info(void)
+{
+ struct oom_api_info *api = NULL;
+ for (api = oom_api_info; api->oom_api_file; api++) {
+ if (sys_access(api->oom_api_file, F_OK) != -1) {
+ return api;
+ }
+ }
+
+ return NULL;
+}
+#endif
static error_t
-parse_opts (int key, char *arg, struct argp_state *state)
+parse_opts(int key, char *arg, struct argp_state *state)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- uint32_t n = 0;
- double d = 0.0;
- gf_boolean_t b = _gf_false;
- char *pwd = NULL;
- char tmp_buf[2048] = {0,};
- char *tmp_str = NULL;
- char *port_str = NULL;
- struct passwd *pw = NULL;
-
- cmd_args = state->input;
-
- switch (key) {
- case ARGP_VOLFILE_SERVER_KEY:
- cmd_args->volfile_server = gf_strdup (arg);
- break;
+ cmd_args_t *cmd_args = NULL;
+ uint32_t n = 0;
+#ifdef GF_LINUX_HOST_OS
+ int32_t k = 0;
+ struct oom_api_info *api = NULL;
+#endif
+ double d = 0.0;
+ gf_boolean_t b = _gf_false;
+ char *pwd = NULL;
+ char *tmp_str = NULL;
+ char *port_str = NULL;
+ struct passwd *pw = NULL;
+ int ret = 0;
- case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS:
- n = 0;
+ cmd_args = state->input;
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->max_connect_attempts = n;
- break;
- }
+ switch (key) {
+ case ARGP_VOLFILE_SERVER_KEY:
+ gf_remember_backup_volfile_server(arg);
- argp_failure (state, -1, 0,
- "Invalid limit on connect attempts %s", arg);
- break;
+ break;
case ARGP_READ_ONLY_KEY:
- cmd_args->read_only = 1;
- break;
+ cmd_args->read_only = 1;
+ break;
case ARGP_ACL_KEY:
- cmd_args->acl = 1;
- break;
+ cmd_args->acl = 1;
+ gf_remember_xlator_option("*-md-cache.cache-posix-acl=true");
+ break;
case ARGP_SELINUX_KEY:
- cmd_args->selinux = 1;
- break;
+ cmd_args->selinux = 1;
+ gf_remember_xlator_option("*-md-cache.cache-selinux=true");
+ break;
+
+ case ARGP_CAPABILITY_KEY:
+ cmd_args->capability = 1;
+ break;
+
+ case ARGP_AUX_GFID_MOUNT_KEY:
+ cmd_args->aux_gfid_mount = 1;
+ break;
case ARGP_INODE32_KEY:
- cmd_args->enable_ino32 = 1;
- break;
+ cmd_args->enable_ino32 = 1;
+ break;
case ARGP_WORM_KEY:
- cmd_args->worm = 1;
- break;
+ cmd_args->worm = 1;
+ break;
- case ARGP_MAC_COMPAT_KEY:
- if (!arg)
- arg = "on";
+ case ARGP_PRINT_NETGROUPS:
+ cmd_args->print_netgroups = arg;
+ break;
- if (gf_string2boolean (arg, &b) == 0) {
- cmd_args->mac_compat = b;
+ case ARGP_PRINT_EXPORTS:
+ cmd_args->print_exports = arg;
+ break;
- break;
- }
+ case ARGP_PRINT_XLATORDIR_KEY:
+ cmd_args->print_xlatordir = _gf_true;
+ break;
+
+ case ARGP_PRINT_STATEDUMPDIR_KEY:
+ cmd_args->print_statedumpdir = _gf_true;
+ break;
+
+ case ARGP_PRINT_LOGDIR_KEY:
+ cmd_args->print_logdir = _gf_true;
+ break;
+
+ case ARGP_PRINT_LIBEXECDIR_KEY:
+ cmd_args->print_libexecdir = _gf_true;
+ break;
+
+ case ARGP_MAC_COMPAT_KEY:
+ if (!arg)
+ arg = "on";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->mac_compat = b;
- argp_failure (state, -1, 0,
- "invalid value \"%s\" for mac-compat", arg);
break;
+ }
+
+ argp_failure(state, -1, 0, "invalid value \"%s\" for mac-compat",
+ arg);
+ break;
case ARGP_VOLUME_FILE_KEY:
- if (cmd_args->volfile)
- GF_FREE (cmd_args->volfile);
-
- if (arg[0] != '/') {
- pwd = getcwd (NULL, PATH_MAX);
- if (!pwd) {
- argp_failure (state, -1, errno,
- "getcwd failed with error no %d",
- errno);
- break;
- }
- snprintf (tmp_buf, 1024, "%s/%s", pwd, arg);
- cmd_args->volfile = gf_strdup (tmp_buf);
- free (pwd);
- } else {
- cmd_args->volfile = gf_strdup (arg);
+ GF_FREE(cmd_args->volfile);
+
+ if (arg[0] != '/') {
+ pwd = getcwd(NULL, PATH_MAX);
+ if (!pwd) {
+ argp_failure(state, -1, errno,
+ "getcwd failed with error no %d", errno);
+ break;
}
+ char tmp_buf[1024];
+ snprintf(tmp_buf, sizeof(tmp_buf), "%s/%s", pwd, arg);
+ cmd_args->volfile = gf_strdup(tmp_buf);
+ free(pwd);
+ } else {
+ cmd_args->volfile = gf_strdup(arg);
+ }
- break;
+ break;
case ARGP_LOG_LEVEL_KEY:
- if (strcasecmp (arg, ARGP_LOG_LEVEL_NONE_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_NONE;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_CRITICAL_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_CRITICAL;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_ERROR_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_ERROR;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_WARNING_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_WARNING;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_INFO_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_INFO;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_DEBUG_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_DEBUG;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_TRACE_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_TRACE;
- break;
- }
-
- argp_failure (state, -1, 0, "unknown log level %s", arg);
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_NONE_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_NONE;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_CRITICAL_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_CRITICAL;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_ERROR_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_ERROR;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_WARNING_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_WARNING;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_INFO_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_INFO;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_DEBUG_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_DEBUG;
break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_TRACE_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_TRACE;
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown log level %s", arg);
+ break;
case ARGP_LOG_FILE_KEY:
- cmd_args->log_file = gf_strdup (arg);
- break;
+ cmd_args->log_file = gf_strdup(arg);
+ break;
case ARGP_VOLFILE_SERVER_PORT_KEY:
- n = 0;
-
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->volfile_server_port = n;
- break;
- }
+ n = 0;
- argp_failure (state, -1, 0,
- "unknown volfile server port %s", arg);
+ if (gf_string2uint_base10(arg, &n) == 0) {
+ cmd_args->volfile_server_port = n;
break;
+ }
+
+ argp_failure(state, -1, 0, "unknown volfile server port %s", arg);
+ break;
case ARGP_VOLFILE_SERVER_TRANSPORT_KEY:
- cmd_args->volfile_server_transport = gf_strdup (arg);
- break;
+ cmd_args->volfile_server_transport = gf_strdup(arg);
+ break;
case ARGP_VOLFILE_ID_KEY:
- cmd_args->volfile_id = gf_strdup (arg);
- break;
+ cmd_args->volfile_id = gf_strdup(arg);
+ break;
+
+ case ARGP_THIN_CLIENT_KEY:
+ cmd_args->thin_client = _gf_true;
+ break;
+
+ case ARGP_BRICK_MUX_KEY:
+ cmd_args->brick_mux = _gf_true;
+ break;
case ARGP_PID_FILE_KEY:
- cmd_args->pid_file = gf_strdup (arg);
- break;
+ cmd_args->pid_file = gf_strdup(arg);
+ break;
case ARGP_SOCK_FILE_KEY:
- cmd_args->sock_file = gf_strdup (arg);
- break;
+ cmd_args->sock_file = gf_strdup(arg);
+ break;
case ARGP_NO_DAEMON_KEY:
- cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
- break;
+ cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
+ break;
case ARGP_RUN_ID_KEY:
- cmd_args->run_id = gf_strdup (arg);
- break;
+ cmd_args->run_id = gf_strdup(arg);
+ break;
case ARGP_DEBUG_KEY:
- cmd_args->debug_mode = ENABLE_DEBUG_MODE;
- break;
+ cmd_args->debug_mode = ENABLE_DEBUG_MODE;
+ break;
+ case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS:
+ cmd_args->max_connect_attempts = 1;
+ break;
case ARGP_DIRECT_IO_MODE_KEY:
- if (!arg)
- arg = "on";
+ if (!arg)
+ arg = "on";
- if (gf_string2boolean (arg, &b) == 0) {
- cmd_args->fuse_direct_io_mode = b;
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_direct_io_mode = b;
- break;
- }
+ break;
+ }
- argp_failure (state, -1, 0,
- "unknown direct I/O mode setting \"%s\"", arg);
+ if (strcmp(arg, "auto") == 0)
break;
+ argp_failure(state, -1, 0, "unknown direct I/O mode setting \"%s\"",
+ arg);
+ break;
+
+ case ARGP_FUSE_NO_ROOT_SQUASH_KEY:
+ cmd_args->no_root_squash = _gf_true;
+ break;
+
case ARGP_ENTRY_TIMEOUT_KEY:
- d = 0.0;
+ d = 0.0;
- gf_string2double (arg, &d);
- if (!(d < 0.0)) {
- cmd_args->fuse_entry_timeout = d;
- break;
- }
+ gf_string2double(arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_entry_timeout = d;
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown entry timeout %s", arg);
+ break;
- argp_failure (state, -1, 0, "unknown entry timeout %s", arg);
+ case ARGP_NEGATIVE_TIMEOUT_KEY:
+ d = 0.0;
+
+ ret = gf_string2double(arg, &d);
+ if ((ret == 0) && !(d < 0.0)) {
+ cmd_args->fuse_negative_timeout = d;
break;
+ }
- case ARGP_ATTRIBUTE_TIMEOUT_KEY:
- d = 0.0;
+ argp_failure(state, -1, 0, "unknown negative timeout %s", arg);
+ break;
- gf_string2double (arg, &d);
- if (!(d < 0.0)) {
- cmd_args->fuse_attribute_timeout = d;
- break;
- }
+ case ARGP_ATTRIBUTE_TIMEOUT_KEY:
+ d = 0.0;
- argp_failure (state, -1, 0,
- "unknown attribute timeout %s", arg);
+ gf_string2double(arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_attribute_timeout = d;
break;
+ }
- case ARGP_CLIENT_PID_KEY:
- if (gf_string2int (arg, &cmd_args->client_pid) == 0) {
- cmd_args->client_pid_set = 1;
- break;
- }
+ argp_failure(state, -1, 0, "unknown attribute timeout %s", arg);
+ break;
- argp_failure (state, -1, 0,
- "unknown client pid %s", arg);
+ case ARGP_CLIENT_PID_KEY:
+ if (gf_string2int(arg, &cmd_args->client_pid) == 0) {
+ cmd_args->client_pid_set = 1;
break;
+ }
+
+ argp_failure(state, -1, 0, "unknown client pid %s", arg);
+ break;
case ARGP_USER_MAP_ROOT_KEY:
- pw = getpwnam (arg);
- if (pw)
- cmd_args->uid_map_root = pw->pw_uid;
- else
- argp_failure (state, -1, 0,
- "user %s does not exist", arg);
- break;
+ pw = getpwnam(arg);
+ if (pw)
+ cmd_args->uid_map_root = pw->pw_uid;
+ else
+ argp_failure(state, -1, 0, "user %s does not exist", arg);
+ break;
case ARGP_VOLFILE_CHECK_KEY:
- cmd_args->volfile_check = 1;
- break;
+ cmd_args->volfile_check = 1;
+ break;
case ARGP_VOLUME_NAME_KEY:
- cmd_args->volume_name = gf_strdup (arg);
- break;
+ cmd_args->volume_name = gf_strdup(arg);
+ break;
case ARGP_XLATOR_OPTION_KEY:
- if (gf_remember_xlator_option (&cmd_args->xlator_options, arg))
- argp_failure (state, -1, 0, "invalid xlator option %s", arg);
+ if (gf_remember_xlator_option(arg))
+ argp_failure(state, -1, 0, "invalid xlator option %s", arg);
- break;
+ break;
case ARGP_KEY_NO_ARGS:
- break;
+ break;
case ARGP_KEY_ARG:
- if (state->arg_num >= 1)
- argp_usage (state);
-
- cmd_args->mount_point = gf_strdup (arg);
- break;
+ if (state->arg_num >= 1)
+ argp_usage(state);
+ cmd_args->mount_point = gf_strdup(arg);
+ break;
case ARGP_DUMP_FUSE_KEY:
- cmd_args->dump_fuse = gf_strdup (arg);
- break;
+ cmd_args->dump_fuse = gf_strdup(arg);
+ break;
case ARGP_BRICK_NAME_KEY:
- cmd_args->brick_name = gf_strdup (arg);
- break;
+ cmd_args->brick_name = gf_strdup(arg);
+ break;
case ARGP_BRICK_PORT_KEY:
- n = 0;
-
- port_str = strtok_r (arg, ",", &tmp_str);
- if (gf_string2uint_base10 (port_str, &n) == 0) {
- cmd_args->brick_port = n;
- port_str = strtok_r (NULL, ",", &tmp_str);
- if (port_str) {
- if (gf_string2uint_base10 (port_str, &n) == 0)
- cmd_args->brick_port2 = n;
- break;
-
- argp_failure (state, -1, 0,
- "wrong brick (listen) port %s", arg);
+ n = 0;
+
+ if (arg != NULL) {
+ port_str = strtok_r(arg, ",", &tmp_str);
+ if (gf_string2uint_base10(port_str, &n) == 0) {
+ cmd_args->brick_port = n;
+ port_str = strtok_r(NULL, ",", &tmp_str);
+ if (port_str) {
+ if (gf_string2uint_base10(port_str, &n) == 0) {
+ cmd_args->brick_port2 = n;
+ break;
}
- break;
+ argp_failure(state, -1, 0,
+ "wrong brick (listen) port %s", arg);
+ }
+ break;
}
+ }
- argp_failure (state, -1, 0,
- "unknown brick (listen) port %s", arg);
- break;
+ argp_failure(state, -1, 0, "unknown brick (listen) port %s", arg);
+ break;
case ARGP_MEM_ACCOUNTING_KEY:
- /* TODO: it should have got handled much earlier */
- ctx = glusterfs_ctx_get ();
- ctx->mem_accounting = 1;
+ /* TODO: it should have got handled much earlier */
+ // gf_mem_acct_enable_set (THIS->ctx);
+ break;
+
+ case ARGP_FOPEN_KEEP_CACHE_KEY:
+ if (!arg)
+ arg = "on";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fopen_keep_cache = b;
+
break;
+ }
- case ARGP_GID_TIMEOUT_KEY:
- if (!gf_string2int(arg, &cmd_args->gid_timeout))
- break;
+ argp_failure(state, -1, 0, "unknown cache setting \"%s\"", arg);
- argp_failure(state, -1, 0, "unknown group list timeout %s", arg);
- break;
- case ARGP_FUSE_BACKGROUND_QLEN_KEY:
- if (!gf_string2int (arg, &cmd_args->background_qlen))
- break;
+ break;
+
+ case ARGP_GLOBAL_TIMER_WHEEL:
+ cmd_args->global_timer_wheel = 1;
+ break;
+
+ case ARGP_GID_TIMEOUT_KEY:
+ if (!gf_string2int(arg, &cmd_args->gid_timeout)) {
+ cmd_args->gid_timeout_set = _gf_true;
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown group list timeout %s", arg);
+ break;
+
+ case ARGP_RESOLVE_GIDS_KEY:
+ cmd_args->resolve_gids = 1;
+ break;
+
+ case ARGP_FUSE_LRU_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->lru_limit))
+ break;
+
+ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
+ break;
- argp_failure (state, -1, 0,
- "unknown background qlen option %s", arg);
+ case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
break;
+
+ argp_failure(state, -1, 0, "unknown invalidate limit option %s",
+ arg);
+ break;
+
+ case ARGP_FUSE_BACKGROUND_QLEN_KEY:
+ if (!gf_string2int(arg, &cmd_args->background_qlen))
+ break;
+
+ argp_failure(state, -1, 0, "unknown background qlen option %s",
+ arg);
+ break;
case ARGP_FUSE_CONGESTION_THRESHOLD_KEY:
- if (!gf_string2int (arg, &cmd_args->congestion_threshold))
- break;
+ if (!gf_string2int(arg, &cmd_args->congestion_threshold))
+ break;
- argp_failure (state, -1, 0,
- "unknown congestion threshold option %s", arg);
+ argp_failure(state, -1, 0, "unknown congestion threshold option %s",
+ arg);
+ break;
+
+#ifdef GF_LINUX_HOST_OS
+ case ARGP_OOM_SCORE_ADJ_KEY:
+ k = 0;
+
+ api = get_oom_api_info();
+ if (!api)
+ goto no_oom_api;
+
+ if (gf_string2int(arg, &k) == 0 && k >= api->oom_min &&
+ k <= api->oom_max) {
+ cmd_args->oom_score_adj = gf_strdup(arg);
break;
- }
+ }
- return 0;
-}
+ argp_failure(state, -1, 0, "unknown oom_score_adj value %s", arg);
+
+ no_oom_api:
+ break;
+#endif
+ case ARGP_FUSE_MOUNTOPTS_KEY:
+ cmd_args->fuse_mountopts = gf_strdup(arg);
+ break;
-void
-cleanup_and_exit (int signum)
+ case ARGP_FUSE_USE_READDIRP_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ if (b) {
+ cmd_args->use_readdirp = "yes";
+ } else {
+ cmd_args->use_readdirp = "no";
+ }
+
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown use-readdirp setting \"%s\"",
+ arg);
+ break;
+
+ case ARGP_LOGGER:
+ if (strcasecmp(arg, GF_LOGGER_GLUSTER_LOG) == 0)
+ cmd_args->logger = gf_logger_glusterlog;
+ else if (strcasecmp(arg, GF_LOGGER_SYSLOG) == 0)
+ cmd_args->logger = gf_logger_syslog;
+ else
+ argp_failure(state, -1, 0, "unknown logger %s", arg);
+
+ break;
+
+ case ARGP_LOG_FORMAT:
+ if (strcasecmp(arg, GF_LOG_FORMAT_NO_MSG_ID) == 0)
+ cmd_args->log_format = gf_logformat_traditional;
+ else if (strcasecmp(arg, GF_LOG_FORMAT_WITH_MSG_ID) == 0)
+ cmd_args->log_format = gf_logformat_withmsgid;
+ else
+ argp_failure(state, -1, 0, "unknown log format %s", arg);
+
+ break;
+
+ case ARGP_LOG_BUF_SIZE:
+ if (gf_string2uint32(arg, &cmd_args->log_buf_size)) {
+ argp_failure(state, -1, 0, "unknown log buf size option %s",
+ arg);
+ } else if (cmd_args->log_buf_size > GF_LOG_LRU_BUFSIZE_MAX) {
+ argp_failure(state, -1, 0,
+ "Invalid log buf size %s. "
+ "Valid range: [" GF_LOG_LRU_BUFSIZE_MIN_STR
+ "," GF_LOG_LRU_BUFSIZE_MAX_STR "]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_LOG_FLUSH_TIMEOUT:
+ if (gf_string2uint32(arg, &cmd_args->log_flush_timeout)) {
+ argp_failure(state, -1, 0,
+ "unknown log flush timeout option %s", arg);
+ } else if ((cmd_args->log_flush_timeout <
+ GF_LOG_FLUSH_TIMEOUT_MIN) ||
+ (cmd_args->log_flush_timeout >
+ GF_LOG_FLUSH_TIMEOUT_MAX)) {
+ argp_failure(state, -1, 0,
+ "Invalid log flush timeout %s. "
+ "Valid range: [" GF_LOG_FLUSH_TIMEOUT_MIN_STR
+ "," GF_LOG_FLUSH_TIMEOUT_MAX_STR "]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_SECURE_MGMT_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->secure_mgmt = b ? 1 : 0;
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown secure-mgmt setting \"%s\"",
+ arg);
+ break;
+
+ case ARGP_LOCALTIME_LOGGING_KEY:
+ cmd_args->localtime_logging = 1;
+ break;
+ case ARGP_PROCESS_NAME_KEY:
+ cmd_args->process_name = gf_strdup(arg);
+ break;
+ case ARGP_SUBDIR_MOUNT_KEY:
+ if (arg[0] != '/') {
+ argp_failure(state, -1, 0, "expect '/%s', provided just \"%s\"",
+ arg, arg);
+ break;
+ }
+ cmd_args->subdir_mount = gf_strdup(arg);
+ break;
+ case ARGP_FUSE_EVENT_HISTORY_KEY:
+ if (!arg)
+ arg = "no";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ if (b) {
+ cmd_args->event_history = "yes";
+ } else {
+ cmd_args->event_history = "no";
+ }
+
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown event-history setting \"%s\"",
+ arg);
+ break;
+ case ARGP_READER_THREAD_COUNT_KEY:
+ if (gf_string2uint32(arg, &cmd_args->reader_thread_count)) {
+ argp_failure(state, -1, 0,
+ "unknown reader thread count option %s", arg);
+ } else if ((cmd_args->reader_thread_count < 1) ||
+ (cmd_args->reader_thread_count > 64)) {
+ argp_failure(state, -1, 0,
+ "Invalid reader thread count %s. "
+ "Valid range: [\"1, 64\"]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_KERNEL_WRITEBACK_CACHE_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->kernel_writeback_cache = b;
+
+ break;
+ }
+
+ argp_failure(state, -1, 0,
+ "unknown kernel writeback cache setting \"%s\"", arg);
+ break;
+ case ARGP_ATTR_TIMES_GRANULARITY_KEY:
+ if (gf_string2uint32(arg, &cmd_args->attr_times_granularity)) {
+ argp_failure(state, -1, 0,
+ "unknown attribute times granularity option %s",
+ arg);
+ } else if (cmd_args->attr_times_granularity > 1000000000) {
+ argp_failure(state, -1, 0,
+ "Invalid attribute times granularity value %s. "
+ "Valid range: [\"0, 1000000000\"]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_flush_handle_interrupt = b;
+
+ break;
+ }
+
+ argp_failure(state, -1, 0,
+ "unknown fuse flush handle interrupt setting \"%s\"",
+ arg);
+ break;
+
+ case ARGP_FUSE_AUTO_INVAL_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_auto_inval = b;
+ break;
+ }
+
+ break;
+
+ case ARGP_GLOBAL_THREADING_KEY:
+ if (!arg || (*arg == 0)) {
+ arg = "yes";
+ }
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->global_threading = b;
+ break;
+ }
+
+ argp_failure(state, -1, 0,
+ "Invalid value for global threading \"%s\"", arg);
+ break;
+
+ case ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY:
+ if (gf_string2uint32(arg, &cmd_args->fuse_dev_eperm_ratelimit_ns)) {
+ argp_failure(state, -1, 0,
+ "Non-numerical value for "
+ "'fuse-dev-eperm-ratelimit-ns' option %s",
+ arg);
+ } else if (cmd_args->fuse_dev_eperm_ratelimit_ns > 1000000000) {
+ argp_failure(state, -1, 0,
+ "Invalid 'fuse-dev-eperm-ratelimit-ns' value %s. "
+ "Valid range: [\"0, 1000000000\"]",
+ arg);
+ }
+
+ break;
+ }
+ return 0;
+}
+
+gf_boolean_t
+should_call_fini(glusterfs_ctx_t *ctx, xlator_t *trav)
{
- glusterfs_ctx_t *ctx = NULL;
- xlator_t *trav = NULL;
+ /* There's nothing to call, so the other checks don't matter. */
+ if (!trav->fini) {
+ return _gf_false;
+ }
- ctx = glusterfs_ctx_get ();
+ /* This preserves previous behavior in glusterd. */
+ if (ctx->process_mode == GF_GLUSTERD_PROCESS) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
- if (!ctx)
- return;
+void
+cleanup_and_exit(int signum)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *trav = NULL;
+ xlator_t *top;
+ xlator_t *victim;
+ xlator_list_t **trav_p;
- gf_log_callingfn ("", GF_LOG_WARNING,
- "received signum (%d), shutting down", signum);
+ ctx = glusterfsd_ctx;
- if (ctx->cleanup_started)
- return;
+ if (!ctx)
+ return;
+ /* To take or not to take the mutex here and in the other
+ * signal handler - gf_print_trace() - is the big question here.
+ *
+ * Taking mutex in signal handler would mean that if the process
+ * receives a fatal signal while another thread is holding
+ * ctx->log.log_buf_lock to perhaps log a message in _gf_msg_internal(),
+ * the offending thread hangs on the mutex lock forever without letting
+ * the process exit.
+ *
+ * On the other hand. not taking the mutex in signal handler would cause
+ * it to modify the lru_list of buffered log messages in a racy manner,
+ * corrupt the list and potentially give rise to an unending
+ * cascade of SIGSEGVs and other re-entrancy issues.
+ */
+
+ gf_log_disable_suppression_before_exit(ctx);
+
+ gf_msg_callingfn("", GF_LOG_WARNING, 0, glusterfsd_msg_32,
+ "received signum (%d), shutting down", signum);
+
+ if (ctx->cleanup_started)
+ return;
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
ctx->cleanup_started = 1;
- glusterfs_mgmt_pmap_signout (ctx);
- if (ctx->listener) {
- (void) glusterfs_listener_stop (ctx);
+
+ /* signout should be sent to all the bricks in case brick mux is enabled
+ * and multiple brick instances are attached to this process
+ */
+ if (ctx->active) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ rpc_clnt_mgmt_pmap_signout(ctx, victim->name);
+ }
+ } else {
+ rpc_clnt_mgmt_pmap_signout(ctx, NULL);
}
+ /* below part is a racy code where the rpcsvc object is freed.
+ * But in another thread (epoll thread), upon poll error in the
+ * socket the transports are cleaned up where again rpcsvc object
+ * is accessed (which is already freed by the below function).
+ * Since the process is about to be killed don't execute the function
+ * below.
+ */
+ /* if (ctx->listener) { */
+ /* (void) glusterfs_listener_stop (ctx); */
+ /* } */
+
/* Call fini() of FUSE xlator first:
* so there are no more requests coming and
* 'umount' of mount point is done properly */
trav = ctx->master;
if (trav && trav->fini) {
- THIS = trav;
- trav->fini (trav);
+ THIS = trav;
+ trav->fini(trav);
}
- glusterfs_pidfile_cleanup (ctx);
+ glusterfs_pidfile_cleanup(ctx);
- exit (0);
#if 0
/* TODO: Properly do cleanup_and_exit(), with synchronization */
if (ctx->mgmt) {
@@ -951,838 +1480,1259 @@ cleanup_and_exit (int signum)
rpc_clnt_connection_cleanup (&ctx->mgmt->conn);
rpc_clnt_unref (ctx->mgmt);
}
+#endif
- /* call fini() of each xlator */
trav = NULL;
- if (ctx->active)
- trav = ctx->active->top;
- while (trav) {
- if (trav->fini) {
- THIS = trav;
- trav->fini (trav);
- }
- trav = trav->next;
- }
-#endif
-}
+ /* previously we were releasing the cleanup mutex lock before the
+ process exit. As we are releasing the cleanup mutex lock, before
+ the process can exit some other thread which is blocked on
+ cleanup mutex lock is acquiring the cleanup mutex lock and
+ trying to acquire some resources which are already freed as a
+ part of cleanup. To avoid this, we are exiting the process without
+ releasing the cleanup mutex lock. This will not cause any lock
+ related issues as the process which acquired the lock is going down
+ */
+ /* NOTE: Only the least significant 8 bits i.e (signum & 255)
+ will be available to parent process on calling exit() */
+ exit(abs(signum));
+ }
+}
static void
-reincarnate (int signum)
+reincarnate(int signum)
{
- int ret = 0;
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
- if (cmd_args->volfile_server) {
- gf_log ("glusterfsd", GF_LOG_INFO,
- "Fetching the volume file from server...");
- ret = glusterfs_volfile_fetch (ctx);
- } else {
- gf_log ("glusterfsd", GF_LOG_DEBUG,
- "Not reloading volume specification file on SIGHUP");
- }
+ gf_msg_trace("gluster", 0, "received reincarnate request (sig:HUP)");
- /* Also, SIGHUP should do logrotate */
- gf_log_logrotate (1);
+ if (cmd_args->volfile_server) {
+ gf_smsg("glusterfsd", GF_LOG_INFO, 0, glusterfsd_msg_11, NULL);
+ ret = glusterfs_volfile_fetch(ctx);
+ }
- if (ret < 0)
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "volume initialization failed.");
+ /* Also, SIGHUP should do logrotate */
+ gf_log_logrotate(1);
- return;
-}
+ if (ret < 0)
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_12, NULL);
+ return;
+}
-static char *
-generate_uuid ()
+void
+emancipate(glusterfs_ctx_t *ctx, int ret)
{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (gethostname (hostname, 256) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%" GF_PRI_SUSECONDS,
- hostname, getpid(), now_str, tv.tv_usec);
-
- return gf_strdup (tmp_str);
+ /* break free from the parent */
+ if (ctx->daemon_pipe[1] != -1) {
+ sys_write(ctx->daemon_pipe[1], (void *)&ret, sizeof(ret));
+ sys_close(ctx->daemon_pipe[1]);
+ ctx->daemon_pipe[1] = -1;
+ }
}
-
static uint8_t
-gf_get_process_mode (char *exec_name)
+gf_get_process_mode(char *exec_name)
{
- char *dup_execname = NULL, *base = NULL;
- uint8_t ret = 0;
+ char *dup_execname = NULL, *base = NULL;
+ uint8_t ret = 0;
- dup_execname = gf_strdup (exec_name);
- base = basename (dup_execname);
+ dup_execname = gf_strdup(exec_name);
+ base = basename(dup_execname);
- if (!strncmp (base, "glusterfsd", 10)) {
- ret = GF_SERVER_PROCESS;
- } else if (!strncmp (base, "glusterd", 8)) {
- ret = GF_GLUSTERD_PROCESS;
- } else {
- ret = GF_CLIENT_PROCESS;
- }
+ if (!strncmp(base, "glusterfsd", 10)) {
+ ret = GF_SERVER_PROCESS;
+ } else if (!strncmp(base, "glusterd", 8)) {
+ ret = GF_GLUSTERD_PROCESS;
+ } else {
+ ret = GF_CLIENT_PROCESS;
+ }
- GF_FREE (dup_execname);
+ GF_FREE(dup_execname);
- return ret;
+ return ret;
}
-
-
static int
-set_log_file_path (cmd_args_t *cmd_args)
+glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
{
- int i = 0;
- int j = 0;
- int ret = 0;
- int port = 0;
- char *tmp_ptr = NULL;
- char tmp_str[1024] = {0,};
-
- if (cmd_args->mount_point) {
- j = 0;
- i = 0;
- if (cmd_args->mount_point[0] == '/')
- i = 1;
- for (; i < strlen (cmd_args->mount_point); i++,j++) {
- tmp_str[j] = cmd_args->mount_point[i];
- if (cmd_args->mount_point[i] == '/')
- tmp_str[j] = '-';
- }
-
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- goto done;
- }
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {
+ 0,
+ };
+ int ret = -1;
- if (cmd_args->volfile) {
- j = 0;
- i = 0;
- if (cmd_args->volfile[0] == '/')
- i = 1;
- for (; i < strlen (cmd_args->volfile); i++,j++) {
- tmp_str[j] = cmd_args->volfile[i];
- if (cmd_args->volfile[i] == '/')
- tmp_str[j] = '-';
- }
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- goto done;
- }
-
- if (cmd_args->volfile_server) {
- port = 1;
- tmp_ptr = "default";
+ if (!ctx)
+ return ret;
- if (cmd_args->volfile_server_port)
- port = cmd_args->volfile_server_port;
- if (cmd_args->volfile_id)
- tmp_ptr = cmd_args->volfile_id;
+ ret = xlator_mem_acct_init(THIS, gfd_mt_end);
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, 0, glusterfsd_msg_34, NULL);
+ return ret;
+ }
+
+ /* reset ret to -1 so that we don't need to explicitly
+ * set it in all error paths before "goto err"
+ */
+ ret = -1;
+
+ /* monitoring should be enabled by default */
+ ctx->measure_latency = true;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id();
+ if (!ctx->process_uuid) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_13, NULL);
+ goto out;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new();
+ if (!ctx->iobuf_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "iobuf", NULL);
+ goto out;
+ }
+
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
+ if (!ctx->event_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "event", NULL);
+ goto out;
+ }
+
+ ctx->pool = GF_CALLOC(1, sizeof(call_pool_t), gfd_mt_call_pool_t);
+ if (!ctx->pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "call", NULL);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&ctx->pool->all_frames);
+ LOCK_INIT(&ctx->pool->lock);
+
+ /* frame_mem_pool size 112 * 4k */
+ ctx->pool->frame_mem_pool = mem_pool_new(call_frame_t, 4096);
+ if (!ctx->pool->frame_mem_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "frame", NULL);
+ goto out;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ ctx->pool->stack_mem_pool = mem_pool_new(call_stack_t, 1024);
+ if (!ctx->pool->stack_mem_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "stack", NULL);
+ goto out;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new(call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "stub", NULL);
+ goto out;
+ }
+
+ ctx->dict_pool = mem_pool_new(dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto out;
+
+ ctx->dict_pair_pool = mem_pool_new(data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto out;
+
+ ctx->dict_data_pool = mem_pool_new(data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto out;
+
+ ctx->logbuf_pool = mem_pool_new(log_buf_t, GF_MEMPOOL_COUNT_OF_LRU_BUF_T);
+ if (!ctx->logbuf_pool)
+ goto out;
+
+ pthread_mutex_init(&ctx->notify_lock, NULL);
+ pthread_mutex_init(&ctx->cleanup_lock, NULL);
+ pthread_cond_init(&ctx->notify_cond, NULL);
+
+ ctx->clienttable = gf_clienttable_alloc();
+ if (!ctx->clienttable)
+ goto out;
+
+ cmd_args = &ctx->cmd_args;
+
+ /* parsing command line arguments */
+ cmd_args->log_level = DEFAULT_LOG_LEVEL;
+ cmd_args->logger = gf_logger_glusterlog;
+ cmd_args->log_format = gf_logformat_withmsgid;
+ cmd_args->log_buf_size = GF_LOG_LRU_BUFSIZE_DEFAULT;
+ cmd_args->log_flush_timeout = GF_LOG_FLUSH_TIMEOUT_DEFAULT;
+
+ cmd_args->mac_compat = GF_OPTION_DISABLE;
+#ifdef GF_DARWIN_HOST_OS
+ /* On Darwin machines, O_APPEND is not handled,
+ * which may corrupt the data
+ */
+ cmd_args->fuse_direct_io_mode = GF_OPTION_DISABLE;
+#else
+ cmd_args->fuse_direct_io_mode = GF_OPTION_DEFERRED;
+#endif
+ cmd_args->fuse_attribute_timeout = -1;
+ cmd_args->fuse_entry_timeout = -1;
+ cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED;
+ cmd_args->kernel_writeback_cache = GF_OPTION_DEFERRED;
+ cmd_args->fuse_flush_handle_interrupt = GF_OPTION_DEFERRED;
+
+ if (ctx->mem_acct_enable)
+ cmd_args->mem_acct = 1;
+
+ INIT_LIST_HEAD(&cmd_args->xlator_options);
+ INIT_LIST_HEAD(&cmd_args->volfile_servers);
+ ctx->pxl_count = 0;
+ pthread_mutex_init(&ctx->fd_lock, NULL);
+ pthread_cond_init(&ctx->fd_cond, NULL);
+ INIT_LIST_HEAD(&ctx->janitor_fds);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit(RLIMIT_CORE, &lim);
+
+ ret = 0;
+out:
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s-%s-%d.log",
- cmd_args->volfile_server, tmp_ptr, port);
+ if (ret) {
+ if (ctx->pool) {
+ mem_pool_destroy(ctx->pool->frame_mem_pool);
+ mem_pool_destroy(ctx->pool->stack_mem_pool);
}
-done:
- return ret;
-}
+ GF_FREE(ctx->pool);
+ mem_pool_destroy(ctx->stub_mem_pool);
+ mem_pool_destroy(ctx->dict_pool);
+ mem_pool_destroy(ctx->dict_data_pool);
+ mem_pool_destroy(ctx->dict_pair_pool);
+ mem_pool_destroy(ctx->logbuf_pool);
+ }
+ return ret;
+}
static int
-glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+logging_init(glusterfs_ctx_t *ctx, const char *progpath)
{
- cmd_args_t *cmd_args = NULL;
- struct rlimit lim = {0, };
- call_pool_t *pool = NULL;
-
- xlator_mem_acct_init (THIS, gfd_mt_end);
-
- ctx->process_uuid = generate_uuid ();
- if (!ctx->process_uuid) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs uuid generation failed");
- return -1;
- }
-
- ctx->page_size = 128 * GF_UNIT_KB;
-
- ctx->iobuf_pool = iobuf_pool_new ();
- if (!ctx->iobuf_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs iobuf pool creation failed");
- return -1;
- }
-
- ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
- if (!ctx->event_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs event pool creation failed");
- return -1;
- }
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
- pool = GF_CALLOC (1, sizeof (call_pool_t),
- gfd_mt_call_pool_t);
- if (!pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs call pool creation failed");
- return -1;
- }
+ cmd_args = &ctx->cmd_args;
- /* frame_mem_pool size 112 * 4k */
- pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
- if (!pool->frame_mem_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs frame pool creation failed");
- return -1;
- }
- /* stack_mem_pool size 256 * 1024 */
- pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
- if (!pool->stack_mem_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs stack pool creation failed");
- return -1;
+ if (cmd_args->log_file == NULL) {
+ ret = gf_set_log_file_path(cmd_args, ctx);
+ if (ret == -1) {
+ fprintf(stderr,
+ "ERROR: failed to set the log file "
+ "path\n");
+ return -1;
}
+ }
- ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
- if (!ctx->stub_mem_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs stub pool creation failed");
- return -1;
+ if (cmd_args->log_ident == NULL) {
+ ret = gf_set_log_ident(cmd_args);
+ if (ret == -1) {
+ fprintf(stderr,
+ "ERROR: failed to set the log "
+ "identity\n");
+ return -1;
}
+ }
- ctx->dict_pool = mem_pool_new (dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
- if (!ctx->dict_pool)
- return -1;
-
- ctx->dict_pair_pool = mem_pool_new (data_pair_t,
- GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
- if (!ctx->dict_pair_pool)
- return -1;
+ /* finish log set parameters before init */
+ gf_log_set_loglevel(ctx, cmd_args->log_level);
- ctx->dict_data_pool = mem_pool_new (data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
- if (!ctx->dict_data_pool)
- return -1;
+ gf_log_set_localtime(cmd_args->localtime_logging);
- INIT_LIST_HEAD (&pool->all_frames);
- LOCK_INIT (&pool->lock);
- ctx->pool = pool;
+ gf_log_set_logger(cmd_args->logger);
- pthread_mutex_init (&(ctx->lock), NULL);
+ gf_log_set_logformat(cmd_args->log_format);
- cmd_args = &ctx->cmd_args;
+ gf_log_set_log_buf_size(cmd_args->log_buf_size);
- /* parsing command line arguments */
- cmd_args->log_level = DEFAULT_LOG_LEVEL;
-
- cmd_args->mac_compat = GF_OPTION_DISABLE;
-#ifdef GF_DARWIN_HOST_OS
- /* On Darwin machines, O_APPEND is not handled,
- * which may corrupt the data
- */
- cmd_args->fuse_direct_io_mode = GF_OPTION_DISABLE;
-#else
- cmd_args->fuse_direct_io_mode = GF_OPTION_DEFERRED;
-#endif
- cmd_args->fuse_attribute_timeout = -1;
- cmd_args->fuse_entry_timeout = -1;
+ gf_log_set_log_flush_timeout(cmd_args->log_flush_timeout);
- INIT_LIST_HEAD (&cmd_args->xlator_options);
+ if (gf_log_init(ctx, cmd_args->log_file, cmd_args->log_ident) == -1) {
+ fprintf(stderr, "ERROR: failed to open logfile %s\n",
+ cmd_args->log_file);
+ return -1;
+ }
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
+ /* At this point, all the logging related parameters are initialised
+ * except for the log flush timer, which will be injected post fork(2)
+ * in daemonize() . During this time, any log message that is logged
+ * will be kept buffered. And if the list that holds these messages
+ * overflows, then the same lru policy is used to drive out the least
+ * recently used message and displace it with the message just logged.
+ */
- return 0;
+ return 0;
}
-static int
-logging_init (glusterfs_ctx_t *ctx)
+void
+gf_check_and_set_mem_acct(int argc, char *argv[])
{
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
-
- cmd_args = &ctx->cmd_args;
+ int i = 0;
- if (cmd_args->log_file == NULL) {
- ret = set_log_file_path (cmd_args);
- if (ret == -1) {
- fprintf (stderr, "ERROR: failed to set the log file path\n");
- return -1;
- }
- }
-
- if (gf_log_init (cmd_args->log_file) == -1) {
- fprintf (stderr, "ERROR: failed to open logfile %s\n",
- cmd_args->log_file);
- return -1;
+ for (i = 0; i < argc; i++) {
+ if (strcmp(argv[i], "--no-mem-accounting") == 0) {
+ gf_global_mem_acct_enable_set(0);
+ break;
}
+ }
+}
- gf_log_set_loglevel (cmd_args->log_level);
+/**
+ * print_exports_file - Print out & verify the syntax
+ * of the exports file specified
+ * in the parameter.
+ *
+ * @exports_file : Path of the exports file to print & verify
+ *
+ * @return : success: 0 when successfully parsed
+ * failure: 1 when failed to parse one or more lines
+ * -1 when other critical errors (dlopen () etc)
+ * Critical errors are treated differently than parse errors. Critical
+ * errors terminate the program immediately here and print out different
+ * error messages. Hence there are different return values.
+ */
+int
+print_exports_file(const char *exports_file)
+{
+ void *libhandle = NULL;
+ char *libpathfull = NULL;
+ struct exports_file *file = NULL;
+ int ret = 0;
+
+ int (*exp_file_parse)(const char *filepath, struct exports_file **expfile,
+ struct mount3_state *ms) = NULL;
+ void (*exp_file_print)(const struct exports_file *file) = NULL;
+ void (*exp_file_deinit)(struct exports_file * ptr) = NULL;
+
+ /* XLATORDIR passed through a -D flag to GCC */
+ ret = gf_asprintf(&libpathfull, "%s/%s/server.so", XLATORDIR, "nfs");
+ if (ret < 0) {
+ gf_log("glusterfs", GF_LOG_CRITICAL, "asprintf () failed.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Load up the library */
+ libhandle = dlopen(libpathfull, RTLD_NOW);
+ if (!libhandle) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error loading NFS server library : "
+ "%s\n",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ /* Load up the function */
+ exp_file_parse = dlsym(libhandle, "exp_file_parse");
+ if (!exp_file_parse) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function exp_file_parse "
+ "in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Parse the file */
+ ret = exp_file_parse(exports_file, &file, NULL);
+ if (ret < 0) {
+ ret = 1; /* This means we failed to parse */
+ goto out;
+ }
+
+ /* Load up the function */
+ exp_file_print = dlsym(libhandle, "exp_file_print");
+ if (!exp_file_print) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function exp_file_print in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Print it out to screen */
+ exp_file_print(file);
+
+ /* Load up the function */
+ exp_file_deinit = dlsym(libhandle, "exp_file_deinit");
+ if (!exp_file_deinit) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function exp_file_deinit in lib.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Free the file */
+ exp_file_deinit(file);
- return 0;
+out:
+ if (libhandle)
+ dlclose(libhandle);
+ GF_FREE(libpathfull);
+ return ret;
}
-void
-gf_check_and_set_mem_acct (int argc, char *argv[], glusterfs_ctx_t *ctx)
+/**
+ * print_netgroups_file - Print out & verify the syntax
+ * of the netgroups file specified
+ * in the parameter.
+ *
+ * @netgroups_file : Path of the netgroups file to print & verify
+ * @return : success: 0 when successfully parsed
+ * failure: 1 when failed to parse one more more lines
+ * -1 when other critical errors (dlopen () etc)
+ *
+ * We have multiple returns here because for critical errors, we abort
+ * operations immediately and exit. For example, if we can't load the
+ * NFS server library, then we have a real bad problem so we don't continue.
+ * Or if we cannot allocate anymore memory, we don't want to continue. Also,
+ * we want to print out a different error messages based on the ret value.
+ */
+int
+print_netgroups_file(const char *netgroups_file)
{
- int i = 0;
- for (i = 0; i < argc; i++) {
- if (strcmp (argv[i], "--mem-accounting") == 0) {
- ctx->mem_accounting = 1;
- break;
- }
- }
+ void *libhandle = NULL;
+ char *libpathfull = NULL;
+ struct netgroups_file *file = NULL;
+ int ret = 0;
+
+ struct netgroups_file *(*ng_file_parse)(const char *file_path) = NULL;
+ void (*ng_file_print)(const struct netgroups_file *file) = NULL;
+ void (*ng_file_deinit)(struct netgroups_file * ptr) = NULL;
+
+ /* XLATORDIR passed through a -D flag to GCC */
+ ret = gf_asprintf(&libpathfull, "%s/%s/server.so", XLATORDIR, "nfs");
+ if (ret < 0) {
+ gf_log("glusterfs", GF_LOG_CRITICAL, "asprintf () failed.");
+ ret = -1;
+ goto out;
+ }
+ /* Load up the library */
+ libhandle = dlopen(libpathfull, RTLD_NOW);
+ if (!libhandle) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error loading NFS server library : %s\n", dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ /* Load up the function */
+ ng_file_parse = dlsym(libhandle, "ng_file_parse");
+ if (!ng_file_parse) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function ng_file_parse in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Parse the file */
+ file = ng_file_parse(netgroups_file);
+ if (!file) {
+ ret = 1; /* This means we failed to parse */
+ goto out;
+ }
+
+ /* Load up the function */
+ ng_file_print = dlsym(libhandle, "ng_file_print");
+ if (!ng_file_print) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function ng_file_print in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Print it out to screen */
+ ng_file_print(file);
+
+ /* Load up the function */
+ ng_file_deinit = dlsym(libhandle, "ng_file_deinit");
+ if (!ng_file_deinit) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function ng_file_deinit in lib.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Free the file */
+ ng_file_deinit(file);
+
+out:
+ if (libhandle)
+ dlclose(libhandle);
+ GF_FREE(libpathfull);
+ return ret;
}
int
-parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx)
+parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
{
- int process_mode = 0;
- int ret = 0;
- struct stat stbuf = {0, };
- struct tm *tm = NULL;
- time_t utime;
- char timestr[256];
- char tmp_logfile[1024] = { 0 };
- char *tmp_logfile_dyn = NULL;
- char *tmp_logfilebase = NULL;
- cmd_args_t *cmd_args = NULL;
-
- cmd_args = &ctx->cmd_args;
-
- argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
-
- if (ENABLE_DEBUG_MODE == cmd_args->debug_mode) {
- cmd_args->log_level = GF_LOG_DEBUG;
- cmd_args->log_file = "/dev/stderr";
- cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
- }
-
- process_mode = gf_get_process_mode (argv[0]);
- ctx->process_mode = process_mode;
+ int process_mode = 0;
+ int ret = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE];
+ char tmp_logfile[1024] = {0};
+ char *tmp_logfile_dyn = NULL;
+ char *tmp_logfilebase = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int len = 0;
+ char *thin_volfileid = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ /* Do this before argp_parse so it can be overridden. */
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ cmd_args->secure_mgmt = 1;
+ ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
+ /* Need to set lru_limit to below 0 to indicate there was nothing
+ specified. This is needed as 0 is a valid option, and may not be
+ default value. */
+ cmd_args->lru_limit = -1;
+
+ argp_parse(&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
+
+ if (cmd_args->print_xlatordir || cmd_args->print_statedumpdir ||
+ cmd_args->print_logdir || cmd_args->print_libexecdir) {
+ /* Just print, nothing else to do */
+ goto out;
+ }
+
+ if (cmd_args->print_netgroups) {
+ /* When this option is set we don't want to do anything else
+ * except for printing & verifying the netgroups file.
+ */
+ ret = 0;
+ goto out;
+ }
- /* Make sure after the parsing cli, if '--volfile-server' option is
- given, then '--volfile-id' is mandatory */
- if (cmd_args->volfile_server && !cmd_args->volfile_id) {
- gf_log ("glusterfs", GF_LOG_CRITICAL,
- "ERROR: '--volfile-id' is mandatory if '-s' OR "
- "'--volfile-server' option is given");
- ret = -1;
+ if (cmd_args->print_exports) {
+ /* When this option is set we don't want to do anything else
+ * except for printing & verifying the exports file.
+ */
+ ret = 0;
+ goto out;
+ }
+
+ ctx->secure_mgmt = cmd_args->secure_mgmt;
+
+ if (ENABLE_DEBUG_MODE == cmd_args->debug_mode) {
+ cmd_args->log_level = GF_LOG_DEBUG;
+ cmd_args->log_file = gf_strdup("/dev/stderr");
+ cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
+ }
+
+ process_mode = gf_get_process_mode(argv[0]);
+ ctx->process_mode = process_mode;
+
+ if (cmd_args->process_name) {
+ ctx->cmd_args.process_name = cmd_args->process_name;
+ }
+ /* Make sure after the parsing cli, if '--volfile-server' option is
+ given, then '--volfile-id' is mandatory */
+ if (cmd_args->volfile_server && !cmd_args->volfile_id) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_15, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd_args->volfile_server == NULL) && (cmd_args->volfile == NULL)) {
+ if (process_mode == GF_SERVER_PROCESS)
+ cmd_args->volfile = gf_strdup(DEFAULT_SERVER_VOLFILE);
+ else if (process_mode == GF_GLUSTERD_PROCESS)
+ cmd_args->volfile = gf_strdup(DEFAULT_GLUSTERD_VOLFILE);
+ else
+ cmd_args->volfile = gf_strdup(DEFAULT_CLIENT_VOLFILE);
+
+ /* Check if the volfile exists, if not give usage output
+ and exit */
+ ret = sys_stat(cmd_args->volfile, &stbuf);
+ if (ret) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, errno, glusterfsd_msg_16,
+ NULL);
+ /* argp_usage (argp.) */
+ fprintf(stderr, "USAGE: %s [options] [mountpoint]\n", argv[0]);
+ goto out;
+ }
+ }
+
+ if (cmd_args->thin_client) {
+ len = strlen(cmd_args->volfile_id) + SLEN("gfproxy-client/");
+ thin_volfileid = GF_MALLOC(len + 1, gf_common_mt_char);
+ snprintf(thin_volfileid, len + 1, "gfproxy-client/%s",
+ cmd_args->volfile_id);
+ GF_FREE(cmd_args->volfile_id);
+ cmd_args->volfile_id = thin_volfileid;
+ }
+
+ if (cmd_args->run_id) {
+ ret = sys_lstat(cmd_args->log_file, &stbuf);
+ /* If its /dev/null, or /dev/stdout, /dev/stderr,
+ * let it use the same, no need to alter
+ */
+ if (((ret == 0) &&
+ (S_ISREG(stbuf.st_mode) || S_ISLNK(stbuf.st_mode))) ||
+ (ret == -1)) {
+ /* Have separate logfile per run. */
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
+ sprintf(tmp_logfile, "%s.%s.%d", cmd_args->log_file, timestr,
+ getpid());
+
+ /* Create symlink to actual log file */
+ sys_unlink(cmd_args->log_file);
+
+ tmp_logfile_dyn = gf_strdup(tmp_logfile);
+ tmp_logfilebase = basename(tmp_logfile_dyn);
+ ret = sys_symlink(tmp_logfilebase, cmd_args->log_file);
+ if (ret == -1) {
+ fprintf(stderr, "ERROR: symlink of logfile failed\n");
goto out;
- }
-
- if ((cmd_args->volfile_server == NULL)
- && (cmd_args->volfile == NULL)) {
- if (process_mode == GF_SERVER_PROCESS)
- cmd_args->volfile = gf_strdup (DEFAULT_SERVER_VOLFILE);
- else if (process_mode == GF_GLUSTERD_PROCESS)
- cmd_args->volfile = gf_strdup (DEFAULT_GLUSTERD_VOLFILE);
- else
- cmd_args->volfile = gf_strdup (DEFAULT_CLIENT_VOLFILE);
-
- /* Check if the volfile exists, if not give usage output
- and exit */
- ret = stat (cmd_args->volfile, &stbuf);
- if (ret) {
- gf_log ("glusterfs", GF_LOG_CRITICAL,
- "ERROR: parsing the volfile failed (%s)\n",
- strerror (errno));
- /* argp_usage (argp.) */
- fprintf (stderr, "USAGE: %s [options] [mountpoint]\n",
- argv[0]);
- goto out;
- }
- }
-
- if (cmd_args->run_id) {
- ret = sys_lstat (cmd_args->log_file, &stbuf);
- /* If its /dev/null, or /dev/stdout, /dev/stderr,
- * let it use the same, no need to alter
- */
- if (((ret == 0) &&
- (S_ISREG (stbuf.st_mode) || S_ISLNK (stbuf.st_mode))) ||
- (ret == -1)) {
- /* Have separate logfile per run */
- tm = localtime (&utime);
- strftime (timestr, 256, "%Y%m%d.%H%M%S", tm);
- sprintf (tmp_logfile, "%s.%s.%d",
- cmd_args->log_file, timestr, getpid ());
-
- /* Create symlink to actual log file */
- sys_unlink (cmd_args->log_file);
-
- tmp_logfile_dyn = gf_strdup (tmp_logfile);
- tmp_logfilebase = basename (tmp_logfile_dyn);
- ret = sys_symlink (tmp_logfilebase,
- cmd_args->log_file);
- if (ret == -1) {
- fprintf (stderr, "ERROR: symlink of logfile failed\n");
- goto out;
- }
+ }
- GF_FREE (cmd_args->log_file);
- cmd_args->log_file = gf_strdup (tmp_logfile);
+ GF_FREE(cmd_args->log_file);
+ cmd_args->log_file = gf_strdup(tmp_logfile);
- GF_FREE (tmp_logfile_dyn);
- }
+ GF_FREE(tmp_logfile_dyn);
}
+ }
+
+ /*
+ This option was made obsolete but parsing it for backward
+ compatibility with third party applications
+ */
+ if (cmd_args->max_connect_attempts) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, 0, glusterfsd_msg_33, NULL);
+ }
#ifdef GF_DARWIN_HOST_OS
- if (cmd_args->mount_point)
- cmd_args->mac_compat = GF_OPTION_DEFERRED;
+ if (cmd_args->mount_point)
+ cmd_args->mac_compat = GF_OPTION_DEFERRED;
#endif
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-glusterfs_pidfile_setup (glusterfs_ctx_t *ctx)
+glusterfs_pidfile_setup(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
- FILE *pidfp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+ FILE *pidfp = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (!cmd_args->pid_file)
- return 0;
-
- pidfp = fopen (cmd_args->pid_file, "a+");
- if (!pidfp) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s error (%s)",
- cmd_args->pid_file, strerror (errno));
- return -1;
- }
-
- ret = lockf (fileno (pidfp), F_TLOCK, 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s lock error (%s)",
- cmd_args->pid_file, strerror (errno));
- return ret;
- }
+ if (!cmd_args->pid_file)
+ return 0;
- gf_log ("glusterfsd", GF_LOG_TRACE,
- "pidfile %s lock acquired",
- cmd_args->pid_file);
+ pidfp = fopen(cmd_args->pid_file, "a+");
+ if (!pidfp) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_17,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ goto out;
+ }
- ret = lockf (fileno (pidfp), F_ULOCK, 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s unlock error (%s)",
- cmd_args->pid_file, strerror (errno));
- return ret;
- }
+ ctx->pidfp = pidfp;
- ctx->pidfp = pidfp;
+ ret = 0;
+out:
- return 0;
+ return ret;
}
-
int
-glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx)
+glusterfs_pidfile_cleanup(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
+ cmd_args_t *cmd_args = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (!ctx->pidfp)
- return 0;
+ if (!ctx->pidfp)
+ return 0;
- gf_log ("glusterfsd", GF_LOG_TRACE,
- "pidfile %s cleanup",
- cmd_args->pid_file);
+ gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", cmd_args->pid_file);
- if (ctx->cmd_args.pid_file) {
- unlink (ctx->cmd_args.pid_file);
- ctx->cmd_args.pid_file = NULL;
- }
+ if (ctx->cmd_args.pid_file) {
+ GF_FREE(ctx->cmd_args.pid_file);
+ ctx->cmd_args.pid_file = NULL;
+ }
- lockf (fileno (ctx->pidfp), F_ULOCK, 0);
- fclose (ctx->pidfp);
- ctx->pidfp = NULL;
+ lockf(fileno(ctx->pidfp), F_ULOCK, 0);
+ fclose(ctx->pidfp);
+ ctx->pidfp = NULL;
- return 0;
+ return 0;
}
int
-glusterfs_pidfile_update (glusterfs_ctx_t *ctx)
+glusterfs_pidfile_update(glusterfs_ctx_t *ctx, pid_t pid)
{
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
- FILE *pidfp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ FILE *pidfp = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- pidfp = ctx->pidfp;
- if (!pidfp)
- return 0;
+ pidfp = ctx->pidfp;
+ if (!pidfp)
+ return 0;
- ret = lockf (fileno (pidfp), F_TLOCK, 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s lock failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = lockf(fileno(pidfp), F_TLOCK, 0);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_18,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- ret = ftruncate (fileno (pidfp), 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s truncation failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = sys_ftruncate(fileno(pidfp), 0);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_20,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- ret = fprintf (pidfp, "%d\n", getpid ());
- if (ret <= 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s write failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = fprintf(pidfp, "%d\n", pid);
+ if (ret <= 0) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- ret = fflush (pidfp);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s write failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = fflush(pidfp);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- gf_log ("glusterfsd", GF_LOG_DEBUG,
- "pidfile %s updated with pid %d",
- cmd_args->pid_file, getpid ());
+ gf_msg_debug("glusterfsd", 0, "pidfile %s updated with pid %d",
+ cmd_args->pid_file, pid);
- return 0;
+ return 0;
}
-
void *
-glusterfs_sigwaiter (void *arg)
+glusterfs_sigwaiter(void *arg)
{
- sigset_t set;
- int ret = 0;
- int sig = 0;
+ sigset_t set;
+ int ret = 0;
+ int sig = 0;
+ char *file = NULL;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGINT); /* cleanup_and_exit */
+ sigaddset(&set, SIGTERM); /* cleanup_and_exit */
+ sigaddset(&set, SIGHUP); /* reincarnate */
+ sigaddset(&set, SIGUSR1); /* gf_proc_dump_info */
+ sigaddset(&set, SIGUSR2);
+
+ for (;;) {
+ ret = sigwait(&set, &sig);
+ if (ret)
+ continue;
+ switch (sig) {
+ case SIGINT:
+ case SIGTERM:
+ cleanup_and_exit(sig);
+ break;
+ case SIGHUP:
+ reincarnate(sig);
+ break;
+ case SIGUSR1:
+ gf_proc_dump_info(sig, glusterfsd_ctx);
+ break;
+ case SIGUSR2:
+ file = gf_monitor_metrics(glusterfsd_ctx);
- sigemptyset (&set);
- sigaddset (&set, SIGINT); /* cleanup_and_exit */
- sigaddset (&set, SIGTERM); /* cleanup_and_exit */
- sigaddset (&set, SIGHUP); /* reincarnate */
- sigaddset (&set, SIGUSR1); /* gf_proc_dump_info */
- sigaddset (&set, SIGUSR2); /* gf_latency_toggle */
+ /* Nothing needed to be done here */
+ GF_FREE(file);
- for (;;) {
- ret = sigwait (&set, &sig);
- if (ret)
- continue;
-
-
- switch (sig) {
- case SIGINT:
- case SIGTERM:
- cleanup_and_exit (sig);
- break;
- case SIGHUP:
- reincarnate (sig);
- break;
- case SIGUSR1:
- gf_proc_dump_info (sig);
- break;
- case SIGUSR2:
- gf_latency_toggle (sig);
- break;
- default:
-
- break;
- }
+ break;
+ default:
+
+ break;
}
+ }
- return NULL;
+ return NULL;
}
+void
+glusterfsd_print_trace(int signum)
+{
+ gf_print_trace(signum, glusterfsd_ctx);
+}
int
-glusterfs_signals_setup (glusterfs_ctx_t *ctx)
+glusterfs_signals_setup(glusterfs_ctx_t *ctx)
{
- sigset_t set;
- int ret = 0;
-
- sigemptyset (&set);
-
- /* common setting for all threads */
- signal (SIGSEGV, gf_print_trace);
- signal (SIGABRT, gf_print_trace);
- signal (SIGILL, gf_print_trace);
- signal (SIGTRAP, gf_print_trace);
- signal (SIGFPE, gf_print_trace);
- signal (SIGBUS, gf_print_trace);
- signal (SIGINT, cleanup_and_exit);
- signal (SIGPIPE, SIG_IGN);
-
- /* block these signals from non-sigwaiter threads */
- sigaddset (&set, SIGTERM); /* cleanup_and_exit */
- sigaddset (&set, SIGHUP); /* reincarnate */
- sigaddset (&set, SIGUSR1); /* gf_proc_dump_info */
- sigaddset (&set, SIGUSR2); /* gf_latency_toggle */
-
- ret = pthread_sigmask (SIG_BLOCK, &set, NULL);
- if (ret) {
- gf_log ("", GF_LOG_WARNING,
- "failed to execute pthread_signmask %s",
- strerror (errno));
- return ret;
- }
-
- ret = pthread_create (&ctx->sigwaiter, NULL, glusterfs_sigwaiter,
- (void *) &set);
- if (ret) {
- /*
- TODO:
- fallback to signals getting handled by other threads.
- setup the signal handlers
- */
- gf_log ("", GF_LOG_WARNING,
- "failed to create pthread %s",
- strerror (errno));
- return ret;
- }
-
+ sigset_t set;
+ int ret = 0;
+
+ sigemptyset(&set);
+
+ /* common setting for all threads */
+ signal(SIGSEGV, glusterfsd_print_trace);
+ signal(SIGABRT, glusterfsd_print_trace);
+ signal(SIGILL, glusterfsd_print_trace);
+ signal(SIGTRAP, glusterfsd_print_trace);
+ signal(SIGFPE, glusterfsd_print_trace);
+ signal(SIGBUS, glusterfsd_print_trace);
+ signal(SIGINT, cleanup_and_exit);
+ signal(SIGPIPE, SIG_IGN);
+
+ /* block these signals from non-sigwaiter threads */
+ sigaddset(&set, SIGTERM); /* cleanup_and_exit */
+ sigaddset(&set, SIGHUP); /* reincarnate */
+ sigaddset(&set, SIGUSR1); /* gf_proc_dump_info */
+ sigaddset(&set, SIGUSR2);
+
+ /* Signals needed for asynchronous framework. */
+ sigaddset(&set, GF_ASYNC_SIGQUEUE);
+ sigaddset(&set, GF_ASYNC_SIGCTRL);
+
+ ret = pthread_sigmask(SIG_BLOCK, &set, NULL);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_22, NULL);
return ret;
-}
+ }
+
+ ret = gf_thread_create(&ctx->sigwaiter, NULL, glusterfs_sigwaiter,
+ (void *)&set, "sigwait");
+ if (ret) {
+ /*
+ TODO:
+ fallback to signals getting handled by other threads.
+ setup the signal handlers
+ */
+ gf_smsg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_23, NULL);
+ return ret;
+ }
+ return ret;
+}
int
-daemonize (glusterfs_ctx_t *ctx)
+daemonize(glusterfs_ctx_t *ctx)
{
- int ret = -1;
- cmd_args_t *cmd_args = NULL;
- int cstatus = 0;
-
- cmd_args = &ctx->cmd_args;
-
- ret = glusterfs_pidfile_setup (ctx);
- if (ret)
- goto out;
-
- if (cmd_args->no_daemon_mode)
- goto postfork;
-
- if (cmd_args->debug_mode)
- goto postfork;
-
- ret = os_daemon_return (0, 0);
- switch (ret) {
+ int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ int cstatus = 0;
+ int err = 1;
+ int child_pid = 0;
+
+ cmd_args = &ctx->cmd_args;
+
+ ret = glusterfs_pidfile_setup(ctx);
+ if (ret)
+ goto out;
+
+ if (cmd_args->no_daemon_mode) {
+ goto postfork;
+ }
+
+ if (cmd_args->debug_mode)
+ goto postfork;
+
+ ret = pipe(ctx->daemon_pipe);
+ if (ret) {
+ /* If pipe() fails, retain daemon_pipe[] = {-1, -1}
+ and parent will just not wait for child status
+ */
+ ctx->daemon_pipe[0] = -1;
+ ctx->daemon_pipe[1] = -1;
+ }
+
+ ret = os_daemon_return(0, 0);
+ switch (ret) {
case -1:
- gf_log ("daemonize", GF_LOG_ERROR,
- "Daemonization failed: %s", strerror(errno));
- goto out;
+ if (ctx->daemon_pipe[0] != -1) {
+ sys_close(ctx->daemon_pipe[0]);
+ sys_close(ctx->daemon_pipe[1]);
+ }
+
+ gf_smsg("daemonize", GF_LOG_ERROR, errno, glusterfsd_msg_24, NULL);
+ goto out;
case 0:
- break;
+ /* child */
+ /* close read */
+ sys_close(ctx->daemon_pipe[0]);
+ break;
default:
- if (ctx->mnt_pid > 0) {
- ret = waitpid (ctx->mnt_pid, &cstatus, 0);
- if (!(ret == ctx->mnt_pid && cstatus == 0)) {
- gf_log ("daemonize", GF_LOG_ERROR,
- "mount failed");
- exit (1);
- }
+ /* parent */
+ /* close write */
+ child_pid = ret;
+ sys_close(ctx->daemon_pipe[1]);
+
+ if (ctx->mnt_pid > 0) {
+ ret = waitpid(ctx->mnt_pid, &cstatus, 0);
+ if (!(ret == ctx->mnt_pid)) {
+ if (WIFEXITED(cstatus)) {
+ err = WEXITSTATUS(cstatus);
+ } else {
+ err = cstatus;
+ }
+ gf_smsg("daemonize", GF_LOG_ERROR, 0, glusterfsd_msg_25,
+ NULL);
+ exit(err);
}
- _exit (0);
- }
+ }
+ sys_read(ctx->daemon_pipe[0], (void *)&err, sizeof(err));
+ /* NOTE: Only the least significant 8 bits i.e (err & 255)
+ will be available to parent process on calling exit() */
+ if (err)
+ _exit(abs(err));
+
+ /* Update pid in parent only for glusterd process */
+ if (ctx->process_mode == GF_GLUSTERD_PROCESS) {
+ ret = glusterfs_pidfile_update(ctx, child_pid);
+ if (ret)
+ exit(1);
+ }
+ _exit(0);
+ }
postfork:
- ret = glusterfs_pidfile_update (ctx);
+ /* Update pid in child either process_mode is not belong to glusterd
+ or process is spawned in no daemon mode
+ */
+ if ((ctx->process_mode != GF_GLUSTERD_PROCESS) ||
+ (cmd_args->no_daemon_mode)) {
+ ret = glusterfs_pidfile_update(ctx, getpid());
if (ret)
- goto out;
+ goto out;
+ }
+ gf_log("glusterfs", GF_LOG_INFO, "Pid of current running process is %d",
+ getpid());
+ ret = gf_log_inject_timer_event(ctx);
- glusterfs_signals_setup (ctx);
+ glusterfs_signals_setup(ctx);
out:
- return ret;
+ return ret;
}
+#ifdef GF_LINUX_HOST_OS
+static int
+set_oom_score_adj(glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ int fd = -1;
+ size_t oom_score_len = 0;
+ struct oom_api_info *api = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->oom_score_adj)
+ goto success;
+
+ api = get_oom_api_info();
+ if (!api)
+ goto out;
+
+ fd = open(api->oom_api_file, O_WRONLY);
+ if (fd < 0)
+ goto out;
+
+ oom_score_len = strlen(cmd_args->oom_score_adj);
+ if (sys_write(fd, cmd_args->oom_score_adj, oom_score_len) !=
+ oom_score_len) {
+ sys_close(fd);
+ goto out;
+ }
+
+ if (sys_close(fd) < 0)
+ goto out;
+
+success:
+ ret = 0;
+
+out:
+ return ret;
+}
+#endif
int
-glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp)
+glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp)
{
- glusterfs_graph_t *graph = NULL;
- int ret = -1;
- xlator_t *trav = NULL;
+ glusterfs_graph_t *graph = NULL;
+ int ret = -1;
+ xlator_t *trav = NULL;
- graph = glusterfs_graph_construct (fp);
- if (!graph) {
- gf_log ("", GF_LOG_ERROR, "failed to construct the graph");
- goto out;
- }
+ if (!ctx)
+ return -1;
- for (trav = graph->first; trav; trav = trav->next) {
- if (strcmp (trav->type, "mount/fuse") == 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "fuse xlator cannot be specified "
- "in volume file");
- goto out;
- }
- }
+ graph = glusterfs_graph_construct(fp);
+ if (!graph) {
+ gf_smsg("", GF_LOG_ERROR, 0, glusterfsd_msg_26, NULL);
+ goto out;
+ }
- ret = glusterfs_graph_prepare (graph, ctx);
- if (ret) {
- glusterfs_graph_destroy (graph);
- goto out;
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp(trav->type, "mount/fuse") == 0) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_27, NULL);
+ goto out;
}
+ }
- ret = glusterfs_graph_activate (graph, ctx);
+ xlator_t *xl = graph->first;
+ if (xl && (strcmp(xl->type, "protocol/server") == 0)) {
+ (void)copy_opts_to_child(xl, FIRST_CHILD(xl), "*auth*");
+ }
- if (ret) {
- glusterfs_graph_destroy (graph);
- goto out;
- }
+ ret = glusterfs_graph_prepare(graph, ctx, ctx->cmd_args.volume_name);
+ if (ret) {
+ goto out;
+ }
- gf_log_volume_file (fp);
+ ret = glusterfs_graph_activate(graph, ctx);
- ret = 0;
+ if (ret) {
+ goto out;
+ }
+
+ gf_log_dump_graph(fp, graph);
+
+ ret = 0;
out:
- if (fp)
- fclose (fp);
+ if (fp)
+ fclose(fp);
+
+ if (ret) {
+ /* TODO This code makes to generic for all graphs
+ client as well as servers.For now it destroys
+ graph only for server-side xlators not for client-side
+ xlators, before destroying a graph call xlator fini for
+ xlators those call xlator_init to avoid leak
+ */
+ if (graph) {
+ xl = graph->first;
+ if ((ctx->active != graph) &&
+ (xl && !strcmp(xl->type, "protocol/server"))) {
+ /* Take dict ref for every graph xlator to avoid dict leak
+ at the time of graph destroying
+ */
+ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ }
- if (ret && !ctx->active) {
- /* there is some error in setting up the first graph itself */
- cleanup_and_exit (0);
+ /* there is some error in setting up the first graph itself */
+ if (!ctx->active) {
+ emancipate(ctx, ret);
+ cleanup_and_exit(ret);
}
+ }
- return ret;
+ return ret;
}
-
int
-glusterfs_volumes_init (glusterfs_ctx_t *ctx)
+glusterfs_volumes_init(glusterfs_ctx_t *ctx)
{
- FILE *fp = NULL;
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
+ FILE *fp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = glusterfs_listener_init (ctx);
- if (ret)
- goto out;
- }
+ if (cmd_args->sock_file) {
+ ret = glusterfs_listener_init(ctx);
+ if (ret)
+ goto out;
+ }
- if (cmd_args->volfile_server) {
- ret = glusterfs_mgmt_init (ctx);
- goto out;
- }
+ if (cmd_args->volfile_server) {
+ ret = glusterfs_mgmt_init(ctx);
+ /* return, do not emancipate() yet */
+ return ret;
+ }
- fp = get_volfp (ctx);
+ fp = get_volfp(ctx);
- if (!fp) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "Cannot reach volume specification file");
- ret = -1;
- goto out;
- }
+ if (!fp) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_28, NULL);
+ ret = -1;
+ goto out;
+ }
- ret = glusterfs_process_volfp (ctx, fp);
- if (ret)
- goto out;
+ ret = glusterfs_process_volfp(ctx, fp);
+ if (ret)
+ goto out;
out:
- return ret;
+ emancipate(ctx, ret);
+ return ret;
}
+/* This is the only legal global pointer */
+glusterfs_ctx_t *glusterfsd_ctx;
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- glusterfs_ctx_t *ctx = NULL;
- int ret = -1;
-
- ret = glusterfs_globals_init ();
- if (ret)
- return ret;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = -1;
+ char cmdlinestr[PATH_MAX] = {
+ 0,
+ };
+ cmd_args_t *cmd = NULL;
+
+ gf_check_and_set_mem_acct(argc, argv);
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_29, NULL);
+ return ENOMEM;
+ }
+ glusterfsd_ctx = ctx;
+
+ ret = glusterfs_globals_init(ctx);
+ if (ret)
+ return ret;
- ctx = glusterfs_ctx_get ();
- if (!ctx) {
- gf_log ("glusterfs", GF_LOG_CRITICAL,
- "ERROR: glusterfs context not initialized");
- return ENOMEM;
- }
-#ifndef DEBUG
- /* Enable memory accounting on the fly based on argument */
- gf_check_and_set_mem_acct (argc, argv, ctx);
-#endif
- ret = glusterfs_ctx_defaults_init (ctx);
- if (ret)
+ THIS->ctx = ctx;
+
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret)
+ goto out;
+
+ ret = parse_cmdline(argc, argv, ctx);
+ if (ret)
+ goto out;
+ cmd = &ctx->cmd_args;
+
+ if (cmd->print_xlatordir) {
+ /* XLATORDIR passed through a -D flag to GCC */
+ printf("%s\n", XLATORDIR);
+ goto out;
+ }
+
+ if (cmd->print_statedumpdir) {
+ printf("%s\n", DEFAULT_VAR_RUN_DIRECTORY);
+ goto out;
+ }
+
+ if (cmd->print_logdir) {
+ printf("%s\n", DEFAULT_LOG_FILE_DIRECTORY);
+ goto out;
+ }
+
+ if (cmd->print_libexecdir) {
+ printf("%s\n", LIBEXECDIR);
+ goto out;
+ }
+
+ if (cmd->print_netgroups) {
+ /* If this option is set we want to print & verify the file,
+ * set the return value (exit code in this case) and exit.
+ */
+ ret = print_netgroups_file(cmd->print_netgroups);
+ goto out;
+ }
+
+ if (cmd->print_exports) {
+ /* If this option is set we want to print & verify the file,
+ * set the return value (exit code in this case)
+ * and exit.
+ */
+ ret = print_exports_file(cmd->print_exports);
+ goto out;
+ }
+
+ ret = logging_init(ctx, argv[0]);
+ if (ret)
+ goto out;
+
+ /* set brick_mux mode only for server process */
+ if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43, NULL);
+ goto out;
+ }
+
+ /* log the version of glusterfs running here along with the actual
+ command line options. */
+ {
+ int i = 0;
+ int pos = 0;
+ int len = snprintf(cmdlinestr, sizeof(cmdlinestr), "%s", argv[0]);
+ for (i = 1; (i < argc) && (len > 0); i++) {
+ pos += len;
+ len = snprintf(cmdlinestr + pos, sizeof(cmdlinestr) - pos, " %s",
+ argv[i]);
+ if ((len <= 0) || (len >= (sizeof(cmdlinestr) - pos))) {
+ gf_smsg("glusterfs", GF_LOG_ERROR, 0, glusterfsd_msg_029, NULL);
+ ret = -1;
goto out;
+ }
+ }
+ gf_smsg(argv[0], GF_LOG_INFO, 0, glusterfsd_msg_30, "arg=%s", argv[0],
+ "version=%s", PACKAGE_VERSION, "cmdlinestr=%s", cmdlinestr,
+ NULL);
- ret = parse_cmdline (argc, argv, ctx);
- if (ret)
- goto out;
+ ctx->cmdlinestr = gf_strdup(cmdlinestr);
+ }
- ret = logging_init (ctx);
- if (ret)
- goto out;
+ gf_proc_dump_init();
- /* log the version of glusterfs running here */
- gf_log (argv[0], GF_LOG_INFO,
- "Started running %s version %s",
- argv[0], PACKAGE_VERSION);
+ ret = create_fuse_mount(ctx);
+ if (ret)
+ goto out;
- gf_proc_dump_init();
+ ret = daemonize(ctx);
+ if (ret)
+ goto out;
- ret = create_fuse_mount (ctx);
- if (ret)
- goto out;
+ /*
+ * If we do this before daemonize, the pool-sweeper thread dies with
+ * the parent, but we want to do it as soon as possible after that in
+ * case something else depends on pool allocations.
+ */
+ mem_pools_init();
- ret = daemonize (ctx);
- if (ret)
- goto out;
+ ret = gf_async_init(ctx);
+ if (ret < 0) {
+ goto out;
+ }
- ctx->env = syncenv_new (0);
- if (!ctx->env) {
- gf_log ("", GF_LOG_ERROR,
- "Could not create new sync-environment");
- goto out;
- }
+#ifdef GF_LINUX_HOST_OS
+ ret = set_oom_score_adj(ctx);
+ if (ret)
+ goto out;
+#endif
- ret = glusterfs_volumes_init (ctx);
- if (ret)
- goto out;
+ ctx->env = syncenv_new(0, 0, 0);
+ if (!ctx->env) {
+ gf_smsg("", GF_LOG_ERROR, 0, glusterfsd_msg_31, NULL);
+ goto out;
+ }
- ret = event_dispatch (ctx->event_pool);
+ /* do this _after_ daemonize() */
+ if (!glusterfs_ctx_tw_get(ctx)) {
+ ret = -1;
+ goto out;
+ }
-out:
-// glusterfs_ctx_destroy (ctx);
+ ret = glusterfs_volumes_init(ctx);
+ if (ret)
+ goto out;
- return ret;
+ ret = gf_event_dispatch(ctx->event_pool);
+
+out:
+ // glusterfs_ctx_destroy (ctx);
+ gf_async_fini();
+ return ret;
}
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index b742df2d9f9..4e1413caa70 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -1,114 +1,142 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERFSD_H__
#define __GLUSTERFSD_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "rpcsvc.h"
#include "glusterd1-xdr.h"
-#define DEFAULT_GLUSTERD_VOLFILE CONFDIR "/glusterd.vol"
-#define DEFAULT_CLIENT_VOLFILE CONFDIR "/glusterfs.vol"
-#define DEFAULT_SERVER_VOLFILE CONFDIR "/glusterfsd.vol"
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define DEFAULT_LOG_LEVEL GF_LOG_INFO
+#define DEFAULT_GLUSTERD_VOLFILE CONFDIR "/glusterd.vol"
+#define DEFAULT_CLIENT_VOLFILE CONFDIR "/glusterfs.vol"
+#define DEFAULT_SERVER_VOLFILE CONFDIR "/glusterfsd.vol"
-#define DEFAULT_EVENT_POOL_SIZE 16384
+#define DEFAULT_EVENT_POOL_SIZE 16384
-#define ARGP_LOG_LEVEL_NONE_OPTION "NONE"
-#define ARGP_LOG_LEVEL_TRACE_OPTION "TRACE"
-#define ARGP_LOG_LEVEL_CRITICAL_OPTION "CRITICAL"
-#define ARGP_LOG_LEVEL_ERROR_OPTION "ERROR"
-#define ARGP_LOG_LEVEL_WARNING_OPTION "WARNING"
-#define ARGP_LOG_LEVEL_INFO_OPTION "INFO"
-#define ARGP_LOG_LEVEL_DEBUG_OPTION "DEBUG"
+#define ARGP_LOG_LEVEL_NONE_OPTION "NONE"
+#define ARGP_LOG_LEVEL_TRACE_OPTION "TRACE"
+#define ARGP_LOG_LEVEL_CRITICAL_OPTION "CRITICAL"
+#define ARGP_LOG_LEVEL_ERROR_OPTION "ERROR"
+#define ARGP_LOG_LEVEL_WARNING_OPTION "WARNING"
+#define ARGP_LOG_LEVEL_INFO_OPTION "INFO"
+#define ARGP_LOG_LEVEL_DEBUG_OPTION "DEBUG"
-#define ENABLE_NO_DAEMON_MODE 1
-#define ENABLE_DEBUG_MODE 1
+#define ENABLE_NO_DAEMON_MODE 1
+#define ENABLE_DEBUG_MODE 1
-#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
/* Considering 4 key/value pairs in a dictionary on an average */
-#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
-#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+
+#define GF_MEMPOOL_COUNT_OF_LRU_BUF_T 256
enum argp_option_keys {
- ARGP_VOLFILE_SERVER_KEY = 's',
- ARGP_VOLUME_FILE_KEY = 'f',
- ARGP_LOG_LEVEL_KEY = 'L',
- ARGP_LOG_FILE_KEY = 'l',
- ARGP_VOLFILE_SERVER_PORT_KEY = 131,
- ARGP_VOLFILE_SERVER_TRANSPORT_KEY = 132,
- ARGP_PID_FILE_KEY = 'p',
- ARGP_SOCK_FILE_KEY = 'S',
- ARGP_NO_DAEMON_KEY = 'N',
- ARGP_RUN_ID_KEY = 'r',
- ARGP_DEBUG_KEY = 133,
- ARGP_ENTRY_TIMEOUT_KEY = 135,
- ARGP_ATTRIBUTE_TIMEOUT_KEY = 136,
- ARGP_VOLUME_NAME_KEY = 137,
- ARGP_XLATOR_OPTION_KEY = 138,
- ARGP_DIRECT_IO_MODE_KEY = 139,
+ ARGP_VOLFILE_SERVER_KEY = 's',
+ ARGP_VOLUME_FILE_KEY = 'f',
+ ARGP_LOG_LEVEL_KEY = 'L',
+ ARGP_LOG_FILE_KEY = 'l',
+ ARGP_VOLFILE_SERVER_PORT_KEY = 131,
+ ARGP_VOLFILE_SERVER_TRANSPORT_KEY = 132,
+ ARGP_PID_FILE_KEY = 'p',
+ ARGP_SOCK_FILE_KEY = 'S',
+ ARGP_NO_DAEMON_KEY = 'N',
+ ARGP_RUN_ID_KEY = 'r',
+ ARGP_PRINT_NETGROUPS = 'n',
+ ARGP_PRINT_EXPORTS = 'e',
+ ARGP_DEBUG_KEY = 133,
+ ARGP_NEGATIVE_TIMEOUT_KEY = 134,
+ ARGP_ENTRY_TIMEOUT_KEY = 135,
+ ARGP_ATTRIBUTE_TIMEOUT_KEY = 136,
+ ARGP_VOLUME_NAME_KEY = 137,
+ ARGP_XLATOR_OPTION_KEY = 138,
+ ARGP_DIRECT_IO_MODE_KEY = 139,
#ifdef GF_DARWIN_HOST_OS
- ARGP_NON_LOCAL_KEY = 140,
+ ARGP_NON_LOCAL_KEY = 140,
#endif /* DARWIN */
- ARGP_VOLFILE_ID_KEY = 143,
- ARGP_VOLFILE_CHECK_KEY = 144,
- ARGP_VOLFILE_MAX_FETCH_ATTEMPTS = 145,
- ARGP_LOG_SERVER_KEY = 146,
- ARGP_LOG_SERVER_PORT_KEY = 147,
- ARGP_READ_ONLY_KEY = 148,
- ARGP_MAC_COMPAT_KEY = 149,
- ARGP_DUMP_FUSE_KEY = 150,
- ARGP_BRICK_NAME_KEY = 151,
- ARGP_BRICK_PORT_KEY = 152,
- ARGP_CLIENT_PID_KEY = 153,
- ARGP_ACL_KEY = 154,
- ARGP_WORM_KEY = 155,
- ARGP_USER_MAP_ROOT_KEY = 156,
- ARGP_MEM_ACCOUNTING_KEY = 157,
- ARGP_SELINUX_KEY = 158,
- ARGP_INODE32_KEY = 159, /* 163 upstream */
- ARGP_GID_TIMEOUT_KEY = 160,
- ARGP_FUSE_BACKGROUND_QLEN_KEY = 161,
- ARGP_FUSE_CONGESTION_THRESHOLD_KEY = 162,
+ ARGP_VOLFILE_ID_KEY = 143,
+ ARGP_VOLFILE_CHECK_KEY = 144,
+ ARGP_VOLFILE_MAX_FETCH_ATTEMPTS = 145,
+ ARGP_LOG_SERVER_KEY = 146,
+ ARGP_LOG_SERVER_PORT_KEY = 147,
+ ARGP_READ_ONLY_KEY = 148,
+ ARGP_MAC_COMPAT_KEY = 149,
+ ARGP_DUMP_FUSE_KEY = 150,
+ ARGP_BRICK_NAME_KEY = 151,
+ ARGP_BRICK_PORT_KEY = 152,
+ ARGP_CLIENT_PID_KEY = 153,
+ ARGP_ACL_KEY = 154,
+ ARGP_WORM_KEY = 155,
+ ARGP_USER_MAP_ROOT_KEY = 156,
+ ARGP_MEM_ACCOUNTING_KEY = 157,
+ ARGP_SELINUX_KEY = 158,
+ ARGP_FOPEN_KEEP_CACHE_KEY = 159,
+ ARGP_GID_TIMEOUT_KEY = 160,
+ ARGP_FUSE_BACKGROUND_QLEN_KEY = 161,
+ ARGP_FUSE_CONGESTION_THRESHOLD_KEY = 162,
+ ARGP_INODE32_KEY = 163,
+ ARGP_FUSE_MOUNTOPTS_KEY = 164,
+ ARGP_FUSE_USE_READDIRP_KEY = 165,
+ ARGP_AUX_GFID_MOUNT_KEY = 166,
+ ARGP_FUSE_NO_ROOT_SQUASH_KEY = 167,
+ ARGP_LOGGER = 168,
+ ARGP_LOG_FORMAT = 169,
+ ARGP_LOG_BUF_SIZE = 170,
+ ARGP_LOG_FLUSH_TIMEOUT = 171,
+ ARGP_SECURE_MGMT_KEY = 172,
+ ARGP_GLOBAL_TIMER_WHEEL = 173,
+ ARGP_RESOLVE_GIDS_KEY = 174,
+ ARGP_CAPABILITY_KEY = 175,
+#ifdef GF_LINUX_HOST_OS
+ ARGP_OOM_SCORE_ADJ_KEY = 176,
+#endif
+ ARGP_LOCALTIME_LOGGING_KEY = 177,
+ ARGP_SUBDIR_MOUNT_KEY = 178,
+ ARGP_PROCESS_NAME_KEY = 179,
+ ARGP_FUSE_EVENT_HISTORY_KEY = 180,
+ ARGP_THIN_CLIENT_KEY = 181,
+ ARGP_READER_THREAD_COUNT_KEY = 182,
+ ARGP_PRINT_XLATORDIR_KEY = 183,
+ ARGP_PRINT_STATEDUMPDIR_KEY = 184,
+ ARGP_PRINT_LOGDIR_KEY = 185,
+ ARGP_KERNEL_WRITEBACK_CACHE_KEY = 186,
+ ARGP_ATTR_TIMES_GRANULARITY_KEY = 187,
+ ARGP_PRINT_LIBEXECDIR_KEY = 188,
+ ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
+ ARGP_FUSE_LRU_LIMIT_KEY = 190,
+ ARGP_FUSE_AUTO_INVAL_KEY = 191,
+ ARGP_GLOBAL_THREADING_KEY = 192,
+ ARGP_BRICK_MUX_KEY = 193,
+ ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY = 194,
+ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
};
-struct _gfd_vol_top_priv_t {
- rpcsvc_request_t *req;
- gd1_mgmt_brick_op_req xlator_req;
- uint32_t blk_count;
- uint32_t blk_size;
- double throughput;
- double time;
- int32_t ret;
+struct _gfd_vol_top_priv {
+ rpcsvc_request_t *req;
+ gd1_mgmt_brick_op_req xlator_req;
+ uint32_t blk_count;
+ uint32_t blk_size;
+ double throughput;
+ double time;
+ int32_t ret;
};
-typedef struct _gfd_vol_top_priv_t gfd_vol_top_priv_t;
+typedef struct _gfd_vol_top_priv gfd_vol_top_priv_t;
+
+int
+glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx);
+int
+glusterfs_volfile_fetch(glusterfs_ctx_t *ctx);
+void
+cleanup_and_exit(int signum);
-int glusterfs_mgmt_pmap_signout (glusterfs_ctx_t *ctx);
-int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
-int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
-void cleanup_and_exit (int signum);
+void
+xlator_mem_cleanup(xlator_t *this);
-void *glusterfs_volume_top_read_perf (void *args);
-void *glusterfs_volume_top_write_perf (void *args);
+extern glusterfs_ctx_t *glusterfsd_ctx;
#endif /* __GLUSTERFSD_H__ */
diff --git a/scheduler/nufa/Makefile.am b/heal/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/scheduler/nufa/Makefile.am
+++ b/heal/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/heal/src/Makefile.am b/heal/src/Makefile.am
new file mode 100644
index 00000000000..aa18d3eff88
--- /dev/null
+++ b/heal/src/Makefile.am
@@ -0,0 +1,29 @@
+if WITH_SERVER
+scriptdir = $(GLUSTERFS_LIBEXECDIR)
+script_PROGRAMS = glfsheal
+endif
+
+glfsheal_SOURCES = glfs-heal.c
+
+glfsheal_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
+ $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/api/src/libgfapi.la \
+ $(XML_LIBS) $(GFAPI_LIBS) $(UUID_LIBS)
+
+glfsheal_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/xlators/lib/src\
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
+ -I$(top_srcdir)/rpc/xdr/src\
+ -I$(top_builddir)/rpc/xdr/src\
+ -I$(top_srcdir)/api/src\
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(XML_CFLAGS)
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
new file mode 100644
index 00000000000..bf4b47f8760
--- /dev/null
+++ b/heal/src/glfs-heal.c
@@ -0,0 +1,1793 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "glfs.h"
+#include "glfs-internal.h"
+#include "protocol-common.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <string.h>
+#include <glusterfs/glusterfs.h>
+#include <libgen.h>
+
+#if (HAVE_LIB_XML)
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+
+xmlTextWriterPtr glfsh_writer;
+xmlDocPtr glfsh_doc = NULL;
+#endif
+
+#define XML_RET_CHECK_AND_GOTO(ret, label) \
+ do { \
+ if (ret < 0) { \
+ ret = -1; \
+ goto label; \
+ } else \
+ ret = 0; \
+ } while (0)
+
+#define MODE_XML (1 << 0)
+#define MODE_NO_LOG (1 << 1)
+
+typedef struct num_entries {
+ uint64_t num_entries;
+ uint64_t pending_entries;
+ uint64_t spb_entries;
+ uint64_t possibly_healing_entries;
+} num_entries_t;
+
+typedef int (*print_status)(dict_t *, char *, uuid_t, num_entries_t *,
+ gf_boolean_t flag);
+
+int
+glfsh_heal_splitbrain_file(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *file, dict_t *xattr_req);
+
+typedef struct glfs_info {
+ int (*init)(void);
+ int (*print_brick_from_xl)(xlator_t *xl, loc_t *rootloc);
+ int (*print_heal_op_status)(int ret, uint64_t num_entries, char *fmt_str);
+ int (*print_heal_op_summary)(int ret, num_entries_t *num_entries);
+ int (*print_heal_status)(char *path, uuid_t gfid, char *status);
+ int (*print_spb_status)(char *path, uuid_t gfid, char *status);
+ int (*end)(int op_ret, char *op_errstr);
+} glfsh_info_t;
+
+glfsh_info_t *glfsh_output = NULL;
+int32_t is_xml;
+
+#define DEFAULT_HEAL_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define USAGE_STR \
+ "Usage: %s <VOLNAME> [bigger-file <FILE> | " \
+ "latest-mtime <FILE> | " \
+ "source-brick <HOSTNAME:BRICKNAME> [<FILE>] | " \
+ "split-brain-info | info-summary] [glusterd-sock <FILE>" \
+ "]\n"
+
+typedef enum {
+ GLFSH_MODE_CONTINUE_ON_ERROR = 1,
+ GLFSH_MODE_EXIT_ON_FIRST_FAILURE,
+} glfsh_fail_mode_t;
+
+int
+glfsh_init()
+{
+ return 0;
+}
+
+int
+glfsh_end_op_granular_entry_heal(int op_ret, char *op_errstr)
+{
+ /* If error string is available, give it higher precedence.*/
+
+ if (op_errstr) {
+ printf("%s\n", op_errstr);
+ } else if (op_ret < 0) {
+ if (op_ret == -EAGAIN)
+ printf(
+ "One or more entries need heal. Please execute "
+ "the command again after there are no entries "
+ "to be healed\n");
+ else if (op_ret == -ENOTCONN)
+ printf(
+ "One or more bricks could be down. Please "
+ "execute the command again after bringing all "
+ "bricks online and finishing any pending "
+ "heals\n");
+ else
+ printf(
+ "Command failed - %s. Please check the logs for"
+ " more details\n",
+ strerror(-op_ret));
+ }
+ return 0;
+}
+
+int
+glfsh_end(int op_ret, char *op_errstr)
+{
+ if (op_errstr)
+ printf("%s\n", op_errstr);
+ return 0;
+}
+
+int
+glfsh_print_hr_spb_status(char *path, uuid_t gfid, char *status)
+{
+ printf("%s\n", path);
+ fflush(stdout);
+ return 0;
+}
+
+int
+glfsh_no_print_hr_status(char *path, uuid_t gfid, char *status)
+{
+ return 0;
+}
+
+int
+glfsh_print_hr_heal_status(char *path, uuid_t gfid, char *status)
+{
+ printf("%s%s\n", path, status);
+ fflush(stdout);
+ return 0;
+}
+
+#if (HAVE_LIB_XML)
+
+int
+glfsh_xml_init()
+{
+ int ret = -1;
+ glfsh_writer = xmlNewTextWriterDoc(&glfsh_doc, 0);
+ if (glfsh_writer == NULL) {
+ return -1;
+ }
+
+ ret = xmlTextWriterStartDocument(glfsh_writer, "1.0", "UTF-8", "yes");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* <cliOutput> */
+ ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"cliOutput");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* <healInfo> */
+ ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"healInfo");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+ /* <bricks> */
+ ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"bricks");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+ xmlTextWriterFlush(glfsh_writer);
+xml_out:
+ return ret;
+}
+
+int
+glfsh_xml_end(int op_ret, char *op_errstr)
+{
+ int ret = -1;
+ int op_errno = 0;
+ gf_boolean_t alloc = _gf_false;
+
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ if (op_errstr == NULL) {
+ op_errstr = gf_strdup(strerror(op_errno));
+ alloc = _gf_true;
+ }
+ } else {
+ op_errstr = NULL;
+ }
+
+ /* </bricks> */
+ ret = xmlTextWriterEndElement(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* </healInfo> */
+ ret = xmlTextWriterEndElement(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ ret = xmlTextWriterWriteFormatElement(glfsh_writer, (xmlChar *)"opRet",
+ "%d", op_ret);
+
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ ret = xmlTextWriterWriteFormatElement(glfsh_writer, (xmlChar *)"opErrno",
+ "%d", op_errno);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ if (op_errstr)
+ ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"opErrstr", "%s", op_errstr);
+ else
+ ret = xmlTextWriterWriteFormatElement(glfsh_writer,
+ (xmlChar *)"opErrstr", "%s", "");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+ ret = xmlTextWriterEndDocument(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* Dump xml document to stdout and pretty format it */
+ xmlSaveFormatFileEnc("-", glfsh_doc, "UTF-8", 1);
+
+ xmlFreeTextWriter(glfsh_writer);
+ xmlFreeDoc(glfsh_doc);
+xml_out:
+ if (alloc)
+ GF_FREE(op_errstr);
+ return ret;
+}
+
+int
+glfsh_print_xml_heal_op_status(int ret, uint64_t num_entries, char *fmt_str)
+{
+ int x_ret = 0;
+ if (ret < 0 && num_entries == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", strerror(-ret));
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ if (fmt_str) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntries", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+ goto out;
+ } else if (ret == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", "Connected");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+
+ if (ret < 0) {
+ if (fmt_str) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status",
+ "Failed to process entries completely. "
+ "(%s)%s %" PRIu64 "",
+ strerror(-ret), fmt_str, num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+ } else {
+ if (fmt_str) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntries", "%" PRIu64 "",
+ num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+ }
+out:
+ if (x_ret >= 0) {
+ x_ret = xmlTextWriterEndElement(glfsh_writer);
+ if (x_ret >= 0) {
+ xmlTextWriterFlush(glfsh_writer);
+ x_ret = 0;
+ } else {
+ x_ret = -1;
+ }
+ }
+ return x_ret;
+}
+
+int
+glfsh_print_xml_heal_op_summary(int ret, num_entries_t *num_entries)
+{
+ int x_ret = 0;
+
+ if (ret < 0 && num_entries == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", strerror(-ret));
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"totalNumberOfEntries", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInHealPending", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInSplitBrain", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesPossiblyHealing", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ goto out;
+ } else if (ret == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", "Connected");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+
+ if (ret < 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status",
+ "Failed to process entries"
+ " completely. "
+ "(%s)totalNumberOfEntries%" PRIu64 "",
+ strerror(-ret), num_entries->num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ } else {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"totalNumberOfEntries", "%" PRIu64 "",
+ num_entries->num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInHealPending",
+ "%" PRIu64 "", num_entries->pending_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInSplitBrain",
+ "%" PRIu64 "", num_entries->spb_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesPossiblyHealing",
+ "%" PRIu64 "", num_entries->possibly_healing_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+out:
+ if (x_ret >= 0) {
+ x_ret = xmlTextWriterEndElement(glfsh_writer);
+ }
+ return x_ret;
+}
+
+int
+glfsh_print_xml_file_status(char *path, uuid_t gfid, char *status)
+{
+ int x_ret = 0;
+
+ x_ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatAttribute(glfsh_writer, (xmlChar *)"gfid",
+ "%s", uuid_utoa(gfid));
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatString(glfsh_writer, "%s", path);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterEndElement(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ xmlTextWriterFlush(glfsh_writer);
+out:
+ return x_ret;
+}
+
+int
+glfsh_print_xml_brick_from_xl(xlator_t *xl, loc_t *rootloc)
+{
+ char *remote_host = NULL;
+ char *remote_subvol = NULL;
+ char *uuid = NULL;
+ int ret = 0;
+ int x_ret = 0;
+
+ ret = dict_get_str(xl->options, "remote-host", &remote_host);
+ if (ret < 0)
+ goto print;
+
+ ret = dict_get_str(xl->options, "remote-subvolume", &remote_subvol);
+ if (ret < 0)
+ goto print;
+ ret = syncop_getxattr(xl, rootloc, &xl->options, GF_XATTR_NODE_UUID_KEY,
+ NULL, NULL);
+ if (ret < 0)
+ goto print;
+
+ ret = dict_get_str(xl->options, GF_XATTR_NODE_UUID_KEY, &uuid);
+ if (ret < 0)
+ goto print;
+print:
+
+ x_ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(x_ret, xml_out);
+ x_ret = xmlTextWriterWriteFormatAttribute(
+ glfsh_writer, (xmlChar *)"hostUuid", "%s", uuid ? uuid : "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, xml_out);
+
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"name", "%s:%s",
+ remote_host ? remote_host : "-", remote_subvol ? remote_subvol : "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, xml_out);
+ xmlTextWriterFlush(glfsh_writer);
+xml_out:
+ return ret;
+}
+#endif
+
+int
+glfsh_link_inode_update_loc(loc_t *loc, struct iatt *iattr)
+{
+ inode_t *link_inode = NULL;
+ int ret = -1;
+
+ link_inode = inode_link(loc->inode, NULL, NULL, iattr);
+ if (link_inode == NULL)
+ goto out;
+
+ inode_unref(loc->inode);
+ loc->inode = link_inode;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glfsh_no_print_hr_heal_op_status(int ret, uint64_t num_entries, char *fmt_str)
+{
+ return 0;
+}
+
+int
+glfsh_print_hr_heal_op_summary(int ret, num_entries_t *num_entries)
+{
+ if (ret < 0 && num_entries->num_entries == 0) {
+ printf("Status: %s\n", strerror(-ret));
+ printf("Total Number of entries: -\n");
+ printf("Number of entries in heal pending: -\n");
+ printf("Number of entries in split-brain: -\n");
+ printf("Number of entries possibly healing: -\n");
+ goto out;
+ } else if (ret == 0) {
+ printf("Status: Connected\n");
+ }
+
+ if (ret < 0) {
+ printf(
+ "Status: Failed to process entries completely. "
+ "(%s)\nTotal Number of entries: %" PRIu64 "\n",
+ strerror(-ret), num_entries->num_entries);
+ } else {
+ printf("Total Number of entries: %" PRIu64 "\n",
+ num_entries->num_entries);
+ printf("Number of entries in heal pending: %" PRIu64 "\n",
+ num_entries->pending_entries);
+ printf("Number of entries in split-brain: %" PRIu64 "\n",
+ num_entries->spb_entries);
+ printf("Number of entries possibly healing: %" PRIu64 "\n",
+ num_entries->possibly_healing_entries);
+ }
+out:
+ printf("\n");
+ fflush(stdout);
+ return 0;
+}
+
+int
+glfsh_print_hr_heal_op_status(int ret, uint64_t num_entries, char *fmt_str)
+{
+ if (ret < 0 && num_entries == 0) {
+ printf("Status: %s\n", strerror(-ret));
+ if (fmt_str)
+ printf("%s -\n", fmt_str);
+ goto out;
+ } else if (ret == 0) {
+ printf("Status: Connected\n");
+ }
+
+ if (ret < 0) {
+ if (fmt_str)
+ printf(
+ "Status: Failed to process entries completely. "
+ "(%s)\n%s %" PRIu64 "\n",
+ strerror(-ret), fmt_str, num_entries);
+ } else {
+ if (fmt_str)
+ printf("%s %" PRIu64 "\n", fmt_str, num_entries);
+ }
+out:
+ printf("\n");
+ return 0;
+}
+
+int
+glfsh_print_info_summary(int ret, num_entries_t *num_entries)
+{
+ return glfsh_output->print_heal_op_summary(ret, num_entries);
+}
+
+int
+glfsh_print_heal_op_status(int ret, uint64_t num_entries,
+ gf_xl_afr_op_t heal_op)
+{
+ char *fmt_str = NULL;
+
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY)
+ fmt_str = "Number of entries:";
+ else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES)
+ fmt_str = "Number of entries in split-brain:";
+ else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK)
+ fmt_str = "Number of healed entries:";
+
+ return glfsh_output->print_heal_op_status(ret, num_entries, fmt_str);
+}
+
+int
+glfsh_get_index_dir_loc(loc_t *rootloc, xlator_t *xl, loc_t *dirloc,
+ int32_t *op_errno, char *vgfid)
+{
+ void *index_gfid = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ struct iatt iattr = {0};
+ struct iatt parent = {0};
+
+ ret = syncop_getxattr(xl, rootloc, &xattr, vgfid, NULL, NULL);
+ if (ret < 0) {
+ *op_errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr(xattr, vgfid, &index_gfid);
+ if (ret < 0) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_uuid_copy(dirloc->gfid, index_gfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new(rootloc->inode->table);
+ ret = syncop_lookup(xl, dirloc, &iattr, &parent, NULL, NULL);
+ dirloc->path = NULL;
+ if (ret < 0) {
+ *op_errno = -ret;
+ goto out;
+ }
+ ret = glfsh_link_inode_update_loc(dirloc, &iattr);
+ if (ret)
+ goto out;
+
+ ret = glfs_loc_touchup(dirloc);
+ if (ret < 0) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
+
+static xlator_t *
+_get_ancestor(xlator_t *xl, gf_xl_afr_op_t heal_op)
+{
+ static char *replica_xl[] = {"cluster/replicate", NULL};
+ static char *heal_xls[] = {"cluster/replicate", "cluster/disperse", NULL};
+ char **ancestors = NULL;
+
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY || heal_op == GF_SHD_OP_HEAL_SUMMARY)
+ ancestors = heal_xls;
+ else
+ ancestors = replica_xl;
+
+ if (!xl || !xl->parents)
+ return NULL;
+
+ while (xl->parents) {
+ xl = xl->parents->xlator;
+ if (!xl)
+ break;
+ if (gf_get_index_by_elem(ancestors, xl->type) != -1)
+ return xl;
+ }
+
+ return NULL;
+}
+
+int
+glfsh_index_purge(xlator_t *subvol, inode_t *inode, char *name)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+
+ loc.parent = inode_ref(inode);
+ loc.name = name;
+
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+glfsh_print_summary_status(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t flag)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if (ret)
+ goto out;
+
+ if ((!strcmp(value, "heal")) || (!strcmp(value, "heal-pending"))) {
+ (num_entries->pending_entries)++;
+ } else if ((!strcmp(value, "split-brain")) ||
+ (!strcmp(value, "split-brain-pending"))) {
+ (num_entries->spb_entries)++;
+ } else if ((!strcmp(value, "possibly-healing-pending")) ||
+ (!strcmp(value, "possibly-healing"))) {
+ (num_entries->possibly_healing_entries)++;
+ } else {
+ goto out;
+ }
+ (num_entries->num_entries)++;
+out:
+ return ret;
+}
+
+int
+glfsh_print_spb_status(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t flag)
+{
+ int ret = 0;
+ gf_boolean_t pending = _gf_false;
+ gf_boolean_t split_b = _gf_false;
+ char *value = NULL;
+ char gfid_str[64] = {0};
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if (ret)
+ return 0;
+
+ if (!strcmp(value, "split-brain")) {
+ split_b = _gf_true;
+ } else if (!strcmp(value, "split-brain-pending")) {
+ split_b = _gf_true;
+ pending = _gf_true;
+ }
+ /* Consider the entry only iff :
+ * 1) The dir being processed is not indices/dirty, indicated by
+ * flag == _gf_false
+ * 2) The dir being processed is indices/dirty but the entry also
+ * exists in indices/xattrop dir and has already been processed.
+ */
+ if (split_b) {
+ if (!flag || (flag && !pending)) {
+ (num_entries->num_entries)++;
+ glfsh_output->print_spb_status(
+ path ? path : uuid_utoa_r(gfid, gfid_str), gfid, NULL);
+ }
+ }
+ return 0;
+}
+
+int
+glfsh_print_heal_status(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t ignore_dirty)
+{
+ int ret = 0;
+ gf_boolean_t pending = _gf_false;
+ char *status = NULL;
+ char *value = NULL;
+ char gfid_str[64] = {0};
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if (ret || (!strcmp(value, "no-heal")))
+ return 0;
+
+ if (!strcmp(value, "heal")) {
+ ret = gf_asprintf(&status, " ");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "possibly-healing")) {
+ ret = gf_asprintf(&status, " - Possibly undergoing heal");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "split-brain")) {
+ ret = gf_asprintf(&status, " - Is in split-brain");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "heal-pending")) {
+ pending = _gf_true;
+ ret = gf_asprintf(&status, " ");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "split-brain-pending")) {
+ pending = _gf_true;
+ ret = gf_asprintf(&status, " - Is in split-brain");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "possibly-healing-pending")) {
+ pending = _gf_true;
+ ret = gf_asprintf(&status, " - Possibly undergoing heal");
+ if (ret < 0)
+ goto out;
+ }
+out:
+ /* If ignore_dirty is set, it means indices/dirty directory is
+ * being processed. Ignore the entry if it also exists in
+ * indices/xattrop.
+ * Boolean pending is set to true if the entry also exists in
+ * indices/xattrop directory.
+ */
+ if (ignore_dirty) {
+ if (pending) {
+ GF_FREE(status);
+ status = NULL;
+ return 0;
+ }
+ }
+ if (ret == -1)
+ status = NULL;
+
+ (num_entries->num_entries)++;
+ glfsh_output->print_heal_status(path ? path : uuid_utoa_r(gfid, gfid_str),
+ gfid, status ? status : "");
+
+ GF_FREE(status);
+ return 0;
+}
+
+int
+glfsh_heal_status_boolean(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t ignore_dirty)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if ((!ret) && (!strcmp(value, "no-heal")))
+ return 0;
+ else
+ return -1;
+}
+
+static void
+glfsh_heal_entries(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ gf_dirent_t *entries, uint64_t *offset,
+ num_entries_t *num_entries, dict_t *xattr_req)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ char file[64] = {0};
+
+ list_for_each_entry_safe(entry, tmp, &entries->list, list)
+ {
+ *offset = entry->d_off;
+ if ((strcmp(entry->d_name, ".") == 0) ||
+ (strcmp(entry->d_name, "..") == 0))
+ continue;
+ snprintf(file, sizeof(file), "gfid:%s", entry->d_name);
+ ret = glfsh_heal_splitbrain_file(fs, top_subvol, rootloc, file,
+ xattr_req);
+ if (ret)
+ continue;
+ (num_entries->num_entries)++;
+ }
+}
+
+static int
+glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
+ uint64_t *offset, num_entries_t *num_entries,
+ print_status glfsh_print_status,
+ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ int print_status = 0;
+ char *path = NULL;
+ uuid_t gfid = {0};
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ loc_t loc = {
+ 0,
+ };
+ this = THIS;
+
+ list_for_each_entry_safe(entry, tmp, &entries->list, list)
+ {
+ *offset = entry->d_off;
+ if ((strcmp(entry->d_name, ".") == 0) ||
+ (strcmp(entry->d_name, "..") == 0))
+ continue;
+
+ if (dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ gf_uuid_clear(gfid);
+ GF_FREE(path);
+ path = NULL;
+
+ gf_uuid_parse(entry->d_name, gfid);
+ gf_uuid_copy(loc.gfid, gfid);
+ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL);
+ if (ret) {
+ if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN))
+ goto out;
+ else
+ continue;
+ }
+
+ ret = syncop_gfid_to_path(this->itable, xl, gfid, &path);
+
+ if (ret == -ENOENT || ret == -ESTALE) {
+ glfsh_index_purge(xl, fd->inode, entry->d_name);
+ ret = 0;
+ continue;
+ }
+ if (dict) {
+ print_status = glfsh_print_status(dict, path, gfid, num_entries,
+ ignore_dirty);
+ if ((print_status) && (mode != GLFSH_MODE_CONTINUE_ON_ERROR)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ GF_FREE(path);
+ if (dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return ret;
+}
+
+static int
+glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ xlator_t *readdir_xl, fd_t *fd, loc_t *loc,
+ dict_t *xattr_req, num_entries_t *num_entries,
+ gf_boolean_t ignore)
+{
+ int ret = 0;
+ int heal_op = -1;
+ uint64_t offset = 0;
+ gf_dirent_t entries;
+ gf_boolean_t free_entries = _gf_false;
+ glfsh_fail_mode_t mode = GLFSH_MODE_CONTINUE_ON_ERROR;
+
+ INIT_LIST_HEAD(&entries.list);
+ ret = dict_get_int32(xattr_req, "heal-op", &heal_op);
+ if (ret)
+ return ret;
+
+ if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ mode = GLFSH_MODE_EXIT_ON_FIRST_FAILURE;
+
+ while (1) {
+ ret = syncop_readdir(readdir_xl, fd, 131072, offset, &entries, NULL,
+ NULL);
+ if (ret <= 0)
+ break;
+ ret = 0;
+ free_entries = _gf_true;
+
+ if (list_empty(&entries.list))
+ goto out;
+
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_heal_status,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_spb_status,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_summary_status,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
+ glfsh_heal_entries(fs, top_subvol, rootloc, &entries, &offset,
+ num_entries, xattr_req);
+ } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_heal_status_boolean,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ }
+ gf_dirent_free(&entries);
+ free_entries = _gf_false;
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free(&entries);
+ return ret;
+}
+
+static int
+glfsh_no_print_brick_from_xl(xlator_t *xl, loc_t *rootloc)
+{
+ return 0;
+}
+
+static int
+glfsh_print_brick_from_xl(xlator_t *xl, loc_t *rootloc)
+{
+ char *remote_host = NULL;
+ char *remote_subvol = NULL;
+ int ret = 0;
+
+ ret = dict_get_str(xl->options, "remote-host", &remote_host);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(xl->options, "remote-subvolume", &remote_subvol);
+ if (ret < 0)
+ goto out;
+out:
+ if (ret < 0)
+ printf("Brick - Not able to get brick information\n");
+ else
+ printf("Brick %s:%s\n", remote_host, remote_subvol);
+ return ret;
+}
+
+int
+glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ xlator_t *xl, gf_xl_afr_op_t heal_op,
+ dict_t *xattr_req, char *vgfid,
+ num_entries_t *num_entries)
+{
+ int ret = 0;
+ loc_t dirloc = {0};
+ fd_t *fd = NULL;
+ int32_t op_errno = 0;
+ gf_boolean_t ignore = _gf_false;
+
+ if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID))
+ ignore = _gf_true;
+
+ ret = glfsh_get_index_dir_loc(rootloc, xl, &dirloc, &op_errno, vgfid);
+ if (ret < 0) {
+ if (op_errno == ESTALE || op_errno == ENOENT || op_errno == ENOTSUP)
+ ret = 0;
+ else
+ ret = -op_errno;
+ goto out;
+ }
+
+ ret = syncop_dirfd(xl, &dirloc, &fd, GF_CLIENT_PID_GLFS_HEAL);
+ if (ret)
+ goto out;
+
+ ret = glfsh_crawl_directory(fs, top_subvol, rootloc, xl, fd, &dirloc,
+ xattr_req, num_entries, ignore);
+ if (fd)
+ fd_unref(fd);
+out:
+ loc_wipe(&dirloc);
+ return ret;
+}
+
+int
+glfsh_print_pending_heals(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ xlator_t *xl, gf_xl_afr_op_t heal_op,
+ gf_boolean_t is_parent_replicate)
+{
+ int ret = 0;
+ num_entries_t num_entries = {
+ 0,
+ };
+ num_entries_t total = {
+ 0,
+ };
+
+ dict_t *xattr_req = NULL;
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ goto out;
+ ret = dict_set_int32(xattr_req, "heal-op", heal_op);
+ if (ret)
+ goto out;
+
+ if ((!is_parent_replicate) &&
+ ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) ||
+ (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE))) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glfsh_output->print_brick_from_xl(xl, rootloc);
+ if (ret < 0)
+ goto out;
+
+ ret = glfsh_print_pending_heals_type(fs, top_subvol, rootloc, xl, heal_op,
+ xattr_req, GF_XATTROP_INDEX_GFID,
+ &num_entries);
+
+ if (ret < 0 && heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ goto out;
+
+ total.num_entries += num_entries.num_entries;
+ total.pending_entries += num_entries.pending_entries;
+ total.spb_entries += num_entries.spb_entries;
+ total.possibly_healing_entries += num_entries.possibly_healing_entries;
+ num_entries.num_entries = 0;
+ num_entries.pending_entries = 0;
+ num_entries.spb_entries = 0;
+ num_entries.possibly_healing_entries = 0;
+ if (ret == -ENOTCONN)
+ goto out;
+
+ if (is_parent_replicate) {
+ ret = glfsh_print_pending_heals_type(
+ fs, top_subvol, rootloc, xl, heal_op, xattr_req,
+ GF_XATTROP_DIRTY_GFID, &num_entries);
+ total.num_entries += num_entries.num_entries;
+ total.pending_entries += num_entries.pending_entries;
+ total.spb_entries += num_entries.spb_entries;
+ total.possibly_healing_entries += num_entries.possibly_healing_entries;
+ }
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ glfsh_print_info_summary(ret, &total);
+ } else {
+ glfsh_print_heal_op_status(ret, total.num_entries, heal_op);
+ }
+ return ret;
+}
+
+static int
+glfsh_set_heal_options(glfs_t *fs, gf_xl_afr_op_t heal_op)
+{
+ int ret = 0;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*",
+ "background-self-heal-count", "0");
+ if (ret)
+ goto out;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "halo-enabled", "off");
+ if (ret)
+ goto out;
+
+ if ((heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) &&
+ (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) &&
+ (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME))
+ return 0;
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "data-self-heal", "on");
+ if (ret)
+ goto out;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "metadata-self-heal",
+ "on");
+ if (ret)
+ goto out;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "entry-self-heal", "on");
+
+out:
+ return ret;
+}
+
+static int
+glfsh_validate_volume(xlator_t *xl, gf_xl_afr_op_t heal_op)
+{
+ xlator_t *heal_xl = NULL;
+ int ret = -1;
+
+ while (xl->next)
+ xl = xl->next;
+
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0) {
+ heal_xl = _get_ancestor(xl, heal_op);
+ if (heal_xl) {
+ ret = 0;
+ break;
+ }
+ }
+
+ xl = xl->prev;
+ }
+
+ return ret;
+}
+
+static xlator_t *
+_brick_path_to_client_xlator(xlator_t *top_subvol, char *hostname,
+ char *brickpath)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ char *remote_host = NULL;
+ char *remote_subvol = NULL;
+
+ xl = top_subvol;
+
+ while (xl->next)
+ xl = xl->next;
+
+ while (xl) {
+ if (!strcmp(xl->type, "protocol/client")) {
+ ret = dict_get_str(xl->options, "remote-host", &remote_host);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_str(xl->options, "remote-subvolume", &remote_subvol);
+ if (ret < 0)
+ goto out;
+ if (!strcmp(hostname, remote_host) &&
+ !strcmp(brickpath, remote_subvol))
+ return xl;
+ }
+ xl = xl->prev;
+ }
+
+out:
+ return NULL;
+}
+
+int
+glfsh_gather_heal_info(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ gf_xl_afr_op_t heal_op)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ xlator_t *heal_xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ xl = top_subvol;
+ while (xl->next)
+ xl = xl->next;
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0 &&
+ !strstr(xl->name, "-ta-")) {
+ heal_xl = _get_ancestor(xl, heal_op);
+ if (heal_xl) {
+ old_THIS = THIS;
+ THIS = heal_xl;
+ ret = glfsh_print_pending_heals(
+ fs, top_subvol, rootloc, xl, heal_op,
+ !strcmp(heal_xl->type, "cluster/replicate"));
+ THIS = old_THIS;
+
+ if ((ret < 0) &&
+ (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE))
+ goto out;
+ }
+ }
+
+ xl = xl->prev;
+ }
+
+out:
+ if (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ ret = 0;
+
+ return ret;
+}
+
+int
+_validate_directory(dict_t *xattr_req, char *file)
+{
+ int heal_op = -1;
+ int ret = 0;
+
+ ret = dict_get_int32(xattr_req, "heal-op", &heal_op);
+ if (ret)
+ return ret;
+
+ if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) {
+ printf("'bigger-file' not a valid option for directories.\n");
+ ret = -1;
+ } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
+ printf(
+ "'source-brick' option used on a directory (%s). "
+ "Performing conservative merge.\n",
+ file);
+ }
+
+ return ret;
+}
+
+int
+glfsh_heal_splitbrain_file(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *file, dict_t *xattr_req)
+{
+ int ret = -1;
+ int reval = 0;
+ loc_t loc = {
+ 0,
+ };
+ char *path = NULL;
+ char *path1 = NULL;
+ char *path2 = NULL;
+ char *filename = NULL;
+ char *filename1 = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ xlator_t *xl = top_subvol;
+ dict_t *xattr_rsp = NULL;
+ char *sh_fail_msg = NULL;
+ char *gfid_heal_msg = NULL;
+ int32_t op_errno = 0;
+ gf_boolean_t flag = _gf_false;
+
+ if (!strncmp(file, "gfid:", 5)) {
+ filename = gf_strdup(file);
+ if (!filename) {
+ printf("Error allocating memory to filename\n");
+ goto out;
+ }
+ path = strtok(filename, ":");
+ path = strtok(NULL, ";");
+ gf_uuid_parse(path, loc.gfid);
+ loc.path = gf_strdup(uuid_utoa(loc.gfid));
+ if (!loc.path) {
+ printf("Error allocating memory to path\n");
+ goto out;
+ }
+ loc.inode = inode_new(rootloc->inode->table);
+ if (!loc.inode) {
+ printf("Error getting inode\n");
+ goto out;
+ }
+ ret = syncop_lookup(xl, &loc, &iatt, 0, xattr_req, &xattr_rsp);
+ if (ret) {
+ op_errno = -ret;
+ printf("Lookup failed on %s:%s.\n", file, strerror(op_errno));
+ goto out;
+ }
+ } else {
+ if (file[0] != '/') {
+ printf(
+ "<FILE> must be absolute path w.r.t. the "
+ "volume, starting with '/'\n");
+ ret = -1;
+ goto out;
+ }
+ path1 = gf_strdup(file);
+ if (!path1) {
+ printf("Error allocating memory to path\n");
+ ret = -1;
+ goto out;
+ }
+ path2 = gf_strdup(file);
+ if (!path2) {
+ printf("Error allocating memory to path\n");
+ ret = -1;
+ goto out;
+ }
+ path = dirname(path1);
+ filename1 = basename(path2);
+ retry1:
+ ret = glfs_resolve(fs, xl, path, &loc, &iatt, reval);
+ ESTALE_RETRY(ret, errno, reval, &loc, retry1);
+ if (ret) {
+ printf("Lookup failed on %s:%s\n", path, strerror(errno));
+ goto out;
+ }
+ GF_FREE((char *)loc.path);
+ loc.path = gf_strdup(file);
+ if (!loc.path) {
+ printf("Error allocating memory for path\n");
+ ret = -1;
+ goto out;
+ }
+ loc.parent = inode_unref(loc.parent);
+ loc.parent = inode_ref(loc.inode);
+ loc.inode = inode_unref(loc.inode);
+ loc.inode = inode_new(rootloc->inode->table);
+ if (!loc.inode) {
+ printf("Error getting inode\n");
+ ret = -1;
+ goto out;
+ }
+ loc.name = filename1;
+ gf_uuid_copy(loc.pargfid, loc.gfid);
+ gf_uuid_clear(loc.gfid);
+
+ ret = syncop_lookup(xl, &loc, &iatt, 0, xattr_req, &xattr_rsp);
+ if (ret) {
+ op_errno = -ret;
+ printf("Lookup failed on %s:%s.\n", file, strerror(op_errno));
+ flag = _gf_true;
+ }
+
+ ret = dict_get_str(xattr_rsp, "gfid-heal-msg", &gfid_heal_msg);
+ if (!ret) {
+ printf("%s for file %s\n", gfid_heal_msg, file);
+ loc_wipe(&loc);
+ goto out;
+ }
+ if (flag)
+ goto out;
+
+ reval = 0;
+ loc_wipe(&loc);
+ memset(&iatt, 0, sizeof(iatt));
+
+ retry2:
+ ret = glfs_resolve(fs, xl, file, &loc, &iatt, reval);
+ ESTALE_RETRY(ret, errno, reval, &loc, retry2);
+ if (ret) {
+ printf("Lookup failed on %s:%s\n", file, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (iatt.ia_type == IA_IFDIR) {
+ ret = _validate_directory(xattr_req, file);
+ if (ret)
+ goto out;
+ }
+ ret = syncop_getxattr(xl, &loc, &xattr_rsp, GF_AFR_HEAL_SBRAIN, xattr_req,
+ NULL);
+ if (ret) {
+ op_errno = -ret;
+ printf("Healing %s failed:%s.\n", file, strerror(op_errno));
+ goto out;
+ }
+ ret = dict_get_str(xattr_rsp, "sh-fail-msg", &sh_fail_msg);
+ if (!ret) {
+ printf("Healing %s failed: %s.\n", file, sh_fail_msg);
+ ret = -1;
+ goto out;
+ }
+ printf("Healed %s.\n", file);
+ ret = 0;
+out:
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+ if (path1)
+ GF_FREE(path1);
+ if (path2)
+ GF_FREE(path2);
+ if (filename)
+ GF_FREE(filename);
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+glfsh_heal_from_brick_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *hostname, char *brickpath, xlator_t *client,
+ dict_t *xattr_req, char *vgfid,
+ num_entries_t *num_entries)
+{
+ fd_t *fd = NULL;
+ loc_t dirloc = {0};
+ int32_t op_errno = 0;
+ int ret = -1;
+
+ ret = glfsh_get_index_dir_loc(rootloc, client, &dirloc, &op_errno, vgfid);
+ if (ret < 0) {
+ if (op_errno == ESTALE || op_errno == ENOENT)
+ ret = 0;
+ else
+ ret = -op_errno;
+ goto out;
+ }
+
+ ret = syncop_dirfd(client, &dirloc, &fd, GF_CLIENT_PID_GLFS_HEAL);
+ if (ret)
+ goto out;
+ ret = glfsh_crawl_directory(fs, top_subvol, rootloc, client, fd, &dirloc,
+ xattr_req, num_entries, _gf_false);
+ if (fd)
+ fd_unref(fd);
+out:
+ loc_wipe(&dirloc);
+ return ret;
+}
+
+int
+glfsh_heal_from_brick(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *hostname, char *brickpath, char *file)
+{
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+ xlator_t *client = NULL;
+ num_entries_t num_entries = {
+ 0,
+ };
+ num_entries_t total = {
+ 0,
+ };
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ goto out;
+ ret = dict_set_int32(xattr_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ client = _brick_path_to_client_xlator(top_subvol, hostname, brickpath);
+ if (!client) {
+ printf("\"%s:%s\"- No such brick available in the volume.\n", hostname,
+ brickpath);
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(xattr_req, "child-name", client->name);
+ if (ret)
+ goto out;
+ if (file)
+ ret = glfsh_heal_splitbrain_file(fs, top_subvol, rootloc, file,
+ xattr_req);
+ else {
+ ret = glfsh_heal_from_brick_type(fs, top_subvol, rootloc, hostname,
+ brickpath, client, xattr_req,
+ GF_XATTROP_INDEX_GFID, &num_entries);
+ total.num_entries += num_entries.num_entries;
+ num_entries.num_entries = 0;
+ if (ret == -ENOTCONN)
+ goto out;
+
+ ret = glfsh_heal_from_brick_type(fs, top_subvol, rootloc, hostname,
+ brickpath, client, xattr_req,
+ GF_XATTROP_DIRTY_GFID, &num_entries);
+ total.num_entries += num_entries.num_entries;
+ if (ret < 0)
+ goto out;
+ }
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ if (!file)
+ glfsh_print_heal_op_status(ret, total.num_entries,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+
+ return ret;
+}
+
+int
+glfsh_heal_from_bigger_file_or_mtime(glfs_t *fs, xlator_t *top_subvol,
+ loc_t *rootloc, char *file,
+ gf_xl_afr_op_t heal_op)
+{
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ goto out;
+ ret = dict_set_int32(xattr_req, "heal-op", heal_op);
+ if (ret)
+ goto out;
+ ret = glfsh_heal_splitbrain_file(fs, top_subvol, rootloc, file, xattr_req);
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ return ret;
+}
+
+static void
+cleanup(glfs_t *fs)
+{
+ if (!fs)
+ return;
+#if 0
+ /* glfs fini path is still racy and crashing the program. Since
+ * this program any way has to die, we are not going to call fini
+ * in the released versions. i.e. final builds. For all
+ * internal testing lets enable this so that glfs_fini code
+ * path becomes stable. */
+ glfs_fini (fs);
+#endif
+}
+
+glfsh_info_t glfsh_human_readable = {
+ .init = glfsh_init,
+ .print_brick_from_xl = glfsh_print_brick_from_xl,
+ .print_heal_op_status = glfsh_print_hr_heal_op_status,
+ .print_heal_op_summary = glfsh_print_hr_heal_op_summary,
+ .print_heal_status = glfsh_print_hr_heal_status,
+ .print_spb_status = glfsh_print_hr_spb_status,
+ .end = glfsh_end};
+
+glfsh_info_t glfsh_no_print = {
+ .init = glfsh_init,
+ .print_brick_from_xl = glfsh_no_print_brick_from_xl,
+ .print_heal_op_status = glfsh_no_print_hr_heal_op_status,
+ .print_heal_status = glfsh_no_print_hr_status,
+ .print_spb_status = glfsh_no_print_hr_status,
+ .end = glfsh_end_op_granular_entry_heal};
+
+#if (HAVE_LIB_XML)
+glfsh_info_t glfsh_xml_output = {
+ .init = glfsh_xml_init,
+ .print_brick_from_xl = glfsh_print_xml_brick_from_xl,
+ .print_heal_op_status = glfsh_print_xml_heal_op_status,
+ .print_heal_op_summary = glfsh_print_xml_heal_op_summary,
+ .print_heal_status = glfsh_print_xml_file_status,
+ .print_spb_status = glfsh_print_xml_file_status,
+ .end = glfsh_xml_end};
+#endif
+
+static void
+parse_flags(int *argc, char **argv, int *flags)
+{
+ int i = 0;
+ char *opt = NULL;
+ int count = 0;
+
+ for (i = 0; i < *argc; i++) {
+ opt = strtail(argv[i], "--");
+ if (!opt)
+ continue;
+ if (strcmp(opt, "nolog") == 0) {
+ *flags |= MODE_NO_LOG;
+ count++;
+ } else if (strcmp(opt, "xml") == 0) {
+ *flags |= MODE_XML;
+ count++;
+ }
+ }
+ *argc = *argc - count;
+}
+
+int
+main(int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ char *volname = NULL;
+ xlator_t *top_subvol = NULL;
+ loc_t rootloc = {0};
+ char logfilepath[PATH_MAX] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *file = NULL;
+ char *op_errstr = NULL;
+ char *socket_filepath = NULL;
+ gf_xl_afr_op_t heal_op = -1;
+ gf_loglevel_t log_level = GF_LOG_INFO;
+ int flags = 0;
+
+ if (argc < 2) {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ } else if (argc >= 4) {
+ if (!strcmp(argv[argc - 2], "glusterd-sock")) {
+ socket_filepath = argv[argc - 1];
+ argc = argc - 2;
+ }
+ }
+ volname = argv[1];
+
+ parse_flags(&argc, argv, &flags);
+ if (flags & MODE_NO_LOG)
+ log_level = GF_LOG_NONE;
+ if (flags & MODE_XML)
+ is_xml = 1;
+
+ switch (argc) {
+ case 2:
+ heal_op = GF_SHD_OP_INDEX_SUMMARY;
+ break;
+ case 3:
+ if (!strcmp(argv[2], "split-brain-info")) {
+ heal_op = GF_SHD_OP_SPLIT_BRAIN_FILES;
+ } else if (!strcmp(argv[2], "granular-entry-heal-op")) {
+ heal_op = GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE;
+ } else if (!strcmp(argv[2], "info-summary")) {
+ heal_op = GF_SHD_OP_HEAL_SUMMARY;
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+ break;
+ case 4:
+ if (!strcmp(argv[2], "bigger-file")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE;
+ file = argv[3];
+ } else if (!strcmp(argv[2], "latest-mtime")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME;
+ file = argv[3];
+ } else if (!strcmp(argv[2], "source-brick")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK;
+ hostname = strtok(argv[3], ":");
+ path = strtok(NULL, ":");
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+ break;
+ case 5:
+ if (!strcmp(argv[2], "source-brick")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK;
+ hostname = strtok(argv[3], ":");
+ path = strtok(NULL, ":");
+ file = argv[4];
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+ break;
+ default:
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+
+ glfsh_output = &glfsh_human_readable;
+ if (is_xml) {
+#if (HAVE_LIB_XML)
+ if ((heal_op == GF_SHD_OP_INDEX_SUMMARY) ||
+ (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) ||
+ (heal_op == GF_SHD_OP_HEAL_SUMMARY)) {
+ glfsh_output = &glfsh_xml_output;
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+#else
+ /*No point doing anything, just fail the command*/
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ glfsh_output = &glfsh_no_print;
+
+ ret = glfsh_output->init();
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ ret = -errno;
+ gf_asprintf(&op_errstr, "Not able to initialize volume '%s'", volname);
+ goto out;
+ }
+
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ fs->ctx->secure_mgmt = 1;
+ fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+ if (socket_filepath != NULL) {
+ ret = glfs_set_volfile_server(fs, "unix", socket_filepath, 0);
+ } else {
+ ret = glfs_set_volfile_server(fs, "unix", DEFAULT_GLUSTERD_SOCKFILE, 0);
+ }
+ if (ret) {
+ ret = -errno;
+ gf_asprintf(&op_errstr,
+ "Setting the volfile server failed, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfsh_set_heal_options(fs, heal_op);
+ if (ret) {
+ printf("Setting xlator heal options failed, %s\n", strerror(errno));
+ goto out;
+ }
+ snprintf(logfilepath, sizeof(logfilepath),
+ DEFAULT_HEAL_LOG_FILE_DIRECTORY "/glfsheal-%s.log", volname);
+ ret = glfs_set_logging(fs, logfilepath, log_level);
+ if (ret < 0) {
+ ret = -errno;
+ gf_asprintf(&op_errstr,
+ "Failed to set the log file path, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_setfspid(fs, GF_CLIENT_PID_GLFS_HEAL);
+ if (ret) {
+ printf("Setting client pid failed, %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ ret = -errno;
+ if (errno == ENOENT) {
+ gf_asprintf(&op_errstr, "Volume %s does not exist", volname);
+ } else {
+ gf_asprintf(&op_errstr,
+ "%s: Not able to fetch "
+ "volfile from glusterd",
+ volname);
+ }
+ goto out;
+ }
+
+ top_subvol = glfs_active_subvol(fs);
+ if (!top_subvol) {
+ ret = -errno;
+ if (errno == ENOTCONN) {
+ gf_asprintf(&op_errstr,
+ "Volume %s is not started "
+ "(Or) All the bricks are not "
+ "running.",
+ volname);
+ } else {
+ gf_asprintf(&op_errstr,
+ "%s: Not able to mount the "
+ "volume, %s",
+ volname, strerror(errno));
+ }
+ goto out;
+ }
+
+ char *var_str = (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
+ heal_op == GF_SHD_OP_HEAL_SUMMARY)
+ ? "replicate/disperse"
+ : "replicate";
+
+ ret = glfsh_validate_volume(top_subvol, heal_op);
+ if (ret < 0) {
+ ret = -EINVAL;
+ gf_asprintf(&op_errstr,
+ "This command is supported "
+ "for only volumes of %s type. Volume %s "
+ "is not of type %s",
+ var_str, volname, var_str);
+ goto out;
+ }
+ rootloc.inode = inode_ref(top_subvol->itable->root);
+ ret = glfs_loc_touchup(&rootloc);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_SHD_OP_INDEX_SUMMARY:
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
+ case GF_SHD_OP_HEAL_SUMMARY:
+ ret = glfsh_gather_heal_info(fs, top_subvol, &rootloc, heal_op);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ ret = glfsh_heal_from_bigger_file_or_mtime(fs, top_subvol, &rootloc,
+ file, heal_op);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = glfsh_heal_from_brick(fs, top_subvol, &rootloc, hostname,
+ path, file);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ glfsh_output->end(ret, NULL);
+ if (ret < 0)
+ ret = -ret;
+ loc_wipe(&rootloc);
+ glfs_subvol_done(fs, top_subvol);
+ cleanup(fs);
+
+ return ret;
+out:
+ if (fs && top_subvol)
+ glfs_subvol_done(fs, top_subvol);
+ loc_wipe(&rootloc);
+ cleanup(fs);
+ if (glfsh_output)
+ glfsh_output->end(ret, op_errstr);
+ if (op_errstr)
+ GF_FREE(op_errstr);
+ return ret;
+}
diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in
new file mode 100644
index 00000000000..79eac2ad2d3
--- /dev/null
+++ b/libgfchangelog.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+
+Name: libgfchangelog
+Description: GlusterFS Changelog Consumer Library
+Version: @LIBGFCHANGELOG_VERSION@
+Requires: @PKGCONFIG_UUID@
+Libs: -L${libdir} -lgfchangelog -lglusterfs
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
diff --git a/mod_glusterfs/apache/2.2/Makefile.am b/libglusterd/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/mod_glusterfs/apache/2.2/Makefile.am
+++ b/libglusterd/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/libglusterd/src/Makefile.am b/libglusterd/src/Makefile.am
new file mode 100644
index 00000000000..684d2bac96b
--- /dev/null
+++ b/libglusterd/src/Makefile.am
@@ -0,0 +1,31 @@
+libglusterd_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libglusterd_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \
+ -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \
+ -I$(CONTRIBDIR)/rbtree \
+ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \
+ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \
+ -I$(CONTRIBDIR)/xxhash \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src/
+
+libglusterd_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS)
+libglusterd_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/libglusterd/src/libglusterd.sym
+
+lib_LTLIBRARIES = libglusterd.la
+
+libglusterd_la_SOURCES = gd-common-utils.c
+
+libglusterd_la_HEADERS = gd-common-utils.h
+
+libglusterd_ladir = $(includedir)/glusterfs
+
+noinst_HEADERS = gd-common-utils.h
+
+EXTRA_DIST = libglusterd.sym
+
+CLEANFILES =
diff --git a/libglusterd/src/gd-common-utils.c b/libglusterd/src/gd-common-utils.c
new file mode 100644
index 00000000000..243fab215e6
--- /dev/null
+++ b/libglusterd/src/gd-common-utils.c
@@ -0,0 +1,78 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gd-common-utils.h"
+#include "cli1-xdr.h"
+
+int
+get_vol_type(int type, int dist_count, int brick_count)
+{
+ if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) &&
+ (dist_count < brick_count))
+ type = type + GF_CLUSTER_TYPE_MAX - 1;
+
+ return type;
+}
+
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val)
+{
+ switch (mem_num) {
+ case 0:
+ return (sts_val->node);
+ case 1:
+ return (sts_val->master);
+ case 2:
+ return (sts_val->brick);
+ case 3:
+ return (sts_val->slave_user);
+ case 4:
+ return (sts_val->slave);
+ case 5:
+ return (sts_val->slave_node);
+ case 6:
+ return (sts_val->worker_status);
+ case 7:
+ return (sts_val->crawl_status);
+ case 8:
+ return (sts_val->last_synced);
+ case 9:
+ return (sts_val->entry);
+ case 10:
+ return (sts_val->data);
+ case 11:
+ return (sts_val->meta);
+ case 12:
+ return (sts_val->failures);
+ case 13:
+ return (sts_val->checkpoint_time);
+ case 14:
+ return (sts_val->checkpoint_completed);
+ case 15:
+ return (sts_val->checkpoint_completion_time);
+ case 16:
+ return (sts_val->brick_host_uuid);
+ case 17:
+ return (sts_val->last_synced_utc);
+ case 18:
+ return (sts_val->checkpoint_time_utc);
+ case 19:
+ return (sts_val->checkpoint_completion_time_utc);
+ case 20:
+ return (sts_val->slavekey);
+ case 21:
+ return (sts_val->session_slave);
+ default:
+ goto out;
+ }
+
+out:
+ return NULL;
+}
diff --git a/libglusterd/src/gd-common-utils.h b/libglusterd/src/gd-common-utils.h
new file mode 100644
index 00000000000..b9bb4f956fe
--- /dev/null
+++ b/libglusterd/src/gd-common-utils.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GD_COMMON_UTILS_H
+#define _GD_COMMON_UTILS_H
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+
+#include "protocol-common.h"
+#include "rpcsvc.h"
+
+int
+get_vol_type(int type, int dist_count, int brick_count);
+
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val);
+
+#endif /* _GD_COMMON_UTILS_H */
diff --git a/libglusterd/src/libglusterd.sym b/libglusterd/src/libglusterd.sym
new file mode 100644
index 00000000000..45969a87c12
--- /dev/null
+++ b/libglusterd/src/libglusterd.sym
@@ -0,0 +1,2 @@
+get_vol_type
+get_struct_variable
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 798b309093a..385e8ef4600 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -1,52 +1,116 @@
-libglusterfs_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) \
- $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
+noinst_PYTHON = generator.py gen-defaults.py $(top_srcdir)/events/eventskeygen.py
-libglusterfs_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 \
- -D_GNU_SOURCE -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
- -D$(GF_HOST_OS) -I$(CONTRIBDIR)/rbtree \
- -DSCHEDULERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler\"
+libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
-libglusterfs_la_LIBADD = @LEXLIB@
+libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \
+ -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \
+ -I$(CONTRIBDIR)/rbtree \
+ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \
+ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \
+ -I$(CONTRIBDIR)/xxhash
+
+libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) \
+ $(URCU_LIBS) $(URCU_CDS_LIBS)
+libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/libglusterfs/src/libglusterfs.sym
lib_LTLIBRARIES = libglusterfs.la
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
CONTRIB_BUILDDIR = $(top_builddir)/contrib
libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
- hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c \
+ hashfn.c common-utils.c timer.c inode.c call-stub.c \
compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c \
- iobuf.c globals.c statedump.c stack.c checksum.c daemon.c \
- $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c \
- graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c \
- $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c \
- $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c \
- $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c \
- $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c \
- $(CONTRIBDIR)/stdlib/gf_mkostemp.c \
- graph-print.c trie.c run.c options.c fd-lk.c circ-buff.c \
- event-history.c gidcache.c
-
-nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c
-
-BUILT_SOURCES = graph.lex.c
-
-noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h \
- logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h \
- fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h \
- gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h \
- checksum.h daemon.h $(CONTRIBDIR)/rbtree/rb.h \
- rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h \
- $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
- $(CONTRIB_BUILDDIR)/uuid/uuid_types.h syncop.h graph-utils.h trie.h run.h \
- options.h lkowner.h fd-lk.h circ-buff.h event-history.h gidcache.h
-
-EXTRA_DIST = graph.l graph.y
+ iobuf.c globals.c statedump.c stack.c checksum.c daemon.c timespec.c \
+ $(CONTRIBDIR)/rbtree/rb.c rbthash.c store.c latency.c \
+ graph.c syncop.c graph-print.c trie.c run.c options.c fd-lk.c \
+ circ-buff.c event-history.c gidcache.c ctx.c client_t.c event-poll.c \
+ event-epoll.c syncop-utils.c cluster-syncop.c refcount.c \
+ $(CONTRIBDIR)/libgen/basename_r.c \
+ $(CONTRIBDIR)/libgen/dirname_r.c \
+ strfd.c parse-utils.c $(CONTRIBDIR)/mount/mntent.c \
+ $(CONTRIBDIR)/libexecinfo/execinfo.c quota-common-utils.c rot-buffs.c \
+ $(CONTRIBDIR)/timer-wheel/timer-wheel.c \
+ $(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \
+ $(CONTRIBDIR)/xxhash/xxhash.c \
+ throttle-tbf.c monitoring.c async.c
+
+nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c defaults.c
+nodist_libglusterfs_la_HEADERS = y.tab.h
+
+BUILT_SOURCES = graph.lex.c defaults.c eventtypes.h
+
+libglusterfs_la_HEADERS = glusterfs/common-utils.h glusterfs/defaults.h \
+ glusterfs/default-args.h glusterfs/dict.h glusterfs/glusterfs.h \
+ glusterfs/hashfn.h glusterfs/timespec.h glusterfs/logging.h \
+ glusterfs/xlator.h glusterfs/stack.h glusterfs/timer.h glusterfs/list.h \
+ glusterfs/inode.h glusterfs/call-stub.h glusterfs/compat.h glusterfs/fd.h \
+ glusterfs/revision.h glusterfs/compat-errno.h glusterfs/gf-event.h \
+ glusterfs/mem-pool.h glusterfs/byte-order.h glusterfs/gf-dirent.h \
+ glusterfs/locking.h glusterfs/syscall.h glusterfs/iobuf.h \
+ glusterfs/globals.h glusterfs/statedump.h glusterfs/checksum.h \
+ glusterfs/daemon.h glusterfs/store.h glusterfs/rbthash.h glusterfs/iatt.h \
+ glusterfs/latency.h glusterfs/mem-types.h glusterfs/syncop.h \
+ glusterfs/cluster-syncop.h glusterfs/graph-utils.h glusterfs/trie.h \
+ glusterfs/refcount.h glusterfs/run.h glusterfs/options.h \
+ glusterfs/lkowner.h glusterfs/fd-lk.h glusterfs/circ-buff.h \
+ glusterfs/event-history.h glusterfs/gidcache.h glusterfs/client_t.h \
+ glusterfs/glusterfs-acl.h glusterfs/glfs-message-id.h \
+ glusterfs/template-component-messages.h glusterfs/strfd.h \
+ glusterfs/syncop-utils.h glusterfs/parse-utils.h \
+ glusterfs/libglusterfs-messages.h glusterfs/lvm-defaults.h \
+ glusterfs/quota-common-utils.h glusterfs/rot-buffs.h \
+ glusterfs/compat-uuid.h glusterfs/upcall-utils.h glusterfs/throttle-tbf.h \
+ glusterfs/events.h glusterfs/atomic.h glusterfs/monitoring.h \
+ glusterfs/async.h glusterfs/glusterfs-fops.h
+
+libglusterfs_ladir = $(includedir)/glusterfs
+
+noinst_HEADERS = unittest/unittest.h \
+ $(CONTRIBDIR)/rbtree/rb.h \
+ $(CONTRIBDIR)/mount/mntent_compat.h \
+ $(CONTRIBDIR)/libexecinfo/execinfo_compat.h \
+ $(CONTRIBDIR)/timer-wheel/timer-wheel.h \
+ $(CONTRIBDIR)/xxhash/xxhash.h \
+ $(CONTRIBDIR)/userspace-rcu/wfcqueue.h \
+ $(CONTRIBDIR)/userspace-rcu/wfstack.h \
+ $(CONTRIBDIR)/userspace-rcu/static-wfcqueue.h \
+ $(CONTRIBDIR)/userspace-rcu/static-wfstack.h
+
+eventtypes.h: $(top_srcdir)/events/eventskeygen.py
+ $(PYTHON) $(top_srcdir)/events/eventskeygen.py C_HEADER
+
+if BUILD_EVENTS
+libglusterfs_la_SOURCES += events.c
+endif
+
+libgfchangelog_HEADERS = changelog.h
+
+EXTRA_DIST = graph.l graph.y defaults-tmpl.c libglusterfs.sym
graph.lex.c: graph.l y.tab.h
- $(LEX) -t $(srcdir)/graph.l > $@
+ $(LEX) -Pgraphyy -t $(srcdir)/graph.l > $@
+y.tab.c: y.tab.h
y.tab.h: graph.y
- $(YACC) -d $(srcdir)/graph.y
+ $(YACC) -p graphyy -d $(srcdir)/graph.y
+
+defaults.c: defaults-tmpl.c generator.py gen-defaults.py
+ $(PYTHON) $(srcdir)/gen-defaults.py $(srcdir)/defaults-tmpl.c > $@
+
+CLEANFILES = $(nodist_libglusterfs_la_SOURCES) \
+ $(nodist_libglusterfs_la_HEADERS) *.pyc
+
+if UNITTEST
+CLEANFILES += *.gcda *.gcno *_xunit.xml
+noinst_PROGRAMS =
+TESTS =
+endif
-CLEANFILES = graph.lex.c y.tab.c y.tab.h
-CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+if BUILD_EVENTS
+CLEANFILES += eventtypes.h
+endif
diff --git a/libglusterfs/src/async.c b/libglusterfs/src/async.c
new file mode 100644
index 00000000000..1d6cfa374b6
--- /dev/null
+++ b/libglusterfs/src/async.c
@@ -0,0 +1,720 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* To implement an efficient thread pool with minimum contention we have used
+ * the following ideas:
+ *
+ * - The queue of jobs has been implemented using a Wait-Free queue provided
+ * by the userspace-rcu library. This queue requires a mutex when multiple
+ * consumers can be extracting items from it concurrently, but the locked
+ * region is very small, which minimizes the chances of contention. To
+ * further minimize contention, the number of active worker threads that
+ * are accessing the queue is dynamically adjusted so that we always have
+ * the minimum required amount of workers contending for the queue. Adding
+ * new items can be done with a single atomic operation, without locks.
+ *
+ * - All queue management operations, like creating more threads, enabling
+ * sleeping ones, etc. are done by a single thread. This makes it possible
+ * to manage all scaling related information and workers lists without
+ * locks. This functionality is implemented as a role that can be assigned
+ * to any of the worker threads, which avoids that some lengthy operations
+ * could interfere with this task.
+ *
+ * - Management is based on signals. We used signals for management tasks to
+ * avoid multiple system calls for each request (with signals we can wait
+ * for multiple events and get some additional data for each request in a
+ * single call, instead of first polling and then reading).
+ *
+ * TODO: There are some other changes that can take advantage of this new
+ * thread pool.
+ *
+ * - Use this thread pool as the core threading model for synctasks. I
+ * think this would improve synctask performance because I think we
+ * currently have some contention there for some workloads.
+ *
+ * - Implement a per thread timer that will allow adding and removing
+ * timers without using mutexes.
+ *
+ * - Integrate with userspace-rcu library in QSBR mode, allowing
+ * other portions of code to be implemented using RCU-based
+ * structures with a extremely fast read side without contention.
+ *
+ * - Integrate I/O into the thread pool so that the thread pool is
+ * able to efficiently manage all loads and scale dynamically. This
+ * could make it possible to minimize context switching when serving
+ * requests from fuse or network.
+ *
+ * - Dynamically scale the number of workers based on system load.
+ * This will make it possible to reduce contention when system is
+ * heavily loaded, improving performance under these circumstances
+ * (or minimizing performance loss). This will also make it possible
+ * that gluster can coexist with other processes that also consume
+ * CPU, with minimal interference from each other.
+ */
+
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "glusterfs/list.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/async.h"
+
+/* These macros wrap a simple system/library call to check the returned error
+ * and log a message in case of failure. */
+#define GF_ASYNC_CHECK(_func, _args...) \
+ ({ \
+ int32_t __async_error = -_func(_args); \
+ if (caa_unlikely(__async_error != 0)) { \
+ gf_async_error(__async_error, #_func "() failed."); \
+ } \
+ __async_error; \
+ })
+
+#define GF_ASYNC_CHECK_ERRNO(_func, _args...) \
+ ({ \
+ int32_t __async_error = _func(_args); \
+ if (caa_unlikely(__async_error < 0)) { \
+ __async_error = -errno; \
+ gf_async_error(__async_error, #_func "() failed."); \
+ } \
+ __async_error; \
+ })
+
+/* These macros are used when, based on POSIX documentation, the function
+ * should never fail under the conditions we are using it. So any unexpected
+ * error will be handled as a fatal event. It probably means a critical bug
+ * or memory corruption. In both cases we consider that stopping the process
+ * is safer (otherwise it could cause more corruption with unknown effects
+ * that could be worse). */
+#define GF_ASYNC_CANTFAIL(_func, _args...) \
+ do { \
+ int32_t __async_error = -_func(_args); \
+ if (caa_unlikely(__async_error != 0)) { \
+ gf_async_fatal(__async_error, #_func "() failed"); \
+ } \
+ } while (0)
+
+#define GF_ASYNC_CANTFAIL_ERRNO(_func, _args...) \
+ ({ \
+ int32_t __async_error = _func(_args); \
+ if (caa_unlikely(__async_error < 0)) { \
+ __async_error = -errno; \
+ gf_async_fatal(__async_error, #_func "() failed"); \
+ } \
+ __async_error; \
+ })
+
+/* TODO: for now we allocate a static array of workers. There's an issue if we
+ * try to use dynamic memory since these workers are initialized very
+ * early in the process startup and it seems that sometimes not all is
+ * ready to use dynamic memory. */
+static gf_async_worker_t gf_async_workers[GF_ASYNC_MAX_THREADS];
+
+/* This is the only global variable needed to manage the entire framework. */
+gf_async_control_t gf_async_ctrl = {};
+
+static __thread gf_async_worker_t *gf_async_current_worker = NULL;
+
+/* The main function of the worker threads. */
+static void *
+gf_async_worker(void *arg);
+
+static void
+gf_async_sync_init(void)
+{
+ GF_ASYNC_CANTFAIL(pthread_barrier_init, &gf_async_ctrl.sync, NULL, 2);
+}
+
+static void
+gf_async_sync_now(void)
+{
+ int32_t ret;
+
+ ret = pthread_barrier_wait(&gf_async_ctrl.sync);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ GF_ASYNC_CANTFAIL(pthread_barrier_destroy, &gf_async_ctrl.sync);
+ ret = 0;
+ }
+ if (caa_unlikely(ret != 0)) {
+ gf_async_fatal(-ret, "pthread_barrier_wait() failed");
+ }
+}
+
+static void
+gf_async_sigmask_empty(sigset_t *mask)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigemptyset, mask);
+}
+
+static void
+gf_async_sigmask_add(sigset_t *mask, int32_t signal)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigaddset, mask, signal);
+}
+
+static void
+gf_async_sigmask_set(int32_t mode, sigset_t *mask, sigset_t *old)
+{
+ GF_ASYNC_CANTFAIL(pthread_sigmask, mode, mask, old);
+}
+
+static void
+gf_async_sigaction(int32_t signum, const struct sigaction *action,
+ struct sigaction *old)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigaction, signum, action, old);
+}
+
+static int32_t
+gf_async_sigwait(sigset_t *set)
+{
+ int32_t ret, signum;
+
+ do {
+ ret = sigwait(set, &signum);
+ } while (caa_unlikely((ret < 0) && (errno == EINTR)));
+
+ if (caa_unlikely(ret < 0)) {
+ ret = -errno;
+ gf_async_fatal(ret, "sigwait() failed");
+ }
+
+ return signum;
+}
+
+static int32_t
+gf_async_sigtimedwait(sigset_t *set, struct timespec *timeout)
+{
+ int32_t ret;
+
+ do {
+ ret = sigtimedwait(set, NULL, timeout);
+ } while (caa_unlikely((ret < 0) && (errno == EINTR)));
+ if (caa_unlikely(ret < 0)) {
+ ret = -errno;
+ /* EAGAIN means that the timeout has expired, so we allow this error.
+ * Any other error shouldn't happen. */
+ if (caa_unlikely(ret != -EAGAIN)) {
+ gf_async_fatal(ret, "sigtimedwait() failed");
+ }
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static void
+gf_async_sigbroadcast(int32_t signum)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(kill, gf_async_ctrl.pid, signum);
+}
+
+static void
+gf_async_signal_handler(int32_t signum)
+{
+ /* We should never handle a signal in this function. */
+ gf_async_fatal(-EBUSY,
+ "Unexpected processing of signal %d through a handler.",
+ signum);
+}
+
+static void
+gf_async_signal_setup(void)
+{
+ struct sigaction action;
+
+ /* We configure all related signals so that we can detect threads using an
+ * invalid signal mask that doesn't block our critical signal. */
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = gf_async_signal_handler;
+
+ gf_async_sigaction(GF_ASYNC_SIGCTRL, &action, &gf_async_ctrl.handler_ctrl);
+
+ gf_async_sigaction(GF_ASYNC_SIGQUEUE, &action,
+ &gf_async_ctrl.handler_queue);
+}
+
+static void
+gf_async_signal_restore(void)
+{
+ /* Handlers we have previously changed are restored back to their original
+ * value. */
+
+ if (gf_async_ctrl.handler_ctrl.sa_handler != gf_async_signal_handler) {
+ gf_async_sigaction(GF_ASYNC_SIGCTRL, &gf_async_ctrl.handler_ctrl, NULL);
+ }
+
+ if (gf_async_ctrl.handler_queue.sa_handler != gf_async_signal_handler) {
+ gf_async_sigaction(GF_ASYNC_SIGQUEUE, &gf_async_ctrl.handler_queue,
+ NULL);
+ }
+}
+
+static void
+gf_async_signal_flush(void)
+{
+ struct timespec delay;
+
+ delay.tv_sec = 0;
+ delay.tv_nsec = 0;
+
+ /* We read all pending signals so that they don't trigger once the signal
+ * mask of some thread is changed. */
+ while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_ctrl, &delay) > 0) {
+ }
+ while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_queue, &delay) > 0) {
+ }
+}
+
+static int32_t
+gf_async_thread_create(pthread_t *thread, int32_t id, void *data)
+{
+ int32_t ret;
+
+ ret = gf_thread_create(thread, NULL, gf_async_worker, data,
+ GF_ASYNC_THREAD_NAME "%u", id);
+ if (caa_unlikely(ret < 0)) {
+ /* TODO: gf_thread_create() should return a more specific error
+ * code. */
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+gf_async_thread_wait(pthread_t thread)
+{
+ /* TODO: this is a blocking call executed inside one of the workers of the
+ * thread pool. This is bad, but this is only executed once we have
+ * received a notification from the thread that it's terminating, so
+ * this should return almost immediately. However, to be more robust
+ * it would be better to use pthread_timedjoin_np() (or even a call
+ * to pthread_tryjoin_np() followed by a delayed recheck if it
+ * fails), but they are not portable. We should see how to do this
+ * in other platforms. */
+ GF_ASYNC_CANTFAIL(pthread_join, thread, NULL);
+}
+
+static int32_t
+gf_async_worker_create(void)
+{
+ struct cds_wfs_node *node;
+ gf_async_worker_t *worker;
+ uint32_t counts, running, max;
+ int32_t ret;
+
+ node = __cds_wfs_pop_blocking(&gf_async_ctrl.available);
+ if (caa_unlikely(node == NULL)) {
+ /* There are no more available workers. We have all threads running. */
+ return 1;
+ }
+ cds_wfs_node_init(node);
+
+ ret = 1;
+
+ counts = uatomic_read(&gf_async_ctrl.counts);
+ max = uatomic_read(&gf_async_ctrl.max_threads);
+ running = GF_ASYNC_COUNT_RUNNING(counts);
+ if (running < max) {
+ uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(1, 0));
+
+ worker = caa_container_of(node, gf_async_worker_t, stack);
+
+ ret = gf_async_thread_create(&worker->thread, worker->id, worker);
+ if (caa_likely(ret >= 0)) {
+ return 0;
+ }
+
+ uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(-1, 0));
+ }
+
+ cds_wfs_push(&gf_async_ctrl.available, node);
+
+ return ret;
+}
+
+static void
+gf_async_worker_enable(void)
+{
+ /* This will wake one of the spare workers. If all workers are busy now,
+ * the signal will be queued so that the first one that completes its
+ * work will become the leader. */
+ gf_async_sigbroadcast(GF_ASYNC_SIGCTRL);
+
+ /* We have consumed a spare worker. We create another one for future
+ * needs. */
+ gf_async_worker_create();
+}
+
+static void
+gf_async_worker_wait(void)
+{
+ int32_t signum;
+
+ signum = gf_async_sigwait(&gf_async_ctrl.sigmask_ctrl);
+ if (caa_unlikely(signum != GF_ASYNC_SIGCTRL)) {
+ gf_async_fatal(-EINVAL, "Worker received an unexpected signal (%d)",
+ signum);
+ }
+}
+
+static void
+gf_async_leader_wait(void)
+{
+ int32_t signum;
+
+ signum = gf_async_sigwait(&gf_async_ctrl.sigmask_queue);
+ if (caa_unlikely(signum != GF_ASYNC_SIGQUEUE)) {
+ gf_async_fatal(-EINVAL, "Leader received an unexpected signal (%d)",
+ signum);
+ }
+}
+
+static void
+gf_async_run(struct cds_wfcq_node *node)
+{
+ gf_async_t *async;
+
+ /* We've just got work from the queue. Process it. */
+ async = caa_container_of(node, gf_async_t, queue);
+ /* TODO: remove dependency from THIS and xl. */
+ THIS = async->xl;
+ async->cbk(async->xl, async);
+}
+
+static void
+gf_async_worker_run(void)
+{
+ struct cds_wfcq_node *node;
+
+ do {
+ /* We keep executing jobs from the queue while it's not empty. Note
+ * that while we do this, we are ignoring any stop request. That's
+ * fine, since we need to process our own 'join' messages to fully
+ * terminate all threads. Note that normal jobs should have already
+ * completed once a stop request is received. */
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ if (node != NULL) {
+ gf_async_run(node);
+ }
+ } while (node != NULL);
+
+ /* TODO: I've tried to keep the worker looking at the queue for some small
+ * amount of time in a busy loop to see if more jobs come soon. With
+ * this I attempted to avoid the overhead of signal management if
+ * jobs come fast enough. However experimental results seem to
+ * indicate that doing this, CPU utilization grows and performance
+ * is actually reduced. We need to see if that's because I used bad
+ * parameters or it's really better to do it as it's done now. */
+}
+
+static void
+gf_async_leader_run(void)
+{
+ struct cds_wfcq_node *node;
+
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ while (caa_unlikely(node == NULL)) {
+ gf_async_leader_wait();
+
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ }
+
+ /* Activate the next available worker thread. It will become the new
+ * leader. */
+ gf_async_worker_enable();
+
+ gf_async_run(node);
+}
+
+static uint32_t
+gf_async_stop_check(gf_async_worker_t *worker)
+{
+ uint32_t counts, old, running, max;
+
+ /* First we check if we should stop without doing any costly atomic
+ * operation. */
+ old = uatomic_read(&gf_async_ctrl.counts);
+ max = uatomic_read(&gf_async_ctrl.max_threads);
+ running = GF_ASYNC_COUNT_RUNNING(old);
+ while (running > max) {
+ /* There are too many threads. We try to stop the current worker. */
+ counts = uatomic_cmpxchg(&gf_async_ctrl.counts, old,
+ old + GF_ASYNC_COUNTS(-1, 1));
+ if (old != counts) {
+ /* Another thread has just updated the counts. We need to retry. */
+ old = counts;
+ running = GF_ASYNC_COUNT_RUNNING(old);
+
+ continue;
+ }
+
+ running--;
+ worker->running = false;
+ }
+
+ return running;
+}
+
+static void
+gf_async_stop_all(xlator_t *xl, gf_async_t *async)
+{
+ if (gf_async_stop_check(gf_async_current_worker) > 0) {
+ /* There are more workers running. We propagate the stop request to
+ * them. */
+ gf_async(async, xl, gf_async_stop_all);
+ }
+}
+
+static void
+gf_async_join(xlator_t *xl, gf_async_t *async)
+{
+ gf_async_worker_t *worker;
+
+ worker = caa_container_of(async, gf_async_worker_t, async);
+
+ gf_async_thread_wait(worker->thread);
+
+ cds_wfs_push(&gf_async_ctrl.available, &worker->stack);
+}
+
+static void
+gf_async_terminate(gf_async_worker_t *worker)
+{
+ uint32_t counts;
+
+ counts = uatomic_add_return(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(0, -1));
+ if (counts == 0) {
+ /* This is the termination of the last worker thread. We need to
+ * synchronize the main thread that is waiting for all workers to
+ * finish. */
+ gf_async_ctrl.sync_thread = worker->thread;
+
+ gf_async_sync_now();
+ } else {
+ /* Force someone else to join this thread to release resources. */
+ gf_async(&worker->async, THIS, gf_async_join);
+ }
+}
+
+static void *
+gf_async_worker(void *arg)
+{
+ gf_async_worker_t *worker;
+
+ worker = (gf_async_worker_t *)arg;
+ gf_async_current_worker = worker;
+
+ worker->running = true;
+ do {
+ /* This thread does nothing until someone enables it to become a
+ * leader. */
+ gf_async_worker_wait();
+
+ /* This thread is now a leader. It will process jobs from the queue
+ * and, if necessary, enable another worker and transfer leadership
+ * to it. */
+ gf_async_leader_run();
+
+ /* This thread is not a leader anymore. It will continue processing
+ * queued jobs until it becomes empty. */
+ gf_async_worker_run();
+
+ /* Stop the current thread if there are too many threads running. */
+ gf_async_stop_check(worker);
+ } while (worker->running);
+
+ gf_async_terminate(worker);
+
+ return NULL;
+}
+
+static void
+gf_async_cleanup(void)
+{
+ /* We do some basic initialization of the global variable 'gf_async_ctrl'
+ * so that it's put into a relatively consistent state. */
+
+ gf_async_ctrl.enabled = false;
+
+ gf_async_ctrl.pid = 0;
+ gf_async_sigmask_empty(&gf_async_ctrl.sigmask_ctrl);
+ gf_async_sigmask_empty(&gf_async_ctrl.sigmask_queue);
+
+ /* This is used to later detect if the handler of these signals have been
+ * changed or not. */
+ gf_async_ctrl.handler_ctrl.sa_handler = gf_async_signal_handler;
+ gf_async_ctrl.handler_queue.sa_handler = gf_async_signal_handler;
+
+ gf_async_ctrl.table = NULL;
+ gf_async_ctrl.max_threads = 0;
+ gf_async_ctrl.counts = 0;
+}
+
+void
+gf_async_fini(void)
+{
+ gf_async_t async;
+
+ if (uatomic_read(&gf_async_ctrl.counts) != 0) {
+ /* We ensure that all threads will quit on the next check. */
+ gf_async_ctrl.max_threads = 0;
+
+ /* Send the stop request to the thread pool. This will cause the
+ * execution of gf_async_stop_all() by one of the worker threads which,
+ * eventually, will terminate all worker threads. */
+ gf_async(&async, THIS, gf_async_stop_all);
+
+ /* We synchronize here with the last thread. */
+ gf_async_sync_now();
+
+ /* We have just synchronized with the latest thread. Now just wait for
+ * it to terminate. */
+ gf_async_thread_wait(gf_async_ctrl.sync_thread);
+
+ gf_async_signal_flush();
+ }
+
+ gf_async_signal_restore();
+
+ gf_async_cleanup();
+}
+
+void
+gf_async_adjust_threads(int32_t threads)
+{
+ if (threads == 0) {
+ /* By default we allow a maximum of 2 * #cores worker threads. This
+ * value is to try to accommodate threads that will do some I/O. Having
+ * more threads than cores we can keep CPU busy even if some threads
+ * are blocked for I/O. In the most efficient case, we can have #cores
+ * computing threads and #cores blocked threads on I/O. However this is
+ * hard to achieve because we can end with more than #cores computing
+ * threads, which won't provide a real benefit and will increase
+ * contention.
+ *
+ * TODO: implement a more intelligent dynamic maximum based on CPU
+ * usage and/or system load. */
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * 2;
+ if (threads < 0) {
+ /* If we can't get the current number of processors, we pick a
+ * random number. */
+ threads = 16;
+ }
+ }
+ if (threads > GF_ASYNC_MAX_THREADS) {
+ threads = GF_ASYNC_MAX_THREADS;
+ }
+ uatomic_set(&gf_async_ctrl.max_threads, threads);
+}
+
+int32_t
+gf_async_init(glusterfs_ctx_t *ctx)
+{
+ sigset_t set;
+ gf_async_worker_t *worker;
+ uint32_t i;
+ int32_t ret;
+ bool running;
+
+ gf_async_cleanup();
+
+ if (!ctx->cmd_args.global_threading ||
+ (ctx->process_mode == GF_GLUSTERD_PROCESS)) {
+ return 0;
+ }
+
+ /* At the init time, the maximum number of threads has not yet been
+ * configured. We use a small starting value that will be layer dynamically
+ * adjusted when ctx->config.max_threads is updated. */
+ gf_async_adjust_threads(GF_ASYNC_SPARE_THREADS + 1);
+
+ gf_async_ctrl.pid = getpid();
+
+ __cds_wfs_init(&gf_async_ctrl.available);
+ cds_wfcq_init(&gf_async_ctrl.queue.head, &gf_async_ctrl.queue.tail);
+
+ gf_async_sync_init();
+
+ /* TODO: it would be cleaner to use dynamic memory, but at this point some
+ * memory management resources are not yet initialized. */
+ gf_async_ctrl.table = gf_async_workers;
+
+ /* We keep all workers in a stack. It will be used when a new thread needs
+ * to be created. */
+ for (i = GF_ASYNC_MAX_THREADS; i > 0; i--) {
+ worker = &gf_async_ctrl.table[i - 1];
+
+ worker->id = i - 1;
+ cds_wfs_node_init(&worker->stack);
+ cds_wfs_push(&gf_async_ctrl.available, &worker->stack);
+ }
+
+ /* Prepare the signal mask for regular workers and the leader. */
+ gf_async_sigmask_add(&gf_async_ctrl.sigmask_ctrl, GF_ASYNC_SIGCTRL);
+ gf_async_sigmask_add(&gf_async_ctrl.sigmask_queue, GF_ASYNC_SIGQUEUE);
+
+ /* TODO: this is needed to block our special signals in the current thread
+ * and all children that it starts. It would be cleaner to do it when
+ * signals are initialized, but there doesn't seem to be a unique
+ * place to do that, so for now we do it here. */
+ gf_async_sigmask_empty(&set);
+ gf_async_sigmask_add(&set, GF_ASYNC_SIGCTRL);
+ gf_async_sigmask_add(&set, GF_ASYNC_SIGQUEUE);
+ gf_async_sigmask_set(SIG_BLOCK, &set, NULL);
+
+ /* Configure the signal handlers. This is mostly for safety, not really
+ * needed, but it doesn't hurt. Note that the caller must ensure that the
+ * signals we need to run are already blocked in any thread already
+ * started. Otherwise this won't work. */
+ gf_async_signal_setup();
+
+ running = false;
+
+ /* We start the spare workers + 1 for the leader. */
+ for (i = 0; i < GF_ASYNC_SPARE_THREADS; i++) {
+ ret = gf_async_worker_create();
+ if (caa_unlikely(ret < 0)) {
+ /* This is the initial start up so we enforce that the spare
+ * threads are created. If this fails at the beginning, it's very
+ * unlikely that the async workers could do its job, so we abort
+ * the initialization. */
+ goto out;
+ }
+
+ /* Once the first thread is started, we can enable it to become the
+ * initial leader. */
+ if ((ret == 0) && !running) {
+ running = true;
+ gf_async_worker_enable();
+ }
+ }
+
+ if (caa_unlikely(!running)) {
+ gf_async_fatal(-ENOMEM, "No worker thread has started");
+ }
+
+ gf_async_ctrl.enabled = true;
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ gf_async_error(ret, "Unable to initialize the thread pool.");
+ gf_async_fini();
+ }
+
+ return ret;
+}
diff --git a/libglusterfs/src/byte-order.h b/libglusterfs/src/byte-order.h
deleted file mode 100644
index 4101db2c71d..00000000000
--- a/libglusterfs/src/byte-order.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _BYTE_ORDER_H
-#define _BYTE_ORDER_H
-
-#include <inttypes.h>
-
-#define LS1 0x00ffU
-#define MS1 0xff00U
-#define LS2 0x0000ffffU
-#define MS2 0xffff0000U
-#define LS4 0x00000000ffffffffULL
-#define MS4 0xffffffff00000000ULL
-
-
-static uint16_t (*hton16) (uint16_t);
-static uint32_t (*hton32) (uint32_t);
-static uint64_t (*hton64) (uint64_t);
-
-#define ntoh16 hton16
-#define ntoh32 hton32
-#define ntoh64 hton64
-
-static uint16_t (*htole16) (uint16_t);
-static uint32_t (*htole32) (uint32_t);
-static uint64_t (*htole64) (uint64_t);
-
-#define letoh16 htole16
-#define letoh32 htole32
-#define letoh64 htole64
-
-static uint16_t (*htobe16) (uint16_t);
-static uint32_t (*htobe32) (uint32_t);
-static uint64_t (*htobe64) (uint64_t);
-
-#define betoh16 htobe16
-#define betoh32 htobe32
-#define betoh64 htobe64
-
-
-#define do_swap2(x) (((x&LS1) << 8)|(((x&MS1) >> 8)))
-#define do_swap4(x) ((do_swap2(x&LS2) << 16)|(do_swap2((x&MS2) >> 16)))
-#define do_swap8(x) ((do_swap4(x&LS4) << 32)|(do_swap4((x&MS4) >> 32)))
-
-
-static inline uint16_t
-__swap16 (uint16_t x)
-{
- return do_swap2(x);
-}
-
-
-static inline uint32_t
-__swap32 (uint32_t x)
-{
- return do_swap4(x);
-}
-
-
-static inline uint64_t
-__swap64 (uint64_t x)
-{
- return do_swap8(x);
-}
-
-
-static inline uint16_t
-__noswap16 (uint16_t x)
-{
- return x;
-}
-
-
-static inline uint32_t
-__noswap32 (uint32_t x)
-{
- return x;
-}
-
-
-static inline uint64_t
-__noswap64 (uint64_t x)
-{
- return x;
-}
-
-
-static inline uint16_t
-__byte_order_n16 (uint16_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- hton16 = __swap16;
- hton32 = __swap32;
- hton64 = __swap64;
- } else {
- /* cpu is be */
- hton16 = __noswap16;
- hton32 = __noswap32;
- hton64 = __noswap64;
- }
-
- return hton16 (i);
-}
-
-
-static inline uint32_t
-__byte_order_n32 (uint32_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- hton16 = __swap16;
- hton32 = __swap32;
- hton64 = __swap64;
- } else {
- /* cpu is be */
- hton16 = __noswap16;
- hton32 = __noswap32;
- hton64 = __noswap64;
- }
-
- return hton32 (i);
-}
-
-
-static inline uint64_t
-__byte_order_n64 (uint64_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- hton16 = __swap16;
- hton32 = __swap32;
- hton64 = __swap64;
- } else {
- /* cpu is be */
- hton16 = __noswap16;
- hton32 = __noswap32;
- hton64 = __noswap64;
- }
-
- return hton64 (i);
-}
-
-
-static uint16_t (*hton16) (uint16_t) = __byte_order_n16;
-static uint32_t (*hton32) (uint32_t) = __byte_order_n32;
-static uint64_t (*hton64) (uint64_t) = __byte_order_n64;
-
-
-static inline uint16_t
-__byte_order_le16 (uint16_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htole16 = __noswap16;
- htole32 = __noswap32;
- htole64 = __noswap64;
- } else {
- /* cpu is be */
- htole16 = __swap16;
- htole32 = __swap32;
- htole64 = __swap64;
- }
-
- return htole16 (i);
-}
-
-
-static inline uint32_t
-__byte_order_le32 (uint32_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htole16 = __noswap16;
- htole32 = __noswap32;
- htole64 = __noswap64;
- } else {
- /* cpu is be */
- htole16 = __swap16;
- htole32 = __swap32;
- htole64 = __swap64;
- }
-
- return htole32 (i);
-}
-
-
-static inline uint64_t
-__byte_order_le64 (uint64_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htole16 = __noswap16;
- htole32 = __noswap32;
- htole64 = __noswap64;
- } else {
- /* cpu is be */
- htole16 = __swap16;
- htole32 = __swap32;
- htole64 = __swap64;
- }
-
- return htole64 (i);
-}
-
-
-static uint16_t (*htole16) (uint16_t) = __byte_order_le16;
-static uint32_t (*htole32) (uint32_t) = __byte_order_le32;
-static uint64_t (*htole64) (uint64_t) = __byte_order_le64;
-
-
-static inline uint16_t
-__byte_order_be16 (uint16_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htobe16 = __swap16;
- htobe32 = __swap32;
- htobe64 = __swap64;
- } else {
- /* cpu is be */
- htobe16 = __noswap16;
- htobe32 = __noswap32;
- htobe64 = __noswap64;
- }
-
- return htobe16 (i);
-}
-
-
-static inline uint32_t
-__byte_order_be32 (uint32_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htobe16 = __swap16;
- htobe32 = __swap32;
- htobe64 = __swap64;
- } else {
- /* cpu is be */
- htobe16 = __noswap16;
- htobe32 = __noswap32;
- htobe64 = __noswap64;
- }
-
- return htobe32 (i);
-}
-
-
-static inline uint64_t
-__byte_order_be64 (uint64_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htobe16 = __swap16;
- htobe32 = __swap32;
- htobe64 = __swap64;
- } else {
- /* cpu is be */
- htobe16 = __noswap16;
- htobe32 = __noswap32;
- htobe64 = __noswap64;
- }
-
- return htobe64 (i);
-}
-
-
-static uint16_t (*htobe16) (uint16_t) = __byte_order_be16;
-static uint32_t (*htobe32) (uint32_t) = __byte_order_be32;
-static uint64_t (*htobe64) (uint64_t) = __byte_order_be64;
-
-
-
-#endif /* _BYTE_ORDER_H */
diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c
index 85a1aaa7b5c..ee84f08acd4 100644
--- a/libglusterfs/src/call-stub.c
+++ b/libglusterfs/src/call-stub.c
@@ -8,4153 +8,2460 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <openssl/md5.h>
#include <inttypes.h>
-#include "call-stub.h"
-#include "mem-types.h"
-
+#include "glusterfs/call-stub.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/libglusterfs-messages.h"
static call_stub_t *
-stub_new (call_frame_t *frame,
- char wind,
- glusterfs_fop_t fop)
+stub_new(call_frame_t *frame, const char wind, const glusterfs_fop_t fop)
{
- call_stub_t *new = NULL;
+ call_stub_t *new = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ new = mem_get0(frame->this->ctx->stub_mem_pool);
+ GF_VALIDATE_OR_GOTO("call-stub", new, out);
- new = mem_get0 (frame->this->ctx->stub_mem_pool);
- GF_VALIDATE_OR_GOTO ("call-stub", new, out);
+ new->frame = frame;
+ new->wind = wind;
+ new->fop = fop;
+ new->stub_mem_pool = frame->this->ctx->stub_mem_pool;
+ INIT_LIST_HEAD(&new->list);
- new->frame = frame;
- new->wind = wind;
- new->fop = fop;
- new->stub_mem_pool = frame->this->ctx->stub_mem_pool;
- INIT_LIST_HEAD (&new->list);
+ INIT_LIST_HEAD(&new->args_cbk.entries);
out:
- return new;
+ return new;
}
-
call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
- dict_t *xdata)
+fop_lookup_stub(call_frame_t *frame, fop_lookup_t fn, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_LOOKUP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lookup.fn = fn;
-
- loc_copy (&stub->args.lookup.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_LOOKUP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.lookup = fn;
+ args_lookup_store(&stub->args, loc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent)
+fop_lookup_cbk_stub(call_frame_t *frame, fop_lookup_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_LOOKUP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lookup_cbk.fn = fn;
- stub->args.lookup_cbk.op_ret = op_ret;
- stub->args.lookup_cbk.op_errno = op_errno;
- if (inode)
- stub->args.lookup_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.lookup_cbk.buf = *buf;
- if (postparent)
- stub->args.lookup_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_LOOKUP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.lookup = fn;
+ args_lookup_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf, xdata,
+ postparent);
out:
- return stub;
+ return stub;
}
-
-
call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
- loc_t *loc, dict_t *xdata)
+fop_stat_stub(call_frame_t *frame, fop_stat_t fn, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_STAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.stat.fn = fn;
- loc_copy (&stub->args.stat.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_STAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.stat = fn;
+ args_stat_store(&stub->args, loc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+fop_stat_cbk_stub(call_frame_t *frame, fop_stat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_STAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.stat_cbk.fn = fn;
- stub->args.stat_cbk.op_ret = op_ret;
- stub->args.stat_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.stat_cbk.buf = *buf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_STAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.stat = fn;
+ args_stat_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
- fd_t *fd, dict_t *xdata)
+fop_fstat_stub(call_frame_t *frame, fop_fstat_t fn, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_FSTAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FSTAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.fstat = fn;
+ args_fstat_store(&stub->args, fd, xdata);
+out:
+ return stub;
+}
- stub->args.fstat.fn = fn;
+call_stub_t *
+fop_fstat_cbk_stub(call_frame_t *frame, fop_fstat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (fd)
- stub->args.fstat.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSTAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fstat = fn;
+ args_fstat_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+fop_truncate_stub(call_frame_t *frame, fop_truncate_t fn, loc_t *loc, off_t off,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 0, GF_FOP_FSTAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fstat_cbk.fn = fn;
- stub->args.fstat_cbk.op_ret = op_ret;
- stub->args.fstat_cbk.op_errno = op_errno;
- if (buf)
- stub->args.fstat_cbk.buf = *buf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_TRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.truncate = fn;
+ args_truncate_store(&stub->args, loc, off, xdata);
out:
- return stub;
+ return stub;
}
+call_stub_t *
+fop_truncate_cbk_stub(call_frame_t *frame, fop_truncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
-/* truncate */
+ stub = stub_new(frame, 0, GF_FOP_TRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.truncate = fn;
+ args_truncate_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+out:
+ return stub;
+}
call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off, dict_t *xdata)
+fop_ftruncate_stub(call_frame_t *frame, fop_ftruncate_t fn, fd_t *fd, off_t off,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_TRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_FTRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.truncate.fn = fn;
- loc_copy (&stub->args.truncate.loc, loc);
- stub->args.truncate.off = off;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.ftruncate = fn;
+ args_ftruncate_store(&stub->args, fd, off, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
+fop_ftruncate_cbk_stub(call_frame_t *frame, fop_ftruncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_TRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 0, GF_FOP_FTRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.truncate_cbk.fn = fn;
- stub->args.truncate_cbk.op_ret = op_ret;
- stub->args.truncate_cbk.op_errno = op_errno;
- if (prebuf)
- stub->args.truncate_cbk.prebuf = *prebuf;
- if (postbuf)
- stub->args.truncate_cbk.postbuf = *postbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.ftruncate = fn;
+ args_ftruncate_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off, dict_t *xdata)
+fop_access_stub(call_frame_t *frame, fop_access_t fn, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FTRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.ftruncate.fn = fn;
- if (fd)
- stub->args.ftruncate.fd = fd_ref (fd);
-
- stub->args.ftruncate.off = off;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_ACCESS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.access = fn;
+ args_access_store(&stub->args, loc, mask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_access_cbk_stub(call_frame_t *frame, fop_access_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FTRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.ftruncate_cbk.fn = fn;
- stub->args.ftruncate_cbk.op_ret = op_ret;
- stub->args.ftruncate_cbk.op_errno = op_errno;
- if (prebuf)
- stub->args.ftruncate_cbk.prebuf = *prebuf;
- if (postbuf)
- stub->args.ftruncate_cbk.postbuf = *postbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_ACCESS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.access = fn;
+ args_access_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask, dict_t *xdata)
+fop_readlink_stub(call_frame_t *frame, fop_readlink_t fn, loc_t *loc,
+ size_t size, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_ACCESS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.access.fn = fn;
- loc_copy (&stub->args.access.loc, loc);
- stub->args.access.mask = mask;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_READLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.readlink = fn;
+ args_readlink_store(&stub->args, loc, size, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_readlink_cbk_stub(call_frame_t *frame, fop_readlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_ACCESS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.access_cbk.fn = fn;
- stub->args.access_cbk.op_ret = op_ret;
- stub->args.access_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_READLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.readlink = fn;
+ args_readlink_cbk_store(&stub->args_cbk, op_ret, op_errno, path, stbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size, dict_t *xdata)
+fop_mknod_stub(call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_READLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readlink.fn = fn;
- loc_copy (&stub->args.readlink.loc, loc);
- stub->args.readlink.size = size;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_MKNOD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.mknod = fn;
+ args_mknod_store(&stub->args, loc, mode, rdev, umask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf, dict_t *xdata)
+fop_mknod_cbk_stub(call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_READLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readlink_cbk.fn = fn;
- stub->args.readlink_cbk.op_ret = op_ret;
- stub->args.readlink_cbk.op_errno = op_errno;
- if (path)
- stub->args.readlink_cbk.buf = gf_strdup (path);
- if (sbuf)
- stub->args.readlink_cbk.sbuf = *sbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_MKNOD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.mknod = fn;
+ args_mknod_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mknod_stub (call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
+fop_mkdir_stub(call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_MKNOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.mknod.fn = fn;
- loc_copy (&stub->args.mknod.loc, loc);
- stub->args.mknod.mode = mode;
- stub->args.mknod.rdev = rdev;
- stub->args.mknod.umask = umask;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_MKDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.mkdir = fn;
+ args_mkdir_store(&stub->args, loc, mode, umask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_mkdir_cbk_stub(call_frame_t *frame, fop_mkdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_MKNOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.mknod_cbk.fn = fn;
- stub->args.mknod_cbk.op_ret = op_ret;
- stub->args.mknod_cbk.op_errno = op_errno;
- if (inode)
- stub->args.mknod_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.mknod_cbk.buf = *buf;
- if (preparent)
- stub->args.mknod_cbk.preparent = *preparent;
- if (postparent)
- stub->args.mknod_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_MKDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.mkdir = fn;
+ args_mkdir_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mkdir_stub (call_frame_t *frame, fop_mkdir_t fn,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+fop_unlink_stub(call_frame_t *frame, fop_unlink_t fn, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_MKDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.mkdir.fn = fn;
- loc_copy (&stub->args.mkdir.loc, loc);
- stub->args.mkdir.mode = mode;
- stub->args.mkdir.umask = umask;
+ stub = stub_new(frame, 1, GF_FOP_UNLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.unlink = fn;
+ args_unlink_store(&stub->args, loc, xflag, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
+fop_unlink_cbk_stub(call_frame_t *frame, fop_unlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_MKDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.mkdir_cbk.fn = fn;
- stub->args.mkdir_cbk.op_ret = op_ret;
- stub->args.mkdir_cbk.op_errno = op_errno;
- if (inode)
- stub->args.mkdir_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.mkdir_cbk.buf = *buf;
- if (preparent)
- stub->args.mkdir_cbk.preparent = *preparent;
- if (postparent)
- stub->args.mkdir_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_UNLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.unlink = fn;
+ args_unlink_cbk_store(&stub->args_cbk, op_ret, op_errno, preparent,
+ postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_unlink_stub (call_frame_t *frame,
- fop_unlink_t fn,
- loc_t *loc, int xflag, dict_t *xdata)
+fop_rmdir_stub(call_frame_t *frame, fop_rmdir_t fn, loc_t *loc, int flags,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_UNLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_RMDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.unlink.fn = fn;
- loc_copy (&stub->args.unlink.loc, loc);
- stub->args.unlink.xflag = xflag;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.rmdir = fn;
+ args_rmdir_store(&stub->args, loc, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_rmdir_cbk_stub(call_frame_t *frame, fop_rmdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_UNLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.unlink_cbk.fn = fn;
- stub->args.unlink_cbk.op_ret = op_ret;
- stub->args.unlink_cbk.op_errno = op_errno;
- if (preparent)
- stub->args.unlink_cbk.preparent = *preparent;
- if (postparent)
- stub->args.unlink_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_RMDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.rmdir = fn;
+ args_rmdir_cbk_store(&stub->args_cbk, op_ret, op_errno, preparent,
+ postparent, xdata);
out:
- return stub;
+ return stub;
}
-
-
call_stub_t *
-fop_rmdir_stub (call_frame_t *frame, fop_rmdir_t fn,
- loc_t *loc, int flags, dict_t *xdata)
+fop_symlink_stub(call_frame_t *frame, fop_symlink_t fn, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_RMDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", linkname, out);
- stub->args.rmdir.fn = fn;
- loc_copy (&stub->args.rmdir.loc, loc);
- stub->args.rmdir.flags = flags;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_SYMLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.symlink = fn;
+ args_symlink_store(&stub->args, linkname, loc, umask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_symlink_cbk_stub(call_frame_t *frame, fop_symlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_RMDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.rmdir_cbk.fn = fn;
- stub->args.rmdir_cbk.op_ret = op_ret;
- stub->args.rmdir_cbk.op_errno = op_errno;
- if (preparent)
- stub->args.rmdir_cbk.preparent = *preparent;
- if (postparent)
- stub->args.rmdir_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_SYMLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.symlink = fn;
+ args_symlink_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_symlink_stub (call_frame_t *frame, fop_symlink_t fn,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
+fop_rename_stub(call_frame_t *frame, fop_rename_t fn, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", linkname, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_SYMLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", oldloc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", newloc, out);
- stub->args.symlink.fn = fn;
- stub->args.symlink.linkname = gf_strdup (linkname);
- stub->args.symlink.umask = umask;
- loc_copy (&stub->args.symlink.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_RENAME);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.rename = fn;
+ args_rename_store(&stub->args, oldloc, newloc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_rename_cbk_stub(call_frame_t *frame, fop_rename_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_SYMLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.symlink_cbk.fn = fn;
- stub->args.symlink_cbk.op_ret = op_ret;
- stub->args.symlink_cbk.op_errno = op_errno;
- if (inode)
- stub->args.symlink_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.symlink_cbk.buf = *buf;
- if (preparent)
- stub->args.symlink_cbk.preparent = *preparent;
- if (postparent)
- stub->args.symlink_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_RENAME);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.rename = fn;
+ args_rename_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+fop_link_stub(call_frame_t *frame, fop_link_t fn, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_RENAME);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", oldloc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", newloc, out);
- stub->args.rename.fn = fn;
- loc_copy (&stub->args.rename.old, oldloc);
- loc_copy (&stub->args.rename.new, newloc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_LINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.link = fn;
+ args_link_store(&stub->args, oldloc, newloc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
+fop_link_cbk_stub(call_frame_t *frame, fop_link_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_RENAME);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.rename_cbk.fn = fn;
- stub->args.rename_cbk.op_ret = op_ret;
- stub->args.rename_cbk.op_errno = op_errno;
- if (buf)
- stub->args.rename_cbk.buf = *buf;
- if (preoldparent)
- stub->args.rename_cbk.preoldparent = *preoldparent;
- if (postoldparent)
- stub->args.rename_cbk.postoldparent = *postoldparent;
- if (prenewparent)
- stub->args.rename_cbk.prenewparent = *prenewparent;
- if (postnewparent)
- stub->args.rename_cbk.postnewparent = *postnewparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_LINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.link = fn;
+ args_link_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+fop_create_stub(call_frame_t *frame, fop_create_t fn, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_LINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.link.fn = fn;
- loc_copy (&stub->args.link.oldloc, oldloc);
- loc_copy (&stub->args.link.newloc, newloc);
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_CREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.create = fn;
+ args_create_store(&stub->args, loc, flags, mode, umask, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_create_cbk_stub(call_frame_t *frame, fop_create_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_LINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.link_cbk.fn = fn;
- stub->args.link_cbk.op_ret = op_ret;
- stub->args.link_cbk.op_errno = op_errno;
- if (inode)
- stub->args.link_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.link_cbk.buf = *buf;
- if (preparent)
- stub->args.link_cbk.preparent = *preparent;
- if (postparent)
- stub->args.link_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_CREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.create = fn;
+ args_create_cbk_store(&stub->args_cbk, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_create_stub (call_frame_t *frame, fop_create_t fn,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
+fop_open_stub(call_frame_t *frame, fop_open_t fn, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_CREATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.create.fn = fn;
- loc_copy (&stub->args.create.loc, loc);
- stub->args.create.flags = flags;
- stub->args.create.mode = mode;
- stub->args.create.umask = umask;
- if (fd)
- stub->args.create.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_OPEN);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.open = fn;
+ args_open_store(&stub->args, loc, flags, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_open_cbk_stub(call_frame_t *frame, fop_open_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_CREATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.create_cbk.fn = fn;
- stub->args.create_cbk.op_ret = op_ret;
- stub->args.create_cbk.op_errno = op_errno;
- if (fd)
- stub->args.create_cbk.fd = fd_ref (fd);
- if (inode)
- stub->args.create_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.create_cbk.buf = *buf;
- if (preparent)
- stub->args.create_cbk.preparent = *preparent;
- if (postparent)
- stub->args.create_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_OPEN);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.open = fn;
+ args_open_cbk_store(&stub->args_cbk, op_ret, op_errno, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags, fd_t *fd,
- dict_t *xdata)
+fop_readv_stub(call_frame_t *frame, fop_readv_t fn, fd_t *fd, size_t size,
+ off_t off, uint32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
-
- stub = stub_new (frame, 1, GF_FOP_OPEN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.open.fn = fn;
- loc_copy (&stub->args.open.loc, loc);
- stub->args.open.flags = flags;
- if (fd)
- stub->args.open.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_READ);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.readv = fn;
+ args_readv_store(&stub->args, fd, size, off, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+fop_readv_cbk_stub(call_frame_t *frame, fop_readv_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_OPEN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.open_cbk.fn = fn;
- stub->args.open_cbk.op_ret = op_ret;
- stub->args.open_cbk.op_errno = op_errno;
- if (fd)
- stub->args.open_cbk.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_READ);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.readv = fn;
+ args_readv_cbk_store(&stub->args_cbk, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off, uint32_t flags, dict_t *xdata)
+fop_writev_stub(call_frame_t *frame, fop_writev_t fn, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", vector, out);
- stub = stub_new (frame, 1, GF_FOP_READ);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readv.fn = fn;
- if (fd)
- stub->args.readv.fd = fd_ref (fd);
- stub->args.readv.size = size;
- stub->args.readv.off = off;
- stub->args.readv.flags = flags;
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_WRITE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.writev = fn;
+ args_writev_store(&stub->args, fd, vector, count, off, flags, iobref,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
+fop_writev_cbk_stub(call_frame_t *frame, fop_writev_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READ);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.readv_cbk.fn = fn;
- stub->args.readv_cbk.op_ret = op_ret;
- stub->args.readv_cbk.op_errno = op_errno;
- if (op_ret >= 0) {
- stub->args.readv_cbk.vector = iov_dup (vector, count);
- stub->args.readv_cbk.count = count;
- stub->args.readv_cbk.stbuf = *stbuf;
- stub->args.readv_cbk.iobref = iobref_ref (iobref);
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_WRITE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.writev = fn;
+ args_writev_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+fop_flush_stub(call_frame_t *frame, fop_flush_t fn, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", vector, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_WRITE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.writev.fn = fn;
- if (fd)
- stub->args.writev.fd = fd_ref (fd);
- stub->args.writev.vector = iov_dup (vector, count);
- stub->args.writev.count = count;
- stub->args.writev.off = off;
- stub->args.writev.flags = flags;
- stub->args.writev.iobref = iobref_ref (iobref);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FLUSH);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.flush = fn;
+ args_flush_store(&stub->args, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_flush_cbk_stub(call_frame_t *frame, fop_flush_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_WRITE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.writev_cbk.fn = fn;
- stub->args.writev_cbk.op_ret = op_ret;
- stub->args.writev_cbk.op_errno = op_errno;
- if (op_ret >= 0)
- stub->args.writev_cbk.postbuf = *postbuf;
- if (prebuf)
- stub->args.writev_cbk.prebuf = *prebuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FLUSH);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.flush = fn;
+ args_flush_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
-
call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
- fd_t *fd, dict_t *xdata)
+fop_fsync_stub(call_frame_t *frame, fop_fsync_t fn, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_FSYNC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FLUSH);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.fsync = fn;
+ args_fsync_store(&stub->args, fd, datasync, xdata);
+out:
+ return stub;
+}
- stub->args.flush.fn = fn;
- if (fd)
- stub->args.flush.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+call_stub_t *
+fop_fsync_cbk_stub(call_frame_t *frame, fop_fsync_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = stub_new(frame, 0, GF_FOP_FSYNC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.fsync = fn;
+ args_fsync_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_opendir_stub(call_frame_t *frame, fop_opendir_t fn, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 0, GF_FOP_FLUSH);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.flush_cbk.fn = fn;
- stub->args.flush_cbk.op_ret = op_ret;
- stub->args.flush_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_OPENDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.opendir = fn;
+ args_opendir_store(&stub->args, loc, fd, xdata);
out:
- return stub;
+ return stub;
}
+call_stub_t *
+fop_opendir_cbk_stub(call_frame_t *frame, fop_opendir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = stub_new(frame, 0, GF_FOP_OPENDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.opendir = fn;
+ args_opendir_cbk_store(&stub->args_cbk, op_ret, op_errno, fd, xdata);
+out:
+ return stub;
+}
call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata)
+fop_fsyncdir_stub(call_frame_t *frame, fop_fsyncdir_t fn, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FSYNC);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsync.fn = fn;
- if (fd)
- stub->args.fsync.fd = fd_ref (fd);
- stub->args.fsync.datasync = datasync;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FSYNCDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fsyncdir = fn;
+ args_fsyncdir_store(&stub->args, fd, datasync, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_fsyncdir_cbk_stub(call_frame_t *frame, fop_fsyncdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FSYNC);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsync_cbk.fn = fn;
- stub->args.fsync_cbk.op_ret = op_ret;
- stub->args.fsync_cbk.op_errno = op_errno;
- if (prebuf)
- stub->args.fsync_cbk.prebuf = *prebuf;
- if (postbuf)
- stub->args.fsync_cbk.postbuf = *postbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSYNCDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fsyncdir = fn;
+ args_fsyncdir_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
- loc_t *loc, fd_t *fd, dict_t *xdata)
+fop_statfs_stub(call_frame_t *frame, fop_statfs_t fn, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_OPENDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.opendir.fn = fn;
- loc_copy (&stub->args.opendir.loc, loc);
- if (fd)
- stub->args.opendir.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_STATFS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.statfs = fn;
+ args_statfs_store(&stub->args, loc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+fop_statfs_cbk_stub(call_frame_t *frame, fop_statfs_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_OPENDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.opendir_cbk.fn = fn;
- stub->args.opendir_cbk.op_ret = op_ret;
- stub->args.opendir_cbk.op_errno = op_errno;
-
- if (fd)
- stub->args.opendir_cbk.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_STATFS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.statfs = fn;
+ args_statfs_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata)
+fop_setxattr_stub(call_frame_t *frame, fop_setxattr_t fn, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FSYNCDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.fsyncdir.fn = fn;
- if (fd)
- stub->args.fsyncdir.fd = fd_ref (fd);
- stub->args.fsyncdir.datasync = datasync;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_SETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.setxattr = fn;
+ args_setxattr_store(&stub->args, loc, dict, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_setxattr_cbk_stub(call_frame_t *frame, fop_setxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_FSYNCDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.fsyncdir_cbk.fn = fn;
- stub->args.fsyncdir_cbk.op_ret = op_ret;
- stub->args.fsyncdir_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_SETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.setxattr = fn;
+ args_setxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
- loc_t *loc, dict_t *xdata)
+fop_getxattr_stub(call_frame_t *frame, fop_getxattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_STATFS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.statfs.fn = fn;
- loc_copy (&stub->args.statfs.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.getxattr = fn;
+ args_getxattr_store(&stub->args, loc, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf, dict_t *xdata)
+fop_getxattr_cbk_stub(call_frame_t *frame, fop_getxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_STATFS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.statfs_cbk.fn = fn;
- stub->args.statfs_cbk.op_ret = op_ret;
- stub->args.statfs_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.statfs_cbk.buf = *buf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.getxattr = fn;
+ args_getxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, dict, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata)
+fop_fsetxattr_stub(call_frame_t *frame, fop_fsetxattr_t fn, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_SETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
- stub->args.setxattr.fn = fn;
- loc_copy (&stub->args.setxattr.loc, loc);
- /* TODO */
- if (dict)
- stub->args.setxattr.dict = dict_ref (dict);
- stub->args.setxattr.flags = flags;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FSETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fsetxattr = fn;
+ args_fsetxattr_store(&stub->args, fd, dict, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_setxattr_cbk_stub (call_frame_t *frame,
- fop_setxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_fsetxattr_cbk_stub(call_frame_t *frame, fop_fsetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_SETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.setxattr_cbk.fn = fn;
- stub->args.setxattr_cbk.op_ret = op_ret;
- stub->args.setxattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fsetxattr = fn;
+ args_fsetxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
+fop_fgetxattr_stub(call_frame_t *frame, fop_fgetxattr_t fn, fd_t *fd,
const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
-
- stub = stub_new (frame, 1, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.getxattr.fn = fn;
- loc_copy (&stub->args.getxattr.loc, loc);
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
- if (name)
- stub->args.getxattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FGETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fgetxattr = fn;
+ args_fgetxattr_store(&stub->args, fd, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+fop_fgetxattr_cbk_stub(call_frame_t *frame, fop_fgetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.getxattr_cbk.fn = fn;
- stub->args.getxattr_cbk.op_ret = op_ret;
- stub->args.getxattr_cbk.op_errno = op_errno;
- /* TODO */
- if (dict)
- stub->args.getxattr_cbk.dict = dict_ref (dict);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fgetxattr = fn;
+ args_fgetxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, dict, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsetxattr_stub (call_frame_t *frame,
- fop_fsetxattr_t fn,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata)
+fop_removexattr_stub(call_frame_t *frame, fop_removexattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", name, out);
- stub = stub_new (frame, 1, GF_FOP_FSETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsetxattr.fn = fn;
- stub->args.fsetxattr.fd = fd_ref (fd);
-
- /* TODO */
- if (dict)
- stub->args.fsetxattr.dict = dict_ref (dict);
- stub->args.fsetxattr.flags = flags;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_REMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.removexattr = fn;
+ args_removexattr_store(&stub->args, loc, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsetxattr_cbk_stub (call_frame_t *frame,
- fop_fsetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_removexattr_cbk_stub(call_frame_t *frame, fop_removexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FSETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsetxattr_cbk.fn = fn;
- stub->args.fsetxattr_cbk.op_ret = op_ret;
- stub->args.fsetxattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_REMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.removexattr = fn;
+ args_removexattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fgetxattr_stub (call_frame_t *frame,
- fop_fgetxattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata)
+fop_fremovexattr_stub(call_frame_t *frame, fop_fremovexattr_t fn, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
-
- stub = stub_new (frame, 1, GF_FOP_FGETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.fgetxattr.fn = fn;
- stub->args.fgetxattr.fd = fd_ref (fd);
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
+ GF_VALIDATE_OR_GOTO("call-stub", name, out);
- if (name)
- stub->args.fgetxattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FREMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fremovexattr = fn;
+ args_fremovexattr_store(&stub->args, fd, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fgetxattr_cbk_stub (call_frame_t *frame,
- fop_fgetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+fop_fremovexattr_cbk_stub(call_frame_t *frame, fop_fremovexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fgetxattr_cbk.fn = fn;
- stub->args.fgetxattr_cbk.op_ret = op_ret;
- stub->args.fgetxattr_cbk.op_errno = op_errno;
+ call_stub_t *stub = NULL;
- /* TODO */
- if (dict)
- stub->args.fgetxattr_cbk.dict = dict_ref (dict);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FREMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fremovexattr = fn;
+ args_fremovexattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata)
+fop_lk_stub(call_frame_t *frame, fop_lk_t fn, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", name, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_REMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.removexattr.fn = fn;
- loc_copy (&stub->args.removexattr.loc, loc);
- stub->args.removexattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_LK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.lk = fn;
+ args_lk_store(&stub->args, fd, cmd, lock, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_lk_cbk_stub(call_frame_t *frame, fop_lk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_REMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.removexattr_cbk.fn = fn;
- stub->args.removexattr_cbk.op_ret = op_ret;
- stub->args.removexattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_LK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.lk = fn;
+ args_lk_cbk_store(&stub->args_cbk, op_ret, op_errno, lock, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fremovexattr_stub (call_frame_t *frame,
- fop_fremovexattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata)
+fop_inodelk_stub(call_frame_t *frame, fop_inodelk_t fn, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
- GF_VALIDATE_OR_GOTO ("call-stub", name, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_FREMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fremovexattr.fn = fn;
- stub->args.fremovexattr.fd = fd_ref (fd);
- stub->args.fremovexattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_INODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.inodelk = fn;
+ args_inodelk_store(&stub->args, volume, loc, cmd, lock, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fremovexattr_cbk_stub (call_frame_t *frame,
- fop_fremovexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_inodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FREMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fremovexattr_cbk.fn = fn;
- stub->args.fremovexattr_cbk.op_ret = op_ret;
- stub->args.fremovexattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_INODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.inodelk = fn;
+ args_inodelk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+fop_finodelk_stub(call_frame_t *frame, fop_finodelk_t fn, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_LK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_FINODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.lk.fn = fn;
- if (fd)
- stub->args.lk.fd = fd_ref (fd);
- stub->args.lk.cmd = cmd;
- stub->args.lk.lock = *lock;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.finodelk = fn;
+ args_finodelk_store(&stub->args, volume, fd, cmd, lock, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock, dict_t *xdata)
+fop_finodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_LK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lk_cbk.fn = fn;
- stub->args.lk_cbk.op_ret = op_ret;
- stub->args.lk_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.lk_cbk.lock = *lock;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FINODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.finodelk = fn;
+ args_finodelk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+fop_entrylk_stub(call_frame_t *frame, fop_entrylk_t fn, const char *volume,
+ loc_t *loc, const char *name, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_INODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_ENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.inodelk.fn = fn;
-
- if (volume)
- stub->args.inodelk.volume = gf_strdup (volume);
-
- loc_copy (&stub->args.inodelk.loc, loc);
- stub->args.inodelk.cmd = cmd;
- stub->args.inodelk.lock = *lock;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.entrylk = fn;
+ args_entrylk_store(&stub->args, volume, loc, name, cmd, type, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_entrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 0, GF_FOP_ENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_INODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.entrylk = fn;
+ args_entrylk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
+out:
+ return stub;
+}
- stub->args.inodelk_cbk.fn = fn;
- stub->args.inodelk_cbk.op_ret = op_ret;
- stub->args.inodelk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_fentrylk_stub(call_frame_t *frame, fop_fentrylk_t fn, const char *volume,
+ fd_t *fd, const char *name, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fentrylk = fn;
+ args_fentrylk_store(&stub->args, volume, fd, name, cmd, type, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+fop_fentrylk_cbk_stub(call_frame_t *frame, fop_fentrylk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
+ stub = stub_new(frame, 0, GF_FOP_FENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FINODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.fentrylk = fn;
+ args_fentrylk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
+out:
+ return stub;
+}
- stub->args.finodelk.fn = fn;
+call_stub_t *
+fop_readdirp_cbk_stub(call_frame_t *frame, fop_readdirp_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (fd)
- stub->args.finodelk.fd = fd_ref (fd);
+ stub = stub_new(frame, 0, GF_FOP_READDIRP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (volume)
- stub->args.finodelk.volume = gf_strdup (volume);
+ stub->fn_cbk.readdirp = fn;
+ args_readdirp_cbk_store(&stub->args_cbk, op_ret, op_errno, entries, xdata);
+out:
+ return stub;
+}
- stub->args.finodelk.cmd = cmd;
- stub->args.finodelk.lock = *lock;
+call_stub_t *
+fop_readdir_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_READDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.readdir = fn;
+ args_readdir_cbk_store(&stub->args_cbk, op_ret, op_errno, entries, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_readdir_stub(call_frame_t *frame, fop_readdir_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_READDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_FINODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.readdir = fn;
+ args_readdir_store(&stub->args, fd, size, off, xdata);
+out:
+ return stub;
+}
- stub->args.finodelk_cbk.fn = fn;
- stub->args.finodelk_cbk.op_ret = op_ret;
- stub->args.finodelk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_readdirp_stub(call_frame_t *frame, fop_readdirp_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_READDIRP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.readdirp = fn;
+ args_readdirp_store(&stub->args, fd, size, off, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
- const char *volume, loc_t *loc, const char *name,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+fop_rchecksum_stub(call_frame_t *frame, fop_rchecksum_t fn, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 1, GF_FOP_ENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.entrylk.fn = fn;
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
- if (volume)
- stub->args.entrylk.volume = gf_strdup (volume);
+ stub = stub_new(frame, 1, GF_FOP_RCHECKSUM);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- loc_copy (&stub->args.entrylk.loc, loc);
+ stub->fn.rchecksum = fn;
+ args_rchecksum_store(&stub->args, fd, offset, len, xdata);
+out:
+ return stub;
+}
- stub->args.entrylk.cmd = cmd;
- stub->args.entrylk.type = type;
- if (name)
- stub->args.entrylk.name = gf_strdup (name);
+call_stub_t *
+fop_rchecksum_cbk_stub(call_frame_t *frame, fop_rchecksum_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_RCHECKSUM);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.rchecksum = fn;
+ args_rchecksum_cbk_store(&stub->args_cbk, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_xattrop_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 0, GF_FOP_XATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_ENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.xattrop = fn;
+ args_xattrop_cbk_store(&stub->args_cbk, op_ret, op_errno, xattr, xdata);
+out:
+ return stub;
+}
- stub->args.entrylk_cbk.fn = fn;
- stub->args.entrylk_cbk.op_ret = op_ret;
- stub->args.entrylk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_fxattrop_cbk_stub(call_frame_t *frame, fop_fxattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FXATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fxattrop = fn;
+ args_xattrop_cbk_store(&stub->args_cbk, op_ret, op_errno, xattr, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
- const char *volume, fd_t *fd, const char *name,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+fop_xattrop_stub(call_frame_t *frame, fop_xattrop_t fn, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", xattr, out);
- stub = stub_new (frame, 1, GF_FOP_FENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_XATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fentrylk.fn = fn;
+ stub->fn.xattrop = fn;
+ args_xattrop_store(&stub->args, loc, optype, xattr, xdata);
+out:
+ return stub;
+}
- if (volume)
- stub->args.fentrylk.volume = gf_strdup (volume);
+call_stub_t *
+fop_fxattrop_stub(call_frame_t *frame, fop_fxattrop_t fn, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (fd)
- stub->args.fentrylk.fd = fd_ref (fd);
- stub->args.fentrylk.cmd = cmd;
- stub->args.fentrylk.type = type;
- if (name)
- stub->args.fentrylk.name = gf_strdup (name);
+ GF_VALIDATE_OR_GOTO("call-stub", xattr, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FXATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fxattrop = fn;
+ args_fxattrop_store(&stub->args, fd, optype, xattr, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fentrylk_cbk_stub (call_frame_t *frame, fop_fentrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_setattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 0, GF_FOP_SETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_FENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.setattr = fn;
+ args_setattr_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre, statpost,
+ xdata);
+out:
+ return stub;
+}
- stub->args.fentrylk_cbk.fn = fn;
- stub->args.fentrylk_cbk.op_ret = op_ret;
- stub->args.fentrylk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_fsetattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fsetattr = fn;
+ args_fsetattr_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre,
+ statpost, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readdirp_cbk_stub (call_frame_t *frame,
- fop_readdirp_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+fop_setattr_stub(call_frame_t *frame, fop_setattr_t fn, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- gf_dirent_t *stub_entry = NULL, *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READDIRP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readdirp_cbk.fn = fn;
- stub->args.readdirp_cbk.op_ret = op_ret;
- stub->args.readdirp_cbk.op_errno = op_errno;
- INIT_LIST_HEAD (&stub->args.readdirp_cbk.entries.list);
-
- /* This check must come after the init of head above
- * so we're sure the list is empty for list_empty.
- */
- GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
-
- if (op_ret > 0) {
- list_for_each_entry (entry, &entries->list, list) {
- stub_entry = gf_dirent_for_name (entry->d_name);
- if (!stub_entry)
- goto out;
- stub_entry->d_off = entry->d_off;
- stub_entry->d_ino = entry->d_ino;
- stub_entry->d_stat = entry->d_stat;
- list_add_tail (&stub_entry->list,
- &stub->args.readdirp_cbk.entries.list);
- }
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+
+ stub = stub_new(frame, 1, GF_FOP_SETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.setattr = fn;
+ args_setattr_store(&stub->args, loc, stbuf, valid, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+fop_fsetattr_stub(call_frame_t *frame, fop_fsetattr_t fn, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- gf_dirent_t *stub_entry = NULL, *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readdir_cbk.fn = fn;
- stub->args.readdir_cbk.op_ret = op_ret;
- stub->args.readdir_cbk.op_errno = op_errno;
- INIT_LIST_HEAD (&stub->args.readdir_cbk.entries.list);
-
- /* This check must come after the init of head above
- * so we're sure the list is empty for list_empty.
- */
- GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
-
- if (op_ret > 0) {
- list_for_each_entry (entry, &entries->list, list) {
- stub_entry = gf_dirent_for_name (entry->d_name);
- if (!stub_entry)
- goto out;
- stub_entry->d_off = entry->d_off;
- stub_entry->d_ino = entry->d_ino;
-
- list_add_tail (&stub_entry->list,
- &stub->args.readdir_cbk.entries.list);
- }
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+ stub = stub_new(frame, 1, GF_FOP_FSETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn.fsetattr = fn;
+ args_fsetattr_store(&stub->args, fd, stbuf, valid, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata)
+fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = stub_new (frame, 1, GF_FOP_READDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.readdir.fn = fn;
- stub->args.readdir.fd = fd_ref (fd);
- stub->args.readdir.size = size;
- stub->args.readdir.off = off;
+ stub = stub_new(frame, 0, GF_FOP_FALLOCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.fallocate = fn;
+ args_fallocate_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre,
+ statpost, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readdirp_stub (call_frame_t *frame,
- fop_readdirp_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- dict_t *xdata)
+fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_READDIRP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.readdirp.fn = fn;
- stub->args.readdirp.fd = fd_ref (fd);
- stub->args.readdirp.size = size;
- stub->args.readdirp.off = off;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FALLOCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fallocate = fn;
+ args_fallocate_store(&stub->args, fd, mode, offset, len, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_rchecksum_stub (call_frame_t *frame,
- fop_rchecksum_t fn,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata)
+fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_RCHECKSUM);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 0, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.rchecksum.fn = fn;
- stub->args.rchecksum.fd = fd_ref (fd);
- stub->args.rchecksum.offset = offset;
- stub->args.rchecksum.len = len;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.discard = fn;
+ args_discard_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre, statpost,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rchecksum_cbk_stub (call_frame_t *frame,
- fop_rchecksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata)
+fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_RCHECKSUM);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.discard = fn;
+ args_discard_store(&stub->args, fd, offset, len, xdata);
+out:
+ return stub;
+}
- stub->args.rchecksum_cbk.fn = fn;
- stub->args.rchecksum_cbk.op_ret = op_ret;
- stub->args.rchecksum_cbk.op_errno = op_errno;
+call_stub_t *
+fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (op_ret >= 0)
- {
- stub->args.rchecksum_cbk.weak_checksum =
- weak_checksum;
+ stub = stub_new(frame, 0, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.rchecksum_cbk.strong_checksum =
- memdup (strong_checksum, MD5_DIGEST_LENGTH);
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.zerofill = fn;
+ args_zerofill_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre,
+ statpost, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_xattrop_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_XATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.xattrop_cbk.fn = fn;
- stub->args.xattrop_cbk.op_ret = op_ret;
- stub->args.xattrop_cbk.op_errno = op_errno;
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.zerofill = fn;
+ args_zerofill_store(&stub->args, fd, offset, len, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fxattrop_cbk_stub (call_frame_t *frame,
- fop_fxattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
+fop_ipc_cbk_stub(call_frame_t *frame, fop_ipc_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FXATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 0, GF_FOP_IPC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fxattrop_cbk.fn = fn;
- stub->args.fxattrop_cbk.op_ret = op_ret;
- stub->args.fxattrop_cbk.op_errno = op_errno;
- if (xattr)
- stub->args.fxattrop_cbk.xattr = dict_ref (xattr);
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.ipc = fn;
+ args_ipc_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_xattrop_stub (call_frame_t *frame,
- fop_xattrop_t fn,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata)
+fop_ipc_stub(call_frame_t *frame, fop_ipc_t fn, int32_t op, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", xattr, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub = stub_new (frame, 1, GF_FOP_XATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_IPC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.xattrop.fn = fn;
-
- loc_copy (&stub->args.xattrop.loc, loc);
+ stub->fn.ipc = fn;
+ args_ipc_store(&stub->args, op, xdata);
+out:
+ return stub;
+}
- stub->args.xattrop.optype = optype;
- stub->args.xattrop.xattr = dict_ref (xattr);
+call_stub_t *
+fop_lease_cbk_stub(call_frame_t *frame, fop_lease_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_LEASE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.lease = fn;
+ args_lease_cbk_store(&stub->args_cbk, op_ret, op_errno, lease, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fxattrop_stub (call_frame_t *frame,
- fop_fxattrop_t fn,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata)
+fop_lease_stub(call_frame_t *frame, fop_lease_t fn, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", xattr, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lease, out);
- stub = stub_new (frame, 1, GF_FOP_FXATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_LEASE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fxattrop.fn = fn;
+ stub->fn.lease = fn;
+ args_lease_store(&stub->args, loc, lease, xdata);
+out:
+ return stub;
+}
- stub->args.fxattrop.fd = fd_ref (fd);
+call_stub_t *
+fop_seek_cbk_stub(call_frame_t *frame, fop_seek_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- stub->args.fxattrop.optype = optype;
- stub->args.fxattrop.xattr = dict_ref (xattr);
+ stub = stub_new(frame, 0, GF_FOP_SEEK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.seek = fn;
+ args_seek_cbk_store(&stub->args_cbk, op_ret, op_errno, offset, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_setattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+fop_seek_stub(call_frame_t *frame, fop_seek_t fn, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub = stub_new (frame, 0, GF_FOP_SETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_SEEK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.setattr_cbk.fn = fn;
+ stub->fn.seek = fn;
+ args_seek_store(&stub->args, fd, offset, what, xdata);
+out:
+ return stub;
+}
- stub->args.setattr_cbk.op_ret = op_ret;
- stub->args.setattr_cbk.op_errno = op_errno;
+call_stub_t *
+fop_getactivelk_cbk_stub(call_frame_t *frame, fop_getactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *lmi, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (statpre)
- stub->args.setattr_cbk.statpre = *statpre;
- if (statpost)
- stub->args.setattr_cbk.statpost = *statpost;
+ stub = stub_new(frame, 0, GF_FOP_GETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.getactivelk = fn;
+ args_getactivelk_cbk_store(&stub->args_cbk, op_ret, op_errno, lmi, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsetattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+fop_getactivelk_stub(call_frame_t *frame, fop_getactivelk_t fn, loc_t *loc,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FSETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.fsetattr_cbk.fn = fn;
+ stub = stub_new(frame, 1, GF_FOP_GETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fsetattr_cbk.op_ret = op_ret;
- stub->args.fsetattr_cbk.op_errno = op_errno;
+ stub->fn.getactivelk = fn;
- if (statpre)
- stub->args.setattr_cbk.statpre = *statpre;
- if (statpost)
- stub->args.fsetattr_cbk.statpost = *statpost;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ loc_copy(&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_setattr_stub (call_frame_t *frame,
- fop_setattr_t fn,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+fop_setactivelk_cbk_stub(call_frame_t *frame, fop_setactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_SETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.setactivelk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
- stub = stub_new (frame, 1, GF_FOP_SETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+out:
+ return stub;
+}
- stub->args.setattr.fn = fn;
+call_stub_t *
+fop_setactivelk_stub(call_frame_t *frame, fop_setactivelk_t fn, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- loc_copy (&stub->args.setattr.loc, loc);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- if (stbuf)
- stub->args.setattr.stbuf = *stbuf;
+ stub = stub_new(frame, 1, GF_FOP_SETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.setattr.valid = valid;
+ stub->fn.setactivelk = fn;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ args_setactivelk_store(&stub->args, loc, locklist, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsetattr_stub (call_frame_t *frame,
- fop_fsetattr_t fn,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FSETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.fsetattr.fn = fn;
+ stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (fd)
- stub->args.fsetattr.fd = fd_ref (fd);
+ stub->fn.copy_file_range = fn;
- if (stbuf)
- stub->args.fsetattr.stbuf = *stbuf;
-
- stub->args.fsetattr.valid = valid;
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len,
+ flags, xdata);
out:
- return stub;
+ return stub;
}
-static void
-call_resume_wind (call_stub_t *stub)
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- stub->args.open.fn (stub->frame,
- stub->frame->this,
- &stub->args.open.loc,
- stub->args.open.flags, stub->args.open.fd,
- stub->xdata);
- break;
- }
- case GF_FOP_CREATE:
- {
- stub->args.create.fn (stub->frame,
- stub->frame->this,
- &stub->args.create.loc,
- stub->args.create.flags,
- stub->args.create.mode,
- stub->args.create.umask,
- stub->args.create.fd,
- stub->xdata);
- break;
- }
- case GF_FOP_STAT:
- {
- stub->args.stat.fn (stub->frame,
- stub->frame->this,
- &stub->args.stat.loc, stub->xdata);
- break;
- }
- case GF_FOP_READLINK:
- {
- stub->args.readlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.readlink.loc,
- stub->args.readlink.size, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- case GF_FOP_MKNOD:
- {
- stub->args.mknod.fn (stub->frame, stub->frame->this,
- &stub->args.mknod.loc,
- stub->args.mknod.mode,
- stub->args.mknod.rdev,
- stub->args.mknod.umask, stub->xdata);
- }
- break;
+ stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_MKDIR:
- {
- stub->args.mkdir.fn (stub->frame, stub->frame->this,
- &stub->args.mkdir.loc,
- stub->args.mkdir.mode,
- stub->args.mkdir.umask, stub->xdata);
- }
- break;
+ stub->fn_cbk.copy_file_range = fn;
+ args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf,
+ prebuf_dst, postbuf_dst, xdata);
- case GF_FOP_UNLINK:
- {
- stub->args.unlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.unlink.loc,
- stub->args.unlink.xflag, stub->xdata);
- }
- break;
+out:
+ return stub;
+}
- case GF_FOP_RMDIR:
- {
- stub->args.rmdir.fn (stub->frame, stub->frame->this,
- &stub->args.rmdir.loc,
- stub->args.rmdir.flags, stub->xdata);
- }
- break;
+call_stub_t *
+fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_SYMLINK:
- {
- stub->args.symlink.fn (stub->frame,
- stub->frame->this,
- stub->args.symlink.linkname,
- &stub->args.symlink.loc,
- stub->args.symlink.umask, stub->xdata);
- }
- break;
+ GF_VALIDATE_OR_GOTO("call-stub", vector, out);
- case GF_FOP_RENAME:
- {
- stub->args.rename.fn (stub->frame,
- stub->frame->this,
- &stub->args.rename.old,
- &stub->args.rename.new, stub->xdata);
- }
- break;
+ stub = stub_new(frame, 1, GF_FOP_PUT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_LINK:
- {
- stub->args.link.fn (stub->frame,
- stub->frame->this,
- &stub->args.link.oldloc,
- &stub->args.link.newloc, stub->xdata);
- }
- break;
+ stub->fn.put = fn;
+ args_put_store(&stub->args, loc, mode, umask, flags, vector, count, offset,
+ iobref, xattr, xdata);
+out:
+ return stub;
+}
- case GF_FOP_TRUNCATE:
- {
- stub->args.truncate.fn (stub->frame,
- stub->frame->this,
- &stub->args.truncate.loc,
- stub->args.truncate.off, stub->xdata);
- break;
- }
+call_stub_t *
+fop_put_cbk_stub(call_frame_t *frame, fop_put_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_READ:
- {
- stub->args.readv.fn (stub->frame,
- stub->frame->this,
- stub->args.readv.fd,
- stub->args.readv.size,
- stub->args.readv.off,
- stub->args.readv.flags, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 0, GF_FOP_PUT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_WRITE:
- {
- stub->args.writev.fn (stub->frame,
- stub->frame->this,
- stub->args.writev.fd,
- stub->args.writev.vector,
- stub->args.writev.count,
- stub->args.writev.off,
- stub->args.writev.flags,
- stub->args.writev.iobref, stub->xdata);
- break;
- }
+ stub->fn_cbk.put = fn;
+ args_put_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+out:
+ return stub;
+}
- case GF_FOP_STATFS:
- {
- stub->args.statfs.fn (stub->frame,
- stub->frame->this,
- &stub->args.statfs.loc, stub->xdata);
- break;
- }
- case GF_FOP_FLUSH:
- {
- stub->args.flush.fn (stub->frame,
- stub->frame->this,
- stub->args.flush.fd, stub->xdata);
- break;
- }
+call_stub_t *
+fop_icreate_stub(call_frame_t *frame, fop_icreate_t fn, loc_t *loc, mode_t mode,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_FSYNC:
- {
- stub->args.fsync.fn (stub->frame,
- stub->frame->this,
- stub->args.fsync.fd,
- stub->args.fsync.datasync, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- case GF_FOP_SETXATTR:
- {
- stub->args.setxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.setxattr.loc,
- stub->args.setxattr.dict,
- stub->args.setxattr.flags, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 1, GF_FOP_ICREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_GETXATTR:
- {
- stub->args.getxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.getxattr.loc,
- stub->args.getxattr.name, stub->xdata);
- break;
- }
+ stub->fn.icreate = fn;
- case GF_FOP_FSETXATTR:
- {
- stub->args.fsetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fsetxattr.fd,
- stub->args.fsetxattr.dict,
- stub->args.fsetxattr.flags, stub->xdata);
- break;
- }
+ stub->args.mode = mode;
+ if (loc)
+ loc_copy(&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
- case GF_FOP_FGETXATTR:
- {
- stub->args.fgetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fgetxattr.fd,
- stub->args.fgetxattr.name, stub->xdata);
- break;
- }
+out:
+ return stub;
+}
- case GF_FOP_REMOVEXATTR:
- {
- stub->args.removexattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.removexattr.loc,
- stub->args.removexattr.name, stub->xdata);
- break;
- }
+static void
+args_icreate_store_cbk(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+}
- case GF_FOP_FREMOVEXATTR:
- {
- stub->args.fremovexattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fremovexattr.fd,
- stub->args.fremovexattr.name, stub->xdata);
- break;
- }
+call_stub_t *
+fop_icreate_cbk_stub(call_frame_t *frame, fop_icreate_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_OPENDIR:
- {
- stub->args.opendir.fn (stub->frame,
- stub->frame->this,
- &stub->args.opendir.loc,
- stub->args.opendir.fd, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 0, GF_FOP_ICREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_FSYNCDIR:
- {
- stub->args.fsyncdir.fn (stub->frame,
- stub->frame->this,
- stub->args.fsyncdir.fd,
- stub->args.fsyncdir.datasync, stub->xdata);
- break;
- }
+ stub->fn_cbk.icreate = fn;
+ args_icreate_store_cbk(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ xdata);
- case GF_FOP_ACCESS:
- {
- stub->args.access.fn (stub->frame,
- stub->frame->this,
- &stub->args.access.loc,
- stub->args.access.mask, stub->xdata);
- break;
- }
+out:
+ return stub;
+}
- case GF_FOP_FTRUNCATE:
- {
- stub->args.ftruncate.fn (stub->frame,
- stub->frame->this,
- stub->args.ftruncate.fd,
- stub->args.ftruncate.off, stub->xdata);
- break;
- }
+call_stub_t *
+fop_namelink_stub(call_frame_t *frame, fop_namelink_t fn, loc_t *loc,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_FSTAT:
- {
- stub->args.fstat.fn (stub->frame,
- stub->frame->this,
- stub->args.fstat.fd, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- case GF_FOP_LK:
- {
- stub->args.lk.fn (stub->frame,
- stub->frame->this,
- stub->args.lk.fd,
- stub->args.lk.cmd,
- &stub->args.lk.lock, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 1, GF_FOP_NAMELINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_INODELK:
- {
- stub->args.inodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.inodelk.volume,
- &stub->args.inodelk.loc,
- stub->args.inodelk.cmd,
- &stub->args.inodelk.lock, stub->xdata);
- break;
- }
+ stub->fn.namelink = fn;
- case GF_FOP_FINODELK:
- {
- stub->args.finodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.finodelk.volume,
- stub->args.finodelk.fd,
- stub->args.finodelk.cmd,
- &stub->args.finodelk.lock, stub->xdata);
- break;
- }
+ if (loc)
+ loc_copy(&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
- case GF_FOP_ENTRYLK:
- {
- stub->args.entrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.entrylk.volume,
- &stub->args.entrylk.loc,
- stub->args.entrylk.name,
- stub->args.entrylk.cmd,
- stub->args.entrylk.type, stub->xdata);
- break;
- }
+out:
+ return stub;
+}
- case GF_FOP_FENTRYLK:
- {
- stub->args.fentrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.fentrylk.volume,
- stub->args.fentrylk.fd,
- stub->args.fentrylk.name,
- stub->args.fentrylk.cmd,
- stub->args.fentrylk.type, stub->xdata);
- break;
- }
-
- break;
+static void
+args_namelink_store_cbk(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
- case GF_FOP_LOOKUP:
- {
- stub->args.lookup.fn (stub->frame,
- stub->frame->this,
- &stub->args.lookup.loc,
- stub->xdata);
- break;
- }
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+}
- case GF_FOP_RCHECKSUM:
- {
- stub->args.rchecksum.fn (stub->frame,
- stub->frame->this,
- stub->args.rchecksum.fd,
- stub->args.rchecksum.offset,
- stub->args.rchecksum.len, stub->xdata);
- break;
- }
+call_stub_t *
+fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_READDIR:
- {
- stub->args.readdir.fn (stub->frame,
- stub->frame->this,
- stub->args.readdir.fd,
- stub->args.readdir.size,
- stub->args.readdir.off, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 0, GF_FOP_NAMELINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_READDIRP:
- {
- stub->args.readdirp.fn (stub->frame,
- stub->frame->this,
- stub->args.readdirp.fd,
- stub->args.readdirp.size,
- stub->args.readdirp.off,
- stub->xdata);
- break;
- }
+ stub->fn_cbk.namelink = fn;
+ args_namelink_store_cbk(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- case GF_FOP_XATTROP:
- {
- stub->args.xattrop.fn (stub->frame,
- stub->frame->this,
- &stub->args.xattrop.loc,
- stub->args.xattrop.optype,
- stub->args.xattrop.xattr, stub->xdata);
-
- break;
- }
- case GF_FOP_FXATTROP:
- {
- stub->args.fxattrop.fn (stub->frame,
- stub->frame->this,
- stub->args.fxattrop.fd,
- stub->args.fxattrop.optype,
- stub->args.fxattrop.xattr, stub->xdata);
-
- break;
- }
- case GF_FOP_SETATTR:
- {
- stub->args.setattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.setattr.loc,
- &stub->args.setattr.stbuf,
- stub->args.setattr.valid, stub->xdata);
- break;
- }
- case GF_FOP_FSETATTR:
- {
- stub->args.fsetattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fsetattr.fd,
- &stub->args.fsetattr.stbuf,
- stub->args.fsetattr.valid, stub->xdata);
- break;
- }
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
-
- }
out:
- return;
+ return stub;
}
-
-
-static void
-call_resume_unwind (call_stub_t *stub)
+void
+call_resume_wind(call_stub_t *stub)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- switch (stub->fop) {
+ switch (stub->fop) {
case GF_FOP_OPEN:
- {
- if (!stub->args.open_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd, stub->xdata);
- else
- stub->args.open_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd, stub->xdata);
- break;
- }
-
+ stub->fn.open(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.flags, stub->args.fd, stub->args.xdata);
+ break;
case GF_FOP_CREATE:
- {
- if (!stub->args.create_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf,
- &stub->args.create_cbk.preparent,
- &stub->args.create_cbk.postparent, stub->xdata);
- else
- stub->args.create_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf,
- &stub->args.create_cbk.preparent,
- &stub->args.create_cbk.postparent, stub->xdata);
-
- break;
- }
-
+ stub->fn.create(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.flags, stub->args.mode, stub->args.umask,
+ stub->args.fd, stub->args.xdata);
+ break;
case GF_FOP_STAT:
- {
- if (!stub->args.stat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf, stub->xdata);
- else
- stub->args.stat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf, stub->xdata);
-
- break;
- }
-
+ stub->fn.stat(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xdata);
+ break;
case GF_FOP_READLINK:
- {
- if (!stub->args.readlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf,
- &stub->args.readlink_cbk.sbuf, stub->xdata);
- else
- stub->args.readlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf,
- &stub->args.readlink_cbk.sbuf, stub->xdata);
-
- break;
- }
-
+ stub->fn.readlink(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.size, stub->args.xdata);
+ break;
case GF_FOP_MKNOD:
- {
- if (!stub->args.mknod_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf,
- &stub->args.mknod_cbk.preparent,
- &stub->args.mknod_cbk.postparent, stub->xdata);
- else
- stub->args.mknod_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf,
- &stub->args.mknod_cbk.preparent,
- &stub->args.mknod_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.mknod(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mode, stub->args.rdev, stub->args.umask,
+ stub->args.xdata);
+ break;
case GF_FOP_MKDIR:
- {
- if (!stub->args.mkdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf,
- &stub->args.mkdir_cbk.preparent,
- &stub->args.mkdir_cbk.postparent, stub->xdata);
- else
- stub->args.mkdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf,
- &stub->args.mkdir_cbk.preparent,
- &stub->args.mkdir_cbk.postparent, stub->xdata);
-
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
-
- break;
- }
-
+ stub->fn.mkdir(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mode, stub->args.umask, stub->args.xdata);
+ break;
case GF_FOP_UNLINK:
- {
- if (!stub->args.unlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno,
- &stub->args.unlink_cbk.preparent,
- &stub->args.unlink_cbk.postparent, stub->xdata);
- else
- stub->args.unlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno,
- &stub->args.unlink_cbk.preparent,
- &stub->args.unlink_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.unlink(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xflag, stub->args.xdata);
+ break;
case GF_FOP_RMDIR:
- {
- if (!stub->args.rmdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno,
- &stub->args.rmdir_cbk.preparent,
- &stub->args.rmdir_cbk.postparent, stub->xdata);
- else
- stub->args.rmdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno,
- &stub->args.rmdir_cbk.preparent,
- &stub->args.rmdir_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.rmdir(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.flags, stub->args.xdata);
+ break;
case GF_FOP_SYMLINK:
- {
- if (!stub->args.symlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf,
- &stub->args.symlink_cbk.preparent,
- &stub->args.symlink_cbk.postparent, stub->xdata);
- else
- stub->args.symlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf,
- &stub->args.symlink_cbk.preparent,
- &stub->args.symlink_cbk.postparent, stub->xdata);
- }
- break;
-
+ stub->fn.symlink(stub->frame, stub->frame->this,
+ stub->args.linkname, &stub->args.loc,
+ stub->args.umask, stub->args.xdata);
+ break;
case GF_FOP_RENAME:
- {
- if (!stub->args.rename_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf,
- &stub->args.rename_cbk.preoldparent,
- &stub->args.rename_cbk.postoldparent,
- &stub->args.rename_cbk.prenewparent,
- &stub->args.rename_cbk.postnewparent, stub->xdata);
- else
- stub->args.rename_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf,
- &stub->args.rename_cbk.preoldparent,
- &stub->args.rename_cbk.postoldparent,
- &stub->args.rename_cbk.prenewparent,
- &stub->args.rename_cbk.postnewparent, stub->xdata);
- break;
- }
-
+ stub->fn.rename(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.loc2, stub->args.xdata);
+ break;
case GF_FOP_LINK:
- {
- if (!stub->args.link_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf, stub->xdata);
- else
- stub->args.link_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf,
- &stub->args.link_cbk.preparent,
- &stub->args.link_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.link(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.loc2, stub->args.xdata);
+ break;
case GF_FOP_TRUNCATE:
- {
- if (!stub->args.truncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.prebuf,
- &stub->args.truncate_cbk.postbuf, stub->xdata);
- else
- stub->args.truncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.prebuf,
- &stub->args.truncate_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.truncate(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.offset, stub->args.xdata);
+ break;
case GF_FOP_READ:
- {
- if (!stub->args.readv_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref, stub->xdata);
- else
- stub->args.readv_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref, stub->xdata);
- }
- break;
-
+ stub->fn.readv(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.size, stub->args.offset, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_WRITE:
- {
- if (!stub->args.writev_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.prebuf,
- &stub->args.writev_cbk.postbuf, stub->xdata);
- else
- stub->args.writev_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.prebuf,
- &stub->args.writev_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.writev(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.vector, stub->args.count,
+ stub->args.offset, stub->args.flags,
+ stub->args.iobref, stub->args.xdata);
+ break;
case GF_FOP_STATFS:
- {
- if (!stub->args.statfs_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf), stub->xdata);
- else
- stub->args.statfs_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf), stub->xdata);
- }
- break;
-
+ stub->fn.statfs(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xdata);
+ break;
case GF_FOP_FLUSH:
- {
- if (!stub->args.flush_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno, stub->xdata);
- else
- stub->args.flush_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.flush(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.xdata);
+ break;
case GF_FOP_FSYNC:
- {
- if (!stub->args.fsync_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno,
- &stub->args.fsync_cbk.prebuf,
- &stub->args.fsync_cbk.postbuf, stub->xdata);
- else
- stub->args.fsync_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno,
- &stub->args.fsync_cbk.prebuf,
- &stub->args.fsync_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.fsync(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.datasync, stub->args.xdata);
+ break;
case GF_FOP_SETXATTR:
- {
- if (!stub->args.setxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno, stub->xdata);
-
- else
- stub->args.setxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.setxattr(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xattr, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_GETXATTR:
- {
- if (!stub->args.getxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict, stub->xdata);
- else
- stub->args.getxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict, stub->xdata);
- break;
- }
-
+ stub->fn.getxattr(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.name, stub->args.xdata);
+ break;
case GF_FOP_FSETXATTR:
- {
- if (!stub->args.fsetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno, stub->xdata);
-
- else
- stub->args.fsetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.fsetxattr(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.xattr, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_FGETXATTR:
- {
- if (!stub->args.fgetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict, stub->xdata);
- else
- stub->args.fgetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict, stub->xdata);
- break;
- }
-
+ stub->fn.fgetxattr(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.name, stub->args.xdata);
+ break;
case GF_FOP_REMOVEXATTR:
- {
- if (!stub->args.removexattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno, stub->xdata);
- else
- stub->args.removexattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.removexattr(stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.name,
+ stub->args.xdata);
+ break;
case GF_FOP_FREMOVEXATTR:
- {
- if (!stub->args.fremovexattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fremovexattr_cbk.op_ret,
- stub->args.fremovexattr_cbk.op_errno, stub->xdata);
- else
- stub->args.fremovexattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fremovexattr_cbk.op_ret,
- stub->args.fremovexattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.fremovexattr(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.name, stub->args.xdata);
+ break;
case GF_FOP_OPENDIR:
- {
- if (!stub->args.opendir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd, stub->xdata);
- else
- stub->args.opendir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd, stub->xdata);
- break;
- }
-
+ stub->fn.opendir(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.fd, stub->args.xdata);
+ break;
case GF_FOP_FSYNCDIR:
- {
- if (!stub->args.fsyncdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno, stub->xdata);
- else
- stub->args.fsyncdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.fsyncdir(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.datasync, stub->args.xdata);
+ break;
case GF_FOP_ACCESS:
- {
- if (!stub->args.access_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno, stub->xdata);
- else
- stub->args.access_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno, stub->xdata);
-
- break;
- }
+ stub->fn.access(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mask, stub->args.xdata);
+ break;
case GF_FOP_FTRUNCATE:
- {
- if (!stub->args.ftruncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.prebuf,
- &stub->args.ftruncate_cbk.postbuf, stub->xdata);
- else
- stub->args.ftruncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.prebuf,
- &stub->args.ftruncate_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.ftruncate(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.xdata);
+ break;
case GF_FOP_FSTAT:
- {
- if (!stub->args.fstat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf, stub->xdata);
- else
- stub->args.fstat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf, stub->xdata);
-
- break;
- }
-
+ stub->fn.fstat(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.xdata);
+ break;
case GF_FOP_LK:
- {
- if (!stub->args.lk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock, stub->xdata);
- else
- stub->args.lk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock, stub->xdata);
- break;
- }
-
+ stub->fn.lk(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.cmd, &stub->args.lock, stub->args.xdata);
+ break;
case GF_FOP_INODELK:
- {
- if (!stub->args.inodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.inodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.inodelk(stub->frame, stub->frame->this, stub->args.volume,
+ &stub->args.loc, stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
+ break;
case GF_FOP_FINODELK:
- {
- if (!stub->args.finodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.finodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.finodelk(stub->frame, stub->frame->this, stub->args.volume,
+ stub->args.fd, stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
+ break;
case GF_FOP_ENTRYLK:
- {
- if (!stub->args.entrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.entrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.entrylk(stub->frame, stub->frame->this, stub->args.volume,
+ &stub->args.loc, stub->args.name,
+ stub->args.entrylkcmd, stub->args.entrylktype,
+ stub->args.xdata);
+ break;
case GF_FOP_FENTRYLK:
- {
- if (!stub->args.fentrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.fentrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.fentrylk(stub->frame, stub->frame->this, stub->args.volume,
+ stub->args.fd, stub->args.name,
+ stub->args.entrylkcmd, stub->args.entrylktype,
+ stub->args.xdata);
+ break;
case GF_FOP_LOOKUP:
- {
- if (!stub->args.lookup_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->xdata,
- &stub->args.lookup_cbk.postparent);
- else
- stub->args.lookup_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->xdata,
- &stub->args.lookup_cbk.postparent);
- /* FIXME NULL should not be passed */
-
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
-
- break;
- }
-
+ stub->fn.lookup(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xdata);
+ break;
case GF_FOP_RCHECKSUM:
- {
- if (!stub->args.rchecksum_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rchecksum_cbk.op_ret,
- stub->args.rchecksum_cbk.op_errno,
- stub->args.rchecksum_cbk.weak_checksum,
- stub->args.rchecksum_cbk.strong_checksum, stub->xdata);
- else
- stub->args.rchecksum_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rchecksum_cbk.op_ret,
- stub->args.rchecksum_cbk.op_errno,
- stub->args.rchecksum_cbk.weak_checksum,
- stub->args.rchecksum_cbk.strong_checksum, stub->xdata);
-
- if (stub->args.rchecksum_cbk.op_ret >= 0)
- GF_FREE (stub->args.rchecksum_cbk.strong_checksum);
-
- break;
- }
-
+ stub->fn.rchecksum(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
case GF_FOP_READDIR:
- {
- if (!stub->args.readdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries, stub->xdata);
- else
- stub->args.readdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries, stub->xdata);
-
- if (stub->args.readdir_cbk.op_ret > 0)
- gf_dirent_free (&stub->args.readdir_cbk.entries);
-
- break;
- }
-
+ stub->fn.readdir(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.size, stub->args.offset,
+ stub->args.xdata);
+ break;
case GF_FOP_READDIRP:
- {
- if (!stub->args.readdirp_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readdirp_cbk.op_ret,
- stub->args.readdirp_cbk.op_errno,
- &stub->args.readdirp_cbk.entries, stub->xdata);
- else
- stub->args.readdirp_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readdirp_cbk.op_ret,
- stub->args.readdirp_cbk.op_errno,
- &stub->args.readdirp_cbk.entries, stub->xdata);
-
- if (stub->args.readdirp_cbk.op_ret > 0)
- gf_dirent_free (&stub->args.readdirp_cbk.entries);
-
- break;
- }
-
+ stub->fn.readdirp(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.size, stub->args.offset,
+ stub->args.xdata);
+ break;
case GF_FOP_XATTROP:
- {
- if (!stub->args.xattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno, stub->xdata);
- else
- stub->args.xattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno,
- stub->args.xattrop_cbk.xattr, stub->xdata);
-
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
-
- break;
- }
+ stub->fn.xattrop(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.optype, stub->args.xattr,
+ stub->args.xdata);
+ break;
case GF_FOP_FXATTROP:
- {
- if (!stub->args.fxattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno, stub->xdata);
- else
- stub->args.fxattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno,
- stub->args.fxattrop_cbk.xattr, stub->xdata);
-
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
-
- break;
- }
+ stub->fn.fxattrop(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.optype, stub->args.xattr,
+ stub->args.xdata);
+ break;
case GF_FOP_SETATTR:
- {
- if (!stub->args.setattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setattr_cbk.op_ret,
- stub->args.setattr_cbk.op_errno,
- &stub->args.setattr_cbk.statpre,
- &stub->args.setattr_cbk.statpost, stub->xdata);
- else
- stub->args.setattr_cbk.fn (
- stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setattr_cbk.op_ret,
- stub->args.setattr_cbk.op_errno,
- &stub->args.setattr_cbk.statpre,
- &stub->args.setattr_cbk.statpost, stub->xdata);
- break;
- }
+ stub->fn.setattr(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.stat, stub->args.valid,
+ stub->args.xdata);
+ break;
case GF_FOP_FSETATTR:
- {
- if (!stub->args.fsetattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsetattr_cbk.op_ret,
- stub->args.fsetattr_cbk.op_errno,
- &stub->args.fsetattr_cbk.statpre,
- &stub->args.fsetattr_cbk.statpost, stub->xdata);
- else
- stub->args.fsetattr_cbk.fn (
- stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsetattr_cbk.op_ret,
- stub->args.fsetattr_cbk.op_errno,
- &stub->args.fsetattr_cbk.statpre,
- &stub->args.fsetattr_cbk.statpost, stub->xdata);
- break;
- }
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
- }
-out:
- return;
-}
+ stub->fn.fsetattr(stub->frame, stub->frame->this, stub->args.fd,
+ &stub->args.stat, stub->args.valid,
+ stub->args.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ stub->fn.fallocate(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.flags, stub->args.offset,
+ stub->args.size, stub->args.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ stub->fn.discard(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ stub->fn.zerofill(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
+ case GF_FOP_IPC:
+ stub->fn.ipc(stub->frame, stub->frame->this, stub->args.cmd,
+ stub->args.xdata);
+ break;
+ case GF_FOP_SEEK:
+ stub->fn.seek(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.what, stub->args.xdata);
+ break;
+ case GF_FOP_LEASE:
+ stub->fn.lease(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.lease, stub->args.xdata);
+ break;
+
+ case GF_FOP_GETACTIVELK:
+ stub->fn.getactivelk(stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
+ break;
+
+ case GF_FOP_SETACTIVELK:
+ stub->fn.setactivelk(stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.locklist,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_PUT:
+ stub->fn.put(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mode, stub->args.umask, stub->args.flags,
+ stub->args.vector, stub->args.count, stub->args.offset,
+ stub->args.iobref, stub->args.xattr, stub->args.xdata);
+ break;
+
+ case GF_FOP_COPY_FILE_RANGE:
+ stub->fn.copy_file_range(
+ stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.off_in, stub->args.fd_dst, stub->args.off_out,
+ stub->args.size, stub->args.flags, stub->args.xdata);
+ break;
+ default:
+ gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
+ LG_MSG_INVALID_ENTRY,
+ "Invalid value of FOP"
+ " (%d)",
+ stub->fop);
+ break;
+ }
+out:
+ return;
+}
+
+#define STUB_UNWIND(stb, fop, args...) \
+ do { \
+ if (stb->fn_cbk.fop) \
+ stb->fn_cbk.fop(stb->frame, stb->frame->cookie, stb->frame->this, \
+ stb->args_cbk.op_ret, stb->args_cbk.op_errno, \
+ args); \
+ else \
+ STACK_UNWIND_STRICT(fop, stb->frame, stb->args_cbk.op_ret, \
+ stb->args_cbk.op_errno, args); \
+ } while (0)
static void
-call_stub_destroy_wind (call_stub_t *stub)
+call_resume_unwind(call_stub_t *stub)
{
- if (stub->xdata)
- dict_unref (stub->xdata);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- switch (stub->fop) {
+ switch (stub->fop) {
case GF_FOP_OPEN:
- {
- loc_wipe (&stub->args.open.loc);
- if (stub->args.open.fd)
- fd_unref (stub->args.open.fd);
- break;
- }
+ STUB_UNWIND(stub, open, stub->args_cbk.fd, stub->args_cbk.xdata);
+ break;
case GF_FOP_CREATE:
- {
- loc_wipe (&stub->args.create.loc);
- if (stub->args.create.fd)
- fd_unref (stub->args.create.fd);
- break;
- }
+ STUB_UNWIND(stub, create, stub->args_cbk.fd, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_STAT:
- {
- loc_wipe (&stub->args.stat.loc);
- break;
- }
+ STUB_UNWIND(stub, stat, &stub->args_cbk.stat, stub->args_cbk.xdata);
+ break;
case GF_FOP_READLINK:
- {
- loc_wipe (&stub->args.readlink.loc);
- break;
- }
-
+ STUB_UNWIND(stub, readlink, stub->args_cbk.buf,
+ &stub->args_cbk.stat, stub->args.xdata);
+ break;
case GF_FOP_MKNOD:
- {
- loc_wipe (&stub->args.mknod.loc);
- }
- break;
-
+ STUB_UNWIND(stub, mknod, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_MKDIR:
- {
- loc_wipe (&stub->args.mkdir.loc);
- }
- break;
-
+ STUB_UNWIND(stub, mkdir, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_UNLINK:
- {
- loc_wipe (&stub->args.unlink.loc);
- }
- break;
-
+ STUB_UNWIND(stub, unlink, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_RMDIR:
- {
- loc_wipe (&stub->args.rmdir.loc);
- }
- break;
-
+ STUB_UNWIND(stub, rmdir, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_SYMLINK:
- {
- GF_FREE ((char *)stub->args.symlink.linkname);
- loc_wipe (&stub->args.symlink.loc);
- }
- break;
-
+ STUB_UNWIND(stub, symlink, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_RENAME:
- {
- loc_wipe (&stub->args.rename.old);
- loc_wipe (&stub->args.rename.new);
- }
- break;
-
+ STUB_UNWIND(stub, rename, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ &stub->args_cbk.preparent2, &stub->args_cbk.postparent2,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_LINK:
- {
- loc_wipe (&stub->args.link.oldloc);
- loc_wipe (&stub->args.link.newloc);
- }
- break;
-
+ STUB_UNWIND(stub, link, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_TRUNCATE:
- {
- loc_wipe (&stub->args.truncate.loc);
- break;
- }
-
+ STUB_UNWIND(stub, truncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_READ:
- {
- if (stub->args.readv.fd)
- fd_unref (stub->args.readv.fd);
- break;
- }
-
+ STUB_UNWIND(stub, readv, stub->args_cbk.vector,
+ stub->args_cbk.count, &stub->args_cbk.stat,
+ stub->args_cbk.iobref, stub->args_cbk.xdata);
+ break;
case GF_FOP_WRITE:
- {
- struct iobref *iobref = stub->args.writev.iobref;
- if (stub->args.writev.fd)
- fd_unref (stub->args.writev.fd);
- GF_FREE (stub->args.writev.vector);
- if (iobref)
- iobref_unref (iobref);
- break;
- }
-
+ STUB_UNWIND(stub, writev, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_STATFS:
- {
- loc_wipe (&stub->args.statfs.loc);
- break;
- }
+ STUB_UNWIND(stub, statfs, &stub->args_cbk.statvfs,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FLUSH:
- {
- if (stub->args.flush.fd)
- fd_unref (stub->args.flush.fd);
- break;
- }
-
+ STUB_UNWIND(stub, flush, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSYNC:
- {
- if (stub->args.fsync.fd)
- fd_unref (stub->args.fsync.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fsync, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_SETXATTR:
- {
- loc_wipe (&stub->args.setxattr.loc);
- if (stub->args.setxattr.dict)
- dict_unref (stub->args.setxattr.dict);
- break;
- }
-
+ STUB_UNWIND(stub, setxattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr.name)
- GF_FREE ((char *)stub->args.getxattr.name);
- loc_wipe (&stub->args.getxattr.loc);
- break;
- }
-
+ STUB_UNWIND(stub, getxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FSETXATTR:
- {
- fd_unref (stub->args.fsetxattr.fd);
- if (stub->args.fsetxattr.dict)
- dict_unref (stub->args.fsetxattr.dict);
- break;
- }
-
+ STUB_UNWIND(stub, fsetxattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr.name)
- GF_FREE ((char *)stub->args.fgetxattr.name);
- fd_unref (stub->args.fgetxattr.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fgetxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_REMOVEXATTR:
- {
- loc_wipe (&stub->args.removexattr.loc);
- GF_FREE ((char *)stub->args.removexattr.name);
- break;
- }
-
+ STUB_UNWIND(stub, removexattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_FREMOVEXATTR:
- {
- fd_unref (stub->args.fremovexattr.fd);
- GF_FREE ((char *)stub->args.fremovexattr.name);
- break;
- }
-
+ STUB_UNWIND(stub, fremovexattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_OPENDIR:
- {
- loc_wipe (&stub->args.opendir.loc);
- if (stub->args.opendir.fd)
- fd_unref (stub->args.opendir.fd);
- break;
- }
-
+ STUB_UNWIND(stub, opendir, stub->args_cbk.fd, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSYNCDIR:
- {
- if (stub->args.fsyncdir.fd)
- fd_unref (stub->args.fsyncdir.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fsyncdir, stub->args_cbk.xdata);
+ break;
case GF_FOP_ACCESS:
- {
- loc_wipe (&stub->args.access.loc);
- break;
- }
-
+ STUB_UNWIND(stub, access, stub->args_cbk.xdata);
+ break;
case GF_FOP_FTRUNCATE:
- {
- if (stub->args.ftruncate.fd)
- fd_unref (stub->args.ftruncate.fd);
- break;
- }
-
+ STUB_UNWIND(stub, ftruncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSTAT:
- {
- if (stub->args.fstat.fd)
- fd_unref (stub->args.fstat.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fstat, &stub->args_cbk.stat,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_LK:
- {
- if (stub->args.lk.fd)
- fd_unref (stub->args.lk.fd);
- break;
- }
-
+ STUB_UNWIND(stub, lk, &stub->args_cbk.lock, stub->args_cbk.xdata);
+ break;
case GF_FOP_INODELK:
- {
- if (stub->args.inodelk.volume)
- GF_FREE ((char *)stub->args.inodelk.volume);
-
- loc_wipe (&stub->args.inodelk.loc);
- break;
- }
+ STUB_UNWIND(stub, inodelk, stub->args_cbk.xdata);
+ break;
case GF_FOP_FINODELK:
- {
- if (stub->args.finodelk.volume)
- GF_FREE ((char *)stub->args.finodelk.volume);
-
- if (stub->args.finodelk.fd)
- fd_unref (stub->args.finodelk.fd);
- break;
- }
+ STUB_UNWIND(stub, finodelk, stub->args_cbk.xdata);
+ break;
case GF_FOP_ENTRYLK:
- {
- if (stub->args.entrylk.volume)
- GF_FREE ((char *)stub->args.entrylk.volume);
-
- if (stub->args.entrylk.name)
- GF_FREE ((char *)stub->args.entrylk.name);
- loc_wipe (&stub->args.entrylk.loc);
- break;
- }
+ STUB_UNWIND(stub, entrylk, stub->args_cbk.xdata);
+ break;
case GF_FOP_FENTRYLK:
- {
- if (stub->args.fentrylk.volume)
- GF_FREE ((char *)stub->args.fentrylk.volume);
-
- if (stub->args.fentrylk.name)
- GF_FREE ((char *)stub->args.fentrylk.name);
-
- if (stub->args.fentrylk.fd)
- fd_unref (stub->args.fentrylk.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fentrylk, stub->args_cbk.xdata);
+ break;
case GF_FOP_LOOKUP:
- {
- loc_wipe (&stub->args.lookup.loc);
- break;
- }
-
+ STUB_UNWIND(stub, lookup, stub->args_cbk.inode,
+ &stub->args_cbk.stat, stub->args_cbk.xdata,
+ &stub->args_cbk.postparent);
+ break;
case GF_FOP_RCHECKSUM:
- {
- if (stub->args.rchecksum.fd)
- fd_unref (stub->args.rchecksum.fd);
- break;
- }
-
+ STUB_UNWIND(stub, rchecksum, stub->args_cbk.weak_checksum,
+ stub->args_cbk.strong_checksum, stub->args_cbk.xdata);
+ break;
case GF_FOP_READDIR:
- {
- if (stub->args.readdir.fd)
- fd_unref (stub->args.readdir.fd);
- break;
- }
-
+ STUB_UNWIND(stub, readdir, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_READDIRP:
- {
- if (stub->args.readdirp.fd)
- fd_unref (stub->args.readdirp.fd);
-
- break;
- }
-
+ STUB_UNWIND(stub, readdir, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_XATTROP:
- {
- loc_wipe (&stub->args.xattrop.loc);
- dict_unref (stub->args.xattrop.xattr);
- break;
- }
+ STUB_UNWIND(stub, xattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop.fd)
- fd_unref (stub->args.fxattrop.fd);
- dict_unref (stub->args.fxattrop.xattr);
- break;
- }
+ STUB_UNWIND(stub, fxattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_SETATTR:
- {
- loc_wipe (&stub->args.setattr.loc);
- break;
- }
+ STUB_UNWIND(stub, setattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSETATTR:
- {
- if (stub->args.fsetattr.fd)
- fd_unref (stub->args.fsetattr.fd);
- break;
- }
+ STUB_UNWIND(stub, fsetattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STUB_UNWIND(stub, fallocate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ STUB_UNWIND(stub, discard, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ STUB_UNWIND(stub, zerofill, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_IPC:
+ STUB_UNWIND(stub, ipc, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_SEEK:
+ STUB_UNWIND(stub, seek, stub->args_cbk.offset,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_LEASE:
+ STUB_UNWIND(stub, lease, &stub->args_cbk.lease,
+ stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_GETACTIVELK:
+ STUB_UNWIND(stub, getactivelk, &stub->args_cbk.locklist,
+ stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_SETACTIVELK:
+ STUB_UNWIND(stub, setactivelk, stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_PUT:
+ STUB_UNWIND(stub, put, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_COPY_FILE_RANGE:
+ STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat,
+ &stub->args_cbk.prestat, &stub->args_cbk.poststat,
+ stub->args_cbk.xdata);
+ break;
+
default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
- }
+ gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
+ LG_MSG_INVALID_ENTRY,
+ "Invalid value of FOP"
+ " (%d)",
+ stub->fop);
+ break;
+ }
+out:
+ return;
}
-
static void
-call_stub_destroy_unwind (call_stub_t *stub)
+call_stub_wipe_args(call_stub_t *stub)
{
- if (stub->xdata)
- dict_unref (stub->xdata);
-
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- if (stub->args.open_cbk.fd)
- fd_unref (stub->args.open_cbk.fd);
- }
- break;
-
- case GF_FOP_CREATE:
- {
- if (stub->args.create_cbk.fd)
- fd_unref (stub->args.create_cbk.fd);
-
- if (stub->args.create_cbk.inode)
- inode_unref (stub->args.create_cbk.inode);
- }
- break;
-
- case GF_FOP_STAT:
- break;
-
- case GF_FOP_READLINK:
- {
- if (stub->args.readlink_cbk.buf)
- GF_FREE ((char *)stub->args.readlink_cbk.buf);
- }
- break;
-
- case GF_FOP_MKNOD:
- {
- if (stub->args.mknod_cbk.inode)
- inode_unref (stub->args.mknod_cbk.inode);
- }
- break;
-
- case GF_FOP_MKDIR:
- {
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
- }
- break;
-
- case GF_FOP_UNLINK:
- break;
-
- case GF_FOP_RMDIR:
- break;
-
- case GF_FOP_SYMLINK:
- {
- if (stub->args.symlink_cbk.inode)
- inode_unref (stub->args.symlink_cbk.inode);
- }
- break;
-
- case GF_FOP_RENAME:
- break;
-
- case GF_FOP_LINK:
- {
- if (stub->args.link_cbk.inode)
- inode_unref (stub->args.link_cbk.inode);
- }
- break;
-
- case GF_FOP_TRUNCATE:
- break;
-
- case GF_FOP_READ:
- {
- if (stub->args.readv_cbk.op_ret >= 0) {
- struct iobref *iobref = stub->args.readv_cbk.iobref;
- GF_FREE (stub->args.readv_cbk.vector);
-
- if (iobref) {
- iobref_unref (iobref);
- }
- }
- }
- break;
-
- case GF_FOP_WRITE:
- break;
-
- case GF_FOP_STATFS:
- break;
-
- case GF_FOP_FLUSH:
- break;
-
- case GF_FOP_FSYNC:
- break;
-
- case GF_FOP_SETXATTR:
- break;
-
- case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr_cbk.dict)
- dict_unref (stub->args.getxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_FSETXATTR:
- break;
-
- case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr_cbk.dict)
- dict_unref (stub->args.fgetxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_REMOVEXATTR:
- break;
- case GF_FOP_FREMOVEXATTR:
- break;
-
- case GF_FOP_OPENDIR:
- {
- if (stub->args.opendir_cbk.fd)
- fd_unref (stub->args.opendir_cbk.fd);
- }
- break;
-
- case GF_FOP_FSYNCDIR:
- break;
-
- case GF_FOP_ACCESS:
- break;
-
- case GF_FOP_FTRUNCATE:
- break;
+ args_wipe(&stub->args);
+}
- case GF_FOP_FSTAT:
- break;
+static void
+call_stub_wipe_args_cbk(call_stub_t *stub)
+{
+ args_cbk_wipe(&stub->args_cbk);
+}
- case GF_FOP_LK:
- break;
+void
+call_stub_destroy(call_stub_t *stub)
+{
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_INODELK:
- break;
+ if (stub->wind)
+ call_stub_wipe_args(stub);
+ else
+ call_stub_wipe_args_cbk(stub);
- case GF_FOP_FINODELK:
- break;
+ stub->stub_mem_pool = NULL;
- case GF_FOP_ENTRYLK:
- break;
+ mem_put(stub);
+out:
+ return;
+}
- case GF_FOP_FENTRYLK:
- break;
+void
+call_resume(call_stub_t *stub)
+{
+ xlator_t *old_THIS = NULL;
- case GF_FOP_LOOKUP:
- {
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
- }
- break;
+ errno = EINVAL;
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_RCHECKSUM:
- {
- if (stub->args.rchecksum_cbk.op_ret >= 0) {
- GF_FREE (stub->args.rchecksum_cbk.strong_checksum);
- }
- }
- break;
+ list_del_init(&stub->list);
- case GF_FOP_READDIR:
- {
- if (stub->args.readdir_cbk.op_ret > 0) {
- gf_dirent_free (&stub->args.readdir_cbk.entries);
- }
- }
- break;
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ if (stub->wind)
+ call_resume_wind(stub);
+ else
+ call_resume_unwind(stub);
+ }
+ THIS = old_THIS;
- case GF_FOP_READDIRP:
- {
- if (stub->args.readdirp_cbk.op_ret > 0) {
- gf_dirent_free (&stub->args.readdirp_cbk.entries);
- }
- }
- break;
+ call_stub_destroy(stub);
+out:
+ return;
+}
- case GF_FOP_XATTROP:
- {
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
- }
- break;
+void
+call_unwind_error(call_stub_t *stub, int op_ret, int op_errno)
+{
+ xlator_t *old_THIS = NULL;
- case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
- }
- break;
+ list_del_init(&stub->list);
- case GF_FOP_SETATTR:
- {
- break;
- }
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ call_resume_unwind(stub);
+ }
+ THIS = old_THIS;
- case GF_FOP_FSETATTR:
- {
- break;
- }
+ call_stub_destroy(stub);
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
- }
+ return;
}
-
void
-call_stub_destroy (call_stub_t *stub)
+call_unwind_error_keep_stub(call_stub_t *stub, int op_ret, int op_errno)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ xlator_t *old_THIS = NULL;
- if (stub->wind) {
- call_stub_destroy_wind (stub);
- } else {
- call_stub_destroy_unwind (stub);
- }
+ list_del_init(&stub->list);
- stub->stub_mem_pool = NULL;
- mem_put (stub);
-out:
- return;
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ call_resume_unwind(stub);
+ }
+
+ THIS = old_THIS;
+
+ return;
}
void
-call_resume (call_stub_t *stub)
+call_resume_keep_stub(call_stub_t *stub)
{
- xlator_t *old_THIS = NULL;
+ xlator_t *old_THIS = NULL;
+
+ errno = EINVAL;
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- errno = EINVAL;
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ list_del_init(&stub->list);
- list_del_init (&stub->list);
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ if (stub->wind)
+ call_resume_wind(stub);
+ else
+ call_resume_unwind(stub);
+ }
- old_THIS = THIS;
- THIS = stub->frame->this;
- {
- if (stub->wind)
- call_resume_wind (stub);
- else
- call_resume_unwind (stub);
- }
- THIS = old_THIS;
+ THIS = old_THIS;
- call_stub_destroy (stub);
out:
- return;
+ return;
}
diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h
deleted file mode 100644
index 633fc4cbbe4..00000000000
--- a/libglusterfs/src/call-stub.h
+++ /dev/null
@@ -1,1134 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CALL_STUB_H_
-#define _CALL_STUB_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "stack.h"
-#include "list.h"
-
-typedef struct {
- struct list_head list;
- char wind;
- call_frame_t *frame;
- glusterfs_fop_t fop;
- struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
- dict_t *xdata; /* common accross all the fops */
-
- union {
- /* lookup */
- struct {
- fop_lookup_t fn;
- loc_t loc;
- } lookup;
- struct {
- fop_lookup_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt postparent;
- } lookup_cbk;
-
- /* stat */
- struct {
- fop_stat_t fn;
- loc_t loc;
- } stat;
- struct {
- fop_stat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- } stat_cbk;
-
- /* fstat */
- struct {
- fop_fstat_t fn;
- fd_t *fd;
- } fstat;
- struct {
- fop_fstat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- } fstat_cbk;
-
- /* truncate */
- struct {
- fop_truncate_t fn;
- loc_t loc;
- off_t off;
- } truncate;
- struct {
- fop_truncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } truncate_cbk;
-
- /* ftruncate */
- struct {
- fop_ftruncate_t fn;
- fd_t *fd;
- off_t off;
- } ftruncate;
- struct {
- fop_ftruncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } ftruncate_cbk;
-
- /* access */
- struct {
- fop_access_t fn;
- loc_t loc;
- int32_t mask;
- } access;
- struct {
- fop_access_cbk_t fn;
- int32_t op_ret, op_errno;
- } access_cbk;
-
- /* readlink */
- struct {
- fop_readlink_t fn;
- loc_t loc;
- size_t size;
- } readlink;
- struct {
- fop_readlink_cbk_t fn;
- int32_t op_ret, op_errno;
- const char *buf;
- struct iatt sbuf;
- } readlink_cbk;
-
- /* mknod */
- struct {
- fop_mknod_t fn;
- loc_t loc;
- mode_t mode;
- dev_t rdev;
- mode_t umask;
- } mknod;
- struct {
- fop_mknod_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } mknod_cbk;
-
- /* mkdir */
- struct {
- fop_mkdir_t fn;
- loc_t loc;
- mode_t mode;
- mode_t umask;
- } mkdir;
- struct {
- fop_mkdir_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } mkdir_cbk;
-
- /* unlink */
- struct {
- fop_unlink_t fn;
- loc_t loc;
- int xflag;
- } unlink;
- struct {
- fop_unlink_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink_cbk;
-
- /* rmdir */
- struct {
- fop_rmdir_t fn;
- loc_t loc;
- int flags;
- } rmdir;
- struct {
- fop_rmdir_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } rmdir_cbk;
-
- /* symlink */
- struct {
- fop_symlink_t fn;
- const char *linkname;
- loc_t loc;
- mode_t umask;
- } symlink;
- struct {
- fop_symlink_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } symlink_cbk;
-
- /* rename */
- struct {
- fop_rename_t fn;
- loc_t old;
- loc_t new;
- } rename;
- struct {
- fop_rename_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- struct iatt preoldparent;
- struct iatt postoldparent;
- struct iatt prenewparent;
- struct iatt postnewparent;
- } rename_cbk;
-
- /* link */
- struct {
- fop_link_t fn;
- loc_t oldloc;
- loc_t newloc;
- } link;
- struct {
- fop_link_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } link_cbk;
-
- /* create */
- struct {
- fop_create_t fn;
- loc_t loc;
- int32_t flags;
- mode_t mode;
- fd_t *fd;
- mode_t umask;
- } create;
- struct {
- fop_create_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } create_cbk;
-
- /* open */
- struct {
- fop_open_t fn;
- loc_t loc;
- int32_t flags;
- fd_t *fd;
- } open;
- struct {
- fop_open_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } open_cbk;
-
- /* readv */
- struct {
- fop_readv_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- uint32_t flags;
- } readv;
- struct {
- fop_readv_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iovec *vector;
- int32_t count;
- struct iatt stbuf;
- struct iobref *iobref;
- } readv_cbk;
-
- /* writev */
- struct {
- fop_writev_t fn;
- fd_t *fd;
- struct iovec *vector;
- int32_t count;
- off_t off;
- uint32_t flags;
- struct iobref *iobref;
- } writev;
- struct {
- fop_writev_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } writev_cbk;
-
- /* flush */
- struct {
- fop_flush_t fn;
- fd_t *fd;
- } flush;
- struct {
- fop_flush_cbk_t fn;
- int32_t op_ret, op_errno;
- } flush_cbk;
-
- /* fsync */
- struct {
- fop_fsync_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsync;
- struct {
- fop_fsync_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync_cbk;
-
- /* opendir */
- struct {
- fop_opendir_t fn;
- loc_t loc;
- fd_t *fd;
- } opendir;
- struct {
- fop_opendir_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } opendir_cbk;
-
-
- /* fsyncdir */
- struct {
- fop_fsyncdir_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsyncdir;
- struct {
- fop_fsyncdir_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsyncdir_cbk;
-
- /* statfs */
- struct {
- fop_statfs_t fn;
- loc_t loc;
- } statfs;
- struct {
- fop_statfs_cbk_t fn;
- int32_t op_ret, op_errno;
- struct statvfs buf;
- } statfs_cbk;
-
- /* setxattr */
- struct {
- fop_setxattr_t fn;
- loc_t loc;
- dict_t *dict;
- int32_t flags;
- } setxattr;
- struct {
- fop_setxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } setxattr_cbk;
-
- /* getxattr */
- struct {
- fop_getxattr_t fn;
- loc_t loc;
- const char *name;
- } getxattr;
- struct {
- fop_getxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } getxattr_cbk;
-
- /* fsetxattr */
- struct {
- fop_fsetxattr_t fn;
- fd_t *fd;
- dict_t *dict;
- int32_t flags;
- } fsetxattr;
- struct {
- fop_fsetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsetxattr_cbk;
-
- /* fgetxattr */
- struct {
- fop_fgetxattr_t fn;
- fd_t *fd;
- const char *name;
- } fgetxattr;
- struct {
- fop_fgetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } fgetxattr_cbk;
-
- /* removexattr */
- struct {
- fop_removexattr_t fn;
- loc_t loc;
- const char *name;
- } removexattr;
- struct {
- fop_removexattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } removexattr_cbk;
-
-
- /* fremovexattr */
- struct {
- fop_fremovexattr_t fn;
- fd_t *fd;
- const char *name;
- } fremovexattr;
- struct {
- fop_fremovexattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } fremovexattr_cbk;
-
- /* lk */
- struct {
- fop_lk_t fn;
- fd_t *fd;
- int32_t cmd;
- struct gf_flock lock;
- } lk;
- struct {
- fop_lk_cbk_t fn;
- int32_t op_ret, op_errno;
- struct gf_flock lock;
- } lk_cbk;
-
- /* inodelk */
- struct {
- fop_inodelk_t fn;
- const char *volume;
- loc_t loc;
- int32_t cmd;
- struct gf_flock lock;
- } inodelk;
-
- struct {
- fop_inodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } inodelk_cbk;
-
- /* finodelk */
- struct {
- fop_finodelk_t fn;
- const char *volume;
- fd_t *fd;
- int32_t cmd;
- struct gf_flock lock;
- } finodelk;
-
- struct {
- fop_finodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } finodelk_cbk;
-
- /* entrylk */
- struct {
- fop_entrylk_t fn;
- loc_t loc;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } entrylk;
-
- struct {
- fop_entrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } entrylk_cbk;
-
- /* fentrylk */
- struct {
- fop_fentrylk_t fn;
- fd_t *fd;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } fentrylk;
-
- struct {
- fop_fentrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } fentrylk_cbk;
-
- /* readdir */
- struct {
- fop_readdir_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readdir;
- struct {
- fop_readdir_cbk_t fn;
- int32_t op_ret, op_errno;
- gf_dirent_t entries;
- } readdir_cbk;
-
- /* readdirp */
- struct {
- fop_readdirp_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readdirp;
- struct {
- fop_readdirp_cbk_t fn;
- int32_t op_ret, op_errno;
- gf_dirent_t entries;
- } readdirp_cbk;
-
- /* rchecksum */
- struct {
- fop_rchecksum_t fn;
- fd_t *fd;
- off_t offset;
- int32_t len;
- } rchecksum;
- struct {
- fop_rchecksum_cbk_t fn;
- int32_t op_ret, op_errno;
- uint32_t weak_checksum;
- uint8_t *strong_checksum;
- } rchecksum_cbk;
-
- /* xattrop */
- struct {
- fop_xattrop_t fn;
- loc_t loc;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } xattrop;
- struct {
- fop_xattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } xattrop_cbk;
-
- /* fxattrop */
- struct {
- fop_fxattrop_t fn;
- fd_t *fd;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } fxattrop;
- struct {
- fop_fxattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } fxattrop_cbk;
-
- /* setattr */
- struct {
- fop_setattr_t fn;
- loc_t loc;
- struct iatt stbuf;
- int32_t valid;
- } setattr;
- struct {
- fop_setattr_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt statpre;
- struct iatt statpost;
- } setattr_cbk;
-
- /* fsetattr */
- struct {
- fop_fsetattr_t fn;
- fd_t *fd;
- struct iatt stbuf;
- int32_t valid;
- } fsetattr;
- struct {
- fop_fsetattr_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt statpre;
- struct iatt statpost;
- } fsetattr_cbk;
-
- } args;
-} call_stub_t;
-
-call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
- dict_t *xdata);
-
-call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent);
-call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
- loc_t *loc, dict_t *xdata);
-call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
- fd_t *fd, dict_t *xdata);
-call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-
-call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off, dict_t *xdata);
-
-call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off, dict_t *xdata);
-
-call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *buf, dict_t *xdata);
-
-call_stub_t *
-fop_mknod_stub (call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata);
-
-call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_mkdir_stub (call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata);
-
-call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_unlink_stub (call_frame_t *frame, fop_unlink_t fn,
- loc_t *loc, int xflag, dict_t *xdata);
-
-call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_rmdir_stub (call_frame_t *frame, fop_rmdir_t fn,
- loc_t *loc, int flags, dict_t *xdata);
-
-call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_symlink_stub (call_frame_t *frame, fop_symlink_t fn,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
-
-call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata);
-
-call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_create_stub (call_frame_t *frame, fop_create_t fn,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- dict_t *xdata);
-
-call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off, uint32_t flags, dict_t *xdata);
-
-call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata);
-
-call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
- fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
- loc_t *loc, dict_t *xdata);
-
-call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf, dict_t *xdata);
-
-call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-call_stub_t *
-fop_setxattr_cbk_stub (call_frame_t *frame,
- fop_setxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value, dict_t *xdata);
-
-call_stub_t *
-fop_fsetxattr_stub (call_frame_t *frame,
- fop_fsetxattr_t fn,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-call_stub_t *
-fop_fsetxattr_cbk_stub (call_frame_t *frame,
- fop_fsetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fgetxattr_stub (call_frame_t *frame,
- fop_fgetxattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_fgetxattr_cbk_stub (call_frame_t *frame,
- fop_fgetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value, dict_t *xdata);
-
-call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-
-call_stub_t *
-fop_fremovexattr_stub (call_frame_t *frame,
- fop_fremovexattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_fremovexattr_cbk_stub (call_frame_t *frame,
- fop_fremovexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-call_stub_t *
-fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-call_stub_t *
-fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fentrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata);
-
-call_stub_t *
-fop_readdirp_stub (call_frame_t *frame,
- fop_readdirp_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- dict_t *xdata);
-
-call_stub_t *
-fop_readdirp_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-call_stub_t *
-fop_rchecksum_stub (call_frame_t *frame,
- fop_rchecksum_t fn,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-call_stub_t *
-fop_rchecksum_cbk_stub (call_frame_t *frame,
- fop_rchecksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata);
-
-call_stub_t *
-fop_xattrop_stub (call_frame_t *frame,
- fop_xattrop_t fn,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-call_stub_t *
-fop_xattrop_stub_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fxattrop_stub (call_frame_t *frame,
- fop_fxattrop_t fn,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-call_stub_t *
-fop_fxattrop_stub_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_setattr_stub (call_frame_t *frame,
- fop_setattr_t fn,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-call_stub_t *
-fop_setattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-call_stub_t *
-fop_fsetattr_stub (call_frame_t *frame,
- fop_fsetattr_t fn,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-call_stub_t *
-fop_fsetattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-void call_resume (call_stub_t *stub);
-void call_stub_destroy (call_stub_t *stub);
-#endif
diff --git a/libglusterfs/src/changelog.h b/libglusterfs/src/changelog.h
new file mode 100644
index 00000000000..a09d9f25287
--- /dev/null
+++ b/libglusterfs/src/changelog.h
@@ -0,0 +1,115 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+struct gf_brick_spec;
+
+/**
+ * Max bit shiter for event selection
+ */
+#define CHANGELOG_EV_SELECTION_RANGE 5
+
+#define CHANGELOG_OP_TYPE_JOURNAL (1 << 0)
+#define CHANGELOG_OP_TYPE_OPEN (1 << 1)
+#define CHANGELOG_OP_TYPE_CREATE (1 << 2)
+#define CHANGELOG_OP_TYPE_RELEASE (1 << 3)
+#define CHANGELOG_OP_TYPE_BR_RELEASE \
+ (1 << 4) /* logical release (last close()), \
+ sent by bitrot stub */
+#define CHANGELOG_OP_TYPE_MAX (1 << CHANGELOG_EV_SELECTION_RANGE)
+
+struct ev_open {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_creat {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_release {
+ unsigned char gfid[16];
+};
+
+struct ev_release_br {
+ unsigned long version;
+ unsigned char gfid[16];
+ int32_t sign_info;
+};
+
+struct ev_changelog {
+ char path[PATH_MAX];
+};
+
+typedef struct changelog_event {
+ unsigned int ev_type;
+
+ union {
+ struct ev_open open;
+ struct ev_creat create;
+ struct ev_release release;
+ struct ev_changelog journal;
+ struct ev_release_br releasebr;
+ } u;
+} changelog_event_t;
+
+#define CHANGELOG_EV_SIZE (sizeof(changelog_event_t))
+
+/**
+ * event callback, connected & disconnection defs
+ */
+typedef void(CALLBACK)(void *, char *, void *, changelog_event_t *);
+typedef void *(INIT)(void *, struct gf_brick_spec *);
+typedef void(FINI)(void *, char *, void *);
+typedef void(CONNECT)(void *, char *, void *);
+typedef void(DISCONNECT)(void *, char *, void *);
+
+struct gf_brick_spec {
+ char *brick_path;
+ unsigned int filter;
+
+ INIT *init;
+ FINI *fini;
+ CALLBACK *callback;
+ CONNECT *connected;
+ DISCONNECT *disconnected;
+
+ void *ptr;
+};
+
+/* API set */
+
+int
+gf_changelog_register(char *brick_path, char *scratch_dir, char *log_file,
+ int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan();
+
+int
+gf_changelog_start_fresh();
+
+ssize_t
+gf_changelog_next_change(char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done(char *file);
+
+/* newer flexible API */
+int
+gf_changelog_init(void *xl);
+
+int
+gf_changelog_register_generic(struct gf_brick_spec *bricks, int count,
+ int ordered, char *logfile, int lvl, void *xl);
+
+#endif
diff --git a/libglusterfs/src/checksum.c b/libglusterfs/src/checksum.c
index e14a3044c20..acdaed04ae2 100644
--- a/libglusterfs/src/checksum.c
+++ b/libglusterfs/src/checksum.c
@@ -9,57 +9,36 @@
*/
#include <openssl/md5.h>
+#include <openssl/sha.h>
+#include <zlib.h>
#include <stdint.h>
-
-#include "glusterfs.h"
+#include <string.h>
/*
- * The "weak" checksum required for the rsync algorithm,
- * adapted from the rsync source code. The following comment
- * appears there:
- *
- * "a simple 32 bit checksum that can be upadted from either end
- * (inspired by Mark Adler's Adler-32 checksum)"
+ * The "weak" checksum required for the rsync algorithm.
*
* Note: these functions are only called to compute checksums on
* pathnames; they don't need to handle arbitrarily long strings of
* data. Thus int32_t and uint32_t are sufficient
*/
-
uint32_t
-gf_rsync_weak_checksum (unsigned char *buf, size_t len)
+gf_rsync_weak_checksum(unsigned char *buf, size_t len)
{
- int32_t i = 0;
- uint32_t s1, s2;
-
- uint32_t csum;
-
- s1 = s2 = 0;
- if (len >= 4) {
- for (; i < (len-4); i+=4) {
- s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3];
- s1 += buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3];
- }
- }
-
- for (; i < len; i++) {
- s1 += buf[i];
- s2 += s1;
- }
-
- csum = (s1 & 0xffff) + (s2 << 16);
-
- return csum;
+ return adler32(0, buf, len);
}
-
/*
- * The "strong" checksum required for the rsync algorithm,
- * adapted from the rsync source code.
+ * The "strong" checksum required for the rsync algorithm.
*/
+void
+gf_rsync_strong_checksum(unsigned char *data, size_t len,
+ unsigned char *sha256_md)
+{
+ SHA256((const unsigned char *)data, len, sha256_md);
+}
void
-gf_rsync_strong_checksum (unsigned char *data, size_t len, unsigned char *md5)
+gf_rsync_md5_checksum(unsigned char *data, size_t len, unsigned char *md5)
{
- MD5(data, len, md5);
+ MD5(data, len, md5);
}
diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c
index 6c7907a095c..913115c7be1 100644
--- a/libglusterfs/src/circ-buff.c
+++ b/libglusterfs/src/circ-buff.c
@@ -8,163 +8,186 @@
cases as published by the Free Software Foundation.
*/
-#include "circ-buff.h"
+#include "glusterfs/circ-buff.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+void
+cb_destroy_data(circular_buffer_t *cb, void (*destroy_buffer_data)(void *data))
+{
+ if (destroy_buffer_data)
+ destroy_buffer_data(cb->data);
+ GF_FREE(cb->data);
+ return;
+}
/* hold lock while calling this function */
int
-__cb_add_entry_buffer (buffer_t *buffer, void *item)
+__cb_add_entry_buffer(buffer_t *buffer, void *item)
{
- circular_buffer_t *ptr = NULL;
- int ret = -1;
- //DO we really need the assert here?
- GF_ASSERT (buffer->used_len <= buffer->size_buffer);
-
- if (buffer->use_once == _gf_true &&
- buffer->used_len == buffer->size_buffer) {
- gf_log ("", GF_LOG_WARNING, "buffer %p is use once buffer",
- buffer);
- return -1;
- } else {
- if (buffer->used_len == buffer->size_buffer) {
- if (buffer->cb[buffer->w_index]) {
- ptr = buffer->cb[buffer->w_index];
- if (ptr->data) {
- GF_FREE (ptr->data);
- ptr->data = NULL;
- GF_FREE (ptr);
- }
- buffer->cb[buffer->w_index] = NULL;
- ptr = NULL;
- }
+ circular_buffer_t *ptr = NULL;
+ int ret = -1;
+ // DO we really need the assert here?
+ GF_ASSERT(buffer->used_len <= buffer->size_buffer);
+
+ if (buffer->use_once == _gf_true &&
+ buffer->used_len == buffer->size_buffer) {
+ gf_msg("circ-buff", GF_LOG_WARNING, 0, LG_MSG_BUFFER_ERROR,
+ "buffer %p is use once buffer", buffer);
+ return -1;
+ } else {
+ if (buffer->used_len == buffer->size_buffer) {
+ if (buffer->cb[buffer->w_index]) {
+ ptr = buffer->cb[buffer->w_index];
+ if (ptr->data) {
+ cb_destroy_data(ptr, buffer->destroy_buffer_data);
+ ptr->data = NULL;
+ GF_FREE(ptr);
}
-
- buffer->cb[buffer->w_index] =
- GF_CALLOC (1, sizeof (circular_buffer_t),
- gf_common_mt_circular_buffer_t);
- if (!buffer->cb[buffer->w_index])
- return -1;
-
- buffer->cb[buffer->w_index]->data = item;
- ret = gettimeofday (&buffer->cb[buffer->w_index]->tv, NULL);
- if (ret == -1)
- gf_log_callingfn ("", GF_LOG_WARNING, "getting time of"
- "the day failed");
- buffer->w_index++;
- buffer->w_index %= buffer->size_buffer - 1;
- //used_buffer size cannot be greater than the total buffer size
-
- if (buffer->used_len < buffer->size_buffer)
- buffer->used_len++;
- return buffer->w_index;
+ buffer->cb[buffer->w_index] = NULL;
+ ptr = NULL;
+ }
}
+
+ buffer->cb[buffer->w_index] = GF_CALLOC(1, sizeof(circular_buffer_t),
+ gf_common_mt_circular_buffer_t);
+ if (!buffer->cb[buffer->w_index])
+ return -1;
+
+ buffer->cb[buffer->w_index]->data = item;
+ ret = gettimeofday(&buffer->cb[buffer->w_index]->tv, NULL);
+ if (ret == -1)
+ gf_msg_callingfn("circ-buff", GF_LOG_WARNING, 0,
+ LG_MSG_GETTIMEOFDAY_FAILED,
+ "getting time of the day failed");
+ buffer->w_index++;
+ buffer->w_index %= buffer->size_buffer;
+ // used_buffer size cannot be greater than the total buffer size
+
+ if (buffer->used_len < buffer->size_buffer)
+ buffer->used_len++;
+ return buffer->w_index;
+ }
}
int
-cb_add_entry_buffer (buffer_t *buffer, void *item)
+cb_add_entry_buffer(buffer_t *buffer, void *item)
{
- int write_index = -1;
+ int write_index = -1;
- pthread_mutex_lock (&buffer->lock);
- {
- write_index = __cb_add_entry_buffer (buffer, item);
- }
- pthread_mutex_unlock (&buffer->lock);
+ pthread_mutex_lock(&buffer->lock);
+ {
+ write_index = __cb_add_entry_buffer(buffer, item);
+ }
+ pthread_mutex_unlock(&buffer->lock);
- return write_index;
+ return write_index;
}
void
-cb_buffer_show (buffer_t *buffer)
+cb_buffer_show(buffer_t *buffer)
{
- pthread_mutex_lock (&buffer->lock);
- {
- gf_log ("", GF_LOG_DEBUG, "w_index: %d, size: %"GF_PRI_SIZET
- " used_buffer: %d", buffer->w_index,
- buffer->size_buffer,
- buffer->used_len);
- }
- pthread_mutex_unlock (&buffer->lock);
+ pthread_mutex_lock(&buffer->lock);
+ {
+ gf_msg_debug("circ-buff", 0,
+ "w_index: %d, size: %" GF_PRI_SIZET " used_buffer: %d",
+ buffer->w_index, buffer->size_buffer, buffer->used_len);
+ }
+ pthread_mutex_unlock(&buffer->lock);
}
void
-cb_buffer_dump (buffer_t *buffer, void *data,
- int (fn) (circular_buffer_t *buffer, void *data))
+cb_buffer_dump(buffer_t *buffer, void *data,
+ int(fn)(circular_buffer_t *buffer, void *data))
{
- int i = 0;
- circular_buffer_t *entry = NULL;
- int entries = 0;
-
- pthread_mutex_lock (&buffer->lock);
- {
- if (buffer->use_once == _gf_false) {
- for (i = (buffer->w_index - 1) ; entries <
- buffer->used_len ; entries++) {
- entry = buffer->cb[i];
- if (entry)
- fn (entry, data);
- if (0 == i)
- i = buffer->used_len - 1;
- else
- i = (i - 1) % (buffer->used_len - 1);
- }
- } else {
- for (i = 0; i < buffer->used_len ; i++) {
- entry = buffer->cb[i];
- fn (entry, data);
- }
- }
+ int index = 0;
+ circular_buffer_t *entry = NULL;
+ int entries = 0;
+ int ul = 0;
+ int w_ind = 0;
+ int size_buff = 0;
+ int i = 0;
+
+ ul = buffer->used_len;
+ w_ind = buffer->w_index;
+ size_buff = buffer->size_buffer;
+
+ pthread_mutex_lock(&buffer->lock);
+ {
+ if (buffer->use_once == _gf_false) {
+ index = (size_buff + (w_ind - ul)) % size_buff;
+ for (entries = 0; entries < buffer->used_len; entries++) {
+ entry = buffer->cb[index];
+ if (entry)
+ fn(entry, data);
+ else
+ gf_msg_callingfn("circ-buff", GF_LOG_WARNING, 0,
+ LG_MSG_NULL_PTR,
+ "Null entry in "
+ "circular buffer at "
+ "index %d.",
+ index);
+
+ index++;
+ index %= buffer->size_buffer;
+ }
+ } else {
+ for (i = 0; i < buffer->used_len; i++) {
+ entry = buffer->cb[i];
+ fn(entry, data);
+ }
}
- pthread_mutex_unlock (&buffer->lock);
+ }
+ pthread_mutex_unlock(&buffer->lock);
}
buffer_t *
-cb_buffer_new (size_t buffer_size, gf_boolean_t use_once)
+cb_buffer_new(size_t buffer_size, gf_boolean_t use_once,
+ void (*destroy_buffer_data)(void *data))
{
- buffer_t *buffer = NULL;
-
- buffer = GF_CALLOC (1, sizeof (*buffer), gf_common_mt_buffer_t);
- if (!buffer) {
- gf_log ("", GF_LOG_ERROR, "could not allocate the "
- "buffer");
- goto out;
- }
-
- buffer->cb = GF_CALLOC (buffer_size,
- sizeof (circular_buffer_t *),
- gf_common_mt_circular_buffer_t);
- if (!buffer->cb) {
- gf_log ("", GF_LOG_ERROR, "could not allocate the "
- "memory for the circular buffer");
- GF_FREE (buffer);
- buffer = NULL;
- goto out;
- }
-
- buffer->w_index = 0;
- buffer->size_buffer = buffer_size;
- buffer->use_once = use_once;
- buffer->used_len = 0;
- pthread_mutex_init (&buffer->lock, NULL);
+ buffer_t *buffer = NULL;
+
+ buffer = GF_CALLOC(1, sizeof(*buffer), gf_common_mt_buffer_t);
+ if (!buffer) {
+ goto out;
+ }
+
+ buffer->cb = GF_CALLOC(buffer_size, sizeof(circular_buffer_t *),
+ gf_common_mt_circular_buffer_t);
+ if (!buffer->cb) {
+ GF_FREE(buffer);
+ buffer = NULL;
+ goto out;
+ }
+
+ buffer->w_index = 0;
+ buffer->size_buffer = buffer_size;
+ buffer->use_once = use_once;
+ buffer->used_len = 0;
+ buffer->destroy_buffer_data = destroy_buffer_data;
+ pthread_mutex_init(&buffer->lock, NULL);
out:
- return buffer;
+ return buffer;
}
void
-cb_buffer_destroy (buffer_t *buffer)
+cb_buffer_destroy(buffer_t *buffer)
{
- int i = 0;
-
- if (buffer) {
- if (buffer->cb) {
- for (i = 0; i < buffer->used_len ; i++) {
- if (buffer->cb[i])
- GF_FREE (buffer->cb[i]);
- }
- GF_FREE (buffer->cb);
+ int i = 0;
+ circular_buffer_t *ptr = NULL;
+ if (buffer) {
+ if (buffer->cb) {
+ for (i = 0; i < buffer->used_len; i++) {
+ ptr = buffer->cb[i];
+ if (ptr->data) {
+ cb_destroy_data(ptr, buffer->destroy_buffer_data);
+ ptr->data = NULL;
+ GF_FREE(ptr);
}
- pthread_mutex_destroy (&buffer->lock);
- GF_FREE (buffer);
+ }
+ GF_FREE(buffer->cb);
}
+ pthread_mutex_destroy(&buffer->lock);
+ GF_FREE(buffer);
+ }
}
-
diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/circ-buff.h
deleted file mode 100644
index 5b5acc387bf..00000000000
--- a/libglusterfs/src/circ-buff.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CB_H
-#define _CB_H
-
-#include "common-utils.h"
-#include "logging.h"
-#include "mem-types.h"
-
-#define BUFFER_SIZE 10
-#define TOTAL_SIZE BUFFER_SIZE + 1
-
-
-struct _circular_buffer {
- struct timeval tv;
- void *data;
-};
-
-typedef struct _circular_buffer circular_buffer_t;
-
-struct _buffer {
- unsigned int w_index;
- size_t size_buffer;
- gf_boolean_t use_once;
- /* This variable is assigned the proper value at the time of initing */
- /* the buffer. It indicates, whether the buffer should be used once */
- /* it becomes full. */
-
- int used_len;
- /* indicates the amount of circular buffer used. */
-
- circular_buffer_t **cb;
-
- pthread_mutex_t lock;
-};
-
-typedef struct _buffer buffer_t;
-
-int
-cb_add_entry_buffer (buffer_t *buffer, void *item);
-
-void
-cb_buffer_show (buffer_t *buffer);
-
-buffer_t *
-cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once);
-
-void
-cb_buffer_destroy (buffer_t *buffer);
-
-void
-cb_buffer_dump (buffer_t *buffer, void *data,
- int (fn) (circular_buffer_t *buffer, void *data));
-
-#endif /* _CB_H */
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
new file mode 100644
index 00000000000..9d377c3c2e1
--- /dev/null
+++ b/libglusterfs/src/client_t.c
@@ -0,0 +1,825 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/client_t.h"
+#include "glusterfs/list.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+static int
+gf_client_chain_client_entries(cliententry_t *entries, uint32_t startidx,
+ uint32_t endcount)
+{
+ uint32_t i = 0;
+
+ if (!entries) {
+ gf_msg_callingfn("client_t", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!entries");
+ return -1;
+ }
+
+ /* Chain only till the second to last entry because we want to
+ * ensure that the last entry has GF_CLIENTTABLE_END.
+ */
+ for (i = startidx; i < (endcount - 1); i++)
+ entries[i].next_free = i + 1;
+
+ /* i has already been incremented up to the last entry. */
+ entries[i].next_free = GF_CLIENTTABLE_END;
+
+ return 0;
+}
+
+static int
+gf_client_clienttable_expand(clienttable_t *clienttable, uint32_t nr)
+{
+ cliententry_t *oldclients = NULL;
+ uint32_t oldmax_clients = -1;
+ int ret = -1;
+
+ if (clienttable == NULL || nr <= clienttable->max_clients) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ ret = EINVAL;
+ goto out;
+ }
+
+ oldclients = clienttable->cliententries;
+ oldmax_clients = clienttable->max_clients;
+
+ clienttable->cliententries = GF_CALLOC(nr, sizeof(cliententry_t),
+ gf_common_mt_cliententry_t);
+ if (!clienttable->cliententries) {
+ clienttable->cliententries = oldclients;
+ ret = 0;
+ goto out;
+ }
+ clienttable->max_clients = nr;
+
+ if (oldclients) {
+ uint32_t cpy = oldmax_clients * sizeof(cliententry_t);
+ memcpy(clienttable->cliententries, oldclients, cpy);
+ }
+
+ gf_client_chain_client_entries(clienttable->cliententries, oldmax_clients,
+ clienttable->max_clients);
+
+ /* Now that expansion is done, we must update the client list
+ * head pointer so that the client allocation functions can continue
+ * using the expanded table.
+ */
+ clienttable->first_free = oldmax_clients;
+ GF_FREE(oldclients);
+ ret = 0;
+out:
+ return ret;
+}
+
+clienttable_t *
+gf_clienttable_alloc(void)
+{
+ clienttable_t *clienttable = NULL;
+ int result = 0;
+
+ clienttable = GF_CALLOC(1, sizeof(clienttable_t),
+ gf_common_mt_clienttable_t);
+ if (!clienttable)
+ return NULL;
+
+ LOCK_INIT(&clienttable->lock);
+
+ result = gf_client_clienttable_expand(clienttable,
+ GF_CLIENTTABLE_INITIAL_SIZE);
+ if (result != 0) {
+ gf_msg("client_t", GF_LOG_ERROR, 0, LG_MSG_EXPAND_CLIENT_TABLE_FAILED,
+ "gf_client_clienttable_expand failed");
+ GF_FREE(clienttable);
+ return NULL;
+ }
+
+ return clienttable;
+}
+
+/*
+ * Increments ref.bind if the client is already present or creates a new
+ * client with ref.bind = 1,ref.count = 1 it signifies that
+ * as long as ref.bind is > 0 client should be alive.
+ */
+client_t *
+gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid,
+ char *subdir_mount)
+{
+ client_t *client = NULL;
+ cliententry_t *cliententry = NULL;
+ clienttable_t *clienttable = NULL;
+ unsigned int i = 0;
+
+ if (this == NULL || client_uid == NULL) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ clienttable = this->ctx->clienttable;
+
+ LOCK(&clienttable->lock);
+ {
+ for (; i < clienttable->max_clients; i++) {
+ client = clienttable->cliententries[i].client;
+ if (client == NULL)
+ continue;
+ /*
+ * look for matching client_uid, _and_
+ * if auth was used, matching auth flavour and data
+ */
+ if (strcmp(client_uid, client->client_uid) == 0 &&
+ (cred->flavour && (cred->flavour == client->auth.flavour &&
+ (size_t)cred->datalen == client->auth.len &&
+ memcmp(cred->authdata, client->auth.data,
+ client->auth.len) == 0))) {
+ GF_ATOMIC_INC(client->bind);
+ goto unlock;
+ }
+ }
+
+ client = GF_CALLOC(1, sizeof(client_t), gf_common_mt_client_t);
+ if (client == NULL) {
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ client->this = this;
+ if (subdir_mount != NULL)
+ client->subdir_mount = gf_strdup(subdir_mount);
+
+ LOCK_INIT(&client->scratch_ctx.lock);
+
+ client->client_uid = gf_strdup(client_uid);
+ if (client->client_uid == NULL) {
+ GF_FREE(client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+ client->scratch_ctx.count = GF_CLIENTCTX_INITIAL_SIZE;
+ client->scratch_ctx.ctx = GF_CALLOC(GF_CLIENTCTX_INITIAL_SIZE,
+ sizeof(struct client_ctx),
+ gf_common_mt_client_ctx);
+ if (client->scratch_ctx.ctx == NULL) {
+ GF_FREE(client->client_uid);
+ GF_FREE(client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ GF_ATOMIC_INIT(client->bind, 1);
+ GF_ATOMIC_INIT(client->count, 1);
+ GF_ATOMIC_INIT(client->fd_cnt, 0);
+
+ client->auth.flavour = cred->flavour;
+ if (cred->flavour) {
+ client->auth.data = GF_MALLOC(cred->datalen, gf_common_mt_client_t);
+ if (client->auth.data == NULL) {
+ GF_FREE(client->scratch_ctx.ctx);
+ GF_FREE(client->client_uid);
+ GF_FREE(client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+ memcpy(client->auth.data, cred->authdata, cred->datalen);
+ client->auth.len = cred->datalen;
+ }
+
+ client->tbl_index = clienttable->first_free;
+ cliententry = &clienttable->cliententries[clienttable->first_free];
+ if (cliententry->next_free == GF_CLIENTTABLE_END) {
+ int result = gf_client_clienttable_expand(
+ clienttable,
+ clienttable->max_clients + GF_CLIENTTABLE_INITIAL_SIZE);
+ if (result != 0) {
+ GF_FREE(client->scratch_ctx.ctx);
+ GF_FREE(client->client_uid);
+ GF_FREE(client);
+ client = NULL;
+ errno = result;
+ goto unlock;
+ }
+ cliententry = &clienttable->cliententries[client->tbl_index];
+ cliententry->next_free = clienttable->first_free;
+ }
+ cliententry->client = client;
+ clienttable->first_free = cliententry->next_free;
+ cliententry->next_free = GF_CLIENTENTRY_ALLOCATED;
+ }
+unlock:
+ UNLOCK(&clienttable->lock);
+
+ if (client)
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_BIND_REF,
+ "%s: bind_ref: %" GF_PRI_ATOMIC
+ ", ref: "
+ "%" GF_PRI_ATOMIC,
+ client->client_uid, GF_ATOMIC_GET(client->bind),
+ GF_ATOMIC_GET(client->count));
+ return client;
+}
+
+void
+gf_client_put(client_t *client, gf_boolean_t *detached)
+{
+ gf_boolean_t unref = _gf_false;
+ int bind_ref;
+
+ if (client == NULL)
+ goto out;
+
+ if (detached)
+ *detached = _gf_false;
+
+ bind_ref = GF_ATOMIC_DEC(client->bind);
+ if (bind_ref == 0)
+ unref = _gf_true;
+
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_BIND_REF,
+ "%s: "
+ "bind_ref: %" GF_PRI_ATOMIC ", ref: %" GF_PRI_ATOMIC
+ ", "
+ "unref: %d",
+ client->client_uid, GF_ATOMIC_GET(client->bind),
+ GF_ATOMIC_GET(client->count), unref);
+ if (unref) {
+ if (detached)
+ *detached = _gf_true;
+ gf_client_unref(client);
+ }
+
+out:
+ return;
+}
+
+client_t *
+gf_client_ref(client_t *client)
+{
+ if (!client) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "null client");
+ return NULL;
+ }
+
+ GF_ATOMIC_INC(client->count);
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_REF_COUNT,
+ "%s: "
+ "ref-count %" GF_PRI_ATOMIC,
+ client->client_uid, GF_ATOMIC_GET(client->count));
+ return client;
+}
+
+static void
+gf_client_destroy_recursive(xlator_t *xl, client_t *client)
+{
+ xlator_list_t *trav;
+
+ if (!xl->call_cleanup && xl->cbks->client_destroy) {
+ xl->cbks->client_destroy(xl, client);
+ }
+
+ for (trav = xl->children; trav; trav = trav->next) {
+ gf_client_destroy_recursive(trav->xlator, client);
+ }
+}
+
+static void
+client_destroy(client_t *client)
+{
+ clienttable_t *clienttable = NULL;
+ glusterfs_graph_t *gtrav = NULL;
+
+ if (client == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ goto out;
+ }
+
+ clienttable = client->this->ctx->clienttable;
+
+ LOCK(&clienttable->lock);
+ {
+ clienttable->cliententries[client->tbl_index].client = NULL;
+ clienttable->cliententries[client->tbl_index]
+ .next_free = clienttable->first_free;
+ clienttable->first_free = client->tbl_index;
+ }
+ UNLOCK(&clienttable->lock);
+
+ list_for_each_entry(gtrav, &client->this->ctx->graphs, list)
+ {
+ gf_client_destroy_recursive(gtrav->top, client);
+ }
+
+ if (client->subdir_inode)
+ inode_unref(client->subdir_inode);
+
+ LOCK_DESTROY(&client->scratch_ctx.lock);
+
+ GF_FREE(client->auth.data);
+ GF_FREE(client->auth.username);
+ GF_FREE(client->auth.passwd);
+ GF_FREE(client->scratch_ctx.ctx);
+ GF_FREE(client->client_uid);
+ GF_FREE(client->subdir_mount);
+ GF_FREE(client->client_name);
+ GF_FREE(client);
+out:
+ return;
+}
+
+static int
+gf_client_disconnect_recursive(xlator_t *xl, client_t *client)
+{
+ int ret = 0;
+ xlator_list_t *trav;
+
+ if (!xl->call_cleanup && xl->cbks->client_disconnect) {
+ ret = xl->cbks->client_disconnect(xl, client);
+ }
+
+ for (trav = xl->children; trav; trav = trav->next) {
+ ret |= gf_client_disconnect_recursive(trav->xlator, client);
+ }
+
+ return ret;
+}
+
+int
+gf_client_disconnect(client_t *client)
+{
+ int ret = 0;
+ glusterfs_graph_t *gtrav = NULL;
+
+ list_for_each_entry(gtrav, &client->this->ctx->graphs, list)
+ {
+ ret |= gf_client_disconnect_recursive(gtrav->top, client);
+ }
+
+ return ret;
+}
+
+void
+gf_client_unref(client_t *client)
+{
+ uint64_t refcount;
+
+ if (!client) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "client is NULL");
+ return;
+ }
+
+ refcount = GF_ATOMIC_DEC(client->count);
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_REF_COUNT,
+ "%s: "
+ "ref-count %" GF_PRI_ATOMIC,
+ client->client_uid, refcount);
+ if (refcount == 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, LG_MSG_DISCONNECT_CLIENT,
+ "Shutting down connection %s", client->client_uid);
+ client_destroy(client);
+ }
+}
+
+static int
+__client_ctx_get_int(client_t *client, void *key, void **value)
+{
+ int index = 0;
+ int ret = 0;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (client->scratch_ctx.ctx[index].ctx_key == key)
+ break;
+ }
+
+ if (index == client->scratch_ctx.count) {
+ ret = -1;
+ goto out;
+ }
+
+ if (value)
+ *value = client->scratch_ctx.ctx[index].ctx_value;
+
+out:
+ return ret;
+}
+
+static int
+__client_ctx_set_int(client_t *client, void *key, void *value)
+{
+ int index = 0;
+ int ret = 0;
+ int set_idx = -1;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (!client->scratch_ctx.ctx[index].ctx_key) {
+ if (set_idx == -1)
+ set_idx = index;
+ /* don't break, to check if key already exists
+ further on */
+ }
+ if (client->scratch_ctx.ctx[index].ctx_key == key) {
+ set_idx = index;
+ break;
+ }
+ }
+
+ if (set_idx == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ client->scratch_ctx.ctx[set_idx].ctx_key = key;
+ client->scratch_ctx.ctx[set_idx].ctx_value = value;
+
+out:
+ return ret;
+}
+
+/*will return success with old value if exist*/
+void *
+client_ctx_set(client_t *client, void *key, void *value)
+{
+ int ret = 0;
+ void *ret_value = NULL;
+
+ if (!client || !key || !value)
+ return NULL;
+
+ LOCK(&client->scratch_ctx.lock);
+ {
+ ret = __client_ctx_get_int(client, key, &ret_value);
+ if (!ret && ret_value) {
+ UNLOCK(&client->scratch_ctx.lock);
+ return ret_value;
+ }
+
+ ret = __client_ctx_set_int(client, key, value);
+ }
+ UNLOCK(&client->scratch_ctx.lock);
+
+ if (ret)
+ return NULL;
+ return value;
+}
+
+int
+client_ctx_get(client_t *client, void *key, void **value)
+{
+ int ret = 0;
+
+ if (!client || !key)
+ return -1;
+
+ LOCK(&client->scratch_ctx.lock);
+ {
+ ret = __client_ctx_get_int(client, key, value);
+ }
+ UNLOCK(&client->scratch_ctx.lock);
+
+ return ret;
+}
+
+static int
+__client_ctx_del_int(client_t *client, void *key, void **value)
+{
+ int index = 0;
+ int ret = 0;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (client->scratch_ctx.ctx[index].ctx_key == key)
+ break;
+ }
+
+ if (index == client->scratch_ctx.count) {
+ ret = -1;
+ goto out;
+ }
+
+ if (value)
+ *value = client->scratch_ctx.ctx[index].ctx_value;
+
+ client->scratch_ctx.ctx[index].ctx_key = 0;
+ client->scratch_ctx.ctx[index].ctx_value = 0;
+
+out:
+ return ret;
+}
+
+int
+client_ctx_del(client_t *client, void *key, void **value)
+{
+ int ret = 0;
+
+ if (!client || !key)
+ return -1;
+
+ LOCK(&client->scratch_ctx.lock);
+ {
+ ret = __client_ctx_del_int(client, key, value);
+ }
+ UNLOCK(&client->scratch_ctx.lock);
+
+ return ret;
+}
+
+void
+client_ctx_dump(client_t *client, char *prefix)
+{
+#if 0 /* TBD, FIXME */
+ struct client_ctx *client_ctx = NULL;
+ xlator_t *xl = NULL;
+ int i = 0;
+
+ if ((client == NULL) || (client->ctx == NULL)) {
+ goto out;
+ }
+
+ LOCK (&client->ctx_lock);
+ if (client->ctx != NULL) {
+ client_ctx = GF_CALLOC (client->inode->table->xl->graph->ctx_count,
+ sizeof (*client_ctx),
+ gf_common_mt_client_ctx);
+ if (client_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ client_ctx[i] = client->ctx[i];
+ }
+ }
+unlock:
+ UNLOCK (&client->ctx_lock);
+
+ if (client_ctx == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ if (client_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)client_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->clientctx)
+ xl->dumpops->clientctx (xl, client);
+ }
+ }
+out:
+ GF_FREE (client_ctx);
+#endif
+}
+
+/*
+ * the following functions are here to preserve legacy behavior of the
+ * protocol/server xlator dump, but perhaps they should just be folded
+ * into the client dump instead?
+ */
+int
+gf_client_dump_fdtables_to_dict(xlator_t *this, dict_t *dict)
+{
+ clienttable_t *clienttable = NULL;
+ int count = 0;
+ int ret = -1;
+#ifdef NOTYET
+ client_t *client = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+#endif
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+#ifdef NOTYET
+ ret = TRY_LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ return -1;
+ }
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (client->bound_xl &&
+ !strcmp(client->bound_xl->name, this->name)) {
+ snprintf(key, sizeof(key), "conn%d", count++);
+ fdtable_dump_to_dict(client->server_ctx.fdtable, key, dict);
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+#endif
+
+ ret = dict_set_int32(dict, "conncount", count);
+out:
+ return ret;
+}
+
+int
+gf_client_dump_fdtables(xlator_t *this)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ int count = 1;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+ ret = TRY_LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ return -1;
+ }
+
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (client->client_uid) {
+ gf_proc_dump_build_key(key, "conn", "%d.id", count);
+ gf_proc_dump_write(key, "%s", client->client_uid);
+ }
+
+ if (client->subdir_mount) {
+ gf_proc_dump_build_key(key, "conn", "%d.subdir", count);
+ gf_proc_dump_write(key, "%s", client->subdir_mount);
+ }
+ gf_proc_dump_build_key(key, "conn", "%d.ref", count);
+ gf_proc_dump_write(key, "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(client->count));
+ if (client->bound_xl) {
+ gf_proc_dump_build_key(key, "conn", "%d.bound_xl", count);
+ gf_proc_dump_write(key, "%s", client->bound_xl->name);
+ }
+
+#ifdef NOTYET
+ gf_proc_dump_build_key(key, "conn", "%d.id", count);
+ fdtable_dump(client->server_ctx.fdtable, key);
+#endif
+ }
+ }
+
+ UNLOCK(&clienttable->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ xlator_t *prev_bound_xl = NULL;
+ char key[32] = {
+ 0,
+ };
+ int count = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+ ret = LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ return -1;
+ }
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (!strcmp(client->bound_xl->name, this->name)) {
+ if (client->bound_xl && client->bound_xl->itable) {
+ /* Presently every brick contains only
+ * one bound_xl for all connections.
+ * This will lead to duplicating of
+ * the inode lists, if listing is
+ * done for every connection. This
+ * simple check prevents duplication
+ * in the present case. If need arises
+ * the check can be improved.
+ */
+ if (client->bound_xl == prev_bound_xl)
+ continue;
+ prev_bound_xl = client->bound_xl;
+
+ snprintf(key, sizeof(key), "conn%d", count);
+ inode_table_dump_to_dict(client->bound_xl->itable, key,
+ dict);
+ }
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+
+ ret = dict_set_int32(dict, "conncount", count);
+
+out:
+ if (prev_bound_xl)
+ prev_bound_xl = NULL;
+ return ret;
+}
+
+int
+gf_client_dump_inodes(xlator_t *this)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ xlator_t *prev_bound_xl = NULL;
+ int count = 0;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ goto out;
+
+ ret = TRY_LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ goto out;
+ }
+
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (client->bound_xl && client->bound_xl->itable) {
+ /* Presently every brick contains only
+ * one bound_xl for all connections.
+ * This will lead to duplicating of
+ * the inode lists, if listing is
+ * done for every connection. This
+ * simple check prevents duplication
+ * in the present case. If need arises
+ * the check can be improved.
+ */
+ if (client->bound_xl == prev_bound_xl)
+ continue;
+ prev_bound_xl = client->bound_xl;
+
+ gf_proc_dump_build_key(key, "conn", "%d.bound_xl.%s", count,
+ client->bound_xl->name);
+ inode_table_dump(client->bound_xl->itable, key);
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c
new file mode 100644
index 00000000000..6ee89ddfdcf
--- /dev/null
+++ b/libglusterfs/src/cluster-syncop.c
@@ -0,0 +1,1261 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* Perform fop on all subvolumes represented by list[] array and wait
+ for all callbacks to return */
+
+/* NOTE: Cluster-syncop, like syncop blocks the executing thread until the
+ * responses are gathered if it is not executed as part of synctask. So it
+ * shouldn't be invoked in epoll worker thread */
+#include "glusterfs/cluster-syncop.h"
+#include "glusterfs/defaults.h"
+
+#define FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fop, \
+ args...) \
+ do { \
+ int __i = 0; \
+ int __count = 0; \
+ cluster_local_t __local = { \
+ 0, \
+ }; \
+ void *__old_local = frame->local; \
+ \
+ __local.replies = replies; \
+ memset(output, 0, numsubvols); \
+ cluster_replies_wipe(replies, numsubvols); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ if (syncbarrier_init(&__local.barrier)) \
+ break; \
+ frame->local = &__local; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (on[__i]) { \
+ __count++; \
+ } \
+ } \
+ __local.barrier.waitfor = __count; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (!on[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, cluster_##fop##_cbk, (void *)(long)__i, \
+ subvols[__i], subvols[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local.barrier, __count); \
+ syncbarrier_destroy(&__local.barrier); \
+ frame->local = __old_local; \
+ STACK_RESET(frame->root); \
+ } while (0)
+
+#define FOP_SEQ(subvols, on, numsubvols, replies, output, frame, fop, args...) \
+ do { \
+ int __i = 0; \
+ \
+ cluster_local_t __local = { \
+ 0, \
+ }; \
+ void *__old_local = frame->local; \
+ __local.replies = replies; \
+ memset(output, 0, numsubvols); \
+ cluster_replies_wipe(replies, numsubvols); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ if (syncbarrier_init(&__local.barrier)) \
+ break; \
+ frame->local = &__local; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (!on[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, cluster_##fop##_cbk, (void *)(long)__i, \
+ subvols[__i], subvols[__i]->fops->fop, args); \
+ syncbarrier_wait(&__local.barrier, 1); \
+ } \
+ syncbarrier_destroy(&__local.barrier); \
+ frame->local = __old_local; \
+ STACK_RESET(frame->root); \
+ } while (0)
+
+#define FOP_CBK(fop, frame, cookie, args...) \
+ do { \
+ cluster_local_t *__local = frame->local; \
+ int __i = (long)cookie; \
+ args_##fop##_cbk_store(&__local->replies[__i], args); \
+ __local->replies[__i].valid = 1; \
+ syncbarrier_wake(&__local->barrier); \
+ } while (0)
+
+int32_t
+cluster_fop_success_fill(default_args_cbk_t *replies, int numsubvols,
+ unsigned char *success)
+{
+ int i = 0;
+ int count = 0;
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].valid && replies[i].op_ret >= 0) {
+ success[i] = 1;
+ count++;
+ } else {
+ success[i] = 0;
+ }
+ }
+
+ return count;
+}
+
+void
+cluster_replies_wipe(default_args_cbk_t *replies, int numsubvols)
+{
+ int i = 0;
+
+ if (!replies)
+ return;
+
+ for (i = 0; i < numsubvols; i++)
+ args_cbk_wipe(&replies[i]);
+ memset(replies, 0, numsubvols * sizeof(*replies));
+}
+
+int32_t
+cluster_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ FOP_CBK(lookup, frame, cookie, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+cluster_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ FOP_CBK(stat, frame, cookie, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(truncate, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(ftruncate, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(access, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ FOP_CBK(readlink, frame, cookie, op_ret, op_errno, path, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(mknod, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(mkdir, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(unlink, frame, cookie, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+cluster_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(rmdir, frame, cookie, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+cluster_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(symlink, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ FOP_CBK(rename, frame, cookie, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(link, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(create, frame, cookie, op_ret, op_errno, fd, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ FOP_CBK(open, frame, cookie, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ FOP_CBK(readv, frame, cookie, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+ return 0;
+}
+
+int32_t
+cluster_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(writev, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_put_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(put, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(flush, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(fsync, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ FOP_CBK(fstat, frame, cookie, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ FOP_CBK(opendir, frame, cookie, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fsyncdir, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ FOP_CBK(statfs, frame, cookie, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(setxattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fsetxattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(fgetxattr, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(getxattr, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(xattrop, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(fxattrop, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(removexattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fremovexattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ FOP_CBK(lk, frame, cookie, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+int32_t
+cluster_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(inodelk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(finodelk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(entrylk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fentrylk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
+{
+ FOP_CBK(rchecksum, frame, cookie, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ FOP_CBK(readdir, frame, cookie, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ FOP_CBK(readdirp, frame, cookie, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int32_t
+cluster_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ FOP_CBK(setattr, frame, cookie, op_ret, op_errno, statpre, statpost, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ FOP_CBK(fsetattr, frame, cookie, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+cluster_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ FOP_CBK(fallocate, frame, cookie, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+int32_t
+cluster_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ FOP_CBK(discard, frame, cookie, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+int32_t
+cluster_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ FOP_CBK(zerofill, frame, cookie, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+int32_t
+cluster_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(ipc, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fgetxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fgetxattr, fd,
+ name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsetxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsetxattr, fd,
+ dict, flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_setxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, setxattr, loc,
+ dict, flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_statfs(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, statfs, loc,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsyncdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsyncdir, fd,
+ flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_opendir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, opendir, loc,
+ fd, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fstat(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fstat, fd,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsync(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsync, fd,
+ flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_flush(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, flush, fd,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_writev(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, writev, fd,
+ vector, count, off, flags, iobref, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_put(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, put, loc, mode,
+ umask, flags, vector, count, offset, iobref, xattr, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readv(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readv, fd, size,
+ offset, flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_open(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, open, loc,
+ flags, fd, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_create(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, create, loc,
+ flags, mode, umask, fd, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_link(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, link, oldloc,
+ newloc, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_rename(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, rename, oldloc,
+ newloc, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_symlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, symlink,
+ linkpath, loc, umask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_rmdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, rmdir, loc,
+ flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_unlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, unlink, loc,
+ xflag, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_mkdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, mkdir, loc,
+ mode, umask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_mknod(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, mknod, loc,
+ mode, rdev, umask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readlink, loc,
+ size, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_access(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, access, loc,
+ mask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_ftruncate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, ftruncate, fd,
+ offset, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_getxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, getxattr, loc,
+ name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_xattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, xattrop, loc,
+ flags, dict, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fxattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fxattrop, fd,
+ flags, dict, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_removexattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, removexattr,
+ loc, name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fremovexattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fremovexattr,
+ fd, name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_lk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, lk, fd, cmd,
+ lock, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_rchecksum(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, rchecksum, fd,
+ offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readdir, fd,
+ size, off, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readdirp(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readdirp, fd,
+ size, off, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_setattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, setattr, loc,
+ stbuf, valid, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_truncate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, truncate, loc,
+ offset, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_stat(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, stat, loc,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_lookup(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, lookup, loc,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsetattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsetattr, fd,
+ stbuf, valid, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fallocate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fallocate, fd,
+ keep_size, offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_discard(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, discard, fd,
+ offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_zerofill(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, zerofill, fd,
+ offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_ipc(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, ipc, op, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_uninodelk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size)
+{
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ FOP_ONLIST(subvols, locked_on, numsubvols, replies, output, frame, inodelk,
+ dom, &loc, F_SETLK, &flock, NULL);
+
+ loc_wipe(&loc);
+
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_tryinodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ loc_t loc = {0};
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_inodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ off_t off, size_t size)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_uninodelk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, off, size);
+
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, inodelk,
+ dom, &loc, F_SETLKW, &flock, NULL);
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_unentrylk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name)
+{
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ FOP_ONLIST(subvols, locked_on, numsubvols, replies, output, frame, entrylk,
+ dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+
+ loc_wipe(&loc);
+
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_tryentrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name)
+{
+ loc_t loc = {0};
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_entrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ const char *name)
+{
+ int i = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_unentrylk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, name);
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, entrylk,
+ dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on,
+ int numsubvols, default_args_cbk_t *replies,
+ unsigned char *locked_on, call_frame_t *frame,
+ xlator_t *this, char *dom, inode_t *inode, off_t off,
+ size_t size)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+ int num_success = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ num_success++;
+ continue;
+ }
+
+ /* TODO: If earlier subvols fail with an error other
+ * than EAGAIN, we could still have 2 clients competing
+ * for the lock*/
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_uninodelk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, off, size);
+
+ if (num_success) {
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame,
+ inodelk, dom, &loc, F_SETLKW, &flock, NULL);
+ } else {
+ loc_wipe(&loc);
+ memset(locked_on, 0, numsubvols);
+ return 0;
+ }
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on,
+ int numsubvols, default_args_cbk_t *replies,
+ unsigned char *locked_on, call_frame_t *frame,
+ xlator_t *this, char *dom, inode_t *inode,
+ const char *name)
+{
+ int i = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+ int num_success = 0;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ num_success++;
+ continue;
+ }
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_unentrylk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, name);
+ if (num_success) {
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame,
+ entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK,
+ NULL);
+ } else {
+ loc_wipe(&loc);
+ memset(locked_on, 0, numsubvols);
+ return 0;
+ }
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index da0d381960d..682cbf28055 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -8,13 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
+#else
+#include "execinfo_compat.h"
#endif
#include <stdio.h>
@@ -27,1997 +24,5442 @@
#include <time.h>
#include <locale.h>
#include <sys/socket.h>
-#include <sys/wait.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
-#include <stdlib.h>
+#include <assert.h>
+#include <libgen.h> /* for dirname() */
+#include <grp.h>
-#if defined GF_BSD_HOST_OS || defined GF_DARWIN_HOST_OS
+#if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS)
#include <sys/sysctl.h>
#endif
+#ifndef GF_LINUX_HOST_OS
+#include <sys/resource.h>
+#endif
+#ifdef HAVE_SYNCFS_SYS
+#include <sys/syscall.h>
+#endif
-#include "logging.h"
-#include "common-utils.h"
-#include "revision.h"
-#include "glusterfs.h"
-#include "stack.h"
-#include "globals.h"
-#include "lkowner.h"
+#include "glusterfs/compat-errno.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/revision.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/globals.h"
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+#include <ifaddrs.h>
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/glusterfs-acl.h"
+#ifdef __FreeBSD__
+#include <pthread_np.h>
+#undef BIT_SET
+#endif
#ifndef AI_ADDRCONFIG
#define AI_ADDRCONFIG 0
#endif /* AI_ADDRCONFIG */
+char *vol_type_str[] = {
+ "Distribute",
+ "Stripe [NOT SUPPORTED from v6.0]",
+ "Replicate",
+ "Striped-Replicate [NOT SUPPORTED from v6.0]",
+ "Disperse",
+ "Tier [NOT SUPPORTED from v6.0]",
+ "Distributed-Stripe [NOT SUPPORTED from v6.0]",
+ "Distributed-Replicate",
+ "Distributed-Striped-Replicate [NOT SUPPORTED from v6.0]",
+ "Distributed-Disperse",
+};
+
typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
-struct dnscache6 {
- struct addrinfo *first;
- struct addrinfo *next;
-};
+char *xattrs_to_heal[] = {"user.",
+ POSIX_ACL_ACCESS_XATTR,
+ POSIX_ACL_DEFAULT_XATTR,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ GF_SELINUX_XATTR_KEY,
+ GF_XATTR_MDATA_KEY,
+ NULL};
+void
+gf_xxh64_wrapper(const unsigned char *data, size_t const len,
+ unsigned long long const seed, char *xxh64)
+{
+ unsigned short i = 0;
+ const unsigned short lim = GF_XXH64_DIGEST_LENGTH * 2 + 1;
+ XXH64_hash_t hash = 0;
+ XXH64_canonical_t c_hash = {
+ {
+ 0,
+ },
+ };
+ const uint8_t *p = (const uint8_t *)&c_hash;
+
+ hash = XXH64(data, len, seed);
+ XXH64_canonicalFromHash(&c_hash, hash);
+
+ for (i = 0; i < GF_XXH64_DIGEST_LENGTH; i++)
+ snprintf(xxh64 + i * 2, lim - i * 2, "%02x", p[i]);
+}
+
+/**
+ * This function takes following arguments
+ * @this: xlator
+ * @gfid: The gfid which has to be filled
+ * @hash: the 8 byte hash which has to be filled inside the gfid
+ * @index: the array element of the uuid_t structure (which is
+ * a array of unsigned char) from where the 8 bytes of
+ * the hash has to be filled. Since uuid_t contains 16
+ * char elements in the array, each byte of the hash has
+ * to be filled in one array element.
+ *
+ * This function is called twice for 2 hashes (of 8 byte each) to
+ * be filled in the gfid.
+ *
+ * The for loop in this function actually is doing these 2 things
+ * for each hash
+ *
+ * 1) One of the hashes
+ * tmp[0] = (hash_2 >> 56) & 0xff;
+ * tmp[1] = (hash_2 >> 48) & 0xff;
+ * tmp[2] = (hash_2 >> 40) & 0xff;
+ * tmp[3] = (hash_2 >> 32) & 0xff;
+ * tmp[4] = (hash_2 >> 24) & 0xff;
+ * tmp[5] = (hash_2 >> 16) & 0xff;
+ * tmp[6] = (hash_2 >> 8) & 0xff;
+ * tmp[7] = (hash_2) & 0xff;
+ *
+ * 2) The other hash:
+ * tmp[8] = (hash_1 >> 56) & 0xff;
+ * tmp[9] = (hash_1 >> 48) & 0xff;
+ * tmp[10] = (hash_1 >> 40) & 0xff;
+ * tmp[11] = (hash_1 >> 32) & 0xff;
+ * tmp[12] = (hash_1 >> 24) & 0xff;
+ * tmp[13] = (hash_1 >> 16) & 0xff;
+ * tmp[14] = (hash_1 >> 8) & 0xff;
+ * tmp[15] = (hash_1) & 0xff;
+ **/
+static int
+gf_gfid_from_xxh64(xlator_t *this, uuid_t gfid, XXH64_hash_t hash,
+ unsigned short index)
+{
+ int ret = -1;
+ int i = -1;
+
+ if ((index != 0) && (index != 8)) {
+ gf_msg_callingfn("gfid-from-xxh64", GF_LOG_WARNING, 0,
+ LG_MSG_INDEX_NOT_FOUND,
+ "index can only be either 0 or 8, as this"
+ "function's purpose is to encode a 8 byte "
+ "hash inside the gfid (index: %d)",
+ index);
+ goto out;
+ }
+
+ for (i = 0; i < sizeof(hash); i++) {
+ /*
+ * As of now the below statement is equivalent of this.
+ * gfid[index+i] = (hash >> (64 - (8 * (i+1)))) & 0xff;
+ */
+ gfid[index + i] = (hash >> ((sizeof(hash) * 8) - (8 * (i + 1)))) &
+ (0xff);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * This function does the same thing as gf_xxh64_wrapper. But gf_xxh64_wrapper
+ * does not return anything and in this xlator there is a need for both the
+ * actual hash and the canonicalized form of the hash.
+ *
+ * To summarize:
+ * - XXH64_hash_t is needed as return because, those bytes which contain the
+ * hash can be used for different purposes as needed. One example is
+ * to have those bytes copied into the uuid_t structure to be used as gfid
+ * - xxh64 string is needed because, it can be used as the key for generating
+ * the next hash (and any other purpose which might require canonical form
+ * of the hash).
+ **/
+XXH64_hash_t
+gf_xxh64_hash_wrapper(const unsigned char *data, size_t const len,
+ unsigned long long const seed, char *xxh64)
+{
+ unsigned short i = 0;
+ const unsigned short lim = GF_XXH64_DIGEST_LENGTH * 2 + 1;
+ XXH64_hash_t hash = 0;
+ XXH64_canonical_t c_hash = {
+ {
+ 0,
+ },
+ };
+ const uint8_t *p = (const uint8_t *)&c_hash;
+
+ hash = XXH64(data, len, seed);
+ XXH64_canonicalFromHash(&c_hash, hash);
+
+ for (i = 0; i < GF_XXH64_DIGEST_LENGTH; i++)
+ snprintf(xxh64 + i * 2, lim - i * 2, "%02x", p[i]);
+
+ return hash;
+}
+
+/**
+ * This is the algorithm followed for generating new gfid
+ * 1) generate xxh64 hash using snapname and original gfid of the object
+ * 2) Using the canonicalized form of above hash as the key, generate
+ * another hash
+ * 3) Combine both of the 8 byte hashes to generate a 16 byte uuid_t type
+ * 4) Use the above uuid as the gfid
+ *
+ * Each byte of the hash is stored separately in different elements of the
+ * character array represented by uuid_t
+ * Ex: tmp[0] = (hash_2 >> 56) & 0xFF
+ * This saves the most significant byte of hash_2 in tmp[0]
+ * tmp[1] = (hash_2 >> 48) & 0xFF
+ * This saves next most significant byte of hash_2 in tmp[1]
+ * .
+ * .
+ * So on.
+ * tmp[0] - tmp[7] holds the contents of hash_2
+ * tmp[8] - tmp[15] hold the conents of hash_1
+ *
+ * The hash generated (i.e. of type XXH64_hash_t) is 8 bytes long. And for
+ * gfid 16 byte uuid is needed. Hecne the 2 hashes are combined to form
+ * one 16 byte entity.
+ **/
int
-log_base2 (unsigned long x)
+gf_gfid_generate_from_xxh64(uuid_t gfid, char *key)
{
- int val = 0;
+ char xxh64_1[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+ 0,
+ };
+ char xxh64_2[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+ 0,
+ };
+ XXH64_hash_t hash_1 = 0;
+ XXH64_hash_t hash_2 = 0;
+ int ret = -1;
+ xlator_t *this = THIS;
+
+ hash_1 = gf_xxh64_hash_wrapper((unsigned char *)key, strlen(key),
+ GF_XXHSUM64_DEFAULT_SEED, xxh64_1);
+
+ hash_2 = gf_xxh64_hash_wrapper((unsigned char *)xxh64_1, strlen(xxh64_1),
+ GF_XXHSUM64_DEFAULT_SEED, xxh64_2);
+
+ /* hash_2 is saved in 1st 8 elements of uuid_t char array */
+ if (gf_gfid_from_xxh64(this, gfid, hash_2, 0)) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ LG_MSG_XXH64_TO_GFID_FAILED,
+ "failed to encode the hash %llx into the 1st"
+ "half of gfid",
+ hash_2);
+ goto out;
+ }
+
+ /* hash_1 is saved in the remaining 8 elements of uuid_t */
+ if (gf_gfid_from_xxh64(this, gfid, hash_1, 8)) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ LG_MSG_XXH64_TO_GFID_FAILED,
+ "failed to encode the hash %llx into the 2nd"
+ "half of gfid",
+ hash_1);
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "gfid generated is %s (hash1: %llx) "
+ "hash2: %llx, xxh64_1: %s xxh64_2: %s",
+ uuid_utoa(gfid), hash_1, hash_2, xxh64_1, xxh64_2);
+
+ ret = 0;
+
+out:
+ return ret;
+}
- while (x > 1) {
- x /= 2;
- val++;
+/* works similar to mkdir(1) -p.
+ */
+int
+mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks)
+{
+ int i = 0;
+ int ret = -1;
+ char dir[PATH_MAX] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+
+ const int path_len = min(strlen(path), PATH_MAX - 1);
+
+ snprintf(dir, path_len + 1, "%s", path);
+
+ i = (dir[0] == '/') ? 1 : 0;
+ do {
+ if (path[i] != '/' && path[i] != '\0')
+ continue;
+
+ dir[i] = '\0';
+ ret = sys_mkdir(dir, mode);
+ if (ret && errno != EEXIST) {
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, NULL);
+ goto out;
}
- return val;
+ if (ret && errno == EEXIST && !allow_symlinks) {
+ ret = sys_lstat(dir, &stbuf);
+ if (ret)
+ goto out;
+
+ if (S_ISLNK(stbuf.st_mode)) {
+ ret = -1;
+ gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_DIR_IS_SYMLINK, "dir=%s",
+ dir, NULL);
+ goto out;
+ }
+ }
+ dir[i] = '/';
+
+ } while (path[i++] != '\0');
+
+ ret = sys_stat(dir, &stbuf);
+ if (ret || !S_ISDIR(stbuf.st_mode)) {
+ if (ret == 0)
+ errno = 0;
+ ret = -1;
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "possibly some of the components"
+ " were not directories",
+ NULL);
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+gf_lstat_dir(const char *path, struct stat *stbuf_in)
+{
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+
+ if (path == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = sys_lstat(path, &stbuf);
+ if (ret)
+ goto out;
+
+ if (!S_ISDIR(stbuf.st_mode)) {
+ errno = ENOTDIR;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (!ret && stbuf_in)
+ *stbuf_in = stbuf;
+
+ return ret;
+}
+
+int
+log_base2(unsigned long x)
+{
+ int val = 0;
+
+ while (x > 1) {
+ x /= 2;
+ val++;
+ }
+
+ return val;
}
+/**
+ * gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address.
+ *
+ * @ip: The IP address to perform a reverse lookup on
+ *
+ * @return: success: Allocated string containing the hostname
+ * failure: NULL
+ */
+char *
+gf_rev_dns_lookup(const char *ip)
+{
+ char *fqdn = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("resolver", ip, out);
+
+ /* Get the FQDN */
+ ret = gf_get_hostname_from_ip((char *)ip, &fqdn);
+ if (ret != 0) {
+ gf_smsg("resolver", GF_LOG_INFO, errno, LG_MSG_RESOLVE_HOSTNAME_FAILED,
+ "hostname=%s", ip, NULL);
+ }
+out:
+ return fqdn;
+}
+
+/**
+ * gf_resolve_path_parent -- Given a path, returns an allocated string
+ * containing the parent's path.
+ * @path: Path to parse
+ * @return: The parent path if found, NULL otherwise
+ */
+char *
+gf_resolve_path_parent(const char *path)
+{
+ char *parent = NULL;
+ char *tmp = NULL;
+ char *pathc = NULL;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, path, out);
+
+ if (0 == strlen(path)) {
+ gf_msg_callingfn(THIS->name, GF_LOG_DEBUG, 0, LG_MSG_INVALID_STRING,
+ "invalid string for 'path'");
+ goto out;
+ }
+
+ /* dup the parameter, we don't want to modify it */
+ pathc = strdupa(path);
+ if (!pathc) {
+ goto out;
+ }
+
+ /* Get the parent directory */
+ tmp = dirname(pathc);
+ if (strcmp(tmp, "/") == 0)
+ goto out;
+
+ parent = gf_strdup(tmp);
+out:
+ return parent;
+}
int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info)
-{
- int32_t ret = 0;
- struct addrinfo hints;
- struct dnscache6 *cache = NULL;
- char service[NI_MAXSERV], host[NI_MAXHOST];
-
- if (!hostname) {
- gf_log_callingfn ("resolver", GF_LOG_WARNING, "hostname is NULL");
- return -1;
+gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
+ struct addrinfo **addr_info)
+{
+ int32_t ret = 0;
+ struct addrinfo hints;
+ struct dnscache6 *cache = NULL;
+ char service[NI_MAXSERV], host[NI_MAXHOST];
+
+ if (!hostname) {
+ gf_msg_callingfn("resolver", GF_LOG_WARNING, 0, LG_MSG_HOSTNAME_NULL,
+ "hostname is NULL");
+ return -1;
+ }
+
+ if (!*dnscache) {
+ *dnscache = GF_CALLOC(1, sizeof(struct dnscache6),
+ gf_common_mt_dnscache6);
+ if (!*dnscache)
+ return -1;
+ }
+
+ cache = *dnscache;
+ if (cache->first && !cache->next) {
+ freeaddrinfo(cache->first);
+ cache->first = cache->next = NULL;
+ gf_msg_trace("resolver", 0, "flushing DNS cache");
+ }
+
+ if (!cache->first) {
+ char *port_str = NULL;
+ gf_msg_trace("resolver", 0,
+ "DNS cache not present, freshly "
+ "probing hostname: %s",
+ hostname);
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = family;
+ hints.ai_socktype = SOCK_STREAM;
+
+ ret = gf_asprintf(&port_str, "%d", port);
+ if (-1 == ret) {
+ return -1;
}
-
- if (!*dnscache) {
- *dnscache = GF_CALLOC (1, sizeof (struct dnscache6),
- gf_common_mt_dnscache6);
- if (!*dnscache)
- return -1;
+ if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) !=
+ 0) {
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
+ "family=%d", family, "ret=%s", gai_strerror(ret), NULL);
+
+ GF_FREE(*dnscache);
+ *dnscache = NULL;
+ GF_FREE(port_str);
+ return -1;
}
-
- cache = *dnscache;
- if (cache->first && !cache->next) {
- freeaddrinfo(cache->first);
- cache->first = cache->next = NULL;
- gf_log ("resolver", GF_LOG_TRACE,
- "flushing DNS cache");
+ GF_FREE(port_str);
+
+ cache->next = cache->first;
+ }
+
+ if (cache->next) {
+ ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
+ cache->next->ai_addrlen, host, sizeof(host), service,
+ sizeof(service), NI_NUMERICHOST);
+ if (ret != 0) {
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto err;
}
- if (!cache->first) {
- char *port_str = NULL;
- gf_log ("resolver", GF_LOG_TRACE,
- "DNS cache not present, freshly probing hostname: %s",
- hostname);
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG;
-
- ret = gf_asprintf (&port_str, "%d", port);
- if (-1 == ret) {
- gf_log ("resolver", GF_LOG_ERROR, "asprintf failed");
- return -1;
- }
- if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) != 0) {
- gf_log ("resolver", GF_LOG_ERROR,
- "getaddrinfo failed (%s)", gai_strerror (ret));
-
- GF_FREE (*dnscache);
- *dnscache = NULL;
- GF_FREE (port_str);
- return -1;
- }
- GF_FREE (port_str);
-
- cache->next = cache->first;
+ gf_msg_debug("resolver", 0,
+ "returning ip-%s (port-%s) for "
+ "hostname: %s and port: %d",
+ host, service, hostname, port);
+
+ *addr_info = cache->next;
+ }
+
+ if (cache->next)
+ cache->next = cache->next->ai_next;
+ if (cache->next) {
+ ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
+ cache->next->ai_addrlen, host, sizeof(host), service,
+ sizeof(service), NI_NUMERICHOST);
+ if (ret != 0) {
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto err;
}
- if (cache->next) {
- ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
- cache->next->ai_addrlen,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log ("resolver", GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
-
- gf_log ("resolver", GF_LOG_DEBUG,
- "returning ip-%s (port-%s) for hostname: %s and port: %d",
- host, service, hostname, port);
-
- *addr_info = cache->next;
+ gf_msg_debug("resolver", 0,
+ "next DNS query will return: "
+ "ip-%s port-%s",
+ host, service);
+ }
+
+ return 0;
+
+err:
+ freeaddrinfo(cache->first);
+ cache->first = cache->next = NULL;
+ GF_FREE(cache);
+ *dnscache = NULL;
+ return -1;
+}
+
+/**
+ * gf_dnscache_init -- Initializes a dnscache struct and sets the ttl
+ * to the specified value in the parameter.
+ *
+ * @ttl: the TTL in seconds
+ * @return: SUCCESS: Pointer to an allocated dnscache struct
+ * FAILURE: NULL
+ */
+struct dnscache *
+gf_dnscache_init(time_t ttl)
+{
+ struct dnscache *cache = GF_MALLOC(sizeof(*cache), gf_common_mt_dnscache);
+ if (!cache)
+ return NULL;
+
+ cache->cache_dict = dict_new();
+ if (!cache->cache_dict) {
+ GF_FREE(cache);
+ cache = NULL;
+ } else {
+ cache->ttl = ttl;
+ }
+
+ return cache;
+}
+
+/**
+ * gf_dnscache_deinit -- cleanup resources used by struct dnscache
+ */
+void
+gf_dnscache_deinit(struct dnscache *cache)
+{
+ if (!cache) {
+ gf_msg_plain(GF_LOG_WARNING, "dnscache is NULL");
+ return;
+ }
+ dict_unref(cache->cache_dict);
+ GF_FREE(cache);
+}
+
+/**
+ * gf_dnscache_entry_init -- Initialize a dnscache entry
+ *
+ * @return: SUCCESS: Pointer to an allocated dnscache entry struct
+ * FAILURE: NULL
+ */
+struct dnscache_entry *
+gf_dnscache_entry_init()
+{
+ struct dnscache_entry *entry = GF_CALLOC(1, sizeof(*entry),
+ gf_common_mt_dnscache_entry);
+ return entry;
+}
+
+/**
+ * gf_dnscache_entry_deinit -- Free memory used by a dnscache entry
+ *
+ * @entry: Pointer to deallocate
+ */
+void
+gf_dnscache_entry_deinit(struct dnscache_entry *entry)
+{
+ GF_FREE(entry->ip);
+ GF_FREE(entry->fqdn);
+ GF_FREE(entry);
+}
+
+/**
+ * gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address.
+ *
+ * @ip: The IP address to perform a reverse lookup on
+ *
+ * @return: success: Allocated string containing the hostname
+ * failure: NULL
+ */
+char *
+gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache)
+{
+ char *fqdn = NULL;
+ int ret = 0;
+ dict_t *cache = NULL;
+ data_t *entrydata = NULL;
+ struct dnscache_entry *dnsentry = NULL;
+ gf_boolean_t from_cache = _gf_false;
+
+ if (!dnscache)
+ goto out;
+
+ cache = dnscache->cache_dict;
+
+ /* Quick cache lookup to see if we already hold it */
+ entrydata = dict_get(cache, (char *)ip);
+ if (entrydata) {
+ dnsentry = (struct dnscache_entry *)entrydata->data;
+ /* First check the TTL & timestamp */
+ if (gf_time() - dnsentry->timestamp > dnscache->ttl) {
+ gf_dnscache_entry_deinit(dnsentry);
+ entrydata->data = NULL; /* Mark this as 'null' so
+ * dict_del () doesn't try free
+ * this after we've already
+ * freed it.
+ */
+
+ dict_del(cache, (char *)ip); /* Remove this entry */
+ } else {
+ /* Cache entry is valid, get the FQDN and return */
+ fqdn = dnsentry->fqdn;
+ from_cache = _gf_true; /* Mark this as from cache */
+ goto out;
}
+ }
+
+ /* Get the FQDN */
+ ret = gf_get_hostname_from_ip((char *)ip, &fqdn);
+ if (ret != 0)
+ goto out;
- if (cache->next)
- cache->next = cache->next->ai_next;
- if (cache->next) {
- ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
- cache->next->ai_addrlen,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log ("resolver", GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
-
- gf_log ("resolver", GF_LOG_DEBUG,
- "next DNS query will return: ip-%s port-%s", host, service);
+ if (!fqdn) {
+ gf_log_callingfn("resolver", GF_LOG_CRITICAL,
+ "Allocation failed for the host address");
+ goto out;
+ }
+
+ from_cache = _gf_false;
+out:
+ /* Insert into the cache */
+ if (fqdn && !from_cache && ip) {
+ struct dnscache_entry *entry = gf_dnscache_entry_init();
+
+ if (entry) {
+ entry->fqdn = fqdn;
+ entry->ip = gf_strdup(ip);
+ entry->timestamp = gf_time();
+ entrydata = bin_to_data(entry, sizeof(*entry));
+ dict_set(cache, (char *)ip, entrydata);
}
+ }
+ return fqdn;
+}
- return 0;
+struct xldump {
+ int lineno;
+};
-err:
- freeaddrinfo (cache->first);
- cache->first = cache->next = NULL;
- GF_FREE (cache);
- *dnscache = NULL;
- return -1;
+/* to catch any format discrepencies that may arise in code */
+static int
+nprintf(struct xldump *dump, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+static int
+nprintf(struct xldump *dump, const char *fmt, ...)
+{
+ va_list ap;
+ char *msg = NULL;
+ char header[32];
+ int ret = 0;
+
+ ret = snprintf(header, 32, "%3d:", ++dump->lineno);
+ if (ret < 0)
+ goto out;
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret)
+ goto out;
+
+ /* NOTE: No ret value from gf_msg_plain, so unable to compute printed
+ * characters. The return value from nprintf is not used, so for now
+ * living with it */
+ gf_msg_plain(GF_LOG_WARNING, "%s %s", header, msg);
+
+out:
+ FREE(msg);
+ return 0;
+}
+
+static int
+xldump_options(dict_t *this, char *key, data_t *value, void *d)
+{
+ nprintf(d, " option %s %s", key, value->data);
+ return 0;
}
+static void
+xldump_subvolumes(xlator_t *this, void *d)
+{
+ xlator_list_t *subv = NULL;
+ int len = 0;
+ char *subvstr = NULL;
+
+ if (!this->children)
+ return;
+
+ for (subv = this->children; subv; subv = subv->next)
+ len += (strlen(subv->xlator->name) + 1);
+
+ subvstr = GF_MALLOC(len, gf_common_mt_strdup);
+
+ len = 0;
+ for (subv = this->children; subv; subv = subv->next)
+ len += sprintf(subvstr + len, "%s%s", subv->xlator->name,
+ subv->next ? " " : "");
+
+ nprintf(d, " subvolumes %s", subvstr);
+
+ GF_FREE(subvstr);
+}
+
+static void
+xldump(xlator_t *each, void *d)
+{
+ nprintf(d, "volume %s", each->name);
+ nprintf(d, " type %s", each->type);
+ dict_foreach(each->options, xldump_options, d);
+
+ xldump_subvolumes(each, d);
+
+ nprintf(d, "end-volume");
+ nprintf(d, " ");
+}
void
-gf_log_volume_file (FILE *specfp)
+gf_log_dump_graph(FILE *specfp, glusterfs_graph_t *graph)
{
- extern FILE *gf_log_logfile;
- int lcount = 0;
- char data[GF_UNIT_KB];
+ struct xldump xld = {
+ 0,
+ };
+
+ gf_msg_plain(GF_LOG_WARNING, "Final graph:");
+ gf_msg_plain(GF_LOG_WARNING,
+ "+---------------------------------------"
+ "---------------------------------------+");
- fseek (specfp, 0L, SEEK_SET);
+ xlator_foreach_depth_first(graph->top, xldump, &xld);
- fprintf (gf_log_logfile, "Given volfile:\n");
- fprintf (gf_log_logfile,
+ gf_msg_plain(GF_LOG_WARNING,
"+---------------------------------------"
- "---------------------------------------+\n");
- while (fgets (data, GF_UNIT_KB, specfp) != NULL){
- lcount++;
- fprintf (gf_log_logfile, "%3d: %s", lcount, data);
- }
- fprintf (gf_log_logfile,
- "\n+---------------------------------------"
- "---------------------------------------+\n");
- fflush (gf_log_logfile);
- fseek (specfp, 0L, SEEK_SET);
+ "---------------------------------------+");
}
static void
-gf_dump_config_flags (int fd)
+gf_dump_config_flags()
{
- int ret = 0;
-
- ret = write (fd, "configuration details:\n", 23);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "configuration details:");
/* have argp */
#ifdef HAVE_ARGP
- ret = write (fd, "argp 1\n", 7);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "argp 1");
#endif
/* ifdef if found backtrace */
#ifdef HAVE_BACKTRACE
- ret = write (fd, "backtrace 1\n", 12);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "backtrace 1");
#endif
/* Berkeley-DB version has cursor->get() */
#ifdef HAVE_BDB_CURSOR_GET
- ret = write (fd, "bdb->cursor->get 1\n", 19);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "bdb->cursor->get 1");
#endif
/* Define to 1 if you have the <db.h> header file. */
#ifdef HAVE_DB_H
- ret = write (fd, "db.h 1\n", 7);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "db.h 1");
#endif
/* Define to 1 if you have the <dlfcn.h> header file. */
#ifdef HAVE_DLFCN_H
- ret = write (fd, "dlfcn 1\n", 8);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "dlfcn 1");
#endif
/* define if fdatasync exists */
#ifdef HAVE_FDATASYNC
- ret = write (fd, "fdatasync 1\n", 12);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "fdatasync 1");
#endif
/* Define to 1 if you have the `pthread' library (-lpthread). */
#ifdef HAVE_LIBPTHREAD
- ret = write (fd, "libpthread 1\n", 13);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "libpthread 1");
#endif
/* define if llistxattr exists */
#ifdef HAVE_LLISTXATTR
- ret = write (fd, "llistxattr 1\n", 13);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "llistxattr 1");
#endif
/* define if found setfsuid setfsgid */
#ifdef HAVE_SET_FSID
- ret = write (fd, "setfsid 1\n", 10);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "setfsid 1");
#endif
/* define if found spinlock */
#ifdef HAVE_SPINLOCK
- ret = write (fd, "spinlock 1\n", 11);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "spinlock 1");
#endif
/* Define to 1 if you have the <sys/epoll.h> header file. */
#ifdef HAVE_SYS_EPOLL_H
- ret = write (fd, "epoll.h 1\n", 10);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "epoll.h 1");
#endif
/* Define to 1 if you have the <sys/extattr.h> header file. */
#ifdef HAVE_SYS_EXTATTR_H
- ret = write (fd, "extattr.h 1\n", 12);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "extattr.h 1");
#endif
/* Define to 1 if you have the <sys/xattr.h> header file. */
#ifdef HAVE_SYS_XATTR_H
- ret = write (fd, "xattr.h 1\n", 10);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "xattr.h 1");
#endif
/* define if found st_atim.tv_nsec */
#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ret = write (fd, "st_atim.tv_nsec 1\n", 18);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "st_atim.tv_nsec 1");
#endif
/* define if found st_atimespec.tv_nsec */
#ifdef HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ret = write (fd, "st_atimespec.tv_nsec 1\n",23);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "st_atimespec.tv_nsec 1");
#endif
/* Define to the full name and version of this package. */
#ifdef PACKAGE_STRING
- {
- char msg[128];
- sprintf (msg, "package-string: %s\n", PACKAGE_STRING);
- ret = write (fd, msg, strlen (msg));
- if (ret == -1)
- goto out;
+ {
+ char *msg = NULL;
+ int ret = -1;
+
+ ret = gf_asprintf(&msg, "package-string: %s", PACKAGE_STRING);
+ if (ret >= 0) {
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+ GF_FREE(msg);
}
+ }
#endif
-out:
- return;
+ return;
}
-/* Obtain a backtrace and print it to stdout. */
-/* TODO: It looks like backtrace_symbols allocates memory,
- it may be problem because mostly memory allocation/free causes 'sigsegv' */
+/* Obtain a backtrace and print it to the log */
void
-gf_print_trace (int32_t signum)
+gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx)
{
- extern FILE *gf_log_logfile;
- struct tm *tm = NULL;
- char msg[1024] = {0,};
- char timestr[256] = {0,};
- time_t utime = 0;
- int ret = 0;
- int fd = 0;
+ char msg[1024] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ call_stack_t *stack = NULL;
+
+ /* Now every gf_log call will just write to a buffer and when the
+ * buffer becomes full, its written to the log-file. Suppose the process
+ * crashes and prints the backtrace in the log-file, then the previous
+ * log information will still be in the buffer itself. So flush the
+ * contents of the buffer to the log file before printing the backtrace
+ * which helps in debugging.
+ */
+ gf_log_flush();
+
+ gf_log_disable_suppression_before_exit(ctx);
+
+ /* Pending frames, (if any), list them in order */
+ gf_msg_plain_nomem(GF_LOG_ALERT, "pending frames:");
+ {
+ /* FIXME: traversing stacks outside pool->lock */
+ list_for_each_entry(stack, &ctx->pool->all_frames, all_frames)
+ {
+ if (stack->type == GF_OP_TYPE_FOP)
+ sprintf(msg, "frame : type(%d) op(%s)", stack->type,
+ gf_fop_list[stack->op]);
+ else
+ sprintf(msg, "frame : type(%d) op(%d)", stack->type, stack->op);
- fd = fileno (gf_log_logfile);
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+ }
+ }
+
+ sprintf(msg, "patchset: %s", GLUSTERFS_REPOSITORY_REVISION);
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+
+ sprintf(msg, "signal received: %d", signum);
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+ {
+ /* Dump the timestamp of the crash too, so the previous logs
+ can be related */
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
+ gf_msg_plain_nomem(GF_LOG_ALERT, "time of crash: ");
+ gf_msg_plain_nomem(GF_LOG_ALERT, timestr);
+ }
+
+ gf_dump_config_flags();
+ gf_msg_backtrace_nomem(GF_LOG_ALERT, 200);
+ sprintf(msg, "---------");
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+
+ /* Send a signal to terminate the process */
+ signal(signum, SIG_DFL);
+ raise(signum);
+}
- /* Pending frames, (if any), list them in order */
- ret = write (fd, "pending frames:\n", 16);
- if (ret < 0)
- goto out;
+void
+trap(void)
+{
+}
- {
- glusterfs_ctx_t *ctx = glusterfs_ctx_get ();
- struct list_head *trav = ((call_pool_t *)ctx->pool)->all_frames.next;
- while (trav != (&((call_pool_t *)ctx->pool)->all_frames)) {
- call_frame_t *tmp = (call_frame_t *)(&((call_stack_t *)trav)->frames);
- if (tmp->root->type == GF_OP_TYPE_FOP)
- sprintf (msg,"frame : type(%d) op(%s)\n",
- tmp->root->type,
- gf_fop_list[tmp->root->op]);
- if (tmp->root->type == GF_OP_TYPE_MGMT)
- sprintf (msg,"frame : type(%d) op(%s)\n",
- tmp->root->type,
- gf_mgmt_list[tmp->root->op]);
-
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
-
- trav = trav->next;
- }
- ret = write (fd, "\n", 1);
- if (ret < 0)
- goto out;
- }
+char *
+gf_trim(char *string)
+{
+ register char *s, *t;
- sprintf (msg, "patchset: %s\n", GLUSTERFS_REPOSITORY_REVISION);
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
+ if (string == NULL) {
+ return NULL;
+ }
- sprintf (msg, "signal received: %d\n", signum);
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
+ for (s = string; isspace(*s); s++)
+ ;
- {
- /* Dump the timestamp of the crash too, so the previous logs
- can be related */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S\n", tm);
- ret = write (fd, "time of crash: ", 15);
- if (ret < 0)
- goto out;
- ret = write (fd, timestr, strlen (timestr));
- if (ret < 0)
- goto out;
- }
+ if (*s == 0)
+ return s;
- gf_dump_config_flags (fd);
-#if HAVE_BACKTRACE
- /* Print 'backtrace' */
- {
- void *array[200];
- size_t size;
-
- size = backtrace (array, 200);
- backtrace_symbols_fd (&array[1], size-1, fd);
- sprintf (msg, "---------\n");
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
- }
-#endif /* HAVE_BACKTRACE */
+ t = s + strlen(s) - 1;
+ while (t > s && isspace(*t))
+ t--;
+ *++t = '\0';
-out:
- /* Send a signal to terminate the process */
- signal (signum, SIG_DFL);
- raise (signum);
+ return s;
}
-void
-trap (void)
+int
+gf_strstr(const char *str, const char *delim, const char *match)
{
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *tmp_str = NULL;
+
+ int ret = 0;
+
+ tmp_str = strdup(str);
+
+ if (str == NULL || delim == NULL || match == NULL || tmp_str == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ ret = -1;
+ goto out;
+ }
+ tmp = strtok_r(tmp_str, delim, &save_ptr);
+
+ while (tmp) {
+ ret = strcmp(tmp, match);
+
+ if (ret == 0)
+ break;
+
+ tmp = strtok_r(NULL, delim, &save_ptr);
+ }
+
+out:
+ free(tmp_str);
+
+ return ret;
}
-char *
-gf_trim (char *string)
+int
+gf_volume_name_validate(const char *volume_name)
{
- register char *s, *t;
+ const char *vname = NULL;
- if (string == NULL) {
- return NULL;
- }
+ if (volume_name == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return -1;
+ }
- for (s = string; isspace (*s); s++)
- ;
+ if (!isalpha(volume_name[0]))
+ return 1;
- if (*s == 0)
- return s;
+ for (vname = &volume_name[1]; *vname != '\0'; vname++) {
+ if (!(isalnum(*vname) || *vname == '_'))
+ return 1;
+ }
- t = s + strlen (s) - 1;
- while (t > s && isspace (*t))
- t--;
- *++t = '\0';
+ return 0;
+}
- return s;
+int
+gf_string2time(const char *str, uint32_t *n)
+{
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtol(str, &tail, 0);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (((tail[0] == '\0') || ((tail[0] == 's') && (tail[1] == '\0')) ||
+ ((tail[0] == 's') && (tail[1] == 'e') && (tail[2] == 'c') &&
+ (tail[3] == '\0'))))
+ goto out;
+
+ else if (((tail[0] == 'm') && (tail[1] == '\0')) ||
+ ((tail[0] == 'm') && (tail[1] == 'i') && (tail[2] == 'n') &&
+ (tail[3] == '\0'))) {
+ value = value * GF_MINUTE_IN_SECONDS;
+ goto out;
+ }
+
+ else if (((tail[0] == 'h') && (tail[1] == '\0')) ||
+ ((tail[0] == 'h') && (tail[1] == 'r') && (tail[2] == '\0'))) {
+ value = value * GF_HOUR_IN_SECONDS;
+ goto out;
+ }
+
+ else if (((tail[0] == 'd') && (tail[1] == '\0')) ||
+ ((tail[0] == 'd') && (tail[1] == 'a') && (tail[2] == 'y') &&
+ (tail[3] == 's') && (tail[4] == '\0'))) {
+ value = value * GF_DAY_IN_SECONDS;
+ goto out;
+ }
+
+ else if (((tail[0] == 'w') && (tail[1] == '\0')) ||
+ ((tail[0] == 'w') && (tail[1] == 'k') && (tail[2] == '\0'))) {
+ value = value * GF_WEEK_IN_SECONDS;
+ goto out;
+ } else {
+ return -1;
+ }
+
+out:
+ *n = value;
+
+ return 0;
}
int
-gf_strsplit (const char *str, const char *delim,
- char ***tokens, int *token_count)
+gf_string2percent(const char *str, double *n)
{
- char *_running = NULL;
- char *running = NULL;
- char *token = NULL;
- char **token_list = NULL;
- int count = 0;
- int i = 0;
- int j = 0;
+ double value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod(str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (str == NULL || delim == NULL || tokens == NULL || token_count == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- return -1;
- }
+ if (errno == 0)
+ errno = old_errno;
- _running = gf_strdup (str);
- if (_running == NULL)
- return -1;
+ if (!((tail[0] == '\0') || ((tail[0] == '%') && (tail[1] == '\0'))))
+ return -1;
- running = _running;
+ *n = value;
- while ((token = strsep (&running, delim)) != NULL) {
- if (token[0] != '\0')
- count++;
- }
- GF_FREE (_running);
+ return 0;
+}
- _running = gf_strdup (str);
- if (_running == NULL)
- return -1;
+static int
+_gf_string2long(const char *str, long *n, int base)
+{
+ long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- running = _running;
+ old_errno = errno;
+ errno = 0;
+ value = strtol(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
- if ((token_list = GF_CALLOC (count, sizeof (char *),
- gf_common_mt_char)) == NULL) {
- GF_FREE (_running);
- return -1;
- }
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- while ((token = strsep (&running, delim)) != NULL) {
- if (token[0] == '\0')
- continue;
+ if (errno == 0)
+ errno = old_errno;
- token_list[i] = gf_strdup (token);
- if (token_list[i] == NULL)
- goto free_exit;
- i++;
- }
+ if (tail[0] != '\0')
+ return -1;
- GF_FREE (_running);
+ *n = value;
- *tokens = token_list;
- *token_count = count;
- return 0;
+ return 0;
+}
-free_exit:
- GF_FREE (_running);
- for (j = 0; j < i; j++)
- GF_FREE (token_list[j]);
+static int
+_gf_string2ulong(const char *str, unsigned long *n, int base)
+{
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoul(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- GF_FREE (token_list);
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
return -1;
+
+ *n = value;
+
+ return 0;
}
-int
-gf_strstr (const char *str, const char *delim, const char *match)
+static int
+_gf_string2uint(const char *str, unsigned int *n, int base)
{
- char *tmp = NULL;
- char *save_ptr = NULL;
- char *tmp_str = NULL;
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoul(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- int ret = 0;
+ if (errno == 0)
+ errno = old_errno;
- tmp_str = strdup (str);
+ if (tail[0] != '\0')
+ return -1;
- if (str == NULL || delim == NULL || match == NULL || tmp_str == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- ret = -1;
- goto out;
- }
+ *n = (unsigned int)value;
+ return 0;
+}
- tmp = strtok_r (tmp_str, delim, &save_ptr);
+static int
+_gf_string2double(const char *str, double *n)
+{
+ double value = 0.0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- while (tmp) {
- ret = strcmp (tmp, match);
+ old_errno = errno;
+ errno = 0;
+ value = strtod(str, &tail);
+ if (str == tail)
+ errno = EINVAL;
- if (ret == 0)
- break;
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- tmp = strtok_r (NULL, delim, &save_ptr);
- }
+ if (errno == 0)
+ errno = old_errno;
-out:
- if (tmp_str)
- free (tmp_str);
+ if (tail[0] != '\0')
+ return -1;
- return ret;
+ *n = value;
+ return 0;
}
-int
-gf_volume_name_validate (const char *volume_name)
+static int
+_gf_string2longlong(const char *str, long long *n, int base)
{
- const char *vname = NULL;
+ long long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- if (volume_name == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- return -1;
- }
+ old_errno = errno;
+ errno = 0;
+ value = strtoll(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (!isalpha (volume_name[0]))
- return 1;
+ if (errno == 0)
+ errno = old_errno;
- for (vname = &volume_name[1]; *vname != '\0'; vname++) {
- if (!(isalnum (*vname) || *vname == '_'))
- return 1;
- }
+ if (tail[0] != '\0')
+ return -1;
- return 0;
+ *n = value;
+
+ return 0;
}
+static int
+_gf_string2ulonglong(const char *str, unsigned long long *n, int base)
+{
+ unsigned long long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoull(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ return -1;
+
+ *n = value;
+
+ return 0;
+}
int
-gf_string2time (const char *str, uint32_t *n)
+gf_string2long(const char *str, long *n)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ return _gf_string2long(str, n, 0);
+}
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+int
+gf_string2ulong(const char *str, unsigned long *n)
+{
+ return _gf_string2ulong(str, n, 0);
+}
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
+int
+gf_string2int(const char *str, int *n)
+{
+ long l = 0;
+ int ret = 0;
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, 0);
+ ret = _gf_string2long(str, &l, 0);
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ *n = l;
+ return ret;
+}
- if (errno == 0)
- errno = old_errno;
+int
+gf_string2uint(const char *str, unsigned int *n)
+{
+ return _gf_string2uint(str, n, 0);
+}
- if (!((tail[0] == '\0') ||
- ((tail[0] == 's') && (tail[1] == '\0')) ||
- ((tail[0] == 's') && (tail[1] == 'e') &&
- (tail[2] == 'c') && (tail[3] == '\0'))))
- return -1;
+int
+gf_string2double(const char *str, double *n)
+{
+ return _gf_string2double(str, n);
+}
- *n = value;
+int
+gf_string2longlong(const char *str, long long *n)
+{
+ return _gf_string2longlong(str, n, 0);
+}
- return 0;
+int
+gf_string2ulonglong(const char *str, unsigned long long *n)
+{
+ return _gf_string2ulonglong(str, n, 0);
}
int
-gf_string2percent (const char *str, double *n)
+gf_string2int8(const char *str, int8_t *n)
{
- double value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ long l = 0L;
+ int rv = 0;
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ rv = _gf_string2long(str, &l, 0);
+ if (rv != 0)
+ return rv;
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
+ if ((l >= INT8_MIN) && (l <= INT8_MAX)) {
+ *n = (int8_t)l;
+ return 0;
+ }
- old_errno = errno;
- errno = 0;
- value = strtod (str, &tail);
+ errno = ERANGE;
+ return -1;
+}
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+int
+gf_string2int16(const char *str, int16_t *n)
+{
+ long l = 0L;
+ int rv = 0;
- if (errno == 0)
- errno = old_errno;
+ rv = _gf_string2long(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (!((tail[0] == '\0') ||
- ((tail[0] == '%') && (tail[1] == '\0'))))
- return -1;
+ if ((l >= INT16_MIN) && (l <= INT16_MAX)) {
+ *n = (int16_t)l;
+ return 0;
+ }
- *n = value;
+ errno = ERANGE;
+ return -1;
+}
+int
+gf_string2int32(const char *str, int32_t *n)
+{
+ long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2long(str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if ((l >= INT32_MIN) && (l <= INT32_MAX)) {
+ *n = (int32_t)l;
return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
+int
+gf_string2int64(const char *str, int64_t *n)
+{
+ long long l = 0LL;
+ int rv = 0;
-static int
-_gf_string2long (const char *str, long *n, int base)
+ rv = _gf_string2longlong(str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ *n = (int64_t)l;
+ return 0;
+}
+
+int
+gf_string2uint8(const char *str, uint8_t *n)
{
- long value = 0;
- char *tail = NULL;
- int old_errno = 0;
+ unsigned long l = 0L;
+ int rv = 0;
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ rv = _gf_string2ulong(str, &l, 0);
+ if (rv != 0)
+ return rv;
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, base);
+ if (l <= UINT8_MAX) {
+ *n = (uint8_t)l;
+ return 0;
+ }
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ errno = ERANGE;
+ return -1;
+}
- if (errno == 0)
- errno = old_errno;
+int
+gf_string2uint16(const char *str, uint16_t *n)
+{
+ unsigned long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2ulong(str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if (l <= UINT16_MAX) {
+ *n = (uint16_t)l;
+ return 0;
+ }
- if (tail[0] != '\0')
- return -1;
+ errno = ERANGE;
+ return -1;
+}
+
+int
+gf_string2uint32(const char *str, uint32_t *n)
+{
+ unsigned long l = 0L;
+ int rv = 0;
- *n = value;
+ rv = _gf_string2ulong(str, &l, 0);
+ if (rv != 0)
+ return rv;
+ if (l <= UINT32_MAX) {
+ *n = (uint32_t)l;
return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-static int
-_gf_string2ulong (const char *str, unsigned long *n, int base)
+int
+gf_string2uint64(const char *str, uint64_t *n)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ unsigned long long l = 0ULL;
+ int rv = 0;
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ rv = _gf_string2ulonglong(str, &l, 0);
+ if (rv != 0)
+ return rv;
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
+ if (l <= UINT64_MAX) {
+ *n = (uint64_t)l;
+ return 0;
+ }
- old_errno = errno;
- errno = 0;
- value = strtoul (str, &tail, base);
+ errno = ERANGE;
+ return -1;
+}
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+int
+gf_string2ulong_base10(const char *str, unsigned long *n)
+{
+ return _gf_string2ulong(str, n, 10);
+}
- if (errno == 0)
- errno = old_errno;
+int
+gf_string2uint_base10(const char *str, unsigned int *n)
+{
+ return _gf_string2uint(str, n, 10);
+}
- if (tail[0] != '\0')
- return -1;
+int
+gf_string2uint8_base10(const char *str, uint8_t *n)
+{
+ unsigned long l = 0L;
+ int rv = 0;
- *n = value;
+ rv = _gf_string2ulong(str, &l, 10);
+ if (rv != 0)
+ return rv;
+ if (l <= UINT8_MAX) {
+ *n = (uint8_t)l;
return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-static int
-_gf_string2uint (const char *str, unsigned int *n, int base)
+int
+gf_string2uint16_base10(const char *str, uint16_t *n)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ unsigned long l = 0L;
+ int rv = 0;
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ rv = _gf_string2ulong(str, &l, 10);
+ if (rv != 0)
+ return rv;
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
+ if (l <= UINT16_MAX) {
+ *n = (uint16_t)l;
+ return 0;
+ }
- old_errno = errno;
- errno = 0;
- value = strtoul (str, &tail, base);
+ errno = ERANGE;
+ return -1;
+}
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+int
+gf_string2uint32_base10(const char *str, uint32_t *n)
+{
+ unsigned long l = 0L;
+ int rv = 0;
- if (errno == 0)
- errno = old_errno;
+ rv = _gf_string2ulong(str, &l, 10);
+ if (rv != 0)
+ return rv;
- if (tail[0] != '\0')
- return -1;
+ if (l <= UINT32_MAX) {
+ *n = (uint32_t)l;
+ return 0;
+ }
- *n = (unsigned int)value;
+ errno = ERANGE;
+ return -1;
+}
+int
+gf_string2uint64_base10(const char *str, uint64_t *n)
+{
+ unsigned long long l = 0ULL;
+ int rv = 0;
+
+ rv = _gf_string2ulonglong(str, &l, 10);
+ if (rv != 0)
+ return rv;
+
+ if (l <= UINT64_MAX) {
+ *n = (uint64_t)l;
return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-static int
-_gf_string2double (const char *str, double *n)
+char *
+gf_uint64_2human_readable(uint64_t n)
{
- double value = 0.0;
- char *tail = NULL;
- int old_errno = 0;
+ int ret = 0;
+ char *str = NULL;
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ if (n >= GF_UNIT_PB) {
+ ret = gf_asprintf(&str, "%.1lfPB", ((double)n) / GF_UNIT_PB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_TB) {
+ ret = gf_asprintf(&str, "%.1lfTB", ((double)n) / GF_UNIT_TB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_GB) {
+ ret = gf_asprintf(&str, "%.1lfGB", ((double)n) / GF_UNIT_GB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_MB) {
+ ret = gf_asprintf(&str, "%.1lfMB", ((double)n) / GF_UNIT_MB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_KB) {
+ ret = gf_asprintf(&str, "%.1lfKB", ((double)n) / GF_UNIT_KB);
+ if (ret < 0)
+ goto err;
+ } else {
+ ret = gf_asprintf(&str, "%" PRIu64 "Bytes", n);
+ if (ret < 0)
+ goto err;
+ }
+ return str;
+err:
+ return NULL;
+}
- old_errno = errno;
- errno = 0;
- value = strtod (str, &tail);
+int
+gf_string2bytesize_range(const char *str, uint64_t *n, uint64_t umax)
+{
+ double value = 0.0;
+ int64_t int_value = 0;
+ uint64_t unit = 0;
+ int64_t max = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+ gf_boolean_t fraction = _gf_false;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ max = umax & 0x7fffffffffffffffLL;
- if (errno == 0)
- errno = old_errno;
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
- if (tail[0] != '\0')
- return -1;
+ if (strrchr(str, '.'))
+ fraction = _gf_true;
- *n = value;
+ old_errno = errno;
+ errno = 0;
+ if (fraction)
+ value = strtod(str, &tail);
+ else
+ int_value = strtoll(str, &tail, 10);
- return 0;
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0') {
+ if (strcasecmp(tail, GF_UNIT_KB_STRING) == 0)
+ unit = GF_UNIT_KB;
+ else if (strcasecmp(tail, GF_UNIT_MB_STRING) == 0)
+ unit = GF_UNIT_MB;
+ else if (strcasecmp(tail, GF_UNIT_GB_STRING) == 0)
+ unit = GF_UNIT_GB;
+ else if (strcasecmp(tail, GF_UNIT_TB_STRING) == 0)
+ unit = GF_UNIT_TB;
+ else if (strcasecmp(tail, GF_UNIT_PB_STRING) == 0)
+ unit = GF_UNIT_PB;
+ else if (strcasecmp(tail, GF_UNIT_B_STRING) != 0)
+ return -1;
+
+ if (unit > 0) {
+ if (fraction)
+ value *= unit;
+ else
+ int_value *= unit;
+ }
+ }
+
+ if (fraction) {
+ if ((max - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+ *n = (uint64_t)value;
+ } else {
+ if ((max - int_value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+ *n = int_value;
+ }
+
+ return 0;
}
-static int
-_gf_string2longlong (const char *str, long long *n, int base)
+int
+gf_string2bytesize_uint64(const char *str, uint64_t *n)
{
- long long value = 0;
- char *tail = NULL;
- int old_errno = 0;
+ return gf_string2bytesize_range(str, n, UINT64_MAX);
+}
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+int
+gf_string2bytesize_int64(const char *str, int64_t *n)
+{
+ uint64_t u64 = 0;
+ int ret = 0;
- old_errno = errno;
- errno = 0;
- value = strtoll (str, &tail, base);
+ ret = gf_string2bytesize_range(str, &u64, INT64_MAX);
+ *n = (int64_t)u64;
+ return ret;
+}
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+int
+gf_string2percent_or_bytesize(const char *str, double *n,
+ gf_boolean_t *is_percent)
+{
+ double value = 0ULL;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod(str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
+
+ /*Maximum accepted value for 64 bit OS will be (2^14 -1)PB*/
+ if (tail[0] != '\0') {
+ if (strcasecmp(tail, GF_UNIT_KB_STRING) == 0)
+ value *= GF_UNIT_KB;
+ else if (strcasecmp(tail, GF_UNIT_MB_STRING) == 0)
+ value *= GF_UNIT_MB;
+ else if (strcasecmp(tail, GF_UNIT_GB_STRING) == 0)
+ value *= GF_UNIT_GB;
+ else if (strcasecmp(tail, GF_UNIT_TB_STRING) == 0)
+ value *= GF_UNIT_TB;
+ else if (strcasecmp(tail, GF_UNIT_PB_STRING) == 0)
+ value *= GF_UNIT_PB;
+ else if (strcasecmp(tail, GF_UNIT_PERCENT_STRING) == 0)
+ *is_percent = _gf_true;
+ else
+ return -1;
+ }
- if (tail[0] != '\0')
- return -1;
+ /* Error out if we cannot store the value in uint64 */
+ if ((UINT64_MAX - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
-static int
-_gf_string2ulonglong (const char *str, unsigned long long *n, int base)
+int64_t
+gf_str_to_long_long(const char *number)
{
- unsigned long long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ int64_t unit = 1;
+ int64_t ret = 0;
+ char *endptr = NULL;
+ if (!number)
+ return 0;
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ ret = strtoll(number, &endptr, 0);
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
+ if (endptr) {
+ switch (*endptr) {
+ case 'G':
+ case 'g':
+ if ((*(endptr + 1) == 'B') || (*(endptr + 1) == 'b'))
+ unit = 1024 * 1024 * 1024;
+ break;
+ case 'M':
+ case 'm':
+ if ((*(endptr + 1) == 'B') || (*(endptr + 1) == 'b'))
+ unit = 1024 * 1024;
+ break;
+ case 'K':
+ case 'k':
+ if ((*(endptr + 1) == 'B') || (*(endptr + 1) == 'b'))
+ unit = 1024;
+ break;
+ case '%':
+ unit = 1;
+ break;
+ default:
+ unit = 1;
break;
}
+ }
+ return ret * unit;
+}
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, base);
+int
+gf_string2boolean(const char *str, gf_boolean_t *b)
+{
+ if (str == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return -1;
+ }
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ if ((strcasecmp(str, "1") == 0) || (strcasecmp(str, "on") == 0) ||
+ (strcasecmp(str, "yes") == 0) || (strcasecmp(str, "true") == 0) ||
+ (strcasecmp(str, "enable") == 0)) {
+ *b = _gf_true;
+ return 0;
+ }
- if (errno == 0)
- errno = old_errno;
+ if ((strcasecmp(str, "0") == 0) || (strcasecmp(str, "off") == 0) ||
+ (strcasecmp(str, "no") == 0) || (strcasecmp(str, "false") == 0) ||
+ (strcasecmp(str, "disable") == 0)) {
+ *b = _gf_false;
+ return 0;
+ }
- if (tail[0] != '\0')
- return -1;
+ return -1;
+}
- *n = value;
+int
+gf_strn2boolean(const char *str, const int len, gf_boolean_t *b)
+{
+ if (str == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return -1;
+ }
- return 0;
+ switch (len) {
+ case 1:
+ if (strcasecmp(str, "1") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "0") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 2:
+ if (strcasecmp(str, "on") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "no") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 3:
+ if (strcasecmp(str, "yes") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "off") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 4:
+ if (strcasecmp(str, "true") == 0) {
+ *b = _gf_true;
+ return 0;
+ }
+ break;
+ case 5:
+ if (strcasecmp(str, "false") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 6:
+ if (strcasecmp(str, "enable") == 0) {
+ *b = _gf_true;
+ return 0;
+ }
+ break;
+ case 7:
+ if (strcasecmp(str, "disable") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ default:
+ return -1;
+ break;
+ }
+ return -1;
}
int
-gf_string2long (const char *str, long *n)
+gf_lockfd(int fd)
{
- return _gf_string2long (str, n, 0);
+ struct gf_flock fl;
+
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ return fcntl(fd, F_SETLK, &fl);
}
int
-gf_string2ulong (const char *str, unsigned long *n)
+gf_unlockfd(int fd)
{
- return _gf_string2ulong (str, n, 0);
+ struct gf_flock fl;
+
+ fl.l_type = F_UNLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ return fcntl(fd, F_SETLK, &fl);
}
+static void
+compute_checksum(char *buf, const ssize_t size, uint32_t *checksum)
+{
+ int ret = -1;
+ char *checksum_buf = NULL;
+
+ checksum_buf = (char *)(checksum);
+
+ if (!(*checksum)) {
+ checksum_buf[0] = 0xba;
+ checksum_buf[1] = 0xbe;
+ checksum_buf[2] = 0xb0;
+ checksum_buf[3] = 0x0b;
+ }
+
+ for (ret = 0; ret < (size - 4); ret += 4) {
+ checksum_buf[0] ^= (buf[ret]);
+ checksum_buf[1] ^= (buf[ret + 1] << 1);
+ checksum_buf[2] ^= (buf[ret + 2] << 2);
+ checksum_buf[3] ^= (buf[ret + 3] << 3);
+ }
+
+ for (ret = 0; ret <= (size % 4); ret++) {
+ checksum_buf[ret] ^= (buf[(size - 4) + ret] << ret);
+ }
+
+ return;
+}
+
+#define GF_CHECKSUM_BUF_SIZE 1024
+
int
-gf_string2int (const char *str, int *n)
+get_checksum_for_file(int fd, uint32_t *checksum, int op_version)
{
- long l = 0;
- int ret = 0;
+ int ret = -1;
+ char buf[GF_CHECKSUM_BUF_SIZE] = {
+ 0,
+ };
+
+ /* goto first place */
+ sys_lseek(fd, 0L, SEEK_SET);
+ do {
+ ret = sys_read(fd, &buf, GF_CHECKSUM_BUF_SIZE);
+ if (ret > 0) {
+ if (op_version < GD_OP_VERSION_5_4)
+ compute_checksum(buf, GF_CHECKSUM_BUF_SIZE, checksum);
+ else
+ compute_checksum(buf, ret, checksum);
+ }
+ } while (ret > 0);
- ret = _gf_string2long (str, &l, 0);
+ /* set it back */
+ sys_lseek(fd, 0L, SEEK_SET);
- *n = l;
- return ret;
+ return ret;
}
int
-gf_string2uint (const char *str, unsigned int *n)
+get_checksum_for_path(char *path, uint32_t *checksum, int op_version)
{
- return _gf_string2uint (str, n, 0);
+ int ret = -1;
+ int fd = -1;
+
+ GF_ASSERT(path);
+ GF_ASSERT(checksum);
+
+ fd = open(path, O_RDWR);
+
+ if (fd == -1) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_PATH_OPEN_FAILED,
+ "path=%s", path, NULL);
+ goto out;
+ }
+
+ ret = get_checksum_for_file(fd, checksum, op_version);
+
+out:
+ if (fd != -1)
+ sys_close(fd);
+
+ return ret;
}
+/**
+ * get_file_mtime -- Given a path, get the mtime for the file
+ *
+ * @path: The filepath to check the mtime on
+ * @stamp: The parameter to set after we get the mtime
+ *
+ * @returns: success: 0
+ * errors : Errors returned by the stat () call
+ */
int
-gf_string2double (const char *str, double *n)
+get_file_mtime(const char *path, time_t *stamp)
{
- return _gf_string2double (str, n);
+ struct stat f_stat = {0};
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, path, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, stamp, out);
+
+ ret = sys_stat(path, &f_stat);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
+ "path=%s", path, NULL);
+ goto out;
+ }
+
+ /* Set the mtime */
+ *stamp = f_stat.st_mtime;
+out:
+ return ret;
}
-int
-gf_string2longlong (const char *str, long long *n)
+/**
+ * gf_is_ip_in_net -- Checks if an IP Address is in a network.
+ * A network should be specified by something like
+ * '10.5.153.0/24' (in CIDR notation).
+ *
+ * @result : Sets to true if the IP is in the network
+ * @ip_str : The IP to check
+ * @network: The network to check the IP against.
+ *
+ * @return: success: _gf_true
+ * failure: -EINVAL for bad args, retval of inet_pton otherwise
+ */
+gf_boolean_t
+gf_is_ip_in_net(const char *network, const char *ip_str)
{
- return _gf_string2longlong (str, n, 0);
+ unsigned long ip_buf = 0;
+ unsigned long net_ip_buf = 0;
+ unsigned long subnet_mask = 0;
+ int ret = -EINVAL;
+ char *slash = NULL;
+ char *net_ip = NULL;
+ char *subnet = NULL;
+ char *net_str = NULL;
+ int family = AF_INET;
+ gf_boolean_t result = _gf_false;
+
+ GF_ASSERT(network);
+ GF_ASSERT(ip_str);
+
+ if (strchr(network, ':'))
+ family = AF_INET6;
+ else if (strchr(network, '.'))
+ family = AF_INET;
+ else {
+ goto out;
+ }
+
+ net_str = strdupa(network);
+ slash = strchr(net_str, '/');
+ if (!slash)
+ goto out;
+ *slash = '\0';
+
+ subnet = slash + 1;
+ net_ip = net_str;
+
+ /* Convert IP address to a long */
+ ret = inet_pton(family, ip_str, &ip_buf);
+ if (ret < 0)
+ gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
+ NULL);
+
+ /* Convert network IP address to a long */
+ ret = inet_pton(family, net_ip, &net_ip_buf);
+ if (ret < 0) {
+ gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
+ NULL);
+ goto out;
+ }
+
+ /* Converts /x into a mask */
+ subnet_mask = (1 << atoi(subnet)) - 1;
+
+ result = ((ip_buf & subnet_mask) == (net_ip_buf & subnet_mask));
+out:
+ return result;
}
-int
-gf_string2ulonglong (const char *str, unsigned long long *n)
+char *
+strtail(char *str, const char *pattern)
{
- return _gf_string2ulonglong (str, n, 0);
+ int i = 0;
+
+ for (i = 0; str[i] == pattern[i] && str[i]; i++)
+ ;
+
+ if (pattern[i] == '\0')
+ return str + i;
+
+ return NULL;
}
-int
-gf_string2int8 (const char *str, int8_t *n)
+void
+skipwhite(char **s)
{
- long l = 0L;
- int rv = 0;
+ while (isspace(**s))
+ (*s)++;
+}
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
+void
+gf_strTrim(char **s)
+{
+ char *end = NULL;
- if (l >= INT8_MIN && l <= INT8_MAX) {
- *n = (int8_t) l;
- return 0;
- }
+ end = *s + strlen(*s) - 1;
+ while (end > *s && isspace((unsigned char)*end))
+ end--;
- errno = ERANGE;
- return -1;
+ *(end + 1) = '\0';
+
+ while (isspace(**s))
+ (*s)++;
+
+ return;
}
-int
-gf_string2int16 (const char *str, int16_t *n)
+char *
+nwstrtail(char *str, char *pattern)
{
- long l = 0L;
- int rv = 0;
+ for (;;) {
+ skipwhite(&str);
+ skipwhite(&pattern);
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
+ if (*str != *pattern || !*str)
+ break;
- if (l >= INT16_MIN && l <= INT16_MAX) {
- *n = (int16_t) l;
- return 0;
- }
+ str++;
+ pattern++;
+ }
- errno = ERANGE;
- return -1;
+ return *pattern ? NULL : str;
}
-int
-gf_string2int32 (const char *str, int32_t *n)
+/**
+ * token_iter_init -- initialize tokenization
+ *
+ * @str: string to be tokenized
+ * @sep: token separator character
+ * @tit: pointer to iteration state
+ *
+ * @return: token string
+ *
+ * The returned token string and tit are
+ * not to be used directly, but through
+ * next_token().
+ */
+char *
+token_iter_init(char *str, char sep, token_iter_t *tit)
{
- long l = 0L;
- int rv = 0;
+ tit->end = str + strlen(str);
+ tit->sep = sep;
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
+ return str;
+}
- if (l >= INT32_MIN && l <= INT32_MAX) {
- *n = (int32_t) l;
- return 0;
- }
+/**
+ * next_token -- fetch next token in tokenization
+ * inited by token_iter_init().
+ *
+ * @tokenp: pointer to token
+ * @tit: pointer to iteration state
+ *
+ * @return: true if iteration ends, else false
+ *
+ * The token pointed by @tokenp can be used
+ * after a call to next_token(). When next_token()
+ * returns true the iteration is to be stopped
+ * and the string with which the tokenization
+ * was inited (see token_iter_init() is restored,
+ * apart from dropped tokens (see drop_token()).
+ */
+gf_boolean_t
+next_token(char **tokenp, token_iter_t *tit)
+{
+ char *cursor = NULL;
+ gf_boolean_t is_last = _gf_false;
+
+ for (cursor = *tokenp; *cursor; cursor++)
+ ;
+ if (cursor < tit->end) {
+ /*
+ * We detect that in between current token and end a zero
+ * marker has already been inserted. This means that the
+ * token has already been returned. We restore the
+ * separator and move ahead.
+ */
+ *cursor = tit->sep;
+ *tokenp = cursor + 1;
+ }
+
+ for (cursor = *tokenp; *cursor && *cursor != tit->sep; cursor++)
+ ;
+ /* If the cursor ended up on a zero byte, then it's the last token. */
+ is_last = !*cursor;
+ /* Zero-terminate the token. */
+ *cursor = 0;
+
+ return is_last;
+}
- errno = ERANGE;
- return -1;
+/*
+ * drop_token -- drop a token during iterated calls of next_token().
+ *
+ * Sample program that uses these functions to tokenize
+ * a comma-separated first argument while dropping the
+ * rest of the arguments if they occur as token:
+ *
+ * #include <stdio.h>
+ * #include <stdlib.h>
+ * #include <string.h>
+ * #include "glusterfs/common-utils.h"
+ *
+ * int
+ * main (int argc, char **argv)
+ * {
+ * char *buf;
+ * char *token;
+ * token_iter_t tit;
+ * int i;
+ * gf_boolean_t iter_end;
+ *
+ * if (argc <= 1)
+ * abort();
+ *
+ * buf = strdup (argv[1]);
+ * if (!buf)
+ * abort();
+ *
+ * for (token = token_iter_init (buf, ',', &tit) ;;) {
+ * iter_end = next_token (&token, &tit);
+ * printf("found token: '%s'\n", token);
+ * for (i = 2; i < argc; i++) {
+ * if (strcmp (argv[i], token) == 0) {
+ * printf ("%s\n", "dropping token!");
+ * drop_token (token, &tit);
+ * break;
+ * }
+ * }
+ * if (iter_end)
+ * break;
+ * }
+ *
+ * printf ("finally: '%s'\n", buf);
+ *
+ * return 0;
+ * }
+ */
+void
+drop_token(char *token, token_iter_t *tit)
+{
+ char *cursor = NULL;
+
+ for (cursor = token; *cursor; cursor++)
+ ;
+ if (cursor < tit->end) {
+ /*
+ * We detect a zero inserted by next_token().
+ * Step the cursor and copy what comes after
+ * to token.
+ */
+ for (cursor++; cursor < tit->end; *token++ = *cursor++)
+ ;
+ }
+
+ /*
+ * Zero out the remainder of the buffer.
+ * It would be enough to insert just a single zero,
+ * but we continue 'till the end to have cleaner
+ * memory content.
+ */
+ for (cursor = token; cursor < tit->end; *cursor++ = 0)
+ ;
+
+ /* Adjust the end to point to the new terminating zero. */
+ tit->end = token;
}
-int
-gf_string2int64 (const char *str, int64_t *n)
+/* Syntax formed according to RFC 1912 (RFC 1123 & 952 are more restrictive) *
+ <hname> ::= <gen-name>*["."<gen-name>] *
+ <gen-name> ::= <let-or-digit> <[*[<let-or-digit-or-hyphen>]<let-or-digit>] */
+char
+valid_host_name(char *address, int length)
{
- long long l = 0LL;
- int rv = 0;
+ int i = 0;
+ int str_len = 0;
+ char ret = 1;
+ char *dup_addr = NULL;
+ char *temp_str = NULL;
+ char *save_ptr = NULL;
+
+ if ((length > _POSIX_HOST_NAME_MAX) || (length < 1)) {
+ ret = 0;
+ goto out;
+ }
+
+ dup_addr = gf_strdup(address);
+ if (!dup_addr) {
+ ret = 0;
+ goto out;
+ }
+
+ if (!isalnum(dup_addr[length - 1]) && (dup_addr[length - 1] != '*')) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Check for consecutive dots, which is invalid in a hostname and is
+ * ignored by strtok()
+ */
+ if (strstr(dup_addr, "..")) {
+ ret = 0;
+ goto out;
+ }
+
+ /* gen-name */
+ temp_str = strtok_r(dup_addr, ".", &save_ptr);
+ do {
+ str_len = strlen(temp_str);
+
+ if (!isalnum(temp_str[0]) || !isalnum(temp_str[str_len - 1])) {
+ ret = 0;
+ goto out;
+ }
+ for (i = 1; i < str_len; i++) {
+ if (!isalnum(temp_str[i]) && (temp_str[i] != '-')) {
+ ret = 0;
+ goto out;
+ }
+ }
+ } while ((temp_str = strtok_r(NULL, ".", &save_ptr)));
- rv = _gf_string2longlong (str, &l, 0);
- if (rv != 0)
- return rv;
+out:
+ GF_FREE(dup_addr);
+ return ret;
+}
- if (l >= INT64_MIN && l <= INT64_MAX) {
- *n = (int64_t) l;
- return 0;
+/* Matches all ipv4 address, if wildcard_acc is true '*' wildcard pattern for*
+ subnets is considered as valid strings as well */
+char
+valid_ipv4_address(char *address, int length, gf_boolean_t wildcard_acc)
+{
+ int octets = 0;
+ int value = 0;
+ char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
+ char ret = 1;
+ int is_wildcard = 0;
+
+ tmp = gf_strdup(address);
+
+ /*
+ * To prevent cases where last character is '.' and which have
+ * consecutive dots like ".." as strtok ignore consecutive
+ * delimiters.
+ */
+ if (length <= 0 || (strstr(address, "..")) ||
+ (!isdigit(tmp[length - 1]) && (tmp[length - 1] != '*'))) {
+ ret = 0;
+ goto out;
+ }
+
+ prev = strtok_r(tmp, ".", &ptr);
+
+ while (prev != NULL) {
+ octets++;
+ if (wildcard_acc && !strcmp(prev, "*")) {
+ is_wildcard = 1;
+ } else {
+ value = strtol(prev, &endptr, 10);
+ if ((value > 255) || (value < 0) ||
+ (endptr != NULL && *endptr != '\0')) {
+ ret = 0;
+ goto out;
+ }
}
+ prev = strtok_r(NULL, ".", &ptr);
+ }
- errno = ERANGE;
- return -1;
+ if ((octets > 4) || (octets < 4 && !is_wildcard)) {
+ ret = 0;
+ }
+
+out:
+ GF_FREE(tmp);
+ return ret;
}
-int
-gf_string2uint8 (const char *str, uint8_t *n)
+char
+valid_cidr_address(char *cidr_address, gf_boolean_t wildcard_acc)
{
- unsigned long l = 0L;
- int rv = 0;
+ unsigned int net_mask = 0, len = 0;
+ char *temp = NULL, *cidr_str = NULL, ret = 1;
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
+ cidr_str = strdupa(cidr_address);
+ temp = strstr(cidr_str, "/");
+ if (temp == NULL)
+ return 0; /* Since Invalid cidr ip address we return 0 */
- if (l >= 0 && l <= UINT8_MAX) {
- *n = (uint8_t) l;
- return 0;
- }
+ *temp = '\0';
+ temp++;
+ net_mask = (unsigned int)atoi(temp);
- errno = ERANGE;
- return -1;
+ if (net_mask > 32 || net_mask < 1)
+ return 0; /* Since Invalid cidr ip address we return 0*/
+
+ len = strlen(cidr_str);
+
+ ret = valid_ipv4_address(cidr_str, len, wildcard_acc);
+
+ return ret;
}
-int
-gf_string2uint16 (const char *str, uint16_t *n)
+/**
+ * valid_ipv4_subnetwork() takes the pattern and checks if it contains
+ * a valid ipv4 subnetwork pattern i.e. xx.xx.xx.xx/n. IPv4 address
+ * part (xx.xx.xx.xx) and mask bits length part (n). The mask bits length
+ * must be in 0-32 range (ipv4 addr is 32 bit). The pattern must be
+ * in this format.
+ *
+ * Returns _gf_true if both IP addr and mask bits len are valid
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+valid_ipv4_subnetwork(const char *address)
{
- unsigned long l = 0L;
- int rv = 0;
+ char *slash = NULL;
+ char *paddr = NULL;
+ char *endptr = NULL;
+ long prefixlen = -1;
+ gf_boolean_t retv = _gf_true;
+
+ if (address == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return _gf_false;
+ }
+
+ paddr = gf_strdup(address);
+ if (paddr == NULL) /* ENOMEM */
+ return _gf_false;
+
+ /*
+ * INVALID: If '/' is not present OR
+ * Nothing specified after '/'
+ */
+ slash = strchr(paddr, '/');
+ if ((slash == NULL) || (slash[1] == '\0')) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_IPV4_FORMAT,
+ "Invalid IPv4 "
+ "subnetwork format");
+ retv = _gf_false;
+ goto out;
+ }
+
+ *slash = '\0';
+ retv = valid_ipv4_address(paddr, strlen(paddr), _gf_false);
+ if (retv == _gf_false) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_IPV4_FORMAT,
+ "Invalid IPv4 subnetwork address");
+ goto out;
+ }
+ /*
+ * Reset errno before checking it
+ */
+ errno = 0;
+ prefixlen = strtol(slash + 1, &endptr, 10);
+ if ((errno != 0) || (*endptr != '\0') || (prefixlen < 0) ||
+ (prefixlen > IPv4_ADDR_SIZE)) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_IPV4_FORMAT,
+ "Invalid IPv4 subnetwork mask");
+ retv = _gf_false;
+ goto out;
+ }
+
+ retv = _gf_true;
+out:
+ GF_FREE(paddr);
+ return retv;
+}
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
+char
+valid_ipv6_address(char *address, int length, gf_boolean_t wildcard_acc)
+{
+ int hex_numbers = 0;
+ int value = 0;
+ int i = 0;
+ char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
+ char ret = 1;
+ int is_wildcard = 0;
+ int is_compressed = 0;
+
+ tmp = gf_strdup(address);
+
+ /* Check for '%' for link local addresses */
+ endptr = strchr(tmp, '%');
+ if (endptr) {
+ *endptr = '\0';
+ length = strlen(tmp);
+ endptr = NULL;
+ }
+
+ /* Check for compressed form */
+ if (length <= 0 || tmp[length - 1] == ':') {
+ ret = 0;
+ goto out;
+ }
+ for (i = 0; i < (length - 1); i++) {
+ if (tmp[i] == ':' && tmp[i + 1] == ':') {
+ if (is_compressed == 0)
+ is_compressed = 1;
+ else {
+ ret = 0;
+ goto out;
+ }
+ }
+ }
- if (l >= 0 && l <= UINT16_MAX) {
- *n = (uint16_t) l;
- return 0;
+ prev = strtok_r(tmp, ":", &ptr);
+
+ while (prev != NULL) {
+ hex_numbers++;
+ if (wildcard_acc && !strcmp(prev, "*")) {
+ is_wildcard = 1;
+ } else {
+ value = strtol(prev, &endptr, 16);
+ if ((value > 0xffff) || (value < 0) ||
+ (endptr != NULL && *endptr != '\0')) {
+ ret = 0;
+ goto out;
+ }
}
+ prev = strtok_r(NULL, ":", &ptr);
+ }
- errno = ERANGE;
- return -1;
+ if ((hex_numbers > 8) ||
+ (hex_numbers < 8 && !is_wildcard && !is_compressed)) {
+ ret = 0;
+ }
+
+out:
+ GF_FREE(tmp);
+ return ret;
}
-int
-gf_string2uint32 (const char *str, uint32_t *n)
+char
+valid_internet_address(char *address, gf_boolean_t wildcard_acc,
+ gf_boolean_t cidr)
{
- unsigned long l = 0L;
- int rv = 0;
+ char ret = 0;
+ int length = 0;
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
+ if (address == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ goto out;
+ }
- if (l >= 0 && l <= UINT32_MAX) {
- *n = (uint32_t) l;
- return 0;
+ length = strlen(address);
+ if (length == 0)
+ goto out;
+
+ if (cidr && valid_cidr_address(address, wildcard_acc)) {
+ ret = 1;
+ }
+
+ if (valid_ipv4_address(address, length, wildcard_acc) ||
+ valid_ipv6_address(address, length, wildcard_acc) ||
+ valid_host_name(address, length))
+ ret = 1;
+
+out:
+ return ret;
+}
+
+/**
+ * valid_mount_auth_address - Validate the rpc-auth.addr.allow/reject pattern
+ *
+ * @param address - Pattern to be validated
+ *
+ * @return _gf_true if "address" is "*" (anonymous) 'OR'
+ * if "address" is valid FQDN or valid IPv4/6 address 'OR'
+ * if "address" contains wildcard chars e.g. "'*' or '?' or
+ * '['" if "address" is valid ipv4 subnet pattern (xx.xx.xx.xx/n) _gf_false
+ * otherwise
+ *
+ *
+ * NB: If the user/admin set for wildcard pattern, then it does not have
+ * to be validated. Make it similar to the way exportfs (kNFS) works.
+ */
+gf_boolean_t
+valid_mount_auth_address(char *address)
+{
+ int length = 0;
+ char *cp = NULL;
+
+ /* 1. Check for "NULL and empty string */
+ if ((address == NULL) || (address[0] == '\0')) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return _gf_false;
+ }
+
+ /* 2. Check for Anonymous */
+ if (strcmp(address, "*") == 0)
+ return _gf_true;
+
+ for (cp = address; *cp; cp++) {
+ /* 3. Check for wildcard pattern */
+ if (*cp == '*' || *cp == '?' || *cp == '[') {
+ return _gf_true;
}
- errno = ERANGE;
- return -1;
+ /*
+ * 4. check for IPv4 subnetwork i.e. xx.xx.xx.xx/n
+ * TODO: check for IPv6 subnetwork
+ * NB: Wildcard must not be mixed with subnetwork.
+ */
+ if (*cp == '/') {
+ return valid_ipv4_subnetwork(address);
+ }
+ }
+
+ /* 5. Check for v4/v6 IP addr and FQDN/hostname */
+ length = strlen(address);
+ if ((valid_ipv4_address(address, length, _gf_false)) ||
+ (valid_ipv6_address(address, length, _gf_false)) ||
+ (valid_host_name(address, length))) {
+ return _gf_true;
+ }
+
+ return _gf_false;
}
-int
-gf_string2uint64 (const char *str, uint64_t *n)
+/**
+ * gf_sock_union_equal_addr - check if two given gf_sock_unions have same addr
+ *
+ * @param a - first sock union
+ * @param b - second sock union
+ * @return _gf_true if a and b have same ipv{4,6} addr, _gf_false otherwise
+ */
+gf_boolean_t
+gf_sock_union_equal_addr(union gf_sock_union *a, union gf_sock_union *b)
{
- unsigned long long l = 0ULL;
- int rv = 0;
+ if (!a || !b) {
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "gf_sock_union_equal_addr", NULL);
+ return _gf_false;
+ }
+
+ if (a->storage.ss_family != b->storage.ss_family)
+ return _gf_false;
+
+ switch (a->storage.ss_family) {
+ case AF_INET:
+ if (a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr)
+ return _gf_true;
+ else
+ return _gf_false;
+
+ case AF_INET6:
+ if (memcmp((void *)(&a->sin6.sin6_addr),
+ (void *)(&b->sin6.sin6_addr), sizeof(a->sin6.sin6_addr)))
+ return _gf_false;
+ else
+ return _gf_true;
+
+ default:
+ gf_msg_debug("common-utils", 0,
+ "Unsupported/invalid address "
+ "family");
+ break;
+ }
+
+ return _gf_false;
+}
- rv = _gf_string2ulonglong (str, &l, 0);
- if (rv != 0)
- return rv;
+/*
+ * Check if both have same network address.
+ * Extract the network address from the sockaddr(s) addr by applying the
+ * network mask. If they match, return boolean _gf_true, _gf_false otherwise.
+ *
+ * (x == y) <=> (x ^ y == 0)
+ * (x & y) ^ (x & z) <=> x & (y ^ z)
+ *
+ * ((ip1 & mask) == (ip2 & mask)) <=> ((mask & (ip1 ^ ip2)) == 0)
+ */
+gf_boolean_t
+mask_match(const uint32_t a, const uint32_t b, const uint32_t m)
+{
+ return (((a ^ b) & m) == 0);
+}
- if (l >= 0 && l <= UINT64_MAX) {
- *n = (uint64_t) l;
- return 0;
- }
+/*Thread safe conversion function*/
+char *
+uuid_utoa(uuid_t uuid)
+{
+ char *uuid_buffer = glusterfs_uuid_buf_get();
+ gf_uuid_unparse(uuid, uuid_buffer);
+ return uuid_buffer;
+}
- errno = ERANGE;
- return -1;
+/*Re-entrant conversion function*/
+char *
+uuid_utoa_r(uuid_t uuid, char *dst)
+{
+ if (!dst)
+ return NULL;
+ gf_uuid_unparse(uuid, dst);
+ return dst;
}
-int
-gf_string2ulong_base10 (const char *str, unsigned long *n)
+/*Thread safe conversion function*/
+char *
+lkowner_utoa(gf_lkowner_t *lkowner)
{
- return _gf_string2ulong (str, n, 10);
+ char *lkowner_buffer = glusterfs_lkowner_buf_get();
+ lkowner_unparse(lkowner, lkowner_buffer, GF_LKOWNER_BUF_SIZE);
+ return lkowner_buffer;
}
-int
-gf_string2uint_base10 (const char *str, unsigned int *n)
+/*Re-entrant conversion function*/
+char *
+lkowner_utoa_r(gf_lkowner_t *lkowner, char *dst, int len)
{
- return _gf_string2uint (str, n, 10);
+ if (!dst)
+ return NULL;
+ lkowner_unparse(lkowner, dst, len);
+ return dst;
}
-int
-gf_string2uint8_base10 (const char *str, uint8_t *n)
+gf_boolean_t
+is_valid_lease_id(const char *lease_id)
{
- unsigned long l = 0L;
- int rv = 0;
+ int i = 0;
+ gf_boolean_t valid = _gf_false;
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
+ for (i = 0; i < LEASE_ID_SIZE; i++) {
+ if (lease_id[i] != 0) {
+ valid = _gf_true;
+ goto out;
+ }
+ }
+out:
+ return valid;
+}
- if (l >= 0 && l <= UINT8_MAX) {
- *n = (uint8_t) l;
- return 0;
+/* Lease_id can be a either in printable or non printable binary
+ * format. This function can be used to print any lease_id.
+ *
+ * This function returns a pointer to a buf, containing the ascii
+ * representation of the value in lease_id, in the following format:
+ * 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum
+ *
+ * Eg: If lease_id = "lid1-clnt1" the printable string would be:
+ * 6c69-6431-2d63-6c6e-7431-0000-0000-0000
+ *
+ * Note: The pointer returned should not be stored for further use, as any
+ * subsequent call to this function will override the same buffer.
+ */
+char *
+leaseid_utoa(const char *lease_id)
+{
+ char *buf = NULL;
+ int i = 0;
+ int j = 0;
+
+ buf = glusterfs_leaseid_buf_get();
+ if (!buf)
+ goto out;
+
+ for (i = 0; i < LEASE_ID_SIZE; i++) {
+ if (i && !(i % 2)) {
+ buf[j] = '-';
+ j++;
}
+ sprintf(&buf[j], "%02hhx", lease_id[i]);
+ j += 2;
+ if (j == GF_LEASE_ID_BUF_SIZE)
+ break;
+ }
+ buf[GF_LEASE_ID_BUF_SIZE - 1] = '\0';
+out:
+ return buf;
+}
- errno = ERANGE;
- return -1;
+char *
+gf_leaseid_get()
+{
+ return glusterfs_leaseid_buf_get();
+}
+
+char *
+gf_existing_leaseid()
+{
+ return glusterfs_leaseid_exist();
+}
+
+void *
+gf_array_elem(void *a, int index, size_t elem_size)
+{
+ uint8_t *ptr = a;
+ return (void *)(ptr + index * elem_size);
+}
+
+void
+gf_elem_swap(void *x, void *y, size_t l)
+{
+ uint8_t *a = x, *b = y, c;
+ while (l--) {
+ c = *a;
+ *a++ = *b;
+ *b++ = c;
+ }
+}
+
+void
+gf_array_insertionsort(void *A, int l, int r, size_t elem_size, gf_cmp cmp)
+{
+ int i = l;
+ int N = r + 1;
+ void *Temp = NULL;
+ int j = 0;
+
+ for (i = l; i < N; i++) {
+ Temp = gf_array_elem(A, i, elem_size);
+ j = i - 1;
+ while (j >= 0 && (cmp(Temp, gf_array_elem(A, j, elem_size)) < 0)) {
+ gf_elem_swap(Temp, gf_array_elem(A, j, elem_size), elem_size);
+ Temp = gf_array_elem(A, j, elem_size);
+ j = j - 1;
+ }
+ }
}
int
-gf_string2uint16_base10 (const char *str, uint16_t *n)
+gf_is_str_int(const char *value)
{
- unsigned long l = 0L;
- int rv = 0;
+ int flag = 0;
+ char *str = NULL;
+ char *fptr = NULL;
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
+ GF_VALIDATE_OR_GOTO(THIS->name, value, out);
- if (l >= 0 && l <= UINT16_MAX) {
- *n = (uint16_t) l;
- return 0;
+ str = gf_strdup(value);
+ if (!str)
+ goto out;
+
+ fptr = str;
+
+ while (*str) {
+ if (!isdigit(*str)) {
+ flag = 1;
+ goto out;
}
+ str++;
+ }
- errno = ERANGE;
- return -1;
+out:
+ GF_FREE(fptr);
+
+ return flag;
+}
+/*
+ * rounds up nr to power of two. If nr is already a power of two, just returns
+ * nr
+ */
+
+int32_t
+gf_roundup_power_of_two(int32_t nr)
+{
+ int32_t result = 1;
+
+ if (nr < 0) {
+ gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
+ NULL);
+ result = -1;
+ goto out;
+ }
+
+ while (result < nr)
+ result *= 2;
+
+out:
+ return result;
+}
+
+/*
+ * rounds up nr to next power of two. If nr is already a power of two, next
+ * power of two is returned.
+ */
+
+int32_t
+gf_roundup_next_power_of_two(int32_t nr)
+{
+ int32_t result = 1;
+
+ if (nr < 0) {
+ gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
+ NULL);
+ result = -1;
+ goto out;
+ }
+
+ while (result <= nr)
+ result *= 2;
+
+out:
+ return result;
}
int
-gf_string2uint32_base10 (const char *str, uint32_t *n)
+validate_brick_name(char *brick)
{
- unsigned long l = 0L;
- int rv = 0;
+ char *delimiter = NULL;
+ int ret = 0;
+ delimiter = strrchr(brick, ':');
+ if (!delimiter || delimiter == brick || *(delimiter + 1) != '/')
+ ret = -1;
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
+ return ret;
+}
- if (l >= 0 && l <= UINT32_MAX) {
- *n = (uint32_t) l;
- return 0;
- }
+char *
+get_host_name(char *word, char **host)
+{
+ char *delimiter = NULL;
+ delimiter = strrchr(word, ':');
+ if (delimiter)
+ *delimiter = '\0';
+ else
+ return NULL;
+ *host = word;
+ return *host;
+}
- errno = ERANGE;
+char *
+get_path_name(char *word, char **path)
+{
+ char *delimiter = NULL;
+ delimiter = strchr(word, '/');
+ if (!delimiter)
+ return NULL;
+ *path = delimiter;
+ return *path;
+}
+
+void
+gf_path_strip_trailing_slashes(char *path)
+{
+ int i = 0;
+ int len = 0;
+
+ if (!path)
+ return;
+
+ len = strlen(path);
+ for (i = len - 1; i > 0; i--) {
+ if (path[i] != '/')
+ break;
+ }
+
+ if (i < (len - 1))
+ path[i + 1] = '\0';
+
+ return;
+}
+
+uint64_t
+get_mem_size()
+{
+ uint64_t memsize = -1;
+
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS
+
+ uint64_t page_size = 0;
+ uint64_t num_pages = 0;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ num_pages = sysconf(_SC_PHYS_PAGES);
+
+ memsize = page_size * num_pages;
+#endif
+
+#if defined GF_DARWIN_HOST_OS || defined __FreeBSD__
+
+ size_t len = sizeof(memsize);
+ int name[] = {CTL_HW, HW_PHYSMEM};
+
+ sysctl(name, 2, &memsize, &len, NULL, 0);
+#endif
+
+#if defined __NetBSD__
+
+ size_t len = sizeof(memsize);
+ int name64[] = {CTL_HW, HW_PHYSMEM64};
+
+ sysctl(name64, 2, &memsize, &len, NULL, 0);
+ if (memsize == -1)
+ sysctl(name64, 2, &memsize, &len, NULL, 0);
+#endif
+ return memsize;
+}
+
+/* Strips all whitespace characters in a string and returns length of new string
+ * on success
+ */
+int
+gf_strip_whitespace(char *str, int len)
+{
+ int i = 0;
+ int new_len = 0;
+ char *new_str = NULL;
+
+ GF_ASSERT(str);
+
+ new_str = GF_MALLOC(len + 1, gf_common_mt_char);
+ if (new_str == NULL)
return -1;
+
+ for (i = 0; i < len; i++) {
+ if (!isspace(str[i]))
+ new_str[new_len++] = str[i];
+ }
+ new_str[new_len] = '\0';
+
+ if (new_len != len) {
+ snprintf(str, new_len + 1, "%s", new_str);
+ }
+
+ GF_FREE(new_str);
+ return new_len;
}
int
-gf_string2uint64_base10 (const char *str, uint64_t *n)
+gf_canonicalize_path(char *path)
{
- unsigned long long l = 0ULL;
- int rv = 0;
+ int ret = -1;
+ int path_len = 0;
+ int dir_path_len = 0;
+ char *tmppath = NULL;
+ char *dir = NULL;
+ char *tmpstr = NULL;
- rv = _gf_string2ulonglong (str, &l, 10);
- if (rv != 0)
- return rv;
+ if (!path || *path != '/')
+ goto out;
- if (l >= 0 && l <= UINT64_MAX) {
- *n = (uint64_t) l;
- return 0;
+ if (!strcmp(path, "/"))
+ return 0;
+
+ tmppath = gf_strdup(path);
+ if (!tmppath)
+ goto out;
+
+ /* Strip the extra slashes and return */
+ bzero(path, strlen(path));
+ path[0] = '/';
+ dir = strtok_r(tmppath, "/", &tmpstr);
+
+ while (dir) {
+ dir_path_len = strlen(dir);
+ memcpy((path + path_len + 1), dir, dir_path_len);
+ path_len += dir_path_len + 1;
+ dir = strtok_r(NULL, "/", &tmpstr);
+ if (dir) {
+ path[path_len] = '/';
}
+ }
+ path[path_len] = '\0';
+ ret = 0;
- errno = ERANGE;
- return -1;
+out:
+ if (ret)
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_PATH_ERROR, NULL);
+
+ GF_FREE(tmppath);
+
+ return ret;
+}
+
+static const char *__gf_timefmts[] = {
+ "%F %T", "%Y/%m/%d-%T", "%b %d %T", "%F %H%M%S", "%Y-%m-%d-%T", "%s",
+};
+
+static const char *__gf_zerotimes[] = {
+ "0000-00-00 00:00:00", "0000/00/00-00:00:00", "xxx 00 00:00:00",
+ "0000-00-00 000000", "0000-00-00-00:00:00", "0",
+};
+
+void
+_gf_timestuff(const char ***fmts, const char ***zeros)
+{
+ *fmts = __gf_timefmts;
+ *zeros = __gf_zerotimes;
}
char *
-gf_uint64_2human_readable (uint64_t n)
-{
- int ret = 0;
- char *str = NULL;
-
- if (n >= GF_UNIT_PB) {
- ret = gf_asprintf (&str, "%.1lfPB", ((double) n)/GF_UNIT_PB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_TB) {
- ret = gf_asprintf (&str, "%.1lfTB", ((double) n)/GF_UNIT_TB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_GB) {
- ret = gf_asprintf (&str, "%.1lfGB", ((double) n)/GF_UNIT_GB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_MB) {
- ret = gf_asprintf (&str, "%.1lfMB", ((double) n)/GF_UNIT_MB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_KB) {
- ret = gf_asprintf (&str, "%.1lfKB", ((double) n)/GF_UNIT_KB);
- if (ret < 0)
- goto err;
- } else {
- ret = gf_asprintf (&str, "%luBytes", n);
- if (ret < 0)
- goto err;
- }
- return str;
-err:
- return NULL;
+generate_glusterfs_ctx_id(void)
+{
+ uuid_t ctxid;
+ char *tmp = NULL;
+
+ gf_uuid_generate(ctxid);
+ tmp = uuid_utoa(ctxid);
+
+ return gf_strdup(tmp);
+}
+
+char *
+gf_get_reserved_ports()
+{
+ char *ports_info = NULL;
+#if defined GF_LINUX_HOST_OS
+ int proc_fd = -1;
+ char *proc_file = "/proc/sys/net/ipv4/ip_local_reserved_ports";
+ char buffer[4096] = {
+ 0,
+ };
+ int32_t ret = -1;
+
+ proc_fd = open(proc_file, O_RDONLY);
+ if (proc_fd == -1) {
+ /* What should be done in this case? error out from here
+ * and thus stop the glusterfs process from starting or
+ * continue with older method of using any of the available
+ * port? For now 2nd option is considered.
+ */
+ gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ " /proc/sys/net/ipv4/ip_local_reserved_ports", NULL);
+ goto out;
+ }
+
+ ret = sys_read(proc_fd, buffer, sizeof(buffer) - 1);
+ if (ret < 0) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "file=%s", proc_file, NULL);
+ goto out;
+ }
+
+ buffer[ret] = '\0';
+ ports_info = gf_strdup(buffer);
+
+out:
+ if (proc_fd != -1)
+ sys_close(proc_fd);
+#endif /* GF_LINUX_HOST_OS */
+ return ports_info;
}
int
-gf_string2bytesize (const char *str, uint64_t *n)
+gf_process_reserved_ports(unsigned char *ports, uint32_t ceiling)
{
- uint64_t value = 0ULL;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ int ret = -1;
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ memset(ports, 0, GF_PORT_ARRAY_SIZE);
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
+#if defined GF_LINUX_HOST_OS
+ char *ports_info = NULL;
+ char *tmp = NULL;
+ char *blocked_port = NULL;
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, 10);
+ ports_info = gf_get_reserved_ports();
+ if (!ports_info) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, 0, LG_MSG_RESERVED_PORTS_ERROR,
+ NULL);
+ goto out;
+ }
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ blocked_port = strtok_r(ports_info, ",\n", &tmp);
- if (errno == 0)
- errno = old_errno;
+ while (blocked_port) {
+ gf_ports_reserved(blocked_port, ports, ceiling);
+ blocked_port = strtok_r(NULL, ",\n", &tmp);
+ }
- if (tail[0] != '\0')
- {
- if (strcasecmp (tail, GF_UNIT_KB_STRING) == 0)
- value *= GF_UNIT_KB;
- else if (strcasecmp (tail, GF_UNIT_MB_STRING) == 0)
- value *= GF_UNIT_MB;
- else if (strcasecmp (tail, GF_UNIT_GB_STRING) == 0)
- value *= GF_UNIT_GB;
- else if (strcasecmp (tail, GF_UNIT_TB_STRING) == 0)
- value *= GF_UNIT_TB;
- else if (strcasecmp (tail, GF_UNIT_PB_STRING) == 0)
- value *= GF_UNIT_PB;
- else
- return -1;
- }
+ ret = 0;
- *n = value;
+out:
+ GF_FREE(ports_info);
- return 0;
+#else /* FIXME: Non Linux Host */
+ ret = 0;
+#endif /* GF_LINUX_HOST_OS */
+
+ return ret;
+}
+
+gf_boolean_t
+gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling)
+{
+ gf_boolean_t result = _gf_false;
+ char *range_port = NULL;
+ int32_t tmp_port1 = -1;
+ int32_t tmp_port2 = -1;
+
+ if (strstr(blocked_port, "-") == NULL) {
+ /* get rid of the new line character*/
+ if (blocked_port[strlen(blocked_port) - 1] == '\n')
+ blocked_port[strlen(blocked_port) - 1] = '\0';
+ if (gf_string2int32(blocked_port, &tmp_port1) == 0) {
+ if (tmp_port1 > GF_PORT_MAX || tmp_port1 < 0) {
+ gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_PORT, "port=%d", tmp_port1, NULL);
+ result = _gf_true;
+ goto out;
+ } else {
+ gf_msg_debug("glusterfs", 0,
+ "blocking port "
+ "%d",
+ tmp_port1);
+ BIT_SET(ports, tmp_port1);
+ }
+ } else {
+ gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0, LG_MSG_INVALID_PORT,
+ "port=%s", blocked_port, NULL);
+ result = _gf_true;
+ goto out;
+ }
+ } else {
+ range_port = strtok(blocked_port, "-");
+ if (!range_port) {
+ result = _gf_true;
+ goto out;
+ }
+ if (gf_string2int32(range_port, &tmp_port1) == 0) {
+ if (tmp_port1 > ceiling)
+ tmp_port1 = ceiling;
+ if (tmp_port1 < 0)
+ tmp_port1 = 0;
+ }
+ range_port = strtok(NULL, "-");
+ if (!range_port) {
+ result = _gf_true;
+ goto out;
+ }
+ /* get rid of the new line character*/
+ if (range_port[strlen(range_port) - 1] == '\n')
+ range_port[strlen(range_port) - 1] = '\0';
+ if (gf_string2int32(range_port, &tmp_port2) == 0) {
+ if (tmp_port2 > ceiling)
+ tmp_port2 = ceiling;
+ if (tmp_port2 < 0)
+ tmp_port2 = 0;
+ }
+ gf_msg_debug("glusterfs", 0, "lower: %d, higher: %d", tmp_port1,
+ tmp_port2);
+ for (; tmp_port1 <= tmp_port2; tmp_port1++)
+ BIT_SET(ports, tmp_port1);
+ }
+
+out:
+ return result;
}
+/* Takes in client ip{v4,v6} and returns associated hostname, if any
+ * Also, allocates memory for the hostname.
+ * Returns: 0 for success, -1 for failure
+ */
int
-gf_string2percent_or_bytesize (const char *str,
- uint64_t *n,
- gf_boolean_t *is_percent)
+gf_get_hostname_from_ip(char *client_ip, char **hostname)
{
- uint64_t value = 0ULL;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ int ret = -1;
+ struct sockaddr *client_sockaddr = NULL;
+ struct sockaddr_in client_sock_in = {0};
+ struct sockaddr_in6 client_sock_in6 = {0};
+ char client_hostname[NI_MAXHOST] = {0};
+ char *client_ip_copy = NULL;
+ char *tmp = NULL;
+ char *ip = NULL;
+ size_t addr_sz = 0;
+
+ /* if ipv4, reverse lookup the hostname to
+ * allow FQDN based rpc authentication
+ */
+ if (!valid_ipv6_address(client_ip, strlen(client_ip), 0) &&
+ !valid_ipv4_address(client_ip, strlen(client_ip), 0)) {
+ /* most times, we get a.b.c.d:port form, so check that */
+ client_ip_copy = gf_strdup(client_ip);
+ if (!client_ip_copy)
+ goto out;
+
+ ip = strtok_r(client_ip_copy, ":", &tmp);
+ } else {
+ ip = client_ip;
+ }
+
+ if (valid_ipv4_address(ip, strlen(ip), 0) == _gf_true) {
+ client_sockaddr = (struct sockaddr *)&client_sock_in;
+ addr_sz = sizeof(client_sock_in);
+ client_sock_in.sin_family = AF_INET;
+ ret = inet_pton(AF_INET, ip, (void *)&client_sock_in.sin_addr.s_addr);
+
+ } else if (valid_ipv6_address(ip, strlen(ip), 0) == _gf_true) {
+ client_sockaddr = (struct sockaddr *)&client_sock_in6;
+ addr_sz = sizeof(client_sock_in6);
+
+ client_sock_in6.sin6_family = AF_INET6;
+ ret = inet_pton(AF_INET6, ip, (void *)&client_sock_in6.sin6_addr);
+ } else {
+ goto out;
+ }
+
+ if (ret != 1) {
+ ret = -1;
+ goto out;
+ }
+
+ /* You cannot just use sizeof (*client_sockaddr), as per the man page
+ * the (getnameinfo) size must be the size of the underlying sockaddr
+ * struct e.g. sockaddr_in6 or sockaddr_in. Failure to do so will
+ * break IPv6 hostname resolution (IPv4 will work only because
+ * the sockaddr_in struct happens to be of the correct size).
+ */
+ ret = getnameinfo(client_sockaddr, addr_sz, client_hostname,
+ sizeof(client_hostname), NULL, 0, 0);
+ if (ret) {
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ip=%s", client_ip, "ret=%s", gai_strerror(ret), NULL);
+ ret = -1;
+ goto out;
+ }
+
+ *hostname = gf_strdup((char *)client_hostname);
+out:
+ if (client_ip_copy)
+ GF_FREE(client_ip_copy);
+
+ return ret;
+}
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "argument invalid");
- errno = EINVAL;
- return -1;
+gf_boolean_t
+gf_interface_search(char *ip)
+{
+ int32_t ret = -1;
+ gf_boolean_t found = _gf_false;
+ struct ifaddrs *ifaddr, *ifa;
+ int family;
+ char host[NI_MAXHOST];
+ xlator_t *this = NULL;
+ char *pct = NULL;
+
+ this = THIS;
+
+ ret = getifaddrs(&ifaddr);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETIFADDRS_FAILED, "ret=%s",
+ gai_strerror(ret), NULL);
+ goto out;
+ }
+
+ for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
+ if (!ifa->ifa_addr) {
+ /*
+ * This seemingly happens if an interface hasn't
+ * been bound to a particular protocol (seen with
+ * TUN devices).
+ */
+ continue;
}
+ family = ifa->ifa_addr->sa_family;
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
+ if (family != AF_INET && family != AF_INET6)
+ continue;
+
+ ret = getnameinfo(ifa->ifa_addr,
+ (family == AF_INET) ? sizeof(struct sockaddr_in)
+ : sizeof(struct sockaddr_in6),
+ host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto out;
}
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, 10);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
-
- if (errno == 0)
- errno = old_errno;
-
- if (tail[0] != '\0') {
- if (strcasecmp (tail, GF_UNIT_KB_STRING) == 0)
- value *= GF_UNIT_KB;
- else if (strcasecmp (tail, GF_UNIT_MB_STRING) == 0)
- value *= GF_UNIT_MB;
- else if (strcasecmp (tail, GF_UNIT_GB_STRING) == 0)
- value *= GF_UNIT_GB;
- else if (strcasecmp (tail, GF_UNIT_TB_STRING) == 0)
- value *= GF_UNIT_TB;
- else if (strcasecmp (tail, GF_UNIT_PB_STRING) == 0)
- value *= GF_UNIT_PB;
- else if (strcasecmp (tail, GF_UNIT_PERCENT_STRING) == 0)
- *is_percent = _gf_true;
- else
- return -1;
+ /*
+ * Sometimes the address comes back as addr%eth0 or
+ * similar. Since % is an invalid character, we can
+ * strip it out with confidence that doing so won't
+ * harm anything.
+ */
+ pct = index(host, '%');
+ if (pct) {
+ *pct = '\0';
}
- *n = value;
+ if (strncmp(ip, host, NI_MAXHOST) == 0) {
+ gf_msg_debug(this->name, 0,
+ "%s is local address at "
+ "interface %s",
+ ip, ifa->ifa_name);
+ found = _gf_true;
+ goto out;
+ }
+ }
+out:
+ if (ifaddr)
+ freeifaddrs(ifaddr);
+ return found;
+}
- return 0;
+char *
+get_ip_from_addrinfo(struct addrinfo *addr, char **ip)
+{
+ char buf[64];
+ void *in_addr = NULL;
+ struct sockaddr_in *s4 = NULL;
+ struct sockaddr_in6 *s6 = NULL;
+
+ switch (addr->ai_family) {
+ case AF_INET:
+ s4 = (struct sockaddr_in *)addr->ai_addr;
+ in_addr = &s4->sin_addr;
+ break;
+
+ case AF_INET6:
+ s6 = (struct sockaddr_in6 *)addr->ai_addr;
+ in_addr = &s6->sin6_addr;
+ break;
+
+ default:
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY, NULL);
+ return NULL;
+ }
+
+ if (!inet_ntop(addr->ai_family, in_addr, buf, sizeof(buf))) {
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_CONVERSION_FAILED, NULL);
+ return NULL;
+ }
+
+ *ip = gf_strdup(buf);
+ return *ip;
}
-int64_t
-gf_str_to_long_long (const char *number)
+gf_boolean_t
+gf_is_loopback_localhost(const struct sockaddr *sa, char *hostname)
{
- int64_t unit = 1;
- int64_t ret = 0;
- char *endptr = NULL ;
- if (!number)
- return 0;
+ GF_ASSERT(sa);
+
+ gf_boolean_t is_local = _gf_false;
+ const struct in_addr *addr4 = NULL;
+ const struct in6_addr *addr6 = NULL;
+ uint8_t *ap = NULL;
+ struct in6_addr loopbackaddr6 = IN6ADDR_LOOPBACK_INIT;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ addr4 = &(((struct sockaddr_in *)sa)->sin_addr);
+ ap = (uint8_t *)&addr4->s_addr;
+ if (ap[0] == 127)
+ is_local = _gf_true;
+ break;
+
+ case AF_INET6:
+ addr6 = &(((struct sockaddr_in6 *)sa)->sin6_addr);
+ if (memcmp(addr6, &loopbackaddr6, sizeof(loopbackaddr6)) == 0)
+ is_local = _gf_true;
+ break;
+
+ default:
+ if (hostname)
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY,
+ "family=%d", sa->sa_family, "hostname=%s", hostname,
+ NULL);
+ break;
+ }
+
+ return is_local;
+}
- ret = strtoll (number, &endptr, 0);
-
- if (endptr) {
- switch (*endptr) {
- case 'G':
- case 'g':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024 * 1024 * 1024;
- break;
- case 'M':
- case 'm':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024 * 1024;
- break;
- case 'K':
- case 'k':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024;
- break;
- case '%':
- unit = 1;
- break;
- default:
- unit = 1;
- break;
- }
+gf_boolean_t
+gf_is_local_addr(char *hostname)
+{
+ int32_t ret = -1;
+ struct addrinfo *result = NULL;
+ struct addrinfo *res = NULL;
+ gf_boolean_t found = _gf_false;
+ char *ip = NULL;
+ xlator_t *this = NULL;
+ struct addrinfo hints;
+
+ this = THIS;
+
+ memset(&hints, 0, sizeof(hints));
+ /*
+ * Removing AI_ADDRCONFIG from default_hints
+ * for being able to use link local ipv6 addresses
+ */
+ hints.ai_family = AF_UNSPEC;
+
+ ret = getaddrinfo(hostname, NULL, &hints, &result);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto out;
+ }
+
+ for (res = result; res != NULL; res = res->ai_next) {
+ get_ip_from_addrinfo(res, &ip);
+ gf_msg_debug(this->name, 0, "%s ", ip);
+
+ if (ip) {
+ found = (gf_is_loopback_localhost(res->ai_addr, hostname) ||
+ gf_interface_search(ip));
+ }
+ if (found) {
+ GF_FREE(ip);
+ goto out;
}
- return ret * unit;
+ GF_FREE(ip);
+ /* the above free will not set ip to NULL, and hence, there is
+ double free possible as the loop continues. set ip to NULL. */
+ ip = NULL;
+ }
+
+out:
+ if (result)
+ freeaddrinfo(result);
+
+ if (!found)
+ gf_msg_debug(this->name, 0, "%s is not local", hostname);
+
+ return found;
}
-int
-gf_string2boolean (const char *str, gf_boolean_t *b)
+gf_boolean_t
+gf_is_same_address(char *name1, char *name2)
{
- if (str == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- return -1;
+ struct addrinfo *addr1 = NULL;
+ struct addrinfo *addr2 = NULL;
+ struct addrinfo *p = NULL;
+ struct addrinfo *q = NULL;
+ gf_boolean_t ret = _gf_false;
+ int gai_err = 0;
+ struct addrinfo hints;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+
+ gai_err = getaddrinfo(name1, NULL, &hints, &addr1);
+ if (gai_err != 0) {
+ gf_smsg(name1, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s",
+ gai_strerror(gai_err), NULL);
+ goto out;
+ }
+
+ gai_err = getaddrinfo(name2, NULL, &hints, &addr2);
+ if (gai_err != 0) {
+ gf_smsg(name2, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s",
+ gai_strerror(gai_err), NULL);
+ goto out;
+ }
+
+ for (p = addr1; p; p = p->ai_next) {
+ for (q = addr2; q; q = q->ai_next) {
+ if (p->ai_addrlen != q->ai_addrlen) {
+ continue;
+ }
+ if (memcmp(p->ai_addr, q->ai_addr, p->ai_addrlen)) {
+ continue;
+ }
+ ret = _gf_true;
+ goto out;
}
+ }
- if ((strcasecmp (str, "1") == 0) ||
- (strcasecmp (str, "on") == 0) ||
- (strcasecmp (str, "yes") == 0) ||
- (strcasecmp (str, "true") == 0) ||
- (strcasecmp (str, "enable") == 0)) {
- *b = _gf_true;
- return 0;
- }
+out:
+ if (addr1) {
+ freeaddrinfo(addr1);
+ }
+ if (addr2) {
+ freeaddrinfo(addr2);
+ }
+ return ret;
+}
- if ((strcasecmp (str, "0") == 0) ||
- (strcasecmp (str, "off") == 0) ||
- (strcasecmp (str, "no") == 0) ||
- (strcasecmp (str, "false") == 0) ||
- (strcasecmp (str, "disable") == 0)) {
- *b = _gf_false;
- return 0;
+/*
+ * Processes list of volfile servers.
+ * Format: <host1>:<port1> <host2>:<port2>...
+ */
+int
+gf_process_getspec_servers_list(cmd_args_t *cmd_args, const char *servers_list)
+{
+ char *tmp = NULL;
+ char *address = NULL;
+ char *host = NULL;
+ char *last_colon = NULL;
+ char *save_ptr = NULL;
+ int port = 0;
+ int ret = -1;
+
+ tmp = gf_strdup(servers_list);
+ if (!tmp) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ address = strtok_r(tmp, " ", &save_ptr);
+ if (!address) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ while (1) {
+ last_colon = strrchr(address, ':');
+ if (!last_colon) {
+ errno = EINVAL;
+ ret = -1;
+ break;
+ }
+ *last_colon = '\0';
+ host = address;
+ port = atoi(last_colon + 1);
+ if (port <= 0) {
+ errno = EINVAL;
+ ret = -1;
+ break;
}
+ ret = gf_set_volfile_server_common(cmd_args, host,
+ GF_DEFAULT_VOLFILE_TRANSPORT, port);
+ if (ret && errno != EEXIST) {
+ break;
+ }
+ address = strtok_r(NULL, " ", &save_ptr);
+ if (!address) {
+ errno = 0;
+ ret = 0;
+ break;
+ }
+ }
- return -1;
-}
+out:
+ if (tmp) {
+ GF_FREE(tmp);
+ }
+ return ret;
+}
int
-gf_lockfd (int fd)
+gf_set_volfile_server_common(cmd_args_t *cmd_args, const char *host,
+ const char *transport, int port)
{
- struct gf_flock fl;
+ server_cmdline_t *server = NULL;
+ server_cmdline_t *tmp = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, cmd_args, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, host, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, transport, out);
+
+ server = GF_CALLOC(1, sizeof(server_cmdline_t),
+ gf_common_mt_server_cmdline_t);
+ if (!server) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&server->list);
+
+ server->volfile_server = gf_strdup(host);
+ if (!server->volfile_server) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ server->transport = gf_strdup(transport);
+ if (!server->transport) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ server->port = port;
+
+ if (!cmd_args->volfile_server) {
+ cmd_args->volfile_server = server->volfile_server;
+ cmd_args->volfile_server_transport = server->transport;
+ cmd_args->volfile_server_port = server->port;
+ cmd_args->curr_server = server;
+ }
+
+ list_for_each_entry(tmp, &cmd_args->volfile_servers, list)
+ {
+ if ((!strcmp(tmp->volfile_server, server->volfile_server) &&
+ !strcmp(tmp->transport, server->transport) &&
+ (tmp->port == server->port))) {
+ /* Duplicate option given, log and ignore */
+ gf_smsg("gluster", GF_LOG_INFO, EEXIST, LG_MSG_DUPLICATE_ENTRY,
+ NULL);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ list_add_tail(&server->list, &cmd_args->volfile_servers);
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
+ ret = 0;
+out:
+ if (-1 == ret) {
+ if (server) {
+ GF_FREE(server->volfile_server);
+ GF_FREE(server->transport);
+ GF_FREE(server);
+ }
+ }
- return fcntl (fd, F_SETLK, &fl);
+ return ret;
}
+/* Sets log file path from user provided arguments */
+int
+gf_set_log_file_path(cmd_args_t *cmd_args, glusterfs_ctx_t *ctx)
+{
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int tmp_len = 0;
+ char tmp_str[1024] = {
+ 0,
+ };
+
+ if (!cmd_args)
+ goto done;
+
+ if (cmd_args->mount_point) {
+ j = 0;
+ i = 0;
+ if (cmd_args->mount_point[0] == '/')
+ i = 1;
+ for (; i < strlen(cmd_args->mount_point); i++, j++) {
+ tmp_str[j] = cmd_args->mount_point[i];
+ if (cmd_args->mount_point[i] == '/')
+ tmp_str[j] = '-';
+ }
+
+ ret = gf_asprintf(&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log", tmp_str);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ if (ctx && GF_GLUSTERD_PROCESS == ctx->process_mode) {
+ ret = gf_asprintf(&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log", GLUSTERD_NAME);
+ if (ret > 0)
+ ret = 0;
+
+ goto done;
+ }
+
+ if (cmd_args->volfile) {
+ j = 0;
+ i = 0;
+ if (cmd_args->volfile[0] == '/')
+ i = 1;
+ for (; i < strlen(cmd_args->volfile); i++, j++) {
+ tmp_str[j] = cmd_args->volfile[i];
+ if (cmd_args->volfile[i] == '/')
+ tmp_str[j] = '-';
+ }
+ ret = gf_asprintf(&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log", tmp_str);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ if (cmd_args->volfile_server) {
+ if (strncmp(cmd_args->volfile_server_transport, "unix", 4) == 0) {
+ if (cmd_args->volfile_server[0] == '/')
+ i = 1;
+ tmp_len = strlen(cmd_args->volfile_server);
+ for (j = 0; i < tmp_len; i++, j++) {
+ tmp_str[j] = cmd_args->volfile_server[i];
+ if (cmd_args->volfile_server[i] == '/')
+ tmp_str[j] = '-';
+ }
+ ret = gf_asprintf(&cmd_args->log_file, "%s/%s-%s-%d.log",
+ DEFAULT_LOG_FILE_DIRECTORY, tmp_str,
+ cmd_args->volfile_id, getpid());
+ } else {
+ ret = gf_asprintf(&cmd_args->log_file, "%s/%s-%s-%d.log",
+ DEFAULT_LOG_FILE_DIRECTORY,
+ cmd_args->volfile_server, cmd_args->volfile_id,
+ getpid());
+ }
+ if (ret > 0)
+ ret = 0;
+ }
+done:
+ return ret;
+}
int
-gf_unlockfd (int fd)
+gf_set_log_ident(cmd_args_t *cmd_args)
{
- struct gf_flock fl;
+ int ret = 0;
+ char *ptr = NULL;
- fl.l_type = F_UNLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
+ if (cmd_args->log_file == NULL) {
+ /* no ident source */
+ return 0;
+ }
+
+ /* TODO: Some idents would look like, etc-glusterfs-glusterd.vol, which
+ * seems ugly and can be bettered? */
+ /* just get the filename as the ident */
+ if (NULL != (ptr = strrchr(cmd_args->log_file, '/'))) {
+ ret = gf_asprintf(&cmd_args->log_ident, "%s", ptr + 1);
+ } else {
+ ret = gf_asprintf(&cmd_args->log_ident, "%s", cmd_args->log_file);
+ }
+
+ if (ret > 0)
+ ret = 0;
+ else
+ return ret;
- return fcntl (fd, F_SETLK, &fl);
+ /* remove .log suffix */
+ if (NULL != (ptr = strrchr(cmd_args->log_ident, '.'))) {
+ if (strcmp(ptr, ".log") == 0) {
+ ptr[0] = '\0';
+ }
+ }
+
+ return ret;
}
-static void
-compute_checksum (char *buf, size_t size, uint32_t *checksum)
+int
+gf_thread_cleanup_xint(pthread_t thread)
{
- int ret = -1;
- char *checksum_buf = NULL;
+ int ret = 0;
+ void *res = NULL;
- checksum_buf = (char *)(checksum);
+ ret = pthread_cancel(thread);
+ if (ret != 0)
+ goto error_return;
- if (!(*checksum)) {
- checksum_buf [0] = 0xba;
- checksum_buf [1] = 0xbe;
- checksum_buf [2] = 0xb0;
- checksum_buf [3] = 0x0b;
- }
+ ret = pthread_join(thread, &res);
+ if (ret != 0)
+ goto error_return;
- for (ret = 0; ret < (size - 4); ret += 4) {
- checksum_buf[0] ^= (buf[ret]);
- checksum_buf[1] ^= (buf[ret + 1] << 1) ;
- checksum_buf[2] ^= (buf[ret + 2] << 2);
- checksum_buf[3] ^= (buf[ret + 3] << 3);
- }
+ if (res != PTHREAD_CANCELED)
+ goto error_return;
- for (ret = 0; ret <= (size % 4); ret++) {
- checksum_buf[ret] ^= (buf[(size - 4) + ret] << ret);
- }
+ ret = 0;
+
+error_return:
+ return ret;
+}
+void
+gf_thread_set_vname(pthread_t thread, const char *name, va_list args)
+{
+ char thread_name[GF_THREAD_NAME_LIMIT];
+ int ret;
+
+ /* Initialize the thread name with the prefix (not NULL terminated). */
+ memcpy(thread_name, GF_THREAD_NAME_PREFIX,
+ sizeof(GF_THREAD_NAME_PREFIX) - 1);
+
+ ret = vsnprintf(thread_name + sizeof(GF_THREAD_NAME_PREFIX) - 1,
+ sizeof(thread_name) - sizeof(GF_THREAD_NAME_PREFIX) + 1,
+ name, args);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PTHREAD_NAMING_FAILED,
+ "name=%s", name, NULL);
return;
+ }
+
+ if (ret >= sizeof(thread_name)) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_THREAD_NAME_TOO_LONG,
+ "name=%s", thread_name, NULL);
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ ret = pthread_setname_np(thread, thread_name);
+#elif defined(__NetBSD__)
+ ret = pthread_setname_np(thread, thread_name, NULL);
+#elif defined(__FreeBSD__)
+ pthread_set_name_np(thread, thread_name);
+ ret = 0;
+#else
+ ret = ENOSYS;
+#endif
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, ret, LG_MSG_SET_THREAD_FAILED,
+ "name=%s", thread_name, NULL);
+ }
}
-#define GF_CHECKSUM_BUF_SIZE 1024
+void
+gf_thread_set_name(pthread_t thread, const char *name, ...)
+{
+ va_list args;
+
+ va_start(args, name);
+ gf_thread_set_vname(thread, name, args);
+ va_end(args);
+}
int
-get_checksum_for_file (int fd, uint32_t *checksum)
+gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ va_list args)
{
- int ret = -1;
- char buf[GF_CHECKSUM_BUF_SIZE] = {0,};
+ sigset_t set, old;
+ int ret;
+
+ sigemptyset(&old);
+ sigfillset(&set);
+ sigdelset(&set, SIGSEGV);
+ sigdelset(&set, SIGBUS);
+ sigdelset(&set, SIGILL);
+ sigdelset(&set, SIGSYS);
+ sigdelset(&set, SIGFPE);
+ sigdelset(&set, SIGABRT);
+
+ pthread_sigmask(SIG_BLOCK, &set, &old);
+
+ ret = pthread_create(thread, attr, start_routine, arg);
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_THREAD_CREATE_FAILED,
+ NULL);
+ ret = -1;
+ } else if (name != NULL) {
+ gf_thread_set_vname(*thread, name, args);
+ }
+
+ pthread_sigmask(SIG_SETMASK, &old, NULL);
+
+ return ret;
+}
- /* goto first place */
- lseek (fd, 0L, SEEK_SET);
- do {
- ret = read (fd, &buf, GF_CHECKSUM_BUF_SIZE);
- if (ret > 0)
- compute_checksum (buf, GF_CHECKSUM_BUF_SIZE,
- checksum);
- } while (ret > 0);
+int
+gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ ...)
+{
+ va_list args;
+ int ret;
- /* set it back */
- lseek (fd, 0L, SEEK_SET);
+ va_start(args, name);
+ ret = gf_thread_vcreate(thread, attr, start_routine, arg, name, args);
+ va_end(args);
- return ret;
+ return ret;
}
+int
+gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *),
+ void *arg, const char *name, ...)
+{
+ pthread_attr_t attr;
+ va_list args;
+ int ret = -1;
+
+ ret = pthread_attr_init(&attr);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_ATTR_INIT_FAILED,
+ NULL);
+ return -1;
+ }
+
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+ va_start(args, name);
+ ret = gf_thread_vcreate(thread, &attr, start_routine, arg, name, args);
+ va_end(args);
+
+ pthread_attr_destroy(&attr);
+
+ return ret;
+}
int
-get_checksum_for_path (char *path, uint32_t *checksum)
+gf_skip_header_section(int fd, int header_len)
{
- int ret = -1;
- int fd = -1;
+ int ret = -1;
- GF_ASSERT (path);
- GF_ASSERT (checksum);
+ ret = sys_lseek(fd, header_len, SEEK_SET);
+ if (ret == (off_t)-1) {
+ gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_SKIP_HEADER_FAILED, NULL);
+ } else {
+ ret = 0;
+ }
- fd = open (path, O_RDWR);
+ return ret;
+}
- if (fd == -1) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to open %s, errno: %d",
- path, errno);
- goto out;
- }
+/* Below function is use to check at runtime if pid is running */
- ret = get_checksum_for_file (fd, checksum);
+gf_boolean_t
+gf_is_pid_running(int pid)
+{
+#ifdef __FreeBSD__
+ int ret = -1;
+
+ ret = sys_kill(pid, 0);
+ if (ret < 0) {
+ return _gf_false;
+ }
+#else
+ char fname[32] = {
+ 0,
+ };
+ int fd = -1;
+
+ snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
+
+ fd = sys_open(fname, O_RDONLY, 0);
+ if (fd < 0) {
+ return _gf_false;
+ }
+
+ sys_close(fd);
+#endif
+ return _gf_true;
+}
+gf_boolean_t
+gf_is_service_running(char *pidfile, int *pid)
+{
+ FILE *file = NULL;
+ gf_boolean_t running = _gf_false;
+ int ret = 0;
+ int fno = 0;
+
+ file = fopen(pidfile, "r+");
+ if (!file) {
+ goto out;
+ }
+
+ fno = fileno(file);
+ ret = lockf(fno, F_TEST, 0);
+ if (ret == -1) {
+ running = _gf_true;
+ }
+
+ ret = fscanf(file, "%d", pid);
+ if (ret <= 0) {
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, "pidfile=%s",
+ pidfile, NULL);
+ *pid = -1;
+ running = _gf_false;
+ goto out;
+ }
+
+ running = gf_is_pid_running(*pid);
out:
- if (fd != -1)
- close (fd);
+ if (file)
+ fclose(file);
+ return running;
+}
- return ret;
+/* Check if the pid is > 0 */
+gf_boolean_t
+gf_valid_pid(const char *pid, int length)
+{
+ gf_boolean_t ret = _gf_true;
+ pid_t value = 0;
+ char *end_ptr = NULL;
+
+ if (length <= 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ value = strtol(pid, &end_ptr, 10);
+ if (value <= 0) {
+ ret = _gf_false;
+ }
+out:
+ return ret;
}
-char *
-strtail (char *str, const char *pattern)
+static int
+dht_is_linkfile_key(dict_t *this, char *key, data_t *value, void *data)
{
- int i = 0;
+ gf_boolean_t *linkfile_key_found = NULL;
- for (i = 0; str[i] == pattern[i] && str[i]; i++);
+ if (!data)
+ goto out;
- if (pattern[i] == '\0')
- return str + i;
+ linkfile_key_found = data;
- return NULL;
+ *linkfile_key_found = _gf_true;
+out:
+ return 0;
}
-void
-skipwhite (char **s)
+gf_boolean_t
+dht_is_linkfile(struct iatt *buf, dict_t *dict)
{
- while (isspace (**s))
- (*s)++;
+ gf_boolean_t linkfile_key_found = _gf_false;
+
+ if (!IS_DHT_LINKFILE_MODE(buf))
+ return _gf_false;
+
+ dict_foreach_fnmatch(dict, "*." DHT_LINKFILE_STR, dht_is_linkfile_key,
+ &linkfile_key_found);
+
+ return linkfile_key_found;
}
-char *
-nwstrtail (char *str, char *pattern)
+int
+gf_check_log_format(const char *value)
{
- for (;;) {
- skipwhite (&str);
- skipwhite (&pattern);
+ int log_format = -1;
- if (*str != *pattern || !*str)
- break;
+ if (!strcasecmp(value, GF_LOG_FORMAT_NO_MSG_ID))
+ log_format = gf_logformat_traditional;
+ else if (!strcasecmp(value, GF_LOG_FORMAT_WITH_MSG_ID))
+ log_format = gf_logformat_withmsgid;
- str++;
- pattern++;
- }
+ if (log_format == -1)
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
+ "possible_values=" GF_LOG_FORMAT_NO_MSG_ID
+ "|" GF_LOG_FORMAT_WITH_MSG_ID,
+ NULL);
- return *pattern ? NULL : str;
+ return log_format;
}
-void
-skipword (char **s)
+int
+gf_check_logger(const char *value)
{
- if (!*s)
- return;
+ int logger = -1;
+
+ if (!strcasecmp(value, GF_LOGGER_GLUSTER_LOG))
+ logger = gf_logger_glusterlog;
+ else if (!strcasecmp(value, GF_LOGGER_SYSLOG))
+ logger = gf_logger_syslog;
- skipwhite (s);
+ if (logger == -1)
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
+ "possible_values=" GF_LOGGER_GLUSTER_LOG "|" GF_LOGGER_SYSLOG,
+ NULL);
- while (!isspace(**s))
- (*s)++;
+ return logger;
}
-char *
-get_nth_word (const char *str, int n)
+/* gf_compare_sockaddr compares the given addresses @addr1 and @addr2 for
+ * equality, ie. if they both refer to the same address.
+ *
+ * This was inspired by sock_addr_cmp_addr() from
+ * https://www.opensource.apple.com/source/postfix/postfix-197/postfix/src/util/sock_addr.c
+ */
+gf_boolean_t
+gf_compare_sockaddr(const struct sockaddr *addr1, const struct sockaddr *addr2)
{
- char buf[4096] = {0};
- char *start = NULL;
- char *word = NULL;
- int i = 0;
- int word_len = 0;
- const char *end = NULL;
+ GF_ASSERT(addr1 != NULL);
+ GF_ASSERT(addr2 != NULL);
+
+ /* Obviously, the addresses don't match if their families are different
+ */
+ if (addr1->sa_family != addr2->sa_family)
+ return _gf_false;
+
+ if (AF_INET == addr1->sa_family) {
+ if (((struct sockaddr_in *)addr1)->sin_addr.s_addr ==
+ ((struct sockaddr_in *)addr2)->sin_addr.s_addr)
+ return _gf_true;
+
+ } else if (AF_INET6 == addr1->sa_family) {
+ if (memcmp((char *)&((struct sockaddr_in6 *)addr1)->sin6_addr,
+ (char *)&((struct sockaddr_in6 *)addr2)->sin6_addr,
+ sizeof(struct in6_addr)) == 0)
+ return _gf_true;
+ }
+ return _gf_false;
+}
- if (!str)
- goto out;
+/*
+ * gf_set_timestamp:
+ * It sets the mtime and atime of 'dest' file as of 'src'.
+ */
- snprintf (buf, sizeof (buf), "%s", str);
- start = buf;
+int
+gf_set_timestamp(const char *src, const char *dest)
+{
+ struct stat sb = {
+ 0,
+ };
+#if defined(HAVE_UTIMENSAT)
+ struct timespec new_time[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#else
+ struct timeval new_time[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#endif
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(src);
+ GF_ASSERT(dest);
+
+ ret = sys_stat(src, &sb);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
+ "stat=%s", src, NULL);
+ goto out;
+ }
+ /* The granularity is nano seconds if `utimensat()` is available,
+ * and micro seconds otherwise.
+ */
+#if defined(HAVE_UTIMENSAT)
+ new_time[0].tv_sec = sb.st_atime;
+ new_time[0].tv_nsec = ST_ATIM_NSEC(&sb);
+
+ new_time[1].tv_sec = sb.st_mtime;
+ new_time[1].tv_nsec = ST_MTIM_NSEC(&sb);
+
+ /* dirfd = 0 is ignored because `dest` is an absolute path. */
+ ret = sys_utimensat(AT_FDCWD, dest, new_time, AT_SYMLINK_NOFOLLOW);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMENSAT_FAILED,
+ "dest=%s", dest, NULL);
+ }
+#else
+ new_time[0].tv_sec = sb.st_atime;
+ new_time[0].tv_usec = ST_ATIM_NSEC(&sb) / 1000;
+
+ new_time[1].tv_sec = sb.st_mtime;
+ new_time[1].tv_usec = ST_MTIM_NSEC(&sb) / 1000;
+
+ ret = sys_utimes(dest, new_time);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMES_FAILED,
+ "dest=%s", dest, NULL);
+ }
+#endif
+out:
+ return ret;
+}
- for (i = 0; i < n-1; i++)
- skipword (&start);
+static void
+gf_backtrace_end(char *buf, size_t frames)
+{
+ size_t pos = 0;
- skipwhite (&start);
- end = strpbrk ((const char *)start, " \t\n\0");
+ if (!buf)
+ return;
- if (!end)
- goto out;
+ pos = strlen(buf);
- word_len = abs (end - start);
+ frames = min(frames, GF_BACKTRACE_LEN - pos - 1);
- word = GF_CALLOC (1, word_len + 1, gf_common_mt_strdup);
- if (!word)
- goto out;
+ if (0 == frames)
+ return;
- strncpy (word, start, word_len);
- *(word + word_len) = '\0';
- out:
- return word;
+ memset(buf + pos, ')', frames);
+ buf[pos + frames] = '\0';
}
-/* Syntax formed according to RFC 1912 (RFC 1123 & 952 are more restrictive) *
- <hname> ::= <gen-name>*["."<gen-name>] *
- <gen-name> ::= <let-or-digit> <[*[<let-or-digit-or-hyphen>]<let-or-digit>] */
-char
-valid_host_name (char *address, int length)
+/*Returns bytes written*/
+static int
+gf_backtrace_append(char *buf, size_t pos, char *framestr)
{
- int i = 0;
- int str_len = 0;
- char ret = 1;
- char *dup_addr = NULL;
- char *temp_str = NULL;
- char *save_ptr = NULL;
+ if (pos >= GF_BACKTRACE_LEN)
+ return -1;
+ return snprintf(buf + pos, GF_BACKTRACE_LEN - pos, "(--> %s ", framestr);
+}
- if ((length > _POSIX_HOST_NAME_MAX) || (length == 1)) {
- ret = 0;
- goto out;
- }
+static int
+gf_backtrace_fillframes(char *buf)
+{
+ void *array[GF_BACKTRACE_FRAME_COUNT];
+ size_t frames = 0;
+ FILE *fp = NULL;
+ char callingfn[GF_BACKTRACE_FRAME_COUNT - 2][1024] = {
+ {0},
+ };
+ int ret = -1;
+ int fd = -1;
+ size_t idx = 0;
+ size_t pos = 0;
+ size_t inc = 0;
+ char tmpl[] = "/tmp/glfs-bt-XXXXXX";
+
+ frames = backtrace(array, GF_BACKTRACE_FRAME_COUNT);
+ if (!frames)
+ return -1;
- dup_addr = gf_strdup (address);
- if (!dup_addr) {
- ret = 0;
- goto out;
- }
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ fd = mkstemp(tmpl);
+ if (fd == -1)
+ return -1;
- /* gen-name */
- temp_str = strtok_r (dup_addr, ".", &save_ptr);
- do {
- str_len = strlen (temp_str);
-
- if (!isalnum (temp_str[0]) ||
- !isalnum (temp_str[str_len-1])) {
- ret = 0;
- goto out;
- }
- for (i = 1; i < str_len; i++) {
- if (!isalnum (temp_str[i]) && (temp_str[i] != '-')) {
- ret = 0;
- goto out;
- }
- }
- } while ((temp_str = strtok_r (NULL, ".", &save_ptr)));
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
+ */
+ ret = sys_unlink(tmpl);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, LG_MSG_FILE_DELETE_FAILED,
+ "temporary_file=%s", tmpl, NULL);
+ }
+
+ /*The most recent two frames are the calling function and
+ * gf_backtrace_save, which we can infer.*/
+
+ backtrace_symbols_fd(&array[2], frames - 2, fd);
+
+ fp = fdopen(fd, "r");
+ if (!fp) {
+ sys_close(fd);
+ goto out;
+ }
+
+ ret = fseek(fp, 0L, SEEK_SET);
+ if (ret)
+ goto out;
+
+ pos = 0;
+ for (idx = 0; idx < frames - 2; idx++) {
+ ret = fscanf(fp, "%1023s", callingfn[idx]);
+ if (ret == EOF)
+ break;
+ inc = gf_backtrace_append(buf, pos, callingfn[idx]);
+ if (inc == -1)
+ break;
+ pos += inc;
+ }
+ gf_backtrace_end(buf, idx);
out:
- if (dup_addr)
- GF_FREE (dup_addr);
- return ret;
+ if (fp)
+ fclose(fp);
+
+ return (idx > 0) ? 0 : -1;
}
-/* Matches all ipv4 address, if wildcard_acc is true '*' wildcard pattern for*
- subnets is considerd as valid strings as well */
-char
-valid_ipv4_address (char *address, int length, gf_boolean_t wildcard_acc)
+/* Optionally takes @buf to save backtrace. If @buf is NULL, uses the
+ * pre-allocated ctx->btbuf to avoid allocating memory while printing
+ * backtrace.
+ * TODO: This API doesn't provide flexibility in terms of no. of frames
+ * of the backtrace is being saved in the buffer. Deferring fixing it
+ * when there is a real-use for that.*/
+
+char *
+gf_backtrace_save(char *buf)
{
- int octets = 0;
- int value = 0;
- char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
- char ret = 1;
- int is_wildcard = 0;
+ char *bt = NULL;
- tmp = gf_strdup (address);
+ if (!buf) {
+ bt = THIS->ctx->btbuf;
+ GF_ASSERT(bt);
- /* To prevent cases where last character is '.' */
- if (!isdigit (tmp[length - 1]) && (tmp[length - 1] != '*')) {
- ret = 0;
- goto out;
+ } else {
+ bt = buf;
+ }
+
+ if ((0 == gf_backtrace_fillframes(bt)))
+ return bt;
+
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_BACKTRACE_SAVE_FAILED, NULL);
+ return NULL;
+}
+
+gf_loglevel_t
+fop_log_level(glusterfs_fop_t fop, int op_errno)
+{
+ /* if gfid doesn't exist ESTALE comes */
+ if (op_errno == ENOENT || op_errno == ESTALE)
+ return GF_LOG_DEBUG;
+
+ if ((fop == GF_FOP_ENTRYLK) || (fop == GF_FOP_FENTRYLK) ||
+ (fop == GF_FOP_FINODELK) || (fop == GF_FOP_INODELK) ||
+ (fop == GF_FOP_LK)) {
+ /*
+ * if non-blocking lock fails EAGAIN comes
+ * if locks xlator is not loaded ENOSYS comes
+ */
+ if (op_errno == EAGAIN || op_errno == ENOSYS)
+ return GF_LOG_DEBUG;
+ }
+
+ if ((fop == GF_FOP_GETXATTR) || (fop == GF_FOP_FGETXATTR)) {
+ if (op_errno == ENOTSUP || op_errno == ENODATA)
+ return GF_LOG_DEBUG;
+ }
+
+ if ((fop == GF_FOP_SETXATTR) || (fop == GF_FOP_FSETXATTR) ||
+ (fop == GF_FOP_REMOVEXATTR) || (fop == GF_FOP_FREMOVEXATTR)) {
+ if (op_errno == ENOTSUP)
+ return GF_LOG_DEBUG;
+ }
+
+ if (fop == GF_FOP_MKNOD || fop == GF_FOP_MKDIR)
+ if (op_errno == EEXIST)
+ return GF_LOG_DEBUG;
+
+ if (fop == GF_FOP_SEEK) {
+#ifdef HAVE_SEEK_HOLE
+ if (op_errno == ENXIO) {
+ return GF_LOG_DEBUG;
}
+#else
+ return GF_LOG_DEBUG;
+#endif
+ }
- prev = tmp;
- prev = strtok_r (tmp, ".", &ptr);
-
- while (prev != NULL) {
- octets++;
- if (wildcard_acc && !strcmp (prev, "*")) {
- is_wildcard = 1;
- } else {
- value = strtol (prev, &endptr, 10);
- if ((value > 255) || (value < 0) ||
- (endptr != NULL && *endptr != '\0')) {
- ret = 0;
- goto out;
- }
- }
- prev = strtok_r (NULL, ".", &ptr);
+ return GF_LOG_ERROR;
+}
+
+/* This function will build absolute path of file/directory from the
+ * current location and relative path given from the current location
+ * For example consider our current path is /a/b/c/ and relative path
+ * from current location is ./../x/y/z .After parsing through this
+ * function the absolute path becomes /a/b/x/y/z/.
+ *
+ * The function gives a pointer to absolute path if it is successful
+ * and also returns zero.
+ * Otherwise function gives NULL pointer with returning an err value.
+ *
+ * So the user need to free memory allocated for path.
+ *
+ */
+
+int32_t
+gf_build_absolute_path(char *current_path, char *relative_path, char **path)
+{
+ char *absolute_path = NULL;
+ char *token = NULL;
+ char *component = NULL;
+ char *saveptr = NULL;
+ char *end = NULL;
+ int ret = 0;
+ size_t relativepath_len = 0;
+ size_t currentpath_len = 0;
+ size_t max_absolutepath_len = 0;
+
+ GF_ASSERT(current_path);
+ GF_ASSERT(relative_path);
+ GF_ASSERT(path);
+
+ if (!path || !current_path || !relative_path) {
+ ret = -EFAULT;
+ goto err;
+ }
+ /* Check for current and relative path
+ * current path should be absolute one and start from '/'
+ * relative path should not start from '/'
+ */
+ currentpath_len = strlen(current_path);
+ if (current_path[0] != '/' || (currentpath_len > PATH_MAX)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE,
+ "current-path=%s", current_path, NULL);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ relativepath_len = strlen(relative_path);
+ if (relative_path[0] == '/' || (relativepath_len > PATH_MAX)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE,
+ "relative-path=%s", relative_path, NULL);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* It is maximum possible value for absolute path */
+ max_absolutepath_len = currentpath_len + relativepath_len + 2;
+
+ absolute_path = GF_CALLOC(1, max_absolutepath_len, gf_common_mt_char);
+ if (!absolute_path) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ absolute_path[0] = '\0';
+
+ /* If current path is root i.e contains only "/", we do not
+ * need to copy it
+ */
+ if (strcmp(current_path, "/") != 0) {
+ strcpy(absolute_path, current_path);
+
+ /* We trim '/' at the end for easier string manipulation */
+ gf_path_strip_trailing_slashes(absolute_path);
+ }
+
+ /* Used to spilt relative path based on '/' */
+ component = gf_strdup(relative_path);
+ if (!component) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /* In the relative path, we want to consider ".." and "."
+ * if token is ".." , we just need to reduce one level hierarchy
+ * if token is "." , we just ignore it
+ * if token is NULL , end of relative path
+ * if absolute path becomes '\0' and still "..", then it is a bad
+ * relative path, it points to out of boundary area and stop
+ * building the absolute path
+ * All other cases we just concatenate token to the absolute path
+ */
+ for (token = strtok_r(component, "/", &saveptr),
+ end = strchr(absolute_path, '\0');
+ token; token = strtok_r(NULL, "/", &saveptr)) {
+ if (strcmp(token, ".") == 0)
+ continue;
+
+ else if (strcmp(token, "..") == 0) {
+ if (absolute_path[0] == '\0') {
+ ret = -EACCES;
+ goto err;
+ }
+
+ end = strrchr(absolute_path, '/');
+ *end = '\0';
+ } else {
+ ret = snprintf(end, max_absolutepath_len - strlen(absolute_path),
+ "/%s", token);
+ end = strchr(absolute_path, '\0');
}
+ }
- if ((octets > 4) || (octets < 4 && !is_wildcard)) {
- ret = 0;
+ if (strlen(absolute_path) > PATH_MAX) {
+ ret = -EINVAL;
+ goto err;
+ }
+ *path = gf_strdup(absolute_path);
+
+err:
+ if (component)
+ GF_FREE(component);
+ if (absolute_path)
+ GF_FREE(absolute_path);
+ return ret;
+}
+
+/* This is an utility function which will recursively delete
+ * a folder and its contents.
+ *
+ * @param delete_path folder to be deleted.
+ *
+ * @return 0 on success and -1 on failure.
+ */
+int
+recursive_rmdir(const char *delete_path)
+{
+ int ret = -1;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ struct stat st = {
+ 0,
+ };
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, delete_path, out);
+
+ dir = sys_opendir(delete_path);
+ if (!dir) {
+ gf_msg_debug(this->name, 0,
+ "Failed to open directory %s. "
+ "Reason : %s",
+ delete_path, strerror(errno));
+ ret = 0;
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
+ ret = sys_lstat(path, &st);
+ if (ret == -1) {
+ gf_msg_debug(this->name, 0,
+ "Failed to stat entry %s :"
+ " %s",
+ path, strerror(errno));
+ (void)sys_closedir(dir);
+ goto out;
}
+ if (S_ISDIR(st.st_mode))
+ ret = recursive_rmdir(path);
+ else
+ ret = sys_unlink(path);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ " Failed to remove %s. "
+ "Reason : %s",
+ path, strerror(errno));
+ }
+
+ gf_msg_debug(this->name, 0, "%s %s",
+ ret ? "Failed to remove" : "Removed", entry->d_name);
+ }
+
+ ret = sys_closedir(dir);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ delete_path, strerror(errno));
+ }
+
+ ret = sys_rmdir(delete_path);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to rmdir: %s,err: %s", delete_path,
+ strerror(errno));
+ }
+
out:
- GF_FREE (tmp);
- return ret;
+ return ret;
+}
+/*
+ * Input: Array of strings 'array' terminating in NULL
+ * string 'elem' to be searched in the array
+ *
+ * Output: Index of the element in the array if found, '-1' otherwise
+ */
+int
+gf_get_index_by_elem(char **array, char *elem)
+{
+ int i = 0;
+
+ for (i = 0; array[i]; i++) {
+ if (strcmp(elem, array[i]) == 0)
+ return i;
+ }
+
+ return -1;
}
-char
-valid_ipv6_address (char *address, int length, gf_boolean_t wildcard_acc)
+static int
+get_pathinfo_host(char *pathinfo, char *hostname, size_t size)
{
- int hex_numbers = 0;
- int value = 0;
- int i = 0;
- char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
- char ret = 1;
- int is_wildcard = 0;
- int is_compressed = 0;
+ char *start = NULL;
+ char *end = NULL;
+ int ret = -1;
+ int i = 0;
+
+ if (!pathinfo)
+ goto out;
+
+ start = strchr(pathinfo, ':');
+ if (!start)
+ goto out;
+
+ end = strrchr(pathinfo, ':');
+ if (start == end)
+ goto out;
+
+ memset(hostname, 0, size);
+ i = 0;
+ while (++start != end)
+ hostname[i++] = *start;
+ ret = 0;
+out:
+ return ret;
+}
- tmp = gf_strdup (address);
+/*Note: 'pathinfo' should be gathered only from one brick*/
+int
+glusterfs_is_local_pathinfo(char *pathinfo, gf_boolean_t *is_local)
+{
+ int ret = 0;
+ char pathinfohost[1024] = {0};
+ char localhost[1024] = {0};
+
+ *is_local = _gf_false;
+ ret = get_pathinfo_host(pathinfo, pathinfohost, sizeof(pathinfohost));
+ if (ret)
+ goto out;
+
+ ret = gethostname(localhost, sizeof(localhost));
+ if (ret)
+ goto out;
- /* Check for compressed form */
- if (tmp[length - 1] == ':') {
+ if (!strcmp(localhost, pathinfohost))
+ *is_local = _gf_true;
+out:
+ return ret;
+}
+
+ssize_t
+gf_nread(int fd, void *buf, size_t count)
+{
+ ssize_t ret = 0;
+ ssize_t read_bytes = 0;
+
+ for (read_bytes = 0; read_bytes < count; read_bytes += ret) {
+ ret = sys_read(fd, buf + read_bytes, count - read_bytes);
+ if (ret == 0) {
+ break;
+ } else if (ret < 0) {
+ if (errno == EINTR)
ret = 0;
+ else
goto out;
}
- for (i = 0; i < (length - 1) ; i++) {
- if (tmp[i] == ':' && tmp[i + 1] == ':') {
- if (is_compressed == 0)
- is_compressed = 1;
- else {
- ret = 0;
- goto out;
- }
- }
- }
+ }
- prev = strtok_r (tmp, ":", &ptr);
-
- while (prev != NULL) {
- hex_numbers++;
- if (wildcard_acc && !strcmp (prev, "*")) {
- is_wildcard = 1;
- } else {
- value = strtol (prev, &endptr, 16);
- if ((value > 0xffff) || (value < 0)
- || (endptr != NULL && *endptr != '\0')) {
- ret = 0;
- goto out;
- }
- }
- prev = strtok_r (NULL, ":", &ptr);
- }
+ ret = read_bytes;
+out:
+ return ret;
+}
- if ((hex_numbers > 8) || (hex_numbers < 8 && !is_wildcard
- && !is_compressed)) {
+ssize_t
+gf_nwrite(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = 0;
+ ssize_t written = 0;
+
+ for (written = 0; written != count; written += ret) {
+ ret = sys_write(fd, buf + written, count - written);
+ if (ret < 0) {
+ if (errno == EINTR)
ret = 0;
+ else
+ goto out;
}
+ }
+ ret = written;
out:
- GF_FREE (tmp);
- return ret;
+ return ret;
}
-char
-valid_internet_address (char *address, gf_boolean_t wildcard_acc)
+void
+gf_free_mig_locks(lock_migration_info_t *locks)
{
- char ret = 0;
- int length = 0;
+ lock_migration_info_t *current = NULL;
+ lock_migration_info_t *temp = NULL;
- if (address == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- goto out;
- }
+ if (!locks)
+ return;
- length = strlen (address);
- if (length == 0)
- goto out;
+ if (list_empty(&locks->list))
+ return;
- if (valid_ipv4_address (address, length, wildcard_acc)
- || valid_ipv6_address (address, length, wildcard_acc)
- || valid_host_name (address, length))
- ret = 1;
+ list_for_each_entry_safe(current, temp, &locks->list, list)
+ {
+ list_del_init(&current->list);
+ GF_FREE(current->client_uid);
+ GF_FREE(current);
+ }
+}
+void
+_mask_cancellation(void)
+{
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+}
+
+void
+_unmask_cancellation(void)
+{
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+}
+
+/* This is a wrapper function to add a pointer to a list,
+ * which doesn't contain list member
+ */
+struct list_node *
+_list_node_add(void *ptr, struct list_head *list,
+ int (*compare)(struct list_head *, struct list_head *))
+{
+ struct list_node *node = NULL;
+
+ if (ptr == NULL || list == NULL)
+ goto out;
+
+ node = GF_CALLOC(1, sizeof(struct list_node), gf_common_list_node);
+
+ if (node == NULL)
+ goto out;
+
+ node->ptr = ptr;
+ if (compare)
+ list_add_order(&node->list, list, compare);
+ else
+ list_add_tail(&node->list, list);
out:
- return ret;
+ return node;
}
-/*Thread safe conversion function*/
-char *
-uuid_utoa (uuid_t uuid)
+struct list_node *
+list_node_add(void *ptr, struct list_head *list)
{
- char *uuid_buffer = glusterfs_uuid_buf_get();
- uuid_unparse (uuid, uuid_buffer);
- return uuid_buffer;
+ return _list_node_add(ptr, list, NULL);
}
-/*Re-entrant conversion function*/
-char *
-uuid_utoa_r (uuid_t uuid, char *dst)
+struct list_node *
+list_node_add_order(void *ptr, struct list_head *list,
+ int (*compare)(struct list_head *, struct list_head *))
{
- if(!dst)
- return NULL;
- uuid_unparse (uuid, dst);
- return dst;
+ return _list_node_add(ptr, list, compare);
}
-/*Thread safe conversion function*/
-char *
-lkowner_utoa (gf_lkowner_t *lkowner)
+void
+list_node_del(struct list_node *node)
{
- char *lkowner_buffer = glusterfs_lkowner_buf_get();
- lkowner_unparse (lkowner, lkowner_buffer, GF_LKOWNER_BUF_SIZE);
- return lkowner_buffer;
+ if (node == NULL)
+ return;
+
+ list_del_init(&node->list);
+ GF_FREE(node);
}
-/*Re-entrant conversion function*/
-char *
-lkowner_utoa_r (gf_lkowner_t *lkowner, char *dst, int len)
+const char *
+fop_enum_to_pri_string(glusterfs_fop_t fop)
{
- if(!dst)
- return NULL;
- lkowner_unparse (lkowner, dst, len);
- return dst;
+ switch (fop) {
+ case GF_FOP_OPEN:
+ case GF_FOP_STAT:
+ case GF_FOP_FSTAT:
+ case GF_FOP_LOOKUP:
+ case GF_FOP_ACCESS:
+ case GF_FOP_READLINK:
+ case GF_FOP_OPENDIR:
+ case GF_FOP_STATFS:
+ case GF_FOP_READDIR:
+ case GF_FOP_READDIRP:
+ case GF_FOP_GETACTIVELK:
+ case GF_FOP_SETACTIVELK:
+ case GF_FOP_ICREATE:
+ case GF_FOP_NAMELINK:
+ return "HIGH";
+
+ case GF_FOP_CREATE:
+ case GF_FOP_FLUSH:
+ case GF_FOP_LK:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_UNLINK:
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ case GF_FOP_RMDIR:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_RENAME:
+ case GF_FOP_LINK:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_GETXATTR:
+ case GF_FOP_FGETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ case GF_FOP_IPC:
+ case GF_FOP_LEASE:
+ return "NORMAL";
+
+ case GF_FOP_READ:
+ case GF_FOP_WRITE:
+ case GF_FOP_FSYNC:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FSYNCDIR:
+ case GF_FOP_XATTROP:
+ case GF_FOP_FXATTROP:
+ case GF_FOP_RCHECKSUM:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_SEEK:
+ return "LOW";
+
+ case GF_FOP_NULL:
+ case GF_FOP_FORGET:
+ case GF_FOP_RELEASE:
+ case GF_FOP_RELEASEDIR:
+ case GF_FOP_GETSPEC:
+ case GF_FOP_MAXVALUE:
+ case GF_FOP_DISCARD:
+ return "LEAST";
+ default:
+ return "UNKNOWN";
+ }
}
-void* gf_array_elem (void *a, int index, size_t elem_size)
+const char *
+gf_inode_type_to_str(ia_type_t type)
{
- uint8_t* ptr = a;
- return (void*)(ptr + index * elem_size);
+ static const char *const str_ia_type[] = {
+ "UNKNOWN", "REGULAR FILE", "DIRECTORY", "LINK",
+ "BLOCK DEVICE", "CHARACTER DEVICE", "PIPE", "SOCKET"};
+ return str_ia_type[type];
}
-void
-gf_elem_swap (void *x, void *y, size_t l) {
- uint8_t *a = x, *b = y, c;
- while(l--) {
- c = *a;
- *a++ = *b;
- *b++ = c;
- }
+gf_boolean_t
+gf_is_zero_filled_stat(struct iatt *buf)
+{
+ if (!buf)
+ return 1;
+
+ /* Do not use st_dev because it is transformed to store the xlator id
+ * in place of the device number. Do not use st_ino because by this time
+ * we've already mapped the root ino to 1 so it is not guaranteed to be
+ * 0.
+ */
+ if ((buf->ia_nlink == 0) && (buf->ia_ctime == 0))
+ return 1;
+
+ return 0;
}
void
-gf_array_insertionsort (void *A, int l, int r, size_t elem_size,
- gf_cmp cmp)
-{
- int i = l;
- int N = r+1;
- void *Temp = NULL;
- int j = 0;
-
- for(i = l; i < N; i++) {
- Temp = gf_array_elem (A, i, elem_size);
- j = i - 1;
- while((cmp (Temp, gf_array_elem (A, j, elem_size))
- < 0) && j>=0) {
- gf_elem_swap (Temp, gf_array_elem (A, j, elem_size),
- elem_size);
- Temp = gf_array_elem (A, j, elem_size);
- j = j-1;
- }
- }
+gf_zero_fill_stat(struct iatt *buf)
+{
+ buf->ia_nlink = 0;
+ buf->ia_ctime = 0;
}
-int
-gf_is_str_int (const char *value)
+gf_boolean_t
+gf_is_valid_xattr_namespace(char *key)
{
- int flag = 0;
- char *str = NULL;
- char *fptr = NULL;
+ static char *xattr_namespaces[] = {"trusted.", "system.", "user.",
+ "security.", NULL};
+ int i = 0;
- GF_VALIDATE_OR_GOTO (THIS->name, value, out);
+ for (i = 0; xattr_namespaces[i]; i++) {
+ if (strncmp(key, xattr_namespaces[i], strlen(xattr_namespaces[i])) == 0)
+ return _gf_true;
+ }
- str = gf_strdup (value);
- if (!str)
- goto out;
+ return _gf_false;
+}
- fptr = str;
+ino_t
+gfid_to_ino(uuid_t gfid)
+{
+ ino_t ino = 0;
+ int32_t i;
- while (*str) {
- if (!isdigit(*str)) {
- flag = 1;
- goto out;
- }
- str++;
- }
+ for (i = 8; i < 16; i++) {
+ ino <<= 8;
+ ino += (uint8_t)gfid[i];
+ }
-out:
- if (fptr)
- GF_FREE (fptr);
+ return ino;
+}
- return flag;
+int
+gf_bits_count(uint64_t n)
+{
+ int val = 0;
+#if defined(__GNUC__) || defined(__clang__)
+ val = __builtin_popcountll(n);
+#else
+ n -= (n >> 1) & 0x5555555555555555ULL;
+ n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
+ n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
+ n += n >> 8;
+ n += n >> 16;
+ n += n >> 32;
+ val = n & 0xFF;
+#endif
+ return val;
}
-/*
- * rounds up nr to power of two. If nr is already a power of two, just returns
- * nr
- */
-inline int32_t
-gf_roundup_power_of_two (uint32_t nr)
+int
+gf_bits_index(uint64_t n)
{
- uint32_t result = 1;
+#if defined(__GNUC__) || defined(__clang__)
+ return __builtin_ffsll(n) - 1;
+#else
+ return ffsll(n) - 1;
+#endif
+}
- if (nr < 0) {
- gf_log ("common-utils", GF_LOG_WARNING,
- "negative number passed");
- result = -1;
- goto out;
- }
+const char *
+gf_fop_string(glusterfs_fop_t fop)
+{
+ if ((fop > GF_FOP_NULL) && (fop < GF_FOP_MAXVALUE))
+ return gf_fop_list[fop];
+ return "INVALID";
+}
- while (result < nr)
- result *= 2;
+int
+gf_fop_int(char *fop)
+{
+ int i = 0;
-out:
- return result;
+ for (i = GF_FOP_NULL + 1; i < GF_FOP_MAXVALUE; i++) {
+ if (strcasecmp(fop, gf_fop_list[i]) == 0)
+ return i;
+ }
+ return -1;
}
-/*
- * rounds up nr to next power of two. If nr is already a power of two, next
- * power of two is returned.
- */
+int
+close_fds_except(int *fdv, size_t count)
+{
+ int i = 0;
+ size_t j = 0;
+ gf_boolean_t should_close = _gf_true;
+#ifdef GF_LINUX_HOST_OS
+ DIR *d = NULL;
+ struct dirent *de = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char *e = NULL;
-/*
- * rounds up nr to next power of two. If nr is already a power of two, next
- * power of two is returned.
- */
+ d = sys_opendir("/proc/self/fd");
+ if (!d)
+ return -1;
-inline int32_t
-gf_roundup_next_power_of_two (uint32_t nr)
-{
- int32_t result = 1;
+ for (;;) {
+ should_close = _gf_true;
- if (nr < 0) {
- gf_log ("common-utils", GF_LOG_WARNING,
- "negative number passed");
- result = -1;
- goto out;
+ errno = 0;
+ de = sys_readdir(d, scratch);
+ if (!de || errno != 0)
+ break;
+ i = strtoul(de->d_name, &e, 10);
+ if (*e != '\0' || i == dirfd(d))
+ continue;
+
+ for (j = 0; j < count; j++) {
+ if (i == fdv[j]) {
+ should_close = _gf_false;
+ break;
+ }
}
+ if (should_close)
+ sys_close(i);
+ }
+ sys_closedir(d);
+#else /* !GF_LINUX_HOST_OS */
+ struct rlimit rl;
+ int ret = -1;
+
+ ret = getrlimit(RLIMIT_NOFILE, &rl);
+ if (ret)
+ return ret;
- while (result <= nr)
- result *= 2;
-
-out:
- return result;
+ for (i = 0; i < rl.rlim_cur; i++) {
+ should_close = _gf_true;
+ for (j = 0; j < count; j++) {
+ if (i == fdv[j]) {
+ should_close = _gf_false;
+ break;
+ }
+ }
+ if (should_close)
+ sys_close(i);
+ }
+#endif /* !GF_LINUX_HOST_OS */
+ return 0;
}
+/**
+ * gf_getgrouplist - get list of groups to which a user belongs
+ *
+ * A convenience wrapper for getgrouplist(3).
+ *
+ * @param user - same as in getgrouplist(3)
+ * @param group - same as in getgrouplist(3)
+ * @param groups - pointer to a gid_t pointer
+ *
+ * gf_getgrouplist allocates a gid_t buffer which is big enough to
+ * hold the list of auxiliary group ids for user, up to the GF_MAX_AUX_GROUPS
+ * threshold. Upon successful invocation groups will be pointed to that buffer.
+ *
+ * @return success: the number of auxiliary group ids retrieved
+ * failure: -1
+ */
int
-validate_brick_name (char *brick)
+gf_getgrouplist(const char *user, gid_t group, gid_t **groups)
{
- char *delimiter = NULL;
- int ret = 0;
- delimiter = strrchr (brick, ':');
- if (!delimiter || delimiter == brick
- || *(delimiter+1) != '/')
- ret = -1;
+ int ret = -1;
+ int ngroups = SMALL_GROUP_COUNT;
- return ret;
+ *groups = GF_CALLOC(sizeof(gid_t), ngroups, gf_common_mt_groups_t);
+ if (!*groups)
+ return -1;
+
+ /*
+ * We are running getgrouplist() in a loop until we succeed (or hit
+ * certain exit conditions, see the comments below). This is because
+ * the indicated number of auxiliary groups that we obtain in case of
+ * the failure of the first invocation is not guaranteed to keep its
+ * validity upon the next invocation with a gid buffer of that size.
+ */
+ for (;;) {
+ int ngroups_old = ngroups;
+ ret = getgrouplist(user, group, *groups, &ngroups);
+ if (ret != -1)
+ break;
+
+ if (ngroups >= GF_MAX_AUX_GROUPS) {
+ /*
+ * This should not happen as GF_MAX_AUX_GROUPS is set
+ * to the max value of number of supported auxiliary
+ * groups across all platforms supported by GlusterFS.
+ * However, if it still happened some way, we wouldn't
+ * care about the incompleteness of the result, we'd
+ * just go on with what we got.
+ */
+ return GF_MAX_AUX_GROUPS;
+ } else if (ngroups <= ngroups_old) {
+ /*
+ * There is an edge case that getgrouplist() fails but
+ * ngroups remains the same. This is actually not
+ * specified in getgrouplist(3), but implementations
+ * can do this upon internal failure[1]. To avoid
+ * falling into an infinite loop when this happens, we
+ * break the loop if the getgrouplist call failed
+ * without an increase in the indicated group number.
+ *
+ * [1]
+ * https://sourceware.org/git/?p=glibc.git;a=blob;f=grp/initgroups.c;hb=refs/heads/release/2.25/master#l168
+ */
+ GF_FREE(*groups);
+ return -1;
+ }
+
+ *groups = GF_REALLOC(*groups, ngroups * sizeof(gid_t));
+ if (!*groups)
+ return -1;
+ }
+ return ret;
}
-char *
-get_host_name (char *word, char **host)
+int
+glusterfs_compute_sha256(const unsigned char *content, size_t size,
+ char *sha256_hash)
{
- char *delimiter = NULL;
- delimiter = strrchr (word, ':');
- if (delimiter)
- *delimiter = '\0';
- else
- return NULL;
- *host = word;
- return *host;
+ SHA256_CTX sha256;
+
+ SHA256_Init(&sha256);
+ SHA256_Update(&sha256, (const unsigned char *)(content), size);
+ SHA256_Final((unsigned char *)sha256_hash, &sha256);
+
+ return 0;
}
+/* * Safe wrapper function for strncpy.
+ * This wrapper makes sure that when there is no null byte among the first n in
+ * source srting for strncpy function call, the string placed in dest will be
+ * null-terminated.
+ */
char *
-get_path_name (char *word, char **path)
+gf_strncpy(char *dest, const char *src, const size_t dest_size)
{
- char *delimiter = NULL;
- delimiter = strchr (word, '/');
- if (!delimiter)
- return NULL;
- *path = delimiter;
- return *path;
+ strncpy(dest, src, dest_size - 1);
+ dest[dest_size - 1] = '\0';
+ return dest;
}
-void
-gf_path_strip_trailing_slashes (char *path)
+int
+gf_replace_old_iatt_in_dict(dict_t *xdata)
{
- int i = 0;
- int len = 0;
+ int ret;
+ struct old_iatt *o_iatt; /* old iatt structure */
+ struct iatt *c_iatt; /* current iatt */
- if (!path)
- return;
+ if (!xdata) {
+ return 0;
+ }
- len = strlen (path);
- for (i = len - 1; i > 0; i--) {
- if (path[i] != '/')
- break;
- }
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&c_iatt);
+ if (ret < 0) {
+ return 0;
+ }
- if (i < (len -1))
- path [i+1] = '\0';
+ o_iatt = GF_CALLOC(1, sizeof(struct old_iatt), gf_common_mt_char);
+ if (!o_iatt) {
+ return -1;
+ }
- return;
+ oldiatt_from_iatt(o_iatt, c_iatt);
+
+ ret = dict_set_bin(xdata, DHT_IATT_IN_XDATA_KEY, o_iatt,
+ sizeof(struct old_iatt));
+ if (ret) {
+ GF_FREE(o_iatt);
+ }
+
+ return ret;
}
-uint64_t
-get_mem_size ()
+int
+gf_replace_new_iatt_in_dict(dict_t *xdata)
{
- uint64_t memsize = -1;
+ int ret;
+ struct old_iatt *o_iatt; /* old iatt structure */
+ struct iatt *c_iatt; /* new iatt */
-#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS
+ if (!xdata) {
+ return 0;
+ }
- uint64_t page_size = 0;
- uint64_t num_pages = 0;
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&o_iatt);
+ if (ret < 0) {
+ return 0;
+ }
+
+ c_iatt = GF_CALLOC(1, sizeof(struct iatt), gf_common_mt_char);
+ if (!c_iatt) {
+ return -1;
+ }
- page_size = sysconf (_SC_PAGESIZE);
- num_pages = sysconf (_SC_PHYS_PAGES);
+ iatt_from_oldiatt(c_iatt, o_iatt);
- memsize = page_size * num_pages;
-#endif
+ ret = dict_set_bin(xdata, DHT_IATT_IN_XDATA_KEY, c_iatt,
+ sizeof(struct iatt));
+ if (ret) {
+ GF_FREE(c_iatt);
+ }
-#if defined GF_BSD_HOST_OS || defined GF_DARWIN_HOST_OS
+ return ret;
+}
- size_t len = sizeof(memsize);
- int name [] = { CTL_HW, HW_PHYSMEM };
+xlator_cmdline_option_t *
+find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args)
+{
+ xlator_cmdline_option_t *pos = NULL;
+ xlator_cmdline_option_t *tmp = NULL;
+
+ list_for_each_entry_safe(pos, tmp, &args->xlator_options, cmd_args)
+ {
+ if (strcmp(pos->key, option_name) == 0)
+ return pos;
+ }
+ return NULL;
+}
- sysctl (name, 2, &memsize, &len, NULL, 0);
-#endif
- return memsize;
+int
+gf_d_type_from_ia_type(ia_type_t type)
+{
+ switch (type) {
+ case IA_IFDIR:
+ return DT_DIR;
+ case IA_IFCHR:
+ return DT_CHR;
+ case IA_IFBLK:
+ return DT_BLK;
+ case IA_IFIFO:
+ return DT_FIFO;
+ case IA_IFLNK:
+ return DT_LNK;
+ case IA_IFREG:
+ return DT_REG;
+ case IA_IFSOCK:
+ return DT_SOCK;
+ default:
+ return DT_UNKNOWN;
+ }
}
-/* Strips all whitespace characters in a string and returns length of new string
- * on success
- */
int
-gf_strip_whitespace (char *str, int len)
+gf_nanosleep(uint64_t nsec)
{
- int i = 0;
- int new_len = 0;
- char *new_str = NULL;
+ struct timespec req;
+ struct timespec rem;
+ int ret = -1;
- GF_ASSERT (str);
+ req.tv_sec = nsec / GF_SEC_IN_NS;
+ req.tv_nsec = nsec % GF_SEC_IN_NS;
- new_str = GF_CALLOC (1, len + 1, gf_common_mt_char);
- if (new_str == NULL)
- return -1;
+ do {
+ ret = nanosleep(&req, &rem);
+ req = rem;
+ } while (ret == -1 && errno == EINTR);
- for (i = 0; i < len; i++) {
- if (!isspace (str[i]))
- new_str[new_len++] = str[i];
- }
- new_str[new_len] = '\0';
+ return ret;
+}
- if (new_len != len) {
- memset (str, 0, len);
- strncpy (str, new_str, new_len);
- }
+int
+gf_syncfs(int fd)
+{
+ int ret = 0;
+#if defined(HAVE_SYNCFS)
+ /* Linux with glibc recent enough. */
+ ret = syncfs(fd);
+#elif defined(HAVE_SYNCFS_SYS)
+ /* Linux with no library function. */
+ ret = syscall(SYS_syncfs, fd);
+#else
+ /* Fallback to generic UNIX stuff. */
+ sync();
+#endif
+ return ret;
+}
- GF_FREE (new_str);
- return new_len;
+char **
+get_xattrs_to_heal()
+{
+ return xattrs_to_heal;
}
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
deleted file mode 100644
index 0c6d7ef7a5d..00000000000
--- a/libglusterfs/src/common-utils.h
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _COMMON_UTILS_H
-#define _COMMON_UTILS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <sys/uio.h>
-#include <netdb.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <pthread.h>
-#ifndef GF_BSD_HOST_OS
-#include <alloca.h>
-#endif
-
-void trap (void);
-
-#define GF_UNIVERSAL_ANSWER 42 /* :O */
-
-/* To solve type punned error */
-#define VOID(ptr) ((void **) ((void *) ptr))
-
-#include "logging.h"
-#include "glusterfs.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "uuid.h"
-
-
-
-#define min(a,b) ((a)<(b)?(a):(b))
-#define max(a,b) ((a)>(b)?(a):(b))
-#define roof(a,b) ((((a)+(b)-1)/((b)?(b):1))*(b))
-#define floor(a,b) (((a)/((b)?(b):1))*(b))
-
-
-#define GF_UNIT_KB 1024ULL
-#define GF_UNIT_MB 1048576ULL
-#define GF_UNIT_GB 1073741824ULL
-#define GF_UNIT_TB 1099511627776ULL
-#define GF_UNIT_PB 1125899906842624ULL
-
-#define GF_UNIT_KB_STRING "KB"
-#define GF_UNIT_MB_STRING "MB"
-#define GF_UNIT_GB_STRING "GB"
-#define GF_UNIT_TB_STRING "TB"
-#define GF_UNIT_PB_STRING "PB"
-
-#define GF_UNIT_PERCENT_STRING "%"
-
-#define GEOREP "geo-replication"
-#define GHADOOP "glusterfs-hadoop"
-
-#define WIPE(statp) do { typeof(*statp) z = {0,}; if (statp) *statp = z; } while (0)
-
-#define IS_EXT_FS(fs_name) \
- (!strcmp (fs_name, "ext2") || \
- !strcmp (fs_name, "ext3") || \
- !strcmp (fs_name, "ext4"))
-
-/* Defining this here as it is needed by glusterd for setting
- * nfs port in volume status.
- */
-#define GF_NFS3_PORT 38467
-
-enum _gf_boolean
-{
- _gf_false = 0,
- _gf_true = 1
-};
-
-/*
- * we could have initialized these as +ve values and treated
- * them as negative while comparing etc.. (which would have
- * saved us with the pain of assigning values), but since we
- * only have a couple of clients that use this feature, it's
- * okay.
- */
-enum _gf_client_pid
-{
- GF_CLIENT_PID_MAX = 0,
- GF_CLIENT_PID_GSYNCD = -1,
- GF_CLIENT_PID_HADOOP = -2,
- GF_CLIENT_PID_DEFRAG = -3,
-};
-
-typedef enum _gf_boolean gf_boolean_t;
-typedef enum _gf_client_pid gf_client_pid_t;
-typedef int (*gf_cmp) (void *, void *);
-
-void gf_global_variable_init(void);
-
-in_addr_t gf_resolve_ip (const char *hostname, void **dnscache);
-
-void gf_log_volume_file (FILE *specfp);
-void gf_print_trace (int32_t signal);
-
-extern char *gf_fop_list[GF_FOP_MAXVALUE];
-extern char *gf_mgmt_list[GF_MGMT_MAXVALUE];
-
-#define VECTORSIZE(count) (count * (sizeof (struct iovec)))
-
-#define STRLEN_0(str) (strlen(str) + 1)
-#define VALIDATE_OR_GOTO(arg,label) do { \
- if (!arg) { \
- errno = EINVAL; \
- gf_log_callingfn ((this ? (this->name) : \
- "(Govinda! Govinda!)"), \
- GF_LOG_WARNING, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0)
-
-#define GF_VALIDATE_OR_GOTO(name,arg,label) do { \
- if (!arg) { \
- errno = EINVAL; \
- gf_log_callingfn (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0)
-
-#define GF_VALIDATE_OR_GOTO_WITH_ERROR(name, arg, label, errno, error) do { \
- if (!arg) { \
- errno = error; \
- gf_log_callingfn (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- }while (0)
-
-#define GF_ASSERT_AND_GOTO_WITH_ERROR(name, arg, label, errno, error) do { \
- if (!arg) { \
- GF_ASSERT (0); \
- errno = error; \
- goto label; \
- } \
- }while (0)
-
-#define GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO(name,arg,label) \
- do { \
- GF_VALIDATE_OR_GOTO (name, arg, label); \
- if ((arg[0]) != '/') { \
- errno = EINVAL; \
- gf_log_callingfn (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0)
-
-#define GF_REMOVE_SLASH_FROM_PATH(path, string) \
- do { \
- int i = 0; \
- for (i = 1; i < strlen (path); i++) { \
- string[i-1] = path[i]; \
- if (string[i-1] == '/') \
- string[i-1] = '-'; \
- } \
- } while (0)
-
-
-#define GF_IF_INTERNAL_XATTR_GOTO(pattern, dict, trav, op_errno, label) \
- do { \
- if (!dict) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "setxattr dict is null"); \
- goto label; \
- } \
- trav = dict->members_list; \
- while (trav) { \
- if (!fnmatch (pattern, trav->key, 0)) { \
- op_errno = EPERM; \
- gf_log (this->name, GF_LOG_ERROR, \
- "attempt to set internal" \
- " xattr: %s: %s", trav->key, \
- strerror (op_errno)); \
- goto label; \
- } \
- trav = trav->next; \
- } \
- } while (0)
-
-#define GF_IF_NATIVE_XATTR_GOTO(pattern, key, op_errno, label) \
- do { \
- if (!key) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "no key for removexattr"); \
- goto label; \
- } \
- if (!fnmatch (pattern, key, 0)) { \
- op_errno = EPERM; \
- gf_log (this->name, GF_LOG_ERROR, \
- "attempt to remove internal " \
- "xattr: %s: %s", key, \
- strerror (op_errno)); \
- goto label; \
- } \
- } while (0)
-
-
-#define GF_FILE_CONTENT_REQUESTED(_xattr_req,_content_limit) \
- (dict_get_uint64 (_xattr_req, "glusterfs.content", _content_limit) == 0)
-
-#ifdef DEBUG
-#define GF_ASSERT(x) assert (x);
-#else
-#define GF_ASSERT(x) \
- do { \
- if (!(x)) { \
- gf_log_callingfn ("", GF_LOG_ERROR, \
- "Assertion failed: " #x); \
- } \
- } while (0)
-#endif
-
-#define GF_UUID_ASSERT(u) \
- if (uuid_is_null (u))\
- GF_ASSERT (!"uuid null");
-
-union gf_sock_union {
- struct sockaddr_storage storage;
- struct sockaddr_in6 sin6;
- struct sockaddr_in sin;
- struct sockaddr sa;
-};
-
-#define GF_HIDDEN_PATH ".glusterfs"
-
-static inline void
-iov_free (struct iovec *vector, int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- FREE (vector[i].iov_base);
-
- GF_FREE (vector);
-}
-
-
-static inline int
-iov_length (const struct iovec *vector, int count)
-{
- int i = 0;
- size_t size = 0;
-
- for (i = 0; i < count; i++)
- size += vector[i].iov_len;
-
- return size;
-}
-
-
-static inline struct iovec *
-iov_dup (struct iovec *vector, int count)
-{
- int bytecount = 0;
- int i;
- struct iovec *newvec = NULL;
-
- bytecount = (count * sizeof (struct iovec));
- newvec = GF_MALLOC (bytecount, gf_common_mt_iovec);
- if (!newvec)
- return NULL;
-
- for (i = 0; i < count; i++) {
- newvec[i].iov_len = vector[i].iov_len;
- newvec[i].iov_base = vector[i].iov_base;
- }
-
- return newvec;
-}
-
-
-static inline int
-iov_subset (struct iovec *orig, int orig_count,
- off_t src_offset, off_t dst_offset,
- struct iovec *new)
-{
- int new_count = 0;
- int i;
- off_t offset = 0;
- size_t start_offset = 0;
- size_t end_offset = 0;
-
-
- for (i = 0; i < orig_count; i++) {
- if ((offset + orig[i].iov_len < src_offset)
- || (offset > dst_offset)) {
- goto not_subset;
- }
-
- if (!new) {
- goto count_only;
- }
-
- start_offset = 0;
- end_offset = orig[i].iov_len;
-
- if (src_offset >= offset) {
- start_offset = (src_offset - offset);
- }
-
- if (dst_offset <= (offset + orig[i].iov_len)) {
- end_offset = (dst_offset - offset);
- }
-
- new[new_count].iov_base = orig[i].iov_base + start_offset;
- new[new_count].iov_len = end_offset - start_offset;
-
- count_only:
- new_count++;
-
- not_subset:
- offset += orig[i].iov_len;
- }
-
- return new_count;
-}
-
-
-static inline void
-iov_unload (char *buf, const struct iovec *vector, int count)
-{
- int i;
- int copied = 0;
-
- for (i = 0; i < count; i++) {
- memcpy (buf + copied, vector[i].iov_base, vector[i].iov_len);
- copied += vector[i].iov_len;
- }
-}
-
-
-static inline int
-mem_0filled (const char *buf, size_t size)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < size; i++) {
- ret = buf[i];
- if (ret)
- break;
- }
-
- return ret;
-}
-
-
-static inline int
-iov_0filled (struct iovec *vector, int count)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < count; i++) {
- ret = mem_0filled (vector[i].iov_base, vector[i].iov_len);
- if (ret)
- break;
- }
-
- return ret;
-}
-
-
-static inline void *
-memdup (const void *ptr, size_t size)
-{
- void *newptr = NULL;
-
- newptr = GF_MALLOC (size, gf_common_mt_memdup);
- if (!newptr)
- return NULL;
-
- memcpy (newptr, ptr, size);
- return newptr;
-}
-
-/*
- * rounds up nr to power of two. If nr is already a power of two, just returns
- * nr
- */
-
-int32_t gf_roundup_power_of_two (uint32_t nr);
-
-/*
- * rounds up nr to next power of two. If nr is already a power of two, next
- * power of two is returned.
- */
-
-int32_t gf_roundup_next_power_of_two (uint32_t nr);
-
-char *gf_trim (char *string);
-int gf_strsplit (const char *str, const char *delim,
- char ***tokens, int *token_count);
-int gf_volume_name_validate (const char *volume_name);
-
-int gf_string2long (const char *str, long *n);
-int gf_string2ulong (const char *str, unsigned long *n);
-int gf_string2int (const char *str, int *n);
-int gf_string2uint (const char *str, unsigned int *n);
-int gf_string2double (const char *str, double *n);
-int gf_string2longlong (const char *str, long long *n);
-int gf_string2ulonglong (const char *str, unsigned long long *n);
-
-int gf_string2int8 (const char *str, int8_t *n);
-int gf_string2int16 (const char *str, int16_t *n);
-int gf_string2int32 (const char *str, int32_t *n);
-int gf_string2int64 (const char *str, int64_t *n);
-int gf_string2uint8 (const char *str, uint8_t *n);
-int gf_string2uint16 (const char *str, uint16_t *n);
-int gf_string2uint32 (const char *str, uint32_t *n);
-int gf_string2uint64 (const char *str, uint64_t *n);
-
-int gf_strstr (const char *str, const char *delim, const char *match);
-
-int gf_string2ulong_base10 (const char *str, unsigned long *n);
-int gf_string2uint_base10 (const char *str, unsigned int *n);
-int gf_string2uint8_base10 (const char *str, uint8_t *n);
-int gf_string2uint16_base10 (const char *str, uint16_t *n);
-int gf_string2uint32_base10 (const char *str, uint32_t *n);
-int gf_string2uint64_base10 (const char *str, uint64_t *n);
-
-int gf_string2bytesize (const char *str, uint64_t *n);
-int gf_string2percent_or_bytesize (const char *str, uint64_t *n,
- gf_boolean_t *is_percent);
-
-int gf_string2boolean (const char *str, gf_boolean_t *b);
-int gf_string2percent (const char *str, double *n);
-int gf_string2time (const char *str, uint32_t *n);
-
-int gf_lockfd (int fd);
-int gf_unlockfd (int fd);
-
-int get_checksum_for_file (int fd, uint32_t *checksum);
-int log_base2 (unsigned long x);
-
-int get_checksum_for_path (char *path, uint32_t *checksum);
-
-char *strtail (char *str, const char *pattern);
-void skipwhite (char **s);
-char *nwstrtail (char *str, char *pattern);
-void skip_word (char **str);
-/* returns a new string with nth word of given string. n>=1 */
-char *get_nth_word (const char *str, int n);
-
-char valid_host_name (char *address, int length);
-char valid_ipv4_address (char *address, int length, gf_boolean_t wildcard_acc);
-char valid_ipv6_address (char *address, int length, gf_boolean_t wildcard_acc);
-char valid_internet_address (char *address, gf_boolean_t wildcard_acc);
-char valid_ipv4_wildcard_check (char *address);
-char valid_ipv6_wildcard_check (char *address);
-char valid_wildcard_internet_address (char *address);
-
-char *uuid_utoa (uuid_t uuid);
-char *uuid_utoa_r (uuid_t uuid, char *dst);
-char *lkowner_utoa (gf_lkowner_t *lkowner);
-char *lkowner_utoa_r (gf_lkowner_t *lkowner, char *dst, int len);
-
-void gf_array_insertionsort (void *a, int l, int r, size_t elem_size,
- gf_cmp cmp);
-int gf_is_str_int (const char *value);
-
-char *gf_uint64_2human_readable (uint64_t);
-int validate_brick_name (char *brick);
-char *get_host_name (char *word, char **host);
-char *get_path_name (char *word, char **path);
-void gf_path_strip_trailing_slashes (char *path);
-uint64_t get_mem_size ();
-int gf_strip_whitespace (char *str, int len);
-#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/compat-errno.c b/libglusterfs/src/compat-errno.c
index fd5cc49cefa..df57e243239 100644
--- a/libglusterfs/src/compat-errno.c
+++ b/libglusterfs/src/compat-errno.c
@@ -8,15 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <stdint.h>
-#include "compat-errno.h"
-
+#include "glusterfs/compat-errno.h"
static int32_t gf_error_to_errno_array[1024];
static int32_t gf_errno_to_error_array[1024];
@@ -25,906 +19,937 @@ static int32_t gf_compat_errno_init_done;
#ifdef GF_SOLARIS_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
-/* ENOMSG 35 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
-
-/* EIDRM 36 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
-
-/* ECHRNG 37 / * Channel number out of range */
- gf_error_to_errno_array[GF_ERROR_CODE_CHRNG] = ECHRNG;
- gf_errno_to_error_array[ECHRNG] = GF_ERROR_CODE_CHRNG;
-
-/* EL2NSYNC 38 / * Level 2 not synchronized */
- gf_error_to_errno_array[GF_ERROR_CODE_L2NSYNC] = EL2NSYNC;
- gf_errno_to_error_array[EL2NSYNC] = GF_ERROR_CODE_L2NSYNC;
-
-/* EL3HLT 39 / * Level 3 halted */
- gf_error_to_errno_array[GF_ERROR_CODE_L3HLT] = EL3HLT;
- gf_errno_to_error_array[EL3HLT] = GF_ERROR_CODE_L3HLT;
-
-/* EL3RST 40 / * Level 3 reset */
- gf_error_to_errno_array[GF_ERROR_CODE_L3RST] = EL3RST;
- gf_errno_to_error_array[EL3RST] = GF_ERROR_CODE_L3RST;
-
-/* ELNRNG 41 / * Link number out of range */
- gf_error_to_errno_array[GF_ERROR_CODE_LNRNG] = ELNRNG;
- gf_errno_to_error_array[ELNRNG] = GF_ERROR_CODE_LNRNG;
-
-/* EUNATCH 42 / * Protocol driver not attached */
- gf_error_to_errno_array[GF_ERROR_CODE_UNATCH] = EUNATCH;
- gf_errno_to_error_array[EUNATCH] = GF_ERROR_CODE_UNATCH;
-
-/* ENOCSI 43 / * No CSI structure available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOCSI] = ENOCSI;
- gf_errno_to_error_array[ENOCSI] = GF_ERROR_CODE_NOCSI;
-
-/* EL2HLT 44 / * Level 2 halted */
- gf_error_to_errno_array[GF_ERROR_CODE_L2HLT] = EL2HLT;
- gf_errno_to_error_array[EL2HLT] = GF_ERROR_CODE_L2HLT;
-
-/* EDEADLK 45 / * Deadlock condition. */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
-/* ENOLCK 46 / * No record locks available. */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-
-/* ECANCELED 47 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
-
-/* ENOTSUP 48 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSUPP] = ENOTSUP;
- gf_errno_to_error_array[ENOTSUP] = GF_ERROR_CODE_NOTSUPP;
-
-/* Filesystem Quotas */
-/* EDQUOT 49 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
-
-/* Convergent Error Returns */
-/* EBADE 50 / * invalid exchange */
- gf_error_to_errno_array[GF_ERROR_CODE_BADE] = EBADE;
- gf_errno_to_error_array[EBADE] = GF_ERROR_CODE_BADE;
-/* EBADR 51 / * invalid request descriptor */
- gf_error_to_errno_array[GF_ERROR_CODE_BADR] = EBADR;
- gf_errno_to_error_array[EBADR] = GF_ERROR_CODE_BADR;
-/* EXFULL 52 / * exchange full */
- gf_error_to_errno_array[GF_ERROR_CODE_XFULL] = EXFULL;
- gf_errno_to_error_array[EXFULL] = GF_ERROR_CODE_XFULL;
-/* ENOANO 53 / * no anode */
- gf_error_to_errno_array[GF_ERROR_CODE_NOANO] = ENOANO;
- gf_errno_to_error_array[ENOANO] = GF_ERROR_CODE_NOANO;
-/* EBADRQC 54 / * invalid request code */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRQC] = EBADRQC;
- gf_errno_to_error_array[EBADRQC] = GF_ERROR_CODE_BADRQC;
-/* EBADSLT 55 / * invalid slot */
- gf_error_to_errno_array[GF_ERROR_CODE_BADSLT] = EBADSLT;
- gf_errno_to_error_array[EBADSLT] = GF_ERROR_CODE_BADSLT;
-/* EDEADLOCK 56 / * file locking deadlock error */
-/* This is same as EDEADLK on linux */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLOCK;
- gf_errno_to_error_array[EDEADLOCK] = GF_ERROR_CODE_DEADLK;
-
-/* EBFONT 57 / * bad font file fmt */
- gf_error_to_errno_array[GF_ERROR_CODE_BFONT] = EBFONT;
- gf_errno_to_error_array[EBFONT] = GF_ERROR_CODE_BFONT;
-
-/* Interprocess Robust Locks */
-/* EOWNERDEAD 58 / * process died with the lock */
- gf_error_to_errno_array[GF_ERROR_CODE_OWNERDEAD] = EOWNERDEAD;
- gf_errno_to_error_array[EOWNERDEAD] = GF_ERROR_CODE_OWNERDEAD;
-/* ENOTRECOVERABLE 59 / * lock is not recoverable */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTRECOVERABLE] = ENOTRECOVERABLE;
- gf_errno_to_error_array[ENOTRECOVERABLE] = GF_ERROR_CODE_NOTRECOVERABLE;
-
-/* stream problems */
-/* ENOSTR 60 / * Device not a stream */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
- gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
-/* ENODATA 61 / * no data (for no delay io) */
- gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
- gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
-/* ETIME 62 / * timer expired */
- gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
- gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
-/* ENOSR 63 / * out of streams resources */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
- gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
-
-/* ENONET 64 / * Machine is not on the network */
- gf_error_to_errno_array[GF_ERROR_CODE_NONET] = ENONET;
- gf_errno_to_error_array[ENONET] = GF_ERROR_CODE_NONET;
-/* ENOPKG 65 / * Package not installed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPKG] = ENOPKG;
- gf_errno_to_error_array[ENOPKG] = GF_ERROR_CODE_NOPKG;
-/* EREMOTE 66 / * The object is remote */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-/* ENOLINK 67 / * the link has been severed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
-/* EADV 68 / * advertise error */
- gf_error_to_errno_array[GF_ERROR_CODE_ADV] = EADV;
- gf_errno_to_error_array[EADV] = GF_ERROR_CODE_ADV;
-/* ESRMNT 69 / * srmount error */
- gf_error_to_errno_array[GF_ERROR_CODE_SRMNT] = ESRMNT;
- gf_errno_to_error_array[ESRMNT] = GF_ERROR_CODE_SRMNT;
-
-/* ECOMM 70 / * Communication error on send */
- gf_error_to_errno_array[GF_ERROR_CODE_COMM] = ECOMM;
- gf_errno_to_error_array[ECOMM] = GF_ERROR_CODE_COMM;
-/* EPROTO 71 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
-
-/* Interprocess Robust Locks */
-/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
- gf_error_to_errno_array[GF_ERROR_CODE_LOCKUNMAPPED] = ELOCKUNMAPPED;
- gf_errno_to_error_array[ELOCKUNMAPPED] = GF_ERROR_CODE_LOCKUNMAPPED;
-
-/* ENOTACTIVE 73 / * Facility is not active */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTACTIVE] = ENOTACTIVE;
- gf_errno_to_error_array[ENOTACTIVE] = GF_ERROR_CODE_NOTACTIVE;
-/* EMULTIHOP 74 / * multihop attempted */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
-/* EBADMSG 77 / * trying to read unreadable message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
-/* ENAMETOOLONG 78 / * path name is too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-/* EOVERFLOW 79 / * value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
-/* ENOTUNIQ 80 / * given log. name not unique */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTUNIQ] = ENOTUNIQ;
- gf_errno_to_error_array[ENOTUNIQ] = GF_ERROR_CODE_NOTUNIQ;
-/* EBADFD 81 / * f.d. invalid for this operation */
- gf_error_to_errno_array[GF_ERROR_CODE_BADFD] = EBADFD;
- gf_errno_to_error_array[EBADFD] = GF_ERROR_CODE_BADFD;
-/* EREMCHG 82 / * Remote address changed */
- gf_error_to_errno_array[GF_ERROR_CODE_REMCHG] = EREMCHG;
- gf_errno_to_error_array[EREMCHG] = GF_ERROR_CODE_REMCHG;
-
-/* shared library problems */
-/* ELIBACC 83 / * Can't access a needed shared lib. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBACC] = ELIBACC;
- gf_errno_to_error_array[ELIBACC] = GF_ERROR_CODE_LIBACC;
-/* ELIBBAD 84 / * Accessing a corrupted shared lib. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBBAD] = ELIBBAD;
- gf_errno_to_error_array[ELIBBAD] = GF_ERROR_CODE_LIBBAD;
-/* ELIBSCN 85 / * .lib section in a.out corrupted. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBSCN] = ELIBSCN;
- gf_errno_to_error_array[ELIBSCN] = GF_ERROR_CODE_LIBSCN;
-/* ELIBMAX 86 / * Attempting to link in too many libs. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBMAX] = ELIBMAX;
- gf_errno_to_error_array[ELIBMAX] = GF_ERROR_CODE_LIBMAX;
-/* ELIBEXEC 87 / * Attempting to exec a shared library. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBEXEC] = ELIBEXEC;
- gf_errno_to_error_array[ELIBEXEC] = GF_ERROR_CODE_LIBEXEC;
-/* EILSEQ 88 / * Illegal byte sequence. */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
-/* ENOSYS 89 / * Unsupported file system operation */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-/* ELOOP 90 / * Symbolic link loop */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-/* ERESTART 91 / * Restartable system call */
- gf_error_to_errno_array[GF_ERROR_CODE_RESTART] = ERESTART;
- gf_errno_to_error_array[ERESTART] = GF_ERROR_CODE_RESTART;
-/* ESTRPIPE 92 / * if pipe/FIFO, don't sleep in stream head */
- gf_error_to_errno_array[GF_ERROR_CODE_STRPIPE] = ESTRPIPE;
- gf_errno_to_error_array[ESTRPIPE] = GF_ERROR_CODE_STRPIPE;
-/* ENOTEMPTY 93 / * directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-/* EUSERS 94 / * Too many users (for UFS) */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
-/* BSD Networking Software */
- /* argument errors */
-/* ENOTSOCK 95 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-/* EDESTADDRREQ 96 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-/* EMSGSIZE 97 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-/* EPROTOTYPE 98 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-/* ENOPROTOOPT 99 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-/* EPROTONOSUPPORT 120 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-/* ESOCKTNOSUPPORT 121 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
-/* EOPNOTSUPP 122 / * Operation not supported on socket */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-/* EPFNOSUPPORT 123 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
-/* EAFNOSUPPORT 124 / * Address family not supported by */
- /* protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
-/* EADDRINUSE 125 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
-/* EADDRNOTAVAIL 126 / * Can't assign requested address */
- /* operational errors */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
-/* ENETDOWN 127 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
-/* ENETUNREACH 128 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
-/* ENETRESET 129 / * Network dropped connection because */
- /* of reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
-/* ECONNABORTED 130 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-/* ECONNRESET 131 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-/* ENOBUFS 132 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-/* EISCONN 133 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-/* ENOTCONN 134 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-/* XENIX has 135 - 142 */
-/* ESHUTDOWN 143 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-/* ETOOMANYREFS 144 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-/* ETIMEDOUT 145 / * Connection timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
-/* ECONNREFUSED 146 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-/* EHOSTDOWN 147 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-/* EHOSTUNREACH 148 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-/* EALREADY 149 / * operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-/* EINPROGRESS 150 / * operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
-/* SUN Network File System */
-/* ESTALE 151 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
-
- return ;
+ /* ENOMSG 35 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+
+ /* EIDRM 36 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+
+ /* ECHRNG 37 / * Channel number out of range */
+ gf_error_to_errno_array[GF_ERROR_CODE_CHRNG] = ECHRNG;
+ gf_errno_to_error_array[ECHRNG] = GF_ERROR_CODE_CHRNG;
+
+ /* EL2NSYNC 38 / * Level 2 not synchronized */
+ gf_error_to_errno_array[GF_ERROR_CODE_L2NSYNC] = EL2NSYNC;
+ gf_errno_to_error_array[EL2NSYNC] = GF_ERROR_CODE_L2NSYNC;
+
+ /* EL3HLT 39 / * Level 3 halted */
+ gf_error_to_errno_array[GF_ERROR_CODE_L3HLT] = EL3HLT;
+ gf_errno_to_error_array[EL3HLT] = GF_ERROR_CODE_L3HLT;
+
+ /* EL3RST 40 / * Level 3 reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_L3RST] = EL3RST;
+ gf_errno_to_error_array[EL3RST] = GF_ERROR_CODE_L3RST;
+
+ /* ELNRNG 41 / * Link number out of range */
+ gf_error_to_errno_array[GF_ERROR_CODE_LNRNG] = ELNRNG;
+ gf_errno_to_error_array[ELNRNG] = GF_ERROR_CODE_LNRNG;
+
+ /* EUNATCH 42 / * Protocol driver not attached */
+ gf_error_to_errno_array[GF_ERROR_CODE_UNATCH] = EUNATCH;
+ gf_errno_to_error_array[EUNATCH] = GF_ERROR_CODE_UNATCH;
+
+ /* ENOCSI 43 / * No CSI structure available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOCSI] = ENOCSI;
+ gf_errno_to_error_array[ENOCSI] = GF_ERROR_CODE_NOCSI;
+
+ /* EL2HLT 44 / * Level 2 halted */
+ gf_error_to_errno_array[GF_ERROR_CODE_L2HLT] = EL2HLT;
+ gf_errno_to_error_array[EL2HLT] = GF_ERROR_CODE_L2HLT;
+
+ /* EDEADLK 45 / * Deadlock condition. */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* ENOLCK 46 / * No record locks available. */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
+
+ /* ECANCELED 47 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+
+ /* ENOTSUP 48 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSUPP] = ENOTSUP;
+ gf_errno_to_error_array[ENOTSUP] = GF_ERROR_CODE_NOTSUPP;
+
+ /* Filesystem Quotas */
+ /* EDQUOT 49 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+
+ /* Convergent Error Returns */
+ /* EBADE 50 / * invalid exchange */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADE] = EBADE;
+ gf_errno_to_error_array[EBADE] = GF_ERROR_CODE_BADE;
+ /* EBADR 51 / * invalid request descriptor */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADR] = EBADR;
+ gf_errno_to_error_array[EBADR] = GF_ERROR_CODE_BADR;
+ /* EXFULL 52 / * exchange full */
+ gf_error_to_errno_array[GF_ERROR_CODE_XFULL] = EXFULL;
+ gf_errno_to_error_array[EXFULL] = GF_ERROR_CODE_XFULL;
+ /* ENOANO 53 / * no anode */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOANO] = ENOANO;
+ gf_errno_to_error_array[ENOANO] = GF_ERROR_CODE_NOANO;
+ /* EBADRQC 54 / * invalid request code */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRQC] = EBADRQC;
+ gf_errno_to_error_array[EBADRQC] = GF_ERROR_CODE_BADRQC;
+ /* EBADSLT 55 / * invalid slot */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADSLT] = EBADSLT;
+ gf_errno_to_error_array[EBADSLT] = GF_ERROR_CODE_BADSLT;
+ /* EDEADLOCK 56 / * file locking deadlock error */
+ /* This is same as EDEADLK on linux */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLOCK;
+ gf_errno_to_error_array[EDEADLOCK] = GF_ERROR_CODE_DEADLK;
+
+ /* EBFONT 57 / * bad font file fmt */
+ gf_error_to_errno_array[GF_ERROR_CODE_BFONT] = EBFONT;
+ gf_errno_to_error_array[EBFONT] = GF_ERROR_CODE_BFONT;
+
+ /* Interprocess Robust Locks */
+ /* EOWNERDEAD 58 / * process died with the lock */
+ gf_error_to_errno_array[GF_ERROR_CODE_OWNERDEAD] = EOWNERDEAD;
+ gf_errno_to_error_array[EOWNERDEAD] = GF_ERROR_CODE_OWNERDEAD;
+ /* ENOTRECOVERABLE 59 / * lock is not recoverable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTRECOVERABLE] = ENOTRECOVERABLE;
+ gf_errno_to_error_array[ENOTRECOVERABLE] = GF_ERROR_CODE_NOTRECOVERABLE;
+
+ /* stream problems */
+ /* ENOSTR 60 / * Device not a stream */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
+ gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
+ /* ENODATA 61 / * no data (for no delay io) */
+ gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
+ gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
+ /* ETIME 62 / * timer expired */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
+ gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
+ /* ENOSR 63 / * out of streams resources */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
+ gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
+
+ /* ENONET 64 / * Machine is not on the network */
+ gf_error_to_errno_array[GF_ERROR_CODE_NONET] = ENONET;
+ gf_errno_to_error_array[ENONET] = GF_ERROR_CODE_NONET;
+ /* ENOPKG 65 / * Package not installed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPKG] = ENOPKG;
+ gf_errno_to_error_array[ENOPKG] = GF_ERROR_CODE_NOPKG;
+ /* EREMOTE 66 / * The object is remote */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+ /* ENOLINK 67 / * the link has been severed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* EADV 68 / * advertise error */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADV] = EADV;
+ gf_errno_to_error_array[EADV] = GF_ERROR_CODE_ADV;
+ /* ESRMNT 69 / * srmount error */
+ gf_error_to_errno_array[GF_ERROR_CODE_SRMNT] = ESRMNT;
+ gf_errno_to_error_array[ESRMNT] = GF_ERROR_CODE_SRMNT;
+
+ /* ECOMM 70 / * Communication error on send */
+ gf_error_to_errno_array[GF_ERROR_CODE_COMM] = ECOMM;
+ gf_errno_to_error_array[ECOMM] = GF_ERROR_CODE_COMM;
+ /* EPROTO 71 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+
+ /* Interprocess Robust Locks */
+ /* ELOCKUNMAPPED 72 / * locked lock was unmapped */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOCKUNMAPPED] = ELOCKUNMAPPED;
+ gf_errno_to_error_array[ELOCKUNMAPPED] = GF_ERROR_CODE_LOCKUNMAPPED;
+
+ /* ENOTACTIVE 73 / * Facility is not active */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTACTIVE] = ENOTACTIVE;
+ gf_errno_to_error_array[ENOTACTIVE] = GF_ERROR_CODE_NOTACTIVE;
+ /* EMULTIHOP 74 / * multihop attempted */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+ /* EBADMSG 77 / * trying to read unreadable message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+ /* ENAMETOOLONG 78 / * path name is too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+ /* EOVERFLOW 79 / * value too large to be stored in data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+ /* ENOTUNIQ 80 / * given log. name not unique */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTUNIQ] = ENOTUNIQ;
+ gf_errno_to_error_array[ENOTUNIQ] = GF_ERROR_CODE_NOTUNIQ;
+ /* EBADFD 81 / * f.d. invalid for this operation */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADFD] = EBADFD;
+ gf_errno_to_error_array[EBADFD] = GF_ERROR_CODE_BADFD;
+ /* EREMCHG 82 / * Remote address changed */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMCHG] = EREMCHG;
+ gf_errno_to_error_array[EREMCHG] = GF_ERROR_CODE_REMCHG;
+
+ /* shared library problems */
+ /* ELIBACC 83 / * Can't access a needed shared lib. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBACC] = ELIBACC;
+ gf_errno_to_error_array[ELIBACC] = GF_ERROR_CODE_LIBACC;
+ /* ELIBBAD 84 / * Accessing a corrupted shared lib. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBBAD] = ELIBBAD;
+ gf_errno_to_error_array[ELIBBAD] = GF_ERROR_CODE_LIBBAD;
+ /* ELIBSCN 85 / * .lib section in a.out corrupted. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBSCN] = ELIBSCN;
+ gf_errno_to_error_array[ELIBSCN] = GF_ERROR_CODE_LIBSCN;
+ /* ELIBMAX 86 / * Attempting to link in too many libs. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBMAX] = ELIBMAX;
+ gf_errno_to_error_array[ELIBMAX] = GF_ERROR_CODE_LIBMAX;
+ /* ELIBEXEC 87 / * Attempting to exec a shared library. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBEXEC] = ELIBEXEC;
+ gf_errno_to_error_array[ELIBEXEC] = GF_ERROR_CODE_LIBEXEC;
+ /* EILSEQ 88 / * Illegal byte sequence. */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+ /* ENOSYS 89 / * Unsupported file system operation */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+ /* ELOOP 90 / * Symbolic link loop */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+ /* ERESTART 91 / * Restartable system call */
+ gf_error_to_errno_array[GF_ERROR_CODE_RESTART] = ERESTART;
+ gf_errno_to_error_array[ERESTART] = GF_ERROR_CODE_RESTART;
+ /* ESTRPIPE 92 / * if pipe/FIFO, don't sleep in stream head */
+ gf_error_to_errno_array[GF_ERROR_CODE_STRPIPE] = ESTRPIPE;
+ gf_errno_to_error_array[ESTRPIPE] = GF_ERROR_CODE_STRPIPE;
+ /* ENOTEMPTY 93 / * directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+ /* EUSERS 94 / * Too many users (for UFS) */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+
+ /* BSD Networking Software */
+ /* argument errors */
+ /* ENOTSOCK 95 / * Socket operation on non-socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+ /* EDESTADDRREQ 96 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+ /* EMSGSIZE 97 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+ /* EPROTOTYPE 98 / * Protocol wrong type for socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+ /* ENOPROTOOPT 99 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+ /* EPROTONOSUPPORT 120 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+ /* ESOCKTNOSUPPORT 121 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 122 / * Operation not supported on socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+ /* EPFNOSUPPORT 123 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+ /* EAFNOSUPPORT 124 / * Address family not supported by */
+ /* protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+ /* EADDRINUSE 125 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+ /* EADDRNOTAVAIL 126 / * Can't assign requested address */
+ /* operational errors */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+ /* ENETDOWN 127 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+ /* ENETUNREACH 128 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+ /* ENETRESET 129 / * Network dropped connection because */
+ /* of reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+ /* ECONNABORTED 130 / * Software caused connection abort */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+ /* ECONNRESET 131 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+ /* ENOBUFS 132 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+ /* EISCONN 133 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+ /* ENOTCONN 134 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+ /* XENIX has 135 - 142 */
+ /* ESHUTDOWN 143 / * Can't send after socket shutdown */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+ /* ETOOMANYREFS 144 / * Too many references: can't splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+ /* ETIMEDOUT 145 / * Connection timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+ /* ECONNREFUSED 146 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+ /* EHOSTDOWN 147 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+ /* EHOSTUNREACH 148 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+ /* EALREADY 149 / * operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+ /* EINPROGRESS 150 / * operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* SUN Network File System */
+ /* ESTALE 151 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+
+ return;
}
#endif /* GF_SOLARIS_HOST_OS */
#ifdef GF_DARWIN_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
- /* EDEADLK 11 / * Resource deadlock would occur */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
- /* EAGAIN 35 / * Try Again */
- gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
- gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
-
- /* EINPROGRESS 36 / * Operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
- /* EALREADY 37 / * Operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-
- /* ENOTSOCK 38 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-
- /* EDESTADDRREQ 39 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-
- /* EMSGSIZE 40 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-
- /* EPROTOTYPE 41 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-
- /* ENOPROTOOPT 42 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-
- /* EPROTONOSUPPORT 43 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-
- /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
- /* EOPNOTSUPP 45 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
- /* EPFNOSUPPORT 46 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
-
- /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
-
- /* EADDRINUSE 48 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
-
- /* EADDRNOTAVAIL 49 / * Can't assign requested address */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
-
- /* ENETDOWN 50 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
-
- /* ENETUNREACH 51 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
-
- /* ENETRESET 52 / * Network dropped connection on reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
-
- /* ECONNABORTED 53 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-
- /* ECONNRESET 54 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-
- /* ENOBUFS 55 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-
- /* EISCONN 56 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-
- /* ENOTCONN 57 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-
- /* ESHUTDOWN 58 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-
- /* ETOOMANYREFS 59 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-
- /* ETIMEDOUT 60 / * Operation timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
- /* ECONNREFUSED 61 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-
- /* ELOOP 62 / * Too many levels of symbolic links */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-
- /* ENAMETOOLONG 63 / * File name too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-
- /* EHOSTDOWN 64 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-
- /* EHOSTUNREACH 65 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-
- /* ENOTEMPTY 66 / * Directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-
- /* EPROCLIM 67 / * Too many processes */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
- gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
-
- /* EUSERS 68 / * Too many users */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
- /* EDQUOT 69 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
-
- /* ESTALE 70 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
-
- /* EREMOTE 71 / * Too many levels of remote in path */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-
- /* EBADRPC 72 / * RPC struct is bad */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
- gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
-
- /* ERPCMISMATCH 73 / * RPC version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
- gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
-
- /* EPROGUNAVAIL 74 / * RPC prog. not avail */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
- gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
-
- /* EPROGMISMATCH 75 / * Program version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
- gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
-
- /* EPROCUNAVAIL 76 / * Bad procedure for program */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
- gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
-
- /* ENOLCK 77 / * No locks available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-
- /* ENOSYS 78 / * Function not implemented */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-
- /* EFTYPE 79 / * Inappropriate file type or format */
- gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
- gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
-
- /* EAUTH 80 / * Authentication error */
- gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
- gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
-
- /* ENEEDAUTH 81 / * Need authenticator */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
-/* Intelligent device errors */
-/* EPWROFF 82 / * Device power is off */
- gf_error_to_errno_array[GF_ERROR_CODE_PWROFF] = EPWROFF;
- gf_errno_to_error_array[EPWROFF] = GF_ERROR_CODE_PWROFF;
-/* EDEVERR 83 / * Device error, e.g. paper out */
- gf_error_to_errno_array[GF_ERROR_CODE_DEVERR] = EDEVERR;
- gf_errno_to_error_array[EDEVERR] = GF_ERROR_CODE_DEVERR;
-
- /* EOVERFLOW 84 / * Value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
-
-/* Program loading errors */
-/* EBADEXEC 85 / * Bad executable */
- gf_error_to_errno_array[GF_ERROR_CODE_BADEXEC] = EBADEXEC;
- gf_errno_to_error_array[EBADEXEC] = GF_ERROR_CODE_BADEXEC;
-
-/* EBADARCH 86 / * Bad CPU type in executable */
- gf_error_to_errno_array[GF_ERROR_CODE_BADARCH] = EBADARCH;
- gf_errno_to_error_array[EBADARCH] = GF_ERROR_CODE_BADARCH;
-
-/* ESHLIBVERS 87 / * Shared library version mismatch */
- gf_error_to_errno_array[GF_ERROR_CODE_SHLIBVERS] = ESHLIBVERS;
- gf_errno_to_error_array[ESHLIBVERS] = GF_ERROR_CODE_SHLIBVERS;
-
-/* EBADMACHO 88 / * Malformed Macho file */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMACHO] = EBADMACHO;
- gf_errno_to_error_array[EBADMACHO] = GF_ERROR_CODE_BADMACHO;
+ /* EDEADLK 11 / * Resource deadlock would occur */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* EAGAIN 35 / * Try Again */
+ gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
+ gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
+
+ /* EINPROGRESS 36 / * Operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* EALREADY 37 / * Operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+
+ /* ENOTSOCK 38 / * Socket operation on non-socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+
+ /* EDESTADDRREQ 39 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+
+ /* EMSGSIZE 40 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+
+ /* EPROTOTYPE 41 / * Protocol wrong type for socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+
+ /* ENOPROTOOPT 42 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+
+ /* EPROTONOSUPPORT 43 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+
+ /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 45 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* EPFNOSUPPORT 46 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+
+ /* EAFNOSUPPORT 47 / * Address family not supported by
+ * protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+
+ /* EADDRINUSE 48 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+
+ /* EADDRNOTAVAIL 49 / * Can't assign requested address
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+
+ /* ENETDOWN 50 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+
+ /* ENETUNREACH 51 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+
+ /* ENETRESET 52 / * Network dropped connection on
+ * reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+
+ /* ECONNABORTED 53 / * Software caused connection abort
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+
+ /* ECONNRESET 54 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+
+ /* ENOBUFS 55 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+
+ /* EISCONN 56 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+
+ /* ENOTCONN 57 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+
+ /* ESHUTDOWN 58 / * Can't send after socket shutdown
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+
+ /* ETOOMANYREFS 59 / * Too many references: can't
+ * splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+
+ /* ETIMEDOUT 60 / * Operation timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+ /* ECONNREFUSED 61 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+
+ /* ELOOP 62 / * Too many levels of symbolic
+ * links */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+
+ /* ENAMETOOLONG 63 / * File name too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+
+ /* EHOSTDOWN 64 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+
+ /* EHOSTUNREACH 65 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+
+ /* ENOTEMPTY 66 / * Directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+
+ /* EPROCLIM 67 / * Too many processes */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
+ gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
+
+ /* EUSERS 68 / * Too many users */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+
+ /* EDQUOT 69 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+
+ /* ESTALE 70 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+
+ /* EREMOTE 71 / * Too many levels of remote in
+ * path */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+
+ /* EBADRPC 72 / * RPC struct is bad */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
+ gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
+
+ /* ERPCMISMATCH 73 / * RPC version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
+ gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
+
+ /* EPROGUNAVAIL 74 / * RPC prog. not avail */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
+ gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
+
+ /* EPROGMISMATCH 75 / * Program version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
+ gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
+
+ /* EPROCUNAVAIL 76 / * Bad procedure for program */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
+ gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
+
+ /* ENOLCK 77 / * No locks available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
+
+ /* ENOSYS 78 / * Function not implemented */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+
+ /* EFTYPE 79 / * Inappropriate file type or
+ * format */
+ gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
+ gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
+
+ /* EAUTH 80 / * Authentication error */
+ gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
+ gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+
+ /* ENEEDAUTH 81 / * Need authenticator */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+ /* Intelligent device errors */
+ /* EPWROFF 82 / * Device power is off */
+ gf_error_to_errno_array[GF_ERROR_CODE_PWROFF] = EPWROFF;
+ gf_errno_to_error_array[EPWROFF] = GF_ERROR_CODE_PWROFF;
+ /* EDEVERR 83 / * Device error, e.g. paper out */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEVERR] = EDEVERR;
+ gf_errno_to_error_array[EDEVERR] = GF_ERROR_CODE_DEVERR;
+
+ /* EOVERFLOW 84 / * Value too large to be stored in
+ * data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+
+ /* Program loading errors */
+ /* EBADEXEC 85 / * Bad executable */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADEXEC] = EBADEXEC;
+ gf_errno_to_error_array[EBADEXEC] = GF_ERROR_CODE_BADEXEC;
+
+ /* EBADARCH 86 / * Bad CPU type in executable */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADARCH] = EBADARCH;
+ gf_errno_to_error_array[EBADARCH] = GF_ERROR_CODE_BADARCH;
+
+ /* ESHLIBVERS 87 / * Shared library version mismatch */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHLIBVERS] = ESHLIBVERS;
+ gf_errno_to_error_array[ESHLIBVERS] = GF_ERROR_CODE_SHLIBVERS;
+
+ /* EBADMACHO 88 / * Malformed Macho file */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMACHO] = EBADMACHO;
+ gf_errno_to_error_array[EBADMACHO] = GF_ERROR_CODE_BADMACHO;
#ifdef EDOOFUS
- /* EDOOFUS 88 / * Programming error */
- gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
- gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
+ /* EDOOFUS 88 / * Programming error */
+ gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
+ gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
#endif
- /* ECANCELED 89 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
-
- /* EIDRM 90 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
- /* ENOMSG 91 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
-
- /* EILSEQ 92 / * Illegal byte sequence */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
-
- /* ENOATTR 93 / * Attribute not found */
- gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
- gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
-
- /* EBADMSG 94 / * Bad message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
-
- /* EMULTIHOP 95 / * Reserved */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
-
- /* ENODATA 96 / * No message available on STREAM */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
-
- /* ENOLINK 97 / * Reserved */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
-
- /* ENOSR 98 / * No STREAM resources */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
- gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
-
- /* ENOSTR 99 / * Not a STREAM */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
- gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
-
-/* EPROTO 100 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
-/* ETIME 101 / * STREAM ioctl timeout */
- gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
- gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
-
-/* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */
-/* EOPNOTSUPP 102 / * Operation not supported on socket */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
-/* ENOPOLICY 103 / * No such policy registered */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPOLICY] = ENOPOLICY;
- gf_errno_to_error_array[ENOPOLICY] = GF_ERROR_CODE_NOPOLICY;
-
- return ;
+ /* ECANCELED 89 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+
+ /* EIDRM 90 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+ /* ENOMSG 91 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+
+ /* EILSEQ 92 / * Illegal byte sequence */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+
+ /* ENOATTR 93 / * Attribute not found */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
+ gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
+
+ /* EBADMSG 94 / * Bad message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+
+ /* EMULTIHOP 95 / * Reserved */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+
+ /* ENODATA 96 / * No message available on STREAM
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+
+ /* ENOLINK 97 / * Reserved */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+
+ /* ENOSR 98 / * No STREAM resources */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
+ gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
+
+ /* ENOSTR 99 / * Not a STREAM */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
+ gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
+
+ /* EPROTO 100 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+ /* ETIME 101 / * STREAM ioctl timeout */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
+ gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
+
+ /* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */
+ /* EOPNOTSUPP 102 / * Operation not supported on
+ * socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* ENOPOLICY 103 / * No such policy registered */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPOLICY] = ENOPOLICY;
+ gf_errno_to_error_array[ENOPOLICY] = GF_ERROR_CODE_NOPOLICY;
+
+ return;
}
#endif /* GF_DARWIN_HOST_OS */
#ifdef GF_BSD_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
- /* Quite a bit of things changed in FreeBSD - current */
-
- /* EAGAIN 35 / * Try Again */
- gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
- gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
-
- /* EDEADLK 11 / * Resource deadlock would occur */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
- /* EINPROGRESS 36 / * Operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
- /* EALREADY 37 / * Operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-
- /* ENOTSOCK 38 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-
- /* EDESTADDRREQ 39 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-
- /* EMSGSIZE 40 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-
- /* EPROTOTYPE 41 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-
- /* ENOPROTOOPT 42 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-
- /* EPROTONOSUPPORT 43 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-
- /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
- /* EOPNOTSUPP 45 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
- /* EPFNOSUPPORT 46 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+ /* Quite a bit of things changed in FreeBSD - current */
+
+ /* EAGAIN 35 / * Try Again */
+ gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
+ gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
+
+ /* EDEADLK 11 / * Resource deadlock would occur */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* EINPROGRESS 36 / * Operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* EALREADY 37 / * Operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+
+ /* ENOTSOCK 38 / * Socket operation on non-socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+
+ /* EDESTADDRREQ 39 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+
+ /* EMSGSIZE 40 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+
+ /* EPROTOTYPE 41 / * Protocol wrong type for socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+
+ /* ENOPROTOOPT 42 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+
+ /* EPROTONOSUPPORT 43 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+
+ /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 45 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* EPFNOSUPPORT 46 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+
+ /* EAFNOSUPPORT 47 / * Address family not supported by
+ * protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+
+ /* EADDRINUSE 48 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+
+ /* EADDRNOTAVAIL 49 / * Can't assign requested address
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+
+ /* ENETDOWN 50 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+
+ /* ENETUNREACH 51 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+
+ /* ENETRESET 52 / * Network dropped connection on
+ * reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+
+ /* ECONNABORTED 53 / * Software caused connection abort
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+
+ /* ECONNRESET 54 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+
+ /* ENOBUFS 55 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+
+ /* EISCONN 56 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+
+ /* ENOTCONN 57 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+
+ /* ESHUTDOWN 58 / * Can't send after socket shutdown
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+
+ /* ETOOMANYREFS 59 / * Too many references: can't
+ * splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+
+ /* ETIMEDOUT 60 / * Operation timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+ /* ECONNREFUSED 61 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+
+ /* ELOOP 62 / * Too many levels of symbolic
+ * links */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+
+ /* ENAMETOOLONG 63 / * File name too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+
+ /* EHOSTDOWN 64 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+
+ /* EHOSTUNREACH 65 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+
+ /* ENOTEMPTY 66 / * Directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+
+ /* EPROCLIM 67 / * Too many processes */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
+ gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
+
+ /* EUSERS 68 / * Too many users */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+
+ /* EDQUOT 69 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+
+ /* ESTALE 70 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+
+ /* EREMOTE 71 / * Too many levels of remote in
+ * path */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+
+ /* EBADRPC 72 / * RPC struct is bad */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
+ gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
+
+ /* ERPCMISMATCH 73 / * RPC version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
+ gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
+
+ /* EPROGUNAVAIL 74 / * RPC prog. not avail */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
+ gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
+
+ /* EPROGMISMATCH 75 / * Program version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
+ gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
+
+ /* EPROCUNAVAIL 76 / * Bad procedure for program */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
+ gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
+
+ /* ENOLCK 77 / * No locks available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
- /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+ /* ENOSYS 78 / * Function not implemented */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
- /* EADDRINUSE 48 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+ /* EFTYPE 79 / * Inappropriate file type or
+ * format */
+ gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
+ gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
+
+ /* EAUTH 80 / * Authentication error */
+ gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
+ gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+
+ /* ENEEDAUTH 81 / * Need authenticator */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+
+ /* EIDRM 82 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
- /* EADDRNOTAVAIL 49 / * Can't assign requested address */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+ /* ENOMSG 83 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
- /* ENETDOWN 50 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+ /* EOVERFLOW 84 / * Value too large to be stored in
+ * data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
- /* ENETUNREACH 51 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+ /* ECANCELED 85 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
- /* ENETRESET 52 / * Network dropped connection on reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+ /* EILSEQ 86 / * Illegal byte sequence */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
- /* ECONNABORTED 53 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-
- /* ECONNRESET 54 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-
- /* ENOBUFS 55 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-
- /* EISCONN 56 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-
- /* ENOTCONN 57 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-
- /* ESHUTDOWN 58 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-
- /* ETOOMANYREFS 59 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-
- /* ETIMEDOUT 60 / * Operation timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
- /* ECONNREFUSED 61 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-
- /* ELOOP 62 / * Too many levels of symbolic links */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-
- /* ENAMETOOLONG 63 / * File name too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-
- /* EHOSTDOWN 64 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-
- /* EHOSTUNREACH 65 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-
- /* ENOTEMPTY 66 / * Directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-
- /* EPROCLIM 67 / * Too many processes */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
- gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
-
- /* EUSERS 68 / * Too many users */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
- /* EDQUOT 69 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
-
- /* ESTALE 70 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
-
- /* EREMOTE 71 / * Too many levels of remote in path */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-
- /* EBADRPC 72 / * RPC struct is bad */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
- gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
-
- /* ERPCMISMATCH 73 / * RPC version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
- gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
-
- /* EPROGUNAVAIL 74 / * RPC prog. not avail */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
- gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
-
- /* EPROGMISMATCH 75 / * Program version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
- gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
-
- /* EPROCUNAVAIL 76 / * Bad procedure for program */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
- gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
-
- /* ENOLCK 77 / * No locks available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-
- /* ENOSYS 78 / * Function not implemented */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-
- /* EFTYPE 79 / * Inappropriate file type or format */
- gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
- gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
-
- /* EAUTH 80 / * Authentication error */
- gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
- gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
-
- /* ENEEDAUTH 81 / * Need authenticator */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
-
- /* EIDRM 82 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
-
- /* ENOMSG 83 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
-
- /* EOVERFLOW 84 / * Value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
-
- /* ECANCELED 85 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
-
- /* EILSEQ 86 / * Illegal byte sequence */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
-
- /* ENOATTR 87 / * Attribute not found */
- gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
- gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
+ /* ENOATTR 87 / * Attribute not found */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
+ gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
#ifdef EDOOFUS
- /* EDOOFUS 88 / * Programming error */
- gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
- gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
+ /* EDOOFUS 88 / * Programming error */
+ gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
+ gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
#endif
- /* EBADMSG 89 / * Bad message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+ /* EBADMSG 89 / * Bad message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
- /* EMULTIHOP 90 / * Multihop attempted */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+#ifdef __NetBSD__
+ /* ENODATA 89 / * No message available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
+ gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
+#endif
- /* ENOLINK 91 / * Link has been severed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* EMULTIHOP 90 / * Multihop attempted */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
- /* EPROTO 92 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+ /* ENOLINK 91 / * Link has been severed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* EPROTO 92 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
- return ;
+ return;
}
#endif /* GF_BSD_HOST_OS */
#ifdef GF_LINUX_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
- /* Things are fine. Everything should work seemlessly on GNU/Linux machines */
- return ;
+ /* Things are fine. Everything should work seemlessly on GNU/Linux machines
+ */
+ return;
}
#endif /* GF_LINUX_HOST_OS */
-
static void
-init_errno_arrays ()
+init_errno_arrays()
{
- int i;
- for (i=0; i < GF_ERROR_CODE_UNKNOWN; i++) {
- gf_errno_to_error_array[i] = i;
- gf_error_to_errno_array[i] = i;
- }
- /* Now change the order if it needs to be. */
- init_compat_errno_arrays();
-
- return;
+ int i;
+ for (i = 0; i < GF_ERROR_CODE_UNKNOWN; i++) {
+ gf_errno_to_error_array[i] = i;
+ gf_error_to_errno_array[i] = i;
+ }
+ /* Now change the order if it needs to be. */
+ init_compat_errno_arrays();
+
+ return;
}
int32_t
-gf_errno_to_error (int32_t op_errno)
+gf_errno_to_error(int32_t op_errno)
{
- if (!gf_compat_errno_init_done) {
- init_errno_arrays ();
- gf_compat_errno_init_done = 1;
- }
+ if (!gf_compat_errno_init_done) {
+ init_errno_arrays();
+ gf_compat_errno_init_done = 1;
+ }
- if ((op_errno > GF_ERROR_CODE_SUCCESS) && (op_errno < GF_ERROR_CODE_UNKNOWN))
- return gf_errno_to_error_array[op_errno];
+ if ((op_errno > GF_ERROR_CODE_SUCCESS) &&
+ (op_errno < GF_ERROR_CODE_UNKNOWN))
+ return gf_errno_to_error_array[op_errno];
- return op_errno;
+ return op_errno;
}
-
int32_t
-gf_error_to_errno (int32_t error)
+gf_error_to_errno(int32_t error)
{
- if (!gf_compat_errno_init_done) {
- init_errno_arrays ();
- gf_compat_errno_init_done = 1;
- }
+ if (!gf_compat_errno_init_done) {
+ init_errno_arrays();
+ gf_compat_errno_init_done = 1;
+ }
- if ((error > GF_ERROR_CODE_SUCCESS) && (error < GF_ERROR_CODE_UNKNOWN))
- return gf_error_to_errno_array[error];
+ if ((error > GF_ERROR_CODE_SUCCESS) && (error < GF_ERROR_CODE_UNKNOWN))
+ return gf_error_to_errno_array[error];
- return error;
+ return error;
}
diff --git a/libglusterfs/src/compat-errno.h b/libglusterfs/src/compat-errno.h
deleted file mode 100644
index 65e52081dd3..00000000000
--- a/libglusterfs/src/compat-errno.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __COMPAT_ERRNO_H__
-#define __COMPAT_ERRNO_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <errno.h>
-
-#define GF_ERROR_CODE_SUCCESS 0
-#define GF_ERROR_CODE_UNKNOWN 1024
-#define GF_ERRNO_UNKNOWN 1024
-
-#define GF_ERROR_CODE_PERM 1 /* Operation not permitted */
-#define GF_ERROR_CODE_NOENT 2 /* No such file or directory */
-#define GF_ERROR_CODE_SRCH 3 /* No such process */
-#define GF_ERROR_CODE_INTR 4 /* Interrupted system call */
-#define GF_ERROR_CODE_IO 5 /* I/O error */
-#define GF_ERROR_CODE_NXIO 6 /* No such device or address */
-#define GF_ERROR_CODE_2BIG 7 /* Argument list too long */
-#define GF_ERROR_CODE_NOEXEC 8 /* Exec format error */
-#define GF_ERROR_CODE_BADF 9 /* Bad file number */
-#define GF_ERROR_CODE_CHILD 10 /* No child processes */
-#define GF_ERROR_CODE_AGAIN 11 /* Try again */
-#define GF_ERROR_CODE_NOMEM 12 /* Out of memory */
-#define GF_ERROR_CODE_ACCES 13 /* Permission denied */
-#define GF_ERROR_CODE_FAULT 14 /* Bad address */
-#define GF_ERROR_CODE_NOTBLK 15 /* Block device required */
-#define GF_ERROR_CODE_BUSY 16 /* Device or resource busy */
-#define GF_ERROR_CODE_EXIST 17 /* File exists */
-#define GF_ERROR_CODE_XDEV 18 /* Cross-device link */
-#define GF_ERROR_CODE_NODEV 19 /* No such device */
-#define GF_ERROR_CODE_NOTDIR 20 /* Not a directory */
-#define GF_ERROR_CODE_ISDIR 21 /* Is a directory */
-#define GF_ERROR_CODE_INVAL 22 /* Invalid argument */
-#define GF_ERROR_CODE_NFILE 23 /* File table overflow */
-#define GF_ERROR_CODE_MFILE 24 /* Too many open files */
-#define GF_ERROR_CODE_NOTTY 25 /* Not a typewriter */
-#define GF_ERROR_CODE_TXTBSY 26 /* Text file busy */
-#define GF_ERROR_CODE_FBIG 27 /* File too large */
-#define GF_ERROR_CODE_NOSPC 28 /* No space left on device */
-#define GF_ERROR_CODE_SPIPE 29 /* Illegal seek */
-#define GF_ERROR_CODE_ROFS 30 /* Read-only file system */
-#define GF_ERROR_CODE_MLINK 31 /* Too many links */
-#define GF_ERROR_CODE_PIPE 32 /* Broken pipe */
-#define GF_ERROR_CODE_DOM 33 /* Math argument out of domain of func */
-#define GF_ERROR_CODE_RANGE 34 /* Math result not representable */
-#define GF_ERROR_CODE_DEADLK 35 /* Resource deadlock would occur */
-#define GF_ERROR_CODE_NAMETOOLONG 36 /* File name too long */
-#define GF_ERROR_CODE_NOLCK 37 /* No record locks available */
-#define GF_ERROR_CODE_NOSYS 38 /* Function not implemented */
-#define GF_ERROR_CODE_NOTEMPTY 39 /* Directory not empty */
-#define GF_ERROR_CODE_LOOP 40 /* Too many symbolic links encountered */
-
-#define GF_ERROR_CODE_NOMSG 42 /* No message of desired type */
-#define GF_ERROR_CODE_IDRM 43 /* Identifier removed */
-#define GF_ERROR_CODE_CHRNG 44 /* Channel number out of range */
-#define GF_ERROR_CODE_L2NSYNC 45 /* Level 2 not synchronized */
-#define GF_ERROR_CODE_L3HLT 46 /* Level 3 halted */
-#define GF_ERROR_CODE_L3RST 47 /* Level 3 reset */
-#define GF_ERROR_CODE_LNRNG 48 /* Link number out of range */
-#define GF_ERROR_CODE_UNATCH 49 /* Protocol driver not attached */
-#define GF_ERROR_CODE_NOCSI 50 /* No CSI structure available */
-#define GF_ERROR_CODE_L2HLT 51 /* Level 2 halted */
-#define GF_ERROR_CODE_BADE 52 /* Invalid exchange */
-#define GF_ERROR_CODE_BADR 53 /* Invalid request descriptor */
-#define GF_ERROR_CODE_XFULL 54 /* Exchange full */
-#define GF_ERROR_CODE_NOANO 55 /* No anode */
-#define GF_ERROR_CODE_BADRQC 56 /* Invalid request code */
-#define GF_ERROR_CODE_BADSLT 57 /* Invalid slot */
-#define GF_ERROR_CODE_BFONT 59 /* Bad font file format */
-#define GF_ERROR_CODE_NOSTR 60 /* Device not a stream */
-#define GF_ERROR_CODE_NODATA 61 /* No data available */
-#define GF_ERROR_CODE_TIME 62 /* Timer expired */
-#define GF_ERROR_CODE_NOSR 63 /* Out of streams resources */
-#define GF_ERROR_CODE_NONET 64 /* Machine is not on the network */
-#define GF_ERROR_CODE_NOPKG 65 /* Package not installed */
-#define GF_ERROR_CODE_REMOTE 66 /* Object is remote */
-#define GF_ERROR_CODE_NOLINK 67 /* Link has been severed */
-#define GF_ERROR_CODE_ADV 68 /* Advertise error */
-#define GF_ERROR_CODE_SRMNT 69 /* Srmount error */
-#define GF_ERROR_CODE_COMM 70 /* Communication error on send */
-#define GF_ERROR_CODE_PROTO 71 /* Protocol error */
-#define GF_ERROR_CODE_MULTIHOP 72 /* Multihop attempted */
-#define GF_ERROR_CODE_DOTDOT 73 /* RFS specific error */
-#define GF_ERROR_CODE_BADMSG 74 /* Not a data message */
-#define GF_ERROR_CODE_OVERFLOW 75 /* Value too large for defined data type */
-#define GF_ERROR_CODE_NOTUNIQ 76 /* Name not unique on network */
-#define GF_ERROR_CODE_BADFD 77 /* File descriptor in bad state */
-#define GF_ERROR_CODE_REMCHG 78 /* Remote address changed */
-#define GF_ERROR_CODE_LIBACC 79 /* Can not access a needed shared library */
-#define GF_ERROR_CODE_LIBBAD 80 /* Accessing a corrupted shared library */
-#define GF_ERROR_CODE_LIBSCN 81 /* .lib section in a.out corrupted */
-#define GF_ERROR_CODE_LIBMAX 82 /* Attempting to link in too many shared libraries */
-#define GF_ERROR_CODE_LIBEXEC 83 /* Cannot exec a shared library directly */
-#define GF_ERROR_CODE_ILSEQ 84 /* Illegal byte sequence */
-#define GF_ERROR_CODE_RESTART 85 /* Interrupted system call should be restarted */
-#define GF_ERROR_CODE_STRPIPE 86 /* Streams pipe error */
-#define GF_ERROR_CODE_USERS 87 /* Too many users */
-#define GF_ERROR_CODE_NOTSOCK 88 /* Socket operation on non-socket */
-#define GF_ERROR_CODE_DESTADDRREQ 89 /* Destination address required */
-#define GF_ERROR_CODE_MSGSIZE 90 /* Message too long */
-#define GF_ERROR_CODE_PROTOTYPE 91 /* Protocol wrong type for socket */
-#define GF_ERROR_CODE_NOPROTOOPT 92 /* Protocol not available */
-#define GF_ERROR_CODE_PROTONOSUPPORT 93 /* Protocol not supported */
-#define GF_ERROR_CODE_SOCKTNOSUPPORT 94 /* Socket type not supported */
-#define GF_ERROR_CODE_OPNOTSUPP 95 /* Operation not supported on transport endpoint */
-#define GF_ERROR_CODE_PFNOSUPPORT 96 /* Protocol family not supported */
-#define GF_ERROR_CODE_AFNOSUPPORT 97 /* Address family not supported by protocol */
-#define GF_ERROR_CODE_ADDRINUSE 98 /* Address already in use */
-#define GF_ERROR_CODE_ADDRNOTAVAIL 99 /* Cannot assign requested address */
-#define GF_ERROR_CODE_NETDOWN 100 /* Network is down */
-#define GF_ERROR_CODE_NETUNREACH 101 /* Network is unreachable */
-#define GF_ERROR_CODE_NETRESET 102 /* Network dropped connection because of reset */
-#define GF_ERROR_CODE_CONNABORTED 103 /* Software caused connection abort */
-#define GF_ERROR_CODE_CONNRESET 104 /* Connection reset by peer */
-#define GF_ERROR_CODE_NOBUFS 105 /* No buffer space available */
-#define GF_ERROR_CODE_ISCONN 106 /* Transport endpoint is already connected */
-#define GF_ERROR_CODE_NOTCONN 107 /* Transport endpoint is not connected */
-#define GF_ERROR_CODE_SHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
-#define GF_ERROR_CODE_TOOMANYREFS 109 /* Too many references: cannot splice */
-#define GF_ERROR_CODE_TIMEDOUT 110 /* Connection timed out */
-#define GF_ERROR_CODE_CONNREFUSED 111 /* Connection refused */
-#define GF_ERROR_CODE_HOSTDOWN 112 /* Host is down */
-#define GF_ERROR_CODE_HOSTUNREACH 113 /* No route to host */
-#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
-#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
-#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
-#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
-#define GF_ERROR_CODE_STALE 116 /* Stale NFS file handle */
-#define GF_ERROR_CODE_UCLEAN 117 /* Structure needs cleaning */
-#define GF_ERROR_CODE_NOTNAM 118 /* Not a XENIX named type file */
-#define GF_ERROR_CODE_NAVAIL 119 /* No XENIX semaphores available */
-#define GF_ERROR_CODE_ISNAM 120 /* Is a named type file */
-#define GF_ERROR_CODE_REMOTEIO 121 /* Remote I/O error */
-#define GF_ERROR_CODE_DQUOT 122 /* Quota exceeded */
-#define GF_ERROR_CODE_NOMEDIUM 123 /* No medium found */
-#define GF_ERROR_CODE_MEDIUMTYPE 124 /* Wrong medium type */
-#define GF_ERROR_CODE_CANCELED 125 /* Operation Canceled */
-#define GF_ERROR_CODE_NOKEY 126 /* Required key not available */
-#define GF_ERROR_CODE_KEYEXPIRED 127 /* Key has expired */
-#define GF_ERROR_CODE_KEYREVOKED 128 /* Key has been revoked */
-#define GF_ERROR_CODE_KEYREJECTED 129 /* Key was rejected by service */
-
-/* for robust mutexes */
-#define GF_ERROR_CODE_OWNERDEAD 130 /* Owner died */
-#define GF_ERROR_CODE_NOTRECOVERABLE 131 /* State not recoverable */
-
-
-
-/* Should never be seen by user programs */
-#define GF_ERROR_CODE_RESTARTSYS 512
-#define GF_ERROR_CODE_RESTARTNOINTR 513
-#define GF_ERROR_CODE_RESTARTNOHAND 514 /* restart if no handler.. */
-#define GF_ERROR_CODE_NOIOCTLCMD 515 /* No ioctl command */
-#define GF_ERROR_CODE_RESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
-
-/* Defined for the NFSv3 protocol */
-#define GF_ERROR_CODE_BADHANDLE 521 /* Illegal NFS file handle */
-#define GF_ERROR_CODE_NOTSYNC 522 /* Update synchronization mismatch */
-#define GF_ERROR_CODE_BADCOOKIE 523 /* Cookie is stale */
-#define GF_ERROR_CODE_NOTSUPP 524 /* Operation is not supported */
-#define GF_ERROR_CODE_TOOSMALL 525 /* Buffer or request is too small */
-#define GF_ERROR_CODE_SERVERFAULT 526 /* An untranslatable error occurred */
-#define GF_ERROR_CODE_BADTYPE 527 /* Type not supported by server */
-#define GF_ERROR_CODE_JUKEBOX 528 /* Request initiated, but will not complete before timeout */
-#define GF_ERROR_CODE_IOCBQUEUED 529 /* iocb queued, will get completion event */
-#define GF_ERROR_CODE_IOCBRETRY 530 /* iocb queued, will trigger a retry */
-
-/* Darwin OS X */
-#define GF_ERROR_CODE_NOPOLICY 701
-#define GF_ERROR_CODE_BADMACHO 702
-#define GF_ERROR_CODE_PWROFF 703
-#define GF_ERROR_CODE_DEVERR 704
-#define GF_ERROR_CODE_BADARCH 705
-#define GF_ERROR_CODE_BADEXEC 706
-#define GF_ERROR_CODE_SHLIBVERS 707
-
-
-
-/* Solaris */
-/* ENOTACTIVE 73 / * Facility is not active */
-#define GF_ERROR_CODE_NOTACTIVE 801
-/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
-#define GF_ERROR_CODE_LOCKUNMAPPED 802
-
-/* BSD system */
-#define GF_ERROR_CODE_PROCLIM 901 /* Too many processes */
-#define GF_ERROR_CODE_BADRPC 902 /* RPC struct is bad */
-#define GF_ERROR_CODE_RPCMISMATCH 903 /* RPC version wrong */
-#define GF_ERROR_CODE_PROGUNAVAIL 904 /* RPC prog. not avail */
-#define GF_ERROR_CODE_PROGMISMATCH 905 /* Program version wrong */
-#define GF_ERROR_CODE_PROCUNAVAIL 905 /* Bad procedure for program */
-#define GF_ERROR_CODE_FTYPE 906 /* Inappropriate file type or format */
-#define GF_ERROR_CODE_AUTH 907 /* Authentication error */
-#define GF_ERROR_CODE_NEEDAUTH 908 /* Need authenticator */
-#define GF_ERROR_CODE_DOOFUS 909 /* Programming error */
-
-#define GF_ERROR_CODE_NOATTR GF_ERROR_CODE_NODATA /* Attribute not found */
-
-/* Either one of enodata or enoattr will be there in system */
-#ifndef ENOATTR
-#define ENOATTR ENODATA
-#endif /* ENOATTR */
-
-#ifndef ENODATA
-#define ENODATA ENOATTR
-#endif /* ENODATA */
-
-#ifndef EBADFD
-#define EBADFD EBADRPC
-#endif /* EBADFD */
-
-/* These functions are defined for all the OS flags, but content will
- * be different for each OS flag.
- */
-int32_t gf_errno_to_error (int32_t op_errno);
-int32_t gf_error_to_errno (int32_t error);
-
-#endif /* __COMPAT_ERRNO_H__ */
diff --git a/libglusterfs/src/compat.c b/libglusterfs/src/compat.c
index 2dcd56a4144..8a05a30a8fe 100644
--- a/libglusterfs/src/compat.c
+++ b/libglusterfs/src/compat.c
@@ -8,545 +8,611 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <string.h>
#include <stdlib.h>
+#include <unistd.h>
#include <stdarg.h>
#include <getopt.h>
#include <sys/types.h>
#include <dirent.h>
-#ifdef GF_SOLARIS_HOST_OS
-#include "logging.h"
-#endif /* GF_SOLARIS_HOST_OS */
-
-#include "compat.h"
-#include "common-utils.h"
-#include "iatt.h"
-#include "inode.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/run.h"
+#include "glusterfs/libglusterfs-messages.h"
#ifdef GF_SOLARIS_HOST_OS
int
-solaris_fsetxattr(int fd, const char* key, const char *value, size_t size,
+solaris_fsetxattr(int fd, const char *key, const char *value, size_t size,
int flags)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = openat (fd, key, flags|O_CREAT|O_WRONLY|O_XATTR, 0777);
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- } else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_ERROR,
- "Couldn't set extended attribute for %d (%d)",
- fd, errno);
- return -1;
- }
-
- return 0;
+ int attrfd = -1;
+ int ret = 0;
+
+ attrfd = openat(fd, key, flags | O_CREAT | O_WRONLY | O_XATTR, 0777);
+ if (attrfd >= 0) {
+ ftruncate(attrfd, 0);
+ ret = write(attrfd, value, size);
+ close(attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_ERROR, errno,
+ LG_MSG_SET_ATTRIBUTE_FAILED,
+ "Couldn't set "
+ "extended attribute for %d",
+ fd);
+ return -1;
+ }
+
+ return 0;
}
-
int
-solaris_fgetxattr(int fd, const char* key, char *value, size_t size)
+solaris_fgetxattr(int fd, const char *key, char *value, size_t size)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = openat (fd, key, O_RDONLY|O_XATTR);
- if (attrfd >= 0) {
- if (size == 0) {
- struct stat buf;
- fstat (attrfd, &buf);
- ret = buf.st_size;
- } else {
- ret = read (attrfd, value, size);
- }
- close (attrfd);
+ int attrfd = -1;
+ int ret = 0;
+
+ attrfd = openat(fd, key, O_RDONLY | O_XATTR);
+ if (attrfd >= 0) {
+ if (size == 0) {
+ struct stat buf;
+ fstat(attrfd, &buf);
+ ret = buf.st_size;
} else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_INFO,
- "Couldn't read extended attribute for the file %d (%d)",
- fd, errno);
- if (errno == ENOENT)
- errno = ENODATA;
- return -1;
+ ret = read(attrfd, value, size);
}
-
- return ret;
+ close(attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_INFO, errno,
+ LG_MSG_READ_ATTRIBUTE_FAILED,
+ "Couldn't read "
+ "extended attribute for the file %d",
+ fd);
+ if (errno == ENOENT)
+ errno = ENODATA;
+ return -1;
+ }
+
+ return ret;
}
/* Solaris does not support xattr for symlinks and dev files. Since gfid and
other trusted attributes are stored as xattrs, we need to provide support for
- them. A mapped regular file is stored in the /.glusterfs_xattr_inode of the export dir.
- All xattr ops related to the special files are redirected to this map file.
+ them. A mapped regular file is stored in the /.glusterfs_xattr_inode of the
+ export dir. All xattr ops related to the special files are redirected to this
+ map file.
*/
int
-make_export_path (const char *real_path, char **path)
+make_export_path(const char *real_path, char **path)
{
- int ret = -1;
- char *tmp = NULL;
- char *export_path = NULL;
- char *dup = NULL;
- char *ptr = NULL;
- char *freeptr = NULL;
- uuid_t gfid = {0, };
-
- export_path = GF_CALLOC (1, sizeof (char) * PATH_MAX, 0);
- if (!export_path)
- goto out;
+ int ret = -1;
+ char *tmp = NULL;
+ char *export_path = NULL;
+ char *dup = NULL;
+ char *ptr = NULL;
+ char *freeptr = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ export_path = GF_CALLOC(1, sizeof(char) * PATH_MAX, 0);
+ if (!export_path)
+ goto out;
- dup = gf_strdup (real_path);
- if (!dup)
- goto out;
+ dup = gf_strdup(real_path);
+ if (!dup)
+ goto out;
- freeptr = dup;
- ret = solaris_getxattr ("/", GFID_XATTR_KEY, gfid, 16);
- /* Return value of getxattr */
+ freeptr = dup;
+ ret = solaris_getxattr("/", GFID_XATTR_KEY, gfid, 16);
+ /* Return value of getxattr */
+ if (ret == 16) {
+ if (__is_root_gfid(gfid)) {
+ strcat(export_path, "/");
+ ret = 0;
+ goto done;
+ }
+ }
+
+ do {
+ ptr = strtok_r(dup, "/", &tmp);
+ if (!ptr)
+ break;
+ strcat(export_path, dup);
+ ret = solaris_getxattr(export_path, GFID_XATTR_KEY, gfid, 16);
if (ret == 16) {
- if (__is_root_gfid (gfid)){
- strcat (export_path, "/");
- ret = 0;
- goto done;
- }
+ if (__is_root_gfid(gfid)) {
+ ret = 0;
+ goto done;
+ }
}
+ strcat(export_path, "/");
+ dup = tmp;
+ } while (ptr);
- do {
- ptr = strtok_r (dup, "/", &tmp);
- if (!ptr)
- break;
- strcat (export_path, dup);
- ret = solaris_getxattr (export_path, GFID_XATTR_KEY, gfid, 16);
- if (ret == 16) {
- if (__is_root_gfid (gfid)) {
- ret = 0;
- goto done;
- }
- }
- strcat (export_path, "/");
- dup = tmp;
- } while (ptr);
-
- goto out;
+ goto out;
done:
- if (!ret) {
- *path = export_path;
- }
+ if (!ret) {
+ *path = export_path;
+ }
out:
- if (freeptr)
- GF_FREE (freeptr);
- if (ret && export_path)
- GF_FREE (export_path);
+ GF_FREE(freeptr);
+ if (ret && export_path)
+ GF_FREE(export_path);
- return ret;
+ return ret;
}
int
-solaris_xattr_resolve_path (const char *real_path, char **path)
+solaris_xattr_resolve_path(const char *real_path, char **path)
{
- int ret = -1;
- char *export_path = NULL;
- char xattr_path[PATH_MAX] = {0, };
- struct stat lstatbuf = {0, };
- struct iatt stbuf = {0, };
- struct stat statbuf = {0, };
-
- ret = lstat (real_path, &lstatbuf);
- if (ret != 0 )
- return ret;
- iatt_from_stat (&stbuf, &lstatbuf);
- if (IA_ISREG(stbuf.ia_type) || IA_ISDIR(stbuf.ia_type))
- return -1;
-
- ret = make_export_path (real_path, &export_path);
- if (!ret && export_path) {
- strcat (export_path, "/"GF_SOLARIS_XATTR_DIR);
- if (lstat (export_path, &statbuf)) {
- ret = mkdir (export_path, 0777);
- if (ret && (errno != EEXIST)) {
- gf_log (THIS->name, GF_LOG_DEBUG, "mkdir failed,"
- " errno: %d", errno);
- goto out;
- }
- }
+ int ret = -1;
+ char *export_path = NULL;
+ char xattr_path[PATH_MAX] = {
+ 0,
+ };
+ struct stat lstatbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct stat statbuf = {
+ 0,
+ };
+
+ ret = lstat(real_path, &lstatbuf);
+ if (ret != 0)
+ return ret;
+ iatt_from_stat(&stbuf, &lstatbuf);
+ if (IA_ISREG(stbuf.ia_type) || IA_ISDIR(stbuf.ia_type))
+ return -1;
+
+ ret = make_export_path(real_path, &export_path);
+ if (!ret && export_path) {
+ strcat(export_path, "/" GF_SOLARIS_XATTR_DIR);
+ if (lstat(export_path, &statbuf)) {
+ ret = mkdir(export_path, 0755);
+ if (ret && (errno != EEXIST)) {
+ gf_msg_debug(THIS->name, 0,
+ "mkdir failed,"
+ " errno: %d",
+ errno);
+ goto out;
+ }
+ }
- snprintf(xattr_path, PATH_MAX, "%s%s%lu", export_path,
- "/", stbuf.ia_ino);
+ snprintf(xattr_path, PATH_MAX, "%s%s%lu", export_path, "/",
+ stbuf.ia_ino);
- ret = lstat (xattr_path, &statbuf);
+ ret = lstat(xattr_path, &statbuf);
- if (ret) {
- ret = mknod (xattr_path, S_IFREG|O_WRONLY, 0);
- if (ret && (errno != EEXIST)) {
- gf_log (THIS->name, GF_LOG_WARNING,"Failed to create "
- "mapped file %s, error %d", xattr_path,
- errno);
- goto out;
- }
- }
- *path = gf_strdup (xattr_path);
+ if (ret) {
+ ret = mknod(xattr_path, S_IFREG | O_WRONLY, 0);
+ if (ret && (errno != EEXIST)) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to "
+ "create mapped file %s",
+ xattr_path);
+ goto out;
+ }
}
+ *path = gf_strdup(xattr_path);
+ }
out:
- if (export_path)
- GF_FREE (export_path);
- if (*path)
- return 0;
- else
- return -1;
+ GF_FREE(export_path);
+ if (*path)
+ return 0;
+ else
+ return -1;
}
int
-solaris_setxattr(const char *path, const char* key, const char *value,
+solaris_setxattr(const char *path, const char *key, const char *value,
size_t size, int flags)
{
- int attrfd = -1;
- int ret = 0;
- char *mapped_path = NULL;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrfd = attropen (mapped_path, key, flags|O_CREAT|O_WRONLY,
- 0777);
- } else {
- attrfd = attropen (path, key, flags|O_CREAT|O_WRONLY, 0777);
- }
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- ret = 0;
- } else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_ERROR,
- "Couldn't set extended attribute for %s (%d)",
- path, errno);
- ret = -1;
- }
- if (mapped_path)
- GF_FREE (mapped_path);
- return ret;
+ int attrfd = -1;
+ int ret = 0;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen(mapped_path, key, flags | O_CREAT | O_WRONLY, 0777);
+ } else {
+ attrfd = attropen(path, key, flags | O_CREAT | O_WRONLY, 0777);
+ }
+ if (attrfd >= 0) {
+ ftruncate(attrfd, 0);
+ ret = write(attrfd, value, size);
+ close(attrfd);
+ ret = 0;
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_ERROR, errno,
+ LG_MSG_SET_ATTRIBUTE_FAILED,
+ "Couldn't set "
+ "extended attribute for %s",
+ path);
+ ret = -1;
+ }
+ GF_FREE(mapped_path);
+ return ret;
}
-
int
solaris_listxattr(const char *path, char *list, size_t size)
{
- int attrdirfd = -1;
- ssize_t len = 0;
- DIR *dirptr = NULL;
- struct dirent *dent = NULL;
- int newfd = -1;
- char *mapped_path = NULL;
- int ret = -1;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrdirfd = attropen (mapped_path, ".", O_RDONLY, 0);
- } else {
- attrdirfd = attropen (path, ".", O_RDONLY, 0);
- }
- if (attrdirfd >= 0) {
- newfd = dup(attrdirfd);
- dirptr = fdopendir(newfd);
- if (dirptr) {
- while ((dent = readdir(dirptr))) {
- size_t listlen = strlen(dent->d_name);
- if (!strcmp(dent->d_name, ".") ||
- !strcmp(dent->d_name, "..")) {
- /* we don't want "." and ".." here */
- continue;
- }
- if (size == 0) {
- /* return the current size of the list
- of extended attribute names*/
- len += listlen + 1;
- } else {
- /* check size and copy entry + null
- into list. */
- if ((len + listlen + 1) > size) {
- errno = ERANGE;
- len = -1;
- break;
- } else {
- strncpy(list + len, dent->d_name, listlen);
- len += listlen;
- list[len] = '\0';
- ++len;
- }
- }
- }
-
- if (closedir(dirptr) == -1) {
- close (attrdirfd);
- len = -1;
- goto out;
- }
+ int attrdirfd = -1;
+ ssize_t len = 0;
+ DIR *dirptr = NULL;
+ struct dirent *dent = NULL;
+ int newfd = -1;
+ char *mapped_path = NULL;
+ int ret = -1;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrdirfd = attropen(mapped_path, ".", O_RDONLY, 0);
+ } else {
+ attrdirfd = attropen(path, ".", O_RDONLY, 0);
+ }
+ if (attrdirfd >= 0) {
+ newfd = dup(attrdirfd);
+ dirptr = fdopendir(newfd);
+ if (dirptr) {
+ while ((dent = readdir(dirptr))) {
+ size_t listlen = strlen(dent->d_name);
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
+ /* we don't want "." and ".." here */
+ continue;
+ }
+ if (size == 0) {
+ /* return the current size of the list
+ of extended attribute names*/
+ len += listlen + 1;
} else {
- close (attrdirfd);
+ /* check size and copy entry + null
+ into list. */
+ if ((len + listlen + 1) > size) {
+ errno = ERANGE;
len = -1;
- goto out;
+ break;
+ } else {
+ strncpy(list + len, dent->d_name, listlen);
+ len += listlen;
+ list[len] = '\0';
+ ++len;
+ }
}
- close (attrdirfd);
+ }
+
+ if (closedir(dirptr) == -1) {
+ close(attrdirfd);
+ len = -1;
+ goto out;
+ }
+ } else {
+ close(attrdirfd);
+ len = -1;
+ goto out;
}
+ close(attrdirfd);
+ }
out:
- if (mapped_path)
- GF_FREE (mapped_path);
- return len;
+ GF_FREE(mapped_path);
+ return len;
}
-
int
solaris_flistxattr(int fd, char *list, size_t size)
{
- int attrdirfd = -1;
- ssize_t len = 0;
- DIR *dirptr = NULL;
- struct dirent *dent = NULL;
- int newfd = -1;
-
- attrdirfd = openat (fd, ".", O_RDONLY, 0);
- if (attrdirfd >= 0) {
- newfd = dup(attrdirfd);
- dirptr = fdopendir(newfd);
- if (dirptr) {
- while ((dent = readdir(dirptr))) {
- size_t listlen = strlen(dent->d_name);
- if (!strcmp(dent->d_name, ".") ||
- !strcmp(dent->d_name, "..")) {
- /* we don't want "." and ".." here */
- continue;
- }
- if (size == 0) {
- /* return the current size of the list
- of extended attribute names*/
- len += listlen + 1;
- } else {
- /* check size and copy entry + null
- into list. */
- if ((len + listlen + 1) > size) {
- errno = ERANGE;
- len = -1;
- break;
- } else {
- strncpy(list + len, dent->d_name, listlen);
- len += listlen;
- list[len] = '\0';
- ++len;
- }
- }
- }
-
- if (closedir(dirptr) == -1) {
- close (attrdirfd);
- return -1;
- }
+ int attrdirfd = -1;
+ ssize_t len = 0;
+ DIR *dirptr = NULL;
+ struct dirent *dent = NULL;
+ int newfd = -1;
+
+ attrdirfd = openat(fd, ".", O_RDONLY, 0);
+ if (attrdirfd >= 0) {
+ newfd = dup(attrdirfd);
+ dirptr = fdopendir(newfd);
+ if (dirptr) {
+ while ((dent = readdir(dirptr))) {
+ size_t listlen = strlen(dent->d_name);
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
+ /* we don't want "." and ".." here */
+ continue;
+ }
+ if (size == 0) {
+ /* return the current size of the list
+ of extended attribute names*/
+ len += listlen + 1;
} else {
- close (attrdirfd);
- return -1;
+ /* check size and copy entry + null
+ into list. */
+ if ((len + listlen + 1) > size) {
+ errno = ERANGE;
+ len = -1;
+ break;
+ } else {
+ strncpy(list + len, dent->d_name, listlen);
+ len += listlen;
+ list[len] = '\0';
+ ++len;
+ }
}
- close (attrdirfd);
+ }
+
+ if (closedir(dirptr) == -1) {
+ close(attrdirfd);
+ return -1;
+ }
+ } else {
+ close(attrdirfd);
+ return -1;
}
- return len;
+ close(attrdirfd);
+ }
+ return len;
}
-
int
-solaris_removexattr(const char *path, const char* key)
+solaris_removexattr(const char *path, const char *key)
{
- int ret = -1;
- int attrfd = -1;
- char *mapped_path = NULL;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrfd = attropen (mapped_path, ".", O_RDONLY, 0);
- } else {
- attrfd = attropen (path, ".", O_RDONLY, 0);
- }
- if (attrfd >= 0) {
- ret = unlinkat (attrfd, key, 0);
- close (attrfd);
- } else {
- if (errno == ENOENT)
- errno = ENODATA;
- ret = -1;
- }
-
- if (mapped_path)
- GF_FREE (mapped_path);
-
- return ret;
+ int ret = -1;
+ int attrfd = -1;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen(mapped_path, ".", O_RDONLY, 0);
+ } else {
+ attrfd = attropen(path, ".", O_RDONLY, 0);
+ }
+ if (attrfd >= 0) {
+ ret = unlinkat(attrfd, key, 0);
+ close(attrfd);
+ } else {
+ if (errno == ENOENT)
+ errno = ENODATA;
+ ret = -1;
+ }
+
+ GF_FREE(mapped_path);
+
+ return ret;
}
int
-solaris_getxattr(const char *path,
- const char* key,
- char *value,
- size_t size)
+solaris_getxattr(const char *path, const char *key, char *value, size_t size)
{
- int attrfd = -1;
- int ret = 0;
- char *mapped_path = NULL;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrfd = attropen (mapped_path, key, O_RDONLY, 0);
- } else {
- attrfd = attropen (path, key, O_RDONLY, 0);
- }
-
- if (attrfd >= 0) {
- if (size == 0) {
- struct stat buf;
- fstat (attrfd, &buf);
- ret = buf.st_size;
- } else {
- ret = read (attrfd, value, size);
- }
- close (attrfd);
+ int attrfd = -1;
+ int ret = 0;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen(mapped_path, key, O_RDONLY, 0);
+ } else {
+ attrfd = attropen(path, key, O_RDONLY, 0);
+ }
+
+ if (attrfd >= 0) {
+ if (size == 0) {
+ struct stat buf;
+ fstat(attrfd, &buf);
+ ret = buf.st_size;
} else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_INFO,
- "Couldn't read extended attribute for the file %s (%s)",
- path, strerror (errno));
- if (errno == ENOENT)
- errno = ENODATA;
- ret = -1;
+ ret = read(attrfd, value, size);
}
- if (mapped_path)
- GF_FREE (mapped_path);
- return ret;
+ close(attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_INFO, errno,
+ LG_MSG_READ_ATTRIBUTE_FAILED,
+ "Couldn't read "
+ "extended attribute for the file %s",
+ path);
+ if (errno == ENOENT)
+ errno = ENODATA;
+ ret = -1;
+ }
+ GF_FREE(mapped_path);
+ return ret;
}
-
-char* strsep(char** str, const char* delims)
+char *
+strsep(char **str, const char *delims)
{
- char* token;
-
- if (*str==NULL) {
- /* No more tokens */
- return NULL;
- }
-
- token=*str;
- while (**str!='\0') {
- if (strchr(delims,**str)!=NULL) {
- **str='\0';
- (*str)++;
- return token;
- }
- (*str)++;
+ char *token;
+
+ if (*str == NULL) {
+ /* No more tokens */
+ return NULL;
+ }
+
+ token = *str;
+ while (**str != '\0') {
+ if (strchr(delims, **str) != NULL) {
+ **str = '\0';
+ (*str)++;
+ return token;
}
- /* There is no other token */
- *str=NULL;
- return token;
+ (*str)++;
+ }
+ /* There is no other token */
+ *str = NULL;
+ return token;
}
/* Code comes from libiberty */
int
-vasprintf (char **result, const char *format, va_list args)
+vasprintf(char **result, const char *format, va_list args)
{
- return gf_vasprintf(result, format, args);
+ return gf_vasprintf(result, format, args);
}
int
-asprintf (char **buf, const char *fmt, ...)
+asprintf(char **buf, const char *fmt, ...)
{
- int status;
- va_list ap;
+ int status;
+ va_list ap;
- va_start (ap, fmt);
- status = vasprintf (buf, fmt, ap);
- va_end (ap);
- return status;
+ va_start(ap, fmt);
+ status = vasprintf(buf, fmt, ap);
+ va_end(ap);
+ return status;
}
-int solaris_unlink (const char *path)
+int
+solaris_unlink(const char *path)
{
- char *mapped_path = NULL;
- struct stat stbuf = {0, };
- int ret = -1;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
-
-
- if (!ret && mapped_path) {
- if (lstat(path, &stbuf)) {
- gf_log (THIS->name, GF_LOG_WARNING, "Stat failed on mapped"
- " file %s with error %d", mapped_path, errno);
- goto out;
- }
- if (stbuf.st_nlink == 1) {
- if(remove (mapped_path))
- gf_log (THIS->name, GF_LOG_WARNING, "Failed to remove mapped "
- "file %s. Errno %d", mapped_path, errno);
- }
-
+ char *mapped_path = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ int ret = -1;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+
+ if (!ret && mapped_path) {
+ if (lstat(path, &stbuf)) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Stat failed on "
+ "mapped file %s",
+ mapped_path);
+ goto out;
}
+ if (stbuf.st_nlink == 1) {
+ if (remove(mapped_path))
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to "
+ "remove mapped file %s",
+ mapped_path);
+ }
+ }
out:
- if (mapped_path)
- GF_FREE (mapped_path);
+ GF_FREE(mapped_path);
- return unlink (path);
+ return unlink(path);
}
int
-solaris_rename (const char *old_path, const char *new_path)
+solaris_rename(const char *old_path, const char *new_path)
{
- char *mapped_path = NULL;
- int ret = -1;
-
- ret = solaris_xattr_resolve_path (new_path, &mapped_path);
+ char *mapped_path = NULL;
+ int ret = -1;
+ ret = solaris_xattr_resolve_path(new_path, &mapped_path);
- if (!ret && mapped_path) {
- if (!remove (mapped_path))
- gf_log (THIS->name, GF_LOG_WARNING, "Failed to remove mapped "
- "file %s. Errno %d", mapped_path, errno);
- GF_FREE (mapped_path);
- }
-
- return rename(old_path, new_path);
+ if (!ret && mapped_path) {
+ if (!remove(mapped_path))
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to remove "
+ "mapped file %s.",
+ mapped_path);
+ GF_FREE(mapped_path);
+ }
+ return rename(old_path, new_path);
}
char *
-mkdtemp (char *tempstring)
+mkdtemp(char *tempstring)
{
- char *new_string = NULL;
- int ret = 0;
+ char *new_string = NULL;
+ int ret = 0;
- new_string = mkstemp (tempstring);
- if (!new_string)
- goto out;
+ new_string = mkstemp(tempstring);
+ if (!new_string)
+ goto out;
- ret = mkdir (new_string, 0700);
- if (ret < 0)
- new_string = NULL;
+ ret = mkdir(new_string, 0700);
+ if (ret < 0)
+ new_string = NULL;
out:
- return new_string;
+ return new_string;
}
#endif /* GF_SOLARIS_HOST_OS */
+#ifdef GF_BSD_HOST_OS
+void
+gf_extattr_list_reshape(char *bsd_list, ssize_t size)
+{
+ /*
+ * the format of bsd_list is
+ * <attr_len>attr<attr_len>attr...
+ * we try to reformat it as Linux's
+ * attr<\0>attr<\0>...
+ * */
+ if (NULL == bsd_list || size <= 0)
+ return;
+
+ size_t i = 0, j;
+
+ while (i < size) {
+ size_t attr_len = bsd_list[i];
+
+ for (j = i; j < i + attr_len; ++j)
+ bsd_list[j] = bsd_list[j + 1];
+ bsd_list[j] = '\0';
+
+ i += attr_len + 1;
+ gf_msg_debug("syscall", 0, "syscall debug: %lu", attr_len);
+ }
+}
+#endif /* GF_BSD_HOST_OS */
+
#ifndef HAVE_STRNLEN
size_t
strnlen(const char *string, size_t maxlen)
{
- int len = 0;
- while ((len < maxlen) && string[len])
- len++;
- return len;
+ int len = 0;
+ while ((len < maxlen) && string[len])
+ len++;
+ return len;
}
#endif /* STRNLEN */
+
+int
+gf_umount_lazy(char *xlname, char *path, int rmdir_flag)
+{
+ int ret = -1;
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+#ifdef GF_LINUX_HOST_OS
+ runner_add_args(&runner, _PATH_UMOUNT, "-l", path, NULL);
+#else
+ if (rmdir_flag)
+ runner_add_args(&runner, SBIN_DIR "/umountd", "-r", path, NULL);
+ else
+ runner_add_args(&runner, SBIN_DIR "/umountd", path, NULL);
+#endif
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_msg(xlname, GF_LOG_ERROR, errno, LG_MSG_UNMOUNT_FAILED,
+ "Lazy unmount of %s", path);
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ if (!ret && rmdir_flag) {
+ ret = sys_rmdir(path);
+ if (ret)
+ gf_msg(xlname, GF_LOG_WARNING, errno, LG_MSG_DIR_OP_FAILED,
+ "rmdir %s", path);
+ }
+#endif
+
+ return ret;
+}
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h
deleted file mode 100644
index 596699fa7ce..00000000000
--- a/libglusterfs/src/compat.h
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __COMPAT_H__
-#define __COMPAT_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include "dict.h"
-
-#ifndef LLONG_MAX
-#define LLONG_MAX __LONG_LONG_MAX__ /* compat with old gcc */
-#endif /* LLONG_MAX */
-
-
-#ifdef GF_LINUX_HOST_OS
-
-#define UNIX_PATH_MAX 108
-
-#include <sys/un.h>
-#include <linux/limits.h>
-#include <sys/xattr.h>
-#include <endian.h>
-
-
-#ifndef HAVE_LLISTXATTR
-
-/* This part is valid only incase of old glibc which doesn't support
- * 'llistxattr()' system calls.
- */
-
-#define lremovexattr(path,key) removexattr(path,key)
-#define llistxattr(path,key,size) listxattr(path,key,size)
-#define lgetxattr(path, key, value, size) getxattr(path,key,value,size)
-#define lsetxattr(path,key,value,size,flags) setxattr(path,key,value,size,flags)
-
-#endif /* HAVE_LLISTXATTR */
-#endif /* GF_LINUX_HOST_OS */
-
-#ifdef GF_BSD_HOST_OS
-/* In case of FreeBSD and NetBSD */
-
-#define UNIX_PATH_MAX 104
-#include <sys/types.h>
-
-#include <sys/un.h>
-#include <sys/endian.h>
-#include <sys/extattr.h>
-#ifdef HAVE_SYS_XATTR_H
-#include <sys/xattr.h>
-#endif /* HAVE_SYS_XATTR_H */
-#include <limits.h>
-
-#include <libgen.h>
-
-#ifndef XATTR_CREATE
-enum {
- ATTR_CREATE = 1,
-#define XATTR_CREATE ATTR_CREATE
- ATTR_REPLACE = 2
-#define XATTR_REPLACE ATTR_REPLACE
-};
-#endif /* XATTR_CREATE */
-
-
-#ifndef sighandler_t
-#define sighandler_t sig_t
-#endif
-
-#ifndef ino64_t
-#define ino64_t ino_t
-#endif
-
-#ifndef EUCLEAN
-#define EUCLEAN 0
-#endif
-
-#include <netinet/in.h>
-#ifndef s6_addr16
-#define s6_addr16 __u6_addr.__u6_addr16
-#endif
-#ifndef s6_addr32
-#define s6_addr32 __u6_addr.__u6_addr32
-#endif
-
-/* Posix dictates NAME_MAX to be used */
-# ifndef NAME_MAX
-# ifdef MAXNAMLEN
-# define NAME_MAX MAXNAMLEN
-# else
-# define NAME_MAX 255
-# endif
-# endif
-
-#define F_GETLK64 F_GETLK
-#define F_SETLK64 F_SETLK
-#define F_SETLKW64 F_SETLKW
-
-#ifdef __NetBSD__
-char *basename_r(const char *);
-char *dirname_r(char *);
-
-#define basename(path) basename_r(path)
-#define dirname(path) dirname_r(path)
-#endif /* __NetBSD__ */
-#endif /* GF_BSD_HOST_OS */
-
-#ifdef GF_DARWIN_HOST_OS
-
-#define UNIX_PATH_MAX 104
-#include <sys/types.h>
-
-#include <sys/un.h>
-#include <machine/endian.h>
-#include <sys/xattr.h>
-#include <limits.h>
-
-#include <libgen.h>
-
-
-#if __DARWIN_64_BIT_INO_T == 0
-# error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
-#endif /* __DARWIN_64_BIT_INO_T */
-
-
-#if __DARWIN_64_BIT_INO_T == 0
-# error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
-#endif /* __DARWIN_64_BIT_INO_T */
-
-#ifndef sighandler_t
-#define sighandler_t sig_t
-#endif
-
-#ifndef EUCLEAN
-#define EUCLEAN 0
-#endif
-
-#include <netinet/in.h>
-#ifndef s6_addr16
-#define s6_addr16 __u6_addr.__u6_addr16
-#endif
-#ifndef s6_addr32
-#define s6_addr32 __u6_addr.__u6_addr32
-#endif
-
-/* Posix dictates NAME_MAX to be used */
-# ifndef NAME_MAX
-# ifdef MAXNAMLEN
-# define NAME_MAX MAXNAMLEN
-# else
-# define NAME_MAX 255
-# endif
-# endif
-
-#define F_GETLK64 F_GETLK
-#define F_SETLK64 F_SETLK
-#define F_SETLKW64 F_SETLKW
-
-#ifndef FTW_CONTINUE
- #define FTW_CONTINUE 0
-#endif
-
-int32_t gf_darwin_compat_listxattr (int len, dict_t *dict, int size);
-int32_t gf_darwin_compat_getxattr (const char *key, dict_t *dict);
-int32_t gf_darwin_compat_setxattr (dict_t *dict);
-
-#endif /* GF_DARWIN_HOST_OS */
-
-#ifdef GF_SOLARIS_HOST_OS
-
-#define UNIX_PATH_MAX 108
-#define EUCLEAN 117
-
-#include <sys/un.h>
-#include <limits.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <sys/fcntl.h>
-#include <libgen.h>
-#include <sys/mkdev.h>
-
-#ifndef lchmod
-#define lchmod chmod
-#endif
-
-#define lgetxattr(path, key, value, size) solaris_getxattr(path,key,value,size)
-enum {
- ATTR_CREATE = 1,
-#define XATTR_CREATE ATTR_CREATE
- ATTR_REPLACE = 2
-#define XATTR_REPLACE ATTR_REPLACE
-};
-
-/* This patch is not present in Solaris 10 and before */
-#ifndef dirfd
-#define dirfd(dirp) ((dirp)->dd_fd)
-#endif
-
-/* Posix dictates NAME_MAX to be used */
-# ifndef NAME_MAX
-# ifdef MAXNAMLEN
-# define NAME_MAX MAXNAMLEN
-# else
-# define NAME_MAX 255
-# endif
-# endif
-
-#include <netinet/in.h>
-#ifndef s6_addr16
-#define S6_ADDR16(x) ((uint16_t*) ((char*)&(x).s6_addr))
-#endif
-#ifndef s6_addr32
-#define s6_addr32 _S6_un._S6_u32
-#endif
-
-#define lutimes(filename,times) utimes(filename,times)
-
-#ifndef SEEK_SET
-#define SEEK_SET 0
-#endif
-
-enum {
- DT_UNKNOWN = 0,
-# define DT_UNKNOWN DT_UNKNOWN
- DT_FIFO = 1,
-# define DT_FIFO DT_FIFO
- DT_CHR = 2,
-# define DT_CHR DT_CHR
- DT_DIR = 4,
-# define DT_DIR DT_DIR
- DT_BLK = 6,
-# define DT_BLK DT_BLK
- DT_REG = 8,
-# define DT_REG DT_REG
- DT_LNK = 10,
-# define DT_LNK DT_LNK
- DT_SOCK = 12,
-# define DT_SOCK DT_SOCK
- DT_WHT = 14
-# define DT_WHT DT_WHT
-};
-
-#ifndef _PATH_MOUNTED
- #define _PATH_MOUNTED "/etc/mtab"
-#endif
-
-#ifndef O_ASYNC
- #ifdef FASYNC
- #define O_ASYNC FASYNC
- #else
- #define O_ASYNC 0
- #endif
-#endif
-
-#ifndef FTW_CONTINUE
- #define FTW_CONTINUE 0
-#endif
-
-int asprintf(char **string_ptr, const char *format, ...);
-
-int vasprintf (char **result, const char *format, va_list args);
-char* strsep(char** str, const char* delims);
-int solaris_listxattr(const char *path, char *list, size_t size);
-int solaris_removexattr(const char *path, const char* key);
-int solaris_getxattr(const char *path, const char* key,
- char *value, size_t size);
-int solaris_setxattr(const char *path, const char* key, const char *value,
- size_t size, int flags);
-int solaris_fgetxattr(int fd, const char* key,
- char *value, size_t size);
-int solaris_fsetxattr(int fd, const char* key, const char *value,
- size_t size, int flags);
-int solaris_flistxattr(int fd, char *list, size_t size);
-
-int solaris_rename (const char *oldpath, const char *newpath);
-
-int solaris_unlink (const char *pathname);
-
-char *mkdtemp (char *temp);
-
-#define GF_SOLARIS_XATTR_DIR ".glusterfs_xattr_inode"
-
-int solaris_xattr_resolve_path (const char *real_path, char **path);
-
-#endif /* GF_SOLARIS_HOST_OS */
-
-#ifndef HAVE_ARGP
-#include "argp.h"
-#else
-#include <argp.h>
-#endif /* HAVE_ARGP */
-
-#ifndef HAVE_STRNLEN
-size_t strnlen(const char *string, size_t maxlen);
-#endif /* STRNLEN */
-
-#ifndef strdupa
-#define strdupa(s) \
- (__extension__ \
- ({ \
- __const char *__old = (s); \
- size_t __len = strlen (__old) + 1; \
- char *__new = (char *) __builtin_alloca (__len); \
- (char *) memcpy (__new, __old, __len); \
- }))
-#endif
-
-#define GF_DIR_ALIGN(x) (((x) + sizeof (uint64_t) - 1) & ~(sizeof (uint64_t) - 1))
-
-#include <sys/types.h>
-#include <dirent.h>
-
-static inline int32_t
-dirent_size (struct dirent *entry)
-{
- int32_t size = -1;
-
-#ifdef GF_BSD_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
-#endif
-#ifdef GF_DARWIN_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
-#endif
-#ifdef GF_LINUX_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
-#endif
-#ifdef GF_SOLARIS_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
-#endif
- return size;
-}
-
-int gf_mkostemp (char *tmpl, int suffixlen, int flags);
-#define mkostemp(tmpl, flags) gf_mkostemp(tmpl, 0, flags);
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
-/* Linux, Solaris, Cygwin */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atim.tv_nsec = (val))
-#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtim.tv_nsec = (val))
-#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctim.tv_nsec = (val))
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
-/* FreeBSD, NetBSD */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atimespec.tv_nsec = (val))
-#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtimespec.tv_nsec = (val))
-#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctimespec.tv_nsec = (val))
-#else
-#define ST_ATIM_NSEC(stbuf) (0)
-#define ST_CTIM_NSEC(stbuf) (0)
-#define ST_MTIM_NSEC(stbuf) (0)
-#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0);
-#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0);
-#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0);
-#endif
-
-#ifndef IXDR_GET_LONG
-#define IXDR_GET_LONG(buf) ((long)IXDR_GET_U_INT32(buf))
-#endif
-
-#ifndef IXDR_PUT_LONG
-#define IXDR_PUT_LONG(buf, v) ((long)IXDR_PUT_INT32(buf, (long)(v)))
-#endif
-
-#ifndef IXDR_GET_U_LONG
-#define IXDR_GET_U_LONG(buf) ((u_long)IXDR_GET_LONG(buf))
-#endif
-
-#ifndef IXDR_PUT_U_LONG
-#define IXDR_PUT_U_LONG(buf, v) IXDR_PUT_LONG(buf, (long)(v))
-#endif
-
-#if defined(__GNUC__) && !defined(RELAX_POISONING)
-/* Use run API, see run.h */
-#pragma GCC poison system popen
-#endif
-
-#endif /* __COMPAT_H__ */
diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c
new file mode 100644
index 00000000000..3d890b04ec9
--- /dev/null
+++ b/libglusterfs/src/ctx.c
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <pthread.h>
+
+#include "glusterfs/globals.h"
+#include "glusterfs/glusterfs.h"
+#include "timer-wheel.h"
+
+glusterfs_ctx_t *
+glusterfs_ctx_new()
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ /* no GF_CALLOC here, gf_acct_mem_set_enable is not
+ yet decided at this point */
+ ctx = CALLOC(1, sizeof(*ctx));
+ if (!ctx) {
+ goto out;
+ }
+
+ ctx->mem_acct_enable = gf_global_mem_acct_enable_get();
+
+ INIT_LIST_HEAD(&ctx->graphs);
+ INIT_LIST_HEAD(&ctx->mempool_list);
+ INIT_LIST_HEAD(&ctx->volfile_list);
+
+ ctx->daemon_pipe[0] = -1;
+ ctx->daemon_pipe[1] = -1;
+
+ ctx->log.loglevel = DEFAULT_LOG_LEVEL;
+
+#if defined(RUN_WITH_MEMCHECK)
+ ctx->cmd_args.vgtool = _gf_memcheck;
+#elif defined(RUN_WITH_DRD)
+ ctx->cmd_args.vgtool = _gf_drd;
+#else
+ ctx->cmd_args.vgtool = _gf_none;
+#endif
+
+ /* lock is never destroyed! */
+ if (LOCK_INIT(&ctx->lock)) {
+ free(ctx);
+ ctx = NULL;
+ goto out;
+ }
+
+ GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, 0);
+ GF_ATOMIC_INIT(ctx->stats.total_pairs_used, 0);
+ GF_ATOMIC_INIT(ctx->stats.total_dicts_used, 0);
+out:
+ return ctx;
+}
+
+static void
+glusterfs_ctx_tw_destroy(struct gf_ctx_tw *ctx_tw)
+{
+ if (ctx_tw->timer_wheel)
+ gf_tw_cleanup_timers(ctx_tw->timer_wheel);
+
+ GF_FREE(ctx_tw);
+}
+
+struct tvec_base *
+glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx)
+{
+ struct gf_ctx_tw *ctx_tw = NULL;
+
+ LOCK(&ctx->lock);
+ {
+ if (ctx->tw) {
+ ctx_tw = GF_REF_GET(ctx->tw);
+ } else {
+ ctx_tw = GF_CALLOC(1, sizeof(struct gf_ctx_tw),
+ gf_common_mt_tw_ctx);
+ ctx_tw->timer_wheel = gf_tw_init_timers();
+ GF_REF_INIT(ctx_tw, glusterfs_ctx_tw_destroy);
+ ctx->tw = ctx_tw;
+ }
+ }
+ UNLOCK(&ctx->lock);
+
+ return ctx_tw->timer_wheel;
+}
+
+void
+glusterfs_ctx_tw_put(glusterfs_ctx_t *ctx)
+{
+ GF_REF_PUT(ctx->tw);
+}
diff --git a/libglusterfs/src/daemon.c b/libglusterfs/src/daemon.c
index 348e3ad4083..0a3e5438325 100644
--- a/libglusterfs/src/daemon.c
+++ b/libglusterfs/src/daemon.c
@@ -8,59 +8,58 @@
cases as published by the Free Software Foundation.
*/
-#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
-#include "daemon.h"
+#include "glusterfs/daemon.h"
int
-os_daemon_return (int nochdir, int noclose)
+os_daemon_return(int nochdir, int noclose)
{
- pid_t pid = -1;
- int ret = -1;
- FILE *ptr = NULL;
+ pid_t pid = -1;
+ int ret = -1;
+ FILE *ptr = NULL;
- ret = fork();
- if (ret)
- return ret;
+ ret = fork();
+ if (ret)
+ return ret;
- pid = setsid();
+ pid = setsid();
- if (pid == -1) {
- ret = -1;
- goto out;
- }
+ if (pid == -1) {
+ ret = -1;
+ goto out;
+ }
- if (!nochdir)
- ret = chdir("/");
+ if (!nochdir)
+ ret = chdir("/");
- if (!noclose) {
- ptr = freopen (DEVNULLPATH, "r", stdin);
- if (!ptr)
- goto out;
+ if (!noclose) {
+ ptr = freopen(DEVNULLPATH, "r", stdin);
+ if (!ptr)
+ goto out;
- ptr = freopen (DEVNULLPATH, "w", stdout);
- if (!ptr)
- goto out;
+ ptr = freopen(DEVNULLPATH, "w", stdout);
+ if (!ptr)
+ goto out;
- ptr = freopen (DEVNULLPATH, "w", stderr);
- if (!ptr)
- goto out;
- }
+ ptr = freopen(DEVNULLPATH, "w", stderr);
+ if (!ptr)
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-os_daemon (int nochdir, int noclose)
+os_daemon(int nochdir, int noclose)
{
- int ret = -1;
+ int ret = -1;
- ret = os_daemon_return (nochdir, noclose);
- if (ret <= 0)
- return ret;
+ ret = os_daemon_return(nochdir, noclose);
+ if (ret <= 0)
+ return ret;
- _exit (0);
+ _exit(0);
}
diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c
new file mode 100644
index 00000000000..a0ba1cfb299
--- /dev/null
+++ b/libglusterfs/src/default-args.c
@@ -0,0 +1,1651 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs/defaults.h"
+
+int
+args_lookup_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_lookup_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_stat_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_stat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->stat = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fstat_store(default_args_t *args, fd_t *fd, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fstat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (buf)
+ args->stat = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_truncate_store(default_args_t *args, loc_t *loc, off_t off, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->offset = off;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_truncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_ftruncate_store(default_args_t *args, fd_t *fd, off_t off, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = off;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_ftruncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_access_store(default_args_t *args, loc_t *loc, int32_t mask, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mask = mask;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_access_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readlink_store(default_args_t *args, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->size = size;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (path)
+ args->buf = gf_strdup(path);
+ if (stbuf)
+ args->stat = *stbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mknod_store(default_args_t *args, loc_t *loc, mode_t mode, dev_t rdev,
+ mode_t umask, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+ args->rdev = rdev;
+ args->umask = umask;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mknod_cbk_store(default_args_cbk_t *args, int op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mkdir_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+ args->umask = umask;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mkdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_unlink_store(default_args_t *args, loc_t *loc, int xflag, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->xflag = xflag;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_unlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_rmdir_store(default_args_t *args, loc_t *loc, int flags, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->flags = flags;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_rmdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_symlink_store(default_args_t *args, const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata)
+{
+ args->linkname = gf_strdup(linkname);
+ args->umask = umask;
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_symlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_rename_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, oldloc);
+ loc_copy(&args->loc2, newloc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_rename_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (buf)
+ args->stat = *buf;
+ if (preoldparent)
+ args->preparent = *preoldparent;
+ if (postoldparent)
+ args->postparent = *postoldparent;
+ if (prenewparent)
+ args->preparent2 = *prenewparent;
+ if (postnewparent)
+ args->postparent2 = *postnewparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_link_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, oldloc);
+ loc_copy(&args->loc2, newloc);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_link_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_create_store(default_args_t *args, loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->flags = flags;
+ args->mode = mode;
+ args->umask = umask;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_create_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_open_store(default_args_t *args, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->flags = flags;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_open_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readv_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->size = size;
+ args->offset = off;
+ args->flags = flags;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readv_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0) {
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->stat = *stbuf;
+ args->iobref = iobref_ref(iobref);
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_writev_store(default_args_t *args, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->offset = off;
+ args->flags = flags;
+ args->iobref = iobref_ref(iobref);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_writev_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0)
+ args->poststat = *postbuf;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_put_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t off,
+ struct iobref *iobref, dict_t *xattr, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+ args->umask = umask;
+ args->flags = flags;
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->offset = off;
+ args->iobref = iobref_ref(iobref);
+ if (xattr)
+ args->xattr = dict_ref(xattr);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_put_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0)
+ args->stat = *buf;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+int
+args_flush_store(default_args_t *args, fd_t *fd, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_flush_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsync_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->datasync = datasync;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fsync_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_opendir_store(default_args_t *args, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_opendir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsyncdir_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->datasync = datasync;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+int
+args_fsyncdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_statfs_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_statfs_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->statvfs = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_setxattr_store(default_args_t *args, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ /* TODO */
+ if (dict)
+ args->xattr = dict_ref(dict);
+ args->flags = flags;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_setxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_getxattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ if (name)
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_getxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (dict)
+ args->xattr = dict_ref(dict);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsetxattr_store(default_args_t *args, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+
+ if (dict)
+ args->xattr = dict_ref(dict);
+ args->flags = flags;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fsetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fgetxattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+
+ if (name)
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fgetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (dict)
+ args->xattr = dict_ref(dict);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_removexattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_removexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fremovexattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fremovexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_lk_store(default_args_t *args, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->cmd = cmd;
+ args->lock = *lock;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_lk_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->lock = *lock;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_inodelk_store(default_args_t *args, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ loc_copy(&args->loc, loc);
+ args->cmd = cmd;
+ args->lock = *lock;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_inodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_finodelk_store(default_args_t *args, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ args->cmd = cmd;
+ args->lock = *lock;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_finodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_entrylk_store(default_args_t *args, const char *volume, loc_t *loc,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ loc_copy(&args->loc, loc);
+
+ args->entrylkcmd = cmd;
+ args->entrylktype = type;
+
+ if (name)
+ args->name = gf_strdup(name);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_entrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fentrylk_store(default_args_t *args, const char *volume, fd_t *fd,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->entrylkcmd = cmd;
+ args->entrylktype = type;
+ if (name)
+ args->name = gf_strdup(name);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fentrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readdirp_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->size = size;
+ args->offset = off;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_readdirp_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *stub_entry = NULL, *entry = NULL;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret > 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ stub_entry = gf_dirent_for_name(entry->d_name);
+ if (!stub_entry)
+ goto out;
+ stub_entry->d_off = entry->d_off;
+ stub_entry->d_ino = entry->d_ino;
+ stub_entry->d_stat = entry->d_stat;
+ stub_entry->d_type = entry->d_type;
+ if (entry->inode)
+ stub_entry->inode = inode_ref(entry->inode);
+ if (entry->dict)
+ stub_entry->dict = dict_ref(entry->dict);
+ list_add_tail(&stub_entry->list, &args->entries.list);
+ }
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+out:
+ return 0;
+}
+
+int
+args_readdir_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->size = size;
+ args->offset = off;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_readdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *stub_entry = NULL, *entry = NULL;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret > 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ stub_entry = gf_dirent_for_name(entry->d_name);
+ if (!stub_entry)
+ goto out;
+ stub_entry->d_off = entry->d_off;
+ stub_entry->d_ino = entry->d_ino;
+ stub_entry->d_type = entry->d_type;
+ list_add_tail(&stub_entry->list, &args->entries.list);
+ }
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+out:
+ return 0;
+}
+
+int
+args_rchecksum_store(default_args_t *args, fd_t *fd, off_t offset, int32_t len,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->offset = offset;
+ args->size = len;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_rchecksum_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0) {
+ args->weak_checksum = weak_checksum;
+ args->strong_checksum = gf_memdup(strong_checksum,
+ SHA256_DIGEST_LENGTH);
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_xattrop_store(default_args_t *args, loc_t *loc, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ args->optype = optype;
+ args->xattr = dict_ref(xattr);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_xattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xattr)
+ args->xattr = dict_ref(xattr);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fxattrop_store(default_args_t *args, fd_t *fd, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+
+ args->optype = optype;
+ args->xattr = dict_ref(xattr);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fxattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xattr)
+ args->xattr = dict_ref(xattr);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_setattr_store(default_args_t *args, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ if (stbuf)
+ args->stat = *stbuf;
+
+ args->valid = valid;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_setattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsetattr_store(default_args_t *args, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ if (stbuf)
+ args->stat = *stbuf;
+
+ args->valid = valid;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+int
+args_fsetattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fallocate_store(default_args_t *args, fd_t *fd, int32_t mode, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->flags = mode;
+ args->offset = offset;
+ args->size = len;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fallocate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_discard_store(default_args_t *args, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = offset;
+ args->size = len;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_discard_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_zerofill_store(default_args_t *args, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = offset;
+ args->size = len;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_zerofill_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_ipc_store(default_args_t *args, int32_t op, dict_t *xdata)
+{
+ args->cmd = op;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_ipc_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_seek_store(default_args_t *args, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = offset;
+ args->what = what;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_seek_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ off_t offset, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ args->offset = offset;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_getactivelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, lock_migration_info_t *locklist,
+ dict_t *xdata)
+{
+ lock_migration_info_t *stub_entry = NULL, *entry = NULL;
+ int ret = 0;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ /*op_ret needs to carry the number of locks present in the list*/
+ if (op_ret > 0) {
+ list_for_each_entry(entry, &locklist->list, list)
+ {
+ stub_entry = GF_CALLOC(1, sizeof(*stub_entry), gf_common_mt_char);
+ if (!stub_entry) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&stub_entry->list);
+ stub_entry->flock = entry->flock;
+
+ stub_entry->lk_flags = entry->lk_flags;
+
+ stub_entry->client_uid = gf_strdup(entry->client_uid);
+ if (!stub_entry->client_uid) {
+ GF_FREE(stub_entry);
+ ret = -1;
+ goto out;
+ }
+
+ list_add_tail(&stub_entry->list, &args->locklist.list);
+ }
+ }
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+out:
+ return ret;
+}
+
+int
+args_setactivelk_store(default_args_t *args, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ lock_migration_info_t *stub_entry = NULL, *entry = NULL;
+ int ret = 0;
+
+ list_for_each_entry(entry, &locklist->list, list)
+ {
+ stub_entry = GF_CALLOC(1, sizeof(*stub_entry), gf_common_mt_lock_mig);
+ if (!stub_entry) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&stub_entry->list);
+ stub_entry->flock = entry->flock;
+
+ stub_entry->lk_flags = entry->lk_flags;
+
+ stub_entry->client_uid = gf_strdup(entry->client_uid);
+ if (!stub_entry->client_uid) {
+ GF_FREE(stub_entry);
+ ret = -1;
+ goto out;
+ }
+
+ list_add_tail(&stub_entry->list, &args->locklist.list);
+ }
+
+ loc_copy(&args->loc, loc);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+out:
+ return ret;
+}
+
+void
+args_lease_store(default_args_t *args, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->lease = *lease;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return;
+}
+
+void
+args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->lease = *lease;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+}
+
+int
+args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
+{
+ if (fd_in)
+ args->fd = fd_ref(fd_in);
+ if (fd_out)
+ args->fd_dst = fd_ref(fd_out);
+ args->size = len;
+ args->off_in = off_in;
+ args->off_out = off_out;
+ args->flags = flags;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0) {
+ if (postbuf_dst)
+ args->poststat = *postbuf_dst;
+ if (prebuf_dst)
+ args->prestat = *prebuf_dst;
+ if (stbuf)
+ args->stat = *stbuf;
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+void
+args_cbk_wipe(default_args_cbk_t *args_cbk)
+{
+ if (!args_cbk)
+ return;
+ if (args_cbk->inode)
+ inode_unref(args_cbk->inode);
+
+ GF_FREE((char *)args_cbk->buf);
+
+ GF_FREE(args_cbk->vector);
+
+ if (args_cbk->iobref)
+ iobref_unref(args_cbk->iobref);
+
+ if (args_cbk->fd)
+ fd_unref(args_cbk->fd);
+
+ if (args_cbk->xattr)
+ dict_unref(args_cbk->xattr);
+
+ GF_FREE(args_cbk->strong_checksum);
+
+ if (args_cbk->xdata)
+ dict_unref(args_cbk->xdata);
+
+ if (!list_empty(&args_cbk->entries.list))
+ gf_dirent_free(&args_cbk->entries);
+}
+
+void
+args_wipe(default_args_t *args)
+{
+ if (!args)
+ return;
+
+ loc_wipe(&args->loc);
+
+ loc_wipe(&args->loc2);
+
+ if (args->fd)
+ fd_unref(args->fd);
+
+ GF_FREE((char *)args->linkname);
+
+ GF_FREE(args->vector);
+
+ if (args->iobref)
+ iobref_unref(args->iobref);
+
+ if (args->xattr)
+ dict_unref(args->xattr);
+
+ if (args->xdata)
+ dict_unref(args->xdata);
+
+ GF_FREE((char *)args->name);
+
+ GF_FREE((char *)args->volume);
+}
+
+void
+args_cbk_init(default_args_cbk_t *args_cbk)
+{
+ INIT_LIST_HEAD(&args_cbk->entries);
+}
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
new file mode 100644
index 00000000000..3cf707f42aa
--- /dev/null
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* libglusterfs/src/defaults.c:
+ This file contains functions, which are used to fill the 'fops', 'cbk'
+ structures in the xlator structures, if they are not written. Here, all the
+ function calls are plainly forwarded to the first child of the xlator, and
+ all the *_cbk function does plain STACK_UNWIND of the frame, and returns.
+
+ This function also implements *_resume () functions, which does same
+ operation as a fop().
+
+ All the functions are plain enough to understand.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+
+#pragma generate
+
+struct xlator_fops _default_fops = {
+ .create = default_create,
+ .open = default_open,
+ .stat = default_stat,
+ .readlink = default_readlink,
+ .mknod = default_mknod,
+ .mkdir = default_mkdir,
+ .unlink = default_unlink,
+ .rmdir = default_rmdir,
+ .symlink = default_symlink,
+ .rename = default_rename,
+ .link = default_link,
+ .truncate = default_truncate,
+ .readv = default_readv,
+ .writev = default_writev,
+ .statfs = default_statfs,
+ .flush = default_flush,
+ .fsync = default_fsync,
+ .setxattr = default_setxattr,
+ .getxattr = default_getxattr,
+ .fsetxattr = default_fsetxattr,
+ .fgetxattr = default_fgetxattr,
+ .removexattr = default_removexattr,
+ .fremovexattr = default_fremovexattr,
+ .opendir = default_opendir,
+ .readdir = default_readdir,
+ .readdirp = default_readdirp,
+ .fsyncdir = default_fsyncdir,
+ .access = default_access,
+ .ftruncate = default_ftruncate,
+ .fstat = default_fstat,
+ .lk = default_lk,
+ .inodelk = default_inodelk,
+ .finodelk = default_finodelk,
+ .entrylk = default_entrylk,
+ .fentrylk = default_fentrylk,
+ .lookup = default_lookup,
+ .rchecksum = default_rchecksum,
+ .xattrop = default_xattrop,
+ .fxattrop = default_fxattrop,
+ .setattr = default_setattr,
+ .fsetattr = default_fsetattr,
+ .fallocate = default_fallocate,
+ .discard = default_discard,
+ .zerofill = default_zerofill,
+ .ipc = default_ipc,
+ .seek = default_seek,
+
+ .getspec = default_getspec,
+ .getactivelk = default_getactivelk,
+ .setactivelk = default_setactivelk,
+ .put = default_put,
+ .icreate = default_icreate,
+ .namelink = default_namelink,
+ .copy_file_range = default_copy_file_range,
+};
+struct xlator_fops *default_fops = &_default_fops;
+
+/*
+ * Remaining functions don't follow the fop calling conventions, so they're
+ * not generated.
+ */
+
+int32_t
+default_forget(xlator_t *this, inode_t *inode)
+{
+ gf_log_callingfn(this->name, GF_LOG_DEBUG,
+ "xlator does not "
+ "implement forget_cbk");
+ return 0;
+}
+
+int32_t
+default_releasedir(xlator_t *this, fd_t *fd)
+{
+ gf_log_callingfn(this->name, GF_LOG_DEBUG,
+ "xlator does not "
+ "implement releasedir_cbk");
+ return 0;
+}
+
+int32_t
+default_release(xlator_t *this, fd_t *fd)
+{
+ gf_log_callingfn(this->name, GF_LOG_DEBUG,
+ "xlator does not "
+ "implement release_cbk");
+ return 0;
+}
+
+/* notify */
+int
+default_notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ GF_UNUSED int ret = 0;
+ xlator_t *victim = data;
+
+ glusterfs_graph_t *graph = NULL;
+
+ GF_VALIDATE_OR_GOTO("notify", this, out);
+ graph = this->graph;
+ GF_VALIDATE_OR_GOTO(this->name, graph, out);
+
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ case GF_EVENT_PARENT_DOWN: {
+ xlator_list_t *list = this->children;
+
+ while (list) {
+ if (victim && victim->cleanup_starting)
+ xlator_notify(list->xlator, event, victim);
+ else
+ xlator_notify(list->xlator, event, this);
+ list = list->next;
+ }
+ } break;
+ case GF_EVENT_CHILD_CONNECTING:
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_AUTH_FAILED: {
+ xlator_list_t *parent = this->parents;
+
+ /*
+ * Handle case of CHILD_* & AUTH_FAILED event specially, send
+ * it to fuse.
+ */
+ if (!parent && this->ctx && this->ctx->master) {
+ xlator_notify(this->ctx->master, event, this->graph, NULL);
+ }
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify(parent->xlator, event, this, NULL);
+ parent = parent->next;
+ }
+
+ if (event == GF_EVENT_CHILD_DOWN &&
+ !(this->ctx && this->ctx->master) && (graph->top == this)) {
+ /* Make sure this is not a daemon with master xlator */
+ pthread_mutex_lock(&graph->mutex);
+ {
+ if (graph->parent_down ==
+ graph_total_client_xlator(graph)) {
+ graph->used = 0;
+ pthread_cond_broadcast(&graph->child_down_cond);
+ }
+ }
+ pthread_mutex_unlock(&graph->mutex);
+ }
+ } break;
+ case GF_EVENT_UPCALL: {
+ xlator_list_t *parent = this->parents;
+
+ if (!parent && this->ctx && this->ctx->master)
+ xlator_notify(this->ctx->master, event, data, NULL);
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify(parent->xlator, event, data, NULL);
+ parent = parent->next;
+ }
+ } break;
+ case GF_EVENT_CHILD_PING: {
+ xlator_list_t *parent = this->parents;
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ XLATOR_NOTIFY(ret, parent->xlator, event, this, data);
+ parent = parent->next;
+ }
+ } break;
+ case GF_EVENT_CLEANUP: {
+ xlator_list_t *list = this->children;
+
+ while (list) {
+ xlator_notify(list->xlator, event, this);
+ list = list->next;
+ }
+ } break;
+
+ default: {
+ xlator_list_t *parent = this->parents;
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify(parent->xlator, event, this, NULL);
+ parent = parent->next;
+ }
+ }
+ /*
+ * Apparently our picky-about-everything else coding standard allows
+ * adjacent same-indendation-level close braces. Clearly it has
+ * nothing to do with readability.
+ */
+ }
+out:
+ return 0;
+}
+
+int32_t
+default_mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_common_mt_end);
+
+ return ret;
+}
+
+void
+default_fini(xlator_t *this)
+{
+ if (this && this->private)
+ GF_FREE(this->private);
+}
diff --git a/libglusterfs/src/defaults.c b/libglusterfs/src/defaults.c
deleted file mode 100644
index bf4d019349d..00000000000
--- a/libglusterfs/src/defaults.c
+++ /dev/null
@@ -1,1354 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* libglusterfs/src/defaults.c:
- This file contains functions, which are used to fill the 'fops', 'cbk'
- structures in the xlator structures, if they are not written. Here, all the
- function calls are plainly forwared to the first child of the xlator, and
- all the *_cbk function does plain STACK_UNWIND of the frame, and returns.
-
- This function also implements *_resume () functions, which does same
- operation as a fop().
-
- All the functions are plain enough to understand.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-/* _CBK function section */
-
-int32_t
-default_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xdata, postparent);
- return 0;
-}
-
-int32_t
-default_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-
-int32_t
-default_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
-}
-
-int32_t
-default_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
-}
-
-int32_t
-default_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf,
- xdata);
- return 0;
-}
-
-
-int32_t
-default_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-default_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-default_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-default_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-default_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-default_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
- return 0;
-}
-
-
-int32_t
-default_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
-}
-
-
-int32_t
-default_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-
-int32_t
-default_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
-}
-
-int32_t
-default_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-default_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-
-int32_t
-default_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-
-int32_t
-default_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-
-int32_t
-default_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-default_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-default_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-
-int32_t
-default_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
- return 0;
-}
-
-int32_t
-default_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
-default_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
- return 0;
-}
-
-int32_t
-default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
- return 0;
-}
-
-int32_t
-default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, char *spec_data)
-{
- STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
- return 0;
-}
-
-/* RESUME */
-
-int32_t
-default_fgetxattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_fsetxattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_setxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_statfs_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsyncdir_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsyncdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_opendir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fstat_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fsync_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_flush_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- return 0;
-}
-
-int32_t
-default_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- flags, iobref, xdata);
- return 0;
-}
-
-int32_t
-default_readv_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, uint32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
- return 0;
-}
-
-
-int32_t
-default_open_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-}
-
-int32_t
-default_create_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
- fd, xdata);
- return 0;
-}
-
-int32_t
-default_link_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-int32_t
-default_rename_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
- return 0;
-}
-
-
-int
-default_symlink_resume (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_rmdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
- return 0;
-}
-
-int32_t
-default_unlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int xflag, dict_t *xdata)
-{
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
- return 0;
-}
-
-int
-default_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
-}
-
-
-int
-default_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_readlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size, dict_t *xdata)
-{
- STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
- return 0;
-}
-
-
-int32_t
-default_access_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask, dict_t *xdata)
-{
- STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask, xdata);
- return 0;
-}
-
-int32_t
-default_ftruncate_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, dict_t *xdata)
-{
- STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
-}
-
-int32_t
-default_getxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
-}
-
-
-int32_t
-default_xattrop_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_fxattrop_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_removexattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-int32_t
-default_fremovexattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_lk_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- STACK_WIND (frame, default_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
- return 0;
-}
-
-
-int32_t
-default_inodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_inodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_finodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_entrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_entrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_fentrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_rchecksum_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, int32_t len,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdir_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdirp_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *xdata)
-{
- STACK_WIND (frame, default_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
- return 0;
-}
-
-int32_t
-default_setattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
-}
-
-int32_t
-default_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
-}
-
-int32_t
-default_stat_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-}
-
-int32_t
-default_lookup_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsetattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
-}
-
-/* FOPS */
-
-int32_t
-default_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsyncdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- return 0;
-}
-
-int32_t
-default_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- flags, iobref, xdata);
- return 0;
-}
-
-int32_t
-default_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
- return 0;
-}
-
-
-int32_t
-default_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-}
-
-int32_t
-default_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
- fd, xdata);
- return 0;
-}
-
-int32_t
-default_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-int32_t
-default_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
- return 0;
-}
-
-
-int
-default_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
- return 0;
-}
-
-int32_t
-default_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
- return 0;
-}
-
-int
-default_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
-}
-
-
-int
-default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
-{
- STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
- return 0;
-}
-
-
-int32_t
-default_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata)
-{
- STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask, xdata);
- return 0;
-}
-
-int32_t
-default_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
-{
- STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
-}
-
-int32_t
-default_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
-}
-
-
-int32_t
-default_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-int32_t
-default_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- STACK_WIND (frame, default_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
- return 0;
-}
-
-
-int32_t
-default_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_inodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_entrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *xdata)
-{
- STACK_WIND (frame, default_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
- return 0;
-}
-
-int32_t
-default_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
-}
-
-int32_t
-default_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
-}
-
-int32_t
-default_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-}
-
-int32_t
-default_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
-}
-
-
-int32_t
-default_forget (xlator_t *this, inode_t *inode)
-{
- gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
- "implement forget_cbk");
- return 0;
-}
-
-
-int32_t
-default_releasedir (xlator_t *this, fd_t *fd)
-{
- gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
- "implement releasedir_cbk");
- return 0;
-}
-
-int32_t
-default_release (xlator_t *this, fd_t *fd)
-{
- gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
- "implement release_cbk");
- return 0;
-}
-
-/* End of FOP/_CBK/_RESUME section */
-
-
-/* Management operations */
-
-int32_t
-default_getspec (call_frame_t *frame, xlator_t *this, const char *key,
- int32_t flags)
-{
- STACK_WIND (frame, default_getspec_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getspec, key, flags);
- return 0;
-}
-
-/* notify */
-int
-default_notify (xlator_t *this, int32_t event, void *data, ...)
-{
- switch (event)
- {
- case GF_EVENT_PARENT_UP:
- case GF_EVENT_PARENT_DOWN:
- {
- xlator_list_t *list = this->children;
-
- while (list) {
- xlator_notify (list->xlator, event, this);
- list = list->next;
- }
- }
- break;
- case GF_EVENT_CHILD_CONNECTING:
- case GF_EVENT_CHILD_MODIFIED:
- case GF_EVENT_CHILD_DOWN:
- case GF_EVENT_CHILD_UP:
- case GF_EVENT_AUTH_FAILED:
- {
- xlator_list_t *parent = this->parents;
- /* Handle case of CHILD_* & AUTH_FAILED event specially, send it to fuse */
- if (!parent && this->ctx && this->ctx->master)
- xlator_notify (this->ctx->master, event, this->graph, NULL);
-
- while (parent) {
- if (parent->xlator->init_succeeded)
- xlator_notify (parent->xlator, event,
- this, NULL);
- parent = parent->next;
- }
- }
- break;
- default:
- {
- xlator_list_t *parent = this->parents;
- while (parent) {
- if (parent->xlator->init_succeeded)
- xlator_notify (parent->xlator, event,
- this, NULL);
- parent = parent->next;
- }
- }
- }
-
- return 0;
-}
-
-int32_t
-default_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- ret = xlator_mem_acct_init (this, gf_common_mt_end);
-
- return ret;
-}
diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/defaults.h
deleted file mode 100644
index 8a9de7899d3..00000000000
--- a/libglusterfs/src/defaults.h
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* libglusterfs/src/defaults.h:
- This file contains definition of default fops and mops functions.
-*/
-
-#ifndef _DEFAULTS_H
-#define _DEFAULTS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-int32_t default_notify (xlator_t *this,
- int32_t event,
- void *data,
- ...);
-
-int32_t default_forget (xlator_t *this, inode_t *inode);
-
-int32_t default_release (xlator_t *this, fd_t *fd);
-
-int32_t default_releasedir (xlator_t *this, fd_t *fd);
-
-
-/* Management Operations */
-
-int32_t default_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
-
-int32_t default_rchecksum (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-/* FileSystem operations */
-int32_t default_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xdata);
-
-int32_t default_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-int32_t default_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-int32_t default_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-int32_t default_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-int32_t default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
-
-int32_t default_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
-
-int32_t default_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata);
-
-int32_t default_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-int32_t default_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd,
- dict_t *xdata);
-
-int32_t default_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- uint32_t flags, dict_t *xdata);
-
-int32_t default_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-int32_t default_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-int32_t default_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fremovexattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t default_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t default_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-int32_t default_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-/* Resume */
-int32_t default_getspec_resume (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
-
-int32_t default_rchecksum_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-/* FileSystem operations */
-int32_t default_lookup_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xdata);
-
-int32_t default_stat_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_fstat_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_truncate_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-int32_t default_ftruncate_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-int32_t default_access_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-int32_t default_readlink_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-int32_t default_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask,
- dict_t *xdata);
-
-int32_t default_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata);
-
-int32_t default_unlink_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_rmdir_resume (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_symlink_resume (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata);
-
-int32_t default_rename_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_link_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_create_resume (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-int32_t default_open_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata);
-
-int32_t default_readv_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, uint32_t flags, dict_t *xdata);
-
-int32_t default_writev_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-int32_t default_flush_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_fsync_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_opendir_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-int32_t default_fsyncdir_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_statfs_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_setxattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_getxattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fsetxattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_fgetxattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_removexattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fremovexattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_lk_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_inodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_finodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_entrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_fentrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_readdir_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_readdirp_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_xattrop_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t default_fxattrop_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-int32_t default_rchecksum_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-int32_t default_setattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-int32_t default_fsetattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-/* _cbk */
-
-int32_t
-default_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent);
-
-int32_t
-default_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata);
-
-
-int32_t
-default_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-int32_t
-default_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-int32_t
-default_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata);
-
-
-int32_t
-default_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-
-int32_t
-default_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-
-int32_t
-default_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata);
-
-
-int32_t
-default_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-
-int32_t
-default_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
-
-int32_t
-default_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
-
-
-int32_t
-default_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-
-int32_t
-default_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-
-int32_t
-default_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-int32_t
-default_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata);
-
-int32_t
-default_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
-
-int32_t
-default_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata);
-
-
-int32_t
-default_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-int32_t
-default_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-
-int32_t
-default_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-
-int32_t
-default_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-int32_t
-default_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-int32_t
-default_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-
-int32_t
-default_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata);
-
-int32_t
-default_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-int32_t
-default_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-int32_t
-default_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata);
-
-
-int32_t
-default_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
-
-
-int32_t
-default_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
-
-int32_t
-default_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-int32_t
-default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-int32_t
-default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, char *spec_data);
-
-int32_t
-default_mem_acct_init (xlator_t *this);
-
-#endif /* _DEFAULTS_H */
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index 9b0d7ff18b7..1d9be9217a6 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -14,1102 +14,1474 @@
#include <stdio.h>
#include <inttypes.h>
#include <limits.h>
+#include <fnmatch.h>
+
+#include "glusterfs/dict.h"
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/compat-errno.h"
+#include "glusterfs/byte-order.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+struct dict_cmp {
+ dict_t *dict;
+ gf_boolean_t (*value_ignore)(char *k);
+};
+
+#define VALIDATE_DATA_AND_LOG(data, type, key, ret_val) \
+ do { \
+ if (!data || !data->data) { \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
+ "data is NULL"); \
+ return ret_val; \
+ } \
+ /* Not of the asked type, or old version */ \
+ if ((data->data_type != type) && \
+ (data->data_type != GF_DATA_TYPE_STR_OLD)) { \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
+ "key %s, %s type asked, has %s type", key, \
+ data_type_name[type], \
+ data_type_name[data->data_type]); \
+ } \
+ } while (0)
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "common-utils.h"
-#include "dict.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "globals.h"
-
-data_pair_t *
-get_new_data_pair ()
+static data_t *
+get_new_data()
{
- data_pair_t *data_pair_ptr = NULL;
-
- data_pair_ptr = mem_get0 (THIS->ctx->dict_pair_pool);
+ data_t *data = mem_get(THIS->ctx->dict_data_pool);
- return data_pair_ptr;
-}
-
-data_t *
-get_new_data ()
-{
- data_t *data = NULL;
+ if (!data)
+ return NULL;
- data = mem_get0 (THIS->ctx->dict_data_pool);
- if (!data) {
- return NULL;
- }
+ GF_ATOMIC_INIT(data->refcount, 0);
+ data->is_static = _gf_false;
- LOCK_INIT (&data->lock);
- return data;
+ return data;
}
-dict_t *
-get_new_dict_full (int size_hint)
+static dict_t *
+get_new_dict_full(int size_hint)
{
- dict_t *dict = mem_get0 (THIS->ctx->dict_pool);
-
- if (!dict) {
- return NULL;
- }
-
- dict->hash_size = size_hint;
- dict->members = mem_get0 (THIS->ctx->dict_pair_pool);
+ dict_t *dict = mem_get0(THIS->ctx->dict_pool);
+ if (!dict) {
+ return NULL;
+ }
+
+ dict->hash_size = size_hint;
+ if (size_hint == 1) {
+ /*
+ * This is the only case we ever see currently. If we ever
+ * need to support resizing the hash table, the resize function
+ * will have to take into account the possibility that
+ * "members" is not separately allocated (i.e. don't just call
+ * realloc() blindly.
+ */
+ dict->members = &dict->members_internal;
+ } else {
+ /*
+ * We actually need to allocate space for size_hint *pointers*
+ * but we actually allocate space for one *structure*. Since
+ * a data_pair_t consists of five pointers, we're wasting four
+ * pointers' worth for N=1, and will overrun what we allocated
+ * for N>5. If anybody ever starts using size_hint, we'll need
+ * to fix this.
+ */
+ GF_ASSERT(size_hint <= (sizeof(data_pair_t) / sizeof(data_pair_t *)));
+ dict->members = mem_get0(THIS->ctx->dict_pair_pool);
if (!dict->members) {
- mem_put (dict);
- return NULL;
+ mem_put(dict);
+ return NULL;
}
+ }
- LOCK_INIT (&dict->lock);
+ dict->free_pair.key = NULL;
+ dict->totkvlen = 0;
+ LOCK_INIT(&dict->lock);
- return dict;
+ return dict;
}
dict_t *
-get_new_dict (void)
+dict_new(void)
{
- return get_new_dict_full (1);
+ dict_t *dict = get_new_dict_full(1);
+
+ if (dict)
+ dict_ref(dict);
+
+ return dict;
}
-dict_t *
-dict_new (void)
+int32_t
+is_data_equal(data_t *one, data_t *two)
{
- dict_t *dict = NULL;
+ struct iatt *iatt1, *iatt2;
+ struct mdata_iatt *mdata_iatt1, *mdata_iatt2;
- dict = get_new_dict_full(1);
+ if (!one || !two || !one->data || !two->data) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "input arguments are provided "
+ "with value data_t as NULL");
+ return -1;
+ }
- if (dict)
- dict_ref (dict);
+ if (one == two)
+ return 1;
- return dict;
-}
+ if (one->data == two->data)
+ return 1;
+ if (one->data_type != two->data_type) {
+ return 0;
+ }
-int32_t
-is_data_equal (data_t *one,
- data_t *two)
-{
- if (!one || !two || !one->data || !two->data) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "input arguments are provided "
- "with value data_t as NULL");
- return -1;
- }
+ if (one->data_type == GF_DATA_TYPE_IATT) {
+ if ((one->len < sizeof(struct iatt)) ||
+ (two->len < sizeof(struct iatt))) {
+ return 0;
+ }
- if (one == two)
- return 1;
+ iatt1 = (struct iatt *)one->data;
+ iatt2 = (struct iatt *)two->data;
- if (one->len != two->len)
+ /* Two iatt structs are considered equal if main fields are
+ * equal, even if times differ.
+ * TODO: maybe when ctime if fully operational we could
+ * enforce time matching. */
+ if (iatt1->ia_ino != iatt2->ia_ino) {
+ return 0;
+ }
+ if (iatt1->ia_type != iatt2->ia_type) {
+ return 0;
+ }
+ if ((iatt1->ia_type == IA_IFBLK) || (iatt1->ia_type == IA_IFCHR)) {
+ if (iatt1->ia_rdev != iatt2->ia_rdev) {
return 0;
+ }
+ }
+ if (gf_uuid_compare(iatt1->ia_gfid, iatt2->ia_gfid) != 0) {
+ return 0;
+ }
- if (one->data == two->data)
- return 1;
+ /* TODO: ia_uid, ia_gid, ia_prot and ia_size can be changed
+ * with some commands. Here we don't have enough
+ * information to decide if they should match or not. */
+ /*
+ if ((iatt1->ia_uid != iatt2->ia_uid) ||
+ (iatt1->ia_gid != iatt2->ia_gid) ||
+ (st_mode_from_ia(iatt1->ia_prot, iatt1->ia_type) !=
+ st_mode_from_ia(iatt2->ia_prot,
+ iatt2->ia_type))) { return 0;
+ }
+ if (iatt1->ia_type == IA_IFREG) {
+ if (iatt1->ia_size != iatt2->ia_size) {
+ return 0;
+ }
+ }
+ */
+ return 1;
+ }
+ if (one->data_type == GF_DATA_TYPE_MDATA) {
+ if ((one->len < sizeof(struct mdata_iatt)) ||
+ (two->len < sizeof(struct mdata_iatt))) {
+ return 0;
+ }
+ mdata_iatt1 = (struct mdata_iatt *)one->data;
+ mdata_iatt2 = (struct mdata_iatt *)two->data;
+
+ if (mdata_iatt1->ia_atime != mdata_iatt2->ia_atime ||
+ mdata_iatt1->ia_mtime != mdata_iatt2->ia_mtime ||
+ mdata_iatt1->ia_ctime != mdata_iatt2->ia_ctime ||
+ mdata_iatt1->ia_atime_nsec != mdata_iatt2->ia_atime_nsec ||
+ mdata_iatt1->ia_mtime_nsec != mdata_iatt2->ia_mtime_nsec ||
+ mdata_iatt1->ia_ctime_nsec != mdata_iatt2->ia_ctime_nsec) {
+ return 0;
+ }
+ return 1;
+ }
+
+ if (one->len != two->len)
+ return 0;
- if (memcmp (one->data, two->data, one->len) == 0)
- return 1;
+ if (memcmp(one->data, two->data, one->len) == 0)
+ return 1;
- return 0;
+ return 0;
+}
+
+static int
+key_value_cmp(dict_t *one, char *key1, data_t *value1, void *data)
+{
+ struct dict_cmp *cmp = data;
+ dict_t *two = cmp->dict;
+ data_t *value2 = dict_get(two, key1);
+
+ if (value2) {
+ if (cmp->value_ignore && cmp->value_ignore(key1))
+ return 0;
+
+ if (is_data_equal(value1, value2) == 1)
+ return 0;
+ }
+
+ if (value2 == NULL) {
+ gf_msg_debug(THIS->name, 0, "'%s' found only on one dict", key1);
+ } else {
+ gf_msg_debug(THIS->name, 0,
+ "'%s' is different in two dicts "
+ "(%u, %u)",
+ key1, value1->len, value2->len);
+ }
+
+ return -1;
+}
+
+/* If both dicts are NULL then equal. If one of the dicts is NULL but the
+ * other has only ignorable keys then also they are equal. If both dicts are
+ * non-null then check if for each non-ignorable key, values are same or
+ * not. value_ignore function is used to skip comparing values for the keys
+ * which must be present in both the dictionaries but the value could be
+ * different.
+ */
+gf_boolean_t
+are_dicts_equal(dict_t *one, dict_t *two,
+ gf_boolean_t (*match)(dict_t *d, char *k, data_t *v,
+ void *data),
+ gf_boolean_t (*value_ignore)(char *k))
+{
+ int num_matches1 = 0;
+ int num_matches2 = 0;
+ struct dict_cmp cmp = {0};
+
+ if (one == two)
+ return _gf_true;
+
+ if (!match)
+ match = dict_match_everything;
+
+ if ((one == NULL) || (two == NULL)) {
+ num_matches1 = dict_foreach_match(one ? one : two, match, NULL,
+ dict_null_foreach_fn, NULL);
+ goto done;
+ }
+
+ cmp.dict = two;
+ cmp.value_ignore = value_ignore;
+ num_matches1 = dict_foreach_match(one, match, NULL, key_value_cmp, &cmp);
+
+ if (num_matches1 == -1)
+ return _gf_false;
+
+ if ((num_matches1 == one->count) && (one->count == two->count))
+ return _gf_true;
+
+ num_matches2 = dict_foreach_match(two, match, NULL, dict_null_foreach_fn,
+ NULL);
+done:
+ /* If the number of matches is same in 'two' then for all the
+ * valid-keys that exist in 'one' the value matched and no extra valid
+ * keys exist in 'two' alone. Otherwise there exists at least one extra
+ * valid-key in 'two' which doesn't exist in 'one' */
+ if (num_matches1 == num_matches2)
+ return _gf_true;
+ return _gf_false;
}
void
-data_destroy (data_t *data)
+data_destroy(data_t *data)
{
- if (data) {
- LOCK_DESTROY (&data->lock);
+ if (data) {
+ if (!data->is_static)
+ GF_FREE(data->data);
- if (!data->is_static) {
- if (data->data) {
- if (data->is_stdalloc)
- free (data->data);
- else
- GF_FREE (data->data);
- }
- }
-
- data->len = 0xbabababa;
- if (!data->is_const)
- mem_put (data);
- }
+ data->len = 0xbabababa;
+ mem_put(data);
+ }
}
data_t *
-data_copy (data_t *old)
+data_copy(data_t *old)
{
- if (!old) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "old is NULL");
- return NULL;
- }
+ if (!old) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, 0, LG_MSG_NULL_PTR,
+ "old is NULL");
+ return NULL;
+ }
- data_t *newdata = mem_get0 (THIS->ctx->dict_data_pool);
- if (!newdata) {
- return NULL;
- }
+ data_t *newdata = mem_get0(THIS->ctx->dict_data_pool);
+ if (!newdata) {
+ return NULL;
+ }
- if (old) {
- newdata->len = old->len;
- if (old->data) {
- newdata->data = memdup (old->data, old->len);
- if (!newdata->data)
- goto err_out;
- }
- }
+ newdata->len = old->len;
+ if (old->data) {
+ newdata->data = gf_memdup(old->data, old->len);
+ if (!newdata->data)
+ goto err_out;
+ }
+ newdata->data_type = old->data_type;
- LOCK_INIT (&newdata->lock);
- return newdata;
+ return newdata;
err_out:
+ mem_put(newdata);
- if (newdata->data)
- FREE (newdata->data);
- mem_put (newdata);
-
- return NULL;
+ return NULL;
}
+/* Always need to be called under lock
+ * Always this and key variables are not null -
+ * checked by callers.
+ */
static data_pair_t *
-_dict_lookup (dict_t *this, char *key)
+dict_lookup_common(const dict_t *this, const char *key, const uint32_t hash)
{
- if (!this || !key) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || !key (%s)", key);
- return NULL;
- }
+ int hashval = 0;
+ data_pair_t *pair;
- int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
- data_pair_t *pair;
+ /* If the divisor is 1, the modulo is always 0,
+ * in such case avoid hash calculation.
+ */
+ if (this->hash_size != 1)
+ hashval = hash % this->hash_size;
- for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
- if (pair->key && !strcmp (pair->key, key))
- return pair;
- }
+ for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
+ if (pair->key && (hash == pair->key_hash) && !strcmp(pair->key, key))
+ return pair;
+ }
- return NULL;
+ return NULL;
}
int32_t
-dict_lookup (dict_t *this, char *key, data_pair_t **data)
+dict_lookup(dict_t *this, char *key, data_t **data)
{
- if (!this || !key || !data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || !key || !data");
- return -1;
- }
+ if (!this || !key || !data) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || !key || "
+ "!data");
+ return -1;
+ }
- LOCK (&this->lock);
- {
- *data = _dict_lookup (this, key);
- }
- UNLOCK (&this->lock);
- if (*data)
- return 0;
- else
- return -1;
+ data_pair_t *tmp = NULL;
-}
+ uint32_t hash = (uint32_t)XXH64(key, strlen(key), 0);
-static int32_t
-_dict_set (dict_t *this,
- char *key,
- data_t *value)
-{
- int hashval;
- data_pair_t *pair;
- char key_free = 0;
- int tmp = 0;
- int ret = 0;
-
- if (!key) {
- ret = gf_asprintf (&key, "ref:%p", value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_WARNING, "asprintf failed %s", key);
- return -1;
- }
- key_free = 1;
- }
+ LOCK(&this->lock);
+ {
+ tmp = dict_lookup_common(this, key, hash);
+ }
+ UNLOCK(&this->lock);
- tmp = SuperFastHash (key, strlen (key));
- hashval = (tmp % this->hash_size);
- pair = _dict_lookup (this, key);
+ if (!tmp)
+ return -1;
+ *data = tmp->value;
+ return 0;
+}
+
+static int32_t
+dict_set_lk(dict_t *this, char *key, const int key_len, data_t *value,
+ const uint32_t hash, gf_boolean_t replace)
+{
+ int hashval = 0;
+ data_pair_t *pair;
+ int key_free = 0;
+ uint32_t key_hash;
+ int keylen;
+
+ if (!key) {
+ keylen = gf_asprintf(&key, "ref:%p", value);
+ if (-1 == keylen) {
+ return -1;
+ }
+ key_free = 1;
+ key_hash = (uint32_t)XXH64(key, keylen, 0);
+ } else {
+ keylen = key_len;
+ key_hash = hash;
+ }
+
+ /* Search for a existing key if 'replace' is asked for */
+ if (replace) {
+ pair = dict_lookup_common(this, key, key_hash);
if (pair) {
- data_t *unref_data = pair->value;
- pair->value = data_ref (value);
- data_unref (unref_data);
- if (key_free)
- GF_FREE (key);
- /* Indicates duplicate key */
- return 0;
- }
- pair = mem_get0 (THIS->ctx->dict_pair_pool);
+ data_t *unref_data = pair->value;
+ pair->value = data_ref(value);
+ this->totkvlen += (value->len - unref_data->len);
+ data_unref(unref_data);
+ if (key_free)
+ GF_FREE(key);
+ /* Indicates duplicate key */
+ return 0;
+ }
+ }
+
+ if (this->free_pair.key) { /* the free_pair is used */
+ pair = mem_get(THIS->ctx->dict_pair_pool);
if (!pair) {
- return -1;
- }
-
- pair->key = (char *) GF_CALLOC (1, strlen (key) + 1,
- gf_common_mt_char);
+ if (key_free)
+ GF_FREE(key);
+ return -1;
+ }
+ } else { /* assign the pair to the free pair */
+ pair = &this->free_pair;
+ }
+
+ if (key_free) {
+ /* It's ours. Use it. */
+ pair->key = key;
+ key_free = 0;
+ } else {
+ pair->key = (char *)GF_MALLOC(keylen + 1, gf_common_mt_char);
if (!pair->key) {
- mem_put (pair);
-
- if (key_free)
- GF_FREE (key);
- return -1;
- }
-
- strcpy (pair->key, key);
- pair->value = data_ref (value);
-
- pair->hash_next = this->members[hashval];
- this->members[hashval] = pair;
-
- pair->next = this->members_list;
- pair->prev = NULL;
- if (this->members_list)
- this->members_list->prev = pair;
- this->members_list = pair;
- this->count++;
+ if (pair != &this->free_pair) {
+ mem_put(pair);
+ }
+ return -1;
+ }
+ strcpy(pair->key, key);
+ }
+ pair->key_hash = key_hash;
+ pair->value = data_ref(value);
+ this->totkvlen += (keylen + 1 + value->len);
+
+ /* If the divisor is 1, the modulo is always 0,
+ * in such case avoid hash calculation.
+ */
+ if (this->hash_size != 1) {
+ hashval = (key_hash % this->hash_size);
+ }
+ pair->hash_next = this->members[hashval];
+ this->members[hashval] = pair;
+
+ pair->next = this->members_list;
+ pair->prev = NULL;
+ if (this->members_list)
+ this->members_list->prev = pair;
+ this->members_list = pair;
+ this->count++;
+
+ if (key_free)
+ GF_FREE(key);
+
+ if (this->max_count < this->count)
+ this->max_count = this->count;
+ return 0;
+}
- if (key_free)
- GF_FREE (key);
- return 0;
+int32_t
+dict_set(dict_t *this, char *key, data_t *value)
+{
+ if (key)
+ return dict_setn(this, key, strlen(key), value);
+ else
+ return dict_setn(this, NULL, 0, value);
}
int32_t
-dict_set (dict_t *this,
- char *key,
- data_t *value)
+dict_setn(dict_t *this, char *key, const int keylen, data_t *value)
{
- int32_t ret;
+ int32_t ret;
+ uint32_t key_hash = 0;
- if (!this || !value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || !value for key=%s", key);
- return -1;
- }
+ if (!this || !value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || !value for "
+ "key=%s",
+ key);
+ return -1;
+ }
- LOCK (&this->lock);
+ if (key) {
+ key_hash = (uint32_t)XXH64(key, keylen, 0);
+ }
- ret = _dict_set (this, key, value);
+ LOCK(&this->lock);
- UNLOCK (&this->lock);
+ ret = dict_set_lk(this, key, keylen, value, key_hash, 1);
- return ret;
+ UNLOCK(&this->lock);
+
+ return ret;
}
+int32_t
+dict_add(dict_t *this, char *key, data_t *value)
+{
+ if (key)
+ return dict_addn(this, key, strlen(key), value);
+ else
+ return dict_addn(this, NULL, 0, value);
+}
-data_t *
-dict_get (dict_t *this, char *key)
+int32_t
+dict_addn(dict_t *this, char *key, const int keylen, data_t *value)
{
- data_pair_t *pair;
+ int32_t ret;
+ uint32_t key_hash = 0;
- if (!this || !key) {
- gf_log_callingfn ("dict", GF_LOG_INFO,
- "!this || key=%s", (key) ? key : "()");
- return NULL;
- }
+ if (!this || !value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || !value for key=%s", key);
+ return -1;
+ }
+
+ if (key) {
+ key_hash = (uint32_t)XXH64(key, keylen, 0);
+ }
- LOCK (&this->lock);
+ LOCK(&this->lock);
- pair = _dict_lookup (this, key);
+ ret = dict_set_lk(this, key, keylen, value, key_hash, 0);
- UNLOCK (&this->lock);
+ UNLOCK(&this->lock);
- if (pair)
- return pair->value;
+ return ret;
+}
+data_t *
+dict_get(dict_t *this, char *key)
+{
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", (key) ? key : "()");
return NULL;
+ }
+
+ return dict_getn(this, key, strlen(key));
}
-void
-dict_del (dict_t *this, char *key)
+data_t *
+dict_getn(dict_t *this, char *key, const int keylen)
{
- if (!this || !key) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || key=%s", key);
- return;
- }
+ data_pair_t *pair;
+ uint32_t hash;
- LOCK (&this->lock);
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", (key) ? key : "()");
+ return NULL;
+ }
- int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
- data_pair_t *pair = this->members[hashval];
- data_pair_t *prev = NULL;
+ hash = (uint32_t)XXH64(key, keylen, 0);
- while (pair) {
- if (strcmp (pair->key, key) == 0) {
- if (prev)
- prev->hash_next = pair->hash_next;
- else
- this->members[hashval] = pair->hash_next;
+ LOCK(&this->lock);
+ {
+ pair = dict_lookup_common(this, key, hash);
+ }
+ UNLOCK(&this->lock);
- data_unref (pair->value);
+ if (pair)
+ return pair->value;
- if (pair->prev)
- pair->prev->next = pair->next;
- else
- this->members_list = pair->next;
+ return NULL;
+}
- if (pair->next)
- pair->next->prev = pair->prev;
+int
+dict_key_count(dict_t *this)
+{
+ int ret = -1;
- GF_FREE (pair->key);
- mem_put (pair);
- this->count--;
- break;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict passed is NULL");
+ return ret;
+ }
- prev = pair;
- pair = pair->hash_next;
- }
+ LOCK(&this->lock);
+ {
+ ret = this->count;
+ }
+ UNLOCK(&this->lock);
- UNLOCK (&this->lock);
+ return ret;
+}
+void
+dict_del(dict_t *this, char *key)
+{
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", key);
return;
+ }
+
+ return dict_deln(this, key, strlen(key));
}
void
-dict_destroy (dict_t *this)
+dict_deln(dict_t *this, char *key, const int keylen)
{
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return;
- }
+ int hashval = 0;
+ uint32_t hash;
- data_pair_t *pair = this->members_list;
- data_pair_t *prev = this->members_list;
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", key);
+ return;
+ }
- LOCK_DESTROY (&this->lock);
+ hash = (uint32_t)XXH64(key, keylen, 0);
- while (prev) {
- pair = pair->next;
- data_unref (prev->value);
- GF_FREE (prev->key);
- mem_put (prev);
- prev = pair;
- }
+ LOCK(&this->lock);
- mem_put (this->members);
+ /* If the divisor is 1, the modulo is always 0,
+ * in such case avoid hash calculation.
+ */
+ if (this->hash_size != 1)
+ hashval = hash % this->hash_size;
- if (this->extra_free)
- GF_FREE (this->extra_free);
- if (this->extra_stdfree)
- free (this->extra_stdfree);
+ data_pair_t *pair = this->members[hashval];
+ data_pair_t *prev = NULL;
- if (!this->is_static)
- mem_put (this);
+ while (pair) {
+ if ((hash == pair->key_hash) && strcmp(pair->key, key) == 0) {
+ if (prev)
+ prev->hash_next = pair->hash_next;
+ else
+ this->members[hashval] = pair->hash_next;
- return;
-}
+ this->totkvlen -= pair->value->len;
+ data_unref(pair->value);
-void
-dict_unref (dict_t *this)
-{
- int32_t ref;
+ if (pair->prev)
+ pair->prev->next = pair->next;
+ else
+ this->members_list = pair->next;
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return;
- }
+ if (pair->next)
+ pair->next->prev = pair->prev;
- LOCK (&this->lock);
+ this->totkvlen -= (strlen(pair->key) + 1);
+ GF_FREE(pair->key);
+ if (pair == &this->free_pair) {
+ this->free_pair.key = NULL;
+ } else {
+ mem_put(pair);
+ }
+ this->count--;
+ break;
+ }
- this->refcount--;
- ref = this->refcount;
+ prev = pair;
+ pair = pair->hash_next;
+ }
- UNLOCK (&this->lock);
+ UNLOCK(&this->lock);
- if (!ref)
- dict_destroy (this);
+ return;
}
-dict_t *
-dict_ref (dict_t *this)
+void
+dict_destroy(dict_t *this)
{
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return NULL;
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return;
+ }
+
+ data_pair_t *pair = this->members_list;
+ data_pair_t *prev = this->members_list;
+ glusterfs_ctx_t *ctx = NULL;
+ uint64_t current_max = 0;
+ uint32_t total_pairs = 0;
+
+ LOCK_DESTROY(&this->lock);
+
+ while (prev) {
+ pair = pair->next;
+ data_unref(prev->value);
+ GF_FREE(prev->key);
+ if (prev != &this->free_pair) {
+ mem_put(prev);
+ } else {
+ this->free_pair.key = NULL;
}
+ total_pairs++;
+ prev = pair;
+ }
+
+ this->totkvlen = 0;
+ if (this->members != &this->members_internal) {
+ mem_put(this->members);
+ }
+
+ free(this->extra_stdfree);
- LOCK (&this->lock);
+ /* update 'ctx->stats.dict.details' using max_count */
+ ctx = THIS->ctx;
- this->refcount++;
+ /* NOTE: below logic is not totaly race proof */
+ /* thread0 and thread1 gets current_max as 10 */
+ /* thread0 has 'this->max_count as 11 */
+ /* thread1 has 'this->max_count as 20 */
+ /* thread1 goes ahead and sets the max_dict_pairs to 20 */
+ /* thread0 then goes and sets it to 11 */
+ /* As it is for information purpose only, no functionality will be
+ broken by this, but a point to consider about ATOMIC macros. */
+ current_max = GF_ATOMIC_GET(ctx->stats.max_dict_pairs);
+ if (current_max < this->max_count)
+ GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, this->max_count);
- UNLOCK (&this->lock);
+ GF_ATOMIC_ADD(ctx->stats.total_pairs_used, total_pairs);
+ GF_ATOMIC_INC(ctx->stats.total_dicts_used);
- return this;
+ mem_put(this);
+
+ return;
}
void
-data_unref (data_t *this)
+dict_unref(dict_t *this)
{
- int32_t ref;
+ uint64_t ref = 0;
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return;
+ }
- LOCK (&this->lock);
+ ref = GF_ATOMIC_DEC(this->refcount);
- this->refcount--;
- ref = this->refcount;
+ if (!ref)
+ dict_destroy(this);
+}
- UNLOCK (&this->lock);
+dict_t *
+dict_ref(dict_t *this)
+{
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return NULL;
+ }
- if (!ref)
- data_destroy (this);
+ GF_ATOMIC_INC(this->refcount);
+ return this;
}
-data_t *
-data_ref (data_t *this)
+void
+data_unref(data_t *this)
{
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return NULL;
- }
+ uint64_t ref;
- LOCK (&this->lock);
-
- this->refcount++;
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "data is NULL");
+ return;
+ }
- UNLOCK (&this->lock);
+ ref = GF_ATOMIC_DEC(this->refcount);
- return this;
+ if (!ref)
+ data_destroy(this);
}
data_t *
-int_to_data (int64_t value)
+data_ref(data_t *this)
{
- int ret = 0;
- data_t *data = get_new_data ();
-
- if (!data) {
- return NULL;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "data is NULL");
+ return NULL;
+ }
- ret = gf_asprintf (&data->data, "%"PRId64, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
- data->len = strlen (data->data) + 1;
+ GF_ATOMIC_INC(this->refcount);
- return data;
+ return this;
}
data_t *
-data_from_int64 (int64_t value)
+int_to_data(int64_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRId64, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
- data->len = strlen (data->data) + 1;
+ if (!data) {
+ return NULL;
+ }
- return data;
+ data->len = gf_asprintf(&data->data, "%" PRId64, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
+
+ return data;
}
data_t *
-data_from_int32 (int32_t value)
+data_from_int64(int64_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
-
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRId32, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ data_t *data = get_new_data();
- data->len = strlen (data->data) + 1;
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRId64, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
data_t *
-data_from_int16 (int16_t value)
+data_from_int32(int32_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRId16, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRId32, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
data_t *
-data_from_int8 (int8_t value)
+data_from_int16(int16_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%d", value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRId16, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
data_t *
-data_from_uint64 (uint64_t value)
+data_from_int8(int8_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRIu64, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%d", value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
-static data_t *
-data_from_double (double value)
+data_t *
+data_from_uint64(uint64_t value)
{
- data_t *data = NULL;
- int ret = 0;
-
- data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRIu64, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- ret = gf_asprintf (&data->data, "%f", value);
- if (ret == -1) {
- return NULL;
- }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_UINT;
- return data;
+ return data;
}
-
data_t *
-data_from_uint32 (uint32_t value)
+data_from_double(double value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRIu32, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len = gf_asprintf(&data->data, "%f", value);
+ if (data->len == -1) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_DOUBLE;
- return data;
+ return data;
}
-
data_t *
-data_from_uint16 (uint16_t value)
+data_from_uint32(uint32_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRIu16, value);
- if (-1 == ret) {
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRIu32, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_UINT;
- return data;
+ return data;
}
-
data_t *
-data_from_ptr (void *value)
+data_from_uint16(uint16_t value)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ data_t *data = get_new_data();
- data_t *data = get_new_data ();
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRIu16, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- if (!data) {
- return NULL;
- }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_UINT;
- data->data = value;
- return data;
+ return data;
}
-data_t *
-data_from_static_ptr (void *value)
+static data_t *
+data_from_ptr_common(void *value, gf_boolean_t is_static)
{
-/*
- this is valid to set 0 as value..
-
- if (!value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@value=%p", value);
- return NULL;
- }
-*/
- data_t *data = get_new_data ();
+ /* it is valid to set 0/NULL as a value, no need to check *value */
- if (!data) {
- return NULL;
- }
+ data_t *data = get_new_data();
+ if (!data) {
+ return NULL;
+ }
- data->is_static = 1;
- data->data = value;
+ data->data = value;
+ data->len = 0;
+ data->is_static = is_static;
- return data;
+ data->data_type = GF_DATA_TYPE_PTR;
+ return data;
}
data_t *
-str_to_data (char *value)
+str_to_data(char *value)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
- data_t *data = get_new_data ();
-
- if (!data) {
- return NULL;
- }
- data->len = strlen (value) + 1;
-
- data->data = value;
- data->is_static = 1;
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
- return data;
+ return strn_to_data(value, strlen(value));
}
data_t *
-data_from_dynstr (char *value)
+strn_to_data(char *value, const int vallen)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
+ data_t *data = get_new_data();
- data_t *data = get_new_data ();
+ if (!data) {
+ return NULL;
+ }
+ data->len = vallen + 1;
+ data->data_type = GF_DATA_TYPE_STR;
- data->len = strlen (value) + 1;
- data->data = value;
+ data->data = value;
+ data->is_static = _gf_true;
- return data;
+ return data;
}
-data_t *
-data_from_dynmstr (char *value)
+static data_t *
+data_from_dynstr(char *value)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- data->len = strlen (value) + 1;
- data->data = value;
- data->is_stdalloc = 1;
+ if (!data)
+ return NULL;
+ data->len = strlen(value) + 1;
+ data->data = value;
+ data->data_type = GF_DATA_TYPE_STR;
- return data;
+ return data;
}
data_t *
-data_from_dynptr (void *value, int32_t len)
+data_from_dynptr(void *value, int32_t len)
{
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data)
- return NULL;
+ if (!data)
+ return NULL;
- data->len = len;
- data->data = value;
+ data->len = len;
+ data->data = value;
+ data->data_type = GF_DATA_TYPE_PTR;
- return data;
+ return data;
}
data_t *
-bin_to_data (void *value, int32_t len)
+bin_to_data(void *value, int32_t len)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data)
- return NULL;
+ if (!data)
+ return NULL;
- data->is_static = 1;
- data->len = len;
- data->data = value;
+ data->is_static = _gf_true;
+ data->len = len;
+ data->data = value;
- return data;
+ return data;
}
-int64_t
-data_to_int64 (data_t *data)
-{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+static char *data_type_name[GF_DATA_TYPE_MAX] = {
+ [GF_DATA_TYPE_UNKNOWN] = "unknown",
+ [GF_DATA_TYPE_STR_OLD] = "string-old-version",
+ [GF_DATA_TYPE_INT] = "integer",
+ [GF_DATA_TYPE_UINT] = "unsigned integer",
+ [GF_DATA_TYPE_DOUBLE] = "float",
+ [GF_DATA_TYPE_STR] = "string",
+ [GF_DATA_TYPE_PTR] = "pointer",
+ [GF_DATA_TYPE_GFUUID] = "gf-uuid",
+ [GF_DATA_TYPE_IATT] = "iatt",
+ [GF_DATA_TYPE_MDATA] = "mdata",
+};
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
- return (int64_t) strtoull (str, NULL, 0);
-}
+int64_t
+data_to_int64(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+
+ char *endptr = NULL;
+ int64_t value = 0;
+
+ errno = 0;
+ value = strtoll(data->data, &endptr, 0);
+
+ if (endptr && *endptr != '\0')
+ /* Unrecognized characters at the end of string. */
+ errno = EINVAL;
+ if (errno) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
+ LG_MSG_DATA_CONVERSION_ERROR,
+ "Error in data conversion: '%s' can't "
+ "be represented as int64_t",
+ data->data);
+ return -1;
+ }
+ return value;
+}
+
+/* Like above but implies signed range check. */
+
+#define DATA_TO_RANGED_SIGNED(endptr, value, data, type, min, max) \
+ do { \
+ errno = 0; \
+ value = strtoll(data->data, &endptr, 0); \
+ if (endptr && *endptr != '\0') \
+ errno = EINVAL; \
+ if (errno || value > max || value < min) { \
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \
+ LG_MSG_DATA_CONVERSION_ERROR, \
+ "Error in data conversion: '%s' can't " \
+ "be represented as " #type, \
+ data->data); \
+ return -1; \
+ } \
+ return (type)value; \
+ } while (0)
int32_t
-data_to_int32 (data_t *data)
+data_to_int32(data_t *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+ char *endptr = NULL;
+ int64_t value = 0;
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- return strtoul (str, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int32_t, INT_MIN, INT_MAX);
}
int16_t
-data_to_int16 (data_t *data)
+data_to_int16(data_t *data)
{
- int16_t value = 0;
-
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- value = strtol (str, NULL, 0);
+ char *endptr = NULL;
+ int64_t value = 0;
- if ((SHRT_MAX > value) || (SHRT_MIN < value)) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "detected overflow");
- return -1;
- }
-
- return (int16_t)value;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int16_t, SHRT_MIN, SHRT_MAX);
}
-
int8_t
-data_to_int8 (data_t *data)
+data_to_int8(data_t *data)
{
- int32_t value = 0;
-
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- value = strtol (str, NULL, 0);
+ char *endptr = NULL;
+ int64_t value = 0;
- if ((SCHAR_MAX > value) || (SCHAR_MIN < value)) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "detected overflow");
- return -1;
- }
-
- return (int8_t)value;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int8_t, CHAR_MIN, CHAR_MAX);
}
-
uint64_t
-data_to_uint64 (data_t *data)
-{
- if (!data)
- return -1;
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- return strtoll (str, NULL, 0);
-}
+data_to_uint64(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+
+ char *endptr = NULL;
+ uint64_t value = 0;
+
+ errno = 0;
+ value = strtoull(data->data, &endptr, 0);
+
+ if (endptr && *endptr != '\0')
+ errno = EINVAL;
+ if (errno) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
+ LG_MSG_DATA_CONVERSION_ERROR,
+ "Error in data conversion: '%s' can't "
+ "be represented as uint64_t",
+ data->data);
+ return -1;
+ }
+ return value;
+}
+
+/* Like above but implies unsigned range check. */
+
+#define DATA_TO_RANGED_UNSIGNED(endptr, value, data, type, max) \
+ do { \
+ errno = 0; \
+ value = strtoull(data->data, &endptr, 0); \
+ if (endptr && *endptr != '\0') \
+ errno = EINVAL; \
+ if (errno || value > max) { \
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \
+ LG_MSG_DATA_CONVERSION_ERROR, \
+ "Error in data conversion: '%s' can't " \
+ "be represented as " #type, \
+ data->data); \
+ return -1; \
+ } \
+ return (type)value; \
+ } while (0)
uint32_t
-data_to_uint32 (data_t *data)
+data_to_uint32(data_t *data)
{
- if (!data)
- return -1;
+ char *endptr = NULL;
+ uint64_t value = 0;
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- return strtol (str, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint32_t, UINT_MAX);
}
uint16_t
-data_to_uint16 (data_t *data)
+data_to_uint16(data_t *data)
{
- uint16_t value = 0;
-
- if (!data)
- return -1;
+ char *endptr = NULL;
+ uint64_t value = 0;
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint16_t, USHRT_MAX);
+}
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+uint8_t
+data_to_uint8(data_t *data)
+{
+ char *endptr = NULL;
+ uint64_t value = 0;
- errno = 0;
- value = strtol (str, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint8_t, UCHAR_MAX);
+}
- if ((USHRT_MAX - value) < 0) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "overflow detected");
- return -1;
- }
+char *
+data_to_str(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_STR, "null", NULL);
+ return data->data;
+}
- return (uint16_t)value;
+void *
+data_to_ptr(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, "null", NULL);
+ return data->data;
}
-uint8_t
-data_to_uint8 (data_t *data)
+void *
+data_to_bin(data_t *data)
{
- uint32_t value = 0;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, "null", NULL);
+ return data->data;
+}
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
+struct iatt *
+data_to_iatt(data_t *data, char *key)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_IATT, key, NULL);
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+ /* We only check for smaller size. If it's bigger we simply ignore
+ * the extra data. This way it's easy to do changes in the future that
+ * pass more data but are backward compatible (if the initial contents
+ * of the struct are maintained, of course). */
+ if (data->len < sizeof(struct iatt)) {
+ gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
+ "key=%s", key, NULL);
+ return NULL;
+ }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ return (struct iatt *)data->data;
+}
- errno = 0;
- value = strtol (str, NULL, 0);
+int
+dict_null_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ return 0;
+}
- if ((UCHAR_MAX - value) < 0) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "data conversion overflow detected (%s)",
- strerror(errno));
- return -1;
- }
+int
+dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *_tmp)
+{
+ if (!d || !k) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, EINVAL, LG_MSG_KEY_OR_VALUE_NULL,
+ "d=%s", d ? "key" : "dictionary", NULL);
+ return -1;
+ }
- return (uint8_t) value;
+ dict_del(d, k);
+ return 0;
}
-char *
-data_to_str (data_t *data)
+gf_boolean_t
+dict_match_everything(dict_t *d, char *k, data_t *v, void *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return NULL;
- }
- return data->data;
+ return _gf_true;
}
-void *
-data_to_ptr (data_t *data)
+int
+dict_foreach(dict_t *dict,
+ int (*fn)(dict_t *this, char *key, data_t *value, void *data),
+ void *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return NULL;
- }
- return data->data;
+ int ret = dict_foreach_match(dict, dict_match_everything, NULL, fn, data);
+
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
}
-void *
-data_to_bin (data_t *data)
+/* return values:
+ -1 = failure,
+ 0 = no matches found,
+ +n = n number of matches
+*/
+int
+dict_foreach_match(dict_t *dict,
+ gf_boolean_t (*match)(dict_t *this, char *key, data_t *value,
+ void *mdata),
+ void *match_data,
+ int (*action)(dict_t *this, char *key, data_t *value,
+ void *adata),
+ void *action_data)
+{
+ if (!dict || !match || !action) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict|match|action is "
+ "NULL");
+ return -1;
+ }
+
+ int ret = -1;
+ int count = 0;
+ data_pair_t *pairs = dict->members_list;
+ data_pair_t *next = NULL;
+
+ while (pairs) {
+ next = pairs->next;
+ if (match(dict, pairs->key, pairs->value, match_data)) {
+ ret = action(dict, pairs->key, pairs->value, action_data);
+ if (ret < 0)
+ return ret;
+ count++;
+ }
+ pairs = next;
+ }
+
+ return count;
+}
+
+static gf_boolean_t
+dict_fnmatch(dict_t *d, char *k, data_t *val, void *match_data)
+{
+ return (fnmatch(match_data, k, 0) == 0);
+}
+/* return values:
+ -1 = failure,
+ 0 = no matches found,
+ +n = n number of matches
+*/
+int
+dict_foreach_fnmatch(dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this, char *key, data_t *value,
+ void *data),
+ void *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return NULL;
- }
- return data->data;
+ return dict_foreach_match(dict, dict_fnmatch, pattern, fn, data);
}
-void
-dict_foreach (dict_t *dict,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
- void *data)
-{
- if (!dict) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "dict is NULL");
- return;
- }
+/**
+ * dict_keys_join - pack the keys of the dictionary in a buffer.
+ *
+ * @value : buffer in which the keys will be packed (can be NULL)
+ * @size : size of the buffer which is sent (can be 0, in which case buffer
+ * is not packed but only length is returned)
+ * @dict : dictionary of which all the keys will be packed
+ * @filter_fn : keys matched in filter_fn() is counted.
+ *
+ * @return : @length of string after joining keys.
+ *
+ */
- data_pair_t *pairs = dict->members_list;
- data_pair_t *next = NULL;
+int
+dict_keys_join(void *value, int size, dict_t *dict, int (*filter_fn)(char *k))
+{
+ int len = 0;
+ data_pair_t *pairs = dict->members_list;
+ data_pair_t *next = NULL;
+
+ while (pairs) {
+ next = pairs->next;
- while (pairs) {
- next = pairs->next;
- fn (dict, pairs->key, pairs->value, data);
- pairs = next;
+ if (filter_fn && filter_fn(pairs->key)) {
+ pairs = next;
+ continue;
}
-}
+ if (value && (size > len))
+ strncpy(value + len, pairs->key, size - len);
-static void
-_copy (dict_t *unused,
- char *key,
- data_t *value,
- void *newdict)
-{
- dict_set ((dict_t *)newdict, key, (value));
+ len += (strlen(pairs->key) + 1);
+
+ pairs = next;
+ }
+
+ return len;
}
-static void
-_remove (dict_t *dict,
- char *key,
- data_t *value,
- void *unused)
+static int
+dict_copy_one(dict_t *unused, char *key, data_t *value, void *newdict)
{
- dict_del ((dict_t *)dict, key);
+ return dict_set((dict_t *)newdict, key, (value));
}
-
dict_t *
-dict_copy (dict_t *dict,
- dict_t *new)
+dict_copy(dict_t *dict, dict_t *new)
{
- if (!dict) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return NULL;
- }
+ if (!dict) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return NULL;
+ }
- if (!new)
- new = get_new_dict_full (dict->hash_size);
+ if (!new)
+ new = get_new_dict_full(dict->hash_size);
- dict_foreach (dict, _copy, new);
+ dict_foreach(dict, dict_copy_one, new);
- return new;
+ return new;
}
int
-dict_reset (dict_t *dict)
-{
- int32_t ret = -1;
- if (!dict) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- goto out;
- }
- dict_foreach (dict, _remove, NULL);
- ret = 0;
+dict_reset(dict_t *dict)
+{
+ int32_t ret = -1;
+ if (!dict) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ goto out;
+ }
+ dict_foreach(dict, dict_remove_foreach_fn, NULL);
+ ret = 0;
out:
- return ret;
+ return ret;
}
dict_t *
-dict_copy_with_ref (dict_t *dict,
- dict_t *new)
+dict_copy_with_ref(dict_t *dict, dict_t *new)
{
- dict_t *local_new = NULL;
+ dict_t *local_new = NULL;
- GF_VALIDATE_OR_GOTO("dict", dict, fail);
+ GF_VALIDATE_OR_GOTO("dict", dict, fail);
- if (new == NULL) {
- local_new = dict_new ();
- GF_VALIDATE_OR_GOTO("dict", local_new, fail);
- new = local_new;
- }
+ if (new == NULL) {
+ local_new = dict_new();
+ GF_VALIDATE_OR_GOTO("dict", local_new, fail);
+ new = local_new;
+ }
- dict_foreach (dict, _copy, new);
+ dict_foreach(dict, dict_copy_one, new);
fail:
- return new;
+ return new;
}
/*
@@ -1123,900 +1495,1292 @@ fail:
* -val error, val = errno
*/
-
static int
-dict_get_with_ref (dict_t *this, char *key, data_t **data)
+dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data)
{
- data_pair_t * pair = NULL;
- int ret = -ENOENT;
+ data_pair_t *pair = NULL;
+ int ret = -ENOENT;
+ uint32_t hash;
- if (!this || !key || !data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "dict OR key (%s) is NULL", key);
- ret = -EINVAL;
- goto err;
- }
+ hash = (uint32_t)XXH64(key, keylen, 0);
- LOCK (&this->lock);
- {
- pair = _dict_lookup (this, key);
- }
- UNLOCK (&this->lock);
+ LOCK(&this->lock);
+ {
+ pair = dict_lookup_common(this, key, hash);
if (pair) {
- ret = 0;
- *data = data_ref (pair->value);
+ ret = 0;
+ *data = data_ref(pair->value);
}
+ }
+ UNLOCK(&this->lock);
-err:
- return ret;
+ return ret;
+}
+
+int
+dict_get_with_ref(dict_t *this, char *key, data_t **data)
+{
+ if (!this || !key || !data) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict OR key (%s) is NULL", key);
+ return -EINVAL;
+ }
+
+ return dict_get_with_refn(this, key, strlen(key), data);
}
static int
-_data_to_ptr (data_t *data, void **val)
+data_to_ptr_common(data_t *data, void **val)
{
- int ret = 0;
+ int ret = 0;
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- *val = data->data;
+ *val = data->data;
err:
- return ret;
+ return ret;
}
-
static int
-_data_to_int8 (data_t *data, int8_t *val)
+data_to_int8_ptr(data_t *data, int8_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
-
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_int16 (data_t *data, int16_t *val)
+data_to_int16_ptr(data_t *data, int16_t *val)
{
- int ret = 0;
- char * str = NULL;
-
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ int ret = 0;
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_int32 (data_t *data, int32_t *val)
+data_to_int32_ptr(data_t *data, int32_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_int64 (data_t *data, int64_t *val)
+data_to_int64_ptr(data_t *data, int64_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtoll (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoll(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_uint16 (data_t *data, uint16_t *val)
+data_to_uint16_ptr(data_t *data, uint16_t *val)
{
- int ret = 0;
- char * str = NULL;
-
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ int ret = 0;
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtoul (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoul(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_uint32 (data_t *data, uint32_t *val)
+data_to_uint32_ptr(data_t *data, uint32_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtoul (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoul(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_uint64 (data_t *data, uint64_t *val)
+data_to_uint64_ptr(data_t *data, uint64_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtoull (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoull(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_double (data_t *data, double *val)
+data_to_double_ptr(data_t *data, double *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
-
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtod (str, NULL);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtod(data->data, NULL);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
int
-dict_get_int8 (dict_t *this, char *key, int8_t *val)
+dict_get_int8(dict_t *this, char *key, int8_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = _data_to_int8 (data, val);
+ ret = data_to_int8_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_int8 (dict_t *this, char *key, int8_t val)
+dict_set_int8(dict_t *this, char *key, int8_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- data = data_from_int8 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_int8(val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_int16 (dict_t *this, char *key, int16_t *val)
+dict_get_int16(dict_t *this, char *key, int16_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = _data_to_int16 (data, val);
+ ret = data_to_int16_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
+int
+dict_set_int16(dict_t *this, char *key, int16_t val)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ data = data_from_int16(val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
+
+err:
+ return ret;
+}
int
-dict_set_int16 (dict_t *this, char *key, int16_t val)
+dict_get_int32n(dict_t *this, char *key, const int keylen, int32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- data = data_from_int16 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_get_with_refn(this, key, keylen, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = dict_set (this, key, data);
+ ret = data_to_int32_ptr(data, val);
err:
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
int
-dict_get_int32 (dict_t *this, char *key, int32_t *val)
+dict_get_int32(dict_t *this, char *key, int32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- ret = _data_to_int32 (data, val);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
+
+ ret = data_to_int32_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
+int
+dict_set_int32n(dict_t *this, char *key, const int keylen, int32_t val)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ data = data_from_int32(val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
+
+err:
+ return ret;
+}
int
-dict_set_int32 (dict_t *this, char *key, int32_t val)
+dict_set_int32(dict_t *this, char *key, int32_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_int32(val);
+ int ret = 0;
- data = data_from_int32 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_int64 (dict_t *this, char *key, int64_t *val)
+dict_get_int64(dict_t *this, char *key, int64_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = _data_to_int64 (data, val);
+ ret = data_to_int64_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_int64 (dict_t *this, char *key, int64_t val)
+dict_set_int64(dict_t *this, char *key, int64_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_int64(val);
+ int ret = 0;
- data = data_from_int64 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_uint16 (dict_t *this, char *key, uint16_t *val)
+dict_get_uint16(dict_t *this, char *key, uint16_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, key, -EINVAL);
- ret = _data_to_uint16 (data, val);
+ ret = data_to_uint16_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_uint16 (dict_t *this, char *key, uint16_t val)
+dict_set_uint16(dict_t *this, char *key, uint16_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_uint16(val);
+ int ret = 0;
- data = data_from_uint16 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_uint32 (dict_t *this, char *key, uint32_t *val)
+dict_get_uint32(dict_t *this, char *key, uint32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- ret = _data_to_uint32 (data, val);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, key, -EINVAL);
+
+ ret = data_to_uint32_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
-
int
-dict_set_uint32 (dict_t *this, char *key, uint32_t val)
+dict_set_uint32(dict_t *this, char *key, uint32_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_uint32(val);
+ int ret = 0;
- data = data_from_uint32 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_uint64 (dict_t *this, char *key, uint64_t *val)
+dict_get_uint64(dict_t *this, char *key, uint64_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, key, -EINVAL);
- ret = _data_to_uint64 (data, val);
+ ret = data_to_uint64_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_uint64 (dict_t *this, char *key, uint64_t val)
+dict_set_uint64(dict_t *this, char *key, uint64_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_uint64(val);
+ int ret = 0;
- data = data_from_uint64 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
+/*
+ * dict_check_flag can be used to check a one bit flag in an array of flags
+ * The flag argument indicates the bit position (within the array of bits).
+ * Currently limited to max of 256 flags for a key.
+ * return value,
+ * 1 : flag is set
+ * 0 : flag is not set
+ * <0: Error
+ */
int
-dict_get_double (dict_t *this, char *key, double *val)
+dict_check_flag(dict_t *this, char *key, int flag)
{
- data_t *data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = -ENOENT;
+
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (BIT_VALUE((unsigned char *)(data->data), flag))
+ ret = 1;
+ else
+ ret = 0;
+
+ data_unref(data);
+ return ret;
+}
- if (!this || !key || !val) {
- ret = -EINVAL;
+/*
+ * _dict_modify_flag can be used to set/clear a bit flag in an array of flags
+ * flag: indicates the bit position. limited to max of DICT_MAX_FLAGS.
+ * op: Indicates operation DICT_FLAG_SET / DICT_FLAG_CLEAR
+ */
+static int
+_dict_modify_flag(dict_t *this, char *key, int flag, int op)
+{
+ data_t *data = NULL;
+ int ret = 0;
+ data_pair_t *pair = NULL;
+ char *ptr = NULL;
+ int hashval = 0;
+ uint32_t hash;
+
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict OR key (%s) is NULL", key);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * Using a size of 32 bytes to support max of 256
+ * flags in a single key. This should be suffcient.
+ */
+ GF_ASSERT(flag >= 0 && flag < DICT_MAX_FLAGS);
+
+ hash = (uint32_t)XXH64(key, strlen(key), 0);
+ LOCK(&this->lock);
+ {
+ pair = dict_lookup_common(this, key, hash);
+
+ if (pair) {
+ data = pair->value;
+ if (op == DICT_FLAG_SET)
+ BIT_SET((unsigned char *)(data->data), flag);
+ else
+ BIT_CLEAR((unsigned char *)(data->data), flag);
+ } else {
+ ptr = GF_CALLOC(1, DICT_MAX_FLAGS / 8, gf_common_mt_char);
+ if (!ptr) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "flag bit array", NULL);
+ ret = -ENOMEM;
goto err;
- }
+ }
+
+ data = data_from_dynptr(ptr, DICT_MAX_FLAGS / 8);
+
+ if (!data) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, "data",
+ NULL);
+ GF_FREE(ptr);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (op == DICT_FLAG_SET)
+ BIT_SET((unsigned char *)(data->data), flag);
+ else
+ BIT_CLEAR((unsigned char *)(data->data), flag);
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
+ if (this->free_pair.key) { /* the free pair is in use */
+ pair = mem_get0(THIS->ctx->dict_pair_pool);
+ if (!pair) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "dict pair", NULL);
+ ret = -ENOMEM;
+ goto err;
+ }
+ } else { /* use the free pair */
+ pair = &this->free_pair;
+ }
+
+ pair->key = (char *)GF_MALLOC(strlen(key) + 1, gf_common_mt_char);
+ if (!pair->key) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "dict pair", NULL);
+ ret = -ENOMEM;
goto err;
+ }
+ strcpy(pair->key, key);
+ pair->key_hash = hash;
+ pair->value = data_ref(data);
+ this->totkvlen += (strlen(key) + 1 + data->len);
+ hashval = hash % this->hash_size;
+ pair->hash_next = this->members[hashval];
+ this->members[hashval] = pair;
+
+ pair->next = this->members_list;
+ pair->prev = NULL;
+ if (this->members_list)
+ this->members_list->prev = pair;
+ this->members_list = pair;
+ this->count++;
+
+ if (this->max_count < this->count)
+ this->max_count = this->count;
}
+ }
- ret = _data_to_double (data, val);
+ UNLOCK(&this->lock);
+ return 0;
err:
- if (data)
- data_unref (data);
- return ret;
+ if (key && this)
+ UNLOCK(&this->lock);
+
+ if (pair) {
+ if (pair->key) {
+ GF_FREE(pair->key);
+ pair->key = NULL;
+ }
+ if (pair != &this->free_pair) {
+ mem_put(pair);
+ }
+ }
+
+ if (data)
+ data_destroy(data);
+
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_DICT_SET_FAILED, "key=%s", key,
+ NULL);
+
+ return ret;
}
+/*
+ * Todo:
+ * Add below primitives as needed:
+ * dict_check_flags(this, key, flag...): variadic function to check
+ * multiple flags at a time.
+ * dict_set_flags(this, key, flag...): set multiple flags
+ * dict_clear_flags(this, key, flag...): reset multiple flags
+ */
+
int
-dict_set_double (dict_t *this, char *key, double val)
+dict_set_flag(dict_t *this, char *key, int flag)
{
- data_t * data = NULL;
- int ret = 0;
+ return _dict_modify_flag(this, key, flag, DICT_FLAG_SET);
+}
- data = data_from_double (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+int
+dict_clear_flag(dict_t *this, char *key, int flag)
+{
+ return _dict_modify_flag(this, key, flag, DICT_FLAG_CLEAR);
+}
+
+int
+dict_get_double(dict_t *this, char *key, double *val)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_DOUBLE, key, -EINVAL);
+
+ ret = data_to_double_ptr(data, val);
err:
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
int
-dict_set_static_ptr (dict_t *this, char *key, void *ptr)
+dict_set_double(dict_t *this, char *key, double val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_double(val);
+ int ret = 0;
- data = data_from_static_ptr (ptr);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_set_dynptr (dict_t *this, char *key, void *ptr, size_t len)
+dict_set_static_ptr(dict_t *this, char *key, void *ptr)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_ptr_common(ptr, _gf_true);
+ int ret = 0;
- data = data_from_dynptr (ptr, len);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_ptr (dict_t *this, char *key, void **ptr)
+dict_set_dynptr(dict_t *this, char *key, void *ptr, size_t len)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_dynptr(ptr, len);
+ int ret = 0;
- if (!this || !key || !ptr) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
- ret = _data_to_ptr (data, ptr);
- if (ret != 0) {
- goto err;
- }
+err:
+ return ret;
+}
+
+int
+dict_get_ptr(dict_t *this, char *key, void **ptr)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ if (!ptr) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, key, -EINVAL);
+
+ ret = data_to_ptr_common(data, ptr);
+ if (ret != 0) {
+ goto err;
+ }
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
int
-dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len)
+dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !ptr) {
- ret = -EINVAL;
- goto err;
- }
+ if (!ptr) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- *len = data->len;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, key, -EINVAL);
- ret = _data_to_ptr (data, ptr);
- if (ret != 0) {
- goto err;
- }
+ *len = data->len;
+
+ ret = data_to_ptr_common(data, ptr);
+ if (ret != 0) {
+ goto err;
+ }
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
+/* Get string - with known key length */
int
-dict_set_ptr (dict_t *this, char *key, void *ptr)
+dict_get_strn(dict_t *this, char *key, const int keylen, char **str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = -EINVAL;
- data = data_from_ptr (ptr);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !str) {
+ goto err;
+ }
+ ret = dict_get_with_refn(this, key, keylen, &data);
+ if (ret < 0) {
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_STR, key, -EINVAL);
+
+ *str = data->data;
err:
- return ret;
-}
+ if (data)
+ data_unref(data);
+ return ret;
+}
int
-dict_get_str (dict_t *this, char *key, char **str)
+dict_get_str(dict_t *this, char *key, char **str)
{
- data_t * data = NULL;
- int ret = -EINVAL;
+ data_t *data = NULL;
+ int ret = -EINVAL;
- if (!this || !key || !str) {
- goto err;
- }
+ if (!str) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- goto err;
- }
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_STR, key, -EINVAL);
- if (!data || !data->data) {
- goto err;
- }
- *str = data->data;
+ *str = data->data;
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
int
-dict_set_str (dict_t *this, char *key, char *str)
+dict_set_str(dict_t *this, char *key, char *str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = str_to_data(str);
+ int ret = 0;
- data = str_to_data (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
+/* Set string - with known key length */
int
-dict_set_dynstr (dict_t *this, char *key, char *str)
+dict_set_strn(dict_t *this, char *key, const int keylen, char *str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- data = data_from_dynstr (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = str_to_data(str);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
-/*
- for malloced strings we should do a free instead of GF_FREE
-*/
+/* Set string - with known key length and known value length */
int
-dict_set_dynmstr (dict_t *this, char *key, char *str)
+dict_set_nstrn(dict_t *this, char *key, const int keylen, char *str,
+ const int vallen)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = strn_to_data(str, vallen);
+ int ret = 0;
- data = data_from_dynmstr (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
+int
+dict_set_dynstr_with_alloc(dict_t *this, char *key, const char *str)
+{
+ char *alloc_str = gf_strdup(str);
+ int ret = -1;
+
+ if (!alloc_str)
+ return ret;
+
+ ret = dict_set_dynstr(this, key, alloc_str);
+ if (ret == -EINVAL)
+ GF_FREE(alloc_str);
+
+ return ret;
+}
int
-dict_get_bin (dict_t *this, char *key, void **bin)
+dict_set_dynstr(dict_t *this, char *key, char *str)
{
- data_t * data = NULL;
- int ret = -EINVAL;
+ const int keylen = strlen(key);
+ return dict_set_dynstrn(this, key, keylen, str);
+}
- if (!this || !key || !bin) {
- goto err;
- }
+int
+dict_set_dynstrn(dict_t *this, char *key, const int keylen, char *str)
+{
+ data_t *data = data_from_dynstr(str);
+ int ret = 0;
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- if (!data || !data->data) {
- goto err;
- }
- *bin = data->data;
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- if (data)
- data_unref (data);
+ return ret;
+}
- return ret;
+/* This function is called only by the volgen for now.
+ Check how else you can handle it */
+int
+dict_set_option(dict_t *this, char *key, char *str)
+{
+ data_t *data = data_from_dynstr(str);
+ int ret = 0;
+
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data->data_type = GF_DATA_TYPE_STR_OLD;
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
+err:
+ return ret;
}
+int
+dict_add_dynstr_with_alloc(dict_t *this, char *key, char *str)
+{
+ data_t *data = NULL;
+ int ret = 0;
+ char *alloc_str = gf_strdup(str);
+
+ if (!alloc_str)
+ goto out;
+
+ data = data_from_dynstr(alloc_str);
+ if (!data) {
+ GF_FREE(alloc_str);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_add(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
+
+out:
+ return ret;
+}
int
-dict_set_bin (dict_t *this, char *key, void *ptr, size_t size)
+dict_get_bin(dict_t *this, char *key, void **bin)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = -EINVAL;
- if (!ptr || (size < 0)) {
- ret = -EINVAL;
- goto err;
- }
+ if (!bin) {
+ goto err;
+ }
- data = bin_to_data (ptr, size);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
- data->data = ptr;
- data->len = size;
- data->is_static = 0;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, key, ret);
- ret = dict_set (this, key, data);
+ *bin = data->data;
err:
- return ret;
+ if (data)
+ data_unref(data);
+
+ return ret;
}
+/********************************************************************
+ *
+ * dict_set_bin_common:
+ * This is the common function to set key and its value in
+ * dictionary. Flag(is_static) should be set appropriately based
+ * on the type of memory type used for value(*ptr). If flag is set
+ * to false value(*ptr) will be freed using GF_FREE() on destroy.
+ *
+ *******************************************************************/
+static int
+dict_set_bin_common(dict_t *this, char *key, void *ptr, size_t size,
+ gf_boolean_t is_static, gf_dict_data_type_t type)
+{
+ data_t *data = NULL;
+ int ret = 0;
+ if (!ptr || (size > DICT_KEY_VALUE_MAX_SIZE)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data = bin_to_data(ptr, size);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data->is_static = is_static;
+ data->data_type = type;
+
+ ret = dict_set(this, key, data);
+ if (ret < 0) {
+ /* don't free data->data, let callers handle it */
+ data->data = NULL;
+ data_destroy(data);
+ }
+
+err:
+ return ret;
+}
+
+/********************************************************************
+ *
+ * dict_set_bin:
+ * Set key and its value in the dictionary. This function should
+ * be called if the value is stored in dynamic memory.
+ *
+ *******************************************************************/
int
-dict_set_static_bin (dict_t *this, char *key, void *ptr, size_t size)
+dict_set_bin(dict_t *this, char *key, void *ptr, size_t size)
{
- data_t * data = NULL;
- int ret = 0;
+ return dict_set_bin_common(this, key, ptr, size, _gf_false,
+ GF_DATA_TYPE_PTR);
+}
- if (!ptr || (size < 0)) {
- ret = -EINVAL;
- goto err;
- }
+/********************************************************************
+ *
+ * dict_set_static_bin:
+ * Set key and its value in the dictionary. This function should
+ * be called if the value is stored in static memory.
+ *
+ *******************************************************************/
+int
+dict_set_static_bin(dict_t *this, char *key, void *ptr, size_t size)
+{
+ return dict_set_bin_common(this, key, ptr, size, _gf_true,
+ GF_DATA_TYPE_PTR);
+}
- data = bin_to_data (ptr, size);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+/* */
+int
+dict_set_gfuuid(dict_t *this, char *key, uuid_t gfid, bool is_static)
+{
+ return dict_set_bin_common(this, key, gfid, sizeof(uuid_t), is_static,
+ GF_DATA_TYPE_GFUUID);
+}
+
+int
+dict_get_gfuuid(dict_t *this, char *key, uuid_t *gfid)
+{
+ data_t *data = NULL;
+ int ret = -EINVAL;
- data->data = ptr;
- data->len = size;
- data->is_static = 1;
+ if (!gfid) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_GFUUID, key, -EINVAL);
+
+ memcpy(*gfid, data->data, min(data->len, sizeof(uuid_t)));
err:
- return ret;
+ if (data)
+ data_unref(data);
+
+ return ret;
+}
+
+int
+dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
+ bool is_static)
+{
+ return dict_set_bin_common(this, key, mdata, sizeof(struct mdata_iatt),
+ is_static, GF_DATA_TYPE_MDATA);
}
+int
+dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata)
+{
+ data_t *data = NULL;
+ int ret = -EINVAL;
+
+ if (!mdata) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_MDATA, key, -EINVAL);
+ if (data->len < sizeof(struct mdata_iatt)) {
+ gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
+ "key=%s", key, NULL);
+ ret = -ENOBUFS;
+ goto err;
+ }
+
+ memcpy(mdata, data->data, min(data->len, sizeof(struct mdata_iatt)));
+
+err:
+ if (data)
+ data_unref(data);
+
+ return ret;
+}
+
+int
+dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static)
+{
+ return dict_set_bin_common(this, key, iatt, sizeof(struct iatt), is_static,
+ GF_DATA_TYPE_IATT);
+}
+
+int
+dict_get_iatt(dict_t *this, char *key, struct iatt *iatt)
+{
+ data_t *data = NULL;
+ int ret = -EINVAL;
+
+ if (!iatt) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_IATT, key, -EINVAL);
+
+ memcpy(iatt, data->data, min(data->len, sizeof(struct iatt)));
+
+err:
+ if (data)
+ data_unref(data);
+
+ return ret;
+}
/**
* dict_get_str_boolean - get a boolean value based on string representation.
@@ -2042,41 +2806,77 @@ err:
*/
int
-dict_get_str_boolean (dict_t *this, char *key, int default_val)
+dict_get_str_boolean(dict_t *this, char *key, int default_val)
{
- data_t *data = NULL;
- gf_boolean_t boo = _gf_false;
- int ret = 0;
+ data_t *data = NULL;
+ gf_boolean_t boo = _gf_false;
+ int ret = 0;
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- if (ret == -ENOENT)
- ret = default_val;
- else
- ret = -1;
- goto err;
- }
-
- GF_ASSERT (data);
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = default_val;
+ else
+ ret = -1;
+ goto err;
+ }
- if (!data->data) {
- ret = -1;
- goto err;
- }
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = gf_string2boolean (data->data, &boo);
- if (ret == -1)
- goto err;
+ ret = gf_strn2boolean(data->data, data->len - 1, &boo);
+ if (ret == -1)
+ goto err;
- ret = boo;
+ ret = boo;
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
+int
+dict_rename_key(dict_t *this, char *key, char *replace_key)
+{
+ data_pair_t *pair = NULL;
+ int ret = -EINVAL;
+ uint32_t hash;
+ uint32_t replacekey_hash;
+ int replacekey_len;
+
+ /* replacing a key by itself is a NO-OP */
+ if (strcmp(key, replace_key) == 0)
+ return 0;
+
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return ret;
+ }
+
+ hash = (uint32_t)XXH64(key, strlen(key), 0);
+ replacekey_len = strlen(replace_key);
+ replacekey_hash = (uint32_t)XXH64(replace_key, replacekey_len, 0);
+
+ LOCK(&this->lock);
+ {
+ /* no need to data_ref(pair->value), dict_set_lk() does it */
+ pair = dict_lookup_common(this, key, hash);
+ if (!pair)
+ ret = -ENODATA;
+ else
+ ret = dict_set_lk(this, replace_key, replacekey_len, pair->value,
+ replacekey_hash, 1);
+ }
+ UNLOCK(&this->lock);
+
+ if (!ret)
+ /* only delete the key on success */
+ dict_del(this, key);
+
+ return ret;
+}
/**
* Serialization format:
@@ -2086,13 +2886,9 @@ err:
* 4 4 4 <key len> <value len>
*/
-#define DICT_HDR_LEN 4
-#define DICT_DATA_HDR_KEY_LEN 4
-#define DICT_DATA_HDR_VAL_LEN 4
-
/**
- * _dict_serialized_length - return the length of serialized dict. This
- * procedure has to be called with this->lock held.
+ * dict_serialized_length_lk - return the length of serialized dict. This
+ * procedure has to be called with this->lock held.
*
* @this : dict to be serialized
* @return: success: len
@@ -2100,155 +2896,103 @@ err:
*/
int
-_dict_serialized_length (dict_t *this)
+dict_serialized_length_lk(dict_t *this)
{
- int ret = -EINVAL;
- int count = 0;
- int len = 0;
- data_pair_t * pair = NULL;
+ int ret = -EINVAL;
+ int count = this->count;
+ const int keyhdrlen = DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
- len = DICT_HDR_LEN;
- count = this->count;
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
+ goto out;
+ }
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
- goto out;
- }
-
- pair = this->members_list;
-
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found!");
- goto out;
- }
-
- len += DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
-
- if (!pair->key) {
- gf_log ("dict", GF_LOG_ERROR, "pair->key is null!");
- goto out;
- }
-
- len += strlen (pair->key) + 1 /* for '\0' */;
-
- if (!pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value is null!");
- goto out;
- }
-
- if (pair->value->len < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "value->len (%d) < 0",
- pair->value->len);
- goto out;
- }
-
- len += pair->value->len;
-
- pair = pair->next;
- count--;
- }
-
- ret = len;
+ ret = DICT_HDR_LEN + this->totkvlen + (count * keyhdrlen);
out:
- return ret;
+ return ret;
}
/**
- * _dict_serialize - serialize a dictionary into a buffer. This procedure has
- * to be called with this->lock held.
+ * dict_serialize_lk - serialize a dictionary into a buffer. This procedure has
+ * to be called with this->lock held.
*
* @this: dict to serialize
* @buf: buffer to serialize into. This must be
- * atleast dict_serialized_length (this) large
+ * at least dict_serialized_length (this) large
*
* @return: success: 0
* failure: -errno
*/
-int
-_dict_serialize (dict_t *this, char *buf)
-{
- int ret = -1;
- data_pair_t * pair = NULL;
- int32_t count = 0;
- int32_t keylen = 0;
- int32_t vallen = 0;
- int32_t netword = 0;
-
-
- if (!buf) {
- gf_log ("dict", GF_LOG_ERROR,
- "buf is null!");
- goto out;
+static int
+dict_serialize_lk(dict_t *this, char *buf)
+{
+ int ret = -1;
+ data_pair_t *pair = this->members_list;
+ int32_t count = this->count;
+ int32_t keylen = 0;
+ int32_t netword = 0;
+
+ if (!buf) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL);
+ goto out;
+ }
+
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
+ goto out;
+ }
+
+ netword = hton32(count);
+ memcpy(buf, &netword, sizeof(netword));
+ buf += DICT_HDR_LEN;
+
+ while (count) {
+ if (!pair) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ NULL);
+ goto out;
}
-
- count = this->count;
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
- goto out;
+ if (!pair->key) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL);
+ goto out;
}
- netword = hton32 (count);
- memcpy (buf, &netword, sizeof(netword));
- buf += DICT_HDR_LEN;
- pair = this->members_list;
-
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found!");
- goto out;
- }
-
- if (!pair->key) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->key is null!");
- goto out;
- }
-
- keylen = strlen (pair->key);
- netword = hton32 (keylen);
- memcpy (buf, &netword, sizeof(netword));
- buf += DICT_DATA_HDR_KEY_LEN;
+ keylen = strlen(pair->key);
+ netword = hton32(keylen);
+ memcpy(buf, &netword, sizeof(netword));
+ buf += DICT_DATA_HDR_KEY_LEN;
- if (!pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value is null!");
- goto out;
- }
-
- vallen = pair->value->len;
- netword = hton32 (vallen);
- memcpy (buf, &netword, sizeof(netword));
- buf += DICT_DATA_HDR_VAL_LEN;
+ if (!pair->value) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL);
+ goto out;
+ }
- memcpy (buf, pair->key, keylen);
- buf += keylen;
- *buf++ = '\0';
+ netword = hton32(pair->value->len);
+ memcpy(buf, &netword, sizeof(netword));
+ buf += DICT_DATA_HDR_VAL_LEN;
- if (!pair->value->data) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value->data is null!");
- goto out;
- }
- memcpy (buf, pair->value->data, vallen);
- buf += vallen;
+ memcpy(buf, pair->key, keylen);
+ buf += keylen;
+ *buf++ = '\0';
- pair = pair->next;
- count--;
+ if (pair->value->data) {
+ memcpy(buf, pair->value->data, pair->value->len);
+ buf += pair->value->len;
}
- ret = 0;
+ pair = pair->next;
+ count--;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
/**
* dict_serialized_length - return the length of serialized dict
*
@@ -2258,23 +3002,24 @@ out:
*/
int
-dict_serialized_length (dict_t *this)
+dict_serialized_length(dict_t *this)
{
- int ret = -EINVAL;
+ int ret = -EINVAL;
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
- goto out;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is null!");
+ goto out;
+ }
- LOCK (&this->lock);
- {
- ret = _dict_serialized_length (this);
- }
- UNLOCK (&this->lock);
+ LOCK(&this->lock);
+ {
+ ret = dict_serialized_length_lk(this);
+ }
+ UNLOCK(&this->lock);
out:
- return ret;
+ return ret;
}
/**
@@ -2282,32 +3027,32 @@ out:
*
* @this: dict to serialize
* @buf: buffer to serialize into. This must be
- * atleast dict_serialized_length (this) large
+ * at least dict_serialized_length (this) large
*
* @return: success: 0
* failure: -errno
*/
int
-dict_serialize (dict_t *this, char *buf)
+dict_serialize(dict_t *this, char *buf)
{
- int ret = -1;
+ int ret = -1;
- if (!this || !buf) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
- goto out;
- }
+ if (!this || !buf) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is null!");
+ goto out;
+ }
- LOCK (&this->lock);
- {
- ret = _dict_serialize (this, buf);
- }
- UNLOCK (&this->lock);
+ LOCK(&this->lock);
+ {
+ ret = dict_serialize_lk(this, buf);
+ }
+ UNLOCK(&this->lock);
out:
- return ret;
+ return ret;
}
-
/**
* dict_unserialize - unserialize a buffer into a dict
*
@@ -2320,124 +3065,137 @@ out:
*/
int32_t
-dict_unserialize (char *orig_buf, int32_t size, dict_t **fill)
-{
- char *buf = NULL;
- int ret = -1;
- int32_t count = 0;
- int i = 0;
-
- data_t * value = NULL;
- char * key = NULL;
- int32_t keylen = 0;
- int32_t vallen = 0;
- int32_t hostord = 0;
-
- buf = orig_buf;
+dict_unserialize(char *orig_buf, int32_t size, dict_t **fill)
+{
+ char *buf = orig_buf;
+ int ret = -1;
+ int32_t count = 0;
+ int i = 0;
+
+ data_t *value = NULL;
+ char *key = NULL;
+ int32_t keylen = 0;
+ int32_t vallen = 0;
+ int32_t hostord = 0;
+
+ if (!buf) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "buf is null!");
+ goto out;
+ }
+
+ if (size == 0) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "size is 0!");
+ goto out;
+ }
+
+ if (!fill) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "fill is null!");
+ goto out;
+ }
+
+ if (!*fill) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "*fill is null!");
+ goto out;
+ }
+
+ if ((buf + DICT_HDR_LEN) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized buffer "
+ "passed. available (%lu) < required (%lu)",
+ (long)(orig_buf + size), (long)(buf + DICT_HDR_LEN));
+ goto out;
+ }
+
+ memcpy(&hostord, buf, sizeof(hostord));
+ count = ntoh32(hostord);
+ buf += DICT_HDR_LEN;
+
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
+ goto out;
+ }
+
+ /* count will be set by the dict_set's below */
+ (*fill)->count = 0;
+
+ for (i = 0; i < count; i++) {
+ if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized "
+ "buffer passed. available (%lu) < "
+ "required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + DICT_DATA_HDR_KEY_LEN));
+ goto out;
+ }
+ memcpy(&hostord, buf, sizeof(hostord));
+ keylen = ntoh32(hostord);
+ buf += DICT_DATA_HDR_KEY_LEN;
+
+ if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized "
+ "buffer passed. available (%lu) < "
+ "required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + DICT_DATA_HDR_VAL_LEN));
+ goto out;
+ }
+ memcpy(&hostord, buf, sizeof(hostord));
+ vallen = ntoh32(hostord);
+ buf += DICT_DATA_HDR_VAL_LEN;
+
+ if ((keylen < 0) || (vallen < 0)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized length passed "
+ "key:%d val:%d",
+ keylen, vallen);
+ goto out;
+ }
+ if ((buf + keylen) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size), (long)(buf + keylen));
+ goto out;
+ }
+ key = buf;
+ buf += keylen + 1; /* for '\0' */
+
+ if ((buf + vallen) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size), (long)(buf + vallen));
+ goto out;
+ }
+ value = get_new_data();
- if (!buf) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "buf is null!");
- goto out;
- }
-
- if (size == 0) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "size is 0!");
- goto out;
- }
-
- if (!fill) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "fill is null!");
- goto out;
- }
-
- if (!*fill) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "*fill is null!");
- goto out;
- }
-
- if ((buf + DICT_HDR_LEN) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + DICT_HDR_LEN));
- goto out;
+ if (!value) {
+ ret = -1;
+ goto out;
}
+ value->len = vallen;
+ value->data = gf_memdup(buf, vallen);
+ value->data_type = GF_DATA_TYPE_STR_OLD;
+ value->is_static = _gf_false;
+ buf += vallen;
- memcpy (&hostord, buf, sizeof(hostord));
- count = ntoh32 (hostord);
- buf += DICT_HDR_LEN;
-
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "count (%d) <= 0", count);
- goto out;
- }
+ ret = dict_addn(*fill, key, keylen, value);
+ if (ret < 0)
+ goto out;
+ }
- /* count will be set by the dict_set's below */
- (*fill)->count = 0;
-
- for (i = 0; i < count; i++) {
- if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + DICT_DATA_HDR_KEY_LEN));
- goto out;
- }
- memcpy (&hostord, buf, sizeof(hostord));
- keylen = ntoh32 (hostord);
- buf += DICT_DATA_HDR_KEY_LEN;
-
- if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + DICT_DATA_HDR_VAL_LEN));
- goto out;
- }
- memcpy (&hostord, buf, sizeof(hostord));
- vallen = ntoh32 (hostord);
- buf += DICT_DATA_HDR_VAL_LEN;
-
- if ((buf + keylen) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + keylen));
- goto out;
- }
- key = buf;
- buf += keylen + 1; /* for '\0' */
-
- if ((buf + vallen) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + vallen));
- }
- value = get_new_data ();
- value->len = vallen;
- value->data = memdup (buf, vallen);
- value->is_static = 0;
- buf += vallen;
-
- dict_set (*fill, key, value);
- }
-
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
/**
* dict_allocate_and_serialize - serialize a dictionary into an allocated buffer
*
@@ -2450,50 +3208,49 @@ out:
*/
int32_t
-dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length)
+dict_allocate_and_serialize(dict_t *this, char **buf, u_int *length)
{
- int ret = -EINVAL;
- ssize_t len = 0;
+ int ret = -EINVAL;
+ ssize_t len = 0;
- if (!this || !buf) {
- gf_log_callingfn ("dict", GF_LOG_DEBUG,
- "dict OR buf is NULL");
- goto out;
- }
+ if (!this || !buf) {
+ gf_msg_debug("dict", 0, "dict OR buf is NULL");
+ goto out;
+ }
- LOCK (&this->lock);
- {
- len = _dict_serialized_length (this);
- if (len < 0) {
- ret = len;
- goto unlock;
- }
+ LOCK(&this->lock);
+ {
+ len = dict_serialized_length_lk(this);
+ if (len < 0) {
+ ret = len;
+ goto unlock;
+ }
- *buf = GF_CALLOC (1, len, gf_common_mt_char);
- if (*buf == NULL) {
- ret = -ENOMEM;
- goto unlock;
- }
+ *buf = GF_MALLOC(len, gf_common_mt_char);
+ if (*buf == NULL) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
- ret = _dict_serialize (this, *buf);
- if (ret < 0) {
- GF_FREE (*buf);
- *buf = NULL;
- goto unlock;
- }
+ ret = dict_serialize_lk(this, *buf);
+ if (ret < 0) {
+ GF_FREE(*buf);
+ *buf = NULL;
+ goto unlock;
+ }
- if (length != NULL) {
- *length = len;
- }
+ if (length != NULL) {
+ *length = len;
}
+ }
unlock:
- UNLOCK (&this->lock);
+ UNLOCK(&this->lock);
out:
- return ret;
+ return ret;
}
/**
- * _dict_serialize_value_with_delim: serialize the values in the dictionary
+ * dict_serialize_value_with_delim_lk: serialize the values in the dictionary
* into a buffer separated by delimiter (except the last)
*
* @this : dictionary to serialize
@@ -2502,125 +3259,231 @@ out:
* @delimiter : the delimiter to separate the values
*
* @return : 0 -> success
- * : -errno -> faliure
+ * : -errno -> failure
*/
int
-_dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
- char delimiter)
-{
- int ret = -1;
- int32_t count = 0;
- int32_t vallen = 0;
- int32_t total_len = 0;
- data_pair_t *pair = NULL;
-
- if (!buf) {
- gf_log ("dict", GF_LOG_ERROR, "buf is null");
- goto out;
+dict_serialize_value_with_delim_lk(dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter)
+{
+ int ret = -1;
+ int32_t count = this->count;
+ int32_t vallen = 0;
+ int32_t total_len = 0;
+ data_pair_t *pair = this->members_list;
+
+ if (!buf) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL);
+ goto out;
+ }
+
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "count=%d",
+ count, NULL);
+ goto out;
+ }
+
+ while (count) {
+ if (!pair) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ NULL);
+ goto out;
}
- count = this->count;
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0", count);
- goto out;
+ if (!pair->key || !pair->value) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_KEY_OR_VALUE_NULL, NULL);
+ goto out;
}
- pair = this->members_list;
+ if (!pair->value->data) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_VALUE_IN_DICT, NULL);
+ goto out;
+ }
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found");
- goto out;
- }
+ vallen = pair->value->len - 1; // length includes \0
+ memcpy(buf, pair->value->data, vallen);
+ buf += vallen;
+ *buf++ = delimiter;
- if (!pair->key || !pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "key or value is null");
- goto out;
- }
+ total_len += (vallen + 1);
- if (!pair->value->data) {
- gf_log ("dict", GF_LOG_ERROR,
- "null value found in dict");
- goto out;
- }
+ pair = pair->next;
+ count--;
+ }
- vallen = pair->value->len - 1; // length includes \0
- memcpy (buf, pair->value->data, vallen);
- buf += vallen;
- *buf++ = delimiter;
+ *--buf = '\0'; // remove the last delimiter
+ total_len--; // adjust the length
+ ret = 0;
- total_len += (vallen + 1);
+ if (serz_len)
+ *serz_len = total_len;
- pair = pair->next;
- count--;
- }
-
- *--buf = '\0'; // remove the last delimiter
- total_len--; // adjust the length
- ret = 0;
-
- if (serz_len)
- *serz_len = total_len;
+out:
+ return ret;
+}
- out:
- return ret;
+int
+dict_serialize_value_with_delim(dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter)
+{
+ int ret = -1;
+
+ if (!this || !buf) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is null!");
+ goto out;
+ }
+
+ LOCK(&this->lock);
+ {
+ ret = dict_serialize_value_with_delim_lk(this, buf, serz_len,
+ delimiter);
+ }
+ UNLOCK(&this->lock);
+out:
+ return ret;
}
int
-dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
- char delimiter)
+dict_dump_to_str(dict_t *dict, char *dump, int dumpsize, char *format)
{
- int ret = -1;
+ int ret = 0;
+ int dumplen = 0;
+ data_pair_t *trav = NULL;
- if (!this || !buf) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
- goto out;
- }
+ if (!dict)
+ return 0;
- LOCK (&this->lock);
- {
- ret = _dict_serialize_value_with_delim (this, buf, serz_len, delimiter);
- }
- UNLOCK (&this->lock);
+ for (trav = dict->members_list; trav; trav = trav->next) {
+ ret = snprintf(&dump[dumplen], dumpsize - dumplen, format, trav->key,
+ trav->value->data);
+ if ((ret == -1) || !ret)
+ return ret;
+
+ dumplen += ret;
+ }
+ return 0;
+}
+
+void
+dict_dump_to_log(dict_t *dict)
+{
+ int ret = -1;
+ char *dump = NULL;
+ const int dump_size = 64 * 1024;
+ char *format = "(%s:%s)";
+
+ if (!dict) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ goto out;
+ }
+
+ dump = GF_MALLOC(dump_size, gf_common_mt_char);
+ if (!dump) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
+ "dump buffer is NULL");
+ goto out;
+ }
+
+ ret = dict_dump_to_str(dict, dump, dump_size, format);
+ if (ret) {
+ gf_smsg("dict", GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, NULL);
+ goto out;
+ }
+ gf_smsg("dict", GF_LOG_INFO, 0, LG_MSG_DICT_ERROR, "dict=%p", dict,
+ "dump=%s", dump, NULL);
out:
- return ret;
+ GF_FREE(dump);
+
+ return;
}
void
-dict_dump (dict_t *this)
+dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain)
+{
+ int ret = -1;
+ char *dump = NULL;
+ const int dump_size = 64 * 1024;
+ char key[4096] = {
+ 0,
+ };
+ char *format = "\n\t%s:%s";
+
+ if (!dict) {
+ gf_msg_callingfn(domain, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ goto out;
+ }
+
+ dump = GF_MALLOC(dump_size, gf_common_mt_char);
+ if (!dump) {
+ gf_msg_callingfn(domain, GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
+ "dump buffer is NULL");
+ goto out;
+ }
+
+ ret = dict_dump_to_str(dict, dump, dump_size, format);
+ if (ret) {
+ gf_smsg(domain, GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, "name=%s",
+ dict_name, NULL);
+ goto out;
+ }
+ gf_proc_dump_build_key(key, domain, "%s", dict_name);
+ gf_proc_dump_write(key, "%s", dump);
+
+out:
+ GF_FREE(dump);
+
+ return;
+}
+
+dict_t *
+dict_for_key_value(const char *name, const char *value, size_t size,
+ gf_boolean_t is_static)
{
- int ret = 0;
- int dumplen = 0;
- data_pair_t *trav = NULL;
- char dump[64*1024]; /* This is debug only, hence
- performance should not matter */
+ dict_t *xattr = dict_new();
+ int ret = 0;
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict NULL");
- goto out;
- }
+ if (!xattr)
+ return NULL;
- dump[0] = '\0'; /* the array is not initialized to '\0' */
+ if (is_static)
+ ret = dict_set_static_bin(xattr, (char *)name, (void *)value, size);
+ else
+ ret = dict_set_bin(xattr, (char *)name, (void *)value, size);
- /* There is a possibility of issues if data is binary, ignore it
- for now as debugging is more important */
- for (trav = this->members_list; trav; trav = trav->next) {
- ret = snprintf (&dump[dumplen], ((64*1024) - dumplen - 1),
- "(%s:%s)", trav->key, trav->value->data);
- if ((ret == -1) || !ret)
- break;
+ if (ret) {
+ dict_destroy(xattr);
+ xattr = NULL;
+ }
- dumplen += ret;
- /* snprintf doesn't append a trailing '\0', add it here */
- dump[dumplen] = '\0';
- }
+ return xattr;
+}
+
+/*
+ * "strings" should be NULL terminated strings array.
+ */
+int
+dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result)
+{
+ int i = 0;
+ uint32_t hash = 0;
- if (dumplen)
- gf_log_callingfn ("dict", GF_LOG_INFO,
- "dict=%p (%s)", this, dump);
+ if (!dict || !strings || !result)
+ return -EINVAL;
-out:
- return;
+ LOCK(&dict->lock);
+ {
+ for (i = 0; strings[i]; i++) {
+ hash = (uint32_t)XXH64(strings[i], strlen(strings[i]), 0);
+ if (dict_lookup_common(dict, strings[i], hash)) {
+ *result = _gf_true;
+ goto unlock;
+ }
+ }
+ *result = _gf_false;
+ }
+unlock:
+ UNLOCK(&dict->lock);
+ return 0;
}
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
deleted file mode 100644
index 4e7cf2406eb..00000000000
--- a/libglusterfs/src/dict.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _DICT_H
-#define _DICT_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <inttypes.h>
-#include <sys/uio.h>
-#include <pthread.h>
-
-#include "common-utils.h"
-
-typedef struct _data data_t;
-typedef struct _dict dict_t;
-typedef struct _data_pair data_pair_t;
-
-
-#define GF_PROTOCOL_DICT_SERIALIZE(this,from_dict,to,len,ope,labl) do { \
- int ret = 0; \
- size_t dictlen = 0; \
- \
- if (!from_dict) \
- break; \
- \
- ret = dict_allocate_and_serialize (from_dict, to, \
- &dictlen); \
- if (ret < 0) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "failed to get serialized dict (%s)", \
- (#from_dict)); \
- ope = EINVAL; \
- goto labl; \
- } \
- len = dictlen; \
- } while (0)
-
-
-#define GF_PROTOCOL_DICT_UNSERIALIZE(xl,to,buff,len,ret,ope,labl) do { \
- char *buf = NULL; \
- if (!len) \
- break; \
- to = dict_new(); \
- GF_VALIDATE_OR_GOTO (xl->name, to, labl); \
- \
- buf = memdup (buff, len); \
- GF_VALIDATE_OR_GOTO (xl->name, buf, labl); \
- \
- ret = dict_unserialize (buf, len, &to); \
- if (ret < 0) { \
- gf_log (xl->name, GF_LOG_WARNING, \
- "failed to unserialize dictionary (%s)", \
- (#to)); \
- \
- ope = EINVAL; \
- GF_FREE (buf); \
- goto labl; \
- } \
- \
- to->extra_free = buf; \
- } while (0)
-
-struct _data {
- unsigned char is_static:1;
- unsigned char is_const:1;
- unsigned char is_stdalloc:1;
- int32_t len;
- char *data;
- int32_t refcount;
- gf_lock_t lock;
-};
-
-struct _data_pair {
- struct _data_pair *hash_next;
- struct _data_pair *prev;
- struct _data_pair *next;
- data_t *value;
- char *key;
-};
-
-struct _dict {
- unsigned char is_static:1;
- int32_t hash_size;
- int32_t count;
- int32_t refcount;
- data_pair_t **members;
- data_pair_t *members_list;
- char *extra_free;
- char *extra_stdfree;
- gf_lock_t lock;
-};
-
-
-int32_t is_data_equal (data_t *one, data_t *two);
-void data_destroy (data_t *data);
-
-int32_t dict_set (dict_t *this, char *key, data_t *value);
-data_t *dict_get (dict_t *this, char *key);
-void dict_del (dict_t *this, char *key);
-int dict_reset (dict_t *dict);
-
-int32_t dict_serialized_length (dict_t *dict);
-int32_t dict_serialize (dict_t *dict, char *buf);
-int32_t dict_unserialize (char *buf, int32_t size, dict_t **fill);
-
-int32_t dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length);
-
-void dict_destroy (dict_t *dict);
-void dict_unref (dict_t *dict);
-dict_t *dict_ref (dict_t *dict);
-data_t *data_ref (data_t *data);
-void data_unref (data_t *data);
-
-int32_t dict_lookup (dict_t *this, char *key, data_pair_t **data);
-/*
- TODO: provide converts for differnt byte sizes, signedness, and void *
- */
-data_t *int_to_data (int64_t value);
-data_t *str_to_data (char *value);
-data_t *data_from_dynstr (char *value);
-data_t *data_from_dynptr (void *value, int32_t len);
-data_t *bin_to_data (void *value, int32_t len);
-data_t *static_str_to_data (char *value);
-data_t *static_bin_to_data (void *value);
-
-int64_t data_to_int64 (data_t *data);
-int32_t data_to_int32 (data_t *data);
-int16_t data_to_int16 (data_t *data);
-int8_t data_to_int8 (data_t *data);
-
-uint64_t data_to_uint64 (data_t *data);
-uint32_t data_to_uint32 (data_t *data);
-uint16_t data_to_uint16 (data_t *data);
-uint8_t data_to_uint8 (data_t *data);
-
-data_t *data_from_ptr (void *value);
-data_t *data_from_static_ptr (void *value);
-
-data_t *data_from_int64 (int64_t value);
-data_t *data_from_int32 (int32_t value);
-data_t *data_from_int16 (int16_t value);
-data_t *data_from_int8 (int8_t value);
-
-data_t *data_from_uint64 (uint64_t value);
-data_t *data_from_uint32 (uint32_t value);
-data_t *data_from_uint16 (uint16_t value);
-
-char *data_to_str (data_t *data);
-void *data_to_bin (data_t *data);
-void *data_to_ptr (data_t *data);
-
-data_t *get_new_data ();
-data_t * data_copy (data_t *old);
-dict_t *get_new_dict_full (int size_hint);
-dict_t *get_new_dict ();
-
-data_pair_t *get_new_data_pair ();
-
-void dict_foreach (dict_t *this,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
- void *data);
-
-dict_t *dict_copy (dict_t *this, dict_t *new);
-
-/* CLEANED UP FUNCTIONS DECLARATIONS */
-GF_MUST_CHECK dict_t *dict_new (void);
-dict_t *dict_copy_with_ref (dict_t *this, dict_t *new);
-
-GF_MUST_CHECK int dict_reset (dict_t *dict);
-
-GF_MUST_CHECK int dict_get_int8 (dict_t *this, char *key, int8_t *val);
-GF_MUST_CHECK int dict_set_int8 (dict_t *this, char *key, int8_t val);
-
-GF_MUST_CHECK int dict_get_int16 (dict_t *this, char *key, int16_t *val);
-GF_MUST_CHECK int dict_set_int16 (dict_t *this, char *key, int16_t val);
-
-GF_MUST_CHECK int dict_get_int32 (dict_t *this, char *key, int32_t *val);
-GF_MUST_CHECK int dict_set_int32 (dict_t *this, char *key, int32_t val);
-
-GF_MUST_CHECK int dict_get_int64 (dict_t *this, char *key, int64_t *val);
-GF_MUST_CHECK int dict_set_int64 (dict_t *this, char *key, int64_t val);
-
-GF_MUST_CHECK int dict_get_uint16 (dict_t *this, char *key, uint16_t *val);
-GF_MUST_CHECK int dict_set_uint16 (dict_t *this, char *key, uint16_t val);
-
-GF_MUST_CHECK int dict_get_uint32 (dict_t *this, char *key, uint32_t *val);
-GF_MUST_CHECK int dict_set_uint32 (dict_t *this, char *key, uint32_t val);
-
-GF_MUST_CHECK int dict_get_uint64 (dict_t *this, char *key, uint64_t *val);
-GF_MUST_CHECK int dict_set_uint64 (dict_t *this, char *key, uint64_t val);
-
-GF_MUST_CHECK int dict_get_double (dict_t *this, char *key, double *val);
-GF_MUST_CHECK int dict_set_double (dict_t *this, char *key, double val);
-
-GF_MUST_CHECK int dict_set_static_ptr (dict_t *this, char *key, void *ptr);
-GF_MUST_CHECK int dict_get_ptr (dict_t *this, char *key, void **ptr);
-GF_MUST_CHECK int dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len);
-GF_MUST_CHECK int dict_set_ptr (dict_t *this, char *key, void *ptr);
-GF_MUST_CHECK int dict_set_dynptr (dict_t *this, char *key, void *ptr, size_t size);
-
-GF_MUST_CHECK int dict_get_bin (dict_t *this, char *key, void **ptr);
-GF_MUST_CHECK int dict_set_bin (dict_t *this, char *key, void *ptr, size_t size);
-GF_MUST_CHECK int dict_set_static_bin (dict_t *this, char *key, void *ptr, size_t size);
-
-GF_MUST_CHECK int dict_set_str (dict_t *this, char *key, char *str);
-GF_MUST_CHECK int dict_set_dynmstr (dict_t *this, char *key, char *str);
-GF_MUST_CHECK int dict_set_dynstr (dict_t *this, char *key, char *str);
-GF_MUST_CHECK int dict_get_str (dict_t *this, char *key, char **str);
-
-GF_MUST_CHECK int dict_get_str_boolean (dict_t *this, char *key, int default_val);
-GF_MUST_CHECK int dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
- char delimiter);
-
-void dict_dump (dict_t *dict);
-
-#endif
diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
new file mode 100644
index 00000000000..fb4fb845b40
--- /dev/null
+++ b/libglusterfs/src/event-epoll.c
@@ -0,0 +1,1032 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "glusterfs/gf-event.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+#ifdef HAVE_SYS_EPOLL_H
+#include <sys/epoll.h>
+
+struct event_slot_epoll {
+ int fd;
+ int events;
+ int gen;
+ int idx;
+ gf_atomic_t ref;
+ int do_close;
+ int in_handler;
+ int handled_error;
+ void *data;
+ event_handler_t handler;
+ gf_lock_t lock;
+ struct list_head poller_death;
+};
+
+struct event_thread_data {
+ struct event_pool *event_pool;
+ int event_index;
+};
+
+static struct event_slot_epoll *
+__event_newtable(struct event_pool *event_pool, int table_idx)
+{
+ struct event_slot_epoll *table = NULL;
+ int i = -1;
+
+ table = GF_CALLOC(sizeof(*table), EVENT_EPOLL_SLOTS, gf_common_mt_ereg);
+ if (!table)
+ return NULL;
+
+ for (i = 0; i < EVENT_EPOLL_SLOTS; i++) {
+ table[i].fd = -1;
+ LOCK_INIT(&table[i].lock);
+ INIT_LIST_HEAD(&table[i].poller_death);
+ }
+
+ event_pool->ereg[table_idx] = table;
+ event_pool->slots_used[table_idx] = 0;
+
+ return table;
+}
+
+static int
+event_slot_ref(struct event_slot_epoll *slot)
+{
+ if (!slot)
+ return -1;
+
+ return GF_ATOMIC_INC(slot->ref);
+}
+
+static int
+__event_slot_alloc(struct event_pool *event_pool, int fd,
+ char notify_poller_death, struct event_slot_epoll **slot)
+{
+ int i = 0;
+ int j = 0;
+ int table_idx = -1;
+ int gen = -1;
+ struct event_slot_epoll *table = NULL;
+
+retry:
+
+ while (i < EVENT_EPOLL_TABLES) {
+ switch (event_pool->slots_used[i]) {
+ case EVENT_EPOLL_SLOTS:
+ break;
+ case 0:
+ if (!event_pool->ereg[i]) {
+ table = __event_newtable(event_pool, i);
+ if (!table)
+ return -1;
+ } else {
+ table = event_pool->ereg[i];
+ }
+ break;
+ default:
+ table = event_pool->ereg[i];
+ break;
+ }
+
+ if (table)
+ /* break out of the loop */
+ break;
+ i++;
+ }
+
+ if (!table)
+ return -1;
+
+ table_idx = i;
+
+ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) {
+ if (table[j].fd == -1) {
+ /* wipe everything except bump the generation */
+ gen = table[j].gen;
+ memset(&table[j], 0, sizeof(table[j]));
+ table[j].gen = gen + 1;
+
+ LOCK_INIT(&table[j].lock);
+ INIT_LIST_HEAD(&table[j].poller_death);
+
+ table[j].fd = fd;
+ if (notify_poller_death) {
+ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j;
+ list_add_tail(&table[j].poller_death,
+ &event_pool->poller_death);
+ }
+
+ event_pool->slots_used[table_idx]++;
+
+ break;
+ }
+ }
+
+ if (j == EVENT_EPOLL_SLOTS) {
+ table = NULL;
+ i++;
+ goto retry;
+ } else {
+ (*slot) = &table[j];
+ event_slot_ref(*slot);
+ return table_idx * EVENT_EPOLL_SLOTS + j;
+ }
+}
+
+static int
+event_slot_alloc(struct event_pool *event_pool, int fd,
+ char notify_poller_death, struct event_slot_epoll **slot)
+{
+ int idx = -1;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot);
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return idx;
+}
+
+static void
+__event_slot_dealloc(struct event_pool *event_pool, int idx)
+{
+ int table_idx = 0;
+ int offset = 0;
+ struct event_slot_epoll *table = NULL;
+ struct event_slot_epoll *slot = NULL;
+ int fd = -1;
+
+ table_idx = idx / EVENT_EPOLL_SLOTS;
+ offset = idx % EVENT_EPOLL_SLOTS;
+
+ table = event_pool->ereg[table_idx];
+ if (!table)
+ return;
+
+ slot = &table[offset];
+ slot->gen++;
+
+ fd = slot->fd;
+ slot->fd = -1;
+ slot->handled_error = 0;
+ slot->in_handler = 0;
+ list_del_init(&slot->poller_death);
+ if (fd != -1)
+ event_pool->slots_used[table_idx]--;
+
+ return;
+}
+
+static void
+event_slot_dealloc(struct event_pool *event_pool, int idx)
+{
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ __event_slot_dealloc(event_pool, idx);
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return;
+}
+
+static struct event_slot_epoll *
+event_slot_get(struct event_pool *event_pool, int idx)
+{
+ struct event_slot_epoll *slot = NULL;
+ struct event_slot_epoll *table = NULL;
+ int table_idx = 0;
+ int offset = 0;
+
+ table_idx = idx / EVENT_EPOLL_SLOTS;
+ offset = idx % EVENT_EPOLL_SLOTS;
+
+ table = event_pool->ereg[table_idx];
+ if (!table)
+ return NULL;
+
+ slot = &table[offset];
+
+ event_slot_ref(slot);
+ return slot;
+}
+
+static void
+__event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot,
+ int idx)
+{
+ int ref = -1;
+ int fd = -1;
+ int do_close = 0;
+
+ ref = GF_ATOMIC_DEC(slot->ref);
+ if (ref)
+ /* slot still alive */
+ goto done;
+
+ LOCK(&slot->lock);
+ {
+ fd = slot->fd;
+ do_close = slot->do_close;
+ slot->do_close = 0;
+ }
+ UNLOCK(&slot->lock);
+
+ __event_slot_dealloc(event_pool, idx);
+
+ if (do_close)
+ sys_close(fd);
+done:
+ return;
+}
+
+static void
+event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot,
+ int idx)
+{
+ int ref = -1;
+ int fd = -1;
+ int do_close = 0;
+
+ ref = GF_ATOMIC_DEC(slot->ref);
+ if (ref)
+ /* slot still alive */
+ goto done;
+
+ LOCK(&slot->lock);
+ {
+ fd = slot->fd;
+ do_close = slot->do_close;
+ slot->do_close = 0;
+ }
+ UNLOCK(&slot->lock);
+
+ event_slot_dealloc(event_pool, idx);
+
+ if (do_close)
+ sys_close(fd);
+done:
+ return;
+}
+
+static struct event_pool *
+event_pool_new_epoll(int count, int eventthreadcount)
+{
+ struct event_pool *event_pool = NULL;
+ int epfd = -1;
+
+ event_pool = GF_CALLOC(1, sizeof(*event_pool), gf_common_mt_event_pool);
+
+ if (!event_pool)
+ goto out;
+
+ epfd = epoll_create(count);
+
+ if (epfd == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_CREATE_FAILED,
+ NULL);
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ event_pool = NULL;
+ goto out;
+ }
+
+ event_pool->fd = epfd;
+
+ event_pool->count = count;
+ INIT_LIST_HEAD(&event_pool->poller_death);
+ event_pool->eventthreadcount = eventthreadcount;
+ event_pool->auto_thread_count = 0;
+
+ pthread_mutex_init(&event_pool->mutex, NULL);
+
+out:
+ return event_pool;
+}
+
+static void
+__slot_update_events(struct event_slot_epoll *slot, int poll_in, int poll_out)
+{
+ switch (poll_in) {
+ case 1:
+ slot->events |= EPOLLIN;
+ break;
+ case 0:
+ slot->events &= ~EPOLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
+ "value=%d", poll_in, NULL);
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ slot->events |= EPOLLOUT;
+ break;
+ case 0:
+ slot->events &= ~EPOLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
+ "value=%d", poll_out, NULL);
+ break;
+ }
+}
+
+int
+event_register_epoll(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death)
+{
+ int idx = -1;
+ int ret = -1;
+ int destroy = 0;
+ struct epoll_event epoll_event = {
+ 0,
+ };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+ struct event_slot_epoll *slot = NULL;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ /* TODO: Even with the below check, there is a possibility of race,
+ * What if the destroy mode is set after the check is done.
+ * Not sure of the best way to prevent this race, ref counting
+ * is one possibility.
+ * There is no harm in registering and unregistering the fd
+ * even after destroy mode is set, just that such fds will remain
+ * open until unregister is called, also the events on that fd will be
+ * notified, until one of the poller thread is alive.
+ */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ destroy = event_pool->destroy;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ if (destroy == 1)
+ goto out;
+
+ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot);
+ if (idx == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ /* make epoll 'singleshot', which
+ means we need to re-add the fd with
+ epoll_ctl(EPOLL_CTL_MOD) after delivery of every
+ single event. This assures us that while a poller
+ thread has picked up and is processing an event,
+ another poller will not try to pick this at the same
+ time as well.
+ */
+
+ slot->events = EPOLLPRI | EPOLLHUP | EPOLLERR | EPOLLONESHOT;
+ slot->handler = handler;
+ slot->data = data;
+
+ __slot_update_events(slot, poll_in, poll_out);
+
+ epoll_event.events = slot->events;
+ ev_data->idx = idx;
+ ev_data->gen = slot->gen;
+
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_ADD, fd, &epoll_event);
+ /* check ret after UNLOCK() to avoid deadlock in
+ event_slot_unref()
+ */
+ }
+ UNLOCK(&slot->lock);
+
+ if (ret == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_ADD_FAILED,
+ "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL);
+ event_slot_unref(event_pool, slot, idx);
+ idx = -1;
+ }
+
+ /* keep slot->ref (do not event_slot_unref) if successful */
+out:
+ return idx;
+}
+
+static int
+event_unregister_epoll_common(struct event_pool *event_pool, int fd, int idx,
+ int do_close)
+{
+ int ret = -1;
+ struct event_slot_epoll *slot = NULL;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ /* During shutdown, it may happen that a socket registration with
+ * the event sub-system may fail and an rpc_transport_unref() may
+ * be called for such an unregistered socket with idx == -1. This
+ * may cause the following assert(slot->fd == fd) to fail.
+ */
+ if (idx < 0)
+ goto out;
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
+
+ if (ret == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_DEL_FAILED,
+ "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL);
+ goto unlock;
+ }
+
+ slot->do_close = do_close;
+ slot->gen++; /* detect unregister in dispatch_handler() */
+ }
+unlock:
+ UNLOCK(&slot->lock);
+
+ event_slot_unref(event_pool, slot, idx); /* one for event_register() */
+ event_slot_unref(event_pool, slot, idx); /* one for event_slot_get() */
+out:
+ return ret;
+}
+
+static int
+event_unregister_epoll(struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int ret = -1;
+
+ ret = event_unregister_epoll_common(event_pool, fd, idx_hint, 0);
+
+ return ret;
+}
+
+static int
+event_unregister_close_epoll(struct event_pool *event_pool, int fd,
+ int idx_hint)
+{
+ int ret = -1;
+
+ ret = event_unregister_epoll_common(event_pool, fd, idx_hint, 1);
+
+ return ret;
+}
+
+static int
+event_select_on_epoll(struct event_pool *event_pool, int fd, int idx,
+ int poll_in, int poll_out)
+{
+ int ret = -1;
+ struct event_slot_epoll *slot = NULL;
+ struct epoll_event epoll_event = {
+ 0,
+ };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ __slot_update_events(slot, poll_in, poll_out);
+
+ epoll_event.events = slot->events;
+ ev_data->idx = idx;
+ ev_data->gen = slot->gen;
+
+ if (slot->in_handler)
+ /*
+ * in_handler indicates at least one thread
+ * executing event_dispatch_epoll_handler()
+ * which will perform epoll_ctl(EPOLL_CTL_MOD)
+ * anyways (because of EPOLLET)
+ *
+ * This not only saves a system call, but also
+ * avoids possibility of another epoll thread
+ * picking up the next event while the ongoing
+ * handler is still in progress (and resulting
+ * in unnecessary contention on rpc_transport_t->mutex).
+ */
+ goto unlock;
+
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event);
+ if (ret == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_MODIFY_FAILED,
+ "fd=%d", fd, "events=%d", epoll_event.events, NULL);
+ }
+ }
+unlock:
+ UNLOCK(&slot->lock);
+
+ event_slot_unref(event_pool, slot, idx);
+
+out:
+ return idx;
+}
+
+static int
+event_dispatch_epoll_handler(struct event_pool *event_pool,
+ struct epoll_event *event)
+{
+ struct event_data *ev_data = NULL;
+ struct event_slot_epoll *slot = NULL;
+ event_handler_t handler = NULL;
+ void *data = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+ gf_boolean_t handled_error_previously = _gf_false;
+
+ ev_data = (void *)&event->data;
+ handler = NULL;
+ data = NULL;
+
+ idx = ev_data->idx;
+ gen = ev_data->gen;
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "idx=%d", idx,
+ NULL);
+ return -1;
+ }
+
+ LOCK(&slot->lock);
+ {
+ fd = slot->fd;
+ if (fd == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_STALE_FD_FOUND, "idx=%d",
+ idx, "gen=%d", gen, "events=%d", event->events,
+ "slot->gen=%d", slot->gen, NULL);
+ /* fd got unregistered in another thread */
+ goto pre_unlock;
+ }
+
+ if (gen != slot->gen) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_GENERATION_MISMATCH,
+ "idx=%d", idx, "gen=%d", gen, "slot->gen=%d", slot->gen,
+ "slot->fd=%d", slot->fd, NULL);
+ /* slot was re-used and therefore is another fd! */
+ goto pre_unlock;
+ }
+
+ handler = slot->handler;
+ data = slot->data;
+
+ if (slot->in_handler > 0) {
+ /* Another handler is inprogress, skip this one. */
+ handler = NULL;
+ goto pre_unlock;
+ }
+
+ if (slot->handled_error) {
+ handled_error_previously = _gf_true;
+ } else {
+ slot->handled_error = (event->events & (EPOLLERR | EPOLLHUP));
+ slot->in_handler++;
+ }
+ }
+pre_unlock:
+ UNLOCK(&slot->lock);
+
+ ret = 0;
+
+ if (!handler)
+ goto out;
+
+ if (!handled_error_previously) {
+ handler(fd, idx, gen, data, (event->events & (EPOLLIN | EPOLLPRI)),
+ (event->events & (EPOLLOUT)),
+ (event->events & (EPOLLERR | EPOLLHUP)), 0);
+ }
+out:
+ event_slot_unref(event_pool, slot, idx);
+
+ return ret;
+}
+
+static void *
+event_dispatch_epoll_worker(void *data)
+{
+ struct epoll_event event;
+ int ret = -1;
+ struct event_thread_data *ev_data = data;
+ struct event_pool *event_pool;
+ int myindex = -1;
+ int timetodie = 0, gen = 0;
+ struct list_head poller_death_notify;
+ struct event_slot_epoll *slot = NULL, *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("event", ev_data, out);
+
+ event_pool = ev_data->event_pool;
+ myindex = ev_data->event_index;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD, "index=%d",
+ myindex - 1, NULL);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ event_pool->activethreadcount++;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ for (;;) {
+ if (event_pool->eventthreadcount < myindex) {
+ /* ...time to die, thread count was decreased below
+ * this threads index */
+ /* Start with extra safety at this point, reducing
+ * lock conention in normal case when threads are not
+ * reconfigured always */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->eventthreadcount < myindex) {
+ while (event_pool->poller_death_sliced) {
+ pthread_cond_wait(&event_pool->cond,
+ &event_pool->mutex);
+ }
+
+ INIT_LIST_HEAD(&poller_death_notify);
+ /* if found true in critical section,
+ * die */
+ event_pool->pollers[myindex - 1] = 0;
+ event_pool->activethreadcount--;
+ timetodie = 1;
+ gen = ++event_pool->poller_gen;
+ list_for_each_entry(slot, &event_pool->poller_death,
+ poller_death)
+ {
+ event_slot_ref(slot);
+ }
+
+ list_splice_init(&event_pool->poller_death,
+ &poller_death_notify);
+ event_pool->poller_death_sliced = 1;
+ pthread_cond_broadcast(&event_pool->cond);
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+ if (timetodie) {
+ list_for_each_entry(slot, &poller_death_notify, poller_death)
+ {
+ slot->handler(slot->fd, 0, gen, slot->data, 0, 0, 0, 1);
+ }
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ list_for_each_entry_safe(slot, tmp, &poller_death_notify,
+ poller_death)
+ {
+ __event_slot_unref(event_pool, slot, slot->idx);
+ }
+
+ list_splice(&poller_death_notify,
+ &event_pool->poller_death);
+ event_pool->poller_death_sliced = 0;
+ pthread_cond_broadcast(&event_pool->cond);
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_EXITED_EPOLL_THREAD,
+ "index=%d", myindex, NULL);
+
+ goto out;
+ }
+ }
+
+ ret = epoll_wait(event_pool->fd, &event, 1, -1);
+
+ if (ret == 0)
+ /* timeout */
+ continue;
+
+ if (ret == -1 && errno == EINTR)
+ /* sys call */
+ continue;
+
+ ret = event_dispatch_epoll_handler(event_pool, &event);
+ if (ret) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_DISPATCH_HANDLER_FAILED,
+ NULL);
+ }
+ }
+out:
+ if (ev_data)
+ GF_FREE(ev_data);
+ return NULL;
+}
+
+/* Attempts to start the # of configured pollers, ensuring at least the first
+ * is started in a joinable state */
+static int
+event_dispatch_epoll(struct event_pool *event_pool)
+{
+ int i = 0;
+ pthread_t t_id;
+ int pollercount = 0;
+ int ret = -1;
+ struct event_thread_data *ev_data = NULL;
+
+ /* Start the configured number of pollers */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ pollercount = event_pool->eventthreadcount;
+
+ /* Set to MAX if greater */
+ if (pollercount > EVENT_MAX_THREADS)
+ pollercount = EVENT_MAX_THREADS;
+
+ /* Default pollers to 1 in case this is incorrectly set */
+ if (pollercount <= 0)
+ pollercount = 1;
+
+ event_pool->activethreadcount++;
+
+ for (i = 0; i < pollercount; i++) {
+ ev_data = GF_CALLOC(1, sizeof(*ev_data), gf_common_mt_event_pool);
+ if (!ev_data) {
+ if (i == 0) {
+ /* Need to succeed creating 0'th
+ * thread, to joinable and wait */
+ break;
+ } else {
+ /* Inability to create other threads
+ * are a lesser evil, and ignored */
+ continue;
+ }
+ }
+
+ ev_data->event_pool = event_pool;
+ ev_data->event_index = i + 1;
+
+ ret = gf_thread_create(&t_id, NULL, event_dispatch_epoll_worker,
+ ev_data, "epoll%03hx", i & 0x3ff);
+ if (!ret) {
+ event_pool->pollers[i] = t_id;
+
+ /* mark all threads other than one in index 0
+ * as detachable. Errors can be ignored, they
+ * spend their time as zombies if not detched
+ * and the thread counts are decreased */
+ if (i != 0)
+ pthread_detach(event_pool->pollers[i]);
+ } else {
+ gf_smsg("epoll", GF_LOG_WARNING, 0,
+ LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i, NULL);
+ if (i == 0) {
+ GF_FREE(ev_data);
+ break;
+ } else {
+ GF_FREE(ev_data);
+ continue;
+ }
+ }
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ /* Just wait for the first thread, that is created in a joinable state
+ * and will never die, ensuring this function never returns */
+ if (event_pool->pollers[0] != 0)
+ pthread_join(event_pool->pollers[0], NULL);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ event_pool->activethreadcount--;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return ret;
+}
+
+/**
+ * @param event_pool event_pool on which fds of interest are registered for
+ * events.
+ *
+ * @return 1 if at least one epoll worker thread is spawned, 0 otherwise
+ *
+ * NB This function SHOULD be called under event_pool->mutex.
+ */
+
+static int
+event_pool_dispatched_unlocked(struct event_pool *event_pool)
+{
+ return (event_pool->pollers[0] != 0);
+}
+
+int
+event_reconfigure_threads_epoll(struct event_pool *event_pool, int value)
+{
+ int i;
+ int ret = 0;
+ pthread_t t_id;
+ int oldthreadcount;
+ struct event_thread_data *ev_data = NULL;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ /* Reconfigure to 0 threads is allowed only in destroy mode */
+ if (event_pool->destroy == 1) {
+ value = 0;
+ } else {
+ /* Set to MAX if greater */
+ if (value > EVENT_MAX_THREADS)
+ value = EVENT_MAX_THREADS;
+
+ /* Default pollers to 1 in case this is set incorrectly */
+ if (value <= 0)
+ value = 1;
+ }
+
+ oldthreadcount = event_pool->eventthreadcount;
+
+ /* Start 'worker' threads as necessary only if event_dispatch()
+ * was called before. If event_dispatch() was not called, there
+ * will be no epoll 'worker' threads running yet. */
+
+ if (event_pool_dispatched_unlocked(event_pool) &&
+ (oldthreadcount < value)) {
+ /* create more poll threads */
+ for (i = oldthreadcount; i < value; i++) {
+ /* Start a thread if the index at this location
+ * is a 0, so that the older thread is confirmed
+ * as dead */
+ if (event_pool->pollers[i] == 0) {
+ ev_data = GF_CALLOC(1, sizeof(*ev_data),
+ gf_common_mt_event_pool);
+ if (!ev_data) {
+ continue;
+ }
+
+ ev_data->event_pool = event_pool;
+ ev_data->event_index = i + 1;
+
+ ret = gf_thread_create(&t_id, NULL,
+ event_dispatch_epoll_worker, ev_data,
+ "epoll%03hx", i & 0x3ff);
+ if (ret) {
+ gf_smsg("epoll", GF_LOG_WARNING, 0,
+ LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i,
+ NULL);
+ GF_FREE(ev_data);
+ } else {
+ pthread_detach(t_id);
+ event_pool->pollers[i] = t_id;
+ }
+ }
+ }
+ }
+
+ /* if value decreases, threads will terminate, themselves */
+ event_pool->eventthreadcount = value;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return 0;
+}
+
+/* This function is the destructor for the event_pool data structure
+ * Should be called only after poller_threads_destroy() is called,
+ * else will lead to crashes.
+ */
+static int
+event_pool_destroy_epoll(struct event_pool *event_pool)
+{
+ int ret = 0, i = 0, j = 0;
+ struct event_slot_epoll *table = NULL;
+
+ ret = sys_close(event_pool->fd);
+
+ for (i = 0; i < EVENT_EPOLL_TABLES; i++) {
+ if (event_pool->ereg[i]) {
+ table = event_pool->ereg[i];
+ event_pool->ereg[i] = NULL;
+ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) {
+ LOCK_DESTROY(&table[j].lock);
+ }
+ GF_FREE(table);
+ }
+ }
+
+ pthread_mutex_destroy(&event_pool->mutex);
+ pthread_cond_destroy(&event_pool->cond);
+
+ GF_FREE(event_pool->evcache);
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+
+ return ret;
+}
+
+static int
+event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen)
+{
+ struct event_slot_epoll *slot = NULL;
+ struct epoll_event epoll_event = {
+ 0,
+ };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+ int ret = 0;
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ slot->in_handler--;
+
+ if (gen != slot->gen) {
+ /* event_unregister() happened while we were
+ in handler()
+ */
+ gf_msg_debug("epoll", 0,
+ "generation bumped on idx=%d"
+ " from gen=%d to slot->gen=%d, fd=%d, "
+ "slot->fd=%d",
+ idx, gen, slot->gen, fd, slot->fd);
+ goto unlock;
+ }
+
+ /* This call also picks up the changes made by another
+ thread calling event_select_on_epoll() while this
+ thread was busy in handler()
+ */
+ if (slot->in_handler == 0) {
+ epoll_event.events = slot->events;
+ ev_data->idx = idx;
+ ev_data->gen = gen;
+
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event);
+ }
+ }
+unlock:
+ UNLOCK(&slot->lock);
+
+ event_slot_unref(event_pool, slot, idx);
+
+ return ret;
+}
+
+struct event_ops event_ops_epoll = {
+ .new = event_pool_new_epoll,
+ .event_register = event_register_epoll,
+ .event_select_on = event_select_on_epoll,
+ .event_unregister = event_unregister_epoll,
+ .event_unregister_close = event_unregister_close_epoll,
+ .event_dispatch = event_dispatch_epoll,
+ .event_reconfigure_threads = event_reconfigure_threads_epoll,
+ .event_pool_destroy = event_pool_destroy_epoll,
+ .event_handled = event_handled_epoll,
+};
+
+#endif
diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c
index fe511caeb1e..379fed866be 100644
--- a/libglusterfs/src/event-history.c
+++ b/libglusterfs/src/event-history.c
@@ -8,74 +8,75 @@
cases as published by the Free Software Foundation.
*/
-#include "event-history.h"
+#include "glusterfs/event-history.h"
+#include "glusterfs/libglusterfs-messages.h"
eh_t *
-eh_new (size_t buffer_size, gf_boolean_t use_buffer_once)
+eh_new(size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_buffer_data)(void *data))
{
- eh_t *history = NULL;
- buffer_t *buffer = NULL;
+ eh_t *history = NULL;
+ buffer_t *buffer = NULL;
- history = GF_CALLOC (1, sizeof (eh_t), gf_common_mt_eh_t);
- if (!history) {
- gf_log ("", GF_LOG_ERROR, "allocating history failed.");
- goto out;
- }
+ history = GF_CALLOC(1, sizeof(eh_t), gf_common_mt_eh_t);
+ if (!history) {
+ goto out;
+ }
- buffer = cb_buffer_new (buffer_size, use_buffer_once);
- if (!buffer) {
- gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed");
- GF_FREE (history);
- history = NULL;
- }
+ buffer = cb_buffer_new(buffer_size, use_buffer_once, destroy_buffer_data);
+ if (!buffer) {
+ GF_FREE(history);
+ history = NULL;
+ goto out;
+ }
- history->buffer = buffer;
+ history->buffer = buffer;
- pthread_mutex_init (&history->lock, NULL);
+ pthread_mutex_init(&history->lock, NULL);
out:
- return history;
+ return history;
}
void
-eh_dump (eh_t *history, void *data,
- int (dump_fn) (circular_buffer_t *buffer, void *data))
+eh_dump(eh_t *history, void *data,
+ int(dump_fn)(circular_buffer_t *buffer, void *data))
{
- if (!history) {
- gf_log ("", GF_LOG_DEBUG, "history is NULL");
- goto out;
- }
+ if (!history) {
+ gf_msg_debug("event-history", 0, "history is NULL");
+ goto out;
+ }
- cb_buffer_dump (history->buffer, data, dump_fn);
+ cb_buffer_dump(history->buffer, data, dump_fn);
out:
- return;
+ return;
}
int
-eh_save_history (eh_t *history, void *data)
+eh_save_history(eh_t *history, void *data)
{
- int ret = -1;
+ int ret = -1;
- ret = cb_add_entry_buffer (history->buffer, data);
+ ret = cb_add_entry_buffer(history->buffer, data);
- return ret;
+ return ret;
}
int
-eh_destroy (eh_t *history)
+eh_destroy(eh_t *history)
{
- if (!history) {
- gf_log ("", GF_LOG_INFO, "history for the xlator is "
- "NULL");
- return -1;
- }
+ if (!history) {
+ gf_msg("event-history", GF_LOG_INFO, 0, LG_MSG_INVALID_ARG,
+ "history for the xlator is NULL");
+ return -1;
+ }
- cb_buffer_destroy (history->buffer);
- history->buffer = NULL;
+ cb_buffer_destroy(history->buffer);
+ history->buffer = NULL;
- pthread_mutex_destroy (&history->lock);
+ pthread_mutex_destroy(&history->lock);
- GF_FREE (history);
+ GF_FREE(history);
- return 0;
+ return 0;
}
diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/event-history.h
deleted file mode 100644
index b1750bbae96..00000000000
--- a/libglusterfs/src/event-history.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _EH_H
-#define _EH_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "mem-types.h"
-#include "circ-buff.h"
-
-struct event_hist
-{
- buffer_t *buffer;
- pthread_mutex_t lock;
-};
-
-typedef struct event_hist eh_t;
-
-void
-eh_dump (eh_t *event , void *data,
- int (fn) (circular_buffer_t *buffer, void *data));
-
-eh_t *
-eh_new (size_t buffer_size, gf_boolean_t use_buffer_once);
-
-int
-eh_save_history (eh_t *history, void *string);
-
-int
-eh_destroy (eh_t *history);
-
-#endif /* _EH_H */
diff --git a/libglusterfs/src/event-poll.c b/libglusterfs/src/event-poll.c
new file mode 100644
index 00000000000..2cba963f096
--- /dev/null
+++ b/libglusterfs/src/event-poll.c
@@ -0,0 +1,513 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/poll.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "glusterfs/logging.h"
+#include "glusterfs/gf-event.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+struct event_slot_poll {
+ int fd;
+ int events;
+ void *data;
+ event_handler_t handler;
+};
+
+static int
+event_register_poll(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death);
+
+static void
+__flush_fd(int fd, int idx, int gen, void *data, int poll_in, int poll_out,
+ int poll_err, char event_thread_died)
+{
+ char buf[64];
+ int ret = -1;
+
+ if (!poll_in)
+ return;
+
+ do {
+ ret = sys_read(fd, buf, 64);
+ if (ret == -1 && errno != EAGAIN) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_READ_FILE_FAILED,
+ "fd=%d", fd, NULL);
+ }
+ } while (ret == 64);
+
+ return;
+}
+
+static int
+__event_getindex(struct event_pool *event_pool, int fd, int idx)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ /* lookup in used space based on index provided */
+ if (idx > -1 && idx < event_pool->used) {
+ if (event_pool->reg[idx].fd == fd) {
+ ret = idx;
+ goto out;
+ }
+ }
+
+ /* search in used space, if lookup fails */
+ for (i = 0; i < event_pool->used; i++) {
+ if (event_pool->reg[i].fd == fd) {
+ ret = i;
+ break;
+ }
+ }
+
+out:
+ return ret;
+}
+
+static struct event_pool *
+event_pool_new_poll(int count, int eventthreadcount)
+{
+ struct event_pool *event_pool = NULL;
+ int ret = -1;
+
+ event_pool = GF_CALLOC(1, sizeof(*event_pool), gf_common_mt_event_pool);
+
+ if (!event_pool)
+ return NULL;
+
+ event_pool->count = count;
+ event_pool->reg = GF_CALLOC(event_pool->count, sizeof(*event_pool->reg),
+ gf_common_mt_reg);
+
+ if (!event_pool->reg) {
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ pthread_mutex_init(&event_pool->mutex, NULL);
+
+ ret = pipe(event_pool->breaker);
+
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_PIPE_CREATE_FAILED, NULL);
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ ret = fcntl(event_pool->breaker[0], F_SETFL, O_NONBLOCK);
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL);
+ sys_close(event_pool->breaker[0]);
+ sys_close(event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ ret = fcntl(event_pool->breaker[1], F_SETFL, O_NONBLOCK);
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL);
+
+ sys_close(event_pool->breaker[0]);
+ sys_close(event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ ret = event_register_poll(event_pool, event_pool->breaker[0], __flush_fd,
+ NULL, 1, 0, 0);
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED, NULL);
+ sys_close(event_pool->breaker[0]);
+ sys_close(event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ if (eventthreadcount > 1) {
+ gf_smsg("poll", GF_LOG_INFO, 0, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
+ "count=%d", eventthreadcount, NULL);
+ }
+
+ /* although, eventhreadcount for poll implementation is always
+ * going to be 1, eventthreadcount needs to be set to 1 so that
+ * rpcsvc_request_handler() thread scaling works flawlessly in
+ * both epoll and poll models
+ */
+ event_pool->eventthreadcount = 1;
+
+ return event_pool;
+}
+
+static int
+event_register_poll(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->count == event_pool->used) {
+ event_pool->count += 256;
+
+ event_pool->reg = GF_REALLOC(
+ event_pool->reg, event_pool->count * sizeof(*event_pool->reg));
+ if (!event_pool->reg)
+ goto unlock;
+ }
+
+ idx = event_pool->used++;
+
+ event_pool->reg[idx].fd = fd;
+ event_pool->reg[idx].events = POLLPRI;
+ event_pool->reg[idx].handler = handler;
+ event_pool->reg[idx].data = data;
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= POLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
+ "value=%d", poll_in, NULL);
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= POLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
+ "value=%d", poll_out, NULL);
+ break;
+ }
+
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+static int
+event_unregister_poll(struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_getindex(event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ fd, "idx_hint=%d", idx_hint, NULL);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ event_pool->reg[idx] = event_pool->reg[--event_pool->used];
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+static int
+event_unregister_close_poll(struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int ret = -1;
+
+ ret = event_unregister_poll(event_pool, fd, idx_hint);
+
+ sys_close(fd);
+
+ return ret;
+}
+
+static int
+event_select_on_poll(struct event_pool *event_pool, int fd, int idx_hint,
+ int poll_in, int poll_out)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_getindex(event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ fd, "idx_hint=%d", idx_hint, NULL);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= POLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ /* TODO: log error */
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= POLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ /* TODO: log error */
+ break;
+ }
+
+ if (poll_in + poll_out > -2)
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+static int
+event_dispatch_poll_handler(struct event_pool *event_pool, struct pollfd *ufds,
+ int i)
+{
+ event_handler_t handler = NULL;
+ void *data = NULL;
+ int idx = -1;
+ int ret = 0;
+
+ handler = NULL;
+ data = NULL;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_getindex(event_pool, ufds[i].fd, i);
+
+ if (idx == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ ufds[i].fd, "idx_hint=%d", i, NULL);
+ goto unlock;
+ }
+
+ handler = event_pool->reg[idx].handler;
+ data = event_pool->reg[idx].data;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ if (handler)
+ handler(ufds[i].fd, idx, 0, data,
+ (ufds[i].revents & (POLLIN | POLLPRI)),
+ (ufds[i].revents & (POLLOUT)),
+ (ufds[i].revents & (POLLERR | POLLHUP | POLLNVAL)), 0);
+
+ return ret;
+}
+
+static int
+event_dispatch_poll_resize(struct event_pool *event_pool, struct pollfd *ufds,
+ int size)
+{
+ int i = 0;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->changed == 0) {
+ goto unlock;
+ }
+
+ if (event_pool->used > event_pool->evcache_size) {
+ GF_FREE(event_pool->evcache);
+
+ event_pool->evcache = ufds = NULL;
+
+ event_pool->evcache_size = event_pool->used;
+
+ ufds = GF_CALLOC(sizeof(struct pollfd), event_pool->evcache_size,
+ gf_common_mt_pollfd);
+ if (!ufds)
+ goto unlock;
+ event_pool->evcache = ufds;
+ }
+
+ if (ufds == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < event_pool->used; i++) {
+ ufds[i].fd = event_pool->reg[i].fd;
+ ufds[i].events = event_pool->reg[i].events;
+ ufds[i].revents = 0;
+ }
+
+ size = i;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return size;
+}
+
+static int
+event_dispatch_poll(struct event_pool *event_pool)
+{
+ struct pollfd *ufds = NULL;
+ int size = 0;
+ int i = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ event_pool->activethreadcount = 1;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ while (1) {
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->destroy == 1) {
+ event_pool->activethreadcount = 0;
+ pthread_cond_broadcast(&event_pool->cond);
+ pthread_mutex_unlock(&event_pool->mutex);
+ return 0;
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ size = event_dispatch_poll_resize(event_pool, ufds, size);
+ ufds = event_pool->evcache;
+
+ ret = poll(ufds, size, 1);
+
+ if (ret == 0)
+ /* timeout */
+ continue;
+
+ if (ret == -1 && errno == EINTR)
+ /* sys call */
+ continue;
+
+ for (i = 0; i < size; i++) {
+ if (!ufds[i].revents)
+ continue;
+
+ event_dispatch_poll_handler(event_pool, ufds, i);
+ }
+ }
+
+out:
+ return -1;
+}
+
+int
+event_reconfigure_threads_poll(struct event_pool *event_pool, int value)
+{
+ /* No-op for poll */
+
+ return 0;
+}
+
+/* This function is the destructor for the event_pool data structure
+ * Should be called only after poller_threads_destroy() is called,
+ * else will lead to crashes.
+ */
+static int
+event_pool_destroy_poll(struct event_pool *event_pool)
+{
+ int ret = 0;
+
+ ret = sys_close(event_pool->breaker[0]);
+ if (ret)
+ return ret;
+
+ ret = sys_close(event_pool->breaker[1]);
+ if (ret)
+ return ret;
+
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+
+ return ret;
+}
+
+struct event_ops event_ops_poll = {
+ .new = event_pool_new_poll,
+ .event_register = event_register_poll,
+ .event_select_on = event_select_on_poll,
+ .event_unregister = event_unregister_poll,
+ .event_unregister_close = event_unregister_close_poll,
+ .event_dispatch = event_dispatch_poll,
+ .event_reconfigure_threads = event_reconfigure_threads_poll,
+ .event_pool_destroy = event_pool_destroy_poll};
diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c
index 8f172fb24b8..402c253ca25 100644
--- a/libglusterfs/src/event.c
+++ b/libglusterfs/src/event.c
@@ -16,936 +16,290 @@
#include <errno.h>
#include <string.h>
-#include "logging.h"
-#include "event.h"
-#include "mem-pool.h"
-#include "common-utils.h"
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-
-
-static int
-__flush_fd (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- char buf[64];
- int ret = -1;
-
- if (!poll_in)
- return ret;
-
- do {
- ret = read (fd, buf, 64);
- if (ret == -1 && errno != EAGAIN) {
- gf_log ("poll", GF_LOG_ERROR,
- "read on %d returned error (%s)",
- fd, strerror (errno));
- }
- } while (ret == 64);
-
- return ret;
-}
-
-
-static int
-__event_getindex (struct event_pool *event_pool, int fd, int idx)
-{
- int ret = -1;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+#include "glusterfs/gf-event.h"
+#include "glusterfs/timespec.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/syscall.h"
- if (idx > -1 && idx < event_pool->used) {
- if (event_pool->reg[idx].fd == fd)
- ret = idx;
- }
-
- for (i=0; ret == -1 && i<event_pool->used; i++) {
- if (event_pool->reg[i].fd == fd) {
- ret = i;
- break;
- }
- }
-
-out:
- return ret;
-}
-
-
-static struct event_pool *
-event_pool_new_poll (int count)
+struct event_pool *
+gf_event_pool_new(int count, int eventthreadcount)
{
- struct event_pool *event_pool = NULL;
- int ret = -1;
-
- event_pool = GF_CALLOC (1, sizeof (*event_pool),
- gf_common_mt_event_pool);
+ struct event_pool *event_pool = NULL;
+ extern struct event_ops event_ops_poll;
- if (!event_pool)
- return NULL;
-
- event_pool->count = count;
- event_pool->reg = GF_CALLOC (event_pool->count,
- sizeof (*event_pool->reg),
- gf_common_mt_reg);
-
- if (!event_pool->reg) {
- GF_FREE (event_pool);
- return NULL;
- }
-
- pthread_mutex_init (&event_pool->mutex, NULL);
-
- ret = pipe (event_pool->breaker);
-
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "pipe creation failed (%s)", strerror (errno));
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
-
- ret = fcntl (event_pool->breaker[0], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
+#ifdef HAVE_SYS_EPOLL_H
+ extern struct event_ops event_ops_epoll;
- ret = fcntl (event_pool->breaker[1], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
+ event_pool = event_ops_epoll.new(count, eventthreadcount);
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
+ if (event_pool) {
+ event_pool->ops = &event_ops_epoll;
+ } else {
+ gf_msg("event", GF_LOG_WARNING, 0, LG_MSG_FALLBACK_TO_POLL,
+ "falling back to poll based event handling");
+ }
+#endif
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
+ if (!event_pool) {
+ event_pool = event_ops_poll.new(count, eventthreadcount);
- ret = event_register_poll (event_pool, event_pool->breaker[0],
- __flush_fd, NULL, 1, 0);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not register pipe fd with poll event loop");
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
+ if (event_pool)
+ event_pool->ops = &event_ops_poll;
+ }
- return event_pool;
+ return event_pool;
}
-
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
+int
+gf_event_register(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death)
{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used)
- {
- event_pool->count += 256;
-
- event_pool->reg = GF_REALLOC (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
- if (!event_pool->reg)
- goto unlock;
- }
-
- idx = event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = POLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
+ int ret = -1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+ ret = event_pool->ops->event_register(
+ event_pool, fd, handler, data, poll_in, poll_out, notify_poller_death);
out:
- return idx;
+ return ret;
}
-
-static int
-event_unregister_poll (struct event_pool *event_pool, int fd, int idx_hint)
+int
+gf_event_unregister(struct event_pool *event_pool, int fd, int idx)
{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ int ret = -1;
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- event_pool->reg[idx] = event_pool->reg[--event_pool->used];
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return idx;
-}
-
-
-static int
-event_select_on_poll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- if (poll_in + poll_out > -2)
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ ret = event_pool->ops->event_unregister(event_pool, fd, idx);
out:
- return idx;
-}
-
-
-static int
-event_dispatch_poll_handler (struct event_pool *event_pool,
- struct pollfd *ufds, int i)
-{
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = 0;
-
- handler = NULL;
- data = NULL;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, ufds[i].fd, i);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- ufds[i].fd, i);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (ufds[i].fd, idx, data,
- (ufds[i].revents & (POLLIN|POLLPRI)),
- (ufds[i].revents & (POLLOUT)),
- (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL)));
-
- return ret;
+ return ret;
}
-
-static int
-event_dispatch_poll_resize (struct event_pool *event_pool,
- struct pollfd *ufds, int size)
-{
- int i = 0;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->changed == 0) {
- goto unlock;
- }
-
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- GF_FREE (event_pool->evcache);
-
- event_pool->evcache = ufds = NULL;
-
- event_pool->evcache_size = event_pool->used;
-
- ufds = GF_CALLOC (sizeof (struct pollfd),
- event_pool->evcache_size,
- gf_common_mt_pollfd);
- if (!ufds)
- goto unlock;
- event_pool->evcache = ufds;
- }
-
- for (i = 0; i < event_pool->used; i++) {
- ufds[i].fd = event_pool->reg[i].fd;
- ufds[i].events = event_pool->reg[i].events;
- ufds[i].revents = 0;
- }
-
- size = i;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return size;
-}
-
-
-static int
-event_dispatch_poll (struct event_pool *event_pool)
+int
+gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx)
{
- struct pollfd *ufds = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- while (1) {
- size = event_dispatch_poll_resize (event_pool, ufds, size);
- ufds = event_pool->evcache;
-
- ret = poll (ufds, size, 1);
-
- if (ret == 0)
- /* timeout */
- continue;
+ int ret = -1;
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- for (i = 0; i < size; i++) {
- if (!ufds[i].revents)
- continue;
-
- event_dispatch_poll_handler (event_pool, ufds, i);
- }
- }
+ ret = event_pool->ops->event_unregister_close(event_pool, fd, idx);
out:
- return -1;
+ return ret;
}
-
-static struct event_ops event_ops_poll = {
- .new = event_pool_new_poll,
- .event_register = event_register_poll,
- .event_select_on = event_select_on_poll,
- .event_unregister = event_unregister_poll,
- .event_dispatch = event_dispatch_poll
-};
-
-
-
-#ifdef HAVE_SYS_EPOLL_H
-#include <sys/epoll.h>
-
-
-static struct event_pool *
-event_pool_new_epoll (int count)
+int
+gf_event_select_on(struct event_pool *event_pool, int fd, int idx_hint,
+ int poll_in, int poll_out)
{
- struct event_pool *event_pool = NULL;
- int epfd = -1;
-
- event_pool = GF_CALLOC (1, sizeof (*event_pool),
- gf_common_mt_event_pool);
-
- if (!event_pool)
- goto out;
-
- event_pool->count = count;
- event_pool->reg = GF_CALLOC (event_pool->count,
- sizeof (*event_pool->reg),
- gf_common_mt_reg);
-
- if (!event_pool->reg) {
- GF_FREE (event_pool);
- event_pool = NULL;
- goto out;
- }
-
- epfd = epoll_create (count);
-
- if (epfd == -1) {
- gf_log ("epoll", GF_LOG_ERROR, "epoll fd creation failed (%s)",
- strerror (errno));
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- event_pool = NULL;
- goto out;
- }
-
- event_pool->fd = epfd;
+ int ret = -1;
- event_pool->count = count;
-
- pthread_mutex_init (&event_pool->mutex, NULL);
- pthread_cond_init (&event_pool->cond, NULL);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+ ret = event_pool->ops->event_select_on(event_pool, fd, idx_hint, poll_in,
+ poll_out);
out:
- return event_pool;
+ return ret;
}
-
int
-event_register_epoll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
+gf_event_dispatch(struct event_pool *event_pool)
{
- int idx = -1;
- int ret = -1;
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used) {
- event_pool->count *= 2;
-
- event_pool->reg = GF_REALLOC (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
-
- if (!event_pool->reg) {
- gf_log ("epoll", GF_LOG_ERROR,
- "event registry re-allocation failed");
- goto unlock;
- }
- }
-
- idx = event_pool->used;
- event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = EPOLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_ADD, fd,
- &epoll_event);
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to add fd(=%d) to epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- pthread_cond_broadcast (&event_pool->cond);
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ int ret = -1;
-out:
- return ret;
-}
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
-
-static int
-event_unregister_epoll (struct event_pool *event_pool, int fd, int idx_hint)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
- int lastidx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
-
- /* if ret is -1, this array member should never be accessed */
- /* if it is 0, the array member might be used by idx_cache
- * in which case the member should not be accessed till
- * it is reallocated
- */
-
- event_pool->reg[idx].fd = -1;
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to del fd(=%d) from epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- lastidx = event_pool->used - 1;
- if (lastidx == idx) {
- event_pool->used--;
- goto unlock;
- }
-
- epoll_event.events = event_pool->reg[lastidx].events;
- ev_data->fd = event_pool->reg[lastidx].fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, ev_data->fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to modify fd(=%d) index %d to %d (%s)",
- ev_data->fd, event_pool->used, idx,
- strerror (errno));
- goto unlock;
- }
-
- /* just replace the unregistered idx by last one */
- event_pool->reg[idx] = event_pool->reg[lastidx];
- event_pool->used--;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return ret;
-}
-
-
-static int
-event_select_on_epoll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to modify fd(=%d) events to %d",
- fd, epoll_event.events);
- }
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ ret = event_pool->ops->event_dispatch(event_pool);
+ if (ret)
+ goto out;
out:
- return ret;
+ return ret;
}
-
-static int
-event_dispatch_epoll_handler (struct event_pool *event_pool,
- struct epoll_event *events, int i)
-{
- struct event_data *event_data = NULL;
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = -1;
-
-
- event_data = (void *)&events[i].data;
- handler = NULL;
- data = NULL;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, event_data->fd,
- event_data->idx);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd(=%d) (idx_hint=%d)",
- event_data->fd, event_data->idx);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (event_data->fd, event_data->idx, data,
- (events[i].events & (EPOLLIN|EPOLLPRI)),
- (events[i].events & (EPOLLOUT)),
- (events[i].events & (EPOLLERR|EPOLLHUP)));
- return ret;
-}
-
-
-static int
-event_dispatch_epoll (struct event_pool *event_pool)
+int
+gf_event_reconfigure_threads(struct event_pool *event_pool, int value)
{
- struct epoll_event *events = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- while (1) {
- pthread_mutex_lock (&event_pool->mutex);
- {
- while (event_pool->used == 0)
- pthread_cond_wait (&event_pool->cond,
- &event_pool->mutex);
+ int ret = -1;
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- GF_FREE (event_pool->evcache);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- event_pool->evcache = events = NULL;
-
- event_pool->evcache_size =
- event_pool->used + 256;
-
- events = GF_CALLOC (event_pool->evcache_size,
- sizeof (struct epoll_event),
- gf_common_mt_epoll_event);
- if (!events)
- break;
-
- event_pool->evcache = events;
- }
- }
- pthread_mutex_unlock (&event_pool->mutex);
-
- ret = epoll_wait (event_pool->fd, event_pool->evcache,
- event_pool->evcache_size, -1);
-
- if (ret == 0)
- /* timeout */
- continue;
-
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
-
- size = ret;
-
- for (i = 0; i < size; i++) {
- if (!events || !events[i].events)
- continue;
-
- ret = event_dispatch_epoll_handler (event_pool,
- events, i);
- }
- }
+ /* call event refresh function */
+ ret = event_pool->ops->event_reconfigure_threads(event_pool, value);
out:
- return ret;
+ return ret;
}
-
-static struct event_ops event_ops_epoll = {
- .new = event_pool_new_epoll,
- .event_register = event_register_epoll,
- .event_select_on = event_select_on_epoll,
- .event_unregister = event_unregister_epoll,
- .event_dispatch = event_dispatch_epoll
-};
-
-#endif
-
-
-struct event_pool *
-event_pool_new (int count)
+int
+gf_event_pool_destroy(struct event_pool *event_pool)
{
- struct event_pool *event_pool = NULL;
-
-#ifdef HAVE_SYS_EPOLL_H
- event_pool = event_ops_epoll.new (count);
+ int ret = -1;
+ int destroy = 0, activethreadcount = 0;
- if (event_pool) {
- event_pool->ops = &event_ops_epoll;
- } else {
- gf_log ("event", GF_LOG_WARNING,
- "falling back to poll based event handling");
- }
-#endif
-
- if (!event_pool) {
- event_pool = event_ops_poll.new (count);
-
- if (event_pool)
- event_pool->ops = &event_ops_poll;
- }
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- return event_pool;
-}
-
-
-int
-event_register (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
-{
- int ret = -1;
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ destroy = event_pool->destroy;
+ activethreadcount = event_pool->activethreadcount;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ if (!destroy || (activethreadcount > 0)) {
+ goto out;
+ }
- ret = event_pool->ops->event_register (event_pool, fd, handler, data,
- poll_in, poll_out);
+ ret = event_pool->ops->event_pool_destroy(event_pool);
out:
- return ret;
+ return ret;
}
-
-int
-event_unregister (struct event_pool *event_pool, int fd, int idx)
+void
+poller_destroy_handler(int fd, int idx, int gen, void *data, int poll_out,
+ int poll_in, int poll_err, char event_thread_exit)
{
- int ret = -1;
+ struct event_destroy_data *destroy = NULL;
+ int readfd = -1;
+ char buf = '\0';
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ destroy = data;
+ readfd = destroy->readfd;
+ if (readfd < 0) {
+ goto out;
+ }
- ret = event_pool->ops->event_unregister (event_pool, fd, idx);
+ while (sys_read(readfd, &buf, 1) > 0) {
+ }
out:
- return ret;
-}
+ gf_event_handled(destroy->pool, fd, idx, gen);
+ return;
+}
+/* This function destroys all the poller threads.
+ * Note: to be called before gf_event_pool_destroy is called.
+ * The order in which cleaning is performed:
+ * - Register a pipe fd(this is for waking threads in poll()/epoll_wait())
+ * - Set the destroy mode, which this no new event registration will succeed
+ * - Reconfigure the thread count to 0(this will succeed only in destroy mode)
+ * - Wake up all the threads in poll() or epoll_wait(), so that they can
+ * destroy themselves.
+ * - Wait for the thread to join(which will happen only after all the other
+ * threads are destroyed)
+ */
int
-event_select_on (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
+gf_event_dispatch_destroy(struct event_pool *event_pool)
{
- int ret = -1;
+ int ret = -1, threadcount = 0;
+ int fd[2] = {-1};
+ int idx = -1;
+ int flags = 0;
+ struct timespec sleep_till = {
+ 0,
+ };
+ struct event_destroy_data data = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ ret = pipe(fd);
+ if (ret < 0)
+ goto out;
+
+ /* Make the read end of the pipe nonblocking */
+ flags = fcntl(fd[0], F_GETFL);
+ flags |= O_NONBLOCK;
+ ret = fcntl(fd[0], F_SETFL, flags);
+ if (ret < 0)
+ goto out;
+
+ /* Make the write end of the pipe nonblocking */
+ flags = fcntl(fd[1], F_GETFL);
+ flags |= O_NONBLOCK;
+ ret = fcntl(fd[1], F_SETFL, flags);
+ if (ret < 0)
+ goto out;
+
+ data.pool = event_pool;
+ data.readfd = fd[1];
+
+ /* From the main thread register an event on the pipe fd[0],
+ */
+ idx = gf_event_register(event_pool, fd[0], poller_destroy_handler, &data, 1,
+ 0, 0);
+ if (idx < 0)
+ goto out;
+
+ /* Enter the destroy mode first, set this before reconfiguring to 0
+ * threads, to prevent further reconfigure to thread count > 0.
+ */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ threadcount = event_pool->eventthreadcount;
+ event_pool->destroy = 1;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ ret = gf_event_reconfigure_threads(event_pool, 0);
+ if (ret < 0)
+ goto out;
+
+ /* Write something onto the write end of the pipe(fd[1]) so that
+ * poll wakes up and calls the handler, poller_destroy_handler()
+ */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ /* Write to pipe(fd[1]) and then wait for 1 second or until
+ * a poller thread that is dying, broadcasts. Make sure we
+ * do not loop forever by limiting to 10 retries
+ */
+ int retry = 0;
+
+ while (event_pool->activethreadcount > 0 &&
+ (retry++ < (threadcount + 10))) {
+ if (sys_write(fd[1], "dummy", 6) == -1) {
+ break;
+ }
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 1;
+ ret = pthread_cond_timedwait(&event_pool->cond, &event_pool->mutex,
+ &sleep_till);
+ if (ret) {
+ gf_msg_debug("event", 0,
+ "thread cond-timedwait failed "
+ "active-thread-count: %d, "
+ "retry: %d",
+ event_pool->activethreadcount, retry);
+ }
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ ret = gf_event_unregister(event_pool, fd[0], idx);
- ret = event_pool->ops->event_select_on (event_pool, fd, idx_hint,
- poll_in, poll_out);
out:
- return ret;
-}
+ if (fd[0] != -1)
+ sys_close(fd[0]);
+ if (fd[1] != -1)
+ sys_close(fd[1]);
+ return ret;
+}
int
-event_dispatch (struct event_pool *event_pool)
+gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen)
{
- int ret = -1;
+ int ret = 0;
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ if (event_pool->ops->event_handled)
+ ret = event_pool->ops->event_handled(event_pool, fd, idx, gen);
- ret = event_pool->ops->event_dispatch (event_pool);
-
-out:
- return ret;
+ return ret;
}
diff --git a/libglusterfs/src/event.h b/libglusterfs/src/event.h
deleted file mode 100644
index f2f8029aeaa..00000000000
--- a/libglusterfs/src/event.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _EVENT_H_
-#define _EVENT_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <pthread.h>
-
-struct event_pool;
-struct event_ops;
-struct event_data {
- int fd;
- int idx;
-} __attribute__ ((__packed__, __may_alias__));
-
-
-typedef int (*event_handler_t) (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err);
-
-struct event_pool {
- struct event_ops *ops;
-
- int fd;
- int breaker[2];
-
- int count;
- struct {
- int fd;
- int events;
- void *data;
- event_handler_t handler;
- } *reg;
-
- int used;
- int idx_cache;
- int changed;
-
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-
- void *evcache;
- int evcache_size;
-};
-
-struct event_ops {
- struct event_pool * (*new) (int count);
-
- int (*event_register) (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-
- int (*event_select_on) (struct event_pool *event_pool, int fd, int idx,
- int poll_in, int poll_out);
-
- int (*event_unregister) (struct event_pool *event_pool, int fd, int idx);
-
- int (*event_dispatch) (struct event_pool *event_pool);
-};
-
-struct event_pool * event_pool_new (int count);
-int event_select_on (struct event_pool *event_pool, int fd, int idx,
- int poll_in, int poll_out);
-int event_register (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-int event_unregister (struct event_pool *event_pool, int fd, int idx);
-int event_dispatch (struct event_pool *event_pool);
-
-#endif /* _EVENT_H_ */
diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
new file mode 100644
index 00000000000..33157549897
--- /dev/null
+++ b/libglusterfs/src/events.c
@@ -0,0 +1,136 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <stdarg.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/events.h"
+
+#define EVENT_HOST "127.0.0.1"
+#define EVENT_PORT 24009
+
+int
+_gf_event(eventtypes_t event, const char *fmt, ...)
+{
+ int ret = 0;
+ int sock = -1;
+ char *eventstr = NULL;
+ va_list arguments;
+ char *msg = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char *host = NULL;
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
+ struct addrinfo *iter_result_ptr = NULL;
+ xlator_t *this = THIS;
+ char *volfile_server_transport = NULL;
+
+ /* Global context */
+ ctx = this->ctx;
+
+ if (event < 0 || event >= EVENT_LAST) {
+ ret = EVENT_ERROR_INVALID_INPUTS;
+ goto out;
+ }
+
+ if (ctx) {
+ volfile_server_transport = ctx->cmd_args.volfile_server_transport;
+ }
+ if (!volfile_server_transport) {
+ volfile_server_transport = "tcp";
+ }
+
+ /* host = NULL returns localhost */
+ if (ctx && ctx->cmd_args.volfile_server &&
+ (strcmp(volfile_server_transport, "unix"))) {
+ /* If it is client code then volfile_server is set
+ use that information to push the events. */
+ host = ctx->cmd_args.volfile_server;
+ }
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_flags = AI_ADDRCONFIG;
+
+ if ((getaddrinfo(host, TOSTRING(EVENT_PORT), &hints, &result)) != 0) {
+ ret = EVENT_ERROR_RESOLVE;
+ goto out;
+ }
+
+ // iterate over the result and break when socket creation is success.
+ for (iter_result_ptr = result; iter_result_ptr != NULL;
+ iter_result_ptr = iter_result_ptr->ai_next) {
+ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype,
+ iter_result_ptr->ai_protocol);
+ if (sock != -1) {
+ break;
+ }
+ }
+ /*
+ * If none of the addrinfo structures lead to a successful socket
+ * creation, socket creation has failed.
+ */
+ if (sock < 0) {
+ ret = EVENT_ERROR_SOCKET;
+ goto out;
+ }
+
+ va_start(arguments, fmt);
+ ret = gf_vasprintf(&msg, fmt, arguments);
+ va_end(arguments);
+
+ if (ret < 0) {
+ ret = EVENT_ERROR_INVALID_INPUTS;
+ goto out;
+ }
+
+ ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)gf_time(), event, msg);
+ GF_FREE(msg);
+ if (ret <= 0) {
+ ret = EVENT_ERROR_MSG_FORMAT;
+ goto out;
+ }
+
+ /* Send Message */
+ if (sendto(sock, eventstr, strlen(eventstr), 0, result->ai_addr,
+ result->ai_addrlen) <= 0) {
+ ret = EVENT_ERROR_SEND;
+ goto out;
+ }
+
+ ret = EVENT_SEND_OK;
+
+out:
+ if (sock >= 0) {
+ sys_close(sock);
+ }
+
+ /* Allocated by gf_asprintf */
+ if (eventstr)
+ GF_FREE(eventstr);
+
+ if (result)
+ freeaddrinfo(result);
+
+ return ret;
+}
diff --git a/libglusterfs/src/fd-lk.c b/libglusterfs/src/fd-lk.c
index 0c439fe6e93..c2d34f81c9c 100644
--- a/libglusterfs/src/fd-lk.c
+++ b/libglusterfs/src/fd-lk.c
@@ -8,463 +8,426 @@
cases as published by the Free Software Foundation.
*/
-#include "fd-lk.h"
-#include "common-utils.h"
-
+#include "glusterfs/fd-lk.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
int32_t
-_fd_lk_delete_lock (fd_lk_ctx_node_t *lock)
+_fd_lk_delete_lock(fd_lk_ctx_node_t *lock)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("fd-lk", lock, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", lock, out);
- list_del_init (&lock->next);
+ list_del_init(&lock->next);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int32_t
-_fd_lk_destroy_lock (fd_lk_ctx_node_t *lock)
+_fd_lk_destroy_lock(fd_lk_ctx_node_t *lock)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("fd-lk", lock, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", lock, out);
- GF_FREE (lock);
+ GF_FREE(lock);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-_fd_lk_destroy_lock_list (fd_lk_ctx_t *lk_ctx)
+_fd_lk_destroy_lock_list(fd_lk_ctx_t *lk_ctx)
{
- int ret = -1;
- fd_lk_ctx_node_t *lk = NULL;
- fd_lk_ctx_node_t *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out);
-
- list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) {
- _fd_lk_delete_lock (lk);
- _fd_lk_destroy_lock (lk);
- }
- ret = 0;
+ int ret = -1;
+ fd_lk_ctx_node_t *lk = NULL;
+ fd_lk_ctx_node_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("fd-lk", lk_ctx, out);
+
+ list_for_each_entry_safe(lk, tmp, &lk_ctx->lk_list, next)
+ {
+ _fd_lk_delete_lock(lk);
+ _fd_lk_destroy_lock(lk);
+ }
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx)
+fd_lk_ctx_unref(fd_lk_ctx_t *lk_ctx)
{
- int ref = -1;
+ int ref = -1;
- GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, err);
+ GF_VALIDATE_OR_GOTO("fd-lk", lk_ctx, err);
- LOCK (&lk_ctx->lock);
- {
- ref = --lk_ctx->ref;
- if (ref < 0)
- GF_ASSERT (!ref);
- if (ref == 0)
- _fd_lk_destroy_lock_list (lk_ctx);
- }
- UNLOCK (&lk_ctx->lock);
+ ref = GF_ATOMIC_DEC(lk_ctx->ref);
+ if (ref < 0)
+ GF_ASSERT(!ref);
+ if (ref == 0)
+ _fd_lk_destroy_lock_list(lk_ctx);
- if (ref == 0) {
- LOCK_DESTROY (&lk_ctx->lock);
- GF_FREE (lk_ctx);
- }
+ if (ref == 0) {
+ LOCK_DESTROY(&lk_ctx->lock);
+ GF_FREE(lk_ctx);
+ }
- return 0;
+ return 0;
err:
- return -1;
-}
-
-fd_lk_ctx_t *
-_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx)
-{
- if (!lk_ctx) {
- gf_log_callingfn ("fd", GF_LOG_WARNING,
- "invalid argument");
- return NULL;
- }
-
- ++lk_ctx->ref;
-
- return lk_ctx;
-}
-
-fd_lk_ctx_t *
-fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx)
-{
- fd_lk_ctx_t *new_lk_ctx = NULL;
-
- if (!lk_ctx) {
- gf_log_callingfn ("fd", GF_LOG_WARNING,
- "invalid argument");
- return NULL;
- }
-
- LOCK (&lk_ctx->lock);
- {
- new_lk_ctx = _fd_lk_ctx_ref (lk_ctx);
- }
- UNLOCK (&lk_ctx->lock);
-
- return new_lk_ctx;
+ return -1;
}
fd_lk_ctx_t *
-fd_lk_ctx_try_ref (fd_lk_ctx_t *lk_ctx)
+fd_lk_ctx_ref(fd_lk_ctx_t *lk_ctx)
{
- int ret = -1;
- fd_lk_ctx_t *new_lk_ctx = NULL;
-
- if (!lk_ctx) {
- goto out;
- }
+ if (!lk_ctx) {
+ gf_msg_callingfn("fd-lk", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
- ret = TRY_LOCK (&lk_ctx->lock);
- if (ret)
- goto out;
+ GF_ATOMIC_INC(lk_ctx->ref);
- new_lk_ctx = _fd_lk_ctx_ref (lk_ctx);
- UNLOCK (&lk_ctx->lock);
-
-out:
- return new_lk_ctx;
+ return lk_ctx;
}
fd_lk_ctx_t *
-fd_lk_ctx_create ()
+fd_lk_ctx_create()
{
- fd_lk_ctx_t *fd_lk_ctx = NULL;
+ fd_lk_ctx_t *fd_lk_ctx = NULL;
- fd_lk_ctx = GF_CALLOC (1, sizeof (fd_lk_ctx_t),
- gf_common_mt_fd_lk_ctx_t);
- if (!fd_lk_ctx)
- goto out;
+ fd_lk_ctx = GF_CALLOC(1, sizeof(fd_lk_ctx_t), gf_common_mt_fd_lk_ctx_t);
+ if (!fd_lk_ctx)
+ goto out;
- INIT_LIST_HEAD (&fd_lk_ctx->lk_list);
+ INIT_LIST_HEAD(&fd_lk_ctx->lk_list);
- LOCK_INIT (&fd_lk_ctx->lock);
+ LOCK_INIT(&fd_lk_ctx->lock);
- fd_lk_ctx = fd_lk_ctx_ref (fd_lk_ctx);
+ fd_lk_ctx = fd_lk_ctx_ref(fd_lk_ctx);
out:
- return fd_lk_ctx;
+ return fd_lk_ctx;
}
int
-_fd_lk_insert_lock (fd_lk_ctx_t *lk_ctx,
- fd_lk_ctx_node_t *lock)
+_fd_lk_insert_lock(fd_lk_ctx_t *lk_ctx, fd_lk_ctx_node_t *lock)
{
- list_add_tail (&lock->next, &lk_ctx->lk_list);
- return 0;
+ list_add_tail(&lock->next, &lk_ctx->lk_list);
+ return 0;
}
static off_t
-_fd_lk_get_lock_len (off_t start, off_t end)
+_fd_lk_get_lock_len(off_t start, off_t end)
{
- if (end == LLONG_MAX)
- return 0;
- else
- return (end - start + 1);
+ if (end == LLONG_MAX)
+ return 0;
+ else
+ return (end - start + 1);
}
fd_lk_ctx_node_t *
-fd_lk_ctx_node_new (int32_t cmd, struct gf_flock *flock)
+fd_lk_ctx_node_new(int32_t cmd, struct gf_flock *flock)
{
- fd_lk_ctx_node_t *new_lock = NULL;
+ fd_lk_ctx_node_t *new_lock = NULL;
- /* TODO: get from mem-pool */
- new_lock = GF_CALLOC (1, sizeof (fd_lk_ctx_node_t),
- gf_common_mt_fd_lk_ctx_node_t);
- if (!new_lock)
- goto out;
+ /* TODO: get from mem-pool */
+ new_lock = GF_CALLOC(1, sizeof(fd_lk_ctx_node_t),
+ gf_common_mt_fd_lk_ctx_node_t);
+ if (!new_lock)
+ goto out;
- new_lock->cmd = cmd;
+ new_lock->cmd = cmd;
- if (flock) {
- new_lock->fl_type = flock->l_type;
- new_lock->fl_start = flock->l_start;
+ if (flock) {
+ new_lock->fl_type = flock->l_type;
+ new_lock->fl_start = flock->l_start;
- if (flock->l_len == 0)
- new_lock->fl_end = LLONG_MAX;
- else
- new_lock->fl_end = flock->l_start + flock->l_len - 1;
+ if (flock->l_len == 0)
+ new_lock->fl_end = LLONG_MAX;
+ else
+ new_lock->fl_end = flock->l_start + flock->l_len - 1;
- memcpy (&new_lock->user_flock, flock,
- sizeof (struct gf_flock));
- }
+ memcpy(&new_lock->user_flock, flock, sizeof(struct gf_flock));
+ }
- INIT_LIST_HEAD (&new_lock->next);
+ INIT_LIST_HEAD(&new_lock->next);
out:
- return new_lock;
+ return new_lock;
}
int32_t
-_fd_lk_delete_unlck_locks (fd_lk_ctx_t *lk_ctx)
+_fd_lk_delete_unlck_locks(fd_lk_ctx_t *lk_ctx)
{
- int32_t ret = -1;
- fd_lk_ctx_node_t *tmp = NULL;
- fd_lk_ctx_node_t *lk = NULL;
+ int32_t ret = -1;
+ fd_lk_ctx_node_t *tmp = NULL;
+ fd_lk_ctx_node_t *lk = NULL;
- GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", lk_ctx, out);
- list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) {
- if (lk->fl_type == F_UNLCK) {
- _fd_lk_delete_lock (lk);
- _fd_lk_destroy_lock (lk);
- }
+ list_for_each_entry_safe(lk, tmp, &lk_ctx->lk_list, next)
+ {
+ if (lk->fl_type == F_UNLCK) {
+ _fd_lk_delete_lock(lk);
+ _fd_lk_destroy_lock(lk);
}
+ }
out:
- return ret;
+ return ret;
}
int
-fd_lk_overlap (fd_lk_ctx_node_t *l1,
- fd_lk_ctx_node_t *l2)
+fd_lk_overlap(fd_lk_ctx_node_t *l1, fd_lk_ctx_node_t *l2)
{
- if (l1->fl_end >= l2->fl_start &&
- l2->fl_end >= l1->fl_start)
- return 1;
+ if (l1->fl_end >= l2->fl_start && l2->fl_end >= l1->fl_start)
+ return 1;
- return 0;
+ return 0;
}
fd_lk_ctx_node_t *
-_fd_lk_add_locks (fd_lk_ctx_node_t *l1,
- fd_lk_ctx_node_t *l2)
+_fd_lk_add_locks(fd_lk_ctx_node_t *l1, fd_lk_ctx_node_t *l2)
{
- fd_lk_ctx_node_t *sum = NULL;
+ fd_lk_ctx_node_t *sum = NULL;
- sum = fd_lk_ctx_node_new (0, NULL);
- if (!sum)
- goto out;
+ sum = fd_lk_ctx_node_new(0, NULL);
+ if (!sum)
+ goto out;
- sum->fl_start = min (l1->fl_start, l2->fl_start);
- sum->fl_end = max (l1->fl_end, l2->fl_end);
+ sum->fl_start = min(l1->fl_start, l2->fl_start);
+ sum->fl_end = max(l1->fl_end, l2->fl_end);
- sum->user_flock.l_start = sum->fl_start;
- sum->user_flock.l_len = _fd_lk_get_lock_len (sum->fl_start,
- sum->fl_end);
+ sum->user_flock.l_start = sum->fl_start;
+ sum->user_flock.l_len = _fd_lk_get_lock_len(sum->fl_start, sum->fl_end);
out:
- return sum;
+ return sum;
}
/* Subtract two locks */
struct _values {
- fd_lk_ctx_node_t *locks[3];
+ fd_lk_ctx_node_t *locks[3];
};
int32_t
-_fd_lk_sub_locks (struct _values *v,
- fd_lk_ctx_node_t *big,
- fd_lk_ctx_node_t *small)
+_fd_lk_sub_locks(struct _values *v, fd_lk_ctx_node_t *big,
+ fd_lk_ctx_node_t *small)
{
- int32_t ret = -1;
-
- if ((big->fl_start == small->fl_start) &&
- (big->fl_end == small->fl_end)) {
- /* both edges coincide with big */
- v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
-
- v->locks[0]->fl_type = small->fl_type;
- v->locks[0]->user_flock.l_type = small->fl_type;
- } else if ((small->fl_start > big->fl_start) &&
- (small->fl_end < big->fl_end)) {
- /* small lock is completely inside big lock,
- break it down into 3 different locks. */
- v->locks[0] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- v->locks[1] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[1])
- goto out;
-
- v->locks[2] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[2])
- goto out;
-
- memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
- v->locks[0]->fl_end = small->fl_start - 1;
- v->locks[0]->user_flock.l_len =
- _fd_lk_get_lock_len (v->locks[0]->fl_start,
- v->locks[0]->fl_end);
-
- memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t));
-
- memcpy (v->locks[2], big, sizeof (fd_lk_ctx_node_t));
- v->locks[2]->fl_start = small->fl_end + 1;
- v->locks[2]->user_flock.l_len =
- _fd_lk_get_lock_len (v->locks[2]->fl_start,
- v->locks[2]->fl_end);
- } else if (small->fl_start == big->fl_start) {
- /* One of the ends co-incide, break the
- locks into two seperate parts */
- v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[1])
- goto out;
-
- memcpy (v->locks[0], small, sizeof (fd_lk_ctx_node_t));
-
- memcpy (v->locks[1], big, sizeof (fd_lk_ctx_node_t));
- v->locks[1]->fl_start = small->fl_end + 1;
- v->locks[1]->user_flock.l_start = small->fl_end + 1;
- } else if (small->fl_end == big->fl_end) {
- /* One of the ends co-incide, break the
- locks into two seperate parts */
- v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[1])
- goto out;
-
- memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
- v->locks[0]->fl_end = small->fl_start - 1;
- v->locks[0]->user_flock.l_len =
- _fd_lk_get_lock_len (v->locks[0]->fl_start,
- v->locks[0]->fl_end);
-
- memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t));
- } else {
- /* We should never come to this case */
- GF_ASSERT (!"Invalid case");
- }
- ret = 0;
+ int32_t ret = -1;
+
+ if ((big->fl_start == small->fl_start) && (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v->locks[0] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ memcpy(v->locks[0], big, sizeof(fd_lk_ctx_node_t));
+
+ v->locks[0]->fl_type = small->fl_type;
+ v->locks[0]->user_flock.l_type = small->fl_type;
+ } else if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* small lock is completely inside big lock,
+ break it down into 3 different locks. */
+ v->locks[0] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ v->locks[2] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[2])
+ goto out;
+
+ memcpy(v->locks[0], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[0]->fl_end = small->fl_start - 1;
+ v->locks[0]->user_flock.l_len = _fd_lk_get_lock_len(
+ v->locks[0]->fl_start, v->locks[0]->fl_end);
+
+ memcpy(v->locks[1], small, sizeof(fd_lk_ctx_node_t));
+
+ memcpy(v->locks[2], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[2]->fl_start = small->fl_end + 1;
+ v->locks[2]->user_flock.l_len = _fd_lk_get_lock_len(
+ v->locks[2]->fl_start, v->locks[2]->fl_end);
+ } else if (small->fl_start == big->fl_start) {
+ /* One of the ends co-incide, break the
+ locks into two separate parts */
+ v->locks[0] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ memcpy(v->locks[0], small, sizeof(fd_lk_ctx_node_t));
+
+ memcpy(v->locks[1], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[1]->fl_start = small->fl_end + 1;
+ v->locks[1]->user_flock.l_start = small->fl_end + 1;
+ } else if (small->fl_end == big->fl_end) {
+ /* One of the ends co-incide, break the
+ locks into two separate parts */
+ v->locks[0] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ memcpy(v->locks[0], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[0]->fl_end = small->fl_start - 1;
+ v->locks[0]->user_flock.l_len = _fd_lk_get_lock_len(
+ v->locks[0]->fl_start, v->locks[0]->fl_end);
+
+ memcpy(v->locks[1], small, sizeof(fd_lk_ctx_node_t));
+ } else {
+ /* We should never come to this case */
+ GF_ASSERT(!"Invalid case");
+ }
+ ret = 0;
out:
- return ret;
+ return ret;
}
static void
-_fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx,
- fd_lk_ctx_node_t *lock)
+_fd_lk_insert_and_merge(fd_lk_ctx_t *lk_ctx, fd_lk_ctx_node_t *lock)
{
- int32_t ret = -1;
- int32_t i = 0;
- fd_lk_ctx_node_t *entry = NULL;
- fd_lk_ctx_node_t *t = NULL;
- fd_lk_ctx_node_t *sum = NULL;
- struct _values v = {.locks = {0, 0, 0 }};
-
- list_for_each_entry_safe (entry, t, &lk_ctx->lk_list, next) {
- if (!fd_lk_overlap (entry, lock))
- continue;
-
- if (entry->fl_type == lock->fl_type) {
- sum = _fd_lk_add_locks (entry, lock);
- if (!sum)
- return;
- sum->fl_type = entry->fl_type;
- sum->user_flock.l_type = entry->fl_type;
- _fd_lk_delete_lock (entry);
- _fd_lk_destroy_lock (entry);
- _fd_lk_destroy_lock (lock);
- _fd_lk_insert_and_merge (lk_ctx, sum);
- return;
- } else {
- sum = _fd_lk_add_locks (entry, lock);
- sum->fl_type = entry->fl_type;
- sum->user_flock.l_type = entry->fl_type;
- ret = _fd_lk_sub_locks (&v, sum, lock);
- if (ret)
- return;
- _fd_lk_delete_lock (entry);
- _fd_lk_destroy_lock (entry);
-
- _fd_lk_delete_lock (lock);
- _fd_lk_destroy_lock (lock);
-
- for (i = 0; i < 3; i++) {
- if (!v.locks[i])
- continue;
-
- INIT_LIST_HEAD (&v.locks[i]->next);
- _fd_lk_insert_and_merge (lk_ctx, v.locks[i]);
- }
- _fd_lk_delete_unlck_locks (lk_ctx);
- return;
- }
- }
-
- /* no conflicts, so just insert */
- if (lock->fl_type != F_UNLCK) {
- _fd_lk_insert_lock (lk_ctx, lock);
+ int32_t ret = -1;
+ int32_t i = 0;
+ fd_lk_ctx_node_t *entry = NULL;
+ fd_lk_ctx_node_t *t = NULL;
+ fd_lk_ctx_node_t *sum = NULL;
+ struct _values v = {.locks = {0, 0, 0}};
+
+ list_for_each_entry_safe(entry, t, &lk_ctx->lk_list, next)
+ {
+ if (!fd_lk_overlap(entry, lock))
+ continue;
+
+ if (entry->fl_type == lock->fl_type) {
+ sum = _fd_lk_add_locks(entry, lock);
+ if (!sum)
+ return;
+ sum->fl_type = entry->fl_type;
+ sum->user_flock.l_type = entry->fl_type;
+ _fd_lk_delete_lock(entry);
+ _fd_lk_destroy_lock(entry);
+ _fd_lk_destroy_lock(lock);
+ _fd_lk_insert_and_merge(lk_ctx, sum);
+ return;
} else {
- _fd_lk_destroy_lock_list (lk_ctx);
+ sum = _fd_lk_add_locks(entry, lock);
+ sum->fl_type = lock->fl_type;
+ sum->user_flock.l_type = lock->fl_type;
+ ret = _fd_lk_sub_locks(&v, sum, lock);
+ if (ret)
+ return;
+ _fd_lk_delete_lock(entry);
+ _fd_lk_destroy_lock(entry);
+
+ _fd_lk_delete_lock(lock);
+ _fd_lk_destroy_lock(lock);
+
+ _fd_lk_destroy_lock(sum);
+
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
+
+ INIT_LIST_HEAD(&v.locks[i]->next);
+ _fd_lk_insert_and_merge(lk_ctx, v.locks[i]);
+ }
+ _fd_lk_delete_unlck_locks(lk_ctx);
+ return;
}
+ }
+
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ _fd_lk_insert_lock(lk_ctx, lock);
+ } else {
+ _fd_lk_destroy_lock(lock);
+ }
}
static void
-print_lock_list (fd_lk_ctx_t *lk_ctx)
+print_lock_list(fd_lk_ctx_t *lk_ctx)
{
- fd_lk_ctx_node_t *lk = NULL;
-
- gf_log ("fd-lk", GF_LOG_DEBUG, "lock list:");
-
- list_for_each_entry (lk, &lk_ctx->lk_list, next)
- gf_log ("fd-lk", GF_LOG_DEBUG, "owner = %s, "
- "cmd = %s fl_type = %s, fs_start = %"PRId64", "
- "fs_end = %"PRId64", user_flock: l_type = %s, "
- "l_start = %"PRId64", l_len = %"PRId64", ",
- lkowner_utoa (&lk->user_flock.l_owner),
- get_lk_cmd (lk->cmd), get_lk_type (lk->fl_type),
- lk->fl_start, lk->fl_end,
- get_lk_type (lk->user_flock.l_type),
- lk->user_flock.l_start,
- lk->user_flock.l_len);
+ fd_lk_ctx_node_t *lk = NULL;
+
+ gf_msg_debug("fd-lk", 0, "lock list:");
+
+ list_for_each_entry(lk, &lk_ctx->lk_list, next)
+ gf_msg_debug("fd-lk", 0,
+ "owner = %s, cmd = %s fl_type = %s,"
+ " fs_start = %" PRId64 ", fs_end = %" PRId64
+ ", "
+ "user_flock: l_type = %s, l_start = %" PRId64
+ ", "
+ "l_len = %" PRId64 ", ",
+ lkowner_utoa(&lk->user_flock.l_owner), get_lk_cmd(lk->cmd),
+ get_lk_type(lk->fl_type), lk->fl_start, lk->fl_end,
+ get_lk_type(lk->user_flock.l_type), lk->user_flock.l_start,
+ lk->user_flock.l_len);
}
int
-fd_lk_insert_and_merge (fd_t *fd, int32_t cmd,
- struct gf_flock *flock)
+fd_lk_insert_and_merge(fd_t *fd, int32_t cmd, struct gf_flock *flock)
{
- int32_t ret = -1;
- fd_lk_ctx_t *lk_ctx = NULL;
- fd_lk_ctx_node_t *lk = NULL;
-
- GF_VALIDATE_OR_GOTO ("fd-lk", fd, out);
- GF_VALIDATE_OR_GOTO ("fd-lk", flock, out);
-
- lk_ctx = fd_lk_ctx_ref (fd->lk_ctx);
- lk = fd_lk_ctx_node_new (cmd, flock);
-
- gf_log ("fd-lk", GF_LOG_DEBUG,
- "new lock requrest: owner = %s, fl_type = %s, "
- "fs_start = %"PRId64", fs_end = %"PRId64", "
- "user_flock: l_type = %s, l_start = %"PRId64", "
- "l_len = %"PRId64, lkowner_utoa (&flock->l_owner),
- get_lk_type (lk->fl_type), lk->fl_start,
- lk->fl_end, get_lk_type (lk->user_flock.l_type),
- lk->user_flock.l_start,
- lk->user_flock.l_len);
-
- LOCK (&lk_ctx->lock);
- {
- _fd_lk_insert_and_merge (lk_ctx, lk);
- print_lock_list (lk_ctx);
- }
- UNLOCK (&lk_ctx->lock);
+ int32_t ret = -1;
+ fd_lk_ctx_t *lk_ctx = NULL;
+ fd_lk_ctx_node_t *lk = NULL;
+
+ GF_VALIDATE_OR_GOTO("fd-lk", fd, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", flock, out);
+
+ lk_ctx = fd_lk_ctx_ref(fd->lk_ctx);
+ lk = fd_lk_ctx_node_new(cmd, flock);
+
+ gf_msg_debug("fd-lk", 0,
+ "new lock request: owner = %s, fl_type = %s"
+ ", fs_start = %" PRId64 ", fs_end = %" PRId64
+ ", user_flock:"
+ " l_type = %s, l_start = %" PRId64 ", l_len = %" PRId64,
+ lkowner_utoa(&flock->l_owner), get_lk_type(lk->fl_type),
+ lk->fl_start, lk->fl_end, get_lk_type(lk->user_flock.l_type),
+ lk->user_flock.l_start, lk->user_flock.l_len);
+
+ LOCK(&lk_ctx->lock);
+ {
+ _fd_lk_insert_and_merge(lk_ctx, lk);
+ print_lock_list(lk_ctx);
+ }
+ UNLOCK(&lk_ctx->lock);
+
+ fd_lk_ctx_unref(lk_ctx);
+
+ ret = 0;
+out:
+ return ret;
+}
- fd_lk_ctx_unref (lk_ctx);
+gf_boolean_t
+fd_lk_ctx_empty(fd_lk_ctx_t *lk_ctx)
+{
+ gf_boolean_t verdict = _gf_true;
- ret = 0;
-out:
- return ret;
+ if (!lk_ctx)
+ return _gf_true;
+
+ LOCK(&lk_ctx->lock);
+ {
+ verdict = list_empty(&lk_ctx->lk_list);
+ }
+ UNLOCK(&lk_ctx->lock);
+
+ return verdict;
}
diff --git a/libglusterfs/src/fd-lk.h b/libglusterfs/src/fd-lk.h
deleted file mode 100644
index bdea8c2a007..00000000000
--- a/libglusterfs/src/fd-lk.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _FD_LK_H
-#define _FD_LK_H
-
-#include "fd.h"
-#include "locking.h"
-#include "list.h"
-#include "logging.h"
-#include "mem-pool.h"
-#include "mem-types.h"
-#include "glusterfs.h"
-
-#define get_lk_type(type) \
- type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK")
-
-#define get_lk_cmd(cmd) \
- cmd == F_SETLKW ? "F_SETLKW" : (cmd == F_SETLK ? "F_SETLK" : "F_GETLK")
-
-struct _fd;
-
-struct fd_lk_ctx {
- struct list_head lk_list;
- int ref;
- gf_lock_t lock;
-};
-typedef struct fd_lk_ctx fd_lk_ctx_t;
-
-struct fd_lk_ctx_node {
- int32_t cmd;
- struct gf_flock user_flock;
- off_t fl_start;
- off_t fl_end;
- short fl_type;
- struct list_head next;
-};
-typedef struct fd_lk_ctx_node fd_lk_ctx_node_t;
-
-fd_lk_ctx_t *
-_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx);
-
-fd_lk_ctx_t *
-fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx);
-
-fd_lk_ctx_t *
-fd_lk_ctx_try_ref (fd_lk_ctx_t *lk_ctx);
-
-fd_lk_ctx_t *
-fd_lk_ctx_create ();
-
-int
-fd_lk_insert_and_merge (struct _fd *lk_ctx, int32_t cmd,
- struct gf_flock *flock);
-
-int
-fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx);
-
-#endif /* _FD_LK_H */
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index e90683ce0cd..62606e91164 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -8,1181 +8,1196 @@
cases as published by the Free Software Foundation.
*/
-#include "fd.h"
-#include "glusterfs.h"
-#include "inode.h"
-#include "dict.h"
-#include "statedump.h"
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
+#include "glusterfs/fd.h"
+#include <errno.h> // for EINVAL, errno, ENOMEM
+#include <inttypes.h> // for PRIu64
+#include <stdint.h> // for UINT32_MAX
+#include <string.h> // for NULL, memcpy, memset, size_t
+#include "glusterfs/statedump.h"
static int
-gf_fd_fdtable_expand (fdtable_t *fdtable, uint32_t nr);
-
+gf_fd_fdtable_expand(fdtable_t *fdtable, uint32_t nr);
fd_t *
-__fd_ref (fd_t *fd);
+__fd_ref(fd_t *fd);
static int
-gf_fd_chain_fd_entries (fdentry_t *entries, uint32_t startidx,
- uint32_t endcount)
+gf_fd_chain_fd_entries(fdentry_t *entries, uint32_t startidx, uint32_t endcount)
{
- uint32_t i = 0;
+ uint32_t i = 0;
- if (!entries) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!entries");
- return -1;
- }
+ if (!entries) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!entries");
+ return -1;
+ }
- /* Chain only till the second to last entry because we want to
- * ensure that the last entry has GF_FDTABLE_END.
- */
- for (i = startidx; i < (endcount - 1); i++)
- entries[i].next_free = i + 1;
+ /* Chain only till the second to last entry because we want to
+ * ensure that the last entry has GF_FDTABLE_END.
+ */
+ for (i = startidx; i < (endcount - 1); i++)
+ entries[i].next_free = i + 1;
- /* i has already been incremented upto the last entry. */
- entries[i].next_free = GF_FDTABLE_END;
+ /* i has already been incremented up to the last entry. */
+ entries[i].next_free = GF_FDTABLE_END;
- return 0;
+ return 0;
}
-
static int
-gf_fd_fdtable_expand (fdtable_t *fdtable, uint32_t nr)
+gf_fd_fdtable_expand(fdtable_t *fdtable, uint32_t nr)
{
- fdentry_t *oldfds = NULL;
- uint32_t oldmax_fds = -1;
- int ret = -1;
-
- if (fdtable == NULL || nr < 0) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- ret = EINVAL;
- goto out;
- }
-
- nr /= (1024 / sizeof (fdentry_t));
- nr = gf_roundup_next_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fdentry_t));
-
- oldfds = fdtable->fdentries;
- oldmax_fds = fdtable->max_fds;
-
- fdtable->fdentries = GF_CALLOC (nr, sizeof (fdentry_t),
- gf_common_mt_fdentry_t);
- if (!fdtable->fdentries) {
- ret = ENOMEM;
- goto out;
- }
- fdtable->max_fds = nr;
-
- if (oldfds) {
- uint32_t cpy = oldmax_fds * sizeof (fdentry_t);
- memcpy (fdtable->fdentries, oldfds, cpy);
- }
-
- gf_fd_chain_fd_entries (fdtable->fdentries, oldmax_fds,
- fdtable->max_fds);
-
- /* Now that expansion is done, we must update the fd list
- * head pointer so that the fd allocation functions can continue
- * using the expanded table.
- */
- fdtable->first_free = oldmax_fds;
- GF_FREE (oldfds);
- ret = 0;
+ fdentry_t *oldfds = NULL;
+ uint32_t oldmax_fds = -1;
+ int ret = -1;
+
+ if (fdtable == NULL || nr > UINT32_MAX) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ ret = EINVAL;
+ goto out;
+ }
+
+ nr /= (1024 / sizeof(fdentry_t));
+ nr = gf_roundup_next_power_of_two(nr + 1);
+ nr *= (1024 / sizeof(fdentry_t));
+
+ oldfds = fdtable->fdentries;
+ oldmax_fds = fdtable->max_fds;
+
+ fdtable->fdentries = GF_CALLOC(nr, sizeof(fdentry_t),
+ gf_common_mt_fdentry_t);
+ if (!fdtable->fdentries) {
+ ret = ENOMEM;
+ goto out;
+ }
+ fdtable->max_fds = nr;
+
+ if (oldfds) {
+ uint32_t cpy = oldmax_fds * sizeof(fdentry_t);
+ memcpy(fdtable->fdentries, oldfds, cpy);
+ }
+
+ gf_fd_chain_fd_entries(fdtable->fdentries, oldmax_fds, fdtable->max_fds);
+
+ /* Now that expansion is done, we must update the fd list
+ * head pointer so that the fd allocation functions can continue
+ * using the expanded table.
+ */
+ fdtable->first_free = oldmax_fds;
+ GF_FREE(oldfds);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
fdtable_t *
-gf_fd_fdtable_alloc (void)
+gf_fd_fdtable_alloc(void)
{
- fdtable_t *fdtable = NULL;
+ fdtable_t *fdtable = NULL;
- fdtable = GF_CALLOC (1, sizeof (*fdtable), gf_common_mt_fdtable_t);
- if (!fdtable)
- return NULL;
+ fdtable = GF_CALLOC(1, sizeof(*fdtable), gf_common_mt_fdtable_t);
+ if (!fdtable)
+ return NULL;
- pthread_mutex_init (&fdtable->lock, NULL);
+ pthread_rwlock_init(&fdtable->lock, NULL);
- pthread_mutex_lock (&fdtable->lock);
- {
- gf_fd_fdtable_expand (fdtable, 0);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ gf_fd_fdtable_expand(fdtable, 0);
+ }
+ pthread_rwlock_unlock(&fdtable->lock);
- return fdtable;
+ return fdtable;
}
-
static fdentry_t *
-__gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
+__gf_fd_fdtable_get_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *fdentries = NULL;
+ fdentry_t *fdentries = NULL;
- if (count == NULL) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!count");
- goto out;
- }
+ if (count == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!count");
+ goto out;
+ }
- fdentries = fdtable->fdentries;
- fdtable->fdentries = GF_CALLOC (fdtable->max_fds, sizeof (fdentry_t),
- gf_common_mt_fdentry_t);
- gf_fd_chain_fd_entries (fdtable->fdentries, 0, fdtable->max_fds);
- *count = fdtable->max_fds;
+ fdentries = fdtable->fdentries;
+ fdtable->fdentries = GF_CALLOC(fdtable->max_fds, sizeof(fdentry_t),
+ gf_common_mt_fdentry_t);
+ gf_fd_chain_fd_entries(fdtable->fdentries, 0, fdtable->max_fds);
+ *count = fdtable->max_fds;
out:
- return fdentries;
+ return fdentries;
}
-
fdentry_t *
-gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
+gf_fd_fdtable_get_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *entries = NULL;
+ fdentry_t *entries = NULL;
- if (fdtable) {
- pthread_mutex_lock (&fdtable->lock);
- {
- entries = __gf_fd_fdtable_get_all_fds (fdtable, count);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ if (fdtable) {
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ entries = __gf_fd_fdtable_get_all_fds(fdtable, count);
}
+ pthread_rwlock_unlock(&fdtable->lock);
+ }
- return entries;
+ return entries;
}
-
static fdentry_t *
-__gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count)
+__gf_fd_fdtable_copy_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *fdentries = NULL;
- int i = 0;
+ fdentry_t *fdentries = NULL;
+ int i = 0;
- if (count == NULL) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!count");
- goto out;
- }
+ if (count == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!count");
+ goto out;
+ }
- fdentries = GF_CALLOC (fdtable->max_fds, sizeof (fdentry_t),
- gf_common_mt_fdentry_t);
- if (fdentries == NULL) {
- goto out;
- }
+ fdentries = GF_CALLOC(fdtable->max_fds, sizeof(fdentry_t),
+ gf_common_mt_fdentry_t);
+ if (fdentries == NULL) {
+ goto out;
+ }
- *count = fdtable->max_fds;
+ *count = fdtable->max_fds;
- for (i = 0; i < fdtable->max_fds; i++) {
- if (fdtable->fdentries[i].fd != NULL) {
- fdentries[i].fd = fd_ref (fdtable->fdentries[i].fd);
- }
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (fdtable->fdentries[i].fd != NULL) {
+ fdentries[i].fd = fd_ref(fdtable->fdentries[i].fd);
}
+ }
out:
- return fdentries;
+ return fdentries;
}
-
fdentry_t *
-gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count)
+gf_fd_fdtable_copy_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *entries = NULL;
+ fdentry_t *entries = NULL;
- if (fdtable) {
- pthread_mutex_lock (&fdtable->lock);
- {
- entries = __gf_fd_fdtable_copy_all_fds (fdtable, count);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ if (fdtable) {
+ pthread_rwlock_rdlock(&fdtable->lock);
+ {
+ entries = __gf_fd_fdtable_copy_all_fds(fdtable, count);
}
+ pthread_rwlock_unlock(&fdtable->lock);
+ }
- return entries;
+ return entries;
}
-
void
-gf_fd_fdtable_destroy (fdtable_t *fdtable)
+gf_fd_fdtable_destroy(fdtable_t *fdtable)
{
- struct list_head list = {0, };
- fd_t *fd = NULL;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
- int32_t i = 0;
-
- INIT_LIST_HEAD (&list);
-
- if (!fdtable) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!fdtable");
- return;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
- fdentries = __gf_fd_fdtable_get_all_fds (fdtable, &fd_count);
- GF_FREE (fdtable->fdentries);
- }
- pthread_mutex_unlock (&fdtable->lock);
-
- if (fdentries != NULL) {
- for (i = 0; i < fd_count; i++) {
- fd = fdentries[i].fd;
- if (fd != NULL) {
- fd_unref (fd);
- }
- }
-
- GF_FREE (fdentries);
- pthread_mutex_destroy (&fdtable->lock);
- GF_FREE (fdtable);
- }
+ struct list_head list = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ fdentry_t *fdentries = NULL;
+ uint32_t fd_count = 0;
+ int32_t i = 0;
+
+ INIT_LIST_HEAD(&list);
+
+ if (!fdtable) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!fdtable");
+ return;
+ }
+
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ fdentries = __gf_fd_fdtable_get_all_fds(fdtable, &fd_count);
+ GF_FREE(fdtable->fdentries);
+ }
+ pthread_rwlock_unlock(&fdtable->lock);
+
+ if (fdentries != NULL) {
+ for (i = 0; i < fd_count; i++) {
+ fd = fdentries[i].fd;
+ if (fd != NULL) {
+ fd_unref(fd);
+ }
+ }
+
+ GF_FREE(fdentries);
+ pthread_rwlock_destroy(&fdtable->lock);
+ GF_FREE(fdtable);
+ }
}
-
int
-gf_fd_unused_get (fdtable_t *fdtable, fd_t *fdptr)
+gf_fd_unused_get(fdtable_t *fdtable, fd_t *fdptr)
{
- int32_t fd = -1;
- fdentry_t *fde = NULL;
- int error;
- int alloc_attempts = 0;
-
- if (fdtable == NULL || fdptr == NULL) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return EINVAL;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
- fd_alloc_try_again:
- if (fdtable->first_free != GF_FDTABLE_END) {
- fde = &fdtable->fdentries[fdtable->first_free];
- fd = fdtable->first_free;
- fdtable->first_free = fde->next_free;
- fde->next_free = GF_FDENTRY_ALLOCATED;
- fde->fd = fdptr;
- } else {
- /* If this is true, there is something
- * seriously wrong with our data structures.
- */
- if (alloc_attempts >= 2) {
- gf_log ("fd", GF_LOG_ERROR,
- "multiple attempts to expand fd table"
- " have failed.");
- goto out;
- }
- error = gf_fd_fdtable_expand (fdtable,
- fdtable->max_fds + 1);
- if (error) {
- gf_log ("fd", GF_LOG_ERROR,
- "Cannot expand fdtable: %s",
- strerror (error));
- goto out;
- }
- ++alloc_attempts;
- /* At this point, the table stands expanded
- * with the first_free referring to the first
- * free entry in the new set of fdentries that
- * have just been allocated. That means, the
- * above logic should just work.
- */
- goto fd_alloc_try_again;
- }
- }
+ int32_t fd = -1;
+ fdentry_t *fde = NULL;
+ int error;
+ int alloc_attempts = 0;
+
+ if (fdtable == NULL || fdptr == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return EINVAL;
+ }
+
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ fd_alloc_try_again:
+ if (fdtable->first_free != GF_FDTABLE_END) {
+ fde = &fdtable->fdentries[fdtable->first_free];
+ fd = fdtable->first_free;
+ fdtable->first_free = fde->next_free;
+ fde->next_free = GF_FDENTRY_ALLOCATED;
+ fde->fd = fdptr;
+ } else {
+ /* If this is true, there is something
+ * seriously wrong with our data structures.
+ */
+ if (alloc_attempts >= 2) {
+ gf_msg("fd", GF_LOG_ERROR, 0, LG_MSG_EXPAND_FD_TABLE_FAILED,
+ "multiple attempts to expand fd table"
+ " have failed.");
+ goto out;
+ }
+ error = gf_fd_fdtable_expand(fdtable, fdtable->max_fds + 1);
+ if (error) {
+ gf_msg("fd", GF_LOG_ERROR, error, LG_MSG_EXPAND_FD_TABLE_FAILED,
+ "Cannot expand fdtable");
+ goto out;
+ }
+ ++alloc_attempts;
+ /* At this point, the table stands expanded
+ * with the first_free referring to the first
+ * free entry in the new set of fdentries that
+ * have just been allocated. That means, the
+ * above logic should just work.
+ */
+ goto fd_alloc_try_again;
+ }
+ }
out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_unlock(&fdtable->lock);
- return fd;
+ return fd;
}
-
-inline void
-gf_fd_put (fdtable_t *fdtable, int32_t fd)
+void
+gf_fd_put(fdtable_t *fdtable, int32_t fd)
{
- fd_t *fdptr = NULL;
- fdentry_t *fde = NULL;
+ fd_t *fdptr = NULL;
+ fdentry_t *fde = NULL;
- if (fd == -2)
- /* anonymous fd */
- return;
-
- if (fdtable == NULL || fd < 0) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
+ if (fd == GF_ANON_FD_NO)
+ return;
- if (!(fd < fdtable->max_fds)) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
+ if (fdtable == NULL || fd < 0) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
- pthread_mutex_lock (&fdtable->lock);
- {
- fde = &fdtable->fdentries[fd];
- /* If the entry is not allocated, put operation must return
- * without doing anything.
- * This has the potential of masking out any bugs in a user of
- * fd that ends up calling gf_fd_put twice for the same fd or
- * for an unallocated fd, but it is a price we have to pay for
- * ensuring sanity of our fd-table.
- */
- if (fde->next_free != GF_FDENTRY_ALLOCATED)
- goto unlock_out;
- fdptr = fde->fd;
- fde->fd = NULL;
- fde->next_free = fdtable->first_free;
- fdtable->first_free = fd;
- }
+ if (!(fd < fdtable->max_fds)) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
+
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ fde = &fdtable->fdentries[fd];
+ /* If the entry is not allocated, put operation must return
+ * without doing anything.
+ * This has the potential of masking out any bugs in a user of
+ * fd that ends up calling gf_fd_put twice for the same fd or
+ * for an unallocated fd, but it is a price we have to pay for
+ * ensuring sanity of our fd-table.
+ */
+ if (fde->next_free != GF_FDENTRY_ALLOCATED)
+ goto unlock_out;
+ fdptr = fde->fd;
+ fde->fd = NULL;
+ fde->next_free = fdtable->first_free;
+ fdtable->first_free = fd;
+ }
unlock_out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_unlock(&fdtable->lock);
- if (fdptr) {
- fd_unref (fdptr);
- }
+ if (fdptr) {
+ fd_unref(fdptr);
+ }
}
-
-inline void
-gf_fdptr_put (fdtable_t *fdtable, fd_t *fd)
+void
+gf_fdptr_put(fdtable_t *fdtable, fd_t *fd)
{
- fdentry_t *fde = NULL;
- int32_t i = 0;
+ fdentry_t *fde = NULL;
+ int32_t i = 0;
- if ((fdtable == NULL) || (fd == NULL)) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
- for (i = 0; i < fdtable->max_fds; i++) {
- if (fdtable->fdentries[i].fd == fd) {
- fde = &fdtable->fdentries[i];
- break;
- }
- }
+ if ((fdtable == NULL) || (fd == NULL)) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
- if (fde == NULL) {
- gf_log_callingfn ("fd", GF_LOG_WARNING,
- "fd (%p) is not present in fdtable", fd);
- goto unlock_out;
- }
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (fdtable->fdentries[i].fd == fd) {
+ fde = &fdtable->fdentries[i];
+ break;
+ }
+ }
- /* If the entry is not allocated, put operation must return
- * without doing anything.
- * This has the potential of masking out any bugs in a user of
- * fd that ends up calling gf_fd_put twice for the same fd or
- * for an unallocated fd, but it is a price we have to pay for
- * ensuring sanity of our fd-table.
- */
- if (fde->next_free != GF_FDENTRY_ALLOCATED)
- goto unlock_out;
- fde->fd = NULL;
- fde->next_free = fdtable->first_free;
- fdtable->first_free = i;
+ if (fde == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, 0,
+ LG_MSG_FD_NOT_FOUND_IN_FDTABLE,
+ "fd (%p) is not present in fdtable", fd);
+ goto unlock_out;
}
+
+ /* If the entry is not allocated, put operation must return
+ * without doing anything.
+ * This has the potential of masking out any bugs in a user of
+ * fd that ends up calling gf_fd_put twice for the same fd or
+ * for an unallocated fd, but it is a price we have to pay for
+ * ensuring sanity of our fd-table.
+ */
+ if (fde->next_free != GF_FDENTRY_ALLOCATED)
+ goto unlock_out;
+ fde->fd = NULL;
+ fde->next_free = fdtable->first_free;
+ fdtable->first_free = i;
+ }
unlock_out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_unlock(&fdtable->lock);
- if ((fd != NULL) && (fde != NULL)) {
- fd_unref (fd);
- }
+ if ((fd != NULL) && (fde != NULL)) {
+ fd_unref(fd);
+ }
}
-
fd_t *
-gf_fd_fdptr_get (fdtable_t *fdtable, int64_t fd)
+gf_fd_fdptr_get(fdtable_t *fdtable, int64_t fd)
{
- fd_t *fdptr = NULL;
+ fd_t *fdptr = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
+ if (fdtable == NULL || fd < 0) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
- if (!(fd < fdtable->max_fds)) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
+ if (!(fd < fdtable->max_fds)) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
- pthread_mutex_lock (&fdtable->lock);
- {
- fdptr = fdtable->fdentries[fd].fd;
- if (fdptr) {
- fd_ref (fdptr);
- }
+ pthread_rwlock_rdlock(&fdtable->lock);
+ {
+ fdptr = fdtable->fdentries[fd].fd;
+ if (fdptr) {
+ fd_ref(fdptr);
}
- pthread_mutex_unlock (&fdtable->lock);
+ }
+ pthread_rwlock_unlock(&fdtable->lock);
- return fdptr;
+ return fdptr;
}
-
fd_t *
-__fd_ref (fd_t *fd)
+__fd_ref(fd_t *fd)
{
- ++fd->refcount;
+ GF_ATOMIC_INC(fd->refcount);
- return fd;
+ return fd;
}
-
fd_t *
-fd_ref (fd_t *fd)
+fd_ref(fd_t *fd)
{
- fd_t *refed_fd = NULL;
-
- if (!fd) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "null fd");
- return NULL;
- }
+ if (!fd) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "null fd");
+ return NULL;
+ }
- LOCK (&fd->inode->lock);
- refed_fd = __fd_ref (fd);
- UNLOCK (&fd->inode->lock);
+ GF_ATOMIC_INC(fd->refcount);
- return refed_fd;
+ return fd;
}
-
-fd_t *
-__fd_unref (fd_t *fd)
+static void
+fd_destroy(fd_t *fd, gf_boolean_t bound)
{
- GF_ASSERT (fd->refcount);
-
- --fd->refcount;
-
- if (fd->refcount == 0) {
- list_del_init (&fd->inode_list);
- }
-
- return fd;
+ xlator_t *xl = NULL;
+ int i = 0;
+ xlator_t *old_THIS = NULL;
+
+ if (fd == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ goto out;
+ }
+
+ if (fd->inode == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_ERROR, 0, LG_MSG_FD_INODE_NULL,
+ "fd->inode is NULL");
+ goto out;
+ }
+ if (!fd->_ctx)
+ goto out;
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd->_ctx[i].key) {
+ xl = fd->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+ if (!xl->call_cleanup && xl->cbks->releasedir)
+ xl->cbks->releasedir(xl, fd);
+ THIS = old_THIS;
+ }
+ }
+ } else {
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd->_ctx[i].key) {
+ xl = fd->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+ if (!xl->call_cleanup && xl->cbks->release)
+ xl->cbks->release(xl, fd);
+ THIS = old_THIS;
+ }
+ }
+ }
+
+ LOCK_DESTROY(&fd->lock);
+
+ GF_FREE(fd->_ctx);
+ if (bound) {
+ /*Decrease the count only after close happens on file*/
+ LOCK(&fd->inode->lock);
+ {
+ fd->inode->fd_count--;
+ }
+ UNLOCK(&fd->inode->lock);
+ }
+ inode_unref(fd->inode);
+ fd->inode = NULL;
+ fd_lk_ctx_unref(fd->lk_ctx);
+ mem_put(fd);
+out:
+ return;
}
-
-static void
-fd_destroy (fd_t *fd)
+void
+fd_close(fd_t *fd)
{
- xlator_t *xl = NULL;
- int i = 0;
- xlator_t *old_THIS = NULL;
+ xlator_t *xl, *old_THIS;
- if (fd == NULL){
- gf_log_callingfn ("xlator", GF_LOG_ERROR, "invalid argument");
- goto out;
- }
+ old_THIS = THIS;
- if (fd->inode == NULL){
- gf_log_callingfn ("xlator", GF_LOG_ERROR, "fd->inode is NULL");
- goto out;
- }
- if (!fd->_ctx)
- goto out;
+ for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) {
+ if (!xl->call_cleanup) {
+ THIS = xl;
- if (IA_ISDIR (fd->inode->ia_type)) {
- for (i = 0; i < fd->xl_count; i++) {
- if (fd->_ctx[i].key) {
- xl = fd->_ctx[i].xl_key;
- old_THIS = THIS;
- THIS = xl;
- if (xl->cbks->releasedir)
- xl->cbks->releasedir (xl, fd);
- THIS = old_THIS;
- }
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ if (xl->cbks->fdclosedir != NULL) {
+ xl->cbks->fdclosedir(xl, fd);
}
- } else {
- for (i = 0; i < fd->xl_count; i++) {
- if (fd->_ctx[i].key) {
- xl = fd->_ctx[i].xl_key;
- old_THIS = THIS;
- THIS = xl;
- if (xl->cbks->release)
- xl->cbks->release (xl, fd);
- THIS = old_THIS;
- }
+ } else {
+ if (xl->cbks->fdclose != NULL) {
+ xl->cbks->fdclose(xl, fd);
}
+ }
}
+ }
- LOCK_DESTROY (&fd->lock);
-
- GF_FREE (fd->_ctx);
- inode_unref (fd->inode);
- fd->inode = (inode_t *)0xaaaaaaaa;
- fd_lk_ctx_unref (fd->lk_ctx);
- mem_put (fd);
-out:
- return;
+ THIS = old_THIS;
}
-
void
-fd_unref (fd_t *fd)
+fd_unref(fd_t *fd)
{
- int32_t refcount = 0;
+ int32_t refcount = 0;
+ gf_boolean_t bound = _gf_false;
- if (!fd) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "fd is NULL");
- return;
- }
-
- LOCK (&fd->inode->lock);
- {
- __fd_unref (fd);
- refcount = fd->refcount;
- }
- UNLOCK (&fd->inode->lock);
+ if (!fd) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "fd is NULL");
+ return;
+ }
+ LOCK(&fd->inode->lock);
+ {
+ refcount = GF_ATOMIC_DEC(fd->refcount);
if (refcount == 0) {
- fd_destroy (fd);
+ if (!list_empty(&fd->inode_list)) {
+ list_del_init(&fd->inode_list);
+ fd->inode->active_fd_count--;
+ bound = _gf_true;
+ }
}
+ }
+ UNLOCK(&fd->inode->lock);
- return ;
-}
+ if (refcount == 0) {
+ fd_destroy(fd, bound);
+ }
+ return;
+}
-fd_t *
-__fd_bind (fd_t *fd)
+static fd_t *
+__fd_bind(fd_t *fd)
{
- list_del_init (&fd->inode_list);
- list_add (&fd->inode_list, &fd->inode->fd_list);
+ list_del_init(&fd->inode_list);
+ list_add(&fd->inode_list, &fd->inode->fd_list);
+ fd->inode->fd_count++;
+ fd->inode->active_fd_count++;
- return fd;
+ return fd;
}
-
fd_t *
-fd_bind (fd_t *fd)
+fd_bind(fd_t *fd)
{
- if (!fd || !fd->inode) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "!fd || !fd->inode");
- return NULL;
- }
+ if (!fd || !fd->inode) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "!fd || !fd->inode");
+ return NULL;
+ }
- LOCK (&fd->inode->lock);
- {
- fd = __fd_bind (fd);
- }
- UNLOCK (&fd->inode->lock);
+ LOCK(&fd->inode->lock);
+ {
+ fd = __fd_bind(fd);
+ }
+ UNLOCK(&fd->inode->lock);
- return fd;
+ return fd;
}
-
static fd_t *
-__fd_create (inode_t *inode, uint64_t pid)
+fd_allocate(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd;
- if (inode == NULL) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
-
- fd = mem_get0 (inode->table->fd_mem_pool);
- if (!fd)
- goto out;
-
- fd->xl_count = inode->table->xl->graph->xl_count + 1;
-
- fd->_ctx = GF_CALLOC (1, (sizeof (struct _fd_ctx) * fd->xl_count),
- gf_common_mt_fd_ctx);
- if (!fd->_ctx)
- goto free_fd;
-
- fd->lk_ctx = fd_lk_ctx_create ();
- if (!fd->lk_ctx)
- goto free_fd_ctx;
+ if (inode == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
- fd->inode = inode_ref (inode);
+ fd = mem_get0(inode->table->fd_mem_pool);
+ if (fd == NULL) {
+ return NULL;
+ }
+
+ fd->xl_count = inode->table->xl->graph->xl_count + 1;
+
+ fd->_ctx = GF_CALLOC(1, (sizeof(struct _fd_ctx) * fd->xl_count),
+ gf_common_mt_fd_ctx);
+ if (fd->_ctx == NULL) {
+ goto failed;
+ }
+
+ fd->lk_ctx = fd_lk_ctx_create();
+ if (fd->lk_ctx != NULL) {
+ /* We need to take a reference from the inode, but we cannot do it
+ * here because this function can be called with the inode lock taken
+ * and inode_ref() takes the inode's table lock. This is the reverse
+ * of the logical lock acquisition order and can cause a deadlock. So
+ * we simply assign the inode here and we delefate the inode reference
+ * responsibility to the caller (when this function succeeds and the
+ * inode lock is released). This is safe because the caller must hold
+ * a reference of the inode to use it, so it's guaranteed that the
+ * number of references won't reach 0 before the caller finishes.
+ *
+ * TODO: minimize use of locks in favor of atomic operations to avoid
+ * these dependencies. */
+ fd->inode = inode;
fd->pid = pid;
- INIT_LIST_HEAD (&fd->inode_list);
-
- LOCK_INIT (&fd->lock);
-out:
+ INIT_LIST_HEAD(&fd->inode_list);
+ LOCK_INIT(&fd->lock);
+ GF_ATOMIC_INIT(fd->refcount, 1);
return fd;
+ }
-free_fd_ctx:
- GF_FREE (fd->_ctx);
-free_fd:
- mem_put (fd);
+ GF_FREE(fd->_ctx);
- return NULL;
-}
+failed:
+ mem_put(fd);
+ return NULL;
+}
fd_t *
-fd_create (inode_t *inode, pid_t pid)
+fd_create_uint64(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd;
- fd = __fd_create (inode, (uint64_t)pid);
- if (!fd)
- goto out;
+ fd = fd_allocate(inode, pid);
+ if (fd != NULL) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
+ }
- fd = fd_ref (fd);
-
-out:
- return fd;
+ return fd;
}
fd_t *
-fd_create_uint64 (inode_t *inode, uint64_t pid)
+fd_create(inode_t *inode, pid_t pid)
{
- fd_t *fd = NULL;
-
- fd = __fd_create (inode, pid);
- if (!fd)
- goto out;
-
- fd = fd_ref (fd);
-
-out:
- return fd;
+ return fd_create_uint64(inode, (uint64_t)pid);
}
-
static fd_t *
-__fd_lookup (inode_t *inode, uint64_t pid)
+__fd_lookup(inode_t *inode, uint64_t pid)
{
- fd_t *iter_fd = NULL;
- fd_t *fd = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *fd = NULL;
- if (list_empty (&inode->fd_list))
- return NULL;
+ if (list_empty(&inode->fd_list))
+ return NULL;
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ if (iter_fd->anonymous)
+ /* If someone was interested in getting an
+ anonymous fd (or was OK getting an anonymous fd),
+ they can as well call fd_anonymous() directly */
+ continue;
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- if (!pid || iter_fd->pid == pid) {
- fd = __fd_ref (iter_fd);
- break;
- }
+ if (!pid || iter_fd->pid == pid) {
+ fd = __fd_ref(iter_fd);
+ break;
}
+ }
- return fd;
+ return fd;
}
-
fd_t *
-fd_lookup (inode_t *inode, pid_t pid)
+fd_lookup(inode_t *inode, pid_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd = NULL;
- if (!inode) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
- return NULL;
- }
+ if (!inode) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!inode");
+ return NULL;
+ }
- LOCK (&inode->lock);
- {
- fd = __fd_lookup (inode, (uint64_t)pid);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ fd = __fd_lookup(inode, (uint64_t)pid);
+ }
+ UNLOCK(&inode->lock);
- return fd;
+ return fd;
}
fd_t *
-fd_lookup_uint64 (inode_t *inode, uint64_t pid)
+fd_lookup_uint64(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd = NULL;
- if (!inode) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
- return NULL;
- }
+ if (!inode) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!inode");
+ return NULL;
+ }
- LOCK (&inode->lock);
- {
- fd = __fd_lookup (inode, pid);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ fd = __fd_lookup(inode, pid);
+ }
+ UNLOCK(&inode->lock);
- return fd;
+ return fd;
}
static fd_t *
-__fd_lookup_anonymous (inode_t *inode)
+__fd_lookup_anonymous(inode_t *inode, int32_t flags)
{
- fd_t *iter_fd = NULL;
- fd_t *fd = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *fd = NULL;
- if (list_empty (&inode->fd_list))
- return NULL;
+ if (list_empty(&inode->fd_list))
+ return NULL;
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- if (iter_fd->anonymous) {
- fd = __fd_ref (iter_fd);
- break;
- }
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ if ((iter_fd->anonymous) && (flags == iter_fd->flags)) {
+ fd = __fd_ref(iter_fd);
+ break;
}
+ }
- return fd;
+ return fd;
}
fd_t *
-__fd_anonymous (inode_t *inode)
+fd_anonymous_with_flags(inode_t *inode, int32_t flags)
{
- fd_t *fd = NULL;
+ fd_t *fd = NULL;
+ bool ref = false;
- fd = __fd_lookup_anonymous (inode);
+ LOCK(&inode->lock);
- /* if (fd); then we already have increased the refcount in
- __fd_lookup_anonymous(), so no need of one more fd_ref().
- if (!fd); then both create and bind wont bump up the ref
- count, so we have to call fd_ref() after bind. */
- if (!fd) {
- fd = __fd_create (inode, 0);
+ fd = __fd_lookup_anonymous(inode, flags);
- if (!fd)
- return NULL;
+ /* if (fd); then we already have increased the refcount in
+ __fd_lookup_anonymous(), so no need of one more fd_ref().
+ if (!fd); then both create and bind won't bump up the ref
+ count, so we have to call fd_ref() after bind. */
+ if (fd == NULL) {
+ fd = fd_allocate(inode, 0);
+ if (fd != NULL) {
+ fd->anonymous = _gf_true;
+ fd->flags = GF_ANON_FD_FLAGS | (flags & O_DIRECT);
- fd->anonymous = _gf_true;
+ __fd_bind(fd);
- __fd_bind (fd);
-
- __fd_ref (fd);
+ ref = true;
}
+ }
- return fd;
-}
+ UNLOCK(&inode->lock);
+
+ if (ref) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
+ }
+ return fd;
+}
fd_t *
-fd_anonymous (inode_t *inode)
+fd_anonymous(inode_t *inode)
{
- fd_t *fd = NULL;
+ return fd_anonymous_with_flags(inode, 0);
+}
- LOCK (&inode->lock);
- {
- fd = __fd_anonymous (inode);
- }
- UNLOCK (&inode->lock);
+fd_t *
+fd_lookup_anonymous(inode_t *inode, int32_t flags)
+{
+ fd_t *fd = NULL;
- return fd;
+ if (!inode) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!inode");
+ return NULL;
+ }
+
+ LOCK(&inode->lock);
+ {
+ fd = __fd_lookup_anonymous(inode, flags);
+ }
+ UNLOCK(&inode->lock);
+ return fd;
}
-
gf_boolean_t
-fd_is_anonymous (fd_t *fd)
+fd_is_anonymous(fd_t *fd)
{
- return (fd && fd->anonymous);
+ return (fd && fd->anonymous);
}
-
uint8_t
-fd_list_empty (inode_t *inode)
+fd_list_empty(inode_t *inode)
{
- uint8_t empty = 0;
+ uint8_t empty = 0;
- LOCK (&inode->lock);
- {
- empty = list_empty (&inode->fd_list);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ empty = list_empty(&inode->fd_list);
+ }
+ UNLOCK(&inode->lock);
- return empty;
+ return empty;
}
-
int
-__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value)
+__fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value)
{
- int index = 0, new_xl_count = 0;
- int ret = 0;
- int set_idx = -1;
- void *begin = NULL;
- size_t diff = 0;
- struct _fd_ctx *tmp = NULL;
-
- if (!fd || !xlator)
- return -1;
-
- for (index = 0; index < fd->xl_count; index++) {
- if (!fd->_ctx[index].key) {
- if (set_idx == -1)
- set_idx = index;
- /* dont break, to check if key already exists
- further on */
- }
- if (fd->_ctx[index].xl_key == xlator) {
- set_idx = index;
- break;
- }
+ int index = 0, new_xl_count = 0;
+ int ret = 0;
+ int set_idx = -1;
+ void *begin = NULL;
+ size_t diff = 0;
+ struct _fd_ctx *tmp = NULL;
+
+ if (!fd || !xlator)
+ return -1;
+
+ for (index = 0; index < fd->xl_count; index++) {
+ if (!fd->_ctx[index].key) {
+ if (set_idx == -1)
+ set_idx = index;
+ /* don't break, to check if key already exists
+ further on */
+ }
+ if (fd->_ctx[index].xl_key == xlator) {
+ set_idx = index;
+ break;
}
+ }
- if (set_idx == -1) {
- set_idx = fd->xl_count;
-
- new_xl_count = fd->xl_count + xlator->graph->xl_count;
-
- tmp = GF_REALLOC (fd->_ctx,
- (sizeof (struct _fd_ctx)
- * new_xl_count));
- if (tmp == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "realloc of fd->_ctx for fd "
- "(ptr: %p) failed, cannot set the key"
- , fd);
- ret = -1;
- goto out;
- }
+ if (set_idx == -1) {
+ set_idx = fd->xl_count;
- fd->_ctx = tmp;
+ new_xl_count = fd->xl_count + xlator->graph->xl_count;
- begin = fd->_ctx;
- begin += (fd->xl_count * sizeof (struct _fd_ctx));
+ tmp = GF_REALLOC(fd->_ctx, (sizeof(struct _fd_ctx) * new_xl_count));
+ if (tmp == NULL) {
+ ret = -1;
+ goto out;
+ }
- diff = (new_xl_count - fd->xl_count )
- * sizeof (struct _fd_ctx);
+ fd->_ctx = tmp;
- memset (begin, 0, diff);
+ begin = fd->_ctx;
+ begin += (fd->xl_count * sizeof(struct _fd_ctx));
- fd->xl_count = new_xl_count;
- }
+ diff = (new_xl_count - fd->xl_count) * sizeof(struct _fd_ctx);
+
+ memset(begin, 0, diff);
+
+ fd->xl_count = new_xl_count;
+ }
- fd->_ctx[set_idx].xl_key = xlator;
- fd->_ctx[set_idx].value1 = value;
+ fd->_ctx[set_idx].xl_key = xlator;
+ fd->_ctx[set_idx].value1 = value;
out:
- return ret;
+ return ret;
}
-
int
-fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value)
+fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value)
{
- int ret = 0;
+ int ret = 0;
- if (!fd || !xlator) {
- gf_log_callingfn ("", GF_LOG_WARNING, "%p %p", fd, xlator);
- return -1;
- }
+ if (!fd || !xlator) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "%p %p", fd, xlator);
+ return -1;
+ }
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_set (fd, xlator, value);
- }
- UNLOCK (&fd->lock);
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_set(fd, xlator, value);
+ }
+ UNLOCK(&fd->lock);
- return ret;
+ return ret;
}
-
int
-__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value)
+__fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int index = 0;
- int ret = 0;
+ int index = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- for (index = 0; index < fd->xl_count; index++) {
- if (fd->_ctx[index].xl_key == xlator)
- break;
- }
+ for (index = 0; index < fd->xl_count; index++) {
+ if (fd->_ctx[index].xl_key == xlator)
+ break;
+ }
- if (index == fd->xl_count) {
- ret = -1;
- goto out;
- }
+ if (index == fd->xl_count) {
+ ret = -1;
+ goto out;
+ }
- if (value)
- *value = fd->_ctx[index].value1;
+ if (value)
+ *value = fd->_ctx[index].value1;
out:
- return ret;
+ return ret;
}
-
int
-fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value)
+fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int ret = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, xlator, value);
- }
- UNLOCK (&fd->lock);
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, xlator, value);
+ }
+ UNLOCK(&fd->lock);
- return ret;
+ return ret;
}
-
-static int
-__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value)
+int
+__fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int index = 0;
- int ret = 0;
+ int index = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- for (index = 0; index < fd->xl_count; index++) {
- if (fd->_ctx[index].xl_key == xlator)
- break;
- }
+ for (index = 0; index < fd->xl_count; index++) {
+ if (fd->_ctx[index].xl_key == xlator)
+ break;
+ }
- if (index == fd->xl_count) {
- ret = -1;
- goto out;
- }
+ if (index == fd->xl_count) {
+ ret = -1;
+ goto out;
+ }
- if (value)
- *value = fd->_ctx[index].value1;
+ if (value)
+ *value = fd->_ctx[index].value1;
- fd->_ctx[index].key = 0;
- fd->_ctx[index].value1 = 0;
+ fd->_ctx[index].key = 0;
+ fd->_ctx[index].value1 = 0;
out:
- return ret;
+ return ret;
}
-
int
-fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value)
+fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int ret = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_del (fd, xlator, value);
- }
- UNLOCK (&fd->lock);
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_del(fd, xlator, value);
+ }
+ UNLOCK(&fd->lock);
- return ret;
+ return ret;
}
-
void
-fd_dump (fd_t *fd, char *prefix)
+fd_dump(fd_t *fd, char *prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
-
- if (!fd)
- return;
+ char key[GF_DUMP_MAX_BUF_LEN];
- memset(key, 0, sizeof(key));
- gf_proc_dump_write("pid", "%llu", fd->pid);
- gf_proc_dump_write("refcount", "%d", fd->refcount);
- gf_proc_dump_write("flags", "%d", fd->flags);
+ if (!fd)
+ return;
- if (fd->inode) {
- gf_proc_dump_build_key (key, "inode", NULL);
- gf_proc_dump_add_section(key);
- inode_dump (fd->inode, key);
- }
+ gf_proc_dump_write("pid", "%" PRIu64, fd->pid);
+ gf_proc_dump_write("refcount", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(fd->refcount));
+ gf_proc_dump_write("flags", "%d", fd->flags);
+ if (fd->inode) {
+ gf_proc_dump_build_key(key, "inode", NULL);
+ gf_proc_dump_add_section("%s", key);
+ inode_dump(fd->inode, key);
+ }
}
-
void
-fdentry_dump (fdentry_t *fdentry, char *prefix)
+fdentry_dump(fdentry_t *fdentry, char *prefix)
{
- if (!fdentry)
- return;
+ if (!fdentry)
+ return;
- if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
- return;
+ if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
+ return;
- if (fdentry->fd)
- fd_dump(fdentry->fd, prefix);
+ if (fdentry->fd)
+ fd_dump(fdentry->fd, prefix);
}
-
void
-fdtable_dump (fdtable_t *fdtable, char *prefix)
+fdtable_dump(fdtable_t *fdtable, char *prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ int ret = -1;
- if (!fdtable)
- return;
+ if (!fdtable)
+ return;
- ret = pthread_mutex_trylock (&fdtable->lock);
+ ret = pthread_rwlock_tryrdlock(&fdtable->lock);
+ if (ret)
+ goto out;
- if (ret)
- goto out;
+ gf_proc_dump_build_key(key, prefix, "refcount");
+ gf_proc_dump_write(key, "%d", fdtable->refcount);
+ gf_proc_dump_build_key(key, prefix, "maxfds");
+ gf_proc_dump_write(key, "%d", fdtable->max_fds);
+ gf_proc_dump_build_key(key, prefix, "first_free");
+ gf_proc_dump_write(key, "%d", fdtable->first_free);
- memset(key, 0, sizeof(key));
- gf_proc_dump_build_key(key, prefix, "refcount");
- gf_proc_dump_write(key, "%d", fdtable->refcount);
- gf_proc_dump_build_key(key, prefix, "maxfds");
- gf_proc_dump_write(key, "%d", fdtable->max_fds);
- gf_proc_dump_build_key(key, prefix, "first_free");
- gf_proc_dump_write(key, "%d", fdtable->first_free);
-
- for ( i = 0 ; i < fdtable->max_fds; i++) {
- if (GF_FDENTRY_ALLOCATED ==
- fdtable->fdentries[i].next_free) {
- gf_proc_dump_build_key(key, prefix, "fdentry[%d]", i);
- gf_proc_dump_add_section(key);
- fdentry_dump(&fdtable->fdentries[i], key);
- }
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (GF_FDENTRY_ALLOCATED == fdtable->fdentries[i].next_free) {
+ gf_proc_dump_build_key(key, prefix, "fdentry[%d]", i);
+ gf_proc_dump_add_section("%s", key);
+ fdentry_dump(&fdtable->fdentries[i], key);
}
+ }
- pthread_mutex_unlock(&fdtable->lock);
+ pthread_rwlock_unlock(&fdtable->lock);
out:
- if (ret != 0)
- gf_proc_dump_write ("Unable to dump the fdtable",
- "(Lock acquistion failed) %p", fdtable);
- return;
+ if (ret != 0)
+ gf_proc_dump_write("Unable to dump the fdtable",
+ "(Lock acquistion failed) %p", fdtable);
+ return;
}
-
void
-fd_ctx_dump (fd_t *fd, char *prefix)
+fd_ctx_dump(fd_t *fd, char *prefix)
{
- struct _fd_ctx *fd_ctx = NULL;
- xlator_t *xl = NULL;
- int i = 0;
-
-
- if ((fd == NULL) || (fd->_ctx == NULL)) {
- goto out;
- }
-
- LOCK (&fd->lock);
- {
- if (fd->_ctx != NULL) {
- fd_ctx = GF_CALLOC (fd->xl_count, sizeof (*fd_ctx),
- gf_common_mt_fd_ctx);
- if (fd_ctx == NULL) {
- goto unlock;
- }
-
- for (i = 0; i < fd->xl_count; i++) {
- fd_ctx[i] = fd->_ctx[i];
- }
- }
- }
+ struct _fd_ctx *fd_ctx = NULL;
+ xlator_t *xl = NULL;
+ int i = 0;
+
+ if ((fd == NULL) || (fd->_ctx == NULL)) {
+ goto out;
+ }
+
+ LOCK(&fd->lock);
+ {
+ if (fd->_ctx != NULL) {
+ fd_ctx = GF_CALLOC(fd->xl_count, sizeof(*fd_ctx),
+ gf_common_mt_fd_ctx);
+ if (fd_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < fd->xl_count; i++) {
+ fd_ctx[i] = fd->_ctx[i];
+ }
+ }
+ }
unlock:
- UNLOCK (&fd->lock);
+ UNLOCK(&fd->lock);
- if (fd_ctx == NULL) {
- goto out;
- }
+ if (fd_ctx == NULL) {
+ goto out;
+ }
- for (i = 0; i < fd->xl_count; i++) {
- if (fd_ctx[i].xl_key) {
- xl = (xlator_t *)(long)fd_ctx[i].xl_key;
- if (xl->dumpops && xl->dumpops->fdctx)
- xl->dumpops->fdctx (xl, fd);
- }
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)fd_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->fdctx)
+ xl->dumpops->fdctx(xl, fd);
}
+ }
out:
- if (fd_ctx != NULL) {
- GF_FREE (fd_ctx);
- }
+ GF_FREE(fd_ctx);
- return;
+ return;
}
void
-fdentry_dump_to_dict (fdentry_t *fdentry, char *prefix, dict_t *dict,
- int *openfds)
+fdentry_dump_to_dict(fdentry_t *fdentry, char *prefix, dict_t *dict,
+ int *openfds)
{
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int ret = -1;
-
- if (!fdentry)
- return;
- if (!dict)
- return;
-
- if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
- return;
-
- if (fdentry->fd) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_set_int32 (dict, key, fdentry->fd->pid);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_set_int32 (dict, key, fdentry->fd->refcount);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.flags", prefix);
- ret = dict_set_int32 (dict, key, fdentry->fd->flags);
-
- (*openfds)++;
- }
- return;
-}
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = -1;
-void
-fdtable_dump_to_dict (fdtable_t *fdtable, char *prefix, dict_t *dict)
-{
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int i = 0;
- int openfds = 0;
- int ret = -1;
+ if (!fdentry)
+ return;
+ if (!dict)
+ return;
- if (!fdtable)
- return;
- if (!dict)
- return;
+ if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
+ return;
- ret = pthread_mutex_trylock (&fdtable->lock);
+ if (fdentry->fd) {
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_set_uint64(dict, key, fdentry->fd->pid);
if (ret)
- goto out;
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.refcount", prefix);
- ret = dict_set_int32 (dict, key, fdtable->refcount);
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_set_int32(dict, key, GF_ATOMIC_GET(fdentry->fd->refcount));
if (ret)
- goto out;
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.maxfds", prefix);
- ret = dict_set_uint32 (dict, key, fdtable->max_fds);
+ snprintf(key, sizeof(key), "%s.flags", prefix);
+ ret = dict_set_int32(dict, key, fdentry->fd->flags);
if (ret)
- goto out;
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.firstfree", prefix);
- ret = dict_set_int32 (dict, key, fdtable->first_free);
- if (ret)
- goto out;
+ (*openfds)++;
+ }
+ return;
+}
- for (i = 0; i < fdtable->max_fds; i++) {
- if (GF_FDENTRY_ALLOCATED ==
- fdtable->fdentries[i].next_free) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.fdentry%d",
- prefix, i);
- fdentry_dump_to_dict (&fdtable->fdentries[i], key,
- dict, &openfds);
- }
+void
+fdtable_dump_to_dict(fdtable_t *fdtable, char *prefix, dict_t *dict)
+{
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int i = 0;
+ int openfds = 0;
+ int ret = -1;
+
+ if (!fdtable)
+ return;
+ if (!dict)
+ return;
+
+ ret = pthread_rwlock_tryrdlock(&fdtable->lock);
+ if (ret)
+ return;
+
+ snprintf(key, sizeof(key), "%s.fdtable.refcount", prefix);
+ ret = dict_set_int32(dict, key, fdtable->refcount);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.fdtable.maxfds", prefix);
+ ret = dict_set_uint32(dict, key, fdtable->max_fds);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.fdtable.firstfree", prefix);
+ ret = dict_set_int32(dict, key, fdtable->first_free);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (GF_FDENTRY_ALLOCATED == fdtable->fdentries[i].next_free) {
+ snprintf(key, sizeof(key), "%s.fdtable.fdentry%d", prefix, i);
+ fdentry_dump_to_dict(&fdtable->fdentries[i], key, dict, &openfds);
}
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.openfds", prefix);
- ret = dict_set_int32 (dict, key, openfds);
+ snprintf(key, sizeof(key), "%s.fdtable.openfds", prefix);
+ ret = dict_set_int32(dict, key, openfds);
+ if (ret)
+ goto out;
out:
- pthread_mutex_unlock (&fdtable->lock);
- return;
+ pthread_rwlock_unlock(&fdtable->lock);
+ return;
}
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
deleted file mode 100644
index 9454cac7199..00000000000
--- a/libglusterfs/src/fd.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _FD_H
-#define _FD_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "list.h"
-#include <sys/types.h>
-#include <unistd.h>
-#include "glusterfs.h"
-#include "locking.h"
-#include "fd-lk.h"
-#include "common-utils.h"
-
-struct _inode;
-struct _dict;
-struct fd_lk_ctx;
-
-struct _fd_ctx {
- union {
- uint64_t key;
- void *xl_key;
- };
- union {
- uint64_t value1;
- void *ptr1;
- };
-};
-
-struct _fd {
- uint64_t pid;
- int32_t flags;
- int32_t refcount;
- struct list_head inode_list;
- struct _inode *inode;
- gf_lock_t lock; /* used ONLY for manipulating
- 'struct _fd_ctx' array (_ctx).*/
- struct _fd_ctx *_ctx;
- int xl_count; /* Number of xl referred in this fd */
- struct fd_lk_ctx *lk_ctx;
- gf_boolean_t anonymous; /* geo-rep anonymous fd */
-};
-typedef struct _fd fd_t;
-
-
-struct fd_table_entry {
- fd_t *fd;
- int next_free;
-};
-typedef struct fd_table_entry fdentry_t;
-
-
-struct _fdtable {
- int refcount;
- uint32_t max_fds;
- pthread_mutex_t lock;
- fdentry_t *fdentries;
- int first_free;
-};
-typedef struct _fdtable fdtable_t;
-
-
-/* Signifies no more entries in the fd table. */
-#define GF_FDTABLE_END -1
-
-/* This is used to invalidated
- * the next_free value in an fdentry that has been allocated
- */
-#define GF_FDENTRY_ALLOCATED -2
-
-#include "logging.h"
-#include "xlator.h"
-
-
-inline void
-gf_fd_put (fdtable_t *fdtable, int32_t fd);
-
-
-fd_t *
-gf_fd_fdptr_get (fdtable_t *fdtable, int64_t fd);
-
-
-fdtable_t *
-gf_fd_fdtable_alloc (void);
-
-
-int
-gf_fd_unused_get (fdtable_t *fdtable, fd_t *fdptr);
-
-
-fdentry_t *
-gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count);
-
-
-void
-gf_fd_fdtable_destroy (fdtable_t *fdtable);
-
-
-fd_t *
-__fd_ref (fd_t *fd);
-
-
-fd_t *
-fd_ref (fd_t *fd);
-
-
-void
-fd_unref (fd_t *fd);
-
-
-fd_t *
-fd_create (struct _inode *inode, pid_t pid);
-
-fd_t *
-fd_create_uint64 (struct _inode *inode, uint64_t pid);
-
-fd_t *
-fd_lookup (struct _inode *inode, pid_t pid);
-
-fd_t *
-fd_lookup_uint64 (struct _inode *inode, uint64_t pid);
-
-fd_t *
-fd_anonymous (inode_t *inode);
-
-
-gf_boolean_t
-fd_is_anonymous (fd_t *fd);
-
-
-uint8_t
-fd_list_empty (struct _inode *inode);
-
-
-fd_t *
-fd_bind (fd_t *fd);
-
-
-int
-fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
-
-
-int
-fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
-
-int
-fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
-
-int
-__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
-
-
-int
-__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
-
-void
-fd_ctx_dump (fd_t *fd, char *prefix);
-
-fdentry_t *
-gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count);
-
-
-void
-gf_fdptr_put (fdtable_t *fdtable, fd_t *fd);
-
-#endif /* _FD_H */
diff --git a/libglusterfs/src/gen-defaults.py b/libglusterfs/src/gen-defaults.py
new file mode 100755
index 00000000000..e31d3a9fe8a
--- /dev/null
+++ b/libglusterfs/src/gen-defaults.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import sys
+from generator import ops, fop_subs, cbk_subs, generate
+
+FAILURE_CBK_TEMPLATE = """
+int32_t
+default_@NAME@_failure_cbk (call_frame_t *frame, int32_t op_errno)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
+ return 0;
+}
+"""
+
+CBK_RESUME_TEMPLATE = """
+int32_t
+default_@NAME@_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+CBK_TEMPLATE = """
+int32_t
+default_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+RESUME_TEMPLATE = """
+int32_t
+default_@NAME@_resume (call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
+{
+ STACK_WIND (frame, default_@NAME@_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FOP_TEMPLATE = """
+int32_t
+default_@NAME@ (
+ call_frame_t *frame,
+ xlator_t *this,
+ @LONG_ARGS@)
+{
+ STACK_WIND_TAIL (frame,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+def gen_defaults ():
+ for name in list(ops.keys()):
+ print(generate(FAILURE_CBK_TEMPLATE, name, cbk_subs))
+ for name in list(ops.keys()):
+ print(generate(CBK_RESUME_TEMPLATE, name, cbk_subs))
+ for name in list(ops.keys()):
+ print(generate(CBK_TEMPLATE, name, cbk_subs))
+ for name in list(ops.keys()):
+ print(generate(RESUME_TEMPLATE, name, fop_subs))
+ for name in list(ops.keys()):
+ print(generate(FOP_TEMPLATE, name, fop_subs))
+
+for l in open(sys.argv[1], 'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_defaults()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py
new file mode 100755
index 00000000000..5b7aa4764a0
--- /dev/null
+++ b/libglusterfs/src/generator.py
@@ -0,0 +1,777 @@
+#!/usr/bin/python3
+
+import string
+
+# ops format: 'fop-arg' name type stub-field [nosync]
+# 'cbk-arg' name type
+# 'extra' name type arg-str
+# 'journal' fop-type
+# 'link' inode iatt
+#
+# 'role' indicates the significance of this line to the code generator (sort of
+# our own type).
+#
+# For fop-arg, we first need to know the name and the type of the arg so that
+# we can generate SHORT_ARGS (for function calls) and LONG_ARGS (for
+# declarations). For code that uses stubs, we also need to know the name of
+# the stub field, which might be different than the argument itself. Lastly,
+# for code that uses syncops, we need to know whether whoever wrote the syncop
+# for this fop "forgot" to include this argument. (Editorial: this kind of
+# creeping inconsistency is why we should have used code generation for stubs
+# and syncops as well as defaults all along.) To address this need, we use the
+# optional 'nosync' field for arguments (e.g. mkdir.umask) that we should skip
+# in generated syncop code.
+#
+# 'cbk-arg' is like fop-arg but simpler and used for generating callbacks
+# instead of fop functions.
+#
+# 'extra' is also like fop-arg, but it's another hack for syncops. This time
+# the problem is that some of what would normally be *callback* arguments are
+# instead created in the caller and passed to the syncop. We handle that by
+# adding an entry at the appropriate place in the fop-arg list, with the name
+# and type to generate a declaration and an argument string to generate the
+# actual syncop call.
+#
+# The mere presence of a 'journal' item is sufficient for most of the journal
+# code to recognize that it should do something. However, reconciliation also
+# needs to decide how reconciliation builds the arguments it needs to call down
+# to the syncop layer, based on what's in the journal. To do that, we divide
+# ops into three types and store those types in the ops table. In general,
+# these three types work as follows.
+#
+# For an fd-op, the GFID in the journal is used (in loc.gfid) field to
+# look up an inode, then an anonymous fd is found/created for that inode.
+#
+# For an inode-op, the GFID in the journal is used the same way, but no fd
+# is needed.
+#
+# For an entry-op, the *parent* GFID and name from the journal are used to
+# look up an inode (via loc.pargfid and par.name respectively).
+#
+# The only places this seems to fall down is for link and create. In link,
+# which is generally an entry-op, the source is looked up as though it's an
+# inode-op. In create, we have an fd argument but it's really a return
+# argument so we get a fresh inode instead of looking one up. Those two cases
+# need to be handled as special cases in the reconciliation code.
+#
+# 'link' is (hopefully) the last of the journal/syncop hacks. Much like
+# 'extra', some values that are returned as callback arguments in the normal
+# case are handled differently for syncops. For syncops that create objects
+# (e.g. mkdir) we need to link those objects into our inode table. The 'inode'
+# and 'iatt' fields here give us the information we need to construct the
+# proper inode_link call(s).
+
+ops = {}
+xlator_cbks = {}
+xlator_dumpops = {}
+
+ops['fgetxattr'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'name', 'const char *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fsetxattr'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['setxattr'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['statfs'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'buf', 'struct statvfs *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fsyncdir'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['opendir'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'fd', 'fd_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fstat'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fsync'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['flush'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['writev'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'vector', 'struct iovec *', 'vector'),
+ ('fop-arg', 'count', 'int32_t'),
+ ('fop-arg', 'off', 'off_t', 'offset'),
+ ('fop-arg', 'flags', 'uint32_t', 'flags'),
+ ('fop-arg', 'iobref', 'struct iobref *'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['readv'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'offset', 'off_t'),
+ ('fop-arg', 'flags', 'uint32_t'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'vector', 'struct iovec *'),
+ ('cbk-arg', 'count', 'int32_t'),
+ ('cbk-arg', 'stbuf', 'struct iatt *'),
+ ('cbk-arg', 'iobref', 'struct iobref *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['open'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'fd', 'fd_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['create'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'fd', 'fd_t *'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+ ('link', 'loc.inode', '&iatt'),
+)
+
+ops['link'] = (
+ ('fop-arg', 'oldloc', 'loc_t *', 'loc'),
+ ('fop-arg', 'newloc', 'loc_t *', 'loc2'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['rename'] = (
+ ('fop-arg', 'oldloc', 'loc_t *', 'loc'),
+ ('fop-arg', 'newloc', 'loc_t *', 'loc2'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preoldparent', 'struct iatt *'),
+ ('cbk-arg', 'postoldparent', 'struct iatt *'),
+ ('cbk-arg', 'prenewparent', 'struct iatt *'),
+ ('cbk-arg', 'postnewparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['symlink'] = (
+ ('fop-arg', 'linkpath', 'const char *', 'linkname'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'umask', 'mode_t', 'mode', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['rmdir'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['unlink'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags', 'nosync'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['mkdir'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+ ('link', 'loc.inode', '&iatt'),
+)
+
+ops['mknod'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'rdev', 'dev_t', 'rdev'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['readlink'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'path', 'const char *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['access'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'mask', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['ftruncate'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['getxattr'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'name', 'const char *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['xattrop'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['fxattrop'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['removexattr'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'name', 'const char *', 'name'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['fremovexattr'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'name', 'const char *', 'name'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['lk'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'cmd', 'int32_t'),
+ ('fop-arg', 'lock', 'struct gf_flock *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'lock', 'struct gf_flock *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['inodelk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'cmd', 'int32_t'),
+ ('fop-arg', 'lock', 'struct gf_flock *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['finodelk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'cmd', 'int32_t'),
+ ('fop-arg', 'lock', 'struct gf_flock *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['entrylk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'basename', 'const char *'),
+ ('fop-arg', 'cmd', 'entrylk_cmd'),
+ ('fop-arg', 'type', 'entrylk_type'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fentrylk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'basename', 'const char *'),
+ ('fop-arg', 'cmd', 'entrylk_cmd'),
+ ('fop-arg', 'type', 'entrylk_type'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['rchecksum'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'offset', 'off_t'),
+ ('fop-arg', 'len', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'weak_cksum', 'uint32_t'),
+ ('cbk-arg', 'strong_cksum', 'uint8_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['readdir'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'off', 'off_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'entries', 'gf_dirent_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['readdirp'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'off', 'off_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'entries', 'gf_dirent_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['setattr'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'stbuf', 'struct iatt *', 'stat'),
+ ('fop-arg', 'valid', 'int32_t', 'valid'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'statpre', 'struct iatt *'),
+ ('cbk-arg', 'statpost', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['truncate'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['stat'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['lookup'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ # We could add xdata everywhere automatically if somebody hadn't put
+ # something after it here.
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+)
+
+ops['fsetattr'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'stbuf', 'struct iatt *', 'stat'),
+ ('fop-arg', 'valid', 'int32_t', 'valid'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'statpre', 'struct iatt *'),
+ ('cbk-arg', 'statpost', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['fallocate'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'keep_size', 'int32_t', 'mode'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'len', 'size_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'pre', 'struct iatt *'),
+ ('cbk-arg', 'post', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['discard'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'len', 'size_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'pre', 'struct iatt *'),
+ ('cbk-arg', 'post', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['zerofill'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ # As e.g. fallocate/discard (above) "len" should really be a size_t.
+ ('fop-arg', 'len', 'off_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'pre', 'struct iatt *'),
+ ('cbk-arg', 'post', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['ipc'] = (
+ ('fop-arg', 'op', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['seek'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'offset', 'off_t'),
+ ('fop-arg', 'what', 'gf_seek_what_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'offset', 'off_t'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['getspec'] = (
+ ('fop-arg', 'key', 'const char *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('cbk-arg', 'spec_data', 'char *'),
+)
+
+ops['lease'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'lease', 'struct gf_lease *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'lease', 'struct gf_lease *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['getactivelk'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'locklist', 'lock_migration_info_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['setactivelk'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'locklist', 'lock_migration_info_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['put'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask'),
+ ('fop-arg', 'flags', 'uint32_t', 'flags'),
+ ('fop-arg', 'vector', 'struct iovec *', 'vector'),
+ ('fop-arg', 'count', 'int32_t'),
+ ('fop-arg', 'off', 'off_t', 'offset'),
+ ('fop-arg', 'iobref', 'struct iobref *'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['icreate'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'mode', 'mode_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['namelink'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['copy_file_range'] = (
+ ('fop-arg', 'fd_in', 'fd_t *'),
+ ('fop-arg', 'off_in', 'off64_t '),
+ ('fop-arg', 'fd_out', 'fd_t *'),
+ ('fop-arg', 'off_out', 'off64_t '),
+ ('fop-arg', 'len', 'size_t'),
+ ('fop-arg', 'flags', 'uint32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'stbuf', 'struct iatt *'),
+ ('cbk-arg', 'prebuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'postbuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+#####################################################################
+xlator_cbks['forget'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'inode', 'inode_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['release'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['releasedir'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['invalidate'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'inode', 'inode_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['client_destroy'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'client', 'client_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['client_disconnect'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'client', 'client_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['ictxmerge'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('fn-arg', 'inode', 'inode_t *'),
+ ('fn-arg', 'linked_inode', 'inode_t *'),
+ ('ret-val', 'void', ''),
+)
+
+#####################################################################
+xlator_dumpops['priv'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inode'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fd'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inodectx'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'ino', 'inode_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fdctx'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['priv_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inode_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fd_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inodectx_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'ino', 'inode_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fdctx_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['history'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+def get_error_arg (type_str):
+ if type_str.find(" *") != -1:
+ return "NULL"
+ return "-1"
+
+def get_subs (names, types, cbktypes=None):
+ sdict = {}
+ sdict["@SHORT_ARGS@"] = ', '.join(names)
+ # Convert two separate tuples to one of (name, type) sub-tuples.
+ as_tuples = list(zip(types, names))
+ # Convert each sub-tuple into a "type name" string.
+ as_strings = [' '.join(item) for item in as_tuples]
+ # Join all of those into one big string.
+ sdict["@LONG_ARGS@"] = ',\n\t'.join(as_strings)
+ # So much more readable than string.join(map(string.join,zip(...))))
+ sdict["@ERROR_ARGS@"] = ', '.join(list(map(get_error_arg, types)))
+ if cbktypes is not None:
+ sdict["@CBK_ERROR_ARGS@"] = ', '.join(list(map(get_error_arg, cbktypes)))
+ return sdict
+
+def generate (tmpl, name, subs):
+ text = tmpl.replace("@NAME@", name)
+ if name == "writev":
+ # More spurious inconsistency.
+ text = text.replace("@UPNAME@", "WRITE")
+ elif name == "readv":
+ text = text.replace("@UPNAME@", "READ")
+ else:
+ text = text.replace("@UPNAME@", name.upper())
+ for old, new in subs[name].items():
+ text = text.replace(old, new)
+ # TBD: reindent/reformat the result for maximum readability.
+ return text
+
+fop_subs = {}
+cbk_subs = {}
+
+for name, args in ops.items():
+
+ # Create the necessary substitution strings for fops.
+ arg_names = [ a[1] for a in args if a[0] == 'fop-arg']
+ arg_types = [ a[2] for a in args if a[0] == 'fop-arg']
+ cbk_types = [ a[2] for a in args if a[0] == 'cbk-arg']
+ fop_subs[name] = get_subs(arg_names, arg_types, cbk_types)
+
+ # Same thing for callbacks.
+ arg_names = [ a[1] for a in args if a[0] == 'cbk-arg']
+ arg_types = [ a[2] for a in args if a[0] == 'cbk-arg']
+ cbk_subs[name] = get_subs(arg_names, arg_types)
+
+ # Callers can add other subs to these tables, or even create their
+ # own tables, using these same techniques, and then pass the result
+ # to generate() which would Do The Right Thing with them.
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index bb028c9671d..a809efc97ef 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -8,63 +8,232 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
#include <stdio.h>
#include <string.h>
#include <stdint.h>
-#include "compat.h"
-#include "xlator.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/syncop.h"
+
+#define ONE 1ULL
+#define PRESENT_D_OFF_BITS 63
+#define BACKEND_D_OFF_BITS 63
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define MASK (~0ULL)
+#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+
+static uint64_t
+bits_for(uint64_t num)
+{
+ uint64_t bits = 0, ctrl = 1;
+
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits++;
+ }
+
+ return bits;
+}
+
+int
+gf_deitransform(xlator_t *this, uint64_t offset)
+{
+ int cnt = 0;
+ int max = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
+
+ max = glusterfs_get_leaf_count(this->graph);
+
+ if (max == 1) {
+ cnt = 0;
+ goto out;
+ }
+
+ if (offset & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = bits_for(max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
+
+ cnt = offset & host_mask;
+ } else {
+ /* small d_off */
+ cnt = offset % max;
+ }
+out:
+ return cnt;
+}
+
+uint64_t
+gf_dirent_orig_offset(xlator_t *this, uint64_t offset)
+{
+ int max = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t orig_offset;
+
+ max = glusterfs_get_leaf_count(this->graph);
+
+ if (max == 1) {
+ orig_offset = offset;
+ goto out;
+ }
+
+ if (offset & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = bits_for(max);
+ off_mask = (MASK << max_bits);
+ orig_offset = ((offset & ~TOP_BIT) & off_mask) << SHIFT_BITS;
+ } else {
+ /* small d_off */
+ orig_offset = offset / max;
+ }
+out:
+ return orig_offset;
+}
+
+int
+gf_itransform(xlator_t *this, uint64_t x, uint64_t *y_p, int client_id)
+{
+ int max = 0;
+ uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
+
+ if (x == ((uint64_t)-1)) {
+ y = (uint64_t)-1;
+ goto out;
+ }
+
+ if (!x) {
+ y = 0;
+ goto out;
+ }
+
+ max = glusterfs_get_leaf_count(this->graph);
+
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
+
+ max_bits = bits_for(max);
+
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
+
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | client_id;
+ } else {
+ /* small d_off */
+ y = ((x * max) + client_id);
+ }
+
+out:
+ if (y_p)
+ *y_p = y;
+
+ return 0;
+}
gf_dirent_t *
-gf_dirent_for_name (const char *name)
+gf_dirent_for_name(const char *name)
{
- gf_dirent_t *gf_dirent = NULL;
+ gf_dirent_t *gf_dirent = NULL;
- /* TODO: use mem-pool */
- gf_dirent = GF_CALLOC (gf_dirent_size (name), 1,
- gf_common_mt_gf_dirent_t);
- if (!gf_dirent)
- return NULL;
+ /* TODO: use mem-pool */
+ gf_dirent = GF_CALLOC(gf_dirent_size(name), 1, gf_common_mt_gf_dirent_t);
+ if (!gf_dirent)
+ return NULL;
- INIT_LIST_HEAD (&gf_dirent->list);
- strcpy (gf_dirent->d_name, name);
+ INIT_LIST_HEAD(&gf_dirent->list);
+ strcpy(gf_dirent->d_name, name);
- gf_dirent->d_off = 0;
- gf_dirent->d_ino = -1;
- gf_dirent->d_type = 0;
- gf_dirent->d_len = strlen (name);
+ gf_dirent->d_off = 0;
+ gf_dirent->d_ino = -1;
+ gf_dirent->d_type = 0;
+ gf_dirent->d_len = strlen(name);
- return gf_dirent;
+ return gf_dirent;
}
+void
+gf_dirent_entry_free(gf_dirent_t *entry)
+{
+ if (!entry)
+ return;
+
+ if (entry->dict)
+ dict_unref(entry->dict);
+ if (entry->inode)
+ inode_unref(entry->inode);
+
+ list_del_init(&entry->list);
+ GF_FREE(entry);
+}
void
-gf_dirent_free (gf_dirent_t *entries)
+gf_dirent_free(gf_dirent_t *entries)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ if (!entries)
+ return;
+
+ if (list_empty(&entries->list))
+ return;
+
+ list_for_each_entry_safe(entry, tmp, &entries->list, list)
+ {
+ gf_dirent_entry_free(entry);
+ }
+}
+
+gf_dirent_t *
+entry_copy(gf_dirent_t *source)
+{
+ gf_dirent_t *sink = NULL;
+
+ sink = gf_dirent_for_name(source->d_name);
+ if (!sink)
+ return NULL;
- if (!entries)
- return;
+ sink->d_off = source->d_off;
+ sink->d_ino = source->d_ino;
+ sink->d_type = source->d_type;
+ sink->d_stat = source->d_stat;
+ sink->d_len = source->d_len;
- if (list_empty (&entries->list))
- return;
+ if (source->inode)
+ sink->inode = inode_ref(source->inode);
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (entry->dict)
- dict_unref (entry->dict);
- if (entry->inode)
- inode_unref (entry->inode);
+ if (source->dict)
+ sink->dict = dict_ref(source->dict);
+ return sink;
+}
+
+void
+gf_link_inode_from_dirent(xlator_t *this, inode_t *parent, gf_dirent_t *entry)
+{
+ inode_t *link_inode = NULL;
+ inode_t *tmp = NULL;
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ if (!entry->inode)
+ return;
+ link_inode = inode_link(entry->inode, parent, entry->d_name,
+ &entry->d_stat);
+ if (!link_inode)
+ return;
+
+ inode_lookup(link_inode);
+ tmp = entry->inode;
+ entry->inode = link_inode;
+ inode_unref(tmp);
}
/* TODO: Currently, with this function, we will be breaking the
@@ -73,20 +242,60 @@ gf_dirent_free (gf_dirent_t *entries)
Need more thoughts before finalizing this function
*/
int
-gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
- gf_dirent_t *entries)
+gf_link_inodes_from_dirent(xlator_t *this, inode_t *parent,
+ gf_dirent_t *entries)
+{
+ gf_dirent_t *entry = NULL;
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ gf_link_inode_from_dirent(this, parent, entry);
+ }
+
+ return 0;
+}
+
+int
+gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol)
{
- gf_dirent_t *entry = NULL;
- inode_t *link_inode = NULL;
-
- list_for_each_entry (entry, &entries->list, list) {
- if (entry->inode) {
- link_inode = inode_link (entry->inode, parent,
- entry->d_name, &entry->d_stat);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- }
- }
-
- return 0;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ char *path = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+
+ loc.inode = inode_grep(parent->table, parent, entry->d_name);
+ if (!loc.inode) {
+ loc.inode = inode_new(parent->table);
+ gf_uuid_copy(loc.inode->gfid, entry->d_stat.ia_gfid);
+ }
+
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = entry->d_name;
+ loc.parent = inode_ref(parent);
+ ret = inode_path(loc.parent, entry->d_name, &path);
+ loc.path = path;
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(subvol, &loc, &iatt, NULL, NULL, NULL);
+ if (ret)
+ goto out;
+
+ entry->d_stat = iatt;
+ entry->inode = inode_ref(loc.inode);
+ /* We don't need to link inode here, because as part of readdirp_cbk
+ * we will link all dirents.
+ *
+ * Since we did a proper lookup, we don't need to set need_lookup
+ * flag.
+ */
+
+ ret = 0;
+out:
+ loc_wipe(&loc);
+ return ret;
}
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h
deleted file mode 100644
index 26cb5a66872..00000000000
--- a/libglusterfs/src/gf-dirent.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _GF_DIRENT_H
-#define _GF_DIRENT_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "iatt.h"
-#include "inode.h"
-
-#define gf_dirent_size(name) (sizeof (gf_dirent_t) + strlen (name) + 1)
-
-struct _dir_entry_t {
- struct _dir_entry_t *next;
- char *name;
- char *link;
- struct iatt buf;
-};
-
-
-struct _gf_dirent_t {
- union {
- struct list_head list;
- struct {
- struct _gf_dirent_t *next;
- struct _gf_dirent_t *prev;
- };
- };
- uint64_t d_ino;
- uint64_t d_off;
- uint32_t d_len;
- uint32_t d_type;
- struct iatt d_stat;
- dict_t *dict;
- inode_t *inode;
- char d_name[0];
-};
-
-#define DT_ISDIR(mode) (mode == DT_DIR)
-
-gf_dirent_t *gf_dirent_for_name (const char *name);
-void gf_dirent_free (gf_dirent_t *entries);
-int gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
- gf_dirent_t *entries);
-
-#endif /* _GF_DIRENT_H */
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
index c55ed2581ba..64a93802f76 100644
--- a/libglusterfs/src/gidcache.c
+++ b/libglusterfs/src/gidcache.c
@@ -8,30 +8,48 @@
cases as published by the Free Software Foundation.
*/
-#include "gidcache.h"
-#include "mem-pool.h"
+#include "glusterfs/gidcache.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
/*
* We treat this as a very simple set-associative LRU cache, with entries aged
* out after a configurable interval. Hardly rocket science, but lots of
* details to worry about.
*/
-#define BUCKET_START(p,n) ((p) + ((n) * AUX_GID_CACHE_ASSOC))
+#define BUCKET_START(p, n) ((p) + ((n)*AUX_GID_CACHE_ASSOC))
/*
* Initialize the cache.
*/
-int gid_cache_init(gid_cache_t *cache, uint32_t timeout)
+int
+gid_cache_init(gid_cache_t *cache, uint32_t timeout)
{
- if (!cache)
- return -1;
+ if (!cache)
+ return -1;
- LOCK_INIT(&cache->gc_lock);
- cache->gc_max_age = timeout;
- cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
- memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
+ LOCK_INIT(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
+ memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
- return 0;
+ return 0;
+}
+
+/*
+ * Reconfigure the cache timeout.
+ */
+int
+gid_cache_reconf(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ UNLOCK(&cache->gc_lock);
+
+ return 0;
}
/*
@@ -39,139 +57,155 @@ int gid_cache_init(gid_cache_t *cache, uint32_t timeout)
* an additional allocation and memory copy. The caller should copy the data and
* release (unlock) the cache as soon as possible.
*/
-const gid_list_t *gid_cache_lookup(gid_cache_t *cache, uint64_t id)
+const gid_list_t *
+gid_cache_lookup(gid_cache_t *cache, uint64_t id, uint64_t uid, uint64_t gid)
{
- int bucket;
- int i;
- time_t now;
- const gid_list_t *agl;
-
- LOCK(&cache->gc_lock);
- now = time(NULL);
- bucket = id % cache->gc_nbuckets;
- agl = BUCKET_START(cache->gc_cache, bucket);
- for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
- if (!agl->gl_list)
- continue;
- if (agl->gl_id != id)
- continue;
-
- /*
- * We don't put new entries in the cache when expiration=0, but
- * there might be entries still in there if expiration was
- * changed very recently. Writing the check this way ensures
- * that they're not used.
- */
- if (now < agl->gl_deadline) {
- return agl;
- }
-
- /*
- * We're not going to find any more UID matches, and reaping
- * is handled further down to maintain LRU order.
- */
- break;
- }
- UNLOCK(&cache->gc_lock);
- return NULL;
+ int bucket;
+ int i;
+ time_t now;
+ const gid_list_t *agl;
+
+ now = gf_time();
+ LOCK(&cache->gc_lock);
+ bucket = id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
+ if (!agl->gl_list)
+ continue;
+ if (agl->gl_id != id)
+ continue;
+
+ /*
+ @uid and @gid reflect the latest UID/GID of the
+ process performing the syscall (taken from frame->root).
+
+ If the UID and GID has changed for the PID since the
+ time we cached it, we should treat the cache as having
+ stale values and query them freshly.
+ */
+ if (agl->gl_uid != uid || agl->gl_gid != gid)
+ break;
+
+ /*
+ * We don't put new entries in the cache when expiration=0, but
+ * there might be entries still in there if expiration was
+ * changed very recently. Writing the check this way ensures
+ * that they're not used.
+ */
+ if (now < agl->gl_deadline) {
+ return agl;
+ }
+
+ /*
+ * We're not going to find any more UID matches, and reaping
+ * is handled further down to maintain LRU order.
+ */
+ break;
+ }
+ UNLOCK(&cache->gc_lock);
+ return NULL;
}
/*
* Release an entry found via lookup.
*/
-void gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
+void
+gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
{
- UNLOCK(&cache->gc_lock);
+ UNLOCK(&cache->gc_lock);
}
/*
* Add a new list entry to the cache. If an entry for this ID already exists,
* update it.
*/
-int gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
+int
+gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
{
- gid_list_t *agl;
- int bucket;
- int i;
- time_t now;
-
- if (!gl || !gl->gl_list)
- return -1;
-
- if (!cache->gc_max_age)
- return 0;
-
- LOCK(&cache->gc_lock);
- now = time(NULL);
-
- /*
- * Scan for the first free entry or one that matches this id. The id
- * check is added to address a bug where the cache might contain an
- * expired entry for this id. Since lookup occurs in LRU order and
- * does not reclaim entries, it will always return failure on discovery
- * of an expired entry. This leads to duplicate entries being added,
- * which still do not satisfy lookups until the expired entry (and
- * everything before it) is reclaimed.
- *
- * We address this through reuse of an entry already allocated to this
- * id, whether expired or not, since we have obviously already received
- * more recent data. The entry is repopulated with the new data and a new
- * deadline and is pushed forward to reside as the last populated entry in
- * the bucket.
- */
- bucket = gl->gl_id % cache->gc_nbuckets;
- agl = BUCKET_START(cache->gc_cache, bucket);
- for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
- if (agl->gl_id == gl->gl_id)
- break;
- if (!agl->gl_list)
- break;
- }
-
- /*
- * The way we allocate free entries naturally places the newest
- * ones at the highest indices, so evicting the lowest makes
- * sense, but that also means we can't just replace it with the
- * one that caused the eviction. That would cause us to thrash
- * the first entry while others remain idle. Therefore, we
- * need to slide the other entries down and add the new one at
- * the end just as if the *last* slot had been free.
- *
- * Deadline expiration is also handled here, since the oldest
- * expired entry will be in the first position. This does mean
- * the bucket can stay full of expired entries if we're idle
- * but, if the small amount of extra memory or scan time before
- * we decide to evict someone ever become issues, we could
- * easily add a reaper thread.
- */
-
- if (i >= AUX_GID_CACHE_ASSOC) {
- /* cache full, evict the first (LRU) entry */
- i = 0;
- agl = BUCKET_START(cache->gc_cache, bucket);
- GF_FREE(agl->gl_list);
- } else if (agl->gl_list) {
- /* evict the old entry we plan to reuse */
- GF_FREE(agl->gl_list);
- }
-
- /*
- * If we have evicted an entry, slide the subsequent populated entries
- * back and populate the last entry.
- */
- for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
- if (!agl[1].gl_list)
- break;
- agl[0] = agl[1];
- agl++;
- }
-
- agl->gl_id = gl->gl_id;
- agl->gl_count = gl->gl_count;
- agl->gl_list = gl->gl_list;
- agl->gl_deadline = now + cache->gc_max_age;
-
- UNLOCK(&cache->gc_lock);
-
- return 1;
+ gid_list_t *agl;
+ int bucket;
+ int i;
+ time_t now;
+
+ if (!gl || !gl->gl_list)
+ return -1;
+
+ if (!cache->gc_max_age)
+ return 0;
+
+ now = gf_time();
+ LOCK(&cache->gc_lock);
+
+ /*
+ * Scan for the first free entry or one that matches this id. The id
+ * check is added to address a bug where the cache might contain an
+ * expired entry for this id. Since lookup occurs in LRU order and
+ * does not reclaim entries, it will always return failure on discovery
+ * of an expired entry. This leads to duplicate entries being added,
+ * which still do not satisfy lookups until the expired entry (and
+ * everything before it) is reclaimed.
+ *
+ * We address this through reuse of an entry already allocated to this
+ * id, whether expired or not, since we have obviously already received
+ * more recent data. The entry is repopulated with the new data and a new
+ * deadline and is pushed forward to reside as the last populated entry in
+ * the bucket.
+ */
+ bucket = gl->gl_id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
+ if (agl->gl_id == gl->gl_id)
+ break;
+ if (!agl->gl_list)
+ break;
+ }
+
+ /*
+ * The way we allocate free entries naturally places the newest
+ * ones at the highest indices, so evicting the lowest makes
+ * sense, but that also means we can't just replace it with the
+ * one that caused the eviction. That would cause us to thrash
+ * the first entry while others remain idle. Therefore, we
+ * need to slide the other entries down and add the new one at
+ * the end just as if the *last* slot had been free.
+ *
+ * Deadline expiration is also handled here, since the oldest
+ * expired entry will be in the first position. This does mean
+ * the bucket can stay full of expired entries if we're idle
+ * but, if the small amount of extra memory or scan time before
+ * we decide to evict someone ever become issues, we could
+ * easily add a reaper thread.
+ */
+
+ if (i >= AUX_GID_CACHE_ASSOC) {
+ /* cache full, evict the first (LRU) entry */
+ i = 0;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ GF_FREE(agl->gl_list);
+ } else if (agl->gl_list) {
+ /* evict the old entry we plan to reuse */
+ GF_FREE(agl->gl_list);
+ }
+
+ /*
+ * If we have evicted an entry, slide the subsequent populated entries
+ * back and populate the last entry.
+ */
+ for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
+ if (!agl[1].gl_list)
+ break;
+ agl[0] = agl[1];
+ agl++;
+ }
+
+ agl->gl_id = gl->gl_id;
+ agl->gl_uid = gl->gl_uid;
+ agl->gl_gid = gl->gl_gid;
+ agl->gl_count = gl->gl_count;
+ agl->gl_list = gl->gl_list;
+ agl->gl_deadline = now + cache->gc_max_age;
+
+ UNLOCK(&cache->gc_lock);
+
+ return 1;
}
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 11f62a5501f..ae06f8be386 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -8,392 +8,359 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* !_CONFIG_H */
-
#include <pthread.h>
-#include "glusterfs.h"
-#include "globals.h"
-#include "xlator.h"
-#include "mem-pool.h"
+#include "glusterfs/syncop.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+const char *gf_fop_list[GF_FOP_MAXVALUE] = {
+ [GF_FOP_NULL] = "NULL",
+ [GF_FOP_STAT] = "STAT",
+ [GF_FOP_READLINK] = "READLINK",
+ [GF_FOP_MKNOD] = "MKNOD",
+ [GF_FOP_MKDIR] = "MKDIR",
+ [GF_FOP_UNLINK] = "UNLINK",
+ [GF_FOP_RMDIR] = "RMDIR",
+ [GF_FOP_SYMLINK] = "SYMLINK",
+ [GF_FOP_RENAME] = "RENAME",
+ [GF_FOP_LINK] = "LINK",
+ [GF_FOP_TRUNCATE] = "TRUNCATE",
+ [GF_FOP_OPEN] = "OPEN",
+ [GF_FOP_READ] = "READ",
+ [GF_FOP_WRITE] = "WRITE",
+ [GF_FOP_STATFS] = "STATFS",
+ [GF_FOP_FLUSH] = "FLUSH",
+ [GF_FOP_FSYNC] = "FSYNC",
+ [GF_FOP_SETXATTR] = "SETXATTR",
+ [GF_FOP_GETXATTR] = "GETXATTR",
+ [GF_FOP_REMOVEXATTR] = "REMOVEXATTR",
+ [GF_FOP_OPENDIR] = "OPENDIR",
+ [GF_FOP_FSYNCDIR] = "FSYNCDIR",
+ [GF_FOP_ACCESS] = "ACCESS",
+ [GF_FOP_CREATE] = "CREATE",
+ [GF_FOP_FTRUNCATE] = "FTRUNCATE",
+ [GF_FOP_FSTAT] = "FSTAT",
+ [GF_FOP_LK] = "LK",
+ [GF_FOP_LOOKUP] = "LOOKUP",
+ [GF_FOP_READDIR] = "READDIR",
+ [GF_FOP_INODELK] = "INODELK",
+ [GF_FOP_FINODELK] = "FINODELK",
+ [GF_FOP_ENTRYLK] = "ENTRYLK",
+ [GF_FOP_FENTRYLK] = "FENTRYLK",
+ [GF_FOP_XATTROP] = "XATTROP",
+ [GF_FOP_FXATTROP] = "FXATTROP",
+ [GF_FOP_FSETXATTR] = "FSETXATTR",
+ [GF_FOP_FGETXATTR] = "FGETXATTR",
+ [GF_FOP_RCHECKSUM] = "RCHECKSUM",
+ [GF_FOP_SETATTR] = "SETATTR",
+ [GF_FOP_FSETATTR] = "FSETATTR",
+ [GF_FOP_READDIRP] = "READDIRP",
+ [GF_FOP_GETSPEC] = "GETSPEC",
+ [GF_FOP_FORGET] = "FORGET",
+ [GF_FOP_RELEASE] = "RELEASE",
+ [GF_FOP_RELEASEDIR] = "RELEASEDIR",
+ [GF_FOP_FREMOVEXATTR] = "FREMOVEXATTR",
+ [GF_FOP_FALLOCATE] = "FALLOCATE",
+ [GF_FOP_DISCARD] = "DISCARD",
+ [GF_FOP_ZEROFILL] = "ZEROFILL",
+ [GF_FOP_IPC] = "IPC",
+ [GF_FOP_SEEK] = "SEEK",
+ [GF_FOP_LEASE] = "LEASE",
+ [GF_FOP_COMPOUND] = "COMPOUND",
+ [GF_FOP_GETACTIVELK] = "GETACTIVELK",
+ [GF_FOP_SETACTIVELK] = "SETACTIVELK",
+ [GF_FOP_PUT] = "PUT",
+ [GF_FOP_ICREATE] = "ICREATE",
+ [GF_FOP_NAMELINK] = "NAMELINK",
+ [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE",
+};
+const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = {
+ [GF_UPCALL_NULL] = "NULL",
+ [GF_UPCALL] = "UPCALL",
+ [GF_UPCALL_CI_STAT] = "CI_IATT",
+ [GF_UPCALL_CI_XATTR] = "CI_XATTR",
+ [GF_UPCALL_CI_RENAME] = "CI_RENAME",
+ [GF_UPCALL_CI_NLINK] = "CI_UNLINK",
+ [GF_UPCALL_CI_FORGET] = "CI_FORGET",
+ [GF_UPCALL_LEASE_RECALL] = "LEASE_RECALL",
+};
+
+/* THIS */
-/* gf_*_list[] */
+/* This global ctx is a bad hack to prevent some of the libgfapi crashes.
+ * This should be removed once the patch on resource pool is accepted
+ */
+glusterfs_ctx_t *global_ctx = NULL;
+pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
+xlator_t global_xlator;
+static int gf_global_mem_acct_enable = 1;
+static pthread_once_t globals_inited = PTHREAD_ONCE_INIT;
-char *gf_fop_list[GF_FOP_MAXVALUE];
-char *gf_mgmt_list[GF_MGMT_MAXVALUE];
+static pthread_key_t free_key;
+static __thread xlator_t *thread_xlator = NULL;
+static __thread void *thread_synctask = NULL;
+static __thread void *thread_leaseid = NULL;
+static __thread struct syncopctx thread_syncopctx = {};
+static __thread char thread_uuid_buf[GF_UUID_BUF_SIZE] = {};
+static __thread char thread_lkowner_buf[GF_LKOWNER_BUF_SIZE] = {};
+static __thread char thread_leaseid_buf[GF_LEASE_ID_BUF_SIZE] = {};
-void
-gf_op_list_init()
+int
+gf_global_mem_acct_enable_get(void)
{
- gf_fop_list[GF_FOP_NULL] = "NULL";
- gf_fop_list[GF_FOP_STAT] = "STAT";
- gf_fop_list[GF_FOP_READLINK] = "READLINK";
- gf_fop_list[GF_FOP_MKNOD] = "MKNOD";
- gf_fop_list[GF_FOP_MKDIR] = "MKDIR";
- gf_fop_list[GF_FOP_UNLINK] = "UNLINK";
- gf_fop_list[GF_FOP_RMDIR] = "RMDIR";
- gf_fop_list[GF_FOP_SYMLINK] = "SYMLINK";
- gf_fop_list[GF_FOP_RENAME] = "RENAME";
- gf_fop_list[GF_FOP_LINK] = "LINK";
- gf_fop_list[GF_FOP_TRUNCATE] = "TRUNCATE";
- gf_fop_list[GF_FOP_OPEN] = "OPEN";
- gf_fop_list[GF_FOP_READ] = "READ";
- gf_fop_list[GF_FOP_WRITE] = "WRITE";
- gf_fop_list[GF_FOP_STATFS] = "STATFS";
- gf_fop_list[GF_FOP_FLUSH] = "FLUSH";
- gf_fop_list[GF_FOP_FSYNC] = "FSYNC";
- gf_fop_list[GF_FOP_SETXATTR] = "SETXATTR";
- gf_fop_list[GF_FOP_GETXATTR] = "GETXATTR";
- gf_fop_list[GF_FOP_REMOVEXATTR] = "REMOVEXATTR";
- gf_fop_list[GF_FOP_OPENDIR] = "OPENDIR";
- gf_fop_list[GF_FOP_FSYNCDIR] = "FSYNCDIR";
- gf_fop_list[GF_FOP_ACCESS] = "ACCESS";
- gf_fop_list[GF_FOP_CREATE] = "CREATE";
- gf_fop_list[GF_FOP_FTRUNCATE] = "FTRUNCATE";
- gf_fop_list[GF_FOP_FSTAT] = "FSTAT";
- gf_fop_list[GF_FOP_LK] = "LK";
- gf_fop_list[GF_FOP_LOOKUP] = "LOOKUP";
- gf_fop_list[GF_FOP_READDIR] = "READDIR";
- gf_fop_list[GF_FOP_INODELK] = "INODELK";
- gf_fop_list[GF_FOP_FINODELK] = "FINODELK";
- gf_fop_list[GF_FOP_ENTRYLK] = "ENTRYLK";
- gf_fop_list[GF_FOP_FENTRYLK] = "FENTRYLK";
- gf_fop_list[GF_FOP_XATTROP] = "XATTROP";
- gf_fop_list[GF_FOP_FXATTROP] = "FXATTROP";
- gf_fop_list[GF_FOP_FSETXATTR] = "FSETXATTR";
- gf_fop_list[GF_FOP_FGETXATTR] = "FGETXATTR";
- gf_fop_list[GF_FOP_RCHECKSUM] = "RCHECKSUM";
- gf_fop_list[GF_FOP_SETATTR] = "SETATTR";
- gf_fop_list[GF_FOP_FSETATTR] = "FSETATTR";
- gf_fop_list[GF_FOP_READDIRP] = "READDIRP";
- gf_fop_list[GF_FOP_GETSPEC] = "GETSPEC";
- gf_fop_list[GF_FOP_FORGET] = "FORGET";
- gf_fop_list[GF_FOP_RELEASE] = "RELEASE";
- gf_fop_list[GF_FOP_RELEASEDIR] = "RELEASEDIR";
-
- gf_fop_list[GF_MGMT_NULL] = "NULL";
- return;
+ return gf_global_mem_acct_enable;
}
+int
+gf_global_mem_acct_enable_set(int val)
+{
+ gf_global_mem_acct_enable = val;
+ return 0;
+}
-/* CTX */
-static glusterfs_ctx_t *glusterfs_ctx;
+static struct xlator_cbks global_cbks = {
+ .forget = NULL,
+ .release = NULL,
+ .releasedir = NULL,
+ .invalidate = NULL,
+ .client_destroy = NULL,
+ .client_disconnect = NULL,
+ .ictxmerge = NULL,
+ .ictxsize = NULL,
+ .fdctxsize = NULL,
+};
+/* This is required to get through the check in graph.c */
+static struct xlator_fops global_fops = {};
-int
-glusterfs_ctx_init ()
+static int
+global_xl_reconfigure(xlator_t *this, dict_t *options)
{
- int ret = 0;
+ int ret = -1;
+ gf_boolean_t bool_opt = _gf_false;
- if (glusterfs_ctx) {
- gf_log_callingfn ("", GF_LOG_WARNING, "init called again");
- goto out;
- }
+ /* This is not added in volume dump, hence adding the options in log
+ would be helpful for debugging later */
+ dict_dump_to_log(options);
- glusterfs_ctx = CALLOC (1, sizeof (*glusterfs_ctx));
- if (!glusterfs_ctx) {
- ret = -1;
- goto out;
- }
+ GF_OPTION_RECONF("measure-latency", bool_opt, options, bool, out);
+ this->ctx->measure_latency = bool_opt;
- INIT_LIST_HEAD (&glusterfs_ctx->graphs);
- INIT_LIST_HEAD (&glusterfs_ctx->mempool_list);
- ret = pthread_mutex_init (&glusterfs_ctx->lock, NULL);
+ GF_OPTION_RECONF("metrics-dump-path", this->ctx->config.metrics_dumppath,
+ options, str, out);
+ /* TODO: add more things here */
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-glusterfs_ctx_t *
-glusterfs_ctx_get ()
+static int
+global_xl_init(xlator_t *this)
{
- return glusterfs_ctx;
-
-}
+ int ret = -1;
+ gf_boolean_t bool_opt = false;
+ GF_OPTION_INIT("measure-latency", bool_opt, bool, out);
+ this->ctx->measure_latency = bool_opt;
-/* THIS */
+ GF_OPTION_INIT("metrics-dump-path", this->ctx->config.metrics_dumppath, str,
+ out);
-xlator_t global_xlator;
-static pthread_key_t this_xlator_key;
+ ret = 0;
-void
-glusterfs_this_destroy (void *ptr)
-{
- if (ptr)
- FREE (ptr);
+out:
+ return ret;
}
-
-int
-glusterfs_this_init ()
+static void
+global_xl_fini(xlator_t *this)
{
- int ret = 0;
-
- ret = pthread_key_create (&this_xlator_key, glusterfs_this_destroy);
- if (ret != 0) {
- gf_log ("", GF_LOG_WARNING, "failed to create the pthread key");
- return ret;
- }
+ return;
+}
- global_xlator.name = "glusterfs";
- global_xlator.type = "global";
- global_xlator.ctx = glusterfs_ctx;
+struct volume_options global_xl_options[] = {
+ {.key = {"measure-latency"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"global", "context"},
+ .description = "Use this option to toggle measuring latency"},
+ {.key = {"metrics-dump-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "{{gluster_workdir}}/metrics",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"global", "context"},
+ .description = "Use this option to set the metrics dump path"},
+
+ {
+ .key = {NULL},
+ },
+};
- INIT_LIST_HEAD (&global_xlator.volume_options);
+static volume_opt_list_t global_xl_opt_list;
- return ret;
+void
+glusterfs_this_init()
+{
+ global_xlator.name = "glusterfs";
+ global_xlator.type = GF_GLOBAL_XLATOR_NAME;
+ global_xlator.cbks = &global_cbks;
+ global_xlator.fops = &global_fops;
+ global_xlator.reconfigure = global_xl_reconfigure;
+ global_xlator.init = global_xl_init;
+ global_xlator.fini = global_xl_fini;
+
+ INIT_LIST_HEAD(&global_xlator.volume_options);
+ INIT_LIST_HEAD(&global_xl_opt_list.list);
+ global_xl_opt_list.given_opt = global_xl_options;
+
+ list_add_tail(&global_xl_opt_list.list, &global_xlator.volume_options);
}
-
xlator_t **
-__glusterfs_this_location ()
+__glusterfs_this_location()
{
- xlator_t **this_location = NULL;
- int ret = 0;
-
- this_location = pthread_getspecific (this_xlator_key);
+ xlator_t **this_location;
- if (!this_location) {
- this_location = CALLOC (1, sizeof (*this_location));
- if (!this_location)
- goto out;
+ this_location = &thread_xlator;
+ if (*this_location == NULL) {
+ thread_xlator = &global_xlator;
+ }
- ret = pthread_setspecific (this_xlator_key, this_location);
- if (ret != 0) {
- gf_log ("", GF_LOG_WARNING, "pthread setspecific failed");
-
- FREE (this_location);
- this_location = NULL;
- goto out;
- }
- }
-out:
- if (this_location) {
- if (!*this_location)
- *this_location = &global_xlator;
- }
- return this_location;
+ return this_location;
}
-
xlator_t *
-glusterfs_this_get ()
+glusterfs_this_get()
{
- xlator_t **this_location = NULL;
-
- this_location = __glusterfs_this_location ();
- if (!this_location)
- return &global_xlator;
-
- return *this_location;
+ return *__glusterfs_this_location();
}
-
-int
-glusterfs_this_set (xlator_t *this)
+void
+glusterfs_this_set(xlator_t *this)
{
- xlator_t **this_location = NULL;
-
- this_location = __glusterfs_this_location ();
- if (!this_location)
- return -ENOMEM;
-
- *this_location = this;
-
- return 0;
+ thread_xlator = this;
}
-/* SYNCTASK */
-
-static pthread_key_t synctask_key;
+/* SYNCOPCTX */
-
-int
-synctask_init ()
+void *
+syncopctx_getctx()
{
- int ret = 0;
-
- ret = pthread_key_create (&synctask_key, NULL);
-
- return ret;
+ return &thread_syncopctx;
}
+/* SYNCTASK */
void *
-synctask_get ()
+synctask_get()
{
- void *synctask = NULL;
-
- synctask = pthread_getspecific (synctask_key);
-
- return synctask;
+ return thread_synctask;
}
-
-int
-synctask_set (void *synctask)
+void
+synctask_set(void *synctask)
{
- int ret = 0;
+ thread_synctask = synctask;
+}
- pthread_setspecific (synctask_key, synctask);
+// UUID_BUFFER
- return ret;
+char *
+glusterfs_uuid_buf_get()
+{
+ return thread_uuid_buf;
}
-//UUID_BUFFER
+/* LKOWNER_BUFFER */
-static pthread_key_t uuid_buf_key;
-static char global_uuid_buf[GF_UUID_BUF_SIZE];
-void
-glusterfs_uuid_buf_destroy (void *ptr)
+char *
+glusterfs_lkowner_buf_get()
{
- if (ptr)
- FREE (ptr);
+ return thread_lkowner_buf;
}
-int
-glusterfs_uuid_buf_init ()
+/* Leaseid buffer */
+
+char *
+glusterfs_leaseid_buf_get()
{
- int ret = 0;
+ char *buf = NULL;
- ret = pthread_key_create (&uuid_buf_key,
- glusterfs_uuid_buf_destroy);
- return ret;
+ buf = thread_leaseid;
+ if (buf == NULL) {
+ buf = thread_leaseid_buf;
+ thread_leaseid = buf;
+ }
+
+ return buf;
}
char *
-glusterfs_uuid_buf_get ()
+glusterfs_leaseid_exist()
{
- char *buf;
- int ret = 0;
-
- buf = pthread_getspecific (uuid_buf_key);
- if(!buf) {
- buf = MALLOC (GF_UUID_BUF_SIZE);
- ret = pthread_setspecific (uuid_buf_key, (void *) buf);
- if(ret)
- buf = global_uuid_buf;
- }
- return buf;
+ return thread_leaseid;
}
-/* LKOWNER_BUFFER */
+static void
+glusterfs_cleanup(void *ptr)
+{
+ if (thread_syncopctx.groups != NULL) {
+ GF_FREE(thread_syncopctx.groups);
+ }
+
+ mem_pool_thread_destructor(NULL);
+}
-static pthread_key_t lkowner_buf_key;
-static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE];
void
-glusterfs_lkowner_buf_destroy (void *ptr)
+gf_thread_needs_cleanup(void)
{
- if (ptr)
- FREE (ptr);
+ /* The value stored in free_key TLS is not really used for anything, but
+ * pthread implementation doesn't call the TLS destruction function unless
+ * it's != NULL. This function must be called whenever something is
+ * allocated for this thread so that glusterfs_cleanup() will be called
+ * and resources can be released. */
+ (void)pthread_setspecific(free_key, (void *)1);
}
-int
-glusterfs_lkowner_buf_init ()
+static void
+gf_globals_init_once()
{
- int ret = 0;
+ int ret = 0;
- ret = pthread_key_create (&lkowner_buf_key,
- glusterfs_lkowner_buf_destroy);
- return ret;
-}
+ glusterfs_this_init();
-char *
-glusterfs_lkowner_buf_get ()
-{
- char *buf;
- int ret = 0;
-
- buf = pthread_getspecific (lkowner_buf_key);
- if(!buf) {
- buf = MALLOC (GF_LKOWNER_BUF_SIZE);
- ret = pthread_setspecific (lkowner_buf_key, (void *) buf);
- if(ret)
- buf = global_lkowner_buf;
- }
- return buf;
+ /* This is needed only to cleanup the potential allocation of
+ * thread_syncopctx.groups. */
+ ret = pthread_key_create(&free_key, glusterfs_cleanup);
+ if (ret != 0) {
+ gf_msg("", GF_LOG_ERROR, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED,
+ "failed to create the pthread key");
+
+ gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_GLOBAL_INIT_FAILED,
+ "Exiting as global initialization failed");
+
+ exit(ret);
+ }
}
int
-glusterfs_globals_init ()
+glusterfs_globals_init(glusterfs_ctx_t *ctx)
{
- int ret = 0;
-
- gf_op_list_init ();
-
- gf_log_globals_init ();
-
- ret = glusterfs_ctx_init ();
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs context init failed");
- goto out;
- }
-
- ret = glusterfs_this_init ();
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs-translator init failed");
- goto out;
- }
-
- ret = glusterfs_uuid_buf_init ();
- if(ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs uuid buffer init failed");
- goto out;
- }
-
- ret = glusterfs_lkowner_buf_init ();
- if(ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs lkowner buffer init failed");
- goto out;
- }
-
- gf_mem_acct_enable_set ();
-
- ret = synctask_init ();
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs synctask init failed");
- goto out;
- }
-out:
- return ret;
-}
+ int ret = 0;
+ gf_log_globals_init(ctx, GF_LOG_INFO);
-char eventstring[GF_EVENT_MAXVAL+1][64] = {
- "Invalid event",
- "Parent Up",
- "Poll In",
- "Poll Out",
- "Poll Err",
- "Child Up",
- "Child Down",
- "Child Connecting",
- "Child Modified",
- "Transport Cleanup",
- "Transport Connected",
- "Volfile Modified",
- "New Volfile",
- "Translator Info",
- "Xlator Op",
- "Authentication Failed",
- "Invalid event",
-};
+ ret = pthread_once(&globals_inited, gf_globals_init_once);
-/* Copy the string ptr contents if needed for yourself */
-char *
-glusterfs_strevent (glusterfs_event_t ev)
-{
- return eventstring[ev];
+ if (ret)
+ gf_msg("", GF_LOG_CRITICAL, ret, LG_MSG_PTHREAD_FAILED,
+ "pthread_once failed");
+
+ return ret;
}
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
deleted file mode 100644
index e797db184bd..00000000000
--- a/libglusterfs/src/globals.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GLOBALS_H
-#define _GLOBALS_H
-
-#define GF_DEFAULT_BASE_PORT 24007
-
-#include "glusterfs.h"
-
-/* CTX */
-#define CTX (glusterfs_ctx_get())
-
-glusterfs_ctx_t *glusterfs_ctx_get ();
-
-#include "xlator.h"
-
-/* THIS */
-#define THIS (*__glusterfs_this_location())
-
-#define GF_UUID_BUF_SIZE 50
-
-xlator_t **__glusterfs_this_location ();
-xlator_t *glusterfs_this_get ();
-int glusterfs_this_set (xlator_t *);
-
-/* task */
-void *synctask_get ();
-int synctask_set (void *);
-
-/* uuid_buf */
-char *glusterfs_uuid_buf_get();
-char *glusterfs_lkowner_buf_get();
-
-/* init */
-int glusterfs_globals_init (void);
-
-#endif /* !_GLOBALS_H */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
deleted file mode 100644
index 8532b318235..00000000000
--- a/libglusterfs/src/glusterfs.h
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GLUSTERFS_H
-#define _GLUSTERFS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/statvfs.h>
-#include <netdb.h>
-#include <errno.h>
-#include <dirent.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <arpa/inet.h>
-#include <sys/poll.h>
-#include <pthread.h>
-
-
-#include "list.h"
-#include "logging.h"
-
-#define GF_YES 1
-#define GF_NO 0
-
-#ifndef O_LARGEFILE
-/* savannah bug #20053, patch for compiling on darwin */
-#define O_LARGEFILE 0100000 /* from bits/fcntl.h */
-#endif
-
-#ifndef O_FMODE_EXEC
-/* redhat bug 843080, added from linux/fs.h */
-#define O_FMODE_EXEC 040 //0x20
-#endif
-
-#ifndef O_DIRECT
-/* savannah bug #20050, #20052 */
-#define O_DIRECT 0 /* From asm/fcntl.h */
-#endif
-
-#ifndef O_DIRECTORY
-/* FreeBSD does not need O_DIRECTORY */
-#define O_DIRECTORY 0
-#endif
-
-#ifndef EBADFD
-/* Mac OS X does not have EBADFD */
-#define EBADFD EBADF
-#endif
-
-#ifndef FNM_EXTMATCH
-#define FNM_EXTMATCH 0
-#endif
-
-#define ZR_MOUNTPOINT_OPT "mountpoint"
-#define ZR_ATTR_TIMEOUT_OPT "attribute-timeout"
-#define ZR_ENTRY_TIMEOUT_OPT "entry-timeout"
-#define ZR_DIRECT_IO_OPT "direct-io-mode"
-#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
-#define ZR_DUMP_FUSE "dump-fuse"
-
-#define GF_XATTR_CLRLK_CMD "glusterfs.clrlk"
-#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
-#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
-#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
-
-#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
-
-#define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \
- strlen (GF_XATTR_PATHINFO_KEY)) == 0)
-#define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \
- strlen (GF_XATTR_NODE_UUID_KEY)) == 0)
-
-#define GF_XATTR_LINKINFO_KEY "trusted.distribute.linkinfo"
-#define GFID_XATTR_KEY "trusted.gfid"
-
-#define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop"
-
-#define ZR_FILE_CONTENT_STR "glusterfs.file."
-#define ZR_FILE_CONTENT_STRLEN 15
-
-#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
-#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
-#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
-#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
-#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
-#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
-#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
-
-/* Index xlator related */
-#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
-
-#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
-/* replace-brick and pump related internal xattrs */
-#define RB_PUMP_CMD_START "glusterfs.pump.start"
-#define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
-#define RB_PUMP_CMD_COMMIT "glusterfs.pump.commit"
-#define RB_PUMP_CMD_ABORT "glusterfs.pump.abort"
-#define RB_PUMP_CMD_STATUS "glusterfs.pump.status"
-
-#define POSIX_ACL_DEFAULT_XATTR "system.posix_acl_default"
-#define POSIX_ACL_ACCESS_XATTR "system.posix_acl_access"
-
-#define GLUSTERFS_RDMA_INLINE_THRESHOLD (2048)
-#define GLUSTERFS_RDMA_MAX_HEADER_SIZE (228) /* (sizeof (rdma_header_t) \
- + RDMA_MAX_SEGMENTS \
- * sizeof (rdma_read_chunk_t))
- */
-
-#define GLUSTERFS_RPC_REPLY_SIZE 24
-
-#define ZR_FILE_CONTENT_REQUEST(key) (!strncmp(key, ZR_FILE_CONTENT_STR, \
- ZR_FILE_CONTENT_STRLEN))
-
-/* GlusterFS's maximum supported Auxilary GIDs */
-/* TODO: Keeping it to 200, so that we can fit in 2KB buffer for auth data
- * in RPC server code, if there is ever need for having more aux-gids, then
- * we have to add aux-gid in payload of actors */
-#define GF_MAX_AUX_GROUPS 200
-
-/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
-typedef enum {
- GF_FOP_NULL = 0,
- GF_FOP_STAT,
- GF_FOP_READLINK,
- GF_FOP_MKNOD,
- GF_FOP_MKDIR,
- GF_FOP_UNLINK,
- GF_FOP_RMDIR,
- GF_FOP_SYMLINK,
- GF_FOP_RENAME,
- GF_FOP_LINK,
- GF_FOP_TRUNCATE,
- GF_FOP_OPEN,
- GF_FOP_READ,
- GF_FOP_WRITE,
- GF_FOP_STATFS,
- GF_FOP_FLUSH,
- GF_FOP_FSYNC, /* 15 */
- GF_FOP_SETXATTR,
- GF_FOP_GETXATTR,
- GF_FOP_REMOVEXATTR,
- GF_FOP_OPENDIR,
- GF_FOP_FSYNCDIR,
- GF_FOP_ACCESS,
- GF_FOP_CREATE,
- GF_FOP_FTRUNCATE,
- GF_FOP_FSTAT, /* 25 */
- GF_FOP_LK,
- GF_FOP_LOOKUP,
- GF_FOP_READDIR,
- GF_FOP_INODELK,
- GF_FOP_FINODELK,
- GF_FOP_ENTRYLK,
- GF_FOP_FENTRYLK,
- GF_FOP_XATTROP,
- GF_FOP_FXATTROP,
- GF_FOP_FGETXATTR,
- GF_FOP_FSETXATTR,
- GF_FOP_RCHECKSUM,
- GF_FOP_SETATTR,
- GF_FOP_FSETATTR,
- GF_FOP_READDIRP,
- GF_FOP_FORGET,
- GF_FOP_RELEASE,
- GF_FOP_RELEASEDIR,
- GF_FOP_GETSPEC,
- GF_FOP_FREMOVEXATTR,
- GF_FOP_MAXVALUE,
-} glusterfs_fop_t;
-
-
-typedef enum {
- GF_MGMT_NULL = 0,
- GF_MGMT_MAXVALUE,
-} glusterfs_mgmt_t;
-
-typedef enum {
- GF_OP_TYPE_NULL = 0,
- GF_OP_TYPE_FOP,
- GF_OP_TYPE_MGMT,
- GF_OP_TYPE_MAX,
-} gf_op_type_t;
-
-/* NOTE: all the miscellaneous flags used by GlusterFS should be listed here */
-typedef enum {
- GF_LK_GETLK = 0,
- GF_LK_SETLK,
- GF_LK_SETLKW,
- GF_LK_RESLK_LCK,
- GF_LK_RESLK_LCKW,
- GF_LK_RESLK_UNLCK,
- GF_LK_GETLK_FD,
-} glusterfs_lk_cmds_t;
-
-
-typedef enum {
- GF_LK_F_RDLCK = 0,
- GF_LK_F_WRLCK,
- GF_LK_F_UNLCK,
- GF_LK_EOL,
-} glusterfs_lk_types_t;
-
-typedef enum {
- F_RESLK_LCK = 200,
- F_RESLK_LCKW,
- F_RESLK_UNLCK,
- F_GETLK_FD,
-} glusterfs_lk_recovery_cmds_t;
-
-typedef enum {
- GF_LOCK_POSIX,
- GF_LOCK_INTERNAL
-} gf_lk_domain_t;
-
-
-typedef enum {
- ENTRYLK_LOCK,
- ENTRYLK_UNLOCK,
- ENTRYLK_LOCK_NB
-} entrylk_cmd;
-
-
-typedef enum {
- ENTRYLK_RDLCK,
- ENTRYLK_WRLCK
-} entrylk_type;
-
-
-typedef enum {
- GF_XATTROP_ADD_ARRAY,
- GF_XATTROP_ADD_ARRAY64
-} gf_xattrop_flags_t;
-
-
-#define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */
-#define GF_SET_OVERWRITE 0x2 /* Overwrite with the buf given */
-#define GF_SET_DIR_ONLY 0x4
-#define GF_SET_EPOCH_TIME 0x8 /* used by afr dir lookup selfheal */
-
-/* Directory into which replicate self-heal will move deleted files and
- directories into. The storage/posix janitor thread will periodically
- clean up this directory */
-
-#define GF_REPLICATE_TRASH_DIR ".landfill"
-
-/* key value which quick read uses to get small files in lookup cbk */
-#define GF_CONTENT_KEY "glusterfs.content"
-
-struct _xlator_cmdline_option {
- struct list_head cmd_args;
- char *volume;
- char *key;
- char *value;
-};
-typedef struct _xlator_cmdline_option xlator_cmdline_option_t;
-
-
-#define GF_OPTION_ENABLE _gf_true
-#define GF_OPTION_DISABLE _gf_false
-#define GF_OPTION_DEFERRED 2
-
-struct _cmd_args {
- /* basic options */
- char *volfile_server;
- char *volfile;
- char *log_server;
- gf_loglevel_t log_level;
- char *log_file;
- int32_t max_connect_attempts;
- /* advanced options */
- uint32_t volfile_server_port;
- char *volfile_server_transport;
- uint32_t log_server_port;
- char *pid_file;
- char *sock_file;
- int no_daemon_mode;
- char *run_id;
- int debug_mode;
- int read_only;
- int acl;
- int selinux;
- int enable_ino32;
- int worm;
- int mac_compat;
- int gid_timeout;
- struct list_head xlator_options; /* list of xlator_option_t */
-
- /* fuse options */
- int fuse_direct_io_mode;
- int volfile_check;
- double fuse_entry_timeout;
- double fuse_attribute_timeout;
- char *volume_name;
- int fuse_nodev;
- int fuse_nosuid;
- char *dump_fuse;
- pid_t client_pid;
- int client_pid_set;
- unsigned uid_map_root;
- int background_qlen;
- int congestion_threshold;
-
- /* key args */
- char *mount_point;
- char *volfile_id;
-
- /* required for portmap */
- int brick_port;
- char *brick_name;
- int brick_port2;
-};
-typedef struct _cmd_args cmd_args_t;
-
-
-struct _glusterfs_graph {
- struct list_head list;
- char graph_uuid[128];
- struct timeval dob;
- void *first;
- void *top; /* selected by -n */
- int xl_count;
- int id; /* Used in logging */
- int used; /* Should be set when fuse gets
- first CHILD_UP */
- uint32_t volfile_checksum;
-};
-typedef struct _glusterfs_graph glusterfs_graph_t;
-
-
-struct _glusterfs_ctx {
- cmd_args_t cmd_args;
- char *process_uuid;
- FILE *pidfp;
- char fin;
- void *timer;
- void *ib;
- void *pool;
- void *event_pool;
- void *iobuf_pool;
- pthread_mutex_t lock;
- size_t page_size;
- struct list_head graphs; /* double linked list of graphs - one per volfile parse */
- glusterfs_graph_t *active; /* the latest graph in use */
- void *master; /* fuse, or libglusterfsclient (however, not protocol/server) */
- void *mgmt; /* xlator implementing MOPs for centralized logging, volfile server */
- void *listener; /* listener of the commands from glusterd */
- unsigned char measure_latency; /* toggle switch for latency measurement */
- pthread_t sigwaiter;
- struct mem_pool *stub_mem_pool;
- unsigned char cleanup_started;
- int graph_id; /* Incremented per graph, value should
- indicate how many times the graph has
- got changed */
- pid_t mnt_pid; /* pid of the mount agent */
- int process_mode; /*mode in which process is runninng*/
- struct syncenv *env; /* The env pointer to the synctasks */
-
- struct list_head mempool_list; /* used to keep a global list of
- mempools, used to log details of
- mempool in statedump */
- char *statedump_path;
-
- struct mem_pool *dict_pool;
- struct mem_pool *dict_pair_pool;
- struct mem_pool *dict_data_pool;
-
- int mem_accounting; /* if value is other than 0, it
- will be set */
-};
-typedef struct _glusterfs_ctx glusterfs_ctx_t;
-
-
-/* If you edit this structure then, make a corresponding change in
- * globals.c in the eventstring.
- */
-typedef enum {
- GF_EVENT_PARENT_UP = 1,
- GF_EVENT_POLLIN,
- GF_EVENT_POLLOUT,
- GF_EVENT_POLLERR,
- GF_EVENT_CHILD_UP,
- GF_EVENT_CHILD_DOWN,
- GF_EVENT_CHILD_CONNECTING,
- GF_EVENT_CHILD_MODIFIED,
- GF_EVENT_TRANSPORT_CLEANUP,
- GF_EVENT_TRANSPORT_CONNECTED,
- GF_EVENT_VOLFILE_MODIFIED,
- GF_EVENT_GRAPH_NEW,
- GF_EVENT_TRANSLATOR_INFO,
- GF_EVENT_TRANSLATOR_OP,
- GF_EVENT_AUTH_FAILED,
- GF_EVENT_VOLUME_DEFRAG,
- GF_EVENT_PARENT_DOWN,
- GF_EVENT_MAXVAL,
-} glusterfs_event_t;
-
-/* gf_lkowner_t is defined in lkowner.h */
-#include "lkowner.h"
-
-struct gf_flock {
- short l_type;
- short l_whence;
- off_t l_start;
- off_t l_len;
- pid_t l_pid;
- gf_lkowner_t l_owner;
-};
-
-extern char *glusterfs_strevent (glusterfs_event_t ev);
-
-#define GF_MUST_CHECK __attribute__((warn_unused_result))
-/*
- * Some macros (e.g. ALLOC_OR_GOTO) set variables in function scope, but the
- * calling function might not only declare the variable to keep the macro happy
- * and not use it otherwise. In such cases, the following can be used to
- * suppress the "set but not used" warning that would otherwise occur.
- */
-#define GF_UNUSED __attribute__((unused))
-
-int glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
-int glusterfs_graph_destroy (glusterfs_graph_t *graph);
-int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
-glusterfs_graph_t *glusterfs_graph_construct (FILE *fp);
-glusterfs_graph_t *glusterfs_graph_new ();
-int glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
- glusterfs_graph_t *newgraph);
-
-#endif /* _GLUSTERFS_H */
diff --git a/libglusterfs/src/glusterfs/async.h b/libglusterfs/src/glusterfs/async.h
new file mode 100644
index 00000000000..d1d70ae0bc7
--- /dev/null
+++ b/libglusterfs/src/glusterfs/async.h
@@ -0,0 +1,209 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GLUSTERFS_ASYNC_H__
+#define __GLUSTERFS_ASYNC_H__
+
+#define _LGPL_SOURCE
+
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+
+#ifdef URCU_OLD
+
+/* TODO: Fix the include paths. Since this is a .h included from many places
+ * it makes no sense to append a '-I$(CONTRIBDIR)/userspace-rcu/' to each
+ * Makefile.am. I've also seen some problems with CI builders (they
+ * failed to find the include files, but the same source on another setup
+ * is working fine). */
+#include "wfcqueue.h"
+#include "wfstack.h"
+
+#else /* !URCU_OLD */
+
+#include <urcu/wfcqueue.h>
+#include <urcu/wfstack.h>
+
+#endif /* URCU_OLD */
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/list.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+/* This is the name prefix that all worker threads will have. A number will
+ * be added to differentiate them. */
+#define GF_ASYNC_THREAD_NAME "tpw"
+
+/* This value determines the maximum number of threads that are allowed. */
+#define GF_ASYNC_MAX_THREADS 128
+
+/* This value determines how many additional threads will be started but will
+ * remain inactive until they are explicitly activated by the leader. This is
+ * useful to react faster to bursts of load, but at the same time we minimize
+ * contention if they are not really needed to handle current load.
+ *
+ * TODO: Instead of a fixed number, it would probably be better to use a
+ * prcentage of the available cores. */
+#define GF_ASYNC_SPARE_THREADS 2
+
+/* This value determines the signal used to wake the leader when new work has
+ * been added to the queue. To do so we reuse SIGALRM, since the most logical
+ * candidates (SIGUSR1/SIGUSR2) are already used. This signal must not be used
+ * by anything else in the process. */
+#define GF_ASYNC_SIGQUEUE SIGALRM
+
+/* This value determines the signal that will be used to transfer leader role
+ * to other workers. */
+#define GF_ASYNC_SIGCTRL SIGVTALRM
+
+#define gf_async_warning(_err, _msg, _args...) \
+ gf_msg("async", GF_LOG_WARNING, -(_err), LG_MSG_ASYNC_WARNING, _msg, \
+ ##_args)
+
+#define gf_async_error(_err, _msg, _args...) \
+ gf_msg("async", GF_LOG_ERROR, -(_err), LG_MSG_ASYNC_FAILURE, _msg, ##_args)
+
+#define gf_async_fatal(_err, _msg, _args...) \
+ do { \
+ GF_ABORT("Critical error in async module. Unable to continue. (" _msg \
+ "). Error %d.", \
+ ##_args, -(_err)); \
+ } while (0)
+
+struct _gf_async;
+typedef struct _gf_async gf_async_t;
+
+struct _gf_async_worker;
+typedef struct _gf_async_worker gf_async_worker_t;
+
+struct _gf_async_queue;
+typedef struct _gf_async_queue gf_async_queue_t;
+
+struct _gf_async_control;
+typedef struct _gf_async_control gf_async_control_t;
+
+typedef void (*gf_async_callback_f)(xlator_t *xl, gf_async_t *async);
+
+struct _gf_async {
+ /* TODO: remove dependency on xl/THIS. */
+ xlator_t *xl;
+ gf_async_callback_f cbk;
+ struct cds_wfcq_node queue;
+};
+
+struct _gf_async_worker {
+ /* Used to send asynchronous jobs related to the worker. */
+ gf_async_t async;
+
+ /* Member of the available workers stack. */
+ struct cds_wfs_node stack;
+
+ /* Thread object of the current worker. */
+ pthread_t thread;
+
+ /* Unique identifier of this worker. */
+ int32_t id;
+
+ /* Indicates if this worker is enabled. */
+ bool running;
+};
+
+struct _gf_async_queue {
+ /* Structures needed to manage a wait-free queue. For better performance
+ * they are placed in two different cache lines, as recommended by URCU
+ * documentation, even though in our case some threads will be producers
+ * and consumers at the same time. */
+ struct cds_wfcq_head head __attribute__((aligned(64)));
+ struct cds_wfcq_tail tail __attribute__((aligned(64)));
+};
+
+#define GF_ASYNC_COUNTS(_run, _stop) (((uint32_t)(_run) << 16) + (_stop))
+#define GF_ASYNC_COUNT_RUNNING(_count) ((_count) >> 16)
+#define GF_ASYNC_COUNT_STOPPING(_count) ((_count)&65535)
+
+struct _gf_async_control {
+ gf_async_queue_t queue;
+
+ /* Stack of unused workers. */
+ struct __cds_wfs_stack available;
+
+ /* Array of preallocated worker structures. */
+ gf_async_worker_t *table;
+
+ /* Used to synchronize main thread with workers on termination. */
+ pthread_barrier_t sync;
+
+ /* The id of the last thread that will be used for synchronization. */
+ pthread_t sync_thread;
+
+ /* Signal mask to wait for control signals from leader. */
+ sigset_t sigmask_ctrl;
+
+ /* Signal mask to wait for queued items. */
+ sigset_t sigmask_queue;
+
+ /* Saved signal handlers. */
+ struct sigaction handler_ctrl;
+ struct sigaction handler_queue;
+
+ /* PID of the current process. */
+ pid_t pid;
+
+ /* Maximum number of allowed threads. */
+ uint32_t max_threads;
+
+ /* Current number of running and stopping workers. This value is split
+ * into 2 16-bits fields to track both counters atomically at the same
+ * time. */
+ uint32_t counts;
+
+ /* It's used to control whether the asynchronous infrastructure is used
+ * or not. */
+ bool enabled;
+};
+
+extern gf_async_control_t gf_async_ctrl;
+
+int32_t
+gf_async_init(glusterfs_ctx_t *ctx);
+
+void
+gf_async_fini(void);
+
+void
+gf_async_adjust_threads(int32_t threads);
+
+static inline void
+gf_async(gf_async_t *async, xlator_t *xl, gf_async_callback_f cbk)
+{
+ if (!gf_async_ctrl.enabled) {
+ cbk(xl, async);
+ return;
+ }
+
+ async->xl = xl;
+ async->cbk = cbk;
+ cds_wfcq_node_init(&async->queue);
+ if (caa_unlikely(!cds_wfcq_enqueue(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail,
+ &async->queue))) {
+ /* The queue was empty, so the leader could be sleeping. We need to
+ * wake it so that the new item can be processed. If the queue was not
+ * empty, we don't need to do anything special since the leader will
+ * take care of it. */
+ if (caa_unlikely(kill(gf_async_ctrl.pid, GF_ASYNC_SIGQUEUE) < 0)) {
+ gf_async_fatal(errno, "Unable to wake leader worker.");
+ };
+ }
+}
+
+#endif /* !__GLUSTERFS_ASYNC_H__ */
diff --git a/libglusterfs/src/glusterfs/atomic.h b/libglusterfs/src/glusterfs/atomic.h
new file mode 100644
index 00000000000..ced81748218
--- /dev/null
+++ b/libglusterfs/src/glusterfs/atomic.h
@@ -0,0 +1,459 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "glusterfs/locking.h"
+
+/* Macros used to join two arguments and generate a new macro name. */
+#define GF_ATOMIC_MACRO_1(_macro) _macro
+#define GF_ATOMIC_MACRO(_base, _name) GF_ATOMIC_MACRO_1(_base##_name)
+
+/* There's a problem on 32-bit architectures when we try to use atomic
+ * builtins with 64-bit types. Only way to solve the problem is to use
+ * a mutex to protect the access to the atomic, but we don't want to
+ * use mutexes for other smaller types that could work with the atomic
+ * builtins.
+ *
+ * So on each atomic type we add a field for the mutex if atomic operation
+ * is not supported and a dummy zero size field if it's supported. This way
+ * we can have different atomic types, some with a mutex and some without.
+ *
+ * To define these types, we use two macros:
+ *
+ * GF_ATOMIC_MUTEX_FIELD_0 = char lk[0]
+ * GF_ATOMIC_MUTEX_FILED_1 = gf_lock_t lk
+ *
+ * Both macros define the 'lk' field that will be used in the atomic
+ * structure. One when the atomic is supported by the architecture and
+ * another when not. We need to define the field even if it won't be
+ * used. Otherwise the compiler will return an error.
+ *
+ * Now we need to take the mutex or not depending on the existence of
+ * the mutex field in the structure. To do so we check the size of the
+ * structure, and if it's bigger than uint64_t (all structures with a
+ * mutex will be bigger), we use the mutex-based version. Otherwise we
+ * use the atomic builtin. This check is easily optimized out by the
+ * compiler, leaving a clean and efficient compiled code. */
+
+#define GF_ATOMIC_MUTEX_FIELD_0 char lk[0]
+#define GF_ATOMIC_MUTEX_FIELD_1 gf_lock_t lk
+
+/* We'll use SIZEOF_LONG to determine the architecture. 32-bit machines
+ * will have 4 here, while 64-bit machines will have 8. If additional
+ * needs or restrictions appear on other platforms, these tests can be
+ * extended to handle them. */
+
+/* GF_ATOMIC_SIZE_X macros map each type size to one of the
+ * GF_ATOMIC_MUTEX_FIELD_X macros, depending on detected conditions. */
+
+#if defined(HAVE_ATOMIC_BUILTINS) || defined(HAVE_SYNC_BUILTINS)
+
+#define GF_ATOMIC_SIZE_1 GF_ATOMIC_MUTEX_FIELD_0
+#define GF_ATOMIC_SIZE_2 GF_ATOMIC_MUTEX_FIELD_0
+#define GF_ATOMIC_SIZE_4 GF_ATOMIC_MUTEX_FIELD_0
+
+#if SIZEOF_LONG >= 8
+#define GF_ATOMIC_SIZE_8 GF_ATOMIC_MUTEX_FIELD_0
+#endif
+
+#endif /* HAVE_(ATOMIC|SYNC)_BUILTINS */
+
+/* Any GF_ATOMIC_SIZE_X macro not yet defined will use the mutex version */
+#ifndef GF_ATOMIC_SIZE_1
+#define GF_ATOMIC_SIZE_1 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+#ifndef GF_ATOMIC_SIZE_2
+#define GF_ATOMIC_SIZE_2 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+#ifndef GF_ATOMIC_SIZE_4
+#define GF_ATOMIC_SIZE_4 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+#ifndef GF_ATOMIC_SIZE_8
+#define GF_ATOMIC_SIZE_8 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+/* This macro is used to define all atomic types supported. First field
+ * represents the size of the type in bytes, and the second one the name. */
+#define GF_ATOMIC_TYPE(_size, _name) \
+ typedef struct _gf_atomic_##_name##_t { \
+ GF_ATOMIC_MACRO(GF_ATOMIC_SIZE_, _size); \
+ _name##_t value; \
+ } gf_atomic_##_name##_t
+
+/* The atomic types we support */
+GF_ATOMIC_TYPE(1, int8); /* gf_atomic_int8_t */
+GF_ATOMIC_TYPE(2, int16); /* gf_atomic_int16_t */
+GF_ATOMIC_TYPE(4, int32); /* gf_atomic_int32_t */
+GF_ATOMIC_TYPE(8, int64); /* gf_atomic_int64_t */
+GF_ATOMIC_TYPE(SIZEOF_LONG, intptr); /* gf_atomic_intptr_t */
+GF_ATOMIC_TYPE(1, uint8); /* gf_atomic_uint8_t */
+GF_ATOMIC_TYPE(2, uint16); /* gf_atomic_uint16_t */
+GF_ATOMIC_TYPE(4, uint32); /* gf_atomic_uint32_t */
+GF_ATOMIC_TYPE(8, uint64); /* gf_atomic_uint64_t */
+GF_ATOMIC_TYPE(SIZEOF_LONG, uintptr); /* gf_atomic_uintptr_t */
+
+/* Define the default atomic type as int64_t */
+#define gf_atomic_t gf_atomic_int64_t
+
+/* This macro will choose between the mutex based version and the atomic
+ * builtin version depending on the size of the atomic structure. */
+#define GF_ATOMIC_CHOOSE(_atomic, _op, _args...) \
+ ((sizeof(_atomic) > sizeof(uint64_t)) \
+ ? ({ \
+ GF_ATOMIC_MACRO(GF_ATOMIC_LOCK_, _op) \
+ (_atomic, ##_args); \
+ }) \
+ : ({ \
+ GF_ATOMIC_MACRO(GF_ATOMIC_BASE_, _op) \
+ (_atomic, ##_args); \
+ }))
+
+/* Macros to implement the mutex-based atomics. */
+#define GF_ATOMIC_OP_PREPARE(_atomic, _name) \
+ typeof(_atomic) *__atomic = &(_atomic); \
+ gf_lock_t *__lock = (gf_lock_t *)&__atomic->lk; \
+ LOCK(__lock); \
+ typeof(__atomic->value) _name = __atomic->value
+
+#define GF_ATOMIC_OP_STORE(_value) (__atomic->value = (_value))
+
+#define GF_ATOMIC_OP_RETURN(_value) \
+ ({ \
+ UNLOCK(__lock); \
+ _value; \
+ })
+
+#define GF_ATOMIC_LOCK_INIT(_atomic, _value) \
+ do { \
+ typeof(_atomic) *__atomic = &(_atomic); \
+ LOCK_INIT((gf_lock_t *)&__atomic->lk); \
+ __atomic->value = (_value); \
+ } while (0)
+
+#define GF_ATOMIC_LOCK_GET(_atomic) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_ADD(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value += (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_SUB(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value -= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_AND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value &= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_OR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value |= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_XOR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value ^= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_NAND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value = ~(__value & (_value))); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_ADD(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value + (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_SUB(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value - (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_AND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value &(_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_OR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value | (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_XOR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value ^ (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_NAND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(~(__value & (_value))); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_SWAP(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(_value); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_CMP_SWAP(_atomic, _expected, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ bool __ret = (__value == (_expected)); \
+ if (__ret) { \
+ GF_ATOMIC_OP_STORE(_value); \
+ } \
+ GF_ATOMIC_OP_RETURN(__ret); \
+ })
+
+#if defined(HAVE_ATOMIC_BUILTINS)
+
+/* If compiler supports __atomic builtins, we use them. */
+
+#define GF_ATOMIC_BASE_INIT(_atomic, _value) \
+ __atomic_store_n(&(_atomic).value, (_value), __ATOMIC_RELEASE)
+
+#define GF_ATOMIC_BASE_GET(_atomic) \
+ __atomic_load_n(&(_atomic).value, __ATOMIC_ACQUIRE)
+
+#define GF_ATOMIC_BASE_ADD(_atomic, _value) \
+ __atomic_add_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_SUB(_atomic, _value) \
+ __atomic_sub_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_AND(_atomic, _value) \
+ __atomic_and_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_OR(_atomic, _value) \
+ __atomic_or_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_XOR(_atomic, _value) \
+ __atomic_xor_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_NAND(_atomic, _value) \
+ __atomic_nand_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_ADD(_atomic, _value) \
+ __atomic_fetch_add(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_SUB(_atomic, _value) \
+ __atomic_fetch_sub(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_AND(_atomic, _value) \
+ __atomic_fetch_and(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_OR(_atomic, _value) \
+ __atomic_fetch_or(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_XOR(_atomic, _value) \
+ __atomic_fetch_xor(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_NAND(_atomic, _value) \
+ __atomic_fetch_nand(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_SWAP(_atomic, _value) \
+ __atomic_exchange_n(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_CMP_SWAP(_atomic, _expected, _value) \
+ ({ \
+ typeof((_atomic).value) __expected = (_expected); \
+ __atomic_compare_exchange_n(&(_atomic).value, &__expected, (_value), \
+ 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); \
+ })
+
+#elif defined(HAVE_SYNC_BUILTINS)
+
+/* If compiler doesn't support __atomic builtins but supports __sync builtins,
+ * we use them. */
+
+#define GF_ATOMIC_BASE_INIT(_atomic, _value) \
+ do { \
+ (_atomic).value = (_value); \
+ __sync_synchronize(); \
+ } while (0)
+
+#define GF_ATOMIC_BASE_ADD(_atomic, _value) \
+ __sync_add_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_SUB(_atomic, _value) \
+ __sync_sub_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_AND(_atomic, _value) \
+ __sync_and_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_OR(_atomic, _value) \
+ __sync_or_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_XOR(_atomic, _value) \
+ __sync_xor_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_NAND(_atomic, _value) \
+ __sync_nand_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_ADD(_atomic, _value) \
+ __sync_fetch_and_add(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_SUB(_atomic, _value) \
+ __sync_fetch_and_sub(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_AND(_atomic, _value) \
+ __sync_fetch_and_and(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_OR(_atomic, _value) \
+ __sync_fetch_and_or(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_XOR(_atomic, _value) \
+ __sync_fetch_and_xor(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_NAND(_atomic, _value) \
+ __sync_fetch_and_nand(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_SWAP(_atomic, _value) \
+ ({ \
+ __sync_synchronize(); \
+ __sync_lock_test_and_set(&(_atomic).value, (_value)); \
+ })
+
+#define GF_ATOMIC_BASE_CMP_SWAP(_atomic, _expected, _value) \
+ __sync_bool_compare_and_swap(&(_atomic).value, (_expected), (_value))
+
+#define GF_ATOMIC_BASE_GET(_atomic) GF_ATOMIC_BASE_ADD(_atomic, 0)
+
+#else /* !HAVE_ATOMIC_BUILTINS && !HAVE_SYNC_BUILTINS */
+
+/* The compiler doesn't support any atomic builtin. We fallback to the
+ * mutex-based implementation. */
+
+#define GF_ATOMIC_BASE_INIT(_atomic, _value) \
+ GF_ATOMIC_LOCK_INIT(_atomic, _value)
+
+#define GF_ATOMIC_BASE_GET(_atomic) GF_ATOMIC_LOCK_GET(_atomic)
+
+#define GF_ATOMIC_BASE_ADD(_atomic, _value) GF_ATOMIC_LOCK_ADD(_atomic, _value)
+
+#define GF_ATOMIC_BASE_SUB(_atomic, _value) GF_ATOMIC_LOCK_SUB(_atomic, _value)
+
+#define GF_ATOMIC_BASE_AND(_atomic, _value) GF_ATOMIC_LOCK_AND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_OR(_atomic, _value) GF_ATOMIC_LOCK_OR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_XOR(_atomic, _value) GF_ATOMIC_LOCK_XOR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_NAND(_atomic, _value) \
+ GF_ATOMIC_LOCK_NAND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_ADD(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_ADD(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_SUB(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_SUB(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_AND(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_AND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_OR(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_OR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_XOR(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_XOR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_NAND(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_NAND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_SWAP(_atomic, _value) \
+ GF_ATOMIC_LOCK_SWAP(_atomic, _value)
+
+#define GF_ATOMIC_BASE_CMP_SWAP(_atomic, _expected, _value) \
+ GF_ATOMIC_LOCK_CMP_SWAP(_atomic, _expected, _value)
+
+#endif /* HAVE_(ATOMIC|SYNC)_BUILTINS */
+
+/* Here we declare the real atomic macros available to the user. */
+
+/* All macros have a 'gf_atomic_xxx' as 1st argument */
+
+#define GF_ATOMIC_INIT(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, INIT, _value)
+#define GF_ATOMIC_GET(_atomic) GF_ATOMIC_CHOOSE(_atomic, GET)
+#define GF_ATOMIC_ADD(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, ADD, _value)
+#define GF_ATOMIC_SUB(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, SUB, _value)
+#define GF_ATOMIC_AND(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, AND, _value)
+#define GF_ATOMIC_OR(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, OR, _value)
+#define GF_ATOMIC_XOR(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, XOR, _value)
+#define GF_ATOMIC_NAND(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, NAND, _value)
+
+#define GF_ATOMIC_FETCH_ADD(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_ADD, _value)
+
+#define GF_ATOMIC_FETCH_SUB(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_SUB, _value)
+
+#define GF_ATOMIC_FETCH_AND(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_AND, _value)
+
+#define GF_ATOMIC_FETCH_OR(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_OR, _value)
+
+#define GF_ATOMIC_FETCH_XOR(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_XOR, _value)
+
+#define GF_ATOMIC_FETCH_NAND(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_NAND, _value)
+
+#define GF_ATOMIC_SWAP(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, SWAP, _value)
+
+#define GF_ATOMIC_CMP_SWAP(_atomic, _expected, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, CMP_SWAP, _expected, _value)
+
+#define GF_ATOMIC_INC(_atomic) GF_ATOMIC_ADD(_atomic, 1)
+#define GF_ATOMIC_DEC(_atomic) GF_ATOMIC_SUB(_atomic, 1)
+#define GF_ATOMIC_FETCH_INC(_atomic) GF_ATOMIC_FETCH_ADD(_atomic, 1)
+#define GF_ATOMIC_FETCH_DEC(_atomic) GF_ATOMIC_FETCH_SUB(_atomic, 1)
+
+#endif /* _ATOMIC_H */
diff --git a/libglusterfs/src/glusterfs/byte-order.h b/libglusterfs/src/glusterfs/byte-order.h
new file mode 100644
index 00000000000..fd8cef9e58d
--- /dev/null
+++ b/libglusterfs/src/glusterfs/byte-order.h
@@ -0,0 +1,279 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BYTE_ORDER_H
+#define _BYTE_ORDER_H
+
+#include <inttypes.h>
+
+#define LS1 0x00ffU
+#define MS1 0xff00U
+#define LS2 0x0000ffffU
+#define MS2 0xffff0000U
+#define LS4 0x00000000ffffffffULL
+#define MS4 0xffffffff00000000ULL
+
+static uint16_t (*hton16)(uint16_t);
+static uint32_t (*hton32)(uint32_t);
+static uint64_t (*hton64)(uint64_t);
+
+#define ntoh16 hton16
+#define ntoh32 hton32
+#define ntoh64 hton64
+
+static uint16_t (*htole16)(uint16_t);
+static uint32_t (*htole32)(uint32_t);
+static uint64_t (*htole64)(uint64_t);
+
+#define letoh16 htole16
+#define letoh32 htole32
+#define letoh64 htole64
+
+static uint16_t (*htobe16)(uint16_t);
+static uint32_t (*htobe32)(uint32_t);
+static uint64_t (*htobe64)(uint64_t);
+
+#define betoh16 htobe16
+#define betoh32 htobe32
+#define betoh64 htobe64
+
+#define do_swap2(x) (((x & LS1) << 8) | (((x & MS1) >> 8)))
+#define do_swap4(x) ((do_swap2(x & LS2) << 16) | (do_swap2((x & MS2) >> 16)))
+#define do_swap8(x) ((do_swap4(x & LS4) << 32) | (do_swap4((x & MS4) >> 32)))
+
+static inline uint16_t
+__swap16(uint16_t x)
+{
+ return do_swap2(x);
+}
+
+static inline uint32_t
+__swap32(uint32_t x)
+{
+ return do_swap4(x);
+}
+
+static inline uint64_t
+__swap64(uint64_t x)
+{
+ return do_swap8(x);
+}
+
+static inline uint16_t
+__noswap16(uint16_t x)
+{
+ return x;
+}
+
+static inline uint32_t
+__noswap32(uint32_t x)
+{
+ return x;
+}
+
+static inline uint64_t
+__noswap64(uint64_t x)
+{
+ return x;
+}
+
+static inline uint16_t
+__byte_order_n16(uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ hton16 = __swap16;
+ hton32 = __swap32;
+ hton64 = __swap64;
+ } else {
+ /* cpu is be */
+ hton16 = __noswap16;
+ hton32 = __noswap32;
+ hton64 = __noswap64;
+ }
+
+ return hton16(i);
+}
+
+static inline uint32_t
+__byte_order_n32(uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ hton16 = __swap16;
+ hton32 = __swap32;
+ hton64 = __swap64;
+ } else {
+ /* cpu is be */
+ hton16 = __noswap16;
+ hton32 = __noswap32;
+ hton64 = __noswap64;
+ }
+
+ return hton32(i);
+}
+
+static inline uint64_t
+__byte_order_n64(uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ hton16 = __swap16;
+ hton32 = __swap32;
+ hton64 = __swap64;
+ } else {
+ /* cpu is be */
+ hton16 = __noswap16;
+ hton32 = __noswap32;
+ hton64 = __noswap64;
+ }
+
+ return hton64(i);
+}
+
+static uint16_t (*hton16)(uint16_t) = __byte_order_n16;
+static uint32_t (*hton32)(uint32_t) = __byte_order_n32;
+static uint64_t (*hton64)(uint64_t) = __byte_order_n64;
+
+static inline uint16_t
+__byte_order_le16(uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole16(i);
+}
+
+static inline uint32_t
+__byte_order_le32(uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole32(i);
+}
+
+static inline uint64_t
+__byte_order_le64(uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole64(i);
+}
+
+static uint16_t (*htole16)(uint16_t) = __byte_order_le16;
+static uint32_t (*htole32)(uint32_t) = __byte_order_le32;
+static uint64_t (*htole64)(uint64_t) = __byte_order_le64;
+
+static inline uint16_t
+__byte_order_be16(uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe16(i);
+}
+
+static inline uint32_t
+__byte_order_be32(uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe32(i);
+}
+
+static inline uint64_t
+__byte_order_be64(uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe64(i);
+}
+
+static uint16_t (*htobe16)(uint16_t) = __byte_order_be16;
+static uint32_t (*htobe32)(uint32_t) = __byte_order_be32;
+static uint64_t (*htobe64)(uint64_t) = __byte_order_be64;
+
+#endif /* _BYTE_ORDER_H */
diff --git a/libglusterfs/src/glusterfs/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h
new file mode 100644
index 00000000000..8237ea459bf
--- /dev/null
+++ b/libglusterfs/src/glusterfs/call-stub.h
@@ -0,0 +1,622 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CALL_STUB_H_
+#define _CALL_STUB_H_
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/default-args.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/list.h"
+
+typedef struct _call_stub {
+ struct list_head list;
+ call_frame_t *frame;
+ struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
+ uint32_t jnl_meta_len;
+ uint32_t jnl_data_len;
+ void (*serialize)(struct _call_stub *, char *, char *);
+ union {
+ fop_lookup_t lookup;
+ fop_stat_t stat;
+ fop_fstat_t fstat;
+ fop_truncate_t truncate;
+ fop_ftruncate_t ftruncate;
+ fop_access_t access;
+ fop_readlink_t readlink;
+ fop_mknod_t mknod;
+ fop_mkdir_t mkdir;
+ fop_unlink_t unlink;
+ fop_rmdir_t rmdir;
+ fop_symlink_t symlink;
+ fop_rename_t rename;
+ fop_link_t link;
+ fop_create_t create;
+ fop_open_t open;
+ fop_readv_t readv;
+ fop_writev_t writev;
+ fop_flush_t flush;
+ fop_fsync_t fsync;
+ fop_opendir_t opendir;
+ fop_fsyncdir_t fsyncdir;
+ fop_statfs_t statfs;
+ fop_setxattr_t setxattr;
+ fop_getxattr_t getxattr;
+ fop_fgetxattr_t fgetxattr;
+ fop_fsetxattr_t fsetxattr;
+ fop_removexattr_t removexattr;
+ fop_fremovexattr_t fremovexattr;
+ fop_lk_t lk;
+ fop_inodelk_t inodelk;
+ fop_finodelk_t finodelk;
+ fop_entrylk_t entrylk;
+ fop_fentrylk_t fentrylk;
+ fop_readdir_t readdir;
+ fop_readdirp_t readdirp;
+ fop_rchecksum_t rchecksum;
+ fop_xattrop_t xattrop;
+ fop_fxattrop_t fxattrop;
+ fop_setattr_t setattr;
+ fop_fsetattr_t fsetattr;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
+ fop_ipc_t ipc;
+ fop_seek_t seek;
+ fop_lease_t lease;
+ fop_getactivelk_t getactivelk;
+ fop_setactivelk_t setactivelk;
+ fop_put_t put;
+ fop_icreate_t icreate;
+ fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
+ } fn;
+
+ union {
+ fop_lookup_cbk_t lookup;
+ fop_stat_cbk_t stat;
+ fop_fstat_cbk_t fstat;
+ fop_truncate_cbk_t truncate;
+ fop_ftruncate_cbk_t ftruncate;
+ fop_access_cbk_t access;
+ fop_readlink_cbk_t readlink;
+ fop_mknod_cbk_t mknod;
+ fop_mkdir_cbk_t mkdir;
+ fop_unlink_cbk_t unlink;
+ fop_rmdir_cbk_t rmdir;
+ fop_symlink_cbk_t symlink;
+ fop_rename_cbk_t rename;
+ fop_link_cbk_t link;
+ fop_create_cbk_t create;
+ fop_open_cbk_t open;
+ fop_readv_cbk_t readv;
+ fop_writev_cbk_t writev;
+ fop_flush_cbk_t flush;
+ fop_fsync_cbk_t fsync;
+ fop_opendir_cbk_t opendir;
+ fop_fsyncdir_cbk_t fsyncdir;
+ fop_statfs_cbk_t statfs;
+ fop_setxattr_cbk_t setxattr;
+ fop_getxattr_cbk_t getxattr;
+ fop_fgetxattr_cbk_t fgetxattr;
+ fop_fsetxattr_cbk_t fsetxattr;
+ fop_removexattr_cbk_t removexattr;
+ fop_fremovexattr_cbk_t fremovexattr;
+ fop_lk_cbk_t lk;
+ fop_inodelk_cbk_t inodelk;
+ fop_finodelk_cbk_t finodelk;
+ fop_entrylk_cbk_t entrylk;
+ fop_fentrylk_cbk_t fentrylk;
+ fop_readdir_cbk_t readdir;
+ fop_readdirp_cbk_t readdirp;
+ fop_rchecksum_cbk_t rchecksum;
+ fop_xattrop_cbk_t xattrop;
+ fop_fxattrop_cbk_t fxattrop;
+ fop_setattr_cbk_t setattr;
+ fop_fsetattr_cbk_t fsetattr;
+ fop_fallocate_cbk_t fallocate;
+ fop_discard_cbk_t discard;
+ fop_zerofill_cbk_t zerofill;
+ fop_ipc_cbk_t ipc;
+ fop_seek_cbk_t seek;
+ fop_lease_cbk_t lease;
+ fop_getactivelk_cbk_t getactivelk;
+ fop_setactivelk_cbk_t setactivelk;
+ fop_put_cbk_t put;
+ fop_icreate_cbk_t icreate;
+ fop_namelink_cbk_t namelink;
+ fop_copy_file_range_cbk_t copy_file_range;
+ } fn_cbk;
+ glusterfs_fop_t fop;
+ gf_boolean_t poison;
+ char wind;
+ default_args_t args;
+ default_args_cbk_t args_cbk;
+} call_stub_t;
+
+call_stub_t *
+fop_lookup_stub(call_frame_t *frame, fop_lookup_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_lookup_cbk_stub(call_frame_t *frame, fop_lookup_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent);
+call_stub_t *
+fop_stat_stub(call_frame_t *frame, fop_stat_t fn, loc_t *loc, dict_t *xdata);
+call_stub_t *
+fop_stat_cbk_stub(call_frame_t *frame, fop_stat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata);
+call_stub_t *
+fop_fstat_stub(call_frame_t *frame, fop_fstat_t fn, fd_t *fd, dict_t *xdata);
+call_stub_t *
+fop_fstat_cbk_stub(call_frame_t *frame, fop_fstat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata);
+
+call_stub_t *
+fop_truncate_stub(call_frame_t *frame, fop_truncate_t fn, loc_t *loc, off_t off,
+ dict_t *xdata);
+
+call_stub_t *
+fop_truncate_cbk_stub(call_frame_t *frame, fop_truncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+call_stub_t *
+fop_ftruncate_stub(call_frame_t *frame, fop_ftruncate_t fn, fd_t *fd, off_t off,
+ dict_t *xdata);
+
+call_stub_t *
+fop_ftruncate_cbk_stub(call_frame_t *frame, fop_ftruncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+call_stub_t *
+fop_access_stub(call_frame_t *frame, fop_access_t fn, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+call_stub_t *
+fop_access_cbk_stub(call_frame_t *frame, fop_access_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_readlink_stub(call_frame_t *frame, fop_readlink_t fn, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+call_stub_t *
+fop_readlink_cbk_stub(call_frame_t *frame, fop_readlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+call_stub_t *
+fop_mknod_stub(call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+call_stub_t *
+fop_mknod_cbk_stub(call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_mkdir_stub(call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+call_stub_t *
+fop_mkdir_cbk_stub(call_frame_t *frame, fop_mkdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_unlink_stub(call_frame_t *frame, fop_unlink_t fn, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+call_stub_t *
+fop_unlink_cbk_stub(call_frame_t *frame, fop_unlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+call_stub_t *
+fop_rmdir_stub(call_frame_t *frame, fop_rmdir_t fn, loc_t *loc, int flags,
+ dict_t *xdata);
+
+call_stub_t *
+fop_rmdir_cbk_stub(call_frame_t *frame, fop_rmdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+call_stub_t *
+fop_symlink_stub(call_frame_t *frame, fop_symlink_t fn, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+call_stub_t *
+fop_symlink_cbk_stub(call_frame_t *frame, fop_symlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_rename_stub(call_frame_t *frame, fop_rename_t fn, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+call_stub_t *
+fop_rename_cbk_stub(call_frame_t *frame, fop_rename_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_link_stub(call_frame_t *frame, fop_link_t fn, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_link_cbk_stub(call_frame_t *frame, fop_link_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_create_stub(call_frame_t *frame, fop_create_t fn, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_create_cbk_stub(call_frame_t *frame, fop_create_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+call_stub_t *
+fop_open_stub(call_frame_t *frame, fop_open_t fn, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_open_cbk_stub(call_frame_t *frame, fop_open_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_readv_stub(call_frame_t *frame, fop_readv_t fn, fd_t *fd, size_t size,
+ off_t off, uint32_t flags, dict_t *xdata);
+
+call_stub_t *
+fop_readv_cbk_stub(call_frame_t *frame, fop_readv_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
+
+call_stub_t *
+fop_writev_stub(call_frame_t *frame, fop_writev_t fn, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+call_stub_t *
+fop_writev_cbk_stub(call_frame_t *frame, fop_writev_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+call_stub_t *
+fop_flush_stub(call_frame_t *frame, fop_flush_t fn, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_flush_cbk_stub(call_frame_t *frame, fop_flush_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fsync_stub(call_frame_t *frame, fop_fsync_t fn, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+call_stub_t *
+fop_fsync_cbk_stub(call_frame_t *frame, fop_fsync_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+call_stub_t *
+fop_opendir_stub(call_frame_t *frame, fop_opendir_t fn, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+call_stub_t *
+fop_opendir_cbk_stub(call_frame_t *frame, fop_opendir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_fsyncdir_stub(call_frame_t *frame, fop_fsyncdir_t fn, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+call_stub_t *
+fop_fsyncdir_cbk_stub(call_frame_t *frame, fop_fsyncdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_statfs_stub(call_frame_t *frame, fop_statfs_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_statfs_cbk_stub(call_frame_t *frame, fop_statfs_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata);
+
+call_stub_t *
+fop_setxattr_stub(call_frame_t *frame, fop_setxattr_t fn, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+call_stub_t *
+fop_setxattr_cbk_stub(call_frame_t *frame, fop_setxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_getxattr_stub(call_frame_t *frame, fop_getxattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_getxattr_cbk_stub(call_frame_t *frame, fop_getxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *value,
+ dict_t *xdata);
+
+call_stub_t *
+fop_fsetxattr_stub(call_frame_t *frame, fop_fsetxattr_t fn, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+call_stub_t *
+fop_fsetxattr_cbk_stub(call_frame_t *frame, fop_fsetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fgetxattr_stub(call_frame_t *frame, fop_fgetxattr_t fn, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_fgetxattr_cbk_stub(call_frame_t *frame, fop_fgetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *value,
+ dict_t *xdata);
+
+call_stub_t *
+fop_removexattr_stub(call_frame_t *frame, fop_removexattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_removexattr_cbk_stub(call_frame_t *frame, fop_removexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fremovexattr_stub(call_frame_t *frame, fop_fremovexattr_t fn, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_fremovexattr_cbk_stub(call_frame_t *frame, fop_fremovexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_lk_stub(call_frame_t *frame, fop_lk_t fn, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_lk_cbk_stub(call_frame_t *frame, fop_lk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_inodelk_stub(call_frame_t *frame, fop_inodelk_t fn, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_finodelk_stub(call_frame_t *frame, fop_finodelk_t fn, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_entrylk_stub(call_frame_t *frame, fop_entrylk_t fn, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+call_stub_t *
+fop_fentrylk_stub(call_frame_t *frame, fop_fentrylk_t fn, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+call_stub_t *
+fop_inodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_finodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_entrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fentrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_readdir_stub(call_frame_t *frame, fop_readdir_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+call_stub_t *
+fop_readdirp_stub(call_frame_t *frame, fop_readdirp_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+call_stub_t *
+fop_readdirp_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+call_stub_t *
+fop_readdir_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+call_stub_t *
+fop_rchecksum_stub(call_frame_t *frame, fop_rchecksum_t fn, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata);
+
+call_stub_t *
+fop_rchecksum_cbk_stub(call_frame_t *frame, fop_rchecksum_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+call_stub_t *
+fop_xattrop_stub(call_frame_t *frame, fop_xattrop_t fn, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+call_stub_t *
+fop_xattrop_stub_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fxattrop_stub(call_frame_t *frame, fop_fxattrop_t fn, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+call_stub_t *
+fop_fxattrop_stub_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_setattr_stub(call_frame_t *frame, fop_setattr_t fn, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+call_stub_t *
+fop_setattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_fsetattr_stub(call_frame_t *frame, fop_fsetattr_t fn, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+call_stub_t *
+fop_fsetattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+call_stub_t *
+fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_ipc_stub(call_frame_t *frame, fop_ipc_t fn, int32_t op, dict_t *xdata);
+
+call_stub_t *
+fop_ipc_cbk_stub(call_frame_t *frame, fop_ipc_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_seek_stub(call_frame_t *frame, fop_seek_t fn, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+call_stub_t *
+fop_seek_cbk_stub(call_frame_t *frame, fop_seek_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata);
+
+call_stub_t *
+fop_lease_stub(call_frame_t *frame, fop_lease_t fn, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+call_stub_t *
+fop_lease_cbk_stub(call_frame_t *frame, fop_lease_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata);
+
+call_stub_t *
+fop_getactivelk_stub(call_frame_t *frame, fop_getactivelk_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_getactivelk_cbk_stub(call_frame_t *frame, fop_getactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *lmi, dict_t *xdata);
+
+call_stub_t *
+fop_setactivelk_stub(call_frame_t *frame, fop_setactivelk_t fn, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+call_stub_t *
+fop_setactivelk_cbk_stub(call_frame_t *frame, fop_setactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+call_stub_t *
+fop_put_cbk_stub(call_frame_t *frame, fop_put_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_icreate_stub(call_frame_t *frame, fop_icreate_t fn, loc_t *loc, mode_t mode,
+ dict_t *xdata);
+
+call_stub_t *
+fop_namelink_stub(call_frame_t *frame, fop_namelink_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_icreate_cbk_stub(call_frame_t *frame, fop_icreate_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata);
+
+call_stub_t *
+fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+call_stub_t *
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata);
+
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+void
+call_resume(call_stub_t *stub);
+void
+call_resume_keep_stub(call_stub_t *stub);
+void
+call_stub_destroy(call_stub_t *stub);
+void
+call_unwind_error(call_stub_t *stub, int op_ret, int op_errno);
+void
+call_unwind_error_keep_stub(call_stub_t *stub, int op_ret, int op_errno);
+
+/*
+ * Sometimes we might want to call just this, perhaps repeatedly, without
+ * having (or being able) to destroy and recreate it.
+ */
+void
+call_resume_wind(call_stub_t *stub);
+
+#endif
diff --git a/libglusterfs/src/checksum.h b/libglusterfs/src/glusterfs/checksum.h
index bf7eeede8fc..019bb14df71 100644
--- a/libglusterfs/src/checksum.h
+++ b/libglusterfs/src/glusterfs/checksum.h
@@ -12,9 +12,11 @@
#define __CHECKSUM_H__
uint32_t
-gf_rsync_weak_checksum (unsigned char *buf, size_t len);
+gf_rsync_weak_checksum(unsigned char *buf, size_t len);
void
-gf_rsync_strong_checksum (unsigned char *buf, size_t len, unsigned char *sum);
+gf_rsync_strong_checksum(unsigned char *buf, size_t len, unsigned char *sum);
+void
+gf_rsync_md5_checksum(unsigned char *data, size_t len, unsigned char *md5);
#endif /* __CHECKSUM_H__ */
diff --git a/libglusterfs/src/glusterfs/circ-buff.h b/libglusterfs/src/glusterfs/circ-buff.h
new file mode 100644
index 00000000000..822345b641b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/circ-buff.h
@@ -0,0 +1,61 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CB_H
+#define _CB_H
+
+#include "glusterfs/common-utils.h"
+
+#define BUFFER_SIZE 10
+#define TOTAL_SIZE BUFFER_SIZE + 1
+
+struct _circular_buffer {
+ struct timeval tv;
+ void *data;
+};
+
+typedef struct _circular_buffer circular_buffer_t;
+
+struct _buffer {
+ unsigned int w_index;
+ size_t size_buffer;
+ gf_boolean_t use_once;
+ /* This variable is assigned the proper value at the time of initing */
+ /* the buffer. It indicates, whether the buffer should be used once */
+ /* it becomes full. */
+
+ int used_len;
+ /* indicates the amount of circular buffer used. */
+
+ circular_buffer_t **cb;
+ void (*destroy_buffer_data)(void *data);
+ pthread_mutex_t lock;
+};
+
+typedef struct _buffer buffer_t;
+
+int
+cb_add_entry_buffer(buffer_t *buffer, void *item);
+
+void
+cb_buffer_show(buffer_t *buffer);
+
+buffer_t *
+cb_buffer_new(size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_data)(void *data));
+
+void
+cb_buffer_destroy(buffer_t *buffer);
+
+void
+cb_buffer_dump(buffer_t *buffer, void *data,
+ int(fn)(circular_buffer_t *buffer, void *data));
+
+#endif /* _CB_H */
diff --git a/libglusterfs/src/glusterfs/client_t.h b/libglusterfs/src/glusterfs/client_t.h
new file mode 100644
index 00000000000..a2c508e1d5c
--- /dev/null
+++ b/libglusterfs/src/glusterfs/client_t.h
@@ -0,0 +1,147 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CLIENT_T_H
+#define _CLIENT_T_H
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h" /* for gf_lock_t, not included by glusterfs.h */
+#include "glusterfs/atomic.h" /* for gf_atomic_t */
+
+/* auth_data structure is required by RPC layer. But as it is also used in
+ * client_t structure validation, comparision, it is critical that it is defined
+ * in the larger scope of libglusterfs, instead of libgfrpc. With this change,
+ * even RPC will use this structure */
+#define GF_CLIENTT_AUTH_BYTES 400
+typedef struct client_auth_data {
+ int flavour;
+ int datalen;
+ char authdata[GF_CLIENTT_AUTH_BYTES];
+} client_auth_data_t;
+
+struct client_ctx {
+ void *ctx_key;
+ void *ctx_value;
+};
+
+typedef struct _client {
+ struct {
+ /* e.g. protocol/server stashes its ctx here */
+ gf_lock_t lock;
+ unsigned short count;
+ struct client_ctx *ctx;
+ } scratch_ctx;
+ gf_atomic_t bind;
+ gf_atomic_t count;
+ xlator_t *bound_xl;
+ xlator_t *this;
+ int tbl_index;
+ char *client_uid;
+ char *client_name;
+ struct {
+ int flavour;
+ size_t len;
+ char *data;
+ char *username;
+ char *passwd;
+ } auth;
+
+ /* subdir_mount */
+ char *subdir_mount;
+ inode_t *subdir_inode;
+ uuid_t subdir_gfid;
+ int32_t opversion;
+ /* Variable to save fd_count for detach brick */
+ gf_atomic_t fd_cnt;
+} client_t;
+
+#define GF_CLIENTCTX_INITIAL_SIZE 8
+
+struct client_table_entry {
+ client_t *client;
+ int next_free;
+};
+typedef struct client_table_entry cliententry_t;
+
+struct clienttable {
+ unsigned int max_clients;
+ gf_lock_t lock;
+ cliententry_t *cliententries;
+ int first_free;
+ client_t *local;
+};
+typedef struct clienttable clienttable_t;
+
+#define GF_CLIENTTABLE_INITIAL_SIZE 128
+
+/* Signifies no more entries in the client table. */
+#define GF_CLIENTTABLE_END -1
+
+/* This is used to invalidate
+ * the next_free value in an cliententry that has been allocated
+ */
+#define GF_CLIENTENTRY_ALLOCATED -2
+
+void
+gf_client_put(client_t *client, gf_boolean_t *detached);
+
+clienttable_t *
+gf_clienttable_alloc(void);
+
+client_t *
+gf_client_ref(client_t *client);
+
+void
+gf_client_unref(client_t *client);
+
+int
+gf_client_dump_fdtable_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtable(xlator_t *this);
+
+int
+gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_inodes(xlator_t *this);
+
+void *
+client_ctx_set(client_t *client, void *key, void *value);
+
+int
+client_ctx_get(client_t *client, void *key, void **value);
+
+int
+client_ctx_del(client_t *client, void *key, void **value);
+
+void
+client_ctx_dump(client_t *client, char *prefix);
+
+int
+gf_client_dump_fdtables_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtables(xlator_t *this);
+
+int
+gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_inodes(xlator_t *this);
+
+int
+gf_client_disconnect(client_t *client);
+
+client_t *
+gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid,
+ char *subdir_mount);
+
+#endif /* _CLIENT_T_H */
diff --git a/libglusterfs/src/glusterfs/cluster-syncop.h b/libglusterfs/src/glusterfs/cluster-syncop.h
new file mode 100644
index 00000000000..d0ad5ed548c
--- /dev/null
+++ b/libglusterfs/src/glusterfs/cluster-syncop.h
@@ -0,0 +1,227 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CLUSTER_SYNCOP_H
+#define _CLUSTER_SYNCOP_H
+
+#include <sys/time.h>
+#include <pthread.h>
+#include <ucontext.h>
+
+#include "glusterfs/defaults.h"
+#include "glusterfs/default-args.h"
+#include "glusterfs/syncop.h"
+
+/*********************************************************************
+ *
+ * PARALLEL_FOP_ONLIST:
+ * Performs file operations in parallel on bricks.
+ * This macro expects a helper function(func) to implement the
+ * functionality.
+ *
+ ********************************************************************/
+#define PARALLEL_FOP_ONLIST(subvols, on, numsubvols, replies, frame, func, \
+ args...) \
+ do { \
+ int __i = 0; \
+ int __count = 0; \
+ cluster_local_t __local = { \
+ 0, \
+ }; \
+ void *__old_local = frame->local; \
+ \
+ __local.replies = replies; \
+ cluster_replies_wipe(replies, numsubvols); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ if (syncbarrier_init(&__local.barrier)) \
+ break; \
+ frame->local = &__local; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (on[__i]) { \
+ __count++; \
+ } \
+ } \
+ __local.barrier.waitfor = __count; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (on[__i]) { \
+ func(frame, subvols[__i], __i, ##args); \
+ } \
+ } \
+ syncbarrier_wait(&__local.barrier, __count); \
+ syncbarrier_destroy(&__local.barrier); \
+ frame->local = __old_local; \
+ STACK_RESET(frame->root); \
+ } while (0)
+
+typedef struct cluster_local_ {
+ default_args_cbk_t *replies;
+ syncbarrier_t barrier;
+} cluster_local_t;
+
+int32_t
+cluster_lookup(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+int32_t
+cluster_setattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t
+cluster_getxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t
+cluster_setxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int
+cluster_inodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ off_t off, size_t size);
+
+int
+cluster_uninodelk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size);
+
+int
+cluster_entrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ const char *name);
+
+int32_t
+cluster_rmdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
+
+int32_t
+cluster_unlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+cluster_mkdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+cluster_readlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int
+cluster_symlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+cluster_link(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+cluster_mknod(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int
+cluster_unentrylk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name);
+
+int
+cluster_tryentrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name);
+
+int32_t
+cluster_fxattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+cluster_xattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+cluster_fstat(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+cluster_ftruncate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
+
+int32_t
+cluster_open(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+cluster_tryinodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size);
+
+int32_t
+cluster_fsetattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+cluster_put(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+void
+cluster_replies_wipe(default_args_cbk_t *replies, int num_subvols);
+
+int32_t
+cluster_fop_success_fill(default_args_cbk_t *replies, int numsubvols,
+ unsigned char *success);
+
+int32_t
+cluster_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int
+cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on,
+ int numsubvols, default_args_cbk_t *replies,
+ unsigned char *locked_on, call_frame_t *frame,
+ xlator_t *this, char *dom, inode_t *inode, off_t off,
+ size_t size);
+#endif /* !_CLUSTER_SYNCOP_H */
diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
new file mode 100644
index 00000000000..f297fdab5c9
--- /dev/null
+++ b/libglusterfs/src/glusterfs/common-utils.h
@@ -0,0 +1,1256 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _COMMON_UTILS_H
+#define _COMMON_UTILS_H
+
+#include <stdint.h>
+#include <sys/uio.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <openssl/md5.h>
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif
+#include <limits.h>
+#include <fnmatch.h>
+#include <uuid/uuid.h>
+
+/* FreeBSD, etc. */
+#ifndef __BITS_PER_LONG
+#define __BITS_PER_LONG (CHAR_BIT * (sizeof(long)))
+#endif
+
+#ifndef ffsll
+#define ffsll(x) __builtin_ffsll(x)
+#endif
+
+void
+trap(void);
+
+#define GF_UNIVERSAL_ANSWER 42 /* :O */
+
+/* To solve type punned error */
+#define VOID(ptr) ((void **)((void *)ptr))
+
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+#define STRINGIFY(val) #val
+#define TOSTRING(val) STRINGIFY(val)
+
+#define alloca0(size) \
+ ({ \
+ void *__ptr; \
+ __ptr = alloca(size); \
+ memset(__ptr, 0, size); \
+ __ptr; \
+ })
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define gf_roof(a, b) ((((a) + (b)-1) / ((b != 0) ? (b) : 1)) * (b))
+#define gf_floor(a, b) (((a) / ((b != 0) ? (b) : 1)) * (b))
+
+#define IPv4_ADDR_SIZE 32
+
+#define GF_UNIT_KB 1024ULL
+#define GF_UNIT_MB 1048576ULL
+#define GF_UNIT_GB 1073741824ULL
+#define GF_UNIT_TB 1099511627776ULL
+#define GF_UNIT_PB 1125899906842624ULL
+
+#define GF_UNIT_B_STRING "B"
+#define GF_UNIT_KB_STRING "KB"
+#define GF_UNIT_MB_STRING "MB"
+#define GF_UNIT_GB_STRING "GB"
+#define GF_UNIT_TB_STRING "TB"
+#define GF_UNIT_PB_STRING "PB"
+
+#define GF_UNIT_PERCENT_STRING "%"
+
+#define GEOREP "geo-replication"
+#define GLUSTERD_NAME "glusterd"
+
+#define GF_SELINUX_XATTR_KEY "security.selinux"
+
+#define WIPE(statp) \
+ do { \
+ typeof(*statp) z = { \
+ 0, \
+ }; \
+ if (statp) \
+ *statp = z; \
+ } while (0)
+
+#define IS_EXT_FS(fs_name) \
+ (!strcmp(fs_name, "ext2") || !strcmp(fs_name, "ext3") || \
+ !strcmp(fs_name, "ext4"))
+
+/* process mode definitions */
+#define GF_SERVER_PROCESS 0
+#define GF_CLIENT_PROCESS 1
+#define GF_GLUSTERD_PROCESS 2
+
+/* Defining this here as it is needed by glusterd for setting
+ * nfs port in volume status.
+ */
+#define GF_NFS3_PORT 2049
+
+#define GF_CLIENT_PORT_CEILING 1024
+#define GF_IANA_PRIV_PORTS_START 49152 /* RFC 6335 */
+#define GF_CLNT_INSECURE_PORT_CEILING (GF_IANA_PRIV_PORTS_START - 1)
+#define GF_PORT_MAX 65535
+#define GF_PORT_ARRAY_SIZE ((GF_PORT_MAX + 7) / 8)
+#define GF_LOCK_TIMER 180
+#define GF_MINUTE_IN_SECONDS 60
+#define GF_HOUR_IN_SECONDS (60 * 60)
+#define GF_DAY_IN_SECONDS (24 * 60 * 60)
+#define GF_WEEK_IN_SECONDS (7 * 24 * 60 * 60)
+#define GF_SEC_IN_NS 1000000000
+#define GF_MS_IN_NS 1000000
+#define GF_US_IN_NS 1000
+
+/* Default timeout for both barrier and changelog translator */
+#define BARRIER_TIMEOUT "120"
+
+/* Default value of signing waiting time to sign a file for bitrot */
+#define SIGNING_TIMEOUT "120"
+#define BR_WORKERS "4"
+
+/* xxhash */
+#define GF_XXH64_DIGEST_LENGTH 8
+#define GF_XXHSUM64_DEFAULT_SEED 0
+
+/* Shard */
+#define GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size"
+#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
+#define DOT_SHARD_REMOVE_ME_GFID "77dd5a45-dbf5-4592-b31b-b440382302e9"
+
+/* Lease: buffer length for stringified lease id
+ * Format: 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum
+ * Eg:6c69-6431-2d63-6c6e-7431-0000-0000-0000
+ */
+#define GF_LEASE_ID_BUF_SIZE ((LEASE_ID_SIZE * 2) + (LEASE_ID_SIZE / 2))
+
+#define GF_PERCENTAGE(val, total) (((val)*100) / (total))
+
+/* pthread related */
+/* as per the man page, thread-name should be at max 16 bytes */
+/* with prefix of 'glfs_' (5), we are left with 11 more bytes */
+#define GF_THREAD_NAME_LIMIT 16
+#define GF_THREAD_NAME_PREFIX "glfs_"
+
+/* Advisory buffer size for formatted timestamps (see gf_time_fmt) */
+#define GF_TIMESTR_SIZE 256
+
+/*
+ * we could have initialized these as +ve values and treated
+ * them as negative while comparing etc.. (which would have
+ * saved us with the pain of assigning values), but since we
+ * only have a few clients that use this feature, it's okay.
+ */
+enum _gf_special_pid {
+ GF_CLIENT_PID_MAX = 0,
+ GF_CLIENT_PID_GSYNCD = -1,
+ GF_CLIENT_PID_HADOOP = -2,
+ GF_CLIENT_PID_DEFRAG = -3,
+ GF_CLIENT_PID_NO_ROOT_SQUASH = -4,
+ GF_CLIENT_PID_QUOTA_MOUNT = -5,
+ GF_CLIENT_PID_SELF_HEALD = -6,
+ GF_CLIENT_PID_GLFS_HEAL = -7,
+ GF_CLIENT_PID_BITD = -8,
+ GF_CLIENT_PID_SCRUB = -9,
+ GF_CLIENT_PID_TIER_DEFRAG = -10,
+ GF_SERVER_PID_TRASH = -11,
+ GF_CLIENT_PID_ADD_REPLICA_MOUNT = -12,
+ GF_CLIENT_PID_SET_UTIME = -13,
+};
+
+enum _gf_xlator_ipc_targets {
+ GF_IPC_TARGET_CHANGELOG = 0,
+ GF_IPC_TARGET_CTR = 1,
+ GF_IPC_TARGET_UPCALL = 2
+};
+
+typedef enum _gf_special_pid gf_special_pid_t;
+typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t;
+
+/* Array to hold custom xattr keys */
+extern char *xattrs_to_heal[];
+
+char **
+get_xattrs_to_heal();
+
+/* The DHT file rename operation is not a straightforward rename.
+ * It involves creating linkto and linkfiles, and can unlink or rename the
+ * source file depending on the hashed and cached subvols for the source
+ * and target files. this makes it difficult for geo-rep to figure out that
+ * a rename operation has taken place.
+ *
+ * We now send a special key and the values of the source and target pargfids
+ * and basenames to indicate to changelog that the operation in question
+ * should be treated as a rename. We are explicitly filling and sending this
+ * as a binary value in the dictionary as the unlink op will not have the
+ * source file information. The lengths of the src and target basenames
+ * are used to calculate where to start reading the names in the structure.
+ * XFS allows a max of 255 chars for filenames but other file systems might
+ * not have such restrictions
+ */
+typedef struct dht_changelog_rename_info {
+ uuid_t old_pargfid;
+ uuid_t new_pargfid;
+ int32_t oldname_len;
+ int32_t newname_len;
+ char buffer[1];
+} dht_changelog_rename_info_t;
+
+typedef int (*gf_cmp)(void *, void *);
+
+struct _dict;
+
+struct dnscache {
+ struct _dict *cache_dict;
+ time_t ttl;
+};
+
+struct dnscache_entry {
+ char *ip;
+ char *fqdn;
+ time_t timestamp;
+};
+
+struct dnscache6 {
+ struct addrinfo *first;
+ struct addrinfo *next;
+};
+
+struct list_node {
+ void *ptr;
+ struct list_head list;
+};
+
+extern char *vol_type_str[];
+
+struct list_node *
+list_node_add(void *ptr, struct list_head *list);
+struct list_node *
+list_node_add_order(void *ptr, struct list_head *list,
+ int (*compare)(struct list_head *, struct list_head *));
+void
+list_node_del(struct list_node *node);
+
+struct dnscache *
+gf_dnscache_init(time_t ttl);
+void
+gf_dnscache_deinit(struct dnscache *cache);
+struct dnscache_entry *
+gf_dnscache_entry_init(void);
+void
+gf_dnscache_entry_deinit(struct dnscache_entry *entry);
+char *
+gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache);
+
+char *
+gf_resolve_path_parent(const char *path);
+
+void
+gf_global_variable_init(void);
+
+int32_t
+gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
+ struct addrinfo **addr_info);
+
+void
+gf_log_dump_graph(FILE *specfp, glusterfs_graph_t *graph);
+void
+gf_print_trace(int32_t signal, glusterfs_ctx_t *ctx);
+int
+gf_set_log_file_path(cmd_args_t *cmd_args, glusterfs_ctx_t *ctx);
+int
+gf_set_log_ident(cmd_args_t *cmd_args);
+
+int
+gf_process_getspec_servers_list(cmd_args_t *cmd_args, const char *servers_list);
+int
+gf_set_volfile_server_common(cmd_args_t *cmd_args, const char *host,
+ const char *transport, int port);
+
+static inline void
+BIT_SET(unsigned char *array, unsigned int index)
+{
+ unsigned int offset = index / 8;
+ unsigned int shift = index % 8;
+
+ array[offset] |= (1 << shift);
+}
+
+static inline void
+BIT_CLEAR(unsigned char *array, unsigned int index)
+{
+ unsigned int offset = index / 8;
+ unsigned int shift = index % 8;
+
+ array[offset] &= ~(1 << shift);
+}
+
+static inline unsigned int
+BIT_VALUE(unsigned char *array, unsigned int index)
+{
+ unsigned int offset = index / 8;
+ unsigned int shift = index % 8;
+
+ return (array[offset] >> shift) & 0x1;
+}
+
+#define VECTORSIZE(count) (count * (sizeof(struct iovec)))
+
+#define STRLEN_0(str) (strlen(str) + 1)
+
+#define VALIDATE_OR_GOTO(arg, label) \
+ do { \
+ if (!arg) { \
+ errno = EINVAL; \
+ gf_msg_callingfn((this ? (this->name) : "(Govinda! Govinda!)"), \
+ GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_VALIDATE_OR_GOTO(name, arg, label) \
+ do { \
+ if (!arg) { \
+ errno = EINVAL; \
+ gf_msg_callingfn(name, GF_LOG_ERROR, errno, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_VALIDATE_OR_GOTO_WITH_ERROR(name, arg, label, errno, error) \
+ do { \
+ if (!arg) { \
+ errno = error; \
+ gf_msg_callingfn(name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_CHECK_ALLOC(arg, retval, label) \
+ do { \
+ if (!(arg)) { \
+ retval = -ENOMEM; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_CHECK_ALLOC_AND_LOG(name, item, retval, msg, errlabel) \
+ do { \
+ if (!(item)) { \
+ (retval) = -ENOMEM; \
+ gf_msg(name, GF_LOG_CRITICAL, ENOMEM, LG_MSG_NO_MEMORY, (msg)); \
+ goto errlabel; \
+ } \
+ } while (0)
+
+#define GF_ASSERT_AND_GOTO_WITH_ERROR(name, arg, label, errno, error) \
+ do { \
+ if (!arg) { \
+ GF_ASSERT(0); \
+ errno = error; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO(name, arg, label) \
+ do { \
+ GF_VALIDATE_OR_GOTO(name, arg, label); \
+ if ((arg[0]) != '/') { \
+ errno = EINVAL; \
+ gf_msg_callingfn(name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_REMOVE_SLASH_FROM_PATH(path, string) \
+ do { \
+ int i = 0; \
+ for (i = 1; i < strlen(path); i++) { \
+ string[i - 1] = path[i]; \
+ if (string[i - 1] == '/') \
+ string[i - 1] = '-'; \
+ } \
+ } while (0)
+
+#define GF_REMOVE_INTERNAL_XATTR(pattern, dict) \
+ do { \
+ if (!dict) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_NULL, \
+ "dict is null"); \
+ break; \
+ } \
+ dict_foreach_fnmatch(dict, pattern, dict_remove_foreach_fn, NULL); \
+ } while (0)
+
+#define GF_IF_INTERNAL_XATTR_GOTO(pattern, dict, op_errno, label) \
+ do { \
+ if (!dict) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_NULL, \
+ "setxattr dict is null"); \
+ goto label; \
+ } \
+ if (dict_foreach_fnmatch(dict, pattern, dict_null_foreach_fn, NULL) > \
+ 0) { \
+ op_errno = EPERM; \
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, LG_MSG_NO_PERM, \
+ "attempt to set internal" \
+ " xattr: %s", \
+ pattern); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_IF_NATIVE_XATTR_GOTO(pattern, key, op_errno, label) \
+ do { \
+ if (!key) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_NO_KEY, \
+ "no key for removexattr"); \
+ goto label; \
+ } \
+ if (!fnmatch(pattern, key, 0)) { \
+ op_errno = EPERM; \
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, LG_MSG_NO_PERM, \
+ "attempt to remove internal " \
+ "xattr: %s", \
+ key); \
+ goto label; \
+ } \
+ } while (0)
+
+#ifdef DEBUG
+#define GF_ASSERT(x) assert(x);
+#else
+#define GF_ASSERT(x) \
+ do { \
+ if (!(x)) { \
+ gf_msg_callingfn("", GF_LOG_ERROR, 0, LG_MSG_ASSERTION_FAILED, \
+ "Assertion failed: " #x); \
+ } \
+ } while (0)
+#endif
+
+/* Compile-time assert, borrowed from Linux kernel. */
+#ifdef HAVE_STATIC_ASSERT
+#define GF_STATIC_ASSERT(expr, ...) \
+ __gf_static_assert(expr, ##__VA_ARGS__, #expr)
+#define __gf_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#else
+#define GF_STATIC_ASSERT(expr, ...)
+#endif
+
+#define GF_ABORT(msg...) \
+ do { \
+ gf_msg_callingfn("", GF_LOG_CRITICAL, 0, LG_MSG_ASSERTION_FAILED, \
+ "Assertion failed: " msg); \
+ abort(); \
+ } while (0)
+
+#define GF_UUID_ASSERT(u) \
+ if (gf_uuid_is_null(u)) \
+ GF_ASSERT(!"uuid null");
+
+#define GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, op_errno) \
+ (((frame->root->pid == GF_CLIENT_PID_GSYNCD) && \
+ (op_errno == EEXIST || op_errno == ENOENT)) \
+ ? 0 \
+ : 1)
+
+union gf_sock_union {
+ struct sockaddr_storage storage;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sockaddr sa;
+};
+
+#define GF_HIDDEN_PATH ".glusterfs"
+#define GF_UNLINK_PATH GF_HIDDEN_PATH "/unlink"
+#define GF_LANDFILL_PATH GF_HIDDEN_PATH "/landfill"
+
+#define IOV_MIN(n) min(IOV_MAX, n)
+
+static inline gf_boolean_t
+gf_irrelevant_entry(struct dirent *entry)
+{
+ GF_ASSERT(entry);
+
+ return (!strcmp(entry->d_name, ".") ||
+ !fnmatch("*.tmp", entry->d_name, 0) ||
+ !strcmp(entry->d_name, ".."));
+}
+
+static inline void
+iov_free(struct iovec *vector, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ FREE(vector[i].iov_base);
+
+ GF_FREE(vector);
+}
+
+static inline int
+iov_length(const struct iovec *vector, int count)
+{
+ int i = 0;
+ size_t size = 0;
+
+ for (i = 0; i < count; i++)
+ size += vector[i].iov_len;
+
+ return size;
+}
+
+static inline struct iovec *
+iov_dup(const struct iovec *vector, int count)
+{
+ int bytecount = 0;
+ struct iovec *newvec = NULL;
+
+ bytecount = (count * sizeof(struct iovec));
+ newvec = GF_MALLOC(bytecount, gf_common_mt_iovec);
+ if (newvec != NULL) {
+ memcpy(newvec, vector, bytecount);
+ }
+
+ return newvec;
+}
+
+typedef struct _iov_iter {
+ const struct iovec *iovec;
+ void *ptr;
+ uint32_t len;
+ uint32_t count;
+} iov_iter_t;
+
+static inline bool
+iov_iter_init(iov_iter_t *iter, const struct iovec *iovec, uint32_t count,
+ uint32_t offset)
+{
+ uint32_t len;
+
+ while (count > 0) {
+ count--;
+ len = iovec->iov_len;
+ if (offset < len) {
+ iter->ptr = iovec->iov_base + offset;
+ iter->len = len - offset;
+ iter->iovec = iovec + 1;
+ iter->count = count;
+
+ return true;
+ }
+ offset -= len;
+ }
+
+ memset(iter, 0, sizeof(*iter));
+
+ return false;
+}
+
+static inline bool
+iov_iter_end(iov_iter_t *iter)
+{
+ return iter->count == 0;
+}
+
+static inline bool
+iov_iter_next(iov_iter_t *iter, uint32_t size)
+{
+ GF_ASSERT(size <= iter->len);
+
+ if (iter->len > size) {
+ iter->len -= size;
+ iter->ptr += size;
+
+ return true;
+ }
+ if (iter->count > 0) {
+ iter->count--;
+ iter->ptr = iter->iovec->iov_base;
+ iter->len = iter->iovec->iov_len;
+ iter->iovec++;
+
+ return true;
+ }
+
+ memset(iter, 0, sizeof(*iter));
+
+ return false;
+}
+
+static inline uint32_t
+iov_iter_copy(iov_iter_t *dst, iov_iter_t *src, uint32_t size)
+{
+ uint32_t len;
+
+ len = src->len;
+ if (len > dst->len) {
+ len = dst->len;
+ }
+ if (len > size) {
+ len = size;
+ }
+ memcpy(dst->ptr, src->ptr, len);
+
+ return len;
+}
+
+static inline uint32_t
+iov_iter_to_iovec(iov_iter_t *iter, struct iovec *iovec, int32_t idx,
+ uint32_t size)
+{
+ uint32_t len;
+
+ len = iter->len;
+ if (len > size) {
+ len = size;
+ }
+ iovec[idx].iov_base = iter->ptr;
+ iovec[idx].iov_len = len;
+
+ return len;
+}
+
+static inline int
+iov_subset(struct iovec *src, int src_count, uint32_t start, uint32_t size,
+ struct iovec **dst, int32_t dst_count)
+{
+ struct iovec iovec[src_count];
+ iov_iter_t iter;
+ uint32_t len;
+ int32_t idx;
+
+ if ((size == 0) || !iov_iter_init(&iter, src, src_count, start)) {
+ return 0;
+ }
+
+ idx = 0;
+ do {
+ len = iov_iter_to_iovec(&iter, iovec, idx, size);
+ idx++;
+ size -= len;
+ } while ((size > 0) && iov_iter_next(&iter, len));
+
+ if (*dst == NULL) {
+ *dst = iov_dup(iovec, idx);
+ if (*dst == NULL) {
+ return -1;
+ }
+ } else if (idx > dst_count) {
+ return -1;
+ } else {
+ memcpy(*dst, iovec, idx * sizeof(struct iovec));
+ }
+
+ return idx;
+}
+
+static inline int
+iov_skip(struct iovec *iovec, uint32_t count, uint32_t size)
+{
+ uint32_t len, idx;
+
+ idx = 0;
+ while ((size > 0) && (idx < count)) {
+ len = iovec[idx].iov_len;
+ if (len > size) {
+ iovec[idx].iov_len -= size;
+ iovec[idx].iov_base += size;
+ break;
+ }
+ idx++;
+ size -= len;
+ }
+
+ if (idx > 0) {
+ memmove(iovec, iovec + idx, (count - idx) * sizeof(struct iovec));
+ }
+
+ return count - idx;
+}
+
+static inline size_t
+iov_range_copy(const struct iovec *dst, uint32_t dst_count, uint32_t dst_offset,
+ const struct iovec *src, uint32_t src_count, uint32_t src_offset,
+ uint32_t size)
+{
+ iov_iter_t src_iter, dst_iter;
+ uint32_t len, total;
+
+ if ((size == 0) || !iov_iter_init(&src_iter, src, src_count, src_offset) ||
+ !iov_iter_init(&dst_iter, dst, dst_count, dst_offset)) {
+ return 0;
+ }
+
+ total = 0;
+ do {
+ len = iov_iter_copy(&dst_iter, &src_iter, size);
+ total += len;
+ size -= len;
+ } while ((size > 0) && iov_iter_next(&src_iter, len) &&
+ iov_iter_next(&dst_iter, len));
+
+ return total;
+}
+
+static inline void
+iov_unload(char *buf, const struct iovec *vector, int count)
+{
+ int i;
+ int copied = 0;
+
+ for (i = 0; i < count; i++) {
+ memcpy(buf + copied, vector[i].iov_base, vector[i].iov_len);
+ copied += vector[i].iov_len;
+ }
+}
+
+static inline size_t
+iov_load(const struct iovec *vector, int count, char *buf, int size)
+{
+ size_t left = size;
+ size_t cp = 0;
+ int ret = 0;
+ int i = 0;
+
+ while (left && i < count) {
+ cp = min(vector[i].iov_len, left);
+ if (vector[i].iov_base != buf + (size - left))
+ memcpy(vector[i].iov_base, buf + (size - left), cp);
+ ret += cp;
+ left -= cp;
+ if (left)
+ i++;
+ }
+
+ return ret;
+}
+
+static inline size_t
+iov_copy(const struct iovec *dst, int dcnt, const struct iovec *src, int scnt)
+{
+ return iov_range_copy(dst, dcnt, 0, src, scnt, 0, UINT32_MAX);
+}
+
+/* based on the amusing discussion @ https://rusty.ozlabs.org/?p=560 */
+static bool
+memeqzero(const void *data, size_t length)
+{
+ const unsigned char *p = data;
+ size_t len;
+
+ /* Check first 16 bytes manually */
+ for (len = 0; len < 16; len++) {
+ if (!length)
+ return true;
+ if (*p)
+ return false;
+ p++;
+ length--;
+ }
+
+ /* Now we know that's zero, memcmp with self. */
+ return memcmp(data, p, length) == 0;
+}
+
+static inline int
+mem_0filled(const char *buf, size_t size)
+{
+ return !memeqzero(buf, size);
+}
+
+static inline int
+iov_0filled(const struct iovec *vector, int count)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (i = 0; i < count; i++) {
+ ret = mem_0filled(vector[i].iov_base, vector[i].iov_len);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+typedef enum {
+ gf_timefmt_default = 0,
+ gf_timefmt_FT = 0, /* YYYY-MM-DD hh:mm:ss */
+ gf_timefmt_Ymd_T, /* YYYY/MM-DD-hh:mm:ss */
+ gf_timefmt_bdT, /* MMM DD hh:mm:ss */
+ gf_timefmt_F_HMS, /* YYYY-MM-DD hhmmss */
+ gf_timefmt_dirent,
+ gf_timefmt_s,
+ gf_timefmt_last
+} gf_timefmts;
+
+static inline char *
+gf_time_fmt_tv(char *dst, size_t sz_dst, struct timeval *tv, unsigned int fmt)
+{
+ extern void _gf_timestuff(const char ***, const char ***);
+ static gf_timefmts timefmt_last = (gf_timefmts)-1;
+ static const char **fmts;
+ static const char **zeros;
+ struct tm tm, *res;
+ int localtime = 0;
+ int len = 0;
+ int pos = 0;
+
+ if (timefmt_last == ((gf_timefmts)-1)) {
+ _gf_timestuff(&fmts, &zeros);
+ timefmt_last = gf_timefmt_last;
+ }
+ if (timefmt_last <= fmt) {
+ fmt = gf_timefmt_default;
+ }
+ localtime = gf_log_get_localtime();
+ res = localtime ? localtime_r(&tv->tv_sec, &tm)
+ : gmtime_r(&tv->tv_sec, &tm);
+ if (tv->tv_sec && (res != NULL)) {
+ len = strftime(dst, sz_dst, fmts[fmt], &tm);
+ if (len == 0)
+ return dst;
+ pos += len;
+ if (tv->tv_usec >= 0) {
+ len = snprintf(dst + pos, sz_dst - pos, ".%" GF_PRI_SUSECONDS,
+ tv->tv_usec);
+ if (len >= sz_dst - pos)
+ return dst;
+ pos += len;
+ }
+ strftime(dst + pos, sz_dst - pos, " %z", &tm);
+ } else {
+ strncpy(dst, "N/A", sz_dst);
+ }
+ return dst;
+}
+
+static inline char *
+gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
+{
+ struct timeval tv = {utime, -1};
+
+ return gf_time_fmt_tv(dst, sz_dst, &tv, fmt);
+}
+
+/* This function helps us use gfid (unique identity) to generate inode's unique
+ * number in glusterfs.
+ */
+ino_t
+gfid_to_ino(uuid_t gfid);
+
+int
+mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks);
+/*
+ * rounds up nr to power of two. If nr is already a power of two, just returns
+ * nr
+ */
+
+int
+gf_lstat_dir(const char *path, struct stat *stbuf_in);
+
+int32_t
+gf_roundup_power_of_two(int32_t nr);
+
+/*
+ * rounds up nr to next power of two. If nr is already a power of two, next
+ * power of two is returned.
+ */
+
+int32_t
+gf_roundup_next_power_of_two(int32_t nr);
+
+char *
+gf_trim(char *string);
+int
+gf_volume_name_validate(const char *volume_name);
+
+int
+gf_string2long(const char *str, long *n);
+int
+gf_string2ulong(const char *str, unsigned long *n);
+int
+gf_string2int(const char *str, int *n);
+int
+gf_string2uint(const char *str, unsigned int *n);
+int
+gf_string2double(const char *str, double *n);
+int
+gf_string2longlong(const char *str, long long *n);
+int
+gf_string2ulonglong(const char *str, unsigned long long *n);
+
+int
+gf_string2int8(const char *str, int8_t *n);
+int
+gf_string2int16(const char *str, int16_t *n);
+int
+gf_string2int32(const char *str, int32_t *n);
+int
+gf_string2int64(const char *str, int64_t *n);
+int
+gf_string2uint8(const char *str, uint8_t *n);
+int
+gf_string2uint16(const char *str, uint16_t *n);
+int
+gf_string2uint32(const char *str, uint32_t *n);
+int
+gf_string2uint64(const char *str, uint64_t *n);
+
+int
+gf_strstr(const char *str, const char *delim, const char *match);
+
+int
+gf_string2ulong_base10(const char *str, unsigned long *n);
+int
+gf_string2uint_base10(const char *str, unsigned int *n);
+int
+gf_string2uint8_base10(const char *str, uint8_t *n);
+int
+gf_string2uint16_base10(const char *str, uint16_t *n);
+int
+gf_string2uint32_base10(const char *str, uint32_t *n);
+int
+gf_string2uint64_base10(const char *str, uint64_t *n);
+int
+gf_string2bytesize_uint64(const char *str, uint64_t *n);
+int
+gf_string2bytesize_int64(const char *str, int64_t *n);
+int
+gf_string2percent_or_bytesize(const char *str, double *n,
+ gf_boolean_t *is_percent);
+
+int
+gf_string2boolean(const char *str, gf_boolean_t *b);
+int
+gf_strn2boolean(const char *str, const int len, gf_boolean_t *b);
+int
+gf_string2percent(const char *str, double *n);
+int
+gf_string2time(const char *str, uint32_t *n);
+
+int
+gf_lockfd(int fd);
+int
+gf_unlockfd(int fd);
+
+int
+get_checksum_for_file(int fd, uint32_t *checksum, int op_version);
+int
+log_base2(unsigned long x);
+
+int
+get_checksum_for_path(char *path, uint32_t *checksum, int op_version);
+int
+get_file_mtime(const char *path, time_t *stamp);
+char *
+gf_resolve_path_parent(const char *path);
+
+char *
+strtail(char *str, const char *pattern);
+void
+skipwhite(char **s);
+char *
+nwstrtail(char *str, char *pattern);
+/* returns a new string with nth word of given string. n>=1 */
+
+typedef struct token_iter {
+ char *end;
+ char sep;
+} token_iter_t;
+char *
+token_iter_init(char *str, char sep, token_iter_t *tit);
+gf_boolean_t
+next_token(char **tokenp, token_iter_t *tit);
+void
+drop_token(char *token, token_iter_t *tit);
+
+gf_boolean_t
+mask_match(const uint32_t a, const uint32_t b, const uint32_t m);
+gf_boolean_t
+gf_is_ip_in_net(const char *network, const char *ip_str);
+char
+valid_host_name(char *address, int length);
+char
+valid_ipv4_address(char *address, int length, gf_boolean_t wildcard_acc);
+char
+valid_ipv6_address(char *address, int length, gf_boolean_t wildcard_acc);
+char
+valid_internet_address(char *address, gf_boolean_t wildcard_acc,
+ gf_boolean_t cidr);
+gf_boolean_t
+valid_mount_auth_address(char *address);
+gf_boolean_t
+valid_ipv4_subnetwork(const char *address);
+gf_boolean_t
+gf_sock_union_equal_addr(union gf_sock_union *a, union gf_sock_union *b);
+char *
+gf_rev_dns_lookup(const char *ip);
+
+char *
+uuid_utoa(uuid_t uuid);
+char *
+uuid_utoa_r(uuid_t uuid, char *dst);
+char *
+lkowner_utoa(gf_lkowner_t *lkowner);
+char *
+lkowner_utoa_r(gf_lkowner_t *lkowner, char *dst, int len);
+char *
+leaseid_utoa(const char *lease_id);
+gf_boolean_t
+is_valid_lease_id(const char *lease_id);
+char *
+gf_leaseid_get(void);
+char *
+gf_existing_leaseid(void);
+
+void
+gf_array_insertionsort(void *a, int l, int r, size_t elem_size, gf_cmp cmp);
+int
+gf_is_str_int(const char *value);
+
+char *gf_uint64_2human_readable(uint64_t);
+int
+validate_brick_name(char *brick);
+char *
+get_host_name(char *word, char **host);
+char *
+get_path_name(char *word, char **path);
+void
+gf_path_strip_trailing_slashes(char *path);
+uint64_t
+get_mem_size(void);
+int
+gf_strip_whitespace(char *str, int len);
+int
+gf_canonicalize_path(char *path);
+char *
+generate_glusterfs_ctx_id(void);
+char *
+gf_get_reserved_ports(void);
+int
+gf_process_reserved_ports(unsigned char *ports, uint32_t ceiling);
+gf_boolean_t
+gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling);
+int
+gf_get_hostname_from_ip(char *client_ip, char **hostname);
+gf_boolean_t
+gf_is_local_addr(char *hostname);
+gf_boolean_t
+gf_is_same_address(char *host1, char *host2);
+void
+gf_xxh64_wrapper(const unsigned char *data, size_t const len,
+ unsigned long long const seed, char *xxh64);
+int
+gf_gfid_generate_from_xxh64(uuid_t gfid, char *key);
+
+int
+gf_set_timestamp(const char *src, const char *dest);
+
+int
+gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ ...) __attribute__((__format__(__printf__, 5, 6)));
+
+int
+gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ va_list args);
+int
+gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *),
+ void *arg, const char *name, ...)
+ __attribute__((__format__(__printf__, 4, 5)));
+
+void
+gf_thread_set_name(pthread_t thread, const char *name, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+void
+gf_thread_set_vname(pthread_t thread, const char *name, va_list args);
+gf_boolean_t
+gf_is_pid_running(int pid);
+gf_boolean_t
+gf_is_service_running(char *pidfile, int *pid);
+gf_boolean_t
+gf_valid_pid(const char *pid, int length);
+int
+gf_skip_header_section(int fd, int header_len);
+
+struct iatt;
+struct _dict;
+
+gf_boolean_t
+dht_is_linkfile(struct iatt *buf, struct _dict *dict);
+
+int
+gf_check_log_format(const char *value);
+
+int
+gf_check_logger(const char *value);
+
+gf_boolean_t
+gf_compare_sockaddr(const struct sockaddr *addr1, const struct sockaddr *addr2);
+
+char *
+gf_backtrace_save(char *buf);
+
+void
+gf_backtrace_done(char *buf);
+
+gf_loglevel_t
+fop_log_level(glusterfs_fop_t fop, int op_errno);
+
+int32_t
+gf_build_absolute_path(char *current_path, char *relative_path, char **path);
+
+int
+recursive_rmdir(const char *delete_path);
+
+int
+gf_get_index_by_elem(char **array, char *elem);
+
+int
+glusterfs_is_local_pathinfo(char *pathinfo, gf_boolean_t *local);
+
+int
+gf_thread_cleanup_xint(pthread_t thread);
+
+ssize_t
+gf_nread(int fd, void *buf, size_t count);
+
+ssize_t
+gf_nwrite(int fd, const void *buf, size_t count);
+
+void
+_mask_cancellation(void);
+void
+_unmask_cancellation(void);
+
+gf_boolean_t
+gf_is_zero_filled_stat(struct iatt *buf);
+
+void
+gf_zero_fill_stat(struct iatt *buf);
+
+gf_boolean_t
+gf_is_valid_xattr_namespace(char *k);
+
+const char *
+gf_inode_type_to_str(ia_type_t type);
+
+int32_t
+gf_bits_count(uint64_t n);
+
+int32_t
+gf_bits_index(uint64_t n);
+
+const char *
+gf_fop_string(glusterfs_fop_t fop);
+
+int
+gf_fop_int(char *fop);
+
+char *
+get_ip_from_addrinfo(struct addrinfo *addr, char **ip);
+
+int
+close_fds_except(int *fdv, size_t count);
+
+int
+gf_getgrouplist(const char *user, gid_t group, gid_t **groups);
+
+int
+glusterfs_compute_sha256(const unsigned char *content, size_t size,
+ char *sha256_hash);
+
+char *
+gf_strncpy(char *dest, const char *src, const size_t dest_size);
+
+void
+gf_strTrim(char **s);
+
+int
+gf_replace_old_iatt_in_dict(struct _dict *);
+
+int
+gf_replace_new_iatt_in_dict(struct _dict *);
+
+xlator_cmdline_option_t *
+find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args);
+
+int
+gf_d_type_from_ia_type(ia_type_t type);
+
+int
+gf_syncfs(int fd);
+
+int
+gf_nanosleep(uint64_t nsec);
+
+static inline time_t
+gf_time(void)
+{
+ return time(NULL);
+}
+
+/* Return delta value in microseconds. */
+
+static inline double
+gf_tvdiff(struct timeval *start, struct timeval *end)
+{
+ struct timeval t;
+
+ if (start->tv_usec > end->tv_usec)
+ t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e6 +
+ (double)(t.tv_usec - start->tv_usec);
+}
+
+/* Return delta value in nanoseconds. */
+
+static inline double
+gf_tsdiff(struct timespec *start, struct timespec *end)
+{
+ struct timespec t;
+
+ if (start->tv_nsec > end->tv_nsec)
+ t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e9 +
+ (double)(t.tv_nsec - start->tv_nsec);
+}
+
+#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/compat-errno.h b/libglusterfs/src/glusterfs/compat-errno.h
new file mode 100644
index 00000000000..c4ab09ab0d5
--- /dev/null
+++ b/libglusterfs/src/glusterfs/compat-errno.h
@@ -0,0 +1,238 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __COMPAT_ERRNO_H__
+#define __COMPAT_ERRNO_H__
+
+#include <errno.h>
+
+#define GF_ERROR_CODE_SUCCESS 0
+#define GF_ERROR_CODE_UNKNOWN 1024
+#define GF_ERRNO_UNKNOWN 1024
+
+#define GF_ERROR_CODE_PERM 1 /* Operation not permitted */
+#define GF_ERROR_CODE_NOENT 2 /* No such file or directory */
+#define GF_ERROR_CODE_SRCH 3 /* No such process */
+#define GF_ERROR_CODE_INTR 4 /* Interrupted system call */
+#define GF_ERROR_CODE_IO 5 /* I/O error */
+#define GF_ERROR_CODE_NXIO 6 /* No such device or address */
+#define GF_ERROR_CODE_2BIG 7 /* Argument list too long */
+#define GF_ERROR_CODE_NOEXEC 8 /* Exec format error */
+#define GF_ERROR_CODE_BADF 9 /* Bad file number */
+#define GF_ERROR_CODE_CHILD 10 /* No child processes */
+#define GF_ERROR_CODE_AGAIN 11 /* Try again */
+#define GF_ERROR_CODE_NOMEM 12 /* Out of memory */
+#define GF_ERROR_CODE_ACCES 13 /* Permission denied */
+#define GF_ERROR_CODE_FAULT 14 /* Bad address */
+#define GF_ERROR_CODE_NOTBLK 15 /* Block device required */
+#define GF_ERROR_CODE_BUSY 16 /* Device or resource busy */
+#define GF_ERROR_CODE_EXIST 17 /* File exists */
+#define GF_ERROR_CODE_XDEV 18 /* Cross-device link */
+#define GF_ERROR_CODE_NODEV 19 /* No such device */
+#define GF_ERROR_CODE_NOTDIR 20 /* Not a directory */
+#define GF_ERROR_CODE_ISDIR 21 /* Is a directory */
+#define GF_ERROR_CODE_INVAL 22 /* Invalid argument */
+#define GF_ERROR_CODE_NFILE 23 /* File table overflow */
+#define GF_ERROR_CODE_MFILE 24 /* Too many open files */
+#define GF_ERROR_CODE_NOTTY 25 /* Not a typewriter */
+#define GF_ERROR_CODE_TXTBSY 26 /* Text file busy */
+#define GF_ERROR_CODE_FBIG 27 /* File too large */
+#define GF_ERROR_CODE_NOSPC 28 /* No space left on device */
+#define GF_ERROR_CODE_SPIPE 29 /* Illegal seek */
+#define GF_ERROR_CODE_ROFS 30 /* Read-only file system */
+#define GF_ERROR_CODE_MLINK 31 /* Too many links */
+#define GF_ERROR_CODE_PIPE 32 /* Broken pipe */
+#define GF_ERROR_CODE_DOM 33 /* Math argument out of domain of func */
+#define GF_ERROR_CODE_RANGE 34 /* Math result not representable */
+#define GF_ERROR_CODE_DEADLK 35 /* Resource deadlock would occur */
+#define GF_ERROR_CODE_NAMETOOLONG 36 /* File name too long */
+#define GF_ERROR_CODE_NOLCK 37 /* No record locks available */
+#define GF_ERROR_CODE_NOSYS 38 /* Function not implemented */
+#define GF_ERROR_CODE_NOTEMPTY 39 /* Directory not empty */
+#define GF_ERROR_CODE_LOOP 40 /* Too many symbolic links encountered */
+
+#define GF_ERROR_CODE_NOMSG 42 /* No message of desired type */
+#define GF_ERROR_CODE_IDRM 43 /* Identifier removed */
+#define GF_ERROR_CODE_CHRNG 44 /* Channel number out of range */
+#define GF_ERROR_CODE_L2NSYNC 45 /* Level 2 not synchronized */
+#define GF_ERROR_CODE_L3HLT 46 /* Level 3 halted */
+#define GF_ERROR_CODE_L3RST 47 /* Level 3 reset */
+#define GF_ERROR_CODE_LNRNG 48 /* Link number out of range */
+#define GF_ERROR_CODE_UNATCH 49 /* Protocol driver not attached */
+#define GF_ERROR_CODE_NOCSI 50 /* No CSI structure available */
+#define GF_ERROR_CODE_L2HLT 51 /* Level 2 halted */
+#define GF_ERROR_CODE_BADE 52 /* Invalid exchange */
+#define GF_ERROR_CODE_BADR 53 /* Invalid request descriptor */
+#define GF_ERROR_CODE_XFULL 54 /* Exchange full */
+#define GF_ERROR_CODE_NOANO 55 /* No anode */
+#define GF_ERROR_CODE_BADRQC 56 /* Invalid request code */
+#define GF_ERROR_CODE_BADSLT 57 /* Invalid slot */
+#define GF_ERROR_CODE_BFONT 59 /* Bad font file format */
+#define GF_ERROR_CODE_NOSTR 60 /* Device not a stream */
+#define GF_ERROR_CODE_NODATA 61 /* No data available */
+#define GF_ERROR_CODE_TIME 62 /* Timer expired */
+#define GF_ERROR_CODE_NOSR 63 /* Out of streams resources */
+#define GF_ERROR_CODE_NONET 64 /* Machine is not on the network */
+#define GF_ERROR_CODE_NOPKG 65 /* Package not installed */
+#define GF_ERROR_CODE_REMOTE 66 /* Object is remote */
+#define GF_ERROR_CODE_NOLINK 67 /* Link has been severed */
+#define GF_ERROR_CODE_ADV 68 /* Advertise error */
+#define GF_ERROR_CODE_SRMNT 69 /* Srmount error */
+#define GF_ERROR_CODE_COMM 70 /* Communication error on send */
+#define GF_ERROR_CODE_PROTO 71 /* Protocol error */
+#define GF_ERROR_CODE_MULTIHOP 72 /* Multihop attempted */
+#define GF_ERROR_CODE_DOTDOT 73 /* RFS specific error */
+#define GF_ERROR_CODE_BADMSG 74 /* Not a data message */
+#define GF_ERROR_CODE_OVERFLOW 75 /* Value too large for defined data type */
+#define GF_ERROR_CODE_NOTUNIQ 76 /* Name not unique on network */
+#define GF_ERROR_CODE_BADFD 77 /* File descriptor in bad state */
+#define GF_ERROR_CODE_REMCHG 78 /* Remote address changed */
+#define GF_ERROR_CODE_LIBACC 79 /* Can not access a needed shared library */
+#define GF_ERROR_CODE_LIBBAD 80 /* Accessing a corrupted shared library */
+#define GF_ERROR_CODE_LIBSCN 81 /* .lib section in a.out corrupted */
+#define GF_ERROR_CODE_LIBMAX \
+ 82 /* Attempting to link in too many shared libraries */
+#define GF_ERROR_CODE_LIBEXEC 83 /* Cannot exec a shared library directly */
+#define GF_ERROR_CODE_ILSEQ 84 /* Illegal byte sequence */
+#define GF_ERROR_CODE_RESTART \
+ 85 /* Interrupted system call should be restarted */
+#define GF_ERROR_CODE_STRPIPE 86 /* Streams pipe error */
+#define GF_ERROR_CODE_USERS 87 /* Too many users */
+#define GF_ERROR_CODE_NOTSOCK 88 /* Socket operation on non-socket */
+#define GF_ERROR_CODE_DESTADDRREQ 89 /* Destination address required */
+#define GF_ERROR_CODE_MSGSIZE 90 /* Message too long */
+#define GF_ERROR_CODE_PROTOTYPE 91 /* Protocol wrong type for socket */
+#define GF_ERROR_CODE_NOPROTOOPT 92 /* Protocol not available */
+#define GF_ERROR_CODE_PROTONOSUPPORT 93 /* Protocol not supported */
+#define GF_ERROR_CODE_SOCKTNOSUPPORT 94 /* Socket type not supported */
+#define GF_ERROR_CODE_OPNOTSUPP \
+ 95 /* Operation not supported on transport endpoint */
+#define GF_ERROR_CODE_PFNOSUPPORT 96 /* Protocol family not supported */
+#define GF_ERROR_CODE_AFNOSUPPORT \
+ 97 /* Address family not supported by protocol */
+#define GF_ERROR_CODE_ADDRINUSE 98 /* Address already in use */
+#define GF_ERROR_CODE_ADDRNOTAVAIL 99 /* Cannot assign requested address */
+#define GF_ERROR_CODE_NETDOWN 100 /* Network is down */
+#define GF_ERROR_CODE_NETUNREACH 101 /* Network is unreachable */
+#define GF_ERROR_CODE_NETRESET \
+ 102 /* Network dropped connection because of reset */
+#define GF_ERROR_CODE_CONNABORTED 103 /* Software caused connection abort */
+#define GF_ERROR_CODE_CONNRESET 104 /* Connection reset by peer */
+#define GF_ERROR_CODE_NOBUFS 105 /* No buffer space available */
+#define GF_ERROR_CODE_ISCONN 106 /* Transport endpoint is already connected */
+#define GF_ERROR_CODE_NOTCONN 107 /* Transport endpoint is not connected */
+#define GF_ERROR_CODE_SHUTDOWN \
+ 108 /* Cannot send after transport endpoint shutdown */
+#define GF_ERROR_CODE_TOOMANYREFS 109 /* Too many references: cannot splice */
+#define GF_ERROR_CODE_TIMEDOUT 110 /* Connection timed out */
+#define GF_ERROR_CODE_CONNREFUSED 111 /* Connection refused */
+#define GF_ERROR_CODE_HOSTDOWN 112 /* Host is down */
+#define GF_ERROR_CODE_HOSTUNREACH 113 /* No route to host */
+#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
+#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
+#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
+#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
+#define GF_ERROR_CODE_STALE 116 /* Stale NFS file handle */
+#define GF_ERROR_CODE_UCLEAN 117 /* Structure needs cleaning */
+#define GF_ERROR_CODE_NOTNAM 118 /* Not a XENIX named type file */
+#define GF_ERROR_CODE_NAVAIL 119 /* No XENIX semaphores available */
+#define GF_ERROR_CODE_ISNAM 120 /* Is a named type file */
+#define GF_ERROR_CODE_REMOTEIO 121 /* Remote I/O error */
+#define GF_ERROR_CODE_DQUOT 122 /* Quota exceeded */
+#define GF_ERROR_CODE_NOMEDIUM 123 /* No medium found */
+#define GF_ERROR_CODE_MEDIUMTYPE 124 /* Wrong medium type */
+#define GF_ERROR_CODE_CANCELED 125 /* Operation Canceled */
+#define GF_ERROR_CODE_NOKEY 126 /* Required key not available */
+#define GF_ERROR_CODE_KEYEXPIRED 127 /* Key has expired */
+#define GF_ERROR_CODE_KEYREVOKED 128 /* Key has been revoked */
+#define GF_ERROR_CODE_KEYREJECTED 129 /* Key was rejected by service */
+
+/* for robust mutexes */
+#define GF_ERROR_CODE_OWNERDEAD 130 /* Owner died */
+#define GF_ERROR_CODE_NOTRECOVERABLE 131 /* State not recoverable */
+
+/* Should never be seen by user programs */
+#define GF_ERROR_CODE_RESTARTSYS 512
+#define GF_ERROR_CODE_RESTARTNOINTR 513
+#define GF_ERROR_CODE_RESTARTNOHAND 514 /* restart if no handler.. */
+#define GF_ERROR_CODE_NOIOCTLCMD 515 /* No ioctl command */
+#define GF_ERROR_CODE_RESTART_RESTARTBLOCK \
+ 516 /* restart by calling sys_restart_syscall */
+
+/* Defined for the NFSv3 protocol */
+#define GF_ERROR_CODE_BADHANDLE 521 /* Illegal NFS file handle */
+#define GF_ERROR_CODE_NOTSYNC 522 /* Update synchronization mismatch */
+#define GF_ERROR_CODE_BADCOOKIE 523 /* Cookie is stale */
+#define GF_ERROR_CODE_NOTSUPP 524 /* Operation is not supported */
+#define GF_ERROR_CODE_TOOSMALL 525 /* Buffer or request is too small */
+#define GF_ERROR_CODE_SERVERFAULT 526 /* An untranslatable error occurred */
+#define GF_ERROR_CODE_BADTYPE 527 /* Type not supported by server */
+#define GF_ERROR_CODE_JUKEBOX \
+ 528 /* Request initiated, but will not complete before timeout */
+#define GF_ERROR_CODE_IOCBQUEUED \
+ 529 /* iocb queued, will get completion event */
+#define GF_ERROR_CODE_IOCBRETRY 530 /* iocb queued, will trigger a retry */
+
+/* Darwin OS X */
+#define GF_ERROR_CODE_NOPOLICY 701
+#define GF_ERROR_CODE_BADMACHO 702
+#define GF_ERROR_CODE_PWROFF 703
+#define GF_ERROR_CODE_DEVERR 704
+#define GF_ERROR_CODE_BADARCH 705
+#define GF_ERROR_CODE_BADEXEC 706
+#define GF_ERROR_CODE_SHLIBVERS 707
+
+/* Solaris */
+/* ENOTACTIVE 73 / * Facility is not active */
+#define GF_ERROR_CODE_NOTACTIVE 801
+/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
+#define GF_ERROR_CODE_LOCKUNMAPPED 802
+
+/* BSD system */
+#define GF_ERROR_CODE_PROCLIM 901 /* Too many processes */
+#define GF_ERROR_CODE_BADRPC 902 /* RPC struct is bad */
+#define GF_ERROR_CODE_RPCMISMATCH 903 /* RPC version wrong */
+#define GF_ERROR_CODE_PROGUNAVAIL 904 /* RPC prog. not avail */
+#define GF_ERROR_CODE_PROGMISMATCH 905 /* Program version wrong */
+#define GF_ERROR_CODE_PROCUNAVAIL 905 /* Bad procedure for program */
+#define GF_ERROR_CODE_FTYPE 906 /* Inappropriate file type or format */
+#define GF_ERROR_CODE_AUTH 907 /* Authentication error */
+#define GF_ERROR_CODE_NEEDAUTH 908 /* Need authenticator */
+#define GF_ERROR_CODE_DOOFUS 909 /* Programming error */
+
+#define GF_ERROR_CODE_NOATTR GF_ERROR_CODE_NODATA /* Attribute not found */
+
+/* Either one of enodata or enoattr will be there in system */
+#ifndef ENOATTR
+#define ENOATTR ENODATA
+#endif /* ENOATTR */
+
+#ifndef ENODATA
+#define ENODATA ENOATTR
+#endif /* ENODATA */
+
+#ifndef EBADFD
+#define EBADFD EBADRPC
+#endif /* EBADFD */
+
+#if !defined(ENODATA)
+/* This happens on FreeBSD. Value borrowed from Linux. */
+#define ENODATA 61
+#endif
+
+/* These functions are defined for all the OS flags, but content will
+ * be different for each OS flag.
+ */
+int32_t
+gf_errno_to_error(int32_t op_errno);
+int32_t
+gf_error_to_errno(int32_t error);
+
+#endif /* __COMPAT_ERRNO_H__ */
diff --git a/libglusterfs/src/glusterfs/compat-uuid.h b/libglusterfs/src/glusterfs/compat-uuid.h
new file mode 100644
index 00000000000..6e7fdefbfab
--- /dev/null
+++ b/libglusterfs/src/glusterfs/compat-uuid.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef _GF_UUID_H
+#define _GF_UUID_H
+
+#include <uuid/uuid.h>
+
+static inline void
+gf_uuid_clear(uuid_t uuid)
+{
+ uuid_clear(uuid);
+}
+
+static inline int
+gf_uuid_compare(uuid_t u1, uuid_t u2)
+{
+ return uuid_compare(u1, u2);
+}
+
+static inline void
+gf_uuid_copy(uuid_t dst, const uuid_t src)
+{
+ uuid_copy(dst, src);
+}
+
+static inline void
+gf_uuid_generate(uuid_t uuid)
+{
+ uuid_generate(uuid);
+}
+
+static inline int
+gf_uuid_is_null(uuid_t uuid)
+{
+ return uuid_is_null(uuid);
+}
+
+static inline int
+gf_uuid_parse(const char *in, uuid_t uuid)
+{
+ return uuid_parse(in, uuid);
+}
+
+static inline void
+gf_uuid_unparse(const uuid_t uuid, char *out)
+{
+ uuid_unparse(uuid, out);
+}
+
+/* TODO: add more uuid APIs, use constructs like this:
+#if defined(__NetBSD__) * NetBSD libc *
+
+#include <string.h>
+
+static inline void
+gf_uuid_clear (uuid_t uuid)
+{
+ memset (uuid, 0, sizeof (uuid_t));
+}
+#endif
+*/
+
+#endif /* _GF_UUID_H */
diff --git a/libglusterfs/src/glusterfs/compat.h b/libglusterfs/src/glusterfs/compat.h
new file mode 100644
index 00000000000..bf00d903152
--- /dev/null
+++ b/libglusterfs/src/glusterfs/compat.h
@@ -0,0 +1,544 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __COMPAT_H__
+#define __COMPAT_H__
+
+#include <stdint.h>
+
+#ifndef LLONG_MAX
+#define LLONG_MAX __LONG_LONG_MAX__ /* compat with old gcc */
+#endif /* LLONG_MAX */
+
+#ifdef GF_LINUX_HOST_OS
+
+#define UNIX_PATH_MAX 108
+
+#include <sys/un.h>
+#include <linux/limits.h>
+#include <sys/xattr.h>
+#include <linux/xattr.h>
+#include <endian.h>
+#ifdef HAVE_LINUX_FALLOC_H
+#include <linux/falloc.h>
+#endif
+
+#ifdef HAVE_ENDIAN_H
+#include <endian.h>
+#endif
+
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/bin/umount"
+#endif
+#define GF_XATTR_NAME_MAX XATTR_NAME_MAX
+#endif /* GF_LINUX_HOST_OS */
+
+/*
+ * Define the fallocate flags in case we do not have the header. This also
+ * accounts for older systems that do not define FALLOC_FL_PUNCH_HOLE.
+ */
+
+#ifndef FALLOC_FL_KEEP_SIZE
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#endif
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+#endif
+#ifndef FALLOC_FL_ZERO_RANGE
+#define FALLOC_FL_ZERO_RANGE 0x10 /* zeroes out range */
+#endif
+#ifndef FALLOC_FL_COLLAPSE_RANGE
+#define FALLOC_FL_COLLAPSE_RANGE 0x08 /* reduces the size */
+#endif
+#ifndef FALLOC_FL_INSERT_RANGE
+#define FALLOC_FL_INSERT_RANGE 0x20 /* expands the size */
+#endif
+
+#ifndef HAVE_LLISTXATTR
+
+/* This part is valid only in case of old glibc which doesn't support
+ * 'llistxattr()' system calls.
+ */
+
+#define lremovexattr(path, key) removexattr(path, key)
+#define llistxattr(path, key, size) listxattr(path, key, size)
+#define lgetxattr(path, key, value, size) getxattr(path, key, value, size)
+#define lsetxattr(path, key, value, size, flags) \
+ setxattr(path, key, value, size, flags)
+
+#endif /* HAVE_LLISTXATTR */
+
+#ifdef GF_DARWIN_HOST_OS
+#include <machine/endian.h>
+#include <libkern/OSByteOrder.h>
+#include <sys/xattr.h>
+
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define htole16(x) OSSwapHostToLittleInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+#define le16toh(x) OSSwapLittleToHostInt16(x)
+
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define htole64(x) OSSwapHostToLittleInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+#define le64toh(x) OSSwapLittleToHostInt64(x)
+
+#endif
+
+#ifdef GF_BSD_HOST_OS
+/* In case of FreeBSD and NetBSD */
+
+#define UNIX_PATH_MAX 104
+#include <sys/types.h>
+
+#include <sys/un.h>
+#include <sys/endian.h>
+#include <sys/extattr.h>
+#ifdef HAVE_SYS_XATTR_H
+#include <sys/xattr.h>
+#endif /* HAVE_SYS_XATTR_H */
+#include <limits.h>
+
+#include <libgen.h>
+/*
+ * This is where things like off64_t are defined.
+ * So include it before declaring _OFF64_T_DECLARED.
+ * If the freebsd version has support for off64_t
+ * including stdio.h should be sufficient.
+ */
+#include <stdio.h>
+
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+
+#ifndef XATTR_CREATE
+enum {
+ ATTR_CREATE = 1,
+#define XATTR_CREATE ATTR_CREATE
+ ATTR_REPLACE = 2
+#define XATTR_REPLACE ATTR_REPLACE
+};
+#endif /* XATTR_CREATE */
+
+#ifndef sighandler_t
+#define sighandler_t sig_t
+#endif
+
+#ifdef __FreeBSD__
+#undef ino_t
+#define ino_t uint64_t
+#include <sys/types.h>
+#include <sys/extattr.h>
+/* Using NAME_MAX since EXTATTR_MAXNAMELEN is inside a preprocessor conditional
+ * for the kernel
+ */
+#define GF_XATTR_NAME_MAX NAME_MAX
+#endif /* __FreeBSD__ */
+
+#ifdef __NetBSD__
+#define GF_XATTR_NAME_MAX XATTR_NAME_MAX
+#endif
+
+#ifndef ino64_t
+#define ino64_t ino_t
+#endif
+
+#ifndef EUCLEAN
+#define EUCLEAN 0
+#endif
+
+#include <netinet/in.h>
+#ifndef s6_addr16
+#define s6_addr16 __u6_addr.__u6_addr16
+#endif
+#ifndef s6_addr32
+#define s6_addr32 __u6_addr.__u6_addr32
+#endif
+
+#ifndef LOGIN_NAME_MAX
+#define LOGIN_NAME_MAX 256
+#endif
+
+/* Posix dictates NAME_MAX to be used */
+#ifndef NAME_MAX
+#ifdef MAXNAMLEN
+#define NAME_MAX MAXNAMLEN
+#else
+#define NAME_MAX 255
+#endif
+#endif
+
+#define F_GETLK64 F_GETLK
+#define F_SETLK64 F_SETLK
+#define F_SETLKW64 F_SETLKW
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+#define FALLOC_FL_ZERO_RANGE 0x10 /* zeroes out range */
+#define FALLOC_FL_INSERT_RANGE 0x20 /* Expands the size */
+#define FALLOC_FL_COLLAPSE_RANGE 0x08 /* Reduces the size */
+
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/sbin/umount"
+#endif
+
+void
+gf_extattr_list_reshape(char *list, ssize_t size);
+
+#endif /* GF_BSD_HOST_OS */
+
+#ifdef GF_DARWIN_HOST_OS
+#include <machine/endian.h>
+#include <libkern/OSByteOrder.h>
+
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define htole16(x) OSSwapHostToLittleInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+#define le16toh(x) OSSwapLittleToHostInt16(x)
+
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define htole64(x) OSSwapHostToLittleInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+#define le64toh(x) OSSwapLittleToHostInt64(x)
+
+#define UNIX_PATH_MAX 104
+/* OSX Yosemite now has this defined */
+#ifndef AT_SYMLINK_NOFOLLOW
+#define AT_SYMLINK_NOFOLLOW 0x100
+#endif
+#include <sys/types.h>
+
+#include <sys/un.h>
+#include <sys/xattr.h>
+#include <limits.h>
+
+#include <libgen.h>
+
+#if __DARWIN_64_BIT_INO_T == 0
+#error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
+#endif /* __DARWIN_64_BIT_INO_T */
+
+#if __DARWIN_64_BIT_INO_T == 0
+#error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
+#endif /* __DARWIN_64_BIT_INO_T */
+
+#ifndef sighandler_t
+#define sighandler_t sig_t
+#endif
+
+#ifndef EUCLEAN
+#define EUCLEAN 0
+#endif
+
+#include <netinet/in.h>
+#ifndef s6_addr16
+#define s6_addr16 __u6_addr.__u6_addr16
+#endif
+#ifndef s6_addr32
+#define s6_addr32 __u6_addr.__u6_addr32
+#endif
+
+/* Posix dictates NAME_MAX to be used */
+#ifndef NAME_MAX
+#ifdef MAXNAMLEN
+#define NAME_MAX MAXNAMLEN
+#else
+#define NAME_MAX 255
+#endif
+#endif
+
+#define F_GETLK64 F_GETLK
+#define F_SETLK64 F_SETLK
+#define F_SETLKW64 F_SETLKW
+
+#ifndef FTW_CONTINUE
+#define FTW_CONTINUE 0
+#endif
+
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/sbin/umount"
+#endif
+#endif /* GF_DARWIN_HOST_OS */
+
+#ifdef GF_SOLARIS_HOST_OS
+
+#define UNIX_PATH_MAX 108
+#define EUCLEAN 117
+
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+#include <libgen.h>
+#include <sys/mkdev.h>
+
+#ifndef lchmod
+#define lchmod chmod
+#endif
+
+#define lgetxattr(path, key, value, size) \
+ solaris_getxattr(path, key, value, size)
+enum {
+ ATTR_CREATE = 1,
+#define XATTR_CREATE ATTR_CREATE
+ ATTR_REPLACE = 2
+#define XATTR_REPLACE ATTR_REPLACE
+};
+
+/* This patch is not present in Solaris 10 and before */
+#ifndef dirfd
+#define dirfd(dirp) ((dirp)->dd_fd)
+#endif
+
+/* Posix dictates NAME_MAX to be used */
+#ifndef NAME_MAX
+#ifdef MAXNAMLEN
+#define NAME_MAX MAXNAMLEN
+#else
+#define NAME_MAX 255
+#endif
+#endif
+
+#include <netinet/in.h>
+#ifndef s6_addr16
+#define S6_ADDR16(x) ((uint16_t *)((char *)&(x).s6_addr))
+#endif
+#ifndef s6_addr32
+#define s6_addr32 _S6_un._S6_u32
+#endif
+
+#define lutimes(filename, times) utimes(filename, times)
+
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#endif
+
+enum {
+ DT_UNKNOWN = 0,
+#define DT_UNKNOWN DT_UNKNOWN
+ DT_FIFO = 1,
+#define DT_FIFO DT_FIFO
+ DT_CHR = 2,
+#define DT_CHR DT_CHR
+ DT_DIR = 4,
+#define DT_DIR DT_DIR
+ DT_BLK = 6,
+#define DT_BLK DT_BLK
+ DT_REG = 8,
+#define DT_REG DT_REG
+ DT_LNK = 10,
+#define DT_LNK DT_LNK
+ DT_SOCK = 12,
+#define DT_SOCK DT_SOCK
+ DT_WHT = 14
+#define DT_WHT DT_WHT
+};
+
+#ifndef _PATH_MOUNTED
+#define _PATH_MOUNTED "/etc/mtab"
+#endif
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/sbin/umount"
+#endif
+
+#ifndef O_ASYNC
+#ifdef FASYNC
+#define O_ASYNC FASYNC
+#else
+#define O_ASYNC 0
+#endif
+#endif
+
+#ifndef FTW_CONTINUE
+#define FTW_CONTINUE 0
+#endif
+
+int
+asprintf(char **string_ptr, const char *format, ...);
+
+int
+vasprintf(char **result, const char *format, va_list args);
+char *
+strsep(char **str, const char *delims);
+int
+solaris_listxattr(const char *path, char *list, size_t size);
+int
+solaris_removexattr(const char *path, const char *key);
+int
+solaris_getxattr(const char *path, const char *key, char *value, size_t size);
+int
+solaris_setxattr(const char *path, const char *key, const char *value,
+ size_t size, int flags);
+int
+solaris_fgetxattr(int fd, const char *key, char *value, size_t size);
+int
+solaris_fsetxattr(int fd, const char *key, const char *value, size_t size,
+ int flags);
+int
+solaris_flistxattr(int fd, char *list, size_t size);
+
+int
+solaris_rename(const char *oldpath, const char *newpath);
+
+int
+solaris_unlink(const char *pathname);
+
+char *
+mkdtemp(char *temp);
+
+#define GF_SOLARIS_XATTR_DIR ".glusterfs_xattr_inode"
+
+int
+solaris_xattr_resolve_path(const char *real_path, char **path);
+
+#endif /* GF_SOLARIS_HOST_OS */
+
+#ifndef HAVE_ARGP
+#include "argp.h"
+#else
+#include <argp.h>
+#endif /* HAVE_ARGP */
+
+#ifndef HAVE_STRNLEN
+size_t
+strnlen(const char *string, size_t maxlen);
+#endif /* STRNLEN */
+
+#ifndef strdupa
+#define strdupa(s) \
+ (__extension__({ \
+ __const char *__old = (s); \
+ size_t __len = strlen(__old) + 1; \
+ char *__new = (char *)__builtin_alloca(__len); \
+ (char *)memcpy(__new, __old, __len); \
+ }))
+#endif
+
+#define GF_DIR_ALIGN(x) (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+
+#include <sys/types.h>
+#include <dirent.h>
+
+static inline int32_t
+dirent_size(struct dirent *entry)
+{
+ int32_t size = -1;
+
+#ifdef GF_BSD_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_namlen);
+#endif
+#ifdef GF_DARWIN_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_namlen);
+#endif
+#ifdef GF_LINUX_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_reclen);
+#endif
+#ifdef GF_SOLARIS_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_reclen);
+#endif
+ return size;
+}
+
+#ifdef THREAD_UNSAFE_BASENAME
+char *
+basename_r(const char *);
+#define basename(path) basename_r(path)
+#endif /* THREAD_UNSAFE_BASENAME */
+
+#ifdef THREAD_UNSAFE_DIRNAME
+char *
+dirname_r(char *path);
+#define dirname(path) dirname_r(path)
+#endif /* THREAD_UNSAFE_DIRNAME */
+
+#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
+/* Linux, Solaris, Cygwin */
+#define ST_ATIM_SEC(stbuf) ((stbuf)->st_atim.tv_sec)
+#define ST_CTIM_SEC(stbuf) ((stbuf)->st_ctim.tv_sec)
+#define ST_MTIM_SEC(stbuf) ((stbuf)->st_mtim.tv_sec)
+#define ST_ATIM_SEC_SET(stbuf, val) ((stbuf)->st_atim.tv_sec = (val))
+#define ST_MTIM_SEC_SET(stbuf, val) ((stbuf)->st_mtim.tv_sec = (val))
+#define ST_CTIM_SEC_SET(stbuf, val) ((stbuf)->st_ctim.tv_sec = (val))
+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
+#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atim.tv_nsec = (val))
+#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtim.tv_nsec = (val))
+#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctim.tv_nsec = (val))
+#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
+/* FreeBSD, NetBSD */
+#define ST_ATIM_SEC(stbuf) ((stbuf)->st_atimespec.tv_sec)
+#define ST_CTIM_SEC(stbuf) ((stbuf)->st_ctimespec.tv_sec)
+#define ST_MTIM_SEC(stbuf) ((stbuf)->st_mtimespec.tv_sec)
+#define ST_ATIM_SEC_SET(stbuf, val) ((stbuf)->st_atimespec.tv_sec = (val))
+#define ST_MTIM_SEC_SET(stbuf, val) ((stbuf)->st_mtimespec.tv_sec = (val))
+#define ST_CTIM_SEC_SET(stbuf, val) ((stbuf)->st_ctimespec.tv_sec = (val))
+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
+#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atimespec.tv_nsec = (val))
+#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtimespec.tv_nsec = (val))
+#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctimespec.tv_nsec = (val))
+#else
+#define ST_ATIM_NSEC(stbuf) (0)
+#define ST_CTIM_NSEC(stbuf) (0)
+#define ST_MTIM_NSEC(stbuf) (0)
+#define ST_ATIM_NSEC_SET(stbuf, val) \
+ do { \
+ } while (0);
+#define ST_MTIM_NSEC_SET(stbuf, val) \
+ do { \
+ } while (0);
+#define ST_CTIM_NSEC_SET(stbuf, val) \
+ do { \
+ } while (0);
+#endif
+
+#ifdef GF_BSD_HOST_OS
+#define CLOCK_REALTIME_COARSE CLOCK_REALTIME
+#endif
+
+#if defined(__GNUC__) && !defined(RELAX_POISONING)
+/* Use run API, see run.h */
+#include <stdlib.h> /* system(), mkostemp() */
+#include <stdio.h> /* popen() */
+#ifdef GF_LINUX_HOST_OS
+#include <sys/sysmacros.h>
+#endif
+#pragma GCC poison system mkostemp popen
+#endif
+
+int
+gf_umount_lazy(char *xlname, char *path, int rmdir);
+
+#ifndef GF_XATTR_NAME_MAX
+#error 'Please define GF_XATTR_NAME_MAX for your OS distribution.'
+#endif
+
+#endif /* __COMPAT_H__ */
diff --git a/libglusterfs/src/daemon.h b/libglusterfs/src/glusterfs/daemon.h
index 80836a326d5..48850800b5e 100644
--- a/libglusterfs/src/daemon.h
+++ b/libglusterfs/src/glusterfs/daemon.h
@@ -11,13 +11,10 @@
#ifndef _DAEMON_H
#define _DAEMON_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#define DEVNULLPATH "/dev/null"
-int os_daemon_return(int nochdir, int noclose);
-int os_daemon(int nochdir, int noclose);
+int
+os_daemon_return(int nochdir, int noclose);
+int
+os_daemon(int nochdir, int noclose);
#endif /*_DAEMON_H */
diff --git a/libglusterfs/src/glusterfs/default-args.h b/libglusterfs/src/glusterfs/default-args.h
new file mode 100644
index 00000000000..ca7526fcab6
--- /dev/null
+++ b/libglusterfs/src/glusterfs/default-args.h
@@ -0,0 +1,455 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* libglusterfs/src/defaults.h:
+ This file contains definition of default fops and mops functions.
+*/
+
+#ifndef _DEFAULT_ARGS_H
+#define _DEFAULT_ARGS_H
+
+#include "glusterfs/xlator.h"
+
+int
+args_lookup_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent);
+
+int
+args_stat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata);
+
+int
+args_fstat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata);
+
+int
+args_truncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+args_ftruncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+args_access_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_readlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata);
+
+int
+args_mknod_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_mkdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_unlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_rmdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int
+args_symlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int
+args_rename_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+int
+args_link_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_create_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_open_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata);
+
+int
+args_readv_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+args_writev_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+args_put_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_flush_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int
+args_fsync_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata);
+
+int
+args_opendir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int
+args_fsyncdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_statfs_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata);
+
+int
+args_setxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_getxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+int
+args_fsetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_fgetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+int
+args_removexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_fremovexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_lk_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_flock *lock, dict_t *xdata);
+
+int
+args_inodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_finodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_entrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_fentrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_readdirp_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+int
+args_readdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+int
+args_rchecksum_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+int
+args_xattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata);
+
+int
+args_fxattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata);
+
+int
+args_setattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_fsetattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_fallocate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_discard_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_zerofill_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_ipc_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int
+args_seek_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ off_t offset, dict_t *xdata);
+
+void
+args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_lease *lease, dict_t *xdata);
+
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+void
+args_cbk_wipe(default_args_cbk_t *args_cbk);
+
+void
+args_wipe(default_args_t *args);
+
+int
+args_lookup_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_stat_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_fstat_store(default_args_t *args, fd_t *fd, dict_t *xdata);
+
+int
+args_truncate_store(default_args_t *args, loc_t *loc, off_t off, dict_t *xdata);
+int
+args_ftruncate_store(default_args_t *args, fd_t *fd, off_t off, dict_t *xdata);
+
+int
+args_access_store(default_args_t *args, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int
+args_readlink_store(default_args_t *args, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int
+args_mknod_store(default_args_t *args, loc_t *loc, mode_t mode, dev_t rdev,
+ mode_t umask, dict_t *xdata);
+
+int
+args_mkdir_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *xdata);
+
+int
+args_unlink_store(default_args_t *args, loc_t *loc, int xflag, dict_t *xdata);
+
+int
+args_rmdir_store(default_args_t *args, loc_t *loc, int flags, dict_t *xdata);
+
+int
+args_symlink_store(default_args_t *args, const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata);
+
+int
+args_rename_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+args_link_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+args_create_store(default_args_t *args, loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+args_open_store(default_args_t *args, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata);
+
+int
+args_readv_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, dict_t *xdata);
+
+int
+args_writev_store(default_args_t *args, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+args_put_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t off,
+ struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+int
+args_flush_store(default_args_t *args, fd_t *fd, dict_t *xdata);
+
+int
+args_fsync_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int
+args_opendir_store(default_args_t *args, loc_t *loc, fd_t *fd, dict_t *xdata);
+
+int
+args_fsyncdir_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int
+args_statfs_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_setxattr_store(default_args_t *args, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int
+args_getxattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata);
+
+int
+args_fsetxattr_store(default_args_t *args, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int
+args_fgetxattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int
+args_removexattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata);
+
+int
+args_fremovexattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int
+args_lk_store(default_args_t *args, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
+
+int
+args_inodelk_store(default_args_t *args, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+args_finodelk_store(default_args_t *args, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+args_entrylk_store(default_args_t *args, const char *volume, loc_t *loc,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int
+args_fentrylk_store(default_args_t *args, const char *volume, fd_t *fd,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+int
+args_readdirp_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata);
+
+int
+args_readdir_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata);
+
+int
+args_rchecksum_store(default_args_t *args, fd_t *fd, off_t offset, int32_t len,
+ dict_t *xdata);
+
+int
+args_xattrop_store(default_args_t *args, loc_t *loc, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
+
+int
+args_fxattrop_store(default_args_t *args, fd_t *fd, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
+
+int
+args_setattr_store(default_args_t *args, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int
+args_fsetattr_store(default_args_t *args, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int
+args_fallocate_store(default_args_t *args, fd_t *fd, int32_t mode, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+args_discard_store(default_args_t *args, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata);
+
+int
+args_zerofill_store(default_args_t *args, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata);
+
+int
+args_ipc_store(default_args_t *args, int32_t op, dict_t *xdata);
+
+int
+args_seek_store(default_args_t *args, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+void
+args_lease_store(default_args_t *args, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata);
+
+int
+args_getactivelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, lock_migration_info_t *locklist,
+ dict_t *xdata);
+
+int
+args_setactivelk_store(default_args_t *args, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int
+args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode,
+ dict_t *xdata);
+
+int
+args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off_t off64_out, size_t len,
+ uint32_t flags, dict_t *xdata);
+
+void
+args_cbk_init(default_args_cbk_t *args_cbk);
+#endif /* _DEFAULT_ARGS_H */
diff --git a/libglusterfs/src/glusterfs/defaults.h b/libglusterfs/src/glusterfs/defaults.h
new file mode 100644
index 00000000000..5a818eeb91a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/defaults.h
@@ -0,0 +1,1275 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* libglusterfs/src/defaults.h:
+ This file contains definition of default fops and mops functions.
+*/
+
+#ifndef _DEFAULTS_H
+#define _DEFAULTS_H
+
+#include "glusterfs/xlator.h"
+
+typedef struct {
+ int op_ret;
+ int op_errno;
+ inode_t *inode;
+ struct iatt stat;
+ struct iatt prestat;
+ struct iatt poststat;
+ struct iatt preparent; /* @preoldparent in rename_cbk */
+ struct iatt postparent; /* @postoldparent in rename_cbk */
+ struct iatt preparent2; /* @prenewparent in rename_cbk */
+ struct iatt postparent2; /* @postnewparent in rename_cbk */
+ const char *buf;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ fd_t *fd;
+ struct statvfs statvfs;
+ dict_t *xattr;
+ struct gf_flock lock;
+ uint32_t weak_checksum;
+ uint8_t *strong_checksum;
+ dict_t *xdata;
+ gf_dirent_t entries;
+ off_t offset; /* seek hole/data */
+ int valid; /* If the response is valid or not. For call-stub it is
+ always valid irrespective of this */
+ struct gf_lease lease;
+ lock_migration_info_t locklist;
+} default_args_cbk_t;
+
+typedef struct {
+ loc_t loc; /* @old in rename(), link() */
+ loc_t loc2; /* @new in rename(), link() */
+ fd_t *fd; /* for all the fd based ops */
+ fd_t *fd_dst; /* Only for copy_file_range destination */
+ off_t offset;
+ /*
+ * According to the man page of copy_file_range,
+ * the offsets for source and destination file
+ * are of type loff_t. But the type loff_t is
+ * linux specific and is actual a typedef of
+ * off64_t.
+ */
+ off64_t off_in; /* For copy_file_range source fd */
+ off64_t off_out; /* For copy_file_range destination fd only */
+ int mask;
+ size_t size;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+ int xflag;
+ int flags;
+ const char *linkname;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ int datasync;
+ dict_t *xattr;
+ const char *name;
+ int cmd;
+ struct gf_flock lock;
+ const char *volume;
+ entrylk_cmd entrylkcmd;
+ entrylk_type entrylktype;
+ gf_xattrop_flags_t optype;
+ int valid;
+ struct iatt stat;
+ gf_seek_what_t what;
+ dict_t *xdata;
+ struct gf_lease lease;
+ lock_migration_info_t locklist;
+} default_args_t;
+
+typedef struct {
+ int fop_enum;
+ unsigned int fop_length;
+ int *enum_list;
+ default_args_t *req_list;
+ dict_t *xdata;
+} compound_args_t;
+
+typedef struct {
+ int fop_enum;
+ unsigned int fop_length;
+ int *enum_list;
+ default_args_cbk_t *rsp_list;
+ dict_t *xdata;
+} compound_args_cbk_t;
+
+int32_t
+default_notify(xlator_t *this, int32_t event, void *data, ...);
+
+int32_t
+default_forget(xlator_t *this, inode_t *inode);
+
+int32_t
+default_release(xlator_t *this, fd_t *fd);
+
+int32_t
+default_releasedir(xlator_t *this, fd_t *fd);
+
+extern struct xlator_fops *default_fops;
+
+/* Management Operations */
+
+int32_t
+default_getspec(call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flag);
+
+int32_t
+default_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
+
+/* FileSystem operations */
+int32_t
+default_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+default_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+default_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
+
+int32_t
+default_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
+
+int32_t
+default_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int32_t
+default_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int32_t
+default_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t
+default_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+default_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+default_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+default_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+default_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t
+default_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+default_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+default_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+default_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+int32_t
+default_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+default_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t
+default_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+default_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+default_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+default_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+default_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+default_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata);
+
+int32_t
+default_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t
+default_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+
+int32_t
+default_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
+
+int32_t
+default_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+int32_t
+default_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+default_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t off, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+int32_t
+default_icreate(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dict_t *xdata);
+
+int32_t
+default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags, dict_t *xdata);
+
+/* Resume */
+int32_t
+default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flag);
+
+int32_t
+default_rchecksum_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata);
+
+/* FileSystem operations */
+int32_t
+default_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_fstat_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_truncate_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int32_t
+default_ftruncate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int32_t
+default_access_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int32_t
+default_readlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int32_t
+default_mknod_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t
+default_mkdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+int32_t
+default_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata);
+
+int32_t
+default_rmdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+default_symlink_resume(call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+int32_t
+default_rename_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t
+default_link_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t
+default_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_readv_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+default_writev_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+int32_t
+default_flush_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t
+default_opendir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+default_fsyncdir_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t
+default_statfs_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_setxattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+default_getxattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fsetxattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+default_fgetxattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_removexattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fremovexattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_lk_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_inodelk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
+
+int32_t
+default_finodelk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
+
+int32_t
+default_entrylk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_fentrylk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_readdir_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t
+default_readdirp_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t
+default_xattrop_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_fxattrop_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+int32_t
+default_rchecksum_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata);
+
+int32_t
+default_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fsetattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fallocate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len,
+ dict_t *xdata);
+
+int32_t
+default_discard_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+int32_t
+default_zerofill_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+int32_t
+default_ipc_resume(call_frame_t *frame, xlator_t *this, int32_t op,
+ dict_t *xdata);
+
+int32_t
+default_seek_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+int32_t
+default_lease_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+default_getactivelk_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_setactivelk_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector,
+ int32_t count, off_t off, struct iobref *iobref,
+ dict_t *xattr, dict_t *xdata);
+
+int32_t
+default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off_t off64_in, fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags, dict_t *xdata);
+
+/* _cbk_resume */
+
+int32_t
+default_lookup_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent);
+
+int32_t
+default_stat_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_truncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int32_t
+default_ftruncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int32_t
+default_access_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_readlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+int32_t
+default_mknod_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_mkdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_unlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int32_t
+default_rmdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int32_t
+default_symlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_rename_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+int32_t
+default_link_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_create_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int32_t
+default_open_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_readv_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t
+default_writev_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_flush_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsync_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_fstat_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_opendir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_fsyncdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_statfs_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata);
+
+int32_t
+default_setxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsetxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fgetxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_getxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_xattrop_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_fxattrop_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_removexattr_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int32_t
+default_fremovexattr_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_lk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
+
+int32_t
+default_inodelk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_finodelk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_entrylk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fentrylk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_rchecksum_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata);
+
+int32_t
+default_readdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata);
+
+int32_t
+default_readdirp_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata);
+
+int32_t
+default_setattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
+int32_t
+default_fsetattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
+int32_t
+default_fallocate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_discard_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_zerofill_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+int32_t
+default_ipc_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_seek_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset,
+ dict_t *xdata);
+
+int32_t
+default_getspec_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data);
+
+int32_t
+default_lease_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+default_getactivelk_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_setactivelk_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int32_t
+default_put_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_icreate_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dict_t *xdata);
+
+int32_t
+default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+/* _CBK */
+int32_t
+default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent);
+
+int32_t
+default_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+int32_t
+default_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+int32_t
+default_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata);
+
+int32_t
+default_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata);
+
+int32_t
+default_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
+
+int32_t
+default_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+int32_t
+default_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+int32_t
+default_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+int32_t
+default_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int32_t
+default_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int32_t
+default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata);
+
+int32_t
+default_getspec_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data);
+
+int32_t
+default_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata);
+
+int32_t
+default_getactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_put_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_icreate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata);
+
+int32_t
+default_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+int32_t
+default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_stat_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_truncate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_ftruncate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_access_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readlink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_mknod_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_mkdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_unlink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_rmdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_symlink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_rename_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_link_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_create_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_open_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readv_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_writev_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_flush_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsync_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fstat_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_opendir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsyncdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_statfs_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_setxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsetxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fgetxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_getxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_xattrop_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fxattrop_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_removexattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fremovexattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_lk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_inodelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_finodelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_entrylk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fentrylk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_rchecksum_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readdirp_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_setattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsetattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fallocate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_discard_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_zerofill_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_getspec_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_ipc_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_seek_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_lease_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_getactivelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_setactivelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_put_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_icreate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_mem_acct_init(xlator_t *this);
+
+void
+default_fini(xlator_t *this);
+
+#endif /* _DEFAULTS_H */
diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h
new file mode 100644
index 00000000000..d0467c6dfb6
--- /dev/null
+++ b/libglusterfs/src/glusterfs/dict.h
@@ -0,0 +1,420 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DICT_H
+#define _DICT_H
+
+#include <inttypes.h>
+#include <sys/uio.h>
+#include <pthread.h>
+
+#include "glusterfs/common-utils.h"
+
+typedef struct _data data_t;
+typedef struct _dict dict_t;
+typedef struct _data_pair data_pair_t;
+
+#define dict_set_sizen(this, key, value) dict_setn(this, key, SLEN(key), value)
+
+#define dict_add_sizen(this, key, value) dict_addn(this, key, SLEN(key), value)
+
+#define dict_get_sizen(this, key) dict_getn(this, key, SLEN(key))
+
+#define dict_del_sizen(this, key) dict_deln(this, key, SLEN(key))
+
+#define dict_set_str_sizen(this, key, str) \
+ dict_set_strn(this, key, SLEN(key), str)
+
+#define dict_set_sizen_str_sizen(this, key, str) \
+ dict_set_nstrn(this, key, SLEN(key), str, SLEN(str))
+
+#define dict_set_dynstr_sizen(this, key, str) \
+ dict_set_dynstrn(this, key, SLEN(key), str)
+
+#define dict_get_str_sizen(this, key, str) \
+ dict_get_strn(this, key, SLEN(key), str)
+
+#define dict_get_int32_sizen(this, key, val) \
+ dict_get_int32n(this, key, SLEN(key), val)
+
+#define dict_set_int32_sizen(this, key, val) \
+ dict_set_int32n(this, key, SLEN(key), val)
+
+#define GF_PROTOCOL_DICT_SERIALIZE(this, from_dict, to, len, ope, labl) \
+ do { \
+ int _ret = 0; \
+ \
+ if (!from_dict) \
+ break; \
+ \
+ _ret = dict_allocate_and_serialize(from_dict, to, &len); \
+ if (_ret < 0) { \
+ gf_msg(this->name, GF_LOG_WARNING, 0, LG_MSG_DICT_SERIAL_FAILED, \
+ "failed to get serialized dict (%s)", (#from_dict)); \
+ ope = EINVAL; \
+ goto labl; \
+ } \
+ } while (0)
+
+#define GF_PROTOCOL_DICT_UNSERIALIZE(xl, to, buff, len, ret, ope, labl) \
+ do { \
+ if (!len) \
+ break; \
+ to = dict_new(); \
+ GF_VALIDATE_OR_GOTO(xl->name, to, labl); \
+ \
+ ret = dict_unserialize(buff, len, &to); \
+ if (ret < 0) { \
+ gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_DICT_UNSERIAL_FAILED, \
+ "failed to unserialize dictionary (%s)", (#to)); \
+ \
+ ope = EINVAL; \
+ goto labl; \
+ } \
+ \
+ } while (0)
+
+#define dict_foreach_inline(d, c) for (c = d->members_list; c; c = c->next)
+
+#define DICT_KEY_VALUE_MAX_SIZE 1048576
+#define DICT_MAX_FLAGS 256
+#define DICT_FLAG_SET 1
+#define DICT_FLAG_CLEAR 0
+#define DICT_HDR_LEN 4
+#define DICT_DATA_HDR_KEY_LEN 4
+#define DICT_DATA_HDR_VAL_LEN 4
+
+struct _data {
+ char *data;
+ gf_atomic_t refcount;
+ gf_dict_data_type_t data_type;
+ uint32_t len;
+ gf_boolean_t is_static;
+};
+
+struct _data_pair {
+ struct _data_pair *hash_next;
+ struct _data_pair *prev;
+ struct _data_pair *next;
+ data_t *value;
+ char *key;
+ uint32_t key_hash;
+};
+
+struct _dict {
+ uint64_t max_count;
+ int32_t hash_size;
+ int32_t count;
+ gf_atomic_t refcount;
+ data_pair_t **members;
+ data_pair_t *members_list;
+ char *extra_stdfree;
+ gf_lock_t lock;
+ data_pair_t *members_internal;
+ data_pair_t free_pair;
+ /* Variable to store total keylen + value->len */
+ uint32_t totkvlen;
+};
+
+typedef gf_boolean_t (*dict_match_t)(dict_t *d, char *k, data_t *v, void *data);
+
+int32_t
+is_data_equal(data_t *one, data_t *two);
+void
+data_destroy(data_t *data);
+
+/* function to set a key/value pair (overwrite existing if matches the key */
+int32_t
+dict_set(dict_t *this, char *key, data_t *value);
+int32_t
+dict_setn(dict_t *this, char *key, const int keylen, data_t *value);
+
+/* function to set a new key/value pair (without checking for duplicate) */
+int32_t
+dict_add(dict_t *this, char *key, data_t *value);
+int32_t
+dict_addn(dict_t *this, char *key, const int keylen, data_t *value);
+int
+dict_get_with_ref(dict_t *this, char *key, data_t **data);
+data_t *
+dict_get(dict_t *this, char *key);
+data_t *
+dict_getn(dict_t *this, char *key, const int keylen);
+void
+dict_del(dict_t *this, char *key);
+void
+dict_deln(dict_t *this, char *key, const int keylen);
+int
+dict_reset(dict_t *dict);
+
+int
+dict_key_count(dict_t *this);
+
+int32_t
+dict_serialized_length(dict_t *dict);
+int32_t
+dict_serialize(dict_t *dict, char *buf);
+int32_t
+dict_unserialize(char *buf, int32_t size, dict_t **fill);
+
+int32_t
+dict_allocate_and_serialize(dict_t *this, char **buf, u_int *length);
+
+void
+dict_unref(dict_t *dict);
+dict_t *
+dict_ref(dict_t *dict);
+data_t *
+data_ref(data_t *data);
+void
+data_unref(data_t *data);
+
+int32_t
+dict_lookup(dict_t *this, char *key, data_t **data);
+/*
+ TODO: provide converts for different byte sizes, signedness, and void *
+ */
+data_t *
+int_to_data(int64_t value);
+data_t *
+str_to_data(char *value);
+data_t *
+strn_to_data(char *value, const int vallen);
+data_t *
+data_from_dynptr(void *value, int32_t len);
+data_t *
+bin_to_data(void *value, int32_t len);
+data_t *
+static_str_to_data(char *value);
+data_t *
+static_bin_to_data(void *value);
+
+int64_t
+data_to_int64(data_t *data);
+int32_t
+data_to_int32(data_t *data);
+int16_t
+data_to_int16(data_t *data);
+int8_t
+data_to_int8(data_t *data);
+
+uint64_t
+data_to_uint64(data_t *data);
+uint32_t
+data_to_uint32(data_t *data);
+uint16_t
+data_to_uint16(data_t *data);
+uint8_t
+data_to_uint8(data_t *data);
+
+data_t *
+data_from_int64(int64_t value);
+data_t *
+data_from_int32(int32_t value);
+data_t *
+data_from_int16(int16_t value);
+data_t *
+data_from_int8(int8_t value);
+
+data_t *
+data_from_uint64(uint64_t value);
+data_t *
+data_from_uint32(uint32_t value);
+data_t *
+data_from_uint16(uint16_t value);
+
+char *
+data_to_str(data_t *data);
+void *
+data_to_bin(data_t *data);
+void *
+data_to_ptr(data_t *data);
+data_t *
+data_copy(data_t *old);
+struct iatt *
+data_to_iatt(data_t *data, char *key);
+
+int
+dict_foreach(dict_t *this,
+ int (*fn)(dict_t *this, char *key, data_t *value, void *data),
+ void *data);
+
+int
+dict_foreach_fnmatch(dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this, char *key, data_t *value,
+ void *data),
+ void *data);
+
+int
+dict_foreach_match(dict_t *dict,
+ gf_boolean_t (*match)(dict_t *this, char *key, data_t *value,
+ void *mdata),
+ void *match_data,
+ int (*action)(dict_t *this, char *key, data_t *value,
+ void *adata),
+ void *action_data);
+
+int
+dict_null_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp);
+int
+dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp);
+dict_t *
+dict_copy(dict_t *this, dict_t *new);
+int
+dict_keys_join(void *value, int size, dict_t *dict,
+ int (*filter_fn)(char *key));
+
+/* CLEANED UP FUNCTIONS DECLARATIONS */
+GF_MUST_CHECK dict_t *
+dict_new(void);
+dict_t *
+dict_copy_with_ref(dict_t *this, dict_t *new);
+
+GF_MUST_CHECK int
+dict_reset(dict_t *dict);
+
+GF_MUST_CHECK int
+dict_get_int8(dict_t *this, char *key, int8_t *val);
+GF_MUST_CHECK int
+dict_set_int8(dict_t *this, char *key, int8_t val);
+
+GF_MUST_CHECK int
+dict_get_int16(dict_t *this, char *key, int16_t *val);
+GF_MUST_CHECK int
+dict_set_int16(dict_t *this, char *key, int16_t val);
+
+GF_MUST_CHECK int
+dict_get_int32(dict_t *this, char *key, int32_t *val);
+GF_MUST_CHECK int
+dict_get_int32n(dict_t *this, char *key, const int keylen, int32_t *val);
+GF_MUST_CHECK int
+dict_set_int32(dict_t *this, char *key, int32_t val);
+GF_MUST_CHECK int
+dict_set_int32n(dict_t *this, char *key, const int keylen, int32_t val);
+
+GF_MUST_CHECK int
+dict_get_int64(dict_t *this, char *key, int64_t *val);
+GF_MUST_CHECK int
+dict_set_int64(dict_t *this, char *key, int64_t val);
+
+GF_MUST_CHECK int
+dict_get_uint16(dict_t *this, char *key, uint16_t *val);
+GF_MUST_CHECK int
+dict_set_uint16(dict_t *this, char *key, uint16_t val);
+
+GF_MUST_CHECK int
+dict_get_uint32(dict_t *this, char *key, uint32_t *val);
+GF_MUST_CHECK int
+dict_set_uint32(dict_t *this, char *key, uint32_t val);
+
+GF_MUST_CHECK int
+dict_get_uint64(dict_t *this, char *key, uint64_t *val);
+GF_MUST_CHECK int
+dict_set_uint64(dict_t *this, char *key, uint64_t val);
+
+GF_MUST_CHECK int
+dict_check_flag(dict_t *this, char *key, int flag);
+GF_MUST_CHECK int
+dict_set_flag(dict_t *this, char *key, int flag);
+GF_MUST_CHECK int
+dict_clear_flag(dict_t *this, char *key, int flag);
+
+GF_MUST_CHECK int
+dict_get_double(dict_t *this, char *key, double *val);
+GF_MUST_CHECK int
+dict_set_double(dict_t *this, char *key, double val);
+
+GF_MUST_CHECK int
+dict_set_static_ptr(dict_t *this, char *key, void *ptr);
+GF_MUST_CHECK int
+dict_get_ptr(dict_t *this, char *key, void **ptr);
+GF_MUST_CHECK int
+dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len);
+GF_MUST_CHECK int
+dict_set_dynptr(dict_t *this, char *key, void *ptr, size_t size);
+
+GF_MUST_CHECK int
+dict_get_bin(dict_t *this, char *key, void **ptr);
+GF_MUST_CHECK int
+dict_set_bin(dict_t *this, char *key, void *ptr, size_t size);
+GF_MUST_CHECK int
+dict_set_static_bin(dict_t *this, char *key, void *ptr, size_t size);
+
+GF_MUST_CHECK int
+dict_set_option(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_set_str(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_set_strn(dict_t *this, char *key, const int keylen, char *str);
+GF_MUST_CHECK int
+dict_set_nstrn(dict_t *this, char *key, const int keylen, char *str,
+ const int vallen);
+GF_MUST_CHECK int
+dict_set_dynstr(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_set_dynstrn(dict_t *this, char *key, const int keylen, char *str);
+GF_MUST_CHECK int
+dict_set_dynstr_with_alloc(dict_t *this, char *key, const char *str);
+GF_MUST_CHECK int
+dict_add_dynstr_with_alloc(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_get_str(dict_t *this, char *key, char **str);
+GF_MUST_CHECK int
+dict_get_strn(dict_t *this, char *key, const int keylen, char **str);
+
+GF_MUST_CHECK int
+dict_get_str_boolean(dict_t *this, char *key, int default_val);
+GF_MUST_CHECK int
+dict_rename_key(dict_t *this, char *key, char *replace_key);
+GF_MUST_CHECK int
+dict_serialize_value_with_delim(dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter);
+
+GF_MUST_CHECK int
+dict_set_gfuuid(dict_t *this, char *key, uuid_t uuid, bool is_static);
+GF_MUST_CHECK int
+dict_get_gfuuid(dict_t *this, char *key, uuid_t *uuid);
+
+GF_MUST_CHECK int
+dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static);
+GF_MUST_CHECK int
+dict_get_iatt(dict_t *this, char *key, struct iatt *iatt);
+GF_MUST_CHECK int
+dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
+ bool is_static);
+GF_MUST_CHECK int
+dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata);
+
+void
+dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain);
+
+void
+dict_dump_to_log(dict_t *dict);
+
+int
+dict_dump_to_str(dict_t *dict, char *dump, int dumpsize, char *format);
+gf_boolean_t
+dict_match_everything(dict_t *d, char *k, data_t *v, void *data);
+
+dict_t *
+dict_for_key_value(const char *name, const char *value, size_t size,
+ gf_boolean_t is_static);
+
+gf_boolean_t
+are_dicts_equal(dict_t *one, dict_t *two,
+ gf_boolean_t (*match)(dict_t *d, char *k, data_t *v,
+ void *data),
+ gf_boolean_t (*value_ignore)(char *k));
+int
+dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result);
+
+int
+dict_serialized_length_lk(dict_t *this);
+#endif
diff --git a/libglusterfs/src/glusterfs/event-history.h b/libglusterfs/src/glusterfs/event-history.h
new file mode 100644
index 00000000000..f0e0422418e
--- /dev/null
+++ b/libglusterfs/src/glusterfs/event-history.h
@@ -0,0 +1,40 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _EH_H
+#define _EH_H
+
+#include <pthread.h> // for pthread_mutex_t
+#include <stddef.h> // for size_t
+#include "glusterfs/circ-buff.h" // for buffer_t, circular_buffer_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t
+
+struct event_hist {
+ buffer_t *buffer;
+ pthread_mutex_t lock;
+};
+
+typedef struct event_hist eh_t;
+
+void
+eh_dump(eh_t *event, void *data,
+ int(fn)(circular_buffer_t *buffer, void *data));
+
+eh_t *
+eh_new(size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_data)(void *data));
+
+int
+eh_save_history(eh_t *history, void *string);
+
+int
+eh_destroy(eh_t *history);
+
+#endif /* _EH_H */
diff --git a/libglusterfs/src/glusterfs/events.h b/libglusterfs/src/glusterfs/events.h
new file mode 100644
index 00000000000..74c5326427b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/events.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EVENTS_H__
+#define __EVENTS_H__
+
+#include "eventtypes.h"
+
+#ifdef USE_EVENTS
+int
+_gf_event(eventtypes_t event, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#else
+__attribute__((__format__(__printf__, 2, 3))) static inline int
+_gf_event(eventtypes_t event, const char *fmt, ...)
+{
+ return 0;
+}
+#endif /* USE_EVENTS */
+
+#define gf_event(event, fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_event(event, ##fmt); \
+ } while (0)
+
+#endif /* __EVENTS_H__ */
diff --git a/libglusterfs/src/glusterfs/fd-lk.h b/libglusterfs/src/glusterfs/fd-lk.h
new file mode 100644
index 00000000000..76cc680306a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/fd-lk.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _FD_LK_H
+#define _FD_LK_H
+
+#include "glusterfs/fd.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/list.h"
+#include "glusterfs/glusterfs.h"
+
+#define get_lk_type(type) \
+ type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK")
+
+#define get_lk_cmd(cmd) \
+ cmd == F_SETLKW ? "F_SETLKW" : (cmd == F_SETLK ? "F_SETLK" : "F_GETLK")
+
+struct _fd;
+
+struct fd_lk_ctx {
+ struct list_head lk_list;
+ gf_atomic_t ref;
+ gf_lock_t lock;
+};
+typedef struct fd_lk_ctx fd_lk_ctx_t;
+
+struct fd_lk_ctx_node {
+ int32_t cmd;
+ struct gf_flock user_flock;
+ off_t fl_start;
+ off_t fl_end;
+ short fl_type;
+ struct list_head next;
+};
+typedef struct fd_lk_ctx_node fd_lk_ctx_node_t;
+
+fd_lk_ctx_t *
+fd_lk_ctx_ref(fd_lk_ctx_t *lk_ctx);
+
+fd_lk_ctx_t *
+fd_lk_ctx_create(void);
+
+int
+fd_lk_insert_and_merge(struct _fd *lk_ctx, int32_t cmd, struct gf_flock *flock);
+
+int
+fd_lk_ctx_unref(fd_lk_ctx_t *lk_ctx);
+
+gf_boolean_t
+fd_lk_ctx_empty(fd_lk_ctx_t *lk_ctx);
+
+#endif /* _FD_LK_H */
diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h
new file mode 100644
index 00000000000..3ffaaa60504
--- /dev/null
+++ b/libglusterfs/src/glusterfs/fd.h
@@ -0,0 +1,169 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _FD_H
+#define _FD_H
+
+#include "glusterfs/list.h"
+#include <sys/types.h>
+#include <unistd.h>
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/fd-lk.h"
+
+#define GF_ANON_FD_NO -2
+#define GF_ANON_FD_FLAGS (O_RDWR | O_LARGEFILE)
+
+struct _inode;
+struct _dict;
+struct fd_lk_ctx;
+
+struct _fd_ctx {
+ union {
+ uint64_t key;
+ void *xl_key;
+ };
+ union {
+ uint64_t value1;
+ void *ptr1;
+ };
+};
+
+struct _fd {
+ uint64_t pid;
+ int32_t flags;
+ gf_atomic_t refcount;
+ struct list_head inode_list;
+ struct _inode *inode;
+ gf_lock_t lock; /* used ONLY for manipulating
+ 'struct _fd_ctx' array (_ctx).*/
+ struct _fd_ctx *_ctx;
+ int xl_count; /* Number of xl referred in this fd */
+ struct fd_lk_ctx *lk_ctx;
+ gf_boolean_t anonymous; /* fd which does not have counterpart open
+ fd on backend (server for client, posix
+ for server). */
+};
+typedef struct _fd fd_t;
+
+struct fd_table_entry {
+ fd_t *fd;
+ int next_free;
+};
+typedef struct fd_table_entry fdentry_t;
+
+struct _fdtable {
+ int refcount;
+ uint32_t max_fds;
+ pthread_rwlock_t lock;
+ fdentry_t *fdentries;
+ int first_free;
+};
+typedef struct _fdtable fdtable_t;
+
+/* Signifies no more entries in the fd table. */
+#define GF_FDTABLE_END -1
+
+/* This is used to invalidated
+ * the next_free value in an fdentry that has been allocated
+ */
+#define GF_FDENTRY_ALLOCATED -2
+
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+void
+gf_fd_put(fdtable_t *fdtable, int32_t fd);
+
+fd_t *
+gf_fd_fdptr_get(fdtable_t *fdtable, int64_t fd);
+
+fdtable_t *
+gf_fd_fdtable_alloc(void);
+
+int
+gf_fd_unused_get(fdtable_t *fdtable, fd_t *fdptr);
+
+fdentry_t *
+gf_fd_fdtable_get_all_fds(fdtable_t *fdtable, uint32_t *count);
+
+void
+gf_fd_fdtable_destroy(fdtable_t *fdtable);
+
+fd_t *
+__fd_ref(fd_t *fd);
+
+fd_t *
+fd_ref(fd_t *fd);
+
+void
+fd_unref(fd_t *fd);
+
+void
+fd_close(fd_t *fd);
+
+fd_t *
+fd_create(struct _inode *inode, pid_t pid);
+
+fd_t *
+fd_create_uint64(struct _inode *inode, uint64_t pid);
+
+fd_t *
+fd_lookup(struct _inode *inode, pid_t pid);
+
+fd_t *
+fd_lookup_uint64(struct _inode *inode, uint64_t pid);
+
+fd_t *
+fd_lookup_anonymous(inode_t *inode, int32_t flags);
+
+fd_t *
+fd_anonymous(inode_t *inode);
+
+fd_t *
+fd_anonymous_with_flags(inode_t *inode, int32_t flags);
+
+gf_boolean_t
+fd_is_anonymous(fd_t *fd);
+
+uint8_t
+fd_list_empty(struct _inode *inode);
+
+fd_t *
+fd_bind(fd_t *fd);
+
+int
+fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value);
+
+int
+fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+int
+fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+int
+__fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+int
+__fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value);
+
+int
+__fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+void
+fd_ctx_dump(fd_t *fd, char *prefix);
+
+fdentry_t *
+gf_fd_fdtable_copy_all_fds(fdtable_t *fdtable, uint32_t *count);
+
+void
+gf_fdptr_put(fdtable_t *fdtable, fd_t *fd);
+
+#endif /* _FD_H */
diff --git a/libglusterfs/src/glusterfs/gf-dirent.h b/libglusterfs/src/glusterfs/gf-dirent.h
new file mode 100644
index 00000000000..e358da30f58
--- /dev/null
+++ b/libglusterfs/src/glusterfs/gf-dirent.h
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_DIRENT_H
+#define _GF_DIRENT_H
+
+#include "glusterfs/iatt.h"
+#include "glusterfs/inode.h"
+
+#define gf_dirent_size(name) (sizeof(gf_dirent_t) + strlen(name) + 1)
+
+int
+gf_deitransform(xlator_t *this, uint64_t y);
+
+int
+gf_itransform(xlator_t *this, uint64_t x, uint64_t *y_p, int client_id);
+
+uint64_t
+gf_dirent_orig_offset(xlator_t *this, uint64_t offset);
+
+struct _dir_entry {
+ struct _dir_entry *next;
+ char *name;
+ char *link;
+ struct iatt buf;
+};
+
+struct _gf_dirent {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_dirent *next;
+ struct _gf_dirent *prev;
+ };
+ };
+ uint64_t d_ino;
+ uint64_t d_off;
+ uint32_t d_len;
+ uint32_t d_type;
+ struct iatt d_stat;
+ dict_t *dict;
+ inode_t *inode;
+ char d_name[];
+};
+
+#define DT_ISDIR(mode) (mode == DT_DIR)
+
+gf_dirent_t *
+gf_dirent_for_name(const char *name);
+gf_dirent_t *
+entry_copy(gf_dirent_t *source);
+void
+gf_dirent_entry_free(gf_dirent_t *entry);
+void
+gf_dirent_free(gf_dirent_t *entries);
+int
+gf_link_inodes_from_dirent(xlator_t *this, inode_t *parent,
+ gf_dirent_t *entries);
+int
+gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol);
+
+void
+gf_link_inode_from_dirent(xlator_t *this, inode_t *parent, gf_dirent_t *entry);
+#endif /* _GF_DIRENT_H */
diff --git a/libglusterfs/src/glusterfs/gf-event.h b/libglusterfs/src/glusterfs/gf-event.h
new file mode 100644
index 00000000000..40f8fbdf10a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/gf-event.h
@@ -0,0 +1,140 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_EVENT_H_
+#define _GF_EVENT_H_
+
+#include <pthread.h>
+#include "common-utils.h"
+#include "list.h"
+
+struct event_pool;
+struct event_ops;
+struct event_slot_poll;
+struct event_slot_epoll;
+struct event_data {
+ int idx;
+ int gen;
+} __attribute__((__packed__, __may_alias__));
+
+typedef void (*event_handler_t)(int fd, int idx, int gen, void *data,
+ int poll_in, int poll_out, int poll_err,
+ char event_thread_exit);
+
+#define EVENT_EPOLL_TABLES 1024
+#define EVENT_EPOLL_SLOTS 1024
+#define EVENT_MAX_THREADS 1024
+
+/* See rpcsvc.h to check why. */
+GF_STATIC_ASSERT(EVENT_MAX_THREADS % __BITS_PER_LONG == 0);
+
+struct event_pool {
+ struct event_ops *ops;
+
+ int fd;
+ int breaker[2];
+
+ int count;
+ struct event_slot_poll *reg;
+ struct event_slot_epoll *ereg[EVENT_EPOLL_TABLES];
+ int slots_used[EVENT_EPOLL_TABLES];
+
+ struct list_head poller_death;
+ int poller_death_sliced; /* track whether the list of fds interested
+ * poller_death is sliced. If yes, new thread death
+ * notification has to wait till the list is added
+ * back
+ */
+ int poller_gen;
+ int used;
+ int changed;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ void *evcache;
+ int evcache_size;
+
+ /* NOTE: Currently used only when event processing is done using
+ * epoll. */
+ int eventthreadcount; /* number of event threads to execute. */
+ pthread_t pollers[EVENT_MAX_THREADS]; /* poller thread_id store, and live
+ status */
+ int destroy;
+ int activethreadcount;
+
+ /*
+ * Number of threads created by auto-scaling, *in addition to* the
+ * configured number of threads. This is only applicable on the
+ * server, where we try to keep the number of threads around the number
+ * of bricks. In that case, the configured number is just "extra"
+ * threads to handle requests in excess of one per brick (including
+ * requests on the GlusterD connection). For clients or GlusterD, this
+ * number will always be zero, so the "extra" is all we have.
+ *
+ * TBD: consider auto-scaling for clients as well
+ */
+ int auto_thread_count;
+};
+
+struct event_destroy_data {
+ int readfd;
+ struct event_pool *pool;
+};
+
+struct event_ops {
+ struct event_pool *(*new)(int count, int eventthreadcount);
+
+ int (*event_register)(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death);
+
+ int (*event_select_on)(struct event_pool *event_pool, int fd, int idx,
+ int poll_in, int poll_out);
+
+ int (*event_unregister)(struct event_pool *event_pool, int fd, int idx);
+
+ int (*event_unregister_close)(struct event_pool *event_pool, int fd,
+ int idx);
+
+ int (*event_dispatch)(struct event_pool *event_pool);
+
+ int (*event_reconfigure_threads)(struct event_pool *event_pool,
+ int newcount);
+ int (*event_pool_destroy)(struct event_pool *event_pool);
+ int (*event_handled)(struct event_pool *event_pool, int fd, int idx,
+ int gen);
+};
+
+struct event_pool *
+gf_event_pool_new(int count, int eventthreadcount);
+int
+gf_event_select_on(struct event_pool *event_pool, int fd, int idx, int poll_in,
+ int poll_out);
+int
+gf_event_register(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death);
+int
+gf_event_unregister(struct event_pool *event_pool, int fd, int idx);
+int
+gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx);
+int
+gf_event_dispatch(struct event_pool *event_pool);
+int
+gf_event_reconfigure_threads(struct event_pool *event_pool, int value);
+int
+gf_event_pool_destroy(struct event_pool *event_pool);
+int
+gf_event_dispatch_destroy(struct event_pool *event_pool);
+int
+gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen);
+
+#endif /* _GF_EVENT_H_ */
diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/glusterfs/gidcache.h
index f904f26eb97..ddaabd765b5 100644
--- a/libglusterfs/src/gidcache.h
+++ b/libglusterfs/src/glusterfs/gidcache.h
@@ -11,8 +11,8 @@
#ifndef __GIDCACHE_H__
#define __GIDCACHE_H__
-#include "glusterfs.h"
-#include "locking.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h"
/*
* TBD: make the cache size tunable
@@ -26,27 +26,35 @@
* to scan more entries with every lookup/update.
*/
-#define AUX_GID_CACHE_ASSOC 4
-#define AUX_GID_CACHE_BUCKETS 256
-#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
+#define AUX_GID_CACHE_ASSOC 4
+#define AUX_GID_CACHE_BUCKETS 256
+#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
typedef struct {
- uint64_t gl_id;
- int gl_count;
- gid_t *gl_list;
- time_t gl_deadline;
+ uint64_t gl_id;
+ uint64_t gl_uid;
+ uint64_t gl_gid;
+ int gl_count;
+ gid_t *gl_list;
+ time_t gl_deadline;
} gid_list_t;
typedef struct {
- gf_lock_t gc_lock;
- uint32_t gc_max_age;
- unsigned int gc_nbuckets;
- gid_list_t gc_cache[AUX_GID_CACHE_SIZE];
+ gf_lock_t gc_lock;
+ uint32_t gc_max_age;
+ unsigned int gc_nbuckets;
+ gid_list_t gc_cache[AUX_GID_CACHE_SIZE];
} gid_cache_t;
-int gid_cache_init(gid_cache_t *, uint32_t);
-const gid_list_t *gid_cache_lookup(gid_cache_t *, uint64_t);
-void gid_cache_release(gid_cache_t *, const gid_list_t *);
-int gid_cache_add(gid_cache_t *, gid_list_t *);
+int
+gid_cache_init(gid_cache_t *, uint32_t);
+int
+gid_cache_reconf(gid_cache_t *, uint32_t);
+const gid_list_t *
+gid_cache_lookup(gid_cache_t *, uint64_t, uint64_t, uint64_t);
+void
+gid_cache_release(gid_cache_t *, const gid_list_t *);
+int
+gid_cache_add(gid_cache_t *, gid_list_t *);
#endif /* __GIDCACHE_H__ */
diff --git a/libglusterfs/src/glusterfs/glfs-message-id.h b/libglusterfs/src/glusterfs/glfs-message-id.h
new file mode 100644
index 00000000000..a1a16ca1efb
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glfs-message-id.h
@@ -0,0 +1,102 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_MESSAGE_ID_H_
+#define _GLFS_MESSAGE_ID_H_
+
+/* Base of all message IDs, all message IDs would be
+ * greater than this */
+#define GLFS_MSGID_BASE 100000
+
+/* Segment size of allocated range. Any component needing more than this
+ * segment size should take multiple segments (at times non contiguous,
+ * if extensions are being made post the next segment already allocated) */
+#define GLFS_MSGID_SEGMENT 1000
+
+/* Macro to define a range of messages for a component. The first argument is
+ * the name of the component. The second argument is the number of segments
+ * to allocate. The defined values will be GLFS_MSGID_COMP_<name> and
+ * GLFS_MSGID_COMP_<name>_END. */
+#define GLFS_MSGID_COMP(_name, _blocks) \
+ GLFS_MSGID_COMP_##_name, \
+ GLFS_MSGID_COMP_##_name##_END = (GLFS_MSGID_COMP_##_name + \
+ (GLFS_MSGID_SEGMENT * (_blocks)) - 1)
+
+#define GLFS_MSGID(_name, _msgs...) \
+ enum _msgid_table_##_name \
+ { \
+ GLFS_##_name##_COMP_BASE = GLFS_MSGID_COMP_##_name, ##_msgs, \
+ GLGS_##_name##_COMP_END \
+ }
+
+/* Per module message segments allocated */
+/* NOTE: For any new module add to the end the modules */
+enum _msgid_comp {
+ GLFS_MSGID_RESERVED = GLFS_MSGID_BASE - 1,
+
+ GLFS_MSGID_COMP(GLUSTERFSD, 1),
+ GLFS_MSGID_COMP(LIBGLUSTERFS, 1),
+ GLFS_MSGID_COMP(RPC_LIB, 1),
+ GLFS_MSGID_COMP(RPC_TRANS_RDMA, 1),
+ GLFS_MSGID_COMP(API, 1),
+ GLFS_MSGID_COMP(CLI, 1),
+ /* glusterd has a lot of messages, taking 2 segments for the same */
+ GLFS_MSGID_COMP(GLUSTERD, 2),
+ GLFS_MSGID_COMP(AFR, 1),
+ GLFS_MSGID_COMP(DHT, 1),
+ /* there is no component called 'common', however reserving this segment
+ * for common actions/errors like dict_{get/set}, memory accounting*/
+ GLFS_MSGID_COMP(COMMON, 1),
+ GLFS_MSGID_COMP(UPCALL, 1),
+ GLFS_MSGID_COMP(NFS, 1),
+ GLFS_MSGID_COMP(POSIX, 1),
+ GLFS_MSGID_COMP(PC, 1),
+ GLFS_MSGID_COMP(PS, 1),
+ GLFS_MSGID_COMP(BITROT_STUB, 1),
+ GLFS_MSGID_COMP(CHANGELOG, 1),
+ GLFS_MSGID_COMP(BITROT_BITD, 1),
+ GLFS_MSGID_COMP(RPC_TRANS_SOCKET, 1),
+ GLFS_MSGID_COMP(QUOTA, 1),
+ GLFS_MSGID_COMP(CTR, 1),
+ GLFS_MSGID_COMP(EC, 1),
+ GLFS_MSGID_COMP(IO_CACHE, 1),
+ GLFS_MSGID_COMP(IO_THREADS, 1),
+ GLFS_MSGID_COMP(MD_CACHE, 1),
+ GLFS_MSGID_COMP(OPEN_BEHIND, 1),
+ GLFS_MSGID_COMP(QUICK_READ, 1),
+ GLFS_MSGID_COMP(READ_AHEAD, 1),
+ GLFS_MSGID_COMP(READDIR_AHEAD, 1),
+ GLFS_MSGID_COMP(SYMLINK_CACHE, 1),
+ GLFS_MSGID_COMP(WRITE_BEHIND, 1),
+ GLFS_MSGID_COMP(CHANGELOG_LIB, 1),
+ GLFS_MSGID_COMP(SHARD, 1),
+ GLFS_MSGID_COMP(JBR, 1),
+ GLFS_MSGID_COMP(PL, 1),
+ GLFS_MSGID_COMP(DC, 1),
+ GLFS_MSGID_COMP(LEASES, 1),
+ GLFS_MSGID_COMP(INDEX, 1),
+ GLFS_MSGID_COMP(POSIX_ACL, 1),
+ GLFS_MSGID_COMP(NLC, 1),
+ GLFS_MSGID_COMP(SL, 1),
+ GLFS_MSGID_COMP(HAM, 1),
+ GLFS_MSGID_COMP(SDFS, 1),
+ GLFS_MSGID_COMP(QUIESCE, 1),
+ GLFS_MSGID_COMP(TA, 1),
+ GLFS_MSGID_COMP(SNAPVIEW_CLIENT, 1),
+ GLFS_MSGID_COMP(TEMPLATE, 1),
+ GLFS_MSGID_COMP(UTIME, 1),
+ GLFS_MSGID_COMP(SNAPVIEW_SERVER, 1),
+ GLFS_MSGID_COMP(CVLT, 1),
+ /* --- new segments for messages goes above this line --- */
+
+ GLFS_MSGID_END
+};
+
+#endif /* !_GLFS_MESSAGE_ID_H_ */
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
new file mode 100644
index 00000000000..b22eaae6c2f
--- /dev/null
+++ b/libglusterfs/src/glusterfs/globals.h
@@ -0,0 +1,188 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLOBALS_H
+#define _GLOBALS_H
+
+#define GF_DEFAULT_BASE_PORT 24007
+#define GF_DEFAULT_VOLFILE_TRANSPORT "tcp"
+
+#define GF_GLOBAL_XLATOR_NAME "global"
+#define GD_OP_VERSION_KEY "operating-version"
+#define GD_MIN_OP_VERSION_KEY "minimum-operating-version"
+#define GD_MAX_OP_VERSION_KEY "maximum-operating-version"
+
+#define GF_PROTECT_FROM_EXTERNAL_WRITES "trusted.glusterfs.protect.writes"
+#define GF_AVOID_OVERWRITE "glusterfs.avoid.overwrite"
+#define GF_CLEAN_WRITE_PROTECTION "glusterfs.clean.writexattr"
+
+/* Gluster versions - OP-VERSION mapping
+ *
+ * 3.3.x - 1
+ * 3.4.x - 2
+ * 3.5.0 - 3
+ * 3.5.1 - 30501
+ * 3.6.0 - 30600
+ * 3.7.0 - 30700
+ * 3.7.1 - 30701
+ * 3.7.2 - 30702
+ *
+ * Starting with Gluster v3.6, the op-version will be multi-digit integer values
+ * based on the Glusterfs version, instead of a simply incrementing integer
+ * value. The op-version for a given X.Y.Z release will be an integer XYZ, with
+ * Y and Z 2 digit always 2 digits wide and padded with 0 when needed. This
+ * should allow for some gaps between two Y releases for backports of features
+ * in Z releases.
+ */
+#define GD_OP_VERSION_MIN \
+ 1 /* MIN is the fresh start op-version, mostly \
+ should not change */
+#define GD_OP_VERSION_MAX \
+ GD_OP_VERSION_9_0 /* MAX VERSION is the maximum \
+ count in VME table, should \
+ keep changing with \
+ introduction of newer \
+ versions */
+
+#define GD_OP_VERSION_3_6_0 30600 /* Op-Version for GlusterFS 3.6.0 */
+
+#define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */
+
+#define GD_OP_VERSION_3_7_1 30701 /* Op-version for GlusterFS 3.7.1 */
+
+#define GD_OP_VERSION_3_7_2 30702 /* Op-version for GlusterFS 3.7.2 */
+
+#define GD_OP_VERSION_3_7_3 30703 /* Op-version for GlusterFS 3.7.3 */
+
+#define GD_OP_VERSION_3_7_4 30704 /* Op-version for GlusterFS 3.7.4 */
+
+#define GD_OP_VERSION_3_7_5 30705 /* Op-version for GlusterFS 3.7.5 */
+
+#define GD_OP_VERSION_3_7_6 30706 /* Op-version for GlusterFS 3.7.6 */
+
+#define GD_OP_VERSION_3_7_7 30707 /* Op-version for GlusterFS 3.7.7 */
+
+#define GD_OP_VERSION_3_7_10 30710 /* Op-version for GlusterFS 3.7.10 */
+
+#define GD_OP_VERSION_3_7_12 30712 /* Op-version for GlusterFS 3.7.12 */
+
+#define GD_OP_VERSION_3_8_0 30800 /* Op-version for GlusterFS 3.8.0 */
+
+#define GD_OP_VERSION_3_8_3 30803 /* Op-version for GlusterFS 3.8.3 */
+
+#define GD_OP_VERSION_3_8_4 30804 /* Op-version for GlusterFS 3.8.4 */
+
+#define GD_OP_VERSION_3_9_0 30900 /* Op-version for GlusterFS 3.9.0 */
+
+#define GD_OP_VERSION_3_9_1 30901 /* Op-version for GlusterFS 3.9.1 */
+
+#define GD_OP_VERSION_3_10_0 31000 /* Op-version for GlusterFS 3.10.0 */
+
+#define GD_OP_VERSION_3_10_1 31001 /* Op-version for GlusterFS 3.10.1 */
+
+#define GD_OP_VERSION_3_10_2 31002 /* Op-version for GlusterFS 3.10.2 */
+
+#define GD_OP_VERSION_3_11_0 31100 /* Op-version for GlusterFS 3.11.0 */
+
+#define GD_OP_VERSION_3_11_1 31101 /* Op-version for GlusterFS 3.11.1 */
+
+#define GD_OP_VERSION_3_12_0 31200 /* Op-version for GlusterFS 3.12.0 */
+
+#define GD_OP_VERSION_3_12_2 31202 /* Op-version for GlusterFS 3.12.2 */
+
+#define GD_OP_VERSION_3_12_3 31203 /* Op-version for GlusterFS 3.12.3 */
+
+#define GD_OP_VERSION_3_13_0 31300 /* Op-version for GlusterFS 3.13.0 */
+
+#define GD_OP_VERSION_3_13_1 31301 /* Op-version for GlusterFS 3.13.1 */
+
+#define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */
+
+#define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+
+#define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */
+
+#define GD_OP_VERSION_5_0 50000 /* Op-version for GlusterFS 5.0 */
+
+#define GD_OP_VERSION_5_4 50400 /* Op-version for GlusterFS 5.4 */
+
+#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */
+
+#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
+#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
+#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */
+#define GD_OP_VERSION_7_3 70300 /* Op-version for GlusterFS 7.3 */
+
+#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
+
+#define GD_OP_VERSION_9_0 90000 /* Op-version for GlusterFS 9.0 */
+
+#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/options.h"
+
+/* THIS */
+#define THIS (*__glusterfs_this_location())
+#define DECLARE_OLD_THIS xlator_t *old_THIS = THIS
+
+xlator_t **
+__glusterfs_this_location(void);
+xlator_t *
+glusterfs_this_get(void);
+void
+glusterfs_this_set(xlator_t *);
+
+extern xlator_t global_xlator;
+extern struct volume_options global_xl_options[];
+
+/* syncopctx */
+void *
+syncopctx_getctx(void);
+
+/* task */
+void *
+synctask_get(void);
+void
+synctask_set(void *);
+
+/* uuid_buf */
+char *
+glusterfs_uuid_buf_get(void);
+/* lkowner_buf */
+char *
+glusterfs_lkowner_buf_get(void);
+/* leaseid buf */
+char *
+glusterfs_leaseid_buf_get(void);
+char *
+glusterfs_leaseid_exist(void);
+
+/* init */
+int
+glusterfs_globals_init(glusterfs_ctx_t *ctx);
+
+void
+gf_thread_needs_cleanup(void);
+
+struct tvec_base *
+glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx);
+void
+glusterfs_ctx_tw_put(glusterfs_ctx_t *ctx);
+
+extern const char *gf_fop_list[];
+extern const char *gf_upcall_list[];
+
+/* mem acct enable/disable */
+int
+gf_global_mem_acct_enable_get(void);
+int
+gf_global_mem_acct_enable_set(int val);
+#endif /* !_GLOBALS_H */
diff --git a/libglusterfs/src/glusterfs/glusterfs-acl.h b/libglusterfs/src/glusterfs/glusterfs-acl.h
new file mode 100644
index 00000000000..987bf5fab0b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glusterfs-acl.h
@@ -0,0 +1,162 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_ACL_H
+#define _GLUSTERFS_ACL_H
+
+/* WARNING: Much if this code is restricted to Linux usage.
+ *
+ * It would be much cleaner to replace the code with something that is based on
+ * libacl (or its libc implementation on *BSD).
+ *
+ * Initial work for replacing this Linux specific implementation has been
+ * started as part of the "Improve POSIX ACLs" feature. Functionality for this
+ * feature has been added to the end of this file.
+ */
+
+#include <stdint.h>
+#include <sys/types.h> /* For uid_t */
+
+#include "glusterfs/locking.h" /* For gf_lock_t in struct posix_acl_conf */
+
+#define ACL_PROGRAM 100227
+#define ACLV3_VERSION 3
+
+#define POSIX_ACL_MINIMAL_ACE_COUNT 3
+
+#define POSIX_ACL_READ (0x04)
+#define POSIX_ACL_WRITE (0x02)
+#define POSIX_ACL_EXECUTE (0x01)
+
+#define POSIX_ACL_UNDEFINED_TAG (0x00)
+#define POSIX_ACL_USER_OBJ (0x01)
+#define POSIX_ACL_USER (0x02)
+#define POSIX_ACL_GROUP_OBJ (0x04)
+#define POSIX_ACL_GROUP (0x08)
+#define POSIX_ACL_MASK (0x10)
+#define POSIX_ACL_OTHER (0x20)
+
+#define POSIX_ACL_UNDEFINED_ID (-1)
+
+#define POSIX_ACL_XATTR_VERSION (0x02)
+
+#define POSIX_ACL_ACCESS_XATTR "system.posix_acl_access"
+#define POSIX_ACL_DEFAULT_XATTR "system.posix_acl_default"
+
+struct posix_acl_xattr_entry {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+struct posix_acl_xattr_header {
+ uint32_t version;
+ struct posix_acl_xattr_entry entries[];
+};
+
+typedef struct posix_acl_xattr_entry posix_acl_xattr_entry;
+typedef struct posix_acl_xattr_header posix_acl_xattr_header;
+
+static inline size_t
+posix_acl_xattr_size(unsigned int count)
+{
+ return (sizeof(posix_acl_xattr_header) +
+ (count * sizeof(posix_acl_xattr_entry)));
+}
+
+static inline ssize_t
+posix_acl_xattr_count(size_t size)
+{
+ if (size < sizeof(posix_acl_xattr_header))
+ return (-1);
+ size -= sizeof(posix_acl_xattr_header);
+ if (size % sizeof(posix_acl_xattr_entry))
+ return (-1);
+ return (size / sizeof(posix_acl_xattr_entry));
+}
+
+struct posix_ace {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+struct posix_acl {
+ int refcnt;
+ int count;
+ struct posix_ace entries[];
+};
+
+struct posix_acl_ctx {
+ uid_t uid;
+ gid_t gid;
+ mode_t perm;
+ glusterfs_fop_t fop;
+ struct posix_acl *acl_access;
+ struct posix_acl *acl_default;
+};
+
+struct posix_acl_conf {
+ gf_lock_t acl_lock;
+ uid_t super_uid;
+ struct posix_acl *minimal_acl;
+};
+
+/* Above this comment, the legacy POSIX ACL support is kept until it is not
+ * used anymore. Below you will find the more portable version to support POSIX
+ * ACls based on the implementation of libacl (see sys/acl.h). */
+
+/* virtual xattrs passed over RPC, not stored on disk */
+#define GF_POSIX_ACL_ACCESS "glusterfs.posix.acl"
+#define GF_POSIX_ACL_DEFAULT "glusterfs.posix.default_acl"
+#define GF_POSIX_ACL_REQUEST(key) \
+ (!strncmp(key, GF_POSIX_ACL_ACCESS, SLEN(GF_POSIX_ACL_ACCESS)) || \
+ !strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT)))
+
+#ifdef HAVE_SYS_ACL_H /* only NetBSD does not support POSIX ACLs */
+
+#include <sys/acl.h>
+
+static inline const char *
+gf_posix_acl_get_key(const acl_type_t type)
+{
+ char *acl_key = NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ acl_key = GF_POSIX_ACL_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ acl_key = GF_POSIX_ACL_DEFAULT;
+ break;
+ default:
+ errno = EINVAL;
+ }
+
+ return acl_key;
+}
+
+static inline acl_type_t
+gf_posix_acl_get_type(const char *key)
+{
+ acl_type_t type = 0;
+
+ if (!strncmp(key, GF_POSIX_ACL_ACCESS, SLEN(GF_POSIX_ACL_ACCESS)))
+ type = ACL_TYPE_ACCESS;
+ else if (!strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT)))
+ type = ACL_TYPE_DEFAULT;
+ else
+ errno = EINVAL;
+
+ return type;
+}
+
+#endif /* HAVE_SYS_ACL_H */
+#endif /* _GLUSTERFS_ACL_H */
diff --git a/libglusterfs/src/glusterfs/glusterfs-fops.h b/libglusterfs/src/glusterfs/glusterfs-fops.h
new file mode 100644
index 00000000000..030b2701608
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glusterfs-fops.h
@@ -0,0 +1,241 @@
+/*
+ Copyright (c) 2008-2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_FOPS_H_
+#define _GLUSTERFS_FOPS_H_
+
+#include <glusterfs/compat.h>
+
+enum glusterfs_fop_t {
+ GF_FOP_NULL = 0,
+ GF_FOP_STAT = 0 + 1,
+ GF_FOP_READLINK = 0 + 2,
+ GF_FOP_MKNOD = 0 + 3,
+ GF_FOP_MKDIR = 0 + 4,
+ GF_FOP_UNLINK = 0 + 5,
+ GF_FOP_RMDIR = 0 + 6,
+ GF_FOP_SYMLINK = 0 + 7,
+ GF_FOP_RENAME = 0 + 8,
+ GF_FOP_LINK = 0 + 9,
+ GF_FOP_TRUNCATE = 0 + 10,
+ GF_FOP_OPEN = 0 + 11,
+ GF_FOP_READ = 0 + 12,
+ GF_FOP_WRITE = 0 + 13,
+ GF_FOP_STATFS = 0 + 14,
+ GF_FOP_FLUSH = 0 + 15,
+ GF_FOP_FSYNC = 0 + 16,
+ GF_FOP_SETXATTR = 0 + 17,
+ GF_FOP_GETXATTR = 0 + 18,
+ GF_FOP_REMOVEXATTR = 0 + 19,
+ GF_FOP_OPENDIR = 0 + 20,
+ GF_FOP_FSYNCDIR = 0 + 21,
+ GF_FOP_ACCESS = 0 + 22,
+ GF_FOP_CREATE = 0 + 23,
+ GF_FOP_FTRUNCATE = 0 + 24,
+ GF_FOP_FSTAT = 0 + 25,
+ GF_FOP_LK = 0 + 26,
+ GF_FOP_LOOKUP = 0 + 27,
+ GF_FOP_READDIR = 0 + 28,
+ GF_FOP_INODELK = 0 + 29,
+ GF_FOP_FINODELK = 0 + 30,
+ GF_FOP_ENTRYLK = 0 + 31,
+ GF_FOP_FENTRYLK = 0 + 32,
+ GF_FOP_XATTROP = 0 + 33,
+ GF_FOP_FXATTROP = 0 + 34,
+ GF_FOP_FGETXATTR = 0 + 35,
+ GF_FOP_FSETXATTR = 0 + 36,
+ GF_FOP_RCHECKSUM = 0 + 37,
+ GF_FOP_SETATTR = 0 + 38,
+ GF_FOP_FSETATTR = 0 + 39,
+ GF_FOP_READDIRP = 0 + 40,
+ GF_FOP_FORGET = 0 + 41,
+ GF_FOP_RELEASE = 0 + 42,
+ GF_FOP_RELEASEDIR = 0 + 43,
+ GF_FOP_GETSPEC = 0 + 44,
+ GF_FOP_FREMOVEXATTR = 0 + 45,
+ GF_FOP_FALLOCATE = 0 + 46,
+ GF_FOP_DISCARD = 0 + 47,
+ GF_FOP_ZEROFILL = 0 + 48,
+ GF_FOP_IPC = 0 + 49,
+ GF_FOP_SEEK = 0 + 50,
+ GF_FOP_LEASE = 0 + 51,
+ GF_FOP_COMPOUND = 0 + 52,
+ GF_FOP_GETACTIVELK = 0 + 53,
+ GF_FOP_SETACTIVELK = 0 + 54,
+ GF_FOP_PUT = 0 + 55,
+ GF_FOP_ICREATE = 0 + 56,
+ GF_FOP_NAMELINK = 0 + 57,
+ GF_FOP_COPY_FILE_RANGE = 0 + 58,
+ GF_FOP_MAXVALUE = 0 + 59,
+};
+typedef enum glusterfs_fop_t glusterfs_fop_t;
+
+enum glusterfs_event_t {
+ GF_EVENT_PARENT_UP = 1,
+ GF_EVENT_POLLIN = 1 + 1,
+ GF_EVENT_POLLOUT = 1 + 2,
+ GF_EVENT_POLLERR = 1 + 3,
+ GF_EVENT_CHILD_UP = 1 + 4,
+ GF_EVENT_CHILD_DOWN = 1 + 5,
+ GF_EVENT_CHILD_CONNECTING = 1 + 6,
+ GF_EVENT_CLEANUP = 9,
+ GF_EVENT_TRANSPORT_CONNECTED = 9 + 1,
+ GF_EVENT_VOLFILE_MODIFIED = 9 + 2,
+ GF_EVENT_GRAPH_NEW = 9 + 3,
+ GF_EVENT_TRANSLATOR_INFO = 9 + 4,
+ GF_EVENT_TRANSLATOR_OP = 9 + 5,
+ GF_EVENT_AUTH_FAILED = 9 + 6,
+ GF_EVENT_VOLUME_DEFRAG = 9 + 7,
+ GF_EVENT_PARENT_DOWN = 9 + 8,
+ GF_EVENT_VOLUME_BARRIER_OP = 9 + 9,
+ GF_EVENT_UPCALL = 9 + 10,
+ GF_EVENT_SCRUB_STATUS = 9 + 11,
+ GF_EVENT_SOME_DESCENDENT_DOWN = 9 + 12,
+ GF_EVENT_SCRUB_ONDEMAND = 9 + 13,
+ GF_EVENT_SOME_DESCENDENT_UP = 9 + 14,
+ GF_EVENT_CHILD_PING = 9 + 15,
+ GF_EVENT_MAXVAL = 9 + 16,
+};
+typedef enum glusterfs_event_t glusterfs_event_t;
+
+enum gf_op_type_t {
+ GF_OP_TYPE_NULL = 0,
+ GF_OP_TYPE_FOP = 0 + 1,
+ GF_OP_TYPE_MGMT = 0 + 2,
+ GF_OP_TYPE_MAX = 0 + 3,
+};
+typedef enum gf_op_type_t gf_op_type_t;
+
+enum glusterfs_lk_cmds_t {
+ GF_LK_GETLK = 0,
+ GF_LK_SETLK = 0 + 1,
+ GF_LK_SETLKW = 0 + 2,
+ GF_LK_RESLK_LCK = 0 + 3,
+ GF_LK_RESLK_LCKW = 0 + 4,
+ GF_LK_RESLK_UNLCK = 0 + 5,
+ GF_LK_GETLK_FD = 0 + 6,
+};
+typedef enum glusterfs_lk_cmds_t glusterfs_lk_cmds_t;
+
+enum glusterfs_lk_types_t {
+ GF_LK_F_RDLCK = 0,
+ GF_LK_F_WRLCK = 0 + 1,
+ GF_LK_F_UNLCK = 0 + 2,
+ GF_LK_EOL = 0 + 3,
+};
+typedef enum glusterfs_lk_types_t glusterfs_lk_types_t;
+
+enum gf_lease_types_t {
+ NONE = 0,
+ GF_RD_LEASE = 1,
+ GF_RW_LEASE = 2,
+ GF_LEASE_MAX_TYPE = 2 + 1,
+};
+typedef enum gf_lease_types_t gf_lease_types_t;
+
+enum gf_lease_cmds_t {
+ GF_GET_LEASE = 1,
+ GF_SET_LEASE = 2,
+ GF_UNLK_LEASE = 3,
+};
+typedef enum gf_lease_cmds_t gf_lease_cmds_t;
+
+#define LEASE_ID_SIZE 16 /* 128bits */
+
+struct gf_lease {
+ gf_lease_cmds_t cmd;
+ gf_lease_types_t lease_type;
+ char lease_id[LEASE_ID_SIZE];
+ u_int lease_flags;
+};
+typedef struct gf_lease gf_lease;
+
+enum glusterfs_lk_recovery_cmds_t {
+ F_RESLK_LCK = 200,
+ F_RESLK_LCKW = 200 + 1,
+ F_RESLK_UNLCK = 200 + 2,
+ F_GETLK_FD = 200 + 3,
+};
+typedef enum glusterfs_lk_recovery_cmds_t glusterfs_lk_recovery_cmds_t;
+
+enum gf_lk_domain_t {
+ GF_LOCK_POSIX = 0,
+ GF_LOCK_INTERNAL = 1,
+};
+typedef enum gf_lk_domain_t gf_lk_domain_t;
+
+enum entrylk_cmd {
+ ENTRYLK_LOCK = 0,
+ ENTRYLK_UNLOCK = 1,
+ ENTRYLK_LOCK_NB = 2,
+};
+typedef enum entrylk_cmd entrylk_cmd;
+
+enum entrylk_type {
+ ENTRYLK_RDLCK = 0,
+ ENTRYLK_WRLCK = 1,
+};
+typedef enum entrylk_type entrylk_type;
+#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */
+#define GF_LKOWNER_BUF_SIZE \
+ ((GF_MAX_LOCK_OWNER_LEN * 2) + (GF_MAX_LOCK_OWNER_LEN / 8))
+
+struct gf_lkowner_t {
+ int len;
+ char data[GF_MAX_LOCK_OWNER_LEN];
+};
+typedef struct gf_lkowner_t gf_lkowner_t;
+
+enum gf_xattrop_flags_t {
+ GF_XATTROP_ADD_ARRAY = 0,
+ GF_XATTROP_ADD_ARRAY64 = 1,
+ GF_XATTROP_OR_ARRAY = 2,
+ GF_XATTROP_AND_ARRAY = 3,
+ GF_XATTROP_GET_AND_SET = 4,
+ GF_XATTROP_ADD_ARRAY_WITH_DEFAULT = 5,
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT = 6,
+};
+typedef enum gf_xattrop_flags_t gf_xattrop_flags_t;
+
+enum gf_seek_what_t {
+ GF_SEEK_DATA = 0,
+ GF_SEEK_HOLE = 1,
+};
+typedef enum gf_seek_what_t gf_seek_what_t;
+
+enum gf_upcall_flags_t {
+ GF_UPCALL_NULL = 0,
+ GF_UPCALL = 1,
+ GF_UPCALL_CI_STAT = 2,
+ GF_UPCALL_CI_XATTR = 3,
+ GF_UPCALL_CI_RENAME = 4,
+ GF_UPCALL_CI_NLINK = 5,
+ GF_UPCALL_CI_FORGET = 6,
+ GF_UPCALL_LEASE_RECALL = 7,
+ GF_UPCALL_FLAGS_MAXVALUE = 8,
+};
+typedef enum gf_upcall_flags_t gf_upcall_flags_t;
+
+enum gf_dict_data_type_t {
+ GF_DATA_TYPE_UNKNOWN = 0,
+ GF_DATA_TYPE_STR_OLD = 1,
+ GF_DATA_TYPE_INT = 2,
+ GF_DATA_TYPE_UINT = 3,
+ GF_DATA_TYPE_DOUBLE = 4,
+ GF_DATA_TYPE_STR = 5,
+ GF_DATA_TYPE_PTR = 6,
+ GF_DATA_TYPE_GFUUID = 7,
+ GF_DATA_TYPE_IATT = 8,
+ GF_DATA_TYPE_MDATA = 9,
+ GF_DATA_TYPE_MAX = 10,
+};
+typedef enum gf_dict_data_type_t gf_dict_data_type_t;
+
+#endif /* !_GLUSTERFS_FOPS_H */
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
new file mode 100644
index 00000000000..e6425618b7f
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -0,0 +1,838 @@
+/*
+ Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_H
+#define _GLUSTERFS_H
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <netdb.h>
+#include <errno.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <arpa/inet.h>
+#include <sys/poll.h>
+#include <pthread.h>
+#include <limits.h> /* For PATH_MAX */
+#include <openssl/sha.h>
+
+#include "glusterfs/glusterfs-fops.h"
+#include "glusterfs/list.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/refcount.h"
+#include "glusterfs/atomic.h"
+
+#define GF_YES 1
+#define GF_NO 0
+
+#define IS_ERROR(ret) ((ret) < 0)
+#define IS_SUCCESS(ret) ((ret) >= 0)
+
+#ifndef O_LARGEFILE
+/* savannah bug #20053, patch for compiling on darwin */
+#define O_LARGEFILE 0100000 /* from bits/fcntl.h */
+#endif
+
+#ifndef O_FMODE_EXEC
+/* redhat bug 843080, added from linux/fs.h */
+#define O_FMODE_EXEC 040 // 0x20
+#endif
+
+#ifndef O_DIRECT
+/* savannah bug #20050, #20052 */
+#define O_DIRECT 0 /* From asm/fcntl.h */
+#endif
+
+#ifndef O_DIRECTORY
+/* FreeBSD does not need O_DIRECTORY */
+#define O_DIRECTORY 0
+#endif
+
+#ifndef EBADFD
+/* Mac OS X does not have EBADFD */
+#define EBADFD EBADF
+#endif
+
+#ifndef FNM_EXTMATCH
+#define FNM_EXTMATCH 0
+#endif
+
+/*gets max-offset on all architectures correctly*/
+#define GF_OFF_MAX ((1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL)
+
+#define GLUSTERD_MAX_SNAP_NAME 255
+#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 1024
+#define GLUSTERD_BRICK_SERVERS "cluster.brick-vol-servers"
+#define SLEN(str) (sizeof(str) - 1)
+
+#define ZR_MOUNTPOINT_OPT "mountpoint"
+#define ZR_ATTR_TIMEOUT_OPT "attribute-timeout"
+#define ZR_ENTRY_TIMEOUT_OPT "entry-timeout"
+#define ZR_NEGATIVE_TIMEOUT_OPT "negative-timeout"
+#define ZR_DIRECT_IO_OPT "direct-io-mode"
+#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
+#define ZR_DUMP_FUSE "dump-fuse"
+#define ZR_FUSE_MOUNTOPTS "fuse-mountopts"
+#define IO_THREADS_QUEUE_SIZE_KEY "io-thread-queue-size"
+
+#define GF_XATTR_CLRLK_CMD "glusterfs.clrlk"
+#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
+#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
+#define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids"
+#define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol"
+#define GF_REBAL_OLD_FIND_LOCAL_SUBVOL "glusterfs.old-find-local-subvol"
+#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
+#define GF_XATTR_LOCKINFO_KEY "trusted.glusterfs.lockinfo"
+#define GF_META_LOCK_KEY "glusterfs.lock-migration-meta-lock"
+#define GF_META_UNLOCK_KEY "glusterfs.lock-migration-meta-unlock"
+#define GF_XATTR_GET_REAL_FILENAME_KEY "glusterfs.get_real_filename:"
+#define GF_XATTR_USER_PATHINFO_KEY "glusterfs.pathinfo"
+#define GF_INTERNAL_IGNORE_DEEM_STATFS "ignore-deem-statfs"
+#define GF_XATTR_IOSTATS_DUMP_KEY "trusted.io-stats-dump"
+
+#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
+#define GF_MDC_LOADED_KEY_NAMES "glusterfs.mdc.loaded.key.names"
+
+#define BD_XATTR_KEY "user.glusterfs"
+#define GF_PREOP_PARENT_KEY "glusterfs.preop.parent.key"
+#define GF_PREOP_CHECK_FAILED "glusterfs.preop.check.failed"
+
+#define XATTR_IS_PATHINFO(x) \
+ ((strncmp(x, GF_XATTR_PATHINFO_KEY, strlen(x)) == 0) || \
+ (strncmp(x, GF_XATTR_USER_PATHINFO_KEY, strlen(x)) == 0))
+#define XATTR_IS_NODE_UUID(x) \
+ (strncmp(x, GF_XATTR_NODE_UUID_KEY, SLEN(GF_XATTR_NODE_UUID_KEY)) == 0)
+#define XATTR_IS_NODE_UUID_LIST(x) \
+ (strncmp(x, GF_XATTR_LIST_NODE_UUIDS_KEY, \
+ SLEN(GF_XATTR_LIST_NODE_UUIDS_KEY)) == 0)
+#define XATTR_IS_LOCKINFO(x) \
+ (strncmp(x, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0)
+
+#define XATTR_IS_BD(x) (strncmp(x, BD_XATTR_KEY, SLEN(BD_XATTR_KEY)) == 0)
+
+#define GF_XATTR_LINKINFO_KEY "trusted.distribute.linkinfo"
+#define GFID_XATTR_KEY "trusted.gfid"
+#define PGFID_XATTR_KEY_PREFIX "trusted.pgfid."
+#define GFID2PATH_VIRT_XATTR_KEY "glusterfs.gfidtopath"
+#define GFID2PATH_XATTR_KEY_PREFIX "trusted.gfid2path."
+#define GFID2PATH_XATTR_KEY_PREFIX_LENGTH 18
+#define VIRTUAL_GFID_XATTR_KEY_STR "glusterfs.gfid.string"
+#define VIRTUAL_GFID_XATTR_KEY "glusterfs.gfid"
+#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata"
+#define UUID_CANONICAL_FORM_LEN 36
+
+#define GET_ANCESTRY_PATH_KEY "glusterfs.ancestry.path"
+#define GET_ANCESTRY_DENTRY_KEY "glusterfs.ancestry.dentry"
+
+#define BITROT_DEFAULT_CURRENT_VERSION (unsigned long)1
+#define BITROT_DEFAULT_SIGNING_VERSION (unsigned long)0
+
+/* on-disk object signature keys */
+#define BITROT_OBJECT_BAD_KEY "trusted.bit-rot.bad-file"
+#define BITROT_CURRENT_VERSION_KEY "trusted.bit-rot.version"
+#define BITROT_SIGNING_VERSION_KEY "trusted.bit-rot.signature"
+
+/* globally usable bad file marker */
+#define GLUSTERFS_BAD_INODE "glusterfs.bad-inode"
+
+/* on-disk size of signing xattr (not the signature itself) */
+#define BITROT_SIGNING_XATTR_SIZE_KEY "trusted.glusterfs.bit-rot.size"
+
+/* GET/SET object signature */
+#define GLUSTERFS_GET_OBJECT_SIGNATURE "trusted.glusterfs.get-signature"
+#define GLUSTERFS_SET_OBJECT_SIGNATURE "trusted.glusterfs.set-signature"
+
+/* operation needs to be durable on-disk */
+#define GLUSTERFS_DURABLE_OP "trusted.glusterfs.durable-op"
+
+/* key for version exchange b/w bitrot stub and changelog */
+#define GLUSTERFS_VERSION_XCHG_KEY "glusterfs.version.xchg"
+
+#define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop"
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+/* GlusterFS Internal FOP Indicator flags
+ * (To pass information on the context in which a paritcular
+ * fop is performed between translators)
+ * The presence of a particular flag must be treated as an
+ * indicator of the context, however the flag is added only in
+ * a scenario where there is a need for such context across translators.
+ * So it cannot be an absolute information on context.
+ */
+#define GF_INTERNAL_CTX_KEY "glusterfs.internal-ctx"
+
+/*
+ * Always append entries to end of the enum, do not delete entries.
+ * Currently dict_set_flag allows to set up to 256 flag, if the enum
+ * needs to grow beyond this dict_set_flag has to be changed accordingly
+ */
+enum gf_internal_fop_indicator {
+ GF_DHT_HEAL_DIR /* Index 0 in bit array*/
+};
+
+/* Todo:
+ * Add GF_FOP_LINK_FILE 0x2ULL
+ * address GLUSTERFS_MARKER_DONT_ACCOUNT_KEY and
+ * GLUSTERFS_INTERNAL_FOP_KEY with this flag
+ */
+
+#define DHT_CHANGELOG_RENAME_OP_KEY "changelog.rename-op"
+
+#define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
+#define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
+#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
+#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
+#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
+#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
+#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
+#define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count"
+#define GLUSTERFS_INODELK_DOM_PREFIX "glusterfs.inodelk-dom-prefix"
+#define GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS "glusterfs.multi-dom-lk-cnt-req"
+#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
+#define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime"
+#define GF_XATTR_XTIME_PATTERN "trusted.glusterfs.*.xtime"
+#define GF_XATTR_TRIGGER_SYNC "glusterfs.geo-rep.trigger-sync"
+
+/* quota xattrs */
+#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
+#define QUOTA_LIMIT_KEY "trusted.glusterfs.quota.limit-set"
+#define QUOTA_LIMIT_OBJECTS_KEY "trusted.glusterfs.quota.limit-objects"
+#define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup"
+#define QUOTA_READ_ONLY_KEY "trusted.glusterfs.quota.read-only"
+
+/* ctime related */
+#define CTIME_MDATA_XDATA_KEY "set-ctime-mdata"
+
+/* afr related */
+#define AFR_XATTR_PREFIX "trusted.afr"
+
+/* Index xlator related */
+#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
+#define GF_XATTROP_ENTRY_CHANGES_GFID "glusterfs.xattrop_entry_changes_gfid"
+#define GF_XATTROP_INDEX_COUNT "glusterfs.xattrop_index_count"
+#define GF_XATTROP_DIRTY_GFID "glusterfs.xattrop_dirty_gfid"
+#define GF_XATTROP_DIRTY_COUNT "glusterfs.xattrop_dirty_count"
+#define GF_XATTROP_ENTRY_IN_KEY "glusterfs.xattrop-entry-create"
+#define GF_XATTROP_ENTRY_OUT_KEY "glusterfs.xattrop-entry-delete"
+#define GF_INDEX_IA_TYPE_GET_REQ "glusterfs.index-ia-type-get-req"
+#define GF_INDEX_IA_TYPE_GET_RSP "glusterfs.index-ia-type-get-rsp"
+
+#define GF_HEAL_INFO "glusterfs.heal-info"
+#define GF_AFR_HEAL_SBRAIN "glusterfs.heal-sbrain"
+#define GF_AFR_SBRAIN_STATUS "replica.split-brain-status"
+#define GF_AFR_SBRAIN_CHOICE "replica.split-brain-choice"
+#define GF_AFR_SPB_CHOICE_TIMEOUT "replica.split-brain-choice-timeout"
+#define GF_AFR_SBRAIN_RESOLVE "replica.split-brain-heal-finalize"
+#define GF_AFR_ADD_BRICK "trusted.add-brick"
+#define GF_AFR_REPLACE_BRICK "trusted.replace-brick"
+#define GF_AFR_DIRTY "trusted.afr.dirty"
+#define GF_XATTROP_ENTRY_OUT "glusterfs.xattrop-entry-delete"
+#define GF_XATTROP_PURGE_INDEX "glusterfs.xattrop-purge-index"
+
+#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
+/* replace-brick and pump related internal xattrs */
+#define RB_PUMP_CMD_START "glusterfs.pump.start"
+#define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
+#define RB_PUMP_CMD_COMMIT "glusterfs.pump.commit"
+#define RB_PUMP_CMD_ABORT "glusterfs.pump.abort"
+#define RB_PUMP_CMD_STATUS "glusterfs.pump.status"
+
+#define GLUSTERFS_MARKER_DONT_ACCOUNT_KEY "glusters.marker.dont-account"
+#define GLUSTERFS_RDMA_INLINE_THRESHOLD (2048)
+#define GLUSTERFS_RDMA_MAX_HEADER_SIZE \
+ (228) /* (sizeof (rdma_header_t) \
+ + RDMA_MAX_SEGMENTS \
+ * sizeof (rdma_read_chunk_t)) \
+ */
+
+#define GLUSTERFS_RPC_REPLY_SIZE 24
+
+#define STARTING_EVENT_THREADS 2
+
+#define DEFAULT_VAR_RUN_DIRECTORY DATADIR "/run/gluster"
+#define DEFAULT_GLUSTERFSD_MISC_DIRETORY DATADIR "/lib/misc/glusterfsd"
+#ifdef GF_LINUX_HOST_OS
+#define GLUSTERD_DEFAULT_WORKDIR DATADIR "/lib/glusterd"
+#else
+#define GLUSTERD_DEFAULT_WORKDIR DATADIR "/db/glusterd"
+#endif
+#define GF_REPLICATE_TRASH_DIR ".landfill"
+
+/* GlusterFS's maximum supported Auxiliary GIDs */
+#define GF_MAX_AUX_GROUPS 65535
+
+#define GF_UUID_BUF_SIZE 37 /* UUID_CANONICAL_FORM_LEN + NULL */
+#define GF_UUID_BNAME_BUF_SIZE (320) /* (64 + 256) */
+
+#define GF_REBALANCE_TID_KEY "rebalance-id"
+#define GF_REMOVE_BRICK_TID_KEY "remove-brick-id"
+#define GF_TIER_TID_KEY "tier-id"
+#define GF_TIER_ENABLED "tier-enabled"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+/* Adding this here instead of any glusterd*.h files as it is also required by
+ * cli
+ */
+#define DEFAULT_GLUSTERD_SOCKFILE DATADIR "/run/glusterd.socket"
+
+/* features/marker-quota also needs to have knowledge of link-files so as to
+ * exclude them from accounting.
+ */
+#define DHT_LINKFILE_MODE (S_ISVTX)
+
+#define IS_DHT_LINKFILE_MODE(iabuf) \
+ ((st_mode_from_ia((iabuf)->ia_prot, (iabuf)->ia_type) & ~S_IFMT) == \
+ DHT_LINKFILE_MODE)
+#define DHT_LINKFILE_STR "linkto"
+#define DHT_COMMITHASH_STR "commithash"
+
+#define DHT_SKIP_NON_LINKTO_UNLINK "unlink-only-if-dht-linkto-file"
+#define TIER_SKIP_NON_LINKTO_UNLINK "unlink-only-if-tier-linkto-file"
+#define DHT_SKIP_OPEN_FD_UNLINK "dont-unlink-for-open-fd"
+#define DHT_IATT_IN_XDATA_KEY "dht-get-iatt-in-xattr"
+#define DHT_MODE_IN_XDATA_KEY "dht-get-mode-in-xattr"
+#define GET_LINK_COUNT "get-link-count"
+#define GF_GET_SIZE "get-size"
+#define GF_PRESTAT "virt-gf-prestat"
+#define GF_POSTSTAT "virt-gf-poststat"
+
+/*CTR and Marker requires inode dentry link count from posix*/
+#define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
+#define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
+
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
+
+#define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
+
+#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
+
+#define GF_LOG_LRU_BUFSIZE_DEFAULT 5
+#define GF_LOG_LRU_BUFSIZE_MIN 0
+#define GF_LOG_LRU_BUFSIZE_MAX 20
+#define GF_LOG_LRU_BUFSIZE_MIN_STR "0"
+#define GF_LOG_LRU_BUFSIZE_MAX_STR "20"
+
+#define GF_LOG_FLUSH_TIMEOUT_DEFAULT 120
+#define GF_LOG_FLUSH_TIMEOUT_MIN 30
+#define GF_LOG_FLUSH_TIMEOUT_MAX 300
+#define GF_LOG_FLUSH_TIMEOUT_MIN_STR "30"
+#define GF_LOG_FLUSH_TIMEOUT_MAX_STR "300"
+#define GF_LOG_LOCALTIME_DEFAULT 0
+
+#define GF_NETWORK_TIMEOUT 42
+
+#define GF_BACKTRACE_LEN 4096
+#define GF_BACKTRACE_FRAME_COUNT 7
+
+#define GF_LK_ADVISORY 0 /* maps to GLFS_LK_ADVISORY from libgfapi*/
+#define GF_LK_MANDATORY 1 /* maps to GLFS_LK_MANDATORY from libgfapi*/
+#define GF_LOCK_MODE "glusterfs.lk.lkmode"
+
+#define GF_CHECK_XATTR_KEY_AND_GOTO(key, cmpkey, errval, lbl) \
+ do { \
+ if (key && strcmp(key, cmpkey) == 0) { \
+ errval = -EINVAL; \
+ goto lbl; \
+ } \
+ } while (0)
+
+#define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size"
+#define GF_CS_BLOCK_SIZE "trusted.glusterfs.cs.block_size"
+#define GF_CS_NUM_BLOCKS "trusted.glusterfs.cs.num_blocks"
+
+#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
+
+#define GF_CS_OBJECT_UPLOAD_COMPLETE "trusted.glusterfs.csou.complete"
+#define GF_CS_OBJECT_REMOTE "trusted.glusterfs.cs.remote"
+#define GF_CS_OBJECT_DOWNLOADING "trusted.glusterfs.cs.downloading"
+#define GF_CS_OBJECT_DOWNLOADED "trusted.glusterfs.cs.downloaded"
+#define GF_CS_OBJECT_STATUS "trusted.glusterfs.cs.status"
+#define GF_CS_OBJECT_REPAIR "trusted.glusterfs.cs.repair"
+
+#define gf_boolean_t bool
+#define _gf_false false
+#define _gf_true true
+
+typedef enum {
+ GF_CS_LOCAL = 1,
+ GF_CS_REMOTE = 2,
+ GF_CS_REPAIR = 4,
+ GF_CS_DOWNLOADING = 8,
+ GF_CS_ERROR = 16,
+} gf_cs_obj_state;
+
+typedef enum {
+ GF_FOP_PRI_UNSPEC = -1, /* Priority not specified */
+ GF_FOP_PRI_HI = 0, /* low latency */
+ GF_FOP_PRI_NORMAL, /* normal */
+ GF_FOP_PRI_LO, /* bulk */
+ GF_FOP_PRI_LEAST, /* least */
+ GF_FOP_PRI_MAX, /* Highest */
+} gf_fop_pri_t;
+
+typedef enum {
+ /* The 'component' (xlator / option) is not yet setting the flag */
+ GF_UNCLASSIFIED = 0,
+ /* The 'component' is experimental, should not be recommened
+ in production mode */
+ GF_EXPERIMENTAL,
+ /* The 'component' is tech preview, ie, it is 'mostly' working as
+ expected, but can have some of the corner cases, which is not
+ handled. */
+ GF_TECH_PREVIEW,
+ /* The 'component' is good to run. Has good enough test and
+ documentation coverage. */
+ GF_MAINTAINED,
+ /* The component is:
+ - no more a focus
+ - no more solving a valid use case
+ - no more maintained, no volunteers to maintain
+ - there is 'maintained' or 'tech-preview' feature,
+ which does the same thing, better.
+ */
+ GF_DEPRECATED,
+ /* The 'component' is no more 'built'. */
+ GF_OBSOLETE,
+ /* The 'component' exist for Documentation purposes.
+ No real usecase */
+ GF_DOCUMENT_PURPOSE,
+} gf_category_t;
+
+static const char *const FOP_PRI_STRINGS[] = {"HIGH", "NORMAL", "LOW", "LEAST"};
+
+static inline const char *
+fop_pri_to_string(gf_fop_pri_t pri)
+{
+ if (IS_ERROR(pri))
+ return "UNSPEC";
+
+ if (pri >= GF_FOP_PRI_MAX)
+ return "INVALID";
+
+ return FOP_PRI_STRINGS[pri];
+}
+
+const char *
+fop_enum_to_pri_string(glusterfs_fop_t fop);
+
+#define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */
+#define GF_SET_OVERWRITE 0x2 /* Overwrite with the buf given */
+#define GF_SET_DIR_ONLY 0x4
+#define GF_SET_EPOCH_TIME 0x8 /* used by afr dir lookup selfheal */
+#define GF_AUXILLARY_PARGFID 0xd /* RIO dummy parent gfid */
+
+/* key value which quick read uses to get small files in lookup cbk */
+#define GF_CONTENT_KEY "glusterfs.content"
+
+struct _xlator_cmdline_option {
+ struct list_head cmd_args;
+ char *volume;
+ char *key;
+ char *value;
+};
+typedef struct _xlator_cmdline_option xlator_cmdline_option_t;
+
+struct _server_cmdline {
+ struct list_head list;
+ char *volfile_server;
+ char *transport;
+ int port;
+};
+typedef struct _server_cmdline server_cmdline_t;
+
+#define GF_OPTION_ENABLE _gf_true
+#define GF_OPTION_DISABLE _gf_false
+#define GF_OPTION_DEFERRED 2
+
+typedef enum { _gf_none, _gf_memcheck, _gf_drd } gf_valgrind_tool;
+
+struct _cmd_args {
+ /* basic options */
+ char *volfile_server;
+ server_cmdline_t *curr_server;
+ /* List of backup volfile servers, including original */
+ struct list_head volfile_servers;
+ char *volfile;
+ char *log_server;
+ gf_loglevel_t log_level;
+ char *log_file;
+ char *log_ident;
+ gf_log_logger_t logger;
+ gf_log_format_t log_format;
+ uint32_t log_buf_size;
+ uint32_t log_flush_timeout;
+ int32_t max_connect_attempts;
+ char *print_exports;
+ char *print_netgroups;
+ int print_xlatordir;
+ int print_statedumpdir;
+ int print_logdir;
+ int print_libexecdir;
+ /* advanced options */
+ uint32_t volfile_server_port;
+ char *volfile_server_transport;
+ uint32_t log_server_port;
+ char *pid_file;
+ char *sock_file;
+ int no_daemon_mode;
+ char *run_id;
+ int debug_mode;
+ int read_only;
+ int acl;
+ int selinux;
+ int capability;
+ int enable_ino32;
+ int worm;
+ int mac_compat;
+ int fopen_keep_cache;
+ int gid_timeout;
+ char gid_timeout_set;
+ int aux_gfid_mount;
+
+ /* need a process wide timer-wheel? */
+ int global_timer_wheel;
+
+ /* list of xlator_option_t */
+ struct list_head xlator_options;
+
+ /* fuse options */
+ int fuse_direct_io_mode;
+ char *use_readdirp;
+ int no_root_squash;
+ int volfile_check;
+ double fuse_entry_timeout;
+ double fuse_negative_timeout;
+ double fuse_attribute_timeout;
+ char *volume_name;
+ int fuse_nodev;
+ int fuse_nosuid;
+ char *dump_fuse;
+ pid_t client_pid;
+ int client_pid_set;
+ unsigned uid_map_root;
+ int32_t lru_limit;
+ int32_t invalidate_limit;
+ int background_qlen;
+ int congestion_threshold;
+ char *fuse_mountopts;
+ int mem_acct;
+ int resolve_gids;
+
+ /* key args */
+ char *mount_point;
+ char *volfile_id;
+
+ /* required for portmap */
+ int brick_port;
+ char *brick_name;
+ int brick_port2;
+
+ /* Should management connections use SSL? */
+ int secure_mgmt;
+
+ /* Linux-only OOM killer adjustment */
+#ifdef GF_LINUX_HOST_OS
+ char *oom_score_adj;
+#endif
+
+ /* Run this process with valgrind? Might want to prevent calling
+ * functions that prevent valgrind from working correctly, like
+ * dlclose(). */
+ gf_valgrind_tool vgtool;
+
+ int localtime_logging;
+
+ /* For the subdir mount */
+ char *subdir_mount;
+
+ char *process_name;
+ char *event_history;
+ int thin_client;
+ uint32_t reader_thread_count;
+
+ /* FUSE writeback cache support */
+ int kernel_writeback_cache;
+ uint32_t attr_times_granularity;
+
+ int fuse_flush_handle_interrupt;
+ int fuse_auto_inval;
+
+ bool global_threading;
+ bool brick_mux;
+
+ uint32_t fuse_dev_eperm_ratelimit_ns;
+};
+typedef struct _cmd_args cmd_args_t;
+
+struct _glusterfs_graph {
+ struct list_head list;
+ struct timeval dob;
+ void *first;
+ void *top; /* selected by -n */
+ int xl_count;
+ int id; /* Used in logging */
+ int used; /* Should be set when fuse gets
+ first CHILD_UP */
+ uint32_t volfile_checksum;
+ uint32_t leaf_count;
+ void *last_xl; /* Stores the last xl of the graph, as of now only populated
+ in client multiplexed code path */
+ pthread_mutex_t mutex;
+ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+ int parent_down;
+ char graph_uuid[128];
+ char volume_id[GF_UUID_BUF_SIZE];
+};
+typedef struct _glusterfs_graph glusterfs_graph_t;
+
+typedef int32_t (*glusterfsd_mgmt_event_notify_fn_t)(int32_t event, void *data,
+ ...);
+
+typedef enum {
+ MGMT_SSL_NEVER = 0,
+ MGMT_SSL_COPY_IO,
+ MGMT_SSL_ALWAYS
+} mgmt_ssl_t;
+
+struct tvec_base;
+
+/* reference counting for the global (per ctx) timer-wheel */
+struct gf_ctx_tw {
+ GF_REF_DECL;
+ struct tvec_base *timer_wheel; /* global timer-wheel instance */
+};
+
+struct _glusterfs_ctx {
+ cmd_args_t cmd_args;
+ char *process_uuid;
+ FILE *pidfp;
+ char fin;
+ void *timer;
+ void *ib;
+ struct call_pool *pool;
+ void *event_pool;
+ void *iobuf_pool;
+ void *logbuf_pool;
+ gf_lock_t lock;
+ size_t page_size;
+
+ /* one per volfile parse */
+ struct list_head graphs;
+
+ /* the latest graph in use */
+ glusterfs_graph_t *active;
+
+ /* fuse or nfs (but not protocol/server) */
+ void *master;
+
+ /* xlator implementing MOPs for centralized logging, volfile server */
+ void *mgmt;
+
+ /* listener of the commands from glusterd */
+ void *listener;
+
+ /* toggle switch for latency measurement */
+ unsigned char measure_latency;
+ pthread_t sigwaiter;
+ char *cmdlinestr;
+ struct mem_pool *stub_mem_pool;
+ unsigned char cleanup_started;
+ int graph_id; /* Incremented per graph, value should
+ indicate how many times the graph has
+ got changed */
+ pid_t mnt_pid; /* pid of the mount agent */
+ int process_mode; /*mode in which process is runninng*/
+ struct syncenv *env; /* The env pointer to the synctasks */
+
+ struct list_head mempool_list; /* used to keep a global list of
+ mempools, used to log details of
+ mempool in statedump */
+ char *statedump_path;
+
+ struct mem_pool *dict_pool;
+ struct mem_pool *dict_pair_pool;
+ struct mem_pool *dict_data_pool;
+
+ glusterfsd_mgmt_event_notify_fn_t notify; /* Used for xlators to make
+ call to fsd-mgmt */
+ gf_log_handle_t log; /* all logging related variables */
+
+ int mem_acct_enable;
+
+ int daemon_pipe[2];
+
+ struct clienttable *clienttable;
+
+ /*
+ * Should management connections use SSL? This is the only place we
+ * can put it where both daemon-startup and socket code will see it.
+ *
+ * Why is it an int? Because we're included before common-utils.h,
+ * which defines gf_boolean_t (what we really want). It doesn't make
+ * any sense, but it's not worth turning the codebase upside-down to
+ * fix it. Thus, an int.
+ */
+ int secure_mgmt;
+
+ /* The option is use to set cert_depth while management connection
+ use SSL
+ */
+ int ssl_cert_depth;
+
+ /*
+ * Should *our* server/inbound connections use SSL? This is only true
+ * if we're glusterd and secure_mgmt is set, or if we're glusterfsd
+ * and SSL is set on the I/O path. It should never be set e.g. for
+ * NFS.
+ */
+ mgmt_ssl_t secure_srvr;
+ /* Buffer to 'save' backtrace even under OOM-kill like situations*/
+ char btbuf[GF_BACKTRACE_LEN];
+
+ pthread_mutex_t notify_lock;
+ pthread_mutex_t cleanup_lock;
+ pthread_cond_t notify_cond;
+ int notifying;
+
+ struct gf_ctx_tw *tw; /* refcounted timer_wheel */
+
+ gf_lock_t volfile_lock;
+
+ /* configuration related elements, which gets changed
+ from global xlator */
+ struct {
+ char *metrics_dumppath;
+ } config;
+
+ struct {
+ gf_atomic_t max_dict_pairs;
+ gf_atomic_t total_pairs_used;
+ gf_atomic_t total_dicts_used;
+ } stats;
+
+ struct list_head volfile_list;
+ /* Add members to manage janitor threads for cleanup fd */
+ struct list_head janitor_fds;
+ pthread_cond_t fd_cond;
+ pthread_mutex_t fd_lock;
+ pthread_t janitor;
+ /* The variable is use to save total posix xlator count */
+ uint32_t pxl_count;
+
+ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
+};
+typedef struct _glusterfs_ctx glusterfs_ctx_t;
+
+typedef struct {
+ char volfile_checksum[SHA256_DIGEST_LENGTH];
+ char vol_id[NAME_MAX + 1];
+ struct list_head volfile_list;
+ glusterfs_graph_t *graph;
+ FILE *pidfp;
+} gf_volfile_t;
+
+glusterfs_ctx_t *
+glusterfs_ctx_new(void);
+
+struct gf_flock {
+ short l_type;
+ short l_whence;
+ off_t l_start;
+ off_t l_len;
+ pid_t l_pid;
+ gf_lkowner_t l_owner;
+};
+
+typedef struct lock_migration_info {
+ struct list_head list;
+ struct gf_flock flock;
+ char *client_uid;
+ uint32_t lk_flags;
+} lock_migration_info_t;
+
+#define GF_MUST_CHECK __attribute__((warn_unused_result))
+/*
+ * Some macros (e.g. ALLOC_OR_GOTO) set variables in function scope, but the
+ * calling function might not only declare the variable to keep the macro happy
+ * and not use it otherwise. In such cases, the following can be used to
+ * suppress the "set but not used" warning that would otherwise occur.
+ */
+#define GF_UNUSED __attribute__((unused))
+
+/*
+ * If present, this has the following effects:
+ *
+ * glusterd enables privileged commands over TCP
+ *
+ * all code enables SSL for outbound connections to management port
+ *
+ * glusterd enables SSL for inbound connections
+ *
+ * Servers and clients enable/disable SSL among themselves by other means.
+ * Making secure management connections conditional on a file is a bit of a
+ * hack, but we don't have any other place for such global settings across
+ * all of the affected components. Making it a compile-time option would
+ * reduce functionality, both for users and for testing (which can now be
+ * done using secure connections for all tests without change elsewhere).
+ *
+ */
+#define SECURE_ACCESS_FILE GLUSTERD_DEFAULT_WORKDIR "/secure-access"
+
+int
+glusterfs_graph_prepare(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
+ char *volume_name);
+int
+glusterfs_graph_destroy_residual(glusterfs_graph_t *graph);
+int
+glusterfs_graph_deactivate(glusterfs_graph_t *graph);
+int
+glusterfs_graph_destroy(glusterfs_graph_t *graph);
+int
+glusterfs_get_leaf_count(glusterfs_graph_t *graph);
+int
+glusterfs_graph_activate(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
+glusterfs_graph_t *
+glusterfs_graph_construct(FILE *fp);
+int
+glusterfs_graph_init(glusterfs_graph_t *graph);
+glusterfs_graph_t *
+glusterfs_graph_new(void);
+int
+glusterfs_graph_reconfigure(glusterfs_graph_t *oldgraph,
+ glusterfs_graph_t *newgraph);
+int
+glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+ glusterfs_graph_t **newgraph);
+int
+glusterfs_graph_parent_up(glusterfs_graph_t *graph);
+
+void
+gf_free_mig_locks(lock_migration_info_t *locks);
+
+int
+glusterfs_read_secure_access_file(void);
+int
+glusterfs_graph_fini(glusterfs_graph_t *graph);
+#endif /* _GLUSTERFS_H */
diff --git a/libglusterfs/src/graph-utils.h b/libglusterfs/src/glusterfs/graph-utils.h
index 207664fdb1f..247f1a55d5a 100644
--- a/libglusterfs/src/graph-utils.h
+++ b/libglusterfs/src/glusterfs/graph-utils.h
@@ -11,10 +11,10 @@
#ifndef _GRAPH_H_
#define _GRAPH_H_
-int glusterfs_graph_print_file (FILE *file, glusterfs_graph_t *graph);
-
-char *glusterfs_graph_print_buf (glusterfs_graph_t *graph);
-
-int glusterfs_xlator_link (xlator_t *pxl, xlator_t *cxl);
-void glusterfs_graph_set_first (glusterfs_graph_t *graph, xlator_t *xl);
+int
+glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph);
+int
+glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl);
+void
+glusterfs_graph_set_first(glusterfs_graph_t *graph, xlator_t *xl);
#endif
diff --git a/libglusterfs/src/hashfn.h b/libglusterfs/src/glusterfs/hashfn.h
index 06ae37e796b..6e92e706d8c 100644
--- a/libglusterfs/src/hashfn.h
+++ b/libglusterfs/src/glusterfs/hashfn.h
@@ -11,17 +11,13 @@
#ifndef __HASHFN_H__
#define __HASHFN_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/types.h>
#include <stdint.h>
-uint32_t SuperFastHash (const char * data, int32_t len);
+uint32_t
+SuperFastHash(const char *data, int32_t len);
-uint32_t gf_dm_hashfn (const char *msg, int len);
+uint32_t
+gf_dm_hashfn(const char *msg, int len);
-uint32_t ReallySimpleHash (char *path, int len);
#endif /* __HASHFN_H__ */
diff --git a/libglusterfs/src/glusterfs/iatt.h b/libglusterfs/src/glusterfs/iatt.h
new file mode 100644
index 00000000000..f03d68b02f0
--- /dev/null
+++ b/libglusterfs/src/glusterfs/iatt.h
@@ -0,0 +1,489 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IATT_H
+#define _IATT_H
+
+#if defined(GF_LINUX_HOST_OS)
+#include <sys/sysmacros.h> /* for makedev(3), major(3), minor(3) */
+#endif
+#include <sys/types.h>
+#include <sys/stat.h> /* for iatt <--> stat conversions */
+#include <unistd.h>
+
+#include "glusterfs/compat.h"
+#include "glusterfs/compat-uuid.h"
+
+typedef enum {
+ IA_INVAL = 0,
+ IA_IFREG,
+ IA_IFDIR,
+ IA_IFLNK,
+ IA_IFBLK,
+ IA_IFCHR,
+ IA_IFIFO,
+ IA_IFSOCK
+} ia_type_t;
+
+typedef struct {
+ uint8_t suid : 1;
+ uint8_t sgid : 1;
+ uint8_t sticky : 1;
+ struct {
+ uint8_t read : 1;
+ uint8_t write : 1;
+ uint8_t exec : 1;
+ } owner, group, other;
+} ia_prot_t;
+
+struct iatt {
+ uint64_t ia_flags;
+ uint64_t ia_ino; /* inode number */
+ uint64_t ia_dev; /* backing device ID */
+ uint64_t ia_rdev; /* device ID (if special file) */
+ uint64_t ia_size; /* file size in bytes */
+ uint32_t ia_nlink; /* Link count */
+ uint32_t ia_uid; /* user ID of owner */
+ uint32_t ia_gid; /* group ID of owner */
+ uint32_t ia_blksize; /* blocksize for filesystem I/O */
+ uint64_t ia_blocks; /* number of 512B blocks allocated */
+ int64_t ia_atime; /* last access time */
+ int64_t ia_mtime; /* last modification time */
+ int64_t ia_ctime; /* last status change time */
+ int64_t ia_btime; /* creation time. Fill using statx */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime_nsec;
+ uint32_t ia_btime_nsec;
+ uint64_t ia_attributes; /* chattr related:compressed, immutable,
+ * append only, encrypted etc.*/
+ uint64_t ia_attributes_mask; /* Mask for the attributes */
+
+ uuid_t ia_gfid;
+ ia_type_t ia_type; /* type of file */
+ ia_prot_t ia_prot; /* protection */
+};
+
+struct old_iatt {
+ uint64_t ia_ino; /* inode number */
+ uuid_t ia_gfid;
+ uint64_t ia_dev; /* backing device ID */
+ ia_type_t ia_type; /* type of file */
+ ia_prot_t ia_prot; /* protection */
+ uint32_t ia_nlink; /* Link count */
+ uint32_t ia_uid; /* user ID of owner */
+ uint32_t ia_gid; /* group ID of owner */
+ uint64_t ia_rdev; /* device ID (if special file) */
+ uint64_t ia_size; /* file size in bytes */
+ uint32_t ia_blksize; /* blocksize for filesystem I/O */
+ uint64_t ia_blocks; /* number of 512B blocks allocated */
+ uint32_t ia_atime; /* last access time */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime; /* last modification time */
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime; /* last status change time */
+ uint32_t ia_ctime_nsec;
+};
+
+struct mdata_iatt {
+ int64_t ia_atime; /* last access time */
+ int64_t ia_mtime; /* last modification time */
+ int64_t ia_ctime; /* last status change time */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime_nsec;
+};
+
+/* 64-bit mask for valid members in struct iatt. */
+#define IATT_TYPE 0x0000000000000001U
+#define IATT_MODE 0x0000000000000002U
+#define IATT_NLINK 0x0000000000000004U
+#define IATT_UID 0x0000000000000008U
+#define IATT_GID 0x0000000000000010U
+#define IATT_ATIME 0x0000000000000020U
+#define IATT_MTIME 0x0000000000000040U
+#define IATT_CTIME 0x0000000000000080U
+#define IATT_INO 0x0000000000000100U
+#define IATT_SIZE 0x0000000000000200U
+#define IATT_BLOCKS 0x0000000000000400U
+#define IATT_BTIME 0x0000000000000800U
+#define IATT_GFID 0x0000000000001000U
+
+/* Macros for checking validity of struct iatt members.*/
+#define IATT_TYPE_VALID(iaflags) (iaflags & IATT_TYPE)
+#define IATT_MODE_VALID(iaflags) (iaflags & IATT_MODE)
+#define IATT_NLINK_VALID(iaflags) (iaflags & IATT_NLINK)
+#define IATT_UID_VALID(iaflags) (iaflags & IATT_UID)
+#define IATT_GID_VALID(iaflags) (iaflags & IATT_GID)
+#define IATT_ATIME_VALID(iaflags) (iaflags & IATT_ATIME)
+#define IATT_MTIME_VALID(iaflags) (iaflags & IATT_MTIME)
+#define IATT_CTIME_VALID(iaflags) (iaflags & IATT_CTIME)
+#define IATT_INO_VALID(iaflags) (iaflags & IATT_INO)
+#define IATT_SIZE_VALID(iaflags) (iaflags & IATT_SIZE)
+#define IATT_BLOCKS_VALID(iaflags) (iaflags & IATT_BLOCKS)
+#define IATT_BTIME_VALID(iaflags) (iaflags & IATT_BTIME)
+#define IATT_GFID_VALID(iaflags) (iaflags & IATT_GFID)
+
+#define IA_ISREG(t) (t == IA_IFREG)
+#define IA_ISDIR(t) (t == IA_IFDIR)
+#define IA_ISLNK(t) (t == IA_IFLNK)
+#define IA_ISBLK(t) (t == IA_IFBLK)
+#define IA_ISCHR(t) (t == IA_IFCHR)
+#define IA_ISFIFO(t) (t == IA_IFIFO)
+#define IA_ISSOCK(t) (t == IA_IFSOCK)
+#define IA_ISINVAL(t) (t == IA_INVAL)
+
+#define IA_PROT_RUSR(prot) ((prot).owner.read == 1)
+#define IA_PROT_WUSR(prot) ((prot).owner.write == 1)
+#define IA_PROT_XUSR(prot) ((prot).owner.exec == 1)
+
+#define IA_PROT_RGRP(prot) ((prot).group.read == 1)
+#define IA_PROT_WGRP(prot) ((prot).group.write == 1)
+#define IA_PROT_XGRP(prot) ((prot).group.exec == 1)
+
+#define IA_PROT_ROTH(prot) ((prot).other.read == 1)
+#define IA_PROT_WOTH(prot) ((prot).other.write == 1)
+#define IA_PROT_XOTH(prot) ((prot).other.exec == 1)
+
+#define IA_PROT_SUID(prot) ((prot).suid == 1)
+#define IA_PROT_SGID(prot) ((prot).sgid == 1)
+#define IA_PROT_STCKY(prot) ((prot).sticky == 1)
+
+#define IA_FILE_OR_DIR(t) (IA_ISREG(t) || IA_ISDIR(t))
+
+static inline uint32_t
+ia_major(uint64_t ia_dev)
+{
+ return (uint32_t)(ia_dev >> 32);
+}
+
+static inline uint32_t
+ia_minor(uint64_t ia_dev)
+{
+ return (uint32_t)(ia_dev & 0xffffffff);
+}
+
+static inline uint64_t
+ia_makedev(uint32_t ia_maj, uint32_t ia_min)
+{
+ return ((((uint64_t)ia_maj) << 32) | ia_min);
+}
+
+static inline ia_prot_t
+ia_prot_from_st_mode(mode_t mode)
+{
+ ia_prot_t ia_prot = {
+ 0,
+ };
+
+ if (mode & S_ISUID)
+ ia_prot.suid = 1;
+ if (mode & S_ISGID)
+ ia_prot.sgid = 1;
+ if (mode & S_ISVTX)
+ ia_prot.sticky = 1;
+
+ if (mode & S_IRUSR)
+ ia_prot.owner.read = 1;
+ if (mode & S_IWUSR)
+ ia_prot.owner.write = 1;
+ if (mode & S_IXUSR)
+ ia_prot.owner.exec = 1;
+
+ if (mode & S_IRGRP)
+ ia_prot.group.read = 1;
+ if (mode & S_IWGRP)
+ ia_prot.group.write = 1;
+ if (mode & S_IXGRP)
+ ia_prot.group.exec = 1;
+
+ if (mode & S_IROTH)
+ ia_prot.other.read = 1;
+ if (mode & S_IWOTH)
+ ia_prot.other.write = 1;
+ if (mode & S_IXOTH)
+ ia_prot.other.exec = 1;
+
+ return ia_prot;
+}
+
+static inline ia_type_t
+ia_type_from_st_mode(mode_t mode)
+{
+ ia_type_t type = IA_INVAL;
+
+ if (S_ISREG(mode))
+ type = IA_IFREG;
+ if (S_ISDIR(mode))
+ type = IA_IFDIR;
+ if (S_ISLNK(mode))
+ type = IA_IFLNK;
+ if (S_ISBLK(mode))
+ type = IA_IFBLK;
+ if (S_ISCHR(mode))
+ type = IA_IFCHR;
+ if (S_ISFIFO(mode))
+ type = IA_IFIFO;
+ if (S_ISSOCK(mode))
+ type = IA_IFSOCK;
+
+ return type;
+}
+
+static inline uint32_t
+st_mode_prot_from_ia(ia_prot_t prot)
+{
+ uint32_t prot_bit = 0;
+
+ if (prot.suid)
+ prot_bit |= S_ISUID;
+ if (prot.sgid)
+ prot_bit |= S_ISGID;
+ if (prot.sticky)
+ prot_bit |= S_ISVTX;
+
+ if (prot.owner.read)
+ prot_bit |= S_IRUSR;
+ if (prot.owner.write)
+ prot_bit |= S_IWUSR;
+ if (prot.owner.exec)
+ prot_bit |= S_IXUSR;
+
+ if (prot.group.read)
+ prot_bit |= S_IRGRP;
+ if (prot.group.write)
+ prot_bit |= S_IWGRP;
+ if (prot.group.exec)
+ prot_bit |= S_IXGRP;
+
+ if (prot.other.read)
+ prot_bit |= S_IROTH;
+ if (prot.other.write)
+ prot_bit |= S_IWOTH;
+ if (prot.other.exec)
+ prot_bit |= S_IXOTH;
+
+ return prot_bit;
+}
+
+static inline uint32_t
+st_mode_type_from_ia(ia_type_t type)
+{
+ uint32_t type_bit = 0;
+
+ switch (type) {
+ case IA_IFREG:
+ type_bit = S_IFREG;
+ break;
+ case IA_IFDIR:
+ type_bit = S_IFDIR;
+ break;
+ case IA_IFLNK:
+ type_bit = S_IFLNK;
+ break;
+ case IA_IFBLK:
+ type_bit = S_IFBLK;
+ break;
+ case IA_IFCHR:
+ type_bit = S_IFCHR;
+ break;
+ case IA_IFIFO:
+ type_bit = S_IFIFO;
+ break;
+ case IA_IFSOCK:
+ type_bit = S_IFSOCK;
+ break;
+ case IA_INVAL:
+ break;
+ }
+
+ return type_bit;
+}
+
+static inline mode_t
+st_mode_from_ia(ia_prot_t prot, ia_type_t type)
+{
+ mode_t st_mode = 0;
+ uint32_t type_bit = 0;
+ uint32_t prot_bit = 0;
+
+ type_bit = st_mode_type_from_ia(type);
+ prot_bit = st_mode_prot_from_ia(prot);
+
+ st_mode = (type_bit | prot_bit);
+
+ return st_mode;
+}
+
+static inline void
+iatt_to_mdata(struct mdata_iatt *mdata, struct iatt *iatt)
+{
+ mdata->ia_atime = iatt->ia_atime;
+ mdata->ia_atime_nsec = iatt->ia_atime_nsec;
+ mdata->ia_mtime = iatt->ia_mtime;
+ mdata->ia_mtime_nsec = iatt->ia_mtime_nsec;
+ mdata->ia_ctime = iatt->ia_ctime;
+ mdata->ia_ctime_nsec = iatt->ia_ctime_nsec;
+}
+
+static inline int
+iatt_from_stat(struct iatt *iatt, struct stat *stat)
+{
+ iatt->ia_dev = stat->st_dev;
+ iatt->ia_ino = stat->st_ino;
+
+ iatt->ia_type = ia_type_from_st_mode(stat->st_mode);
+ iatt->ia_prot = ia_prot_from_st_mode(stat->st_mode);
+
+ iatt->ia_nlink = stat->st_nlink;
+ iatt->ia_uid = stat->st_uid;
+ iatt->ia_gid = stat->st_gid;
+
+ iatt->ia_rdev = ia_makedev(major(stat->st_rdev), minor(stat->st_rdev));
+
+ iatt->ia_size = stat->st_size;
+ iatt->ia_blksize = stat->st_blksize;
+ iatt->ia_blocks = stat->st_blocks;
+
+ /* There is a possibility that the backend FS (like XFS) can
+ allocate blocks beyond EOF for better performance reasons, which
+ results in 'st_blocks' with higher values than what is consumed by
+ the file descriptor. This would break few logic inside GlusterFS,
+ like quota behavior etc, thus we need the exact number of blocks
+ which are consumed by the file to the higher layers inside GlusterFS.
+ Currently, this logic won't work for sparse files (ie, file with
+ holes)
+ */
+ {
+ uint64_t maxblocks;
+
+ maxblocks = (iatt->ia_size + 511) / 512;
+
+ if (iatt->ia_blocks > maxblocks)
+ iatt->ia_blocks = maxblocks;
+ }
+
+ iatt->ia_atime = stat->st_atime;
+ iatt->ia_atime_nsec = ST_ATIM_NSEC(stat);
+
+ iatt->ia_mtime = stat->st_mtime;
+ iatt->ia_mtime_nsec = ST_MTIM_NSEC(stat);
+
+ iatt->ia_ctime = stat->st_ctime;
+ iatt->ia_ctime_nsec = ST_CTIM_NSEC(stat);
+
+ /* Setting IATT_INO in ia_flags is done in posix_fill_ino_from_gfid. */
+ iatt->ia_flags = iatt->ia_flags | IATT_TYPE | IATT_MODE | IATT_NLINK |
+ IATT_UID | IATT_GID | IATT_SIZE | IATT_BLOCKS |
+ IATT_ATIME | IATT_MTIME | IATT_CTIME;
+
+ return 0;
+}
+
+static inline int
+iatt_to_stat(struct iatt *iatt, struct stat *stat)
+{
+ stat->st_dev = iatt->ia_dev;
+ stat->st_ino = iatt->ia_ino;
+
+ stat->st_mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+
+ stat->st_nlink = iatt->ia_nlink;
+ stat->st_uid = iatt->ia_uid;
+ stat->st_gid = iatt->ia_gid;
+
+ stat->st_rdev = makedev(ia_major(iatt->ia_rdev), ia_minor(iatt->ia_rdev));
+
+ stat->st_size = iatt->ia_size;
+ stat->st_blksize = iatt->ia_blksize;
+ stat->st_blocks = iatt->ia_blocks;
+
+ stat->st_atime = iatt->ia_atime;
+ ST_ATIM_NSEC_SET(stat, iatt->ia_atime_nsec);
+
+ stat->st_mtime = iatt->ia_mtime;
+ ST_MTIM_NSEC_SET(stat, iatt->ia_mtime_nsec);
+
+ stat->st_ctime = iatt->ia_ctime;
+ ST_CTIM_NSEC_SET(stat, iatt->ia_ctime_nsec);
+
+ return 0;
+}
+
+static inline void
+oldiatt_from_iatt(struct old_iatt *o_iatt, struct iatt *c_iatt)
+{
+ o_iatt->ia_dev = c_iatt->ia_dev;
+ o_iatt->ia_ino = c_iatt->ia_ino;
+ o_iatt->ia_type = c_iatt->ia_type;
+ o_iatt->ia_prot = c_iatt->ia_prot;
+ o_iatt->ia_nlink = c_iatt->ia_nlink;
+ o_iatt->ia_uid = c_iatt->ia_uid;
+ o_iatt->ia_gid = c_iatt->ia_gid;
+ o_iatt->ia_rdev = c_iatt->ia_rdev;
+ o_iatt->ia_size = c_iatt->ia_size;
+ o_iatt->ia_blksize = c_iatt->ia_blksize;
+ o_iatt->ia_blocks = c_iatt->ia_blocks;
+ o_iatt->ia_atime = c_iatt->ia_atime;
+ o_iatt->ia_atime_nsec = c_iatt->ia_atime_nsec;
+ o_iatt->ia_mtime = c_iatt->ia_mtime;
+ o_iatt->ia_mtime_nsec = c_iatt->ia_mtime_nsec;
+ o_iatt->ia_ctime = c_iatt->ia_ctime;
+ o_iatt->ia_ctime_nsec = c_iatt->ia_ctime_nsec;
+
+ gf_uuid_copy(o_iatt->ia_gfid, c_iatt->ia_gfid);
+
+ return;
+}
+
+static inline void
+iatt_from_oldiatt(struct iatt *c_iatt, struct old_iatt *o_iatt)
+{
+ c_iatt->ia_dev = o_iatt->ia_dev;
+ c_iatt->ia_ino = o_iatt->ia_ino;
+ c_iatt->ia_type = o_iatt->ia_type;
+ c_iatt->ia_prot = o_iatt->ia_prot;
+ c_iatt->ia_nlink = o_iatt->ia_nlink;
+ c_iatt->ia_uid = o_iatt->ia_uid;
+ c_iatt->ia_gid = o_iatt->ia_gid;
+ c_iatt->ia_rdev = o_iatt->ia_rdev;
+ c_iatt->ia_size = o_iatt->ia_size;
+ c_iatt->ia_blksize = o_iatt->ia_blksize;
+ c_iatt->ia_blocks = o_iatt->ia_blocks;
+ c_iatt->ia_atime = o_iatt->ia_atime;
+ c_iatt->ia_atime_nsec = o_iatt->ia_atime_nsec;
+ c_iatt->ia_mtime = o_iatt->ia_mtime;
+ c_iatt->ia_mtime_nsec = o_iatt->ia_mtime_nsec;
+ c_iatt->ia_ctime = o_iatt->ia_ctime;
+ c_iatt->ia_ctime_nsec = o_iatt->ia_ctime_nsec;
+
+ gf_uuid_copy(c_iatt->ia_gfid, o_iatt->ia_gfid);
+
+ c_iatt->ia_attributes = 0;
+
+ c_iatt->ia_flags = IATT_TYPE | IATT_MODE | IATT_NLINK | IATT_INO |
+ IATT_UID | IATT_GID | IATT_SIZE | IATT_BLOCKS |
+ IATT_ATIME | IATT_MTIME | IATT_CTIME | IATT_GFID;
+
+ return;
+}
+
+static inline int
+is_same_mode(ia_prot_t prot1, ia_prot_t prot2)
+{
+ int ret = 0;
+
+ if (st_mode_prot_from_ia(prot1) != st_mode_prot_from_ia(prot2))
+ ret = -1;
+
+ return ret;
+}
+
+#endif /* _IATT_H */
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
new file mode 100644
index 00000000000..4b28da510c7
--- /dev/null
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -0,0 +1,306 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _INODE_H
+#define _INODE_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#define LOOKUP_NEEDED 1
+#define LOOKUP_NOT_NEEDED 2
+
+#define DEFAULT_INODE_MEMPOOL_ENTRIES 32 * 1024
+#define INODE_PATH_FMT "<gfid:%s>"
+struct _inode_table;
+typedef struct _inode_table inode_table_t;
+
+struct _inode;
+typedef struct _inode inode_t;
+
+struct _dentry;
+typedef struct _dentry dentry_t;
+
+#include "glusterfs/list.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/fd.h"
+
+struct _inode_table {
+ pthread_mutex_t lock;
+ size_t hashsize; /* bucket size of inode hash and dentry hash */
+ char *name; /* name of the inode table, just for gf_log() */
+ inode_t *root; /* root directory inode, with number 1 */
+ xlator_t *xl; /* xlator to be called to do purge */
+ uint32_t lru_limit; /* maximum LRU cache size */
+ struct list_head *inode_hash; /* buckets for inode hash table */
+ struct list_head *name_hash; /* buckets for dentry hash table */
+ struct list_head active; /* list of inodes currently active (in an fop) */
+ uint32_t active_size; /* count of inodes in active list */
+ struct list_head lru; /* list of inodes recently used.
+ lru.next most recent */
+ uint32_t lru_size; /* count of inodes in lru list */
+ struct list_head purge; /* list of inodes to be purged soon */
+ uint32_t purge_size; /* count of inodes in purge list */
+
+ struct mem_pool *inode_pool; /* memory pool for inodes */
+ struct mem_pool *dentry_pool; /* memory pool for dentrys */
+ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
+ int ctxcount; /* number of slots in inode->ctx */
+
+ /* This is required for 'invalidation' when 'nlookup' would be used,
+ specially in case of fuse-bridge */
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *);
+ xlator_t *invalidator_xl;
+ struct list_head invalidate; /* inodes which are in invalidation queue */
+ uint32_t invalidate_size; /* count of inodes in invalidation list */
+
+ /* flag to indicate whether the cleanup of the inode
+ table started or not */
+ gf_boolean_t cleanup_started;
+};
+
+struct _dentry {
+ struct list_head inode_list; /* list of dentries of inode */
+ struct list_head hash; /* hash table pointers */
+ inode_t *inode; /* inode of this directory entry */
+ char *name; /* name of the directory entry */
+ inode_t *parent; /* directory of the entry */
+};
+
+struct _inode_ctx {
+ union {
+ uint64_t key;
+ xlator_t *xl_key;
+ };
+ /* if value1 is 0, then field is not set.. */
+ union {
+ uint64_t value1;
+ void *ptr1;
+ };
+ /* if value2 is 0, then field is not set.. */
+ union {
+ uint64_t value2;
+ void *ptr2;
+ };
+ int ref; /* This is for debugging inode ref leaks,
+ basically helps in identifying the xlator
+ causing th ref leak, it is printed in
+ statedump */
+};
+
+struct _inode {
+ inode_table_t *table; /* the table this inode belongs to */
+ uuid_t gfid;
+ gf_lock_t lock;
+ gf_atomic_t nlookup;
+ uint32_t fd_count; /* Open fd count */
+ uint32_t active_fd_count; /* Active open fd count */
+ uint32_t ref; /* reference count on this inode */
+ ia_type_t ia_type; /* what kind of file */
+ struct list_head fd_list; /* list of open files on this inode */
+ struct list_head dentry_list; /* list of directory entries for this inode */
+ struct list_head hash; /* hash table pointers */
+ struct list_head list; /* active/lru/purge */
+
+ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+ bool in_invalidate_list; /* Set if inode is in table invalidate list */
+ bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
+};
+
+#define UUID0_STR "00000000-0000-0000-0000-000000000000"
+#define GFID_STR_PFX "<gfid:" UUID0_STR ">"
+#define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
+
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl);
+
+inode_table_t *
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl);
+
+void
+inode_table_destroy_all(glusterfs_ctx_t *ctx);
+
+void
+inode_table_destroy(inode_table_t *inode_table);
+
+inode_t *
+inode_new(inode_table_t *table);
+
+inode_t *
+inode_link(inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *stbuf);
+
+void
+inode_unlink(inode_t *inode, inode_t *parent, const char *name);
+
+inode_t *
+inode_parent(inode_t *inode, uuid_t pargfid, const char *name);
+
+inode_t *
+inode_ref(inode_t *inode);
+
+inode_t *
+inode_unref(inode_t *inode);
+
+int
+inode_lookup(inode_t *inode);
+
+int
+inode_forget(inode_t *inode, uint64_t nlookup);
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup);
+
+int
+inode_ref_reduce_by_n(inode_t *inode, uint64_t nref);
+
+int
+inode_invalidate(inode_t *inode);
+
+int
+inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname,
+ inode_t *newdir, const char *newname, inode_t *inode,
+ struct iatt *stbuf);
+
+inode_t *
+inode_grep(inode_table_t *table, inode_t *parent, const char *name);
+
+int
+inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
+ uuid_t gfid, ia_type_t *type);
+
+inode_t *
+inode_find(inode_table_t *table, uuid_t gfid);
+
+int
+inode_path(inode_t *inode, const char *name, char **bufp);
+
+int
+__inode_path(inode_t *inode, const char *name, char **bufp);
+
+inode_t *
+inode_from_path(inode_table_t *table, const char *path);
+
+inode_t *
+inode_resolve(inode_table_t *table, char *path);
+
+/* deal with inode ctx's both values */
+
+int
+inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+int
+__inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+int
+__inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_del2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_reset2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+/* deal with inode ctx's 1st value */
+
+int
+inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+__inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+int
+__inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_reset0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+/* deal with inode ctx's 2st value */
+
+int
+inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+__inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+int
+__inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_reset1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+static inline int
+__inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
+{
+ return __inode_ctx_set0(inode, this, &v);
+}
+
+static inline int
+inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
+{
+ return inode_ctx_set0(inode, this, &v);
+}
+
+#define __inode_ctx_set(i, x, v_p) __inode_ctx_set0(i, x, v_p)
+
+#define inode_ctx_set(i, x, v_p) inode_ctx_set0(i, x, v_p)
+
+#define inode_ctx_reset(i, x, v) inode_ctx_reset0(i, x, v)
+
+#define __inode_ctx_get(i, x, v) __inode_ctx_get0(i, x, v)
+
+#define inode_ctx_get(i, x, v) inode_ctx_get0(i, x, v)
+
+#define inode_ctx_del(i, x, v) inode_ctx_del2(i, x, v, 0)
+#define inode_ctx_del1(i, x, v) inode_ctx_del2(i, x, 0, v)
+
+gf_boolean_t
+__is_root_gfid(uuid_t gfid);
+
+void
+__inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit);
+
+void
+inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit);
+
+void
+inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode);
+
+int
+inode_is_linked(inode_t *inode);
+
+void
+inode_set_need_lookup(inode_t *inode, xlator_t *this);
+
+gf_boolean_t
+inode_needs_lookup(inode_t *inode, xlator_t *this);
+
+int
+inode_has_dentry(inode_t *inode);
+
+size_t
+inode_ctx_size(inode_t *inode);
+
+void
+inode_find_directory_name(inode_t *inode, const char **name);
+#endif /* _INODE_H */
diff --git a/libglusterfs/src/glusterfs/iobuf.h b/libglusterfs/src/glusterfs/iobuf.h
new file mode 100644
index 00000000000..4bd443efd5e
--- /dev/null
+++ b/libglusterfs/src/glusterfs/iobuf.h
@@ -0,0 +1,194 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IOBUF_H_
+#define _IOBUF_H_
+
+#include <stddef.h> // for size_t
+#include <sys/mman.h>
+#include "glusterfs/atomic.h" // for gf_atomic_t
+#include <sys/uio.h> // for struct iovec
+#include "glusterfs/locking.h" // for gf_lock_t
+#include "glusterfs/list.h"
+
+#define GF_VARIABLE_IOBUF_COUNT 32
+
+#define GF_RDMA_DEVICE_COUNT 8
+
+/* Lets try to define the new anonymous mapping
+ * flag, in case the system is still using the
+ * now deprecated MAP_ANON flag.
+ *
+ * Also, this should ideally be in a centralized/common
+ * header which can be used by other source files also.
+ */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#define GF_ALIGN_BUF(ptr, bound) \
+ ((void *)((unsigned long)(ptr + bound - 1) & (unsigned long)(~(bound - 1))))
+
+#define GF_IOBUF_ALIGN_SIZE 512
+
+/* one allocatable unit for the consumers of the IOBUF API */
+/* each unit hosts @page_size bytes of memory */
+struct iobuf;
+
+/* one region of memory mapped from the operating system */
+/* each region MMAPs @arena_size bytes of memory */
+/* each arena hosts @arena_size / @page_size IOBUFs */
+struct iobuf_arena;
+
+/* expandable and contractable pool of memory, internally broken into arenas */
+struct iobuf_pool;
+
+struct iobuf_init_config {
+ size_t pagesize;
+ int32_t num_pages;
+};
+
+struct iobuf {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf *next;
+ struct iobuf *prev;
+ };
+ };
+ struct iobuf_arena *iobuf_arena;
+
+ gf_lock_t lock; /* for ->ptr and ->ref */
+ gf_atomic_t ref; /* 0 == passive, >0 == active */
+
+ void *ptr; /* usable memory region by the consumer */
+
+ void *free_ptr; /* in case of stdalloc, this is the
+ one to be freed */
+};
+
+struct iobuf_arena {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf_arena *next;
+ struct iobuf_arena *prev;
+ };
+ };
+
+ struct list_head all_list;
+ size_t page_size; /* size of all iobufs in this arena */
+ size_t arena_size;
+ /* this is equal to rounded_size * num_iobufs.
+ (rounded_size comes with gf_iobuf_get_pagesize().) */
+ size_t page_count;
+
+ struct iobuf_pool *iobuf_pool;
+
+ void *mem_base;
+ struct iobuf *iobufs; /* allocated iobufs list */
+
+ struct iobuf active; /* head node iobuf
+ (unused by itself) */
+ struct iobuf passive; /* head node iobuf
+ (unused by itself) */
+ uint64_t alloc_cnt; /* total allocs in this pool */
+ int active_cnt;
+ int passive_cnt;
+ int max_active; /* max active buffers at a given time */
+};
+
+struct iobuf_pool {
+ pthread_mutex_t mutex;
+ size_t arena_size; /* size of memory region in
+ arena */
+ size_t default_page_size; /* default size of iobuf */
+
+ struct list_head all_arenas;
+ struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas. Each element of the array is a list of arenas
+ holding iobufs of particular page_size */
+
+ struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas without free iobufs */
+
+ struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
+ /* array of of arenas which can be purged */
+
+ uint64_t request_misses; /* mostly the requests for higher
+ value of iobufs */
+ int arena_cnt;
+ int rdma_device_count;
+ struct list_head *mr_list[GF_RDMA_DEVICE_COUNT];
+ void *device[GF_RDMA_DEVICE_COUNT];
+ int (*rdma_registration)(void **, void *);
+ int (*rdma_deregistration)(struct list_head **, struct iobuf_arena *);
+};
+
+struct iobuf_pool *
+iobuf_pool_new(void);
+void
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool);
+struct iobuf *
+iobuf_get(struct iobuf_pool *iobuf_pool);
+void
+iobuf_unref(struct iobuf *iobuf);
+struct iobuf *
+iobuf_ref(struct iobuf *iobuf);
+void
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool);
+void
+iobuf_to_iovec(struct iobuf *iob, struct iovec *iov);
+
+#define iobuf_ptr(iob) ((iob)->ptr)
+#define iobpool_default_pagesize(iobpool) ((iobpool)->default_page_size)
+#define iobuf_pagesize(iob) (iob->iobuf_arena->page_size)
+
+struct iobref {
+ gf_lock_t lock;
+ gf_atomic_t ref;
+ struct iobuf **iobrefs;
+ int allocated;
+ int used;
+};
+
+struct iobref *
+iobref_new(void);
+struct iobref *
+iobref_ref(struct iobref *iobref);
+void
+iobref_unref(struct iobref *iobref);
+int
+iobref_add(struct iobref *iobref, struct iobuf *iobuf);
+int
+iobref_merge(struct iobref *to, struct iobref *from);
+void
+iobref_clear(struct iobref *iobref);
+
+size_t
+iobuf_size(struct iobuf *iobuf);
+size_t
+iobref_size(struct iobref *iobref);
+void
+iobuf_stats_dump(struct iobuf_pool *iobuf_pool);
+
+struct iobuf *
+iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size);
+
+struct iobuf *
+iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size,
+ size_t align_size);
+
+int
+iobuf_copy(struct iobuf_pool *iobuf_pool, const struct iovec *iovec_src,
+ int iovcnt, struct iobref **iobref, struct iobuf **iobuf,
+ struct iovec *iov_dst);
+
+#endif /* !_IOBUF_H_ */
diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h
new file mode 100644
index 00000000000..4d601bbcbd6
--- /dev/null
+++ b/libglusterfs/src/glusterfs/latency.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LATENCY_H__
+#define __LATENCY_H__
+
+#include <inttypes.h>
+#include <time.h>
+
+typedef struct _gf_latency {
+ uint64_t min; /* min time for the call (nanoseconds) */
+ uint64_t max; /* max time for the call (nanoseconds) */
+ uint64_t total; /* total time (nanoseconds) */
+ uint64_t count;
+} gf_latency_t;
+
+gf_latency_t *
+gf_latency_new(size_t n);
+
+void
+gf_latency_reset(gf_latency_t *lat);
+
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end);
+#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h
new file mode 100644
index 00000000000..cb31dd7614b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h
@@ -0,0 +1,245 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _LG_MESSAGES_H_
+#define _LG_MESSAGES_H_
+
+#include "glusterfs/glfs-message-id.h"
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ LIBGLUSTERFS, LG_MSG_ASPRINTF_FAILED, LG_MSG_INVALID_ENTRY,
+ LG_MSG_COUNT_LESS_THAN_ZERO, LG_MSG_COUNT_LESS_THAN_DATA_PAIRS,
+ LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ LG_MSG_KEY_OR_VALUE_NULL, LG_MSG_FAILED_TO_LOG_DICT,
+ LG_MSG_NULL_VALUE_IN_DICT, LG_MSG_DIR_OP_FAILED,
+ LG_MSG_STORE_HANDLE_CREATE_FAILED, LG_MSG_FILE_OP_FAILED,
+ LG_MSG_FILE_STAT_FAILED, LG_MSG_LOCK_FAILED, LG_MSG_UNLOCK_FAILED,
+ LG_MSG_DICT_SERIAL_FAILED, LG_MSG_DICT_UNSERIAL_FAILED, LG_MSG_NO_MEMORY,
+ LG_MSG_VOLUME_ERROR, LG_MSG_SUB_VOLUME_ERROR, LG_MSG_SYNTAX_ERROR,
+ LG_MSG_BACKTICK_PARSE_FAILED, LG_MSG_BUFFER_ERROR, LG_MSG_STRDUP_ERROR,
+ LG_MSG_HASH_FUNC_ERROR, LG_MSG_GET_BUCKET_FAILED, LG_MSG_INSERT_FAILED,
+ LG_MSG_OUT_OF_RANGE, LG_MSG_VALIDATE_RETURNS, LG_MSG_VALIDATE_REC_FAILED,
+ LG_MSG_RB_TABLE_CREATE_FAILED, LG_MSG_PATH_NOT_FOUND,
+ LG_MSG_EXPAND_FD_TABLE_FAILED, LG_MSG_MAPPING_FAILED,
+ LG_MSG_INIT_IOBUF_FAILED, LG_MSG_PAGE_SIZE_EXCEEDED, LG_MSG_ARENA_NOT_FOUND,
+ LG_MSG_IOBUF_NOT_FOUND, LG_MSG_POOL_NOT_FOUND, LG_MSG_SET_ATTRIBUTE_FAILED,
+ LG_MSG_READ_ATTRIBUTE_FAILED, LG_MSG_UNMOUNT_FAILED,
+ LG_MSG_LATENCY_MEASUREMENT_STATE, LG_MSG_NO_PERM, LG_MSG_NO_KEY,
+ LG_MSG_DICT_NULL, LG_MSG_INIT_TIMER_FAILED, LG_MSG_FD_ANONYMOUS_FAILED,
+ LG_MSG_FD_CREATE_FAILED, LG_MSG_BUFFER_FULL, LG_MSG_FWRITE_FAILED,
+ LG_MSG_PRINT_FAILED, LG_MSG_MEM_POOL_DESTROY,
+ LG_MSG_EXPAND_CLIENT_TABLE_FAILED, LG_MSG_DISCONNECT_CLIENT,
+ LG_MSG_PIPE_CREATE_FAILED, LG_MSG_SET_PIPE_FAILED,
+ LG_MSG_REGISTER_PIPE_FAILED, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
+ LG_MSG_INDEX_NOT_FOUND, LG_MSG_EPOLL_FD_CREATE_FAILED,
+ LG_MSG_SLOT_NOT_FOUND, LG_MSG_STALE_FD_FOUND, LG_MSG_GENERATION_MISMATCH,
+ LG_MSG_PTHREAD_KEY_CREATE_FAILED, LG_MSG_TRANSLATOR_INIT_FAILED,
+ LG_MSG_UUID_BUF_INIT_FAILED, LG_MSG_LKOWNER_BUF_INIT_FAILED,
+ LG_MSG_SYNCTASK_INIT_FAILED, LG_MSG_SYNCOPCTX_INIT_FAILED,
+ LG_MSG_GLOBAL_INIT_FAILED, LG_MSG_PTHREAD_FAILED, LG_MSG_DIR_IS_SYMLINK,
+ LG_MSG_RESOLVE_HOSTNAME_FAILED, LG_MSG_GETADDRINFO_FAILED,
+ LG_MSG_GETNAMEINFO_FAILED, LG_MSG_PATH_ERROR, LG_MSG_INET_PTON_FAILED,
+ LG_MSG_NEGATIVE_NUM_PASSED, LG_MSG_GETHOSTNAME_FAILED,
+ LG_MSG_RESERVED_PORTS_ERROR, LG_MSG_INVALID_PORT, LG_MSG_INVALID_FAMILY,
+ LG_MSG_CONVERSION_FAILED, LG_MSG_SKIP_HEADER_FAILED, LG_MSG_INVALID_LOG,
+ LG_MSG_UTIMES_FAILED, LG_MSG_BACKTRACE_SAVE_FAILED, LG_MSG_INIT_FAILED,
+ LG_MSG_VALIDATION_FAILED, LG_MSG_GRAPH_ERROR, LG_MSG_UNKNOWN_OPTIONS_FAILED,
+ LG_MSG_CTX_NULL, LG_MSG_TMPFILE_CREATE_FAILED, LG_MSG_DLOPEN_FAILED,
+ LG_MSG_LOAD_FAILED, LG_MSG_DLSYM_ERROR, LG_MSG_TREE_NOT_FOUND,
+ LG_MSG_PER_DENTRY, LG_MSG_DENTRY, LG_MSG_GETIFADDRS_FAILED,
+ LG_MSG_REGEX_OP_FAILED, LG_MSG_FRAME_ERROR, LG_MSG_SET_PARAM_FAILED,
+ LG_MSG_GET_PARAM_FAILED, LG_MSG_PREPARE_FAILED, LG_MSG_EXEC_FAILED,
+ LG_MSG_BINDING_FAILED, LG_MSG_DELETE_FAILED, LG_MSG_GET_ID_FAILED,
+ LG_MSG_CREATE_FAILED, LG_MSG_PARSE_FAILED, LG_MSG_GETCONTEXT_FAILED,
+ LG_MSG_UPDATE_FAILED, LG_MSG_QUERY_CALL_BACK_FAILED,
+ LG_MSG_GET_RECORD_FAILED, LG_MSG_DB_ERROR, LG_MSG_CONNECTION_ERROR,
+ LG_MSG_NOT_MULTITHREAD_MODE, LG_MSG_SKIP_PATH, LG_MSG_INVALID_FOP,
+ LG_MSG_QUERY_FAILED, LG_MSG_CLEAR_COUNTER_FAILED, LG_MSG_LOCK_LIST_FAILED,
+ LG_MSG_UNLOCK_LIST_FAILED, LG_MSG_ADD_TO_LIST_FAILED, LG_MSG_INIT_DB_FAILED,
+ LG_MSG_DELETE_FROM_LIST_FAILED, LG_MSG_CLOSE_CONNECTION_FAILED,
+ LG_MSG_INSERT_OR_UPDATE_FAILED, LG_MSG_FIND_OP_FAILED,
+ LG_MSG_CONNECTION_INIT_FAILED, LG_MSG_COMPLETED_TASK, LG_MSG_WAKE_UP_ZOMBIE,
+ LG_MSG_REWAITING_TASK, LG_MSG_SLEEP_ZOMBIE, LG_MSG_SWAPCONTEXT_FAILED,
+ LG_MSG_UNSUPPORTED_PLUGIN, LG_MSG_INVALID_DB_TYPE, LG_MSG_UNDERSIZED_BUF,
+ LG_MSG_DATA_CONVERSION_ERROR, LG_MSG_DICT_ERROR, LG_MSG_IOBUFS_NOT_FOUND,
+ LG_MSG_ENTRIES_NULL, LG_MSG_FD_NOT_FOUND_IN_FDTABLE,
+ LG_MSG_REALLOC_FOR_FD_PTR_FAILED, LG_MSG_DICT_SET_FAILED, LG_MSG_NULL_PTR,
+ LG_MSG_RBTHASH_INIT_BUCKET_FAILED, LG_MSG_ASSERTION_FAILED,
+ LG_MSG_HOSTNAME_NULL, LG_MSG_INVALID_IPV4_FORMAT,
+ LG_MSG_CTX_CLEANUP_STARTED, LG_MSG_TIMER_REGISTER_ERROR,
+ LG_MSG_PTR_HEADER_CORRUPTED, LG_MSG_INVALID_UPLINK, LG_MSG_CLIENT_NULL,
+ LG_MSG_XLATOR_DOES_NOT_IMPLEMENT, LG_MSG_DENTRY_NOT_FOUND,
+ LG_MSG_INODE_NOT_FOUND, LG_MSG_INODE_TABLE_NOT_FOUND,
+ LG_MSG_DENTRY_CREATE_FAILED, LG_MSG_INODE_CONTEXT_FREED,
+ LG_MSG_UNKNOWN_LOCK_TYPE, LG_MSG_UNLOCK_BEFORE_LOCK,
+ LG_MSG_LOCK_OWNER_ERROR, LG_MSG_MEMPOOL_PTR_NULL,
+ LG_MSG_QUOTA_XATTRS_MISSING, LG_MSG_INVALID_STRING, LG_MSG_BIND_REF,
+ LG_MSG_REF_COUNT, LG_MSG_INVALID_ARG, LG_MSG_VOL_OPTION_ADD,
+ LG_MSG_XLATOR_OPTION_INVALID, LG_MSG_GETTIMEOFDAY_FAILED,
+ LG_MSG_GRAPH_INIT_FAILED, LG_MSG_EVENT_NOTIFY_FAILED,
+ LG_MSG_ACTIVE_GRAPH_NULL, LG_MSG_VOLFILE_PARSE_ERROR, LG_MSG_FD_INODE_NULL,
+ LG_MSG_INVALID_VOLFILE_ENTRY, LG_MSG_PER_DENTRY_FAILED,
+ LG_MSG_PARENT_DENTRY_NOT_FOUND, LG_MSG_DENTRY_CYCLIC_LOOP,
+ LG_MSG_INVALID_POLL_IN, LG_MSG_INVALID_POLL_OUT, LG_MSG_EPOLL_FD_ADD_FAILED,
+ LG_MSG_EPOLL_FD_DEL_FAILED, LG_MSG_EPOLL_FD_MODIFY_FAILED,
+ LG_MSG_STARTED_EPOLL_THREAD, LG_MSG_EXITED_EPOLL_THREAD,
+ LG_MSG_START_EPOLL_THREAD_FAILED, LG_MSG_FALLBACK_TO_POLL,
+ LG_MSG_QUOTA_CONF_ERROR, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
+ LG_MSG_RBTHASH_GET_BUCKET_FAILED, LG_MSG_RBTHASH_INSERT_FAILED,
+ LG_MSG_RBTHASH_INIT_ENTRY_FAILED, LG_MSG_TMPFILE_DELETE_FAILED,
+ LG_MSG_MEMPOOL_INVALID_FREE, LG_MSG_LOCK_FAILURE, LG_MSG_SET_LOG_LEVEL,
+ LG_MSG_VERIFY_PLATFORM, LG_MSG_RUNNER_LOG, LG_MSG_LEASEID_BUF_INIT_FAILED,
+ LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST,
+ LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED,
+ LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
+ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE,
+ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED,
+ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, LG_MSG_DUPLICATE_ENTRY,
+ LG_MSG_THREAD_NAME_TOO_LONG, LG_MSG_SET_THREAD_FAILED,
+ LG_MSG_THREAD_CREATE_FAILED, LG_MSG_FILE_DELETE_FAILED, LG_MSG_WRONG_VALUE,
+ LG_MSG_PATH_OPEN_FAILED, LG_MSG_DISPATCH_HANDLER_FAILED,
+ LG_MSG_READ_FILE_FAILED, LG_MSG_ENTRIES_NOT_PROVIDED,
+ LG_MSG_ENTRIES_PROVIDED, LG_MSG_UNKNOWN_OPTION_TYPE,
+ LG_MSG_OPTION_DEPRECATED, LG_MSG_INVALID_INIT, LG_MSG_OBJECT_NULL,
+ LG_MSG_GRAPH_NOT_SET, LG_MSG_FILENAME_NOT_SPECIFIED, LG_MSG_STRUCT_MISS,
+ LG_MSG_METHOD_MISS, LG_MSG_INPUT_DATA_NULL, LG_MSG_OPEN_LOGFILE_FAILED);
+
+#define LG_MSG_EPOLL_FD_CREATE_FAILED_STR "epoll fd creation failed"
+#define LG_MSG_INVALID_POLL_IN_STR "invalid poll_in value"
+#define LG_MSG_INVALID_POLL_OUT_STR "invalid poll_out value"
+#define LG_MSG_SLOT_NOT_FOUND_STR "could not find slot"
+#define LG_MSG_EPOLL_FD_ADD_FAILED_STR "failed to add fd to epoll"
+#define LG_MSG_EPOLL_FD_DEL_FAILED_STR "fail to delete fd to epoll"
+#define LG_MSG_EPOLL_FD_MODIFY_FAILED_STR "failed to modify fd events"
+#define LG_MSG_STALE_FD_FOUND_STR "stale fd found"
+#define LG_MSG_GENERATION_MISMATCH_STR "generation mismatch"
+#define LG_MSG_STARTED_EPOLL_THREAD_STR "Started thread with index"
+#define LG_MSG_EXITED_EPOLL_THREAD_STR "Exited thread"
+#define LG_MSG_DISPATCH_HANDLER_FAILED_STR "Failed to dispatch handler"
+#define LG_MSG_START_EPOLL_THREAD_FAILED_STR "Failed to start thread"
+#define LG_MSG_PIPE_CREATE_FAILED_STR "pipe creation failed"
+#define LG_MSG_SET_PIPE_FAILED_STR "could not set pipe to non blocking mode"
+#define LG_MSG_REGISTER_PIPE_FAILED_STR \
+ "could not register pipe fd with poll event loop"
+#define LG_MSG_POLL_IGNORE_MULTIPLE_THREADS_STR \
+ "Currently poll does not use multiple event processing threads, count " \
+ "ignored"
+#define LG_MSG_INDEX_NOT_FOUND_STR "index not found"
+#define LG_MSG_READ_FILE_FAILED_STR "read on file returned error"
+#define LG_MSG_RB_TABLE_CREATE_FAILED_STR "Failed to create rb table bucket"
+#define LG_MSG_HASH_FUNC_ERROR_STR "Hash function not given"
+#define LG_MSG_ENTRIES_NOT_PROVIDED_STR \
+ "Both mem-pool and expected entries not provided"
+#define LG_MSG_ENTRIES_PROVIDED_STR \
+ "Both mem-pool and expected entries are provided"
+#define LG_MSG_RBTHASH_INIT_BUCKET_FAILED_STR "failed to init buckets"
+#define LG_MSG_RBTHASH_GET_ENTRY_FAILED_STR "Failed to get entry from mem-pool"
+#define LG_MSG_RBTHASH_GET_BUCKET_FAILED_STR "Failed to get bucket"
+#define LG_MSG_RBTHASH_INSERT_FAILED_STR "Failed to insert entry"
+#define LG_MSG_RBTHASH_INIT_ENTRY_FAILED_STR "Failed to init entry"
+#define LG_MSG_FILE_STAT_FAILED_STR "failed to stat"
+#define LG_MSG_INET_PTON_FAILED_STR "inet_pton() failed"
+#define LG_MSG_INVALID_ENTRY_STR "Invalid arguments"
+#define LG_MSG_NEGATIVE_NUM_PASSED_STR "negative number passed"
+#define LG_MSG_PATH_ERROR_STR "Path manipulation failed"
+#define LG_MSG_FILE_OP_FAILED_STR "could not open/read file, getting ports info"
+#define LG_MSG_RESERVED_PORTS_ERROR_STR \
+ "Not able to get reserved ports, hence there is a possibility that " \
+ "glusterfs may consume reserved port"
+#define LG_MSG_INVALID_PORT_STR "invalid port"
+#define LG_MSG_GETNAMEINFO_FAILED_STR "Could not lookup hostname"
+#define LG_MSG_GETIFADDRS_FAILED_STR "getifaddrs() failed"
+#define LG_MSG_INVALID_FAMILY_STR "Invalid family"
+#define LG_MSG_CONVERSION_FAILED_STR "String conversion failed"
+#define LG_MSG_GETADDRINFO_FAILED_STR "error in getaddrinfo"
+#define LG_MSG_DUPLICATE_ENTRY_STR "duplicate entry for volfile-server"
+#define LG_MSG_PTHREAD_NAMING_FAILED_STR "Failed to compose thread name"
+#define LG_MSG_THREAD_NAME_TOO_LONG_STR \
+ "Thread name is too long. It has been truncated"
+#define LG_MSG_SET_THREAD_FAILED_STR "Could not set thread name"
+#define LG_MSG_THREAD_CREATE_FAILED_STR "Thread creation failed"
+#define LG_MSG_PTHREAD_ATTR_INIT_FAILED_STR \
+ "Thread attribute initialization failed"
+#define LG_MSG_SKIP_HEADER_FAILED_STR "Failed to skip header section"
+#define LG_MSG_INVALID_LOG_STR "Invalid log-format"
+#define LG_MSG_UTIMENSAT_FAILED_STR "utimenstat failed"
+#define LG_MSG_UTIMES_FAILED_STR "utimes failed"
+#define LG_MSG_FILE_DELETE_FAILED_STR "Unable to delete file"
+#define LG_MSG_BACKTRACE_SAVE_FAILED_STR "Failed to save the backtrace"
+#define LG_MSG_WRONG_VALUE_STR "wrong value"
+#define LG_MSG_DIR_OP_FAILED_STR "Failed to create directory"
+#define LG_MSG_DIR_IS_SYMLINK_STR "dir is symlink"
+#define LG_MSG_RESOLVE_HOSTNAME_FAILED_STR "couldnot resolve hostname"
+#define LG_MSG_PATH_OPEN_FAILED_STR "Unable to open path"
+#define LG_MSG_NO_MEMORY_STR "Error allocating memory"
+#define LG_MSG_EVENT_NOTIFY_FAILED_STR "notification failed"
+#define LG_MSG_PER_DENTRY_FAILED_STR "per dentry fn returned"
+#define LG_MSG_PARENT_DENTRY_NOT_FOUND_STR "parent not found"
+#define LG_MSG_DENTRY_CYCLIC_LOOP_STR \
+ "detected cyclic loop formation during inode linkage"
+#define LG_MSG_CTX_NULL_STR "_ctx not found"
+#define LG_MSG_DENTRY_NOT_FOUND_STR "dentry not found"
+#define LG_MSG_OUT_OF_RANGE_STR "out of range"
+#define LG_MSG_UNKNOWN_OPTION_TYPE_STR "unknown option type"
+#define LG_MSG_VALIDATE_RETURNS_STR "validate of returned"
+#define LG_MSG_OPTION_DEPRECATED_STR \
+ "option is deprecated, continuing with correction"
+#define LG_MSG_VALIDATE_REC_FAILED_STR "validate_rec failed"
+#define LG_MSG_MAPPING_FAILED_STR "mapping failed"
+#define LG_MSG_INIT_IOBUF_FAILED_STR "init failed"
+#define LG_MSG_ARENA_NOT_FOUND_STR "arena not found"
+#define LG_MSG_PAGE_SIZE_EXCEEDED_STR \
+ "page_size of iobufs in arena being added is greater than max available"
+#define LG_MSG_POOL_NOT_FOUND_STR "pool not found"
+#define LG_MSG_IOBUF_NOT_FOUND_STR "iobuf not found"
+#define LG_MSG_DLOPEN_FAILED_STR "DL open failed"
+#define LG_MSG_DLSYM_ERROR_STR "dlsym missing"
+#define LG_MSG_LOAD_FAILED_STR "Failed to load xlator options table"
+#define LG_MSG_INPUT_DATA_NULL_STR \
+ "input data is null. cannot update the lru limit of the inode table. " \
+ "continuing with older value."
+#define LG_MSG_INIT_FAILED_STR "No init() found"
+#define LG_MSG_VOLUME_ERROR_STR \
+ "Initialization of volume failed. review your volfile again."
+#define LG_MSG_TREE_NOT_FOUND_STR "Translator tree not found"
+#define LG_MSG_SET_LOG_LEVEL_STR "setting log level"
+#define LG_MSG_INVALID_INIT_STR \
+ "Invalid log-level. possible values are DEBUG|WARNING|ERROR|NONE|TRACE"
+#define LG_MSG_OBJECT_NULL_STR "object is null, returning false."
+#define LG_MSG_GRAPH_NOT_SET_STR "Graph is not set for xlator"
+#define LG_MSG_OPEN_LOGFILE_FAILED_STR "failed to open logfile"
+#define LG_MSG_STRDUP_ERROR_STR "failed to create metrics dir"
+#define LG_MSG_FILENAME_NOT_SPECIFIED_STR "no filename specified"
+#define LG_MSG_UNDERSIZED_BUF_STR "data value is smaller than expected"
+#define LG_MSG_DICT_SET_FAILED_STR "unable to set dict"
+#define LG_MSG_COUNT_LESS_THAN_ZERO_STR "count < 0!"
+#define LG_MSG_PAIRS_LESS_THAN_COUNT_STR "less than count data pairs found"
+#define LG_MSG_NULL_PTR_STR "pair->key is null!"
+#define LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO_STR "value->len < 0"
+#define LG_MSG_INVALID_ARG_STR "buf is null"
+#define LG_MSG_KEY_OR_VALUE_NULL_STR "key or value is null"
+#define LG_MSG_NULL_VALUE_IN_DICT_STR "null value found in dict"
+#define LG_MSG_FAILED_TO_LOG_DICT_STR "Failed to log dictionary"
+#define LG_MSG_DICT_ERROR_STR "dict error"
+#define LG_MSG_STRUCT_MISS_STR "struct missing"
+#define LG_MSG_METHOD_MISS_STR "method missing(init)"
+
+#endif /* !_LG_MESSAGES_H_ */
diff --git a/libglusterfs/src/glusterfs/list.h b/libglusterfs/src/glusterfs/list.h
new file mode 100644
index 00000000000..221a710ca30
--- /dev/null
+++ b/libglusterfs/src/glusterfs/list.h
@@ -0,0 +1,273 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LLIST_H
+#define _LLIST_H
+
+struct list_head {
+ struct list_head *next;
+ struct list_head *prev;
+};
+
+#define INIT_LIST_HEAD(head) \
+ do { \
+ (head)->next = (head)->prev = head; \
+ } while (0)
+
+static inline void
+list_add(struct list_head *new, struct list_head *head)
+{
+ new->prev = head;
+ new->next = head->next;
+
+ new->prev->next = new;
+ new->next->prev = new;
+}
+
+static inline void
+list_add_tail(struct list_head *new, struct list_head *head)
+{
+ new->next = head;
+ new->prev = head->prev;
+
+ new->prev->next = new;
+ new->next->prev = new;
+}
+
+/* This function will insert the element to the list in a order.
+ Order will be based on the compare function provided as a input.
+ If element to be inserted in ascending order compare should return:
+ 0: if both the arguments are equal
+ >0: if first argument is greater than second argument
+ <0: if first argument is less than second argument */
+static inline void
+list_add_order(struct list_head *new, struct list_head *head,
+ int (*compare)(struct list_head *, struct list_head *))
+{
+ struct list_head *pos = head->prev;
+
+ while (pos != head) {
+ if (compare(new, pos) >= 0)
+ break;
+
+ /* Iterate the list in the reverse order. This will have
+ better efficiency if the elements are inserted in the
+ ascending order */
+ pos = pos->prev;
+ }
+
+ list_add(new, pos);
+}
+
+static inline void
+list_del(struct list_head *old)
+{
+ old->prev->next = old->next;
+ old->next->prev = old->prev;
+
+ old->next = (void *)0xbabebabe;
+ old->prev = (void *)0xcafecafe;
+}
+
+static inline void
+list_del_init(struct list_head *old)
+{
+ old->prev->next = old->next;
+ old->next->prev = old->prev;
+
+ old->next = old;
+ old->prev = old;
+}
+
+static inline void
+list_move(struct list_head *list, struct list_head *head)
+{
+ list_del(list);
+ list_add(list, head);
+}
+
+static inline void
+list_move_tail(struct list_head *list, struct list_head *head)
+{
+ list_del(list);
+ list_add_tail(list, head);
+}
+
+static inline int
+list_empty(struct list_head *head)
+{
+ return (head->next == head);
+}
+
+static inline void
+__list_splice(struct list_head *list, struct list_head *head)
+{
+ (list->prev)->next = (head->next);
+ (head->next)->prev = (list->prev);
+
+ (head)->next = (list->next);
+ (list->next)->prev = (head);
+}
+
+static inline void
+list_splice(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_splice(list, head);
+}
+
+/* Splice moves @list to the head of the list at @head. */
+static inline void
+list_splice_init(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_splice(list, head);
+ INIT_LIST_HEAD(list);
+}
+
+static inline void
+__list_append(struct list_head *list, struct list_head *head)
+{
+ (head->prev)->next = (list->next);
+ (list->next)->prev = (head->prev);
+ (head->prev) = (list->prev);
+ (list->prev)->next = head;
+}
+
+static inline void
+list_append(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_append(list, head);
+}
+
+/* Append moves @list to the end of @head */
+static inline void
+list_append_init(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_append(list, head);
+ INIT_LIST_HEAD(list);
+}
+
+static inline int
+list_is_last(struct list_head *list, struct list_head *head)
+{
+ return (list->next == head);
+}
+
+static inline int
+list_is_singular(struct list_head *head)
+{
+ return !list_empty(head) && (head->next == head->prev);
+}
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * If @old was empty, it will be overwritten.
+ */
+static inline void
+list_replace(struct list_head *old, struct list_head *new)
+{
+ new->next = old->next;
+ new->next->prev = new;
+ new->prev = old->prev;
+ new->prev->next = new;
+}
+
+static inline void
+list_replace_init(struct list_head *old, struct list_head *new)
+{
+ list_replace(old, new);
+ INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void
+list_rotate_left(struct list_head *head)
+{
+ struct list_head *first;
+
+ if (!list_empty(head)) {
+ first = head->next;
+ list_move_tail(first, head);
+ }
+}
+
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr) - (unsigned long)(&((type *)0)->member)))
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_last_entry(ptr, type, member) list_entry((ptr)->prev, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_prev_entry(pos, member) \
+ list_entry((pos)->member.prev, typeof(*(pos)), member)
+
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member), \
+ n = list_entry(pos->member.prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * This list implementation has some advantages, but one disadvantage: you
+ * can't use NULL to check whether you're at the head or tail. Thus, the
+ * address of the head has to be an argument for these macros.
+ */
+
+#define list_next(ptr, head, type, member) \
+ (((ptr)->member.next == head) \
+ ? NULL \
+ : list_entry((ptr)->member.next, type, member))
+
+#define list_prev(ptr, head, type, member) \
+ (((ptr)->member.prev == head) \
+ ? NULL \
+ : list_entry((ptr)->member.prev, type, member))
+
+#endif /* _LLIST_H */
diff --git a/libglusterfs/src/glusterfs/lkowner.h b/libglusterfs/src/glusterfs/lkowner.h
new file mode 100644
index 00000000000..692de34bc7a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/lkowner.h
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LK_OWNER_H
+#define _LK_OWNER_H
+
+#include "glusterfs/glusterfs-fops.h"
+
+/* LKOWNER to string functions */
+static inline void
+lkowner_unparse(gf_lkowner_t *lkowner, char *buf, int buf_len)
+{
+ int i = 0;
+ int j = 0;
+
+ for (i = 0; i < lkowner->len; i++) {
+ if (i && !(i % 8)) {
+ buf[j] = '-';
+ j++;
+ }
+ sprintf(&buf[j], "%02hhx", lkowner->data[i]);
+ j += 2;
+ if (j == buf_len)
+ break;
+ }
+ if (j < buf_len)
+ buf[j] = '\0';
+}
+
+static inline void
+set_lk_owner_from_ptr(gf_lkowner_t *lkowner, void *data)
+{
+ int i = 0;
+ int j = 0;
+
+ lkowner->len = sizeof(unsigned long);
+ for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
+ lkowner->data[i] = (char)((((unsigned long)data) >> j) & 0xff);
+ }
+}
+
+static inline void
+set_lk_owner_from_uint64(gf_lkowner_t *lkowner, uint64_t data)
+{
+ int i = 0;
+ int j = 0;
+
+ lkowner->len = 8;
+ for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
+ lkowner->data[i] = (char)((data >> j) & 0xff);
+ }
+}
+
+/* Return true if the locks have the same owner */
+static inline int
+is_same_lkowner(gf_lkowner_t *l1, gf_lkowner_t *l2)
+{
+ return ((l1->len == l2->len) && !memcmp(l1->data, l2->data, l1->len));
+}
+
+static inline int
+is_lk_owner_null(gf_lkowner_t *lkowner)
+{
+ int is_null = 1;
+ int i = 0;
+
+ if (lkowner == NULL || lkowner->len == 0)
+ goto out;
+
+ for (i = 0; i < lkowner->len; i++) {
+ if (lkowner->data[i] != 0) {
+ is_null = 0;
+ break;
+ }
+ }
+out:
+ return is_null;
+}
+
+static inline void
+lk_owner_copy(gf_lkowner_t *dst, gf_lkowner_t *src)
+{
+ dst->len = src->len;
+ memcpy(dst->data, src->data, src->len);
+}
+#endif /* _LK_OWNER_H */
diff --git a/libglusterfs/src/glusterfs/locking.h b/libglusterfs/src/glusterfs/locking.h
new file mode 100644
index 00000000000..43cc87735d1
--- /dev/null
+++ b/libglusterfs/src/glusterfs/locking.h
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LOCKING_H
+#define _LOCKING_H
+
+#include <pthread.h>
+
+#if defined(GF_DARWIN_HOST_OS)
+#include <libkern/OSAtomic.h>
+#define pthread_spinlock_t OSSpinLock
+#define pthread_spin_lock(l) OSSpinLockLock(l)
+#define pthread_spin_unlock(l) OSSpinLockUnlock(l)
+#define pthread_spin_destroy(l) 0
+#define pthread_spin_init(l, v) (*l = v)
+#endif
+
+#if defined(HAVE_SPINLOCK)
+
+typedef union {
+ pthread_spinlock_t spinlock;
+ pthread_mutex_t mutex;
+} gf_lock_t;
+
+#if !defined(LOCKING_IMPL)
+extern int use_spinlocks;
+
+/*
+ * Using a dispatch table would be unpleasant because we're dealing with two
+ * different types. If the dispatch contains direct pointers to pthread_xx
+ * or mutex_xxx then we have to hope that every possible union alternative
+ * starts at the same address as the union itself. I'm old enough to remember
+ * compilers where this was not the case (for alignment reasons) so I'm a bit
+ * paranoid about that. Also, I don't like casting arguments through "void *"
+ * which we'd also have to do to avoid type errors. The other alternative would
+ * be to define actual functions which pick out the right union member, and put
+ * those in the dispatch tables. Now we have a pointer dereference through the
+ * dispatch table plus a function call, which is likely to be worse than the
+ * branching here from the ?: construct. If it were a clear win it might be
+ * worth the extra complexity, but for now this way seems preferable.
+ */
+
+#define LOCK_INIT(x) \
+ (use_spinlocks ? pthread_spin_init(&((x)->spinlock), 0) \
+ : pthread_mutex_init(&((x)->mutex), 0))
+
+#define LOCK(x) \
+ (use_spinlocks ? pthread_spin_lock(&((x)->spinlock)) \
+ : pthread_mutex_lock(&((x)->mutex)))
+
+#define TRY_LOCK(x) \
+ (use_spinlocks ? pthread_spin_trylock(&((x)->spinlock)) \
+ : pthread_mutex_trylock(&((x)->mutex)))
+
+#define UNLOCK(x) \
+ (use_spinlocks ? pthread_spin_unlock(&((x)->spinlock)) \
+ : pthread_mutex_unlock(&((x)->mutex)))
+
+#define LOCK_DESTROY(x) \
+ (use_spinlocks ? pthread_spin_destroy(&((x)->spinlock)) \
+ : pthread_mutex_destroy(&((x)->mutex)))
+
+#endif
+
+#else
+
+typedef pthread_mutex_t gf_lock_t;
+
+#define LOCK_INIT(x) pthread_mutex_init(x, 0)
+#define LOCK(x) pthread_mutex_lock(x)
+#define TRY_LOCK(x) pthread_mutex_trylock(x)
+#define UNLOCK(x) pthread_mutex_unlock(x)
+#define LOCK_DESTROY(x) pthread_mutex_destroy(x)
+
+#endif /* HAVE_SPINLOCK */
+
+#endif /* _LOCKING_H */
diff --git a/libglusterfs/src/glusterfs/logging.h b/libglusterfs/src/glusterfs/logging.h
new file mode 100644
index 00000000000..b3a6ac191f0
--- /dev/null
+++ b/libglusterfs/src/glusterfs/logging.h
@@ -0,0 +1,383 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LOGGING_H__
+#define __LOGGING_H__
+
+#include <sys/time.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <pthread.h>
+#include "glusterfs/list.h"
+
+#ifdef GF_DARWIN_HOST_OS
+#define GF_PRI_FSBLK "u"
+#define GF_PRI_DEV PRId32
+#define GF_PRI_INODE PRIu64
+#define GF_PRI_NLINK PRIu32
+#define GF_PRI_SECOND "ld"
+#define GF_PRI_SUSECONDS "06d"
+#define GF_PRI_SNSECONDS "09ld"
+#define GF_PRI_USEC "d"
+#else
+#define GF_PRI_FSBLK PRIu64
+#define GF_PRI_DEV PRIu64
+#define GF_PRI_INODE PRIu64
+#define GF_PRI_NLINK PRIu32
+#define GF_PRI_SECOND "lu"
+#define GF_PRI_SUSECONDS "06ld"
+#define GF_PRI_SNSECONDS "09ld"
+#define GF_PRI_USEC "ld"
+#endif
+#define GF_PRI_BLKSIZE PRId32
+#define GF_PRI_SIZET "zu"
+#define GF_PRI_ATOMIC PRIu64
+
+#ifdef GF_DARWIN_HOST_OS
+#define GF_PRI_TIME "ld"
+#else
+#define GF_PRI_TIME PRIu64
+#endif
+
+#if 0
+/* Syslog definitions :-) */
+#define LOG_EMERG 0 /* system is unusable */
+#define LOG_ALERT 1 /* action must be taken immediately */
+#define LOG_CRIT 2 /* critical conditions */
+#define LOG_ERR 3 /* error conditions */
+#define LOG_WARNING 4 /* warning conditions */
+#define LOG_NOTICE 5 /* normal but significant condition */
+#define LOG_INFO 6 /* informational */
+#define LOG_DEBUG 7 /* debug-level messages */
+#endif
+
+#define GF_LOG_FORMAT_NO_MSG_ID "no-msg-id"
+#define GF_LOG_FORMAT_WITH_MSG_ID "with-msg-id"
+
+#define GF_LOGGER_GLUSTER_LOG "gluster-log"
+#define GF_LOGGER_SYSLOG "syslog"
+
+typedef enum {
+ GF_LOG_NONE,
+ GF_LOG_EMERG,
+ GF_LOG_ALERT,
+ GF_LOG_CRITICAL, /* fatal errors */
+ GF_LOG_ERROR, /* major failures (not necessarily fatal) */
+ GF_LOG_WARNING, /* info about normal operation */
+ GF_LOG_NOTICE,
+ GF_LOG_INFO, /* Normal information */
+ GF_LOG_DEBUG, /* internal errors */
+ GF_LOG_TRACE, /* full trace of operation */
+} gf_loglevel_t;
+
+/* format for the logs */
+typedef enum {
+ gf_logformat_traditional = 0, /* Format as in gluster 3.5 */
+ gf_logformat_withmsgid, /* Format enhanced with MsgID, ident, errstr */
+ gf_logformat_cee /* log enhanced format in cee */
+} gf_log_format_t;
+
+/* log infrastructure to log to */
+typedef enum {
+ gf_logger_glusterlog = 0, /* locations and files as in gluster 3.5 */
+ gf_logger_syslog /* log to (r)syslog, based on (r)syslog conf */
+ /* NOTE: In the future journald, lumberjack, next new thing here */
+} gf_log_logger_t;
+
+#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY DATADIR "/log/glusterfs/quota_crawl"
+#define DEFAULT_LOG_LEVEL GF_LOG_INFO
+
+typedef struct gf_log_handle_ {
+ pthread_mutex_t logfile_mutex;
+ gf_loglevel_t loglevel;
+ gf_loglevel_t sys_log_level;
+ int gf_log_syslog;
+ char *filename;
+ FILE *logfile;
+ FILE *gf_log_logfile;
+ char *cmd_log_filename;
+ FILE *cmdlogfile;
+ gf_log_logger_t logger;
+ gf_log_format_t logformat;
+ char *ident;
+ int log_control_file_found;
+ struct list_head lru_queue;
+ pthread_mutex_t log_buf_lock;
+ struct _gf_timer *log_flush_timer;
+ int localtime;
+ uint32_t lru_size;
+ uint32_t lru_cur_size;
+ uint32_t timeout;
+ uint8_t logrotate;
+ uint8_t cmd_history_logrotate;
+} gf_log_handle_t;
+
+typedef struct log_buf_ {
+ char *msg;
+ uint64_t msg_id;
+ int errnum;
+ struct timeval oldest;
+ struct timeval latest;
+ char *domain;
+ char *file;
+ char *function;
+ int32_t line;
+ gf_loglevel_t level;
+ int refcount;
+ int graph_id;
+ struct list_head msg_list;
+} log_buf_t;
+
+void
+gf_log_globals_init(void *ctx, gf_loglevel_t level);
+int
+gf_log_init(void *data, const char *filename, const char *ident);
+
+void
+gf_log_logrotate(int signum);
+
+void
+gf_log_cleanup(void);
+
+/* Internal interfaces to log messages with message IDs */
+int
+_gf_msg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 9, 10)));
+
+void
+_gf_msg_backtrace_nomem(gf_loglevel_t level, int stacksize);
+
+int
+_gf_msg_plain(gf_loglevel_t level, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+int
+_gf_msg_plain_nomem(gf_loglevel_t level, const char *msg);
+
+int
+_gf_msg_vplain(gf_loglevel_t level, const char *fmt, va_list ap);
+
+int
+_gf_msg_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size);
+
+int
+_gf_log(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 6, 7)));
+
+int
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 6, 7)));
+
+int
+_gf_log_eh(const char *function, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+/* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ * other level as is */
+#define SET_LOG_PRIO(level, priority) \
+ do { \
+ if (GF_LOG_TRACE == (level) || GF_LOG_NONE == (level)) { \
+ priority = LOG_DEBUG; \
+ } else { \
+ priority = (level)-1; \
+ } \
+ } while (0)
+
+/* extract just the file name from the path */
+#define GET_FILE_NAME_TO_LOG(file, basename) \
+ do { \
+ basename = strrchr((file), '/'); \
+ if (basename) \
+ basename++; \
+ else \
+ basename = (file); \
+ } while (0)
+
+#define PRINT_SIZE_CHECK(ret, label, strsize) \
+ do { \
+ if (ret < 0) \
+ goto label; \
+ if ((strsize - ret) > 0) { \
+ strsize -= ret; \
+ } else { \
+ ret = 0; \
+ goto label; \
+ } \
+ } while (0)
+
+#define FMT_WARN(fmt...) \
+ do { \
+ if (0) \
+ printf(fmt); \
+ } while (0)
+
+/* Interface to log messages with message IDs */
+#define gf_msg(dom, level, errnum, msgid, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 0, \
+ msgid, ##fmt); \
+ } while (0)
+
+/* no frills, no thrills, just a vanilla message, used to print the graph */
+#define gf_msg_plain(level, fmt...) \
+ do { \
+ _gf_msg_plain(level, ##fmt); \
+ } while (0)
+
+#define gf_msg_plain_nomem(level, msg) \
+ do { \
+ _gf_msg_plain_nomem(level, msg); \
+ } while (0)
+
+#define gf_msg_vplain(level, fmt, va) \
+ do { \
+ _gf_msg_vplain(level, fmt, va); \
+ } while (0)
+
+#define gf_msg_backtrace_nomem(level, stacksize) \
+ do { \
+ _gf_msg_backtrace_nomem(level, stacksize); \
+ } while (0)
+
+#define gf_msg_callingfn(dom, level, errnum, msgid, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 1, \
+ msgid, ##fmt); \
+ } while (0)
+
+/* No malloc or calloc should be called in this function */
+#define gf_msg_nomem(dom, level, size) \
+ do { \
+ _gf_msg_nomem(dom, __FILE__, __FUNCTION__, __LINE__, level, size); \
+ } while (0)
+
+/* Debug or trace messages do not need message IDs as these are more developer
+ * related. Hence, the following abstractions are provided for the same */
+#define gf_msg_debug(dom, errnum, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, GF_LOG_DEBUG, errnum, \
+ 0, 0, ##fmt); \
+ } while (0)
+
+#define gf_msg_trace(dom, errnum, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, GF_LOG_TRACE, errnum, \
+ 0, 0, ##fmt); \
+ } while (0)
+
+#define gf_log(dom, level, fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_log(dom, __FILE__, __FUNCTION__, __LINE__, level, ##fmt); \
+ } while (0)
+
+#define gf_log_eh(fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_log_eh(__FUNCTION__, ##fmt); \
+ } while (0)
+
+#define gf_log_callingfn(dom, level, fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_log_callingfn(dom, __FILE__, __FUNCTION__, __LINE__, level, \
+ ##fmt); \
+ } while (0)
+
+/* Log once in GF_UNIVERSAL_ANSWER times */
+#define GF_LOG_OCCASIONALLY(var, args...) \
+ if (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER)) { \
+ gf_log(args); \
+ }
+
+struct _glusterfs_ctx;
+
+void
+gf_log_disable_syslog(void);
+void
+gf_log_enable_syslog(void);
+gf_loglevel_t
+gf_log_get_loglevel(void);
+void
+gf_log_set_loglevel(struct _glusterfs_ctx *ctx, gf_loglevel_t level);
+int
+gf_log_get_localtime(void);
+void
+gf_log_set_localtime(int);
+void
+gf_log_flush(void);
+gf_loglevel_t
+gf_log_get_xl_loglevel(void *xl);
+void
+gf_log_set_xl_loglevel(void *xl, gf_loglevel_t level);
+
+int
+gf_cmd_log(const char *domain, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+int
+gf_cmd_log_init(const char *filename);
+
+void
+set_sys_log_level(gf_loglevel_t level);
+
+int
+gf_log_fini(void *data);
+
+void
+gf_log_set_logger(gf_log_logger_t logger);
+
+void
+gf_log_set_logformat(gf_log_format_t format);
+
+void
+gf_log_set_log_buf_size(uint32_t buf_size);
+
+void
+gf_log_set_log_flush_timeout(uint32_t timeout);
+
+void
+gf_log_flush_msgs(struct _glusterfs_ctx *ctx);
+
+int
+gf_log_inject_timer_event(struct _glusterfs_ctx *ctx);
+
+void
+gf_log_disable_suppression_before_exit(struct _glusterfs_ctx *ctx);
+
+#define GF_DEBUG(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_DEBUG, format, ##args)
+#define GF_INFO(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_INFO, format, ##args)
+#define GF_WARNING(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_WARNING, format, ##args)
+#define GF_ERROR(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_ERROR, format, ##args)
+
+int
+_gf_smsg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *event, ...);
+
+/* Interface to log messages with message IDs */
+#define gf_smsg(dom, level, errnum, msgid, event...) \
+ do { \
+ _gf_smsg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 0, \
+ msgid, msgid##_STR, ##event); \
+ } while (0)
+
+#endif /* __LOGGING_H__ */
diff --git a/libglusterfs/src/glusterfs/lvm-defaults.h b/libglusterfs/src/glusterfs/lvm-defaults.h
new file mode 100644
index 00000000000..32feebf3f6e
--- /dev/null
+++ b/libglusterfs/src/glusterfs/lvm-defaults.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LVM_DEFAULTS_H
+#define _LVM_DEFAULTS_H
+
+#define LVM_RESIZE "/sbin/lvresize"
+#define LVM_CREATE "/sbin/lvcreate"
+#define LVM_CONVERT "/sbin/lvconvert"
+#define LVM_REMOVE "/sbin/lvremove"
+#define LVS "/sbin/lvs"
+
+#endif /* _LVM_DEFAULTS_H */
diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
new file mode 100644
index 00000000000..e5b3276d047
--- /dev/null
+++ b/libglusterfs/src/glusterfs/mem-pool.h
@@ -0,0 +1,336 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MEM_POOL_H_
+#define _MEM_POOL_H_
+
+#include "glusterfs/list.h"
+#include "glusterfs/atomic.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/glusterfs.h" /* for glusterfs_ctx_t */
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+
+/*
+ * Need this for unit tests since inline functions
+ * access memory allocation and need to use the
+ * unit test versions
+ */
+#ifdef UNIT_TESTING
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka.h>
+#endif
+
+#define GF_MEM_TRAILER_SIZE 8
+#define GF_MEM_HEADER_MAGIC 0xCAFEBABE
+#define GF_MEM_TRAILER_MAGIC 0xBAADF00D
+#define GF_MEM_INVALID_MAGIC 0xDEADC0DE
+
+#define POOL_SMALLEST 7 /* i.e. 128 */
+#define POOL_LARGEST 20 /* i.e. 1048576 */
+#define NPOOLS (POOL_LARGEST - POOL_SMALLEST + 1)
+
+struct mem_acct_rec {
+ const char *typestr;
+ uint64_t size;
+ uint64_t max_size;
+ uint64_t total_allocs;
+ uint32_t num_allocs;
+ uint32_t max_num_allocs;
+ gf_lock_t lock;
+#ifdef DEBUG
+ struct list_head obj_list;
+#endif
+};
+
+struct mem_acct {
+ uint32_t num_types;
+ gf_atomic_t refcnt;
+ struct mem_acct_rec rec[0];
+};
+
+struct mem_header {
+ uint32_t type;
+ size_t size;
+ struct mem_acct *mem_acct;
+ uint32_t magic;
+#ifdef DEBUG
+ struct list_head acct_list;
+#endif
+ int padding[8];
+};
+
+#define GF_MEM_HEADER_SIZE (sizeof(struct mem_header))
+
+#ifdef DEBUG
+struct mem_invalid {
+ uint32_t magic;
+ void *mem_acct;
+ uint32_t type;
+ size_t size;
+ void *baseaddr;
+};
+#endif
+
+void *
+__gf_calloc(size_t cnt, size_t size, uint32_t type, const char *typestr);
+
+void *
+__gf_malloc(size_t size, uint32_t type, const char *typestr);
+
+void *
+__gf_realloc(void *ptr, size_t size);
+
+int
+gf_vasprintf(char **string_ptr, const char *format, va_list arg);
+
+int
+gf_asprintf(char **string_ptr, const char *format, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+void
+__gf_free(void *ptr);
+
+static inline void *
+__gf_default_malloc(size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = malloc(size);
+ if (!ptr)
+ gf_msg_nomem("", GF_LOG_ALERT, size);
+
+ return ptr;
+}
+
+static inline void *
+__gf_default_calloc(int cnt, size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = calloc(cnt, size);
+ if (!ptr)
+ gf_msg_nomem("", GF_LOG_ALERT, (cnt * size));
+
+ return ptr;
+}
+
+static inline void *
+__gf_default_realloc(void *oldptr, size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = realloc(oldptr, size);
+ if (!ptr)
+ gf_msg_nomem("", GF_LOG_ALERT, size);
+
+ return ptr;
+}
+
+#define MALLOC(size) __gf_default_malloc(size)
+#define CALLOC(cnt, size) __gf_default_calloc(cnt, size)
+#define REALLOC(ptr, size) __gf_default_realloc(ptr, size)
+
+#define FREE(ptr) \
+ do { \
+ if (ptr != NULL) { \
+ free((void *)ptr); \
+ ptr = (void *)0xeeeeeeee; \
+ } \
+ } while (0)
+
+#define GF_CALLOC(nmemb, size, type) __gf_calloc(nmemb, size, type, #type)
+
+#define GF_MALLOC(size, type) __gf_malloc(size, type, #type)
+
+#define GF_REALLOC(ptr, size) __gf_realloc(ptr, size)
+
+#define GF_FREE(free_ptr) __gf_free(free_ptr)
+
+static inline char *
+gf_strndup(const char *src, size_t len)
+{
+ char *dup_str = NULL;
+
+ if (!src) {
+ goto out;
+ }
+
+ dup_str = GF_MALLOC(len + 1, gf_common_mt_strdup);
+ if (!dup_str) {
+ goto out;
+ }
+
+ memcpy(dup_str, src, len);
+ dup_str[len] = '\0';
+out:
+ return dup_str;
+}
+
+static inline char *
+gf_strdup(const char *src)
+{
+ if (!src)
+ return NULL;
+
+ return gf_strndup(src, strlen(src));
+}
+
+static inline void *
+gf_memdup(const void *src, size_t size)
+{
+ void *dup_mem = NULL;
+
+ dup_mem = GF_MALLOC(size, gf_common_mt_memdup);
+ if (!dup_mem)
+ goto out;
+
+ memcpy(dup_mem, src, size);
+
+out:
+ return dup_mem;
+}
+
+#ifdef GF_DISABLE_MEMPOOL
+
+/* No-op memory pool enough to fit current API without massive redesign. */
+
+struct mem_pool {
+ unsigned long sizeof_type;
+};
+
+#define mem_pools_init() \
+ do { \
+ } while (0)
+#define mem_pools_fini() \
+ do { \
+ } while (0)
+#define mem_pool_thread_destructor(pool_list) (void)pool_list
+
+#else /* !GF_DISABLE_MEMPOOL */
+
+/* kind of 'header' for the actual mem_pool_shared structure, this might make
+ * it possible to dump some more details in a statedump */
+struct mem_pool {
+ /* object size, without pooled_obj_hdr_t */
+ unsigned long sizeof_type;
+ unsigned long count; /* requested pool size (unused) */
+ char *name;
+ char *xl_name;
+ gf_atomic_t active; /* current allocations */
+#ifdef DEBUG
+ gf_atomic_t hit; /* number of allocations served from pt_pool */
+ gf_atomic_t miss; /* number of std allocs due to miss */
+#endif
+ struct list_head owner; /* glusterfs_ctx_t->mempool_list */
+ glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */
+
+ struct mem_pool_shared *pool; /* the initial pool that was returned */
+};
+
+typedef struct pooled_obj_hdr {
+ unsigned long magic;
+ struct pooled_obj_hdr *next;
+ struct per_thread_pool_list *pool_list;
+ unsigned int power_of_two;
+
+ /* track the pool that was used to request this object */
+ struct mem_pool *pool;
+} pooled_obj_hdr_t;
+
+/* Each memory block inside a pool has a fixed size that is a power of two.
+ * However each object will have a header that will reduce the available
+ * space. */
+#define AVAILABLE_SIZE(p2) ((1UL << (p2)) - sizeof(pooled_obj_hdr_t))
+
+typedef struct per_thread_pool {
+ /* the pool that was used to request this allocation */
+ struct mem_pool_shared *parent;
+ /* Everything else is protected by our own lock. */
+ pooled_obj_hdr_t *hot_list;
+ pooled_obj_hdr_t *cold_list;
+} per_thread_pool_t;
+
+typedef struct per_thread_pool_list {
+ /* thr_list is used to place the TLS pool_list into the active global list
+ * (pool_threads) or the inactive global list (pool_free_threads). It's
+ * protected by the global pool_lock. */
+ struct list_head thr_list;
+
+ /* This lock is used to update poison and the hot/cold lists of members
+ * of 'pools' array. */
+ pthread_spinlock_t lock;
+
+ /* This field is used to mark a pool_list as not being owned by any thread.
+ * This means that the sweeper thread won't be cleaning objects stored in
+ * its pools. mem_put() uses it to decide if the object being released is
+ * placed into its original pool_list or directly destroyed. */
+ bool poison;
+
+ /*
+ * There's really more than one pool, but the actual number is hidden
+ * in the implementation code so we just make it a single-element array
+ * here.
+ */
+ per_thread_pool_t pools[1];
+} per_thread_pool_list_t;
+
+/* actual pool structure, shared between different mem_pools */
+struct mem_pool_shared {
+ unsigned int power_of_two;
+ /*
+ * Updates to these are *not* protected by a global lock, so races
+ * could occur and the numbers might be slightly off. Don't expect
+ * them to line up exactly. It's the general trends that matter, and
+ * it's not worth the locked-bus-cycle overhead to make these precise.
+ */
+ gf_atomic_t allocs_hot;
+ gf_atomic_t allocs_cold;
+ gf_atomic_t allocs_stdc;
+ gf_atomic_t frees_to_list;
+};
+
+void
+mem_pools_init(void); /* start the pool_sweeper thread */
+void
+mem_pools_fini(void); /* cleanup memory pools */
+void
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
+
+#endif /* GF_DISABLE_MEMPOOL */
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name);
+
+#define mem_pool_new(type, count) \
+ mem_pool_new_fn(THIS->ctx, sizeof(type), count, #type)
+
+#define mem_pool_new_ctx(ctx, type, count) \
+ mem_pool_new_fn(ctx, sizeof(type), count, #type)
+
+void
+mem_put(void *ptr);
+void *
+mem_get(struct mem_pool *pool);
+void *
+mem_get0(struct mem_pool *pool);
+
+void
+mem_pool_destroy(struct mem_pool *pool);
+
+void
+gf_mem_acct_enable_set(void *ctx);
+
+#endif /* _MEM_POOL_H */
diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
new file mode 100644
index 00000000000..d45d5b68c91
--- /dev/null
+++ b/libglusterfs/src/glusterfs/mem-types.h
@@ -0,0 +1,139 @@
+/*
+ Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MEM_TYPES_H__
+#define __MEM_TYPES_H__
+
+enum gf_common_mem_types_ {
+ gf_common_mt_dnscache6, /* used only in one location */
+ gf_common_mt_event_pool,
+ gf_common_mt_reg,
+ gf_common_mt_pollfd, /* used only in one location */
+ gf_common_mt_fdentry_t, /* used only in one location */
+ gf_common_mt_fdtable_t, /* used only in one location */
+ gf_common_mt_fd_ctx, /* used only in one location */
+ gf_common_mt_gf_dirent_t,
+ gf_common_mt_inode_t, /* used only in one location */
+ gf_common_mt_inode_ctx, /* used only in one location */
+ gf_common_mt_list_head,
+ gf_common_mt_inode_table_t, /* used only in one location */
+ gf_common_mt_xlator_t,
+ gf_common_mt_xlator_list_t, /* used only in one location */
+ gf_common_mt_volume_opt_list_t,
+ gf_common_mt_gf_timer_t, /* used only in one location */
+ gf_common_mt_gf_timer_registry_t, /* used only in one location */
+ gf_common_mt_auth_handle_t, /* used only in one location */
+ gf_common_mt_iobuf, /* used only in one location */
+ gf_common_mt_iobuf_arena, /* used only in one location */
+ gf_common_mt_iobref, /* used only in one location */
+ gf_common_mt_iobuf_pool, /* used only in one location */
+ gf_common_mt_iovec,
+ gf_common_mt_memdup, /* used only in one location */
+ gf_common_mt_asprintf, /* used only in one location */
+ gf_common_mt_strdup,
+ gf_common_mt_socket_private_t, /* used only in one location */
+ gf_common_mt_ioq, /* used only in one location */
+ gf_common_mt_char,
+ gf_common_mt_rbthash_table_t, /* used only in one location */
+ gf_common_mt_rbthash_bucket, /* used only in one location */
+ gf_common_mt_mem_pool, /* used only in one location */
+ gf_common_mt_rpcsvc_auth_list, /* used only in one location */
+ gf_common_mt_rpcsvc_t, /* used only in one location */
+ gf_common_mt_rpcsvc_program_t, /* used only in one location */
+ gf_common_mt_rpcsvc_listener_t, /* used only in one location */
+ gf_common_mt_rpcsvc_wrapper_t, /* used only in one location */
+ gf_common_mt_rpcclnt_t, /* used only in one location */
+ gf_common_mt_rpcclnt_savedframe_t, /* used only in one location */
+ gf_common_mt_rpc_trans_t,
+ gf_common_mt_rpc_trans_pollin_t, /* used only in one location */
+ gf_common_mt_rpc_trans_reqinfo_t, /* used only in one location */
+ gf_common_mt_glusterfs_graph_t,
+ gf_common_mt_rdma_private_t, /* used only in one location */
+ gf_common_mt_rpc_transport_t, /* used only in one location */
+ gf_common_mt_rdma_post_t, /* used only in one location */
+ gf_common_mt_qpent, /* used only in one location */
+ gf_common_mt_rdma_device_t, /* used only in one location */
+ gf_common_mt_rdma_arena_mr, /* used only in one location */
+ gf_common_mt_sge, /* used only in one location */
+ gf_common_mt_rpcclnt_cb_program_t, /* used only in one location */
+ gf_common_mt_libxl_marker_local, /* used only in one location */
+ gf_common_mt_graph_buf, /* used only in one location */
+ gf_common_mt_trie_trie, /* used only in one location */
+ gf_common_mt_trie_data, /* used only in one location */
+ gf_common_mt_trie_node, /* used only in one location */
+ gf_common_mt_trie_buf, /* used only in one location */
+ gf_common_mt_run_argv, /* used only in one location */
+ gf_common_mt_run_logbuf, /* used only in one location */
+ gf_common_mt_fd_lk_ctx_t, /* used only in one location */
+ gf_common_mt_fd_lk_ctx_node_t, /* used only in one location */
+ gf_common_mt_buffer_t, /* used only in one location */
+ gf_common_mt_circular_buffer_t, /* used only in one location */
+ gf_common_mt_eh_t,
+ gf_common_mt_store_handle_t, /* used only in one location */
+ gf_common_mt_store_iter_t, /* used only in one location */
+ gf_common_mt_drc_client_t, /* used only in one location */
+ gf_common_mt_drc_globals_t, /* used only in one location */
+ gf_common_mt_groups_t,
+ gf_common_mt_cliententry_t, /* used only in one location */
+ gf_common_mt_clienttable_t, /* used only in one location */
+ gf_common_mt_client_t, /* used only in one location */
+ gf_common_mt_client_ctx, /* used only in one location */
+ gf_common_mt_auxgids, /* used only in one location */
+ gf_common_mt_syncopctx, /* used only in one location */
+ gf_common_mt_iobrefs, /* used only in one location */
+ gf_common_mt_gsync_status_t,
+ gf_common_mt_uuid_t,
+ gf_common_mt_mgmt_v3_lock_obj_t, /* used only in one location */
+ gf_common_mt_txn_opinfo_obj_t, /* used only in one location */
+ gf_common_mt_strfd_t, /* used only in one location */
+ gf_common_mt_strfd_data_t, /* used only in one location */
+ gf_common_mt_regex_t, /* used only in one location */
+ gf_common_mt_ereg, /* used only in one location */
+ gf_common_mt_wr, /* used only in one location */
+ gf_common_mt_dnscache, /* used only in one location */
+ gf_common_mt_dnscache_entry, /* used only in one location */
+ gf_common_mt_parser_t, /* used only in one location */
+ gf_common_quota_meta_t,
+ gf_common_mt_rbuf_t, /* used only in one location */
+ gf_common_mt_rlist_t, /* used only in one location */
+ gf_common_mt_rvec_t, /* used only in one location */
+ /* glusterd can load the nfs-xlator dynamically and needs these two */
+ gf_common_mt_nfs_netgroups, /* used only in one location */
+ gf_common_mt_nfs_exports, /* used only in one location */
+ gf_common_mt_gf_brick_spec_t, /* used only in one location */
+ gf_common_mt_int,
+ gf_common_mt_pointer,
+ gf_common_mt_synctask, /* used only in one location */
+ gf_common_mt_syncstack, /* used only in one location */
+ gf_common_mt_syncenv, /* used only in one location */
+ gf_common_mt_scan_data, /* used only in one location */
+ gf_common_list_node,
+ gf_mt_default_args_t, /* used only in one location */
+ gf_mt_default_args_cbk_t, /* used only in one location */
+ /*used for compound fops*/
+ gf_mt_compound_req_t, /* used only in one location */
+ gf_mt_compound_rsp_t, /* used only in one location */
+ gf_common_mt_tw_ctx, /* used only in one location */
+ gf_common_mt_tw_timer_list,
+ /*lock migration*/
+ gf_common_mt_lock_mig,
+ /* throttle */
+ gf_common_mt_tbf_t, /* used only in one location */
+ gf_common_mt_tbf_bucket_t, /* used only in one location */
+ gf_common_mt_tbf_throttle_t, /* used only in one location */
+ gf_common_mt_pthread_t, /* used only in one location */
+ gf_common_ping_local_t, /* used only in one location */
+ gf_common_volfile_t,
+ gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
+ gf_common_mt_server_cmdline_t, /* used only in one location */
+ gf_common_mt_latency_t,
+ gf_common_mt_end
+};
+#endif
diff --git a/libglusterfs/src/glusterfs/monitoring.h b/libglusterfs/src/glusterfs/monitoring.h
new file mode 100644
index 00000000000..09d9f54e734
--- /dev/null
+++ b/libglusterfs/src/glusterfs/monitoring.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MONITORING_H__
+#define __MONITORING_H__
+
+#include "glusterfs/glusterfs.h"
+
+#define GLUSTER_METRICS_DIR "/var/run/gluster/metrics"
+
+char *
+gf_monitor_metrics(glusterfs_ctx_t *ctx);
+
+#endif /* __MONITORING_H__ */
diff --git a/libglusterfs/src/glusterfs/options.h b/libglusterfs/src/glusterfs/options.h
new file mode 100644
index 00000000000..747b13ba375
--- /dev/null
+++ b/libglusterfs/src/glusterfs/options.h
@@ -0,0 +1,327 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _OPTIONS_H
+#define _OPTIONS_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/libglusterfs-messages.h"
+/* Add possible new type of option you may need */
+typedef enum {
+ GF_OPTION_TYPE_ANY = 0,
+ GF_OPTION_TYPE_STR,
+ GF_OPTION_TYPE_INT,
+ GF_OPTION_TYPE_SIZET,
+ GF_OPTION_TYPE_PERCENT,
+ GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ GF_OPTION_TYPE_BOOL,
+ GF_OPTION_TYPE_XLATOR,
+ GF_OPTION_TYPE_PATH,
+ GF_OPTION_TYPE_TIME,
+ GF_OPTION_TYPE_DOUBLE,
+ GF_OPTION_TYPE_INTERNET_ADDRESS,
+ GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ GF_OPTION_TYPE_PRIORITY_LIST,
+ GF_OPTION_TYPE_SIZE_LIST,
+ GF_OPTION_TYPE_CLIENT_AUTH_ADDR,
+ GF_OPTION_TYPE_MAX,
+} volume_option_type_t;
+
+typedef enum {
+ GF_OPT_VALIDATE_BOTH = 0,
+ GF_OPT_VALIDATE_MIN,
+ GF_OPT_VALIDATE_MAX,
+} opt_validate_type_t;
+
+typedef enum {
+ OPT_FLAG_NONE = 0,
+ OPT_FLAG_SETTABLE = 1 << 0, /* can be set using volume set */
+ OPT_FLAG_CLIENT_OPT = 1 << 1, /* affects clients */
+ OPT_FLAG_GLOBAL = 1
+ << 2, /* affects all instances of the particular xlator */
+ OPT_FLAG_FORCE = 1 << 3, /* needs force to be reset */
+ OPT_FLAG_NEVER_RESET = 1 << 4, /* which should not be reset */
+ OPT_FLAG_DOC = 1 << 5, /* can be shown in volume set help */
+} opt_flags_t;
+
+typedef enum {
+ OPT_STATUS_ADVANCED = 0,
+ OPT_STATUS_BASIC = 1,
+ OPT_STATUS_EXPERIMENTAL = 2,
+ OPT_STATUS_DEPRECATED = 3,
+} opt_level_t;
+
+#define ZR_VOLUME_MAX_NUM_KEY 4
+#define ZR_OPTION_MAX_ARRAY_SIZE 64
+/* The maximum number of releases that an option could be backported to
+ * based on the release schedule as in August 2017 (3), plus one more
+ * Refer comment on volume_options.op_version for more information.
+ */
+#define GF_MAX_RELEASES 4
+
+/* Custom validation functoins for options
+ * TODO: Need to check what sorts of validation is being done, and decide if
+ * passing the volinfo is actually required. If it is, then we should possibly
+ * try a solution in GD2 for this.
+ */
+/* typedef int (*option_validation_fn) (glusterd_volinfo_t *volinfo, dict_t
+ *dict, char *key, char *value, char **op_errstr);
+ */
+
+/* Each translator should define this structure */
+/* XXX: This structure is in use by GD2, and SHOULD NOT be modified.
+ * If there is a need to add new members, add them to the end of the structure.
+ * If the struct must be modified, GD2 MUST be updated as well
+ */
+typedef struct volume_options {
+ char *key[ZR_VOLUME_MAX_NUM_KEY];
+ /* different key, same meaning */
+ volume_option_type_t type;
+ double min; /* 0 means no range */
+ double max; /* 0 means no range */
+ char *value[ZR_OPTION_MAX_ARRAY_SIZE];
+ /* If specified, will check for one of
+ the value from this array */
+ char *default_value;
+ char *description; /* about the key */
+ /* Required for int options where only the min value
+ * is given and is 0. This will cause validation not to
+ * happen
+ */
+ opt_validate_type_t validate;
+
+ /* The op-version at which this option was introduced.
+ * This is an array to support options that get backported to supported
+ * releases.
+ * Normally, an option introduced for a major release just has a single
+ * entry in the array, with op-version of the major release
+ * For an option that is backported, the op-versions of the all the
+ * releases it was ported to should be added, starting from the newest,
+ * to the oldest.
+ */
+ uint32_t op_version[GF_MAX_RELEASES];
+ /* The op-version at which this option was deprecated.
+ * Follows the same rules as above.
+ */
+ uint32_t deprecated[GF_MAX_RELEASES];
+ /* Additional flags for an option
+ * Check the OPT_FLAG_* enums for available flags
+ */
+ uint32_t flags;
+ /* Tags applicable to this option, which can be used to group similar
+ * options
+ */
+ char *tags[ZR_OPTION_MAX_ARRAY_SIZE];
+ /* A custom validation function if required
+ * TODO: See todo above for option_validation_fn
+ */
+ /* option_validation_fn validate_fn; */
+ /* This is actual key that should be set in the options dict. Can
+ * contain varstrings
+ */
+ char *setkey;
+
+ /* A 'level' is about the technical depth / understanding one
+ needs to handle the option. 'category' is based on
+ quality (ie, tests, people behind it, documentation available) */
+
+ /* The level at which the option is classified */
+ opt_level_t level;
+
+ /* Flag to understand how this option is categorized */
+ gf_category_t category;
+} volume_option_t;
+
+typedef struct vol_opt_list {
+ struct list_head list;
+ volume_option_t *given_opt;
+} volume_opt_list_t;
+
+int
+xlator_tree_reconfigure(xlator_t *old_xl, xlator_t *new_xl);
+int
+xlator_validate_rec(xlator_t *xlator, char **op_errstr);
+int
+graph_reconf_validateopt(glusterfs_graph_t *graph, char **op_errstr);
+int
+xlator_option_info_list(volume_opt_list_t *list, char *key, char **def_val,
+ char **descr);
+/*
+int validate_xlator_volume_options (xlator_t *xl, dict_t *options,
+ volume_option_t *opt, char **op_errstr);
+*/
+int
+xlator_options_validate_list(xlator_t *xl, dict_t *options,
+ volume_opt_list_t *list, char **op_errstr);
+int
+xlator_option_validate(xlator_t *xl, char *key, char *value,
+ volume_option_t *opt, char **op_errstr);
+int
+xlator_options_validate(xlator_t *xl, dict_t *options, char **errstr);
+
+int
+xlator_option_validate_addr_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr);
+
+volume_option_t *
+xlator_volume_option_get(xlator_t *xl, const char *key);
+
+volume_option_t *
+xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key);
+
+#define DECLARE_INIT_OPT(type_t, type) \
+ int xlator_option_init_##type(xlator_t *this, dict_t *options, char *key, \
+ type_t *val_p);
+
+DECLARE_INIT_OPT(char *, str);
+DECLARE_INIT_OPT(uint64_t, uint64);
+DECLARE_INIT_OPT(int64_t, int64);
+DECLARE_INIT_OPT(uint32_t, uint32);
+DECLARE_INIT_OPT(int32_t, int32);
+DECLARE_INIT_OPT(uint64_t, size);
+DECLARE_INIT_OPT(uint64_t, size_uint64);
+DECLARE_INIT_OPT(double, percent);
+DECLARE_INIT_OPT(double, percent_or_size);
+DECLARE_INIT_OPT(gf_boolean_t, bool);
+DECLARE_INIT_OPT(xlator_t *, xlator);
+DECLARE_INIT_OPT(char *, path);
+DECLARE_INIT_OPT(double, double);
+DECLARE_INIT_OPT(uint32_t, time);
+
+#define DEFINE_INIT_OPT(type_t, type, conv) \
+ int xlator_option_init_##type(xlator_t *this, dict_t *options, char *key, \
+ type_t *val_p) \
+ { \
+ int ret = 0; \
+ volume_option_t *opt = NULL; \
+ char *def_value = NULL; \
+ char *set_value = NULL; \
+ char *value = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ opt = xlator_volume_option_get(this, key); \
+ if (!opt) { \
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY, \
+ "unknown option: %s", key); \
+ ret = -1; \
+ return ret; \
+ } \
+ def_value = opt->default_value; \
+ ret = dict_get_str(options, key, &set_value); \
+ \
+ if (def_value) \
+ value = def_value; \
+ if (set_value) \
+ value = set_value; \
+ if (!value) { \
+ gf_msg_trace(this->name, 0, "option %s not set", key); \
+ *val_p = (type_t)0; \
+ return 0; \
+ } \
+ if (value == def_value) { \
+ gf_msg_trace(this->name, 0, \
+ "option %s using default" \
+ " value %s", \
+ key, value); \
+ } else { \
+ gf_msg_debug(this->name, 0, \
+ "option %s using set" \
+ " value %s", \
+ key, value); \
+ } \
+ old_THIS = THIS; \
+ THIS = this; \
+ ret = conv(value, val_p); \
+ THIS = old_THIS; \
+ if (ret) { \
+ gf_msg(this->name, GF_LOG_INFO, 0, LG_MSG_CONVERSION_FAILED, \
+ "option %s conversion failed value %s", key, value); \
+ return ret; \
+ } \
+ ret = xlator_option_validate(this, key, value, opt, NULL); \
+ return ret; \
+ }
+
+#define GF_OPTION_INIT(key, val, type, err_label) \
+ do { \
+ int val_ret = 0; \
+ val_ret = xlator_option_init_##type(THIS, THIS->options, key, &(val)); \
+ if (val_ret) \
+ goto err_label; \
+ } while (0)
+
+#define DECLARE_RECONF_OPT(type_t, type) \
+ int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \
+ char *key, int keylen, type_t *val_p);
+
+DECLARE_RECONF_OPT(char *, str);
+DECLARE_RECONF_OPT(uint64_t, uint64);
+DECLARE_RECONF_OPT(int64_t, int64);
+DECLARE_RECONF_OPT(uint32_t, uint32);
+DECLARE_RECONF_OPT(int32_t, int32);
+DECLARE_RECONF_OPT(uint64_t, size);
+DECLARE_RECONF_OPT(uint64_t, size_uint64);
+DECLARE_RECONF_OPT(double, percent);
+DECLARE_RECONF_OPT(double, percent_or_size);
+DECLARE_RECONF_OPT(gf_boolean_t, bool);
+DECLARE_RECONF_OPT(xlator_t *, xlator);
+DECLARE_RECONF_OPT(char *, path);
+DECLARE_RECONF_OPT(double, double);
+DECLARE_RECONF_OPT(uint32_t, time);
+
+#define DEFINE_RECONF_OPT(type_t, type, conv) \
+ int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \
+ char *key, int keylen, type_t *val_p) \
+ { \
+ int ret = 0; \
+ char *value = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ volume_option_t *opt = xlator_volume_option_get(this, key); \
+ if (!opt) { \
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY, \
+ "unknown option: %s", key); \
+ return -1; \
+ } \
+ ret = dict_get_strn(options, key, keylen, &value); \
+ if (ret == 0 && value) { \
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, \
+ "option %s using set value %s", key, value); \
+ } else if (opt->default_value) { \
+ value = opt->default_value; \
+ gf_msg_trace(this->name, 0, "option %s using default value %s", \
+ key, value); \
+ } else { \
+ gf_msg_trace(this->name, 0, "option %s not set", key); \
+ *val_p = (type_t)0; \
+ return 0; \
+ } \
+ \
+ old_THIS = THIS; \
+ THIS = this; \
+ ret = conv(value, val_p); \
+ THIS = old_THIS; \
+ if (ret) \
+ return ret; \
+ return xlator_option_validate(this, key, value, opt, NULL); \
+ }
+
+#define GF_OPTION_RECONF(key, val, opt, type, err_label) \
+ do { \
+ if (xlator_option_reconf_##type(THIS, opt, key, SLEN(key), &(val))) \
+ goto err_label; \
+ } while (0)
+
+#endif /* !_OPTIONS_H */
diff --git a/libglusterfs/src/glusterfs/parse-utils.h b/libglusterfs/src/glusterfs/parse-utils.h
new file mode 100644
index 00000000000..8653b9dd180
--- /dev/null
+++ b/libglusterfs/src/glusterfs/parse-utils.h
@@ -0,0 +1,50 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PARSE_UTILS_H
+#define _PARSE_UTILS_H
+
+#include <regex.h>
+
+#define GF_PARSE "parse-utils"
+
+struct parser {
+ regex_t preg; /* Compiled regex */
+ regmatch_t pmatch[1]; /* The match */
+ char *complete_str; /* The string we are parsing */
+ char *regex; /* Regex used to parse the string */
+ char *_rstr; /* Temp string to hold offsets */
+};
+
+/* Initializes some of the parsers variables */
+struct parser *
+parser_init(const char *regex);
+
+/* Sets the string to parse */
+int
+parser_set_string(struct parser *parser, const char *complete_str);
+
+/* Frees memory used by the string after all matches are found */
+int
+parser_unset_string(struct parser *parser);
+
+/* Free memory used by the parser */
+void
+parser_deinit(struct parser *ptr);
+
+/* Get the next matching string */
+char *
+parser_get_next_match(struct parser *parser);
+
+#endif /* _PARSE_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/quota-common-utils.h b/libglusterfs/src/glusterfs/quota-common-utils.h
new file mode 100644
index 00000000000..0096e340756
--- /dev/null
+++ b/libglusterfs/src/glusterfs/quota-common-utils.h
@@ -0,0 +1,68 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUOTA_COMMON_UTILS_H
+#define _QUOTA_COMMON_UTILS_H
+
+#include "glusterfs/iatt.h"
+
+#define GF_QUOTA_CONF_VERSION 1.2
+#define QUOTA_CONF_HEADER "GlusterFS Quota conf | version: v1.2\n"
+#define QUOTA_CONF_HEADER_1_1 "GlusterFS Quota conf | version: v1.1\n"
+
+typedef enum {
+ GF_QUOTA_CONF_TYPE_USAGE = 1,
+ GF_QUOTA_CONF_TYPE_OBJECTS
+} gf_quota_conf_type_t;
+
+struct _quota_limits {
+ int64_t hl;
+ int64_t sl;
+} __attribute__((__packed__));
+typedef struct _quota_limits quota_limits_t;
+
+struct _quota_meta {
+ int64_t size;
+ int64_t file_count;
+ int64_t dir_count;
+} __attribute__((__packed__));
+typedef struct _quota_meta quota_meta_t;
+
+gf_boolean_t
+quota_meta_is_null(const quota_meta_t *meta);
+
+int32_t
+quota_data_to_meta(data_t *data, quota_meta_t *meta);
+
+int32_t
+quota_dict_get_inode_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta);
+
+int32_t
+quota_dict_get_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta);
+
+int32_t
+quota_dict_set_meta(dict_t *dict, char *key, const quota_meta_t *meta,
+ ia_type_t ia_type);
+
+int32_t
+quota_conf_read_header(int fd, char *buf);
+
+int32_t
+quota_conf_read_version(int fd, float *version);
+
+int32_t
+quota_conf_read_gfid(int fd, void *buf, char *type, float version);
+
+int32_t
+quota_conf_skip_header(int fd);
+
+#endif /* _QUOTA_COMMON_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/rbthash.h b/libglusterfs/src/glusterfs/rbthash.h
new file mode 100644
index 00000000000..4c731de69c2
--- /dev/null
+++ b/libglusterfs/src/glusterfs/rbthash.h
@@ -0,0 +1,75 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RBTHASH_TABLE_H_
+#define __RBTHASH_TABLE_H_
+
+#include <stdint.h> // for uint32_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t
+#include "glusterfs/list.h" // for list_head
+#include "glusterfs/locking.h" // for gf_lock_t
+struct mem_pool;
+
+#define GF_RBTHASH_MEMPOOL 16384 // 1048576
+#define GF_RBTHASH "rbthash"
+
+struct rbthash_bucket {
+ struct rb_table *bucket;
+ gf_lock_t bucketlock;
+};
+
+typedef struct rbthash_entry {
+ void *data;
+ void *key;
+ int keylen;
+ uint32_t keyhash;
+ struct list_head list;
+} rbthash_entry_t;
+
+typedef uint32_t (*rbt_hasher_t)(void *data, int len);
+typedef void (*rbt_data_destroyer_t)(void *data);
+typedef void (*rbt_traverse_t)(void *data, void *mydata);
+
+typedef struct rbthash_table {
+ int size;
+ int numbuckets;
+ struct mem_pool *entrypool;
+ gf_lock_t tablelock;
+ struct rbthash_bucket *buckets;
+ rbt_hasher_t hashfunc;
+ rbt_data_destroyer_t dfunc;
+ gf_boolean_t pool_alloced;
+ struct list_head list;
+} rbthash_table_t;
+
+extern rbthash_table_t *
+rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc,
+ rbt_data_destroyer_t dfunc, unsigned long expected_entries,
+ struct mem_pool *entrypool);
+
+extern int
+rbthash_insert(rbthash_table_t *tbl, void *data, void *key, int keylen);
+
+extern void *
+rbthash_get(rbthash_table_t *tbl, void *key, int keylen);
+
+extern void *
+rbthash_remove(rbthash_table_t *tbl, void *key, int keylen);
+
+extern void *
+rbthash_replace(rbthash_table_t *tbl, void *key, int keylen, void *newdata);
+
+extern void
+rbthash_table_destroy(rbthash_table_t *tbl);
+
+extern void
+rbthash_table_traverse(rbthash_table_t *tbl, rbt_traverse_t traverse,
+ void *mydata);
+#endif
diff --git a/libglusterfs/src/glusterfs/refcount.h b/libglusterfs/src/glusterfs/refcount.h
new file mode 100644
index 00000000000..cf922dabb05
--- /dev/null
+++ b/libglusterfs/src/glusterfs/refcount.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _REFCOUNT_H
+#define _REFCOUNT_H
+
+/* check for compiler support for __sync_*_and_fetch()
+ *
+ * a more comprehensive feature test is shown at
+ * http://lists.iptel.org/pipermail/semsdev/2010-October/005075.html
+ * this is sufficient for RHEL5 i386 builds
+ */
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
+ !defined(__i386__)
+#undef REFCOUNT_NEEDS_LOCK
+#else
+#define REFCOUNT_NEEDS_LOCK
+#include "glusterfs/locking.h"
+#endif /* compiler support for __sync_*_and_fetch() */
+
+typedef void (*gf_ref_release_t)(void *data);
+
+struct _gf_ref {
+#ifdef REFCOUNT_NEEDS_LOCK
+ gf_lock_t lk; /* lock for atomically adjust cnt */
+#endif
+ unsigned int cnt; /* number of users, free on 0 */
+
+ gf_ref_release_t release; /* cleanup when cnt == 0 */
+ void *data; /* parameter passed to release() */
+};
+typedef struct _gf_ref gf_ref_t;
+
+/* _gf_ref_get -- increase the refcount
+ *
+ * @return: greater then 0 when a reference was taken, 0 when not
+ */
+void *
+_gf_ref_get(gf_ref_t *ref);
+
+/* _gf_ref_put -- decrease the refcount
+ *
+ * @return: greater then 0 when there are still references, 0 when cleanup
+ * should be done, gf_ref_release_t is called on cleanup
+ */
+unsigned int
+_gf_ref_put(gf_ref_t *ref);
+
+/* _gf_ref_init -- initialize an embedded refcount object
+ *
+ * @release: function to call when the refcount == 0
+ * @data: parameter to be passed to @release
+ */
+void
+_gf_ref_init(gf_ref_t *ref, gf_ref_release_t release, void *data);
+
+/*
+ * Strong suggestion to use the simplified GF_REF_* API.
+ */
+
+/* GF_REF_DECL -- declaration to put inside your structure
+ *
+ * Example:
+ * struct my_struct {
+ * GF_REF_DECL;
+ *
+ * ... // additional members
+ * };
+ */
+#define GF_REF_DECL gf_ref_t _ref
+
+/* GF_REF_INIT -- initialize a GF_REF_DECL structure
+ *
+ * @p: allocated structure with GF_REF_DECL
+ * @d: destructor to call once refcounting reaches 0
+ *
+ * Sets the refcount to 1.
+ */
+#define GF_REF_INIT(p, d) _gf_ref_init(&(p)->_ref, (gf_ref_release_t)d, p)
+
+/* GF_REF_GET -- increase the refcount of a GF_REF_DECL structure
+ *
+ * @return: greater then 0 when a reference was taken, 0 when not
+ */
+#define GF_REF_GET(p) _gf_ref_get(&(p)->_ref)
+
+/* GF_REF_PUT -- decrease the refcount of a GF_REF_DECL structure
+ *
+ * @return: greater then 0 when there are still references, 0 when cleanup
+ * should be done, gf_ref_release_t is called on cleanup
+ */
+#define GF_REF_PUT(p) _gf_ref_put(&(p)->_ref)
+
+#endif /* _REFCOUNT_H */
diff --git a/libglusterfs/src/glusterfs/revision.h b/libglusterfs/src/glusterfs/revision.h
new file mode 100644
index 00000000000..3c404d30e78
--- /dev/null
+++ b/libglusterfs/src/glusterfs/revision.h
@@ -0,0 +1 @@
+#define GLUSTERFS_REPOSITORY_REVISION "git://git.gluster.org/glusterfs.git"
diff --git a/libglusterfs/src/glusterfs/rot-buffs.h b/libglusterfs/src/glusterfs/rot-buffs.h
new file mode 100644
index 00000000000..9dc227d58b8
--- /dev/null
+++ b/libglusterfs/src/glusterfs/rot-buffs.h
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ROT_BUFFS_H
+#define __ROT_BUFFS_H
+
+#include "glusterfs/list.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/common-utils.h"
+
+typedef struct rbuf_iovec {
+ struct iovec iov;
+
+ struct list_head list;
+} rbuf_iovec_t;
+
+#define RBUF_IOVEC_SIZE (sizeof(rbuf_iovec_t))
+
+typedef struct rbuf_list {
+ gf_lock_t c_lock;
+
+ pthread_mutex_t b_lock; /* protects this structure */
+ pthread_cond_t b_cond; /* signal for writer completion */
+
+ gf_boolean_t awaiting;
+
+ unsigned long long pending; /* pending writers */
+ unsigned long long completed; /* completed writers */
+
+ rbuf_iovec_t *rvec; /* currently used IO vector */
+
+ struct list_head veclist; /* list of attached rbuf_iov */
+
+ unsigned long long used; /* consumable entries
+ attached in ->veclist */
+ unsigned long long total; /* total entries in ->veclist (used
+ during deallocation) */
+
+ unsigned long seq[2]; /* if interested, this whould store
+ the start sequence number and the
+ range */
+
+ struct list_head list; /* attachment to rbuf_t */
+} rbuf_list_t;
+
+struct rlist_iter {
+ struct list_head veclist;
+
+ unsigned long long iter;
+};
+
+#define RLIST_ENTRY_COUNT(rlist) rlist->used
+
+#define rlist_iter_init(riter, rlist) \
+ do { \
+ (riter)->iter = rlist->used; \
+ (riter)->veclist = rlist->veclist; \
+ } while (0)
+
+#define rvec_for_each_entry(pos, riter) \
+ for (pos = list_entry((riter)->veclist.next, typeof(*pos), list); \
+ (riter)->iter > 0; \
+ pos = list_entry(pos->list.next, typeof(*pos), list), \
+ --((riter)->iter))
+
+/**
+ * Sequence number assignment routine is called during buffer
+ * switch under rbuff ->lock.
+ */
+typedef void(sequence_fn)(rbuf_list_t *, void *);
+
+#define RLIST_STORE_SEQ(rlist, start, range) \
+ do { \
+ rlist->seq[0] = start; \
+ rlist->seq[1] = range; \
+ } while (0)
+
+#define RLIST_GET_SEQ(rlist, start, range) \
+ do { \
+ start = rlist->seq[0]; \
+ range = rlist->seq[1]; \
+ } while (0)
+
+typedef struct rbuf {
+ gf_lock_t lock; /* protects "current" rlist */
+
+ rbuf_list_t *current; /* cached pointer to first free rlist */
+
+ struct list_head freelist;
+} rbuf_t;
+
+typedef enum {
+ RBUF_CONSUMABLE = 1,
+ RBUF_BUSY,
+ RBUF_EMPTY,
+ RBUF_WOULD_STARVE,
+} rlist_retval_t;
+
+/* Initialization/Destruction */
+rbuf_t *
+rbuf_init(int);
+void
+rbuf_dtor(rbuf_t *);
+
+/* Producer API */
+char *
+rbuf_reserve_write_area(rbuf_t *, size_t, void **);
+int
+rbuf_write_complete(void *);
+
+/* Consumer API */
+int
+rbuf_get_buffer(rbuf_t *, void **, sequence_fn *, void *);
+int
+rbuf_wait_for_completion(rbuf_t *, void *, void (*)(rbuf_list_t *, void *),
+ void *);
+
+#endif
diff --git a/libglusterfs/src/run.h b/libglusterfs/src/glusterfs/run.h
index 508c59c1375..76af95fd27f 100644
--- a/libglusterfs/src/run.h
+++ b/libglusterfs/src/glusterfs/run.h
@@ -14,12 +14,12 @@
#define RUN_PIPE -1
struct runner {
- char **argv;
- unsigned argvlen;
- int runerr;
- pid_t chpid;
- int chfd[3];
- FILE *chio[3];
+ char **argv;
+ unsigned argvlen;
+ int runerr;
+ pid_t chpid;
+ int chfd[3];
+ FILE *chio[3];
};
typedef struct runner runner_t;
@@ -29,7 +29,8 @@ typedef struct runner runner_t;
*
* @param runner pointer to runner_t instance
*/
-void runinit (runner_t *runner);
+void
+runinit(runner_t *runner);
/**
* get FILE pointer to which child's stdio is redirected.
@@ -40,7 +41,8 @@ void runinit (runner_t *runner);
*
* @see runner_redir()
*/
-FILE *runner_chio (runner_t *runner, int fd);
+FILE *
+runner_chio(runner_t *runner, int fd);
/**
* add an argument.
@@ -52,7 +54,8 @@ FILE *runner_chio (runner_t *runner, int fd);
* @param runner pointer to runner_t instance
* @param arg command line argument
*/
-void runner_add_arg (runner_t *runner, const char *arg);
+void
+runner_add_arg(runner_t *runner, const char *arg);
/**
* add a sequence of arguments.
@@ -66,7 +69,8 @@ void runner_add_arg (runner_t *runner, const char *arg);
*
* @see runner_add_arg()
*/
-void runner_add_args (runner_t *runner, ...);
+void
+runner_add_args(runner_t *runner, ...);
/**
* add an argument with printf style formatting.
@@ -76,8 +80,9 @@ void runner_add_args (runner_t *runner, ...);
* @param runner pointer to runner_t instance
* @param format printf style format specifier
*/
-void runner_argprintf (runner_t *runner, const char *format, ...);
-
+void
+runner_argprintf(runner_t *runner, const char *format, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
/**
* log a message about the command to be run.
*
@@ -89,8 +94,9 @@ void runner_argprintf (runner_t *runner, const char *format, ...);
*
* @see gf_log()
*/
-void runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
- const char *msg);
+void
+runner_log(runner_t *runner, const char *dom, gf_loglevel_t lvl,
+ const char *msg);
/**
* set up redirection for child.
@@ -111,19 +117,20 @@ void runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
* @see runner_start(), dup(2), runner_chio(), runner_start()
*/
void
-runner_redir (runner_t *runner, int fd, int tgt_fd);
+runner_redir(runner_t *runner, int fd, int tgt_fd);
/**
* spawn child with accumulated arg list.
*
* @param runner pointer to runner_t instance
*
- * @return 0 on succesful spawn
+ * @return 0 on successful spawn
* -1 on failure (either due to earlier errors or execve(2) failing)
*
* @see runner_cout()
*/
-int runner_start (runner_t *runner);
+int
+runner_start(runner_t *runner);
/**
* complete operation and free resources.
@@ -140,7 +147,8 @@ int runner_start (runner_t *runner);
*
* @see waitpid(2)
*/
-int runner_end (runner_t *runner);
+int
+runner_end(runner_t *runner);
/**
* variant of runner_end() which does not free internal data
@@ -148,7 +156,8 @@ int runner_end (runner_t *runner);
*
* @see runner_end()
*/
-int runner_end_reuse (runner_t *runner);
+int
+runner_end_reuse(runner_t *runner);
/**
* spawn and child, take it to completion and free resources.
@@ -163,7 +172,15 @@ int runner_end_reuse (runner_t *runner);
*
* @see runner_start(), runner_end()
*/
-int runner_run (runner_t *runner);
+int
+runner_run(runner_t *runner);
+
+/**
+ * variant for runner_run() which does not wait for acknowledgement
+ * from child, and always assumes it succeeds.
+ */
+int
+runner_run_nowait(runner_t *runner);
/**
* variant of runner_run() which does not free internal data
@@ -171,7 +188,8 @@ int runner_run (runner_t *runner);
*
* @see runner_run()
*/
-int runner_run_reuse (runner_t *runner);
+int
+runner_run_reuse(runner_t *runner);
/**
* run a command with args.
@@ -183,6 +201,7 @@ int runner_run_reuse (runner_t *runner);
* @return 0 on success
* -1 on failure
*/
-int runcmd (const char *arg, ...);
+int
+runcmd(const char *arg, ...);
#endif
diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h
new file mode 100644
index 00000000000..536a330d38b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/stack.h
@@ -0,0 +1,555 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ This file defines MACROS and static inlines used to emulate a function
+ call over asynchronous communication with remote server
+*/
+
+#ifndef _STACK_H
+#define _STACK_H
+
+struct _call_stack;
+typedef struct _call_stack call_stack_t;
+struct _call_frame;
+typedef struct _call_frame call_frame_t;
+struct call_pool;
+typedef struct call_pool call_pool_t;
+
+#include <sys/time.h>
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/list.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/client_t.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/timespec.h"
+
+#define NFS_PID 1
+#define LOW_PRIO_PROC_PID -1
+
+#define STACK_ERR_XL_NAME(stack) (stack->err_xl ? stack->err_xl->name : "-")
+#define STACK_CLIENT_NAME(stack) \
+ (stack->client ? stack->client->client_uid : "-")
+
+typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ ...);
+
+void
+gf_frame_latency_update(call_frame_t *frame);
+
+struct call_pool {
+ union {
+ struct list_head all_frames;
+ struct {
+ call_stack_t *next_call;
+ call_stack_t *prev_call;
+ } all_stacks;
+ };
+ int64_t cnt;
+ gf_atomic_t total_count;
+ gf_lock_t lock;
+ struct mem_pool *frame_mem_pool;
+ struct mem_pool *stack_mem_pool;
+};
+
+struct _call_frame {
+ call_stack_t *root; /* stack root */
+ call_frame_t *parent; /* previous BP */
+ struct list_head frames;
+ void *local; /* local variables */
+ xlator_t *this; /* implicit object */
+ ret_fn_t ret; /* op_return address */
+ int32_t ref_count;
+ gf_lock_t lock;
+ void *cookie; /* unique cookie */
+ gf_boolean_t complete;
+
+ glusterfs_fop_t op;
+ struct timespec begin; /* when this frame was created */
+ struct timespec end; /* when this frame completed */
+ const char *wind_from;
+ const char *wind_to;
+ const char *unwind_from;
+ const char *unwind_to;
+};
+
+struct _ns_info {
+ uint32_t hash; /* Hash of the namespace from SuperFastHash */
+ gf_boolean_t found; /* Set to true if we found a namespace */
+};
+
+typedef struct _ns_info ns_info_t;
+
+#define SMALL_GROUP_COUNT 128
+
+struct _call_stack {
+ union {
+ struct list_head all_frames;
+ struct {
+ call_stack_t *next_call;
+ call_stack_t *prev_call;
+ };
+ };
+ call_pool_t *pool;
+ gf_lock_t stack_lock;
+ client_t *client;
+ uint64_t unique;
+ void *state; /* pointer to request state */
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ char identifier[UNIX_PATH_MAX];
+ uint16_t ngrps;
+ uint32_t groups_small[SMALL_GROUP_COUNT];
+ uint32_t *groups_large;
+ uint32_t *groups;
+ gf_lkowner_t lk_owner;
+ glusterfs_ctx_t *ctx;
+
+ struct list_head myframes; /* List of call_frame_t that go
+ to make the call stack */
+
+ int32_t op;
+ int8_t type;
+ struct timespec tv;
+ xlator_t *err_xl;
+ int32_t error;
+
+ uint32_t flags; /* use it wisely, think of it as a mechanism to
+ send information over the wire too */
+ struct timespec ctime; /* timestamp, most probably set at
+ creation of stack. */
+
+ ns_info_t ns_info;
+};
+
+/* call_stack flags field users */
+#define MDATA_CTIME (1 << 0)
+#define MDATA_MTIME (1 << 1)
+#define MDATA_ATIME (1 << 2)
+#define MDATA_PAR_CTIME (1 << 3)
+#define MDATA_PAR_MTIME (1 << 4)
+#define MDATA_PAR_ATIME (1 << 5)
+
+#define frame_set_uid_gid(frm, u, g) \
+ do { \
+ if (frm) { \
+ (frm)->root->uid = u; \
+ (frm)->root->gid = g; \
+ (frm)->root->ngrps = 0; \
+ } \
+ } while (0);
+
+struct xlator_fops;
+
+static inline void
+FRAME_DESTROY(call_frame_t *frame)
+{
+ void *local = NULL;
+
+ if (frame->root->ctx->measure_latency)
+ gf_frame_latency_update(frame);
+
+ list_del_init(&frame->frames);
+ if (frame->local) {
+ local = frame->local;
+ frame->local = NULL;
+ }
+
+ LOCK_DESTROY(&frame->lock);
+ mem_put(frame);
+
+ if (local)
+ mem_put(local);
+}
+
+static inline void
+STACK_DESTROY(call_stack_t *stack)
+{
+ call_frame_t *frame = NULL;
+ call_frame_t *tmp = NULL;
+
+ LOCK(&stack->pool->lock);
+ {
+ list_del_init(&stack->all_frames);
+ stack->pool->cnt--;
+ }
+ UNLOCK(&stack->pool->lock);
+
+ LOCK_DESTROY(&stack->stack_lock);
+
+ list_for_each_entry_safe(frame, tmp, &stack->myframes, frames)
+ {
+ FRAME_DESTROY(frame);
+ }
+
+ GF_FREE(stack->groups_large);
+
+ mem_put(stack);
+}
+
+static inline void
+STACK_RESET(call_stack_t *stack)
+{
+ call_frame_t *frame = NULL;
+ call_frame_t *tmp = NULL;
+ call_frame_t *last = NULL;
+ struct list_head toreset = {0};
+
+ INIT_LIST_HEAD(&toreset);
+
+ /* We acquire call_pool->lock only to remove the frames from this stack
+ * to preserve atomicity. This synchronizes across concurrent requests
+ * like statedump, STACK_DESTROY etc. */
+
+ LOCK(&stack->pool->lock);
+ {
+ last = list_last_entry(&stack->myframes, call_frame_t, frames);
+ list_del_init(&last->frames);
+ list_splice_init(&stack->myframes, &toreset);
+ list_add(&last->frames, &stack->myframes);
+ }
+ UNLOCK(&stack->pool->lock);
+
+ list_for_each_entry_safe(frame, tmp, &toreset, frames)
+ {
+ FRAME_DESTROY(frame);
+ }
+}
+
+#define FRAME_SU_DO(frm, local_type) \
+ do { \
+ local_type *__local = (frm)->local; \
+ __local->uid = frm->root->uid; \
+ __local->gid = frm->root->gid; \
+ __local->pid = frm->root->pid; \
+ frm->root->uid = 0; \
+ frm->root->gid = 0; \
+ frm->root->pid = GF_CLIENT_PID_NO_ROOT_SQUASH; \
+ } while (0);
+
+#define FRAME_SU_UNDO(frm, local_type) \
+ do { \
+ local_type *__local = (frm)->local; \
+ frm->root->uid = __local->uid; \
+ frm->root->gid = __local->gid; \
+ frm->root->pid = __local->pid; \
+ } while (0);
+
+/* NOTE: make sure to keep this as an macro, mainly because, we need 'fn'
+ field here to be the proper fn ptr, so its address is valid entry in
+ 'xlator_fops' struct.
+ To understand this, check the `xlator.h:struct xlator_fops`, and then
+ see a STACK_WIND call, which generally calls `subvol->fops->fop`, so
+ the address offset should give the index */
+
+/* +1 is required as 0 means NULL fop, and we don't have a variable for it */
+#define get_fop_index_from_fn(xl, fn) \
+ (1 + (((long)&(fn) - (long)&((xl)->fops->stat)) / sizeof(void *)))
+
+/* NOTE: the above reason holds good here too. But notice that we are getting
+ the base address of the 'stat' fop, which is the first entry in the fop
+ structure. All we need to do is move as much as 'idx' fields, and get the
+ actual pointer from that field. */
+
+static inline void *
+get_the_pt_fop(void *base_fop, int fop_idx)
+{
+ void *target_addr = (base_fop + ((fop_idx - 1) * sizeof(void *)));
+ /* all below type casting is for not getting warning. */
+ return (void *)*(unsigned long *)target_addr;
+}
+
+/* make a call without switching frames */
+#define STACK_WIND_TAIL(frame, obj, fn, params...) \
+ do { \
+ xlator_t *old_THIS = NULL; \
+ xlator_t *next_xl = obj; \
+ typeof(fn) next_xl_fn = fn; \
+ int opn = get_fop_index_from_fn((next_xl), (fn)); \
+ \
+ frame->this = next_xl; \
+ frame->wind_to = #fn; \
+ old_THIS = THIS; \
+ THIS = next_xl; \
+ gf_msg_trace("stack-trace", 0, \
+ "stack-address: %p, " \
+ "winding from %s to %s", \
+ frame->root, old_THIS->name, THIS->name); \
+ /* Need to capture counts at leaf node */ \
+ if (!next_xl->pass_through && !next_xl->children) { \
+ GF_ATOMIC_INC(next_xl->stats.total.metrics[opn].fop); \
+ GF_ATOMIC_INC(next_xl->stats.interval.metrics[opn].fop); \
+ GF_ATOMIC_INC(next_xl->stats.total.count); \
+ GF_ATOMIC_INC(next_xl->stats.interval.count); \
+ } \
+ \
+ if (next_xl->pass_through) { \
+ next_xl_fn = get_the_pt_fop(&next_xl->pass_through_fops->stat, \
+ opn); \
+ } \
+ next_xl_fn(frame, next_xl, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+/* make a call */
+#define STACK_WIND(frame, rfn, obj, fn, params...) \
+ STACK_WIND_COMMON(frame, rfn, 0, NULL, obj, fn, params)
+
+/* make a call with a cookie */
+#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params...) \
+ STACK_WIND_COMMON(frame, rfn, 1, cky, obj, fn, params)
+
+/* Cookie passed as the argument can be NULL (ptr) or 0 (int). Hence we
+ have to have a mechanism to separate out the two STACK_WIND formats.
+ Needed a common macro, as other than for cookie, all the other code
+ is common across.
+ */
+#define STACK_WIND_COMMON(frame, rfn, has_cookie, cky, obj, fn, params...) \
+ do { \
+ call_frame_t *_new = NULL; \
+ xlator_t *old_THIS = NULL; \
+ typeof(fn) next_xl_fn = fn; \
+ \
+ _new = mem_get0(frame->root->pool->frame_mem_pool); \
+ if (!_new) { \
+ break; \
+ } \
+ typeof(fn##_cbk) tmp_cbk = rfn; \
+ _new->root = frame->root; \
+ _new->this = obj; \
+ _new->ret = (ret_fn_t)tmp_cbk; \
+ _new->parent = frame; \
+ /* (void *) is required for avoiding gcc warning */ \
+ _new->cookie = ((has_cookie == 1) ? (void *)(cky) : (void *)_new); \
+ _new->wind_from = __FUNCTION__; \
+ _new->wind_to = #fn; \
+ _new->unwind_to = #rfn; \
+ LOCK_INIT(&_new->lock); \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ list_add(&_new->frames, &frame->root->myframes); \
+ frame->ref_count++; \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
+ fn##_cbk = rfn; \
+ old_THIS = THIS; \
+ THIS = obj; \
+ gf_msg_trace("stack-trace", 0, \
+ "stack-address: %p, " \
+ "winding from %s to %s", \
+ frame->root, old_THIS->name, THIS->name); \
+ if (obj->ctx->measure_latency) \
+ timespec_now(&_new->begin); \
+ _new->op = get_fop_index_from_fn((_new->this), (fn)); \
+ if (!obj->pass_through) { \
+ GF_ATOMIC_INC(obj->stats.total.metrics[_new->op].fop); \
+ GF_ATOMIC_INC(obj->stats.interval.metrics[_new->op].fop); \
+ GF_ATOMIC_INC(obj->stats.total.count); \
+ GF_ATOMIC_INC(obj->stats.interval.count); \
+ } else { \
+ /* we want to get to the actual fop to call */ \
+ next_xl_fn = get_the_pt_fop(&obj->pass_through_fops->stat, \
+ _new->op); \
+ } \
+ next_xl_fn(_new, obj, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+#define STACK_UNWIND STACK_UNWIND_STRICT
+
+/* return from function in type-safe way */
+#define STACK_UNWIND_STRICT(fop, frame, op_ret, op_errno, params...) \
+ do { \
+ fop_##fop##_cbk_t fn = NULL; \
+ call_frame_t *_parent = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ if (!frame) { \
+ gf_msg("stack", GF_LOG_CRITICAL, 0, LG_MSG_FRAME_ERROR, "!frame"); \
+ break; \
+ } \
+ if ((op_ret) < 0) { \
+ gf_msg_debug("stack-trace", op_errno, \
+ "stack-address: %p, " \
+ "%s returned %d error: %s", \
+ frame->root, THIS->name, (int32_t)(op_ret), \
+ strerror(op_errno)); \
+ } else { \
+ gf_msg_trace("stack-trace", 0, \
+ "stack-address: %p, " \
+ "%s returned %d", \
+ frame->root, THIS->name, (int32_t)(op_ret)); \
+ } \
+ fn = (fop_##fop##_cbk_t)frame->ret; \
+ _parent = frame->parent; \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ _parent->ref_count--; \
+ if ((op_ret) < 0 && (op_errno) != frame->root->error) { \
+ frame->root->err_xl = frame->this; \
+ frame->root->error = (op_errno); \
+ } else if ((op_ret) == 0) { \
+ frame->root->err_xl = NULL; \
+ frame->root->error = 0; \
+ } \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
+ old_THIS = THIS; \
+ THIS = _parent->this; \
+ frame->complete = _gf_true; \
+ frame->unwind_from = __FUNCTION__; \
+ if (frame->this->ctx->measure_latency) { \
+ timespec_now(&frame->end); \
+ /* required for top most xlator */ \
+ if (_parent->ret == NULL) \
+ timespec_now(&_parent->end); \
+ } \
+ if (op_ret < 0) { \
+ GF_ATOMIC_INC(THIS->stats.total.metrics[frame->op].cbk); \
+ GF_ATOMIC_INC(THIS->stats.interval.metrics[frame->op].cbk); \
+ } \
+ fn(_parent, frame->cookie, _parent->this, op_ret, op_errno, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+static inline int
+call_stack_alloc_groups(call_stack_t *stack, int ngrps)
+{
+ if (ngrps <= SMALL_GROUP_COUNT) {
+ stack->groups = stack->groups_small;
+ } else {
+ GF_FREE(stack->groups_large);
+ stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t),
+ gf_common_mt_groups_t);
+ if (!stack->groups_large)
+ return -1;
+ stack->groups = stack->groups_large;
+ }
+
+ stack->ngrps = ngrps;
+
+ return 0;
+}
+
+static inline int
+call_stack_groups_capacity(call_stack_t *stack)
+{
+ return max(stack->ngrps, SMALL_GROUP_COUNT);
+}
+
+static inline int
+call_frames_count(call_stack_t *call_stack)
+{
+ call_frame_t *pos;
+ int32_t count = 0;
+
+ if (!call_stack)
+ return count;
+
+ list_for_each_entry(pos, &call_stack->myframes, frames) count++;
+
+ return count;
+}
+
+static inline call_frame_t *
+copy_frame(call_frame_t *frame)
+{
+ call_stack_t *newstack = NULL;
+ call_stack_t *oldstack = NULL;
+ call_frame_t *newframe = NULL;
+
+ if (!frame) {
+ return NULL;
+ }
+
+ newstack = mem_get0(frame->root->pool->stack_mem_pool);
+ if (newstack == NULL) {
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&newstack->myframes);
+
+ newframe = mem_get0(frame->root->pool->frame_mem_pool);
+ if (!newframe) {
+ mem_put(newstack);
+ return NULL;
+ }
+
+ newframe->this = frame->this;
+ newframe->root = newstack;
+ INIT_LIST_HEAD(&newframe->frames);
+ list_add(&newframe->frames, &newstack->myframes);
+
+ oldstack = frame->root;
+
+ newstack->uid = oldstack->uid;
+ newstack->gid = oldstack->gid;
+ newstack->pid = oldstack->pid;
+ newstack->op = oldstack->op;
+ newstack->type = oldstack->type;
+ newstack->ctime = oldstack->ctime;
+ newstack->flags = oldstack->flags;
+ if (call_stack_alloc_groups(newstack, oldstack->ngrps) != 0) {
+ mem_put(newstack);
+ return NULL;
+ }
+ if (!oldstack->groups) {
+ gf_msg_debug("stack", EINVAL, "groups is null (ngrps: %d)",
+ oldstack->ngrps);
+ /* Considering 'groups' is NULL, set ngrps to 0 */
+ oldstack->ngrps = 0;
+
+ if (oldstack->groups_large)
+ oldstack->groups = oldstack->groups_large;
+ else
+ oldstack->groups = oldstack->groups_small;
+ }
+ newstack->ngrps = oldstack->ngrps;
+ memcpy(newstack->groups, oldstack->groups, sizeof(gid_t) * oldstack->ngrps);
+ newstack->unique = oldstack->unique;
+ newstack->pool = oldstack->pool;
+ newstack->lk_owner = oldstack->lk_owner;
+ newstack->ctx = oldstack->ctx;
+
+ if (newstack->ctx->measure_latency) {
+ timespec_now(&newstack->tv);
+ memcpy(&newframe->begin, &newstack->tv, sizeof(newstack->tv));
+ }
+
+ LOCK_INIT(&newframe->lock);
+ LOCK_INIT(&newstack->stack_lock);
+
+ LOCK(&oldstack->pool->lock);
+ {
+ list_add(&newstack->all_frames, &oldstack->all_frames);
+ newstack->pool->cnt++;
+ }
+ UNLOCK(&oldstack->pool->lock);
+ GF_ATOMIC_INC(newstack->pool->total_count);
+
+ return newframe;
+}
+
+void
+call_stack_set_groups(call_stack_t *stack, int ngrps, gid_t **groupbuf_p);
+void
+gf_proc_dump_pending_frames(call_pool_t *call_pool);
+void
+gf_proc_dump_pending_frames_to_dict(call_pool_t *call_pool, dict_t *dict);
+call_frame_t *
+create_frame(xlator_t *xl, call_pool_t *pool);
+gf_boolean_t
+__is_fuse_call(call_frame_t *frame);
+#endif /* _STACK_H */
diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h
new file mode 100644
index 00000000000..ce082706bdf
--- /dev/null
+++ b/libglusterfs/src/glusterfs/statedump.h
@@ -0,0 +1,132 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef STATEDUMP_H
+#define STATEDUMP_H
+
+#include <stdarg.h>
+#include "glusterfs/inode.h"
+#include "glusterfs/strfd.h"
+
+#define GF_DUMP_MAX_BUF_LEN 4096
+
+typedef struct gf_dump_xl_options_ {
+ gf_boolean_t dump_priv;
+ gf_boolean_t dump_inode;
+ gf_boolean_t dump_fd;
+ gf_boolean_t dump_inodectx;
+ gf_boolean_t dump_fdctx;
+ gf_boolean_t dump_history;
+} gf_dump_xl_options_t;
+
+typedef struct gf_dump_options_ {
+ gf_boolean_t dump_mem;
+ gf_boolean_t dump_iobuf;
+ gf_boolean_t dump_callpool;
+ gf_dump_xl_options_t xl_options; // options for all xlators
+ char *dump_path;
+} gf_dump_options_t;
+
+extern gf_dump_options_t dump_options;
+
+__attribute__((__format__(__printf__, 3, 4))) static inline void
+_gf_proc_dump_build_key(char *key, const char *prefix, const char *fmt, ...)
+{
+ va_list ap;
+ int32_t len;
+
+ len = snprintf(key, GF_DUMP_MAX_BUF_LEN, "%s.", prefix);
+ if (len >= 0) {
+ va_start(ap, fmt);
+ len = vsnprintf(key + len, GF_DUMP_MAX_BUF_LEN - len, fmt, ap);
+ va_end(ap);
+ }
+ if (len < 0) {
+ *key = 0;
+ }
+}
+
+#define gf_proc_dump_build_key(key, key_prefix, fmt...) \
+ { \
+ _gf_proc_dump_build_key(key, key_prefix, ##fmt); \
+ }
+
+#define GF_PROC_DUMP_SET_OPTION(opt, val) opt = val
+
+#define GF_CHECK_DUMP_OPTION_ENABLED(option_dump, var, label) \
+ do { \
+ if (option_dump == _gf_true) { \
+ var = _gf_false; \
+ goto label; \
+ } \
+ } while (0);
+
+void
+gf_proc_dump_init();
+
+void
+gf_proc_dump_fini(void);
+
+void
+gf_proc_dump_cleanup(void);
+
+void
+gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx);
+
+int
+gf_proc_dump_add_section(char *key, ...)
+ __attribute__((__format__(__printf__, 1, 2)));
+
+int
+gf_proc_dump_write(char *key, char *value, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+void
+inode_table_dump(inode_table_t *itable, char *prefix);
+
+void
+inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict);
+
+void
+fdtable_dump(fdtable_t *fdtable, char *prefix);
+
+void
+fdtable_dump_to_dict(fdtable_t *fdtable, char *prefix, dict_t *dict);
+
+void
+inode_dump(inode_t *inode, char *prefix);
+
+void
+gf_proc_dump_mem_info_to_dict(dict_t *dict);
+
+void
+gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict);
+
+void
+glusterd_init(int sig);
+
+void
+gf_proc_dump_xlator_private(xlator_t *this, strfd_t *strfd);
+
+void
+gf_proc_dump_mallinfo(strfd_t *strfd);
+
+void
+gf_proc_dump_xlator_history(xlator_t *this, strfd_t *strfd);
+
+void
+gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd);
+
+void
+gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd);
+
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat);
+#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
new file mode 100644
index 00000000000..a1f70c7b840
--- /dev/null
+++ b/libglusterfs/src/glusterfs/store.h
@@ -0,0 +1,112 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_STORE_H_
+#define _GLUSTERD_STORE_H_
+
+#include "glusterfs/compat.h"
+#include "glusterfs/glusterfs.h"
+
+struct gf_store_handle_ {
+ char *path;
+ int fd;
+ int tmp_fd;
+ FILE *read;
+ int locked; /* state of lockf() */
+};
+
+typedef struct gf_store_handle_ gf_store_handle_t;
+
+struct gf_store_iter_ {
+ FILE *file;
+ char filepath[PATH_MAX];
+};
+
+typedef struct gf_store_iter_ gf_store_iter_t;
+
+typedef enum {
+ GD_STORE_SUCCESS,
+ GD_STORE_KEY_NULL,
+ GD_STORE_VALUE_NULL,
+ GD_STORE_KEY_VALUE_NULL,
+ GD_STORE_EOF,
+ GD_STORE_ENOMEM,
+ GD_STORE_STAT_FAILED
+} gf_store_op_errno_t;
+
+int32_t
+gf_store_mkdir(char *path);
+
+int32_t
+gf_store_handle_create_on_absence(gf_store_handle_t **shandle, char *path);
+
+int32_t
+gf_store_mkstemp(gf_store_handle_t *shandle);
+
+int
+gf_store_sync_direntry(char *path);
+
+int32_t
+gf_store_rename_tmppath(gf_store_handle_t *shandle);
+
+int32_t
+gf_store_unlink_tmppath(gf_store_handle_t *shandle);
+
+int
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno);
+
+int32_t
+gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value);
+
+int32_t
+gf_store_save_value(int fd, char *key, char *value);
+
+int32_t
+gf_store_save_items(int fd, char *items);
+
+int32_t
+gf_store_handle_new(const char *path, gf_store_handle_t **handle);
+
+int
+gf_store_handle_retrieve(char *path, gf_store_handle_t **handle);
+
+int32_t
+gf_store_handle_destroy(gf_store_handle_t *handle);
+
+int32_t
+gf_store_iter_new(gf_store_handle_t *shandle, gf_store_iter_t **iter);
+
+int32_t
+gf_store_validate_key_value(char *storepath, char *key, char *val,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value);
+
+int32_t
+gf_store_iter_destroy(gf_store_iter_t **iter);
+
+char *
+gf_store_strerror(gf_store_op_errno_t op_errno);
+
+int
+gf_store_lock(gf_store_handle_t *sh);
+
+void
+gf_store_unlock(gf_store_handle_t *sh);
+
+int
+gf_store_locked_local(gf_store_handle_t *sh);
+
+#endif
diff --git a/libglusterfs/src/glusterfs/strfd.h b/libglusterfs/src/glusterfs/strfd.h
new file mode 100644
index 00000000000..861cd02e005
--- /dev/null
+++ b/libglusterfs/src/glusterfs/strfd.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _STRFD_H
+#define _STRFD_H
+
+typedef struct {
+ void *data;
+ size_t alloc_size;
+ size_t size;
+ off_t pos;
+} strfd_t;
+
+strfd_t *
+strfd_open();
+
+int
+strprintf(strfd_t *strfd, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+int
+strvprintf(strfd_t *strfd, const char *fmt, va_list ap);
+
+int
+strfd_close(strfd_t *strfd);
+
+#endif
diff --git a/libglusterfs/src/glusterfs/syncop-utils.h b/libglusterfs/src/glusterfs/syncop-utils.h
new file mode 100644
index 00000000000..1f3ee403edc
--- /dev/null
+++ b/libglusterfs/src/glusterfs/syncop-utils.h
@@ -0,0 +1,54 @@
+/*
+ Copyright (c) 2015, Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SYNCOP_UTILS_H
+#define _SYNCOP_UTILS_H
+
+typedef int (*syncop_dir_scan_fn_t)(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data);
+int
+syncop_ftw(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data));
+
+int
+syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ void *data, syncop_dir_scan_fn_t fn, dict_t *xdata,
+ uint32_t max_jobs, uint32_t max_qlen);
+
+int
+syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data));
+
+int
+syncop_dirfd(xlator_t *subvol, loc_t *loc, fd_t **fd, int pid);
+
+int
+syncop_is_subvol_local(xlator_t *this, loc_t *loc, gf_boolean_t *is_local);
+
+int
+syncop_gfid_to_path(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ char **path_p);
+
+int
+syncop_ftw_throttle(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data),
+ int count, int sleep_time);
+int
+syncop_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ inode_t **inode, dict_t *xdata, dict_t **rsp_dict);
+
+int
+syncop_gfid_to_path_hard(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ inode_t *inode, char **path_p,
+ gf_boolean_t hard_resolve);
+#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h
new file mode 100644
index 00000000000..4e9241a32fc
--- /dev/null
+++ b/libglusterfs/src/glusterfs/syncop.h
@@ -0,0 +1,718 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SYNCOP_H
+#define _SYNCOP_H
+
+#include <sys/time.h>
+#include <pthread.h>
+#include <ucontext.h>
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/stack.h" // for call_frame_t, STACK_DESTROY, STACK_...
+#include "glusterfs/timer.h"
+
+#define SYNCENV_PROC_MAX 16
+#define SYNCENV_PROC_MIN 2
+#define SYNCPROC_IDLE_TIME 600
+
+/*
+ * Flags for syncopctx valid elements
+ */
+#define SYNCOPCTX_UID 0x00000001
+#define SYNCOPCTX_GID 0x00000002
+#define SYNCOPCTX_GROUPS 0x00000004
+#define SYNCOPCTX_PID 0x00000008
+#define SYNCOPCTX_LKOWNER 0x00000010
+
+#ifdef HAVE_TSAN_API
+/* Currently hardcoded within thread context maintained by the sanitizer. */
+#define TSAN_THREAD_NAMELEN 64
+#endif
+
+struct synctask;
+struct syncproc;
+struct syncenv;
+struct synccond;
+
+typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque);
+
+typedef int (*synctask_fn_t)(void *opaque);
+
+typedef enum {
+ SYNCTASK_INIT = 0,
+ SYNCTASK_RUN,
+ SYNCTASK_SUSPEND,
+ SYNCTASK_WAIT,
+ SYNCTASK_DONE,
+ SYNCTASK_ZOMBIE,
+} synctask_state_t;
+
+/* for one sequential execution of @syncfn */
+struct synctask {
+ struct list_head all_tasks;
+ struct syncenv *env;
+ xlator_t *xl;
+ call_frame_t *frame;
+ call_frame_t *opframe;
+ synctask_cbk_t synccbk;
+ synctask_fn_t syncfn;
+ struct timespec *delta;
+ gf_timer_t *timer;
+ struct synccond *synccond;
+ void *opaque;
+ void *stack;
+ synctask_state_t state;
+ int woken;
+ int slept;
+ int ret;
+
+ uid_t uid;
+ gid_t gid;
+
+#ifdef HAVE_TSAN_API
+ struct {
+ void *fiber;
+ char name[TSAN_THREAD_NAMELEN];
+ } tsan;
+#endif
+
+ ucontext_t ctx;
+ struct syncproc *proc;
+
+ pthread_mutex_t mutex; /* for synchronous spawning of synctask */
+ pthread_cond_t cond;
+ int done;
+
+ struct list_head waitq; /* can wait only "once" at a time */
+};
+
+struct syncproc {
+ pthread_t processor;
+
+#ifdef HAVE_TSAN_API
+ struct {
+ void *fiber;
+ char name[TSAN_THREAD_NAMELEN];
+ } tsan;
+#endif
+
+ ucontext_t sched;
+ struct syncenv *env;
+ struct synctask *current;
+};
+
+/* hosts the scheduler thread and framework for executing synctasks */
+struct syncenv {
+ struct syncproc proc[SYNCENV_PROC_MAX];
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ struct list_head runq;
+ struct list_head waitq;
+
+ int procs;
+ int procs_idle;
+
+ int runcount;
+
+ int procmin;
+ int procmax;
+
+ size_t stacksize;
+
+ int destroy; /* FLAG to mark syncenv is in destroy mode
+ so that no more synctasks are accepted*/
+};
+
+typedef enum { LOCK_NULL = 0, LOCK_TASK, LOCK_THREAD } lock_type_t;
+
+typedef enum {
+ SYNC_LOCK_DEFAULT = 0,
+ SYNC_LOCK_RECURSIVE, /*it allows recursive locking*/
+} lock_attr_t;
+
+struct synclock {
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ volatile int lock; /* true(non zero) or false(zero), lock status */
+ lock_attr_t attr;
+ struct synctask *owner; /* NULL if current owner is not a synctask */
+ pthread_t owner_tid;
+ lock_type_t type;
+};
+typedef struct synclock synclock_t;
+
+struct synccond {
+ pthread_mutex_t pmutex;
+ pthread_cond_t pcond;
+ struct list_head waitq;
+};
+typedef struct synccond synccond_t;
+
+struct syncbarrier {
+ gf_boolean_t initialized; /*Set on successful initialization*/
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ int count; /* count the number of wakes */
+ int waitfor; /* no. of wakes until which task can be in
+ waitq before being woken up. */
+};
+typedef struct syncbarrier syncbarrier_t;
+
+struct syncargs {
+ int op_ret;
+ int op_errno;
+
+ /*
+ * The below 3 iatt structures are used in the fops
+ * whose callbacks get struct iatt as one of the
+ * a return arguments. Currently, the maximum number
+ * of iatt structures returned is 3 for some fops
+ * such as mknod, copy_file_range, mkdir etc. So
+ * all the following 3 iatt structures would be used
+ * for those fops.
+ */
+ struct iatt iatt1;
+ struct iatt iatt2;
+ struct iatt iatt3;
+ dict_t *xattr;
+ struct statvfs statvfs_buf;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ char *buffer;
+ dict_t *xdata;
+ struct gf_flock flock;
+ struct gf_lease lease;
+ dict_t *dict_out;
+
+ /* some more _cbk needs */
+ uuid_t uuid;
+ char *errstr;
+ dict_t *dict;
+ pthread_mutex_t lock_dict;
+
+ syncbarrier_t barrier;
+
+ /* do not touch */
+ struct synctask *task;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int done;
+
+ gf_dirent_t entries;
+ off_t offset;
+
+ lock_migration_info_t locklist;
+};
+
+struct syncopctx {
+ unsigned int valid; /* valid flags for elements that are set */
+ uid_t uid;
+ gid_t gid;
+ int grpsize;
+ int ngrps;
+ gid_t *groups;
+ pid_t pid;
+ gf_lkowner_t lk_owner;
+};
+
+#define __yawn(args) \
+ do { \
+ args->task = synctask_get(); \
+ if (args->task) \
+ break; \
+ pthread_mutex_init(&args->mutex, NULL); \
+ pthread_cond_init(&args->cond, NULL); \
+ args->done = 0; \
+ } while (0)
+
+#define __wake(args) \
+ do { \
+ if (args->task) { \
+ synctask_wake(args->task); \
+ } else { \
+ pthread_mutex_lock(&args->mutex); \
+ { \
+ args->done = 1; \
+ pthread_cond_signal(&args->cond); \
+ } \
+ pthread_mutex_unlock(&args->mutex); \
+ } \
+ } while (0)
+
+#define __yield(args) \
+ do { \
+ if (args->task) { \
+ synctask_yield(args->task, NULL); \
+ } else { \
+ pthread_mutex_lock(&args->mutex); \
+ { \
+ while (!args->done) \
+ pthread_cond_wait(&args->cond, &args->mutex); \
+ } \
+ pthread_mutex_unlock(&args->mutex); \
+ pthread_mutex_destroy(&args->mutex); \
+ pthread_cond_destroy(&args->cond); \
+ } \
+ } while (0)
+
+#define SYNCOP(subvol, stb, cbk, fn_op, params...) \
+ do { \
+ struct synctask *task = NULL; \
+ call_frame_t *frame = NULL; \
+ \
+ task = synctask_get(); \
+ stb->task = task; \
+ if (task) \
+ frame = copy_frame(task->opframe); \
+ else \
+ frame = syncop_create_frame(THIS); \
+ \
+ if (!frame) { \
+ stb->op_ret = -1; \
+ stb->op_errno = errno; \
+ break; \
+ } \
+ \
+ if (task) { \
+ frame->root->uid = task->uid; \
+ frame->root->gid = task->gid; \
+ } \
+ \
+ __yawn(stb); \
+ \
+ frame->op = get_fop_index_from_fn(subvol, fn_op); \
+ STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params); \
+ \
+ __yield(stb); \
+ STACK_DESTROY(frame->root); \
+ } while (0)
+
+/*
+ * syncop_xxx() calls are executed in two ways, one is inside a synctask where
+ * the executing function will do 'swapcontext' and the other is without
+ * synctask where the executing thread is made to wait using pthread_cond_wait.
+ * Executing thread may change when syncop_xxx() is executed inside a synctask.
+ * This leads to errno_location change i.e. errno may give errno of
+ * non-executing thread. So errno is not touched inside a synctask execution.
+ * All gfapi calls are executed using the second way of executing syncop_xxx()
+ * where the executing thread waits using pthread_cond_wait so it is ok to set
+ * errno in these cases. The following macro makes syncop_xxx() behave just
+ * like a system call, where -1 is returned and errno is set when a failure
+ * occurs.
+ */
+#define DECODE_SYNCOP_ERR(ret) \
+ do { \
+ if (ret < 0) { \
+ errno = -ret; \
+ ret = -1; \
+ } else { \
+ errno = 0; \
+ } \
+ } while (0)
+
+#define SYNCENV_DEFAULT_STACKSIZE (2 * 1024 * 1024)
+
+struct syncenv *
+syncenv_new(size_t stacksize, int procmin, int procmax);
+void
+syncenv_destroy(struct syncenv *);
+void
+syncenv_scale(struct syncenv *env);
+
+int
+synctask_new1(struct syncenv *, size_t stacksize, synctask_fn_t, synctask_cbk_t,
+ call_frame_t *frame, void *);
+int
+synctask_new(struct syncenv *, synctask_fn_t, synctask_cbk_t,
+ call_frame_t *frame, void *);
+struct synctask *
+synctask_create(struct syncenv *, size_t stacksize, synctask_fn_t,
+ synctask_cbk_t, call_frame_t *, void *);
+int
+synctask_join(struct synctask *task);
+void
+synctask_wake(struct synctask *task);
+void
+synctask_yield(struct synctask *task, struct timespec *delta);
+void
+synctask_sleep(int32_t secs);
+void
+synctask_waitfor(struct synctask *task, int count);
+
+#define synctask_barrier_init(args) syncbarrier_init(&args->barrier)
+#define synctask_barrier_wait(args, n) syncbarrier_wait(&args->barrier, n)
+#define synctask_barrier_wake(args) syncbarrier_wake(&args->barrier)
+
+int
+synctask_setid(struct synctask *task, uid_t uid, gid_t gid);
+#define SYNCTASK_SETID(uid, gid) synctask_setid(synctask_get(), uid, gid);
+
+int
+syncopctx_setfsuid(void *uid);
+int
+syncopctx_setfsgid(void *gid);
+int
+syncopctx_setfsgroups(int count, const void *groups);
+int
+syncopctx_setfspid(void *pid);
+int
+syncopctx_setfslkowner(gf_lkowner_t *lk_owner);
+
+static inline call_frame_t *
+syncop_create_frame(xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ int ngrps = -1;
+ struct syncopctx *opctx = NULL;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+
+ frame->root->type = GF_OP_TYPE_FOP;
+ opctx = syncopctx_getctx();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_PID))
+ frame->root->pid = opctx->pid;
+ else
+ frame->root->pid = getpid();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_UID))
+ frame->root->uid = opctx->uid;
+ else
+ frame->root->uid = geteuid();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_GID))
+ frame->root->gid = opctx->gid;
+ else
+ frame->root->gid = getegid();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_GROUPS)) {
+ ngrps = opctx->ngrps;
+
+ if (ngrps != 0 && opctx->groups != NULL) {
+ if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+
+ memcpy(frame->root->groups, opctx->groups, (sizeof(gid_t) * ngrps));
+ }
+ } else {
+ ngrps = getgroups(0, 0);
+ if (ngrps < 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+
+ if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+
+ if (getgroups(ngrps, frame->root->groups) < 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+ }
+
+ if (opctx && (opctx->valid & SYNCOPCTX_LKOWNER))
+ frame->root->lk_owner = opctx->lk_owner;
+
+ return frame;
+}
+
+int
+synclock_init(synclock_t *lock, lock_attr_t attr);
+int
+synclock_destroy(synclock_t *lock);
+int
+synclock_lock(synclock_t *lock);
+int
+synclock_trylock(synclock_t *lock);
+int
+synclock_unlock(synclock_t *lock);
+
+int32_t
+synccond_init(synccond_t *cond);
+
+void
+synccond_destroy(synccond_t *cond);
+
+int
+synccond_wait(synccond_t *cond, synclock_t *lock);
+
+int
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta);
+
+void
+synccond_signal(synccond_t *cond);
+
+void
+synccond_broadcast(synccond_t *cond);
+
+int
+syncbarrier_init(syncbarrier_t *barrier);
+int
+syncbarrier_wait(syncbarrier_t *barrier, int waitfor);
+int
+syncbarrier_wake(syncbarrier_t *barrier);
+int
+syncbarrier_destroy(syncbarrier_t *barrier);
+
+int
+syncop_lookup(xlator_t *subvol, loc_t *loc,
+ /* out */
+ struct iatt *iatt, struct iatt *parent,
+ /* xdata */
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readdirp(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ /* out */
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readdir(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_opendir(xlator_t *subvol, loc_t *loc, fd_t *fd, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_setattr(xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
+ /* out */
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_fsetattr(xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
+ /* out */
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_statfs(xlator_t *subvol, loc_t *loc,
+ /* out */
+ struct statvfs *buf, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_setxattr(xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fsetxattr(xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_listxattr(xlator_t *subvol, loc_t *loc, dict_t **dict, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_getxattr(xlator_t *xl, loc_t *loc, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fgetxattr(xlator_t *xl, fd_t *fd, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_removexattr(xlator_t *subvol, loc_t *loc, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fremovexattr(xlator_t *subvol, fd_t *fd, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_create(xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
+ fd_t *fd, struct iatt *iatt, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_open(xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_close(fd_t *fd);
+
+int
+syncop_write(xlator_t *subvol, fd_t *fd, const char *buf, int size,
+ off_t offset, struct iobref *iobref, uint32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector,
+ int32_t count, off_t offset, struct iobref *iobref,
+ uint32_t flags, struct iatt *preiatt, struct iatt *postiatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags,
+ /* out */
+ struct iovec **vector, int *count, struct iobref **iobref,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_truncate(xlator_t *subvol, loc_t *loc, off_t offset, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_unlink(xlator_t *subvol, loc_t *loc, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_rmdir(xlator_t *subvol, loc_t *loc, int flags, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_flush(xlator_t *subvol, fd_t *fd, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fstat(xlator_t *subvol, fd_t *fd, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_stat(xlator_t *subvol, loc_t *loc, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_symlink(xlator_t *subvol, loc_t *loc, const char *newpath,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readlink(xlator_t *subvol, loc_t *loc, char **buffer, size_t size,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_mknod(xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_mkdir(xlator_t *subvol, loc_t *loc, mode_t mode, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_link(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fsyncdir(xlator_t *subvol, fd_t *fd, int datasync, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_access(xlator_t *subvol, loc_t *loc, int32_t mask, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_rename(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_lk(xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_inodelk(xlator_t *subvol, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_lease(xlator_t *subvol, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_ipc(xlator_t *subvol, int op, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_xattrop(xlator_t *subvol, loc_t *loc, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out);
+
+int
+syncop_fxattrop(xlator_t *subvol, fd_t *fd, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out);
+
+int
+syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what,
+ dict_t *xdata_in, off_t *off);
+
+int
+syncop_getactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+syncop_setactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_put(xlator_t *subvol, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t offset,
+ struct iobref *iobref, dict_t *xattr, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+syncop_icreate(xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *xdata_out);
+
+int
+syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+ dict_t *xdata);
+
+#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h
new file mode 100644
index 00000000000..b6d3ab4f2ad
--- /dev/null
+++ b/libglusterfs/src/glusterfs/syscall.h
@@ -0,0 +1,278 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __SYSCALL_H__
+#define __SYSCALL_H__
+
+#include <dirent.h>
+#include <sys/uio.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <stdio.h>
+
+/* GF follows the Linux XATTR definition, which differs in Darwin. */
+#define GF_XATTR_CREATE 0x1 /* set value, fail if attr already exists */
+#define GF_XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */
+
+/* Linux kernel version 2.6.x don't have these defined
+ define if not defined */
+
+#ifndef XATTR_SECURITY_PREFIX
+#define XATTR_SECURITY_PREFIX "security."
+#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
+#endif
+
+#ifndef XATTR_SYSTEM_PREFIX
+#define XATTR_SYSTEM_PREFIX "system."
+#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1)
+#endif
+
+#ifndef XATTR_TRUSTED_PREFIX
+#define XATTR_TRUSTED_PREFIX "trusted."
+#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
+#endif
+
+#ifndef XATTR_USER_PREFIX
+#define XATTR_USER_PREFIX "user."
+#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
+#endif
+
+#if defined(GF_DARWIN_HOST_OS)
+#include <sys/xattr.h>
+#define XATTR_DARWIN_NOSECURITY XATTR_NOSECURITY
+#define XATTR_DARWIN_NODEFAULT XATTR_NODEFAULT
+#define XATTR_DARWIN_SHOWCOMPRESSION XATTR_SHOWCOMPRESSION
+#endif
+
+int
+sys_lstat(const char *path, struct stat *buf);
+
+int
+sys_stat(const char *path, struct stat *buf);
+
+int
+sys_fstat(int fd, struct stat *buf);
+
+int
+sys_fstatat(int dirfd, const char *pathname, struct stat *buf, int flags);
+
+int
+sys_open(const char *pathname, int flags, int mode);
+
+int
+sys_openat(int dirfd, const char *pathname, int flags, int mode);
+
+DIR *
+sys_opendir(const char *name);
+
+struct dirent *
+sys_readdir(DIR *dir, struct dirent *de);
+
+ssize_t
+sys_readlink(const char *path, char *buf, size_t bufsiz);
+
+int
+sys_closedir(DIR *dir);
+
+int
+sys_mknod(const char *pathname, mode_t mode, dev_t dev);
+
+int
+sys_mkdir(const char *pathname, mode_t mode);
+
+int
+sys_mkdirat(int dirfd, const char *pathname, mode_t mode);
+
+int
+sys_unlink(const char *pathname);
+
+int
+sys_unlinkat(int dfd, const char *pathname);
+
+int
+sys_rmdir(const char *pathname);
+
+int
+sys_symlink(const char *oldpath, const char *newpath);
+
+int
+sys_symlinkat(const char *oldpath, int dirfd, const char *newpath);
+
+int
+sys_rename(const char *oldpath, const char *newpath);
+
+int
+sys_link(const char *oldpath, const char *newpath);
+
+int
+sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath);
+
+int
+sys_chmod(const char *path, mode_t mode);
+
+int
+sys_fchmod(int fd, mode_t mode);
+
+int
+sys_chown(const char *path, uid_t owner, gid_t group);
+
+int
+sys_fchown(int fd, uid_t owner, gid_t group);
+
+int
+sys_lchown(const char *path, uid_t owner, gid_t group);
+
+int
+sys_truncate(const char *path, off_t length);
+
+int
+sys_ftruncate(int fd, off_t length);
+
+int
+sys_utimes(const char *filename, const struct timeval times[2]);
+
+#if defined(HAVE_UTIMENSAT)
+int
+sys_utimensat(int dirfd, const char *filename, const struct timespec times[2],
+ int flags);
+#endif
+
+int
+sys_futimes(int fd, const struct timeval times[2]);
+
+int
+sys_creat(const char *pathname, mode_t mode);
+
+ssize_t
+sys_readv(int fd, const struct iovec *iov, int iovcnt);
+
+ssize_t
+sys_writev(int fd, const struct iovec *iov, int iovcnt);
+
+ssize_t
+sys_read(int fd, void *buf, size_t count);
+
+ssize_t
+sys_write(int fd, const void *buf, size_t count);
+
+off_t
+sys_lseek(int fd, off_t offset, int whence);
+
+int
+sys_statvfs(const char *path, struct statvfs *buf);
+
+int
+sys_fstatvfs(int fd, struct statvfs *buf);
+
+int
+sys_close(int fd);
+
+int
+sys_fsync(int fd);
+
+int
+sys_fdatasync(int fd);
+
+void
+gf_add_prefix(const char *ns, const char *key, char **newkey);
+
+void
+gf_remove_prefix(const char *ns, const char *key, char **newkey);
+
+int
+sys_lsetxattr(const char *path, const char *name, const void *value,
+ size_t size, int flags);
+
+ssize_t
+sys_llistxattr(const char *path, char *list, size_t size);
+
+ssize_t
+sys_lgetxattr(const char *path, const char *name, void *value, size_t size);
+
+ssize_t
+sys_fgetxattr(int filedes, const char *name, void *value, size_t size);
+
+int
+sys_fsetxattr(int filedes, const char *name, const void *value, size_t size,
+ int flags);
+
+ssize_t
+sys_flistxattr(int filedes, char *list, size_t size);
+
+int
+sys_lremovexattr(const char *path, const char *name);
+
+int
+sys_fremovexattr(int filedes, const char *name);
+
+int
+sys_access(const char *pathname, int mode);
+
+int
+sys_fallocate(int fd, int mode, off_t offset, off_t len);
+
+ssize_t
+sys_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
+
+ssize_t
+sys_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
+
+ssize_t
+sys_pread(int fd, void *buf, size_t count, off_t offset);
+
+ssize_t
+sys_pwrite(int fd, const void *buf, size_t count, off_t offset);
+
+int
+sys_socket(int domain, int type, int protocol);
+
+int
+sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags);
+
+#ifdef GF_BSD_HOST_OS
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
+
+/*
+ * According to the man page of copy_file_range, both off_in and off_out are
+ * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not
+ * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t
+ * instead. Since it's a pointer type it should be okay. It just needs
+ * to be a pointer-to-64-bit pointer for both 32- and 64-bit platforms.
+ * off64_t is recognized by freebsd.
+ * TODO: In future, when freebsd can recognize loff_t, probably revisit this
+ * and change the off_in and off_out to (loff_t *).
+ */
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+ size_t len, unsigned int flags);
+
+int
+sys_kill(pid_t pid, int sig);
+
+#ifdef __FreeBSD__
+int
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen);
+#endif
+
+#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/glusterfs/template-component-messages.h b/libglusterfs/src/glusterfs/template-component-messages.h
new file mode 100644
index 00000000000..aa7ad3d1baa
--- /dev/null
+++ b/libglusterfs/src/glusterfs/template-component-messages.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _component_MESSAGES_H_
+#define _component_MESSAGES_H_
+
+#include "glusterfs/glfs-message-id.h"
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(component, message id, message id, ...);
+
+#endif /* !_component_MESSAGES_H_ */
diff --git a/libglusterfs/src/glusterfs/throttle-tbf.h b/libglusterfs/src/glusterfs/throttle-tbf.h
new file mode 100644
index 00000000000..cccb13c83d9
--- /dev/null
+++ b/libglusterfs/src/glusterfs/throttle-tbf.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/list.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/locking.h"
+
+#ifndef THROTTLE_TBF_H__
+#define THROTTLE_TBF_H__
+
+typedef enum tbf_ops {
+ TBF_OP_MIN = -1,
+ TBF_OP_HASH = 0, /* checksum calculation */
+ TBF_OP_READ = 1, /* inode read(s) */
+ TBF_OP_READDIR = 2, /* dentry read(s) */
+ TBF_OP_MAX = 3,
+} tbf_ops_t;
+
+/**
+ * Operation rate specification
+ */
+typedef struct tbf_opspec {
+ tbf_ops_t op;
+
+ unsigned long rate;
+
+ unsigned long maxlimit;
+
+ unsigned long token_gen_interval; /* Token generation interval in usec */
+} tbf_opspec_t;
+
+/**
+ * Token bucket for each operation type
+ */
+typedef struct tbf_bucket {
+ gf_lock_t lock;
+
+ pthread_t tokener; /* token generator thread */
+
+ unsigned long tokenrate; /* token generation rate */
+
+ unsigned long tokens; /* number of current tokens */
+
+ unsigned long maxtokens; /* maximum token in the bucket */
+
+ struct list_head queued; /* list of non-conformant requests */
+
+ unsigned long token_gen_interval; /* Token generation interval in usec */
+} tbf_bucket_t;
+
+typedef struct tbf {
+ tbf_bucket_t **bucket;
+} tbf_t;
+
+tbf_t *
+tbf_init(tbf_opspec_t *, unsigned int);
+
+int
+tbf_mod(tbf_t *, tbf_opspec_t *);
+
+void
+tbf_throttle(tbf_t *, tbf_ops_t, unsigned long);
+
+#define TBF_THROTTLE_BEGIN(tbf, op, tokens) (tbf_throttle(tbf, op, tokens))
+#define TBF_THROTTLE_END(tbf, op, tokens)
+
+#endif /** THROTTLE_TBF_H__ */
diff --git a/libglusterfs/src/glusterfs/timer.h b/libglusterfs/src/glusterfs/timer.h
new file mode 100644
index 00000000000..ae5b2edf451
--- /dev/null
+++ b/libglusterfs/src/glusterfs/timer.h
@@ -0,0 +1,56 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/xlator.h"
+#include <sys/time.h>
+#include <pthread.h>
+
+typedef void (*gf_timer_cbk_t)(void *);
+
+struct _gf_timer {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_timer *next;
+ struct _gf_timer *prev;
+ };
+ };
+ struct timespec at;
+ gf_timer_cbk_t callbk;
+ void *data;
+ xlator_t *xl;
+ gf_boolean_t fired;
+};
+
+struct _gf_timer_registry {
+ struct list_head active;
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ pthread_t th;
+ char fin;
+};
+
+typedef struct _gf_timer gf_timer_t;
+typedef struct _gf_timer_registry gf_timer_registry_t;
+
+gf_timer_t *
+gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta,
+ gf_timer_cbk_t cbk, void *data);
+
+int32_t
+gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event);
+
+void
+gf_timer_registry_destroy(glusterfs_ctx_t *ctx);
+#endif /* _TIMER_H */
diff --git a/libglusterfs/src/glusterfs/timespec.h b/libglusterfs/src/glusterfs/timespec.h
new file mode 100644
index 00000000000..bb9ab446a5f
--- /dev/null
+++ b/libglusterfs/src/glusterfs/timespec.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INCLUDE_TIMESPEC_H__
+#define __INCLUDE_TIMESPEC_H__
+
+#include <stdint.h>
+#include <sys/time.h>
+
+#define TS(ts) ((ts.tv_sec * 1000000000LL) + ts.tv_nsec)
+#define NANO (+1.0E-9)
+#define GIGA UINT64_C(1000000000)
+
+void
+timespec_now(struct timespec *ts);
+void
+timespec_now_realtime(struct timespec *ts);
+void
+timespec_adjust_delta(struct timespec *ts, struct timespec delta);
+void
+timespec_sub(const struct timespec *begin, const struct timespec *end,
+ struct timespec *res);
+int
+timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts);
+
+#endif /* __INCLUDE_TIMESPEC_H__ */
diff --git a/libglusterfs/src/glusterfs/trie.h b/libglusterfs/src/glusterfs/trie.h
new file mode 100644
index 00000000000..6d2d8015964
--- /dev/null
+++ b/libglusterfs/src/glusterfs/trie.h
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TRIE_H_
+#define _TRIE_H_
+
+struct trienode;
+typedef struct trienode trienode_t;
+
+struct trie;
+typedef struct trie trie_t;
+
+struct trienodevec {
+ trienode_t **nodes;
+ unsigned cnt;
+};
+
+trie_t *
+trie_new();
+
+int
+trie_add(trie_t *trie, const char *word);
+
+void
+trie_destroy(trie_t *trie);
+
+void
+trie_destroy_bynode(trienode_t *node);
+
+int
+trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt);
+
+int
+trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec);
+
+void
+trie_reset_search(trie_t *trie);
+
+int
+trienode_get_dist(trienode_t *node);
+
+int
+trienode_get_word(trienode_t *node, char **buf);
+
+#endif
diff --git a/libglusterfs/src/glusterfs/upcall-utils.h b/libglusterfs/src/glusterfs/upcall-utils.h
new file mode 100644
index 00000000000..0de8428c5fc
--- /dev/null
+++ b/libglusterfs/src/glusterfs/upcall-utils.h
@@ -0,0 +1,110 @@
+/*
+ Copyright (c) 2015, Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _UPCALL_UTILS_H
+#define _UPCALL_UTILS_H
+
+#include "glusterfs/iatt.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/compat.h"
+
+/* Flags sent for cache_invalidation */
+#define UP_NLINK 0x00000001 /* update nlink */
+#define UP_MODE 0x00000002 /* update mode and ctime */
+#define UP_OWN 0x00000004 /* update mode,uid,gid and ctime */
+#define UP_SIZE 0x00000008 /* update fsize */
+#define UP_TIMES 0x00000010 /* update all times */
+#define UP_ATIME 0x00000020 /* update atime only */
+#define UP_PERM \
+ 0x00000040 /* update fields needed for permission \
+ checking */
+#define UP_RENAME \
+ 0x00000080 /* this is a rename op - delete the cache \
+ entry */
+#define UP_FORGET \
+ 0x00000100 /* inode_forget on server side - \
+ invalidate the cache entry */
+#define UP_PARENT_TIMES 0x00000200 /* update parent dir times */
+
+#define UP_XATTR 0x00000400 /* update the xattrs and ctime */
+#define UP_XATTR_RM 0x00000800 /* Remove the xattrs and update ctime */
+
+#define UP_EXPLICIT_LOOKUP 0x00001000 /* Request an explicit lookup */
+
+#define UP_INVAL_ATTR 0x00002000 /* Request to invalidate iatt and xatt */
+
+/* for fops - open, read, lk, */
+#define UP_UPDATE_CLIENT (UP_ATIME)
+
+/* for fop - write, truncate */
+#define UP_WRITE_FLAGS (UP_SIZE | UP_TIMES)
+
+/* for fop - setattr */
+#define UP_ATTR_FLAGS (UP_SIZE | UP_TIMES | UP_OWN | UP_MODE | UP_PERM)
+/* for fop - rename */
+#define UP_RENAME_FLAGS (UP_RENAME)
+
+/* to invalidate parent directory entries for fops -rename, unlink, rmdir,
+ * mkdir, create */
+#define UP_PARENT_DENTRY_FLAGS (UP_PARENT_TIMES)
+
+/* for fop - unlink, link, rmdir, mkdir */
+#define UP_NLINK_FLAGS (UP_NLINK | UP_TIMES)
+
+#define IATT_UPDATE_FLAGS \
+ (UP_NLINK | UP_MODE | UP_OWN | UP_SIZE | UP_TIMES | UP_ATIME | UP_PERM)
+
+typedef enum {
+ GF_UPCALL_EVENT_NULL,
+ GF_UPCALL_CACHE_INVALIDATION,
+ GF_UPCALL_RECALL_LEASE,
+ GF_UPCALL_INODELK_CONTENTION,
+ GF_UPCALL_ENTRYLK_CONTENTION,
+} gf_upcall_event_t;
+
+struct gf_upcall {
+ char *client_uid;
+ uuid_t gfid;
+ uint32_t event_type;
+ void *data;
+};
+
+struct gf_upcall_cache_invalidation {
+ uint32_t flags;
+ uint32_t expire_time_attr;
+ struct iatt stat;
+ struct iatt p_stat; /* parent dir stat */
+ struct iatt oldp_stat; /* oldparent dir stat */
+ dict_t *dict; /* For xattrs */
+};
+
+struct gf_upcall_recall_lease {
+ uint32_t lease_type; /* Lease type to which client can downgrade to*/
+ uuid_t tid; /* transaction id of the fop that caused
+ the recall */
+ dict_t *dict;
+};
+
+struct gf_upcall_inodelk_contention {
+ struct gf_flock flock;
+ pid_t pid;
+ const char *domain;
+ dict_t *xdata;
+};
+
+struct gf_upcall_entrylk_contention {
+ uint32_t type;
+ pid_t pid;
+ const char *name;
+ const char *domain;
+ dict_t *xdata;
+};
+
+#endif /* _UPCALL_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
new file mode 100644
index 00000000000..4fd3abdaeff
--- /dev/null
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -0,0 +1,1106 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XLATOR_H
+#define _XLATOR_H
+
+#include <stdint.h> // for int32_t
+#include <sys/types.h> // for off_t, mode_t, off64_t, dev_t
+#include "glusterfs/glusterfs-fops.h" // for GF_FOP_MAXVALUE, entrylk_cmd
+#include "glusterfs/atomic.h" // for gf_atomic_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t
+#include "glusterfs/compat-uuid.h" // for uuid_t
+#include "glusterfs/compat.h"
+#include "glusterfs/event-history.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/latency.h"
+
+#define FIRST_CHILD(xl) (xl->children->xlator)
+#define SECOND_CHILD(xl) (xl->children->next->xlator)
+
+#define GF_SET_ATTR_MODE 0x1
+#define GF_SET_ATTR_UID 0x2
+#define GF_SET_ATTR_GID 0x4
+#define GF_SET_ATTR_SIZE 0x8
+#define GF_SET_ATTR_ATIME 0x10
+#define GF_SET_ATTR_MTIME 0x20
+#define GF_SET_ATTR_CTIME 0x40
+#define GF_ATTR_ATIME_NOW 0x80
+#define GF_ATTR_MTIME_NOW 0x100
+
+#define gf_attr_mode_set(mode) ((mode)&GF_SET_ATTR_MODE)
+#define gf_attr_uid_set(mode) ((mode)&GF_SET_ATTR_UID)
+#define gf_attr_gid_set(mode) ((mode)&GF_SET_ATTR_GID)
+#define gf_attr_size_set(mode) ((mode)&GF_SET_ATTR_SIZE)
+#define gf_attr_atime_set(mode) ((mode)&GF_SET_ATTR_ATIME)
+#define gf_attr_mtime_set(mode) ((mode)&GF_SET_ATTR_MTIME)
+
+struct _xlator;
+typedef struct _xlator xlator_t;
+struct _dir_entry;
+typedef struct _dir_entry dir_entry_t;
+struct _gf_dirent;
+typedef struct _gf_dirent gf_dirent_t;
+struct _loc;
+typedef struct _loc loc_t;
+
+typedef int32_t (*event_notify_fn_t)(xlator_t *this, int32_t event, void *data,
+ ...);
+
+#include "glusterfs/list.h"
+#include "glusterfs/gf-dirent.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/iobuf.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/options.h"
+#include "glusterfs/client_t.h"
+
+struct _loc {
+ const char *path;
+ const char *name;
+ inode_t *inode;
+ inode_t *parent;
+ /* Currently all location based operations are through 'gfid' of inode.
+ * But the 'inode->gfid' only gets set in higher most layer (as in,
+ * 'fuse', 'protocol/server', or 'nfs/server'). So if translators want
+ * to send fops on a inode before the 'inode->gfid' is set, they have to
+ * make use of below 'gfid' fields
+ */
+ uuid_t gfid;
+ uuid_t pargfid;
+};
+
+typedef int32_t (*fop_getspec_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, char *spec_data);
+
+typedef int32_t (*fop_rchecksum_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+typedef int32_t (*fop_getspec_t)(call_frame_t *frame, xlator_t *this,
+ const char *key, int32_t flag);
+
+typedef int32_t (*fop_rchecksum_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, int32_t len,
+ dict_t *xdata);
+
+typedef int32_t (*fop_lookup_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent);
+
+typedef int32_t (*fop_stat_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fstat_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_truncate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_ftruncate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_access_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_readlink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+typedef int32_t (*fop_mknod_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_mkdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_unlink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_rmdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_symlink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_rename_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata);
+
+typedef int32_t (*fop_link_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_create_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_open_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_readv_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+typedef int32_t (*fop_writev_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_flush_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fsync_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_opendir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_fsyncdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_statfs_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_getxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fsetxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fgetxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+typedef int32_t (*fop_removexattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fremovexattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_lk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata);
+
+typedef int32_t (*fop_inodelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_finodelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_entrylk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fentrylk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_readdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+typedef int32_t (*fop_readdirp_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+typedef int32_t (*fop_xattrop_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fxattrop_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_fsetattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_fallocate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_discard_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_zerofill_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_ipc_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_seek_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset,
+ dict_t *xdata);
+
+typedef int32_t (*fop_lease_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata);
+typedef int32_t (*fop_compound_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, void *data,
+ dict_t *xdata);
+
+typedef int32_t (*fop_getactivelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ lock_migration_info_t *locklist,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setactivelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_put_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_icreate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata);
+
+typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_copy_file_range_cbk_t)(
+ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+typedef int32_t (*fop_stat_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fstat_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+typedef int32_t (*fop_truncate_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_ftruncate_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_access_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+typedef int32_t (*fop_readlink_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size, dict_t *xdata);
+
+typedef int32_t (*fop_mknod_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask,
+ dict_t *xdata);
+
+typedef int32_t (*fop_mkdir_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+typedef int32_t (*fop_unlink_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+typedef int32_t (*fop_rmdir_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+typedef int32_t (*fop_symlink_t)(call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+typedef int32_t (*fop_rename_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+typedef int32_t (*fop_link_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+typedef int32_t (*fop_create_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask,
+ fd_t *fd, dict_t *xdata);
+
+/* Tell subsequent writes on the fd_t to fsync after every writev fop without
+ * requiring a fsync fop.
+ */
+#define GF_OPEN_FSYNC 0x01
+
+/* Tell write-behind to disable writing behind despite O_SYNC not being set.
+ */
+#define GF_OPEN_NOWB 0x02
+
+typedef int32_t (*fop_open_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_readv_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags,
+ dict_t *xdata);
+
+typedef int32_t (*fop_writev_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+typedef int32_t (*fop_flush_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fsync_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+typedef int32_t (*fop_opendir_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_fsyncdir_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+typedef int32_t (*fop_statfs_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setxattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags,
+ dict_t *xdata);
+
+typedef int32_t (*fop_getxattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata);
+
+typedef int32_t (*fop_fsetxattr_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fgetxattr_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+typedef int32_t (*fop_removexattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fremovexattr_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name,
+ dict_t *xdata);
+
+typedef int32_t (*fop_lk_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+typedef int32_t (*fop_inodelk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+typedef int32_t (*fop_finodelk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+typedef int32_t (*fop_entrylk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+typedef int32_t (*fop_fentrylk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+typedef int32_t (*fop_readdir_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_readdirp_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_xattrop_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
+
+typedef int32_t (*fop_fxattrop_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *stbuf, int32_t valid,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fsetattr_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fallocate_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata);
+
+typedef int32_t (*fop_discard_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+typedef int32_t (*fop_zerofill_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+typedef int32_t (*fop_ipc_t)(call_frame_t *frame, xlator_t *this, int32_t op,
+ dict_t *xdata);
+
+typedef int32_t (*fop_seek_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
+
+typedef int32_t (*fop_lease_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+typedef int32_t (*fop_compound_t)(call_frame_t *frame, xlator_t *this,
+ void *args, dict_t *xdata);
+
+typedef int32_t (*fop_getactivelk_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+typedef int32_t (*fop_setactivelk_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc,
+ lock_migration_info_t *locklist,
+ dict_t *xdata);
+
+typedef int32_t (*fop_put_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, uint32_t flags,
+ struct iovec *vector, int32_t count, off_t offset,
+ struct iobref *iobref, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_icreate_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dict_t *xdata);
+
+typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags,
+ dict_t *xdata);
+
+/* WARNING: make sure the list is in order with FOP definition in
+ `rpc/xdr/src/glusterfs-fops.x`.
+ If it is not in order, mainly the metrics related feature would be broken */
+struct xlator_fops {
+ fop_stat_t stat;
+ fop_readlink_t readlink;
+ fop_mknod_t mknod;
+ fop_mkdir_t mkdir;
+ fop_unlink_t unlink;
+ fop_rmdir_t rmdir;
+ fop_symlink_t symlink;
+ fop_rename_t rename;
+ fop_link_t link;
+ fop_truncate_t truncate;
+ fop_open_t open;
+ fop_readv_t readv;
+ fop_writev_t writev;
+ fop_statfs_t statfs;
+ fop_flush_t flush;
+ fop_fsync_t fsync;
+ fop_setxattr_t setxattr;
+ fop_getxattr_t getxattr;
+ fop_removexattr_t removexattr;
+ fop_opendir_t opendir;
+ fop_fsyncdir_t fsyncdir;
+ fop_access_t access;
+ fop_create_t create;
+ fop_ftruncate_t ftruncate;
+ fop_fstat_t fstat;
+ fop_lk_t lk;
+ fop_lookup_t lookup;
+ fop_readdir_t readdir;
+ fop_inodelk_t inodelk;
+ fop_finodelk_t finodelk;
+ fop_entrylk_t entrylk;
+ fop_fentrylk_t fentrylk;
+ fop_xattrop_t xattrop;
+ fop_fxattrop_t fxattrop;
+ fop_fgetxattr_t fgetxattr;
+ fop_fsetxattr_t fsetxattr;
+ fop_rchecksum_t rchecksum;
+ fop_setattr_t setattr;
+ fop_fsetattr_t fsetattr;
+ fop_readdirp_t readdirp;
+
+ /* These 3 are required to keep the index same as GF_FOP_##FOP */
+ void *forget_placeholder;
+ void *release_placeholder;
+ void *releasedir_placeholder;
+
+ fop_getspec_t getspec;
+ fop_fremovexattr_t fremovexattr;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
+ fop_ipc_t ipc;
+ fop_seek_t seek;
+ fop_lease_t lease;
+ fop_compound_t compound;
+ fop_getactivelk_t getactivelk;
+ fop_setactivelk_t setactivelk;
+ fop_put_t put;
+ fop_icreate_t icreate;
+ fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
+
+ /* these entries are used for a typechecking hack in STACK_WIND _only_ */
+ /* make sure to add _cbk variables only after defining regular fops as
+ its relative position is used to get the index */
+ fop_stat_cbk_t stat_cbk;
+ fop_readlink_cbk_t readlink_cbk;
+ fop_mknod_cbk_t mknod_cbk;
+ fop_mkdir_cbk_t mkdir_cbk;
+ fop_unlink_cbk_t unlink_cbk;
+ fop_rmdir_cbk_t rmdir_cbk;
+ fop_symlink_cbk_t symlink_cbk;
+ fop_rename_cbk_t rename_cbk;
+ fop_link_cbk_t link_cbk;
+ fop_truncate_cbk_t truncate_cbk;
+ fop_open_cbk_t open_cbk;
+ fop_readv_cbk_t readv_cbk;
+ fop_writev_cbk_t writev_cbk;
+ fop_statfs_cbk_t statfs_cbk;
+ fop_flush_cbk_t flush_cbk;
+ fop_fsync_cbk_t fsync_cbk;
+ fop_setxattr_cbk_t setxattr_cbk;
+ fop_getxattr_cbk_t getxattr_cbk;
+ fop_removexattr_cbk_t removexattr_cbk;
+ fop_opendir_cbk_t opendir_cbk;
+ fop_fsyncdir_cbk_t fsyncdir_cbk;
+ fop_access_cbk_t access_cbk;
+ fop_create_cbk_t create_cbk;
+ fop_ftruncate_cbk_t ftruncate_cbk;
+ fop_fstat_cbk_t fstat_cbk;
+ fop_lk_cbk_t lk_cbk;
+ fop_lookup_cbk_t lookup_cbk;
+ fop_readdir_cbk_t readdir_cbk;
+ fop_inodelk_cbk_t inodelk_cbk;
+ fop_finodelk_cbk_t finodelk_cbk;
+ fop_entrylk_cbk_t entrylk_cbk;
+ fop_fentrylk_cbk_t fentrylk_cbk;
+ fop_xattrop_cbk_t xattrop_cbk;
+ fop_fxattrop_cbk_t fxattrop_cbk;
+ fop_fgetxattr_cbk_t fgetxattr_cbk;
+ fop_fsetxattr_cbk_t fsetxattr_cbk;
+ fop_rchecksum_cbk_t rchecksum_cbk;
+ fop_setattr_cbk_t setattr_cbk;
+ fop_fsetattr_cbk_t fsetattr_cbk;
+ fop_readdirp_cbk_t readdirp_cbk;
+
+ /* These 3 are required to keep the index same as GF_FOP_##FOP */
+ void *forget_placeholder_cbk;
+ void *release_placeholder_cbk;
+ void *releasedir_placeholder_cbk;
+
+ fop_getspec_cbk_t getspec_cbk;
+ fop_fremovexattr_cbk_t fremovexattr_cbk;
+ fop_fallocate_cbk_t fallocate_cbk;
+ fop_discard_cbk_t discard_cbk;
+ fop_zerofill_cbk_t zerofill_cbk;
+ fop_ipc_cbk_t ipc_cbk;
+ fop_seek_cbk_t seek_cbk;
+ fop_lease_cbk_t lease_cbk;
+ fop_compound_cbk_t compound_cbk;
+ fop_getactivelk_cbk_t getactivelk_cbk;
+ fop_setactivelk_cbk_t setactivelk_cbk;
+ fop_put_cbk_t put_cbk;
+ fop_icreate_cbk_t icreate_cbk;
+ fop_namelink_cbk_t namelink_cbk;
+ fop_copy_file_range_cbk_t copy_file_range_cbk;
+};
+
+typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode);
+
+typedef int32_t (*cbk_release_t)(xlator_t *this, fd_t *fd);
+
+typedef int32_t (*cbk_invalidate_t)(xlator_t *this, inode_t *inode);
+
+typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client);
+
+typedef void (*cbk_ictxmerge_t)(xlator_t *this, fd_t *fd, inode_t *inode,
+ inode_t *linked_inode);
+
+typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
+
+typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
+
+typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd);
+
+struct xlator_cbks {
+ cbk_forget_t forget;
+ cbk_release_t release;
+ cbk_release_t releasedir;
+ cbk_invalidate_t invalidate;
+ cbk_client_t client_destroy;
+ cbk_client_t client_disconnect;
+ cbk_ictxmerge_t ictxmerge;
+ cbk_inodectx_size_t ictxsize;
+ cbk_fdctx_size_t fdctxsize;
+ cbk_fdclose_t fdclose;
+ cbk_fdclose_t fdclosedir;
+};
+
+typedef int32_t (*dumpop_priv_t)(xlator_t *this);
+
+typedef int32_t (*dumpop_inode_t)(xlator_t *this);
+
+typedef int32_t (*dumpop_fd_t)(xlator_t *this);
+
+typedef int32_t (*dumpop_inodectx_t)(xlator_t *this, inode_t *ino);
+
+typedef int32_t (*dumpop_fdctx_t)(xlator_t *this, fd_t *fd);
+
+typedef int32_t (*dumpop_priv_to_dict_t)(xlator_t *this, dict_t *dict,
+ char *brickname);
+
+typedef int32_t (*dumpop_inode_to_dict_t)(xlator_t *this, dict_t *dict);
+
+typedef int32_t (*dumpop_fd_to_dict_t)(xlator_t *this, dict_t *dict);
+
+typedef int32_t (*dumpop_inodectx_to_dict_t)(xlator_t *this, inode_t *ino,
+ dict_t *dict);
+
+typedef int32_t (*dumpop_fdctx_to_dict_t)(xlator_t *this, fd_t *fd,
+ dict_t *dict);
+
+typedef int32_t (*dumpop_eh_t)(xlator_t *this);
+
+struct xlator_dumpops {
+ dumpop_priv_t priv;
+ dumpop_inode_t inode;
+ dumpop_fd_t fd;
+ dumpop_inodectx_t inodectx;
+ dumpop_fdctx_t fdctx;
+ dumpop_priv_to_dict_t priv_to_dict;
+ dumpop_inode_to_dict_t inode_to_dict;
+ dumpop_fd_to_dict_t fd_to_dict;
+ dumpop_inodectx_to_dict_t inodectx_to_dict;
+ dumpop_fdctx_to_dict_t fdctx_to_dict;
+ dumpop_eh_t history;
+};
+
+typedef struct xlator_list {
+ xlator_t *xlator;
+ struct xlator_list *next;
+} xlator_list_t;
+
+typedef struct fop_metrics {
+ gf_atomic_t fop;
+ gf_atomic_t cbk; /* only updaed when there is failure */
+} fop_metrics_t;
+
+struct _xlator {
+ /* Built during parsing */
+ char *name;
+ char *type;
+ char *instance_name; /* Used for multi NFSd */
+ xlator_t *next;
+ xlator_t *prev;
+ xlator_list_t *parents;
+ xlator_list_t *children;
+ dict_t *options;
+
+ /* Set after doing dlopen() */
+ void *dlhandle;
+ struct xlator_fops *fops;
+ struct xlator_cbks *cbks;
+ struct xlator_dumpops *dumpops;
+ struct list_head volume_options; /* list of volume_option_t */
+
+ void (*fini)(xlator_t *this);
+ int32_t (*init)(xlator_t *this);
+ int32_t (*reconfigure)(xlator_t *this, dict_t *options);
+ int32_t (*mem_acct_init)(xlator_t *this);
+ int32_t (*dump_metrics)(xlator_t *this, int fd);
+
+ event_notify_fn_t notify;
+
+ gf_loglevel_t loglevel; /* Log level for translator */
+
+ struct {
+ struct {
+ /* for latency measurement */
+ fop_metrics_t metrics[GF_FOP_MAXVALUE];
+
+ gf_atomic_t count;
+ } total;
+
+ struct {
+ /* for latency measurement */
+ gf_latency_t latencies[GF_FOP_MAXVALUE];
+ /* for latency measurement */
+ fop_metrics_t metrics[GF_FOP_MAXVALUE];
+
+ gf_atomic_t count;
+ } interval;
+ } stats;
+
+ /* Misc */
+ eh_t *history; /* event history context */
+ glusterfs_ctx_t *ctx;
+ glusterfs_graph_t *graph; /* not set for fuse */
+ inode_table_t *itable;
+ char init_succeeded;
+ void *private;
+ struct mem_acct *mem_acct;
+ uint64_t winds;
+ char switched;
+
+ /* for the memory pool of 'frame->local' */
+ struct mem_pool *local_pool;
+ gf_boolean_t is_autoloaded;
+
+ /* Saved volfile ID (used for multiplexing) */
+ char *volfile_id;
+
+ /* Its used as an index to inode_ctx*/
+ uint32_t xl_id;
+
+ /* op_version: initialized in xlator code itself */
+ uint32_t op_version[GF_MAX_RELEASES];
+
+ /* flags: initialized in xlator code itself */
+ uint32_t flags;
+
+ /* id: unique, initialized in xlator code itself */
+ uint32_t id;
+
+ /* identifier: a full string which can unique identify the xlator */
+ char *identifier;
+
+ /* Is this pass_through? */
+ gf_boolean_t pass_through;
+ struct xlator_fops *pass_through_fops;
+
+ /* cleanup flag to avoid races during xlator cleanup */
+ uint32_t cleanup_starting;
+
+ /* flag to avoid recall of xlator_mem_cleanup for xame xlator */
+ uint32_t call_cleanup;
+
+ /* Flag to understand how this xlator is categorized */
+ gf_category_t category;
+
+ /* Variable to save xprt associated for detach brick */
+ gf_atomic_t xprtrefcnt;
+
+ /* Flag to notify got CHILD_DOWN event for detach brick */
+ uint32_t notify_down;
+
+ /* Flag to avoid throw duplicate PARENT_DOWN event */
+ uint32_t parent_down;
+};
+
+/* This would be the only structure which needs to be exported by
+ the translators. For the backward compatibility, in 4.x series
+ even the old exported fields will be supported */
+/* XXX: This struct is in use by GD2, and hence SHOULD NOT be modified.
+ * If the struct must be modified, see instructions at the comment with
+ * GD2MARKER below.
+ */
+typedef struct {
+ /* op_version: will be used by volume generation logic to figure
+ out whether to insert it in graph or no, based on cluster's
+ operating version.
+ default value: 0, which means good to insert always */
+ uint32_t op_version[GF_MAX_RELEASES];
+
+ /* flags: will be used by volume generation logic to optimize the
+ placements etc.
+ default value: 0, which means don't treat it specially */
+ uint32_t flags;
+
+ /* xlator_id: unique per xlator. make sure to have no collission
+ in this ID */
+ uint32_t xlator_id;
+
+ /* identifier: a string constant */
+ char *identifier;
+
+ /* struct options: if the translator takes any 'options' from the
+ volume file, then that should be defined here. optional. */
+ volume_option_t *options;
+
+ /* Flag to understand how this xlator is categorized */
+ gf_category_t category;
+
+ /* XXX: GD2MARKER
+ * If a new member that needs to be visible to GD2 is introduced,
+ * add it above this comment.
+ * Any other new members need to be added below this comment, or at the
+ * end of the struct
+ */
+
+ /* init(): mandatory method, will be called during the
+ graph initialization */
+ int32_t (*init)(xlator_t *this);
+
+ /* fini(): optional method, will be initialized to default
+ method which would just free the 'xlator->private' variable.
+ This method is called when the graph is no more in use, and
+ is being destroyed. Also when SIGTERM is received */
+ void (*fini)(xlator_t *this);
+
+ /* reconfigure(): optional method, will be initialized to default
+ method in case not provided by xlator. This method is called
+ when there are only option changes in xlator, and no graph change.
+ eg., a 'gluster volume set' command */
+ int32_t (*reconfigure)(xlator_t *this, dict_t *options);
+
+ /* mem_acct_init(): used for memory accounting inside of the xlator.
+ optional. called during translator initialization */
+ int32_t (*mem_acct_init)(xlator_t *this);
+
+ /* dump_metrics(): used for providing internal metrics. optional */
+ int32_t (*dump_metrics)(xlator_t *this, int fd);
+
+ /* notify(): used for handling the notification of events from either
+ the parent or child in the graph. optional. */
+ event_notify_fn_t notify;
+
+ /* struct fops: mandatory. provides all the filesystem operations
+ methods of the xlator */
+ struct xlator_fops *fops;
+ /* struct cbks: optional. provides methods to handle
+ inode forgets, and fd releases */
+ struct xlator_cbks *cbks;
+
+ /* dumpops: a structure again, with methods to dump the details.
+ optional. */
+ struct xlator_dumpops *dumpops;
+
+ /* struct pass_through_fops: optional. provides all the filesystem
+ operations which should be used if the xlator is marked as pass_through
+ */
+ /* by default, the default_fops would be used */
+ struct xlator_fops *pass_through_fops;
+} xlator_api_t;
+
+#define xlator_has_parent(xl) (xl->parents != NULL)
+
+#define XLATOR_NOTIFY(ret, _xl, params...) \
+ do { \
+ xlator_t *_old_THIS = NULL; \
+ \
+ _old_THIS = THIS; \
+ THIS = _xl; \
+ \
+ ret = _xl->notify(_xl, params); \
+ \
+ THIS = _old_THIS; \
+ } while (0);
+
+int32_t
+xlator_set_type_virtual(xlator_t *xl, const char *type);
+
+int32_t
+xlator_set_type(xlator_t *xl, const char *type);
+
+int32_t
+xlator_dynload(xlator_t *xl);
+
+xlator_t *
+file_to_xlator_tree(glusterfs_ctx_t *ctx, FILE *fp);
+
+int
+xlator_notify(xlator_t *this, int32_t event, void *data, ...);
+int
+xlator_init(xlator_t *this);
+int
+xlator_destroy(xlator_t *xl);
+
+int32_t
+xlator_tree_init(xlator_t *xl);
+int32_t
+xlator_tree_free_members(xlator_t *xl);
+int32_t
+xlator_tree_free_memacct(xlator_t *xl);
+
+void
+xlator_tree_fini(xlator_t *xl);
+
+void
+xlator_foreach(xlator_t *this, void (*fn)(xlator_t *each, void *data),
+ void *data);
+
+void
+xlator_foreach_depth_first(xlator_t *this,
+ void (*fn)(xlator_t *each, void *data), void *data);
+
+xlator_t *
+xlator_search_by_name(xlator_t *any, const char *name);
+xlator_t *
+get_xlator_by_name(xlator_t *this, char *target);
+xlator_t *
+get_xlator_by_type(xlator_t *this, char *target);
+
+void
+xlator_set_inode_lru_limit(xlator_t *this, void *data);
+
+void
+inode_destroy_notify(inode_t *inode, const char *xlname);
+
+int
+loc_copy(loc_t *dst, loc_t *src);
+int
+loc_copy_overload_parent(loc_t *dst, loc_t *src, inode_t *parent);
+#define loc_dup(src, dst) loc_copy(dst, src)
+void
+loc_wipe(loc_t *loc);
+int
+loc_path(loc_t *loc, const char *bname);
+void
+loc_gfid(loc_t *loc, uuid_t gfid);
+void
+loc_pargfid(loc_t *loc, uuid_t pargfid);
+char *
+loc_gfid_utoa(loc_t *loc);
+gf_boolean_t
+loc_is_root(loc_t *loc);
+int32_t
+loc_build_child(loc_t *child, loc_t *parent, char *name);
+gf_boolean_t
+loc_is_nameless(loc_t *loc);
+int
+xlator_mem_acct_init(xlator_t *xl, int num_types);
+void
+xlator_mem_acct_unref(struct mem_acct *mem_acct);
+int
+is_gf_log_command(xlator_t *trans, const char *name, char *value, size_t size);
+int
+glusterd_check_log_level(const char *value);
+int
+xlator_volopt_dynload(char *xlator_type, void **dl_handle,
+ volume_opt_list_t *vol_opt_handle);
+enum gf_hdsk_event_notify_op {
+ GF_EN_DEFRAG_STATUS,
+ GF_EN_MAX,
+};
+gf_boolean_t
+is_graph_topology_equal(glusterfs_graph_t *graph1, glusterfs_graph_t *graph2);
+int
+glusterfs_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx);
+
+int
+gf_volfile_reconfigure(int oldvollen, FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ const char *oldvolfile);
+
+int
+loc_touchup(loc_t *loc, const char *name);
+
+int
+glusterfs_leaf_position(xlator_t *tgt);
+
+int
+glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves);
+
+int
+xlator_subvolume_count(xlator_t *this);
+
+void
+xlator_init_lock(void);
+void
+xlator_init_unlock(void);
+int
+copy_opts_to_child(xlator_t *src, xlator_t *dst, char *glob);
+
+int
+glusterfs_delete_volfile_checksum(glusterfs_ctx_t *ctx, const char *volfile_id);
+int
+xlator_memrec_free(xlator_t *xl);
+
+void
+xlator_mem_cleanup(xlator_t *this);
+
+void
+handle_default_options(xlator_t *xl, dict_t *options);
+
+void
+gluster_graph_take_reference(xlator_t *tree);
+
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name);
+
+gf_boolean_t
+xlator_is_cleanup_starting(xlator_t *this);
+int
+graph_total_client_xlator(glusterfs_graph_t *graph);
+#endif /* _XLATOR_H */
diff --git a/libglusterfs/src/graph-print.c b/libglusterfs/src/graph-print.c
index 66712986496..595d74330a1 100644
--- a/libglusterfs/src/graph-print.c
+++ b/libglusterfs/src/graph-print.c
@@ -8,181 +8,128 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/uio.h>
-#include "common-utils.h"
-#include "xlator.h"
-#include "graph-utils.h"
-
-
+#include "glusterfs/common-utils.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/graph-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
struct gf_printer {
- ssize_t (*write) (struct gf_printer *gp, char *buf, size_t len);
- void *priv;
+ ssize_t (*write)(struct gf_printer *gp, char *buf, size_t len);
+ void *priv;
+ int len;
};
static ssize_t
-gp_write_file (struct gf_printer *gp, char *buf, size_t len)
+gp_write_file(struct gf_printer *gp, char *buf, size_t len)
{
- FILE *f = gp->priv;
+ FILE *f = gp->priv;
- if (fwrite (buf, len, 1, f) != 1) {
- gf_log ("graph-print", GF_LOG_ERROR, "fwrite failed (%s)",
- strerror (errno));
+ if (fwrite(buf, len, 1, f) != 1) {
+ gf_msg("graph-print", GF_LOG_ERROR, errno, LG_MSG_FWRITE_FAILED,
+ "fwrite failed");
- return -1;
- }
+ return -1;
+ }
- return len;
+ return len;
}
-static ssize_t
-gp_write_buf (struct gf_printer *gp, char *buf, size_t len)
+static int
+gpprintf(struct gf_printer *gp, const char *format, ...)
{
- struct iovec *iov = gp->priv;
+ va_list arg;
+ char *str = NULL;
+ int ret = 0;
- if (iov->iov_len < len) {
- gf_log ("graph-print", GF_LOG_ERROR, "buffer full");
+ va_start(arg, format);
+ ret = gf_vasprintf(&str, format, arg);
+ va_end(arg);
- return -1;
- }
+ if (ret < 0)
+ return ret;
- memcpy (iov->iov_base, buf, len);
- iov->iov_base += len;
- iov->iov_len -= len;
+ ret = gp->write(gp, str, ret);
- return len;
+ GF_FREE(str);
+
+ return ret;
}
+#define GPPRINTF(gp, fmt, ...) \
+ do { \
+ ret = gpprintf(gp, fmt, ##__VA_ARGS__); \
+ if (ret == -1) \
+ goto out; \
+ else \
+ gp->len += ret; \
+ } while (0)
+
static int
-gpprintf (struct gf_printer *gp, const char *format, ...)
+_print_volume_options(dict_t *d, char *k, data_t *v, void *tmp)
{
- va_list arg;
- char *str = NULL;
- int ret = 0;
+ struct gf_printer *gp = tmp;
+ int ret = 0;
+ GPPRINTF(gp, " option %s %s\n", k, v->data);
+ return 0;
+out:
+ /* means, it is a failure */
+ return -1;
+}
- va_start (arg, format);
- ret = gf_vasprintf (&str, format, arg);
- va_end (arg);
+static int
+glusterfs_graph_print(struct gf_printer *gp, glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ xlator_list_t *xch = NULL;
+ int ret = 0;
+ ssize_t len = 0;
- if (ret < 0)
- return ret;
+ if (!graph->first)
+ return 0;
- ret = gp->write (gp, str, ret);
+ for (trav = graph->first; trav->next; trav = trav->next)
+ ;
+ for (; trav; trav = trav->prev) {
+ GPPRINTF(gp, "volume %s\n type %s\n", trav->name, trav->type);
- GF_FREE (str);
+ ret = dict_foreach(trav->options, _print_volume_options, gp);
+ if (ret)
+ goto out;
- return ret;
-}
+ if (trav->children) {
+ GPPRINTF(gp, " subvolumes");
-static int
-glusterfs_graph_print (struct gf_printer *gp, glusterfs_graph_t *graph)
-{
-#define GPPRINTF(gp, fmt, ...) do { \
- ret = gpprintf (gp, fmt, ## __VA_ARGS__); \
- if (ret == -1) \
- goto out; \
- else \
- len += ret; \
- } while (0)
-
- xlator_t *trav = NULL;
- data_pair_t *pair = NULL;
- xlator_list_t *xch = NULL;
- int ret = 0;
- ssize_t len = 0;
-
- if (!graph->first)
- return 0;
-
- for (trav = graph->first; trav->next; trav = trav->next);
- for (; trav; trav = trav->prev) {
- GPPRINTF (gp, "volume %s\n type %s\n", trav->name,
- trav->type);
-
- for (pair = trav->options->members_list; pair && pair->next;
- pair = pair->next);
- for (; pair; pair = pair->prev)
- GPPRINTF (gp, " option %s %s\n", pair->key,
- pair->value->data);
-
- if (trav->children) {
- GPPRINTF (gp, " subvolumes");
-
- for (xch = trav->children; xch; xch = xch->next)
- GPPRINTF (gp, " %s", xch->xlator->name);
-
- GPPRINTF (gp, "\n");
- }
-
- GPPRINTF (gp, "end-volume\n");
- if (trav != graph->first)
- GPPRINTF (gp, "\n");
+ for (xch = trav->children; xch; xch = xch->next)
+ GPPRINTF(gp, " %s", xch->xlator->name);
+
+ GPPRINTF(gp, "\n");
}
+ GPPRINTF(gp, "end-volume\n");
+ if (trav != graph->first)
+ GPPRINTF(gp, "\n");
+ }
+
out:
- if (ret == -1) {
- gf_log ("graph-print", GF_LOG_ERROR, "printing failed");
+ len = gp->len;
+ if (ret == -1) {
+ gf_msg("graph-print", GF_LOG_ERROR, 0, LG_MSG_PRINT_FAILED,
+ "printing failed");
- return -1;
- }
+ return -1;
+ }
- return len;
+ return len;
#undef GPPRINTF
}
int
-glusterfs_graph_print_file (FILE *file, glusterfs_graph_t *graph)
+glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph)
{
- struct gf_printer gp = { .write = gp_write_file,
- .priv = file
- };
-
- return glusterfs_graph_print (&gp, graph);
-}
-
-char *
-glusterfs_graph_print_buf (glusterfs_graph_t *graph)
-{
- FILE *f = NULL;
- struct iovec iov = {0,};
- int len = 0;
- char *buf = NULL;
- struct gf_printer gp = { .write = gp_write_buf,
- .priv = &iov
- };
-
- f = fopen ("/dev/null", "a");
- if (!f) {
- gf_log ("graph-print", GF_LOG_ERROR,
- "cannot open /dev/null (%s)", strerror (errno));
-
- return NULL;
- }
- len = glusterfs_graph_print_file (f, graph);
- fclose (f);
- if (len == -1)
- return NULL;
-
- buf = GF_CALLOC (1, len + 1, gf_common_mt_graph_buf);
- if (!buf) {
- return NULL;
- }
- iov.iov_base = buf;
- iov.iov_len = len;
-
- len = glusterfs_graph_print (&gp, graph);
- if (len == -1) {
- GF_FREE (buf);
-
- return NULL;
- }
+ struct gf_printer gp = {.write = gp_write_file, .priv = file};
- return buf;
+ return glusterfs_graph_print(&gp, graph);
}
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index 65cbb2e8323..13f298eb3bd 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -8,19 +8,32 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include <dlfcn.h>
-#include <netdb.h>
-#include <fnmatch.h>
-#include "defaults.h"
-
-
-
+#include <stdint.h> // for uint32_t
+#include <sys/time.h> // for timeval
+#include <errno.h> // for EIO, errno, EINVAL, ENOMEM
+#include <fnmatch.h> // for fnmatch, FNM_NOESCAPE
+#include <openssl/sha.h> // for SHA256_DIGEST_LENGTH
+#include <regex.h> // for regmatch_t, regcomp
+#include <stdio.h> // for fclose, fopen, snprintf
+#include <stdlib.h> // for NULL, atoi, mkstemp
+#include <string.h> // for strcmp, strerror, memcpy
+#include <strings.h> // for rindex
+#include <sys/stat.h> // for stat
+#include <sys/time.h> // for gettimeofday
+#include <unistd.h> // for gethostname, getpid
+#include "glusterfs/common-utils.h" // for gf_strncpy, gf_time_fmt
+#include "glusterfs/defaults.h"
+#include "glusterfs/dict.h" // for dict_foreach, dict_set_...
+#include "glusterfs/globals.h" // for xlator_t, xlator_list_t
+#include "glusterfs/glusterfs.h" // for glusterfs_graph_t, glus...
+#include "glusterfs/glusterfs-fops.h" // for GF_EVENT_GRAPH_NEW, GF_...
+#include "glusterfs/libglusterfs-messages.h" // for LG_MSG_GRAPH_ERROR, LG_...
+#include "glusterfs/list.h" // for list_add, list_del_init
+#include "glusterfs/logging.h" // for gf_msg, GF_LOG_ERROR
+#include "glusterfs/mem-pool.h" // for GF_FREE, gf_strdup, GF_...
+#include "glusterfs/mem-types.h" // for gf_common_mt_xlator_list_t
+#include "glusterfs/options.h" // for xlator_tree_reconfigure
+#include "glusterfs/syscall.h" // for sys_close, sys_stat
#if 0
static void
@@ -28,20 +41,17 @@ _gf_dump_details (int argc, char **argv)
{
extern FILE *gf_log_logfile;
int i = 0;
- char timestr[256];
+ char timestr[GF_TIMESTR_SIZE];
time_t utime = 0;
- struct tm *tm = NULL;
pid_t mypid = 0;
struct utsname uname_buf = {{0, }, };
int uname_ret = -1;
- utime = time (NULL);
- tm = localtime (&utime);
mypid = getpid ();
uname_ret = uname (&uname_buf);
- /* Which git? What time? */
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ utime = time (NULL);
+ gf_time_fmt (timestr, sizeof timestr, utime, gf_timefmt_FT);
fprintf (gf_log_logfile,
"========================================"
"========================================\n");
@@ -74,463 +84,1797 @@ _gf_dump_details (int argc, char **argv)
}
#endif
+int
+glusterfs_read_secure_access_file(void)
+{
+ FILE *fp = NULL;
+ char line[100] = {
+ 0,
+ };
+ int cert_depth = 1; /* Default SSL CERT DEPTH */
+ regex_t regcmpl;
+ char *key = {"^option transport.socket.ssl-cert-depth"};
+ char keyval[50] = {
+ 0,
+ };
+ int start = 0, end = 0, copy_len = 0;
+ regmatch_t result[1] = {{0}};
+
+ fp = fopen(SECURE_ACCESS_FILE, "r");
+ if (!fp)
+ goto out;
+
+ /* Check if any line matches with key */
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ if (regcomp(&regcmpl, key, REG_EXTENDED)) {
+ goto out;
+ }
+ if (!regexec(&regcmpl, line, 1, result, 0)) {
+ start = result[0].rm_so;
+ end = result[0].rm_eo;
+ copy_len = end - start;
+ gf_strncpy(keyval, line + copy_len, sizeof(keyval));
+ if (keyval[0]) {
+ cert_depth = atoi(keyval);
+ if (cert_depth == 0)
+ cert_depth = 1; /* Default SSL CERT DEPTH */
+ break;
+ }
+ }
+ regfree(&regcmpl);
+ }
+out:
+ if (fp)
+ fclose(fp);
+ return cert_depth;
+}
-int
-glusterfs_xlator_link (xlator_t *pxl, xlator_t *cxl)
+xlator_t *
+glusterfs_get_last_xlator(glusterfs_graph_t *graph)
{
- xlator_list_t *xlchild = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t **tmp = NULL;
+ xlator_t *trav = graph->first;
+ if (!trav)
+ return NULL;
- xlparent = (void *) GF_CALLOC (1, sizeof (*xlparent),
- gf_common_mt_xlator_list_t);
- if (!xlparent)
- return -1;
+ while (trav->next)
+ trav = trav->next;
- xlchild = (void *) GF_CALLOC (1, sizeof (*xlchild),
- gf_common_mt_xlator_list_t);
- if (!xlchild) {
- GF_FREE (xlparent);
+ return trav;
+}
- return -1;
+xlator_t *
+glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl)
+{
+ xlator_list_t *unlink = NULL;
+ xlator_list_t *prev = NULL;
+ xlator_list_t **tmp = NULL;
+ xlator_t *next_child = NULL;
+ xlator_t *xl = NULL;
+
+ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) {
+ if ((*tmp)->xlator == cxl) {
+ unlink = *tmp;
+ *tmp = (*tmp)->next;
+ if (*tmp)
+ next_child = (*tmp)->xlator;
+ break;
}
+ prev = *tmp;
+ }
- xlparent->xlator = pxl;
- for (tmp = &cxl->parents; *tmp; tmp = &(*tmp)->next);
- *tmp = xlparent;
+ if (!prev)
+ xl = pxl;
+ else if (prev->xlator)
+ xl = prev->xlator->graph->last_xl;
- xlchild->xlator = cxl;
- for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next);
- *tmp = xlchild;
+ if (xl)
+ xl->next = next_child;
+ if (next_child)
+ next_child->prev = xl;
- return 0;
+ GF_FREE(unlink);
+ return next_child;
}
+int
+glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl)
+{
+ xlator_list_t *xlchild = NULL;
+ xlator_list_t *xlparent = NULL;
+ xlator_list_t **tmp = NULL;
+
+ xlparent = (void *)GF_CALLOC(1, sizeof(*xlparent),
+ gf_common_mt_xlator_list_t);
+ if (!xlparent)
+ return -1;
+
+ xlchild = (void *)GF_CALLOC(1, sizeof(*xlchild),
+ gf_common_mt_xlator_list_t);
+ if (!xlchild) {
+ GF_FREE(xlparent);
+
+ return -1;
+ }
+
+ xlparent->xlator = pxl;
+ for (tmp = &cxl->parents; *tmp; tmp = &(*tmp)->next)
+ ;
+ *tmp = xlparent;
+
+ xlchild->xlator = cxl;
+ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next)
+ ;
+ *tmp = xlchild;
+
+ return 0;
+}
void
-glusterfs_graph_set_first (glusterfs_graph_t *graph, xlator_t *xl)
+glusterfs_graph_set_first(glusterfs_graph_t *graph, xlator_t *xl)
{
- xl->next = graph->first;
- if (graph->first)
- ((xlator_t *)graph->first)->prev = xl;
- graph->first = xl;
+ xl->next = graph->first;
+ if (graph->first)
+ ((xlator_t *)graph->first)->prev = xl;
+ graph->first = xl;
- graph->xl_count++;
+ graph->xl_count++;
+ xl->xl_id = graph->xl_count;
}
-
int
-glusterfs_graph_insert (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
- const char *type, const char *name)
+glusterfs_graph_insert(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
+ const char *type, const char *name,
+ gf_boolean_t autoload)
{
- xlator_t *ixl = NULL;
+ xlator_t *ixl = NULL;
- if (!ctx->master) {
- gf_log ("glusterfs", GF_LOG_ERROR,
- "volume \"%s\" can be added from command line only "
- "on client side", type);
+ if (!ctx->master) {
+ gf_msg("glusterfs", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
+ "volume \"%s\" can be added from command line only "
+ "on client side",
+ type);
- return -1;
- }
+ return -1;
+ }
- ixl = GF_CALLOC (1, sizeof (*ixl), gf_common_mt_xlator_t);
- if (!ixl)
- return -1;
-
- ixl->ctx = ctx;
- ixl->graph = graph;
- ixl->options = get_new_dict ();
- if (!ixl->options)
- goto err;
-
- ixl->name = gf_strdup (name);
- if (!ixl->name)
- goto err;
-
- if (xlator_set_type (ixl, type) == -1) {
- gf_log ("glusterfs", GF_LOG_ERROR,
- "%s (%s) initialization failed",
- name, type);
- return -1;
- }
+ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t);
+ if (!ixl)
+ return -1;
- if (glusterfs_xlator_link (ixl, graph->top) == -1)
- goto err;
- glusterfs_graph_set_first (graph, ixl);
- graph->top = ixl;
+ ixl->ctx = ctx;
+ ixl->graph = graph;
+ ixl->options = dict_new();
+ if (!ixl->options)
+ goto err;
- return 0;
-err:
- xlator_destroy (ixl);
+ ixl->name = gf_strdup(name);
+ if (!ixl->name)
+ goto err;
+
+ ixl->is_autoloaded = autoload;
+
+ if (xlator_set_type(ixl, type) == -1) {
+ gf_msg("glusterfs", GF_LOG_ERROR, 0, LG_MSG_INIT_FAILED,
+ "%s (%s) initialization failed", name, type);
return -1;
+ }
+
+ if (glusterfs_xlator_link(ixl, graph->top) == -1)
+ goto err;
+ glusterfs_graph_set_first(graph, ixl);
+ graph->top = ixl;
+
+ return 0;
+err:
+ xlator_destroy(ixl);
+ return -1;
}
int
-glusterfs_graph_acl (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_acl(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (!cmd_args->acl)
- return 0;
+ if (!cmd_args->acl)
+ return 0;
- ret = glusterfs_graph_insert (graph, ctx, "system/posix-acl",
- "posix-acl-autoload");
- return ret;
+ ret = glusterfs_graph_insert(graph, ctx, "system/posix-acl",
+ "posix-acl-autoload", 1);
+ return ret;
}
int
-glusterfs_graph_worm (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_worm(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (!cmd_args->worm)
- return 0;
+ if (!cmd_args->worm)
+ return 0;
- ret = glusterfs_graph_insert (graph, ctx, "features/worm",
- "worm-autoload");
- return ret;
+ ret = glusterfs_graph_insert(graph, ctx, "features/worm", "worm-autoload",
+ 1);
+ return ret;
}
int
-glusterfs_graph_mac_compat (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_meta(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
+ int ret = 0;
- cmd_args = &ctx->cmd_args;
+ if (!ctx->master)
+ return 0;
- if (cmd_args->mac_compat == GF_OPTION_DISABLE)
- return 0;
+ ret = glusterfs_graph_insert(graph, ctx, "meta", "meta-autoload", 1);
+ return ret;
+}
- ret = glusterfs_graph_insert (graph, ctx, "features/mac-compat",
- "mac-compat-autoload");
+int
+glusterfs_graph_mac_compat(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
- return ret;
+ cmd_args = &ctx->cmd_args;
+
+ if (cmd_args->mac_compat == GF_OPTION_DISABLE)
+ return 0;
+
+ ret = glusterfs_graph_insert(graph, ctx, "features/mac-compat",
+ "mac-compat-autoload", 1);
+
+ return ret;
}
+int
+glusterfs_graph_gfid_access(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->aux_gfid_mount)
+ return 0;
+
+ ret = glusterfs_graph_insert(graph, ctx, "features/gfid-access",
+ "gfid-access-autoload", 1);
+ return ret;
+}
static void
-gf_add_cmdline_options (glusterfs_graph_t *graph, cmd_args_t *cmd_args)
-{
- int ret = 0;
- xlator_t *trav = NULL;
- xlator_cmdline_option_t *cmd_option = NULL;
-
- trav = graph->first;
-
- while (trav) {
- list_for_each_entry (cmd_option,
- &cmd_args->xlator_options, cmd_args) {
- if (!fnmatch (cmd_option->volume,
- trav->name, FNM_NOESCAPE)) {
- ret = dict_set_str (trav->options,
- cmd_option->key,
- cmd_option->value);
- if (ret == 0) {
- gf_log (trav->name, GF_LOG_INFO,
- "adding option '%s' for "
- "volume '%s' with value '%s'",
- cmd_option->key, trav->name,
- cmd_option->value);
- } else {
- gf_log (trav->name, GF_LOG_WARNING,
- "adding option '%s' for "
- "volume '%s' failed: %s",
- cmd_option->key, trav->name,
- strerror (-ret));
- }
- }
+gf_add_cmdline_options(glusterfs_graph_t *graph, cmd_args_t *cmd_args)
+{
+ int ret = 0;
+ xlator_t *trav = NULL;
+ xlator_cmdline_option_t *cmd_option = NULL;
+
+ trav = graph->first;
+
+ while (trav) {
+ list_for_each_entry(cmd_option, &cmd_args->xlator_options, cmd_args)
+ {
+ if (!fnmatch(cmd_option->volume, trav->name, FNM_NOESCAPE)) {
+ ret = dict_set_str(trav->options, cmd_option->key,
+ cmd_option->value);
+ if (ret == 0) {
+ gf_msg(trav->name, GF_LOG_TRACE, 0, LG_MSG_VOL_OPTION_ADD,
+ "adding option '%s' for "
+ "volume '%s' with value '%s'",
+ cmd_option->key, trav->name, cmd_option->value);
+ } else {
+ gf_msg(trav->name, GF_LOG_WARNING, -ret,
+ LG_MSG_VOL_OPTION_ADD,
+ "adding option '%s' for "
+ "volume '%s' failed",
+ cmd_option->key, trav->name);
}
- trav = trav->next;
+ }
}
+ trav = trav->next;
+ }
}
-
int
-glusterfs_graph_validate_options (glusterfs_graph_t *graph)
+glusterfs_graph_validate_options(glusterfs_graph_t *graph)
{
- xlator_t *trav = NULL;
- int ret = -1;
- char *errstr = NULL;
+ xlator_t *trav = NULL;
+ int ret = -1;
+ char *errstr = NULL;
- trav = graph->first;
+ trav = graph->first;
- while (trav) {
- if (list_empty (&trav->volume_options))
- continue;
+ while (trav) {
+ if (list_empty(&trav->volume_options)) {
+ trav = trav->next;
+ continue;
+ }
- ret = xlator_options_validate (trav, trav->options, &errstr);
- if (ret) {
- gf_log (trav->name, GF_LOG_ERROR,
- "validation failed: %s", errstr);
- return ret;
- }
- trav = trav->next;
+ ret = xlator_options_validate(trav, trav->options, &errstr);
+ if (ret) {
+ gf_msg(trav->name, GF_LOG_ERROR, 0, LG_MSG_VALIDATION_FAILED,
+ "validation failed: "
+ "%s",
+ errstr);
+ return ret;
}
+ trav = trav->next;
+ }
- return 0;
+ return 0;
}
-
int
-glusterfs_graph_init (glusterfs_graph_t *graph)
+glusterfs_graph_init(glusterfs_graph_t *graph)
{
- xlator_t *trav = NULL;
- int ret = -1;
+ xlator_t *trav = NULL;
+ int ret = -1;
- trav = graph->first;
+ trav = graph->first;
- while (trav) {
- ret = xlator_init (trav);
- if (ret) {
- gf_log (trav->name, GF_LOG_ERROR,
- "initializing translator failed");
- return ret;
- }
- trav = trav->next;
+ while (trav) {
+ ret = xlator_init(trav);
+ if (ret) {
+ gf_msg(trav->name, GF_LOG_ERROR, 0, LG_MSG_TRANSLATOR_INIT_FAILED,
+ "initializing translator failed");
+ return ret;
}
+ trav = trav->next;
+ }
- return 0;
+ return 0;
}
+int
+glusterfs_graph_deactivate(glusterfs_graph_t *graph)
+{
+ xlator_t *top = NULL;
-static void
-_log_if_unknown_option (dict_t *dict, char *key, data_t *value, void *data)
+ if (graph == NULL)
+ goto out;
+
+ top = graph->top;
+ xlator_tree_fini(top);
+out:
+ return 0;
+}
+
+static int
+_log_if_unknown_option(dict_t *dict, char *key, data_t *value, void *data)
{
- volume_option_t *found = NULL;
- xlator_t *xl = NULL;
+ volume_option_t *found = NULL;
+ xlator_t *xl = NULL;
- xl = data;
+ xl = data;
- found = xlator_volume_option_get (xl, key);
+ found = xlator_volume_option_get(xl, key);
- if (!found) {
- gf_log (xl->name, GF_LOG_WARNING,
- "option '%s' is not recognized", key);
- }
+ if (!found) {
+ gf_msg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_XLATOR_OPTION_INVALID,
+ "option '%s' is not recognized", key);
+ }
- return;
+ return 0;
}
-
static void
-_xlator_check_unknown_options (xlator_t *xl, void *data)
+_xlator_check_unknown_options(xlator_t *xl, void *data)
{
- dict_foreach (xl->options, _log_if_unknown_option, xl);
+ dict_foreach(xl->options, _log_if_unknown_option, xl);
}
-
-int
-glusterfs_graph_unknown_options (glusterfs_graph_t *graph)
+static int
+glusterfs_graph_unknown_options(glusterfs_graph_t *graph)
{
- xlator_foreach (graph->first, _xlator_check_unknown_options, NULL);
- return 0;
+ xlator_foreach(graph->first, _xlator_check_unknown_options, NULL);
+ return 0;
}
+static void
+fill_uuid(char *uuid, int size, struct timeval tv)
+{
+ char hostname[50] = {
+ 0,
+ };
+ char now_str[GF_TIMESTR_SIZE];
+
+ if (gethostname(hostname, sizeof(hostname) - 1) != 0) {
+ gf_msg("graph", GF_LOG_ERROR, errno, LG_MSG_GETHOSTNAME_FAILED,
+ "gethostname failed");
+ hostname[sizeof(hostname) - 1] = '\0';
+ }
-void
-fill_uuid (char *uuid, int size)
+ gf_time_fmt_tv(now_str, sizeof now_str, &tv, gf_timefmt_dirent);
+ snprintf(uuid, size, "%s-%d-%s", hostname, getpid(), now_str);
+
+ return;
+}
+
+static int
+glusterfs_graph_settop(glusterfs_graph_t *graph, char *volume_name,
+ gf_boolean_t exact_match)
{
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
+ int ret = -1;
+ xlator_t *trav = NULL;
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("graph", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
+ if (!volume_name || !exact_match) {
+ graph->top = graph->first;
+ ret = 0;
+ } else {
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp(trav->name, volume_name) == 0) {
+ graph->top = trav;
+ ret = 0;
+ break;
+ }
}
+ }
+
+ return ret;
+}
+
+int
+glusterfs_graph_parent_up(glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ int ret = -1;
+
+ trav = graph->first;
- if (gethostname (hostname, 256) == -1) {
- gf_log ("graph", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
+ while (trav) {
+ if (!xlator_has_parent(trav)) {
+ ret = xlator_notify(trav, GF_EVENT_PARENT_UP, trav);
}
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (uuid, size, "%s-%d-%s:%"GF_PRI_SUSECONDS,
- hostname, getpid(), now_str, tv.tv_usec);
+ if (ret)
+ break;
- return;
-}
+ trav = trav->next;
+ }
+ return ret;
+}
int
-glusterfs_graph_settop (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_prepare(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
+ char *volume_name)
{
- const char *volume_name = NULL;
- xlator_t *trav = NULL;
+ xlator_t *trav = NULL;
+ int ret = 0;
+
+ /* XXX: CHECKSUM */
+
+ /* XXX: attach to -n volname */
+ /* A '/' in the volume name suggests brick multiplexing is used, find
+ * the top of the (sub)graph. The volname MUST match the subvol in this
+ * case. In other cases (like for gfapi) the default top for the
+ * (sub)graph is ok. */
+ if (!volume_name) {
+ /* GlusterD does not pass a volume_name */
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_false);
+ } else if (strncmp(volume_name, "/snaps/", 7) == 0) {
+ /* snap shots have their top xlator named like "/snaps/..." */
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_false);
+ } else if (volume_name[0] == '/') {
+ /* brick multiplexing passes the brick path */
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_true);
+ } else {
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_false);
+ }
+
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph settop failed");
+ errno = EINVAL;
+ return -1;
+ }
- volume_name = ctx->cmd_args.volume_name;
+ /* XXX: WORM VOLUME */
+ ret = glusterfs_graph_worm(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph worm failed");
+ return -1;
+ }
+ ret = glusterfs_graph_acl(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph ACL failed");
+ return -1;
+ }
- if (!volume_name) {
- graph->top = graph->first;
- return 0;
- }
+ /* XXX: MAC COMPAT */
+ ret = glusterfs_graph_mac_compat(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph mac compat failed");
+ return -1;
+ }
- for (trav = graph->first; trav; trav = trav->next) {
- if (strcmp (trav->name, volume_name) == 0) {
- graph->top = trav;
- return 0;
- }
- }
+ /* XXX: gfid-access */
+ ret = glusterfs_graph_gfid_access(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph 'gfid-access' failed");
+ return -1;
+ }
+ /* XXX: topmost xlator */
+ ret = glusterfs_graph_meta(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph meta failed");
return -1;
+ }
+
+ /* XXX: this->ctx setting */
+ for (trav = graph->first; trav; trav = trav->next) {
+ trav->ctx = ctx;
+ }
+
+ /* XXX: DOB setting */
+ gettimeofday(&graph->dob, NULL);
+
+ fill_uuid(graph->graph_uuid, sizeof(graph->graph_uuid), graph->dob);
+
+ graph->id = ctx->graph_id++;
+
+ /* XXX: --xlator-option additions */
+ gf_add_cmdline_options(graph, &ctx->cmd_args);
+
+ return 0;
+}
+
+static xlator_t *
+glusterfs_root(glusterfs_graph_t *graph)
+{
+ return graph->first;
+}
+
+static int
+glusterfs_is_leaf(xlator_t *xl)
+{
+ int ret = 0;
+
+ if (!xl->children)
+ ret = 1;
+
+ return ret;
+}
+
+static uint32_t
+glusterfs_count_leaves(xlator_t *xl)
+{
+ int n = 0;
+ xlator_list_t *list = NULL;
+
+ if (glusterfs_is_leaf(xl))
+ n = 1;
+ else
+ for (list = xl->children; list; list = list->next)
+ n += glusterfs_count_leaves(list->xlator);
+
+ return n;
+}
+
+int
+glusterfs_get_leaf_count(glusterfs_graph_t *graph)
+{
+ return graph->leaf_count;
}
+static int
+_glusterfs_leaf_position(xlator_t *tgt, int *id, xlator_t *xl)
+{
+ xlator_list_t *list = NULL;
+ int found = 0;
+
+ if (xl == tgt)
+ found = 1;
+ else if (glusterfs_is_leaf(xl))
+ *id += 1;
+ else
+ for (list = xl->children; !found && list; list = list->next)
+ found = _glusterfs_leaf_position(tgt, id, list->xlator);
+
+ return found;
+}
int
-glusterfs_graph_parent_up (glusterfs_graph_t *graph)
+glusterfs_leaf_position(xlator_t *tgt)
{
- xlator_t *trav = NULL;
- int ret = -1;
+ xlator_t *root = NULL;
+ int pos = 0;
- trav = graph->first;
+ root = glusterfs_root(tgt->graph);
- while (trav) {
- if (!xlator_has_parent (trav)) {
- ret = xlator_notify (trav, GF_EVENT_PARENT_UP, trav);
- }
+ if (!_glusterfs_leaf_position(tgt, &pos, root))
+ pos = -1;
- if (ret)
- break;
+ return pos;
+}
+
+static int
+_glusterfs_reachable_leaves(xlator_t *base, xlator_t *xl, dict_t *leaves)
+{
+ xlator_list_t *list = NULL;
+ int err = 1;
+ int pos = 0;
+ char *strpos = NULL;
+
+ if (glusterfs_is_leaf(xl)) {
+ pos = glusterfs_leaf_position(xl);
+ if (pos < 0)
+ goto out;
- trav = trav->next;
+ err = gf_asprintf(&strpos, "%d", pos);
+
+ if (err >= 0) {
+ err = dict_set_static_ptr(leaves, strpos, base);
+ GF_FREE(strpos);
}
+ } else {
+ for (err = 0, list = xl->children; !err && list; list = list->next)
+ err = _glusterfs_reachable_leaves(base, list->xlator, leaves);
+ }
- return ret;
+out:
+ return err;
}
+/*
+ * This function determines which leaves are children (or grandchildren)
+ * of the given base. The base may have multiple sub volumes. Each sub
+ * volumes in turn may have sub volumes.. until the leaves are reached.
+ * Each leaf is numbered 1,2,3,...etc.
+ *
+ * The base translator calls this function to see which of *its* subvolumes
+ * it would forward an FOP to, to *get to* a particular leaf.
+ * That information is built into the "leaves" dictionary.
+ * key:destination leaf# -> value:base subvolume xlator.
+ */
int
-glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves)
{
- xlator_t *trav = NULL;
- int ret = 0;
+ xlator_list_t *list = NULL;
+ int err = 0;
- /* XXX: CHECKSUM */
+ for (list = base->children; !err && list; list = list->next)
+ err = _glusterfs_reachable_leaves(list->xlator, list->xlator, leaves);
- /* XXX: attach to -n volname */
- ret = glusterfs_graph_settop (graph, ctx);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph settop failed");
- return -1;
- }
+ return err;
+}
- /* XXX: WORM VOLUME */
- ret = glusterfs_graph_worm (graph, ctx);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph worm failed");
- return -1;
- }
- ret = glusterfs_graph_acl (graph, ctx);
+int
+glusterfs_graph_activate(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ xlator_t *root = NULL;
+
+ root = glusterfs_root(graph);
+
+ graph->leaf_count = glusterfs_count_leaves(root);
+
+ /* XXX: all xlator options validation */
+ ret = glusterfs_graph_validate_options(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_VALIDATION_FAILED,
+ "validate options failed");
+ return ret;
+ }
+
+ /* XXX: perform init () */
+ ret = glusterfs_graph_init(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_INIT_FAILED,
+ "init failed");
+ return ret;
+ }
+
+ ret = glusterfs_graph_unknown_options(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_UNKNOWN_OPTIONS_FAILED,
+ "unknown options "
+ "failed");
+ return ret;
+ }
+
+ /* XXX: log full graph (_gf_dump_details) */
+
+ list_add(&graph->list, &ctx->graphs);
+ ctx->active = graph;
+
+ /* XXX: attach to master and set active pointer */
+ if (ctx->master) {
+ ret = xlator_notify(ctx->master, GF_EVENT_GRAPH_NEW, graph);
if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph ACL failed");
- return -1;
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "graph new notification failed");
+ return ret;
}
+ ((xlator_t *)ctx->master)->next = graph->top;
+ }
+
+ /* XXX: perform parent up */
+ ret = glusterfs_graph_parent_up(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "parent up notification failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int
+xlator_equal_rec(xlator_t *xl1, xlator_t *xl2)
+{
+ xlator_list_t *trav1 = NULL;
+ xlator_list_t *trav2 = NULL;
+ int ret = 0;
- /* XXX: MAC COMPAT */
- ret = glusterfs_graph_mac_compat (graph, ctx);
+ if (xl1 == NULL || xl2 == NULL) {
+ gf_msg_debug("xlator", 0, "invalid argument");
+ return -1;
+ }
+
+ trav1 = xl1->children;
+ trav2 = xl2->children;
+
+ while (trav1 && trav2) {
+ ret = xlator_equal_rec(trav1->xlator, trav2->xlator);
if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph mac compat failed");
- return -1;
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "xlators children "
+ "not equal");
+ goto out;
}
- /* XXX: this->ctx setting */
- for (trav = graph->first; trav; trav = trav->next) {
- trav->ctx = ctx;
- }
+ trav1 = trav1->next;
+ trav2 = trav2->next;
+ }
+
+ if (trav1 || trav2) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(xl1->name, xl2->name)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* type could have changed even if xlator names match,
+ e.g cluster/distribute and cluster/nufa share the same
+ xlator name
+ */
+ if (strcmp(xl1->type, xl2->type)) {
+ ret = -1;
+ goto out;
+ }
+out:
+ return ret;
+}
- /* XXX: DOB setting */
- gettimeofday (&graph->dob, NULL);
+gf_boolean_t
+is_graph_topology_equal(glusterfs_graph_t *graph1, glusterfs_graph_t *graph2)
+{
+ xlator_t *trav1 = NULL;
+ xlator_t *trav2 = NULL;
+ gf_boolean_t ret = _gf_true;
+ xlator_list_t *ltrav;
+
+ trav1 = graph1->first;
+ trav2 = graph2->first;
+
+ if (strcmp(trav2->type, "protocol/server") == 0) {
+ trav2 = trav2->children->xlator;
+ for (ltrav = trav1->children; ltrav; ltrav = ltrav->next) {
+ trav1 = ltrav->xlator;
+ if (!trav1->cleanup_starting && !strcmp(trav1->name, trav2->name)) {
+ break;
+ }
+ }
+ if (!ltrav) {
+ return _gf_false;
+ }
+ }
- fill_uuid (graph->graph_uuid, 128);
+ ret = xlator_equal_rec(trav1, trav2);
- graph->id = ctx->graph_id++;
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0, "graphs are not equal");
+ ret = _gf_false;
+ goto out;
+ }
- /* XXX: --xlator-option additions */
- gf_add_cmdline_options (graph, &ctx->cmd_args);
+ ret = _gf_true;
+ gf_msg_debug("glusterfsd-mgmt", 0, "graphs are equal");
+out:
+ return ret;
+}
- return 0;
+/* Function has 3types of return value 0, -ve , 1
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the
+ * xlators should be inited return -1(or -ve) =======> Some Internal Error
+ * occurred during the operation
+ */
+int
+glusterfs_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx)
+{
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+
+ int ret = -1;
+
+ if (!ctx) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "ctx is NULL");
+ goto out;
+ }
+
+ oldvolfile_graph = ctx->active;
+ if (!oldvolfile_graph) {
+ ret = 1;
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+
+ if (!newvolfile_graph) {
+ goto out;
+ }
+
+ glusterfs_graph_prepare(newvolfile_graph, ctx, ctx->cmd_args.volume_name);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+ ret = 1;
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Graph topology not "
+ "equal(should call INIT)");
+ goto out;
+ }
+
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Only options have changed in the"
+ " new graph");
+
+ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Could not reconfigure "
+ "new options in old graph");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ if (newvolfile_graph)
+ glusterfs_graph_destroy(newvolfile_graph);
+
+ return ret;
}
+/* This function need to remove. This added to support gfapi volfile
+ * reconfigure.
+ */
int
-glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+gf_volfile_reconfigure(int oldvollen, FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ const char *oldvolfile)
{
- int ret = 0;
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+ FILE *oldvolfile_fp = NULL;
+ /*Since the function mkstemp() replaces XXXXXX,
+ * assigning it to a variable
+ */
+ char temp_file[] = "/tmp/temp_vol_file_XXXXXX";
+ gf_boolean_t active_graph_found = _gf_true;
+
+ int ret = -1;
+ int u_ret = -1;
+ int file_desc = -1;
+
+ if (!oldvollen) {
+ ret = 1; // Has to call INIT for the whole graph
+ goto out;
+ }
+
+ if (!ctx) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "ctx is NULL");
+ goto out;
+ }
+
+ oldvolfile_graph = ctx->active;
+ if (!oldvolfile_graph) {
+ active_graph_found = _gf_false;
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_ACTIVE_GRAPH_NULL,
+ "glusterfs_ctx->active is NULL");
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ file_desc = mkstemp(temp_file);
+ if (file_desc < 0) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, errno,
+ LG_MSG_TMPFILE_CREATE_FAILED,
+ "Unable to "
+ "create temporary volfile");
+ goto out;
+ }
- /* XXX: all xlator options validation */
- ret = glusterfs_graph_validate_options (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "validate options failed");
- return ret;
+ /*Calling unlink so that when the file is closed or program
+ *terminates the tempfile is deleted.
+ */
+ u_ret = sys_unlink(temp_file);
+
+ if (u_ret < 0) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, errno,
+ LG_MSG_TMPFILE_DELETE_FAILED,
+ "Temporary file"
+ " delete failed.");
+ sys_close(file_desc);
+ goto out;
}
- /* XXX: perform init () */
- ret = glusterfs_graph_init (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "init failed");
- return ret;
+ oldvolfile_fp = fdopen(file_desc, "w+b");
+ if (!oldvolfile_fp)
+ goto out;
+
+ fwrite(oldvolfile, oldvollen, 1, oldvolfile_fp);
+ fflush(oldvolfile_fp);
+ if (ferror(oldvolfile_fp)) {
+ goto out;
}
- ret = glusterfs_graph_unknown_options (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "unknown options failed");
- return ret;
+ oldvolfile_graph = glusterfs_graph_construct(oldvolfile_fp);
+ if (!oldvolfile_graph)
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+ if (!newvolfile_graph) {
+ goto out;
+ }
+
+ glusterfs_graph_prepare(newvolfile_graph, ctx, ctx->cmd_args.volume_name);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+ ret = 1;
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Graph topology not "
+ "equal(should call INIT)");
+ goto out;
+ }
+
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Only options have changed in the"
+ " new graph");
+
+ /* */
+ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Could not reconfigure "
+ "new options in old graph");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (oldvolfile_fp)
+ fclose(oldvolfile_fp);
+
+ /* Do not simply destroy the old graph here. If the oldgraph
+ is constructed here in this function itself instead of getting
+ it from ctx->active (which happens only of ctx->active is NULL),
+ then destroy the old graph. If some i/o is still happening in
+ the old graph and the old graph is obtained from ctx->active,
+ then destroying the graph will cause problems.
+ */
+ if (!active_graph_found && oldvolfile_graph)
+ glusterfs_graph_destroy(oldvolfile_graph);
+ if (newvolfile_graph)
+ glusterfs_graph_destroy(newvolfile_graph);
+
+ return ret;
+}
+
+int
+glusterfs_graph_reconfigure(glusterfs_graph_t *oldgraph,
+ glusterfs_graph_t *newgraph)
+{
+ xlator_t *old_xl = NULL;
+ xlator_t *new_xl = NULL;
+ xlator_list_t *trav;
+
+ GF_ASSERT(oldgraph);
+ GF_ASSERT(newgraph);
+
+ old_xl = oldgraph->first;
+ while (old_xl->is_autoloaded) {
+ old_xl = old_xl->children->xlator;
+ }
+
+ new_xl = newgraph->first;
+ while (new_xl->is_autoloaded) {
+ new_xl = new_xl->children->xlator;
+ }
+
+ if (strcmp(old_xl->type, "protocol/server") != 0) {
+ return xlator_tree_reconfigure(old_xl, new_xl);
+ }
+
+ /* Some options still need to be handled by the server translator. */
+ if (old_xl->reconfigure) {
+ old_xl->reconfigure(old_xl, new_xl->options);
+ }
+
+ (void)copy_opts_to_child(new_xl, FIRST_CHILD(new_xl), "*auth*");
+ new_xl = FIRST_CHILD(new_xl);
+
+ for (trav = old_xl->children; trav; trav = trav->next) {
+ if (!trav->xlator->cleanup_starting &&
+ !strcmp(trav->xlator->name, new_xl->name)) {
+ return xlator_tree_reconfigure(trav->xlator, new_xl);
}
+ }
+
+ return -1;
+}
+
+int
+glusterfs_graph_destroy_residual(glusterfs_graph_t *graph)
+{
+ int ret = -1;
- /* XXX: log full graph (_gf_dump_details) */
+ if (graph == NULL)
+ return ret;
- list_add (&graph->list, &ctx->graphs);
- ctx->active = graph;
+ ret = xlator_tree_free_memacct(graph->first);
- /* XXX: attach to master and set active pointer */
- if (ctx->master)
- ret = xlator_notify (ctx->master, GF_EVENT_GRAPH_NEW, graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "graph new notification failed");
- return ret;
+ list_del_init(&graph->list);
+ pthread_mutex_destroy(&graph->mutex);
+ pthread_cond_destroy(&graph->child_down_cond);
+ GF_FREE(graph);
+
+ return ret;
+}
+
+/* This function destroys all the xlator members except for the
+ * xlator strcuture and its mem accounting field.
+ *
+ * If otherwise, it would destroy the master xlator object as well
+ * its mem accounting, which would mean after calling glusterfs_graph_destroy()
+ * there cannot be any reference to GF_FREE() from the master xlator, this is
+ * not possible because of the following dependencies:
+ * - glusterfs_ctx_t will have mem pools allocated by the master xlators
+ * - xlator objects will have references to those mem pools(g: dict)
+ *
+ * Ordering the freeing in any of the order will also not solve the dependency:
+ * - Freeing xlator objects(including memory accounting) before mem pools
+ * destruction will mean not use GF_FREE while destroying mem pools.
+ * - Freeing mem pools and then destroying xlator objects would lead to crashes
+ * when xlator tries to unref dict or other mem pool objects.
+ *
+ * Hence the way chosen out of this interdependency is to split xlator object
+ * free into two stages:
+ * - Free all the xlator members excpet for its mem accounting structure
+ * - Free all the mem accouting structures of xlator along with the xlator
+ * object itself.
+ */
+int
+glusterfs_graph_destroy(glusterfs_graph_t *graph)
+{
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("graph", graph, out);
+
+ ret = xlator_tree_free_members(graph->first);
+
+ ret = glusterfs_graph_destroy_residual(graph);
+out:
+ return ret;
+}
+
+int
+glusterfs_graph_fini(glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+
+ trav = graph->first;
+
+ while (trav) {
+ if (trav->init_succeeded) {
+ trav->cleanup_starting = 1;
+ trav->fini(trav);
+ if (trav->local_pool) {
+ mem_pool_destroy(trav->local_pool);
+ trav->local_pool = NULL;
+ }
+ if (trav->itable) {
+ inode_table_destroy(trav->itable);
+ trav->itable = NULL;
+ }
+ trav->init_succeeded = 0;
}
+ trav = trav->next;
+ }
- /* XXX: perform parent up */
- ret = glusterfs_graph_parent_up (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "parent up notification failed");
- return ret;
+ return 0;
+}
+
+int
+glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+ glusterfs_graph_t **newgraph)
+{
+ xlator_t *this = THIS;
+ FILE *fp;
+ glusterfs_graph_t *graph;
+ xlator_t *xl;
+ char *volfile_id = NULL;
+ char *volfile_content = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ size_t file_len = -1;
+ gf_volfile_t *volfile_obj = NULL;
+ int ret = -1;
+ char sha256_hash[SHA256_DIGEST_LENGTH] = {
+ 0,
+ };
+
+ if (!orig_graph) {
+ return -EINVAL;
+ }
+
+ ret = sys_stat(path, &stbuf);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Unable to stat %s (%s)", path,
+ strerror(errno));
+ return -EINVAL;
+ }
+
+ file_len = stbuf.st_size;
+ volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char);
+ if (!volfile_content)
+ return -ENOMEM;
+
+ fp = fopen(path, "r");
+ if (!fp) {
+ gf_log(THIS->name, GF_LOG_WARNING, "oops, %s disappeared on us", path);
+ GF_FREE(volfile_content);
+ return -EIO;
+ }
+
+ ret = fread(volfile_content, sizeof(char), file_len, fp);
+ if (ret == file_len) {
+ glusterfs_compute_sha256((const unsigned char *)volfile_content,
+ file_len, sha256_hash);
+ } else {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "read failed on path %s. File size=%" GF_PRI_SIZET
+ "read size=%d",
+ path, file_len, ret);
+ GF_FREE(volfile_content);
+ fclose(fp);
+ return -EIO;
+ }
+
+ GF_FREE(volfile_content);
+
+ graph = glusterfs_graph_construct(fp);
+ fclose(fp);
+ if (!graph) {
+ gf_log(this->name, GF_LOG_WARNING, "could not create graph from %s",
+ path);
+ return -EIO;
+ }
+
+ /*
+ * If there's a server translator on top, we want whatever's below
+ * that.
+ */
+ xl = graph->first;
+ if (strcmp(xl->type, "protocol/server") == 0) {
+ (void)copy_opts_to_child(xl, FIRST_CHILD(xl), "*auth*");
+ xl = FIRST_CHILD(xl);
+ }
+ graph->first = xl;
+ *newgraph = graph;
+
+ volfile_id = strstr(path, "/snaps/");
+ if (!volfile_id) {
+ volfile_id = rindex(path, '/');
+ if (volfile_id) {
+ ++volfile_id;
+ }
+ }
+ if (volfile_id) {
+ xl->volfile_id = gf_strdup(volfile_id);
+ /* There's a stray ".vol" at the end. */
+ xl->volfile_id[strlen(xl->volfile_id) - 4] = '\0';
+ }
+
+ /* TODO memory leaks everywhere need to free graph in case of error */
+ if (glusterfs_graph_prepare(graph, this->ctx, xl->name)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to prepare graph for xlator %s", xl->name);
+ return -EIO;
+ } else if (glusterfs_graph_init(graph)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to initialize graph for xlator %s", xl->name);
+ return -EIO;
+ } else if (glusterfs_xlator_link(orig_graph->top, graph->top)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to link the graphs for xlator %s ", xl->name);
+ return -EIO;
+ }
+
+ if (!volfile_obj) {
+ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+ if (!volfile_obj) {
+ return -EIO;
}
+ }
+
+ INIT_LIST_HEAD(&volfile_obj->volfile_list);
+ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+ xl->volfile_id);
+ memcpy(volfile_obj->volfile_checksum, sha256_hash,
+ sizeof(volfile_obj->volfile_checksum));
+ list_add(&volfile_obj->volfile_list, &this->ctx->volfile_list);
+
+ return 0;
+}
+int
+glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx,
+ glusterfs_graph_t *parent_graph)
+{
+ if (parent_graph) {
+ if (parent_graph->first) {
+ xlator_destroy(parent_graph->first);
+ }
+ ctx->active = NULL;
+ GF_FREE(parent_graph);
+ parent_graph = NULL;
+ }
+ return 0;
+}
+void *
+glusterfs_graph_cleanup(void *arg)
+{
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_ctx_t *ctx = THIS->ctx;
+ int ret = -1;
+ graph = arg;
+
+ if (!graph)
+ return NULL;
+
+ /* To destroy the graph, fitst sent a GF_EVENT_PARENT_DOWN
+ * Then wait for GF_EVENT_CHILD_DOWN to get on the top
+ * xl. Once we have GF_EVENT_CHILD_DOWN event, then proceed
+ * to fini.
+ *
+ * During fini call, this will take a last unref on rpc and
+ * rpc_transport_object.
+ */
+ if (graph->first)
+ default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first);
+
+ ret = pthread_mutex_lock(&graph->mutex);
+ if (ret != 0) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Failed to acquire a lock");
+ goto out;
+ }
+ /* check and wait for CHILD_DOWN for top xlator*/
+ while (graph->used) {
+ ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex);
+ if (ret != 0)
+ gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "cond wait failed ");
+ }
+
+ ret = pthread_mutex_unlock(&graph->mutex);
+ if (ret != 0) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Failed to release a lock");
+ }
+
+ /* Though we got a child down on top xlator, we have to wait until
+ * all the notifier to exit. Because there should not be any threads
+ * that access xl variables.
+ */
+ pthread_mutex_lock(&ctx->notify_lock);
+ {
+ while (ctx->notifying)
+ pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock);
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+out:
+ return NULL;
+}
+
+glusterfs_graph_t *
+glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name,
+ char *type)
+{
+ glusterfs_graph_t *parent_graph = NULL;
+ xlator_t *ixl = NULL;
+ int ret = -1;
+ parent_graph = GF_CALLOC(1, sizeof(*parent_graph),
+ gf_common_mt_glusterfs_graph_t);
+ if (!parent_graph)
+ goto out;
+
+ INIT_LIST_HEAD(&parent_graph->list);
+
+ ctx->active = parent_graph;
+ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t);
+ if (!ixl)
+ goto out;
+
+ ixl->ctx = ctx;
+ ixl->graph = parent_graph;
+ ixl->options = dict_new();
+ if (!ixl->options)
+ goto out;
+
+ ixl->name = gf_strdup(name);
+ if (!ixl->name)
+ goto out;
+
+ ixl->is_autoloaded = 1;
+
+ if (xlator_set_type(ixl, type) == -1) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "%s (%s) set type failed", name, type);
+ goto out;
+ }
+
+ glusterfs_graph_set_first(parent_graph, ixl);
+ parent_graph->top = ixl;
+ ixl = NULL;
+
+ gettimeofday(&parent_graph->dob, NULL);
+ fill_uuid(parent_graph->graph_uuid, 128, parent_graph->dob);
+ parent_graph->id = ctx->graph_id++;
+ ret = 0;
+out:
+ if (ixl)
+ xlator_destroy(ixl);
+
+ if (ret) {
+ glusterfs_muxsvc_cleanup_parent(ctx, parent_graph);
+ parent_graph = NULL;
+ }
+ return parent_graph;
+}
+
+int
+glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj)
+{
+ if (!volfile_obj || !volfile_obj->pidfp)
return 0;
+
+ gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id);
+
+ lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0);
+ fclose(volfile_obj->pidfp);
+ volfile_obj->pidfp = NULL;
+
+ return 0;
}
int
-glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
- glusterfs_graph_t *newgraph)
+glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
{
- xlator_t *old_xl = NULL;
- xlator_t *new_xl = NULL;
+ xlator_t *last_xl = NULL;
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+ pthread_t clean_graph = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *xl = NULL;
+
+ if (!ctx || !ctx->active || !volfile_obj)
+ goto out;
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+ parent_graph = ctx->active;
+ graph = volfile_obj->graph;
+ if (!graph)
+ goto unlock;
+ if (graph->first)
+ xl = graph->first;
+
+ last_xl = graph->last_xl;
+ if (last_xl)
+ last_xl->next = NULL;
+ if (!xl || xl->cleanup_starting)
+ goto unlock;
+
+ xl->cleanup_starting = 1;
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
+ "detaching child %s", volfile_obj->vol_id);
+
+ list_del_init(&volfile_obj->volfile_list);
+ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
+ glusterfs_svc_mux_pidfile_cleanup(volfile_obj);
+ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+ parent_graph->xl_count -= graph->xl_count;
+ parent_graph->leaf_count -= graph->leaf_count;
+ parent_graph->id++;
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+out:
+ if (!ret) {
+ list_del_init(&volfile_obj->volfile_list);
+ if (graph) {
+ ret = gf_thread_create_detached(
+ &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean");
+ if (ret) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "%s failed to create clean "
+ "up thread",
+ volfile_obj->vol_id);
+ ret = 0;
+ }
+ }
+ GF_FREE(volfile_obj);
+ }
+ return ret;
+}
- GF_ASSERT (oldgraph);
- GF_ASSERT (newgraph);
+int
+glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file)
+{
+ int ret = -1;
+ FILE *pidfp = NULL;
+
+ if (!pid_file || !volfile_obj)
+ goto out;
+
+ if (volfile_obj->pidfp) {
+ ret = 0;
+ goto out;
+ }
+ pidfp = fopen(pid_file, "a+");
+ if (!pidfp) {
+ goto out;
+ }
+ volfile_obj->pidfp = pidfp;
+
+ ret = lockf(fileno(pidfp), F_TLOCK, 0);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+out:
+ return ret;
+}
- old_xl = oldgraph->first;
- new_xl = newgraph->first;
+int
+glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj,
+ const char *pid_file, pid_t pid)
+{
+ int ret = 0;
+ FILE *pidfp = NULL;
+ int old_pid;
+
+ if (!volfile_obj->pidfp) {
+ ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file);
+ if (ret == -1)
+ goto out;
+ }
+ pidfp = volfile_obj->pidfp;
+ ret = fscanf(pidfp, "%d", &old_pid);
+ if (ret <= 0) {
+ goto update;
+ }
+ if (old_pid == pid) {
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "Old pid=%d found in pidfile %s. Cleaning the old pid and "
+ "Updating new pid=%d",
+ old_pid, pid_file, pid);
+ }
+update:
+ ret = sys_ftruncate(fileno(pidfp), 0);
+ if (ret) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "pidfile %s truncation failed", pid_file);
+ goto out;
+ }
+
+ ret = fprintf(pidfp, "%d\n", pid);
+ if (ret <= 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+ pid_file);
+ goto out;
+ }
+
+ ret = fflush(pidfp);
+ if (ret) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+ pid_file);
+ goto out;
+ }
+out:
+ return ret;
+}
- return xlator_tree_reconfigure (old_xl, new_xl);
+int
+glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj)
+{
+ char *file = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("graph", dict, out);
+ GF_VALIDATE_OR_GOTO("graph", volfile_obj, out);
+
+ ret = dict_get_str(dict, "pidfile", &file);
+ if (ret < 0) {
+ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "Failed to get pidfile from dict for volfile_id=%s",
+ volfile_obj->vol_id);
+ }
+
+ ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid());
+ if (ret < 0) {
+ ret = -1;
+ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "Failed to update "
+ "the pidfile for volfile_id=%s",
+ volfile_obj->vol_id);
+
+ goto out;
+ }
+
+ if (ret == 1)
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "PID %d updated in pidfile=%s", getpid(), file);
+ ret = 0;
+out:
+ return ret;
+}
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict)
+{
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+ glusterfs_graph_t *clean_graph = NULL;
+ int ret = -1;
+ xlator_t *xl = NULL;
+ xlator_t *last_xl = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ pthread_t thread_id = {
+ 0,
+ };
+
+ if (!ctx)
+ goto out;
+ parent_graph = ctx->active;
+ graph = glusterfs_graph_construct(fp);
+ if (!graph) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to construct the graph");
+ goto out;
+ }
+ graph->parent_down = 0;
+ graph->last_xl = glusterfs_get_last_xlator(graph);
+
+ for (xl = graph->first; xl; xl = xl->next) {
+ if (strcmp(xl->type, "mount/fuse") == 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_ATTACH_FAILED,
+ "fuse xlator cannot be specified in volume file");
+ goto out;
+ }
+ }
+
+ graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph));
+ xl = graph->first;
+ /* TODO memory leaks everywhere need to free graph in case of error */
+ if (glusterfs_graph_prepare(graph, ctx, xl->name)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to prepare graph for xlator %s", xl->name);
+ ret = -1;
+ goto out;
+ } else if (glusterfs_graph_init(graph)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to initialize graph for xlator %s", xl->name);
+ ret = -1;
+ goto out;
+ } else if (glusterfs_graph_parent_up(graph)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to link the graphs for xlator %s ", xl->name);
+ ret = -1;
+ goto out;
+ }
+
+ if (!parent_graph) {
+ parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd",
+ "debug/io-stats");
+ if (!parent_graph)
+ goto out;
+ ((xlator_t *)parent_graph->top)->next = xl;
+ clean_graph = parent_graph;
+ } else {
+ last_xl = parent_graph->last_xl;
+ if (last_xl)
+ last_xl->next = xl;
+ xl->prev = last_xl;
+ }
+ parent_graph->last_xl = graph->last_xl;
+
+ ret = glusterfs_xlator_link(parent_graph->top, xl);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "parent up notification failed");
+ goto out;
+ }
+ parent_graph->xl_count += graph->xl_count;
+ parent_graph->leaf_count += graph->leaf_count;
+ parent_graph->id++;
+
+ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+ if (!volfile_obj) {
+ ret = -1;
+ goto out;
+ }
+ volfile_obj->pidfp = NULL;
+ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+ volfile_id);
+
+ if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) {
+ ret = glusterfs_update_mux_pid(dict, volfile_obj);
+ if (ret == -1) {
+ GF_FREE(volfile_obj);
+ goto out;
+ }
+ }
+
+ graph->used = 1;
+ parent_graph->id++;
+ list_add(&graph->list, &ctx->graphs);
+ INIT_LIST_HEAD(&volfile_obj->volfile_list);
+ volfile_obj->graph = graph;
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list);
+ gf_log_dump_graph(fp, graph);
+ graph = NULL;
+
+ ret = 0;
+out:
+ if (ret) {
+ if (graph) {
+ gluster_graph_take_reference(graph->first);
+ ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup,
+ graph, "graph_clean");
+ if (ret) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "%s failed to create clean "
+ "up thread",
+ volfile_id);
+ ret = 0;
+ }
+ }
+ if (clean_graph)
+ glusterfs_muxsvc_cleanup_parent(ctx, clean_graph);
+ }
+ return ret;
}
int
-glusterfs_graph_destroy (glusterfs_graph_t *graph)
+glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ gf_volfile_t *volfile_obj, char *checksum,
+ dict_t *dict)
{
- return 0;
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+ char vol_id[NAME_MAX + 1];
+
+ int ret = -1;
+
+ if (!ctx) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "ctx is NULL");
+ goto out;
+ }
+
+ /* Change the message id */
+ if (!volfile_obj) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "failed to get volfile object");
+ goto out;
+ }
+
+ oldvolfile_graph = volfile_obj->graph;
+ if (!oldvolfile_graph) {
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+
+ if (!newvolfile_graph) {
+ goto out;
+ }
+ newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph);
+
+ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+ ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id);
+ if (ret < 0)
+ goto out;
+ ret = glusterfs_process_svc_detach(ctx, volfile_obj);
+ if (ret) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Could not detach "
+ "old graph. Aborting the reconfiguration operation");
+ goto out;
+ }
+ volfile_obj = NULL;
+ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
+ checksum, dict);
+ goto out;
+ }
+
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Only options have changed in the"
+ " new graph");
+
+ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Could not reconfigure "
+ "new options in old graph");
+ goto out;
+ }
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+
+ ret = 0;
+out:
+
+ if (newvolfile_graph)
+ glusterfs_graph_destroy(newvolfile_graph);
+
+ return ret;
}
diff --git a/libglusterfs/src/graph.l b/libglusterfs/src/graph.l
index 04fff2582fb..b9d4b2b6828 100644
--- a/libglusterfs/src/graph.l
+++ b/libglusterfs/src/graph.l
@@ -1,60 +1,40 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
%x STRING
%option yylineno
%option noinput
%{
#define YYSTYPE char *
-#include "xlator.h"
+#include "glusterfs/xlator.h"
#include "y.tab.h"
#include <string.h>
-#define START_STRSIZE 32
static char *text;
-static int text_asize;
static int text_size;
void append_string(const char *str, int size)
{
- int new_size = text_size + size + 1;
- if (new_size > text_asize) {
- new_size += START_STRSIZE - 1;
- new_size &= -START_STRSIZE;
- if (!text) {
- text = GF_CALLOC (1, new_size,
- gf_common_mt_char);
- } else {
- text = GF_REALLOC (text, new_size);
- }
- if (!text) {
- gf_log ("parser", GF_LOG_ERROR,
- "out of memory");
- return;
- }
- text_asize = new_size;
- }
- memcpy(text + text_size, str, size);
- text_size += size;
- text[text_size] = 0;
+ int new_size = text_size + size + 1;
+ if (!text) {
+ text = GF_CALLOC (1, new_size, gf_common_mt_char);
+ } else {
+ text = GF_REALLOC (text, new_size);
+ }
+ if (!text) {
+ return;
+ }
+ memcpy(text + text_size, str, size);
+ text_size += size;
+ text[text_size] = 0;
}
%}
@@ -77,13 +57,15 @@ TYPE [t][y][p][e]
\\. { append_string (yytext + 1, yyleng - 1); }
\" {
if (0) {
- yyunput (0, NULL);
+ yyunput (0, NULL);
}
BEGIN (INITIAL);
- yylval = text;
+ graphyylval = text;
+ text = NULL;
+ text_size = 0;
return STRING_TOK;
- }
+ }
}
-[^ \t\r\n\"\\]+ { yylval = gf_strdup (yytext) ; return ID; }
+[^ \t\r\n\"\\]+ { graphyylval = gf_strdup (yytext) ; return ID; }
[ \t\r\n]+ ;
%%
diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
index 62c57dd6c5f..e63febdc08b 100644
--- a/libglusterfs/src/graph.y
+++ b/libglusterfs/src/graph.y
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
%token VOLUME_BEGIN VOLUME_END OPTION NEWLINE SUBVOLUME ID WHITESPACE COMMENT TYPE STRING_TOK
%{
@@ -28,12 +18,15 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <pthread.h>
#define RELAX_POISONING
-#include "xlator.h"
-#include "graph-utils.h"
-#include "logging.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/graph-utils.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
static int new_volume (char *name);
static int volume_type (char *type);
@@ -47,8 +40,8 @@ static void option_error (void);
#define YYSTYPE char *
#define GF_CMD_BUFFER_LEN (8 * GF_UNIT_KB)
-int yyerror (const char *);
-int yylex ();
+int graphyyerror (const char *);
+int graphyylex ();
%}
@@ -88,11 +81,11 @@ glusterfs_graph_t *construct;
static void
type_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLFILE_PARSE_ERROR,
"Volume %s, before line %d: Please specify volume type",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -100,11 +93,11 @@ type_error (void)
static void
sub_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLFILE_PARSE_ERROR,
"Volume %s, before line %d: Please specify subvolumes",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -112,12 +105,12 @@ sub_error (void)
static void
option_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLFILE_PARSE_ERROR,
"Volume %s, before line %d: Please specify "
"option <key> <value>",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -125,21 +118,20 @@ option_error (void)
static int
new_volume (char *name)
{
- extern int yylineno;
+ extern int graphyylineno;
xlator_t *trav = NULL;
int ret = 0;
if (!name) {
- gf_log ("parser", GF_LOG_DEBUG,
- "Invalid argument name: '%s'", name);
+ gf_msg_debug ("parser", 0,"Invalid argument name");
ret = -1;
goto out;
}
if (curr) {
- gf_log ("parser", GF_LOG_ERROR,
- "new volume (%s) defintion in line %d unexpected",
- name, yylineno);
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "new volume (%s) definition in line %d unexpected",
+ name, graphyylineno);
ret = -1;
goto out;
}
@@ -148,7 +140,6 @@ new_volume (char *name)
gf_common_mt_xlator_t);
if (!curr) {
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
ret = -1;
goto out;
}
@@ -157,9 +148,9 @@ new_volume (char *name)
while (trav) {
if (!strcmp (name, trav->name)) {
- gf_log ("parser", GF_LOG_ERROR,
- "Line %d: volume '%s' defined again",
- yylineno, name);
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_VOLFILE_PARSE_ERROR, "Line %d: volume "
+ "'%s' defined again", graphyylineno, name);
ret = -1;
goto out;
}
@@ -173,7 +164,8 @@ new_volume (char *name)
goto out;
}
- curr->options = get_new_dict ();
+ INIT_LIST_HEAD(&curr->volume_options);
+ curr->options = dict_new ();
if (!curr->options) {
GF_FREE (curr->name);
@@ -191,8 +183,9 @@ new_volume (char *name)
construct->first = curr;
construct->xl_count++;
+ curr->xl_id = construct->xl_count;
- gf_log ("parser", GF_LOG_TRACE, "New node for '%s'", name);
+ gf_msg_trace ("parser", 0, "New node for '%s'", name);
out:
GF_FREE (name);
@@ -204,26 +197,26 @@ out:
static int
volume_type (char *type)
{
- extern int yylineno;
+ extern int graphyylineno;
int32_t ret = 0;
if (!type) {
- gf_log ("parser", GF_LOG_DEBUG, "Invalid argument type");
+ gf_msg_debug ("parser", 0, "Invalid argument type");
ret = -1;
goto out;
}
ret = xlator_set_type (curr, type);
if (ret) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
"Volume '%s', line %d: type '%s' is not valid or "
"not found on this machine",
- curr->name, yylineno, type);
+ curr->name, graphyylineno, type);
ret = -1;
goto out;
}
- gf_log ("parser", GF_LOG_TRACE, "Type:%s:%s", curr->name, type);
+ gf_msg_trace ("parser", 0, "Type:%s:%s", curr->name, type);
out:
GF_FREE (type);
@@ -235,30 +228,30 @@ out:
static int
volume_option (char *key, char *value)
{
- extern int yylineno;
+ extern int graphyylineno;
int ret = 0;
char *set_value = NULL;
if (!key || !value){
- gf_log ("parser", GF_LOG_ERROR, "Invalid argument");
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_INVALID_VOLFILE_ENTRY, "Invalid argument");
ret = -1;
goto out;
}
set_value = gf_strdup (value);
- ret = dict_set_dynstr (curr->options, key, set_value);
+ ret = dict_set_option (curr->options, key, set_value);
if (ret == 1) {
- gf_log ("parser", GF_LOG_ERROR,
- "Volume '%s', line %d: duplicate entry "
- "('option %s') present",
- curr->name, yylineno, key);
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_INVALID_VOLFILE_ENTRY, "Volume '%s', line %d: "
+ "duplicate entry ('option %s') present",
+ curr->name, graphyylineno, key);
ret = -1;
goto out;
}
- gf_log ("parser", GF_LOG_TRACE, "Option:%s:%s:%s",
- curr->name, key, value);
+ gf_msg_trace ("parser", 0, "Option:%s:%s:%s", curr->name, key, value);
out:
GF_FREE (key);
@@ -271,12 +264,13 @@ out:
static int
volume_sub (char *sub)
{
- extern int yylineno;
+ extern int graphyylineno;
xlator_t *trav = NULL;
int ret = 0;
if (!sub) {
- gf_log ("parser", GF_LOG_ERROR, "Invalid subvolumes argument");
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Invalid subvolumes argument");
ret = -1;
goto out;
}
@@ -290,30 +284,28 @@ volume_sub (char *sub)
}
if (!trav) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SUB_VOLUME_ERROR,
"Volume '%s', line %d: subvolume '%s' is not defined "
- "prior to usage",
- curr->name, yylineno, sub);
+ "prior to usage",curr->name, graphyylineno, sub);
ret = -1;
goto out;
}
if (trav == curr) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
"Volume '%s', line %d: has '%s' itself as subvolume",
- curr->name, yylineno, sub);
+ curr->name, graphyylineno, sub);
ret = -1;
goto out;
}
ret = glusterfs_xlator_link (curr, trav);
if (ret) {
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
ret = -1;
goto out;
}
- gf_log ("parser", GF_LOG_TRACE, "child:%s->%s", curr->name, sub);
+ gf_msg_trace ("parser", 0, "child:%s->%s", curr->name, sub);
out:
GF_FREE (sub);
@@ -326,11 +318,11 @@ static int
volume_end (void)
{
if (!curr->fops) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
"\"type\" not specified for volume %s", curr->name);
return -1;
}
- gf_log ("parser", GF_LOG_TRACE, "end:%s", curr->name);
+ gf_msg_trace ("parser", 0, "end:%s", curr->name);
curr = NULL;
return 0;
@@ -338,55 +330,52 @@ volume_end (void)
int
-yywrap ()
+graphyywrap ()
{
return 1;
}
int
-yyerror (const char *str)
+graphyyerror (const char *str)
{
- extern char *yytext;
- extern int yylineno;
-
- if (curr && curr->name && yytext) {
- if (!strcmp (yytext, "volume")) {
- gf_log ("parser", GF_LOG_ERROR,
- "'end-volume' not defined for volume '%s'",
- curr->name);
- } else if (!strcmp (yytext, "type")) {
- gf_log ("parser", GF_LOG_ERROR,
+ extern char *graphyytext;
+ extern int graphyylineno;
+
+ if (curr && curr->name && graphyytext) {
+ if (!strcmp (graphyytext, "volume")) {
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_VOLUME_ERROR, "'end-volume' not"
+ " defined for volume '%s'", curr->name);
+ } else if (!strcmp (graphyytext, "type")) {
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
"line %d: duplicate 'type' defined for "
- "volume '%s'",
- yylineno, curr->name);
- } else if (!strcmp (yytext, "subvolumes")) {
- gf_log ("parser", GF_LOG_ERROR,
- "line %d: duplicate 'subvolumes' defined for "
- "volume '%s'",
- yylineno, curr->name);
+ "volume '%s'", graphyylineno, curr->name);
+ } else if (!strcmp (graphyytext, "subvolumes")) {
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_SUB_VOLUME_ERROR, "line %d: duplicate "
+ "'subvolumes' defined for volume '%s'",
+ graphyylineno, curr->name);
} else if (curr) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SYNTAX_ERROR,
"syntax error: line %d (volume '%s'): \"%s\""
"\nallowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume'()",
- yylineno, curr->name,
- yytext);
+ graphyylineno, curr->name, graphyytext);
} else {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SYNTAX_ERROR,
"syntax error: line %d (just after volume "
"'%s'): \"%s\"\n(%s)",
- yylineno, curr->name,
- yytext,
+ graphyylineno, curr->name, graphyytext,
"allowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume'");
}
} else {
- gf_log ("parser", GF_LOG_ERROR,
- "syntax error in line %d: \"%s\" \n"
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SYNTAX_ERROR,
+ "syntax error in line %d: \"%s\"\n"
"(allowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume')\n",
- yylineno, yytext);
+ graphyylineno, graphyytext);
}
return -1;
@@ -404,7 +393,8 @@ execute_cmd (char *cmd, char **result, size_t size)
fpp = popen (cmd, "r");
if (!fpp) {
- gf_log ("parser", GF_LOG_ERROR, "%s: failed to popen", cmd);
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_FILE_OP_FAILED,
+ "%s: failed to popen", cmd);
return -1;
}
@@ -454,7 +444,6 @@ preprocess (FILE *srcfp, FILE *dstfp)
cmd = GF_CALLOC (cmd_buf_size, 1,
gf_common_mt_char);
if (cmd == NULL) {
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
return -1;
}
@@ -462,7 +451,6 @@ preprocess (FILE *srcfp, FILE *dstfp)
gf_common_mt_char);
if (result == NULL) {
GF_FREE (cmd);
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
return -1;
}
@@ -491,6 +479,7 @@ preprocess (FILE *srcfp, FILE *dstfp)
cmd_buf_size *= 2;
cmd = GF_REALLOC (cmd, cmd_buf_size);
if (cmd == NULL) {
+ GF_FREE (result);
return -1;
}
@@ -523,15 +512,16 @@ preprocess (FILE *srcfp, FILE *dstfp)
}
if (in_backtick) {
- gf_log ("parser", GF_LOG_ERROR,
- "Unterminated backtick in volume specfication file at line (%d), column (%d).",
- line, column);
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
+ "Unterminated backtick in volume specification file at "
+ "line (%d), column (%d).", line, column);
ret = -1;
}
out:
fseek (srcfp, 0L, SEEK_SET);
fseek (dstfp, 0L, SEEK_SET);
+
GF_FREE (cmd);
GF_FREE (result);
@@ -539,7 +529,7 @@ out:
}
-extern FILE *yyin;
+extern FILE *graphyyin;
glusterfs_graph_t *
glusterfs_graph_new ()
@@ -553,6 +543,9 @@ glusterfs_graph_new ()
INIT_LIST_HEAD (&graph->list);
+ pthread_mutex_init(&graph->mutex, NULL);
+ pthread_cond_init(&graph->child_down_cond, NULL);
+
gettimeofday (&graph->dob, NULL);
return graph;
@@ -563,52 +556,66 @@ glusterfs_graph_t *
glusterfs_graph_construct (FILE *fp)
{
int ret = 0;
+ int tmp_fd = -1;
glusterfs_graph_t *graph = NULL;
- FILE *tmp_file = NULL;
+ FILE *tmp_file = NULL;
+ char template[] = "/tmp/tmp.XXXXXX";
+ static pthread_mutex_t graph_mutex = PTHREAD_MUTEX_INITIALIZER;
graph = glusterfs_graph_new ();
if (!graph)
- return NULL;
+ goto err;
- tmp_file = tmpfile ();
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp (template);
+ if (-1 == tmp_fd)
+ goto err;
- if (tmp_file == NULL) {
- gf_log ("parser", GF_LOG_ERROR,
- "cannot create temparory file");
+ ret = sys_unlink (template);
+ if (ret < 0) {
+ gf_msg ("parser", GF_LOG_WARNING, 0, LG_MSG_FILE_OP_FAILED,
+ "Unable to delete file: %s", template);
+ }
- glusterfs_graph_destroy (graph);
- return NULL;
- }
+ tmp_file = fdopen (tmp_fd, "w+b");
+ if (!tmp_file)
+ goto err;
- ret = preprocess (fp, tmp_file);
- if (ret < 0) {
- gf_log ("parser", GF_LOG_ERROR,
- "parsing of backticks failed");
+ ret = preprocess (fp, tmp_file);
+ if (ret < 0) {
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_BACKTICK_PARSE_FAILED,
+ "parsing of backticks failed");
+ goto err;
+ }
- glusterfs_graph_destroy (graph);
- fclose (tmp_file);
- return NULL;
+ pthread_mutex_lock (&graph_mutex);
+ {
+ graphyyin = tmp_file;
+ construct = graph;
+ ret = yyparse ();
+ construct = NULL;
}
-
- yyin = tmp_file;
-
- construct = graph;
-
- ret = yyparse ();
-
- construct = NULL;
-
- fclose (tmp_file);
+ pthread_mutex_unlock (&graph_mutex);
if (ret == 1) {
- gf_log ("parser", GF_LOG_DEBUG,
- "parsing of volfile failed, please review it "
- "once more");
-
- glusterfs_graph_destroy (graph);
- return NULL;
+ gf_msg_debug ("parser", 0, "parsing of volfile failed, please "
+ "review it once more");
+ goto err;
}
+ fclose (tmp_file);
return graph;
+err:
+ if (tmp_file) {
+ fclose (tmp_file);
+ } else {
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_FILE_OP_FAILED,
+ "cannot create temporary file");
+ if (-1 != tmp_fd)
+ sys_close (tmp_fd);
+ }
+
+ glusterfs_graph_destroy (graph);
+ return NULL;
}
diff --git a/libglusterfs/src/hashfn.c b/libglusterfs/src/hashfn.c
index f79165b221e..d2237e99f83 100644
--- a/libglusterfs/src/hashfn.c
+++ b/libglusterfs/src/hashfn.c
@@ -11,29 +11,11 @@
#include <stdint.h>
#include <stdlib.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "hashfn.h"
-
-#define get16bits(d) (*((const uint16_t *) (d)))
+#define get16bits(d) (*((const uint16_t *)(d)))
#define DM_DELTA 0x9E3779B9
-#define DM_FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
-#define DM_PARTROUNDS 6 /* 6 gets complete mixing */
-
-
-uint32_t
-ReallySimpleHash (char *path, int len)
-{
- uint32_t hash = 0;
- for (;len > 0; len--)
- hash ^= (char)path[len];
-
- return hash;
-}
+#define DM_FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
+#define DM_PARTROUNDS 6 /* 6 gets complete mixing */
/*
This is apparently the "fastest hash function for strings".
@@ -42,146 +24,145 @@ ReallySimpleHash (char *path, int len)
/* In any case make sure, you return 1 */
-uint32_t SuperFastHash (const char * data, int32_t len) {
- uint32_t hash = len, tmp;
- int32_t rem;
-
- if (len <= 1 || data == NULL) return 1;
-
- rem = len & 3;
- len >>= 2;
-
- /* Main loop */
- for (;len > 0; len--) {
- hash += get16bits (data);
- tmp = (get16bits (data+2) << 11) ^ hash;
- hash = (hash << 16) ^ tmp;
- data += 2*sizeof (uint16_t);
- hash += hash >> 11;
- }
-
- /* Handle end cases */
- switch (rem) {
- case 3: hash += get16bits (data);
- hash ^= hash << 16;
- hash ^= data[sizeof (uint16_t)] << 18;
- hash += hash >> 11;
- break;
- case 2: hash += get16bits (data);
- hash ^= hash << 11;
- hash += hash >> 17;
- break;
- case 1: hash += *data;
- hash ^= hash << 10;
- hash += hash >> 1;
- }
-
- /* Force "avalanching" of final 127 bits */
- hash ^= hash << 3;
- hash += hash >> 5;
- hash ^= hash << 4;
- hash += hash >> 17;
- hash ^= hash << 25;
- hash += hash >> 6;
-
- return hash;
+uint32_t
+SuperFastHash(const char *data, int32_t len)
+{
+ uint32_t hash = len, tmp;
+ int32_t rem;
+
+ if (len <= 1 || data == NULL)
+ return 1;
+
+ rem = len & 3;
+ len >>= 2;
+
+ /* Main loop */
+ for (; len > 0; len--) {
+ hash += get16bits(data);
+ tmp = (get16bits(data + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2 * sizeof(uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases */
+ switch (rem) {
+ case 3:
+ hash += get16bits(data);
+ hash ^= hash << 16;
+ hash ^= data[sizeof(uint16_t)] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += get16bits(data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += *data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
}
-
/* Davies-Meyer hashing function implementation
*/
static int
-dm_round (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1)
+dm_round(int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1)
{
- uint32_t sum = 0;
- int n = 0;
- uint32_t b0 = 0;
- uint32_t b1 = 0;
-
- b0 = *h0;
- b1 = *h1;
-
- n = rounds;
-
- do {
- sum += DM_DELTA;
- b0 += ((b1 << 4) + array[0])
- ^ (b1 + sum)
- ^ ((b1 >> 5) + array[1]);
- b1 += ((b0 << 4) + array[2])
- ^ (b0 + sum)
- ^ ((b0 >> 5) + array[3]);
- } while (--n);
-
- *h0 += b0;
- *h1 += b1;
-
- return 0;
-}
+ uint32_t sum = 0;
+ int n = 0;
+ uint32_t b0 = 0;
+ uint32_t b1 = 0;
+
+ b0 = *h0;
+ b1 = *h1;
+ n = rounds;
+
+ do {
+ sum += DM_DELTA;
+ b0 += ((b1 << 4) + array[0]) ^ (b1 + sum) ^ ((b1 >> 5) + array[1]);
+ b1 += ((b0 << 4) + array[2]) ^ (b0 + sum) ^ ((b0 >> 5) + array[3]);
+ } while (--n);
+
+ *h0 += b0;
+ *h1 += b1;
+
+ return 0;
+}
uint32_t
-__pad (int len)
+__pad(int len)
{
- uint32_t pad = 0;
+ uint32_t pad = 0;
- pad = (uint32_t) len | ((uint32_t) len << 8);
- pad |= pad << 16;
+ pad = (uint32_t)len | ((uint32_t)len << 8);
+ pad |= pad << 16;
- return pad;
+ return pad;
}
uint32_t
-gf_dm_hashfn (const char *msg, int len)
+gf_dm_hashfn(const char *msg, int len)
{
- uint32_t h0 = 0x9464a485;
- uint32_t h1 = 0x542e1a94;
- uint32_t array[4];
- uint32_t pad = 0;
- int i = 0;
- int j = 0;
- int full_quads = 0;
- int full_words = 0;
- int full_bytes = 0;
- uint32_t *intmsg = NULL;
- int word = 0;
-
-
- intmsg = (uint32_t *) msg;
- pad = __pad (len);
-
- full_bytes = len;
- full_words = len / 4;
- full_quads = len / 16;
-
- for (i = 0; i < full_quads; i++) {
- for (j = 0; j < 4; j++) {
- word = *intmsg;
- array[j] = word;
- intmsg++;
- full_words--;
- full_bytes -= 4;
- }
- dm_round (DM_PARTROUNDS, &array[0], &h0, &h1);
- }
-
+ uint32_t h0 = 0x9464a485;
+ uint32_t h1 = 0x542e1a94;
+ uint32_t array[4];
+ uint32_t pad = 0;
+ int i = 0;
+ int j = 0;
+ int full_quads = 0;
+ int full_words = 0;
+ int full_bytes = 0;
+ uint32_t *intmsg = NULL;
+ int word = 0;
+
+ intmsg = (uint32_t *)msg;
+ pad = __pad(len);
+
+ full_bytes = len;
+ full_words = len / 4;
+ full_quads = len / 16;
+
+ for (i = 0; i < full_quads; i++) {
for (j = 0; j < 4; j++) {
- if (full_words) {
- word = *intmsg;
- array[j] = word;
- intmsg++;
- full_words--;
- full_bytes -= 4;
- } else {
- array[j] = pad;
- while (full_bytes) {
- array[j] <<= 8;
- array[j] |= msg[len - full_bytes];
- full_bytes--;
- }
- }
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ }
+ dm_round(DM_PARTROUNDS, &array[0], &h0, &h1);
+ }
+
+ for (j = 0; j < 4; j++) {
+ if (full_words) {
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ } else {
+ array[j] = pad;
+ while (full_bytes) {
+ array[j] <<= 8;
+ array[j] |= msg[len - full_bytes];
+ full_bytes--;
+ }
}
- dm_round (DM_FULLROUNDS, &array[0], &h0, &h1);
+ }
+ dm_round(DM_FULLROUNDS, &array[0], &h0, &h1);
- return h0 ^ h1;
+ return h0 ^ h1;
}
diff --git a/libglusterfs/src/iatt.h b/libglusterfs/src/iatt.h
deleted file mode 100644
index 60ae5904703..00000000000
--- a/libglusterfs/src/iatt.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _IATT_H
-#define _IATT_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h> /* for iatt <--> stat conversions */
-#include <unistd.h>
-
-#include "compat.h"
-#include "uuid.h"
-
-typedef enum {
- IA_INVAL = 0,
- IA_IFREG,
- IA_IFDIR,
- IA_IFLNK,
- IA_IFBLK,
- IA_IFCHR,
- IA_IFIFO,
- IA_IFSOCK
-} ia_type_t;
-
-
-typedef struct {
- uint8_t suid:1;
- uint8_t sgid:1;
- uint8_t sticky:1;
- struct {
- uint8_t read:1;
- uint8_t write:1;
- uint8_t exec:1;
- } owner, group, other;
-} ia_prot_t;
-
-
-struct iatt {
- uint64_t ia_ino; /* inode number */
- uuid_t ia_gfid;
- uint64_t ia_dev; /* backing device ID */
- ia_type_t ia_type; /* type of file */
- ia_prot_t ia_prot; /* protection */
- uint32_t ia_nlink; /* Link count */
- uint32_t ia_uid; /* user ID of owner */
- uint32_t ia_gid; /* group ID of owner */
- uint64_t ia_rdev; /* device ID (if special file) */
- uint64_t ia_size; /* file size in bytes */
- uint32_t ia_blksize; /* blocksize for filesystem I/O */
- uint64_t ia_blocks; /* number of 512B blocks allocated */
- uint32_t ia_atime; /* last access time */
- uint32_t ia_atime_nsec;
- uint32_t ia_mtime; /* last modification time */
- uint32_t ia_mtime_nsec;
- uint32_t ia_ctime; /* last status change time */
- uint32_t ia_ctime_nsec;
-};
-
-
-#define IA_ISREG(t) (t == IA_IFREG)
-#define IA_ISDIR(t) (t == IA_IFDIR)
-#define IA_ISLNK(t) (t == IA_IFLNK)
-#define IA_ISBLK(t) (t == IA_IFBLK)
-#define IA_ISCHR(t) (t == IA_IFCHR)
-#define IA_ISFIFO(t) (t == IA_IFIFO)
-#define IA_ISSOCK(t) (t == IA_IFSOCK)
-
-#define IA_PROT_RUSR(prot) ((prot).owner.read == 1)
-#define IA_PROT_WUSR(prot) ((prot).owner.write == 1)
-#define IA_PROT_XUSR(prot) ((prot).owner.exec == 1)
-
-#define IA_PROT_RGRP(prot) ((prot).group.read == 1)
-#define IA_PROT_WGRP(prot) ((prot).group.write == 1)
-#define IA_PROT_XGRP(prot) ((prot).group.exec == 1)
-
-#define IA_PROT_ROTH(prot) ((prot).other.read == 1)
-#define IA_PROT_WOTH(prot) ((prot).other.write == 1)
-#define IA_PROT_XOTH(prot) ((prot).other.exec == 1)
-
-#define IA_PROT_SUID(prot) ((prot).suid == 1)
-#define IA_PROT_SGID(prot) ((prot).sgid == 1)
-#define IA_PROT_STCKY(prot) ((prot).sticky == 1)
-
-#define IA_FILE_OR_DIR(t) (IA_ISREG(t) || IA_ISDIR(t))
-
-static inline uint32_t
-ia_major (uint64_t ia_dev)
-{
- return (uint32_t) (ia_dev >> 32);
-}
-
-
-static inline uint32_t
-ia_minor (uint64_t ia_dev)
-{
- return (uint32_t) (ia_dev & 0xffffffff);
-}
-
-
-static inline uint64_t
-ia_makedev (uint32_t ia_maj, uint32_t ia_min)
-{
- return ((((uint64_t) ia_maj) << 32) | ia_min);
-}
-
-
-static inline ia_prot_t
-ia_prot_from_st_mode (mode_t mode)
-{
- ia_prot_t ia_prot = {0, };
-
- if (mode & S_ISUID)
- ia_prot.suid = 1;
- if (mode & S_ISGID)
- ia_prot.sgid = 1;
- if (mode & S_ISVTX)
- ia_prot.sticky = 1;
-
- if (mode & S_IRUSR)
- ia_prot.owner.read = 1;
- if (mode & S_IWUSR)
- ia_prot.owner.write = 1;
- if (mode & S_IXUSR)
- ia_prot.owner.exec = 1;
-
- if (mode & S_IRGRP)
- ia_prot.group.read = 1;
- if (mode & S_IWGRP)
- ia_prot.group.write = 1;
- if (mode & S_IXGRP)
- ia_prot.group.exec = 1;
-
- if (mode & S_IROTH)
- ia_prot.other.read = 1;
- if (mode & S_IWOTH)
- ia_prot.other.write = 1;
- if (mode & S_IXOTH)
- ia_prot.other.exec = 1;
-
- return ia_prot;
-}
-
-
-static inline ia_type_t
-ia_type_from_st_mode (mode_t mode)
-{
- ia_type_t type = IA_INVAL;
-
- if (S_ISREG (mode))
- type = IA_IFREG;
- if (S_ISDIR (mode))
- type = IA_IFDIR;
- if (S_ISLNK (mode))
- type = IA_IFLNK;
- if (S_ISBLK (mode))
- type = IA_IFBLK;
- if (S_ISCHR (mode))
- type = IA_IFCHR;
- if (S_ISFIFO (mode))
- type = IA_IFIFO;
- if (S_ISSOCK (mode))
- type = IA_IFSOCK;
-
- return type;
-}
-
-
-static inline mode_t
-st_mode_from_ia (ia_prot_t prot, ia_type_t type)
-{
- mode_t st_mode = 0;
- uint32_t type_bit = 0;
- uint32_t prot_bit = 0;
-
- switch (type) {
- case IA_IFREG:
- type_bit = S_IFREG;
- break;
- case IA_IFDIR:
- type_bit = S_IFDIR;
- break;
- case IA_IFLNK:
- type_bit = S_IFLNK;
- break;
- case IA_IFBLK:
- type_bit = S_IFBLK;
- break;
- case IA_IFCHR:
- type_bit = S_IFCHR;
- break;
- case IA_IFIFO:
- type_bit = S_IFIFO;
- break;
- case IA_IFSOCK:
- type_bit = S_IFSOCK;
- break;
- case IA_INVAL:
- break;
- }
-
- if (prot.suid)
- prot_bit |= S_ISUID;
- if (prot.sgid)
- prot_bit |= S_ISGID;
- if (prot.sticky)
- prot_bit |= S_ISVTX;
-
- if (prot.owner.read)
- prot_bit |= S_IRUSR;
- if (prot.owner.write)
- prot_bit |= S_IWUSR;
- if (prot.owner.exec)
- prot_bit |= S_IXUSR;
-
- if (prot.group.read)
- prot_bit |= S_IRGRP;
- if (prot.group.write)
- prot_bit |= S_IWGRP;
- if (prot.group.exec)
- prot_bit |= S_IXGRP;
-
- if (prot.other.read)
- prot_bit |= S_IROTH;
- if (prot.other.write)
- prot_bit |= S_IWOTH;
- if (prot.other.exec)
- prot_bit |= S_IXOTH;
-
- st_mode = (type_bit | prot_bit);
-
- return st_mode;
-}
-
-
-static inline int
-iatt_from_stat (struct iatt *iatt, struct stat *stat)
-{
- iatt->ia_dev = stat->st_dev;
- iatt->ia_ino = stat->st_ino;
-
- iatt->ia_type = ia_type_from_st_mode (stat->st_mode);
- iatt->ia_prot = ia_prot_from_st_mode (stat->st_mode);
-
- iatt->ia_nlink = stat->st_nlink;
- iatt->ia_uid = stat->st_uid;
- iatt->ia_gid = stat->st_gid;
-
- iatt->ia_rdev = ia_makedev (major (stat->st_rdev),
- minor (stat->st_rdev));
-
- iatt->ia_size = stat->st_size;
- iatt->ia_blksize = stat->st_blksize;
- iatt->ia_blocks = stat->st_blocks;
-
- /* There is a possibility that the backend FS (like XFS) can
- allocate blocks beyond EOF for better performance reasons, which
- results in 'st_blocks' with higher values than what is consumed by
- the file descriptor. This would break few logic inside GlusterFS,
- like quota behavior etc, thus we need the exact number of blocks
- which are consumed by the file to the higher layers inside GlusterFS.
- Currently, this logic won't work for sparse files (ie, file with
- holes)
- */
- {
- uint64_t maxblocks;
-
- maxblocks = (iatt->ia_size + 511) / 512;
-
- if (iatt->ia_blocks > maxblocks)
- iatt->ia_blocks = maxblocks;
- }
-
- iatt->ia_atime = stat->st_atime;
- iatt->ia_atime_nsec = ST_ATIM_NSEC (stat);
-
- iatt->ia_mtime = stat->st_mtime;
- iatt->ia_mtime_nsec = ST_MTIM_NSEC (stat);
-
- iatt->ia_ctime = stat->st_ctime;
- iatt->ia_ctime_nsec = ST_CTIM_NSEC (stat);
-
- return 0;
-}
-
-
-static inline int
-iatt_to_stat (struct iatt *iatt, struct stat *stat)
-{
- stat->st_dev = iatt->ia_dev;
- stat->st_ino = iatt->ia_ino;
-
- stat->st_mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
-
- stat->st_nlink = iatt->ia_nlink;
- stat->st_uid = iatt->ia_uid;
- stat->st_gid = iatt->ia_gid;
-
- stat->st_rdev = makedev (ia_major (iatt->ia_rdev),
- ia_minor (iatt->ia_rdev));
-
- stat->st_size = iatt->ia_size;
- stat->st_blksize = iatt->ia_blksize;
- stat->st_blocks = iatt->ia_blocks;
-
- stat->st_atime = iatt->ia_atime;
- ST_ATIM_NSEC_SET (stat, iatt->ia_atime_nsec);
-
- stat->st_mtime = iatt->ia_mtime;
- ST_MTIM_NSEC_SET (stat, iatt->ia_mtime_nsec);
-
- stat->st_ctime = iatt->ia_ctime;
- ST_CTIM_NSEC_SET (stat, iatt->ia_ctime_nsec);
-
- return 0;
-}
-
-
-#endif /* _IATT_H */
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index e32eddb5d1c..dbadf77442d 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -8,1754 +8,2670 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "inode.h"
-#include "fd.h"
-#include "common-utils.h"
-#include "statedump.h"
+#include "glusterfs/inode.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/statedump.h"
#include <pthread.h>
#include <sys/types.h>
#include <stdint.h>
-#include "list.h"
-#include <time.h>
+#include "glusterfs/list.h"
#include <assert.h>
+#include "glusterfs/libglusterfs-messages.h"
/* TODO:
move latest accessed dentry to list_head of inode
*/
-#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
- { \
- int i = 1; \
- inode_t *inode = NULL; \
- list_for_each_entry (inode, head, list) { \
- gf_proc_dump_build_key(key_buf, key_prefix, \
- "%s.%d",list_type, i++); \
- gf_proc_dump_add_section(key_buf); \
- inode_dump(inode, key); \
- } \
- }
+// clang-format off
+/*
+
+Details as per Xavi:
+
+ I think we should have 3 lists: active, lru and invalidate.
+
+We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of
+refs, invalidate_sent flag and moving from one list to another must be done
+atomically.
+
+With this information, these are the states that cause a transition:
+
+ refs nlookups inv_sent op
+ 1 0 0 unref -> refs = 0, active--->destroy
+ 1 1 0 unref -> refs = 0, active--->lru
+ 1 1 0 forget -> nlookups = 0, active--->active
+ *0 1 0 forget -> nlookups = 0, lru--->destroy
+ *0 1 1 forget -> nlookups = 0, invalidate--->destroy
+ 0 1 0 ref -> refs = 1, lru--->active
+ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active
+ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate
+ 1 1 1 unref -> refs = 0, invalidate--->invalidate
+ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active
+
+(*) technically these combinations cannot happen because a forget sent by the
+kernel first calls ref() and then unref(). However it's equivalent.
+
+overflow means that lru list has grown beyond the limit and the inode needs to
+be invalidated. All other combinations do not cause a change in state or are not
+possible.
+
+Based on this, the code could be similar to this:
+
+ ref(inode, inv)
+ {
+ if (refs == 0) {
+ if (inv_sent) {
+ invalidate_count--;
+ inv_sent = 0;
+ } else {
+ lru_count--;
+ }
+ if (inv) {
+ inv_sent = 1;
+ invalidate_count++;
+ list_move(inode, invalidate);
+ } else {
+ active_count++;
+ list_move(inode, active);
+ }
+ }
+ refs++;
+ }
+
+ unref(inode, clear)
+ {
+ if (clear && inv_sent) {
+ // there is a case of fuse itself sending forget, without
+ // invalidate, after entry delete, like unlink(), rmdir().
+ inv_sent = 0;
+ invalidate_count--;
+ active_count++;
+ list_move(inode, active);
+ }
+ refs--;
+ if ((refs == 0) && !inv_sent) {
+ active_count--;
+ if (nlookups == 0) {
+ destroy(inode);
+ } else {
+ lru_count++;
+ list_move(inode, lru);
+ }
+ }
+ }
+
+ forget(inode)
+ {
+ ref(inode, false);
+ nlookups--;
+ unref(inode, true);
+ }
+
+ overflow(inode)
+ {
+ ref(inode, true);
+ invalidator(inode);
+ unref(inode, false);
+ }
+
+*/
+// clang-format on
+
+#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
+ { \
+ int i = 1; \
+ inode_t *inode = NULL; \
+ list_for_each_entry(inode, head, list) \
+ { \
+ gf_proc_dump_build_key(key_buf, key_prefix, "%s.%d", list_type, \
+ i++); \
+ gf_proc_dump_add_section("%s", key_buf); \
+ inode_dump(inode, key); \
+ } \
+ }
static inode_t *
-__inode_unref (inode_t *inode);
+__inode_unref(inode_t *inode, bool clear);
static int
-inode_table_prune (inode_table_t *table);
+inode_table_prune(inode_table_t *table);
void
-fd_dump (struct list_head *head, char *prefix);
+fd_dump(struct list_head *head, char *prefix);
static int
-hash_dentry (inode_t *parent, const char *name, int mod)
+hash_dentry(inode_t *parent, const char *name, int mod)
{
- int hash = 0;
- int ret = 0;
+ int hash = 0;
+ int ret = 0;
- hash = *name;
- if (hash) {
- for (name += 1; *name != '\0'; name++) {
- hash = (hash << 5) - hash + *name;
- }
+ hash = *name;
+ if (hash) {
+ for (name += 1; *name != '\0'; name++) {
+ hash = (hash << 5) - hash + *name;
}
- ret = (hash + (unsigned long)parent) % mod;
+ }
+ ret = (hash + (unsigned long)parent) % mod;
- return ret;
+ return ret;
}
-
static int
-hash_gfid (uuid_t uuid, int mod)
+hash_gfid(uuid_t uuid, int mod)
{
- int ret = 0;
-
- ret = uuid[15] + (uuid[14] << 8);
-
- return ret;
+ return ((uuid[15] + (uuid[14] << 8)) % mod);
}
-
static void
-__dentry_hash (dentry_t *dentry)
+__dentry_hash(dentry_t *dentry, const int hash)
{
- inode_table_t *table = NULL;
- int hash = 0;
+ inode_table_t *table = NULL;
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return;
- }
-
- table = dentry->inode->table;
- hash = hash_dentry (dentry->parent, dentry->name,
- table->hashsize);
+ table = dentry->inode->table;
- list_del_init (&dentry->hash);
- list_add (&dentry->hash, &table->name_hash[hash]);
+ list_del_init(&dentry->hash);
+ list_add(&dentry->hash, &table->name_hash[hash]);
}
-
static int
-__is_dentry_hashed (dentry_t *dentry)
+__is_dentry_hashed(dentry_t *dentry)
{
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return 0;
- }
-
- return !list_empty (&dentry->hash);
+ return !list_empty(&dentry->hash);
}
-
static void
-__dentry_unhash (dentry_t *dentry)
+__dentry_unhash(dentry_t *dentry)
{
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return;
- }
-
- list_del_init (&dentry->hash);
+ list_del_init(&dentry->hash);
}
-
static void
-__dentry_unset (dentry_t *dentry)
+dentry_destroy(dentry_t *dentry)
{
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return;
- }
-
- __dentry_unhash (dentry);
+ if (!dentry)
+ return;
- list_del_init (&dentry->inode_list);
+ GF_FREE(dentry->name);
+ dentry->name = NULL;
+ mem_put(dentry);
- if (dentry->name)
- GF_FREE (dentry->name);
+ return;
+}
- if (dentry->parent) {
- __inode_unref (dentry->parent);
- dentry->parent = NULL;
- }
+static dentry_t *
+__dentry_unset(dentry_t *dentry)
+{
+ if (!dentry)
+ return NULL;
- mem_put (dentry);
-}
+ __dentry_unhash(dentry);
+ list_del_init(&dentry->inode_list);
-static int
-__foreach_ancestor_dentry (dentry_t *dentry,
- int (per_dentry_fn) (dentry_t *dentry,
- void *data),
- void *data)
-{
- inode_t *parent = NULL;
- dentry_t *each = NULL;
- int ret = 0;
-
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return 0;
- }
+ if (dentry->parent) {
+ __inode_unref(dentry->parent, false);
+ dentry->parent = NULL;
+ }
- ret = per_dentry_fn (dentry, data);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "per dentry fn returned %d", ret);
- goto out;
- }
+ return dentry;
+}
- parent = dentry->parent;
- if (!parent) {
- gf_log (THIS->name, GF_LOG_WARNING, "parent not found");
- goto out;
- }
+static int
+__foreach_ancestor_dentry(dentry_t *dentry,
+ int(per_dentry_fn)(dentry_t *dentry, void *data),
+ void *data)
+{
+ inode_t *parent = NULL;
+ dentry_t *each = NULL;
+ int ret = 0;
- list_for_each_entry (each, &parent->dentry_list, inode_list) {
- ret = __foreach_ancestor_dentry (each, per_dentry_fn, data);
- if (ret)
- goto out;
- }
+ if (!dentry) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+ "dentry not found");
+ return 0;
+ }
+
+ ret = per_dentry_fn(dentry, data);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PER_DENTRY_FAILED,
+ "ret=%d", ret, NULL);
+ goto out;
+ }
+
+ parent = dentry->parent;
+ if (!parent) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PARENT_DENTRY_NOT_FOUND,
+ NULL);
+ goto out;
+ }
+
+ list_for_each_entry(each, &parent->dentry_list, inode_list)
+ {
+ ret = __foreach_ancestor_dentry(each, per_dentry_fn, data);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
-
static int
-__check_cycle (dentry_t *a_dentry, void *data)
+__check_cycle(dentry_t *a_dentry, void *data)
{
- inode_t *link_inode = NULL;
+ inode_t *link_inode = NULL;
- link_inode = data;
+ link_inode = data;
- if (a_dentry->parent == link_inode)
- return 1;
+ if (a_dentry->parent == link_inode)
+ return 1;
- return 0;
+ return 0;
}
-
static int
-__is_dentry_cyclic (dentry_t *dentry)
+__is_dentry_cyclic(dentry_t *dentry)
{
- int ret = 0;
- inode_t *inode = NULL;
- char *name = "<nul>";
-
- ret = __foreach_ancestor_dentry (dentry, __check_cycle,
- dentry->inode);
- if (ret) {
- inode = dentry->inode;
+ int ret = 0;
- if (dentry->name)
- name = dentry->name;
+ ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode);
+ if (ret) {
+ gf_smsg(dentry->inode->table->name, GF_LOG_CRITICAL, 0,
+ LG_MSG_DENTRY_CYCLIC_LOOP, "gfid=%s name=-%s",
+ uuid_utoa(dentry->inode->gfid), dentry->name, NULL);
+ }
- gf_log (dentry->inode->table->name, GF_LOG_CRITICAL,
- "detected cyclic loop formation during inode linkage."
- " inode (%s) linking under itself as %s",
- uuid_utoa (inode->gfid), name);
- }
-
- return ret;
+ return ret;
}
-
static void
-__inode_unhash (inode_t *inode)
+__inode_unhash(inode_t *inode)
{
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
-
- list_del_init (&inode->hash);
+ list_del_init(&inode->hash);
}
-
static int
-__is_inode_hashed (inode_t *inode)
+__is_inode_hashed(inode_t *inode)
{
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return 0;
- }
-
- return !list_empty (&inode->hash);
+ return !list_empty(&inode->hash);
}
-
static void
-__inode_hash (inode_t *inode)
+__inode_hash(inode_t *inode, const int hash)
{
- inode_table_t *table = NULL;
- int hash = 0;
-
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ inode_table_t *table = inode->table;
- table = inode->table;
- hash = hash_gfid (inode->gfid, 65536);
-
- list_del_init (&inode->hash);
- list_add (&inode->hash, &table->inode_hash[hash]);
+ list_del_init(&inode->hash);
+ list_add(&inode->hash, &table->inode_hash[hash]);
}
-
static dentry_t *
-__dentry_search_for_inode (inode_t *inode, uuid_t pargfid, const char *name)
+__dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name)
{
- dentry_t *dentry = NULL;
- dentry_t *tmp = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
- if (!inode || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode || name not found");
- return NULL;
- }
+ /* earlier, just the ino was sent, which could have been 0, now
+ we deal with gfid, and if sent gfid is null or 0, no need to
+ continue with the check */
+ if (!pargfid || gf_uuid_is_null(pargfid))
+ return NULL;
- /* earlier, just the ino was sent, which could have been 0, now
- we deal with gfid, and if sent gfid is null or 0, no need to
- continue with the check */
- if (!pargfid || uuid_is_null (pargfid))
- return NULL;
-
- list_for_each_entry (tmp, &inode->dentry_list, inode_list) {
- if ((uuid_compare (tmp->parent->gfid, pargfid) == 0) &&
- !strcmp (tmp->name, name)) {
- dentry = tmp;
- break;
- }
+ list_for_each_entry(tmp, &inode->dentry_list, inode_list)
+ {
+ if ((gf_uuid_compare(tmp->parent->gfid, pargfid) == 0) &&
+ !strcmp(tmp->name, name)) {
+ dentry = tmp;
+ break;
}
+ }
- return dentry;
+ return dentry;
}
-
static void
-__inode_destroy (inode_t *inode)
+__inode_ctx_free(inode_t *inode)
{
- int index = 0;
- xlator_t *xl = NULL;
- xlator_t *old_THIS = NULL;
-
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
-
- if (!inode->_ctx) {
- gf_log (THIS->name, GF_LOG_WARNING, "_ctx not found");
- goto noctx;
- }
+ int index = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ if (!inode->_ctx) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_CTX_NULL, NULL);
+ goto noctx;
+ }
+
+ for (index = 0; index < inode->table->ctxcount; index++) {
+ if (inode->_ctx[index].value1 || inode->_ctx[index].value2) {
+ xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
+ if (xl && !xl->call_cleanup && xl->cbks->forget) {
+ old_THIS = THIS;
+ THIS = xl;
+ xl->cbks->forget(xl, inode);
+ THIS = old_THIS;
+ }
+ }
+ }
+
+ GF_FREE(inode->_ctx);
+ inode->_ctx = NULL;
- for (index = 0; index < inode->table->xl->graph->xl_count; index++) {
- if (inode->_ctx[index].xl_key) {
- xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
- old_THIS = THIS;
- THIS = xl;
- if (xl->cbks->forget)
- xl->cbks->forget (xl, inode);
- THIS = old_THIS;
- }
- }
-
- GF_FREE (inode->_ctx);
noctx:
- LOCK_DESTROY (&inode->lock);
- // memset (inode, 0xb, sizeof (*inode));
- mem_put (inode);
+ return;
}
-
static void
-__inode_activate (inode_t *inode)
+__inode_destroy(inode_t *inode)
{
- if (!inode)
- return;
+ __inode_ctx_free(inode);
- list_move (&inode->list, &inode->table->active);
- inode->table->active_size++;
+ LOCK_DESTROY(&inode->lock);
+ // memset (inode, 0xb, sizeof (*inode));
+ mem_put(inode);
}
-
-static void
-__inode_passivate (inode_t *inode)
+void
+inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode)
{
- dentry_t *dentry = NULL;
- dentry_t *t = NULL;
+ int index = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ if (!fd || !inode || !linked_inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid inode");
+ return;
+ }
- list_move_tail (&inode->list, &inode->table->lru);
- inode->table->lru_size++;
+ if (!inode->_ctx || !linked_inode->_ctx) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid inode context");
+ return;
+ }
+
+ for (; index < inode->table->ctxcount; index++) {
+ if (inode->_ctx[index].xl_key) {
+ xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
- list_for_each_entry_safe (dentry, t, &inode->dentry_list, inode_list) {
- if (!__is_dentry_hashed (dentry))
- __dentry_unset (dentry);
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->ictxmerge)
+ xl->cbks->ictxmerge(xl, fd, inode, linked_inode);
+ THIS = old_THIS;
}
+ }
}
+static void
+__inode_activate(inode_t *inode)
+{
+ list_move(&inode->list, &inode->table->active);
+ inode->table->active_size++;
+}
static void
-__inode_retire (inode_t *inode)
+__inode_passivate(inode_t *inode)
{
- dentry_t *dentry = NULL;
- dentry_t *t = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *t = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ list_move_tail(&inode->list, &inode->table->lru);
+ inode->table->lru_size++;
- list_move_tail (&inode->list, &inode->table->purge);
- inode->table->purge_size++;
+ list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
+ {
+ if (!__is_dentry_hashed(dentry))
+ dentry_destroy(__dentry_unset(dentry));
+ }
+}
- __inode_unhash (inode);
+static void
+__inode_retire(inode_t *inode)
+{
+ dentry_t *dentry = NULL;
+ dentry_t *t = NULL;
- list_for_each_entry_safe (dentry, t, &inode->dentry_list, inode_list) {
- __dentry_unset (dentry);
- }
+ list_move_tail(&inode->list, &inode->table->purge);
+ inode->table->purge_size++;
+
+ __inode_unhash(inode);
+
+ list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
+ {
+ dentry_destroy(__dentry_unset(dentry));
+ }
}
+static int
+__inode_get_xl_index(inode_t *inode, xlator_t *xlator)
+{
+ int set_idx = -1;
+
+ if ((inode->_ctx[xlator->xl_id].xl_key != NULL) &&
+ (inode->_ctx[xlator->xl_id].xl_key != xlator))
+ goto out;
+
+ set_idx = xlator->xl_id;
+ inode->_ctx[set_idx].xl_key = xlator;
+
+out:
+ return set_idx;
+}
static inode_t *
-__inode_unref (inode_t *inode)
+__inode_unref(inode_t *inode, bool clear)
{
- if (!inode)
- return NULL;
+ int index = 0;
+ xlator_t *this = NULL;
+ uint64_t nlookup = 0;
+
+ /*
+ * Root inode should always be in active list of inode table. So unrefs
+ * on root inode are no-ops.
+ */
+ if (__is_root_gfid(inode->gfid))
+ return inode;
+
+ /*
+ * No need to acquire inode table's lock
+ * as __inode_unref is called after acquiding
+ * the inode table's lock.
+ */
+ if (inode->table->cleanup_started && !inode->ref)
+ /*
+ * There is a good chance that, the inode
+ * on which unref came has already been
+ * zero refed and added to the purge list.
+ * This can happen when inode table is
+ * being destroyed (glfs_fini is something
+ * which destroys the inode table).
+ *
+ * Consider a directory 'a' which has a file
+ * 'b'. Now as part of inode table destruction
+ * zero refing of inodes does not happen from
+ * leaf to the root. It happens in the order
+ * inodes are present in the list. So, in this
+ * example, the dentry of 'b' would have its
+ * parent set to the inode of 'a'. So if
+ * 'a' gets zero refed first (as part of
+ * inode table cleanup) and then 'b' has to
+ * zero refed, then dentry_unset is called on
+ * the dentry of 'b' and it further goes on to
+ * call inode_unref on b's parent which is 'a'.
+ * In this situation, GF_ASSERT would be called
+ * below as the refcount of 'a' has been already set
+ * to zero.
+ *
+ * So return the inode if the inode table cleanup
+ * has already started and inode refcount is 0.
+ */
+ return inode;
- if (__is_root_gfid(inode->gfid))
- return inode;
+ this = THIS;
- GF_ASSERT (inode->ref);
+ if (clear && inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ __inode_activate(inode);
+ }
+ GF_ASSERT(inode->ref);
- --inode->ref;
+ --inode->ref;
- if (!inode->ref) {
- inode->table->active_size--;
+ index = __inode_get_xl_index(inode, this);
+ if (index >= 0) {
+ inode->_ctx[index].xl_key = this;
+ inode->_ctx[index].ref--;
+ }
- if (inode->nlookup)
- __inode_passivate (inode);
- else
- __inode_retire (inode);
- }
+ if (!inode->ref && !inode->in_invalidate_list) {
+ inode->table->active_size--;
- return inode;
-}
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ if (nlookup)
+ __inode_passivate(inode);
+ else
+ __inode_retire(inode);
+ }
+ return inode;
+}
static inode_t *
-__inode_ref (inode_t *inode)
+__inode_ref(inode_t *inode, bool is_invalidate)
{
- if (!inode)
- return NULL;
+ int index = 0;
+ xlator_t *this = NULL;
+
+ if (!inode)
+ return NULL;
+
+ this = THIS;
+
+ /*
+ * Root inode should always be in active list of inode table. So unrefs
+ * on root inode are no-ops. If we do not allow unrefs but allow refs,
+ * it leads to refcount overflows and deleting and adding the inode
+ * to active-list, which is ugly. active_size (check __inode_activate)
+ * in inode table increases which is wrong. So just keep the ref
+ * count as 1 always
+ */
+ if (__is_root_gfid(inode->gfid) && inode->ref)
+ return inode;
- if (!inode->ref) {
- inode->table->lru_size--;
- __inode_activate (inode);
+ if (!inode->ref) {
+ if (inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ } else {
+ inode->table->lru_size--;
}
- inode->ref++;
+ if (is_invalidate) {
+ inode->in_invalidate_list = true;
+ inode->table->invalidate_size++;
+ list_move_tail(&inode->list, &inode->table->invalidate);
+ } else {
+ __inode_activate(inode);
+ }
+ }
- return inode;
-}
+ inode->ref++;
+
+ index = __inode_get_xl_index(inode, this);
+ if (index >= 0) {
+ inode->_ctx[index].xl_key = this;
+ inode->_ctx[index].ref++;
+ }
+ return inode;
+}
inode_t *
-inode_unref (inode_t *inode)
+inode_unref(inode_t *inode)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode)
- return NULL;
+ if (!inode)
+ return NULL;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- inode = __inode_unref (inode);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ inode = __inode_unref(inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
- inode_table_prune (table);
+ inode_table_prune(table);
- return inode;
+ return inode;
}
-
inode_t *
-inode_ref (inode_t *inode)
+inode_ref(inode_t *inode)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode)
- return NULL;
+ if (!inode)
+ return NULL;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- inode = __inode_ref (inode);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ inode = __inode_ref(inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
- return inode;
+ return inode;
}
-
static dentry_t *
-__dentry_create (inode_t *inode, inode_t *parent, const char *name)
+dentry_create(inode_t *inode, inode_t *parent, const char *name)
{
- dentry_t *newd = NULL;
+ dentry_t *newd = NULL;
- if (!inode || !parent || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "inode || parent || name not found");
- return NULL;
- }
+ newd = mem_get0(parent->table->dentry_pool);
+ if (newd == NULL) {
+ goto out;
+ }
- newd = mem_get0 (parent->table->dentry_pool);
- if (newd == NULL) {
- goto out;
- }
+ INIT_LIST_HEAD(&newd->inode_list);
+ INIT_LIST_HEAD(&newd->hash);
- INIT_LIST_HEAD (&newd->inode_list);
- INIT_LIST_HEAD (&newd->hash);
-
- newd->name = gf_strdup (name);
- if (newd->name == NULL) {
- mem_put (newd);
- newd = NULL;
- goto out;
- }
-
- if (parent)
- newd->parent = __inode_ref (parent);
+ newd->name = gf_strdup(name);
+ if (newd->name == NULL) {
+ mem_put(newd);
+ newd = NULL;
+ goto out;
+ }
- list_add (&newd->inode_list, &inode->dentry_list);
- newd->inode = inode;
+ newd->inode = inode;
out:
- return newd;
+ return newd;
}
-
static inode_t *
-__inode_create (inode_table_t *table)
+inode_create(inode_table_t *table)
{
- inode_t *newi = NULL;
-
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
- return NULL;
- }
+ inode_t *newi = NULL;
- newi = mem_get0 (table->inode_pool);
- if (!newi) {
- goto out;
- }
-
- newi->table = table;
+ newi = mem_get0(table->inode_pool);
+ if (!newi) {
+ goto out;
+ }
- LOCK_INIT (&newi->lock);
+ newi->table = table;
- INIT_LIST_HEAD (&newi->fd_list);
- INIT_LIST_HEAD (&newi->list);
- INIT_LIST_HEAD (&newi->hash);
- INIT_LIST_HEAD (&newi->dentry_list);
+ LOCK_INIT(&newi->lock);
- newi->_ctx = GF_CALLOC (1, (sizeof (struct _inode_ctx) *
- table->xl->graph->xl_count),
- gf_common_mt_inode_ctx);
-
- if (newi->_ctx == NULL) {
- LOCK_DESTROY (&newi->lock);
- mem_put (newi);
- newi = NULL;
- goto out;
- }
+ INIT_LIST_HEAD(&newi->fd_list);
+ INIT_LIST_HEAD(&newi->list);
+ INIT_LIST_HEAD(&newi->hash);
+ INIT_LIST_HEAD(&newi->dentry_list);
- list_add (&newi->list, &table->lru);
- table->lru_size++;
+ newi->_ctx = GF_CALLOC(1, (sizeof(struct _inode_ctx) * table->ctxcount),
+ gf_common_mt_inode_ctx);
+ if (newi->_ctx == NULL) {
+ LOCK_DESTROY(&newi->lock);
+ mem_put(newi);
+ newi = NULL;
+ goto out;
+ }
out:
-
- return newi;
+ return newi;
}
-
inode_t *
-inode_new (inode_table_t *table)
+inode_new(inode_table_t *table)
{
- inode_t *inode = NULL;
-
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return NULL;
- }
-
- pthread_mutex_lock (&table->lock);
+ inode_t *inode = NULL;
+
+ if (!table) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INODE_TABLE_NOT_FOUND,
+ "inode not "
+ "found");
+ return NULL;
+ }
+
+ inode = inode_create(table);
+ if (inode) {
+ pthread_mutex_lock(&table->lock);
{
- inode = __inode_create (table);
- if (inode != NULL) {
- __inode_ref (inode);
- }
+ list_add(&inode->list, &table->lru);
+ table->lru_size++;
+ __inode_ref(inode, false);
}
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_unlock(&table->lock);
+ }
- return inode;
+ return inode;
}
-
+/* Reduce the ref count by value 'nref'
+ * Args:
+ * inode - address of the inode to operate on
+ * nref - number to subtracted from inode->ref
+ * if nref is 0, then the ref count is overwritten 0
+ *
+ * This function may cause the purging of the inode,
+ * hence to be used only in destructor functions and not otherwise.
+ */
static inode_t *
-__inode_lookup (inode_t *inode)
+__inode_ref_reduce_by_n(inode_t *inode, uint64_t nref)
{
- if (!inode)
- return NULL;
+ uint64_t nlookup = 0;
- inode->nlookup++;
+ GF_ASSERT(inode->ref >= nref);
- return inode;
-}
+ inode->ref -= nref;
+
+ if (!nref)
+ inode->ref = 0;
+
+ if (!inode->ref) {
+ inode->table->active_size--;
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ if (nlookup)
+ __inode_passivate(inode);
+ else
+ __inode_retire(inode);
+ }
+
+ return inode;
+}
static inode_t *
-__inode_forget (inode_t *inode, uint64_t nlookup)
+inode_forget_atomic(inode_t *inode, uint64_t nlookup)
{
- if (!inode)
- return NULL;
-
- GF_ASSERT (inode->nlookup >= nlookup);
+ uint64_t inode_lookup = 0;
- inode->nlookup -= nlookup;
+ if (!inode)
+ return NULL;
- if (!nlookup)
- inode->nlookup = 0;
+ if (nlookup == 0) {
+ GF_ATOMIC_INIT(inode->nlookup, 0);
+ } else {
+ inode_lookup = GF_ATOMIC_FETCH_SUB(inode->nlookup, nlookup);
+ GF_ASSERT(inode_lookup >= nlookup);
+ }
- return inode;
+ return inode;
}
-
dentry_t *
-__dentry_grep (inode_table_t *table, inode_t *parent, const char *name)
+__dentry_grep(inode_table_t *table, inode_t *parent, const char *name,
+ const int hash)
{
- int hash = 0;
- dentry_t *dentry = NULL;
- dentry_t *tmp = NULL;
-
- if (!table || !name || !parent)
- return NULL;
-
- hash = hash_dentry (parent, name, table->hashsize);
+ dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
- list_for_each_entry (tmp, &table->name_hash[hash], hash) {
- if (tmp->parent == parent && !strcmp (tmp->name, name)) {
- dentry = tmp;
- break;
- }
+ list_for_each_entry(tmp, &table->name_hash[hash], hash)
+ {
+ if (tmp->parent == parent && !strcmp(tmp->name, name)) {
+ dentry = tmp;
+ break;
}
+ }
- return dentry;
+ return dentry;
}
-
inode_t *
-inode_grep (inode_table_t *table, inode_t *parent, const char *name)
+inode_grep(inode_table_t *table, inode_t *parent, const char *name)
{
- inode_t *inode = NULL;
- dentry_t *dentry = NULL;
-
- if (!table || !parent || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "table || parent || name not found");
- return NULL;
- }
-
- pthread_mutex_lock (&table->lock);
- {
- dentry = __dentry_grep (table, parent, name);
-
- if (dentry)
- inode = dentry->inode;
-
- if (inode)
- __inode_ref (inode);
- }
- pthread_mutex_unlock (&table->lock);
-
- return inode;
+ inode_t *inode = NULL;
+ dentry_t *dentry = NULL;
+
+ if (!table || !parent || !name) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "table || parent || name"
+ " not found");
+ return NULL;
+ }
+
+ int hash = hash_dentry(parent, name, table->hashsize);
+
+ pthread_mutex_lock(&table->lock);
+ {
+ dentry = __dentry_grep(table, parent, name, hash);
+ if (dentry) {
+ inode = dentry->inode;
+ if (inode)
+ __inode_ref(inode, false);
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ return inode;
}
-
inode_t *
-inode_resolve (inode_table_t *table, char *path)
+inode_resolve(inode_table_t *table, char *path)
{
- char *tmp = NULL, *bname = NULL, *str = NULL, *saveptr = NULL;
- inode_t *inode = NULL, *parent = NULL;
+ char *tmp = NULL, *bname = NULL, *str = NULL, *saveptr = NULL;
+ inode_t *inode = NULL, *parent = NULL;
- if ((path == NULL) || (table == NULL)) {
- goto out;
- }
+ if ((path == NULL) || (table == NULL)) {
+ goto out;
+ }
- parent = inode_ref (table->root);
- str = tmp = gf_strdup (path);
+ parent = inode_ref(table->root);
+ str = tmp = gf_strdup(path);
+ if (str == NULL) {
+ goto out;
+ }
- while (1) {
- bname = strtok_r (str, "/", &saveptr);
- if (bname == NULL) {
- break;
- }
-
- if (inode != NULL) {
- inode_unref (inode);
- }
+ while (1) {
+ bname = strtok_r(str, "/", &saveptr);
+ if (bname == NULL) {
+ break;
+ }
- inode = inode_grep (table, parent, bname);
- if (inode == NULL) {
- break;
- }
+ if (inode != NULL) {
+ inode_unref(inode);
+ }
- if (parent != NULL) {
- inode_unref (parent);
- }
+ inode = inode_grep(table, parent, bname);
+ if (inode == NULL) {
+ break;
+ }
- parent = inode_ref (inode);
- str = NULL;
+ if (parent != NULL) {
+ inode_unref(parent);
}
- inode_unref (parent);
- GF_FREE (tmp);
+ parent = inode_ref(inode);
+ str = NULL;
+ }
+
+ inode_unref(parent);
+ GF_FREE(tmp);
out:
- return inode;
+ return inode;
}
-
int
-inode_grep_for_gfid (inode_table_t *table, inode_t *parent, const char *name,
- uuid_t gfid, ia_type_t *type)
+inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
+ uuid_t gfid, ia_type_t *type)
{
- inode_t *inode = NULL;
- dentry_t *dentry = NULL;
- int ret = -1;
-
- if (!table || !parent || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "table || parent || name not found");
- return ret;
- }
-
- pthread_mutex_lock (&table->lock);
- {
- dentry = __dentry_grep (table, parent, name);
+ inode_t *inode = NULL;
+ dentry_t *dentry = NULL;
+ int ret = -1;
+
+ if (!table || !parent || !name) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "table || parent || name"
+ " not found");
+ return ret;
+ }
- if (dentry)
- inode = dentry->inode;
+ int hash = hash_dentry(parent, name, table->hashsize);
- if (inode) {
- uuid_copy (gfid, inode->gfid);
- *type = inode->ia_type;
- ret = 0;
- }
+ pthread_mutex_lock(&table->lock);
+ {
+ dentry = __dentry_grep(table, parent, name, hash);
+ if (dentry) {
+ inode = dentry->inode;
+ if (inode) {
+ gf_uuid_copy(gfid, inode->gfid);
+ *type = inode->ia_type;
+ ret = 0;
+ }
}
- pthread_mutex_unlock (&table->lock);
+ }
+ pthread_mutex_unlock(&table->lock);
- return ret;
+ return ret;
}
-
/* return 1 if gfid is of root, 0 if not */
gf_boolean_t
-__is_root_gfid (uuid_t gfid)
+__is_root_gfid(uuid_t gfid)
{
- uuid_t root;
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- memset (root, 0, 16);
- root[15] = 1;
+ if (gf_uuid_compare(gfid, root) == 0)
+ return _gf_true;
- if (uuid_compare (gfid, root) == 0)
- return _gf_true;
-
- return _gf_false;
+ return _gf_false;
}
-
inode_t *
-__inode_find (inode_table_t *table, uuid_t gfid)
+__inode_find(inode_table_t *table, uuid_t gfid, const int hash)
{
- inode_t *inode = NULL;
- inode_t *tmp = NULL;
- int hash = 0;
+ inode_t *inode = NULL;
+ inode_t *tmp = NULL;
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
- goto out;
- }
-
- if (__is_root_gfid (gfid))
- return table->root;
+ if (__is_root_gfid(gfid))
+ return table->root;
- hash = hash_gfid (gfid, 65536);
-
- list_for_each_entry (tmp, &table->inode_hash[hash], hash) {
- if (uuid_compare (tmp->gfid, gfid) == 0) {
- inode = tmp;
- break;
- }
+ list_for_each_entry(tmp, &table->inode_hash[hash], hash)
+ {
+ if (gf_uuid_compare(tmp->gfid, gfid) == 0) {
+ inode = tmp;
+ break;
}
+ }
-out:
- return inode;
+ return inode;
}
-
inode_t *
-inode_find (inode_table_t *table, uuid_t gfid)
+inode_find(inode_table_t *table, uuid_t gfid)
{
- inode_t *inode = NULL;
-
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
- return NULL;
- }
-
- pthread_mutex_lock (&table->lock);
- {
- inode = __inode_find (table, gfid);
- if (inode)
- __inode_ref (inode);
- }
- pthread_mutex_unlock (&table->lock);
-
- return inode;
+ inode_t *inode = NULL;
+
+ if (!table) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INODE_TABLE_NOT_FOUND,
+ "table not "
+ "found");
+ return NULL;
+ }
+
+ int hash = hash_gfid(gfid, 65536);
+
+ pthread_mutex_lock(&table->lock);
+ {
+ inode = __inode_find(table, gfid, hash);
+ if (inode)
+ __inode_ref(inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ return inode;
}
-
static inode_t *
-__inode_link (inode_t *inode, inode_t *parent, const char *name,
- struct iatt *iatt)
+__inode_link(inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *iatt, const int dhash)
{
- dentry_t *dentry = NULL;
- dentry_t *old_dentry = NULL;
- inode_t *old_inode = NULL;
- inode_table_t *table = NULL;
- inode_t *link_inode = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *old_dentry = NULL;
+ inode_t *old_inode = NULL;
+ inode_table_t *table = NULL;
+ inode_t *link_inode = NULL;
+ char link_uuid_str[64] = {0}, parent_uuid_str[64] = {0};
- if (!inode)
- return NULL;
+ table = inode->table;
- table = inode->table;
- if (!table)
- return NULL;
+ if (parent) {
+ /* We should prevent inode linking between different
+ inode tables. This can cause errors which is very
+ hard to catch/debug. */
+ if (inode->table != parent->table) {
+ errno = EINVAL;
+ GF_ASSERT(!"link attempted b/w inodes of diff table");
+ }
- if (parent) {
- /* We should prevent inode linking between different
- inode tables. This can cause errors which is very
- hard to catch/debug. */
- if (inode->table != parent->table) {
- GF_ASSERT (!"link attempted b/w inodes of diff table");
- }
+ if (parent->ia_type != IA_IFDIR) {
+ errno = EINVAL;
+ GF_ASSERT(!"link attempted on non-directory parent");
+ return NULL;
}
- link_inode = inode;
+ if (!name || strlen(name) == 0) {
+ errno = EINVAL;
+ GF_ASSERT (!"link attempted with no basename on "
+ "parent");
+ return NULL;
+ }
+ }
- if (!__is_inode_hashed (inode)) {
- if (!iatt)
- return NULL;
+ link_inode = inode;
- if (uuid_is_null (iatt->ia_gfid))
- return NULL;
+ if (!__is_inode_hashed(inode)) {
+ if (!iatt) {
+ errno = EINVAL;
+ return NULL;
+ }
- uuid_copy (inode->gfid, iatt->ia_gfid);
- inode->ia_type = iatt->ia_type;
+ if (gf_uuid_is_null(iatt->ia_gfid)) {
+ errno = EINVAL;
+ return NULL;
+ }
- old_inode = __inode_find (table, inode->gfid);
+ int ihash = hash_gfid(iatt->ia_gfid, 65536);
- if (old_inode) {
- link_inode = old_inode;
- } else {
- __inode_hash (inode);
- }
- }
+ old_inode = __inode_find(table, iatt->ia_gfid, ihash);
- if (name) {
- if (!strcmp(name, ".") || !strcmp(name, ".."))
- return link_inode;
- }
+ if (old_inode) {
+ link_inode = old_inode;
+ } else {
+ gf_uuid_copy(inode->gfid, iatt->ia_gfid);
+ inode->ia_type = iatt->ia_type;
+ __inode_hash(inode, ihash);
+ }
+ } else {
+ /* @old_inode serves another important purpose - it indicates
+ to the code further below whether a dentry cycle check is
+ required or not (a new inode linkage can never result in
+ creation of a loop.)
+
+ if the given @inode is already hashed, it actually means
+ it is an "old" inode and deserves to undergo the cyclic
+ check.
+ */
+ old_inode = inode;
+ }
+
+ if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) {
+ return link_inode;
+ }
+
+ /* use only link_inode beyond this point */
+ if (parent) {
+ old_dentry = __dentry_grep(table, parent, name, dhash);
+
+ if (!old_dentry || old_dentry->inode != link_inode) {
+ dentry = dentry_create(link_inode, parent, name);
+ if (!dentry) {
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
+ LG_MSG_DENTRY_CREATE_FAILED,
+ "dentry create failed on "
+ "inode %s with parent %s",
+ uuid_utoa_r(link_inode->gfid, link_uuid_str),
+ uuid_utoa_r(parent->gfid, parent_uuid_str));
+ errno = ENOMEM;
+ return NULL;
+ }
- /* use only link_inode beyond this point */
- if (parent) {
- old_dentry = __dentry_grep (table, parent, name);
-
- if (!old_dentry || old_dentry->inode != link_inode) {
- dentry = __dentry_create (link_inode, parent, name);
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_ERROR,
- "dentry create failed on "
- "inode %s with parent %s",
- uuid_utoa (link_inode->gfid),
- uuid_utoa (parent->gfid));
- return NULL;
- }
- if (old_inode && __is_dentry_cyclic (dentry)) {
- __dentry_unset (dentry);
- return NULL;
- }
- __dentry_hash (dentry);
-
- if (old_dentry)
- __dentry_unset (old_dentry);
- }
+ /* dentry linking needs to happen inside lock */
+ dentry->parent = __inode_ref(parent, false);
+ list_add(&dentry->inode_list, &link_inode->dentry_list);
+
+ if (old_inode && __is_dentry_cyclic(dentry)) {
+ errno = ELOOP;
+ dentry_destroy(__dentry_unset(dentry));
+ return NULL;
+ }
+ __dentry_hash(dentry, dhash);
+
+ if (old_dentry)
+ dentry_destroy(__dentry_unset(old_dentry));
}
+ }
- return link_inode;
+ return link_inode;
}
-
inode_t *
-inode_link (inode_t *inode, inode_t *parent, const char *name,
- struct iatt *iatt)
+inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
{
- inode_table_t *table = NULL;
- inode_t *linked_inode = NULL;
+ int hash = 0;
+ inode_table_t *table = NULL;
+ inode_t *linked_inode = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return NULL;
+ }
+
+ table = inode->table;
+
+ if (parent && name) {
+ hash = hash_dentry(parent, name, table->hashsize);
+ }
+
+ if (name && strchr(name, '/')) {
+ GF_ASSERT(!"inode link attempted with '/' in name");
+ return NULL;
+ }
+
+ pthread_mutex_lock(&table->lock);
+ {
+ linked_inode = __inode_link(inode, parent, name, iatt, hash);
+ if (linked_inode)
+ __inode_ref(linked_inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table);
+
+ return linked_inode;
+}
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return NULL;
- }
+int
+inode_lookup(inode_t *inode)
+{
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- table = inode->table;
+ GF_ATOMIC_INC(inode->nlookup);
- pthread_mutex_lock (&table->lock);
- {
- linked_inode = __inode_link (inode, parent, name, iatt);
+ return 0;
+}
- if (linked_inode)
- __inode_ref (linked_inode);
- }
- pthread_mutex_unlock (&table->lock);
+int
+inode_ref_reduce_by_n(inode_t *inode, uint64_t nref)
+{
+ inode_table_t *table = NULL;
- inode_table_prune (table);
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- return linked_inode;
-}
+ table = inode->table;
+ pthread_mutex_lock(&table->lock);
+ {
+ __inode_ref_reduce_by_n(inode, nref);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table);
+
+ return 0;
+}
int
-inode_lookup (inode_t *inode)
+inode_forget(inode_t *inode, uint64_t nlookup)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return -1;
- }
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- __inode_lookup (inode);
- }
- pthread_mutex_unlock (&table->lock);
+ inode_forget_atomic(inode, nlookup);
- return 0;
-}
+ inode_table_prune(table);
+ return 0;
+}
int
-inode_forget (inode_t *inode, uint64_t nlookup)
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return -1;
- }
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- __inode_forget (inode, nlookup);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ inode_forget_atomic(inode, nlookup);
+ __inode_unref(inode, true);
+ }
+ pthread_mutex_unlock(&table->lock);
- inode_table_prune (table);
+ inode_table_prune(table);
- return 0;
+ return 0;
}
-
-static void
-__inode_unlink (inode_t *inode, inode_t *parent, const char *name)
+/*
+ * Invalidate an inode. This is invoked when a translator decides that an
+ * inode's cache is no longer valid. Any translator interested in taking action
+ * in this situation can define the invalidate callback.
+ */
+int
+inode_invalidate(inode_t *inode)
{
- dentry_t *dentry = NULL;
+ int ret = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
+
+ /*
+ * The master xlator is not in the graph but it can define an invalidate
+ * handler.
+ */
+ xl = inode->table->xl->ctx->master;
+ if (xl && xl->cbks->invalidate) {
+ old_THIS = THIS;
+ THIS = xl;
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+ if (ret)
+ return ret;
+ }
- if (!inode || !parent || !name)
- return;
+ xl = inode->table->xl->graph->first;
+ while (xl) {
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->invalidate)
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
- dentry = __dentry_search_for_inode (inode, parent->gfid, name);
+ if (ret)
+ break;
- /* dentry NULL for corrupted backend */
- if (dentry)
- __dentry_unset (dentry);
+ xl = xl->next;
+ }
+
+ return ret;
}
+static dentry_t *
+__inode_unlink(inode_t *inode, inode_t *parent, const char *name)
+{
+ dentry_t *dentry = NULL;
+ char pgfid[64] = {0};
+ char gfid[64] = {0};
+
+ dentry = __dentry_search_for_inode(inode, parent->gfid, name);
+
+ /* dentry NULL for corrupted backend */
+ if (dentry) {
+ dentry = __dentry_unset(dentry);
+ } else {
+ gf_smsg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+ "parent-gfid=%s name=%s gfid%s",
+ uuid_utoa_r(parent->gfid, pgfid), name,
+ uuid_utoa_r(inode->gfid, gfid), NULL);
+ }
+
+ return dentry;
+}
void
-inode_unlink (inode_t *inode, inode_t *parent, const char *name)
+inode_unlink(inode_t *inode, inode_t *parent, const char *name)
{
- inode_table_t *table = NULL;
+ inode_table_t *table;
+ dentry_t *dentry;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ if (!inode || !parent || !name)
+ return;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- __inode_unlink (inode, parent, name);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ dentry = __inode_unlink(inode, parent, name);
+ }
+ pthread_mutex_unlock(&table->lock);
- inode_table_prune (table);
-}
+ dentry_destroy(dentry);
+ inode_table_prune(table);
+}
int
-inode_rename (inode_table_t *table, inode_t *srcdir, const char *srcname,
- inode_t *dstdir, const char *dstname, inode_t *inode,
- struct iatt *iatt)
+inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
+ inode_t *dstdir, const char *dstname, inode_t *inode,
+ struct iatt *iatt)
{
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return -1;
- }
+ int hash = 0;
+ dentry_t *dentry = NULL;
- table = inode->table;
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- pthread_mutex_lock (&table->lock);
- {
- __inode_link (inode, dstdir, dstname, iatt);
- __inode_unlink (inode, srcdir, srcname);
- }
- pthread_mutex_unlock (&table->lock);
+ table = inode->table;
- inode_table_prune (table);
+ if (dstname && strchr(dstname, '/')) {
+ GF_ASSERT(!"inode link attempted with '/' in name");
+ return -1;
+ }
- return 0;
-}
+ if (dstdir && dstname) {
+ hash = hash_dentry(dstdir, dstname, table->hashsize);
+ }
+
+ pthread_mutex_lock(&table->lock);
+ {
+ __inode_link(inode, dstdir, dstname, iatt, hash);
+ /* pick the old dentry */
+ dentry = __inode_unlink(inode, srcdir, srcname);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ /* free the old dentry */
+ dentry_destroy(dentry);
+ inode_table_prune(table);
+
+ return 0;
+}
static dentry_t *
-__dentry_search_arbit (inode_t *inode)
+__dentry_search_arbit(inode_t *inode)
{
- dentry_t *dentry = NULL;
- dentry_t *trav = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *trav = NULL;
- if (!inode)
- return NULL;
+ if (!inode)
+ return NULL;
- list_for_each_entry (trav, &inode->dentry_list, inode_list) {
- if (__is_dentry_hashed (trav)) {
- dentry = trav;
- break;
- }
+ list_for_each_entry(trav, &inode->dentry_list, inode_list)
+ {
+ if (__is_dentry_hashed(trav)) {
+ dentry = trav;
+ break;
}
+ }
- if (!dentry) {
- list_for_each_entry (trav, &inode->dentry_list, inode_list) {
- dentry = trav;
- break;
- }
+ if (!dentry) {
+ list_for_each_entry(trav, &inode->dentry_list, inode_list)
+ {
+ dentry = trav;
+ break;
}
+ }
- return dentry;
+ return dentry;
}
-
inode_t *
-inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
+inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
{
- inode_t *parent = NULL;
- inode_table_t *table = NULL;
- dentry_t *dentry = NULL;
-
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return NULL;
+ inode_t *parent = NULL;
+ inode_table_t *table = NULL;
+ dentry_t *dentry = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return NULL;
+ }
+
+ table = inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ if (pargfid && !gf_uuid_is_null(pargfid) && name) {
+ dentry = __dentry_search_for_inode(inode, pargfid, name);
+ } else {
+ dentry = __dentry_search_arbit(inode);
}
- table = inode->table;
-
- pthread_mutex_lock (&table->lock);
- {
- if (pargfid && !uuid_is_null (pargfid) && name) {
- dentry = __dentry_search_for_inode (inode, pargfid, name);
- } else {
- dentry = __dentry_search_arbit (inode);
- }
-
- if (dentry)
- parent = dentry->parent;
+ if (dentry)
+ parent = dentry->parent;
- if (parent)
- __inode_ref (parent);
- }
- pthread_mutex_unlock (&table->lock);
+ if (parent)
+ __inode_ref(parent, false);
+ }
+ pthread_mutex_unlock(&table->lock);
- return parent;
+ return parent;
}
+static int
+__inode_has_dentry(inode_t *inode)
+{
+ return !list_empty(&inode->dentry_list);
+}
int
-__inode_path (inode_t *inode, const char *name, char **bufp)
-{
- inode_table_t *table = NULL;
- inode_t *itrav = NULL;
- dentry_t *trav = NULL;
- size_t i = 0, size = 0;
- int64_t ret = 0;
- int len = 0;
- char *buf = NULL;
-
- if (!inode || uuid_is_null (inode->gfid)) {
- GF_ASSERT (0);
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "invalid inode");
- return -1;
- }
+inode_has_dentry(inode_t *inode)
+{
+ int dentry_present = 0;
- table = inode->table;
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return 0;
+ }
- itrav = inode;
- for (trav = __dentry_search_arbit (itrav); trav;
- trav = __dentry_search_arbit (itrav)) {
- itrav = trav->parent;
- i ++; /* "/" */
- i += strlen (trav->name);
- if (i > PATH_MAX) {
- gf_log (table->name, GF_LOG_CRITICAL,
- "possible infinite loop detected, "
- "forcing break. name=(%s)", name);
- ret = -ENOENT;
- goto out;
- }
- }
+ LOCK(&inode->lock);
+ {
+ dentry_present = __inode_has_dentry(inode);
+ }
+ UNLOCK(&inode->lock);
- if (!__is_root_gfid (itrav->gfid)) {
- /* "<gfid:00000000-0000-0000-0000-000000000000>"/path */
- i += GFID_STR_PFX_LEN;
- }
+ return dentry_present;
+}
+
+int
+__inode_path(inode_t *inode, const char *name, char **bufp)
+{
+ inode_table_t *table = NULL;
+ inode_t *itrav = NULL;
+ dentry_t *trav = NULL;
+ size_t i = 0, size = 0;
+ int64_t ret = 0;
+ int len = 0;
+ char *buf = NULL;
+
+ if (!inode || gf_uuid_is_null(inode->gfid)) {
+ GF_ASSERT(0);
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid inode");
+ return -EINVAL;
+ }
+
+ table = inode->table;
+
+ itrav = inode;
+ for (trav = __dentry_search_arbit(itrav); trav;
+ trav = __dentry_search_arbit(itrav)) {
+ itrav = trav->parent;
+ i++; /* "/" */
+ i += strlen(trav->name);
+ if (i > PATH_MAX) {
+ gf_smsg(table->name, GF_LOG_CRITICAL, 0, LG_MSG_DENTRY_CYCLIC_LOOP,
+ "name=%s", name, NULL);
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ if (!__is_root_gfid(itrav->gfid)) {
+ /* "<gfid:00000000-0000-0000-0000-000000000000>"/path */
+ i += GFID_STR_PFX_LEN;
+ }
+
+ if (name) {
+ i++;
+ i += strlen(name);
+ }
+
+ ret = i;
+ size = i + 1;
+ buf = GF_CALLOC(size, sizeof(char), gf_common_mt_char);
+ if (buf) {
+ buf[size - 1] = 0;
if (name) {
- i++;
- i += strlen (name);
+ len = strlen(name);
+ memcpy(buf + (i - len), name, len);
+ buf[i - len - 1] = '/';
+ i -= (len + 1);
}
- ret = i;
- size = i + 1;
- buf = GF_CALLOC (size, sizeof (char), gf_common_mt_char);
- if (buf) {
-
- buf[size - 1] = 0;
-
- if (name) {
- len = strlen (name);
- strncpy (buf + (i - len), name, len);
- buf[i-len-1] = '/';
- i -= (len + 1);
- }
-
- itrav = inode;
- for (trav = __dentry_search_arbit (itrav); trav;
- trav = __dentry_search_arbit (itrav)) {
- itrav = trav->parent;
- len = strlen (trav->name);
- strncpy (buf + (i - len), trav->name, len);
- buf[i-len-1] = '/';
- i -= (len + 1);
- }
-
- if (!__is_root_gfid (itrav->gfid)) {
- snprintf (&buf[i-GFID_STR_PFX_LEN], GFID_STR_PFX_LEN,
- INODE_PATH_FMT, uuid_utoa (itrav->gfid));
- buf[i-1] = '>';
- }
+ itrav = inode;
+ for (trav = __dentry_search_arbit(itrav); trav;
+ trav = __dentry_search_arbit(itrav)) {
+ itrav = trav->parent;
+ len = strlen(trav->name);
+ memcpy(buf + (i - len), trav->name, len);
+ buf[i - len - 1] = '/';
+ i -= (len + 1);
+ }
- *bufp = buf;
- } else {
- ret = -ENOMEM;
+ if (!__is_root_gfid(itrav->gfid)) {
+ snprintf(&buf[i - GFID_STR_PFX_LEN], GFID_STR_PFX_LEN,
+ INODE_PATH_FMT, uuid_utoa(itrav->gfid));
+ buf[i - 1] = '>';
}
+ *bufp = buf;
+ } else {
+ ret = -ENOMEM;
+ }
+
out:
- if (__is_root_gfid (inode->gfid) && !name) {
- ret = 1;
- if (buf) {
- GF_FREE (buf);
- }
- buf = GF_CALLOC (ret + 1, sizeof (char), gf_common_mt_char);
- if (buf) {
- strcpy (buf, "/");
- *bufp = buf;
- } else {
- ret = -ENOMEM;
- }
+ if (__is_root_gfid(inode->gfid) && !name) {
+ ret = 1;
+ GF_FREE(buf);
+ buf = GF_CALLOC(ret + 1, sizeof(char), gf_common_mt_char);
+ if (buf) {
+ strcpy(buf, "/");
+ *bufp = buf;
+ } else {
+ ret = -ENOMEM;
}
+ }
- if (ret < 0)
- *bufp = NULL;
- return ret;
+ if (ret < 0)
+ *bufp = NULL;
+ return ret;
}
-
int
-inode_path (inode_t *inode, const char *name, char **bufp)
+inode_path(inode_t *inode, const char *name, char **bufp)
{
- inode_table_t *table = NULL;
- int ret = -1;
+ inode_table_t *table = NULL;
+ int ret = -1;
- if (!inode)
- return -1;
+ if (!inode)
+ return -EINVAL;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- ret = __inode_path (inode, name, bufp);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ ret = __inode_path(inode, name, bufp);
+ }
+ pthread_mutex_unlock(&table->lock);
- return ret;
+ return ret;
}
-
-static int
-inode_table_prune (inode_table_t *table)
+void
+__inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit)
{
- int ret = 0;
- struct list_head purge = {0, };
- inode_t *del = NULL;
- inode_t *tmp = NULL;
- inode_t *entry = NULL;
-
- if (!table)
- return -1;
-
- INIT_LIST_HEAD (&purge);
+ table->lru_limit = lru_limit;
+ return;
+}
- pthread_mutex_lock (&table->lock);
- {
- while (table->lru_limit
- && table->lru_size > (table->lru_limit)) {
+void
+inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit)
+{
+ pthread_mutex_lock(&table->lock);
+ {
+ __inode_table_set_lru_limit(table, lru_limit);
+ }
+ pthread_mutex_unlock(&table->lock);
- entry = list_entry (table->lru.next, inode_t, list);
+ inode_table_prune(table);
- table->lru_size--;
- __inode_retire (entry);
+ return;
+}
- ret++;
+static int
+inode_table_prune(inode_table_t *table)
+{
+ int ret = 0;
+ int ret1 = 0;
+ struct list_head purge = {
+ 0,
+ };
+ inode_t *del = NULL;
+ inode_t *tmp = NULL;
+ inode_t *entry = NULL;
+ uint64_t nlookup = 0;
+ int64_t lru_size = 0;
+
+ if (!table)
+ return -1;
+
+ INIT_LIST_HEAD(&purge);
+
+ pthread_mutex_lock(&table->lock);
+ {
+ if (!table->lru_limit)
+ goto purge_list;
+
+ lru_size = table->lru_size;
+ while (lru_size > (table->lru_limit)) {
+ if (list_empty(&table->lru)) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_INODE_LIST,
+ "Empty inode lru list found"
+ " but with (%d) lru_size",
+ table->lru_size);
+ break;
+ }
+
+ lru_size--;
+ entry = list_entry(table->lru.next, inode_t, list);
+ /* The logic of invalidation is required only if invalidator_fn
+ is present */
+ if (table->invalidator_fn) {
+ /* check for valid inode with 'nlookup' */
+ nlookup = GF_ATOMIC_GET(entry->nlookup);
+ if (nlookup) {
+ if (entry->invalidate_sent) {
+ list_move_tail(&entry->list, &table->lru);
+ continue;
+ }
+ __inode_ref(entry, true);
+ tmp = entry;
+ break;
}
-
- list_splice_init (&table->purge, &purge);
- table->purge_size = 0;
- }
- pthread_mutex_unlock (&table->lock);
-
+ }
+
+ table->lru_size--;
+ __inode_retire(entry);
+ ret++;
+ }
+
+ purge_list:
+ list_splice_init(&table->purge, &purge);
+ table->purge_size = 0;
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ /* Pick 1 inode for invalidation */
+ if (tmp) {
+ xlator_t *old_THIS = THIS;
+ THIS = table->invalidator_xl;
+ ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
+ THIS = old_THIS;
+ pthread_mutex_lock(&table->lock);
{
- list_for_each_entry_safe (del, tmp, &purge, list) {
- list_del_init (&del->list);
- __inode_forget (del, 0);
- __inode_destroy (del);
- }
- }
-
- return ret;
+ if (!ret1) {
+ tmp->invalidate_sent = true;
+ __inode_unref(tmp, false);
+ } else {
+ /* Move this back to the lru list*/
+ __inode_unref(tmp, true);
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
+ }
+
+ /* Just so that if purge list is handled too, then clear it off */
+ list_for_each_entry_safe(del, tmp, &purge, list)
+ {
+ list_del_init(&del->list);
+ inode_forget_atomic(del, 0);
+ __inode_destroy(del);
+ }
+
+ return ret;
}
-
static void
-__inode_table_init_root (inode_table_t *table)
+__inode_table_init_root(inode_table_t *table)
{
- inode_t *root = NULL;
- struct iatt iatt = {0, };
+ inode_t *root = NULL;
+ struct iatt iatt = {
+ 0,
+ };
- if (!table)
- return;
+ if (!table)
+ return;
- root = __inode_create (table);
+ root = inode_create(table);
- iatt.ia_gfid[15] = 1;
- iatt.ia_ino = 1;
- iatt.ia_type = IA_IFDIR;
+ list_add(&root->list, &table->lru);
+ table->lru_size++;
- table->root = root;
- __inode_link (root, NULL, NULL, &iatt);
-}
+ iatt.ia_gfid[15] = 1;
+ iatt.ia_ino = 1;
+ iatt.ia_type = IA_IFDIR;
+ __inode_link(root, NULL, NULL, &iatt, 0);
+ table->root = root;
+}
inode_table_t *
-inode_table_new (size_t lru_limit, xlator_t *xl)
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl)
{
- inode_table_t *new = NULL;
- int ret = -1;
- int i = 0;
+ inode_table_t *new = NULL;
+ uint32_t mem_pool_size = lru_limit;
+ int ret = -1;
+ int i = 0;
- new = (void *)GF_CALLOC(1, sizeof (*new), gf_common_mt_inode_table_t);
- if (!new)
- return NULL;
+ new = (void *)GF_CALLOC(1, sizeof(*new), gf_common_mt_inode_table_t);
+ if (!new)
+ return NULL;
- new->xl = xl;
+ new->xl = xl;
+ new->ctxcount = xl->graph->xl_count + 1;
- new->lru_limit = lru_limit;
+ new->lru_limit = lru_limit;
+ new->invalidator_fn = invalidator_fn;
+ new->invalidator_xl = invalidator_xl;
- new->hashsize = 14057; /* TODO: Random Number?? */
+ new->hashsize = 14057; /* TODO: Random Number?? */
- /* In case FUSE is initing the inode table. */
- if (lru_limit == 0)
- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
+ /* In case FUSE is initing the inode table. */
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
+ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES;
- new->inode_pool = mem_pool_new (inode_t, lru_limit);
+ new->inode_pool = mem_pool_new(inode_t, mem_pool_size);
+ if (!new->inode_pool)
+ goto out;
- if (!new->inode_pool)
- goto out;
+ new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);
+ if (!new->dentry_pool)
+ goto out;
- new->dentry_pool = mem_pool_new (dentry_t, lru_limit);
+ new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head),
+ gf_common_mt_list_head);
+ if (!new->inode_hash)
+ goto out;
- if (!new->dentry_pool)
- goto out;
+ new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head),
+ gf_common_mt_list_head);
+ if (!new->name_hash)
+ goto out;
- new->inode_hash = (void *)GF_CALLOC (65536,
- sizeof (struct list_head),
- gf_common_mt_list_head);
- if (!new->inode_hash)
- goto out;
+ /* if number of fd open in one process is more than this,
+ we may hit perf issues */
+ new->fd_mem_pool = mem_pool_new(fd_t, 1024);
- new->name_hash = (void *)GF_CALLOC (new->hashsize,
- sizeof (struct list_head),
- gf_common_mt_list_head);
- if (!new->name_hash)
- goto out;
+ if (!new->fd_mem_pool)
+ goto out;
- /* if number of fd open in one process is more than this,
- we may hit perf issues */
- new->fd_mem_pool = mem_pool_new (fd_t, 1024);
+ for (i = 0; i < 65536; i++) {
+ INIT_LIST_HEAD(&new->inode_hash[i]);
+ }
- if (!new->fd_mem_pool)
- goto out;
+ for (i = 0; i < new->hashsize; i++) {
+ INIT_LIST_HEAD(&new->name_hash[i]);
+ }
- for (i = 0; i < 65536; i++) {
- INIT_LIST_HEAD (&new->inode_hash[i]);
- }
+ INIT_LIST_HEAD(&new->active);
+ INIT_LIST_HEAD(&new->lru);
+ INIT_LIST_HEAD(&new->purge);
+ INIT_LIST_HEAD(&new->invalidate);
+ ret = gf_asprintf(&new->name, "%s/inode", xl->name);
+ if (-1 == ret) {
+ /* TODO: This should be ok to continue, check with avati */
+ ;
+ }
- for (i = 0; i < new->hashsize; i++) {
- INIT_LIST_HEAD (&new->name_hash[i]);
- }
+ new->cleanup_started = _gf_false;
- INIT_LIST_HEAD (&new->active);
- INIT_LIST_HEAD (&new->lru);
- INIT_LIST_HEAD (&new->purge);
+ __inode_table_init_root(new);
- ret = gf_asprintf (&new->name, "%s/inode", xl->name);
- if (-1 == ret) {
- /* TODO: This should be ok to continue, check with avati */
- ;
- }
+ pthread_mutex_init(&new->lock, NULL);
- __inode_table_init_root (new);
+ ret = 0;
+out:
+ if (ret) {
+ if (new) {
+ GF_FREE(new->inode_hash);
+ GF_FREE(new->name_hash);
+ if (new->dentry_pool)
+ mem_pool_destroy(new->dentry_pool);
+ if (new->inode_pool)
+ mem_pool_destroy(new->inode_pool);
+ GF_FREE(new);
+ new = NULL;
+ }
+ }
+
+ return new;
+}
- pthread_mutex_init (&new->lock, NULL);
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl)
+{
+ /* Only fuse for now requires the inode table with invalidator */
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+}
- ret = 0;
-out:
- if (ret) {
- if (new) {
- if (new->inode_hash)
- GF_FREE (new->inode_hash);
- if (new->name_hash)
- GF_FREE (new->name_hash);
- if (new->dentry_pool)
- mem_pool_destroy (new->dentry_pool);
- if (new->inode_pool)
- mem_pool_destroy (new->inode_pool);
- GF_FREE (new);
- new = NULL;
- }
+int
+inode_table_ctx_free(inode_table_t *table)
+{
+ int ret = 0;
+ inode_t *del = NULL;
+ inode_t *tmp = NULL;
+ int purge_count = 0;
+ int lru_count = 0;
+ int active_count = 0;
+ xlator_t *this = NULL;
+ int itable_size = 0;
+
+ if (!table)
+ return -1;
+
+ this = THIS;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ list_for_each_entry_safe(del, tmp, &table->purge, list)
+ {
+ if (del->_ctx) {
+ __inode_ctx_free(del);
+ purge_count++;
+ }
}
- return new;
+ list_for_each_entry_safe(del, tmp, &table->lru, list)
+ {
+ if (del->_ctx) {
+ __inode_ctx_free(del);
+ lru_count++;
+ }
+ }
+
+ /* should the contexts of active inodes be freed?
+ * Since before this function being called fds would have
+ * been migrated and would have held the ref on the new
+ * inode from the new inode table, the older inode would not
+ * be used.
+ */
+ list_for_each_entry_safe(del, tmp, &table->active, list)
+ {
+ if (del->_ctx) {
+ __inode_ctx_free(del);
+ active_count++;
+ }
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ ret = purge_count + lru_count + active_count;
+ itable_size = table->active_size + table->lru_size + table->purge_size;
+ gf_msg_callingfn(this->name, GF_LOG_INFO, 0, LG_MSG_INODE_CONTEXT_FREED,
+ "total %d (itable size: "
+ "%d) inode contexts have been freed (active: %d, ("
+ "active size: %d), lru: %d, (lru size: %d), purge: "
+ "%d, (purge size: %d))",
+ ret, itable_size, active_count, table->active_size,
+ lru_count, table->lru_size, purge_count,
+ table->purge_size);
+ return ret;
}
+void
+inode_table_destroy_all(glusterfs_ctx_t *ctx)
+{
+ glusterfs_graph_t *trav_graph = NULL, *tmp = NULL;
+ xlator_t *tree = NULL;
+ inode_table_t *inode_table = NULL;
+
+ if (ctx == NULL)
+ goto out;
+
+ /* TODO: Traverse ctx->graphs with in ctx->lock and also the other
+ * graph additions and traversals in ctx->lock.
+ */
+ list_for_each_entry_safe(trav_graph, tmp, &ctx->graphs, list)
+ {
+ tree = trav_graph->first;
+ inode_table = tree->itable;
+ tree->itable = NULL;
+ if (inode_table)
+ inode_table_destroy(inode_table);
+ }
+out:
+ return;
+}
-inode_t *
-inode_from_path (inode_table_t *itable, const char *path)
+void
+inode_table_destroy(inode_table_t *inode_table)
{
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- inode_t *root = NULL;
- inode_t *curr = NULL;
- char *pathname = NULL;
- char *component = NULL, *next_component = NULL;
- char *strtokptr = NULL;
+ inode_t *trav = NULL;
- if (!itable || !path)
- return NULL;
+ if (inode_table == NULL)
+ return;
- /* top-down approach */
- pathname = gf_strdup (path);
- if (pathname == NULL) {
- goto out;
- }
+ /* Ideally at this point in time, there should be no inodes with
+ * refs remaining. But there are quite a few chances where the inodes
+ * leak. So we can take three approaches for cleaning up the inode table:
+ * 1. Assume there are no leaks and then send a forget on all the inodes
+ * in lru list.(If no leaks there should be no inodes in active list)
+ * 2. Knowing there could be leaks and not freeing those inodes will
+ * also not free its inode context and this could leak a lot of
+ * memory, force free the inodes by changing the ref to 0.
+ * The problem with this is that any reference to inode after this
+ * calling this function will lead to a crash.
+ * 3. Knowing there could be leakes, just free the inode contexts of
+ * all the inodes. and let the inodes be alive. This way the major
+ * memory consumed by the inode contexts are freed, but there can
+ * be errors when any inode contexts are accessed after destroying
+ * this table.
+ *
+ * Not sure which is the approach to be taken, going by approach 2.
+ */
+
+ /* Approach 3:
+ * ret = inode_table_ctx_free (inode_table);
+ */
+ pthread_mutex_lock(&inode_table->lock);
+ {
+ inode_table->cleanup_started = _gf_true;
+ /* Process lru list first as we need to unset their dentry
+ * entries (the ones which may not be unset during
+ * '__inode_passivate' as they were hashed) which in turn
+ * shall unref their parent
+ *
+ * These parent inodes when unref'ed may well again fall
+ * into lru list and if we are at the end of traversing
+ * the list, we may miss to delete/retire that entry. Hence
+ * traverse the lru list till it gets empty.
+ */
+ while (!list_empty(&inode_table->lru)) {
+ trav = list_first_entry(&inode_table->lru, inode_t, list);
+ inode_forget_atomic(trav, 0);
+ __inode_retire(trav);
+ inode_table->lru_size--;
+ }
+
+ /* Same logic for invalidate list */
+ while (!list_empty(&inode_table->invalidate)) {
+ trav = list_first_entry(&inode_table->invalidate, inode_t, list);
+ inode_forget_atomic(trav, 0);
+ __inode_retire(trav);
+ inode_table->invalidate_size--;
+ }
+
+ while (!list_empty(&inode_table->active)) {
+ trav = list_first_entry(&inode_table->active, inode_t, list);
+ /* forget and unref the inode to retire and add it to
+ * purge list. By this time there should not be any
+ * inodes present in the active list except for root
+ * inode. Its a ref_leak otherwise. */
+ if (trav && (trav != inode_table->root))
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_REF_COUNT,
+ "Active inode(%p) with refcount"
+ "(%d) found during cleanup",
+ trav, trav->ref);
+ inode_forget_atomic(trav, 0);
+ __inode_ref_reduce_by_n(trav, 0);
+ }
+ }
+ pthread_mutex_unlock(&inode_table->lock);
+
+ inode_table_prune(inode_table);
+
+ GF_FREE(inode_table->inode_hash);
+ GF_FREE(inode_table->name_hash);
+ if (inode_table->dentry_pool)
+ mem_pool_destroy(inode_table->dentry_pool);
+ if (inode_table->inode_pool)
+ mem_pool_destroy(inode_table->inode_pool);
+ if (inode_table->fd_mem_pool)
+ mem_pool_destroy(inode_table->fd_mem_pool);
+
+ pthread_mutex_destroy(&inode_table->lock);
+
+ GF_FREE(inode_table->name);
+ GF_FREE(inode_table);
+
+ return;
+}
- root = itable->root;
- parent = inode_ref (root);
- component = strtok_r (pathname, "/", &strtokptr);
+inode_t *
+inode_from_path(inode_table_t *itable, const char *path)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ inode_t *root = NULL;
+ inode_t *curr = NULL;
+ char *pathname = NULL;
+ char *component = NULL, *next_component = NULL;
+ char *strtokptr = NULL;
- if (component == NULL)
- /* root inode */
- inode = inode_ref (parent);
+ if (!itable || !path)
+ return NULL;
- while (component) {
- curr = inode_grep (itable, parent, component);
+ /* top-down approach */
+ pathname = gf_strdup(path);
+ if (pathname == NULL) {
+ goto out;
+ }
- if (curr == NULL) {
- strtok_r (NULL, "/", &strtokptr);
- break;
- }
+ root = itable->root;
+ parent = inode_ref(root);
+ component = strtok_r(pathname, "/", &strtokptr);
- next_component = strtok_r (NULL, "/", &strtokptr);
+ if (component == NULL)
+ /* root inode */
+ inode = inode_ref(parent);
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- inode = curr;
- }
+ while (component) {
+ curr = inode_grep(itable, parent, component);
- component = next_component;
+ if (curr == NULL) {
+ strtok_r(NULL, "/", &strtokptr);
+ break;
}
- if (parent)
- inode_unref (parent);
+ next_component = strtok_r(NULL, "/", &strtokptr);
+
+ if (next_component) {
+ inode_unref(parent);
+ parent = curr;
+ curr = NULL;
+ } else {
+ inode = curr;
+ }
+
+ component = next_component;
+ }
+
+ if (parent)
+ inode_unref(parent);
- if (pathname)
- GF_FREE (pathname);
+ GF_FREE(pathname);
out:
- return inode;
+ return inode;
}
+void
+inode_set_need_lookup(inode_t *inode, xlator_t *this)
+{
+ uint64_t need_lookup = LOOKUP_NEEDED;
-int
-__inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
- uint64_t *value2_p)
-{
- int ret = 0;
- int index = 0;
- int set_idx = -1;
-
- if (!inode || !xlator)
- return -1;
-
- for (index = 0; index < xlator->graph->xl_count; index++) {
- if (!inode->_ctx[index].xl_key) {
- if (set_idx == -1)
- set_idx = index;
- /* dont break, to check if key already exists
- further on */
- }
- if (inode->_ctx[index].xl_key == xlator) {
- set_idx = index;
- break;
- }
- }
+ if (!inode || !this)
+ return;
- if (set_idx == -1) {
- ret = -1;
- goto out;;
- }
+ inode_ctx_set(inode, this, &need_lookup);
- inode->_ctx[set_idx].xl_key = xlator;
- if (value1_p)
- inode->_ctx[set_idx].value1 = *value1_p;
- if (value2_p)
- inode->_ctx[set_idx].value2 = *value2_p;
-out:
+ return;
+}
+
+/* Function behaviour:
+ * Function return true if inode_ctx is not present,
+ * or value stored in inode_ctx is LOOKUP_NEEDED.
+ * If inode_ctx value is LOOKUP_NOT_NEEDED, which means
+ * inode_ctx is present for xlator this, but no lookup
+ * needed.
+ */
+gf_boolean_t
+inode_needs_lookup(inode_t *inode, xlator_t *this)
+{
+ uint64_t need_lookup = 0;
+ gf_boolean_t ret = _gf_false;
+ int op_ret = -1;
+
+ if (!inode || !this)
return ret;
+
+ op_ret = inode_ctx_get(inode, this, &need_lookup);
+ if (op_ret == -1) {
+ ret = _gf_true;
+ } else if (need_lookup == LOOKUP_NEEDED) {
+ ret = _gf_true;
+ need_lookup = LOOKUP_NOT_NEEDED;
+ inode_ctx_set(inode, this, &need_lookup);
+ }
+
+ return ret;
+}
+
+int
+__inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
+{
+ int ret = 0;
+ int set_idx = -1;
+
+ if (!inode || !xlator || !inode->_ctx)
+ return -1;
+
+ set_idx = __inode_get_xl_index(inode, xlator);
+ if (set_idx == -1) {
+ ret = -1;
+ goto out;
+ ;
+ }
+
+ inode->_ctx[set_idx].xl_key = xlator;
+ if (value1_p)
+ inode->_ctx[set_idx].value1 = *value1_p;
+ if (value2_p)
+ inode->_ctx[set_idx].value2 = *value2_p;
+out:
+ return ret;
+}
+
+int
+__inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ return __inode_ctx_set2(inode, xlator, value1_p, NULL);
}
+int
+__inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ return __inode_ctx_set2(inode, xlator, NULL, value2_p);
+}
int
-inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
- uint64_t *value2_p)
+inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
{
- int ret = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_set2 (inode, xlator, value1_p, value2_p);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set2(inode, xlator, value1_p, value2_p);
+ }
+ UNLOCK(&inode->lock);
- return ret;
+ return ret;
}
+int
+inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set1(inode, xlator, value2_p);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
int
-__inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2)
+inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
{
- int index = 0;
- int ret = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
- for (index = 0; index < xlator->graph->xl_count; index++) {
- if (inode->_ctx[index].xl_key == xlator)
- break;
- }
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set0(inode, xlator, value1_p);
+ }
+ UNLOCK(&inode->lock);
- if (index == xlator->graph->xl_count) {
- ret = -1;
- goto out;
- }
+ return ret;
+}
- if (value1)
- *value1 = inode->_ctx[index].value1;
+int
+__inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = -1;
+
+ if (!inode || !xlator || !inode->_ctx)
+ goto out;
- if (value2)
- *value2 = inode->_ctx[index].value2;
+ index = xlator->xl_id;
+ if (inode->_ctx[index].xl_key != xlator)
+ goto out;
+ if (inode->_ctx[index].value1) {
+ if (value1) {
+ *value1 = inode->_ctx[index].value1;
+ ret = 0;
+ }
+ }
+ if (inode->_ctx[index].value2) {
+ if (value2) {
+ *value2 = inode->_ctx[index].value2;
+ ret = 0;
+ }
+ }
out:
- return ret;
+ return ret;
}
+int
+__inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1)
+{
+ uint64_t tmp_value = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_get2(inode, xlator, &tmp_value, NULL);
+ if (!ret && value1)
+ *value1 = tmp_value;
+
+ return ret;
+}
int
-inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2)
+__inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2)
{
- int ret = 0;
+ uint64_t tmp_value = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ ret = __inode_ctx_get2(inode, xlator, NULL, &tmp_value);
+ if (!ret && value2)
+ *value2 = tmp_value;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get2 (inode, xlator, value1, value2);
- }
- UNLOCK (&inode->lock);
+ return ret;
+}
- return ret;
+int
+inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get2(inode, xlator, value1, value2);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
+int
+inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get1(inode, xlator, value2);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
int
-inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2)
+inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1)
{
- int index = 0;
- int ret = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
- LOCK (&inode->lock);
- {
- for (index = 0; index < xlator->graph->xl_count; index++) {
- if (inode->_ctx[index].xl_key == xlator)
- break;
- }
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get0(inode, xlator, value1);
+ }
+ UNLOCK(&inode->lock);
- if (index == xlator->graph->xl_count) {
- ret = -1;
- goto unlock;
- }
+ return ret;
+}
- if (value1)
- *value1 = inode->_ctx[index].value1;
- if (value2)
- *value2 = inode->_ctx[index].value2;
+int
+inode_ctx_del2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = 0;
- inode->_ctx[index].key = 0;
- inode->_ctx[index].value1 = 0;
- inode->_ctx[index].value2 = 0;
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK(&inode->lock);
+ {
+ if (!inode->_ctx)
+ goto unlock;
+
+ index = xlator->xl_id;
+ if (inode->_ctx[index].xl_key != xlator) {
+ ret = -1;
+ goto unlock;
}
+
+ if (inode->_ctx[index].value1 && value1)
+ *value1 = inode->_ctx[index].value1;
+ if (inode->_ctx[index].value2 && value2)
+ *value2 = inode->_ctx[index].value2;
+
+ inode->_ctx[index].key = 0;
+ inode->_ctx[index].xl_key = NULL;
+ inode->_ctx[index].value1 = 0;
+ inode->_ctx[index].value2 = 0;
+ }
unlock:
- UNLOCK (&inode->lock);
+ UNLOCK(&inode->lock);
- return ret;
+ return ret;
}
-
-void
-inode_dump (inode_t *inode, char *prefix)
+/* function behavior:
+ - if value1 is set, value1 in ctx is reset to 0 with current value passed
+ back in value1 address.
+ - if value2 is set, value2 in ctx is reset to 0 with current value passed
+ back in value2 address.
+ - if both are set, both fields are reset.
+*/
+static int
+__inode_ctx_reset2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
{
- int ret = -1;
- xlator_t *xl = NULL;
- int i = 0;
- fd_t *fd = NULL;
- struct _inode_ctx *inode_ctx = NULL;
- struct list_head fd_list;
+ int index = 0;
+ int ret = 0;
- if (!inode)
- return;
+ if (!inode || !xlator)
+ return -1;
- INIT_LIST_HEAD (&fd_list);
+ LOCK(&inode->lock);
+ {
+ index = xlator->xl_id;
+ if (inode->_ctx[index].xl_key != xlator) {
+ ret = -1;
+ goto unlock;
+ }
- ret = TRY_LOCK(&inode->lock);
- if (ret != 0) {
- return;
+ if (inode->_ctx[index].value1 && value1) {
+ *value1 = inode->_ctx[index].value1;
+ inode->_ctx[index].value1 = 0;
+ }
+ if (inode->_ctx[index].value2 && value2) {
+ *value2 = inode->_ctx[index].value2;
+ inode->_ctx[index].value2 = 0;
}
+ }
+unlock:
+ UNLOCK(&inode->lock);
- {
- gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
- gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
- gf_proc_dump_write("ref", "%u", inode->ref);
- gf_proc_dump_write("ia_type", "%d", inode->ia_type);
- if (inode->_ctx) {
- inode_ctx = GF_CALLOC (inode->table->xl->graph->xl_count,
- sizeof (*inode_ctx),
- gf_common_mt_inode_ctx);
- if (inode_ctx == NULL) {
- goto unlock;
- }
-
- for (i = 0; i < inode->table->xl->graph->xl_count; i++) {
- inode_ctx[i] = inode->_ctx[i];
- }
- }
+ return ret;
+}
- if (dump_options.xl_options.dump_fdctx != _gf_true)
- goto unlock;
+int
+inode_ctx_reset2(inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
+{
+ uint64_t tmp_value1 = 0;
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
+ ret = __inode_ctx_reset2(inode, xlator, &tmp_value1, &tmp_value2);
+ if (!ret) {
+ if (value1_p)
+ *value1_p = tmp_value1;
+ if (value2_p)
+ *value2_p = tmp_value2;
+ }
+ return ret;
+}
- list_for_each_entry (fd, &inode->fd_list, inode_list) {
- fd_ctx_dump (fd, prefix);
- }
- }
-unlock:
- UNLOCK(&inode->lock);
-
- if (inode_ctx && (dump_options.xl_options.dump_inodectx == _gf_true)) {
- for (i = 0; i < inode->table->xl->graph->xl_count; i++) {
- if (inode_ctx[i].xl_key) {
- xl = (xlator_t *)(long)inode_ctx[i].xl_key;
- if (xl->dumpops && xl->dumpops->inodectx)
- xl->dumpops->inodectx (xl, inode);
- }
- }
- }
+int
+inode_ctx_reset1(inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
- if (inode_ctx != NULL) {
- GF_FREE (inode_ctx);
- }
+ ret = __inode_ctx_reset2(inode, xlator, NULL, &tmp_value2);
- return;
+ if (!ret && value2_p)
+ *value2_p = tmp_value2;
+
+ return ret;
}
+int
+inode_ctx_reset0(inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ uint64_t tmp_value1 = 0;
+ int ret = 0;
-void
-inode_table_dump (inode_table_t *itable, char *prefix)
+ ret = __inode_ctx_reset2(inode, xlator, &tmp_value1, NULL);
+
+ if (!ret && value1_p)
+ *value1_p = tmp_value1;
+
+ return ret;
+}
+
+int
+inode_is_linked(inode_t *inode)
{
+ int ret = 0;
+ inode_table_t *table = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int ret = 0;
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return 0;
+ }
- if (!itable)
- return;
+ table = inode->table;
- memset(key, 0, sizeof(key));
- ret = pthread_mutex_trylock(&itable->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ ret = __is_inode_hashed(inode);
+ }
+ pthread_mutex_unlock(&table->lock);
- if (ret != 0) {
- return;
+ return ret;
+}
+
+void
+inode_dump(inode_t *inode, char *prefix)
+{
+ int ret = -1;
+ xlator_t *xl = NULL;
+ int i = 0;
+ fd_t *fd = NULL;
+ struct _inode_ctx *inode_ctx = NULL;
+ struct list_head fd_list;
+ int ref = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ uint64_t nlookup = 0;
+
+ if (!inode)
+ return;
+
+ INIT_LIST_HEAD(&fd_list);
+
+ ret = TRY_LOCK(&inode->lock);
+ if (ret != 0) {
+ return;
+ }
+
+ {
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ gf_proc_dump_write("gfid", "%s", uuid_utoa(inode->gfid));
+ gf_proc_dump_write("nlookup", "%" PRIu64, nlookup);
+ gf_proc_dump_write("fd-count", "%u", inode->fd_count);
+ gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);
+ gf_proc_dump_write("ref", "%u", inode->ref);
+ gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);
+ gf_proc_dump_write("ia_type", "%d", inode->ia_type);
+ if (inode->_ctx) {
+ inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx),
+ gf_common_mt_inode_ctx);
+ if (inode_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < inode->table->ctxcount; i++) {
+ inode_ctx[i] = inode->_ctx[i];
+ xl = inode_ctx[i].xl_key;
+ ref = inode_ctx[i].ref;
+ if (ref != 0 && xl) {
+ gf_proc_dump_build_key(key, "ref_by_xl:", "%s", xl->name);
+ gf_proc_dump_write(key, "%d", ref);
+ }
+ }
}
- gf_proc_dump_build_key(key, prefix, "hashsize");
- gf_proc_dump_write(key, "%d", itable->hashsize);
- gf_proc_dump_build_key(key, prefix, "name");
- gf_proc_dump_write(key, "%s", itable->name);
+ if (dump_options.xl_options.dump_fdctx != _gf_true)
+ goto unlock;
- gf_proc_dump_build_key(key, prefix, "lru_limit");
- gf_proc_dump_write(key, "%d", itable->lru_limit);
- gf_proc_dump_build_key(key, prefix, "active_size");
- gf_proc_dump_write(key, "%d", itable->active_size);
- gf_proc_dump_build_key(key, prefix, "lru_size");
- gf_proc_dump_write(key, "%d", itable->lru_size);
- gf_proc_dump_build_key(key, prefix, "purge_size");
- gf_proc_dump_write(key, "%d", itable->purge_size);
+ list_for_each_entry(fd, &inode->fd_list, inode_list)
+ {
+ fd_ctx_dump(fd, prefix);
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (inode_ctx && (dump_options.xl_options.dump_inodectx == _gf_true)) {
+ for (i = 0; i < inode->table->ctxcount; i++) {
+ if (inode_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)inode_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->inodectx)
+ xl->dumpops->inodectx(xl, inode);
+ }
+ }
+ }
- INODE_DUMP_LIST(&itable->active, key, prefix, "active");
- INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
- INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ GF_FREE(inode_ctx);
- pthread_mutex_unlock(&itable->lock);
+ return;
}
void
-inode_dump_to_dict (inode_t *inode, char *prefix, dict_t *dict)
+inode_table_dump(inode_table_t *itable, char *prefix)
{
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int ret = 0;
- ret = TRY_LOCK (&inode->lock);
- if (ret)
- return;
+ if (!itable)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gfid", prefix);
- ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (inode->gfid)));
- if (ret)
- goto out;
+ ret = pthread_mutex_trylock(&itable->lock);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.nlookup", prefix);
- ret = dict_set_uint64 (dict, key, inode->nlookup);
- if (ret)
- goto out;
+ if (ret != 0) {
+ return;
+ }
+
+ gf_proc_dump_build_key(key, prefix, "hashsize");
+ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize);
+ gf_proc_dump_build_key(key, prefix, "name");
+ gf_proc_dump_write(key, "%s", itable->name);
+
+ gf_proc_dump_build_key(key, prefix, "lru_limit");
+ gf_proc_dump_write(key, "%d", itable->lru_limit);
+ gf_proc_dump_build_key(key, prefix, "active_size");
+ gf_proc_dump_write(key, "%d", itable->active_size);
+ gf_proc_dump_build_key(key, prefix, "lru_size");
+ gf_proc_dump_write(key, "%d", itable->lru_size);
+ gf_proc_dump_build_key(key, prefix, "purge_size");
+ gf_proc_dump_write(key, "%d", itable->purge_size);
+ gf_proc_dump_build_key(key, prefix, "invalidate_size");
+ gf_proc_dump_write(key, "%d", itable->invalidate_size);
+
+ INODE_DUMP_LIST(&itable->active, key, prefix, "active");
+ INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
+ INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");
+
+ pthread_mutex_unlock(&itable->lock);
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ref", prefix);
- ret = dict_set_uint32 (dict, key, inode->ref);
- if (ret)
- goto out;
+void
+inode_dump_to_dict(inode_t *inode, char *prefix, dict_t *dict)
+{
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ uint64_t nlookup = 0;
+
+ ret = TRY_LOCK(&inode->lock);
+ if (ret)
+ return;
+
+ snprintf(key, sizeof(key), "%s.gfid", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(uuid_utoa(inode->gfid)));
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.nlookup", prefix);
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ ret = dict_set_uint64(dict, key, nlookup);
+ if (ret)
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ia_type", prefix);
- ret = dict_set_int32 (dict, key, inode->ia_type);
+ snprintf(key, sizeof(key), "%s.ref", prefix);
+ ret = dict_set_uint32(dict, key, inode->ref);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.ia_type", prefix);
+ ret = dict_set_int32(dict, key, inode->ia_type);
+ if (ret)
+ goto out;
out:
- UNLOCK (&inode->lock);
- return;
+ UNLOCK(&inode->lock);
+ return;
}
void
-inode_table_dump_to_dict (inode_table_t *itable, char *prefix, dict_t *dict)
+inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
{
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int ret = 0;
- inode_t *inode = NULL;
- int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = 0;
+#ifdef DEBUG
+ inode_t *inode = NULL;
+ int count = 0;
+#endif
+ ret = pthread_mutex_trylock(&itable->lock);
+ if (ret)
+ return;
- ret = pthread_mutex_trylock (&itable->lock);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.itable.lru_limit", prefix);
+ ret = dict_set_uint32(dict, key, itable->lru_limit);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.itable.active_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->active_size);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.itable.lru_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->lru_size);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.itable.purge_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->purge_size);
+ if (ret)
+ goto out;
+
+#ifdef DEBUG
+ /* Dumping inode details in dictionary and sending it to CLI is not
+ required as when a developer (or support team) asks for this command
+ output, they just want to get top level detail of inode table.
+ If one wants to debug, let them take statedump and debug, this
+ wouldn't be available in CLI during production setup.
+ */
+ list_for_each_entry(inode, &itable->active, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.active%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
+ count = 0;
+
+ list_for_each_entry(inode, &itable->lru, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.lru%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
+ count = 0;
+
+ list_for_each_entry(inode, &itable->purge, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.purge%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
+#endif
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.active_size", prefix);
- ret = dict_set_uint32 (dict, key, itable->active_size);
- if (ret)
- goto out;
+out:
+ pthread_mutex_unlock(&itable->lock);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.lru_size", prefix);
- ret = dict_set_uint32 (dict, key, itable->lru_size);
- if (ret)
- goto out;
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.purge_size", prefix);
- ret = dict_set_uint32 (dict, key, itable->purge_size);
- if (ret)
- goto out;
+size_t
+inode_ctx_size(inode_t *inode)
+{
+ int i = 0;
+ size_t size = 0;
+ xlator_t *xl = NULL, *old_THIS = NULL;
- list_for_each_entry (inode, &itable->active, list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.active%d", prefix,
- count++);
- inode_dump_to_dict (inode, key, dict);
- }
- count = 0;
+ if (!inode)
+ goto out;
- list_for_each_entry (inode, &itable->lru, list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.lru%d", prefix,
- count++);
- inode_dump_to_dict (inode, key, dict);
- }
- count = 0;
+ LOCK(&inode->lock);
+ {
+ for (i = 0; i < inode->table->ctxcount; i++) {
+ if (!inode->_ctx[i].xl_key)
+ continue;
+
+ xl = (xlator_t *)(long)inode->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+
+ /* If inode ref is taken when THIS is global xlator,
+ * the ctx xl_key is set, but the value is NULL.
+ * For global xlator the cbks can be NULL, hence check
+ * for the same */
+ if (!xl->cbks) {
+ THIS = old_THIS;
+ continue;
+ }
+
+ if (xl->cbks->ictxsize)
+ size += xl->cbks->ictxsize(xl, inode);
- list_for_each_entry (inode, &itable->purge, list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.purge%d", prefix,
- count++);
- inode_dump_to_dict (inode, key, dict);
+ THIS = old_THIS;
}
+ }
+ UNLOCK(&inode->lock);
out:
- pthread_mutex_unlock (&itable->lock);
+ return size;
+}
+
+/* *
+ * This function finds name of the inode, if it has dentry. The dentry will be
+ * created only if inode_link happens with valid parent and name. And this
+ * function is only applicable for directories because multiple dentries are
+ * not possible(no hardlinks)
+ * */
+void
+inode_find_directory_name(inode_t *inode, const char **name)
+{
+ dentry_t *dentry = NULL;
+ GF_VALIDATE_OR_GOTO("inode", inode, out);
+ GF_VALIDATE_OR_GOTO("inode", name, out);
+
+ if (!IA_ISDIR(inode->ia_type))
return;
+
+ pthread_mutex_lock(&inode->table->lock);
+ {
+ dentry = __dentry_search_arbit(inode);
+ if (dentry) {
+ *name = dentry->name;
+ }
+ }
+ pthread_mutex_unlock(&inode->table->lock);
+out:
+ return;
}
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
deleted file mode 100644
index 41003df71ca..00000000000
--- a/libglusterfs/src/inode.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _INODE_H
-#define _INODE_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <sys/types.h>
-
-#define DEFAULT_INODE_MEMPOOL_ENTRIES 32 * 1024
-#define INODE_PATH_FMT "<gfid:%s>"
-struct _inode_table;
-typedef struct _inode_table inode_table_t;
-
-struct _inode;
-typedef struct _inode inode_t;
-
-struct _dentry;
-typedef struct _dentry dentry_t;
-
-#include "list.h"
-#include "xlator.h"
-#include "iatt.h"
-#include "uuid.h"
-
-
-struct _inode_table {
- pthread_mutex_t lock;
- size_t hashsize; /* bucket size of inode hash and dentry hash */
- char *name; /* name of the inode table, just for gf_log() */
- inode_t *root; /* root directory inode, with number 1 */
- xlator_t *xl; /* xlator to be called to do purge */
- uint32_t lru_limit; /* maximum LRU cache size */
- struct list_head *inode_hash; /* buckets for inode hash table */
- struct list_head *name_hash; /* buckets for dentry hash table */
- struct list_head active; /* list of inodes currently active (in an fop) */
- uint32_t active_size; /* count of inodes in active list */
- struct list_head lru; /* list of inodes recently used.
- lru.next most recent */
- uint32_t lru_size; /* count of inodes in lru list */
- struct list_head purge; /* list of inodes to be purged soon */
- uint32_t purge_size; /* count of inodes in purge list */
-
- struct mem_pool *inode_pool; /* memory pool for inodes */
- struct mem_pool *dentry_pool; /* memory pool for dentrys */
- struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
-};
-
-
-struct _dentry {
- struct list_head inode_list; /* list of dentries of inode */
- struct list_head hash; /* hash table pointers */
- inode_t *inode; /* inode of this directory entry */
- char *name; /* name of the directory entry */
- inode_t *parent; /* directory of the entry */
-};
-
-struct _inode_ctx {
- union {
- uint64_t key;
- xlator_t *xl_key;
- };
- union {
- uint64_t value1;
- void *ptr1;
- };
- union {
- uint64_t value2;
- void *ptr2;
- };
-};
-
-struct _inode {
- inode_table_t *table; /* the table this inode belongs to */
- uuid_t gfid;
- gf_lock_t lock;
- uint64_t nlookup;
- uint32_t ref; /* reference count on this inode */
- ia_type_t ia_type; /* what kind of file */
- struct list_head fd_list; /* list of open files on this inode */
- struct list_head dentry_list; /* list of directory entries for this inode */
- struct list_head hash; /* hash table pointers */
- struct list_head list; /* active/lru/purge */
-
- struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
-};
-
-
-#define UUID0_STR "00000000-0000-0000-0000-000000000000"
-#define GFID_STR_PFX "<gfid:" UUID0_STR ">"
-#define GFID_STR_PFX_LEN (sizeof (GFID_STR_PFX) - 1)
-
-inode_table_t *
-inode_table_new (size_t lru_limit, xlator_t *xl);
-
-inode_t *
-inode_new (inode_table_t *table);
-
-inode_t *
-inode_link (inode_t *inode, inode_t *parent,
- const char *name, struct iatt *stbuf);
-
-void
-inode_unlink (inode_t *inode, inode_t *parent, const char *name);
-
-inode_t *
-inode_parent (inode_t *inode, uuid_t pargfid, const char *name);
-
-inode_t *
-inode_ref (inode_t *inode);
-
-inode_t *
-inode_unref (inode_t *inode);
-
-int
-inode_lookup (inode_t *inode);
-
-int
-inode_forget (inode_t *inode, uint64_t nlookup);
-
-int
-inode_rename (inode_table_t *table, inode_t *olddir, const char *oldname,
- inode_t *newdir, const char *newname,
- inode_t *inode, struct iatt *stbuf);
-
-inode_t *
-inode_grep (inode_table_t *table, inode_t *parent, const char *name);
-
-int
-inode_grep_for_gfid (inode_table_t *table, inode_t *parent, const char *name,
- uuid_t gfid, ia_type_t *type);
-
-inode_t *
-inode_find (inode_table_t *table, uuid_t gfid);
-
-int
-inode_path (inode_t *inode, const char *name, char **bufp);
-
-int
-__inode_path (inode_t *inode, const char *name, char **bufp);
-
-inode_t *
-inode_from_path (inode_table_t *table, const char *path);
-
-int
-inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-int
-__inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-
-int
-inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-int
-__inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-
-int
-inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-
-inode_t *
-inode_resolve (inode_table_t *table, char *path);
-
-#define __inode_ctx_set(i,x,v_p) __inode_ctx_set2(i,x,v_p,0)
-#define inode_ctx_set(i,x,v_p) inode_ctx_set2(i,x,v_p,0)
-
-static inline int
-__inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
-{
- return __inode_ctx_set2 (inode, this, &v, 0);
-}
-
-static inline int
-inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
-{
- return inode_ctx_set2(inode, this, &v, 0);
-}
-
-#define __inode_ctx_get(i,x,v) __inode_ctx_get2(i,x,v,0)
-#define inode_ctx_get(i,x,v) inode_ctx_get2(i,x,v,0)
-
-#define inode_ctx_del(i,x,v) inode_ctx_del2(i,x,v,0)
-
-
-gf_boolean_t
-__is_root_gfid (uuid_t gfid);
-
-#endif /* _INODE_H */
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index f68c6c74879..4e7d2958764 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -8,1084 +8,1139 @@
cases as published by the Free Software Foundation.
*/
-
-#include "iobuf.h"
-#include "statedump.h"
+#include "glusterfs/iobuf.h"
+#include "glusterfs/statedump.h"
#include <stdio.h>
-
+#include "glusterfs/libglusterfs-messages.h"
/*
TODO: implement destroy margins and prefetching of arenas
*/
-#define IOBUF_ARENA_MAX_INDEX (sizeof (gf_iobuf_init_config) / \
- (sizeof (struct iobuf_init_config)))
+#define IOBUF_ARENA_MAX_INDEX \
+ (sizeof(gf_iobuf_init_config) / (sizeof(struct iobuf_init_config)))
/* Make sure this array is sorted based on pagesize */
-struct iobuf_init_config gf_iobuf_init_config[] = {
- /* { pagesize, num_pages }, */
- {128, 1024},
- {512, 512},
- {2 * 1024, 512},
- {8 * 1024, 128},
- {32 * 1024, 64},
- {128 * 1024, 32},
- {256 * 1024, 8},
- {1 * 1024 * 1024, 2},
+static const struct iobuf_init_config gf_iobuf_init_config[] = {
+ /* { pagesize, num_pages }, */
+ {128, 1024}, {512, 512}, {2 * 1024, 512}, {8 * 1024, 128},
+ {32 * 1024, 64}, {128 * 1024, 32}, {256 * 1024, 8}, {1 * 1024 * 1024, 2},
};
-int
-gf_iobuf_get_arena_index (size_t page_size)
+static int
+gf_iobuf_get_arena_index(const size_t page_size)
{
- int i = -1;
-
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- if (page_size <= gf_iobuf_init_config[i].pagesize)
- break;
- }
+ int i;
- if (i >= IOBUF_ARENA_MAX_INDEX)
- i = -1;
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (page_size <= gf_iobuf_init_config[i].pagesize)
+ return i;
+ }
- return i;
+ return -1;
}
-size_t
-gf_iobuf_get_pagesize (size_t page_size)
+static size_t
+gf_iobuf_get_pagesize(const size_t page_size, int *index)
{
- int i = 0;
- size_t size = 0;
-
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- size = gf_iobuf_init_config[i].pagesize;
- if (page_size <= size)
- break;
+ int i;
+ size_t size = 0;
+
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ size = gf_iobuf_init_config[i].pagesize;
+ if (page_size <= size) {
+ if (index != NULL)
+ *index = i;
+ return size;
}
+ }
- if (i >= IOBUF_ARENA_MAX_INDEX)
- size = -1;
-
- return size;
+ return -1;
}
-void
-__iobuf_arena_init_iobufs (struct iobuf_arena *iobuf_arena)
+static void
+__iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena)
{
- int iobuf_cnt = 0;
- struct iobuf *iobuf = NULL;
- int offset = 0;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
-
- iobuf_cnt = iobuf_arena->page_count;
-
- iobuf_arena->iobufs = GF_CALLOC (sizeof (*iobuf), iobuf_cnt,
- gf_common_mt_iobuf);
- if (!iobuf_arena->iobufs)
- return;
+ const int iobuf_cnt = iobuf_arena->page_count;
+ struct iobuf *iobuf = NULL;
+ int offset = 0;
+ int i = 0;
+
+ iobuf_arena->iobufs = GF_CALLOC(sizeof(*iobuf), iobuf_cnt,
+ gf_common_mt_iobuf);
+ if (!iobuf_arena->iobufs)
+ return;
- iobuf = iobuf_arena->iobufs;
- for (i = 0; i < iobuf_cnt; i++) {
- INIT_LIST_HEAD (&iobuf->list);
- LOCK_INIT (&iobuf->lock);
+ iobuf = iobuf_arena->iobufs;
+ for (i = 0; i < iobuf_cnt; i++) {
+ INIT_LIST_HEAD(&iobuf->list);
+ LOCK_INIT(&iobuf->lock);
- iobuf->iobuf_arena = iobuf_arena;
+ iobuf->iobuf_arena = iobuf_arena;
- iobuf->ptr = iobuf_arena->mem_base + offset;
+ iobuf->ptr = iobuf_arena->mem_base + offset;
- list_add (&iobuf->list, &iobuf_arena->passive.list);
- iobuf_arena->passive_cnt++;
+ list_add(&iobuf->list, &iobuf_arena->passive.list);
+ iobuf_arena->passive_cnt++;
- offset += iobuf_arena->page_size;
- iobuf++;
- }
+ offset += iobuf_arena->page_size;
+ iobuf++;
+ }
-out:
- return;
+ return;
}
-
-void
-__iobuf_arena_destroy_iobufs (struct iobuf_arena *iobuf_arena)
+static void
+__iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena)
{
- int iobuf_cnt = 0;
- struct iobuf *iobuf = NULL;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
+ int iobuf_cnt = 0;
+ struct iobuf *iobuf = NULL;
+ int i = 0;
- iobuf_cnt = iobuf_arena->page_count;
-
- if (!iobuf_arena->iobufs) {
- gf_log_callingfn (THIS->name, GF_LOG_ERROR, "iobufs not found");
- return;
- }
+ if (!iobuf_arena->iobufs) {
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND,
+ "iobufs not found");
+ return;
+ }
- iobuf = iobuf_arena->iobufs;
- for (i = 0; i < iobuf_cnt; i++) {
- GF_ASSERT (iobuf->ref == 0);
+ iobuf_cnt = iobuf_arena->page_count;
+ iobuf = iobuf_arena->iobufs;
+ for (i = 0; i < iobuf_cnt; i++) {
+ GF_ASSERT(GF_ATOMIC_GET(iobuf->ref) == 0);
- list_del_init (&iobuf->list);
- iobuf++;
- }
+ LOCK_DESTROY(&iobuf->lock);
+ list_del_init(&iobuf->list);
+ iobuf++;
+ }
- GF_FREE (iobuf_arena->iobufs);
+ GF_FREE(iobuf_arena->iobufs);
-out:
- return;
+ return;
}
-
-void
-__iobuf_arena_destroy (struct iobuf_arena *iobuf_arena)
+static void
+__iobuf_arena_destroy(struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
- __iobuf_arena_destroy_iobufs (iobuf_arena);
+ if (iobuf_pool->rdma_deregistration)
+ iobuf_pool->rdma_deregistration(iobuf_pool->mr_list, iobuf_arena);
- if (iobuf_arena->mem_base
- && iobuf_arena->mem_base != MAP_FAILED)
- munmap (iobuf_arena->mem_base, iobuf_arena->arena_size);
+ __iobuf_arena_destroy_iobufs(iobuf_arena);
- GF_FREE (iobuf_arena);
+ if (iobuf_arena->mem_base && iobuf_arena->mem_base != MAP_FAILED)
+ munmap(iobuf_arena->mem_base, iobuf_arena->arena_size);
+
+ GF_FREE(iobuf_arena);
out:
- return;
+ return;
}
-
-struct iobuf_arena *
-__iobuf_arena_alloc (struct iobuf_pool *iobuf_pool, size_t page_size,
- int32_t num_iobufs)
+static struct iobuf_arena *
+__iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size,
+ int32_t num_iobufs)
{
- struct iobuf_arena *iobuf_arena = NULL;
- size_t rounded_size = 0;
+ struct iobuf_arena *iobuf_arena = NULL;
+ size_t rounded_size = 0;
+ int index = 0; /* unused */
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- iobuf_arena = GF_CALLOC (sizeof (*iobuf_arena), 1,
- gf_common_mt_iobuf_arena);
- if (!iobuf_arena)
- goto err;
+ iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena);
+ if (!iobuf_arena)
+ goto err;
- INIT_LIST_HEAD (&iobuf_arena->list);
- INIT_LIST_HEAD (&iobuf_arena->active.list);
- INIT_LIST_HEAD (&iobuf_arena->passive.list);
- iobuf_arena->iobuf_pool = iobuf_pool;
+ INIT_LIST_HEAD(&iobuf_arena->list);
+ INIT_LIST_HEAD(&iobuf_arena->all_list);
+ INIT_LIST_HEAD(&iobuf_arena->active.list);
+ INIT_LIST_HEAD(&iobuf_arena->passive.list);
+ iobuf_arena->iobuf_pool = iobuf_pool;
- rounded_size = gf_iobuf_get_pagesize (page_size);
+ rounded_size = gf_iobuf_get_pagesize(page_size, &index);
- iobuf_arena->page_size = rounded_size;
- iobuf_arena->page_count = num_iobufs;
+ iobuf_arena->page_size = rounded_size;
+ iobuf_arena->page_count = num_iobufs;
- iobuf_arena->arena_size = rounded_size * num_iobufs;
+ iobuf_arena->arena_size = rounded_size * num_iobufs;
- iobuf_arena->mem_base = mmap (NULL, iobuf_arena->arena_size,
- PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- if (iobuf_arena->mem_base == MAP_FAILED) {
- gf_log (THIS->name, GF_LOG_WARNING, "maping failed");
- goto err;
- }
+ iobuf_arena->mem_base = mmap(NULL, iobuf_arena->arena_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (iobuf_arena->mem_base == MAP_FAILED) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED, NULL);
+ goto err;
+ }
- __iobuf_arena_init_iobufs (iobuf_arena);
- if (!iobuf_arena->iobufs) {
- gf_log (THIS->name, GF_LOG_ERROR, "init failed");
- goto err;
- }
+ if (iobuf_pool->rdma_registration) {
+ iobuf_pool->rdma_registration(iobuf_pool->device, iobuf_arena);
+ }
- iobuf_pool->arena_cnt++;
+ list_add_tail(&iobuf_arena->all_list, &iobuf_pool->all_arenas);
- return iobuf_arena;
+ __iobuf_arena_init_iobufs(iobuf_arena);
+ if (!iobuf_arena->iobufs) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED, NULL);
+ goto err;
+ }
-err:
- __iobuf_arena_destroy (iobuf_arena);
-
-out:
- return NULL;
-}
+ iobuf_pool->arena_cnt++;
+ return iobuf_arena;
-struct iobuf_arena *
-__iobuf_arena_unprune (struct iobuf_pool *iobuf_pool, size_t page_size)
-{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
-
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater than max "
- "available", page_size);
- return NULL;
- }
+err:
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
- list_for_each_entry (tmp, &iobuf_pool->purge[index], list) {
- list_del_init (&tmp->list);
- iobuf_arena = tmp;
- break;
- }
out:
- return iobuf_arena;
+ return NULL;
}
-
-struct iobuf_arena *
-__iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool, size_t page_size,
- int32_t num_pages)
+static struct iobuf_arena *
+__iobuf_arena_unprune(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
{
- struct iobuf_arena *iobuf_arena = NULL;
- int index = 0;
-
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater than max "
- "available", page_size);
- return NULL;
- }
-
- iobuf_arena = __iobuf_arena_unprune (iobuf_pool, page_size);
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
- if (!iobuf_arena)
- iobuf_arena = __iobuf_arena_alloc (iobuf_pool, page_size,
- num_pages);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- if (!iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- return NULL;
- }
-
- list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas[index]);
-
- return iobuf_arena;
+ list_for_each_entry(tmp, &iobuf_pool->purge[index], list)
+ {
+ list_del_init(&tmp->list);
+ iobuf_arena = tmp;
+ break;
+ }
+out:
+ return iobuf_arena;
}
-
-struct iobuf_arena *
-iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool, size_t page_size,
- int32_t num_pages)
+static struct iobuf_arena *
+__iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int32_t num_pages, const int index)
{
- struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ iobuf_arena = __iobuf_arena_unprune(iobuf_pool, page_size, index);
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- iobuf_arena = __iobuf_pool_add_arena (iobuf_pool, page_size,
- num_pages);
+ if (!iobuf_arena) {
+ iobuf_arena = __iobuf_arena_alloc(iobuf_pool, page_size, num_pages);
+ if (!iobuf_arena) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
+ NULL);
+ return NULL;
}
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ }
+ list_add(&iobuf_arena->list, &iobuf_pool->arenas[index]);
-out:
- return iobuf_arena;
+ return iobuf_arena;
}
-
+/* This function destroys all the iobufs and the iobuf_pool */
void
-iobuf_pool_destroy (struct iobuf_pool *iobuf_pool)
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
- int i = 0;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- list_for_each_entry_safe (iobuf_arena, tmp,
- &iobuf_pool->arenas[i], list) {
- list_del_init (&iobuf_arena->list);
- iobuf_pool->arena_cnt--;
- __iobuf_arena_destroy (iobuf_arena);
- }
-
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->arenas[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ /* If there are no iobuf leaks, there should be no
+ * arenas in the filled list. If at all there are any
+ * arenas in the filled list, the below function will
+ * assert.
+ */
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->filled[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ /* If there are no iobuf leaks, there shoould be
+ * no standard allocated arenas, iobuf_put will free
+ * such arenas.
+ * TODO: Free the stdalloc arenas forcefully if present?
+ */
}
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+
+ pthread_mutex_destroy(&iobuf_pool->mutex);
+
+ GF_FREE(iobuf_pool);
out:
- return;
+ return;
}
static void
-iobuf_create_stdalloc_arena (struct iobuf_pool *iobuf_pool)
+iobuf_create_stdalloc_arena(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
- /* No locking required here as its called only once during init */
- iobuf_arena = GF_CALLOC (sizeof (*iobuf_arena), 1,
- gf_common_mt_iobuf_arena);
- if (!iobuf_arena)
- goto err;
+ /* No locking required here as its called only once during init */
+ iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena);
+ if (!iobuf_arena)
+ goto err;
- INIT_LIST_HEAD (&iobuf_arena->list);
- INIT_LIST_HEAD (&iobuf_arena->active.list);
- INIT_LIST_HEAD (&iobuf_arena->passive.list);
+ INIT_LIST_HEAD(&iobuf_arena->list);
+ INIT_LIST_HEAD(&iobuf_arena->active.list);
+ INIT_LIST_HEAD(&iobuf_arena->passive.list);
- iobuf_arena->iobuf_pool = iobuf_pool;
+ iobuf_arena->iobuf_pool = iobuf_pool;
- iobuf_arena->page_size = 0x7fffffff;
+ iobuf_arena->page_size = 0x7fffffff;
- list_add_tail (&iobuf_arena->list,
- &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX]);
+ list_add_tail(&iobuf_arena->list,
+ &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX]);
err:
- return;
+ return;
}
struct iobuf_pool *
-iobuf_pool_new (void)
+iobuf_pool_new(void)
{
- struct iobuf_pool *iobuf_pool = NULL;
- int i = 0;
- size_t page_size = 0;
- size_t arena_size = 0;
- int32_t num_pages = 0;
-
- iobuf_pool = GF_CALLOC (sizeof (*iobuf_pool), 1,
- gf_common_mt_iobuf_pool);
- if (!iobuf_pool)
- goto out;
-
- pthread_mutex_init (&iobuf_pool->mutex, NULL);
- for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) {
- INIT_LIST_HEAD (&iobuf_pool->arenas[i]);
- INIT_LIST_HEAD (&iobuf_pool->filled[i]);
- INIT_LIST_HEAD (&iobuf_pool->purge[i]);
- }
-
- iobuf_pool->default_page_size = 128 * GF_UNIT_KB;
-
- arena_size = 0;
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- page_size = gf_iobuf_init_config[i].pagesize;
- num_pages = gf_iobuf_init_config[i].num_pages;
-
- iobuf_pool_add_arena (iobuf_pool, page_size, num_pages);
-
- arena_size += page_size * num_pages;
- }
-
- /* Need an arena to handle all the bigger iobuf requests */
- iobuf_create_stdalloc_arena (iobuf_pool);
-
- iobuf_pool->arena_size = arena_size;
+ struct iobuf_pool *iobuf_pool = NULL;
+ int i = 0;
+ size_t page_size = 0;
+ size_t arena_size = 0;
+ int32_t num_pages = 0;
+
+ iobuf_pool = GF_CALLOC(sizeof(*iobuf_pool), 1, gf_common_mt_iobuf_pool);
+ if (!iobuf_pool)
+ goto out;
+ INIT_LIST_HEAD(&iobuf_pool->all_arenas);
+ pthread_mutex_init(&iobuf_pool->mutex, NULL);
+ for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) {
+ INIT_LIST_HEAD(&iobuf_pool->arenas[i]);
+ INIT_LIST_HEAD(&iobuf_pool->filled[i]);
+ INIT_LIST_HEAD(&iobuf_pool->purge[i]);
+ }
+
+ iobuf_pool->default_page_size = 128 * GF_UNIT_KB;
+
+ iobuf_pool->rdma_registration = NULL;
+ iobuf_pool->rdma_deregistration = NULL;
+
+ for (i = 0; i < GF_RDMA_DEVICE_COUNT; i++) {
+ iobuf_pool->device[i] = NULL;
+ iobuf_pool->mr_list[i] = NULL;
+ }
+
+ /* No locking required here
+ * as no one else can use this pool yet
+ */
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ page_size = gf_iobuf_init_config[i].pagesize;
+ num_pages = gf_iobuf_init_config[i].num_pages;
+
+ if (__iobuf_pool_add_arena(iobuf_pool, page_size, num_pages, i) != NULL)
+ arena_size += page_size * num_pages;
+ }
+
+ /* Need an arena to handle all the bigger iobuf requests */
+ iobuf_create_stdalloc_arena(iobuf_pool);
+
+ iobuf_pool->arena_size = arena_size;
out:
- return iobuf_pool;
+ return iobuf_pool;
}
-
-void
-__iobuf_arena_prune (struct iobuf_pool *iobuf_pool,
- struct iobuf_arena *iobuf_arena, int index)
+static void
+__iobuf_arena_prune(struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena, const int index)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
-
- /* code flow comes here only if the arena is in purge list and we can
- * free the arena only if we have atleast one arena in 'arenas' list
- * (ie, at least few iobufs free in arena), that way, there won't
- * be spurious mmap/unmap of buffers
- */
- if (list_empty (&iobuf_pool->arenas[index]))
- goto out;
+ /* code flow comes here only if the arena is in purge list and we can
+ * free the arena only if we have at least one arena in 'arenas' list
+ * (ie, at least few iobufs free in arena), that way, there won't
+ * be spurious mmap/unmap of buffers
+ */
+ if (list_empty(&iobuf_pool->arenas[index]))
+ goto out;
- /* All cases matched, destroy */
- list_del_init (&iobuf_arena->list);
- iobuf_pool->arena_cnt--;
+ /* All cases matched, destroy */
+ list_del_init(&iobuf_arena->list);
+ list_del_init(&iobuf_arena->all_list);
+ iobuf_pool->arena_cnt--;
- __iobuf_arena_destroy (iobuf_arena);
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
out:
- return;
+ return;
}
-
void
-iobuf_pool_prune (struct iobuf_pool *iobuf_pool)
+iobuf_pool_prune(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
- int i = 0;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- if (list_empty (&iobuf_pool->arenas[i])) {
- continue;
- }
-
- list_for_each_entry_safe (iobuf_arena, tmp,
- &iobuf_pool->purge[i], list) {
- __iobuf_arena_prune (iobuf_pool, iobuf_arena, i);
- }
- }
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (list_empty(&iobuf_pool->arenas[i])) {
+ continue;
+ }
+
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i],
+ list)
+ {
+ __iobuf_arena_prune(iobuf_pool, iobuf_arena, i);
+ }
}
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
out:
- return;
+ return;
}
-
-struct iobuf_arena *
-__iobuf_select_arena (struct iobuf_pool *iobuf_pool, size_t page_size)
+/* Always called under the iobuf_pool mutex lock */
+static struct iobuf_arena *
+__iobuf_select_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *trav = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
-
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater than max "
- "available", page_size);
- return NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+
+ /* look for unused iobuf from the head-most arena */
+ list_for_each_entry(trav, &iobuf_pool->arenas[index], list)
+ {
+ if (trav->passive_cnt) {
+ iobuf_arena = trav;
+ break;
}
+ }
- /* look for unused iobuf from the head-most arena */
- list_for_each_entry (trav, &iobuf_pool->arenas[index], list) {
- if (trav->passive_cnt) {
- iobuf_arena = trav;
- break;
- }
- }
+ if (!iobuf_arena) {
+ /* all arenas were full, find the right count to add */
+ iobuf_arena = __iobuf_pool_add_arena(
+ iobuf_pool, page_size, gf_iobuf_init_config[index].num_pages,
+ index);
+ }
- if (!iobuf_arena) {
- /* all arenas were full, find the right count to add */
- iobuf_arena = __iobuf_pool_add_arena (iobuf_pool, page_size,
- gf_iobuf_init_config[index].num_pages);
- }
-
-out:
- return iobuf_arena;
+ return iobuf_arena;
}
-
-struct iobuf *
-__iobuf_ref (struct iobuf *iobuf)
+/* Always called under the iobuf_pool mutex lock */
+static struct iobuf *
+__iobuf_get(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
{
- iobuf->ref++;
-
- return iobuf;
-}
-
+ struct iobuf *iobuf = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
-struct iobuf *
-__iobuf_unref (struct iobuf *iobuf)
-{
- iobuf->ref--;
-
- return iobuf;
-}
-
-struct iobuf *
-__iobuf_get (struct iobuf_arena *iobuf_arena, size_t page_size)
-{
- struct iobuf *iobuf = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
-
- iobuf_pool = iobuf_arena->iobuf_pool;
-
- list_for_each_entry (iobuf, &iobuf_arena->passive.list, list)
- break;
+ /* most eligible arena for picking an iobuf */
+ iobuf_arena = __iobuf_select_arena(iobuf_pool, page_size, index);
+ if (!iobuf_arena)
+ return NULL;
- list_del (&iobuf->list);
- iobuf_arena->passive_cnt--;
+ list_for_each_entry(iobuf, &iobuf_arena->passive.list, list) break;
- list_add (&iobuf->list, &iobuf_arena->active.list);
- iobuf_arena->active_cnt++;
+ list_del(&iobuf->list);
+ iobuf_arena->passive_cnt--;
- /* no resetting requied for this element */
- iobuf_arena->alloc_cnt++;
+ list_add(&iobuf->list, &iobuf_arena->active.list);
+ iobuf_arena->active_cnt++;
- if (iobuf_arena->max_active < iobuf_arena->active_cnt)
- iobuf_arena->max_active = iobuf_arena->active_cnt;
+ /* no resetting requied for this element */
+ iobuf_arena->alloc_cnt++;
- if (iobuf_arena->passive_cnt == 0) {
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater "
- "than max available", page_size);
- goto out;
- }
+ if (iobuf_arena->max_active < iobuf_arena->active_cnt)
+ iobuf_arena->max_active = iobuf_arena->active_cnt;
- list_del (&iobuf_arena->list);
- list_add (&iobuf_arena->list, &iobuf_pool->filled[index]);
- }
+ if (iobuf_arena->passive_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add(&iobuf_arena->list, &iobuf_pool->filled[index]);
+ }
-out:
- return iobuf;
+ return iobuf;
}
-struct iobuf *
-iobuf_get_from_stdalloc (struct iobuf_pool *iobuf_pool, size_t page_size)
+static struct iobuf *
+iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, const size_t page_size)
{
- struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *trav = NULL;
- int ret = -1;
-
- /* The first arena in the 'MAX-INDEX' will always be used for misc */
- list_for_each_entry (trav, &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX],
- list) {
- iobuf_arena = trav;
- break;
- }
-
- iobuf = GF_CALLOC (1, sizeof (*iobuf), gf_common_mt_iobuf);
- if (!iobuf)
- goto out;
-
- /* 4096 is the alignment */
- iobuf->free_ptr = GF_CALLOC (1, ((page_size + GF_IOBUF_ALIGN_SIZE) - 1),
- gf_common_mt_char);
- if (!iobuf->free_ptr)
- goto out;
-
- iobuf->ptr = GF_ALIGN_BUF (iobuf->free_ptr, GF_IOBUF_ALIGN_SIZE);
- iobuf->iobuf_arena = iobuf_arena;
- LOCK_INIT (&iobuf->lock);
-
- /* Hold a ref because you are allocating and using it */
- iobuf->ref = 1;
-
- ret = 0;
+ struct iobuf *iobuf = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+ int ret = -1;
+
+ /* The first arena in the 'MAX-INDEX' will always be used for misc */
+ list_for_each_entry(trav, &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX], list)
+ {
+ iobuf_arena = trav;
+ break;
+ }
+
+ iobuf = GF_CALLOC(1, sizeof(*iobuf), gf_common_mt_iobuf);
+ if (!iobuf)
+ goto out;
+
+ /* 4096 is the alignment */
+ iobuf->free_ptr = GF_CALLOC(1, ((page_size + GF_IOBUF_ALIGN_SIZE) - 1),
+ gf_common_mt_char);
+ if (!iobuf->free_ptr)
+ goto out;
+
+ iobuf->ptr = GF_ALIGN_BUF(iobuf->free_ptr, GF_IOBUF_ALIGN_SIZE);
+ iobuf->iobuf_arena = iobuf_arena;
+ LOCK_INIT(&iobuf->lock);
+
+ /* Hold a ref because you are allocating and using it */
+ GF_ATOMIC_INIT(iobuf->ref, 1);
+
+ ret = 0;
out:
- if (ret && iobuf) {
- if (iobuf->free_ptr)
- GF_FREE (iobuf->free_ptr);
- GF_FREE (iobuf);
- iobuf = NULL;
- }
+ if (ret && iobuf) {
+ GF_FREE(iobuf->free_ptr);
+ GF_FREE(iobuf);
+ iobuf = NULL;
+ }
- return iobuf;
+ return iobuf;
}
-
struct iobuf *
-iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size)
+iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size)
{
- struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
- size_t rounded_size = 0;
-
- if (page_size == 0) {
- page_size = iobuf_pool->default_page_size;
- }
-
- rounded_size = gf_iobuf_get_pagesize (page_size);
- if (rounded_size == -1) {
- /* make sure to provide the requested buffer with standard
- memory allocations */
- iobuf = iobuf_get_from_stdalloc (iobuf_pool, page_size);
-
- gf_log ("iobuf", GF_LOG_DEBUG, "request for iobuf of size %zu "
- "is serviced using standard calloc() (%p) as it "
- "exceeds the maximum available buffer size",
- page_size, iobuf);
-
- iobuf_pool->request_misses++;
- return iobuf;
+ struct iobuf *iobuf = NULL;
+ size_t rounded_size = 0;
+ int index = 0;
+
+ if (page_size == 0) {
+ page_size = iobuf_pool->default_page_size;
+ }
+
+ rounded_size = gf_iobuf_get_pagesize(page_size, &index);
+ if (rounded_size == -1) {
+ /* make sure to provide the requested buffer with standard
+ memory allocations */
+ iobuf = iobuf_get_from_stdalloc(iobuf_pool, page_size);
+
+ gf_msg_debug("iobuf", 0,
+ "request for iobuf of size %zu "
+ "is serviced using standard calloc() (%p) as it "
+ "exceeds the maximum available buffer size",
+ page_size, iobuf);
+
+ iobuf_pool->request_misses++;
+ return iobuf;
+ } else if (index == -1) {
+ gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size=%zu", page_size, NULL);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ iobuf = __iobuf_get(iobuf_pool, rounded_size, index);
+ if (!iobuf) {
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ NULL);
+ goto post_unlock;
}
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- /* most eligible arena for picking an iobuf */
- iobuf_arena = __iobuf_select_arena (iobuf_pool, rounded_size);
- if (!iobuf_arena)
- goto unlock;
-
- iobuf = __iobuf_get (iobuf_arena, rounded_size);
- if (!iobuf)
- goto unlock;
-
- __iobuf_ref (iobuf);
- }
-unlock:
- pthread_mutex_unlock (&iobuf_pool->mutex);
-
- return iobuf;
+ iobuf_ref(iobuf);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+post_unlock:
+ return iobuf;
}
struct iobuf *
-iobuf_get (struct iobuf_pool *iobuf_pool)
+iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size,
+ size_t align_size)
{
- struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
+ size_t req_size = 0;
+ struct iobuf *iobuf = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ req_size = page_size;
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- /* most eligible arena for picking an iobuf */
- iobuf_arena = __iobuf_select_arena (iobuf_pool,
- iobuf_pool->default_page_size);
- if (!iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- goto unlock;
- }
-
- iobuf = __iobuf_get (iobuf_arena,
- iobuf_pool->default_page_size);
- if (!iobuf) {
- gf_log (THIS->name, GF_LOG_WARNING, "iobuf not found");
- goto unlock;
- }
-
- __iobuf_ref (iobuf);
- }
-unlock:
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ if (req_size == 0) {
+ req_size = iobuf_pool->default_page_size;
+ }
-out:
- return iobuf;
+ iobuf = iobuf_get2(iobuf_pool, req_size + align_size);
+ if (!iobuf)
+ return NULL;
+ /* If std allocation was used, then free_ptr will be non-NULL. In this
+ * case, we do not want to modify the original free_ptr.
+ * On the other hand, if the buf was gotten through the available
+ * arenas, then we use iobuf->free_ptr to store the original
+ * pointer to the offset into the mmap'd block of memory and in turn
+ * reuse iobuf->ptr to hold the page-aligned address. And finally, in
+ * iobuf_put(), we copy iobuf->free_ptr into iobuf->ptr - back to where
+ * it was originally when __iobuf_get() returned this iobuf.
+ */
+ if (!iobuf->free_ptr)
+ iobuf->free_ptr = iobuf->ptr;
+ iobuf->ptr = GF_ALIGN_BUF(iobuf->ptr, align_size);
+
+ return iobuf;
}
-void
-__iobuf_put (struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
+struct iobuf *
+iobuf_get(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_pool *iobuf_pool = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ struct iobuf *iobuf = NULL;
+ int index = 0;
- iobuf_pool = iobuf_arena->iobuf_pool;
-
- index = gf_iobuf_get_arena_index (iobuf_arena->page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_DEBUG, "freeing the iobuf (%p) "
- "allocated with standard calloc()", iobuf);
-
- /* free up properly without bothering about lists and all */
- LOCK_DESTROY (&iobuf->lock);
- GF_FREE (iobuf->free_ptr);
- GF_FREE (iobuf);
- return;
- }
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- if (iobuf_arena->passive_cnt == 0) {
- list_del (&iobuf_arena->list);
- list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas[index]);
+ index = gf_iobuf_get_arena_index(iobuf_pool->default_page_size);
+ if (index == -1) {
+ gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size=%zu", iobuf_pool->default_page_size, NULL);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ iobuf = __iobuf_get(iobuf_pool, iobuf_pool->default_page_size, index);
+ if (!iobuf) {
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ NULL);
+ goto out;
}
- list_del_init (&iobuf->list);
- iobuf_arena->active_cnt--;
-
- list_add (&iobuf->list, &iobuf_arena->passive.list);
- iobuf_arena->passive_cnt++;
+ iobuf_ref(iobuf);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
- if (iobuf_arena->active_cnt == 0) {
- list_del (&iobuf_arena->list);
- list_add_tail (&iobuf_arena->list, &iobuf_pool->purge[index]);
- __iobuf_arena_prune (iobuf_pool, iobuf_arena, index);
- }
out:
- return;
+ return iobuf;
}
+static void
+__iobuf_put(struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
+{
+ struct iobuf_pool *iobuf_pool = NULL;
+ int index = 0;
+
+ iobuf_pool = iobuf_arena->iobuf_pool;
+
+ index = gf_iobuf_get_arena_index(iobuf_arena->page_size);
+ if (index == -1) {
+ gf_msg_debug("iobuf", 0,
+ "freeing the iobuf (%p) "
+ "allocated with standard calloc()",
+ iobuf);
+
+ /* free up properly without bothering about lists and all */
+ LOCK_DESTROY(&iobuf->lock);
+ GF_FREE(iobuf->free_ptr);
+ GF_FREE(iobuf);
+ return;
+ }
+
+ if (iobuf_arena->passive_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add_tail(&iobuf_arena->list, &iobuf_pool->arenas[index]);
+ }
+
+ list_del_init(&iobuf->list);
+ iobuf_arena->active_cnt--;
+
+ if (iobuf->free_ptr) {
+ iobuf->ptr = iobuf->free_ptr;
+ iobuf->free_ptr = NULL;
+ }
+
+ list_add(&iobuf->list, &iobuf_arena->passive.list);
+ iobuf_arena->passive_cnt++;
+
+ if (iobuf_arena->active_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add_tail(&iobuf_arena->list, &iobuf_pool->purge[index]);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+ __iobuf_arena_prune(iobuf_pool, iobuf_arena, index);
+ }
+out:
+ return;
+}
void
-iobuf_put (struct iobuf *iobuf)
+iobuf_put(struct iobuf *iobuf)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- iobuf_arena = iobuf->iobuf_arena;
- if (!iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- return;
- }
+ iobuf_arena = iobuf->iobuf_arena;
+ if (!iobuf_arena) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL);
+ return;
+ }
- iobuf_pool = iobuf_arena->iobuf_pool;
- if (!iobuf_pool) {
- gf_log (THIS->name, GF_LOG_WARNING, "iobuf pool not found");
- return;
- }
+ iobuf_pool = iobuf_arena->iobuf_pool;
+ if (!iobuf_pool) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, "iobuf",
+ NULL);
+ return;
+ }
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- __iobuf_put (iobuf, iobuf_arena);
- }
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ __iobuf_put(iobuf, iobuf_arena);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
out:
- return;
+ return;
}
-
void
-iobuf_unref (struct iobuf *iobuf)
+iobuf_unref(struct iobuf *iobuf)
{
- int ref = 0;
+ int ref = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- LOCK (&iobuf->lock);
- {
- __iobuf_unref (iobuf);
- ref = iobuf->ref;
- }
- UNLOCK (&iobuf->lock);
+ ref = GF_ATOMIC_DEC(iobuf->ref);
- if (!ref)
- iobuf_put (iobuf);
+ if (!ref)
+ iobuf_put(iobuf);
out:
- return;
+ return;
}
-
struct iobuf *
-iobuf_ref (struct iobuf *iobuf)
+iobuf_ref(struct iobuf *iobuf)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
-
- LOCK (&iobuf->lock);
- {
- __iobuf_ref (iobuf);
- }
- UNLOCK (&iobuf->lock);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
+ GF_ATOMIC_INC(iobuf->ref);
out:
- return iobuf;
+ return iobuf;
}
-
struct iobref *
-iobref_new ()
+iobref_new()
{
- struct iobref *iobref = NULL;
+ struct iobref *iobref = NULL;
+
+ iobref = GF_MALLOC(sizeof(*iobref), gf_common_mt_iobref);
+ if (!iobref)
+ return NULL;
- iobref = GF_CALLOC (sizeof (*iobref), 1,
- gf_common_mt_iobref);
- if (!iobref)
- return NULL;
+ iobref->iobrefs = GF_CALLOC(sizeof(*iobref->iobrefs), 16,
+ gf_common_mt_iobrefs);
+ if (!iobref->iobrefs) {
+ GF_FREE(iobref);
+ return NULL;
+ }
- LOCK_INIT (&iobref->lock);
+ iobref->allocated = 16;
+ iobref->used = 0;
- iobref->ref++;
+ LOCK_INIT(&iobref->lock);
- return iobref;
+ GF_ATOMIC_INIT(iobref->ref, 1);
+ return iobref;
}
-
struct iobref *
-iobref_ref (struct iobref *iobref)
+iobref_ref(struct iobref *iobref)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
-
- LOCK (&iobref->lock);
- {
- iobref->ref++;
- }
- UNLOCK (&iobref->lock);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ GF_ATOMIC_INC(iobref->ref);
out:
- return iobref;
+ return iobref;
}
-
void
-iobref_destroy (struct iobref *iobref)
+iobref_destroy(struct iobref *iobref)
{
- int i = 0;
- struct iobuf *iobuf = NULL;
+ int i = 0;
+ struct iobuf *iobuf = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- iobuf = iobref->iobrefs[i];
+ for (i = 0; i < iobref->allocated; i++) {
+ iobuf = iobref->iobrefs[i];
- iobref->iobrefs[i] = NULL;
- if (iobuf)
- iobuf_unref (iobuf);
- }
+ iobref->iobrefs[i] = NULL;
+ if (iobuf)
+ iobuf_unref(iobuf);
+ }
- GF_FREE (iobref);
+ GF_FREE(iobref->iobrefs);
+ GF_FREE(iobref);
out:
- return;
+ return;
}
+void
+iobref_unref(struct iobref *iobref)
+{
+ int ref = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ ref = GF_ATOMIC_DEC(iobref->ref);
+
+ if (!ref)
+ iobref_destroy(iobref);
+
+out:
+ return;
+}
void
-iobref_unref (struct iobref *iobref)
+iobref_clear(struct iobref *iobref)
{
- int ref = 0;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
- LOCK (&iobref->lock);
- {
- ref = (--iobref->ref);
+ for (; i < iobref->allocated; i++) {
+ if (iobref->iobrefs[i] != NULL) {
+ iobuf_unref(iobref->iobrefs[i]);
+ } else {
+ /** iobuf's are attached serially */
+ break;
}
- UNLOCK (&iobref->lock);
+ }
- if (!ref)
- iobref_destroy (iobref);
+ iobref_unref(iobref);
out:
- return;
+ return;
}
+static void
+__iobref_grow(struct iobref *iobref)
+{
+ void *newptr = NULL;
+ int i = 0;
+
+ newptr = GF_REALLOC(iobref->iobrefs,
+ iobref->allocated * 2 * (sizeof(*iobref->iobrefs)));
+ if (newptr) {
+ iobref->iobrefs = newptr;
+ iobref->allocated *= 2;
+
+ for (i = iobref->used; i < iobref->allocated; i++)
+ iobref->iobrefs[i] = NULL;
+ }
+}
int
-__iobref_add (struct iobref *iobref, struct iobuf *iobuf)
+__iobref_add(struct iobref *iobref, struct iobuf *iobuf)
{
- int i = 0;
- int ret = -ENOMEM;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
-
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- if (iobref->iobrefs[i] == NULL) {
- iobref->iobrefs[i] = iobuf_ref (iobuf);
- ret = 0;
- break;
- }
+ int i = 0;
+ int ret = -ENOMEM;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
+
+ if (iobref->used == iobref->allocated) {
+ __iobref_grow(iobref);
+
+ if (iobref->used == iobref->allocated) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < iobref->allocated; i++) {
+ if (iobref->iobrefs[i] == NULL) {
+ iobref->iobrefs[i] = iobuf_ref(iobuf);
+ iobref->used++;
+ ret = 0;
+ break;
}
+ }
out:
- return ret;
+ return ret;
}
-
int
-iobref_add (struct iobref *iobref, struct iobuf *iobuf)
+iobref_add(struct iobref *iobref, struct iobuf *iobuf)
{
- int ret = -EINVAL;
+ int ret = -EINVAL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- LOCK (&iobref->lock);
- {
- ret = __iobref_add (iobref, iobuf);
- }
- UNLOCK (&iobref->lock);
+ LOCK(&iobref->lock);
+ {
+ ret = __iobref_add(iobref, iobuf);
+ }
+ UNLOCK(&iobref->lock);
out:
- return ret;
+ return ret;
}
-
int
-iobref_merge (struct iobref *to, struct iobref *from)
+iobref_merge(struct iobref *to, struct iobref *from)
{
- int i = 0;
- int ret = -1;
- struct iobuf *iobuf = NULL;
+ int i = 0;
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", to, out);
- GF_VALIDATE_OR_GOTO ("iobuf", from, out);
+ GF_VALIDATE_OR_GOTO("iobuf", to, out);
+ GF_VALIDATE_OR_GOTO("iobuf", from, out);
- LOCK (&from->lock);
- {
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- iobuf = from->iobrefs[i];
+ LOCK(&from->lock);
+ {
+ for (i = 0; i < from->allocated; i++) {
+ iobuf = from->iobrefs[i];
- if (!iobuf)
- break;
+ if (!iobuf)
+ break;
- ret = iobref_add (to, iobuf);
+ ret = iobref_add(to, iobuf);
- if (ret < 0)
- break;
- }
+ if (ret < 0)
+ break;
}
- UNLOCK (&from->lock);
+ }
+ UNLOCK(&from->lock);
out:
- return ret;
+ return ret;
}
-
size_t
-iobuf_size (struct iobuf *iobuf)
+iobuf_size(struct iobuf *iobuf)
{
- size_t size = 0;
+ size_t size = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- if (!iobuf->iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- goto out;
- }
+ if (!iobuf->iobuf_arena) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL);
+ goto out;
+ }
- if (!iobuf->iobuf_arena->iobuf_pool) {
- gf_log (THIS->name, GF_LOG_WARNING, "pool not found");
- goto out;
- }
+ if (!iobuf->iobuf_arena->iobuf_pool) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, NULL);
+ goto out;
+ }
- size = iobuf->iobuf_arena->page_size;
+ size = iobuf->iobuf_arena->page_size;
out:
- return size;
+ return size;
}
-
size_t
-iobref_size (struct iobref *iobref)
+iobref_size(struct iobref *iobref)
{
- size_t size = 0;
- int i = 0;
+ size_t size = 0;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
- LOCK (&iobref->lock);
- {
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- if (iobref->iobrefs[i])
- size += iobuf_size (iobref->iobrefs[i]);
- }
+ LOCK(&iobref->lock);
+ {
+ for (i = 0; i < iobref->allocated; i++) {
+ if (iobref->iobrefs[i])
+ size += iobuf_size(iobref->iobrefs[i]);
}
- UNLOCK (&iobref->lock);
+ }
+ UNLOCK(&iobref->lock);
out:
- return size;
+ return size;
}
void
-iobuf_info_dump (struct iobuf *iobuf, const char *key_prefix)
+iobuf_info_dump(struct iobuf *iobuf, const char *key_prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- struct iobuf my_iobuf;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ char key[GF_DUMP_MAX_BUF_LEN];
+ struct iobuf my_iobuf;
+ int ret = 0;
- memset(&my_iobuf, 0, sizeof(my_iobuf));
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- ret = TRY_LOCK(&iobuf->lock);
- if (ret) {
- return;
- }
- memcpy(&my_iobuf, iobuf, sizeof(my_iobuf));
- UNLOCK(&iobuf->lock);
+ ret = TRY_LOCK(&iobuf->lock);
+ if (ret) {
+ return;
+ }
+ memcpy(&my_iobuf, iobuf, sizeof(my_iobuf));
+ UNLOCK(&iobuf->lock);
- gf_proc_dump_build_key(key, key_prefix,"ref");
- gf_proc_dump_write(key, "%d", my_iobuf.ref);
- gf_proc_dump_build_key(key, key_prefix,"ptr");
- gf_proc_dump_write(key, "%p", my_iobuf.ptr);
+ gf_proc_dump_build_key(key, key_prefix, "ref");
+ gf_proc_dump_write(key, "%" GF_PRI_ATOMIC, GF_ATOMIC_GET(my_iobuf.ref));
+ gf_proc_dump_build_key(key, key_prefix, "ptr");
+ gf_proc_dump_write(key, "%p", my_iobuf.ptr);
out:
- return;
+ return;
}
void
-iobuf_arena_info_dump (struct iobuf_arena *iobuf_arena, const char *key_prefix)
+iobuf_arena_info_dump(struct iobuf_arena *iobuf_arena, const char *key_prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- struct iobuf *trav;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
-
- gf_proc_dump_build_key(key, key_prefix,"mem_base");
- gf_proc_dump_write(key, "%p", iobuf_arena->mem_base);
- gf_proc_dump_build_key(key, key_prefix, "active_cnt");
- gf_proc_dump_write(key, "%d", iobuf_arena->active_cnt);
- gf_proc_dump_build_key(key, key_prefix, "passive_cnt");
- gf_proc_dump_write(key, "%d", iobuf_arena->passive_cnt);
- gf_proc_dump_build_key(key, key_prefix, "alloc_cnt");
- gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->alloc_cnt);
- gf_proc_dump_build_key(key, key_prefix, "max_active");
- gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->max_active);
- gf_proc_dump_build_key(key, key_prefix, "page_size");
- gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->page_size);
- list_for_each_entry (trav, &iobuf_arena->active.list, list) {
- gf_proc_dump_build_key(key, key_prefix,"active_iobuf.%d", i++);
- gf_proc_dump_add_section(key);
- iobuf_info_dump(trav, key);
- }
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 1;
+ struct iobuf *trav;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
+
+ gf_proc_dump_build_key(key, key_prefix, "mem_base");
+ gf_proc_dump_write(key, "%p", iobuf_arena->mem_base);
+ gf_proc_dump_build_key(key, key_prefix, "active_cnt");
+ gf_proc_dump_write(key, "%d", iobuf_arena->active_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "passive_cnt");
+ gf_proc_dump_write(key, "%d", iobuf_arena->passive_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "alloc_cnt");
+ gf_proc_dump_write(key, "%" PRIu64, iobuf_arena->alloc_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "max_active");
+ gf_proc_dump_write(key, "%d", iobuf_arena->max_active);
+ gf_proc_dump_build_key(key, key_prefix, "page_size");
+ gf_proc_dump_write(key, "%" GF_PRI_SIZET, iobuf_arena->page_size);
+ list_for_each_entry(trav, &iobuf_arena->active.list, list)
+ {
+ gf_proc_dump_build_key(key, key_prefix, "active_iobuf.%d", i++);
+ gf_proc_dump_add_section("%s", key);
+ iobuf_info_dump(trav, key);
+ }
out:
- return;
+ return;
}
void
-iobuf_stats_dump (struct iobuf_pool *iobuf_pool)
+iobuf_stats_dump(struct iobuf_pool *iobuf_pool)
{
- char msg[1024];
- struct iobuf_arena *trav = NULL;
- int i = 1;
- int j = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ char msg[1024];
+ struct iobuf_arena *trav = NULL;
+ int i = 1;
+ int j = 0;
+ int ret = -1;
- memset(msg, 0, sizeof(msg));
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- ret = pthread_mutex_trylock(&iobuf_pool->mutex);
+ ret = pthread_mutex_trylock(&iobuf_pool->mutex);
- if (ret) {
- return;
+ if (ret) {
+ return;
+ }
+ gf_proc_dump_add_section("iobuf.global");
+ gf_proc_dump_write("iobuf_pool", "%p", iobuf_pool);
+ gf_proc_dump_write("iobuf_pool.default_page_size", "%" GF_PRI_SIZET,
+ iobuf_pool->default_page_size);
+ gf_proc_dump_write("iobuf_pool.arena_size", "%" GF_PRI_SIZET,
+ iobuf_pool->arena_size);
+ gf_proc_dump_write("iobuf_pool.arena_cnt", "%d", iobuf_pool->arena_cnt);
+ gf_proc_dump_write("iobuf_pool.request_misses", "%" PRId64,
+ iobuf_pool->request_misses);
+
+ for (j = 0; j < IOBUF_ARENA_MAX_INDEX; j++) {
+ list_for_each_entry(trav, &iobuf_pool->arenas[j], list)
+ {
+ snprintf(msg, sizeof(msg), "arena.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
}
- gf_proc_dump_add_section("iobuf.global");
- gf_proc_dump_write("iobuf_pool","%p", iobuf_pool);
- gf_proc_dump_write("iobuf_pool.default_page_size", "%d",
- iobuf_pool->default_page_size);
- gf_proc_dump_write("iobuf_pool.arena_size", "%d",
- iobuf_pool->arena_size);
- gf_proc_dump_write("iobuf_pool.arena_cnt", "%d",
- iobuf_pool->arena_cnt);
- gf_proc_dump_write("iobuf_pool.request_misses", "%"PRId64,
- iobuf_pool->request_misses);
-
- for (j = 0; j < IOBUF_ARENA_MAX_INDEX; j++) {
- list_for_each_entry (trav, &iobuf_pool->arenas[j], list) {
- snprintf(msg, sizeof(msg),
- "arena.%d", i);
- gf_proc_dump_add_section(msg);
- iobuf_arena_info_dump(trav,msg);
- i++;
- }
- list_for_each_entry (trav, &iobuf_pool->purge[j], list) {
- snprintf(msg, sizeof(msg),
- "purge.%d", i);
- gf_proc_dump_add_section(msg);
- iobuf_arena_info_dump(trav,msg);
- i++;
- }
- list_for_each_entry (trav, &iobuf_pool->filled[j], list) {
- snprintf(msg, sizeof(msg),
- "filled.%d", i);
- gf_proc_dump_add_section(msg);
- iobuf_arena_info_dump(trav,msg);
- i++;
- }
-
+ list_for_each_entry(trav, &iobuf_pool->purge[j], list)
+ {
+ snprintf(msg, sizeof(msg), "purge.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
}
+ list_for_each_entry(trav, &iobuf_pool->filled[j], list)
+ {
+ snprintf(msg, sizeof(msg), "filled.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
+ }
+ }
- pthread_mutex_unlock(&iobuf_pool->mutex);
+ pthread_mutex_unlock(&iobuf_pool->mutex);
out:
- return;
+ return;
}
-
void
iobuf_to_iovec(struct iobuf *iob, struct iovec *iov)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iob, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iov, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iob, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iov, out);
- iov->iov_base = iobuf_ptr (iob);
- iov->iov_len = iobuf_pagesize (iob);
+ iov->iov_base = iobuf_ptr(iob);
+ iov->iov_len = iobuf_pagesize(iob);
out:
- return;
+ return;
+}
+
+int
+iobuf_copy(struct iobuf_pool *iobuf_pool, const struct iovec *iovec_src,
+ int iovcnt, struct iobref **iobref, struct iobuf **iobuf,
+ struct iovec *iov_dst)
+{
+ size_t size = -1;
+ int ret = 0;
+
+ size = iov_length(iovec_src, iovcnt);
+
+ *iobuf = iobuf_get2(iobuf_pool, size);
+ if (!(*iobuf)) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ *iobref = iobref_new();
+ if (!(*iobref)) {
+ iobuf_unref(*iobuf);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = iobref_add(*iobref, *iobuf);
+ if (ret) {
+ iobuf_unref(*iobuf);
+ iobref_unref(*iobref);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ iov_unload(iobuf_ptr(*iobuf), iovec_src, iovcnt);
+
+ iov_dst->iov_base = iobuf_ptr(*iobuf);
+ iov_dst->iov_len = size;
+
+out:
+ return ret;
}
diff --git a/libglusterfs/src/iobuf.h b/libglusterfs/src/iobuf.h
deleted file mode 100644
index b9c2a380711..00000000000
--- a/libglusterfs/src/iobuf.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _IOBUF_H_
-#define _IOBUF_H_
-
-#include "list.h"
-#include "common-utils.h"
-#include <pthread.h>
-#include <sys/mman.h>
-#include <sys/uio.h>
-
-#define GF_VARIABLE_IOBUF_COUNT 32
-#define GF_IOBREF_IOBUF_COUNT 16
-
-/* Lets try to define the new anonymous mapping
- * flag, in case the system is still using the
- * now deprecated MAP_ANON flag.
- *
- * Also, this should ideally be in a centralized/common
- * header which can be used by other source files also.
- */
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-#define GF_ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \
- (unsigned long)(~(bound - 1))))
-
-#define GF_IOBUF_ALIGN_SIZE 512
-
-/* one allocatable unit for the consumers of the IOBUF API */
-/* each unit hosts @page_size bytes of memory */
-struct iobuf;
-
-/* one region of memory MMAPed from the operating system */
-/* each region MMAPs @arena_size bytes of memory */
-/* each arena hosts @arena_size / @page_size IOBUFs */
-struct iobuf_arena;
-
-/* expandable and contractable pool of memory, internally broken into arenas */
-struct iobuf_pool;
-
-struct iobuf_init_config {
- size_t pagesize;
- int32_t num_pages;
-};
-
-struct iobuf {
- union {
- struct list_head list;
- struct {
- struct iobuf *next;
- struct iobuf *prev;
- };
- };
- struct iobuf_arena *iobuf_arena;
-
- gf_lock_t lock; /* for ->ptr and ->ref */
- int ref; /* 0 == passive, >0 == active */
-
- void *ptr; /* usable memory region by the consumer */
-
- void *free_ptr; /* in case of stdalloc, this is the
- one to be freed */
-};
-
-
-struct iobuf_arena {
- union {
- struct list_head list;
- struct {
- struct iobuf_arena *next;
- struct iobuf_arena *prev;
- };
- };
-
- size_t page_size; /* size of all iobufs in this arena */
- size_t arena_size; /* this is equal to
- (iobuf_pool->arena_size / page_size)
- * page_size */
- size_t page_count;
-
- struct iobuf_pool *iobuf_pool;
-
- void *mem_base;
- struct iobuf *iobufs; /* allocated iobufs list */
-
- int active_cnt;
- struct iobuf active; /* head node iobuf
- (unused by itself) */
- int passive_cnt;
- struct iobuf passive; /* head node iobuf
- (unused by itself) */
- uint64_t alloc_cnt; /* total allocs in this pool */
- int max_active; /* max active buffers at a given time */
-};
-
-
-struct iobuf_pool {
- pthread_mutex_t mutex;
- size_t arena_size; /* size of memory region in
- arena */
- size_t default_page_size; /* default size of iobuf */
-
- int arena_cnt;
- struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
- /* array of arenas. Each element of the array is a list of arenas
- holding iobufs of particular page_size */
-
- struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
- /* array of arenas without free iobufs */
-
- struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
- /* array of of arenas which can be purged */
-
- uint64_t request_misses; /* mostly the requests for higher
- value of iobufs */
-};
-
-
-struct iobuf_pool *iobuf_pool_new (void);
-void iobuf_pool_destroy (struct iobuf_pool *iobuf_pool);
-struct iobuf *iobuf_get (struct iobuf_pool *iobuf_pool);
-void iobuf_unref (struct iobuf *iobuf);
-struct iobuf *iobuf_ref (struct iobuf *iobuf);
-void iobuf_pool_destroy (struct iobuf_pool *iobuf_pool);
-void iobuf_to_iovec(struct iobuf *iob, struct iovec *iov);
-
-#define iobuf_ptr(iob) ((iob)->ptr)
-#define iobpool_default_pagesize(iobpool) ((iobpool)->default_page_size)
-#define iobuf_pagesize(iob) (iob->iobuf_arena->page_size)
-
-
-struct iobref {
- gf_lock_t lock;
- int ref;
- struct iobuf *iobrefs[GF_IOBREF_IOBUF_COUNT];
-};
-
-struct iobref *iobref_new ();
-struct iobref *iobref_ref (struct iobref *iobref);
-void iobref_unref (struct iobref *iobref);
-int iobref_add (struct iobref *iobref, struct iobuf *iobuf);
-int iobref_merge (struct iobref *to, struct iobref *from);
-
-
-size_t iobuf_size (struct iobuf *iobuf);
-size_t iobref_size (struct iobref *iobref);
-void iobuf_stats_dump (struct iobuf_pool *iobuf_pool);
-
-struct iobuf *
-iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size);
-#endif /* !_IOBUF_H_ */
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index 58e8b915852..ce4b0e8255d 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -8,167 +8,77 @@
cases as published by the Free Software Foundation.
*/
-
/*
* This file contains functions to support dumping of
* latencies of FOPs broken down by subvolumes.
*/
-#include "glusterfs.h"
-#include "stack.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "statedump.h"
-
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/statedump.h"
-void
-gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops, void *fn)
+gf_latency_t *
+gf_latency_new(size_t n)
{
- glusterfs_fop_t fop = -1;
+ int i = 0;
+ gf_latency_t *lat = NULL;
- if (fops->stat == fn)
- fop = GF_FOP_STAT;
- else if (fops->readlink == fn)
- fop = GF_FOP_READLINK;
- else if (fops->mknod == fn)
- fop = GF_FOP_MKNOD;
- else if (fops->mkdir == fn)
- fop = GF_FOP_MKDIR;
- else if (fops->unlink == fn)
- fop = GF_FOP_UNLINK;
- else if (fops->rmdir == fn)
- fop = GF_FOP_RMDIR;
- else if (fops->symlink == fn)
- fop = GF_FOP_SYMLINK;
- else if (fops->rename == fn)
- fop = GF_FOP_RENAME;
- else if (fops->link == fn)
- fop = GF_FOP_LINK;
- else if (fops->truncate == fn)
- fop = GF_FOP_TRUNCATE;
- else if (fops->open == fn)
- fop = GF_FOP_OPEN;
- else if (fops->readv == fn)
- fop = GF_FOP_READ;
- else if (fops->writev == fn)
- fop = GF_FOP_WRITE;
- else if (fops->statfs == fn)
- fop = GF_FOP_STATFS;
- else if (fops->flush == fn)
- fop = GF_FOP_FLUSH;
- else if (fops->fsync == fn)
- fop = GF_FOP_FSYNC;
- else if (fops->setxattr == fn)
- fop = GF_FOP_SETXATTR;
- else if (fops->getxattr == fn)
- fop = GF_FOP_GETXATTR;
- else if (fops->removexattr == fn)
- fop = GF_FOP_REMOVEXATTR;
- else if (fops->opendir == fn)
- fop = GF_FOP_OPENDIR;
- else if (fops->fsyncdir == fn)
- fop = GF_FOP_FSYNCDIR;
- else if (fops->access == fn)
- fop = GF_FOP_ACCESS;
- else if (fops->create == fn)
- fop = GF_FOP_CREATE;
- else if (fops->ftruncate == fn)
- fop = GF_FOP_FTRUNCATE;
- else if (fops->fstat == fn)
- fop = GF_FOP_FSTAT;
- else if (fops->lk == fn)
- fop = GF_FOP_LK;
- else if (fops->lookup == fn)
- fop = GF_FOP_LOOKUP;
- else if (fops->readdir == fn)
- fop = GF_FOP_READDIR;
- else if (fops->inodelk == fn)
- fop = GF_FOP_INODELK;
- else if (fops->finodelk == fn)
- fop = GF_FOP_FINODELK;
- else if (fops->entrylk == fn)
- fop = GF_FOP_ENTRYLK;
- else if (fops->fentrylk == fn)
- fop = GF_FOP_FENTRYLK;
- else if (fops->xattrop == fn)
- fop = GF_FOP_XATTROP;
- else if (fops->fxattrop == fn)
- fop = GF_FOP_FXATTROP;
- else if (fops->fgetxattr == fn)
- fop = GF_FOP_FGETXATTR;
- else if (fops->fsetxattr == fn)
- fop = GF_FOP_FSETXATTR;
- else if (fops->rchecksum == fn)
- fop = GF_FOP_RCHECKSUM;
- else if (fops->setattr == fn)
- fop = GF_FOP_SETATTR;
- else if (fops->fsetattr == fn)
- fop = GF_FOP_FSETATTR;
- else if (fops->readdirp == fn)
- fop = GF_FOP_READDIRP;
- else if (fops->getspec == fn)
- fop = GF_FOP_GETSPEC;
- else
- fop = -1;
+ lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t);
+ if (!lat)
+ return NULL;
- frame->op = fop;
+ for (i = 0; i < n; i++) {
+ gf_latency_reset(lat + i);
+ }
+ return lat;
}
-
void
-gf_update_latency (call_frame_t *frame)
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end)
{
- double elapsed;
- struct timeval *begin, *end;
-
- fop_latency_t *lat;
+ if (!(begin->tv_sec && end->tv_sec)) {
+ /*Measure latency might have been enabled/disabled during the op*/
+ return;
+ }
- begin = &frame->begin;
- end = &frame->end;
+ double elapsed = gf_tsdiff(begin, end);
- elapsed = (end->tv_sec - begin->tv_sec) * 1e6
- + (end->tv_usec - begin->tv_usec);
+ if (lat->max < elapsed)
+ lat->max = elapsed;
- lat = &frame->this->latencies[frame->op];
+ if (lat->min > elapsed)
+ lat->min = elapsed;
- lat->total += elapsed;
- lat->count++;
- lat->mean = lat->mean + (elapsed - lat->mean) / lat->count;
+ lat->total += elapsed;
+ lat->count++;
}
-
void
-gf_proc_dump_latency_info (xlator_t *xl)
+gf_latency_reset(gf_latency_t *lat)
{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i;
-
- snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
- gf_proc_dump_add_section (key_prefix);
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- gf_proc_dump_build_key (key, key_prefix, gf_fop_list[i]);
-
- gf_proc_dump_write (key, "%.03f,%"PRId64",%.03f",
- xl->latencies[i].mean,
- xl->latencies[i].count,
- xl->latencies[i].total);
- }
+ if (!lat)
+ return;
+ memset(lat, 0, sizeof(*lat));
+ lat->min = ULLONG_MAX;
+ /* make sure 'min' is set to high value, so it would be
+ properly set later */
}
-
void
-gf_latency_toggle (int signum)
+gf_frame_latency_update(call_frame_t *frame)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = glusterfs_ctx_get ();
-
- if (ctx) {
- ctx->measure_latency = !ctx->measure_latency;
- gf_log ("[core]", GF_LOG_INFO,
- "Latency measurement turned %s",
- ctx->measure_latency ? "on" : "off");
- }
+ gf_latency_t *lat;
+ /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
+ set it right anyways for those frames */
+ if (!frame->op)
+ frame->op = frame->root->op;
+
+ if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+ gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+ frame->op);
+ return;
+ }
+
+ lat = &frame->this->stats.interval.latencies[frame->op];
+ gf_latency_update(lat, &frame->begin, &frame->end);
}
diff --git a/libglusterfs/src/latency.h b/libglusterfs/src/latency.h
deleted file mode 100644
index 16c5994b008..00000000000
--- a/libglusterfs/src/latency.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __LATENCY_H__
-#define __LATENCY_H__
-
-
-typedef struct fop_latency {
- uint64_t min; /* min time for the call (microseconds) */
- uint64_t max; /* max time for the call (microseconds) */
- double total; /* total time (microseconds) */
- double std; /* standard deviation */
- double mean; /* mean (microseconds) */
- uint64_t count;
-} fop_latency_t;
-
-void
-gf_latency_toggle (int signum);
-
-#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
new file mode 100644
index 00000000000..5f18cd56cbe
--- /dev/null
+++ b/libglusterfs/src/libglusterfs.sym
@@ -0,0 +1,1193 @@
+are_dicts_equal
+args_access_cbk_store
+args_access_store
+args_create_cbk_store
+args_create_store
+args_discard_cbk_store
+args_discard_store
+args_entrylk_cbk_store
+args_entrylk_store
+args_fallocate_cbk_store
+args_fallocate_store
+args_fentrylk_cbk_store
+args_fentrylk_store
+args_fgetxattr_cbk_store
+args_fgetxattr_store
+args_finodelk_cbk_store
+args_finodelk_store
+args_flush_cbk_store
+args_flush_store
+args_fremovexattr_cbk_store
+args_fremovexattr_store
+args_fsetattr_cbk_store
+args_fsetattr_store
+args_fsetxattr_cbk_store
+args_fsetxattr_store
+args_fstat_cbk_store
+args_fstat_store
+args_fsync_cbk_store
+args_fsyncdir_cbk_store
+args_fsyncdir_store
+args_fsync_store
+args_ftruncate_cbk_store
+args_ftruncate_store
+args_fxattrop_cbk_store
+args_fxattrop_store
+args_getxattr_cbk_store
+args_getxattr_store
+args_inodelk_cbk_store
+args_inodelk_store
+args_ipc_cbk_store
+args_lease_cbk_store
+args_lease_store
+args_link_cbk_store
+args_link_store
+args_lk_cbk_store
+args_lk_store
+args_lookup_cbk_store
+args_lookup_store
+args_mkdir_cbk_store
+args_mkdir_store
+args_mknod_cbk_store
+args_mknod_store
+args_open_cbk_store
+args_opendir_cbk_store
+args_opendir_store
+args_open_store
+args_rchecksum_cbk_store
+args_rchecksum_store
+args_readdir_cbk_store
+args_readdirp_cbk_store
+args_readdirp_store
+args_readdir_store
+args_readlink_cbk_store
+args_readlink_store
+args_readv_cbk_store
+args_readv_store
+args_removexattr_cbk_store
+args_removexattr_store
+args_rename_cbk_store
+args_rename_store
+args_rmdir_cbk_store
+args_rmdir_store
+args_seek_cbk_store
+args_seek_store
+args_setattr_cbk_store
+args_setattr_store
+args_setxattr_cbk_store
+args_setxattr_store
+args_stat_cbk_store
+args_statfs_cbk_store
+args_statfs_store
+args_stat_store
+args_symlink_cbk_store
+args_symlink_store
+args_truncate_cbk_store
+args_truncate_store
+args_unlink_cbk_store
+args_unlink_store
+args_writev_cbk_store
+args_writev_store
+args_xattrop_cbk_store
+args_xattrop_store
+args_zerofill_cbk_store
+args_zerofill_store
+args_copy_file_range_cbk_store
+args_copy_file_range_store
+bin_to_data
+call_resume
+call_resume_keep_stub
+call_resume_wind
+call_stack_set_groups
+call_stub_destroy
+call_unwind_error
+call_unwind_error_keep_stub
+client_ctx_del
+client_ctx_get
+client_ctx_set
+client_dump
+close_fds_except
+cluster_fop_success_fill
+cluster_fstat
+cluster_ftruncate
+cluster_fxattrop
+cluster_getxattr
+cluster_inodelk
+cluster_link
+cluster_lookup
+cluster_mkdir
+cluster_mknod
+cluster_open
+cluster_readlink
+cluster_replies_wipe
+cluster_rmdir
+cluster_setattr
+cluster_setxattr
+cluster_symlink
+cluster_tiebreaker_inodelk
+cluster_uninodelk
+cluster_unlink
+cluster_xattrop
+cluster_xattrop_cbk
+copy_opts_to_child
+create_frame
+data_copy
+data_destroy
+data_from_dynptr
+data_from_uint64
+data_ref
+data_to_bin
+data_to_int32
+data_to_int64
+data_to_ptr
+data_to_str
+data_to_uint16
+data_to_uint32
+data_to_uint64
+data_to_uint8
+data_to_iatt
+data_unref
+default_access
+default_access_cbk
+default_access_failure_cbk
+default_access_resume
+default_create
+default_create_cbk
+default_create_failure_cbk
+default_create_resume
+default_discard
+default_discard_cbk
+default_discard_failure_cbk
+default_discard_resume
+default_entrylk
+default_entrylk_cbk
+default_entrylk_failure_cbk
+default_entrylk_resume
+default_fallocate
+default_fallocate_cbk
+default_fallocate_failure_cbk
+default_fallocate_resume
+default_fentrylk
+default_fentrylk_cbk
+default_fentrylk_failure_cbk
+default_fentrylk_resume
+default_fgetxattr
+default_fgetxattr_cbk
+default_fgetxattr_failure_cbk
+default_fgetxattr_resume
+default_finodelk
+default_finodelk_cbk
+default_finodelk_failure_cbk
+default_finodelk_resume
+default_flush
+default_flush_cbk
+default_flush_failure_cbk
+default_flush_resume
+default_forget
+default_fremovexattr
+default_fremovexattr_cbk
+default_fremovexattr_failure_cbk
+default_fremovexattr_resume
+default_fsetattr
+default_fsetattr_cbk
+default_fsetattr_failure_cbk
+default_fsetattr_resume
+default_fsetxattr
+default_fsetxattr_cbk
+default_fsetxattr_failure_cbk
+default_fsetxattr_resume
+default_fstat
+default_fstat_cbk
+default_fstat_failure_cbk
+default_fstat_resume
+default_fsync
+default_fsync_cbk
+default_fsyncdir
+default_fsyncdir_cbk
+default_fsyncdir_failure_cbk
+default_fsyncdir_resume
+default_fsync_failure_cbk
+default_fsync_resume
+default_ftruncate
+default_ftruncate_cbk
+default_ftruncate_failure_cbk
+default_ftruncate_resume
+default_fxattrop
+default_fxattrop_cbk
+default_fxattrop_failure_cbk
+default_fxattrop_resume
+default_getactivelk
+default_getactivelk_failure_cbk
+default_getactivelk_resume
+default_getspec
+default_getspec_cbk
+default_getxattr
+default_getxattr_cbk
+default_getxattr_failure_cbk
+default_getxattr_resume
+default_inodelk
+default_inodelk_cbk
+default_inodelk_failure_cbk
+default_inodelk_resume
+default_ipc
+default_ipc_cbk
+default_lease
+default_lease_failure_cbk
+default_lease_resume
+default_link
+default_link_cbk
+default_link_failure_cbk
+default_link_resume
+default_lk
+default_lk_cbk
+default_lk_failure_cbk
+default_lk_resume
+default_lookup
+default_lookup_cbk
+default_lookup_failure_cbk
+default_lookup_resume
+default_mem_acct_init
+default_mkdir
+default_mkdir_cbk
+default_mkdir_failure_cbk
+default_mkdir_resume
+default_mknod
+default_mknod_cbk
+default_mknod_failure_cbk
+default_mknod_resume
+default_notify
+default_open
+default_open_cbk
+default_opendir
+default_opendir_cbk
+default_opendir_failure_cbk
+default_opendir_resume
+default_open_failure_cbk
+default_open_resume
+default_rchecksum
+default_rchecksum_cbk
+default_rchecksum_failure_cbk
+default_rchecksum_resume
+default_readdir
+default_readdir_cbk
+default_readdir_failure_cbk
+default_readdirp
+default_readdirp_cbk
+default_readdirp_failure_cbk
+default_readdirp_resume
+default_readdir_resume
+default_readlink
+default_readlink_cbk
+default_readlink_failure_cbk
+default_readlink_resume
+default_readv
+default_readv_cbk
+default_readv_failure_cbk
+default_readv_resume
+default_release
+default_releasedir
+default_removexattr
+default_removexattr_cbk
+default_removexattr_failure_cbk
+default_removexattr_resume
+default_rename
+default_rename_cbk
+default_rename_failure_cbk
+default_rename_resume
+default_rmdir
+default_rmdir_cbk
+default_rmdir_failure_cbk
+default_rmdir_resume
+default_seek
+default_seek_cbk
+default_seek_failure_cbk
+default_seek_resume
+default_setactivelk
+default_setactivelk_failure_cbk
+default_setactivelk_resume
+default_setattr
+default_setattr_cbk
+default_setattr_failure_cbk
+default_setattr_resume
+default_setxattr
+default_setxattr_cbk
+default_setxattr_failure_cbk
+default_setxattr_resume
+default_stat
+default_stat_cbk
+default_stat_failure_cbk
+default_statfs
+default_statfs_cbk
+default_statfs_failure_cbk
+default_statfs_resume
+default_stat_resume
+default_symlink
+default_symlink_cbk
+default_symlink_failure_cbk
+default_symlink_resume
+default_truncate
+default_truncate_cbk
+default_truncate_failure_cbk
+default_truncate_resume
+default_unlink
+default_unlink_cbk
+default_unlink_failure_cbk
+default_unlink_resume
+default_writev
+default_writev_cbk
+default_writev_failure_cbk
+default_writev_resume
+default_xattrop
+default_xattrop_cbk
+default_xattrop_failure_cbk
+default_xattrop_resume
+default_zerofill
+default_zerofill_cbk
+default_zerofill_failure_cbk
+default_zerofill_resume
+default_put
+default_put_cbk
+default_put_failure_cbk
+default_put_resume
+default_copy_file_range
+default_copy_file_range_cbk
+default_copy_file_range_failure_cbk
+default_copy_file_range_resume
+dht_is_linkfile
+dict_add
+dict_addn
+dict_add_dynstr_with_alloc
+dict_allocate_and_serialize
+dict_copy
+dict_copy_with_ref
+dict_del
+dict_deln
+dict_dump_to_statedump
+dict_dump_to_str
+dict_dump_to_log
+dict_foreach
+dict_foreach_fnmatch
+dict_foreach_match
+dict_for_key_value
+dict_get
+dict_getn
+dict_get_bin
+dict_get_double
+dict_get_gfuuid
+dict_get_iatt
+dict_get_mdata
+dict_get_int16
+dict_get_int32
+dict_get_int32n
+dict_get_int64
+dict_get_int8
+dict_get_ptr
+dict_get_ptr_and_len
+dict_get_str
+dict_get_strn
+dict_get_str_boolean
+dict_get_uint32
+dict_get_uint64
+dict_get_with_ref
+dict_has_key_from_array
+dict_key_count
+dict_keys_join
+dict_lookup
+dict_new
+dict_null_foreach_fn
+dict_ref
+dict_remove_foreach_fn
+dict_rename_key
+dict_reset
+dict_serialize
+dict_serialized_length
+dict_serialized_length_lk
+dict_serialize_value_with_delim
+dict_set
+dict_setn
+dict_set_bin
+dict_set_double
+dict_set_dynptr
+dict_set_dynstr
+dict_set_dynstrn
+dict_set_dynstr_with_alloc
+dict_set_gfuuid
+dict_set_iatt
+dict_set_mdata
+dict_set_int16
+dict_set_int32
+dict_set_int32n
+dict_set_int64
+dict_set_int8
+dict_set_static_bin
+dict_set_static_ptr
+dict_set_str
+dict_set_strn
+dict_setn_nstrn
+dict_set_nstrn
+dict_set_uint32
+dict_set_uint64
+dict_set_flag
+dict_clear_flag
+dict_check_flag
+dict_unref
+dict_unserialize
+drop_token
+eh_destroy
+eh_dump
+eh_new
+eh_save_history
+entry_copy
+gf_event_dispatch
+gf_event_dispatch_destroy
+gf_event_handled
+gf_event_pool_destroy
+gf_event_pool_new
+gf_event_reconfigure_threads
+gf_event_register
+gf_event_select_on
+gf_event_unregister
+gf_event_unregister_close
+fd_anonymous
+fd_anonymous_with_flags
+fd_bind
+fd_close
+fd_create
+fd_create_uint64
+__fd_ctx_del
+fd_ctx_del
+fd_ctx_dump
+__fd_ctx_get
+fd_ctx_get
+__fd_ctx_set
+fd_ctx_set
+fd_is_anonymous
+fd_list_empty
+fd_lk_ctx_empty
+fd_lk_ctx_ref
+fd_lk_ctx_unref
+fd_lk_insert_and_merge
+fd_lookup
+fd_lookup_anonymous
+fd_lookup_uint64
+__fd_ref
+fd_ref
+fd_unref
+_fini
+fop_access_stub
+fop_create_stub
+fop_copy_file_range_stub
+fop_copy_file_range_cbk_stub
+fop_discard_stub
+fop_entrylk_stub
+fop_enum_to_pri_string
+fop_fallocate_stub
+fop_fentrylk_stub
+fop_fgetxattr_stub
+fop_finodelk_stub
+fop_flush_stub
+fop_fremovexattr_cbk_stub
+fop_fremovexattr_stub
+fop_fsetattr_stub
+fop_fsetxattr_cbk_stub
+fop_fsetxattr_stub
+fop_fstat_stub
+fop_fsync_cbk_stub
+fop_fsyncdir_stub
+fop_fsync_stub
+fop_ftruncate_cbk_stub
+fop_ftruncate_stub
+fop_fxattrop_stub
+fop_getactivelk_stub
+fop_getxattr_stub
+fop_icreate_stub
+fop_inodelk_stub
+fop_ipc_stub
+fop_lease_stub
+fop_link_stub
+fop_lk_stub
+fop_log_level
+fop_lookup_cbk_stub
+fop_lookup_stub
+fop_mkdir_stub
+fop_mknod_stub
+fop_namelink_stub
+fop_opendir_stub
+fop_open_stub
+fop_put_stub
+fop_rchecksum_stub
+fop_readdirp_stub
+fop_readdir_stub
+fop_readlink_stub
+fop_readv_stub
+fop_removexattr_cbk_stub
+fop_removexattr_stub
+fop_rename_cbk_stub
+fop_rename_stub
+fop_rmdir_cbk_stub
+fop_rmdir_stub
+fop_seek_stub
+fop_setactivelk_stub
+fop_setattr_stub
+fop_setxattr_stub
+fop_statfs_stub
+fop_stat_stub
+fop_symlink_stub
+fop_truncate_cbk_stub
+fop_truncate_stub
+fop_unlink_cbk_stub
+fop_unlink_stub
+fop_writev_cbk_stub
+fop_writev_stub
+fop_xattrop_stub
+fop_zerofill_stub
+generate_glusterfs_ctx_id
+get_checksum_for_file
+get_checksum_for_path
+get_file_mtime
+get_host_name
+get_mem_size
+get_path_name
+get_xlator_by_name
+get_xlator_by_type
+gf_array_insertionsort
+gf_asprintf
+gf_async
+gf_async_adjust_threads
+gf_async_ctrl
+gf_async_init
+gf_async_fini
+gf_backtrace_save
+gf_bits_count
+gf_bits_index
+gf_build_absolute_path
+__gf_calloc
+gf_canonicalize_path
+gf_check_log_format
+gf_check_logger
+gf_client_disconnect
+gf_client_dump_fdtables
+gf_client_dump_fdtables_to_dict
+gf_client_dump_inodes
+gf_client_dump_inodes_to_dict
+gf_client_get
+gf_client_put
+gf_client_ref
+gf_clienttable_alloc
+gf_client_unref
+gf_cmd_log
+gf_cmd_log_init
+gf_compare_sockaddr
+gf_deitransform
+gf_dirent_entry_free
+gf_dirent_for_name
+gf_dirent_free
+gf_dirent_orig_offset
+gf_dm_hashfn
+gf_dnscache_init
+gf_dnscache_deinit
+gf_errno_to_error
+gf_error_to_errno
+_gf_event
+gf_fd_fdptr_get
+gf_fd_fdtable_alloc
+gf_fd_fdtable_copy_all_fds
+gf_fd_fdtable_destroy
+gf_fd_fdtable_get_all_fds
+gf_fdptr_put
+gf_fd_put
+gf_fd_unused_get
+gf_fill_iatt_for_dirent
+gf_fop_int
+gf_fop_string
+__gf_free
+gf_free_mig_locks
+gf_getgrouplist
+gf_get_hostname_from_ip
+gf_get_index_by_elem
+gf_global_mem_acct_enable_set
+gfid_to_ino
+gf_inode_type_to_str
+gf_is_ip_in_net
+gf_is_local_addr
+gf_is_same_address
+gf_is_service_running
+gf_is_str_int
+gf_is_valid_xattr_namespace
+gf_is_zero_filled_stat
+gf_itransform
+gf_link_inodes_from_dirent
+_gf_log
+_gf_log_callingfn
+gf_log_disable_suppression_before_exit
+gf_log_dump_graph
+_gf_log_eh
+gf_log_fini
+gf_log_get_localtime
+gf_log_get_loglevel
+gf_log_globals_init
+gf_log_init
+gf_log_inject_timer_event
+gf_log_logrotate
+gf_log_set_localtime
+gf_log_set_log_buf_size
+gf_log_set_log_flush_timeout
+gf_log_set_logformat
+gf_log_set_logger
+gf_log_set_loglevel
+gf_lstat_dir
+__gf_malloc
+gf_mem_acct_enable_set
+gf_monitor_metrics
+_gf_msg
+_gf_msg_nomem
+gf_nwrite
+gf_path_strip_trailing_slashes
+gf_print_trace
+gf_proc_dump_add_section
+gf_proc_dump_info
+gf_proc_dump_init
+gf_proc_dump_mallinfo
+gf_proc_dump_mem_info
+gf_proc_dump_mem_info_to_dict
+gf_proc_dump_mempool_info
+gf_proc_dump_mempool_info_to_dict
+gf_proc_dump_pending_frames
+gf_proc_dump_pending_frames_to_dict
+gf_proc_dump_write
+gf_proc_dump_xlator_history
+gf_proc_dump_xlator_meminfo
+gf_proc_dump_xlator_private
+gf_proc_dump_xlator_profile
+gf_process_getspec_servers_list
+gf_process_reserved_ports
+__gf_realloc
+_gf_ref_get
+_gf_ref_init
+_gf_ref_put
+gf_resolve_ip6
+gf_resolve_path_parent
+gf_rev_dns_lookup
+gf_rev_dns_lookup_cached
+gf_rsync_strong_checksum
+gf_rsync_md5_checksum
+gf_rsync_weak_checksum
+gf_set_log_file_path
+gf_set_log_ident
+gf_set_timestamp
+gf_set_volfile_server_common
+_gf_smsg
+gf_sock_union_equal_addr
+gf_store_handle_create_on_absence
+gf_store_handle_destroy
+gf_store_handle_new
+gf_store_handle_retrieve
+gf_store_iter_destroy
+gf_store_iter_get_matching
+gf_store_iter_get_next
+gf_store_iter_new
+gf_store_lock
+gf_store_locked_local
+gf_store_mkdir
+gf_store_mkstemp
+gf_store_read_and_tokenize
+gf_store_rename_tmppath
+gf_store_retrieve_value
+gf_store_save_value
+gf_store_save_items
+gf_store_unlink_tmppath
+gf_store_unlock
+gf_string2boolean
+gf_string2bytesize_int64
+gf_string2bytesize_uint64
+gf_string2double
+gf_string2int
+gf_string2int32
+gf_string2percent
+gf_string2time
+gf_string2uint
+gf_string2uint32
+gf_string2uint64
+gf_string2uint_base10
+gf_strip_whitespace
+gf_strncpy
+gf_strTrim
+gf_strstr
+gf_thread_cleanup_xint
+gf_thread_create
+gf_thread_vcreate
+gf_thread_create_detached
+gf_thread_set_name
+gf_thread_set_vname
+gf_timer_call_after
+gf_timer_call_cancel
+gf_timer_registry_destroy
+_gf_timestuff
+gf_trim
+gf_tw_add_timer
+gf_tw_del_timer
+gf_tw_mod_timer
+gf_tw_mod_timer_pending
+gf_uint64_2human_readable
+gf_umount_lazy
+gf_update_latency
+gf_uuid_clear
+gf_uuid_compare
+gf_uuid_copy
+gf_uuid_is_null
+gf_uuid_generate
+gf_uuid_parse
+gf_uuid_unparse
+gf_valid_pid
+gf_vasprintf
+gf_volfile_reconfigure
+gf_xxh64_wrapper
+gf_zero_fill_stat
+gid_cache_add
+gid_cache_init
+gid_cache_lookup
+gid_cache_reconf
+gid_cache_release
+glusterd_check_log_level
+glusterfs_compute_sha256
+glusterfs_ctx_new
+glusterfs_ctx_tw_get
+glusterfs_ctx_tw_put
+glusterfs_delete_volfile_checksum
+glusterfs_globals_init
+glusterfs_graph_activate
+glusterfs_graph_attach
+glusterfs_graph_construct
+glusterfs_graph_deactivate
+glusterfs_graph_destroy
+glusterfs_graph_destroy_residual
+glusterfs_graph_prepare
+glusterfs_read_secure_access_file
+glusterfs_graph_print_file
+glusterfs_graph_set_first
+glusterfs_is_local_pathinfo
+glusterfs_leaf_position
+glusterfs_reachable_leaves
+__glusterfs_this_location
+glusterfs_this_set
+glusterfs_volfile_reconfigure
+glusterfs_xlator_link
+graph_reconf_validateopt
+_init
+inode_ctx_del2
+__inode_ctx_get0
+inode_ctx_get0
+__inode_ctx_get1
+inode_ctx_get1
+__inode_ctx_get2
+inode_ctx_get2
+inode_ctx_merge
+inode_ctx_reset0
+inode_ctx_reset1
+inode_ctx_reset2
+__inode_ctx_set0
+inode_ctx_set0
+__inode_ctx_set1
+inode_ctx_set1
+__inode_ctx_set2
+inode_ctx_set2
+inode_ctx_size
+inode_dump
+inode_dump_to_dict
+__inode_find
+inode_find
+inode_find_directory_name
+inode_forget
+inode_forget_with_unref
+inode_from_path
+inode_grep
+inode_grep_for_gfid
+inode_has_dentry
+inode_invalidate
+inode_is_linked
+inode_link
+inode_lookup
+inode_needs_lookup
+inode_new
+inode_parent
+__inode_path
+inode_path
+inode_ref
+inode_ref_reduce_by_n
+inode_rename
+inode_resolve
+inode_set_need_lookup
+inode_table_ctx_free
+inode_table_destroy
+inode_table_destroy_all
+inode_table_dump
+inode_table_dump_to_dict
+inode_table_new
+inode_table_with_invalidator
+__inode_table_set_lru_limit
+inode_table_set_lru_limit
+inode_unlink
+inode_unref
+int_to_data
+iobref_add
+iobref_clear
+iobref_merge
+iobref_new
+iobref_ref
+iobref_size
+iobref_unref
+iobuf_get
+iobuf_get2
+iobuf_get_page_aligned
+iobuf_pool_destroy
+iobuf_pool_new
+iobuf_size
+iobuf_to_iovec
+iobuf_unref
+iobuf_copy
+is_data_equal
+__is_fuse_call
+is_gf_log_command
+is_graph_topology_equal
+__is_root_gfid
+is_valid_lease_id
+leaseid_utoa
+gf_existing_leaseid
+gf_leaseid_get
+list_node_add
+list_node_add_order
+list_node_del
+lkowner_utoa
+loc_copy
+loc_copy_overload_parent
+loc_gfid
+loc_gfid_utoa
+loc_is_nameless
+loc_is_root
+loc_pargfid
+loc_path
+loc_touchup
+loc_wipe
+log_base2
+_mask_cancellation
+mask_match
+mem_get
+mem_get0
+mem_pool_destroy
+mem_pool_new_fn
+mem_pools_fini
+mem_pools_init
+mem_put
+mkdir_p
+next_token
+nwstrtail
+os_daemon
+os_daemon_return
+parser_deinit
+parser_get_next_match
+parser_init
+parser_set_string
+parser_unset_string
+quota_conf_read_gfid
+quota_conf_read_version
+quota_conf_skip_header
+quota_data_to_meta
+quota_dict_get_inode_meta
+quota_dict_get_meta
+quota_dict_set_meta
+quota_meta_is_null
+rb_create
+rb_delete
+rb_destroy
+rb_find
+rb_probe
+rbthash_get
+rbthash_insert
+rbthash_remove
+rbthash_table_destroy
+rbthash_table_init
+rbuf_dtor
+rbuf_get_buffer
+rbuf_init
+rbuf_reserve_write_area
+rbuf_wait_for_completion
+rbuf_write_complete
+recursive_rmdir
+runcmd
+runinit
+runner
+runner_add_arg
+runner_add_args
+runner_argprintf
+runner_chio
+runner_end
+runner_log
+runner_redir
+runner_run
+runner_run_nowait
+runner_run_reuse
+runner_start
+set_sys_log_level
+skipwhite
+strfd_close
+strfd_open
+strprintf
+strtail
+str_to_data
+SuperFastHash
+syncbarrier_destroy
+syncbarrier_init
+syncbarrier_wait
+syncbarrier_wake
+synccond_init
+synccond_destroy
+synccond_wait
+synccond_timedwait
+synccond_signal
+synccond_broadcast
+syncenv_destroy
+syncenv_new
+synclock_destroy
+synclock_init
+synclock_lock
+synclock_trylock
+synclock_unlock
+syncop_access
+syncop_close
+syncop_create
+syncop_copy_file_range
+syncopctx_getctx
+syncopctx_setfsgid
+syncopctx_setfsgroups
+syncopctx_setfslkowner
+syncopctx_setfspid
+syncopctx_setfsuid
+syncop_dirfd
+syncop_dir_scan
+syncop_discard
+syncop_fallocate
+syncop_flush
+syncop_fgetxattr
+syncop_fremovexattr
+syncop_fsetattr
+syncop_fsetxattr
+syncop_fstat
+syncop_fsync
+syncop_fsyncdir
+syncop_ftruncate
+syncop_ftw
+syncop_ftw_throttle
+syncop_fxattrop
+syncop_getactivelk
+syncop_getxattr
+syncop_gfid_to_path
+syncop_gfid_to_path_hard
+syncop_inode_find
+syncop_inodelk
+syncop_entrylk
+syncop_ipc
+syncop_is_subvol_local
+syncop_link
+syncop_listxattr
+syncop_lk
+syncop_lookup
+syncop_mkdir
+syncop_mknod
+syncop_mt_dir_scan
+syncop_open
+syncop_opendir
+syncop_readdir
+syncop_readdirp
+syncop_readlink
+syncop_readv
+syncop_removexattr
+syncop_rename
+syncop_rmdir
+syncop_seek
+syncop_setactivelk
+syncop_setattr
+syncop_setxattr
+syncop_stat
+syncop_statfs
+syncop_symlink
+syncop_truncate
+syncop_unlink
+syncop_write
+syncop_writev
+syncop_xattrop
+syncop_zerofill
+syncop_lease
+synctask_get
+synctask_new
+synctask_new1
+synctask_set
+synctask_setid
+synctask_sleep
+synctask_wake
+synctask_yield
+sys_access
+sys_chmod
+sys_chown
+sys_close
+sys_closedir
+sys_copy_file_range
+sys_creat
+sys_fallocate
+sys_fchmod
+sys_fchown
+sys_fdatasync
+sys_fgetxattr
+sys_flistxattr
+sys_fremovexattr
+sys_fsetxattr
+sys_fstat
+sys_fstatat
+sys_fsync
+sys_ftruncate
+sys_futimes
+sys_lchown
+sys_lgetxattr
+sys_link
+sys_linkat
+sys_llistxattr
+sys_lremovexattr
+sys_lseek
+sys_lsetxattr
+sys_lstat
+sys_mkdir
+sys_mkdirat
+sys_mknod
+sys_open
+sys_openat
+sys_opendir
+sys_pread
+sys_pwrite
+sys_pwritev
+sys_read
+sys_readdir
+sys_readlink
+sys_readv
+sys_rename
+sys_rmdir
+sys_stat
+sys_statvfs
+sys_symlink
+sys_symlinkat
+sys_truncate
+sys_unlink
+sys_unlinkat
+sys_utimensat
+sys_write
+sys_writev
+sys_socket
+sys_accept
+sys_kill
+sys_sysctl
+tbf_init
+tbf_throttle
+timespec_now
+timespec_now_realtime
+timespec_sub
+timespec_adjust_delta
+timespec_cmp
+token_iter_init
+trap
+trie_add
+trie_destroy
+trie_measure
+trie_measure_vec
+trie_new
+trienode_get_word
+_unmask_cancellation
+uuid_utoa
+uuid_utoa_r
+validate_brick_name
+valid_host_name
+valid_ipv4_address
+valid_internet_address
+xlator_destroy
+xlator_foreach
+xlator_foreach_depth_first
+xlator_init
+xlator_mem_acct_init
+xlator_mem_acct_unref
+xlator_notify
+xlator_option_info_list
+xlator_option_init_bool
+xlator_option_init_double
+xlator_option_init_int32
+xlator_option_init_path
+xlator_option_init_percent
+xlator_option_init_percent_or_size
+xlator_option_init_size
+xlator_option_init_size_uint64
+xlator_option_init_size_int64
+xlator_option_init_str
+xlator_option_init_time
+xlator_option_init_uint32
+xlator_option_init_uint64
+xlator_option_init_int64
+xlator_option_init_xlator
+xlator_option_reconf_bool
+xlator_option_reconf_int32
+xlator_option_reconf_path
+xlator_option_reconf_percent
+xlator_option_reconf_percent_or_size
+xlator_option_reconf_size
+xlator_option_reconf_size_uint64
+xlator_option_reconf_size_int64
+xlator_option_reconf_str
+xlator_option_reconf_time
+xlator_option_reconf_uint32
+xlator_option_reconf_uint64
+xlator_option_reconf_int64
+xlator_option_reconf_xlator
+xlator_options_validate
+xlator_options_validate_list
+xlator_option_validate
+xlator_option_validate_addr_list
+xlator_search_by_name
+xlator_set_inode_lru_limit
+xlator_set_type
+xlator_set_type_virtual
+xlator_subvolume_count
+xlator_tree_free_members
+xlator_volopt_dynload
+xlator_volume_option_get
+xlator_volume_option_get_list
+xlator_memrec_free
+xlator_mem_cleanup
+gluster_graph_take_reference
+default_fops
+gf_fop_list
+gf_upcall_list
+vol_type_str
+global_ctx
+global_ctx_mutex
+global_xlator
+use_spinlocks
+dump_options
+glusterfs_leaseid_buf_get
+glusterfs_leaseid_exist
+gf_replace_old_iatt_in_dict
+gf_replace_new_iatt_in_dict
+gf_changelog_init
+gf_changelog_register_generic
+gf_gfid_generate_from_xxh64
+find_xlator_option_in_cmd_args_t
+gf_d_type_from_ia_type
+glusterfs_graph_fini
+glusterfs_process_svc_attach_volfp
+glusterfs_mux_volfile_reconfigure
+glusterfs_process_svc_detach
+mgmt_is_multiplexed_daemon
+xlator_is_cleanup_starting
+gf_nanosleep
+gf_syncfs
+graph_total_client_xlator
+get_xattrs_to_heal
+gf_latency_statedump_and_reset
+gf_latency_new
+gf_latency_reset
+gf_latency_update
+gf_frame_latency_update
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h
deleted file mode 100644
index 35fccdf25a5..00000000000
--- a/libglusterfs/src/list.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LLIST_H
-#define _LLIST_H
-
-
-struct list_head {
- struct list_head *next;
- struct list_head *prev;
-};
-
-
-#define INIT_LIST_HEAD(head) do { \
- (head)->next = (head)->prev = head; \
- } while (0)
-
-
-static inline void
-list_add (struct list_head *new, struct list_head *head)
-{
- new->prev = head;
- new->next = head->next;
-
- new->prev->next = new;
- new->next->prev = new;
-}
-
-
-static inline void
-list_add_tail (struct list_head *new, struct list_head *head)
-{
- new->next = head;
- new->prev = head->prev;
-
- new->prev->next = new;
- new->next->prev = new;
-}
-
-
-static inline void
-list_del (struct list_head *old)
-{
- old->prev->next = old->next;
- old->next->prev = old->prev;
-
- old->next = (void *)0xbabebabe;
- old->prev = (void *)0xcafecafe;
-}
-
-
-static inline void
-list_del_init (struct list_head *old)
-{
- old->prev->next = old->next;
- old->next->prev = old->prev;
-
- old->next = old;
- old->prev = old;
-}
-
-
-static inline void
-list_move (struct list_head *list, struct list_head *head)
-{
- list_del (list);
- list_add (list, head);
-}
-
-
-static inline void
-list_move_tail (struct list_head *list, struct list_head *head)
-{
- list_del (list);
- list_add_tail (list, head);
-}
-
-
-static inline int
-list_empty (struct list_head *head)
-{
- return (head->next == head);
-}
-
-
-static inline void
-__list_splice (struct list_head *list, struct list_head *head)
-{
- (list->prev)->next = (head->next);
- (head->next)->prev = (list->prev);
-
- (head)->next = (list->next);
- (list->next)->prev = (head);
-}
-
-
-static inline void
-list_splice (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_splice (list, head);
-}
-
-
-/* Splice moves @list to the head of the list at @head. */
-static inline void
-list_splice_init (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_splice (list, head);
- INIT_LIST_HEAD (list);
-}
-
-
-static inline void
-__list_append (struct list_head *list, struct list_head *head)
-{
- (head->prev)->next = (list->next);
- (list->next)->prev = (head->prev);
- (head->prev) = (list->prev);
- (list->prev)->next = head;
-}
-
-
-static inline void
-list_append (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_append (list, head);
-}
-
-
-/* Append moves @list to the end of @head */
-static inline void
-list_append_init (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_append (list, head);
- INIT_LIST_HEAD (list);
-}
-
-
-#define list_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-
-#define list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
-
-
-#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
-
-
-#define list_for_each_entry_safe(pos, n, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-#endif /* _LLIST_H */
diff --git a/libglusterfs/src/lkowner.h b/libglusterfs/src/lkowner.h
deleted file mode 100644
index 969d13e5044..00000000000
--- a/libglusterfs/src/lkowner.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LK_OWNER_H
-#define _LK_OWNER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */
-
-/* 16strings-16strings-... */
-#define GF_LKOWNER_BUF_SIZE ((GF_MAX_LOCK_OWNER_LEN * 2) + \
- (GF_MAX_LOCK_OWNER_LEN / 8))
-
-typedef struct gf_lkowner_ {
- int len;
- char data[GF_MAX_LOCK_OWNER_LEN];
-} gf_lkowner_t;
-
-
-/* LKOWNER to string functions */
-static inline void
-lkowner_unparse (gf_lkowner_t *lkowner, char *buf, int buf_len)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < lkowner->len; i++) {
- if (i && !(i % 8)) {
- buf[j] = '-';
- j++;
- }
- sprintf (&buf[j], "%02hhx", lkowner->data[i]);
- j += 2;
- if (j == buf_len)
- break;
- }
- if (j < buf_len)
- buf[j] = '\0';
-}
-
-static inline void
-set_lk_owner_from_ptr (gf_lkowner_t *lkowner, void *data)
-{
- int i = 0;
- int j = 0;
-
- lkowner->len = sizeof (unsigned long);
- for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
- lkowner->data[i] = (char)((((unsigned long)data) >> j) & 0xff);
- }
-}
-
-static inline void
-set_lk_owner_from_uint64 (gf_lkowner_t *lkowner, uint64_t data)
-{
- int i = 0;
- int j = 0;
-
- lkowner->len = 8;
- for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
- lkowner->data[i] = (char)((data >> j) & 0xff);
- }
-}
-
-/* Return true if the locks have the same owner */
-static inline int
-is_same_lkowner (gf_lkowner_t *l1, gf_lkowner_t *l2)
-{
- return ((l1->len == l2->len) && !memcmp(l1->data, l2->data, l1->len));
-}
-
-#endif /* _LK_OWNER_H */
diff --git a/libglusterfs/src/locking.c b/libglusterfs/src/locking.c
new file mode 100644
index 00000000000..7577054e33a
--- /dev/null
+++ b/libglusterfs/src/locking.c
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#if defined(HAVE_SPINLOCK)
+/* None of this matters otherwise. */
+
+#include <pthread.h>
+#include <unistd.h>
+
+#define LOCKING_IMPL
+#include "glusterfs/locking.h"
+
+int use_spinlocks = 0;
+
+static void __attribute__((constructor)) gf_lock_setup(void)
+{
+ // use_spinlocks = (sysconf(_SC_NPROCESSORS_ONLN) > 1);
+}
+
+#endif
diff --git a/libglusterfs/src/locking.h b/libglusterfs/src/locking.h
deleted file mode 100644
index 79c6992af0b..00000000000
--- a/libglusterfs/src/locking.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LOCKING_H
-#define _LOCKING_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <pthread.h>
-
-#if HAVE_SPINLOCK
-#define LOCK_INIT(x) pthread_spin_init (x, 0)
-#define LOCK(x) pthread_spin_lock (x)
-#define TRY_LOCK(x) pthread_spin_trylock (x)
-#define UNLOCK(x) pthread_spin_unlock (x)
-#define LOCK_DESTROY(x) pthread_spin_destroy (x)
-
-typedef pthread_spinlock_t gf_lock_t;
-#else
-#define LOCK_INIT(x) pthread_mutex_init (x, 0)
-#define LOCK(x) pthread_mutex_lock (x)
-#define TRY_LOCK(x) pthread_mutex_trylock (x)
-#define UNLOCK(x) pthread_mutex_unlock (x)
-#define LOCK_DESTROY(x) pthread_mutex_destroy (x)
-
-typedef pthread_mutex_t gf_lock_t;
-#endif /* HAVE_SPINLOCK */
-
-
-#endif /* _LOCKING_H */
diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
index c47237f829e..a930d3e3b63 100644
--- a/libglusterfs/src/logging.c
+++ b/libglusterfs/src/logging.c
@@ -8,11 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
@@ -21,712 +16,2431 @@
#include <locale.h>
#include <string.h>
#include <stdlib.h>
-
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-#ifdef GF_LINUX_HOST_OS
#include <syslog.h>
-#endif
+#include <sys/resource.h>
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
+#else
+#include "execinfo_compat.h"
#endif
+#include <sys/stat.h>
-static pthread_mutex_t logfile_mutex;
-static char *filename = NULL;
-static uint8_t logrotate = 0;
-static FILE *logfile = NULL;
-static gf_loglevel_t loglevel = GF_LOG_INFO;
-static int gf_log_syslog = 1;
-static gf_loglevel_t sys_log_level = GF_LOG_CRITICAL;
+#include "glusterfs/syscall.h"
-char gf_log_xl_log_set;
-gf_loglevel_t gf_log_loglevel = GF_LOG_INFO; /* extern'd */
-FILE *gf_log_logfile;
+#define GF_JSON_MSG_LENGTH 8192
+#define GF_SYSLOG_CEE_FORMAT \
+ "@cee: {\"msg\": \"%s\", \"gf_code\": \"%u\", \"gf_message\": \"%s\"}"
+#define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf"
+#define GF_LOG_BACKTRACE_DEPTH 5
+#define GF_LOG_BACKTRACE_SIZE 4096
+#define GF_MAX_SLOG_PAIR_COUNT 100
-static char *cmd_log_filename = NULL;
-static FILE *cmdlogfile = NULL;
+#include "glusterfs/logging.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/timer.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+/* Do not replace gf_log in TEST_LOG with gf_msg, as there is a slight chance
+ * that it could lead to an infinite recursion.*/
+#define TEST_LOG(__msg, __args...) \
+ gf_log("logging-infra", GF_LOG_DEBUG, __msg, ##__args);
+
+static void
+gf_log_flush_timeout_cbk(void *data);
+
+int
+gf_log_inject_timer_event(glusterfs_ctx_t *ctx);
+
+static void
+gf_log_flush_extra_msgs(glusterfs_ctx_t *ctx, uint32_t new);
+
+static int
+log_buf_init(log_buf_t *buf, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, int graph_id);
+static void
+gf_log_rotate(glusterfs_ctx_t *ctx);
+
+static char gf_level_strings[] = {
+ ' ', /* NONE */
+ 'M', /* EMERGENCY */
+ 'A', /* ALERT */
+ 'C', /* CRITICAL */
+ 'E', /* ERROR */
+ 'W', /* WARNING */
+ 'N', /* NOTICE */
+ 'I', /* INFO */
+ 'D', /* DEBUG */
+ 'T', /* TRACE */
+};
void
-gf_log_logrotate (int signum)
+gf_log_logrotate(int signum)
{
- logrotate = 1;
+ if (THIS->ctx) {
+ THIS->ctx->log.logrotate = 1;
+ THIS->ctx->log.cmd_history_logrotate = 1;
+ }
}
void
-gf_log_enable_syslog (void)
+gf_log_enable_syslog(void)
{
- gf_log_syslog = 1;
+ if (THIS->ctx)
+ THIS->ctx->log.gf_log_syslog = 1;
}
void
-gf_log_disable_syslog (void)
+gf_log_disable_syslog(void)
{
- gf_log_syslog = 0;
+ if (THIS->ctx)
+ THIS->ctx->log.gf_log_syslog = 0;
}
gf_loglevel_t
-gf_log_get_loglevel (void)
+gf_log_get_loglevel(void)
{
- return loglevel;
+ if (THIS->ctx)
+ return THIS->ctx->log.loglevel;
+ else
+ /* return global defaults (see gf_log_globals_init) */
+ return GF_LOG_INFO;
}
void
-gf_log_set_loglevel (gf_loglevel_t level)
+gf_log_set_loglevel(glusterfs_ctx_t *ctx, gf_loglevel_t level)
{
- gf_log_loglevel = loglevel = level;
+ if (ctx)
+ ctx->log.loglevel = level;
}
+int
+gf_log_get_localtime(void)
+{
+ if (THIS->ctx)
+ return THIS->ctx->log.localtime;
+ else
+ /* return global defaults (see gf_log_globals_init) */
+ return 0;
+}
-gf_loglevel_t
-gf_log_get_xl_loglevel (void *this)
+void
+gf_log_set_localtime(int on_off)
{
- xlator_t *xl = this;
- if (!xl)
- return 0;
- return xl->loglevel;
+ if (THIS->ctx)
+ THIS->ctx->log.localtime = on_off;
}
void
-gf_log_set_xl_loglevel (void *this, gf_loglevel_t level)
+gf_log_flush(void)
{
- xlator_t *xl = this;
- if (!xl)
- return;
- gf_log_xl_log_set = 1;
- xl->loglevel = level;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = this->ctx;
+
+ if (ctx && ctx->log.logger == gf_logger_glusterlog) {
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ fflush(ctx->log.gf_log_logfile);
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ }
+
+ return;
}
void
-gf_log_fini (void)
+gf_log_set_xl_loglevel(void *this, gf_loglevel_t level)
{
- pthread_mutex_destroy (&logfile_mutex);
+ xlator_t *xl = this;
+ if (!xl)
+ return;
+ xl->loglevel = level;
}
+/* TODO: The following get/set functions are yet not invoked from anywhere
+ * in the code. The _intention_ is to pass CLI arguments to various daemons
+ * that are started, which would then invoke these set APIs as required.
+ *
+ * glusterd would read the defaults from its .vol file configuration shipped
+ * as a part of the packages distributed.
+ *
+ * For any gluster* daemon that is started the shipped configuration becomes the
+ * default, if a volume has to change its logging format or logger, then a
+ * gluster CLI is invoked to set this property for the volume in question.
+ *
+ * The property is maintained by glusterd, and passed to the daemon as a CLI
+ * option, IOW persistence of the option is maintained by glusterd persistent
+ * storage (i.e .vol file) only
+ *
+ * care needs to be taken to configure and start daemons based on the versions
+ * that supports these features */
void
-gf_log_globals_init (void)
+gf_log_set_logformat(gf_log_format_t format)
{
- pthread_mutex_init (&logfile_mutex, NULL);
+ if (THIS->ctx)
+ THIS->ctx->log.logformat = format;
+}
-#ifdef GF_LINUX_HOST_OS
- /* For the 'syslog' output. one can grep 'GlusterFS' in syslog
- for serious logs */
- openlog ("GlusterFS", LOG_PID, LOG_DAEMON);
-#endif
+void
+gf_log_set_logger(gf_log_logger_t logger)
+{
+ if (THIS->ctx)
+ THIS->ctx->log.logger = logger;
}
-int
-gf_log_init (const char *file)
+gf_loglevel_t
+gf_log_get_xl_loglevel(void *this)
{
- int fd = -1;
+ xlator_t *xl = this;
+ if (!xl)
+ return 0;
+ return xl->loglevel;
+}
- if (!file){
- fprintf (stderr, "ERROR: no filename specified\n");
- return -1;
- }
+void
+gf_log_set_log_buf_size(uint32_t buf_size)
+{
+ uint32_t old = 0;
+ glusterfs_ctx_t *ctx = THIS->ctx;
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ old = ctx->log.lru_size;
+ ctx->log.lru_size = buf_size;
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ /* If the old size is less than/equal to the new size, then do nothing.
+ *
+ * But if the new size is less than the old size, then
+ * a. If the cur size of the buf is less than or equal the new size,
+ * then do nothing.
+ * b. But if the current size of the buf is greater than the new size,
+ * then flush the least recently used (cur size - new_size) msgs
+ * to disk.
+ */
+ if (buf_size < old)
+ gf_log_flush_extra_msgs(ctx, buf_size);
+}
- if (strcmp (file, "-") == 0) {
- gf_log_logfile = stderr;
+void
+gf_log_set_log_flush_timeout(uint32_t timeout)
+{
+ THIS->ctx->log.timeout = timeout;
+}
- return 0;
- }
+/* If log_buf_init() fails (indicated by a return value of -1),
+ * call log_buf_destroy() to clean up memory allocated in heap and to return
+ * the log_buf_t object back to its memory pool.
+ */
+static int
+log_buf_init(log_buf_t *buf, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, int graph_id)
+{
+ int ret = -1;
- filename = gf_strdup (file);
- if (!filename) {
- fprintf (stderr, "ERROR: updating log-filename failed: %s\n",
- strerror (errno));
- return -1;
+ if (!buf || !domain || !file || !function || !appmsgstr || !*appmsgstr)
+ goto out;
+
+ buf->msg = gf_strdup(*appmsgstr);
+ if (!buf->msg)
+ goto out;
+
+ buf->msg_id = msgid;
+ buf->errnum = errnum;
+ buf->domain = gf_strdup(domain);
+ if (!buf->domain)
+ goto out;
+
+ buf->file = gf_strdup(file);
+ if (!buf->file)
+ goto out;
+
+ buf->function = gf_strdup(function);
+ if (!buf->function)
+ goto out;
+
+ buf->line = line;
+ buf->level = level;
+ buf->refcount = 0;
+ buf->graph_id = graph_id;
+ INIT_LIST_HEAD(&buf->msg_list);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+log_buf_destroy(log_buf_t *buf)
+{
+ if (!buf)
+ return -1;
+
+ GF_FREE(buf->msg);
+ GF_FREE(buf->domain);
+ GF_FREE(buf->file);
+ GF_FREE(buf->function);
+
+ mem_put(buf);
+ return 0;
+}
+
+static void
+gf_log_rotate(glusterfs_ctx_t *ctx)
+{
+ int fd = -1;
+ FILE *new_logfile = NULL;
+ FILE *old_logfile = NULL;
+
+ /* not involving locks on initial check to speed it up */
+ if (ctx->log.logrotate) {
+ /* let only one winner through on races */
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+
+ if (!ctx->log.logrotate) {
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ return;
+ } else {
+ ctx->log.logrotate = 0;
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
}
- fd = open (file, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ fd = sys_open(ctx->log.filename, O_CREAT | O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR);
if (fd < 0) {
- fprintf (stderr, "ERROR: failed to create logfile \"%s\" (%s)\n",
- file, strerror (errno));
- return -1;
+ gf_smsg("logrotate", GF_LOG_ERROR, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, NULL);
+ return;
}
- close (fd);
- logfile = fopen (file, "a");
- if (!logfile){
- fprintf (stderr, "ERROR: failed to open logfile \"%s\" (%s)\n",
- file, strerror (errno));
- return -1;
+ new_logfile = fdopen(fd, "a");
+ if (!new_logfile) {
+ gf_smsg("logrotate", GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s",
+ ctx->log.filename, NULL);
+ sys_close(fd);
+ return;
}
- gf_log_logfile = logfile;
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile)
+ old_logfile = ctx->log.logfile;
- return 0;
+ ctx->log.gf_log_logfile = ctx->log.logfile = new_logfile;
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+ if (old_logfile != NULL)
+ fclose(old_logfile);
+ }
+
+ return;
}
+void
+gf_log_globals_fini(void)
+{
+ /* TODO: Nobody is invoking the fini, but cleanup needs to happen here,
+ * needs cleanup for, log.ident, log.filename, closelog, log file close
+ * rotate state, possibly under a lock */
+ pthread_mutex_destroy(&THIS->ctx->log.logfile_mutex);
+ pthread_mutex_destroy(&THIS->ctx->log.log_buf_lock);
+}
+
+void
+gf_log_disable_suppression_before_exit(glusterfs_ctx_t *ctx)
+{
+ /*
+ * First set log buf size to 0. This would ensure two things:
+ * i. that all outstanding log messages are flushed to disk, and
+ * ii. all subsequent calls to gf_msg will result in the logs getting
+ * directly flushed to disk without being buffered.
+ *
+ * Then, cancel the current log timer event.
+ */
+
+ gf_log_set_log_buf_size(0);
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ if (ctx->log.log_flush_timer) {
+ gf_timer_call_cancel(ctx, ctx->log.log_flush_timer);
+ ctx->log.log_flush_timer = NULL;
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+}
+/** gf_log_fini - function to perform the cleanup of the log information
+ * @data - glusterfs context
+ * @return: success: 0
+ * failure: -1
+ */
+int
+gf_log_fini(void *data)
+{
+ glusterfs_ctx_t *ctx = data;
+ int ret = 0;
+ FILE *old_logfile = NULL;
+
+ if (ctx == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_log_disable_suppression_before_exit(ctx);
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ old_logfile = ctx->log.logfile;
+
+ /* Logfile needs to be set to NULL, so that any
+ call to gf_log after calling gf_log_fini, will
+ log the message to stderr.
+ */
+ ctx->log.loglevel = GF_LOG_NONE;
+ ctx->log.logfile = NULL;
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
-struct _msg_queue {
- struct list_head msgs;
-};
+ if (old_logfile && (fclose(old_logfile) != 0))
+ ret = -1;
-struct _log_msg {
- const char *msg;
- struct list_head queue;
-};
+ GF_FREE(ctx->log.ident);
+ GF_FREE(ctx->log.filename);
+out:
+ return ret;
+}
-void
-gf_log_lock (void)
+/**
+ * gf_openlog -function to open syslog specific to gluster based on
+ * existence of file /etc/glusterfs/logger.conf
+ * @ident: optional identification string similar to openlog()
+ * @option: optional value to option to openlog(). Passing -1 uses
+ * 'LOG_PID | LOG_NDELAY' as default
+ * @facility: optional facility code similar to openlog(). Passing -1
+ * uses LOG_DAEMON as default
+ *
+ * @return: void
+ */
+static void
+gf_openlog(const char *ident, int option, int facility)
{
- pthread_mutex_lock (&logfile_mutex);
+ int _option = option;
+ int _facility = facility;
+
+ if (-1 == _option) {
+ _option = LOG_PID | LOG_NDELAY;
+ }
+ if (-1 == _facility) {
+ _facility = LOG_LOCAL1;
+ }
+
+ /* TODO: Should check for errors here and return appropriately */
+ setlocale(LC_ALL, "");
+ setlocale(LC_NUMERIC, "C"); /* C-locale for strtod, ... */
+ /* close the previous syslog if open as we are changing settings */
+ closelog();
+ openlog(ident, _option, _facility);
}
+/**
+ * _json_escape -function to convert string to json encoded string
+ * @str: input string
+ * @buf: buffer to store encoded string
+ * @len: length of @buf
+ *
+ * @return: success: last unprocessed character position by pointer in @str
+ * failure: NULL
+ *
+ * Internal function. Heavily inspired by _ul_str_escape() function in
+ * libumberlog
+ *
+ * Sample output:
+ * [1] str = "devel error"
+ * buf = "devel error"
+ * [2] str = "devel error"
+ * buf = "devel\terror"
+ * [3] str = "I/O error on "/tmp/foo" file"
+ * buf = "I/O error on \"/tmp/foo\" file"
+ * [4] str = "I/O erroron /tmp/bar file"
+ * buf = "I/O error\u001bon /tmp/bar file"
+ *
+ */
+static char *
+_json_escape(const char *str, char *buf, size_t len)
+{
+ static const unsigned char json_exceptions[UCHAR_MAX + 1] = {
+ [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1, [0x05] = 1, [0x06] = 1,
+ [0x07] = 1, [0x08] = 1, [0x09] = 1, [0x0a] = 1, [0x0b] = 1, [0x0c] = 1,
+ [0x0d] = 1, [0x0e] = 1, [0x0f] = 1, [0x10] = 1, [0x11] = 1, [0x12] = 1,
+ [0x13] = 1, [0x14] = 1, [0x15] = 1, [0x16] = 1, [0x17] = 1, [0x18] = 1,
+ [0x19] = 1, [0x1a] = 1, [0x1b] = 1, [0x1c] = 1, [0x1d] = 1, [0x1e] = 1,
+ [0x1f] = 1, ['\\'] = 1, ['"'] = 1};
+ static const char json_hex_chars[16] = "0123456789abcdef";
+ unsigned char *p = NULL;
+ size_t pos = 0;
+
+ if (!str || !buf || len <= 0) {
+ return NULL;
+ }
+
+ for (p = (unsigned char *)str; *p && (pos + 1) < len; p++) {
+ if (json_exceptions[*p] == 0) {
+ buf[pos++] = *p;
+ continue;
+ }
+
+ if ((pos + 2) >= len) {
+ break;
+ }
+
+ switch (*p) {
+ case '\b':
+ buf[pos++] = '\\';
+ buf[pos++] = 'b';
+ break;
+ case '\n':
+ buf[pos++] = '\\';
+ buf[pos++] = 'n';
+ break;
+ case '\r':
+ buf[pos++] = '\\';
+ buf[pos++] = 'r';
+ break;
+ case '\t':
+ buf[pos++] = '\\';
+ buf[pos++] = 't';
+ break;
+ case '\\':
+ buf[pos++] = '\\';
+ buf[pos++] = '\\';
+ break;
+ case '"':
+ buf[pos++] = '\\';
+ buf[pos++] = '"';
+ break;
+ default:
+ if ((pos + 6) >= len) {
+ buf[pos] = '\0';
+ return (char *)p;
+ }
+ buf[pos++] = '\\';
+ buf[pos++] = 'u';
+ buf[pos++] = '0';
+ buf[pos++] = '0';
+ buf[pos++] = json_hex_chars[(*p) >> 4];
+ buf[pos++] = json_hex_chars[(*p) & 0xf];
+ break;
+ }
+ }
+
+ buf[pos] = '\0';
+ return (char *)p;
+}
+
+/**
+ * gf_syslog -function to submit message to syslog specific to gluster
+ * @facility_priority: facility_priority of syslog()
+ * @format: optional format string to syslog()
+ *
+ * @return: void
+ */
+static void
+gf_syslog(int facility_priority, char *format, ...)
+{
+ char *msg = NULL;
+ char json_msg[GF_JSON_MSG_LENGTH];
+ GF_UNUSED char *p = NULL;
+ va_list ap;
+
+ GF_ASSERT(format);
+
+ va_start(ap, format);
+ if (vasprintf(&msg, format, ap) != -1) {
+ p = _json_escape(msg, json_msg, GF_JSON_MSG_LENGTH);
+ syslog(facility_priority, "%s", msg);
+ free(msg);
+ } else
+ syslog(GF_LOG_CRITICAL, "vasprintf() failed, out of memory?");
+ va_end(ap);
+}
void
-gf_log_unlock (void)
+gf_log_globals_init(void *data, gf_loglevel_t level)
{
- pthread_mutex_unlock (&logfile_mutex);
+ glusterfs_ctx_t *ctx = data;
+
+ pthread_mutex_init(&ctx->log.logfile_mutex, NULL);
+
+ ctx->log.loglevel = level;
+ ctx->log.gf_log_syslog = 1;
+ ctx->log.sys_log_level = GF_LOG_CRITICAL;
+ ctx->log.logger = gf_logger_glusterlog;
+ ctx->log.logformat = gf_logformat_withmsgid;
+ ctx->log.lru_size = GF_LOG_LRU_BUFSIZE_DEFAULT;
+ ctx->log.timeout = GF_LOG_FLUSH_TIMEOUT_DEFAULT;
+ ctx->log.localtime = GF_LOG_LOCALTIME_DEFAULT;
+
+ pthread_mutex_init(&ctx->log.log_buf_lock, NULL);
+
+ INIT_LIST_HEAD(&ctx->log.lru_queue);
+
+#ifdef GF_LINUX_HOST_OS
+ /* For the 'syslog' output. one can grep 'GlusterFS' in syslog
+ for serious logs */
+ openlog("GlusterFS", LOG_PID, LOG_DAEMON);
+#endif
}
+int
+gf_log_init(void *data, const char *file, const char *ident)
+{
+ glusterfs_ctx_t *ctx = data;
+ int fd = -1;
+ struct stat buf;
+
+ if (ctx == NULL) {
+ fprintf(stderr, "ERROR: ctx is NULL\n");
+ return -1;
+ }
+ if (ident) {
+ GF_FREE(ctx->log.ident);
+ ctx->log.ident = gf_strdup(ident);
+ }
+
+ /* we keep the files and the syslog open, so that on logger change, we
+ * are ready to log anywhere, that the new value specifies */
+ if (ctx->log.ident) {
+ gf_openlog(ctx->log.ident, -1, LOG_DAEMON);
+ } else {
+ gf_openlog(NULL, -1, LOG_DAEMON);
+ }
+ /* TODO: make FACILITY configurable than LOG_DAEMON */
+ if (sys_stat(GF_LOG_CONTROL_FILE, &buf) == 0) {
+ /* use syslog logging */
+ ctx->log.log_control_file_found = 1;
+ } else {
+ /* use old style logging */
+ ctx->log.log_control_file_found = 0;
+ }
+
+ if (!file) {
+ fprintf(stderr, "ERROR: no filename specified\n");
+ return -1;
+ }
+
+ /* free the (possible) previous filename */
+ GF_FREE(ctx->log.filename);
+ ctx->log.filename = NULL;
+
+ /* close and reopen logfile for log rotate */
+ if (ctx->log.logfile) {
+ fclose(ctx->log.logfile);
+ ctx->log.logfile = NULL;
+ ctx->log.gf_log_logfile = NULL;
+ }
+
+ if (strcmp(file, "-") == 0) {
+ int dupfd = -1;
+
+ ctx->log.filename = gf_strdup("/dev/stderr");
+ if (!ctx->log.filename) {
+ fprintf(stderr, "ERROR: strdup failed\n");
+ return -1;
+ }
+
+ dupfd = dup(fileno(stderr));
+ if (dupfd == -1) {
+ fprintf(stderr, "ERROR: could not dup %d (%s)\n", fileno(stderr),
+ strerror(errno));
+ return -1;
+ }
+
+ ctx->log.logfile = fdopen(dupfd, "a");
+ if (!ctx->log.logfile) {
+ fprintf(stderr, "ERROR: could not fdopen on %d (%s)\n", dupfd,
+ strerror(errno));
+ sys_close(dupfd);
+ return -1;
+ }
+ } else {
+ /* Also create parent dir */
+ char *logdir = gf_strdup(file);
+ if (!logdir) {
+ return -1;
+ }
+ char *tmp_index = rindex(logdir, '/');
+ if (tmp_index) {
+ tmp_index[0] = '\0';
+ }
+ if (mkdir_p(logdir, 0755, true)) {
+ /* EEXIST is handled in mkdir_p() itself */
+ gf_smsg("logging", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+ "logdir=%s", logdir, "errno=%s", strerror(errno), NULL);
+ GF_FREE(logdir);
+ return -1;
+ }
+ /* no need of this variable */
+ GF_FREE(logdir);
+
+ ctx->log.filename = gf_strdup(file);
+ if (!ctx->log.filename) {
+ fprintf(stderr,
+ "ERROR: updating log-filename failed: "
+ "%s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ fd = sys_open(file, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ fprintf(stderr,
+ "ERROR: failed to create logfile"
+ " \"%s\" (%s)\n",
+ file, strerror(errno));
+ return -1;
+ }
+
+ ctx->log.logfile = fdopen(fd, "a");
+ if (!ctx->log.logfile) {
+ fprintf(stderr,
+ "ERROR: failed to open logfile \"%s\" "
+ "(%s)\n",
+ file, strerror(errno));
+ sys_close(fd);
+ return -1;
+ }
+ }
+
+ ctx->log.gf_log_logfile = ctx->log.logfile;
+
+ return 0;
+}
void
-gf_log_cleanup (void)
+set_sys_log_level(gf_loglevel_t level)
{
- pthread_mutex_destroy (&logfile_mutex);
+ if (THIS->ctx)
+ THIS->ctx->log.sys_log_level = level;
}
-void
-set_sys_log_level (gf_loglevel_t level)
+/* Check if we should be logging
+ * Return value: _gf_false : Print the log
+ * _gf_true : Do not Print the log
+ */
+static gf_boolean_t
+skip_logging(xlator_t *this, gf_loglevel_t level)
{
- sys_log_level = level;
+ gf_loglevel_t existing_level = this->loglevel ? this->loglevel
+ : this->ctx->log.loglevel;
+ if (level > existing_level) {
+ return _gf_true;
+ }
+
+ if (level == GF_LOG_NONE) {
+ return _gf_true;
+ }
+
+ return _gf_false;
}
int
-_gf_log_nomem (const char *domain, const char *file,
- const char *function, int line, gf_loglevel_t level,
- size_t size)
-{
- const char *basename = NULL;
- struct tm *tm = NULL;
- xlator_t *this = NULL;
- struct timeval tv = {0,};
- int ret = 0;
- char msg[8092];
- char timestr[256];
- char callstr[4096];
-
- this = THIS;
-
- if (gf_log_xl_log_set) {
- if (this->loglevel && (level > this->loglevel))
- goto out;
- else if (level > gf_log_loglevel)
- goto out;
- }
-
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
- if (!domain || !file || !function) {
- fprintf (stderr,
- "logging: %s:%s():%d: invalid argument\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- return -1;
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, const char *fmt, ...)
+{
+ const char *basename = NULL;
+ xlator_t *this = THIS;
+ char *logline = NULL;
+ char *msg = NULL;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char *callstr = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ int ret = 0;
+ va_list ap;
+ glusterfs_ctx_t *ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ if (!domain || !file || !function || !fmt) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ basename = strrchr(file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
+ /*Saving the backtrace to pre-allocated ctx->btbuf
+ * to avoid allocating memory from the heap*/
+ callstr = gf_backtrace_save(NULL);
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ if (ctx->log.log_control_file_found) {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ other level as is */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
}
-#if HAVE_BACKTRACE
- /* Print 'calling function' */
- do {
- void *array[5];
- char **callingfn = NULL;
- size_t bt_size = 0;
-
- bt_size = backtrace (array, 5);
- if (bt_size)
- callingfn = backtrace_symbols (&array[2], bt_size-2);
- if (!callingfn)
- break;
-
- if (bt_size == 5)
- snprintf (callstr, 4096, "(-->%s (-->%s (-->%s)))",
- callingfn[2], callingfn[1], callingfn[0]);
- if (bt_size == 4)
- snprintf (callstr, 4096, "(-->%s (-->%s))",
- callingfn[1], callingfn[0]);
- if (bt_size == 3)
- snprintf (callstr, 4096, "(-->%s)", callingfn[0]);
-
- free (callingfn);
- } while (0);
-#endif /* HAVE_BACKTRACE */
-
- ret = gettimeofday (&tv, NULL);
- if (-1 == ret)
- goto out;
+ gf_syslog(priority, "[%s:%d:%s] %s %d-%s: %s", basename, line, function,
+ callstr, ((this->graph) ? this->graph->id : 0), domain, msg);
+
+ goto out;
+ }
+
+ ret = gettimeofday(&tv, NULL);
+ if (-1 == ret)
+ goto out;
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %s %d-%s: %s\n", timestr,
+ gf_level_strings[level], basename, line, function,
+ callstr, ((this->graph) ? this->graph->id : 0), domain,
+ msg);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fputs(logline, ctx->log.logfile);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fputs(logline, stderr);
+ fflush(stderr);
+ }
- tm = localtime (&tv.tv_sec);
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s", logline);
+#endif
+ }
- pthread_mutex_lock (&logfile_mutex);
- {
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = sprintf (msg, "[%s] %s [%s:%d:%s] %s %s: no memory "
- "available for size (%"GF_PRI_SIZET")",
- timestr, level_strings[level],
- basename, line, function, callstr,
- domain, size);
- if (-1 == ret) {
- goto unlock;
- }
+out:
+
+ GF_FREE(logline);
+
+ FREE(msg);
+
+ return ret;
+}
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
+static int
+_gf_msg_plain_internal(gf_loglevel_t level, const char *msg)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int priority;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ /* log to the configured logging service */
+ switch (ctx->log.logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ SET_LOG_PRIO(level, priority);
+
+ syslog(priority, "%s", msg);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+ case gf_logger_glusterlog:
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf(ctx->log.logfile, "%s\n", msg);
+ fflush(ctx->log.logfile);
} else {
- fprintf (stderr, "%s\n", msg);
+ fprintf(stderr, "%s\n", msg);
+ fflush(stderr);
}
#ifdef GF_LINUX_HOST_OS
- /* We want only serious log in 'syslog', not our debug
- and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
- syslog ((level-1), "%s\n", msg);
+ /* We want only serious logs in 'syslog', not our debug
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s\n", msg);
#endif
- }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+ break;
+ }
+
+ return 0;
+}
+
+int
+_gf_msg_plain(gf_loglevel_t level, const char *fmt, ...)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+ va_list ap;
+ char *msg = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = _gf_msg_plain_internal(level, msg);
+
+ FREE(msg);
-unlock:
- pthread_mutex_unlock (&logfile_mutex);
out:
- return ret;
- }
+ return ret;
+}
int
-_gf_log_callingfn (const char *domain, const char *file, const char *function,
- int line, gf_loglevel_t level, const char *fmt, ...)
-{
- const char *basename = NULL;
- struct tm *tm = NULL;
- xlator_t *this = NULL;
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- char timestr[256] = {0,};
- char callstr[4096] = {0,};
- struct timeval tv = {0,};
- size_t len = 0;
- int ret = 0;
- va_list ap;
-
- this = THIS;
-
- if (gf_log_xl_log_set) {
- if (this->loglevel && (level > this->loglevel))
- goto out;
- else if (level > gf_log_loglevel)
- goto out;
- }
-
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
- if (!domain || !file || !function || !fmt) {
- fprintf (stderr,
- "logging: %s:%s():%d: invalid argument\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- return -1;
- }
+_gf_msg_vplain(gf_loglevel_t level, const char *fmt, va_list ap)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+ char *msg = NULL;
+ glusterfs_ctx_t *ctx = NULL;
-#if HAVE_BACKTRACE
- /* Print 'calling function' */
- do {
- void *array[5];
- char **callingfn = NULL;
- size_t size = 0;
-
- size = backtrace (array, 5);
- if (size)
- callingfn = backtrace_symbols (&array[2], size-2);
- if (!callingfn)
- break;
-
- if (size == 5)
- snprintf (callstr, 4096, "(-->%s (-->%s (-->%s)))",
- callingfn[2], callingfn[1], callingfn[0]);
- if (size == 4)
- snprintf (callstr, 4096, "(-->%s (-->%s))",
- callingfn[1], callingfn[0]);
- if (size == 3)
- snprintf (callstr, 4096, "(-->%s)", callingfn[0]);
-
- free (callingfn);
- } while (0);
-#endif /* HAVE_BACKTRACE */
-
- ret = gettimeofday (&tv, NULL);
- if (-1 == ret)
- goto out;
+ this = THIS;
+ ctx = this->ctx;
- tm = localtime (&tv.tv_sec);
+ if (!ctx)
+ goto out;
- pthread_mutex_lock (&logfile_mutex);
- {
- va_start (ap, fmt);
+ if (skip_logging(this, level))
+ goto out;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ ret = vasprintf(&msg, fmt, ap);
+ if (-1 == ret) {
+ goto out;
+ }
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %s %d-%s: ",
- timestr, level_strings[level],
- basename, line, function, callstr,
- ((this->graph) ? this->graph->id:0), domain);
- if (-1 == ret) {
- goto unlock;
- }
+ ret = _gf_msg_plain_internal(level, msg);
- ret = vasprintf (&str2, fmt, ap);
- if (-1 == ret) {
- goto unlock;
- }
+ FREE(msg);
+out:
+ return ret;
+}
- va_end (ap);
+int
+_gf_msg_plain_nomem(gf_loglevel_t level, const char *msg)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+ this = THIS;
+ ctx = this->ctx;
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ if (!ctx)
+ goto out;
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
- } else {
- fprintf (stderr, "%s\n", msg);
+ if (skip_logging(this, level))
+ goto out;
+
+ ret = _gf_msg_plain_internal(level, msg);
+
+out:
+ return ret;
+}
+
+void
+_gf_msg_backtrace_nomem(gf_loglevel_t level, int stacksize)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ void *array[200];
+ size_t bt_size = 0;
+ int fd = -1;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ /* syslog does not have fd support, hence no no-mem variant */
+ if (ctx->log.logger != gf_logger_glusterlog)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ bt_size = backtrace(array, ((stacksize <= 200) ? stacksize : 200));
+ if (!bt_size)
+ goto out;
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ fd = ctx->log.logfile ? fileno(ctx->log.logfile) : fileno(stderr);
+ if (fd != -1) {
+ /* print to the file fd, to prevent any
+ allocations from backtrace_symbols
+ */
+ backtrace_symbols_fd(&array[0], bt_size, fd);
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+out:
+ return;
+}
+
+int
+_gf_msg_backtrace(int stacksize, char *callstr, size_t strsize)
+{
+ int ret = -1;
+ int i = 0;
+ int size = 0;
+ int savstrsize = strsize;
+ void *array[200];
+ char **callingfn = NULL;
+
+ /* We chop off last 2 anyway, so if request is less than tolerance
+ * nothing to do */
+ if (stacksize < 3)
+ goto out;
+
+ size = backtrace(array, ((stacksize <= 200) ? stacksize : 200));
+ if ((size - 3) < 0)
+ goto out;
+ if (size)
+ callingfn = backtrace_symbols(&array[2], size - 2);
+ if (!callingfn)
+ goto out;
+
+ ret = snprintf(callstr, strsize, "(");
+ PRINT_SIZE_CHECK(ret, out, strsize);
+
+ for ((i = size - 3); i >= 0; i--) {
+ ret = snprintf(callstr + savstrsize - strsize, strsize, "-->%s ",
+ callingfn[i]);
+ PRINT_SIZE_CHECK(ret, out, strsize);
+ }
+
+ ret = snprintf(callstr + savstrsize - strsize, strsize, ")");
+ PRINT_SIZE_CHECK(ret, out, strsize);
+out:
+ FREE(callingfn);
+ return ret;
+}
+
+int
+_gf_msg_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size)
+{
+ const char *basename = NULL;
+ xlator_t *this = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ int ret = 0;
+ int fd = -1;
+ char msg[2048] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ int wlen = 0;
+ int priority;
+ struct rusage r_usage;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ if (!domain || !file || !function) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ GET_FILE_NAME_TO_LOG(file, basename);
+
+ ret = gettimeofday(&tv, NULL);
+ if (-1 == ret)
+ goto out;
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ /* TODO: Currently we print in the enhanced format, with a message ID
+ * of 0. Need to enhance this to support format as configured */
+ wlen = snprintf(
+ msg, sizeof msg,
+ "[%s] %c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s: no memory "
+ "available for size (%" GF_PRI_SIZET
+ ") current memory usage in kilobytes %ld"
+ " [call stack follows]\n",
+ timestr, gf_level_strings[level], (uint64_t)0, basename, line, function,
+ domain, size,
+ (!getrusage(RUSAGE_SELF, &r_usage) ? r_usage.ru_maxrss : 0));
+ if (-1 == wlen) {
+ ret = -1;
+ goto out;
+ }
+
+ /* log to the configured logging service */
+ switch (ctx->log.logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ SET_LOG_PRIO(level, priority);
+
+ /* if syslog allocates, then this may fail, but we
+ * cannot do much about it at the moment */
+ /* There is no fd for syslog, hence no stack printed */
+ syslog(priority, "%s", msg);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+ case gf_logger_glusterlog:
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ fd = ctx->log.logfile ? fileno(ctx->log.logfile)
+ : fileno(stderr);
+ if (fd == -1) {
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ goto out;
}
+ /* write directly to the fd to prevent out of order
+ * message and stack */
+ ret = sys_write(fd, msg, wlen);
+ if (ret == -1) {
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ goto out;
+ }
#ifdef GF_LINUX_HOST_OS
/* We want only serious log in 'syslog', not our debug
- and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
- syslog ((level-1), "%s\n", msg);
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s\n", msg);
#endif
- }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
-unlock:
- pthread_mutex_unlock (&logfile_mutex);
+ _gf_msg_backtrace_nomem(level, GF_LOG_BACKTRACE_DEPTH);
+
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static int
+gf_log_syslog(glusterfs_ctx_t *ctx, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, char *callstr,
+ int graph_id, gf_log_format_t fmt)
+{
+ int priority;
+
+ SET_LOG_PRIO(level, priority);
+
+ /* log with appropriate format */
+ switch (fmt) {
+ case gf_logformat_traditional:
+ if (!callstr) {
+ if (errnum)
+ syslog(priority, "[%s:%d:%s] %d-%s: %s [%s]", file, line,
+ function, graph_id, domain, *appmsgstr,
+ strerror(errnum));
+ else
+ syslog(priority, "[%s:%d:%s] %d-%s: %s", file, line,
+ function, graph_id, domain, *appmsgstr);
+ } else {
+ if (errnum)
+ syslog(priority,
+ "[%s:%d:%s] %s %d-%s:"
+ " %s [%s]",
+ file, line, function, callstr, graph_id, domain,
+ *appmsgstr, strerror(errnum));
+ else
+ syslog(priority, "[%s:%d:%s] %s %d-%s: %s", file, line,
+ function, callstr, graph_id, domain, *appmsgstr);
+ }
+ break;
+ case gf_logformat_withmsgid:
+ if (!callstr) {
+ if (errnum)
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s [%s]",
+ msgid, file, line, function, graph_id, domain,
+ *appmsgstr, strerror(errnum));
+ else
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s",
+ msgid, file, line, function, graph_id, domain,
+ *appmsgstr);
+ } else {
+ if (errnum)
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s %d-%s: %s [%s]",
+ msgid, file, line, function, callstr, graph_id,
+ domain, *appmsgstr, strerror(errnum));
+ else
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s %d-%s: %s",
+ msgid, file, line, function, callstr, graph_id,
+ domain, *appmsgstr);
+ }
+ break;
+ case gf_logformat_cee:
+ /* TODO: Enhance CEE with additional parameters */
+ gf_syslog(priority, "[%s:%d:%s] %d-%s: %s", file, line, function,
+ graph_id, domain, *appmsgstr);
+ break;
+
+ default:
+ /* NOTE: should not get here without logging */
+ break;
+ }
+
+ /* TODO: There can be no errors from gf_syslog? */
+ return 0;
+}
+
+static int
+gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, char *callstr,
+ struct timeval tv, int graph_id, gf_log_format_t fmt)
+{
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char *header = NULL;
+ char *footer = NULL;
+ int ret = 0;
+
+ /* rotate if required */
+ gf_log_rotate(ctx);
+
+ /* format the time stamp */
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ /* generate footer */
+ if (errnum) {
+ ret = gf_asprintf(&footer, " [%s]\n", strerror(errnum));
+ } else {
+ ret = gf_asprintf(&footer, " \n");
+ }
+ if (-1 == ret) {
+ goto err;
+ }
+
+ /* generate message, inc. the header */
+ if (fmt == gf_logformat_traditional) {
+ if (!callstr) {
+ ret = gf_asprintf(&header,
+ "[%s] %c [%s:%d:%s]"
+ " %d-%s: %s",
+ timestr, gf_level_strings[level], file, line,
+ function, graph_id, domain, *appmsgstr);
+ } else {
+ ret = gf_asprintf(&header,
+ "[%s] %c [%s:%d:%s] %s"
+ " %d-%s: %s",
+ timestr, gf_level_strings[level], file, line,
+ function, callstr, graph_id, domain, *appmsgstr);
+ }
+ } else { /* gf_logformat_withmsgid */
+ /* CEE log format unsupported in logger_glusterlog, so just
+ * print enhanced log format */
+ if (!callstr) {
+ ret = gf_asprintf(&header,
+ "[%s] %c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s",
+ timestr, gf_level_strings[level], msgid, file,
+ line, function, graph_id, domain, *appmsgstr);
+ } else {
+ ret = gf_asprintf(&header,
+ "[%s] %c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s %d-%s: %s",
+ timestr, gf_level_strings[level], msgid, file,
+ line, function, callstr, graph_id, domain,
+ *appmsgstr);
+ }
+ }
+ if (-1 == ret) {
+ goto err;
+ }
+
+ /* send the full message to log */
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf(ctx->log.logfile, "%s%s", header, footer);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fprintf(stderr, "%s%s", header, footer);
+ fflush(stderr);
+ }
- if (msg) {
- GF_FREE (msg);
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious logs in 'syslog', not our debug
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level)) {
+ syslog((level - 1), "%s%s", header, footer);
}
+#endif
+ }
- if (str1)
- GF_FREE (str1);
+ /* TODO: Plugin in memory log buffer retention here. For logs not
+ * flushed during cores, it would be useful to retain some of the last
+ * few messages in memory */
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ ret = 0;
- if (str2)
- FREE (str2);
+err:
+ GF_FREE(header);
+ GF_FREE(footer);
-out:
- return ret;
+ return ret;
}
-int
-_gf_log (const char *domain, const char *file, const char *function, int line,
- gf_loglevel_t level, const char *fmt, ...)
-{
- const char *basename = NULL;
- FILE *new_logfile = NULL;
- va_list ap;
- struct tm *tm = NULL;
- char timestr[256];
- struct timeval tv = {0,};
-
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- size_t len = 0;
- int ret = 0;
- int fd = -1;
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (gf_log_xl_log_set) {
- if (this->loglevel && (level > this->loglevel))
- goto out;
- else if (level > gf_log_loglevel)
- goto out;
- }
-
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
- if (!domain || !file || !function || !fmt) {
- fprintf (stderr,
- "logging: %s:%s():%d: invalid argument\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- return -1;
+static int
+gf_syslog_log_repetitions(const char *domain, const char *file,
+ const char *function, int32_t line,
+ gf_loglevel_t level, int errnum, uint64_t msgid,
+ char **appmsgstr, char *callstr, int refcount,
+ struct timeval oldest, struct timeval latest,
+ int graph_id)
+{
+ int priority;
+ char timestr_latest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char timestr_oldest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ SET_LOG_PRIO(level, priority);
+
+ gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest,
+ gf_timefmt_FT);
+ gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest,
+ gf_timefmt_FT);
+
+ if (errnum) {
+ syslog(priority,
+ "The message \"[MSGID: %" PRIu64
+ "] [%s:%d:%s] "
+ "%d-%s: %s [%s] \" repeated %d times between %s"
+ " and %s",
+ msgid, file, line, function, graph_id, domain, *appmsgstr,
+ strerror(errnum), refcount, timestr_oldest, timestr_latest);
+ } else {
+ syslog(priority,
+ "The message \"[MSGID: %" PRIu64
+ "] [%s:%d:%s] "
+ "%d-%s: %s \" repeated %d times between %s"
+ " and %s",
+ msgid, file, line, function, graph_id, domain, *appmsgstr,
+ refcount, timestr_oldest, timestr_latest);
+ }
+ return 0;
+}
+
+static int
+gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain,
+ const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum,
+ uint64_t msgid, char **appmsgstr, char *callstr,
+ int refcount, struct timeval oldest,
+ struct timeval latest, int graph_id)
+{
+ int ret = 0;
+ char timestr_latest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char timestr_oldest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char errstr[256] = {
+ 0,
+ };
+ char *header = NULL;
+ char *footer = NULL;
+
+ if (!ctx)
+ goto err;
+
+ gf_log_rotate(ctx);
+
+ ret = gf_asprintf(&header,
+ "The message \"%c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s",
+ gf_level_strings[level], msgid, file, line, function,
+ graph_id, domain, *appmsgstr);
+ if (-1 == ret) {
+ goto err;
+ }
+
+ gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest,
+ gf_timefmt_FT);
+
+ gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest,
+ gf_timefmt_FT);
+
+ if (errnum)
+ snprintf(errstr, sizeof(errstr) - 1, " [%s]", strerror(errnum));
+
+ ret = gf_asprintf(&footer, "%s\" repeated %d times between [%s] and [%s]",
+ errstr, refcount, timestr_oldest, timestr_latest);
+ if (-1 == ret) {
+ ret = -1;
+ goto err;
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf(ctx->log.logfile, "%s%s\n", header, footer);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fprintf(stderr, "%s%s\n", header, footer);
+ fflush(stderr);
}
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious logs in 'syslog', not our debug
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s%s\n", header, footer);
+#endif
+ }
- if (logrotate) {
- logrotate = 0;
+ /* TODO: Plugin in memory log buffer retention here. For logs not
+ * flushed during cores, it would be useful to retain some of the last
+ * few messages in memory */
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ ret = 0;
- fd = open (filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
- if (fd < 0) {
- gf_log ("logrotate", GF_LOG_ERROR,
- "%s", strerror (errno));
- return -1;
- }
- close (fd);
-
- new_logfile = fopen (filename, "a");
- if (!new_logfile) {
- gf_log ("logrotate", GF_LOG_CRITICAL,
- "failed to open logfile %s (%s)",
- filename, strerror (errno));
- goto log;
- }
+err:
+ GF_FREE(header);
+ GF_FREE(footer);
- if (logfile)
- fclose (logfile);
+ return ret;
+}
- gf_log_logfile = logfile = new_logfile;
- }
+static int
+gf_log_print_with_repetitions(glusterfs_ctx_t *ctx, const char *domain,
+ const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum,
+ uint64_t msgid, char **appmsgstr, char *callstr,
+ int refcount, struct timeval oldest,
+ struct timeval latest, int graph_id)
+{
+ int ret = -1;
+ gf_log_logger_t logger = ctx->log.logger;
+
+ switch (logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ ret = gf_syslog_log_repetitions(
+ domain, file, function, line, level, errnum, msgid,
+ appmsgstr, callstr, refcount, oldest, latest, graph_id);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+
+ case gf_logger_glusterlog:
+ ret = gf_glusterlog_log_repetitions(
+ ctx, domain, file, function, line, level, errnum, msgid,
+ appmsgstr, callstr, refcount, oldest, latest, graph_id);
+ break;
+ }
+
+ return ret;
+}
-log:
- ret = gettimeofday (&tv, NULL);
- if (-1 == ret)
- goto out;
+static int
+gf_log_print_plain_fmt(glusterfs_ctx_t *ctx, const char *domain,
+ const char *file, const char *function, int32_t line,
+ gf_loglevel_t level, int errnum, uint64_t msgid,
+ char **appmsgstr, char *callstr, struct timeval tv,
+ int graph_id, gf_log_format_t fmt)
+{
+ int ret = -1;
+ gf_log_logger_t logger = 0;
+
+ logger = ctx->log.logger;
+
+ /* log to the configured logging service */
+ switch (logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ ret = gf_log_syslog(ctx, domain, file, function, line, level,
+ errnum, msgid, appmsgstr, callstr, graph_id,
+ fmt);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+ case gf_logger_glusterlog:
+ ret = gf_log_glusterlog(ctx, domain, file, function, line, level,
+ errnum, msgid, appmsgstr, callstr, tv,
+ graph_id, fmt);
+ break;
+ }
+
+ return ret;
+}
+
+void
+gf_log_flush_message(log_buf_t *buf, glusterfs_ctx_t *ctx)
+{
+ if (buf->refcount == 1) {
+ (void)gf_log_print_plain_fmt(ctx, buf->domain, buf->file, buf->function,
+ buf->line, buf->level, buf->errnum,
+ buf->msg_id, &buf->msg, NULL, buf->latest,
+ buf->graph_id, gf_logformat_withmsgid);
+ }
+
+ if (buf->refcount > 1) {
+ gf_log_print_with_repetitions(
+ ctx, buf->domain, buf->file, buf->function, buf->line, buf->level,
+ buf->errnum, buf->msg_id, &buf->msg, NULL, buf->refcount,
+ buf->oldest, buf->latest, buf->graph_id);
+ }
+ return;
+}
+
+static void
+gf_log_flush_list(struct list_head *copy, glusterfs_ctx_t *ctx)
+{
+ log_buf_t *iter = NULL;
+ log_buf_t *tmp = NULL;
+
+ list_for_each_entry_safe(iter, tmp, copy, msg_list)
+ {
+ gf_log_flush_message(iter, ctx);
+ list_del_init(&iter->msg_list);
+ log_buf_destroy(iter);
+ }
+}
- tm = localtime (&tv.tv_sec);
+void
+gf_log_flush_msgs(glusterfs_ctx_t *ctx)
+{
+ struct list_head copy;
+
+ INIT_LIST_HEAD(&copy);
- pthread_mutex_lock (&logfile_mutex);
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ list_splice_init(&ctx->log.lru_queue, &copy);
+ ctx->log.lru_cur_size = 0;
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ gf_log_flush_list(&copy, ctx);
+
+ return;
+}
+
+static void
+gf_log_flush_extra_msgs(glusterfs_ctx_t *ctx, uint32_t new)
+{
+ int count = 0;
+ int i = 0;
+ log_buf_t *iter = NULL;
+ log_buf_t *tmp = NULL;
+ struct list_head copy;
+
+ INIT_LIST_HEAD(&copy);
+
+ /* If the number of outstanding log messages does not cause list
+ * overflow even after reducing the size of the list, then do nothing.
+ * Otherwise (that is if there are more items in the list than there
+ * need to be after reducing its size), move the least recently used
+ * 'diff' elements to be flushed into a separate list...
+ */
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ if (ctx->log.lru_cur_size <= new)
+ goto unlock;
+ count = ctx->log.lru_cur_size - new;
+ list_for_each_entry_safe(iter, tmp, &ctx->log.lru_queue, msg_list)
{
- va_start (ap, fmt);
+ if (i == count)
+ break;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ list_del_init(&iter->msg_list);
+ list_add_tail(&iter->msg_list, &copy);
+ i++;
+ }
+ ctx->log.lru_cur_size = ctx->log.lru_cur_size - count;
+ }
+ // ... quickly unlock ...
+unlock:
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+ if (list_empty(&copy))
+ return;
+
+ TEST_LOG(
+ "Log buffer size reduced. About to flush %d extra log "
+ "messages",
+ count);
+ // ... and then flush them outside the lock.
+ gf_log_flush_list(&copy, ctx);
+ TEST_LOG("Just flushed %d extra log messages", count);
+
+ return;
+}
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %d-%s: ",
- timestr, level_strings[level],
- basename, line, function,
- ((this->graph)?this->graph->id:0), domain);
- if (-1 == ret) {
- goto unlock;
- }
+static int
+__gf_log_inject_timer_event(glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+ struct timespec timeout = {
+ 0,
+ };
- ret = vasprintf (&str2, fmt, ap);
- if (-1 == ret) {
- goto unlock;
- }
+ if (!ctx)
+ goto out;
- va_end (ap);
+ if (ctx->log.log_flush_timer) {
+ gf_timer_call_cancel(ctx, ctx->log.log_flush_timer);
+ ctx->log.log_flush_timer = NULL;
+ }
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+ timeout.tv_sec = ctx->log.timeout;
+ timeout.tv_nsec = 0;
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ TEST_LOG("Starting timer now. Timeout = %u, current buf size = %d",
+ ctx->log.timeout, ctx->log.lru_size);
+ ctx->log.log_flush_timer = gf_timer_call_after(
+ ctx, timeout, gf_log_flush_timeout_cbk, (void *)ctx);
+ if (!ctx->log.log_flush_timer)
+ goto out;
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
- } else {
- fprintf (stderr, "%s\n", msg);
- }
+ ret = 0;
-#ifdef GF_LINUX_HOST_OS
- /* We want only serious log in 'syslog', not our debug
- and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
- syslog ((level-1), "%s\n", msg);
-#endif
- }
+out:
+ return ret;
+}
-unlock:
- pthread_mutex_unlock (&logfile_mutex);
+int
+gf_log_inject_timer_event(glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+
+ if (!ctx)
+ return -1;
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ ret = __gf_log_inject_timer_event(ctx);
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ return ret;
+}
+
+void
+gf_log_flush_timeout_cbk(void *data)
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = (glusterfs_ctx_t *)data;
+
+ TEST_LOG(
+ "Log timer timed out. About to flush outstanding messages if "
+ "present");
+ gf_log_flush_msgs(ctx);
+
+ (void)gf_log_inject_timer_event(ctx);
+
+ return;
+}
- if (msg) {
- GF_FREE (msg);
+static int
+_gf_msg_internal(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, uint64_t msgid,
+ char **appmsgstr, char *callstr, int graph_id)
+{
+ int ret = -1;
+ uint32_t size = 0;
+ const char *basename = NULL;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ log_buf_t *iter = NULL;
+ log_buf_t *buf_tmp = NULL;
+ log_buf_t *buf_new = NULL;
+ log_buf_t *first = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ gf_boolean_t found = _gf_false;
+ gf_boolean_t flush_lru = _gf_false;
+ gf_boolean_t flush_logged_msg = _gf_false;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ GET_FILE_NAME_TO_LOG(file, basename);
+
+ ret = gettimeofday(&tv, NULL);
+ if (ret)
+ goto out;
+
+ /* If this function is called via _gf_msg_callingfn () (indicated by a
+ * non-NULL callstr), or if the logformat is traditional, flush the
+ * message directly to disk.
+ */
+
+ if ((callstr) || (ctx->log.logformat == gf_logformat_traditional)) {
+ ret = gf_log_print_plain_fmt(ctx, domain, basename, function, line,
+ level, errnum, msgid, appmsgstr, callstr,
+ tv, graph_id, gf_logformat_traditional);
+ goto out;
+ }
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ /* Check if the msg being logged is already part of the list */
+ list_for_each_entry_safe_reverse(iter, buf_tmp, &ctx->log.lru_queue,
+ msg_list)
+ {
+ if (first == NULL)
+ // Remember the first (lru) element in first ptr
+ first = iter;
+
+ /* Try to fail the search early on by doing the less
+ * expensive integer comparisons and continue to string
+ * parameter comparisons only after all int parameters
+ * are found to be matching.
+ */
+ if (line != iter->line)
+ continue;
+
+ if (errnum != iter->errnum)
+ continue;
+
+ if (msgid != iter->msg_id)
+ continue;
+
+ if (level != iter->level)
+ continue;
+
+ if (graph_id != iter->graph_id)
+ continue;
+
+ if (strcmp(domain, iter->domain))
+ continue;
+
+ if (strcmp(basename, iter->file))
+ continue;
+
+ if (strcmp(function, iter->function))
+ continue;
+
+ if (strcmp(*appmsgstr, iter->msg))
+ continue;
+
+ // Ah! Found a match!
+ list_move_tail(&iter->msg_list, &ctx->log.lru_queue);
+ iter->refcount++;
+ found = _gf_true;
+ // Update the 'latest' timestamp.
+ memcpy((void *)&(iter->latest), (void *)&tv,
+ sizeof(struct timeval));
+ break;
+ }
+ if (found) {
+ ret = 0;
+ goto unlock;
+ }
+ // else ...
+
+ size = ctx->log.lru_size;
+ /* If the upper limit on the log buf size is 0, flush the msg to
+ * disk directly after unlock. There's no need to buffer the
+ * msg here.
+ */
+ if (size == 0) {
+ flush_logged_msg = _gf_true;
+ goto unlock;
+ } else if (((ctx->log.lru_cur_size + 1) > size) && (first)) {
+ /* If the list is full, flush the lru msg to disk and also
+ * release it after unlock, and ...
+ * */
+ if (first->refcount >= 1)
+ TEST_LOG(
+ "Buffer overflow of a buffer whose size limit "
+ "is %d. About to flush least recently used log"
+ " message to disk",
+ size);
+ list_del_init(&first->msg_list);
+ ctx->log.lru_cur_size--;
+ flush_lru = _gf_true;
+ }
+ /* create a new list element, initialise and enqueue it.
+ * Additionally, this being the first occurrence of the msg,
+ * log it directly to disk after unlock. */
+ buf_new = mem_get0(THIS->ctx->logbuf_pool);
+ if (!buf_new) {
+ ret = -1;
+ goto unlock;
+ }
+ ret = log_buf_init(buf_new, domain, basename, function, line, level,
+ errnum, msgid, appmsgstr, graph_id);
+ if (ret) {
+ log_buf_destroy(buf_new);
+ goto unlock;
}
- if (str1)
- GF_FREE (str1);
+ memcpy((void *)&(buf_new->latest), (void *)&tv, sizeof(struct timeval));
+ memcpy((void *)&(buf_new->oldest), (void *)&tv, sizeof(struct timeval));
- if (str2)
- FREE (str2);
+ list_add_tail(&buf_new->msg_list, &ctx->log.lru_queue);
+ ctx->log.lru_cur_size++;
+ flush_logged_msg = _gf_true;
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ /* Value of @ret is a don't-care below since irrespective of success or
+ * failure post setting of @flush_lru, @first must be flushed and freed.
+ */
+ if (flush_lru) {
+ gf_log_flush_message(first, ctx);
+ log_buf_destroy(first);
+ }
+ /* Similarly, irrespective of whether all operations since setting of
+ * @flush_logged_msg were successful or not, flush the message being
+ * logged to disk in the plain format.
+ */
+ if (flush_logged_msg) {
+ ret = gf_log_print_plain_fmt(ctx, domain, basename, function, line,
+ level, errnum, msgid, appmsgstr, callstr,
+ tv, graph_id, gf_logformat_withmsgid);
+ }
out:
- return (0);
+ return ret;
}
int
-gf_log_eh (void *data)
+_gf_msg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *fmt, ...)
{
- int ret = -1;
+ int ret = 0;
+ char *msgstr = NULL;
+ va_list ap;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = NULL;
+ char *callstr = NULL;
+ int log_inited = 0;
+
+ if (this == NULL)
+ return -1;
+
+ ctx = this->ctx;
+ if (ctx == NULL) {
+ /* messages before context initialization are ignored */
+ return -1;
+ }
+
+ /* check if we should be logging */
+ if (skip_logging(this, level))
+ goto out;
+
+ /* in args check */
+ if (!domain || !file || !function || !fmt) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ /* form the message */
+ va_start(ap, fmt);
+ ret = vasprintf(&msgstr, fmt, ap);
+ va_end(ap);
+
+ /* log */
+ if (ret != -1) {
+ if (trace) {
+ callstr = GF_MALLOC(GF_LOG_BACKTRACE_SIZE, gf_common_mt_char);
+ if (callstr == NULL)
+ return -1;
- ret = eh_save_history (THIS->history, data);
+ ret = _gf_msg_backtrace(GF_LOG_BACKTRACE_DEPTH, callstr,
+ GF_LOG_BACKTRACE_SIZE);
+ if (ret < 0) {
+ GF_FREE(callstr);
+ callstr = NULL;
+ }
+ }
- return ret;
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ log_inited = 1;
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+ if (!log_inited && ctx->log.gf_log_syslog) {
+ ret = gf_log_syslog(
+ ctx, domain, file, function, line, level, errnum, msgid,
+ &msgstr, (callstr ? callstr : NULL),
+ (this->graph) ? this->graph->id : 0, gf_logformat_traditional);
+ } else {
+ ret = _gf_msg_internal(domain, file, function, line, level, errnum,
+ msgid, &msgstr, (callstr ? callstr : NULL),
+ (this->graph) ? this->graph->id : 0);
+ }
+ } else {
+ /* man (3) vasprintf states on error strp contents
+ * are undefined, be safe */
+ msgstr = NULL;
+ }
+ if (callstr)
+ GF_FREE(callstr);
+ FREE(msgstr);
+
+out:
+ return ret;
}
+/* TODO: Deprecate (delete) _gf_log, _gf_log_callingfn,
+ * once messages are changed to use _gf_msgXXX APIs for logging */
int
-gf_cmd_log_init (const char *filename)
+_gf_log(const char *domain, const char *file, const char *function, int line,
+ gf_loglevel_t level, const char *fmt, ...)
{
- int fd = -1;
- xlator_t *this = NULL;
+ const char *basename = NULL;
+ FILE *new_logfile = NULL;
+ va_list ap;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ struct timeval tv = {
+ 0,
+ };
+ char *logline = NULL;
+ char *msg = NULL;
+ int ret = 0;
+ int fd = -1;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ if (!domain || !file || !function || !fmt) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ basename = strrchr(file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto err;
+ }
+
+ if (ctx->log.log_control_file_found) {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ other level as is */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
+ }
- this = THIS;
+ gf_syslog(priority, "[%s:%d:%s] %d-%s: %s", basename, line, function,
+ ((this->graph) ? this->graph->id : 0), domain, msg);
+ goto err;
+ }
- if (!filename){
- gf_log (this->name, GF_LOG_CRITICAL, "gf_cmd_log_init: no "
- "filename specified\n");
- return -1;
+ if (ctx->log.logrotate) {
+ ctx->log.logrotate = 0;
+
+ fd = sys_open(ctx->log.filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_smsg("logrotate", GF_LOG_ERROR, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, NULL);
+ return -1;
+ }
+ sys_close(fd);
+
+ new_logfile = fopen(ctx->log.filename, "a");
+ if (!new_logfile) {
+ gf_smsg("logrotate", GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s",
+ ctx->log.filename, NULL);
+ goto log;
}
- cmd_log_filename = gf_strdup (filename);
- if (!cmd_log_filename) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "gf_cmd_log_init: strdup error\n");
- return -1;
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile)
+ fclose(ctx->log.logfile);
+
+ ctx->log.gf_log_logfile = ctx->log.logfile = new_logfile;
}
- /* close and reopen cmdlogfile for log rotate*/
- if (cmdlogfile) {
- fclose (cmdlogfile);
- cmdlogfile = NULL;
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ }
+
+log:
+ ret = gettimeofday(&tv, NULL);
+ if (-1 == ret)
+ goto out;
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %d-%s: %s\n", timestr,
+ gf_level_strings[level], basename, line, function,
+ ((this->graph) ? this->graph->id : 0), domain, msg);
+ if (-1 == ret) {
+ goto err;
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fputs(logline, ctx->log.logfile);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fputs(logline, stderr);
+ fflush(stderr);
}
- fd = open (cmd_log_filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s", logline);
+#endif
+ }
+
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+err:
+ GF_FREE(logline);
+
+ FREE(msg);
+
+out:
+ return (0);
+}
+
+int
+_gf_log_eh(const char *function, const char *fmt, ...)
+{
+ int ret = -1;
+ va_list ap;
+ char *logline = NULL;
+ char *msg = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = gf_asprintf(&logline, "[%d] %s: %s",
+ ((this->graph) ? this->graph->id : 0), function, msg);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = eh_save_history(this->history, logline);
+
+out:
+ GF_FREE(logline);
+
+ FREE(msg);
+
+ return ret;
+}
+
+int
+gf_cmd_log_init(const char *filename)
+{
+ int fd = -1;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ return -1;
+
+ if (!filename) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, LG_MSG_FILENAME_NOT_SPECIFIED,
+ "gf_cmd_log_init", NULL);
+ return -1;
+ }
+
+ ctx->log.cmd_log_filename = gf_strdup(filename);
+ if (!ctx->log.cmd_log_filename) {
+ return -1;
+ }
+ /* close and reopen cmdlogfile for log rotate*/
+ if (ctx->log.cmdlogfile) {
+ fclose(ctx->log.cmdlogfile);
+ ctx->log.cmdlogfile = NULL;
+ }
+
+ fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED,
+ "cmd_log_file", NULL);
+ return -1;
+ }
+
+ ctx->log.cmdlogfile = fdopen(fd, "a");
+ if (!ctx->log.cmdlogfile) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED,
+ "gf_cmd_log_init: %s", ctx->log.cmd_log_filename, NULL);
+ sys_close(fd);
+ return -1;
+ }
+ return 0;
+}
+
+int
+gf_cmd_log(const char *domain, const char *fmt, ...)
+{
+ va_list ap;
+ char timestr[GF_TIMESTR_SIZE];
+ struct timeval tv = {
+ 0,
+ };
+ char *logline = NULL;
+ char *msg = NULL;
+ int ret = 0;
+ int fd = -1;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = THIS->ctx;
+
+ if (!ctx)
+ return -1;
+
+ if (!ctx->log.cmdlogfile)
+ return -1;
+
+ if (!domain || !fmt) {
+ gf_msg_trace("glusterd", 0, "logging: invalid argument\n");
+ return -1;
+ }
+
+ ret = gettimeofday(&tv, NULL);
+ if (ret == -1)
+ goto out;
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (ret == -1) {
+ goto out;
+ }
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ ret = gf_asprintf(&logline, "[%s] %s : %s\n", timestr, domain, msg);
+ if (ret == -1) {
+ goto out;
+ }
+
+ /* close and reopen cmdlogfile fd for in case of log rotate*/
+ if (ctx->log.cmd_history_logrotate) {
+ ctx->log.cmd_history_logrotate = 0;
+
+ if (ctx->log.cmdlogfile) {
+ fclose(ctx->log.cmdlogfile);
+ ctx->log.cmdlogfile = NULL;
+ }
+
+ fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR);
if (fd < 0) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "%s", strerror (errno));
- return -1;
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "name=%s",
+ ctx->log.cmd_log_filename, NULL);
+ ret = -1;
+ goto out;
}
- close (fd);
- cmdlogfile = fopen (cmd_log_filename, "a");
- if (!cmdlogfile){
- gf_log (this->name, GF_LOG_CRITICAL,
- "gf_cmd_log_init: failed to open logfile \"%s\" "
- "(%s)\n", cmd_log_filename, strerror (errno));
- return -1;
+ ctx->log.cmdlogfile = fdopen(fd, "a");
+ if (!ctx->log.cmdlogfile) {
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "name=%s",
+ ctx->log.cmd_log_filename, NULL);
+ ret = -1;
+ sys_close(fd);
+ goto out;
}
- return 0;
+ }
+
+ fputs(logline, ctx->log.cmdlogfile);
+ fflush(ctx->log.cmdlogfile);
+
+out:
+ GF_FREE(logline);
+
+ FREE(msg);
+
+ return ret;
}
-int
-gf_cmd_log (const char *domain, const char *fmt, ...)
-{
- va_list ap;
- struct tm *tm = NULL;
- char timestr[256];
- struct timeval tv = {0,};
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- size_t len = 0;
- int ret = 0;
-
- if (!cmdlogfile)
- return -1;
+static int
+_do_slog_format(int errnum, const char *event, va_list inp, char **msg)
+{
+ va_list valist_tmp;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ char *fmt = NULL;
+ char *buffer = NULL;
+ int num_format_chars = 0;
+ char format_char = '%';
+ char *tmp1 = NULL;
+ char *tmp2 = NULL;
+ char temp_sep[3] = "";
+
+ tmp2 = gf_strdup("");
+ if (!tmp2) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Hardcoded value for max key value pairs, exits early */
+ /* from loop if found NULL */
+ for (i = 0; i < GF_MAX_SLOG_PAIR_COUNT; i++) {
+ fmt = va_arg(inp, char *);
+ if (fmt == NULL) {
+ break;
+ }
+ /* Get number of times % is used in input for formatting, */
+ /* this count will be used to skip those many args from the */
+ /* main list and will be used to format inner format */
+ num_format_chars = 0;
+ for (k = 0; fmt[k] != '\0'; k++) {
+ /* If %% is used then that is escaped */
+ if (fmt[k] == format_char && fmt[k + 1] == format_char) {
+ k++;
+ } else if (fmt[k] == format_char) {
+ num_format_chars++;
+ }
+ }
- if (!domain || !fmt) {
- gf_log ("glusterd", GF_LOG_TRACE,
- "logging: invalid argument\n");
- return -1;
+ tmp1 = gf_strdup(tmp2);
+ if (!tmp1) {
+ ret = -1;
+ goto out;
}
- ret = gettimeofday (&tv, NULL);
- if (ret == -1)
- goto out;
+ GF_FREE(tmp2);
+ tmp2 = NULL;
- tm = localtime (&tv.tv_sec);
+ if (num_format_chars > 0) {
+ /* Make separate valist and format the string */
+ va_copy(valist_tmp, inp);
+ ret = gf_vasprintf(&buffer, fmt, valist_tmp);
+ if (ret < 0) {
+ va_end(valist_tmp);
+ goto out;
+ }
+ va_end(valist_tmp);
- va_start (ap, fmt);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ for (j = 0; j < num_format_chars; j++) {
+ /* Skip the va_arg value since these values
+ are already used for internal formatting */
+ (void)va_arg(inp, void *);
+ }
- ret = gf_asprintf (&str1, "[%s] %s : ",
- timestr, domain);
- if (ret == -1) {
+ ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, buffer);
+ if (ret < 0)
goto out;
- }
- ret = vasprintf (&str2, fmt, ap);
- if (ret == -1) {
+ GF_FREE(buffer);
+ buffer = NULL;
+ } else {
+ ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, fmt);
+ if (ret < 0)
goto out;
}
- va_end (ap);
+ /* Set seperator for next iteration */
+ temp_sep[0] = ',';
+ temp_sep[1] = ' ';
+ temp_sep[2] = 0;
+
+ GF_FREE(tmp1);
+ tmp1 = NULL;
+ }
+
+ tmp1 = gf_strdup(tmp2);
+ if (!tmp1) {
+ ret = -1;
+ goto out;
+ }
+ GF_FREE(tmp2);
+ tmp2 = NULL;
+
+ if (errnum) {
+ ret = gf_asprintf(&tmp2, "%s [%s%s{errno=%d}, {error=%s}]", event, tmp1,
+ temp_sep, errnum, strerror(errnum));
+ } else {
+ ret = gf_asprintf(&tmp2, "%s [%s]", event, tmp1);
+ }
+
+ if (ret == -1)
+ goto out;
+
+ *msg = gf_strdup(tmp2);
+ if (!*msg)
+ ret = -1;
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+out:
+ if (buffer)
+ GF_FREE(buffer);
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ if (tmp1)
+ GF_FREE(tmp1);
- fprintf (cmdlogfile, "%s\n", msg);
- fflush (cmdlogfile);
+ if (tmp2)
+ GF_FREE(tmp2);
-out:
- if (msg) {
- GF_FREE (msg);
- }
+ return ret;
+}
+
+int
+_gf_smsg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *event, ...)
+{
+ va_list valist;
+ char *msg = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
- if (str1)
- GF_FREE (str1);
+ if (skip_logging(this, level))
+ return ret;
- if (str2)
- FREE (str2);
+ va_start(valist, event);
+ ret = _do_slog_format(errnum, event, valist, &msg);
+ if (ret == -1)
+ goto out;
- return (0);
+ /* Pass errnum as zero since it is already formated as required */
+ ret = _gf_msg(domain, file, function, line, level, 0, trace, msgid, "%s",
+ msg);
+
+out:
+ va_end(valist);
+ if (msg)
+ GF_FREE(msg);
+ return ret;
}
diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/logging.h
deleted file mode 100644
index bbf0d9a3891..00000000000
--- a/libglusterfs/src/logging.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __LOGGING_H__
-#define __LOGGING_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdarg.h>
-
-#ifdef GF_DARWIN_HOST_OS
-#define GF_PRI_FSBLK "u"
-#define GF_PRI_DEV PRId32
-#define GF_PRI_NLINK PRIu16
-#define GF_PRI_SUSECONDS "06d"
-#else
-#define GF_PRI_FSBLK PRIu64
-#define GF_PRI_DEV PRIu64
-#define GF_PRI_NLINK PRIu32
-#define GF_PRI_SUSECONDS "06ld"
-#endif
-#define GF_PRI_BLKSIZE PRId32
-#define GF_PRI_SIZET "zu"
-
-#if 0
-/* Syslog definitions :-) */
-#define LOG_EMERG 0 /* system is unusable */
-#define LOG_ALERT 1 /* action must be taken immediately */
-#define LOG_CRIT 2 /* critical conditions */
-#define LOG_ERR 3 /* error conditions */
-#define LOG_WARNING 4 /* warning conditions */
-#define LOG_NOTICE 5 /* normal but significant condition */
-#define LOG_INFO 6 /* informational */
-#define LOG_DEBUG 7 /* debug-level messages */
-#endif
-
-typedef enum {
- GF_LOG_NONE,
- GF_LOG_EMERG,
- GF_LOG_ALERT,
- GF_LOG_CRITICAL, /* fatal errors */
- GF_LOG_ERROR, /* major failures (not necessarily fatal) */
- GF_LOG_WARNING, /* info about normal operation */
- GF_LOG_NOTICE,
- GF_LOG_INFO, /* Normal information */
- GF_LOG_DEBUG, /* internal errors */
- GF_LOG_TRACE, /* full trace of operation */
-} gf_loglevel_t;
-
-extern gf_loglevel_t gf_log_loglevel;
-extern char gf_log_xl_log_set;
-
-#define FMT_WARN(fmt...) do { if (0) printf (fmt); } while (0)
-
-#define gf_log(dom, levl, fmt...) do { \
- FMT_WARN (fmt); \
- \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
- _gf_log (dom, __FILE__, __FUNCTION__, __LINE__, \
- levl, ##fmt); \
- } while (0)
-
-#define gf_log_callingfn(dom, levl, fmt...) do { \
- FMT_WARN (fmt); \
- \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
- _gf_log_callingfn (dom, __FILE__, __FUNCTION__, __LINE__, \
- levl, ##fmt); \
- } while (0)
-
-
-/* No malloc or calloc should be called in this function */
-#define gf_log_nomem(dom, levl, size) do { \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
- _gf_log_nomem (dom, __FILE__, __FUNCTION__, __LINE__, \
- levl, size); \
- } while (0)
-
-
-/* Log once in GF_UNIVERSAL_ANSWER times */
-#define GF_LOG_OCCASIONALLY(var, args...) if (!(var++%GF_UNIVERSAL_ANSWER)) { \
- gf_log (args); \
- }
-
-
-void gf_log_logrotate (int signum);
-int
-gf_log_eh (void *data);
-void gf_log_globals_init (void);
-int gf_log_init (const char *filename);
-void gf_log_cleanup (void);
-
-int _gf_log (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 6, 7)));
-int _gf_log_callingfn (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 6, 7)));
-
-int _gf_log_nomem (const char *domain, const char *file,
- const char *function, int line, gf_loglevel_t level,
- size_t size);
-
-int gf_log_from_client (const char *msg, char *identifier);
-
-void gf_log_lock (void);
-void gf_log_unlock (void);
-
-void gf_log_disable_syslog (void);
-void gf_log_enable_syslog (void);
-gf_loglevel_t gf_log_get_loglevel (void);
-void gf_log_set_loglevel (gf_loglevel_t level);
-gf_loglevel_t gf_log_get_xl_loglevel (void *xl);
-void gf_log_set_xl_loglevel (void *xl, gf_loglevel_t level);
-
-#define GF_DEBUG(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_DEBUG, format, ##args)
-#define GF_INFO(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_INFO, format, ##args)
-#define GF_WARNING(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_WARNING, format, ##args)
-#define GF_ERROR(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_ERROR, format, ##args)
-
-int gf_cmd_log (const char *domain, const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 2, 3)));
-
-int gf_cmd_log_init (const char *filename);
-
-void set_sys_log_level (gf_loglevel_t level);
-#endif /* __LOGGING_H__ */
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 3e8100c6438..2d5a12b0a00 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -8,579 +8,925 @@
cases as published by the Free Software Foundation.
*/
-#include "mem-pool.h"
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h" // for GF_ASSERT, gf_thread_cr...
+#include "glusterfs/globals.h" // for xlator_t, THIS
#include <stdlib.h>
#include <stdarg.h>
-#define GF_MEM_POOL_LIST_BOUNDARY (sizeof(struct list_head))
-#define GF_MEM_POOL_PTR (sizeof(struct mem_pool*))
-#define GF_MEM_POOL_PAD_BOUNDARY (GF_MEM_POOL_LIST_BOUNDARY + GF_MEM_POOL_PTR + sizeof(int))
-#define mem_pool_chunkhead2ptr(head) ((head) + GF_MEM_POOL_PAD_BOUNDARY)
-#define mem_pool_ptr2chunkhead(ptr) ((ptr) - GF_MEM_POOL_PAD_BOUNDARY)
-#define is_mem_chunk_in_use(ptr) (*ptr == 1)
-#define mem_pool_from_ptr(ptr) ((ptr) + GF_MEM_POOL_LIST_BOUNDARY)
+#include "unittest/unittest.h"
+#include "glusterfs/libglusterfs-messages.h"
-#define GF_MEM_HEADER_SIZE (4 + sizeof (size_t) + sizeof (xlator_t *) + 4 + 8)
-#define GF_MEM_TRAILER_SIZE 8
+void
+gf_mem_acct_enable_set(void *data)
+{
+ glusterfs_ctx_t *ctx = NULL;
-#define GF_MEM_HEADER_MAGIC 0xCAFEBABE
-#define GF_MEM_TRAILER_MAGIC 0xBAADF00D
+ REQUIRE(data != NULL);
-#define GLUSTERFS_ENV_MEM_ACCT_STR "GLUSTERFS_DISABLE_MEM_ACCT"
+ ctx = data;
-static int gf_mem_acct_enable = 0;
+ GF_ASSERT(ctx != NULL);
-int
-gf_mem_acct_is_enabled ()
-{
- return gf_mem_acct_enable;
+ ctx->mem_acct_enable = 1;
+
+ ENSURE(1 == ctx->mem_acct_enable);
+
+ return;
}
-void
-gf_mem_acct_enable_set ()
+static void *
+gf_mem_header_prepare(struct mem_header *header, size_t size)
{
-#ifdef DEBUG
- gf_mem_acct_enable = 1;
- return;
-#endif
- glusterfs_ctx_t *ctx = NULL;
- char *opt = NULL;
- long val = -1;
-
- gf_mem_acct_enable = 0;
+ void *ptr;
- ctx = glusterfs_ctx_get ();
+ header->size = size;
- if (ctx->mem_accounting) {
- gf_mem_acct_enable = 1;
- return;
- }
+ ptr = header + 1;
- opt = getenv (GLUSTERFS_ENV_MEM_ACCT_STR);
- if (opt) {
- val = strtol (opt, NULL, 0);
- if (val)
- gf_mem_acct_enable = 1;
- }
+ /* data follows in this gap of 'size' bytes */
+ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
- return;
+ return ptr;
}
-void
-gf_mem_set_acct_info (xlator_t *xl, char **alloc_ptr,
- size_t size, uint32_t type)
+static void *
+gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size, uint32_t type, const char *typestr)
{
+ struct mem_acct_rec *rec = NULL;
+ bool new_ref = false;
- char *ptr = NULL;
+ if (mem_acct != NULL) {
+ GF_ASSERT(type <= mem_acct->num_types);
- if (!alloc_ptr)
- return;
-
- ptr = (char *) (*alloc_ptr);
+ rec = &mem_acct->rec[type];
+ LOCK(&rec->lock);
+ {
+ if (!rec->typestr) {
+ rec->typestr = typestr;
+ }
+ rec->size += size;
+ new_ref = (rec->num_allocs == 0);
+ rec->num_allocs++;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs);
- if (!xl) {
- GF_ASSERT (0);
+#ifdef DEBUG
+ list_add(&header->acct_list, &rec->obj_list);
+#endif
}
+ UNLOCK(&rec->lock);
- if (!(xl->mem_acct.rec)) {
- GF_ASSERT (0);
+ /* We only take a reference for each memory type used, not for each
+ * allocation. This minimizes the use of atomic operations. */
+ if (new_ref) {
+ GF_ATOMIC_INC(mem_acct->refcnt);
}
+ }
- if (type > xl->mem_acct.num_types) {
- GF_ASSERT (0);
- }
+ header->type = type;
+ header->mem_acct = mem_acct;
+ header->magic = GF_MEM_HEADER_MAGIC;
+
+ return gf_mem_header_prepare(header, size);
+}
- LOCK(&xl->mem_acct.rec[type].lock);
+static void *
+gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size)
+{
+ struct mem_acct_rec *rec = NULL;
+
+ if (mem_acct != NULL) {
+ rec = &mem_acct->rec[header->type];
+ LOCK(&rec->lock);
{
- xl->mem_acct.rec[type].size += size;
- xl->mem_acct.rec[type].num_allocs++;
- xl->mem_acct.rec[type].total_allocs++;
- xl->mem_acct.rec[type].max_size =
- max (xl->mem_acct.rec[type].max_size,
- xl->mem_acct.rec[type].size);
- xl->mem_acct.rec[type].max_num_allocs =
- max (xl->mem_acct.rec[type].max_num_allocs,
- xl->mem_acct.rec[type].num_allocs);
+ rec->size += size - header->size;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+
+#ifdef DEBUG
+ /* The old 'header' already was present in 'obj_list', but
+ * realloc() could have changed its address. We need to remove
+ * the old item from the list and add the new one. This can be
+ * done this way because list_move() doesn't use the pointers
+ * to the old location (which are not valid anymore) already
+ * present in the list, it simply overwrites them. */
+ list_move(&header->acct_list, &rec->obj_list);
+#endif
}
- UNLOCK(&xl->mem_acct.rec[type].lock);
-
- *(uint32_t *)(ptr) = type;
- ptr = ptr + 4;
- memcpy (ptr, &size, sizeof(size_t));
- ptr += sizeof (size_t);
- memcpy (ptr, &xl, sizeof(xlator_t *));
- ptr += sizeof (xlator_t *);
- *(uint32_t *)(ptr) = GF_MEM_HEADER_MAGIC;
- ptr = ptr + 4;
- ptr = ptr + 8; //padding
- *(uint32_t *) (ptr + size) = GF_MEM_TRAILER_MAGIC;
-
- *alloc_ptr = (void *)ptr;
- return;
+ UNLOCK(&rec->lock);
+ }
+
+ return gf_mem_header_prepare(header, size);
}
+static bool
+gf_mem_acct_enabled(void)
+{
+ xlator_t *x = THIS;
+ /* Low-level __gf_xxx() may be called
+ before ctx is initialized. */
+ return x->ctx && x->ctx->mem_acct_enable;
+}
void *
-__gf_calloc (size_t nmemb, size_t size, uint32_t type)
+__gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
{
- size_t tot_size = 0;
- size_t req_size = 0;
- char *ptr = NULL;
- xlator_t *xl = NULL;
+ size_t tot_size = 0;
+ size_t req_size = 0;
+ void *ptr = NULL;
+ xlator_t *xl = NULL;
- if (!gf_mem_acct_enable)
- return CALLOC (nmemb, size);
+ if (!gf_mem_acct_enabled())
+ return CALLOC(nmemb, size);
- xl = THIS;
+ xl = THIS;
- req_size = nmemb * size;
- tot_size = req_size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ req_size = nmemb * size;
+ tot_size = req_size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- ptr = calloc (1, tot_size);
+ ptr = calloc(1, tot_size);
- if (!ptr) {
- gf_log_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
- gf_mem_set_acct_info (xl, &ptr, req_size, type);
+ if (!ptr) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr);
}
void *
-__gf_malloc (size_t size, uint32_t type)
+__gf_malloc(size_t size, uint32_t type, const char *typestr)
{
- size_t tot_size = 0;
- char *ptr = NULL;
- xlator_t *xl = NULL;
+ size_t tot_size = 0;
+ void *ptr = NULL;
+ xlator_t *xl = NULL;
- if (!gf_mem_acct_enable)
- return MALLOC (size);
+ if (!gf_mem_acct_enabled())
+ return MALLOC(size);
- xl = THIS;
+ xl = THIS;
- tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- ptr = malloc (tot_size);
- if (!ptr) {
- gf_log_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
- gf_mem_set_acct_info (xl, &ptr, size, type);
+ ptr = malloc(tot_size);
+ if (!ptr) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr);
}
void *
-__gf_realloc (void *ptr, size_t size)
+__gf_realloc(void *ptr, size_t size)
{
- size_t tot_size = 0;
- char *orig_ptr = NULL;
- xlator_t *xl = NULL;
- uint32_t type = 0;
+ size_t tot_size = 0;
+ struct mem_header *header = NULL;
+
+ if (!gf_mem_acct_enabled())
+ return REALLOC(ptr, size);
- if (!gf_mem_acct_enable)
- return REALLOC (ptr, size);
+ REQUIRE(NULL != ptr);
- tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
+ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC);
- orig_ptr = (char *)ptr - 8 - 4;
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ header = realloc(header, tot_size);
+ if (!header) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- GF_ASSERT (*(uint32_t *)orig_ptr == GF_MEM_HEADER_MAGIC);
+ return gf_mem_update_acct_info(header->mem_acct, header, size);
+}
- orig_ptr = orig_ptr - sizeof(xlator_t *);
- xl = *((xlator_t **)orig_ptr);
+int
+gf_vasprintf(char **string_ptr, const char *format, va_list arg)
+{
+ va_list arg_save;
+ char *str = NULL;
+ int size = 0;
+ int rv = 0;
+
+ if (!string_ptr || !format)
+ return -1;
+
+ va_copy(arg_save, arg);
+
+ size = vsnprintf(NULL, 0, format, arg);
+ size++;
+ str = GF_MALLOC(size, gf_common_mt_asprintf);
+ if (str == NULL) {
+ /* log is done in GF_MALLOC itself */
+ va_end(arg_save);
+ return -1;
+ }
+ rv = vsnprintf(str, size, format, arg_save);
+
+ *string_ptr = str;
+ va_end(arg_save);
+ return (rv);
+}
- orig_ptr = (char *)ptr - GF_MEM_HEADER_SIZE;
- type = *(uint32_t *)orig_ptr;
+int
+gf_asprintf(char **string_ptr, const char *format, ...)
+{
+ va_list arg;
+ int rv = 0;
- ptr = realloc (orig_ptr, tot_size);
- if (!ptr) {
- gf_log_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
+ va_start(arg, format);
+ rv = gf_vasprintf(string_ptr, format, arg);
+ va_end(arg);
- gf_mem_set_acct_info (xl, (char **)&ptr, size, type);
+ return rv;
+}
- return (void *)ptr;
+#ifdef DEBUG
+void
+__gf_mem_invalidate(void *ptr)
+{
+ struct mem_header *header = ptr;
+ void *end = NULL;
+
+ struct mem_invalid inval = {
+ .magic = GF_MEM_INVALID_MAGIC,
+ .mem_acct = header->mem_acct,
+ .type = header->type,
+ .size = header->size,
+ .baseaddr = ptr + GF_MEM_HEADER_SIZE,
+ };
+
+ /* calculate the last byte of the allocated area */
+ end = ptr + GF_MEM_HEADER_SIZE + inval.size + GF_MEM_TRAILER_SIZE;
+
+ /* overwrite the old mem_header */
+ memcpy(ptr, &inval, sizeof(inval));
+ ptr += sizeof(inval);
+
+ /* zero out remaining (old) mem_header bytes) */
+ memset(ptr, 0x00, sizeof(*header) - sizeof(inval));
+ ptr += sizeof(*header) - sizeof(inval);
+
+ /* zero out the first byte of data */
+ *(uint32_t *)(ptr) = 0x00;
+ ptr += 1;
+
+ /* repeated writes of invalid structurein data area */
+ while ((ptr + (sizeof(inval))) < (end - 1)) {
+ memcpy(ptr, &inval, sizeof(inval));
+ ptr += sizeof(inval);
+ }
+
+ /* fill out remaining data area with 0xff */
+ memset(ptr, 0xff, end - ptr);
+}
+#endif /* DEBUG */
+
+/* Coverity taint NOTE: pointers passed to free, would operate on
+pointer-GF_MEM_HEADER_SIZE content and if the pointer was used for any IO
+related purpose, the pointer stands tainted, and hence coverity would consider
+access to the said region as tainted. The following directive to coverity hence
+sanitizes the pointer, thus removing any taint to the same within this function.
+If the pointer is accessed outside the scope of this function without any
+checks on content read from an IO operation, taints will still be reported, and
+needs appropriate addressing. */
+
+/* coverity[ +tainted_data_sanitize : arg-0 ] */
+static void
+gf_free_sanitize(void *s)
+{
}
-int
-gf_vasprintf (char **string_ptr, const char *format, va_list arg)
+void
+__gf_free(void *free_ptr)
{
- va_list arg_save;
- char *str = NULL;
- int size = 0;
- int rv = 0;
-
- if (!string_ptr || !format)
- return -1;
-
- va_copy (arg_save, arg);
-
- size = vsnprintf (NULL, 0, format, arg);
- size++;
- str = GF_MALLOC (size, gf_common_mt_asprintf);
- if (str == NULL) {
- /* log is done in GF_MALLOC itself */
- return -1;
+ void *ptr = NULL;
+ struct mem_acct *mem_acct;
+ struct mem_header *header = NULL;
+ bool last_ref = false;
+
+ if (!gf_mem_acct_enabled()) {
+ FREE(free_ptr);
+ return;
+ }
+
+ if (!free_ptr)
+ return;
+
+ gf_free_sanitize(free_ptr);
+ ptr = free_ptr - GF_MEM_HEADER_SIZE;
+ header = (struct mem_header *)ptr;
+
+ // Possible corruption, assert here
+ GF_ASSERT(GF_MEM_HEADER_MAGIC == header->magic);
+
+ mem_acct = header->mem_acct;
+ if (!mem_acct) {
+ goto free;
+ }
+
+ // This points to a memory overrun
+ GF_ASSERT(GF_MEM_TRAILER_MAGIC ==
+ *(uint32_t *)((char *)free_ptr + header->size));
+
+ LOCK(&mem_acct->rec[header->type].lock);
+ {
+ mem_acct->rec[header->type].size -= header->size;
+ mem_acct->rec[header->type].num_allocs--;
+ /* If all the instances are freed up then ensure typestr is set
+ * to NULL */
+ if (!mem_acct->rec[header->type].num_allocs) {
+ last_ref = true;
+ mem_acct->rec[header->type].typestr = NULL;
}
- rv = vsnprintf (str, size, format, arg_save);
+#ifdef DEBUG
+ list_del(&header->acct_list);
+#endif
+ }
+ UNLOCK(&mem_acct->rec[header->type].lock);
- *string_ptr = str;
- return (rv);
+ if (last_ref) {
+ xlator_mem_acct_unref(mem_acct);
+ }
+
+free:
+#ifdef DEBUG
+ __gf_mem_invalidate(ptr);
+#endif
+
+ FREE(ptr);
}
-int
-gf_asprintf (char **string_ptr, const char *format, ...)
+#if defined(GF_DISABLE_MEMPOOL)
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
{
- va_list arg;
- int rv = 0;
+ struct mem_pool *new;
- va_start (arg, format);
- rv = gf_vasprintf (string_ptr, format, arg);
- va_end (arg);
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
- return rv;
+ new->sizeof_type = sizeof_type;
+ return new;
}
void
-__gf_free (void *free_ptr)
+mem_pool_destroy(struct mem_pool *pool)
{
- size_t req_size = 0;
- char *ptr = NULL;
- uint32_t type = 0;
- xlator_t *xl = NULL;
-
- if (!gf_mem_acct_enable) {
- FREE (free_ptr);
- return;
- }
+ GF_FREE(pool);
+}
- if (!free_ptr)
- return;
+#else /* !GF_DISABLE_MEMPOOL */
- ptr = (char *)free_ptr - 8 - 4;
+static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct list_head pool_threads;
+static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct list_head pool_free_threads;
+static struct mem_pool_shared pools[NPOOLS];
+static size_t pool_list_size;
- if (GF_MEM_HEADER_MAGIC != *(uint32_t *)ptr) {
- //Possible corruption, assert here
- GF_ASSERT (0);
- }
+static __thread per_thread_pool_list_t *thread_pool_list = NULL;
- *(uint32_t *)ptr = 0;
+#define N_COLD_LISTS 1024
+#define POOL_SWEEP_SECS 30
- ptr = ptr - sizeof(xlator_t *);
- memcpy (&xl, ptr, sizeof(xlator_t *));
+typedef struct {
+ pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
+ unsigned int n_cold_lists;
+} sweep_state_t;
- if (!xl) {
- //gf_free expects xl to be available
- GF_ASSERT (0);
- }
+enum init_state {
+ GF_MEMPOOL_INIT_NONE = 0,
+ GF_MEMPOOL_INIT_EARLY,
+ GF_MEMPOOL_INIT_LATE,
+ GF_MEMPOOL_INIT_DESTROY
+};
- if (!xl->mem_acct.rec) {
- ptr = (char *)free_ptr - GF_MEM_HEADER_SIZE;
- goto free;
+static enum init_state init_done = GF_MEMPOOL_INIT_NONE;
+static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+static unsigned int init_count = 0;
+static pthread_t sweeper_tid;
+
+static bool
+collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
+{
+ unsigned int i;
+ per_thread_pool_t *pt_pool;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; ++i) {
+ pt_pool = &pool_list->pools[i];
+ if (pt_pool->cold_list) {
+ if (state->n_cold_lists >= N_COLD_LISTS) {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ return true;
+ }
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
}
+ pt_pool->cold_list = pt_pool->hot_list;
+ pt_pool->hot_list = NULL;
+ }
+ (void)pthread_spin_unlock(&pool_list->lock);
- ptr = ptr - sizeof(size_t);
- memcpy (&req_size, ptr, sizeof (size_t));
- ptr = ptr - 4;
- type = *(uint32_t *)ptr;
+ return false;
+}
- if (GF_MEM_TRAILER_MAGIC != *(uint32_t *)
- ((char *)free_ptr + req_size)) {
- // This points to a memory overrun
- GF_ASSERT (0);
- }
- *(uint32_t *) ((char *)free_ptr + req_size) = 0;
+static void
+free_obj_list(pooled_obj_hdr_t *victim)
+{
+ pooled_obj_hdr_t *next;
- LOCK (&xl->mem_acct.rec[type].lock);
+ while (victim) {
+ next = victim->next;
+ free(victim);
+ victim = next;
+ }
+}
+
+static void *
+pool_sweeper(void *arg)
+{
+ sweep_state_t state;
+ per_thread_pool_list_t *pool_list;
+ uint32_t i;
+ bool pending;
+
+ /*
+ * This is all a bit inelegant, but the point is to avoid doing
+ * expensive things (like freeing thousands of objects) while holding a
+ * global lock. Thus, we split each iteration into two passes, with
+ * only the first and fastest holding the lock.
+ */
+
+ pending = true;
+
+ for (;;) {
+ /* If we know there's pending work to do (or it's the first run), we
+ * do collect garbage more often. */
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
+
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+ state.n_cold_lists = 0;
+ pending = false;
+
+ /* First pass: collect stuff that needs our attention. */
+ (void)pthread_mutex_lock(&pool_lock);
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
{
- xl->mem_acct.rec[type].size -= req_size;
- xl->mem_acct.rec[type].num_allocs--;
+ if (collect_garbage(&state, pool_list)) {
+ pending = true;
+ }
}
- UNLOCK (&xl->mem_acct.rec[type].lock);
-free:
- FREE (ptr);
-}
+ (void)pthread_mutex_unlock(&pool_lock);
+ /* Second pass: free cold objects from live pools. */
+ for (i = 0; i < state.n_cold_lists; ++i) {
+ free_obj_list(state.cold_lists[i]);
+ }
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
+ return NULL;
+}
-struct mem_pool *
-mem_pool_new_fn (unsigned long sizeof_type,
- unsigned long count, char *name)
+void
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
- struct mem_pool *mem_pool = NULL;
- unsigned long padded_sizeof_type = 0;
- void *pool = NULL;
- int i = 0;
- int ret = 0;
- struct list_head *list = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- if (!sizeof_type || !count) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
+ per_thread_pool_t *pt_pool;
+ uint32_t i;
+
+ if (pool_list == NULL) {
+ pool_list = thread_pool_list;
+ }
+
+ /* The current thread is terminating. None of the allocated objects will
+ * be used again. We can directly destroy them here instead of delaying
+ * it until the next sweeper loop. */
+ if (pool_list != NULL) {
+ /* Remove pool_list from the global list to avoid that sweeper
+ * could touch it. */
+ pthread_mutex_lock(&pool_lock);
+ list_del(&pool_list->thr_list);
+ pthread_mutex_unlock(&pool_lock);
+
+ /* We need to protect hot/cold changes from potential mem_put() calls
+ * that reference this pool_list. Once poison is set to true, we are
+ * sure that no one else will touch hot/cold lists. The only possible
+ * race is when at the same moment a mem_put() is adding a new item
+ * to the hot list. We protect from that by taking pool_list->lock.
+ * After that we don't need the lock to destroy the hot/cold lists. */
+ pthread_spin_lock(&pool_list->lock);
+ pool_list->poison = true;
+ pthread_spin_unlock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; i++) {
+ pt_pool = &pool_list->pools[i];
+
+ free_obj_list(pt_pool->hot_list);
+ pt_pool->hot_list = NULL;
+
+ free_obj_list(pt_pool->cold_list);
+ pt_pool->cold_list = NULL;
}
- padded_sizeof_type = sizeof_type + GF_MEM_POOL_PAD_BOUNDARY;
- mem_pool = GF_CALLOC (sizeof (*mem_pool), 1, gf_common_mt_mem_pool);
- if (!mem_pool)
- return NULL;
+ pthread_mutex_lock(&pool_free_lock);
+ list_add(&pool_list->thr_list, &pool_free_threads);
+ pthread_mutex_unlock(&pool_free_lock);
- ret = gf_asprintf (&mem_pool->name, "%s:%s", THIS->name, name);
- if (ret < 0)
- return NULL;
+ thread_pool_list = NULL;
+ }
+}
- if (!mem_pool->name) {
- GF_FREE (mem_pool);
- return NULL;
- }
+static __attribute__((constructor)) void
+mem_pools_preinit(void)
+{
+ unsigned int i;
- LOCK_INIT (&mem_pool->lock);
- INIT_LIST_HEAD (&mem_pool->list);
- INIT_LIST_HEAD (&mem_pool->global_list);
+ INIT_LIST_HEAD(&pool_threads);
+ INIT_LIST_HEAD(&pool_free_threads);
- mem_pool->padded_sizeof_type = padded_sizeof_type;
- mem_pool->cold_count = count;
- mem_pool->real_sizeof_type = sizeof_type;
+ for (i = 0; i < NPOOLS; ++i) {
+ pools[i].power_of_two = POOL_SMALLEST + i;
- pool = GF_CALLOC (count, padded_sizeof_type, gf_common_mt_long);
- if (!pool) {
- GF_FREE (mem_pool->name);
- GF_FREE (mem_pool);
- return NULL;
- }
+ GF_ATOMIC_INIT(pools[i].allocs_hot, 0);
+ GF_ATOMIC_INIT(pools[i].allocs_cold, 0);
+ GF_ATOMIC_INIT(pools[i].allocs_stdc, 0);
+ GF_ATOMIC_INIT(pools[i].frees_to_list, 0);
+ }
- for (i = 0; i < count; i++) {
- list = pool + (i * (padded_sizeof_type));
- INIT_LIST_HEAD (list);
- list_add_tail (list, &mem_pool->list);
- }
+ pool_list_size = sizeof(per_thread_pool_list_t) +
+ sizeof(per_thread_pool_t) * (NPOOLS - 1);
- mem_pool->pool = pool;
- mem_pool->pool_end = pool + (count * (padded_sizeof_type));
+ init_done = GF_MEMPOOL_INIT_EARLY;
+}
- /* add this pool to the global list */
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- goto out;
+static __attribute__((destructor)) void
+mem_pools_postfini(void)
+{
+ /* TODO: This function should destroy all per thread memory pools that
+ * are still alive, but this is not possible right now because glibc
+ * starts calling destructors as soon as exit() is called, and
+ * gluster doesn't ensure that all threads have been stopped before
+ * calling exit(). Existing threads would crash when they try to use
+ * memory or they terminate if we destroy things here.
+ *
+ * When we propertly terminate all threads, we can add the needed
+ * code here. Till then we need to leave the memory allocated. Most
+ * probably this function will be executed on process termination,
+ * so the memory will be released anyway by the system. */
+}
- list_add (&mem_pool->global_list, &ctx->mempool_list);
+/* Call mem_pools_init() once threading has been configured completely. This
+ * prevent the pool_sweeper thread from getting killed once the main() thread
+ * exits during deamonizing. */
+void
+mem_pools_init(void)
+{
+ pthread_mutex_lock(&init_mutex);
+ if ((init_count++) == 0) {
+ (void)gf_thread_create(&sweeper_tid, NULL, pool_sweeper, NULL,
+ "memsweep");
+
+ init_done = GF_MEMPOOL_INIT_LATE;
+ }
+ pthread_mutex_unlock(&init_mutex);
+}
-out:
- return mem_pool;
+void
+mem_pools_fini(void)
+{
+ pthread_mutex_lock(&init_mutex);
+ switch (init_count) {
+ case 0:
+ /*
+ * If init_count is already zero (as e.g. if somebody called this
+ * before mem_pools_init) then the sweeper was probably never even
+ * started so we don't need to stop it. Even if there's some crazy
+ * circumstance where there is a sweeper but init_count is still
+ * zero, that just means we'll leave it running. Not perfect, but
+ * far better than any known alternative.
+ */
+ break;
+ case 1: {
+ /* if mem_pools_init() was not called, sweeper_tid will be invalid
+ * and the functions will error out. That is not critical. In all
+ * other cases, the sweeper_tid will be valid and the thread gets
+ * stopped. */
+ (void)pthread_cancel(sweeper_tid);
+ (void)pthread_join(sweeper_tid, NULL);
+
+ /* There could be threads still running in some cases, so we can't
+ * destroy pool_lists in use. We can also not destroy unused
+ * pool_lists because some allocated objects may still be pointing
+ * to them. */
+ mem_pool_thread_destructor(NULL);
+
+ init_done = GF_MEMPOOL_INIT_DESTROY;
+ /* Fall through. */
+ }
+ default:
+ --init_count;
+ }
+ pthread_mutex_unlock(&init_mutex);
}
-void*
-mem_get0 (struct mem_pool *mem_pool)
+void
+mem_pool_destroy(struct mem_pool *pool)
{
- void *ptr = NULL;
+ if (!pool)
+ return;
- if (!mem_pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ /* remove this pool from the owner (glusterfs_ctx_t) */
+ LOCK(&pool->ctx->lock);
+ {
+ list_del(&pool->owner);
+ }
+ UNLOCK(&pool->ctx->lock);
+
+ /* free this pool, but keep the mem_pool_shared */
+ GF_FREE(pool);
+
+ /*
+ * Pools are now permanent, so the mem_pool->pool is kept around. All
+ * of the objects *in* the pool will eventually be freed via the
+ * pool-sweeper thread, and this way we don't have to add a lot of
+ * reference-counting complexity.
+ */
+}
- ptr = mem_get(mem_pool);
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
+{
+ unsigned long extra_size, size;
+ unsigned int power;
+ struct mem_pool *new = NULL;
+ struct mem_pool_shared *pool = NULL;
+
+ if (!sizeof_type) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+
+ /* This is the overhead we'll have because of memory accounting for each
+ * memory block. */
+ extra_size = sizeof(pooled_obj_hdr_t);
+
+ /* We need to compute the total space needed to hold the data type and
+ * the header. Given that the smallest block size we have in the pools
+ * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST).
+ * However, since this value is only needed to compute its rounded
+ * logarithm in base 2, and this only depends on the highest bit set,
+ * we can simply do a bitwise or with the minimum size. We need to
+ * subtract 1 for correct handling of sizes that are exactly a power
+ * of 2. */
+ size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
+
+ /* We compute the logarithm in base 2 rounded up of the resulting size.
+ * This value will identify which pool we need to use from the pools of
+ * powers of 2. This is equivalent to finding the position of the highest
+ * bit set. */
+ power = sizeof(size) * 8 - __builtin_clzl(size);
+ if (power > POOL_LARGEST) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+ pool = &pools[power - POOL_SMALLEST];
+
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
+
+ new->ctx = ctx;
+ new->sizeof_type = sizeof_type;
+ new->count = count;
+ new->name = name;
+ new->xl_name = THIS->name;
+ new->pool = pool;
+ GF_ATOMIC_INIT(new->active, 0);
+#ifdef DEBUG
+ GF_ATOMIC_INIT(new->hit, 0);
+ GF_ATOMIC_INIT(new->miss, 0);
+#endif
+ INIT_LIST_HEAD(&new->owner);
- if (ptr)
- memset(ptr, 0, mem_pool->real_sizeof_type);
+ LOCK(&ctx->lock);
+ {
+ list_add(&new->owner, &ctx->mempool_list);
+ }
+ UNLOCK(&ctx->lock);
- return ptr;
+ return new;
}
-void *
-mem_get (struct mem_pool *mem_pool)
+per_thread_pool_list_t *
+mem_get_pool_list(void)
{
- struct list_head *list = NULL;
- void *ptr = NULL;
- int *in_use = NULL;
- struct mem_pool **pool_ptr = NULL;
-
- if (!mem_pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
+ per_thread_pool_list_t *pool_list;
+ unsigned int i;
+
+ pool_list = thread_pool_list;
+ if (pool_list) {
+ return pool_list;
+ }
+
+ (void)pthread_mutex_lock(&pool_free_lock);
+ if (!list_empty(&pool_free_threads)) {
+ pool_list = list_entry(pool_free_threads.next, per_thread_pool_list_t,
+ thr_list);
+ list_del(&pool_list->thr_list);
+ }
+ (void)pthread_mutex_unlock(&pool_free_lock);
+
+ if (!pool_list) {
+ pool_list = MALLOC(pool_list_size);
+ if (!pool_list) {
+ return NULL;
}
- LOCK (&mem_pool->lock);
- {
- mem_pool->alloc_count++;
- if (mem_pool->cold_count) {
- list = mem_pool->list.next;
- list_del (list);
-
- mem_pool->hot_count++;
- mem_pool->cold_count--;
-
- if (mem_pool->max_alloc < mem_pool->hot_count)
- mem_pool->max_alloc = mem_pool->hot_count;
-
- ptr = list;
- in_use = (ptr + GF_MEM_POOL_LIST_BOUNDARY +
- GF_MEM_POOL_PTR);
- *in_use = 1;
-
- goto fwd_addr_out;
- }
-
- /* This is a problem area. If we've run out of
- * chunks in our slab above, we need to allocate
- * enough memory to service this request.
- * The problem is, these individual chunks will fail
- * the first address range check in __is_member. Now, since
- * we're not allocating a full second slab, we wont have
- * enough info perform the range check in __is_member.
- *
- * I am working around this by performing a regular allocation
- * , just the way the caller would've done when not using the
- * mem-pool. That also means, we're not padding the size with
- * the list_head structure because, this will not be added to
- * the list of chunks that belong to the mem-pool allocated
- * initially.
- *
- * This is the best we can do without adding functionality for
- * managing multiple slabs. That does not interest us at present
- * because it is too much work knowing that a better slab
- * allocator is coming RSN.
- */
- mem_pool->pool_misses++;
- mem_pool->curr_stdalloc++;
- if (mem_pool->max_stdalloc < mem_pool->curr_stdalloc)
- mem_pool->max_stdalloc = mem_pool->curr_stdalloc;
- ptr = GF_CALLOC (1, mem_pool->padded_sizeof_type,
- gf_common_mt_mem_pool);
- gf_log_callingfn ("mem-pool", GF_LOG_DEBUG, "Mem pool is full. "
- "Callocing mem");
-
- /* Memory coming from the heap need not be transformed from a
- * chunkhead to a usable pointer since it is not coming from
- * the pool.
- */
+ INIT_LIST_HEAD(&pool_list->thr_list);
+ (void)pthread_spin_init(&pool_list->lock, PTHREAD_PROCESS_PRIVATE);
+ for (i = 0; i < NPOOLS; ++i) {
+ pool_list->pools[i].parent = &pools[i];
+ pool_list->pools[i].hot_list = NULL;
+ pool_list->pools[i].cold_list = NULL;
}
-fwd_addr_out:
- pool_ptr = mem_pool_from_ptr (ptr);
- *pool_ptr = (struct mem_pool *)mem_pool;
- ptr = mem_pool_chunkhead2ptr (ptr);
- UNLOCK (&mem_pool->lock);
+ }
- return ptr;
-}
+ /* There's no need to take pool_list->lock, because this is already an
+ * atomic operation and we don't need to synchronize it with any change
+ * in hot/cold lists. */
+ pool_list->poison = false;
+ (void)pthread_mutex_lock(&pool_lock);
+ list_add(&pool_list->thr_list, &pool_threads);
+ (void)pthread_mutex_unlock(&pool_lock);
-static int
-__is_member (struct mem_pool *pool, void *ptr)
-{
- if (!pool || !ptr) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
+ thread_pool_list = pool_list;
- if (ptr < pool->pool || ptr >= pool->pool_end)
- return 0;
+ /* Ensure that all memory objects associated to the new pool_list are
+ * destroyed when the thread terminates. */
+ gf_thread_needs_cleanup();
- if ((mem_pool_ptr2chunkhead (ptr) - pool->pool)
- % pool->padded_sizeof_type)
- return -1;
-
- return 1;
+ return pool_list;
}
-
-void
-mem_put (void *ptr)
+static pooled_obj_hdr_t *
+mem_get_from_pool(struct mem_pool *mem_pool)
{
- struct list_head *list = NULL;
- int *in_use = NULL;
- void *head = NULL;
- struct mem_pool **tmp = NULL;
- struct mem_pool *pool = NULL;
-
- if (!ptr) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return;
- }
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+ pooled_obj_hdr_t *retval;
+#ifdef DEBUG
+ gf_boolean_t hit = _gf_true;
+#endif
- list = head = mem_pool_ptr2chunkhead (ptr);
- tmp = mem_pool_from_ptr (head);
- if (!tmp) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
- "ptr header is corrupted");
- return;
+ pool_list = mem_get_pool_list();
+ if (!pool_list || pool_list->poison) {
+ return NULL;
+ }
+
+ pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST];
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+ retval = pt_pool->hot_list;
+ if (retval) {
+ pt_pool->hot_list = retval->next;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_hot);
+ } else {
+ retval = pt_pool->cold_list;
+ if (retval) {
+ pt_pool->cold_list = retval->next;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_cold);
+ } else {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_stdc);
+ retval = malloc(1 << pt_pool->parent->power_of_two);
+#ifdef DEBUG
+ hit = _gf_false;
+#endif
}
+ }
- pool = *tmp;
- if (!pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
- "mem-pool ptr is NULL");
- return;
- }
- LOCK (&pool->lock);
- {
+ if (retval != NULL) {
+ retval->pool = mem_pool;
+ retval->power_of_two = mem_pool->pool->power_of_two;
+#ifdef DEBUG
+ if (hit == _gf_true)
+ GF_ATOMIC_INC(mem_pool->hit);
+ else
+ GF_ATOMIC_INC(mem_pool->miss);
+#endif
+ retval->magic = GF_MEM_HEADER_MAGIC;
+ retval->pool_list = pool_list;
+ }
- switch (__is_member (pool, ptr))
- {
- case 1:
- in_use = (head + GF_MEM_POOL_LIST_BOUNDARY +
- GF_MEM_POOL_PTR);
- if (!is_mem_chunk_in_use(in_use)) {
- gf_log_callingfn ("mem-pool", GF_LOG_CRITICAL,
- "mem_put called on freed ptr %p of mem "
- "pool %p", ptr, pool);
- break;
- }
- pool->hot_count--;
- pool->cold_count++;
- *in_use = 0;
- list_add (list, &pool->list);
- break;
- case -1:
- /* For some reason, the address given is within
- * the address range of the mem-pool but does not align
- * with the expected start of a chunk that includes
- * the list headers also. Sounds like a problem in
- * layers of clouds up above us. ;)
- */
- abort ();
- break;
- case 0:
- /* The address is outside the range of the mem-pool. We
- * assume here that this address was allocated at a
- * point when the mem-pool was out of chunks in mem_get
- * or the programmer has made a mistake by calling the
- * wrong de-allocation interface. We do
- * not have enough info to distinguish between the two
- * situations.
- */
- pool->curr_stdalloc--;
- GF_FREE (list);
- break;
- default:
- /* log error */
- break;
- }
- }
- UNLOCK (&pool->lock);
+ return retval;
}
-void
-mem_pool_destroy (struct mem_pool *pool)
+#endif /* GF_DISABLE_MEMPOOL */
+
+void *
+mem_get0(struct mem_pool *mem_pool)
{
- if (!pool)
- return;
+ void *ptr = mem_get(mem_pool);
+ if (ptr) {
+#if defined(GF_DISABLE_MEMPOOL)
+ memset(ptr, 0, mem_pool->sizeof_type);
+#else
+ memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
+#endif
+ }
+
+ return ptr;
+}
- gf_log (THIS->name, GF_LOG_INFO, "size=%lu max=%d total=%"PRIu64,
- pool->padded_sizeof_type, pool->max_alloc, pool->alloc_count);
+void *
+mem_get(struct mem_pool *mem_pool)
+{
+ if (!mem_pool) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+
+#if defined(GF_DISABLE_MEMPOOL)
+ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+#else
+ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool);
+ if (!retval) {
+ return NULL;
+ }
+
+ GF_ATOMIC_INC(mem_pool->active);
+
+ return retval + 1;
+#endif /* GF_DISABLE_MEMPOOL */
+}
- list_del (&pool->global_list);
+void
+mem_put(void *ptr)
+{
+#if defined(GF_DISABLE_MEMPOOL)
+ GF_FREE(ptr);
+#else
+ pooled_obj_hdr_t *hdr;
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+
+ if (!ptr) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
- LOCK_DESTROY (&pool->lock);
- GF_FREE (pool->name);
- GF_FREE (pool->pool);
- GF_FREE (pool);
+ hdr = ((pooled_obj_hdr_t *)ptr) - 1;
+ if (hdr->magic != GF_MEM_HEADER_MAGIC) {
+ /* Not one of ours; don't touch it. */
+ return;
+ }
+ if (!hdr->pool_list) {
+ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL,
+ LG_MSG_INVALID_ARG,
+ "invalid argument hdr->pool_list NULL");
return;
+ }
+
+ pool_list = hdr->pool_list;
+ pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST];
+
+ if (hdr->pool)
+ GF_ATOMIC_DEC(hdr->pool->active);
+
+ hdr->magic = GF_MEM_INVALID_MAGIC;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+ if (!pool_list->poison) {
+ hdr->next = pt_pool->hot_list;
+ pt_pool->hot_list = hdr;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->frees_to_list);
+ } else {
+ /* If the owner thread of this element has terminated, we simply
+ * release its memory. */
+ (void)pthread_spin_unlock(&pool_list->lock);
+ free(hdr);
+ }
+#endif /* GF_DISABLE_MEMPOOL */
}
diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h
deleted file mode 100644
index b3a25b25e7e..00000000000
--- a/libglusterfs/src/mem-pool.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _MEM_POOL_H_
-#define _MEM_POOL_H_
-
-#include "list.h"
-#include "locking.h"
-#include "logging.h"
-#include "mem-types.h"
-#include <stdlib.h>
-#include <inttypes.h>
-#include <string.h>
-#include <stdarg.h>
-
-
-struct mem_acct {
- uint32_t num_types;
- struct mem_acct_rec *rec;
-};
-
-struct mem_acct_rec {
- size_t size;
- size_t max_size;
- uint32_t num_allocs;
- uint32_t total_allocs;
- uint32_t max_num_allocs;
- gf_lock_t lock;
-};
-
-
-void *
-__gf_calloc (size_t cnt, size_t size, uint32_t type);
-
-void *
-__gf_malloc (size_t size, uint32_t type);
-
-void *
-__gf_realloc (void *ptr, size_t size);
-
-int
-gf_vasprintf (char **string_ptr, const char *format, va_list arg);
-
-int
-gf_asprintf (char **string_ptr, const char *format, ...);
-
-void
-__gf_free (void *ptr);
-
-
-static inline
-void* __gf_default_malloc (size_t size)
-{
- void *ptr = NULL;
-
- ptr = malloc (size);
- if (!ptr)
- gf_log_nomem ("", GF_LOG_ALERT, size);
-
- return ptr;
-}
-
-static inline
-void* __gf_default_calloc (int cnt, size_t size)
-{
- void *ptr = NULL;
-
- ptr = calloc (cnt, size);
- if (!ptr)
- gf_log_nomem ("", GF_LOG_ALERT, (cnt * size));
-
- return ptr;
-}
-
-static inline
-void* __gf_default_realloc (void *oldptr, size_t size)
-{
- void *ptr = NULL;
-
- ptr = realloc (oldptr, size);
- if (!ptr)
- gf_log_nomem ("", GF_LOG_ALERT, size);
-
- return ptr;
-}
-
-#define MALLOC(size) __gf_default_malloc(size)
-#define CALLOC(cnt,size) __gf_default_calloc(cnt,size)
-#define REALLOC(ptr,size) __gf_default_realloc(ptr,size)
-
-#define FREE(ptr) \
- if (ptr != NULL) { \
- free ((void *)ptr); \
- ptr = (void *)0xeeeeeeee; \
- }
-
-#define GF_CALLOC(nmemb, size, type) __gf_calloc (nmemb, size, type)
-
-#define GF_MALLOC(size, type) __gf_malloc (size, type)
-
-#define GF_REALLOC(ptr, size) __gf_realloc (ptr, size)
-
-#define GF_FREE(free_ptr) __gf_free (free_ptr)
-
-static inline
-char * gf_strdup (const char *src)
-{
-
- char *dup_str = NULL;
- size_t len = 0;
-
- len = strlen (src) + 1;
-
- dup_str = GF_CALLOC(1, len, gf_common_mt_strdup);
-
- if (!dup_str)
- return NULL;
-
- memcpy (dup_str, src, len);
-
- return dup_str;
-}
-
-struct mem_pool {
- struct list_head list;
- int hot_count;
- int cold_count;
- gf_lock_t lock;
- unsigned long padded_sizeof_type;
- void *pool;
- void *pool_end;
- int real_sizeof_type;
- uint64_t alloc_count;
- uint64_t pool_misses;
- int max_alloc;
- int curr_stdalloc;
- int max_stdalloc;
- char *name;
- struct list_head global_list;
-};
-
-struct mem_pool *
-mem_pool_new_fn (unsigned long sizeof_type, unsigned long count, char *name);
-
-#define mem_pool_new(type,count) mem_pool_new_fn (sizeof(type), count, #type)
-
-void mem_put (void *ptr);
-void *mem_get (struct mem_pool *pool);
-void *mem_get0 (struct mem_pool *pool);
-
-void mem_pool_destroy (struct mem_pool *pool);
-
-int gf_mem_acct_is_enabled ();
-void gf_mem_acct_enable_set ();
-
-#endif /* _MEM_POOL_H */
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
deleted file mode 100644
index 12379bf3181..00000000000
--- a/libglusterfs/src/mem-types.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __MEM_TYPES_H__
-#define __MEM_TYPES_H__
-
-
-enum gf_common_mem_types_ {
- gf_common_mt_call_stub_t = 0,
- gf_common_mt_dnscache6 = 1,
- gf_common_mt_data_pair_t = 2,
- gf_common_mt_data_t = 3,
- gf_common_mt_dict_t = 4,
- gf_common_mt_event_pool = 5,
- gf_common_mt_reg = 6,
- gf_common_mt_pollfd = 7,
- gf_common_mt_epoll_event = 8,
- gf_common_mt_fdentry_t = 9,
- gf_common_mt_fdtable_t = 10,
- gf_common_mt_fd_t = 11,
- gf_common_mt_fd_ctx = 12,
- gf_common_mt_gf_dirent_t = 13,
- gf_common_mt_glusterfs_ctx_t = 14,
- gf_common_mt_dentry_t = 15,
- gf_common_mt_inode_t = 16,
- gf_common_mt_inode_ctx = 17,
- gf_common_mt_list_head = 18,
- gf_common_mt_inode_table_t = 19,
- gf_common_mt_xlator_t = 20,
- gf_common_mt_xlator_list_t = 21,
- gf_common_mt_log_msg = 22,
- gf_common_mt_client_log = 23,
- gf_common_mt_volume_opt_list_t = 24,
- gf_common_mt_gf_hdr_common_t = 25,
- gf_common_mt_call_frame_t = 26,
- gf_common_mt_call_stack_t = 27,
- gf_common_mt_gf_timer_t = 28,
- gf_common_mt_gf_timer_registry_t = 29,
- gf_common_mt_transport = 30,
- gf_common_mt_transport_msg = 31,
- gf_common_mt_auth_handle_t = 32,
- gf_common_mt_iobuf = 33,
- gf_common_mt_iobuf_arena = 34,
- gf_common_mt_iobref = 35,
- gf_common_mt_iobuf_pool = 36,
- gf_common_mt_iovec = 37,
- gf_common_mt_memdup = 38,
- gf_common_mt_asprintf = 39,
- gf_common_mt_strdup = 40,
- gf_common_mt_socket_private_t = 41,
- gf_common_mt_ioq = 42,
- gf_common_mt_transport_t = 43,
- gf_common_mt_socket_local_t = 44,
- gf_common_mt_char = 45,
- gf_common_mt_rbthash_table_t = 46,
- gf_common_mt_rbthash_bucket = 47,
- gf_common_mt_mem_pool = 48,
- gf_common_mt_long = 49,
- gf_common_mt_rpcsvc_auth_list = 50,
- gf_common_mt_rpcsvc_t = 51,
- gf_common_mt_rpcsvc_conn_t = 52,
- gf_common_mt_rpcsvc_program_t = 53,
- gf_common_mt_rpcsvc_listener_t = 54,
- gf_common_mt_rpcsvc_wrapper_t = 55,
- gf_common_mt_rpcsvc_stage_t = 56,
- gf_common_mt_rpcclnt_t = 57,
- gf_common_mt_rpcclnt_savedframe_t = 58,
- gf_common_mt_rpc_trans_t = 59,
- gf_common_mt_rpc_trans_pollin_t = 60,
- gf_common_mt_rpc_trans_handover_t = 61,
- gf_common_mt_rpc_trans_reqinfo_t = 62,
- gf_common_mt_rpc_trans_rsp_t = 63,
- gf_common_mt_glusterfs_graph_t = 64,
- gf_common_mt_rdma_private_t = 65,
- gf_common_mt_rdma_ioq_t = 66,
- gf_common_mt_rpc_transport_t = 67,
- gf_common_mt_rdma_local_t = 68,
- gf_common_mt_rdma_post_t = 69,
- gf_common_mt_qpent = 70,
- gf_common_mt_rdma_device_t = 71,
- gf_common_mt_rdma_context_t = 72,
- gf_common_mt_sge = 73,
- gf_common_mt_rpcclnt_cb_program_t = 74,
- gf_common_mt_libxl_marker_local = 75,
- gf_common_mt_graph_buf = 76,
- gf_common_mt_trie_trie = 77,
- gf_common_mt_trie_data = 78,
- gf_common_mt_trie_node = 79,
- gf_common_mt_trie_buf = 80,
- gf_common_mt_trie_end = 81,
- gf_common_mt_run_argv = 82,
- gf_common_mt_run_logbuf = 83,
- gf_common_mt_fd_lk_ctx_t = 84,
- gf_common_mt_fd_lk_ctx_node_t = 85,
- gf_common_mt_buffer_t = 86,
- gf_common_mt_circular_buffer_t = 87,
- gf_common_mt_eh_t = 88,
- gf_common_mt_end = 89
-};
-#endif
diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c
new file mode 100644
index 00000000000..fbb68dc8622
--- /dev/null
+++ b/libglusterfs/src/monitoring.c
@@ -0,0 +1,282 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/monitoring.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/syscall.h"
+
+#include <stdlib.h>
+
+static void
+dump_mem_acct_details(xlator_t *xl, int fd)
+{
+ struct mem_acct_rec *mem_rec;
+ int i = 0;
+
+ if (!xl || !xl->mem_acct || (xl->ctx->active != xl->graph))
+ return;
+
+ dprintf(fd, "# %s.%s.total.num_types %d\n", xl->type, xl->name,
+ xl->mem_acct->num_types);
+
+ dprintf(fd,
+ "# type, in-use-size, in-use-units, max-size, "
+ "max-units, total-allocs\n");
+
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ mem_rec = &xl->mem_acct->rec[i];
+ if (mem_rec->num_allocs == 0)
+ continue;
+ dprintf(fd, "# %s, %" PRIu64 ", %u, %" PRIu64 ", %u, %" PRIu64 "\n",
+ mem_rec->typestr, mem_rec->size, mem_rec->num_allocs,
+ mem_rec->max_size, mem_rec->max_num_allocs,
+ mem_rec->total_allocs);
+ }
+}
+
+static void
+dump_global_memory_accounting(int fd)
+{
+#if MEMORY_ACCOUNTING_STATS
+ int i = 0;
+ uint64_t count = 0;
+
+ uint64_t tcalloc = GF_ATOMIC_GET(gf_memory_stat_counts.total_calloc);
+ uint64_t tmalloc = GF_ATOMIC_GET(gf_memory_stat_counts.total_malloc);
+ uint64_t tfree = GF_ATOMIC_GET(gf_memory_stat_counts.total_free);
+
+ dprintf(fd, "memory.total.calloc %lu\n", tcalloc);
+ dprintf(fd, "memory.total.malloc %lu\n", tmalloc);
+ dprintf(fd, "memory.total.realloc %lu\n",
+ GF_ATOMIC_GET(gf_memory_stat_counts.total_realloc));
+ dprintf(fd, "memory.total.free %lu\n", tfree);
+ dprintf(fd, "memory.total.in-use %lu\n", ((tcalloc + tmalloc) - tfree));
+
+ for (i = 0; i < GF_BLK_MAX_VALUE; i++) {
+ count = GF_ATOMIC_GET(gf_memory_stat_counts.blk_size[i]);
+ dprintf(fd, "memory.total.blk_size.%s %lu\n",
+ gf_mem_stats_blk[i].blk_size_str, count);
+ }
+
+ dprintf(fd, "#----\n");
+#endif
+
+ /* This is not a metric to be watched in admin guide,
+ but keeping it here till we resolve all leak-issues
+ would be great */
+}
+
+static void
+dump_latency_and_count(xlator_t *xl, int fd)
+{
+ int32_t index = 0;
+ uint64_t fop;
+ uint64_t cbk;
+ uint64_t count;
+
+ if (xl->winds) {
+ dprintf(fd, "%s.total.pending-winds.count %" PRIu64 "\n", xl->name,
+ xl->winds);
+ }
+
+ /* Need 'fuse' data, and don't need all the old graph info */
+ if ((xl != xl->ctx->master) && (xl->ctx->active != xl->graph))
+ return;
+
+ count = GF_ATOMIC_GET(xl->stats.total.count);
+ dprintf(fd, "%s.total.fop-count %" PRIu64 "\n", xl->name, count);
+
+ count = GF_ATOMIC_GET(xl->stats.interval.count);
+ dprintf(fd, "%s.interval.fop-count %" PRIu64 "\n", xl->name, count);
+ GF_ATOMIC_INIT(xl->stats.interval.count, 0);
+
+ for (index = 0; index < GF_FOP_MAXVALUE; index++) {
+ fop = GF_ATOMIC_GET(xl->stats.total.metrics[index].fop);
+ if (fop) {
+ dprintf(fd, "%s.total.%s.count %" PRIu64 "\n", xl->name,
+ gf_fop_list[index], fop);
+ }
+ fop = GF_ATOMIC_GET(xl->stats.interval.metrics[index].fop);
+ if (fop) {
+ dprintf(fd, "%s.interval.%s.count %" PRIu64 "\n", xl->name,
+ gf_fop_list[index], fop);
+ }
+ cbk = GF_ATOMIC_GET(xl->stats.interval.metrics[index].cbk);
+ if (cbk) {
+ dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
+ gf_fop_list[index], cbk);
+ }
+ if (xl->stats.interval.latencies[index].count != 0) {
+ dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
+ gf_fop_list[index],
+ (((double)xl->stats.interval.latencies[index].total) /
+ xl->stats.interval.latencies[index].count));
+ dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,
+ gf_fop_list[index],
+ xl->stats.interval.latencies[index].max);
+ dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,
+ gf_fop_list[index],
+ xl->stats.interval.latencies[index].min);
+ }
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[index].cbk, 0);
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[index].fop, 0);
+ }
+ memset(xl->stats.interval.latencies, 0,
+ sizeof(xl->stats.interval.latencies));
+}
+
+static inline void
+dump_call_stack_details(glusterfs_ctx_t *ctx, int fd)
+{
+ dprintf(fd, "total.stack.count %" PRIu64 "\n",
+ GF_ATOMIC_GET(ctx->pool->total_count));
+ dprintf(fd, "total.stack.in-flight %" PRIu64 "\n", ctx->pool->cnt);
+}
+
+static inline void
+dump_dict_details(glusterfs_ctx_t *ctx, int fd)
+{
+ uint64_t total_dicts = 0;
+ uint64_t total_pairs = 0;
+
+ total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used);
+ total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used);
+
+ dprintf(fd, "total.dict.max-pairs-per %" PRIu64 "\n",
+ GF_ATOMIC_GET(ctx->stats.max_dict_pairs));
+ dprintf(fd, "total.dict.pairs-used %" PRIu64 "\n", total_pairs);
+ dprintf(fd, "total.dict.used %" PRIu64 "\n", total_dicts);
+ dprintf(fd, "total.dict.average-pairs %" PRIu64 "\n",
+ (total_pairs / total_dicts));
+}
+
+static void
+dump_inode_stats(glusterfs_ctx_t *ctx, int fd)
+{
+}
+
+static void
+dump_global_metrics(glusterfs_ctx_t *ctx, int fd)
+{
+ struct timeval tv;
+ time_t nowtime;
+ struct tm *nowtm;
+ char tmbuf[64] = {
+ 0,
+ };
+
+ gettimeofday(&tv, NULL);
+ nowtime = tv.tv_sec;
+ nowtm = localtime(&nowtime);
+ strftime(tmbuf, sizeof tmbuf, "%Y-%m-%d %H:%M:%S", nowtm);
+
+ /* Let every file have information on which process dumped info */
+ dprintf(fd, "## %s\n", ctx->cmdlinestr);
+ dprintf(fd, "### %s\n", tmbuf);
+ dprintf(fd, "### BrickName: %s\n", ctx->cmd_args.brick_name);
+ dprintf(fd, "### MountName: %s\n", ctx->cmd_args.mount_point);
+ dprintf(fd, "### VolumeName: %s\n", ctx->cmd_args.volume_name);
+
+ /* Dump memory accounting */
+ dump_global_memory_accounting(fd);
+ dprintf(fd, "# -----\n");
+
+ dump_call_stack_details(ctx, fd);
+ dump_dict_details(ctx, fd);
+ dprintf(fd, "# -----\n");
+
+ dump_inode_stats(ctx, fd);
+ dprintf(fd, "# -----\n");
+}
+
+static void
+dump_xl_metrics(glusterfs_ctx_t *ctx, int fd)
+{
+ xlator_t *xl;
+
+ xl = ctx->active->top;
+
+ while (xl) {
+ dump_latency_and_count(xl, fd);
+ dump_mem_acct_details(xl, fd);
+ if (xl->dump_metrics)
+ xl->dump_metrics(xl, fd);
+ xl = xl->next;
+ }
+
+ if (ctx->master) {
+ xl = ctx->master;
+
+ dump_latency_and_count(xl, fd);
+ dump_mem_acct_details(xl, fd);
+ if (xl->dump_metrics)
+ xl->dump_metrics(xl, fd);
+ }
+
+ return;
+}
+
+char *
+gf_monitor_metrics(glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+ int fd = 0;
+ char *filepath = NULL, *dumppath = NULL;
+
+ gf_msg_trace("monitoring", 0, "received monitoring request (sig:USR2)");
+
+ dumppath = ctx->config.metrics_dumppath;
+ if (dumppath == NULL) {
+ dumppath = GLUSTER_METRICS_DIR;
+ }
+ ret = mkdir_p(dumppath, 0755, true);
+ if (ret) {
+ /* EEXIST is handled in mkdir_p() itself */
+ gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+ "failed to create metrics dir %s (%s)", dumppath,
+ strerror(errno));
+ return NULL;
+ }
+
+ ret = gf_asprintf(&filepath, "%s/gmetrics.XXXXXX", dumppath);
+ if (ret < 0) {
+ return NULL;
+ }
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ fd = mkstemp(filepath);
+ if (fd < 0) {
+ gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+ "failed to open tmp file %s (%s)", filepath, strerror(errno));
+ GF_FREE(filepath);
+ return NULL;
+ }
+
+ dump_global_metrics(ctx, fd);
+
+ dump_xl_metrics(ctx, fd);
+
+ /* This below line is used just to capture any errors with dprintf() */
+ ret = dprintf(fd, "\n# End of metrics\n");
+ if (ret < 0) {
+ gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
+ "dprintf() failed: %s", strerror(errno));
+ }
+
+ ret = sys_fsync(fd);
+ if (ret < 0) {
+ gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
+ "fsync() failed: %s", strerror(errno));
+ }
+ sys_close(fd);
+
+ /* Figure this out, not happy with returning this string */
+ return filepath;
+}
diff --git a/libglusterfs/src/options.c b/libglusterfs/src/options.c
index f4cb0425893..f6b5aa0ea23 100644
--- a/libglusterfs/src/options.c
+++ b/libglusterfs/src/options.c
@@ -8,1054 +8,1242 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <fnmatch.h>
-#include "xlator.h"
-#include "defaults.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/libglusterfs-messages.h"
#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
+static int
+xlator_option_validate_path(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+
+ if (strstr(value, "../")) {
+ snprintf(errstr, 256, "invalid path given '%s'", value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ /* Make sure the given path is valid */
+ if (value[0] != '/') {
+ snprintf(errstr, 256,
+ "option %s %s: '%s' is not an "
+ "absolute path name",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
+}
static int
-xlator_option_validate_path (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_int(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
-
- if (strstr (value, "../")) {
- snprintf (errstr, 256,
- "invalid path given '%s'",
- value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ long long inputll = 0;
+ unsigned long long uinputll = 0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2longlong(value, &inputll) != 0) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ /* Handle '-0' */
+ if ((inputll == 0) && (gf_string2ulonglong(value, &uinputll) != 0)) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+
+ if (opt->validate == GF_OPT_VALIDATE_MIN) {
+ if (inputll < opt->min) {
+ snprintf(errstr, 256,
+ "'%lld' in 'option %s %s' is smaller than "
+ "minimum value '%.0f'",
+ inputll, key, value, opt->min);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- /* Make sure the given path is valid */
- if (value[0] != '/') {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not an "
- "absolute path name",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ } else if (opt->validate == GF_OPT_VALIDATE_MAX) {
+ if (inputll > opt->max) {
+ snprintf(errstr, 256,
+ "'%lld' in 'option %s %s' is greater than "
+ "maximum value '%.0f'",
+ inputll, key, value, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- ret = 0;
+ } else if ((inputll < opt->min) || (inputll > opt->max)) {
+ snprintf(errstr, 256,
+ "'%lld' in 'option %s %s' is out of range "
+ "[%.0f - %.0f]",
+ inputll, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
static int
-xlator_option_validate_int (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_sizet(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- long long inputll = 0;
- int ret = -1;
- char errstr[256];
+ uint64_t size = 0;
+ int ret = 0;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2bytesize_uint64(value, &size) != 0) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ goto out;
+ }
+
+ if ((size < opt->min) || (size > opt->max)) {
+ snprintf(errstr, 256,
+ "'%" PRIu64
+ "' in 'option %s %s' is out of range [%.0f - %.0f]",
+ size, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ ret = -1;
+ }
- /* Check the range */
- if (gf_string2longlong (value, &inputll) != 0) {
- snprintf (errstr, 256,
- "invalid number format \"%s\" in option \"%s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
+}
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for 'option %s %s'",
- key, value);
- ret = 0;
- goto out;
- }
+static int
+xlator_option_validate_bool(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ gf_boolean_t is_valid;
+
+ /* Check if the value is one of
+ '0|1|on|off|no|yes|true|false|enable|disable' */
+
+ if (gf_string2boolean(value, &is_valid) != 0) {
+ snprintf(errstr, 256, "option %s %s: '%s' is not a valid boolean value",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
+}
- if ((inputll < opt->min) || (inputll > opt->max)) {
- snprintf (errstr, 256,
- "'%lld' in 'option %s %s' is out of range "
- "[%"PRId64" - %"PRId64"]",
- inputll, key, value, opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+static int
+xlator_option_validate_xlator(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ xlator_t *xlopt = NULL;
+
+ /* Check if the value is one of the xlators */
+ xlopt = xl;
+ while (xlopt->prev)
+ xlopt = xlopt->prev;
+
+ while (xlopt) {
+ if (strcmp(value, xlopt->name) == 0) {
+ ret = 0;
+ break;
}
-
- ret = 0;
+ xlopt = xlopt->next;
+ }
+
+ if (!xlopt) {
+ snprintf(errstr, 256, "option %s %s: '%s' is not a valid volume name",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
+static void
+set_error_str(char *errstr, size_t len, volume_option_t *opt, const char *key,
+ const char *value)
+{
+ int i = 0;
+ int ret = 0;
+
+ ret = snprintf(errstr, len,
+ "option %s %s: '%s' is not valid "
+ "(possible options are ",
+ key, value, value);
+
+ for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i];) {
+ ret += snprintf(errstr + ret, len - ret, "%s", opt->value[i]);
+ if (((++i) < ZR_OPTION_MAX_ARRAY_SIZE) && (opt->value[i]))
+ ret += snprintf(errstr + ret, len - ret, ", ");
+ else
+ ret += snprintf(errstr + ret, len - ret, ".)");
+ }
+ return;
+}
static int
-xlator_option_validate_sizet (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+is_all_whitespaces(const char *value)
{
- uint64_t size = 0;
- int ret = -1;
- char errstr[256];
+ int i = 0;
- /* Check the range */
- if (gf_string2bytesize (value, &size) != 0) {
- snprintf (errstr, 256,
- "invalid number format \"%s\" in option \"%s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
+ if (value == NULL)
+ return -1;
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for 'option %s %s'",
- key, value);
- ret = 0;
- goto out;
- }
-
- if ((size < opt->min) || (size > opt->max)) {
- if (strncmp (key, "cache-size", 10) == 0) {
- snprintf (errstr, 256, "Cache size %"PRId64" is out of "
- "range [%"PRId64" - %"PRId64"]",
- size, opt->min, opt->max);
- //*op_errstr = gf_strdup (errstr);
- gf_log (xl->name, GF_LOG_WARNING, "%s", errstr);
- ret = 0;
- goto out;
- } else {
- snprintf (errstr, 256,
- "'%"PRId64"' in 'option %s %s' "
- "is out of range [%"PRId64" - %"PRId64"]",
- size, key, value, opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
- }
+ for (i = 0; value[i] != '\0'; i++) {
+ if (value[i] == ' ')
+ continue;
+ else
+ return 0;
+ }
- ret = 0;
-out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ return 1;
}
-
static int
-xlator_option_validate_bool (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_str(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- gf_boolean_t bool;
+ int ret = -1;
+ int i = 0;
+ /* Check if the '*str' is valid */
+ if (GF_OPTION_LIST_EMPTY(opt)) {
+ ret = 0;
+ goto out;
+ }
- /* Check if the value is one of
- '0|1|on|off|no|yes|true|false|enable|disable' */
+ if (is_all_whitespaces(value) == 1)
+ goto out;
- if (gf_string2boolean (value, &bool) != 0) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid boolean value",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i]; i++) {
+#ifdef GF_DARWIN_HOST_OS
+ if (fnmatch(opt->value[i], value, 0) == 0) {
+ ret = 0;
+ break;
+ }
+#else
+ if (fnmatch(opt->value[i], value, FNM_EXTMATCH) == 0) {
+ ret = 0;
+ break;
}
+#endif
+ }
+
+ if ((i == ZR_OPTION_MAX_ARRAY_SIZE) || (!opt->value[i]))
+ goto out;
+ /* enter here only if
+ * 1. reached end of opt->value array and haven't
+ * validated input
+ * OR
+ * 2. valid input list is less than
+ * ZR_OPTION_MAX_ARRAY_SIZE and input has not
+ * matched all possible input values.
+ */
+
+ ret = 0;
- ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret) {
+ char errstr[4096];
+ set_error_str(errstr, sizeof(errstr), opt, key, value);
+
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
+ return ret;
}
-
static int
-xlator_option_validate_xlator (xlator_t *xl, const char *key, const char *value,
+xlator_option_validate_percent(xlator_t *xl, const char *key, const char *value,
volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- xlator_t *xlopt = NULL;
-
-
- /* Check if the value is one of the xlators */
- xlopt = xl;
- while (xlopt->prev)
- xlopt = xlopt->prev;
-
- while (xlopt) {
- if (strcmp (value, xlopt->name) == 0) {
- ret = 0;
- break;
- }
- xlopt = xlopt->next;
- }
-
- if (!xlopt) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid volume name",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
- ret = 0;
+ double percent = 0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check if the value is valid percentage */
+ if (gf_string2percent(value, &percent) != 0) {
+ snprintf(errstr, 256, "invalid percent format \"%s\" in \"option %s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((percent < 0.0) || (percent > 100.0)) {
+ snprintf(errstr, 256,
+ "'%lf' in 'option %s %s' is out of range [0 - 100]", percent,
+ key, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_fractional_value(const char *value)
{
- int ret = -1;
- int i = 0;
- char errstr[256];
- char given_array[4096] = {0,};
-
- /* Check if the '*str' is valid */
- if (GF_OPTION_LIST_EMPTY(opt)) {
- ret = 0;
- goto out;
- }
+ const char *s = NULL;
+ int ret = 0;
- for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i]; i++) {
- #ifdef GF_DARWIN_HOST_OS
- if (fnmatch (opt->value[i], value, 0) == 0) {
- ret = 0;
- break;
- }
- #else
- if (fnmatch (opt->value[i], value, FNM_EXTMATCH) == 0) {
- ret = 0;
- break;
- }
- #endif
- }
-
- if (((i < ZR_OPTION_MAX_ARRAY_SIZE) && (!opt->value[i])) ||
- (i == ZR_OPTION_MAX_ARRAY_SIZE)) {
- /* enter here only if
- * 1. reached end of opt->value array and haven't
- * validated input
- * OR
- * 2. valid input list is less than
- * ZR_OPTION_MAX_ARRAY_SIZE and input has not
- * matched all possible input values.
- */
-
- for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i];) {
- strcat (given_array, opt->value[i]);
- if (((++i) < ZR_OPTION_MAX_ARRAY_SIZE) &&
- (opt->value[i]))
- strcat (given_array, ", ");
- else
- strcat (given_array, ".");
- }
- snprintf (errstr, 256,
- "option %s %s: '%s' is not valid "
- "(possible options are %s)",
- key, value, value, given_array);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ s = strchr(value, '.');
+ if (s) {
+ for (s = s + 1; *s != '\0'; s++) {
+ if (*s != '0') {
+ return -1;
+ }
}
+ }
- ret = 0;
-out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ return ret;
}
-
static int
-xlator_option_validate_percent (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
- double percent = 0;
- int ret = -1;
- char errstr[256];
-
- /* Check if the value is valid percentage */
- if (gf_string2percent (value, &percent) != 0) {
- snprintf (errstr, 256,
- "invalid percent format \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ int ret = -1;
+ char errstr[256];
+ double size = 0;
+ gf_boolean_t is_percent = _gf_false;
+
+ if (gf_string2percent_or_bytesize(value, &size, &is_percent) == 0) {
+ if (is_percent) {
+ if ((size < 0.0) || (size > 100.0)) {
+ snprintf(errstr, sizeof(errstr),
+ "'%lf' in 'option %s %s' is out"
+ " of range [0 - 100]",
+ size, key, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE,
+ "error=%s", errstr, NULL);
goto out;
+ }
+ ret = 0;
+ goto out;
}
- if ((percent < 0.0) || (percent > 100.0)) {
- snprintf (errstr, 256,
- "'%lf' in 'option %s %s' is out of range [0 - 100]",
- percent, key, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ /*Input value of size(in byte) should not be fractional*/
+ ret = xlator_option_validate_fractional_value(value);
+ if (ret) {
+ snprintf(errstr, sizeof(errstr),
+ "'%lf' in 'option %s"
+ " %s' should not be fractional value. Use "
+ "valid unsigned integer value.",
+ size, key, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
+ /* Check the range */
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required "
+ "for 'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+ if ((size < opt->min) || (size > opt->max)) {
+ snprintf(errstr, 256,
+ "'%lf' in 'option %s %s'"
+ " is out of range [%.0f - %.0f]",
+ size, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
ret = 0;
-out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
-}
+ goto out;
+ }
-static int
-xlator_option_validate_percent_or_sizet (xlator_t *xl, const char *key,
- const char *value,
- volume_option_t *opt, char **op_errstr)
-{
- int ret = -1;
- char errstr[256];
- uint64_t size = 0;
- gf_boolean_t is_percent = _gf_false;
-
- if (gf_string2percent_or_bytesize (value, &size, &is_percent) == 0) {
- if (is_percent) {
- ret = 0;
- goto out;
- }
- /* Check the range */
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for "
- "'option %s %s'",
- key, value);
- ret = 0;
- goto out;
- }
- if ((size < opt->min) || (size > opt->max)) {
- snprintf (errstr, 256,
- "'%"PRId64"' in 'option %s %s'"
- " is out of range [%"PRId64" -"
- " %"PRId64"]",
- size, key, value, opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
- ret = 0;
- goto out;
- }
-
- /* If control reaches here, invalid argument */
-
- snprintf (errstr, 256,
- "invalid number format \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ /* If control reaches here, invalid argument */
+ snprintf(errstr, 256, "invalid number format \"%s\" in \"option %s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", errstr,
+ NULL);
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_time (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_time(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- uint32_t input_time = 0;
-
- /* Check if the value is valid time */
- if (gf_string2time (value, &input_time) != 0) {
- snprintf (errstr, 256,
- "invalid time format \"%s\" in "
- "\"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for "
- "'option %s %s'",
- key, value);
- ret = 0;
- goto out;
- }
-
- if ((input_time < opt->min) || (input_time > opt->max)) {
- snprintf (errstr, 256,
- "'%"PRIu32"' in 'option %s %s' is "
- "out of range [%"PRId64" - %"PRId64"]",
- input_time, key, value,
- opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
+ int ret = -1;
+ char errstr[256];
+ uint32_t input_time = 0;
+
+ /* Check if the value is valid time */
+ if (gf_string2time(value, &input_time) != 0) {
+ snprintf(errstr, 256,
+ "invalid time format \"%s\" in "
+ "\"option %s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
ret = 0;
+ goto out;
+ }
+
+ if ((input_time < opt->min) || (input_time > opt->max)) {
+ snprintf(errstr, 256,
+ "'%" PRIu32
+ "' in 'option %s %s' is "
+ "out of range [%.0f - %.0f]",
+ input_time, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_double (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_double(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- double val = 0.0;
-
- /* Check if the value is valid double */
- if (gf_string2double (value, &val) != 0) {
- snprintf (errstr, 256,
- "invalid double \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
- if (val < 0.0) {
- snprintf (errstr, 256,
- "invalid double \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ double input = 0.0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2double(value, &input) != 0) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+
+ if (opt->validate == GF_OPT_VALIDATE_MIN) {
+ if (input < opt->min) {
+ snprintf(errstr, 256,
+ "'%f' in 'option %s %s' is smaller than "
+ "minimum value '%f'",
+ input, key, value, opt->min);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for 'option %s %s'",
- key, value);
- ret = 0;
- goto out;
+ } else if (opt->validate == GF_OPT_VALIDATE_MAX) {
+ if (input > opt->max) {
+ snprintf(errstr, 256,
+ "'%f' in 'option %s %s' is greater than "
+ "maximum value '%f'",
+ input, key, value, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- ret = 0;
+ } else if ((input < opt->min) || (input > opt->max)) {
+ snprintf(errstr, 256,
+ "'%f' in 'option %s %s' is out of range "
+ "[%f - %f]",
+ input, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_addr (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_addr(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
-
- if (!valid_internet_address ((char *)value, _gf_false)) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid internet-address,"
- " it does not conform to standards.",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- if (op_errstr)
- *op_errstr = gf_strdup (errstr);
- }
+ int ret = -1;
+ char errstr[256];
- ret = 0;
+ if (!valid_internet_address((char *)value, _gf_false, _gf_false)) {
+ snprintf(errstr, 256, "option %s %s: Can not parse %s address", key,
+ value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
- return ret;
+ ret = 0;
+
+ return ret;
}
-static int
-xlator_option_validate_addr_list (xlator_t *xl, const char *key,
- const char *value, volume_option_t *opt,
- char **op_errstr)
+int
+xlator_option_validate_addr_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
- int ret = -1;
- char *dup_val = NULL;
- char *addr_tok = NULL;
- char *save_ptr = NULL;
- char errstr[256];
-
- dup_val = gf_strdup (value);
- if (!dup_val) {
- ret = -1;
- snprintf (errstr, 256, "internal error, out of memory.");
+ int ret = -1;
+ char *dup_val = NULL;
+ char *addr_tok = NULL;
+ char *save_ptr = NULL;
+ char *entry = NULL;
+ char *entry_ptr = NULL;
+ char *dir_and_addr = NULL;
+ char *addr_ptr = NULL;
+ char *addr_list = NULL;
+ char *addr = NULL;
+ char *dir = NULL;
+
+ dup_val = gf_strdup(value);
+ if (!dup_val)
+ goto out;
+
+ if (dup_val[0] != '/' && !strchr(dup_val, '(')) {
+ /* Possible old format, handle it for back-ward compatibility */
+ addr_tok = strtok_r(dup_val, ",", &save_ptr);
+ while (addr_tok) {
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_true))
goto out;
- }
- addr_tok = strtok_r (dup_val, ",", &save_ptr);
- while (addr_tok) {
- if (!valid_internet_address (addr_tok, _gf_true)) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid "
- "internet-address-list",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- ret = -1;
- goto out;
- }
- addr_tok = strtok_r (NULL, ",", &save_ptr);
+ addr_tok = strtok_r(NULL, ",", &save_ptr);
}
ret = 0;
- out:
- if (op_errstr && ret)
- *op_errstr = gf_strdup (errstr);
- if (dup_val)
- GF_FREE (dup_val);
+ goto out;
+ }
+
+ /* Lets handle the value with new format */
+ entry = strtok_r(dup_val, ",", &entry_ptr);
+ while (entry) {
+ dir_and_addr = gf_strdup(entry);
+ if (!dir_and_addr)
+ goto out;
+
+ dir = strtok_r(dir_and_addr, "(", &addr_ptr);
+ if (dir[0] != '/') {
+ /* Valid format should be starting from '/' */
+ goto out;
+ }
+ /* dir = strtok_r (NULL, " =", &addr_tmp); */
+ addr = strtok_r(NULL, ")", &addr_ptr);
+ if (!addr)
+ goto out;
+
+ addr_list = gf_strdup(addr);
+ if (!addr_list)
+ goto out;
+
+ /* This format be separated by '|' */
+ addr_tok = strtok_r(addr_list, "|", &save_ptr);
+ if (addr_tok == NULL)
+ goto out;
+ while (addr_tok) {
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_true))
+ goto out;
+
+ addr_tok = strtok_r(NULL, "|", &save_ptr);
+ }
+ entry = strtok_r(NULL, ",", &entry_ptr);
+ GF_FREE(dir_and_addr);
+ GF_FREE(addr_list);
+ addr_list = NULL;
+ dir_and_addr = NULL;
+ }
- return ret;
+ ret = 0;
+
+out:
+ if (ret) {
+ char errstr[4096];
+ snprintf(errstr, sizeof(errstr),
+ "option %s %s: '%s' is not "
+ "a valid internet-address-list",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
+ GF_FREE(dup_val);
+ GF_FREE(dir_and_addr);
+ GF_FREE(addr_list);
+ return ret;
+}
+
+static int
+xlator_option_validate_mntauth(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char *dup_val = NULL;
+ char *addr_tok = NULL;
+ char *save_ptr = NULL;
+
+ dup_val = gf_strdup(value);
+ if (!dup_val)
+ goto out;
+
+ addr_tok = strtok_r(dup_val, ",", &save_ptr);
+ if (addr_tok == NULL)
+ goto out;
+ while (addr_tok) {
+ if (!valid_mount_auth_address(addr_tok))
+ goto out;
+
+ addr_tok = strtok_r(NULL, ",", &save_ptr);
+ }
+ ret = 0;
+
+out:
+ if (ret) {
+ char errstr[4096];
+ snprintf(errstr, sizeof(errstr),
+ "option %s %s: '%s' is not "
+ "a valid mount-auth-address",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
+ GF_FREE(dup_val);
+
+ return ret;
}
/*XXX: the rules to validate are as per block-size required for stripe xlator */
static int
-gf_validate_size (const char *sizestr, volume_option_t *opt)
+gf_validate_size(const char *sizestr, volume_option_t *opt)
{
- uint64_t value = 0;
- int ret = 0;
+ uint64_t value = 0;
+ int ret = 0;
- GF_ASSERT (opt);
+ GF_ASSERT(opt);
- if (gf_string2bytesize (sizestr, &value) != 0 ||
- value < opt->min ||
- value % 512) {
- ret = -1;
- goto out;
- }
+ if (gf_string2bytesize_uint64(sizestr, &value) != 0 || value < opt->min ||
+ value % 512) {
+ ret = -1;
+ goto out;
+ }
- out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
static int
-gf_validate_number (const char *numstr, volume_option_t *opt)
+gf_validate_number(const char *numstr, volume_option_t *opt)
{
- int32_t value;
- return gf_string2int32 (numstr, &value);
+ int32_t value;
+ return gf_string2int32(numstr, &value);
}
/* Parses the string to be of the form <key1>:<value1>,<key2>:<value2>... *
* takes two optional validaters key_validator and value_validator */
static int
-validate_list_elements (const char *string, volume_option_t *opt,
- int (key_validator)( const char *),
- int (value_validator)( const char *, volume_option_t *))
+validate_list_elements(const char *string, volume_option_t *opt,
+ int(key_validator)(const char *),
+ int(value_validator)(const char *, volume_option_t *))
{
+ char *dup_string = NULL;
+ char *str_sav = NULL;
+ char *substr_sav = NULL;
+ char *str_ptr = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ int ret = 0;
+
+ GF_ASSERT(string);
+
+ dup_string = gf_strdup(string);
+ if (NULL == dup_string)
+ goto out;
+
+ str_ptr = strtok_r(dup_string, ",", &str_sav);
+ if (str_ptr == NULL) {
+ ret = -1;
+ goto out;
+ }
+ while (str_ptr) {
+ key = strtok_r(str_ptr, ":", &substr_sav);
+ if (!key || (key_validator && key_validator(key))) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
+ "list=%s", string, "key=%s", key ? key : "", NULL);
+ goto out;
+ }
- char *dup_string = NULL;
- char *str_sav = NULL;
- char *substr_sav = NULL;
- char *str_ptr = NULL;
- char *key = NULL;
- char *value = NULL;
- int ret = 0;
-
- GF_ASSERT (string);
+ value = strtok_r(NULL, ":", &substr_sav);
+ if (!value || (value_validator && value_validator(value, opt))) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
+ "list=%s", string, "value=%s", key, NULL);
+ goto out;
+ }
- dup_string = gf_strdup (string);
- if (NULL == dup_string)
- goto out;
+ str_ptr = strtok_r(NULL, ",", &str_sav);
+ substr_sav = NULL;
+ }
- str_ptr = strtok_r (dup_string, ",", &str_sav);
- while (str_ptr) {
-
- key = strtok_r (str_ptr, ":", &substr_sav);
- if (!key ||
- (key_validator && key_validator(key))) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_WARNING,
- "invalid list '%s', key '%s' not valid.",
- string, key);
- goto out;
- }
-
- value = strtok_r (NULL, ":", &substr_sav);
- if (!value ||
- (value_validator && value_validator(value, opt))) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_WARNING,
- "invalid list '%s', value '%s' not valid.",
- string, key);
- goto out;
- }
-
- str_ptr = strtok_r (NULL, ",", &str_sav);
- substr_sav = NULL;
- }
- out:
- if (dup_string)
- GF_FREE (dup_string);
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+out:
+ GF_FREE(dup_string);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
static int
-xlator_option_validate_priority_list (xlator_t *xl, const char *key,
- const char *value, volume_option_t *opt,
- char **op_errstr)
+xlator_option_validate_priority_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
- int ret =0;
- char errstr[1024] = {0, };
-
- GF_ASSERT (value);
-
- ret = validate_list_elements (value, opt, NULL, &gf_validate_number);
- if (ret) {
- snprintf (errstr, 1024,
- "option %s %s: '%s' is not a valid "
- "priority-list", key, value, value);
- *op_errstr = gf_strdup (errstr);
- }
-
- return ret;
+ int ret = 0;
+ char errstr[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(value);
+
+ ret = validate_list_elements(value, opt, NULL, &gf_validate_number);
+ if (ret) {
+ snprintf(errstr, 1024,
+ "option %s %s: '%s' is not a valid "
+ "priority-list",
+ key, value, value);
+ *op_errstr = gf_strdup(errstr);
+ }
+
+ return ret;
}
static int
-xlator_option_validate_size_list (xlator_t *xl, const char *key,
- const char *value, volume_option_t *opt,
- char **op_errstr)
+xlator_option_validate_size_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
-
- int ret = 0;
- char errstr[1024] = {0, };
-
- GF_ASSERT (value);
-
- ret = gf_validate_size (value, opt);
- if (ret)
- ret = validate_list_elements (value, opt, NULL, &gf_validate_size);
-
- if (ret) {
- snprintf (errstr, 1024,
- "option %s %s: '%s' is not a valid "
- "size-list", key, value, value);
- *op_errstr = gf_strdup (errstr);
- }
-
- return ret;
-
+ int ret = 0;
+ char errstr[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(value);
+
+ ret = gf_validate_size(value, opt);
+ if (ret)
+ ret = validate_list_elements(value, opt, NULL, &gf_validate_size);
+
+ if (ret) {
+ snprintf(errstr, 1024,
+ "option %s %s: '%s' is not a valid "
+ "size-list",
+ key, value, value);
+ *op_errstr = gf_strdup(errstr);
+ }
+
+ return ret;
}
static int
-xlator_option_validate_any (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_any(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- return 0;
+ return 0;
}
-typedef int (xlator_option_validator_t) (xlator_t *xl, const char *key,
- const char *value,
- volume_option_t *opt, char **operrstr);
+typedef int(xlator_option_validator_t)(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **operrstr);
int
-xlator_option_validate (xlator_t *xl, char *key, char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate(xlator_t *xl, char *key, char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- xlator_option_validator_t *validate;
- xlator_option_validator_t *validators[] = {
- [GF_OPTION_TYPE_PATH] = xlator_option_validate_path,
- [GF_OPTION_TYPE_INT] = xlator_option_validate_int,
- [GF_OPTION_TYPE_SIZET] = xlator_option_validate_sizet,
- [GF_OPTION_TYPE_BOOL] = xlator_option_validate_bool,
- [GF_OPTION_TYPE_XLATOR] = xlator_option_validate_xlator,
- [GF_OPTION_TYPE_STR] = xlator_option_validate_str,
- [GF_OPTION_TYPE_PERCENT] = xlator_option_validate_percent,
- [GF_OPTION_TYPE_PERCENT_OR_SIZET] =
- xlator_option_validate_percent_or_sizet,
- [GF_OPTION_TYPE_TIME] = xlator_option_validate_time,
- [GF_OPTION_TYPE_DOUBLE] = xlator_option_validate_double,
- [GF_OPTION_TYPE_INTERNET_ADDRESS] = xlator_option_validate_addr,
- [GF_OPTION_TYPE_INTERNET_ADDRESS_LIST] =
- xlator_option_validate_addr_list,
- [GF_OPTION_TYPE_PRIORITY_LIST] =
- xlator_option_validate_priority_list,
- [GF_OPTION_TYPE_SIZE_LIST] = xlator_option_validate_size_list,
- [GF_OPTION_TYPE_ANY] = xlator_option_validate_any,
- [GF_OPTION_TYPE_MAX] = NULL,
- };
-
- if (opt->type < 0 || opt->type >= GF_OPTION_TYPE_MAX) {
- gf_log (xl->name, GF_LOG_ERROR,
- "unknown option type '%d'", opt->type);
- goto out;
- }
-
- validate = validators[opt->type];
-
- ret = validate (xl, key, value, opt, op_errstr);
+ int ret = -1;
+ xlator_option_validator_t *validate;
+ xlator_option_validator_t *validators[] = {
+ [GF_OPTION_TYPE_PATH] = xlator_option_validate_path,
+ [GF_OPTION_TYPE_INT] = xlator_option_validate_int,
+ [GF_OPTION_TYPE_SIZET] = xlator_option_validate_sizet,
+ [GF_OPTION_TYPE_BOOL] = xlator_option_validate_bool,
+ [GF_OPTION_TYPE_XLATOR] = xlator_option_validate_xlator,
+ [GF_OPTION_TYPE_STR] = xlator_option_validate_str,
+ [GF_OPTION_TYPE_PERCENT] = xlator_option_validate_percent,
+ [GF_OPTION_TYPE_PERCENT_OR_SIZET] =
+ xlator_option_validate_percent_or_sizet,
+ [GF_OPTION_TYPE_TIME] = xlator_option_validate_time,
+ [GF_OPTION_TYPE_DOUBLE] = xlator_option_validate_double,
+ [GF_OPTION_TYPE_INTERNET_ADDRESS] = xlator_option_validate_addr,
+ [GF_OPTION_TYPE_INTERNET_ADDRESS_LIST] =
+ xlator_option_validate_addr_list,
+ [GF_OPTION_TYPE_PRIORITY_LIST] = xlator_option_validate_priority_list,
+ [GF_OPTION_TYPE_SIZE_LIST] = xlator_option_validate_size_list,
+ [GF_OPTION_TYPE_ANY] = xlator_option_validate_any,
+ [GF_OPTION_TYPE_CLIENT_AUTH_ADDR] = xlator_option_validate_mntauth,
+ [GF_OPTION_TYPE_MAX] = NULL,
+ };
+
+ if (opt->type > GF_OPTION_TYPE_MAX) {
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_UNKNOWN_OPTION_TYPE,
+ "type=%d", opt->type, NULL);
+ goto out;
+ }
+
+ validate = validators[opt->type];
+
+ ret = validate(xl, key, value, opt, op_errstr);
out:
- return ret;
+ return ret;
}
-
-static volume_option_t *
-xlator_volume_option_get_list (volume_opt_list_t *vol_list, const char *key)
+volume_option_t *
+xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key)
{
- volume_option_t *opt = NULL;
- volume_opt_list_t *opt_list = NULL;
- volume_option_t *found = NULL;
- int index = 0;
- int i = 0;
- char *cmp_key = NULL;
-
- if (!vol_list->given_opt) {
- opt_list = list_entry (vol_list->list.next, volume_opt_list_t,
- list);
- opt = opt_list->given_opt;
- } else
- opt = vol_list->given_opt;
-
- for (index = 0; opt[index].key[0]; index++) {
- for (i = 0; i < ZR_VOLUME_MAX_NUM_KEY; i++) {
- cmp_key = opt[index].key[i];
- if (!cmp_key)
- break;
- if (fnmatch (cmp_key, key, FNM_NOESCAPE) == 0) {
- found = &opt[index];
- goto out;
- }
- }
+ volume_option_t *opt = NULL;
+ volume_opt_list_t *opt_list = NULL;
+ int index = 0;
+ int i = 0;
+ char *cmp_key = NULL;
+
+ if (!vol_list->given_opt) {
+ opt_list = list_entry(vol_list->list.next, volume_opt_list_t, list);
+ opt = opt_list->given_opt;
+ } else
+ opt = vol_list->given_opt;
+
+ for (index = 0; opt[index].key[0]; index++) {
+ for (i = 0; i < ZR_VOLUME_MAX_NUM_KEY; i++) {
+ cmp_key = opt[index].key[i];
+ if (!cmp_key)
+ break;
+ if (fnmatch(cmp_key, key, FNM_NOESCAPE) == 0) {
+ return &opt[index];
+ }
}
-out:
- return found;
-}
+ }
+ return NULL;
+}
volume_option_t *
-xlator_volume_option_get (xlator_t *xl, const char *key)
+xlator_volume_option_get(xlator_t *xl, const char *key)
{
- volume_opt_list_t *vol_list = NULL;
- volume_option_t *found = NULL;
+ volume_opt_list_t *vol_list = NULL;
+ volume_option_t *found = NULL;
- list_for_each_entry (vol_list, &xl->volume_options, list) {
- found = xlator_volume_option_get_list (vol_list, key);
- if (found)
- break;
- }
+ list_for_each_entry(vol_list, &xl->volume_options, list)
+ {
+ found = xlator_volume_option_get_list(vol_list, key);
+ if (found)
+ break;
+ }
- return found;
+ return found;
}
-
-static void
-xl_opt_validate (dict_t *dict, char *key, data_t *value, void *data)
+static int
+xl_opt_validate(dict_t *dict, char *key, data_t *value, void *data)
{
- xlator_t *xl = NULL;
- volume_opt_list_t *vol_opt = NULL;
- volume_option_t *opt = NULL;
- int ret = 0;
- char *errstr = NULL;
-
- struct {
- xlator_t *this;
- volume_opt_list_t *vol_opt;
- char *errstr;
- } *stub;
-
- stub = data;
- xl = stub->this;
- vol_opt = stub->vol_opt;
-
- opt = xlator_volume_option_get_list (vol_opt, key);
- if (!opt)
- return;
-
- ret = xlator_option_validate (xl, key, value->data, opt, &errstr);
- if (ret)
- gf_log (xl->name, GF_LOG_WARNING, "validate of %s returned %d",
- key, ret);
-
- if (errstr)
- /* possible small leak of previously set stub->errstr */
- stub->errstr = errstr;
-
- if (fnmatch (opt->key[0], key, FNM_NOESCAPE) != 0) {
- gf_log (xl->name, GF_LOG_WARNING, "option '%s' is deprecated, "
- "preferred is '%s', continuing with correction",
- key, opt->key[0]);
- dict_set (dict, opt->key[0], value);
- dict_del (dict, key);
- }
- return;
-}
+ xlator_t *xl = NULL;
+ volume_opt_list_t *vol_opt = NULL;
+ volume_option_t *opt = NULL;
+ int ret = 0;
+ char *errstr = NULL;
+
+ struct {
+ xlator_t *this;
+ volume_opt_list_t *vol_opt;
+ char *errstr;
+ } * stub;
+
+ stub = data;
+ xl = stub->this;
+ vol_opt = stub->vol_opt;
+
+ opt = xlator_volume_option_get_list(vol_opt, key);
+ if (!opt)
+ return 0;
+ ret = xlator_option_validate(xl, key, value->data, opt, &errstr);
+ if (ret)
+ gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_VALIDATE_RETURNS, "key=%s",
+ key, "ret=%d", ret, NULL);
+
+ if (errstr)
+ /* possible small leak of previously set stub->errstr */
+ stub->errstr = errstr;
+
+ if (fnmatch(opt->key[0], key, FNM_NOESCAPE) != 0) {
+ gf_smsg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_OPTION_DEPRECATED, "key=%s",
+ key, "preferred=%s", opt->key[0], NULL);
+ dict_set(dict, opt->key[0], value);
+ dict_del(dict, key);
+ }
+ return 0;
+}
int
-xlator_options_validate_list (xlator_t *xl, dict_t *options,
- volume_opt_list_t *vol_opt, char **op_errstr)
+xlator_options_validate_list(xlator_t *xl, dict_t *options,
+ volume_opt_list_t *vol_opt, char **op_errstr)
{
- int ret = 0;
- struct {
- xlator_t *this;
- volume_opt_list_t *vol_opt;
- char *errstr;
- } stub;
-
- stub.this = xl;
- stub.vol_opt = vol_opt;
- stub.errstr = NULL;
-
- dict_foreach (options, xl_opt_validate, &stub);
- if (stub.errstr) {
- ret = -1;
- if (op_errstr)
- *op_errstr = stub.errstr;
- }
-
- return ret;
+ int ret = 0;
+ struct {
+ xlator_t *this;
+ volume_opt_list_t *vol_opt;
+ char *errstr;
+ } stub;
+
+ stub.this = xl;
+ stub.vol_opt = vol_opt;
+ stub.errstr = NULL;
+
+ dict_foreach(options, xl_opt_validate, &stub);
+ if (stub.errstr) {
+ ret = -1;
+ if (op_errstr)
+ *op_errstr = stub.errstr;
+ }
+
+ return ret;
}
-
int
-xlator_options_validate (xlator_t *xl, dict_t *options, char **op_errstr)
+xlator_options_validate(xlator_t *xl, dict_t *options, char **op_errstr)
{
- int ret = 0;
- volume_opt_list_t *vol_opt = NULL;
-
-
- if (!xl) {
- gf_log (THIS->name, GF_LOG_DEBUG, "'this' not a valid ptr");
- ret = -1;
- goto out;
- }
-
- if (list_empty (&xl->volume_options))
- goto out;
-
- list_for_each_entry (vol_opt, &xl->volume_options, list) {
- ret = xlator_options_validate_list (xl, options, vol_opt,
- op_errstr);
- }
+ int ret = 0;
+ volume_opt_list_t *vol_opt = NULL;
+
+ if (!xl) {
+ gf_msg_debug(THIS->name, 0, "'this' not a valid ptr");
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty(&xl->volume_options))
+ goto out;
+
+ list_for_each_entry(vol_opt, &xl->volume_options, list)
+ {
+ ret = xlator_options_validate_list(xl, options, vol_opt, op_errstr);
+ }
out:
- return ret;
+ return ret;
}
-
int
-xlator_validate_rec (xlator_t *xlator, char **op_errstr)
+xlator_validate_rec(xlator_t *xlator, char **op_errstr)
{
- int ret = -1;
- xlator_list_t *trav = NULL;
- xlator_t *old_THIS = NULL;
+ int ret = -1;
+ xlator_list_t *trav = NULL;
+ xlator_t *old_THIS = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", xlator, out);
+ GF_VALIDATE_OR_GOTO("xlator", xlator, out);
- trav = xlator->children;
+ trav = xlator->children;
- while (trav) {
- if (xlator_validate_rec (trav->xlator, op_errstr)) {
- gf_log ("xlator", GF_LOG_WARNING, "validate_rec failed");
- goto out;
- }
-
- trav = trav->next;
+ while (trav) {
+ if (xlator_validate_rec(trav->xlator, op_errstr)) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_VALIDATE_REC_FAILED,
+ NULL);
+ goto out;
}
- if (xlator_dynload (xlator))
- gf_log (xlator->name, GF_LOG_DEBUG, "Did not load the symbols");
+ trav = trav->next;
+ }
- old_THIS = THIS;
- THIS = xlator;
+ if (xlator_dynload(xlator))
+ gf_msg_debug(xlator->name, 0, "Did not load the symbols");
- /* Need this here, as this graph has not yet called init() */
- if (!xlator->mem_acct.num_types) {
- if (!xlator->mem_acct_init)
- xlator->mem_acct_init = default_mem_acct_init;
- xlator->mem_acct_init (xlator);
- }
+ old_THIS = THIS;
+ THIS = xlator;
- ret = xlator_options_validate (xlator, xlator->options, op_errstr);
- THIS = old_THIS;
+ /* Need this here, as this graph has not yet called init() */
+ if (!xlator->mem_acct) {
+ if (!xlator->mem_acct_init)
+ xlator->mem_acct_init = default_mem_acct_init;
+ xlator->mem_acct_init(xlator);
+ }
- if (ret) {
- gf_log (xlator->name, GF_LOG_INFO, "%s", *op_errstr);
- goto out;
- }
+ ret = xlator_options_validate(xlator, xlator->options, op_errstr);
+ THIS = old_THIS;
- gf_log (xlator->name, GF_LOG_DEBUG, "Validated options");
+ if (ret) {
+ gf_smsg(xlator->name, GF_LOG_INFO, 0, LG_MSG_INVALID_ENTRY, "%s",
+ *op_errstr, NULL);
+ goto out;
+ }
- ret = 0;
+ gf_msg_debug(xlator->name, 0, "Validated options");
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-graph_reconf_validateopt (glusterfs_graph_t *graph, char **op_errstr)
+graph_reconf_validateopt(glusterfs_graph_t *graph, char **op_errstr)
{
- xlator_t *xlator = NULL;
- int ret = -1;
+ xlator_t *xlator = NULL;
+ int ret = -1;
- GF_ASSERT (graph);
+ GF_ASSERT(graph);
- xlator = graph->first;
+ xlator = graph->first;
- ret = xlator_validate_rec (xlator, op_errstr);
+ ret = xlator_validate_rec(xlator, op_errstr);
- return ret;
+ return ret;
}
-
static int
-xlator_reconfigure_rec (xlator_t *old_xl, xlator_t *new_xl)
+xlator_reconfigure_rec(xlator_t *old_xl, xlator_t *new_xl)
{
- xlator_list_t *trav1 = NULL;
- xlator_list_t *trav2 = NULL;
- int32_t ret = -1;
- xlator_t *old_THIS = NULL;
+ xlator_list_t *trav1 = NULL;
+ xlator_list_t *trav2 = NULL;
+ int32_t ret = -1;
+ xlator_t *old_THIS = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", old_xl, out);
- GF_VALIDATE_OR_GOTO ("xlator", new_xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", old_xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", new_xl, out);
- trav1 = old_xl->children;
- trav2 = new_xl->children;
+ trav1 = old_xl->children;
+ trav2 = new_xl->children;
- while (trav1 && trav2) {
- ret = xlator_reconfigure_rec (trav1->xlator, trav2->xlator);
- if (ret)
- goto out;
+ while (trav1 && trav2) {
+ ret = xlator_reconfigure_rec(trav1->xlator, trav2->xlator);
+ if (ret)
+ goto out;
- gf_log (trav1->xlator->name, GF_LOG_DEBUG, "reconfigured");
+ gf_msg_debug(trav1->xlator->name, 0, "reconfigured");
- trav1 = trav1->next;
- trav2 = trav2->next;
- }
+ trav1 = trav1->next;
+ trav2 = trav2->next;
+ }
- if (old_xl->reconfigure) {
- old_THIS = THIS;
- THIS = old_xl;
+ if (old_xl->reconfigure) {
+ old_THIS = THIS;
+ THIS = old_xl;
- ret = old_xl->reconfigure (old_xl, new_xl->options);
+ xlator_init_lock();
+ handle_default_options(old_xl, new_xl->options);
+ ret = old_xl->reconfigure(old_xl, new_xl->options);
+ xlator_init_unlock();
- THIS = old_THIS;
+ THIS = old_THIS;
- if (ret)
- goto out;
- } else {
- gf_log (old_xl->name, GF_LOG_DEBUG, "No reconfigure() found");
- }
+ if (ret)
+ goto out;
+ } else {
+ gf_msg_debug(old_xl->name, 0, "No reconfigure() found");
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-xlator_tree_reconfigure (xlator_t *old_xl, xlator_t *new_xl)
+xlator_tree_reconfigure(xlator_t *old_xl, xlator_t *new_xl)
{
- xlator_t *new_top = NULL;
- xlator_t *old_top = NULL;
+ xlator_t *new_top = NULL;
+ xlator_t *old_top = NULL;
- GF_ASSERT (old_xl);
- GF_ASSERT (new_xl);
+ GF_ASSERT(old_xl);
+ GF_ASSERT(new_xl);
- old_top = old_xl;
- new_top = new_xl;
+ old_top = old_xl;
+ new_top = new_xl;
- return xlator_reconfigure_rec (old_top, new_top);
+ return xlator_reconfigure_rec(old_top, new_top);
}
-
int
-xlator_option_info_list (volume_opt_list_t *list, char *key,
- char **def_val, char **descr)
+xlator_option_info_list(volume_opt_list_t *list, char *key, char **def_val,
+ char **descr)
{
- int ret = -1;
- volume_option_t *opt = NULL;
+ int ret = -1;
+ volume_option_t *opt = NULL;
+ opt = xlator_volume_option_get_list(list, key);
+ if (!opt)
+ goto out;
- opt = xlator_volume_option_get_list (list, key);
- if (!opt)
- goto out;
-
- if (def_val)
- *def_val = opt->default_value;
- if (descr)
- *descr = opt->description;
+ if (def_val)
+ *def_val = opt->default_value;
+ if (descr)
+ *descr = opt->description;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
static int
-not_null (char *in, char **out)
+pass(char *in, char **out)
{
- if (!in || !out)
- return -1;
-
- *out = in;
- return 0;
+ *out = in;
+ return 0;
}
-
static int
-xl_by_name (char *in, xlator_t **out)
+xl_by_name(char *in, xlator_t **out)
{
- xlator_t *xl = NULL;
+ xlator_t *xl = NULL;
- xl = xlator_search_by_name (THIS, in);
+ xl = xlator_search_by_name(THIS, in);
- if (!xl)
- return -1;
- *out = xl;
- return 0;
+ if (!xl)
+ return -1;
+ *out = xl;
+ return 0;
}
-
static int
-pc_or_size (char *in, double *out)
+pc_or_size(char *in, double *out)
{
- double pc = 0;
- int ret = 0;
- uint64_t size = 0;
-
- if (gf_string2percent (in, &pc) == 0) {
- if (pc > 100.0) {
- ret = gf_string2bytesize (in, &size);
- if (!ret)
- *out = size;
- } else {
- *out = pc;
- }
+ double pc = 0;
+ int ret = 0;
+ uint64_t size = 0;
+
+ if (gf_string2percent(in, &pc) == 0) {
+ if (pc > 100.0) {
+ ret = gf_string2bytesize_uint64(in, &size);
+ if (!ret)
+ *out = size;
} else {
- ret = gf_string2bytesize (in, &size);
- if (!ret)
- *out = size;
+ *out = pc;
}
- return ret;
+ } else {
+ ret = gf_string2bytesize_uint64(in, &size);
+ if (!ret)
+ *out = size;
+ }
+ return ret;
}
-
-DEFINE_INIT_OPT(char *, str, not_null);
+DEFINE_INIT_OPT(char *, str, pass);
DEFINE_INIT_OPT(uint64_t, uint64, gf_string2uint64);
DEFINE_INIT_OPT(int64_t, int64, gf_string2int64);
DEFINE_INIT_OPT(uint32_t, uint32, gf_string2uint32);
DEFINE_INIT_OPT(int32_t, int32, gf_string2int32);
-DEFINE_INIT_OPT(uint64_t, size, gf_string2bytesize);
+DEFINE_INIT_OPT(uint64_t, size, gf_string2bytesize_uint64);
+DEFINE_INIT_OPT(uint64_t, size_uint64, gf_string2bytesize_uint64);
DEFINE_INIT_OPT(double, percent, gf_string2percent);
DEFINE_INIT_OPT(double, percent_or_size, pc_or_size);
DEFINE_INIT_OPT(gf_boolean_t, bool, gf_string2boolean);
DEFINE_INIT_OPT(xlator_t *, xlator, xl_by_name);
-DEFINE_INIT_OPT(char *, path, not_null);
-
-
+DEFINE_INIT_OPT(char *, path, pass);
+DEFINE_INIT_OPT(double, double, gf_string2double);
+DEFINE_INIT_OPT(uint32_t, time, gf_string2time);
-DEFINE_RECONF_OPT(char *, str, not_null);
+DEFINE_RECONF_OPT(char *, str, pass);
DEFINE_RECONF_OPT(uint64_t, uint64, gf_string2uint64);
DEFINE_RECONF_OPT(int64_t, int64, gf_string2int64);
DEFINE_RECONF_OPT(uint32_t, uint32, gf_string2uint32);
DEFINE_RECONF_OPT(int32_t, int32, gf_string2int32);
-DEFINE_RECONF_OPT(uint64_t, size, gf_string2bytesize);
+DEFINE_RECONF_OPT(uint64_t, size, gf_string2bytesize_uint64);
+DEFINE_RECONF_OPT(uint64_t, size_uint64, gf_string2bytesize_uint64);
DEFINE_RECONF_OPT(double, percent, gf_string2percent);
DEFINE_RECONF_OPT(double, percent_or_size, pc_or_size);
DEFINE_RECONF_OPT(gf_boolean_t, bool, gf_string2boolean);
DEFINE_RECONF_OPT(xlator_t *, xlator, xl_by_name);
-DEFINE_RECONF_OPT(char *, path, not_null);
+DEFINE_RECONF_OPT(char *, path, pass);
+DEFINE_RECONF_OPT(double, double, gf_string2double);
+DEFINE_RECONF_OPT(uint32_t, time, gf_string2time);
diff --git a/libglusterfs/src/options.h b/libglusterfs/src/options.h
deleted file mode 100644
index f0acb33688d..00000000000
--- a/libglusterfs/src/options.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _OPTIONS_H
-#define _OPTIONS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include "xlator.h"
-/* Add possible new type of option you may need */
-typedef enum {
- GF_OPTION_TYPE_ANY = 0,
- GF_OPTION_TYPE_STR,
- GF_OPTION_TYPE_INT,
- GF_OPTION_TYPE_SIZET,
- GF_OPTION_TYPE_PERCENT,
- GF_OPTION_TYPE_PERCENT_OR_SIZET,
- GF_OPTION_TYPE_BOOL,
- GF_OPTION_TYPE_XLATOR,
- GF_OPTION_TYPE_PATH,
- GF_OPTION_TYPE_TIME,
- GF_OPTION_TYPE_DOUBLE,
- GF_OPTION_TYPE_INTERNET_ADDRESS,
- GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
- GF_OPTION_TYPE_PRIORITY_LIST,
- GF_OPTION_TYPE_SIZE_LIST,
- GF_OPTION_TYPE_MAX,
-} volume_option_type_t;
-
-
-#define ZR_VOLUME_MAX_NUM_KEY 4
-#define ZR_OPTION_MAX_ARRAY_SIZE 64
-
-/* Each translator should define this structure */
-typedef struct volume_options {
- char *key[ZR_VOLUME_MAX_NUM_KEY];
- /* different key, same meaning */
- volume_option_type_t type;
- int64_t min; /* 0 means no range */
- int64_t max; /* 0 means no range */
- char *value[ZR_OPTION_MAX_ARRAY_SIZE];
- /* If specified, will check for one of
- the value from this array */
- char *default_value;
- char *description; /* about the key */
-} volume_option_t;
-
-
-typedef struct vol_opt_list {
- struct list_head list;
- volume_option_t *given_opt;
-} volume_opt_list_t;
-
-
-int xlator_tree_reconfigure (xlator_t *old_xl, xlator_t *new_xl);
-int xlator_validate_rec (xlator_t *xlator, char **op_errstr);
-int graph_reconf_validateopt (glusterfs_graph_t *graph, char **op_errstr);
-int xlator_option_info_list (volume_opt_list_t *list, char *key,
- char **def_val, char **descr);
-/*
-int validate_xlator_volume_options (xlator_t *xl, dict_t *options,
- volume_option_t *opt, char **op_errstr);
-*/
-int xlator_options_validate_list (xlator_t *xl, dict_t *options,
- volume_opt_list_t *list, char **op_errstr);
-int xlator_option_validate (xlator_t *xl, char *key, char *value,
- volume_option_t *opt, char **op_errstr);
-int xlator_options_validate (xlator_t *xl, dict_t *options, char **errstr);
-volume_option_t *
-xlator_volume_option_get (xlator_t *xl, const char *key);
-
-
-#define DECLARE_INIT_OPT(type_t, type) \
-int \
-xlator_option_init_##type (xlator_t *this, dict_t *options, char *key, \
- type_t *val_p);
-
-DECLARE_INIT_OPT(char *, str);
-DECLARE_INIT_OPT(uint64_t, uint64);
-DECLARE_INIT_OPT(int64_t, int64);
-DECLARE_INIT_OPT(uint32_t, uint32);
-DECLARE_INIT_OPT(int32_t, int32);
-DECLARE_INIT_OPT(uint64_t, size);
-DECLARE_INIT_OPT(double, percent);
-DECLARE_INIT_OPT(double, percent_or_size);
-DECLARE_INIT_OPT(gf_boolean_t, bool);
-DECLARE_INIT_OPT(xlator_t *, xlator);
-DECLARE_INIT_OPT(char *, path);
-
-
-#define DEFINE_INIT_OPT(type_t, type, conv) \
-int \
-xlator_option_init_##type (xlator_t *this, dict_t *options, char *key, \
- type_t *val_p) \
-{ \
- int ret = 0; \
- volume_option_t *opt = NULL; \
- char *def_value = NULL; \
- char *set_value = NULL; \
- char *value = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- opt = xlator_volume_option_get (this, key); \
- if (!opt) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "unknown option: %s", key); \
- ret = -1; \
- return ret; \
- } \
- def_value = opt->default_value; \
- ret = dict_get_str (options, key, &set_value); \
- \
- if (def_value) \
- value = def_value; \
- if (set_value) \
- value = set_value; \
- if (!value) { \
- gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
- key); \
- return 0; \
- } \
- if (value == def_value) { \
- gf_log (this->name, GF_LOG_TRACE, \
- "option %s using default value %s", \
- key, value); \
- } else { \
- gf_log (this->name, GF_LOG_DEBUG, \
- "option %s using set value %s", \
- key, value); \
- } \
- old_THIS = THIS; \
- THIS = this; \
- ret = conv (value, val_p); \
- THIS = old_THIS; \
- if (ret) \
- return ret; \
- ret = xlator_option_validate (this, key, value, opt, NULL); \
- return ret; \
-}
-
-#define GF_OPTION_INIT(key, val, type, err_label) do { \
- int val_ret = 0; \
- val_ret = xlator_option_init_##type (THIS, THIS->options, \
- key, &(val)); \
- if (val_ret) \
- goto err_label; \
- } while (0)
-
-
-
-#define DECLARE_RECONF_OPT(type_t, type) \
-int \
-xlator_option_reconf_##type (xlator_t *this, dict_t *options, char *key,\
- type_t *val_p);
-
-DECLARE_RECONF_OPT(char *, str);
-DECLARE_RECONF_OPT(uint64_t, uint64);
-DECLARE_RECONF_OPT(int64_t, int64);
-DECLARE_RECONF_OPT(uint32_t, uint32);
-DECLARE_RECONF_OPT(int32_t, int32);
-DECLARE_RECONF_OPT(uint64_t, size);
-DECLARE_RECONF_OPT(double, percent);
-DECLARE_RECONF_OPT(double, percent_or_size);
-DECLARE_RECONF_OPT(gf_boolean_t, bool);
-DECLARE_RECONF_OPT(xlator_t *, xlator);
-DECLARE_RECONF_OPT(char *, path);
-
-
-#define DEFINE_RECONF_OPT(type_t, type, conv) \
-int \
-xlator_option_reconf_##type (xlator_t *this, dict_t *options, char *key, \
- type_t *val_p) \
-{ \
- int ret = 0; \
- volume_option_t *opt = NULL; \
- char *def_value = NULL; \
- char *set_value = NULL; \
- char *value = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- opt = xlator_volume_option_get (this, key); \
- if (!opt) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "unknown option: %s", key); \
- ret = -1; \
- return ret; \
- } \
- def_value = opt->default_value; \
- ret = dict_get_str (options, key, &set_value); \
- \
- if (def_value) \
- value = def_value; \
- if (set_value) \
- value = set_value; \
- if (!value) { \
- gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
- key); \
- return 0; \
- } \
- if (value == def_value) { \
- gf_log (this->name, GF_LOG_TRACE, \
- "option %s using default value %s", \
- key, value); \
- } else { \
- gf_log (this->name, GF_LOG_DEBUG, \
- "option %s using set value %s", \
- key, value); \
- } \
- old_THIS = THIS; \
- THIS = this; \
- ret = conv (value, val_p); \
- THIS = old_THIS; \
- if (ret) \
- return ret; \
- ret = xlator_option_validate (this, key, value, opt, NULL); \
- return ret; \
-}
-
-#define GF_OPTION_RECONF(key, val, opt, type, err_label) do { \
- int val_ret = 0; \
- val_ret = xlator_option_reconf_##type (THIS, opt, key, \
- &(val)); \
- if (val_ret) \
- goto err_label; \
- } while (0)
-
-
-#endif /* !_OPTIONS_H */
diff --git a/libglusterfs/src/parse-utils.c b/libglusterfs/src/parse-utils.c
new file mode 100644
index 00000000000..4531d5f0170
--- /dev/null
+++ b/libglusterfs/src/parse-utils.c
@@ -0,0 +1,174 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <regex.h>
+#include <stdio.h>
+
+#include "glusterfs/parse-utils.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+/**
+ * parser_init: Initialize a parser with the a string to parse and
+ * the regex we want to use to parse it.
+ *
+ * @complete_str: the string to parse
+ * @regex : the regex to use
+ *
+ * Notes : It is up to the caller to call the parser_deinit () function
+ * to free the allocated parser.
+ *
+ * @return : success: parser ptr (on successful compilation and allocation)
+ * : failure: NULL (on failure to compile regex or allocate memory)
+ */
+struct parser *
+parser_init(const char *regex)
+{
+ int rc = 0;
+ struct parser *parser = NULL;
+
+ parser = GF_MALLOC(sizeof(*parser), gf_common_mt_parser_t);
+ if (!parser)
+ goto out;
+
+ parser->regex = gf_strdup(regex);
+ if (!parser->regex) {
+ GF_FREE(parser);
+ parser = NULL;
+ goto out;
+ }
+
+ rc = regcomp(&parser->preg, parser->regex, REG_EXTENDED);
+ if (rc != 0) {
+ gf_msg(GF_PARSE, GF_LOG_INFO, 0, LG_MSG_REGEX_OP_FAILED,
+ "Failed to compile regex pattern.");
+ parser_deinit(parser);
+ parser = NULL;
+ goto out;
+ }
+ parser->complete_str = NULL;
+out:
+ return parser;
+}
+
+/**
+ * parser_set_string -- Set the string in the parser that we want to parse.
+ * Subsequent calls to get_next_match () will use this
+ * string along with the regex that the parser was
+ * initialized with.
+ *
+ * @parser : The parser to use
+ * @complete_str: The string to set in the parser (what we are going parse)
+ *
+ * @return: success: 0
+ * failure: -EINVAL for NULL args, -ENOMEM for allocation errors
+ */
+int
+parser_set_string(struct parser *parser, const char *complete_str)
+{
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO(GF_PARSE, parser, out);
+ GF_VALIDATE_OR_GOTO(GF_PARSE, complete_str, out);
+
+ parser->complete_str = gf_strdup(complete_str);
+ GF_CHECK_ALLOC_AND_LOG(GF_PARSE, parser, ret, "Failed to duplicate string!",
+ out);
+
+ /* Point the temp internal string to what we just dup'ed */
+ parser->_rstr = (char *)parser->complete_str;
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * parser_unset_string -- Free the string that was set to be parsed.
+ * This function needs to be called after
+ * parser_set_string and parser_get_next_match
+ * in order to free memory used by the string.
+ *
+ * @parser : The parser to free memory in
+ * @return : success: 0
+ * : failure: -EINVAL on NULL args
+ */
+int
+parser_unset_string(struct parser *parser)
+{
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO(GF_PARSE, parser, out);
+
+ GF_FREE(parser->complete_str);
+ parser->complete_str = NULL; /* Avoid double frees in parser_deinit */
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * parser_deinit: Free the parser and all the memory allocated by it
+ *
+ * @parser : Parser to free
+ *
+ * @return : nothing
+ */
+void
+parser_deinit(struct parser *ptr)
+{
+ if (!ptr)
+ return;
+
+ regfree(&ptr->preg);
+ GF_FREE(ptr->complete_str);
+ GF_FREE(ptr->regex);
+ GF_FREE(ptr);
+}
+
+/**
+ * parser_get_match: Given the parser that is configured with a compiled regex,
+ * return the next match in the string.
+ *
+ * @parser : Parser to use
+ *
+ * @return : success: Pointer to matched character
+ * : failure: NULL
+ */
+char *
+parser_get_next_match(struct parser *parser)
+{
+ int rc = -EINVAL;
+ size_t copy_len = 0;
+ char *match = NULL;
+
+ GF_VALIDATE_OR_GOTO(GF_PARSE, parser, out);
+
+ rc = regexec(&parser->preg, parser->_rstr, 1, parser->pmatch, 0);
+ if (rc != 0) {
+ gf_msg_debug(GF_PARSE, 0, "Could not match %s with regex %s",
+ parser->_rstr, parser->regex);
+ goto out;
+ }
+
+ copy_len = parser->pmatch[0].rm_eo - parser->pmatch[0].rm_so;
+
+ match = gf_strndup(parser->_rstr + parser->pmatch[0].rm_so, copy_len);
+ GF_CHECK_ALLOC_AND_LOG(GF_PARSE, match, rc, "Duplicating match failed!",
+ out);
+
+ parser->_rstr = &parser->_rstr[parser->pmatch[0].rm_eo];
+out:
+ return match;
+}
diff --git a/libglusterfs/src/quota-common-utils.c b/libglusterfs/src/quota-common-utils.c
new file mode 100644
index 00000000000..804e2f0ad4b
--- /dev/null
+++ b/libglusterfs/src/quota-common-utils.c
@@ -0,0 +1,241 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/dict.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/byte-order.h"
+#include "glusterfs/quota-common-utils.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+gf_boolean_t
+quota_meta_is_null(const quota_meta_t *meta)
+{
+ if (meta->size == 0 && meta->file_count == 0 && meta->dir_count == 0)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+int32_t
+quota_data_to_meta(data_t *data, quota_meta_t *meta)
+{
+ int32_t ret = -1;
+ quota_meta_t *value = NULL;
+ int64_t *size = NULL;
+
+ if (!data || !meta)
+ goto out;
+
+ if (data->len > sizeof(int64_t)) {
+ value = (quota_meta_t *)data->data;
+ meta->size = ntoh64(value->size);
+ meta->file_count = ntoh64(value->file_count);
+ if (data->len > (sizeof(int64_t)) * 2)
+ meta->dir_count = ntoh64(value->dir_count);
+ else
+ meta->dir_count = 0;
+ } else {
+ size = (int64_t *)data->data;
+ meta->size = ntoh64(*size);
+ meta->file_count = 0;
+ meta->dir_count = 0;
+ /* This can happen during software upgrade.
+ * Older version of glusterfs will not have inode count.
+ * Return failure, this will be healed as part of lookup
+ */
+ gf_msg_callingfn("quota", GF_LOG_DEBUG, 0, LG_MSG_QUOTA_XATTRS_MISSING,
+ "Object quota "
+ "xattrs missing: len = %d",
+ data->len);
+ ret = -2;
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int32_t
+quota_dict_get_inode_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta)
+{
+ int32_t ret = -1;
+ data_t *data = NULL;
+
+ if (!dict || !key || !meta)
+ goto out;
+
+ data = dict_getn(dict, key, keylen);
+ if (!data || !data->data)
+ goto out;
+
+ ret = quota_data_to_meta(data, meta);
+
+out:
+
+ return ret;
+}
+
+int32_t
+quota_dict_get_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta)
+{
+ int32_t ret = -1;
+
+ ret = quota_dict_get_inode_meta(dict, key, keylen, meta);
+ if (ret == -2)
+ ret = 0;
+
+ return ret;
+}
+
+int32_t
+quota_dict_set_meta(dict_t *dict, char *key, const quota_meta_t *meta,
+ ia_type_t ia_type)
+{
+ int32_t ret = -ENOMEM;
+ quota_meta_t *value = NULL;
+
+ value = GF_MALLOC(sizeof(quota_meta_t), gf_common_quota_meta_t);
+ if (value == NULL) {
+ goto out;
+ }
+
+ value->size = hton64(meta->size);
+ value->file_count = hton64(meta->file_count);
+ value->dir_count = hton64(meta->dir_count);
+
+ if (ia_type == IA_IFDIR) {
+ ret = dict_set_bin(dict, key, value, sizeof(*value));
+ } else {
+ /* For a file we don't need to store dir_count in the
+ * quota size xattr, so we set the len of the data in the dict
+ * as 128bits, so when the posix xattrop reads the dict, it only
+ * performs operations on size and file_count
+ */
+ ret = dict_set_bin(dict, key, value, sizeof(*value) - sizeof(int64_t));
+ }
+
+ if (ret < 0) {
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ GF_FREE(value);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+quota_conf_read_header(int fd, char *buf)
+{
+ int ret = 0;
+ const int header_len = SLEN(QUOTA_CONF_HEADER);
+
+ ret = gf_nread(fd, buf, header_len);
+ if (ret <= 0) {
+ goto out;
+ } else if (ret > 0 && ret != header_len) {
+ ret = -1;
+ goto out;
+ }
+
+ buf[header_len - 1] = 0;
+
+out:
+ if (ret < 0)
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "failed to read "
+ "header from a quota conf");
+
+ return ret;
+}
+
+int32_t
+quota_conf_read_version(int fd, float *version)
+{
+ int ret = 0;
+ char buf[PATH_MAX] = "";
+ char *tail = NULL;
+ float value = 0.0f;
+
+ ret = quota_conf_read_header(fd, buf);
+ if (ret == 0) {
+ /* quota.conf is empty */
+ value = GF_QUOTA_CONF_VERSION;
+ goto out;
+ } else if (ret < 0) {
+ goto out;
+ }
+
+ value = strtof((buf + strlen(buf) - 3), &tail);
+ if (tail[0] != '\0') {
+ ret = -1;
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "invalid quota conf"
+ " version");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret >= 0)
+ *version = value;
+ else
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "failed to "
+ "read version from a quota conf header");
+
+ return ret;
+}
+
+int32_t
+quota_conf_read_gfid(int fd, void *buf, char *type, float version)
+{
+ int ret = 0;
+
+ ret = gf_nread(fd, buf, 16);
+ if (ret <= 0)
+ goto out;
+
+ if (ret != 16) {
+ ret = -1;
+ goto out;
+ }
+
+ if (version >= 1.2f) {
+ ret = gf_nread(fd, type, 1);
+ if (ret != 1) {
+ ret = -1;
+ goto out;
+ }
+ ret = 17;
+ } else {
+ *type = GF_QUOTA_CONF_TYPE_USAGE;
+ }
+
+out:
+ if (ret < 0)
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "failed to "
+ "read gfid from a quota conf");
+
+ return ret;
+}
+
+int32_t
+quota_conf_skip_header(int fd)
+{
+ return gf_skip_header_section(fd, strlen(QUOTA_CONF_HEADER));
+}
diff --git a/libglusterfs/src/rbthash.c b/libglusterfs/src/rbthash.c
index 4f04fed9311..c90b5a21f44 100644
--- a/libglusterfs/src/rbthash.c
+++ b/libglusterfs/src/rbthash.c
@@ -8,68 +8,66 @@
cases as published by the Free Software Foundation.
*/
-
-#include "rbthash.h"
+#include "glusterfs/rbthash.h"
#include "rb.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "logging.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/libglusterfs-messages.h"
#include <pthread.h>
#include <string.h>
-
int
-rbthash_comparator (void *entry1, void *entry2, void *param)
+rbthash_comparator(void *entry1, void *entry2, void *param)
{
- int ret = 0;
- rbthash_entry_t *e1 = NULL;
- rbthash_entry_t *e2 = NULL;
+ int ret = 0;
+ rbthash_entry_t *e1 = NULL;
+ rbthash_entry_t *e2 = NULL;
- if ((!entry1) || (!entry2) || (!param))
- return -1;
+ if ((!entry1) || (!entry2) || (!param))
+ return -1;
- e1 = (rbthash_entry_t *)entry1;
- e2 = (rbthash_entry_t *)entry2;
+ e1 = (rbthash_entry_t *)entry1;
+ e2 = (rbthash_entry_t *)entry2;
- if (e1->keylen != e2->keylen) {
- if (e1->keylen < e2->keylen)
- ret = -1;
- else if (e1->keylen > e2->keylen)
- ret = 1;
- } else
- ret = memcmp (e1->key, e2->key, e1->keylen);
+ if (e1->keylen != e2->keylen) {
+ if (e1->keylen < e2->keylen)
+ ret = -1;
+ else if (e1->keylen > e2->keylen)
+ ret = 1;
+ } else
+ ret = memcmp(e1->key, e2->key, e1->keylen);
- return ret;
+ return ret;
}
-
int
-__rbthash_init_buckets (rbthash_table_t *tbl, int buckets)
+__rbthash_init_buckets(rbthash_table_t *tbl, int buckets)
{
- int i = 0;
- int ret = -1;
-
- if (!tbl)
- return -1;
-
- for (; i < buckets; i++) {
- LOCK_INIT (&tbl->buckets[i].bucketlock);
- tbl->buckets[i].bucket = rb_create ((rb_comparison_func *)rbthash_comparator, tbl, NULL);
- if (!tbl->buckets[i].bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to create rb"
- " table bucket");
- ret = -1;
- goto err;
- }
+ int i = 0;
+ int ret = -1;
+
+ if (!tbl)
+ return -1;
+
+ for (; i < buckets; i++) {
+ LOCK_INIT(&tbl->buckets[i].bucketlock);
+ tbl->buckets[i].bucket = rb_create(
+ (rb_comparison_func *)rbthash_comparator, tbl, NULL);
+ if (!tbl->buckets[i].bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RB_TABLE_CREATE_FAILED,
+ NULL);
+ ret = -1;
+ goto err;
}
+ }
- ret = 0;
+ ret = 0;
err:
- return ret;
+ return ret;
}
-
/*
* rbthash_table_init - Initialize a RBT based hash table
* @buckets - Number of buckets in the hash table
@@ -81,379 +79,376 @@ err:
*/
rbthash_table_t *
-rbthash_table_init (int buckets, rbt_hasher_t hfunc,
- rbt_data_destroyer_t dfunc,
- unsigned long expected_entries,
- struct mem_pool *entrypool)
+rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc,
+ rbt_data_destroyer_t dfunc, unsigned long expected_entries,
+ struct mem_pool *entrypool)
{
- rbthash_table_t *newtab = NULL;
- int ret = -1;
-
- if (!hfunc) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Hash function not given");
- return NULL;
+ rbthash_table_t *newtab = NULL;
+ int ret = -1;
+
+ if (!hfunc) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_HASH_FUNC_ERROR, NULL);
+ return NULL;
+ }
+
+ if (!entrypool && !expected_entries) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_NOT_PROVIDED, NULL);
+ return NULL;
+ }
+
+ if (entrypool && expected_entries) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_PROVIDED, NULL);
+ return NULL;
+ }
+
+ newtab = GF_CALLOC(1, sizeof(*newtab), gf_common_mt_rbthash_table_t);
+ if (!newtab)
+ return NULL;
+
+ newtab->buckets = GF_CALLOC(buckets, sizeof(struct rbthash_bucket),
+ gf_common_mt_rbthash_bucket);
+ if (!newtab->buckets) {
+ goto free_newtab;
+ }
+
+ if (expected_entries) {
+ newtab->entrypool = mem_pool_new_ctx(ctx, rbthash_entry_t,
+ expected_entries);
+ if (!newtab->entrypool) {
+ goto free_buckets;
}
-
- if (!entrypool && !expected_entries) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR,
- "Both mem-pool and expected entries not provided");
- return NULL;
- }
-
- if (entrypool && expected_entries) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR,
- "Both mem-pool and expected entries are provided");
- return NULL;
- }
-
-
- newtab = GF_CALLOC (1, sizeof (*newtab),
- gf_common_mt_rbthash_table_t);
- if (!newtab)
- return NULL;
-
- newtab->buckets = GF_CALLOC (buckets, sizeof (struct rbthash_bucket),
- gf_common_mt_rbthash_bucket);
- if (!newtab->buckets) {
- goto free_newtab;
- }
-
- if (expected_entries) {
- newtab->entrypool =
- mem_pool_new (rbthash_entry_t, expected_entries);
- if (!newtab->entrypool) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR,
- "Failed to allocate mem-pool");
- goto free_buckets;
- }
- newtab->pool_alloced = _gf_true;
- } else {
- newtab->entrypool = entrypool;
- }
-
- LOCK_INIT (&newtab->tablelock);
- INIT_LIST_HEAD (&newtab->list);
- newtab->numbuckets = buckets;
- ret = __rbthash_init_buckets (newtab, buckets);
-
- if (ret == -1) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to init buckets");
- if (newtab->pool_alloced)
- mem_pool_destroy (newtab->entrypool);
- } else {
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "Inited hash table: buckets:"
- " %d", buckets);
- }
-
- newtab->hashfunc = hfunc;
- newtab->dfunc = dfunc;
+ newtab->pool_alloced = _gf_true;
+ } else {
+ newtab->entrypool = entrypool;
+ }
+
+ LOCK_INIT(&newtab->tablelock);
+ INIT_LIST_HEAD(&newtab->list);
+ newtab->numbuckets = buckets;
+ ret = __rbthash_init_buckets(newtab, buckets);
+
+ if (ret == -1) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_BUCKET_FAILED,
+ NULL);
+ if (newtab->pool_alloced)
+ mem_pool_destroy(newtab->entrypool);
+ } else {
+ gf_msg_trace(GF_RBTHASH, 0,
+ "Inited hash table: buckets:"
+ " %d",
+ buckets);
+ }
+
+ newtab->hashfunc = hfunc;
+ newtab->dfunc = dfunc;
free_buckets:
- if (ret == -1)
- GF_FREE (newtab->buckets);
+ if (ret == -1)
+ GF_FREE(newtab->buckets);
free_newtab:
- if (ret == -1) {
- GF_FREE (newtab);
- newtab = NULL;
- }
+ if (ret == -1) {
+ GF_FREE(newtab);
+ newtab = NULL;
+ }
- return newtab;
+ return newtab;
}
rbthash_entry_t *
-rbthash_init_entry (rbthash_table_t *tbl, void *data, void *key, int keylen)
+rbthash_init_entry(rbthash_table_t *tbl, void *data, void *key, int keylen)
{
- int ret = -1;
- rbthash_entry_t *entry = NULL;
-
- if ((!tbl) || (!data) || (!key))
- return NULL;
-
- entry = mem_get (tbl->entrypool);
- if (!entry) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get entry from"
- " mem-pool");
- goto ret;
- }
-
- entry->data = data;
- entry->key = GF_CALLOC (keylen, sizeof (char), gf_common_mt_char);
- if (!entry->key) {
- goto free_entry;
- }
-
- INIT_LIST_HEAD (&entry->list);
- memcpy (entry->key, key, keylen);
- entry->keylen = keylen;
- entry->keyhash = tbl->hashfunc (entry->key, entry->keylen);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "HASH: %u", entry->keyhash);
-
- ret = 0;
+ int ret = -1;
+ rbthash_entry_t *entry = NULL;
+
+ if ((!tbl) || (!data) || (!key))
+ return NULL;
+
+ entry = mem_get(tbl->entrypool);
+ if (!entry) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
+ NULL);
+ goto ret;
+ }
+
+ entry->data = data;
+ entry->key = GF_MALLOC(keylen, gf_common_mt_char);
+ if (!entry->key) {
+ goto free_entry;
+ }
+
+ INIT_LIST_HEAD(&entry->list);
+ memcpy(entry->key, key, keylen);
+ entry->keylen = keylen;
+ entry->keyhash = tbl->hashfunc(entry->key, entry->keylen);
+ gf_msg_trace(GF_RBTHASH, 0, "HASH: %u", entry->keyhash);
+
+ ret = 0;
free_entry:
- if (ret == -1) {
- mem_put (entry);
- entry = NULL;
- }
+ if (ret == -1) {
+ mem_put(entry);
+ entry = NULL;
+ }
ret:
- return entry;
+ return entry;
}
-
void
-rbthash_deinit_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
+rbthash_deinit_entry(rbthash_table_t *tbl, rbthash_entry_t *entry)
{
+ if (!entry)
+ return;
- if (!entry)
- return;
-
- if (entry->key)
- GF_FREE (entry->key);
-
- if (tbl) {
- if ((entry->data) && (tbl->dfunc))
- tbl->dfunc (entry->data);
+ GF_FREE(entry->key);
- LOCK (&tbl->tablelock);
- {
- list_del_init (&entry->list);
- }
- UNLOCK (&tbl->tablelock);
+ if (tbl) {
+ if ((entry->data) && (tbl->dfunc))
+ tbl->dfunc(entry->data);
- mem_put (entry);
+ LOCK(&tbl->tablelock);
+ {
+ list_del_init(&entry->list);
}
+ UNLOCK(&tbl->tablelock);
- return;
-}
+ mem_put(entry);
+ }
+ return;
+}
-inline struct rbthash_bucket *
-rbthash_entry_bucket (rbthash_table_t *tbl, rbthash_entry_t * entry)
+static struct rbthash_bucket *
+rbthash_entry_bucket(rbthash_table_t *tbl, rbthash_entry_t *entry)
{
- int nbucket = 0;
+ int nbucket = 0;
- nbucket = (entry->keyhash % tbl->numbuckets);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "BUCKET: %d", nbucket);
- return &tbl->buckets[nbucket];
+ nbucket = (entry->keyhash % tbl->numbuckets);
+ gf_msg_trace(GF_RBTHASH, 0, "BUCKET: %d", nbucket);
+ return &tbl->buckets[nbucket];
}
-
int
-rbthash_insert_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
+rbthash_insert_entry(rbthash_table_t *tbl, rbthash_entry_t *entry)
{
- struct rbthash_bucket *bucket = NULL;
- int ret = -1;
-
- if ((!tbl) || (!entry))
- return -1;
-
- bucket = rbthash_entry_bucket (tbl, entry);
- if (!bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
- goto err;
+ struct rbthash_bucket *bucket = NULL;
+ int ret = -1;
+
+ if ((!tbl) || (!entry))
+ return -1;
+
+ bucket = rbthash_entry_bucket(tbl, entry);
+ if (!bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
+ goto err;
+ }
+
+ ret = 0;
+ LOCK(&bucket->bucketlock);
+ {
+ if (!rb_probe(bucket->bucket, (void *)entry)) {
+ UNLOCK(&bucket->bucketlock);
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
+ NULL);
+ ret = -1;
+ goto err;
}
-
- ret = 0;
- LOCK (&bucket->bucketlock);
- {
- if (!rb_probe (bucket->bucket, (void *)entry)) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to insert"
- " entry");
- ret = -1;
- }
- }
- UNLOCK (&bucket->bucketlock);
+ }
+ UNLOCK(&bucket->bucketlock);
err:
- return ret;
+ return ret;
}
-
int
-rbthash_insert (rbthash_table_t *tbl, void *data, void *key, int keylen)
+rbthash_insert(rbthash_table_t *tbl, void *data, void *key, int keylen)
{
- rbthash_entry_t *entry = NULL;
- int ret = -1;
-
- if ((!tbl) || (!data) || (!key))
- return -1;
-
- entry = rbthash_init_entry (tbl, data, key, keylen);
- if (!entry) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to init entry");
- goto err;
- }
-
- ret = rbthash_insert_entry (tbl, entry);
-
- if (ret == -1) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to insert entry");
- rbthash_deinit_entry (tbl, entry);
- }
-
- LOCK (&tbl->tablelock);
- {
- list_add_tail (&entry->list, &tbl->list);
- }
- UNLOCK (&tbl->tablelock);
+ rbthash_entry_t *entry = NULL;
+ int ret = -1;
+
+ if ((!tbl) || (!data) || (!key))
+ return -1;
+
+ entry = rbthash_init_entry(tbl, data, key, keylen);
+ if (!entry) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_ENTRY_FAILED,
+ NULL);
+ goto err;
+ }
+
+ ret = rbthash_insert_entry(tbl, entry);
+
+ if (ret == -1) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
+ NULL);
+ rbthash_deinit_entry(tbl, entry);
+ goto err;
+ }
+
+ LOCK(&tbl->tablelock);
+ {
+ list_add_tail(&entry->list, &tbl->list);
+ }
+ UNLOCK(&tbl->tablelock);
err:
- return ret;
+ return ret;
}
-inline struct rbthash_bucket *
-rbthash_key_bucket (rbthash_table_t *tbl, void *key, int keylen)
+static struct rbthash_bucket *
+rbthash_key_bucket(rbthash_table_t *tbl, void *key, int keylen)
{
- uint32_t keyhash = 0;
- int nbucket = 0;
+ uint32_t keyhash = 0;
+ int nbucket = 0;
- if ((!tbl) || (!key))
- return NULL;
+ if ((!tbl) || (!key))
+ return NULL;
- keyhash = tbl->hashfunc (key, keylen);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "HASH: %u", keyhash);
- nbucket = (keyhash % tbl->numbuckets);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "BUCKET: %u", nbucket);
+ keyhash = tbl->hashfunc(key, keylen);
+ gf_msg_trace(GF_RBTHASH, 0, "HASH: %u", keyhash);
+ nbucket = (keyhash % tbl->numbuckets);
+ gf_msg_trace(GF_RBTHASH, 0, "BUCKET: %u", nbucket);
- return &tbl->buckets[nbucket];
+ return &tbl->buckets[nbucket];
}
-
void *
-rbthash_get (rbthash_table_t *tbl, void *key, int keylen)
+rbthash_get(rbthash_table_t *tbl, void *key, int keylen)
{
- struct rbthash_bucket *bucket = NULL;
- rbthash_entry_t *entry = NULL;
- rbthash_entry_t searchentry = {0, };
-
- if ((!tbl) || (!key))
- return NULL;
-
- bucket = rbthash_key_bucket (tbl, key, keylen);
- if (!bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
- return NULL;
- }
-
- searchentry.key = key;
- searchentry.keylen = keylen;
- LOCK (&bucket->bucketlock);
- {
- entry = rb_find (bucket->bucket, &searchentry);
- }
- UNLOCK (&bucket->bucketlock);
-
- if (!entry)
- return NULL;
-
- return entry->data;
+ struct rbthash_bucket *bucket = NULL;
+ rbthash_entry_t *entry = NULL;
+ rbthash_entry_t searchentry = {
+ 0,
+ };
+
+ if ((!tbl) || (!key))
+ return NULL;
+
+ bucket = rbthash_key_bucket(tbl, key, keylen);
+ if (!bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
+ return NULL;
+ }
+
+ searchentry.key = key;
+ searchentry.keylen = keylen;
+ LOCK(&bucket->bucketlock);
+ {
+ entry = rb_find(bucket->bucket, &searchentry);
+ }
+ UNLOCK(&bucket->bucketlock);
+
+ if (!entry)
+ return NULL;
+
+ return entry->data;
}
-
void *
-rbthash_remove (rbthash_table_t *tbl, void *key, int keylen)
+rbthash_remove(rbthash_table_t *tbl, void *key, int keylen)
{
- struct rbthash_bucket *bucket = NULL;
- rbthash_entry_t *entry = NULL;
- rbthash_entry_t searchentry = {0, };
- void *dataref = NULL;
-
- if ((!tbl) || (!key))
- return NULL;
-
- bucket = rbthash_key_bucket (tbl, key, keylen);
- if (!bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
- return NULL;
- }
-
- searchentry.key = key;
- searchentry.keylen = keylen;
-
- LOCK (&bucket->bucketlock);
- {
- entry = rb_delete (bucket->bucket, &searchentry);
- }
- UNLOCK (&bucket->bucketlock);
-
- if (!entry)
- return NULL;
-
- GF_FREE (entry->key);
- dataref = entry->data;
-
- LOCK (&tbl->tablelock);
- {
- list_del_init (&entry->list);
- }
- UNLOCK (&tbl->tablelock);
-
- mem_put (entry);
-
- return dataref;
+ struct rbthash_bucket *bucket = NULL;
+ rbthash_entry_t *entry = NULL;
+ rbthash_entry_t searchentry = {
+ 0,
+ };
+ void *dataref = NULL;
+
+ if ((!tbl) || (!key))
+ return NULL;
+
+ bucket = rbthash_key_bucket(tbl, key, keylen);
+ if (!bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
+ return NULL;
+ }
+
+ searchentry.key = key;
+ searchentry.keylen = keylen;
+
+ LOCK(&bucket->bucketlock);
+ {
+ entry = rb_delete(bucket->bucket, &searchentry);
+ }
+ UNLOCK(&bucket->bucketlock);
+
+ if (!entry)
+ return NULL;
+
+ GF_FREE(entry->key);
+ dataref = entry->data;
+
+ LOCK(&tbl->tablelock);
+ {
+ list_del_init(&entry->list);
+ }
+ UNLOCK(&tbl->tablelock);
+
+ mem_put(entry);
+
+ return dataref;
}
-
void
-rbthash_entry_deiniter (void *entry, void *rbparam)
+rbthash_entry_deiniter(void *entry, void *rbparam)
{
- if (!entry)
- return;
+ if (!entry)
+ return;
- rbthash_deinit_entry (rbparam, entry);
+ rbthash_deinit_entry(rbparam, entry);
}
-
void
-rbthash_table_destroy_buckets (rbthash_table_t *tbl)
+rbthash_table_destroy_buckets(rbthash_table_t *tbl)
{
- int x = 0;
- if (!tbl)
- return;
+ int x = 0;
+ if (!tbl)
+ return;
- for (;x < tbl->numbuckets; x++) {
- LOCK_DESTROY (&tbl->buckets[x].bucketlock);
- rb_destroy (tbl->buckets[x].bucket, rbthash_entry_deiniter);
- }
+ for (; x < tbl->numbuckets; x++) {
+ LOCK_DESTROY(&tbl->buckets[x].bucketlock);
+ rb_destroy(tbl->buckets[x].bucket, rbthash_entry_deiniter);
+ }
- return;
+ return;
}
-
void
-rbthash_table_destroy (rbthash_table_t *tbl)
+rbthash_table_destroy(rbthash_table_t *tbl)
{
- if (!tbl)
- return;
+ if (!tbl)
+ return;
- rbthash_table_destroy_buckets (tbl);
- if (tbl->pool_alloced)
- mem_pool_destroy (tbl->entrypool);
+ rbthash_table_destroy_buckets(tbl);
+ if (tbl->pool_alloced)
+ mem_pool_destroy(tbl->entrypool);
- GF_FREE (tbl->buckets);
- GF_FREE (tbl);
+ GF_FREE(tbl->buckets);
+ GF_FREE(tbl);
}
-
void
-rbthash_table_traverse (rbthash_table_t *tbl, rbt_traverse_t traverse,
- void *mydata)
+rbthash_table_traverse(rbthash_table_t *tbl, rbt_traverse_t traverse,
+ void *mydata)
{
- rbthash_entry_t *entry = NULL;
+ rbthash_entry_t *entry = NULL;
- if ((tbl == NULL) || (traverse == NULL)) {
- goto out;
- }
+ if ((tbl == NULL) || (traverse == NULL)) {
+ goto out;
+ }
- LOCK (&tbl->tablelock);
+ LOCK(&tbl->tablelock);
+ {
+ list_for_each_entry(entry, &tbl->list, list)
{
- list_for_each_entry (entry, &tbl->list, list) {
- traverse (entry->data, mydata);
- }
+ traverse(entry->data, mydata);
}
- UNLOCK (&tbl->tablelock);
+ }
+ UNLOCK(&tbl->tablelock);
out:
- return;
+ return;
}
diff --git a/libglusterfs/src/rbthash.h b/libglusterfs/src/rbthash.h
deleted file mode 100644
index b093ce9982d..00000000000
--- a/libglusterfs/src/rbthash.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __RBTHASH_TABLE_H_
-#define __RBTHASH_TABLE_H_
-#include "rb.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-
-#include <pthread.h>
-
-#define GF_RBTHASH_MEMPOOL 16384 //1048576
-#define GF_RBTHASH "rbthash"
-
-struct rbthash_bucket {
- struct rb_table *bucket;
- gf_lock_t bucketlock;
-};
-
-typedef struct rbthash_entry {
- void *data;
- void *key;
- int keylen;
- uint32_t keyhash;
- struct list_head list;
-} rbthash_entry_t;
-
-typedef uint32_t (*rbt_hasher_t) (void *data, int len);
-typedef void (*rbt_data_destroyer_t) (void *data);
-typedef void (*rbt_traverse_t) (void *data, void *mydata);
-
-typedef struct rbthash_table {
- int size;
- int numbuckets;
- struct mem_pool *entrypool;
- gf_lock_t tablelock;
- struct rbthash_bucket *buckets;
- rbt_hasher_t hashfunc;
- rbt_data_destroyer_t dfunc;
- gf_boolean_t pool_alloced;
- struct list_head list;
-} rbthash_table_t;
-
-extern rbthash_table_t *
-rbthash_table_init (int buckets, rbt_hasher_t hfunc,
- rbt_data_destroyer_t dfunc, unsigned long expected_entries,
- struct mem_pool *entrypool);
-
-extern int
-rbthash_insert (rbthash_table_t *tbl, void *data, void *key, int keylen);
-
-extern void *
-rbthash_get (rbthash_table_t *tbl, void *key, int keylen);
-
-extern void *
-rbthash_remove (rbthash_table_t *tbl, void *key, int keylen);
-
-extern void *
-rbthash_replace (rbthash_table_t *tbl, void *key, int keylen, void *newdata);
-
-extern void
-rbthash_table_destroy (rbthash_table_t *tbl);
-
-extern void
-rbthash_table_traverse (rbthash_table_t *tbl, rbt_traverse_t traverse,
- void *mydata);
-#endif
diff --git a/libglusterfs/src/refcount.c b/libglusterfs/src/refcount.c
new file mode 100644
index 00000000000..d5a5a82fa0f
--- /dev/null
+++ b/libglusterfs/src/refcount.c
@@ -0,0 +1,108 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/common-utils.h"
+#include "glusterfs/refcount.h"
+
+#ifndef REFCOUNT_NEEDS_LOCK
+
+void *
+_gf_ref_get(gf_ref_t *ref)
+{
+ unsigned int cnt = __sync_fetch_and_add(&ref->cnt, 1);
+
+ /* if cnt == 0, we're in a fatal position, the object will be free'd
+ *
+ * There is a race when two threads do a _gf_ref_get(). Only one of
+ * them may get a 0 returned. That is acceptable, because one
+ * _gf_ref_get() returning 0 should be handled as a fatal problem and
+ * when correct usage/locking is used, it should never happen.
+ */
+ GF_ASSERT(cnt != 0);
+
+ return cnt ? ref->data : NULL;
+}
+
+unsigned int
+_gf_ref_put(gf_ref_t *ref)
+{
+ unsigned int cnt = __sync_fetch_and_sub(&ref->cnt, 1);
+
+ /* if cnt == 1, the last user just did a _gf_ref_put()
+ *
+ * When cnt == 0, one _gf_ref_put() was done too much and there has
+ * been a thread using the refcounted structure when it was not
+ * supposed to.
+ */
+ GF_ASSERT(cnt != 0);
+
+ if (cnt == 1 && ref->release)
+ ref->release(ref->data);
+
+ return (cnt != 1);
+}
+
+#else
+
+void *
+_gf_ref_get(gf_ref_t *ref)
+{
+ unsigned int cnt = 0;
+
+ LOCK(&ref->lk);
+ {
+ /* never can be 0, should have been free'd */
+ if (ref->cnt > 0)
+ cnt = ++ref->cnt;
+ else
+ GF_ASSERT(ref->cnt > 0);
+ }
+ UNLOCK(&ref->lk);
+
+ return cnt ? ref->data : NULL;
+}
+
+unsigned int
+_gf_ref_put(gf_ref_t *ref)
+{
+ unsigned int cnt = 0;
+ int release = 0;
+
+ LOCK(&ref->lk);
+ {
+ if (ref->cnt != 0) {
+ cnt = --ref->cnt;
+ /* call release() only when cnt == 0 */
+ release = (cnt == 0);
+ } else
+ GF_ASSERT(ref->cnt != 0);
+ }
+ UNLOCK(&ref->lk);
+
+ if (release && ref->release)
+ ref->release(ref->data);
+
+ return !release;
+}
+
+#endif /* REFCOUNT_NEEDS_LOCK */
+
+void
+_gf_ref_init(gf_ref_t *ref, gf_ref_release_t release, void *data)
+{
+ GF_ASSERT(ref);
+
+#ifdef REFCOUNT_NEEDS_LOCK
+ LOCK_INIT(&ref->lk);
+#endif
+ ref->cnt = 1;
+ ref->release = release;
+ ref->data = data;
+}
diff --git a/libglusterfs/src/revision.h b/libglusterfs/src/revision.h
deleted file mode 100644
index c1ea4a9e9fe..00000000000
--- a/libglusterfs/src/revision.h
+++ /dev/null
@@ -1 +0,0 @@
-#define GLUSTERFS_REPOSITORY_REVISION "git://git.gluster.com/glusterfs.git"
diff --git a/libglusterfs/src/rot-buffs.c b/libglusterfs/src/rot-buffs.c
new file mode 100644
index 00000000000..260bf16ecea
--- /dev/null
+++ b/libglusterfs/src/rot-buffs.c
@@ -0,0 +1,490 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <math.h>
+
+#include "glusterfs/mem-types.h"
+#include "glusterfs/mem-pool.h"
+
+#include "glusterfs/rot-buffs.h"
+
+/**
+ * Producer-Consumer based on top of rotational buffers.
+ *
+ * This favours writers (producer) and keeps the critical section
+ * light weight. Buffer switch happens when a consumer wants to
+ * consume data. This is the slow path and waits for pending
+ * writes to finish.
+ *
+ * TODO: do away with opaques (use arrays with indexing).
+ */
+
+#define ROT_BUFF_DEFAULT_COUNT 2
+#define ROT_BUFF_ALLOC_SIZE (1 * 1024 * 1024) /* 1MB per iovec */
+
+#define RLIST_IOV_MELDED_ALLOC_SIZE (RBUF_IOVEC_SIZE + ROT_BUFF_ALLOC_SIZE)
+
+/**
+ * iovec list is not shrunk (deallocated) if usage/total count
+ * falls in this range. this is the fast path and should satisfy
+ * most of the workloads. for the rest shrinking iovec list is
+ * generous.
+ */
+#define RVEC_LOW_WATERMARK_COUNT 1
+#define RVEC_HIGH_WATERMARK_COUNT (1 << 4)
+
+static inline rbuf_list_t *
+rbuf_current_buffer(rbuf_t *rbuf)
+{
+ return rbuf->current;
+}
+
+static void
+rlist_mark_waiting(rbuf_list_t *rlist)
+{
+ LOCK(&rlist->c_lock);
+ {
+ rlist->awaiting = _gf_true;
+ }
+ UNLOCK(&rlist->c_lock);
+}
+
+static int
+__rlist_has_waiter(rbuf_list_t *rlist)
+{
+ return (rlist->awaiting == _gf_true);
+}
+
+static void *
+rbuf_alloc_rvec()
+{
+ return GF_CALLOC(1, RLIST_IOV_MELDED_ALLOC_SIZE, gf_common_mt_rvec_t);
+}
+
+static void
+rlist_reset_vector_usage(rbuf_list_t *rlist)
+{
+ rlist->used = 1;
+}
+
+static void
+rlist_increment_vector_usage(rbuf_list_t *rlist)
+{
+ rlist->used++;
+}
+
+static void
+rlist_increment_total_usage(rbuf_list_t *rlist)
+{
+ rlist->total++;
+}
+
+static int
+rvec_in_watermark_range(rbuf_list_t *rlist)
+{
+ return ((rlist->total >= RVEC_LOW_WATERMARK_COUNT) &&
+ (rlist->total <= RVEC_HIGH_WATERMARK_COUNT));
+}
+
+static void
+rbuf_reset_rvec(rbuf_iovec_t *rvec)
+{
+ GF_VALIDATE_OR_GOTO("libglusterfs", rvec, err);
+ /* iov_base is _never_ modified */
+ rvec->iov.iov_len = 0;
+err:
+ return;
+}
+
+/* TODO: alloc multiple rbuf_iovec_t */
+static int
+rlist_add_new_vec(rbuf_list_t *rlist)
+{
+ rbuf_iovec_t *rvec = NULL;
+
+ rvec = (rbuf_iovec_t *)rbuf_alloc_rvec();
+ if (!rvec)
+ return -1;
+ INIT_LIST_HEAD(&rvec->list);
+ rvec->iov.iov_base = ((char *)rvec) + RBUF_IOVEC_SIZE;
+ rvec->iov.iov_len = 0;
+
+ list_add_tail(&rvec->list, &rlist->veclist);
+
+ rlist->rvec = rvec; /* cache the latest */
+
+ rlist_increment_vector_usage(rlist);
+ rlist_increment_total_usage(rlist);
+
+ return 0;
+}
+
+static void
+rlist_free_rvec(rbuf_iovec_t *rvec)
+{
+ if (!rvec)
+ return;
+ list_del(&rvec->list);
+ GF_FREE(rvec);
+}
+
+static void
+rlist_purge_all_rvec(rbuf_list_t *rlist)
+{
+ rbuf_iovec_t *rvec = NULL;
+
+ if (!rlist)
+ return;
+ while (!list_empty(&rlist->veclist)) {
+ rvec = list_first_entry(&rlist->veclist, rbuf_iovec_t, list);
+ rlist_free_rvec(rvec);
+ }
+}
+
+static void
+rlist_shrink_rvec(rbuf_list_t *rlist, unsigned long long shrink)
+{
+ rbuf_iovec_t *rvec = NULL;
+
+ while (!list_empty(&rlist->veclist) && (shrink-- > 0)) {
+ rvec = list_first_entry(&rlist->veclist, rbuf_iovec_t, list);
+ rlist_free_rvec(rvec);
+ }
+}
+
+static void
+rbuf_purge_rlist(rbuf_t *rbuf)
+{
+ rbuf_list_t *rlist = NULL;
+
+ while (!list_empty(&rbuf->freelist)) {
+ rlist = list_first_entry(&rbuf->freelist, rbuf_list_t, list);
+ list_del(&rlist->list);
+
+ rlist_purge_all_rvec(rlist);
+
+ LOCK_DESTROY(&rlist->c_lock);
+
+ (void)pthread_mutex_destroy(&rlist->b_lock);
+ (void)pthread_cond_destroy(&rlist->b_cond);
+
+ GF_FREE(rlist);
+ }
+}
+
+rbuf_t *
+rbuf_init(int bufcount)
+{
+ int j = 0;
+ int ret = 0;
+ rbuf_t *rbuf = NULL;
+ rbuf_list_t *rlist = NULL;
+
+ if (bufcount <= 0)
+ bufcount = ROT_BUFF_DEFAULT_COUNT;
+
+ rbuf = GF_CALLOC(1, sizeof(rbuf_t), gf_common_mt_rbuf_t);
+ if (!rbuf)
+ goto error_return;
+
+ LOCK_INIT(&rbuf->lock);
+ INIT_LIST_HEAD(&rbuf->freelist);
+
+ /* it could have been one big calloc() but this is just once.. */
+ for (j = 0; j < bufcount; j++) {
+ rlist = GF_CALLOC(1, sizeof(rbuf_list_t), gf_common_mt_rlist_t);
+ if (!rlist) {
+ ret = -1;
+ break;
+ }
+
+ INIT_LIST_HEAD(&rlist->list);
+ INIT_LIST_HEAD(&rlist->veclist);
+
+ rlist->pending = rlist->completed = 0;
+
+ ret = rlist_add_new_vec(rlist);
+ if (ret)
+ break;
+
+ LOCK_INIT(&rlist->c_lock);
+
+ rlist->awaiting = _gf_false;
+ ret = pthread_mutex_init(&rlist->b_lock, 0);
+ if (ret != 0) {
+ GF_FREE(rlist);
+ break;
+ }
+
+ ret = pthread_cond_init(&rlist->b_cond, 0);
+ if (ret != 0) {
+ GF_FREE(rlist);
+ break;
+ }
+
+ list_add_tail(&rlist->list, &rbuf->freelist);
+ }
+
+ if (ret != 0)
+ goto dealloc_rlist;
+
+ /* cache currently used buffer: first in the list */
+ rbuf->current = list_first_entry(&rbuf->freelist, rbuf_list_t, list);
+ return rbuf;
+
+dealloc_rlist:
+ rbuf_purge_rlist(rbuf);
+ LOCK_DESTROY(&rbuf->lock);
+ GF_FREE(rbuf);
+error_return:
+ return NULL;
+}
+
+void
+rbuf_dtor(rbuf_t *rbuf)
+{
+ if (!rbuf)
+ return;
+ rbuf->current = NULL;
+ rbuf_purge_rlist(rbuf);
+ LOCK_DESTROY(&rbuf->lock);
+
+ GF_FREE(rbuf);
+}
+
+static char *
+rbuf_adjust_write_area(struct iovec *iov, size_t bytes)
+{
+ char *wbuf = NULL;
+
+ wbuf = iov->iov_base + iov->iov_len;
+ iov->iov_len += bytes;
+ return wbuf;
+}
+
+static char *
+rbuf_alloc_write_area(rbuf_list_t *rlist, size_t bytes)
+{
+ int ret = 0;
+ struct iovec *iov = NULL;
+
+ /* check for available space in _current_ IO buffer */
+ iov = &rlist->rvec->iov;
+ if (iov->iov_len + bytes <= ROT_BUFF_ALLOC_SIZE)
+ return rbuf_adjust_write_area(iov, bytes); /* fast path */
+
+ /* not enough bytes, try next available buffers */
+ if (list_is_last(&rlist->rvec->list, &rlist->veclist)) {
+ /* OH! consumed all vector buffers */
+ GF_ASSERT(rlist->used == rlist->total);
+ ret = rlist_add_new_vec(rlist);
+ if (ret)
+ goto error_return;
+ } else {
+ /* not the end, have available rbuf_iovec's */
+ rlist->rvec = list_next_entry(rlist->rvec, list);
+ rlist->used++;
+ rbuf_reset_rvec(rlist->rvec);
+ }
+
+ iov = &rlist->rvec->iov;
+ return rbuf_adjust_write_area(iov, bytes);
+
+error_return:
+ return NULL;
+}
+
+char *
+rbuf_reserve_write_area(rbuf_t *rbuf, size_t bytes, void **opaque)
+{
+ char *wbuf = NULL;
+ rbuf_list_t *rlist = NULL;
+
+ if (!rbuf || (bytes <= 0) || (bytes > ROT_BUFF_ALLOC_SIZE) || !opaque)
+ return NULL;
+
+ LOCK(&rbuf->lock);
+ {
+ rlist = rbuf_current_buffer(rbuf);
+ wbuf = rbuf_alloc_write_area(rlist, bytes);
+ if (!wbuf)
+ goto unblock;
+ rlist->pending++;
+ }
+unblock:
+ UNLOCK(&rbuf->lock);
+
+ if (wbuf)
+ *opaque = rlist;
+ return wbuf;
+}
+
+static void
+rbuf_notify_waiter(rbuf_list_t *rlist)
+{
+ pthread_mutex_lock(&rlist->b_lock);
+ {
+ pthread_cond_signal(&rlist->b_cond);
+ }
+ pthread_mutex_unlock(&rlist->b_lock);
+}
+
+int
+rbuf_write_complete(void *opaque)
+{
+ rbuf_list_t *rlist = NULL;
+ gf_boolean_t notify = _gf_false;
+
+ if (!opaque)
+ return -1;
+
+ rlist = opaque;
+
+ LOCK(&rlist->c_lock);
+ {
+ rlist->completed++;
+ /**
+ * it's safe to test ->pending without rbuf->lock *only* if
+ * there's a waiter as there can be no new incoming writes.
+ */
+ if (__rlist_has_waiter(rlist) && (rlist->completed == rlist->pending))
+ notify = _gf_true;
+ }
+ UNLOCK(&rlist->c_lock);
+
+ if (notify)
+ rbuf_notify_waiter(rlist);
+
+ return 0;
+}
+
+int
+rbuf_get_buffer(rbuf_t *rbuf, void **opaque, sequence_fn *seqfn, void *mydata)
+{
+ int retval = RBUF_CONSUMABLE;
+ rbuf_list_t *rlist = NULL;
+
+ if (!rbuf || !opaque)
+ return -1;
+
+ LOCK(&rbuf->lock);
+ {
+ rlist = rbuf_current_buffer(rbuf);
+ if (!rlist->pending) {
+ retval = RBUF_EMPTY;
+ goto unblock;
+ }
+
+ if (list_is_singular(&rbuf->freelist)) {
+ /**
+ * removal would lead to writer starvation, disallow
+ * switching.
+ */
+ retval = RBUF_WOULD_STARVE;
+ goto unblock;
+ }
+
+ list_del_init(&rlist->list);
+ if (seqfn)
+ seqfn(rlist, mydata);
+ rbuf->current = list_first_entry(&rbuf->freelist, rbuf_list_t, list);
+ }
+unblock:
+ UNLOCK(&rbuf->lock);
+
+ if (retval == RBUF_CONSUMABLE)
+ *opaque = rlist; /* caller _owns_ the buffer */
+
+ return retval;
+}
+
+/**
+ * Wait for completion of pending writers and invoke dispatcher
+ * routine (for buffer consumption).
+ */
+
+static void
+__rbuf_wait_for_writers(rbuf_list_t *rlist)
+{
+ while (rlist->completed != rlist->pending)
+ pthread_cond_wait(&rlist->b_cond, &rlist->b_lock);
+}
+
+#ifndef M_E
+#define M_E 2.7
+#endif
+
+static void
+rlist_shrink_vector(rbuf_list_t *rlist)
+{
+ unsigned long long shrink = 0;
+
+ /**
+ * fast path: don't bother to deallocate if vectors are hardly
+ * used.
+ */
+ if (rvec_in_watermark_range(rlist))
+ return;
+
+ /**
+ * Calculate the shrink count based on total allocated vectors.
+ * Note that the calculation sticks to rlist->total irrespective
+ * of the actual usage count (rlist->used). Later, ->used could
+ * be used to apply slack to the calculation based on how much
+ * it lags from ->total. For now, let's stick to slow decay.
+ */
+ shrink = rlist->total - (rlist->total * pow(M_E, -0.2));
+
+ rlist_shrink_rvec(rlist, shrink);
+ rlist->total -= shrink;
+}
+
+int
+rbuf_wait_for_completion(rbuf_t *rbuf, void *opaque,
+ void (*fn)(rbuf_list_t *, void *), void *arg)
+{
+ rbuf_list_t *rlist = NULL;
+
+ if (!rbuf || !opaque)
+ return -1;
+
+ rlist = opaque;
+
+ pthread_mutex_lock(&rlist->b_lock);
+ {
+ rlist_mark_waiting(rlist);
+ __rbuf_wait_for_writers(rlist);
+ }
+ pthread_mutex_unlock(&rlist->b_lock);
+
+ /**
+ * from here on, no need of locking until the rlist is put
+ * back into rotation.
+ */
+
+ fn(rlist, arg); /* invoke dispatcher */
+
+ rlist->awaiting = _gf_false;
+ rlist->pending = rlist->completed = 0;
+
+ rlist_shrink_vector(rlist);
+ rlist_reset_vector_usage(rlist);
+
+ rlist->rvec = list_first_entry(&rlist->veclist, rbuf_iovec_t, list);
+ rbuf_reset_rvec(rlist->rvec);
+
+ LOCK(&rbuf->lock);
+ {
+ list_add_tail(&rlist->list, &rbuf->freelist);
+ }
+ UNLOCK(&rbuf->lock);
+
+ return 0;
+}
diff --git a/libglusterfs/src/run.c b/libglusterfs/src/run.c
index 34d75df69c2..58f95a7e610 100644
--- a/libglusterfs/src/run.c
+++ b/libglusterfs/src/run.c
@@ -23,9 +23,31 @@
#include <assert.h>
#include <signal.h>
#include <sys/wait.h>
-#include <sys/resource.h>
+#include "glusterfs/syscall.h"
-#ifdef RUN_STANDALONE
+/*
+ * Following defines are available for helping development:
+ * RUN_STANDALONE and RUN_DO_DEMO.
+ *
+ * Compiling a standalone object file with no dependencies
+ * on glusterfs:
+ * $ cc -DRUN_STANDALONE -c run.c
+ *
+ * Compiling a demo program that exercises bits of run.c
+ * functionality (linking to glusterfs):
+ * $ cc -DRUN_DO_DEMO -orun run.c `pkg-config --libs --cflags glusterfs-api`
+ *
+ * Compiling a demo program that exercises bits of run.c
+ * functionality (with no dependence on glusterfs):
+ *
+ * $ cc -DRUN_DO_DEMO -DRUN_STANDALONE -orun run.c
+ */
+#if defined(RUN_STANDALONE) || defined(RUN_DO_DEMO)
+int
+close_fds_except(int *fdv, size_t count);
+#define sys_read(f, b, c) read(f, b, c)
+#define sys_write(f, b, c) write(f, b, c)
+#define sys_close(f) close(f)
#define GF_CALLOC(n, s, t) calloc(n, s)
#define GF_ASSERT(cond) assert(cond)
#define GF_REALLOC(p, s) realloc(p, s)
@@ -33,458 +55,516 @@
#define gf_strdup(s) strdup(s)
#define gf_vasprintf(p, f, va) vasprintf(p, f, va)
#define gf_loglevel_t int
-#define gf_log(dom, levl, fmt, args...) printf("LOG: " fmt "\n", ##args)
+#define gf_msg_callingfn(dom, level, errnum, msgid, fmt, args...) \
+ printf("LOG: " fmt "\n", ##args)
#define LOG_DEBUG 0
+#ifdef RUN_STANDALONE
+#include <stdbool.h>
+#include <sys/resource.h>
+int
+close_fds_except(int *fdv, size_t count)
+{
+ int i = 0;
+ size_t j = 0;
+ bool should_close = true;
+ struct rlimit rl;
+ int ret = -1;
+
+ ret = getrlimit(RLIMIT_NOFILE, &rl);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rl.rlim_cur; i++) {
+ should_close = true;
+ for (j = 0; j < count; j++) {
+ if (i == fdv[j]) {
+ should_close = false;
+ break;
+ }
+ }
+ if (should_close)
+ sys_close(i);
+ }
+ return 0;
+}
+#endif
#ifdef __linux__
#define GF_LINUX_HOST_OS
#endif
-#else /* ! RUN_STANDALONE */
-#include "glusterfs.h"
-#include "common-utils.h"
+#else /* ! RUN_STANDALONE || RUN_DO_DEMO */
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
#endif
-#include "run.h"
-
+#include "glusterfs/run.h"
void
-runinit (runner_t *runner)
+runinit(runner_t *runner)
{
- int i = 0;
-
- runner->argvlen = 64;
- runner->argv = GF_CALLOC (runner->argvlen,
- sizeof (*runner->argv),
- gf_common_mt_run_argv);
- runner->runerr = runner->argv ? 0 : errno;
- runner->chpid = -1;
- for (i = 0; i < 3; i++) {
- runner->chfd[i] = -1;
- runner->chio[i] = NULL;
- }
+ int i = 0;
+
+ runner->argvlen = 64;
+ runner->argv = GF_CALLOC(runner->argvlen, sizeof(*runner->argv),
+ gf_common_mt_run_argv);
+ runner->runerr = runner->argv ? 0 : errno;
+ runner->chpid = -1;
+ for (i = 0; i < 3; i++) {
+ runner->chfd[i] = -1;
+ runner->chio[i] = NULL;
+ }
}
FILE *
-runner_chio (runner_t *runner, int fd)
+runner_chio(runner_t *runner, int fd)
{
- GF_ASSERT (fd > 0 && fd < 3);
+ GF_ASSERT(fd > 0 && fd < 3);
+ if ((fd > 0) && (fd < 3))
return runner->chio[fd];
+
+ return NULL;
}
static void
-runner_insert_arg (runner_t *runner, char *arg)
+runner_insert_arg(runner_t *runner, char *arg)
{
- int i = 0;
+ int i = 0;
- GF_ASSERT (arg);
+ GF_ASSERT(arg);
- if (runner->runerr)
- return;
+ if (runner->runerr || !runner->argv)
+ return;
- for (i = 0; i < runner->argvlen; i++) {
- if (runner->argv[i] == NULL)
- break;
- }
- GF_ASSERT (i < runner->argvlen);
-
- if (i == runner->argvlen - 1) {
- runner->argv = GF_REALLOC (runner->argv,
- runner->argvlen * 2 * sizeof (*runner->argv));
- if (!runner->argv) {
- runner->runerr = errno;
- return;
- }
- memset (/* "+" is aware of the type of its left side,
- * no need to multiply with type-size */
- runner->argv + runner->argvlen,
- 0, runner->argvlen * sizeof (*runner->argv));
- runner->argvlen *= 2;
- }
+ for (i = 0; i < runner->argvlen; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ }
+ GF_ASSERT(i < runner->argvlen);
- runner->argv[i] = arg;
+ if (i == runner->argvlen - 1) {
+ runner->argv = GF_REALLOC(runner->argv,
+ runner->argvlen * 2 * sizeof(*runner->argv));
+ if (!runner->argv) {
+ runner->runerr = errno;
+ return;
+ }
+ memset(/* "+" is aware of the type of its left side,
+ * no need to multiply with type-size */
+ runner->argv + runner->argvlen, 0,
+ runner->argvlen * sizeof(*runner->argv));
+ runner->argvlen *= 2;
+ }
+
+ runner->argv[i] = arg;
}
void
-runner_add_arg (runner_t *runner, const char *arg)
+runner_add_arg(runner_t *runner, const char *arg)
{
- arg = gf_strdup (arg);
- if (!arg) {
- runner->runerr = errno;
- return;
- }
+ arg = gf_strdup(arg);
+ if (!arg) {
+ runner->runerr = errno;
+ return;
+ }
- runner_insert_arg (runner, (char *)arg);
+ runner_insert_arg(runner, (char *)arg);
}
static void
-runner_va_add_args (runner_t *runner, va_list argp)
+runner_va_add_args(runner_t *runner, va_list argp)
{
- const char *arg;
+ const char *arg;
- while ((arg = va_arg (argp, const char *)))
- runner_add_arg (runner, arg);
+ while ((arg = va_arg(argp, const char *)))
+ runner_add_arg(runner, arg);
}
void
-runner_add_args (runner_t *runner, ...)
+runner_add_args(runner_t *runner, ...)
{
- va_list argp;
+ va_list argp;
- va_start (argp, runner);
- runner_va_add_args (runner, argp);
- va_end (argp);
+ va_start(argp, runner);
+ runner_va_add_args(runner, argp);
+ va_end(argp);
}
void
-runner_argprintf (runner_t *runner, const char *format, ...)
+runner_argprintf(runner_t *runner, const char *format, ...)
{
- va_list argva;
- char *arg = NULL;
- int ret = 0;
+ va_list argva;
+ char *arg = NULL;
+ int ret = 0;
- va_start (argva, format);
- ret = gf_vasprintf (&arg, format, argva);
- va_end (argva);
+ va_start(argva, format);
+ ret = gf_vasprintf(&arg, format, argva);
+ va_end(argva);
- if (ret < 0) {
- runner->runerr = errno;
- return;
- }
+ if (ret < 0) {
+ runner->runerr = errno;
+ return;
+ }
- runner_insert_arg (runner, arg);
+ runner_insert_arg(runner, arg);
}
void
-runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
- const char *msg)
+runner_log(runner_t *runner, const char *dom, gf_loglevel_t lvl,
+ const char *msg)
{
- char *buf = NULL;
- size_t len = 0;
- int i = 0;
-
- if (runner->runerr)
- return;
-
- for (i = 0;; i++) {
- if (runner->argv[i] == NULL)
- break;
- len += (strlen (runner->argv[i]) + 1);
- }
-
- buf = GF_CALLOC (1, len + 1, gf_common_mt_run_logbuf);
- if (!buf) {
- runner->runerr = errno;
- return;
- }
- for (i = 0;; i++) {
- if (runner->argv[i] == NULL)
- break;
- strcat (buf, runner->argv[i]);
- strcat (buf, " ");
- }
- if (len > 0)
- buf[len - 1] = '\0';
-
- gf_log (dom, lvl, "%s: %s", msg, buf);
-
- GF_FREE (buf);
+ char *buf = NULL;
+ size_t len = 0;
+ int i = 0;
+
+ if (runner->runerr)
+ return;
+
+ for (i = 0;; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ len += (strlen(runner->argv[i]) + 1);
+ }
+
+ buf = GF_CALLOC(1, len + 1, gf_common_mt_run_logbuf);
+ if (!buf) {
+ runner->runerr = errno;
+ return;
+ }
+ for (i = 0;; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ strcat(buf, runner->argv[i]);
+ strcat(buf, " ");
+ }
+ if (len > 0)
+ buf[len - 1] = '\0';
+
+ gf_msg_callingfn(dom, lvl, 0, LG_MSG_RUNNER_LOG, "%s: %s", msg, buf);
+
+ GF_FREE(buf);
}
void
-runner_redir (runner_t *runner, int fd, int tgt_fd)
+runner_redir(runner_t *runner, int fd, int tgt_fd)
{
- GF_ASSERT (fd > 0 && fd < 3);
+ GF_ASSERT(fd > 0 && fd < 3);
+ if ((fd > 0) && (fd < 3))
runner->chfd[fd] = (tgt_fd >= 0) ? tgt_fd : -2;
}
int
-runner_start (runner_t *runner)
+runner_start(runner_t *runner)
{
- int pi[3][2] = {{-1, -1}, {-1, -1}, {-1, -1}};
- int xpi[2];
- int ret = 0;
- int errno_priv = 0;
- int i = 0;
- sigset_t set;
-
- if (runner->runerr) {
- errno = runner->runerr;
- return -1;
- }
-
- GF_ASSERT (runner->argv[0]);
-
- /* set up a channel to child to communicate back
- * possible execve(2) failures
- */
- ret = pipe(xpi);
- if (ret != -1)
- ret = fcntl (xpi[1], F_SETFD, FD_CLOEXEC);
-
- for (i = 0; i < 3; i++) {
- if (runner->chfd[i] != -2)
- continue;
- ret = pipe (pi[i]);
- if (ret != -1) {
- runner->chio[i] = fdopen (pi[i][i ? 0 : 1], i ? "r" : "w");
- if (!runner->chio[i])
- ret = -1;
- }
+ int pi[3][2] = {{-1, -1}, {-1, -1}, {-1, -1}};
+ int xpi[2];
+ int ret = 0;
+ int errno_priv = 0;
+ int i = 0;
+ sigset_t set;
+
+ if (runner->runerr || !runner->argv) {
+ errno = (runner->runerr) ? runner->runerr : EINVAL;
+ return -1;
+ }
+
+ GF_ASSERT(runner->argv[0]);
+
+ /* set up a channel to child to communicate back
+ * possible execve(2) failures
+ */
+ ret = pipe(xpi);
+ if (ret != -1)
+ ret = fcntl(xpi[1], F_SETFD, FD_CLOEXEC);
+
+ for (i = 0; i < 3; i++) {
+ if (runner->chfd[i] != -2)
+ continue;
+ ret = pipe(pi[i]);
+ if (ret != -1) {
+ runner->chio[i] = fdopen(pi[i][i ? 0 : 1], i ? "r" : "w");
+ if (!runner->chio[i])
+ ret = -1;
}
+ }
- if (ret != -1)
- runner->chpid = fork ();
- switch (runner->chpid) {
+ if (ret != -1)
+ runner->chpid = fork();
+ switch (runner->chpid) {
case -1:
- errno_priv = errno;
- close (xpi[0]);
- close (xpi[1]);
- for (i = 0; i < 3; i++) {
- close (pi[i][0]);
- close (pi[i][1]);
- }
- errno = errno_priv;
- return -1;
+ errno_priv = errno;
+ sys_close(xpi[0]);
+ sys_close(xpi[1]);
+ for (i = 0; i < 3; i++) {
+ sys_close(pi[i][0]);
+ sys_close(pi[i][1]);
+ }
+ errno = errno_priv;
+ return -1;
case 0:
- for (i = 0; i < 3; i++)
- close (pi[i][i ? 0 : 1]);
- close (xpi[0]);
- ret = 0;
-
- for (i = 0; i < 3; i++) {
- if (ret == -1)
- break;
- switch (runner->chfd[i]) {
- case -1:
- /* no redir */
- break;
- case -2:
- /* redir to pipe */
- ret = dup2 (pi[i][i ? 1 : 0], i);
- break;
- default:
- /* redir to file */
- ret = dup2 (runner->chfd[i], i);
- }
- }
-
- if (ret != -1 ) {
-#ifdef GF_LINUX_HOST_OS
- DIR *d = NULL;
- struct dirent *de = NULL;
- char *e = NULL;
-
- d = opendir ("/proc/self/fd");
- if (d) {
- while ((de = readdir (d))) {
- i = strtoul (de->d_name, &e, 10);
- if (*e == '\0' && i > 2 &&
- i != dirfd (d) && i != xpi[1])
- close (i);
- }
- closedir (d);
- } else
- ret = -1;
-#else
- struct rlimit rl;
- ret = getrlimit (RLIMIT_NOFILE, &rl);
- GF_ASSERT (ret == 0);
-
- for (i = 3; i < rl.rlim_cur; i++) {
- if (i != xpi[1])
- close (i);
- }
-#endif
- }
-
- if (ret != -1) {
- /* save child from inheriting our singal handling */
- sigemptyset (&set);
- sigprocmask (SIG_SETMASK, &set, NULL);
-
- execvp (runner->argv[0], runner->argv);
- }
- ret = write (xpi[1], &errno, sizeof (errno));
- _exit (1);
- }
-
- errno_priv = errno;
- for (i = 0; i < 3; i++)
- close (pi[i][i ? 1 : 0]);
- close (xpi[1]);
- if (ret == -1) {
- for (i = 0; i < 3; i++) {
- if (runner->chio[i]) {
- fclose (runner->chio[i]);
- runner->chio[i] = NULL;
- }
+ for (i = 0; i < 3; i++)
+ sys_close(pi[i][i ? 0 : 1]);
+ sys_close(xpi[0]);
+ ret = 0;
+
+ for (i = 0; i < 3; i++) {
+ if (ret == -1)
+ break;
+ switch (runner->chfd[i]) {
+ case -1:
+ /* no redir */
+ break;
+ case -2:
+ /* redir to pipe */
+ ret = dup2(pi[i][i ? 1 : 0], i);
+ break;
+ default:
+ /* redir to file */
+ ret = dup2(runner->chfd[i], i);
}
- } else {
- ret = read (xpi[0], (char *)&errno_priv, sizeof (errno_priv));
- close (xpi[0]);
- if (ret <= 0)
- return 0;
- GF_ASSERT (ret == sizeof (errno_priv));
+ }
+
+ if (ret != -1) {
+ int fdv[4] = {0, 1, 2, xpi[1]};
+
+ ret = close_fds_except(fdv, sizeof(fdv) / sizeof(*fdv));
+ }
+
+ if (ret != -1) {
+ /* save child from inheriting our signal handling */
+ sigemptyset(&set);
+ sigprocmask(SIG_SETMASK, &set, NULL);
+
+ execvp(runner->argv[0], runner->argv);
+ }
+ ret = sys_write(xpi[1], &errno, sizeof(errno));
+ _exit(1);
+ }
+
+ errno_priv = errno;
+ for (i = 0; i < 3; i++)
+ sys_close(pi[i][i ? 1 : 0]);
+ sys_close(xpi[1]);
+ if (ret == -1) {
+ for (i = 0; i < 3; i++) {
+ if (runner->chio[i]) {
+ fclose(runner->chio[i]);
+ runner->chio[i] = NULL;
+ }
}
- errno = errno_priv;
- return -1;
+ } else {
+ ret = sys_read(xpi[0], (char *)&errno_priv, sizeof(errno_priv));
+ sys_close(xpi[0]);
+ if (ret <= 0)
+ return 0;
+ GF_ASSERT(ret == sizeof(errno_priv));
+ }
+ errno = errno_priv;
+ return -1;
}
int
-runner_end_reuse (runner_t *runner)
+runner_end_reuse(runner_t *runner)
{
- int i = 0;
- int ret = -1;
- int chstat = 0;
-
- if (runner->chpid > 0) {
- if (waitpid (runner->chpid, &chstat, 0) == runner->chpid)
- ret = chstat;
+ int i = 0;
+ int ret = 1;
+ int chstat = 0;
+
+ if (runner->chpid > 0) {
+ if (waitpid(runner->chpid, &chstat, 0) == runner->chpid) {
+ if (WIFEXITED(chstat)) {
+ ret = WEXITSTATUS(chstat);
+ } else {
+ ret = chstat;
+ }
}
+ }
- for (i = 0; i < 3; i++) {
- if (runner->chio[i]) {
- fclose (runner->chio[i]);
- runner->chio[i] = NULL;
- }
+ for (i = 0; i < 3; i++) {
+ if (runner->chio[i]) {
+ fclose(runner->chio[i]);
+ runner->chio[i] = NULL;
}
+ }
- return ret;
+ return -ret;
}
int
-runner_end (runner_t *runner)
+runner_end(runner_t *runner)
{
- int i = 0;
- int ret = -1;
- char **p = NULL;
+ int i = 0;
+ int ret = -1;
+ char **p = NULL;
- ret = runner_end_reuse (runner);
+ ret = runner_end_reuse(runner);
- if (runner->argv) {
- for (p = runner->argv; *p; p++)
- GF_FREE (*p);
- GF_FREE (runner->argv);
- }
- for (i = 0; i < 3; i++)
- close (runner->chfd[i]);
+ if (runner->argv) {
+ for (p = runner->argv; *p; p++)
+ GF_FREE(*p);
+ GF_FREE(runner->argv);
+ }
+ for (i = 0; i < 3; i++)
+ sys_close(runner->chfd[i]);
- return ret;
+ return ret;
}
static int
-runner_run_generic (runner_t *runner, int (*rfin)(runner_t *runner))
+runner_run_generic(runner_t *runner, int (*rfin)(runner_t *runner))
{
- int ret = 0;
+ int ret = 0;
- ret = runner_start (runner);
+ ret = runner_start(runner);
+ if (ret)
+ goto out;
+ ret = rfin(runner);
- return -(rfin (runner) || ret);
+out:
+ return ret;
}
int
-runner_run (runner_t *runner)
+runner_run(runner_t *runner)
{
- return runner_run_generic (runner, runner_end);
+ return runner_run_generic(runner, runner_end);
}
int
-runner_run_reuse (runner_t *runner)
+runner_run_nowait(runner_t *runner)
{
- return runner_run_generic (runner, runner_end_reuse);
+ int pid;
+
+ pid = fork();
+
+ if (!pid) {
+ setsid();
+ _exit(runner_start(runner));
+ }
+
+ if (pid > 0)
+ runner->chpid = pid;
+ return runner_end(runner);
}
int
-runcmd (const char *arg, ...)
+runner_run_reuse(runner_t *runner)
{
- runner_t runner;
- va_list argp;
+ return runner_run_generic(runner, runner_end_reuse);
+}
- runinit (&runner);
- /* ISO C requires a named argument before '...' */
- runner_add_arg (&runner, arg);
+int
+runcmd(const char *arg, ...)
+{
+ runner_t runner;
+ va_list argp;
+
+ runinit(&runner);
+ /* ISO C requires a named argument before '...' */
+ runner_add_arg(&runner, arg);
- va_start (argp, arg);
- runner_va_add_args (&runner, argp);
- va_end (argp);
+ va_start(argp, arg);
+ runner_va_add_args(&runner, argp);
+ va_end(argp);
- return runner_run (&runner);
+ return runner_run(&runner);
}
-#ifdef RUN_DO_TESTS
+#ifdef RUN_DO_DEMO
static void
-TBANNER (const char *txt)
+TBANNER(const char *txt)
{
- printf("######\n### testing %s\n", txt);
+ printf("######\n### demoing %s\n", txt);
}
int
-main (int argc, char **argv)
+main(int argc, char **argv)
{
- runner_t runner;
- char buf[80];
- char *wdbuf;;
- int ret;
- int fd;
- long pathmax = pathconf ("/", _PC_PATH_MAX);
- struct timeval tv = {0,};
- struct timeval *tvp = NULL;
-
- wdbuf = malloc (pathmax);
- assert (wdbuf);
- getcwd (wdbuf, pathmax);
-
- TBANNER ("basic functionality");
- runcmd ("echo", "a", "b", NULL);
-
- TBANNER ("argv extension");
- runcmd ("echo", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
- "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
- "21", "22", "23", "24", "25", "26", "27", "28", "29", "30",
- "31", "32", "33", "34", "35", "36", "37", "38", "39", "40",
- "41", "42", "43", "44", "45", "46", "47", "48", "49", "50",
- "51", "52", "53", "54", "55", "56", "57", "58", "59", "60",
- "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
- "71", "72", "73", "74", "75", "76", "77", "78", "79", "80",
- "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
- "91", "92", "93", "94", "95", "96", "97", "98", "99", "100", NULL);
-
- TBANNER ("add_args, argprintf, log, and popen-style functionality");
- runinit (&runner);
- runner_add_args (&runner, "echo", "pid:", NULL);
- runner_argprintf (&runner, "%d\n", getpid());
- runner_add_arg (&runner, "wd:");
- runner_add_arg (&runner, wdbuf);
- runner_redir (&runner, 1, RUN_PIPE);
- runner_start (&runner);
- runner_log (&runner, "(x)", LOG_DEBUG, "starting program");
- while (fgets (buf, sizeof(buf), runner_chio (&runner, 1)))
- printf ("got: %s", buf);
- runner_end (&runner);
-
- TBANNER ("execve error reporting");
- ret = runcmd ("bafflavvitty", NULL);
- printf ("%d %d [%s]\n", ret, errno, strerror (errno));
-
- TBANNER ("output redirection");
- fd = open ("/tmp/foof", O_WRONLY|O_CREAT|O_TRUNC, 0600);
- assert (fd != -1);
- runinit (&runner);
- runner_add_args (&runner, "echo", "foo", NULL);
- runner_redir (&runner, 1, fd);
- ret = runner_run (&runner);
- printf ("%d", ret);
- if (ret != 0)
- printf (" %d [%s]", errno, strerror (errno));
- putchar ('\n');
-
- if (argc > 1) {
- tv.tv_sec = strtoul (argv[1], NULL, 10);
- if (tv.tv_sec > 0)
- tvp = &tv;
- select (0, 0, 0, 0, tvp);
- }
-
- return 0;
+ runner_t runner;
+ char buf[80];
+ char *wdbuf;
+ ;
+ int ret;
+ int fd;
+ long pathmax = pathconf("/", _PC_PATH_MAX);
+ struct timeval tv = {
+ 0,
+ };
+ struct timeval *tvp = NULL;
+ char *tfile;
+
+ wdbuf = malloc(pathmax);
+ assert(wdbuf);
+ getcwd(wdbuf, pathmax);
+
+ TBANNER("basic functionality: running \"echo a b\"");
+ runcmd("echo", "a", "b", NULL);
+
+ TBANNER("argv extension: running \"echo 1 2 ... 100\"");
+ runcmd("echo", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
+ "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
+ "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33",
+ "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44",
+ "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55",
+ "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", "66",
+ "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77",
+ "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88",
+ "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99",
+ "100", NULL);
+
+ TBANNER(
+ "add_args, argprintf, log, and popen-style functionality:\n"
+ " running a multiline echo command, emit a log about it,\n"
+ " redirect it to a pipe, read output lines\n"
+ " and print them prefixed with \"got: \"");
+ runinit(&runner);
+ runner_add_args(&runner, "echo", "pid:", NULL);
+ runner_argprintf(&runner, "%d\n", getpid());
+ runner_add_arg(&runner, "wd:");
+ runner_add_arg(&runner, wdbuf);
+ runner_redir(&runner, 1, RUN_PIPE);
+ runner_start(&runner);
+ runner_log(&runner, "(x)", LOG_DEBUG, "starting program");
+ while (fgets(buf, sizeof(buf), runner_chio(&runner, 1)))
+ printf("got: %s", buf);
+ runner_end(&runner);
+
+ TBANNER("execve error reporting: running a non-existent command");
+ ret = runcmd("bafflavvitty", NULL);
+ printf("%d %d [%s]\n", ret, errno, strerror(errno));
+
+ TBANNER(
+ "output redirection: running \"echo foo\" redirected "
+ "to a temp file");
+ tfile = strdup("/tmp/foofXXXXXX");
+ assert(tfile);
+ fd = mkstemp(tfile);
+ assert(fd != -1);
+ printf("redirecting to %s\n", tfile);
+ runinit(&runner);
+ runner_add_args(&runner, "echo", "foo", NULL);
+ runner_redir(&runner, 1, fd);
+ ret = runner_run(&runner);
+ printf("runner_run returned: %d", ret);
+ if (ret != 0)
+ printf(", with errno %d [%s]", errno, strerror(errno));
+ putchar('\n');
+
+ /* sleep for seconds given as argument (0 means forever)
+ * to allow investigation of post-execution state to
+ * cbeck for resource leaks (eg. zombies).
+ */
+ if (argc > 1) {
+ tv.tv_sec = strtoul(argv[1], NULL, 10);
+ printf("### %s", "sleeping for");
+ if (tv.tv_sec > 0) {
+ printf(" %d seconds\n", tv.tv_sec);
+ tvp = &tv;
+ } else
+ printf("%s\n", "ever");
+ select(0, 0, 0, 0, tvp);
+ }
+
+ return 0;
}
#endif
diff --git a/libglusterfs/src/scheduler.c b/libglusterfs/src/scheduler.c
deleted file mode 100644
index 9817f3e26b6..00000000000
--- a/libglusterfs/src/scheduler.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <netdb.h>
-#include "xlator.h"
-#include "scheduler.h"
-#include "list.h"
-
-struct sched_ops *
-get_scheduler (xlator_t *xl, const char *name)
-{
- struct sched_ops *tmp_sched = NULL;
- volume_opt_list_t *vol_opt = NULL;
- char *sched_file = NULL;
- void *handle = NULL;
- int ret = 0;
-
- if (name == NULL) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "'name' not specified, EINVAL");
- return NULL;
- }
-
- ret = gf_asprintf (&sched_file, "%s/%s.so", SCHEDULERDIR, name);
- if (-1 == ret) {
- gf_log ("scheduler", GF_LOG_ERROR, "asprintf failed");
- return NULL;
- }
-
- gf_log ("scheduler", GF_LOG_DEBUG,
- "attempt to load file %s.so", name);
-
- handle = dlopen (sched_file, RTLD_LAZY);
- if (!handle) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlopen(%s): %s", sched_file, dlerror ());
- GF_FREE(sched_file);
- return NULL;
- }
-
- tmp_sched = dlsym (handle, "sched");
- if (!tmp_sched) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlsym(sched) on %s", dlerror ());
- GF_FREE(sched_file);
- return NULL;
- }
-
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("scheduler", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- list_add_tail (&vol_opt->list, &xl->volume_options);
- if (validate_xlator_volume_options (xl, vol_opt->given_opt)
- == -1) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "volume option validation failed");
- GF_FREE(sched_file);
- return NULL;
- }
- }
- GF_FREE(sched_file);
- GF_FREE (vol_opt);
-
- return tmp_sched;
-}
diff --git a/libglusterfs/src/scheduler.h b/libglusterfs/src/scheduler.h
deleted file mode 100644
index 2f1e12205df..00000000000
--- a/libglusterfs/src/scheduler.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _SCHEDULER_H
-#define _SCHEDULER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-struct sched_ops {
- int32_t (*init) (xlator_t *this);
- void (*fini) (xlator_t *this);
- void (*update) (xlator_t *this);
- xlator_t *(*schedule) (xlator_t *this, const void *path);
- void (*notify) (xlator_t *xl, int32_t event, void *data);
- int32_t (*mem_acct_init) (xlator_t *this);
-};
-
-extern struct sched_ops *get_scheduler (xlator_t *xl, const char *name);
-
-#endif /* _SCHEDULER_H */
diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c
index f922be83c7f..1531f0da43f 100644
--- a/libglusterfs/src/stack.c
+++ b/libglusterfs/src/stack.c
@@ -8,351 +8,445 @@
cases as published by the Free Software Foundation.
*/
-#include "statedump.h"
-#include "stack.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/libglusterfs-messages.h"
-static inline
-int call_frames_count (call_frame_t *call_frame)
+call_frame_t *
+create_frame(xlator_t *xl, call_pool_t *pool)
{
- call_frame_t *pos;
- int32_t count = 0;
-
- if (!call_frame)
- return count;
-
- for (pos = call_frame; pos != NULL; pos = pos->next)
- count++;
+ call_stack_t *stack = NULL;
+ call_frame_t *frame = NULL;
+ static uint64_t unique = 0;
+
+ if (!xl || !pool) {
+ return NULL;
+ }
+
+ stack = mem_get0(pool->stack_mem_pool);
+ if (!stack)
+ return NULL;
+
+ INIT_LIST_HEAD(&stack->myframes);
+
+ frame = mem_get0(pool->frame_mem_pool);
+ if (!frame) {
+ mem_put(stack);
+ return NULL;
+ }
+
+ frame->root = stack;
+ frame->this = xl;
+ LOCK_INIT(&frame->lock);
+ INIT_LIST_HEAD(&frame->frames);
+ list_add(&frame->frames, &stack->myframes);
+
+ stack->pool = pool;
+ stack->ctx = xl->ctx;
+
+ if (frame->root->ctx->measure_latency) {
+ timespec_now(&stack->tv);
+ memcpy(&frame->begin, &stack->tv, sizeof(stack->tv));
+ }
+
+ LOCK(&pool->lock);
+ {
+ list_add(&stack->all_frames, &pool->all_frames);
+ pool->cnt++;
+ stack->unique = unique++;
+ }
+ UNLOCK(&pool->lock);
+ GF_ATOMIC_INC(pool->total_count);
+
+ LOCK_INIT(&stack->stack_lock);
+
+ return frame;
+}
- return count;
+void
+call_stack_set_groups(call_stack_t *stack, int ngrps, gid_t **groupbuf_p)
+{
+ /* We take the ownership of the passed group buffer. */
+
+ if (ngrps <= SMALL_GROUP_COUNT) {
+ memcpy(stack->groups_small, *groupbuf_p, sizeof(gid_t) * ngrps);
+ stack->groups = stack->groups_small;
+ GF_FREE(*groupbuf_p);
+ } else {
+ stack->groups_large = *groupbuf_p;
+ stack->groups = stack->groups_large;
+ }
+
+ stack->ngrps = ngrps;
+ /* Set a canary. */
+ *groupbuf_p = (void *)0xdeadf00d;
}
void
-gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
+gf_proc_dump_call_frame(call_frame_t *call_frame, const char *key_buf, ...)
{
+ char prefix[GF_DUMP_MAX_BUF_LEN];
+ va_list ap;
+ call_frame_t my_frame = {
+ 0,
+ };
+
+ int ret = -1;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ int len;
+
+ if (!call_frame)
+ return;
- char prefix[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
- call_frame_t my_frame;
- int ret = -1;
+ GF_ASSERT(key_buf);
- if (!call_frame)
- return;
+ va_start(ap, key_buf);
+ vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
+ va_end(ap);
- GF_ASSERT (key_buf);
+ ret = TRY_LOCK(&call_frame->lock);
+ if (ret)
+ goto out;
- memset(prefix, 0, sizeof(prefix));
- memset(&my_frame, 0, sizeof(my_frame));
- va_start(ap, key_buf);
- vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
- va_end(ap);
+ memcpy(&my_frame, call_frame, sizeof(my_frame));
+ UNLOCK(&call_frame->lock);
- ret = TRY_LOCK(&call_frame->lock);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call frame"
- " errno: %s", strerror (errno));
- return;
- }
+ if (my_frame.root->ctx->measure_latency) {
+ gf_time_fmt(timestr, sizeof(timestr), my_frame.begin.tv_sec,
+ gf_timefmt_FT);
+ len = strlen(timestr);
+ snprintf(timestr + len, sizeof(timestr) - len, ".%" GF_PRI_SNSECONDS,
+ my_frame.begin.tv_nsec);
+ gf_proc_dump_write("frame-creation-time", "%s", timestr);
+ gf_proc_dump_write(
+ "timings", "%ld.%" GF_PRI_SNSECONDS " -> %ld.%" GF_PRI_SNSECONDS,
+ my_frame.begin.tv_sec, my_frame.begin.tv_nsec, my_frame.end.tv_sec,
+ my_frame.end.tv_nsec);
+ }
- memcpy(&my_frame, call_frame, sizeof(my_frame));
- UNLOCK(&call_frame->lock);
+ gf_proc_dump_write("frame", "%p", call_frame);
+ gf_proc_dump_write("ref_count", "%d", my_frame.ref_count);
+ gf_proc_dump_write("translator", "%s", my_frame.this->name);
+ gf_proc_dump_write("complete", "%d", my_frame.complete);
- gf_proc_dump_write("ref_count", "%d", my_frame.ref_count);
- gf_proc_dump_write("translator", "%s", my_frame.this->name);
- gf_proc_dump_write("complete", "%d", my_frame.complete);
- if (my_frame.parent)
- gf_proc_dump_write("parent", "%s", my_frame.parent->this->name);
+ if (my_frame.parent)
+ gf_proc_dump_write("parent", "%s", my_frame.parent->this->name);
- if (my_frame.wind_from)
- gf_proc_dump_write("wind_from", "%s", my_frame.wind_from);
+ if (my_frame.wind_from)
+ gf_proc_dump_write("wind_from", "%s", my_frame.wind_from);
- if (my_frame.wind_to)
- gf_proc_dump_write("wind_to", "%s", my_frame.wind_to);
+ if (my_frame.wind_to)
+ gf_proc_dump_write("wind_to", "%s", my_frame.wind_to);
- if (my_frame.unwind_from)
- gf_proc_dump_write("unwind_from", "%s", my_frame.unwind_from);
+ if (my_frame.unwind_from)
+ gf_proc_dump_write("unwind_from", "%s", my_frame.unwind_from);
- if (my_frame.unwind_to)
- gf_proc_dump_write("unwind_to", "%s", my_frame.unwind_to);
-}
+ if (my_frame.unwind_to)
+ gf_proc_dump_write("unwind_to", "%s", my_frame.unwind_to);
+ ret = 0;
+out:
+ if (ret) {
+ gf_proc_dump_write("Unable to dump the frame information",
+ "(Lock acquisition failed)");
+ return;
+ }
+}
void
-gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
+gf_proc_dump_call_stack(call_stack_t *call_stack, const char *key_buf, ...)
{
- char prefix[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
- call_frame_t *trav;
- int32_t cnt, i;
-
- if (!call_stack)
- return;
-
- GF_ASSERT (key_buf);
-
- cnt = call_frames_count(&call_stack->frames);
-
- memset(prefix, 0, sizeof(prefix));
- va_start(ap, key_buf);
- vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
- va_end(ap);
-
- gf_proc_dump_write("uid", "%d", call_stack->uid);
- gf_proc_dump_write("gid", "%d", call_stack->gid);
- gf_proc_dump_write("pid", "%d", call_stack->pid);
- gf_proc_dump_write("unique", "%Ld", call_stack->unique);
- gf_proc_dump_write("lk-owner", "%s", lkowner_utoa (&call_stack->lk_owner));
-
- if (call_stack->type == GF_OP_TYPE_FOP)
- gf_proc_dump_write("op", "%s", gf_fop_list[call_stack->op]);
- else if (call_stack->type == GF_OP_TYPE_MGMT)
- gf_proc_dump_write("op", "%s", gf_mgmt_list[call_stack->op]);
-
- gf_proc_dump_write("type", "%d", call_stack->type);
- gf_proc_dump_write("cnt", "%d", cnt);
-
- trav = &call_stack->frames;
-
- for (i = 1; i <= cnt; i++) {
- if (trav) {
- gf_proc_dump_add_section("%s.frame.%d", prefix, i);
- gf_proc_dump_call_frame(trav, "%s.frame.%d", prefix, i);
- trav = trav->next;
- }
- }
+ char prefix[GF_DUMP_MAX_BUF_LEN];
+ va_list ap;
+ call_frame_t *trav;
+ int32_t i = 1, cnt = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ int len;
+
+ if (!call_stack)
+ return;
+
+ GF_ASSERT(key_buf);
+
+ va_start(ap, key_buf);
+ vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
+ va_end(ap);
+
+ cnt = call_frames_count(call_stack);
+ gf_time_fmt(timestr, sizeof(timestr), call_stack->tv.tv_sec, gf_timefmt_FT);
+ len = strlen(timestr);
+ snprintf(timestr + len, sizeof(timestr) - len, ".%" GF_PRI_SNSECONDS,
+ call_stack->tv.tv_nsec);
+ gf_proc_dump_write("callstack-creation-time", "%s", timestr);
+
+ gf_proc_dump_write("stack", "%p", call_stack);
+ gf_proc_dump_write("uid", "%d", call_stack->uid);
+ gf_proc_dump_write("gid", "%d", call_stack->gid);
+ gf_proc_dump_write("pid", "%d", call_stack->pid);
+ gf_proc_dump_write("unique", "%" PRIu64, call_stack->unique);
+ gf_proc_dump_write("lk-owner", "%s", lkowner_utoa(&call_stack->lk_owner));
+ gf_proc_dump_write("ctime", "%" GF_PRI_SECOND ".%" GF_PRI_SNSECONDS,
+ call_stack->tv.tv_sec, call_stack->tv.tv_nsec);
+
+ if (call_stack->type == GF_OP_TYPE_FOP)
+ gf_proc_dump_write("op", "%s", (char *)gf_fop_list[call_stack->op]);
+ else
+ gf_proc_dump_write("op", "stack");
+
+ gf_proc_dump_write("type", "%d", call_stack->type);
+ gf_proc_dump_write("cnt", "%d", cnt);
+
+ list_for_each_entry(trav, &call_stack->myframes, frames)
+ {
+ gf_proc_dump_add_section("%s.frame.%d", prefix, i);
+ gf_proc_dump_call_frame(trav, "%s.frame.%d", prefix, i);
+ i++;
+ }
}
void
-gf_proc_dump_pending_frames (call_pool_t *call_pool)
+gf_proc_dump_pending_frames(call_pool_t *call_pool)
{
+ call_stack_t *trav = NULL;
+ int i = 1;
+ int ret = -1;
+ gf_boolean_t section_added = _gf_false;
- call_stack_t *trav = NULL;
- int i = 1;
- int ret = -1;
-
- if (!call_pool)
- return;
-
- ret = TRY_LOCK (&(call_pool->lock));
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call pool"
- " errno: %d", errno);
- return;
- }
+ if (!call_pool)
+ return;
+ ret = TRY_LOCK(&(call_pool->lock));
+ if (ret)
+ goto out;
- gf_proc_dump_add_section("global.callpool");
- gf_proc_dump_write("callpool_address","%p", call_pool);
- gf_proc_dump_write("callpool.cnt","%d", call_pool->cnt);
+ gf_proc_dump_add_section("global.callpool");
+ section_added = _gf_true;
+ gf_proc_dump_write("callpool_address", "%p", call_pool);
+ gf_proc_dump_write("callpool.cnt", "%" PRId64, call_pool->cnt);
+ list_for_each_entry(trav, &call_pool->all_frames, all_frames)
+ {
+ gf_proc_dump_add_section("global.callpool.stack.%d", i);
+ gf_proc_dump_call_stack(trav, "global.callpool.stack.%d", i);
+ i++;
+ }
+ UNLOCK(&(call_pool->lock));
- list_for_each_entry (trav, &call_pool->all_frames, all_frames) {
- gf_proc_dump_add_section("global.callpool.stack.%d",i);
- gf_proc_dump_call_stack(trav, "global.callpool.stack.%d", i);
- i++;
- }
- UNLOCK (&(call_pool->lock));
+ ret = 0;
+out:
+ if (ret) {
+ if (_gf_false == section_added)
+ gf_proc_dump_add_section("global.callpool");
+ gf_proc_dump_write("Unable to dump the callpool",
+ "(Lock acquisition failed) %p", call_pool);
+ }
+ return;
}
void
-gf_proc_dump_call_frame_to_dict (call_frame_t *call_frame,
- char *prefix, dict_t *dict)
+gf_proc_dump_call_frame_to_dict(call_frame_t *call_frame, char *prefix,
+ dict_t *dict)
{
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- call_frame_t tmp_frame = {0,};
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char msg[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ call_frame_t tmp_frame = {
+ 0,
+ };
+
+ if (!call_frame || !dict)
+ return;
- if (!call_frame || !dict)
- return;
+ ret = TRY_LOCK(&call_frame->lock);
+ if (ret)
+ return;
+ memcpy(&tmp_frame, call_frame, sizeof(tmp_frame));
+ UNLOCK(&call_frame->lock);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_set_int32(dict, key, tmp_frame.ref_count);
+ if (ret)
+ return;
- ret = TRY_LOCK (&call_frame->lock);
+ snprintf(key, sizeof(key), "%s.translator", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.this->name));
+ if (ret)
+ return;
+
+ snprintf(key, sizeof(key), "%s.complete", prefix);
+ ret = dict_set_int32(dict, key, tmp_frame.complete);
+ if (ret)
+ return;
+
+ if (tmp_frame.root->ctx->measure_latency) {
+ snprintf(key, sizeof(key), "%s.timings", prefix);
+ snprintf(msg, sizeof(msg),
+ "%ld.%" GF_PRI_SNSECONDS " -> %ld.%" GF_PRI_SNSECONDS,
+ tmp_frame.begin.tv_sec, tmp_frame.begin.tv_nsec,
+ tmp_frame.end.tv_sec, tmp_frame.end.tv_nsec);
+ ret = dict_set_str(dict, key, msg);
if (ret)
- return;
- memcpy (&tmp_frame, call_frame, sizeof (tmp_frame));
- UNLOCK (&call_frame->lock);
+ return;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_set_int32 (dict, key, tmp_frame.ref_count);
+ if (tmp_frame.parent) {
+ snprintf(key, sizeof(key), "%s.parent", prefix);
+ ret = dict_set_dynstr(dict, key,
+ gf_strdup(tmp_frame.parent->this->name));
if (ret)
- return;
+ return;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.translator", prefix);
- ret = dict_set_dynstr (dict, key, gf_strdup (tmp_frame.this->name));
+ if (tmp_frame.wind_from) {
+ snprintf(key, sizeof(key), "%s.windfrom", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.wind_from));
if (ret)
- return;
+ return;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.complete", prefix);
- ret = dict_set_int32 (dict, key, tmp_frame.complete);
+ if (tmp_frame.wind_to) {
+ snprintf(key, sizeof(key), "%s.windto", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.wind_to));
if (ret)
- return;
-
- if (tmp_frame.parent) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.parent", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.parent->this->name));
- if (ret)
- return;
- }
-
- if (tmp_frame.wind_from) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windfrom", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.wind_from));
- if (ret)
- return;
- }
-
- if (tmp_frame.wind_to) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windto", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.wind_to));
- if (ret)
- return;
- }
-
- if (tmp_frame.unwind_from) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.unwind_from));
- if (ret)
- return;
- }
-
- if (tmp_frame.unwind_to) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwind_to", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.unwind_to));
- }
+ return;
+ }
- return;
+ if (tmp_frame.unwind_from) {
+ snprintf(key, sizeof(key), "%s.unwindfrom", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.unwind_from));
+ if (ret)
+ return;
+ }
+
+ if (tmp_frame.unwind_to) {
+ snprintf(key, sizeof(key), "%s.unwind_to", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.unwind_to));
+ if (ret)
+ return;
+ }
+
+ return;
}
void
-gf_proc_dump_call_stack_to_dict (call_stack_t *call_stack,
- char *prefix, dict_t *dict)
+gf_proc_dump_call_stack_to_dict(call_stack_t *call_stack, char *prefix,
+ dict_t *dict)
{
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- call_frame_t *trav = NULL;
- int count = 0;
- int i = 0;
-
- if (!call_stack || !dict)
- return;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ call_frame_t *trav = NULL;
+ int i = 0;
+ int count = 0;
+
+ if (!call_stack || !dict)
+ return;
- count = call_frames_count (&call_stack->frames);
+ count = call_frames_count(call_stack);
+ snprintf(key, sizeof(key), "%s.uid", prefix);
+ ret = dict_set_int32(dict, key, call_stack->uid);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.uid", prefix);
- ret = dict_set_int32 (dict, key, call_stack->uid);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.gid", prefix);
+ ret = dict_set_int32(dict, key, call_stack->gid);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gid", prefix);
- ret = dict_set_int32 (dict, key, call_stack->gid);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_set_int32(dict, key, call_stack->pid);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_set_int32 (dict, key, call_stack->pid);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.unique", prefix);
+ ret = dict_set_uint64(dict, key, call_stack->unique);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unique", prefix);
- ret = dict_set_uint64 (dict, key, call_stack->unique);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.op", prefix);
- if (call_stack->type == GF_OP_TYPE_FOP)
- ret = dict_set_str (dict, key,
- gf_fop_list[call_stack->op]);
- else if (call_stack->type == GF_OP_TYPE_MGMT)
- ret = dict_set_str (dict, key,
- gf_mgmt_list[call_stack->op]);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.op", prefix);
+ if (call_stack->type == GF_OP_TYPE_FOP)
+ ret = dict_set_str(dict, key, (char *)gf_fop_list[call_stack->op]);
+ else
+ ret = dict_set_str(dict, key, "other");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.type", prefix);
- ret = dict_set_int32 (dict, key, call_stack->type);
- if (ret)
- return;
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.count", prefix);
- ret = dict_set_int32 (dict, key, count);
- if (ret)
- return;
-
- trav = &call_stack->frames;
- for (i = 0; i < count; i++) {
- if (trav) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.frame%d",
- prefix, i);
- gf_proc_dump_call_frame_to_dict (trav, key, dict);
- trav = trav->next;
- }
- }
+ snprintf(key, sizeof(key), "%s.type", prefix);
+ ret = dict_set_int32(dict, key, call_stack->type);
+ if (ret)
+ return;
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_set_int32(dict, key, count);
+ if (ret)
return;
+
+ list_for_each_entry(trav, &call_stack->myframes, frames)
+ {
+ snprintf(key, sizeof(key), "%s.frame%d", prefix, i);
+ gf_proc_dump_call_frame_to_dict(trav, key, dict);
+ i++;
+ }
+
+ return;
}
void
-gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool, dict_t *dict)
+gf_proc_dump_pending_frames_to_dict(call_pool_t *call_pool, dict_t *dict)
{
- int ret = -1;
- call_stack_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int i = 0;
-
- if (!call_pool || !dict)
- return;
-
- ret = TRY_LOCK (&call_pool->lock);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "Unable to dump call pool"
- " to dict. errno: %d", errno);
- return;
- }
-
- ret = dict_set_int32 (dict, "callpool.count", call_pool->cnt);
- if (ret)
- goto out;
+ int ret = -1;
+ call_stack_t *trav = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int i = 0;
+
+ if (!call_pool || !dict)
+ return;
+
+ ret = TRY_LOCK(&call_pool->lock);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_LOCK_FAILURE,
+ "Unable to dump call "
+ "pool to dict.");
+ return;
+ }
+
+ ret = dict_set_int32(dict, "callpool.count", call_pool->cnt);
+ if (ret)
+ goto out;
- list_for_each_entry (trav, &call_pool->all_frames, all_frames) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "callpool.stack%d", i);
- gf_proc_dump_call_stack_to_dict (trav, key, dict);
- i++;
- }
+ list_for_each_entry(trav, &call_pool->all_frames, all_frames)
+ {
+ snprintf(key, sizeof(key), "callpool.stack%d", i);
+ gf_proc_dump_call_stack_to_dict(trav, key, dict);
+ i++;
+ }
out:
- UNLOCK (&call_pool->lock);
+ UNLOCK(&call_pool->lock);
- return;
+ return;
}
gf_boolean_t
-__is_fuse_call (call_frame_t *frame)
+__is_fuse_call(call_frame_t *frame)
{
- gf_boolean_t is_fuse_call = _gf_false;
- GF_ASSERT (frame);
- GF_ASSERT (frame->root);
+ gf_boolean_t is_fuse_call = _gf_false;
+ GF_ASSERT(frame);
+ GF_ASSERT(frame->root);
- if (NFS_PID != frame->root->pid)
- is_fuse_call = _gf_true;
- return is_fuse_call;
+ if (NFS_PID != frame->root->pid)
+ is_fuse_call = _gf_true;
+ return is_fuse_call;
}
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h
deleted file mode 100644
index 63307192ae5..00000000000
--- a/libglusterfs/src/stack.h
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/*
- This file defines MACROS and static inlines used to emulate a function
- call over asynchronous communication with remote server
-*/
-
-#ifndef _STACK_H
-#define _STACK_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-struct _call_stack_t;
-typedef struct _call_stack_t call_stack_t;
-struct _call_frame_t;
-typedef struct _call_frame_t call_frame_t;
-struct _call_pool_t;
-typedef struct _call_pool_t call_pool_t;
-
-#include <sys/time.h>
-
-#include "xlator.h"
-#include "dict.h"
-#include "list.h"
-#include "common-utils.h"
-#include "globals.h"
-#include "lkowner.h"
-
-#define NFS_PID 1
-#define LOW_PRIO_PROC_PID -1
-typedef int32_t (*ret_fn_t) (call_frame_t *frame,
- call_frame_t *prev_frame,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- ...);
-
-struct _call_pool_t {
- union {
- struct list_head all_frames;
- struct {
- call_stack_t *next_call;
- call_stack_t *prev_call;
- } all_stacks;
- };
- int64_t cnt;
- gf_lock_t lock;
- struct mem_pool *frame_mem_pool;
- struct mem_pool *stack_mem_pool;
-};
-
-struct _call_frame_t {
- call_stack_t *root; /* stack root */
- call_frame_t *parent; /* previous BP */
- call_frame_t *next;
- call_frame_t *prev; /* maintenance list */
- void *local; /* local variables */
- xlator_t *this; /* implicit object */
- ret_fn_t ret; /* op_return address */
- int32_t ref_count;
- gf_lock_t lock;
- void *cookie; /* unique cookie */
- gf_boolean_t complete;
-
- glusterfs_fop_t op;
- struct timeval begin; /* when this frame was created */
- struct timeval end; /* when this frame completed */
- const char *wind_from;
- const char *wind_to;
- const char *unwind_from;
- const char *unwind_to;
-};
-
-struct _call_stack_t {
- union {
- struct list_head all_frames;
- struct {
- call_stack_t *next_call;
- call_stack_t *prev_call;
- };
- };
- call_pool_t *pool;
- gf_lock_t stack_lock;
- void *trans;
- uint64_t unique;
- void *state; /* pointer to request state */
- uid_t uid;
- gid_t gid;
- pid_t pid;
- uint16_t ngrps;
- uint32_t groups[GF_MAX_AUX_GROUPS];
- gf_lkowner_t lk_owner;
-
- call_frame_t frames;
-
- int32_t op;
- int8_t type;
-};
-
-
-#define frame_set_uid_gid(frm, u, g) \
- do { \
- if (frm) { \
- (frm)->root->uid = u; \
- (frm)->root->gid = g; \
- (frm)->root->ngrps = 0; \
- } \
- } while (0); \
-
-
-struct xlator_fops;
-
-void
-gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops,
- void *fn);
-
-void
-gf_update_latency (call_frame_t *frame);
-
-static inline void
-FRAME_DESTROY (call_frame_t *frame)
-{
- void *local = NULL;
- if (frame->next)
- frame->next->prev = frame->prev;
- if (frame->prev)
- frame->prev->next = frame->next;
- if (frame->local) {
- local = frame->local;
- frame->local = NULL;
-
- }
-
- LOCK_DESTROY (&frame->lock);
- mem_put (frame);
-
- if (local)
- mem_put (local);
-}
-
-
-static inline void
-STACK_DESTROY (call_stack_t *stack)
-{
- void *local = NULL;
-
- LOCK (&stack->pool->lock);
- {
- list_del_init (&stack->all_frames);
- stack->pool->cnt--;
- }
- UNLOCK (&stack->pool->lock);
-
- if (stack->frames.local) {
- local = stack->frames.local;
- stack->frames.local = NULL;
- }
-
- LOCK_DESTROY (&stack->frames.lock);
- LOCK_DESTROY (&stack->stack_lock);
-
- while (stack->frames.next) {
- FRAME_DESTROY (stack->frames.next);
- }
- mem_put (stack);
-
- if (local)
- mem_put (local);
-}
-
-static inline void
-STACK_RESET (call_stack_t *stack)
-{
- void *local = NULL;
-
- if (stack->frames.local) {
- local = stack->frames.local;
- stack->frames.local = NULL;
- }
-
- while (stack->frames.next) {
- FRAME_DESTROY (stack->frames.next);
- }
-
- if (local)
- mem_put (local);
-}
-
-#define cbk(x) cbk_##x
-
-#define FRAME_SU_DO(frm, local_type) \
- do { \
- local_type *__local = (frm)->local; \
- __local->uid = frm->root->uid; \
- __local->gid = frm->root->gid; \
- frm->root->uid = 0; \
- frm->root->gid = 0; \
- } while (0); \
-
-#define FRAME_SU_UNDO(frm, local_type) \
- do { \
- local_type *__local = (frm)->local; \
- frm->root->uid = __local->uid; \
- frm->root->gid = __local->gid; \
- } while (0); \
-
-
-/* make a call */
-#define STACK_WIND(frame, rfn, obj, fn, params ...) \
- do { \
- call_frame_t *_new = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- _new = mem_get0 (frame->root->pool->frame_mem_pool); \
- if (!_new) { \
- gf_log ("stack", GF_LOG_ERROR, "alloc failed"); \
- break; \
- } \
- typeof(fn##_cbk) tmp_cbk = rfn; \
- _new->root = frame->root; \
- _new->this = obj; \
- _new->ret = (ret_fn_t) tmp_cbk; \
- _new->parent = frame; \
- _new->cookie = _new; \
- _new->wind_from = __FUNCTION__; \
- _new->wind_to = #fn; \
- _new->unwind_to = #rfn; \
- LOCK_INIT (&_new->lock); \
- LOCK(&frame->root->stack_lock); \
- { \
- _new->next = frame->root->frames.next; \
- _new->prev = &frame->root->frames; \
- if (frame->root->frames.next) \
- frame->root->frames.next->prev = _new; \
- frame->root->frames.next = _new; \
- frame->ref_count++; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- old_THIS = THIS; \
- THIS = obj; \
- fn (_new, obj, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-/* make a call with a cookie */
-#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params ...) \
- do { \
- call_frame_t *_new = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- _new = mem_get0 (frame->root->pool->frame_mem_pool); \
- if (!_new) { \
- gf_log ("stack", GF_LOG_ERROR, "alloc failed"); \
- break; \
- } \
- typeof(fn##_cbk) tmp_cbk = rfn; \
- _new->root = frame->root; \
- _new->this = obj; \
- _new->ret = (ret_fn_t) tmp_cbk; \
- _new->parent = frame; \
- _new->cookie = cky; \
- _new->wind_from = __FUNCTION__; \
- _new->wind_to = #fn; \
- _new->unwind_to = #rfn; \
- LOCK_INIT (&_new->lock); \
- LOCK(&frame->root->stack_lock); \
- { \
- frame->ref_count++; \
- _new->next = frame->root->frames.next; \
- _new->prev = &frame->root->frames; \
- if (frame->root->frames.next) \
- frame->root->frames.next->prev = _new; \
- frame->root->frames.next = _new; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- fn##_cbk = rfn; \
- old_THIS = THIS; \
- THIS = obj; \
- fn (_new, obj, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-/* return from function */
-#define STACK_UNWIND(frame, params ...) \
- do { \
- ret_fn_t fn = NULL; \
- call_frame_t *_parent = NULL; \
- xlator_t *old_THIS = NULL; \
- if (!frame) { \
- gf_log ("stack", GF_LOG_CRITICAL, "!frame"); \
- break; \
- } \
- fn = frame->ret; \
- _parent = frame->parent; \
- LOCK(&frame->root->stack_lock); \
- { \
- _parent->ref_count--; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- old_THIS = THIS; \
- THIS = _parent->this; \
- frame->complete = _gf_true; \
- frame->unwind_from = __FUNCTION__; \
- fn (_parent, frame->cookie, _parent->this, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-/* return from function in type-safe way */
-#define STACK_UNWIND_STRICT(op, frame, params ...) \
- do { \
- fop_##op##_cbk_t fn = NULL; \
- call_frame_t *_parent = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- if (!frame) { \
- gf_log ("stack", GF_LOG_CRITICAL, "!frame"); \
- break; \
- } \
- fn = (fop_##op##_cbk_t )frame->ret; \
- _parent = frame->parent; \
- LOCK(&frame->root->stack_lock); \
- { \
- _parent->ref_count--; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- old_THIS = THIS; \
- THIS = _parent->this; \
- frame->complete = _gf_true; \
- frame->unwind_from = __FUNCTION__; \
- fn (_parent, frame->cookie, _parent->this, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-static inline call_frame_t *
-copy_frame (call_frame_t *frame)
-{
- call_stack_t *newstack = NULL;
- call_stack_t *oldstack = NULL;
-
- if (!frame) {
- return NULL;
- }
-
- newstack = mem_get0 (frame->root->pool->stack_mem_pool);
- if (newstack == NULL) {
- return NULL;
- }
-
- oldstack = frame->root;
-
- newstack->uid = oldstack->uid;
- newstack->gid = oldstack->gid;
- newstack->pid = oldstack->pid;
- newstack->ngrps = oldstack->ngrps;
- newstack->op = oldstack->op;
- newstack->type = oldstack->type;
- memcpy (newstack->groups, oldstack->groups,
- sizeof (gid_t) * GF_MAX_AUX_GROUPS);
- newstack->unique = oldstack->unique;
-
- newstack->frames.this = frame->this;
- newstack->frames.root = newstack;
- newstack->pool = oldstack->pool;
- newstack->lk_owner = oldstack->lk_owner;
-
- LOCK_INIT (&newstack->frames.lock);
- LOCK_INIT (&newstack->stack_lock);
-
- LOCK (&oldstack->pool->lock);
- {
- list_add (&newstack->all_frames, &oldstack->all_frames);
- newstack->pool->cnt++;
- }
- UNLOCK (&oldstack->pool->lock);
-
- return &newstack->frames;
-}
-
-
-static inline call_frame_t *
-create_frame (xlator_t *xl, call_pool_t *pool)
-{
- call_stack_t *stack = NULL;
-
- if (!xl || !pool) {
- return NULL;
- }
-
- stack = mem_get0 (pool->stack_mem_pool);
- if (!stack)
- return NULL;
-
- stack->pool = pool;
- stack->frames.root = stack;
- stack->frames.this = xl;
-
- LOCK (&pool->lock);
- {
- list_add (&stack->all_frames, &pool->all_frames);
- pool->cnt++;
- }
- UNLOCK (&pool->lock);
-
- LOCK_INIT (&stack->frames.lock);
- LOCK_INIT (&stack->stack_lock);
-
- return &stack->frames;
-}
-
-void gf_proc_dump_pending_frames(call_pool_t *call_pool);
-void gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool,
- dict_t *dict);
-gf_boolean_t __is_fuse_call (call_frame_t *frame);
-#endif /* _STACK_H */
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index ca3737ea6ad..65f0eb5c7f3 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -9,12 +9,11 @@
*/
#include <stdarg.h>
-#include "glusterfs.h"
-#include "logging.h"
-#include "iobuf.h"
-#include "statedump.h"
-#include "stack.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/syscall.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
@@ -24,808 +23,1031 @@
'deadlock' with statedump. This is because statedump happens
inside a signal handler and cannot afford to block on a lock.*/
#ifdef gf_log
-# undef gf_log
+#undef gf_log
#endif
-#define GF_PROC_DUMP_IS_OPTION_ENABLED(opt) \
- (dump_options.dump_##opt == _gf_true)
+#define GF_PROC_DUMP_IS_OPTION_ENABLED(opt) \
+ (dump_options.dump_##opt == _gf_true)
-#define GF_PROC_DUMP_IS_XL_OPTION_ENABLED(opt) \
- (dump_options.xl_options.dump_##opt == _gf_true)
+#define GF_PROC_DUMP_IS_XL_OPTION_ENABLED(opt) \
+ (dump_options.xl_options.dump_##opt == _gf_true)
extern xlator_t global_xlator;
-static pthread_mutex_t gf_proc_dump_mutex;
+static pthread_mutex_t gf_proc_dump_mutex;
static int gf_dump_fd = -1;
gf_dump_options_t dump_options;
+static strfd_t *gf_dump_strfd = NULL;
static void
-gf_proc_dump_lock (void)
+gf_proc_dump_lock(void)
{
- pthread_mutex_lock (&gf_proc_dump_mutex);
+ pthread_mutex_lock(&gf_proc_dump_mutex);
}
-
static void
-gf_proc_dump_unlock (void)
+gf_proc_dump_unlock(void)
{
- pthread_mutex_unlock (&gf_proc_dump_mutex);
+ pthread_mutex_unlock(&gf_proc_dump_mutex);
}
static int
-gf_proc_dump_open (char *tmpname)
+gf_proc_dump_open(char *tmpname)
{
- int dump_fd = -1;
+ int dump_fd = -1;
- dump_fd = mkostemp (tmpname, O_CREAT|O_RDWR|O_TRUNC|O_APPEND);
- if (dump_fd < 0)
- return -1;
+ mode_t mask = umask(S_IRWXG | S_IRWXO);
+ dump_fd = mkstemp(tmpname);
+ umask(mask);
+ if (dump_fd < 0)
+ return -1;
- gf_dump_fd = dump_fd;
- return 0;
+ gf_dump_fd = dump_fd;
+ return 0;
}
static void
-gf_proc_dump_close (void)
+gf_proc_dump_close(void)
{
- close (gf_dump_fd);
- gf_dump_fd = -1;
+ sys_close(gf_dump_fd);
+ gf_dump_fd = -1;
}
static int
-gf_proc_dump_set_path (char *dump_options_file)
+gf_proc_dump_set_path(char *dump_options_file)
{
- int ret = -1;
- FILE *fp = NULL;
- char buf[256];
- char *key = NULL, *value = NULL;
- char *saveptr = NULL;
+ int ret = -1;
+ FILE *fp = NULL;
+ char buf[256];
+ char *key = NULL, *value = NULL;
+ char *saveptr = NULL;
+
+ fp = fopen(dump_options_file, "r");
+ if (!fp)
+ goto out;
+
+ ret = fscanf(fp, "%255s", buf);
+
+ while (ret != EOF) {
+ key = strtok_r(buf, "=", &saveptr);
+ if (!key) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
+ }
- fp = fopen (dump_options_file, "r");
- if (!fp)
- goto out;
+ value = strtok_r(NULL, "=", &saveptr);
- ret = fscanf (fp, "%s", buf);
-
- while (ret != EOF) {
- key = strtok_r (buf, "=", &saveptr);
- if (!key) {
- ret = fscanf (fp, "%s", buf);
- continue;
- }
-
- value = strtok_r (NULL, "=", &saveptr);
-
- if (!value) {
- ret = fscanf (fp, "%s", buf);
- continue;
- }
- if (!strcmp (key, "path")) {
- dump_options.dump_path = gf_strdup (value);
- break;
- }
+ if (!value) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
+ }
+ if (!strcmp(key, "path")) {
+ dump_options.dump_path = gf_strdup(value);
+ break;
}
+ }
out:
- if (fp)
- fclose (fp);
- return ret;
+ if (fp)
+ fclose(fp);
+ return ret;
+}
+
+static int
+gf_proc_dump_add_section_fd(char *key, va_list ap)
+{
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int len;
+
+ GF_ASSERT(key);
+
+ len = snprintf(buf, GF_DUMP_MAX_BUF_LEN, "\n[");
+ len += vsnprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, key, ap);
+ len += snprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, "]\n");
+ return sys_write(gf_dump_fd, buf, len);
+}
+
+static int
+gf_proc_dump_add_section_strfd(char *key, va_list ap)
+{
+ int ret = 0;
+
+ ret += strprintf(gf_dump_strfd, "[");
+ ret += strvprintf(gf_dump_strfd, key, ap);
+ ret += strprintf(gf_dump_strfd, "]\n");
+
+ return ret;
}
int
-gf_proc_dump_add_section (char *key, ...)
+gf_proc_dump_add_section(char *key, ...)
{
+ va_list ap;
+ int ret = 0;
- char buf[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
+ va_start(ap, key);
+ if (gf_dump_strfd)
+ ret = gf_proc_dump_add_section_strfd(key, ap);
+ else
+ ret = gf_proc_dump_add_section_fd(key, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+static int
+gf_proc_dump_write_fd(char *key, char *value, va_list ap)
+{
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int len = 0;
- GF_ASSERT(key);
+ GF_ASSERT(key);
- memset (buf, 0, sizeof(buf));
- snprintf (buf, GF_DUMP_MAX_BUF_LEN, "\n[");
- va_start (ap, key);
- vsnprintf (buf + strlen(buf),
- GF_DUMP_MAX_BUF_LEN - strlen (buf), key, ap);
- va_end (ap);
- snprintf (buf + strlen(buf),
- GF_DUMP_MAX_BUF_LEN - strlen (buf), "]\n");
- return write (gf_dump_fd, buf, strlen (buf));
+ len = snprintf(buf, GF_DUMP_MAX_BUF_LEN, "%s=", key);
+ len += vsnprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, value, ap);
+
+ len += snprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, "\n");
+ return sys_write(gf_dump_fd, buf, len);
}
+static int
+gf_proc_dump_write_strfd(char *key, char *value, va_list ap)
+{
+ int ret = 0;
+
+ ret += strprintf(gf_dump_strfd, "%s = ", key);
+ ret += strvprintf(gf_dump_strfd, value, ap);
+ ret += strprintf(gf_dump_strfd, "\n");
+
+ return ret;
+}
int
-gf_proc_dump_write (char *key, char *value,...)
+gf_proc_dump_write(char *key, char *value, ...)
{
+ int ret = 0;
+ va_list ap;
- char buf[GF_DUMP_MAX_BUF_LEN];
- int offset = 0;
- va_list ap;
+ va_start(ap, value);
+ if (gf_dump_strfd)
+ ret = gf_proc_dump_write_strfd(key, value, ap);
+ else
+ ret = gf_proc_dump_write_fd(key, value, ap);
+ va_end(ap);
- GF_ASSERT (key);
+ return ret;
+}
- offset = strlen (key);
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat)
+{
+ /* Doesn't make sense to continue if there are no fops
+ came in the given interval */
+ if (!lat || !lat->count)
+ return;
+ gf_proc_dump_write(key,
+ "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64
+ " MAX:%" PRIu64,
+ (((double)lat->total) / lat->count), lat->count,
+ lat->total, lat->min, lat->max);
+ gf_latency_reset(lat);
+}
- memset (buf, 0, GF_DUMP_MAX_BUF_LEN);
- snprintf (buf, GF_DUMP_MAX_BUF_LEN, "%s", key);
- snprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "=");
- offset += 1;
- va_start (ap, value);
- vsnprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, value, ap);
- va_end (ap);
+void
+gf_proc_dump_xl_latency_info(xlator_t *xl)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
+
+ gf_latency_t *lat = &xl->stats.interval.latencies[i];
- offset = strlen (buf);
- snprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "\n");
- return write (gf_dump_fd, buf, strlen (buf));
+ gf_latency_statedump_and_reset(key, lat);
+ }
}
static void
-gf_proc_dump_xlator_mem_info (xlator_t *xl)
-{
- int i = 0;
- struct mem_acct rec = {0,};
-
- if (!xl)
- return;
-
- if (!xl->mem_acct.rec)
- return;
-
- gf_proc_dump_add_section ("%s.%s - Memory usage", xl->type, xl->name);
- gf_proc_dump_write ("num_types", "%d", xl->mem_acct.num_types);
-
- for (i = 0; i < xl->mem_acct.num_types; i++) {
- if (!(memcmp (&xl->mem_acct.rec[i], &rec,
- sizeof (struct mem_acct))))
- continue;
-
- gf_proc_dump_add_section ("%s.%s - usage-type %d memusage",
- xl->type, xl->name, i);
- gf_proc_dump_write ("size", "%u", xl->mem_acct.rec[i].size);
- gf_proc_dump_write ("num_allocs", "%u",
- xl->mem_acct.rec[i].num_allocs);
- gf_proc_dump_write ("max_size", "%u",
- xl->mem_acct.rec[i].max_size);
- gf_proc_dump_write ("max_num_allocs", "%u",
- xl->mem_acct.rec[i].max_num_allocs);
- gf_proc_dump_write ("total_allocs", "%u",
- xl->mem_acct.rec[i].total_allocs);
- }
+gf_proc_dump_xlator_mem_info(xlator_t *xl)
+{
+ int i = 0;
+ if (!xl)
return;
+
+ if (!xl->mem_acct)
+ return;
+
+ gf_proc_dump_add_section("%s.%s - Memory usage", xl->type, xl->name);
+ gf_proc_dump_write("num_types", "%d", xl->mem_acct->num_types);
+
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ if (xl->mem_acct->rec[i].num_allocs == 0)
+ continue;
+
+ gf_proc_dump_add_section("%s.%s - usage-type %s memusage", xl->type,
+ xl->name, xl->mem_acct->rec[i].typestr);
+ gf_proc_dump_write("size", "%" PRIu64, xl->mem_acct->rec[i].size);
+ gf_proc_dump_write("num_allocs", "%u", xl->mem_acct->rec[i].num_allocs);
+ gf_proc_dump_write("max_size", "%" PRIu64,
+ xl->mem_acct->rec[i].max_size);
+ gf_proc_dump_write("max_num_allocs", "%u",
+ xl->mem_acct->rec[i].max_num_allocs);
+ gf_proc_dump_write("total_allocs", "%" PRIu64,
+ xl->mem_acct->rec[i].total_allocs);
+ }
+
+ return;
}
static void
-gf_proc_dump_xlator_mem_info_only_in_use (xlator_t *xl)
+gf_proc_dump_xlator_mem_info_only_in_use(xlator_t *xl)
{
- int i = 0;
+ int i = 0;
- if (!xl)
- return;
+ if (!xl)
+ return;
- if (!xl->mem_acct.rec)
- return;
+ if (!xl->mem_acct)
+ return;
- gf_proc_dump_add_section ("%s.%s - Memory usage", xl->type, xl->name);
- gf_proc_dump_write ("num_types", "%d", xl->mem_acct.num_types);
+ gf_proc_dump_add_section("%s.%s - Memory usage", xl->type, xl->name);
+ gf_proc_dump_write("num_types", "%d", xl->mem_acct->num_types);
- for (i = 0; i < xl->mem_acct.num_types; i++) {
- if (!xl->mem_acct.rec[i].size)
- continue;
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ if (!xl->mem_acct->rec[i].size)
+ continue;
- gf_proc_dump_add_section ("%s.%s - usage-type %d", xl->type,
- xl->name,i);
+ gf_proc_dump_add_section("%s.%s - usage-type %d", xl->type, xl->name,
+ i);
- gf_proc_dump_write ("size", "%u",
- xl->mem_acct.rec[i].size);
- gf_proc_dump_write ("max_size", "%u",
- xl->mem_acct.rec[i].max_size);
- gf_proc_dump_write ("num_allocs", "%u",
- xl->mem_acct.rec[i].num_allocs);
- gf_proc_dump_write ("max_num_allocs", "%u",
- xl->mem_acct.rec[i].max_num_allocs);
- gf_proc_dump_write ("total_allocs", "%u",
- xl->mem_acct.rec[i].total_allocs);
- }
+ gf_proc_dump_write("size", "%" PRIu64, xl->mem_acct->rec[i].size);
+ gf_proc_dump_write("max_size", "%" PRIu64,
+ xl->mem_acct->rec[i].max_size);
+ gf_proc_dump_write("num_allocs", "%u", xl->mem_acct->rec[i].num_allocs);
+ gf_proc_dump_write("max_num_allocs", "%u",
+ xl->mem_acct->rec[i].max_num_allocs);
+ gf_proc_dump_write("total_allocs", "%" PRIu64,
+ xl->mem_acct->rec[i].total_allocs);
+ }
- return;
+ return;
}
-
-
/* Currently this dumps only mallinfo. More can be built on here */
void
-gf_proc_dump_mem_info ()
-{
-#ifdef HAVE_MALLOC_STATS
- struct mallinfo info;
-
- memset (&info, 0, sizeof (struct mallinfo));
- info = mallinfo ();
-
- gf_proc_dump_add_section ("mallinfo");
- gf_proc_dump_write ("mallinfo_arena", "%d", info.arena);
- gf_proc_dump_write ("mallinfo_ordblks", "%d", info.ordblks);
- gf_proc_dump_write ("mallinfo_smblks", "%d", info.smblks);
- gf_proc_dump_write ("mallinfo_hblks", "%d", info.hblks);
- gf_proc_dump_write ("mallinfo_hblkhd", "%d", info.hblkhd);
- gf_proc_dump_write ("mallinfo_usmblks", "%d", info.usmblks);
- gf_proc_dump_write ("mallinfo_fsmblks", "%d", info.fsmblks);
- gf_proc_dump_write ("mallinfo_uordblks", "%d", info.uordblks);
- gf_proc_dump_write ("mallinfo_fordblks", "%d", info.fordblks);
- gf_proc_dump_write ("mallinfo_keepcost", "%d", info.keepcost);
+gf_proc_dump_mem_info()
+{
+#ifdef HAVE_MALLINFO
+ struct mallinfo info;
+
+ memset(&info, 0, sizeof(struct mallinfo));
+ info = mallinfo();
+
+ gf_proc_dump_add_section("mallinfo");
+ gf_proc_dump_write("mallinfo_arena", "%d", info.arena);
+ gf_proc_dump_write("mallinfo_ordblks", "%d", info.ordblks);
+ gf_proc_dump_write("mallinfo_smblks", "%d", info.smblks);
+ gf_proc_dump_write("mallinfo_hblks", "%d", info.hblks);
+ gf_proc_dump_write("mallinfo_hblkhd", "%d", info.hblkhd);
+ gf_proc_dump_write("mallinfo_usmblks", "%d", info.usmblks);
+ gf_proc_dump_write("mallinfo_fsmblks", "%d", info.fsmblks);
+ gf_proc_dump_write("mallinfo_uordblks", "%d", info.uordblks);
+ gf_proc_dump_write("mallinfo_fordblks", "%d", info.fordblks);
+ gf_proc_dump_write("mallinfo_keepcost", "%d", info.keepcost);
#endif
- gf_proc_dump_xlator_mem_info(&global_xlator);
-
+ gf_proc_dump_xlator_mem_info(&global_xlator);
}
void
-gf_proc_dump_mem_info_to_dict (dict_t *dict)
+gf_proc_dump_mem_info_to_dict(dict_t *dict)
{
- if (!dict)
- return;
-#ifdef HAVE_MALLOC_STATS
- struct mallinfo info;
- int ret = -1;
+ if (!dict)
+ return;
+#ifdef HAVE_MALLINFO
+ struct mallinfo info;
+ int ret = -1;
- memset (&info, 0, sizeof(struct mallinfo));
- info = mallinfo ();
+ memset(&info, 0, sizeof(struct mallinfo));
+ info = mallinfo();
- ret = dict_set_int32 (dict, "mallinfo.arena", info.arena);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.arena", info.arena);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.ordblks", info.ordblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.ordblks", info.ordblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.smblks", info.smblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.smblks", info.smblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.hblks", info.hblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.hblks", info.hblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.hblkhd", info.hblkhd);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.hblkhd", info.hblkhd);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.usmblks", info.usmblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.usmblks", info.usmblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.fsmblks", info.fsmblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.fsmblks", info.fsmblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.uordblks", info.uordblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.uordblks", info.uordblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.fordblks", info.fordblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.fordblks", info.fordblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.keepcost", info.keepcost);
- if (ret)
- return;
-#endif
+ ret = dict_set_int32(dict, "mallinfo.keepcost", info.keepcost);
+ if (ret)
return;
+#endif
+ return;
+}
+
+void
+gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx)
+{
+#ifdef GF_DISABLE_MEMPOOL
+ gf_proc_dump_write("built with --disable-mempool", " so no memory pools");
+#else
+ struct mem_pool *pool = NULL;
+
+ gf_proc_dump_add_section("mempool");
+
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry(pool, &ctx->mempool_list, owner)
+ {
+ int64_t active = GF_ATOMIC_GET(pool->active);
+
+ gf_proc_dump_write("-----", "-----");
+ gf_proc_dump_write("pool-name", "%s", pool->name);
+ gf_proc_dump_write("xlator-name", "%s", pool->xl_name);
+ gf_proc_dump_write("active-count", "%" GF_PRI_ATOMIC, active);
+ gf_proc_dump_write("sizeof-type", "%lu", pool->sizeof_type);
+ gf_proc_dump_write("padded-sizeof", "%d",
+ 1 << pool->pool->power_of_two);
+ gf_proc_dump_write("size", "%" PRId64,
+ (1 << pool->pool->power_of_two) * active);
+ gf_proc_dump_write("shared-pool", "%p", pool->pool);
+ }
+ }
+ UNLOCK(&ctx->lock);
+#endif /* GF_DISABLE_MEMPOOL */
}
void
-gf_proc_dump_mempool_info (glusterfs_ctx_t *ctx)
+gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict)
{
- struct mem_pool *pool = NULL;
+#ifndef GF_DISABLE_MEMPOOL
+ struct mem_pool *pool = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int count = 0;
+ int ret = -1;
+
+ if (!ctx || !dict)
+ return;
- gf_proc_dump_add_section ("mempool");
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry(pool, &ctx->mempool_list, owner)
+ {
+ int64_t active = GF_ATOMIC_GET(pool->active);
- list_for_each_entry (pool, &ctx->mempool_list, global_list) {
- gf_proc_dump_write ("-----", "-----");
- gf_proc_dump_write ("pool-name", "%s", pool->name);
- gf_proc_dump_write ("hot-count", "%d", pool->hot_count);
- gf_proc_dump_write ("cold-count", "%d", pool->cold_count);
- gf_proc_dump_write ("padded_sizeof", "%lu",
- pool->padded_sizeof_type);
- gf_proc_dump_write ("alloc-count", "%"PRIu64, pool->alloc_count);
- gf_proc_dump_write ("max-alloc", "%d", pool->max_alloc);
+ snprintf(key, sizeof(key), "pool%d.name", count);
+ ret = dict_set_str(dict, key, pool->name);
+ if (ret)
+ goto out;
- gf_proc_dump_write ("pool-misses", "%"PRIu64, pool->pool_misses);
- gf_proc_dump_write ("max-stdalloc", "%d", pool->max_stdalloc);
+ snprintf(key, sizeof(key), "pool%d.active-count", count);
+ ret = dict_set_uint64(dict, key, active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.sizeof-type", count);
+ ret = dict_set_uint64(dict, key, pool->sizeof_type);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.padded-sizeof", count);
+ ret = dict_set_uint64(dict, key, 1 << pool->pool->power_of_two);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.size", count);
+ ret = dict_set_uint64(dict, key,
+ (1 << pool->pool->power_of_two) * active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.shared-pool", count);
+ ret = dict_set_static_ptr(dict, key, pool->pool);
+ if (ret)
+ goto out;
}
+ }
+out:
+ UNLOCK(&ctx->lock);
+#endif /* !GF_DISABLE_MEMPOOL */
}
void
-gf_proc_dump_mempool_info_to_dict (glusterfs_ctx_t *ctx, dict_t *dict)
-{
- struct mem_pool *pool = NULL;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int count = 0;
- int ret = -1;
-
- if (!ctx || !dict)
- return;
-
- list_for_each_entry (pool, &ctx->mempool_list, global_list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.name", count);
- ret = dict_set_str (dict, key, pool->name);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.hotcount", count);
- ret = dict_set_int32 (dict, key, pool->hot_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.coldcount", count);
- ret = dict_set_int32 (dict, key, pool->cold_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.paddedsizeof", count);
- ret = dict_set_uint64 (dict, key, pool->padded_sizeof_type);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.alloccount", count);
- ret = dict_set_uint64 (dict, key, pool->alloc_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.max_alloc", count);
- ret = dict_set_int32 (dict, key, pool->max_alloc);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.max-stdalloc", count);
- ret = dict_set_int32 (dict, key, pool->max_stdalloc);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.pool-misses", count);
- ret = dict_set_uint64 (dict, key, pool->pool_misses);
- if (ret)
- return;
- count++;
- }
- ret = dict_set_int32 (dict, "mempool-count", count);
+gf_proc_dump_latency_info(xlator_t *xl);
- return;
+void
+gf_proc_dump_dict_info(glusterfs_ctx_t *ctx)
+{
+ int64_t total_dicts = 0;
+ int64_t total_pairs = 0;
+
+ total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used);
+ total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used);
+
+ gf_proc_dump_write("max-pairs-per-dict", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ctx->stats.max_dict_pairs));
+ gf_proc_dump_write("total-pairs-used", "%" PRId64, total_pairs);
+ gf_proc_dump_write("total-dicts-used", "%" PRId64, total_dicts);
+ gf_proc_dump_write("average-pairs-per-dict", "%" PRId64,
+ (total_pairs / total_dicts));
}
-void gf_proc_dump_latency_info (xlator_t *xl);
-
-void
-gf_proc_dump_xlator_info (xlator_t *top)
+static void
+gf_proc_dump_single_xlator_info(xlator_t *trav)
{
- xlator_t *trav = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char itable_key[1024] = {0,};
+ glusterfs_ctx_t *ctx = trav->ctx;
+ char itable_key[1024] = {
+ 0,
+ };
- if (!top)
- return;
+ if (trav->cleanup_starting)
+ return;
- ctx = glusterfs_ctx_get ();
+ if (ctx->measure_latency)
+ gf_proc_dump_xl_latency_info(trav);
- trav = top;
- while (trav) {
+ gf_proc_dump_xlator_mem_info(trav);
- if (ctx->measure_latency)
- gf_proc_dump_latency_info (trav);
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->itable)) {
+ snprintf(itable_key, sizeof(itable_key), "%d.%s.itable", ctx->graph_id,
+ trav->name);
+ }
- gf_proc_dump_xlator_mem_info(trav);
+ if (!trav->dumpops) {
+ return;
+ }
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->itable)) {
- snprintf (itable_key, 1024, "%d.%s.itable",
- ctx->graph_id, trav->name);
- }
+ if (trav->dumpops->priv && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(priv))
+ trav->dumpops->priv(trav);
- if (!trav->dumpops) {
- trav = trav->next;
- continue;
- }
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->dumpops->inode))
+ trav->dumpops->inode(trav);
+ if (trav->dumpops->fd && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(fd))
+ trav->dumpops->fd(trav);
- if (trav->dumpops->priv &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (priv))
- trav->dumpops->priv (trav);
+ if (trav->dumpops->history && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(history))
+ trav->dumpops->history(trav);
+}
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->dumpops->inode))
- trav->dumpops->inode (trav);
+static void
+gf_proc_dump_per_xlator_info(xlator_t *top)
+{
+ xlator_t *trav = top;
+
+ while (trav && !trav->cleanup_starting) {
+ gf_proc_dump_single_xlator_info(trav);
+ trav = trav->next;
+ }
+}
+
+void
+gf_proc_dump_xlator_info(xlator_t *top, gf_boolean_t brick_mux)
+{
+ xlator_t *trav = NULL;
+ xlator_list_t **trav_p = NULL;
- if (trav->dumpops->fd &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
- trav->dumpops->fd (trav);
+ if (!top)
+ return;
- if (trav->dumpops->history &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (history))
- trav->dumpops->history (trav);
+ trav = top;
+ gf_proc_dump_per_xlator_info(trav);
- trav = trav->next;
+ if (brick_mux) {
+ trav_p = &top->children;
+ while (*trav_p) {
+ trav = (*trav_p)->xlator;
+ gf_proc_dump_per_xlator_info(trav);
+ trav_p = &(*trav_p)->next;
}
+ }
- return;
+ return;
}
static void
-gf_proc_dump_oldgraph_xlator_info (xlator_t *top)
+gf_proc_dump_oldgraph_xlator_info(xlator_t *top)
{
- xlator_t *trav = NULL;
+ xlator_t *trav = NULL;
- if (!top)
- return;
+ if (!top)
+ return;
- trav = top;
- while (trav) {
- gf_proc_dump_xlator_mem_info_only_in_use (trav);
+ trav = top;
+ while (trav) {
+ gf_proc_dump_xlator_mem_info_only_in_use(trav);
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->itable)) {
- /*TODO: dump inode table info if necessary by
- printing the graph id (taken by glusterfs_cbtx_t)
- in the key
- */
- }
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->itable)) {
+ /*TODO: dump inode table info if necessary by
+ printing the graph id (taken by glusterfs_cbtx_t)
+ in the key
+ */
+ }
- if (!trav->dumpops) {
- trav = trav->next;
- continue;
- }
+ if (!trav->dumpops) {
+ trav = trav->next;
+ continue;
+ }
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->dumpops->inode))
- trav->dumpops->inode (trav);
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->dumpops->inode))
+ trav->dumpops->inode(trav);
- if (trav->dumpops->fd &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
- trav->dumpops->fd (trav);
+ if (trav->dumpops->fd && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(fd))
+ trav->dumpops->fd(trav);
- trav = trav->next;
- }
+ trav = trav->next;
+ }
- return;
+ return;
}
static int
-gf_proc_dump_enable_all_options ()
+gf_proc_dump_enable_all_options()
{
-
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_iobuf, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_priv, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inode, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fd, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inodectx,
- _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fdctx, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_history,
- _gf_true);
-
- return 0;
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_mem, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_iobuf, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_callpool, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_priv, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inode, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fd, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inodectx, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fdctx, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_history, _gf_true);
+
+ return 0;
}
gf_boolean_t
-is_gf_proc_dump_all_disabled ()
-{
- gf_boolean_t all_disabled = _gf_true;
-
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.dump_mem, all_disabled, out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.dump_iobuf, all_disabled, out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.dump_callpool, all_disabled,
- out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_priv,
- all_disabled, out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_inode,
- all_disabled, out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_fd,
- all_disabled, out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_inodectx,
- all_disabled, out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_fdctx,
- all_disabled, out);
- GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_history,
- all_disabled, out);
+is_gf_proc_dump_all_disabled()
+{
+ gf_boolean_t all_disabled = _gf_true;
+
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.dump_mem, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.dump_iobuf, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.dump_callpool, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_priv,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_inode,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_fd, all_disabled,
+ out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_inodectx,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_fdctx,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_history,
+ all_disabled, out);
out:
- return all_disabled;
+ return all_disabled;
}
-/* These options are dumped by default if /tmp/glusterdump.options
+/* These options are dumped by default if glusterdump.options
file exists and it is emtpty
*/
static int
-gf_proc_dump_enable_default_options ()
+gf_proc_dump_enable_default_options()
{
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_mem, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_callpool, _gf_true);
- return 0;
+ return 0;
}
-
static int
-gf_proc_dump_disable_all_options ()
-{
-
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_iobuf, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_priv, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inode,
- _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fd, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inodectx,
- _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fdctx, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_history,
- _gf_false);
- return 0;
+gf_proc_dump_disable_all_options()
+{
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_mem, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_iobuf, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_callpool, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_priv, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inode, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fd, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inodectx, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fdctx, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_history, _gf_false);
+ return 0;
}
static int
-gf_proc_dump_parse_set_option (char *key, char *value)
-{
- gf_boolean_t *opt_key = NULL;
- gf_boolean_t opt_value = _gf_false;
- char buf[GF_DUMP_MAX_BUF_LEN];
- int ret = -1;
-
- if (!strcasecmp (key, "all")) {
- (void)gf_proc_dump_enable_all_options ();
- return 0;
- } else if (!strcasecmp (key, "mem")) {
- opt_key = &dump_options.dump_mem;
- } else if (!strcasecmp (key, "iobuf")) {
- opt_key = &dump_options.dump_iobuf;
- } else if (!strcasecmp (key, "callpool")) {
- opt_key = &dump_options.dump_callpool;
- } else if (!strcasecmp (key, "priv")) {
- opt_key = &dump_options.xl_options.dump_priv;
- } else if (!strcasecmp (key, "fd")) {
- opt_key = &dump_options.xl_options.dump_fd;
- } else if (!strcasecmp (key, "inode")) {
- opt_key = &dump_options.xl_options.dump_inode;
- } else if (!strcasecmp (key, "inodectx")) {
- opt_key = &dump_options.xl_options.dump_inodectx;
- } else if (!strcasecmp (key, "fdctx")) {
- opt_key = &dump_options.xl_options.dump_fdctx;
- } else if (!strcasecmp (key, "history")) {
- opt_key = &dump_options.xl_options.dump_history;
- }
-
- if (!opt_key) {
- //None of dump options match the key, return back
- snprintf (buf, sizeof (buf), "[Warning]:None of the options "
- "matched key : %s\n", key);
- ret = write (gf_dump_fd, buf, strlen (buf));
-
- if (ret >= 0)
- ret = -1;
- goto out;
-
+gf_proc_dump_parse_set_option(char *key, char *value)
+{
+ gf_boolean_t *opt_key = NULL;
+ gf_boolean_t opt_value = _gf_false;
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ int len;
+
+ if (!strcasecmp(key, "all")) {
+ (void)gf_proc_dump_enable_all_options();
+ return 0;
+ } else if (!strcasecmp(key, "mem")) {
+ opt_key = &dump_options.dump_mem;
+ } else if (!strcasecmp(key, "iobuf")) {
+ opt_key = &dump_options.dump_iobuf;
+ } else if (!strcasecmp(key, "callpool")) {
+ opt_key = &dump_options.dump_callpool;
+ } else if (!strcasecmp(key, "priv")) {
+ opt_key = &dump_options.xl_options.dump_priv;
+ } else if (!strcasecmp(key, "fd")) {
+ opt_key = &dump_options.xl_options.dump_fd;
+ } else if (!strcasecmp(key, "inode")) {
+ opt_key = &dump_options.xl_options.dump_inode;
+ } else if (!strcasecmp(key, "inodectx")) {
+ opt_key = &dump_options.xl_options.dump_inodectx;
+ } else if (!strcasecmp(key, "fdctx")) {
+ opt_key = &dump_options.xl_options.dump_fdctx;
+ } else if (!strcasecmp(key, "history")) {
+ opt_key = &dump_options.xl_options.dump_history;
+ }
+
+ if (!opt_key) {
+ // None of dump options match the key, return back
+ len = snprintf(buf, sizeof(buf),
+ "[Warning]:None of the options "
+ "matched key : %s\n",
+ key);
+ if (len < 0)
+ ret = -1;
+ else {
+ ret = sys_write(gf_dump_fd, buf, len);
+ if (ret >= 0)
+ ret = -1;
}
+ goto out;
+ }
- opt_value = (strncasecmp (value, "yes", 3) ?
- _gf_false: _gf_true);
+ opt_value = (strncasecmp(value, "yes", 3) ? _gf_false : _gf_true);
- GF_PROC_DUMP_SET_OPTION (*opt_key, opt_value);
+ GF_PROC_DUMP_SET_OPTION(*opt_key, opt_value);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
static int
-gf_proc_dump_options_init ()
-{
- int ret = -1;
- FILE *fp = NULL;
- char buf[256];
- char *key = NULL, *value = NULL;
- char *saveptr = NULL;
- char dump_option_file[PATH_MAX];
-
- /* glusterd will create a file /tmp/glusterdump.<pid>.options and
- sets the statedump options for the process and the file is removed
- after the statedump is taken. Direct issue of SIGUSR1 does not have
- mechanism for considering the statedump options. So to have a way
- of configuring the statedump of all the glusterfs processes through
- both cli command and SIGUSR1, /tmp/glusterdump.options file
- is searched and the options mentioned in it are given the higher
- priority.
- */
- snprintf (dump_option_file, sizeof (dump_option_file),
- "/tmp/glusterdump.options");
- fp = fopen (dump_option_file, "r");
- if (!fp) {
- snprintf (dump_option_file, sizeof (dump_option_file),
- "/tmp/glusterdump.%d.options", getpid ());
-
- fp = fopen (dump_option_file, "r");
+gf_proc_dump_options_init()
+{
+ int ret = -1;
+ FILE *fp = NULL;
+ char buf[256];
+ char *key = NULL, *value = NULL;
+ char *saveptr = NULL;
+ char dump_option_file[PATH_MAX];
+
+ /* glusterd will create a file glusterdump.<pid>.options and
+ sets the statedump options for the process and the file is removed
+ after the statedump is taken. Direct issue of SIGUSR1 does not have
+ mechanism for considering the statedump options. So to have a way
+ of configuring the statedump of all the glusterfs processes through
+ both cli command and SIGUSR1, glusterdump.options file is searched
+ and the options mentioned in it are given the higher priority.
+ */
+ snprintf(dump_option_file, sizeof(dump_option_file),
+ DEFAULT_VAR_RUN_DIRECTORY "/glusterdump.options");
+ fp = fopen(dump_option_file, "r");
+ if (!fp) {
+ snprintf(dump_option_file, sizeof(dump_option_file),
+ DEFAULT_VAR_RUN_DIRECTORY "/glusterdump.%d.options", getpid());
+
+ fp = fopen(dump_option_file, "r");
- if (!fp) {
- //ENOENT, return success
- (void) gf_proc_dump_enable_all_options ();
- return 0;
- }
+ if (!fp) {
+ // ENOENT, return success
+ (void)gf_proc_dump_enable_all_options();
+ return 0;
}
+ }
- (void) gf_proc_dump_disable_all_options ();
+ (void)gf_proc_dump_disable_all_options();
- // swallow the errors if setting statedump file path is failed.
- ret = gf_proc_dump_set_path (dump_option_file);
+ // swallow the errors if setting statedump file path is failed.
+ (void)gf_proc_dump_set_path(dump_option_file);
- ret = fscanf (fp, "%s", buf);
+ ret = fscanf(fp, "%255s", buf);
- while (ret != EOF) {
- key = strtok_r (buf, "=", &saveptr);
- if (!key) {
- ret = fscanf (fp, "%s", buf);
- continue;
- }
-
- value = strtok_r (NULL, "=", &saveptr);
+ while (ret != EOF) {
+ key = strtok_r(buf, "=", &saveptr);
+ if (!key) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
+ }
- if (!value) {
- ret = fscanf (fp, "%s", buf);
- continue;
- }
+ value = strtok_r(NULL, "=", &saveptr);
- gf_proc_dump_parse_set_option (key, value);
+ if (!value) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
}
- if (is_gf_proc_dump_all_disabled ())
- (void) gf_proc_dump_enable_default_options ();
+ gf_proc_dump_parse_set_option(key, value);
+ }
- if (fp)
- fclose (fp);
+ if (is_gf_proc_dump_all_disabled())
+ (void)gf_proc_dump_enable_default_options();
- return 0;
+ if (fp)
+ fclose(fp);
+
+ return 0;
}
void
-gf_proc_dump_info (int signum)
-{
- int i = 0;
- int ret = -1;
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *trav = NULL;
- char brick_name[PATH_MAX] = {0,};
- struct timeval tv = {0,};
- char timestr[256] = {0,};
- char sign_string[512] = {0,};
- char tmp_dump_name[] = "/tmp/dumpXXXXXX";
- char path[PATH_MAX] = {0,};
-
- gf_proc_dump_lock ();
-
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- goto out;
-
- if (ctx->cmd_args.brick_name) {
- GF_REMOVE_SLASH_FROM_PATH (ctx->cmd_args.brick_name, brick_name);
- } else
- strncpy (brick_name, "glusterdump", sizeof (brick_name));
-
- ret = gf_proc_dump_options_init ();
- if (ret < 0)
- goto out;
-
- snprintf (path, sizeof (path), "%s/%s.%d.dump.%"PRIu64,
- ((dump_options.dump_path != NULL)?dump_options.dump_path:
- ((ctx->statedump_path != NULL)?ctx->statedump_path:"/tmp")),
- brick_name, getpid(), (uint64_t) time (NULL));
-
- ret = gf_proc_dump_open (tmp_dump_name);
- if (ret < 0)
- goto out;
-
- //continue even though gettimeofday() has failed
- ret = gettimeofday (&tv, NULL);
- if (0 == ret) {
- strftime (timestr, sizeof timestr, "%Y-%m-%d %H:%M:%S",
- localtime (&tv.tv_sec));
- snprintf (timestr + strlen (timestr),
- sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+{
+ int i = 0;
+ int ret = -1;
+ glusterfs_graph_t *trav = NULL;
+ char brick_name[PATH_MAX] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char sign_string[512] = {
+ 0,
+ };
+ char tmp_dump_name[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ struct timeval tv = {
+ 0,
+ };
+ gf_boolean_t is_brick_mux = _gf_false;
+ xlator_t *top = NULL;
+ xlator_list_t **trav_p = NULL;
+ int brick_count = 0;
+ int len = 0;
+
+ gf_msg_trace("dump", 0, "received statedump request (sig:USR1)");
+
+ if (!ctx)
+ goto out;
+
+ /*
+ * Multiplexed daemons can change the active graph when attach/detach
+ * is called. So this has to be protected with the cleanup lock.
+ */
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ gf_proc_dump_lock();
+
+ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
+ (ctx && ctx->active)) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ brick_count++;
}
- snprintf (sign_string, sizeof (sign_string), "DUMP-START-TIME: %s\n",
- timestr);
+ if (brick_count > 1)
+ is_brick_mux = _gf_true;
+ }
+
+ if (ctx->cmd_args.brick_name) {
+ GF_REMOVE_SLASH_FROM_PATH(ctx->cmd_args.brick_name, brick_name);
+ } else
+ snprintf(brick_name, sizeof(brick_name), "glusterdump");
+
+ ret = gf_proc_dump_options_init();
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(
+ path, sizeof(path), "%s/%s.%d.dump.%" PRIu64,
+ ((dump_options.dump_path != NULL)
+ ? dump_options.dump_path
+ : ((ctx->statedump_path != NULL) ? ctx->statedump_path
+ : DEFAULT_VAR_RUN_DIRECTORY)),
+ brick_name, getpid(), (uint64_t)gf_time());
+ if ((ret < 0) || (ret >= sizeof(path))) {
+ goto out;
+ }
+
+ snprintf(
+ tmp_dump_name, PATH_MAX, "%s/dumpXXXXXX",
+ ((dump_options.dump_path != NULL)
+ ? dump_options.dump_path
+ : ((ctx->statedump_path != NULL) ? ctx->statedump_path
+ : DEFAULT_VAR_RUN_DIRECTORY)));
+
+ ret = gf_proc_dump_open(tmp_dump_name);
+ if (ret < 0)
+ goto out;
+
+ // continue even though gettimeofday() has failed
+ ret = gettimeofday(&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+ }
+
+ len = snprintf(sign_string, sizeof(sign_string), "DUMP-START-TIME: %s\n",
+ timestr);
+
+ // swallow the errors of write for start and end marker
+ (void)sys_write(gf_dump_fd, sign_string, len);
+
+ memset(timestr, 0, sizeof(timestr));
+
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED(mem)) {
+ gf_proc_dump_mem_info();
+ gf_proc_dump_mempool_info(ctx);
+ }
+
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED(iobuf))
+ iobuf_stats_dump(ctx->iobuf_pool);
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED(callpool))
+ gf_proc_dump_pending_frames(ctx->pool);
+
+ /* dictionary stats */
+ gf_proc_dump_add_section("dict");
+ gf_proc_dump_dict_info(ctx);
+
+ if (ctx->master) {
+ gf_proc_dump_add_section("fuse");
+ gf_proc_dump_single_xlator_info(ctx->master);
+ }
+
+ if (ctx->active) {
+ gf_proc_dump_add_section("active graph - %d", ctx->graph_id);
+ gf_proc_dump_xlator_info(ctx->active->top, is_brick_mux);
+ }
+
+ i = 0;
+ list_for_each_entry(trav, &ctx->graphs, list)
+ {
+ if (trav == ctx->active)
+ continue;
+
+ gf_proc_dump_add_section("oldgraph[%d]", i);
+
+ gf_proc_dump_oldgraph_xlator_info(trav->top);
+ i++;
+ }
+
+ ret = gettimeofday(&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+ }
+
+ len = snprintf(sign_string, sizeof(sign_string), "\nDUMP-END-TIME: %s",
+ timestr);
+ (void)sys_write(gf_dump_fd, sign_string, len);
+
+ if (gf_dump_fd != -1)
+ gf_proc_dump_close();
+ sys_rename(tmp_dump_name, path);
+out:
+ GF_FREE(dump_options.dump_path);
+ dump_options.dump_path = NULL;
+ if (ctx) {
+ gf_proc_dump_unlock();
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+ }
+
+ return;
+}
- //swallow the errors of write for start and end marker
- ret = write (gf_dump_fd, sign_string, strlen (sign_string));
+void
+gf_proc_dump_fini(void)
+{
+ pthread_mutex_destroy(&gf_proc_dump_mutex);
+}
- memset (sign_string, 0, sizeof (sign_string));
- memset (timestr, 0, sizeof (timestr));
- memset (&tv, 0, sizeof (tv));
+void
+gf_proc_dump_init()
+{
+ pthread_mutex_init(&gf_proc_dump_mutex, NULL);
- if (GF_PROC_DUMP_IS_OPTION_ENABLED (mem)) {
- gf_proc_dump_mem_info ();
- gf_proc_dump_mempool_info (ctx);
- }
+ return;
+}
- if (GF_PROC_DUMP_IS_OPTION_ENABLED (iobuf))
- iobuf_stats_dump (ctx->iobuf_pool);
- if (GF_PROC_DUMP_IS_OPTION_ENABLED (callpool))
- gf_proc_dump_pending_frames (ctx->pool);
+void
+gf_proc_dump_cleanup(void)
+{
+ pthread_mutex_destroy(&gf_proc_dump_mutex);
+}
- if (ctx->master) {
- gf_proc_dump_add_section ("fuse");
- gf_proc_dump_xlator_info (ctx->master);
- }
+void
+gf_proc_dump_xlator_private(xlator_t *this, strfd_t *strfd)
+{
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
- if (ctx->active) {
- gf_proc_dump_add_section ("active graph - %d", ctx->graph_id);
- gf_proc_dump_xlator_info (ctx->active->top);
- }
+ if (this->dumpops && this->dumpops->priv)
+ this->dumpops->priv(this);
- i = 0;
- list_for_each_entry (trav, &ctx->graphs, list) {
- if (trav == ctx->active)
- continue;
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
- gf_proc_dump_add_section ("oldgraph[%d]", i);
+void
+gf_proc_dump_mallinfo(strfd_t *strfd)
+{
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
- gf_proc_dump_oldgraph_xlator_info (trav->top);
- i++;
- }
+ gf_proc_dump_mem_info();
- ret = gettimeofday (&tv, NULL);
- if (0 == ret) {
- strftime (timestr, sizeof timestr, "%Y-%m-%d %H:%M:%s",
- localtime (&tv.tv_sec));
- snprintf (timestr + strlen (timestr),
- sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
- }
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
- snprintf (sign_string, sizeof (sign_string), "\nDUMP-END-TIME: %s",
- timestr);
- ret = write (gf_dump_fd, sign_string, strlen (sign_string));
+void
+gf_proc_dump_xlator_history(xlator_t *this, strfd_t *strfd)
+{
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
-out:
- if (gf_dump_fd != -1)
- gf_proc_dump_close ();
- rename (tmp_dump_name, path);
- GF_FREE (dump_options.dump_path);
- dump_options.dump_path = NULL;
- gf_proc_dump_unlock ();
+ if (this->dumpops && this->dumpops->history)
+ this->dumpops->history(this);
- return;
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
}
-
void
-gf_proc_dump_fini (void)
+gf_proc_dump_xlator_itable(xlator_t *this, strfd_t *strfd)
{
- pthread_mutex_destroy (&gf_proc_dump_mutex);
-}
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
void
-gf_proc_dump_init ()
+gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd)
{
- pthread_mutex_init (&gf_proc_dump_mutex, NULL);
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
- return;
-}
+ gf_proc_dump_xlator_mem_info(this);
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
void
-gf_proc_dump_cleanup (void)
+gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd)
{
- pthread_mutex_destroy (&gf_proc_dump_mutex);
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
+
+ gf_proc_dump_xl_latency_info(this);
+
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
}
diff --git a/libglusterfs/src/statedump.h b/libglusterfs/src/statedump.h
deleted file mode 100644
index e8d304e66d4..00000000000
--- a/libglusterfs/src/statedump.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef STATEDUMP_H
-#define STATEDUMP_H
-
-#include <stdarg.h>
-#include "inode.h"
-
-#define GF_DUMP_MAX_BUF_LEN 4096
-
-typedef struct gf_dump_xl_options_ {
- gf_boolean_t dump_priv;
- gf_boolean_t dump_inode;
- gf_boolean_t dump_fd;
- gf_boolean_t dump_inodectx;
- gf_boolean_t dump_fdctx;
- gf_boolean_t dump_history;
-} gf_dump_xl_options_t;
-
-typedef struct gf_dump_options_ {
- gf_boolean_t dump_mem;
- gf_boolean_t dump_iobuf;
- gf_boolean_t dump_callpool;
- gf_dump_xl_options_t xl_options; //options for all xlators
- char *dump_path;
-} gf_dump_options_t;
-
-extern gf_dump_options_t dump_options;
-
-static inline
-void _gf_proc_dump_build_key (char *key, const char *prefix, char *fmt,...)
-{
- char buf[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
-
- memset(buf, 0, sizeof(buf));
- va_start(ap, fmt);
- vsnprintf(buf, GF_DUMP_MAX_BUF_LEN, fmt, ap);
- va_end(ap);
- snprintf(key, GF_DUMP_MAX_BUF_LEN, "%s.%s", prefix, buf);
-}
-
-#define gf_proc_dump_build_key(key, key_prefix, fmt...) \
- { \
- _gf_proc_dump_build_key(key, key_prefix, ##fmt); \
- }
-
-#define GF_PROC_DUMP_SET_OPTION(opt,val) opt = val
-
-#define GF_CHECK_DUMP_OPTION_ENABLED(option_dump, var, label) \
- do { \
- if (option_dump == _gf_true) { \
- var = _gf_false; \
- goto label; \
- } \
- } while (0);
-
-void gf_proc_dump_init();
-
-void gf_proc_dump_fini(void);
-
-void gf_proc_dump_cleanup(void);
-
-void gf_proc_dump_info(int signum);
-
-int gf_proc_dump_add_section(char *key,...);
-
-int gf_proc_dump_write(char *key, char *value,...);
-
-void inode_table_dump(inode_table_t *itable, char *prefix);
-
-void inode_table_dump_to_dict (inode_table_t *itable, char *prefix, dict_t *dict);
-
-void fdtable_dump(fdtable_t *fdtable, char *prefix);
-
-void fdtable_dump_to_dict (fdtable_t *fdtable, char *prefix, dict_t *dict);
-
-void inode_dump(inode_t *inode, char *prefix);
-
-void gf_proc_dump_mem_info_to_dict (dict_t *dict);
-
-void gf_proc_dump_mempool_info_to_dict (glusterfs_ctx_t *ctx, dict_t *dict);
-
-void glusterd_init (int sig);
-#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
new file mode 100644
index 00000000000..5c316b9291a
--- /dev/null
+++ b/libglusterfs/src/store.c
@@ -0,0 +1,744 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <inttypes.h>
+#include <libgen.h>
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/store.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+int32_t
+gf_store_mkdir(char *path)
+{
+ int32_t ret = -1;
+
+ ret = mkdir_p(path, 0755, _gf_true);
+
+ if ((-1 == ret) && (EEXIST != errno)) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "mkdir()"
+ " failed on path %s.",
+ path);
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_handle_create_on_absence(gf_store_handle_t **shandle, char *path)
+{
+ GF_ASSERT(shandle);
+ int32_t ret = 0;
+
+ if (*shandle == NULL) {
+ ret = gf_store_handle_new(path, shandle);
+
+ if (ret) {
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_STORE_HANDLE_CREATE_FAILED,
+ "Unable to"
+ " create store handle for path: %s",
+ path);
+ }
+ }
+ return ret;
+}
+
+int32_t
+gf_store_mkstemp(gf_store_handle_t *shandle)
+{
+ char tmppath[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("store", shandle, out);
+ GF_VALIDATE_OR_GOTO("store", shandle->path, out);
+
+ snprintf(tmppath, sizeof(tmppath), "%s.tmp", shandle->path);
+ shandle->tmp_fd = open(tmppath, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ if (shandle->tmp_fd < 0) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to open %s.", tmppath);
+ }
+out:
+ return shandle->tmp_fd;
+}
+
+int
+gf_store_sync_direntry(char *path)
+{
+ int ret = -1;
+ int dirfd = -1;
+ char *dir = NULL;
+ char *pdir = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ dir = gf_strdup(path);
+ if (!dir)
+ goto out;
+
+ pdir = dirname(dir);
+ dirfd = open(pdir, O_RDONLY);
+ if (dirfd == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "Failed to open directory %s.", pdir);
+ goto out;
+ }
+
+ ret = sys_fsync(dirfd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "Failed to fsync %s.", pdir);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dirfd >= 0) {
+ ret = sys_close(dirfd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "Failed to close %s", pdir);
+ }
+ }
+
+ if (dir)
+ GF_FREE(dir);
+
+ return ret;
+}
+
+int32_t
+gf_store_rename_tmppath(gf_store_handle_t *shandle)
+{
+ int32_t ret = -1;
+ char tmppath[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("store", shandle, out);
+ GF_VALIDATE_OR_GOTO("store", shandle->path, out);
+
+ ret = sys_fsync(shandle->tmp_fd);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to fsync %s", shandle->path);
+ goto out;
+ }
+ snprintf(tmppath, sizeof(tmppath), "%s.tmp", shandle->path);
+ ret = sys_rename(tmppath, shandle->path);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to rename %s to %s", tmppath, shandle->path);
+ goto out;
+ }
+
+ ret = gf_store_sync_direntry(tmppath);
+out:
+ if (shandle && shandle->tmp_fd >= 0) {
+ sys_close(shandle->tmp_fd);
+ shandle->tmp_fd = -1;
+ }
+ return ret;
+}
+
+int32_t
+gf_store_unlink_tmppath(gf_store_handle_t *shandle)
+{
+ int32_t ret = -1;
+ char tmppath[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("store", shandle, out);
+ GF_VALIDATE_OR_GOTO("store", shandle->path, out);
+
+ snprintf(tmppath, sizeof(tmppath), "%s.tmp", shandle->path);
+ ret = sys_unlink(tmppath);
+ if (ret && (errno != ENOENT)) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to mv %s to %s", tmppath, shandle->path);
+ } else {
+ ret = 0;
+ }
+out:
+ if (shandle && shandle->tmp_fd >= 0) {
+ sys_close(shandle->tmp_fd);
+ shandle->tmp_fd = -1;
+ }
+ return ret;
+}
+
+int
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno)
+{
+ int32_t ret = -1;
+ char *savetok = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *temp = NULL;
+ size_t str_len = 0;
+ char str[8192];
+
+ GF_ASSERT(file);
+ GF_ASSERT(iter_key);
+ GF_ASSERT(iter_val);
+ GF_ASSERT(store_errno);
+
+retry:
+ temp = fgets(str, 8192, file);
+ if (temp == NULL || feof(file)) {
+ ret = -1;
+ *store_errno = GD_STORE_EOF;
+ goto out;
+ }
+
+ if (strcmp(str, "\n") == 0)
+ goto retry;
+
+ str_len = strlen(str);
+ str[str_len - 1] = '\0';
+ /* Truncate the "\n", as fgets stores "\n" in str */
+
+ key = strtok_r(str, "=", &savetok);
+ if (!key) {
+ ret = -1;
+ *store_errno = GD_STORE_KEY_NULL;
+ goto out;
+ }
+
+ value = strtok_r(NULL, "", &savetok);
+ if (!value) {
+ ret = -1;
+ *store_errno = GD_STORE_VALUE_NULL;
+ goto out;
+ }
+
+ *iter_key = key;
+ *iter_val = value;
+ *store_errno = GD_STORE_SUCCESS;
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
+{
+ int32_t ret = -1;
+ char *iter_key = NULL;
+ char *iter_val = NULL;
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT(handle);
+
+ if (handle->locked == F_ULOCK)
+ /* no locking is used handle->fd gets closed() after usage */
+ handle->fd = open(handle->path, O_RDWR);
+ else
+ /* handle->fd is valid already, kept open for lockf() */
+ sys_lseek(handle->fd, 0, SEEK_SET);
+
+ if (handle->fd == -1) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to open file %s", handle->path);
+ goto out;
+ }
+ if (!handle->read) {
+ int duped_fd = dup(handle->fd);
+
+ if (duped_fd >= 0)
+ handle->read = fdopen(duped_fd, "r");
+ if (!handle->read) {
+ if (duped_fd != -1)
+ sys_close(duped_fd);
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to open file %s", handle->path);
+ goto out;
+ }
+ } else {
+ fseek(handle->read, 0, SEEK_SET);
+ }
+ do {
+ ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val,
+ &store_errno);
+ if (ret < 0) {
+ gf_msg_trace("", 0,
+ "error while reading key '%s': "
+ "%s",
+ key, gf_store_strerror(store_errno));
+ goto out;
+ }
+
+ gf_msg_trace("", 0, "key %s read", iter_key);
+
+ if (!strcmp(key, iter_key)) {
+ gf_msg_debug("", 0, "key %s found", key);
+ ret = 0;
+ if (iter_val)
+ *value = gf_strdup(iter_val);
+ goto out;
+ }
+ } while (1);
+out:
+ if (handle->read) {
+ fclose(handle->read);
+ handle->read = NULL;
+ }
+
+ if (handle->fd > 0 && handle->locked == F_ULOCK) {
+ /* only invalidate handle->fd if not locked */
+ sys_close(handle->fd);
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_save_value(int fd, char *key, char *value)
+{
+ int32_t ret = -1;
+ int dup_fd = -1;
+ FILE *fp = NULL;
+
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+
+ dup_fd = dup(fd);
+ if (dup_fd == -1)
+ goto out;
+
+ fp = fdopen(dup_fd, "a+");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fdopen failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = fprintf(fp, "%s=%s\n", key, value);
+ if (ret < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to store key: %s, value: %s.", key, value);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fflush(fp);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fflush failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+
+ gf_msg_debug(THIS->name, 0, "returning: %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_save_items(int fd, char *items)
+{
+ int32_t ret = -1;
+ int dup_fd = -1;
+ FILE *fp = NULL;
+
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(items);
+
+ dup_fd = dup(fd);
+ if (dup_fd == -1)
+ goto out;
+
+ fp = fdopen(dup_fd, "a+");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fdopen failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = fputs(items, fp);
+ if (ret < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to store items: %s", items);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fflush(fp);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fflush failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+
+ gf_msg_debug(THIS->name, 0, "returning: %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_handle_new(const char *path, gf_store_handle_t **handle)
+{
+ int32_t ret = -1;
+ gf_store_handle_t *shandle = NULL;
+ int fd = -1;
+ char *spath = NULL;
+
+ shandle = GF_CALLOC(1, sizeof(*shandle), gf_common_mt_store_handle_t);
+ if (!shandle)
+ goto out;
+
+ spath = gf_strdup(path);
+ if (!spath)
+ goto out;
+
+ fd = open(path, O_RDWR | O_CREAT | O_APPEND, 0600);
+ if (fd < 0) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to open file: %s.", path);
+ goto out;
+ }
+
+ ret = gf_store_sync_direntry(spath);
+ if (ret)
+ goto out;
+
+ shandle->path = spath;
+ shandle->locked = F_ULOCK;
+ *handle = shandle;
+ shandle->tmp_fd = -1;
+
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
+
+ if (ret) {
+ GF_FREE(spath);
+ GF_FREE(shandle);
+ }
+
+ gf_msg_debug("", 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+gf_store_handle_retrieve(char *path, gf_store_handle_t **handle)
+{
+ int32_t ret = -1;
+ struct stat statbuf = {0};
+
+ ret = sys_stat(path, &statbuf);
+ if (ret) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_PATH_NOT_FOUND,
+ "Path "
+ "corresponding to %s.",
+ path);
+ goto out;
+ }
+ ret = gf_store_handle_new(path, handle);
+out:
+ gf_msg_debug("", 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_handle_destroy(gf_store_handle_t *handle)
+{
+ int32_t ret = -1;
+
+ if (!handle) {
+ ret = 0;
+ goto out;
+ }
+
+ GF_FREE(handle->path);
+
+ GF_FREE(handle);
+
+ ret = 0;
+
+out:
+ gf_msg_debug("", 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_store_iter_new(gf_store_handle_t *shandle, gf_store_iter_t **iter)
+{
+ int32_t ret = -1;
+ FILE *fp = NULL;
+ gf_store_iter_t *tmp_iter = NULL;
+
+ GF_ASSERT(shandle);
+ GF_ASSERT(iter);
+
+ fp = fopen(shandle->path, "r");
+ if (!fp) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to open file %s", shandle->path);
+ goto out;
+ }
+
+ tmp_iter = GF_CALLOC(1, sizeof(*tmp_iter), gf_common_mt_store_iter_t);
+ if (!tmp_iter)
+ goto out;
+
+ if (snprintf(tmp_iter->filepath, sizeof(tmp_iter->filepath), "%s",
+ shandle->path) >= sizeof(tmp_iter->filepath))
+ goto out;
+
+ tmp_iter->file = fp;
+
+ *iter = tmp_iter;
+ tmp_iter = NULL;
+ ret = 0;
+
+out:
+ if (ret && fp)
+ fclose(fp);
+
+ GF_FREE(tmp_iter);
+
+ gf_msg_debug("", 0, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_validate_key_value(char *storepath, char *key, char *val,
+ gf_store_op_errno_t *op_errno)
+{
+ int ret = 0;
+
+ GF_ASSERT(op_errno);
+ GF_ASSERT(storepath);
+
+ if ((key == NULL) && (val == NULL)) {
+ ret = -1;
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Glusterd "
+ "store may be corrupted, Invalid key and value (null)"
+ " in %s",
+ storepath);
+ *op_errno = GD_STORE_KEY_VALUE_NULL;
+ } else if (key == NULL) {
+ ret = -1;
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Glusterd "
+ "store may be corrupted, Invalid key (null) in %s",
+ storepath);
+ *op_errno = GD_STORE_KEY_NULL;
+ } else if (val == NULL) {
+ ret = -1;
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Glusterd "
+ "store may be corrupted, Invalid value (null) for key"
+ " %s in %s",
+ key, storepath);
+ *op_errno = GD_STORE_VALUE_NULL;
+ } else {
+ ret = 0;
+ *op_errno = GD_STORE_SUCCESS;
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno)
+{
+ int32_t ret = -1;
+ char *iter_key = NULL;
+ char *iter_val = NULL;
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT(iter);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+
+ ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val,
+ &store_errno);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = gf_store_validate_key_value(iter->filepath, iter_key, iter_val,
+ &store_errno);
+ if (ret)
+ goto out;
+
+ *key = gf_strdup(iter_key);
+ if (!*key) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ *value = gf_strdup(iter_val);
+ if (!*value) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE(*key);
+ GF_FREE(*value);
+ *key = NULL;
+ *value = NULL;
+ }
+ if (op_errno)
+ *op_errno = store_errno;
+
+ gf_msg_debug("", 0, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value)
+{
+ int32_t ret = -1;
+ char *tmp_key = NULL;
+ char *tmp_value = NULL;
+
+ ret = gf_store_iter_get_next(iter, &tmp_key, &tmp_value, NULL);
+ while (!ret) {
+ if (!strncmp(key, tmp_key, strlen(key))) {
+ *value = tmp_value;
+ GF_FREE(tmp_key);
+ goto out;
+ }
+ GF_FREE(tmp_key);
+ tmp_key = NULL;
+ GF_FREE(tmp_value);
+ tmp_value = NULL;
+ ret = gf_store_iter_get_next(iter, &tmp_key, &tmp_value, NULL);
+ }
+out:
+ return ret;
+}
+
+int32_t
+gf_store_iter_destroy(gf_store_iter_t **iter)
+{
+ int32_t ret = -1;
+
+ if (!(*iter))
+ return 0;
+
+ /* gf_store_iter_new will not return a valid iter object with iter->file
+ * being NULL*/
+ ret = fclose((*iter)->file);
+ if (ret)
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable"
+ " to close file: %s, ret: %d",
+ (*iter)->filepath, ret);
+
+ GF_FREE(*iter);
+ *iter = NULL;
+
+ return ret;
+}
+
+char *
+gf_store_strerror(gf_store_op_errno_t op_errno)
+{
+ switch (op_errno) {
+ case GD_STORE_SUCCESS:
+ return "Success";
+ case GD_STORE_KEY_NULL:
+ return "Invalid Key";
+ case GD_STORE_VALUE_NULL:
+ return "Invalid Value";
+ case GD_STORE_KEY_VALUE_NULL:
+ return "Invalid Key and Value";
+ case GD_STORE_EOF:
+ return "No data";
+ case GD_STORE_ENOMEM:
+ return "No memory";
+ default:
+ return "Invalid errno";
+ }
+}
+
+int
+gf_store_lock(gf_store_handle_t *sh)
+{
+ int ret;
+
+ GF_ASSERT(sh);
+ GF_ASSERT(sh->path);
+ GF_ASSERT(sh->locked == F_ULOCK);
+
+ sh->fd = open(sh->path, O_RDWR);
+ if (sh->fd == -1) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to open '%s'", sh->path);
+ return -1;
+ }
+
+ ret = lockf(sh->fd, F_LOCK, 0);
+ if (ret)
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_LOCK_FAILED,
+ "Failed to gain lock on '%s'", sh->path);
+ else
+ /* sh->locked is protected by the lockf(sh->fd) above */
+ sh->locked = F_LOCK;
+
+ return ret;
+}
+
+void
+gf_store_unlock(gf_store_handle_t *sh)
+{
+ GF_ASSERT(sh);
+ GF_ASSERT(sh->locked == F_LOCK);
+
+ sh->locked = F_ULOCK;
+
+ /* does not matter if this fails, locks are released on close anyway */
+ if (lockf(sh->fd, F_ULOCK, 0) == -1)
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_UNLOCK_FAILED,
+ "Failed to release lock on '%s'", sh->path);
+
+ sys_close(sh->fd);
+}
+
+int
+gf_store_locked_local(gf_store_handle_t *sh)
+{
+ GF_ASSERT(sh);
+
+ return (sh->locked == F_LOCK);
+}
diff --git a/libglusterfs/src/strfd.c b/libglusterfs/src/strfd.c
new file mode 100644
index 00000000000..8a2580edc85
--- /dev/null
+++ b/libglusterfs/src/strfd.c
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdarg.h>
+
+#include "glusterfs/mem-types.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/strfd.h"
+#include "glusterfs/common-utils.h"
+
+strfd_t *
+strfd_open()
+{
+ strfd_t *strfd = NULL;
+
+ strfd = GF_CALLOC(1, sizeof(*strfd), gf_common_mt_strfd_t);
+
+ return strfd;
+}
+
+int
+strvprintf(strfd_t *strfd, const char *fmt, va_list ap)
+{
+ char *str = NULL;
+ int size = 0;
+
+ size = vasprintf(&str, fmt, ap);
+
+ if (size < 0)
+ return size;
+
+ if (!strfd->alloc_size) {
+ strfd->data = GF_CALLOC(max(size + 1, 4096), 1,
+ gf_common_mt_strfd_data_t);
+ if (!strfd->data) {
+ free(str); /* NOT GF_FREE */
+ return -1;
+ }
+ strfd->alloc_size = max(size + 1, 4096);
+ }
+
+ if (strfd->alloc_size <= (strfd->size + size)) {
+ char *tmp_ptr = NULL;
+ int new_size = max(
+ (strfd->alloc_size * 2),
+ gf_roundup_next_power_of_two(strfd->size + size + 1));
+ tmp_ptr = GF_REALLOC(strfd->data, new_size);
+ if (!tmp_ptr) {
+ free(str); /* NOT GF_FREE */
+ return -1;
+ }
+ strfd->alloc_size = new_size;
+ strfd->data = tmp_ptr;
+ }
+
+ /* Copy the trailing '\0', but do not account for it in ->size.
+ This allows safe use of strfd->data as a string. */
+ memcpy(strfd->data + strfd->size, str, size + 1);
+ strfd->size += size;
+
+ free(str); /* NOT GF_FREE */
+
+ return size;
+}
+
+int
+strprintf(strfd_t *strfd, const char *fmt, ...)
+{
+ int ret = 0;
+ va_list ap;
+
+ va_start(ap, fmt);
+ ret = strvprintf(strfd, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+int
+strfd_close(strfd_t *strfd)
+{
+ GF_FREE(strfd->data);
+ GF_FREE(strfd);
+
+ return 0;
+}
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
new file mode 100644
index 00000000000..d9f1723856d
--- /dev/null
+++ b/libglusterfs/src/syncop-utils.c
@@ -0,0 +1,669 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/syncop.h"
+#include "glusterfs/syncop-utils.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+struct syncop_dir_scan_data {
+ xlator_t *subvol;
+ loc_t *parent;
+ void *data;
+ gf_dirent_t *q;
+ gf_dirent_t *entry;
+ pthread_cond_t *cond;
+ pthread_mutex_t *mut;
+ syncop_dir_scan_fn_t fn;
+ uint32_t *jobs_running;
+ uint32_t *qlen;
+ int32_t *retval;
+};
+
+int
+syncop_dirfd(xlator_t *subvol, loc_t *loc, fd_t **fd, int pid)
+{
+ int ret = 0;
+ fd_t *dirfd = NULL;
+
+ if (!fd)
+ return -EINVAL;
+
+ dirfd = fd_create(loc->inode, pid);
+ if (!dirfd) {
+ gf_msg(subvol->name, GF_LOG_ERROR, errno, LG_MSG_FD_CREATE_FAILED,
+ "fd_create of %s", uuid_utoa(loc->gfid));
+ ret = -errno;
+ goto out;
+ }
+
+ ret = syncop_opendir(subvol, loc, dirfd, NULL, NULL);
+ if (ret) {
+ /*
+ * On Linux, if the brick was not updated, opendir will
+ * fail. We therefore use backward compatible code
+ * that violate the standards by reusing offsets
+ * in seekdir() from different DIR *, but it works on Linux.
+ *
+ * On other systems it never worked, hence we do not need
+ * to provide backward-compatibility.
+ */
+#ifdef GF_LINUX_HOST_OS
+ fd_unref(dirfd);
+ dirfd = fd_anonymous(loc->inode);
+ if (!dirfd) {
+ gf_msg(subvol->name, GF_LOG_ERROR, errno,
+ LG_MSG_FD_ANONYMOUS_FAILED,
+ "fd_anonymous of "
+ "%s",
+ uuid_utoa(loc->gfid));
+ ret = -errno;
+ goto out;
+ }
+ ret = 0;
+#else /* GF_LINUX_HOST_OS */
+ fd_unref(dirfd);
+ gf_msg(subvol->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "opendir of %s", uuid_utoa(loc->gfid));
+ goto out;
+#endif /* GF_LINUX_HOST_OS */
+ } else {
+ fd_bind(dirfd);
+ }
+out:
+ if (ret == 0)
+ *fd = dirfd;
+ return ret;
+}
+
+int
+syncop_ftw(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data))
+{
+ loc_t child_loc = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+ gf_dirent_t entries;
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdirp(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ gf_link_inode_from_dirent(NULL, fd->inode, entry);
+
+ ret = fn(subvol, entry, loc, data);
+ if (ret)
+ break;
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ child_loc.inode = inode_ref(entry->inode);
+ gf_uuid_copy(child_loc.gfid, entry->inode->gfid);
+ ret = syncop_ftw(subvol, &child_loc, pid, data, fn);
+ loc_wipe(&child_loc);
+ if (ret)
+ break;
+ }
+ }
+
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+
+/**
+ * Syncop_ftw_throttle can be used in a configurable way to control
+ * the speed at which crawling is done. It takes 2 more arguments
+ * compared to syncop_ftw.
+ * After @count entries are finished in a directory (to be
+ * precise, @count files) sleep for @sleep_time seconds.
+ * If either @count or @sleep_time is <=0, then it behaves similar to
+ * syncop_ftw.
+ */
+int
+syncop_ftw_throttle(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data),
+ int count, int sleep_time)
+{
+ loc_t child_loc = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+ gf_dirent_t entries;
+ int tmp = 0;
+
+ if (sleep_time <= 0) {
+ ret = syncop_ftw(subvol, loc, pid, data, fn);
+ goto out;
+ }
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdirp(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+
+ tmp = 0;
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ if (++tmp >= count) {
+ tmp = 0;
+ sleep(sleep_time);
+ }
+
+ gf_link_inode_from_dirent(NULL, fd->inode, entry);
+
+ ret = fn(subvol, entry, loc, data);
+ if (ret)
+ continue;
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ child_loc.inode = inode_ref(entry->inode);
+ gf_uuid_copy(child_loc.gfid, entry->inode->gfid);
+ ret = syncop_ftw_throttle(subvol, &child_loc, pid, data, fn,
+ count, sleep_time);
+ loc_wipe(&child_loc);
+ if (ret)
+ continue;
+ }
+ }
+
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+
+static void
+_scan_data_destroy(struct syncop_dir_scan_data *data)
+{
+ GF_FREE(data);
+}
+
+static int
+_dir_scan_job_fn_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ struct syncop_dir_scan_data *scan_data = opaque;
+
+ _scan_data_destroy(scan_data);
+ return 0;
+}
+
+static int
+_dir_scan_job_fn(void *data)
+{
+ struct syncop_dir_scan_data *scan_data = data;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+
+ entry = scan_data->entry;
+ scan_data->entry = NULL;
+ do {
+ ret = scan_data->fn(scan_data->subvol, entry, scan_data->parent,
+ scan_data->data);
+ gf_dirent_entry_free(entry);
+ entry = NULL;
+ pthread_mutex_lock(scan_data->mut);
+ {
+ if (ret)
+ *scan_data->retval |= ret;
+ if (list_empty(&scan_data->q->list)) {
+ (*scan_data->jobs_running)--;
+ pthread_cond_broadcast(scan_data->cond);
+ } else {
+ entry = list_first_entry(&scan_data->q->list,
+ typeof(*scan_data->q), list);
+ list_del_init(&entry->list);
+ (*scan_data->qlen)--;
+ }
+ }
+ pthread_mutex_unlock(scan_data->mut);
+ } while (entry);
+
+ return ret;
+}
+
+static int
+_run_dir_scan_task(call_frame_t *frame, xlator_t *subvol, loc_t *parent,
+ gf_dirent_t *q, gf_dirent_t *entry, int *retval,
+ pthread_mutex_t *mut, pthread_cond_t *cond,
+ uint32_t *jobs_running, uint32_t *qlen,
+ syncop_dir_scan_fn_t fn, void *data)
+{
+ int ret = 0;
+ struct syncop_dir_scan_data *scan_data = NULL;
+
+ scan_data = GF_CALLOC(1, sizeof(struct syncop_dir_scan_data),
+ gf_common_mt_scan_data);
+ if (!scan_data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ scan_data->subvol = subvol;
+ scan_data->parent = parent;
+ scan_data->data = data;
+ scan_data->mut = mut;
+ scan_data->cond = cond;
+ scan_data->fn = fn;
+ scan_data->jobs_running = jobs_running;
+ scan_data->entry = entry;
+ scan_data->q = q;
+ scan_data->qlen = qlen;
+ scan_data->retval = retval;
+
+ ret = synctask_new(subvol->ctx->env, _dir_scan_job_fn, _dir_scan_job_fn_cbk,
+ frame, scan_data);
+out:
+ if (ret < 0) {
+ gf_dirent_entry_free(entry);
+ _scan_data_destroy(scan_data);
+ pthread_mutex_lock(mut);
+ {
+ *jobs_running = *jobs_running - 1;
+ }
+ pthread_mutex_unlock(mut);
+ /*No need to cond-broadcast*/
+ }
+ return ret;
+}
+
+int
+syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ void *data, syncop_dir_scan_fn_t fn, dict_t *xdata,
+ uint32_t max_jobs, uint32_t max_qlen)
+{
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *last = NULL;
+ int ret = 0;
+ int retval = 0;
+ gf_dirent_t q;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ uint32_t jobs_running = 0;
+ uint32_t qlen = 0;
+ pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+ pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
+ gf_dirent_t entries;
+ xlator_t *this = NULL;
+
+ if (frame) {
+ this = frame->this;
+ } else {
+ this = THIS;
+ }
+
+ /*For this functionality to be implemented in general, we need
+ * synccond_t infra which doesn't block the executing thread. Until then
+ * return failures inside synctask if they use this.*/
+ if (synctask_get())
+ return -ENOTSUP;
+
+ if (max_jobs == 0)
+ return -EINVAL;
+
+ /*Code becomes simpler this way. cond_wait just on qlength.
+ * Little bit of cheating*/
+ if (max_qlen == 0)
+ max_qlen = 1;
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+ INIT_LIST_HEAD(&q.list);
+
+ while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, xdata,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+
+ last = list_last_entry(&entries.list, typeof(*last), list);
+ offset = last->d_off;
+
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
+ {
+ if (this && this->cleanup_starting)
+ goto out;
+
+ list_del_init(&entry->list);
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
+ gf_dirent_entry_free(entry);
+ continue;
+ }
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ ret = fn(subvol, entry, loc, data);
+ gf_dirent_entry_free(entry);
+ if (ret)
+ goto out;
+ continue;
+ }
+
+ if (retval) /*Any jobs failed?*/
+ goto out;
+
+ pthread_mutex_lock(&mut);
+ {
+ while (qlen == max_qlen)
+ pthread_cond_wait(&cond, &mut);
+ if (max_jobs == jobs_running) {
+ list_add_tail(&entry->list, &q.list);
+ qlen++;
+ entry = NULL;
+ } else {
+ jobs_running++;
+ }
+ }
+ pthread_mutex_unlock(&mut);
+
+ if (!entry)
+ continue;
+
+ ret = _run_dir_scan_task(frame, subvol, loc, &q, entry, &retval,
+ &mut, &cond, &jobs_running, &qlen, fn,
+ data);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+
+ pthread_mutex_lock(&mut);
+ {
+ while (jobs_running)
+ pthread_cond_wait(&cond, &mut);
+ }
+ pthread_mutex_unlock(&mut);
+
+ gf_dirent_free(&q);
+ gf_dirent_free(&entries);
+
+ return ret | retval;
+}
+
+int
+syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data))
+{
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+ gf_dirent_t entries;
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ ret = fn(subvol, entry, loc, data);
+ if (ret)
+ break;
+ }
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+
+int
+syncop_is_subvol_local(xlator_t *this, loc_t *loc, gf_boolean_t *is_local)
+{
+ char *pathinfo = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+
+ if (!this || !this->type || !is_local)
+ return -EINVAL;
+
+ if (strcmp(this->type, "protocol/client") != 0)
+ return -EINVAL;
+
+ *is_local = _gf_false;
+
+ ret = syncop_getxattr(this, loc, &xattr, GF_XATTR_PATHINFO_KEY, NULL, NULL);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!xattr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterfs_is_local_pathinfo(pathinfo, is_local);
+
+ gf_msg_debug(this->name, 0, "subvol %s is %slocal", this->name,
+ *is_local ? "" : "not ");
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
+}
+
+/**
+ * For hard resove, it it telling posix to make use of the
+ * gfid2path extended attribute stored on disk. Otherwise
+ * posix xlator (with GFID_TO_PATH_KEY as the key) will just
+ * do a in memory inode_path to get the path. Depending upon
+ * the consumer of this function, they can choose how they want
+ * to proceed. If doing a xattr operation sounds costly, then
+ * use GFID_TO_PATH_KEY as the key for getxattr.
+ **/
+
+int
+syncop_gfid_to_path_hard(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ inode_t *inode, char **path_p,
+ gf_boolean_t hard_resolve)
+{
+ int ret = 0;
+ char *path = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+
+ gf_uuid_copy(loc.gfid, gfid);
+
+ if (!inode)
+ loc.inode = inode_new(itable);
+ else
+ loc.inode = inode_ref(inode);
+
+ if (!hard_resolve)
+ ret = syncop_getxattr(subvol, &loc, &xattr, GFID_TO_PATH_KEY, NULL,
+ NULL);
+ else
+ ret = syncop_getxattr(subvol, &loc, &xattr, GFID2PATH_VIRT_XATTR_KEY,
+ NULL, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ /*
+ * posix will do dict_set_dynstr for GFID_TO_PATH_KEY i.e.
+ * for in memory search for the path. And for on disk xattr
+ * fetching of the path for the key GFID2PATH_VIRT_XATTR_KEY
+ * it uses dict_set_dynptr. So, for GFID2PATH_VIRT_XATTR_KEY
+ * use dict_get_ptr to avoid dict complaining about type
+ * mismatch (i.e. str vs ptr)
+ */
+ if (!hard_resolve)
+ ret = dict_get_str(xattr, GFID_TO_PATH_KEY, &path);
+ else
+ ret = dict_get_ptr(xattr, GFID2PATH_VIRT_XATTR_KEY, (void **)&path);
+
+ if (ret || !path) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (path_p) {
+ *path_p = gf_strdup(path);
+ if (!*path_p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+int
+syncop_gfid_to_path(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ char **path_p)
+{
+ return syncop_gfid_to_path_hard(itable, subvol, gfid, NULL, path_p,
+ _gf_false);
+}
+
+int
+syncop_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ inode_t **inode, dict_t *xdata, dict_t **rsp_dict)
+{
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ *inode = NULL;
+
+ *inode = inode_find(this->itable, gfid);
+ if (*inode)
+ goto out;
+
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ gf_uuid_copy(loc.gfid, gfid);
+
+ ret = syncop_lookup(subvol, &loc, &iatt, NULL, xdata, rsp_dict);
+ if (ret < 0)
+ goto out;
+
+ *inode = inode_link(loc.inode, NULL, NULL, &iatt);
+ if (!*inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+out:
+ loc_wipe(&loc);
+ return ret;
+}
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index c84832dfbcf..df20cec559f 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,1369 +8,3565 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#include "glusterfs/syncop.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+#ifdef HAVE_TSAN_API
+#include <sanitizer/tsan_interface.h>
#endif
-#include "syncop.h"
+int
+syncopctx_setfsuid(void *uid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!uid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->uid = *(uid_t *)uid;
+ opctx->valid |= SYNCOPCTX_UID;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfsgid(void *gid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!gid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->gid = *(gid_t *)gid;
+ opctx->valid |= SYNCOPCTX_GID;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfsgroups(int count, const void *groups)
+{
+ struct syncopctx *opctx = NULL;
+ gid_t *tmpgroups = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (count != 0 && !groups) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ /* resize internal groups as required */
+ if (count && opctx->grpsize < count) {
+ if (opctx->groups) {
+ /* Group list will be updated later, so no need to keep current
+ * data and waste time copying it. It's better to free the current
+ * allocation and then allocate a fresh new memory block. */
+ GF_FREE(opctx->groups);
+ opctx->groups = NULL;
+ opctx->grpsize = 0;
+ }
+ tmpgroups = GF_MALLOC(count * sizeof(gid_t), gf_common_mt_syncopctx);
+ if (tmpgroups == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ opctx->groups = tmpgroups;
+ opctx->grpsize = count;
+ }
+
+ /* copy out the groups passed */
+ if (count)
+ memcpy(opctx->groups, groups, (sizeof(gid_t) * count));
+
+ /* set/reset the ngrps, this is where reset of groups is handled */
+ opctx->ngrps = count;
+
+ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) {
+ /* This is the first time we are storing groups into the TLS structure
+ * so we mark the current thread so that it will be properly cleaned
+ * up when the thread terminates. */
+ gf_thread_needs_cleanup();
+ }
+ opctx->valid |= SYNCOPCTX_GROUPS;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfspid(void *pid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!pid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->pid = *(pid_t *)pid;
+ opctx->valid |= SYNCOPCTX_PID;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfslkowner(gf_lkowner_t *lk_owner)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!lk_owner) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->lk_owner = *lk_owner;
+ opctx->valid |= SYNCOPCTX_LKOWNER;
+
+out:
+ return ret;
+}
+
+void *
+syncenv_processor(void *thdata);
static void
-__run (struct synctask *task)
+__run(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
+ int32_t total, ret, i;
- env = task->env;
+ env = task->env;
- list_del_init (&task->all_tasks);
- switch (task->state) {
- case SYNCTASK_INIT:
+ list_del_init(&task->all_tasks);
+ switch (task->state) {
+ case SYNCTASK_INIT:
case SYNCTASK_SUSPEND:
- break;
- case SYNCTASK_RUN:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "re-running already running task");
- env->runcount--;
- break;
- case SYNCTASK_WAIT:
- env->waitcount--;
- break;
- case SYNCTASK_DONE:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "running completed task");
- break;
- }
-
- list_add_tail (&task->all_tasks, &env->runq);
- env->runcount++;
- task->state = SYNCTASK_RUN;
+ break;
+ case SYNCTASK_RUN:
+ gf_msg_debug(task->xl->name, 0,
+ "re-running already running"
+ " task");
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ break;
+ case SYNCTASK_DONE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_WAKE_UP_ZOMBIE,
+ "attempted to wake up "
+ "zombie!!");
+ return;
+ }
+
+ list_add_tail(&task->all_tasks, &env->runq);
+ task->state = SYNCTASK_RUN;
+
+ env->runcount++;
+
+ total = env->procs + env->runcount - env->procs_idle;
+ if (total > env->procmax) {
+ total = env->procmax;
+ }
+ if (total > env->procs) {
+ for (i = 0; i < env->procmax; i++) {
+ if (env->proc[i].env == NULL) {
+ env->proc[i].env = env;
+ ret = gf_thread_create(&env->proc[i].processor, NULL,
+ syncenv_processor, &env->proc[i],
+ "sproc%d", i);
+ if ((ret < 0) || (++env->procs >= total)) {
+ break;
+ }
+ }
+ }
+ }
}
-
static void
-__wait (struct synctask *task)
+__wait(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
- env = task->env;
+ env = task->env;
- list_del_init (&task->all_tasks);
- switch (task->state) {
- case SYNCTASK_INIT:
+ list_del_init(&task->all_tasks);
+ switch (task->state) {
+ case SYNCTASK_INIT:
case SYNCTASK_SUSPEND:
- break;
- case SYNCTASK_RUN:
- env->runcount--;
- break;
- case SYNCTASK_WAIT:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "re-waiting already waiting task");
- env->waitcount--;
- break;
- case SYNCTASK_DONE:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "running completed task");
- break;
- }
-
- list_add_tail (&task->all_tasks, &env->waitq);
- env->waitcount++;
- task->state = SYNCTASK_WAIT;
+ break;
+ case SYNCTASK_RUN:
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK,
+ "re-waiting already waiting "
+ "task");
+ break;
+ case SYNCTASK_DONE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_SLEEP_ZOMBIE,
+ "attempted to sleep a zombie!!");
+ return;
+ }
+
+ list_add_tail(&task->all_tasks, &env->waitq);
+ task->state = SYNCTASK_WAIT;
}
+void
+synctask_yield(struct synctask *task, struct timespec *delta)
+{
+ xlator_t *oldTHIS = THIS;
+
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontex() */
+ task->proc->sched.uc_flags &= ~_UC_TLSBASE;
+#endif
+
+ task->delta = delta;
+
+ if (task->state != SYNCTASK_DONE) {
+ task->state = SYNCTASK_SUSPEND;
+ }
+
+#ifdef HAVE_TSAN_API
+ __tsan_switch_to_fiber(task->proc->tsan.fiber, 0);
+#endif
+
+ if (swapcontext(&task->ctx, &task->proc->sched) < 0) {
+ gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED,
+ "swapcontext failed");
+ }
+
+ THIS = oldTHIS;
+}
void
-synctask_yield (struct synctask *task)
+synctask_sleep(int32_t secs)
{
- if (swapcontext (&task->ctx, &task->proc->sched) < 0) {
- gf_log ("syncop", GF_LOG_ERROR,
- "swapcontext failed (%s)", strerror (errno));
- }
+ struct timespec delta;
+ struct synctask *task;
+
+ task = synctask_get();
+
+ if (task == NULL) {
+ sleep(secs);
+ } else {
+ delta.tv_sec = secs;
+ delta.tv_nsec = 0;
+
+ synctask_yield(task, &delta);
+ }
}
+static void
+__synctask_wake(struct synctask *task)
+{
+ task->woken = 1;
+
+ if (task->slept)
+ __run(task);
+
+ pthread_cond_broadcast(&task->env->cond);
+}
void
-synctask_wake (struct synctask *task)
+synctask_wake(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
- env = task->env;
+ env = task->env;
- pthread_mutex_lock (&env->mutex);
- {
- task->woken = 1;
+ pthread_mutex_lock(&env->mutex);
+ {
+ if (task->timer != NULL) {
+ if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) {
+ goto unlock;
+ }
- if (task->slept)
- __run (task);
+ task->timer = NULL;
+ task->synccond = NULL;
}
- pthread_mutex_unlock (&env->mutex);
- pthread_cond_broadcast (&env->cond);
+ __synctask_wake(task);
+ }
+unlock:
+ pthread_mutex_unlock(&env->mutex);
}
void
-synctask_wrap (struct synctask *old_task)
+synctask_wrap(void)
{
- struct synctask *task = NULL;
+ struct synctask *task = NULL;
- /* Do not trust the pointer received. It may be
- wrong and can lead to crashes. */
+ /* Do not trust the pointer received. It may be
+ wrong and can lead to crashes. */
- task = synctask_get ();
- task->ret = task->syncfn (task->opaque);
- if (task->synccbk)
- task->synccbk (task->ret, task->frame, task->opaque);
+ task = synctask_get();
+ task->ret = task->syncfn(task->opaque);
+ if (task->synccbk)
+ task->synccbk(task->ret, task->frame, task->opaque);
- task->state = SYNCTASK_DONE;
+ task->state = SYNCTASK_DONE;
- synctask_yield (task);
+ synctask_yield(task, NULL);
}
-
void
-synctask_destroy (struct synctask *task)
+synctask_destroy(struct synctask *task)
{
- if (!task)
- return;
+ if (!task)
+ return;
- if (task->stack)
- FREE (task->stack);
+ GF_FREE(task->stack);
- if (task->opframe)
- STACK_DESTROY (task->opframe->root);
+ if (task->opframe)
+ STACK_DESTROY(task->opframe->root);
- pthread_mutex_destroy (&task->mutex);
+ if (task->synccbk == NULL) {
+ pthread_mutex_destroy(&task->mutex);
+ pthread_cond_destroy(&task->cond);
+ }
- pthread_cond_destroy (&task->cond);
+#ifdef HAVE_TSAN_API
+ __tsan_destroy_fiber(task->tsan.fiber);
+#endif
- FREE (task);
+ GF_FREE(task);
}
-
void
-synctask_done (struct synctask *task)
+synctask_done(struct synctask *task)
{
- if (task->synccbk) {
- synctask_destroy (task);
- return;
- }
+ if (task->synccbk) {
+ synctask_destroy(task);
+ return;
+ }
- pthread_mutex_lock (&task->mutex);
- {
- task->done = 1;
- pthread_cond_broadcast (&task->cond);
- }
- pthread_mutex_unlock (&task->mutex);
+ pthread_mutex_lock(&task->mutex);
+ {
+ task->state = SYNCTASK_ZOMBIE;
+ task->done = 1;
+ pthread_cond_broadcast(&task->cond);
+ }
+ pthread_mutex_unlock(&task->mutex);
}
-
int
-synctask_new (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
- call_frame_t *frame, void *opaque)
+synctask_setid(struct synctask *task, uid_t uid, gid_t gid)
{
- struct synctask *newtask = NULL;
- xlator_t *this = THIS;
- int ret = 0;
+ if (!task)
+ return -1;
- VALIDATE_OR_GOTO (env, err);
- VALIDATE_OR_GOTO (fn, err);
+ if (uid != -1)
+ task->uid = uid;
- newtask = CALLOC (1, sizeof (*newtask));
- if (!newtask)
- return -ENOMEM;
+ if (gid != -1)
+ task->gid = gid;
- newtask->frame = frame;
- if (!frame) {
- newtask->opframe = create_frame (this, this->ctx->pool);
- } else {
- newtask->opframe = copy_frame (frame);
- }
- if (!newtask->opframe)
- goto err;
- newtask->env = env;
- newtask->xl = this;
- newtask->syncfn = fn;
- newtask->synccbk = cbk;
- newtask->opaque = opaque;
-
- INIT_LIST_HEAD (&newtask->all_tasks);
-
- if (getcontext (&newtask->ctx) < 0) {
- gf_log ("syncop", GF_LOG_ERROR,
- "getcontext failed (%s)",
- strerror (errno));
- goto err;
- }
+ return 0;
+}
- newtask->stack = CALLOC (1, env->stacksize);
- if (!newtask->stack) {
- gf_log ("syncop", GF_LOG_ERROR,
- "out of memory for stack");
- goto err;
- }
+struct synctask *
+synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
+ synctask_cbk_t cbk, call_frame_t *frame, void *opaque)
+{
+ struct synctask *newtask = NULL;
+ xlator_t *this = THIS;
+ int destroymode = 0;
+
+ VALIDATE_OR_GOTO(env, err);
+ VALIDATE_OR_GOTO(fn, err);
+
+ /* Check if the syncenv is in destroymode i.e. destroy is SET.
+ * If YES, then don't allow any new synctasks on it. Return NULL.
+ */
+ pthread_mutex_lock(&env->mutex);
+ {
+ destroymode = env->destroy;
+ }
+ pthread_mutex_unlock(&env->mutex);
+
+ /* syncenv is in DESTROY mode, return from here */
+ if (destroymode)
+ return NULL;
- newtask->ctx.uc_stack.ss_sp = newtask->stack;
+ newtask = GF_CALLOC(1, sizeof(*newtask), gf_common_mt_synctask);
+ if (!newtask)
+ return NULL;
+
+ newtask->frame = frame;
+ if (!frame) {
+ newtask->opframe = create_frame(this, this->ctx->pool);
+ if (!newtask->opframe)
+ goto err;
+ set_lk_owner_from_ptr(&newtask->opframe->root->lk_owner,
+ newtask->opframe->root);
+ } else {
+ newtask->opframe = copy_frame(frame);
+ }
+ if (!newtask->opframe)
+ goto err;
+ newtask->env = env;
+ newtask->xl = this;
+ newtask->syncfn = fn;
+ newtask->synccbk = cbk;
+ newtask->opaque = opaque;
+
+ /* default to the uid/gid of the passed frame */
+ newtask->uid = newtask->opframe->root->uid;
+ newtask->gid = newtask->opframe->root->gid;
+
+ INIT_LIST_HEAD(&newtask->all_tasks);
+ INIT_LIST_HEAD(&newtask->waitq);
+
+ if (getcontext(&newtask->ctx) < 0) {
+ gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_GETCONTEXT_FAILED,
+ "getcontext failed");
+ goto err;
+ }
+
+ if (stacksize <= 0) {
+ newtask->stack = GF_CALLOC(1, env->stacksize, gf_common_mt_syncstack);
newtask->ctx.uc_stack.ss_size = env->stacksize;
+ } else {
+ newtask->stack = GF_CALLOC(1, stacksize, gf_common_mt_syncstack);
+ newtask->ctx.uc_stack.ss_size = stacksize;
+ }
- makecontext (&newtask->ctx, (void *) synctask_wrap, 2, newtask);
+ if (!newtask->stack) {
+ goto err;
+ }
- newtask->state = SYNCTASK_INIT;
+ newtask->ctx.uc_stack.ss_sp = newtask->stack;
- newtask->slept = 1;
+ makecontext(&newtask->ctx, (void (*)(void))synctask_wrap, 0);
- if (!cbk) {
- pthread_mutex_init (&newtask->mutex, NULL);
- pthread_cond_init (&newtask->cond, NULL);
- newtask->done = 0;
- }
+#ifdef HAVE_TSAN_API
+ newtask->tsan.fiber = __tsan_create_fiber(0);
+ snprintf(newtask->tsan.name, TSAN_THREAD_NAMELEN, "<synctask of %s>",
+ this->name);
+ __tsan_set_fiber_name(newtask->tsan.fiber, newtask->tsan.name);
+#endif
- synctask_wake (newtask);
+ newtask->state = SYNCTASK_INIT;
- if (!cbk) {
- pthread_mutex_lock (&newtask->mutex);
- {
- while (!newtask->done) {
- pthread_cond_wait (&newtask->cond, &newtask->mutex);
- }
- }
- pthread_mutex_unlock (&newtask->mutex);
+ newtask->slept = 1;
- ret = newtask->ret;
+ if (!cbk) {
+ pthread_mutex_init(&newtask->mutex, NULL);
+ pthread_cond_init(&newtask->cond, NULL);
+ newtask->done = 0;
+ }
- synctask_destroy (newtask);
- }
+ synctask_wake(newtask);
- return ret;
+ return newtask;
err:
- if (newtask) {
- if (newtask->stack)
- FREE (newtask->stack);
- if (newtask->opframe)
- STACK_DESTROY (newtask->opframe->root);
- FREE (newtask);
- }
- return -1;
+ if (newtask) {
+ GF_FREE(newtask->stack);
+ if (newtask->opframe)
+ STACK_DESTROY(newtask->opframe->root);
+ GF_FREE(newtask);
+ }
+
+ return NULL;
}
+int
+synctask_join(struct synctask *task)
+{
+ int ret = 0;
-struct synctask *
-syncenv_task (struct syncproc *proc)
+ pthread_mutex_lock(&task->mutex);
+ {
+ while (!task->done)
+ pthread_cond_wait(&task->cond, &task->mutex);
+ }
+ pthread_mutex_unlock(&task->mutex);
+
+ ret = task->ret;
+
+ synctask_destroy(task);
+
+ return ret;
+}
+
+int
+synctask_new1(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
+ synctask_cbk_t cbk, call_frame_t *frame, void *opaque)
{
- struct syncenv *env = NULL;
- struct synctask *task = NULL;
+ struct synctask *newtask = NULL;
+ int ret = 0;
- env = proc->env;
+ newtask = synctask_create(env, stacksize, fn, cbk, frame, opaque);
+ if (!newtask)
+ return -1;
- pthread_mutex_lock (&env->mutex);
- {
- while (list_empty (&env->runq))
- pthread_cond_wait (&env->cond, &env->mutex);
+ if (!cbk)
+ ret = synctask_join(newtask);
- task = list_entry (env->runq.next, struct synctask, all_tasks);
+ return ret;
+}
- list_del_init (&task->all_tasks);
- env->runcount--;
+int
+synctask_new(struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
+ call_frame_t *frame, void *opaque)
+{
+ return synctask_new1(env, 0, fn, cbk, frame, opaque);
+}
- task->proc = proc;
+struct synctask *
+syncenv_task(struct syncproc *proc)
+{
+ struct syncenv *env = NULL;
+ struct synctask *task = NULL;
+ struct timespec sleep_till = {
+ 0,
+ };
+ int ret = 0;
+
+ env = proc->env;
+
+ pthread_mutex_lock(&env->mutex);
+ {
+ while (list_empty(&env->runq)) {
+ /* If either of the conditions are met then exit
+ * the current thread:
+ * 1. syncenv has to scale down(procs > procmin)
+ * 2. syncenv is in destroy mode and no tasks in
+ * either waitq or runq.
+ *
+ * At any point in time, a task can be either in runq,
+ * or in executing state or in the waitq. Once the
+ * destroy mode is set, no new synctask creates will
+ * be allowed, but whatever in waitq or runq should be
+ * allowed to finish before exiting any of the syncenv
+ * processor threads.
+ */
+ if (((ret == ETIMEDOUT) && (env->procs > env->procmin)) ||
+ (env->destroy && list_empty(&env->waitq))) {
+ task = NULL;
+ env->procs--;
+ memset(proc, 0, sizeof(*proc));
+ pthread_cond_broadcast(&env->cond);
+ goto unlock;
+ }
+
+ env->procs_idle++;
+
+ sleep_till.tv_sec = gf_time() + SYNCPROC_IDLE_TIME;
+ ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till);
+
+ env->procs_idle--;
}
- pthread_mutex_unlock (&env->mutex);
- return task;
+ task = list_entry(env->runq.next, struct synctask, all_tasks);
+
+ list_del_init(&task->all_tasks);
+ env->runcount--;
+
+ task->woken = 0;
+ task->slept = 0;
+
+ task->proc = proc;
+ }
+unlock:
+ pthread_mutex_unlock(&env->mutex);
+
+ return task;
}
+static void
+synctask_timer(void *data)
+{
+ struct synctask *task = data;
+ struct synccond *cond;
+
+ cond = task->synccond;
+ if (cond != NULL) {
+ pthread_mutex_lock(&cond->pmutex);
+
+ list_del_init(&task->waitq);
+ task->synccond = NULL;
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ task->ret = -ETIMEDOUT;
+ }
+
+ pthread_mutex_lock(&task->env->mutex);
+
+ gf_timer_call_cancel(task->xl->ctx, task->timer);
+ task->timer = NULL;
+
+ __synctask_wake(task);
+
+ pthread_mutex_unlock(&task->env->mutex);
+}
void
-synctask_switchto (struct synctask *task)
+synctask_switchto(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
- env = task->env;
+ env = task->env;
- synctask_set (task);
- THIS = task->xl;
+ synctask_set(task);
+ THIS = task->xl;
- task->woken = 0;
- task->slept = 0;
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontex() */
+ task->ctx.uc_flags &= ~_UC_TLSBASE;
+#endif
- if (swapcontext (&task->proc->sched, &task->ctx) < 0) {
- gf_log ("syncop", GF_LOG_ERROR,
- "swapcontext failed (%s)", strerror (errno));
- }
+#ifdef HAVE_TSAN_API
+ __tsan_switch_to_fiber(task->tsan.fiber, 0);
+#endif
- if (task->state == SYNCTASK_DONE) {
- synctask_done (task);
- return;
- }
+ if (swapcontext(&task->proc->sched, &task->ctx) < 0) {
+ gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED,
+ "swapcontext failed");
+ }
- pthread_mutex_lock (&env->mutex);
- {
- if (task->woken) {
- __run (task);
- } else {
- task->slept = 1;
- __wait (task);
- }
+ if (task->state == SYNCTASK_DONE) {
+ synctask_done(task);
+ return;
+ }
+
+ pthread_mutex_lock(&env->mutex);
+ {
+ if (task->woken) {
+ __run(task);
+ } else {
+ task->slept = 1;
+ __wait(task);
+
+ if (task->delta != NULL) {
+ task->timer = gf_timer_call_after(task->xl->ctx, *task->delta,
+ synctask_timer, task);
+ }
}
- pthread_mutex_unlock (&env->mutex);
+
+ task->delta = NULL;
+ }
+ pthread_mutex_unlock(&env->mutex);
}
void *
-syncenv_processor (void *thdata)
+syncenv_processor(void *thdata)
{
- struct syncenv *env = NULL;
- struct syncproc *proc = NULL;
- struct synctask *task = NULL;
+ struct syncproc *proc = NULL;
+ struct synctask *task = NULL;
- proc = thdata;
- env = proc->env;
+ proc = thdata;
- for (;;) {
- task = syncenv_task (proc);
+#ifdef HAVE_TSAN_API
+ proc->tsan.fiber = __tsan_create_fiber(0);
+ snprintf(proc->tsan.name, TSAN_THREAD_NAMELEN, "<sched of syncenv@%p>",
+ proc);
+ __tsan_set_fiber_name(proc->tsan.fiber, proc->tsan.name);
+#endif
- synctask_switchto (task);
+ while ((task = syncenv_task(proc)) != NULL) {
+ synctask_switchto(task);
+ }
- syncenv_scale (env);
+#ifdef HAVE_TSAN_API
+ __tsan_destroy_fiber(proc->tsan.fiber);
+#endif
+
+ return NULL;
+}
+
+/* The syncenv threads are cleaned up in this routine.
+ */
+void
+syncenv_destroy(struct syncenv *env)
+{
+ if (env == NULL)
+ return;
+
+ /* SET the 'destroy' in syncenv structure to prohibit any
+ * further synctask(s) on this syncenv which is in destroy mode.
+ *
+ * If syncenv threads are in pthread cond wait with no tasks in
+ * their run or wait queue, then the threads are woken up by
+ * broadcasting the cond variable and if destroy field is set,
+ * the infinite loop in syncenv_processor is broken and the
+ * threads return.
+ *
+ * If syncenv threads have tasks in runq or waitq, the tasks are
+ * completed and only then the thread returns.
+ */
+ pthread_mutex_lock(&env->mutex);
+ {
+ env->destroy = 1;
+ /* This broadcast will wake threads in pthread_cond_wait
+ * in syncenv_task
+ */
+ pthread_cond_broadcast(&env->cond);
+
+ /* when the syncenv_task() thread is exiting, it broadcasts to
+ * wake the below wait.
+ */
+ while (env->procs != 0) {
+ pthread_cond_wait(&env->cond, &env->mutex);
}
+ }
+ pthread_mutex_unlock(&env->mutex);
+ pthread_mutex_destroy(&env->mutex);
+ pthread_cond_destroy(&env->cond);
+
+ GF_FREE(env);
+
+ return;
+}
+
+struct syncenv *
+syncenv_new(size_t stacksize, int procmin, int procmax)
+{
+ struct syncenv *newenv = NULL;
+ int ret = 0;
+ int i = 0;
+
+ if (!procmin || procmin < 0)
+ procmin = SYNCENV_PROC_MIN;
+ if (!procmax || procmax > SYNCENV_PROC_MAX)
+ procmax = SYNCENV_PROC_MAX;
+
+ if (procmin > procmax)
+ return NULL;
+
+ newenv = GF_CALLOC(1, sizeof(*newenv), gf_common_mt_syncenv);
+
+ if (!newenv)
return NULL;
+
+ pthread_mutex_init(&newenv->mutex, NULL);
+ pthread_cond_init(&newenv->cond, NULL);
+
+ INIT_LIST_HEAD(&newenv->runq);
+ INIT_LIST_HEAD(&newenv->waitq);
+
+ newenv->stacksize = SYNCENV_DEFAULT_STACKSIZE;
+ if (stacksize)
+ newenv->stacksize = stacksize;
+ newenv->procmin = procmin;
+ newenv->procmax = procmax;
+ newenv->procs_idle = 0;
+
+ for (i = 0; i < newenv->procmin; i++) {
+ newenv->proc[i].env = newenv;
+ ret = gf_thread_create(&newenv->proc[i].processor, NULL,
+ syncenv_processor, &newenv->proc[i], "sproc%d",
+ i);
+ if (ret)
+ break;
+ newenv->procs++;
+ }
+
+ if (ret != 0) {
+ syncenv_destroy(newenv);
+ newenv = NULL;
+ }
+
+ return newenv;
}
+int
+synclock_init(synclock_t *lock, lock_attr_t attr)
+{
+ if (!lock)
+ return -1;
-void
-syncenv_scale (struct syncenv *env)
-{
- int thmax = 0;
- int i = 0;
- int ret = 0;
-
- pthread_mutex_lock (&env->mutex);
- {
- if (env->procs > env->runcount)
- goto unlock;
-
- thmax = min (env->runcount, SYNCENV_PROC_MAX);
- for (i = env->procs; i < thmax; i++) {
- env->proc[i].env = env;
- ret = pthread_create (&env->proc[i].processor, NULL,
- syncenv_processor, &env->proc[i]);
- if (ret)
- break;
- env->procs++;
- }
- }
+ pthread_cond_init(&lock->cond, 0);
+ lock->type = LOCK_NULL;
+ lock->owner = NULL;
+ lock->owner_tid = 0;
+ lock->lock = 0;
+ lock->attr = attr;
+ INIT_LIST_HEAD(&lock->waitq);
+
+ return pthread_mutex_init(&lock->guard, 0);
+}
+
+int
+synclock_destroy(synclock_t *lock)
+{
+ if (!lock)
+ return -1;
+
+ pthread_cond_destroy(&lock->cond);
+ return pthread_mutex_destroy(&lock->guard);
+}
+
+static int
+__synclock_lock(struct synclock *lock)
+{
+ struct synctask *task = NULL;
+
+ if (!lock)
+ return -1;
+
+ task = synctask_get();
+
+ if (lock->lock && (lock->attr == SYNC_LOCK_RECURSIVE)) {
+ /*Recursive lock (if same owner requested for lock again then
+ *increment lock count and return success).
+ *Note:same number of unlocks required.
+ */
+ switch (lock->type) {
+ case LOCK_TASK:
+ if (task == lock->owner) {
+ lock->lock++;
+ gf_msg_trace("", 0,
+ "Recursive lock called by"
+ " sync task.owner= %p,lock=%d",
+ lock->owner, lock->lock);
+ return 0;
+ }
+ break;
+ case LOCK_THREAD:
+ if (pthread_equal(pthread_self(), lock->owner_tid)) {
+ lock->lock++;
+ gf_msg_trace("", 0,
+ "Recursive lock called by"
+ " thread ,owner=%u lock=%d",
+ (unsigned int)lock->owner_tid, lock->lock);
+ return 0;
+ }
+ break;
+ default:
+ gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_UNKNOWN_LOCK_TYPE,
+ "unknown lock type");
+ break;
+ }
+ }
+
+ while (lock->lock) {
+ if (task) {
+ /* called within a synctask */
+ task->woken = 0;
+ list_add_tail(&task->waitq, &lock->waitq);
+ pthread_mutex_unlock(&lock->guard);
+ synctask_yield(task, NULL);
+ /* task is removed from waitq in unlock,
+ * under lock->guard.*/
+ pthread_mutex_lock(&lock->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait(&lock->cond, &lock->guard);
+ }
+ }
+
+ if (task) {
+ lock->type = LOCK_TASK;
+ lock->owner = task; /* for synctask*/
+
+ } else {
+ lock->type = LOCK_THREAD;
+ lock->owner_tid = pthread_self(); /* for non-synctask */
+ }
+ lock->lock = 1;
+
+ return 0;
+}
+
+int
+synclock_lock(synclock_t *lock)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&lock->guard);
+ {
+ ret = __synclock_lock(lock);
+ }
+ pthread_mutex_unlock(&lock->guard);
+
+ return ret;
+}
+
+int
+synclock_trylock(synclock_t *lock)
+{
+ int ret = 0;
+
+ errno = 0;
+
+ pthread_mutex_lock(&lock->guard);
+ {
+ if (lock->lock) {
+ errno = EBUSY;
+ ret = -1;
+ goto unlock;
+ }
+
+ ret = __synclock_lock(lock);
+ }
unlock:
- pthread_mutex_unlock (&env->mutex);
+ pthread_mutex_unlock(&lock->guard);
+
+ return ret;
}
+static int
+__synclock_unlock(synclock_t *lock)
+{
+ struct synctask *task = NULL;
+ struct synctask *curr = NULL;
+
+ if (!lock)
+ return -1;
-void
-syncenv_destroy (struct syncenv *env)
+ if (lock->lock == 0) {
+ gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_UNLOCK_BEFORE_LOCK,
+ "Unlock called before lock ");
+ return -1;
+ }
+ curr = synctask_get();
+ /*unlock should be called by lock owner
+ *i.e this will not allow the lock in nonsync task and unlock
+ * in sync task and vice-versa
+ */
+ switch (lock->type) {
+ case LOCK_TASK:
+ if (curr == lock->owner) {
+ lock->lock--;
+ gf_msg_trace("", 0,
+ "Unlock success %p, remaining"
+ " locks=%d",
+ lock->owner, lock->lock);
+ } else {
+ gf_msg("", GF_LOG_WARNING, 0, LG_MSG_LOCK_OWNER_ERROR,
+ "Unlock called by %p, but lock held by %p", curr,
+ lock->owner);
+ }
+
+ break;
+ case LOCK_THREAD:
+ if (pthread_equal(pthread_self(), lock->owner_tid)) {
+ lock->lock--;
+ gf_msg_trace("", 0,
+ "Unlock success %u, remaining "
+ "locks=%d",
+ (unsigned int)lock->owner_tid, lock->lock);
+ } else {
+ gf_msg("", GF_LOG_WARNING, 0, LG_MSG_LOCK_OWNER_ERROR,
+ "Unlock called by %u, but lock held by %u",
+ (unsigned int)pthread_self(),
+ (unsigned int)lock->owner_tid);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ if (lock->lock > 0) {
+ return 0;
+ }
+ lock->type = LOCK_NULL;
+ lock->owner = NULL;
+ lock->owner_tid = 0;
+ lock->lock = 0;
+ /* There could be both synctasks and non synctasks
+ waiting (or none, or either). As a mid-approach
+ between maintaining too many waiting counters
+ at one extreme and a thundering herd on unlock
+ at the other, call a cond_signal (which wakes
+ one waiter) and first synctask waiter. So at
+ most we have two threads waking up to grab the
+ just released lock.
+ */
+ pthread_cond_signal(&lock->cond);
+ if (!list_empty(&lock->waitq)) {
+ task = list_entry(lock->waitq.next, struct synctask, waitq);
+ list_del_init(&task->waitq);
+ synctask_wake(task);
+ }
+
+ return 0;
+}
+
+int
+synclock_unlock(synclock_t *lock)
{
+ int ret = 0;
+
+ pthread_mutex_lock(&lock->guard);
+ {
+ ret = __synclock_unlock(lock);
+ }
+ pthread_mutex_unlock(&lock->guard);
+ return ret;
}
+/* Condition variables */
-struct syncenv *
-syncenv_new (size_t stacksize)
+int32_t
+synccond_init(synccond_t *cond)
{
- struct syncenv *newenv = NULL;
- int ret = 0;
- int i = 0;
+ int32_t ret;
- newenv = CALLOC (1, sizeof (*newenv));
+ INIT_LIST_HEAD(&cond->waitq);
- if (!newenv)
- return NULL;
+ ret = pthread_mutex_init(&cond->pmutex, NULL);
+ if (ret != 0) {
+ return -ret;
+ }
- pthread_mutex_init (&newenv->mutex, NULL);
- pthread_cond_init (&newenv->cond, NULL);
+ ret = pthread_cond_init(&cond->pcond, NULL);
+ if (ret != 0) {
+ pthread_mutex_destroy(&cond->pmutex);
+ }
- INIT_LIST_HEAD (&newenv->runq);
- INIT_LIST_HEAD (&newenv->waitq);
+ return -ret;
+}
- newenv->stacksize = SYNCENV_DEFAULT_STACKSIZE;
- if (stacksize)
- newenv->stacksize = stacksize;
+void
+synccond_destroy(synccond_t *cond)
+{
+ pthread_cond_destroy(&cond->pcond);
+ pthread_mutex_destroy(&cond->pmutex);
+}
+
+int
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta)
+{
+ struct timespec now;
+ struct synctask *task = NULL;
+ int ret;
- for (i = 0; i < SYNCENV_PROC_MIN; i++) {
- newenv->proc[i].env = newenv;
- ret = pthread_create (&newenv->proc[i].processor, NULL,
- syncenv_processor, &newenv->proc[i]);
- if (ret)
- break;
- newenv->procs++;
+ task = synctask_get();
+
+ if (task == NULL) {
+ if (delta != NULL) {
+ timespec_now_realtime(&now);
+ timespec_adjust_delta(&now, *delta);
+ }
+
+ pthread_mutex_lock(&cond->pmutex);
+
+ if (delta == NULL) {
+ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex);
+ } else {
+ ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now);
}
+ } else {
+ pthread_mutex_lock(&cond->pmutex);
+
+ list_add_tail(&task->waitq, &cond->waitq);
+ task->synccond = cond;
- if (ret != 0)
- syncenv_destroy (newenv);
+ ret = synclock_unlock(lock);
+ if (ret == 0) {
+ pthread_mutex_unlock(&cond->pmutex);
- return newenv;
+ synctask_yield(task, delta);
+
+ ret = synclock_lock(lock);
+ if (ret == 0) {
+ ret = task->ret;
+ }
+ task->ret = 0;
+
+ return ret;
+ }
+
+ list_del_init(&task->waitq);
+ }
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ return ret;
}
+int
+synccond_wait(synccond_t *cond, synclock_t *lock)
+{
+ return synccond_timedwait(cond, lock, NULL);
+}
-/* FOPS */
+void
+synccond_signal(synccond_t *cond)
+{
+ struct synctask *task;
+
+ pthread_mutex_lock(&cond->pmutex);
+
+ if (!list_empty(&cond->waitq)) {
+ task = list_first_entry(&cond->waitq, struct synctask, waitq);
+ list_del_init(&task->waitq);
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ synctask_wake(task);
+ } else {
+ pthread_cond_signal(&cond->pcond);
+
+ pthread_mutex_unlock(&cond->pmutex);
+ }
+}
+
+void
+synccond_broadcast(synccond_t *cond)
+{
+ struct list_head list;
+ struct synctask *task;
+
+ INIT_LIST_HEAD(&list);
+
+ pthread_mutex_lock(&cond->pmutex);
+ list_splice_init(&cond->waitq, &list);
+ pthread_cond_broadcast(&cond->pcond);
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ while (!list_empty(&list)) {
+ task = list_first_entry(&list, struct synctask, waitq);
+ list_del_init(&task->waitq);
+
+ synctask_wake(task);
+ }
+}
+
+/* Barriers */
int
-syncop_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *iatt, dict_t *xdata, struct iatt *parent)
+syncbarrier_init(struct syncbarrier *barrier)
{
- struct syncargs *args = NULL;
+ int ret = 0;
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
- args = cookie;
+ ret = pthread_cond_init(&barrier->cond, 0);
+ if (ret) {
+ errno = ret;
+ return -1;
+ }
+ barrier->count = 0;
+ barrier->waitfor = 0;
+ INIT_LIST_HEAD(&barrier->waitq);
+
+ ret = pthread_mutex_init(&barrier->guard, 0);
+ if (ret) {
+ (void)pthread_cond_destroy(&barrier->cond);
+ errno = ret;
+ return -1;
+ }
+ barrier->initialized = _gf_true;
+ return 0;
+}
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+int
+syncbarrier_destroy(struct syncbarrier *barrier)
+{
+ int ret = 0;
+ int ret1 = 0;
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (barrier->initialized) {
+ ret = pthread_cond_destroy(&barrier->cond);
+ ret1 = pthread_mutex_destroy(&barrier->guard);
+ barrier->initialized = _gf_false;
+ }
+ if (ret || ret1) {
+ errno = ret ? ret : ret1;
+ return -1;
+ }
+ return 0;
+}
+
+static int
+__syncbarrier_wait(struct syncbarrier *barrier, int waitfor)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
- if (op_ret == 0) {
- args->iatt1 = *iatt;
- args->iatt2 = *parent;
- if (xdata)
- args->xdata = dict_ref (xdata);
+ task = synctask_get();
+
+ while (barrier->count < waitfor) {
+ if (task) {
+ /* called within a synctask */
+ list_add_tail(&task->waitq, &barrier->waitq);
+ pthread_mutex_unlock(&barrier->guard);
+ synctask_yield(task, NULL);
+ pthread_mutex_lock(&barrier->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait(&barrier->cond, &barrier->guard);
}
+ }
+
+ barrier->count = 0;
- __wake (args);
+ return 0;
+}
+int
+syncbarrier_wait(struct syncbarrier *barrier, int waitfor)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&barrier->guard);
+ {
+ ret = __syncbarrier_wait(barrier, waitfor);
+ }
+ pthread_mutex_unlock(&barrier->guard);
+
+ return ret;
+}
+
+static int
+__syncbarrier_wake(struct syncbarrier *barrier)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ barrier->count++;
+ if (barrier->waitfor && (barrier->count < barrier->waitfor))
return 0;
+
+ pthread_cond_signal(&barrier->cond);
+ if (!list_empty(&barrier->waitq)) {
+ task = list_entry(barrier->waitq.next, struct synctask, waitq);
+ list_del_init(&task->waitq);
+ synctask_wake(task);
+ }
+ barrier->waitfor = 0;
+
+ return 0;
}
+int
+syncbarrier_wake(struct syncbarrier *barrier)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&barrier->guard);
+ {
+ ret = __syncbarrier_wake(barrier);
+ }
+ pthread_mutex_unlock(&barrier->guard);
+
+ return ret;
+}
+
+/* FOPS */
int
-syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xdata_req,
- struct iatt *iatt, dict_t **xdata_rsp, struct iatt *parent)
+syncop_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *iatt,
+ dict_t *xdata, struct iatt *parent)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_lookup_cbk, subvol->fops->lookup,
- loc, xdata_req);
+ args = cookie;
- if (iatt)
- *iatt = args.iatt1;
- if (parent)
- *parent = args.iatt2;
- if (xdata_rsp)
- *xdata_rsp = args.xdata;
- else if (args.xdata)
- dict_unref (args.xdata);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (op_ret == 0) {
+ args->iatt1 = *iatt;
+ args->iatt2 = *parent;
+ }
+
+ __wake(args);
+
+ return 0;
}
-static gf_dirent_t *
-entry_copy (gf_dirent_t *source)
+int
+syncop_lookup(xlator_t *subvol, loc_t *loc, struct iatt *iatt,
+ struct iatt *parent, dict_t *xdata_in, dict_t **xdata_out)
{
- gf_dirent_t *sink = NULL;
+ struct syncargs args = {
+ 0,
+ };
- sink = gf_dirent_for_name (source->d_name);
+ SYNCOP(subvol, (&args), syncop_lookup_cbk, subvol->fops->lookup, loc,
+ xdata_in);
- sink->d_off = source->d_off;
- sink->d_ino = source->d_ino;
- sink->d_type = source->d_type;
- sink->d_stat = source->d_stat;
+ if (iatt)
+ *iatt = args.iatt1;
+ if (parent)
+ *parent = args.iatt2;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- return sink;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_readdirp_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- struct syncargs *args = NULL;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
-
- int count = 0;
-
- args = cookie;
-
- INIT_LIST_HEAD (&args->entries.list);
-
- args->op_ret = op_ret;
- args->op_errno = op_errno;
-
- if (op_ret >= 0) {
- list_for_each_entry (entry, &entries->list, list) {
- tmp = entry_copy (entry);
- gf_log (this->name, GF_LOG_TRACE,
- "adding entry=%s, count=%d",
- tmp->d_name, count);
- list_add_tail (&tmp->list, &(args->entries.list));
- count++;
- }
- }
+syncop_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
- __wake (args);
+ int count = 0;
- return 0;
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ tmp = entry_copy(entry);
+ if (!tmp) {
+ args->op_ret = -1;
+ args->op_errno = ENOMEM;
+ gf_dirent_free(&(args->entries));
+ break;
+ }
+ gf_msg_trace(this->name, 0,
+ "adding entry=%s, "
+ "count=%d",
+ tmp->d_name, count);
+ list_add_tail(&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+
+ __wake(args);
+ return 0;
}
int
-syncop_readdirp (xlator_t *subvol,
- fd_t *fd,
- size_t size,
- off_t off,
- dict_t *dict,
- gf_dirent_t *entries)
+syncop_readdirp(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_readdirp_cbk, subvol->fops->readdirp,
- fd, size, off, dict);
+ SYNCOP(subvol, (&args), syncop_readdirp_cbk, subvol->fops->readdirp, fd,
+ size, off, xdata_in);
- if (entries)
- list_splice_init (&args.entries.list, &entries->list);
- /* TODO: need to free all the 'args.entries' in 'else' case */
+ if (entries)
+ list_splice_init(&args.entries.list, &entries->list);
+ else
+ gf_dirent_free(&args.entries);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- struct syncargs *args = NULL;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
-
- int count = 0;
-
- args = cookie;
-
- INIT_LIST_HEAD (&args->entries.list);
-
- args->op_ret = op_ret;
- args->op_errno = op_errno;
-
- if (op_ret >= 0) {
- list_for_each_entry (entry, &entries->list, list) {
- tmp = entry_copy (entry);
- gf_log (this->name, GF_LOG_TRACE,
- "adding entry=%s, count=%d",
- tmp->d_name, count);
- list_add_tail (&tmp->list, &(args->entries.list));
- count++;
- }
- }
+syncop_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
- __wake (args);
+ int count = 0;
- return 0;
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ tmp = entry_copy(entry);
+ if (!tmp) {
+ args->op_ret = -1;
+ args->op_errno = ENOMEM;
+ gf_dirent_free(&(args->entries));
+ break;
+ }
+ gf_msg_trace(this->name, 0,
+ "adding "
+ "entry=%s, count=%d",
+ tmp->d_name, count);
+ list_add_tail(&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+ __wake(args);
+
+ return 0;
}
int
-syncop_readdir (xlator_t *subvol,
- fd_t *fd,
- size_t size,
- off_t off,
- gf_dirent_t *entries)
+syncop_readdir(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_readdir_cbk, subvol->fops->readdir,
- fd, size, off, NULL);
+ SYNCOP(subvol, (&args), syncop_readdir_cbk, subvol->fops->readdir, fd, size,
+ off, xdata_in);
- if (entries)
- list_splice_init (&args.entries.list, &entries->list);
- /* TODO: need to free all the 'args.entries' in 'else' case */
+ if (entries)
+ list_splice_init(&args.entries.list, &entries->list);
+ else
+ gf_dirent_free(&args.entries);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+syncop_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_opendir (xlator_t *subvol,
- loc_t *loc,
- fd_t *fd)
+syncop_opendir(xlator_t *subvol, loc_t *loc, fd_t *fd, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir,
- loc, fd, NULL);
+ SYNCOP(subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir, loc, fd,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+syncop_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_fsyncdir(xlator_t *subvol, fd_t *fd, int datasync, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fsyncdir_cbk, subvol->fops->fsyncdir, fd,
+ datasync, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_removexattr(xlator_t *subvol, loc_t *loc, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_removexattr_cbk, subvol->fops->removexattr,
+ loc, name, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name)
+syncop_fremovexattr(xlator_t *subvol, fd_t *fd, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_removexattr_cbk, subvol->fops->removexattr,
- loc, name, NULL);
+ SYNCOP(subvol, (&args), syncop_fremovexattr_cbk, subvol->fops->fremovexattr,
+ fd, name, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+syncop_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name)
+syncop_setxattr(xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_fremovexattr_cbk,
- subvol->fops->fremovexattr, fd, name, NULL);
+ SYNCOP(subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr, loc,
+ dict, flags, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+syncop_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
+int
+syncop_fsetxattr(xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fsetxattr_cbk, subvol->fops->fsetxattr, fd,
+ dict, flags, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags)
+syncop_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr,
- loc, dict, flags, NULL);
+ args = cookie;
- errno = args.op_errno;
- return args.op_ret;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0)
+ args->xattr = dict_ref(dict);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+syncop_listxattr(xlator_t *subvol, loc_t *loc, dict_t **dict, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr, loc,
+ NULL, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref(args.xattr);
- __wake (args);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- return 0;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int
+syncop_getxattr(xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr, loc,
+ key, xdata_in);
+
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref(args.xattr);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags)
+syncop_fgetxattr(xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_fsetxattr_cbk, subvol->fops->fsetxattr,
- fd, dict, flags, NULL);
+ SYNCOP(subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr, fd,
+ key, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref(args.xattr);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+syncop_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
- if (op_ret >= 0)
- args->xattr = dict_ref (dict);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ if (op_ret == 0) {
+ args->statvfs_buf = *buf;
+ }
- return 0;
+ __wake(args);
+
+ return 0;
}
int
-syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict)
+syncop_statfs(xlator_t *subvol, loc_t *loc, struct statvfs *buf,
+ dict_t *xdata_in, dict_t **xdata_out)
+
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
- loc, NULL, NULL);
+ SYNCOP(subvol, (&args), syncop_statfs_cbk, subvol->fops->statfs, loc,
+ xdata_in);
- if (dict)
- *dict = args.xattr;
- else if (args.xattr)
- dict_unref (args.xattr);
+ if (buf)
+ *buf = args.statvfs_buf;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_getxattr (xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key)
+syncop_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
- loc, key, NULL);
+ args = cookie;
- if (dict)
- *dict = args.xattr;
- else if (args.xattr)
- dict_unref (args.xattr);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (op_ret == 0) {
+ args->iatt1 = *preop;
+ args->iatt2 = *postop;
+ }
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key)
+syncop_setattr(xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr,
- fd, key, NULL);
+ SYNCOP(subvol, (&args), syncop_setattr_cbk, subvol->fops->setattr, loc,
+ iatt, valid, xdata_in);
- if (dict)
- *dict = args.xattr;
- else if (args.xattr)
- dict_unref (args.xattr);
+ if (preop)
+ *preop = args.iatt1;
+ if (postop)
+ *postop = args.iatt2;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct statvfs *buf, dict_t *xdata)
+syncop_fsetattr(xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_setattr_cbk, subvol->fops->fsetattr, fd,
+ iatt, valid, xdata_in);
+ if (preop)
+ *preop = args.iatt1;
+ if (postop)
+ *postop = args.iatt2;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int32_t
+syncop_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (op_ret == 0) {
- args->statvfs_buf = *buf;
- }
+ __wake(args);
- __wake (args);
+ return 0;
+}
- return 0;
+int
+syncop_open(xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_open_cbk, subvol->fops->open, loc, flags, fd,
+ xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int32_t
+syncop_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (args->op_ret >= 0) {
+ if (iobref)
+ args->iobref = iobref_ref(iobref);
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->iatt1 = *stbuf;
+ }
+
+ __wake(args);
+
+ return 0;
}
+int
+syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags,
+ struct iovec **vector, int *count, struct iobref **iobref,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_readv_cbk, subvol->fops->readv, fd, size,
+ off, flags, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (iatt)
+ *iatt = args.iatt1;
+
+ if (args.op_ret < 0)
+ goto out;
+
+ if (vector)
+ *vector = args.vector;
+ else
+ GF_FREE(args.vector);
+
+ if (count)
+ *count = args.count;
+
+ /* Do we need a 'ref' here? */
+ if (iobref)
+ *iobref = args.iobref;
+ else if (args.iobref)
+ iobref_unref(args.iobref);
+
+out:
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf)
+syncop_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ args->iatt1 = *prebuf;
+ args->iatt2 = *postbuf;
+ }
+
+ __wake(args);
+ return 0;
+}
+
+int
+syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector,
+ int32_t count, off_t offset, struct iobref *iobref,
+ uint32_t flags, struct iatt *preiatt, struct iatt *postiatt,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_statfs_cbk, subvol->fops->statfs,
- loc, NULL);
+ SYNCOP(subvol, (&args), syncop_writev_cbk, subvol->fops->writev, fd,
+ (struct iovec *)vector, count, offset, flags, iobref, xdata_in);
- if (buf)
- *buf = args.statvfs_buf;
+ if (preiatt)
+ *preiatt = args.iatt1;
+ if (postiatt)
+ *postiatt = args.iatt2;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+syncop_write(xlator_t *subvol, fd_t *fd, const char *buf, int size,
+ off_t offset, struct iobref *iobref, uint32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
+ struct iovec vec = {
+ 0,
+ };
- args = cookie;
+ vec.iov_len = size;
+ vec.iov_base = (void *)buf;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ SYNCOP(subvol, (&args), syncop_writev_cbk, subvol->fops->writev, fd, &vec,
+ 1, offset, flags, iobref, xdata_in);
- if (op_ret == 0) {
- args->iatt1 = *preop;
- args->iatt2 = *postop;
- }
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- return 0;
+int
+syncop_close(fd_t *fd)
+{
+ if (fd)
+ fd_unref(fd);
+ return 0;
}
+int32_t
+syncop_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
int
-syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
- struct iatt *preop, struct iatt *postop)
+syncop_create(xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
+ fd_t *fd, struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->setattr,
- loc, iatt, valid, NULL);
+ SYNCOP(subvol, (&args), syncop_create_cbk, subvol->fops->create, loc, flags,
+ mode, 0, fd, xdata_in);
- if (preop)
- *preop = args.iatt1;
- if (postop)
- *postop = args.iatt2;
+ if (iatt)
+ *iatt = args.iatt1;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int32_t
+syncop_put_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
int
-syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
- struct iatt *preop, struct iatt *postop)
+syncop_put(xlator_t *subvol, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t offset,
+ struct iobref *iobref, dict_t *xattr, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->fsetattr,
- fd, iatt, valid, NULL);
+ SYNCOP(subvol, (&args), syncop_put_cbk, subvol->fops->put, loc, mode, umask,
+ flags, (struct iovec *)vector, count, offset, iobref, xattr,
+ xdata_in);
- if (preop)
- *preop = args.iatt1;
- if (postop)
- *postop = args.iatt2;
+ if (iatt)
+ *iatt = args.iatt1;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int
+syncop_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
-int32_t
-syncop_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_unlink(xlator_t *subvol, loc_t *loc, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_unlink_cbk, subvol->fops->unlink, loc, 0,
+ xdata_in);
- args = cookie;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- if (op_ret != -1)
- fd_ref (fd);
+int
+syncop_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
- __wake (args);
+ args = cookie;
- return 0;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd)
+syncop_rmdir(xlator_t *subvol, loc_t *loc, int flags, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_open_cbk, subvol->fops->open,
- loc, flags, fd, NULL);
+ SYNCOP(subvol, (&args), syncop_rmdir_cbk, subvol->fops->rmdir, loc, flags,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int
+syncop_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
-int32_t
-syncop_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_link(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_link_cbk, subvol->fops->link, oldloc, newloc,
+ xdata_in);
+
+ if (iatt)
+ *iatt = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int
+syncop_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- INIT_LIST_HEAD (&args->entries.list);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ __wake(args);
- if (args->op_ret >= 0) {
- if (iobref)
- args->iobref = iobref_ref (iobref);
- args->vector = iov_dup (vector, count);
- args->count = count;
- }
+ return 0;
+}
- __wake (args);
+int
+syncop_rename(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
- return 0;
+ SYNCOP(subvol, (&args), syncop_rename_cbk, subvol->fops->rename, oldloc,
+ newloc, xdata_in);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
}
int
-syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- uint32_t flags, struct iovec **vector, int *count,
- struct iobref **iobref)
+syncop_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_readv_cbk, subvol->fops->readv,
- fd, size, off, flags, NULL);
+ args = cookie;
- if (vector)
- *vector = args.vector;
- else if (args.vector)
- GF_FREE (args.vector);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (count)
- *count = args.count;
+ if (op_ret >= 0) {
+ args->iatt1 = *prebuf;
+ args->iatt2 = *postbuf;
+ }
- /* Do we need a 'ref' here? */
- if (iobref)
- *iobref = args.iobref;
- else if (args.iobref)
- iobref_unref (args.iobref);
+ __wake(args);
- errno = args.op_errno;
- return args.op_ret;
+ return 0;
+}
+
+int
+syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_ftruncate_cbk, subvol->fops->ftruncate, fd,
+ offset, xdata_in);
+ if (preiatt)
+ *preiatt = args.iatt1;
+ if (postiatt)
+ *postiatt = args.iatt2;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+syncop_truncate(xlator_t *subvol, loc_t *loc, off_t offset, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate, loc,
+ offset, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- return 0;
+int
+syncop_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ args->iatt1 = *prebuf;
+ args->iatt2 = *postbuf;
+ }
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_writev (xlator_t *subvol, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref,
- uint32_t flags)
+syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev,
- fd, vector, count, offset, flags, iobref, NULL);
+ SYNCOP(subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync, fd, dataonly,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (preiatt)
+ *preiatt = args.iatt1;
+ if (postiatt)
+ *postiatt = args.iatt2;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
-int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size,
- off_t offset, struct iobref *iobref, uint32_t flags)
+int
+syncop_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- struct syncargs args = {0,};
- struct iovec vec = {0,};
+ struct syncargs *args = NULL;
- vec.iov_len = size;
- vec.iov_base = (void *)buf;
+ args = cookie;
- SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev,
- fd, &vec, 1, offset, flags, iobref, NULL);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- errno = args.op_errno;
- return args.op_ret;
+ __wake(args);
+
+ return 0;
}
+int
+syncop_flush(xlator_t *subvol, fd_t *fd, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {0};
+
+ SYNCOP(subvol, (&args), syncop_flush_cbk, subvol->fops->flush, fd,
+ xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_close (fd_t *fd)
+syncop_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
{
- if (fd)
- fd_unref (fd);
- return 0;
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret == 0)
+ args->iatt1 = *stbuf;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_fstat(xlator_t *subvol, fd_t *fd, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat, fd,
+ xdata_in);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_stat(xlator_t *subvol, loc_t *loc, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fstat_cbk, subvol->fops->stat, loc,
+ xdata_in);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+syncop_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (op_ret != -1)
- fd_ref (fd);
+ if (buf)
+ args->iatt1 = *buf;
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *xdata)
+syncop_symlink(xlator_t *subvol, loc_t *loc, const char *newpath,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_create_cbk, subvol->fops->create,
- loc, flags, mode, 0, fd, xdata);
+ SYNCOP(subvol, (&args), syncop_symlink_cbk, subvol->fops->symlink, newpath,
+ loc, 0, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (iatt)
+ *iatt = args.iatt1;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+syncop_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ if ((op_ret != -1) && path)
+ args->buffer = gf_strdup(path);
- return 0;
+ __wake(args);
+
+ return 0;
}
int
-syncop_unlink (xlator_t *subvol, loc_t *loc)
+syncop_readlink(xlator_t *subvol, loc_t *loc, char **buffer, size_t size,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_unlink_cbk, subvol->fops->unlink, loc,
- 0, NULL);
+ SYNCOP(subvol, (&args), syncop_readlink_cbk, subvol->fops->readlink, loc,
+ size, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (buffer)
+ *buffer = args.buffer;
+ else
+ GF_FREE(args.buffer);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
+syncop_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ if (buf)
+ args->iatt1 = *buf;
- return 0;
+ __wake(args);
+
+ return 0;
}
+int
+syncop_mknod(xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_mknod_cbk, subvol->fops->mknod, loc, mode,
+ rdev, 0, xdata_in);
+
+ if (iatt)
+ *iatt = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc)
+syncop_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_link_cbk, subvol->fops->link,
- oldloc, newloc, NULL);
+ args = cookie;
- errno = args.op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- return args.op_ret;
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+syncop_mkdir(xlator_t *subvol, loc_t *loc, mode_t mode, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_mkdir_cbk, subvol->fops->mkdir, loc, mode, 0,
+ xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
- __wake (args);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- return 0;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset)
+syncop_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->ftruncate,
- fd, offset, NULL);
+ args = cookie;
- errno = args.op_errno;
- return args.op_ret;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
}
+/* posix_acl xlator will respond in different ways for access calls from
+ fuse and access calls from nfs. For fuse, checking op_ret is sufficient
+ to check whether the access call is successful or not. But for nfs the
+ mode of the access that is permitted is put into op_errno before unwind.
+ With syncop, the caller of syncop_access will not be able to get the
+ mode of the access despite call being successul (since syncop_access
+ returns only the op_ret collected in args).
+ Now, if access call is failed, then args.op_ret is returned to recognise
+ the failure. But if op_ret is zero, then the mode of access which is
+ set in args.op_errno is returned. Thus the caller of syncop_access
+ has to check whether the return value is less than zero or not. If the
+ return value it got is less than zero, then access call is failed.
+ If it is not, then the access call is successful and the value the caller
+ got is the mode of the access.
+*/
int
-syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset)
+syncop_access(xlator_t *subvol, loc_t *loc, int32_t mask, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate,
- loc, offset, NULL);
+ SYNCOP(subvol, (&args), syncop_access_cbk, subvol->fops->access, loc, mask,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_errno;
}
int
-syncop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+syncop_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
+}
+
+int
+syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fallocate_cbk, subvol->fops->fallocate, fd,
+ keep_size, offset, len, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_fsync (xlator_t *subvol, fd_t *fd)
+syncop_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync,
- fd, 0, NULL);
+ args = cookie;
- errno = args.op_errno;
- return args.op_ret;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ __wake(args);
+
+ return 0;
}
int
-syncop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf, dict_t *xdata)
+syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_discard_cbk, subvol->fops->discard, fd,
+ offset, len, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
- if (op_ret == 0)
- args->iatt1 = *stbuf;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- return 0;
+int
+syncop_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf)
+syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat,
- fd, NULL);
+ SYNCOP(subvol, (&args), syncop_zerofill_cbk, subvol->fops->zerofill, fd,
+ offset, len, xdata_in);
- if (stbuf)
- *stbuf = args.iatt1;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_ipc(xlator_t *subvol, int32_t op, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_ipc_cbk, subvol->fops->ipc, op, xdata_in);
+
+ if (args.xdata) {
+ if (xdata_out) {
+ /*
+ * We're passing this reference to the caller, along
+ * with the pointer itself. That means they're
+ * responsible for calling dict_unref at some point.
+ */
+ *xdata_out = args.xdata;
+ } else {
+ dict_unref(args.xdata);
+ }
+ }
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf)
+syncop_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, off_t offset, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
+
+ args = cookie;
- SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->stat,
- loc, NULL);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ args->offset = offset;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (stbuf)
- *stbuf = args.iatt1;
+ __wake(args);
- errno = args.op_errno;
+ return 0;
+}
+
+int
+syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what,
+ dict_t *xdata_in, off_t *off)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset,
+ what, xdata_in);
+
+ if (args.op_ret < 0) {
+ return -args.op_errno;
+ } else {
+ if (off)
+ *off = args.offset;
return args.op_ret;
+ }
+}
+
+int
+syncop_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ if (lease)
+ args->lease = *lease;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_lease(xlator_t *subvol, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_lease_cbk, subvol->fops->lease, loc, lease,
+ xdata_in);
+
+ *lease = args.lease;
+
+ if (args.xdata) {
+ if (xdata_out) {
+ /*
+ * We're passing this reference to the caller, along
+ * with the pointer itself. That means they're
+ * responsible for calling dict_unref at some point.
+ */
+ *xdata_out = args.xdata;
+ } else {
+ dict_unref(args.xdata);
+ }
+ }
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_flock *flock, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (flock)
+ args->flock = *flock;
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_lk(xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_lk_cbk, subvol->fops->lk, fd, cmd, flock,
+ xdata_in);
+
+ *flock = args.flock;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+syncop_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict)
+syncop_inodelk(xlator_t *subvol, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_symlink_cbk, subvol->fops->symlink,
- newpath, loc, 0, dict);
+ SYNCOP(subvol, (&args), syncop_inodelk_cbk, subvol->fops->inodelk, volume,
+ loc, cmd, lock, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int32_t
+syncop_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ __wake(args);
+ return 0;
}
int
-syncop_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path,
- struct iatt *stbuf, dict_t *xdata)
+syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_entrylk_cbk, subvol->fops->entrylk, volume,
+ loc, basename, cmd, type, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- if ((op_ret != -1) && path)
- args->buffer = gf_strdup (path);
+ if (args.op_ret < 0)
+ return -args.op_errno;
- __wake (args);
+ return args.op_ret;
+}
- return 0;
+int32_t
+syncop_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ if (dict)
+ args->dict_out = dict_ref(dict);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size)
+syncop_xattrop(xlator_t *subvol, loc_t *loc, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_readlink_cbk, subvol->fops->readlink,
- loc, size, NULL);
+ SYNCOP(subvol, (&args), syncop_xattrop_cbk, subvol->fops->xattrop, loc,
+ flags, dict, xdata_in);
- if (buffer)
- *buffer = args.buffer;
- else if (args.buffer)
- GF_FREE (args.buffer);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (dict_out)
+ *dict_out = args.dict_out;
+ else if (args.dict_out)
+ dict_unref(args.dict_out);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
}
int
-syncop_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+syncop_fxattrop(xlator_t *subvol, fd_t *fd, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_xattrop_cbk, subvol->fops->fxattrop, fd,
+ flags, dict, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (dict_out)
+ *dict_out = args.dict_out;
+ else if (args.dict_out)
+ dict_unref(args.dict_out);
- return 0;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int32_t
+syncop_getactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ lock_migration_info_t *tmp = NULL;
+ lock_migration_info_t *entry = NULL;
+
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->locklist.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret > 0) {
+ list_for_each_entry(tmp, &locklist->list, list)
+ {
+ entry = GF_CALLOC(1, sizeof(lock_migration_info_t),
+ gf_common_mt_char);
+
+ if (!entry) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
+ "lock mem allocation failed");
+ gf_free_mig_locks(&args->locklist);
+
+ break;
+ }
+
+ INIT_LIST_HEAD(&entry->list);
+
+ entry->flock = tmp->flock;
+
+ entry->lk_flags = tmp->lk_flags;
+
+ entry->client_uid = gf_strdup(tmp->client_uid);
+
+ list_add_tail(&entry->list, &args->locklist.list);
+ }
+ }
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
- dict_t *dict)
+syncop_getactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_mknod_cbk, subvol->fops->mknod,
- loc, mode, rdev, 0, dict);
+ SYNCOP(subvol, (&args), syncop_getactivelk_cbk, subvol->fops->getactivelk,
+ loc, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (locklist)
+ list_splice_init(&args.locklist.list, &locklist->list);
+ else
+ gf_free_mig_locks(&args.locklist);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int
+syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_setactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_setactivelk_cbk, subvol->fops->setactivelk,
+ loc, locklist, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int
+syncop_icreate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_copy_file_range_cbk,
+ subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len,
+ flags, xdata_in);
+
+ if (stbuf) {
+ *stbuf = args.iatt1;
+ }
+ if (preiatt_dst) {
+ *preiatt_dst = args.iatt2;
+ }
+ if (postiatt_dst) {
+ *postiatt_dst = args.iatt3;
+ }
+
+ if (xdata_out) {
+ *xdata_out = args.xdata;
+ } else if (args.xdata) {
+ dict_unref(args.xdata);
+ }
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ args->iatt1 = *stbuf;
+ args->iatt2 = *prebuf_dst;
+ args->iatt3 = *postbuf_dst;
+ }
+
+ __wake(args);
+ return 0;
}
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
deleted file mode 100644
index 726e8c49a0d..00000000000
--- a/libglusterfs/src/syncop.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _SYNCOP_H
-#define _SYNCOP_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include <sys/time.h>
-#include <pthread.h>
-#include <ucontext.h>
-
-#define SYNCENV_PROC_MAX 16
-#define SYNCENV_PROC_MIN 2
-
-struct synctask;
-struct syncproc;
-struct syncenv;
-
-
-typedef int (*synctask_cbk_t) (int ret, call_frame_t *frame, void *opaque);
-
-typedef int (*synctask_fn_t) (void *opaque);
-
-
-typedef enum {
- SYNCTASK_INIT = 0,
- SYNCTASK_RUN,
- SYNCTASK_SUSPEND,
- SYNCTASK_WAIT,
- SYNCTASK_DONE,
-} synctask_state_t;
-
-/* for one sequential execution of @syncfn */
-struct synctask {
- struct list_head all_tasks;
- struct syncenv *env;
- xlator_t *xl;
- call_frame_t *frame;
- call_frame_t *opframe;
- synctask_cbk_t synccbk;
- synctask_fn_t syncfn;
- synctask_state_t state;
- void *opaque;
- void *stack;
- int woken;
- int slept;
- int ret;
-
- ucontext_t ctx;
- struct syncproc *proc;
-
- pthread_mutex_t mutex; /* for synchronous spawning of synctask */
- pthread_cond_t cond;
- int done;
-};
-
-
-struct syncproc {
- pthread_t processor;
- ucontext_t sched;
- struct syncenv *env;
- struct synctask *current;
-};
-
-/* hosts the scheduler thread and framework for executing synctasks */
-struct syncenv {
- struct syncproc proc[SYNCENV_PROC_MAX];
- int procs;
-
- struct list_head runq;
- int runcount;
- struct list_head waitq;
- int waitcount;
-
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-
- size_t stacksize;
-};
-
-
-struct syncargs {
- int op_ret;
- int op_errno;
- struct iatt iatt1;
- struct iatt iatt2;
- dict_t *xattr;
- gf_dirent_t entries;
- struct statvfs statvfs_buf;
- struct iovec *vector;
- int count;
- struct iobref *iobref;
- char *buffer;
- dict_t *xdata;
-
- /* some more _cbk needs */
- uuid_t uuid;
- char *errstr;
- dict_t *dict;
-
- /* do not touch */
- struct synctask *task;
-};
-
-#define __wake(args) synctask_wake(args->task)
-
-
-#define SYNCOP(subvol, stb, cbk, op, params ...) do { \
- struct synctask *task = NULL; \
- \
- task = synctask_get (); \
- stb->task = task; \
- \
- STACK_WIND_COOKIE (task->opframe, cbk, (void *)stb, \
- subvol, op, params); \
- task->state = SYNCTASK_SUSPEND; \
- synctask_yield (stb->task); \
- STACK_RESET (task->opframe->root); \
- } while (0)
-
-
-#define SYNCENV_DEFAULT_STACKSIZE (2 * 1024 * 1024)
-
-struct syncenv * syncenv_new ();
-void syncenv_destroy (struct syncenv *);
-void syncenv_scale (struct syncenv *env);
-
-int synctask_new (struct syncenv *, synctask_fn_t, synctask_cbk_t, call_frame_t* frame, void *);
-void synctask_wake (struct synctask *task);
-void synctask_yield (struct synctask *task);
-
-int syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req,
- /* out */
- struct iatt *iatt, dict_t **xattr_rsp, struct iatt *parent);
-
-int syncop_readdirp (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- dict_t *dict,
- /* out */
- gf_dirent_t *entries);
-
-int syncop_readdir (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- gf_dirent_t *entries);
-
-int syncop_opendir (xlator_t *subvol, loc_t *loc, fd_t *fd);
-
-int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
- /* out */
- struct iatt *preop, struct iatt *postop);
-
-int syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
- /* out */
- struct iatt *preop, struct iatt *postop);
-
-int syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf);
-
-int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags);
-int syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags);
-int syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict);
-int syncop_getxattr (xlator_t *xl, loc_t *loc, dict_t **dict, const char *key);
-int syncop_fgetxattr (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key);
-int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name);
-int syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name);
-
-int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *dict);
-int syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd);
-int syncop_close (fd_t *fd);
-
-int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size,
- off_t offset, struct iobref *iobref, uint32_t flags);
-int syncop_writev (xlator_t *subvol, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref,
- uint32_t flags);
-int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- uint32_t flags,
- /* out */
- struct iovec **vector, int *count, struct iobref **iobref);
-
-int syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset);
-int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset);
-
-int syncop_unlink (xlator_t *subvol, loc_t *loc);
-
-int syncop_fsync (xlator_t *subvol, fd_t *fd);
-int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf);
-int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf);
-
-int syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict);
-int syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size);
-int syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
- dict_t *dict);
-int syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);
-#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index bb834dbfd9f..04400f98b6c 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -8,453 +8,869 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat.h"
-#include "syscall.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/libglusterfs-messages.h"
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#include <signal.h>
+#endif
#include <sys/types.h>
#include <utime.h>
#include <sys/time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdarg.h>
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+#include <sys/syscall.h>
+#endif
+
+#define FS_ERROR_LOG(result) \
+ do { \
+ gf_msg_callingfn("FS", GF_LOG_CRITICAL, EIO, \
+ LG_MSG_SYSCALL_RETURNS_WRONG, \
+ "returned %zd for the syscall", (ssize_t)result); \
+ } while (0)
+
+/*
+ * Input to these macros is generally a function call, so capture the result
+ * i.e. (_ret) in another variable and use that instead of using _ret again
+ */
+#define FS_RET_CHECK(_ret, err) \
+ ({ \
+ typeof(_ret) _result = (_ret); \
+ if (_result < -1) { \
+ FS_ERROR_LOG(_result); \
+ _result = -1; \
+ err = EIO; \
+ } \
+ _result; \
+ })
+
+#define FS_RET_CHECK0(_ret, err) \
+ ({ \
+ typeof(_ret) _result0 = (_ret); \
+ if (_result0 < -1 || _result0 > 0) { \
+ FS_ERROR_LOG(_result0); \
+ _result0 = -1; \
+ err = EIO; \
+ } \
+ _result0; \
+ })
+
+#define FS_RET_CHECK_ERRNO(_ret, err) \
+ ({ \
+ typeof(_ret) _result1 = (_ret); \
+ if (_result1 < 0) { \
+ FS_ERROR_LOG(_result1); \
+ _result1 = -1; \
+ err = EIO; \
+ } else if (_result1 > 0) { \
+ err = _result1; \
+ _result1 = -1; \
+ } \
+ _result1; \
+ })
int
-sys_lstat (const char *path, struct stat *buf)
+sys_lstat(const char *path, struct stat *buf)
{
- return lstat (path, buf);
+ return FS_RET_CHECK0(lstat(path, buf), errno);
}
+int
+sys_stat(const char *path, struct stat *buf)
+{
+ return FS_RET_CHECK0(stat(path, buf), errno);
+}
int
-sys_stat (const char *path, struct stat *buf)
+sys_fstat(int fd, struct stat *buf)
{
- return stat (path, buf);
+ return FS_RET_CHECK0(fstat(fd, buf), errno);
}
+int
+sys_fstatat(int dirfd, const char *pathname, struct stat *buf, int flags)
+{
+#ifdef GF_DARWIN_HOST_OS
+ if (fchdir(dirfd) < 0)
+ return -1;
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ return FS_RET_CHECK0(lstat(pathname, buf), errno);
+ else
+ return FS_RET_CHECK0(stat(pathname, buf), errno);
+#else
+ return FS_RET_CHECK0(fstatat(dirfd, pathname, buf, flags), errno);
+#endif
+}
int
-sys_fstat (int fd, struct stat *buf)
+sys_openat(int dirfd, const char *pathname, int flags, int mode)
{
- return fstat (fd, buf);
+ int fd;
+
+#ifdef GF_DARWIN_HOST_OS
+ if (fchdir(dirfd) < 0)
+ return -1;
+ fd = open(pathname, flags, mode);
+ /* TODO: Shouldn't we restore the old current directory */
+#else /* GF_DARWIN_HOST_OS */
+ fd = openat(dirfd, pathname, flags, mode);
+#ifdef __FreeBSD__
+ /* On FreeBSD S_ISVTX flag is ignored for an open() with O_CREAT set.
+ * We need to force the flag using fchmod(). */
+ if ((fd >= 0) && ((flags & O_CREAT) != 0) && ((mode & S_ISVTX) != 0)) {
+ sys_fchmod(fd, mode);
+ /* TODO: It's unlikely that fchmod could fail here. However,
+ if it fails we cannot always restore the old state
+ (if the file existed, we cannot recover it). We would
+ need many more system calls to correctly handle all
+ possible cases and it doesn't worth it. For now we
+ simply ignore the error. */
+ }
+#endif /* __FreeBSD__ */
+#endif /* !GF_DARWIN_HOST_OS */
+
+ return FS_RET_CHECK(fd, errno);
}
+int
+sys_open(const char *pathname, int flags, int mode)
+{
+ return FS_RET_CHECK(sys_openat(AT_FDCWD, pathname, flags, mode), errno);
+}
DIR *
-sys_opendir (const char *name)
+sys_opendir(const char *name)
{
- return opendir (name);
+ return opendir(name);
}
-
-struct dirent *
-sys_readdir (DIR *dir)
+int
+sys_mkdirat(int dirfd, const char *pathname, mode_t mode)
{
- return readdir (dir);
+#ifdef GF_DARWIN_HOST_OS
+ if (fchdir(dirfd) < 0)
+ return -1;
+ return FS_RET_CHECK0(mkdir(pathname, mode), errno);
+#else
+ return FS_RET_CHECK0(mkdirat(dirfd, pathname, mode), errno);
+#endif
}
+struct dirent *
+sys_readdir(DIR *dir, struct dirent *de)
+{
+#if !defined(__GLIBC__)
+ /*
+ * World+Dog says glibc's readdir(3) is MT-SAFE as long as
+ * two threads are not accessing the same DIR; there's a
+ * potential buffer overflow in glibc's readdir_r(3); and
+ * glibc's readdir_r(3) is deprecated after version 2.22
+ * with presumed eventual removal.
+ * Given all that, World+Dog says everyone should just use
+ * readdir(3). But it's unknown, unclear whether the same
+ * is also true for *BSD, MacOS, and, etc.
+ */
+ struct dirent *entry = NULL;
+
+ (void)readdir_r(dir, de, &entry);
+ return entry;
+#else
+ return readdir(dir);
+#endif
+}
ssize_t
-sys_readlink (const char *path, char *buf, size_t bufsiz)
+sys_readlink(const char *path, char *buf, size_t bufsiz)
{
- return readlink (path, buf, bufsiz);
+ return FS_RET_CHECK(readlink(path, buf, bufsiz), errno);
}
-
int
-sys_closedir (DIR *dir)
+sys_closedir(DIR *dir)
{
- return closedir (dir);
+ return FS_RET_CHECK0(closedir(dir), errno);
}
-
int
-sys_mknod (const char *pathname, mode_t mode, dev_t dev)
+sys_mknod(const char *pathname, mode_t mode, dev_t dev)
{
- return mknod (pathname, mode, dev);
+ return FS_RET_CHECK0(mknod(pathname, mode, dev), errno);
}
-
int
-sys_mkdir (const char *pathname, mode_t mode)
+sys_mkdir(const char *pathname, mode_t mode)
{
- return mkdir (pathname, mode);
+ return FS_RET_CHECK0(mkdir(pathname, mode), errno);
}
-
int
-sys_unlink (const char *pathname)
+sys_unlink(const char *pathname)
{
#ifdef GF_SOLARIS_HOST_OS
- return solaris_unlink (pathname);
+ return FS_RET_CHECK0(solaris_unlink(pathname), errno);
#endif
- return unlink (pathname);
+ return FS_RET_CHECK0(unlink(pathname), errno);
}
-
int
-sys_rmdir (const char *pathname)
+sys_unlinkat(int dfd, const char *pathname)
{
- return rmdir (pathname);
+#ifdef GF_SOLARIS_HOST_OS
+ return FS_RET_CHECK0(solaris_unlinkat(dfd, pathname, 0), errno);
+#endif
+ return FS_RET_CHECK0(unlinkat(dfd, pathname, 0), errno);
}
+int
+sys_rmdir(const char *pathname)
+{
+ return FS_RET_CHECK0(rmdir(pathname), errno);
+}
int
-sys_symlink (const char *oldpath, const char *newpath)
+sys_symlink(const char *oldpath, const char *newpath)
{
- return symlink (oldpath, newpath);
+ return FS_RET_CHECK0(symlink(oldpath, newpath), errno);
}
+int
+sys_symlinkat(const char *oldpath, int dirfd, const char *newpath)
+{
+ return FS_RET_CHECK0(symlinkat(oldpath, dirfd, newpath), errno);
+}
int
-sys_rename (const char *oldpath, const char *newpath)
+sys_rename(const char *oldpath, const char *newpath)
{
#ifdef GF_SOLARIS_HOST_OS
- return solaris_rename (oldpath, newpath);
+ return FS_RET_CHECK0(solaris_rename(oldpath, newpath), errno);
#endif
- return rename (oldpath, newpath);
+ return FS_RET_CHECK0(rename(oldpath, newpath), errno);
}
+int
+sys_link(const char *oldpath, const char *newpath)
+{
+#ifdef HAVE_LINKAT
+ /*
+ * On most systems (Linux being the notable exception), link(2)
+ * first resolves symlinks. If the target is a directory or
+ * is nonexistent, it will fail. linkat(2) operates on the
+ * symlink instead of its target when the AT_SYMLINK_FOLLOW
+ * flag is not supplied.
+ */
+ return FS_RET_CHECK0(linkat(AT_FDCWD, oldpath, AT_FDCWD, newpath, 0),
+ errno);
+#else
+ return FS_RET_CHECK0(link(oldpath, newpath), errno);
+#endif
+}
int
-sys_link (const char *oldpath, const char *newpath)
+sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath)
{
- return link (oldpath, newpath);
+ return FS_RET_CHECK0(linkat(oldfd, oldpath, newfd, newpath, 0), errno);
}
-
int
-sys_chmod (const char *path, mode_t mode)
+sys_chmod(const char *path, mode_t mode)
{
- return chmod (path, mode);
+ return FS_RET_CHECK0(chmod(path, mode), errno);
}
-
int
-sys_fchmod (int fd, mode_t mode)
+sys_fchmod(int fd, mode_t mode)
{
- return fchmod (fd, mode);
+ return FS_RET_CHECK0(fchmod(fd, mode), errno);
}
-
int
-sys_chown (const char *path, uid_t owner, gid_t group)
+sys_chown(const char *path, uid_t owner, gid_t group)
{
- return chown (path, owner, group);
+ return FS_RET_CHECK0(chown(path, owner, group), errno);
}
-
int
-sys_fchown (int fd, uid_t owner, gid_t group)
+sys_fchown(int fd, uid_t owner, gid_t group)
{
- return fchown (fd, owner, group);
+ return FS_RET_CHECK0(fchown(fd, owner, group), errno);
}
-
int
-sys_lchown (const char *path, uid_t owner, gid_t group)
+sys_lchown(const char *path, uid_t owner, gid_t group)
{
- return lchown (path, owner, group);
+ return FS_RET_CHECK0(lchown(path, owner, group), errno);
}
-
int
-sys_truncate (const char *path, off_t length)
+sys_truncate(const char *path, off_t length)
{
- return truncate (path, length);
+ return FS_RET_CHECK0(truncate(path, length), errno);
}
-
int
-sys_ftruncate (int fd, off_t length)
+sys_ftruncate(int fd, off_t length)
{
- return ftruncate (fd, length);
+ return FS_RET_CHECK0(ftruncate(fd, length), errno);
}
-
int
-sys_utimes (const char *filename, const struct timeval times[2])
+sys_utimes(const char *filename, const struct timeval times[2])
{
- return utimes (filename, times);
+ return FS_RET_CHECK0(utimes(filename, times), errno);
}
+#if defined(HAVE_UTIMENSAT)
+int
+sys_utimensat(int dirfd, const char *filename, const struct timespec times[2],
+ int flags)
+{
+ return FS_RET_CHECK0(utimensat(dirfd, filename, times, flags), errno);
+}
+#endif
int
-sys_creat (const char *pathname, mode_t mode)
+sys_futimes(int fd, const struct timeval times[2])
{
- return creat (pathname, mode);
+ return futimes(fd, times);
}
+int
+sys_creat(const char *pathname, mode_t mode)
+{
+ return FS_RET_CHECK(sys_open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode),
+ errno);
+}
ssize_t
-sys_readv (int fd, const struct iovec *iov, int iovcnt)
+sys_readv(int fd, const struct iovec *iov, int iovcnt)
{
- return readv (fd, iov, iovcnt);
+ return FS_RET_CHECK(readv(fd, iov, iovcnt), errno);
}
+ssize_t
+sys_writev(int fd, const struct iovec *iov, int iovcnt)
+{
+ return FS_RET_CHECK(writev(fd, iov, iovcnt), errno);
+}
ssize_t
-sys_writev (int fd, const struct iovec *iov, int iovcnt)
+sys_read(int fd, void *buf, size_t count)
{
- return writev (fd, iov, iovcnt);
+ return FS_RET_CHECK(read(fd, buf, count), errno);
}
+ssize_t
+sys_write(int fd, const void *buf, size_t count)
+{
+ return FS_RET_CHECK(write(fd, buf, count), errno);
+}
ssize_t
-sys_read (int fd, void *buf, size_t count)
+sys_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
- return read (fd, buf, count);
+ return FS_RET_CHECK(preadv(fd, iov, iovcnt, offset), errno);
}
+ssize_t
+sys_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+ return FS_RET_CHECK(pwritev(fd, iov, iovcnt, offset), errno);
+}
ssize_t
-sys_write (int fd, const void *buf, size_t count)
+sys_pread(int fd, void *buf, size_t count, off_t offset)
{
- return write (fd, buf, count);
+ return FS_RET_CHECK(pread(fd, buf, count, offset), errno);
}
+ssize_t
+sys_pwrite(int fd, const void *buf, size_t count, off_t offset)
+{
+ return FS_RET_CHECK(pwrite(fd, buf, count, offset), errno);
+}
off_t
-sys_lseek (int fd, off_t offset, int whence)
+sys_lseek(int fd, off_t offset, int whence)
{
- return lseek (fd, offset, whence);
+ return FS_RET_CHECK(lseek(fd, offset, whence), errno);
}
-
int
-sys_statvfs (const char *path, struct statvfs *buf)
+sys_statvfs(const char *path, struct statvfs *buf)
{
- return statvfs (path, buf);
-}
+ int ret;
+
+ ret = statvfs(path, buf);
+#ifdef __FreeBSD__
+ /* FreeBSD doesn't return the expected value in buf->f_bsize. It
+ * contains the optimal I/O size instead of the file system block
+ * size. Gluster expects that this field contains the block size.
+ */
+ if (ret == 0) {
+ buf->f_bsize = buf->f_frsize;
+ }
+#endif /* __FreeBSD__ */
+ return FS_RET_CHECK0(ret, errno);
+}
int
-sys_close (int fd)
+sys_fstatvfs(int fd, struct statvfs *buf)
{
- int ret = -1;
+ int ret;
- if (fd >= 0)
- ret = close (fd);
+ ret = fstatvfs(fd, buf);
+#ifdef __FreeBSD__
+ /* FreeBSD doesn't return the expected value in buf->f_bsize. It
+ * contains the optimal I/O size instead of the file system block
+ * size. Gluster expects this field to contain the block size.
+ */
+ if (ret == 0) {
+ buf->f_bsize = buf->f_frsize;
+ }
+#endif /* __FreeBSD__ */
- return ret;
+ return FS_RET_CHECK0(ret, errno);
}
-
int
-sys_fsync (int fd)
+sys_close(int fd)
{
- return fsync (fd);
+ int ret = -1;
+
+ if (fd >= 0)
+ ret = close(fd);
+
+ return FS_RET_CHECK0(ret, errno);
}
+int
+sys_fsync(int fd)
+{
+ return FS_RET_CHECK0(fsync(fd), errno);
+}
int
-sys_fdatasync (int fd)
+sys_fdatasync(int fd)
{
-#ifdef HAVE_FDATASYNC
- return fdatasync (fd);
+#ifdef GF_DARWIN_HOST_OS
+ return FS_RET_CHECK0(fcntl(fd, F_FULLFSYNC), errno);
+#elif __FreeBSD__
+ return FS_RET_CHECK0(fsync(fd), errno);
#else
- return 0;
+ return FS_RET_CHECK0(fdatasync(fd), errno);
#endif
}
+void
+gf_add_prefix(const char *ns, const char *key, char **newkey)
+{
+ /* if we don't have any namespace, append USER NS */
+ if (strncmp(key, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+ strncmp(key, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
+ strncmp(key, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+ strncmp(key, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) {
+ int ns_length = strlen(ns);
+ *newkey = GF_MALLOC(ns_length + strlen(key) + 10, gf_common_mt_char);
+ if (!*newkey)
+ return;
+ strcpy(*newkey, ns);
+ strcat(*newkey, key);
+ } else {
+ *newkey = gf_strdup(key);
+ }
+}
-int
-sys_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
+void
+gf_remove_prefix(const char *ns, const char *key, char **newkey)
{
+ int ns_length = strlen(ns);
+ if (strncmp(key, ns, ns_length) == 0) {
+ *newkey = GF_MALLOC(-ns_length + strlen(key) + 10, gf_common_mt_char);
+ if (!*newkey)
+ return;
+ strcpy(*newkey, key + ns_length);
+ } else {
+ *newkey = gf_strdup(key);
+ }
+}
+int
+sys_lsetxattr(const char *path, const char *name, const void *value,
+ size_t size, int flags)
+{
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return lsetxattr (path, name, value, size, flags);
+ return FS_RET_CHECK0(lsetxattr(path, name, value, size, flags), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_set_link (path, EXTATTR_NAMESPACE_USER,
- name, value, size);
+ return FS_RET_CHECK(
+ extattr_set_link(path, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_setxattr (path, name, value, size, flags);
+ return FS_RET_CHECK0(solaris_setxattr(path, name, value, size, flags),
+ errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return setxattr (path, name, value, size, 0,
- flags|XATTR_NOFOLLOW);
+ /* OS X clients will carry other flags, which will be used on a
+ OS X host, but masked out on others. GF assume NOFOLLOW on Linux,
+ enforcing */
+ return FS_RET_CHECK0(setxattr(path, name, value, size, 0,
+ (flags & ~XATTR_NOSECURITY) | XATTR_NOFOLLOW),
+ errno);
#endif
-
}
-
ssize_t
-sys_llistxattr (const char *path, char *list, size_t size)
+sys_llistxattr(const char *path, char *list, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return llistxattr (path, list, size);
+ return FS_RET_CHECK(llistxattr(path, list, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_list_link (path, EXTATTR_NAMESPACE_USER, list, size);
+ ssize_t ret = FS_RET_CHECK(
+ extattr_list_link(path, EXTATTR_NAMESPACE_USER, list, size), errno);
+ gf_extattr_list_reshape(list, ret);
+ return ret;
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_listxattr (path, list, size);
+ return FS_RET_CHECK(solaris_listxattr(path, list, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return listxattr (path, list, size, XATTR_NOFOLLOW);
+ return FS_RET_CHECK(listxattr(path, list, size, XATTR_NOFOLLOW), errno);
#endif
-
}
-
ssize_t
-sys_lgetxattr (const char *path, const char *name, void *value, size_t size)
+sys_lgetxattr(const char *path, const char *name, void *value, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return lgetxattr (path, name, value, size);
+ return FS_RET_CHECK(lgetxattr(path, name, value, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_get_link (path, EXTATTR_NAMESPACE_USER, name, value,
- size);
+ return FS_RET_CHECK(
+ extattr_get_link(path, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_getxattr (path, name, value, size);
+ return FS_RET_CHECK(solaris_getxattr(path, name, value, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return getxattr (path, name, value, size, 0, XATTR_NOFOLLOW);
+ return FS_RET_CHECK(getxattr(path, name, value, size, 0, XATTR_NOFOLLOW),
+ errno);
#endif
-
}
-
ssize_t
-sys_fgetxattr (int filedes, const char *name, void *value, size_t size)
+sys_fgetxattr(int filedes, const char *name, void *value, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return fgetxattr (filedes, name, value, size);
+ return FS_RET_CHECK(fgetxattr(filedes, name, value, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_get_fd (filedes, EXTATTR_NAMESPACE_USER, name,
- value, size);
+ return FS_RET_CHECK(
+ extattr_get_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_fgetxattr (filedes, name, value, size);
+ return FS_RET_CHECK(solaris_fgetxattr(filedes, name, value, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return fgetxattr (filedes, name, value, size, 0, 0);
+ return FS_RET_CHECK(fgetxattr(filedes, name, value, size, 0, 0), errno);
#endif
-
}
-
int
-sys_fremovexattr (int filedes, const char *name)
+sys_fremovexattr(int filedes, const char *name)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return fremovexattr (filedes, name);
+ return FS_RET_CHECK0(fremovexattr(filedes, name), errno);
#endif
- errno = ENOSYS;
- return -1;
-#if 0 /* TODO: to port to other OSes, fill in each of below */
#ifdef GF_BSD_HOST_OS
- return extattr_remove_fd (filedes, EXTATTR_NAMESPACE_USER, name);
+ return FS_RET_CHECK0(
+ extattr_delete_fd(filedes, EXTATTR_NAMESPACE_USER, name), errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_fremovexattr (filedes, name);
+ return FS_RET_CHECK0(solaris_fremovexattr(filedes, name), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return fremovexattr (filedes, name, 0);
-#endif
+ return FS_RET_CHECK0(fremovexattr(filedes, name, 0), errno);
#endif
}
-
int
-sys_fsetxattr (int filedes, const char *name, const void *value,
- size_t size, int flags)
+sys_fsetxattr(int filedes, const char *name, const void *value, size_t size,
+ int flags)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return fsetxattr (filedes, name, value, size, flags);
+ return FS_RET_CHECK0(fsetxattr(filedes, name, value, size, flags), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_set_fd (filedes, EXTATTR_NAMESPACE_USER, name,
- value, size);
+ return FS_RET_CHECK(
+ extattr_set_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_fsetxattr (filedes, name, value, size, flags);
+ return FS_RET_CHECK0(solaris_fsetxattr(filedes, name, value, size, flags),
+ errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return fsetxattr (filedes, name, value, size, 0, flags);
+ return FS_RET_CHECK0(
+ fsetxattr(filedes, name, value, size, 0, flags & ~XATTR_NOSECURITY),
+ errno);
#endif
-
}
-
ssize_t
-sys_flistxattr (int filedes, char *list, size_t size)
+sys_flistxattr(int filedes, char *list, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return flistxattr (filedes, list, size);
+ return FS_RET_CHECK(flistxattr(filedes, list, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_list_fd (filedes, EXTATTR_NAMESPACE_USER, list, size);
+ ssize_t ret = FS_RET_CHECK(
+ extattr_list_fd(filedes, EXTATTR_NAMESPACE_USER, list, size), errno);
+ gf_extattr_list_reshape(list, ret);
+ return ret;
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_flistxattr (filedes, list, size);
+ return FS_RET_CHECK(solaris_flistxattr(filedes, list, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return flistxattr (filedes, list, size, XATTR_NOFOLLOW);
+ return FS_RET_CHECK(flistxattr(filedes, list, size, XATTR_NOFOLLOW), errno);
#endif
-
}
-
int
-sys_lremovexattr (const char *path, const char *name)
+sys_lremovexattr(const char *path, const char *name)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return lremovexattr (path, name);
+ return FS_RET_CHECK0(lremovexattr(path, name), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_delete_link (path, EXTATTR_NAMESPACE_USER, name);
+ return FS_RET_CHECK0(
+ extattr_delete_link(path, EXTATTR_NAMESPACE_USER, name), errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_removexattr (path, name);
+ return FS_RET_CHECK0(solaris_removexattr(path, name), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return removexattr (path, name, XATTR_NOFOLLOW);
+ return FS_RET_CHECK0(removexattr(path, name, XATTR_NOFOLLOW), errno);
+#endif
+}
+
+int
+sys_access(const char *pathname, int mode)
+{
+ return FS_RET_CHECK0(access(pathname, mode), errno);
+}
+
+int
+sys_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+#ifdef HAVE_FALLOCATE
+ return FS_RET_CHECK0(fallocate(fd, mode, offset, len), errno);
+#endif
+
+#ifdef HAVE_POSIX_FALLOCATE
+ if (mode) {
+ /* keep size not supported */
+ errno = EOPNOTSUPP;
+ return -1;
+ }
+
+ return FS_RET_CHECK_ERRNO(posix_fallocate(fd, offset, len), errno);
+#endif
+
+#if defined(F_ALLOCATECONTIG) && defined(GF_DARWIN_HOST_OS)
+ /* C conversion from C++ implementation for OSX by Mozilla Foundation */
+ if (mode) {
+ /* keep size not supported */
+ errno = EOPNOTSUPP;
+ return -1;
+ }
+ /*
+ * The F_PREALLOCATE command operates on the following structure:
+ *
+ * typedef struct fstore {
+ * u_int32_t fst_flags; // IN: flags word
+ * int fst_posmode; // IN: indicates offset field
+ * off_t fst_offset; // IN: start of the region
+ * off_t fst_length; // IN: size of the region
+ * off_t fst_bytesalloc; // OUT: number of bytes allocated
+ * } fstore_t;
+ *
+ * The flags (fst_flags) for the F_PREALLOCATE command are as follows:
+ * F_ALLOCATECONTIG Allocate contiguous space.
+ * F_ALLOCATEALL Allocate all requested space or no space at all.
+ *
+ * The position modes (fst_posmode) for the F_PREALLOCATE command
+ * indicate how to use the offset field. The modes are as follows:
+ * F_PEOFPOSMODE Allocate from the physical end of file.
+ * F_VOLPOSMODE Allocate from the volume offset.
+ *
+ */
+
+ int ret;
+ fstore_t store = {F_ALLOCATECONTIG, F_PEOFPOSMODE, offset, len, 0};
+ ret = fcntl(fd, F_PREALLOCATE, &store);
+ if (ret == -1) {
+ store.fst_flags = F_ALLOCATEALL;
+ ret = fcntl(fd, F_PREALLOCATE, &store);
+ }
+ if (ret == -1)
+ return ret;
+ return FS_RET_CHECK0(ftruncate(fd, offset + len), errno);
+#endif
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+sys_socket(int domain, int type, int protocol)
+{
+#ifdef SOCK_CLOEXEC
+ return socket(domain, type | SOCK_CLOEXEC, protocol);
+#else
+ int fd = -1;
+
+ fd = socket(domain, type, protocol);
+ if (fd >= 0)
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+ return fd;
+#endif
+}
+
+#if (defined(HAVE_ACCEPT4) || defined(HAVE_PACCEPT))
+static inline int
+prep_accept_flags(int flags)
+{
+ if (flags & O_NONBLOCK) {
+ flags &= ~O_NONBLOCK;
+ flags |= SOCK_NONBLOCK;
+ }
+
+ flags |= SOCK_CLOEXEC;
+
+ return flags;
+}
#endif
+int
+sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags)
+{
+ int newsock = -1;
+
+#ifdef HAVE_ACCEPT4
+
+ flags = prep_accept_flags(flags);
+ newsock = accept4(sock, sockaddr, socklen, flags);
+
+#elif HAVE_PACCEPT
+ flags = prep_accept_flags(flags);
+ newsock = paccept(sock, sockaddr, socklen, NULL, flags);
+
+#else
+ int op_errno = 0;
+ int curflag = 0;
+ int ret = 0;
+
+ newsock = accept(sock, sockaddr, socklen);
+ if (newsock != -1) {
+ curflag = fcntl(newsock, F_GETFL);
+ if (fcntl(newsock, F_SETFL, curflag | flags) == -1) {
+ op_errno = errno;
+ goto err;
+ }
+
+ curflag = fcntl(newsock, F_GETFD);
+ if (fcntl(newsock, F_SETFD, curflag | FD_CLOEXEC) == -1) {
+ op_errno = errno;
+ goto err;
+ }
+ }
+
+err:
+ if (op_errno) {
+ close(newsock);
+ errno = op_errno;
+ return -1;
+ }
+
+#endif
+ return newsock;
+}
+
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+ size_t len, unsigned int flags)
+{
+ /*
+ * TODO: Add check for other platofrms like freebsd etc if this syscall is
+ * not generic.
+ * This is what the function does.
+ * 1) Check whether copy_file_range API is present. If so call it.
+ * 2) If copy_file_range API is not present, then check whether
+ * the system call is there. If so, then use syscall to invoke
+ * SYS_copy_file_range system call.
+ * 3) If neither of the above is present, then return ENOSYS.
+ */
+#ifdef HAVE_COPY_FILE_RANGE
+ return FS_RET_CHECK(
+ copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno);
+#else
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+ return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len,
+ flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif /* HAVE_COPY_FILE_RANGE_SYS */
+#endif /* HAVE_COPY_FILE_RANGE */
}
+#ifdef __FreeBSD__
+int
+sys_kill(pid_t pid, int sig)
+{
+ return FS_RET_CHECK0(kill(pid, sig), errno);
+}
int
-sys_access (const char *pathname, int mode)
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen)
{
- return access (pathname, mode);
+ return FS_RET_CHECK0(sysctl(name, namelen, oldp, oldlenp, newp, newlen),
+ errno);
}
+#endif
diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/syscall.h
deleted file mode 100644
index d5c6ce5f67b..00000000000
--- a/libglusterfs/src/syscall.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __SYSCALL_H__
-#define __SYSCALL_H__
-
-int
-sys_lstat (const char *path, struct stat *buf);
-
-int
-sys_stat (const char *path, struct stat *buf);
-
-int
-sys_fstat (int fd, struct stat *buf);
-
-DIR *
-sys_opendir (const char *name);
-
-struct dirent *
-sys_readdir (DIR *dir);
-
-ssize_t
-sys_readlink (const char *path, char *buf, size_t bufsiz);
-
-int
-sys_closedir (DIR *dir);
-
-int
-sys_mknod (const char *pathname, mode_t mode, dev_t dev);
-
-int
-sys_mkdir (const char *pathname, mode_t mode);
-
-int
-sys_unlink (const char *pathname);
-
-int
-sys_rmdir (const char *pathname);
-
-int
-sys_symlink (const char *oldpath, const char *newpath);
-
-int
-sys_rename (const char *oldpath, const char *newpath);
-
-int
-sys_link (const char *oldpath, const char *newpath);
-
-int
-sys_chmod (const char *path, mode_t mode);
-
-int
-sys_fchmod (int fd, mode_t mode);
-
-int
-sys_chown (const char *path, uid_t owner, gid_t group);
-
-int
-sys_fchown (int fd, uid_t owner, gid_t group);
-
-int
-sys_lchown (const char *path, uid_t owner, gid_t group);
-
-int
-sys_truncate (const char *path, off_t length);
-
-int
-sys_ftruncate (int fd, off_t length);
-
-int
-sys_utimes (const char *filename, const struct timeval times[2]);
-
-int
-sys_creat (const char *pathname, mode_t mode);
-
-ssize_t
-sys_readv (int fd, const struct iovec *iov, int iovcnt);
-
-ssize_t
-sys_writev (int fd, const struct iovec *iov, int iovcnt);
-
-ssize_t
-sys_read (int fd, void *buf, size_t count);
-
-ssize_t
-sys_write (int fd, const void *buf, size_t count);
-
-off_t
-sys_lseek (int fd, off_t offset, int whence);
-
-int
-sys_statvfs (const char *path, struct statvfs *buf);
-
-int
-sys_close (int fd);
-
-int
-sys_fsync (int fd);
-
-int
-sys_fdatasync (int fd);
-
-int
-sys_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-sys_llistxattr (const char *path, char *list, size_t size);
-
-ssize_t
-sys_lgetxattr (const char *path, const char *name, void *value, size_t size);
-
-ssize_t
-sys_fgetxattr (int filedes, const char *name, void *value, size_t size);
-
-int
-sys_fsetxattr (int filedes, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-sys_flistxattr (int filedes, char *list, size_t size);
-
-int
-sys_lremovexattr (const char *path, const char *name);
-
-int
-sys_fremovexattr (int filedes, const char *name);
-
-int
-sys_access (const char *pathname, int mode);
-
-int
-sys_ftruncate (int fd, off_t length);
-
-#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/throttle-tbf.c b/libglusterfs/src/throttle-tbf.c
new file mode 100644
index 00000000000..e11ca4f9d35
--- /dev/null
+++ b/libglusterfs/src/throttle-tbf.c
@@ -0,0 +1,290 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ *
+ * Basic token bucket implementation for rate limiting. As of now interfaces
+ * to throttle disk read request, directory entry scan and hash calculation
+ * are available. To throttle a particular request (operation), the call needs
+ * to be wrapped in-between throttling APIs, for e.g.
+ *
+ * TBF_THROTTLE_BEGIN (...); <-- induces "delays" if required
+ * {
+ * call (...);
+ * }
+ * TBF_THROTTLE_END (...); <-- not used atm, maybe needed later
+ *
+ */
+
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/throttle-tbf.h"
+
+typedef struct tbf_throttle {
+ char done;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ unsigned long tokens;
+
+ struct list_head list;
+} tbf_throttle_t;
+
+static tbf_throttle_t *
+tbf_init_throttle(unsigned long tokens_required)
+{
+ tbf_throttle_t *throttle = NULL;
+
+ throttle = GF_CALLOC(1, sizeof(*throttle), gf_common_mt_tbf_throttle_t);
+ if (!throttle)
+ return NULL;
+
+ throttle->done = 0;
+ throttle->tokens = tokens_required;
+ INIT_LIST_HEAD(&throttle->list);
+
+ (void)pthread_mutex_init(&throttle->mutex, NULL);
+ (void)pthread_cond_init(&throttle->cond, NULL);
+
+ return throttle;
+}
+
+void
+_tbf_dispatch_queued(tbf_bucket_t *bucket)
+{
+ gf_boolean_t xcont = _gf_false;
+ tbf_throttle_t *tmp = NULL;
+ tbf_throttle_t *throttle = NULL;
+
+ list_for_each_entry_safe(throttle, tmp, &bucket->queued, list)
+ {
+ pthread_mutex_lock(&throttle->mutex);
+ {
+ if (bucket->tokens < throttle->tokens) {
+ xcont = _gf_true;
+ goto unblock;
+ }
+
+ /* this request can now be serviced */
+ throttle->done = 1;
+ list_del_init(&throttle->list);
+
+ bucket->tokens -= throttle->tokens;
+ pthread_cond_signal(&throttle->cond);
+ }
+ unblock:
+ pthread_mutex_unlock(&throttle->mutex);
+ if (xcont)
+ break;
+ }
+}
+
+void *
+tbf_tokengenerator(void *arg)
+{
+ unsigned long tokenrate = 0;
+ unsigned long maxtokens = 0;
+ unsigned long token_gen_interval = 0;
+ tbf_bucket_t *bucket = arg;
+
+ tokenrate = bucket->tokenrate;
+ maxtokens = bucket->maxtokens;
+ token_gen_interval = bucket->token_gen_interval;
+
+ while (1) {
+ gf_nanosleep(token_gen_interval * GF_US_IN_NS);
+
+ LOCK(&bucket->lock);
+ {
+ bucket->tokens += tokenrate;
+ if (bucket->tokens > maxtokens)
+ bucket->tokens = maxtokens;
+
+ if (!list_empty(&bucket->queued))
+ _tbf_dispatch_queued(bucket);
+ }
+ UNLOCK(&bucket->lock);
+ }
+
+ return NULL;
+}
+
+/**
+ * There is lazy synchronization between this routine (when invoked
+ * under tbf_mod() context) and tbf_throttle(). *bucket is
+ * updated _after_ all the required variables are initialized.
+ */
+static int32_t
+tbf_init_bucket(tbf_t *tbf, tbf_opspec_t *spec)
+{
+ int ret = 0;
+ tbf_bucket_t *curr = NULL;
+ tbf_bucket_t **bucket = NULL;
+
+ GF_ASSERT(spec->op >= TBF_OP_MIN);
+ GF_ASSERT(spec->op <= TBF_OP_MAX);
+
+ /* no rate? no throttling. */
+ if (!spec->rate)
+ return 0;
+
+ bucket = tbf->bucket + spec->op;
+
+ curr = GF_CALLOC(1, sizeof(*curr), gf_common_mt_tbf_bucket_t);
+ if (!curr)
+ goto error_return;
+
+ LOCK_INIT(&curr->lock);
+ INIT_LIST_HEAD(&curr->queued);
+
+ curr->tokens = 0;
+ curr->tokenrate = spec->rate;
+ curr->maxtokens = spec->maxlimit;
+ curr->token_gen_interval = spec->token_gen_interval;
+
+ ret = gf_thread_create(&curr->tokener, NULL, tbf_tokengenerator, curr,
+ "tbfclock");
+ if (ret != 0)
+ goto freemem;
+
+ *bucket = curr;
+ return 0;
+
+freemem:
+ LOCK_DESTROY(&curr->lock);
+ GF_FREE(curr);
+error_return:
+ return -1;
+}
+
+#define TBF_ALLOC_SIZE (sizeof(tbf_t) + (TBF_OP_MAX * sizeof(tbf_bucket_t)))
+
+tbf_t *
+tbf_init(tbf_opspec_t *tbfspec, unsigned int count)
+{
+ int32_t i = 0;
+ int32_t ret = 0;
+ tbf_t *tbf = NULL;
+ tbf_opspec_t *opspec = NULL;
+
+ tbf = GF_CALLOC(1, TBF_ALLOC_SIZE, gf_common_mt_tbf_t);
+ if (!tbf)
+ goto error_return;
+
+ tbf->bucket = (tbf_bucket_t **)((char *)tbf + sizeof(*tbf));
+ for (i = 0; i < TBF_OP_MAX; i++) {
+ *(tbf->bucket + i) = NULL;
+ }
+
+ for (i = 0; i < count; i++) {
+ opspec = tbfspec + i;
+
+ ret = tbf_init_bucket(tbf, opspec);
+ if (ret)
+ break;
+ }
+
+ if (ret)
+ goto error_return;
+
+ return tbf;
+
+error_return:
+ return NULL;
+}
+
+static void
+tbf_mod_bucket(tbf_bucket_t *bucket, tbf_opspec_t *spec)
+{
+ LOCK(&bucket->lock);
+ {
+ bucket->tokens = 0;
+ bucket->tokenrate = spec->rate;
+ bucket->maxtokens = spec->maxlimit;
+ }
+ UNLOCK(&bucket->lock);
+
+ /* next token tick would unqueue pending operations */
+}
+
+int
+tbf_mod(tbf_t *tbf, tbf_opspec_t *tbfspec)
+{
+ int ret = 0;
+ tbf_bucket_t *bucket = NULL;
+ tbf_ops_t op = TBF_OP_MIN;
+
+ if (!tbf || !tbfspec)
+ return -1;
+
+ op = tbfspec->op;
+
+ GF_ASSERT(op >= TBF_OP_MIN);
+ GF_ASSERT(op <= TBF_OP_MAX);
+
+ bucket = *(tbf->bucket + op);
+ if (bucket) {
+ tbf_mod_bucket(bucket, tbfspec);
+ } else {
+ ret = tbf_init_bucket(tbf, tbfspec);
+ }
+
+ return ret;
+}
+
+void
+tbf_throttle(tbf_t *tbf, tbf_ops_t op, unsigned long tokens_requested)
+{
+ char waitq = 0;
+ tbf_bucket_t *bucket = NULL;
+ tbf_throttle_t *throttle = NULL;
+
+ GF_ASSERT(op >= TBF_OP_MIN);
+ GF_ASSERT(op <= TBF_OP_MAX);
+
+ bucket = *(tbf->bucket + op);
+ if (!bucket)
+ return;
+
+ LOCK(&bucket->lock);
+ {
+ /**
+ * if there are enough tokens in the bucket there is no need
+ * to throttle the request: therefore, consume the required
+ * number of tokens and continue.
+ */
+ if (tokens_requested <= bucket->tokens) {
+ bucket->tokens -= tokens_requested;
+ } else {
+ throttle = tbf_init_throttle(tokens_requested);
+ if (!throttle) /* let it slip through for now.. */
+ goto unblock;
+
+ waitq = 1;
+ pthread_mutex_lock(&throttle->mutex);
+ list_add_tail(&throttle->list, &bucket->queued);
+ }
+ }
+unblock:
+ UNLOCK(&bucket->lock);
+
+ if (waitq) {
+ while (!throttle->done) {
+ pthread_cond_wait(&throttle->cond, &throttle->mutex);
+ }
+
+ pthread_mutex_unlock(&throttle->mutex);
+
+ pthread_mutex_destroy(&throttle->mutex);
+ pthread_cond_destroy(&throttle->cond);
+
+ GF_FREE(throttle);
+ }
+}
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
index ae40142ad51..66c861b04cd 100644
--- a/libglusterfs/src/timer.c
+++ b/libglusterfs/src/timer.c
@@ -8,213 +8,249 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "glusterfs/timer.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/timespec.h"
+#include "glusterfs/libglusterfs-messages.h"
-#include "timer.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "globals.h"
-
-#define TS(tv) ((((unsigned long long) tv.tv_sec) * 1000000) + (tv.tv_usec))
+/* fwd decl */
+static gf_timer_registry_t *
+gf_timer_registry_init(glusterfs_ctx_t *);
gf_timer_t *
-gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
- gf_timer_cbk_t callbk,
- void *data)
+gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta,
+ gf_timer_cbk_t callbk, void *data)
{
- gf_timer_registry_t *reg = NULL;
- gf_timer_t *event = NULL;
- gf_timer_t *trav = NULL;
- unsigned long long at = 0L;
-
- if (ctx == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ gf_timer_registry_t *reg = NULL;
+ gf_timer_t *event = NULL;
+ gf_timer_t *trav = NULL;
+ uint64_t at = 0;
+
+ if ((ctx == NULL) || (ctx->cleanup_started)) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "Either ctx is NULL or"
+ " ctx cleanup started");
+ return NULL;
+ }
- reg = gf_timer_registry_init (ctx);
+ reg = gf_timer_registry_init(ctx);
- if (!reg) {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "!reg");
- return NULL;
- }
+ if (!reg) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, 0, LG_MSG_TIMER_REGISTER_ERROR,
+ "!reg");
+ return NULL;
+ }
- event = GF_CALLOC (1, sizeof (*event), gf_common_mt_gf_timer_t);
- if (!event) {
- return NULL;
- }
- gettimeofday (&event->at, NULL);
- event->at.tv_usec = ((event->at.tv_usec + delta.tv_usec) % 1000000);
- event->at.tv_sec += ((event->at.tv_usec + delta.tv_usec) / 1000000);
- event->at.tv_sec += delta.tv_sec;
- at = TS (event->at);
- event->callbk = callbk;
- event->data = data;
- event->xl = THIS;
- pthread_mutex_lock (&reg->lock);
+ event = GF_CALLOC(1, sizeof(*event), gf_common_mt_gf_timer_t);
+ if (!event) {
+ return NULL;
+ }
+ timespec_now(&event->at);
+ timespec_adjust_delta(&event->at, delta);
+ at = TS(event->at);
+ event->callbk = callbk;
+ event->data = data;
+ event->xl = THIS;
+ pthread_mutex_lock(&reg->lock);
+ {
+ list_for_each_entry_reverse(trav, &reg->active, list)
{
- trav = reg->active.prev;
- while (trav != &reg->active) {
- if (TS (trav->at) < at)
- break;
- trav = trav->prev;
- }
- event->prev = trav;
- event->next = event->prev->next;
- event->prev->next = event;
- event->next->prev = event;
+ if (TS(trav->at) < at)
+ break;
+ }
+ list_add(&event->list, &trav->list);
+ if (&trav->list == &reg->active) {
+ pthread_cond_signal(&reg->cond);
}
- pthread_mutex_unlock (&reg->lock);
- return event;
+ }
+ pthread_mutex_unlock(&reg->lock);
+ return event;
}
int32_t
-gf_timer_call_stale (gf_timer_registry_t *reg,
- gf_timer_t *event)
+gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
{
- if (reg == NULL || event == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return 0;
- }
-
- event->next->prev = event->prev;
- event->prev->next = event->next;
- event->next = &reg->stale;
- event->prev = event->next->prev;
- event->next->prev = event;
- event->prev->next = event;
-
+ gf_timer_registry_t *reg = NULL;
+ gf_boolean_t fired = _gf_false;
+
+ if (ctx == NULL || event == NULL) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return -1;
+ }
+
+ if (ctx->cleanup_started) {
+ gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED,
+ "ctx cleanup started");
+ return -1;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ reg = ctx->timer;
+ }
+ UNLOCK(&ctx->lock);
+
+ if (!reg) {
+ /* This can happen when cleanup may have just started and
+ * gf_timer_registry_destroy() sets ctx->timer to NULL.
+ * gf_timer_proc() takes care of cleaning up the events.
+ */
+ return -1;
+ }
+
+ pthread_mutex_lock(&reg->lock);
+ {
+ fired = event->fired;
+ if (fired)
+ goto unlock;
+ list_del(&event->list);
+ }
+unlock:
+ pthread_mutex_unlock(&reg->lock);
+
+ if (!fired) {
+ GF_FREE(event);
return 0;
+ }
+ return -1;
}
-int32_t
-gf_timer_call_cancel (glusterfs_ctx_t *ctx,
- gf_timer_t *event)
+static void *
+gf_timer_proc(void *data)
{
- gf_timer_registry_t *reg = NULL;
+ gf_timer_registry_t *reg = data;
+ gf_timer_t *event = NULL;
+ gf_timer_t *tmp = NULL;
+ xlator_t *old_THIS = NULL;
+
+ pthread_mutex_lock(&reg->lock);
+
+ while (!reg->fin) {
+ if (list_empty(&reg->active)) {
+ pthread_cond_wait(&reg->cond, &reg->lock);
+ } else {
+ struct timespec now;
+
+ timespec_now(&now);
+ event = list_first_entry(&reg->active, gf_timer_t, list);
+ if (TS(now) < TS(event->at)) {
+ now = event->at;
+ pthread_cond_timedwait(&reg->cond, &reg->lock, &now);
+ } else {
+ event->fired = _gf_true;
+ list_del_init(&event->list);
+
+ pthread_mutex_unlock(&reg->lock);
+
+ old_THIS = NULL;
+ if (event->xl) {
+ old_THIS = THIS;
+ THIS = event->xl;
+ }
+ event->callbk(event->data);
+ GF_FREE(event);
+ if (old_THIS) {
+ THIS = old_THIS;
+ }
- if (ctx == NULL || event == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return 0;
+ pthread_mutex_lock(&reg->lock);
+ }
}
+ }
+
+ /* Do not call gf_timer_call_cancel(),
+ * it will lead to deadlock
+ */
+ list_for_each_entry_safe(event, tmp, &reg->active, list)
+ {
+ list_del(&event->list);
+ /* TODO Possible resource leak
+ * Before freeing the event, we need to call the respective
+ * event functions and free any resources.
+ * For example, In case of rpc_clnt_reconnect, we need to
+ * unref rpc object which was taken when added to timer
+ * wheel.
+ */
+ GF_FREE(event);
+ }
+
+ pthread_mutex_unlock(&reg->lock);
+
+ return NULL;
+}
- reg = gf_timer_registry_init (ctx);
- if (!reg) {
- gf_log ("timer", GF_LOG_ERROR, "!reg");
- GF_FREE (event);
- return 0;
+static gf_timer_registry_t *
+gf_timer_registry_init(glusterfs_ctx_t *ctx)
+{
+ gf_timer_registry_t *reg = NULL;
+ int ret = -1;
+ pthread_condattr_t attr;
+
+ LOCK(&ctx->lock);
+ {
+ reg = ctx->timer;
+ if (reg) {
+ UNLOCK(&ctx->lock);
+ goto out;
}
-
- pthread_mutex_lock (&reg->lock);
- {
- event->next->prev = event->prev;
- event->prev->next = event->next;
+ reg = GF_CALLOC(1, sizeof(*reg), gf_common_mt_gf_timer_registry_t);
+ if (!reg) {
+ UNLOCK(&ctx->lock);
+ goto out;
}
- pthread_mutex_unlock (&reg->lock);
+ ctx->timer = reg;
+ pthread_mutex_init(&reg->lock, NULL);
+ pthread_condattr_init(&attr);
+ pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+ pthread_cond_init(&reg->cond, &attr);
+ INIT_LIST_HEAD(&reg->active);
+ }
+ UNLOCK(&ctx->lock);
+ ret = gf_thread_create(&reg->th, NULL, gf_timer_proc, reg, "timer");
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_FAILED,
+ "Thread creation failed");
+ }
- GF_FREE (event);
- return 0;
+out:
+ return reg;
}
-void *
-gf_timer_proc (void *ctx)
+void
+gf_timer_registry_destroy(glusterfs_ctx_t *ctx)
{
- gf_timer_registry_t *reg = NULL;
- const struct timespec sleepts = {.tv_sec = 1, .tv_nsec = 0, };
+ pthread_t thr_id;
+ gf_timer_registry_t *reg = NULL;
- if (ctx == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ if (ctx == NULL)
+ return;
- reg = gf_timer_registry_init (ctx);
- if (!reg) {
- gf_log ("timer", GF_LOG_ERROR, "!reg");
- return NULL;
- }
-
- while (!reg->fin) {
- unsigned long long now;
- struct timeval now_tv;
- gf_timer_t *event = NULL;
-
- gettimeofday (&now_tv, NULL);
- now = TS (now_tv);
- while (1) {
- unsigned long long at;
- char need_cbk = 0;
-
- pthread_mutex_lock (&reg->lock);
- {
- event = reg->active.next;
- at = TS (event->at);
- if (event != &reg->active && now >= at) {
- need_cbk = 1;
- gf_timer_call_stale (reg, event);
- }
- }
- pthread_mutex_unlock (&reg->lock);
- if (event->xl)
- THIS = event->xl;
- if (need_cbk)
- event->callbk (event->data);
-
- else
- break;
- }
- nanosleep (&sleepts, NULL);
- }
+ LOCK(&ctx->lock);
+ {
+ reg = ctx->timer;
+ ctx->timer = NULL;
+ }
+ UNLOCK(&ctx->lock);
- pthread_mutex_lock (&reg->lock);
- {
- while (reg->active.next != &reg->active) {
- gf_timer_call_cancel (ctx, reg->active.next);
- }
+ if (!reg)
+ return;
- while (reg->stale.next != &reg->stale) {
- gf_timer_call_cancel (ctx, reg->stale.next);
- }
- }
- pthread_mutex_unlock (&reg->lock);
- pthread_mutex_destroy (&reg->lock);
- GF_FREE (((glusterfs_ctx_t *)ctx)->timer);
+ thr_id = reg->th;
- return NULL;
-}
+ pthread_mutex_lock(&reg->lock);
-gf_timer_registry_t *
-gf_timer_registry_init (glusterfs_ctx_t *ctx)
-{
- if (ctx == NULL) {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ reg->fin = 1;
+ pthread_cond_signal(&reg->cond);
- if (!ctx->timer) {
- gf_timer_registry_t *reg = NULL;
+ pthread_mutex_unlock(&reg->lock);
- reg = GF_CALLOC (1, sizeof (*reg),
- gf_common_mt_gf_timer_registry_t);
- if (!reg)
- goto out;
+ pthread_join(thr_id, NULL);
- pthread_mutex_init (&reg->lock, NULL);
- reg->active.next = &reg->active;
- reg->active.prev = &reg->active;
- reg->stale.next = &reg->stale;
- reg->stale.prev = &reg->stale;
+ pthread_cond_destroy(&reg->cond);
+ pthread_mutex_destroy(&reg->lock);
- ctx->timer = reg;
- pthread_create (&reg->th, NULL, gf_timer_proc, ctx);
- }
-out:
- return ctx->timer;
+ GF_FREE(reg);
}
diff --git a/libglusterfs/src/timer.h b/libglusterfs/src/timer.h
deleted file mode 100644
index 2954f6aff5d..00000000000
--- a/libglusterfs/src/timer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIMER_H
-#define _TIMER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include <sys/time.h>
-#include <pthread.h>
-
-typedef void (*gf_timer_cbk_t) (void *);
-
-struct _gf_timer {
- struct _gf_timer *next, *prev;
- struct timeval at;
- gf_timer_cbk_t callbk;
- void *data;
- xlator_t *xl;
-};
-
-struct _gf_timer_registry {
- pthread_t th;
- char fin;
- struct _gf_timer stale;
- struct _gf_timer active;
- pthread_mutex_t lock;
-};
-
-typedef struct _gf_timer gf_timer_t;
-typedef struct _gf_timer_registry gf_timer_registry_t;
-
-gf_timer_t *
-gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
- gf_timer_cbk_t cbk,
- void *data);
-
-int32_t
-gf_timer_call_cancel (glusterfs_ctx_t *ctx,
- gf_timer_t *event);
-
-void *
-gf_timer_proc (void *data);
-
-gf_timer_registry_t *
-gf_timer_registry_init (glusterfs_ctx_t *ctx);
-
-#endif /* _TIMER_H */
diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
new file mode 100644
index 00000000000..96cef5c6f07
--- /dev/null
+++ b/libglusterfs/src/timespec.c
@@ -0,0 +1,129 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <time.h>
+#include <sys/time.h>
+#include <string.h>
+
+#if defined GF_DARWIN_HOST_OS
+#include <mach/mach_time.h>
+static mach_timebase_info_data_t gf_timebase;
+#endif
+
+#include "glusterfs/timespec.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/common-utils.h"
+
+void
+timespec_now(struct timespec *ts)
+{
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \
+ defined GF_BSD_HOST_OS
+ if (0 == clock_gettime(CLOCK_MONOTONIC, ts)) {
+ /* All good */
+ return;
+ }
+
+ /* Fall back, but there is hope in gettimeofday() syscall */
+ struct timeval tv;
+ if (0 == gettimeofday(&tv, NULL)) {
+ /* Again, all good */
+ TIMEVAL_TO_TIMESPEC(&tv, ts);
+ return;
+ }
+
+ /* If control hits here, there is surely a problem,
+ mainly because, as per man page too, these syscalls
+ shouldn't fail. Best way is to ABORT, because it is
+ not right */
+ GF_ABORT("gettimeofday() failed!!");
+
+#elif defined GF_DARWIN_HOST_OS
+ uint64_t time = mach_absolute_time();
+ static double scaling = 0.0;
+
+ if (mach_timebase_info(&gf_timebase) != KERN_SUCCESS) {
+ gf_timebase.numer = 1;
+ gf_timebase.denom = 1;
+ }
+ if (gf_timebase.denom == 0) {
+ gf_timebase.numer = 1;
+ gf_timebase.denom = 1;
+ }
+
+ scaling = (double)gf_timebase.numer / (double)gf_timebase.denom;
+ time *= scaling;
+
+ ts->tv_sec = (time * NANO);
+ ts->tv_nsec = (time - (ts->tv_sec * GIGA));
+
+#endif /* Platform verification */
+}
+
+void
+timespec_now_realtime(struct timespec *ts)
+{
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \
+ defined GF_BSD_HOST_OS
+ if (0 == clock_gettime(CLOCK_REALTIME, ts)) {
+ return;
+ }
+#endif
+
+ /* Fall back to gettimeofday()*/
+ struct timeval tv = {
+ 0,
+ };
+ if (0 == gettimeofday(&tv, NULL)) {
+ TIMEVAL_TO_TIMESPEC(&tv, ts);
+ return;
+ }
+
+ return;
+}
+
+void
+timespec_adjust_delta(struct timespec *ts, struct timespec delta)
+{
+ ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000);
+ ts->tv_sec += ((ts->tv_nsec + delta.tv_nsec) / 1000000000);
+ ts->tv_sec += delta.tv_sec;
+}
+
+void
+timespec_sub(const struct timespec *begin, const struct timespec *end,
+ struct timespec *res)
+{
+ if (end->tv_nsec < begin->tv_nsec) {
+ res->tv_sec = end->tv_sec - begin->tv_sec - 1;
+ res->tv_nsec = end->tv_nsec + 1000000000 - begin->tv_nsec;
+ } else {
+ res->tv_sec = end->tv_sec - begin->tv_sec;
+ res->tv_nsec = end->tv_nsec - begin->tv_nsec;
+ }
+}
+
+int
+timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts)
+{
+ if (lhs_ts->tv_sec < rhs_ts->tv_sec) {
+ return -1;
+ } else if (lhs_ts->tv_sec > rhs_ts->tv_sec) {
+ return 1;
+ } else if (lhs_ts->tv_nsec < rhs_ts->tv_nsec) {
+ return -1;
+ } else if (lhs_ts->tv_nsec > rhs_ts->tv_nsec) {
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c
index b7c59784218..809550b864c 100644
--- a/libglusterfs/src/trie.c
+++ b/libglusterfs/src/trie.c
@@ -10,380 +10,357 @@
#include <stdio.h>
#include <string.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include "common-utils.h"
-#include "trie.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/trie.h"
#define DISTANCE_EDIT 1
-#define DISTANCE_INS 1
-#define DISTANCE_DEL 1
-
+#define DISTANCE_INS 1
+#define DISTANCE_DEL 1
struct trienode {
- char id;
- char eow;
- int depth;
- void *data;
- struct trie *trie;
- struct trienode *parent;
- struct trienode *subnodes[255];
+ char id;
+ char eow;
+ int depth;
+ void *data;
+ struct trie *trie;
+ struct trienode *parent;
+ struct trienode *subnodes[255];
};
struct trie {
- struct trienode root;
- int nodecnt;
- size_t len;
+ struct trienode root;
+ int nodecnt;
+ size_t len;
};
-
trie_t *
-trie_new ()
+trie_new()
{
- trie_t *trie = NULL;
+ trie_t *trie = NULL;
- trie = GF_CALLOC (1, sizeof (*trie), gf_common_mt_trie_trie);
- if (!trie)
- return NULL;
+ trie = GF_CALLOC(1, sizeof(*trie), gf_common_mt_trie_trie);
+ if (!trie)
+ return NULL;
- trie->root.trie = trie;
+ trie->root.trie = trie;
- return trie;
+ return trie;
}
-
static trienode_t *
-trie_subnode (trienode_t *node, int id)
+trie_subnode(trienode_t *node, int id)
{
- trienode_t *subnode = NULL;
-
- subnode = node->subnodes[id];
- if (!subnode) {
- subnode = GF_CALLOC (1, sizeof (*subnode),
- gf_common_mt_trie_node);
- if (!subnode)
- return NULL;
-
- subnode->id = id;
- subnode->depth = node->depth + 1;
- node->subnodes[id] = subnode;
- subnode->parent = node;
- subnode->trie = node->trie;
- node->trie->nodecnt++;
- }
-
- return subnode;
+ trienode_t *subnode = NULL;
+
+ subnode = node->subnodes[id];
+ if (!subnode) {
+ subnode = GF_CALLOC(1, sizeof(*subnode), gf_common_mt_trie_node);
+ if (!subnode)
+ return NULL;
+
+ subnode->id = id;
+ subnode->depth = node->depth + 1;
+ node->subnodes[id] = subnode;
+ subnode->parent = node;
+ subnode->trie = node->trie;
+ node->trie->nodecnt++;
+ }
+
+ return subnode;
}
-
int
-trie_add (trie_t *trie, const char *dword)
+trie_add(trie_t *trie, const char *dword)
{
- trienode_t *node = NULL;
- int i = 0;
- char id = 0;
- trienode_t *subnode = NULL;
+ trienode_t *node = NULL;
+ int i = 0;
+ char id = 0;
+ trienode_t *subnode = NULL;
- node = &trie->root;
+ node = &trie->root;
- for (i = 0; i < strlen (dword); i++) {
- id = dword[i];
+ for (i = 0; i < strlen(dword); i++) {
+ id = dword[i];
- subnode = trie_subnode (node, id);
- if (!subnode)
- return -1;
- node = subnode;
- }
+ subnode = trie_subnode(node, id);
+ if (!subnode)
+ return -1;
+ node = subnode;
+ }
- node->eow = 1;
+ node->eow = 1;
- return 0;
+ return 0;
}
static void
-trienode_free (trienode_t *node)
+trienode_free(trienode_t *node)
{
- trienode_t *trav = NULL;
- int i = 0;
+ trienode_t *trav = NULL;
+ int i = 0;
- for (i = 0; i < 255; i++) {
- trav = node->subnodes[i];
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
- if (trav)
- trienode_free (trav);
- }
+ if (trav)
+ trienode_free(trav);
+ }
- if (node->data)
- GF_FREE (node->data);
- GF_FREE (node);
+ GF_FREE(node->data);
+ GF_FREE(node);
}
-
void
-trie_destroy (trie_t *trie)
+trie_destroy(trie_t *trie)
{
- trienode_free ((trienode_t *)trie);
+ trienode_free((trienode_t *)trie);
}
-
void
-trie_destroy_bynode (trienode_t *node)
+trie_destroy_bynode(trienode_t *node)
{
- trie_destroy (node->trie);
+ trie_destroy(node->trie);
}
-
static int
-trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data),
- void *data, int eowonly)
+trienode_walk(trienode_t *node, int (*fn)(trienode_t *node, void *data),
+ void *data, int eowonly)
{
- trienode_t *trav = NULL;
- int i = 0;
- int cret = 0;
- int ret = 0;
-
- if (!eowonly || node->eow)
- ret = fn (node, data);
-
- if (ret)
- goto out;
-
- for (i = 0; i < 255; i++) {
- trav = node->subnodes[i];
- if (!trav)
- continue;
-
- cret = trienode_walk (trav, fn, data, eowonly);
- if (cret < 0) {
- ret = cret;
- goto out;
- }
- ret += cret;
+ trienode_t *trav = NULL;
+ int i = 0;
+ int cret = 0;
+ int ret = 0;
+
+ if (!eowonly || node->eow)
+ ret = fn(node, data);
+
+ if (ret)
+ goto out;
+
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
+ if (!trav)
+ continue;
+
+ cret = trienode_walk(trav, fn, data, eowonly);
+ if (cret < 0) {
+ ret = cret;
+ goto out;
}
+ ret += cret;
+ }
out:
- return ret;
+ return ret;
}
-
static int
-trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data),
- void *data, int eowonly)
+trie_walk(trie_t *trie, int (*fn)(trienode_t *node, void *data), void *data,
+ int eowonly)
{
- return trienode_walk (&trie->root, fn, data, eowonly);
+ return trienode_walk(&trie->root, fn, data, eowonly);
}
-
static void
-print_node (trienode_t *node, char **buf)
+print_node(trienode_t *node, char **buf)
{
- if (!node->parent)
- return;
+ if (!node->parent)
+ return;
- if (node->parent) {
- print_node (node->parent, buf);
- *(*buf)++ = node->id;
- }
+ if (node->parent) {
+ print_node(node->parent, buf);
+ *(*buf)++ = node->id;
+ }
}
-
int
-trienode_get_word (trienode_t *node, char **bufp)
+trienode_get_word(trienode_t *node, char **bufp)
{
- char *buf = NULL;
+ char *buf = NULL;
- buf = GF_CALLOC (1, node->depth + 1, gf_common_mt_trie_buf);
- if (!buf)
- return -1;
- *bufp = buf;
+ buf = GF_CALLOC(1, node->depth + 1, gf_common_mt_trie_buf);
+ if (!buf)
+ return -1;
+ *bufp = buf;
- print_node (node, &buf);
+ print_node(node, &buf);
- return 0;
+ return 0;
}
-
static int
-calc_dist (trienode_t *node, void *data)
+calc_dist(trienode_t *node, void *data)
{
- const char *word = NULL;
- int i = 0;
- int *row = NULL;
- int *uprow = NULL;
- int distu = 0;
- int distl = 0;
- int distul = 0;
-
- word = data;
-
- node->data = GF_CALLOC (node->trie->len, sizeof (int),
- gf_common_mt_trie_data);
- if (!node->data)
- return -1;
- row = node->data;
-
- if (!node->parent) {
- for (i = 0; i < node->trie->len; i++)
- row[i] = i+1;
-
- return 0;
- }
+ const char *word = NULL;
+ int i = 0;
+ int *row = NULL;
+ int *uprow = NULL;
+ int distu = 0;
+ int distl = 0;
+ int distul = 0;
+
+ word = data;
+
+ node->data = GF_CALLOC(node->trie->len, sizeof(int),
+ gf_common_mt_trie_data);
+ if (!node->data)
+ return -1;
+ row = node->data;
+
+ if (!node->parent) {
+ for (i = 0; i < node->trie->len; i++)
+ row[i] = i + 1;
- uprow = node->parent->data;
+ return 0;
+ }
- distu = node->depth; /* up node */
- distul = node->parent->depth; /* up-left node */
+ uprow = node->parent->data;
- for (i = 0; i < node->trie->len; i++) {
- distl = uprow[i]; /* left node */
+ distu = node->depth; /* up node */
+ distul = node->parent->depth; /* up-left node */
- if (word[i] == node->id)
- row[i] = distul;
- else
- row[i] = min ((distul + DISTANCE_EDIT),
- min ((distu + DISTANCE_DEL),
- (distl + DISTANCE_INS)));
+ for (i = 0; i < node->trie->len; i++) {
+ distl = uprow[i]; /* left node */
- distu = row[i];
- distul = distl;
- }
+ if (word[i] == node->id)
+ row[i] = distul;
+ else
+ row[i] = min((distul + DISTANCE_EDIT),
+ min((distu + DISTANCE_DEL), (distl + DISTANCE_INS)));
- return 0;
-}
+ distu = row[i];
+ distul = distl;
+ }
+ return 0;
+}
int
-trienode_get_dist (trienode_t *node)
+trienode_get_dist(trienode_t *node)
{
- int *row = NULL;
+ int *row = NULL;
- row = node->data;
+ row = node->data;
- return row[node->trie->len - 1];
+ return row[node->trie->len - 1];
}
-
struct trienodevec_w {
- struct trienodevec *vec;
- const char *word;
+ struct trienodevec *vec;
+ const char *word;
};
-
static void
-trienodevec_clear (struct trienodevec *nodevec)
+trienodevec_clear(struct trienodevec *nodevec)
{
- memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt);
+ memset(nodevec->nodes, 0, sizeof(*nodevec->nodes) * nodevec->cnt);
}
-
static int
-collect_closest (trienode_t *node, void *data)
+collect_closest(trienode_t *node, void *data)
{
- struct trienodevec_w *nodevec_w = NULL;
- struct trienodevec *nodevec = NULL;
- int dist = 0;
- int i = 0;
-
- nodevec_w = data;
- nodevec = nodevec_w->vec;
-
- if (calc_dist (node, (void *)nodevec_w->word))
- return -1;
-
- if (!node->eow || !nodevec->cnt)
- return 0;
-
- dist = trienode_get_dist (node);
-
- /*
- * I thought that when descending further after some dictionary word dw,
- * if we see that child's distance is bigger than it was for dw, then we
- * can prune this branch, as it can contain only worse nodes.
- *
- * This conjecture fails, see eg:
- *
- * d("AB", "B") = 1;
- * d("AB", "BA") = 2;
- * d("AB", "BAB") = 1;
- *
- * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
- * miss "BAB".
- *
- * (example courtesy of Richard Bann <richardbann at gmail.com>)
-
- if (node->parent->eow && dist > trienode_get_dist (node->parent))
- return 1;
-
- */
-
- if (nodevec->nodes[0] &&
- dist < trienode_get_dist (nodevec->nodes[0])) {
- /* improving over the findings so far */
- trienodevec_clear (nodevec);
- nodevec->nodes[0] = node;
- } else if (!nodevec->nodes[0] ||
- dist == trienode_get_dist (nodevec->nodes[0])) {
- /* as good as the best so far, add if there is free space */
- for (i = 0; i < nodevec->cnt; i++) {
- if (!nodevec->nodes[i]) {
- nodevec->nodes[i] = node;
- break;
- }
- }
- }
+ struct trienodevec_w *nodevec_w = NULL;
+ struct trienodevec *nodevec = NULL;
+ int dist = 0;
+ int i = 0;
+
+ nodevec_w = data;
+ nodevec = nodevec_w->vec;
+ if (calc_dist(node, (void *)nodevec_w->word))
+ return -1;
+
+ if (!node->eow || !nodevec->cnt)
return 0;
-}
+ dist = trienode_get_dist(node);
+
+ /*
+ * I thought that when descending further after some dictionary word dw,
+ * if we see that child's distance is bigger than it was for dw, then we
+ * can prune this branch, as it can contain only worse nodes.
+ *
+ * This conjecture fails, see eg:
+ *
+ * d("AB", "B") = 1;
+ * d("AB", "BA") = 2;
+ * d("AB", "BAB") = 1;
+ *
+ * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
+ * miss "BAB".
+ *
+ * (example courtesy of Richard Bann <richardbann at gmail.com>)
+
+ if (node->parent->eow && dist > trienode_get_dist (node->parent))
+ return 1;
+
+ */
+
+ if (nodevec->nodes[0] && dist < trienode_get_dist(nodevec->nodes[0])) {
+ /* improving over the findings so far */
+ trienodevec_clear(nodevec);
+ nodevec->nodes[0] = node;
+ } else if (!nodevec->nodes[0] ||
+ dist == trienode_get_dist(nodevec->nodes[0])) {
+ /* as good as the best so far, add if there is free space */
+ for (i = 0; i < nodevec->cnt; i++) {
+ if (!nodevec->nodes[i]) {
+ nodevec->nodes[i] = node;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
int
-trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
- int nodecnt)
+trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt)
{
- struct trienodevec nodevec = {0,};
+ struct trienodevec nodevec = {
+ 0,
+ };
- nodevec.nodes = nodes;
- nodevec.cnt = nodecnt;
+ nodevec.nodes = nodes;
+ nodevec.cnt = nodecnt;
- return trie_measure_vec (trie, word, &nodevec);
+ return trie_measure_vec(trie, word, &nodevec);
}
-
int
-trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec)
+trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec)
{
- struct trienodevec_w nodevec_w = {0,};
- int ret = 0;
+ struct trienodevec_w nodevec_w = {
+ 0,
+ };
+ int ret = 0;
- trie->len = strlen (word);
+ trie->len = strlen(word);
- trienodevec_clear (nodevec);
- nodevec_w.vec = nodevec;
- nodevec_w.word = word;
+ trienodevec_clear(nodevec);
+ nodevec_w.vec = nodevec;
+ nodevec_w.word = word;
- ret = trie_walk (trie, collect_closest, &nodevec_w, 0);
- if (ret > 0)
- ret = 0;
+ ret = trie_walk(trie, collect_closest, &nodevec_w, 0);
+ if (ret > 0)
+ ret = 0;
- return ret;
+ return ret;
}
-
static int
-trienode_reset (trienode_t *node, void *data)
+trienode_reset(trienode_t *node, void *data)
{
- if (node->data)
- GF_FREE (node->data);
+ GF_FREE(node->data);
- return 0;
+ return 0;
}
-
void
-trie_reset_search (trie_t *trie)
+trie_reset_search(trie_t *trie)
{
- trie->len = 0;
+ trie->len = 0;
- trie_walk (trie, trienode_reset, NULL, 0);
+ trie_walk(trie, trienode_reset, NULL, 0);
}
diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/trie.h
deleted file mode 100644
index 0356e66210e..00000000000
--- a/libglusterfs/src/trie.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TRIE_H_
-#define _TRIE_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-struct trienode;
-typedef struct trienode trienode_t;
-
-struct trie;
-typedef struct trie trie_t;
-
-struct trienodevec {
- trienode_t **nodes;
- unsigned cnt;
-};
-
-
-trie_t *trie_new ();
-
-int trie_add (trie_t *trie, const char *word);
-
-void trie_destroy (trie_t *trie);
-
-void trie_destroy_bynode (trienode_t *node);
-
-int trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
- int nodecnt);
-
-int trie_measure_vec (trie_t *trie, const char *word,
- struct trienodevec *nodevec);
-
-void trie_reset_search (trie_t *trie);
-
-int trienode_get_dist (trienode_t *node);
-
-int trienode_get_word (trienode_t *node, char **buf);
-
-#endif
diff --git a/libglusterfs/src/unittest/global_mock.c b/libglusterfs/src/unittest/global_mock.c
new file mode 100644
index 00000000000..2fcf96dbad8
--- /dev/null
+++ b/libglusterfs/src/unittest/global_mock.c
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <inttypes.h>
+
+#include <cmocka.h>
+
+xlator_t **
+__glusterfs_this_location()
+{
+ return ((xlator_t **)(uintptr_t)mock());
+}
diff --git a/libglusterfs/src/unittest/log_mock.c b/libglusterfs/src/unittest/log_mock.c
new file mode 100644
index 00000000000..60f6530726b
--- /dev/null
+++ b/libglusterfs/src/unittest/log_mock.c
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <inttypes.h>
+
+#include <cmocka.h>
+
+int
+_gf_log(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+ return 0;
+}
+
+int
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+ return 0;
+}
+
+int
+_gf_log_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size)
+{
+ return 0;
+}
+
+int
+_gf_msg_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size)
+{
+ return 0;
+}
+
+void
+gf_log_globals_init(void *data, gf_loglevel_t level)
+{
+}
diff --git a/libglusterfs/src/unittest/mem_pool_unittest.c b/libglusterfs/src/unittest/mem_pool_unittest.c
new file mode 100644
index 00000000000..9ca324329ba
--- /dev/null
+++ b/libglusterfs/src/unittest/mem_pool_unittest.c
@@ -0,0 +1,483 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <inttypes.h>
+#include <string.h>
+#include <cmocka_pbc.h>
+#include <cmocka.h>
+
+#ifndef assert_ptr_equal
+#define assert_ptr_equal(a, b) \
+ _assert_int_equal(cast_ptr_to_largest_integral_type(a), \
+ cast_ptr_to_largest_integral_type(b), __FILE__, \
+ __LINE__)
+#endif
+
+/*
+ * memory header for gf_mem_set_acct_info
+ */
+typedef struct __attribute__((packed)) {
+ uint32_t type;
+ size_t size;
+ xlator_t *xl;
+ uint32_t header_magic;
+ uint8_t pad[8];
+} mem_header_t;
+
+/*
+ * Prototypes to private functions
+ */
+int
+gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type,
+ const char *typestr);
+
+/*
+ * Helper functions
+ */
+static xlator_t *
+helper_xlator_init(uint32_t num_types)
+{
+ xlator_t *xl;
+ int i, ret;
+
+ REQUIRE(num_types > 0);
+
+ xl = test_calloc(1, sizeof(xlator_t));
+ assert_non_null(xl);
+ xl->mem_acct->num_types = num_types;
+ xl->mem_acct = test_calloc(sizeof(struct mem_acct) +
+ sizeof(struct mem_acct_rec) * num_types);
+ assert_non_null(xl->mem_acct);
+
+ xl->ctx = test_calloc(1, sizeof(glusterfs_ctx_t));
+ assert_non_null(xl->ctx);
+
+ for (i = 0; i < num_types; i++) {
+ ret = LOCK_INIT(&(xl->mem_acct->rec[i].lock));
+ assert_int_equal(ret, 0);
+ }
+
+ ENSURE(num_types == xl->mem_acct->num_types);
+ ENSURE(NULL != xl);
+
+ return xl;
+}
+
+static int
+helper_xlator_destroy(xlator_t *xl)
+{
+ int i, ret;
+
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ ret = LOCK_DESTROY(&(xl->mem_acct->rec[i].lock));
+ assert_int_equal(ret, 0);
+ }
+
+ free(xl->mem_acct->rec);
+ free(xl->ctx);
+ free(xl);
+ return 0;
+}
+
+static void
+helper_check_memory_headers(char *mem, xlator_t *xl, size_t size, uint32_t type)
+{
+ mem_header_t *p;
+
+ p = (mem_header_t *)mem, assert_int_equal(p->type, type);
+ assert_int_equal(p->size, size);
+ assert_true(p->xl == xl);
+ assert_int_equal(p->header_magic, GF_MEM_HEADER_MAGIC);
+ assert_true(*(uint32_t *)(mem + sizeof(mem_header_t) + size) ==
+ GF_MEM_TRAILER_MAGIC);
+}
+
+/*
+ * Tests
+ */
+static void
+test_gf_mem_acct_enable_set(void **state)
+{
+ (void)state;
+ glusterfs_ctx_t test_ctx;
+
+ expect_assert_failure(gf_mem_acct_enable_set(NULL));
+
+ memset(&test_ctx, 0, sizeof(test_ctx));
+ assert_true(NULL == test_ctx.process_uuid);
+ gf_mem_acct_enable_set((void *)&test_ctx);
+ assert_true(1 == test_ctx.mem_acct_enable);
+ assert_true(NULL == test_ctx.process_uuid);
+}
+
+static void
+test_gf_mem_set_acct_info_asserts(void **state)
+{
+ xlator_t *xl;
+ xlator_t xltest;
+ char *alloc_ptr;
+ size_t size;
+ uint32_t type;
+
+ memset(&xltest, 0, sizeof(xlator_t));
+ xl = (xlator_t *)0xBADD;
+ alloc_ptr = (char *)0xBADD;
+ size = 8196;
+ type = 0;
+
+ // Check xl is NULL
+ expect_assert_failure(
+ gf_mem_set_acct_info(NULL, &alloc_ptr, size, type, ""));
+ // Check xl->mem_acct = NULL
+ expect_assert_failure(
+ gf_mem_set_acct_info(&xltest, &alloc_ptr, 0, type, ""));
+ // Check type <= xl->mem_acct->num_types
+ type = 100;
+ expect_assert_failure(
+ gf_mem_set_acct_info(&xltest, &alloc_ptr, 0, type, ""));
+ // Check alloc is NULL
+ assert_int_equal(-1, gf_mem_set_acct_info(&xltest, NULL, size, type, ""));
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+
+ // Test number of types
+ type = 100;
+ assert_true(NULL != xl->mem_acct);
+ assert_true(type > xl->mem_acct->num_types);
+ expect_assert_failure(gf_mem_set_acct_info(xl, &alloc_ptr, size, type, ""));
+
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_mem_set_acct_info_memory(void **state)
+{
+ xlator_t *xl;
+ char *alloc_ptr;
+ char *temp_ptr;
+ size_t size;
+ uint32_t type;
+ const char *typestr = "TEST";
+
+ size = 8196;
+ type = 9;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_null(xl->mem_acct->rec[type].typestr);
+
+ // Test allocation
+ temp_ptr = test_calloc(1, size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE);
+ assert_non_null(temp_ptr);
+ alloc_ptr = temp_ptr;
+ gf_mem_set_acct_info(xl, &alloc_ptr, size, type, typestr);
+
+ // Check values
+ assert_ptr_equal(typestr, xl->mem_acct->rec[type].typestr);
+ assert_int_equal(xl->mem_acct->rec[type].size, size);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 1);
+
+ // Check memory
+ helper_check_memory_headers(temp_ptr, xl, size, type);
+
+ // Check that alloc_ptr has been moved correctly
+ // by gf_mem_set_acct_info
+ {
+ mem_header_t *p;
+
+ p = (mem_header_t *)temp_ptr;
+ p++;
+ p->type = 1234;
+ assert_int_equal(*(uint32_t *)alloc_ptr, p->type);
+ }
+
+ free(temp_ptr);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_calloc_default_calloc(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ will_return(__glusterfs_this_location, &xl);
+
+ // Call __gf_calloc
+ size = 1024;
+ type = 3;
+ mem = __gf_calloc(1, size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl did not change
+ assert_int_equal(xl->mem_acct->rec[type].size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 0);
+
+ free(mem);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_calloc_mem_acct_enabled(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ xl->ctx->mem_acct_enable = 1;
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_calloc
+ size = 1024;
+ type = 3;
+ mem = __gf_calloc(1, size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl values
+ assert_int_equal(xl->mem_acct->rec[type].size, size);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 1);
+
+ // Check memory
+ helper_check_memory_headers(mem - sizeof(mem_header_t), xl, size, type);
+ free(mem - sizeof(mem_header_t));
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_malloc_default_malloc(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ will_return(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc
+ size = 1024;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl did not change
+ assert_int_equal(xl->mem_acct->rec[type].size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 0);
+
+ free(mem);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_malloc_mem_acct_enabled(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ xl->ctx->mem_acct_enable = 1;
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc
+ size = 1024;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl values
+ assert_int_equal(xl->mem_acct->rec[type].size, size);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 1);
+
+ // Check memory
+ helper_check_memory_headers(mem - sizeof(mem_header_t), xl, size, type);
+ free(mem - sizeof(mem_header_t));
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_realloc_default_realloc(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc then realloc
+ size = 10;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0xA5, size);
+
+ size = 1024;
+ mem = __gf_realloc(mem, size);
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl did not change
+ assert_int_equal(xl->mem_acct->rec[type].size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 0);
+
+ free(mem);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_realloc_mem_acct_enabled(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ xl->ctx->mem_acct_enable = 1;
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc then realloc
+ size = 1024;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0xA5, size);
+
+ size = 2048;
+ mem = __gf_realloc(mem, size);
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl values
+ //
+ // :TODO: This is really weird. I would have expected
+ // xl to only have a size equal to that of the realloc
+ // not to the realloc + the malloc.
+ // Is this a bug?
+ //
+ assert_int_equal(xl->mem_acct->rec[type].size, size + 1024);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 2);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 2);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size + 1024);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 2);
+
+ // Check memory
+ helper_check_memory_headers(mem - sizeof(mem_header_t), xl, size, type);
+ free(mem - sizeof(mem_header_t));
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_realloc_ptr(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Tests according to the manpage for realloc
+
+ // Like a malloc
+ size = 1024;
+ mem = __gf_realloc(NULL, size);
+ assert_non_null(mem);
+ memset(mem, 0xA5, size);
+
+ // Like a free
+ mem = __gf_realloc(mem, 0);
+ assert_null(mem);
+
+ // Now enable xl context
+ xl->ctx->mem_acct_enable = 1;
+ expect_assert_failure(__gf_realloc(NULL, size));
+
+ helper_xlator_destroy(xl);
+}
+
+int
+main(void)
+{
+ const struct CMUnitTest libglusterfs_mem_pool_tests[] = {
+ cmocka_unit_test(test_gf_mem_acct_enable_set),
+ cmocka_unit_test(test_gf_mem_set_acct_info_asserts),
+ cmocka_unit_test(test_gf_mem_set_acct_info_memory),
+ cmocka_unit_test(test_gf_calloc_default_calloc),
+ cmocka_unit_test(test_gf_calloc_mem_acct_enabled),
+ cmocka_unit_test(test_gf_malloc_default_malloc),
+ cmocka_unit_test(test_gf_malloc_mem_acct_enabled),
+ cmocka_unit_test(test_gf_realloc_default_realloc),
+ cmocka_unit_test(test_gf_realloc_mem_acct_enabled),
+ cmocka_unit_test(test_gf_realloc_ptr),
+ };
+
+ return cmocka_run_group_tests(libglusterfs_mem_pool_tests, NULL, NULL);
+}
diff --git a/libglusterfs/src/unittest/unittest.h b/libglusterfs/src/unittest/unittest.h
new file mode 100644
index 00000000000..58b3e28bb6e
--- /dev/null
+++ b/libglusterfs/src/unittest/unittest.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_UNITTEST_H_
+#define _GF_UNITTEST_H_
+
+#ifdef UNIT_TESTING
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka_pbc.h>
+#include <cmocka.h>
+
+extern void
+mock_assert(const int result, const char *const expression,
+ const char *const file, const int line);
+
+// Change GF_CALLOC and GF_FREE to use
+// cmocka memory allocation versions
+#ifdef UNIT_TESTING
+#undef GF_CALLOC
+#define GF_CALLOC(n, s, t) test_calloc(n, s)
+#undef GF_FREE
+#define GF_FREE test_free
+
+/* Catch intended assert()'s while unit-testing */
+extern void
+mock_assert(const int result, const char *const expression,
+ const char *const file, const int line);
+
+#undef assert
+#define assert(expression) \
+ mock_assert((int)(expression), #expression, __FILE__, __LINE__);
+#endif
+#else
+#define REQUIRE(p) /**/
+#define ENSURE(p) /**/
+#endif
+
+#endif /* _GF_UNITTEST */
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 470087df9c9..9a2582d45d5 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -8,769 +8,1586 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
+#include "glusterfs/xlator.h"
#include <dlfcn.h>
#include <netdb.h>
#include <fnmatch.h>
-#include "defaults.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/libglusterfs-messages.h"
-#define SET_DEFAULT_FOP(fn) do { \
- if (!xl->fops->fn) \
- xl->fops->fn = default_##fn; \
- } while (0)
+#define SET_DEFAULT_FOP(fn) \
+ do { \
+ if (!xl->fops->fn) \
+ xl->fops->fn = default_##fn; \
+ if (!xl->pass_through_fops->fn) \
+ xl->pass_through_fops->fn = default_##fn; \
+ } while (0)
-#define SET_DEFAULT_CBK(fn) do { \
- if (!xl->cbks->fn) \
- xl->cbks->fn = default_##fn; \
- } while (0)
+#define SET_DEFAULT_CBK(fn) \
+ do { \
+ if (!xl->cbks->fn) \
+ xl->cbks->fn = default_##fn; \
+ } while (0)
+pthread_mutex_t xlator_init_mutex = PTHREAD_MUTEX_INITIALIZER;
-static void
-fill_defaults (xlator_t *xl)
+void
+xlator_init_lock(void)
{
- if (xl == NULL) {
- gf_log_callingfn ("xlator", GF_LOG_WARNING, "invalid argument");
- return;
- }
+ (void)pthread_mutex_lock(&xlator_init_mutex);
+}
- SET_DEFAULT_FOP (create);
- SET_DEFAULT_FOP (open);
- SET_DEFAULT_FOP (stat);
- SET_DEFAULT_FOP (readlink);
- SET_DEFAULT_FOP (mknod);
- SET_DEFAULT_FOP (mkdir);
- SET_DEFAULT_FOP (unlink);
- SET_DEFAULT_FOP (rmdir);
- SET_DEFAULT_FOP (symlink);
- SET_DEFAULT_FOP (rename);
- SET_DEFAULT_FOP (link);
- SET_DEFAULT_FOP (truncate);
- SET_DEFAULT_FOP (readv);
- SET_DEFAULT_FOP (writev);
- SET_DEFAULT_FOP (statfs);
- SET_DEFAULT_FOP (flush);
- SET_DEFAULT_FOP (fsync);
- SET_DEFAULT_FOP (setxattr);
- SET_DEFAULT_FOP (getxattr);
- SET_DEFAULT_FOP (fsetxattr);
- SET_DEFAULT_FOP (fgetxattr);
- SET_DEFAULT_FOP (removexattr);
- SET_DEFAULT_FOP (fremovexattr);
- SET_DEFAULT_FOP (opendir);
- SET_DEFAULT_FOP (readdir);
- SET_DEFAULT_FOP (readdirp);
- SET_DEFAULT_FOP (fsyncdir);
- SET_DEFAULT_FOP (access);
- SET_DEFAULT_FOP (ftruncate);
- SET_DEFAULT_FOP (fstat);
- SET_DEFAULT_FOP (lk);
- SET_DEFAULT_FOP (inodelk);
- SET_DEFAULT_FOP (finodelk);
- SET_DEFAULT_FOP (entrylk);
- SET_DEFAULT_FOP (fentrylk);
- SET_DEFAULT_FOP (lookup);
- SET_DEFAULT_FOP (rchecksum);
- SET_DEFAULT_FOP (xattrop);
- SET_DEFAULT_FOP (fxattrop);
- SET_DEFAULT_FOP (setattr);
- SET_DEFAULT_FOP (fsetattr);
-
- SET_DEFAULT_FOP (getspec);
-
- SET_DEFAULT_CBK (release);
- SET_DEFAULT_CBK (releasedir);
- SET_DEFAULT_CBK (forget);
-
- if (!xl->notify)
- xl->notify = default_notify;
-
- if (!xl->mem_acct_init)
- xl->mem_acct_init = default_mem_acct_init;
+void
+xlator_init_unlock(void)
+{
+ (void)pthread_mutex_unlock(&xlator_init_mutex);
+}
- return;
+static struct xlator_cbks default_cbks = {};
+struct volume_options default_options[] = {
+ {
+ .key = {"log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"generic"},
+ .value = {"DEBUG", "WARNING", "ERROR", "INFO", "CRITICAL", "NONE",
+ "TRACE"},
+ .description = "Option to set log-level of given translator",
+ },
+ {
+ .key = {NULL},
+ },
+};
+
+/* Handle the common options in each translator */
+void
+handle_default_options(xlator_t *xl, dict_t *options)
+{
+ int ret;
+ char *value;
+
+ /* log-level */
+ ret = dict_get_str(options, "log-level", &value);
+ if (!ret) {
+ int log_level = glusterd_check_log_level(value);
+ if (log_level != -1) {
+ xl->loglevel = log_level;
+ }
+ }
}
+static void
+fill_defaults(xlator_t *xl)
+{
+ if (xl == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
+
+ if (!xl->pass_through_fops)
+ xl->pass_through_fops = default_fops;
+
+ SET_DEFAULT_FOP(create);
+ SET_DEFAULT_FOP(open);
+ SET_DEFAULT_FOP(stat);
+ SET_DEFAULT_FOP(readlink);
+ SET_DEFAULT_FOP(mknod);
+ SET_DEFAULT_FOP(mkdir);
+ SET_DEFAULT_FOP(unlink);
+ SET_DEFAULT_FOP(rmdir);
+ SET_DEFAULT_FOP(symlink);
+ SET_DEFAULT_FOP(rename);
+ SET_DEFAULT_FOP(link);
+ SET_DEFAULT_FOP(truncate);
+ SET_DEFAULT_FOP(readv);
+ SET_DEFAULT_FOP(writev);
+ SET_DEFAULT_FOP(statfs);
+ SET_DEFAULT_FOP(flush);
+ SET_DEFAULT_FOP(fsync);
+ SET_DEFAULT_FOP(setxattr);
+ SET_DEFAULT_FOP(getxattr);
+ SET_DEFAULT_FOP(fsetxattr);
+ SET_DEFAULT_FOP(fgetxattr);
+ SET_DEFAULT_FOP(removexattr);
+ SET_DEFAULT_FOP(fremovexattr);
+ SET_DEFAULT_FOP(opendir);
+ SET_DEFAULT_FOP(readdir);
+ SET_DEFAULT_FOP(readdirp);
+ SET_DEFAULT_FOP(fsyncdir);
+ SET_DEFAULT_FOP(access);
+ SET_DEFAULT_FOP(ftruncate);
+ SET_DEFAULT_FOP(fstat);
+ SET_DEFAULT_FOP(lk);
+ SET_DEFAULT_FOP(inodelk);
+ SET_DEFAULT_FOP(finodelk);
+ SET_DEFAULT_FOP(entrylk);
+ SET_DEFAULT_FOP(fentrylk);
+ SET_DEFAULT_FOP(lookup);
+ SET_DEFAULT_FOP(rchecksum);
+ SET_DEFAULT_FOP(xattrop);
+ SET_DEFAULT_FOP(fxattrop);
+ SET_DEFAULT_FOP(setattr);
+ SET_DEFAULT_FOP(fsetattr);
+ SET_DEFAULT_FOP(fallocate);
+ SET_DEFAULT_FOP(discard);
+ SET_DEFAULT_FOP(zerofill);
+ SET_DEFAULT_FOP(ipc);
+ SET_DEFAULT_FOP(seek);
+ SET_DEFAULT_FOP(lease);
+ SET_DEFAULT_FOP(getactivelk);
+ SET_DEFAULT_FOP(setactivelk);
+ SET_DEFAULT_FOP(put);
+
+ SET_DEFAULT_FOP(getspec);
+ SET_DEFAULT_FOP(icreate);
+ SET_DEFAULT_FOP(namelink);
+ SET_DEFAULT_FOP(copy_file_range);
+
+ if (!xl->cbks)
+ xl->cbks = &default_cbks;
+
+ SET_DEFAULT_CBK(release);
+ SET_DEFAULT_CBK(releasedir);
+ SET_DEFAULT_CBK(forget);
+
+ if (!xl->fini)
+ xl->fini = default_fini;
+
+ if (!xl->notify)
+ xl->notify = default_notify;
+
+ if (!xl->mem_acct_init)
+ xl->mem_acct_init = default_mem_acct_init;
+
+ return;
+}
int
-xlator_set_type_virtual (xlator_t *xl, const char *type)
+xlator_set_type_virtual(xlator_t *xl, const char *type)
{
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
- GF_VALIDATE_OR_GOTO ("xlator", type, out);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", type, out);
- xl->type = gf_strdup (type);
+ xl->type = gf_strdup(type);
- if (xl->type)
- return 0;
+ if (xl->type)
+ return 0;
out:
- return -1;
+ return -1;
}
-
int
-xlator_volopt_dynload (char *xlator_type, void **dl_handle,
- volume_opt_list_t *opt_list)
+xlator_volopt_dynload(char *xlator_type, void **dl_handle,
+ volume_opt_list_t *opt_list)
{
- int ret = -1;
- char *name = NULL;
- void *handle = NULL;
- volume_opt_list_t *vol_opt = NULL;
-
- GF_VALIDATE_OR_GOTO ("xlator", xlator_type, out);
-
- GF_ASSERT (dl_handle);
-
- if (*dl_handle)
- if (dlclose (*dl_handle))
- gf_log ("xlator", GF_LOG_WARNING, "Unable to close "
- "previously opened handle( may be stale)."
- "Ignoring the invalid handle");
-
- ret = gf_asprintf (&name, "%s/%s.so", XLATORDIR, xlator_type);
- if (-1 == ret) {
- gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
- goto out;
+ int ret = -1;
+ int flag = 0;
+ char *name = NULL;
+ void *handle = NULL;
+ xlator_api_t *xlapi = NULL;
+ volume_option_t *opt = NULL;
+
+ GF_VALIDATE_OR_GOTO("xlator", xlator_type, out);
+
+ /* socket.so doesn't fall under the default xlator directory, hence we
+ * need this check */
+ if (!strstr(xlator_type, "rpc-transport"))
+ ret = gf_asprintf(&name, "%s/%s.so", XLATORDIR, xlator_type);
+ else {
+ flag = 1;
+ ret = gf_asprintf(&name, "%s/%s.so", XLATORPARENTDIR, xlator_type);
+ }
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = -1;
+
+ gf_msg_trace("xlator", 0, "attempt to load file %s", name);
+
+ handle = dlopen(name, RTLD_NOW);
+ if (!handle) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s",
+ dlerror(), NULL);
+ goto out;
+ }
+
+ if (flag == 0) {
+ /* check new struct first, and then check this */
+ xlapi = dlsym(handle, "xlator_api");
+ if (!xlapi) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s",
+ dlerror(), NULL);
+ goto out;
}
- ret = -1;
+ opt_list->given_opt = xlapi->options;
+ if (!opt_list->given_opt) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, NULL);
+ goto out;
+ }
+ } else {
+ opt = dlsym(handle, "options");
+ if (!opt) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s",
+ dlerror(), NULL);
+ goto out;
+ }
- gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", name);
+ opt_list->given_opt = opt;
+ }
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (!handle) {
- gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
- goto out;
- }
- *dl_handle = handle;
+ *dl_handle = handle;
+ handle = NULL;
+ ret = 0;
+out:
+ GF_FREE(name);
+ if (handle)
+ dlclose(handle);
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
+ gf_msg_debug("xlator", 0, "Returning %d", ret);
+ return ret;
+}
+static int
+xlator_dynload_apis(xlator_t *xl)
+{
+ int ret = -1;
+ void *handle = NULL;
+ volume_opt_list_t *vol_opt = NULL;
+ xlator_api_t *xlapi = NULL;
+ int i = 0;
+
+ handle = xl->dlhandle;
+
+ xlapi = dlsym(handle, "xlator_api");
+ if (!xlapi) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "dlsym=%s",
+ dlerror(), NULL);
+ ret = -1;
+ goto out;
+ }
+
+ xl->fops = xlapi->fops;
+ if (!xl->fops) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_STRUCT_MISS, "name=%s",
+ xl->name, NULL);
+ goto out;
+ }
+
+ xl->cbks = xlapi->cbks;
+ if (!xl->cbks) {
+ gf_msg_trace("xlator", 0, "%s: struct missing (cbks)", xl->name);
+ }
+
+ xl->init = xlapi->init;
+ if (!xl->init) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_METHOD_MISS, "name=%s",
+ xl->name, NULL);
+ goto out;
+ }
+
+ xl->fini = xlapi->fini;
+ if (!xl->fini) {
+ gf_msg_trace("xlator", 0, "%s: method missing (fini)", xl->name);
+ }
+
+ xl->reconfigure = xlapi->reconfigure;
+ if (!xl->reconfigure) {
+ gf_msg_trace("xlator", 0, "%s: method missing (reconfigure)", xl->name);
+ }
+ xl->notify = xlapi->notify;
+ if (!xl->notify) {
+ gf_msg_trace("xlator", 0, "%s: method missing (notify)", xl->name);
+ }
+ xl->dumpops = xlapi->dumpops;
+ if (!xl->dumpops) {
+ gf_msg_trace("xlator", 0, "%s: method missing (dumpops)", xl->name);
+ }
+ xl->mem_acct_init = xlapi->mem_acct_init;
+ if (!xl->mem_acct_init) {
+ gf_msg_trace("xlator", 0, "%s: method missing (mem_acct_init)",
+ xl->name);
+ }
+
+ xl->dump_metrics = xlapi->dump_metrics;
+ if (!xl->dump_metrics) {
+ gf_msg_trace("xlator", 0, "%s: method missing (dump_metrics)",
+ xl->name);
+ }
+
+ xl->pass_through_fops = xlapi->pass_through_fops;
+ if (!xl->pass_through_fops) {
+ gf_msg_trace("xlator", 0,
+ "%s: method missing (pass_through_fops), "
+ "falling back to default",
+ xl->name);
+ }
+
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
+ if (!vol_opt) {
+ goto out;
+ }
+ INIT_LIST_HEAD(&vol_opt->list);
+
+ vol_opt->given_opt = default_options;
+ list_add_tail(&vol_opt->list, &xl->volume_options);
+
+ if (xlapi->options) {
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
if (!vol_opt) {
- goto out;
+ goto out;
}
+ INIT_LIST_HEAD(&vol_opt->list);
- if (!(vol_opt->given_opt = dlsym (handle, "options"))) {
- dlerror ();
- gf_log ("xlator", GF_LOG_DEBUG,
- "Strict option validation not enforced -- neglecting");
- }
- opt_list->given_opt = vol_opt->given_opt;
+ vol_opt->given_opt = xlapi->options;
+ list_add_tail(&vol_opt->list, &xl->volume_options);
+ }
- INIT_LIST_HEAD (&vol_opt->list);
- list_add_tail (&vol_opt->list, &opt_list->list);
+ xl->id = xlapi->xlator_id;
+ xl->flags = xlapi->flags;
+ xl->identifier = xlapi->identifier;
+ xl->category = xlapi->category;
- ret = 0;
- out:
- gf_log ("xlator", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ memcpy(xl->op_version, xlapi->op_version,
+ sizeof(uint32_t) * GF_MAX_RELEASES);
-}
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_latency_reset(&xl->stats.interval.latencies[i]);
+ }
+ ret = 0;
+out:
+ return ret;
+}
int
-xlator_dynload (xlator_t *xl)
+xlator_dynload(xlator_t *xl)
{
- int ret = -1;
- char *name = NULL;
- void *handle = NULL;
- volume_opt_list_t *vol_opt = NULL;
+ int ret = -1;
+ char *name = NULL;
+ void *handle = NULL;
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ INIT_LIST_HEAD(&xl->volume_options);
- INIT_LIST_HEAD (&xl->volume_options);
+ ret = gf_asprintf(&name, "%s/%s.so", XLATORDIR, xl->type);
+ if (-1 == ret) {
+ goto out;
+ }
- ret = gf_asprintf (&name, "%s/%s.so", XLATORDIR, xl->type);
- if (-1 == ret) {
- gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
- goto out;
- }
-
- ret = -1;
+ ret = -1;
- gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", name);
+ gf_msg_trace("xlator", 0, "attempt to load file %s", name);
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (!handle) {
- gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
- goto out;
- }
- xl->dlhandle = handle;
+ handle = dlopen(name, RTLD_NOW);
+ if (!handle) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s",
+ dlerror(), NULL);
+ goto out;
+ }
+ xl->dlhandle = handle;
- if (!(xl->fops = dlsym (handle, "fops"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
- dlerror ());
- goto out;
- }
+ ret = xlator_dynload_apis(xl);
+ if (-1 == ret)
+ goto out;
- if (!(xl->cbks = dlsym (handle, "cbks"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
- dlerror ());
- goto out;
- }
+ fill_defaults(xl);
- if (!(xl->init = dlsym (handle, "init"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
- dlerror ());
- goto out;
- }
+ ret = 0;
- if (!(xl->fini = dlsym (handle, "fini"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
- dlerror ());
- goto out;
- }
-
- if (!(xl->notify = dlsym (handle, "notify"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(notify) on %s -- neglecting", dlerror ());
- }
-
- if (!(xl->dumpops = dlsym (handle, "dumpops"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(dumpops) on %s -- neglecting", dlerror ());
- }
+out:
+ GF_FREE(name);
+ return ret;
+}
- if (!(xl->mem_acct_init = dlsym (handle, "mem_acct_init"))) {
- gf_log (xl->name, GF_LOG_TRACE,
- "dlsym(mem_acct_init) on %s -- neglecting",
- dlerror ());
- }
+int
+xlator_set_type(xlator_t *xl, const char *type)
+{
+ int ret = 0;
- if (!(xl->reconfigure = dlsym (handle, "reconfigure"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(reconfigure) on %s -- neglecting",
- dlerror());
- }
+ /* Handle 'global' translator differently */
+ if (!strncmp(GF_GLOBAL_XLATOR_NAME, type, SLEN(GF_GLOBAL_XLATOR_NAME))) {
+ volume_opt_list_t *vol_opt = NULL;
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
+ /* set the required values from Global xlator */
+ xl->type = gf_strdup(GF_GLOBAL_XLATOR_NAME);
+ xl->cbks = global_xlator.cbks;
+ xl->fops = global_xlator.fops;
+ xl->init = global_xlator.init;
+ xl->fini = global_xlator.fini;
+ xl->reconfigure = global_xlator.reconfigure;
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
if (!vol_opt) {
- goto out;
+ ret = -1;
+ goto out;
}
- if (!(vol_opt->given_opt = dlsym (handle, "options"))) {
- dlerror ();
- gf_log (xl->name, GF_LOG_TRACE,
- "Strict option validation not enforced -- neglecting");
- }
- list_add_tail (&vol_opt->list, &xl->volume_options);
+ vol_opt->given_opt = global_xl_options;
- fill_defaults (xl);
+ INIT_LIST_HEAD(&xl->volume_options);
+ INIT_LIST_HEAD(&vol_opt->list);
+ list_add_tail(&vol_opt->list, &xl->volume_options);
+ fill_defaults(xl);
ret = 0;
+ goto out;
+ }
+ ret = xlator_set_type_virtual(xl, type);
+ if (!ret)
+ ret = xlator_dynload(xl);
out:
- if (name)
- GF_FREE (name);
- return ret;
+ return ret;
}
-
-int
-xlator_set_type (xlator_t *xl, const char *type)
+void
+xlator_set_inode_lru_limit(xlator_t *this, void *data)
{
- int ret = 0;
+ int inode_lru_limit = 0;
- ret = xlator_set_type_virtual (xl, type);
- if (!ret)
- ret = xlator_dynload (xl);
+ if (this->itable) {
+ if (!data) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, LG_MSG_INPUT_DATA_NULL,
+ NULL);
+ goto out;
+ }
+ inode_lru_limit = *(int *)data;
+ inode_table_set_lru_limit(this->itable, inode_lru_limit);
+ }
- return ret;
+out:
+ return;
}
-
void
-xlator_foreach (xlator_t *this,
- void (*fn)(xlator_t *each,
- void *data),
- void *data)
+xlator_foreach(xlator_t *this, void (*fn)(xlator_t *each, void *data),
+ void *data)
{
- xlator_t *first = NULL;
- xlator_t *old_THIS = NULL;
+ xlator_t *first = NULL;
+ xlator_t *old_THIS = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", this, out);
- GF_VALIDATE_OR_GOTO ("xlator", fn, out);
+ GF_VALIDATE_OR_GOTO("xlator", this, out);
+ GF_VALIDATE_OR_GOTO("xlator", fn, out);
- first = this;
+ first = this;
- while (first->prev)
- first = first->prev;
+ while (first->prev)
+ first = first->prev;
- while (first) {
- old_THIS = THIS;
- THIS = first;
+ while (first) {
+ old_THIS = THIS;
+ THIS = first;
- fn (first, data);
+ fn(first, data);
- THIS = old_THIS;
- first = first->next;
- }
+ THIS = old_THIS;
+ first = first->next;
+ }
out:
- return;
+ return;
}
+void
+xlator_foreach_depth_first(xlator_t *this,
+ void (*fn)(xlator_t *each, void *data), void *data)
+{
+ xlator_list_t *subv = NULL;
+
+ subv = this->children;
+
+ while (subv) {
+ xlator_foreach_depth_first(subv->xlator, fn, data);
+ subv = subv->next;
+ }
+
+ fn(this, data);
+}
xlator_t *
-xlator_search_by_name (xlator_t *any, const char *name)
+xlator_search_by_name(xlator_t *any, const char *name)
{
- xlator_t *search = NULL;
+ xlator_t *search = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", any, out);
- GF_VALIDATE_OR_GOTO ("xlator", name, out);
+ GF_VALIDATE_OR_GOTO("xlator", any, out);
+ GF_VALIDATE_OR_GOTO("xlator", name, out);
- search = any;
+ search = any;
- while (search->prev)
- search = search->prev;
+ while (search->prev)
+ search = search->prev;
- while (search) {
- if (!strcmp (search->name, name))
- break;
- search = search->next;
- }
+ while (search) {
+ if (!strcmp(search->name, name))
+ break;
+ search = search->next;
+ }
out:
- return search;
+ return search;
}
+/*
+ * With brick multiplexing, we sort of have multiple graphs, so
+ * xlator_search_by_name might not find what we want. Also, the translator
+ * we're looking for might not be a direct child if something else was put in
+ * between (as already happened with decompounder before that was fixed) and
+ * it's hard to debug why our translator wasn't found. Using a recursive tree
+ * search instead of a linear search works around both problems.
+ */
+static xlator_t *
+get_xlator_by_name_or_type(xlator_t *this, char *target, int is_name)
+{
+ xlator_list_t *trav;
+ xlator_t *child_xl;
+ char *value;
+
+ for (trav = this->children; trav; trav = trav->next) {
+ value = is_name ? trav->xlator->name : trav->xlator->type;
+ if (!strcmp(value, target) && !trav->xlator->cleanup_starting) {
+ return trav->xlator;
+ }
+ child_xl = get_xlator_by_name_or_type(trav->xlator, target, is_name);
+ if (child_xl) {
+ /*
+ * If the xlator we're looking for is somewhere down
+ * the stack, get_xlator_by_name expects to get a
+ * pointer to the top of its subtree (child of "this")
+ * while get_xlator_by_type expects a pointer to what
+ * we actually found. Handle both cases here.
+ *
+ * TBD: rename the functions and fix callers to better
+ * reflect the difference in semantics.
+ */
+ return is_name ? trav->xlator : child_xl;
+ }
+ }
+
+ return NULL;
+}
+
+xlator_t *
+get_xlator_by_name(xlator_t *this, char *target)
+{
+ return get_xlator_by_name_or_type(this, target, 1);
+}
+
+xlator_t *
+get_xlator_by_type(xlator_t *this, char *target)
+{
+ return get_xlator_by_name_or_type(this, target, 0);
+}
static int
__xlator_init(xlator_t *xl)
{
- xlator_t *old_THIS = NULL;
- int ret = 0;
+ xlator_t *old_THIS = NULL;
+ int ret = 0;
+ int fop_idx = 0;
- old_THIS = THIS;
- THIS = xl;
+ old_THIS = THIS;
+ THIS = xl;
- ret = xl->init (xl);
+ /* initialize the metrics related locks */
+ for (fop_idx = 0; fop_idx < GF_FOP_MAXVALUE; fop_idx++) {
+ GF_ATOMIC_INIT(xl->stats.total.metrics[fop_idx].fop, 0);
+ GF_ATOMIC_INIT(xl->stats.total.metrics[fop_idx].cbk, 0);
- THIS = old_THIS;
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[fop_idx].fop, 0);
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[fop_idx].cbk, 0);
+ }
+ GF_ATOMIC_INIT(xl->stats.total.count, 0);
+ GF_ATOMIC_INIT(xl->stats.interval.count, 0);
- return ret;
-}
+ xlator_init_lock();
+ handle_default_options(xl, xl->options);
+ ret = xl->init(xl);
+ xlator_init_unlock();
+ THIS = old_THIS;
+
+ return ret;
+}
int
-xlator_init (xlator_t *xl)
+xlator_init(xlator_t *xl)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- if (xl->mem_acct_init)
- xl->mem_acct_init (xl);
+ if (xl->mem_acct_init)
+ xl->mem_acct_init(xl);
- if (!xl->init) {
- gf_log (xl->name, GF_LOG_WARNING, "No init() found");
- goto out;
- }
+ xl->instance_name = NULL;
+ GF_ATOMIC_INIT(xl->xprtrefcnt, 0);
+ if (!xl->init) {
+ gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, NULL);
+ goto out;
+ }
- ret = __xlator_init (xl);
+ ret = __xlator_init(xl);
- if (ret) {
- gf_log (xl->name, GF_LOG_ERROR,
- "Initialization of volume '%s' failed,"
- " review your volfile again",
- xl->name);
- goto out;
- }
+ if (ret) {
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR, "name=%s",
+ xl->name, NULL);
+ goto out;
+ }
- xl->init_succeeded = 1;
-
- ret = 0;
+ xl->init_succeeded = 1;
+ /*xl->cleanup_starting = 0;
+ xl->call_cleanup = 0;
+ */
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
static void
-xlator_fini_rec (xlator_t *xl)
+xlator_fini_rec(xlator_t *xl)
{
- xlator_list_t *trav = NULL;
- xlator_t *old_THIS = NULL;
-
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ xlator_list_t *trav = NULL;
+ xlator_t *old_THIS = NULL;
- trav = xl->children;
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- while (trav) {
- if (!trav->xlator->init_succeeded) {
- break;
- }
+ trav = xl->children;
- xlator_fini_rec (trav->xlator);
- gf_log (trav->xlator->name, GF_LOG_DEBUG, "fini done");
- trav = trav->next;
+ while (trav) {
+ if (!trav->xlator->init_succeeded) {
+ break;
}
- if (xl->init_succeeded) {
- if (xl->fini) {
- old_THIS = THIS;
- THIS = xl;
-
- xl->fini (xl);
-
- if (xl->local_pool)
- mem_pool_destroy (xl->local_pool);
-
- THIS = old_THIS;
- } else {
- gf_log (xl->name, GF_LOG_DEBUG, "No fini() found");
- }
- xl->init_succeeded = 0;
+ xlator_fini_rec(trav->xlator);
+ gf_msg_debug(trav->xlator->name, 0, "fini done");
+ trav = trav->next;
+ }
+
+ xl->cleanup_starting = 1;
+ if (xl->init_succeeded) {
+ if (xl->fini) {
+ old_THIS = THIS;
+ THIS = xl;
+
+ xl->fini(xl);
+
+ if (xl->local_pool) {
+ mem_pool_destroy(xl->local_pool);
+ xl->local_pool = NULL;
+ }
+ if (xl->itable) {
+ inode_table_destroy(xl->itable);
+ xl->itable = NULL;
+ }
+
+ THIS = old_THIS;
+ } else {
+ gf_msg_debug(xl->name, 0, "No fini() found");
}
+ xl->init_succeeded = 0;
+ }
out:
- return;
+ return;
}
-
int
-xlator_notify (xlator_t *xl, int event, void *data, ...)
+xlator_notify(xlator_t *xl, int event, void *data, ...)
{
- xlator_t *old_THIS = NULL;
- int ret = 0;
+ xlator_t *old_THIS = NULL;
+ int ret = 0;
- old_THIS = THIS;
- THIS = xl;
+ old_THIS = THIS;
+ THIS = xl;
- ret = xl->notify (xl, event, data);
+ ret = xl->notify(xl, event, data);
- THIS = old_THIS;
+ THIS = old_THIS;
- return ret;
+ return ret;
}
-
int
-xlator_mem_acct_init (xlator_t *xl, int num_types)
+xlator_mem_acct_init(xlator_t *xl, int num_types)
{
- int i = 0;
- int ret = 0;
+ int i = 0;
+ int ret = 0;
+
+ if (!xl)
+ return -1;
- if (!gf_mem_acct_is_enabled())
- return 0;
+ if (!xl->ctx)
+ return -1;
- if (!xl)
- return -1;
+ if (!xl->ctx->mem_acct_enable)
+ return 0;
- xl->mem_acct.num_types = num_types;
+ xl->mem_acct = MALLOC(sizeof(struct mem_acct) +
+ sizeof(struct mem_acct_rec) * num_types);
- xl->mem_acct.rec = CALLOC(num_types, sizeof(struct mem_acct_rec));
+ if (!xl->mem_acct) {
+ return -1;
+ }
- if (!xl->mem_acct.rec) {
- return -1;
- }
+ xl->mem_acct->num_types = num_types;
+ GF_ATOMIC_INIT(xl->mem_acct->refcnt, 1);
- for (i = 0; i < num_types; i++) {
- ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock));
- if (ret) {
- fprintf(stderr, "Unable to lock..errno : %d",errno);
- }
+ for (i = 0; i < num_types; i++) {
+ memset(&xl->mem_acct->rec[i], 0, sizeof(struct mem_acct_rec));
+ ret = LOCK_INIT(&(xl->mem_acct->rec[i].lock));
+ if (ret) {
+ fprintf(stderr, "Unable to lock..errno : %d", errno);
}
+#ifdef DEBUG
+ INIT_LIST_HEAD(&(xl->mem_acct->rec[i].obj_list));
+#endif
+ }
- return 0;
+ return 0;
}
+void
+xlator_mem_acct_unref(struct mem_acct *mem_acct)
+{
+ uint32_t i;
+
+ if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
+ for (i = 0; i < mem_acct->num_types; i++) {
+ LOCK_DESTROY(&(mem_acct->rec[i].lock));
+ }
+ FREE(mem_acct);
+ }
+}
void
-xlator_tree_fini (xlator_t *xl)
+xlator_tree_fini(xlator_t *xl)
{
- xlator_t *top = NULL;
+ xlator_t *top = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- top = xl;
- xlator_fini_rec (top);
+ top = xl;
+ xlator_fini_rec(top);
out:
- return;
+ return;
}
+int
+xlator_list_destroy(xlator_list_t *list)
+{
+ xlator_list_t *next = NULL;
+
+ while (list) {
+ next = list->next;
+ GF_FREE(list);
+ list = next;
+ }
+
+ return 0;
+}
int
-xlator_tree_free (xlator_t *tree)
+xlator_memrec_free(xlator_t *xl)
{
- xlator_t *trav = tree;
- xlator_t *prev = tree;
+ struct mem_acct *mem_acct = NULL;
- if (!tree) {
- gf_log ("parser", GF_LOG_ERROR, "Translator tree not found");
- return -1;
- }
+ if (!xl) {
+ return 0;
+ }
+ mem_acct = xl->mem_acct;
- while (prev) {
- trav = prev->next;
- dict_destroy (prev->options);
- GF_FREE (prev->name);
- GF_FREE (prev->type);
- GF_FREE (prev);
- prev = trav;
- }
+ if (mem_acct) {
+ xlator_mem_acct_unref(mem_acct);
+ xl->mem_acct = NULL;
+ }
+
+ return 0;
+}
+
+static int
+xlator_members_free(xlator_t *xl)
+{
+ volume_opt_list_t *vol_opt = NULL;
+ volume_opt_list_t *tmp = NULL;
+
+ if (!xl)
+ return 0;
+
+ GF_FREE(xl->name);
+ GF_FREE(xl->type);
+ if (!(xl->ctx && xl->ctx->cmd_args.vgtool != _gf_none) && xl->dlhandle)
+ dlclose(xl->dlhandle);
+ if (xl->options)
+ dict_unref(xl->options);
+
+ xlator_list_destroy(xl->children);
+
+ xlator_list_destroy(xl->parents);
+
+ list_for_each_entry_safe(vol_opt, tmp, &xl->volume_options, list)
+ {
+ list_del_init(&vol_opt->list);
+ GF_FREE(vol_opt);
+ }
+
+ return 0;
+}
+
+/* This function destroys all the xlator members except for the
+ * xlator strcuture and its mem accounting field.
+ *
+ * If otherwise, it would destroy the master xlator object as well
+ * its mem accounting, which would mean after calling glusterfs_graph_destroy()
+ * there cannot be any reference to GF_FREE() from the master xlator, this is
+ * not possible because of the following dependencies:
+ * - glusterfs_ctx_t will have mem pools allocated by the master xlators
+ * - xlator objects will have references to those mem pools(g: dict)
+ *
+ * Ordering the freeing in any of the order will also not solve the dependency:
+ * - Freeing xlator objects(including memory accounting) before mem pools
+ * destruction will mean not use GF_FREE while destroying mem pools.
+ * - Freeing mem pools and then destroying xlator objects would lead to crashes
+ * when xlator tries to unref dict or other mem pool objects.
+ *
+ * Hence the way chosen out of this interdependency is to split xlator object
+ * free into two stages:
+ * - Free all the xlator members excpet for its mem accounting structure
+ * - Free all the mem accouting structures of xlator along with the xlator
+ * object itself.
+ *
+ * This two stages of destruction, is mainly required for glfs_fini().
+ */
+int
+xlator_tree_free_members(xlator_t *tree)
+{
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
+ return -1;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ xlator_members_free(prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+int
+xlator_tree_free_memacct(xlator_t *tree)
+{
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
+ return -1;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ xlator_memrec_free(prev);
+ GF_FREE(prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+static int
+xlator_mem_free(xlator_t *xl)
+{
+ volume_opt_list_t *vol_opt = NULL;
+ volume_opt_list_t *tmp = NULL;
+
+ if (!xl)
return 0;
+
+ if (xl->options) {
+ dict_unref(xl->options);
+ xl->options = NULL;
+ }
+
+ list_for_each_entry_safe(vol_opt, tmp, &xl->volume_options, list)
+ {
+ list_del_init(&vol_opt->list);
+ GF_FREE(vol_opt);
+ }
+
+ xlator_memrec_free(xl);
+
+ return 0;
}
+static void
+xlator_call_fini(xlator_t *this)
+{
+ if (!this || this->call_cleanup)
+ return;
+ this->cleanup_starting = 1;
+ this->call_cleanup = 1;
+ xlator_call_fini(this->next);
+ this->fini(this);
+}
void
-loc_wipe (loc_t *loc)
+xlator_mem_cleanup(xlator_t *this)
{
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
- if (loc->path) {
- GF_FREE ((char *)loc->path);
- loc->path = NULL;
+ xlator_list_t *list = this->children;
+ xlator_t *trav = list->xlator;
+ inode_table_t *inode_table = NULL;
+ xlator_t *prev = trav;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_list_t **trav_p = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ glusterfs_graph_t *graph = NULL;
+ gf_boolean_t graph_cleanup = _gf_false;
+
+ if (this->call_cleanup || !this->ctx)
+ return;
+
+ this->call_cleanup = 1;
+ ctx = this->ctx;
+
+ inode_table = this->itable;
+ if (inode_table) {
+ inode_table_destroy(inode_table);
+ this->itable = NULL;
+ }
+
+ xlator_call_fini(trav);
+
+ while (prev) {
+ trav = prev->next;
+ xlator_mem_free(prev);
+ prev = trav;
+ }
+
+ if (this->fini) {
+ this->fini(this);
+ }
+
+ xlator_mem_free(this);
+
+ if (ctx->active) {
+ top = ctx->active->first;
+ LOCK(&ctx->volfile_lock);
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (victim->call_cleanup && !strcmp(victim->name, this->name)) {
+ graph_cleanup = _gf_true;
+ (*trav_p) = (*trav_p)->next;
+ break;
+ }
}
+ UNLOCK(&ctx->volfile_lock);
+ }
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
+ if (graph_cleanup) {
+ prev = this;
+ graph = ctx->active;
+ pthread_mutex_lock(&graph->mutex);
+ while (prev) {
+ trav = prev->next;
+ GF_FREE(prev);
+ prev = trav;
}
+ pthread_mutex_unlock(&graph->mutex);
+ }
+}
- memset (loc, 0, sizeof (*loc));
+void
+loc_wipe(loc_t *loc)
+{
+ if (loc->inode) {
+ inode_unref(loc->inode);
+ loc->inode = NULL;
+ }
+ if (loc->path) {
+ GF_FREE((char *)loc->path);
+ loc->path = NULL;
+ }
+
+ if (loc->parent) {
+ inode_unref(loc->parent);
+ loc->parent = NULL;
+ }
+
+ memset(loc, 0, sizeof(*loc));
}
int
-loc_path (loc_t *loc, const char *bname)
+loc_path(loc_t *loc, const char *bname)
{
- int ret = 0;
+ int ret = 0;
- if (loc->path)
- goto out;
+ if (loc->path)
+ goto out;
- ret = -1;
+ ret = -1;
- if (bname && !strlen (bname))
- bname = NULL;
+ if (bname && !strlen(bname))
+ bname = NULL;
- if (!bname)
- goto inode_path;
+ if (!bname)
+ goto inode_path;
- if (loc->parent && !uuid_is_null (loc->parent->gfid)) {
- ret = inode_path (loc->parent, bname, (char**)&loc->path);
- } else if (!uuid_is_null (loc->pargfid)) {
- ret = gf_asprintf ((char**)&loc->path, INODE_PATH_FMT"/%s",
- uuid_utoa (loc->pargfid), bname);
- }
+ if (loc->parent && !gf_uuid_is_null(loc->parent->gfid)) {
+ ret = inode_path(loc->parent, bname, (char **)&loc->path);
+ } else if (!gf_uuid_is_null(loc->pargfid)) {
+ ret = gf_asprintf((char **)&loc->path, INODE_PATH_FMT "/%s",
+ uuid_utoa(loc->pargfid), bname);
+ }
- if (loc->path)
- goto out;
+ if (loc->path)
+ goto out;
inode_path:
- if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
- ret = inode_path (loc->inode, NULL, (char **)&loc->path);
- } else if (!uuid_is_null (loc->gfid)) {
- ret = gf_asprintf ((char**)&loc->path, INODE_PATH_FMT,
- uuid_utoa (loc->gfid));
- }
+ if (loc->inode && !gf_uuid_is_null(loc->inode->gfid)) {
+ ret = inode_path(loc->inode, NULL, (char **)&loc->path);
+ } else if (!gf_uuid_is_null(loc->gfid)) {
+ ret = gf_asprintf((char **)&loc->path, INODE_PATH_FMT,
+ uuid_utoa(loc->gfid));
+ }
out:
- return ret;
+ return ret;
+}
+
+void
+loc_gfid(loc_t *loc, uuid_t gfid)
+{
+ if (!gfid)
+ goto out;
+ gf_uuid_clear(gfid);
+
+ if (!loc)
+ goto out;
+ else if (!gf_uuid_is_null(loc->gfid))
+ gf_uuid_copy(gfid, loc->gfid);
+ else if (loc->inode && (!gf_uuid_is_null(loc->inode->gfid)))
+ gf_uuid_copy(gfid, loc->inode->gfid);
+out:
+ return;
+}
+
+void
+loc_pargfid(loc_t *loc, uuid_t gfid)
+{
+ if (!gfid)
+ goto out;
+ gf_uuid_clear(gfid);
+
+ if (!loc)
+ goto out;
+ else if (!gf_uuid_is_null(loc->pargfid))
+ gf_uuid_copy(gfid, loc->pargfid);
+ else if (loc->parent && (!gf_uuid_is_null(loc->parent->gfid)))
+ gf_uuid_copy(gfid, loc->parent->gfid);
+out:
+ return;
+}
+
+char *
+loc_gfid_utoa(loc_t *loc)
+{
+ uuid_t gfid = {
+ 0,
+ };
+ loc_gfid(loc, gfid);
+ return uuid_utoa(gfid);
}
int
-loc_copy (loc_t *dst, loc_t *src)
+loc_touchup(loc_t *loc, const char *name)
{
- int ret = -1;
+ char *path = NULL;
+ int ret = 0;
+
+ if (loc->path)
+ goto out;
+
+ if (loc->parent && name && strlen(name)) {
+ ret = inode_path(loc->parent, name, &path);
+ if (path) /*Guaranteed to have trailing '/' */
+ loc->name = strrchr(path, '/') + 1;
+
+ if (gf_uuid_is_null(loc->pargfid))
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ } else if (loc->inode) {
+ ret = inode_path(loc->inode, 0, &path);
+ if (gf_uuid_is_null(loc->gfid))
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
+ }
+
+ if (ret < 0 || !path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ loc->path = path;
+ ret = 0;
+out:
+ return ret;
+}
- GF_VALIDATE_OR_GOTO ("xlator", dst, err);
- GF_VALIDATE_OR_GOTO ("xlator", src, err);
+int
+loc_copy_overload_parent(loc_t *dst, loc_t *src, inode_t *parent)
+{
+ int ret = -1;
- uuid_copy (dst->gfid, src->gfid);
- uuid_copy (dst->pargfid, src->pargfid);
- uuid_copy (dst->gfid, src->gfid);
+ GF_VALIDATE_OR_GOTO("xlator", dst, err);
+ GF_VALIDATE_OR_GOTO("xlator", src, err);
+ GF_VALIDATE_OR_GOTO("xlator", parent, err);
- if (src->inode)
- dst->inode = inode_ref (src->inode);
+ gf_uuid_copy(dst->gfid, src->gfid);
+ gf_uuid_copy(dst->pargfid, parent->gfid);
- if (src->parent)
- dst->parent = inode_ref (src->parent);
+ if (src->inode)
+ dst->inode = inode_ref(src->inode);
- if (src->path) {
- dst->path = gf_strdup (src->path);
+ if (parent)
+ dst->parent = inode_ref(parent);
- if (!dst->path)
- goto out;
+ if (src->path) {
+ dst->path = gf_strdup(src->path);
- if (src->name)
- dst->name = strrchr (dst->path, '/');
- if (dst->name)
- dst->name++;
- }
+ if (!dst->path)
+ goto out;
- ret = 0;
+ if (src->name)
+ dst->name = strrchr(dst->path, '/');
+ if (dst->name)
+ dst->name++;
+ } else if (src->name) {
+ dst->name = src->name;
+ }
+
+ ret = 0;
out:
- if (ret == -1)
- loc_wipe (dst);
+ if (ret == -1)
+ loc_wipe(dst);
err:
- return ret;
+ return ret;
}
-
int
-xlator_list_destroy (xlator_list_t *list)
+loc_copy(loc_t *dst, loc_t *src)
{
- xlator_list_t *next = NULL;
+ int ret = -1;
- while (list) {
- next = list->next;
- GF_FREE (list);
- list = next;
- }
+ GF_VALIDATE_OR_GOTO("xlator", dst, err);
+ GF_VALIDATE_OR_GOTO("xlator", src, err);
- return 0;
+ if (!gf_uuid_is_null(src->gfid))
+ gf_uuid_copy(dst->gfid, src->gfid);
+ else if (src->inode && !gf_uuid_is_null(src->inode->gfid))
+ gf_uuid_copy(dst->gfid, src->inode->gfid);
+
+ gf_uuid_copy(dst->pargfid, src->pargfid);
+
+ if (src->inode)
+ dst->inode = inode_ref(src->inode);
+
+ if (src->parent)
+ dst->parent = inode_ref(src->parent);
+
+ if (src->path) {
+ dst->path = gf_strdup(src->path);
+
+ if (!dst->path)
+ goto out;
+
+ if (src->name)
+ dst->name = strrchr(dst->path, '/');
+ if (dst->name)
+ dst->name++;
+ } else if (src->name) {
+ dst->name = src->name;
+ }
+
+ ret = 0;
+out:
+ if (ret == -1)
+ loc_wipe(dst);
+
+err:
+ return ret;
}
+gf_boolean_t
+loc_is_root(loc_t *loc)
+{
+ if (loc && __is_root_gfid(loc->gfid)) {
+ return _gf_true;
+ } else if (loc && loc->inode && __is_root_gfid(loc->inode->gfid)) {
+ return _gf_true;
+ }
-int
-xlator_destroy (xlator_t *xl)
+ return _gf_false;
+}
+
+int32_t
+loc_build_child(loc_t *child, loc_t *parent, char *name)
{
- volume_opt_list_t *vol_opt = NULL;
- volume_opt_list_t *tmp = NULL;
+ int32_t ret = -1;
- if (!xl)
- return 0;
+ GF_VALIDATE_OR_GOTO("xlator", child, out);
+ GF_VALIDATE_OR_GOTO("xlator", parent, out);
+ GF_VALIDATE_OR_GOTO("xlator", name, out);
- if (xl->name)
- GF_FREE (xl->name);
- if (xl->type)
- GF_FREE (xl->type);
- if (xl->dlhandle)
- dlclose (xl->dlhandle);
- if (xl->options)
- dict_destroy (xl->options);
+ loc_gfid(parent, child->pargfid);
- xlator_list_destroy (xl->children);
+ if (strcmp(parent->path, "/") == 0)
+ ret = gf_asprintf((char **)&child->path, "/%s", name);
+ else
+ ret = gf_asprintf((char **)&child->path, "%s/%s", parent->path, name);
- xlator_list_destroy (xl->parents);
+ if (ret < 0 || !child->path) {
+ ret = -1;
+ goto out;
+ }
- list_for_each_entry_safe (vol_opt, tmp, &xl->volume_options, list) {
- list_del_init (&vol_opt->list);
- GF_FREE (vol_opt);
- }
+ child->name = strrchr(child->path, '/') + 1;
- GF_FREE (xl);
+ child->parent = inode_ref(parent->inode);
+ child->inode = inode_new(parent->inode->table);
- return 0;
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if ((ret < 0) && child)
+ loc_wipe(child);
+
+ return ret;
}
+gf_boolean_t
+loc_is_nameless(loc_t *loc)
+{
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("xlator", loc, out);
+
+ if ((!loc->parent && gf_uuid_is_null(loc->pargfid)) || !loc->name)
+ ret = _gf_true;
+out:
+ return ret;
+}
int
-is_gf_log_command (xlator_t *this, const char *name, char *value)
-{
- xlator_t *trav = NULL;
- char key[1024] = {0,};
- int ret = -1;
- int log_level = -1;
- gf_boolean_t syslog_flag = 0;
- glusterfs_ctx_t *ctx = NULL;
-
- if (!strcmp ("trusted.glusterfs.syslog", name)) {
- ret = gf_string2boolean (value, &syslog_flag);
- if (ret) {
- ret = EOPNOTSUPP;
- goto out;
- }
- if (syslog_flag)
- gf_log_enable_syslog ();
- else
- gf_log_disable_syslog ();
-
- goto out;
- }
+xlator_destroy(xlator_t *xl)
+{
+ if (!xl)
+ return 0;
- if (fnmatch ("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE))
- goto out;
+ xlator_members_free(xl);
+ xlator_memrec_free(xl);
+ GF_FREE(xl);
- log_level = glusterd_check_log_level (value);
- if (log_level == -1) {
- ret = EOPNOTSUPP;
- goto out;
- }
+ return 0;
+}
- /* Some crude way to change the log-level of process */
- if (!strcmp (name, "trusted.glusterfs.set-log-level")) {
- /* */
- gf_log ("glusterfs", gf_log_get_loglevel(),
- "setting log level to %d (old-value=%d)",
- log_level, gf_log_get_loglevel());
- gf_log_set_loglevel (log_level);
- ret = 0;
- goto out;
- }
+static int32_t
+gf_bin_to_string(char *dst, size_t size, void *src, size_t len)
+{
+ if (len >= size) {
+ return EINVAL;
+ }
- if (!strcmp (name, "trusted.glusterfs.fuse.set-log-level")) {
- /* */
- gf_log (this->name, gf_log_get_xl_loglevel (this),
- "setting log level to %d (old-value=%d)",
- log_level, gf_log_get_xl_loglevel (this));
- gf_log_set_xl_loglevel (this, log_level);
- ret = 0;
- goto out;
- }
+ memcpy(dst, src, len);
+ dst[len] = 0;
+
+ return 0;
+}
- ctx = glusterfs_ctx_get();
- if (!ctx)
- goto out;
- if (!ctx->active)
- goto out;
- trav = ctx->active->top;
-
- while (trav) {
- snprintf (key, 1024, "trusted.glusterfs.%s.set-log-level",
- trav->name);
- if (fnmatch (name, key, FNM_NOESCAPE) == 0) {
- gf_log (trav->name, gf_log_get_xl_loglevel (trav),
- "setting log level to %d (old-value=%d)",
- log_level, gf_log_get_xl_loglevel (trav));
- gf_log_set_xl_loglevel (trav, log_level);
- ret = 0;
- }
- trav = trav->next;
+int
+is_gf_log_command(xlator_t *this, const char *name, char *value, size_t size)
+{
+ xlator_t *trav = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int ret = -1;
+ int log_level = -1;
+ gf_boolean_t syslog_flag = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ if (!strcmp("trusted.glusterfs.syslog", name)) {
+ ret = gf_bin_to_string(key, sizeof(key), value, size);
+ if (ret != 0) {
+ goto out;
+ }
+ ret = gf_string2boolean(key, &syslog_flag);
+ if (ret) {
+ ret = EOPNOTSUPP;
+ goto out;
}
+ if (syslog_flag)
+ gf_log_enable_syslog();
+ else
+ gf_log_disable_syslog();
+
+ goto out;
+ }
+
+ if (fnmatch("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE))
+ goto out;
+
+ ret = gf_bin_to_string(key, sizeof(key), value, size);
+ if (ret != 0) {
+ goto out;
+ }
+
+ log_level = glusterd_check_log_level(key);
+ if (log_level == -1) {
+ ret = EOPNOTSUPP;
+ goto out;
+ }
+
+ /* Some crude way to change the log-level of process */
+ if (!strcmp(name, "trusted.glusterfs.set-log-level")) {
+ gf_smsg("glusterfs", gf_log_get_loglevel(), 0, LG_MSG_SET_LOG_LEVEL,
+ "new-value=%d", log_level, "old-value=%d",
+ gf_log_get_loglevel(), NULL);
+ gf_log_set_loglevel(this->ctx, log_level);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp(name, "trusted.glusterfs.fuse.set-log-level")) {
+ /* */
+ gf_smsg(this->name, gf_log_get_xl_loglevel(this), 0,
+ LG_MSG_SET_LOG_LEVEL, "new-value=%d", log_level, "old-value=%d",
+ gf_log_get_xl_loglevel(this), NULL);
+ gf_log_set_xl_loglevel(this, log_level);
+ ret = 0;
+ goto out;
+ }
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+ if (!ctx->active)
+ goto out;
+ trav = ctx->active->top;
+
+ while (trav) {
+ snprintf(key, 1024, "trusted.glusterfs.%s.set-log-level", trav->name);
+ if (fnmatch(name, key, FNM_NOESCAPE) == 0) {
+ gf_smsg(trav->name, gf_log_get_xl_loglevel(trav), 0,
+ LG_MSG_SET_LOG_LEVEL, "new-value%d", log_level,
+ "old-value=%d", gf_log_get_xl_loglevel(trav), NULL);
+ gf_log_set_xl_loglevel(trav, log_level);
+ ret = 0;
+ }
+ trav = trav->next;
+ }
out:
- return ret;
+ return ret;
+}
+
+int
+glusterd_check_log_level(const char *value)
+{
+ int log_level = -1;
+
+ if (!strcasecmp(value, "CRITICAL")) {
+ log_level = GF_LOG_CRITICAL;
+ } else if (!strcasecmp(value, "ERROR")) {
+ log_level = GF_LOG_ERROR;
+ } else if (!strcasecmp(value, "WARNING")) {
+ log_level = GF_LOG_WARNING;
+ } else if (!strcasecmp(value, "INFO")) {
+ log_level = GF_LOG_INFO;
+ } else if (!strcasecmp(value, "DEBUG")) {
+ log_level = GF_LOG_DEBUG;
+ } else if (!strcasecmp(value, "TRACE")) {
+ log_level = GF_LOG_TRACE;
+ } else if (!strcasecmp(value, "NONE")) {
+ log_level = GF_LOG_NONE;
+ }
+
+ if (log_level == -1)
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_INIT, NULL);
+
+ return log_level;
+}
+
+int
+xlator_subvolume_count(xlator_t *this)
+{
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ for (list = this->children; list; list = list->next)
+ i++;
+ return i;
}
+static int
+_copy_opt_to_child(dict_t *options, char *key, data_t *value, void *data)
+{
+ xlator_t *child = data;
+
+ gf_log(__func__, GF_LOG_DEBUG, "copying %s to child %s", key, child->name);
+ dict_set(child->options, key, value);
+
+ return 0;
+}
int
-glusterd_check_log_level (const char *value)
-{
- int log_level = -1;
-
- if (!strcasecmp (value, "CRITICAL")) {
- log_level = GF_LOG_CRITICAL;
- } else if (!strcasecmp (value, "ERROR")) {
- log_level = GF_LOG_ERROR;
- } else if (!strcasecmp (value, "WARNING")) {
- log_level = GF_LOG_WARNING;
- } else if (!strcasecmp (value, "INFO")) {
- log_level = GF_LOG_INFO;
- } else if (!strcasecmp (value, "DEBUG")) {
- log_level = GF_LOG_DEBUG;
- } else if (!strcasecmp (value, "TRACE")) {
- log_level = GF_LOG_TRACE;
- } else if (!strcasecmp (value, "NONE")) {
- log_level = GF_LOG_NONE;
+copy_opts_to_child(xlator_t *src, xlator_t *dst, char *glob)
+{
+ return dict_foreach_fnmatch(src->options, glob, _copy_opt_to_child, dst);
+}
+
+int
+glusterfs_delete_volfile_checksum(glusterfs_ctx_t *ctx, const char *volfile_id)
+{
+ gf_volfile_t *volfile_tmp = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+
+ list_for_each_entry(volfile_tmp, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(volfile_id, volfile_tmp->vol_id)) {
+ list_del_init(&volfile_tmp->volfile_list);
+ volfile_obj = volfile_tmp;
+ break;
}
+ }
+
+ if (volfile_obj) {
+ GF_FREE(volfile_obj);
+ } else {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to get volfile "
+ "checksum for volfile id %s.",
+ volfile_id);
+ }
+
+ return 0;
+}
- if (log_level == -1)
- gf_log (THIS->name, GF_LOG_ERROR, "Invalid log-level. possible values "
- "are DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE");
+/*
+ The function is required to take dict ref for every xlator at graph.
+ At the time of compare graph topology create a graph and populate
+ key values in the dictionary, after finished graph comparison we do destroy
+ the new graph.At the time of construct graph we don't take any reference
+ so to avoid dict leak at the of destroying graph due to ref counter underflow
+ we need to call dict_ref here.
- return log_level;
+*/
+
+void
+gluster_graph_take_reference(xlator_t *tree)
+{
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
+ return;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ if (prev->options)
+ dict_ref(prev->options);
+ prev = trav;
+ }
+ return;
+}
+
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name)
+{
+ const char *mux_daemons[] = {"glustershd", NULL};
+ int i;
+
+ if (!name)
+ return _gf_false;
+
+ for (i = 0; mux_daemons[i]; i++) {
+ if (!strcmp(name, mux_daemons[i]))
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+gf_boolean_t
+xlator_is_cleanup_starting(xlator_t *this)
+{
+ gf_boolean_t cleanup = _gf_false;
+ glusterfs_graph_t *graph = NULL;
+ xlator_t *xl = NULL;
+
+ if (!this) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "xlator",
+ NULL);
+ goto out;
+ }
+
+ graph = this->graph;
+ if (!graph) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_NOT_SET,
+ "name=%s", this->name, NULL);
+ goto out;
+ }
+
+ xl = graph->first;
+ if (xl && xl->cleanup_starting)
+ cleanup = _gf_true;
+out:
+ return cleanup;
}
+int
+graph_total_client_xlator(glusterfs_graph_t *graph)
+{
+ xlator_t *xl = NULL;
+ int count = 0;
+
+ if (!graph) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "graph",
+ NULL);
+ goto out;
+ }
+
+ xl = graph->first;
+ if (!strcmp(xl->type, "protocol/server")) {
+ gf_msg_debug(xl->name, 0, "Return because it is a server graph");
+ return 0;
+ }
+
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0) {
+ count++;
+ }
+ xl = xl->next;
+ }
+out:
+ return count;
+}
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
deleted file mode 100644
index b7b59fac956..00000000000
--- a/libglusterfs/src/xlator.h
+++ /dev/null
@@ -1,877 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _XLATOR_H
-#define _XLATOR_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include "event-history.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "dict.h"
-#include "compat.h"
-#include "list.h"
-#include "latency.h"
-
-#define FIRST_CHILD(xl) (xl->children->xlator)
-
-#define GF_SET_ATTR_MODE 0x1
-#define GF_SET_ATTR_UID 0x2
-#define GF_SET_ATTR_GID 0x4
-#define GF_SET_ATTR_SIZE 0x8
-#define GF_SET_ATTR_ATIME 0x10
-#define GF_SET_ATTR_MTIME 0x20
-
-#define gf_attr_mode_set(mode) ((mode) & GF_SET_ATTR_MODE)
-#define gf_attr_uid_set(mode) ((mode) & GF_SET_ATTR_UID)
-#define gf_attr_gid_set(mode) ((mode) & GF_SET_ATTR_GID)
-#define gf_attr_size_set(mode) ((mode) & GF_SET_ATTR_SIZE)
-#define gf_attr_atime_set(mode) ((mode) & GF_SET_ATTR_ATIME)
-#define gf_attr_mtime_set(mode) ((mode) & GF_SET_ATTR_MTIME)
-
-struct _xlator;
-typedef struct _xlator xlator_t;
-struct _dir_entry_t;
-typedef struct _dir_entry_t dir_entry_t;
-struct _gf_dirent_t;
-typedef struct _gf_dirent_t gf_dirent_t;
-struct _loc;
-typedef struct _loc loc_t;
-
-
-typedef int32_t (*event_notify_fn_t) (xlator_t *this, int32_t event, void *data,
- ...);
-
-#include "list.h"
-#include "gf-dirent.h"
-#include "stack.h"
-#include "iobuf.h"
-#include "inode.h"
-#include "fd.h"
-#include "globals.h"
-#include "iatt.h"
-#include "options.h"
-
-
-struct _loc {
- const char *path;
- const char *name;
- inode_t *inode;
- inode_t *parent;
- /* Currently all location based operations are through 'gfid' of inode.
- * But the 'inode->gfid' only gets set in higher most layer (as in,
- * 'fuse', 'protocol/server', or 'nfs/server'). So if translators want
- * to send fops on a inode before the 'inode->gfid' is set, they have to
- * make use of below 'gfid' fields
- */
- uuid_t gfid;
- uuid_t pargfid;
-};
-
-
-typedef int32_t (*fop_getspec_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data);
-
-typedef int32_t (*fop_rchecksum_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint32_t weak_checksum,
- uint8_t *strong_checksum,
- dict_t *xdata);
-
-
-typedef int32_t (*fop_getspec_t) (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
-
-typedef int32_t (*fop_rchecksum_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-
-typedef int32_t (*fop_lookup_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent);
-
-typedef int32_t (*fop_stat_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-
-typedef int32_t (*fop_fstat_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-
-typedef int32_t (*fop_truncate_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_ftruncate_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_access_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_readlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *buf, dict_t *xdata);
-
-typedef int32_t (*fop_mknod_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_mkdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_unlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_rmdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_symlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_rename_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata);
-
-typedef int32_t (*fop_link_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_create_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_open_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_readv_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata);
-
-typedef int32_t (*fop_writev_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_flush_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fsync_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_opendir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_fsyncdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_statfs_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf, dict_t *xdata);
-
-typedef int32_t (*fop_setxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_getxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata);
-
-typedef int32_t (*fop_fsetxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fgetxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata);
-
-typedef int32_t (*fop_removexattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fremovexattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_lk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_inodelk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_finodelk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_entrylk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fentrylk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_readdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-typedef int32_t (*fop_readdirp_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-typedef int32_t (*fop_xattrop_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr, dict_t *xdata);
-
-typedef int32_t (*fop_fxattrop_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr, dict_t *xdata);
-
-
-typedef int32_t (*fop_setattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata);
-
-typedef int32_t (*fop_fsetattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata);
-
-typedef int32_t (*fop_lookup_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xdata);
-
-typedef int32_t (*fop_stat_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-typedef int32_t (*fop_fstat_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_truncate_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-typedef int32_t (*fop_ftruncate_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-typedef int32_t (*fop_access_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-typedef int32_t (*fop_readlink_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-typedef int32_t (*fop_mknod_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata);
-
-typedef int32_t (*fop_mkdir_t) (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata);
-
-typedef int32_t (*fop_unlink_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflags, dict_t *xdata);
-
-typedef int32_t (*fop_rmdir_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflags, dict_t *xdata);
-
-typedef int32_t (*fop_symlink_t) (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc,
- mode_t umask, dict_t *xdata);
-
-typedef int32_t (*fop_rename_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-typedef int32_t (*fop_link_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-typedef int32_t (*fop_create_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-/* Tell subsequent writes on the fd_t to fsync after every writev fop without
- * requiring a fsync fop.
- */
-#define GF_OPEN_FSYNC 0x01
-
-/* Tell write-behind to disable writing behind despite O_SYNC not being set.
- */
-#define GF_OPEN_NOWB 0x02
-
-typedef int32_t (*fop_open_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_readv_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- uint32_t flags, dict_t *xdata);
-
-typedef int32_t (*fop_writev_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-typedef int32_t (*fop_flush_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_fsync_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-typedef int32_t (*fop_opendir_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_fsyncdir_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-typedef int32_t (*fop_statfs_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-typedef int32_t (*fop_setxattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-typedef int32_t (*fop_getxattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_fsetxattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-typedef int32_t (*fop_fgetxattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_removexattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_fremovexattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_lk_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_inodelk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- loc_t *loc,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_finodelk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_entrylk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata);
-
-typedef int32_t (*fop_fentrylk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata);
-
-typedef int32_t (*fop_readdir_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, dict_t *xdata);
-
-typedef int32_t (*fop_readdirp_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- dict_t *xdata);
-
-typedef int32_t (*fop_xattrop_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-typedef int32_t (*fop_fxattrop_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-typedef int32_t (*fop_setattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-typedef int32_t (*fop_fsetattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-
-struct xlator_fops {
- fop_lookup_t lookup;
- fop_stat_t stat;
- fop_fstat_t fstat;
- fop_truncate_t truncate;
- fop_ftruncate_t ftruncate;
- fop_access_t access;
- fop_readlink_t readlink;
- fop_mknod_t mknod;
- fop_mkdir_t mkdir;
- fop_unlink_t unlink;
- fop_rmdir_t rmdir;
- fop_symlink_t symlink;
- fop_rename_t rename;
- fop_link_t link;
- fop_create_t create;
- fop_open_t open;
- fop_readv_t readv;
- fop_writev_t writev;
- fop_flush_t flush;
- fop_fsync_t fsync;
- fop_opendir_t opendir;
- fop_readdir_t readdir;
- fop_readdirp_t readdirp;
- fop_fsyncdir_t fsyncdir;
- fop_statfs_t statfs;
- fop_setxattr_t setxattr;
- fop_getxattr_t getxattr;
- fop_fsetxattr_t fsetxattr;
- fop_fgetxattr_t fgetxattr;
- fop_removexattr_t removexattr;
- fop_fremovexattr_t fremovexattr;
- fop_lk_t lk;
- fop_inodelk_t inodelk;
- fop_finodelk_t finodelk;
- fop_entrylk_t entrylk;
- fop_fentrylk_t fentrylk;
- fop_rchecksum_t rchecksum;
- fop_xattrop_t xattrop;
- fop_fxattrop_t fxattrop;
- fop_setattr_t setattr;
- fop_fsetattr_t fsetattr;
- fop_getspec_t getspec;
-
- /* these entries are used for a typechecking hack in STACK_WIND _only_ */
- fop_lookup_cbk_t lookup_cbk;
- fop_stat_cbk_t stat_cbk;
- fop_fstat_cbk_t fstat_cbk;
- fop_truncate_cbk_t truncate_cbk;
- fop_ftruncate_cbk_t ftruncate_cbk;
- fop_access_cbk_t access_cbk;
- fop_readlink_cbk_t readlink_cbk;
- fop_mknod_cbk_t mknod_cbk;
- fop_mkdir_cbk_t mkdir_cbk;
- fop_unlink_cbk_t unlink_cbk;
- fop_rmdir_cbk_t rmdir_cbk;
- fop_symlink_cbk_t symlink_cbk;
- fop_rename_cbk_t rename_cbk;
- fop_link_cbk_t link_cbk;
- fop_create_cbk_t create_cbk;
- fop_open_cbk_t open_cbk;
- fop_readv_cbk_t readv_cbk;
- fop_writev_cbk_t writev_cbk;
- fop_flush_cbk_t flush_cbk;
- fop_fsync_cbk_t fsync_cbk;
- fop_opendir_cbk_t opendir_cbk;
- fop_readdir_cbk_t readdir_cbk;
- fop_readdirp_cbk_t readdirp_cbk;
- fop_fsyncdir_cbk_t fsyncdir_cbk;
- fop_statfs_cbk_t statfs_cbk;
- fop_setxattr_cbk_t setxattr_cbk;
- fop_getxattr_cbk_t getxattr_cbk;
- fop_fsetxattr_cbk_t fsetxattr_cbk;
- fop_fgetxattr_cbk_t fgetxattr_cbk;
- fop_removexattr_cbk_t removexattr_cbk;
- fop_fremovexattr_cbk_t fremovexattr_cbk;
- fop_lk_cbk_t lk_cbk;
- fop_inodelk_cbk_t inodelk_cbk;
- fop_finodelk_cbk_t finodelk_cbk;
- fop_entrylk_cbk_t entrylk_cbk;
- fop_fentrylk_cbk_t fentrylk_cbk;
- fop_rchecksum_cbk_t rchecksum_cbk;
- fop_xattrop_cbk_t xattrop_cbk;
- fop_fxattrop_cbk_t fxattrop_cbk;
- fop_setattr_cbk_t setattr_cbk;
- fop_fsetattr_cbk_t fsetattr_cbk;
- fop_getspec_cbk_t getspec_cbk;
-};
-
-typedef int32_t (*cbk_forget_t) (xlator_t *this,
- inode_t *inode);
-
-typedef int32_t (*cbk_release_t) (xlator_t *this,
- fd_t *fd);
-
-struct xlator_cbks {
- cbk_forget_t forget;
- cbk_release_t release;
- cbk_release_t releasedir;
-};
-
-typedef int32_t (*dumpop_priv_t) (xlator_t *this);
-
-typedef int32_t (*dumpop_inode_t) (xlator_t *this);
-
-typedef int32_t (*dumpop_fd_t) (xlator_t *this);
-
-typedef int32_t (*dumpop_inodectx_t) (xlator_t *this, inode_t *ino);
-
-typedef int32_t (*dumpop_fdctx_t) (xlator_t *this, fd_t *fd);
-
-typedef int32_t (*dumpop_priv_to_dict_t) (xlator_t *this, dict_t *dict);
-
-typedef int32_t (*dumpop_inode_to_dict_t) (xlator_t *this, dict_t *dict);
-
-typedef int32_t (*dumpop_fd_to_dict_t) (xlator_t *this, dict_t *dict);
-
-typedef int32_t (*dumpop_inodectx_to_dict_t) (xlator_t *this, inode_t *ino,
- dict_t *dict);
-
-typedef int32_t (*dumpop_fdctx_to_dict_t) (xlator_t *this, fd_t *fd,
- dict_t *dict);
-
-typedef int32_t (*dumpop_eh_t) (xlator_t *this);
-
-struct xlator_dumpops {
- dumpop_priv_t priv;
- dumpop_inode_t inode;
- dumpop_fd_t fd;
- dumpop_inodectx_t inodectx;
- dumpop_fdctx_t fdctx;
- dumpop_priv_to_dict_t priv_to_dict;
- dumpop_inode_to_dict_t inode_to_dict;
- dumpop_fd_to_dict_t fd_to_dict;
- dumpop_inodectx_to_dict_t inodectx_to_dict;
- dumpop_fdctx_to_dict_t fdctx_to_dict;
- dumpop_eh_t history;
-};
-
-typedef struct xlator_list {
- xlator_t *xlator;
- struct xlator_list *next;
-} xlator_list_t;
-
-
-struct _xlator {
- /* Built during parsing */
- char *name;
- char *type;
- xlator_t *next;
- xlator_t *prev;
- xlator_list_t *parents;
- xlator_list_t *children;
- dict_t *options;
-
- /* Set after doing dlopen() */
- void *dlhandle;
- struct xlator_fops *fops;
- struct xlator_cbks *cbks;
- struct xlator_dumpops *dumpops;
- struct list_head volume_options; /* list of volume_option_t */
-
- void (*fini) (xlator_t *this);
- int32_t (*init) (xlator_t *this);
- int32_t (*reconfigure) (xlator_t *this, dict_t *options);
- int32_t (*mem_acct_init) (xlator_t *this);
- event_notify_fn_t notify;
-
- gf_loglevel_t loglevel; /* Log level for translator */
-
- /* for latency measurement */
- fop_latency_t latencies[GF_FOP_MAXVALUE];
-
- /* Misc */
- eh_t *history; /* event history context */
- glusterfs_ctx_t *ctx;
- glusterfs_graph_t *graph; /* not set for fuse */
- inode_table_t *itable;
- char init_succeeded;
- void *private;
- struct mem_acct mem_acct;
- uint64_t winds;
- char switched;
-
- /* for the memory pool of 'frame->local' */
- struct mem_pool *local_pool;
-};
-
-#define xlator_has_parent(xl) (xl->parents != NULL)
-
-#define XLATOR_NOTIFY(_xl, params ...) \
- do { \
- xlator_t *_old_THIS = NULL; \
- \
- _old_THIS = THIS; \
- THIS = _xl; \
- \
- ret = _xl->notify (_xl, params);\
- \
- THIS = _old_THIS; \
- } while (0);
-
-int32_t xlator_set_type_virtual (xlator_t *xl, const char *type);
-
-int32_t xlator_set_type (xlator_t *xl, const char *type);
-
-int32_t xlator_dynload (xlator_t *xl);
-
-xlator_t *file_to_xlator_tree (glusterfs_ctx_t *ctx,
- FILE *fp);
-
-int xlator_notify (xlator_t *this, int32_t event, void *data, ...);
-int xlator_init (xlator_t *this);
-int xlator_destroy (xlator_t *xl);
-
-int32_t xlator_tree_init (xlator_t *xl);
-int32_t xlator_tree_free (xlator_t *xl);
-
-void xlator_tree_fini (xlator_t *xl);
-
-void xlator_foreach (xlator_t *this,
- void (*fn) (xlator_t *each,
- void *data),
- void *data);
-
-xlator_t *xlator_search_by_name (xlator_t *any, const char *name);
-
-void inode_destroy_notify (inode_t *inode, const char *xlname);
-
-int loc_copy (loc_t *dst, loc_t *src);
-#define loc_dup(src, dst) loc_copy(dst, src)
-void loc_wipe (loc_t *loc);
-int loc_path (loc_t *loc, const char *bname);
-int xlator_mem_acct_init (xlator_t *xl, int num_types);
-int is_gf_log_command (xlator_t *trans, const char *name, char *value);
-int glusterd_check_log_level (const char *value);
-int xlator_volopt_dynload (char *xlator_type, void **dl_handle,
- volume_opt_list_t *vol_opt_handle);
-int32_t glusterfs_rebalance_event_notify (dict_t *dict);
-#endif /* _XLATOR_H */
diff --git a/libglusterfsclient/src/Makefile.am b/libglusterfsclient/src/Makefile.am
deleted file mode 100644
index 32811c0d5bf..00000000000
--- a/libglusterfsclient/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-lib_LTLIBRARIES = libglusterfsclient.la
-noinst_HEADERS = libglusterfsclient-internals.h
-libglusterfsclient_HEADERS = libglusterfsclient.h
-libglusterfsclientdir = $(includedir)
-
-libglusterfsclient_la_SOURCES = libglusterfsclient.c libglusterfsclient-dentry.c
-libglusterfsclient_la_CFLAGS = -fPIC -Wall
-libglusterfsclient_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-libglusterfsclient_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D$(GF_HOST_OS) -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_CFLAGS)
-libglusterfsclient_la_LDFLAGS = -shared -nostartfiles
-
-CLEANFILES =
-
-$(top_builddir)/libglusterfs/src/libglusterfs.la:
- $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
-
diff --git a/libglusterfsclient/src/libglusterfsclient-dentry.c b/libglusterfsclient/src/libglusterfsclient-dentry.c
deleted file mode 100644
index 3fa5f6e1e74..00000000000
--- a/libglusterfsclient/src/libglusterfsclient-dentry.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include <libgen.h>
-
-#define LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE(_new_loc, _loc, _parent, \
- _resolved) do { \
- size_t pathlen = 0; \
- size_t resolvedlen = 0; \
- char *path = NULL; \
- int pad = 0; \
- pathlen = strlen (_loc->path) + 1; \
- path = CALLOC (1, pathlen); \
- _new_loc.parent = _parent; \
- resolvedlen = strlen (_resolved); \
- strncpy (path, _resolved, resolvedlen); \
- if (resolvedlen == 1) /* only root resolved */ \
- pad = 0; \
- else { \
- pad = 1; \
- path[resolvedlen] = '/'; \
- } \
- strcpy_till (path + resolvedlen + pad, \
- loc->path + resolvedlen + pad, '/'); \
- _new_loc.path = path; \
- _new_loc.name = strrchr (path, '/'); \
- if (_new_loc.name) \
- _new_loc.name++; \
- }while (0);
-
-
-/* strcpy_till - copy @dname to @dest, until 'delim' is encountered in @dest
- * @dest - destination string
- * @dname - source string
- * @delim - delimiter character
- *
- * return - NULL is returned if '0' is encountered in @dname, otherwise returns
- * a pointer to remaining string begining in @dest.
- */
-static char *
-strcpy_till (char *dest, const char *dname, char delim)
-{
- char *src = NULL;
- int idx = 0;
- char *ret = NULL;
-
- src = (char *)dname;
- while (src[idx] && (src[idx] != delim)) {
- dest[idx] = src[idx];
- idx++;
- }
-
- dest[idx] = 0;
-
- if (src[idx] == 0)
- ret = NULL;
- else
- ret = &(src[idx]);
-
- return ret;
-}
-
-/* __libgf_client_path_to_parenti - derive parent inode for @path. if immediate
- * parent is not available in the dentry cache, return nearest
- * available parent inode and set @reslv to the path of
- * the returned directory.
- *
- * @itable - inode table
- * @path - path whose parent has to be looked up.
- * @reslv - if immediate parent is not available, reslv will be set to path of the
- * resolved parent.
- *
- * return - should never return NULL. should at least return '/' inode.
- */
-static inode_t *
-__libgf_client_path_to_parenti (libglusterfs_client_ctx_t *ctx,
- inode_table_t *itable, const char *path,
- char **reslv)
-{
- char *resolved_till = NULL;
- char *strtokptr = NULL;
- char *component = NULL;
- char *next_component = NULL;
- char *pathdup = NULL;
- inode_t *curr = NULL;
- inode_t *parent = NULL;
- size_t pathlen = 0;
- loc_t rootloc = {0, };
- int ret = -1;
-
- pathlen = STRLEN_0 (path);
- resolved_till = CALLOC (1, pathlen);
-
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", resolved_till, out);
- pathdup = strdup (path);
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", pathdup, out);
-
- parent = inode_ref (itable->root);
- /* If the root inode's is outdated, send a revalidate on it.
- * A revalidate on root inode also reduces the window in which an
- * op will fail over distribute because the layout of the root
- * directory did not get constructed when we sent the lookup on
- * root in glusterfs_init. That can happen when not all children of a
- * distribute volume were up at the time of glusterfs_init.
- */
- if (!libgf_is_iattr_cache_valid (ctx, parent, NULL,
- LIBGF_VALIDATE_LOOKUP)) {
- libgf_client_loc_fill (&rootloc, ctx, 1, 0, "/");
- ret = libgf_client_lookup (ctx, &rootloc, NULL, NULL, NULL);
- if (ret == -1) {
- gf_log ("libglusterfsclient-dentry", GF_LOG_ERROR,
- "Root inode revalidation failed");
- inode_unref (parent);
- parent = NULL;
- goto out;
- }
- libgf_client_loc_wipe (&rootloc);
- }
-
- curr = NULL;
-
- component = strtok_r (pathdup, "/", &strtokptr);
-
- while (component) {
- curr = inode_search (itable, parent->ino, component);
- if (!curr) {
- break;
- }
- if (!libgf_is_iattr_cache_valid (ctx, curr, NULL,
- LIBGF_VALIDATE_LOOKUP))
- break;
-
- /* It is OK to append the component even if it is the
- last component in the path, because, if 'next_component'
- returns NULL, @parent will remain the same and
- @resolved_till will not be sent back
- */
- strcat (resolved_till, "/");
- strcat (resolved_till, component);
-
- next_component = strtok_r (NULL, "/", &strtokptr);
-
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- /* will break */
- inode_unref (curr);
- }
-
- component = next_component;
- }
-
- if (resolved_till[0] == '\0') {
- strcat (resolved_till, "/");
- }
-
- free (pathdup);
-
- if (reslv) {
- *reslv = resolved_till;
- } else {
- FREE (resolved_till);
- }
-
-out:
- return parent;
-}
-
-static inline void
-libgf_client_update_resolved (const char *path, char *resolved)
-{
- int32_t pathlen = 0;
- char *tmp = NULL, *dest = NULL, *dname = NULL;
- char append_slash = 0;
-
- pathlen = strlen (resolved);
- tmp = (char *)(resolved + pathlen);
- if (*((char *) (resolved + pathlen - 1)) != '/') {
- tmp[0] = '/';
- append_slash = 1;
- }
-
- if (append_slash) {
- dest = tmp + 1;
- } else {
- dest = tmp;
- }
-
- if (*((char *) path + pathlen) == '/') {
- dname = (char *) path + pathlen + 1;
- } else {
- dname = (char *) path + pathlen;
- }
-
- strcpy_till (dest, dname, '/');
-}
-
-/* __do_path_resolve - resolve @loc->path into @loc->inode and @loc->parent. also
- * update the dentry cache
- *
- * @loc - loc to resolve.
- * @ctx - libglusterfsclient context
- * @lookup_basename - flag whether to lookup basename(loc->path)
- *
- * return - 0 on success
- * -1 on failure
- *
- */
-static int32_t
-__do_path_resolve (loc_t *loc, libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- int32_t op_ret = -1;
- char *resolved = NULL;
- inode_t *parent = NULL, *inode = NULL;
- dentry_t *dentry = NULL;
- loc_t new_loc = {0, };
- char *pathname = NULL, *directory = NULL;
- char *file = NULL;
-
- parent = loc->parent;
- if (parent) {
- inode_ref (parent);
- gf_log ("libglusterfsclient-dentry", GF_LOG_DEBUG,
- "loc->parent(%"PRId64") already present. sending "
- "lookup for %"PRId64"/%s", parent->ino, parent->ino,
- loc->path);
- resolved = strdup (loc->path);
- resolved = dirname (resolved);
- } else {
- parent = __libgf_client_path_to_parenti (ctx, ctx->itable,
- loc->path, &resolved);
- }
-
- if (parent == NULL) {
- /* fire in the bush.. run! run!! run!!! */
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_CRITICAL,
- "failed to get parent inode number");
- op_ret = -1;
- goto out;
- }
-
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_DEBUG,
- "resolved path(%s) till %"PRId64"(%s). "
- "sending lookup for remaining path",
- loc->path, parent->ino, resolved);
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- pathname = NULL;
-
- while (strcmp (resolved, directory) != 0)
- {
- dentry = NULL;
-
- LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE (new_loc, loc, parent,
- resolved);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- pathname = strdup (new_loc.path);
- file = basename (pathname);
-
- new_loc.inode = inode_search (ctx->itable, parent->ino, file);
- if (new_loc.inode) {
- if (libgf_is_iattr_cache_valid (ctx, new_loc.inode,
- NULL,
- LIBGF_VALIDATE_LOOKUP))
- dentry = dentry_search_for_inode (new_loc.inode,
- parent->ino,
- file);
- }
-
- if (dentry == NULL) {
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- inode_ref (new_loc.parent);
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
- }
-
- parent = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
-
- libgf_client_update_resolved (loc->path, resolved);
- }
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- if (lookup_basename) {
- pathname = strdup (loc->path);
- file = basename (pathname);
-
- inode = inode_search (ctx->itable, parent->ino, file);
- if (!inode) {
- libgf_client_loc_fill (&new_loc, ctx, 0, parent->ino,
- file);
-
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
-
- inode = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
- }
- }
-
- op_ret = 0;
-out:
- loc->inode = inode;
- loc->parent = parent;
-
- FREE (resolved);
- if (pathname) {
- FREE (pathname);
- }
-
- if (directory) {
- FREE (directory);
- }
-
- return op_ret;
-}
-
-
-/* resolves loc->path to loc->parent and loc->inode */
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- char *pathname = NULL;
- char *directory = NULL;
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- int32_t op_ret = 0;
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- parent = inode_from_path (ctx->itable, directory);
-
- if (parent != NULL) {
- loc->parent = parent;
-
- if (!lookup_basename) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved dirname(%s) to %"PRId64,
- loc->path, parent->ino);
- goto out;
- } else {
- inode = inode_from_path (ctx->itable, loc->path);
- if (inode != NULL) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved path(%s) to %"PRId64"/%"PRId64,
- loc->path, parent->ino, inode->ino);
- loc->inode = inode;
- goto out;
- }
- }
- }
-
- if (parent) {
- inode_unref (parent);
- } else if (inode) {
- inode_unref (inode);
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "undesired behaviour. inode(%"PRId64") for %s "
- "exists without parent (%s)",
- inode->ino, loc->path, directory);
- }
- op_ret = __do_path_resolve (loc, ctx, lookup_basename);
-out:
- if (pathname)
- free (pathname);
-
- return op_ret;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient-internals.h b/libglusterfsclient/src/libglusterfsclient-internals.h
deleted file mode 100755
index 7b62ce8efbf..00000000000
--- a/libglusterfsclient/src/libglusterfsclient-internals.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __LIBGLUSTERFSCLIENT_INTERNALS_H
-#define __LIBGLUSTERFSCLIENT_INTERNALS_H
-
-#include <glusterfs.h>
-#include <logging.h>
-#include <inode.h>
-#include <pthread.h>
-#include <stack.h>
-#include <list.h>
-#include <signal.h>
-#include <call-stub.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <fd.h>
-#include <dirent.h>
-
-#define LIBGF_IOBUF_SIZE (128 *GF_UNIT_KB)
-typedef void (*sighandler_t) (int);
-typedef struct list_head list_head_t;
-
-typedef struct libglusterfs_client_ctx {
- glusterfs_ctx_t gf_ctx;
- inode_table_t *itable;
- pthread_t reply_thread;
- call_pool_t pool;
- uint32_t counter;
- time_t lookup_timeout;
- time_t stat_timeout;
- /* We generate a fake fsid for the subvolume being
- * accessed through this context.
- */
- dev_t fake_fsid;
- pid_t pid;
-}libglusterfs_client_ctx_t;
-
-typedef struct signal_handler {
- int signo;
- sighandler_t handler;
- list_head_t next;
-}libgf_client_signal_handler_t ;
-
-typedef struct {
- pthread_mutex_t lock;
- pthread_cond_t reply_cond;
- call_stub_t *reply_stub;
- char complete;
- union {
- struct {
- char is_revalidate;
- loc_t *loc;
- int32_t size;
- } lookup;
- }fop;
- fd_t *fd; /* Needed here because we need a ref to the dir
- fd in the libgf_client_readdir_cbk in order
- to process the dirents received, without
- having them added to the reply stub.
- Also used in updating iattr cache. See
- readv_cbk for eg.
- */
-}libgf_client_local_t;
-
-typedef struct {
- pthread_cond_t init_con_established;
- pthread_mutex_t lock;
- char complete;
-}libglusterfs_client_private_t;
-
-typedef struct {
- pthread_mutex_t lock;
- uint32_t previous_lookup_time;
- uint32_t previous_stat_time;
- struct iatt stbuf;
-} libglusterfs_client_inode_ctx_t;
-
-/* Our dirent cache is very simplistic when it comes to directory
- * reading workloads. It assumes that all directory traversal operations happen
- * sequentially and that readdir callers dont go jumping around the directory
- * using seekdir, rewinddir. Thats why you'll notice that seekdir, rewinddir
- * API in libglusterfsclient only set the offset. The consequence is that when
- * libgf_dcache_readdir finds that the offset presented to it, is not
- * the same as the offset of the previous dirent returned by dcache (..stored
- * in struct direntcache->prev_off..), it realises that a non-sequential
- * directory read is in progress and returns 0 to signify that the cache is
- * not valid.
- * This could be made a bit more intelligent by using a data structure like
- * a hash-table or a balanced binary tree that allows us to search for the
- * existence of particular offsets in the cache without performing a list or
- * array traversal.
- * Dont use a simple binary search tree because
- * there is no guarantee that offsets in a sequential reading of the directory
- * will be just random integers. If for some reason they are sequential, a BST
- * will end up becoming a list.
- */
-struct direntcache {
- gf_dirent_t entries; /* Head of list of cached dirents. */
- gf_dirent_t *next; /* Pointer to the next entry that
- * should be sent by readdir */
- uint64_t prev_off; /* Offset where the next read will
- * happen.
- */
-};
-
-typedef struct {
- pthread_mutex_t lock;
- off_t offset;
- libglusterfs_client_ctx_t *ctx;
- /* `man readdir` says readdir is non-re-entrant
- * only if two readdirs are racing on the same
- * handle.
- */
- struct dirent dirp;
- struct direntcache *dcache;
- char vpath[PATH_MAX];
-} libglusterfs_client_fd_ctx_t;
-
-typedef struct libglusterfs_client_async_local {
- void *cbk_data;
- union {
- struct {
- fd_t *fd;
- glusterfs_readv_cbk_t cbk;
- char update_offset;
- }readv_cbk;
-
- struct {
- fd_t *fd;
- glusterfs_write_cbk_t cbk;
- }write_cbk;
-
- struct {
- fd_t *fd;
- }close_cbk;
-
- struct {
- void *buf;
- size_t size;
- loc_t *loc;
- char is_revalidate;
- glusterfs_get_cbk_t cbk;
- }lookup_cbk;
- }fop;
-}libglusterfs_client_async_local_t;
-
-#define LIBGF_STACK_WIND_AND_WAIT(frame, rfn, obj, fn, params ...) \
- do { \
- STACK_WIND (frame, rfn, obj, fn, params); \
- pthread_mutex_lock (&local->lock); \
- { \
- while (!local->complete) { \
- pthread_cond_wait (&local->reply_cond, \
- &local->lock); \
- } \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-#define LIBGF_CLIENT_SIGNAL(signal_handler_list, signo, handler) \
- do { \
- libgf_client_signal_handler_t *libgf_handler = CALLOC (1, \
- sizeof (*libgf_handler)); \
- ERR_ABORT (libgf_handler); \
- libgf_handler->signo = signo; \
- libgf_handler->handler = signal (signo, handler); \
- list_add (&libgf_handler->next, signal_handler_list); \
- } while (0)
-
-#define LIBGF_INSTALL_SIGNAL_HANDLERS(signal_handlers) \
- do { \
- INIT_LIST_HEAD (&signal_handlers); \
- /* Handle SIGABORT and SIGSEGV */ \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGSEGV, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGABRT, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGHUP, gf_log_logrotate); \
- /* LIBGF_CLIENT_SIGNAL (SIGTERM, glusterfs_cleanup_and_exit); */ \
- } while (0)
-
-#define LIBGF_RESTORE_SIGNAL_HANDLERS(local) \
- do { \
- libgf_client_signal_handler_t *ptr = NULL, *tmp = NULL; \
- list_for_each_entry_safe (ptr, tmp, &local->signal_handlers,\
- next) { \
- signal (ptr->signo, ptr->handler); \
- FREE (ptr); \
- } \
- } while (0)
-
-#define LIBGF_CLIENT_FOP_ASYNC(ctx, local, ret_fn, op, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- frame->root->state = ctx; \
- frame->local = local; \
- STACK_WIND (frame, ret_fn, xl, xl->fops->op, args); \
- } while (0)
-
-#define LIBGF_CLIENT_FOP(ctx, stub, op, local, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- if (!local) { \
- local = CALLOC (1, sizeof (*local)); \
- } \
- ERR_ABORT (local); \
- frame->local = local; \
- frame->root->state = ctx; \
- pthread_cond_init (&local->reply_cond, NULL); \
- pthread_mutex_init (&local->lock, NULL); \
- LIBGF_STACK_WIND_AND_WAIT (frame, libgf_client_##op##_cbk, xl, \
- xl->fops->op, args); \
- stub = local->reply_stub; \
- FREE (frame->local); \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- } while (0)
-
-#define LIBGF_REPLY_NOTIFY(local) \
- do { \
- pthread_mutex_lock (&local->lock); \
- { \
- local->complete = 1; \
- pthread_cond_broadcast (&local->reply_cond); \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-void
-libgf_client_loc_wipe (loc_t *loc);
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name);
-
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename);
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf,
- dict_t **dict,
- dict_t *xattr_req);
-
-/* We're not expecting more than 10-15
- * VMPs per process so a list is acceptable.
- */
-struct vmp_entry {
- struct list_head list;
- char * vmp;
- int vmplen;
- glusterfs_handle_t handle;
-};
-
-#define LIBGF_UPDATE_LOOKUP 0x1
-#define LIBGF_UPDATE_STAT 0x2
-#define LIBGF_UPDATE_ALL (LIBGF_UPDATE_LOOKUP | LIBGF_UPDATE_STAT)
-
-#define LIBGF_VALIDATE_LOOKUP 0x1
-#define LIBGF_VALIDATE_STAT 0x2
-
-#define LIBGF_INVALIDATE_LOOKUP 0x1
-#define LIBGF_INVALIDATE_STAT 0x2
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct iatt *sbuf, int flags);
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct iatt *buf);
-
-#endif
diff --git a/libglusterfsclient/src/libglusterfsclient.c b/libglusterfsclient/src/libglusterfsclient.c
deleted file mode 100755
index aaff49416a9..00000000000
--- a/libglusterfsclient/src/libglusterfsclient.c
+++ /dev/null
@@ -1,8160 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <errno.h>
-#include <libgen.h>
-#include <stddef.h>
-
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#ifdef GF_SOLARIS_HOST_OS
-#include <sys/statfs.h>
-#endif
-#include <unistd.h>
-#include <xlator.h>
-#include <timer.h>
-#include "defaults.h"
-#include <time.h>
-#include <poll.h>
-#include "transport.h"
-#include "event.h"
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include "compat.h"
-#include "compat-errno.h"
-#ifndef GF_SOLARIS_HOST_OS
-#include <sys/vfs.h>
-#endif
-#include <utime.h>
-#include <sys/param.h>
-#include <list.h>
-#include <stdarg.h>
-#include <sys/statvfs.h>
-#include "hashfn.h"
-#include <sys/select.h>
-
-#define LIBGF_XL_NAME "libglusterfsclient"
-#define LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT 1000 //14057
-#define LIBGF_SENDFILE_BLOCK_SIZE 4096
-#define LIBGF_READDIR_BLOCK 4096
-#define libgf_path_absolute(path) ((path)[0] == '/')
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph);
-int32_t libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- char *buf, size_t bufsize);
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc);
-static int first_init = 1;
-
-/* The global list of virtual mount points */
-struct {
- struct list_head list;
- int entries;
-}vmplist;
-
-
-/* Protects the VMP list above. */
-pthread_mutex_t vmplock = PTHREAD_MUTEX_INITIALIZER;
-
-/* Ensures only one thread is ever calling glusterfs_mount.
- * Since that function internally calls routines which
- * use the yacc parser code using global vars, this process
- * needs to be syncronised.
- */
-pthread_mutex_t mountlock = PTHREAD_MUTEX_INITIALIZER;
-
-static char cwd[PATH_MAX];
-static char cwd_inited = 0;
-static pthread_mutex_t cwdlock = PTHREAD_MUTEX_INITIALIZER;
-
-char *
-libgf_vmp_virtual_path (struct vmp_entry *entry, const char *path, char *vpath)
-{
- char *tmp = NULL;
-
- tmp = ((char *)(path + (entry->vmplen-1)));
- if (strlen (tmp) > 0) {
- if (tmp[0] != '/') {
- vpath[0] = '/';
- vpath[1] = '\0';
- strcat (&vpath[1], tmp);
- } else
- strcpy (vpath, tmp);
- } else {
- vpath[0] = '/';
- vpath[1] = '\0';
- }
-
- return vpath;
-}
-
-char *
-zr_build_process_uuid ()
-{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (-1 == gettimeofday(&tv, NULL)) {
- gf_log ("", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (-1 == gethostname (hostname, 256)) {
- gf_log ("", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%ld",
- hostname, getpid(), now_str, tv.tv_usec);
-
- return strdup (tmp_str);
-}
-
-
-int32_t
-libgf_client_forget (xlator_t *this,
- inode_t *inode)
-{
- uint64_t ptr = 0;
- libglusterfs_client_inode_ctx_t *ctx = NULL;
-
- inode_ctx_del (inode, this, &ptr);
- ctx = (libglusterfs_client_inode_ctx_t *)(long) ptr;
-
- FREE (ctx);
-
- return 0;
-}
-
-xlator_t *
-libgf_inode_to_xlator (inode_t *inode)
-{
- if (!inode)
- return NULL;
-
- if (!inode->table)
- return NULL;
-
- if (!inode->table->xl)
- return NULL;
-
- if (!inode->table->xl->ctx)
- return NULL;
-
- return inode->table->xl->ctx->top;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_get_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_get (fd, libgf_inode_to_xlator (fd->inode), &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_alloc_fd_ctx (libglusterfs_client_ctx_t *ctx, fd_t *fd, char *vpath)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- uint64_t ctxaddr = 0;
-
- fdctx = CALLOC (1, sizeof (*fdctx));
- if (fdctx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- fdctx = NULL;
- goto out;
- }
-
- pthread_mutex_init (&fdctx->lock, NULL);
- fdctx->ctx = ctx;
- ctxaddr = (uint64_t) (long)fdctx;
-
- if (fd->inode) {
- if (IA_ISDIR (fd->inode->ia_type)) {
- fdctx->dcache = CALLOC (1, sizeof (struct direntcache));
- if (fdctx->dcache)
- INIT_LIST_HEAD (&fdctx->dcache->entries.list);
- /* If the calloc fails, we can still continue
- * working as the dcache is not required for correct
- * operation.
- */
- }
- }
-
- if (vpath != NULL) {
- strcpy (fdctx->vpath, vpath);
- if (vpath[strlen(vpath) - 1] != '/') {
- strcat (fdctx->vpath, "/");
- }
- }
-
- fd_ctx_set (fd, libgf_inode_to_xlator (fd->inode), ctxaddr);
-out:
- return fdctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_del_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_del (fd, libgf_inode_to_xlator (fd->inode) , &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-void
-libgf_dcache_invalidate (fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd)
- return;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- return;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- return;
- }
-
- if (!list_empty (&fd_ctx->dcache->entries.list))
- gf_dirent_free (&fd_ctx->dcache->entries);
-
- INIT_LIST_HEAD (&fd_ctx->dcache->entries.list);
-
- fd_ctx->dcache->next = NULL;
- fd_ctx->dcache->prev_off = 0;
-
- return;
-}
-
-/* The first entry in the entries is always a placeholder
- * or the list head. The real entries begin from entries->next.
- */
-int
-libgf_dcache_update (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- gf_dirent_t *entries)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int op_ret = -1;
-
- if ((!ctx) || (!fd) || (!entries)) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- /* dcache is not enabled. */
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- op_ret = 0;
- goto out;
- }
-
- /* If we're updating, we must begin with invalidating any previous
- * entries.
- */
- libgf_dcache_invalidate (fd);
-
- fd_ctx->dcache->next = entries->next;
- /* We still need to store a pointer to the head
- * so we start free'ing from the head when invalidation
- * is required.
- *
- * Need to delink the entries from the list
- * given to us by an underlying translators. Most translators will
- * free this list after this call so we must preserve the dirents in
- * order to cache them.
- */
- list_splice_init (&entries->list, &fd_ctx->dcache->entries.list);
- op_ret = 0;
-out:
- return op_ret;
-}
-
-int
-libgf_dcache_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int cachevalid = 0;
-
- if ((!ctx) || (!fd) || (!dirp) || (!offset))
- return 0;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- goto out;
- }
-
- /* We've either run out of entries in the cache
- * or the cache is empty.
- */
- if (!fd_ctx->dcache->next) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "No entries present");
- goto out;
- }
-
- /* The dirent list is created as a circular linked list
- * so this check is needed to ensure, we dont start
- * reading old entries again.
- * If we're reached this situation, the cache is exhausted
- * and we'll need to pre-fetch more entries to continue serving.
- */
- if (fd_ctx->dcache->next == &fd_ctx->dcache->entries) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Entries exhausted");
- goto out;
- }
-
- /* During sequential reading we generally expect that the offset
- * requested is the same as the offset we served in the previous call
- * to readdir. But, seekdir, rewinddir and libgf_dcache_invalidate
- * require special handling because seekdir/rewinddir change the offset
- * in the fd_ctx and libgf_dcache_invalidate changes the prev_off.
- */
- if (*offset != fd_ctx->dcache->prev_off) {
- /* For all cases of the if branch above, we know that the
- * cache is now invalid except for the case below. It handles
- * the case where the two offset values above are different
- * but different because the previous readdir block was
- * exhausted, resulting in a prev_off being set to 0 in
- * libgf_dcache_invalidate, while the requested offset is non
- * zero because that is what we returned for the last dirent
- * of the previous readdir block.
- */
- if ((*offset != 0) && (fd_ctx->dcache->prev_off == 0)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Entries"
- " exhausted");
- cachevalid = 1;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Dcache"
- " invalidated previously");
- } else
- cachevalid = 1;
-
- if (!cachevalid)
- goto out;
-
- dirp->d_ino = fd_ctx->dcache->next->d_ino;
- strncpy (dirp->d_name, fd_ctx->dcache->next->d_name,
- fd_ctx->dcache->next->d_len);
-
- *offset = fd_ctx->dcache->next->d_off;
- dirp->d_off = *offset;
- fd_ctx->dcache->prev_off = fd_ctx->dcache->next->d_off;
- fd_ctx->dcache->next = fd_ctx->dcache->next->next;
-
-out:
- return cachevalid;
-}
-
-
-int32_t
-libgf_client_release (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (IA_ISDIR (fd->inode->ia_type)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_get_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_get (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_del_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_del (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_alloc_inode_ctx (libglusterfs_client_ctx_t *ctx, inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- ictx = CALLOC (1, sizeof (*ictx));
- if (ictx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- goto out;
- }
-
- pthread_mutex_init (&ictx->lock, NULL);
- ctxaddr = (uint64_t) (long)ictx;
- if (inode_ctx_put (inode, libgf_inode_to_xlator (inode), ctxaddr) < 0){
- FREE (ictx);
- ictx = NULL;
- }
-
-out:
- return ictx;
-}
-
-int
-libgf_transform_iattr (libglusterfs_client_ctx_t *libctx, inode_t *inode,
- struct iatt *buf)
-{
-
- if ((!libctx) || (!buf) || (!inode))
- return -1;
-
- buf->ia_dev = libctx->fake_fsid;
- /* If the inode is root, the inode number must be 1 not the
- * ino received from the file system.
- */
- if ((inode->ino == 1) && (buf))
- buf->ia_ino = 1;
-
- return 0;
-}
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct iatt *buf)
-{
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- time_t current = 0;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- /* Take a timestamp only after we've acquired the
- * lock.
- */
- current = time (NULL);
- if (flags & LIBGF_UPDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Updating lookup");
- inode_ctx->previous_lookup_time = current;
- }
-
- if (flags & LIBGF_UPDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Updating stat");
-
- /* Update the cached stat struct only if a new
- * stat buf is given.
- */
- if (buf != NULL) {
- inode_ctx->previous_stat_time = current;
- memcpy (&inode_ctx->stbuf, buf,
- sizeof (inode_ctx->stbuf));
- }
- }
- }
- pthread_mutex_unlock (&inode_ctx->lock);
- op_ret = 0;
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_invalidate_iattr_cache (inode_t *inode, int flags)
-{
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- if (!inode)
- return -1;
-
- ictx = libgf_get_inode_ctx (inode);
- if (!ictx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- return -1;
- }
-
- pthread_mutex_lock (&ictx->lock);
- {
- if (flags & LIBGF_INVALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Invalidating"
- " lookup");
- ictx->previous_lookup_time = 0;
- }
-
- if (flags & LIBGF_INVALIDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Invalidating"
- " stat");
- ictx->previous_stat_time = 0;
- }
-
- }
- pthread_mutex_unlock (&ictx->lock);
-
- return 0;
-}
-
-
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct iatt *sbuf, int flags)
-{
- time_t current = 0;
- time_t prev = 0;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- int cache_valid = 0;
- time_t timeout = 0;
-
- if (inode == NULL)
- return 0;
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present\n");
- return 0;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- current = time (NULL);
- if (flags & LIBGF_VALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Checking lookup");
- prev = inode_ctx->previous_lookup_time;
- timeout = ctx->lookup_timeout;
- } else {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Checking stat");
- prev = inode_ctx->previous_stat_time;
- timeout = ctx->stat_timeout;
- }
-
- /* Even if the timeout is set to -1 to cache
- * infinitely, fops like write must invalidate the
- * stat cache because writev_cbk cannot update
- * the cache using the stat returned to it. This is
- * because write-behind can return a stat bufs filled
- * with zeroes.
- */
- if (prev == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache Invalid");
- goto iattr_unlock_out;
- }
-
- /* Cache infinitely */
- if (timeout == (time_t)-1) {
- cache_valid = 1;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Caching On and "
- "valid");
- goto iattr_unlock_out;
- }
-
- /* Disable caching completely */
- if (timeout == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache disabled");
- goto iattr_unlock_out;
- }
-
- if ((prev > 0) && (timeout >= (current - prev))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache valid");
- cache_valid = 1;
- }
-
- if (flags & LIBGF_VALIDATE_LOOKUP)
- goto iattr_unlock_out;
-
- if ((cache_valid) && (sbuf))
- *sbuf = inode_ctx->stbuf;
- }
-iattr_unlock_out:
- pthread_mutex_unlock (&inode_ctx->lock);
-
- return cache_valid;
-}
-
-int32_t
-libgf_client_releasedir (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (IA_ISDIR (fd->inode->ia_type)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-void *poll_proc (void *ptr)
-{
- glusterfs_ctx_t *ctx = ptr;
-
- event_dispatch (ctx->event_pool);
-
- return NULL;
-}
-
-
-int32_t
-xlator_graph_init (xlator_t *xl)
-{
- xlator_t *trav = xl;
- int32_t ret = -1;
-
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->ready) {
- ret = xlator_tree_init (trav);
- if (ret < 0)
- break;
- }
- trav = trav->next;
- }
-
- return ret;
-}
-
-
-void
-xlator_graph_fini (xlator_t *xl)
-{
- xlator_t *trav = xl;
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->init_succeeded) {
- break;
- }
-
- xlator_tree_fini (trav);
- trav = trav->next;
- }
-}
-
-/* Returns a pointer to the @n'th char matching
- * @c in string @str, starting the search from right or
- * end-of-string, rather than starting from left, as rindex
- * function does.
- */
-char *
-libgf_rrindex (char *str, int c, int n)
-{
- int len = 0;
- int occurrence = 0;
-
- if (str == NULL)
- return NULL;
-
- len = strlen (str);
- /* Point to last character of string. */
- str += (len - 1);
- while (len > 0) {
- if ((int)*str == c) {
- ++occurrence;
- if (occurrence == n)
- break;
- }
- --len;
- --str;
- }
-
- return str;
-}
-
-char *
-libgf_trim_to_prev_dir (char * path)
-{
- char *idx = NULL;
- int len = 0;
-
- if (!path)
- return NULL;
-
- /* Check if we're already at root, if yes
- * then there is no prev dir.
- */
- len = strlen (path);
- if (len == 1)
- return path;
-
- if (path[len - 1] == '/') {
- path[len - 1] = '\0';
- }
-
- idx = libgf_rrindex (path, '/', 1);
- /* Move to the char after the / */
- ++idx;
- *idx = '\0';
-
- return path;
-}
-
-
-char *
-libgf_prepend_cwd (const char *userpath, char *abspath, int size)
-{
- if ((!userpath) || (!abspath))
- return NULL;
-
- if (!getcwd (abspath, size))
- return NULL;
-
- strcat (abspath, "/");
- strcat (abspath, userpath);
-
- return abspath;
-}
-
-
-/* Performs a lightweight path resolution that only
- * looks for . and .. and replaces those with the
- * proper names.
- *
- * FIXME: This is a stop-gap measure till we have full
- * fledge path resolution going in here.
- * Function returns path strdup'ed so remember to FREE the
- * string as required.
- */
-char *
-libgf_resolve_path_light (char *path)
-{
- char *respath = NULL;
- char *saveptr = NULL;
- char *tok = NULL;
- int len = 0;
- int addslash = 0;
- char mypath[PATH_MAX];
-
- if (!path)
- goto out;
-
- memset (mypath, 0, PATH_MAX);
-
- if (!libgf_path_absolute (path))
- libgf_prepend_cwd (path, mypath, PATH_MAX);
- else
- strcpy (mypath, path);
-
- len = strlen (mypath);
- if (len == 0) {
- goto out;
- }
-
- respath = calloc (PATH_MAX, sizeof (char));
- if (respath == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- goto out;
- }
-
- /* The path only contains a / or a //, so simply add a /
- * and return.
- * This needs special handling because the loop below does
- * not allow us to do so through strtok.
- */
- if (((mypath[0] == '/') && (len == 1))
- || (strcmp (mypath, "//") == 0)) {
- strcat (respath, "/");
- goto out;
- }
-
- tok = strtok_r (mypath, "/", &saveptr);
- addslash = 0;
- strcat (respath, "/");
- while (tok) {
- if (addslash) {
- if ((strcmp (tok, ".") != 0)
- && (strcmp (tok, "..") != 0)) {
- strcat (respath, "/");
- }
- }
-
- if ((strcmp (tok, ".") != 0) && (strcmp (tok, "..") != 0)) {
- strcat (respath, tok);
- addslash = 1;
- } else if ((strcmp (tok, "..") == 0)) {
- libgf_trim_to_prev_dir (respath);
- addslash = 0;
- }
-
- tok = strtok_r (NULL, "/", &saveptr);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Path: %s, Resolved Path: %s",
- path, respath);
-out:
- return respath;
-}
-
-void
-libgf_client_loc_wipe (loc_t *loc)
-{
- if (loc->path) {
- FREE (loc->path);
- }
-
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
-
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
-
- loc->path = loc->name = NULL;
- loc->ino = 0;
-}
-
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name)
-{
- inode_t *inode = NULL, *parent = NULL;
- int32_t ret = -1;
- char *path = NULL;
-
- /* resistance against multiple invocation of loc_fill not to get
- reference leaks via inode_search() */
-
- inode = loc->inode;
-
- if (!inode) {
- if (ino)
- inode = inode_search (ctx->itable, ino, NULL);
-
- if (inode)
- goto inode_found;
-
- if (par && name)
- inode = inode_search (ctx->itable, par, name);
- }
-
-inode_found:
- if (inode) {
- loc->ino = inode->ino;
- loc->inode = inode;
- }
-
- parent = loc->parent;
- if (!parent) {
- if (inode)
- parent = inode_parent (inode, par, name);
- else
- parent = inode_search (ctx->itable, par, NULL);
- loc->parent = parent;
- }
-
- if (!loc->path) {
- if (name && parent) {
- ret = inode_path (parent, name, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64"/%s",
- parent->ino, name);
- goto fail;
- } else {
- loc->path = path;
- }
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64,
- inode->ino);
- goto fail;
- } else {
- loc->path = path;
- }
- }
- }
-
- if (loc->path) {
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else loc->name = "";
- }
-
- if ((ino != 1) &&
- (parent == NULL)) {
- gf_log ("fuse-bridge", GF_LOG_ERROR,
- "failed to search parent for %"PRId64"/%s (%"PRId64")",
- (ino_t)par, name, (ino_t)ino);
- ret = -1;
- goto fail;
- }
- ret = 0;
-fail:
- return ret;
-}
-
-
-static call_frame_t *
-get_call_frame_for_req (libglusterfs_client_ctx_t *ctx, char d)
-{
- call_pool_t *pool = ctx->gf_ctx.pool;
- xlator_t *this = ctx->gf_ctx.graph;
- call_frame_t *frame = NULL;
-
-
- frame = create_frame (this, pool);
-
- frame->root->uid = geteuid ();
- frame->root->gid = getegid ();
- frame->root->pid = ctx->pid;
- frame->root->unique = ctx->counter++;
-
- return frame;
-}
-
-void
-libgf_client_fini (xlator_t *this)
-{
- FREE (this->private);
- return;
-}
-
-
-int32_t
-libgf_client_notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- libglusterfs_client_private_t *priv = this->private;
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- pthread_mutex_lock (&priv->lock);
- {
- priv->complete = 1;
- pthread_cond_broadcast (&priv->init_con_established);
- }
- pthread_mutex_unlock (&priv->lock);
- break;
-
- default:
- default_notify (this, event, data);
- }
-
- return 0;
-}
-
-int32_t
-libgf_client_init (xlator_t *this)
-{
- return 0;
-}
-
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *init_ctx, uint32_t fakefsid)
-{
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_private_t *priv = NULL;
- FILE *specfp = NULL;
- xlator_t *graph = NULL, *trav = NULL;
- call_pool_t *pool = NULL;
- int32_t ret = 0;
- struct rlimit lim;
- uint32_t xl_count = 0;
- loc_t new_loc = {0, };
- struct timeval tv = {0, };
- uint32_t len = 0;
- char buf[PATH_MAX];
-
- if (!init_ctx || (!init_ctx->specfile && !init_ctx->specfp)) {
- errno = EINVAL;
- return NULL;
- }
-
- ctx = CALLOC (1, sizeof (*ctx));
- if (!ctx) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: out of memory\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
-
- errno = ENOMEM;
- return NULL;
- }
-
- ctx->lookup_timeout = init_ctx->lookup_timeout;
- ctx->stat_timeout = init_ctx->stat_timeout;
- ctx->fake_fsid = fakefsid;
- ctx->pid = getpid ();
- pthread_mutex_init (&ctx->gf_ctx.lock, NULL);
-
- pool = ctx->gf_ctx.pool = CALLOC (1, sizeof (call_pool_t));
- if (!pool) {
- errno = ENOMEM;
- FREE (ctx);
- return NULL;
- }
-
- LOCK_INIT (&pool->lock);
- INIT_LIST_HEAD (&pool->all_frames);
-
- /* FIXME: why is count hardcoded to 16384 */
- ctx->gf_ctx.event_pool = event_pool_new (16384);
- ctx->gf_ctx.page_size = LIBGF_IOBUF_SIZE;
- ctx->gf_ctx.iobuf_pool = iobuf_pool_new (8 * 1048576,
- ctx->gf_ctx.page_size);
-
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
- setrlimit (RLIMIT_NOFILE, &lim);
-
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
-
- if (init_ctx->logfile)
- ctx->gf_ctx.cmd_args.log_file = strdup (init_ctx->logfile);
- else
- ctx->gf_ctx.cmd_args.log_file = strdup ("/dev/stderr");
-
- if (init_ctx->loglevel) {
- if (!strncasecmp (init_ctx->loglevel, "DEBUG",
- strlen ("DEBUG"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_DEBUG;
- } else if (!strncasecmp (init_ctx->loglevel, "WARNING",
- strlen ("WARNING"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
- } else if (!strncasecmp (init_ctx->loglevel, "CRITICAL",
- strlen ("CRITICAL"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_CRITICAL;
- } else if (!strncasecmp (init_ctx->loglevel, "NONE",
- strlen ("NONE"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_NONE;
- } else if (!strncasecmp (init_ctx->loglevel, "ERROR",
- strlen ("ERROR"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_ERROR;
- } else if (!strncasecmp (init_ctx->loglevel, "TRACE",
- strlen ("TRACE"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_TRACE;
- } else {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: Unrecognized log-level \"%s\", possible values are \"DEBUG|WARNING|[ERROR]|CRITICAL|NONE|TRACE\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- init_ctx->loglevel);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- errno = EINVAL;
- return NULL;
- }
- }
-
- if (first_init)
- {
- memset (buf, 0, PATH_MAX);
-
- if (getcwd (buf, PATH_MAX) == NULL) {
- fprintf (stderr, "libglusterfsclient: cannot get "
- "current working directory (%s)",
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- len = strlen (buf);
- if ((buf[len - 1] != '/')) {
- if ((len + 2) > PATH_MAX) {
- errno = ENAMETOOLONG;
- fprintf (stderr, "libglusterfsclient: cannot"
- "get current working directory (%s)",
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- strcat (buf, "/");
- }
-
- pthread_mutex_lock (&cwdlock);
- {
- strcpy (cwd, buf);
- cwd_inited = 1;
- }
- pthread_mutex_unlock (&cwdlock);
-
- ret = gf_log_init (ctx->gf_ctx.cmd_args.log_file);
- if (ret == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: failed to open logfile \"%s\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- gf_log_set_loglevel (ctx->gf_ctx.cmd_args.log_level);
- }
-
- if (init_ctx->specfp) {
- specfp = init_ctx->specfp;
- if (fseek (specfp, 0L, SEEK_SET)) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: fseek on volume file stream failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- } else if (init_ctx->specfile) {
- specfp = fopen (init_ctx->specfile, "r");
- ctx->gf_ctx.cmd_args.volume_file = strdup (init_ctx->specfile);
- }
-
- if (!specfp) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: could not open volfile: %s\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- ctx->gf_ctx.cmd_args.volume_name = strdup (init_ctx->volume_name);
- }
-
- graph = file_to_xlator_tree (&ctx->gf_ctx, specfp);
- if (!graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create configuration graph (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- trav = graph;
- while (trav) {
- if (strcmp (trav->name, init_ctx->volume_name) == 0) {
- graph = trav;
- break;
- }
- trav = trav->next;
- }
- }
-
- ctx->gf_ctx.graph = libglusterfs_graph (graph);
- if (!ctx->gf_ctx.graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph creation failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- graph = ctx->gf_ctx.graph;
- ctx->gf_ctx.top = graph;
-
- trav = graph;
- while (trav) {
- xl_count++; /* Getting this value right is very important */
- trav = trav->next;
- }
-
- ctx->gf_ctx.xl_count = xl_count + 1;
-
- priv = CALLOC (1, sizeof (*priv));
- if (!priv) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot allocate memory (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- pthread_cond_init (&priv->init_con_established, NULL);
- pthread_mutex_init (&priv->lock, NULL);
-
- graph->private = priv;
- ctx->itable = inode_table_new (LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT,
- graph);
- if (!ctx->itable) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create inode table\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- xlator_tree_free (graph);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- set_global_ctx_ptr (&ctx->gf_ctx);
- ctx->gf_ctx.process_uuid = zr_build_process_uuid ();
-
- if (xlator_graph_init (graph) == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph initialization failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- /* Send notify to all translator saying things are ready */
- graph->notify (graph, GF_EVENT_PARENT_UP, graph);
-
- if (gf_timer_registry_init (&ctx->gf_ctx) == NULL) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: timer init failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- if ((ret = pthread_create (&ctx->reply_thread, NULL, poll_proc,
- (void *)&ctx->gf_ctx))) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: reply thread creation failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- while (!priv->complete) {
- pthread_cond_wait (&priv->init_con_established,
- &priv->lock);
- }
- }
- pthread_mutex_unlock (&priv->lock);
-
- /*
- * wait for some time to allow initialization of all children of
- * distribute before sending lookup on '/'
- */
-
- tv.tv_sec = 0;
- tv.tv_usec = (100 * 1000);
- select (0, NULL, NULL, NULL, &tv);
-
- /* workaround for xlators like dht which require lookup to be sent
- * on / */
- libgf_client_loc_fill (&new_loc, ctx, 1, 0, "/");
- ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL, NULL);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR, "lookup of /"
- " failed");
- return NULL;
- }
- libgf_client_loc_wipe (&new_loc);
-
- first_init = 0;
-
- return ctx;
-}
-
-struct vmp_entry *
-libgf_init_vmpentry (char *vmp, glusterfs_handle_t *vmphandle)
-{
- struct vmp_entry *entry = NULL;
- int vmplen = 0;
- int appendslash = 0;
- int ret = -1;
-
- entry = CALLOC (1, sizeof (struct vmp_entry));
- if (!entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- return NULL;
- }
-
- vmplen = strlen (vmp);
- if (vmp[vmplen - 1] != '/') {
- vmplen++;
- appendslash = 1;
- }
-
- entry->vmp = CALLOC (vmplen + 1, sizeof (char));
- if (!entry->vmp) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation "
- "failed");
- goto free_entry;
- }
-
- strcpy (entry->vmp, vmp);
- if (appendslash) {
- entry->vmp[vmplen-1] = '/';
- entry->vmp[vmplen] = '\0';
- }
-
- entry->vmplen = vmplen;
- entry->handle = vmphandle;
- INIT_LIST_HEAD (&entry->list);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New VMP entry: %s", vmp);
-
- ret = 0;
-
-free_entry:
- if (ret == -1) {
- if (entry->vmp)
- FREE (entry->vmp);
- if (entry)
- FREE (entry);
- entry = NULL;
- }
- return entry;
-}
-
-void
-libgf_free_vmp_entry (struct vmp_entry *entry)
-{
- FREE (entry->vmp);
- FREE (entry);
-}
-
-int
-libgf_count_path_components (char *path)
-{
- int compos = 0;
- char *pathdup = NULL;
- int len = 0;
-
- if (!path)
- return -1;
-
- pathdup = strdup (path);
- if (!pathdup)
- return -1;
-
- len = strlen (pathdup);
- if (pathdup[len - 1] == '/')
- pathdup[len - 1] = '\0';
-
- path = pathdup;
- while ((path = strchr (path, '/'))) {
- compos++;
- ++path;
- }
-
- free (pathdup);
- return compos;
-}
-
-/* Returns the number of components that match between
- * the VMP and the path. Assumes string1 is vmp entry.
- * Assumes both are absolute paths.
- */
-int
-libgf_strmatchcount (char *string1, char *string2)
-{
- int matchcount = 0;
- char *s1dup = NULL, *s2dup = NULL;
- char *tok1 = NULL, *saveptr1 = NULL;
- char *tok2 = NULL, *saveptr2 = NULL;
-
- if ((!string1) || (!string2))
- return 0;
-
- s1dup = strdup (string1);
- if (!s1dup)
- return 0;
-
- s2dup = strdup (string2);
- if (!s2dup)
- goto free_s1;
-
- string1 = s1dup;
- string2 = s2dup;
-
- tok1 = strtok_r(string1, "/", &saveptr1);
- tok2 = strtok_r (string2, "/", &saveptr2);
- while (tok1) {
- if (!tok2)
- break;
-
- if (strcmp (tok1, tok2) != 0)
- break;
-
- matchcount++;
- tok1 = strtok_r(NULL, "/", &saveptr1);
- tok2 = strtok_r (NULL, "/", &saveptr2);
- }
-
- free (s2dup);
-free_s1:
- free (s1dup);
- return matchcount;
-}
-
-int
-libgf_vmp_entry_match (struct vmp_entry *entry, char *path)
-{
- return libgf_strmatchcount (entry->vmp, path);
-}
-
-#define LIBGF_VMP_EXACT 1
-#define LIBGF_VMP_LONGESTPREFIX 0
-
-
-/* copies vmp from the vmp-entry having glusterfs handle @handle, into @vmp */
-char *
-libgf_vmp_search_vmp (glusterfs_handle_t handle, char *vmp, size_t vmp_size)
-{
- char *res = NULL;
- struct vmp_entry *entry = NULL;
-
- if (handle == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Virtual Mount Point "
- "list is empty.");
- goto unlock;
- }
-
- list_for_each_entry(entry, &vmplist.list, list) {
- if (entry->handle == handle) {
- if ((vmp_size) < (strlen (entry->vmp) + 1)) {
- errno = ENAMETOOLONG;
- goto unlock;
- }
-
- strcpy (vmp, entry->vmp);
- res = vmp;
- break;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&vmplock);
-
-out:
- return res;
-}
-
-
-struct vmp_entry *
-_libgf_vmp_search_entry (char *path, int searchtype)
-{
- struct vmp_entry *entry = NULL;
- int matchcount = 0;
- struct vmp_entry *maxentry = NULL;
- int maxcount = 0;
- int vmpcompcount = 0;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Search: path %s, type: %s",
- path, (searchtype == LIBGF_VMP_EXACT)?"Exact":"LongestPrefix");
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Virtual Mount Point "
- "list is empty.");
- goto out;
- }
-
- list_for_each_entry(entry, &vmplist.list, list) {
- vmpcompcount = libgf_count_path_components (entry->vmp);
- matchcount = libgf_vmp_entry_match (entry, path);
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Candidate VMP: %s,"
- " Matchcount: %d", entry->vmp, matchcount);
- if ((matchcount > maxcount) && (matchcount == vmpcompcount)) {
- maxcount = matchcount;
- maxentry = entry;
- }
- }
-
- /* To ensure that the longest prefix matched entry is also an exact
- * match, this is used to check whether duplicate entries are present
- * in the vmplist.
- */
- vmpcompcount = 0;
- if ((searchtype == LIBGF_VMP_EXACT) && (maxentry)) {
- vmpcompcount = libgf_count_path_components (maxentry->vmp);
- matchcount = libgf_count_path_components (path);
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Exact Check: VMP: %s,"
- " CompCount: %d, Path: %s, CompCount: %d",
- maxentry->vmp, vmpcompcount, path, matchcount);
- if (vmpcompcount != matchcount) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "No Match");
- maxentry = NULL;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Matches!");
- }
-
-out:
- return maxentry;
-}
-
-/* Used to search for a exactly matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_exact_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_EXACT);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-
-/* Used to search for a longest prefix matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_LONGESTPREFIX);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-int
-libgf_vmp_map_ghandle (char *vmp, glusterfs_handle_t *vmphandle)
-{
- int ret = -1;
- struct vmp_entry *vmpentry = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New Entry: %s", vmp);
- vmpentry = libgf_init_vmpentry (vmp, vmphandle);
- if (!vmpentry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to create VMP"
- " entry");
- goto out;
- }
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Empty list");
- INIT_LIST_HEAD (&vmplist.list);
- }
-
- list_add_tail (&vmpentry->list, &vmplist.list);
- ++vmplist.entries;
- }
- pthread_mutex_unlock (&vmplock);
- ret = 0;
-
-out:
- return ret;
-}
-
-/* Path must be validated already. */
-glusterfs_handle_t
-libgf_vmp_get_ghandle (char * path)
-{
- struct vmp_entry *entry = NULL;
-
- entry = libgf_vmp_search_entry (path);
-
- if (entry == NULL)
- return NULL;
-
- return entry->handle;
-}
-
-
-/* Returns the handle for the path given in @path,
- * @path can be a relative path. The point is, here we
- * perform any path resolution that is needed and then
- * search for the corresponding vmp handle.
- * @vpath is a result-value argument in that the virtual
- * path inside the handle is copied into it.
- */
-glusterfs_handle_t
-libgf_resolved_path_handle (const char *path, char *vpath)
-{
- char *respath = NULL;
- struct vmp_entry *entry = NULL;
- glusterfs_handle_t handle = NULL;
- char *tmp = NULL;
-
- if ((!path) || (!vpath))
- return NULL;
-
- /* We only want compaction before VMP entry search because the
- * VMP cannot be search unless we have an absolute path.
- * For absolute paths, we search for VMP first, then perform the
- * path compaction on the given virtual path.
- */
- if (!libgf_path_absolute (path)) {
- respath = libgf_resolve_path_light ((char *)path);
- if (respath == NULL)
- return NULL;
- }
-
- /* This condition is needed because in case of absolute paths, the path
- * would already include the VMP and we want to ensure that any path
- * compaction that happens does not exclude the VMP. In the absence of
- * this condition an absolute path might get compacted to "/", i.e.
- * exclude the VMP, and the search will fail.
- *
- * For relative paths, respath will aleady include a potential VMP
- * as a consequence of us prepending the CWD in resolve_light above.
- */
- if (libgf_path_absolute (path)) {
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry)
- goto free_respath;
- tmp = libgf_vmp_virtual_path (entry, path, vpath);
- if (!tmp)
- goto free_respath;
-
- respath = libgf_resolve_path_light (vpath);
- strcpy (vpath, respath);
- } else {
- entry = libgf_vmp_search_entry (respath);
- if (!entry)
- goto free_respath;
- tmp = libgf_vmp_virtual_path (entry, respath, vpath);
- if (!tmp)
- goto free_respath;
- }
-
- handle = entry->handle;
-free_respath:
- if (respath)
- free (respath); /* Alloced in libgf_resolve_path_light */
-
- return handle;
-}
-
-
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars)
-{
- glusterfs_handle_t vmphandle = NULL;
- int ret = -1;
- char *vmp_resolved = NULL;
- struct vmp_entry *vmp_entry = NULL;
- uint32_t vmphash = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ipars, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- vmphash = (dev_t)ReallySimpleHash (vmp, strlen (vmp));
- pthread_mutex_lock (&mountlock);
- {
- vmp_entry = libgf_vmp_search_exact_entry (vmp);
- if (vmp_entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Entry exists");
- ret = 0;
- goto unlock;
- }
-
- vmphandle = glusterfs_init (ipars, vmphash);
- if (!vmphandle) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "GlusterFS context"
- " init failed");
- goto unlock;
- }
-
- ret = libgf_vmp_map_ghandle (vmp_resolved, vmphandle);
- if (ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to map new"
- " handle: %s", vmp);
- glusterfs_fini (vmphandle);
- }
- }
-unlock:
- pthread_mutex_unlock (&mountlock);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-inline int
-_libgf_umount (char *vmp)
-{
- struct vmp_entry *entry= NULL;
- int ret = -1;
-
- entry = _libgf_vmp_search_entry (vmp, LIBGF_VMP_EXACT);
- if (entry == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) not mounted", vmp);
- goto out;
- }
-
- if (entry->handle == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) has no corresponding glusterfs handle",
- vmp);
- goto out;
- }
-
-/* ret = glusterfs_fini (entry->handle); */
- list_del_init (&entry->list);
- libgf_free_vmp_entry (entry);
-
- vmplist.entries--;
-
-out:
- return ret;
-}
-
-inline int
-libgf_umount (char *vmp)
-{
- int ret = -1;
-
- pthread_mutex_lock (&vmplock);
- {
- ret = _libgf_umount (vmp);
- }
- pthread_mutex_unlock (&vmplock);
-
- return ret;
-}
-
-int
-glusterfs_umount (char *vmp)
-{
- int ret = -1;
- char *vmp_resolved = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- ret = libgf_umount (vmp_resolved);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-int
-glusterfs_umount_all (void)
-{
- struct vmp_entry *entry = NULL, *tmp = NULL;
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries > 0) {
- list_for_each_entry_safe (entry, tmp, &vmplist.list,
- list) {
- /* even if there are errors, continue with other
- mounts
- */
- _libgf_umount (entry->vmp);
- }
- }
- }
- pthread_mutex_unlock (&vmplock);
-
- return 0;
-}
-
-void
-glusterfs_reset (void)
-{
- INIT_LIST_HEAD (&vmplist.list);
- vmplist.entries = 0;
-
- memset (&vmplock, 0, sizeof (vmplock));
- pthread_mutex_init (&vmplock, NULL);
-
- first_init = 1;
-}
-
-void
-glusterfs_log_lock (void)
-{
- gf_log_lock ();
-}
-
-
-void glusterfs_log_unlock (void)
-{
- gf_log_unlock ();
-}
-
-
-void
-libgf_wait_for_frames_unwind (libglusterfs_client_ctx_t *ctx)
-{
- call_pool_t *pool = NULL;
- int canreturn = 0;
-
- if (!ctx)
- return;
-
- pool = (call_pool_t *)ctx->gf_ctx.pool;
- while (1) {
- LOCK (&pool->lock);
- {
- if (pool->cnt == 0) {
- canreturn = 1;
- goto unlock_out;
- }
- }
-unlock_out:
- UNLOCK (&pool->lock);
-
- if (canreturn)
- break;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Waiting for call frames");
- sleep (1);
- }
-
- return;
-}
-
-
-int
-glusterfs_fini (glusterfs_handle_t handle)
-{
- libglusterfs_client_ctx_t *ctx = handle;
-
- libgf_wait_for_frames_unwind (ctx);
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- mem_pool_destroy (ctx->itable->inode_pool);
- mem_pool_destroy (ctx->itable->dentry_pool);
- mem_pool_destroy (ctx->itable->fd_mem_pool);
- /* iobuf_pool_destroy (ctx->gf_ctx.iobuf_pool); */
- ((gf_timer_registry_t *)ctx->gf_ctx.timer)->fin = 1;
-
- xlator_graph_fini (ctx->gf_ctx.graph);
- xlator_tree_free (ctx->gf_ctx.graph);
- ctx->gf_ctx.graph = NULL;
- pthread_cancel (ctx->reply_thread);
-
- FREE (ctx);
-
- return 0;
-}
-
-
-int32_t
-libgf_client_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- dict_t *xattr_req = NULL;
-
- if (op_ret == 0) {
- inode_t *parent = NULL;
-
- if (local->fop.lookup.loc->ino == 1) {
- buf->ia_ino = 1;
- }
-
- parent = local->fop.lookup.loc->parent;
- if (inode->ino != 1) {
- inode = inode_link (inode, parent,
- local->fop.lookup.loc->name, buf);
- }
-
- libgf_transform_iattr (ctx, inode, buf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup.loc->inode);
- local->fop.lookup.loc->inode = inode_new (ctx->itable);
- local->fop.lookup.is_revalidate = 2;
-
- if (local->fop.lookup.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- buf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
- STACK_WIND (frame, libgf_client_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- local->reply_stub = fop_lookup_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, dict, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf,
- dict_t **dict,
- dict_t *xattr_req)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- local = CALLOC (1, sizeof (*local));
- if (!local) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation"
- " failed");
- errno = ENOMEM;
- return -1;
- }
-
- if (loc->inode) {
- local->fop.lookup.is_revalidate = 1;
- loc->ino = loc->inode->ino;
- }
- else
- loc->inode = inode_new (ctx->itable);
-
- local->fop.lookup.loc = loc;
-
- LIBGF_CLIENT_FOP(ctx, stub, lookup, local, loc, xattr_req);
-
- op_ret = stub->args.lookup_cbk.op_ret;
- errno = stub->args.lookup_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.lookup_cbk.inode;
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_transform_iattr (ctx, inode, &stub->args.lookup_cbk.buf);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL,
- &stub->args.lookup_cbk.buf);
- if (stbuf)
- *stbuf = stub->args.lookup_cbk.buf;
-
- if (dict)
- *dict = dict_ref (stub->args.lookup_cbk.dict);
-
- if (inode != loc->inode) {
- inode_unref (loc->inode);
- loc->inode = inode_ref (inode);
- }
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- dict_t *dict = NULL;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
- struct iatt iatt = {0,};
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
- if (size < 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid size");
- errno = EINVAL;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (size) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "setting requested content size dictionary failed");
- goto out;
- }
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, &iatt, &dict, xattr_req);
- iatt_to_stat (&iatt, stbuf);
- if (!op_ret && stbuf && (iatt.ia_size <= size) && dict && buf) {
- data_t *mem_data = NULL;
- void *mem = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem != NULL) {
- memcpy (buf, mem, iatt.ia_size);
- }
- }
-
-out:
- if (xattr_req) {
- dict_unref (xattr_req);
- }
-
- if (dict) {
- dict_unref (dict);
- }
-
- if (pathname) {
- FREE (pathname);
- }
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf)
-{
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, stbuf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_get (h, vpath, buf, size, stbuf);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_lookup_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *stbuf,
- dict_t *dict,
- struct iatt *postparent)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- glusterfs_get_cbk_t lookup_cbk = local->fop.lookup_cbk.cbk;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- glusterfs_iobuf_t *iobuf = NULL;
- dict_t *xattr_req = NULL;
- inode_t *parent = NULL;
- struct stat stat = {0,};
-
- if (op_ret == 0) {
- parent = local->fop.lookup_cbk.loc->parent;
- inode_link (inode, parent, local->fop.lookup_cbk.loc->name,
- stbuf);
- libgf_transform_iattr (ctx, inode, stbuf);
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL, stbuf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup_cbk.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup_cbk.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup_cbk.loc->inode);
- local->fop.lookup_cbk.loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 2;
-
- if (local->fop.lookup_cbk.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup_cbk.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- stbuf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
-
- STACK_WIND (frame, libgf_client_lookup_async_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup_cbk.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- if (!op_ret && local->fop.lookup_cbk.size && dict) {
- data_t *mem_data = NULL;
- void *mem = NULL;
- struct iovec *vector = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem && stbuf->ia_size <= local->fop.lookup_cbk.size) {
- iobuf = CALLOC (1, sizeof (*iobuf));
- ERR_ABORT (iobuf);
-
- vector = CALLOC (1, sizeof (*vector));
- ERR_ABORT (vector);
- vector->iov_base = mem;
- vector->iov_len = stbuf->ia_size;
-
- iobuf->vector = vector;
- iobuf->count = 1;
- iobuf->dictref = dict_ref (dict);
- }
- }
-
- iatt_to_stat (stbuf, &stat);
- lookup_cbk (op_ret, op_errno, iobuf, &stat, local->cbk_data);
-
- libgf_client_loc_wipe (local->fop.lookup_cbk.loc);
- free (local->fop.lookup_cbk.loc);
-
- free (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-/* TODO: implement async dentry lookup */
-
-int
-glusterfs_get_async (glusterfs_handle_t handle,
- const char *path,
- size_t size,
- glusterfs_get_cbk_t cbk,
- void *cbk_data)
-{
- loc_t *loc = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- libglusterfs_client_async_local_t *local = NULL;
- int32_t op_ret = 0;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
-
- if (!ctx || !path || path[0] != '/') {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- local->fop.lookup_cbk.is_revalidate = 1;
-
- loc = CALLOC (1, sizeof (*loc));
- loc->path = strdup (path);
- op_ret = libgf_client_path_lookup (loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "path lookup failed for (%s)", path);
- goto out;
- }
-
- pathname = strdup (path);
- name = basename (pathname);
- op_ret = libgf_client_loc_fill (loc, ctx, 0, loc->parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (!loc->inode) {
- loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 0;
- }
-
- local->fop.lookup_cbk.cbk = cbk;
- local->fop.lookup_cbk.size = size;
- local->fop.lookup_cbk.loc = loc;
- local->cbk_data = cbk_data;
-
- if (size > 0) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- goto out;
- }
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libgf_client_lookup_async_cbk,
- lookup,
- loc,
- xattr_req);
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- return op_ret;
-}
-
-int32_t
-libgf_client_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_getxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_getxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, getxattr, local, loc, name);
-
- op_ret = stub->args.getxattr_cbk.op_ret;
- errno = stub->args.getxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.getxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- if ((size > 0) && (value != NULL)) {
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- op_ret = copy_len;
- }
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-#define LIBGF_DO_GETXATTR 1
-#define LIBGF_DO_LGETXATTR 2
-
-ssize_t
-__glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size,
- int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *file = NULL;
- char *pathres = NULL, *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu,"
- " op %d", path, name, (long unsigned)size, whichop);
- if (name[0] == '\0') {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: Name"
- " not NULL terminated");
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than zero");
- goto out;
- }
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- tmp = strdup (pathres);
- file = basename (tmp);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, file);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (whichop == LIBGF_DO_LGETXATTR)
- goto do_getx;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_getx;
-
- libgf_client_loc_wipe (&loc);
- op_ret = libgf_realpath_loc_fill (ctx, (char *)pathres, &loc);
- if (op_ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "realpath failed");
- goto out;
- }
-
-do_getx:
- op_ret = libgf_client_getxattr (ctx, &loc, name, value, size);
-
-out:
- if (tmp) {
- FREE (tmp);
- }
-
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_GETXATTR);
-}
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_LGETXATTR);
-}
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- if ((size > 0) && (value == NULL)) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument value");
- goto out;
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_getxattr (h, vpath, name, value, size,
- LIBGF_DO_GETXATTR);
-
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- if ((size > 0) && (value == NULL)) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument value");
- goto out;
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_getxattr (h, vpath, name, value, size,
- LIBGF_DO_LGETXATTR);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_open (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, open, local, loc, flags, fd, 0);
-
- op_ret = stub->args.open_cbk.op_ret;
- errno = stub->args.open_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "open: path %s, status: %d, errno"
- " %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-static int32_t
-libgf_client_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_create_cbk_stub (frame, NULL, op_ret, op_errno,
- fd, inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_creat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags,
- mode_t mode)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, create, local, loc, flags, mode, fd);
-
- op_ret = stub->args.create_cbk.op_ret;
- errno = stub->args.create_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Create: path %s, status: %d,"
- " errno: %d", loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.create_cbk.inode;
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.create_cbk.buf);
- libgf_transform_iattr (ctx, libgf_inode, &stub->args.create_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.create_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int32_t
-libgf_client_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_opendir_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_opendir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (((fd->flags & O_ACCMODE) == O_WRONLY)
- || ((fd->flags & O_ACCMODE) == O_RDWR)) {
- errno = EISDIR;
- goto out;
- }
- LIBGF_CLIENT_FOP (ctx, stub, opendir, local, loc, fd);
-
- op_ret = stub->args.opendir_cbk.op_ret;
- errno = stub->args.opendir_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "opendir: path %s, status %d,"
- " errno %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
-
- call_stub_destroy (stub);
-out:
- return op_ret;
-}
-
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,...)
-{
- loc_t loc = {0, };
- long op_ret = -1;
- fd_t *fd = NULL;
- int32_t ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- mode_t mode = 0;
- va_list ap;
- char *pathres = NULL;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if ((op_ret == -1) && ((flags & O_CREAT) != O_CREAT)) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- if (!op_ret && ((flags & O_CREAT) == O_CREAT)
- && ((flags & O_EXCL) == O_EXCL)) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- if (op_ret == 0) {
- flags &= ~O_CREAT;
- }
-
- if ((op_ret == -1) && ((flags & O_CREAT) == O_CREAT)) {
- libgf_client_loc_wipe (&loc);
- loc.path = strdup (pathres);
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for parent while trying to"
- " create (%s)", pathres);
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- }
-
- pathname = strdup (pathres);
- name = basename (pathname);
-
- ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- fd = fd_create (loc.inode, ctx->pid);
- fd->flags = flags;
-
- if (((flags & O_CREAT) == O_CREAT)) {
- /* If we have the st_mode for the basename, check if
- * it is a directory here itself, rather than sending
- * a network message through libgf_client_creat, and
- * then receiving a EISDIR.
- */
- if (IA_ISDIR (loc.inode->ia_type)) {
- errno = EISDIR;
- op_ret = -1;
- goto op_over;
- }
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- op_ret = libgf_client_creat (ctx, &loc, fd, flags, mode);
- } else {
- if (IA_ISDIR (loc.inode->ia_type))
- op_ret = libgf_client_opendir (ctx, &loc, fd);
- else
- op_ret = libgf_client_open (ctx, &loc, fd, flags);
- }
-
-op_over:
- if (op_ret == -1) {
- fd_unref (fd);
- fd = NULL;
- goto out;
- }
-
- vpath = NULL;
- if (IA_ISDIR (loc.inode->ia_type)) {
- vpath = (char *)path;
- }
-
- if (!libgf_get_fd_ctx (fd)) {
- if (!libgf_alloc_fd_ctx (ctx, fd, vpath)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to"
- " allocate fd context");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
- }
-
- if ((flags & O_TRUNC) && (((flags & O_ACCMODE) == O_RDWR)
- || ((flags & O_ACCMODE) == O_WRONLY))) {
- inode_ctx = libgf_get_inode_ctx (fd->inode);
- if (IA_ISREG (inode_ctx->stbuf.ia_type)) {
- inode_ctx->stbuf.ia_size = 0;
- inode_ctx->stbuf.ia_blocks = 0;
- }
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- FREE (pathname);
- }
-
- if (pathres)
- FREE (pathres);
-
- return fd;
-}
-
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...)
-{
- va_list ap;
- glusterfs_file_t fh = NULL;
- glusterfs_handle_t h = NULL;
- mode_t mode = 0;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- if (flags & O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- fh = glusterfs_glh_open (h, vpath, flags, mode);
- } else
- fh = glusterfs_glh_open (h, vpath, flags);
-out:
- return fh;
-}
-
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- return glusterfs_glh_open (handle, path,
- (O_CREAT | O_WRONLY | O_TRUNC), mode);
-}
-
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode)
-{
- glusterfs_file_t fh = NULL;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- fh = glusterfs_glh_creat (h, vpath, mode);
-
-out:
- return fh;
-}
-
-int32_t
-libgf_client_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_flush_cbk_stub (frame, NULL, op_ret, op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_flush (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- call_stub_t *stub;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, flush, local, fd);
-
- op_ret = stub->args.flush_cbk.op_ret;
- errno = stub->args.flush_cbk.op_errno;
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_close (glusterfs_file_t fd)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_flush (ctx, (fd_t *)fd);
-
- fd_unref ((fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_setxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_setxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
-
- LIBGF_CLIENT_FOP (ctx, stub, setxattr, local, loc, dict, flags);
-
- op_ret = stub->args.setxattr_cbk.op_ret;
- errno = stub->args.setxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, status %d,"
- "errno %d", loc->path, name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-#define LIBGF_DO_SETXATTR 1
-#define LIBGF_DO_LSETXATTR 2
-
-int
-__glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags, int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *tmppath = NULL;
- loc_t *realloc = NULL;
- char *pathres = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, op %d", path
- ,name, whichop);
- if (size <= 0) {
- errno = EINVAL;
- goto out;
- }
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", pathres);
- goto out;
- }
-
- tmppath = strdup (pathres);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (tmppath));
- FREE (tmppath);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- realloc = &loc;
- if (whichop == LIBGF_DO_LSETXATTR)
- goto do_setx;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_setx;
-
- libgf_client_loc_wipe (&loc);
- realloc = &loc;
- libgf_realpath_loc_fill (ctx, (char *)pathres, realloc);
-
-do_setx:
- if (!op_ret)
- op_ret = libgf_client_setxattr (ctx, realloc, name, value,
- size, flags);
-
-out:
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (realloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_SETXATTR);
-}
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_LSETXATTR);
-}
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_setxattr (h, vpath, name, value, size, flags,
- LIBGF_DO_SETXATTR);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_setxattr (h, vpath, name, value, size, flags,
- LIBGF_DO_LSETXATTR);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_fsetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_fsetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
- LIBGF_CLIENT_FOP (ctx, stub, fsetxattr, local, fd, dict, flags);
-
- op_ret = stub->args.fsetxattr_cbk.op_ret;
- errno = stub->args.fsetxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "name %s, status %d, errno %d",
- name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- int32_t op_ret = 0;
- fd_t *__fd = fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- gf_log("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid fd");
- goto out;
- }
-
- if (size <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than or equal to zero");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fsetxattr (ctx, __fd, name, value, size,
- flags);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_fgetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fgetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_fgetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fgetxattr, local, fd, name);
-
- op_ret = stub->args.fgetxattr_cbk.op_ret;
- errno = stub->args.fgetxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.fgetxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s, status %d, errno %d",
- name, op_ret, errno);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd,
- const char *name,
- void *value,
- size_t size)
-{
- int32_t op_ret = 0;
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s", name);
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0)
- goto out;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fgetxattr (ctx, __fd, name, value, size);
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd,
- const char *name)
-{
- return ENOSYS;
-}
-
-int32_t
-libgf_client_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readv_cbk_stub (frame, NULL, op_ret, op_errno,
- vector, count, stbuf, iobref);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_iobuf_read (libglusterfs_client_ctx_t *ctx, fd_t *fd, void *buf,
- size_t size, off_t offset)
-{
- call_stub_t *stub = NULL;
- struct iovec *vector = NULL;
- int32_t op_ret = -1;
- int count = 0;
- libgf_client_local_t *local = NULL;
- struct iatt *stbuf = NULL;
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- count = stub->args.readv_cbk.count;
- vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- int i = 0;
- op_ret = 0;
- while (size && (i < count)) {
- int len = (size < vector[i].iov_len) ?
- size : vector[i].iov_len;
- memcpy (buf, vector[i++].iov_base, len);
- buf += len;
- size -= len;
- op_ret += len;
- }
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_iattr (ctx, fd->inode, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "size %lu, offset %"PRIu64,
- (long unsigned)size, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-libgf_client_read (libglusterfs_client_ctx_t *ctx, fd_t *fd, void *buf,
- size_t size, off_t offset)
-{
- int32_t op_ret = -1;
- int32_t ret = 0;
- size_t tmp = 0;
-
- while (size != 0) {
- tmp = ((size > LIBGF_IOBUF_SIZE) ? LIBGF_IOBUF_SIZE :
- size);
- op_ret = libgf_client_iobuf_read (ctx, fd, buf, tmp, offset);
- if (op_ret < 0) {
- ret = op_ret;
- break;
- }
-
- ret += op_ret;
-
- if (op_ret < tmp)
- break;
-
- size -= op_ret;
- offset += op_ret;
- buf = (char *)buf + op_ret;
- }
-
- return ret;
-}
-
-ssize_t
-glusterfs_read (glusterfs_file_t fd, void *buf, size_t nbytes)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (nbytes < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (fd == 0) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, nbytes, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-libgf_client_iobuf_readv (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- const struct iovec *dst_vector, int count,
- size_t size, off_t offset, int *idx,
- off_t *vec_offset)
-{
- call_stub_t *stub = NULL;
- struct iovec *src_vector = NULL;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
- int src = 0, dst = 0;
- int src_count = 0, dst_count = 0;
- int len = 0, src_len = 0, dst_len = 0;
- off_t src_offset = 0, dst_offset = 0;
- struct iatt *stbuf = NULL;
-
- dst = *idx;
- dst_offset = *vec_offset;
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- src_count = stub->args.readv_cbk.count;
- src_vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- while ((size != 0) && (dst < dst_count) && (src < src_count)) {
- src_len = src_vector[src].iov_len - src_offset;
- dst_len = dst_vector[dst].iov_len - dst_offset;
-
- len = (src_len < dst_len) ? src_len : dst_len;
- if (len > size) {
- len = size;
- }
-
- memcpy (dst_vector[dst].iov_base + dst_offset,
- src_vector[src].iov_base + src_offset, len);
-
- size -= len;
- src_offset += len;
- dst_offset += len;
-
- if (src_offset == src_vector[src].iov_len) {
- src_offset = 0;
- src++;
- }
-
- if (dst_offset == dst_vector[dst].iov_len) {
- dst_offset = 0;
- dst++;
- }
- }
-
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_iattr (ctx, fd->inode, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_UPDATE_STAT);
- }
-
- *idx = dst;
- *vec_offset = dst_offset;
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-ssize_t
-libgf_client_readv (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- const struct iovec *dst_vector, int dst_count, off_t offset)
-{
- int32_t op_ret = -1;
- size_t size = 0, tmp = 0, ret = 0;
- int i = 0;
- int dst_idx = 0;
- off_t dst_offset = 0;
-
- for (i = 0; i < dst_count; i++)
- {
- size += dst_vector[i].iov_len;
- }
-
- while (size != 0) {
- tmp = ((size > LIBGF_IOBUF_SIZE) ? LIBGF_IOBUF_SIZE : size);
- op_ret = libgf_client_iobuf_readv (ctx, fd, dst_vector,
- dst_count, tmp, offset,
- &dst_idx, &dst_offset);
- if (op_ret <= 0) {
- break;
- }
-
- offset += op_ret;
- size -= op_ret;
- ret += op_ret;
- }
-
- return ret;
-}
-
-
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readv (ctx, (fd_t *)fd, vec, count, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pread (glusterfs_file_t fd,
- void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, count, offset);
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_client_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_writev_cbk_stub (frame, NULL, op_ret, op_errno,
- prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_iobuf_write (libglusterfs_client_ctx_t *ctx, fd_t *fd, char *addr,
- size_t size, off_t offset)
-{
- struct iobref *ioref = NULL;
- struct iobuf *iob = NULL;
- int op_ret = -1;
- struct iovec iov = {0, };
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, addr, out);
-
- ioref = iobref_new ();
- if (!ioref) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- iob = iobuf_get (ctx->gf_ctx.iobuf_pool);
- if (!iob) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- memcpy (iob->ptr, addr, size);
- iobref_add (ioref, iob);
-
- iov.iov_base = iob->ptr;
- iov.iov_len = size;
-
- LIBGF_CLIENT_FOP (ctx, stub, writev, local, fd, &iov,
- 1, offset, ioref);
-
- op_ret = stub->args.writev_cbk.op_ret;
- errno = stub->args.writev_cbk.op_errno;
-
- /* We need to invalidate because it is possible that write-behind
- * is a translator below us and returns a stat filled with zeroes.
- */
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
-
-out:
- if (iob) {
- iobuf_unref (iob);
- }
-
- if (ioref) {
- iobref_unref (ioref);
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-libgf_client_writev (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct iovec *vector,
- int count,
- off_t offset)
-{
- int op_ret = 0;
- int written = 0;
- int writesize = 0;
- int size = 0;
- char *base = NULL;
- int i = 0;
-
- for (i = 0; i < count; i++) {
- size = vector[i].iov_len;
- base = vector[i].iov_base;
-
- while (size > 0) {
- writesize = (size > LIBGF_IOBUF_SIZE) ?
- LIBGF_IOBUF_SIZE : size;
-
- written = libgf_client_iobuf_write (ctx, fd, base,
- writesize, offset);
-
- if (written == -1)
- goto out;
-
- op_ret += written;
- base += written;
- size -= written;
- offset += written;
- }
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_write (glusterfs_file_t fd,
- const void *buf,
- size_t n)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (n < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (n == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- vector.iov_base = (void *)buf;
- vector.iov_len = n;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_writev (glusterfs_file_t fd,
- const struct iovec *vector,
- int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- (struct iovec *)vector,
- count,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd,
- const void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = count;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
-out:
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- libgf_client_local_t *local = frame->local;
-
- /* Note, we dont let entries reach the stub because there it gets copied
- * while we can simply delink the entries here and link them into our
- * dcache, thereby avoiding the need to perform more allocations and
- * copies.
- */
- local->reply_stub = fop_readdirp_cbk_stub (frame, NULL, op_ret,
- op_errno, NULL);
- if (op_ret > 0)
- libgf_dcache_update (frame->root->state, local->fd, entries);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (libgf_dcache_readdir (ctx, fd, dirp, offset))
- return 1;
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readdirp, local, fd,
- LIBGF_READDIR_BLOCK, *offset);
-
- errno = stub->args.readdir_cbk.op_errno;
-
- op_ret = libgf_dcache_readdir (ctx, fd, dirp, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, entry, out);
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp,
- &offset);
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed:"
- " %s", strerror (errno));
- if (result && (op_ret == 0)) {
- *result = NULL;
- } else if (op_ret < 0){
- op_ret = errno;
- }
- goto unlock;
- }
-
- fd_ctx->offset = offset;
-
- if (result) {
- *result = memcpy (entry, dirp, sizeof (*entry));
- } else {
- memcpy (entry, dirp, sizeof (*entry));
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", "
- " entry %s", offset, entry->d_name);
- op_ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return op_ret;
-}
-
-
-void *
-glusterfs_readdir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp, &offset);
-
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed: %s",
- strerror (errno));
- dirp = NULL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", entry %s",
- offset, dirp->d_name);
-out:
- return dirp;
-}
-
-
-int
-glusterfs_getdents (glusterfs_file_t fd, struct dirent *dirp,
- unsigned int count)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, &offset);
-
- if (op_ret > 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libglusterfs_readv_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- glusterfs_iobuf_t *buf;
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *__fd = local->fop.readv_cbk.fd;
- glusterfs_readv_cbk_t readv_cbk = local->fop.readv_cbk.cbk;
-
- buf = CALLOC (1, sizeof (*buf));
- ERR_ABORT (buf);
-
- if (vector) {
- buf->vector = iov_dup (vector, count);
- }
-
- buf->count = count;
-
- if (iobref) {
- buf->iobref = iobref_ref (iobref);
- }
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (__fd);
-
- /* update offset only if we have used offset stored in fd_ctx */
- if (local->fop.readv_cbk.update_offset) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
- }
-
- readv_cbk (op_ret, op_errno, buf, local->cbk_data);
-
- FREE (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-void
-glusterfs_free (glusterfs_iobuf_t *buf)
-{
- //iov_free (buf->vector, buf->count);
- FREE (buf->vector);
- if (buf->iobref)
- iobref_unref ((struct iobref *) buf->iobref);
- if (buf->dictref)
- dict_unref ((dict_t *) buf->dictref);
- FREE (buf);
-}
-
-int
-glusterfs_read_async (glusterfs_file_t fd,
- size_t nbytes,
- off_t offset,
- glusterfs_readv_cbk_t readv_cbk,
- void *cbk_data)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
-
- if (nbytes < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.readv_cbk.fd = __fd;
- local->fop.readv_cbk.cbk = readv_cbk;
- local->cbk_data = cbk_data;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- local->fop.readv_cbk.update_offset = 1;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_readv_async_cbk,
- readv,
- __fd,
- nbytes,
- offset);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libglusterfs_writev_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *fd = NULL;
- glusterfs_write_cbk_t write_cbk;
-
- write_cbk = local->fop.write_cbk.cbk;
- fd = local->fop.write_cbk.fd;
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- write_cbk (op_ret, op_errno, local->cbk_data);
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-int32_t
-glusterfs_write_async (glusterfs_file_t fd,
- const void *buf,
- size_t nbytes,
- off_t offset,
- glusterfs_write_cbk_t write_cbk,
- void *cbk_data)
-{
- fd_t *__fd = (fd_t *)fd;
- struct iovec vector;
- off_t __offset = offset;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
- struct iobref *iobref = NULL;
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (nbytes < 0) {
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.write_cbk.fd = __fd;
- local->fop.write_cbk.cbk = write_cbk;
- local->cbk_data = cbk_data;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = nbytes;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- iobref = iobref_new ();
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_writev_async_cbk,
- writev,
- __fd,
- &vector,
- 1,
- __offset,
- iobref);
- iobref_unref (iobref);
-
-out:
- return op_ret;
-}
-
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence)
-{
- off_t __offset = 0;
- int32_t op_ret = -1;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- __offset = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (whence)
- {
- case SEEK_SET:
- __offset = offset;
- break;
-
- case SEEK_CUR:
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- __offset += offset;
- break;
-
- case SEEK_END:
- {
- char cache_valid = 0;
- off_t end = 0;
- loc_t loc = {0, };
- struct iatt stbuf = {0, };
-
- cache_valid = libgf_is_iattr_cache_valid (ctx, __fd->inode,
- &stbuf,
- LIBGF_VALIDATE_STAT);
- if (cache_valid) {
- end = stbuf.ia_size;
- } else {
- op_ret = libgf_client_loc_fill (&loc, ctx,
- __fd->inode->ino, 0,
- NULL);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- libgf_client_loc_wipe (&loc);
- __offset = -1;
- goto out;
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, &stbuf, NULL,
- NULL);
- if (op_ret < 0) {
- __offset = -1;
- libgf_client_loc_wipe (&loc);
- goto out;
- }
-
- end = stbuf.ia_size;
- }
-
- __offset = end + offset;
- libgf_client_loc_wipe (&loc);
- }
- break;
-
- default:
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid value for whence");
- __offset = -1;
- errno = EINVAL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = __offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return __offset;
-}
-
-
-int32_t
-libgf_client_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_stat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_stat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct iatt cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, loc->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- if (stbuf)
- memcpy (stbuf, &cachedbuf, sizeof (struct stat));
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, stat, local, loc);
-
- op_ret = stub->args.stat_cbk.op_ret;
- errno = stub->args.stat_cbk.op_errno;
- libgf_transform_iattr (ctx, loc->inode, &stub->args.stat_cbk.buf);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
-
- if (op_ret == 0) {
- if (stbuf)
- *stbuf = stub->args.stat_cbk.buf;
-
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.stat_cbk.buf);
- }
-
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc)
-{
- int op_ret = -1;
- char *target = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, link, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, targetloc, out);
-
- targetloc->path = glusterfs_glh_realpath (ctx, link, NULL);
-
- if (targetloc->path == NULL)
- goto out;
-
- op_ret = libgf_client_path_lookup (targetloc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- target = strdup (targetloc->path);
- op_ret = libgf_client_loc_fill (targetloc, ctx, 0,
- targetloc->parent->ino,
- basename (target));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
-out:
- if (target)
- FREE (target);
-
- return op_ret;
-}
-
-#define LIBGF_DO_LSTAT 0x01
-#define LIBGF_DO_STAT 0x02
-
-int
-__glusterfs_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf, int whichstat)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- loc_t targetloc = {0, };
- loc_t *real_loc = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op: %d", path,
- whichstat);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- real_loc = &loc;
- /* The stat fop in glusterfs calls lstat. So we have to
- * provide the POSIX compatible stat fop. To do so, we need to ensure
- * that if the @path is a symlink, we must perform a stat on the
- * target of that symlink than the symlink itself(..because if
- * do a stat on the symlink, we're actually doing what lstat
- * should do. See posix_stat
- */
- if (whichstat & LIBGF_DO_LSTAT)
- goto lstat_fop;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto lstat_fop;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
- real_loc = &targetloc;
-
-lstat_fop:
-
- if (!op_ret) {
- struct iatt iatt;
- op_ret = libgf_client_stat (ctx, real_loc, &iatt);
- iatt_to_stat (&iatt, buf);
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
-
- return op_ret;
-}
-
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_STAT);
-}
-
-int
-glusterfs_stat (const char *path, struct stat *buf)
-{
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_stat (h, vpath, buf);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path, struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_LSTAT);
-}
-
-int
-glusterfs_lstat (const char *path, struct stat *buf)
-{
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_lstat (h, vpath, buf);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fstat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-
-}
-
-int32_t
-libgf_client_fstat (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct stat *buf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct iatt cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, fd->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- if (buf)
- memcpy (buf, &cachedbuf, sizeof (struct stat));
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, fstat, local, fd);
-
- op_ret = stub->args.fstat_cbk.op_ret;
- errno = stub->args.fstat_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
-
- if (op_ret == 0) {
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.fstat_cbk.buf);
- if (buf)
- iatt_to_stat (&stub->args.fstat_cbk.buf, buf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fstat_cbk.buf);
- }
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int32_t
-glusterfs_fstat (glusterfs_file_t fd, struct stat *buf)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = -1;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_fstat (ctx, __fd, buf);
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libgf_client_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mkdir_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-static int32_t
-libgf_client_mkdir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- mode_t mode)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mkdir, local, loc, mode);
- op_ret = stub->args.mkdir_cbk.op_ret;
- errno = stub->args.mkdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.mkdir_cbk.inode;
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.mkdir_cbk.buf);
- libgf_transform_iattr (ctx, libgf_inode, &stub->args.mkdir_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.mkdir_cbk.buf);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-
-int32_t
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mkdir (ctx, &loc, mode);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_mkdir (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_mkdir (h, vpath, mode);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rmdir_cbk_stub (frame, NULL, op_ret, op_errno,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_rmdir (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rmdir, local, loc);
-
- op_ret = stub->args.rmdir_cbk.op_ret;
- errno = stub->args.rmdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (stub->args.rmdir_cbk.op_ret != 0)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int32_t
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_rmdir (ctx, &loc);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_rmdir (const char *path)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_rmdir (h, vpath);
-out:
- return op_ret;
-}
-
-int
-libgf_client_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_setattr_cbk_stub (frame, NULL,
- op_ret, op_errno,
- preop, postop);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_setattr (libglusterfs_client_ctx_t *ctx, loc_t * loc,
- struct iatt *stbuf, int32_t valid)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, setattr, local, loc,
- stbuf, valid);
-
- op_ret = stub->args.setattr_cbk.op_ret;
- errno = stub->args.setattr_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, loc->inode,
- &stub->args.setattr_cbk.statpost);
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.setattr_cbk.statpost);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- stbuf.ia_prot = ia_prot_from_st_mode (mode);
- valid |= GF_SET_ATTR_MODE;
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_chmod (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_chmod (h, vpath, mode);
-out:
- return op_ret;
-}
-
-
-#define LIBGF_DO_CHOWN 1
-#define LIBGF_DO_LCHOWN 2
-
-int
-__glusterfs_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group, int whichop)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- loc_t *oploc = NULL;
- loc_t targetloc = {0, };
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op %d", path, whichop);
- stbuf.ia_uid = owner;
- stbuf.ia_gid = group;
- valid |= (GF_SET_ATTR_UID | GF_SET_ATTR_GID);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename ((char *)name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- oploc = &loc;
- if (whichop == LIBGF_DO_LCHOWN)
- goto do_lchown;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_lchown;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
-
- oploc = &targetloc;
-do_lchown:
- op_ret = libgf_client_setattr (ctx, oploc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_CHOWN);
-}
-
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_chown (h, vpath, owner, group);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_LCHOWN);
-}
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_lchown (h, vpath, owner, group);
-out:
- return op_ret;
-}
-
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- fd_t *dirfd = NULL;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (!IA_ISDIR (loc.inode->ia_type) && !IA_ISLNK (loc.inode->ia_type)) {
- errno = ENOTDIR;
- op_ret = -1;
- goto out;
- }
-
- dirfd = fd_create (loc.inode, ctx->pid);
- op_ret = libgf_client_opendir (ctx, &loc, dirfd);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- goto out;
- }
-
- if (!libgf_get_fd_ctx (dirfd)) {
- if (!(libgf_alloc_fd_ctx (ctx, dirfd, (char *)path))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Context "
- "allocation failed");
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
- }
-
-out:
- if (name)
- FREE (name);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- }
-
- libgf_client_loc_wipe (&loc);
- return dirfd;
-}
-
-glusterfs_dir_t
-glusterfs_opendir (const char *path)
-{
- char vpath[PATH_MAX];
- glusterfs_dir_t dir = NULL;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- dir = glusterfs_glh_opendir (h, vpath);
-out:
- return dir;
-}
-
-int
-glusterfs_closedir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, dirfd, out);
- fdctx = libgf_get_fd_ctx (dirfd);
-
- if (fdctx == NULL) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- op_ret = libgf_client_flush (fdctx->ctx, (fd_t *)dirfd);
- fd_unref ((fd_t *)dirfd);
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_client_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsetattr_cbk_stub (frame, NULL,
- op_ret, op_errno,
- preop, postop);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_fsetattr (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fsetattr, local, fd, stbuf, valid);
-
- op_ret = stub->args.fsetattr_cbk.op_ret;
- errno = stub->args.fsetattr_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.fsetattr_cbk.statpost);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fsetattr_cbk.statpost);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- fdctx = libgf_get_fd_ctx (fd);
-
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- stbuf.ia_prot = ia_prot_from_st_mode (mode);
- valid |= GF_SET_ATTR_MODE;
-
- op_ret = libgf_client_fsetattr (fdctx->ctx, fd, &stbuf, valid);
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t uid, gid_t gid)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
- stbuf.ia_uid = uid;
- stbuf.ia_gid = gid;
-
- valid |= (GF_SET_ATTR_UID | GF_SET_ATTR_GID);
-
- op_ret = libgf_client_fsetattr (fdctx->ctx, fd, &stbuf, valid);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsync_cbk_stub (frame, NULL, op_ret, op_errno,
- prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_fsync (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
-
- LIBGF_CLIENT_FOP (ctx, stub, fsync, local, fd, 0);
-
- op_ret = stub->args.fsync_cbk.op_ret;
- errno = stub->args.fsync_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsync (glusterfs_file_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx ((fd_t *)fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_fsync (fdctx->ctx, (fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator
- ,int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_ftruncate_cbk_stub (frame, NULL, op_ret,
- op_errno, prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_ftruncate (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- off_t length)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- if (!(((fd->flags & O_ACCMODE) == O_RDWR)
- || ((fd->flags & O_ACCMODE) == O_WRONLY))) {
- errno = EBADF;
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, ftruncate, local, fd, length);
-
- op_ret = stub->args.ftruncate_cbk.op_ret;
- errno = stub->args.ftruncate_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.ftruncate_cbk.postbuf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.ftruncate_cbk.postbuf);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&fdctx->lock);
- {
- fdctx->offset = stub->args.ftruncate_cbk.postbuf.ia_size;
- }
- pthread_mutex_unlock (&fdctx->lock);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_ftruncate (fdctx->ctx, fd, length);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_link_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_link (libglusterfs_client_ctx_t *ctx, loc_t *old, loc_t *new)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int op_ret = -1;
- inode_t *inode = NULL;
- struct iatt *sbuf = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, link, local, old, new);
-
- op_ret = stub->args.link_cbk.op_ret;
- errno = stub->args.link_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d,"
- " errno %d", old->path, new->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.link_cbk.inode;
- sbuf = &stub->args.link_cbk.buf;
- inode_link (inode, new->parent, basename ((char *)new->path), sbuf);
- libgf_transform_iattr (ctx, inode, sbuf);
- inode_lookup (inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT, sbuf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- int op_ret = -1;
- loc_t old = {0,};
- loc_t new = {0,};
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- old.path = strdup (oldpath);
- if (!old.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&old, ctx, 1);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- oldname = strdup (old.path);
- op_ret = libgf_client_loc_fill (&old, ctx, 0, old.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (IA_ISDIR (old.inode->ia_type)) {
- errno = EPERM;
- op_ret = -1;
- goto out;
- }
-
- new.path = strdup (newpath);
- if (!new.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&new, ctx, 1);
- if (op_ret == 0) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- newname = strdup (new.path);
- new.inode = inode_ref (old.inode);
- libgf_client_loc_fill (&new, ctx, 0, new.parent->ino,
- basename (newname));
- op_ret = libgf_client_link (ctx, &old, &new);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&old);
- libgf_client_loc_wipe (&new);
-
- return op_ret;
-}
-
-int
-glusterfs_link (const char *oldpath, const char *newpath)
-{
- int op_ret = -1;
- char oldvpath[PATH_MAX];
- char newvpath[PATH_MAX];
- glusterfs_handle_t oldh = NULL;
- glusterfs_handle_t newh = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- oldh = libgf_resolved_path_handle (oldpath, oldvpath);
- if (!oldh) {
- errno = ENODEV;
- goto out;
- }
-
- newh = libgf_resolved_path_handle (newpath, newvpath);
- if (!newh) {
- errno = ENODEV;
- goto out;
- }
-
- /* Cannot hard link across glusterfs mounts. */
- if (newh != oldh) {
- errno = EXDEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_link (newh, oldvpath, newvpath);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_statfs_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_statvfs (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- struct statvfs *buf)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int32_t op_ret = -1;
-
- /* statfs fop receives struct statvfs as an argument */
-
- /* libgf_client_statfs_cbk will be the callback, not
- libgf_client_statvfs_cbk. see definition of LIBGF_CLIENT_FOP
- */
- LIBGF_CLIENT_FOP (ctx, stub, statfs, local, loc);
-
- op_ret = stub->args.statfs_cbk.op_ret;
- errno = stub->args.statfs_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (buf)
- memcpy (buf, &stub->args.statfs_cbk.buf, sizeof (*buf));
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf)
-{
- struct statvfs stvfs = {0, };
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, &stvfs);
- if (op_ret == 0) {
-#ifdef GF_SOLARIS_HOST_OS
- buf->f_fstyp = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
-#else
- buf->f_type = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_bavail = stvfs.f_bavail;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
- /* FIXME: buf->f_fsid has either "val" or "__val" as member
- based on conditional macro expansion. see definition of
- fsid_t - Raghu
- It seems have different structure member names on
- different archs, so I am stepping down to doing a struct
- to struct copy. :Shehjar
- */
- memcpy (&buf->f_fsid, &stvfs.f_fsid, sizeof (stvfs.f_fsid));
- buf->f_namelen = stvfs.f_namemax;
-#endif
- }
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statfs (const char *path, struct statfs *buf)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_statfs (h, vpath, buf);
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning"
- " EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, buf);
- if (op_ret != -1)
- /* Should've been a call to libgf_transform_iattr but
- * that only handles struct stat
- */
- buf->f_fsid = (unsigned long)ctx->fake_fsid;
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf)
-{
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_statvfs (h, vpath, buf);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rename_cbk_stub (frame, NULL, op_ret, op_errno,
- buf, preoldparent,
- postoldparent, prenewparent,
- postnewparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_rename (libglusterfs_client_ctx_t *ctx, loc_t *oldloc,
- loc_t *newloc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rename, local, oldloc, newloc);
-
- op_ret = stub->args.rename_cbk.op_ret;
- errno = stub->args.rename_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d, errno"
- " %d", oldloc->path, newloc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (!libgf_get_inode_ctx (newloc->inode))
- libgf_alloc_inode_ctx (ctx, newloc->inode);
-
- libgf_transform_iattr (ctx, newloc->inode, &stub->args.rename_cbk.buf);
- libgf_update_iattr_cache (newloc->inode, LIBGF_UPDATE_STAT,
- &stub->args.rename_cbk.buf);
-
- inode_unlink (oldloc->inode, oldloc->parent, oldloc->name);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- loc_t oldloc = {0, }, newloc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *newname = NULL;
- char *oldname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- oldloc.path = strdup (oldpath);
- if (!oldloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&oldloc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", oldloc.path);
- goto out;
- }
-
- newloc.path = strdup (newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
-
- oldname = strdup (oldloc.path);
- op_ret = libgf_client_loc_fill (&oldloc, ctx, 0, oldloc.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- op_ret = libgf_client_rename (ctx, &oldloc, &newloc);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&newloc);
- libgf_client_loc_wipe (&oldloc);
-
- return op_ret;
-}
-
-
-int
-glusterfs_rename (const char *oldpath, const char *newpath)
-{
- int op_ret = -1;
- char oldvpath[PATH_MAX];
- char newvpath[PATH_MAX];
- glusterfs_handle_t oldh = NULL;
- glusterfs_handle_t newh = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Old %s, new %s", oldpath,
- newpath);
-
- oldh = libgf_resolved_path_handle (oldpath, oldvpath);
- if (!oldh) {
- errno = ENODEV;
- goto out;
- }
-
- newh = libgf_resolved_path_handle (newpath, newvpath);
- if (!newh) {
- errno = ENODEV;
- goto out;
- }
-
- if (oldh != newh) {
- errno = EXDEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_rename (oldh, oldvpath, newvpath);
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2])
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- stbuf.ia_atime = times[0].tv_sec;
- stbuf.ia_atime_nsec = times[0].tv_usec * 1000;
- stbuf.ia_mtime = times[1].tv_sec;
- stbuf.ia_mtime_nsec = times[1].tv_usec * 1000;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utimes (const char *path, const struct timeval times[2])
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_utimes (h, vpath, times);
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- if (buf) {
- stbuf.ia_atime = buf->actime;
- stbuf.ia_atime_nsec = 0;
-
- stbuf.ia_mtime = buf->modtime;
- stbuf.ia_mtime_nsec = 0;
- }
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_utime (h, vpath, buf);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mknod_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_mknod (libglusterfs_client_ctx_t *ctx, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mknod, local, loc, mode, rdev);
-
- op_ret = stub->args.mknod_cbk.op_ret;
- errno = stub->args.mknod_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- inode = stub->args.mknod_cbk.inode;
- inode_link (inode, loc->parent, loc->name, &stub->args.mknod_cbk.buf);
- libgf_transform_iattr (ctx, inode, &stub->args.mknod_cbk.buf);
- inode_lookup (inode);
-
- if (!libgf_alloc_inode_ctx (ctx, inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.mknod_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *path, mode_t mode,
- dev_t dev)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- FREE (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
-
- h = libgf_resolved_path_handle (pathname, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
-
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
- dev_t dev = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to resolve name");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode | S_IFIFO, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- free (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mkfifo (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_mkfifo (h, vpath, mode);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_unlink_cbk_stub (frame, NULL, op_ret, op_errno,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_unlink (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, unlink, local, loc);
-
- op_ret = stub->args.unlink_cbk.op_ret;
- errno = stub->args.unlink_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", loc->path);
- if (op_ret == -1)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_unlink (const char *path)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_unlink (h, vpath);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_symlink_cbk_stub (frame, NULL, op_ret,
- op_errno, inode, buf,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_symlink (libglusterfs_client_ctx_t *ctx, const char *linkpath,
- loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, symlink, local, linkpath, loc);
-
- op_ret = stub->args.symlink_cbk.op_ret;
- errno = stub->args.symlink_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s, status %d"
- " errno %d", linkpath, loc->path, op_ret, errno);
- inode = stub->args.symlink_cbk.inode;
- inode_link (inode, loc->parent, loc->name,
- &stub->args.symlink_cbk.buf);
- libgf_transform_iattr (ctx, inode, &stub->args.symlink_cbk.buf);
- inode_lookup (inode);
- if (!libgf_get_inode_ctx (inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.symlink_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t oldloc = {0, };
- loc_t newloc = {0, };
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
- /* Old path does not need to be interpreted or looked up */
- oldloc.path = strdup (oldpath);
-
- newloc.path = strdup (newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
- if (op_ret == 0) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "new path (%s) already exists, "
- " returning EEXIST", newloc.path);
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- newloc.inode = inode_new (ctx->itable);
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
-
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_symlink (ctx, oldpath, &newloc);
-
-out:
- if (newname)
- FREE (newname);
-
- if (oldname)
- FREE (oldname);
- libgf_client_loc_wipe (&oldloc);
- libgf_client_loc_wipe (&newloc);
- return op_ret;
-}
-
-int
-glusterfs_symlink (const char *oldpath, const char *newpath)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
-
- h = libgf_resolved_path_handle (newpath, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_symlink (h, oldpath, vpath);
-out:
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readlink_cbk_stub (frame, NULL, op_ret,
- op_errno, path, sbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc, char *buf,
- size_t bufsize)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- size_t cpy_size = 0;
-
- LIBGF_CLIENT_FOP (ctx, stub, readlink, local, loc, bufsize);
-
- op_ret = stub->args.readlink_cbk.op_ret;
- errno = stub->args.readlink_cbk.op_errno;
-
- if (op_ret != -1) {
- cpy_size = ((op_ret <= bufsize) ? op_ret : bufsize);
- memcpy (buf, stub->args.readlink_cbk.buf, cpy_size);
- op_ret = cpy_size;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, target: %s,"
- " status %d, errno %d", loc->path, buf, op_ret, errno);
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, status %d, "
- "errno %d", loc->path, op_ret, errno);
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- if (bufsize < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (bufsize == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_readlink (ctx, &loc, buf, bufsize);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_readlink (h, vpath, buf, bufsize);
-out:
- return op_ret;
-}
-
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path)
-{
- char *buf = NULL;
- char *rpath = NULL;
- char *start = NULL, *end = NULL;
- char *dest = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- long int path_max = 0;
- char *ptr = NULL;
- struct stat stbuf = {0, };
- long int new_size = 0;
- char *new_rpath = NULL;
- int dest_offset = 0;
- char *rpath_limit = 0;
- int ret = 0, num_links = 0;
- char *vpath = NULL, *tmppath = NULL;
- char absolute_path[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
-#ifdef PATH_MAX
- path_max = PATH_MAX;
-#else
- path_max = pathconf (path, _PC_PATH_MAX);
- if (path_max <= 0) {
- path_max = 1024;
- }
-#endif
-
- if (resolved_path == NULL) {
- rpath = CALLOC (1, path_max);
- if (rpath == NULL) {
- errno = ENOMEM;
- goto out;
- }
- } else {
- rpath = resolved_path;
- }
-
- rpath_limit = rpath + path_max;
-
- if (path[0] == '/') {
- rpath[0] = '/';
- dest = rpath + 1;
- } else {
- /*
- FIXME: can $CWD be a valid path on glusterfs server? hence is
- it better to handle this case or just return EINVAL for
- relative paths?
- */
- ptr = getcwd (rpath, path_max);
- if (ptr == NULL) {
- goto err;
- }
- dest = rpath + strlen (rpath);
- }
-
- for (start = end = (char *)path; *end; start = end) {
- if (dest[-1] != '/') {
- *dest++ = '/';
- }
-
- while (*start == '/') {
- start++;
- }
-
- for (end = start; *end && *end != '/'; end++);
-
- if ((end - start) == 0) {
- break;
- }
-
- if ((end - start == 1) && (start[0] == '.')) {
- /* do nothing */
- } else if (((end - start) == 2) && (start[0] == '.')
- && (start[1] == '.')) {
- if (dest > rpath + 1) {
- while (--dest[-1] != '/');
- }
- } else {
- if ((dest + (end - start + 1)) >= rpath_limit) {
- if (resolved_path == NULL) {
- errno = ENAMETOOLONG;
- if (dest > rpath + 1)
- dest--;
- *dest = '\0';
- goto err;
- }
-
- dest_offset = dest - rpath;
- new_size = rpath_limit - rpath;
- if ((end - start + 1) > path_max) {
- new_size = (end - start + 1);
- } else {
- new_size = path_max;
- }
-
- new_rpath = realloc (rpath, new_size);
- if (new_rpath == NULL) {
- goto err;
- }
-
-
- dest = new_rpath + dest_offset;
- rpath = new_rpath;
- rpath_limit = rpath + new_size;
- }
-
- memcpy (dest, start, end - start);
- dest += end - start;
- *dest = '\0';
-
- ret = glusterfs_glh_lstat (handle, rpath, &stbuf);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "glusterfs_glh_stat returned -1 for"
- " path (%s):(%s)", rpath,
- strerror (errno));
- goto err;
- }
-
- if (S_ISLNK (stbuf.st_mode)) {
- buf = calloc (1, path_max);
- if (buf == NULL) {
- errno = ENOMEM;
- goto err;
- }
-
- if (++num_links > MAXSYMLINKS)
- {
- errno = ELOOP;
- FREE (buf);
- goto err;
- }
-
- ret = glusterfs_glh_readlink (handle, rpath,
- buf,
- path_max - 1);
- if (ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "glusterfs_readlink returned %d"
- " for path (%s):(%s)",
- ret, rpath, strerror (errno));
- FREE (buf);
- goto err;
- }
- buf[ret] = '\0';
-
- if (buf[0] != '/') {
- tmppath = strdup (rpath);
- tmppath = dirname (tmppath);
- sprintf (absolute_path, "%s/%s",
- tmppath, buf);
- FREE (buf);
- buf = libgf_resolve_path_light ((char *)absolute_path);
- FREE (tmppath);
- }
-
- rpath = glusterfs_glh_realpath (handle, buf,
- rpath);
- FREE (buf);
- if (rpath == NULL) {
- goto out;
- }
- dest = rpath + strlen (rpath);
-
- } else if (!S_ISDIR (stbuf.st_mode) && *end != '\0') {
- errno = ENOTDIR;
- goto err;
- }
- }
- }
- if (dest > rpath + 1 && dest[-1] == '/')
- --dest;
- *dest = '\0';
-
-out:
- if (vpath)
- FREE (vpath);
- return rpath;
-
-err:
- if (vpath)
- FREE (vpath);
- if (resolved_path == NULL) {
- FREE (rpath);
- }
-
- return NULL;
-}
-
-char *
-glusterfs_realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
- char *realp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- realp = CALLOC (PATH_MAX, sizeof (char));
- if (!realp)
- goto out;
-
- libgf_vmp_search_vmp (h, realp, PATH_MAX);
- res = glusterfs_glh_realpath (h, vpath, resolved_path);
- if (!res)
- goto out;
-
- /* This copy is needed to ensure that when we return the real resolved
- * path, we return a path that accounts for the app's view of the
- * path, i.e. it starts with the VMP, in case this is an absolute path.
- */
- if (libgf_path_absolute (path)) {
- strcat (realp, resolved_path);
- strcpy (resolved_path, realp);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, resolved %s", path,
- resolved_path);
-out:
- if (realp)
- FREE (realp);
-
- return res;
-}
-
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path)
-{
- loc_t loc = {0, };
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, handle, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1)
- goto out;
-
- if (IA_ISDIR (loc.inode->ia_type))
- op_ret = libgf_client_rmdir (ctx, &loc);
- else
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- return op_ret;
-
-}
-
-int
-glusterfs_remove(const char *pathname)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
-
- h = libgf_resolved_path_handle (pathname, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_remove (h, vpath);
-out:
- return op_ret;
-}
-
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- off_t off = -1;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- off = fd_ctx->offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return off;
-}
-
-struct libgf_client_sendfile_data {
- int reads_sent;
- int reads_completed;
- int out_fd;
- int32_t op_ret;
- int32_t op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-};
-
-int
-libgf_client_sendfile_read_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- struct libgf_client_sendfile_data *sendfile_data = cbk_data;
- int bytes = 0;
-
- if (op_ret > 0) {
- bytes = writev (sendfile_data->out_fd, buf->vector, buf->count);
- if (bytes != op_ret) {
- op_ret = -1;
- op_errno = errno;
- }
-
- glusterfs_free (buf);
- }
-
- pthread_mutex_lock (&sendfile_data->lock);
- {
- if (sendfile_data->op_ret != -1) {
- if (op_ret == -1) {
- sendfile_data->op_ret = -1;
- sendfile_data->op_errno = op_errno;
- } else {
- sendfile_data->op_ret += op_ret;
- }
- }
-
- sendfile_data->reads_completed++;
-
- if (sendfile_data->reads_completed
- == sendfile_data->reads_sent) {
- pthread_cond_broadcast (&sendfile_data->cond);
- }
- }
- pthread_mutex_unlock (&sendfile_data->lock);
-
- return 0;
-}
-
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count)
-{
- ssize_t ret = -1;
- struct libgf_client_sendfile_data cbk_data = {0, };
- off_t off = -1;
- size_t size = 0;
- int flags = 0;
- int non_block = 0;
-
-
- pthread_mutex_init (&cbk_data.lock, NULL);
- pthread_cond_init (&cbk_data.cond, NULL);
- cbk_data.out_fd = out_fd;
-
- if (offset) {
- off = *offset;
- }
-
- flags = fcntl (out_fd, F_GETFL);
-
- if (flags != -1) {
- non_block = flags & O_NONBLOCK;
-
- if (non_block) {
- ret = fcntl (out_fd, F_SETFL, flags & ~O_NONBLOCK);
- }
- }
-
- while (count != 0) {
- /*
- * FIXME: what's the optimal size for reads and writes?
- */
- size = (count > LIBGF_SENDFILE_BLOCK_SIZE) ?
- LIBGF_SENDFILE_BLOCK_SIZE : count;
-
- /*
- * we don't wait for reply to previous read, we just send all
- * reads in a single go.
- */
- ret = glusterfs_read_async (in_fd, size, off,
- libgf_client_sendfile_read_cbk,
- &cbk_data);
- if (ret == -1) {
- break;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- cbk_data.reads_sent++;
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset) {
- off += size;
- }
-
- count -= size;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- /*
- * if we've not received replies to all the reads we've sent,
- * wait for them
- */
- if (cbk_data.reads_sent > cbk_data.reads_completed) {
- pthread_cond_wait (&cbk_data.cond,
- &cbk_data.lock);
- }
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset != NULL) {
- *offset = off;
- }
-
- /* if we were able to stack_wind all the reads */
-
- if (ret == 0) {
- ret = cbk_data.op_ret;
- errno = cbk_data.op_errno;
- }
-
- if (non_block) {
- fcntl (out_fd, F_SETFL, flags);
- }
-
- return ret;
-}
-
-
-static int32_t
-libgf_client_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_lk_cbk_stub (frame, NULL, op_ret, op_errno,
- lock);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-
-int
-libgf_client_lk (libglusterfs_client_ctx_t *ctx, fd_t *fd, int cmd,
- struct flock *lock)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP(ctx, stub, lk, local, fd, cmd, lock);
-
- op_ret = stub->args.lk_cbk.op_ret;
- errno = stub->args.lk_cbk.op_errno;
- if (op_ret == 0) {
- *lock = stub->args.lk_cbk.lock;
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...)
-{
- int ret = -1;
- struct flock *lock = NULL;
- va_list ap;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (cmd) {
- case F_SETLK:
- case F_SETLKW:
- case F_GETLK:
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (!lock) {
- errno = EINVAL;
- goto out;
- }
-
- ret = libgf_client_lk (ctx, fd, cmd, lock);
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- return ret;
-}
-
-
-int
-libgf_client_chdir (const char *path)
-{
- int op_ret = 0;
- uint32_t resulting_cwd_len = 0;
-
- pthread_mutex_lock (&cwdlock);
- {
- if (!libgf_path_absolute (path)) {
- resulting_cwd_len = strlen (path) + strlen (cwd)
- + ((path[strlen (path) - 1] == '/')
- ? 0 : 1) + 1;
-
- if (resulting_cwd_len > PATH_MAX) {
- op_ret = -1;
- errno = ENAMETOOLONG;
- goto unlock;
- }
- strcat (cwd, path);
- } else {
- resulting_cwd_len = strlen (path)
- + ((path[strlen (path) - 1] == '/')
- ? 0 : 1) + 1;
-
- if (resulting_cwd_len > PATH_MAX) {
- op_ret = -1;
- errno = ENAMETOOLONG;
- goto unlock;
- }
-
- strcpy (cwd, path);
- }
-
- if (cwd[strlen (cwd) - 1] != '/') {
- strcat (cwd, "/");
- }
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- return op_ret;
-}
-
-
-int
-glusterfs_fchdir (glusterfs_file_t fd)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- char vmp[PATH_MAX];
- char *res = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- glusterfs_handle_t handle = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- /* FIXME: there is a race-condition between glusterfs_fchdir and
- glusterfs_close. If two threads of application call glusterfs_fchdir
- and glusterfs_close on the same fd, there is a possibility of
- glusterfs_fchdir accessing freed memory of fd_ctx.
- */
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- handle = fd_ctx->ctx;
- strcpy (vpath, fd_ctx->vpath);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- if (vpath[0] == '\0') {
- errno = ENOTDIR;
- goto out;
- }
-
- res = libgf_vmp_search_vmp (handle, vmp, PATH_MAX);
- if (res == NULL) {
- errno = EBADF;
- goto out;
- }
-
- /* both vmp and vpath are terminated with '/'. Also path starts with a
- '/'. Hence the extra '/' amounts to NULL character at the end of the
- string.
- */
- if ((strlen (vmp) + strlen (vpath)) > PATH_MAX) {
- errno = ENAMETOOLONG;
- goto out;
- }
-
- pthread_mutex_lock (&cwdlock);
- {
- strcpy (cwd, vmp);
- res = vpath;
- if (res[0] == '/') {
- res++;
- }
-
- strcat (cwd, res);
- }
- pthread_mutex_unlock (&cwdlock);
-
- op_ret = 0;
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_chdir (const char *path)
-{
- int32_t op_ret = -1;
- glusterfs_handle_t handle = NULL;
- loc_t loc = {0, };
- char vpath[PATH_MAX];
-
- handle = libgf_resolved_path_handle (path, vpath);
-
- if (handle != NULL) {
- loc.path = strdup (vpath);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction "
- "failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, handle, 0);
- }
-
- if ((handle == NULL) || (op_ret == 0)) {
- op_ret = libgf_client_chdir (path);
- }
-
-out:
- return op_ret;
-}
-
-
-char *
-glusterfs_getcwd (char *buf, size_t size)
-{
- char *res = NULL;
- size_t len = 0;
- loc_t loc = {0, };
- glusterfs_handle_t handle = NULL;
- char vpath[PATH_MAX];
- int32_t op_ret = 0;
-
- pthread_mutex_lock (&cwdlock);
- {
- if (!cwd_inited) {
- errno = ENODEV;
- goto unlock;
- }
-
- if (buf == NULL) {
- buf = CALLOC (1, len);
- if (buf == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "out of memory");
- goto unlock;
- }
- } else {
- if (size == 0) {
- errno = EINVAL;
- goto unlock;
- }
-
- if (len > size) {
- errno = ERANGE;
- goto unlock;
- }
- }
-
- strcpy (buf, cwd);
- res = buf;
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- if (res != NULL) {
- handle = libgf_resolved_path_handle (res, vpath);
-
- if (handle != NULL) {
- loc.path = strdup (vpath);
- if (loc.path == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "strdup failed");
- } else {
- op_ret = libgf_client_path_lookup (&loc, handle,
- 0);
- if (op_ret == -1) {
- res = NULL;
- }
- }
- }
- }
-
- return res;
-}
-
-int32_t
-libgf_client_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_truncate_cbk_stub (frame, NULL, op_ret,
- op_errno, prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_truncate (libglusterfs_client_ctx_t *ctx,
- loc_t *loc, off_t length)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, truncate, local, loc, length);
-
- op_ret = stub->args.truncate_cbk.op_ret;
- errno = stub->args.truncate_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
-
- if (op_ret == -1) {
- goto out;
- }
-
- libgf_transform_iattr (ctx, loc->inode,
- &stub->args.truncate_cbk.postbuf);
-
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.truncate_cbk.postbuf);
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_truncate (glusterfs_handle_t handle, const char *path,
- off_t length)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_truncate (ctx, &loc, length);
-
-out:
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-int
-glusterfs_truncate (const char *path, off_t length)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path:%s length:%"PRIu64, path,
- length);
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_truncate (h, vpath, length);
-out:
- return op_ret;
-}
-
-static struct xlator_fops libgf_client_fops = {
-};
-
-static struct xlator_cbks libgf_client_cbks = {
- .forget = libgf_client_forget,
- .release = libgf_client_release,
- .releasedir = libgf_client_releasedir,
-};
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph)
-{
- int ret = 0;
- xlator_t *top = NULL;
- xlator_list_t *xlchild, *xlparent;
-
- top = CALLOC (1, sizeof (*top));
- ERR_ABORT (top);
-
- xlchild = CALLOC (1, sizeof(*xlchild));
- ERR_ABORT (xlchild);
- xlchild->xlator = graph;
- top->children = xlchild;
- top->ctx = graph->ctx;
- top->next = graph;
- top->name = strdup (LIBGF_XL_NAME);
-
- xlparent = CALLOC (1, sizeof(*xlparent));
- xlparent->xlator = top;
- graph->parents = xlparent;
- ret = asprintf (&top->type, LIBGF_XL_NAME);
- if (-1 == ret) {
- fprintf (stderr, "failed to set the top xl's type");
- }
-
- top->init = libgf_client_init;
- top->fops = &libgf_client_fops;
- top->mops = &libgf_client_mops;
- top->cbks = &libgf_client_cbks;
- top->notify = libgf_client_notify;
- top->fini = libgf_client_fini;
- // fill_defaults (top);
-
- return top;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient.h b/libglusterfsclient/src/libglusterfsclient.h
deleted file mode 100755
index 1691a2faad3..00000000000
--- a/libglusterfsclient/src/libglusterfsclient.h
+++ /dev/null
@@ -1,1363 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LIBGLUSTERFSCLIENT_H
-#define _LIBGLUSTERFSCLIENT_H
-
-#ifndef __BEGIN_DECLS
-#ifdef __cplusplus
-#define __BEGIN_DECLS extern "C" {
-#else
-#define __BEGIN_DECLS
-#endif
-#endif
-
-#ifndef __END_DECLS
-#ifdef __cplusplus
-#define __END_DECLS }
-#else
-#define __END_DECLS
-#endif
-#endif
-
-
-__BEGIN_DECLS
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <utime.h>
-#include <sys/time.h>
-#include <stdint.h>
-
-typedef struct {
- struct iovec *vector;
- int count;
- void *iobref;
- void *dictref;
-} glusterfs_iobuf_t;
-
-
-typedef
-int (*glusterfs_readv_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- void *cbk_data);
-
-typedef
-int (*glusterfs_write_cbk_t) (int op_ret, int op_errno, void *cbk_data);
-
-typedef
-int (*glusterfs_get_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- struct stat *stbuf, void *cbk_data);
-
-
-/* Data Interface
- * The first section describes the data structures required for
- * using libglusterfsclient.
- */
-
-/* This structure needs to be filled up and
- * passed to te glusterfs_init function which uses
- * the params passed herein to initialize a glusterfs
- * client context and then connect to a glusterfs server.
- */
-typedef struct {
- char *logfile; /* Path to the file which will store
- the log.
- */
- char *loglevel; /* The log level required for
- reporting various events within
- libglusterfsclient.
- */
- struct {
- char *specfile; /* Users can either open a volume or
- specfile and assign the pointer to
- specfp, or just refer to the volume
- /spec file path in specfile.
- */
- FILE *specfp;
- };
- char *volume_name; /* The volume file could describe many
- volumes but the specific volume
- within that file is chosen by
- specifying the volume name here.
- */
- unsigned long lookup_timeout; /* libglusterclient provides the inode
- numbers to be cached by the library.
- The duration for which these are
- cached are defined by lookup_timeout
- . In Seconds.
- */
- unsigned long stat_timeout; /* The file attributes received from
- a stat syscall can also be cached
- for the duration specified in this
- member. In Seconds.
- */
-} glusterfs_init_params_t;
-
-
-
-/* This is the handle returned by glusterfs_init
- * once the initialization is complete.
- * Users should treat this as an opaque handle.
- */
-typedef void * glusterfs_handle_t;
-
-
-
-/* These identifiers are used as handles for files and dirs.
- * Users of libglusterfsclient should not in anyway try to interpret
- * the actual structures these will point to.
- */
-typedef void * glusterfs_file_t;
-typedef void * glusterfs_dir_t;
-
-
-/* Function Call Interface */
-/* libglusterfsclient initialization function.
- * @ctx : the structure described above filled with required values.
- * @fakefsid: User generated fsid to be used to identify this
- * volume.
- *
- * Returns NULL on failure and the non-NULL pointer on success.
- * On failure, the error description might be present in the logfile
- * depending on the log level.
- */
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *ctx, uint32_t fakefsid);
-
-
-
-/* Used to destroy a glusterfs client context and the
- * connection to the glusterfs server.
- *
- * @handle : The glusterfs handle returned by glusterfs_init.
- */
-int
-glusterfs_fini (glusterfs_handle_t handle);
-
-
-
-/* libglusterfs client provides two interfaces.
- * 1. handle-based interface
- * Functions that comprise the handle-based interface accept the
- * glusterfs_handle_t as the first argument. It specifies the
- * glusterfs client context over which to perform the operation.
- *
- * 2. Virtual Mount Point based interface:
- * Functions that do not require a handle to be given in order to
- * identify which client context to operate on. This interface
- * internally determines the corresponding client context for the
- * given path. The down-side is that a virtual mount point (VMP) needs to be
- * registered with the library. A VMP is just a string that maps to a
- * glusterfs_handle_t. The advantage of a VMP based interface is that
- * a user program using multiple client contexts does not need to
- * maintain its own mapping between paths and the corresponding
- * handles.
- */
-
-
-
-/* glusterfs_mount is the function that allows users to register a VMP
- * along with the parameters, which will be used to initialize a
- * context. Applications calling glusterfs_mount do not need to
- * initialized a context using the glusterfs_init interface.
- *
- * @vmp : The virtual mount point.
- * @ipars : Initialization parameters populated as described
- * earlier.
- *
- * Returns 0 on success, and -1 on failure.
- */
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars);
-
-
-
-/* glusterfs_umount is the VMP equivalent of glusterfs_fini.
- *
- * @vmp : The VMP which was initialized using glusterfs_mount.
- *
- * Returns 0 on sucess, and -1 on failure.
- */
-int
-glusterfs_umount (char *vmp);
-
-
-/* glusterfs_umount_all unmounts all the mounts */
-int
-glusterfs_umount_all (void);
-
-
-/* For smaller files, application can use just
- * glusterfs_get/glusterfs_get_async
- * to read the whole content. Limit of the file-sizes to be read in
- * glusterfs_get/glusterfs_get_async is passed in the size argument
- */
-
-/* glusterfs_glh_get:
- * @handle : glusterfs handle
- * @path : path to be looked upon
- * @size : upper limit of file-sizes to be read in lookup
- * @stbuf : attribute buffer
- */
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf);
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf);
-
-int
-glusterfs_get_async (glusterfs_handle_t handle, const char *path, size_t size,
- glusterfs_get_cbk_t cbk, void *cbk_data);
-
-
-
-/* Opens a file. Corresponds to the open syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file or directory on the glusterfs
- * export. Must be absolute to the export on the server.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns a non-NULL handle on success. NULL on failure and sets
- * errno accordingly.
- */
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,
- ...);
-
-
-/* Opens a file without having to specify a handle.
- *
- * @path : Path to the file to open in the glusterfs export.
- * The path to the file in glusterfs export must be
- * pre-fixed with the VMP string registered with
- * glusterfs_mount.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns 0 on success, -1 on failure with errno set accordingly.
- */
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...);
-
-
-
-/* Creates a file. Corresponds to the creat syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file that needs to be created in the
- * glusterfs export.
- * @mode : File creation mode.
- *
- * Returns the file handle on success. NULL on error with errno set as
- * required.
- */
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* VMP-based creat.
- * @path : Path to the file to be created. Must be
- * pre-prepended with the VMP string registered with
- * glusterfs_mount.
- * @mode : File creation mode.
- *
- * Returns file handle on success. NULL handle on error with errno set
- * accordingly.
- */
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode);
-
-
-
-/* Close the file identified by the handle.
- *
- * @fd : Closes the file.
- *
- * Returns 0 on success, -1 on error with errno set accordingly.
- */
-int
-glusterfs_close (glusterfs_file_t fd);
-
-
-
-/* Get struct stat for the file in path.
- *
- * @handle : The handle that identifies a glusterfs client
- * context.
- * @path : The file for which we need to get struct stat.
- * @stbuf : The buffer into which the file's stat is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *stbuf);
-
-
-/* Get struct stat for file in path.
- *
- * @path : The file for which struct stat is required.
- * @sbuf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_stat (const char *path, struct stat *buf);
-
-
-
-/* Gets stat struct for the file.
- *
- * @handle : The handle identifying a glusterfs client context.
- * @path : Path to the file for which stat structure is
- * required. If path is a symlink, the symlink is
- * interpreted and the stat structure returned for the
- * target of the link.
- * @buf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path,
- struct stat *buf);
-
-
-
-/* Gets stat struct for a file.
- *
- * @path : The file to get the struct stat for.
- * @buf : The receiving struct stat buffer.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_lstat (const char *path, struct stat *buf);
-
-
-
-/* Get stat structure for a file.
- *
- * @fd : The file handle identifying a file on the glusterfs
- * server.
- * @stbuf : The buffer into which the stat data is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_fstat (glusterfs_file_t fd, struct stat *stbuf);
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags);
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd, char *list, size_t size);
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd, const char *name);
-
-
-
-/* Read data from a file.
- * @fd : Handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- *
- * Returns number of bytes actually read on success or -1 on error
- * with errno set to the appropriate error number.
- */
-ssize_t
-glusterfs_read (glusterfs_file_t fd, void *buf, size_t nbytes);
-
-
-
-/* Read data into an array of buffers.
- *
- * @fd : File handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @vec : Array of buffers into which the data is read.
- * @count : Number of iovecs referred to by vec.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count);
-
-int
-glusterfs_read_async (glusterfs_file_t fd, size_t nbytes, off_t offset,
- glusterfs_readv_cbk_t readv_cbk, void *cbk_data);
-
-
-
-/* Write data into a file.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer which is written to the file.
- * @nbytes : Number bytes of the @buf written to the file.
- *
- * On success, returns number of bytes written. On error, returns -1
- * with errno set appropriately.
- */
-ssize_t
-glusterfs_write (glusterfs_file_t fd, const void *buf, size_t nbytes);
-
-
-
-/* Writes an array of buffers into a file.
- *
- * @fd : The file handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @vector : Array of buffers to be written to the file.
- * @count : Number of separate buffers in the @vector array.
- *
- * Returns number of bytes written on success or -1 on error with
- * errno set approriately.
- */
-ssize_t
-glusterfs_writev (glusterfs_file_t fd, const struct iovec *vector, int count);
-
-int
-glusterfs_write_async (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset, glusterfs_write_cbk_t write_cbk,
- void *cbk_data);
-
-int
-glusterfs_writev_async (glusterfs_file_t fd, const struct iovec *vector,
- int count, off_t offset,
- glusterfs_write_cbk_t write_cbk, void *cbk_data);
-
-
-
-/* Read from a file starting at a given offset.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- * @offset : The offset to start reading @nbytes from.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_pread (glusterfs_file_t fd, void *buf, size_t nbytes, off_t offset);
-
-
-
-/* Write to a file starting at a given offset.
- *
- * @fd : Flie handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer that will be written to the file.
- * @nbytes : Number of bytes to write from @buf.
- * @offset : The starting offset from where @nbytes will be
- * written.
- *
- * Returns number of bytes written on success and -1 on error with
- * errno set appropriately.
- */
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset);
-
-
-
-/* Seek to an offset in the file.
- *
- * @fd : File handle in which to seek to. File handle
- * returned by glusterfs_open or glusterfs_glh_open.
- * @offset : Offset to seek to in the given file.
- * @whence : Determines how the offset is interpreted by this
- * syscall. The behaviour is similar to the options
- * provided by the POSIX lseek system call. See man lseek
- * for more details.
- *
- * On success, returns the resulting absolute offset in the file after the seek
- * operation is performed. ON error, returns -1 with errno set
- * appropriately.
- */
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence);
-
-
-
-/* Create a directory.
- *
- * @handle : The handle of the glusterfs context in which the
- * directory needs to be created.
- * @path : The absolute path within the glusterfs context where
- * the directory needs to be created.
- * @mode : The mode bits for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Create a directory.
- *
- * @path : Path to the directory that needs to be created. This
- * path must be prefixed with the VMP of the particular glusterfs
- * context.
- * @mode : Mode flags for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkdir (const char *path, mode_t mode);
-
-
-
-/* Remove a directory.
- *
- * @handle : Handle of the glusterfs context from which to remove
- * the directory.
- * @path : The path of the directory to be removed in the glusterfs
- * context.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a directory.
- *
- * @path : The absolute path to the directory to be removed.
- * This path must be pre-fixed with the VMP of the
- * particular glusterfs context in which this directory
- * resides.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rmdir (const char *path);
-
-
-
-/* Read directory entries.
- *
- * @fd : The handle of the directory to be read. This handle
- * is the one returned by opendir.
- *
- * Returns the directory entry on success and NULL pointer on error
- * with errno set appropriately.
- */
-void *
-glusterfs_readdir (glusterfs_dir_t dirfd);
-
-
-
-/* re-entrant version of glusterfs_readdir.
- *
- * @dirfd : The handle of directory to be read. This handle is the one
- * returned by opendir.
- * @entry : Pointer to storage to store a directory entry. The storage
- * pointed to by entry shall be large enough for a dirent with
- * an array of char d_name members containing at least
- * {NAME_MAX}+1 elements.
- * @result : Upon successful return, the pointer returned at *result shall
- * have the same value as the argument entry. Upon reaching the
- * end of the directory stream, this pointer shall have the
- * value NULL.
- */
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result);
-
-/* Close a directory handle.
- *
- * @fd : The directory handle to be closed.
- *
- * Returns 0 on success and -1 on error with errno set to 0.
- */
-int
-glusterfs_closedir (glusterfs_dir_t dirfd);
-/* FIXME: remove getdents */
-int
-glusterfs_getdents (glusterfs_dir_t fd, struct dirent *dirp,
- unsigned int count);
-
-
-
-/* Create device node.
- *
- * @handle : glusterfs context in which to create the device
- * node.
- * @pathname : The absolute path of the device to be created in the
- * given glusterfs context.
- *
- * @mode : Mode flags to apply to the newly created node.
- * @dev : Device numbers that will apply to the node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *pathname,
- mode_t mode, dev_t dev);
-
-
-
-/* Create a device node.
- *
- * @pathname : The full path of the node to be created. This path
- * should be pre-pended with the VMP of the glusterfs
- * context in which this node is to be created.
- * @mode : Mode flags that will be applied to the newly created
- * device file.
- * @dev : The device numbers that will be associated with the
- * device node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev);
-
-
-
-/* Returns the real absolute path of the given path.
- *
- * @handle : The glusterfs context in which the path resides in.
- * @path : The path to be resolved.
- * @resolved_path : The resolved path is stored in this buffer
- * provided by the caller.
- *
- * Returns a pointer to resolved_path on success and NULL on error
- * with errno set appropriately.
- *
- * See man realpath for details.
- */
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path);
-
-
-/* Returns the real absolute path of the given path.
- *
- * @path : The path to be resolved. This path must be
- * pre-fixed with the VMP of the glusterfs
- * context in which the file resides.
- *
- * @resolved_path : The resolved path is stored in this user
- * provided buffer.
- *
- * Returns a pointer to resolved_path on success, and NULL on error
- * with errno set appropriately.
- */
-char *
-glusterfs_realpath (const char *path, char *resolved_path);
-
-
-
-/* Change mode flags on a path.
- *
- * @handle : Handle of the glusterfs instance in which the path
- * resides.
- * @path : The path whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Change mode flags on a path.
- *
- * @path : The path whose mode bits need to be changed. The
- * path should be pre-fixed with the VMP that identifies the
- * glusterfs context within which the path resides.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chmod (const char *path, mode_t mode);
-
-
-
-/* Change the owner of a path.
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- *
- * @handle : Handle of the glusterfs context in which the path
- * resides.
- * @path : The path whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of a path.
- *
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- * @path : The path whose owner needs to be changed. Path must
- * be pre-fixed with the VMP that identifies the
- * glusterfs context in which the path resides.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Change the owner of the file.
- *
- * @fd : Handle of the file whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t owner, gid_t group);
-
-
-
-/* Open a directory.
- *
- * @handle : Handle that identifies a glusterfs context.
- * @path : Path to the directory in the glusterfs context.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Open a directory.
- *
- * @path : Path to the directory. The path must be prepended
- * with the VMP in order to identify the glusterfs
- * context in which path resides.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_opendir (const char *path);
-
-
-
-/* Change the mode bits on an open file.
- *
- * @fd : The file whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode);
-
-
-
-/* Sync the file contents to storage.
- *
- * @fd : The file whose contents need to be sync'ed to
- * storage.
- *
- * Return 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fsync (glusterfs_file_t *fd);
-
-
-
-/* Truncate an open file.
- *
- * @fd : The file to truncate.
- * @length : The length to truncate to.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length);
-
-
-
-/* Create a hard link between two paths.
- *
- * @handle : glusterfs context in which both paths should reside.
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a hard link between two paths.
- *
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Both paths should exist on the same glusterfs context and should be
- * prefixed with the same VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_link (const char *oldpath, const char *newpath);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file reside.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statfs (const char *path, struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file reside.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utimes (const char *path, const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf);
-
-
-
-/* Create FIFO at the given path.
- *
- * @handle : The glusterfs context in which to create that FIFO.
- * @path : The path within the context where the FIFO is to be
- * created.
- * @mode : The mode bits for the newly create FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path,
- mode_t mode);
-
-
-
-/* Create FIFO at the given path.
- *
- * @path : The path within the context where the FIFO is to be
- * created. @path should begin with the VMP of the
- * glusterfs context in which the FIFO needs to be
- * created.
- * @mode : The mode bits for the newly create FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkfifo (const char *path, mode_t mode);
-
-
-
-/* Unlink a file.
- *
- * @handle : Handle that identifies a glusterfs instance.
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Unlink a file.
- *
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_unlink (const char *path);
-
-
-
-/* Create a symbolic link.
- *
- * @handle : The handle identifying the glusterfs context.
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a symbolic link.
- *
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_symlink (const char *oldpath, const char *newpath);
-
-
-
-/* Read a symbolic link.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize);
-
-
-
-/* Read a symbolic link.
- *
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize);
-
-
-
-/* Rename a file or directory.
- *
- * @handle : The identifier of a glusterfs context.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Rename a file or directory.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rename (const char *oldpath, const char *newpath);
-
-
-
-/* Remove a file or directory in the given glusterfs context.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : Path of the file or directory to be removed.
- *
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a file or directory.
- *
- * @path : Path of the file or directory to be removed. The
- * path must be pre-fixed with the VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_remove (const char *path);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @handle : Handle identifying the glusterfs client context.
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Rewind directory stream pointer to beginning of the directory.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns no value.
- */
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd);
-
-
-
-/* Seek to the given offset in the directory handle.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- * @offset : The offset to seek to.
- *
- * Returns no value.
- */
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset);
-
-
-
-/* Return the current offset in a directory stream.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns the offset in the directory or -1 on error with errno set
- * appropriately.
- */
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd);
-
-
-/* Write count bytes from in_fd to out_fd, starting at *offset.
- * glusterfs_sendfile aims at eliminating memory copy at the end of
- * each read from in_fd, copying the file directly to out_fd from the buffer
- * provided by glusterfs.
- *
- * @out_fd: file descriptor opened for writing
- *
- * @in_fd: glusterfs file handle to the file to be read from.
- *
- * @offset: If offset is not NULL, then it points to a variable holding the file
- * offset from which glusterfs_sendfile() will start reading data
- * from in_fd. When glusterfs_sendfile() returns, this variable will
- * be set to the offset of the byte following the last byte that was
- * read. If offset is not NULL, then glusterfs_sendfile() does not
- * modify the current file offset of in_fd; otherwise the current file
- * offset is adjusted to reflect the number of bytes read from in_fd.
- *
- * @count: number of bytes to copy between the file descriptors.
- */
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count);
-
-/* manipulate file descriptor
- * This api can have 3 forms similar to fcntl(2).
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, long arg)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, struct flock *lock)
- *
- * @fd : file handle returned by glusterfs_open or glusterfs_create.
- * @cmd : Though the aim is to implement all possible commands supported by
- * fcntl(2), currently following commands are supported.
- * F_SETLK, F_SETLKW, F_GETLK - used to acquire, release, and test for
- * the existence of record locks (also
- * known as file-segment or file-region
- * locks). More detailed explanation is
- * found in 'man 2 fcntl'
- */
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...);
-
-/*
- * Change the current working directory to @path
- *
- * @path : path to change the current working directory to.
- *
- * Returns 0 on success and -1 on failure with errno set appropriately.
- */
-int
-glusterfs_chdir (const char *path);
-
-/*
- * Change the current working directory to the path @fd is opened on.
- *
- * @fd : current working directory will be changed to path @fd is opened on.
- *
- * Returns 0 on success and -1 on with errno set appropriately.
- */
-int
-glusterfs_fchdir (glusterfs_file_t fd);
-
-/* copies the current working directory into @buf if it is big enough
- *
- * @buf: buffer to copy into it. If @buf is NULL, a buffer will be allocated.
- * The size of the buffer will be @size if it is not zero, otherwise the
- * size will be big enough to hold the current working directory.
- * @size: size of the buffer.
- *
- * Returns the pointer to buffer holding current working directory on success
- * and NULL on failure.
- */
-
-char *
-glusterfs_getcwd (char *buf, size_t size);
-
-/*
- * Truncate the file to a specified length.
- *
- * @path : path to the file.
- * @length : length to which the file has to be truncated.
- *
- * Returns 0 on success and -1 on failure with errno set appropriately
- */
-
-int
-glusterfs_truncate (const char *path, off_t length);
-
-
-/* FIXME: review the need for these apis */
-/* added for log related initialization in booster fork implementation */
-void
-glusterfs_reset (void);
-
-void
-glusterfs_log_lock (void);
-
-void
-glusterfs_log_unlock (void);
-/* Used to free the glusterfs_read_buf passed to the application from
- glusterfs_read_async_cbk
-*/
-void
-glusterfs_free (glusterfs_iobuf_t *buf);
-
-__END_DECLS
-
-#endif /* !_LIBGLUSTERFSCLIENT_H */
diff --git a/mod_glusterfs/Makefile.am b/mod_glusterfs/Makefile.am
deleted file mode 100644
index 0abe8dcfcbb..00000000000
--- a/mod_glusterfs/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = apache lighttpd
-
-CLEANFILES =
diff --git a/mod_glusterfs/apache/1.3/src/Makefile.am b/mod_glusterfs/apache/1.3/src/Makefile.am
deleted file mode 100644
index 6bb3075f564..00000000000
--- a/mod_glusterfs/apache/1.3/src/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/1.3
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs-build.c
- $(APXS) -c -Wc,-g3 -Wc,-O0 -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfsclient/src -Wl,-rpath,$(libdir) -Wl,-rpath,$(top_builddir)/libglusterfsclient/src/.libs/ $(top_builddir)/libglusterfsclient/src/.libs/libglusterfsclient.so mod_glusterfs-build.c -o $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- -rm -fv *.so *.o mod_glusterfs-build.c
diff --git a/mod_glusterfs/apache/1.3/src/README.txt b/mod_glusterfs/apache/1.3/src/README.txt
deleted file mode 100644
index 378a51d79ae..00000000000
--- a/mod_glusterfs/apache/1.3/src/README.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installation of apache-1.3.x, where x is any minor version.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to the way apache has
-been built, currently following points have to be taken care of:
-
-* module "so" has to be enabled, for apache to support modules.
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzvf apache-1.3.9.tar.gz
- $ cd apache-1.3.9/
- $ # add -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 to EXTRA_CFLAGS in src/Configuration.
- $ ./configure --prefix=/usr --enable-module=so
- $ cd src
- $ ./Configure
- $ cd ../
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n mod_glusterfs -ia /usr/lib/glusterfs/1.4.0pre2/apache-1.3/mod_glusterfs.so
-**********************************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is multithreaded.
- If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
-
-TODO
-====
-* directory listing for the directories accessed through mod_glusterfs.
diff --git a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c b/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
deleted file mode 100644
index c1380a4fda6..00000000000
--- a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glusterfs: Unrecognized log-level "\
- "\"%s\", possible values are \"DEBUG|"\
- "WARNING|ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-module MODULE_VAR_EXPORT glusterfs_module;
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
- size_t xattr_file_size;
- uint32_t cache_timeout;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-static glusterfs_dir_config_t *
-mod_glusterfs_dconfig(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-static
-const char *add_xattr_file_size(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_cache_timeout(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_loglevel(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = arg;
-
- return error;
-}
-
-static
-const char *add_logfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = arg;
-
- return NULL;
-}
-
-static
-const char *add_specfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = arg;
-
- return NULL;
-}
-
-static void *
-mod_glusterfs_create_dir_config(pool *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) ap_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- return (void *) dir_config;
-}
-
-static void
-mod_glusterfs_child_init(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = ap_get_module_config (s->module_config,
- &core_module);
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t params = {0, };
-
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
-
- if (dir_config) {
- memset (&params, 0, sizeof (params));
-
- params.logfile = dir_config->logfile;
- params.loglevel = dir_config->loglevel;
- params.lookup_timeout = dir_config->cache_timeout;
- params.stat_timeout = dir_config->cache_timeout;
- params.specfile = dir_config->specfile;
-
- glusterfs_mount (dir_config->mount_dir, &params);
- }
- dir_config = NULL;
- }
-}
-
-static void
-mod_glusterfs_child_exit(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
-
- mod_core_config = ap_get_module_config (s->module_config, &core_module);
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static int mod_glusterfs_fixup(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int access_status;
- int ret;
- char *path = NULL;
-
- dir_config = mod_glusterfs_dconfig(r);
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (ap_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL),
- r->uri, strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = ap_pstrdup (r->pool, GLUSTERFS_HANDLER);
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &r->finfo);
-
- if (ret == -1 || r->finfo.st_size > dir_config->xattr_file_size
- || S_ISDIR (r->finfo.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, r,
- "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- if (r->uri && strlen (r->uri) && r->uri[strlen(r->uri) - 1] == '/')
- r->handler = NULL;
-
- r->filename = ap_pstrcat (r->pool, r->filename, r->path_info, NULL);
-
- if ((access_status = ap_find_types(r)) != 0) {
- return DECLINED;
- }
-
- return OK;
-}
-
-
-int
-mod_glusterfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glusterfs_read_async (request_rec *r, glusterfs_file_t fd, off_t offset,
- off_t length)
-{
- glusterfs_async_local_t local;
- off_t end;
- int nbytes;
- int complete;
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- for (i = 0; i < buf->count; i++) {
- if (ap_rwrite (buf->vector[i].iov_base,
- buf->vector[i].iov_len, r) < 0) {
- local.op_ret = -1;
- complete = 1;
- break;
- }
- }
-
- glusterfs_free (buf);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? SERVER_ERROR : OK);
-}
-
-static int
-mod_glusterfs_handler(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config;
- char *path = NULL;
- int error = OK;
- int rangestatus = 0;
- int errstatus = OK;
- glusterfs_file_t fd;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] == '/') {
- return DECLINED;
- }
-
- dir_config = mod_glusterfs_dconfig (r);
-
- if (r->method_number != M_GET) {
- return METHOD_NOT_ALLOWED;
- }
-
- ap_update_mtime(r, r->finfo.st_mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- ap_table_setn(r->headers_out, "Accept-Ranges", "bytes");
- if (((errstatus = ap_meets_conditions(r)) != OK)
- || (errstatus = ap_set_content_length(r, r->finfo.st_size))) {
- return errstatus;
- }
- rangestatus = ap_set_byterange(r);
- ap_send_http_header(r);
-
- if (r->finfo.st_size <= dir_config->xattr_file_size && dir_config->buf) {
- if (!r->header_only) {
- error = OK;
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, r,
- "fetching data from glusterfs through "
- "xattr interface\n");
-
- if (!rangestatus) {
- if (ap_rwrite (dir_config->buf,
- r->finfo.st_size, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- } else {
- long offset, length;
- while (ap_each_byterange (r, &offset, &length)) {
- if (ap_rwrite (dir_config->buf + offset,
- length, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- break;
- }
- }
- }
- }
-
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- return error;
- }
-
- path = r->uri;
- fd = glusterfs_open (path , O_RDONLY, 0);
-
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
- "file permissions deny server access: %s",
- r->filename);
- return FORBIDDEN;
- }
-
- if (!r->header_only) {
- if (!rangestatus) {
- mod_glusterfs_read_async (r, fd, 0, -1);
- } else {
- long offset, length;
- while (ap_each_byterange(r, &offset, &length)) {
- mod_glusterfs_read_async (r, fd, offset, length);
- }
- }
- }
-
- glusterfs_close (fd);
- return error;
-}
-
-static const command_rec mod_glusterfs_cmds[] =
-{
- {"GlusterfsLogfile", add_logfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Logfile"},
- {"GlusterfsLoglevel", set_loglevel, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Loglevel:anyone of none, critical, error, warning, debug"},
- {"GlusterfsCacheTimeout", set_cache_timeout, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Timeout value in seconds for caching lookups and stats"},
- {"GlusterfsVolumeSpecfile", add_specfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Specfile required to access contents of this directory"},
- {"GlusterfsXattrFileSize", add_xattr_file_size, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Maximum size of the file to be fetched using xattr interface of "
- "glusterfs"},
- {NULL}
-};
-
-static const handler_rec mod_glusterfs_handlers[] =
-{
- {GLUSTERFS_HANDLER, mod_glusterfs_handler},
- {NULL}
-};
-
-module glusterfs_module =
-{
- STANDARD_MODULE_STUFF,
- NULL,
- mod_glusterfs_create_dir_config, /* per-directory config creator */
- NULL,
- NULL, /* server config creator */
- NULL, /* server config merger */
- mod_glusterfs_cmds, /* command table */
- mod_glusterfs_handlers, /* [7] list of handlers */
- NULL, /* [2] filename-to-URI translation */
- NULL, /* [5] check/validate user_id */
- NULL, /* [6] check user_id is valid *here* */
- NULL, /* [4] check access by host address */
- NULL, /* [7] MIME type checker/setter */
- mod_glusterfs_fixup, /* [8] fixups */
- NULL, /* [10] logger */
-#if MODULE_MAGIC_NUMBER >= 19970103
- NULL, /* [3] header parser */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970719
- mod_glusterfs_child_init, /* process initializer */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970728
- mod_glusterfs_child_exit, /* process exit/cleanup */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970902
- NULL /* [1] post read_request handling */
-#endif
-};
diff --git a/mod_glusterfs/apache/2.2/src/Makefile.am b/mod_glusterfs/apache/2.2/src/Makefile.am
deleted file mode 100644
index 1e8f3a31ea0..00000000000
--- a/mod_glusterfs/apache/2.2/src/Makefile.am
+++ /dev/null
@@ -1,31 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/2.2
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/2.2/src/mod_glusterfs-build.c
- $(APXS) -c -o mod_glusterfs.la -Wc,-g3 -Wc,-O0 -DLINUX=2 -D_REENTRANT -D_GNU_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -I$(top_srcdir)/libglusterfsclient/src -L$(top_builddir)/libglusterfsclient/src/.libs/ -lglusterfsclient mod_glusterfs-build.c
- -ln -sf .libs/mod_glusterfs.so mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- rm -fv *.so *.o
diff --git a/mod_glusterfs/apache/2.2/src/README.txt b/mod_glusterfs/apache/2.2/src/README.txt
deleted file mode 100644
index 214a2535b5a..00000000000
--- a/mod_glusterfs/apache/2.2/src/README.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installing mod_glusterfs for httpd-2.2 and higher.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to
-the way apache has been built, currently following points have to be taken care of:
-
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzf httpd-2.2.10.tar.gz
- $ cd httpd-2.2.10/
- $ export CFLAGS='-D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64'
- $ ./configure --prefix=/usr
- $ make
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version
- to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n glusterfs -ia /usr/lib/glusterfs/2.0.0rc4/apache/2.2/mod_glusterfs.so
-**********************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-* mod_glusterfs also implements mod_dir and mod_autoindex behaviour for files under glusterfs mount.
- Hence it also takes the directives related to both of these modules. For more details, refer the
- documentation for both of these modules.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is
- multithreaded. If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
diff --git a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c b/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
deleted file mode 100644
index d2b9f323266..00000000000
--- a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
+++ /dev/null
@@ -1,3627 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#ifndef NO_CONTENT_TYPE
-#define NO_CONTENT_TYPE "none"
-#endif
-
-#define BYTERANGE_FMT "%" APR_OFF_T_FMT "-%" APR_OFF_T_FMT "/%" APR_OFF_T_FMT
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <util_filter.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-#include <apr.h>
-#include <apr_strings.h>
-#include <apr_buckets.h>
-#include <apr_fnmatch.h>
-#include <apr_lib.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glfs: Unrecognized log-level \"%s\", "\
- " possible values are \"DEBUG|WARNING|"\
- "ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-module AP_MODULE_DECLARE_DATA glusterfs_module;
-extern module core_module;
-
-#define NO_OPTIONS (1 << 0) /* Indexing options */
-#define ICONS_ARE_LINKS (1 << 1)
-#define SCAN_HTML_TITLES (1 << 2)
-#define SUPPRESS_ICON (1 << 3)
-#define SUPPRESS_LAST_MOD (1 << 4)
-#define SUPPRESS_SIZE (1 << 5)
-#define SUPPRESS_DESC (1 << 6)
-#define SUPPRESS_PREAMBLE (1 << 7)
-#define SUPPRESS_COLSORT (1 << 8)
-#define SUPPRESS_RULES (1 << 9)
-#define FOLDERS_FIRST (1 << 10)
-#define VERSION_SORT (1 << 11)
-#define TRACK_MODIFIED (1 << 12)
-#define FANCY_INDEXING (1 << 13)
-#define TABLE_INDEXING (1 << 14)
-#define IGNORE_CLIENT (1 << 15)
-#define IGNORE_CASE (1 << 16)
-#define EMIT_XHTML (1 << 17)
-#define SHOW_FORBIDDEN (1 << 18)
-
-#define K_NOADJUST 0
-#define K_ADJUST 1
-#define K_UNSET 2
-
-/*
- * Define keys for sorting.
- */
-#define K_NAME 'N' /* Sort by file name (default) */
-#define K_LAST_MOD 'M' /* Last modification date */
-#define K_SIZE 'S' /* Size (absolute, not as displayed) */
-#define K_DESC 'D' /* Description */
-#define K_VALID "NMSD" /* String containing _all_ valid K_ opts */
-
-#define D_ASCENDING 'A'
-#define D_DESCENDING 'D'
-#define D_VALID "AD" /* String containing _all_ valid D_ opts */
-
-/*
- * These are the dimensions of the default icons supplied with Apache.
- */
-#define DEFAULT_ICON_WIDTH 20
-#define DEFAULT_ICON_HEIGHT 22
-
-/*
- * Other default dimensions.
- */
-#define DEFAULT_NAME_WIDTH 23
-#define DEFAULT_DESC_WIDTH 23
-
-struct mod_glfs_ai_item {
- char *type;
- char *apply_to;
- char *apply_path;
- char *data;
-};
-
-typedef struct mod_glfs_ai_desc_t {
- char *pattern;
- char *description;
- int full_path;
- int wildcards;
-} mod_glfs_ai_desc_t;
-
-typedef enum {
- SLASH_OFF = 0,
- SLASH_ON,
- SLASH_UNSET
-} mod_glfs_dir_slash_cfg;
-
-/* static ap_filter_rec_t *mod_glfs_output_filter_handle; */
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
-
- size_t xattr_file_size;
- uint32_t cache_timeout;
-
- /* mod_dir options */
- apr_array_header_t *index_names;
- mod_glfs_dir_slash_cfg do_slash;
-
- /* autoindex options */
- char *default_icon;
- char *style_sheet;
- apr_int32_t opts;
- apr_int32_t incremented_opts;
- apr_int32_t decremented_opts;
- int name_width;
- int name_adjust;
- int desc_width;
- int desc_adjust;
- int icon_width;
- int icon_height;
- char default_keyid;
- char default_direction;
-
- apr_array_header_t *icon_list;
- apr_array_header_t *alt_list;
- apr_array_header_t *desc_list;
- apr_array_header_t *ign_list;
- apr_array_header_t *hdr_list;
- apr_array_header_t *rdme_list;
-
- char *ctype;
- char *charset;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-
-static glusterfs_dir_config_t *
-mod_glfs_dconfig (request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-
-static const char *
-cmd_add_xattr_file_size (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_cache_timeout (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_loglevel (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = apr_pstrdup (cmd->pool, arg);
-
- return error;
-}
-
-static const char *
-cmd_add_logfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-
-static const char *
-cmd_add_volume_specfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-#define WILDCARDS_REQUIRED 0
-
-static const char *
-cmd_add_desc (cmd_parms *cmd, void *d, const char *desc,
- const char *to)
-{
- glusterfs_dir_config_t *dcfg = NULL;
- mod_glfs_ai_desc_t *desc_entry = NULL;
- char *prefix = "";
-
- dcfg = (glusterfs_dir_config_t *) d;
- desc_entry = (mod_glfs_ai_desc_t *) apr_array_push(dcfg->desc_list);
- desc_entry->full_path = (ap_strchr_c(to, '/') == NULL) ? 0 : 1;
- desc_entry->wildcards = (WILDCARDS_REQUIRED
- || desc_entry->full_path
- || apr_fnmatch_test(to));
- if (desc_entry->wildcards) {
- prefix = desc_entry->full_path ? "*/" : "*";
- desc_entry->pattern = apr_pstrcat(dcfg->desc_list->pool,
- prefix, to, "*", NULL);
- }
- else {
- desc_entry->pattern = apr_pstrdup(dcfg->desc_list->pool, to);
- }
- desc_entry->description = apr_pstrdup(dcfg->desc_list->pool, desc);
- return NULL;
-}
-
-
-static void push_item(apr_array_header_t *arr, char *type, const char *to,
- const char *path, const char *data)
-{
- struct mod_glfs_ai_item *p = NULL;
-
- p = (struct mod_glfs_ai_item *) apr_array_push(arr);
-
- if (!to) {
- to = "";
- }
- if (!path) {
- path = "";
- }
-
- p->type = type;
- p->data = data ? apr_pstrdup(arr->pool, data) : NULL;
- p->apply_path = apr_pstrcat(arr->pool, path, "*", NULL);
-
- if ((type == BY_PATH) && (!ap_is_matchexp(to))) {
- p->apply_to = apr_pstrcat(arr->pool, "*", to, NULL);
- }
- else if (to) {
- p->apply_to = apr_pstrdup(arr->pool, to);
- }
- else {
- p->apply_to = NULL;
- }
-}
-
-
-static const char *
-cmd_add_ignore (cmd_parms *cmd, void *d, const char *ext)
-{
- push_item(((glusterfs_dir_config_t *) d)->ign_list, 0, ext, cmd->path,
- NULL);
- return NULL;
-}
-
-
-static const char *
-cmd_add_header (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->hdr_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_readme (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->rdme_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_opts (cmd_parms *cmd, void *d, int argc, char *const argv[])
-{
- int i = 0, option = 0;
- char *w = NULL;
- apr_int32_t opts;
- apr_int32_t opts_add;
- apr_int32_t opts_remove;
- char action = 0;
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) d;
-
- opts = d_cfg->opts;
- opts_add = d_cfg->incremented_opts;
- opts_remove = d_cfg->decremented_opts;
-
- for (i = 0; i < argc; i++) {
- w = argv[i];
-
- if ((*w == '+') || (*w == '-')) {
- action = *(w++);
- }
- else {
- action = '\0';
- }
- if (!strcasecmp(w, "FancyIndexing")) {
- option = FANCY_INDEXING;
- }
- else if (!strcasecmp(w, "FoldersFirst")) {
- option = FOLDERS_FIRST;
- }
- else if (!strcasecmp(w, "HTMLTable")) {
- option = TABLE_INDEXING;
- }
- else if (!strcasecmp(w, "IconsAreLinks")) {
- option = ICONS_ARE_LINKS;
- }
- else if (!strcasecmp(w, "IgnoreCase")) {
- option = IGNORE_CASE;
- }
- else if (!strcasecmp(w, "IgnoreClient")) {
- option = IGNORE_CLIENT;
- }
- else if (!strcasecmp(w, "ScanHTMLTitles")) {
- option = SCAN_HTML_TITLES;
- }
- else if (!strcasecmp(w, "SuppressColumnSorting")) {
- option = SUPPRESS_COLSORT;
- }
- else if (!strcasecmp(w, "SuppressDescription")) {
- option = SUPPRESS_DESC;
- }
- else if (!strcasecmp(w, "SuppressHTMLPreamble")) {
- option = SUPPRESS_PREAMBLE;
- }
- else if (!strcasecmp(w, "SuppressIcon")) {
- option = SUPPRESS_ICON;
- }
- else if (!strcasecmp(w, "SuppressLastModified")) {
- option = SUPPRESS_LAST_MOD;
- }
- else if (!strcasecmp(w, "SuppressSize")) {
- option = SUPPRESS_SIZE;
- }
- else if (!strcasecmp(w, "SuppressRules")) {
- option = SUPPRESS_RULES;
- }
- else if (!strcasecmp(w, "TrackModified")) {
- option = TRACK_MODIFIED;
- }
- else if (!strcasecmp(w, "VersionSort")) {
- option = VERSION_SORT;
- }
- else if (!strcasecmp(w, "XHTML")) {
- option = EMIT_XHTML;
- }
- else if (!strcasecmp(w, "ShowForbidden")) {
- option = SHOW_FORBIDDEN;
- }
- else if (!strcasecmp(w, "None")) {
- if (action != '\0') {
- return "Cannot combine '+' or '-' with 'None' "
- "keyword";
- }
- opts = NO_OPTIONS;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (!strcasecmp(w, "IconWidth")) {
- if (action != '-') {
- d_cfg->icon_width = DEFAULT_ICON_WIDTH;
- }
- else {
- d_cfg->icon_width = 0;
- }
- }
- else if (!strncasecmp(w, "IconWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with IconWidth=n";
- }
- d_cfg->icon_width = atoi(&w[10]);
- }
- else if (!strcasecmp(w, "IconHeight")) {
- if (action != '-') {
- d_cfg->icon_height = DEFAULT_ICON_HEIGHT;
- }
- else {
- d_cfg->icon_height = 0;
- }
- }
- else if (!strncasecmp(w, "IconHeight=", 11)) {
- if (action == '-') {
- return "Cannot combine '-' with IconHeight=n";
- }
- d_cfg->icon_height = atoi(&w[11]);
- }
- else if (!strcasecmp(w, "NameWidth")) {
- if (action != '-') {
- return "NameWidth with no value may only appear"
- " as "
- "'-NameWidth'";
- }
- d_cfg->name_width = DEFAULT_NAME_WIDTH;
- d_cfg->name_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "NameWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with NameWidth=n";
- }
- if (w[10] == '*') {
- d_cfg->name_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[10]);
-
- if (width && (width < 5)) {
- return "NameWidth value must be greater"
- " than 5";
- }
- d_cfg->name_width = width;
- d_cfg->name_adjust = K_NOADJUST;
- }
- }
- else if (!strcasecmp(w, "DescriptionWidth")) {
- if (action != '-') {
- return "DescriptionWidth with no value may only"
- " appear as "
- "'-DescriptionWidth'";
- }
- d_cfg->desc_width = DEFAULT_DESC_WIDTH;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "DescriptionWidth=", 17)) {
- if (action == '-') {
- return "Cannot combine '-' with "
- "DescriptionWidth=n";
- }
- if (w[17] == '*') {
- d_cfg->desc_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[17]);
-
- if (width && (width < 12)) {
- return "DescriptionWidth value must be "
- "greater than 12";
- }
- d_cfg->desc_width = width;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- }
- else if (!strncasecmp(w, "Type=", 5)) {
- d_cfg->ctype = apr_pstrdup(cmd->pool, &w[5]);
- }
- else if (!strncasecmp(w, "Charset=", 8)) {
- d_cfg->charset = apr_pstrdup(cmd->pool, &w[8]);
- }
- else {
- return "Invalid directory indexing option";
- }
- if (action == '\0') {
- opts |= option;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (action == '+') {
- opts_add |= option;
- opts_remove &= ~option;
- }
- else {
- opts_remove |= option;
- opts_add &= ~option;
- }
- }
- if ((opts & NO_OPTIONS) && (opts & ~NO_OPTIONS)) {
- return "Cannot combine other IndexOptions keywords with 'None'";
- }
- d_cfg->incremented_opts = opts_add;
- d_cfg->decremented_opts = opts_remove;
- d_cfg->opts = opts;
- return NULL;
-}
-
-
-static const char *
-cmd_set_default_order(cmd_parms *cmd, void *m,
- const char *direction, const char *key)
-{
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) m;
-
- if (!strcasecmp(direction, "Ascending")) {
- d_cfg->default_direction = D_ASCENDING;
- }
- else if (!strcasecmp(direction, "Descending")) {
- d_cfg->default_direction = D_DESCENDING;
- }
- else {
- return "First keyword must be 'Ascending' or 'Descending'";
- }
-
- if (!strcasecmp(key, "Name")) {
- d_cfg->default_keyid = K_NAME;
- }
- else if (!strcasecmp(key, "Date")) {
- d_cfg->default_keyid = K_LAST_MOD;
- }
- else if (!strcasecmp(key, "Size")) {
- d_cfg->default_keyid = K_SIZE;
- }
- else if (!strcasecmp(key, "Description")) {
- d_cfg->default_keyid = K_DESC;
- }
- else {
- return "Second keyword must be 'Name', 'Date', 'Size', or "
- "'Description'";
- }
-
- return NULL;
-}
-
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-/*
- * This routine puts the standard HTML header at the top of the index page.
- * We include the DOCTYPE because we may be using features therefrom (i.e.,
- * HEIGHT and WIDTH attributes on the icons if we're FancyIndexing).
- */
-static void emit_preamble(request_rec *r, int xhtml, const char *title)
-{
- glusterfs_dir_config_t *d;
-
- d = (glusterfs_dir_config_t *) ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- if (xhtml) {
- ap_rvputs(r, DOCTYPE_XHTML_1_0T,
- "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"
- " <head>\n <title>Index of ", title,
- "</title>\n", NULL);
- } else {
- ap_rvputs(r, DOCTYPE_HTML_3_2,
- "<html>\n <head>\n"
- " <title>Index of ", title,
- "</title>\n", NULL);
- }
-
- if (d->style_sheet != NULL) {
- ap_rvputs(r, " <link rel=\"stylesheet\" href=\"",
- d->style_sheet, "\" type=\"text/css\"",
- xhtml ? " />\n" : ">\n", NULL);
- }
- ap_rvputs(r, " </head>\n <body>\n", NULL);
-}
-
-
-static const char *cmd_add_alt(cmd_parms *cmd, void *d, const char *alt,
- const char *to)
-{
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- char *tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->alt_list, cmd->info, to,
- cmd->path, alt);
- return NULL;
-}
-
-static const char *cmd_add_icon(cmd_parms *cmd, void *d, const char *icon,
- const char *to)
-{
- char *iconbak = apr_pstrdup(cmd->pool, icon);
- char *alt = NULL, *cl = NULL, *tmp = NULL;
-
- if (icon[0] == '(') {
- cl = strchr(iconbak, ')');
-
- if (cl == NULL) {
- return "missing closing paren";
- }
- alt = ap_getword_nc(cmd->pool, &iconbak, ',');
- *cl = '\0'; /* Lose closing paren */
- cmd_add_alt(cmd, d, &alt[1], to);
- }
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->icon_list, cmd->info, to,
- cmd->path, iconbak);
- return NULL;
-}
-
-
-static void *
-mod_glfs_create_dir_config(apr_pool_t *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) apr_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- /* mod_dir options init */
- dir_config->index_names = NULL;
- dir_config->do_slash = SLASH_UNSET;
-
- /* autoindex options init */
- dir_config->icon_width = 0;
- dir_config->icon_height = 0;
- dir_config->name_width = DEFAULT_NAME_WIDTH;
- dir_config->name_adjust = K_UNSET;
- dir_config->desc_width = DEFAULT_DESC_WIDTH;
- dir_config->desc_adjust = K_UNSET;
- dir_config->icon_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->alt_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->desc_list = apr_array_make(p, 4,
- sizeof(mod_glfs_ai_desc_t));
- dir_config->ign_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->hdr_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->rdme_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->opts = 0;
- dir_config->incremented_opts = 0;
- dir_config->decremented_opts = 0;
- dir_config->default_keyid = '\0';
- dir_config->default_direction = '\0';
-
- return (void *) dir_config;
-}
-
-
-static void *
-mod_glfs_merge_dir_config(apr_pool_t *p, void *parent_conf,
- void *newloc_conf)
-{
- glusterfs_dir_config_t *new = NULL;
- glusterfs_dir_config_t *add = NULL;
- glusterfs_dir_config_t *base = NULL;
-
- new = (glusterfs_dir_config_t *)
- apr_pcalloc(p, sizeof(glusterfs_dir_config_t));
- add = newloc_conf;
- base = parent_conf;
-
- if (add->logfile)
- new->logfile = apr_pstrdup (p, add->logfile);
-
- if (add->loglevel)
- new->loglevel = apr_pstrdup (p, add->loglevel);
-
- if (add->specfile)
- new->specfile = apr_pstrdup (p, add->specfile);
-
- if (add->mount_dir)
- new->mount_dir = apr_pstrdup (p, add->mount_dir);
-
- new->xattr_file_size = add->xattr_file_size;
- new->cache_timeout = add->cache_timeout;
- new->buf = add->buf;
-
- /* mod_dir */
- new->index_names = add->index_names ?
- add->index_names : base->index_names;
- new->do_slash =
- (add->do_slash == SLASH_UNSET) ? base->do_slash : add->do_slash;
-
- /* auto index */
- new->default_icon = add->default_icon ? add->default_icon
- : base->default_icon;
- new->style_sheet = add->style_sheet ? add->style_sheet
- : base->style_sheet;
- new->icon_height = add->icon_height ?
- add->icon_height : base->icon_height;
- new->icon_width = add->icon_width ? add->icon_width : base->icon_width;
-
- new->ctype = add->ctype ? add->ctype : base->ctype;
- new->charset = add->charset ? add->charset : base->charset;
-
- new->alt_list = apr_array_append(p, add->alt_list, base->alt_list);
- new->ign_list = apr_array_append(p, add->ign_list, base->ign_list);
- new->hdr_list = apr_array_append(p, add->hdr_list, base->hdr_list);
- new->desc_list = apr_array_append(p, add->desc_list, base->desc_list);
- new->icon_list = apr_array_append(p, add->icon_list, base->icon_list);
- new->rdme_list = apr_array_append(p, add->rdme_list, base->rdme_list);
- if (add->opts & NO_OPTIONS) {
- /*
- * If the current directory says 'no options' then we also
- * clear any incremental mods from being inheritable further down.
- */
- new->opts = NO_OPTIONS;
- new->incremented_opts = 0;
- new->decremented_opts = 0;
- }
- else {
- /*
- * If there were any nonincremental options selected for
- * this directory, they dominate and we don't inherit *anything.*
- * Contrariwise, we *do* inherit if the only settings here are
- * incremental ones.
- */
- if (add->opts == 0) {
- new->incremented_opts = (base->incremented_opts
- | add->incremented_opts)
- & ~add->decremented_opts;
- new->decremented_opts = (base->decremented_opts
- | add->decremented_opts);
- /*
- * We may have incremental settings, so make sure we
- * don't inadvertently inherit an IndexOptions None
- * from above.
- */
- new->opts = (base->opts & ~NO_OPTIONS);
- }
- else {
- /*
- * There are local nonincremental settings, which clear
- * all inheritance from above. They *are* the new
- * base settings.
- */
- new->opts = add->opts;;
- }
- /*
- * We're guaranteed that there'll be no overlap between
- * the add-options and the remove-options.
- */
- new->opts |= new->incremented_opts;
- new->opts &= ~new->decremented_opts;
- }
- /*
- * Inherit the NameWidth settings if there aren't any specific to
- * the new location; otherwise we'll end up using the defaults set
- * in the config-rec creation routine.
- */
- if (add->name_adjust == K_UNSET) {
- new->name_width = base->name_width;
- new->name_adjust = base->name_adjust;
- }
- else {
- new->name_width = add->name_width;
- new->name_adjust = add->name_adjust;
- }
-
- /*
- * Likewise for DescriptionWidth.
- */
- if (add->desc_adjust == K_UNSET) {
- new->desc_width = base->desc_width;
- new->desc_adjust = base->desc_adjust;
- }
- else {
- new->desc_width = add->desc_width;
- new->desc_adjust = add->desc_adjust;
- }
-
- new->default_keyid = add->default_keyid ? add->default_keyid
- : base->default_keyid;
- new->default_direction = add->default_direction ? add->default_direction
- : base->default_direction;
-
- return (void *) new;
-}
-
-
-static void
-mod_glfs_child_init(apr_pool_t *p, server_rec *s)
-{
- int i = 0, num_sec = 0, ret = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = (core_server_config *) ap_get_module_config (s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
-
- if (dir_config) {
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.logfile = dir_config->logfile;
- ctx.loglevel = dir_config->loglevel;
- ctx.lookup_timeout = dir_config->cache_timeout;
- ctx.stat_timeout = dir_config->cache_timeout;
- ctx.specfile = dir_config->specfile;
-
- ret = glusterfs_mount (dir_config->mount_dir, &ctx);
- if (ret != 0) {
- ap_log_error(APLOG_MARK, APLOG_ERR,
- APR_EGENERAL, s,
- "mod_glfs_child_init: "
- "glusterfs_init failed, check "
- "glusterfs logfile %s for more "
- "details",
- dir_config->logfile);
- }
- }
- dir_config = NULL;
- }
-}
-
-
-static void
-mod_glfs_child_exit(server_rec *s, apr_pool_t *p)
-{
- int i = 0, num_sec = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = ap_get_module_config(s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static apr_filetype_e filetype_from_mode(mode_t mode)
-{
- apr_filetype_e type = APR_NOFILE;
-
- if (S_ISREG(mode))
- type = APR_REG;
- else if (S_ISDIR(mode))
- type = APR_DIR;
- else if (S_ISCHR(mode))
- type = APR_CHR;
- else if (S_ISBLK(mode))
- type = APR_BLK;
- else if (S_ISFIFO(mode))
- type = APR_PIPE;
- else if (S_ISLNK(mode))
- type = APR_LNK;
- else if (S_ISSOCK(mode))
- type = APR_SOCK;
- else
- type = APR_UNKFILE;
- return type;
-}
-
-
-static void fill_out_finfo(apr_finfo_t *finfo, struct stat *info,
- apr_int32_t wanted)
-{
- finfo->valid = APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK
- | APR_FINFO_OWNER | APR_FINFO_PROT;
- finfo->protection = apr_unix_mode2perms(info->st_mode);
- finfo->filetype = filetype_from_mode(info->st_mode);
- finfo->user = info->st_uid;
- finfo->group = info->st_gid;
- finfo->size = info->st_size;
- finfo->device = info->st_dev;
- finfo->nlink = info->st_nlink;
-
- /* Check for overflow if storing a 64-bit st_ino in a 32-bit
- * apr_ino_t for LFS builds: */
- if (sizeof(apr_ino_t) >= sizeof(info->st_ino)
- || (apr_ino_t)info->st_ino == info->st_ino) {
- finfo->inode = info->st_ino;
- } else {
- finfo->valid &= ~APR_FINFO_INODE;
- }
-
- apr_time_ansi_put(&finfo->atime, info->st_atime);
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- finfo->atime += info->st_atim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
- finfo->atime += info->st_atimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIME_N)
- finfo->ctime += info->st_atime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->mtime, info->st_mtime);
-#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
- finfo->mtime += info->st_mtim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
- finfo->mtime += info->st_mtimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIME_N)
- finfo->ctime += info->st_mtime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->ctime, info->st_ctime);
-#ifdef HAVE_STRUCT_STAT_ST_CTIM_TV_NSEC
- finfo->ctime += info->st_ctim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIMENSEC)
- finfo->ctime += info->st_ctimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIME_N)
- finfo->ctime += info->st_ctime_n / APR_TIME_C(1000);
-#endif
-
-#ifdef HAVE_STRUCT_STAT_ST_BLOCKS
-#ifdef DEV_BSIZE
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)DEV_BSIZE;
-#else
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)512;
-#endif
- finfo->valid |= APR_FINFO_CSIZE;
-#endif
-}
-
-
-static int
-mod_glfs_map_to_storage(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL, *tmp = NULL;
- int access_status = 0, ret = 0;
- char *path = NULL;
- struct stat st = {0, };
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- int num_sec = 0, i = 0;
-
- sconf = (core_server_config *) ap_get_module_config (r->server->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- tmp = ap_get_module_config (sec_ent[i], &glusterfs_module);
-
- if (tmp && !strncmp (tmp->mount_dir, r->uri,
- strlen (tmp->mount_dir))) {
- if (!dir_config ||
- strlen (tmp->mount_dir)
- > strlen (dir_config->mount_dir)) {
- dir_config = tmp;
- }
- }
-
- }
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (apr_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL), r->uri,
- strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = GLUSTERFS_HANDLER;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &st);
-
- if (ret == -1 || st.st_size > dir_config->xattr_file_size
- || S_ISDIR (st.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, 0,
- r, "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- r->finfo.pool = r->pool;
- r->finfo.fname = r->filename;
- fill_out_finfo (&r->finfo, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK |
- APR_FINFO_OWNER | APR_FINFO_PROT);
-
- /* allow core module to run directory_walk() and location_walk() */
- return DECLINED;
-}
-
-
-static int
-mod_glfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glfs_read_async (request_rec *r, apr_bucket_brigade *bb,
- glusterfs_file_t fd,
- apr_off_t offset, apr_off_t length)
-{
- glusterfs_async_local_t local = {0, };
- off_t end = 0;
- int nbytes = 0, complete = 0;
- conn_rec *c = r->connection;
- apr_bucket *e = NULL;
- apr_status_t status = APR_SUCCESS;
- glusterfs_iobuf_t *buf = NULL;
-
- if (length == 0) {
- return 0;
- }
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length) ||
- (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (!bb) {
- bb = apr_brigade_create (r->pool, c->bucket_alloc);
- }
- apr_brigade_writev (bb, NULL, NULL, buf->vector, buf->count);
-
- /*
- * make sure all the data is written out, since we call
- * glusterfs_free on buf once ap_pass_brigade returns
- */
- e = apr_bucket_flush_create (c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- status = ap_pass_brigade (r->output_filters, bb);
- if (status != APR_SUCCESS) {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- complete = 1;
- local.op_ret = -1;
- }
-
- glusterfs_free (buf);
-
- /*
- * bb has already been cleaned up by core_output_filter,
- * just being paranoid
- */
- apr_brigade_cleanup (bb);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? HTTP_INTERNAL_SERVER_ERROR : OK);
-}
-
-static int
-parse_byterange(char *range, apr_off_t clength,
- apr_off_t *start, apr_off_t *end)
-{
- char *dash = NULL, *errp = NULL;
- apr_off_t number;
-
- dash = strchr(range, '-');
- if (!dash) {
- return 0;
- }
-
- if ((dash == range)) {
- /* In the form "-5" */
- if (apr_strtoff(&number, dash+1, &errp, 10) || *errp) {
- return 0;
- }
- *start = clength - number;
- *end = clength - 1;
- }
- else {
- *dash++ = '\0';
- if (apr_strtoff(&number, range, &errp, 10) || *errp) {
- return 0;
- }
- *start = number;
- if (*dash) {
- if (apr_strtoff(&number, dash, &errp, 10) || *errp) {
- return 0;
- }
- *end = number;
- }
- else { /* "5-" */
- *end = clength - 1;
- }
- }
-
- if (*start < 0) {
- *start = 0;
- }
-
- if (*end >= clength) {
- *end = clength - 1;
- }
-
- if (*start > *end) {
- return -1;
- }
-
- return (*start > 0 || *end < clength);
-}
-
-
-static int use_range_x(request_rec *r)
-{
- const char *ua = NULL;
- return (apr_table_get(r->headers_in, "Request-Range")
- || ((ua = apr_table_get(r->headers_in, "User-Agent"))
- && ap_strstr_c(ua, "MSIE 3")));
-}
-
-
-static int ap_set_byterange(request_rec *r)
-{
- const char *range = NULL, *if_range = NULL, *match = NULL, *ct = NULL;
- int num_ranges = 0;
-
- if (r->assbackwards) {
- return 0;
- }
-
- /* Check for Range request-header (HTTP/1.1) or Request-Range for
- * backwards-compatibility with second-draft Luotonen/Franks
- * byte-ranges (e.g. Netscape Navigator 2-3).
- *
- * We support this form, with Request-Range, and (farther down) we
- * send multipart/x-byteranges instead of multipart/byteranges for
- * Request-Range based requests to work around a bug in Netscape
- * Navigator 2-3 and MSIE 3.
- */
-
- if (!(range = apr_table_get(r->headers_in, "Range"))) {
- range = apr_table_get(r->headers_in, "Request-Range");
- }
-
- if (!range || strncasecmp(range, "bytes=", 6) || r->status != HTTP_OK) {
- return 0;
- }
-
- /* is content already a single range? */
- if (apr_table_get(r->headers_out, "Content-Range")) {
- return 0;
- }
-
- /* is content already a multiple range? */
- if ((ct = apr_table_get(r->headers_out, "Content-Type"))
- && (!strncasecmp(ct, "multipart/byteranges", 20)
- || !strncasecmp(ct, "multipart/x-byteranges", 22))) {
- return 0;
- }
-
- /* Check the If-Range header for Etag or Date.
- * Note that this check will return false (as required) if either
- * of the two etags are weak.
- */
- if ((if_range = apr_table_get(r->headers_in, "If-Range"))) {
- if (if_range[0] == '"') {
- if (!(match = apr_table_get(r->headers_out, "Etag"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
- else if (!(match = apr_table_get(r->headers_out,
- "Last-Modified"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
-
- if (!ap_strchr_c(range, ',')) {
- /* a single range */
- num_ranges = 1;
- }
- else {
- /* a multiple range */
- num_ranges = 2;
- }
-
- r->status = HTTP_PARTIAL_CONTENT;
- r->range = range + 6;
-
- return num_ranges;
-}
-
-
-static void
-mod_glfs_handle_byte_ranges (request_rec *r, glusterfs_file_t fd,
- int num_ranges)
-{
- conn_rec *c = r->connection;
- char *ts = NULL, *boundary = NULL, *bound_head = NULL;
- const char *orig_ct = NULL;
- char *current = NULL, *end = NULL;
- apr_bucket_brigade *bsend = NULL;
- apr_bucket *e = NULL;
- apr_off_t range_start, range_end;
- apr_status_t rv = APR_SUCCESS;
- char found = 0;
- apr_bucket *e2 = NULL, *ec = NULL;
-
- orig_ct = ap_make_content_type (r, r->content_type);
-
- if (num_ranges > 1) {
- boundary = apr_psprintf(r->pool, "%" APR_UINT64_T_HEX_FMT "%lx",
- (apr_uint64_t)r->request_time,
- (long) getpid());
-
- ap_set_content_type(r, apr_pstrcat(r->pool, "multipart",
- use_range_x(r) ? "/x-" : "/",
- "byteranges; boundary=",
- boundary, NULL));
-
- if (strcasecmp(orig_ct, NO_CONTENT_TYPE)) {
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-type: ",
- orig_ct,
- CRLF "Content-range: bytes ",
- NULL);
- }
- else {
- /* if we have no type for the content, do our best */
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-range: bytes ",
- NULL);
- }
- }
-
- while ((current = ap_getword(r->pool, &r->range, ','))
- && (rv = parse_byterange(current, r->finfo.size, &range_start,
- &range_end))) {
- bsend = NULL;
- if (rv == -1) {
- continue;
- }
-
- found = 1;
-
- /* For single range requests, we must produce Content-Range
- * header. Otherwise, we need to produce the multipart
- * boundaries.
- */
- if (num_ranges == 1) {
- apr_table_setn(r->headers_out, "Content-Range",
- apr_psprintf(r->pool,
- "bytes " BYTERANGE_FMT,
- range_start, range_end,
- r->finfo.size));
- }
- else {
- /* this brigade holds what we will be sending */
- bsend = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_pool_create(bound_head,
- strlen(bound_head),
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
-
- ts = apr_psprintf(r->pool, BYTERANGE_FMT CRLF CRLF,
- range_start, range_end,
- r->finfo.size);
- e = apr_bucket_pool_create(ts, strlen(ts), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
- mod_glfs_read_async (r, bsend, fd, range_start,
- (range_end + 1 - range_start));
- }
-
- bsend = apr_brigade_create (r->pool, c->bucket_alloc);
-
- if (found == 0) {
- r->status = HTTP_OK;
- /* bsend is assumed to be empty if we get here. */
- e = ap_bucket_error_create(HTTP_RANGE_NOT_SATISFIABLE, NULL,
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- ap_pass_brigade (r->output_filters, bsend);
- return;
- }
-
- if (num_ranges > 1) {
- /* add the final boundary */
- end = apr_pstrcat(r->pool, CRLF "--", boundary, "--" CRLF,
- NULL);
-// ap_xlate_proto_to_ascii(end, strlen(end));
- e = apr_bucket_pool_create(end, strlen(end), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
-
- ap_pass_brigade (r->output_filters, bsend);
-}
-
-
-
-/****************************************************************
- *
- * Looking things up in config entries...
- */
-
-/* Structure used to hold entries when we're actually building an index */
-
-struct ent {
- char *name;
- char *icon;
- char *alt;
- char *desc;
- apr_off_t size;
- apr_time_t lm;
- struct ent *next;
- int ascending, ignore_case, version_sort;
- char key;
- int isdir;
-};
-
-static char *find_item(request_rec *r, apr_array_header_t *list, int path_only)
-{
- const char *content_type = NULL;
- const char *content_encoding = NULL;
- char *path = NULL;
- int i = 0;
- struct mod_glfs_ai_item *items = NULL;
- struct mod_glfs_ai_item *p = NULL;
-
- content_type = ap_field_noparam(r->pool, r->content_type);
- content_encoding = r->content_encoding;
- path = r->filename;
- items = (struct mod_glfs_ai_item *) list->elts;
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- /* Special cased for ^^DIRECTORY^^ and ^^BLANKICON^^ */
- if ((path[0] == '^') || (!ap_strcmp_match(path,
- p->apply_path))) {
- if (!*(p->apply_to)) {
- return p->data;
- }
- else if (p->type == BY_PATH || path[0] == '^') {
- if (!ap_strcmp_match(path, p->apply_to)) {
- return p->data;
- }
- }
- else if (!path_only) {
- if (!content_encoding) {
- if (p->type == BY_TYPE) {
- if (content_type
- && !ap_strcasecmp_match(content_type,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- else {
- if (p->type == BY_ENCODING) {
- if (!ap_strcasecmp_match(content_encoding,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- }
- }
- }
- return NULL;
-}
-
-#define find_icon(d,p,t) find_item(p,d->icon_list,t)
-#define find_alt(d,p,t) find_item(p,d->alt_list,t)
-#define find_header(d,p) find_item(p,d->hdr_list,0)
-#define find_readme(d,p) find_item(p,d->rdme_list,0)
-
-static char *find_default_item(char *bogus_name, apr_array_header_t *list)
-{
- request_rec r;
- /* Bleah. I tried to clean up find_item, and it lead to this bit
- * of ugliness. Note that the fields initialized are precisely
- * those that find_item looks at...
- */
- r.filename = bogus_name;
- r.content_type = r.content_encoding = NULL;
- return find_item(&r, list, 1);
-}
-
-#define find_default_icon(d,n) find_default_item(n, d->icon_list)
-#define find_default_alt(d,n) find_default_item(n, d->alt_list)
-
-/*
- * Look through the list of pattern/description pairs and return the first one
- * if any) that matches the filename in the request. If multiple patterns
- * match, only the first one is used; since the order in the array is the
- * same as the order in which directives were processed, earlier matching
- * directives will dominate.
- */
-
-#ifdef CASE_BLIND_FILESYSTEM
-#define MATCH_FLAGS APR_FNM_CASE_BLIND
-#else
-#define MATCH_FLAGS 0
-#endif
-
-static char *find_desc(glusterfs_dir_config_t *dcfg, const char *filename_full)
-{
- int i = 0;
- mod_glfs_ai_desc_t *list = NULL;
- const char *filename_only = NULL, *filename = NULL;
- mod_glfs_ai_desc_t *tuple = &list[i];
- int found = 0;
-
- list = (mod_glfs_ai_desc_t *) dcfg->desc_list->elts;
- /*
- * If the filename includes a path, extract just the name itself
- * for the simple matches.
- */
- if ((filename_only = ap_strrchr_c(filename_full, '/')) == NULL) {
- filename_only = filename_full;
- }
- else {
- filename_only++;
- }
- for (i = 0; i < dcfg->desc_list->nelts; ++i) {
- /*
- * Only use the full-path filename if the pattern contains '/'s.
- */
- filename = (tuple->full_path) ? filename_full : filename_only;
- /*
- * Make the comparison using the cheapest method; only do
- * wildcard checking if we must.
- */
- if (tuple->wildcards) {
- found = (apr_fnmatch(tuple->pattern, filename,
- MATCH_FLAGS) == 0);
- }
- else {
- found = (ap_strstr_c(filename, tuple->pattern) != NULL);
- }
- if (found) {
- return tuple->description;
- }
- }
- return NULL;
-}
-
-static int ignore_entry(glusterfs_dir_config_t *d, char *path)
-{
- apr_array_header_t *list = d->ign_list;
- struct mod_glfs_ai_item *items = (struct mod_glfs_ai_item *) list->elts;
- char *tt = NULL, *ap = NULL;
- int i = 0;
- struct mod_glfs_ai_item *p = &items[i];
-
- if ((tt = strrchr(path, '/')) == NULL) {
- tt = path;
- }
- else {
- tt++;
- }
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- if ((ap = strrchr(p->apply_to, '/')) == NULL) {
- ap = p->apply_to;
- }
- else {
- ap++;
- }
-
-#ifndef CASE_BLIND_FILESYSTEM
- if (!ap_strcmp_match(path, p->apply_path)
- && !ap_strcmp_match(tt, ap)) {
- return 1;
- }
-#else /* !CASE_BLIND_FILESYSTEM */
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (!ap_strcasecmp_match(path, p->apply_path)
- && !ap_strcasecmp_match(tt, ap)) {
- return 1;
- }
-#endif /* !CASE_BLIND_FILESYSTEM */
- }
- return 0;
-}
-
-/*****************************************************************
- *
- * Actually generating output
- */
-
-/*
- * Elements of the emitted document:
- * Preamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) header file.
- * Header file
- * Emitted if found (and able).
- * H1 tag line
- * Emitted if a header file is NOT emitted.
- * Directory stuff
- * Always emitted.
- * HR
- * Emitted if FANCY_INDEXING is set.
- * Readme file
- * Emitted if found (and able).
- * ServerSig
- * Emitted if ServerSignature is not Off AND a readme file
- * is NOT emitted.
- * Postamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) readme file.
- */
-
-
-/*
- * emit a plain text file
- */
-static void do_emit_plain(request_rec *r, apr_file_t *f)
-{
- char buf[AP_IOBUFSIZE + 1];
- int ch = 0;
- apr_size_t i = 0, c = 0, n = 0;
- apr_status_t rv = APR_SUCCESS;
-
- ap_rputs("<pre>\n", r);
- while (!apr_file_eof(f)) {
- do {
- n = sizeof(char) * AP_IOBUFSIZE;
- rv = apr_file_read(f, buf, &n);
- } while (APR_STATUS_IS_EINTR(rv));
- if (n == 0 || rv != APR_SUCCESS) {
- /* ###: better error here? */
- break;
- }
- buf[n] = '\0';
- c = 0;
- while (c < n) {
- for (i = c; i < n; i++) {
- if (buf[i] == '<' || buf[i] == '>'
- || buf[i] == '&') {
- break;
- }
- }
- ch = buf[i];
- buf[i] = '\0';
- ap_rputs(&buf[c], r);
- if (ch == '<') {
- ap_rputs("&lt;", r);
- }
- else if (ch == '>') {
- ap_rputs("&gt;", r);
- }
- else if (ch == '&') {
- ap_rputs("&amp;", r);
- }
- c = i + 1;
- }
- }
- ap_rputs("</pre>\n", r);
-}
-
-/*
- * Handle the preamble through the H1 tag line, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as HEADER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_head(request_rec *r, char *header_fname, int suppress_amble,
- int emit_xhtml, char *title)
-{
- apr_table_t *hdrs = r->headers_in;
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int emit_amble = 1;
- int emit_H1 = 1;
- const char *r_accept = NULL;
- const char *r_accept_enc = NULL;
-
- /*
- * If there's a header file, send a subrequest to look for it. If it's
- * found and html do the subrequest, otherwise handle it
- */
- r_accept = apr_table_get(hdrs, "Accept");
- r_accept_enc = apr_table_get(hdrs, "Accept-Encoding");
- apr_table_setn(hdrs, "Accept", "text/html, text/plain");
- apr_table_unset(hdrs, "Accept-Encoding");
-
-
- if ((header_fname != NULL) && r->args) {
- header_fname = apr_pstrcat(r->pool, header_fname, "?", r->args,
- NULL);
- }
-
- if ((header_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(header_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && (rr->finfo.filetype == APR_REG)) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f = NULL;
-
- /* Hope everything will work... */
- emit_amble = 0;
- emit_H1 = 0;
-
- if (! suppress_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- /* This is a hack, but I can't find any better
- * way to do this. The problem is that we have
- * already created the sub-request,
- * but we just inserted the OLD_WRITE filter,
- * and the sub-request needs to pass its data
- * through the OLD_WRITE filter, or things go
- * horribly wrong (missing data, data in
- * the wrong order, etc). To fix it, if you
- * create a sub-request and then insert the
- * OLD_WRITE filter before you run the request,
- * you need to make sure that the sub-request
- * data goes through the OLD_WRITE filter. Just
- * steal this code. The long-term solution is
- * to remove the ap_r* functions.
- */
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
- /*
- * If there's a problem running the subrequest,
- * display the preamble if we didn't do it
- * before -- the header file didn't get displayed.
- */
- if (ap_run_sub_req(rr) != OK) {
- /* It didn't work */
- emit_amble = suppress_amble;
- emit_H1 = 1;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, prefix it with the
- * preamble regardless; since we'll be sending
- * a <pre> block around the file's contents,
- * any HTML header it had won't end up
- * where it belongs.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- emit_preamble(r, emit_xhtml, title);
- emit_amble = 0;
- do_emit_plain(r, f);
- apr_file_close(f);
- emit_H1 = 0;
- }
- }
- }
- }
-
- if (r_accept) {
- apr_table_setn(hdrs, "Accept", r_accept);
- }
- else {
- apr_table_unset(hdrs, "Accept");
- }
-
- if (r_accept_enc) {
- apr_table_setn(hdrs, "Accept-Encoding", r_accept_enc);
- }
-
- if (emit_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- if (emit_H1) {
- ap_rvputs(r, "<h1>Index of ", title, "</h1>\n", NULL);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-/*
- * Handle the Readme file through the postamble, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as FOOTER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_tail(request_rec *r, char *readme_fname, int suppress_amble)
-{
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int suppress_post = 0, suppress_sig = 0;
-
- /*
- * If there's a readme file, send a subrequest to look for it. If it's
- * found and a text file, handle it -- otherwise fall through and
- * pretend there's nothing there.
- */
- if ((readme_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(readme_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && rr->finfo.filetype == APR_REG) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f;
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
-
- if (ap_run_sub_req(rr) == OK) {
- /* worked... */
- suppress_sig = 1;
- suppress_post = suppress_amble;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, suppress the signature.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- do_emit_plain(r, f);
- apr_file_close(f);
- suppress_sig = 1;
- }
- }
- }
- }
-
- if (!suppress_sig) {
- ap_rputs(ap_psignature("", r), r);
- }
- if (!suppress_post) {
- ap_rputs("</body></html>\n", r);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-static char *find_title(request_rec *r)
-{
- char titlebuf[MAX_STRING_LEN], *find = "<title>";
- apr_file_t *thefile = NULL;
- int x = 0, y = 0, p = 0;
- apr_size_t n;
-
- if (r->status != HTTP_OK) {
- return NULL;
- }
- if ((r->content_type != NULL)
- && (!strcasecmp(ap_field_noparam(r->pool, r->content_type),
- "text/html")
- || !strcmp(r->content_type, INCLUDES_MAGIC_TYPE))
- && !r->content_encoding) {
- if (apr_file_open(&thefile, r->filename, APR_READ,
- APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
- return NULL;
- }
- n = sizeof(char) * (MAX_STRING_LEN - 1);
- apr_file_read(thefile, titlebuf, &n);
- if (n <= 0) {
- apr_file_close(thefile);
- return NULL;
- }
- titlebuf[n] = '\0';
- for (x = 0, p = 0; titlebuf[x]; x++) {
- if (apr_tolower(titlebuf[x]) == find[p]) {
- if (!find[++p]) {
- if ((p = ap_ind(&titlebuf[++x], '<'))
- != -1) {
- titlebuf[x + p] = '\0';
- }
- /* Scan for line breaks for Tanmoy's
- secretary
- */
- for (y = x; titlebuf[y]; y++) {
- if ((titlebuf[y] == CR)
- || (titlebuf[y] == LF)) {
- if (y == x) {
- x++;
- }
- else {
- titlebuf[y] = ' ';
- }
- }
- }
- apr_file_close(thefile);
- return apr_pstrdup(r->pool,
- &titlebuf[x]);
- }
- }
- else {
- p = 0;
- }
- }
- apr_file_close(thefile);
- }
- return NULL;
-}
-
-static struct ent *make_parent_entry(apr_int32_t autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction)
-{
- struct ent *p = NULL;
- char *testpath = NULL;
- /*
- * p->name is now the true parent URI.
- * testpath is a crafted lie, so that the syntax '/some/..'
- * (or simply '..')be used to describe 'up' from '/some/'
- * when processeing IndexIgnore, and Icon|Alt|Desc configs.
- */
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- /* The output has always been to the parent. Don't make ourself
- * our own parent (worthless cyclical reference).
- */
- if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
- return (NULL);
- }
- ap_getparents(p->name);
- if (!*p->name) {
- return (NULL);
- }
-
- /* IndexIgnore has always compared "/thispath/.." */
- testpath = ap_make_full_path(r->pool, r->filename, "..");
- if (ignore_entry(d, testpath)) {
- return (NULL);
- }
-
- p->size = -1;
- p->lm = -1;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = autoindex_opts & VERSION_SORT;
- if (autoindex_opts & FANCY_INDEXING) {
- if (!(p->icon = find_default_icon(d, testpath))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_default_alt(d, testpath))) {
- if (!(p->alt = find_default_alt(d, "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- p->desc = find_desc(d, testpath);
- }
- return p;
-}
-
-static struct ent *make_autoindex_entry(const apr_finfo_t *dirent,
- int autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction,
- const char *pattern)
-{
- request_rec *rr = NULL;
- struct ent *p = NULL;
- int show_forbidden = 0;
-
- /* Dot is ignored, Parent is handled by make_parent_entry() */
- if ((dirent->name[0] == '.') && (!dirent->name[1]
- || ((dirent->name[1] == '.')
- && !dirent->name[2])))
- return (NULL);
-
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (pattern && (apr_fnmatch(pattern, dirent->name,
- APR_FNM_NOESCAPE | APR_FNM_PERIOD
-#ifdef CASE_BLIND_FILESYSTEM
- | APR_FNM_CASE_BLIND
-#endif
- )
- != APR_SUCCESS)) {
- return (NULL);
- }
-
- if (ignore_entry(d, ap_make_full_path(r->pool,
- r->filename, dirent->name))) {
- return (NULL);
- }
-
- if (!(rr = ap_sub_req_lookup_dirent(dirent, r, AP_SUBREQ_NO_ARGS,
- NULL))) {
- return (NULL);
- }
-
- if((autoindex_opts & SHOW_FORBIDDEN)
- && (rr->status == HTTP_UNAUTHORIZED
- || rr->status == HTTP_FORBIDDEN)) {
- show_forbidden = 1;
- }
-
- if ((rr->finfo.filetype != APR_DIR && rr->finfo.filetype != APR_REG)
- || !(rr->status == OK || ap_is_HTTP_SUCCESS(rr->status)
- || ap_is_HTTP_REDIRECT(rr->status)
- || show_forbidden == 1)) {
- ap_destroy_sub_req(rr);
- return (NULL);
- }
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- if (dirent->filetype == APR_DIR) {
- p->name = apr_pstrcat(r->pool, dirent->name, "/", NULL);
- }
- else {
- p->name = apr_pstrdup(r->pool, dirent->name);
- }
- p->size = -1;
- p->icon = NULL;
- p->alt = NULL;
- p->desc = NULL;
- p->lm = -1;
- p->isdir = 0;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = !!(autoindex_opts & VERSION_SORT);
- p->ignore_case = !!(autoindex_opts & IGNORE_CASE);
-
- if (autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING)) {
- p->lm = rr->finfo.mtime;
- if (dirent->filetype == APR_DIR) {
- if (autoindex_opts & FOLDERS_FIRST) {
- p->isdir = 1;
- }
- rr->filename = ap_make_dirstr_parent (rr->pool,
- rr->filename);
-
- /* omit the trailing slash (1.3 compat) */
- rr->filename[strlen(rr->filename) - 1] = '\0';
-
- if (!(p->icon = find_icon(d, rr, 1))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_alt(d, rr, 1))) {
- if (!(p->alt = find_default_alt(d,
- "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- }
- else {
- p->icon = find_icon(d, rr, 0);
- p->alt = find_alt(d, rr, 0);
- p->size = rr->finfo.size;
- }
-
- p->desc = find_desc(d, rr->filename);
-
- if ((!p->desc) && (autoindex_opts & SCAN_HTML_TITLES)) {
- p->desc = apr_pstrdup(r->pool, find_title(rr));
- }
- }
- ap_destroy_sub_req(rr);
- /*
- * We don't need to take any special action for the file size key.
- * If we did, it would go here.
- */
- if (keyid == K_LAST_MOD) {
- if (p->lm < 0) {
- p->lm = 0;
- }
- }
- return (p);
-}
-
-static char *terminate_description(glusterfs_dir_config_t *d, char *desc,
- apr_int32_t autoindex_opts, int desc_width)
-{
- int maxsize = desc_width;
- register int x = 0;
-
- /*
- * If there's no DescriptionWidth in effect, default to the old
- * behaviour of adjusting the description size depending upon
- * what else is being displayed. Otherwise, stick with the
- * setting.
- */
- if (d->desc_adjust == K_UNSET) {
- if (autoindex_opts & SUPPRESS_ICON) {
- maxsize += 6;
- }
- if (autoindex_opts & SUPPRESS_LAST_MOD) {
- maxsize += 19;
- }
- if (autoindex_opts & SUPPRESS_SIZE) {
- maxsize += 7;
- }
- }
- for (x = 0; desc[x] && ((maxsize > 0) || (desc[x] == '<')); x++) {
- if (desc[x] == '<') {
- while (desc[x] != '>') {
- if (!desc[x]) {
- maxsize = 0;
- break;
- }
- ++x;
- }
- }
- else if (desc[x] == '&') {
- /* entities like &auml; count as one character */
- --maxsize;
- for ( ; desc[x] != ';'; ++x) {
- if (desc[x] == '\0') {
- maxsize = 0;
- break;
- }
- }
- }
- else {
- --maxsize;
- }
- }
- if (!maxsize && desc[x] != '\0') {
- desc[x - 1] = '>'; /* Grump. */
- desc[x] = '\0'; /* Double Grump! */
- }
- return desc;
-}
-
-/*
- * Emit the anchor for the specified field. If a field is the key for the
- * current request, the link changes its meaning to reverse the order when
- * selected again. Non-active fields always start in ascending order.
- */
-static void emit_link(request_rec *r, const char *anchor, char column,
- char curkey, char curdirection,
- const char *colargs, int nosort)
-{
- char qvalue[9];
-
- if (!nosort) {
-
- qvalue[0] = '?';
- qvalue[1] = 'C';
- qvalue[2] = '=';
- qvalue[3] = column;
- qvalue[4] = ';';
- qvalue[5] = 'O';
- qvalue[6] = '=';
- /* reverse? */
- qvalue[7] = ((curkey == column) && (curdirection == D_ASCENDING))
- ? D_DESCENDING : D_ASCENDING;
- qvalue[8] = '\0';
- ap_rvputs(r, "<a href=\"", qvalue, colargs ? colargs : "",
- "\">", anchor, "</a>", NULL);
- }
- else {
- ap_rputs(anchor, r);
- }
-}
-
-static void output_directories(struct ent **ar, int n,
- glusterfs_dir_config_t *d, request_rec *r,
- apr_int32_t autoindex_opts, char keyid,
- char direction, const char *colargs)
-{
- int x = 0;
- apr_size_t rv = APR_SUCCESS;
- char *name = NULL, *tp = NULL;
- int static_columns = 0;
- apr_pool_t *scratch = NULL;
- int name_width = 0, desc_width = 0, cols = 0;
- char *name_scratch = NULL, *pad_scratch = NULL, *breakrow = "";
- char *anchor = NULL, *t = NULL, *t2 = NULL;
- int nwidth = 0;
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts = {0, };
- char buf[5];
-
- name = r->uri;
- static_columns = !!(autoindex_opts & SUPPRESS_COLSORT);
- apr_pool_create(&scratch, r->pool);
- if (name[0] == '\0') {
- name = "/";
- }
-
- name_width = d->name_width;
- desc_width = d->desc_width;
-
- if ((autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING))
- == FANCY_INDEXING) {
- if (d->name_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- int t = 0;
- t = strlen(ar[x]->name);
- if (t > name_width) {
- name_width = t;
- }
- }
- }
-
- if (d->desc_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- if (ar[x]->desc != NULL) {
- int t = 0;
- t = strlen(ar[x]->desc);
- if (t > desc_width) {
- desc_width = t;
- }
- }
- }
- }
- }
- name_scratch = apr_palloc(r->pool, name_width + 1);
- pad_scratch = apr_palloc(r->pool, name_width + 1);
- memset(pad_scratch, ' ', name_width);
- pad_scratch[name_width] = '\0';
-
- if (autoindex_opts & TABLE_INDEXING) {
- cols = 1;
- ap_rputs("<table><tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<th>", r);
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"[ICO]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></th>", r);
- }
- else {
- ap_rputs("&nbsp;</th>", r);
- }
-
- ++cols;
- }
- ap_rputs("<th>", r);
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- breakrow = apr_psprintf(r->pool,
- "<tr><th colspan=\"%d\">"
- "<hr%s></th></tr>\n", cols,
- (autoindex_opts & EMIT_XHTML)
- ? " /" : "");
- }
- ap_rvputs(r, "</th></tr>", breakrow, NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- ap_rputs("<pre>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"Icon \"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("> ", r);
- }
- else {
- ap_rputs(" ", r);
- }
- }
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- ap_rputs(pad_scratch + 4, r);
- /*
- * Emit the guaranteed-at-least-one-space-between-columns byte.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputc('\n', r);
- }
- }
- else {
- ap_rputs("<ul>", r);
- }
-
- for (x = 0; x < n; x++) {
- apr_pool_clear(scratch);
-
- t = ar[x]->name;
- anchor = ap_escape_html(scratch, ap_os_escape_path(scratch, t,
- 0));
-
- if (!x && t[0] == '/') {
- t2 = "Parent Directory";
- }
- else {
- t2 = t;
- }
-
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rputs("<tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<td valign=\"top\">", r);
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ?
- ar[x]->alt : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs("&nbsp;", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a></td>", r);
- }
- else {
- ap_rputs("</td>", r);
- }
- }
- if (d->name_adjust == K_ADJUST) {
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2), "</a>",
- NULL);
- }
- else {
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- }
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts;
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "</td><td align=\"right\""
- ">%d-%b-%Y %H:%M ",
- &ts);
- ap_rputs(time_str, r);
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rvputs(r, "</td><td align=\"right\">",
- apr_strfsize(ar[x]->size, buf), NULL);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- if (d->desc_adjust == K_ADJUST) {
- ap_rvputs(r, "</td><td>",
- ar[x]->desc, NULL);
- }
- else {
- ap_rvputs(r, "</td><td>",
- terminate_description(d, ar[x]->desc,
- autoindex_opts,
- desc_width), NULL);
- }
- }
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- ap_rputs("</td></tr>\n", r);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ? ar[x]->alt
- : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs(" ", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a> ", r);
- }
- else {
- ap_rputc(' ', r);
- }
- }
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- /*
- * The blank before the storm.. er, before the next
- * field.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "%d-%b-%Y %H:%M ", &ts);
- ap_rputs(time_str, r);
- }
- else {
- /* Length="22-Feb-1998 23:42 "
- * (see 4 lines above)
- */
- ap_rputs(" ", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs(apr_strfsize(ar[x]->size, buf), r);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- ap_rputs(terminate_description(d,
- ar[x]->desc,
- autoindex_opts,
- desc_width), r);
- }
- }
- ap_rputc('\n', r);
- }
- else {
- ap_rvputs(r, "<li><a href=\"", anchor, "\"> ",
- ap_escape_html(scratch, t2),
- "</a></li>\n", NULL);
- }
- }
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rvputs(r, breakrow, "</table>\n", NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></pre>\n", r);
- }
- else {
- ap_rputs("</pre>\n", r);
- }
- }
- else {
- ap_rputs("</ul>\n", r);
- }
-}
-
-/*
- * Compare two file entries according to the sort criteria. The return
- * is essentially a signum function value.
- */
-
-static int dsortf(struct ent **e1, struct ent **e2)
-{
- struct ent *c1 = NULL, *c2 = NULL;
- int result = 0;
-
- /*
- * First, see if either of the entries is for the parent directory.
- * If so, that *always* sorts lower than anything else.
- */
- if ((*e1)->name[0] == '/') {
- return -1;
- }
- if ((*e2)->name[0] == '/') {
- return 1;
- }
- /*
- * Now see if one's a directory and one isn't, if we're set
- * isdir for FOLDERS_FIRST.
- */
- if ((*e1)->isdir != (*e2)->isdir) {
- return (*e1)->isdir ? -1 : 1;
- }
- /*
- * All of our comparisons will be of the c1 entry against the c2 one,
- * so assign them appropriately to take care of the ordering.
- */
- if ((*e1)->ascending) {
- c1 = *e1;
- c2 = *e2;
- }
- else {
- c1 = *e2;
- c2 = *e1;
- }
-
- switch (c1->key) {
- case K_LAST_MOD:
- if (c1->lm > c2->lm) {
- return 1;
- }
- else if (c1->lm < c2->lm) {
- return -1;
- }
- break;
- case K_SIZE:
- if (c1->size > c2->size) {
- return 1;
- }
- else if (c1->size < c2->size) {
- return -1;
- }
- break;
- case K_DESC:
- if (c1->version_sort) {
- result = apr_strnatcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- else {
- result = strcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- if (result) {
- return result;
- }
- break;
- }
-
- /* names may identical when treated case-insensitively,
- * so always fall back on strcmp() flavors to put entries
- * in deterministic order. This means that 'ABC' and 'abc'
- * will always appear in the same order, rather than
- * variably between 'ABC abc' and 'abc ABC' order.
- */
-
- if (c1->version_sort) {
- if (c1->ignore_case) {
- result = apr_strnatcasecmp (c1->name, c2->name);
- }
- if (!result) {
- result = apr_strnatcmp(c1->name, c2->name);
- }
- }
-
- /* The names may be identical in respects other other than
- * filename case when strnatcmp is used above, so fall back
- * to strcmp on conflicts so that fn1.01.zzz and fn1.1.zzz
- * are also sorted in a deterministic order.
- */
-
- if (!result && c1->ignore_case) {
- result = strcasecmp (c1->name, c2->name);
- }
-
- if (!result) {
- result = strcmp (c1->name, c2->name);
- }
-
- return result;
-}
-
-
-static int
-mod_glfs_index_directory (request_rec *r,
- glusterfs_dir_config_t *autoindex_conf)
-{
- char *title_name = NULL, *title_endp = NULL;
- char *pstring = NULL, *colargs = NULL;
- char *path = NULL, *fname = NULL, *charset = NULL;
- char *fullpath = NULL, *name = NULL, *ctype = NULL;
- apr_finfo_t dirent;
- glusterfs_file_t fd = NULL;
- apr_status_t status = APR_SUCCESS;
- int num_ent = 0, x;
- struct ent *head = NULL, *p = NULL;
- struct ent **ar = NULL;
- const char *qstring = NULL;
- apr_int32_t autoindex_opts = autoindex_conf->opts;
- char keyid, direction;
- apr_size_t dirpathlen;
- glusterfs_dir_config_t *dir_config = NULL;
- int ret = -1;
- struct dirent *entry = NULL;
- struct stat st = {0, };
-
- name = r->filename;
- title_name = ap_escape_html(r->pool, r->uri);
- ctype = "text/html";
- dir_config = mod_glfs_dconfig (r);
- if (dir_config == NULL) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- path = r->uri;
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- if (autoindex_conf->ctype) {
- ctype = autoindex_conf->ctype;
- }
- if (autoindex_conf->charset) {
- charset = autoindex_conf->charset;
- }
- else {
-#if APR_HAS_UNICODE_FS
- charset = "UTF-8";
-#else
- charset = "ISO-8859-1";
-#endif
- }
- if (*charset) {
- ap_set_content_type(r, apr_pstrcat(r->pool, ctype, ";charset=",
- charset, NULL));
- }
- else {
- ap_set_content_type(r, ctype);
- }
-
- if (autoindex_opts & TRACK_MODIFIED) {
- ap_update_mtime(r, r->finfo.mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- }
- if (r->header_only) {
- glusterfs_close (fd);
- return 0;
- }
-
- /*
- * If there is no specific ordering defined for this directory,
- * default to ascending by filename.
- */
- keyid = autoindex_conf->default_keyid
- ? autoindex_conf->default_keyid : K_NAME;
- direction = autoindex_conf->default_direction
- ? autoindex_conf->default_direction : D_ASCENDING;
-
- /*
- * Figure out what sort of indexing (if any) we're supposed to use.
- *
- * If no QUERY_STRING was specified or client query strings have been
- * explicitly disabled.
- * If we are ignoring the client, suppress column sorting as well.
- */
- if (autoindex_opts & IGNORE_CLIENT) {
- qstring = NULL;
- autoindex_opts |= SUPPRESS_COLSORT;
- colargs = "";
- }
- else {
- char fval[5], vval[5], *ppre = "", *epattern = "";
- fval[0] = '\0'; vval[0] = '\0';
- qstring = r->args;
-
- while (qstring && *qstring) {
-
- /* C= First Sort key Column (N, M, S, D) */
- if ( qstring[0] == 'C' && qstring[1] == '='
- && qstring[2] && strchr(K_VALID, qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- keyid = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* O= Sort order (A, D) */
- else if ( qstring[0] == 'O' && qstring[1] == '='
- && ( (qstring[2] == D_ASCENDING)
- || (qstring[2] == D_DESCENDING))
- && ( qstring[3] == '&'
- || qstring[3] == ';'
- || !qstring[3])) {
- direction = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* F= Output Format (0 plain, 1 fancy (pre), 2 table) */
- else if ( qstring[0] == 'F' && qstring[1] == '='
- && qstring[2] && strchr("012", qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~(FANCY_INDEXING
- | TABLE_INDEXING);
- }
- else if (qstring[2] == '1') {
- autoindex_opts = (autoindex_opts
- | FANCY_INDEXING)
- & ~TABLE_INDEXING;
- }
- else if (qstring[2] == '2') {
- autoindex_opts |= FANCY_INDEXING
- | TABLE_INDEXING;
- }
- strcpy(fval, ";F= ");
- fval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* V= Version sort (0, 1) */
- else if ( qstring[0] == 'V' && qstring[1] == '='
- && (qstring[2] == '0' || qstring[2] == '1')
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~VERSION_SORT;
- }
- else if (qstring[2] == '1') {
- autoindex_opts |= VERSION_SORT;
- }
- strcpy(vval, ";V= ");
- vval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* P= wildcard pattern (*.foo) */
- else if (qstring[0] == 'P' && qstring[1] == '=') {
- const char *eos = qstring += 2; /* for efficiency */
-
- while (*eos && *eos != '&' && *eos != ';') {
- ++eos;
- }
-
- if (eos == qstring) {
- pstring = NULL;
- }
- else {
- pstring = apr_pstrndup(r->pool, qstring,
- eos - qstring);
- if (ap_unescape_url(pstring) != OK) {
- /* ignore the pattern, if it's bad. */
- pstring = NULL;
- }
- else {
- ppre = ";P=";
- /* be correct */
- epattern = ap_escape_uri(r->pool,
- pstring);
- }
- }
-
- if (*eos && *++eos) {
- qstring = eos;
- }
- else {
- qstring = NULL;
- }
- }
-
- /* Syntax error? Ignore the remainder! */
- else {
- qstring = NULL;
- }
- }
- colargs = apr_pstrcat(r->pool, fval, vval, ppre, epattern,
- NULL);
- }
-
- /* Spew HTML preamble */
- title_endp = title_name + strlen(title_name) - 1;
-
- while (title_endp > title_name && *title_endp == '/') {
- *title_endp-- = '\0';
- }
-
- emit_head(r, find_header(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE,
- autoindex_opts & EMIT_XHTML, title_name);
-
- /*
- * Since we don't know how many dir. entries there are, put them into a
- * linked list and then arrayificate them so qsort can use them.
- */
- head = NULL;
- p = make_parent_entry(autoindex_opts, autoindex_conf, r, keyid,
- direction);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- fullpath = apr_palloc(r->pool, APR_PATH_MAX);
- dirpathlen = strlen(name);
- memcpy(fullpath, name, dirpathlen);
-
- do {
- entry = glusterfs_readdir (fd);
- if (entry == NULL) {
- break;
- }
-
- fname = apr_pstrcat (r->pool, path, entry->d_name, NULL);
-
- ret = glusterfs_stat (fname, &st);
- if (ret != 0) {
- break;
- }
-
- dirent.fname = fname;
- dirent.name = apr_pstrdup (r->pool, entry->d_name);
- fill_out_finfo (&dirent, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT
- | APR_FINFO_NLINK | APR_FINFO_OWNER
- | APR_FINFO_PROT);
-
- p = make_autoindex_entry(&dirent, autoindex_opts,
- autoindex_conf, r,
- keyid, direction, pstring);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- } while (1);
-
- if (num_ent > 0) {
- ar = (struct ent **) apr_palloc(r->pool,
- num_ent * sizeof(struct ent *));
- p = head;
- x = 0;
- while (p) {
- ar[x++] = p;
- p = p->next;
- }
-
- qsort((void *) ar, num_ent, sizeof(struct ent *),
- (int (*)(const void *, const void *)) dsortf);
- }
- output_directories(ar, num_ent, autoindex_conf, r, autoindex_opts,
- keyid, direction, colargs);
- glusterfs_close (fd);
-
- emit_tail(r, find_readme(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE);
-
- return 0;
-}
-
-
-static int
-handle_autoindex(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int allow_opts;
-
- allow_opts = ap_allow_options(r);
-
- r->allowed |= (AP_METHOD_BIT << M_GET);
- if (r->method_number != M_GET) {
- return DECLINED;
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- /* OK, nothing easy. Trot out the heavy artillery... */
-
- if (allow_opts & OPT_INDEXES) {
- int errstatus;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- /* KLUDGE --- make the sub_req lookups happen in the right
- * directory. Fixing this in the sub_req_lookup functions
- * themselves is difficult, and would probably break
- * virtual includes...
- */
-
- if (r->filename[strlen(r->filename) - 1] != '/') {
- r->filename = apr_pstrcat(r->pool, r->filename, "/",
- NULL);
- }
- return mod_glfs_index_directory(r, dir_config);
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Directory index forbidden by "
- "Options directive: %s", r->filename);
- return HTTP_FORBIDDEN;
- }
-}
-
-
-static int
-mod_glfs_handler (request_rec *r)
-{
- conn_rec *c = r->connection;
- apr_bucket_brigade *bb;
- apr_bucket *e;
- core_dir_config *d;
- int errstatus;
- glusterfs_file_t fd = NULL;
- apr_status_t status;
- glusterfs_dir_config_t *dir_config = NULL;
- char *path = NULL;
- int num_ranges = 0;
- apr_size_t size = 0;
- apr_off_t range_start = 0, range_end = 0;
- char *current = NULL;
- apr_status_t rv = 0;
- core_request_config *req_cfg = NULL;
-
- /* XXX if/when somebody writes a content-md5 filter we either need to
- * remove this support or coordinate when to use the filter vs.
- * when to use this code
- * The current choice of when to compute the md5 here matches the 1.3
- * support fairly closely (unlike 1.3, we don't handle computing md5
- * when the charset is translated).
- */
-
- int bld_content_md5;
- if (!r->handler || (r->handler
- && strcmp (r->handler, GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0') {
- return DECLINED;
- }
-
- if (r->finfo.filetype == APR_DIR) {
- return handle_autoindex (r);
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- ap_allow_standard_methods(r, MERGE_ALLOW, M_GET, -1);
-
- /* We understood the (non-GET) method, but it might not be legal for
- this particular resource. Check to see if the 'deliver_script'
- flag is set. If so, then we go ahead and deliver the file since
- it isn't really content (only GET normally returns content).
-
- Note: based on logic further above, the only possible non-GET
- method at this point is POST. In the future, we should enable
- script delivery for all methods. */
- if (r->method_number != M_GET) {
- req_cfg = ap_get_module_config(r->request_config, &core_module);
- if (!req_cfg->deliver_script) {
- /* The flag hasn't been set for this request. Punt. */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "This resource does not accept the %s "
- "method.",
- r->method);
- return HTTP_METHOD_NOT_ALLOWED;
- }
- }
-
- d = (core_dir_config *)ap_get_module_config(r->per_dir_config,
- &core_module);
- bld_content_md5 = (d->content_md5 & 1)
- && r->output_filters->frec->ftype != AP_FTYPE_RESOURCE;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- if (r->finfo.filetype == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s", r->filename);
- return HTTP_NOT_FOUND;
- }
-
- if ((r->used_path_info != AP_REQ_ACCEPT_PATH_INFO) &&
- r->path_info && *r->path_info)
- {
- /* default to reject */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s",
- apr_pstrcat(r->pool, r->filename, r->path_info,
- NULL));
- return HTTP_NOT_FOUND;
- }
-
- ap_update_mtime (r, r->finfo.mtime);
- ap_set_last_modified (r);
- ap_set_etag (r);
- apr_table_setn (r->headers_out, "Accept-Ranges", "bytes");
-
- num_ranges = ap_set_byterange(r);
- if (num_ranges == 0) {
- size = r->finfo.size;
- } else {
- char *tmp = apr_pstrdup (r->pool, r->range);
- while ((current = ap_getword(r->pool, (const char **)&tmp, ','))
- && (rv = parse_byterange(current, r->finfo.size,
- &range_start, &range_end))) {
- size += (range_end - range_start);
- }
- }
-
- ap_set_content_length (r, size);
-
- if ((errstatus = ap_meets_conditions(r)) != OK) {
- r->status = errstatus;
- }
-
- /*
- * file is small enough to have already got the content in
- * glusterfs_lookup
- */
- if (r->finfo.size <= dir_config->xattr_file_size && dir_config->buf) {
- if (bld_content_md5) {
- apr_table_setn (r->headers_out, "Content-MD5",
- (const char *)ap_md5_binary(r->pool,
- dir_config->buf
- , r->finfo.size));
- }
-
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, 0, r,
- "fetching data from glusterfs through xattr "
- "interface\n");
-
- bb = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_heap_create (dir_config->buf, r->finfo.size,
- free, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bb, e);
-
- dir_config->buf = NULL;
-
- /* let the byterange_filter handle multipart requests */
- status = ap_pass_brigade(r->output_filters, bb);
- if (status == APR_SUCCESS
- || r->status != HTTP_OK
- || c->aborted) {
- return OK;
- }
- else {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- return HTTP_INTERNAL_SERVER_ERROR;
- }
- }
-
- /* do standard open/read/close to fetch content */
- path = r->uri;
-
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- /*
- * byterange_filter cannot handle range requests, since we are not
- * sending the whole data in a single brigade
- */
-
-
- if (num_ranges == 0) {
- mod_glfs_read_async (r, NULL, fd, 0, -1);
- } else {
- mod_glfs_handle_byte_ranges (r, fd, num_ranges);
- }
-
- glusterfs_close (fd);
-}
-
-
-#if 0
-static apr_status_t
-mod_glfs_output_filter (ap_filter_t *f,
- apr_bucket_brigade *b)
-{
- size_t size = 0;
- apr_bucket_t *e = NULL;
- size = atol (apr_table_get (r->notes, MOD_GLFS_SIZE));
-
- for (e = APR_BRIGADE_FIRST(b);
- e != APR_BRIGADE_SENTINEL(b);
- e = APR_BUCKET_NEXT(e))
- {
- /* FIXME: can there be more than one heap buckets? */
- if (e->type == &apr_bucket_type_heap) {
- break;
- }
- }
-
- if (e != APR_BRIGADE_SENTINEL(b)) {
- e->length = size;
- }
-
- return ap_pass_brigade (f->next, b);
-}
-#endif
-
-static int
-mod_glfs_fixup_dir(request_rec *r)
-{
- glusterfs_dir_config_t *d = NULL;
- char *dummy_ptr[1];
- char **names_ptr = NULL, *name_ptr = NULL;
- int num_names;
- int error_notfound = 0;
- char *ifile = NULL;
- request_rec *rr = NULL;
-
- /* only handle requests against directories */
- if (r->finfo.filetype != APR_DIR) {
- return DECLINED;
- }
-
- if (!r->handler || strcmp (r->handler, GLUSTERFS_HANDLER)) {
- return DECLINED;
- }
-
- /* Never tolerate path_info on dir requests */
- if (r->path_info && *r->path_info) {
- return DECLINED;
- }
-
- d = (glusterfs_dir_config_t *)ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- /* Redirect requests that are not '/' terminated */
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] != '/')
- {
- if (!d->do_slash) {
- return DECLINED;
- }
-
- /* Only redirect non-get requests if we have no note to warn
- * that this browser cannot handle redirs on non-GET requests
- * (such as Microsoft's WebFolders).
- */
- if ((r->method_number != M_GET)
- && apr_table_get(r->subprocess_env, "redirect-carefully")) {
- return DECLINED;
- }
-
- if (r->args != NULL) {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", "?", r->args, NULL);
- }
- else {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", NULL);
- }
-
- apr_table_setn(r->headers_out, "Location",
- ap_construct_url(r->pool, ifile, r));
- return HTTP_MOVED_PERMANENTLY;
- }
-
- if (d->index_names) {
- names_ptr = (char **)d->index_names->elts;
- num_names = d->index_names->nelts;
- }
- else {
- dummy_ptr[0] = AP_DEFAULT_INDEX;
- names_ptr = dummy_ptr;
- num_names = 1;
- }
-
- for (; num_names; ++names_ptr, --num_names) {
- /* XXX: Is this name_ptr considered escaped yet, or not??? */
- name_ptr = *names_ptr;
-
- /* Once upon a time args were handled _after_ the successful
- * redirect. But that redirect might then _refuse_ the
- * given r->args, creating a nasty tangle. It seems safer to
- * consider the r->args while we determine if name_ptr is our
- * viable index, and therefore set them up correctly on redirect.
- */
- if (r->args != NULL) {
- name_ptr = apr_pstrcat(r->pool, name_ptr, "?", r->args,
- NULL);
- }
-
- rr = ap_sub_req_lookup_uri(name_ptr, r, NULL);
-
- /* The sub request lookup is very liberal, and the core
- * map_to_storage handler will almost always result in HTTP_OK
- * as /foo/index.html may be /foo with PATH_INFO="/index.html",
- * or even / with PATH_INFO="/foo/index.html". To get around
- * this we insist that the the index be a regular filetype.
- *
- * Another reason is that the core handler also makes the
- * assumption that if r->finfo is still NULL by the time it
- * gets called, the file does not exist.
- */
- if (rr->status == HTTP_OK
- && ( (rr->handler && !strcmp(rr->handler, "proxy-server"))
- || rr->finfo.filetype == APR_REG)) {
- ap_internal_fast_redirect(rr, r);
- return OK;
- }
-
- /* If the request returned a redirect, propagate it to the
- * client
- */
-
- if (ap_is_HTTP_REDIRECT(rr->status)
- || (rr->status == HTTP_NOT_ACCEPTABLE && num_names == 1)
- || (rr->status == HTTP_UNAUTHORIZED && num_names == 1)) {
-
- apr_pool_join(r->pool, rr->pool);
- error_notfound = rr->status;
- r->notes = apr_table_overlay(r->pool, r->notes,
- rr->notes);
- r->headers_out = apr_table_overlay(r->pool,
- r->headers_out,
- rr->headers_out);
- r->err_headers_out = apr_table_overlay(r->pool,
- r->err_headers_out,
- rr->err_headers_out);
- return error_notfound;
- }
-
- /* If the request returned something other than 404 (or 200),
- * it means the module encountered some sort of problem. To be
- * secure, we should return the error, rather than allow
- * autoindex to create a (possibly unsafe) directory index.
- *
- * So we store the error, and if none of the listed files
- * exist, we return the last error response we got, instead
- * of a directory listing.
- */
- if (rr->status && rr->status != HTTP_NOT_FOUND
- && rr->status != HTTP_OK) {
- error_notfound = rr->status;
- }
-
- ap_destroy_sub_req(rr);
- }
-
- if (error_notfound) {
- return error_notfound;
- }
-
- /* nothing for us to do, pass on through */
- return DECLINED;
-}
-
-
-static void
-mod_glfs_register_hooks(apr_pool_t *p)
-{
- ap_hook_child_init (mod_glfs_child_init, NULL, NULL, APR_HOOK_MIDDLE);
- ap_hook_handler (mod_glfs_handler, NULL, NULL, APR_HOOK_REALLY_FIRST);
- ap_hook_map_to_storage (mod_glfs_map_to_storage, NULL, NULL,
- APR_HOOK_REALLY_FIRST);
- ap_hook_fixups(mod_glfs_fixup_dir,NULL,NULL,APR_HOOK_LAST);
-
-/* mod_glfs_output_filter_handle =
- ap_register_output_filter ("MODGLFS", mod_glfs_output_filter,
- NULL, AP_FTYPE_PROTOCOL); */
-}
-
-static const char *
-cmd_add_index (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *d = dummy;
-
- if (!d->index_names) {
- d->index_names = apr_array_make(cmd->pool, 2, sizeof(char *));
- }
- *(const char **)apr_array_push(d->index_names) = arg;
- return NULL;
-}
-
-static const char *
-cmd_configure_slash (cmd_parms *cmd, void *d_, int arg)
-{
- glusterfs_dir_config_t *d = d_;
-
- d->do_slash = arg ? SLASH_ON : SLASH_OFF;
- return NULL;
-}
-
-#define DIR_CMD_PERMS OR_INDEXES
-
-static const
-command_rec mod_glfs_cmds[] =
-{
- AP_INIT_TAKE1(
- "GlusterfsLogfile",
- cmd_add_logfile,
- NULL,
- ACCESS_CONF, /*FIXME: allow overriding in .htaccess files */
- "Glusterfs logfile"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsLoglevel",
- cmd_set_loglevel,
- NULL,
- ACCESS_CONF,
- "Glusterfs loglevel:anyone of none, critical, error, warning, "
- "debug"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsCacheTimeout",
- cmd_set_cache_timeout,
- NULL,
- ACCESS_CONF,
- "Timeout value in seconds for lookup and stat cache of "
- "libglusterfsclient"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsVolumeSpecfile",
- cmd_add_volume_specfile,
- NULL,
- ACCESS_CONF,
- "Glusterfs Volume specfication file specifying filesystem "
- "under this directory"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsXattrFileSize",
- cmd_add_xattr_file_size,
- NULL,
- ACCESS_CONF,
- "Maximum size of the file that can be fetched through "
- "extended attribute interface of libglusterfsclient"
- ),
-
- /* mod_dir cmds */
- AP_INIT_ITERATE("DirectoryIndex", cmd_add_index,
- NULL, DIR_CMD_PERMS,
- "a list of file names"),
-
- AP_INIT_FLAG("DirectorySlash", cmd_configure_slash,
- NULL, DIR_CMD_PERMS,
- "On or Off"),
-
- /* autoindex cmds */
- AP_INIT_ITERATE2("AddIcon", cmd_add_icon,
- BY_PATH, DIR_CMD_PERMS,
- "an icon URL followed by one or more filenames"),
-
- AP_INIT_ITERATE2("AddIconByType", cmd_add_icon,
- BY_TYPE, DIR_CMD_PERMS,
- "an icon URL followed by one or more MIME types"),
-
- AP_INIT_ITERATE2("AddIconByEncoding", cmd_add_icon,
- BY_ENCODING, DIR_CMD_PERMS,
- "an icon URL followed by one or more content encodings"),
-
- AP_INIT_ITERATE2("AddAlt", cmd_add_alt, BY_PATH,
- DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "filenames"),
-
- AP_INIT_ITERATE2("AddAltByType", cmd_add_alt,
- BY_TYPE, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "MIME types"),
-
- AP_INIT_ITERATE2("AddAltByEncoding", cmd_add_alt,
- BY_ENCODING, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "content encodings"),
-
- AP_INIT_TAKE_ARGV("IndexOptions", cmd_add_opts,
- NULL, DIR_CMD_PERMS,
- "one or more index options [+|-][]"),
-
- AP_INIT_TAKE2("IndexOrderDefault", cmd_set_default_order,
- NULL, DIR_CMD_PERMS,
- "{Ascending,Descending} {Name,Size,Description,Date}"),
-
- AP_INIT_ITERATE("IndexIgnore", cmd_add_ignore,
- NULL, DIR_CMD_PERMS,
- "one or more file extensions"),
-
- AP_INIT_ITERATE2("AddDescription", cmd_add_desc,
- BY_PATH, DIR_CMD_PERMS,
- "Descriptive text followed by one or more filenames"),
-
- AP_INIT_TAKE1("HeaderName", cmd_add_header,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_TAKE1("ReadmeName", cmd_add_readme,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_RAW_ARGS("FancyIndexing", ap_set_deprecated,
- NULL, OR_ALL,
- "The FancyIndexing directive is no longer supported. "
- "Use IndexOptions FancyIndexing."),
-
- AP_INIT_TAKE1("DefaultIcon", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t,
- default_icon),
- DIR_CMD_PERMS, "an icon URL"),
-
- AP_INIT_TAKE1("IndexStyleSheet", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t, style_sheet),
- DIR_CMD_PERMS, "URL to style sheet"),
-
- {NULL}
-};
-
-module AP_MODULE_DECLARE_DATA glusterfs_module =
-{
- STANDARD20_MODULE_STUFF,
- mod_glfs_create_dir_config,
- mod_glfs_merge_dir_config,
- NULL, //mod_glfs_create_server_config,
- NULL, //mod_glfs_merge_server_config,
- mod_glfs_cmds,
- mod_glfs_register_hooks,
-};
diff --git a/mod_glusterfs/apache/Makefile.am b/mod_glusterfs/apache/Makefile.am
deleted file mode 100644
index bda03931052..00000000000
--- a/mod_glusterfs/apache/Makefile.am
+++ /dev/null
@@ -1,10 +0,0 @@
-SUBDIRS = $(MOD_GLUSTERFS_HTTPD_VERSION)
-
-EXTRA_DIST = 1.3/Makefile.am 1.3/Makefile.in \
- 1.3/src/Makefile.am 1.3/src/Makefile.in \
- 1.3/src/mod_glusterfs.c \
- 1.3/src/README.txt \
- 2.2/Makefile.am 2.2/Makefile.in \
- 2.2/src/Makefile.am 2.2/src/Makefile.in \
- 2.2/src/mod_glusterfs.c
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am b/mod_glusterfs/lighttpd/1.4/Makefile.am
deleted file mode 100644
index eda329111dc..00000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff b/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
deleted file mode 100644
index 375696b5d8f..00000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.4/README.txt b/mod_glusterfs/lighttpd/1.4/README.txt
deleted file mode 100644
index 786a146e44d..00000000000
--- a/mod_glusterfs/lighttpd/1.4/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.4, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.4 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.4 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.4/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.4/
-# cd /home/glusterfs/lighttpd-1.4
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
deleted file mode 100644
index 295c9704c92..00000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
+++ /dev/null
@@ -1,1820 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "http_chunk.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/*
- enables debug code for testing if all nodes in the stat-cache as accessable
-*/
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/*
- Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE
- results in a network_backend_write of the read data
-*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_iobuf_t *buf;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-} glusterfs_async_local_t;
-
-
-typedef struct {
- glusterfs_file_t fd;
- void *buf;
- buffer *glusterfs_path;
- /* off_t response_content_length; */
- int prefix;
-}mod_glusterfs_ctx_t;
-
-/* plugin config for all request/connections */
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
- char mounted;
-} plugin_config;
-
-static int (*network_backend_write)(struct server *srv, connection *con, int fd,
- chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
-
- plugin_config conf;
-} plugin_data;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_iobuf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the
- * monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-int
-mod_glusterfs_readv_async_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-static int
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- glusterfs_file_t fd = glusterfs_chunk->file.name;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if ((int)length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- unsigned long check = 0;
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = buffer_init ();
-
- check += buf->vector[i].iov_len;
-
- nw_write_buf->used = buf->vector[i].iov_len + 1;
- nw_write_buf->size = buf->vector[i].iov_len;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- offset += local.op_ret;
- chunkqueue_append_buffer_weak(cq, nw_write_buf);
- }
-
- network_backend_write (srv, con, con->fd, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (gf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start =(char *)gf_cq;
- return local.read_bytes;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? HANDLER_FINISHED : HANDLER_GO_ON);
-}
-
-int mod_glusterfs_network_backend_write(struct server *srv, connection *con,
- int fd, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- network_backend_write (srv, con, fd, cq);
-
- prev->next = c;
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- mod_glusterfs_chunkqueue *gf_cq = NULL;
-
- gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- network_backend_write (srv, con, fd, gf_cq->cq);
-
- if ((size_t)chunkqueue_written (gf_cq->cq)
- != gf_cq->length) {
- cq->first = first;
- return chunks_written;
- }
- for (tmp = gf_cq->cq->first ; tmp;
- tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- mod_glusterfs_read_async (srv, con, c);
- if (c->file.mmap.start) {
- /* pending chunkqueue from
- mod_glusterfs_read_async to be written to
- network */
- cq->first = first;
- return chunks_written;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- network_backend_write (srv, con, fd, cq);
-
- cq->first = first;
-
- return chunks_written;
-}
-
-int chunkqueue_append_glusterfs_file (connection *con, glusterfs_file_t fd,
- off_t offset, size_t len, size_t buf_size)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (con->write_queue);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- buffer_free (c->mem);
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->size = buf_size;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- p = calloc(1, sizeof(*p));
- network_backend_write = NULL;
-
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY,
- T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and
- remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET,
- T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1,
- srv->config_context->used
- * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage);*/
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->document_root = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv,
- ((data_config *)srv->config_context->data[i])->value,
- cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con,
- plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- /* skip the first, the global context */
- /*
- glusterfs related config can only occur inside
- $HTTP["url"] == "<glusterfs-prefix>"
- */
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.document_root = NULL;
-
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con,
- plugin_data *p) {
- int multipart = 0;
- int error;
- off_t start, end;
- const char *s, *minus;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- SEGFAULT();
- }
-
- start = 0;
- end = sce->st.st_size - 1;
-
- con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers,
- "Content-Type"))) {
- content_type = ds->value;
- }
-
- for (s = con->request.http_range, error = 0;
- !error && *s && NULL != (minus = strchr(s, '-')); ) {
- char *err;
- off_t la, le;
-
- if (s == minus) {
- /* -<stop> */
-
- le = strtoll(s, &err, 10);
-
- if (le == 0) {
- /* RFC 2616 - 14.35.1 */
-
- con->http_status = 416;
- error = 1;
- } else if (*err == '\0') {
- /* end */
- s = err;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else {
- error = 1;
- }
-
- } else if (*(minus+1) == '\0' || *(minus+1) == ',') {
- /* <start>- */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- /* ok */
-
- if (*(err + 1) == '\0') {
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = la;
-
- } else if (*(err + 1) == ',') {
- multipart = 1;
- s = err + 2;
-
- end = sce->st.st_size - 1;
- start = la;
- } else {
- error = 1;
- }
- } else {
- /* error */
- error = 1;
- }
- } else {
- /* <start>-<stop> */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- le = strtoll(minus+1, &err, 10);
-
- /* RFC 2616 - 14.35.1 */
- if (la > le) {
- error = 1;
- }
-
- if (*err == '\0') {
- /* ok, end*/
- s = err;
-
- end = le;
- start = la;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = le;
- start = la;
- } else {
- /* error */
-
- error = 1;
- }
- } else {
- /* error */
-
- error = 1;
- }
- }
-
- if (!error) {
- if (start < 0) start = 0;
-
- /* RFC 2616 - 14.35.1 */
- if (end > sce->st.st_size - 1) end = sce->st.st_size - 1;
-
- if (start > sce->st.st_size - 1) {
- error = 1;
-
- con->http_status = 416;
- }
- }
-
- if (!error) {
- if (multipart) {
- /* write boundary-header */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: "
- "bytes ");
- buffer_append_off_t(b, start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
-
- }
-
- if ((size_t)sce->st.st_size >= size) {
- chunkqueue_append_glusterfs_file(con, ctx->fd,
- start,
- end - start,
- size);
- } else {
- if (!start) {
- buffer *mem = buffer_init ();
- mem->ptr = ctx->buf;
- mem->used = mem->size = sce->st.st_size;
- http_chunk_append_buffer (srv, con, mem);
- ctx->buf = NULL;
- } else {
- chunkqueue_append_mem (con->write_queue,
- ((char *)ctx->buf)
- + start,
- end - start + 1);
- }
- }
-
- con->response.content_length += end - start + 1;
- }
- }
-
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- /* something went wrong */
- if (error) return -1;
-
- if (multipart) {
- /* add boundary end */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(p->range_buf));
- } else {
- /* add Content-Range-header */
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"),
- CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
- size_t size = 0;
- int ret = 0;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
- if (!p->conf.prefix || p->conf.prefix->used == 0) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (!p->conf.mounted) {
- glusterfs_init_params_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- ret = glusterfs_mount (p->conf.prefix->ptr, &ctx);
- if (ret != 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "glusterfs initialization failed, "
- "please check your configuration. "
- "Glusterfs logfile ",
- p->conf.logfile,
- "might contain details");
- return HANDLER_FINISHED;
- }
- p->conf.mounted = 1;
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
-/* FIXME: what if multiple files are requested from a single connection? */
-/* TODO: check whether this works fine for HTTP protocol 1.1 */
-
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path,
- p->conf.prefix);
- buffer_append_string (plugin_ctx->glusterfs_path,
- p->conf.document_root->ptr);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path,
- con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path,
- plugin_ctx->glusterfs_path);
-
- if (glusterfs_stat_cache_get_entry (srv, con,
- plugin_ctx->glusterfs_path,
- con->physical.path, plugin_ctx->buf,
- size, &sce) == HANDLER_ERROR) {
- if (errno == ENOENT)
- con->http_status = 404;
- else
- con->http_status = 403;
-
- free (plugin_ctx->buf);
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
- plugin_ctx->buf = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (!(S_ISREG (sce->st.st_mode) && (size_t)sce->st.st_size < size)) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
- }
-
- return HANDLER_GO_ON;
-}
-
-static int http_chunk_append_len(server *srv, connection *con, size_t len) {
- size_t i, olen = len, j;
- buffer *b;
-
- b = srv->tmp_chunk_len;
-
- if (len == 0) {
- buffer_copy_string(b, "0");
- } else {
- for (i = 0; i < 8 && len; i++) {
- len >>= 4;
- }
-
- /* i is the number of hex digits we have */
- buffer_prepare_copy(b, i + 1);
-
- for (j = i-1, len = olen; j+1 > 0; j--) {
- b->ptr[j] = (len & 0xf) + (((len & 0xf) <= 9) ?
- '0' : 'a' - 10);
- len >>= 4;
- }
- b->used = i;
- b->ptr[b->used++] = '\0';
- }
-
- buffer_append_string(b, "\r\n");
- chunkqueue_append_buffer(con->write_queue, b);
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_file_chunk(server *srv, connection *con,
- glusterfs_file_t fd, off_t offset,
- off_t len, size_t buf_size) {
- chunkqueue *cq;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len);
- }
-
- chunkqueue_append_glusterfs_file (con, fd, offset, len, buf_size);
-
- if ((con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED)
- && (len > 0)) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_mem(server *srv, connection *con,
- char * mem, size_t len,
- size_t buf_size)
-{
- chunkqueue *cq = NULL;
- buffer *buf = NULL;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (len == 0) {
- free (mem);
- if (con->response.transfer_encoding
- & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "0\r\n\r\n", 5 + 1);
- } else {
- chunkqueue_append_mem(cq, "", 1);
- }
- return 0;
- }
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len - 1);
- }
-
- buf = buffer_init ();
-
- buf->used = len + 1;
- buf->size = buf_size;
- buf->ptr = (char *)mem;
- chunkqueue_append_buffer_weak (cq, buf);
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->used)
- return HANDLER_GO_ON;
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr,
- strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- con->http_status = 403;
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
- ctx->fd = glusterfs_open (ctx->glusterfs_path->ptr, O_RDONLY,
- 0);
-
- if (((long)ctx->fd) == 0) {
- con->http_status = 403;
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- access denied due symlink "
- "restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, "Content-Type")) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also "
- * announce that this header field might change in "
- * the seconds few requests. This should fix the
- * aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_STR_LEN("application/"
- "octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Accept-Ranges"),
- CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0
- && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers,
- "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con,
- CONST_STR_LEN("ETag"),
- CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con,
- mtime)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (con->request.http_range && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers,
- "If-Range"))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value,
- con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match
- * the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->file_finished = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
- /*TODO check whether 1 should be subtracted */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size <= (size_t)sce->st.st_size) {
- http_chunk_append_glusterfs_file_chunk (srv, con, ctx->fd, 0,
- sce->st.st_size, size);
- } else {
- http_chunk_append_glusterfs_mem (srv, con, ctx->buf,
- sce->st.st_size, size);
- }
-
- con->http_status = 200;
- con->file_finished = 1;
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
-}
-
-#if 0
-URIHANDLER_FUNC(mod_glusterfs_request_done)
-{
- mod_glusterfs_iobuf_t *cur = first, *prev;
- while (cur) {
- prev = cur;
- glusterfs_free (cur->buf);
- cur = cur->next;
- free (prev);
- }
- first = NULL
- }
-#endif
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_subrequest_start = mod_glusterfs_subrequest;
- // p->handle_request_done = mod_glusterfs_request_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
-
-
-/* mod_glusterfs_stat_cache */
-static stat_cache_entry * stat_cache_entry_init(void) {
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-#ifdef HAVE_FAM_H
-static fam_dir_entry * fam_dir_entry_init(void) {
- fam_dir_entry *fam_dir = NULL;
-
- fam_dir = calloc(1, sizeof(*fam_dir));
- /* ERR_ABORT (fam_dir); */
-
- fam_dir->name = buffer_init();
-
- return fam_dir;
-}
-
-static void fam_dir_entry_free(void *data) {
- fam_dir_entry *fam_dir = data;
-
- if (!fam_dir) return;
-
- FAMCancelMonitor(fam_dir->fc, fam_dir->req);
-
- buffer_free(fam_dir->name);
- free(fam_dir->req);
-
- free(fam_dir);
-}
-#endif
-
-#ifdef HAVE_XATTR
-static int stat_cache_attr_get(buffer *buf, char *name) {
- int attrlen;
- int ret;
-
- attrlen = 1024;
- buffer_prepare_copy(buf, attrlen);
- attrlen--;
- if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) {
- buf->used = attrlen + 1;
- buf->ptr[attrlen] = '\0';
- }
- return ret;
-}
-#endif
-
-/* the famous DJB hash function for strings */
-static uint32_t hashme(buffer *str) {
- uint32_t hash = 5381;
- const char *s;
- for (s = str->ptr; *s; s++) {
- hash = ((hash << 5) + hash) + *s;
- }
-
- hash &= ~(1 << 31); /* strip the highest bit */
-
- return hash;
-}
-
-
-#ifdef HAVE_LSTAT
-static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
- if (lstat(dname->ptr, lst) == 0) {
- return S_ISLNK(lst->st_mode) ? 0 : 1;
- }
- else {
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "lstat failed for:",
- dname, strerror(errno));
- };
- return -1;
-}
-#endif
-
-/***
- *
- *
- *
- * returns:
- * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
- * - HANDLER_ERROR on stat() failed -> see errno for problem
- */
-
-handler_t glusterfs_stat_cache_get_entry(server *srv,
- connection *con,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
-#ifdef HAVE_FAM_H
- fam_dir_entry *fam_dir = NULL;
- int dir_ndx = -1;
- splay_tree *dir_node = NULL;
-#endif
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- struct stat st;
- size_t k;
-#ifdef DEBUG_STAT_CACHE
- size_t i;
-#endif
- int file_ndx;
- splay_tree *file_node = NULL;
-
- *ret_sce = NULL;
- memset (&st, 0, sizeof (st));
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- file_ndx = hashme(sc->hash_key);
- sc->files = splaytree_splay(sc->files, file_ndx);
-
-#ifdef DEBUG_STAT_CACHE
- for (i = 0; i < ctrl.used; i++) {
- if (ctrl.ptr[i] == file_ndx) break;
- }
-#endif
-
- if (sc->files && (sc->files->key == file_ndx)) {
-#ifdef DEBUG_STAT_CACHE
- /* it was in the cache */
- assert(i < ctrl.used);
-#endif
-
- /* we have seen this file already and
- * don't stat() it again in the same second */
-
- file_node = sc->files;
-
- sce = file_node->data;
-
- /* check if the name is the same, we might have a collision */
-
- if (buffer_is_equal(name, sce->name)) {
- if (srv->srvconf.stat_cache_engine
- == STAT_CACHE_ENGINE_SIMPLE) {
- if (sce->stat_ts == srv->cur_ts && !buf) {
- *ret_sce = sce;
- return HANDLER_GO_ON;
- }
- }
- } else {
- /* oops, a collision,
- *
- * file_node is used by the FAM check below to see if
- * we know this file and if we can save a stat().
- *
- * BUT, the sce is not reset here as the entry into
- * the cache is ok, we it is just not pointing to
- * our requested file.
- */
-
- file_node = NULL;
- }
- } else {
-#ifdef DEBUG_STAT_CACHE
- if (i != ctrl.used) {
- fprintf(stderr, "%s.%d: %08x was already inserted "
- "but not found in cache, %s\n",
- __FILE__, __LINE__, file_ndx, name->ptr);
- }
- assert(i == ctrl.used);
-#endif
- }
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is
- * better
- *
- * */
- if (-1 == glusterfs_get (glusterfs_path->ptr, buf, size, &st)) {
- return HANDLER_ERROR;
- }
-
- if (NULL == sce) {
- int osize = 0;
-
- if (sc->files) {
- osize = sc->files->size;
- }
-
- sce = stat_cache_entry_init();
- buffer_copy_string_buffer(sce->name, name);
-
- sc->files = splaytree_insert(sc->files, file_ndx, sce);
-#ifdef DEBUG_STAT_CACHE
- if (ctrl.size == 0) {
- ctrl.size = 16;
- ctrl.used = 0;
- ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- } else if (ctrl.size == ctrl.used) {
- ctrl.size += 16;
- ctrl.ptr = realloc(ctrl.ptr,
- ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- }
-
- ctrl.ptr[ctrl.used++] = file_ndx;
-
- assert(sc->files);
- assert(sc->files->data == sce);
- assert(osize + 1 == splaytree_size(sc->files));
-#endif
- }
-
- sce->st = st;
- sce->stat_ts = srv->cur_ts;
-
- /* catch the obvious symlinks
- *
- * this is not a secure check as we still have a race-condition between
- * the stat() and the open. We can only solve this by
- * 1. open() the file
- * 2. fstat() the fd
- *
- * and keeping the file open for the rest of the time. But this can
- * only be done at network level.
- *
- * per default it is not a symlink
- * */
-#ifdef HAVE_LSTAT
- sce->is_symlink = 0;
-
- /* we want to only check for symlinks if we should block symlinks.
- */
- if (!con->conf.follow_symlink) {
- if (stat_cache_lstat(srv, name, &lst) == 0) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "found symlink", name);
-#endif
- sce->is_symlink = 1;
- }
-
- /*
- * we assume "/" can not be symlink, so
- * skip the symlink stuff if our path is /
- **/
- else if ((name->used > 2)) {
- buffer *dname;
- char *s_cur;
-
- dname = buffer_init();
- buffer_copy_string_buffer(dname, name);
-
- while ((s_cur = strrchr(dname->ptr,'/'))) {
- *s_cur = '\0';
- dname->used = s_cur - dname->ptr + 1;
- if (dname->ptr == s_cur) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "s", "reached /");
-#endif
- break;
- }
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "checking if", dname, "is a "
- "symlink");
-#endif
- if (stat_cache_lstat(srv, dname, &lst) == 0) {
- sce->is_symlink = 1;
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "sb",
- "found symlink", dname);
-#endif
- break;
- };
- };
- buffer_free(dname);
- };
- };
-#endif
-
- if (S_ISREG(st.st_mode)) {
- /* determine mimetype */
- buffer_reset(sce->content_type);
-
- for (k = 0; k < con->conf.mimetypes->used; k++) {
- data_string *ds = NULL;
- buffer *type = NULL;
-
- ds = (data_string *)con->conf.mimetypes->data[k];
- type = ds->key;
- if (type->used == 0) continue;
-
- /* check if the right side is the same */
- if (type->used > name->used) continue;
-
- if (0 == strncasecmp(name->ptr + name->used - type->used,
- type->ptr, type->used - 1)) {
- buffer_copy_string_buffer(sce->content_type,
- ds->value);
- break;
- }
- }
- etag_create(sce->etag, &(sce->st), con->etag_flags);
-#ifdef HAVE_XATTR
- if (con->conf.use_xattr && buffer_is_empty(sce->content_type)) {
- stat_cache_attr_get(sce->content_type, name->ptr);
- }
-#endif
- } else if (S_ISDIR(st.st_mode)) {
- etag_create(sce->etag, &(sce->st), con->etag_flags);
- }
-
-#ifdef HAVE_FAM_H
- if (sc->fam &&
- (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM)) {
- /* is this directory already registered ? */
- if (!dir_node) {
- fam_dir = fam_dir_entry_init();
- fam_dir->fc = sc->fam;
-
- buffer_copy_string_buffer(fam_dir->name, sc->dir_name);
-
- fam_dir->version = 1;
-
- fam_dir->req = calloc(1, sizeof(FAMRequest));
- /* ERR_ABORT (fam_dir->req); */
-
- if (0 != FAMMonitorDirectory(sc->fam, fam_dir->name->ptr,
- fam_dir->req, fam_dir)) {
-
- log_error_write(srv, __FILE__, __LINE__, "sbsbs",
- "monitoring dir failed:",
- fam_dir->name,
- "file:", name,
- FamErrlist[FAMErrno]);
-
- fam_dir_entry_free(fam_dir);
- } else {
- int osize = 0;
-
- if (sc->dirs) {
- osize = sc->dirs->size;
- }
-
- sc->dirs = splaytree_insert(sc->dirs, dir_ndx,
- fam_dir);
- assert(sc->dirs);
- assert(sc->dirs->data == fam_dir);
- assert(osize == (sc->dirs->size - 1));
- }
- } else {
- fam_dir = dir_node->data;
- }
-
- /* bind the fam_fc to the stat() cache entry */
-
- if (fam_dir) {
- sce->dir_version = fam_dir->version;
- sce->dir_ndx = dir_ndx;
- }
- }
-#endif
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
-}
-
-/**
- * remove stat() from cache which havn't been stat()ed for
- * more than 10 seconds
- *
- *
- * walk though the stat-cache, collect the ids which are too old
- * and remove them in a second loop
- */
-
-static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys,
- size_t *ndx) {
- stat_cache_entry *sce;
-
- if (!t) return 0;
-
- stat_cache_tag_old_entries(srv, t->left, keys, ndx);
- stat_cache_tag_old_entries(srv, t->right, keys, ndx);
-
- sce = t->data;
-
- if (srv->cur_ts - sce->stat_ts > 2) {
- keys[(*ndx)++] = t->key;
- }
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
deleted file mode 100644
index 9d73d6999ed..00000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con,
- buffer *glusterfs_path, buffer *name,
- void *buf, size_t size,
- stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am b/mod_glusterfs/lighttpd/1.5/Makefile.am
deleted file mode 100644
index eda329111dc..00000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff b/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
deleted file mode 100644
index 375696b5d8f..00000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.5/README.txt b/mod_glusterfs/lighttpd/1.5/README.txt
deleted file mode 100644
index bdbdfffbc50..00000000000
--- a/mod_glusterfs/lighttpd/1.5/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.5, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.5 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.5 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.5/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.5/
-# cd /home/glusterfs/lighttpd-1.5
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
deleted file mode 100644
index 67f7a7eacf5..00000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
+++ /dev/null
@@ -1,1476 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include "joblist.h"
-#include "http_req_range.h"
-#include "connections.h"
-#include "configfile.h"
-
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/* enables debug code for testing if all nodes in the stat-cache as accessable */
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/* Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE results in a network_backend_write of the read data*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-
-/* plugin config for all request/connections */
-
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
-
- /* FIXME: its a pointer, hence cant be short */
- unsigned long handle;
-} plugin_config;
-
-static network_status_t (*network_backend_write)(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
- http_req_range *ranges;
- plugin_config conf;
-} plugin_data;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
- connection *con;
- server *srv;
- plugin_data *p;
-
- union {
- struct {
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_read_buf_t *buf;
- }readv;
-
- struct {
- buffer *name;
- buffer *hash_key;
- size_t size;
- }lookup;
- }fop;
-} glusterfs_async_local_t;
-
-typedef struct {
- unsigned long fd;
- buffer *glusterfs_path;
- void *buf;
- off_t response_content_length;
- int prefix;
-}mod_glusterfs_ctx_t;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_read_buf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-static stat_cache_entry *
-stat_cache_entry_init(void)
-{
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-int chunkqueue_append_glusterfs_mem (chunkqueue *cq, const char * mem, size_t len) {
- buffer *buf = NULL;
-
- buf = chunkqueue_get_append_buffer (cq);
-
- if (buf->ptr)
- free (buf->ptr);
-
- buf->used = len + 1;
- buf->ptr = (char *)mem;
- buf->size = len;
-
- return 0;
-}
-
-static int
-glusterfs_lookup_async_cbk (int op_ret,
- int op_errno,
- void *buf,
- struct stat *st,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- mod_glusterfs_ctx_t *ctx = NULL;
- ctx = local->con->plugin_ctx[local->p->id];
-
- assert (ctx->buf== buf);
-
- if (op_ret || !(S_ISREG (st->st_mode) && (size_t)st->st_size <= local->fop.lookup.size)) {
-
- free (ctx->buf);
- ctx->buf = NULL;
-
- if (op_ret) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- free (ctx);
- local->con->plugin_ctx[local->p->id] = NULL;
-
- if (op_errno == ENOENT)
- local->con->http_status = 404;
- else
- local->con->http_status = 403;
- }
- }
-
- if (!op_ret) {
- stat_cache_entry *sce = NULL;
- stat_cache *sc = local->srv->stat_cache;
-
- sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, local->fop.lookup.hash_key);
-
- if (!sce) {
- sce = stat_cache_entry_init();
-
- buffer_copy_string_buffer(sce->name, local->fop.lookup.name);
- g_hash_table_insert(sc->files, buffer_init_string(BUF_STR(local->fop.lookup.hash_key)), sce);
- }
-
- sce->state = STAT_CACHE_ENTRY_STAT_FINISHED;
- sce->stat_ts = time (NULL);
- memcpy (&sce->st, st, sizeof (*st));
- }
-
- g_async_queue_push (local->srv->joblist_queue, local->con);
- /*
- joblist_append (local->srv, local->con);
- kill (getpid(), SIGUSR1);
- */
- free (local);
- return 0;
-}
-
-static handler_t
-glusterfs_stat_cache_get_entry_async (server *srv,
- connection *con,
- plugin_data *p,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- glusterfs_async_local_t *local = NULL;
-
- *ret_sce = NULL;
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- if ((sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, sc->hash_key))) {
- /* know this entry already */
-
- if (sce->state == STAT_CACHE_ENTRY_STAT_FINISHED &&
- !buf) {
- /* verify that this entry is still fresh */
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
- }
- }
-
-
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is better
- *
- * */
-
- /* pass a job to the stat-queue */
-
- local = calloc (1, sizeof (*local));
- /* ERR_ABORT (local); */
- local->con = con;
- local->srv = srv;
- local->p = p;
- local->fop.lookup.name = buffer_init_buffer (name);
- local->fop.lookup.hash_key = buffer_init_buffer (sc->hash_key);
- local->fop.lookup.size = size;
-
- if (glusterfs_lookup_async ((libglusterfs_handle_t )p->conf.handle, glusterfs_path->ptr, buf, size, glusterfs_lookup_async_cbk, (void *) local)) {
- free (local);
- return HANDLER_ERROR;
- }
-
- return HANDLER_WAIT_FOR_EVENT;
-}
-
-int
-mod_glusterfs_readv_async_cbk (glusterfs_read_buf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->fop.readv.async_read_complete = 1;
- local->fop.readv.buf = buf;
-
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-network_status_t
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- unsigned long fd = (unsigned long)glusterfs_chunk->file.name;
- network_status_t ret;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return NETWORK_STATUS_FATAL_ERROR;
- }
-
- do {
- glusterfs_read_buf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.fop.readv.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.op_ret = local.fop.readv.buf->op_ret;
- local.op_errno = local.fop.readv.buf->op_errno;
-
- local.fop.readv.async_read_complete = 0;
- buf = local.fop.readv.buf;
-
- if ((int)length < 0)
- complete = (local.fop.readv.buf->op_ret <= 0);
- else {
- local.fop.readv.read_bytes += local.fop.readv.buf->op_ret;
- complete = ((local.fop.readv.read_bytes == length) || (local.fop.readv.buf->op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = chunkqueue_get_append_buffer (cq);
-
- nw_write_buf->used = nw_write_buf->size = buf->vector[i].iov_len + 1;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- // buffer_copy_memory (nw_write_buf, buf->vector[i].iov_base, buf->vector[i].iov_len + 1);
- offset += local.op_ret;
- }
-
- ret = network_backend_write (srv, con, con->sock, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.fop.readv.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (qf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start = (char *)gf_cq;
- return ret;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? NETWORK_STATUS_FATAL_ERROR : NETWORK_STATUS_SUCCESS);
-}
-
-network_status_t mod_glusterfs_network_backend_write(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
- network_status_t ret;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- ret = network_backend_write (srv, con, sock, cq);
-
- prev->next = c;
- if (ret != NETWORK_STATUS_SUCCESS) {
- cq->first = first;
- return ret;
- }
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- size_t len;
- mod_glusterfs_chunkqueue *gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- ret = network_backend_write (srv, con, sock, gf_cq->cq);
-
- if ((len = (size_t)chunkqueue_written (gf_cq->cq)) != gf_cq->length) {
- gf_cq->length -= len;
- cq->first = first;
- chunkqueue_remove_finished_chunks (gf_cq->cq);
- return ret;
- }
-
- for (tmp = gf_cq->cq->first ; tmp; tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- ret = mod_glusterfs_read_async (srv, con, c); //c->file.fd, c->file.start, -1);//c->file.length);
- if (c->file.mmap.start) {
- /* pending chunkqueue from mod_glusterfs_read_async to be written to network */
- cq->first = first;
- return ret;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- ret = network_backend_write (srv, con, sock, cq);
-
- cq->first = first;
-
- return ret;
-}
-
-#if 0
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- c->file.start = offset;
- c->file.length = len;
- /* buffer_free (c->file.name); */
-
- /* identify chunk as glusterfs related */
- c->file.mmap.start = MAP_FAILED;
- /* c->file.mmap.length = c->file.mmap.offset = len;*/
-
- return 0;
-}
-#endif
-
-int chunkqueue_append_dummy_mem_chunk (chunkqueue *cq, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem->used = len + 1;
- c->offset = len;
- c->mem->ptr = NULL;
-
- return 0;
-}
-
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- UNUSED (srv);
- p = calloc(1, sizeof(*p));
- /* ERR_ABORT (p); */
- network_backend_write = NULL;
- p->ranges = http_request_range_init();
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
- http_request_range_free (p->ranges);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET, T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1, srv->config_context->used * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage); */
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
- s->document_root = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
-
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv, ((data_config *)srv->config_context->data[i])->value, cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con, plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.exclude_exts = NULL;
-
- /* skip the first, the global context */
- /* glusterfs related config can only occur inside $HTTP["url"] == "<glusterfs-prefix>" */
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con, plugin_data *p) {
- int multipart = 0;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- buffer *range = NULL;
- http_req_range *ranges, *r;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
- size_t size = 0;
-
- if (!ctx) {
- return -1;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("Range")))) {
- range = ds->value;
- } else {
- /* we don't have a Range header */
-
- return -1;
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- SEGFAULT();
- }
-
- ctx->response_content_length = con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Content-Type")))) {
- content_type = ds->value;
- }
-
- /* start the range-header parser
- * bytes=<num> */
-
- ranges = p->ranges;
- http_request_range_reset(ranges);
- switch (http_request_range_parse(range, ranges)) {
- case PARSE_ERROR:
- return -1; /* no range valid Range Header */
- case PARSE_SUCCESS:
- break;
- default:
- TRACE("%s", "foobar");
- return -1;
- }
-
- if (ranges->next) {
- multipart = 1;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- /* patch the '-1' */
- for (r = ranges; r; r = r->next) {
- if (r->start == -1) {
- /* -<end>
- *
- * the last <end> bytes */
- r->start = sce->st.st_size - r->end;
- r->end = sce->st.st_size - 1;
- }
- if (r->end == -1) {
- /* <start>-
- * all but the first <start> bytes */
-
- r->end = sce->st.st_size - 1;
- }
-
- if (r->end > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos not present or > size-of-file
- * take the size-of-file
- *
- * */
- r->end = sce->st.st_size - 1;
- }
-
- if (r->start > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if first-byte-pos > file-size, 416
- */
-
- con->http_status = 416;
- return -1;
- }
-
- if (r->start > r->end) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos is present, it has to be >= first-byte-pos
- *
- * invalid ranges have to be handle as no Range specified
- * */
-
- return -1;
- }
- }
-
- if (r) {
- /* we ran into an range violation */
- return -1;
- }
-
- if (multipart) {
- buffer *b;
- for (r = ranges; r; r = r->next) {
- /* write boundary-header */
-
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: bytes ");
- buffer_append_off_t(b, r->start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, r->end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- if ((size_t)sce->st.st_size > size) {
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- } else {
- chunkqueue_append_mem (con->send, ((char *)ctx->buf) + r->start, r->end - r->start + 1);
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
- }
-
- /* add boundary end */
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(p->range_buf));
-
- } else {
- r = ranges;
-
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, r->start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, r->end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"), CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- size_t size = 0;
- handler_t ret = 0;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s", "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.handle <= 0) {
- glusterfs_init_ctx_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- p->conf.handle = (unsigned long)glusterfs_init (&ctx);
-
- if (p->conf.handle <= 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs", "glusterfs initialization failed, please check your configuration. Glusterfs logfile ", p->conf.logfile, "might contain details");
- return HANDLER_FINISHED;
- }
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path, p->conf.document_root);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path, con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path, plugin_ctx->glusterfs_path);
-
- ret = glusterfs_stat_cache_get_entry_async (srv, con, p, plugin_ctx->glusterfs_path, con->physical.path, plugin_ctx->buf, size, &sce);
-
- if (ret == HANDLER_ERROR) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
-
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- con->http_status = 500;
- ret = HANDLER_FINISHED;
- }
-
- return ret;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- unsigned long fd;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr)
- return HANDLER_GO_ON;
-
- if (!ctx) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr, strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- con->http_status = 403;
-
- /* this might happen if the sce is removed from stat-cache after a successful glusterfs_lookup */
- if (ctx) {
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
-
- fd = glusterfs_open ((libglusterfs_handle_t ) ((unsigned long)p->conf.handle), ctx->glusterfs_path->ptr, O_RDONLY, 0);
-
- if (!fd) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- con->http_status = 403;
- return HANDLER_FINISHED;
- }
- ctx->fd = fd;
- }
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- access denied due symlink restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb", "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, CONST_STR_LEN("Content-Type"))) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also announce that
- * this header field might change in the seconds few requests
- *
- * This should fix the aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_STR_LEN("application/octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con, CONST_STR_LEN("Accept-Ranges"), CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0 && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers, "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con, CONST_STR_LEN("ETag"), CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con, mtime)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (NULL != array_get_element(con->request.headers, CONST_STR_LEN("Range")) && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Last-Modified")))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("If-Range")))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->send->is_closed = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size < (size_t)sce->st.st_size) {
- chunkqueue_append_glusterfs_file (con->send_raw, fd, 0, sce->st.st_size);
- con->send_raw->bytes_in += sce->st.st_size;
- chunkqueue_append_dummy_mem_chunk (con->send, sce->st.st_size);
- } else {
- if (!ctx->buf) {
- con->http_status = 404;
- return HANDLER_ERROR;
- }
- chunkqueue_append_glusterfs_mem (con->send, ctx->buf, sce->st.st_size);
- ctx->buf = NULL;
- }
- ctx->response_content_length = con->response.content_length = sce->st.st_size;
-
- con->send->is_closed = 1;
- con->send->bytes_in = sce->st.st_size;
-
- return HANDLER_FINISHED;
-}
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_response_done) {
- plugin_data *p = p_d;
- UNUSED (srv);
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- con->plugin_ctx[p->id] = NULL;
- if (ctx->glusterfs_path) {
- free (ctx->glusterfs_path);
- }
-
- free (ctx);
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_start_backend = mod_glusterfs_subrequest;
- p->handle_response_done = mod_glusterfs_response_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
deleted file mode 100644
index 5f9bb2c5bf0..00000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con, libglusterfs_handle_t handle, buffer *glusterfs_path, buffer *name, void *buf, size_t size, stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/Makefile.am b/mod_glusterfs/lighttpd/Makefile.am
deleted file mode 100644
index c934412b3dc..00000000000
--- a/mod_glusterfs/lighttpd/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = 1.4 1.5
-
-CLEANFILES =
diff --git a/rfc.sh b/rfc.sh
index b06ac512d7d..e7faec9ea0f 100755
--- a/rfc.sh
+++ b/rfc.sh
@@ -1,21 +1,62 @@
#!/bin/sh -e
+# Since we run with '#!/bin/sh -e'
+# '$? != 0' will force an exit,
+# i.e. where we are interested in the result of a command,
+# we have to run the command in an if-statement.
+UPSTREAM=${GLUSTER_UPSTREAM}
+if [ "x$UPSTREAM" -eq "x" ]; then
+ for rmt in $(git remote); do
+ rmt_repo=$(git remote show $rmt -n | grep Fetch | awk '{ print $3 }');
+ if [ $rmt_repo -eq "git@github:gluster/glusterfs" ]; then
+ UPSTREAM=$rmt
+ echo "Picked $rmt as upstream remote"
+ break
+ fi
+ done
+fi
-branch="master";
+USER_REPO=${GLUSTER_USER_REPO:-origin}
+if [ "x${USER_REPO}" -eq "x${UPSTREAM}" ] ; then
+ echo "When you submit patches, it should get submitted to your fork, not to upstream directly"
+ echo "If you are not sure, check `for rmt in $(git remote); do git remote show $rmt -n; done`"
+ echo "And pick the correct remote you would like to push to and do `export GLUSTER_USER_REPO=$rmt`"
+ echo ""
+ echo "Exiting..."
+ exit 1
+fi
+
+while getopts "v" opt; do
+ case $opt in
+ v)
+ # Verbose mode
+ git () { >&2 echo "git $@" && `which git` $@; }
+ ;;
+ esac
+done
+# Move the positional arguments to the beginning
+shift $((OPTIND-1))
+
+
+branch="devel";
+
set_hooks_commit_msg()
{
f=".git/hooks/commit-msg";
- u="http://review.gluster.com/tools/hooks/commit-msg";
+ u="http://review.gluster.org/tools/hooks/commit-msg";
if [ -x "$f" ]; then
return;
fi
- curl -o $f $u || wget -O $f $u;
+ curl -L -o $f $u || wget -O $f $u;
+
+ chmod +x $f
- chmod +x .git/hooks/commit-msg;
+ # Let the 'Change-Id: ' header get assigned on first run of rfc.sh
+ GIT_EDITOR=true git commit --amend;
}
@@ -28,15 +69,120 @@ is_num()
[ -z "$(echo $num | sed -e 's/[0-9]//g')" ]
}
+backport_id_message()
+{
+ echo ""
+ echo "This commit is to a non-devel branch, and hence is treated as a backport."
+ echo ""
+ echo "For backports we would like to retain the same gerrit Change-Id across"
+ echo "branches. On auto inspection it is found that a gerrit Change-Id is"
+ echo "missing, or the Change-Id is not found on your local devel branch"
+ echo ""
+ echo "This could mean a few things:"
+ echo " 1. This is not a backport, hence choose Y on the prompt to proceed"
+ echo " 2. Your $USER_REPO/devel is not up to date, hence the script is unable"
+ echo " to find the corresponding Change-Id on devel. Either choose N,"
+ echo " 'git fetch', and try again, OR if you are sure you used the"
+ echo " same Change-Id, choose Y at the prompt to proceed"
+ echo " 3. You commented or removed the Change-Id in your commit message after"
+ echo " cherry picking the commit. Choose N, fix the commit message to"
+ echo " use the same Change-Id as 'devel' (git commit --amend), resubmit"
+ echo ""
+}
-rebase_changes()
+check_backport()
{
- git fetch origin;
+ moveon='N'
+
+ # Backports are never made to 'devel'
+ if [ $branch = "devel" ]; then
+ return;
+ fi
+
+ # Extract the change ID from the commit message
+ changeid=$(git log -n1 --format='%b' | grep -i '^Change-Id: ' | awk '{print $2}')
- GIT_EDITOR=$0 git rebase -i origin/$branch;
+ # If there is no change ID ask if we should continue
+ if [ -z "$changeid" ]; then
+ backport_id_message;
+ echo -n "Did not find a Change-Id for a possible backport. Continue (y/N): "
+ read moveon
+ else
+ # Search 'devel' for the same change ID (rebase_changes has run, so we
+ # should never not find a Change-Id)
+ mchangeid=$(git log $UPSTREAM/devel --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}')
+
+ # Check if we found the change ID on 'devel', else throw a message to
+ # decide if we should continue.
+ # NOTE: If 'devel' was not rebased, we will not find the Change-ID and
+ # could hit a false positive case here (or if someone checks out some
+ # other branch as 'devel').
+ if [ "${mchangeid}" = "${changeid}" ]; then
+ moveon="Y"
+ else
+ backport_id_message;
+ echo "Change-Id of commit: $changeid"
+ echo "Change-Id on devel: $mchangeid"
+ echo -n "Did not find mentioned Change-Id on 'devel' for a possible backport. Continue (y/N): "
+ read moveon
+ fi
+ fi
+
+ if [ "${moveon}" = 'Y' ] || [ "${moveon}" = 'y' ]; then
+ return;
+ else
+ exit 1
+ fi
}
+rebase_changes()
+{
+ GIT_EDITOR=$0 git rebase -i $UPSTREAM/$branch;
+}
+
+
+# Regex elaborated:
+# grep options:
+# -w -> --word-regexp (from the man page)
+# Select only those lines containing matches that form whole words.
+# The test is that the matching substring must either be at the
+# beginning of the line, or preceded by a non-word constituent
+# character. Similarly, it must be either at the end of the line or
+# followed by a non-word constituent character. Word-constituent
+# characters are letters, digits, and the underscore.
+#
+# IOW, the above helps us find the pattern with leading or training
+# spaces or non word consituents like , or ;
+#
+# -i -> --ignore-case (case insensitive search)
+#
+# -o -> --only-matching (only print matching portion of the line)
+#
+# -E -> --extended-regexp (use extended regular expression)
+#
+# ^
+# The search begins at the start of each line
+#
+# [[:space:]]*
+# Any number of spaces is accepted
+#
+# (Fixes|Updates)
+# Finds 'Fixes' OR 'Updates' in any case combination
+#
+# (:)?
+# Followed by an optional : (colon)
+#
+# [[:space:]]+
+# Followed by 1 or more spaces
+#
+# #
+# Followed by #
+#
+# [[:digit:]]+
+# Followed by 1 or more digits
+REFRE="^[[:space:]]*(Fixes|Updates)(:)?[[:space:]]+#[[:digit:]]+"
+
editor_mode()
{
if [ $(basename "$1") = "git-rebase-todo" ]; then
@@ -45,22 +191,34 @@ editor_mode()
fi
if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then
- if grep -qi '^BUG: ' $1; then
+ # see note above function warn_reference_missing for regex elaboration
+ # Lets first check for github issues
+ ref=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}');
+ if [ "x${ref}" != "x" ]; then
return;
fi
+
while true; do
echo Commit: "\"$(head -n 1 $1)\""
- echo -n "Enter Bug ID: "
- read bug
- if [ -z "$bug" ]; then
+ echo -n "Github Issue ID: "
+ read issue
+ if [ -z "$issue" ]; then
return;
fi
- if ! is_num "$bug"; then
- echo "Invalid Bug ID ($bug)!!!";
+ if ! is_num "$issue"; then
+ echo "Invalid Github Issue ID!!!";
continue;
fi
- sed "/^Change-Id:/{p; s/^.*$/BUG: $bug/;}" $1 > $1.new && \
+ echo "Select yes '(y)' if this patch fixes the issue/feature completely,"
+ echo -n "or is the last of the patchset which brings feature (Y/n): "
+ read fixes
+ fixes_string="Fixes"
+ if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then
+ fixes_string="Updates"
+ fi
+
+ sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: #${issue}/;}" $1 > $1.new && \
mv $1.new $1;
return;
done
@@ -76,24 +234,85 @@ EOF
assert_diverge()
{
- git diff origin/$branch..HEAD | grep -q .;
+ git diff $UPSTREAM/$branch..HEAD | grep -q .;
}
+warn_reference_missing()
+{
+ echo ""
+ echo "=== Missing a reference in commit! ==="
+ echo ""
+ echo "Gluster commits are made with a reference to a github issue"
+ echo ""
+ echo "A check on the commit message, reveals that there is no "
+ echo "github issue referenced in the commit message."
+ echo ""
+ echo "https://github.com/gluster/glusterfs/issues/new"
+ echo ""
+ echo "Please open an issue and reference the same in the commit message "
+ echo "using the following tags:"
+ echo ""
+ echo "\"Fixes: #NNNN\" OR \"Updates: #NNNN\","
+ echo "where NNNN is the issue id"
+ echo ""
+ echo "You may abort the submission choosing 'N' below and use"
+ echo "'git commit --amend' to add the issue reference before posting"
+ echo "to gerrit."
+ echo ""
+ echo -n "Missing reference to a github issue. Continue (y/N): "
+ read moveon
+ if [ "${moveon}" = 'Y' ] || [ "${moveon}" = 'y' ]; then
+ return;
+ else
+ exit 1
+ fi
+}
+
main()
{
+ set_hooks_commit_msg;
+
+ # rfc.sh calls itself from rebase_changes, which uses rfc.sh as the EDITOR
+ # thus, getting the commit message to work with in the editor_mode.
if [ -e "$1" ]; then
editor_mode "$@";
return;
fi
- set_hooks_commit_msg;
+ git fetch $UPSTREAM;
rebase_changes;
+ check_backport;
+
assert_diverge;
- bug=$(git show --format='%b' | grep -i '^BUG: ' | awk '{print $2}');
+ # see note above variable REFRE for regex elaboration
+ reference=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}');
+
+ # If this is a commit against 'devel' and does not have a github
+ # issue reference. Warn the contributor that one of the 2 is required
+ if [ -z "${reference}" ] && [ $branch = "devel" ]; then
+ warn_reference_missing;
+ fi
+
+ # TODO: add clang-format command here. It will after the changes are done everywhere else
+ clang_format=$(clang-format --version)
+ if [ ! -z "${clang_format}" ]; then
+ # Considering git show may not give any files as output matching the
+ # criteria, good to tell script not to fail on error
+ set +e
+ list_of_files=$(git show --pretty="format:" --name-only |
+ grep -v "contrib/" | egrep --color=never "*\.[ch]$");
+ if [ ! -z "${list_of_files}" ]; then
+ echo "${list_of_files}" | xargs clang-format -i
+ fi
+ set -e
+ else
+ echo "High probability of your patch not passing smoke due to coding standard check"
+ echo "Please install 'clang-format' to format the patch before submitting"
+ fi
if [ "$DRY_RUN" = 1 ]; then
drier='echo -e Please use the following command to send your commits to review:\n\n'
@@ -101,10 +320,10 @@ main()
drier=
fi
- if [ -z "$bug" ]; then
- $drier git push origin HEAD:refs/for/$branch/rfc;
+ if [ -z "${reference}" ]; then
+ $drier git push $USER_REPO HEAD:temp_${branch}/$(date +%Y-%m-%d_%s);
else
- $drier git push origin HEAD:refs/for/$branch/bug-$bug;
+ $drier git push $USER_REPO HEAD:issue${reference}_${branch};
fi
}
diff --git a/rpc/Makefile.am b/rpc/Makefile.am
index ffb76e901fc..183b7a0352f 100644
--- a/rpc/Makefile.am
+++ b/rpc/Makefile.am
@@ -1 +1 @@
-SUBDIRS = rpc-lib rpc-transport xdr
+SUBDIRS = xdr rpc-lib rpc-transport
diff --git a/rpc/rpc-lib/src/Makefile.am b/rpc/rpc-lib/src/Makefile.am
index 8b087301c63..35c9db07e7f 100644
--- a/rpc/rpc-lib/src/Makefile.am
+++ b/rpc/rpc-lib/src/Makefile.am
@@ -1,16 +1,29 @@
lib_LTLIBRARIES = libgfrpc.la
libgfrpc_la_SOURCES = auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c \
- rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c
+ rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c \
+ rpc-drc.c rpc-clnt-ping.c \
+ autoscale-threads.c mgmt-pmap.c
-libgfrpc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+EXTRA_DIST = libgfrpc.sym
-noinst_HEADERS = rpcsvc.h rpc-transport.h xdr-common.h xdr-rpc.h xdr-rpcclnt.h \
- rpc-clnt.h rpcsvc-common.h protocol-common.h
+libgfrpc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+libgfrpc_la_LDFLAGS = -version-info $(LIBGFRPC_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/rpc/rpc-lib/src/libgfrpc.sym
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+libgfrpc_la_HEADERS = rpcsvc.h rpc-transport.h xdr-common.h xdr-rpc.h xdr-rpcclnt.h \
+ rpc-clnt.h rpcsvc-common.h protocol-common.h rpc-drc.h rpc-clnt-ping.h \
+ rpc-lib-messages.h
+
+libgfrpc_ladir = $(includedir)/glusterfs/rpc
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src \
- -DRPC_TRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport\"
+ -I$(top_builddir)/rpc/xdr/src \
+ -DRPC_TRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport\" \
+ -I$(top_srcdir)/contrib/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES = *~
diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c
index 5f41c829672..69a96f7512f 100644
--- a/rpc/rpc-lib/src/auth-glusterfs.c
+++ b/rpc/rpc-lib/src/auth-glusterfs.c
@@ -8,235 +8,379 @@
cases as published by the Free Software Foundation.
*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
#include "xdr-common.h"
#include "rpc-common-xdr.h"
+#include "glusterfs4-xdr.h"
/* V1 */
ssize_t
-xdr_to_glusterfs_auth (char *buf, struct auth_glusterfs_parms *req)
+xdr_to_glusterfs_auth(char *buf, struct auth_glusterfs_parms *req)
{
- XDR xdr;
- ssize_t ret = -1;
-
- if ((!buf) || (!req))
- return -1;
-
- xdrmem_create (&xdr, buf, sizeof (struct auth_glusterfs_parms),
- XDR_DECODE);
- if (!xdr_auth_glusterfs_parms (&xdr, req)) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs parameters");
- ret = -1;
- goto ret;
- }
-
- ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
-ret:
- return ret;
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!buf) || (!req))
+ return -1;
+
+ xdrmem_create(&xdr, buf, sizeof(struct auth_glusterfs_parms), XDR_DECODE);
+ if (!xdr_auth_glusterfs_parms(&xdr, req)) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs parameters");
+ ret = -1;
+ goto ret;
+ }
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ret:
+ return ret;
}
int
-auth_glusterfs_request_init (rpcsvc_request_t *req, void *priv)
+auth_glusterfs_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
- return 0;
+ return 0;
}
-int auth_glusterfs_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_glusterfs_authenticate(rpcsvc_request_t *req, void *priv)
{
- struct auth_glusterfs_parms au = {0,};
-
- int ret = RPCSVC_AUTH_REJECT;
- int gidcount = 0;
- int j = 0;
- int i = 0;
-
- if (!req)
- return ret;
-
- ret = xdr_to_glusterfs_auth (req->cred.authdata, &au);
- if (ret == -1) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs credentials");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- req->pid = au.pid;
- req->uid = au.uid;
- req->gid = au.gid;
- req->lk_owner.len = 8;
- {
- for (i = 0; i < req->lk_owner.len; i++, j += 8)
- req->lk_owner.data[i] = (char)((au.lk_owner >> j) & 0xff);
- }
- req->auxgidcount = au.ngrps;
-
- if (req->auxgidcount > 16) {
- gf_log ("", GF_LOG_WARNING,
- "more than 16 aux gids found, failing authentication");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- for (gidcount = 0; gidcount < au.ngrps; ++gidcount)
- req->auxgids[gidcount] = au.groups[gidcount];
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
- ", gid: %d, owner: %s",
- req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
- ret = RPCSVC_AUTH_ACCEPT;
-err:
+ struct auth_glusterfs_parms au = {
+ 0,
+ };
+
+ int ret = RPCSVC_AUTH_REJECT;
+ int j = 0;
+ int i = 0;
+ int gidcount = 0;
+
+ if (!req)
return ret;
+
+ ret = xdr_to_glusterfs_auth(req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = 8;
+ {
+ for (i = 0; i < req->lk_owner.len; i++, j += 8)
+ req->lk_owner.data[i] = (char)((au.lk_owner >> j) & 0xff);
+ }
+ req->auxgidcount = au.ngrps;
+
+ if (req->auxgidcount > 16) {
+ gf_log("", GF_LOG_WARNING,
+ "more than 16 aux gids found, failing authentication");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount, sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log("auth-glusterfs", GF_LOG_WARNING, "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (gidcount = 0; gidcount < au.ngrps; ++gidcount)
+ req->auxgids[gidcount] = au.groups[gidcount];
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s",
+ req->pid, req->uid, req->gid, lkowner_utoa(&req->lk_owner));
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ return ret;
}
rpcsvc_auth_ops_t auth_glusterfs_ops = {
- .transport_init = NULL,
- .request_init = auth_glusterfs_request_init,
- .authenticate = auth_glusterfs_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_glusterfs = {
- .authname = "AUTH_GLUSTERFS",
- .authnum = AUTH_GLUSTERFS,
- .authops = &auth_glusterfs_ops,
- .authprivate = NULL
-};
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_request_init,
+ .authenticate = auth_glusterfs_authenticate};
+rpcsvc_auth_t rpcsvc_auth_glusterfs = {.authname = "AUTH_GLUSTERFS",
+ .authnum = AUTH_GLUSTERFS,
+ .authops = &auth_glusterfs_ops,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_glusterfs_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_glusterfs;
+ return &rpcsvc_auth_glusterfs;
}
/* V2 */
ssize_t
-xdr_to_glusterfs_auth_v2 (char *buf, struct auth_glusterfs_parms_v2 *req)
+xdr_to_glusterfs_auth_v2(char *buf, struct auth_glusterfs_parms_v2 *req)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!buf) || (!req))
- return -1;
+ if ((!buf) || (!req))
+ return -1;
- xdrmem_create (&xdr, buf, GF_MAX_AUTH_BYTES, XDR_DECODE);
- if (!xdr_auth_glusterfs_parms_v2 (&xdr, req)) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs v2 parameters");
- ret = -1;
- goto ret;
- }
+ xdrmem_create(&xdr, buf, GF_MAX_AUTH_BYTES, XDR_DECODE);
+ if (!xdr_auth_glusterfs_parms_v2(&xdr, req)) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs v2 parameters");
+ ret = -1;
+ goto ret;
+ }
- ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
ret:
- return ret;
-
+ return ret;
}
int
-auth_glusterfs_v2_request_init (rpcsvc_request_t *req, void *priv)
+auth_glusterfs_v2_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
- return 0;
+ return 0;
}
-int auth_glusterfs_v2_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_glusterfs_v2_authenticate(rpcsvc_request_t *req, void *priv)
{
- struct auth_glusterfs_parms_v2 au = {0,};
- int ret = RPCSVC_AUTH_REJECT;
- int i = 0;
-
- if (!req)
- return ret;
-
- ret = xdr_to_glusterfs_auth_v2 (req->cred.authdata, &au);
- if (ret == -1) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs credentials");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- req->pid = au.pid;
- req->uid = au.uid;
- req->gid = au.gid;
- req->lk_owner.len = au.lk_owner.lk_owner_len;
- req->auxgidcount = au.groups.groups_len;
-
- if (req->auxgidcount > GF_MAX_AUX_GROUPS) {
- gf_log ("", GF_LOG_WARNING,
- "more than max aux gids found (%d) , truncating it "
- "to %d and continuing", au.groups.groups_len,
- GF_MAX_AUX_GROUPS);
- req->auxgidcount = GF_MAX_AUX_GROUPS;
- }
-
- if (req->lk_owner.len > GF_MAX_LOCK_OWNER_LEN) {
- gf_log ("", GF_LOG_WARNING,
- "lkowner field > 1k, failing authentication");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- for (i = 0; i < req->auxgidcount; ++i)
- req->auxgids[i] = au.groups.groups_val[i];
-
- for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
- req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
- ", gid: %d, owner: %s",
- req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
- ret = RPCSVC_AUTH_ACCEPT;
+ struct auth_glusterfs_parms_v2 au = {
+ 0,
+ };
+ int ret = RPCSVC_AUTH_REJECT;
+ int i = 0;
+ int max_groups = 0;
+ int max_lk_owner_len = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_glusterfs_auth_v2(req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = au.lk_owner.lk_owner_len;
+ req->auxgidcount = au.groups.groups_len;
+
+ /* the number of groups and size of lk_owner depend on each other */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(req->lk_owner.len,
+ AUTH_GLUSTERFS_v2);
+ max_lk_owner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(req->auxgidcount,
+ AUTH_GLUSTERFS_v2);
+
+ if (req->auxgidcount > max_groups) {
+ gf_log("", GF_LOG_WARNING,
+ "more than max aux gids found (%d) , truncating it "
+ "to %d and continuing",
+ au.groups.groups_len, max_groups);
+ req->auxgidcount = max_groups;
+ }
+
+ if (req->lk_owner.len > max_lk_owner_len) {
+ gf_log("", GF_LOG_WARNING,
+ "lkowner field to big (%d), depends on the number of "
+ "groups (%d), failing authentication",
+ req->lk_owner.len, req->auxgidcount);
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount, sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log("auth-glusterfs-v2", GF_LOG_WARNING, "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (i = 0; i < req->auxgidcount; ++i)
+ req->auxgids[i] = au.groups.groups_val[i];
+
+ for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
+ req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s",
+ req->pid, req->uid, req->gid, lkowner_utoa(&req->lk_owner));
+ ret = RPCSVC_AUTH_ACCEPT;
err:
- /* TODO: instead use alloca() for these variables */
- if (au.groups.groups_val)
- free (au.groups.groups_val);
- if (au.lk_owner.lk_owner_val)
- free (au.lk_owner.lk_owner_val);
+ /* TODO: instead use alloca() for these variables */
+ free(au.groups.groups_val);
+ free(au.lk_owner.lk_owner_val);
- return ret;
+ return ret;
}
rpcsvc_auth_ops_t auth_glusterfs_ops_v2 = {
- .transport_init = NULL,
- .request_init = auth_glusterfs_v2_request_init,
- .authenticate = auth_glusterfs_v2_authenticate
-};
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_v2_request_init,
+ .authenticate = auth_glusterfs_v2_authenticate};
+
+rpcsvc_auth_t rpcsvc_auth_glusterfs_v2 = {.authname = "AUTH_GLUSTERFS-v2",
+ .authnum = AUTH_GLUSTERFS_v2,
+ .authops = &auth_glusterfs_ops_v2,
+ .authprivate = NULL};
+
+rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_v2_init(rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_glusterfs_v2;
+}
+
+/* V3 */
+
+ssize_t
+xdr_to_glusterfs_auth_v3(char *buf, struct auth_glusterfs_params_v3 *req)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!buf) || (!req))
+ return -1;
+
+ xdrmem_create(&xdr, buf, GF_MAX_AUTH_BYTES, XDR_DECODE);
+ if (!xdr_auth_glusterfs_params_v3(&xdr, req)) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs v3 parameters");
+ ret = -1;
+ goto ret;
+ }
+
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ret:
+ return ret;
+}
+
+int
+auth_glusterfs_v3_request_init(rpcsvc_request_t *req, void *priv)
+{
+ return 0;
+}
+
+int
+auth_glusterfs_v3_authenticate(rpcsvc_request_t *req, void *priv)
+{
+ struct auth_glusterfs_params_v3 au = {
+ 0,
+ };
+ int ret = RPCSVC_AUTH_REJECT;
+ int i = 0;
+ int max_groups = 0;
+ int max_lk_owner_len = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_glusterfs_auth_v3(req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = au.lk_owner.lk_owner_len;
+ req->auxgidcount = au.groups.groups_len;
+
+ /* the number of groups and size of lk_owner depend on each other */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(req->lk_owner.len,
+ AUTH_GLUSTERFS_v3);
+ max_lk_owner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(req->auxgidcount,
+ AUTH_GLUSTERFS_v3);
+
+ if (req->auxgidcount > max_groups) {
+ gf_log("", GF_LOG_WARNING,
+ "more than max aux gids found (%d) , truncating it "
+ "to %d and continuing",
+ au.groups.groups_len, max_groups);
+ req->auxgidcount = max_groups;
+ }
+
+ if (req->lk_owner.len > max_lk_owner_len) {
+ gf_log("", GF_LOG_WARNING,
+ "lkowner field to big (%d), depends on the number of "
+ "groups (%d), failing authentication",
+ req->lk_owner.len, req->auxgidcount);
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount, sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log("auth-glusterfs-v2", GF_LOG_WARNING, "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (i = 0; i < req->auxgidcount; ++i)
+ req->auxgids[i] = au.groups.groups_val[i];
+
+ for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
+ req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
+
+ /* All new things, starting glusterfs-4.0.0 */
+ req->flags = au.flags;
+ req->ctime.tv_sec = au.ctime_sec;
+ req->ctime.tv_nsec = au.ctime_nsec;
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s, flags: %d",
+ req->pid, req->uid, req->gid, lkowner_utoa(&req->lk_owner),
+ req->flags);
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ /* TODO: instead use alloca() for these variables */
+ free(au.groups.groups_val);
+ free(au.lk_owner.lk_owner_val);
+
+ return ret;
+}
-rpcsvc_auth_t rpcsvc_auth_glusterfs_v2 = {
- .authname = "AUTH_GLUSTERFS-v2",
- .authnum = AUTH_GLUSTERFS_v2,
- .authops = &auth_glusterfs_ops_v2,
- .authprivate = NULL
-};
+rpcsvc_auth_ops_t auth_glusterfs_ops_v3 = {
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_v3_request_init,
+ .authenticate = auth_glusterfs_v3_authenticate};
+rpcsvc_auth_t rpcsvc_auth_glusterfs_v3 = {.authname = "AUTH_GLUSTERFS-v3",
+ .authnum = AUTH_GLUSTERFS_v3,
+ .authops = &auth_glusterfs_ops_v3,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_v2_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_glusterfs_v3_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_glusterfs_v2;
+ return &rpcsvc_auth_glusterfs_v3;
}
diff --git a/rpc/rpc-lib/src/auth-null.c b/rpc/rpc-lib/src/auth-null.c
index ebdcc8ff8e7..6d059b9da50 100644
--- a/rpc/rpc-lib/src/auth-null.c
+++ b/rpc/rpc-lib/src/auth-null.c
@@ -8,54 +8,33 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
-
+#include <glusterfs/dict.h>
int
-auth_null_request_init (rpcsvc_request_t *req, void *priv)
+auth_null_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
-
- memset (req->cred.authdata, 0, GF_MAX_AUTH_BYTES);
- req->cred.datalen = 0;
-
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
-
- return 0;
+ return 0;
}
-int auth_null_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_null_authenticate(rpcsvc_request_t *req, void *priv)
{
- /* Always succeed. */
- return RPCSVC_AUTH_ACCEPT;
+ /* Always succeed. */
+ return RPCSVC_AUTH_ACCEPT;
}
-rpcsvc_auth_ops_t auth_null_ops = {
- .transport_init = NULL,
- .request_init = auth_null_request_init,
- .authenticate = auth_null_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_null = {
- .authname = "AUTH_NULL",
- .authnum = AUTH_NULL,
- .authops = &auth_null_ops,
- .authprivate = NULL
-};
+rpcsvc_auth_ops_t auth_null_ops = {.transport_init = NULL,
+ .request_init = auth_null_request_init,
+ .authenticate = auth_null_authenticate};
+rpcsvc_auth_t rpcsvc_auth_null = {.authname = "AUTH_NULL",
+ .authnum = AUTH_NULL,
+ .authops = &auth_null_ops,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_null_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_null;
+ return &rpcsvc_auth_null;
}
diff --git a/rpc/rpc-lib/src/auth-unix.c b/rpc/rpc-lib/src/auth-unix.c
index 6251d60a896..61d475a5e84 100644
--- a/rpc/rpc-lib/src/auth-unix.c
+++ b/rpc/rpc-lib/src/auth-unix.c
@@ -8,75 +8,59 @@
cases as published by the Free Software Foundation.
*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
-
int
-auth_unix_request_init (rpcsvc_request_t *req, void *priv)
+auth_unix_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
- return 0;
+ return 0;
}
-int auth_unix_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_unix_authenticate(rpcsvc_request_t *req, void *priv)
{
- int ret = RPCSVC_AUTH_REJECT;
- struct authunix_parms aup;
- char machname[MAX_MACHINE_NAME];
+ int ret = RPCSVC_AUTH_REJECT;
+ struct authunix_parms aup;
+ char machname[MAX_MACHINE_NAME];
- if (!req)
- return ret;
-
- ret = xdr_to_auth_unix_cred (req->cred.authdata, req->cred.datalen,
- &aup, machname, req->auxgids);
- if (ret == -1) {
- gf_log ("", GF_LOG_WARNING, "failed to decode unix credentials");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- req->uid = aup.aup_uid;
- req->gid = aup.aup_gid;
- req->auxgidcount = aup.aup_len;
+ if (!req)
+ return ret;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: machine name: %s, uid: %d"
- ", gid: %d", machname, req->uid, req->gid);
- ret = RPCSVC_AUTH_ACCEPT;
+ req->auxgids = req->auxgidsmall;
+ ret = xdr_to_auth_unix_cred(req->cred.authdata, req->cred.datalen, &aup,
+ machname, req->auxgids);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode unix credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->uid = aup.aup_uid;
+ req->gid = aup.aup_gid;
+ req->auxgidcount = aup.aup_len;
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: machine name: %s, uid: %d"
+ ", gid: %d",
+ machname, req->uid, req->gid);
+ ret = RPCSVC_AUTH_ACCEPT;
err:
- return ret;
+ return ret;
}
-rpcsvc_auth_ops_t auth_unix_ops = {
- .transport_init = NULL,
- .request_init = auth_unix_request_init,
- .authenticate = auth_unix_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_unix = {
- .authname = "AUTH_UNIX",
- .authnum = AUTH_UNIX,
- .authops = &auth_unix_ops,
- .authprivate = NULL
-};
+rpcsvc_auth_ops_t auth_unix_ops = {.transport_init = NULL,
+ .request_init = auth_unix_request_init,
+ .authenticate = auth_unix_authenticate};
+rpcsvc_auth_t rpcsvc_auth_unix = {.authname = "AUTH_UNIX",
+ .authnum = AUTH_UNIX,
+ .authops = &auth_unix_ops,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_unix_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_unix;
+ return &rpcsvc_auth_unix;
}
diff --git a/rpc/rpc-lib/src/autoscale-threads.c b/rpc/rpc-lib/src/autoscale-threads.c
new file mode 100644
index 00000000000..a954ae7a27a
--- /dev/null
+++ b/rpc/rpc-lib/src/autoscale-threads.c
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/gf-event.h>
+#include "rpcsvc.h"
+
+void
+rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr)
+{
+ struct event_pool *pool = ctx->event_pool;
+ int thread_count = pool->eventthreadcount;
+
+ pool->auto_thread_count += incr;
+ (void)gf_event_reconfigure_threads(pool, thread_count + incr);
+}
diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym
new file mode 100644
index 00000000000..e026d80259b
--- /dev/null
+++ b/rpc/rpc-lib/src/libgfrpc.sym
@@ -0,0 +1,68 @@
+is_rpc_clnt_disconnected
+rpcclnt_cbk_program_register
+rpc_clnt_cleanup_and_start
+rpc_clnt_connection_cleanup
+rpc_clnt_disable
+rpc_clnt_new
+rpc_clnt_reconfig
+rpc_clnt_reconnect
+rpc_clnt_reconnect_cleanup
+rpc_clnt_ref
+rpc_clnt_register_notify
+rpc_clnt_start
+rpc_clnt_submit
+rpc_clnt_unref
+rpc_reply_to_xdr
+rpcsvc_auth_array
+rpcsvc_auth_check
+rpcsvc_auth_reconf
+rpcsvc_auth_unix_auxgids
+rpcsvc_callback_submit
+rpcsvc_create_listener
+rpcsvc_create_listeners
+rpcsvc_drc_init
+rpcsvc_drc_priv
+rpcsvc_drc_reconfigure
+rpcsvc_get_program_vector_sizer
+rpcsvc_init
+rpcsvc_destroy
+rpcsvc_init_options
+rpcsvc_listener_destroy
+rpcsvc_program_register
+rpcsvc_program_register_portmap
+rpcsvc_program_register_rpcbind6
+rpcsvc_program_unregister
+rpcsvc_program_unregister_portmap
+rpcsvc_program_unregister_rpcbind6
+rpcsvc_reconfigure_options
+rpcsvc_register_notify
+rpcsvc_register_portmap_enabled
+rpcsvc_request_submit
+rpcsvc_set_outstanding_rpc_limit
+rpcsvc_set_throttle_on
+rpcsvc_submit_generic
+rpcsvc_submit_message
+rpcsvc_transport_peeraddr
+rpcsvc_transport_peername
+rpcsvc_transport_privport_check
+rpcsvc_transport_unix_options_build
+rpcsvc_transport_volume_allowed
+rpcsvc_transport_connect
+rpcsvc_transport_getpeeraddr
+rpcsvc_unregister_notify
+rpcsvc_volume_allowed
+rpc_transport_count
+rpc_transport_connect
+rpc_transport_disconnect
+rpc_transport_get_peeraddr
+rpc_transport_inet_options_build
+rpc_transport_keepalive_options_set
+rpc_transport_notify
+rpc_transport_pollin_alloc
+rpc_transport_pollin_destroy
+rpc_transport_ref
+rpc_transport_unix_options_build
+rpc_transport_unref
+rpc_clnt_mgmt_pmap_signout
+rpcsvc_autoscale_threads
+rpcsvc_statedump
diff --git a/rpc/rpc-lib/src/mgmt-pmap.c b/rpc/rpc-lib/src/mgmt-pmap.c
new file mode 100644
index 00000000000..25a7148e5a3
--- /dev/null
+++ b/rpc/rpc-lib/src/mgmt-pmap.c
@@ -0,0 +1,147 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "portmap-xdr.h"
+#include "protocol-common.h"
+#include "rpc-clnt.h"
+#include "xdr-generic.h"
+
+/* Defining a minimal RPC client program for portmap signout
+ */
+char *clnt_pmap_signout_procs[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_SIGNOUT] = "SIGNOUT",
+};
+
+rpc_clnt_prog_t clnt_pmap_signout_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .procnames = clnt_pmap_signout_procs,
+};
+
+static int
+mgmt_pmap_signout_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ pmap_signout_rsp rsp = {
+ 0,
+ };
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = myframe;
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_signout_rsp);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ goto out;
+ }
+out:
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+
+ return 0;
+}
+
+int
+rpc_clnt_mgmt_pmap_signout(glusterfs_ctx_t *ctx, char *brickname)
+{
+ int ret = 0;
+ pmap_signout_req req = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ cmd_args_t *cmd_args = NULL;
+ char brick_name[PATH_MAX] = {
+ 0,
+ };
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ frame = create_frame(THIS, ctx->pool);
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->brick_port && (!cmd_args->brick_name || !brickname)) {
+ gf_log("fsd-mgmt", GF_LOG_DEBUG,
+ "portmapper signout arguments not given");
+ goto out;
+ }
+
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "rdma")) {
+ snprintf(brick_name, sizeof(brick_name), "%s.rdma",
+ cmd_args->brick_name);
+ req.brick = brick_name;
+ } else {
+ if (brickname)
+ req.brick = brickname;
+ else
+ req.brick = cmd_args->brick_name;
+ }
+
+ req.port = cmd_args->brick_port;
+ req.rdma_port = cmd_args->brick_port2;
+
+ /* mgmt_submit_request is not available in libglusterfs.
+ * Need to serialize and submit manually.
+ */
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_pmap_signout_req, &req);
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, &req, (xdrproc_t)xdr_pmap_signout_req);
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+
+ ret = rpc_clnt_submit(ctx->mgmt, &clnt_pmap_signout_prog, GF_PMAP_SIGNOUT,
+ mgmt_pmap_signout_cbk, &iov, 1, NULL, 0, iobref,
+ frame, NULL, 0, NULL, 0, NULL);
+out:
+ if (iobref)
+ iobref_unref(iobref);
+
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
+}
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index f23a83f927c..0cb5862e9a9 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -12,219 +12,371 @@
#define _PROTOCOL_COMMON_H
enum gf_fop_procnum {
- GFS3_OP_NULL, /* 0 */
- GFS3_OP_STAT,
- GFS3_OP_READLINK,
- GFS3_OP_MKNOD,
- GFS3_OP_MKDIR,
- GFS3_OP_UNLINK,
- GFS3_OP_RMDIR,
- GFS3_OP_SYMLINK,
- GFS3_OP_RENAME,
- GFS3_OP_LINK,
- GFS3_OP_TRUNCATE,
- GFS3_OP_OPEN,
- GFS3_OP_READ,
- GFS3_OP_WRITE,
- GFS3_OP_STATFS,
- GFS3_OP_FLUSH,
- GFS3_OP_FSYNC,
- GFS3_OP_SETXATTR,
- GFS3_OP_GETXATTR,
- GFS3_OP_REMOVEXATTR,
- GFS3_OP_OPENDIR,
- GFS3_OP_FSYNCDIR,
- GFS3_OP_ACCESS,
- GFS3_OP_CREATE,
- GFS3_OP_FTRUNCATE,
- GFS3_OP_FSTAT,
- GFS3_OP_LK,
- GFS3_OP_LOOKUP,
- GFS3_OP_READDIR,
- GFS3_OP_INODELK,
- GFS3_OP_FINODELK,
- GFS3_OP_ENTRYLK,
- GFS3_OP_FENTRYLK,
- GFS3_OP_XATTROP,
- GFS3_OP_FXATTROP,
- GFS3_OP_FGETXATTR,
- GFS3_OP_FSETXATTR,
- GFS3_OP_RCHECKSUM,
- GFS3_OP_SETATTR,
- GFS3_OP_FSETATTR,
- GFS3_OP_READDIRP,
- GFS3_OP_RELEASE,
- GFS3_OP_RELEASEDIR,
- GFS3_OP_FREMOVEXATTR,
- GFS3_OP_MAXVALUE,
-} ;
+ GFS3_OP_NULL, /* 0 */
+ GFS3_OP_STAT,
+ GFS3_OP_READLINK,
+ GFS3_OP_MKNOD,
+ GFS3_OP_MKDIR,
+ GFS3_OP_UNLINK,
+ GFS3_OP_RMDIR,
+ GFS3_OP_SYMLINK,
+ GFS3_OP_RENAME,
+ GFS3_OP_LINK,
+ GFS3_OP_TRUNCATE,
+ GFS3_OP_OPEN,
+ GFS3_OP_READ,
+ GFS3_OP_WRITE,
+ GFS3_OP_STATFS,
+ GFS3_OP_FLUSH,
+ GFS3_OP_FSYNC,
+ GFS3_OP_SETXATTR,
+ GFS3_OP_GETXATTR,
+ GFS3_OP_REMOVEXATTR,
+ GFS3_OP_OPENDIR,
+ GFS3_OP_FSYNCDIR,
+ GFS3_OP_ACCESS,
+ GFS3_OP_CREATE,
+ GFS3_OP_FTRUNCATE,
+ GFS3_OP_FSTAT,
+ GFS3_OP_LK,
+ GFS3_OP_LOOKUP,
+ GFS3_OP_READDIR,
+ GFS3_OP_INODELK,
+ GFS3_OP_FINODELK,
+ GFS3_OP_ENTRYLK,
+ GFS3_OP_FENTRYLK,
+ GFS3_OP_XATTROP,
+ GFS3_OP_FXATTROP,
+ GFS3_OP_FGETXATTR,
+ GFS3_OP_FSETXATTR,
+ GFS3_OP_RCHECKSUM,
+ GFS3_OP_SETATTR,
+ GFS3_OP_FSETATTR,
+ GFS3_OP_READDIRP,
+ GFS3_OP_RELEASE,
+ GFS3_OP_RELEASEDIR,
+ GFS3_OP_FREMOVEXATTR,
+ GFS3_OP_FALLOCATE,
+ GFS3_OP_DISCARD,
+ GFS3_OP_ZEROFILL,
+ GFS3_OP_IPC,
+ GFS3_OP_SEEK,
+ GFS3_OP_COMPOUND,
+ GFS3_OP_LEASE,
+ GFS3_OP_GETACTIVELK,
+ GFS3_OP_SETACTIVELK,
+ GFS3_OP_ICREATE,
+ GFS3_OP_NAMELINK,
+ GFS3_OP_PUT,
+ GFS3_OP_COPY_FILE_RANGE,
+ GFS3_OP_MAXVALUE,
+};
enum gf_handshake_procnum {
- GF_HNDSK_NULL,
- GF_HNDSK_SETVOLUME,
- GF_HNDSK_GETSPEC,
- GF_HNDSK_PING,
- GF_HNDSK_SET_LK_VER,
- GF_HNDSK_EVENT_NOTIFY,
- GF_HNDSK_MAXVALUE,
+ GF_HNDSK_NULL,
+ GF_HNDSK_SETVOLUME,
+ GF_HNDSK_GETSPEC,
+ GF_HNDSK_PING,
+ GF_HNDSK_SET_LK_VER,
+ GF_HNDSK_EVENT_NOTIFY,
+ GF_HNDSK_GET_VOLUME_INFO,
+ GF_HNDSK_GET_SNAPSHOT_INFO,
+ GF_HNDSK_MAXVALUE,
};
enum gf_pmap_procnum {
- GF_PMAP_NULL = 0,
- GF_PMAP_PORTBYBRICK,
- GF_PMAP_BRICKBYPORT,
- GF_PMAP_SIGNUP,
- GF_PMAP_SIGNIN,
- GF_PMAP_SIGNOUT,
- GF_PMAP_MAXVALUE,
+ GF_PMAP_NULL = 0,
+ GF_PMAP_PORTBYBRICK,
+ GF_PMAP_BRICKBYPORT,
+ /*
+ * SIGNUP is not used, and shouldn't be used. It was kept here only
+ * to avoid changing the numbers for things that come after it in this
+ * list.
+ */
+ GF_PMAP_SIGNUP,
+ GF_PMAP_SIGNIN,
+ GF_PMAP_SIGNOUT,
+ GF_PMAP_MAXVALUE,
+};
+
+enum gf_aggregator_procnum {
+ GF_AGGREGATOR_NULL = 0,
+ GF_AGGREGATOR_LOOKUP,
+ GF_AGGREGATOR_GETLIMIT,
+ GF_AGGREGATOR_MAXVALUE,
};
enum gf_pmap_port_type {
- GF_PMAP_PORT_FREE = 0,
- GF_PMAP_PORT_FOREIGN,
- GF_PMAP_PORT_LEASED,
- GF_PMAP_PORT_NONE,
- GF_PMAP_PORT_BRICKSERVER,
+ GF_PMAP_PORT_FREE = 0,
+ GF_PMAP_PORT_FOREIGN, /* it actually means, not sure who is using it, but it
+ is in-use */
+ GF_PMAP_PORT_LEASED,
+ GF_PMAP_PORT_ANY,
+ GF_PMAP_PORT_BRICKSERVER, /* port used by brick process */
};
typedef enum gf_pmap_port_type gf_pmap_port_type_t;
enum gf_probe_resp {
- GF_PROBE_SUCCESS,
- GF_PROBE_LOCALHOST,
- GF_PROBE_FRIEND,
- GF_PROBE_ANOTHER_CLUSTER,
- GF_PROBE_VOLUME_CONFLICT,
- GF_PROBE_UNKNOWN_PEER,
- GF_PROBE_ADD_FAILED,
- GF_PROBE_QUORUM_NOT_MET
+ GF_PROBE_SUCCESS,
+ GF_PROBE_LOCALHOST,
+ GF_PROBE_FRIEND,
+ GF_PROBE_ANOTHER_CLUSTER,
+ GF_PROBE_VOLUME_CONFLICT,
+ GF_PROBE_SAME_UUID,
+ GF_PROBE_UNKNOWN_PEER,
+ GF_PROBE_ADD_FAILED,
+ GF_PROBE_QUORUM_NOT_MET,
+ GF_PROBE_MISSED_SNAP_CONFLICT,
+ GF_PROBE_SNAP_CONFLICT,
+ GF_PROBE_FRIEND_DETACHING,
};
enum gf_deprobe_resp {
- GF_DEPROBE_SUCCESS,
- GF_DEPROBE_LOCALHOST,
- GF_DEPROBE_NOT_FRIEND,
- GF_DEPROBE_BRICK_EXIST,
- GF_DEPROBE_FRIEND_DOWN,
- GF_DEPROBE_QUORUM_NOT_MET,
+ GF_DEPROBE_SUCCESS,
+ GF_DEPROBE_LOCALHOST,
+ GF_DEPROBE_NOT_FRIEND,
+ GF_DEPROBE_BRICK_EXIST,
+ GF_DEPROBE_FRIEND_DOWN,
+ GF_DEPROBE_QUORUM_NOT_MET,
+ GF_DEPROBE_FRIEND_DETACHING,
+ GF_DEPROBE_SNAP_BRICK_EXIST,
};
enum gf_cbk_procnum {
- GF_CBK_NULL = 0,
- GF_CBK_FETCHSPEC,
- GF_CBK_INO_FLUSH,
- GF_CBK_EVENT_NOTIFY,
- GF_CBK_MAXVALUE,
+ GF_CBK_NULL = 0,
+ GF_CBK_FETCHSPEC,
+ GF_CBK_INO_FLUSH,
+ GF_CBK_EVENT_NOTIFY,
+ GF_CBK_GET_SNAPS,
+ GF_CBK_CACHE_INVALIDATION,
+ GF_CBK_CHILD_UP,
+ GF_CBK_CHILD_DOWN,
+ GF_CBK_RECALL_LEASE,
+ GF_CBK_STATEDUMP,
+ GF_CBK_INODELK_CONTENTION,
+ GF_CBK_ENTRYLK_CONTENTION,
+ GF_CBK_MAXVALUE,
};
enum gluster_cli_procnum {
- GLUSTER_CLI_NULL, /* 0 */
- GLUSTER_CLI_PROBE,
- GLUSTER_CLI_DEPROBE,
- GLUSTER_CLI_LIST_FRIENDS,
- GLUSTER_CLI_CREATE_VOLUME,
- GLUSTER_CLI_GET_VOLUME,
- GLUSTER_CLI_GET_NEXT_VOLUME,
- GLUSTER_CLI_DELETE_VOLUME,
- GLUSTER_CLI_START_VOLUME,
- GLUSTER_CLI_STOP_VOLUME,
- GLUSTER_CLI_RENAME_VOLUME,
- GLUSTER_CLI_DEFRAG_VOLUME,
- GLUSTER_CLI_SET_VOLUME,
- GLUSTER_CLI_ADD_BRICK,
- GLUSTER_CLI_REMOVE_BRICK,
- GLUSTER_CLI_REPLACE_BRICK,
- GLUSTER_CLI_LOG_ROTATE,
- GLUSTER_CLI_GETSPEC,
- GLUSTER_CLI_PMAP_PORTBYBRICK,
- GLUSTER_CLI_SYNC_VOLUME,
- GLUSTER_CLI_RESET_VOLUME,
- GLUSTER_CLI_FSM_LOG,
- GLUSTER_CLI_GSYNC_SET,
- GLUSTER_CLI_PROFILE_VOLUME,
- GLUSTER_CLI_QUOTA,
- GLUSTER_CLI_TOP_VOLUME,
- GLUSTER_CLI_GETWD,
- GLUSTER_CLI_STATUS_VOLUME,
- GLUSTER_CLI_STATUS_ALL,
- GLUSTER_CLI_MOUNT,
- GLUSTER_CLI_UMOUNT,
- GLUSTER_CLI_HEAL_VOLUME,
- GLUSTER_CLI_STATEDUMP_VOLUME,
- GLUSTER_CLI_LIST_VOLUME,
- GLUSTER_CLI_CLRLOCKS_VOLUME,
- GLUSTER_CLI_MAXVALUE,
+ GLUSTER_CLI_NULL, /* 0 */
+ GLUSTER_CLI_PROBE,
+ GLUSTER_CLI_DEPROBE,
+ GLUSTER_CLI_LIST_FRIENDS,
+ GLUSTER_CLI_CREATE_VOLUME,
+ GLUSTER_CLI_GET_VOLUME,
+ GLUSTER_CLI_GET_NEXT_VOLUME,
+ GLUSTER_CLI_DELETE_VOLUME,
+ GLUSTER_CLI_START_VOLUME,
+ GLUSTER_CLI_STOP_VOLUME,
+ GLUSTER_CLI_RENAME_VOLUME,
+ GLUSTER_CLI_DEFRAG_VOLUME,
+ GLUSTER_CLI_SET_VOLUME,
+ GLUSTER_CLI_ADD_BRICK,
+ GLUSTER_CLI_REMOVE_BRICK,
+ GLUSTER_CLI_REPLACE_BRICK,
+ GLUSTER_CLI_LOG_ROTATE,
+ GLUSTER_CLI_GETSPEC,
+ GLUSTER_CLI_PMAP_PORTBYBRICK,
+ GLUSTER_CLI_SYNC_VOLUME,
+ GLUSTER_CLI_RESET_VOLUME,
+ GLUSTER_CLI_FSM_LOG,
+ GLUSTER_CLI_GSYNC_SET,
+ GLUSTER_CLI_PROFILE_VOLUME,
+ GLUSTER_CLI_QUOTA,
+ GLUSTER_CLI_TOP_VOLUME,
+ GLUSTER_CLI_GETWD,
+ GLUSTER_CLI_STATUS_VOLUME,
+ GLUSTER_CLI_STATUS_ALL,
+ GLUSTER_CLI_MOUNT,
+ GLUSTER_CLI_UMOUNT,
+ GLUSTER_CLI_HEAL_VOLUME,
+ GLUSTER_CLI_STATEDUMP_VOLUME,
+ GLUSTER_CLI_LIST_VOLUME,
+ GLUSTER_CLI_CLRLOCKS_VOLUME,
+ GLUSTER_CLI_UUID_RESET,
+ GLUSTER_CLI_UUID_GET,
+ GLUSTER_CLI_COPY_FILE,
+ GLUSTER_CLI_SYS_EXEC,
+ GLUSTER_CLI_SNAP,
+ GLUSTER_CLI_BARRIER_VOLUME,
+ GLUSTER_CLI_GET_VOL_OPT,
+ GLUSTER_CLI_GANESHA,
+ GLUSTER_CLI_BITROT,
+ GLUSTER_CLI_ATTACH_TIER,
+ GLUSTER_CLI_TIER,
+ GLUSTER_CLI_GET_STATE,
+ GLUSTER_CLI_RESET_BRICK,
+ GLUSTER_CLI_REMOVE_TIER_BRICK,
+ GLUSTER_CLI_ADD_TIER_BRICK,
+ GLUSTER_CLI_MAXVALUE,
};
enum glusterd_mgmt_procnum {
- GLUSTERD_MGMT_NULL, /* 0 */
- GLUSTERD_MGMT_CLUSTER_LOCK,
- GLUSTERD_MGMT_CLUSTER_UNLOCK,
- GLUSTERD_MGMT_STAGE_OP,
- GLUSTERD_MGMT_COMMIT_OP,
- GLUSTERD_MGMT_MAXVALUE,
+ GLUSTERD_MGMT_NULL, /* 0 */
+ GLUSTERD_MGMT_CLUSTER_LOCK,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK,
+ GLUSTERD_MGMT_STAGE_OP,
+ GLUSTERD_MGMT_COMMIT_OP,
+ GLUSTERD_MGMT_MAXVALUE,
};
enum glusterd_friend_procnum {
- GLUSTERD_FRIEND_NULL, /* 0 */
- GLUSTERD_PROBE_QUERY,
- GLUSTERD_FRIEND_ADD,
- GLUSTERD_FRIEND_REMOVE,
- GLUSTERD_FRIEND_UPDATE,
- GLUSTERD_FRIEND_MAXVALUE,
+ GLUSTERD_FRIEND_NULL, /* 0 */
+ GLUSTERD_PROBE_QUERY,
+ GLUSTERD_FRIEND_ADD,
+ GLUSTERD_FRIEND_REMOVE,
+ GLUSTERD_FRIEND_UPDATE,
+ GLUSTERD_FRIEND_MAXVALUE,
};
enum glusterd_brick_procnum {
- GLUSTERD_BRICK_NULL, /* 0 */
- GLUSTERD_BRICK_TERMINATE,
- GLUSTERD_BRICK_XLATOR_INFO,
- GLUSTERD_BRICK_XLATOR_OP,
- GLUSTERD_BRICK_STATUS,
- GLUSTERD_BRICK_OP,
- GLUSTERD_BRICK_XLATOR_DEFRAG,
- GLUSTERD_NODE_PROFILE,
- GLUSTERD_NODE_STATUS,
- GLUSTERD_BRICK_MAXVALUE,
+ GLUSTERD_BRICK_NULL, /* 0 */
+ GLUSTERD_BRICK_TERMINATE,
+ GLUSTERD_BRICK_XLATOR_INFO,
+ GLUSTERD_BRICK_XLATOR_OP,
+ GLUSTERD_BRICK_STATUS,
+ GLUSTERD_BRICK_OP,
+ GLUSTERD_BRICK_XLATOR_DEFRAG,
+ GLUSTERD_NODE_PROFILE,
+ GLUSTERD_NODE_STATUS,
+ GLUSTERD_VOLUME_BARRIER_OP,
+ GLUSTERD_BRICK_BARRIER,
+ GLUSTERD_NODE_BITROT,
+ GLUSTERD_BRICK_ATTACH,
+ GLUSTERD_DUMP_METRICS,
+ GLUSTERD_SVC_ATTACH,
+ GLUSTERD_SVC_DETACH,
+ GLUSTERD_BRICK_MAXVALUE,
+};
+
+enum glusterd_mgmt_hndsk_procnum {
+ GD_MGMT_HNDSK_NULL,
+ GD_MGMT_HNDSK_VERSIONS,
+ GD_MGMT_HNDSK_VERSIONS_ACK,
+ GD_MGMT_HNDSK_MAXVALUE,
};
typedef enum {
- GF_AFR_OP_INVALID,
- GF_AFR_OP_HEAL_INDEX,
- GF_AFR_OP_HEAL_FULL,
- GF_AFR_OP_INDEX_SUMMARY,
- GF_AFR_OP_HEALED_FILES,
- GF_AFR_OP_HEAL_FAILED_FILES,
- GF_AFR_OP_SPLIT_BRAIN_FILES
-} gf_xl_afr_op_t ;
+ GF_SHD_OP_INVALID,
+ GF_SHD_OP_HEAL_INDEX,
+ GF_SHD_OP_HEAL_FULL,
+ GF_SHD_OP_INDEX_SUMMARY,
+ GF_SHD_OP_HEALED_FILES,
+ GF_SHD_OP_HEAL_FAILED_FILES,
+ GF_SHD_OP_SPLIT_BRAIN_FILES,
+ GF_SHD_OP_STATISTICS,
+ GF_SHD_OP_STATISTICS_HEAL_COUNT,
+ GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK,
+ GF_SHD_OP_HEAL_ENABLE,
+ GF_SHD_OP_HEAL_DISABLE,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME,
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE,
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE,
+ GF_SHD_OP_HEAL_SUMMARY,
+} gf_xl_afr_op_t;
-enum gf_hdsk_event_notify_op {
- GF_EN_DEFRAG_STATUS,
- GF_EN_MAX,
+struct gf_gsync_detailed_status_ {
+ char node[NAME_MAX];
+ char master[NAME_MAX];
+ char brick[PATH_MAX];
+ char slave_user[NAME_MAX];
+ char slave[NAME_MAX];
+ char slave_node[NAME_MAX];
+ char worker_status[NAME_MAX];
+ char crawl_status[NAME_MAX];
+ char last_synced[NAME_MAX];
+ char last_synced_utc[NAME_MAX];
+ char entry[NAME_MAX];
+ char data[NAME_MAX];
+ char meta[NAME_MAX];
+ char failures[NAME_MAX];
+ char checkpoint_time[NAME_MAX];
+ char checkpoint_time_utc[NAME_MAX];
+ char checkpoint_completed[NAME_MAX];
+ char checkpoint_completion_time[NAME_MAX];
+ char checkpoint_completion_time_utc[NAME_MAX];
+ char brick_host_uuid[NAME_MAX];
+ char slavekey[NAME_MAX];
+ char session_slave[NAME_MAX];
};
-#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
-#define GLUSTER_HNDSK_VERSION 2 /* 0.0.1 */
+enum glusterd_mgmt_v3_procnum {
+ GLUSTERD_MGMT_V3_NULL, /* 0 */
+ GLUSTERD_MGMT_V3_LOCK,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ GLUSTERD_MGMT_V3_BRICK_OP,
+ GLUSTERD_MGMT_V3_COMMIT,
+ GLUSTERD_MGMT_V3_POST_COMMIT,
+ GLUSTERD_MGMT_V3_POST_VALIDATE,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ GLUSTERD_MGMT_V3_MAXVALUE,
+};
+
+typedef struct gf_gsync_detailed_status_ gf_gsync_status_t;
-#define GLUSTER_PMAP_PROGRAM 34123456
-#define GLUSTER_PMAP_VERSION 1
+enum gf_get_volume_info_type {
+ GF_GET_VOLUME_NONE, /* 0 */
+ GF_GET_VOLUME_UUID
+};
-#define GLUSTER_CBK_PROGRAM 52743234 /* Completely random */
-#define GLUSTER_CBK_VERSION 1 /* 0.0.1 */
+typedef enum gf_get_volume_info_type gf_get_volume_info_type;
+
+enum gf_get_snapshot_info_type {
+ GF_GET_SNAPSHOT_LIST,
+};
+typedef enum gf_get_snapshot_info_type gf_get_snapshot_info_type;
-#define GLUSTER3_1_FOP_PROGRAM 1298437 /* Completely random */
-#define GLUSTER3_1_FOP_VERSION 330 /* 3.3.0 */
-#define GLUSTER3_1_FOP_PROCCNT GFS3_OP_MAXVALUE
+enum gf_getspec_flags_type { GF_GETSPEC_FLAG_SERVERS_LIST = 1 };
+typedef enum gf_getspec_flags_type gf_getspec_flags_type;
+
+#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
+#define GLUSTER_HNDSK_VERSION 2 /* 0.0.2 */
+
+#define GLUSTER_PMAP_PROGRAM 34123456
+#define GLUSTER_PMAP_VERSION 1
+
+#define GLUSTER_CBK_PROGRAM 52743234 /* Completely random */
+#define GLUSTER_CBK_VERSION 1 /* 0.0.1 */
+
+#define GLUSTER_FOP_PROGRAM 1298437 /* Completely random */
+#define GLUSTER_FOP_VERSION 330 /* 3.3.0 */
+#define GLUSTER_FOP_PROCCNT GFS3_OP_MAXVALUE
+
+#define GLUSTER_FOP_VERSION_v2 400 /* 4.0.0 */
+
+/* Aggregator */
+#define GLUSTER_AGGREGATOR_PROGRAM 29852134 /* Completely random */
+#define GLUSTER_AGGREGATOR_VERSION 1
/* Second version */
-#define GD_MGMT_PROGRAM 1238433 /* Completely random */
-#define GD_MGMT_VERSION 2 /* 0.0.2 */
+#define GD_MGMT_PROGRAM 1238433 /* Completely random */
+#define GD_MGMT_VERSION 2 /* 0.0.2 */
+
+#define GD_FRIEND_PROGRAM 1238437 /* Completely random */
+#define GD_FRIEND_VERSION 2 /* 0.0.2 */
+
+#define GLUSTER_CLI_PROGRAM 1238463 /* Completely random */
+#define GLUSTER_CLI_VERSION 2 /* 0.0.2 */
+
+#define GD_BRICK_PROGRAM 4867634 /*Completely random*/
+#define GD_BRICK_VERSION 2
-#define GD_FRIEND_PROGRAM 1238437 /* Completely random */
-#define GD_FRIEND_VERSION 2 /* 0.0.2 */
+/* Third version */
+#define GD_MGMT_V3_VERSION 3
-#define GLUSTER_CLI_PROGRAM 1238463 /* Completely random */
-#define GLUSTER_CLI_VERSION 2 /* 0.0.2 */
+/* OP-VERSION handshake */
+#define GD_MGMT_HNDSK_PROGRAM 1239873 /* Completely random */
+#define GD_MGMT_HNDSK_VERSION 1
-#define GD_BRICK_PROGRAM 4867634 /*Completely random*/
-#define GD_BRICK_VERSION 2
+#define GD_VOLUME_NAME_MAX \
+ ((NAME_MAX + 1) - 5) /* Maximum size of volume name */
+#define GD_VOLUME_NAME_MAX_TIER \
+ (GD_VOLUME_NAME_MAX + 5) /* +5 needed for '-hot' \
+ and '-cold' suffixes*/
+#define GLUSTER_PROCESS_UUID_FMT \
+ "CTX_ID:%s-GRAPH_ID:%d-PID:%d-HOST:%s-PC_NAME:%s-RECON_NO:%s"
#endif /* !_PROTOCOL_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.c b/rpc/rpc-lib/src/rpc-clnt-ping.c
new file mode 100644
index 00000000000..31f17841bea
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-clnt-ping.c
@@ -0,0 +1,357 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "rpc-clnt.h"
+#include "rpc-clnt-ping.h"
+#include <glusterfs/byte-order.h>
+#include "xdr-rpcclnt.h"
+#include "rpc-transport.h"
+#include "protocol-common.h"
+#include <glusterfs/mem-pool.h>
+#include "xdr-rpc.h"
+#include "rpc-common-xdr.h"
+#include <glusterfs/timespec.h>
+
+char *clnt_ping_procs[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_PING] = "NULL",
+};
+struct rpc_clnt_program clnt_ping_prog = {
+ .progname = "GF-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .procnames = clnt_ping_procs,
+};
+
+struct ping_local {
+ struct rpc_clnt *rpc;
+ struct timespec submit_time;
+};
+
+/* Must be called under conn->lock */
+static int
+__rpc_clnt_rearm_ping_timer(struct rpc_clnt *rpc, gf_timer_cbk_t cbk)
+{
+ rpc_clnt_connection_t *conn = &rpc->conn;
+ rpc_transport_t *trans = conn->trans;
+ struct timespec timeout = {
+ 0,
+ };
+ gf_timer_t *timer = NULL;
+
+ if (conn->ping_timer) {
+ gf_log_callingfn("", GF_LOG_CRITICAL,
+ "%s: ping timer event already scheduled",
+ conn->trans->peerinfo.identifier);
+ return -1;
+ }
+
+ timeout.tv_sec = conn->ping_timeout;
+ timeout.tv_nsec = 0;
+
+ rpc_clnt_ref(rpc);
+ timer = gf_timer_call_after(rpc->ctx, timeout, cbk, (void *)rpc);
+ if (timer == NULL) {
+ gf_log(trans->name, GF_LOG_WARNING, "unable to setup ping timer");
+
+ /* This unref can't be the last. We just took a ref few lines
+ * above. So this can be performed under conn->lock. */
+ rpc_clnt_unref(rpc);
+ conn->ping_started = 0;
+ return -1;
+ }
+
+ conn->ping_timer = timer;
+ conn->ping_started = 1;
+ return 0;
+}
+
+/* Must be called under conn->lock */
+int
+rpc_clnt_remove_ping_timer_locked(struct rpc_clnt *rpc)
+{
+ rpc_clnt_connection_t *conn = &rpc->conn;
+ gf_timer_t *timer = NULL;
+
+ if (conn->ping_timer) {
+ timer = conn->ping_timer;
+ conn->ping_timer = NULL;
+ gf_timer_call_cancel(rpc->ctx, timer);
+ conn->ping_started = 0;
+ return 1;
+ }
+
+ /* This is to account for rpc_clnt_disable that might have set
+ * conn->trans to NULL. */
+ if (conn->trans)
+ gf_log_callingfn("", GF_LOG_DEBUG,
+ "%s: ping timer event "
+ "already removed",
+ conn->trans->peerinfo.identifier);
+
+ return 0;
+}
+
+static void
+rpc_clnt_start_ping(void *rpc_ptr);
+
+void
+rpc_clnt_ping_timer_expired(void *rpc_ptr)
+{
+ struct rpc_clnt *rpc = NULL;
+ rpc_transport_t *trans = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ int disconnect = 0;
+ struct timespec current = {
+ 0,
+ };
+ int unref = 0;
+
+ rpc = (struct rpc_clnt *)rpc_ptr;
+ conn = &rpc->conn;
+ trans = conn->trans;
+
+ if (!trans) {
+ gf_log("ping-timer", GF_LOG_WARNING, "transport not initialized");
+ goto out;
+ }
+
+ timespec_now_realtime(&current);
+ pthread_mutex_lock(&conn->lock);
+ {
+ unref = rpc_clnt_remove_ping_timer_locked(rpc);
+
+ if (((current.tv_sec - conn->last_received.tv_sec) <
+ conn->ping_timeout) ||
+ ((current.tv_sec - conn->last_sent.tv_sec) < conn->ping_timeout)) {
+ gf_log(trans->name, GF_LOG_TRACE,
+ "ping timer expired but transport activity "
+ "detected - not bailing transport");
+ if (__rpc_clnt_rearm_ping_timer(rpc, rpc_clnt_ping_timer_expired) ==
+ -1) {
+ gf_log(trans->name, GF_LOG_WARNING,
+ "unable to setup ping timer");
+ }
+ } else {
+ conn->ping_started = 0;
+ disconnect = 1;
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (unref)
+ rpc_clnt_unref(rpc);
+
+ if (disconnect) {
+ gf_log(trans->name, GF_LOG_CRITICAL,
+ "server %s has not responded in the last %d "
+ "seconds, disconnecting.",
+ trans->peerinfo.identifier, conn->ping_timeout);
+
+ rpc_transport_disconnect(conn->trans, _gf_false);
+ }
+
+out:
+ return;
+}
+
+int
+rpc_clnt_ping_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ struct ping_local *local = NULL;
+ xlator_t *this = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ call_frame_t *frame = NULL;
+ int unref = 0;
+ gf_boolean_t call_notify = _gf_false;
+
+ struct timespec now;
+ struct timespec delta;
+ int64_t latency_msec = 0;
+ int ret = 0;
+
+ if (!myframe) {
+ gf_log(THIS->name, GF_LOG_WARNING, "frame with the request is NULL");
+ goto out;
+ }
+
+ frame = myframe;
+ this = frame->this;
+ local = frame->local;
+ conn = &local->rpc->conn;
+
+ timespec_now(&now);
+ timespec_sub(&local->submit_time, &now, &delta);
+ latency_msec = delta.tv_sec * 1000 + delta.tv_nsec / 1000000;
+
+ gf_log(THIS->name, GF_LOG_DEBUG, "Ping latency is %" PRIu64 "ms",
+ latency_msec);
+ call_notify = _gf_true;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ unref = rpc_clnt_remove_ping_timer_locked(local->rpc);
+ if (req->rpc_status == -1) {
+ conn->ping_started = 0;
+ pthread_mutex_unlock(&conn->lock);
+ if (unref) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "socket or ib related error");
+
+ } else {
+ /* timer expired and transport bailed out */
+ gf_log(this->name, GF_LOG_WARNING, "socket disconnected");
+ }
+ goto after_unlock;
+ }
+
+ if (__rpc_clnt_rearm_ping_timer(local->rpc, rpc_clnt_start_ping) ==
+ -1) {
+ /* unlock before logging error */
+ pthread_mutex_unlock(&conn->lock);
+ gf_log(this->name, GF_LOG_WARNING, "failed to set the ping timer");
+ } else {
+ /* just unlock the mutex */
+ pthread_mutex_unlock(&conn->lock);
+ }
+ }
+after_unlock:
+ if (call_notify) {
+ ret = local->rpc->notifyfn(local->rpc, this, RPC_CLNT_PING,
+ (void *)(uintptr_t)latency_msec);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "RPC_CLNT_PING notify failed");
+ }
+ }
+out:
+ if (unref)
+ rpc_clnt_unref(local->rpc);
+
+ if (frame) {
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ }
+ return 0;
+}
+
+int
+rpc_clnt_ping(struct rpc_clnt *rpc)
+{
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
+ rpc_clnt_connection_t *conn = NULL;
+ struct ping_local *local = NULL;
+
+ conn = &rpc->conn;
+ local = GF_MALLOC(sizeof(struct ping_local), gf_common_ping_local_t);
+ if (!local)
+ return ret;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ GF_FREE(local);
+ return ret;
+ }
+
+ local->rpc = rpc;
+ timespec_now(&local->submit_time);
+ frame->local = local;
+
+ ret = rpc_clnt_submit(rpc, &clnt_ping_prog, GF_DUMP_PING, rpc_clnt_ping_cbk,
+ NULL, 0, NULL, 0, NULL, frame, NULL, 0, NULL, 0,
+ NULL);
+ if (ret) {
+ /* FIXME: should we free the frame here? Methinks so! */
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to start ping timer");
+ } else {
+ /* ping successfully queued in list of saved frames
+ * for the connection*/
+ pthread_mutex_lock(&conn->lock);
+ conn->pingcnt++;
+ pthread_mutex_unlock(&conn->lock);
+ }
+
+ return ret;
+}
+
+static void
+rpc_clnt_start_ping(void *rpc_ptr)
+{
+ struct rpc_clnt *rpc = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ int frame_count = 0;
+ int unref = 0;
+
+ rpc = (struct rpc_clnt *)rpc_ptr;
+ conn = &rpc->conn;
+
+ if (conn->ping_timeout == 0) {
+ gf_log(THIS->name, GF_LOG_DEBUG,
+ "ping timeout is 0,"
+ " returning");
+ return;
+ }
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ unref = rpc_clnt_remove_ping_timer_locked(rpc);
+
+ if (conn->saved_frames) {
+ GF_ASSERT(conn->saved_frames->count >= 0);
+ /* treat the case where conn->saved_frames is NULL
+ as no pending frames */
+ frame_count = conn->saved_frames->count;
+ }
+
+ if ((frame_count == 0) || !conn->connected) {
+ gf_log(THIS->name, GF_LOG_DEBUG,
+ "returning as transport is already disconnected"
+ " OR there are no frames (%d || %d)",
+ !conn->connected, frame_count);
+
+ pthread_mutex_unlock(&conn->lock);
+ if (unref)
+ rpc_clnt_unref(rpc);
+ return;
+ }
+
+ if (__rpc_clnt_rearm_ping_timer(rpc, rpc_clnt_ping_timer_expired) ==
+ -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "unable to setup ping timer");
+ pthread_mutex_unlock(&conn->lock);
+ if (unref)
+ rpc_clnt_unref(rpc);
+ return;
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+ if (unref)
+ rpc_clnt_unref(rpc);
+
+ rpc_clnt_ping(rpc);
+}
+
+void
+rpc_clnt_check_and_start_ping(struct rpc_clnt *rpc)
+{
+ char start_ping = 0;
+
+ pthread_mutex_lock(&rpc->conn.lock);
+ {
+ if (!rpc->conn.ping_started)
+ start_ping = 1;
+ }
+ pthread_mutex_unlock(&rpc->conn.lock);
+
+ if (start_ping)
+ rpc_clnt_start_ping((void *)rpc);
+
+ return;
+}
diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.h b/rpc/rpc-lib/src/rpc-clnt-ping.h
new file mode 100644
index 00000000000..e5466a828c2
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-clnt-ping.h
@@ -0,0 +1,16 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+struct rpc_clnt;
+#define RPC_DEFAULT_PING_TIMEOUT 30
+void
+rpc_clnt_check_and_start_ping(struct rpc_clnt *rpc_ptr);
+int
+rpc_clnt_remove_ping_timer_locked(struct rpc_clnt *rpc);
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index f8a75f6d385..517037c4a5d 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -8,510 +8,473 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#define RPC_CLNT_DEFAULT_REQUEST_COUNT 512
#include "rpc-clnt.h"
-#include "byte-order.h"
+#include "rpc-clnt-ping.h"
+#include <glusterfs/byte-order.h>
#include "xdr-rpcclnt.h"
#include "rpc-transport.h"
#include "protocol-common.h"
-#include "mem-pool.h"
+#include <glusterfs/mem-pool.h>
#include "xdr-rpc.h"
#include "rpc-common-xdr.h"
void
-rpc_clnt_reply_deinit (struct rpc_req *req, struct mem_pool *pool);
+rpc_clnt_reply_deinit(struct rpc_req *req, struct mem_pool *pool);
-uint64_t
-rpc_clnt_new_callid (struct rpc_clnt *clnt)
+struct saved_frame *
+__saved_frames_get_timedout(struct saved_frames *frames, uint32_t timeout,
+ struct timeval *current)
{
- uint64_t callid = 0;
+ struct saved_frame *bailout_frame = NULL, *tmp = NULL;
- pthread_mutex_lock (&clnt->lock);
- {
- callid = ++clnt->xid;
+ if (!list_empty(&frames->sf.list)) {
+ tmp = list_entry(frames->sf.list.next, typeof(*tmp), list);
+ if ((tmp->saved_at.tv_sec + timeout) <= current->tv_sec) {
+ bailout_frame = tmp;
+ list_del_init(&bailout_frame->list);
+ frames->count--;
}
- pthread_mutex_unlock (&clnt->lock);
+ }
- return callid;
-}
-
-
-struct saved_frame *
-__saved_frames_get_timedout (struct saved_frames *frames, uint32_t timeout,
- struct timeval *current)
-{
- struct saved_frame *bailout_frame = NULL, *tmp = NULL;
-
- if (!list_empty(&frames->sf.list)) {
- tmp = list_entry (frames->sf.list.next, typeof (*tmp), list);
- if ((tmp->saved_at.tv_sec + timeout) < current->tv_sec) {
- bailout_frame = tmp;
- list_del_init (&bailout_frame->list);
- frames->count--;
- }
- }
-
- return bailout_frame;
+ return bailout_frame;
}
static int
-_is_lock_fop (struct saved_frame *sframe)
+_is_lock_fop(struct saved_frame *sframe)
{
- int fop = 0;
+ int fop = 0;
- if (SFRAME_GET_PROGNUM (sframe) == GLUSTER3_1_FOP_PROGRAM &&
- SFRAME_GET_PROGVER (sframe) == GLUSTER3_1_FOP_VERSION)
- fop = SFRAME_GET_PROCNUM (sframe);
+ if (SFRAME_GET_PROGNUM(sframe) == GLUSTER_FOP_PROGRAM &&
+ SFRAME_GET_PROGVER(sframe) == GLUSTER_FOP_VERSION)
+ fop = SFRAME_GET_PROCNUM(sframe);
- return ((fop == GFS3_OP_LK) ||
- (fop == GFS3_OP_INODELK) ||
- (fop == GFS3_OP_FINODELK) ||
- (fop == GFS3_OP_ENTRYLK) ||
- (fop == GFS3_OP_FENTRYLK));
+ return ((fop == GFS3_OP_LK) || (fop == GFS3_OP_INODELK) ||
+ (fop == GFS3_OP_FINODELK) || (fop == GFS3_OP_ENTRYLK) ||
+ (fop == GFS3_OP_FENTRYLK));
}
-struct saved_frame *
-__saved_frames_put (struct saved_frames *frames, void *frame,
- struct rpc_req *rpcreq)
+static struct saved_frame *
+__saved_frames_put(struct saved_frames *frames, void *frame,
+ struct rpc_req *rpcreq)
{
- struct saved_frame *saved_frame = NULL;
+ struct saved_frame *saved_frame = mem_get(
+ rpcreq->conn->rpc_clnt->saved_frames_pool);
- saved_frame = mem_get (rpcreq->conn->rpc_clnt->saved_frames_pool);
- if (!saved_frame) {
- goto out;
- }
- /* THIS should be saved and set back */
+ if (!saved_frame) {
+ goto out;
+ }
+ /* THIS should be saved and set back */
- memset (saved_frame, 0, sizeof (*saved_frame));
- INIT_LIST_HEAD (&saved_frame->list);
+ INIT_LIST_HEAD(&saved_frame->list);
- saved_frame->capital_this = THIS;
- saved_frame->frame = frame;
- saved_frame->rpcreq = rpcreq;
- gettimeofday (&saved_frame->saved_at, NULL);
+ saved_frame->capital_this = THIS;
+ saved_frame->frame = frame;
+ saved_frame->rpcreq = rpcreq;
+ gettimeofday(&saved_frame->saved_at, NULL);
+ memset(&saved_frame->rsp, 0, sizeof(rpc_transport_rsp_t));
- if (_is_lock_fop (saved_frame))
- list_add_tail (&saved_frame->list, &frames->lk_sf.list);
- else
- list_add_tail (&saved_frame->list, &frames->sf.list);
+ if (_is_lock_fop(saved_frame))
+ list_add_tail(&saved_frame->list, &frames->lk_sf.list);
+ else
+ list_add_tail(&saved_frame->list, &frames->sf.list);
- frames->count++;
+ frames->count++;
out:
- return saved_frame;
+ return saved_frame;
}
-
-void
-saved_frames_delete (struct saved_frame *saved_frame,
- rpc_clnt_connection_t *conn)
-{
- GF_VALIDATE_OR_GOTO ("rpc-clnt", saved_frame, out);
- GF_VALIDATE_OR_GOTO ("rpc-clnt", conn, out);
-
- pthread_mutex_lock (&conn->lock);
- {
- list_del_init (&saved_frame->list);
- conn->saved_frames->count--;
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (saved_frame->rpcreq != NULL) {
- rpc_clnt_reply_deinit (saved_frame->rpcreq,
- conn->rpc_clnt->reqpool);
- }
-
- mem_put (saved_frame);
-out:
- return;
-}
-
-
static void
-call_bail (void *data)
+call_bail(void *data)
{
- struct rpc_clnt *clnt = NULL;
- rpc_clnt_connection_t *conn = NULL;
- struct timeval current;
- struct list_head list;
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
- struct tm frame_sent_tm;
- char frame_sent[256] = {0,};
- struct timeval timeout = {0,};
- struct iovec iov = {0,};
-
- GF_VALIDATE_OR_GOTO ("client", data, out);
-
- clnt = data;
-
- conn = &clnt->conn;
-
- gettimeofday (&current, NULL);
- INIT_LIST_HEAD (&list);
-
- pthread_mutex_lock (&conn->lock);
- {
- /* Chaining to get call-always functionality from
- call-once timer */
- if (conn->timer) {
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
- gf_timer_call_cancel (clnt->ctx, conn->timer);
- conn->timer = gf_timer_call_after (clnt->ctx,
- timeout,
- call_bail,
- (void *) clnt);
-
- if (conn->timer == NULL) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "Cannot create bailout timer");
- }
- }
-
- do {
- saved_frame =
- __saved_frames_get_timedout (conn->saved_frames,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
-
- } while (saved_frame);
- }
- pthread_mutex_unlock (&conn->lock);
-
- list_for_each_entry_safe (trav, tmp, &list, list) {
- localtime_r (&trav->saved_at.tv_sec, &frame_sent_tm);
- strftime (frame_sent, 32, "%Y-%m-%d %H:%M:%S", &frame_sent_tm);
- snprintf (frame_sent + strlen (frame_sent),
- 256 - strlen (frame_sent),
- ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
-
- gf_log (conn->trans->name, GF_LOG_ERROR,
- "bailing out frame type(%s) op(%s(%d)) xid = 0x%ux "
- "sent = %s. timeout = %d",
- trav->rpcreq->prog->progname,
- (trav->rpcreq->prog->procnames) ?
- trav->rpcreq->prog->procnames[trav->rpcreq->procnum] :
- "--",
- trav->rpcreq->procnum, trav->rpcreq->xid, frame_sent,
- conn->frame_timeout);
-
- clnt = rpc_clnt_ref (clnt);
- trav->rpcreq->rpc_status = -1;
- trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
-
- rpc_clnt_reply_deinit (trav->rpcreq, clnt->reqpool);
- clnt = rpc_clnt_unref (clnt);
- list_del_init (&trav->list);
- mem_put (trav);
- }
+ rpc_transport_t *trans = NULL;
+ struct rpc_clnt *clnt = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ struct timeval current;
+ struct list_head list;
+ struct saved_frame *saved_frame = NULL;
+ struct saved_frame *trav = NULL;
+ struct saved_frame *tmp = NULL;
+ char frame_sent[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ struct timespec timeout = {
+ 0,
+ };
+ char peerid[UNIX_PATH_MAX] = {0};
+ gf_boolean_t need_unref = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("client", data, out);
+
+ clnt = data;
+
+ conn = &clnt->conn;
+ pthread_mutex_lock(&conn->lock);
+ {
+ trans = conn->trans;
+ if (trans) {
+ (void)snprintf(peerid, sizeof(peerid), "%s",
+ conn->trans->peerinfo.identifier);
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+ /*rpc_clnt_connection_cleanup will be unwinding all saved frames,
+ * bailed or otherwise*/
+ if (!trans)
+ goto out;
+
+ gettimeofday(&current, NULL);
+ INIT_LIST_HEAD(&list);
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ /* Chaining to get call-always functionality from
+ call-once timer */
+ if (conn->timer) {
+ timeout.tv_sec = 10;
+ timeout.tv_nsec = 0;
+
+ /* Ref rpc as it's added to timer event queue */
+ rpc_clnt_ref(clnt);
+ gf_timer_call_cancel(clnt->ctx, conn->timer);
+ conn->timer = gf_timer_call_after(clnt->ctx, timeout, call_bail,
+ (void *)clnt);
+
+ if (conn->timer == NULL) {
+ gf_log(conn->name, GF_LOG_WARNING,
+ "Cannot create bailout timer for %s", peerid);
+ need_unref = _gf_true;
+ }
+ }
+
+ do {
+ saved_frame = __saved_frames_get_timedout(
+ conn->saved_frames, conn->frame_timeout, &current);
+ if (saved_frame)
+ list_add(&saved_frame->list, &list);
+
+ } while (saved_frame);
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (list_empty(&list))
+ goto out;
+
+ list_for_each_entry_safe(trav, tmp, &list, list)
+ {
+ gf_time_fmt_tv(frame_sent, sizeof frame_sent, &trav->saved_at,
+ gf_timefmt_FT);
+
+ gf_log(conn->name, GF_LOG_ERROR,
+ "bailing out frame type(%s), op(%s(%d)), xid = 0x%x, "
+ "unique = %" PRIu64 ", sent = %s, timeout = %d for %s",
+ trav->rpcreq->prog->progname,
+ (trav->rpcreq->prog->procnames)
+ ? trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
+ : "--",
+ trav->rpcreq->procnum, trav->rpcreq->xid,
+ ((call_frame_t *)(trav->frame))->root->unique, frame_sent,
+ conn->frame_timeout, peerid);
+
+ clnt = rpc_clnt_ref(clnt);
+ trav->rpcreq->rpc_status = -1;
+ trav->rpcreq->cbkfn(trav->rpcreq, NULL, 0, trav->frame);
+
+ rpc_clnt_reply_deinit(trav->rpcreq, clnt->reqpool);
+ clnt = rpc_clnt_unref(clnt);
+ list_del_init(&trav->list);
+ mem_put(trav);
+ }
out:
- return;
+ rpc_clnt_unref(clnt);
+ if (need_unref)
+ rpc_clnt_unref(clnt);
+ return;
}
-
/* to be called with conn->lock held */
-struct saved_frame *
-__save_frame (struct rpc_clnt *rpc_clnt, call_frame_t *frame,
- struct rpc_req *rpcreq)
+static struct saved_frame *
+__save_frame(struct rpc_clnt *rpc_clnt, call_frame_t *frame,
+ struct rpc_req *rpcreq)
{
- rpc_clnt_connection_t *conn = NULL;
- struct timeval timeout = {0, };
- struct saved_frame *saved_frame = NULL;
-
- conn = &rpc_clnt->conn;
-
- saved_frame = __saved_frames_put (conn->saved_frames, frame, rpcreq);
-
- if (saved_frame == NULL) {
- goto out;
- }
-
- /* TODO: make timeout configurable */
- if (conn->timer == NULL) {
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
- conn->timer = gf_timer_call_after (rpc_clnt->ctx,
- timeout,
- call_bail,
- (void *) rpc_clnt);
- }
+ rpc_clnt_connection_t *conn = &rpc_clnt->conn;
+ struct timespec timeout = {
+ 0,
+ };
+ struct saved_frame *saved_frame = __saved_frames_put(conn->saved_frames,
+ frame, rpcreq);
+
+ if (saved_frame == NULL) {
+ goto out;
+ }
+
+ /* TODO: make timeout configurable */
+ if (conn->timer == NULL) {
+ timeout.tv_sec = 10;
+ timeout.tv_nsec = 0;
+ rpc_clnt_ref(rpc_clnt);
+ conn->timer = gf_timer_call_after(rpc_clnt->ctx, timeout, call_bail,
+ (void *)rpc_clnt);
+ }
out:
- return saved_frame;
+ return saved_frame;
}
-
struct saved_frames *
-saved_frames_new (void)
+saved_frames_new(void)
{
- struct saved_frames *saved_frames = NULL;
+ struct saved_frames *saved_frames = NULL;
- saved_frames = GF_CALLOC (1, sizeof (*saved_frames),
- gf_common_mt_rpcclnt_savedframe_t);
- if (!saved_frames) {
- return NULL;
- }
+ saved_frames = GF_CALLOC(1, sizeof(*saved_frames),
+ gf_common_mt_rpcclnt_savedframe_t);
+ if (!saved_frames) {
+ return NULL;
+ }
- INIT_LIST_HEAD (&saved_frames->sf.list);
- INIT_LIST_HEAD (&saved_frames->lk_sf.list);
+ INIT_LIST_HEAD(&saved_frames->sf.list);
+ INIT_LIST_HEAD(&saved_frames->lk_sf.list);
- return saved_frames;
+ return saved_frames;
}
-
int
-__saved_frame_copy (struct saved_frames *frames, int64_t callid,
- struct saved_frame *saved_frame)
+__saved_frame_copy(struct saved_frames *frames, int64_t callid,
+ struct saved_frame *saved_frame)
{
- struct saved_frame *tmp = NULL;
- int ret = -1;
+ struct saved_frame *tmp = NULL;
+ int ret = -1;
+
+ if (!saved_frame) {
+ ret = 0;
+ goto out;
+ }
- if (!saved_frame) {
- ret = 0;
- goto out;
+ list_for_each_entry(tmp, &frames->sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ *saved_frame = *tmp;
+ ret = 0;
+ goto out;
}
+ }
- list_for_each_entry (tmp, &frames->sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- *saved_frame = *tmp;
- ret = 0;
- goto out;
- }
- }
-
- list_for_each_entry (tmp, &frames->lk_sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- *saved_frame = *tmp;
- ret = 0;
- goto out;
- }
- }
+ list_for_each_entry(tmp, &frames->lk_sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ *saved_frame = *tmp;
+ ret = 0;
+ goto out;
+ }
+ }
out:
- return ret;
+ return ret;
}
-
struct saved_frame *
-__saved_frame_get (struct saved_frames *frames, int64_t callid)
+__saved_frame_get(struct saved_frames *frames, int64_t callid)
{
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *tmp = NULL;
-
- list_for_each_entry (tmp, &frames->sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- list_del_init (&tmp->list);
- frames->count--;
- saved_frame = tmp;
- goto out;
- }
- }
-
- list_for_each_entry (tmp, &frames->lk_sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- list_del_init (&tmp->list);
- frames->count--;
- saved_frame = tmp;
- goto out;
- }
- }
+ struct saved_frame *saved_frame = NULL;
+ struct saved_frame *tmp = NULL;
-out:
- if (saved_frame) {
- THIS = saved_frame->capital_this;
+ list_for_each_entry(tmp, &frames->sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ list_del_init(&tmp->list);
+ frames->count--;
+ saved_frame = tmp;
+ goto out;
}
+ }
- return saved_frame;
-}
+ list_for_each_entry(tmp, &frames->lk_sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ list_del_init(&tmp->list);
+ frames->count--;
+ saved_frame = tmp;
+ goto out;
+ }
+ }
+out:
+ if (saved_frame) {
+ THIS = saved_frame->capital_this;
+ }
+
+ return saved_frame;
+}
void
-saved_frames_unwind (struct saved_frames *saved_frames)
+saved_frames_unwind(struct saved_frames *saved_frames)
{
- struct rpc_clnt *clnt = NULL;
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
- struct tm *frame_sent_tm = NULL;
- char timestr[256] = {0,};
-
- struct iovec iov = {0,};
-
- list_splice_init (&saved_frames->lk_sf.list, &saved_frames->sf.list);
-
- list_for_each_entry_safe (trav, tmp, &saved_frames->sf.list, list) {
- frame_sent_tm = localtime (&trav->saved_at.tv_sec);
- strftime (timestr, sizeof(timestr), "%Y-%m-%d %H:%M:%S",
- frame_sent_tm);
- snprintf (timestr + strlen (timestr),
- sizeof(timestr) - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
-
- if (!trav->rpcreq || !trav->rpcreq->prog)
- continue;
-
- gf_log_callingfn (trav->rpcreq->conn->trans->name,
- GF_LOG_ERROR,
- "forced unwinding frame type(%s) op(%s(%d)) "
- "called at %s (xid=0x%ux)",
- trav->rpcreq->prog->progname,
- ((trav->rpcreq->prog->procnames) ?
- trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
- : "--"),
- trav->rpcreq->procnum, timestr,
- trav->rpcreq->xid);
- saved_frames->count--;
-
- clnt = rpc_clnt_ref (trav->rpcreq->conn->rpc_clnt);
- trav->rpcreq->rpc_status = -1;
- trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
-
- rpc_clnt_reply_deinit (trav->rpcreq,
- trav->rpcreq->conn->rpc_clnt->reqpool);
-
- clnt = rpc_clnt_unref (clnt);
- list_del_init (&trav->list);
- mem_put (trav);
- }
+ struct saved_frame *trav = NULL;
+ struct saved_frame *tmp = NULL;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ list_splice_init(&saved_frames->lk_sf.list, &saved_frames->sf.list);
+
+ list_for_each_entry_safe(trav, tmp, &saved_frames->sf.list, list)
+ {
+ gf_time_fmt_tv(timestr, sizeof timestr, &trav->saved_at, gf_timefmt_FT);
+
+ if (!trav->rpcreq || !trav->rpcreq->prog)
+ continue;
+
+ gf_log_callingfn(
+ trav->rpcreq->conn->name, GF_LOG_ERROR,
+ "forced unwinding frame type(%s) op(%s(%d)) "
+ "called at %s (xid=0x%x)",
+ trav->rpcreq->prog->progname,
+ ((trav->rpcreq->prog->procnames)
+ ? trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
+ : "--"),
+ trav->rpcreq->procnum, timestr, trav->rpcreq->xid);
+ saved_frames->count--;
+
+ trav->rpcreq->rpc_status = -1;
+ trav->rpcreq->cbkfn(trav->rpcreq, NULL, 0, trav->frame);
+
+ rpc_clnt_reply_deinit(trav->rpcreq,
+ trav->rpcreq->conn->rpc_clnt->reqpool);
+
+ list_del_init(&trav->list);
+ mem_put(trav);
+ }
}
-
void
-saved_frames_destroy (struct saved_frames *frames)
+saved_frames_destroy(struct saved_frames *frames)
{
- if (!frames)
- return;
+ if (!frames)
+ return;
- saved_frames_unwind (frames);
+ saved_frames_unwind(frames);
- GF_FREE (frames);
+ GF_FREE(frames);
}
-
void
-rpc_clnt_reconnect (void *trans_ptr)
+rpc_clnt_reconnect(void *conn_ptr)
{
- rpc_transport_t *trans = NULL;
- rpc_clnt_connection_t *conn = NULL;
- struct timeval tv = {0, 0};
- int32_t ret = 0;
- struct rpc_clnt *clnt = NULL;
-
- trans = trans_ptr;
- if (!trans || !trans->mydata)
- return;
-
- conn = trans->mydata;
- clnt = conn->rpc_clnt;
-
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->reconnect)
- gf_timer_call_cancel (clnt->ctx,
- conn->reconnect);
- conn->reconnect = 0;
-
- if (conn->connected == 0) {
- tv.tv_sec = 3;
-
- gf_log (trans->name, GF_LOG_TRACE,
- "attempting reconnect");
- ret = rpc_transport_connect (trans,
- conn->config.remote_port);
- /* Every time there is a disconnection, processes
- should try to connect to 'glusterd' (ie, default
- port) or whichever port given as 'option remote-port'
- in volume file. */
- /* Below code makes sure the (re-)configured port lasts
- for just one successful attempt */
- if (!ret)
- conn->config.remote_port = 0;
-
- conn->reconnect =
- gf_timer_call_after (clnt->ctx, tv,
- rpc_clnt_reconnect,
- trans);
- } else {
- gf_log (trans->name, GF_LOG_TRACE,
- "breaking reconnect chain");
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if ((ret == -1) && (errno != EINPROGRESS) && (clnt->notifyfn)) {
- clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
- }
-
- return;
+ rpc_transport_t *trans = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ struct timespec ts = {0, 0};
+ struct rpc_clnt *clnt = NULL;
+ gf_boolean_t need_unref = _gf_false;
+ gf_boolean_t canceled_unref = _gf_false;
+
+ conn = conn_ptr;
+ clnt = conn->rpc_clnt;
+ pthread_mutex_lock(&conn->lock);
+ {
+ trans = conn->trans;
+ if (!trans)
+ goto out_unlock;
+
+ if (conn->reconnect) {
+ if (!gf_timer_call_cancel(clnt->ctx, conn->reconnect))
+ canceled_unref = _gf_true;
+ }
+ conn->reconnect = 0;
+
+ if ((conn->connected == 0) && !clnt->disabled) {
+ ts.tv_sec = 3;
+ ts.tv_nsec = 0;
+
+ gf_log(conn->name, GF_LOG_TRACE, "attempting reconnect");
+ (void)rpc_transport_connect(trans, conn->config.remote_port);
+ rpc_clnt_ref(clnt);
+ conn->reconnect = gf_timer_call_after(clnt->ctx, ts,
+ rpc_clnt_reconnect, conn);
+ if (!conn->reconnect) {
+ need_unref = _gf_true;
+ gf_log(conn->name, GF_LOG_ERROR,
+ "Error adding to timer event queue");
+ }
+ } else {
+ gf_log(conn->name, GF_LOG_TRACE, "breaking reconnect chain");
+ }
+ }
+out_unlock:
+ pthread_mutex_unlock(&conn->lock);
+
+ rpc_clnt_unref(clnt);
+ if (need_unref)
+ rpc_clnt_unref(clnt);
+ if (canceled_unref)
+ rpc_clnt_unref(clnt);
+ return;
}
-
int
-rpc_clnt_fill_request_info (struct rpc_clnt *clnt, rpc_request_info_t *info)
+rpc_clnt_fill_request_info(struct rpc_clnt *clnt, rpc_request_info_t *info)
{
- struct saved_frame saved_frame = {{}, 0};
- int ret = -1;
-
- pthread_mutex_lock (&clnt->conn.lock);
- {
- ret = __saved_frame_copy (clnt->conn.saved_frames, info->xid,
- &saved_frame);
- }
- pthread_mutex_unlock (&clnt->conn.lock);
-
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_CRITICAL,
- "cannot lookup the saved "
- "frame corresponding to xid (%d)", info->xid);
- goto out;
- }
-
- info->prognum = saved_frame.rpcreq->prog->prognum;
- info->procnum = saved_frame.rpcreq->procnum;
- info->progver = saved_frame.rpcreq->prog->progver;
- info->rpc_req = saved_frame.rpcreq;
- info->rsp = saved_frame.rsp;
-
- ret = 0;
+ struct saved_frame saved_frame;
+ int ret = -1;
+
+ pthread_mutex_lock(&clnt->conn.lock);
+ {
+ ret = __saved_frame_copy(clnt->conn.saved_frames, info->xid,
+ &saved_frame);
+ }
+ pthread_mutex_unlock(&clnt->conn.lock);
+
+ if (ret == -1) {
+ gf_log(clnt->conn.name, GF_LOG_CRITICAL,
+ "cannot lookup the saved "
+ "frame corresponding to xid (%d)",
+ info->xid);
+ goto out;
+ }
+
+ info->prognum = saved_frame.rpcreq->prog->prognum;
+ info->procnum = saved_frame.rpcreq->procnum;
+ info->progver = saved_frame.rpcreq->prog->progver;
+ info->rpc_req = saved_frame.rpcreq;
+ info->rsp = saved_frame.rsp;
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpc_clnt_reconnect_cleanup (rpc_clnt_connection_t *conn)
+rpc_clnt_reconnect_cleanup(rpc_clnt_connection_t *conn)
{
- struct rpc_clnt *clnt = NULL;
+ struct rpc_clnt *clnt = NULL;
+ int ret = 0;
+ gf_boolean_t reconnect_unref = _gf_false;
- if (!conn) {
- goto out;
- }
-
- clnt = conn->rpc_clnt;
-
- pthread_mutex_lock (&conn->lock);
- {
+ if (!conn) {
+ goto out;
+ }
- if (conn->reconnect) {
- gf_timer_call_cancel (clnt->ctx, conn->reconnect);
- conn->reconnect = NULL;
- }
+ clnt = conn->rpc_clnt;
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
+ if (!ret) {
+ reconnect_unref = _gf_true;
+ conn->cleanup_gen++;
+ }
+ conn->reconnect = NULL;
}
- pthread_mutex_unlock (&conn->lock);
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (reconnect_unref)
+ rpc_clnt_unref(clnt);
out:
- return 0;
+ return 0;
}
/*
@@ -520,45 +483,62 @@ out:
*
*/
int
-rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn)
+rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
{
- struct saved_frames *saved_frames = NULL;
- struct rpc_clnt *clnt = NULL;
-
- if (!conn) {
- goto out;
- }
-
- clnt = conn->rpc_clnt;
-
- gf_log (conn->trans->name, GF_LOG_TRACE,
- "cleaning up state in transport object %p", conn->trans);
-
- pthread_mutex_lock (&conn->lock);
- {
- saved_frames = conn->saved_frames;
- conn->saved_frames = saved_frames_new ();
-
- /* bailout logic cleanup */
- if (conn->timer) {
- gf_timer_call_cancel (clnt->ctx, conn->timer);
- conn->timer = NULL;
- }
-
- conn->connected = 0;
-
- if (conn->ping_timer) {
- gf_timer_call_cancel (clnt->ctx, conn->ping_timer);
- conn->ping_timer = NULL;
- conn->ping_started = 0;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- saved_frames_destroy (saved_frames);
-
+ struct saved_frames *saved_frames = NULL;
+ struct rpc_clnt *clnt = NULL;
+ int unref = 0;
+ int ret = 0;
+ gf_boolean_t timer_unref = _gf_false;
+ gf_boolean_t reconnect_unref = _gf_false;
+
+ if (!conn) {
+ goto out;
+ }
+
+ clnt = conn->rpc_clnt;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ saved_frames = conn->saved_frames;
+ conn->saved_frames = saved_frames_new();
+
+ /* bailout logic cleanup */
+ if (conn->timer) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->timer);
+ if (!ret)
+ timer_unref = _gf_true;
+ conn->timer = NULL;
+ }
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
+ if (!ret)
+ reconnect_unref = _gf_true;
+ conn->reconnect = NULL;
+ }
+
+ conn->connected = 0;
+ conn->disconnected = 1;
+
+ unref = rpc_clnt_remove_ping_timer_locked(clnt);
+ /*reset rpc msgs stats*/
+ conn->pingcnt = 0;
+ conn->msgcnt = 0;
+ conn->cleanup_gen++;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ saved_frames_destroy(saved_frames);
+ if (unref)
+ rpc_clnt_unref(clnt);
+
+ if (timer_unref)
+ rpc_clnt_unref(clnt);
+
+ if (reconnect_unref)
+ rpc_clnt_unref(clnt);
out:
- return 0;
+ return 0;
}
/*
@@ -570,1170 +550,1430 @@ out:
*/
static struct saved_frame *
-lookup_frame (rpc_clnt_connection_t *conn, int64_t callid)
+lookup_frame(rpc_clnt_connection_t *conn, int64_t callid)
{
- struct saved_frame *frame = NULL;
+ struct saved_frame *frame = NULL;
- pthread_mutex_lock (&conn->lock);
- {
- frame = __saved_frame_get (conn->saved_frames, callid);
- }
- pthread_mutex_unlock (&conn->lock);
+ pthread_mutex_lock(&conn->lock);
+ {
+ frame = __saved_frame_get(conn->saved_frames, callid);
+ }
+ pthread_mutex_unlock(&conn->lock);
- return frame;
+ return frame;
}
-
int
-rpc_clnt_reply_fill (rpc_transport_pollin_t *msg,
- rpc_clnt_connection_t *conn,
- struct rpc_msg *replymsg, struct iovec progmsg,
- struct rpc_req *req,
- struct saved_frame *saved_frame)
+rpc_clnt_reply_fill(rpc_transport_pollin_t *msg, rpc_clnt_connection_t *conn,
+ struct rpc_msg *replymsg, struct iovec progmsg,
+ struct rpc_req *req, struct saved_frame *saved_frame)
{
- int ret = -1;
-
- if ((!conn) || (!replymsg)|| (!req) || (!saved_frame) || (!msg)) {
- goto out;
- }
-
- req->rpc_status = 0;
- if ((rpc_reply_status (replymsg) == MSG_DENIED)
- || (rpc_accepted_reply_status (replymsg) != SUCCESS)) {
- req->rpc_status = -1;
- }
-
- req->rsp[0] = progmsg;
- req->rsp_iobref = iobref_ref (msg->iobref);
-
- if (msg->vectored) {
- req->rsp[1] = msg->vector[1];
- req->rspcnt = 2;
- } else {
- req->rspcnt = 1;
- }
-
- /* By this time, the data bytes for the auth scheme would have already
- * been copied into the required sections of the req structure,
- * we just need to fill in the meta-data about it now.
+ int ret = -1;
+
+ if ((!conn) || (!replymsg) || (!req) || (!saved_frame) || (!msg)) {
+ goto out;
+ }
+
+ req->rpc_status = 0;
+ if ((rpc_reply_status(replymsg) == MSG_DENIED) ||
+ (rpc_accepted_reply_status(replymsg) != SUCCESS)) {
+ req->rpc_status = -1;
+ }
+
+ req->rsp[0] = progmsg;
+ req->rsp_iobref = iobref_ref(msg->iobref);
+
+ if (msg->vectored) {
+ req->rsp[1] = msg->vector[1];
+ req->rspcnt = 2;
+ } else {
+ req->rspcnt = 1;
+ }
+
+ /* By this time, the data bytes for the auth scheme would have already
+ * been copied into the required sections of the req structure,
+ * we just need to fill in the meta-data about it now.
+ */
+ if (req->rpc_status == 0) {
+ /*
+ * req->verf.flavour = rpc_reply_verf_flavour (replymsg);
+ * req->verf.datalen = rpc_reply_verf_len (replymsg);
*/
- if (req->rpc_status == 0) {
- /*
- * req->verf.flavour = rpc_reply_verf_flavour (replymsg);
- * req->verf.datalen = rpc_reply_verf_len (replymsg);
- */
- }
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
void
-rpc_clnt_reply_deinit (struct rpc_req *req, struct mem_pool *pool)
+rpc_clnt_reply_deinit(struct rpc_req *req, struct mem_pool *pool)
{
- if (!req) {
- goto out;
- }
+ if (!req) {
+ goto out;
+ }
- if (req->rsp_iobref) {
- iobref_unref (req->rsp_iobref);
- }
+ if (req->rsp_iobref) {
+ iobref_unref(req->rsp_iobref);
+ }
- mem_put (req);
+ mem_put(req);
out:
- return;
+ return;
}
-
/* TODO: use mem-pool for allocating requests */
int
-rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg,
- struct rpc_req *req, struct saved_frame *saved_frame)
+rpc_clnt_reply_init(rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg,
+ struct rpc_req *req, struct saved_frame *saved_frame)
{
- char *msgbuf = NULL;
- struct rpc_msg rpcmsg;
- struct iovec progmsg; /* RPC Program payload */
- size_t msglen = 0;
- int ret = -1;
-
- msgbuf = msg->vector[0].iov_base;
- msglen = msg->vector[0].iov_len;
-
- ret = xdr_to_rpc_reply (msgbuf, msglen, &rpcmsg, &progmsg,
- req->verf.authdata);
- if (ret != 0) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "RPC reply decoding failed");
- goto out;
- }
-
- ret = rpc_clnt_reply_fill (msg, conn, &rpcmsg, progmsg, req,
- saved_frame);
- if (ret != 0) {
- goto out;
- }
-
- gf_log (conn->trans->name, GF_LOG_TRACE,
- "received rpc message (RPC XID: 0x%ux"
- " Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)",
- saved_frame->rpcreq->xid,
- saved_frame->rpcreq->prog->progname,
- saved_frame->rpcreq->prog->progver,
- saved_frame->rpcreq->procnum, conn->trans->name);
-
- req->rpc_status = 0;
+ char *msgbuf = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ size_t msglen = 0;
+ int ret = -1;
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ ret = xdr_to_rpc_reply(msgbuf, msglen, &rpcmsg, &progmsg,
+ req->verf.authdata);
+ if (ret != 0) {
+ gf_log(conn->name, GF_LOG_WARNING, "RPC reply decoding failed");
+ goto out;
+ }
+
+ ret = rpc_clnt_reply_fill(msg, conn, &rpcmsg, progmsg, req, saved_frame);
+ if (ret != 0) {
+ goto out;
+ }
+
+ gf_log(conn->name, GF_LOG_TRACE,
+ "received rpc message (RPC XID: 0x%x"
+ " Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)",
+ saved_frame->rpcreq->xid, saved_frame->rpcreq->prog->progname,
+ saved_frame->rpcreq->prog->progver, saved_frame->rpcreq->procnum,
+ conn->name);
out:
- if (ret != 0) {
- req->rpc_status = -1;
- }
+ if (ret != 0) {
+ req->rpc_status = -1;
+ }
- return ret;
+ return ret;
}
int
-rpc_clnt_handle_cbk (struct rpc_clnt *clnt, rpc_transport_pollin_t *msg)
+rpc_clnt_handle_cbk(struct rpc_clnt *clnt, rpc_transport_pollin_t *msg)
{
- char *msgbuf = NULL;
- rpcclnt_cb_program_t *program = NULL;
- struct rpc_msg rpcmsg;
- struct iovec progmsg; /* RPC Program payload */
- size_t msglen = 0;
- int found = 0;
- int ret = -1;
- int procnum = 0;
-
- msgbuf = msg->vector[0].iov_base;
- msglen = msg->vector[0].iov_len;
-
- clnt = rpc_clnt_ref (clnt);
- ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg, NULL,NULL);
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
- "RPC call decoding failed");
- goto out;
- }
-
- gf_log (clnt->conn.trans->name, GF_LOG_TRACE,
- "received rpc message (XID: 0x%lx, "
- "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) "
- "from rpc-transport (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- clnt->conn.trans->name);
-
- procnum = rpc_call_progproc (&rpcmsg);
-
- pthread_mutex_lock (&clnt->lock);
+ char *msgbuf = NULL;
+ rpcclnt_cb_program_t *program = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ size_t msglen = 0;
+ int found = 0;
+ int ret = -1;
+ int procnum = 0;
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ clnt = rpc_clnt_ref(clnt);
+ ret = xdr_to_rpc_call(msgbuf, msglen, &rpcmsg, &progmsg, NULL, NULL);
+ if (ret == -1) {
+ gf_log(clnt->conn.name, GF_LOG_WARNING, "RPC call decoding failed");
+ goto out;
+ }
+
+ gf_log(clnt->conn.name, GF_LOG_TRACE,
+ "receivd rpc message (XID: 0x%" GF_PRI_RPC_XID
+ ", "
+ "Ver: %" GF_PRI_RPC_VERSION ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from rpc-transport (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), clnt->conn.name);
+
+ procnum = rpc_call_progproc(&rpcmsg);
+
+ pthread_mutex_lock(&clnt->lock);
+ {
+ list_for_each_entry(program, &clnt->programs, program)
{
- list_for_each_entry (program, &clnt->programs, program) {
- if ((program->prognum == rpc_call_program (&rpcmsg))
- && (program->progver
- == rpc_call_progver (&rpcmsg))) {
- found = 1;
- break;
- }
- }
+ if ((program->prognum == rpc_call_program(&rpcmsg)) &&
+ (program->progver == rpc_call_progver(&rpcmsg))) {
+ found = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&clnt->lock);
+ }
+ pthread_mutex_unlock(&clnt->lock);
- if (found && (procnum < program->numactors) &&
- (program->actors[procnum].actor)) {
- program->actors[procnum].actor (&progmsg);
- }
+ if (found && (procnum < program->numactors) &&
+ (program->actors[procnum].actor)) {
+ program->actors[procnum].actor(clnt, program->mydata, &progmsg);
+ }
out:
- clnt = rpc_clnt_unref (clnt);
- return ret;
+ rpc_clnt_unref(clnt);
+ return ret;
}
int
-rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin)
+rpc_clnt_handle_reply(struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin)
{
- rpc_clnt_connection_t *conn = NULL;
- struct saved_frame *saved_frame = NULL;
- int ret = -1;
- struct rpc_req *req = NULL;
- uint32_t xid = 0;
-
- clnt = rpc_clnt_ref (clnt);
- conn = &clnt->conn;
-
- xid = ntoh32 (*((uint32_t *)pollin->vector[0].iov_base));
- saved_frame = lookup_frame (conn, xid);
- if (saved_frame == NULL) {
- gf_log (conn->trans->name, GF_LOG_ERROR,
- "cannot lookup the saved frame for reply with xid (%u)",
- xid);
- goto out;
- }
-
- req = saved_frame->rpcreq;
- if (req == NULL) {
- gf_log (conn->trans->name, GF_LOG_ERROR,
- "no request with frame for xid (%u)", xid);
- goto out;
- }
-
- ret = rpc_clnt_reply_init (conn, pollin, req, saved_frame);
- if (ret != 0) {
- req->rpc_status = -1;
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "initialising rpc reply failed");
- }
-
- req->cbkfn (req, req->rsp, req->rspcnt, saved_frame->frame);
-
- if (req) {
- rpc_clnt_reply_deinit (req, conn->rpc_clnt->reqpool);
- }
+ rpc_clnt_connection_t *conn = NULL;
+ struct saved_frame *saved_frame = NULL;
+ int ret = -1;
+ struct rpc_req *req = NULL;
+ uint32_t xid = 0;
+
+ clnt = rpc_clnt_ref(clnt);
+ conn = &clnt->conn;
+
+ xid = ntoh32(*((uint32_t *)pollin->vector[0].iov_base));
+ saved_frame = lookup_frame(conn, xid);
+ if (saved_frame == NULL) {
+ gf_log(conn->name, GF_LOG_ERROR,
+ "cannot lookup the saved frame for reply with xid (%u)", xid);
+ goto out;
+ }
+
+ req = saved_frame->rpcreq;
+ if (req == NULL) {
+ gf_log(conn->name, GF_LOG_ERROR, "no request with frame for xid (%u)",
+ xid);
+ goto out;
+ }
+
+ ret = rpc_clnt_reply_init(conn, pollin, req, saved_frame);
+ if (ret != 0) {
+ req->rpc_status = -1;
+ gf_log(conn->name, GF_LOG_WARNING, "initialising rpc reply failed");
+ }
+
+ req->cbkfn(req, req->rsp, req->rspcnt, saved_frame->frame);
+
+ if (req) {
+ rpc_clnt_reply_deinit(req, conn->rpc_clnt->reqpool);
+ }
out:
- if (saved_frame) {
- mem_put (saved_frame);
- }
+ if (saved_frame) {
+ mem_put(saved_frame);
+ }
- clnt = rpc_clnt_unref (clnt);
- return ret;
+ rpc_clnt_unref(clnt);
+ return ret;
}
-
-inline void
-rpc_clnt_set_connected (rpc_clnt_connection_t *conn)
+gf_boolean_t
+is_rpc_clnt_disconnected(rpc_clnt_connection_t *conn)
{
- if (!conn) {
- goto out;
- }
+ gf_boolean_t disconnected = _gf_true;
- pthread_mutex_lock (&conn->lock);
- {
- conn->connected = 1;
- }
- pthread_mutex_unlock (&conn->lock);
+ if (!conn)
+ return disconnected;
-out:
- return;
+ pthread_mutex_lock(&conn->lock);
+ {
+ disconnected = conn->disconnected;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ return disconnected;
}
+static void
+rpc_clnt_destroy(struct rpc_clnt *rpc);
-void
-rpc_clnt_unset_connected (rpc_clnt_connection_t *conn)
-{
- if (!conn) {
- goto out;
- }
+#define RPC_THIS_SAVE(xl) \
+ do { \
+ old_THIS = THIS; \
+ if (!old_THIS) \
+ gf_log_callingfn("rpc", GF_LOG_CRITICAL, \
+ "THIS is not initialised."); \
+ THIS = xl; \
+ } while (0)
- pthread_mutex_lock (&conn->lock);
- {
- conn->connected = 0;
- }
- pthread_mutex_unlock (&conn->lock);
+#define RPC_THIS_RESTORE (THIS = old_THIS)
-out:
- return;
+static int
+rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
+{
+ struct timespec ts = {
+ 0,
+ };
+ gf_boolean_t unref_clnt = _gf_false;
+ uint64_t pre_notify_gen = 0, post_notify_gen = 0;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ pre_notify_gen = conn->cleanup_gen;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (clnt->notifyfn)
+ clnt->notifyfn(clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ post_notify_gen = conn->cleanup_gen;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (pre_notify_gen == post_notify_gen) {
+ /* program didn't invoke cleanup, so rpc has to do it */
+ rpc_clnt_connection_cleanup(conn);
+ }
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
+ ts.tv_sec = 3;
+ ts.tv_nsec = 0;
+
+ rpc_clnt_ref(clnt);
+ conn->reconnect = gf_timer_call_after(clnt->ctx, ts,
+ rpc_clnt_reconnect, conn);
+ if (conn->reconnect == NULL) {
+ gf_log(conn->name, GF_LOG_WARNING,
+ "Cannot create rpc_clnt_reconnect timer");
+ unref_clnt = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (unref_clnt)
+ rpc_clnt_unref(clnt);
+
+ return 0;
}
int
-rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
- rpc_transport_event_t event, void *data, ...)
+rpc_clnt_notify(rpc_transport_t *trans, void *mydata,
+ rpc_transport_event_t event, void *data, ...)
{
- rpc_clnt_connection_t *conn = NULL;
- struct rpc_clnt *clnt = NULL;
- int ret = -1;
- rpc_request_info_t *req_info = NULL;
- rpc_transport_pollin_t *pollin = NULL;
- struct timeval tv = {0, };
-
- conn = mydata;
- if (conn == NULL) {
- goto out;
- }
- clnt = conn->rpc_clnt;
- if (!clnt)
- goto out;
-
- switch (event) {
- case RPC_TRANSPORT_DISCONNECT:
- {
- rpc_clnt_connection_cleanup (conn);
-
- pthread_mutex_lock (&conn->lock);
- {
- if (!conn->rpc_clnt->disabled
- && (conn->reconnect == NULL)) {
- tv.tv_sec = 10;
-
- conn->reconnect =
- gf_timer_call_after (clnt->ctx, tv,
- rpc_clnt_reconnect,
- conn->trans);
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_DISCONNECT, NULL);
- break;
+ rpc_clnt_connection_t *conn = NULL;
+ struct rpc_clnt *clnt = NULL;
+ int ret = -1;
+ rpc_request_info_t *req_info = NULL;
+ rpc_transport_pollin_t *pollin = NULL;
+ void *clnt_mydata = NULL;
+ DECLARE_OLD_THIS;
+
+ conn = mydata;
+ if (conn == NULL) {
+ goto out;
+ }
+ clnt = conn->rpc_clnt;
+ if (!clnt)
+ goto out;
+
+ RPC_THIS_SAVE(clnt->owner);
+
+ switch (event) {
+ case RPC_TRANSPORT_DISCONNECT: {
+ rpc_clnt_handle_disconnect(clnt, conn);
+ /* The auth_value was being reset to AUTH_GLUSTERFS_v2.
+ * if (clnt->auth_value)
+ * clnt->auth_value = AUTH_GLUSTERFS_v2;
+ * It should not be reset here. The disconnect during
+ * portmap request can race with handshake. If handshake
+ * happens first and disconnect later, auth_value would set
+ * to default value and it never sets back to actual auth_value
+ * supported by server. But it's important to set to lower
+ * version supported in the case where the server downgrades.
+ * So moving this code to RPC_TRANSPORT_CONNECT. Note that
+ * CONNECT cannot race with handshake as by nature it is
+ * serialized with handhake. An handshake can happen only
+ * on a connected transport and hence its strictly serialized.
+ */
+ break;
}
case RPC_TRANSPORT_CLEANUP:
- /* this event should not be received on a client for, a
- * transport is only disconnected, but never destroyed.
- */
- ret = 0;
- break;
-
- case RPC_TRANSPORT_MAP_XID_REQUEST:
- {
- req_info = data;
- ret = rpc_clnt_fill_request_info (clnt, req_info);
- break;
- }
-
- case RPC_TRANSPORT_MSG_RECEIVED:
- {
- pthread_mutex_lock (&conn->lock);
- {
- gettimeofday (&conn->last_received, NULL);
+ if (clnt->notifyfn) {
+ clnt_mydata = clnt->mydata;
+ clnt->mydata = NULL;
+ ret = clnt->notifyfn(clnt, clnt_mydata, RPC_CLNT_DESTROY, NULL);
+ if (ret < 0) {
+ gf_log(trans->name, GF_LOG_WARNING,
+ "client notify handler returned error "
+ "while handling RPC_CLNT_DESTROY");
}
- pthread_mutex_unlock (&conn->lock);
-
- pollin = data;
- if (pollin->is_reply)
- ret = rpc_clnt_handle_reply (clnt, pollin);
- else
- ret = rpc_clnt_handle_cbk (clnt, pollin);
- /* ret = clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_MSG,
- * data);
- */
- break;
- }
-
- case RPC_TRANSPORT_MSG_SENT:
- {
- pthread_mutex_lock (&conn->lock);
- {
- gettimeofday (&conn->last_sent, NULL);
- }
- pthread_mutex_unlock (&conn->lock);
+ }
+ rpc_clnt_destroy(clnt);
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_MAP_XID_REQUEST: {
+ req_info = data;
+ ret = rpc_clnt_fill_request_info(clnt, req_info);
+ break;
+ }
+
+ case RPC_TRANSPORT_MSG_RECEIVED: {
+ timespec_now_realtime(&conn->last_received);
+
+ pollin = data;
+ if (pollin->is_reply)
+ ret = rpc_clnt_handle_reply(clnt, pollin);
+ else
+ ret = rpc_clnt_handle_cbk(clnt, pollin);
+ /* ret = clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_MSG,
+ * data);
+ */
+ break;
+ }
+
+ case RPC_TRANSPORT_MSG_SENT: {
+ timespec_now_realtime(&conn->last_sent);
+ ret = 0;
+ break;
+ }
+
+ case RPC_TRANSPORT_CONNECT: {
+ pthread_mutex_lock(&conn->lock);
+ {
+ /* Every time there is a disconnection, processes
+ * should try to connect to 'glusterd' (ie, default
+ * port) or whichever port given as 'option remote-port'
+ * in volume file. */
+ /* Below code makes sure the (re-)configured port lasts
+ * for just one successful attempt */
+ conn->config.remote_port = 0;
+ conn->connected = 1;
+ conn->disconnected = 0;
+ pthread_cond_broadcast(&conn->cond);
+ }
+ pthread_mutex_unlock(&conn->lock);
- ret = 0;
- break;
- }
+ /* auth value should be set to lower version available
+ * and will be set to appropriate version supported by
+ * server after the handshake.
+ */
+ if (clnt->auth_value)
+ clnt->auth_value = AUTH_GLUSTERFS_v2;
+ if (clnt->notifyfn)
+ ret = clnt->notifyfn(clnt, clnt->mydata, RPC_CLNT_CONNECT,
+ NULL);
- case RPC_TRANSPORT_CONNECT:
- {
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_CONNECT, NULL);
- break;
+ break;
}
case RPC_TRANSPORT_ACCEPT:
- /* only meaningful on a server, no need of handling this event
- * in a client.
- */
- ret = 0;
- break;
- }
+ /* only meaningful on a server, no need of handling this event
+ * in a client.
+ */
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_EVENT_THREAD_DIED:
+ /* only meaningful on a server, no need of handling this event on a
+ * client */
+ ret = 0;
+ break;
+ }
out:
- return ret;
+ RPC_THIS_RESTORE;
+ return ret;
}
-
-void
-rpc_clnt_connection_deinit (rpc_clnt_connection_t *conn)
-{
- return;
-}
-
-
-inline int
-rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx,
- dict_t *options, char *name)
+static int
+rpc_clnt_connection_init(struct rpc_clnt *clnt, glusterfs_ctx_t *ctx,
+ dict_t *options, char *name)
{
- int ret = -1;
- rpc_clnt_connection_t *conn = NULL;
-
- conn = &clnt->conn;
- pthread_mutex_init (&clnt->conn.lock, NULL);
-
- ret = dict_get_int32 (options, "frame-timeout",
- &conn->frame_timeout);
- if (ret >= 0) {
- gf_log (name, GF_LOG_INFO,
- "setting frame-timeout to %d", conn->frame_timeout);
- } else {
- gf_log (name, GF_LOG_DEBUG,
- "defaulting frame-timeout to 30mins");
- conn->frame_timeout = 1800;
- }
+ int ret = -1;
+ rpc_clnt_connection_t *conn = NULL;
+ rpc_transport_t *trans = NULL;
- conn->trans = rpc_transport_load (ctx, options, name);
- if (!conn->trans) {
- gf_log (name, GF_LOG_WARNING, "loading of new rpc-transport"
- " failed");
- ret = -1;
- goto out;
- }
-
- rpc_transport_ref (conn->trans);
+ conn = &clnt->conn;
+ pthread_mutex_init(&clnt->conn.lock, NULL);
+ pthread_cond_init(&clnt->conn.cond, NULL);
- conn->rpc_clnt = clnt;
-
- ret = rpc_transport_register_notify (conn->trans, rpc_clnt_notify,
- conn);
- if (ret == -1) {
- gf_log (name, GF_LOG_WARNING, "registering notify failed");
- rpc_clnt_connection_cleanup (conn);
- conn = NULL;
- goto out;
- }
-
- conn->saved_frames = saved_frames_new ();
- if (!conn->saved_frames) {
- gf_log (name, GF_LOG_WARNING, "creation of saved_frames "
- "failed");
- rpc_clnt_connection_cleanup (conn);
- goto out;
- }
+ conn->name = gf_strdup(name);
+ if (!conn->name) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32(options, "frame-timeout", &conn->frame_timeout);
+ if (ret >= 0) {
+ gf_log(name, GF_LOG_INFO, "setting frame-timeout to %d",
+ conn->frame_timeout);
+ } else {
+ gf_log(name, GF_LOG_DEBUG, "defaulting frame-timeout to 30mins");
+ conn->frame_timeout = 1800;
+ }
+ conn->rpc_clnt = clnt;
+
+ ret = dict_get_int32(options, "ping-timeout", &conn->ping_timeout);
+ if (ret >= 0) {
+ gf_log(name, GF_LOG_DEBUG, "setting ping-timeout to %d",
+ conn->ping_timeout);
+ } else {
+ /*TODO: Once the epoll thread model is fixed,
+ change the default ping-timeout to 30sec */
+ gf_log(name, GF_LOG_DEBUG, "disable ping-timeout");
+ conn->ping_timeout = 0;
+ }
+
+ trans = rpc_transport_load(ctx, options, name);
+ if (!trans) {
+ gf_log(name, GF_LOG_WARNING,
+ "loading of new rpc-transport"
+ " failed");
+ ret = -1;
+ goto out;
+ }
+ rpc_transport_ref(trans);
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ conn->trans = trans;
+ trans = NULL;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ ret = rpc_transport_register_notify(conn->trans, rpc_clnt_notify, conn);
+ if (ret == -1) {
+ gf_log(name, GF_LOG_WARNING, "registering notify failed");
+ goto out;
+ }
+
+ conn->saved_frames = saved_frames_new();
+ if (!conn->saved_frames) {
+ gf_log(name, GF_LOG_WARNING,
+ "creation of saved_frames "
+ "failed");
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ if (ret) {
+ pthread_mutex_lock(&conn->lock);
+ {
+ trans = conn->trans;
+ conn->trans = NULL;
+ }
+ pthread_mutex_unlock(&conn->lock);
+ if (trans)
+ rpc_transport_unref(trans);
+ // conn cleanup needs to be done since we might have failed to
+ // register notification.
+ rpc_clnt_connection_cleanup(conn);
+ }
+ return ret;
}
struct rpc_clnt *
-rpc_clnt_new (dict_t *options, glusterfs_ctx_t *ctx, char *name,
- uint32_t reqpool_size)
+rpc_clnt_new(dict_t *options, xlator_t *owner, char *name,
+ uint32_t reqpool_size)
{
- int ret = -1;
- struct rpc_clnt *rpc = NULL;
-
- rpc = GF_CALLOC (1, sizeof (*rpc), gf_common_mt_rpcclnt_t);
- if (!rpc) {
- goto out;
- }
-
- pthread_mutex_init (&rpc->lock, NULL);
- rpc->ctx = ctx;
-
- if (!reqpool_size)
- reqpool_size = RPC_CLNT_DEFAULT_REQUEST_COUNT;
-
- rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
- if (rpc->reqpool == NULL) {
- pthread_mutex_destroy (&rpc->lock);
- GF_FREE (rpc);
- rpc = NULL;
- goto out;
- }
-
- rpc->saved_frames_pool = mem_pool_new (struct saved_frame,
- reqpool_size);
- if (rpc->saved_frames_pool == NULL) {
- pthread_mutex_destroy (&rpc->lock);
- mem_pool_destroy (rpc->reqpool);
- GF_FREE (rpc);
- rpc = NULL;
- goto out;
- }
-
- ret = rpc_clnt_connection_init (rpc, ctx, options, name);
- if (ret == -1) {
- pthread_mutex_destroy (&rpc->lock);
- mem_pool_destroy (rpc->reqpool);
- mem_pool_destroy (rpc->saved_frames_pool);
- GF_FREE (rpc);
- rpc = NULL;
- if (options)
- dict_unref (options);
- goto out;
- }
-
- rpc->auth_null = dict_get_str_boolean (options, "auth-null", 0);
-
- rpc = rpc_clnt_ref (rpc);
- INIT_LIST_HEAD (&rpc->programs);
+ int ret = -1;
+ struct rpc_clnt *rpc = NULL;
+ glusterfs_ctx_t *ctx = owner->ctx;
+
+ rpc = GF_CALLOC(1, sizeof(*rpc), gf_common_mt_rpcclnt_t);
+ if (!rpc) {
+ goto out;
+ }
+
+ pthread_mutex_init(&rpc->lock, NULL);
+ rpc->ctx = ctx;
+ rpc->owner = owner;
+ GF_ATOMIC_INIT(rpc->xid, 1);
+
+ if (!reqpool_size)
+ reqpool_size = RPC_CLNT_DEFAULT_REQUEST_COUNT;
+
+ rpc->reqpool = mem_pool_new(struct rpc_req, reqpool_size);
+ if (rpc->reqpool == NULL) {
+ pthread_mutex_destroy(&rpc->lock);
+ GF_FREE(rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ rpc->saved_frames_pool = mem_pool_new(struct saved_frame, reqpool_size);
+ if (rpc->saved_frames_pool == NULL) {
+ pthread_mutex_destroy(&rpc->lock);
+ mem_pool_destroy(rpc->reqpool);
+ GF_FREE(rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ ret = rpc_clnt_connection_init(rpc, ctx, options, name);
+ if (ret == -1) {
+ pthread_mutex_destroy(&rpc->lock);
+ mem_pool_destroy(rpc->reqpool);
+ mem_pool_destroy(rpc->saved_frames_pool);
+ GF_FREE(rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ /* This is handled to make sure we have modularity in getting the
+ auth data changed */
+ gf_boolean_t auth_null = dict_get_str_boolean(options, "auth-null", 0);
+
+ rpc->auth_value = (auth_null) ? 0 : AUTH_GLUSTERFS_v2;
+
+ rpc = rpc_clnt_ref(rpc);
+ INIT_LIST_HEAD(&rpc->programs);
out:
- return rpc;
+ return rpc;
}
-
int
-rpc_clnt_start (struct rpc_clnt *rpc)
+rpc_clnt_start(struct rpc_clnt *rpc)
{
- struct rpc_clnt_connection *conn = NULL;
+ struct rpc_clnt_connection *conn = NULL;
- if (!rpc)
- return -1;
+ if (!rpc)
+ return -1;
- conn = &rpc->conn;
+ conn = &rpc->conn;
- rpc_clnt_reconnect (conn->trans);
+ pthread_mutex_lock(&conn->lock);
+ {
+ rpc->disabled = 0;
+ }
+ pthread_mutex_unlock(&conn->lock);
+ /* Corresponding unref will be either on successful timer cancel or last
+ * rpc_clnt_reconnect fire event.
+ */
+ rpc_clnt_ref(rpc);
+ rpc_clnt_reconnect(conn);
- return 0;
+ return 0;
}
-
int
-rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
- void *mydata)
+rpc_clnt_cleanup_and_start(struct rpc_clnt *rpc)
{
- rpc->mydata = mydata;
- rpc->notifyfn = fn;
+ struct rpc_clnt_connection *conn = NULL;
- return 0;
-}
+ if (!rpc)
+ return -1;
-ssize_t
-xdr_serialize_glusterfs_auth (char *dest, struct auth_glusterfs_parms_v2 *au)
-{
- ssize_t ret = -1;
- XDR xdr;
+ conn = &rpc->conn;
- if ((!dest) || (!au))
- return -1;
-
- xdrmem_create (&xdr, dest, GF_MAX_AUTH_BYTES, XDR_ENCODE);
-
- if (!xdr_auth_glusterfs_parms_v2 (&xdr, au)) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to encode auth glusterfs elements");
- ret = -1;
- goto ret;
- }
+ rpc_clnt_connection_cleanup(conn);
- ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ pthread_mutex_lock(&conn->lock);
+ {
+ rpc->disabled = 0;
+ }
+ pthread_mutex_unlock(&conn->lock);
+ /* Corresponding unref will be either on successful timer cancel or last
+ * rpc_clnt_reconnect fire event.
+ */
+ rpc_clnt_ref(rpc);
+ rpc_clnt_reconnect(conn);
-ret:
- return ret;
+ return 0;
}
-
int
-rpc_clnt_fill_request (int prognum, int progver, int procnum,
- uint64_t xid, struct auth_glusterfs_parms_v2 *au,
- struct rpc_msg *request, char *auth_data)
+rpc_clnt_register_notify(struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
+ void *mydata)
{
- int ret = -1;
-
- if (!request) {
- goto out;
- }
-
- memset (request, 0, sizeof (*request));
-
- request->rm_xid = xid;
- request->rm_direction = CALL;
-
- request->rm_call.cb_rpcvers = 2;
- request->rm_call.cb_prog = prognum;
- request->rm_call.cb_vers = progver;
- request->rm_call.cb_proc = procnum;
+ rpc->mydata = mydata;
+ rpc->notifyfn = fn;
- /* TODO: Using AUTH_(GLUSTERFS/NULL) in a kludgy way for time-being.
- * Make it modular in future so it is easy to plug-in new
- * authentication schemes.
- */
- if (auth_data) {
- ret = xdr_serialize_glusterfs_auth (auth_data, au);
- if (ret == -1) {
- gf_log ("rpc-clnt", GF_LOG_DEBUG,
- "cannot encode credentials");
- goto out;
- }
+ return 0;
+}
- request->rm_call.cb_cred.oa_flavor = AUTH_GLUSTERFS_v2;
- request->rm_call.cb_cred.oa_base = auth_data;
- request->rm_call.cb_cred.oa_length = ret;
- } else {
- request->rm_call.cb_cred.oa_flavor = AUTH_NULL;
- request->rm_call.cb_cred.oa_base = NULL;
- request->rm_call.cb_cred.oa_length = 0;
- }
- request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
- request->rm_call.cb_verf.oa_base = NULL;
- request->rm_call.cb_verf.oa_length = 0;
+/* used for GF_LOG_OCCASIONALLY() */
+static int gf_auth_max_groups_log = 0;
- ret = 0;
+static inline int
+setup_glusterfs_auth_param_v3(call_frame_t *frame, auth_glusterfs_params_v3 *au,
+ int lk_owner_len, char *owner_data)
+{
+ int ret = -1;
+ unsigned int max_groups = 0;
+ int max_lkowner_len = 0;
+
+ au->pid = frame->root->pid;
+ au->uid = frame->root->uid;
+ au->gid = frame->root->gid;
+
+ au->flags = frame->root->flags;
+ au->ctime_sec = frame->root->ctime.tv_sec;
+ au->ctime_nsec = frame->root->ctime.tv_nsec;
+
+ au->lk_owner.lk_owner_val = owner_data;
+ au->lk_owner.lk_owner_len = lk_owner_len;
+ au->groups.groups_val = frame->root->groups;
+ au->groups.groups_len = frame->root->ngrps;
+
+ /* The number of groups and the size of lk_owner depend on oneother.
+ * We can truncate the groups, but should not touch the lk_owner. */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(lk_owner_len, AUTH_GLUSTERFS_v3);
+ if (au->groups.groups_len > max_groups) {
+ GF_LOG_OCCASIONALLY(gf_auth_max_groups_log, "rpc-auth", GF_LOG_WARNING,
+ "truncating grouplist "
+ "from %d to %d",
+ au->groups.groups_len, max_groups);
+
+ au->groups.groups_len = max_groups;
+ }
+
+ max_lkowner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(au->groups.groups_len,
+ AUTH_GLUSTERFS_v3);
+ if (lk_owner_len > max_lkowner_len) {
+ gf_log("rpc-clnt", GF_LOG_ERROR,
+ "lkowner field is too "
+ "big (%d), it does not fit in the rpc-header",
+ au->lk_owner.lk_owner_len);
+ errno = E2BIG;
+ goto out;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
+static inline int
+setup_glusterfs_auth_param_v2(call_frame_t *frame, auth_glusterfs_parms_v2 *au,
+ int lk_owner_len, char *owner_data)
+{
+ unsigned int max_groups = 0;
+ int max_lkowner_len = 0;
+ int ret = -1;
+
+ au->pid = frame->root->pid;
+ au->uid = frame->root->uid;
+ au->gid = frame->root->gid;
+
+ au->lk_owner.lk_owner_val = owner_data;
+ au->lk_owner.lk_owner_len = lk_owner_len;
+ au->groups.groups_val = frame->root->groups;
+ au->groups.groups_len = frame->root->ngrps;
+
+ /* The number of groups and the size of lk_owner depend on oneother.
+ * We can truncate the groups, but should not touch the lk_owner. */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(lk_owner_len, AUTH_GLUSTERFS_v2);
+ if (au->groups.groups_len > max_groups) {
+ GF_LOG_OCCASIONALLY(gf_auth_max_groups_log, "rpc-auth", GF_LOG_WARNING,
+ "truncating grouplist "
+ "from %d to %d",
+ au->groups.groups_len, max_groups);
+
+ au->groups.groups_len = max_groups;
+ }
+
+ max_lkowner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(au->groups.groups_len,
+ AUTH_GLUSTERFS_v2);
+ if (lk_owner_len > max_lkowner_len) {
+ gf_log("rpc-auth", GF_LOG_ERROR,
+ "lkowner field is too "
+ "big (%d), it does not fit in the rpc-header",
+ au->lk_owner.lk_owner_len);
+ errno = E2BIG;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
-struct iovec
-rpc_clnt_record_build_header (char *recordstart, size_t rlen,
- struct rpc_msg *request, size_t payload)
+static ssize_t
+xdr_serialize_glusterfs_auth(struct rpc_clnt *clnt, call_frame_t *frame,
+ char *dest)
{
- struct iovec requesthdr = {0, };
- struct iovec txrecord = {0, 0};
- int ret = -1;
- size_t fraglen = 0;
+ ssize_t ret = -1;
+ XDR xdr;
+ char owner[4] = {
+ 0,
+ };
+ int32_t pid = 0;
+ char *lk_owner_data = NULL;
+ int lk_owner_len = 0;
+
+ if ((!dest))
+ return -1;
+
+ xdrmem_create(&xdr, dest, GF_MAX_AUTH_BYTES, XDR_ENCODE);
+
+ if (frame->root->lk_owner.len) {
+ lk_owner_data = frame->root->lk_owner.data;
+ lk_owner_len = frame->root->lk_owner.len;
+ } else {
+ pid = frame->root->pid;
+ owner[0] = (char)(pid & 0xff);
+ owner[1] = (char)((pid >> 8) & 0xff);
+ owner[2] = (char)((pid >> 16) & 0xff);
+ owner[3] = (char)((pid >> 24) & 0xff);
+
+ lk_owner_data = owner;
+ lk_owner_len = 4;
+ }
+
+ if (clnt->auth_value == AUTH_GLUSTERFS_v2) {
+ auth_glusterfs_parms_v2 au_v2 = {
+ 0,
+ };
+
+ ret = setup_glusterfs_auth_param_v2(frame, &au_v2, lk_owner_len,
+ lk_owner_data);
+ if (ret)
+ goto out;
+ if (!xdr_auth_glusterfs_parms_v2(&xdr, &au_v2)) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to encode auth glusterfs elements");
+ ret = -1;
+ goto out;
+ }
+ } else if (clnt->auth_value == AUTH_GLUSTERFS_v3) {
+ auth_glusterfs_params_v3 au_v3 = {
+ 0,
+ };
+
+ ret = setup_glusterfs_auth_param_v3(frame, &au_v3, lk_owner_len,
+ lk_owner_data);
+ if (ret)
+ goto out;
- ret = rpc_request_to_xdr (request, recordstart, rlen, &requesthdr);
- if (ret == -1) {
- gf_log ("rpc-clnt", GF_LOG_DEBUG,
- "Failed to create RPC request");
- goto out;
+ if (!xdr_auth_glusterfs_params_v3(&xdr, &au_v3)) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to encode auth glusterfs elements");
+ ret = -1;
+ goto out;
}
+ } else {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to encode auth glusterfs elements");
+ ret = -1;
+ goto out;
+ }
- fraglen = payload + requesthdr.iov_len;
- gf_log ("rpc-clnt", GF_LOG_TRACE, "Request fraglen %zu, payload: %zu, "
- "rpc hdr: %zu", fraglen, payload, requesthdr.iov_len);
-
-
- txrecord.iov_base = recordstart;
-
- /* Remember, this is only the vec for the RPC header and does not
- * include the payload above. We needed the payload only to calculate
- * the size of the full fragment. This size is sent in the fragment
- * header.
- */
- txrecord.iov_len = requesthdr.iov_len;
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
out:
- return txrecord;
+ return ret;
}
-
-struct iobuf *
-rpc_clnt_record_build_record (struct rpc_clnt *clnt, int prognum, int progver,
- int procnum, size_t hdrsize, uint64_t xid,
- struct auth_glusterfs_parms_v2 *au,
- struct iovec *recbuf)
+int
+rpc_clnt_fill_request(struct rpc_clnt *clnt, int prognum, int progver,
+ int procnum, uint64_t xid, call_frame_t *fr,
+ struct rpc_msg *request, char *auth_data)
{
- struct rpc_msg request = {0, };
- struct iobuf *request_iob = NULL;
- char *record = NULL;
- struct iovec recordhdr = {0, };
- size_t pagesize = 0;
- int ret = -1;
- size_t xdr_size = 0;
- char auth_data[GF_MAX_AUTH_BYTES] = {0, };
-
- if ((!clnt) || (!recbuf) || (!au)) {
- goto out;
- }
+ int ret = -1;
- /* Fill the rpc structure and XDR it into the buffer got above. */
- if (clnt->auth_null)
- ret = rpc_clnt_fill_request (prognum, progver, procnum,
- xid, NULL, &request, NULL);
- else
- ret = rpc_clnt_fill_request (prognum, progver, procnum,
- xid, au, &request, auth_data);
+ if (!request) {
+ goto out;
+ }
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
- "cannot build a rpc-request xid (%"PRIu64")", xid);
- goto out;
- }
+ memset(request, 0, sizeof(*request));
- xdr_size = xdr_sizeof ((xdrproc_t)xdr_callmsg, &request);
+ request->rm_xid = xid;
+ request->rm_direction = CALL;
- /* First, try to get a pointer into the buffer which the RPC
- * layer can use.
- */
- request_iob = iobuf_get2 (clnt->ctx->iobuf_pool, (xdr_size + hdrsize));
- if (!request_iob) {
- goto out;
- }
-
- pagesize = iobuf_pagesize (request_iob);
+ request->rm_call.cb_rpcvers = 2;
+ request->rm_call.cb_prog = prognum;
+ request->rm_call.cb_vers = progver;
+ request->rm_call.cb_proc = procnum;
- record = iobuf_ptr (request_iob); /* Now we have it. */
-
- recordhdr = rpc_clnt_record_build_header (record, pagesize, &request,
- hdrsize);
-
- if (!recordhdr.iov_base) {
- gf_log (clnt->conn.trans->name, GF_LOG_ERROR,
- "Failed to build record header");
- iobuf_unref (request_iob);
- request_iob = NULL;
- recbuf->iov_base = NULL;
- goto out;
+ if (!clnt->auth_value) {
+ request->rm_call.cb_cred.oa_flavor = AUTH_NULL;
+ request->rm_call.cb_cred.oa_base = NULL;
+ request->rm_call.cb_cred.oa_length = 0;
+ } else {
+ ret = xdr_serialize_glusterfs_auth(clnt, fr, auth_data);
+ if (ret == -1) {
+ gf_log("rpc-clnt", GF_LOG_WARNING,
+ "cannot encode auth credentials");
+ goto out;
}
- recbuf->iov_base = recordhdr.iov_base;
- recbuf->iov_len = recordhdr.iov_len;
+ request->rm_call.cb_cred.oa_flavor = clnt->auth_value;
+ request->rm_call.cb_cred.oa_base = auth_data;
+ request->rm_call.cb_cred.oa_length = ret;
+ }
+ request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_verf.oa_base = NULL;
+ request->rm_call.cb_verf.oa_length = 0;
+ ret = 0;
out:
- return request_iob;
+ return ret;
}
+struct iovec
+rpc_clnt_record_build_header(char *recordstart, size_t rlen,
+ struct rpc_msg *request, size_t payload)
+{
+ struct iovec requesthdr = {
+ 0,
+ };
+ struct iovec txrecord = {0, 0};
+ int ret = -1;
+ size_t fraglen = 0;
+
+ ret = rpc_request_to_xdr(request, recordstart, rlen, &requesthdr);
+ if (ret == -1) {
+ gf_log("rpc-clnt", GF_LOG_DEBUG, "Failed to create RPC request");
+ goto out;
+ }
+
+ fraglen = payload + requesthdr.iov_len;
+ gf_log("rpc-clnt", GF_LOG_TRACE,
+ "Request fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu",
+ fraglen, payload, requesthdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = requesthdr.iov_len;
+
+out:
+ return txrecord;
+}
struct iobuf *
-rpc_clnt_record (struct rpc_clnt *clnt, call_frame_t *call_frame,
- rpc_clnt_prog_t *prog, int procnum, size_t hdrlen,
- struct iovec *rpchdr, uint64_t callid)
+rpc_clnt_record_build_record(struct rpc_clnt *clnt, call_frame_t *fr,
+ int prognum, int progver, int procnum,
+ size_t hdrsize, uint64_t xid, struct iovec *recbuf)
{
- struct auth_glusterfs_parms_v2 au = {0, };
- struct iobuf *request_iob = NULL;
- char owner[4] = {0,};
-
- if (!prog || !rpchdr || !call_frame) {
- goto out;
- }
+ struct rpc_msg request = {
+ 0,
+ };
+ struct iobuf *request_iob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ size_t pagesize = 0;
+ int ret = -1;
+ size_t xdr_size = 0;
+ char auth_data[GF_MAX_AUTH_BYTES] = {
+ 0,
+ };
+
+ if ((!clnt) || (!recbuf)) {
+ goto out;
+ }
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpc_clnt_fill_request(clnt, prognum, progver, procnum, xid, fr,
+ &request, auth_data);
+
+ if (ret == -1) {
+ gf_log(clnt->conn.name, GF_LOG_WARNING,
+ "cannot build a rpc-request xid (%" PRIu64 ")", xid);
+ goto out;
+ }
+
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_callmsg, &request);
+
+ /* First, try to get a pointer into the buffer which the RPC
+ * layer can use.
+ */
+ request_iob = iobuf_get2(clnt->ctx->iobuf_pool, (xdr_size + hdrsize));
+ if (!request_iob) {
+ goto out;
+ }
+
+ pagesize = iobuf_pagesize(request_iob);
+
+ record = iobuf_ptr(request_iob); /* Now we have it. */
+
+ recordhdr = rpc_clnt_record_build_header(record, pagesize, &request,
+ hdrsize);
+
+ if (!recordhdr.iov_base) {
+ gf_log(clnt->conn.name, GF_LOG_ERROR, "Failed to build record header");
+ iobuf_unref(request_iob);
+ request_iob = NULL;
+ recbuf->iov_base = NULL;
+ goto out;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
- au.pid = call_frame->root->pid;
- au.uid = call_frame->root->uid;
- au.gid = call_frame->root->gid;
- au.groups.groups_len = call_frame->root->ngrps;
- au.lk_owner.lk_owner_len = call_frame->root->lk_owner.len;
-
- if (au.groups.groups_len)
- au.groups.groups_val = call_frame->root->groups;
-
- if (call_frame->root->lk_owner.len)
- au.lk_owner.lk_owner_val = call_frame->root->lk_owner.data;
- else {
- owner[0] = (char)(au.pid & 0xff);
- owner[1] = (char)((au.pid >> 8) & 0xff);
- owner[2] = (char)((au.pid >> 16) & 0xff);
- owner[3] = (char)((au.pid >> 24) & 0xff);
-
- au.lk_owner.lk_owner_val = owner;
- au.lk_owner.lk_owner_len = 4;
- }
+out:
+ return request_iob;
+}
- gf_log (clnt->conn.trans->name, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
- ", gid: %d, owner: %s", au.pid, au.uid, au.gid,
- lkowner_utoa (&call_frame->root->lk_owner));
-
- request_iob = rpc_clnt_record_build_record (clnt, prog->prognum,
- prog->progver,
- procnum, hdrlen,
- callid, &au,
- rpchdr);
- if (!request_iob) {
- gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
- "cannot build rpc-record");
- goto out;
- }
+static inline struct iobuf *
+rpc_clnt_record(struct rpc_clnt *clnt, call_frame_t *call_frame,
+ rpc_clnt_prog_t *prog, int procnum, size_t hdrlen,
+ struct iovec *rpchdr, uint64_t callid)
+{
+ if (!prog || !rpchdr || !call_frame) {
+ return NULL;
+ }
-out:
- return request_iob;
+ return rpc_clnt_record_build_record(clnt, call_frame, prog->prognum,
+ prog->progver, procnum, hdrlen, callid,
+ rpchdr);
}
int
-rpcclnt_cbk_program_register (struct rpc_clnt *clnt,
- rpcclnt_cb_program_t *program)
+rpcclnt_cbk_program_register(struct rpc_clnt *clnt,
+ rpcclnt_cb_program_t *program, void *mydata)
{
- int ret = -1;
- char already_registered = 0;
- rpcclnt_cb_program_t *tmp = NULL;
+ int ret = -1;
+ char already_registered = 0;
+ rpcclnt_cb_program_t *tmp = NULL;
- if (!clnt)
- goto out;
+ if (!clnt)
+ goto out;
- if (program->actors == NULL)
- goto out;
+ if (program->actors == NULL)
+ goto out;
- pthread_mutex_lock (&clnt->lock);
+ pthread_mutex_lock(&clnt->lock);
+ {
+ list_for_each_entry(tmp, &clnt->programs, program)
{
- list_for_each_entry (tmp, &clnt->programs, program) {
- if ((program->prognum == tmp->prognum)
- && (program->progver == tmp->progver)) {
- already_registered = 1;
- break;
- }
- }
+ if ((program->prognum == tmp->prognum) &&
+ (program->progver == tmp->progver)) {
+ already_registered = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&clnt->lock);
+ }
+ pthread_mutex_unlock(&clnt->lock);
- if (already_registered) {
- gf_log_callingfn (clnt->conn.trans->name, GF_LOG_DEBUG,
- "already registered");
- ret = 0;
- goto out;
- }
+ if (already_registered) {
+ gf_log_callingfn(clnt->conn.name, GF_LOG_DEBUG, "already registered");
+ ret = 0;
+ goto out;
+ }
- tmp = GF_CALLOC (1, sizeof (*tmp),
- gf_common_mt_rpcclnt_cb_program_t);
- if (tmp == NULL) {
- goto out;
- }
+ tmp = GF_MALLOC(sizeof(*tmp), gf_common_mt_rpcclnt_cb_program_t);
+ if (tmp == NULL) {
+ goto out;
+ }
- memcpy (tmp, program, sizeof (*tmp));
- INIT_LIST_HEAD (&tmp->program);
+ memcpy(tmp, program, sizeof(*tmp));
+ INIT_LIST_HEAD(&tmp->program);
- pthread_mutex_lock (&clnt->lock);
- {
- list_add_tail (&tmp->program, &clnt->programs);
- }
- pthread_mutex_unlock (&clnt->lock);
+ tmp->mydata = mydata;
- ret = 0;
- gf_log (clnt->conn.trans->name, GF_LOG_DEBUG,
- "New program registered: %s, Num: %d, Ver: %d",
- program->progname, program->prognum,
- program->progver);
+ pthread_mutex_lock(&clnt->lock);
+ {
+ list_add_tail(&tmp->program, &clnt->programs);
+ }
+ pthread_mutex_unlock(&clnt->lock);
-out:
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_ERROR,
- "Program registration failed:"
- " %s, Num: %d, Ver: %d", program->progname,
- program->prognum, program->progver);
- }
+ ret = 0;
+ gf_log(clnt->conn.name, GF_LOG_DEBUG,
+ "New program registered: %s, Num: %d, Ver: %d", program->progname,
+ program->prognum, program->progver);
- return ret;
+out:
+ if (ret == -1 && clnt) {
+ gf_log(clnt->conn.name, GF_LOG_ERROR,
+ "Program registration failed:"
+ " %s, Num: %d, Ver: %d",
+ program->progname, program->prognum, program->progver);
+ }
+
+ return ret;
}
-
int
-rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
- int procnum, fop_cbk_fn_t cbkfn,
- struct iovec *proghdr, int proghdrcount,
- struct iovec *progpayload, int progpayloadcount,
- struct iobref *iobref, void *frame, struct iovec *rsphdr,
- int rsphdr_count, struct iovec *rsp_payload,
- int rsp_payload_count, struct iobref *rsp_iobref)
+rpc_clnt_submit(struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, struct iovec *proghdr, int proghdrcount,
+ struct iovec *progpayload, int progpayloadcount,
+ struct iobref *iobref, void *frame, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref)
{
- rpc_clnt_connection_t *conn = NULL;
- struct iobuf *request_iob = NULL;
- struct iovec rpchdr = {0,};
- struct rpc_req *rpcreq = NULL;
- rpc_transport_req_t req;
- int ret = -1;
- int proglen = 0;
- char new_iobref = 0;
- uint64_t callid = 0;
-
- if (!rpc || !prog || !frame) {
- goto out;
- }
-
- conn = &rpc->conn;
-
- if (conn->trans == NULL) {
- goto out;
- }
-
- rpcreq = mem_get (rpc->reqpool);
- if (rpcreq == NULL) {
- goto out;
- }
-
- memset (rpcreq, 0, sizeof (*rpcreq));
- memset (&req, 0, sizeof (req));
-
+ rpc_clnt_connection_t *conn = NULL;
+ struct iobuf *request_iob = NULL;
+ struct iovec rpchdr = {
+ 0,
+ };
+ struct rpc_req *rpcreq = NULL;
+ rpc_transport_req_t req;
+ int ret = -1;
+ int proglen = 0;
+ char new_iobref = 0;
+ uint64_t callid = 0;
+ gf_boolean_t need_unref = _gf_false;
+ call_frame_t *cframe = frame;
+
+ if (!rpc || !prog || !frame) {
+ goto out;
+ }
+
+ conn = &rpc->conn;
+
+ rpcreq = mem_get(rpc->reqpool);
+ if (rpcreq == NULL) {
+ goto out;
+ }
+
+ memset(rpcreq, 0, sizeof(*rpcreq));
+ memset(&req, 0, sizeof(req));
+
+ if (!iobref) {
+ iobref = iobref_new();
if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- goto out;
- }
-
- new_iobref = 1;
- }
-
- callid = rpc_clnt_new_callid (rpc);
-
- rpcreq->prog = prog;
- rpcreq->procnum = procnum;
- rpcreq->conn = conn;
- rpcreq->xid = callid;
- rpcreq->cbkfn = cbkfn;
-
- ret = -1;
-
- if (proghdr) {
- proglen += iov_length (proghdr, proghdrcount);
- }
-
- request_iob = rpc_clnt_record (rpc, frame, prog,
- procnum, proglen,
- &rpchdr, callid);
- if (!request_iob) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "cannot build rpc-record");
- goto out;
- }
-
- iobref_add (iobref, request_iob);
-
- req.msg.rpchdr = &rpchdr;
- req.msg.rpchdrcount = 1;
- req.msg.proghdr = proghdr;
- req.msg.proghdrcount = proghdrcount;
- req.msg.progpayload = progpayload;
- req.msg.progpayloadcount = progpayloadcount;
- req.msg.iobref = iobref;
-
- req.rsp.rsphdr = rsphdr;
- req.rsp.rsphdr_count = rsphdr_count;
- req.rsp.rsp_payload = rsp_payload;
- req.rsp.rsp_payload_count = rsp_payload_count;
- req.rsp.rsp_iobref = rsp_iobref;
- req.rpc_req = rpcreq;
-
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->connected == 0) {
- ret = rpc_transport_connect (conn->trans,
- conn->config.remote_port);
- /* Below code makes sure the (re-)configured port lasts
- for just one successful connect attempt */
- if (!ret)
- conn->config.remote_port = 0;
- }
-
- ret = rpc_transport_submit_request (rpc->conn.trans,
- &req);
- if (ret == -1) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "failed to submit rpc-request "
- "(XID: 0x%ux Program: %s, ProgVers: %d, "
- "Proc: %d) to rpc-transport (%s)", rpcreq->xid,
- rpcreq->prog->progname, rpcreq->prog->progver,
- rpcreq->procnum, rpc->conn.trans->name);
- }
-
- if ((ret >= 0) && frame) {
- /* Save the frame in queue */
- __save_frame (rpc, frame, rpcreq);
-
- gf_log ("rpc-clnt", GF_LOG_TRACE, "submitted request "
- "(XID: 0x%ux Program: %s, ProgVers: %d, "
- "Proc: %d) to rpc-transport (%s)", rpcreq->xid,
- rpcreq->prog->progname, rpcreq->prog->progver,
- rpcreq->procnum, rpc->conn.trans->name);
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ callid = GF_ATOMIC_INC(rpc->xid);
+
+ rpcreq->prog = prog;
+ rpcreq->procnum = procnum;
+ rpcreq->conn = conn;
+ rpcreq->xid = callid;
+ rpcreq->cbkfn = cbkfn;
+
+ ret = -1;
+
+ if (proghdr) {
+ proglen += iov_length(proghdr, proghdrcount);
+ }
+
+ request_iob = rpc_clnt_record(rpc, frame, prog, procnum, proglen, &rpchdr,
+ callid);
+ if (!request_iob) {
+ gf_log(conn->name, GF_LOG_WARNING, "cannot build rpc-record");
+ goto out;
+ }
+
+ iobref_add(iobref, request_iob);
+
+ req.msg.rpchdr = &rpchdr;
+ req.msg.rpchdrcount = 1;
+ req.msg.proghdr = proghdr;
+ req.msg.proghdrcount = proghdrcount;
+ req.msg.progpayload = progpayload;
+ req.msg.progpayloadcount = progpayloadcount;
+ req.msg.iobref = iobref;
+
+ req.rsp.rsphdr = rsphdr;
+ req.rsp.rsphdr_count = rsphdr_count;
+ req.rsp.rsp_payload = rsp_payload;
+ req.rsp.rsp_payload_count = rsp_payload_count;
+ req.rsp.rsp_iobref = rsp_iobref;
+ req.rpc_req = rpcreq;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (conn->connected == 0) {
+ if (rpc->disabled)
+ goto unlock;
+ ret = rpc_transport_connect(conn->trans, conn->config.remote_port);
+ if (ret < 0) {
+ gf_log(conn->name,
+ (errno == EINPROGRESS) ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "error returned while attempting to "
+ "connect to host:%s, port:%d",
+ conn->config.remote_host, conn->config.remote_port);
+ goto unlock;
+ }
+ }
+
+ ret = rpc_transport_submit_request(conn->trans, &req);
if (ret == -1) {
- goto out;
- }
-
- ret = 0;
+ gf_log(conn->name, GF_LOG_WARNING,
+ "failed to submit rpc-request "
+ "(unique: %" PRIu64
+ ", XID: 0x%x Program: %s, "
+ "ProgVers: %d, Proc: %d) to rpc-transport (%s)",
+ cframe->root->unique, rpcreq->xid, rpcreq->prog->progname,
+ rpcreq->prog->progver, rpcreq->procnum, conn->name);
+ } else if ((ret >= 0) && frame) {
+ /* Save the frame in queue */
+ __save_frame(rpc, frame, rpcreq);
+
+ /* A ref on rpc-clnt object is taken while registering
+ * call_bail to timer in __save_frame. If it fails to
+ * register, it needs an unref and should happen outside
+ * conn->lock which otherwise leads to deadlocks */
+ if (conn->timer == NULL)
+ need_unref = _gf_true;
+
+ conn->msgcnt++;
+
+ gf_log("rpc-clnt", GF_LOG_TRACE,
+ "submitted request "
+ "(unique: %" PRIu64
+ ", XID: 0x%x, Program: %s, "
+ "ProgVers: %d, Proc: %d) to rpc-transport (%s)",
+ cframe->root->unique, rpcreq->xid, rpcreq->prog->progname,
+ rpcreq->prog->progver, rpcreq->procnum, conn->name);
+ }
+ }
+unlock:
+ pthread_mutex_unlock(&conn->lock);
+
+ if (need_unref)
+ rpc_clnt_unref(rpc);
+
+ if (ret == -1) {
+ goto out;
+ }
+
+ rpc_clnt_check_and_start_ping(rpc);
+ ret = 0;
out:
- if (request_iob) {
- iobuf_unref (request_iob);
- }
-
- if (new_iobref && iobref) {
- iobref_unref (iobref);
- }
-
- if (frame && (ret == -1)) {
- if (rpcreq) {
- rpcreq->rpc_status = -1;
- cbkfn (rpcreq, NULL, 0, frame);
- mem_put (rpcreq);
- }
- }
- return ret;
+ if (request_iob) {
+ iobuf_unref(request_iob);
+ }
+
+ if (new_iobref && iobref) {
+ iobref_unref(iobref);
+ }
+
+ if (frame && (ret == -1)) {
+ if (rpcreq) {
+ rpcreq->rpc_status = -1;
+ cbkfn(rpcreq, NULL, 0, frame);
+ mem_put(rpcreq);
+ }
+ }
+ return ret;
}
-
struct rpc_clnt *
-rpc_clnt_ref (struct rpc_clnt *rpc)
+rpc_clnt_ref(struct rpc_clnt *rpc)
{
- if (!rpc)
- return NULL;
- pthread_mutex_lock (&rpc->lock);
- {
- rpc->refcount++;
- }
- pthread_mutex_unlock (&rpc->lock);
- return rpc;
-}
+ if (!rpc)
+ return NULL;
+ GF_ATOMIC_INC(rpc->refcount);
+ return rpc;
+}
static void
-rpc_clnt_destroy (struct rpc_clnt *rpc)
+rpc_clnt_trigger_destroy(struct rpc_clnt *rpc)
{
- if (!rpc)
- return;
-
- if (rpc->conn.trans) {
- rpc_transport_unregister_notify (rpc->conn.trans);
- rpc_transport_disconnect (rpc->conn.trans);
- rpc_transport_unref (rpc->conn.trans);
- }
-
- rpc_clnt_reconnect_cleanup (&rpc->conn);
- saved_frames_destroy (rpc->conn.saved_frames);
- pthread_mutex_destroy (&rpc->lock);
- pthread_mutex_destroy (&rpc->conn.lock);
+ rpc_clnt_connection_t *conn = NULL;
+ rpc_transport_t *trans = NULL;
- /* mem-pool should be destroyed, otherwise,
- it will cause huge memory leaks */
- mem_pool_destroy (rpc->reqpool);
- mem_pool_destroy (rpc->saved_frames_pool);
-
- GF_FREE (rpc);
+ if (!rpc)
return;
-}
-
-struct rpc_clnt *
-rpc_clnt_unref (struct rpc_clnt *rpc)
-{
- int count = 0;
- if (!rpc)
- return NULL;
- pthread_mutex_lock (&rpc->lock);
- {
- count = --rpc->refcount;
- }
- pthread_mutex_unlock (&rpc->lock);
- if (!count) {
- rpc_clnt_destroy (rpc);
- return NULL;
- }
- return rpc;
+ /* reading conn->trans outside conn->lock is OK, since this is the last
+ * ref*/
+ conn = &rpc->conn;
+ trans = conn->trans;
+ rpc_clnt_disable(rpc);
+
+ /* This is to account for rpc_clnt_disable that might have been called
+ * before rpc_clnt_unref */
+ if (trans) {
+ /* set conn->trans to NULL before rpc_transport_unref
+ * as rpc_transport_unref can potentially free conn
+ */
+ conn->trans = NULL;
+ rpc_transport_unref(trans);
+ }
}
-
-void
-rpc_clnt_disable (struct rpc_clnt *rpc)
+static void
+rpc_clnt_destroy(struct rpc_clnt *rpc)
{
- rpc_clnt_connection_t *conn = NULL;
-
- if (!rpc) {
- goto out;
- }
-
- conn = &rpc->conn;
-
- pthread_mutex_lock (&conn->lock);
- {
- rpc->disabled = 1;
-
- if (conn->timer) {
- gf_timer_call_cancel (rpc->ctx, conn->timer);
- conn->timer = NULL;
- }
+ rpcclnt_cb_program_t *program = NULL;
+ rpcclnt_cb_program_t *tmp = NULL;
+ struct saved_frames *saved_frames = NULL;
+ rpc_clnt_connection_t *conn = NULL;
- if (conn->reconnect) {
- gf_timer_call_cancel (rpc->ctx, conn->reconnect);
- conn->reconnect = NULL;
- }
- conn->connected = 0;
-
- if (conn->ping_timer) {
- gf_timer_call_cancel (rpc->ctx, conn->ping_timer);
- conn->ping_timer = NULL;
- conn->ping_started = 0;
- }
-
- }
- pthread_mutex_unlock (&conn->lock);
-
- rpc_transport_disconnect (rpc->conn.trans);
-
-out:
+ if (!rpc)
return;
-}
+ conn = &rpc->conn;
+ GF_FREE(rpc->conn.name);
+ /* Access saved_frames in critical-section to avoid
+ crash in rpc_clnt_connection_cleanup at the time
+ of destroying saved frames
+ */
+ pthread_mutex_lock(&conn->lock);
+ {
+ saved_frames = conn->saved_frames;
+ conn->saved_frames = NULL;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ saved_frames_destroy(saved_frames);
+ pthread_mutex_destroy(&rpc->lock);
+ pthread_mutex_destroy(&rpc->conn.lock);
+ pthread_cond_destroy(&rpc->conn.cond);
+
+ /* mem-pool should be destroyed, otherwise,
+ it will cause huge memory leaks */
+ mem_pool_destroy(rpc->reqpool);
+ mem_pool_destroy(rpc->saved_frames_pool);
+
+ list_for_each_entry_safe(program, tmp, &rpc->programs, program)
+ {
+ GF_FREE(program);
+ }
+
+ GF_FREE(rpc);
+ return;
+}
-void
-rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config)
+struct rpc_clnt *
+rpc_clnt_unref(struct rpc_clnt *rpc)
{
- if (config->rpc_timeout) {
- if (config->rpc_timeout != rpc->conn.config.rpc_timeout)
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "changing timeout to %d (from %d)",
- config->rpc_timeout,
- rpc->conn.config.rpc_timeout);
- rpc->conn.config.rpc_timeout = config->rpc_timeout;
- }
-
- if (config->remote_port) {
- if (config->remote_port != rpc->conn.config.remote_port)
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "changing port to %d (from %d)",
- config->remote_port,
- rpc->conn.config.remote_port);
+ int count = 0;
- rpc->conn.config.remote_port = config->remote_port;
- }
+ if (!rpc)
+ return NULL;
- if (config->remote_host) {
- if (rpc->conn.config.remote_host) {
- if (strcmp (rpc->conn.config.remote_host,
- config->remote_host))
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "changing port to %s (from %s)",
- config->remote_host,
- rpc->conn.config.remote_host);
- FREE (rpc->conn.config.remote_host);
- } else {
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "setting hostname to %s",
- config->remote_host);
- }
+ count = GF_ATOMIC_DEC(rpc->refcount);
- rpc->conn.config.remote_host = gf_strdup (config->remote_host);
- }
+ if (!count) {
+ rpc_clnt_trigger_destroy(rpc);
+ return NULL;
+ }
+ return rpc;
}
int
-rpc_clnt_transport_unix_options_build (dict_t **options, char *filepath,
- int frame_timeout)
+rpc_clnt_disable(struct rpc_clnt *rpc)
{
- dict_t *dict = NULL;
- char *fpath = NULL;
- int ret = -1;
-
- GF_ASSERT (filepath);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- fpath = gf_strdup (filepath);
- if (!fpath) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynstr (dict, "transport.socket.connect-path", fpath);
- if (ret)
- goto out;
+ rpc_clnt_connection_t *conn = NULL;
+ rpc_transport_t *trans = NULL;
+ int unref = 0;
+ int ret = 0;
+ gf_boolean_t timer_unref = _gf_false;
+ gf_boolean_t reconnect_unref = _gf_false;
+
+ if (!rpc) {
+ goto out;
+ }
+
+ conn = &rpc->conn;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ rpc->disabled = 1;
+
+ if (conn->timer) {
+ ret = gf_timer_call_cancel(rpc->ctx, conn->timer);
+ /* If the event is not fired and it actually cancelled
+ * the timer, do the unref else registered call back
+ * function will take care of it.
+ */
+ if (!ret)
+ timer_unref = _gf_true;
+ conn->timer = NULL;
+ }
+
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(rpc->ctx, conn->reconnect);
+ if (!ret)
+ reconnect_unref = _gf_true;
+ conn->reconnect = NULL;
+ }
+ conn->connected = 0;
+
+ unref = rpc_clnt_remove_ping_timer_locked(rpc);
+ trans = conn->trans;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ ret = -1;
+ if (trans) {
+ ret = rpc_transport_disconnect(trans, _gf_true);
+ /* The auth_value was being reset to AUTH_GLUSTERFS_v2.
+ * if (clnt->auth_value)
+ * clnt->auth_value = AUTH_GLUSTERFS_v2;
+ * It should not be reset here. The disconnect during
+ * portmap request can race with handshake. If handshake
+ * happens first and disconnect later, auth_value would set
+ * to default value and it never sets back to actual auth_value
+ * supported by server. But it's important to set to lower
+ * version supported in the case where the server downgrades.
+ * So moving this code to RPC_TRANSPORT_CONNECT. Note that
+ * CONNECT cannot race with handshake as by nature it is
+ * serialized with handhake. An handshake can happen only
+ * on a connected transport and hence its strictly serialized.
+ */
+ }
+ if (unref)
+ rpc_clnt_unref(rpc);
- ret = dict_set_str (dict, "transport.address-family", "unix");
- if (ret)
- goto out;
+ if (timer_unref)
+ rpc_clnt_unref(rpc);
- ret = dict_set_str (dict, "transport.socket.nodelay", "off");
- if (ret)
- goto out;
+ if (reconnect_unref)
+ rpc_clnt_unref(rpc);
- ret = dict_set_str (dict, "transport-type", "socket");
- if (ret)
- goto out;
+out:
+ return ret;
+}
- ret = dict_set_str (dict, "transport.socket.keepalive", "off");
- if (ret)
- goto out;
+void
+rpc_clnt_reconfig(struct rpc_clnt *rpc, struct rpc_clnt_config *config)
+{
+ if (config->ping_timeout) {
+ if (config->ping_timeout != rpc->conn.ping_timeout)
+ gf_log(rpc->conn.name, GF_LOG_INFO,
+ "changing ping timeout to %d (from %d)",
+ config->ping_timeout, rpc->conn.ping_timeout);
- if (frame_timeout > 0) {
- ret = dict_set_int32 (dict, "frame-timeout", frame_timeout);
- if (ret)
- goto out;
+ pthread_mutex_lock(&rpc->conn.lock);
+ {
+ rpc->conn.ping_timeout = config->ping_timeout;
+ }
+ pthread_mutex_unlock(&rpc->conn.lock);
+ }
+
+ if (config->rpc_timeout) {
+ if (config->rpc_timeout != rpc->conn.config.rpc_timeout)
+ gf_log(rpc->conn.name, GF_LOG_INFO,
+ "changing timeout to %d (from %d)", config->rpc_timeout,
+ rpc->conn.config.rpc_timeout);
+ rpc->conn.config.rpc_timeout = config->rpc_timeout;
+ }
+
+ if (config->remote_port) {
+ if (config->remote_port != rpc->conn.config.remote_port)
+ gf_log(rpc->conn.name, GF_LOG_INFO, "changing port to %d (from %d)",
+ config->remote_port, rpc->conn.config.remote_port);
+
+ rpc->conn.config.remote_port = config->remote_port;
+ }
+
+ if (config->remote_host) {
+ if (rpc->conn.config.remote_host) {
+ if (strcmp(rpc->conn.config.remote_host, config->remote_host))
+ gf_log(rpc->conn.name, GF_LOG_INFO,
+ "changing hostname to %s (from %s)", config->remote_host,
+ rpc->conn.config.remote_host);
+ GF_FREE(rpc->conn.config.remote_host);
+ } else {
+ gf_log(rpc->conn.name, GF_LOG_INFO, "setting hostname to %s",
+ config->remote_host);
}
- *options = dict;
-out:
- if (ret) {
- if (fpath)
- GF_FREE (fpath);
- if (dict)
- dict_unref (dict);
- }
- return ret;
+ rpc->conn.config.remote_host = gf_strdup(config->remote_host);
+ }
}
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index ca3c1d027a2..2945265200b 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -11,82 +11,82 @@
#ifndef __RPC_CLNT_H
#define __RPC_CLNT_H
-#include "stack.h"
+#include <glusterfs/stack.h>
#include "rpc-transport.h"
-#include "timer.h"
+#include <glusterfs/timer.h>
#include "xdr-common.h"
+#include "glusterfs3.h"
typedef enum {
- RPC_CLNT_CONNECT,
- RPC_CLNT_DISCONNECT,
- RPC_CLNT_MSG
+ RPC_CLNT_CONNECT,
+ RPC_CLNT_DISCONNECT,
+ RPC_CLNT_PING,
+ RPC_CLNT_MSG,
+ RPC_CLNT_DESTROY
} rpc_clnt_event_t;
-
#define SFRAME_GET_PROGNUM(sframe) (sframe->rpcreq->prog->prognum)
#define SFRAME_GET_PROGVER(sframe) (sframe->rpcreq->prog->progver)
#define SFRAME_GET_PROCNUM(sframe) (sframe->rpcreq->procnum)
-struct xptr_clnt;
struct rpc_req;
struct rpc_clnt;
struct rpc_clnt_config;
struct rpc_clnt_program;
-typedef int (*rpc_clnt_notify_t) (struct rpc_clnt *rpc, void *mydata,
- rpc_clnt_event_t fn, void *data);
+typedef int (*rpc_clnt_notify_t)(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t fn, void *data);
-typedef int (*fop_cbk_fn_t) (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe);
+typedef int (*fop_cbk_fn_t)(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe);
-typedef int (*clnt_fn_t) (call_frame_t *fr, xlator_t *xl, void *args);
+typedef int (*clnt_fn_t)(call_frame_t *fr, xlator_t *xl, void *args);
struct saved_frame {
- union {
- struct list_head list;
- struct {
- struct saved_frame *frame_next;
- struct saved_frame *frame_prev;
- };
- };
- void *capital_this;
- void *frame;
- struct timeval saved_at;
- struct rpc_req *rpcreq;
- rpc_transport_rsp_t rsp;
+ union {
+ struct list_head list;
+ struct {
+ struct saved_frame *frame_next;
+ struct saved_frame *frame_prev;
+ };
+ };
+ void *capital_this;
+ void *frame;
+ struct rpc_req *rpcreq;
+ struct timeval saved_at;
+ rpc_transport_rsp_t rsp;
};
struct saved_frames {
- int64_t count;
- struct saved_frame sf;
- struct saved_frame lk_sf;
+ int64_t count;
+ struct saved_frame sf;
+ struct saved_frame lk_sf;
};
-
/* Initialized by procnum */
typedef struct rpc_clnt_procedure {
- char *procname;
- clnt_fn_t fn;
+ char *procname;
+ clnt_fn_t fn;
} rpc_clnt_procedure_t;
typedef struct rpc_clnt_program {
- char *progname;
- int prognum;
- int progver;
- rpc_clnt_procedure_t *proctable;
- char **procnames;
- int numproc;
+ char *progname;
+ int prognum;
+ int progver;
+ rpc_clnt_procedure_t *proctable;
+ char **procnames;
+ int numproc;
} rpc_clnt_prog_t;
-typedef int (*rpcclnt_cb_fn) (void *data);
+typedef int (*rpcclnt_cb_fn)(struct rpc_clnt *rpc, void *mydata, void *data);
/* The descriptor for each procedure/actor that runs
* over the RPC service.
*/
typedef struct rpcclnt_actor_desc {
- char procname[32];
- int procnum;
- rpcclnt_cb_fn actor;
+ char procname[32];
+ rpcclnt_cb_fn actor;
+ int procnum;
} rpcclnt_cb_actor_t;
/* Describes a program and its version along with the function pointers
@@ -94,101 +94,113 @@ typedef struct rpcclnt_actor_desc {
* Never changed ever by any thread so no need for a lock.
*/
typedef struct rpcclnt_cb_program {
- char progname[32];
- int prognum;
- int progver;
- rpcclnt_cb_actor_t *actors; /* All procedure handlers */
- int numactors; /* Num actors in actor array */
+ char progname[32];
+ int prognum;
+ int progver;
+ rpcclnt_cb_actor_t *actors; /* All procedure handlers */
+ /* Program specific state handed to actors */
+ void *private;
- /* Program specific state handed to actors */
- void *private;
+ /* list member to link to list of registered services with rpc_clnt */
+ struct list_head program;
+ /* Needed for passing back in cb_actor */
+ void *mydata;
+ int numactors; /* Num actors in actor array */
- /* list member to link to list of registered services with rpc_clnt */
- struct list_head program;
} rpcclnt_cb_program_t;
-
-
typedef struct rpc_auth_data {
- int flavour;
- int datalen;
- char authdata[GF_MAX_AUTH_BYTES];
+ int flavour;
+ int datalen;
+ char authdata[GF_MAX_AUTH_BYTES];
} rpc_auth_data_t;
-
struct rpc_clnt_config {
- int rpc_timeout;
- int remote_port;
- char * remote_host;
+ int rpc_timeout;
+ int remote_port;
+ char *remote_host;
+ int ping_timeout;
};
-
-#define rpc_auth_flavour(au) ((au).flavour)
+#define rpc_auth_flavour(au) ((au).flavour)
struct rpc_clnt_connection {
- pthread_mutex_t lock;
- rpc_transport_t *trans;
- struct rpc_clnt_config config;
- gf_timer_t *reconnect;
- gf_timer_t *timer;
- gf_timer_t *ping_timer;
- struct rpc_clnt *rpc_clnt;
- char connected;
- struct saved_frames *saved_frames;
- int32_t frame_timeout;
- struct timeval last_sent;
- struct timeval last_received;
- int32_t ping_started;
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ rpc_transport_t *trans;
+ struct rpc_clnt_config config;
+ gf_timer_t *reconnect;
+ gf_timer_t *timer;
+ gf_timer_t *ping_timer;
+ struct rpc_clnt *rpc_clnt;
+ struct saved_frames *saved_frames;
+ struct timespec last_sent;
+ struct timespec last_received;
+ uint64_t pingcnt;
+ uint64_t msgcnt;
+ uint64_t cleanup_gen;
+ char *name;
+ int32_t ping_started;
+ int32_t frame_timeout;
+ int32_t ping_timeout;
+ gf_boolean_t disconnected;
+ char connected;
};
typedef struct rpc_clnt_connection rpc_clnt_connection_t;
struct rpc_req {
- rpc_clnt_connection_t *conn;
- uint32_t xid;
- struct iovec req[2];
- int reqcnt;
- struct iobref *req_iobref;
- struct iovec rsp[2];
- int rspcnt;
- struct iobref *rsp_iobref;
- int rpc_status;
- rpc_auth_data_t verf;
- rpc_clnt_prog_t *prog;
- int procnum;
- fop_cbk_fn_t cbkfn;
- void *conn_private;
+ rpc_clnt_connection_t *conn;
+ struct iovec req[2];
+ struct iobref *req_iobref;
+ struct iovec rsp[2];
+ int rspcnt;
+ int reqcnt;
+ struct iobref *rsp_iobref;
+ rpc_clnt_prog_t *prog;
+ rpc_auth_data_t verf;
+ fop_cbk_fn_t cbkfn;
+ void *conn_private;
+ int procnum;
+ int rpc_status;
+ uint32_t xid;
};
typedef struct rpc_clnt {
- pthread_mutex_t lock;
- rpc_clnt_notify_t notifyfn;
- rpc_clnt_connection_t conn;
- void *mydata;
- uint64_t xid;
+ pthread_mutex_t lock;
+ rpc_clnt_notify_t notifyfn;
+ rpc_clnt_connection_t conn;
+ void *mydata;
+ gf_atomic_t xid;
- /* list of cb programs registered with rpc-clnt */
- struct list_head programs;
+ /* list of cb programs registered with rpc-clnt */
+ struct list_head programs;
- /* Memory pool for rpc_req_t */
- struct mem_pool *reqpool;
+ /* Memory pool for rpc_req_t */
+ struct mem_pool *reqpool;
- struct mem_pool *saved_frames_pool;
+ struct mem_pool *saved_frames_pool;
- glusterfs_ctx_t *ctx;
- int refcount;
- int auth_null;
- char disabled;
+ glusterfs_ctx_t *ctx;
+ gf_atomic_t refcount;
+ xlator_t *owner;
+ int auth_value;
+ char disabled;
} rpc_clnt_t;
+struct rpc_clnt *
+rpc_clnt_new(dict_t *options, xlator_t *owner, char *name,
+ uint32_t reqpool_size);
-struct rpc_clnt *rpc_clnt_new (dict_t *options, glusterfs_ctx_t *ctx,
- char *name, uint32_t reqpool_size);
+int
+rpc_clnt_start(struct rpc_clnt *rpc);
-int rpc_clnt_start (struct rpc_clnt *rpc);
+int
+rpc_clnt_cleanup_and_start(struct rpc_clnt *rpc);
-int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
- void *mydata);
+int
+rpc_clnt_register_notify(struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
+ void *mydata);
/* Some preconditions related to vectors holding responses.
* @rsphdr: should contain pointer to buffer which can hold response header
@@ -205,40 +217,44 @@ int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
* of the header.
*/
-int rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
- int procnum, fop_cbk_fn_t cbkfn,
- struct iovec *proghdr, int proghdrcount,
- struct iovec *progpayload, int progpayloadcount,
- struct iobref *iobref, void *frame, struct iovec *rsphdr,
- int rsphdr_count, struct iovec *rsp_payload,
- int rsp_payload_count, struct iobref *rsp_iobref);
+int
+rpc_clnt_submit(struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, struct iovec *proghdr, int proghdrcount,
+ struct iovec *progpayload, int progpayloadcount,
+ struct iobref *iobref, void *frame, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref);
struct rpc_clnt *
-rpc_clnt_ref (struct rpc_clnt *rpc);
+rpc_clnt_ref(struct rpc_clnt *rpc);
struct rpc_clnt *
-rpc_clnt_unref (struct rpc_clnt *rpc);
-
-int rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn);
+rpc_clnt_unref(struct rpc_clnt *rpc);
-void rpc_clnt_set_connected (rpc_clnt_connection_t *conn);
+int
+rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn);
+int
+rpc_clnt_reconnect_cleanup(rpc_clnt_connection_t *conn);
+gf_boolean_t
+is_rpc_clnt_disconnected(rpc_clnt_connection_t *conn);
-void rpc_clnt_unset_connected (rpc_clnt_connection_t *conn);
-void rpc_clnt_reconnect (void *trans_ptr);
+void
+rpc_clnt_reconnect(void *trans_ptr);
-void rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config);
+void
+rpc_clnt_reconfig(struct rpc_clnt *rpc, struct rpc_clnt_config *config);
/* All users of RPC services should use this API to register their
* procedure handlers.
*/
-int rpcclnt_cbk_program_register (struct rpc_clnt *svc,
- rpcclnt_cb_program_t *program);
+int
+rpcclnt_cbk_program_register(struct rpc_clnt *svc,
+ rpcclnt_cb_program_t *program, void *mydata);
int
-rpc_clnt_transport_unix_options_build (dict_t **options, char *filepath,
- int frame_timeout);
+rpc_clnt_disable(struct rpc_clnt *rpc);
-void
-rpc_clnt_disable (struct rpc_clnt *rpc);
+int
+rpc_clnt_mgmt_pmap_signout(glusterfs_ctx_t *ctx, char *brick_name);
#endif /* !_RPC_CLNT_H */
diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c
new file mode 100644
index 00000000000..de8dc630626
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.c
@@ -0,0 +1,855 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "rpcsvc.h"
+#ifndef RPC_DRC_H
+#include "rpc-drc.h"
+#endif
+#include <glusterfs/locking.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/mem-pool.h>
+
+#include <netinet/in.h>
+#include <unistd.h>
+
+/**
+ * rpcsvc_drc_op_destroy - Destroys the cached reply
+ *
+ * @param drc - the main drc structure
+ * @param reply - the cached reply to destroy
+ * @return NULL if reply is destroyed, reply otherwise
+ */
+static drc_cached_op_t *
+rpcsvc_drc_op_destroy(rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+ GF_ASSERT(drc);
+ GF_ASSERT(reply);
+
+ if (reply->state == DRC_OP_IN_TRANSIT)
+ return reply;
+
+ iobref_unref(reply->msg.iobref);
+ if (reply->msg.rpchdr)
+ GF_FREE(reply->msg.rpchdr);
+ if (reply->msg.proghdr)
+ GF_FREE(reply->msg.proghdr);
+ if (reply->msg.progpayload)
+ GF_FREE(reply->msg.progpayload);
+
+ list_del(&reply->global_list);
+ reply->client->op_count--;
+ drc->op_count--;
+ mem_put(reply);
+ reply = NULL;
+
+ return reply;
+}
+
+/**
+ * rpcsvc_drc_op_rb_unref - This function is used in rb tree cleanup only
+ *
+ * @param reply - the cached reply to unref
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_drc_rb_op_destroy(void *reply, void *drc)
+{
+ rpcsvc_drc_op_destroy(drc, (drc_cached_op_t *)reply);
+}
+
+/**
+ * rpcsvc_remove_drc_client - Cleanup the drc client
+ *
+ * @param client - the drc client to be removed
+ * @return void
+ */
+static void
+rpcsvc_remove_drc_client(drc_client_t *client)
+{
+ rb_destroy(client->rbtree, rpcsvc_drc_rb_op_destroy);
+ list_del(&client->client_list);
+ GF_FREE(client);
+}
+
+/**
+ * rpcsvc_client_lookup - Given a sockaddr_storage, find the client if it exists
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - the network address of the client to be looked up
+ * @return drc client if it exists, NULL otherwise
+ */
+static drc_client_t *
+rpcsvc_client_lookup(rpcsvc_drc_globals_t *drc,
+ struct sockaddr_storage *sockaddr)
+{
+ drc_client_t *client = NULL;
+
+ GF_ASSERT(drc);
+ GF_ASSERT(sockaddr);
+
+ if (list_empty(&drc->clients_head))
+ return NULL;
+
+ list_for_each_entry(client, &drc->clients_head, client_list)
+ {
+ if (gf_sock_union_equal_addr(&client->sock_union,
+ (union gf_sock_union *)sockaddr))
+ return client;
+ }
+
+ return NULL;
+}
+
+/**
+ * drc_compare_reqs - Used by rbtree to determine if incoming req matches with
+ * an existing node(cached reply) in rbtree
+ *
+ * @param item - pointer to the incoming req
+ * @param rb_node_data - pointer to an rbtree node (cached reply)
+ * @param param - drc pointer - unused here, but used in *op_destroy
+ * @return 0 if req matches reply, else (req->xid - reply->xid)
+ */
+int
+drc_compare_reqs(const void *item, const void *rb_node_data, void *param)
+{
+ int ret = -1;
+ drc_cached_op_t *req = NULL;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT(item);
+ GF_ASSERT(rb_node_data);
+ GF_ASSERT(param);
+
+ req = (drc_cached_op_t *)item;
+ reply = (drc_cached_op_t *)rb_node_data;
+
+ ret = req->xid - reply->xid;
+ if (ret != 0)
+ return ret;
+
+ if (req->prognum == reply->prognum && req->procnum == reply->procnum &&
+ req->progversion == reply->progversion)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * drc_init_client_cache - initialize a drc client and its rb tree
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to be initialized
+ * @return 0 on success, -1 on failure
+ */
+static int
+drc_init_client_cache(rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+ GF_ASSERT(drc);
+ GF_ASSERT(client);
+
+ client->rbtree = rb_create(drc_compare_reqs, drc, NULL);
+ if (!client->rbtree) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "rb tree creation failed");
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * rpcsvc_get_drc_client - find the drc client with given sockaddr, else
+ * allocate and initialize a new drc client
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - network address of client
+ * @return drc client on success, NULL on failure
+ */
+static drc_client_t *
+rpcsvc_get_drc_client(rpcsvc_drc_globals_t *drc,
+ struct sockaddr_storage *sockaddr)
+{
+ drc_client_t *client = NULL;
+
+ GF_ASSERT(drc);
+ GF_ASSERT(sockaddr);
+
+ client = rpcsvc_client_lookup(drc, sockaddr);
+ if (client)
+ goto out;
+
+ /* if lookup fails, allocate cache for the new client */
+ client = GF_CALLOC(1, sizeof(drc_client_t), gf_common_mt_drc_client_t);
+ if (!client)
+ goto out;
+
+ GF_ATOMIC_INIT(client->ref, 0);
+ client->sock_union = (union gf_sock_union) * sockaddr;
+ client->op_count = 0;
+ INIT_LIST_HEAD(&client->client_list);
+
+ if (drc_init_client_cache(drc, client)) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "initialization of drc client failed");
+ GF_FREE(client);
+ client = NULL;
+ goto out;
+ }
+ drc->client_count++;
+
+ list_add(&client->client_list, &drc->clients_head);
+
+out:
+ return client;
+}
+
+/**
+ * rpcsvc_need_drc - Determine if a request needs DRC service
+ *
+ * @param req - incoming request
+ * @return 1 if DRC is needed for req, 0 otherwise
+ */
+int
+rpcsvc_need_drc(rpcsvc_request_t *req)
+{
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(req);
+ GF_ASSERT(req->svc);
+
+ drc = req->svc->drc;
+
+ if (!drc || drc->status == DRC_UNINITIATED)
+ return 0;
+
+ actor = rpcsvc_program_actor(req);
+ if (!actor)
+ return 0;
+
+ return (actor->op_type == DRC_NON_IDEMPOTENT && drc->type != DRC_TYPE_NONE);
+}
+
+/**
+ * rpcsvc_drc_client_ref - ref the drc client
+ *
+ * @param client - the drc client to ref
+ * @return client
+ */
+static drc_client_t *
+rpcsvc_drc_client_ref(drc_client_t *client)
+{
+ GF_ASSERT(client);
+ GF_ATOMIC_INC(client->ref);
+ return client;
+}
+
+/**
+ * rpcsvc_drc_client_unref - unref the drc client, and destroy
+ * the client on last unref
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to unref
+ * @return NULL if it is the last unref, client otherwise
+ */
+static drc_client_t *
+rpcsvc_drc_client_unref(rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+ uint32_t refcount;
+
+ GF_ASSERT(drc);
+
+ refcount = GF_ATOMIC_DEC(client->ref);
+ if (!refcount) {
+ drc->client_count--;
+ rpcsvc_remove_drc_client(client);
+ client = NULL;
+ }
+
+ return client;
+}
+
+/**
+ * rpcsvc_drc_lookup - lookup a request to see if it is already cached
+ *
+ * @param req - incoming request
+ * @return cached reply of req if found, NULL otherwise
+ */
+drc_cached_op_t *
+rpcsvc_drc_lookup(rpcsvc_request_t *req)
+{
+ drc_client_t *client = NULL;
+ drc_cached_op_t *reply = NULL;
+ drc_cached_op_t new = {
+ .xid = req->xid,
+ .prognum = req->prognum,
+ .progversion = req->progver,
+ .procnum = req->procnum,
+ };
+
+ GF_ASSERT(req);
+
+ if (!req->trans->drc_client) {
+ client = rpcsvc_get_drc_client(req->svc->drc,
+ &req->trans->peerinfo.sockaddr);
+ if (!client)
+ goto out;
+
+ req->trans->drc_client = rpcsvc_drc_client_ref(client);
+ }
+
+ client = req->trans->drc_client;
+
+ if (client->op_count == 0)
+ goto out;
+
+ reply = rb_find(client->rbtree, &new);
+
+out:
+ return reply;
+}
+
+/**
+ * rpcsvc_send_cached_reply - send the cached reply for the incoming request
+ *
+ * @param req - incoming request (which is a duplicate in this case)
+ * @param reply - the cached reply for req
+ * @return 0 on successful reply submission, -1 or other non-zero value
+ * otherwise
+ */
+int
+rpcsvc_send_cached_reply(rpcsvc_request_t *req, drc_cached_op_t *reply)
+{
+ int ret = 0;
+
+ GF_ASSERT(req);
+ GF_ASSERT(reply);
+
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "sending cached reply: xid: %d, "
+ "client: %s",
+ req->xid, req->trans->peerinfo.identifier);
+
+ rpcsvc_drc_client_ref(reply->client);
+ ret = rpcsvc_transport_submit(
+ req->trans, reply->msg.rpchdr, reply->msg.rpchdrcount,
+ reply->msg.proghdr, reply->msg.proghdrcount, reply->msg.progpayload,
+ reply->msg.progpayloadcount, reply->msg.iobref, req->trans_private);
+ rpcsvc_drc_client_unref(req->svc->drc, reply->client);
+
+ return ret;
+}
+
+/**
+ * rpcsvc_cache_reply - cache the reply for the processed request 'req'
+ *
+ * @param req - processed request
+ * @param iobref - iobref structure of the reply
+ * @param rpchdr - rpc header of the reply
+ * @param rpchdrcount - size of rpchdr
+ * @param proghdr - program header of the reply
+ * @param proghdrcount - size of proghdr
+ * @param payload - payload of the reply if any
+ * @param payloadcount - size of payload
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_cache_reply(rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *payload, int payloadcount)
+{
+ int ret = -1;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT(req);
+ GF_ASSERT(req->reply);
+
+ reply = req->reply;
+
+ reply->state = DRC_OP_CACHED;
+
+ reply->msg.iobref = iobref_ref(iobref);
+
+ reply->msg.rpchdrcount = rpchdrcount;
+ reply->msg.rpchdr = iov_dup(rpchdr, rpchdrcount);
+
+ reply->msg.proghdrcount = proghdrcount;
+ reply->msg.proghdr = iov_dup(proghdr, proghdrcount);
+
+ reply->msg.progpayloadcount = payloadcount;
+ if (payloadcount)
+ reply->msg.progpayload = iov_dup(payload, payloadcount);
+
+ // rpcsvc_drc_client_unref (req->svc->drc, req->trans->drc_client);
+ // rpcsvc_drc_op_unref (req->svc->drc, reply);
+ ret = 0;
+
+ return ret;
+}
+
+/**
+ * rpcsvc_vacate_drc_entries - free up some percentage of drc cache
+ * based on the lru factor
+ *
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_vacate_drc_entries(rpcsvc_drc_globals_t *drc)
+{
+ uint32_t i = 0;
+ uint32_t n = 0;
+ drc_cached_op_t *reply = NULL;
+ drc_cached_op_t *tmp = NULL;
+ drc_client_t *client = NULL;
+
+ GF_ASSERT(drc);
+
+ n = drc->global_cache_size / drc->lru_factor;
+
+ list_for_each_entry_safe_reverse(reply, tmp, &drc->cache_head, global_list)
+ {
+ /* Don't delete ops that are in transit */
+ if (reply->state == DRC_OP_IN_TRANSIT)
+ continue;
+
+ client = reply->client;
+
+ rb_delete(client->rbtree, reply);
+
+ rpcsvc_drc_op_destroy(drc, reply);
+ rpcsvc_drc_client_unref(drc, client);
+ i++;
+ if (i >= n)
+ break;
+ }
+}
+
+/**
+ * rpcsvc_add_op_to_cache - insert the cached op into the client rbtree and drc
+ * list
+ *
+ * @param drc - the main drc structure
+ * @param reply - the op to be inserted
+ * @return 0 on success, -1 on failure
+ */
+static int
+rpcsvc_add_op_to_cache(rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+ drc_client_t *client = NULL;
+ drc_cached_op_t **tmp_reply = NULL;
+
+ GF_ASSERT(drc);
+ GF_ASSERT(reply);
+
+ client = reply->client;
+
+ /* cache is full, free up some space */
+ if (drc->op_count >= drc->global_cache_size)
+ rpcsvc_vacate_drc_entries(drc);
+
+ tmp_reply = (drc_cached_op_t **)rb_probe(client->rbtree, reply);
+ if (!tmp_reply) {
+ /* mem alloc failed */
+ return -1;
+ } else if (*tmp_reply != reply) {
+ /* should never happen */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "DRC failed to detect duplicates");
+ return -1;
+ }
+
+ client->op_count++;
+ list_add(&reply->global_list, &drc->cache_head);
+ drc->op_count++;
+
+ return 0;
+}
+
+/**
+ * rpcsvc_cache_request - cache the in-transition incoming request
+ *
+ * @param req - incoming request
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_cache_request(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ drc_client_t *client = NULL;
+ drc_cached_op_t *reply = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(req);
+
+ drc = req->svc->drc;
+
+ client = req->trans->drc_client;
+ if (!client) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "drc client is NULL");
+ goto out;
+ }
+
+ reply = mem_get0(drc->mempool);
+ if (!reply)
+ goto out;
+
+ reply->client = rpcsvc_drc_client_ref(client);
+ reply->xid = req->xid;
+ reply->prognum = req->prognum;
+ reply->progversion = req->progver;
+ reply->procnum = req->procnum;
+ reply->state = DRC_OP_IN_TRANSIT;
+ req->reply = reply;
+ INIT_LIST_HEAD(&reply->global_list);
+
+ ret = rpcsvc_add_op_to_cache(drc, reply);
+ if (ret) {
+ req->reply = NULL;
+ rpcsvc_drc_op_destroy(drc, reply);
+ rpcsvc_drc_client_unref(drc, client);
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "Failed to add op to drc cache");
+ }
+
+out:
+ return ret;
+}
+
+/**
+ *
+ * rpcsvc_drc_priv - function which dumps the drc state
+ *
+ * @param drc - the main drc structure
+ * @return 0 on success, -1 on failure
+ */
+int32_t
+rpcsvc_drc_priv(rpcsvc_drc_globals_t *drc)
+{
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0};
+ drc_client_t *client = NULL;
+ char ip[INET6_ADDRSTRLEN] = {0};
+
+ if (!drc || drc->status == DRC_UNINITIATED) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "DRC is "
+ "uninitialized, not dumping its state");
+ return 0;
+ }
+
+ gf_proc_dump_add_section("rpc.drc");
+
+ if (TRY_LOCK(&drc->lock))
+ return -1;
+
+ gf_proc_dump_build_key(key, "drc", "type");
+ gf_proc_dump_write(key, "%d", drc->type);
+
+ gf_proc_dump_build_key(key, "drc", "client_count");
+ gf_proc_dump_write(key, "%d", drc->client_count);
+
+ gf_proc_dump_build_key(key, "drc", "current_cache_size");
+ gf_proc_dump_write(key, "%d", drc->op_count);
+
+ gf_proc_dump_build_key(key, "drc", "max_cache_size");
+ gf_proc_dump_write(key, "%d", drc->global_cache_size);
+
+ gf_proc_dump_build_key(key, "drc", "lru_factor");
+ gf_proc_dump_write(key, "%d", drc->lru_factor);
+
+ gf_proc_dump_build_key(key, "drc", "duplicate_request_count");
+ gf_proc_dump_write(key, "%" PRIu64, drc->cache_hits);
+
+ gf_proc_dump_build_key(key, "drc", "in_transit_duplicate_requests");
+ gf_proc_dump_write(key, "%" PRIu64, drc->intransit_hits);
+
+ list_for_each_entry(client, &drc->clients_head, client_list)
+ {
+ gf_proc_dump_build_key(key, "client", "%d.ip-address", i);
+ memset(ip, 0, INET6_ADDRSTRLEN);
+ switch (client->sock_union.storage.ss_family) {
+ case AF_INET:
+ gf_proc_dump_write(
+ key, "%s",
+ inet_ntop(AF_INET, &client->sock_union.sin.sin_addr.s_addr,
+ ip, INET_ADDRSTRLEN));
+ break;
+ case AF_INET6:
+ gf_proc_dump_write(
+ key, "%s",
+ inet_ntop(AF_INET6, &client->sock_union.sin6.sin6_addr, ip,
+ INET6_ADDRSTRLEN));
+ break;
+ default:
+ gf_proc_dump_write(key, "%s", "N/A");
+ }
+
+ gf_proc_dump_build_key(key, "client", "%d.ref_count", i);
+ gf_proc_dump_write(key, "%" PRIu32, GF_ATOMIC_GET(client->ref));
+ gf_proc_dump_build_key(key, "client", "%d.op_count", i);
+ gf_proc_dump_write(key, "%d", client->op_count);
+ i++;
+ }
+
+ UNLOCK(&drc->lock);
+ return 0;
+}
+
+/**
+ * rpcsvc_drc_notify - function which is notified of RPC transport events
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param xl - pointer to the xlator
+ * @param event - the event which triggered this notify
+ * @param data - the transport structure
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_drc_notify(rpcsvc_t *svc, void *xl, rpcsvc_event_t event, void *data)
+{
+ int ret = -1;
+ rpc_transport_t *trans = NULL;
+ drc_client_t *client = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(svc->drc);
+ GF_ASSERT(data);
+
+ drc = svc->drc;
+
+ if (drc->status == DRC_UNINITIATED || drc->type == DRC_TYPE_NONE)
+ return 0;
+
+ LOCK(&drc->lock);
+ {
+ trans = (rpc_transport_t *)data;
+ client = rpcsvc_get_drc_client(drc, &trans->peerinfo.sockaddr);
+ if (!client)
+ goto unlock;
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ trans->drc_client = rpcsvc_drc_client_ref(client);
+ ret = 0;
+ break;
+
+ case RPCSVC_EVENT_DISCONNECT:
+ ret = 0;
+ if (list_empty(&drc->clients_head))
+ break;
+ /* should be the last unref */
+ trans->drc_client = NULL;
+ rpcsvc_drc_client_unref(drc, client);
+ break;
+
+ default:
+ break;
+ }
+ }
+unlock:
+ UNLOCK(&drc->lock);
+ return ret;
+}
+
+/**
+ * rpcsvc_drc_init - Initialize the duplicate request cache service
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param options - the options dictionary which configures drc
+ * @return 0 on success, non-zero integer on failure
+ */
+int
+rpcsvc_drc_init(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = 0;
+ uint32_t drc_type = 0;
+ uint32_t drc_size = 0;
+ uint32_t drc_factor = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
+
+ /* Toggle DRC on/off, when more drc types(persistent/cluster)
+ * are added, we shouldn't treat this as boolean. */
+ ret = dict_get_str_boolean(options, "nfs.drc", _gf_false);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "drc user options need second look");
+ ret = _gf_false;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "DRC is turned %s", (ret ? "ON" : "OFF"));
+
+ /*DRC off, nothing to do */
+ if (ret == _gf_false)
+ return (0);
+
+ drc = GF_CALLOC(1, sizeof(rpcsvc_drc_globals_t),
+ gf_common_mt_drc_globals_t);
+ if (!drc)
+ return (-1);
+
+ LOCK_INIT(&drc->lock);
+ svc->drc = drc;
+
+ /* Specify type of DRC to be used */
+ ret = dict_get_uint32(options, "nfs.drc-type", &drc_type);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "drc type not set. Continuing with default");
+ drc_type = DRC_DEFAULT_TYPE;
+ }
+
+ /* Set the global cache size (no. of ops to cache) */
+ ret = dict_get_uint32(options, "nfs.drc-size", &drc_size);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "drc size not set. Continuing with default size");
+ drc_size = DRC_DEFAULT_CACHE_SIZE;
+ }
+
+ LOCK(&drc->lock);
+
+ drc->type = drc_type;
+ drc->global_cache_size = drc_size;
+
+ /* Mempool for cached ops */
+ drc->mempool = mem_pool_new(drc_cached_op_t, drc->global_cache_size);
+ if (!drc->mempool) {
+ UNLOCK(&drc->lock);
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to get mempool for DRC, drc-size: %d", drc_size);
+ ret = -1;
+ goto post_unlock;
+ }
+
+ /* What percent of cache to be evicted whenever it fills up */
+ ret = dict_get_uint32(options, "nfs.drc-lru-factor", &drc_factor);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "drc lru factor not set. Continuing with policy default");
+ drc_factor = DRC_DEFAULT_LRU_FACTOR;
+ }
+
+ drc->lru_factor = (drc_lru_factor_t)drc_factor;
+
+ INIT_LIST_HEAD(&drc->clients_head);
+ INIT_LIST_HEAD(&drc->cache_head);
+
+ ret = rpcsvc_register_notify(svc, rpcsvc_drc_notify, THIS);
+ if (ret) {
+ UNLOCK(&drc->lock);
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "registration of drc_notify function failed");
+ goto post_unlock;
+ }
+
+ drc->status = DRC_INITIATED;
+ UNLOCK(&drc->lock);
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "drc init successful");
+post_unlock:
+ if (ret == -1) {
+ if (drc->mempool) {
+ mem_pool_destroy(drc->mempool);
+ drc->mempool = NULL;
+ }
+ GF_FREE(drc);
+ svc->drc = NULL;
+ }
+ return ret;
+}
+
+int
+rpcsvc_drc_deinit(rpcsvc_t *svc)
+{
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ if (!svc)
+ return (-1);
+
+ drc = svc->drc;
+ if (!drc)
+ return (0);
+
+ LOCK(&drc->lock);
+ (void)rpcsvc_unregister_notify(svc, rpcsvc_drc_notify, THIS);
+ if (drc->mempool) {
+ mem_pool_destroy(drc->mempool);
+ drc->mempool = NULL;
+ }
+ UNLOCK(&drc->lock);
+
+ GF_FREE(drc);
+ svc->drc = NULL;
+
+ return (0);
+}
+
+int
+rpcsvc_drc_reconfigure(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+ gf_boolean_t enable_drc = _gf_false;
+ rpcsvc_drc_globals_t *drc = NULL;
+ uint32_t drc_size = 0;
+
+ /* Input sanitization */
+ if ((!svc) || (!options))
+ return (-1);
+
+ /* If DRC was not enabled before, Let rpcsvc_drc_init() to
+ * take care of DRC initialization part.
+ */
+ drc = svc->drc;
+ if (!drc) {
+ return rpcsvc_drc_init(svc, options);
+ }
+
+ /* DRC was already enabled before. Going to be reconfigured. Check
+ * if reconfigured options contain "nfs.drc" and "nfs.drc-size".
+ *
+ * NB: If DRC is "OFF", "drc-size" has no role to play.
+ * So, "drc-size" gets evaluated IFF DRC is "ON".
+ *
+ * If DRC is reconfigured,
+ * case 1: DRC is "ON"
+ * sub-case 1: drc-size remains same
+ * ACTION: Nothing to do.
+ * sub-case 2: drc-size just changed
+ * ACTION: rpcsvc_drc_deinit() followed by
+ * rpcsvc_drc_init().
+ *
+ * case 2: DRC is "OFF"
+ * ACTION: rpcsvc_drc_deinit()
+ */
+ ret = dict_get_str_boolean(options, "nfs.drc", _gf_false);
+ if (ret < 0)
+ ret = _gf_false;
+
+ enable_drc = ret;
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "DRC is turned %s", (ret ? "ON" : "OFF"));
+
+ /* case 1: DRC is "ON"*/
+ if (enable_drc) {
+ /* Fetch drc-size if reconfigured */
+ if (dict_get_uint32(options, "nfs.drc-size", &drc_size))
+ drc_size = DRC_DEFAULT_CACHE_SIZE;
+
+ /* case 1: sub-case 1*/
+ if (drc->global_cache_size == drc_size)
+ return (0);
+
+ /* case 1: sub-case 2*/
+ (void)rpcsvc_drc_deinit(svc);
+ return rpcsvc_drc_init(svc, options);
+ }
+
+ /* case 2: DRC is "OFF" */
+ return rpcsvc_drc_deinit(svc);
+}
diff --git a/rpc/rpc-lib/src/rpc-drc.h b/rpc/rpc-lib/src/rpc-drc.h
new file mode 100644
index 00000000000..ce66430809b
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef RPC_DRC_H
+#define RPC_DRC_H
+
+#include "rpcsvc-common.h"
+#include "rpcsvc.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/dict.h>
+#include "rb.h"
+
+/* per-client cache structure */
+struct drc_client {
+ union gf_sock_union sock_union;
+ /* pointers to the cache */
+ struct rb_table *rbtree;
+ /* no. of ops currently cached */
+ uint32_t op_count;
+ gf_atomic_uint32_t ref;
+ struct list_head client_list;
+};
+
+struct drc_cached_op {
+ drc_op_state_t state;
+ int prognum;
+ int progversion;
+ int procnum;
+ rpc_transport_msg_t msg;
+ drc_client_t *client;
+ struct list_head client_list;
+ struct list_head global_list;
+ int32_t ref;
+ uint32_t xid;
+};
+
+/* global drc definitions */
+enum drc_status { DRC_UNINITIATED, DRC_INITIATED };
+typedef enum drc_status drc_status_t;
+
+struct drc_globals {
+ /* allocator must be the first member since
+ * it is used so in gf_libavl_allocator
+ */
+ struct libavl_allocator allocator;
+ /* configurable size parameter */
+ gf_lock_t lock;
+ uint64_t cache_hits;
+ uint64_t intransit_hits;
+ struct mem_pool *mempool;
+ struct list_head cache_head;
+ struct list_head clients_head;
+ uint32_t op_count;
+ uint32_t client_count;
+ uint32_t global_cache_size;
+ drc_type_t type;
+ drc_lru_factor_t lru_factor;
+ drc_status_t status;
+};
+
+int
+rpcsvc_need_drc(rpcsvc_request_t *req);
+
+drc_cached_op_t *
+rpcsvc_drc_lookup(rpcsvc_request_t *req);
+
+int
+rpcsvc_send_cached_reply(rpcsvc_request_t *req, drc_cached_op_t *reply);
+
+int
+rpcsvc_cache_reply(rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *payload, int payloadcount);
+
+int
+rpcsvc_cache_request(rpcsvc_request_t *req);
+
+int32_t
+rpcsvc_drc_priv(rpcsvc_drc_globals_t *drc);
+
+int
+rpcsvc_drc_init(rpcsvc_t *svc, dict_t *options);
+
+int
+rpcsvc_drc_deinit(rpcsvc_t *svc);
+
+int
+rpcsvc_drc_reconfigure(rpcsvc_t *svc, dict_t *options);
+
+#endif /* RPC_DRC_H */
diff --git a/rpc/rpc-lib/src/rpc-lib-messages.h b/rpc/rpc-lib/src/rpc-lib-messages.h
new file mode 100644
index 00000000000..2c0b820dbf9
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-lib-messages.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _RPC_LIB_MESSAGES_H_
+#define _RPC_LIB_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(RPC_LIB, TRANS_MSG_ADDR_FAMILY_NOT_SPECIFIED,
+ TRANS_MSG_UNKNOWN_ADDR_FAMILY, TRANS_MSG_REMOTE_HOST_ERROR,
+ TRANS_MSG_DNS_RESOL_FAILED, TRANS_MSG_LISTEN_PATH_ERROR,
+ TRANS_MSG_CONNECT_PATH_ERROR, TRANS_MSG_GET_ADDR_INFO_FAILED,
+ TRANS_MSG_PORT_BIND_FAILED, TRANS_MSG_INET_ERROR,
+ TRANS_MSG_GET_NAME_INFO_FAILED, TRANS_MSG_TRANSPORT_ERROR,
+ TRANS_MSG_TIMEOUT_EXCEEDED, TRANS_MSG_SOCKET_BIND_ERROR);
+
+#endif /* !_RPC_LIB_MESSAGES_H_ */
diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index e07bf0d7604..a6e201a9b36 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -12,601 +12,661 @@
#include <stdlib.h>
#include <stdio.h>
#include <sys/poll.h>
-#include <fnmatch.h>
#include <stdint.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "logging.h"
#include "rpc-transport.h"
-#include "glusterfs.h"
-/* FIXME: xlator.h is needed for volume_option_t, need to define the datatype
- * in some other header
- */
-#include "xlator.h"
-#include "list.h"
#ifndef GF_OPTION_LIST_EMPTY
#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
#endif
+int32_t
+rpc_transport_count(const char *transport_type)
+{
+ char *transport_dup = NULL;
+ char *saveptr = NULL;
+ char *ptr = NULL;
+ int count = 0;
+
+ if (transport_type == NULL)
+ return -1;
+
+ transport_dup = gf_strdup(transport_type);
+ if (transport_dup == NULL) {
+ return -1;
+ }
+
+ ptr = strtok_r(transport_dup, ",", &saveptr);
+ while (ptr != NULL) {
+ count++;
+ ptr = strtok_r(NULL, ",", &saveptr);
+ }
+
+ GF_FREE(transport_dup);
+ return count;
+}
int
-rpc_transport_get_myaddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen)
+rpc_transport_get_myaddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- ret = this->ops->get_myaddr (this, peeraddr, addrlen, sa, salen);
+ ret = this->ops->get_myaddr(this, peeraddr, addrlen, sa, salen);
out:
- return ret;
+ return ret;
}
int32_t
-rpc_transport_get_myname (rpc_transport_t *this, char *hostname, int hostlen)
+rpc_transport_get_peername(rpc_transport_t *this, char *hostname, int hostlen)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- ret = this->ops->get_myname (this, hostname, hostlen);
+ ret = this->ops->get_peername(this, hostname, hostlen);
out:
- return ret;
+ return ret;
}
-int32_t
-rpc_transport_get_peername (rpc_transport_t *this, char *hostname, int hostlen)
+int
+rpc_transport_throttle(rpc_transport_t *this, gf_boolean_t onoff)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ if (!this->ops->throttle)
+ return -ENOSYS;
- ret = this->ops->get_peername (this, hostname, hostlen);
-out:
- return ret;
+ return this->ops->throttle(this, onoff);
}
int32_t
-rpc_transport_get_peeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen)
+rpc_transport_get_peeraddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- ret = this->ops->get_peeraddr (this, peeraddr, addrlen, sa, salen);
+ ret = this->ops->get_peeraddr(this, peeraddr, addrlen, sa, salen);
out:
- return ret;
+ return ret;
}
void
-rpc_transport_pollin_destroy (rpc_transport_pollin_t *pollin)
+rpc_transport_pollin_destroy(rpc_transport_pollin_t *pollin)
{
- GF_VALIDATE_OR_GOTO ("rpc", pollin, out);
+ GF_VALIDATE_OR_GOTO("rpc", pollin, out);
- if (pollin->iobref) {
- iobref_unref (pollin->iobref);
- }
+ if (pollin->iobref) {
+ iobref_unref(pollin->iobref);
+ }
- if (pollin->hdr_iobuf) {
- iobuf_unref (pollin->hdr_iobuf);
- }
+ if (pollin->private) {
+ /* */
+ GF_FREE(pollin->private);
+ }
- if (pollin->private) {
- /* */
- GF_FREE (pollin->private);
- }
-
- GF_FREE (pollin);
+ GF_FREE(pollin);
out:
- return;
+ return;
}
-
rpc_transport_pollin_t *
-rpc_transport_pollin_alloc (rpc_transport_t *this, struct iovec *vector,
- int count, struct iobuf *hdr_iobuf,
- struct iobref *iobref, void *private)
+rpc_transport_pollin_alloc(rpc_transport_t *this, struct iovec *vector,
+ int count, struct iobuf *hdr_iobuf,
+ struct iobref *iobref, void *private)
{
- rpc_transport_pollin_t *msg = NULL;
- msg = GF_CALLOC (1, sizeof (*msg), gf_common_mt_rpc_trans_pollin_t);
- if (!msg) {
- goto out;
- }
+ rpc_transport_pollin_t *msg = NULL;
+ msg = GF_CALLOC(1, sizeof(*msg), gf_common_mt_rpc_trans_pollin_t);
+ if (!msg) {
+ goto out;
+ }
- if (count > 1) {
- msg->vectored = 1;
- }
+ msg->trans = this;
+
+ if (count > 1) {
+ msg->vectored = 1;
+ }
- memcpy (msg->vector, vector, count * sizeof (*vector));
- msg->count = count;
- msg->iobref = iobref_ref (iobref);
- msg->private = private;
- if (hdr_iobuf)
- msg->hdr_iobuf = iobuf_ref (hdr_iobuf);
+ memcpy(msg->vector, vector, count * sizeof(*vector));
+ msg->count = count;
+ msg->iobref = iobref_ref(iobref);
+ msg->private = private;
+ if (hdr_iobuf)
+ iobref_add(iobref, hdr_iobuf);
out:
- return msg;
+ return msg;
}
+void
+rpc_transport_cleanup(rpc_transport_t *trans)
+{
+ if (!trans)
+ return;
+
+ if (trans->fini)
+ trans->fini(trans);
+
+ if (trans->options) {
+ dict_unref(trans->options);
+ trans->options = NULL;
+ }
+
+ GF_FREE(trans->name);
+
+ if (trans->xl)
+ pthread_mutex_destroy(&trans->lock);
+ if (trans->dl_handle)
+ dlclose(trans->dl_handle);
+
+ GF_FREE(trans);
+}
rpc_transport_t *
-rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
+rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
{
- struct rpc_transport *trans = NULL, *return_trans = NULL;
- char *name = NULL;
- void *handle = NULL;
- char *type = NULL;
- char str[] = "ERROR";
- int32_t ret = -1;
- int8_t is_tcp = 0, is_unix = 0, is_ibsdp = 0;
- volume_opt_list_t *vol_opt = NULL;
- gf_boolean_t bind_insecure = _gf_false;
-
- GF_VALIDATE_OR_GOTO("rpc-transport", options, fail);
- GF_VALIDATE_OR_GOTO("rpc-transport", ctx, fail);
- GF_VALIDATE_OR_GOTO("rpc-transport", trans_name, fail);
-
- trans = GF_CALLOC (1, sizeof (struct rpc_transport), gf_common_mt_rpc_trans_t);
- if (!trans)
- goto fail;
-
- trans->name = gf_strdup (trans_name);
- if (!trans->name)
- goto fail;
-
- trans->ctx = ctx;
- type = str;
-
- /* Backward compatibility */
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- ret = dict_set_str (options, "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- else
- gf_log ("rpc-transport", GF_LOG_WARNING,
- "missing 'option transport-type'. defaulting to "
- "\"socket\"");
- } else {
- {
- /* Backword compatibility to handle * /client,
- * * /server.
- */
- char *tmp = strchr (type, '/');
- if (tmp)
- *tmp = '\0';
- }
-
- is_tcp = strcmp (type, "tcp");
- is_unix = strcmp (type, "unix");
- is_ibsdp = strcmp (type, "ib-sdp");
- if ((is_tcp == 0) ||
- (is_unix == 0) ||
- (is_ibsdp == 0)) {
- if (is_unix == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "unix");
- if (is_ibsdp == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "inet-sdp");
-
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting address-family failed");
-
- ret = dict_set_str (options,
- "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- }
- }
-
- /* client-bind-insecure is for clients protocol, and
- * bind-insecure for glusterd. Both mutually exclusive
- */
- ret = dict_get_str (options, "client-bind-insecure", &type);
- if (ret)
- ret = dict_get_str (options, "bind-insecure", &type);
- if (ret == 0) {
- ret = gf_string2boolean (type, &bind_insecure);
- if (ret < 0) {
- gf_log ("rcp-transport", GF_LOG_WARNING,
- "bind-insecure option %s is not a"
- " valid bool option", type);
- goto fail;
- }
- if (_gf_true == bind_insecure)
- trans->bind_insecure = 1;
- else
- trans->bind_insecure = 0;
- } else {
- trans->bind_insecure = 0;
+ struct rpc_transport *trans = NULL, *return_trans = NULL;
+ char *name = NULL;
+ void *handle = NULL;
+ char *type = NULL;
+ static char str[] = "ERROR";
+ int32_t ret = -1;
+ int is_tcp = 0, is_unix = 0, is_ibsdp = 0;
+ volume_opt_list_t *vol_opt = NULL;
+ gf_boolean_t bind_insecure = _gf_false;
+ xlator_t *this = NULL;
+ gf_boolean_t success = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("rpc-transport", options, fail);
+ GF_VALIDATE_OR_GOTO("rpc-transport", ctx, fail);
+ GF_VALIDATE_OR_GOTO("rpc-transport", trans_name, fail);
+
+ trans = GF_CALLOC(1, sizeof(struct rpc_transport),
+ gf_common_mt_rpc_trans_t);
+ if (!trans)
+ goto fail;
+
+ trans->name = gf_strdup(trans_name);
+ if (!trans->name)
+ goto fail;
+
+ trans->ctx = ctx;
+ type = str;
+
+ /* Backward compatibility */
+ ret = dict_get_str_sizen(options, "transport-type", &type);
+ if (ret < 0) {
+ ret = dict_set_str_sizen(options, "transport-type", "socket");
+ if (ret < 0)
+ gf_log("dict", GF_LOG_DEBUG, "setting transport-type failed");
+ else
+ gf_log("rpc-transport", GF_LOG_DEBUG,
+ "missing 'option transport-type'. defaulting to "
+ "\"socket\"");
+ } else {
+ {
+ /* Backward compatibility to handle * /client,
+ * * /server.
+ */
+ char *tmp = strchr(type, '/');
+ if (tmp)
+ *tmp = '\0';
}
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "'option transport-type <xx>' missing in volume '%s'",
- trans_name);
- goto fail;
- }
-
- ret = gf_asprintf (&name, "%s/%s.so", RPC_TRANSPORTDIR, type);
- if (-1 == ret) {
- goto fail;
+ is_tcp = strcmp(type, "tcp");
+ is_unix = strcmp(type, "unix");
+ is_ibsdp = strcmp(type, "ib-sdp");
+ if ((is_tcp == 0) || (is_unix == 0) || (is_ibsdp == 0)) {
+ if (is_unix == 0)
+ ret = dict_set_str_sizen(options, "transport.address-family",
+ "unix");
+ if (is_ibsdp == 0)
+ ret = dict_set_str_sizen(options, "transport.address-family",
+ "inet-sdp");
+
+ if (ret < 0)
+ gf_log("dict", GF_LOG_DEBUG, "setting address-family failed");
+
+ ret = dict_set_str_sizen(options, "transport-type", "socket");
+ if (ret < 0)
+ gf_log("dict", GF_LOG_DEBUG, "setting transport-type failed");
}
-
- gf_log ("rpc-transport", GF_LOG_DEBUG,
- "attempt to load file %s", name);
-
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (handle == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR, "%s", dlerror ());
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "volume '%s': transport-type '%s' is not valid or "
- "not found on this machine",
- trans_name, type);
- goto fail;
- }
-
- trans->ops = dlsym (handle, "tops");
- if (trans->ops == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "dlsym (rpc_transport_ops) on %s", dlerror ());
- goto fail;
- }
-
- trans->init = dlsym (handle, "init");
- if (trans->init == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "dlsym (gf_rpc_transport_init) on %s", dlerror ());
- goto fail;
- }
-
- trans->fini = dlsym (handle, "fini");
- if (trans->fini == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "dlsym (gf_rpc_transport_fini) on %s", dlerror ());
- goto fail;
- }
-
- trans->reconfigure = dlsym (handle, "reconfigure");
- if (trans->fini == NULL) {
- gf_log ("rpc-transport", GF_LOG_DEBUG,
- "dlsym (gf_rpc_transport_reconfigure) on %s", dlerror());
+ }
+
+ /* client-bind-insecure is for clients protocol, and
+ * bind-insecure for glusterd. Both mutually exclusive
+ */
+ ret = dict_get_str_sizen(options, "client-bind-insecure", &type);
+ if (ret)
+ ret = dict_get_str_sizen(options, "bind-insecure", &type);
+ if (ret == 0) {
+ ret = gf_string2boolean(type, &bind_insecure);
+ if (ret < 0) {
+ gf_log("rcp-transport", GF_LOG_WARNING,
+ "bind-insecure option %s is not a"
+ " valid bool option",
+ type);
+ goto fail;
}
-
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
- if (!vol_opt) {
- goto fail;
+ if (_gf_true == bind_insecure)
+ trans->bind_insecure = 1;
+ else
+ trans->bind_insecure = 0;
+ } else {
+ /* By default allow bind insecure */
+ trans->bind_insecure = 1;
+ }
+
+ ret = dict_get_str_sizen(options, "transport-type", &type);
+ if (ret < 0) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "'option transport-type <xx>' missing in volume '%s'",
+ trans_name);
+ goto fail;
+ }
+
+ ret = gf_asprintf(&name, "%s/%s.so", RPC_TRANSPORTDIR, type);
+ if (-1 == ret) {
+ goto fail;
+ }
+
+ if (dict_get_sizen(options, "notify-poller-death")) {
+ trans->notify_poller_death = 1;
+ }
+
+ gf_log("rpc-transport", GF_LOG_DEBUG, "attempt to load file %s", name);
+
+ handle = dlopen(name, RTLD_NOW);
+ if (handle == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR, "%s", dlerror());
+ gf_log("rpc-transport", GF_LOG_WARNING,
+ "volume '%s': transport-type '%s' is not valid or "
+ "not found on this machine",
+ trans_name, type);
+ goto fail;
+ }
+
+ trans->dl_handle = handle;
+
+ trans->ops = dlsym(handle, "tops");
+ if (trans->ops == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR, "dlsym (rpc_transport_ops) on %s",
+ dlerror());
+ goto fail;
+ }
+
+ *VOID(&(trans->init)) = dlsym(handle, "init");
+ if (trans->init == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "dlsym (gf_rpc_transport_init) on %s", dlerror());
+ goto fail;
+ }
+
+ *VOID(&(trans->fini)) = dlsym(handle, "fini");
+ if (trans->fini == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "dlsym (gf_rpc_transport_fini) on %s", dlerror());
+ goto fail;
+ }
+
+ *VOID(&(trans->reconfigure)) = dlsym(handle, "reconfigure");
+ if (trans->reconfigure == NULL) {
+ gf_log("rpc-transport", GF_LOG_DEBUG,
+ "dlsym (gf_rpc_transport_reconfigure) on %s", dlerror());
+ }
+
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
+ if (!vol_opt) {
+ goto fail;
+ }
+
+ this = THIS;
+ vol_opt->given_opt = dlsym(handle, "options");
+ if (vol_opt->given_opt == NULL) {
+ gf_log("rpc-transport", GF_LOG_DEBUG,
+ "volume option validation not specified");
+ } else {
+ INIT_LIST_HEAD(&vol_opt->list);
+ list_add_tail(&vol_opt->list, &(this->volume_options));
+ if (xlator_options_validate_list(this, options, vol_opt, NULL)) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "volume option validation failed");
+ goto fail;
}
+ }
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("rpc-transport", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- INIT_LIST_HEAD (&vol_opt->list);
- list_add_tail (&vol_opt->list, &(THIS->volume_options));
- if (xlator_options_validate_list (THIS, options, vol_opt,
- NULL)) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "volume option validation failed");
- goto fail;
- }
- }
-
- trans->options = options;
-
- pthread_mutex_init (&trans->lock, NULL);
- trans->xl = THIS;
-
- ret = trans->init (trans);
- if (ret != 0) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "'%s' initialization failed", type);
- goto fail;
- }
-
- return_trans = trans;
-
- if (name) {
- GF_FREE (name);
- }
+ trans->options = dict_ref(options);
+
+ pthread_mutex_init(&trans->lock, NULL);
+ trans->xl = this;
+
+ ret = trans->init(trans);
+ if (ret != 0) {
+ gf_log("rpc-transport", GF_LOG_WARNING, "'%s' initialization failed",
+ type);
+ goto fail;
+ }
+
+ INIT_LIST_HEAD(&trans->list);
+ GF_ATOMIC_INIT(trans->disconnect_progress, 0);
+
+ return_trans = trans;
- return return_trans;
+ GF_FREE(name);
+
+ success = _gf_true;
fail:
- if (trans) {
- if (trans->name) {
- GF_FREE (trans->name);
- }
+ if (!success) {
+ rpc_transport_cleanup(trans);
+ GF_FREE(name);
- GF_FREE (trans);
- }
+ return_trans = NULL;
+ }
- if (name) {
- GF_FREE (name);
+ if (vol_opt) {
+ if (!list_empty(&vol_opt->list)) {
+ list_del_init(&vol_opt->list);
}
+ GF_FREE(vol_opt);
+ }
- return NULL;
+ return return_trans;
}
-
int32_t
-rpc_transport_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
+rpc_transport_submit_request(rpc_transport_t *this, rpc_transport_req_t *req)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
- ret = this->ops->submit_request (this, req);
+ ret = this->ops->submit_request(this, req);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
+rpc_transport_submit_reply(rpc_transport_t *this, rpc_transport_reply_t *reply)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
- ret = this->ops->submit_reply (this, reply);
+ ret = this->ops->submit_reply(this, reply);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_connect (rpc_transport_t *this, int port)
+rpc_transport_connect(rpc_transport_t *this, int port)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- ret = this->ops->connect (this, port);
+ ret = this->ops->connect(this, port);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_listen (rpc_transport_t *this)
+rpc_transport_listen(rpc_transport_t *this)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- ret = this->ops->listen (this);
+ ret = this->ops->listen(this);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_disconnect (rpc_transport_t *this)
+rpc_transport_disconnect(rpc_transport_t *this, gf_boolean_t wait)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+
+ ret = this->ops->disconnect(this, wait);
- ret = this->ops->disconnect (this);
fail:
- return ret;
+ return ret;
}
-
-int32_t
-rpc_transport_destroy (rpc_transport_t *this)
+static void
+rpc_transport_destroy(rpc_transport_t *this)
{
- int32_t ret = -1;
+ struct dnscache6 *cache = NULL;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ if (this->clnt_options)
+ dict_unref(this->clnt_options);
+ if (this->options)
+ dict_unref(this->options);
+ if (this->fini)
+ this->fini(this);
- if (this->options)
- dict_unref (this->options);
- if (this->fini)
- this->fini (this);
+ pthread_mutex_destroy(&this->lock);
- pthread_mutex_destroy (&this->lock);
+ GF_FREE(this->name);
- if (this->name)
- GF_FREE (this->name);
+ if (this->dl_handle)
+ dlclose(this->dl_handle);
- GF_FREE (this);
-fail:
- return ret;
-}
+ if (this->ssl_name) {
+ GF_FREE(this->ssl_name);
+ }
+ if (this->dnscache) {
+ cache = this->dnscache;
+ if (cache->first)
+ freeaddrinfo(cache->first);
+ GF_FREE(this->dnscache);
+ }
+
+ GF_FREE(this);
+}
rpc_transport_t *
-rpc_transport_ref (rpc_transport_t *this)
+rpc_transport_ref(rpc_transport_t *this)
{
- rpc_transport_t *return_this = NULL;
+ rpc_transport_t *return_this = NULL;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- pthread_mutex_lock (&this->lock);
- {
- this->refcount ++;
- }
- pthread_mutex_unlock (&this->lock);
+ GF_ATOMIC_INC(this->refcount);
- return_this = this;
+ return_this = this;
fail:
- return return_this;
+ return return_this;
}
-
int32_t
-rpc_transport_unref (rpc_transport_t *this)
+rpc_transport_unref(rpc_transport_t *this)
{
- int32_t refcount = 0;
- int32_t ret = -1;
+ int32_t refcount = 0;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- pthread_mutex_lock (&this->lock);
- {
- refcount = --this->refcount;
- }
- pthread_mutex_unlock (&this->lock);
+ refcount = GF_ATOMIC_DEC(this->refcount);
- if (refcount == 0) {
- if (this->mydata)
- this->notify (this, this->mydata, RPC_TRANSPORT_CLEANUP,
- NULL);
- rpc_transport_destroy (this);
- }
+ if (refcount == 0) {
+ if (this->mydata)
+ this->notify(this, this->mydata, RPC_TRANSPORT_CLEANUP, NULL);
+ this->mydata = NULL;
+ this->notify = NULL;
+ rpc_transport_destroy(this);
+ }
- ret = 0;
+ ret = 0;
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_notify (rpc_transport_t *this, rpc_transport_event_t event,
- void *data, ...)
+rpc_transport_notify(rpc_transport_t *this, rpc_transport_event_t event,
+ void *data, ...)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- if (this->notify != NULL) {
- ret = this->notify (this, this->mydata, event, data);
- } else {
- ret = 0;
- }
+ if (this->notify != NULL) {
+ ret = this->notify(this, this->mydata, event, data);
+ } else {
+ ret = 0;
+ }
out:
- return ret;
+ return ret;
}
-
-
-inline int
-rpc_transport_register_notify (rpc_transport_t *trans,
- rpc_transport_notify_t notify, void *mydata)
+int
+rpc_transport_register_notify(rpc_transport_t *trans,
+ rpc_transport_notify_t notify, void *mydata)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", trans, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", trans, out);
- trans->notify = notify;
- trans->mydata = mydata;
+ trans->notify = notify;
+ trans->mydata = mydata;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-inline int
-rpc_transport_unregister_notify (rpc_transport_t *trans)
+// give negative values to skip setting that value
+// this function asserts if both the values are negative.
+// why call it if you don't set it.
+int
+rpc_transport_keepalive_options_set(dict_t *options, int32_t interval,
+ int32_t time, int32_t timeout)
{
- GF_VALIDATE_OR_GOTO ("rpc-transport", trans, out);
+ int ret = -1;
+
+ GF_ASSERT(options);
+ GF_ASSERT((interval > 0) || (time > 0));
+
+ ret = dict_set_int32_sizen(options, "transport.socket.keepalive-interval",
+ interval);
+ if (ret)
+ goto out;
- trans->notify = NULL;
- trans->mydata = NULL;
+ ret = dict_set_int32_sizen(options, "transport.socket.keepalive-time",
+ time);
+ if (ret)
+ goto out;
+ ret = dict_set_int32_sizen(options, "transport.tcp-user-timeout", timeout);
+ if (ret)
+ goto out;
out:
- return 0;
+ return ret;
}
-
-//give negative values to skip setting that value
-//this function asserts if both the values are negative.
-//why call it if you dont set it.
int
-rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
- int32_t time)
+rpc_transport_unix_options_build(dict_t *dict, char *filepath,
+ int frame_timeout)
{
- int ret = -1;
-
- GF_ASSERT (options);
- GF_ASSERT ((interval > 0) || (time > 0));
-
- ret = dict_set_int32 (options,
- "transport.socket.keepalive-interval", interval);
- if (ret)
- goto out;
-
- ret = dict_set_int32 (options,
- "transport.socket.keepalive-time", time);
+ char *fpath = NULL;
+ int ret = -1;
+
+ GF_ASSERT(filepath);
+ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
+
+ fpath = gf_strdup(filepath);
+ if (!fpath) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "transport.socket.connect-path", fpath);
+ if (ret) {
+ GF_FREE(fpath);
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(dict, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str_sizen(dict, "transport.socket.nodelay", "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str_sizen(dict, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str_sizen(dict, "transport.socket.keepalive", "off");
+ if (ret)
+ goto out;
+
+ if (frame_timeout > 0) {
+ ret = dict_set_int32_sizen(dict, "frame-timeout", frame_timeout);
if (ret)
- goto out;
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
int
-rpc_transport_inet_options_build (dict_t **options, const char *hostname,
- int port)
+rpc_transport_inet_options_build(dict_t *dict, const char *hostname, int port,
+ char *af)
{
- dict_t *dict = NULL;
- char *host = NULL;
- int ret = -1;
-
- GF_ASSERT (options);
- GF_ASSERT (hostname);
- GF_ASSERT (port >= 1024);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- host = gf_strdup ((char*)hostname);
- if (!hostname) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynstr (dict, "remote-host", host);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set remote-host with %s", host);
- goto out;
- }
-
- ret = dict_set_int32 (dict, "remote-port", port);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set remote-port with %d", port);
- goto out;
- }
- ret = dict_set_str (dict, "transport.address-family", "inet/inet6");
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set addr-family with inet");
- goto out;
- }
-
- ret = dict_set_str (dict, "transport-type", "socket");
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set trans-type with socket");
- goto out;
- }
+ char *host = NULL;
+ int ret = -1;
+#ifdef IPV6_DEFAULT
+ static char *addr_family = "inet6";
+#else
+ static char *addr_family = "inet";
+#endif
- *options = dict;
+ GF_ASSERT(hostname);
+ GF_ASSERT(port >= 1024);
+ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
+
+ host = gf_strdup((char *)hostname);
+ if (!host) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "remote-host", host);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set remote-host with %s",
+ host);
+ GF_FREE(host);
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(dict, "remote-port", port);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set remote-port with %d",
+ port);
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(dict, "address-family",
+ (af != NULL ? af : addr_family));
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set address-family to %s",
+ addr_family);
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(dict, "transport-type", "socket");
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to set trans-type with socket");
+ goto out;
+ }
out:
- if (ret) {
- if (host)
- GF_FREE (host);
- if (dict)
- dict_unref (dict);
- }
-
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index d9ab30dd81e..c499f0bb955 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -11,12 +11,6 @@
#ifndef __RPC_TRANSPORT_H__
#define __RPC_TRANSPORT_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
#include <inttypes.h>
#ifdef GF_SOLARIS_HOST_OS
#include <rpc/auth.h>
@@ -26,7 +20,6 @@
#include <rpc/rpc_msg.h>
-
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
#endif
@@ -48,39 +41,45 @@
*/
#define RPC_FRAGSIZE(fraghdr) ((uint32_t)(fraghdr & 0x7fffffffU))
-#define RPC_FRAGHDR_SIZE 4
-#define RPC_MSGTYPE_SIZE 8
+#define RPC_FRAGHDR_SIZE 4
+#define RPC_MSGTYPE_SIZE 8
/* size of the msg from the start of call-body till and including credlen */
-#define RPC_CALL_BODY_SIZE 24
+#define RPC_CALL_BODY_SIZE 24
-#define RPC_REPLY_STATUS_SIZE 4
+#define RPC_REPLY_STATUS_SIZE 4
#define RPC_AUTH_FLAVOUR_N_LENGTH_SIZE 8
-#define RPC_ACCEPT_STATUS_LEN 4
+#define RPC_ACCEPT_STATUS_LEN 4
struct rpc_transport_ops;
typedef struct rpc_transport rpc_transport_t;
-#include "dict.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/async.h>
#include "rpcsvc-common.h"
struct peer_info {
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- char identifier[UNIX_PATH_MAX];
+ // OP-VERSION of clients
+ uint32_t max_op_version;
+ uint32_t min_op_version;
+ struct sockaddr_storage sockaddr;
+ socklen_t sockaddr_len;
+ char identifier[UNIX_PATH_MAX];
+ // Volume mounted by client
+ char volname[NAME_MAX];
};
typedef struct peer_info peer_info_t;
typedef enum msg_type msg_type_t;
typedef enum {
- RPC_TRANSPORT_ACCEPT, /* New client has been accepted */
- RPC_TRANSPORT_DISCONNECT, /* Connection is disconnected */
- RPC_TRANSPORT_CLEANUP, /* connection is about to be freed */
- /*RPC_TRANSPORT_READ,*/ /* An event used to enable rpcsvc to instruct
+ RPC_TRANSPORT_ACCEPT, /* New client has been accepted */
+ RPC_TRANSPORT_DISCONNECT, /* Connection is disconnected */
+ RPC_TRANSPORT_CLEANUP, /* connection is about to be freed */
+ /*RPC_TRANSPORT_READ,*/ /* An event used to enable rpcsvc to instruct
* transport the number of bytes to read.
* This helps in reading large msgs, wherein
* the rpc actors might decide to place the
@@ -92,54 +91,55 @@ typedef enum {
* reading a single msg, this event may be
* delivered more than once.
*/
- RPC_TRANSPORT_MAP_XID_REQUEST, /* receiver of this event should send
- * the prognum and procnum corresponding
- * to xid.
- */
- RPC_TRANSPORT_MSG_RECEIVED, /* Complete rpc msg has been read */
- RPC_TRANSPORT_CONNECT, /* client is connected to server */
- RPC_TRANSPORT_MSG_SENT,
+ RPC_TRANSPORT_MAP_XID_REQUEST, /* receiver of this event should send
+ * the prognum and procnum corresponding
+ * to xid.
+ */
+ RPC_TRANSPORT_MSG_RECEIVED, /* Complete rpc msg has been read */
+ RPC_TRANSPORT_CONNECT, /* client is connected to server */
+ RPC_TRANSPORT_MSG_SENT,
+ RPC_TRANSPORT_EVENT_THREAD_DIED /* event-thread has died */
} rpc_transport_event_t;
struct rpc_transport_msg {
- struct iovec *rpchdr;
- int rpchdrcount;
- struct iovec *proghdr;
- int proghdrcount;
- struct iovec *progpayload;
- int progpayloadcount;
- struct iobref *iobref;
+ struct iovec *rpchdr;
+ struct iovec *proghdr;
+ int rpchdrcount;
+ int proghdrcount;
+ struct iovec *progpayload;
+ struct iobref *iobref;
+ int progpayloadcount;
};
typedef struct rpc_transport_msg rpc_transport_msg_t;
struct rpc_transport_rsp {
- struct iovec *rsphdr;
- int rsphdr_count;
- struct iovec *rsp_payload;
- int rsp_payload_count;
- struct iobref *rsp_iobref;
+ struct iovec *rsphdr;
+ struct iovec *rsp_payload;
+ int rsphdr_count;
+ int rsp_payload_count;
+ struct iobref *rsp_iobref;
};
typedef struct rpc_transport_rsp rpc_transport_rsp_t;
struct rpc_transport_req {
- rpc_transport_msg_t msg;
- rpc_transport_rsp_t rsp;
- struct rpc_req *rpc_req;
+ struct rpc_req *rpc_req;
+ rpc_transport_msg_t msg;
+ rpc_transport_rsp_t rsp;
};
typedef struct rpc_transport_req rpc_transport_req_t;
struct rpc_transport_reply {
- rpc_transport_msg_t msg;
- void *private;
+ void *private;
+ rpc_transport_msg_t msg;
};
typedef struct rpc_transport_reply rpc_transport_reply_t;
struct rpc_transport_data {
- char is_request;
- union {
- rpc_transport_req_t req;
- rpc_transport_reply_t reply;
- } data;
+ union {
+ rpc_transport_req_t req;
+ rpc_transport_reply_t reply;
+ } data;
+ char is_request;
};
typedef struct rpc_transport_data rpc_transport_data_t;
@@ -147,154 +147,166 @@ typedef struct rpc_transport_data rpc_transport_data_t;
* rpc_request, hence these should be removed from request_info
*/
struct rpc_request_info {
- uint32_t xid;
- int prognum;
- int progver;
- int procnum;
- void *rpc_req; /* struct rpc_req */
- rpc_transport_rsp_t rsp;
+ int prognum;
+ int progver;
+ void *rpc_req; /* struct rpc_req */
+ rpc_transport_rsp_t rsp;
+ int procnum;
+ uint32_t xid;
};
typedef struct rpc_request_info rpc_request_info_t;
+typedef int (*rpc_transport_notify_t)(rpc_transport_t *, void *mydata,
+ rpc_transport_event_t, void *data, ...);
-struct rpc_transport_pollin {
- struct iovec vector[2];
- int count;
- char vectored;
- void *private;
- struct iobref *iobref;
- struct iobuf *hdr_iobuf;
- char is_reply;
+struct rpc_transport {
+ struct rpc_transport_ops *ops;
+ rpc_transport_t *listener; /* listener transport to which
+ * request for creation of this
+ * transport came from. valid only
+ * on server process.
+ */
+
+ void *private;
+ struct _client *xl_private;
+ void *xl; /* Used for THIS */
+ void *mydata;
+ pthread_mutex_t lock;
+ gf_atomic_t refcount;
+ glusterfs_ctx_t *ctx;
+ dict_t *options;
+ char *name;
+ void *dnscache;
+ void *drc_client;
+ data_t *buf;
+ int32_t (*init)(rpc_transport_t *this);
+ void (*fini)(rpc_transport_t *this);
+ int (*reconfigure)(rpc_transport_t *this, dict_t *options);
+ rpc_transport_notify_t notify;
+ void *notify_data;
+ peer_info_t peerinfo;
+ peer_info_t myinfo;
+
+ uint64_t total_bytes_read;
+ uint64_t total_bytes_write;
+ uint32_t xid; /* RPC/XID used for callbacks */
+ int32_t outstanding_rpc_count;
+
+ struct list_head list;
+ void *dl_handle; /* handle of dlopen() */
+ char *ssl_name;
+ dict_t *clnt_options; /* store options received from
+ * client */
+ gf_atomic_t disconnect_progress;
+ int bind_insecure;
+ /* connect_failed: saves the connect() syscall status as socket_t
+ * member holding connect() status can't be accessed by higher gfapi
+ * layer or in client management notification handler functions
+ */
+ gf_boolean_t connect_failed;
+ char notify_poller_death;
+ char poller_death_accept;
};
-typedef struct rpc_transport_pollin rpc_transport_pollin_t;
-
-typedef int (*rpc_transport_notify_t) (rpc_transport_t *, void *mydata,
- rpc_transport_event_t, void *data, ...);
-
-struct rpc_transport {
- struct rpc_transport_ops *ops;
- rpc_transport_t *listener; /* listener transport to which
- * request for creation of this
- * transport came from. valid only
- * on server process.
- */
-
- void *private;
- void *xl_private;
- void *xl; /* Used for THIS */
- void *mydata;
- pthread_mutex_t lock;
- int32_t refcount;
-
- glusterfs_ctx_t *ctx;
- dict_t *options;
- char *name;
- void *dnscache;
- data_t *buf;
- int32_t (*init) (rpc_transport_t *this);
- void (*fini) (rpc_transport_t *this);
- int (*reconfigure) (rpc_transport_t *this, dict_t *options);
- rpc_transport_notify_t notify;
- void *notify_data;
- peer_info_t peerinfo;
- peer_info_t myinfo;
-
- uint64_t total_bytes_read;
- uint64_t total_bytes_write;
-
- struct list_head list;
- int bind_insecure;
+struct rpc_transport_pollin {
+ struct rpc_transport *trans;
+ void *private;
+ struct iobref *iobref;
+ struct iovec vector[MAX_IOVEC];
+ gf_async_t async;
+ int count;
+ char is_reply;
+ char vectored;
};
+typedef struct rpc_transport_pollin rpc_transport_pollin_t;
struct rpc_transport_ops {
- /* no need of receive op, msg will be delivered through an event
- * notification
- */
- int32_t (*submit_request) (rpc_transport_t *this,
- rpc_transport_req_t *req);
- int32_t (*submit_reply) (rpc_transport_t *this,
- rpc_transport_reply_t *reply);
- int32_t (*connect) (rpc_transport_t *this, int port);
- int32_t (*listen) (rpc_transport_t *this);
- int32_t (*disconnect) (rpc_transport_t *this);
- int32_t (*get_peername) (rpc_transport_t *this, char *hostname,
- int hostlen);
- int32_t (*get_peeraddr) (rpc_transport_t *this, char *peeraddr,
- int addrlen, struct sockaddr_storage *sa,
- socklen_t sasize);
- int32_t (*get_myname) (rpc_transport_t *this, char *hostname,
- int hostlen);
- int32_t (*get_myaddr) (rpc_transport_t *this, char *peeraddr,
- int addrlen, struct sockaddr_storage *sa,
- socklen_t sasize);
+ /* no need of receive op, msg will be delivered through an event
+ * notification
+ */
+ int32_t (*submit_request)(rpc_transport_t *this, rpc_transport_req_t *req);
+ int32_t (*submit_reply)(rpc_transport_t *this,
+ rpc_transport_reply_t *reply);
+ int32_t (*connect)(rpc_transport_t *this, int port);
+ int32_t (*listen)(rpc_transport_t *this);
+ int32_t (*disconnect)(rpc_transport_t *this, gf_boolean_t wait);
+ int32_t (*get_peername)(rpc_transport_t *this, char *hostname, int hostlen);
+ int32_t (*get_peeraddr)(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t sasize);
+ int32_t (*get_myname)(rpc_transport_t *this, char *hostname, int hostlen);
+ int32_t (*get_myaddr)(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t sasize);
+ int32_t (*throttle)(rpc_transport_t *this, gf_boolean_t onoff);
};
-
int32_t
-rpc_transport_listen (rpc_transport_t *this);
+rpc_transport_count(const char *transport_type);
int32_t
-rpc_transport_connect (rpc_transport_t *this, int port);
+rpc_transport_listen(rpc_transport_t *this);
int32_t
-rpc_transport_disconnect (rpc_transport_t *this);
+rpc_transport_connect(rpc_transport_t *this, int port);
int32_t
-rpc_transport_destroy (rpc_transport_t *this);
+rpc_transport_disconnect(rpc_transport_t *this, gf_boolean_t wait);
int32_t
-rpc_transport_notify (rpc_transport_t *this, rpc_transport_event_t event,
- void *data, ...);
+rpc_transport_notify(rpc_transport_t *this, rpc_transport_event_t event,
+ void *data, ...);
int32_t
-rpc_transport_submit_request (rpc_transport_t *this, rpc_transport_req_t *req);
+rpc_transport_submit_request(rpc_transport_t *this, rpc_transport_req_t *req);
int32_t
-rpc_transport_submit_reply (rpc_transport_t *this,
- rpc_transport_reply_t *reply);
+rpc_transport_submit_reply(rpc_transport_t *this, rpc_transport_reply_t *reply);
rpc_transport_t *
-rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *name);
+rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *name);
rpc_transport_t *
-rpc_transport_ref (rpc_transport_t *trans);
+rpc_transport_ref(rpc_transport_t *trans);
int32_t
-rpc_transport_unref (rpc_transport_t *trans);
-
-int
-rpc_transport_register_notify (rpc_transport_t *trans, rpc_transport_notify_t,
- void *mydata);
+rpc_transport_unref(rpc_transport_t *trans);
int
-rpc_transport_unregister_notify (rpc_transport_t *trans);
+rpc_transport_register_notify(rpc_transport_t *trans, rpc_transport_notify_t,
+ void *mydata);
int32_t
-rpc_transport_get_peername (rpc_transport_t *this, char *hostname, int hostlen);
+rpc_transport_get_peername(rpc_transport_t *this, char *hostname, int hostlen);
int32_t
-rpc_transport_get_peeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen);
+rpc_transport_get_peeraddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen);
int32_t
-rpc_transport_get_myname (rpc_transport_t *this, char *hostname, int hostlen);
+rpc_transport_get_myaddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen);
-int32_t
-rpc_transport_get_myaddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen);
+int
+rpc_transport_throttle(rpc_transport_t *this, gf_boolean_t onoff);
rpc_transport_pollin_t *
-rpc_transport_pollin_alloc (rpc_transport_t *this, struct iovec *vector,
- int count, struct iobuf *hdr_iobuf,
- struct iobref *iobref, void *private);
+rpc_transport_pollin_alloc(rpc_transport_t *this, struct iovec *vector,
+ int count, struct iobuf *hdr_iobuf,
+ struct iobref *iobref, void *private);
void
-rpc_transport_pollin_destroy (rpc_transport_pollin_t *pollin);
+rpc_transport_pollin_destroy(rpc_transport_pollin_t *pollin);
+
+int
+rpc_transport_keepalive_options_set(dict_t *options, int32_t interval,
+ int32_t time, int32_t timeout);
int
-rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
- int32_t time);
+rpc_transport_unix_options_build(dict_t *options, char *filepath,
+ int frame_timeout);
int
-rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port);
+rpc_transport_inet_options_build(dict_t *options, const char *hostname,
+ int port, char *af);
+
+void
+rpc_transport_cleanup(rpc_transport_t *);
#endif /* __RPC_TRANSPORT_H__ */
diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c
index 3a46cc498e5..8e76b4188bb 100644
--- a/rpc/rpc-lib/src/rpcsvc-auth.c
+++ b/rpc/rpc-lib/src/rpcsvc-auth.c
@@ -9,447 +9,553 @@
*/
#include "rpcsvc.h"
-#include "logging.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
extern rpcsvc_auth_t *
-rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_null_init(rpcsvc_t *svc, dict_t *options);
extern rpcsvc_auth_t *
-rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_unix_init(rpcsvc_t *svc, dict_t *options);
extern rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_glusterfs_init(rpcsvc_t *svc, dict_t *options);
extern rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_v2_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_glusterfs_v2_init(rpcsvc_t *svc, dict_t *options);
+extern rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_v3_init(rpcsvc_t *svc, dict_t *options);
int
-rpcsvc_auth_add_initer (struct list_head *list, char *idfier,
- rpcsvc_auth_initer_t init)
+rpcsvc_auth_add_initer(struct list_head *list, char *idfier,
+ rpcsvc_auth_initer_t init)
{
- struct rpcsvc_auth_list *new = NULL;
+ struct rpcsvc_auth_list *new = NULL;
- if ((!list) || (!init) || (!idfier))
- return -1;
+ if ((!list) || (!init) || (!idfier))
+ return -1;
- new = GF_CALLOC (1, sizeof (*new), gf_common_mt_rpcsvc_auth_list);
- if (!new) {
- return -1;
- }
+ new = GF_CALLOC(1, sizeof(*new), gf_common_mt_rpcsvc_auth_list);
+ if (!new) {
+ return -1;
+ }
- new->init = init;
- strcpy (new->name, idfier);
- INIT_LIST_HEAD (&new->authlist);
- list_add_tail (&new->authlist, list);
- return 0;
+ new->init = init;
+ strncpy(new->name, idfier, sizeof(new->name) - 1);
+ INIT_LIST_HEAD(&new->authlist);
+ list_add_tail(&new->authlist, list);
+ return 0;
}
-
-
int
-rpcsvc_auth_add_initers (rpcsvc_t *svc)
+rpcsvc_auth_add_initers(rpcsvc_t *svc)
{
- int ret = -1;
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-glusterfs",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_glusterfs_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS");
- goto err;
- }
-
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-glusterfs-v2",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_glusterfs_v2_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "Failed to add AUTH_GLUSTERFS-v2");
- goto err;
- }
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-unix",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_unix_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_UNIX");
- goto err;
- }
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-null",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_null_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_NULL");
- goto err;
- }
-
- ret = 0;
+ int ret = -1;
+
+ ret = rpcsvc_auth_add_initer(
+ &svc->authschemes, "auth-glusterfs",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_glusterfs_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(
+ &svc->authschemes, "auth-glusterfs-v2",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_glusterfs_v2_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS-v2");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(
+ &svc->authschemes, "auth-glusterfs-v3",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_glusterfs_v3_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS-v3");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(&svc->authschemes, "auth-unix",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_unix_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_UNIX");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(&svc->authschemes, "auth-null",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_null_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_NULL");
+ goto err;
+ }
+
+ ret = 0;
err:
- return ret;
+ return ret;
}
-
int
-rpcsvc_auth_init_auth (rpcsvc_t *svc, dict_t *options,
- struct rpcsvc_auth_list *authitem)
+rpcsvc_auth_init_auth(rpcsvc_t *svc, dict_t *options,
+ struct rpcsvc_auth_list *authitem)
{
- int ret = -1;
-
- if ((!svc) || (!options) || (!authitem))
- return -1;
-
- if (!authitem->init) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "No init function defined");
- ret = -1;
- goto err;
- }
+ int ret = -1;
+
+ if ((!svc) || (!options) || (!authitem))
+ return -1;
+
+ if (!authitem->init) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "No init function defined");
+ ret = -1;
+ goto err;
+ }
+
+ authitem->auth = authitem->init(svc, options);
+ if (!authitem->auth) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Registration of auth failed:"
+ " %s",
+ authitem->name);
+ ret = -1;
+ goto err;
+ }
+
+ authitem->enable = 1;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Authentication enabled: %s",
+ authitem->auth->authname);
+
+ ret = 0;
+err:
+ return ret;
+}
- authitem->auth = authitem->init (svc, options);
- if (!authitem->auth) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Registration of auth failed:"
- " %s", authitem->name);
- ret = -1;
- goto err;
- }
+int
+rpcsvc_auth_init_auths(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
- authitem->enable = 1;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Authentication enabled: %s",
- authitem->auth->authname);
+ if (!svc)
+ return -1;
+ if (list_empty(&svc->authschemes)) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
ret = 0;
+ goto err;
+ }
+
+ /* If auth null and sys are not disabled by the user, we must enable
+ * it by default. This is a globally default rule, the user is still
+ * allowed to disable the two for particular subvolumes.
+ */
+ if (!dict_get(options, "rpc-auth.auth-null")) {
+ ret = dict_set_str(options, "rpc-auth.auth-null", "on");
+ if (ret)
+ gf_log("rpc-auth", GF_LOG_DEBUG, "dict_set failed for 'auth-nill'");
+ }
+
+ if (!dict_get(options, "rpc-auth.auth-unix")) {
+ ret = dict_set_str(options, "rpc-auth.auth-unix", "on");
+ if (ret)
+ gf_log("rpc-auth", GF_LOG_DEBUG, "dict_set failed for 'auth-unix'");
+ }
+
+ if (!dict_get(options, "rpc-auth.auth-glusterfs")) {
+ ret = dict_set_str(options, "rpc-auth.auth-glusterfs", "on");
+ if (ret)
+ gf_log("rpc-auth", GF_LOG_DEBUG, "dict_set failed for 'auth-unix'");
+ }
+
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ ret = rpcsvc_auth_init_auth(svc, options, auth);
+ if (ret == -1)
+ goto err;
+ }
+
+ ret = 0;
err:
- return ret;
+ return ret;
}
-
int
-rpcsvc_auth_init_auths (rpcsvc_t *svc, dict_t *options)
+rpcsvc_set_addr_namelookup(rpcsvc_t *svc, dict_t *options)
{
- int ret = -1;
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
-
- if (!svc)
- return -1;
-
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
- ret = 0;
- goto err;
- }
-
- /* If auth null and sys are not disabled by the user, we must enable
- * it by default. This is a globally default rule, the user is still
- * allowed to disable the two for particular subvolumes.
- */
- if (!dict_get (options, "rpc-auth.auth-null")) {
- ret = dict_set_str (options, "rpc-auth.auth-null", "on");
- if (ret)
- gf_log ("rpc-auth", GF_LOG_DEBUG,
- "dict_set failed for 'auth-nill'");
- }
+ int ret;
+ static char *addrlookup_key = "rpc-auth.addr.namelookup";
- if (!dict_get (options, "rpc-auth.auth-unix")) {
- ret = dict_set_str (options, "rpc-auth.auth-unix", "on");
- if (ret)
- gf_log ("rpc-auth", GF_LOG_DEBUG,
- "dict_set failed for 'auth-unix'");
- }
-
- if (!dict_get (options, "rpc-auth.auth-glusterfs")) {
- ret = dict_set_str (options, "rpc-auth.auth-glusterfs", "on");
- if (ret)
- gf_log ("rpc-auth", GF_LOG_DEBUG,
- "dict_set failed for 'auth-unix'");
- }
+ if (!svc || !options)
+ return (-1);
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- ret = rpcsvc_auth_init_auth (svc, options, auth);
- if (ret == -1)
- goto err;
- }
+ /* By default it's disabled */
+ ret = dict_get_str_boolean(options, addrlookup_key, _gf_false);
+ if (ret < 0) {
+ svc->addr_namelookup = _gf_false;
+ } else {
+ svc->addr_namelookup = ret;
+ }
- ret = 0;
-err:
- return ret;
+ if (svc->addr_namelookup)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "Addr-Name lookup enabled");
+ return (0);
}
int
-rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options)
+rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options)
{
- int ret = -1;
- char *allow_insecure_str = NULL;
- gf_boolean_t is_allow_insecure = _gf_false;
+ int ret = -1;
+ char *allow_insecure_str = NULL;
+ gf_boolean_t is_allow_insecure = _gf_false;
- GF_ASSERT (svc);
- GF_ASSERT (options);
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
- ret = dict_get_str (options, "rpc-auth-allow-insecure",
- &allow_insecure_str);
+ ret = dict_get_str(options, "rpc-auth-allow-insecure", &allow_insecure_str);
+ if (0 == ret) {
+ ret = gf_string2boolean(allow_insecure_str, &is_allow_insecure);
if (0 == ret) {
- ret = gf_string2boolean (allow_insecure_str,
- &is_allow_insecure);
- if (0 == ret) {
- if (_gf_true == is_allow_insecure)
- svc->allow_insecure = 1;
- else
- svc->allow_insecure = 0;
- }
+ if (_gf_true == is_allow_insecure)
+ svc->allow_insecure = 1;
+ else
+ svc->allow_insecure = 0;
}
+ } else {
+ /* By default set allow-insecure to true */
+ svc->allow_insecure = 1;
+
+ /* setting in options for the sake of functions that look
+ * configuration params for allow insecure, eg: gf_auth
+ */
+ ret = dict_set_str(options, "rpc-auth-allow-insecure", "on");
+ if (ret < 0)
+ gf_log("rpc-auth", GF_LOG_DEBUG,
+ "dict_set failed for 'allow-insecure'");
+ }
- return 0;
+ return ret;
}
int
-rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_set_root_squash(rpcsvc_t *svc, dict_t *options)
{
- int ret = -1;
-
- if ((!svc) || (!options))
- return -1;
+ int ret = -1;
+ uid_t anonuid = -1;
+ gid_t anongid = -1;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
+
+ ret = dict_get_str_boolean(options, "root-squash", 0);
+ if (ret != -1)
+ svc->root_squash = ret;
+ else
+ svc->root_squash = _gf_false;
+
+ ret = dict_get_uint32(options, "anonuid", &anonuid);
+ if (!ret)
+ svc->anonuid = anonuid;
+ else
+ svc->anonuid = RPC_NOBODY_UID;
+
+ ret = dict_get_uint32(options, "anongid", &anongid);
+ if (!ret)
+ svc->anongid = anongid;
+ else
+ svc->anongid = RPC_NOBODY_GID;
+
+ if (svc->root_squash)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "root squashing enabled "
+ "(uid=%d, gid=%d)",
+ svc->anonuid, svc->anongid);
+
+ return 0;
+}
- (void) rpcsvc_set_allow_insecure (svc, options);
- ret = rpcsvc_auth_add_initers (svc);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
- goto out;
- }
+int
+rpcsvc_set_all_squash(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ uid_t anonuid = -1;
+ gid_t anongid = -1;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
+
+ ret = dict_get_str_boolean(options, "all-squash", 0);
+ if (ret != -1)
+ svc->all_squash = ret;
+ else
+ svc->all_squash = _gf_false;
+
+ ret = dict_get_uint32(options, "anonuid", &anonuid);
+ if (!ret)
+ svc->anonuid = anonuid;
+ else
+ svc->anonuid = RPC_NOBODY_UID;
+
+ ret = dict_get_uint32(options, "anongid", &anongid);
+ if (!ret)
+ svc->anongid = anongid;
+ else
+ svc->anongid = RPC_NOBODY_GID;
+
+ if (svc->all_squash)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "all squashing enabled "
+ "(uid=%d, gid=%d)",
+ svc->anonuid, svc->anongid);
+
+ return 0;
+}
- ret = rpcsvc_auth_init_auths (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init auth schemes");
- goto out;
- }
+int
+rpcsvc_auth_init(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ if ((!svc) || (!options))
+ return -1;
+
+ (void)rpcsvc_set_allow_insecure(svc, options);
+ (void)rpcsvc_set_root_squash(svc, options);
+ (void)rpcsvc_set_all_squash(svc, options);
+ (void)rpcsvc_set_addr_namelookup(svc, options);
+ ret = rpcsvc_auth_add_initers(svc);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
+ goto out;
+ }
+
+ ret = rpcsvc_auth_init_auths(svc, options);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to init auth schemes");
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
-
-rpcsvc_auth_t *
-__rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+int
+rpcsvc_auth_reconf(rpcsvc_t *svc, dict_t *options)
{
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
- rpcsvc_t *svc = NULL;
+ int ret = 0;
- if (!req)
- return NULL;
+ if ((!svc) || (!options))
+ return (-1);
- svc = req->svc;
- if (!svc) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "!svc");
- goto err;
- }
+ ret = rpcsvc_set_allow_insecure(svc, options);
+ if (ret)
+ return (-1);
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
- goto err;
- }
+ ret = rpcsvc_set_root_squash(svc, options);
+ if (ret)
+ return (-1);
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- if (!auth->enable)
- continue;
- if (auth->auth->authnum == req->cred.flavour)
- goto err;
+ ret = rpcsvc_set_all_squash(svc, options);
+ if (ret)
+ return (-1);
- }
+ return rpcsvc_set_addr_namelookup(svc, options);
+}
- auth = NULL;
+rpcsvc_auth_t *
+__rpcsvc_auth_get_handler(rpcsvc_request_t *req)
+{
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+ rpcsvc_t *svc = NULL;
+
+ if (!req)
+ return NULL;
+
+ svc = req->svc;
+ if (!svc) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "!svc");
+ goto err;
+ }
+
+ if (list_empty(&svc->authschemes)) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
+ goto err;
+ }
+
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ if (!auth->enable)
+ continue;
+ if (auth->auth->authnum == req->cred.flavour)
+ goto err;
+ }
+
+ auth = NULL;
err:
- if (auth)
- return auth->auth;
- else
- return NULL;
+ if (auth)
+ return auth->auth;
+ else
+ return NULL;
}
rpcsvc_auth_t *
-rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+rpcsvc_auth_get_handler(rpcsvc_request_t *req)
{
- rpcsvc_auth_t *auth = NULL;
+ rpcsvc_auth_t *auth = NULL;
- auth = __rpcsvc_auth_get_handler (req);
- if (auth)
- goto ret;
+ auth = __rpcsvc_auth_get_handler(req);
+ if (auth)
+ goto ret;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "No auth handler: %d",
- req->cred.flavour);
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "No auth handler: %d", req->cred.flavour);
- /* The requested scheme was not available so fall back the to one
- * scheme that will always be present.
- */
- req->cred.flavour = AUTH_NULL;
- req->verf.flavour = AUTH_NULL;
- auth = __rpcsvc_auth_get_handler (req);
+ /* The requested scheme was not available so fall back the to one
+ * scheme that will always be present.
+ */
+ req->cred.flavour = AUTH_NULL;
+ req->verf.flavour = AUTH_NULL;
+ auth = __rpcsvc_auth_get_handler(req);
ret:
- return auth;
+ return auth;
}
-
int
-rpcsvc_auth_request_init (rpcsvc_request_t *req)
+rpcsvc_auth_request_init(rpcsvc_request_t *req, struct rpc_msg *callmsg)
{
- int ret = -1;
- rpcsvc_auth_t *auth = NULL;
-
- if (!req)
- return -1;
-
- auth = rpcsvc_auth_get_handler (req);
- if (!auth)
- goto err;
- ret = 0;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
- if (!auth->authops->request_init)
- ret = auth->authops->request_init (req, auth->authprivate);
-
+ int32_t ret = 0;
+ rpcsvc_auth_t *auth = NULL;
+
+ if (!req || !callmsg) {
+ ret = -1;
+ goto err;
+ }
+
+ req->cred.flavour = rpc_call_cred_flavour(callmsg);
+ req->cred.datalen = rpc_call_cred_len(callmsg);
+ req->verf.flavour = rpc_call_verf_flavour(callmsg);
+ req->verf.datalen = rpc_call_verf_len(callmsg);
+
+ auth = rpcsvc_auth_get_handler(req);
+ if (!auth) {
+ ret = -1;
+ goto err;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
+
+ if (auth->authops->request_init)
+ ret = auth->authops->request_init(req, auth->authprivate);
+
+ /* reset to auxgidlarge during
+ unsersialize if necessary */
+ req->auxgids = req->auxgidsmall;
+ req->auxgidlarge = NULL;
err:
- return ret;
+ return ret;
}
-
int
-rpcsvc_authenticate (rpcsvc_request_t *req)
+rpcsvc_authenticate(rpcsvc_request_t *req)
{
- int ret = RPCSVC_AUTH_REJECT;
- rpcsvc_auth_t *auth = NULL;
- int minauth = 0;
-
- if (!req)
- return ret;
-
- /* FIXME use rpcsvc_request_prog_minauth() */
- minauth = 0;
- if (minauth > rpcsvc_request_cred_flavour (req)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "Auth too weak");
- rpcsvc_request_set_autherr (req, AUTH_TOOWEAK);
- goto err;
- }
+ int ret = RPCSVC_AUTH_REJECT;
+ rpcsvc_auth_t *auth = NULL;
+ int minauth = 0;
- auth = rpcsvc_auth_get_handler (req);
- if (!auth) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No auth handler found");
- goto err;
- }
+ if (!req)
+ return ret;
+
+ /* FIXME use rpcsvc_request_prog_minauth() */
+ minauth = 0;
+ if (minauth > rpcsvc_request_cred_flavour(req)) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "Auth too weak");
+ rpcsvc_request_set_autherr(req, AUTH_TOOWEAK);
+ goto err;
+ }
+
+ auth = rpcsvc_auth_get_handler(req);
+ if (!auth) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "No auth handler found");
+ goto err;
+ }
- if (auth->authops->authenticate)
- ret = auth->authops->authenticate (req, auth->authprivate);
+ if (auth->authops->authenticate)
+ ret = auth->authops->authenticate(req, auth->authprivate);
err:
- return ret;
+ return ret;
}
int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
+rpcsvc_auth_array(rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
{
- int count = 0;
- int gen = RPCSVC_AUTH_REJECT;
- int spec = RPCSVC_AUTH_REJECT;
- int final = RPCSVC_AUTH_REJECT;
- char *srchstr = NULL;
- char *valstr = NULL;
- gf_boolean_t boolval = _gf_false;
- int ret = 0;
-
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
-
- if ((!svc) || (!autharr) || (!volname))
- return -1;
-
- memset (autharr, 0, arrlen * sizeof(int));
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "No authentication!");
- goto err;
+ int count = 0;
+ int result = RPCSVC_AUTH_REJECT;
+ char *srchstr = NULL;
+ int ret = 0;
+
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+
+ if ((!svc) || (!autharr) || (!volname))
+ return -1;
+
+ memset(autharr, 0, arrlen * sizeof(int));
+ if (list_empty(&svc->authschemes)) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "No authentication!");
+ goto err;
+ }
+
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ if (count >= arrlen)
+ break;
+
+ result = gf_asprintf(&srchstr, "rpc-auth.%s.%s", auth->name, volname);
+ if (result == -1) {
+ count = -1;
+ goto err;
}
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- if (count >= arrlen)
- break;
-
- gen = gf_asprintf (&srchstr, "rpc-auth.%s", auth->name);
- if (gen == -1) {
- count = -1;
- goto err;
- }
-
- gen = RPCSVC_AUTH_REJECT;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- gen = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
- GF_FREE (srchstr);
- spec = gf_asprintf (&srchstr, "rpc-auth.%s.%s", auth->name,
- volname);
- if (spec == -1) {
- count = -1;
- goto err;
- }
-
- spec = RPCSVC_AUTH_DONTCARE;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- spec = RPCSVC_AUTH_ACCEPT;
- else
- spec = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
- GF_FREE (srchstr);
- final = rpcsvc_combine_gen_spec_volume_checks (gen, spec);
- if (final == RPCSVC_AUTH_ACCEPT) {
- autharr[count] = auth->auth->authnum;
- ++count;
- }
+ ret = dict_get_str_boolean(svc->options, srchstr, 0xC00FFEE);
+ GF_FREE(srchstr);
+
+ switch (ret) {
+ case _gf_true:
+ autharr[count] = auth->auth->authnum;
+ ++count;
+ break;
+
+ default:
+ /* nothing to do */
+ break;
}
+ }
err:
- return count;
+ return count;
}
gid_t *
-rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen)
+rpcsvc_auth_unix_auxgids(rpcsvc_request_t *req, int *arrlen)
{
- if ((!req) || (!arrlen))
- return NULL;
+ if ((!req) || (!arrlen))
+ return NULL;
- /* In case of AUTH_NULL auxgids are not used */
- switch (req->cred.flavour) {
+ /* In case of AUTH_NULL auxgids are not used */
+ switch (req->cred.flavour) {
case AUTH_UNIX:
case AUTH_GLUSTERFS:
case AUTH_GLUSTERFS_v2:
- break;
+ case AUTH_GLUSTERFS_v3:
+ break;
default:
- gf_log ("rpc", GF_LOG_DEBUG, "auth type not unix or glusterfs");
- return NULL;
- }
+ gf_log("rpc", GF_LOG_DEBUG, "auth type not unix or glusterfs");
+ return NULL;
+ }
- *arrlen = req->auxgidcount;
- if (*arrlen == 0)
- return NULL;
+ *arrlen = req->auxgidcount;
+ if (*arrlen == 0)
+ return NULL;
- return &req->auxgids[0];
+ return &req->auxgids[0];
}
diff --git a/rpc/rpc-lib/src/rpcsvc-common.h b/rpc/rpc-lib/src/rpcsvc-common.h
index 81f79811612..6c4ec49a6ef 100644
--- a/rpc/rpc-lib/src/rpcsvc-common.h
+++ b/rpc/rpc-lib/src/rpcsvc-common.h
@@ -12,62 +12,102 @@
#define _RPCSVC_COMMON_H
#include <pthread.h>
-#include "list.h"
-#include "compat.h"
-#include "glusterfs.h"
-#include "dict.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
typedef enum {
- RPCSVC_EVENT_ACCEPT,
- RPCSVC_EVENT_DISCONNECT,
- RPCSVC_EVENT_TRANSPORT_DESTROY,
- RPCSVC_EVENT_LISTENER_DEAD,
+ RPCSVC_EVENT_ACCEPT,
+ RPCSVC_EVENT_DISCONNECT,
+ RPCSVC_EVENT_TRANSPORT_DESTROY,
+ RPCSVC_EVENT_LISTENER_DEAD,
} rpcsvc_event_t;
-
struct rpcsvc_state;
-typedef int (*rpcsvc_notify_t) (struct rpcsvc_state *, void *mydata,
- rpcsvc_event_t, void *data);
+typedef int (*rpcsvc_notify_t)(struct rpcsvc_state *, void *mydata,
+ rpcsvc_event_t, void *data);
+struct drc_globals;
+typedef struct drc_globals rpcsvc_drc_globals_t;
/* Contains global state required for all the RPC services.
*/
typedef struct rpcsvc_state {
+ /* Contains list of (program, version) handlers.
+ * other options.
+ */
+
+ pthread_rwlock_t rpclock;
+
+ /* List of the authentication schemes available. */
+ struct list_head authschemes;
+
+ /* Reference to the options */
+ dict_t *options;
+
+ uid_t anonuid;
+ gid_t anongid;
+ glusterfs_ctx_t *ctx;
+
+ /* list of connections which will listen for incoming connections */
+ struct list_head listeners;
+
+ /* list of programs registered with rpcsvc */
+ struct list_head programs;
+
+ /* list of notification callbacks */
+ struct list_head notify;
+ int notify_count;
+
+ unsigned int memfactor;
+
+ xlator_t *xl; /* xlator */
+ void *mydata;
+ rpcsvc_notify_t notifyfn;
+ struct mem_pool *rxpool;
+ rpcsvc_drc_globals_t *drc;
+
+ /* per-client limit of outstanding rpc requests */
+ int outstanding_rpc_limit;
+ gf_boolean_t addr_namelookup;
+ /* determine whether throttling is needed, by default OFF */
+ gf_boolean_t throttle;
+ /* Allow insecure ports. */
+ gf_boolean_t allow_insecure;
+ gf_boolean_t register_portmap;
+ gf_boolean_t root_squash;
+ gf_boolean_t all_squash;
+} rpcsvc_t;
- /* Contains list of (program, version) handlers.
- * other options.
- */
-
- pthread_mutex_t rpclock;
-
- unsigned int memfactor;
-
- /* List of the authentication schemes available. */
- struct list_head authschemes;
+/* DRC START */
+enum drc_op_type { DRC_NA = 0, DRC_IDEMPOTENT = 1, DRC_NON_IDEMPOTENT = 2 };
+typedef enum drc_op_type drc_op_type_t;
- /* Reference to the options */
- dict_t *options;
+enum drc_type { DRC_TYPE_NONE = 0, DRC_TYPE_IN_MEMORY = 1 };
+typedef enum drc_type drc_type_t;
- /* Allow insecure ports. */
- int allow_insecure;
- gf_boolean_t register_portmap;
- glusterfs_ctx_t *ctx;
+enum drc_lru_factor {
+ DRC_LRU_5_PC = 20,
+ DRC_LRU_10_PC = 10,
+ DRC_LRU_25_PC = 4,
+ DRC_LRU_50_PC = 2
+};
+typedef enum drc_lru_factor drc_lru_factor_t;
- /* list of connections which will listen for incoming connections */
- struct list_head listeners;
+enum drc_xid_state { DRC_XID_MONOTONOUS = 0, DRC_XID_WRAPPED = 1 };
+typedef enum drc_xid_state drc_xid_state_t;
- /* list of programs registered with rpcsvc */
- struct list_head programs;
+enum drc_op_state { DRC_OP_IN_TRANSIT = 0, DRC_OP_CACHED = 1 };
+typedef enum drc_op_state drc_op_state_t;
- /* list of notification callbacks */
- struct list_head notify;
- int notify_count;
+enum drc_policy { DRC_LRU = 0 };
+typedef enum drc_policy drc_policy_t;
- void *mydata; /* This is xlator */
- rpcsvc_notify_t notifyfn;
- struct mem_pool *rxpool;
-} rpcsvc_t;
+/* Default policies for DRC */
+#define DRC_DEFAULT_TYPE DRC_TYPE_IN_MEMORY
+#define DRC_DEFAULT_CACHE_SIZE 0x20000
+#define DRC_DEFAULT_LRU_FACTOR DRC_LRU_25_PC
+/* DRC END */
#endif /* #ifndef _RPCSVC_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 98cc88d63b1..39910d481bf 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -8,25 +8,20 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "rpc-transport.h"
-#include "dict.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "list.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "xdr-rpc.h"
-#include "iobuf.h"
-#include "globals.h"
+#include <glusterfs/iobuf.h>
#include "xdr-common.h"
#include "xdr-generic.h"
#include "rpc-common-xdr.h"
+#include <glusterfs/syncop.h>
+#include "rpc-drc.h"
+#include "protocol-common.h"
#include <errno.h>
#include <pthread.h>
@@ -38,901 +33,1364 @@
#include <fnmatch.h>
#include <stdarg.h>
#include <stdio.h>
+#include <dlfcn.h>
+
+#ifdef IPV6_DEFAULT
+#include <netconfig.h>
+#endif
#include "xdr-rpcclnt.h"
+#include <glusterfs/glusterfs-acl.h>
-#define ACL_PROGRAM 100227
+#ifndef PTHREAD_MUTEX_ADAPTIVE_NP
+#define PTHREAD_MUTEX_ADAPTIVE_NP PTHREAD_MUTEX_DEFAULT
+#endif
-struct rpcsvc_program gluster_dump_prog;
+static struct rpcsvc_program gluster_dump_prog;
-#define rpcsvc_alloc_request(svc, request) \
- do { \
- request = (rpcsvc_request_t *) mem_get ((svc)->rxpool); \
- memset (request, 0, sizeof (rpcsvc_request_t)); \
- } while (0)
+#define rpcsvc_alloc_request(svc, request) \
+ do { \
+ request = (rpcsvc_request_t *)mem_get((svc)->rxpool); \
+ if (request) { \
+ memset(request, 0, sizeof(rpcsvc_request_t)); \
+ } else { \
+ gf_log("rpcsvc", GF_LOG_ERROR, \
+ "error getting memory for rpc request"); \
+ } \
+ } while (0)
rpcsvc_listener_t *
-rpcsvc_get_listener (rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans);
+rpcsvc_get_listener(rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans);
int
-rpcsvc_notify (rpc_transport_t *trans, void *mydata,
- rpc_transport_event_t event, void *data, ...);
+rpcsvc_notify(rpc_transport_t *trans, void *mydata, rpc_transport_event_t event,
+ void *data, ...);
+void *
+rpcsvc_request_handler(void *arg);
-rpcsvc_notify_wrapper_t *
-rpcsvc_notify_wrapper_alloc (void)
+static int
+rpcsvc_match_subnet_v4(const char *addrtok, const char *ipaddr);
+
+static void
+rpcsvc_toggle_queue_status(rpcsvc_program_t *prog,
+ rpcsvc_request_queue_t *queue,
+ unsigned long status[])
{
- rpcsvc_notify_wrapper_t *wrapper = NULL;
+ unsigned queue_index = queue - prog->request_queue;
- wrapper = GF_CALLOC (1, sizeof (*wrapper), gf_common_mt_rpcsvc_wrapper_t);
- if (!wrapper) {
- goto out;
+ status[queue_index / __BITS_PER_LONG] ^= (1UL << (queue_index %
+ __BITS_PER_LONG));
+}
+
+int
+rpcsvc_get_free_queue_index(rpcsvc_program_t *prog)
+{
+ unsigned i, j = 0;
+
+ for (i = 0; i < EVENT_MAX_THREADS / __BITS_PER_LONG; i++)
+ if (prog->request_queue_status[i] != ULONG_MAX) {
+ j = __builtin_ctzl(~prog->request_queue_status[i]);
+ break;
}
- INIT_LIST_HEAD (&wrapper->list);
-out:
- return wrapper;
+ if (i == EVENT_MAX_THREADS / __BITS_PER_LONG)
+ return -1;
+
+ prog->request_queue_status[i] |= (1UL << j);
+ return i * __BITS_PER_LONG + j;
}
+rpcsvc_notify_wrapper_t *
+rpcsvc_notify_wrapper_alloc(void)
+{
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
+
+ wrapper = GF_CALLOC(1, sizeof(*wrapper), gf_common_mt_rpcsvc_wrapper_t);
+ if (!wrapper) {
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&wrapper->list);
+out:
+ return wrapper;
+}
void
-rpcsvc_listener_destroy (rpcsvc_listener_t *listener)
+rpcsvc_listener_destroy(rpcsvc_listener_t *listener)
{
- rpcsvc_t *svc = NULL;
+ rpcsvc_t *svc = NULL;
- if (!listener) {
- goto out;
- }
+ if (!listener) {
+ goto out;
+ }
- svc = listener->svc;
- if (!svc) {
- goto listener_free;
- }
+ svc = listener->svc;
+ if (!svc) {
+ goto listener_free;
+ }
- pthread_mutex_lock (&svc->rpclock);
- {
- list_del_init (&listener->list);
- }
- pthread_mutex_unlock (&svc->rpclock);
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_del_init(&listener->list);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
listener_free:
- GF_FREE (listener);
+ GF_FREE(listener);
out:
- return;
+ return;
}
rpcsvc_vector_sizer
-rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
- uint32_t progver, uint32_t procnum)
+rpcsvc_get_program_vector_sizer(rpcsvc_t *svc, uint32_t prognum,
+ uint32_t progver, int procnum)
{
- rpcsvc_program_t *program = NULL;
- char found = 0;
+ rpcsvc_program_t *program = NULL;
+ char found = 0;
- if (!svc)
- return NULL;
+ if (!svc)
+ return NULL;
- pthread_mutex_lock (&svc->rpclock);
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ /* Find the matching RPC program from registered list */
+ list_for_each_entry(program, &svc->programs, program)
{
- list_for_each_entry (program, &svc->programs, program) {
- if ((program->prognum == prognum)
- && (program->progver == progver)) {
- found = 1;
- break;
- }
- }
+ if ((program->prognum == prognum) &&
+ (program->progver == progver)) {
+ found = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&svc->rpclock);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
- if (found)
- return program->actors[procnum].vector_sizer;
- else
- return NULL;
+ if (found) {
+ /* Make sure the requested procnum is supported by RPC prog */
+ if ((procnum < 0) || (procnum >= program->numactors)) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC procedure %d not available for Program %s", procnum,
+ program->progname);
+ return NULL;
+ }
+
+ /* SUCCESS: Supported procedure */
+ return program->actors[procnum].vector_sizer;
+ }
+
+ return NULL; /* FAIL */
}
-/* This needs to change to returning errors, since
- * we need to return RPC specific error messages when some
- * of the pointers below are NULL.
- */
-rpcsvc_actor_t *
-rpcsvc_program_actor (rpcsvc_request_t *req)
+gf_boolean_t
+rpcsvc_can_outstanding_req_be_ignored(rpcsvc_request_t *req)
{
- rpcsvc_program_t *program = NULL;
- int err = SYSTEM_ERR;
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_t *svc = NULL;
- char found = 0;
+ /*
+ * If outstanding_rpc_limit is reached because of blocked locks and
+ * throttling is attempted then no unlock requests will be received. So
+ * the outstanding request count will never change i.e. it will always
+ * be equal to the limit. This also leads to ping timer expiry on
+ * client.
+ */
+
+ /*
+ * This is a hack and a necessity until grantedlock == fop completion.
+ * Ideally if we get a blocking lock request which cannot be granted
+ * right now, we should unwind the fop saying “request registered, will
+ * notify you when grantedâ€, which is very hard to implement at the
+ * moment. Until we bring in such mechanism, we will need to live with
+ * not rate-limiting INODELK/ENTRYLK/LK fops
+ */
+
+ if ((req->prognum == GLUSTER_FOP_PROGRAM) &&
+ (req->progver == GLUSTER_FOP_VERSION)) {
+ if ((req->procnum == GFS3_OP_INODELK) ||
+ (req->procnum == GFS3_OP_FINODELK) ||
+ (req->procnum == GFS3_OP_ENTRYLK) ||
+ (req->procnum == GFS3_OP_FENTRYLK) || (req->procnum == GFS3_OP_LK))
+ return _gf_true;
+ }
+ return _gf_false;
+}
- if (!req)
- goto err;
+int
+rpcsvc_request_outstanding(rpcsvc_request_t *req, int delta)
+{
+ int ret = -1;
+ int old_count = 0;
+ int new_count = 0;
+ int limit = 0;
+ gf_boolean_t throttle = _gf_false;
- svc = req->svc;
- pthread_mutex_lock (&svc->rpclock);
- {
- list_for_each_entry (program, &svc->programs, program) {
- if (program->prognum == req->prognum) {
- err = PROG_MISMATCH;
- }
+ if (!req)
+ goto out;
- if ((program->prognum == req->prognum)
- && (program->progver == req->progver)) {
- found = 1;
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
-
- if (!found) {
- if (err != PROG_MISMATCH) {
- /* log in DEBUG when nfs clients try to see if
- * ACL requests are accepted by nfs server
- */
- gf_log (GF_RPCSVC, (req->prognum == ACL_PROGRAM) ?
- GF_LOG_DEBUG : GF_LOG_WARNING,
- "RPC program not available (req %u %u)",
- req->prognum, req->progver);
- err = PROG_UNAVAIL;
- goto err;
- }
+ throttle = rpcsvc_get_throttle(req->svc);
+ if (!throttle) {
+ ret = 0;
+ goto out;
+ }
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "RPC program version not available (req %u %u)",
- req->prognum, req->progver);
- goto err;
- }
- req->prog = program;
- if (!program->actors) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "RPC Actor not found for program %s %d",
- program->progname, program->prognum);
- err = SYSTEM_ERR;
- goto err;
- }
+ if (rpcsvc_can_outstanding_req_be_ignored(req)) {
+ ret = 0;
+ goto out;
+ }
- if ((req->procnum < 0) || (req->procnum >= program->numactors)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
- " available for procedure %d in %s", req->procnum,
- program->progname);
- err = PROC_UNAVAIL;
- goto err;
- }
+ pthread_mutex_lock(&req->trans->lock);
+ {
+ limit = req->svc->outstanding_rpc_limit;
+ if (!limit)
+ goto unlock;
- actor = &program->actors[req->procnum];
- if (!actor->actor) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
- " available for procedure %d in %s", req->procnum,
- program->progname);
- err = PROC_UNAVAIL;
- actor = NULL;
- goto err;
- }
+ old_count = req->trans->outstanding_rpc_count;
+ req->trans->outstanding_rpc_count += delta;
+ new_count = req->trans->outstanding_rpc_count;
- err = SUCCESS;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s",
- program->progname, actor->procname);
-err:
- if (req)
- req->rpc_err = err;
+ if (old_count <= limit && new_count > limit)
+ ret = rpc_transport_throttle(req->trans, _gf_true);
- return actor;
+ if (old_count > limit && new_count <= limit)
+ ret = rpc_transport_throttle(req->trans, _gf_false);
+ }
+unlock:
+ pthread_mutex_unlock(&req->trans->lock);
+
+out:
+ return ret;
}
+/* This needs to change to returning errors, since
+ * we need to return RPC specific error messages when some
+ * of the pointers below are NULL.
+ */
+rpcsvc_actor_t *
+rpcsvc_program_actor(rpcsvc_request_t *req)
+{
+ rpcsvc_program_t *program = NULL;
+ int err = SYSTEM_ERR;
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_t *svc = NULL;
+ char found = 0;
+ char *peername = NULL;
+
+ if (!req)
+ goto err;
+
+ svc = req->svc;
+ peername = req->trans->peerinfo.identifier;
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(program, &svc->programs, program)
+ {
+ if (program->prognum == req->prognum) {
+ err = PROG_MISMATCH;
+ }
+
+ if ((program->prognum == req->prognum) &&
+ (program->progver == req->progver)) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ if (!found) {
+ if (err != PROG_MISMATCH) {
+ /* log in DEBUG when nfs clients try to see if
+ * ACL requests are accepted by nfs server
+ */
+ gf_log(
+ GF_RPCSVC,
+ (req->prognum == ACL_PROGRAM) ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "RPC program not available (req %u %u) for %s", req->prognum,
+ req->progver, peername);
+ err = PROG_UNAVAIL;
+ goto err;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "RPC program version not available (req %u %u) for %s",
+ req->prognum, req->progver, peername);
+ goto err;
+ }
+ req->prog = program;
+ if (!program->actors) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "RPC Actor not found for program %s %d for %s",
+ program->progname, program->prognum, peername);
+ err = SYSTEM_ERR;
+ goto err;
+ }
+
+ if ((req->procnum < 0) || (req->procnum >= program->numactors)) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC Program procedure not"
+ " available for procedure %d in %s for %s",
+ req->procnum, program->progname, peername);
+ err = PROC_UNAVAIL;
+ goto err;
+ }
+
+ actor = &program->actors[req->procnum];
+ if (!actor->actor) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC Program procedure not"
+ " available for procedure %d in %s for %s",
+ req->procnum, program->progname, peername);
+ err = PROC_UNAVAIL;
+ actor = NULL;
+ goto err;
+ }
+
+ if (svc->xl->ctx->measure_latency) {
+ timespec_now(&req->begin);
+ }
+
+ req->ownthread = program->ownthread;
+ req->synctask = program->synctask;
+
+ err = SUCCESS;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s",
+ program->progname, actor->procname, peername);
+err:
+ if (req)
+ req->rpc_err = err;
+
+ return actor;
+}
/* this procedure can only pass 4 arguments to registered notifyfn. To send more
* arguments call wrapper->notify directly.
*/
-inline void
-rpcsvc_program_notify (rpcsvc_listener_t *listener, rpcsvc_event_t event,
- void *data)
+static void
+rpcsvc_program_notify(rpcsvc_listener_t *listener, rpcsvc_event_t event,
+ void *data)
{
- rpcsvc_notify_wrapper_t *wrapper = NULL;
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
- if (!listener) {
- goto out;
- }
+ if (!listener) {
+ goto out;
+ }
- list_for_each_entry (wrapper, &listener->svc->notify, list) {
- if (wrapper->notify) {
- wrapper->notify (listener->svc,
- wrapper->data,
- event, data);
- }
+ list_for_each_entry(wrapper, &listener->svc->notify, list)
+ {
+ if (wrapper->notify) {
+ wrapper->notify(listener->svc, wrapper->data, event, data);
}
+ }
out:
- return;
+ return;
}
-
-inline int
-rpcsvc_accept (rpcsvc_t *svc, rpc_transport_t *listen_trans,
- rpc_transport_t *new_trans)
+static int
+rpcsvc_accept(rpcsvc_t *svc, rpc_transport_t *listen_trans,
+ rpc_transport_t *new_trans)
{
- rpcsvc_listener_t *listener = NULL;
- int32_t ret = -1;
+ rpcsvc_listener_t *listener = NULL;
+ int32_t ret = -1;
- listener = rpcsvc_get_listener (svc, -1, listen_trans);
- if (listener == NULL) {
- goto out;
- }
+ listener = rpcsvc_get_listener(svc, -1, listen_trans);
+ if (listener == NULL) {
+ goto out;
+ }
- rpcsvc_program_notify (listener, RPCSVC_EVENT_ACCEPT, new_trans);
- ret = 0;
+ rpcsvc_program_notify(listener, RPCSVC_EVENT_ACCEPT, new_trans);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
void
-rpcsvc_request_destroy (rpcsvc_request_t *req)
+rpcsvc_request_destroy(rpcsvc_request_t *req)
{
- if (!req) {
- goto out;
- }
+ if (!req) {
+ goto out;
+ }
- if (req->iobref) {
- iobref_unref (req->iobref);
- }
+ if (req->iobref) {
+ iobref_unref(req->iobref);
+ }
+
+ /* This marks the "end" of an RPC request. Reply is
+ completely written to the socket and is on the way
+ to the client. It is time to decrement the
+ outstanding request counter by 1.
+ */
+ if (req->prognum) // Only for initialized requests
+ rpcsvc_request_outstanding(req, -1);
- rpc_transport_unref (req->trans);
+ rpc_transport_unref(req->trans);
- mem_put (req);
+ GF_FREE(req->auxgidlarge);
+
+ mem_put(req);
out:
- return;
+ return;
}
+rpcsvc_request_t *
+rpcsvc_request_init(rpcsvc_t *svc, rpc_transport_t *trans,
+ struct rpc_msg *callmsg, struct iovec progmsg,
+ rpc_transport_pollin_t *msg, rpcsvc_request_t *req)
+{
+ int i = 0;
+
+ if ((!trans) || (!callmsg) || (!req) || (!msg))
+ return NULL;
+
+ /* We start a RPC request as always denied. */
+ req->rpc_status = MSG_DENIED;
+ req->xid = rpc_call_xid(callmsg);
+ req->prognum = rpc_call_program(callmsg);
+ req->progver = rpc_call_progver(callmsg);
+ req->procnum = rpc_call_progproc(callmsg);
+ req->trans = rpc_transport_ref(trans);
+ req->count = msg->count;
+ req->msg[0] = progmsg;
+ req->iobref = iobref_ref(msg->iobref);
+ if (msg->vectored) {
+ /* msg->vector[MAX_IOVEC] is defined in structure. prevent a
+ out of bound access */
+ for (i = 1; i < min(msg->count, MAX_IOVEC); i++) {
+ req->msg[i] = msg->vector[i];
+ }
+ }
+
+ req->svc = svc;
+ req->trans_private = msg->private;
+
+ INIT_LIST_HEAD(&req->txlist);
+ INIT_LIST_HEAD(&req->request_list);
+ req->payloadsize = 0;
+
+ /* By this time, the data bytes for the auth scheme would have already
+ * been copied into the required sections of the req structure,
+ * we just need to fill in the meta-data about it now.
+ */
+ rpcsvc_auth_request_init(req, callmsg);
+ return req;
+}
rpcsvc_request_t *
-rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans,
- struct rpc_msg *callmsg,
- struct iovec progmsg, rpc_transport_pollin_t *msg,
- rpcsvc_request_t *req)
+rpcsvc_request_create(rpcsvc_t *svc, rpc_transport_t *trans,
+ rpc_transport_pollin_t *msg)
{
- int i = 0;
+ char *msgbuf = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ rpcsvc_request_t *req = NULL;
+ size_t msglen = 0;
+ int ret = -1;
+
+ if (!svc || !trans || !svc->rxpool)
+ return NULL;
+
+ /* We need to allocate the request before actually calling
+ * rpcsvc_request_init on the request so that we, can fill the auth
+ * data directly into the request structure from the message iobuf.
+ * This avoids a need to keep a temp buffer into which the auth data
+ * would've been copied otherwise.
+ */
+ rpcsvc_alloc_request(svc, req);
+ if (!req) {
+ goto err;
+ }
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ ret = xdr_to_rpc_call(msgbuf, msglen, &rpcmsg, &progmsg, req->cred.authdata,
+ req->verf.authdata);
+
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "RPC call decoding failed");
+ rpcsvc_request_seterr(req, GARBAGE_ARGS);
+ req->trans = rpc_transport_ref(trans);
+ req->svc = svc;
+ goto err;
+ }
+
+ ret = -1;
+ rpcsvc_request_init(svc, trans, &rpcmsg, progmsg, msg, req);
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "received rpc-message "
+ "(XID: 0x%" GF_PRI_RPC_XID ", Ver: %" GF_PRI_RPC_VERSION
+ ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from rpc-transport (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), trans->name);
+
+ /* We just received a new request from the wire. Account for
+ it in the outsanding request counter to make sure we don't
+ ingest too many concurrent requests from the same client.
+ */
+ if (req->prognum) // Only for initialized requests
+ ret = rpcsvc_request_outstanding(req, +1);
+
+ if (rpc_call_rpcvers(&rpcmsg) != 2) {
+ /* LOG- TODO: print rpc version, also print the peerinfo
+ from transport */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC version not supported "
+ "(XID: 0x%" GF_PRI_RPC_XID ", Ver: %" GF_PRI_RPC_VERSION
+ ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from trans (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), trans->name);
+ rpcsvc_request_seterr(req, RPC_MISMATCH);
+ goto err;
+ }
+
+ ret = rpcsvc_authenticate(req);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ /* No need to set auth_err, that is the responsibility of
+ * the authentication handler since only that know what exact
+ * error happened.
+ */
+ rpcsvc_request_seterr(req, AUTH_ERROR);
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "auth failed on request. "
+ "(XID: 0x%" GF_PRI_RPC_XID ", Ver: %" GF_PRI_RPC_VERSION
+ ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from trans (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), trans->name);
+ ret = -1;
+ goto err;
+ }
+
+ /* If the error is not RPC_MISMATCH, we consider the call as accepted
+ * since we are not handling authentication failures for now.
+ */
+ req->rpc_status = MSG_ACCEPTED;
+ req->reply = NULL;
+ ret = 0;
+err:
+ if (ret == -1) {
+ ret = rpcsvc_error_reply(req);
+ if (ret)
+ gf_log("rpcsvc", GF_LOG_WARNING, "failed to queue error reply");
+ req = NULL;
+ }
- if ((!trans) || (!callmsg)|| (!req) || (!msg))
- return NULL;
+ return req;
+}
- /* We start a RPC request as always denied. */
- req->rpc_status = MSG_DENIED;
- req->xid = rpc_call_xid (callmsg);
- req->prognum = rpc_call_program (callmsg);
- req->progver = rpc_call_progver (callmsg);
- req->procnum = rpc_call_progproc (callmsg);
- req->trans = rpc_transport_ref (trans);
- req->count = msg->count;
- req->msg[0] = progmsg;
- req->iobref = iobref_ref (msg->iobref);
- if (msg->vectored) {
- /* msg->vector[2] is defined in structure. prevent a
- out of bound access */
- for (i = 1; i < min (msg->count, 2); i++) {
- req->msg[i] = msg->vector[i];
- }
- }
+int
+rpcsvc_check_and_reply_error(int ret, call_frame_t *frame, void *opaque)
+{
+ rpcsvc_request_t *req = NULL;
- req->svc = svc;
- req->trans_private = msg->private;
+ req = opaque;
- INIT_LIST_HEAD (&req->txlist);
- req->payloadsize = 0;
+ if (ret)
+ gf_log("rpcsvc", GF_LOG_ERROR,
+ "rpc actor (%d:%d:%d) failed to complete successfully",
+ req->prognum, req->progver, req->procnum);
- /* By this time, the data bytes for the auth scheme would have already
- * been copied into the required sections of the req structure,
- * we just need to fill in the meta-data about it now.
- */
- req->cred.flavour = rpc_call_cred_flavour (callmsg);
- req->cred.datalen = rpc_call_cred_len (callmsg);
- req->verf.flavour = rpc_call_verf_flavour (callmsg);
- req->verf.datalen = rpc_call_verf_len (callmsg);
+ if (ret == RPCSVC_ACTOR_ERROR) {
+ ret = rpcsvc_error_reply(req);
+ if (ret)
+ gf_log("rpcsvc", GF_LOG_WARNING, "failed to queue error reply");
+ }
- /* AUTH */
- rpcsvc_auth_request_init (req);
- return req;
+ return 0;
}
-
-rpcsvc_request_t *
-rpcsvc_request_create (rpcsvc_t *svc, rpc_transport_t *trans,
- rpc_transport_pollin_t *msg)
+void
+rpcsvc_queue_event_thread_death(rpcsvc_t *svc, rpcsvc_program_t *prog, int gen)
{
- char *msgbuf = NULL;
- struct rpc_msg rpcmsg;
- struct iovec progmsg; /* RPC Program payload */
- rpcsvc_request_t *req = NULL;
- size_t msglen = 0;
- int ret = -1;
-
- if (!svc || !trans)
- return NULL;
-
- /* We need to allocate the request before actually calling
- * rpcsvc_request_init on the request so that we, can fill the auth
- * data directly into the request structure from the message iobuf.
- * This avoids a need to keep a temp buffer into which the auth data
- * would've been copied otherwise.
- */
- rpcsvc_alloc_request (svc, req);
- if (!req) {
- goto err;
- }
+ rpcsvc_request_queue_t *queue = NULL;
+ int num = 0;
+ void *value = NULL;
+ rpcsvc_request_t *req = NULL;
+ char empty = 0;
+
+ value = pthread_getspecific(prog->req_queue_key);
+ if (value == NULL) {
+ return;
+ }
- msgbuf = msg->vector[0].iov_base;
- msglen = msg->vector[0].iov_len;
+ num = ((unsigned long)value) - 1;
- ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg,
- req->cred.authdata,req->verf.authdata);
+ queue = &prog->request_queue[num];
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "RPC call decoding failed");
- rpcsvc_request_seterr (req, GARBAGE_ARGS);
- req->trans = rpc_transport_ref (trans);
- req->svc = svc;
- goto err;
- }
+ if (queue->gen == gen) {
+ /* duplicate event */
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "not queuing duplicate event thread death. "
+ "queue %d program %s",
+ num, prog->progname);
+ return;
+ }
- ret = -1;
- rpcsvc_request_init (svc, trans, &rpcmsg, progmsg, msg, req);
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "received rpc-message (XID: 0x%lx, "
- "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) from"
- " rpc-transport (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- trans->name);
-
- if (rpc_call_rpcvers (&rpcmsg) != 2) {
- /* LOG- TODO: print rpc version, also print the peerinfo
- from transport */
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC version not supported "
- "(XID: 0x%lx, Ver: %ld, Prog: %ld, ProgVers: %ld, "
- "Proc: %ld) from trans (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- trans->name);
- rpcsvc_request_seterr (req, RPC_MISMATCH);
- goto err;
- }
+ rpcsvc_alloc_request(svc, req);
+ req->prognum = RPCSVC_INFRA_PROGRAM;
+ req->procnum = RPCSVC_PROC_EVENT_THREAD_DEATH;
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "queuing event thread death request to queue %d of program %s", num,
+ prog->progname);
- ret = rpcsvc_authenticate (req);
- if (ret == RPCSVC_AUTH_REJECT) {
- /* No need to set auth_err, that is the responsibility of
- * the authentication handler since only that know what exact
- * error happened.
- */
- rpcsvc_request_seterr (req, AUTH_ERROR);
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "auth failed on request. "
- "(XID: 0x%lx, Ver: %ld, Prog: %ld, ProgVers: %ld, "
- "Proc: %ld) from trans (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- trans->name);
- ret = -1;
- goto err;
- }
+ pthread_mutex_lock(&queue->queue_lock);
+ {
+ empty = list_empty(&queue->request_queue);
+ list_add_tail(&req->request_list, &queue->request_queue);
+ queue->gen = gen;
- /* If the error is not RPC_MISMATCH, we consider the call as accepted
- * since we are not handling authentication failures for now.
- */
- req->rpc_status = MSG_ACCEPTED;
- ret = 0;
-err:
- if (ret == -1) {
- ret = rpcsvc_error_reply (req);
- if (ret)
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "failed to queue error reply");
- req = NULL;
- }
+ if (empty && queue->waiting)
+ pthread_cond_signal(&queue->queue_cond);
+ }
+ pthread_mutex_unlock(&queue->queue_lock);
- return req;
+ return;
}
-
int
-rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans,
- rpc_transport_pollin_t *msg)
+rpcsvc_handle_event_thread_death(rpcsvc_t *svc, rpc_transport_t *trans, int gen)
{
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_request_t *req = NULL;
- int ret = -1;
- uint16_t port = 0;
- gf_boolean_t is_unix = _gf_false;
- gf_boolean_t unprivileged = _gf_false;
+ rpcsvc_program_t *prog = NULL;
- if (!trans || !svc)
- return -1;
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ if (prog->ownthread)
+ rpcsvc_queue_event_thread_death(svc, prog, gen);
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ return 0;
+}
- switch (trans->peerinfo.sockaddr.ss_family) {
+int
+rpcsvc_handle_rpc_call(rpcsvc_t *svc, rpc_transport_t *trans,
+ rpc_transport_pollin_t *msg)
+{
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_actor actor_fn = NULL;
+ rpcsvc_request_t *req = NULL;
+ int ret = -1;
+ uint16_t port = 0;
+ gf_boolean_t is_unix = _gf_false, empty = _gf_false;
+ gf_boolean_t unprivileged = _gf_false, spawn_request_handler = 0;
+ drc_cached_op_t *reply = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+ rpcsvc_request_queue_t *queue = NULL;
+ long num = 0;
+ void *value = NULL;
+
+ if (!trans || !svc)
+ return -1;
+
+ switch (trans->peerinfo.sockaddr.ss_family) {
case AF_INET:
- port = ((struct sockaddr_in *)&trans->peerinfo.sockaddr)->sin_port;
- break;
+ port = ((struct sockaddr_in *)&trans->peerinfo.sockaddr)->sin_port;
+ break;
case AF_INET6:
- port = ((struct sockaddr_in6 *)&trans->peerinfo.sockaddr)->sin6_port;
- break;
+ port = ((struct sockaddr_in6 *)&trans->peerinfo.sockaddr)
+ ->sin6_port;
+ break;
case AF_UNIX:
- is_unix = _gf_true;
- break;
+ is_unix = _gf_true;
+ break;
default:
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "invalid address family (%d)",
- trans->peerinfo.sockaddr.ss_family);
- return -1;
- }
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "invalid address family (%d)",
+ trans->peerinfo.sockaddr.ss_family);
+ return -1;
+ }
+ if (is_unix == _gf_false) {
+ port = ntohs(port);
+ gf_log("rpcsvc", GF_LOG_TRACE, "Client port: %d", (int)port);
- if (is_unix == _gf_false) {
- port = ntohs (port);
+ if (port >= 1024)
+ unprivileged = _gf_true;
+ }
- gf_log ("rpcsvc", GF_LOG_TRACE, "Client port: %d", (int)port);
+ req = rpcsvc_request_create(svc, trans, msg);
+ if (!req)
+ goto out;
- if (port > 1024)
- unprivileged = _gf_true;
- }
+ if (!rpcsvc_request_accepted(req))
+ goto err_reply;
- req = rpcsvc_request_create (svc, trans, msg);
- if (!req)
- goto err;
+ actor = rpcsvc_program_actor(req);
+ if (!actor)
+ goto err_reply;
- if (!rpcsvc_request_accepted (req))
- goto err_reply;
+ if (0 == svc->allow_insecure && unprivileged && !actor->unprivileged) {
+ /* Non-privileged user, fail request */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Request received from non-"
+ "privileged port. Failing request for %s.",
+ req->trans->peerinfo.identifier);
+ req->rpc_status = MSG_DENIED;
+ req->rpc_err = AUTH_ERROR;
+ req->auth_err = RPCSVC_AUTH_REJECT;
+ goto err_reply;
+ }
- actor = rpcsvc_program_actor (req);
- if (!actor)
- goto err_reply;
+ /* DRC */
+ if (rpcsvc_need_drc(req)) {
+ drc = req->svc->drc;
- if (0 == svc->allow_insecure && unprivileged && !actor->unprivileged) {
- /* Non-privileged user, fail request */
- gf_log ("glusterd", GF_LOG_ERROR,
- "Request received from non-"
- "privileged port. Failing request");
- rpcsvc_request_destroy (req);
- return -1;
- }
+ LOCK(&drc->lock);
+ {
+ reply = rpcsvc_drc_lookup(req);
+
+ /* retransmission of completed request, send cached reply */
+ if (reply && reply->state == DRC_OP_CACHED) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "duplicate request:"
+ " XID: 0x%x",
+ req->xid);
+ ret = rpcsvc_send_cached_reply(req, reply);
+ drc->cache_hits++;
+ UNLOCK(&drc->lock);
+ goto out;
- if (req->rpc_err == SUCCESS) {
- /* Before going to xlator code, set the THIS properly */
- THIS = svc->mydata;
+ } /* retransmitted request, original op in transit, drop it */
+ else if (reply && reply->state == DRC_OP_IN_TRANSIT) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "op in transit,"
+ " discarding. XID: 0x%x",
+ req->xid);
+ ret = 0;
+ drc->intransit_hits++;
+ rpcsvc_request_destroy(req);
+ UNLOCK(&drc->lock);
+ goto out;
- if (req->count == 2) {
- if (actor->vector_actor) {
- ret = actor->vector_actor (req, &req->msg[1], 1,
- req->iobref);
+ } /* fresh request, cache it as in-transit and proceed */
+ else {
+ ret = rpcsvc_cache_request(req);
+ }
+ }
+ UNLOCK(&drc->lock);
+ }
+
+ if (req->rpc_err == SUCCESS) {
+ /* Before going to xlator code, set the THIS properly */
+ THIS = svc->xl;
+
+ actor_fn = actor->actor;
+
+ if (!actor_fn) {
+ rpcsvc_request_seterr(req, PROC_UNAVAIL);
+ /* LOG TODO: print more info about procnum,
+ prognum etc, also print transport info */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "No vectored handler present");
+ ret = RPCSVC_ACTOR_ERROR;
+ goto err_reply;
+ }
+
+ if (req->synctask) {
+ ret = synctask_new(THIS->ctx->env, (synctask_fn_t)actor_fn,
+ rpcsvc_check_and_reply_error, NULL, req);
+ } else if (req->ownthread) {
+ value = pthread_getspecific(req->prog->req_queue_key);
+ if (value == NULL) {
+ pthread_mutex_lock(&req->prog->thr_lock);
+ {
+ num = rpcsvc_get_free_queue_index(req->prog);
+ if (num != -1) {
+ num++;
+ value = (void *)num;
+ ret = pthread_setspecific(req->prog->req_queue_key,
+ value);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "setting request queue in TLS failed");
+ rpcsvc_toggle_queue_status(
+ req->prog, &req->prog->request_queue[num - 1],
+ req->prog->request_queue_status);
+ num = -1;
} else {
- rpcsvc_request_seterr (req, PROC_UNAVAIL);
- /* LOG TODO: print more info about procnum,
- prognum etc, also print transport info */
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "No vectored handler present");
- ret = RPCSVC_ACTOR_ERROR;
+ spawn_request_handler = 1;
}
- } else if (actor->actor) {
- ret = actor->actor (req);
+ }
}
- }
+ pthread_mutex_unlock(&req->prog->thr_lock);
+ }
+
+ if (num == -1)
+ goto noqueue;
+
+ num = ((unsigned long)value) - 1;
+
+ queue = &req->prog->request_queue[num];
+
+ if (spawn_request_handler) {
+ ret = gf_thread_create(&queue->thread, NULL,
+ rpcsvc_request_handler, queue,
+ "rpcrqhnd");
+ if (!ret) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "spawned a request handler thread for queue %d",
+ (int)num);
+
+ req->prog->threadcount++;
+ } else {
+ gf_log(
+ GF_RPCSVC, GF_LOG_INFO,
+ "spawning a request handler thread for queue %d failed",
+ (int)num);
+ ret = pthread_setspecific(req->prog->req_queue_key, 0);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "resetting request queue in TLS failed");
+ }
+
+ rpcsvc_toggle_queue_status(
+ req->prog, &req->prog->request_queue[num - 1],
+ req->prog->request_queue_status);
+
+ goto noqueue;
+ }
+ }
-err_reply:
- if (ret == RPCSVC_ACTOR_ERROR) {
- ret = rpcsvc_error_reply (req);
+ pthread_mutex_lock(&queue->queue_lock);
+ {
+ empty = list_empty(&queue->request_queue);
+
+ list_add_tail(&req->request_list, &queue->request_queue);
+
+ if (empty && queue->waiting)
+ pthread_cond_signal(&queue->queue_cond);
+ }
+ pthread_mutex_unlock(&queue->queue_lock);
+
+ ret = 0;
+ } else {
+ noqueue:
+ ret = actor_fn(req);
}
+ }
- if (ret)
- gf_log ("rpcsvc", GF_LOG_WARNING, "failed to queue error reply");
+err_reply:
- /* No need to propagate error beyond this function since the reply
- * has now been queued. */
- ret = 0;
+ ret = rpcsvc_check_and_reply_error(ret, NULL, req);
+ /* No need to propagate error beyond this function since the reply
+ * has now been queued. */
+ ret = 0;
-err:
- return ret;
+out:
+ return ret;
}
-
int
-rpcsvc_handle_disconnect (rpcsvc_t *svc, rpc_transport_t *trans)
+rpcsvc_handle_disconnect(rpcsvc_t *svc, rpc_transport_t *trans)
{
- rpcsvc_event_t event;
- rpcsvc_notify_wrapper_t *wrappers = NULL, *wrapper;
- int32_t ret = -1, i = 0, wrapper_count = 0;
- rpcsvc_listener_t *listener = NULL;
+ rpcsvc_event_t event;
+ rpcsvc_notify_wrapper_t *wrappers = NULL, *wrapper;
+ int32_t ret = -1, i = 0, wrapper_count = 0;
+ rpcsvc_listener_t *listener = NULL;
- event = (trans->listener == NULL) ? RPCSVC_EVENT_LISTENER_DEAD
- : RPCSVC_EVENT_DISCONNECT;
+ event = (trans->listener == NULL) ? RPCSVC_EVENT_LISTENER_DEAD
+ : RPCSVC_EVENT_DISCONNECT;
- pthread_mutex_lock (&svc->rpclock);
- {
- if (!svc->notify_count)
- goto unlock;
-
- wrappers = GF_CALLOC (svc->notify_count, sizeof (*wrapper),
- gf_common_mt_rpcsvc_wrapper_t);
- if (!wrappers) {
- goto unlock;
- }
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ if (!svc->notify_count)
+ goto unlock;
- list_for_each_entry (wrapper, &svc->notify, list) {
- if (wrapper->notify) {
- wrappers[i++] = *wrapper;
- }
- }
+ wrappers = GF_CALLOC(svc->notify_count, sizeof(*wrapper),
+ gf_common_mt_rpcsvc_wrapper_t);
+ if (!wrappers) {
+ goto unlock;
+ }
- wrapper_count = i;
+ list_for_each_entry(wrapper, &svc->notify, list)
+ {
+ if (wrapper->notify) {
+ wrappers[i++] = *wrapper;
+ }
}
-unlock:
- pthread_mutex_unlock (&svc->rpclock);
- if (wrappers) {
- for (i = 0; i < wrapper_count; i++) {
- wrappers[i].notify (svc, wrappers[i].data,
- event, trans);
- }
+ wrapper_count = i;
+ }
+unlock:
+ pthread_rwlock_unlock(&svc->rpclock);
- GF_FREE (wrappers);
+ if (wrappers) {
+ for (i = 0; i < wrapper_count; i++) {
+ wrappers[i].notify(svc, wrappers[i].data, event, trans);
}
- if (event == RPCSVC_EVENT_LISTENER_DEAD) {
- listener = rpcsvc_get_listener (svc, -1, trans->listener);
- rpcsvc_listener_destroy (listener);
- }
+ GF_FREE(wrappers);
+ }
- return ret;
-}
+ if (event == RPCSVC_EVENT_LISTENER_DEAD) {
+ listener = rpcsvc_get_listener(svc, -1, trans->listener);
+ rpcsvc_listener_destroy(listener);
+ }
+ return ret;
+}
int
-rpcsvc_notify (rpc_transport_t *trans, void *mydata,
- rpc_transport_event_t event, void *data, ...)
+rpcsvc_notify(rpc_transport_t *trans, void *mydata, rpc_transport_event_t event,
+ void *data, ...)
{
- int ret = -1;
- rpc_transport_pollin_t *msg = NULL;
- rpc_transport_t *new_trans = NULL;
- rpcsvc_t *svc = NULL;
- rpcsvc_listener_t *listener = NULL;
-
- svc = mydata;
- if (svc == NULL) {
- goto out;
- }
-
- switch (event) {
+ int ret = -1;
+ rpc_transport_pollin_t *msg = NULL;
+ rpc_transport_t *new_trans = NULL;
+ rpcsvc_t *svc = NULL;
+ rpcsvc_listener_t *listener = NULL;
+
+ svc = mydata;
+ if (svc == NULL) {
+ goto out;
+ }
+
+ switch (event) {
case RPC_TRANSPORT_ACCEPT:
- new_trans = data;
- ret = rpcsvc_accept (svc, trans, new_trans);
- break;
+ new_trans = data;
+ ret = rpcsvc_accept(svc, trans, new_trans);
+ break;
case RPC_TRANSPORT_DISCONNECT:
- ret = rpcsvc_handle_disconnect (svc, trans);
- break;
+ ret = rpcsvc_handle_disconnect(svc, trans);
+ break;
case RPC_TRANSPORT_MSG_RECEIVED:
- msg = data;
- ret = rpcsvc_handle_rpc_call (svc, trans, msg);
- break;
+ msg = data;
+ ret = rpcsvc_handle_rpc_call(svc, trans, msg);
+ break;
case RPC_TRANSPORT_MSG_SENT:
- ret = 0;
- break;
+ ret = 0;
+ break;
case RPC_TRANSPORT_CONNECT:
- /* do nothing, no need for rpcsvc to handle this, client should
- * handle this event
- */
- /* print info about transport too : LOG TODO */
- gf_log ("rpcsvc", GF_LOG_CRITICAL,
- "got CONNECT event, which should have not come");
- ret = 0;
- break;
+ /* do nothing, no need for rpcsvc to handle this, client should
+ * handle this event
+ */
+ /* print info about transport too : LOG TODO */
+ gf_log("rpcsvc", GF_LOG_CRITICAL,
+ "got CONNECT event, which should have not come");
+ ret = 0;
+ break;
case RPC_TRANSPORT_CLEANUP:
- listener = rpcsvc_get_listener (svc, -1, trans->listener);
- if (listener == NULL) {
- goto out;
- }
+ listener = rpcsvc_get_listener(svc, -1, trans->listener);
+ if (listener == NULL) {
+ goto out;
+ }
- rpcsvc_program_notify (listener, RPCSVC_EVENT_TRANSPORT_DESTROY,
- trans);
- ret = 0;
- break;
+ rpcsvc_program_notify(listener, RPCSVC_EVENT_TRANSPORT_DESTROY,
+ trans);
+ ret = 0;
+ break;
case RPC_TRANSPORT_MAP_XID_REQUEST:
- /* FIXME: think about this later */
- gf_log ("rpcsvc", GF_LOG_CRITICAL,
- "got MAP_XID event, which should have not come");
- ret = 0;
- break;
- }
+ /* FIXME: think about this later */
+ gf_log("rpcsvc", GF_LOG_CRITICAL,
+ "got MAP_XID event, which should have not come");
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_EVENT_THREAD_DIED:
+ rpcsvc_handle_event_thread_death(svc, trans,
+ (int)(unsigned long)data);
+ ret = 0;
+ break;
+ }
out:
- return ret;
+ return ret;
}
-
/* Given the RPC reply structure and the payload handed by the RPC program,
* encode the RPC record header into the buffer pointed by recordstart.
*/
struct iovec
-rpcsvc_record_build_header (char *recordstart, size_t rlen,
- struct rpc_msg reply, size_t payload)
+rpcsvc_record_build_header(char *recordstart, size_t rlen, struct rpc_msg reply,
+ size_t payload)
{
- struct iovec replyhdr;
- struct iovec txrecord = {0, 0};
- size_t fraglen = 0;
- int ret = -1;
-
- /* After leaving aside the 4 bytes for the fragment header, lets
- * encode the RPC reply structure into the buffer given to us.
- */
- ret = rpc_reply_to_xdr (&reply, recordstart, rlen, &replyhdr);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "Failed to create RPC reply");
- goto err;
- }
-
- fraglen = payload + replyhdr.iov_len;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Reply fraglen %zu, payload: %zu, "
- "rpc hdr: %zu", fraglen, payload, replyhdr.iov_len);
-
- txrecord.iov_base = recordstart;
-
- /* Remember, this is only the vec for the RPC header and does not
- * include the payload above. We needed the payload only to calculate
- * the size of the full fragment. This size is sent in the fragment
- * header.
- */
- txrecord.iov_len = replyhdr.iov_len;
+ struct iovec replyhdr;
+ struct iovec txrecord = {0, 0};
+ size_t fraglen = 0;
+ int ret = -1;
+
+ /* After leaving aside the 4 bytes for the fragment header, lets
+ * encode the RPC reply structure into the buffer given to us.
+ */
+ ret = rpc_reply_to_xdr(&reply, recordstart, rlen, &replyhdr);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "Failed to create RPC reply");
+ goto err;
+ }
+
+ fraglen = payload + replyhdr.iov_len;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Reply fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu",
+ fraglen, payload, replyhdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = replyhdr.iov_len;
err:
- return txrecord;
+ return txrecord;
}
-inline int
-rpcsvc_get_callid (rpcsvc_t *rpc)
+static uint32_t
+rpc_callback_new_callid(struct rpc_transport *trans)
{
- return GF_UNIVERSAL_ANSWER;
+ uint32_t callid = 0;
+
+ pthread_mutex_lock(&trans->lock);
+ {
+ callid = ++trans->xid;
+ }
+ pthread_mutex_unlock(&trans->lock);
+
+ return callid;
}
int
-rpcsvc_fill_callback (int prognum, int progver, int procnum, int payload,
- uint64_t xid, struct rpc_msg *request)
+rpcsvc_fill_callback(int prognum, int progver, int procnum, int payload,
+ uint32_t xid, struct rpc_msg *request)
{
- int ret = -1;
+ int ret = -1;
- if (!request) {
- goto out;
- }
+ if (!request) {
+ goto out;
+ }
- memset (request, 0, sizeof (*request));
+ memset(request, 0, sizeof(*request));
- request->rm_xid = xid;
- request->rm_direction = CALL;
+ request->rm_xid = xid;
+ request->rm_direction = CALL;
- request->rm_call.cb_rpcvers = 2;
- request->rm_call.cb_prog = prognum;
- request->rm_call.cb_vers = progver;
- request->rm_call.cb_proc = procnum;
+ request->rm_call.cb_rpcvers = 2;
+ request->rm_call.cb_prog = prognum;
+ request->rm_call.cb_vers = progver;
+ request->rm_call.cb_proc = procnum;
- request->rm_call.cb_cred.oa_flavor = AUTH_NONE;
- request->rm_call.cb_cred.oa_base = NULL;
- request->rm_call.cb_cred.oa_length = 0;
+ request->rm_call.cb_cred.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_cred.oa_base = NULL;
+ request->rm_call.cb_cred.oa_length = 0;
- request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
- request->rm_call.cb_verf.oa_base = NULL;
- request->rm_call.cb_verf.oa_length = 0;
+ request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_verf.oa_base = NULL;
+ request->rm_call.cb_verf.oa_length = 0;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
struct iovec
-rpcsvc_callback_build_header (char *recordstart, size_t rlen,
+rpcsvc_callback_build_header(char *recordstart, size_t rlen,
struct rpc_msg *request, size_t payload)
{
- struct iovec requesthdr = {0, };
- struct iovec txrecord = {0, 0};
- int ret = -1;
- size_t fraglen = 0;
-
- ret = rpc_request_to_xdr (request, recordstart, rlen, &requesthdr);
- if (ret == -1) {
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "Failed to create RPC request");
- goto out;
- }
-
- fraglen = payload + requesthdr.iov_len;
- gf_log ("rpcsvc", GF_LOG_TRACE, "Request fraglen %zu, payload: %zu, "
- "rpc hdr: %zu", fraglen, payload, requesthdr.iov_len);
-
- txrecord.iov_base = recordstart;
-
- /* Remember, this is only the vec for the RPC header and does not
- * include the payload above. We needed the payload only to calculate
- * the size of the full fragment. This size is sent in the fragment
- * header.
- */
- txrecord.iov_len = requesthdr.iov_len;
+ struct iovec requesthdr = {
+ 0,
+ };
+ struct iovec txrecord = {0, 0};
+ int ret = -1;
+ size_t fraglen = 0;
+
+ ret = rpc_request_to_xdr(request, recordstart, rlen, &requesthdr);
+ if (ret == -1) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "Failed to create RPC request");
+ goto out;
+ }
+
+ fraglen = payload + requesthdr.iov_len;
+ gf_log("rpcsvc", GF_LOG_TRACE,
+ "Request fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu",
+ fraglen, payload, requesthdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = requesthdr.iov_len;
out:
- return txrecord;
+ return txrecord;
}
-struct iobuf *
-rpcsvc_callback_build_record (rpcsvc_t *rpc, int prognum, int progver,
- int procnum, size_t payload, uint64_t xid,
- struct iovec *recbuf)
-{
- struct rpc_msg request = {0, };
- struct iobuf *request_iob = NULL;
- char *record = NULL;
- struct iovec recordhdr = {0, };
- size_t pagesize = 0;
- size_t xdr_size = 0;
- int ret = -1;
-
- if ((!rpc) || (!recbuf)) {
- goto out;
- }
-
- /* Fill the rpc structure and XDR it into the buffer got above. */
- ret = rpcsvc_fill_callback (prognum, progver, procnum, payload, xid,
- &request);
- if (ret == -1) {
- gf_log ("rpcsvc", GF_LOG_WARNING, "cannot build a rpc-request "
- "xid (%"PRIu64")", xid);
- goto out;
- }
-
- /* First, try to get a pointer into the buffer which the RPC
- * layer can use.
- */
- xdr_size = xdr_sizeof ((xdrproc_t)xdr_callmsg, &request);
+static struct iobuf *
+rpcsvc_callback_build_record(rpcsvc_t *rpc, int prognum, int progver,
+ int procnum, size_t payload, u_long xid,
+ struct iovec *recbuf)
+{
+ struct rpc_msg request = {
+ 0,
+ };
+ struct iobuf *request_iob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ size_t pagesize = 0;
+ size_t xdr_size = 0;
+ int ret = -1;
+
+ if ((!rpc) || (!recbuf)) {
+ goto out;
+ }
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpcsvc_fill_callback(prognum, progver, procnum, payload, xid,
+ &request);
+ if (ret == -1) {
+ gf_log("rpcsvc", GF_LOG_WARNING,
+ "cannot build a rpc-request "
+ "xid (%lu)",
+ xid);
+ goto out;
+ }
+
+ /* First, try to get a pointer into the buffer which the RPC
+ * layer can use.
+ */
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_callmsg, &request);
+
+ request_iob = iobuf_get2(rpc->ctx->iobuf_pool, (xdr_size + payload));
+ if (!request_iob) {
+ goto out;
+ }
+
+ pagesize = iobuf_pagesize(request_iob);
+
+ record = iobuf_ptr(request_iob); /* Now we have it. */
+
+ recordhdr = rpcsvc_callback_build_header(record, pagesize, &request,
+ payload);
+
+ if (!recordhdr.iov_base) {
+ gf_log("rpc-clnt", GF_LOG_ERROR,
+ "Failed to build record "
+ " header");
+ iobuf_unref(request_iob);
+ request_iob = NULL;
+ recbuf->iov_base = NULL;
+ goto out;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
- request_iob = iobuf_get2 (rpc->ctx->iobuf_pool, (xdr_size + payload));
- if (!request_iob) {
- goto out;
- }
+out:
+ return request_iob;
+}
- pagesize = iobuf_pagesize (request_iob);
+int
+rpcsvc_request_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum, void *req,
+ glusterfs_ctx_t *ctx, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ ssize_t xdr_size = 0;
+ struct iobref *iobref = NULL;
+
+ if (!req)
+ goto out;
+
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf)
+ goto out;
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+
+ iobref = iobref_new();
+ if (!iobref) {
+ ret = -1;
+ gf_log("rpcsvc", GF_LOG_WARNING, "Failed to create iobref");
+ goto out;
+ }
- record = iobuf_ptr (request_iob); /* Now we have it. */
+ iobref_add(iobref, iobuf);
- recordhdr = rpcsvc_callback_build_header (record, pagesize, &request,
- payload);
+ ret = rpcsvc_callback_submit(rpc, trans, prog, procnum, &iov, count,
+ iobref);
- if (!recordhdr.iov_base) {
- gf_log ("rpc-clnt", GF_LOG_ERROR, "Failed to build record "
- " header");
- iobuf_unref (request_iob);
- request_iob = NULL;
- recbuf->iov_base = NULL;
- goto out;
- }
+out:
+ if (iobuf)
+ iobuf_unref(iobuf);
- recbuf->iov_base = recordhdr.iov_base;
- recbuf->iov_len = recordhdr.iov_len;
+ if (iobref)
+ iobref_unref(iobref);
-out:
- return request_iob;
+ return ret;
}
int
-rpcsvc_callback_submit (rpcsvc_t *rpc, rpc_transport_t *trans,
- rpcsvc_cbk_program_t *prog, int procnum,
- struct iovec *proghdr, int proghdrcount)
-{
- struct iobuf *request_iob = NULL;
- struct iovec rpchdr = {0,};
- rpc_transport_req_t req;
- int ret = -1;
- int proglen = 0;
- uint64_t callid = 0;
-
- if (!rpc) {
- goto out;
- }
-
- memset (&req, 0, sizeof (req));
-
- callid = rpcsvc_get_callid (rpc);
-
- if (proghdr) {
- proglen += iov_length (proghdr, proghdrcount);
+rpcsvc_callback_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum,
+ struct iovec *proghdr, int proghdrcount,
+ struct iobref *iobref)
+{
+ struct iobuf *request_iob = NULL;
+ struct iovec rpchdr = {
+ 0,
+ };
+ rpc_transport_req_t req;
+ int ret = -1;
+ int proglen = 0;
+ uint32_t xid = 0;
+ gf_boolean_t new_iobref = _gf_false;
+
+ if (!rpc) {
+ goto out;
+ }
+
+ memset(&req, 0, sizeof(req));
+
+ if (proghdr) {
+ proglen += iov_length(proghdr, proghdrcount);
+ }
+
+ xid = rpc_callback_new_callid(trans);
+
+ request_iob = rpcsvc_callback_build_record(
+ rpc, prog->prognum, prog->progver, procnum, proglen, xid, &rpchdr);
+ if (!request_iob) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "cannot build rpc-record");
+ goto out;
+ }
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "Failed to create iobref");
+ goto out;
}
+ new_iobref = 1;
+ }
- request_iob = rpcsvc_callback_build_record (rpc, prog->prognum,
- prog->progver, procnum,
- proglen, callid,
- &rpchdr);
- if (!request_iob) {
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "cannot build rpc-record");
- goto out;
- }
+ iobref_add(iobref, request_iob);
- req.msg.rpchdr = &rpchdr;
- req.msg.rpchdrcount = 1;
- req.msg.proghdr = proghdr;
- req.msg.proghdrcount = proghdrcount;
+ req.msg.rpchdr = &rpchdr;
+ req.msg.rpchdrcount = 1;
+ req.msg.proghdr = proghdr;
+ req.msg.proghdrcount = proghdrcount;
+ req.msg.iobref = iobref;
- ret = rpc_transport_submit_request (trans, &req);
- if (ret == -1) {
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "transmission of rpc-request failed");
- goto out;
- }
+ ret = rpc_transport_submit_request(trans, &req);
+ if (ret == -1) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "transmission of rpc-request failed");
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- iobuf_unref (request_iob);
+ iobuf_unref(request_iob);
- return ret;
+ if (new_iobref)
+ iobref_unref(iobref);
+
+ return ret;
}
-inline int
-rpcsvc_transport_submit (rpc_transport_t *trans, struct iovec *hdrvec,
- int hdrcount, struct iovec *proghdr, int proghdrcount,
- struct iovec *progpayload, int progpayloadcount,
- struct iobref *iobref, void *priv)
+int
+rpcsvc_transport_submit(rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref, void *priv)
{
- int ret = -1;
- rpc_transport_reply_t reply = {{0, }};
-
- if ((!trans) || (!hdrvec) || (!hdrvec->iov_base)) {
- goto out;
- }
-
- reply.msg.rpchdr = hdrvec;
- reply.msg.rpchdrcount = hdrcount;
- reply.msg.proghdr = proghdr;
- reply.msg.proghdrcount = proghdrcount;
- reply.msg.progpayload = progpayload;
- reply.msg.progpayloadcount = progpayloadcount;
- reply.msg.iobref = iobref;
- reply.private = priv;
-
- ret = rpc_transport_submit_reply (trans, &reply);
+ int ret = -1;
+ rpc_transport_reply_t reply = {
+ 0,
+ };
+
+ if ((!trans) || (!rpchdr) || (!rpchdr->iov_base)) {
+ goto out;
+ }
+
+ reply.msg.rpchdr = rpchdr;
+ reply.msg.rpchdrcount = rpchdrcount;
+ reply.msg.proghdr = proghdr;
+ reply.msg.proghdrcount = proghdrcount;
+ reply.msg.progpayload = progpayload;
+ reply.msg.progpayloadcount = progpayloadcount;
+ reply.msg.iobref = iobref;
+ reply.private = priv;
+
+ ret = rpc_transport_submit_reply(trans, &reply);
out:
- return ret;
+ return ret;
}
-
int
-rpcsvc_fill_reply (rpcsvc_request_t *req, struct rpc_msg *reply)
+rpcsvc_fill_reply(rpcsvc_request_t *req, struct rpc_msg *reply)
{
- int ret = -1;
- rpcsvc_program_t *prog = NULL;
- if ((!req) || (!reply))
- goto out;
-
- ret = 0;
- rpc_fill_empty_reply (reply, req->xid);
- if (req->rpc_status == MSG_DENIED) {
- rpc_fill_denied_reply (reply, req->rpc_err, req->auth_err);
- goto out;
- }
-
- prog = rpcsvc_request_program (req);
-
- if (req->rpc_status == MSG_ACCEPTED)
- rpc_fill_accepted_reply (reply, req->rpc_err,
- (prog) ? prog->proglowvers : 0,
- (prog) ? prog->proghighvers: 0,
- req->verf.flavour, req->verf.datalen,
- req->verf.authdata);
- else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Invalid rpc_status value");
+ int ret = -1;
+ rpcsvc_program_t *prog = NULL;
+ if ((!req) || (!reply))
+ goto out;
+
+ ret = 0;
+ rpc_fill_empty_reply(reply, req->xid);
+ if (req->rpc_status == MSG_DENIED) {
+ rpc_fill_denied_reply(reply, req->rpc_err, req->auth_err);
+ goto out;
+ }
+
+ prog = rpcsvc_request_program(req);
+
+ if (req->rpc_status == MSG_ACCEPTED)
+ rpc_fill_accepted_reply(
+ reply, req->rpc_err, (prog) ? prog->proglowvers : 0,
+ (prog) ? prog->proghighvers : 0, req->verf.flavour,
+ req->verf.datalen, req->verf.authdata);
+ else
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Invalid rpc_status value");
out:
- return ret;
+ return ret;
}
-
/* Given a request and the reply payload, build a reply and encodes the reply
* into a record header. This record header is encoded into the vector pointed
* to be recbuf.
@@ -942,59 +1400,60 @@ out:
* we should account for the length of that buffer in the RPC fragment header.
*/
struct iobuf *
-rpcsvc_record_build_record (rpcsvc_request_t *req, size_t payload,
- size_t hdrlen, struct iovec *recbuf)
+rpcsvc_record_build_record(rpcsvc_request_t *req, size_t payload, size_t hdrlen,
+ struct iovec *recbuf)
{
- struct rpc_msg reply;
- struct iobuf *replyiob = NULL;
- char *record = NULL;
- struct iovec recordhdr = {0, };
- size_t pagesize = 0;
- size_t xdr_size = 0;
- rpcsvc_t *svc = NULL;
- int ret = -1;
-
- if ((!req) || (!req->trans) || (!req->svc) || (!recbuf))
- return NULL;
-
- svc = req->svc;
-
- /* Fill the rpc structure and XDR it into the buffer got above. */
- ret = rpcsvc_fill_reply (req, &reply);
- if (ret)
- goto err_exit;
-
- xdr_size = xdr_sizeof ((xdrproc_t)xdr_replymsg, &reply);
-
- /* Payload would include 'readv' size etc too, where as
- that comes as another payload iobuf */
- replyiob = iobuf_get2 (svc->ctx->iobuf_pool, (xdr_size + hdrlen));
- if (!replyiob) {
- goto err_exit;
- }
-
- pagesize = iobuf_pagesize (replyiob);
-
- record = iobuf_ptr (replyiob); /* Now we have it. */
-
- recordhdr = rpcsvc_record_build_header (record, pagesize, reply,
- payload);
- if (!recordhdr.iov_base) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to build record "
- " header");
- iobuf_unref (replyiob);
- replyiob = NULL;
- recbuf->iov_base = NULL;
- goto err_exit;
- }
-
- recbuf->iov_base = recordhdr.iov_base;
- recbuf->iov_len = recordhdr.iov_len;
+ struct rpc_msg reply;
+ struct iobuf *replyiob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ size_t pagesize = 0;
+ size_t xdr_size = 0;
+ rpcsvc_t *svc = NULL;
+ int ret = -1;
+
+ if ((!req) || (!req->trans) || (!req->svc) || (!recbuf))
+ return NULL;
+
+ svc = req->svc;
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpcsvc_fill_reply(req, &reply);
+ if (ret)
+ goto err_exit;
+
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_replymsg, &reply);
+
+ /* Payload would include 'readv' size etc too, where as
+ that comes as another payload iobuf */
+ replyiob = iobuf_get2(svc->ctx->iobuf_pool, (xdr_size + hdrlen));
+ if (!replyiob) {
+ goto err_exit;
+ }
+
+ pagesize = iobuf_pagesize(replyiob);
+
+ record = iobuf_ptr(replyiob); /* Now we have it. */
+
+ recordhdr = rpcsvc_record_build_header(record, pagesize, reply, payload);
+ if (!recordhdr.iov_base) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to build record "
+ " header");
+ iobuf_unref(replyiob);
+ replyiob = NULL;
+ recbuf->iov_base = NULL;
+ goto err_exit;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
err_exit:
- return replyiob;
+ return replyiob;
}
-
/*
* The function to submit a program message to the RPC service.
* This message is added to the transmission queue of the
@@ -1022,231 +1481,344 @@ err_exit:
*/
int
-rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref)
+rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref)
{
- int ret = -1, i = 0;
- struct iobuf *replyiob = NULL;
- struct iovec recordhdr = {0, };
- rpc_transport_t *trans = NULL;
- size_t msglen = 0;
- size_t hdrlen = 0;
- char new_iobref = 0;
-
- if ((!req) || (!req->trans))
- return -1;
-
- trans = req->trans;
-
- for (i = 0; i < hdrcount; i++) {
- msglen += proghdr[i].iov_len;
- }
-
- for (i = 0; i < payloadcount; i++) {
- msglen += payload[i].iov_len;
- }
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx message: %zu", msglen);
-
- /* Build the buffer containing the encoded RPC reply. */
- replyiob = rpcsvc_record_build_record (req, msglen, hdrlen, &recordhdr);
- if (!replyiob) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,"Reply record creation failed");
- goto disconnect_exit;
- }
-
+ int ret = -1, i = 0;
+ struct iobuf *replyiob = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ rpc_transport_t *trans = NULL;
+ size_t msglen = 0;
+ size_t hdrlen = 0;
+ char new_iobref = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
+ gf_latency_t *lat = NULL;
+
+ if ((!req) || (!req->trans))
+ return -1;
+
+ if (req->prog && req->begin.tv_sec) {
+ if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) {
+ timespec_now(&req->end);
+ lat = &req->prog->latencies[req->procnum];
+ gf_latency_update(lat, &req->begin, &req->end);
+ }
+ }
+ trans = req->trans;
+
+ for (i = 0; i < hdrcount; i++) {
+ msglen += proghdr[i].iov_len;
+ }
+
+ for (i = 0; i < payloadcount; i++) {
+ msglen += payload[i].iov_len;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Tx message: %zu", msglen);
+
+ /* Build the buffer containing the encoded RPC reply. */
+ replyiob = rpcsvc_record_build_record(req, msglen, hdrlen, &recordhdr);
+ if (!replyiob) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Reply record creation failed");
+ goto disconnect_exit;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new();
if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- goto disconnect_exit;
- }
-
- new_iobref = 1;
+ goto disconnect_exit;
}
- iobref_add (iobref, replyiob);
+ new_iobref = 1;
+ }
- ret = rpcsvc_transport_submit (trans, &recordhdr, 1, proghdr, hdrcount,
- payload, payloadcount, iobref,
- req->trans_private);
+ iobref_add(iobref, replyiob);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "failed to submit message "
- "(XID: 0x%ux, Program: %s, ProgVers: %d, Proc: %d) to "
- "rpc-transport (%s)", req->xid,
- req->prog ? req->prog->progname : "(not matched)",
- req->prog ? req->prog->progver : 0,
- req->procnum, trans->name);
- } else {
- gf_log (GF_RPCSVC, GF_LOG_TRACE,
- "submitted reply for rpc-message (XID: 0x%ux, "
- "Program: %s, ProgVers: %d, Proc: %d) to rpc-transport "
- "(%s)", req->xid, req->prog ? req->prog->progname: "-",
- req->prog ? req->prog->progver : 0,
- req->procnum, trans->name);
- }
+ /* cache the request in the duplicate request cache for appropriate ops */
+ if ((req->reply) && (rpcsvc_need_drc(req))) {
+ drc = req->svc->drc;
+
+ LOCK(&drc->lock);
+ ret = rpcsvc_cache_reply(req, iobref, &recordhdr, 1, proghdr, hdrcount,
+ payload, payloadcount);
+ UNLOCK(&drc->lock);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "failed to cache reply");
+ }
+ }
+
+ ret = rpcsvc_transport_submit(trans, &recordhdr, 1, proghdr, hdrcount,
+ payload, payloadcount, iobref,
+ req->trans_private);
+
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "failed to submit message "
+ "(XID: 0x%x, Program: %s, ProgVers: %d, Proc: %d) to "
+ "rpc-transport (%s)",
+ req->xid, req->prog ? req->prog->progname : "(not matched)",
+ req->prog ? req->prog->progver : 0, req->procnum,
+ trans ? trans->name : "");
+ } else {
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "submitted reply for rpc-message (XID: 0x%x, "
+ "Program: %s, ProgVers: %d, Proc: %d) to rpc-transport "
+ "(%s)",
+ req->xid, req->prog ? req->prog->progname : "-",
+ req->prog ? req->prog->progver : 0, req->procnum,
+ trans ? trans->name : "");
+ }
disconnect_exit:
- if (replyiob) {
- iobuf_unref (replyiob);
- }
+ if (replyiob) {
+ iobuf_unref(replyiob);
+ }
- if (new_iobref) {
- iobref_unref (iobref);
- }
+ if (new_iobref) {
+ iobref_unref(iobref);
+ }
- rpcsvc_request_destroy (req);
+ rpcsvc_request_destroy(req);
- return ret;
+ return ret;
}
-
int
-rpcsvc_error_reply (rpcsvc_request_t *req)
+rpcsvc_error_reply(rpcsvc_request_t *req)
{
- struct iovec dummyvec = {0, };
+ struct iovec dummyvec = {
+ 0,
+ };
- if (!req)
- return -1;
+ if (!req)
+ return -1;
- gf_log_callingfn ("", GF_LOG_DEBUG, "sending a RPC error reply");
+ gf_log_callingfn("", GF_LOG_DEBUG, "sending a RPC error reply");
- /* At this point the req should already have been filled with the
- * appropriate RPC error numbers.
- */
- return rpcsvc_submit_generic (req, &dummyvec, 0, NULL, 0, NULL);
+ /* At this point the req should already have been filled with the
+ * appropriate RPC error numbers.
+ */
+ return rpcsvc_submit_generic(req, &dummyvec, 0, NULL, 0, NULL);
}
-
-/* Register the program with the local portmapper service. */
-inline int
-rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port)
+#ifdef IPV6_DEFAULT
+int
+rpcsvc_program_register_rpcbind6(rpcsvc_program_t *newprog, uint32_t port)
{
- int ret = 0;
+ const int IP_BUF_LEN = 64;
+ char addr_buf[IP_BUF_LEN];
+
+ int err = 0;
+ bool_t success = 0;
+ struct netconfig *nc;
+ struct netbuf *nb;
+
+ if (!newprog) {
+ goto out;
+ }
+
+ nc = getnetconfigent("tcp6");
+ if (!nc) {
+ err = -1;
+ goto out;
+ }
+
+ err = sprintf(addr_buf, "::.%d.%d", port >> 8 & 0xff, port & 0xff);
+ if (err < 0) {
+ err = -1;
+ goto out;
+ }
+
+ nb = uaddr2taddr(nc, addr_buf);
+ if (!nb) {
+ err = -1;
+ goto out;
+ }
+
+ /* Force the unregistration of the program first.
+ * This call may fail if nothing has been registered,
+ * which is fine.
+ */
+ rpcsvc_program_unregister_rpcbind6(newprog);
+
+ success = rpcb_set(newprog->prognum, newprog->progver, nc, nb);
+ if (!success) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not register the IPv6"
+ " service with rpcbind");
+ }
+
+ err = 0;
- if (!newprog) {
- goto out;
- }
-
- if (!(pmap_set (newprog->prognum, newprog->progver, IPPROTO_TCP,
- port))) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not register with"
- " portmap");
- goto out;
- }
+out:
+ return err;
+}
- ret = 0;
+int
+rpcsvc_program_unregister_rpcbind6(rpcsvc_program_t *newprog)
+{
+ int err = 0;
+ bool_t success = 0;
+ struct netconfig *nc;
+
+ if (!newprog) {
+ goto out;
+ }
+
+ nc = getnetconfigent("tcp6");
+ if (!nc) {
+ err = -1;
+ goto out;
+ }
+
+ success = rpcb_unset(newprog->prognum, newprog->progver, nc);
+ if (!success) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not unregister the IPv6"
+ " service with rpcbind");
+ }
+
+ err = 0;
out:
- return ret;
+ return err;
}
+#endif
+/* Register the program with the local portmapper service. */
+int
+rpcsvc_program_register_portmap(rpcsvc_program_t *newprog, uint32_t port)
+{
+ int ret = -1; /* FAIL */
+
+ if (!newprog) {
+ goto out;
+ }
+
+ /* pmap_set() returns 0 for FAIL and 1 for SUCCESS */
+ if (!(pmap_set(newprog->prognum, newprog->progver, IPPROTO_TCP, port))) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not register with"
+ " portmap %d %d %u",
+ newprog->prognum, newprog->progver, port);
+ goto out;
+ }
+
+ ret = 0; /* SUCCESS */
+out:
+ return ret;
+}
-inline int
-rpcsvc_program_unregister_portmap (rpcsvc_program_t *prog)
+int
+rpcsvc_program_unregister_portmap(rpcsvc_program_t *prog)
{
- int ret = 0;
+ int ret = -1;
- if (!prog)
- goto out;
+ if (!prog)
+ goto out;
- if (!(pmap_unset(prog->prognum, prog->progver))) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not unregister with"
- " portmap");
- goto out;
- }
+ if (!(pmap_unset(prog->prognum, prog->progver))) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not unregister with"
+ " portmap");
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpcsvc_register_portmap_enabled (rpcsvc_t *svc)
+rpcsvc_register_portmap_enabled(rpcsvc_t *svc)
{
- return svc->register_portmap;
+ return svc->register_portmap;
}
int32_t
-rpcsvc_get_listener_port (rpcsvc_listener_t *listener)
+rpcsvc_get_listener_port(rpcsvc_listener_t *listener)
{
- int32_t listener_port = -1;
+ int32_t listener_port = -1;
- if ((listener == NULL) || (listener->trans == NULL)) {
- goto out;
- }
+ if ((listener == NULL) || (listener->trans == NULL)) {
+ goto out;
+ }
- switch (listener->trans->myinfo.sockaddr.ss_family) {
+ switch (listener->trans->myinfo.sockaddr.ss_family) {
case AF_INET:
- listener_port = ((struct sockaddr_in *)&listener->trans->myinfo.sockaddr)->sin_port;
- break;
+ listener_port = ((struct sockaddr_in *)&listener->trans->myinfo
+ .sockaddr)
+ ->sin_port;
+ break;
case AF_INET6:
- listener_port = ((struct sockaddr_in6 *)&listener->trans->myinfo.sockaddr)->sin6_port;
- break;
+ listener_port = ((struct sockaddr_in6 *)&listener->trans->myinfo
+ .sockaddr)
+ ->sin6_port;
+ break;
default:
- gf_log (GF_RPCSVC, GF_LOG_DEBUG,
- "invalid address family (%d)",
- listener->trans->myinfo.sockaddr.ss_family);
- goto out;
- }
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "invalid address family (%d)",
+ listener->trans->myinfo.sockaddr.ss_family);
+ goto out;
+ }
- listener_port = ntohs (listener_port);
+ listener_port = ntohs(listener_port);
out:
- return listener_port;
+ return listener_port;
}
-
rpcsvc_listener_t *
-rpcsvc_get_listener (rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans)
+rpcsvc_get_listener(rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans)
{
- rpcsvc_listener_t *listener = NULL;
- char found = 0;
- uint32_t listener_port = 0;
-
- if (!svc) {
- goto out;
- }
-
- pthread_mutex_lock (&svc->rpclock);
+ rpcsvc_listener_t *listener = NULL;
+ char found = 0;
+ rpcsvc_listener_t *next = NULL;
+ uint32_t listener_port = 0;
+
+ if (!svc) {
+ goto out;
+ }
+
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry_safe(listener, next, &svc->listeners, list)
{
- list_for_each_entry (listener, &svc->listeners, list) {
- if (trans != NULL) {
- if (listener->trans == trans) {
- found = 1;
- break;
- }
-
- continue;
- }
+ if (trans != NULL) {
+ if (listener->trans == trans) {
+ found = 1;
+ break;
+ }
- listener_port = rpcsvc_get_listener_port (listener);
- if (listener_port == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "invalid port for listener %s",
- listener->trans->name);
- continue;
- }
+ continue;
+ }
- if (listener_port == port) {
- found = 1;
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
+ listener_port = rpcsvc_get_listener_port(listener);
+ if (listener_port == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "invalid port for listener %s",
+ listener->trans->name);
+ continue;
+ }
- if (!found) {
- listener = NULL;
+ if (listener_port == port) {
+ found = 1;
+ break;
+ }
}
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ if (!found) {
+ listener = NULL;
+ }
out:
- return listener;
+ return listener;
}
-
/* The only difference between the generic submit and this one is that the
* generic submit is also used for submitting RPC error replies in where there
* are no payloads so the msgvec and msgbuf can be NULL.
@@ -1254,1195 +1826,1470 @@ out:
* we must perform NULL checks before calling the generic submit.
*/
int
-rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref)
+rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref)
{
- if ((!req) || (!req->trans) || (!proghdr) || (!proghdr->iov_base))
- return -1;
+ if ((!req) || (!req->trans) || (!proghdr) || (!proghdr->iov_base))
+ return -1;
- return rpcsvc_submit_generic (req, proghdr, hdrcount, payload,
- payloadcount, iobref);
+ return rpcsvc_submit_generic(req, proghdr, hdrcount, payload, payloadcount,
+ iobref);
}
+void
+rpcsvc_program_destroy(rpcsvc_program_t *program)
+{
+ if (program) {
+ GF_FREE(program->latencies);
+ GF_FREE(program);
+ }
+}
int
-rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program)
+rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
{
- int ret = -1;
- rpcsvc_program_t *prog = NULL;
- if (!svc || !program) {
- goto out;
- }
-
- ret = rpcsvc_program_unregister_portmap (program);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "portmap unregistration of"
- " program failed");
- goto out;
- }
-
- pthread_mutex_lock (&svc->rpclock);
+ int ret = -1;
+ rpcsvc_program_t *prog = NULL;
+ if (!svc || !program) {
+ goto out;
+ }
+
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(prog, &svc->programs, program)
{
- list_for_each_entry (prog, &svc->programs, program) {
- if ((prog->prognum == program->prognum)
- && (prog->progver == program->progver)) {
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
-
- if (prog == NULL) {
- ret = -1;
- goto out;
- }
-
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Program unregistered: %s, Num: %d,"
- " Ver: %d, Port: %d", prog->progname, prog->prognum,
- prog->progver, prog->progport);
+ if ((prog->prognum == program->prognum) &&
+ (prog->progver == program->progver)) {
+ break;
+ }
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ ret = rpcsvc_program_unregister_portmap(program);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "portmap unregistration of"
+ " program failed");
+ goto out;
+ }
+#ifdef IPV6_DEFAULT
+ ret = rpcsvc_program_unregister_rpcbind6(program);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "rpcbind (ipv6)"
+ " unregistration of program failed");
+ goto out;
+ }
+#endif
- pthread_mutex_lock (&svc->rpclock);
- {
- list_del_init (&prog->program);
- }
- pthread_mutex_unlock (&svc->rpclock);
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "Program unregistered: %s, Num: %d,"
+ " Ver: %d, Port: %d",
+ prog->progname, prog->prognum, prog->progver, prog->progport);
+ if (prog->ownthread) {
+ prog->alive = _gf_false;
ret = 0;
-out:
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program unregistration failed"
- ": %s, Num: %d, Ver: %d, Port: %d", program->progname,
- program->prognum, program->progver, program->progport);
- }
-
- return ret;
-}
+ goto out;
+ }
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_del_init(&prog->program);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
-inline int
-rpcsvc_transport_peername (rpc_transport_t *trans, char *hostname, int hostlen)
-{
- if (!trans) {
- return -1;
+ ret = 0;
+out:
+ rpcsvc_program_destroy(prog);
+
+ if (ret == -1) {
+ if (program) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Program "
+ "unregistration failed"
+ ": %s, Num: %d, Ver: %d, Port: %d",
+ program->progname, program->prognum, program->progver,
+ program->progport);
+ } else {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Program not found");
}
+ }
- return rpc_transport_get_peername (trans, hostname, hostlen);
+ return ret;
}
-
-inline int
-rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
- struct sockaddr_storage *sa, socklen_t sasize)
+int
+rpcsvc_transport_peername(rpc_transport_t *trans, char *hostname, int hostlen)
{
- if (!trans) {
- return -1;
- }
+ if (!trans) {
+ return -1;
+ }
- return rpc_transport_get_peeraddr(trans, addrstr, addrlen, sa,
- sasize);
+ return rpc_transport_get_peername(trans, hostname, hostlen);
}
-
-rpc_transport_t *
-rpcsvc_transport_create (rpcsvc_t *svc, dict_t *options, char *name)
+int
+rpcsvc_transport_peeraddr(rpc_transport_t *trans, char *addrstr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t sasize)
{
- int ret = -1;
- rpc_transport_t *trans = NULL;
-
- trans = rpc_transport_load (svc->ctx, options, name);
- if (!trans) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "cannot create listener, "
- "initing the transport failed");
- goto out;
- }
-
- ret = rpc_transport_listen (trans);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "listening on transport failed");
- goto out;
- }
+ if (!trans) {
+ return -1;
+ }
- ret = rpc_transport_register_notify (trans, rpcsvc_notify, svc);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "registering notify failed");
- goto out;
- }
-
- ret = 0;
-out:
- if ((ret == -1) && (trans)) {
- rpc_transport_disconnect (trans);
- trans = NULL;
- }
-
- return trans;
+ return rpc_transport_get_peeraddr(trans, addrstr, addrlen, sa, sasize);
}
rpcsvc_listener_t *
-rpcsvc_listener_alloc (rpcsvc_t *svc, rpc_transport_t *trans)
+rpcsvc_listener_alloc(rpcsvc_t *svc, rpc_transport_t *trans)
{
- rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *listener = NULL;
- listener = GF_CALLOC (1, sizeof (*listener),
- gf_common_mt_rpcsvc_listener_t);
- if (!listener) {
- goto out;
- }
+ listener = GF_CALLOC(1, sizeof(*listener), gf_common_mt_rpcsvc_listener_t);
+ if (!listener) {
+ goto out;
+ }
- listener->trans = trans;
- listener->svc = svc;
+ listener->trans = trans;
+ listener->svc = svc;
- INIT_LIST_HEAD (&listener->list);
+ INIT_LIST_HEAD(&listener->list);
- pthread_mutex_lock (&svc->rpclock);
- {
- list_add_tail (&listener->list, &svc->listeners);
- }
- pthread_mutex_unlock (&svc->rpclock);
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_add_tail(&listener->list, &svc->listeners);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
out:
- return listener;
+ return listener;
}
-
int32_t
-rpcsvc_create_listener (rpcsvc_t *svc, dict_t *options, char *name)
+rpcsvc_create_listener(rpcsvc_t *svc, dict_t *options, char *name)
{
- rpc_transport_t *trans = NULL;
- rpcsvc_listener_t *listener = NULL;
- int32_t ret = -1;
-
- if (!svc || !options) {
- goto out;
- }
-
- trans = rpcsvc_transport_create (svc, options, name);
- if (!trans) {
- /* LOG TODO */
- goto out;
- }
-
- listener = rpcsvc_listener_alloc (svc, trans);
- if (listener == NULL) {
- goto out;
- }
+ rpc_transport_t *trans = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ int32_t ret = -1;
+
+ if (!svc || !options) {
+ goto out;
+ }
+
+ trans = rpc_transport_load(svc->ctx, options, name);
+ if (!trans) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "cannot create listener, "
+ "initing the transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_listen(trans);
+ if (ret == -EADDRINUSE || ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "listening on transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_register_notify(trans, rpcsvc_notify, svc);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "registering notify failed");
+ goto out;
+ }
+
+ listener = rpcsvc_listener_alloc(svc, trans);
+ if (listener == NULL) {
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- if (!listener && trans) {
- rpc_transport_disconnect (trans);
- }
+ if (!listener && trans) {
+ rpc_transport_disconnect(trans, _gf_true);
+ rpc_transport_cleanup(trans);
+ }
- return ret;
+ return ret;
}
-
int32_t
-rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name)
+rpcsvc_create_listeners(rpcsvc_t *svc, dict_t *options, char *name)
{
- int32_t ret = -1, count = 0;
- data_t *data = NULL;
- char *str = NULL, *ptr = NULL, *transport_name = NULL;
- char *transport_type = NULL, *saveptr = NULL, *tmp = NULL;
-
- if ((svc == NULL) || (options == NULL) || (name == NULL)) {
- goto out;
- }
-
- data = dict_get (options, "transport-type");
- if (data == NULL) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "option transport-type not set");
- goto out;
+ int32_t ret = -1, count = 0;
+ data_t *data = NULL;
+ char *str = NULL, *ptr = NULL, *transport_name = NULL;
+ char *transport_type = NULL, *saveptr = NULL, *tmp = NULL;
+
+ if ((svc == NULL) || (options == NULL) || (name == NULL)) {
+ goto out;
+ }
+
+ data = dict_get(options, "transport-type");
+ if (data == NULL) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "option transport-type not set");
+ goto out;
+ }
+
+ transport_type = data_to_str(data);
+ if (transport_type == NULL) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "option transport-type not set");
+ goto out;
+ }
+
+ /* duplicate transport_type, since following dict_set will free it */
+ transport_type = gf_strdup(transport_type);
+ if (transport_type == NULL) {
+ goto out;
+ }
+
+ str = gf_strdup(transport_type);
+ if (str == NULL) {
+ goto out;
+ }
+
+ ptr = strtok_r(str, ",", &saveptr);
+
+ while (ptr != NULL) {
+ tmp = gf_strdup(ptr);
+ if (tmp == NULL) {
+ goto out;
+ }
+
+ ret = gf_asprintf(&transport_name, "%s.%s", tmp, name);
+ if (ret == -1) {
+ goto out;
}
- transport_type = data_to_str (data);
- if (transport_type == NULL) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "option transport-type not set");
- goto out;
+ ret = dict_set_dynstr(options, "transport-type", tmp);
+ if (ret == -1) {
+ goto out;
}
- /* duplicate transport_type, since following dict_set will free it */
- transport_type = gf_strdup (transport_type);
- if (transport_type == NULL) {
- goto out;
- }
+ tmp = NULL;
+ ptr = strtok_r(NULL, ",", &saveptr);
- str = gf_strdup (transport_type);
- if (str == NULL) {
- goto out;
+ ret = rpcsvc_create_listener(svc, options, transport_name);
+ if (ret != 0) {
+ goto out;
}
- ptr = strtok_r (str, ",", &saveptr);
-
- while (ptr != NULL) {
- tmp = gf_strdup (ptr);
- if (tmp == NULL) {
- goto out;
- }
-
- ret = gf_asprintf (&transport_name, "%s.%s", tmp, name);
- if (ret == -1) {
- goto out;
- }
+ dict_del(options, "notify-poller-death");
+ GF_FREE(transport_name);
+ transport_name = NULL;
+ count++;
+ }
- ret = dict_set_dynstr (options, "transport-type", tmp);
- if (ret == -1) {
- goto out;
- }
-
- tmp = NULL;
- ptr = strtok_r (NULL, ",", &saveptr);
+ ret = dict_set_dynstr(options, "transport-type", transport_type);
+ if (ret == -1) {
+ goto out;
+ }
- ret = rpcsvc_create_listener (svc, options, transport_name);
- if (ret != 0) {
- goto out;
- }
-
- GF_FREE (transport_name);
- count++;
- }
-
- ret = dict_set_dynstr (options, "transport-type", transport_type);
- if (ret == -1) {
- goto out;
- }
-
- transport_type = NULL;
+ transport_type = NULL;
out:
- if (str != NULL) {
- GF_FREE (str);
- }
+ GF_FREE(str);
- if (transport_type != NULL) {
- GF_FREE (transport_type);
- }
+ GF_FREE(transport_type);
- if (tmp != NULL) {
- GF_FREE (tmp);
- }
+ GF_FREE(tmp);
+
+ GF_FREE(transport_name);
+ if (count > 0) {
return count;
+ } else {
+ return ret;
+ }
}
-
int
-rpcsvc_unregister_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
+rpcsvc_unregister_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
{
- rpcsvc_notify_wrapper_t *wrapper = NULL, *tmp = NULL;
- int ret = 0;
+ rpcsvc_notify_wrapper_t *wrapper = NULL, *tmp = NULL;
+ int ret = 0;
- if (!svc || !notify) {
- goto out;
- }
+ if (!svc || !notify) {
+ goto out;
+ }
- pthread_mutex_lock (&svc->rpclock);
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_for_each_entry_safe(wrapper, tmp, &svc->notify, list)
{
- list_for_each_entry_safe (wrapper, tmp, &svc->notify, list) {
- if ((wrapper->notify == notify)
- && (mydata == wrapper->data)) {
- list_del_init (&wrapper->list);
- GF_FREE (wrapper);
- ret++;
- }
- }
+ if ((wrapper->notify == notify) && (mydata == wrapper->data)) {
+ list_del_init(&wrapper->list);
+ GF_FREE(wrapper);
+ ret++;
+ }
}
- pthread_mutex_unlock (&svc->rpclock);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
out:
- return ret;
+ return ret;
}
int
-rpcsvc_register_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
+rpcsvc_register_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
{
- rpcsvc_notify_wrapper_t *wrapper = NULL;
- int ret = -1;
-
- wrapper = rpcsvc_notify_wrapper_alloc ();
- if (!wrapper) {
- goto out;
- }
- svc->mydata = mydata; /* this_xlator */
- wrapper->data = mydata;
- wrapper->notify = notify;
-
- pthread_mutex_lock (&svc->rpclock);
- {
- list_add_tail (&wrapper->list, &svc->notify);
- svc->notify_count++;
- }
- pthread_mutex_unlock (&svc->rpclock);
-
- ret = 0;
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
+ int ret = -1;
+
+ wrapper = rpcsvc_notify_wrapper_alloc();
+ if (!wrapper) {
+ goto out;
+ }
+ svc->mydata = mydata;
+ wrapper->data = mydata;
+ wrapper->notify = notify;
+
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_add_tail(&wrapper->list, &svc->notify);
+ svc->notify_count++;
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-inline int
-rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program)
+void *
+rpcsvc_request_handler(void *arg)
{
- int ret = -1;
- rpcsvc_program_t *newprog = NULL;
- char already_registered = 0;
+ rpcsvc_request_queue_t *queue = NULL;
+ rpcsvc_program_t *program = NULL;
+ rpcsvc_request_t *req = NULL, *tmp_req = NULL;
+ rpcsvc_actor_t *actor = NULL;
+ gf_boolean_t done = _gf_false;
+ int ret = 0;
+ struct list_head tmp_list;
- if (!svc) {
- goto out;
- }
+ queue = arg;
+ program = queue->program;
- if (program->actors == NULL) {
- goto out;
- }
+ INIT_LIST_HEAD(&tmp_list);
- pthread_mutex_lock (&svc->rpclock);
+ if (!program)
+ return NULL;
+
+ while (1) {
+ pthread_mutex_lock(&queue->queue_lock);
{
- list_for_each_entry (newprog, &svc->programs, program) {
- if ((newprog->prognum == program->prognum)
- && (newprog->progver == program->progver)) {
- already_registered = 1;
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
+ if (!program->alive && list_empty(&queue->request_queue)) {
+ done = 1;
+ goto unlock;
+ }
- if (already_registered) {
- ret = 0;
- goto out;
+ while (list_empty(&queue->request_queue)) {
+ queue->waiting = _gf_true;
+ pthread_cond_wait(&queue->queue_cond, &queue->queue_lock);
+ }
+
+ queue->waiting = _gf_false;
+
+ if (!list_empty(&queue->request_queue)) {
+ INIT_LIST_HEAD(&tmp_list);
+ list_splice_init(&queue->request_queue, &tmp_list);
+ }
}
+ unlock:
+ pthread_mutex_unlock(&queue->queue_lock);
- newprog = GF_CALLOC (1, sizeof(*newprog),gf_common_mt_rpcsvc_program_t);
- if (newprog == NULL) {
- goto out;
+ list_for_each_entry_safe(req, tmp_req, &tmp_list, request_list)
+ {
+ if (req) {
+ list_del_init(&req->request_list);
+
+ if (req->prognum == RPCSVC_INFRA_PROGRAM) {
+ switch (req->procnum) {
+ case RPCSVC_PROC_EVENT_THREAD_DEATH:
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "event thread died, exiting request handler "
+ "thread for queue %d of program %s",
+ (int)(queue - &program->request_queue[0]),
+ program->progname);
+ done = 1;
+ pthread_mutex_lock(&program->thr_lock);
+ {
+ rpcsvc_toggle_queue_status(
+ program, queue,
+ program->request_queue_status);
+ program->threadcount--;
+ }
+ pthread_mutex_unlock(&program->thr_lock);
+ rpcsvc_request_destroy(req);
+ break;
+
+ default:
+ break;
+ }
+ } else {
+ THIS = req->svc->xl;
+ actor = rpcsvc_program_actor(req);
+ ret = actor->actor(req);
+
+ if (ret != 0) {
+ rpcsvc_check_and_reply_error(ret, NULL, req);
+ }
+ req = NULL;
+ }
+ }
}
- memcpy (newprog, program, sizeof (*program));
+ if (done)
+ break;
+ }
- INIT_LIST_HEAD (&newprog->program);
+ return NULL;
+}
- pthread_mutex_lock (&svc->rpclock);
+int
+rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
+ gf_boolean_t add_to_head)
+{
+ int ret = -1, i = 0;
+ rpcsvc_program_t *newprog = NULL;
+ char already_registered = 0;
+ pthread_mutexattr_t attr[EVENT_MAX_THREADS];
+ pthread_mutexattr_t thr_attr;
+
+ if (!svc) {
+ goto out;
+ }
+
+ if (program->actors == NULL) {
+ goto out;
+ }
+
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(newprog, &svc->programs, program)
{
- list_add_tail (&newprog->program, &svc->programs);
+ if ((newprog->prognum == program->prognum) &&
+ (newprog->progver == program->progver)) {
+ already_registered = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&svc->rpclock);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+ if (already_registered) {
ret = 0;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New program registered: %s, Num: %d,"
- " Ver: %d, Port: %d", newprog->progname, newprog->prognum,
- newprog->progver, newprog->progport);
+ goto out;
+ }
+
+ newprog = GF_CALLOC(1, sizeof(*newprog), gf_common_mt_rpcsvc_program_t);
+ if (newprog == NULL) {
+ goto out;
+ }
+
+ memcpy(newprog, program, sizeof(*program));
+ newprog->latencies = gf_latency_new(program->numactors);
+ if (!newprog->latencies) {
+ rpcsvc_program_destroy(newprog);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&newprog->program);
+ pthread_mutexattr_init(&thr_attr);
+ pthread_mutexattr_settype(&thr_attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+
+ for (i = 0; i < EVENT_MAX_THREADS; i++) {
+ pthread_mutexattr_init(&attr[i]);
+ pthread_mutexattr_settype(&attr[i], PTHREAD_MUTEX_ADAPTIVE_NP);
+ INIT_LIST_HEAD(&newprog->request_queue[i].request_queue);
+ pthread_mutex_init(&newprog->request_queue[i].queue_lock, &attr[i]);
+ pthread_cond_init(&newprog->request_queue[i].queue_cond, NULL);
+ newprog->request_queue[i].program = newprog;
+ }
+
+ pthread_mutex_init(&newprog->thr_lock, &thr_attr);
+ pthread_cond_init(&newprog->thr_cond, NULL);
+
+ newprog->alive = _gf_true;
+
+ if (gf_async_ctrl.enabled) {
+ newprog->ownthread = _gf_false;
+ newprog->synctask = _gf_false;
+ }
+
+ /* make sure synctask gets priority over ownthread */
+ if (newprog->synctask)
+ newprog->ownthread = _gf_false;
+
+ if (newprog->ownthread) {
+ struct event_pool *ep = svc->ctx->event_pool;
+ newprog->eventthreadcount = ep->eventthreadcount;
+
+ pthread_key_create(&newprog->req_queue_key, NULL);
+ newprog->thr_queue = 1;
+ }
+
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ if (add_to_head)
+ list_add(&newprog->program, &svc->programs);
+ else
+ list_add_tail(&newprog->program, &svc->programs);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
-out:
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program registration failed:"
- " %s, Num: %d, Ver: %d, Port: %d", program->progname,
- program->prognum, program->progver, program->progport);
- }
+ ret = 0;
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "New program registered: %s, Num: %d,"
+ " Ver: %d, Port: %d",
+ newprog->progname, newprog->prognum, newprog->progver,
+ newprog->progport);
- return ret;
+out:
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Program registration failed:"
+ " %s, Num: %d, Ver: %d, Port: %d",
+ program->progname, program->prognum, program->progver,
+ program->progport);
+ }
+
+ return ret;
}
static void
-free_prog_details (gf_dump_rsp *rsp)
+free_prog_details(gf_dump_rsp *rsp)
{
- gf_prog_detail *prev = NULL;
- gf_prog_detail *trav = NULL;
-
- trav = rsp->prog;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE (prev);
- }
+ gf_prog_detail *prev = NULL;
+ gf_prog_detail *trav = NULL;
+
+ trav = rsp->prog;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE(prev);
+ }
}
static int
-build_prog_details (rpcsvc_request_t *req, gf_dump_rsp *rsp)
+build_prog_details(rpcsvc_request_t *req, gf_dump_rsp *rsp)
{
- int ret = -1;
- rpcsvc_program_t *program = NULL;
- gf_prog_detail *prog = NULL;
- gf_prog_detail *prev = NULL;
+ int ret = -1;
+ rpcsvc_program_t *program = NULL;
+ gf_prog_detail *prog = NULL;
+ gf_prog_detail *prev = NULL;
- if (!req || !req->trans || !req->svc)
- goto out;
+ if (!req || !req->trans || !req->svc)
+ goto out;
+
+ pthread_rwlock_rdlock(&req->svc->rpclock);
+ {
+ list_for_each_entry(program, &req->svc->programs, program)
+ {
+ prog = GF_CALLOC(1, sizeof(*prog), 0);
+ if (!prog)
+ goto unlock;
+
+ prog->progname = program->progname;
+ prog->prognum = program->prognum;
+ prog->progver = program->progver;
- list_for_each_entry (program, &req->svc->programs, program) {
- prog = GF_CALLOC (1, sizeof (*prog), 0);
- if (!prog)
- goto out;
- prog->progname = program->progname;
- prog->prognum = program->prognum;
- prog->progver = program->progver;
- if (!rsp->prog)
- rsp->prog = prog;
- if (prev)
- prev->next = prog;
- prev = prog;
+ if (!rsp->prog)
+ rsp->prog = prog;
+ if (prev)
+ prev->next = prog;
+ prev = prog;
}
if (prev)
- ret = 0;
+ ret = 0;
+ }
+unlock:
+ pthread_rwlock_unlock(&req->svc->rpclock);
out:
- return ret;
+ return ret;
}
static int
-rpcsvc_dump (rpcsvc_request_t *req)
+rpcsvc_ping(rpcsvc_request_t *req)
{
- char rsp_buf[8 * 1024] = {0,};
- gf_dump_rsp rsp = {0,};
- struct iovec iov = {0,};
- int op_errno = EINVAL;
- int ret = -1;
- uint32_t dump_rsp_len = 0;
+ char rsp_buf[8 * 1024] = {
+ 0,
+ };
+ gf_common_rsp rsp = {
+ 0,
+ };
+ struct iovec iov = {
+ 0,
+ };
+ int ret = -1;
+ uint32_t ping_rsp_len = 0;
+
+ ping_rsp_len = xdr_sizeof((xdrproc_t)xdr_gf_common_rsp, &rsp);
+
+ iov.iov_base = rsp_buf;
+ iov.iov_len = ping_rsp_len;
+
+ ret = xdr_serialize_generic(iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ ret = RPCSVC_ACTOR_ERROR;
+ } else {
+ rsp.op_ret = 0;
+ rpcsvc_submit_generic(req, &iov, 1, NULL, 0, NULL);
+ }
+
+ return 0;
+}
- if (!req)
- goto fail;
+static int
+rpcsvc_dump(rpcsvc_request_t *req)
+{
+ char rsp_buf[8 * 1024] = {
+ 0,
+ };
+ gf_dump_rsp rsp = {
+ 0,
+ };
+ struct iovec iov = {
+ 0,
+ };
+ int op_errno = EINVAL;
+ int ret = -1;
+ uint32_t dump_rsp_len = 0;
+
+ if (!req)
+ goto sendrsp;
+
+ ret = build_prog_details(req, &rsp);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto sendrsp;
+ }
+
+ op_errno = 0;
+
+sendrsp:
+ rsp.op_errno = gf_errno_to_error(op_errno);
+ rsp.op_ret = ret;
+
+ dump_rsp_len = xdr_sizeof((xdrproc_t)xdr_gf_dump_rsp, &rsp);
+
+ iov.iov_base = rsp_buf;
+ iov.iov_len = dump_rsp_len;
+
+ ret = xdr_serialize_generic(iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ if (ret < 0) {
+ ret = RPCSVC_ACTOR_ERROR;
+ } else {
+ rpcsvc_submit_generic(req, &iov, 1, NULL, 0, NULL);
+ ret = 0;
+ }
- ret = build_prog_details (req, &rsp);
- if (ret < 0) {
- op_errno = -ret;
- goto fail;
- }
+ free_prog_details(&rsp);
-fail:
- rsp.op_errno = gf_errno_to_error (op_errno);
- rsp.op_ret = ret;
+ return ret;
+}
- dump_rsp_len = xdr_sizeof ((xdrproc_t) xdr_gf_dump_rsp,
- &rsp);
+int
+rpcsvc_init_options(rpcsvc_t *svc, dict_t *options)
+{
+ char *optstr = NULL;
+ int ret = -1;
+
+ if ((!svc) || (!options))
+ return -1;
- iov.iov_base = rsp_buf;
- iov.iov_len = dump_rsp_len;
+ svc->memfactor = RPCSVC_DEFAULT_MEMFACTOR;
+
+ svc->register_portmap = _gf_true;
+ if (dict_get(options, "rpc.register-with-portmap")) {
+ ret = dict_get_str(options, "rpc.register-with-portmap", &optstr);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to parse "
+ "dict");
+ goto out;
+ }
- ret = xdr_serialize_generic (iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ ret = gf_string2boolean(optstr, &svc->register_portmap);
if (ret < 0) {
- if (req)
- req->rpc_err = GARBAGE_ARGS;
- op_errno = EINVAL;
- goto fail;
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to parse bool "
+ "string");
+ goto out;
+ }
+ }
+
+ if (!svc->register_portmap)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "Portmap registration "
+ "disabled");
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options)
+{
+ xlator_t *xlator = NULL;
+ xlator_list_t *volentry = NULL;
+ char *srchkey = NULL;
+ char *keyval = NULL;
+ int ret = -1;
+
+ if ((!svc) || (!svc->options) || (!options))
+ return (-1);
+
+ /* Fetch the xlator from svc */
+ xlator = svc->xl;
+ if (!xlator)
+ return (-1);
+
+ /* Reconfigure the volume specific rpc-auth.addr allow part */
+ volentry = xlator->children;
+ while (volentry) {
+ ret = gf_asprintf(&srchkey, "rpc-auth.addr.%s.allow",
+ volentry->xlator->name);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return (-1);
+ }
+
+ /* key-string: rpc-auth.addr.<volname>.allow
+ *
+ * IMP: Delete the OLD key/value pair from dict.
+ * And set the NEW key/value pair IFF the option is SET
+ * in reconfigured volfile.
+ *
+ * NB: If rpc-auth.addr.<volname>.allow is not SET explicitly,
+ * build_nfs_graph() sets it as "*" i.e. anonymous.
+ */
+ dict_del(svc->options, srchkey);
+ if (!dict_get_str(options, srchkey, &keyval)) {
+ ret = dict_set_dynstr_with_alloc(svc->options, srchkey, keyval);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "dict_set_str error");
+ GF_FREE(srchkey);
+ return (-1);
+ }
+ }
+
+ GF_FREE(srchkey);
+ volentry = volentry->next;
+ }
+
+ /* Reconfigure the volume specific rpc-auth.addr reject part */
+ volentry = xlator->children;
+ while (volentry) {
+ ret = gf_asprintf(&srchkey, "rpc-auth.addr.%s.reject",
+ volentry->xlator->name);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return (-1);
}
- ret = rpcsvc_submit_generic (req, &iov, 1, NULL, 0,
- NULL);
+ /* key-string: rpc-auth.addr.<volname>.reject
+ *
+ * IMP: Delete the OLD key/value pair from dict.
+ * And set the NEW key/value pair IFF the option is SET
+ * in reconfigured volfile.
+ *
+ * NB: No default value for reject key.
+ */
+ dict_del(svc->options, srchkey);
+ if (!dict_get_str(options, srchkey, &keyval)) {
+ ret = dict_set_dynstr_with_alloc(svc->options, srchkey, keyval);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "dict_set_str error");
+ GF_FREE(srchkey);
+ return (-1);
+ }
+ }
- free_prog_details (&rsp);
+ GF_FREE(srchkey);
+ volentry = volentry->next;
+ }
- return ret;
+ ret = rpcsvc_init_options(svc, options);
+ if (ret)
+ return (-1);
+
+ return rpcsvc_auth_reconf(svc, options);
}
int
-rpcsvc_init_options (rpcsvc_t *svc, dict_t *options)
+rpcsvc_transport_unix_options_build(dict_t *dict, char *filepath)
{
- char *optstr = NULL;
- int ret = -1;
+ char *fpath = NULL;
+ int ret = -1;
- if ((!svc) || (!options))
- return -1;
+ GF_ASSERT(filepath);
+ GF_VALIDATE_OR_GOTO("rpcsvc", dict, out);
- svc->memfactor = RPCSVC_DEFAULT_MEMFACTOR;
+ fpath = gf_strdup(filepath);
+ if (!fpath) {
+ ret = -1;
+ goto out;
+ }
- svc->register_portmap = _gf_true;
- if (dict_get (options, "rpc.register-with-portmap")) {
- ret = dict_get_str (options, "rpc.register-with-portmap",
- &optstr);
- if (ret < 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to parse "
- "dict");
- goto out;
- }
+ ret = dict_set_dynstr(dict, "transport.socket.listen-path", fpath);
+ if (ret)
+ goto out;
- ret = gf_string2boolean (optstr, &svc->register_portmap);
- if (ret < 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to parse bool "
- "string");
- goto out;
- }
- }
+ ret = dict_set_str(dict, "transport.address-family", "unix");
+ if (ret)
+ goto out;
- if (!svc->register_portmap)
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Portmap registration "
- "disabled");
+ ret = dict_set_str(dict, "transport.socket.nodelay", "off");
+ if (ret)
+ goto out;
- ret = 0;
+ ret = dict_set_str(dict, "transport-type", "socket");
+ if (ret)
+ goto out;
out:
- return ret;
+ if (ret) {
+ GF_FREE(fpath);
+ }
+ return ret;
}
+/*
+ * Configure() the rpc.outstanding-rpc-limit param.
+ * If dict_get_int32() for dict-key "rpc.outstanding-rpc-limit" FAILS,
+ * it would set the value as "defvalue". Otherwise it would fetch the
+ * value and round up to multiple-of-8. defvalue must be +ve.
+ *
+ * NB: defval or set-value "0" is special which means unlimited/65536.
+ */
int
-rpcsvc_transport_unix_options_build (dict_t **options, char *filepath)
+rpcsvc_set_outstanding_rpc_limit(rpcsvc_t *svc, dict_t *options, int defvalue)
{
- dict_t *dict = NULL;
- char *fpath = NULL;
- int ret = -1;
-
- GF_ASSERT (filepath);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1; /* FAILURE */
+ int rpclim = 0;
+ static char *rpclimkey = "rpc.outstanding-rpc-limit";
+
+ if ((!svc) || (!options))
+ return (-1);
+
+ if ((defvalue < RPCSVC_MIN_OUTSTANDING_RPC_LIMIT) ||
+ (defvalue > RPCSVC_MAX_OUTSTANDING_RPC_LIMIT)) {
+ return (-1);
+ }
+
+ /* Fetch the rpc.outstanding-rpc-limit from dict. */
+ ret = dict_get_int32(options, rpclimkey, &rpclim);
+ if (ret < 0) {
+ /* Fall back to default for FAILURE */
+ rpclim = defvalue;
+ }
+
+ /* Round up to multiple-of-8. It must not exceed
+ * RPCSVC_MAX_OUTSTANDING_RPC_LIMIT.
+ */
+ rpclim = ((rpclim + 8 - 1) >> 3) * 8;
+ if (rpclim > RPCSVC_MAX_OUTSTANDING_RPC_LIMIT) {
+ rpclim = RPCSVC_MAX_OUTSTANDING_RPC_LIMIT;
+ }
+
+ if (svc->outstanding_rpc_limit != rpclim) {
+ svc->outstanding_rpc_limit = rpclim;
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "Configured %s with value %d", rpclimkey,
+ rpclim);
+ }
+
+ return (0);
+}
- fpath = gf_strdup (filepath);
- if (!fpath) {
- ret = -1;
- goto out;
- }
+/*
+ * Enable throttling for rpcsvc_t svc.
+ * Returns 0 on success, -1 otherwise.
+ */
+int
+rpcsvc_set_throttle_on(rpcsvc_t *svc)
+{
+ if (!svc)
+ return -1;
- ret = dict_set_dynstr (dict, "transport.socket.listen-path", fpath);
- if (ret)
- goto out;
+ svc->throttle = _gf_true;
- ret = dict_set_str (dict, "transport.address-family", "unix");
- if (ret)
- goto out;
+ return 0;
+}
- ret = dict_set_str (dict, "transport.socket.nodelay", "off");
- if (ret)
- goto out;
+/*
+ * Disable throttling for rpcsvc_t svc.
+ * Returns 0 on success, -1 otherwise.
+ */
+int
+rpcsvc_set_throttle_off(rpcsvc_t *svc)
+{
+ if (!svc)
+ return -1;
- ret = dict_set_str (dict, "transport-type", "socket");
- if (ret)
- goto out;
+ svc->throttle = _gf_false;
- *options = dict;
-out:
- if (ret) {
- if (fpath)
- GF_FREE (fpath);
- if (dict)
- dict_unref (dict);
- }
- return ret;
+ return 0;
}
-/* The global RPC service initializer.
+/*
+ * Get throttle state for rpcsvc_t svc.
+ * Returns value of attribute throttle on success, _gf_false otherwise.
*/
-rpcsvc_t *
-rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
- uint32_t poolcount)
+gf_boolean_t
+rpcsvc_get_throttle(rpcsvc_t *svc)
{
- rpcsvc_t *svc = NULL;
- int ret = -1;
+ if (!svc)
+ return _gf_false;
- if ((!ctx) || (!options))
- return NULL;
-
- svc = GF_CALLOC (1, sizeof (*svc), gf_common_mt_rpcsvc_t);
- if (!svc)
- return NULL;
+ return svc->throttle;
+}
- pthread_mutex_init (&svc->rpclock, NULL);
- INIT_LIST_HEAD (&svc->authschemes);
- INIT_LIST_HEAD (&svc->notify);
- INIT_LIST_HEAD (&svc->listeners);
- INIT_LIST_HEAD (&svc->programs);
+/* Function call to cleanup resources for svc
+ */
+int
+rpcsvc_destroy(rpcsvc_t *svc)
+{
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ int ret = 0;
- ret = rpcsvc_init_options (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init options");
- goto free_svc;
- }
+ if (!svc)
+ return ret;
- if (!poolcount)
- poolcount = RPCSVC_POOLCOUNT_MULT * svc->memfactor;
+ list_for_each_entry_safe(listener, next, &svc->listeners, list)
+ {
+ rpcsvc_listener_destroy(listener);
+ }
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "rx pool: %d", poolcount);
- svc->rxpool = mem_pool_new (rpcsvc_request_t, poolcount);
- /* TODO: leak */
- if (!svc->rxpool) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
- goto free_svc;
- }
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ list_del_init(&auth->authlist);
+ GF_FREE(auth);
+ }
- ret = rpcsvc_auth_init (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init "
- "authentication");
- goto free_svc;
- }
+ rpcsvc_program_unregister(svc, &gluster_dump_prog);
+ if (svc->rxpool) {
+ mem_pool_destroy(svc->rxpool);
+ svc->rxpool = NULL;
+ }
- ret = -1;
- svc->options = options;
- svc->ctx = ctx;
- svc->mydata = xl;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "RPC service inited.");
+ pthread_rwlock_destroy(&svc->rpclock);
+ GF_FREE(svc);
- gluster_dump_prog.options = options;
+ return ret;
+}
- ret = rpcsvc_program_register (svc, &gluster_dump_prog);
- if (ret) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "failed to register DUMP program");
- goto free_svc;
- }
- ret = 0;
+/* The global RPC service initializer.
+ */
+rpcsvc_t *
+rpcsvc_init(xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
+ uint32_t poolcount)
+{
+ rpcsvc_t *svc = NULL;
+ int ret = -1;
+
+ if ((!xl) || (!ctx) || (!options))
+ return NULL;
+
+ svc = GF_CALLOC(1, sizeof(*svc), gf_common_mt_rpcsvc_t);
+ if (!svc)
+ return NULL;
+
+ pthread_rwlock_init(&svc->rpclock, NULL);
+ INIT_LIST_HEAD(&svc->authschemes);
+ INIT_LIST_HEAD(&svc->notify);
+ INIT_LIST_HEAD(&svc->listeners);
+ INIT_LIST_HEAD(&svc->programs);
+
+ ret = rpcsvc_init_options(svc, options);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to init options");
+ goto free_svc;
+ }
+
+ if (!poolcount)
+ poolcount = RPCSVC_POOLCOUNT_MULT * svc->memfactor;
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "rx pool: %d", poolcount);
+ svc->rxpool = mem_pool_new(rpcsvc_request_t, poolcount);
+ /* TODO: leak */
+ if (!svc->rxpool) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
+ goto free_svc;
+ }
+
+ ret = rpcsvc_auth_init(svc, options);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to init "
+ "authentication");
+ goto free_svc;
+ }
+
+ ret = -1;
+ svc->options = options;
+ svc->ctx = ctx;
+ svc->xl = xl;
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "RPC service inited.");
+
+ gluster_dump_prog.options = options;
+
+ ret = rpcsvc_program_register(svc, &gluster_dump_prog, _gf_false);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "failed to register DUMP program");
+ goto free_svc;
+ }
+
+ ret = 0;
free_svc:
- if (ret == -1) {
- GF_FREE (svc);
- svc = NULL;
- }
+ if (ret == -1) {
+ GF_FREE(svc);
+ svc = NULL;
+ }
- return svc;
+ return svc;
}
-
int
-rpcsvc_transport_peer_check_search (dict_t *options, char *pattern, char *clstr)
+rpcsvc_transport_peer_check_search(dict_t *options, char *pattern, char *ip,
+ char *hostname)
{
- int ret = -1;
- char *addrtok = NULL;
- char *addrstr = NULL;
- char *dup_addrstr = NULL;
- char *svptr = NULL;
+ int ret = -1;
+ char *addrtok = NULL;
+ char *addrstr = NULL;
+ char *dup_addrstr = NULL;
+ char *svptr = NULL;
- if ((!options) || (!clstr))
- return -1;
+ if ((!options) || (!ip))
+ return -1;
- if (!dict_get (options, pattern))
- return -1;
-
- ret = dict_get_str (options, pattern, &addrstr);
- if (ret < 0) {
- ret = -1;
- goto err;
- }
+ ret = dict_get_str(options, pattern, &addrstr);
+ if (ret < 0) {
+ ret = -1;
+ goto err;
+ }
- if (!addrstr) {
- ret = -1;
- goto err;
- }
+ if (!addrstr) {
+ ret = -1;
+ goto err;
+ }
- dup_addrstr = gf_strdup (addrstr);
- addrtok = strtok_r (dup_addrstr, ",", &svptr);
- while (addrtok) {
+ dup_addrstr = gf_strdup(addrstr);
+ if (dup_addrstr == NULL) {
+ ret = -1;
+ goto err;
+ }
+ addrtok = strtok_r(dup_addrstr, ",", &svptr);
+ while (addrtok) {
+ /* CASEFOLD not present on Solaris */
+#ifdef FNM_CASEFOLD
+ ret = fnmatch(addrtok, ip, FNM_CASEFOLD);
+#else
+ ret = fnmatch(addrtok, ip, 0);
+#endif
+ if (ret == 0)
+ goto err;
- /* CASEFOLD not present on Solaris */
+ /* compare hostnames if applicable */
+ if (hostname) {
#ifdef FNM_CASEFOLD
- ret = fnmatch (addrtok, clstr, FNM_CASEFOLD);
+ ret = fnmatch(addrtok, hostname, FNM_CASEFOLD);
#else
- ret = fnmatch (addrtok, clstr, 0);
+ ret = fnmatch(addrtok, hostname, 0);
#endif
- if (ret == 0)
- goto err;
+ if (ret == 0)
+ goto err;
+ }
- addrtok = strtok_r (NULL, ",", &svptr);
+ /* Compare IPv4 subnetwork, TODO: IPv6 subnet support */
+ if (strchr(addrtok, '/')) {
+ ret = rpcsvc_match_subnet_v4(addrtok, ip);
+ if (ret == 0)
+ goto err;
}
- ret = -1;
+ addrtok = strtok_r(NULL, ",", &svptr);
+ }
+
+ ret = -1;
err:
- if (dup_addrstr)
- GF_FREE (dup_addrstr);
+ GF_FREE(dup_addrstr);
- return ret;
+ return ret;
}
-
-int
-rpcsvc_transport_peer_check_allow (dict_t *options, char *volname, char *clstr)
+static int
+rpcsvc_transport_peer_check_allow(dict_t *options, char *volname, char *ip,
+ char *hostname)
{
- int ret = RPCSVC_AUTH_DONTCARE;
- char *srchstr = NULL;
- char globalrule[] = "rpc-auth.addr.allow";
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
- if ((!options) || (!clstr))
- return ret;
+ if ((!options) || (!ip) || (!volname))
+ return ret;
- /* If volname is NULL, then we're searching for the general rule to
- * determine the current address in clstr is allowed or not for all
- * subvolumes.
- */
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_DONTCARE;
- goto out;
- }
- } else
- srchstr = globalrule;
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_DONTCARE;
+ goto out;
+ }
- ret = rpcsvc_transport_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
+ ret = rpcsvc_transport_peer_check_search(options, srchstr, ip, hostname);
+ GF_FREE(srchstr);
- if (ret == 0)
- ret = RPCSVC_AUTH_ACCEPT;
- else
- ret = RPCSVC_AUTH_DONTCARE;
+ if (ret == 0)
+ ret = RPCSVC_AUTH_ACCEPT;
+ else
+ ret = RPCSVC_AUTH_REJECT;
out:
- return ret;
+ return ret;
}
-int
-rpcsvc_transport_peer_check_reject (dict_t *options, char *volname, char *clstr)
-{
- int ret = RPCSVC_AUTH_DONTCARE;
- char *srchstr = NULL;
- char generalrule[] = "rpc-auth.addr.reject";
-
- if ((!options) || (!clstr))
- return ret;
-
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject",
- volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_REJECT;
- goto out;
- }
- } else
- srchstr = generalrule;
-
- ret = rpcsvc_transport_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
+static int
+rpcsvc_transport_peer_check_reject(dict_t *options, char *volname, char *ip,
+ char *hostname)
+{
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
- if (ret == 0)
- ret = RPCSVC_AUTH_REJECT;
- else
- ret = RPCSVC_AUTH_DONTCARE;
-out:
+ if ((!options) || (!ip) || (!volname))
return ret;
-}
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.reject", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto out;
+ }
-/* This function tests the results of the allow rule and the reject rule to
- * combine them into a single result that can be used to determine if the
- * connection should be allowed to proceed.
- * Heres the test matrix we need to follow in this function.
- *
- * A - Allow, the result of the allow test. Never returns R.
- * R - Reject, result of the reject test. Never returns A.
- * Both can return D or dont care if no rule was given.
- *
- * | @allow | @reject | Result |
- * | A | R | R |
- * | D | D | D |
- * | A | D | A |
- * | D | R | R |
- */
-int
-rpcsvc_combine_allow_reject_volume_check (int allow, int reject)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- /* If allowed rule allows but reject rule rejects, we stay cautious
- * and reject. */
- if ((allow == RPCSVC_AUTH_ACCEPT) && (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* if both are dont care, that is user did not specify for either allow
- * or reject, we leave it up to the general rule to apply, in the hope
- * that there is one.
- */
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- /* If one is dont care, the other one applies. */
- else if ((allow == RPCSVC_AUTH_ACCEPT) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
+ ret = rpcsvc_transport_peer_check_search(options, srchstr, ip, hostname);
+ GF_FREE(srchstr);
- return final;
+ if (ret == 0)
+ ret = RPCSVC_AUTH_REJECT;
+ else
+ ret = RPCSVC_AUTH_DONTCARE;
+out:
+ return ret;
}
-
-/* Combines the result of the general rule test against, the specific rule
- * to determine final permission for the client's address.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | D |
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
+/* Combines rpc auth's allow and reject options.
+ * Order of checks is important.
+ * First, REJECT if either rejects.
+ * If neither rejects, ACCEPT if either accepts.
+ * If neither accepts, DONTCARE
*/
int
-rpcsvc_combine_gen_spec_addr_checks (int gen, int spec)
+rpcsvc_combine_allow_reject_volume_check(int allow, int reject)
{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
-}
-
+ if (allow == RPCSVC_AUTH_REJECT || reject == RPCSVC_AUTH_REJECT)
+ return RPCSVC_AUTH_REJECT;
+ if (allow == RPCSVC_AUTH_ACCEPT || reject == RPCSVC_AUTH_ACCEPT)
+ return RPCSVC_AUTH_ACCEPT;
-/* Combines the result of the general rule test against, the specific rule
- * to determine final test for the connection coming in for a given volume.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | R |, special case, we intentionally disallow this.
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
- */
-int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* On no rule, we reject. */
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
+ return RPCSVC_AUTH_DONTCARE;
}
-
int
-rpcsvc_transport_peer_check_name (dict_t *options, char *volname,
- rpc_transport_t *trans)
+rpcsvc_auth_check(rpcsvc_t *svc, char *volname, char *ipaddr)
{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_REJECT;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
-
- if (!trans)
- return ret;
-
- ret = rpcsvc_transport_peername (trans, clstr, RPCSVC_PEER_STRLEN);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
- "%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- aret = rpcsvc_transport_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_transport_peer_check_reject (options, volname, clstr);
-
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-
-err:
+ int ret = RPCSVC_AUTH_REJECT;
+ int accept = RPCSVC_AUTH_REJECT;
+ int reject = RPCSVC_AUTH_REJECT;
+ char *hostname = NULL;
+ char *allow_str = NULL;
+ char *reject_str = NULL;
+ char *srchstr = NULL;
+ dict_t *options = NULL;
+
+ if (!svc || !volname || !ipaddr)
return ret;
-}
+ /* Fetch the options from svc struct and validate */
+ options = svc->options;
+ if (!options)
+ return ret;
-int
-rpcsvc_transport_peer_check_addr (dict_t *options, char *volname,
- rpc_transport_t *trans)
-{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_DONTCARE;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
- char *tmp = NULL;
- struct sockaddr_storage sastorage = {0,};
- struct sockaddr *sockaddr = NULL;
-
- if (!trans)
- return ret;
-
- ret = rpcsvc_transport_peeraddr (trans, clstr, RPCSVC_PEER_STRLEN,
- &sastorage, sizeof (sastorage));
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
- "%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
+ /* Accept if its the default case: Allow all, Reject none
+ * The default volfile always contains a 'allow *' rule
+ * for each volume. If allow rule is missing (which implies
+ * there is some bad volfile generating code doing this), we
+ * assume no one is allowed mounts, and thus, we reject mounts.
+ */
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return RPCSVC_AUTH_REJECT;
+ }
+
+ ret = dict_get_str(options, srchstr, &allow_str);
+ GF_FREE(srchstr);
+ if (ret < 0)
+ return RPCSVC_AUTH_REJECT;
+
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.reject", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return RPCSVC_AUTH_REJECT;
+ }
+
+ ret = dict_get_str(options, srchstr, &reject_str);
+ GF_FREE(srchstr);
+
+ /*
+ * If "reject_str" is being set as '*' (anonymous), then NFS-server
+ * would reject everything. If the "reject_str" is not set and
+ * "allow_str" is set as '*' (anonymous), then NFS-server would
+ * accept mount requests from all clients.
+ */
+ if (reject_str != NULL) {
+ if (!strcmp("*", reject_str))
+ return RPCSVC_AUTH_REJECT;
+ } else {
+ if (!strcmp("*", allow_str))
+ return RPCSVC_AUTH_ACCEPT;
+ }
+
+ /* addr-namelookup check */
+ if (svc->addr_namelookup == _gf_true) {
+ ret = gf_get_hostname_from_ip(ipaddr, &hostname);
+ if (ret) {
+ if (hostname)
+ GF_FREE(hostname);
+ /* failed to get hostname, but hostname auth
+ * is enabled, so authentication will not be
+ * 100% correct. reject mounts
+ */
+ return RPCSVC_AUTH_REJECT;
}
+ }
- sockaddr = (struct sockaddr *) &sastorage;
- switch (sockaddr->sa_family) {
-
- case AF_INET:
- case AF_INET6:
- tmp = strrchr (clstr, ':');
- if (tmp)
- *tmp = '\0';
- break;
- }
+ accept = rpcsvc_transport_peer_check_allow(options, volname, ipaddr,
+ hostname);
- aret = rpcsvc_transport_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_transport_peer_check_reject (options, volname, clstr);
+ reject = rpcsvc_transport_peer_check_reject(options, volname, ipaddr,
+ hostname);
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-err:
- return ret;
+ if (hostname)
+ GF_FREE(hostname);
+ return rpcsvc_combine_allow_reject_volume_check(accept, reject);
}
-
int
-rpcsvc_transport_check_volume_specific (dict_t *options, char *volname,
- rpc_transport_t *trans)
-{
- int namechk = RPCSVC_AUTH_REJECT;
- int addrchk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_false;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!volname) || (!trans))
- return RPCSVC_AUTH_REJECT;
-
- /* Disabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
- }
-
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
- */
- if (namelookup)
- namechk = rpcsvc_transport_peer_check_name (options, volname,
- trans);
- addrchk = rpcsvc_transport_peer_check_addr (options, volname, trans);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk,
- namechk);
- else
- ret = addrchk;
+rpcsvc_transport_privport_check(rpcsvc_t *svc, char *volname, uint16_t port)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+ char *srchstr = NULL;
+ char *valstr = NULL;
+ gf_boolean_t insecure = _gf_false;
+ if ((!svc) || (!volname))
return ret;
-}
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
+ /* If the port is already a privileged one, don't bother with checking
+ * options.
+ */
+ if (port <= 1024) {
+ ret = RPCSVC_AUTH_ACCEPT;
+ goto err;
+ }
+
+ /* Disabled by default */
+ ret = gf_asprintf(&srchstr, "rpc-auth.ports.%s.insecure", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ ret = dict_get_str(svc->options, srchstr, &valstr);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to"
+ " read rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = gf_string2boolean(valstr, &insecure);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to"
+ " convert rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = insecure ? RPCSVC_AUTH_ACCEPT : RPCSVC_AUTH_REJECT;
+
+ if (ret == RPCSVC_AUTH_ACCEPT)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
+ else
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "Unprivileged port not"
+ " allowed");
-int
-rpcsvc_transport_check_volume_general (dict_t *options, rpc_transport_t *trans)
-{
- int addrchk = RPCSVC_AUTH_REJECT;
- int namechk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_false;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!trans))
- return RPCSVC_AUTH_REJECT;
-
- /* Disabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
- }
-
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
- */
- if (namelookup)
- namechk = rpcsvc_transport_peer_check_name (options, NULL, trans);
- addrchk = rpcsvc_transport_peer_check_addr (options, NULL, trans);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk, namechk);
- else
- ret = addrchk;
+err:
+ if (srchstr)
+ GF_FREE(srchstr);
- return ret;
+ return ret;
}
-int
-rpcsvc_transport_peer_check (dict_t *options, char *volname,
- rpc_transport_t *trans)
+char *
+rpcsvc_volume_allowed(dict_t *options, char *volname)
{
- int general_chk = RPCSVC_AUTH_REJECT;
- int specific_chk = RPCSVC_AUTH_REJECT;
+ char globalrule[] = "rpc-auth.addr.allow";
+ char *srchstr = NULL;
+ char *addrstr = NULL;
+ int ret = -1;
- if ((!options) || (!volname) || (!trans))
- return RPCSVC_AUTH_REJECT;
+ if ((!options) || (!volname))
+ return NULL;
- general_chk = rpcsvc_transport_check_volume_general (options, trans);
- specific_chk = rpcsvc_transport_check_volume_specific (options, volname,
- trans);
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ goto out;
+ }
- return rpcsvc_combine_gen_spec_volume_checks (general_chk,
- specific_chk);
-}
+ if (!dict_get(options, srchstr))
+ ret = dict_get_str(options, globalrule, &addrstr);
+ else
+ ret = dict_get_str(options, srchstr, &addrstr);
+out:
+ GF_FREE(srchstr);
-int
-rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
- rpc_transport_t *trans)
-{
- struct sockaddr_storage sastorage = {0,};
- struct sockaddr_in *sa = NULL;
- int ret = RPCSVC_AUTH_REJECT;
- socklen_t sasize = sizeof (sa);
- char *srchstr = NULL;
- char *valstr = NULL;
- int globalinsecure = RPCSVC_AUTH_REJECT;
- int exportinsecure = RPCSVC_AUTH_DONTCARE;
- uint16_t port = 0;
- gf_boolean_t insecure = _gf_false;
-
- if ((!svc) || (!volname) || (!trans))
- return ret;
-
- sa = (struct sockaddr_in*) &sastorage;
- ret = rpcsvc_transport_peeraddr (trans, NULL, 0, &sastorage,
- sasize);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get peer addr: %s",
- gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
+ return addrstr;
+}
- port = ntohs (sa->sin_port);
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
- /* If the port is already a privileged one, dont bother with checking
- * options.
+/*
+ * rpcsvc_match_subnet_v4() takes subnetwork address pattern and checks
+ * if the target IPv4 address has the same network address with the help
+ * of network mask.
+ *
+ * Returns 0 for SUCCESS and -1 otherwise.
+ *
+ * NB: Validation of subnetwork address pattern is not required
+ * as it's already being done at the time of CLI SET.
+ */
+static int
+rpcsvc_match_subnet_v4(const char *addrtok, const char *ipaddr)
+{
+ char *slash = NULL;
+ char *netaddr = NULL;
+ int ret = -1;
+ uint32_t prefixlen = 0;
+ uint32_t shift = 0;
+ struct sockaddr_in sin1 = {
+ 0,
+ };
+ struct sockaddr_in sin2 = {
+ 0,
+ };
+ struct sockaddr_in mask = {
+ 0,
+ };
+
+ /* Copy the input */
+ netaddr = gf_strdup(addrtok);
+ if (netaddr == NULL) /* ENOMEM */
+ goto out;
+
+ /* Find the network socket addr of target */
+ if (inet_pton(AF_INET, ipaddr, &sin1.sin_addr) == 0)
+ goto out;
+
+ slash = strchr(netaddr, '/');
+ if (slash) {
+ *slash = '\0';
+ /*
+ * Find the IPv4 network mask in network byte order.
+ * IMP: String slash+1 is already validated, it can't have value
+ * more than IPv4_ADDR_SIZE (32).
*/
- if (port <= 1024) {
- ret = RPCSVC_AUTH_ACCEPT;
- goto err;
- }
-
- /* Disabled by default */
- if ((dict_get (svc->options, "rpc-auth.ports.insecure"))) {
- ret = dict_get_str (svc->options, "rpc-auth.ports.insecure"
- , &srchstr);
- if (ret == 0) {
- ret = gf_string2boolean (srchstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- globalinsecure = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
-
- /* Disabled by default */
- ret = gf_asprintf (&srchstr, "rpc-auth.ports.%s.insecure", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- exportinsecure = RPCSVC_AUTH_ACCEPT;
- else
- exportinsecure = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
-
- ret = rpcsvc_combine_gen_spec_volume_checks (globalinsecure,
- exportinsecure);
- if (ret == RPCSVC_AUTH_ACCEPT)
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
- else
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port not"
- " allowed");
-
-err:
- return ret;
+ prefixlen = (uint32_t)atoi(slash + 1);
+ if (prefixlen > 31)
+ goto out;
+ } else {
+ /* if there is no '/', then this function wouldn't be called */
+ goto out;
+ }
+
+ /* Need to do this after removing '/', as inet_pton() take IP address as
+ * second argument. Once we get sin2, then comparison is oranges to orange
+ */
+ if (inet_pton(AF_INET, netaddr, &sin2.sin_addr) == 0)
+ goto out;
+
+ shift = IPv4_ADDR_SIZE - prefixlen;
+ mask.sin_addr.s_addr = htonl((uint32_t)~0 << shift);
+
+ if (mask_match(sin1.sin_addr.s_addr, sin2.sin_addr.s_addr,
+ mask.sin_addr.s_addr)) {
+ ret = 0; /* SUCCESS */
+ }
+out:
+ GF_FREE(netaddr);
+ return ret;
}
-
-char *
-rpcsvc_volume_allowed (dict_t *options, char *volname)
+void
+rpcsvc_program_dump(rpcsvc_program_t *prog)
{
- char globalrule[] = "rpc-auth.addr.allow";
- char *srchstr = NULL;
- char *addrstr = NULL;
- int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
- if ((!options) || (!volname))
- return NULL;
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname);
+ gf_proc_dump_add_section("%s", key_prefix);
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- goto out;
- }
+ gf_proc_dump_build_key(key, key_prefix, "program-number");
+ gf_proc_dump_write(key, "%d", prog->prognum);
- if (!dict_get (options, srchstr))
- ret = dict_get_str (options, globalrule, &addrstr);
- else
- ret = dict_get_str (options, srchstr, &addrstr);
+ gf_proc_dump_build_key(key, key_prefix, "program-version");
+ gf_proc_dump_write(key, "%d", prog->progver);
-out:
- if (srchstr)
- GF_FREE (srchstr);
+ strncat(key_prefix, ".latency",
+ sizeof(key_prefix) - strlen(key_prefix) - 1);
- return addrstr;
+ for (i = 0; i < prog->numactors; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname);
+ gf_latency_statedump_and_reset(key, &prog->latencies[i]);
+ }
}
+void
+rpcsvc_statedump(rpcsvc_t *svc)
+{
+ rpcsvc_program_t *prog = NULL;
+ int ret = 0;
+ ret = pthread_rwlock_tryrdlock(&svc->rpclock);
+ if (ret)
+ return;
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ rpcsvc_program_dump(prog);
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+}
-rpcsvc_actor_t gluster_dump_actors[] = {
- [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, NULL, 0},
- [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, NULL, 0},
- [GF_DUMP_MAXVALUE] = {"MAXVALUE", GF_DUMP_MAXVALUE, NULL, NULL, NULL, 0},
+static rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_NULL] = {"NULL", NULL, NULL, GF_DUMP_NULL, DRC_NA, 0},
+ [GF_DUMP_DUMP] = {"DUMP", rpcsvc_dump, NULL, GF_DUMP_DUMP, DRC_NA, 0},
+ [GF_DUMP_PING] = {"PING", rpcsvc_ping, NULL, GF_DUMP_PING, DRC_NA, 0},
};
-
-struct rpcsvc_program gluster_dump_prog = {
- .progname = "GF-DUMP",
- .prognum = GLUSTER_DUMP_PROGRAM,
- .progver = GLUSTER_DUMP_VERSION,
- .actors = gluster_dump_actors,
- .numactors = 2,
+static struct rpcsvc_program gluster_dump_prog = {
+ .progname = "GF-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .actors = gluster_dump_actors,
+ .numactors = GF_DUMP_MAXVALUE,
};
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index 95f2c4e8461..7b3030926c8 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -11,47 +11,57 @@
#ifndef _RPCSVC_H
#define _RPCSVC_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "event.h"
+#include <glusterfs/gf-event.h>
#include "rpc-transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "list.h"
-#include "iobuf.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
-#include "glusterfs.h"
-#include "xlator.h"
#include "rpcsvc-common.h"
#include <pthread.h>
#include <sys/uio.h>
#include <inttypes.h>
#include <rpc/rpc_msg.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/client_t.h>
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
#endif
-#define GF_RPCSVC "rpc-service"
+/* TODO: we should store prognums at a centralized location to avoid conflict
+ or use a robust random number generator to avoid conflicts
+*/
+
+#define RPCSVC_INFRA_PROGRAM 7712846 /* random number */
+
+typedef enum {
+ RPCSVC_PROC_EVENT_THREAD_DEATH = 0,
+} rpcsvc_infra_procnum_t;
+
+#define RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT \
+ 64 /* Default for protocol/server */
+#define RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT 16 /* Default for nfs/server */
+#define RPCSVC_MAX_OUTSTANDING_RPC_LIMIT 65536
+#define RPCSVC_MIN_OUTSTANDING_RPC_LIMIT 0 /* No limit i.e. Unlimited */
+
+#define GF_RPCSVC "rpc-service"
#define RPCSVC_THREAD_STACK_SIZE ((size_t)(1024 * GF_UNIT_KB))
-#define RPCSVC_FRAGHDR_SIZE 4 /* 4-byte RPC fragment header size */
-#define RPCSVC_DEFAULT_LISTEN_PORT GF_DEFAULT_BASE_PORT
-#define RPCSVC_DEFAULT_MEMFACTOR 8
-#define RPCSVC_EVENTPOOL_SIZE_MULT 1024
-#define RPCSVC_POOLCOUNT_MULT 64
-#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
-#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
+#define RPCSVC_FRAGHDR_SIZE 4 /* 4-byte RPC fragment header size */
+#define RPCSVC_DEFAULT_LISTEN_PORT GF_DEFAULT_BASE_PORT
+#define RPCSVC_DEFAULT_MEMFACTOR 8
+#define RPCSVC_EVENTPOOL_SIZE_MULT 1024
+#define RPCSVC_POOLCOUNT_MULT 64
+#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
+#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
+#define RPC_ROOT_UID 0
+#define RPC_ROOT_GID 0
+#define RPC_NOBODY_UID 65534
+#define RPC_NOBODY_GID 65534
/* RPC Record States */
-#define RPCSVC_READ_FRAGHDR 1
-#define RPCSVC_READ_FRAG 2
+#define RPCSVC_READ_FRAGHDR 1
+#define RPCSVC_READ_FRAG 2
/* The size in bytes, if crossed by a fragment will be handed over to the
* vectored actor so that it can allocate its buffers the way it wants.
* In our RPC layer, we assume that vectored RPC requests/records are never
@@ -59,21 +69,28 @@
* whether the record should be handled in RPC layer completely or handed to
* the vectored handler.
*/
-#define RPCSVC_VECTORED_FRAGSZ 4096
-#define RPCSVC_VECTOR_READCRED 1003
-#define RPCSVC_VECTOR_READVERFSZ 1004
-#define RPCSVC_VECTOR_READVERF 1005
-#define RPCSVC_VECTOR_IGNORE 1006
-#define RPCSVC_VECTOR_READVEC 1007
-#define RPCSVC_VECTOR_READPROCHDR 1008
-
-#define rpcsvc_record_vectored_baremsg(rs) (((rs)->state == RPCSVC_READ_FRAG) && (rs)->vecstate == 0)
-#define rpcsvc_record_vectored_cred(rs) ((rs)->vecstate == RPCSVC_VECTOR_READCRED)
-#define rpcsvc_record_vectored_verfsz(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERFSZ)
-#define rpcsvc_record_vectored_verfread(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERF)
-#define rpcsvc_record_vectored_ignore(rs) ((rs)->vecstate == RPCSVC_VECTOR_IGNORE)
-#define rpcsvc_record_vectored_readvec(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVEC)
-#define rpcsvc_record_vectored_readprochdr(rs) ((rs)->vecstate == RPCSVC_VECTOR_READPROCHDR)
+#define RPCSVC_VECTORED_FRAGSZ 4096
+#define RPCSVC_VECTOR_READCRED 1003
+#define RPCSVC_VECTOR_READVERFSZ 1004
+#define RPCSVC_VECTOR_READVERF 1005
+#define RPCSVC_VECTOR_IGNORE 1006
+#define RPCSVC_VECTOR_READVEC 1007
+#define RPCSVC_VECTOR_READPROCHDR 1008
+
+#define rpcsvc_record_vectored_baremsg(rs) \
+ (((rs)->state == RPCSVC_READ_FRAG) && (rs)->vecstate == 0)
+#define rpcsvc_record_vectored_cred(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READCRED)
+#define rpcsvc_record_vectored_verfsz(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READVERFSZ)
+#define rpcsvc_record_vectored_verfread(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READVERF)
+#define rpcsvc_record_vectored_ignore(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_IGNORE)
+#define rpcsvc_record_vectored_readvec(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READVEC)
+#define rpcsvc_record_vectored_readprochdr(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READPROCHDR)
#define rpcsvc_record_vectored(rs) ((rs)->fragsize > RPCSVC_VECTORED_FRAGSZ)
/* Includes bytes up to and including the credential length field. The credlen
* will be followed by @credlen bytes of credential data which will have to be
@@ -81,187 +98,232 @@
* verifier which will also have to be read separately including the 8 bytes of
* verf flavour and verflen.
*/
-#define RPCSVC_BARERPC_MSGSZ 32
-#define rpcsvc_record_readfraghdr(rs) ((rs)->state == RPCSVC_READ_FRAGHDR)
-#define rpcsvc_record_readfrag(rs) ((rs)->state == RPCSVC_READ_FRAG)
+#define RPCSVC_BARERPC_MSGSZ 32
+#define rpcsvc_record_readfraghdr(rs) ((rs)->state == RPCSVC_READ_FRAGHDR)
+#define rpcsvc_record_readfrag(rs) ((rs)->state == RPCSVC_READ_FRAG)
-#define RPCSVC_LOWVERS 2
+#define RPCSVC_LOWVERS 2
#define RPCSVC_HIGHVERS 2
-
#if 0
#error "defined in /usr/include/rpc/auth.h"
-#define AUTH_NONE 0 /* no authentication */
-#define AUTH_NULL 0 /* backward compatibility */
-#define AUTH_SYS 1 /* unix style (uid, gids) */
-#define AUTH_UNIX AUTH_SYS
-#define AUTH_SHORT 2 /* short hand unix style */
-#define AUTH_DES 3 /* des style (encrypted timestamps) */
-#define AUTH_DH AUTH_DES /* Diffie-Hellman (this is DES) */
-#define AUTH_KERB 4 /* kerberos style */
-#endif /* */
+#define AUTH_NONE 0 /* no authentication */
+#define AUTH_NULL 0 /* backward compatibility */
+#define AUTH_SYS 1 /* unix style (uid, gids) */
+#define AUTH_UNIX AUTH_SYS
+#define AUTH_SHORT 2 /* short hand unix style */
+#define AUTH_DES 3 /* des style (encrypted timestamps) */
+#define AUTH_DH AUTH_DES /* Diffie-Hellman (this is DES) */
+#define AUTH_KERB 4 /* kerberos style */
+#endif /* */
typedef struct rpcsvc_program rpcsvc_program_t;
struct rpcsvc_notify_wrapper {
- struct list_head list;
- void *data;
- rpcsvc_notify_t notify;
+ struct list_head list;
+ void *data;
+ rpcsvc_notify_t notify;
};
typedef struct rpcsvc_notify_wrapper rpcsvc_notify_wrapper_t;
-
typedef struct rpcsvc_request rpcsvc_request_t;
typedef struct {
- rpc_transport_t *trans;
- rpcsvc_t *svc;
- /* FIXME: remove address from this structure. Instead use get_myaddr
- * interface implemented by individual transports.
- */
- struct sockaddr_storage sa;
- struct list_head list;
+ rpc_transport_t *trans;
+ rpcsvc_t *svc;
+ /* FIXME: remove address from this structure. Instead use get_myaddr
+ * interface implemented by individual transports.
+ */
+ struct sockaddr_storage sa;
+ struct list_head list;
} rpcsvc_listener_t;
struct rpcsvc_config {
- int max_block_size;
+ int max_block_size;
};
-typedef struct rpcsvc_auth_data {
- int flavour;
- int datalen;
- char authdata[GF_MAX_AUTH_BYTES];
-} rpcsvc_auth_data_t;
+#define rpcsvc_auth_flavour(au) ((au).flavour)
-#define rpcsvc_auth_flavour(au) ((au).flavour)
+typedef struct drc_client drc_client_t;
+typedef struct drc_cached_op drc_cached_op_t;
/* The container for the RPC call handed up to an actor.
* Dynamically allocated. Lives till the call reply is completely
* transmitted.
* */
struct rpcsvc_request {
- /* connection over which this request came. */
- rpc_transport_t *trans;
-
- rpcsvc_t *svc;
-
- rpcsvc_program_t *prog;
-
- /* The identifier for the call from client.
- * Needed to pair the reply with the call.
- */
- uint32_t xid;
-
- int prognum;
-
- int progver;
-
- int procnum;
-
- int type;
-
- /* Uid and gid filled by the rpc-auth module during the authentication
- * phase.
- */
- uid_t uid;
- gid_t gid;
- pid_t pid;
-
- gf_lkowner_t lk_owner;
- uint64_t gfs_id;
+ /* connection over which this request came. */
+ rpc_transport_t *trans;
- /* Might want to move this to AUTH_UNIX specific state since this array
- * is not available for every authentication scheme.
- */
- gid_t auxgids[GF_MAX_AUX_GROUPS];
- int auxgidcount;
+ rpcsvc_t *svc;
+
+ rpcsvc_program_t *prog;
+ int prognum;
+
+ int progver;
- /* The RPC message payload, contains the data required
- * by the program actors. This is the buffer that will need to
- * be de-xdred by the actor.
- */
- struct iovec msg[MAX_IOVEC];
- int count;
-
- struct iobref *iobref;
-
- /* Status of the RPC call, whether it was accepted or denied. */
- int rpc_status;
-
- /* In case, the call was denied, the RPC error is stored here
- * till the reply is sent.
- */
- int rpc_err;
-
- /* In case the failure happened because of an authentication problem
- * , this value needs to be assigned the correct auth error number.
- */
- int auth_err;
-
- /* There can be cases of RPC requests where the reply needs to
- * be built from multiple sources. E.g. where even the NFS reply
- * can contain a payload, as in the NFSv3 read reply. Here the RPC header
- * ,NFS header and the read data are brought together separately from
- * different buffers, so we need to stage the buffers temporarily here
- * before all of them get added to the connection's transmission list.
- */
- struct list_head txlist;
-
- /* While the reply record is being built, this variable keeps track
- * of how many bytes have been added to the record.
- */
- size_t payloadsize;
-
- /* The credentials extracted from the rpc request */
- rpcsvc_auth_data_t cred;
-
- /* The verified extracted from the rpc request. In request side
- * processing this contains the verifier sent by the client, on reply
- * side processing, it is filled with the verified that will be
- * sent to the client.
- */
- rpcsvc_auth_data_t verf;
-
- /* Container for a RPC program wanting to store a temp
- * request-specific item.
- */
- void *private;
-
- /* Container for transport to store request-specific item */
- void *trans_private;
+ int procnum;
+
+ int type;
+
+ /* Uid and gid filled by the rpc-auth module during the authentication
+ * phase.
+ */
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+
+ gf_lkowner_t lk_owner;
+ uint64_t gfs_id;
+
+ /* Might want to move this to AUTH_UNIX specific state since this array
+ * is not available for every authentication scheme.
+ */
+ gid_t *auxgids;
+ gid_t auxgidsmall[SMALL_GROUP_COUNT];
+ gid_t *auxgidlarge;
+ int auxgidcount;
+
+ /* The RPC message payload, contains the data required
+ * by the program actors. This is the buffer that will need to
+ * be de-xdred by the actor.
+ */
+ int count;
+ struct iovec msg[MAX_IOVEC];
+
+ struct iobref *iobref;
+
+ /* There can be cases of RPC requests where the reply needs to
+ * be built from multiple sources. E.g. where even the NFS reply
+ * can contain a payload, as in the NFSv3 read reply. Here the RPC header
+ * ,NFS header and the read data are brought together separately from
+ * different buffers, so we need to stage the buffers temporarily here
+ * before all of them get added to the connection's transmission list.
+ */
+ struct list_head txlist;
+
+ /* While the reply record is being built, this variable keeps track
+ * of how many bytes have been added to the record.
+ */
+ size_t payloadsize;
+
+ /* The credentials extracted from the rpc request */
+ client_auth_data_t cred;
+
+ /* The verified extracted from the rpc request. In request side
+ * processing this contains the verifier sent by the client, on reply
+ * side processing, it is filled with the verified that will be
+ * sent to the client.
+ */
+ client_auth_data_t verf;
+ /* Container for a RPC program wanting to store a temp
+ * request-specific item.
+ */
+ void *private;
+
+ /* Container for transport to store request-specific item */
+ void *trans_private;
+
+ /* pointer to cached reply for use in DRC */
+ drc_cached_op_t *reply;
+
+ /* request queue in rpcsvc */
+ struct list_head request_list;
+
+ /* Status of the RPC call, whether it was accepted or denied. */
+ int rpc_status;
+
+ /* In case, the call was denied, the RPC error is stored here
+ * till the reply is sent.
+ */
+ int rpc_err;
+
+ /* In case the failure happened because of an authentication problem
+ * , this value needs to be assigned the correct auth error number.
+ */
+ int auth_err;
+
+ /* Things passed to rpc layer from client */
+
+ /* @flags: Can be used for binary data passed in xdata to be
+ passed here instead */
+ unsigned int flags;
+
+ /* ctime: origin of time on the client side, ideally this is
+ the one we should consider for time */
+ struct timespec ctime;
+
+ /* The identifier for the call from client.
+ * Needed to pair the reply with the call.
+ */
+ uint32_t xid;
+
+ /* Execute this request's actor function in ownthread of program?*/
+ gf_boolean_t ownthread;
+
+ gf_boolean_t synctask;
+ struct timespec begin; /*req handling start time*/
+ struct timespec end; /*req handling end time*/
};
#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
#define rpcsvc_request_procnum(req) (((req)->procnum))
-#define rpcsvc_request_program_private(req) (((rpcsvc_program_t *)((req)->prog))->private)
-#define rpcsvc_request_accepted(req) ((req)->rpc_status == MSG_ACCEPTED)
+#define rpcsvc_request_program_private(req) \
+ (((rpcsvc_program_t *)((req)->prog))->private)
+#define rpcsvc_request_accepted(req) ((req)->rpc_status == MSG_ACCEPTED)
#define rpcsvc_request_accepted_success(req) ((req)->rpc_err == SUCCESS)
-#define rpcsvc_request_uid(req) ((req)->uid)
-#define rpcsvc_request_gid(req) ((req)->gid)
#define rpcsvc_request_prog_minauth(req) (rpcsvc_request_program(req)->min_auth)
#define rpcsvc_request_cred_flavour(req) (rpcsvc_auth_flavour(req->cred))
#define rpcsvc_request_verf_flavour(req) (rpcsvc_auth_flavour(req->verf))
-#define rpcsvc_request_service(req) ((req)->svc)
-#define rpcsvc_request_uid(req) ((req)->uid)
-#define rpcsvc_request_gid(req) ((req)->gid)
-#define rpcsvc_request_private(req) ((req)->private)
-#define rpcsvc_request_xid(req) ((req)->xid)
-#define rpcsvc_request_set_private(req,prv) (req)->private = (void *)(prv)
-#define rpcsvc_request_iobref_ref(req) (iobref_ref ((req)->iobref))
-#define rpcsvc_request_record_ref(req) (iobuf_ref ((req)->recordiob))
-#define rpcsvc_request_record_unref(req) (iobuf_unref ((req)->recordiob))
-#define rpcsvc_request_record_iob(req) ((req)->recordiob)
-#define rpcsvc_request_set_vecstate(req, state) ((req)->vecstate = state)
+#define rpcsvc_request_service(req) ((req)->svc)
+#define rpcsvc_request_uid(req) ((req)->uid)
+#define rpcsvc_request_gid(req) ((req)->gid)
+#define rpcsvc_request_private(req) ((req)->private)
+#define rpcsvc_request_xid(req) ((req)->xid)
+#define rpcsvc_request_set_private(req, prv) (req)->private = (void *)(prv)
+#define rpcsvc_request_iobref_ref(req) (iobref_ref((req)->iobref))
+#define rpcsvc_request_record_ref(req) (iobuf_ref((req)->recordiob))
+#define rpcsvc_request_record_unref(req) (iobuf_unref((req)->recordiob))
+#define rpcsvc_request_record_iob(req) ((req)->recordiob)
+#define rpcsvc_request_set_vecstate(req, state) ((req)->vecstate = state)
#define rpcsvc_request_vecstate(req) ((req)->vecstate)
#define rpcsvc_request_transport(req) ((req)->trans)
#define rpcsvc_request_transport_ref(req) (rpc_transport_ref((req)->trans))
-
-
-#define RPCSVC_ACTOR_SUCCESS 0
-#define RPCSVC_ACTOR_ERROR (-1)
-#define RPCSVC_ACTOR_IGNORE (-2)
+#define RPC_AUTH_ROOT_SQUASH(req) \
+ do { \
+ int gidcount = 0; \
+ if (req->svc->root_squash) { \
+ if (req->uid == RPC_ROOT_UID) \
+ req->uid = req->svc->anonuid; \
+ if (req->gid == RPC_ROOT_GID) \
+ req->gid = req->svc->anongid; \
+ \
+ for (gidcount = 0; gidcount < req->auxgidcount; ++gidcount) { \
+ if (!req->auxgids[gidcount]) \
+ req->auxgids[gidcount] = req->svc->anongid; \
+ } \
+ } \
+ } while (0);
+
+#define RPC_AUTH_ALL_SQUASH(req) \
+ do { \
+ int gidcount = 0; \
+ if (req->svc->all_squash) { \
+ req->uid = req->svc->anonuid; \
+ req->gid = req->svc->anongid; \
+ \
+ for (gidcount = 0; gidcount < req->auxgidcount; ++gidcount) { \
+ if (!req->auxgids[gidcount]) \
+ req->auxgids[gidcount] = req->svc->anongid; \
+ } \
+ } \
+ } while (0);
+
+#define RPCSVC_ACTOR_SUCCESS 0
+#define RPCSVC_ACTOR_ERROR (-1)
+#define RPCSVC_ACTOR_IGNORE (-2)
/* Functor for every type of protocol actor
* must be defined like this.
@@ -275,117 +337,146 @@ struct rpcsvc_request {
* should return RPCSVC_ACTOR_ERROR.
*
*/
-typedef int (*rpcsvc_actor) (rpcsvc_request_t *req);
-typedef int (*rpcsvc_vector_actor) (rpcsvc_request_t *req, struct iovec *vec,
- int count, struct iobref *iobref);
-typedef int (*rpcsvc_vector_sizer) (int state, ssize_t *readsize, char *addr);
+typedef int (*rpcsvc_actor)(rpcsvc_request_t *req);
+typedef int (*rpcsvc_vector_sizer)(int state, ssize_t *readsize,
+ char *base_addr, char *curr_addr);
/* Every protocol actor will also need to specify the function the RPC layer
* will use to serialize or encode the message into XDR format just before
* transmitting on the connection.
*/
-typedef void *(*rpcsvc_encode_reply) (void *msg);
+typedef void *(*rpcsvc_encode_reply)(void *msg);
/* Once the reply has been transmitted, the message will have to be de-allocated
* , so every actor will need to provide a function that deallocates the message
* it had allocated as a response.
*/
-typedef void (*rpcsvc_deallocate_reply) (void *msg);
-
+typedef void (*rpcsvc_deallocate_reply)(void *msg);
-#define RPCSVC_NAME_MAX 32
+#define RPCSVC_NAME_MAX 32
/* The descriptor for each procedure/actor that runs
* over the RPC service.
*/
typedef struct rpcsvc_actor_desc {
- char procname[RPCSVC_NAME_MAX];
- int procnum;
- rpcsvc_actor actor;
-
- /* Handler for cases where the RPC requests fragments are large enough
- * to benefit from being decoded into aligned memory addresses. While
- * decoding the request in a non-vectored manner, due to the nature of
- * the XDR scheme, RPC cannot guarantee memory aligned addresses for
- * the resulting message-specific structures. Allowing a specialized
- * handler for letting the RPC program read the data from the network
- * directly into its aligned buffers.
- */
- rpcsvc_vector_actor vector_actor;
- rpcsvc_vector_sizer vector_sizer;
-
- /* Can actor be ran on behalf an unprivileged requestor? */
- gf_boolean_t unprivileged;
+ char procname[RPCSVC_NAME_MAX];
+ rpcsvc_actor actor;
+
+ /* Handler for cases where the RPC requests fragments are large enough
+ * to benefit from being decoded into aligned memory addresses. While
+ * decoding the request in a non-vectored manner, due to the nature of
+ * the XDR scheme, RPC cannot guarantee memory aligned addresses for
+ * the resulting message-specific structures. Allowing a specialized
+ * handler for letting the RPC program read the data from the network
+ * directly into its aligned buffers.
+ */
+ rpcsvc_vector_sizer vector_sizer;
+
+ int procnum;
+
+ /* Can actor be ran on behalf an unprivileged requestor? */
+ drc_op_type_t op_type;
+ gf_boolean_t unprivileged;
} rpcsvc_actor_t;
+typedef struct rpcsvc_request_queue {
+ struct list_head request_queue;
+ pthread_mutex_t queue_lock;
+ pthread_cond_t queue_cond;
+ pthread_t thread;
+ struct rpcsvc_program *program;
+ int gen;
+ gf_boolean_t waiting;
+} rpcsvc_request_queue_t;
+
/* Describes a program and its version along with the function pointers
* required to handle the procedures/actors of each program/version.
* Never changed ever by any thread so no need for a lock.
*/
struct rpcsvc_program {
- char progname[RPCSVC_NAME_MAX];
- int prognum;
- int progver;
- /* FIXME */
- dict_t *options; /* An opaque dictionary
- * populated by the program
- * (probably from xl->options)
- * which contain enough
- * information for transport to
- * initialize. As a part of
- * cleanup, the members of
- * options which are of interest
- * to transport should be put
- * into a structure for better
- * readability and structure
- * should replace options member
- * here.
- */
- uint16_t progport; /* Registered with portmap */
+ char progname[RPCSVC_NAME_MAX];
+ int prognum;
+ int progver;
+ /* FIXME */
+ dict_t *options; /* An opaque dictionary
+ * populated by the program
+ * (probably from xl->options)
+ * which contain enough
+ * information for transport to
+ * initialize. As a part of
+ * cleanup, the members of
+ * options which are of interest
+ * to transport should be put
+ * into a structure for better
+ * readability and structure
+ * should replace options member
+ * here.
+ */
#if 0
int progaddrfamily; /* AF_INET or AF_INET6 */
char *proghost; /* Bind host, can be NULL */
#endif
- rpcsvc_actor_t *actors; /* All procedure handlers */
- int numactors; /* Num actors in actor array */
- int proghighvers; /* Highest ver for program
- supported by the system. */
- int proglowvers; /* Lowest ver */
-
- /* Program specific state handed to actors */
- void *private;
-
-
- /* This upcall is provided by the program during registration.
- * It is used to notify the program about events like connection being
- * destroyed etc. The rpc program may take appropriate actions, for eg.,
- * in the case of connection being destroyed, it should cleanup its
- * state stored in the connection.
- */
- rpcsvc_notify_t notify;
-
- /* An integer that identifies the min auth strength that is required
- * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
- * See RFC 1813, Section 5.2.1.
- */
- int min_auth;
-
- /* list member to link to list of registered services with rpcsvc */
- struct list_head program;
+ rpcsvc_actor_t *actors; /* All procedure handlers */
+ int numactors; /* Num actors in actor array */
+ int proghighvers; /* Highest ver for program
+ supported by the system. */
+ /* Program specific state handed to actors */
+ void *private;
+ gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/
+
+ /* This upcall is provided by the program during registration.
+ * It is used to notify the program about events like connection being
+ * destroyed etc. The rpc program may take appropriate actions, for eg.,
+ * in the case of connection being destroyed, it should cleanup its
+ * state stored in the connection.
+ */
+ rpcsvc_notify_t notify;
+
+ int proglowvers; /* Lowest ver */
+
+ /* An integer that identifies the min auth strength that is required
+ * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
+ * See RFC 1813, Section 5.2.1.
+ */
+ int min_auth;
+
+ /* list member to link to list of registered services with rpcsvc */
+ struct list_head program;
+ rpcsvc_request_queue_t request_queue[EVENT_MAX_THREADS];
+ pthread_mutex_t thr_lock;
+ pthread_cond_t thr_cond;
+ int threadcount;
+ int thr_queue;
+ pthread_key_t req_queue_key;
+
+ /* eventthreadcount is just a readonly copy of the actual value
+ * owned by the event sub-system
+ * It is used to control the scaling of rpcsvc_request_handler threads
+ */
+ int eventthreadcount;
+ uint16_t progport; /* Registered with portmap */
+ /* Execute actor function in program's own thread? This will reduce */
+ /* the workload on poller threads */
+ gf_boolean_t ownthread;
+ gf_boolean_t alive;
+
+ gf_boolean_t synctask;
+ unsigned long request_queue_status[EVENT_MAX_THREADS / __BITS_PER_LONG];
};
typedef struct rpcsvc_cbk_program {
- char *progname;
- int prognum;
- int progver;
+ char *progname;
+ int prognum;
+ int progver;
} rpcsvc_cbk_program_t;
/* All users of RPC services should use this API to register their
* procedure handlers.
*/
extern int
-rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program);
+rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
+ gf_boolean_t add_to_head);
extern int
-rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program);
+rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program);
/* This will create and add a listener to listener pool. Programs can
* use any of the listener in this pool. A single listener can be used by
@@ -396,127 +487,150 @@ rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program);
*/
/* FIXME: can multiple programs registered on same port? */
extern int32_t
-rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name);
+rpcsvc_create_listeners(rpcsvc_t *svc, dict_t *options, char *name);
void
-rpcsvc_listener_destroy (rpcsvc_listener_t *listener);
+rpcsvc_listener_destroy(rpcsvc_listener_t *listener);
extern int
-rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port);
+rpcsvc_program_register_portmap(rpcsvc_program_t *newprog, uint32_t port);
+#ifdef IPV6_DEFAULT
+extern int
+rpcsvc_program_register_rpcbind6(rpcsvc_program_t *newprog, uint32_t port);
extern int
-rpcsvc_register_portmap_enabled (rpcsvc_t *svc);
+rpcsvc_program_unregister_rpcbind6(rpcsvc_program_t *newprog);
+#endif
+
+extern int
+rpcsvc_program_unregister_portmap(rpcsvc_program_t *newprog);
+
+extern int
+rpcsvc_register_portmap_enabled(rpcsvc_t *svc);
/* Inits the global RPC service data structures.
* Called in main.
*/
extern rpcsvc_t *
-rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
- uint32_t poolcount);
+rpcsvc_init(xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
+ uint32_t poolcount);
+
+extern int
+rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options);
int
-rpcsvc_register_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
+rpcsvc_register_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
/* unregister a notification callback @notify with data @mydata from svc.
* returns the number of notification callbacks unregistered.
*/
int
-rpcsvc_unregister_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
+rpcsvc_unregister_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
int
-rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref);
+rpcsvc_transport_submit(rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref,
+ void *priv);
int
-rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref);
+rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref);
+
+int
+rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref);
extern int
-rpcsvc_error_reply (rpcsvc_request_t *req);
+rpcsvc_error_reply(rpcsvc_request_t *req);
-#define RPCSVC_PEER_STRLEN 1024
-#define RPCSVC_AUTH_ACCEPT 1
-#define RPCSVC_AUTH_REJECT 2
-#define RPCSVC_AUTH_DONTCARE 3
+#define RPCSVC_PEER_STRLEN 1024
+#define RPCSVC_AUTH_ACCEPT 1
+#define RPCSVC_AUTH_REJECT 2
+#define RPCSVC_AUTH_DONTCARE 3
extern int
-rpcsvc_transport_peername (rpc_transport_t *trans, char *hostname, int hostlen);
+rpcsvc_transport_peername(rpc_transport_t *trans, char *hostname, int hostlen);
-extern inline int
-rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
- struct sockaddr_storage *returnsa, socklen_t sasize);
+extern int
+rpcsvc_transport_peeraddr(rpc_transport_t *trans, char *addrstr, int addrlen,
+ struct sockaddr_storage *returnsa, socklen_t sasize);
extern int
-rpcsvc_transport_peer_check (dict_t *options, char *volname,
- rpc_transport_t *trans);
+rpcsvc_auth_check(rpcsvc_t *svc, char *volname, char *ipaddr);
extern int
-rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
- rpc_transport_t *trans);
-#define rpcsvc_request_seterr(req, err) (req)->rpc_err = err
-#define rpcsvc_request_set_autherr(req, err) (req)->auth_err = err
+rpcsvc_transport_privport_check(rpcsvc_t *svc, char *volname, uint16_t port);
-extern int rpcsvc_submit_vectors (rpcsvc_request_t *req);
+#define rpcsvc_request_seterr(req, err) ((req)->rpc_err = (int)(err))
+#define rpcsvc_request_set_autherr(req, err) ((req)->auth_err = (int)(err))
-extern int rpcsvc_request_attach_vector (rpcsvc_request_t *req,
- struct iovec msgvec, struct iobuf *iob,
- struct iobref *ioref, int finalvector);
+extern int
+rpcsvc_submit_vectors(rpcsvc_request_t *req);
+extern int
+rpcsvc_request_attach_vector(rpcsvc_request_t *req, struct iovec msgvec,
+ struct iobuf *iob, struct iobref *ioref,
+ int finalvector);
-typedef int (*auth_init_trans) (rpc_transport_t *trans, void *priv);
-typedef int (*auth_init_request) (rpcsvc_request_t *req, void *priv);
-typedef int (*auth_request_authenticate) (rpcsvc_request_t *req, void *priv);
+typedef int (*auth_init_trans)(rpc_transport_t *trans, void *priv);
+typedef int (*auth_init_request)(rpcsvc_request_t *req, void *priv);
+typedef int (*auth_request_authenticate)(rpcsvc_request_t *req, void *priv);
/* This structure needs to be registered by every authentication scheme.
* Our authentication schemes are stored per connection because
* each connection will end up using a different authentication scheme.
*/
typedef struct rpcsvc_auth_ops {
- auth_init_trans transport_init;
- auth_init_request request_init;
- auth_request_authenticate authenticate;
+ auth_init_trans transport_init;
+ auth_init_request request_init;
+ auth_request_authenticate authenticate;
} rpcsvc_auth_ops_t;
typedef struct rpcsvc_auth_flavour_desc {
- char authname[RPCSVC_NAME_MAX];
- int authnum;
- rpcsvc_auth_ops_t *authops;
- void *authprivate;
+ char authname[RPCSVC_NAME_MAX];
+ rpcsvc_auth_ops_t *authops;
+ void *authprivate;
+ int authnum;
} rpcsvc_auth_t;
-typedef void * (*rpcsvc_auth_initer_t) (rpcsvc_t *svc, dict_t *options);
+typedef void *(*rpcsvc_auth_initer_t)(rpcsvc_t *svc, dict_t *options);
struct rpcsvc_auth_list {
- struct list_head authlist;
- rpcsvc_auth_initer_t init;
- /* Should be the name with which we identify the auth scheme given
- * in the volfile options.
- * This should be different from the authname in rpc_auth_t
- * in way that makes it easier to specify this scheme in the volfile.
- * This is because the technical names of the schemes can be a bit
- * arcane.
- */
- char name[RPCSVC_NAME_MAX];
- rpcsvc_auth_t *auth;
- int enable;
+ struct list_head authlist;
+ rpcsvc_auth_initer_t init;
+ /* Should be the name with which we identify the auth scheme given
+ * in the volfile options.
+ * This should be different from the authname in rpc_auth_t
+ * in way that makes it easier to specify this scheme in the volfile.
+ * This is because the technical names of the schemes can be a bit
+ * arcane.
+ */
+ char name[RPCSVC_NAME_MAX];
+ rpcsvc_auth_t *auth;
+ int enable;
};
extern int
-rpcsvc_auth_request_init (rpcsvc_request_t *req);
+rpcsvc_auth_request_init(rpcsvc_request_t *req, struct rpc_msg *callmsg);
+
+extern int
+rpcsvc_auth_init(rpcsvc_t *svc, dict_t *options);
extern int
-rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_reconf(rpcsvc_t *svc, dict_t *options);
extern int
-rpcsvc_auth_transport_init (rpc_transport_t *xprt);
+rpcsvc_auth_transport_init(rpc_transport_t *xprt);
extern int
-rpcsvc_authenticate (rpcsvc_request_t *req);
+rpcsvc_authenticate(rpcsvc_request_t *req);
extern int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
+rpcsvc_auth_array(rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
/* If the request has been sent using AUTH_UNIX, this function returns the
* auxiliary gids as an array, otherwise, it returns NULL.
@@ -524,28 +638,57 @@ rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
* authentication code even further to support mode auth schemes.
*/
extern gid_t *
-rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen);
-
-extern int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec);
+rpcsvc_auth_unix_auxgids(rpcsvc_request_t *req, int *arrlen);
extern char *
-rpcsvc_volume_allowed (dict_t *options, char *volname);
+rpcsvc_volume_allowed(dict_t *options, char *volname);
+
+int
+rpcsvc_request_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum, void *req,
+ glusterfs_ctx_t *ctx, xdrproc_t xdrproc);
-int rpcsvc_callback_submit (rpcsvc_t *rpc, rpc_transport_t *trans,
- rpcsvc_cbk_program_t *prog, int procnum,
- struct iovec *proghdr, int proghdrcount);
+int
+rpcsvc_callback_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum,
+ struct iovec *proghdr, int proghdrcount,
+ struct iobref *iobref);
+rpcsvc_actor_t *
+rpcsvc_program_actor(rpcsvc_request_t *req);
+
+int
+rpcsvc_transport_unix_options_build(dict_t *options, char *filepath);
int
-rpcsvc_transport_unix_options_build (dict_t **options, char *filepath);
+rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options);
int
-rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options);
+rpcsvc_set_addr_namelookup(rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_root_squash(rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_all_squash(rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_outstanding_rpc_limit(rpcsvc_t *svc, dict_t *options, int defvalue);
+
int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
-char *
-rpcsvc_volume_allowed (dict_t *options, char *volname);
+rpcsvc_set_throttle_on(rpcsvc_t *svc);
+
+int
+rpcsvc_set_throttle_off(rpcsvc_t *svc);
+
+gf_boolean_t
+rpcsvc_get_throttle(rpcsvc_t *svc);
+
+int
+rpcsvc_auth_array(rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
rpcsvc_vector_sizer
-rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
- uint32_t progver, uint32_t procnum);
+rpcsvc_get_program_vector_sizer(rpcsvc_t *svc, uint32_t prognum,
+ uint32_t progver, int procnum);
+void
+rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
+extern int
+rpcsvc_destroy(rpcsvc_t *svc);
+void
+rpcsvc_statedump(rpcsvc_t *svc);
#endif
diff --git a/rpc/rpc-lib/src/xdr-common.h b/rpc/rpc-lib/src/xdr-common.h
index 34dc9c6a228..752736b3d4d 100644
--- a/rpc/rpc-lib/src/xdr-common.h
+++ b/rpc/rpc-lib/src/xdr-common.h
@@ -11,14 +11,10 @@
#ifndef _XDR_COMMON_H_
#define _XDR_COMMON_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <rpc/types.h>
#include <sys/types.h>
#include <rpc/xdr.h>
+#include <rpc/auth.h>
#include <sys/uio.h>
#ifdef __NetBSD__
@@ -26,37 +22,76 @@
#endif /* __NetBSD__ */
enum gf_dump_procnum {
- GF_DUMP_NULL,
- GF_DUMP_DUMP,
- GF_DUMP_MAXVALUE,
+ GF_DUMP_NULL,
+ GF_DUMP_DUMP,
+ GF_DUMP_PING,
+ GF_DUMP_MAXVALUE,
};
#define GLUSTER_DUMP_PROGRAM 123451501 /* Completely random */
#define GLUSTER_DUMP_VERSION 1
-#define GF_MAX_AUTH_BYTES 2048
+/* MAX_AUTH_BYTES is restricted to 400 bytes, see
+ * http://tools.ietf.org/html/rfc5531#section-8.2 */
+#define GF_MAX_AUTH_BYTES MAX_AUTH_BYTES
+
+/* The size of an AUTH_GLUSTERFS_V2 structure:
+ *
+ * 1 | pid
+ * 1 | uid
+ * 1 | gid
+ * 1 | groups_len
+ * XX | groups_val (GF_MAX_AUX_GROUPS=65535)
+ * 1 | lk_owner_len
+ * YY | lk_owner_val (GF_MAX_LOCK_OWNER_LEN=1024)
+ * ----+-------------------------------------------
+ * 5 | total xdr-units
+ *
+ * one XDR-unit is defined as BYTES_PER_XDR_UNIT = 4 bytes
+ * MAX_AUTH_BYTES = 400 is the maximum, this is 100 xdr-units.
+ * XX + YY can be 95 to fill the 100 xdr-units.
+ *
+ * Note that the on-wire protocol has tighter requirements than the internal
+ * structures. It is possible for xlators to use more groups and a bigger
+ * lk_owner than that can be sent by a GlusterFS-client.
+ *
+ * -------
+ * On v3, there are 4 more units, and hence it will be 9 xdr-units
+ */
+#define GF_AUTH_GLUSTERFS_MAX_GROUPS(lk_len, type) \
+ ((type == AUTH_GLUSTERFS_v2) ? (95 - lk_len) : (91 - lk_len))
+#define GF_AUTH_GLUSTERFS_MAX_LKOWNER(groups_len, type) \
+ ((type == AUTH_GLUSTERFS_v2) ? (95 - groups_len) : (91 - groups_len))
+
+#ifdef GF_LINUX_HOST_OS
+#define xdr_u_int32_t xdr_uint32_t
+#define xdr_u_int64_t xdr_uint64_t
+unsigned long
+xdr_sizeof(xdrproc_t func, void *data);
+#endif
-#if GF_DARWIN_HOST_OS
+#ifdef GF_DARWIN_HOST_OS
#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
+#define xdr_quad_t xdr_int64_t
#define xdr_uint32_t xdr_u_int32_t
+#define xdr_uint64_t xdr_u_int64_t
#define uint64_t u_int64_t
+unsigned long
+xdr_sizeof(xdrproc_t func, void *data);
#endif
#if defined(__NetBSD__)
#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
+#define xdr_quad_t xdr_int64_t
#define xdr_uint32_t xdr_u_int32_t
#define xdr_uint64_t xdr_u_int64_t
#endif
-
-#if GF_SOLARIS_HOST_OS
+#ifdef GF_SOLARIS_HOST_OS
#define u_quad_t uint64_t
#define quad_t int64_t
#define xdr_u_quad_t xdr_uint64_t
-#define xdr_quad_t xdr_int64_t
-#define xdr_uint32_t xdr_uint32_t
+#define xdr_quad_t xdr_int64_t
#endif
/* Returns the address of the byte that follows the
@@ -64,16 +99,18 @@ enum gf_dump_procnum {
* E.g. once the RPC call for NFS has been decoded, the macro will return
* the address from which the NFS header starts.
*/
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
/* Returns the length of the remaining record after the previous decode
* operation completed.
*/
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
/* Returns the number of bytes used by the last encode operation. */
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_encoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
#endif
diff --git a/rpc/rpc-lib/src/xdr-rpc.c b/rpc/rpc-lib/src/xdr-rpc.c
index ef52764c3a8..4992dc5a7ce 100644
--- a/rpc/rpc-lib/src/xdr-rpc.c
+++ b/rpc/rpc-lib/src/xdr-rpc.c
@@ -8,205 +8,191 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
#include <rpc/auth_unix.h>
-#include "mem-pool.h"
#include "xdr-rpc.h"
#include "xdr-common.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
/* Decodes the XDR format in msgbuf into rpc_msg.
* The remaining payload is returned into payload.
*/
int
-xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
- struct iovec *payload, char *credbytes, char *verfbytes)
+xdr_to_rpc_call(char *msgbuf, size_t len, struct rpc_msg *call,
+ struct iovec *payload, char *credbytes, char *verfbytes)
{
- XDR xdr;
- char opaquebytes[MAX_AUTH_BYTES];
- struct opaque_auth *oa = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
- GF_VALIDATE_OR_GOTO ("rpc", call, out);
-
- memset (call, 0, sizeof (*call));
-
- oa = &call->rm_call.cb_cred;
- if (!credbytes)
- oa->oa_base = opaquebytes;
- else
- oa->oa_base = credbytes;
-
- oa = &call->rm_call.cb_verf;
- if (!verfbytes)
- oa->oa_base = opaquebytes;
- else
- oa->oa_base = verfbytes;
-
- xdrmem_create (&xdr, msgbuf, len, XDR_DECODE);
- if (!xdr_callmsg (&xdr, call)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode call msg");
- goto out;
- }
-
- if (payload) {
- payload->iov_base = xdr_decoded_remaining_addr (xdr);
- payload->iov_len = xdr_decoded_remaining_len (xdr);
- }
-
- ret = 0;
+ XDR xdr;
+ char opaquebytes[GF_MAX_AUTH_BYTES];
+ struct opaque_auth *oa = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO("rpc", call, out);
+
+ memset(call, 0, sizeof(*call));
+
+ oa = &call->rm_call.cb_cred;
+ if (!credbytes)
+ oa->oa_base = opaquebytes;
+ else
+ oa->oa_base = credbytes;
+
+ oa = &call->rm_call.cb_verf;
+ if (!verfbytes)
+ oa->oa_base = opaquebytes;
+ else
+ oa->oa_base = verfbytes;
+
+ xdrmem_create(&xdr, msgbuf, len, XDR_DECODE);
+ if (!xdr_callmsg(&xdr, call)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode call msg");
+ goto out;
+ }
+
+ if (payload) {
+ payload->iov_base = xdr_decoded_remaining_addr(xdr);
+ payload->iov_len = xdr_decoded_remaining_len(xdr);
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
bool_t
-true_func (XDR *s, caddr_t *a)
+true_func(XDR *s, caddr_t *a)
{
- return TRUE;
+ return TRUE;
}
-
int
-rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid)
+rpc_fill_empty_reply(struct rpc_msg *reply, uint32_t xid)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
- /* Setting to 0 also results in reply verifier flavor to be
- * set to AUTH_NULL which is what we want right now.
- */
- memset (reply, 0, sizeof (*reply));
- reply->rm_xid = xid;
- reply->rm_direction = REPLY;
+ /* Setting to 0 also results in reply verifier flavor to be
+ * set to AUTH_NULL which is what we want right now.
+ */
+ memset(reply, 0, sizeof(*reply));
+ reply->rm_xid = xid;
+ reply->rm_direction = REPLY;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err)
+rpc_fill_denied_reply(struct rpc_msg *reply, int rjstat, int auth_err)
{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
-
- reply->rm_reply.rp_stat = MSG_DENIED;
- reply->rjcted_rply.rj_stat = rjstat;
- if (rjstat == RPC_MISMATCH) {
- /* No problem with hardcoding
- * RPC version numbers. We only support
- * v2 anyway.
- */
- reply->rjcted_rply.rj_vers.low = 2;
- reply->rjcted_rply.rj_vers.high = 2;
- } else if (rjstat == AUTH_ERROR)
- reply->rjcted_rply.rj_why = auth_err;
-
- ret = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+
+ reply->rm_reply.rp_stat = MSG_DENIED;
+ reply->rjcted_rply.rj_stat = rjstat;
+ if (rjstat == RPC_MISMATCH) {
+ /* No problem with hardcoding
+ * RPC version numbers. We only support
+ * v2 anyway.
+ */
+ reply->rjcted_rply.rj_vers.low = 2;
+ reply->rjcted_rply.rj_vers.high = 2;
+ } else if (rjstat == AUTH_ERROR)
+ reply->rjcted_rply.rj_why = auth_err;
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
- int proghigh, int verf, int len, char *vdata)
+rpc_fill_accepted_reply(struct rpc_msg *reply, int arstat, int proglow,
+ int proghigh, int verf, int len, char *vdata)
{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
-
- reply->rm_reply.rp_stat = MSG_ACCEPTED;
- reply->acpted_rply.ar_stat = arstat;
-
- reply->acpted_rply.ar_verf.oa_flavor = verf;
- reply->acpted_rply.ar_verf.oa_length = len;
- reply->acpted_rply.ar_verf.oa_base = vdata;
- if (arstat == PROG_MISMATCH) {
- reply->acpted_rply.ar_vers.low = proglow;
- reply->acpted_rply.ar_vers.high = proghigh;
- } else if (arstat == SUCCESS) {
-
- /* This is a hack. I'd really like to build a custom
- * XDR library because Sun RPC interface is not very flexible.
- */
- reply->acpted_rply.ar_results.proc = (xdrproc_t)true_func;
- reply->acpted_rply.ar_results.where = NULL;
- }
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+
+ reply->rm_reply.rp_stat = MSG_ACCEPTED;
+ reply->acpted_rply.ar_stat = arstat;
+
+ reply->acpted_rply.ar_verf.oa_flavor = verf;
+ reply->acpted_rply.ar_verf.oa_length = len;
+ reply->acpted_rply.ar_verf.oa_base = vdata;
+ if (arstat == PROG_MISMATCH) {
+ reply->acpted_rply.ar_vers.low = proglow;
+ reply->acpted_rply.ar_vers.high = proghigh;
+ } else if (arstat == SUCCESS) {
+ /* This is a hack. I'd really like to build a custom
+ * XDR library because Sun RPC interface is not very flexible.
+ */
+ reply->acpted_rply.ar_results.proc = (xdrproc_t)true_func;
+ reply->acpted_rply.ar_results.where = NULL;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
- struct iovec *dst)
+rpc_reply_to_xdr(struct rpc_msg *reply, char *dest, size_t len,
+ struct iovec *dst)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
- GF_VALIDATE_OR_GOTO ("rpc", dest, out);
- GF_VALIDATE_OR_GOTO ("rpc", dst, out);
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+ GF_VALIDATE_OR_GOTO("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO("rpc", dst, out);
- xdrmem_create (&xdr, dest, len, XDR_ENCODE);
- if (!xdr_replymsg(&xdr, reply)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to encode reply msg");
- goto out;
- }
+ xdrmem_create(&xdr, dest, len, XDR_ENCODE);
+ if (!xdr_replymsg(&xdr, reply)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to encode reply msg");
+ goto out;
+ }
- dst->iov_base = dest;
- dst->iov_len = xdr_encoded_length (xdr);
+ dst->iov_base = dest;
+ dst->iov_len = xdr_encoded_length(xdr);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
- char *machname, gid_t *gids)
+xdr_to_auth_unix_cred(char *msgbuf, int msglen, struct authunix_parms *au,
+ char *machname, gid_t *gids)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
- GF_VALIDATE_OR_GOTO ("rpc", machname, out);
- GF_VALIDATE_OR_GOTO ("rpc", gids, out);
- GF_VALIDATE_OR_GOTO ("rpc", au, out);
+ GF_VALIDATE_OR_GOTO("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO("rpc", machname, out);
+ GF_VALIDATE_OR_GOTO("rpc", gids, out);
+ GF_VALIDATE_OR_GOTO("rpc", au, out);
- au->aup_machname = machname;
-#ifdef GF_DARWIN_HOST_OS
- au->aup_gids = (int *)gids;
+ au->aup_machname = machname;
+#if defined(GF_DARWIN_HOST_OS) || defined(__FreeBSD__)
+ au->aup_gids = (int *)gids;
#else
- au->aup_gids = gids;
+ au->aup_gids = gids;
#endif
- xdrmem_create (&xdr, msgbuf, msglen, XDR_DECODE);
+ xdrmem_create(&xdr, msgbuf, msglen, XDR_DECODE);
- if (!xdr_authunix_parms (&xdr, au)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode auth unix parms");
- goto out;
- }
+ if (!xdr_authunix_parms(&xdr, au)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode auth unix parms");
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-lib/src/xdr-rpc.h b/rpc/rpc-lib/src/xdr-rpc.h
index f5f4a941e92..7baed273846 100644
--- a/rpc/rpc-lib/src/xdr-rpc.h
+++ b/rpc/rpc-lib/src/xdr-rpc.h
@@ -11,11 +11,6 @@
#ifndef _XDR_RPC_H_
#define _XDR_RPC_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#ifndef GF_SOLARIS_HOST_OS
#include <rpc/rpc.h>
#endif
@@ -25,7 +20,6 @@
#include <rpc/auth_sys.h>
#endif
-//#include <rpc/pmap_clnt.h>
#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
@@ -33,49 +27,69 @@
#include "xdr-common.h"
typedef enum {
- AUTH_GLUSTERFS = 5,
- AUTH_GLUSTERFS_v2 = 390039, /* using a number from 'unused' range,
- from the list available in RFC5531 */
+ AUTH_GLUSTERFS = 5,
+ AUTH_GLUSTERFS_v2 = 390039, /* using a number from 'unused' range,
+ from the list available in RFC5531 */
+ AUTH_GLUSTERFS_v3 = 390040, /* this too is unused */
} gf_rpc_authtype_t;
/* Converts a given network buffer from its XDR format to a structure
* that contains everything an RPC call needs to work.
*/
extern int
-xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
- struct iovec *payload, char *credbytes, char *verfbytes);
+xdr_to_rpc_call(char *msgbuf, size_t len, struct rpc_msg *call,
+ struct iovec *payload, char *credbytes, char *verfbytes);
extern int
-rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid);
+rpc_fill_empty_reply(struct rpc_msg *reply, uint32_t xid);
extern int
-rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err);
+rpc_fill_denied_reply(struct rpc_msg *reply, int rjstat, int auth_err);
extern int
-rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
- int proghigh, int verf, int len, char *vdata);
+rpc_fill_accepted_reply(struct rpc_msg *reply, int arstat, int proglow,
+ int proghigh, int verf, int len, char *vdata);
extern int
-rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
- struct iovec *dst);
+rpc_reply_to_xdr(struct rpc_msg *reply, char *dest, size_t len,
+ struct iovec *dst);
extern int
-xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
- char *machname, gid_t *gids);
+xdr_to_auth_unix_cred(char *msgbuf, int msglen, struct authunix_parms *au,
+ char *machname, gid_t *gids);
/* Macros that simplify accessing the members of an RPC call structure. */
-#define rpc_call_xid(call) ((call)->rm_xid)
-#define rpc_call_direction(call) ((call)->rm_direction)
-#define rpc_call_rpcvers(call) ((call)->ru.RM_cmb.cb_rpcvers)
-#define rpc_call_program(call) ((call)->ru.RM_cmb.cb_prog)
-#define rpc_call_progver(call) ((call)->ru.RM_cmb.cb_vers)
-#define rpc_call_progproc(call) ((call)->ru.RM_cmb.cb_proc)
-#define rpc_opaque_auth_flavour(oa) ((oa)->oa_flavor)
-#define rpc_opaque_auth_len(oa) ((oa)->oa_length)
-
-#define rpc_call_cred_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_cred)))
-#define rpc_call_cred_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_cred)))
-
-
-#define rpc_call_verf_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_verf)))
-#define rpc_call_verf_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_verf)))
+#define rpc_call_xid(call) ((call)->rm_xid)
+#define rpc_call_direction(call) ((call)->rm_direction)
+#define rpc_call_rpcvers(call) ((call)->ru.RM_cmb.cb_rpcvers)
+#define rpc_call_program(call) ((call)->ru.RM_cmb.cb_prog)
+#define rpc_call_progver(call) ((call)->ru.RM_cmb.cb_vers)
+#define rpc_call_progproc(call) ((call)->ru.RM_cmb.cb_proc)
+#define rpc_opaque_auth_flavour(oa) ((oa)->oa_flavor)
+#define rpc_opaque_auth_len(oa) ((oa)->oa_length)
+
+#define rpc_call_cred_flavour(call) \
+ (rpc_opaque_auth_flavour((&(call)->ru.RM_cmb.cb_cred)))
+#define rpc_call_cred_len(call) \
+ (rpc_opaque_auth_len((&(call)->ru.RM_cmb.cb_cred)))
+
+#define rpc_call_verf_flavour(call) \
+ (rpc_opaque_auth_flavour((&(call)->ru.RM_cmb.cb_verf)))
+#define rpc_call_verf_len(call) \
+ (rpc_opaque_auth_len((&(call)->ru.RM_cmb.cb_verf)))
+
+#if defined(GF_DARWIN_HOST_OS) || !defined(HAVE_RPC_RPC_H)
+#define GF_PRI_RPC_XID PRIu32
+#define GF_PRI_RPC_VERSION PRIu32
+#define GF_PRI_RPC_PROG_ID PRIu32
+#define GF_PRI_RPC_PROG_VERS PRIu32
+#define GF_PRI_RPC_PROC PRIu32
+#define GF_PRI_RPC_PROC_VERSION PRIu32
+#else
+#define GF_PRI_RPC_XID PRIu64
+#define GF_PRI_RPC_VERSION PRIu64
+#define GF_PRI_RPC_PROG_ID PRIu64
+#define GF_PRI_RPC_PROG_VERS PRIu64
+#define GF_PRI_RPC_PROC PRIu64
+#define GF_PRI_RPC_PROC_VERSION PRIu64
+#endif
#endif
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.c b/rpc/rpc-lib/src/xdr-rpcclnt.c
index 810d1961b9d..8dcdcfeda83 100644
--- a/rpc/rpc-lib/src/xdr-rpcclnt.c
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.c
@@ -8,110 +8,98 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
#include <rpc/auth_unix.h>
#include <errno.h>
-#include "mem-pool.h"
#include "xdr-rpc.h"
#include "xdr-common.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
/* Decodes the XDR format in msgbuf into rpc_msg.
* The remaining payload is returned into payload.
*/
int
-xdr_to_rpc_reply (char *msgbuf, size_t len, struct rpc_msg *reply,
- struct iovec *payload, char *verfbytes)
+xdr_to_rpc_reply(char *msgbuf, size_t len, struct rpc_msg *reply,
+ struct iovec *payload, char *verfbytes)
{
- XDR xdr;
- int ret = -EINVAL;
-
- GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
-
- memset (reply, 0, sizeof (struct rpc_msg));
-
- reply->acpted_rply.ar_verf = _null_auth;
- reply->acpted_rply.ar_results.where = NULL;
- reply->acpted_rply.ar_results.proc = (xdrproc_t)(xdr_void);
-
- xdrmem_create (&xdr, msgbuf, len, XDR_DECODE);
- if (!xdr_replymsg (&xdr, reply)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode reply msg");
- ret = -errno;
- goto out;
- }
- if (payload) {
- payload->iov_base = xdr_decoded_remaining_addr (xdr);
- payload->iov_len = xdr_decoded_remaining_len (xdr);
- }
-
- ret = 0;
+ XDR xdr;
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+
+ memset(reply, 0, sizeof(struct rpc_msg));
+
+ reply->acpted_rply.ar_verf = _null_auth;
+ reply->acpted_rply.ar_results.where = NULL;
+ reply->acpted_rply.ar_results.proc = (xdrproc_t)(xdr_void);
+
+ xdrmem_create(&xdr, msgbuf, len, XDR_DECODE);
+ if (!xdr_replymsg(&xdr, reply)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode reply msg");
+ goto out;
+ }
+ if (payload) {
+ payload->iov_base = xdr_decoded_remaining_addr(xdr);
+ payload->iov_len = xdr_decoded_remaining_len(xdr);
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-rpc_request_to_xdr (struct rpc_msg *request, char *dest, size_t len,
- struct iovec *dst)
+rpc_request_to_xdr(struct rpc_msg *request, char *dest, size_t len,
+ struct iovec *dst)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", dest, out);
- GF_VALIDATE_OR_GOTO ("rpc", request, out);
- GF_VALIDATE_OR_GOTO ("rpc", dst, out);
+ GF_VALIDATE_OR_GOTO("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO("rpc", request, out);
+ GF_VALIDATE_OR_GOTO("rpc", dst, out);
- xdrmem_create (&xdr, dest, len, XDR_ENCODE);
- if (!xdr_callmsg (&xdr, request)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to encode call msg");
- goto out;
- }
+ xdrmem_create(&xdr, dest, len, XDR_ENCODE);
+ if (!xdr_callmsg(&xdr, request)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to encode call msg");
+ goto out;
+ }
- dst->iov_base = dest;
- dst->iov_len = xdr_encoded_length (xdr);
+ dst->iov_base = dest;
+ dst->iov_len = xdr_encoded_length(xdr);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-auth_unix_cred_to_xdr (struct authunix_parms *au, char *dest, size_t len,
- struct iovec *iov)
+auth_unix_cred_to_xdr(struct authunix_parms *au, char *dest, size_t len,
+ struct iovec *iov)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", au, out);
- GF_VALIDATE_OR_GOTO ("rpc", dest, out);
- GF_VALIDATE_OR_GOTO ("rpc", iov, out);
+ GF_VALIDATE_OR_GOTO("rpc", au, out);
+ GF_VALIDATE_OR_GOTO("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO("rpc", iov, out);
- xdrmem_create (&xdr, dest, len, XDR_DECODE);
+ xdrmem_create(&xdr, dest, len, XDR_DECODE);
- if (!xdr_authunix_parms (&xdr, au)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode authunix parms");
- goto out;
- }
+ if (!xdr_authunix_parms(&xdr, au)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode authunix parms");
+ goto out;
+ }
- iov->iov_base = dest;
- iov->iov_len = xdr_encoded_length (xdr);
+ iov->iov_base = dest;
+ iov->iov_len = xdr_encoded_length(xdr);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.h b/rpc/rpc-lib/src/xdr-rpcclnt.h
index c08d872f8c2..58eda4892a9 100644
--- a/rpc/rpc-lib/src/xdr-rpcclnt.h
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.h
@@ -11,13 +11,6 @@
#ifndef _XDR_RPCCLNT_H
#define _XDR_RPCCLNT_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-//#include <rpc/rpc.h>
-//#include <rpc/pmap_clnt.h>
#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
@@ -25,18 +18,19 @@
#include <rpc/auth_unix.h>
/* Macros that simplify accessing the members of an RPC call structure. */
-#define rpc_reply_xid(reply) ((reply)->rm_xid)
-#define rpc_reply_status(reply) ((reply)->ru.RM_rmb.rp_stat)
-#define rpc_accepted_reply_status(reply) ((reply)->acpted_rply.ar_stat)
-#define rpc_reply_verf_flavour(reply) ((reply)->acpted_rply.ar_verf.oa_flavor)
+#define rpc_reply_xid(reply) ((reply)->rm_xid)
+#define rpc_reply_status(reply) ((reply)->ru.RM_rmb.rp_stat)
+#define rpc_accepted_reply_status(reply) ((reply)->acpted_rply.ar_stat)
+#define rpc_reply_verf_flavour(reply) ((reply)->acpted_rply.ar_verf.oa_flavor)
-int xdr_to_rpc_reply (char *msgbuf, size_t len, struct rpc_msg *reply,
- struct iovec *payload, char *verfbytes);
int
-rpc_request_to_xdr (struct rpc_msg *request, char *dest, size_t len,
- struct iovec *dst);
+xdr_to_rpc_reply(char *msgbuf, size_t len, struct rpc_msg *reply,
+ struct iovec *payload, char *verfbytes);
+int
+rpc_request_to_xdr(struct rpc_msg *request, char *dest, size_t len,
+ struct iovec *dst);
int
-auth_unix_cred_to_xdr (struct authunix_parms *au, char *dest, size_t len,
- struct iovec *iov);
+auth_unix_cred_to_xdr(struct authunix_parms *au, char *dest, size_t len,
+ struct iovec *iov);
#endif
diff --git a/rpc/rpc-transport/Makefile.am b/rpc/rpc-transport/Makefile.am
index 221fd640514..7dd9f026cfc 100644
--- a/rpc/rpc-transport/Makefile.am
+++ b/rpc/rpc-transport/Makefile.am
@@ -1 +1 @@
-SUBDIRS = socket $(RDMA_SUBDIR)
+SUBDIRS = socket
diff --git a/rpc/rpc-transport/rdma/src/Makefile.am b/rpc/rpc-transport/rdma/src/Makefile.am
deleted file mode 100644
index b4b940bca25..00000000000
--- a/rpc/rpc-transport/rdma/src/Makefile.am
+++ /dev/null
@@ -1,20 +0,0 @@
-# TODO : need to change transportdir
-
-transport_LTLIBRARIES = rdma.la
-transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
-
-rdma_la_LDFLAGS = -module -avoidversion
-
-rdma_la_SOURCES = rdma.c name.c
-rdma_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -libverbs
-
-noinst_HEADERS = rdma.h name.h
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/xlators/protocol/lib/src/ -shared -nostartfiles $(GF_CFLAGS)
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/rpc/xdr/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES = *~
diff --git a/rpc/rpc-transport/rdma/src/name.c b/rpc/rpc-transport/rdma/src/name.c
deleted file mode 100644
index e5f7ba2cae5..00000000000
--- a/rpc/rpc-transport/rdma/src/name.c
+++ /dev/null
@@ -1,698 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <errno.h>
-#include <netdb.h>
-#include <string.h>
-
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
-#ifndef AF_INET_SDP
-#define AF_INET_SDP 27
-#endif
-
-#include "rpc-transport.h"
-#include "rdma.h"
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
- socklen_t sockaddr_len, int ceiling)
-{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
-
- while (port)
- {
- switch (sockaddr->sa_family)
- {
- case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
- break;
-
- case AF_INET_SDP:
- case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
- break;
- }
-
- ret = bind (fd, sockaddr, sockaddr_len);
-
- if (ret == 0)
- break;
-
- if (ret == -1 && errno == EACCES)
- break;
-
- port--;
- }
-
- return ret;
-}
-
-static int32_t
-af_unix_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
-{
- data_t *path_data = NULL;
- struct sockaddr_un *addr = NULL;
- int32_t ret = -1;
-
- path_data = dict_get (this->options,
- "transport.rdma.bind-path");
- if (path_data) {
- char *path = data_to_str (path_data);
- if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_DEBUG,
- "transport.rdma.bind-path not specified "
- "for unix socket, letting connect to assign "
- "default value");
- goto err;
- }
-
- addr = (struct sockaddr_un *) sockaddr;
- strcpy (addr->sun_path, path);
- ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
- sock, strerror (errno));
- goto err;
- }
- }
-
-err:
- return ret;
-}
-
-static int32_t
-client_fill_address_family (rpc_transport_t *this, struct sockaddr *sockaddr)
-{
- data_t *address_family_data = NULL;
-
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (!address_family_data) {
- data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->options, "remote-host");
- connect_path_data = dict_get (this->options,
- "transport.rdma.connect-path");
-
- if (!(remote_host_data || connect_path_data) ||
- (remote_host_data && connect_path_data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "address-family not specified and not able to "
- "determine the same from other options "
- "(remote-host:%s and connect-path:%s)",
- data_to_str (remote_host_data),
- data_to_str (connect_path_data));
- return -1;
- }
-
- if (remote_host_data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be inet/inet6");
- sockaddr->sa_family = AF_UNSPEC;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be unix");
- sockaddr->sa_family = AF_UNIX;
- }
-
- } else {
- char *address_family = data_to_str (address_family_data);
- if (!strcasecmp (address_family, "unix")) {
- sockaddr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet")) {
- sockaddr->sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- sockaddr->sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- sockaddr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- sockaddr->sa_family = AF_UNSPEC;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
- address_family);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int32_t
-af_inet_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int16_t remote_port)
-{
- dict_t *options = this->options;
- data_t *remote_host_data = NULL;
- data_t *remote_port_data = NULL;
- char *remote_host = NULL;
- struct addrinfo *addr_info = NULL;
- int32_t ret = 0;
-
- remote_host_data = dict_get (options, "remote-host");
- if (remote_host_data == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- remote_host = data_to_str (remote_host_data);
- if (remote_host == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- if (remote_port == 0) {
- remote_port_data = dict_get (options, "remote-port");
- if (remote_port_data == NULL)
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "option remote-port missing in volume %s. "
- "Defaulting to %d",
- this->name, GF_DEFAULT_RDMA_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_RDMA_LISTEN_PORT;
- }
- else
- {
- remote_port = data_to_uint16 (remote_port_data);
- }
- }
-
- if (remote_port == -1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-port has invalid port in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- /* TODO: gf_resolve is a blocking call. kick in some
- non blocking dns techniques */
- ret = gf_resolve_ip6 (remote_host, remote_port,
- sockaddr->sa_family,
- &this->dnscache, &addr_info);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "DNS resolution failed on host %s", remote_host);
- goto err;
- }
-
- memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
- *sockaddr_len = addr_info->ai_addrlen;
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- struct sockaddr_un *sockaddr_un = NULL;
- char *connect_path = NULL;
- data_t *connect_path_data = NULL;
- int32_t ret = 0;
-
- connect_path_data = dict_get (this->options,
- "transport.rdma.connect-path");
- if (!connect_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "option transport.rdma.connect-path not "
- "specified for address-family unix");
- ret = -1;
- goto err;
- }
-
- connect_path = data_to_str (connect_path_data);
- if (!connect_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "connect-path is null-string");
- ret = -1;
- goto err;
- }
-
- if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "connect-path value length %"GF_PRI_SIZET" > "
- "%d octets", strlen (connect_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- gf_log (this->name,
- GF_LOG_DEBUG,
- "using connect-path %s", connect_path);
- sockaddr_un = (struct sockaddr_un *)sockaddr;
- strcpy (sockaddr_un->sun_path, connect_path);
- *sockaddr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *listen_path_data = NULL;
- char *listen_path = NULL;
- int32_t ret = 0;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
-
-
- listen_path_data = dict_get (this->options,
- "transport.rdma.listen-path");
- if (!listen_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "missing option listen-path");
- ret = -1;
- goto err;
- }
-
- listen_path = data_to_str (listen_path_data);
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
- if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "option listen-path has value length %"GF_PRI_SIZET" > %d",
- strlen (listen_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- sunaddr->sun_family = AF_UNIX;
- strcpy (sunaddr->sun_path, listen_path);
- *addr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_inet_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- struct addrinfo hints, *res = 0;
- data_t *listen_port_data = NULL, *listen_host_data = NULL;
- uint16_t listen_port = -1;
- char service[NI_MAXSERV], *listen_host = NULL;
- dict_t *options = NULL;
- int32_t ret = 0;
-
- options = this->options;
-
- listen_port_data = dict_get (options, "transport.rdma.listen-port");
- listen_host_data = dict_get (options,
- "transport.rdma.bind-address");
-
- if (listen_port_data) {
- listen_port = data_to_uint16 (listen_port_data);
- } else {
- if (addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
- in->sin6_addr = in6addr_any;
- in->sin6_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in6);
- goto out;
- } else if (addr->sa_family == AF_INET) {
- struct sockaddr_in *in = (struct sockaddr_in *) addr;
- in->sin_addr.s_addr = htonl(INADDR_ANY);
- in->sin_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in);
- goto out;
- }
- }
-
- if (listen_port == (uint16_t) -1)
- listen_port = GF_DEFAULT_RDMA_LISTEN_PORT;
-
-
- if (listen_host_data) {
- listen_host = data_to_str (listen_host_data);
- }
-
- memset (service, 0, sizeof (service));
- sprintf (service, "%d", listen_port);
-
- memset (&hints, 0, sizeof (hints));
- hints.ai_family = addr->sa_family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
-
- ret = getaddrinfo(listen_host, service, &hints, &res);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
- listen_host, service, gai_strerror (ret));
- ret = -1;
- goto out;
- }
-
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
-
- freeaddrinfo (res);
-
-out:
- return ret;
-}
-
-int32_t
-gf_rdma_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
-{
- int ret = 0;
-
- *sockaddr_len = sizeof (struct sockaddr_in6);
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- case AF_INET:
- *sockaddr_len = sizeof (struct sockaddr_in);
-
- case AF_INET6:
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len,
- CLIENT_PORT_CEILING);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot bind inet socket (%d) to port "
- "less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
- ret = 0;
- }
- break;
-
- case AF_UNIX:
- *sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
- *sockaddr_len, sock);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family %d", sockaddr->sa_family);
- ret = -1;
- break;
- }
-
- return ret;
-}
-
-int32_t
-gf_rdma_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int16_t remote_port)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- ret = client_fill_address_family (this, sockaddr);
- if (ret) {
- ret = -1;
- goto err;
- }
-
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- sockaddr->sa_family = AF_INET;
- is_inet_sdp = 1;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this,
- sockaddr,
- sockaddr_len,
- remote_port);
-
- if (is_inet_sdp) {
- sockaddr->sa_family = AF_INET_SDP;
- }
-
- break;
-
- case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this,
- sockaddr,
- sockaddr_len);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family %d", sockaddr->sa_family);
- ret = -1;
- }
-
-err:
- return ret;
-}
-
-int32_t
-gf_rdma_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *address_family_data = NULL;
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (address_family_data) {
- char *address_family = NULL;
- address_family = data_to_str (address_family_data);
-
- if (!strcasecmp (address_family, "inet")) {
- addr->sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- addr->sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- addr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "unix")) {
- addr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- addr->sa_family = AF_UNSPEC;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%s) specified",
- address_family);
- ret = -1;
- goto err;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "option address-family not specified, defaulting "
- "to inet/inet6");
- addr->sa_family = AF_UNSPEC;
- }
-
- switch (addr->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- addr->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_server_get_local_sockaddr (this, addr, addr_len);
- if (is_inet_sdp && !ret) {
- addr->sa_family = AF_INET_SDP;
- }
- break;
-
- case AF_UNIX:
- ret = af_unix_server_get_local_sockaddr (this, addr, addr_len);
- break;
- }
-
-err:
- return ret;
-}
-
-int32_t
-fill_inet6_inet_identifiers (rpc_transport_t *this, struct sockaddr_storage *addr,
- int32_t addr_len, char *identifier)
-{
- int32_t ret = 0, tmpaddr_len = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
- union gf_sock_union sock_union;
-
- memset (&sock_union, 0, sizeof (sock_union));
- sock_union.storage = *addr;
- tmpaddr_len = addr_len;
-
- if (sock_union.sa.sa_family == AF_INET6) {
- int32_t one_to_four, four_to_eight, twelve_to_sixteen;
- int16_t eight_to_ten, ten_to_twelve;
-
- one_to_four = four_to_eight = twelve_to_sixteen = 0;
- eight_to_ten = ten_to_twelve = 0;
-
- one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
- four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
-#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
-#else
- eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
-#endif
-
-#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
-#else
- ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
-#endif
- twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
-
- /* ipv4 mapped ipv6 address has
- bits 0-80: 0
- bits 80-96: 0xffff
- bits 96-128: ipv4 address
- */
-
- if (one_to_four == 0 &&
- four_to_eight == 0 &&
- eight_to_ten == 0 &&
- ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = &sock_union.sin;
- memset (&sock_union, 0, sizeof (sock_union));
-
- in_ptr->sin_family = AF_INET;
- in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
- in_ptr->sin_addr.s_addr = twelve_to_sixteen;
- tmpaddr_len = sizeof (*in_ptr);
- }
- }
-
- ret = getnameinfo (&sock_union.sa,
- tmpaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret != 0) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- }
-
- sprintf (identifier, "%s:%s", host, service);
-
- return ret;
-}
-
-int32_t
-gf_rdma_get_transport_identifiers (rpc_transport_t *this)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- {
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- this->myinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "can't fill inet/inet6 identifier for server");
- goto err;
- }
-
- ret = fill_inet6_inet_identifiers (this,
- &this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len,
- this->peerinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "can't fill inet/inet6 identifier for client");
- goto err;
- }
-
- if (is_inet_sdp) {
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP;
- }
- }
- break;
-
- case AF_UNIX:
- {
- struct sockaddr_un *sunaddr = NULL;
-
- sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr;
- strcpy (this->myinfo.identifier, sunaddr->sun_path);
-
- sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr;
- strcpy (this->peerinfo.identifier, sunaddr->sun_path);
- }
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%d)",
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
- ret = -1;
- break;
- }
-
-err:
- return ret;
-}
diff --git a/rpc/rpc-transport/rdma/src/name.h b/rpc/rpc-transport/rdma/src/name.h
deleted file mode 100644
index 114ed1661a3..00000000000
--- a/rpc/rpc-transport/rdma/src/name.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _IB_VERBS_NAME_H
-#define _IB_VERBS_NAME_H
-
-#include <sys/socket.h>
-#include <sys/un.h>
-
-#include "compat.h"
-
-int32_t
-gf_rdma_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
-
-int32_t
-gf_rdma_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int16_t remote_port);
-
-int32_t
-gf_rdma_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len);
-
-int32_t
-gf_rdma_get_transport_identifiers (rpc_transport_t *this);
-
-#endif /* _IB_VERBS_NAME_H */
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
deleted file mode 100644
index 8e9472fc6a2..00000000000
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ /dev/null
@@ -1,4942 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "dict.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "rdma.h"
-#include "name.h"
-#include "byte-order.h"
-#include "xlator.h"
-#include <signal.h>
-
-#define GF_RDMA_LOG_NAME "rpc-transport/rdma"
-
-static int32_t
-__gf_rdma_ioq_churn (gf_rdma_peer_t *peer);
-
-gf_rdma_post_t *
-gf_rdma_post_ref (gf_rdma_post_t *post);
-
-int
-gf_rdma_post_unref (gf_rdma_post_t *post);
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static uint16_t
-gf_rdma_get_local_lid (struct ibv_context *context,
- int32_t port)
-{
- struct ibv_port_attr attr;
-
- if (ibv_query_port (context, port, &attr))
- return 0;
-
- return attr.lid;
-}
-
-static const char *
-get_port_state_str(enum ibv_port_state pstate)
-{
- switch (pstate) {
- case IBV_PORT_DOWN: return "PORT_DOWN";
- case IBV_PORT_INIT: return "PORT_INIT";
- case IBV_PORT_ARMED: return "PORT_ARMED";
- case IBV_PORT_ACTIVE: return "PORT_ACTIVE";
- case IBV_PORT_ACTIVE_DEFER: return "PORT_ACTIVE_DEFER";
- default: return "invalid state";
- }
-}
-
-static int32_t
-ib_check_active_port (struct ibv_context *ctx, uint8_t port)
-{
- struct ibv_port_attr port_attr = {0, };
- int32_t ret = 0;
- const char *state_str = NULL;
-
- if (!ctx) {
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
-
- ret = ibv_query_port (ctx, port, &port_attr);
-
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to query port %u properties", port);
- return -1;
- }
-
- state_str = get_port_state_str (port_attr.state);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Infiniband PORT: (%u) STATE: (%s)",
- port, state_str);
-
- if (port_attr.state == IBV_PORT_ACTIVE)
- return 0;
-
- return -1;
-}
-
-static int32_t
-ib_get_active_port (struct ibv_context *ib_ctx)
-{
- struct ibv_device_attr ib_device_attr = {{0, }, };
- int32_t ret = -1;
- uint8_t ib_port = 0;
-
- if (!ib_ctx) {
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
- if (ibv_query_device (ib_ctx, &ib_device_attr)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to query device properties");
- return -1;
- }
-
- for (ib_port = 1; ib_port <= ib_device_attr.phys_port_cnt; ++ib_port) {
- ret = ib_check_active_port (ib_ctx, ib_port);
- if (ret == 0)
- return ib_port;
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Port:(%u) not active", ib_port);
- continue;
- }
- return ret;
-}
-
-
-static void
-gf_rdma_put_post (gf_rdma_queue_t *queue, gf_rdma_post_t *post)
-{
- post->ctx.is_request = 0;
-
- pthread_mutex_lock (&queue->lock);
- {
- if (post->prev) {
- queue->active_count--;
- post->prev->next = post->next;
- }
-
- if (post->next) {
- post->next->prev = post->prev;
- }
-
- post->prev = &queue->passive_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- queue->passive_count++;
- }
- pthread_mutex_unlock (&queue->lock);
-}
-
-
-static gf_rdma_post_t *
-gf_rdma_new_post (gf_rdma_device_t *device, int32_t len,
- gf_rdma_post_type_t type)
-{
- gf_rdma_post_t *post = NULL;
- int ret = -1;
-
- post = (gf_rdma_post_t *) GF_CALLOC (1, sizeof (*post),
- gf_common_mt_rdma_post_t);
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_init (&post->lock, NULL);
-
- post->buf_size = len;
-
- post->buf = valloc (len);
- if (!post->buf) {
- gf_log_nomem (GF_RDMA_LOG_NAME, GF_LOG_ERROR, len);
- goto out;
- }
-
- post->mr = ibv_reg_mr (device->pd,
- post->buf,
- post->buf_size,
- IBV_ACCESS_LOCAL_WRITE);
- if (!post->mr) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "memory registration failed");
- goto out;
- }
-
- post->device = device;
- post->type = type;
-
- ret = 0;
-out:
- if (ret != 0) {
- if (post->buf != NULL) {
- free (post->buf);
- }
-
- GF_FREE (post);
- post = NULL;
- }
-
- return post;
-}
-
-
-static gf_rdma_post_t *
-gf_rdma_get_post (gf_rdma_queue_t *queue)
-{
- gf_rdma_post_t *post = NULL;
-
- pthread_mutex_lock (&queue->lock);
- {
- post = queue->passive_posts.next;
- if (post == &queue->passive_posts)
- post = NULL;
-
- if (post) {
- if (post->prev)
- post->prev->next = post->next;
- if (post->next)
- post->next->prev = post->prev;
- post->prev = &queue->active_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- post->reused++;
- queue->active_count++;
- }
- }
- pthread_mutex_unlock (&queue->lock);
-
- return post;
-}
-
-void
-gf_rdma_destroy_post (gf_rdma_post_t *post)
-{
- ibv_dereg_mr (post->mr);
- free (post->buf);
- GF_FREE (post);
-}
-
-
-static int32_t
-__gf_rdma_quota_get (gf_rdma_peer_t *peer)
-{
- int32_t ret = -1;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- if (priv->connected && peer->quota > 0) {
- ret = peer->quota--;
- }
-
- return ret;
-}
-
-/*
- static int32_t
- gf_rdma_quota_get (gf_rdma_peer_t *peer)
- {
- int32_t ret = -1;
- gf_rdma_private_t *priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __gf_rdma_quota_get (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
- }
-*/
-
-static void
-__gf_rdma_ioq_entry_free (gf_rdma_ioq_t *entry)
-{
- list_del_init (&entry->list);
-
- if (entry->iobref) {
- iobref_unref (entry->iobref);
- entry->iobref = NULL;
- }
-
- if (entry->msg.request.rsp_iobref) {
- iobref_unref (entry->msg.request.rsp_iobref);
- entry->msg.request.rsp_iobref = NULL;
- }
- mem_put (entry);
-}
-
-
-static void
-__gf_rdma_ioq_flush (gf_rdma_peer_t *peer)
-{
- gf_rdma_ioq_t *entry = NULL, *dummy = NULL;
-
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- __gf_rdma_ioq_entry_free (entry);
- }
-}
-
-
-static int32_t
-__gf_rdma_disconnect (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
-
- priv = this->private;
-
- if (priv->connected || priv->tcp_connected) {
- fcntl (priv->sock, F_SETFL, O_NONBLOCK);
- if (shutdown (priv->sock, SHUT_RDWR) != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "shutdown () - error: %s",
- strerror (errno));
- ret = -errno;
- priv->tcp_connected = 0;
- priv->connected = 0;
- }
- }
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_post_send (struct ibv_qp *qp, gf_rdma_post_t *post, int32_t len)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = len,
- .lkey = post->mr->lkey
- };
-
- struct ibv_send_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- .opcode = IBV_WR_SEND,
- .send_flags = IBV_SEND_SIGNALED,
- }, *bad_wr;
-
- if (!qp)
- return EINVAL;
-
- return ibv_post_send (qp, &wr, &bad_wr);
-}
-
-int
-__gf_rdma_encode_error(gf_rdma_peer_t *peer, gf_rdma_reply_info_t *reply_info,
- struct iovec *rpchdr, uint32_t *ptr,
- gf_rdma_errcode_t err)
-{
- uint32_t *startp = NULL;
- struct rpc_msg *rpc_msg = NULL;
-
- startp = ptr;
- if (reply_info != NULL) {
- *ptr++ = hton32(reply_info->rm_xid);
- } else {
- rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
- * only one vector.
- * (which is true)
- */
- *ptr++ = rpc_msg->rm_xid;
- }
-
- *ptr++ = hton32(GF_RDMA_VERSION);
- *ptr++ = hton32(peer->send_count);
- *ptr++ = hton32(GF_RDMA_ERROR);
- *ptr++ = hton32(err);
- if (err == ERR_VERS) {
- *ptr++ = hton32(GF_RDMA_VERSION);
- *ptr++ = hton32(GF_RDMA_VERSION);
- }
-
- return (int)((unsigned long)ptr - (unsigned long)startp);
-}
-
-
-int32_t
-__gf_rdma_send_error (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post, gf_rdma_reply_info_t *reply_info,
- gf_rdma_errcode_t err)
-{
- int32_t ret = -1, len = 0;
-
- len = __gf_rdma_encode_error (peer, reply_info, entry->rpchdr,
- (uint32_t *)post->buf, err);
- if (len == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "encode error returned -1");
- goto out;
- }
-
- gf_rdma_post_ref (post);
-
- ret = gf_rdma_post_send (peer->qp, post, len);
- if (!ret) {
- ret = len;
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- __gf_rdma_disconnect (peer->trans);
- ret = -1;
- }
-
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer,
- gf_rdma_read_chunk_t **readch_ptr,
- int32_t *pos, struct iovec *vector,
- int count,
- gf_rdma_request_context_t *request_ctx)
-{
- int i = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_read_chunk_t *readch = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, readch_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *readch_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
-
- priv = peer->trans->private;
- device = priv->device;
- readch = *readch_ptr;
-
- for (i = 0; i < count; i++) {
- readch->rc_discrim = hton32 (1);
- readch->rc_position = hton32 (*pos);
-
- mr = ibv_reg_mr (device->pd, vector[i].iov_base,
- vector[i].iov_len,
- IBV_ACCESS_REMOTE_READ);
- if (!mr) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "memory registration failed");
- goto out;
- }
-
- request_ctx->mr[request_ctx->mr_count++] = mr;
-
- readch->rc_target.rs_handle = hton32 (mr->rkey);
- readch->rc_target.rs_length
- = hton32 (vector[i].iov_len);
- readch->rc_target.rs_offset
- = hton64 ((uint64_t)(unsigned long)vector[i].iov_base);
-
- *pos = *pos + vector[i].iov_len;
- readch++;
- }
-
- *readch_ptr = readch;
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_read_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_chunktype_t type, uint32_t **ptr,
- gf_rdma_request_context_t *request_ctx)
-{
- int32_t ret = -1;
- int pos = 0;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
-
- request_ctx->iobref = iobref_ref (entry->iobref);
-
- if (type == gf_rdma_areadch) {
- pos = 0;
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->rpchdr,
- entry->rpchdr_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot create read chunks from vector, "
- "entry->rpchdr");
- goto out;
- }
-
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->proghdr,
- entry->proghdr_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot create read chunks from vector, "
- "entry->proghdr");
- }
-
- if (entry->prog_payload_count != 0) {
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->prog_payload,
- entry->prog_payload_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot create read chunks from vector,"
- " entry->prog_payload");
- }
- }
- } else {
- pos = iov_length (entry->rpchdr, entry->rpchdr_count);
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->prog_payload,
- entry->prog_payload_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot create read chunks from vector, "
- "entry->prog_payload");
- }
- }
-
- /* terminate read-chunk list*/
- **ptr = 0;
- *ptr = *ptr + 1;
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer,
- gf_rdma_write_chunk_t **writech_ptr,
- struct iovec *vector, int count,
- gf_rdma_request_context_t *request_ctx)
-{
- int i = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_write_chunk_t *writech = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, writech_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *writech_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
-
- writech = *writech_ptr;
-
- priv = peer->trans->private;
- device = priv->device;
-
- for (i = 0; i < count; i++) {
- mr = ibv_reg_mr (device->pd, vector[i].iov_base,
- vector[i].iov_len,
- IBV_ACCESS_REMOTE_WRITE
- | IBV_ACCESS_LOCAL_WRITE);
- if (!mr) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "memory registration failed");
- goto out;
- }
-
- request_ctx->mr[request_ctx->mr_count++] = mr;
-
- writech->wc_target.rs_handle = hton32 (mr->rkey);
- writech->wc_target.rs_length = hton32 (vector[i].iov_len);
- writech->wc_target.rs_offset
- = hton64 (((uint64_t)(unsigned long)vector[i].iov_base));
-
- writech++;
- }
-
- *writech_ptr = writech;
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_write_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_chunktype_t chunk_type, uint32_t **ptr,
- gf_rdma_request_context_t *request_ctx)
-{
- int32_t ret = -1;
- gf_rdma_write_array_t *warray = NULL;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
-
- if ((chunk_type == gf_rdma_replych)
- && ((entry->msg.request.rsphdr_count != 1) ||
- (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- (entry->msg.request.rsphdr_count == 1)
- ? "chunktype specified as reply chunk but the vector "
- "specifying the buffer to be used for holding reply"
- " header is not correct" :
- "chunktype specified as reply chunk, but more than one "
- "buffer provided for holding reply");
- goto out;
- }
-
-/*
- if ((chunk_type == gf_rdma_writech)
- && ((entry->msg.request.rsphdr_count == 0)
- || (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "vector specifying buffer to hold the program's reply "
- "header should also be provided when buffers are "
- "provided for holding the program's payload in reply");
- goto out;
- }
-*/
-
- if (chunk_type == gf_rdma_writech) {
- warray = (gf_rdma_write_array_t *)*ptr;
- warray->wc_discrim = hton32 (1);
- warray->wc_nchunks
- = hton32 (entry->msg.request.rsp_payload_count);
-
- *ptr = (uint32_t *)&warray->wc_array[0];
-
- ret = __gf_rdma_create_write_chunks_from_vector (peer,
- (gf_rdma_write_chunk_t **)ptr,
- entry->msg.request.rsp_payload,
- entry->msg.request.rsp_payload_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot create write chunks from vector "
- "entry->rpc_payload");
- goto out;
- }
-
- /* terminate write chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
-
- /* no reply chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
- } else {
- /* no write chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
-
- warray = (gf_rdma_write_array_t *)*ptr;
- warray->wc_discrim = hton32 (1);
- warray->wc_nchunks = hton32 (entry->msg.request.rsphdr_count);
-
- *ptr = (uint32_t *)&warray->wc_array[0];
-
- ret = __gf_rdma_create_write_chunks_from_vector (peer,
- (gf_rdma_write_chunk_t **)ptr,
- entry->msg.request.rsphdr_vec,
- entry->msg.request.rsphdr_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot create write chunks from vector "
- "entry->rpchdr");
- goto out;
- }
-
- /* terminate reply chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
- }
-
-out:
- return ret;
-}
-
-
-inline void
-__gf_rdma_deregister_mr (struct ibv_mr **mr, int count)
-{
- int i = 0;
-
- if (mr == NULL) {
- goto out;
- }
-
- for (i = 0; i < count; i++) {
- ibv_dereg_mr (mr[i]);
- }
-
-out:
- return;
-}
-
-
-static int32_t
-__gf_rdma_quota_put (gf_rdma_peer_t *peer)
-{
- int32_t ret = 0;
-
- peer->quota++;
- ret = peer->quota;
-
- if (!list_empty (&peer->ioq)) {
- ret = __gf_rdma_ioq_churn (peer);
- }
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_quota_put (gf_rdma_peer_t *peer)
-{
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __gf_rdma_quota_put (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-
-/* to be called with priv->mutex held */
-void
-__gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
-{
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
-
- if (context == NULL) {
- goto out;
- }
-
- peer = context->peer;
-
- __gf_rdma_deregister_mr (context->mr, context->mr_count);
-
- priv = peer->trans->private;
-
- if (priv->connected) {
- ret = __gf_rdma_quota_put (peer);
- if (ret < 0) {
- gf_log ("rdma", GF_LOG_DEBUG,
- "failed to send "
- "message");
- mem_put (context);
- __gf_rdma_disconnect (peer->trans);
- goto out;
- }
- }
-
- if (context->iobref != NULL) {
- iobref_unref (context->iobref);
- context->iobref = NULL;
- }
-
- if (context->rsp_iobref != NULL) {
- iobref_unref (context->rsp_iobref);
- context->rsp_iobref = NULL;
- }
-
- mem_put (context);
-
-out:
- return;
-}
-
-
-void
-gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx)
-{
- if (ctx == NULL) {
- goto out;
- }
-
- __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count);
-
- if (ctx->iobref != NULL) {
- iobref_unref (ctx->iobref);
- }
-
- if (ctx->hdr_iobuf != NULL) {
- iobuf_unref (ctx->hdr_iobuf);
- }
-
- memset (ctx, 0, sizeof (*ctx));
-out:
- return;
-}
-
-
-static int32_t
-gf_rdma_post_recv (struct ibv_srq *srq,
- gf_rdma_post_t *post)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = post->buf_size,
- .lkey = post->mr->lkey
- };
-
- struct ibv_recv_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- }, *bad_wr;
-
- gf_rdma_post_ref (post);
-
- return ibv_post_srq_recv (srq, &wr, &bad_wr);
-}
-
-
-int
-gf_rdma_post_unref (gf_rdma_post_t *post)
-{
- int refcount = -1;
-
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&post->lock);
- {
- refcount = --post->refcount;
- }
- pthread_mutex_unlock (&post->lock);
-
- if (refcount == 0) {
- gf_rdma_post_context_destroy (&post->ctx);
- if (post->type == GF_RDMA_SEND_POST) {
- gf_rdma_put_post (&post->device->sendq, post);
- } else {
- gf_rdma_post_recv (post->device->srq, post);
- }
- }
-out:
- return refcount;
-}
-
-
-int
-gf_rdma_post_get_refcount (gf_rdma_post_t *post)
-{
- int refcount = -1;
-
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&post->lock);
- {
- refcount = post->refcount;
- }
- pthread_mutex_unlock (&post->lock);
-
-out:
- return refcount;
-}
-
-gf_rdma_post_t *
-gf_rdma_post_ref (gf_rdma_post_t *post)
-{
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&post->lock);
- {
- post->refcount++;
- }
- pthread_mutex_unlock (&post->lock);
-
-out:
- return post;
-}
-
-
-int32_t
-__gf_rdma_ioq_churn_request (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post)
-{
- gf_rdma_chunktype_t rtype = gf_rdma_noch;
- gf_rdma_chunktype_t wtype = gf_rdma_noch;
- uint64_t send_size = 0;
- gf_rdma_header_t *hdr = NULL;
- struct rpc_msg *rpc_msg = NULL;
- uint32_t *chunkptr = NULL;
- char *buf = NULL;
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- int chunk_count = 0;
- gf_rdma_request_context_t *request_ctx = NULL;
- uint32_t prog_payload_length = 0, len = 0;
- struct rpc_req *rpc_req = NULL;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, post, out);
-
- if ((entry->msg.request.rsphdr_count != 0)
- && (entry->msg.request.rsp_payload_count != 0)) {
- ret = -1;
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "both write-chunklist and reply-chunk cannot be "
- "present");
- goto out;
- }
-
- post->ctx.is_request = 1;
- priv = peer->trans->private;
- device = priv->device;
-
- hdr = (gf_rdma_header_t *)post->buf;
-
- send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
- + iov_length (entry->proghdr, entry->proghdr_count)
- + GLUSTERFS_RDMA_MAX_HEADER_SIZE;
-
- if (entry->prog_payload_count != 0) {
- prog_payload_length
- = iov_length (entry->prog_payload,
- entry->prog_payload_count);
- }
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- rtype = gf_rdma_areadch;
- } else if ((send_size + prog_payload_length)
- < GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- rtype = gf_rdma_noch;
- } else if (entry->prog_payload_count != 0) {
- rtype = gf_rdma_readch;
- }
-
- if (entry->msg.request.rsphdr_count != 0) {
- wtype = gf_rdma_replych;
- } else if (entry->msg.request.rsp_payload_count != 0) {
- wtype = gf_rdma_writech;
- }
-
- if (rtype == gf_rdma_readch) {
- chunk_count += entry->prog_payload_count;
- } else if (rtype == gf_rdma_areadch) {
- chunk_count += entry->rpchdr_count;
- chunk_count += entry->proghdr_count;
- }
-
- if (wtype == gf_rdma_writech) {
- chunk_count += entry->msg.request.rsp_payload_count;
- } else if (wtype == gf_rdma_replych) {
- chunk_count += entry->msg.request.rsphdr_count;
- }
-
- if (chunk_count > GF_RDMA_MAX_SEGMENTS) {
- ret = -1;
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "chunk count(%d) exceeding maximum allowed RDMA "
- "segment count(%d)", chunk_count, GF_RDMA_MAX_SEGMENTS);
- goto out;
- }
-
- if ((wtype != gf_rdma_noch) || (rtype != gf_rdma_noch)) {
- request_ctx = mem_get (device->request_ctx_pool);
- if (request_ctx == NULL) {
- ret = -1;
- goto out;
- }
-
- memset (request_ctx, 0, sizeof (*request_ctx));
-
- request_ctx->pool = device->request_ctx_pool;
- request_ctx->peer = peer;
-
- entry->msg.request.rpc_req->conn_private = request_ctx;
-
- if (entry->msg.request.rsp_iobref != NULL) {
- request_ctx->rsp_iobref
- = iobref_ref (entry->msg.request.rsp_iobref);
- }
- }
-
- rpc_msg = (struct rpc_msg *) entry->rpchdr[0].iov_base;
-
- hdr->rm_xid = rpc_msg->rm_xid; /* no need of hton32(rpc_msg->rm_xid),
- * since rpc_msg->rm_xid is already
- * hton32ed value of actual xid
- */
- hdr->rm_vers = hton32 (GF_RDMA_VERSION);
- hdr->rm_credit = hton32 (peer->send_count);
- if (rtype == gf_rdma_areadch) {
- hdr->rm_type = hton32 (GF_RDMA_NOMSG);
- } else {
- hdr->rm_type = hton32 (GF_RDMA_MSG);
- }
-
- chunkptr = &hdr->rm_body.rm_chunks[0];
- if (rtype != gf_rdma_noch) {
- ret = __gf_rdma_create_read_chunks (peer, entry, rtype, &chunkptr,
- request_ctx);
- if (ret != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "creation of read chunks failed");
- goto out;
- }
- } else {
- *chunkptr++ = 0; /* no read chunks */
- }
-
- if (wtype != gf_rdma_noch) {
- ret = __gf_rdma_create_write_chunks (peer, entry, wtype, &chunkptr,
- request_ctx);
- if (ret != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "creation of write/reply chunk failed");
- goto out;
- }
- } else {
- *chunkptr++ = 0; /* no write chunks */
- *chunkptr++ = 0; /* no reply chunk */
- }
-
- buf = (char *)chunkptr;
-
- if (rtype != gf_rdma_areadch) {
- iov_unload (buf, entry->rpchdr, entry->rpchdr_count);
- buf += iov_length (entry->rpchdr, entry->rpchdr_count);
-
- iov_unload (buf, entry->proghdr, entry->proghdr_count);
- buf += iov_length (entry->proghdr, entry->proghdr_count);
-
- if (rtype != gf_rdma_readch) {
- iov_unload (buf, entry->prog_payload,
- entry->prog_payload_count);
- buf += iov_length (entry->prog_payload,
- entry->prog_payload_count);
- }
- }
-
- len = buf - post->buf;
-
- gf_rdma_post_ref (post);
-
- ret = gf_rdma_post_send (peer->qp, post, len);
- if (!ret) {
- ret = len;
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- __gf_rdma_disconnect (peer->trans);
- ret = -1;
- }
-
-out:
- if (ret == -1) {
- rpc_req = entry->msg.request.rpc_req;
-
- if (request_ctx != NULL) {
- __gf_rdma_request_context_destroy (rpc_req->conn_private);
- }
-
- rpc_req->conn_private = NULL;
- }
-
- return ret;
-}
-
-
-inline void
-__gf_rdma_fill_reply_header (gf_rdma_header_t *header, struct iovec *rpchdr,
- gf_rdma_reply_info_t *reply_info, int credits)
-{
- struct rpc_msg *rpc_msg = NULL;
-
- if (reply_info != NULL) {
- header->rm_xid = hton32 (reply_info->rm_xid);
- } else {
- rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
- * only one vector.
- * (which is true)
- */
- header->rm_xid = rpc_msg->rm_xid;
- }
-
- header->rm_type = hton32 (GF_RDMA_MSG);
- header->rm_vers = hton32 (GF_RDMA_VERSION);
- header->rm_credit = hton32 (credits);
-
- header->rm_body.rm_chunks[0] = 0; /* no read chunks */
- header->rm_body.rm_chunks[1] = 0; /* no write chunks */
- header->rm_body.rm_chunks[2] = 0; /* no reply chunks */
-
- return;
-}
-
-
-int32_t
-__gf_rdma_send_reply_inline (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post, gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- int32_t send_size = 0, ret = 0;
- char *buf = NULL;
-
- send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
- + iov_length (entry->proghdr, entry->proghdr_count)
- + iov_length (entry->prog_payload, entry->prog_payload_count)
- + sizeof (gf_rdma_header_t); /*
- * remember, no chunklists in the
- * reply
- */
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
- peer->send_count);
-
- buf = (char *)&header->rm_body.rm_chunks[3];
-
- if (entry->rpchdr_count != 0) {
- iov_unload (buf, entry->rpchdr, entry->rpchdr_count);
- buf += iov_length (entry->rpchdr, entry->rpchdr_count);
- }
-
- if (entry->proghdr_count != 0) {
- iov_unload (buf, entry->proghdr, entry->proghdr_count);
- buf += iov_length (entry->proghdr, entry->proghdr_count);
- }
-
- if (entry->prog_payload_count != 0) {
- iov_unload (buf, entry->prog_payload,
- entry->prog_payload_count);
- buf += iov_length (entry->prog_payload,
- entry->prog_payload_count);
- }
-
- gf_rdma_post_ref (post);
-
- ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf));
- if (!ret) {
- ret = send_size;
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- __gf_rdma_disconnect (peer->trans);
- ret = -1;
- }
-
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_reply_encode_write_chunks (gf_rdma_peer_t *peer,
- uint32_t payload_size,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info,
- uint32_t **ptr)
-{
- uint32_t chunk_size = 0;
- int32_t ret = -1;
- gf_rdma_write_array_t *target_array = NULL;
- int i = 0;
-
- target_array = (gf_rdma_write_array_t *)*ptr;
-
- for (i = 0; i < reply_info->wc_array->wc_nchunks; i++) {
- chunk_size +=
- reply_info->wc_array->wc_array[i].wc_target.rs_length;
- }
-
- if (chunk_size < payload_size) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "length of payload (%d) is exceeding the total "
- "write chunk length (%d)", payload_size, chunk_size);
- goto out;
- }
-
- target_array->wc_discrim = hton32 (1);
- for (i = 0; (i < reply_info->wc_array->wc_nchunks)
- && (payload_size != 0);
- i++) {
- target_array->wc_array[i].wc_target.rs_offset
- = hton64 (reply_info->wc_array->wc_array[i].wc_target.rs_offset);
-
- target_array->wc_array[i].wc_target.rs_length
- = hton32 (min (payload_size,
- reply_info->wc_array->wc_array[i].wc_target.rs_length));
- }
-
- target_array->wc_nchunks = hton32 (i);
- target_array->wc_array[i].wc_target.rs_handle = 0; /* terminate
- chunklist */
-
- ret = 0;
-
- *ptr = &target_array->wc_array[i].wc_target.rs_length;
-out:
- return ret;
-}
-
-
-inline int32_t
-__gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer,
- struct iovec *vector, int count,
- gf_rdma_post_context_t *ctx)
-{
- int i = 0;
- int32_t ret = -1;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- if ((ctx == NULL) || (vector == NULL)) {
- goto out;
- }
-
- priv = peer->trans->private;
- device = priv->device;
-
- for (i = 0; i < count; i++) {
- /* what if the memory is registered more than once?
- * Assume that a single write buffer is passed to afr, which
- * then passes it to its children. If more than one children
- * happen to use rdma, then the buffer is registered more than
- * once.
- * Ib-verbs specification says that multiple registrations of
- * same memory location is allowed. Refer to 10.6.3.8 of
- * Infiniband Architecture Specification Volume 1
- * (Release 1.2.1)
- */
- ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd,
- vector[i].iov_base,
- vector[i].iov_len,
- IBV_ACCESS_LOCAL_WRITE);
- if (ctx->mr[ctx->mr_count] == NULL) {
- goto out;
- }
-
- ctx->mr_count++;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/* 1. assumes xfer_len of data is pointed by vector(s) starting from vec[*idx]
- * 2. modifies vec
- */
-int32_t
-__gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *vec,
- uint32_t xfer_len, int *idx, gf_rdma_write_chunk_t *writech)
-{
- int size = 0, num_sge = 0, i = 0;
- int32_t ret = -1;
- struct ibv_sge *sg_list = NULL;
- struct ibv_send_wr wr = {
- .opcode = IBV_WR_RDMA_WRITE,
- .send_flags = IBV_SEND_SIGNALED,
- }, *bad_wr;
-
- if ((peer == NULL) || (writech == NULL) || (idx == NULL)
- || (post == NULL) || (vec == NULL) || (xfer_len == 0)) {
- goto out;
- }
-
- for (i = *idx; size < xfer_len; i++) {
- size += vec[i].iov_len;
- }
-
- num_sge = i - *idx;
-
- sg_list = GF_CALLOC (num_sge, sizeof (struct ibv_sge),
- gf_common_mt_sge);
- if (sg_list == NULL) {
- ret = -1;
- goto out;
- }
-
- for ((i = *idx), (num_sge = 0); (xfer_len != 0); i++, num_sge++) {
- size = min (xfer_len, vec[i].iov_len);
-
- sg_list [num_sge].addr = (unsigned long)vec[i].iov_base;
- sg_list [num_sge].length = size;
- sg_list [num_sge].lkey = post->ctx.mr[i]->lkey;
-
- xfer_len -= size;
- }
-
- *idx = i;
-
- if (size < vec[i - 1].iov_len) {
- vec[i - 1].iov_base += size;
- vec[i - 1].iov_len -= size;
- *idx = i - 1;
- }
-
- wr.sg_list = sg_list;
- wr.num_sge = num_sge;
- wr.wr_id = (unsigned long) gf_rdma_post_ref (post);
- wr.wr.rdma.rkey = writech->wc_target.rs_handle;
- wr.wr.rdma.remote_addr = writech->wc_target.rs_offset;
-
- ret = ibv_post_send(peer->qp, &wr, &bad_wr);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, "rdma write to "
- "client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- ret = -1;
- }
-
- GF_FREE (sg_list);
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_do_gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- struct iovec *vector, int count,
- struct iobref *iobref,
- gf_rdma_reply_info_t *reply_info)
-{
- int i = 0, payload_idx = 0;
- uint32_t payload_size = 0, xfer_len = 0;
- int32_t ret = -1;
-
- if (count != 0) {
- payload_size = iov_length (vector, count);
- }
-
- if (payload_size == 0) {
- ret = 0;
- goto out;
- }
-
- ret = __gf_rdma_register_local_mr_for_rdma (peer, vector, count,
- &post->ctx);
- if (ret == -1) {
- goto out;
- }
-
- post->ctx.iobref = iobref_ref (iobref);
-
- for (i = 0; (i < reply_info->wc_array->wc_nchunks)
- && (payload_size != 0);
- i++) {
- xfer_len = min (payload_size,
- reply_info->wc_array->wc_array[i].wc_target.rs_length);
-
- ret = __gf_rdma_write (peer, post, vector, xfer_len, &payload_idx,
- &reply_info->wc_array->wc_array[i]);
- if (ret == -1) {
- goto out;
- }
-
- payload_size -= xfer_len;
- }
-
- ret = 0;
-out:
-
- return ret;
-}
-
-
-int32_t
-__gf_rdma_send_reply_type_nomsg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- char *buf = NULL;
- uint32_t payload_size = 0;
- int count = 0, i = 0;
- int32_t ret = 0;
- struct iovec vector[MAX_IOVEC];
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
- peer->send_count);
-
- header->rm_type = hton32 (GF_RDMA_NOMSG);
-
- payload_size = iov_length (entry->rpchdr, entry->rpchdr_count) +
- iov_length (entry->proghdr, entry->proghdr_count);
-
- /* encode reply chunklist */
- buf = (char *)&header->rm_body.rm_chunks[2];
- ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post,
- reply_info, (uint32_t **)&buf);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "encoding write chunks failed");
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- goto out;
- }
-
- gf_rdma_post_ref (post);
-
- for (i = 0; i < entry->rpchdr_count; i++) {
- vector[count++] = entry->rpchdr[i];
- }
-
- for (i = 0; i < entry->proghdr_count; i++) {
- vector[count++] = entry->proghdr[i];
- }
-
- ret = __gf_rdma_do_gf_rdma_write (peer, post, vector, count,
- entry->iobref, reply_info);
- if (ret == -1) {
- gf_rdma_post_unref (post);
- goto out;
- }
-
- ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf));
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "gf_rdma_post_send to client (%s) failed with "
- "ret = %d (%s)", peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- ret = -1;
- gf_rdma_post_unref (post);
- } else {
- ret = payload_size;
- }
-
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_send_reply_type_msg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post, gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- int32_t send_size = 0, ret = 0;
- char *ptr = NULL;
- uint32_t payload_size = 0;
-
- send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
- + iov_length (entry->proghdr, entry->proghdr_count)
- + GLUSTERFS_RDMA_MAX_HEADER_SIZE;
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "client has provided only write chunks, but the "
- "combined size of rpc and program header (%d) is "
- "exceeding the size of msg that can be sent using "
- "RDMA send (%d)", send_size,
- GLUSTERFS_RDMA_INLINE_THRESHOLD);
-
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
- peer->send_count);
-
- payload_size = iov_length (entry->prog_payload,
- entry->prog_payload_count);
- ptr = (char *)&header->rm_body.rm_chunks[1];
-
- ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post,
- reply_info,
- (uint32_t **)&ptr);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "encoding write chunks failed");
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- goto out;
- }
-
- *(uint32_t *)ptr = 0; /* terminate reply chunklist */
- ptr += sizeof (uint32_t);
-
- gf_rdma_post_ref (post);
-
- ret = __gf_rdma_do_gf_rdma_write (peer, post, entry->prog_payload,
- entry->prog_payload_count,
- entry->iobref, reply_info);
- if (ret == -1) {
- gf_rdma_post_unref (post);
- goto out;
- }
-
- iov_unload (ptr, entry->rpchdr, entry->rpchdr_count);
- ptr += iov_length (entry->rpchdr, entry->rpchdr_count);
-
- iov_unload (ptr, entry->proghdr, entry->proghdr_count);
- ptr += iov_length (entry->proghdr, entry->proghdr_count);
-
- ret = gf_rdma_post_send (peer->qp, post, (ptr - post->buf));
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma send to client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- ret = -1;
- } else {
- ret = send_size + payload_size;
- }
-
-out:
- return ret;
-}
-
-
-void
-gf_rdma_reply_info_destroy (gf_rdma_reply_info_t *reply_info)
-{
- if (reply_info == NULL) {
- goto out;
- }
-
- if (reply_info->wc_array != NULL) {
- GF_FREE (reply_info->wc_array);
- reply_info->wc_array = NULL;
- }
-
- mem_put (reply_info);
-out:
- return;
-}
-
-
-gf_rdma_reply_info_t *
-gf_rdma_reply_info_alloc (gf_rdma_peer_t *peer)
-{
- gf_rdma_reply_info_t *reply_info = NULL;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- reply_info = mem_get (priv->device->reply_info_pool);
- if (reply_info == NULL) {
- goto out;
- }
-
- memset (reply_info, 0, sizeof (*reply_info));
- reply_info->pool = priv->device->reply_info_pool;
-
-out:
- return reply_info;
-}
-
-
-int32_t
-__gf_rdma_ioq_churn_reply (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post)
-{
- gf_rdma_reply_info_t *reply_info = NULL;
- int32_t ret = -1;
- gf_rdma_chunktype_t type = gf_rdma_noch;
-
- if ((peer == NULL) || (entry == NULL) || (post == NULL)) {
- goto out;
- }
-
- reply_info = entry->msg.reply_info;
- if (reply_info != NULL) {
- type = reply_info->type;
- }
-
- switch (type) {
- case gf_rdma_noch:
- ret = __gf_rdma_send_reply_inline (peer, entry, post,
- reply_info);
- break;
-
- case gf_rdma_replych:
- ret = __gf_rdma_send_reply_type_nomsg (peer, entry, post,
- reply_info);
- break;
-
- case gf_rdma_writech:
- ret = __gf_rdma_send_reply_type_msg (peer, entry, post,
- reply_info);
- break;
-
- default:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "invalid chunktype (%d) specified for sending reply",
- type);
- break;
- }
-
- if (reply_info != NULL) {
- gf_rdma_reply_info_destroy (reply_info);
- }
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_ioq_churn_entry (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry)
-{
- int32_t ret = 0, quota = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_post_t *post = NULL;
-
- priv = peer->trans->private;
- options = &priv->options;
- device = priv->device;
-
- quota = __gf_rdma_quota_get (peer);
- if (quota > 0) {
- post = gf_rdma_get_post (&device->sendq);
- if (post == NULL) {
- post = gf_rdma_new_post (device,
- (options->send_size + 2048),
- GF_RDMA_SEND_POST);
- }
-
- if (post == NULL) {
- ret = -1;
- goto out;
- }
-
- if (entry->is_request) {
- ret = __gf_rdma_ioq_churn_request (peer, entry, post);
- } else {
- ret = __gf_rdma_ioq_churn_reply (peer, entry, post);
- }
-
- if (ret != 0) {
- __gf_rdma_ioq_entry_free (entry);
- }
- } else {
- ret = 0;
- }
-
-out:
- return ret;
-}
-
-
-static int32_t
-__gf_rdma_ioq_churn (gf_rdma_peer_t *peer)
-{
- gf_rdma_ioq_t *entry = NULL;
- int32_t ret = 0;
-
- while (!list_empty (&peer->ioq))
- {
- /* pick next entry */
- entry = peer->ioq_next;
-
- ret = __gf_rdma_ioq_churn_entry (peer, entry);
-
- if (ret <= 0)
- break;
- }
-
- /*
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- ret = __gf_rdma_ioq_churn_entry (peer, entry);
- if (ret <= 0) {
- break;
- }
- }
- */
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_writev (rpc_transport_t *this, gf_rdma_ioq_t *entry)
-{
- int32_t ret = 0, need_append = 1;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- priv = this->private;
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (!priv->connected) {
- gf_log (this->name, GF_LOG_DEBUG,
- "rdma is not connected to post a "
- "send request");
- ret = -1;
- goto unlock;
- }
-
- peer = &priv->peer;
- if (list_empty (&peer->ioq)) {
- ret = __gf_rdma_ioq_churn_entry (peer, entry);
- if (ret != 0) {
- need_append = 0;
- }
- }
-
- if (need_append) {
- list_add_tail (&entry->list, &peer->ioq);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
- return ret;
-}
-
-
-gf_rdma_ioq_t *
-gf_rdma_ioq_new (rpc_transport_t *this, rpc_transport_data_t *data)
-{
- gf_rdma_ioq_t *entry = NULL;
- int count = 0, i = 0;
- rpc_transport_msg_t *msg = NULL;
- gf_rdma_private_t *priv = NULL;
-
- if ((data == NULL) || (this == NULL)) {
- goto out;
- }
-
- priv = this->private;
-
- entry = mem_get (priv->device->ioq_pool);
- if (entry == NULL) {
- goto out;
- }
- memset (entry, 0, sizeof (*entry));
- entry->pool = priv->device->ioq_pool;
-
- if (data->is_request) {
- msg = &data->data.req.msg;
- if (data->data.req.rsp.rsphdr_count != 0) {
- for (i = 0; i < data->data.req.rsp.rsphdr_count; i++) {
- entry->msg.request.rsphdr_vec[i]
- = data->data.req.rsp.rsphdr[i];
- }
-
- entry->msg.request.rsphdr_count =
- data->data.req.rsp.rsphdr_count;
- }
-
- if (data->data.req.rsp.rsp_payload_count != 0) {
- for (i = 0; i < data->data.req.rsp.rsp_payload_count;
- i++) {
- entry->msg.request.rsp_payload[i]
- = data->data.req.rsp.rsp_payload[i];
- }
-
- entry->msg.request.rsp_payload_count =
- data->data.req.rsp.rsp_payload_count;
- }
-
- entry->msg.request.rpc_req = data->data.req.rpc_req;
-
- if (data->data.req.rsp.rsp_iobref != NULL) {
- entry->msg.request.rsp_iobref
- = iobref_ref (data->data.req.rsp.rsp_iobref);
- }
- } else {
- msg = &data->data.reply.msg;
- entry->msg.reply_info = data->data.reply.private;
- }
-
- entry->is_request = data->is_request;
-
- count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
-
- GF_ASSERT (count <= MAX_IOVEC);
-
- if (msg->rpchdr != NULL) {
- memcpy (&entry->rpchdr[0], msg->rpchdr,
- sizeof (struct iovec) * msg->rpchdrcount);
- entry->rpchdr_count = msg->rpchdrcount;
- }
-
- if (msg->proghdr != NULL) {
- memcpy (&entry->proghdr[0], msg->proghdr,
- sizeof (struct iovec) * msg->proghdrcount);
- entry->proghdr_count = msg->proghdrcount;
- }
-
- if (msg->progpayload != NULL) {
- memcpy (&entry->prog_payload[0], msg->progpayload,
- sizeof (struct iovec) * msg->progpayloadcount);
- entry->prog_payload_count = msg->progpayloadcount;
- }
-
- if (msg->iobref != NULL) {
- entry->iobref = iobref_ref (msg->iobref);
- }
-
- INIT_LIST_HEAD (&entry->list);
-
-out:
- return entry;
-}
-
-
-int32_t
-gf_rdma_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
-{
- int32_t ret = 0;
- gf_rdma_ioq_t *entry = NULL;
- rpc_transport_data_t data = {0, };
-
- if (req == NULL) {
- goto out;
- }
-
- data.is_request = 1;
- data.data.req = *req;
-
- entry = gf_rdma_ioq_new (this, &data);
- if (entry == NULL) {
- goto out;
- }
-
- ret = gf_rdma_writev (this, entry);
-
- if (ret > 0) {
- ret = 0;
- } else if (ret < 0) {
- rpc_transport_disconnect (this);
- }
-
-out:
- return ret;
-}
-
-int32_t
-gf_rdma_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
-{
- int32_t ret = 0;
- gf_rdma_ioq_t *entry = NULL;
- rpc_transport_data_t data = {0, };
-
- if (reply == NULL) {
- goto out;
- }
-
- data.data.reply = *reply;
-
- entry = gf_rdma_ioq_new (this, &data);
- if (entry == NULL) {
- goto out;
- }
-
- ret = gf_rdma_writev (this, entry);
- if (ret > 0) {
- ret = 0;
- } else if (ret < 0) {
- rpc_transport_disconnect (this);
- }
-
-out:
- return ret;
-}
-
-#if 0
-static int
-gf_rdma_receive (rpc_transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- gf_rdma_private_t *priv = this->private;
- /* TODO: return error if !priv->connected, check with locks */
- /* TODO: boundry checks for data_ptr/offset */
- char *copy_from = NULL;
- gf_rdma_header_t *header = NULL;
- uint32_t size1, size2, data_len = 0;
- char *hdr = NULL;
- struct iobuf *iobuf = NULL;
- int32_t ret = 0;
-
- pthread_mutex_lock (&priv->recv_mutex);
- {
-/*
- while (!priv->data_ptr)
- pthread_cond_wait (&priv->recv_cond, &priv->recv_mutex);
-*/
-
- copy_from = priv->data_ptr + priv->data_offset;
-
- priv->data_ptr = NULL;
- data_len = priv->data_len;
- pthread_cond_broadcast (&priv->recv_cond);
- }
- pthread_mutex_unlock (&priv->recv_mutex);
-
- header = (gf_rdma_header_t *)copy_from;
- if (strcmp (header->colonO, ":O")) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "%s: corrupt header received", this->name);
- ret = -1;
- goto err;
- }
-
- size1 = ntoh32 (header->size1);
- size2 = ntoh32 (header->size2);
-
- if (data_len != (size1 + size2 + sizeof (*header))) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "%s: sizeof data read from transport is not equal "
- "to the size specified in the header",
- this->name);
- ret = -1;
- goto err;
- }
-
- copy_from += sizeof (*header);
-
- if (size1) {
- hdr = GF_CALLOC (1, size1, gf_common_mt_char);
- if (!hdr) {
- gf_log (this->name, GF_LOG_ERROR,
- "unable to allocate header for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (hdr, copy_from, size1);
- copy_from += size1;
- *hdr_p = hdr;
- }
- *hdrlen_p = size1;
-
- if (size2) {
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size2);
- if (!iobuf) {
- gf_log (this->name, GF_LOG_ERROR,
- "unable to allocate IO buffer for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (iobuf->ptr, copy_from, size2);
- *iobuf_p = iobuf;
- }
-
-err:
- return ret;
-}
-#endif
-
-
-static void
-gf_rdma_destroy_cq (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- if (device->recv_cq)
- ibv_destroy_cq (device->recv_cq);
- device->recv_cq = NULL;
-
- if (device->send_cq)
- ibv_destroy_cq (device->send_cq);
- device->send_cq = NULL;
-
- return;
-}
-
-
-static int32_t
-gf_rdma_create_cq (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
- uint64_t send_cqe = 0;
- int32_t ret = 0;
- struct ibv_device_attr device_attr = {{0}, };
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- device->recv_cq = ibv_create_cq (priv->device->context,
- options->recv_count * 2,
- device,
- device->recv_chan,
- 0);
- if (!device->recv_cq) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: creation of CQ for device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- } else if (ibv_req_notify_cq (device->recv_cq, 0)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on recv CQ of device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- }
-
- do {
- ret = ibv_query_device (priv->device->context, &device_attr);
- if (ret != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_query_device on %s returned %d (%s)",
- this->name, priv->device->device_name, ret,
- (ret > 0) ? strerror (ret) : "");
- ret = -1;
- goto out;
- }
-
- send_cqe = options->send_count * 128;
- send_cqe = (send_cqe > device_attr.max_cqe)
- ? device_attr.max_cqe : send_cqe;
-
- /* TODO: make send_cq size dynamically adaptive */
- device->send_cq = ibv_create_cq (priv->device->context,
- send_cqe, device,
- device->send_chan, 0);
- if (!device->send_cq) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: creation of send_cq for device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- }
-
- if (ibv_req_notify_cq (device->send_cq, 0)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on send_cq for device %s"
- " failed", this->name, device->device_name);
- ret = -1;
- goto out;
- }
- } while (0);
-
-out:
- if (ret != 0)
- gf_rdma_destroy_cq (this);
-
- return ret;
-}
-
-
-static int
-gf_rdma_register_peer (gf_rdma_device_t *device, int32_t qp_num,
- gf_rdma_peer_t *peer)
-{
- struct _qpent *ent = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
- int ret = -1;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
-
- pthread_mutex_lock (&qpreg->lock);
- {
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) {
- ent = ent->next;
- }
-
- if (ent->qp_num == qp_num) {
- ret = 0;
- goto unlock;
- }
-
- ent = (struct _qpent *) GF_CALLOC (1, sizeof (*ent),
- gf_common_mt_qpent);
- if (ent == NULL) {
- goto unlock;
- }
-
- /* TODO: ref reg->peer */
- ent->peer = peer;
- ent->next = &qpreg->ents[hash];
- ent->prev = ent->next->prev;
- ent->next->prev = ent;
- ent->prev->next = ent;
- ent->qp_num = qp_num;
- qpreg->count++;
- ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&qpreg->lock);
-
- return ret;
-}
-
-
-static void
-gf_rdma_unregister_peer (gf_rdma_device_t *device, int32_t qp_num)
-{
- struct _qpent *ent = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
-
- pthread_mutex_lock (&qpreg->lock);
- {
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
- if (ent->qp_num != qp_num) {
- pthread_mutex_unlock (&qpreg->lock);
- return;
- }
- ent->prev->next = ent->next;
- ent->next->prev = ent->prev;
- /* TODO: unref reg->peer */
- GF_FREE (ent);
- qpreg->count--;
- }
- pthread_mutex_unlock (&qpreg->lock);
-}
-
-
-static gf_rdma_peer_t *
-__gf_rdma_lookup_peer (gf_rdma_device_t *device, int32_t qp_num)
-{
- struct _qpent *ent = NULL;
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
-
- if (ent != &qpreg->ents[hash]) {
- peer = ent->peer;
- }
-
- return peer;
-}
-
-/*
- static gf_rdma_peer_t *
- gf_rdma_lookup_peer (gf_rdma_device_t *device,
- int32_t qp_num)
- {
- gf_rdma_qpreg_t *qpreg = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- qpreg = &device->qpreg;
- pthread_mutex_lock (&qpreg->lock);
- {
- peer = __gf_rdma_lookup_peer (device, qp_num);
- }
- pthread_mutex_unlock (&qpreg->lock);
-
- return peer;
- }
-*/
-
-
-static void
-__gf_rdma_destroy_qp (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
- if (priv->peer.qp) {
- gf_rdma_unregister_peer (priv->device, priv->peer.qp->qp_num);
- ibv_destroy_qp (priv->peer.qp);
- }
- priv->peer.qp = NULL;
-
- return;
-}
-
-
-static int32_t
-gf_rdma_create_qp (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
- int32_t ret = 0;
- gf_rdma_peer_t *peer = NULL;
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- peer = &priv->peer;
-
- struct ibv_qp_init_attr init_attr = {
- .send_cq = device->send_cq,
- .recv_cq = device->recv_cq,
- .srq = device->srq,
- .cap = {
- .max_send_wr = peer->send_count,
- .max_recv_wr = peer->recv_count,
- .max_send_sge = 2,
- .max_recv_sge = 1
- },
- .qp_type = IBV_QPT_RC
- };
-
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_INIT,
- .pkey_index = 0,
- .port_num = options->port,
- .qp_access_flags
- = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE
- };
-
- peer->qp = ibv_create_qp (device->pd, &init_attr);
- if (!peer->qp) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "%s: could not create QP",
- this->name);
- ret = -1;
- goto out;
- } else if (ibv_modify_qp (peer->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_ACCESS_FLAGS)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "%s: failed to modify QP to INIT state",
- this->name);
- ret = -1;
- goto out;
- }
-
- peer->local_lid = gf_rdma_get_local_lid (device->context,
- options->port);
- peer->local_qpn = peer->qp->qp_num;
- peer->local_psn = lrand48 () & 0xffffff;
-
- ret = gf_rdma_register_peer (device, peer->qp->qp_num, peer);
-
-out:
- if (ret == -1)
- __gf_rdma_destroy_qp (this);
-
- return ret;
-}
-
-
-static void
-gf_rdma_destroy_posts (rpc_transport_t *this)
-{
-
-}
-
-
-static int32_t
-__gf_rdma_create_posts (rpc_transport_t *this, int32_t count, int32_t size,
- gf_rdma_queue_t *q, gf_rdma_post_type_t type)
-{
- int32_t i = 0;
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- for (i=0 ; i<count ; i++) {
- gf_rdma_post_t *post = NULL;
-
- post = gf_rdma_new_post (device, size + 2048, type);
- if (!post) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: post creation failed",
- this->name);
- ret = -1;
- break;
- }
-
- gf_rdma_put_post (q, post);
- }
- return ret;
-}
-
-
-static int32_t
-gf_rdma_create_posts (rpc_transport_t *this)
-{
- int32_t i = 0, ret = 0;
- gf_rdma_post_t *post = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- ret = __gf_rdma_create_posts (this, options->send_count,
- options->send_size,
- &device->sendq, GF_RDMA_SEND_POST);
- if (!ret)
- ret = __gf_rdma_create_posts (this, options->recv_count,
- options->recv_size,
- &device->recvq,
- GF_RDMA_RECV_POST);
-
- if (!ret) {
- for (i=0 ; i<options->recv_count ; i++) {
- post = gf_rdma_get_post (&device->recvq);
- if (gf_rdma_post_recv (device->srq, post) != 0) {
- ret = -1;
- break;
- }
- }
- }
-
- if (ret)
- gf_rdma_destroy_posts (this);
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_connect_qp (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = this->private;
- gf_rdma_options_t *options = &priv->options;
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_RTR,
- .path_mtu = options->mtu,
- .dest_qp_num = priv->peer.remote_qpn,
- .rq_psn = priv->peer.remote_psn,
- .max_dest_rd_atomic = 1,
- .min_rnr_timer = 12,
- .qp_access_flags
- = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE,
- .ah_attr = {
- .is_global = 0,
- .dlid = priv->peer.remote_lid,
- .sl = 0,
- .src_path_bits = 0,
- .port_num = options->port
- }
- };
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_AV |
- IBV_QP_PATH_MTU |
- IBV_QP_DEST_QPN |
- IBV_QP_RQ_PSN |
- IBV_QP_MAX_DEST_RD_ATOMIC |
- IBV_QP_MIN_RNR_TIMER)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTR\n");
- return -1;
- }
-
- attr.qp_state = IBV_QPS_RTS;
- attr.timeout = options->attr_timeout;
- attr.retry_cnt = options->attr_retry_cnt;
- attr.rnr_retry = options->attr_rnr_retry;
- attr.sq_psn = priv->peer.local_psn;
- attr.max_rd_atomic = 1;
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY |
- IBV_QP_SQ_PSN |
- IBV_QP_MAX_QP_RD_ATOMIC)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTS\n");
- return -1;
- }
-
- return 0;
-}
-
-static int32_t
-__gf_rdma_teardown (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
- __gf_rdma_destroy_qp (this);
-
- if (!list_empty (&priv->peer.ioq)) {
- __gf_rdma_ioq_flush (&priv->peer);
- }
-
- /* TODO: decrement cq size */
- return 0;
-}
-
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
-
-static int
-__tcp_rwv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- int write)
-{
- gf_rdma_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = NULL;
- int opcount = 0;
- int moved = 0;
-
- priv = this->private;
- sock = priv->sock;
- opvector = vector;
- opcount = count;
-
- while (opcount)
- {
- if (write)
- {
- ret = writev (sock, opvector, opcount);
-
- if (ret == 0 || (ret == -1 && errno == EAGAIN))
- {
- /* done for now */
- break;
- }
- }
- else
- {
- ret = readv (sock, opvector, opcount);
-
- if (ret == -1 && errno == EAGAIN)
- {
- /* done for now */
- break;
- }
- }
-
- if (ret == 0)
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
-
- if (ret == -1)
- {
- if (errno == EINTR)
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- if (write && !priv->connected &&
- (errno == ECONNREFUSED))
- gf_log (this->name, GF_LOG_ERROR,
- "possible mismatch of 'rpc-transport-type'"
- " in protocol server and client. "
- "check volume file");
- opcount = -1;
- break;
- }
-
- moved = 0;
-
- while (moved < ret)
- {
- if ((ret - moved) >= opvector[0].iov_len)
- {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- }
- else
- {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len)
- {
- opvector++;
- opcount--;
- }
- }
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
-
- return opcount;
-}
-
-
-static int
-__tcp_readv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __tcp_rwv (this, vector, count,
- pending_vector, pending_count, 0);
-
- return ret;
-}
-
-
-static int
-__tcp_writev (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
-
- ret = __tcp_rwv (this, vector, count, pending_vector,
- pending_count, 1);
-
- if (ret > 0) {
- /* TODO: Avoid multiple calls when socket is already
- registered for POLLOUT */
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock, priv->idx, -1, 1);
- } else if (ret == 0) {
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock,
- priv->idx, -1, 0);
- }
-
- return ret;
-}
-
-
-/*
- * allocates new memory to hold write-chunklist. New memory is needed since
- * write-chunklist will be used while sending reply and the post holding initial
- * write-chunklist sent from client will be put back to srq before a pollin
- * event is sent to upper layers.
- */
-int32_t
-gf_rdma_get_write_chunklist (char **ptr, gf_rdma_write_array_t **write_ary)
-{
- gf_rdma_write_array_t *from = NULL, *to = NULL;
- int32_t ret = -1, size = 0, i = 0;
-
- from = (gf_rdma_write_array_t *) *ptr;
- if (from->wc_discrim == 0) {
- ret = 0;
- goto out;
- }
-
- from->wc_nchunks = ntoh32 (from->wc_nchunks);
-
- size = sizeof (*from)
- + (sizeof (gf_rdma_write_chunk_t) * from->wc_nchunks);
-
- to = GF_CALLOC (1, size, gf_common_mt_char);
- if (to == NULL) {
- ret = -1;
- goto out;
- }
-
- to->wc_discrim = ntoh32 (from->wc_discrim);
- to->wc_nchunks = from->wc_nchunks;
-
- for (i = 0; i < to->wc_nchunks; i++) {
- to->wc_array[i].wc_target.rs_handle
- = ntoh32 (from->wc_array[i].wc_target.rs_handle);
- to->wc_array[i].wc_target.rs_length
- = ntoh32 (from->wc_array[i].wc_target.rs_length);
- to->wc_array[i].wc_target.rs_offset
- = ntoh64 (from->wc_array[i].wc_target.rs_offset);
- }
-
- *write_ary = to;
- ret = 0;
- *ptr = (char *)&from->wc_array[i].wc_target.rs_handle;
-out:
- return ret;
-}
-
-
-/*
- * does not allocate new memory to hold read-chunklist. New memory is not
- * needed, since post is not put back to srq till we've completed all the
- * rdma-reads and hence readchunk-list can point to memory held by post.
- */
-int32_t
-gf_rdma_get_read_chunklist (char **ptr, gf_rdma_read_chunk_t **readch)
-{
- int32_t ret = -1;
- gf_rdma_read_chunk_t *chunk = NULL;
- int i = 0;
-
- chunk = (gf_rdma_read_chunk_t *)*ptr;
- if (chunk[0].rc_discrim == 0) {
- ret = 0;
- goto out;
- }
-
- for (i = 0; chunk[i].rc_discrim != 0; i++) {
- chunk[i].rc_discrim = ntoh32 (chunk[i].rc_discrim);
- chunk[i].rc_position = ntoh32 (chunk[i].rc_position);
- chunk[i].rc_target.rs_handle
- = ntoh32 (chunk[i].rc_target.rs_handle);
- chunk[i].rc_target.rs_length
- = ntoh32 (chunk[i].rc_target.rs_length);
- chunk[i].rc_target.rs_offset
- = ntoh64 (chunk[i].rc_target.rs_offset);
- }
-
- *readch = &chunk[0];
- ret = 0;
- *ptr = (char *)&chunk[i].rc_discrim;
-out:
- return ret;
-}
-
-
-inline int32_t
-gf_rdma_decode_error_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- size_t bytes_in_post)
-{
- gf_rdma_header_t *header = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int32_t ret = -1;
-
- header = (gf_rdma_header_t *)post->buf;
- header->rm_body.rm_error.rm_type
- = ntoh32 (header->rm_body.rm_error.rm_type);
- if (header->rm_body.rm_error.rm_type == ERR_VERS) {
- header->rm_body.rm_error.rm_version.gf_rdma_vers_low =
- ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_low);
- header->rm_body.rm_error.rm_version.gf_rdma_vers_high =
- ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_high);
- }
-
- iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, bytes_in_post);
- if (iobuf == NULL) {
- ret = -1;
- goto out;
- }
-
- post->ctx.iobref = iobref = iobref_new ();
- if (iobref == NULL) {
- ret = -1;
- goto out;
- }
-
- iobref_add (iobref, iobuf);
- iobuf_unref (iobuf);
- /*
- * FIXME: construct an appropriate rpc-msg here, what is being sent
- * to rpc is not correct.
- */
- post->ctx.vector[0].iov_base = iobuf_ptr (iobuf);
- post->ctx.vector[0].iov_len = bytes_in_post;
-
- memcpy (post->ctx.vector[0].iov_base, (char *)post->buf,
- post->ctx.vector[0].iov_len);
- post->ctx.count = 1;
-
- iobuf = NULL;
- iobref = NULL;
-
-out:
- if (ret == -1) {
- if (iobuf != NULL) {
- iobuf_unref (iobuf);
- }
-
- if (iobref != NULL) {
- iobref_unref (iobref);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-gf_rdma_decode_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
- gf_rdma_reply_info_t *reply_info = NULL;
- char *ptr = NULL;
- gf_rdma_write_array_t *write_ary = NULL;
- size_t header_len = 0;
-
- header = (gf_rdma_header_t *)post->buf;
-
- ptr = (char *)&header->rm_body.rm_chunks[0];
-
- ret = gf_rdma_get_read_chunklist (&ptr, readch);
- if (ret == -1) {
- goto out;
- }
-
- /* skip terminator of read-chunklist */
- ptr = ptr + sizeof (uint32_t);
-
- ret = gf_rdma_get_write_chunklist (&ptr, &write_ary);
- if (ret == -1) {
- goto out;
- }
-
- /* skip terminator of write-chunklist */
- ptr = ptr + sizeof (uint32_t);
-
- if (write_ary != NULL) {
- reply_info = gf_rdma_reply_info_alloc (peer);
- if (reply_info == NULL) {
- ret = -1;
- goto out;
- }
-
- reply_info->type = gf_rdma_writech;
- reply_info->wc_array = write_ary;
- reply_info->rm_xid = header->rm_xid;
- } else {
- ret = gf_rdma_get_write_chunklist (&ptr, &write_ary);
- if (ret == -1) {
- goto out;
- }
-
- if (write_ary != NULL) {
- reply_info = gf_rdma_reply_info_alloc (peer);
- if (reply_info == NULL) {
- ret = -1;
- goto out;
- }
-
- reply_info->type = gf_rdma_replych;
- reply_info->wc_array = write_ary;
- reply_info->rm_xid = header->rm_xid;
- }
- }
-
- /* skip terminator of reply chunk */
- ptr = ptr + sizeof (uint32_t);
- if (header->rm_type != GF_RDMA_NOMSG) {
- header_len = (long)ptr - (long)post->buf;
- post->ctx.vector[0].iov_len = (bytes_in_post - header_len);
-
- post->ctx.hdr_iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool,
- (bytes_in_post - header_len));
- if (post->ctx.hdr_iobuf == NULL) {
- ret = -1;
- goto out;
- }
-
- post->ctx.vector[0].iov_base = iobuf_ptr (post->ctx.hdr_iobuf);
- memcpy (post->ctx.vector[0].iov_base, ptr,
- post->ctx.vector[0].iov_len);
- post->ctx.count = 1;
- }
-
- post->ctx.reply_info = reply_info;
-out:
- if (ret == -1) {
- if (*readch != NULL) {
- GF_FREE (*readch);
- *readch = NULL;
- }
-
- if (write_ary != NULL) {
- GF_FREE (write_ary);
- }
- }
-
- return ret;
-}
-
-
-/* Assumes only one of either write-chunklist or a reply chunk is present */
-int32_t
-gf_rdma_decode_header (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
-
- header = (gf_rdma_header_t *)post->buf;
-
- header->rm_xid = ntoh32 (header->rm_xid);
- header->rm_vers = ntoh32 (header->rm_vers);
- header->rm_credit = ntoh32 (header->rm_credit);
- header->rm_type = ntoh32 (header->rm_type);
-
- switch (header->rm_type) {
- case GF_RDMA_MSG:
- case GF_RDMA_NOMSG:
- ret = gf_rdma_decode_msg (peer, post, readch, bytes_in_post);
- break;
-
- case GF_RDMA_MSGP:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "rdma msg of msg-type GF_RDMA_MSGP should not have "
- "been received");
- ret = -1;
- break;
-
- case GF_RDMA_DONE:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "rdma msg of msg-type GF_RDMA_DONE should not have "
- "been received");
- ret = -1;
- break;
-
- case GF_RDMA_ERROR:
- /* ret = gf_rdma_decode_error_msg (peer, post, bytes_in_post); */
- break;
-
- default:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "unknown rdma msg-type (%d)", header->rm_type);
- }
-
- return ret;
-}
-
-
-int32_t
-__gf_rdma_read (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *to,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1;
- struct ibv_sge list = {0, };
- struct ibv_send_wr wr = {0, }, *bad_wr = NULL;
-
- ret = __gf_rdma_register_local_mr_for_rdma (peer, to, 1, &post->ctx);
- if (ret == -1) {
- goto out;
- }
-
- list.addr = (unsigned long) to->iov_base;
- list.length = to->iov_len;
- list.lkey = post->ctx.mr[post->ctx.mr_count - 1]->lkey;
-
- wr.wr_id = (unsigned long) gf_rdma_post_ref (post);
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IBV_WR_RDMA_READ;
- wr.send_flags = IBV_SEND_SIGNALED;
- wr.wr.rdma.remote_addr = readch->rc_target.rs_offset;
- wr.wr.rdma.rkey = readch->rc_target.rs_handle;
-
- ret = ibv_post_send (peer->qp, &wr, &bad_wr);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, "rdma read from client "
- "(%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier,
- ret, (ret > 0) ? strerror (ret) : "");
- ret = -1;
- gf_rdma_post_unref (post);
- }
-out:
- return ret;
-}
-
-
-int32_t
-gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1, i = 0, count = 0;
- size_t size = 0;
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- for (i = 0; readch[i].rc_discrim != 0; i++) {
- size += readch[i].rc_target.rs_length;
- }
-
- if (i == 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "message type specified as rdma-read but there are no "
- "rdma read-chunks present");
- goto out;
- }
-
- post->ctx.gf_rdma_reads = i;
-
- iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, size);
- if (iobuf == NULL) {
- goto out;
- }
-
- if (post->ctx.iobref == NULL) {
- post->ctx.iobref = iobref_new ();
- if (post->ctx.iobref == NULL) {
- iobuf_unref (iobuf);
- goto out;
- }
- }
-
- iobref_add (post->ctx.iobref, iobuf);
- iobuf_unref (iobuf);
-
- ptr = iobuf_ptr (iobuf);
- iobuf = NULL;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (!priv->connected) {
- goto unlock;
- }
-
- for (i = 0; readch[i].rc_discrim != 0; i++) {
- count = post->ctx.count++;
- post->ctx.vector[count].iov_base = ptr;
- post->ctx.vector[count].iov_len
- = readch[i].rc_target.rs_length;
-
- ret = __gf_rdma_read (peer, post,
- &post->ctx.vector[count],
- &readch[i]);
- if (ret == -1) {
- goto unlock;
- }
-
- ptr += readch[i].rc_target.rs_length;
- }
-
- ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-out:
-
- if (ret == -1) {
- if (iobuf != NULL) {
- iobuf_unref (iobuf);
- }
- }
-
- return ret;
-}
-
-
-int32_t
-gf_rdma_pollin_notify (gf_rdma_peer_t *peer, gf_rdma_post_t *post)
-{
- int32_t ret = -1;
- enum msg_type msg_type = 0;
- struct rpc_req *rpc_req = NULL;
- gf_rdma_request_context_t *request_context = NULL;
- rpc_request_info_t request_info = {0, };
- gf_rdma_private_t *priv = NULL;
- uint32_t *ptr = NULL;
- rpc_transport_pollin_t *pollin = NULL;
-
- if ((peer == NULL) || (post == NULL)) {
- goto out;
- }
-
- if (post->ctx.iobref == NULL) {
- post->ctx.iobref = iobref_new ();
- if (post->ctx.iobref == NULL) {
- goto out;
- }
-
- /* handling the case where both hdr and payload of
- * GF_FOP_READ_CBK were received in a single iobuf
- * because of server sending entire msg as inline without
- * doing rdma writes.
- */
- if (post->ctx.hdr_iobuf)
- iobref_add (post->ctx.iobref, post->ctx.hdr_iobuf);
- }
-
- pollin = rpc_transport_pollin_alloc (peer->trans,
- post->ctx.vector,
- post->ctx.count,
- post->ctx.hdr_iobuf,
- post->ctx.iobref,
- post->ctx.reply_info);
- if (pollin == NULL) {
- goto out;
- }
-
- ptr = (uint32_t *)pollin->vector[0].iov_base;
-
- request_info.xid = ntoh32 (*ptr);
- msg_type = ntoh32 (*(ptr + 1));
-
- if (msg_type == REPLY) {
- ret = rpc_transport_notify (peer->trans,
- RPC_TRANSPORT_MAP_XID_REQUEST,
- &request_info);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot get request information from rpc "
- "layer");
- goto out;
- }
-
- rpc_req = request_info.rpc_req;
- if (rpc_req == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "rpc request structure not found");
- ret = -1;
- goto out;
- }
-
- request_context = rpc_req->conn_private;
- rpc_req->conn_private = NULL;
-
- priv = peer->trans->private;
- if (request_context != NULL) {
- pthread_mutex_lock (&priv->write_mutex);
- {
- __gf_rdma_request_context_destroy (request_context);
- }
- pthread_mutex_unlock (&priv->write_mutex);
- } else {
- gf_rdma_quota_put (peer);
- }
-
- pollin->is_reply = 1;
- }
-
- ret = rpc_transport_notify (peer->trans, RPC_TRANSPORT_MSG_RECEIVED,
- pollin);
-
-out:
- if (pollin != NULL) {
- pollin->private = NULL;
- rpc_transport_pollin_destroy (pollin);
- }
-
- return ret;
-}
-
-
-int32_t
-gf_rdma_recv_reply (gf_rdma_peer_t *peer, gf_rdma_post_t *post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
- gf_rdma_reply_info_t *reply_info = NULL;
- gf_rdma_write_array_t *wc_array = NULL;
- int i = 0;
- uint32_t *ptr = NULL;
- gf_rdma_request_context_t *ctx = NULL;
- rpc_request_info_t request_info = {0, };
- struct rpc_req *rpc_req = NULL;
-
- header = (gf_rdma_header_t *)post->buf;
- reply_info = post->ctx.reply_info;
-
- /* no write chunklist, just notify upper layers */
- if (reply_info == NULL) {
- ret = 0;
- goto out;
- }
-
- wc_array = reply_info->wc_array;
-
- if (header->rm_type == GF_RDMA_NOMSG) {
- post->ctx.vector[0].iov_base
- = (void *)(long)wc_array->wc_array[0].wc_target.rs_offset;
- post->ctx.vector[0].iov_len
- = wc_array->wc_array[0].wc_target.rs_length;
-
- post->ctx.count = 1;
- } else {
- for (i = 0; i < wc_array->wc_nchunks; i++) {
- post->ctx.vector[i + 1].iov_base
- = (void *)(long)wc_array->wc_array[i].wc_target.rs_offset;
- post->ctx.vector[i + 1].iov_len
- = wc_array->wc_array[i].wc_target.rs_length;
- }
-
- post->ctx.count += wc_array->wc_nchunks;
- }
-
- ptr = (uint32_t *)post->ctx.vector[0].iov_base;
- request_info.xid = ntoh32 (*ptr);
-
- ret = rpc_transport_notify (peer->trans,
- RPC_TRANSPORT_MAP_XID_REQUEST,
- &request_info);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot get request information from rpc "
- "layer");
- goto out;
- }
-
- rpc_req = request_info.rpc_req;
- if (rpc_req == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "rpc request structure not found");
- ret = -1;
- goto out;
- }
-
- ctx = rpc_req->conn_private;
- if ((post->ctx.iobref == NULL) && ctx->rsp_iobref) {
- post->ctx.iobref = iobref_ref (ctx->rsp_iobref);
- }
-
- ret = 0;
-
- gf_rdma_reply_info_destroy (reply_info);
-
-out:
- if (ret == 0) {
- ret = gf_rdma_pollin_notify (peer, post);
- }
-
- return ret;
-}
-
-
-inline int32_t
-gf_rdma_recv_request (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1;
-
- if (readch != NULL) {
- ret = gf_rdma_do_reads (peer, post, readch);
- } else {
- ret = gf_rdma_pollin_notify (peer, post);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "pollin notification failed");
- }
- }
-
- return ret;
-}
-
-void
-gf_rdma_process_recv (gf_rdma_peer_t *peer, struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- gf_rdma_read_chunk_t *readch = NULL;
- int ret = -1;
- uint32_t *ptr = NULL;
- enum msg_type msg_type = 0;
- gf_rdma_header_t *header = NULL;
-
- post = (gf_rdma_post_t *) (long) wc->wr_id;
- if (post == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "no post found in successful work completion element");
- goto out;
- }
-
- ret = gf_rdma_decode_header (peer, post, &readch, wc->byte_len);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "decoding of header failed");
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- switch (header->rm_type) {
- case GF_RDMA_MSG:
- ptr = (uint32_t *)post->ctx.vector[0].iov_base;
- msg_type = ntoh32 (*(ptr + 1));
- break;
-
- case GF_RDMA_NOMSG:
- if (readch != NULL) {
- msg_type = CALL;
- } else {
- msg_type = REPLY;
- }
- break;
-
- case GF_RDMA_ERROR:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "an error has happened while transmission of msg, "
- "disconnecting the transport");
- rpc_transport_disconnect (peer->trans);
- goto out;
-
-/* ret = gf_rdma_pollin_notify (peer, post);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "pollin notification failed");
- }
- goto out;
-*/
-
- default:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "invalid rdma msg-type (%d)", header->rm_type);
- break;
- }
-
- if (msg_type == CALL) {
- ret = gf_rdma_recv_request (peer, post, readch);
- } else {
- ret = gf_rdma_recv_reply (peer, post);
- }
-
-out:
- if (ret == -1) {
- rpc_transport_disconnect (peer->trans);
- }
-
- return;
-}
-
-
-static void *
-gf_rdma_recv_completion_proc (void *data)
-{
- struct ibv_comp_channel *chan = NULL;
- gf_rdma_device_t *device = NULL;;
- gf_rdma_post_t *post = NULL;
- gf_rdma_peer_t *peer = NULL;
- struct ibv_cq *event_cq = NULL;
- struct ibv_wc wc = {0, };
- void *event_ctx = NULL;
- int32_t ret = 0;
-
- chan = data;
-
- while (1) {
- ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_get_cq_event failed, terminating recv "
- "thread %d (%d)", ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq (event_cq, 0);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_req_notify_cq on %s failed, terminating "
- "recv thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- device = (gf_rdma_device_t *) event_ctx;
-
- while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
- post = (gf_rdma_post_t *) (long) wc.wr_id;
-
- pthread_mutex_lock (&device->qpreg.lock);
- {
- peer = __gf_rdma_lookup_peer (device,
- wc.qp_num);
-
- /*
- * keep a refcount on transport so that it
- * does not get freed because of some error
- * indicated by wc.status till we are done
- * with usage of peer and thereby that of trans.
- */
- if (peer != NULL) {
- rpc_transport_ref (peer->trans);
- }
- }
- pthread_mutex_unlock (&device->qpreg.lock);
-
- if (wc.status != IBV_WC_SUCCESS) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "recv work request on `%s' returned "
- "error (%d)", device->device_name,
- wc.status);
- if (peer) {
- rpc_transport_unref (peer->trans);
- rpc_transport_disconnect (peer->trans);
- }
-
- if (post) {
- gf_rdma_post_unref (post);
- }
- continue;
- }
-
- if (peer) {
- gf_rdma_process_recv (peer, &wc);
- rpc_transport_unref (peer->trans);
- } else {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_DEBUG,
- "could not lookup peer for qp_num: %d",
- wc.qp_num);
- }
-
- gf_rdma_post_unref (post);
- }
-
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "ibv_poll_cq on `%s' returned error "
- "(ret = %d, errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
- ibv_ack_cq_events (event_cq, 1);
- }
-
- return NULL;
-}
-
-
-void
-gf_rdma_handle_failed_send_completion (gf_rdma_peer_t *peer, struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- gf_rdma_device_t *device = NULL;
- gf_rdma_private_t *priv = NULL;
-
- if (peer != NULL) {
- priv = peer->trans->private;
- if (priv != NULL) {
- device = priv->device;
- }
- }
-
-
- post = (gf_rdma_post_t *) (long) wc->wr_id;
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "send work request on `%s' returned error "
- "wc.status = %d, wc.vendor_err = %d, post->buf = %p, "
- "wc.byte_len = %d, post->reused = %d",
- (device != NULL) ? device->device_name : NULL, wc->status,
- wc->vendor_err, post->buf, wc->byte_len, post->reused);
-
- if (wc->status == IBV_WC_RETRY_EXC_ERR) {
- gf_log ("rdma", GF_LOG_ERROR, "connection between client and"
- " server not working. check by running "
- "'ibv_srq_pingpong'. also make sure subnet manager"
- " is running (eg: 'opensm'), or check if rdma port is "
- "valid (or active) by running 'ibv_devinfo'. contact "
- "Gluster Support Team if the problem persists.");
- }
-
- if (peer) {
- rpc_transport_disconnect (peer->trans);
- }
-
- return;
-}
-
-
-void
-gf_rdma_handle_successful_send_completion (gf_rdma_peer_t *peer,
- struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- int reads = 0, ret = 0;
- gf_rdma_header_t *header = NULL;
-
- if (wc->opcode != IBV_WC_RDMA_READ) {
- goto out;
- }
-
- post = (gf_rdma_post_t *)(long) wc->wr_id;
-
- pthread_mutex_lock (&post->lock);
- {
- reads = --post->ctx.gf_rdma_reads;
- }
- pthread_mutex_unlock (&post->lock);
-
- if (reads != 0) {
- /* if it is not the last rdma read, we've got nothing to do */
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- if (header->rm_type == GF_RDMA_NOMSG) {
- post->ctx.count = 1;
- post->ctx.vector[0].iov_len += post->ctx.vector[1].iov_len;
- }
-
- ret = gf_rdma_pollin_notify (peer, post);
- if ((ret == -1) && (peer != NULL)) {
- rpc_transport_disconnect (peer->trans);
- }
-
-out:
- return;
-}
-
-
-static void *
-gf_rdma_send_completion_proc (void *data)
-{
- struct ibv_comp_channel *chan = NULL;
- gf_rdma_post_t *post = NULL;
- gf_rdma_peer_t *peer = NULL;
- struct ibv_cq *event_cq = NULL;
- void *event_ctx = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_wc wc = {0, };
- char is_request = 0;
- int32_t ret = 0, quota_ret = 0;
-
- chan = data;
- while (1) {
- ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_get_cq_event on failed, terminating "
- "send thread: %d (%d)", ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq (event_cq, 0);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_req_notify_cq on %s failed, terminating "
- "send thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
- post = (gf_rdma_post_t *) (long) wc.wr_id;
-
- pthread_mutex_lock (&device->qpreg.lock);
- {
- peer = __gf_rdma_lookup_peer (device, wc.qp_num);
-
- /*
- * keep a refcount on transport so that it
- * does not get freed because of some error
- * indicated by wc.status, till we are done
- * with usage of peer and thereby that of trans.
- */
- if (peer != NULL) {
- rpc_transport_ref (peer->trans);
- }
- }
- pthread_mutex_unlock (&device->qpreg.lock);
-
- if (wc.status != IBV_WC_SUCCESS) {
- gf_rdma_handle_failed_send_completion (peer, &wc);
- } else {
- gf_rdma_handle_successful_send_completion (peer,
- &wc);
- }
-
- if (post) {
- is_request = post->ctx.is_request;
-
- ret = gf_rdma_post_unref (post);
- if ((ret == 0)
- && (wc.status == IBV_WC_SUCCESS)
- && !is_request
- && (post->type == GF_RDMA_SEND_POST)
- && (peer != NULL)) {
- /* An GF_RDMA_RECV_POST can end up in
- * gf_rdma_send_completion_proc for
- * rdma-reads, and we do not take
- * quota for getting an GF_RDMA_RECV_POST.
- */
-
- /*
- * if it is request, quota is returned
- * after reply has come.
- */
- quota_ret = gf_rdma_quota_put (peer);
- if (quota_ret < 0) {
- gf_log ("rdma", GF_LOG_DEBUG,
- "failed to send "
- "message");
- }
- }
- }
-
- if (peer) {
- rpc_transport_unref (peer->trans);
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "could not lookup peer for qp_num: %d",
- wc.qp_num);
- }
- }
-
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_poll_cq on `%s' returned error (ret = %d,"
- " errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
-
- ibv_ack_cq_events (event_cq, 1);
- }
-
- return NULL;
-}
-
-
-static void
-gf_rdma_options_init (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- int32_t mtu = 0;
- data_t *temp = NULL;
-
- /* TODO: validate arguments from options below */
-
- priv = this->private;
- options = &priv->options;
- options->send_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/
- options->recv_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/
- options->send_count = 4096;
- options->recv_count = 4096;
- options->attr_timeout = GF_RDMA_TIMEOUT;
- options->attr_retry_cnt = GF_RDMA_RETRY_CNT;
- options->attr_rnr_retry = GF_RDMA_RNR_RETRY;
-
- temp = dict_get (this->options,
- "transport.rdma.work-request-send-count");
- if (temp)
- options->send_count = data_to_int32 (temp);
-
- temp = dict_get (this->options,
- "transport.rdma.work-request-recv-count");
- if (temp)
- options->recv_count = data_to_int32 (temp);
-
- temp = dict_get (this->options, "transport.rdma.attr-timeout");
-
- if (temp)
- options->attr_timeout = data_to_uint8 (temp);
-
- temp = dict_get (this->options, "transport.rdma.attr-retry-cnt");
-
- if (temp)
- options->attr_retry_cnt = data_to_uint8 (temp);
-
- temp = dict_get (this->options, "transport.rdma.attr-rnr-retry");
-
- if (temp)
- options->attr_rnr_retry = data_to_uint8 (temp);
-
- options->port = 1;
- temp = dict_get (this->options,
- "transport.rdma.port");
- if (temp)
- options->port = data_to_uint64 (temp);
-
- options->mtu = mtu = IBV_MTU_2048;
- temp = dict_get (this->options,
- "transport.rdma.mtu");
- if (temp)
- mtu = data_to_int32 (temp);
- switch (mtu) {
- case 256: options->mtu = IBV_MTU_256;
- break;
- case 512: options->mtu = IBV_MTU_512;
- break;
- case 1024: options->mtu = IBV_MTU_1024;
- break;
- case 2048: options->mtu = IBV_MTU_2048;
- break;
- case 4096: options->mtu = IBV_MTU_4096;
- break;
- default:
- if (temp)
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "%s: unrecognized MTU value '%s', defaulting "
- "to '2048'", this->name,
- data_to_str (temp));
- else
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "%s: defaulting MTU to '2048'",
- this->name);
- options->mtu = IBV_MTU_2048;
- break;
- }
-
- temp = dict_get (this->options,
- "transport.rdma.device-name");
- if (temp)
- options->device_name = gf_strdup (temp->data);
-
- return;
-}
-
-static void
-gf_rdma_queue_init (gf_rdma_queue_t *queue)
-{
- pthread_mutex_init (&queue->lock, NULL);
-
- queue->active_posts.next = &queue->active_posts;
- queue->active_posts.prev = &queue->active_posts;
- queue->passive_posts.next = &queue->passive_posts;
- queue->passive_posts.prev = &queue->passive_posts;
-}
-
-
-static gf_rdma_device_t *
-gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx)
-{
- glusterfs_ctx_t *ctx = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- char *device_name = NULL;
- uint32_t port = 0;
- uint8_t active_port = 0;
- int32_t ret = 0;
- int32_t i = 0;
- gf_rdma_device_t *trav = NULL;
-
- priv = this->private;
- options = &priv->options;
- device_name = priv->options.device_name;
- ctx = this->ctx;
- trav = ctx->ib;
- port = priv->options.port;
-
- while (trav) {
- if ((!strcmp (trav->device_name, device_name)) &&
- (trav->port == port))
- break;
- trav = trav->next;
- }
-
- if (!trav) {
-
- trav = GF_CALLOC (1, sizeof (*trav),
- gf_common_mt_rdma_device_t);
- if (trav == NULL) {
- return NULL;
- }
-
- priv->device = trav;
-
- trav->context = ibctx;
-
- ret = ib_get_active_port (trav->context);
-
- if (ret < 0) {
- if (!port) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to find any active ports and "
- "none specified in volume file,"
- " exiting");
- GF_FREE (trav);
- return NULL;
- }
- }
-
- trav->request_ctx_pool = mem_pool_new (gf_rdma_request_context_t,
- GF_RDMA_POOL_SIZE);
- if (trav->request_ctx_pool == NULL) {
- return NULL;
- }
-
- trav->ioq_pool = mem_pool_new (gf_rdma_ioq_t, GF_RDMA_POOL_SIZE);
- if (trav->ioq_pool == NULL) {
- mem_pool_destroy (trav->request_ctx_pool);
- return NULL;
- }
-
- trav->reply_info_pool = mem_pool_new (gf_rdma_reply_info_t,
- GF_RDMA_POOL_SIZE);
- if (trav->reply_info_pool == NULL) {
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->ioq_pool);
- return NULL;
- }
-
-
- active_port = ret;
-
- if (port) {
- ret = ib_check_active_port (trav->context, port);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "On device %s: provided port:%u is "
- "found to be offline, continuing to "
- "use the same port", device_name, port);
- }
- } else {
- priv->options.port = active_port;
- port = active_port;
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Port unspecified in volume file using active "
- "port: %u", port);
- }
-
- trav->device_name = gf_strdup (device_name);
- trav->port = port;
-
- trav->next = ctx->ib;
- ctx->ib = trav;
-
- trav->send_chan = ibv_create_comp_channel (trav->context);
- if (!trav->send_chan) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create send completion channel",
- device_name);
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- return NULL;
- }
-
- trav->recv_chan = ibv_create_comp_channel (trav->context);
- if (!trav->recv_chan) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create recv completion channel");
- /* TODO: cleanup current mess */
- return NULL;
- }
-
- if (gf_rdma_create_cq (this) < 0) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create CQ",
- this->name);
- return NULL;
- }
-
- /* protection domain */
- trav->pd = ibv_alloc_pd (trav->context);
-
- if (!trav->pd) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not allocate protection domain",
- this->name);
- return NULL;
- }
-
- struct ibv_srq_init_attr attr = {
- .attr = {
- .max_wr = options->recv_count,
- .max_sge = 1
- }
- };
- trav->srq = ibv_create_srq (trav->pd, &attr);
-
- if (!trav->srq) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create SRQ",
- this->name);
- return NULL;
- }
-
- /* queue init */
- gf_rdma_queue_init (&trav->sendq);
- gf_rdma_queue_init (&trav->recvq);
-
- if (gf_rdma_create_posts (this) < 0) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not allocate posts",
- this->name);
- return NULL;
- }
-
- /* completion threads */
- ret = pthread_create (&trav->send_thread,
- NULL,
- gf_rdma_send_completion_proc,
- trav->send_chan);
- if (ret) {
- gf_rdma_destroy_posts (this);
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create send completion thread");
- return NULL;
- }
-
- ret = pthread_create (&trav->recv_thread,
- NULL,
- gf_rdma_recv_completion_proc,
- trav->recv_chan);
- if (ret) {
- gf_rdma_destroy_posts (this);
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create recv completion thread");
- return NULL;
- }
-
- /* qpreg */
- pthread_mutex_init (&trav->qpreg.lock, NULL);
- for (i=0; i<42; i++) {
- trav->qpreg.ents[i].next = &trav->qpreg.ents[i];
- trav->qpreg.ents[i].prev = &trav->qpreg.ents[i];
- }
- }
- return trav;
-}
-
-static int32_t
-gf_rdma_init (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- struct ibv_device **dev_list;
- struct ibv_context *ib_ctx = NULL;
- int32_t ret = 0;
-
- priv = this->private;
- options = &priv->options;
-
- ibv_fork_init ();
- gf_rdma_options_init (this);
-
- {
- dev_list = ibv_get_device_list (NULL);
-
- if (!dev_list) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to get IB devices");
- ret = -1;
- goto cleanup;
- }
-
- if (!*dev_list) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "No IB devices found");
- ret = -1;
- goto cleanup;
- }
-
- if (!options->device_name) {
- if (*dev_list) {
- options->device_name =
- gf_strdup (ibv_get_device_name (*dev_list));
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_CRITICAL,
- "IB device list is empty. Check for "
- "'ib_uverbs' module");
- return -1;
- goto cleanup;
- }
- }
-
- while (*dev_list) {
- if (!strcmp (ibv_get_device_name (*dev_list),
- options->device_name)) {
- ib_ctx = ibv_open_device (*dev_list);
-
- if (!ib_ctx) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "Failed to get infiniband"
- "device context");
- ret = -1;
- goto cleanup;
- }
- break;
- }
- ++dev_list;
- }
-
- priv->device = gf_rdma_get_device (this, ib_ctx);
-
- if (!priv->device) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create rdma device for %s",
- options->device_name);
- ret = -1;
- goto cleanup;
- }
- }
-
- priv->peer.trans = this;
- INIT_LIST_HEAD (&priv->peer.ioq);
-
- pthread_mutex_init (&priv->read_mutex, NULL);
- pthread_mutex_init (&priv->write_mutex, NULL);
- pthread_mutex_init (&priv->recv_mutex, NULL);
- pthread_cond_init (&priv->recv_cond, NULL);
-
-cleanup:
- if (-1 == ret) {
- if (ib_ctx)
- ibv_close_device (ib_ctx);
- }
-
- if (dev_list)
- ibv_free_device_list (dev_list);
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_disconnect (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
-
- priv = this->private;
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __gf_rdma_disconnect (this);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-
-static int32_t
-__tcp_connect_finish (int fd)
-{
- int ret = -1;
- int optval = 0;
- socklen_t optlen = sizeof (int);
-
- ret = getsockopt (fd, SOL_SOCKET, SO_ERROR,
- (void *)&optval, &optlen);
-
- if (ret == 0 && optval)
- {
- errno = optval;
- ret = -1;
- }
-
- return ret;
-}
-
-static inline void
-gf_rdma_fill_handshake_data (char *buf, struct gf_rdma_nbio *nbio,
- gf_rdma_private_t *priv)
-{
- sprintf (buf,
- "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
- "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
- priv->peer.recv_size,
- priv->peer.send_size,
- priv->peer.local_lid,
- priv->peer.local_qpn,
- priv->peer.local_psn);
-
- nbio->vector.iov_base = buf;
- nbio->vector.iov_len = strlen (buf) + 1;
- nbio->count = 1;
- return;
-}
-
-static inline void
-gf_rdma_fill_handshake_ack (char *buf, struct gf_rdma_nbio *nbio)
-{
- sprintf (buf, "DONE\n");
- nbio->vector.iov_base = buf;
- nbio->vector.iov_len = strlen (buf) + 1;
- nbio->count = 1;
- return;
-}
-
-static int
-gf_rdma_handshake_pollin (rpc_transport_t *this)
-{
- int ret = 0;
- gf_rdma_private_t *priv = NULL;
- char *buf = NULL;
- int32_t recv_buf_size = 0, send_buf_size;
- socklen_t sock_len = 0;
-
- priv = this->private;
- buf = priv->handshake.incoming.buf;
-
- if (priv->handshake.incoming.state == GF_RDMA_HANDSHAKE_COMPLETE) {
- return -1;
- }
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- while (priv->handshake.incoming.state != GF_RDMA_HANDSHAKE_COMPLETE)
- {
- switch (priv->handshake.incoming.state)
- {
- case GF_RDMA_HANDSHAKE_START:
- buf = priv->handshake.incoming.buf = GF_CALLOC (1, 256, gf_common_mt_char);
- gf_rdma_fill_handshake_data (buf, &priv->handshake.incoming, priv);
- buf[0] = 0;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVING_DATA;
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVING_DATA:
- ret = __tcp_readv (this,
- &priv->handshake.incoming.vector,
- priv->handshake.incoming.count,
- &priv->handshake.incoming.pending_vector,
- &priv->handshake.incoming.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVED_DATA;
- }
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVED_DATA:
- ret = sscanf (buf,
- "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
- "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
- &recv_buf_size,
- &send_buf_size,
- &priv->peer.remote_lid,
- &priv->peer.remote_qpn,
- &priv->peer.remote_psn);
-
- if ((ret != 5) && (strncmp (buf, "QP1:", 4))) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "%s: remote-host(%s)'s "
- "transport type is different",
- this->name,
- this->peerinfo.identifier);
- ret = -1;
- goto unlock;
- }
-
- if (recv_buf_size < priv->peer.recv_size)
- priv->peer.recv_size = recv_buf_size;
- if (send_buf_size < priv->peer.send_size)
- priv->peer.send_size = send_buf_size;
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "%s: transacted recv_size=%d "
- "send_size=%d",
- this->name, priv->peer.recv_size,
- priv->peer.send_size);
-
- priv->peer.quota = priv->peer.send_count;
-
- if (gf_rdma_connect_qp (this)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "%s: failed to connect with "
- "remote QP", this->name);
- ret = -1;
- goto unlock;
- }
- gf_rdma_fill_handshake_ack (buf, &priv->handshake.incoming);
- buf[0] = 0;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVING_ACK;
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVING_ACK:
- ret = __tcp_readv (this,
- &priv->handshake.incoming.vector,
- priv->handshake.incoming.count,
- &priv->handshake.incoming.pending_vector,
- &priv->handshake.incoming.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVED_ACK;
- }
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVED_ACK:
- if (strncmp (buf, "DONE", 4)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_DEBUG,
- "%s: handshake-3 did not "
- "return 'DONE' (%s)",
- this->name, buf);
- ret = -1;
- goto unlock;
- }
- ret = 0;
- priv->connected = 1;
- sock_len = sizeof (struct sockaddr_storage);
- getpeername (priv->sock,
- (struct sockaddr *) &this->peerinfo.sockaddr,
- &sock_len);
-
- GF_FREE (priv->handshake.incoming.buf);
- priv->handshake.incoming.buf = NULL;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_COMPLETE;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (ret == -1) {
- rpc_transport_disconnect (this);
- } else {
- ret = 0;
- }
-
-
- if (!ret && priv->connected) {
- if (priv->is_server) {
- ret = rpc_transport_notify (priv->listener,
- RPC_TRANSPORT_ACCEPT,
- this);
- } else {
- ret = rpc_transport_notify (this, RPC_TRANSPORT_CONNECT,
- this);
- }
- }
-
- return ret;
-}
-
-static int
-gf_rdma_handshake_pollout (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- char *buf = NULL;
- int32_t ret = 0;
-
- priv = this->private;
- buf = priv->handshake.outgoing.buf;
-
- if (priv->handshake.outgoing.state == GF_RDMA_HANDSHAKE_COMPLETE) {
- return 0;
- }
-
- pthread_mutex_unlock (&priv->write_mutex);
- {
- while (priv->handshake.outgoing.state
- != GF_RDMA_HANDSHAKE_COMPLETE)
- {
- switch (priv->handshake.outgoing.state)
- {
- case GF_RDMA_HANDSHAKE_START:
- buf = priv->handshake.outgoing.buf
- = GF_CALLOC (1, 256, gf_common_mt_char);
- gf_rdma_fill_handshake_data (buf,
- &priv->handshake.outgoing, priv);
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_SENDING_DATA;
- break;
-
- case GF_RDMA_HANDSHAKE_SENDING_DATA:
- ret = __tcp_writev (this,
- &priv->handshake.outgoing.vector,
- priv->handshake.outgoing.count,
- &priv->handshake.outgoing.pending_vector,
- &priv->handshake.outgoing.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_SENT_DATA;
- }
- break;
-
- case GF_RDMA_HANDSHAKE_SENT_DATA:
- gf_rdma_fill_handshake_ack (buf,
- &priv->handshake.outgoing);
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_SENDING_ACK;
- break;
-
- case GF_RDMA_HANDSHAKE_SENDING_ACK:
- ret = __tcp_writev (this,
- &priv->handshake.outgoing.vector,
- priv->handshake.outgoing.count,
- &priv->handshake.outgoing.pending_vector,
- &priv->handshake.outgoing.pending_count);
-
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- GF_FREE (priv->handshake.outgoing.buf);
- priv->handshake.outgoing.buf = NULL;
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_COMPLETE;
- }
- break;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (ret == -1) {
- rpc_transport_disconnect (this);
- } else {
- ret = 0;
- }
-
- return ret;
-}
-
-static int
-gf_rdma_handshake_pollerr (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = this->private;
- char need_unref = 0, connected = 0;
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "%s: peer disconnected, cleaning up",
- this->name);
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- __gf_rdma_teardown (this);
-
- connected = priv->connected;
- if (priv->sock != -1) {
- event_unregister (this->ctx->event_pool,
- priv->sock, priv->idx);
- need_unref = 1;
-
- if (close (priv->sock) != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "close () - error: %s",
- strerror (errno));
- }
- priv->tcp_connected = priv->connected = 0;
- priv->sock = -1;
- }
-
- if (priv->handshake.incoming.buf) {
- GF_FREE (priv->handshake.incoming.buf);
- priv->handshake.incoming.buf = NULL;
- }
-
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_START;
-
- if (priv->handshake.outgoing.buf) {
- GF_FREE (priv->handshake.outgoing.buf);
- priv->handshake.outgoing.buf = NULL;
- }
-
- priv->handshake.outgoing.state = GF_RDMA_HANDSHAKE_START;
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (connected) {
- rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
- }
-
- if (need_unref)
- rpc_transport_unref (this);
-
- return 0;
-}
-
-
-static int
-tcp_connect_finish (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- int error = 0, ret = 0;
-
- priv = this->private;
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __tcp_connect_finish (priv->sock);
-
- if (!ret) {
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
- ret = getsockname (priv->sock,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len);
- if (ret == -1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "getsockname on new client-socket %d "
- "failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- error = 1;
- goto unlock;
- }
-
- gf_rdma_get_transport_identifiers (this);
- priv->tcp_connected = 1;
- }
-
- if (ret == -1 && errno != EINPROGRESS) {
- gf_log (this->name, GF_LOG_ERROR,
- "tcp connect to %s failed (%s)",
- this->peerinfo.identifier, strerror (errno));
- error = 1;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (error) {
- rpc_transport_disconnect (this);
- }
-
- return ret;
-}
-
-static int
-gf_rdma_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- rpc_transport_t *this = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- int ret = 0;
-
- this = data;
- priv = this->private;
- if (!priv->tcp_connected) {
- ret = tcp_connect_finish (this);
- if (priv->tcp_connected) {
- options = &priv->options;
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
-
- if ((ret = gf_rdma_create_qp (this)) < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create QP",
- this->name);
- rpc_transport_disconnect (this);
- }
- }
- }
-
- if (!ret && poll_out && priv->tcp_connected) {
- ret = gf_rdma_handshake_pollout (this);
- }
-
- if (!ret && !poll_err && poll_in && priv->tcp_connected) {
- if (priv->handshake.incoming.state
- == GF_RDMA_HANDSHAKE_COMPLETE) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: pollin received on tcp socket (peer: %s) "
- "after handshake is complete",
- this->name, this->peerinfo.identifier);
- gf_rdma_handshake_pollerr (this);
- return 0;
- }
- ret = gf_rdma_handshake_pollin (this);
- }
-
- if (ret < 0 || poll_err) {
- ret = gf_rdma_handshake_pollerr (this);
- }
-
- return 0;
-}
-
-static int
-__tcp_nonblock (int fd)
-{
- int flags = 0;
- int ret = -1;
-
- flags = fcntl (fd, F_GETFL);
-
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
-
- return ret;
-}
-
-static int32_t
-gf_rdma_connect (struct rpc_transport *this, int port)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
- gf_boolean_t non_blocking = 1;
- union gf_sock_union sock_union = {{0, }, };
- socklen_t sockaddr_len = 0;
-
- priv = this->private;
-
- ret = gf_rdma_client_get_remote_sockaddr (this,
- &sock_union.sa,
- &sockaddr_len, port);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot get remote address to connect");
- return ret;
- }
-
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (priv->sock != -1) {
- ret = 0;
- goto unlock;
- }
-
- priv->sock = socket (sock_union.sa.sa_family, SOCK_STREAM, 0);
-
- if (priv->sock == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "socket () - error: %s", strerror (errno));
- ret = -errno;
- goto unlock;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "socket fd = %d", priv->sock);
-
- memcpy (&this->peerinfo.sockaddr, &sock_union.storage,
- sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
-
- if (port > 0)
- sock_union.sin.sin_port = htons (port);
-
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family =
- ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family;
-
- if (non_blocking)
- {
- ret = __tcp_nonblock (priv->sock);
-
- if (ret == -1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set socket %d to non "
- "blocking mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- ret = gf_rdma_client_bind (this,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len,
- priv->sock);
- if (ret == -1)
- {
- gf_log (this->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = connect (priv->sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len);
- if (ret == -1 && errno != EINPROGRESS)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- priv->tcp_connected = priv->connected = 0;
-
- rpc_transport_ref (this);
-
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_START;
- priv->handshake.outgoing.state = GF_RDMA_HANDSHAKE_START;
-
- priv->idx = event_register (this->ctx->event_pool,
- priv->sock, gf_rdma_event_handler,
- this, 1, 1);
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-static int
-gf_rdma_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- int32_t main_sock = -1;
- rpc_transport_t *this = NULL, *trans = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_private_t *trans_priv = NULL;
- gf_rdma_options_t *options = NULL;
-
- if (!poll_in) {
- return 0;
- }
-
- trans = data;
- trans_priv = (gf_rdma_private_t *) trans->private;
-
- this = GF_CALLOC (1, sizeof (rpc_transport_t),
- gf_common_mt_rpc_transport_t);
- if (this == NULL) {
- return -1;
- }
-
- this->listener = trans;
-
- priv = GF_CALLOC (1, sizeof (gf_rdma_private_t),
- gf_common_mt_rdma_private_t);
- if (priv == NULL) {
- GF_FREE (priv);
- return -1;
- }
- this->private = priv;
- /* Copy all the rdma related values in priv, from trans_priv
- as other than QP, all the values remain same */
- priv->device = trans_priv->device;
- priv->options = trans_priv->options;
- priv->is_server = 1;
- priv->listener = trans;
-
- options = &priv->options;
-
- this->ops = trans->ops;
- this->init = trans->init;
- this->fini = trans->fini;
- this->ctx = trans->ctx;
- this->name = gf_strdup (trans->name);
- this->notify = trans->notify;
- this->mydata = trans->mydata;
-
- memcpy (&this->myinfo.sockaddr, &trans->myinfo.sockaddr,
- trans->myinfo.sockaddr_len);
- this->myinfo.sockaddr_len = trans->myinfo.sockaddr_len;
-
- main_sock = (trans_priv)->sock;
- this->peerinfo.sockaddr_len = sizeof (this->peerinfo.sockaddr);
- priv->sock = accept (main_sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- &this->peerinfo.sockaddr_len);
- if (priv->sock == -1) {
- gf_log ("rdma/server", GF_LOG_ERROR,
- "accept() failed: %s",
- strerror (errno));
- GF_FREE (this->private);
- GF_FREE (this);
- return -1;
- }
-
- priv->peer.trans = this;
- rpc_transport_ref (this);
-
- gf_rdma_get_transport_identifiers (this);
-
- priv->tcp_connected = 1;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_START;
- priv->handshake.outgoing.state = GF_RDMA_HANDSHAKE_START;
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
- INIT_LIST_HEAD (&priv->peer.ioq);
-
- if (gf_rdma_create_qp (this) < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create QP",
- this->name);
- rpc_transport_disconnect (this);
- return -1;
- }
-
- priv->idx = event_register (this->ctx->event_pool, priv->sock,
- gf_rdma_event_handler, this, 1, 1);
-
- pthread_mutex_init (&priv->read_mutex, NULL);
- pthread_mutex_init (&priv->write_mutex, NULL);
- pthread_mutex_init (&priv->recv_mutex, NULL);
- /* pthread_cond_init (&priv->recv_cond, NULL); */
- return 0;
-}
-
-static int32_t
-gf_rdma_listen (rpc_transport_t *this)
-{
- union gf_sock_union sock_union = {{0, }, };
- socklen_t sockaddr_len = 0;
- gf_rdma_private_t *priv = NULL;
- int opt = 1, ret = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
-
- priv = this->private;
- memset (&sock_union, 0, sizeof (sock_union));
- ret = gf_rdma_server_get_local_sockaddr (this,
- &sock_union.sa,
- &sockaddr_len);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot find network address of server to bind to");
- goto err;
- }
-
- priv->sock = socket (sock_union.sa.sa_family, SOCK_STREAM, 0);
- if (priv->sock == -1) {
- gf_log ("rdma/server", GF_LOG_CRITICAL,
- "init: failed to create socket, error: %s",
- strerror (errno));
- GF_FREE (this->private);
- ret = -1;
- goto err;
- }
-
- memcpy (&this->myinfo.sockaddr, &sock_union.storage, sockaddr_len);
- this->myinfo.sockaddr_len = sockaddr_len;
-
- ret = getnameinfo ((struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
- sprintf (this->myinfo.identifier, "%s:%s", host, service);
-
- setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt));
- if (bind (priv->sock, &sock_union.sa, sockaddr_len) != 0) {
- ret = -1;
- gf_log ("rdma/server", GF_LOG_ERROR,
- "init: failed to bind to socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- goto err;
- }
-
- if (listen (priv->sock, 10) != 0) {
- gf_log ("rdma/server", GF_LOG_ERROR,
- "init: listen () failed on socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- ret = -1;
- goto err;
- }
-
- /* Register the main socket */
- priv->idx = event_register (this->ctx->event_pool, priv->sock,
- gf_rdma_server_event_handler,
- rpc_transport_ref (this), 1, 0);
-
-err:
- return ret;
-}
-
-struct rpc_transport_ops tops = {
- .submit_request = gf_rdma_submit_request,
- .submit_reply = gf_rdma_submit_reply,
- .connect = gf_rdma_connect,
- .disconnect = gf_rdma_disconnect,
- .listen = gf_rdma_listen,
-};
-
-int32_t
-init (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t);
- if (!priv)
- return -1;
-
- this->private = priv;
- priv->sock = -1;
-
- if (gf_rdma_init (this)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to initialize IB Device");
- return -1;
- }
-
- return 0;
-}
-
-void
-fini (struct rpc_transport *this)
-{
- /* TODO: verify this function does graceful finish */
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
-
- this->private = NULL;
-
- if (priv) {
- pthread_mutex_destroy (&priv->recv_mutex);
- pthread_mutex_destroy (&priv->write_mutex);
- pthread_mutex_destroy (&priv->read_mutex);
-
- /* pthread_cond_destroy (&priv->recv_cond); */
- if (priv->sock != -1) {
- event_unregister (this->ctx->event_pool,
- priv->sock, priv->idx);
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "called fini on transport: %p", this);
- GF_FREE (priv);
- }
- return;
-}
-
-/* TODO: expand each option */
-struct volume_options options[] = {
- { .key = {"transport.rdma.port",
- "rdma-port"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 4,
- .description = "check the option by 'ibv_devinfo'"
- },
- { .key = {"transport.rdma.mtu",
- "rdma-mtu"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.rdma.device-name",
- "rdma-device-name"},
- .type = GF_OPTION_TYPE_ANY,
- .description = "check by 'ibv_devinfo'"
- },
- { .key = {"transport.rdma.work-request-send-count",
- "rdma-work-request-send-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.rdma.work-request-recv-count",
- "rdma-work-request-recv-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.rdma.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.attr-timeout",
- "rdma-attr-timeout"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.attr-retry-cnt",
- "rdma-attr-retry-cnt"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.attr-rnr-retry",
- "rdma-attr-rnr-retry"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.rdma.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.rdma.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.address-family",
- "address-family"},
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
- "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"transport.socket.lowlat"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} }
-};
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
deleted file mode 100644
index 687d6005feb..00000000000
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _XPORT_RDMA_H
-#define _XPORT_RDMA_H
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif /* MAX_IOVEC */
-
-#include "rpc-clnt.h"
-#include "rpc-transport.h"
-#include "xlator.h"
-#include "event.h"
-#include <stdio.h>
-#include <list.h>
-#include <arpa/inet.h>
-#include <infiniband/verbs.h>
-
-/* FIXME: give appropriate values to these macros */
-#define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1)
-
-/* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update
- * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h .
- */
-#define GF_RDMA_MAX_SEGMENTS 8
-
-#define GF_RDMA_VERSION 1
-#define GF_RDMA_POOL_SIZE 512
-
-/* Additional attributes */
-#define GF_RDMA_TIMEOUT 14
-#define GF_RDMA_RETRY_CNT 7
-#define GF_RDMA_RNR_RETRY 7
-
-typedef enum gf_rdma_errcode {
- ERR_VERS = 1,
- ERR_CHUNK = 2
-}gf_rdma_errcode_t;
-
-struct gf_rdma_err_vers {
- uint32_t gf_rdma_vers_low; /* Version range supported by peer */
- uint32_t gf_rdma_vers_high;
-}__attribute__ ((packed));
-typedef struct gf_rdma_err_vers gf_rdma_err_vers_t;
-
-typedef enum gf_rdma_proc {
- GF_RDMA_MSG = 0, /* An RPC call or reply msg */
- GF_RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */
- GF_RDMA_MSGP = 2, /* An RPC call or reply msg with padding */
- GF_RDMA_DONE = 3, /* Client signals reply completion */
- GF_RDMA_ERROR = 4 /* An RPC RDMA encoding error */
-}gf_rdma_proc_t;
-
-typedef enum gf_rdma_chunktype {
- gf_rdma_noch = 0, /* no chunk */
- gf_rdma_readch, /* some argument through rdma read */
- gf_rdma_areadch, /* entire request through rdma read */
- gf_rdma_writech, /* some result through rdma write */
- gf_rdma_replych /* entire reply through rdma write */
-}gf_rdma_chunktype_t;
-
-/* If you are modifying __gf_rdma_header, please make sure to change
- * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to reflect your changes
- */
-struct __gf_rdma_header {
- uint32_t rm_xid; /* Mirrors the RPC header xid */
- uint32_t rm_vers; /* Version of this protocol */
- uint32_t rm_credit; /* Buffers requested/granted */
- uint32_t rm_type; /* Type of message (enum gf_rdma_proc) */
- union {
- struct { /* no chunks */
- uint32_t rm_empty[3]; /* 3 empty chunk lists */
- }__attribute__((packed)) rm_nochunks;
-
- struct { /* no chunks and padded */
- uint32_t rm_align; /* Padding alignment */
- uint32_t rm_thresh; /* Padding threshold */
- uint32_t rm_pempty[3]; /* 3 empty chunk lists */
- }__attribute__((packed)) rm_padded;
-
- struct {
- uint32_t rm_type;
- gf_rdma_err_vers_t rm_version;
- }__attribute__ ((packed)) rm_error;
-
- uint32_t rm_chunks[0]; /* read, write and reply chunks */
- }__attribute__ ((packed)) rm_body;
-} __attribute__((packed));
-typedef struct __gf_rdma_header gf_rdma_header_t;
-
-/* If you are modifying __gf_rdma_segment or __gf_rdma_read_chunk, please make sure
- * to change GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to reflect
- * your changes.
- */
-struct __gf_rdma_segment {
- uint32_t rs_handle; /* Registered memory handle */
- uint32_t rs_length; /* Length of the chunk in bytes */
- uint64_t rs_offset; /* Chunk virtual address or offset */
-} __attribute__((packed));
-typedef struct __gf_rdma_segment gf_rdma_segment_t;
-
-/* read chunk(s), encoded as a linked list. */
-struct __gf_rdma_read_chunk {
- uint32_t rc_discrim; /* 1 indicates presence */
- uint32_t rc_position; /* Position in XDR stream */
- gf_rdma_segment_t rc_target;
-} __attribute__((packed));
-typedef struct __gf_rdma_read_chunk gf_rdma_read_chunk_t;
-
-/* write chunk, and reply chunk. */
-struct __gf_rdma_write_chunk {
- gf_rdma_segment_t wc_target;
-} __attribute__((packed));
-typedef struct __gf_rdma_write_chunk gf_rdma_write_chunk_t;
-
-/* write chunk(s), encoded as a counted array. */
-struct __gf_rdma_write_array {
- uint32_t wc_discrim; /* 1 indicates presence */
- uint32_t wc_nchunks; /* Array count */
- struct __gf_rdma_write_chunk wc_array[0];
-} __attribute__((packed));
-typedef struct __gf_rdma_write_array gf_rdma_write_array_t;
-
-/* options per transport end point */
-struct __gf_rdma_options {
- int32_t port;
- char *device_name;
- enum ibv_mtu mtu;
- int32_t send_count;
- int32_t recv_count;
- uint64_t recv_size;
- uint64_t send_size;
- uint8_t attr_timeout;
- uint8_t attr_retry_cnt;
- uint8_t attr_rnr_retry;
-};
-typedef struct __gf_rdma_options gf_rdma_options_t;
-
-struct __gf_rdma_reply_info {
- uint32_t rm_xid; /* xid in network endian */
- gf_rdma_chunktype_t type; /*
- * can be either gf_rdma_replych
- * or gf_rdma_writech.
- */
- gf_rdma_write_array_t *wc_array;
- struct mem_pool *pool;
-};
-typedef struct __gf_rdma_reply_info gf_rdma_reply_info_t;
-
-struct __gf_rdma_ioq {
- union {
- struct list_head list;
- struct {
- struct __gf_rdma_ioq *next;
- struct __gf_rdma_ioq *prev;
- };
- };
-
- char is_request;
- struct iovec rpchdr[MAX_IOVEC];
- int rpchdr_count;
- struct iovec proghdr[MAX_IOVEC];
- int proghdr_count;
- struct iovec prog_payload[MAX_IOVEC];
- int prog_payload_count;
-
- struct iobref *iobref;
-
- union {
- struct __gf_rdma_ioq_request {
- /* used to build reply_chunk for GF_RDMA_NOMSG type msgs */
- struct iovec rsphdr_vec[MAX_IOVEC];
- int rsphdr_count;
-
- /*
- * used to build write_array during operations like
- * read.
- */
- struct iovec rsp_payload[MAX_IOVEC];
- int rsp_payload_count;
-
- struct rpc_req *rpc_req; /* FIXME: hack! hack! should be
- * cleaned up later
- */
- struct iobref *rsp_iobref;
- }request;
-
- gf_rdma_reply_info_t *reply_info;
- }msg;
-
- struct mem_pool *pool;
-};
-typedef struct __gf_rdma_ioq gf_rdma_ioq_t;
-
-typedef enum __gf_rdma_send_post_type {
- GF_RDMA_SEND_POST_NO_CHUNKLIST, /* post which is sent using rdma-send
- * and the msg carries no
- * chunklists.
- */
- GF_RDMA_SEND_POST_READ_CHUNKLIST, /* post which is sent using rdma-send
- * and the msg carries only read
- * chunklist.
- */
- GF_RDMA_SEND_POST_WRITE_CHUNKLIST, /* post which is sent using
- * rdma-send and the msg carries
- * only write chunklist.
- */
- GF_RDMA_SEND_POST_READ_WRITE_CHUNKLIST, /* post which is sent using
- * rdma-send and the msg
- * carries both read and
- * write chunklists.
- */
- GF_RDMA_SEND_POST_GF_RDMA_READ, /* RDMA read */
- GF_RDMA_SEND_POST_GF_RDMA_WRITE, /* RDMA write */
-}gf_rdma_send_post_type_t;
-
-/* represents one communication peer, two per transport_t */
-struct __gf_rdma_peer {
- rpc_transport_t *trans;
- struct ibv_qp *qp;
-
- int32_t recv_count;
- int32_t send_count;
- int32_t recv_size;
- int32_t send_size;
-
- int32_t quota;
- union {
- struct list_head ioq;
- struct {
- gf_rdma_ioq_t *ioq_next;
- gf_rdma_ioq_t *ioq_prev;
- };
- };
-
- /* QP attributes, needed to connect with remote QP */
- int32_t local_lid;
- int32_t local_psn;
- int32_t local_qpn;
- int32_t remote_lid;
- int32_t remote_psn;
- int32_t remote_qpn;
-};
-typedef struct __gf_rdma_peer gf_rdma_peer_t;
-
-struct __gf_rdma_post_context {
- struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
- int mr_count;
- struct iovec vector[MAX_IOVEC];
- int count;
- struct iobref *iobref;
- struct iobuf *hdr_iobuf;
- char is_request;
- int gf_rdma_reads;
- gf_rdma_reply_info_t *reply_info;
-};
-typedef struct __gf_rdma_post_context gf_rdma_post_context_t;
-
-typedef enum {
- GF_RDMA_SEND_POST,
- GF_RDMA_RECV_POST
-} gf_rdma_post_type_t;
-
-struct __gf_rdma_post {
- struct __gf_rdma_post *next, *prev;
- struct ibv_mr *mr;
- char *buf;
- int32_t buf_size;
- char aux;
- int32_t reused;
- struct __gf_rdma_device *device;
- gf_rdma_post_type_t type;
- gf_rdma_post_context_t ctx;
- int refcount;
- pthread_mutex_t lock;
-};
-typedef struct __gf_rdma_post gf_rdma_post_t;
-
-struct __gf_rdma_queue {
- gf_rdma_post_t active_posts, passive_posts;
- int32_t active_count, passive_count;
- pthread_mutex_t lock;
-};
-typedef struct __gf_rdma_queue gf_rdma_queue_t;
-
-struct __gf_rdma_qpreg {
- pthread_mutex_t lock;
- int32_t count;
- struct _qpent {
- struct _qpent *next, *prev;
- int32_t qp_num;
- gf_rdma_peer_t *peer;
- } ents[42];
-};
-typedef struct __gf_rdma_qpreg gf_rdma_qpreg_t;
-
-/* context per device, stored in global glusterfs_ctx_t->ib */
-struct __gf_rdma_device {
- struct __gf_rdma_device *next;
- const char *device_name;
- struct ibv_context *context;
- int32_t port;
- struct ibv_pd *pd;
- struct ibv_srq *srq;
- gf_rdma_qpreg_t qpreg;
- struct ibv_comp_channel *send_chan, *recv_chan;
- struct ibv_cq *send_cq, *recv_cq;
- gf_rdma_queue_t sendq, recvq;
- pthread_t send_thread, recv_thread;
- struct mem_pool *request_ctx_pool;
- struct mem_pool *ioq_pool;
- struct mem_pool *reply_info_pool;
-};
-typedef struct __gf_rdma_device gf_rdma_device_t;
-
-typedef enum {
- GF_RDMA_HANDSHAKE_START = 0,
- GF_RDMA_HANDSHAKE_SENDING_DATA,
- GF_RDMA_HANDSHAKE_RECEIVING_DATA,
- GF_RDMA_HANDSHAKE_SENT_DATA,
- GF_RDMA_HANDSHAKE_RECEIVED_DATA,
- GF_RDMA_HANDSHAKE_SENDING_ACK,
- GF_RDMA_HANDSHAKE_RECEIVING_ACK,
- GF_RDMA_HANDSHAKE_RECEIVED_ACK,
- GF_RDMA_HANDSHAKE_COMPLETE,
-} gf_rdma_handshake_state_t;
-
-struct gf_rdma_nbio {
- int state;
- char *buf;
- int count;
- struct iovec vector;
- struct iovec *pending_vector;
- int pending_count;
-};
-
-struct __gf_rdma_request_context {
- struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
- int mr_count;
- struct mem_pool *pool;
- gf_rdma_peer_t *peer;
- struct iobref *iobref;
- struct iobref *rsp_iobref;
-};
-typedef struct __gf_rdma_request_context gf_rdma_request_context_t;
-
-struct __gf_rdma_private {
- int32_t sock;
- int32_t idx;
- unsigned char connected;
- unsigned char tcp_connected;
- unsigned char ib_connected;
- in_addr_t addr;
- unsigned short port;
-
- /* IB Verbs Driver specific variables, pointers */
- gf_rdma_peer_t peer;
- struct __gf_rdma_device *device;
- gf_rdma_options_t options;
-
- /* Used by trans->op->receive */
- char *data_ptr;
- int32_t data_offset;
- int32_t data_len;
-
- /* Mutex */
- pthread_mutex_t read_mutex;
- pthread_mutex_t write_mutex;
- pthread_barrier_t handshake_barrier;
- char handshake_ret;
- char is_server;
- rpc_transport_t *listener;
-
- pthread_mutex_t recv_mutex;
- pthread_cond_t recv_cond;
-
- /* used during gf_rdma_handshake */
- struct {
- struct gf_rdma_nbio incoming;
- struct gf_rdma_nbio outgoing;
- int state;
- gf_rdma_header_t header;
- char *buf;
- size_t size;
- } handshake;
-};
-typedef struct __gf_rdma_private gf_rdma_private_t;
-
-#endif /* _XPORT_GF_RDMA_H */
diff --git a/rpc/rpc-transport/socket/src/Makefile.am b/rpc/rpc-transport/socket/src/Makefile.am
index 2c918c7e313..7b488583771 100644
--- a/rpc/rpc-transport/socket/src/Makefile.am
+++ b/rpc/rpc-transport/socket/src/Makefile.am
@@ -1,15 +1,22 @@
-noinst_HEADERS = socket.h name.h
+noinst_HEADERS = socket.h name.h socket-mem-types.h
rpctransport_LTLIBRARIES = socket.la
rpctransportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
-socket_la_LDFLAGS = -module -avoidversion
+socket_la_LDFLAGS = -module -avoid-version
socket_la_SOURCES = socket.c name.c
-socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ -lssl
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/rpc/xdr/src/ -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src/ \
+ -I$(top_srcdir)/rpc/xdr/src/ \
+ -I$(top_builddir)/rpc/xdr/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES = *~
diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
index 0b2ca5805a2..9286bbb236d 100644
--- a/rpc/rpc-transport/socket/src/name.c
+++ b/rpc/rpc-transport/socket/src/name.c
@@ -15,727 +15,754 @@
#include <netdb.h>
#include <string.h>
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
#ifndef AF_INET_SDP
#define AF_INET_SDP 27
#endif
#include "rpc-transport.h"
#include "socket.h"
-#include "common-utils.h"
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
+#include <glusterfs/common-utils.h>
-static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
- socklen_t sockaddr_len, int ceiling)
+static void
+_assign_port(struct sockaddr *sockaddr, uint16_t port)
{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
-
- while (port)
- {
- switch (sockaddr->sa_family)
- {
- case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
- break;
-
- case AF_INET_SDP:
- case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
- break;
- }
-
- ret = bind (fd, sockaddr, sockaddr_len);
-
- if (ret == 0)
- break;
-
- if (ret == -1 && errno == EACCES)
- break;
-
- port--;
- }
+ switch (sockaddr->sa_family) {
+ case AF_INET6:
+ ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons(port);
+ break;
- return ret;
+ case AF_INET_SDP:
+ case AF_INET:
+ ((struct sockaddr_in *)sockaddr)->sin_port = htons(port);
+ break;
+ }
}
static int32_t
-af_unix_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
+af_inet_bind_to_port_lt_ceiling(int fd, struct sockaddr *sockaddr,
+ socklen_t sockaddr_len, uint32_t ceiling)
{
- data_t *path_data = NULL;
- struct sockaddr_un *addr = NULL;
- int32_t ret = 0;
-
- path_data = dict_get (this->options, "transport.socket.bind-path");
- if (path_data) {
- char *path = data_to_str (path_data);
- if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_TRACE,
- "bind-path not specified for unix socket, "
- "letting connect to assign default value");
- goto err;
- }
+#if GF_DISABLE_PRIVPORT_TRACKING
+ _assign_port(sockaddr, 0);
+ return bind(fd, sockaddr, sockaddr_len);
+#else
+ int32_t ret = -1;
+ uint16_t port = ceiling - 1;
+ unsigned char ports[GF_PORT_ARRAY_SIZE] = {
+ 0,
+ };
+ int i = 0;
- addr = (struct sockaddr_un *) sockaddr;
- strcpy (addr->sun_path, path);
- ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
- sock, strerror (errno));
- goto err;
- }
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "bind-path not specified for unix socket, "
- "letting connect to assign default value");
+loop:
+ ret = gf_process_reserved_ports(ports, ceiling);
+
+ while (port) {
+ if (port == GF_CLIENT_PORT_CEILING) {
+ ret = -1;
+ break;
}
-err:
- return ret;
-}
+ /* ignore the reserved ports */
+ if (BIT_VALUE(ports, port)) {
+ port--;
+ continue;
+ }
-int32_t
-client_fill_address_family (rpc_transport_t *this, sa_family_t *sa_family)
-{
- data_t *address_family_data = NULL;
- int32_t ret = -1;
+ _assign_port(sockaddr, port);
- if (sa_family == NULL) {
- gf_log_callingfn ("", GF_LOG_WARNING,
- "sa_family argument is NULL");
- goto out;
- }
+ ret = bind(fd, sockaddr, sockaddr_len);
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (!address_family_data) {
- data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->options, "remote-host");
- connect_path_data = dict_get (this->options,
- "transport.socket.connect-path");
-
- if (!(remote_host_data || connect_path_data) ||
- (remote_host_data && connect_path_data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "transport.address-family not specified and "
- "not able to determine the "
- "same from other options (remote-host:%s and "
- "transport.unix.connect-path:%s)",
- data_to_str (remote_host_data),
- data_to_str (connect_path_data));
- goto out;
- }
+ if (ret == 0)
+ break;
- if (remote_host_data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be inet/inet6");
- *sa_family = AF_UNSPEC;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be unix");
- *sa_family = AF_UNIX;
- }
+ if (ret == -1 && errno == EACCES)
+ break;
- } else {
- char *address_family = data_to_str (address_family_data);
- if (!strcasecmp (address_family, "unix")) {
- *sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet")) {
- *sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- *sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- *sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- *sa_family = AF_UNSPEC;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
- address_family);
- goto out;
- }
- }
+ port--;
+ }
- ret = 0;
+ /* In case if all the secure ports are exhausted, we are no more
+ * binding to secure ports, hence instead of getting a random
+ * port, lets define the range to restrict it from getting from
+ * ports reserved for bricks i.e from range of 49152 - 65535
+ * which further may lead to port clash */
+ if (!port) {
+ ceiling = port = GF_CLNT_INSECURE_PORT_CEILING;
+ for (i = 0; i <= ceiling; i++)
+ BIT_CLEAR(ports, i);
+ goto loop;
+ }
-out:
- return ret;
+ return ret;
+#endif /* GF_DISABLE_PRIVPORT_TRACKING */
}
static int32_t
-af_inet_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
+af_unix_client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t sockaddr_len, int sock)
{
- dict_t *options = this->options;
- data_t *remote_host_data = NULL;
- data_t *remote_port_data = NULL;
- char *remote_host = NULL;
- uint16_t remote_port = 0;
- struct addrinfo *addr_info = NULL;
- int32_t ret = 0;
-
- remote_host_data = dict_get (options, "remote-host");
- if (remote_host_data == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s", this->name);
- ret = -1;
- goto err;
- }
-
- remote_host = data_to_str (remote_host_data);
- if (remote_host == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s", this->name);
- ret = -1;
- goto err;
- }
-
- remote_port_data = dict_get (options, "remote-port");
- if (remote_port_data == NULL)
- {
- gf_log (this->name, GF_LOG_TRACE,
- "option remote-port missing in volume %s. Defaulting to %d",
- this->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
- }
- else
- {
- remote_port = data_to_uint16 (remote_port_data);
- }
-
- if (remote_port == (uint16_t)-1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-port has invalid port in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- /* TODO: gf_resolve is a blocking call. kick in some
- non blocking dns techniques */
- ret = gf_resolve_ip6 (remote_host, remote_port,
- sockaddr->sa_family, &this->dnscache, &addr_info);
+ data_t *path_data = NULL;
+ struct sockaddr_un *addr = NULL;
+ int32_t ret = 0;
+
+ path_data = dict_get_sizen(this->options, "transport.socket.bind-path");
+ if (path_data) {
+ char *path = data_to_str(path_data);
+ if (!path || path_data->len > 108) { /* 108 = addr->sun_path length */
+ gf_log(this->name, GF_LOG_TRACE,
+ "bind-path not specified for unix socket, "
+ "letting connect to assign default value");
+ goto err;
+ }
+
+ addr = (struct sockaddr_un *)sockaddr;
+ strcpy(addr->sun_path, path);
+ ret = bind(sock, (struct sockaddr *)addr, sockaddr_len);
if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "DNS resolution failed on host %s", remote_host);
- goto err;
+ gf_log(this->name, GF_LOG_ERROR,
+ "cannot bind to unix-domain socket %d (%s)", sock,
+ strerror(errno));
+ goto err;
}
-
- memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
- *sockaddr_len = addr_info->ai_addrlen;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ "bind-path not specified for unix socket, "
+ "letting connect to assign default value");
+ }
err:
- return ret;
+ return ret;
}
static int32_t
-af_unix_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
+client_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
{
- struct sockaddr_un *sockaddr_un = NULL;
- char *connect_path = NULL;
- data_t *connect_path_data = NULL;
- int32_t ret = 0;
-
- connect_path_data = dict_get (this->options,
- "transport.socket.connect-path");
- if (!connect_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "option transport.unix.connect-path not specified for "
- "address-family unix");
- ret = -1;
- goto err;
+ data_t *address_family_data = NULL;
+ int32_t ret = -1;
+
+ if (sa_family == NULL) {
+ gf_log_callingfn("", GF_LOG_WARNING, "sa_family argument is NULL");
+ goto out;
+ }
+
+ address_family_data = dict_get_sizen(this->options,
+ "transport.address-family");
+ if (!address_family_data) {
+ data_t *remote_host_data = NULL, *connect_path_data = NULL;
+ remote_host_data = dict_get_sizen(this->options, "remote-host");
+ connect_path_data = dict_get_sizen(this->options,
+ "transport.socket.connect-path");
+
+ if (!(remote_host_data || connect_path_data) ||
+ (remote_host_data && connect_path_data)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "transport.address-family not specified. "
+ "Could not guess default value from (remote-host:%s or "
+ "transport.unix.connect-path:%s) options",
+ data_to_str(remote_host_data),
+ data_to_str(connect_path_data));
+ *sa_family = AF_UNSPEC;
+ goto out;
+ }
+
+ if (remote_host_data) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "address-family not specified, marking it as unspec "
+ "for getaddrinfo to resolve from (remote-host: %s)",
+ data_to_str(remote_host_data));
+ *sa_family = AF_UNSPEC;
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "address-family not specified, guessing it "
+ "to be unix from (transport.unix.connect-path: %s)",
+ data_to_str(connect_path_data));
+ *sa_family = AF_UNIX;
+ }
+
+ } else {
+ const char *address_family = data_to_str(address_family_data);
+ if (!strcasecmp(address_family, "unix")) {
+ *sa_family = AF_UNIX;
+ } else if (!strcasecmp(address_family, "inet")) {
+ *sa_family = AF_INET;
+ } else if (!strcasecmp(address_family, "inet6")) {
+ *sa_family = AF_INET6;
+ } else if (!strcasecmp(address_family, "inet-sdp")) {
+ *sa_family = AF_INET_SDP;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "unknown address-family (%s) specified", address_family);
+ *sa_family = AF_UNSPEC;
+ goto out;
}
+ }
- connect_path = data_to_str (connect_path_data);
- if (!connect_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "transport.unix.connect-path is null-string");
- ret = -1;
- goto err;
- }
+ ret = 0;
- if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "connect-path value length %"GF_PRI_SIZET" > %d octets",
- strlen (connect_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
+out:
+ return ret;
+}
- gf_log (this->name, GF_LOG_TRACE,
- "using connect-path %s", connect_path);
- sockaddr_un = (struct sockaddr_un *)sockaddr;
- strcpy (sockaddr_un->sun_path, connect_path);
- *sockaddr_len = sizeof (struct sockaddr_un);
+static int32_t
+af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len)
+{
+ dict_t *options = this->options;
+ data_t *remote_host_data = NULL;
+ data_t *remote_port_data = NULL;
+ char *remote_host = NULL;
+ uint16_t remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ struct addrinfo *addr_info = NULL;
+ int32_t ret = 0;
+ struct in6_addr serveraddr;
+
+ remote_host_data = dict_get_sizen(options, "remote-host");
+ if (remote_host_data == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option remote-host missing in volume %s", this->name);
+ ret = -1;
+ goto err;
+ }
+
+ remote_host = data_to_str(remote_host_data);
+ if (remote_host == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option remote-host has data NULL in volume %s", this->name);
+ ret = -1;
+ goto err;
+ }
+
+ remote_port_data = dict_get_sizen(options, "remote-port");
+ if (remote_port_data == NULL) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "option remote-port missing in volume %s. Defaulting to %d",
+ this->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
+ } else {
+ remote_port = data_to_uint16(remote_port_data);
+ if (remote_port == (uint16_t)-1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option remote-port has invalid port in volume %s",
+ this->name);
+ ret = -1;
+ goto err;
+ }
+ }
+
+ /* Need to update transport-address family if address-family is not provided
+ to command-line arguments
+ */
+ if (inet_pton(AF_INET6, remote_host, &serveraddr)) {
+ sockaddr->sa_family = AF_INET6;
+ }
+
+ /* TODO: gf_resolve is a blocking call. kick in some
+ non blocking dns techniques */
+ ret = gf_resolve_ip6(remote_host, remote_port, sockaddr->sa_family,
+ &this->dnscache, &addr_info);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "DNS resolution failed on host %s",
+ remote_host);
+ goto err;
+ }
+
+ memcpy(sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
+ *sockaddr_len = addr_info->ai_addrlen;
err:
- return ret;
+ return ret;
}
static int32_t
-af_unix_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
+af_unix_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len)
{
- data_t *listen_path_data = NULL;
- char *listen_path = NULL;
- int32_t ret = 0;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
-
-
- listen_path_data = dict_get (this->options,
- "transport.socket.listen-path");
- if (!listen_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "missing option transport.socket.listen-path");
- ret = -1;
- goto err;
- }
+ struct sockaddr_un *sockaddr_un = NULL;
+ char *connect_path = NULL;
+ data_t *connect_path_data = NULL;
+ int32_t ret = -1;
+
+ connect_path_data = dict_get_sizen(this->options,
+ "transport.socket.connect-path");
+ if (!connect_path_data) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option transport.unix.connect-path not specified for "
+ "address-family unix");
+ goto err;
+ }
+
+ /* 108 = sockaddr_un->sun_path length */
+ if ((connect_path_data->len + 1) > 108) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "connect-path value length %d > %d octets",
+ connect_path_data->len + 1, UNIX_PATH_MAX);
+ goto err;
+ }
+
+ connect_path = data_to_str(connect_path_data);
+ if (!connect_path) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "transport.unix.connect-path is null-string");
+ goto err;
+ }
+
+ gf_log(this->name, GF_LOG_TRACE, "using connect-path %s", connect_path);
+ sockaddr_un = (struct sockaddr_un *)sockaddr;
+ strcpy(sockaddr_un->sun_path, connect_path);
+ *sockaddr_len = sizeof(struct sockaddr_un);
+
+ ret = 0;
+err:
+ return ret;
+}
- listen_path = data_to_str (listen_path_data);
+static int32_t
+af_unix_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len)
+{
+ data_t *listen_path_data = NULL;
+ char *listen_path = NULL;
+ int32_t ret = 0;
+ struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
+
+ listen_path_data = dict_get_sizen(this->options,
+ "transport.socket.listen-path");
+ if (!listen_path_data) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "missing option transport.socket.listen-path");
+ ret = -1;
+ goto err;
+ }
+
+ listen_path = data_to_str(listen_path_data);
#ifndef UNIX_PATH_MAX
#define UNIX_PATH_MAX 108
#endif
- if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "option transport.unix.listen-path has value length "
- "%"GF_PRI_SIZET" > %d",
- strlen (listen_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
+ if ((listen_path_data->len + 1) > UNIX_PATH_MAX) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option transport.unix.listen-path has value length "
+ "%" GF_PRI_SIZET " > %d",
+ strlen(listen_path), UNIX_PATH_MAX);
+ ret = -1;
+ goto err;
+ }
- sunaddr->sun_family = AF_UNIX;
- strcpy (sunaddr->sun_path, listen_path);
- *addr_len = sizeof (struct sockaddr_un);
+ sunaddr->sun_family = AF_UNIX;
+ strcpy(sunaddr->sun_path, listen_path);
+ *addr_len = sizeof(struct sockaddr_un);
err:
- return ret;
+ return ret;
}
static int32_t
-af_inet_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
+af_inet_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len)
{
- struct addrinfo hints, *res = 0, *rp = NULL;
- data_t *listen_port_data = NULL, *listen_host_data = NULL;
- uint16_t listen_port = -1;
- char service[NI_MAXSERV], *listen_host = NULL;
- dict_t *options = NULL;
- int32_t ret = 0;
-
- options = this->options;
-
- listen_port_data = dict_get (options, "transport.socket.listen-port");
- listen_host_data = dict_get (options, "transport.socket.bind-address");
-
- if (listen_port_data)
- {
- listen_port = data_to_uint16 (listen_port_data);
- }
-
- if (listen_port == (uint16_t) -1)
- listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
-
-
- if (listen_host_data)
- {
- listen_host = data_to_str (listen_host_data);
+ struct addrinfo hints, *res = 0, *rp = NULL;
+ data_t *listen_port_data = NULL, *listen_host_data = NULL;
+ uint16_t listen_port = 0;
+ char service[NI_MAXSERV], *listen_host = NULL;
+ dict_t *options = NULL;
+ int32_t ret = 0;
+
+ /* initializes addr_len */
+ *addr_len = 0;
+
+ options = this->options;
+
+ listen_port_data = dict_get_sizen(options, "transport.socket.listen-port");
+ if (listen_port_data) {
+ listen_port = data_to_uint16(listen_port_data);
+ } else {
+ listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ }
+
+ listen_host_data = dict_get_sizen(options, "transport.socket.bind-address");
+ if (listen_host_data) {
+ listen_host = data_to_str(listen_host_data);
+ } else {
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *in = (struct sockaddr_in6 *)addr;
+ in->sin6_addr = in6addr_any;
+ in->sin6_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in6);
+ goto out;
+ } else if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *in = (struct sockaddr_in *)addr;
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ in->sin_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in);
+ goto out;
+ }
+ }
+
+ sprintf(service, "%d", listen_port);
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = addr->sa_family;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ ret = getaddrinfo(listen_host, service, &hints, &res);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "getaddrinfo failed for host %s, service %s (%s)", listen_host,
+ service, gai_strerror(ret));
+ ret = -1;
+ goto out;
+ }
+ /* IPV6 server can handle both ipv4 and ipv6 clients */
+ for (rp = res; rp != NULL; rp = rp->ai_next) {
+ if (rp->ai_addr == NULL)
+ continue;
+ if (rp->ai_family == AF_INET6) {
+ memcpy(addr, rp->ai_addr, rp->ai_addrlen);
+ *addr_len = rp->ai_addrlen;
+ }
+ }
+
+ if (!(*addr_len)) {
+ if (res && res->ai_addr) {
+ memcpy(addr, res->ai_addr, res->ai_addrlen);
+ *addr_len = res->ai_addrlen;
} else {
- if (addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
- in->sin6_addr = in6addr_any;
- in->sin6_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in6);
- goto out;
- } else if (addr->sa_family == AF_INET) {
- struct sockaddr_in *in = (struct sockaddr_in *) addr;
- in->sin_addr.s_addr = htonl(INADDR_ANY);
- in->sin_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in);
- goto out;
- }
+ ret = -1;
}
+ }
- memset (service, 0, sizeof (service));
- sprintf (service, "%d", listen_port);
-
- memset (&hints, 0, sizeof (hints));
- hints.ai_family = addr->sa_family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
-
- ret = getaddrinfo(listen_host, service, &hints, &res);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
- listen_host, service, gai_strerror (ret));
- ret = -1;
- goto out;
- }
- /* IPV6 server can handle both ipv4 and ipv6 clients */
- for (rp = res; rp != NULL; rp = rp->ai_next) {
- if (rp->ai_addr == NULL)
- continue;
- if (rp->ai_family == AF_INET6) {
- memcpy (addr, rp->ai_addr, rp->ai_addrlen);
- *addr_len = rp->ai_addrlen;
- }
- }
-
- if (!(*addr_len)) {
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
- }
-
- freeaddrinfo (res);
+ freeaddrinfo(res);
out:
- return ret;
+ return ret;
}
int32_t
-client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
+client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, int sock)
{
- int ret = 0;
+ int ret = 0;
- *sockaddr_len = sizeof (struct sockaddr_in6);
- switch (sockaddr->sa_family)
- {
+ *sockaddr_len = sizeof(struct sockaddr_in6);
+ switch (sockaddr->sa_family) {
case AF_INET_SDP:
case AF_INET:
- *sockaddr_len = sizeof (struct sockaddr_in);
-
+ *sockaddr_len = sizeof(struct sockaddr_in);
+ /* Fall through */
case AF_INET6:
- if (!this->bind_insecure) {
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len, CLIENT_PORT_CEILING);
+ if (!this->bind_insecure) {
+ ret = af_inet_bind_to_port_lt_ceiling(
+ sock, sockaddr, *sockaddr_len, GF_CLIENT_PORT_CEILING);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "cannot bind inet socket (%d) "
+ "to port less than %d (%s)",
+ sock, GF_CLIENT_PORT_CEILING, strerror(errno));
+ ret = 0;
}
+ } else {
+ ret = af_inet_bind_to_port_lt_ceiling(
+ sock, sockaddr, *sockaddr_len, GF_IANA_PRIV_PORTS_START);
if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot bind inet socket (%d) to port less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
- ret = 0;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "failed while binding to less than "
+ "%d (%s)",
+ GF_IANA_PRIV_PORTS_START, strerror(errno));
+ ret = 0;
}
- break;
+ }
+ break;
case AF_UNIX:
- *sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
- *sockaddr_len, sock);
- break;
+ *sockaddr_len = sizeof(struct sockaddr_un);
+ ret = af_unix_client_bind(this, (struct sockaddr *)sockaddr,
+ *sockaddr_len, sock);
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family %d", sockaddr->sa_family);
- ret = -1;
- break;
- }
+ gf_log(this->name, GF_LOG_ERROR, "unknown address family %d",
+ sockaddr->sa_family);
+ ret = -1;
+ break;
+ }
- return ret;
+ return ret;
}
int32_t
-socket_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- sa_family_t *sa_family)
+socket_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ sa_family_t *sa_family)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- GF_VALIDATE_OR_GOTO ("socket", sockaddr, err);
- GF_VALIDATE_OR_GOTO ("socket", sockaddr_len, err);
- GF_VALIDATE_OR_GOTO ("socket", sa_family, err);
+ GF_VALIDATE_OR_GOTO("socket", sockaddr, err);
+ GF_VALIDATE_OR_GOTO("socket", sockaddr_len, err);
+ GF_VALIDATE_OR_GOTO("socket", sa_family, err);
- ret = client_fill_address_family (this, &sockaddr->sa_family);
- if (ret) {
- ret = -1;
- goto err;
- }
+ ret = client_fill_address_family(this, &sockaddr->sa_family);
+ if (ret) {
+ ret = -1;
+ goto err;
+ }
- *sa_family = sockaddr->sa_family;
+ *sa_family = sockaddr->sa_family;
- switch (sockaddr->sa_family)
- {
+ switch (sockaddr->sa_family) {
case AF_INET_SDP:
- sockaddr->sa_family = AF_INET;
-
+ sockaddr->sa_family = AF_INET;
+ /* Fall through */
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this, sockaddr,
- sockaddr_len);
- break;
+ ret = af_inet_client_get_remote_sockaddr(this, sockaddr,
+ sockaddr_len);
+ break;
case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this, sockaddr,
- sockaddr_len);
- break;
+ ret = af_unix_client_get_remote_sockaddr(this, sockaddr,
+ sockaddr_len);
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family %d", sockaddr->sa_family);
- ret = -1;
- }
-
- if (*sa_family == AF_UNSPEC) {
- *sa_family = sockaddr->sa_family;
- }
+ gf_log(this->name, GF_LOG_ERROR, "unknown address-family %d",
+ sockaddr->sa_family);
+ ret = -1;
+ }
+
+ /* Address-family is updated based on remote_host in
+ af_inet_client_get_remote_sockaddr
+ */
+ if (*sa_family != sockaddr->sa_family) {
+ *sa_family = sockaddr->sa_family;
+ }
err:
- return ret;
+ return ret;
}
-
-int32_t
-server_fill_address_family (rpc_transport_t *this, sa_family_t *sa_family)
+static int32_t
+server_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
{
- data_t *address_family_data = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("socket", sa_family, out);
-
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (address_family_data) {
- char *address_family = NULL;
- address_family = data_to_str (address_family_data);
-
- if (!strcasecmp (address_family, "inet")) {
- *sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- *sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- *sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "unix")) {
- *sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- *sa_family = AF_UNSPEC;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%s) specified", address_family);
- goto out;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "option address-family not specified, defaulting to inet/inet6");
- *sa_family = AF_UNSPEC;
- }
+ data_t *address_family_data = NULL;
+ int32_t ret = -1;
- ret = 0;
+#ifdef IPV6_DEFAULT
+ const char *addr_family = "inet6";
+ sa_family_t default_family = AF_INET6;
+#else
+ const char *addr_family = "inet";
+ sa_family_t default_family = AF_INET;
+#endif
+
+ GF_VALIDATE_OR_GOTO("socket", sa_family, out);
+
+ address_family_data = dict_get_sizen(this->options,
+ "transport.address-family");
+ if (address_family_data) {
+ char *address_family = NULL;
+ address_family = data_to_str(address_family_data);
+
+ if (!strcasecmp(address_family, "inet")) {
+ *sa_family = AF_INET;
+ } else if (!strcasecmp(address_family, "inet6")) {
+ *sa_family = AF_INET6;
+ } else if (!strcasecmp(address_family, "inet-sdp")) {
+ *sa_family = AF_INET_SDP;
+ } else if (!strcasecmp(address_family, "unix")) {
+ *sa_family = AF_UNIX;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "unknown address family (%s) specified", address_family);
+ *sa_family = AF_UNSPEC;
+ goto out;
+ }
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "option address-family not specified, "
+ "defaulting to %s",
+ addr_family);
+ *sa_family = default_family;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int32_t
-socket_server_get_local_sockaddr (rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len, sa_family_t *sa_family)
+socket_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len, sa_family_t *sa_family)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("socket", sa_family, err);
- GF_VALIDATE_OR_GOTO ("socket", addr, err);
- GF_VALIDATE_OR_GOTO ("socket", addr_len, err);
+ GF_VALIDATE_OR_GOTO("socket", sa_family, err);
+ GF_VALIDATE_OR_GOTO("socket", addr, err);
+ GF_VALIDATE_OR_GOTO("socket", addr_len, err);
- ret = server_fill_address_family (this, &addr->sa_family);
- if (ret == -1) {
- goto err;
- }
+ ret = server_fill_address_family(this, &addr->sa_family);
+ if (ret == -1) {
+ goto err;
+ }
- *sa_family = addr->sa_family;
+ *sa_family = addr->sa_family;
- switch (addr->sa_family)
- {
+ switch (addr->sa_family) {
case AF_INET_SDP:
- addr->sa_family = AF_INET;
-
+ addr->sa_family = AF_INET;
+ /* Fall through */
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
- ret = af_inet_server_get_local_sockaddr (this, addr, addr_len);
- break;
+ ret = af_inet_server_get_local_sockaddr(this, addr, addr_len);
+ break;
case AF_UNIX:
- ret = af_unix_server_get_local_sockaddr (this, addr, addr_len);
- break;
- }
+ ret = af_unix_server_get_local_sockaddr(this, addr, addr_len);
+ break;
+ }
- if (*sa_family == AF_UNSPEC) {
- *sa_family = addr->sa_family;
- }
+ if (*sa_family == AF_UNSPEC) {
+ *sa_family = addr->sa_family;
+ }
err:
- return ret;
+ return ret;
}
-int32_t
-fill_inet6_inet_identifiers (rpc_transport_t *this, struct sockaddr_storage *addr,
- int32_t addr_len, char *identifier)
+static int32_t
+fill_inet6_inet_identifiers(rpc_transport_t *this,
+ struct sockaddr_storage *addr, int32_t addr_len,
+ char *identifier)
{
- union gf_sock_union sock_union;
-
- char service[NI_MAXSERV] = {0,};
- char host[NI_MAXHOST] = {0,};
- int32_t ret = 0;
- int32_t tmpaddr_len = 0;
- int32_t one_to_four = 0;
- int32_t four_to_eight = 0;
- int32_t twelve_to_sixteen = 0;
- int16_t eight_to_ten = 0;
- int16_t ten_to_twelve = 0;
-
- memset (&sock_union, 0, sizeof (sock_union));
- sock_union.storage = *addr;
- tmpaddr_len = addr_len;
-
- if (sock_union.sa.sa_family == AF_INET6) {
- one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
- four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
+ union gf_sock_union sock_union;
+
+ char service[NI_MAXSERV] = {
+ 0,
+ };
+ char host[NI_MAXHOST] = {
+ 0,
+ };
+ int32_t ret = 0;
+ int32_t tmpaddr_len = 0;
+ int32_t one_to_four = 0;
+ int32_t four_to_eight = 0;
+ int32_t twelve_to_sixteen = 0;
+ int16_t eight_to_ten = 0;
+ int16_t ten_to_twelve = 0;
+
+ memset(&sock_union, 0, sizeof(sock_union));
+ sock_union.storage = *addr;
+ tmpaddr_len = addr_len;
+
+ if (sock_union.sa.sa_family == AF_INET6) {
+ one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
+ four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
+ eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
#else
- eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
+ eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
#endif
#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
+ ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
#else
- ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
+ ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
#endif
- twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
-
- /* ipv4 mapped ipv6 address has
- bits 0-80: 0
- bits 80-96: 0xffff
- bits 96-128: ipv4 address
- */
-
- if (one_to_four == 0 &&
- four_to_eight == 0 &&
- eight_to_ten == 0 &&
- ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = &sock_union.sin;
- memset (&sock_union, 0, sizeof (sock_union));
-
- in_ptr->sin_family = AF_INET;
- in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
- in_ptr->sin_addr.s_addr = twelve_to_sixteen;
- tmpaddr_len = sizeof (*in_ptr);
- }
- }
+ twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
+
+ /* ipv4 mapped ipv6 address has
+ bits 0-80: 0
+ bits 80-96: 0xffff
+ bits 96-128: ipv4 address
+ */
- ret = getnameinfo (&sock_union.sa,
- tmpaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
+ if (one_to_four == 0 && four_to_eight == 0 && eight_to_ten == 0 &&
+ ten_to_twelve == -1) {
+ struct sockaddr_in *in_ptr = &sock_union.sin;
+ memset(&sock_union, 0, sizeof(sock_union));
+
+ in_ptr->sin_family = AF_INET;
+ in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
+ in_ptr->sin_addr.s_addr = twelve_to_sixteen;
+ tmpaddr_len = sizeof(*in_ptr);
}
+ }
+
+ ret = getnameinfo(&sock_union.sa, tmpaddr_len, host, sizeof(host), service,
+ sizeof(service), NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "getnameinfo failed (%s)",
+ gai_strerror(ret));
+ }
- sprintf (identifier, "%s:%s", host, service);
+ sprintf(identifier, "%s:%s", host, service);
- return ret;
+ return ret;
}
int32_t
-get_transport_identifiers (rpc_transport_t *this)
+get_transport_identifiers(rpc_transport_t *this)
{
- int32_t ret = 0;
- char is_inet_sdp = 0;
+ int32_t ret = 0;
+ char is_inet_sdp = 0;
- switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family)
- {
+ switch (((struct sockaddr *)&this->myinfo.sockaddr)->sa_family) {
case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET;
-
+ is_inet_sdp = 1;
+ ((struct sockaddr *)&this->peerinfo.sockaddr)
+ ->sa_family = ((struct sockaddr *)&this->myinfo.sockaddr)
+ ->sa_family = AF_INET;
+ /* Fall through */
case AF_INET:
- case AF_INET6:
- {
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- this->myinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot fill inet/inet6 identifier for server");
- goto err;
- }
-
- ret = fill_inet6_inet_identifiers (this,
- &this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len,
- this->peerinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot fill inet/inet6 identifier for client");
- goto err;
- }
+ case AF_INET6: {
+ ret = fill_inet6_inet_identifiers(this, &this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len,
+ this->myinfo.identifier);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "cannot fill inet/inet6 identifier for server");
+ goto err;
+ }
+
+ ret = fill_inet6_inet_identifiers(this, &this->peerinfo.sockaddr,
+ this->peerinfo.sockaddr_len,
+ this->peerinfo.identifier);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "cannot fill inet/inet6 identifier for client");
+ goto err;
+ }
- if (is_inet_sdp) {
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP;
- }
- }
- break;
+ if (is_inet_sdp) {
+ ((struct sockaddr *)&this->peerinfo.sockaddr)
+ ->sa_family = ((struct sockaddr *)&this->myinfo.sockaddr)
+ ->sa_family = AF_INET_SDP;
+ }
+ } break;
- case AF_UNIX:
- {
- struct sockaddr_un *sunaddr = NULL;
+ case AF_UNIX: {
+ struct sockaddr_un *sunaddr = NULL;
- sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr;
- strcpy (this->myinfo.identifier, sunaddr->sun_path);
+ sunaddr = (struct sockaddr_un *)&this->myinfo.sockaddr;
+ strcpy(this->myinfo.identifier, sunaddr->sun_path);
- sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr;
- strcpy (this->peerinfo.identifier, sunaddr->sun_path);
- }
- break;
+ sunaddr = (struct sockaddr_un *)&this->peerinfo.sockaddr;
+ strcpy(this->peerinfo.identifier, sunaddr->sun_path);
+ } break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%d)",
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
- ret = -1;
- break;
- }
+ gf_log(this->name, GF_LOG_ERROR, "unknown address family (%d)",
+ ((struct sockaddr *)&this->myinfo.sockaddr)->sa_family);
+ ret = -1;
+ break;
+ }
err:
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-transport/socket/src/name.h b/rpc/rpc-transport/socket/src/name.h
index 0a13d8a9624..080c7588f5a 100644
--- a/rpc/rpc-transport/socket/src/name.h
+++ b/rpc/rpc-transport/socket/src/name.h
@@ -11,25 +11,23 @@
#ifndef _SOCKET_NAME_H
#define _SOCKET_NAME_H
-#include "compat.h"
+#include <glusterfs/compat.h>
int32_t
-client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
+client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, int sock);
int32_t
-socket_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- sa_family_t *sa_family);
+socket_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ sa_family_t *sa_family);
int32_t
-socket_server_get_local_sockaddr (rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len, sa_family_t *sa_family);
+socket_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len, sa_family_t *sa_family);
int32_t
-get_transport_identifiers (rpc_transport_t *this);
+get_transport_identifiers(rpc_transport_t *this);
#endif /* _SOCKET_NAME_H */
diff --git a/rpc/rpc-transport/socket/src/socket-mem-types.h b/rpc/rpc-transport/socket/src/socket-mem-types.h
new file mode 100644
index 00000000000..241ce67f670
--- /dev/null
+++ b/rpc/rpc-transport/socket/src/socket-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __SOCKET_MEM_TYPES_H__
+#define __SOCKET_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+typedef enum gf_sock_mem_types_ {
+ gf_sock_connect_error_state_t = gf_common_mt_end + 1,
+ gf_sock_mt_lock_array,
+ gf_sock_mt_end
+} gf_sock_mem_types_t;
+
+#endif
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 6c2d909e43a..ed8b473be23 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -8,792 +8,1463 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "socket.h"
#include "name.h"
-#include "dict.h"
-#include "rpc-transport.h"
-#include "logging.h"
-#include "xlator.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-
+#include <glusterfs/dict.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include "socket-mem-types.h"
/* ugly #includes below */
#include "protocol-common.h"
#include "glusterfs3-xdr.h"
-#include "xdr-nfs3.h"
+#include "glusterfs4-xdr.h"
#include "rpcsvc.h"
-#include <fcntl.h>
-#include <errno.h>
+/* for TCP_USER_TIMEOUT */
+#if !defined(TCP_USER_TIMEOUT) && defined(GF_LINUX_HOST_OS)
+#include <linux/tcp.h>
+#else
#include <netinet/tcp.h>
+#endif
+
+#include <errno.h>
#include <rpc/xdr.h>
+#include <sys/ioctl.h>
#define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
#define SA(ptr) ((struct sockaddr *)ptr)
+#define SSL_ENABLED_OPT "transport.socket.ssl-enabled"
+#define SSL_OWN_CERT_OPT "transport.socket.ssl-own-cert"
+#define SSL_PRIVATE_KEY_OPT "transport.socket.ssl-private-key"
+#define SSL_CA_LIST_OPT "transport.socket.ssl-ca-list"
+#define SSL_CERT_DEPTH_OPT "transport.socket.ssl-cert-depth"
+#define SSL_CIPHER_LIST_OPT "transport.socket.ssl-cipher-list"
+#define SSL_DH_PARAM_OPT "transport.socket.ssl-dh-param"
+#define SSL_EC_CURVE_OPT "transport.socket.ssl-ec-curve"
+#define SSL_CRL_PATH_OPT "transport.socket.ssl-crl-path"
+#define OWN_THREAD_OPT "transport.socket.own-thread"
+
+/* TBD: do automake substitutions etc. (ick) to set these. */
+#if !defined(DEFAULT_ETC_SSL)
+#ifdef GF_LINUX_HOST_OS
+#define DEFAULT_ETC_SSL "/etc/ssl"
+#endif
+#ifdef GF_BSD_HOST_OS
+#define DEFAULT_ETC_SSL "/etc/openssl"
+#endif
+#ifdef GF_DARWIN_HOST_OS
+#define DEFAULT_ETC_SSL "/usr/local/etc/openssl"
+#endif
+#if !defined(DEFAULT_ETC_SSL)
+#define DEFAULT_ETC_SSL "/etc/ssl"
+#endif
+#endif
-#define __socket_proto_reset_pending(priv) do { \
- memset (&priv->incoming.frag.vector, 0, \
- sizeof (priv->incoming.frag.vector)); \
- priv->incoming.frag.pending_vector = \
- &priv->incoming.frag.vector; \
- priv->incoming.frag.pending_vector->iov_base = \
- priv->incoming.frag.fragcurrent; \
- priv->incoming.pending_vector = \
- priv->incoming.frag.pending_vector; \
- } while (0);
-
-
-#define __socket_proto_update_pending(priv) \
- do { \
- uint32_t remaining_fragsize = 0; \
- if (priv->incoming.frag.pending_vector->iov_len == 0) { \
- remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
- - priv->incoming.frag.bytes_read; \
- \
- priv->incoming.frag.pending_vector->iov_len = \
- remaining_fragsize > priv->incoming.frag.remaining_size \
- ? priv->incoming.frag.remaining_size : remaining_fragsize; \
- \
- priv->incoming.frag.remaining_size -= \
- priv->incoming.frag.pending_vector->iov_len; \
- } \
- } while (0);
-
-#define __socket_proto_update_priv_after_read(priv, ret, bytes_read) \
- { \
- priv->incoming.frag.fragcurrent += bytes_read; \
- priv->incoming.frag.bytes_read += bytes_read; \
- \
- if ((ret > 0) || (priv->incoming.frag.remaining_size != 0)) { \
- if (priv->incoming.frag.remaining_size != 0 && ret == 0) { \
- __socket_proto_reset_pending (priv); \
- } \
- \
- gf_log (this->name, GF_LOG_TRACE, "partial read on non-blocking socket"); \
- \
- break; \
- } \
- }
-
-#define __socket_proto_init_pending(priv, size) \
- do { \
- uint32_t remaining_fragsize = 0; \
- remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
- - priv->incoming.frag.bytes_read; \
- \
- __socket_proto_reset_pending (priv); \
- \
- priv->incoming.frag.pending_vector->iov_len = \
- remaining_fragsize > size ? size : remaining_fragsize; \
- \
- priv->incoming.frag.remaining_size = \
- size - priv->incoming.frag.pending_vector->iov_len; \
- \
- } while (0);
-
+#if !defined(DEFAULT_CERT_PATH)
+#define DEFAULT_CERT_PATH DEFAULT_ETC_SSL "/glusterfs.pem"
+#endif
+#if !defined(DEFAULT_KEY_PATH)
+#define DEFAULT_KEY_PATH DEFAULT_ETC_SSL "/glusterfs.key"
+#endif
+#if !defined(DEFAULT_CA_PATH)
+#define DEFAULT_CA_PATH DEFAULT_ETC_SSL "/glusterfs.ca"
+#endif
+#if !defined(DEFAULT_VERIFY_DEPTH)
+#define DEFAULT_VERIFY_DEPTH 1
+#endif
+#define DEFAULT_CIPHER_LIST "EECDH:EDH:HIGH:!3DES:!RC4:!DES:!MD5:!aNULL:!eNULL"
+#define DEFAULT_DH_PARAM DEFAULT_ETC_SSL "/dhparam.pem"
+#define DEFAULT_EC_CURVE "prime256v1"
+
+#define POLL_MASK_INPUT (POLLIN | POLLPRI)
+#define POLL_MASK_OUTPUT (POLLOUT)
+#define POLL_MASK_ERROR (POLLERR | POLLHUP | POLLNVAL)
+
+typedef int
+SSL_unary_func(SSL *);
+typedef int
+SSL_trinary_func(SSL *, void *, int);
+static int
+ssl_setup_connection_params(rpc_transport_t *this);
+
+#define __socket_proto_reset_pending(priv) \
+ do { \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ memset(&frag->vector, 0, sizeof(frag->vector)); \
+ frag->pending_vector = &frag->vector; \
+ frag->pending_vector->iov_base = frag->fragcurrent; \
+ priv->incoming.pending_vector = frag->pending_vector; \
+ } while (0)
+
+#define __socket_proto_update_pending(priv) \
+ do { \
+ uint32_t remaining; \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ if (frag->pending_vector->iov_len == 0) { \
+ remaining = (RPC_FRAGSIZE(priv->incoming.fraghdr) - \
+ frag->bytes_read); \
+ \
+ frag->pending_vector->iov_len = (remaining > frag->remaining_size) \
+ ? frag->remaining_size \
+ : remaining; \
+ \
+ frag->remaining_size -= frag->pending_vector->iov_len; \
+ } \
+ } while (0)
+
+#define __socket_proto_update_priv_after_read(priv, ret, bytes_read) \
+ { \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ frag->fragcurrent += bytes_read; \
+ frag->bytes_read += bytes_read; \
+ \
+ if ((ret > 0) || (frag->remaining_size != 0)) { \
+ if (frag->remaining_size != 0 && ret == 0) { \
+ __socket_proto_reset_pending(priv); \
+ } \
+ \
+ gf_log(this->name, GF_LOG_TRACE, \
+ "partial read on non-blocking socket"); \
+ ret = 0; \
+ break; \
+ } \
+ }
+
+#define __socket_proto_init_pending(priv, size) \
+ do { \
+ uint32_t remaining = 0; \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ remaining = (RPC_FRAGSIZE(priv->incoming.fraghdr) - frag->bytes_read); \
+ \
+ __socket_proto_reset_pending(priv); \
+ \
+ frag->pending_vector->iov_len = (remaining > size) ? size : remaining; \
+ \
+ frag->remaining_size = (size - frag->pending_vector->iov_len); \
+ \
+ } while (0)
/* This will be used in a switch case and breaks from the switch case if all
* the pending data is not read.
*/
-#define __socket_proto_read(priv, ret) \
- { \
- size_t bytes_read = 0; \
- \
- __socket_proto_update_pending (priv); \
- \
- ret = __socket_readv (this, \
- priv->incoming.pending_vector, 1, \
- &priv->incoming.pending_vector, \
- &priv->incoming.pending_count, \
- &bytes_read); \
- if (ret == -1) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "reading from socket failed. Error (%s), " \
- "peer (%s)", strerror (errno), \
- this->peerinfo.identifier); \
- break; \
- } \
- __socket_proto_update_priv_after_read (priv, ret, bytes_read); \
- }
-
-
-int socket_init (rpc_transport_t *this);
+#define __socket_proto_read(priv, ret) \
+ { \
+ size_t bytes_read = 0; \
+ struct gf_sock_incoming *in; \
+ in = &priv->incoming; \
+ \
+ __socket_proto_update_pending(priv); \
+ \
+ ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \
+ &in->pending_count, &bytes_read); \
+ if (ret < 0) \
+ break; \
+ __socket_proto_update_priv_after_read(priv, ret, bytes_read); \
+ }
+
+struct socket_connect_error_state_ {
+ xlator_t *this;
+ rpc_transport_t *trans;
+ gf_boolean_t refd;
+};
+typedef struct socket_connect_error_state_ socket_connect_error_state_t;
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
+static int
+socket_init(rpc_transport_t *this);
+static int
+__socket_nonblock(int fd);
-int
-__socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count, size_t *bytes,
- int write)
+static void
+socket_dump_info(struct sockaddr *sa, int is_server, int is_ssl, int sock,
+ char *log_domain, char *log_label)
{
- socket_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = NULL;
- int opcount = 0;
- int moved = 0;
+ char addr_buf[INET6_ADDRSTRLEN + 1] = {
+ 0,
+ };
+ char *addr = NULL;
+ const char *peer_type = NULL;
+ int af = sa->sa_family;
+ int so_error = -1;
+ socklen_t slen = sizeof(so_error);
+
+ if (af == AF_UNIX) {
+ addr = ((struct sockaddr_un *)(sa))->sun_path;
+ } else {
+ if (af == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(sa);
+
+ inet_ntop(af, &sin6->sin6_addr, addr_buf, sizeof(addr_buf));
+ addr = addr_buf;
+ } else {
+ struct sockaddr_in *sin = (struct sockaddr_in *)(sa);
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ inet_ntop(af, &sin->sin_addr, addr_buf, sizeof(addr_buf));
+ addr = addr_buf;
+ }
+ }
+ if (is_server)
+ peer_type = "server";
+ else
+ peer_type = "client";
+
+ (void)getsockopt(sock, SOL_SOCKET, SO_ERROR, &so_error, &slen);
+
+ gf_log(log_domain, GF_LOG_TRACE,
+ "$$$ %s: %s (af:%d,sock:%d) %s %s (errno:%d:%s)", peer_type,
+ log_label, af, sock, addr, (is_ssl ? "SSL" : "non-SSL"), so_error,
+ strerror(so_error));
+}
- priv = this->private;
- sock = priv->sock;
+static void
+ssl_dump_error_stack(const char *caller)
+{
+ unsigned long errnum = 0;
+ char errbuf[120] = {
+ 0,
+ };
- opvector = vector;
- opcount = count;
+ /* OpenSSL docs explicitly give 120 as the error-string length. */
- if (bytes != NULL) {
- *bytes = 0;
+ while ((errnum = ERR_get_error())) {
+ ERR_error_string(errnum, errbuf);
+ gf_log(caller, GF_LOG_ERROR, " %s", errbuf);
+ }
+}
+
+static int
+ssl_do(rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func)
+{
+ int r = (-1);
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (buf) {
+ if (priv->connected == -1) {
+ /*
+ * Fields in the SSL structure (especially
+ * the BIO pointers) are not valid at this
+ * point, so we'll segfault if we pass them
+ * to SSL_read/SSL_write.
+ */
+ gf_log(this->name, GF_LOG_INFO, "lost connection in %s", __func__);
+ return -1;
}
+ r = func(priv->ssl_ssl, buf, len);
+ } else {
+ /* This should be treated as error */
+ gf_log(this->name, GF_LOG_ERROR, "buffer is empty %s", __func__);
+ goto out;
+ }
+ switch (SSL_get_error(priv->ssl_ssl, r)) {
+ case SSL_ERROR_NONE:
+ /* fall through */
+ case SSL_ERROR_WANT_READ:
+ /* fall through */
+ case SSL_ERROR_WANT_WRITE:
+ errno = EAGAIN;
+ return r;
+
+ case SSL_ERROR_SYSCALL:
+ /* Sometimes SSL_ERROR_SYSCALL returns errno as
+ * EAGAIN. In such a case we should reattempt operation
+ * So, for now, just return the return value and the
+ * errno as is.
+ */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "syscall error (probably remote disconnect) "
+ "errno:%d:%s",
+ errno, strerror(errno));
+ return r;
+ default:
+ errno = EIO;
+ goto out; /* "break" would just loop again */
+ }
+out:
+ return -1;
+}
- while (opcount) {
- if (write) {
- ret = writev (sock, opvector, opcount);
+#define ssl_read_one(t, b, l) \
+ ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_read)
+#define ssl_write_one(t, b, l) \
+ ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_write)
+
+/* set crl verify flags only for server */
+/* see man X509_VERIFY_PARAM_SET_FLAGS(3)
+ * X509_V_FLAG_CRL_CHECK enables CRL checking for the certificate chain
+ * leaf certificate. An error occurs if a suitable CRL cannot be found.
+ * Since we're never going to revoke a gluster node cert, we better disable
+ * CRL check for server certs to avoid getting error and failed connection
+ * attempts.
+ */
+static void
+ssl_clear_crl_verify_flags(SSL_CTX *ssl_ctx)
+{
+#ifdef X509_V_FLAG_CRL_CHECK_ALL
+#ifdef HAVE_SSL_CTX_GET0_PARAM
+ X509_VERIFY_PARAM *vpm;
+
+ vpm = SSL_CTX_get0_param(ssl_ctx);
+ if (vpm) {
+ X509_VERIFY_PARAM_clear_flags(
+ vpm, (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL));
+ }
+#else
+ /* CRL verify flag need not be cleared for rhel6 kind of clients */
+#endif
+#else
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
+#endif
+ return;
+}
- if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
- /* done for now */
- break;
- }
- this->total_bytes_write += ret;
- } else {
- ret = readv (sock, opvector, opcount);
- if (ret == -1 && errno == EAGAIN) {
- /* done for now */
- break;
- }
- this->total_bytes_read += ret;
- }
+/* set crl verify flags only for server */
+static void
+ssl_set_crl_verify_flags(SSL_CTX *ssl_ctx)
+{
+#ifdef X509_V_FLAG_CRL_CHECK_ALL
+#ifdef HAVE_SSL_CTX_GET0_PARAM
+ X509_VERIFY_PARAM *vpm;
+
+ vpm = SSL_CTX_get0_param(ssl_ctx);
+ if (vpm) {
+ unsigned long flags;
+
+ flags = X509_VERIFY_PARAM_get_flags(vpm);
+ flags |= (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
+ X509_VERIFY_PARAM_set_flags(vpm, flags);
+ }
+#else
+ X509_STORE *x509store;
- if (ret == 0) {
- /* Mostly due to 'umount' in client */
+ x509store = SSL_CTX_get_cert_store(ssl_ctx);
+ X509_STORE_set_flags(x509store,
+ X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
+#endif
+#else
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
+#endif
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
- if (ret == -1) {
- if (errno == EINTR)
- continue;
-
- gf_log (this->name, GF_LOG_WARNING,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- opcount = -1;
- break;
- }
+static int
+ssl_setup_connection_prefix(rpc_transport_t *this, gf_boolean_t server)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (ssl_setup_connection_params(this) < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "+ ssl_setup_connection_params() failed!");
+ goto done;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ "+ ssl_setup_connection_params() done!");
+ }
+
+ priv->ssl_error_required = SSL_ERROR_NONE;
+ priv->ssl_connected = _gf_false;
+ priv->ssl_accepted = _gf_false;
+ priv->ssl_context_created = _gf_false;
+
+ if (!server && priv->crl_path)
+ ssl_clear_crl_verify_flags(priv->ssl_ctx);
+
+ priv->ssl_ssl = SSL_new(priv->ssl_ctx);
+ if (!priv->ssl_ssl) {
+ gf_log(this->name, GF_LOG_ERROR, "SSL_new failed");
+ ssl_dump_error_stack(this->name);
+ goto done;
+ }
+
+ priv->ssl_sbio = BIO_new_socket(priv->sock, BIO_NOCLOSE);
+ if (!priv->ssl_sbio) {
+ gf_log(this->name, GF_LOG_ERROR, "BIO_new_socket failed");
+ ssl_dump_error_stack(this->name);
+ goto free_ssl;
+ }
+
+ SSL_set_bio(priv->ssl_ssl, priv->ssl_sbio, priv->ssl_sbio);
+ ret = 0;
+ goto done;
+
+free_ssl:
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+done:
+ return ret;
+}
- if (bytes != NULL) {
- *bytes += ret;
- }
+static char *
+ssl_setup_connection_postfix(rpc_transport_t *this)
+{
+ X509 *peer = NULL;
+ char peer_CN[256] = "";
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* Make sure _SSL verification_ succeeded, yielding an identity. */
+ if (SSL_get_verify_result(priv->ssl_ssl) != X509_V_OK) {
+ goto ssl_error;
+ }
+ peer = SSL_get_peer_certificate(priv->ssl_ssl);
+ if (!peer) {
+ goto ssl_error;
+ }
+
+ SSL_set_mode(priv->ssl_ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
+
+ /* Finally, everything seems OK. */
+ X509_NAME_get_text_by_NID(X509_get_subject_name(peer), NID_commonName,
+ peer_CN, sizeof(peer_CN) - 1);
+ peer_CN[sizeof(peer_CN) - 1] = '\0';
+ gf_log(this->name, GF_LOG_DEBUG, "peer CN = %s", peer_CN);
+ gf_log(this->name, GF_LOG_DEBUG,
+ "SSL verification succeeded (client: %s) (server: %s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
+ X509_free(peer);
+ return gf_strdup(peer_CN);
+
+ /* Error paths. */
+ssl_error:
+ gf_log(this->name, GF_LOG_ERROR,
+ "SSL connect error (client: %s) (server: %s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
+ ssl_dump_error_stack(this->name);
+
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ return NULL;
+}
- moved = 0;
-
- while (moved < ret) {
- if ((ret - moved) >= opvector[0].iov_len) {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- } else {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len) {
- opvector++;
- opcount--;
- }
+static int
+ssl_complete_connection(rpc_transport_t *this)
+{
+ int ret = -1; /* 1 : implies go back to epoll_wait()
+ * 0 : implies successful ssl connection
+ * -1: implies continue processing current event
+ * as if EPOLLERR has been encountered
+ */
+ char *cname = NULL;
+ int r = -1;
+ int ssl_error = -1;
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->is_server) {
+ r = SSL_accept(priv->ssl_ssl);
+ } else {
+ r = SSL_connect(priv->ssl_ssl);
+ }
+
+ ssl_error = SSL_get_error(priv->ssl_ssl, r);
+ priv->ssl_error_required = ssl_error;
+
+ switch (ssl_error) {
+ case SSL_ERROR_NONE:
+ cname = ssl_setup_connection_postfix(this);
+ if (!cname) {
+ /* we've failed to get the cname so
+ * we must close the connection
+ *
+ * treat this as EPOLLERR
+ */
+ gf_log(this->name, GF_LOG_TRACE, "error getting cname");
+ errno = ECONNRESET;
+ ret = -1;
+ } else {
+ this->ssl_name = cname;
+ if (priv->is_server) {
+ priv->ssl_accepted = _gf_true;
+ gf_log(this->name, GF_LOG_TRACE, "ssl_accepted!");
+ } else {
+ priv->ssl_connected = _gf_true;
+ gf_log(this->name, GF_LOG_TRACE, "ssl_connected!");
}
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
+ ret = 0;
+ }
+ break;
+
+ case SSL_ERROR_WANT_READ:
+ /* fall through */
+ case SSL_ERROR_WANT_WRITE:
+ errno = EAGAIN;
+ break;
+
+ case SSL_ERROR_SYSCALL:
+ /* Sometimes SSL_ERROR_SYSCALL returns with errno as EAGAIN
+ * So, we should retry the operation.
+ * So, for now, we just return the return value and errno as is.
+ */
+ break;
+
+ case SSL_ERROR_SSL:
+ /* treat this as EPOLLERR */
+ ret = -1;
+ break;
+
+ default:
+ /* treat this as EPOLLERR */
+ errno = EIO;
+ ret = -1;
+ break;
+ }
+ return ret;
+}
-out:
- return opcount;
+static void
+ssl_teardown_connection(socket_private_t *priv)
+{
+ if (priv->ssl_ssl) {
+ SSL_shutdown(priv->ssl_ssl);
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ssl = NULL;
+ priv->ssl_ctx = NULL;
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ priv->ssl_private_key = NULL;
+ }
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ priv->ssl_own_cert = NULL;
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ priv->ssl_ca_list = NULL;
+ }
+ }
+ priv->use_ssl = _gf_false;
}
+static ssize_t
+__socket_ssl_readv(rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ int ret = -1;
+
+ priv = this->private;
+ sock = priv->sock;
+
+ if (priv->use_ssl) {
+ gf_log(this->name, GF_LOG_TRACE, "***** reading over SSL");
+ ret = ssl_read_one(this, opvector->iov_base, opvector->iov_len);
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "***** reading over non-SSL");
+ ret = sys_readv(sock, opvector, IOV_MIN(opcount));
+ }
+
+ return ret;
+}
-int
-__socket_readv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- size_t *bytes)
+static ssize_t
+__socket_ssl_read(rpc_transport_t *this, void *buf, size_t count)
{
- int ret = -1;
+ struct iovec iov = {
+ 0,
+ };
+ int ret = -1;
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, bytes, 0);
+ iov.iov_base = buf;
+ iov.iov_len = count;
- return ret;
+ ret = __socket_ssl_readv(this, &iov, 1);
+
+ return ret;
}
+static int
+__socket_cached_read(rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ struct gf_sock_incoming *in = NULL;
+ int req_len = -1;
+ int ret = -1;
+
+ priv = this->private;
+ in = &priv->incoming;
+ req_len = iov_length(opvector, opcount);
+
+ if (in->record_state == SP_STATE_READING_FRAGHDR) {
+ in->ra_read = 0;
+ in->ra_served = 0;
+ in->ra_max = 0;
+ in->ra_buf = NULL;
+ goto uncached;
+ }
+
+ if (!in->ra_max) {
+ /* first call after passing SP_STATE_READING_FRAGHDR */
+ in->ra_max = min(RPC_FRAGSIZE(in->fraghdr), GF_SOCKET_RA_MAX);
+ /* Note that the in->iobuf is the primary iobuf into which
+ headers are read into, and in->frag.fragcurrent points to
+ some position in the buffer. By using this itself as our
+ read-ahead cache, we can avoid memory copies in iov_load
+ */
+ in->ra_buf = in->frag.fragcurrent;
+ }
+
+ /* fill read-ahead */
+ if (in->ra_read < in->ra_max) {
+ ret = __socket_ssl_read(this, &in->ra_buf[in->ra_read],
+ (in->ra_max - in->ra_read));
+ if (ret > 0)
+ in->ra_read += ret;
+
+ /* we proceed to test if there is still cached data to
+ be served even if readahead could not progress */
+ }
+
+ /* serve cached */
+ if (in->ra_served < in->ra_read) {
+ ret = iov_load(opvector, opcount, &in->ra_buf[in->ra_served],
+ min(req_len, (in->ra_read - in->ra_served)));
+
+ in->ra_served += ret;
+ /* Do not read uncached and cached in the same call */
+ goto out;
+ }
+
+ if (in->ra_read < in->ra_max)
+ /* If there was no cached data to be served, (and we are
+ guaranteed to have already performed an attempt to progress
+ readahead above), and we have not yet read out the full
+ readahead capacity, then bail out for now without doing
+ the uncached read below (as that will overtake future cached
+ read)
+ */
+ goto out;
+uncached:
+ ret = __socket_ssl_readv(this, opvector, opcount);
+out:
+ return ret;
+}
-int
-__socket_writev (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
+static gf_boolean_t
+__does_socket_rwv_error_need_logging(socket_private_t *priv, int write)
{
- int ret = -1;
+ int read = !write;
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, NULL, 1);
+ if (priv->connected == -1) /* Didn't even connect, of course it fails */
+ return _gf_false;
- return ret;
+ if (read && (priv->read_fail_log == _gf_false))
+ return _gf_false;
+
+ return _gf_true;
}
+/*
+ * return value:
+ * 0 = success (completed)
+ * -1 = error
+ * > 0 = incomplete
+ */
-int
-__socket_disconnect (rpc_transport_t *this)
+static int
+__socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count, size_t *bytes,
+ int write)
{
- socket_private_t *priv = NULL;
- int ret = -1;
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ int ret = -1;
+ struct iovec *opvector = NULL;
+ int opcount = 0;
+ int moved = 0;
+
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+ sock = priv->sock;
+
+ opvector = vector;
+ opcount = count;
+
+ if (bytes != NULL) {
+ *bytes = 0;
+ }
+
+ while (opcount > 0) {
+ if (opvector->iov_len == 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "would have passed zero length to read/write");
+ ++opvector;
+ --opcount;
+ continue;
+ }
+ if (priv->use_ssl && !priv->ssl_ssl) {
+ /*
+ * We could end up here with priv->ssl_ssl still NULL
+ * if (a) the connection failed and (b) some fool
+ * called other socket functions anyway. Demoting to
+ * non-SSL might be insecure, so just fail it outright.
+ */
+ ret = -1;
+ gf_log(this->name, GF_LOG_TRACE,
+ "### no priv->ssl_ssl yet; ret = -1;");
+ } else if (write) {
+ if (priv->use_ssl) {
+ ret = ssl_write_one(this, opvector->iov_base,
+ opvector->iov_len);
+ } else {
+ ret = sys_writev(sock, opvector, IOV_MIN(opcount));
+ }
+
+ if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) {
+ /* done for now */
+ break;
+ } else if (ret > 0)
+ this->total_bytes_write += ret;
+ } else {
+ ret = __socket_cached_read(this, opvector, opcount);
+ if (ret == 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "EOF on socket %d (errno:%d:%s); returning ENODATA",
+ sock, errno, strerror(errno));
+
+ errno = ENODATA;
+ ret = -1;
+ }
+ if ((ret < 0) && (errno == EAGAIN)) {
+ /* done for now */
+ break;
+ } else if (ret > 0)
+ this->total_bytes_read += ret;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ if (ret == 0) {
+ /* Mostly due to 'umount' in client */
- priv = this->private;
+ gf_log(this->name, GF_LOG_DEBUG, "EOF from peer %s",
+ this->peerinfo.identifier);
+ opcount = -1;
+ errno = ENOTCONN;
+ break;
+ }
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (__does_socket_rwv_error_need_logging(priv, write)) {
+ GF_LOG_OCCASIONALLY(priv->log_ctr, this->name, GF_LOG_WARNING,
+ "%s on %s failed (%s)",
+ write ? "writev" : "readv",
+ this->peerinfo.identifier, strerror(errno));
+ }
+
+ if (priv->use_ssl && priv->ssl_ssl) {
+ ssl_dump_error_stack(this->name);
+ }
+ opcount = -1;
+ break;
+ }
- if (priv->sock != -1) {
- priv->connected = -1;
- ret = shutdown (priv->sock, SHUT_RDWR);
- if (ret) {
- /* its already disconnected.. no need to understand
- why it failed to shutdown in normal cases */
- gf_log (this->name, GF_LOG_DEBUG,
- "shutdown() returned %d. %s",
- ret, strerror (errno));
- }
+ if (bytes != NULL) {
+ *bytes += ret;
+ }
+
+ moved = 0;
+
+ while (moved < ret) {
+ if (!opcount) {
+ gf_log(this->name, GF_LOG_DEBUG, "ran out of iov, moved %d/%d",
+ moved, ret);
+ goto ran_out;
+ }
+ if (!opvector[0].iov_len) {
+ opvector++;
+ opcount--;
+ continue;
+ }
+ if ((ret - moved) >= opvector[0].iov_len) {
+ moved += opvector[0].iov_len;
+ opvector++;
+ opcount--;
+ } else {
+ opvector[0].iov_len -= (ret - moved);
+ opvector[0].iov_base += (ret - moved);
+ moved += (ret - moved);
+ }
}
+ }
+
+ran_out:
+
+ if (pending_vector)
+ *pending_vector = opvector;
+
+ if (pending_count)
+ *pending_count = opcount;
out:
- return ret;
+ return opcount;
}
+static int
+__socket_readv(rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count, size_t *bytes)
+{
+ return __socket_rwv(this, vector, count, pending_vector, pending_count,
+ bytes, 0);
+}
-int
-__socket_server_bind (rpc_transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- int opt = 1;
- int reuse_check_sock = -1;
- struct sockaddr_storage unix_addr = {0};
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
- &opt, sizeof (opt));
-
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "setsockopt() for SO_REUSEADDR failed (%s)",
- strerror (errno));
- }
-
- /* reuse-address doesn't work for unix type sockets */
- if (AF_UNIX == SA (&this->myinfo.sockaddr)->sa_family) {
- memcpy (&unix_addr, SA (&this->myinfo.sockaddr),
- this->myinfo.sockaddr_len);
- reuse_check_sock = socket (AF_UNIX, SOCK_STREAM, 0);
- if (reuse_check_sock > 0) {
- ret = connect (reuse_check_sock, SA (&unix_addr),
- this->myinfo.sockaddr_len);
- if ((ret == -1) && (ECONNREFUSED == errno)) {
- unlink (((struct sockaddr_un*)&unix_addr)->sun_path);
- }
- close (reuse_check_sock);
- }
+static int
+__socket_writev(rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count)
+{
+ return __socket_rwv(this, vector, count, pending_vector, pending_count,
+ NULL, 1);
+}
+
+static int
+__socket_shutdown(rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = this->private;
+
+ priv->connected = -1;
+ ret = shutdown(priv->sock, SHUT_RDWR);
+ if (ret) {
+ /* its already disconnected.. no need to understand
+ why it failed to shutdown in normal cases */
+ gf_log(this->name, GF_LOG_DEBUG, "shutdown() returned %d. %s", ret,
+ strerror(errno));
+ } else {
+ GF_LOG_OCCASIONALLY(priv->shutdown_log_ctr, this->name, GF_LOG_INFO,
+ "intentional socket shutdown(%d)", priv->sock);
+ }
+
+ return ret;
+}
+
+static int
+__socket_teardown_connection(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->use_ssl)
+ ssl_teardown_connection(priv);
+
+ return __socket_shutdown(this);
+}
+
+static int
+__socket_disconnect(rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this,
+ priv->sock);
+
+ if (priv->sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_TRACE,
+ "tearing down socket connection");
+ ret = __socket_teardown_connection(this);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "__socket_teardown_connection () failed: %s",
+ strerror(errno));
}
+ }
- ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len);
+ return ret;
+}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "binding to %s failed: %s",
- this->myinfo.identifier, strerror (errno));
+static int
+__socket_server_bind(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ struct sockaddr_storage unix_addr = {0};
+ int ret = -1;
+ int opt = 1;
+ int reuse_check_sock = -1;
+ uint16_t sin_port = 0;
+ int retries = 0;
+
+ priv = this->private;
+ ctx = this->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno));
+ }
+
+ /* reuse-address doesn't work for unix type sockets */
+ if (AF_UNIX == SA(&this->myinfo.sockaddr)->sa_family) {
+ memcpy(&unix_addr, SA(&this->myinfo.sockaddr),
+ this->myinfo.sockaddr_len);
+ reuse_check_sock = sys_socket(AF_UNIX, SOCK_STREAM, 0);
+ if (reuse_check_sock >= 0) {
+ ret = connect(reuse_check_sock, SA(&unix_addr),
+ this->myinfo.sockaddr_len);
+ if ((ret != 0) && (ECONNREFUSED == errno)) {
+ sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path);
+ }
+ gf_log(this->name, GF_LOG_INFO,
+ "closing (AF_UNIX) reuse check socket %d", reuse_check_sock);
+ sys_close(reuse_check_sock);
+ }
+ }
+
+ if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+ sin_port = (int)ntohs(
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port);
+ if (!sin_port) {
+ sin_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port = htons(
+ sin_port);
+ }
+ retries = 10;
+ while (retries) {
+ ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+ this->myinfo.identifier, strerror(errno));
if (errno == EADDRINUSE) {
- gf_log (this->name, GF_LOG_ERROR,
- "Port is already in use");
+ gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
+ sleep(1);
+ retries--;
+ } else {
+ break;
}
+ } else {
+ break;
+ }
+ }
+ } else {
+ ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+ this->myinfo.identifier, strerror(errno));
+ if (errno == EADDRINUSE) {
+ gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
+ }
+ }
+ }
+ if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+ if (getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on (%d) failed (%s)", priv->sock,
+ strerror(errno));
+ ret = -1;
+ goto out;
}
+ if (!cmd_args->brick_port) {
+ cmd_args->brick_port = (int)ntohs(
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port);
+ gf_log(this->name, GF_LOG_INFO,
+ "process started listening on port (%d)",
+ cmd_args->brick_port);
+ }
+ }
out:
- return ret;
+ return ret;
}
-
-int
-__socket_nonblock (int fd)
+static int
+__socket_nonblock(int fd)
{
- int flags = 0;
- int ret = -1;
+ int flags = 0;
+ int ret = -1;
- flags = fcntl (fd, F_GETFL);
+ flags = fcntl(fd, F_GETFL);
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
+ if (flags >= 0)
+ ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
- return ret;
+ return ret;
}
-
-int
-__socket_nodelay (int fd)
+static int
+__socket_nodelay(int fd)
{
- int on = 1;
- int ret = -1;
+ int on = 1;
+ int ret = -1;
- ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY,
- &on, sizeof (on));
- if (!ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "NODELAY enabled for socket %d", fd);
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+ if (!ret)
+ gf_log(THIS->name, GF_LOG_TRACE, "NODELAY enabled for socket %d", fd);
- return ret;
+ return ret;
}
-
static int
-__socket_keepalive (int fd, int keepalive_intvl, int keepalive_idle)
+__socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ int keepalivecnt, int timeout)
{
- int on = 1;
- int ret = -1;
+ int on = 1;
+ int ret = -1;
+#if defined(TCP_USER_TIMEOUT)
+ int timeout_ms = timeout * 1000;
+#endif
- ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on));
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep alive option on socket %d", fd);
- goto err;
- }
+ ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep alive option on socket %d", fd);
+ goto err;
+ }
- if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE)
- goto done;
+ if (keepaliveintvl == GF_USE_DEFAULT_KEEPALIVE)
+ goto done;
#if !defined(GF_LINUX_HOST_OS) && !defined(__NetBSD__)
-#ifdef GF_SOLARIS_HOST_OS
- ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive_intvl,
- sizeof (keepalive_intvl));
+#if defined(GF_SOLARIS_HOST_OS) || defined(__FreeBSD__)
+ ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &keepaliveintvl,
+ sizeof(keepaliveintvl));
#else
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl,
- sizeof (keepalive_intvl));
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl,
+ sizeof(keepaliveintvl));
#endif
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep alive interval on socket %d", fd);
- goto err;
- }
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep alive interval on socket %d", fd);
+ goto err;
+ }
#else
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_idle,
- sizeof (keepalive_intvl));
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep idle on socket %d", fd);
- goto err;
- }
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepalive_intvl,
- sizeof (keepalive_intvl));
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep alive interval on socket %d", fd);
- goto err;
- }
+ if (family != AF_INET && family != AF_INET6)
+ goto done;
+
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle,
+ sizeof(keepaliveidle));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep idle %d on socket %d, %s", keepaliveidle, fd,
+ strerror(errno));
+ goto err;
+ }
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl,
+ sizeof(keepaliveintvl));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep interval %d on socket %d, %s",
+ keepaliveintvl, fd, strerror(errno));
+ goto err;
+ }
+
+#if defined(TCP_USER_TIMEOUT)
+ if (timeout_ms < 0)
+ goto done;
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms,
+ sizeof(timeout_ms));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set "
+ "TCP_USER_TIMEOUT %d on socket %d, %s",
+ timeout_ms, fd, strerror(errno));
+ goto err;
+ }
+#endif
+#if defined(TCP_KEEPCNT)
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt,
+ sizeof(keepalivecnt));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set "
+ "TCP_KEEPCNT %d on socket %d, %s",
+ keepalivecnt, fd, strerror(errno));
+ goto err;
+ }
+#endif
#endif
done:
- gf_log (THIS->name, GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval "
- "%d, idle: %d", fd, keepalive_intvl, keepalive_idle);
+ gf_log(THIS->name, GF_LOG_TRACE,
+ "Keep-alive enabled for socket: %d, "
+ "(idle: %d, interval: %d, max-probes: %d, timeout: %d)",
+ fd, keepaliveidle, keepaliveintvl, keepalivecnt, timeout);
err:
- return ret;
+ return ret;
}
-
-int
-__socket_connect_finish (int fd)
+static int
+__socket_connect_finish(int fd)
{
- int ret = -1;
- int optval = 0;
- socklen_t optlen = sizeof (int);
+ int ret = -1;
+ int optval = 0;
+ socklen_t optlen = sizeof(int);
- ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
+ ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
- if (ret == 0 && optval) {
- errno = optval;
- ret = -1;
- }
+ if (ret == 0 && optval) {
+ errno = optval;
+ ret = -1;
+ }
- return ret;
+ return ret;
}
-
-void
-__socket_reset (rpc_transport_t *this)
+static void
+__socket_reset(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- /* TODO: use mem-pool on incoming data */
-
- if (priv->incoming.iobref) {
- iobref_unref (priv->incoming.iobref);
- priv->incoming.iobref = NULL;
- }
+ socket_private_t *priv = NULL;
- if (priv->incoming.iobuf) {
- iobuf_unref (priv->incoming.iobuf);
- }
-
- if (priv->incoming.request_info != NULL) {
- GF_FREE (priv->incoming.request_info);
- }
-
- memset (&priv->incoming, 0, sizeof (priv->incoming));
+ priv = this->private;
- event_unregister (this->ctx->event_pool, priv->sock, priv->idx);
+ /* TODO: use mem-pool on incoming data */
- close (priv->sock);
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
+ if (priv->incoming.iobref) {
+ iobref_unref(priv->incoming.iobref);
+ priv->incoming.iobref = NULL;
+ }
-out:
- return;
+ if (priv->incoming.iobuf) {
+ iobuf_unref(priv->incoming.iobuf);
+ priv->incoming.iobuf = NULL;
+ }
+
+ GF_FREE(priv->incoming.request_info);
+
+ memset(&priv->incoming, 0, sizeof(priv->incoming));
+
+ gf_event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx);
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
+ priv->sock = -1;
+ priv->idx = -1;
+ priv->connected = -1;
+ priv->ssl_connected = _gf_false;
+ priv->ssl_accepted = _gf_false;
+ priv->ssl_context_created = _gf_false;
+
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ priv->ssl_private_key = NULL;
+ }
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ priv->ssl_own_cert = NULL;
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ priv->ssl_ca_list = NULL;
+ }
}
-
-void
-socket_set_lastfrag (uint32_t *fragsize) {
- (*fragsize) |= 0x80000000U;
+static void
+socket_set_lastfrag(uint32_t *fragsize)
+{
+ (*fragsize) |= 0x80000000U;
}
-
-void
-socket_set_frag_header_size (uint32_t size, char *haddr)
+static void
+socket_set_frag_header_size(uint32_t size, char *haddr)
{
- size = htonl (size);
- memcpy (haddr, &size, sizeof (size));
+ size = htonl(size);
+ memcpy(haddr, &size, sizeof(size));
}
-
-void
-socket_set_last_frag_header_size (uint32_t size, char *haddr)
+static void
+socket_set_last_frag_header_size(uint32_t size, char *haddr)
{
- socket_set_lastfrag (&size);
- socket_set_frag_header_size (size, haddr);
+ socket_set_lastfrag(&size);
+ socket_set_frag_header_size(size, haddr);
}
-struct ioq *
-__socket_ioq_new (rpc_transport_t *this, rpc_transport_msg_t *msg)
+static struct ioq *
+__socket_ioq_new(rpc_transport_t *this, rpc_transport_msg_t *msg)
{
- struct ioq *entry = NULL;
- int count = 0;
- uint32_t size = 0;
+ struct ioq *entry = NULL;
+ int count = 0;
+ uint32_t size = 0;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
+ /* TODO: use mem-pool */
+ entry = GF_CALLOC(1, sizeof(*entry), gf_common_mt_ioq);
+ if (!entry)
+ return NULL;
- /* TODO: use mem-pool */
- entry = GF_CALLOC (1, sizeof (*entry), gf_common_mt_ioq);
- if (!entry)
- return NULL;
+ count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
- count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
+ GF_ASSERT(count <= (MAX_IOVEC - 1));
- GF_ASSERT (count <= (MAX_IOVEC - 1));
+ size = iov_length(msg->rpchdr, msg->rpchdrcount) +
+ iov_length(msg->proghdr, msg->proghdrcount) +
+ iov_length(msg->progpayload, msg->progpayloadcount);
- size = iov_length (msg->rpchdr, msg->rpchdrcount)
- + iov_length (msg->proghdr, msg->proghdrcount)
- + iov_length (msg->progpayload, msg->progpayloadcount);
+ if (size > RPC_MAX_FRAGMENT_SIZE) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "msg size (%u) bigger than the maximum allowed size on "
+ "sockets (%u)",
+ size, RPC_MAX_FRAGMENT_SIZE);
+ GF_FREE(entry);
+ return NULL;
+ }
- if (size > RPC_MAX_FRAGMENT_SIZE) {
- gf_log (this->name, GF_LOG_ERROR,
- "msg size (%u) bigger than the maximum allowed size on "
- "sockets (%u)", size, RPC_MAX_FRAGMENT_SIZE);
- GF_FREE (entry);
- return NULL;
- }
+ socket_set_last_frag_header_size(size, (char *)&entry->fraghdr);
- socket_set_last_frag_header_size (size, (char *)&entry->fraghdr);
+ entry->vector[0].iov_base = (char *)&entry->fraghdr;
+ entry->vector[0].iov_len = sizeof(entry->fraghdr);
+ entry->count = 1;
- entry->vector[0].iov_base = (char *)&entry->fraghdr;
- entry->vector[0].iov_len = sizeof (entry->fraghdr);
- entry->count = 1;
+ if (msg->rpchdr != NULL) {
+ memcpy(&entry->vector[1], msg->rpchdr,
+ sizeof(struct iovec) * msg->rpchdrcount);
+ entry->count += msg->rpchdrcount;
+ }
- if (msg->rpchdr != NULL) {
- memcpy (&entry->vector[1], msg->rpchdr,
- sizeof (struct iovec) * msg->rpchdrcount);
- entry->count += msg->rpchdrcount;
- }
+ if (msg->proghdr != NULL) {
+ memcpy(&entry->vector[entry->count], msg->proghdr,
+ sizeof(struct iovec) * msg->proghdrcount);
+ entry->count += msg->proghdrcount;
+ }
- if (msg->proghdr != NULL) {
- memcpy (&entry->vector[entry->count], msg->proghdr,
- sizeof (struct iovec) * msg->proghdrcount);
- entry->count += msg->proghdrcount;
- }
+ if (msg->progpayload != NULL) {
+ memcpy(&entry->vector[entry->count], msg->progpayload,
+ sizeof(struct iovec) * msg->progpayloadcount);
+ entry->count += msg->progpayloadcount;
+ }
- if (msg->progpayload != NULL) {
- memcpy (&entry->vector[entry->count], msg->progpayload,
- sizeof (struct iovec) * msg->progpayloadcount);
- entry->count += msg->progpayloadcount;
- }
-
- entry->pending_vector = entry->vector;
- entry->pending_count = entry->count;
+ entry->pending_vector = entry->vector;
+ entry->pending_count = entry->count;
- if (msg->iobref != NULL)
- entry->iobref = iobref_ref (msg->iobref);
+ if (msg->iobref != NULL)
+ entry->iobref = iobref_ref(msg->iobref);
- INIT_LIST_HEAD (&entry->list);
+ INIT_LIST_HEAD(&entry->list);
-out:
- return entry;
+ return entry;
}
-
-void
-__socket_ioq_entry_free (struct ioq *entry)
+static void
+__socket_ioq_entry_free(struct ioq *entry)
{
- GF_VALIDATE_OR_GOTO ("socket", entry, out);
+ GF_VALIDATE_OR_GOTO("socket", entry, out);
- list_del_init (&entry->list);
- if (entry->iobref)
- iobref_unref (entry->iobref);
+ list_del_init(&entry->list);
+ if (entry->iobref)
+ iobref_unref(entry->iobref);
- /* TODO: use mem-pool */
- GF_FREE (entry);
+ /* TODO: use mem-pool */
+ GF_FREE(entry);
out:
- return;
+ return;
}
-
-void
-__socket_ioq_flush (rpc_transport_t *this)
+static void
+__socket_ioq_flush(socket_private_t *priv)
{
- socket_private_t *priv = NULL;
- struct ioq *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
+ struct ioq *entry = NULL;
- while (!list_empty (&priv->ioq)) {
- entry = priv->ioq_next;
- __socket_ioq_entry_free (entry);
- }
-
-out:
- return;
+ while (!list_empty(&priv->ioq)) {
+ entry = priv->ioq_next;
+ __socket_ioq_entry_free(entry);
+ }
}
-
-int
-__socket_ioq_churn_entry (rpc_transport_t *this, struct ioq *entry)
+static int
+__socket_ioq_churn_entry(rpc_transport_t *this, struct ioq *entry)
{
- int ret = -1;
+ int ret = -1;
- ret = __socket_writev (this, entry->pending_vector,
- entry->pending_count,
- &entry->pending_vector,
- &entry->pending_count);
+ ret = __socket_writev(this, entry->pending_vector, entry->pending_count,
+ &entry->pending_vector, &entry->pending_count);
- if (ret == 0) {
- /* current entry was completely written */
- GF_ASSERT (entry->pending_count == 0);
- __socket_ioq_entry_free (entry);
- }
+ if (ret == 0) {
+ /* current entry was completely written */
+ GF_ASSERT(entry->pending_count == 0);
+ __socket_ioq_entry_free(entry);
+ }
- return ret;
+ return ret;
}
-
-int
-__socket_ioq_churn (rpc_transport_t *this)
+static int
+__socket_ioq_churn(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- struct ioq *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct ioq *entry = NULL;
- priv = this->private;
+ priv = this->private;
- while (!list_empty (&priv->ioq)) {
- /* pick next entry */
- entry = priv->ioq_next;
+ while (!list_empty(&priv->ioq)) {
+ /* pick next entry */
+ entry = priv->ioq_next;
- ret = __socket_ioq_churn_entry (this, entry);
+ ret = __socket_ioq_churn_entry(this, entry);
- if (ret != 0)
- break;
- }
+ if (ret != 0)
+ break;
+ }
- if (list_empty (&priv->ioq)) {
- /* all pending writes done, not interested in POLLOUT */
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock, priv->idx, -1, 0);
- }
+ if (list_empty(&priv->ioq)) {
+ /* all pending writes done, not interested in POLLOUT */
+ priv->idx = gf_event_select_on(this->ctx->event_pool, priv->sock,
+ priv->idx, -1, 0);
+ }
-out:
- return ret;
+ return ret;
}
-
-int
-socket_event_poll_err (rpc_transport_t *this)
+static gf_boolean_t
+socket_event_poll_err(rpc_transport_t *this, int gen, int idx)
{
- socket_private_t *priv = NULL;
- int ret = -1;
+ socket_private_t *priv = NULL;
+ gf_boolean_t socket_closed = _gf_false;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ priv = this->private;
- priv = this->private;
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) {
+ __socket_ioq_flush(priv);
+ __socket_reset(this);
+ socket_closed = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_lock (&priv->lock);
+ if (socket_closed) {
+ pthread_mutex_lock(&priv->notify.lock);
{
- __socket_ioq_flush (this);
- __socket_reset (this);
+ while (priv->notify.in_progress)
+ pthread_cond_wait(&priv->notify.cond, &priv->notify.lock);
}
- pthread_mutex_unlock (&priv->lock);
+ pthread_mutex_unlock(&priv->notify.lock);
- rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
+ rpc_transport_notify(this, RPC_TRANSPORT_DISCONNECT, this);
+ }
-out:
- return ret;
+ return socket_closed;
}
-
-int
-socket_event_poll_out (rpc_transport_t *this)
+static int
+socket_event_poll_out(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected == 1) {
- ret = __socket_ioq_churn (this);
-
- if (ret == -1) {
- __socket_disconnect (this);
- }
- }
+ socket_private_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->connected == 1) {
+ ret = __socket_ioq_churn(this);
+
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "__socket_ioq_churn returned -1; "
+ "disconnecting socket");
+ __socket_disconnect(this);
+ }
}
- pthread_mutex_unlock (&priv->lock);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_SENT, NULL);
+ if (ret == 0)
+ rpc_transport_notify(this, RPC_TRANSPORT_MSG_SENT, NULL);
-out:
- return ret;
-}
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
-inline int
-__socket_read_simple_msg (rpc_transport_t *this)
+static int
+__socket_read_simple_msg(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- uint32_t remaining_size = 0;
- size_t bytes_read = 0;
+ int ret = 0;
+ uint32_t remaining_size = 0;
+ size_t bytes_read = 0;
+ socket_private_t *priv = NULL;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
- priv = this->private;
+ priv = this->private;
- switch (priv->incoming.frag.simple_state) {
+ in = &priv->incoming;
+ frag = &in->frag;
+ switch (frag->simple_state) {
case SP_STATE_SIMPLE_MSG_INIT:
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- __socket_proto_init_pending (priv, remaining_size);
+ __socket_proto_init_pending(priv, remaining_size);
- priv->incoming.frag.simple_state =
- SP_STATE_READING_SIMPLE_MSG;
+ frag->simple_state = SP_STATE_READING_SIMPLE_MSG;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_SIMPLE_MSG:
- ret = 0;
+ ret = 0;
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if (remaining_size > 0) {
- ret = __socket_readv (this,
- priv->incoming.pending_vector, 1,
- &priv->incoming.pending_vector,
- &priv->incoming.pending_count,
- &bytes_read);
- }
+ if (remaining_size > 0) {
+ ret = __socket_readv(this, in->pending_vector, 1,
+ &in->pending_vector, &in->pending_count,
+ &bytes_read);
+ }
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "reading from socket failed. Error (%s), "
- "peer (%s)", strerror (errno),
- this->peerinfo.identifier);
- break;
- }
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "reading from socket failed. Error (%s), "
+ "peer (%s)",
+ strerror(errno), this->peerinfo.identifier);
+ break;
+ }
- priv->incoming.frag.bytes_read += bytes_read;
- priv->incoming.frag.fragcurrent += bytes_read;
+ frag->bytes_read += bytes_read;
+ frag->fragcurrent += bytes_read;
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial read on non-blocking socket.");
- break;
- }
+ if (ret > 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "partial read on non-blocking socket.");
+ ret = 0;
+ break;
+ }
- if (ret == 0) {
- priv->incoming.frag.simple_state
- = SP_STATE_SIMPLE_MSG_INIT;
- }
- }
+ if (ret == 0) {
+ frag->simple_state = SP_STATE_SIMPLE_MSG_INIT;
+ }
+ }
out:
- return ret;
+ return ret;
}
-
-inline int
-__socket_read_simple_request (rpc_transport_t *this)
-{
- return __socket_read_simple_msg (this);
-}
-
-
#define rpc_cred_addr(buf) (buf + RPC_MSGTYPE_SIZE + RPC_CALL_BODY_SIZE - 4)
#define rpc_verf_addr(fragcurrent) (fragcurrent - 4)
@@ -804,2006 +1475,3327 @@ __socket_read_simple_request (rpc_transport_t *this)
#define rpc_progver_addr(buf) (buf + RPC_MSGTYPE_SIZE + 8)
#define rpc_procnum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 12)
-inline int
-__socket_read_vectored_request (rpc_transport_t *this, rpcsvc_vector_sizer vector_sizer)
+static int
+__socket_read_vectored_request(rpc_transport_t *this,
+ rpcsvc_vector_sizer vector_sizer)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- uint32_t credlen = 0, verflen = 0;
- char *addr = NULL;
- struct iobuf *iobuf = NULL;
- uint32_t remaining_size = 0;
- ssize_t readsize = 0;
- size_t size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.call_body.request.vector_state) {
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ uint32_t credlen = 0, verflen = 0;
+ char *addr = NULL;
+ struct iobuf *iobuf = NULL;
+ uint32_t remaining_size = 0;
+ ssize_t readsize = 0;
+ size_t size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ sp_rpcfrag_request_state_t *request = NULL;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+ request = &frag->call_body.request;
+
+ switch (request->vector_state) {
case SP_STATE_VECTORED_REQUEST_INIT:
- priv->incoming.frag.call_body.request.vector_sizer_state = 0;
- addr = rpc_cred_addr (iobuf_ptr (priv->incoming.iobuf));
+ request->vector_sizer_state = 0;
- /* also read verf flavour and verflen */
- credlen = ntoh32 (*((uint32_t *)addr))
- + RPC_AUTH_FLAVOUR_N_LENGTH_SIZE;
+ addr = rpc_cred_addr(iobuf_ptr(in->iobuf));
- __socket_proto_init_pending (priv, credlen);
+ /* also read verf flavour and verflen */
+ credlen = ntoh32(*((uint32_t *)addr)) +
+ RPC_AUTH_FLAVOUR_N_LENGTH_SIZE;
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READING_CREDBYTES;
+ __socket_proto_init_pending(priv, credlen);
- /* fall through */
+ request->vector_state = SP_STATE_READING_CREDBYTES;
+
+ /* fall through */
case SP_STATE_READING_CREDBYTES:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_CREDBYTES;
+ request->vector_state = SP_STATE_READ_CREDBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_CREDBYTES:
- addr = rpc_verf_addr (priv->incoming.frag.fragcurrent);
- verflen = ntoh32 (*((uint32_t *)addr));
+ addr = rpc_verf_addr(frag->fragcurrent);
+ verflen = ntoh32(*((uint32_t *)addr));
- if (verflen == 0) {
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_READ_VERFBYTES;
- goto sp_state_read_verfbytes;
- }
- __socket_proto_init_pending (priv, verflen);
+ if (verflen == 0) {
+ request->vector_state = SP_STATE_READ_VERFBYTES;
+ goto sp_state_read_verfbytes;
+ }
+ __socket_proto_init_pending(priv, verflen);
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_READING_VERFBYTES;
+ request->vector_state = SP_STATE_READING_VERFBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_VERFBYTES:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_VERFBYTES;
+ request->vector_state = SP_STATE_READ_VERFBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_VERFBYTES:
-sp_state_read_verfbytes:
- priv->incoming.frag.call_body.request.vector_sizer_state =
- vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
- &readsize,
- priv->incoming.frag.fragcurrent);
- __socket_proto_init_pending (priv, readsize);
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_READING_PROGHDR;
+ sp_state_read_verfbytes:
+ /* set the base_addr 'persistently' across multiple calls
+ into the state machine */
+ in->proghdr_base_addr = frag->fragcurrent;
- /* fall through */
+ request->vector_sizer_state = vector_sizer(
+ request->vector_sizer_state, &readsize, in->proghdr_base_addr,
+ frag->fragcurrent);
+ __socket_proto_init_pending(priv, readsize);
- case SP_STATE_READING_PROGHDR:
- __socket_proto_read (priv, ret);
-sp_state_reading_proghdr:
- priv->incoming.frag.call_body.request.vector_sizer_state =
- vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
- &readsize,
- priv->incoming.frag.fragcurrent);
- if (readsize == 0) {
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_PROGHDR;
- } else {
- __socket_proto_init_pending (priv, readsize);
- __socket_proto_read (priv, ret);
- goto sp_state_reading_proghdr;
- }
+ request->vector_state = SP_STATE_READING_PROGHDR;
- case SP_STATE_READ_PROGHDR:
- if (priv->incoming.payload_vector.iov_base == NULL) {
-
- size = RPC_FRAGSIZE (priv->incoming.fraghdr) -
- priv->incoming.frag.bytes_read;
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
- if (!iobuf) {
- ret = -1;
- break;
- }
+ /* fall through */
- if (priv->incoming.iobref == NULL) {
- priv->incoming.iobref = iobref_new ();
- if (priv->incoming.iobref == NULL) {
- ret = -1;
- iobuf_unref (iobuf);
- break;
- }
- }
+ case SP_STATE_READING_PROGHDR:
+ __socket_proto_read(priv, ret);
- iobref_add (priv->incoming.iobref, iobuf);
- iobuf_unref (iobuf);
+ request->vector_state = SP_STATE_READ_PROGHDR;
- priv->incoming.payload_vector.iov_base
- = iobuf_ptr (iobuf);
+ /* fall through */
- priv->incoming.frag.fragcurrent = iobuf_ptr (iobuf);
+ case SP_STATE_READ_PROGHDR:
+ sp_state_read_proghdr:
+ request->vector_sizer_state = vector_sizer(
+ request->vector_sizer_state, &readsize, in->proghdr_base_addr,
+ frag->fragcurrent);
+ if (readsize == 0) {
+ request->vector_state = SP_STATE_READ_PROGHDR_XDATA;
+ goto sp_state_read_proghdr_xdata;
+ }
+
+ __socket_proto_init_pending(priv, readsize);
+
+ request->vector_state = SP_STATE_READING_PROGHDR_XDATA;
+
+ /* fall through */
+
+ case SP_STATE_READING_PROGHDR_XDATA:
+ __socket_proto_read(priv, ret);
+
+ request->vector_state = SP_STATE_READ_PROGHDR;
+ /* check if the vector_sizer() has more to say */
+ goto sp_state_read_proghdr;
+
+ case SP_STATE_READ_PROGHDR_XDATA:
+ sp_state_read_proghdr_xdata:
+ if (in->payload_vector.iov_base == NULL) {
+ size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ break;
}
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READING_PROG;
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
+ ret = -1;
+ iobuf_unref(iobuf);
+ break;
+ }
+ }
- /* fall through */
+ iobref_add(in->iobref, iobuf);
- case SP_STATE_READING_PROG:
- /* now read the remaining rpc msg into buffer pointed by
- * fragcurrent
- */
+ in->payload_vector.iov_base = iobuf_ptr(iobuf);
+ frag->fragcurrent = iobuf_ptr(iobuf);
- ret = __socket_read_simple_msg (this);
-
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
-
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && RPC_LASTFRAG (priv->incoming.fraghdr))) {
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_VECTORED_REQUEST_INIT;
- priv->incoming.payload_vector.iov_len
- = (unsigned long)priv->incoming.frag.fragcurrent
- - (unsigned long)
- priv->incoming.payload_vector.iov_base;
- }
- break;
- }
+ iobuf_unref(iobuf);
+ }
-out:
- return ret;
-}
+ request->vector_state = SP_STATE_READING_PROG;
-inline int
-__socket_read_request (rpc_transport_t *this)
-{
- socket_private_t *priv = NULL;
- uint32_t prognum = 0, procnum = 0, progver = 0;
- uint32_t remaining_size = 0;
- int ret = -1;
- char *buf = NULL;
- rpcsvc_vector_sizer vector_sizer = NULL;
+ /* fall through */
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ case SP_STATE_READING_PROG:
+ /* now read the remaining rpc msg into buffer pointed by
+ * fragcurrent
+ */
- priv = this->private;
+ ret = __socket_read_simple_msg(this);
- switch (priv->incoming.frag.call_body.request.header_state) {
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
+ request->vector_state = SP_STATE_VECTORED_REQUEST_INIT;
+ in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent -
+ (unsigned long)
+ in->payload_vector.iov_base);
+ }
+ break;
+ }
+
+ return ret;
+}
+static int
+__socket_read_request(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ uint32_t prognum = 0, procnum = 0, progver = 0;
+ uint32_t remaining_size = 0;
+ int ret = -1;
+ char *buf = NULL;
+ rpcsvc_vector_sizer vector_sizer = NULL;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ sp_rpcfrag_request_state_t *request = NULL;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+ request = &frag->call_body.request;
+
+ switch (request->header_state) {
case SP_STATE_REQUEST_HEADER_INIT:
- __socket_proto_init_pending (priv, RPC_CALL_BODY_SIZE);
+ __socket_proto_init_pending(priv, RPC_CALL_BODY_SIZE);
- priv->incoming.frag.call_body.request.header_state
- = SP_STATE_READING_RPCHDR1;
+ request->header_state = SP_STATE_READING_RPCHDR1;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_RPCHDR1:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.request.header_state =
- SP_STATE_READ_RPCHDR1;
+ request->header_state = SP_STATE_READ_RPCHDR1;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_RPCHDR1:
- buf = rpc_prognum_addr (iobuf_ptr (priv->incoming.iobuf));
- prognum = ntoh32 (*((uint32_t *)buf));
+ buf = rpc_prognum_addr(iobuf_ptr(in->iobuf));
+ prognum = ntoh32(*((uint32_t *)buf));
- buf = rpc_progver_addr (iobuf_ptr (priv->incoming.iobuf));
- progver = ntoh32 (*((uint32_t *)buf));
+ buf = rpc_progver_addr(iobuf_ptr(in->iobuf));
+ progver = ntoh32(*((uint32_t *)buf));
- buf = rpc_procnum_addr (iobuf_ptr (priv->incoming.iobuf));
- procnum = ntoh32 (*((uint32_t *)buf));
+ buf = rpc_procnum_addr(iobuf_ptr(in->iobuf));
+ procnum = ntoh32(*((uint32_t *)buf));
- if (this->listener) {
- /* this check is needed as rpcsvc and rpc-clnt actor structures are
- * not same */
- vector_sizer = rpcsvc_get_program_vector_sizer ((rpcsvc_t *)this->mydata,
- prognum, progver, procnum);
- }
+ if (priv->is_server) {
+ /* this check is needed as rpcsvc and rpc-clnt
+ * actor structures are not same */
+ vector_sizer = rpcsvc_get_program_vector_sizer(
+ (rpcsvc_t *)this->mydata, prognum, progver, procnum);
+ }
- if (vector_sizer) {
- ret = __socket_read_vectored_request (this, vector_sizer);
- } else {
- ret = __socket_read_simple_request (this);
- }
+ if (vector_sizer) {
+ ret = __socket_read_vectored_request(this, vector_sizer);
+ } else {
+ ret = __socket_read_simple_msg(this);
+ }
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.call_body.request.header_state =
- SP_STATE_REQUEST_HEADER_INIT;
- }
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ request->header_state = SP_STATE_REQUEST_HEADER_INIT;
+ }
- break;
- }
+ break;
+ }
-out:
- return ret;
+ return ret;
}
-
-inline int
-__socket_read_accepted_successful_reply (rpc_transport_t *this)
+static int
+__socket_read_accepted_successful_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- struct iobuf *iobuf = NULL;
- uint32_t gluster_read_rsp_hdr_len = 0;
- gfs3_read_rsp read_rsp = {0, };
- size_t size = 0;
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
+ gfs3_read_rsp read_rsp = {
+ 0,
+ };
+ ssize_t size = 0;
+ ssize_t default_read_size = 0;
+ XDR xdr;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ uint32_t remaining_size = 0;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_success_state) {
+ case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
+ default_read_size = xdr_sizeof((xdrproc_t)xdr_gfs3_read_rsp,
+ &read_rsp);
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ /* We need to store the current base address because we will
+ * need it after a partial read. */
+ in->proghdr_base_addr = frag->fragcurrent;
- priv = this->private;
+ __socket_proto_init_pending(priv, default_read_size);
- switch (priv->incoming.frag.call_body.reply.accepted_success_state) {
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_HEADER;
- case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
- gluster_read_rsp_hdr_len = xdr_sizeof ((xdrproc_t) xdr_gfs3_read_rsp,
- &read_rsp);
+ /* fall through */
+
+ case SP_STATE_READING_PROC_HEADER:
+ __socket_proto_read(priv, ret);
+
+ /* there can be 'xdata' in read response, figure it out */
+ default_read_size = frag->fragcurrent - in->proghdr_base_addr;
+ xdrmem_create(&xdr, in->proghdr_base_addr, default_read_size,
+ XDR_DECODE);
+
+ /* This will fail if there is xdata sent from server, if not,
+ well and good, we don't need to worry about */
+ xdr_gfs3_read_rsp(&xdr, &read_rsp);
+
+ free(read_rsp.xdata.xdata_val);
- if (gluster_read_rsp_hdr_len == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "xdr_sizeof on gfs3_read_rsp failed");
+ /* need to round off to proper gf_roof (%4), as XDR packing pads
+ the end of opaque object with '0' */
+ size = gf_roof(read_rsp.xdata.xdata_len, 4);
+
+ if (!size) {
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ goto read_proc_opaque;
+ }
+
+ __socket_proto_init_pending(priv, size);
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_OPAQUE;
+ /* fall through */
+
+ case SP_STATE_READING_PROC_OPAQUE:
+ __socket_proto_read(priv, ret);
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ /* fall through */
+
+ case SP_STATE_READ_PROC_OPAQUE:
+ read_proc_opaque:
+ if (in->payload_vector.iov_base == NULL) {
+ size = (RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read);
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (iobuf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
ret = -1;
+ iobuf_unref(iobuf);
goto out;
+ }
}
- __socket_proto_init_pending (priv, gluster_read_rsp_hdr_len);
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_READING_PROC_HEADER;
+ ret = iobref_add(in->iobref, iobuf);
+ iobuf_unref(iobuf);
+ if (ret < 0) {
+ goto out;
+ }
- /* fall through */
+ in->payload_vector.iov_base = iobuf_ptr(iobuf);
+ in->payload_vector.iov_len = size;
+ }
+
+ frag->fragcurrent = in->payload_vector.iov_base;
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_HEADER;
+
+ /* fall through */
+
+ case SP_STATE_READ_PROC_HEADER:
+ /* now read the entire remaining msg into new iobuf */
+ ret = __socket_read_simple_msg(this);
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
+ frag->call_body.reply.accepted_success_state =
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ }
+
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static int
+__socket_read_accepted_successful_reply_v2(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
+ gfx_read_rsp read_rsp = {
+ 0,
+ };
+ ssize_t size = 0;
+ ssize_t default_read_size = 0;
+ XDR xdr;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ uint32_t remaining_size = 0;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_success_state) {
+ case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
+ default_read_size = xdr_sizeof((xdrproc_t)xdr_gfx_read_rsp,
+ &read_rsp);
+
+ /* We need to store the current base address because we will
+ * need it after a partial read. */
+ in->proghdr_base_addr = frag->fragcurrent;
+
+ __socket_proto_init_pending(priv, default_read_size);
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_HEADER;
+
+ /* fall through */
case SP_STATE_READING_PROC_HEADER:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_READ_PROC_HEADER;
+ /* there can be 'xdata' in read response, figure it out */
+ default_read_size = frag->fragcurrent - in->proghdr_base_addr;
- if (priv->incoming.payload_vector.iov_base == NULL) {
+ xdrmem_create(&xdr, in->proghdr_base_addr, default_read_size,
+ XDR_DECODE);
- size = (RPC_FRAGSIZE (priv->incoming.fraghdr) -
- priv->incoming.frag.bytes_read);
+ /* This will fail if there is xdata sent from server, if not,
+ well and good, we don't need to worry about */
+ xdr_gfx_read_rsp(&xdr, &read_rsp);
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
- if (iobuf == NULL) {
- ret = -1;
- goto out;
- }
+ free(read_rsp.xdata.pairs.pairs_val);
- if (priv->incoming.iobref == NULL) {
- priv->incoming.iobref = iobref_new ();
- if (priv->incoming.iobref == NULL) {
- ret = -1;
- iobuf_unref (iobuf);
- goto out;
- }
- }
+ /* need to round off to proper gf_roof (%4), as XDR packing pads
+ the end of opaque object with '0' */
+ size = gf_roof(read_rsp.xdata.xdr_size, 4);
- iobref_add (priv->incoming.iobref, iobuf);
- iobuf_unref (iobuf);
+ if (!size) {
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ goto read_proc_opaque;
+ }
- priv->incoming.payload_vector.iov_base
- = iobuf_ptr (iobuf);
+ __socket_proto_init_pending(priv, size);
- priv->incoming.payload_vector.iov_len = size;
- }
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_OPAQUE;
+ /* fall through */
- priv->incoming.frag.fragcurrent
- = priv->incoming.payload_vector.iov_base;
+ case SP_STATE_READING_PROC_OPAQUE:
+ __socket_proto_read(priv, ret);
- /* fall through */
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ /* fall through */
- case SP_STATE_READ_PROC_HEADER:
- /* now read the entire remaining msg into new iobuf */
- ret = __socket_read_simple_msg (this);
- if ((ret == -1)
- || ((ret == 0)
- && RPC_LASTFRAG (priv->incoming.fraghdr))) {
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ case SP_STATE_READ_PROC_OPAQUE:
+ read_proc_opaque:
+ if (in->payload_vector.iov_base == NULL) {
+ size = (RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read);
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (iobuf == NULL) {
+ ret = -1;
+ goto out;
}
- break;
- }
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
+ ret = -1;
+ iobuf_unref(iobuf);
+ goto out;
+ }
+ }
+
+ ret = iobref_add(in->iobref, iobuf);
+ iobuf_unref(iobuf);
+ if (ret < 0) {
+ goto out;
+ }
+
+ in->payload_vector.iov_base = iobuf_ptr(iobuf);
+ in->payload_vector.iov_len = size;
+ }
+
+ frag->fragcurrent = in->payload_vector.iov_base;
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_HEADER;
+
+ /* fall through */
+
+ case SP_STATE_READ_PROC_HEADER:
+ /* now read the entire remaining msg into new iobuf */
+ ret = __socket_read_simple_msg(this);
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
+ frag->call_body.reply.accepted_success_state =
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ }
+
+ break;
+ }
out:
- return ret;
+ return ret;
}
#define rpc_reply_verflen_addr(fragcurrent) ((char *)fragcurrent - 4)
#define rpc_reply_accept_status_addr(fragcurrent) ((char *)fragcurrent - 4)
-inline int
-__socket_read_accepted_reply (rpc_transport_t *this)
+static int
+__socket_read_accepted_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = -1;
- char *buf = NULL;
- uint32_t verflen = 0, len = 0;
- uint32_t remaining_size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.call_body.reply.accepted_state) {
-
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ char *buf = NULL;
+ uint32_t verflen = 0, len = 0;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_state) {
case SP_STATE_ACCEPTED_REPLY_INIT:
- __socket_proto_init_pending (priv,
- RPC_AUTH_FLAVOUR_N_LENGTH_SIZE);
+ __socket_proto_init_pending(priv, RPC_AUTH_FLAVOUR_N_LENGTH_SIZE);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READING_REPLY_VERFLEN;
+ frag->call_body.reply
+ .accepted_state = SP_STATE_READING_REPLY_VERFLEN;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_REPLY_VERFLEN:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READ_REPLY_VERFLEN;
+ frag->call_body.reply.accepted_state = SP_STATE_READ_REPLY_VERFLEN;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_REPLY_VERFLEN:
- buf = rpc_reply_verflen_addr (priv->incoming.frag.fragcurrent);
+ buf = rpc_reply_verflen_addr(frag->fragcurrent);
- verflen = ntoh32 (*((uint32_t *) buf));
+ verflen = ntoh32(*((uint32_t *)buf));
- /* also read accept status along with verf data */
- len = verflen + RPC_ACCEPT_STATUS_LEN;
+ /* also read accept status along with verf data */
+ len = verflen + RPC_ACCEPT_STATUS_LEN;
- __socket_proto_init_pending (priv, len);
+ __socket_proto_init_pending(priv, len);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READING_REPLY_VERFBYTES;
+ frag->call_body.reply
+ .accepted_state = SP_STATE_READING_REPLY_VERFBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_REPLY_VERFBYTES:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READ_REPLY_VERFBYTES;
+ frag->call_body.reply
+ .accepted_state = SP_STATE_READ_REPLY_VERFBYTES;
- buf = rpc_reply_accept_status_addr (priv->incoming.frag.fragcurrent);
+ buf = rpc_reply_accept_status_addr(frag->fragcurrent);
- priv->incoming.frag.call_body.reply.accept_status
- = ntoh32 (*(uint32_t *) buf);
+ frag->call_body.reply.accept_status = ntoh32(*(uint32_t *)buf);
- /* fall through */
+ /* fall through */
case SP_STATE_READ_REPLY_VERFBYTES:
- if (priv->incoming.frag.call_body.reply.accept_status
- == SUCCESS) {
- ret = __socket_read_accepted_successful_reply (this);
+ if (frag->call_body.reply.accept_status == SUCCESS) {
+ /* Need two different methods here for different protocols
+ Mainly because the exact XDR is used to calculate the
+ size of response */
+ if ((in->request_info->procnum == GFS3_OP_READ) &&
+ (in->request_info->prognum == GLUSTER_FOP_PROGRAM) &&
+ (in->request_info->progver == GLUSTER_FOP_VERSION_v2)) {
+ ret = __socket_read_accepted_successful_reply_v2(this);
} else {
- /* read entire remaining msg into buffer pointed to by
- * fragcurrent
- */
- ret = __socket_read_simple_msg (this);
+ ret = __socket_read_accepted_successful_reply(this);
}
+ } else {
+ /* read entire remaining msg into buffer pointed to by
+ * fragcurrent
+ */
+ ret = __socket_read_simple_msg(this);
+ }
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_ACCEPTED_REPLY_INIT;
- }
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ frag->call_body.reply
+ .accepted_state = SP_STATE_ACCEPTED_REPLY_INIT;
+ }
- break;
- }
+ break;
+ }
-out:
- return ret;
+ return ret;
}
-
-inline int
-__socket_read_denied_reply (rpc_transport_t *this)
+static int
+__socket_read_denied_reply(rpc_transport_t *this)
{
- return __socket_read_simple_msg (this);
+ return __socket_read_simple_msg(this);
}
-
#define rpc_reply_status_addr(fragcurrent) ((char *)fragcurrent - 4)
-
-inline int
-__socket_read_vectored_reply (rpc_transport_t *this)
+static int
+__socket_read_vectored_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- char *buf = NULL;
- uint32_t remaining_size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.call_body.reply.status_state) {
-
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ char *buf = NULL;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.status_state) {
case SP_STATE_ACCEPTED_REPLY_INIT:
- __socket_proto_init_pending (priv, RPC_REPLY_STATUS_SIZE);
+ __socket_proto_init_pending(priv, RPC_REPLY_STATUS_SIZE);
- priv->incoming.frag.call_body.reply.status_state
- = SP_STATE_READING_REPLY_STATUS;
+ frag->call_body.reply.status_state = SP_STATE_READING_REPLY_STATUS;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_REPLY_STATUS:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- buf = rpc_reply_status_addr (priv->incoming.frag.fragcurrent);
+ buf = rpc_reply_status_addr(frag->fragcurrent);
- priv->incoming.frag.call_body.reply.accept_status
- = ntoh32 (*((uint32_t *) buf));
+ frag->call_body.reply.accept_status = ntoh32(*((uint32_t *)buf));
- priv->incoming.frag.call_body.reply.status_state
- = SP_STATE_READ_REPLY_STATUS;
+ frag->call_body.reply.status_state = SP_STATE_READ_REPLY_STATUS;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_REPLY_STATUS:
- if (priv->incoming.frag.call_body.reply.accept_status
- == MSG_ACCEPTED) {
- ret = __socket_read_accepted_reply (this);
- } else {
- ret = __socket_read_denied_reply (this);
- }
-
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
-
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.call_body.reply.status_state
- = SP_STATE_ACCEPTED_REPLY_INIT;
- priv->incoming.payload_vector.iov_len
- = (unsigned long)priv->incoming.frag.fragcurrent
- - (unsigned long)
- priv->incoming.payload_vector.iov_base;
- }
- break;
- }
-
-out:
- return ret;
+ if (frag->call_body.reply.accept_status == MSG_ACCEPTED) {
+ ret = __socket_read_accepted_reply(this);
+ } else {
+ ret = __socket_read_denied_reply(this);
+ }
+
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ frag->call_body.reply
+ .status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT;
+ in->payload_vector.iov_len = (unsigned long)frag->fragcurrent -
+ (unsigned long)
+ in->payload_vector.iov_base;
+ }
+ break;
+ }
+
+ return ret;
}
-
-inline int
-__socket_read_simple_reply (rpc_transport_t *this)
+static int
+__socket_read_simple_reply(rpc_transport_t *this)
{
- return __socket_read_simple_msg (this);
+ return __socket_read_simple_msg(this);
}
#define rpc_xid_addr(buf) (buf)
-inline int
-__socket_read_reply (rpc_transport_t *this)
+static int
+__socket_read_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- char *buf = NULL;
- int32_t ret = -1;
- rpc_request_info_t *request_info = NULL;
- char map_xid = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- buf = rpc_xid_addr (iobuf_ptr (priv->incoming.iobuf));
+ socket_private_t *priv = NULL;
+ char *buf = NULL;
+ int32_t ret = -1;
+ rpc_request_info_t *request_info = NULL;
+ char map_xid = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ buf = rpc_xid_addr(iobuf_ptr(in->iobuf));
+
+ if (in->request_info == NULL) {
+ in->request_info = GF_CALLOC(1, sizeof(*request_info),
+ gf_common_mt_rpc_trans_reqinfo_t);
+ if (in->request_info == NULL) {
+ goto out;
+ }
- if (priv->incoming.request_info == NULL) {
- priv->incoming.request_info = GF_CALLOC (1,
- sizeof (*request_info),
- gf_common_mt_rpc_trans_reqinfo_t);
- if (priv->incoming.request_info == NULL) {
- goto out;
- }
+ map_xid = 1;
+ }
- map_xid = 1;
- }
+ request_info = in->request_info;
- request_info = priv->incoming.request_info;
+ if (map_xid) {
+ request_info->xid = ntoh32(*((uint32_t *)buf));
- if (map_xid) {
- request_info->xid = ntoh32 (*((uint32_t *) buf));
+ /* release priv->lock, so as to avoid deadlock b/w conn->lock
+ * and priv->lock, since we are doing an upcall here.
+ */
+ frag->state = SP_STATE_NOTIFYING_XID;
+ ret = rpc_transport_notify(this, RPC_TRANSPORT_MAP_XID_REQUEST,
+ in->request_info);
- /* release priv->lock, so as to avoid deadlock b/w conn->lock
- * and priv->lock, since we are doing an upcall here.
- */
- pthread_mutex_unlock (&priv->lock);
- {
- ret = rpc_transport_notify (this,
- RPC_TRANSPORT_MAP_XID_REQUEST,
- priv->incoming.request_info);
- }
- pthread_mutex_lock (&priv->lock);
+ /* Transition back to externally visible state. */
+ frag->state = SP_STATE_READ_MSGTYPE;
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "notify for event MAP_XID failed");
- goto out;
- }
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "notify for event MAP_XID failed for %s",
+ this->peerinfo.identifier);
+ goto out;
}
+ }
- if ((request_info->prognum == GLUSTER3_1_FOP_PROGRAM)
- && (request_info->procnum == GF_FOP_READ)) {
- if (map_xid && request_info->rsp.rsp_payload_count != 0) {
- priv->incoming.iobref
- = iobref_ref (request_info->rsp.rsp_iobref);
- priv->incoming.payload_vector
- = *request_info->rsp.rsp_payload;
- }
-
- ret = __socket_read_vectored_reply (this);
- } else {
- ret = __socket_read_simple_reply (this);
+ if ((request_info->prognum == GLUSTER_FOP_PROGRAM) &&
+ (request_info->procnum == GF_FOP_READ)) {
+ if (map_xid && request_info->rsp.rsp_payload_count != 0) {
+ in->iobref = iobref_ref(request_info->rsp.rsp_iobref);
+ in->payload_vector = *request_info->rsp.rsp_payload;
}
+
+ ret = __socket_read_vectored_reply(this);
+ } else {
+ ret = __socket_read_simple_reply(this);
+ }
out:
- return ret;
+ return ret;
}
-
/* returns the number of bytes yet to be read in a fragment */
-inline int
-__socket_read_frag (rpc_transport_t *this)
+static int
+__socket_read_frag(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int32_t ret = 0;
- char *buf = NULL;
- uint32_t remaining_size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.state) {
+ socket_private_t *priv = NULL;
+ int32_t ret = 0;
+ char *buf = NULL;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->state) {
case SP_STATE_NADA:
- __socket_proto_init_pending (priv, RPC_MSGTYPE_SIZE);
+ __socket_proto_init_pending(priv, RPC_MSGTYPE_SIZE);
- priv->incoming.frag.state = SP_STATE_READING_MSGTYPE;
+ frag->state = SP_STATE_READING_MSGTYPE;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_MSGTYPE:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.state = SP_STATE_READ_MSGTYPE;
- /* fall through */
+ frag->state = SP_STATE_READ_MSGTYPE;
+ /* fall through */
case SP_STATE_READ_MSGTYPE:
- buf = rpc_msgtype_addr (iobuf_ptr (priv->incoming.iobuf));
- priv->incoming.msg_type = ntoh32 (*((uint32_t *)buf));
-
- if (priv->incoming.msg_type == CALL) {
- ret = __socket_read_request (this);
- } else if (priv->incoming.msg_type == REPLY) {
- ret = __socket_read_reply (this);
- } else if (priv->incoming.msg_type == GF_UNIVERSAL_ANSWER) {
- gf_log ("rpc", GF_LOG_ERROR,
- "older version of protocol/process trying to "
- "connect from %s. use newer version on that node",
- this->peerinfo.identifier);
- } else {
- gf_log ("rpc", GF_LOG_ERROR,
- "wrong MSG-TYPE (%d) received from %s",
- priv->incoming.msg_type,
- this->peerinfo.identifier);
- ret = -1;
- }
+ buf = rpc_msgtype_addr(iobuf_ptr(in->iobuf));
+ in->msg_type = ntoh32(*((uint32_t *)buf));
+
+ if (in->msg_type == CALL) {
+ ret = __socket_read_request(this);
+ } else if (in->msg_type == REPLY) {
+ ret = __socket_read_reply(this);
+ } else if (in->msg_type == (msg_type_t)GF_UNIVERSAL_ANSWER) {
+ gf_log("rpc", GF_LOG_ERROR,
+ "older version of protocol/process trying to "
+ "connect from %s. use newer version on that node",
+ this->peerinfo.identifier);
+ } else {
+ gf_log("rpc", GF_LOG_ERROR,
+ "wrong MSG-TYPE (%d) received from %s", in->msg_type,
+ this->peerinfo.identifier);
+ ret = -1;
+ }
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.state = SP_STATE_NADA;
- }
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ /* frag->state = SP_STATE_NADA; */
+ frag->state = SP_STATE_RPCFRAG_INIT;
+ }
- break;
- }
+ break;
-out:
- return ret;
-}
+ case SP_STATE_NOTIFYING_XID:
+ /* Another epoll thread is notifying higher layers
+ *of reply's xid. */
+ errno = EAGAIN;
+ return -1;
+ break;
+ }
+ return ret;
+}
-inline
-void __socket_reset_priv (socket_private_t *priv)
+static void
+__socket_reset_priv(socket_private_t *priv)
{
- if (priv->incoming.iobref) {
- iobref_unref (priv->incoming.iobref);
- priv->incoming.iobref = NULL;
- }
+ struct gf_sock_incoming *in = NULL;
- if (priv->incoming.iobuf) {
- iobuf_unref (priv->incoming.iobuf);
- }
+ /* used to reduce the indirection */
+ in = &priv->incoming;
- if (priv->incoming.request_info != NULL) {
- GF_FREE (priv->incoming.request_info);
- priv->incoming.request_info = NULL;
- }
+ if (in->iobref) {
+ iobref_unref(in->iobref);
+ in->iobref = NULL;
+ }
- memset (&priv->incoming.payload_vector, 0,
- sizeof (priv->incoming.payload_vector));
+ if (in->iobuf) {
+ iobuf_unref(in->iobuf);
+ in->iobuf = NULL;
+ }
- priv->incoming.iobuf = NULL;
+ if (in->request_info != NULL) {
+ GF_FREE(in->request_info);
+ in->request_info = NULL;
+ }
+
+ memset(&in->payload_vector, 0, sizeof(in->payload_vector));
}
+static int
+__socket_proto_state_machine(rpc_transport_t *this,
+ rpc_transport_pollin_t **pollin)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec vector[2];
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
-int
-__socket_proto_state_machine (rpc_transport_t *this,
- rpc_transport_pollin_t **pollin)
-{
- int ret = -1;
- socket_private_t *priv = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- struct iovec vector[2];
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
- while (priv->incoming.record_state != SP_STATE_COMPLETE) {
- switch (priv->incoming.record_state) {
-
- case SP_STATE_NADA:
- priv->incoming.total_bytes_read = 0;
- priv->incoming.payload_vector.iov_len = 0;
-
- priv->incoming.pending_vector = priv->incoming.vector;
- priv->incoming.pending_vector->iov_base =
- &priv->incoming.fraghdr;
-
- priv->incoming.pending_vector->iov_len =
- sizeof (priv->incoming.fraghdr);
-
- priv->incoming.record_state = SP_STATE_READING_FRAGHDR;
-
- /* fall through */
-
- case SP_STATE_READING_FRAGHDR:
- ret = __socket_readv (this,
- priv->incoming.pending_vector, 1,
- &priv->incoming.pending_vector,
- &priv->incoming.pending_count,
- NULL);
- if (ret == -1) {
- if (priv->read_fail_log == 1) {
- gf_log (this->name,
- ((priv->connected == 1) ?
- GF_LOG_WARNING : GF_LOG_DEBUG),
- "reading from socket failed. Error (%s)"
- ", peer (%s)", strerror (errno),
- this->peerinfo.identifier);
- }
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE, "partial "
- "fragment header read");
- goto out;
- }
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
- if (ret == 0) {
- priv->incoming.record_state =
- SP_STATE_READ_FRAGHDR;
- }
- /* fall through */
-
- case SP_STATE_READ_FRAGHDR:
-
- priv->incoming.fraghdr = ntoh32 (priv->incoming.fraghdr);
- priv->incoming.record_state = SP_STATE_READING_FRAG;
- priv->incoming.total_bytes_read
- += RPC_FRAGSIZE(priv->incoming.fraghdr);
- iobuf = iobuf_get2 (this->ctx->iobuf_pool,
- priv->incoming.total_bytes_read +
- sizeof (priv->incoming.fraghdr));
- if (!iobuf) {
- ret = -ENOMEM;
- goto out;
- }
+ while (in->record_state != SP_STATE_COMPLETE) {
+ switch (in->record_state) {
+ case SP_STATE_NADA:
+ in->total_bytes_read = 0;
+ in->payload_vector.iov_len = 0;
- priv->incoming.iobuf = iobuf;
- priv->incoming.iobuf_size = 0;
- priv->incoming.frag.fragcurrent = iobuf_ptr (iobuf);
- /* fall through */
+ in->pending_vector = in->vector;
+ in->pending_vector->iov_base = &in->fraghdr;
- case SP_STATE_READING_FRAG:
- ret = __socket_read_frag (this);
+ in->pending_vector->iov_len = sizeof(in->fraghdr);
- if ((ret == -1)
- || (priv->incoming.frag.bytes_read !=
- RPC_FRAGSIZE (priv->incoming.fraghdr))) {
- goto out;
- }
+ in->record_state = SP_STATE_READING_FRAGHDR;
- priv->incoming.frag.bytes_read = 0;
+ /* fall through */
- if (!RPC_LASTFRAG (priv->incoming.fraghdr)) {
- priv->incoming.record_state =
- SP_STATE_READING_FRAGHDR;
- break;
- }
+ case SP_STATE_READING_FRAGHDR:
+ ret = __socket_readv(this, in->pending_vector, 1,
+ &in->pending_vector, &in->pending_count,
+ NULL);
+ if (ret < 0)
+ goto out;
- /* we've read the entire rpc record, notify the
- * upper layers.
- */
- if (pollin != NULL) {
- int count = 0;
- priv->incoming.iobuf_size
- = priv->incoming.total_bytes_read
- - priv->incoming.payload_vector.iov_len;
-
- memset (vector, 0, sizeof (vector));
-
- if (priv->incoming.iobref == NULL) {
- priv->incoming.iobref = iobref_new ();
- if (priv->incoming.iobref == NULL) {
- ret = -1;
- goto out;
- }
- }
-
- vector[count].iov_base
- = iobuf_ptr (priv->incoming.iobuf);
- vector[count].iov_len
- = priv->incoming.iobuf_size;
-
- iobref = priv->incoming.iobref;
-
- count++;
-
- if (priv->incoming.payload_vector.iov_base
- != NULL) {
- vector[count]
- = priv->incoming.payload_vector;
- count++;
- }
-
- *pollin = rpc_transport_pollin_alloc (this,
- vector,
- count,
- priv->incoming.iobuf,
- iobref,
- priv->incoming.request_info);
- iobuf_unref (priv->incoming.iobuf);
- priv->incoming.iobuf = NULL;
-
- if (*pollin == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "transport pollin allocation failed");
- ret = -1;
- goto out;
- }
- if (priv->incoming.msg_type == REPLY)
- (*pollin)->is_reply = 1;
-
- priv->incoming.request_info = NULL;
- }
- priv->incoming.record_state = SP_STATE_COMPLETE;
- break;
+ if (ret > 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "partial "
+ "fragment header read");
+ ret = 0;
+ goto out;
+ }
- case SP_STATE_COMPLETE:
- /* control should not reach here */
- gf_log (this->name, GF_LOG_WARNING, "control reached to "
- "SP_STATE_COMPLETE, which should not have "
- "happened");
- break;
+ if (ret == 0) {
+ in->record_state = SP_STATE_READ_FRAGHDR;
}
- }
+ /* fall through */
- if (priv->incoming.record_state == SP_STATE_COMPLETE) {
- priv->incoming.record_state = SP_STATE_NADA;
- __socket_reset_priv (priv);
- }
+ case SP_STATE_READ_FRAGHDR:
-out:
- if ((ret == -1) && (errno == EAGAIN)) {
- ret = 0;
- }
- return ret;
-}
+ in->fraghdr = ntoh32(in->fraghdr);
+ in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr);
+ if (in->total_bytes_read >= GF_UNIT_GB) {
+ ret = -1;
+ goto out;
+ }
-int
-socket_proto_state_machine (rpc_transport_t *this,
- rpc_transport_pollin_t **pollin)
-{
- socket_private_t *priv = NULL;
- int ret = 0;
+ iobuf = iobuf_get2(
+ this->ctx->iobuf_pool,
+ (in->total_bytes_read + sizeof(in->fraghdr)));
+ if (!iobuf) {
+ ret = -1;
+ goto out;
+ }
+
+ if (in->iobuf == NULL) {
+ /* first fragment */
+ frag->fragcurrent = iobuf_ptr(iobuf);
+ } else {
+ /* second or further fragment */
+ memcpy(iobuf_ptr(iobuf), iobuf_ptr(in->iobuf),
+ in->total_bytes_read - RPC_FRAGSIZE(in->fraghdr));
+ iobuf_unref(in->iobuf);
+ frag->fragcurrent = (char *)iobuf_ptr(iobuf) +
+ in->total_bytes_read -
+ RPC_FRAGSIZE(in->fraghdr);
+ frag->pending_vector->iov_base = frag->fragcurrent;
+ in->pending_vector = frag->pending_vector;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ in->iobuf = iobuf;
+ in->iobuf_size = 0;
+ in->record_state = SP_STATE_READING_FRAG;
+ /* fall through */
- priv = this->private;
+ case SP_STATE_READING_FRAG:
+ ret = __socket_read_frag(this);
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_proto_state_machine (this, pollin);
+ if ((ret < 0) ||
+ (frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) {
+ goto out;
+ }
+
+ frag->bytes_read = 0;
+
+ if (!RPC_LASTFRAG(in->fraghdr)) {
+ in->pending_vector = in->vector;
+ in->pending_vector->iov_base = &in->fraghdr;
+ in->pending_vector->iov_len = sizeof(in->fraghdr);
+ in->record_state = SP_STATE_READING_FRAGHDR;
+ break;
+ }
+
+ /* we've read the entire rpc record, notify the
+ * upper layers.
+ */
+ if (pollin != NULL) {
+ int count = 0;
+ in->iobuf_size = (in->total_bytes_read -
+ in->payload_vector.iov_len);
+
+ memset(vector, 0, sizeof(vector));
+
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ vector[count].iov_base = iobuf_ptr(in->iobuf);
+ vector[count].iov_len = in->iobuf_size;
+
+ iobref = in->iobref;
+
+ count++;
+
+ if (in->payload_vector.iov_base != NULL) {
+ vector[count] = in->payload_vector;
+ count++;
+ }
+
+ *pollin = rpc_transport_pollin_alloc(this, vector, count,
+ in->iobuf, iobref,
+ in->request_info);
+ iobuf_unref(in->iobuf);
+ in->iobuf = NULL;
+
+ if (*pollin == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "transport pollin allocation failed");
+ ret = -1;
+ goto out;
+ }
+ if (in->msg_type == REPLY)
+ (*pollin)->is_reply = 1;
+
+ in->request_info = NULL;
+ }
+ in->record_state = SP_STATE_COMPLETE;
+ break;
+
+ case SP_STATE_COMPLETE:
+ /* control should not reach here */
+ gf_log(this->name, GF_LOG_WARNING,
+ "control reached to "
+ "SP_STATE_COMPLETE, which should not have "
+ "happened");
+ break;
}
- pthread_mutex_unlock (&priv->lock);
+ }
+
+ if (in->record_state == SP_STATE_COMPLETE) {
+ in->record_state = SP_STATE_NADA;
+ __socket_reset_priv(priv);
+ }
out:
- return ret;
+ return ret;
}
+static int
+socket_proto_state_machine(rpc_transport_t *this,
+ rpc_transport_pollin_t **pollin)
+{
+ return __socket_proto_state_machine(this, pollin);
+}
-int
-socket_event_poll_in (rpc_transport_t *this)
+static void
+socket_event_poll_in_async(xlator_t *xl, gf_async_t *async)
{
- int ret = -1;
- rpc_transport_pollin_t *pollin = NULL;
+ rpc_transport_pollin_t *pollin;
+ rpc_transport_t *this;
+ socket_private_t *priv;
- ret = socket_proto_state_machine (this, &pollin);
+ pollin = caa_container_of(async, rpc_transport_pollin_t, async);
+ this = pollin->trans;
+ priv = this->private;
- if (pollin != NULL) {
- ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_RECEIVED,
- pollin);
+ rpc_transport_notify(this, RPC_TRANSPORT_MSG_RECEIVED, pollin);
- rpc_transport_pollin_destroy (pollin);
- }
+ rpc_transport_unref(this);
- return ret;
-}
+ rpc_transport_pollin_destroy(pollin);
+ pthread_mutex_lock(&priv->notify.lock);
+ {
+ --priv->notify.in_progress;
-int
-socket_connect_finish (rpc_transport_t *this)
+ if (!priv->notify.in_progress)
+ pthread_cond_signal(&priv->notify.cond);
+ }
+ pthread_mutex_unlock(&priv->notify.lock);
+}
+
+static int
+socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled)
{
- int ret = -1;
- socket_private_t *priv = NULL;
- rpc_transport_event_t event = 0;
- char notify_rpc = 0;
+ int ret = -1;
+ rpc_transport_pollin_t *pollin = NULL;
+ socket_private_t *priv = this->private;
+ glusterfs_ctx_t *ctx = NULL;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ ctx = this->ctx;
- priv = this->private;
+ ret = socket_proto_state_machine(this, &pollin);
- pthread_mutex_lock (&priv->lock);
+ if (pollin) {
+ pthread_mutex_lock(&priv->notify.lock);
{
- if (priv->connected)
- goto unlock;
+ priv->notify.in_progress++;
+ }
+ pthread_mutex_unlock(&priv->notify.lock);
+ }
- ret = __socket_connect_finish (priv->sock);
+ if (notify_handled && (ret >= 0))
+ gf_event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen);
- if (ret == -1 && errno == EINPROGRESS)
- ret = 1;
+ if (pollin) {
+ rpc_transport_ref(this);
+ gf_async(&pollin->async, THIS, socket_event_poll_in_async);
+ }
- if (ret == -1 && errno != EINPROGRESS) {
- if (!priv->connect_finish_log) {
- gf_log (this->name, GF_LOG_ERROR,
- "connection to %s failed (%s)",
- this->peerinfo.identifier,
- strerror (errno));
- priv->connect_finish_log = 1;
- }
- __socket_disconnect (this);
- notify_rpc = 1;
- event = RPC_TRANSPORT_DISCONNECT;
- goto unlock;
- }
+ return ret;
+}
- if (ret == 0) {
- notify_rpc = 1;
-
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
-
- ret = getsockname (priv->sock,
- SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "getsockname on (%d) failed (%s)",
- priv->sock, strerror (errno));
- __socket_disconnect (this);
- event = GF_EVENT_POLLERR;
- goto unlock;
- }
+static int
+socket_connect_finish(rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+ rpc_transport_event_t event = 0;
+ char notify_rpc = 0;
+
+ priv = this->private;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->connected != 0)
+ goto unlock;
+
+ get_transport_identifiers(this);
+
+ ret = __socket_connect_finish(priv->sock);
+
+ if ((ret < 0) && (errno == EINPROGRESS))
+ ret = 1;
+
+ if ((ret < 0) && (errno != EINPROGRESS)) {
+ if (!priv->connect_finish_log) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "connection to %s failed (%s); "
+ "disconnecting socket",
+ this->peerinfo.identifier, strerror(errno));
+ priv->connect_finish_log = 1;
+ }
+ __socket_disconnect(this);
+ goto unlock;
+ }
- priv->connected = 1;
- priv->connect_finish_log = 0;
- event = RPC_TRANSPORT_CONNECT;
- get_transport_identifiers (this);
- }
+ if (ret == 0) {
+ notify_rpc = 1;
+
+ this->myinfo.sockaddr_len = sizeof(this->myinfo.sockaddr);
+
+ ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on (%d) failed (%s) - "
+ "disconnecting socket",
+ priv->sock, strerror(errno));
+ __socket_disconnect(this);
+ event = RPC_TRANSPORT_DISCONNECT;
+ goto unlock;
+ }
+
+ priv->connected = 1;
+ priv->connect_finish_log = 0;
+ event = RPC_TRANSPORT_CONNECT;
}
+ }
unlock:
- pthread_mutex_unlock (&priv->lock);
+ pthread_mutex_unlock(&priv->out_lock);
- if (notify_rpc) {
- rpc_transport_notify (this, event, this);
- }
-out:
- return 0;
+ if (notify_rpc) {
+ rpc_transport_notify(this, event, this);
+ }
+
+ return ret;
}
+static int
+socket_disconnect(rpc_transport_t *this, gf_boolean_t wait);
-/* reads rpc_requests during pollin */
-int
-socket_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
+/* socket_is_connected() is for use only in socket_event_handler() */
+static inline gf_boolean_t
+socket_is_connected(socket_private_t *priv)
{
- rpc_transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
-
- this = data;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
- GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
-
- THIS = this->xl;
- priv = this->private;
-
+ if (priv->use_ssl) {
+ return priv->is_server ? priv->ssl_accepted : priv->ssl_connected;
+ } else {
+ return priv->is_server ? priv->accepted : priv->connected;
+ }
+}
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
- }
- pthread_mutex_unlock (&priv->lock);
+static void
+ssl_rearm_event_fd(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ if (priv->ssl_error_required == SSL_ERROR_WANT_READ)
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, -1);
+ if (priv->ssl_error_required == SSL_ERROR_WANT_WRITE)
+ gf_event_select_on(ctx->event_pool, fd, idx, -1, 1);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+}
- if (!priv->connected) {
- ret = socket_connect_finish (this);
+static int
+ssl_handle_server_connection_attempt(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ if (!priv->ssl_context_created) {
+ ret = ssl_setup_connection_prefix(this, _gf_true);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "> ssl_setup_connection_prefix() failed!");
+ ret = -1;
+ goto out;
+ } else {
+ priv->ssl_context_created = _gf_true;
}
-
- if (!ret && poll_out) {
- ret = socket_event_poll_out (this);
+ }
+ ret = ssl_complete_connection(this);
+ if (ret == 0) {
+ /* nothing to do */
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, 0);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ ret = 1;
+ } else {
+ if (errno == EAGAIN) {
+ ssl_rearm_event_fd(this);
+ ret = 1;
+ } else {
+ ret = -1;
+ gf_log(this->name, GF_LOG_TRACE,
+ "ssl_complete_connection returned error");
}
+ }
+out:
+ return ret;
+}
- if (!ret && poll_in) {
- ret = socket_event_poll_in (this);
+static int
+ssl_handle_client_connection_attempt(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ fd = priv->sock;
+
+ /* SSL client */
+ if (priv->connect_failed) {
+ gf_log(this->name, GF_LOG_TRACE, ">>> disconnecting SSL socket");
+ (void)socket_disconnect(this, _gf_false);
+ /* Force ret to be -1, as we are officially done with
+ this socket */
+ ret = -1;
+ } else {
+ if (!priv->ssl_context_created) {
+ ret = ssl_setup_connection_prefix(this, _gf_false);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "> ssl_setup_connection_prefix() "
+ "failed!");
+ ret = -1;
+ goto out;
+ } else {
+ priv->ssl_context_created = _gf_true;
+ }
}
-
- if ((ret < 0) || poll_err) {
- /* Logging has happened already in earlier cases */
- gf_log ("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
- "disconnecting now");
- socket_event_poll_err (this);
- rpc_transport_unref (this);
+ ret = ssl_complete_connection(this);
+ if (ret == 0) {
+ ret = socket_connect_finish(this);
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, 0);
+ gf_log(this->name, GF_LOG_TRACE, ">>> completed client connect");
+ } else {
+ if (errno == EAGAIN) {
+ gf_log(this->name, GF_LOG_TRACE,
+ ">>> retrying client connect 2");
+ ssl_rearm_event_fd(this);
+ ret = 1;
+ } else {
+ /* this is a connection failure */
+ (void)socket_connect_finish(this);
+ gf_log(this->name, GF_LOG_TRACE,
+ "ssl_complete_connection "
+ "returned error");
+ ret = -1;
+ }
}
-
+ }
out:
- return 0;
+ return ret;
}
+static int
+socket_handle_client_connection_attempt(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ /* non-SSL client */
+ if (priv->connect_failed) {
+ /* connect failed with some other error than
+ EINPROGRESS or ENOENT, so nothing more to
+ do, fail reading/writing anything even if
+ poll_in or poll_out
+ is set
+ */
+ gf_log("transport", GF_LOG_DEBUG,
+ "connect failed with some other error "
+ "than EINPROGRESS or ENOENT, so "
+ "nothing more to do; disconnecting "
+ "socket");
+ (void)socket_disconnect(this, _gf_false);
+
+ /* Force ret to be -1, as we are officially
+ * done with this socket
+ */
+ ret = -1;
+ } else {
+ ret = socket_connect_finish(this);
+ gf_log(this->name, GF_LOG_TRACE, "socket_connect_finish() returned %d",
+ ret);
+ if (ret == 0 || ret == 1) {
+ /* we don't want to do any reads or
+ * writes on the connection yet in
+ * socket_event_handler, so just
+ * return 1
+ */
+ ret = 1;
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ }
+ }
+ return ret;
+}
-int
-socket_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- rpc_transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
- int new_sock = -1;
- rpc_transport_t *new_trans = NULL;
- struct sockaddr_storage new_sockaddr = {0, };
- socklen_t addrlen = sizeof (new_sockaddr);
- socket_private_t *new_priv = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- this = data;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
- GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
-
- THIS = this->xl;
- priv = this->private;
- ctx = this->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
+static int
+socket_complete_connection(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ if (priv->use_ssl) {
+ if (priv->is_server) {
+ ret = ssl_handle_server_connection_attempt(this);
+ } else {
+ ret = ssl_handle_client_connection_attempt(this);
+ }
+ } else {
+ if (priv->is_server) {
+ /* non-SSL server: nothing much to do
+ * connection has already been accepted in
+ * socket_server_event_handler()
+ */
+ priv->accepted = _gf_true;
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ ret = 1;
+ } else {
+ ret = socket_handle_client_connection_attempt(this);
+ }
+ }
+ return ret;
+}
- if (poll_in) {
- new_sock = accept (priv->sock, SA (&new_sockaddr),
- &addrlen);
+/* reads rpc_requests during pollin */
+static void
+socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ int poll_out, int poll_err, char event_thread_died)
+{
+ rpc_transport_t *this = NULL;
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_boolean_t socket_closed = _gf_false, notify_handled = _gf_false;
- if (new_sock == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "accept on %d failed (%s)",
- priv->sock, strerror (errno));
- goto unlock;
- }
+ this = data;
- if (!priv->bio) {
- ret = __socket_nonblock (new_sock);
+ if (event_thread_died) {
+ /* to avoid duplicate notifications, notify only for listener sockets */
+ return;
+ }
+
+ /* At this point we are sure no other thread is using the transport because
+ * we cannot receive more events until we call gf_event_handled(). However
+ * this function may call gf_event_handled() in some cases. When this is
+ * done, the transport may be destroyed at any moment if another thread
+ * handled an error event. To prevent that we take a reference here. */
+ rpc_transport_ref(this);
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO("socket", this->xl, out);
+
+ THIS = this->xl;
+ priv = this->private;
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ priv->idx = idx;
+ priv->gen = gen;
+ }
+ pthread_mutex_unlock(&priv->out_lock);
+
+ gf_log(this->name, GF_LOG_TRACE, "%s (sock:%d) in:%d, out:%d, err:%d",
+ (priv->is_server ? "server" : "client"), priv->sock, poll_in,
+ poll_out, poll_err);
+
+ if (!poll_err) {
+ if (!socket_is_connected(priv)) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (sock:%d) socket is not connected, "
+ "completing connection",
+ (priv->is_server ? "server" : "client"), priv->sock);
+
+ ret = socket_complete_connection(this);
+
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) "
+ "socket_complete_connection() returned %d",
+ priv->sock, ret);
+
+ if (ret > 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) returning to wait on socket", priv->sock);
+ goto out;
+ }
+ } else {
+ char *sock_type = (priv->is_server ? "Server" : "Client");
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "NBIO on %d failed (%s)",
- new_sock, strerror (errno));
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s socket (%d) is already connected", sock_type,
+ priv->sock);
+ ret = 0;
+ }
+ }
+
+ if (!ret && poll_out) {
+ ret = socket_event_poll_out(this);
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) "
+ "socket_event_poll_out returned %d",
+ priv->sock, ret);
+ }
+
+ if (!ret && poll_in) {
+ ret = socket_event_poll_in(this, !poll_err);
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) "
+ "socket_event_poll_in returned %d",
+ priv->sock, ret);
+ notify_handled = _gf_true;
+ }
+
+ if ((ret < 0) || poll_err) {
+ struct sockaddr *sa = SA(&this->peerinfo.sockaddr);
+
+ if (priv->is_server &&
+ SA(&this->myinfo.sockaddr)->sa_family == AF_UNIX) {
+ sa = SA(&this->myinfo.sockaddr);
+ }
- close (new_sock);
- goto unlock;
- }
- }
+ socket_dump_info(sa, priv->is_server, priv->use_ssl, priv->sock,
+ this->name, "disconnecting from");
- if (priv->nodelay) {
- ret = __socket_nodelay (new_sock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "setsockopt() failed for "
- "NODELAY (%s)",
- strerror (errno));
- }
- }
+ /* Dump the SSL error stack to clear any errors that may otherwise
+ * resurface in the future.
+ */
+ if (priv->use_ssl && priv->ssl_ssl) {
+ ssl_dump_error_stack(this->name);
+ }
- if (priv->keepalive) {
- ret = __socket_keepalive (new_sock,
- priv->keepaliveintvl,
- priv->keepaliveidle);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set keep-alive: %s",
- strerror (errno));
- }
+ /* Logging has happened already in earlier cases */
+ gf_log("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
+ "EPOLLERR - disconnecting (sock:%d) (%s)", priv->sock,
+ (priv->use_ssl ? "SSL" : "non-SSL"));
- new_trans = GF_CALLOC (1, sizeof (*new_trans),
- gf_common_mt_rpc_trans_t);
- if (!new_trans)
- goto unlock;
-
- new_trans->name = gf_strdup (this->name);
-
- memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
- addrlen);
- new_trans->peerinfo.sockaddr_len = addrlen;
-
- new_trans->myinfo.sockaddr_len =
- sizeof (new_trans->myinfo.sockaddr);
-
- ret = getsockname (new_sock,
- SA (&new_trans->myinfo.sockaddr),
- &new_trans->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "getsockname on %d failed (%s)",
- new_sock, strerror (errno));
- close (new_sock);
- goto unlock;
- }
+ socket_closed = socket_event_poll_err(this, gen, idx);
- get_transport_identifiers (new_trans);
- socket_init (new_trans);
- new_trans->ops = this->ops;
- new_trans->init = this->init;
- new_trans->fini = this->fini;
- new_trans->ctx = ctx;
- new_trans->xl = this->xl;
- new_trans->mydata = this->mydata;
- new_trans->notify = this->notify;
- new_trans->listener = this;
- new_priv = new_trans->private;
-
- pthread_mutex_lock (&new_priv->lock);
- {
- new_priv->sock = new_sock;
- new_priv->connected = 1;
- rpc_transport_ref (new_trans);
-
- new_priv->idx =
- event_register (ctx->event_pool,
- new_sock,
- socket_event_handler,
- new_trans, 1, 0);
-
- if (new_priv->idx == -1)
- ret = -1;
- }
- pthread_mutex_unlock (&new_priv->lock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to register the socket with event");
- goto unlock;
- }
+ if (socket_closed)
+ rpc_transport_unref(this);
- ret = rpc_transport_notify (this, RPC_TRANSPORT_ACCEPT,
- new_trans);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
+ } else if (!notify_handled) {
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ }
out:
- return ret;
+ rpc_transport_unref(this);
}
-
-int
-socket_disconnect (rpc_transport_t *this)
+static void
+socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ int poll_out, int poll_err, char event_thread_died)
{
- socket_private_t *priv = NULL;
- int ret = -1;
+ rpc_transport_t *this = NULL;
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ int new_sock = -1;
+ rpc_transport_t *new_trans = NULL;
+ struct sockaddr_storage new_sockaddr = {
+ 0,
+ };
+ socklen_t addrlen = sizeof(new_sockaddr);
+ socket_private_t *new_priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = data;
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO("socket", this->xl, out);
+ GF_VALIDATE_OR_GOTO("socket", this->ctx, out);
+
+ THIS = this->xl;
+ priv = this->private;
+ ctx = this->ctx;
+
+ if (event_thread_died) {
+ rpc_transport_notify(this, RPC_TRANSPORT_EVENT_THREAD_DIED,
+ (void *)(unsigned long)gen);
+ return;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ /* NOTE:
+ * We have done away with the critical section in this function. since
+ * there's little that it helps with. There's no other code that
+ * attempts to unref the listener socket/transport from any other
+ * thread context while we are using it here.
+ */
+ priv->idx = idx;
+ priv->gen = gen;
- priv = this->private;
+ if (poll_err) {
+ socket_event_poll_err(this, gen, idx);
+ goto out;
+ }
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_disconnect (this);
- }
- pthread_mutex_unlock (&priv->lock);
+ if (poll_in) {
+ int aflags = 0;
-out:
- return ret;
-}
+ if (!priv->bio)
+ aflags = O_NONBLOCK;
+ new_sock = sys_accept(priv->sock, SA(&new_sockaddr), &addrlen, aflags);
-int
-socket_connect (rpc_transport_t *this, int port)
-{
- int ret = -1;
- int sock = -1;
- socket_private_t *priv = NULL;
- socklen_t sockaddr_len = 0;
- glusterfs_ctx_t *ctx = NULL;
- sa_family_t sa_family = {0, };
- union gf_sock_union sock_union;
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
- GF_VALIDATE_OR_GOTO ("socket", this, err);
- GF_VALIDATE_OR_GOTO ("socket", this->private, err);
+ if (new_sock < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto out;
+ }
- priv = this->private;
- ctx = this->ctx;
+ if (new_sockaddr.ss_family != AF_UNIX) {
+ if (priv->nodelay) {
+ ret = __socket_nodelay(new_sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "setsockopt() failed for "
+ "NODELAY (%s)",
+ strerror(errno));
+ }
+ }
- if (!priv) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "connect() called on uninitialized transport");
- goto err;
+ if (priv->keepalive) {
+ ret = __socket_keepalive(
+ new_sock, new_sockaddr.ss_family, priv->keepaliveintvl,
+ priv->keepaliveidle, priv->keepalivecnt, priv->timeout);
+ if (ret != 0)
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set keep-alive: %s", strerror(errno));
+ }
}
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
+ new_trans = GF_CALLOC(1, sizeof(*new_trans), gf_common_mt_rpc_trans_t);
+ if (!new_trans) {
+ sys_close(new_sock);
+ gf_log(this->name, GF_LOG_WARNING,
+ "transport object allocation failure; "
+ "closed newly accepted socket %d",
+ new_sock);
+ goto out;
}
- pthread_mutex_unlock (&priv->lock);
- if (sock != -1) {
- gf_log_callingfn (this->name, GF_LOG_TRACE,
- "connect () called on transport already connected");
- errno = EINPROGRESS;
- ret = -1;
- goto err;
+ ret = pthread_mutex_init(&new_trans->lock, NULL);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "pthread_mutex_init() failed: %s; closing newly accepted "
+ "socket %d",
+ strerror(errno), new_sock);
+ sys_close(new_sock);
+ GF_FREE(new_trans);
+ goto out;
+ }
+ INIT_LIST_HEAD(&new_trans->list);
+
+ new_trans->name = gf_strdup(this->name);
+
+ memcpy(&new_trans->peerinfo.sockaddr, &new_sockaddr, addrlen);
+ new_trans->peerinfo.sockaddr_len = addrlen;
+
+ new_trans->myinfo.sockaddr_len = sizeof(new_trans->myinfo.sockaddr);
+
+ ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr),
+ &new_trans->myinfo.sockaddr_len);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on socket %d "
+ "failed (errno:%s); closing newly accepted socket",
+ new_sock, strerror(errno));
+ sys_close(new_sock);
+ GF_FREE(new_trans->name);
+ GF_FREE(new_trans);
+ goto out;
}
- ret = socket_client_get_remote_sockaddr (this, &sock_union.sa,
- &sockaddr_len, &sa_family);
- if (ret == -1) {
- /* logged inside client_get_remote_sockaddr */
- goto err;
+ get_transport_identifiers(new_trans);
+ gf_log(this->name, GF_LOG_TRACE, "XXX server:%s, client:%s",
+ new_trans->myinfo.identifier, new_trans->peerinfo.identifier);
+
+ /* Make options available to local socket_init() to create new
+ * SSL_CTX per transport. A separate SSL_CTX per transport is
+ * required to avoid setting crl checking options for client
+ * connections. The verification options eventually get copied
+ * to the SSL object. Unfortunately, there's no way to identify
+ * whether socket_init() is being called after a client-side
+ * connect() or a server-side accept(). Although, we could pass
+ * a flag from the transport init() to the socket_init() and
+ * from this place, this doesn't identify the case where the
+ * server-side transport loading is done for the first time.
+ * Also, SSL doesn't apply for UNIX sockets.
+ */
+ if (new_sockaddr.ss_family != AF_UNIX)
+ new_trans->options = dict_ref(this->options);
+ new_trans->ctx = this->ctx;
+
+ ret = socket_init(new_trans);
+
+ /* reset options to NULL to avoid double free */
+ if (new_sockaddr.ss_family != AF_UNIX) {
+ dict_unref(new_trans->options);
+ new_trans->options = NULL;
}
- if (port > 0) {
- sock_union.sin.sin_port = htons (port);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "initialization of new_trans "
+ "failed; closing newly accepted socket %d",
+ new_sock);
+ sys_close(new_sock);
+ GF_FREE(new_trans->name);
+ GF_FREE(new_trans);
+ goto out;
+ }
+ new_trans->ops = this->ops;
+ new_trans->init = this->init;
+ new_trans->fini = this->fini;
+ new_trans->ctx = ctx;
+ new_trans->xl = this->xl;
+ new_trans->mydata = this->mydata;
+ new_trans->notify = this->notify;
+ new_trans->listener = this;
+ new_trans->notify_poller_death = this->poller_death_accept;
+ new_priv = new_trans->private;
+
+ if (new_sockaddr.ss_family == AF_UNIX) {
+ new_priv->use_ssl = _gf_false;
+ } else {
+ switch (priv->srvr_ssl) {
+ case MGMT_SSL_ALWAYS:
+ /* Glusterd with secure_mgmt. */
+ new_priv->use_ssl = _gf_true;
+ break;
+ case MGMT_SSL_COPY_IO:
+ /* Glusterfsd. */
+ new_priv->use_ssl = priv->ssl_enabled;
+ break;
+ default:
+ new_priv->use_ssl = _gf_false;
+ }
}
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "connect() -- already connected");
- goto unlock;
- }
- memcpy (&this->peerinfo.sockaddr, &sock_union.storage,
- sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
+ new_priv->sock = new_sock;
- priv->sock = socket (sa_family, SOCK_STREAM, 0);
- if (priv->sock == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
+ new_priv->ssl_enabled = priv->ssl_enabled;
+ new_priv->connected = 1;
+ new_priv->is_server = _gf_true;
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (priv->windowsize != 0) {
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting receive window "
- "size failed: %d: %d: %s",
- priv->sock, priv->windowsize,
- strerror (errno));
- }
+ /*
+ * This is the first ref on the newly accepted
+ * transport.
+ */
+ rpc_transport_ref(new_trans);
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting send window size "
- "failed: %d: %d: %s",
- priv->sock, priv->windowsize,
- strerror (errno));
- }
+ {
+ /* Take a ref on the new_trans to avoid
+ * getting deleted when event_register()
+ * causes socket_event_handler() to race
+ * ahead of this path to eventually find
+ * a disconnect and unref the transport
+ */
+ rpc_transport_ref(new_trans);
+
+ /* Send a notification to RPCSVC layer
+ * to save the new_trans in its service
+ * list before we register the new_sock
+ * with epoll to begin receiving notifications
+ * for data handling.
+ */
+ ret = rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans);
+
+ if (ret >= 0) {
+ new_priv->idx = gf_event_register(
+ ctx->event_pool, new_sock, socket_event_handler, new_trans,
+ 1, 0, new_trans->notify_poller_death);
+ if (new_priv->idx == -1) {
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to register the socket "
+ "with event");
+
+ /* event_register() could have failed for some
+ * reason, implying that the new_sock cannot be
+ * added to the epoll set. If we won't get any
+ * more notifications for new_sock from epoll,
+ * then we better remove the corresponding
+ * new_trans object from the RPCSVC service list.
+ * Since we've notified RPC service of new_trans
+ * before we attempted event_register(), we better
+ * unlink the new_trans from the RPCSVC service list
+ * to cleanup the stateby sending out a DISCONNECT
+ * notification.
+ */
+ rpc_transport_notify(this, RPC_TRANSPORT_DISCONNECT,
+ new_trans);
}
+ }
+
+ /* this rpc_transport_unref() is for managing race between
+ * 1. socket_server_event_handler and
+ * 2. socket_event_handler
+ * trying to add and remove new_trans from the rpcsvc
+ * service list
+ * now that we are done with the notifications, lets
+ * reduce the reference
+ */
+ rpc_transport_unref(new_trans);
+ }
- if (priv->nodelay) {
- ret = __socket_nodelay (priv->sock);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket");
+ sys_close(new_sock);
+ /* this unref is to actually cause the destruction of
+ * the new_trans since we've failed at everything so far
+ */
+ rpc_transport_unref(new_trans);
+ }
+ }
+out:
+ return;
+}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "NODELAY on %d failed (%s)",
- priv->sock, strerror (errno));
- }
- }
+static int
+socket_disconnect(rpc_transport_t *this, gf_boolean_t wait)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
+ priv = this->private;
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ ret = __socket_disconnect(this);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- if (priv->keepalive) {
- ret = __socket_keepalive (priv->sock,
- priv->keepaliveintvl,
- priv->keepaliveidle);
- if (ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set keep-alive: %s",
- strerror (errno));
- }
+ return ret;
+}
- SA (&this->myinfo.sockaddr)->sa_family =
- SA (&this->peerinfo.sockaddr)->sa_family;
-
- ret = client_bind (this, SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len, priv->sock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+void *
+socket_connect_error_cbk(void *opaque)
+{
+ socket_connect_error_state_t *arg;
- ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
- this->peerinfo.sockaddr_len);
+ GF_ASSERT(opaque);
- if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
- gf_log (this->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ arg = opaque;
+ THIS = arg->this;
- priv->connected = 0;
+ rpc_transport_notify(arg->trans, RPC_TRANSPORT_DISCONNECT, arg->trans);
- rpc_transport_ref (this);
+ if (arg->refd)
+ rpc_transport_unref(arg->trans);
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_event_handler, this, 1, 1);
- if (priv->idx == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to register the event");
- ret = -1;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
+ GF_FREE(opaque);
+ return NULL;
+}
-err:
- return ret;
+static void
+socket_fix_ssl_opts(rpc_transport_t *this, socket_private_t *priv,
+ uint16_t port)
+{
+ if (port == GF_DEFAULT_SOCKET_LISTEN_PORT) {
+ gf_log(this->name, GF_LOG_DEBUG, "%s SSL for portmapper connection",
+ priv->mgmt_ssl ? "enabling" : "disabling");
+ priv->use_ssl = priv->mgmt_ssl;
+ } else if (priv->ssl_enabled && !priv->use_ssl) {
+ gf_log(this->name, GF_LOG_DEBUG, "re-enabling SSL for I/O connection");
+ priv->use_ssl = _gf_true;
+ }
}
+/*
+ * If we might just be trying to connect prematurely, e.g. to a brick that's
+ * slow coming up, all we need is a simple retry. Don't worry about sleeping
+ * in some arbitrary thread. The connect(2) could already have the exact same
+ * effect, and we deal with it in that case so we can deal with it for sleep(2)
+ * as well.
+ */
+static int
+connect_loop(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+{
+ int ret;
+ int connect_fails = 0;
-int
-socket_listen (rpc_transport_t *this)
+ for (;;) {
+ ret = connect(sockfd, addr, addrlen);
+ if (ret >= 0) {
+ break;
+ }
+ if ((errno != ENOENT) || (++connect_fails >= 5)) {
+ break;
+ }
+ sleep(1);
+ }
+
+ return ret;
+}
+
+static int
+socket_connect(rpc_transport_t *this, int port)
{
- socket_private_t * priv = NULL;
- int ret = -1;
- int sock = -1;
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len = 0;
- peer_info_t *myinfo = NULL;
- glusterfs_ctx_t *ctx = NULL;
- sa_family_t sa_family = {0, };
+ int ret = -1;
+ int th_ret = -1;
+ int sock = -1;
+ socket_private_t *priv = NULL;
+ socklen_t sockaddr_len = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ sa_family_t sa_family = {
+ 0,
+ };
+ char *local_addr = NULL;
+ union gf_sock_union sock_union;
+ struct sockaddr_in *addr = NULL;
+ gf_boolean_t refd = _gf_false;
+ socket_connect_error_state_t *arg = NULL;
+ pthread_t th_id = {
+ 0,
+ };
+ gf_boolean_t ign_enoent = _gf_false;
+ gf_boolean_t connect_attempted = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("socket", this, err);
+ GF_VALIDATE_OR_GOTO("socket", this->private, err);
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ if (!priv) {
+ gf_log_callingfn(this->name, GF_LOG_WARNING,
+ "connect() called on uninitialized transport");
+ goto err;
+ }
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_TRACE,
+ "connect () called on transport "
+ "already connected");
+ errno = EINPROGRESS;
+ ret = -1;
+ goto unlock;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ gf_log(this->name, GF_LOG_TRACE, "connecting %p, sock=%d", this,
+ priv->sock);
- priv = this->private;
- myinfo = &this->myinfo;
- ctx = this->ctx;
+ ret = socket_client_get_remote_sockaddr(this, &sock_union.sa,
+ &sockaddr_len, &sa_family);
+ if (ret < 0) {
+ /* logged inside client_get_remote_sockaddr */
+ goto unlock;
+ }
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
+ if (sa_family == AF_UNIX) {
+ priv->ssl_enabled = _gf_false;
+ priv->mgmt_ssl = _gf_false;
+ } else {
+ if (port > 0) {
+ sock_union.sin.sin_port = htons(port);
+ }
+ socket_fix_ssl_opts(this, priv, ntohs(sock_union.sin.sin_port));
}
- pthread_mutex_unlock (&priv->lock);
- if (sock != -1) {
- gf_log_callingfn (this->name, GF_LOG_DEBUG,
- "already listening");
- return ret;
+ memcpy(&this->peerinfo.sockaddr, &sock_union.storage, sockaddr_len);
+ this->peerinfo.sockaddr_len = sockaddr_len;
+
+ priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
+ if (priv->sock < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+ strerror(errno));
+ ret = -1;
+ goto unlock;
}
- ret = socket_server_get_local_sockaddr (this, SA (&sockaddr),
- &sockaddr_len, &sa_family);
- if (ret == -1) {
- return ret;
+ /* Can't help if setting socket options fails. We can continue
+ * working nonetheless.
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting receive window "
+ "size failed: %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
+
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting send window size "
+ "failed: %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
}
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "already listening");
- goto unlock;
+ /* Make sure we are not vulnerable to someone setting
+ * net.ipv6.bindv6only to 1 so that gluster services are
+ * available over IPv4 & IPv6.
+ */
+#ifdef IPV6_DEFAULT
+ int disable_v6only = 0;
+ if (setsockopt(priv->sock, IPPROTO_IPV6, IPV6_V6ONLY,
+ (void *)&disable_v6only, sizeof(disable_v6only)) < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Error disabling sockopt IPV6_V6ONLY: \"%s\"",
+ strerror(errno));
+ }
+#endif
+
+ if (sa_family != AF_UNIX) {
+ if (priv->nodelay) {
+ ret = __socket_nodelay(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "NODELAY on %d failed (%s)", priv->sock,
+ strerror(errno));
}
+ }
- memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len);
- myinfo->sockaddr_len = sockaddr_len;
+ if (priv->keepalive) {
+ ret = __socket_keepalive(
+ priv->sock, sa_family, priv->keepaliveintvl,
+ priv->keepaliveidle, priv->keepalivecnt, priv->timeout);
+ if (ret != 0)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set keep-alive: %s", strerror(errno));
+ }
+ }
- priv->sock = socket (sa_family, SOCK_STREAM, 0);
+ SA(&this->myinfo.sockaddr)->sa_family = SA(&this->peerinfo.sockaddr)
+ ->sa_family;
- if (priv->sock == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
+ /* If a source addr is explicitly specified, use it */
+ ret = dict_get_str_sizen(this->options, "transport.socket.source-addr",
+ &local_addr);
+ if (!ret && SA(&this->myinfo.sockaddr)->sa_family == AF_INET) {
+ addr = (struct sockaddr_in *)(&this->myinfo.sockaddr);
+ ret = inet_pton(AF_INET, local_addr, &(addr->sin_addr.s_addr));
+ }
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (priv->windowsize != 0) {
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting receive window size "
- "failed: %d: %d: %s", priv->sock,
- priv->windowsize,
- strerror (errno));
- }
+ /* If client wants ENOENT to be ignored */
+ ign_enoent = dict_get_str_boolean(
+ this->options, "transport.socket.ignore-enoent", _gf_false);
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting send window size failed:"
- " %d: %d: %s", priv->sock,
- priv->windowsize,
- strerror (errno));
- }
- }
+ ret = client_bind(this, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len, priv->sock);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s",
+ strerror(errno));
+ goto handler;
+ }
- if (priv->nodelay) {
- ret = __socket_nodelay (priv->sock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "setsockopt() failed for NODELAY (%s)",
- strerror (errno));
- }
- }
+ /* make socket non-blocking for all types of sockets */
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto handler;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ ">>> connect() with non-blocking IO for ALL");
+ }
+ }
+ this->connect_failed = _gf_false;
+ priv->connect_failed = 0;
+ priv->connected = 0;
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
+ socket_dump_info(SA(&this->peerinfo.sockaddr), priv->is_server,
+ priv->use_ssl, priv->sock, this->name,
+ "connecting to");
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
+ if (ign_enoent) {
+ ret = connect_loop(priv->sock, SA(&this->peerinfo.sockaddr),
+ this->peerinfo.sockaddr_len);
+ } else {
+ ret = connect(priv->sock, SA(&this->peerinfo.sockaddr),
+ this->peerinfo.sockaddr_len);
+ }
- ret = __socket_server_bind (this);
+ connect_attempted = _gf_true;
- if (ret == -1) {
- /* logged inside __socket_server_bind() */
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ if ((ret != 0) && (errno == ENOENT) && ign_enoent) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Ignore failed connection attempt on %s, (%s) ",
+ this->peerinfo.identifier, strerror(errno));
- if (priv->backlog)
- ret = listen (priv->sock, priv->backlog);
- else
- ret = listen (priv->sock, 10);
-
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set socket %d to listen mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ /* connect failed with some other error than EINPROGRESS
+ so, getsockopt (... SO_ERROR ...), will not catch any
+ errors and return them to us, we need to remember this
+ state, and take actions in socket_event_handler
+ appropriately */
+ /* TBD: What about ENOENT, we will do getsockopt there
+ as well, so how is that exempt from such a problem? */
+ priv->connect_failed = 1;
+ this->connect_failed = _gf_true;
- rpc_transport_ref (this);
+ goto handler;
+ }
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_server_event_handler,
- this, 1, 0);
+ if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) {
+ /* For unix path based sockets, the socket path is
+ * cryptic (md5sum of path) and may not be useful for
+ * the user in debugging so log it in DEBUG
+ */
+ gf_log(this->name,
+ ((sa_family == AF_UNIX) ? GF_LOG_DEBUG : GF_LOG_ERROR),
+ "connection attempt on %s failed, (%s)",
+ this->peerinfo.identifier, strerror(errno));
+
+ /* connect failed with some other error than EINPROGRESS
+ so, getsockopt (... SO_ERROR ...), will not catch any
+ errors and return them to us, we need to remember this
+ state, and take actions in socket_event_handler
+ appropriately */
+ /* TBD: What about ENOENT, we will do getsockopt there
+ as well, so how is that exempt from such a problem? */
+ priv->connect_failed = 1;
+
+ goto handler;
+ } else {
+ /* reset connect_failed so that any previous attempts
+ state is not carried forward */
+ priv->connect_failed = 0;
+ ret = 0;
+ }
- if (priv->idx == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not register socket %d with events",
- priv->sock);
- ret = -1;
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ handler:
+ if (ret < 0 && !connect_attempted) {
+ /* Ignore error from connect. epoll events
+ should be handled in the socket handler. shutdown(2)
+ will result in EPOLLERR, so cleanup is done in
+ socket_event_handler or socket_poller */
+ shutdown(priv->sock, SHUT_RDWR);
+ ret = 0;
+ gf_log(this->name, GF_LOG_INFO,
+ "intentional client shutdown(%d, SHUT_RDWR)", priv->sock);
}
-unlock:
- pthread_mutex_unlock (&priv->lock);
-out:
- return ret;
-}
+ priv->connected = 0;
+ priv->is_server = _gf_false;
+ rpc_transport_ref(this);
+ refd = _gf_true;
+
+ this->listener = this;
+ priv->idx = gf_event_register(ctx->event_pool, priv->sock,
+ socket_event_handler, this, 1, 1,
+ this->notify_poller_death);
+ if (priv->idx == -1) {
+ gf_log("", GF_LOG_WARNING,
+ "failed to register the event; "
+ "closing socket %d",
+ priv->sock);
+ sys_close(priv->sock);
+ priv->sock = -1;
+ ret = -1;
+ }
+ unlock:
+ sock = priv->sock;
+ }
+ pthread_mutex_unlock(&priv->out_lock);
-int32_t
-socket_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- char need_poll_out = 0;
- char need_append = 1;
- struct ioq *entry = NULL;
- glusterfs_ctx_t *ctx = NULL;
+err:
+ /* if sock >= 0, then cleanup is done from the event handler */
+ if ((ret < 0) && (sock < 0)) {
+ /* Cleaup requires to send notification to upper layer which
+ intern holds the big_lock. There can be dead-lock situation
+ if big_lock is already held by the current thread.
+ So transfer the ownership to separate thread for cleanup.
+ */
+ arg = GF_CALLOC(1, sizeof(*arg), gf_sock_connect_error_state_t);
+ arg->this = THIS;
+ arg->trans = this;
+ arg->refd = refd;
+ th_ret = gf_thread_create_detached(&th_id, socket_connect_error_cbk,
+ arg, "scleanup");
+ if (th_ret) {
+ /* Error will be logged by gf_thread_create_attached */
+ gf_log(this->name, GF_LOG_ERROR,
+ "Thread creation "
+ "failed");
+ GF_FREE(arg);
+ GF_ASSERT(0);
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ ret = 0;
+ }
- priv = this->private;
- ctx = this->ctx;
+ return ret;
+}
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- if (!priv->submit_log && !priv->connect_finish_log) {
- gf_log (this->name, GF_LOG_INFO,
- "not connected (priv->connected = %d)",
- priv->connected);
- priv->submit_log = 1;
- }
- goto unlock;
- }
+static int
+socket_listen(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ int sock = -1;
+ struct sockaddr_storage sockaddr;
+ socklen_t sockaddr_len = 0;
+ peer_info_t *myinfo = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ sa_family_t sa_family = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+ myinfo = &this->myinfo;
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ sock = priv->sock;
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- priv->submit_log = 0;
- entry = __socket_ioq_new (this, &req->msg);
- if (!entry)
- goto unlock;
+ if (sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening");
+ return ret;
+ }
- if (list_empty (&priv->ioq)) {
- ret = __socket_ioq_churn_entry (this, entry);
+ ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len,
+ &sa_family);
+ if (ret < 0) {
+ return ret;
+ }
- if (ret == 0)
- need_append = 0;
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->sock >= 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "already listening");
+ goto unlock;
+ }
- if (ret > 0)
- need_poll_out = 1;
- }
+ memcpy(&myinfo->sockaddr, &sockaddr, sockaddr_len);
+ myinfo->sockaddr_len = sockaddr_len;
- if (need_append) {
- list_add_tail (&entry->list, &priv->ioq);
- ret = 0;
- }
+ priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
- if (need_poll_out) {
- /* first entry to wait. continue writing on POLLOUT */
- priv->idx = event_select_on (ctx->event_pool,
- priv->sock,
- priv->idx, -1, 1);
- }
+ if (priv->sock < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+ strerror(errno));
+ goto unlock;
}
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
-out:
- return ret;
-}
+ /* Can't help if setting socket options fails. We can continue
+ * working nonetheless.
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting receive window size "
+ "failed: %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
+
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting send window size failed:"
+ " %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
+ }
-int32_t
-socket_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- char need_poll_out = 0;
- char need_append = 1;
- struct ioq *entry = NULL;
- glusterfs_ctx_t *ctx = NULL;
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
+ ret = __socket_nodelay(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setsockopt() failed for NODELAY (%s)", strerror(errno));
+ }
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "NBIO on socket %d failed "
+ "(errno:%s); closing socket",
+ priv->sock, strerror(errno));
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
- priv = this->private;
- ctx = this->ctx;
+ /* coverity[SLEEP] */
+ ret = __socket_server_bind(this);
+
+ if (ret < 0) {
+ /* logged inside __socket_server_bind() */
+ gf_log(this->name, GF_LOG_ERROR,
+ "__socket_server_bind failed;"
+ "closing socket %d",
+ priv->sock);
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- if (!priv->submit_log && !priv->connect_finish_log) {
- gf_log (this->name, GF_LOG_INFO,
- "not connected (priv->connected = %d)",
- priv->connected);
- priv->submit_log = 1;
- }
- goto unlock;
- }
- priv->submit_log = 0;
- entry = __socket_ioq_new (this, &reply->msg);
- if (!entry)
- goto unlock;
- if (list_empty (&priv->ioq)) {
- ret = __socket_ioq_churn_entry (this, entry);
-
- if (ret == 0)
- need_append = 0;
-
- if (ret > 0)
- need_poll_out = 1;
- }
+ socket_dump_info(SA(&this->myinfo.sockaddr), priv->is_server,
+ priv->use_ssl, priv->sock, this->name, "listening on");
- if (need_append) {
- list_add_tail (&entry->list, &priv->ioq);
- ret = 0;
- }
+ ret = listen(priv->sock, priv->backlog);
- if (need_poll_out) {
- /* first entry to wait. continue writing on POLLOUT */
- priv->idx = event_select_on (ctx->event_pool,
- priv->sock,
- priv->idx, -1, 1);
- }
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "could not set socket %d to listen mode (errno:%s); "
+ "closing socket",
+ priv->sock, strerror(errno));
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
}
+ rpc_transport_ref(this);
+
+ priv->idx = gf_event_register(ctx->event_pool, priv->sock,
+ socket_server_event_handler, this, 1, 0,
+ this->notify_poller_death);
+
+ if (priv->idx == -1) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "could not register socket %d with events; "
+ "closing socket",
+ priv->sock);
+ ret = -1;
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
unlock:
- pthread_mutex_unlock (&priv->lock);
+ pthread_mutex_unlock(&priv->out_lock);
out:
- return ret;
+ return ret;
}
-
-int32_t
-socket_getpeername (rpc_transport_t *this, char *hostname, int hostlen)
+static int32_t
+socket_submit_outgoing_msg(rpc_transport_t *this, rpc_transport_msg_t *msg)
{
- int32_t ret = -1;
+ int ret = -1;
+ char need_poll_out = 0;
+ char need_append = 1;
+ struct ioq *entry = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ socket_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->connected != 1) {
+ if (!priv->submit_log && !priv->connect_finish_log) {
+ gf_log(this->name, GF_LOG_INFO,
+ "not connected (priv->connected = %d)", priv->connected);
+ priv->submit_log = 1;
+ }
+ goto unlock;
+ }
+
+ priv->submit_log = 0;
+ entry = __socket_ioq_new(this, msg);
+ if (!entry)
+ goto unlock;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", hostname, out);
+ if (list_empty(&priv->ioq)) {
+ ret = __socket_ioq_churn_entry(this, entry);
- if (hostlen < (strlen (this->peerinfo.identifier) + 1)) {
- goto out;
+ if (ret == 0) {
+ need_append = 0;
+ }
+ if (ret > 0) {
+ need_poll_out = 1;
+ }
}
- strcpy (hostname, this->peerinfo.identifier);
- ret = 0;
+ if (need_append) {
+ list_add_tail(&entry->list, &priv->ioq);
+ ret = 0;
+ }
+ if (need_poll_out) {
+ /* first entry to wait. continue writing on POLLOUT */
+ priv->idx = gf_event_select_on(ctx->event_pool, priv->sock,
+ priv->idx, -1, 1);
+ }
+ }
+unlock:
+ pthread_mutex_unlock(&priv->out_lock);
+
out:
- return ret;
+ return ret;
}
+static int32_t
+socket_submit_request(rpc_transport_t *this, rpc_transport_req_t *req)
+{
+ return socket_submit_outgoing_msg(this, &req->msg);
+}
-int32_t
-socket_getpeeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, socklen_t salen)
+static int32_t
+socket_submit_reply(rpc_transport_t *this, rpc_transport_reply_t *reply)
{
- int32_t ret = -1;
+ return socket_submit_outgoing_msg(this, &reply->msg);
+}
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", sa, out);
+static int32_t
+socket_getpeername(rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
- *sa = this->peerinfo.sockaddr;
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", hostname, out);
- if (peeraddr != NULL) {
- ret = socket_getpeername (this, peeraddr, addrlen);
- }
- ret = 0;
+ if (hostlen < (strlen(this->peerinfo.identifier) + 1)) {
+ goto out;
+ }
+ strcpy(hostname, this->peerinfo.identifier);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-int32_t
-socket_getmyname (rpc_transport_t *this, char *hostname, int hostlen)
+static int32_t
+socket_getpeeraddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t salen)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", hostname, out);
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", sa, out);
+ ret = 0;
- if (hostlen < (strlen (this->myinfo.identifier) + 1)) {
- goto out;
- }
+ *sa = this->peerinfo.sockaddr;
- strcpy (hostname, this->myinfo.identifier);
- ret = 0;
+ if (peeraddr != NULL) {
+ ret = socket_getpeername(this, peeraddr, addrlen);
+ }
out:
- return ret;
+ return ret;
}
+static int32_t
+socket_getmyname(rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
-int32_t
-socket_getmyaddr (rpc_transport_t *this, char *myaddr, int addrlen,
- struct sockaddr_storage *sa, socklen_t salen)
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", hostname, out);
+
+ if (hostlen < (strlen(this->myinfo.identifier) + 1)) {
+ goto out;
+ }
+
+ strcpy(hostname, this->myinfo.identifier);
+ ret = 0;
+out:
+ return ret;
+}
+
+static int32_t
+socket_getmyaddr(rpc_transport_t *this, char *myaddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t salen)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", sa, out);
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", sa, out);
- *sa = this->myinfo.sockaddr;
+ *sa = this->myinfo.sockaddr;
- if (myaddr != NULL) {
- ret = socket_getmyname (this, myaddr, addrlen);
- }
+ if (myaddr != NULL) {
+ ret = socket_getmyname(this, myaddr, addrlen);
+ }
out:
- return ret;
+ return ret;
}
+static int
+socket_throttle(rpc_transport_t *this, gf_boolean_t onoff)
+{
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* The way we implement throttling is by taking off
+ POLLIN event from the polled flags. This way we
+ never get called with the POLLIN event and therefore
+ will never read() any more data until throttling
+ is turned off.
+ */
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ /* Throttling is useless on a disconnected transport. In fact,
+ * it's dangerous since priv->idx and priv->sock are set to -1
+ * on a disconnected transport, which breaks epoll's event to
+ * registered fd mapping. */
+
+ if (priv->connected == 1)
+ priv->idx = gf_event_select_on(this->ctx->event_pool, priv->sock,
+ priv->idx, (int)!onoff, -1);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
+ return 0;
+}
struct rpc_transport_ops tops = {
- .listen = socket_listen,
- .connect = socket_connect,
- .disconnect = socket_disconnect,
- .submit_request = socket_submit_request,
- .submit_reply = socket_submit_reply,
- .get_peername = socket_getpeername,
- .get_peeraddr = socket_getpeeraddr,
- .get_myname = socket_getmyname,
- .get_myaddr = socket_getmyaddr,
+ .listen = socket_listen,
+ .connect = socket_connect,
+ .disconnect = socket_disconnect,
+ .submit_request = socket_submit_request,
+ .submit_reply = socket_submit_reply,
+ .get_peername = socket_getpeername,
+ .get_peeraddr = socket_getpeeraddr,
+ .get_myname = socket_getmyname,
+ .get_myaddr = socket_getmyaddr,
+ .throttle = socket_throttle,
};
int
-reconfigure (rpc_transport_t *this, dict_t *options)
+reconfigure(rpc_transport_t *this, dict_t *options)
{
- socket_private_t *priv = NULL;
- gf_boolean_t tmp_bool = _gf_false;
- char *optstr = NULL;
- int ret = 0;
- uint64_t windowsize = 0;
+ socket_private_t *priv = NULL;
+ gf_boolean_t tmp_bool = _gf_false;
+ char *optstr = NULL;
+ int ret = -1;
+ uint32_t backlog = 0;
+ uint64_t windowsize = 0;
+ data_t *data;
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+
+ if (dict_get_str_sizen(options, "transport.socket.keepalive", &optstr) ==
+ 0) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+ priv->keepalive = 1;
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive");
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ priv->keepalive = tmp_bool;
+ } else
+ priv->keepalive = 1;
- if (!this || !this->private) {
- ret =-1;
- goto out;
+ if (dict_get_int32_sizen(options, "transport.tcp-user-timeout",
+ &(priv->timeout)) != 0)
+ priv->timeout = GF_NETWORK_TIMEOUT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.tcp-user-timeout=%d", priv->timeout);
+
+ if (dict_get_uint32(options, "transport.listen-backlog", &backlog) == 0) {
+ priv->backlog = backlog;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.listen-backlog=%d", priv->backlog);
+ }
+
+ if (priv->keepalive) {
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-time",
+ &(priv->keepaliveidle)) != 0)
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-time=%d",
+ priv->keepaliveidle);
+
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-interval",
+ &(priv->keepaliveintvl)) != 0)
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-interval=%d",
+ priv->keepaliveintvl);
+
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-count",
+ &(priv->keepalivecnt)) != 0)
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-count=%d",
+ priv->keepalivecnt);
+ }
+
+ optstr = NULL;
+ if (dict_get_str_sizen(options, "tcp-window-size", &optstr) == 0) {
+ if (gf_string2uint64(optstr, &windowsize) != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "invalid number format: %s",
+ optstr);
+ goto out;
}
+ }
- priv = this->private;
+ priv->windowsize = (int)windowsize;
- if (dict_get_str (this->options, "transport.socket.keepalive",
- &optstr) == 0) {
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'transport.socket.keepalive' takes only "
- "boolean options, not taking any action");
- priv->keepalive = 1;
- ret = -1;
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Reconfigured transport.socket.keepalive");
+ data = dict_get_sizen(options, "non-blocking-io");
+ if (data) {
+ optstr = data_to_str(data);
- priv->keepalive = tmp_bool;
- }
- else
- priv->keepalive = 1;
-
- optstr = NULL;
- if (dict_get_str (this->options, "tcp-window-size",
- &optstr) == 0) {
- if (gf_string2bytesize (optstr, &windowsize) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format: %s", optstr);
- goto out;
- }
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'non-blocking-io' takes only boolean options,"
+ " not taking any action");
+ tmp_bool = 1;
}
- priv->windowsize = (int)windowsize;
+ if (!tmp_bool) {
+ priv->bio = 1;
+ gf_log(this->name, GF_LOG_WARNING, "disabling non-blocking IO");
+ }
+ }
+
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING, "NBIO on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto out;
+ }
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
+}
+#if OPENSSL_VERSION_NUMBER < 0x1010000f
+static pthread_mutex_t *lock_array = NULL;
+
+static void
+locking_func(int mode, int type, const char *file, int line)
+{
+ if (mode & CRYPTO_UNLOCK) {
+ pthread_mutex_unlock(&lock_array[type]);
+ } else {
+ pthread_mutex_lock(&lock_array[type]);
+ }
}
-int
-socket_init (rpc_transport_t *this)
+#if OPENSSL_VERSION_NUMBER >= 0x1000000f
+static void
+threadid_func(CRYPTO_THREADID *id)
+{
+ /*
+ * We're not supposed to know whether a pthread_t is a number or a
+ * pointer, but we definitely need an unsigned long. Even though it
+ * happens to be an unsigned long already on Linux, do the cast just in
+ * case that's not so on another platform. Note that this can still
+ * break if any platforms are left where a pointer is larger than an
+ * unsigned long. In that case there's not much we can do; hopefully
+ * anyone porting to such a platform will be aware enough to notice the
+ * compile warnings about truncating the pointer value.
+ */
+ CRYPTO_THREADID_set_numeric(id, (unsigned long)pthread_self());
+}
+#else /* older openssl */
+static unsigned long
+legacy_threadid_func(void)
{
- socket_private_t *priv = NULL;
- gf_boolean_t tmp_bool = 0;
- uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
- char *optstr = NULL;
- uint32_t keepalive = 0;
- uint32_t backlog = 0;
+ /* See comments above, it applies here too. */
+ return (unsigned long)pthread_self();
+}
+#endif /* OPENSSL_VERSION_NUMBER >= 0x1000000f */
+#endif /* OPENSSL_VERSION_NUMBER < 0x1010000f */
+
+static void
+init_openssl_mt(void)
+{
+ static gf_boolean_t initialized = _gf_false;
+
+ if (initialized) {
+ /* this only needs to be initialized once GLOBALLY no
+ matter how many translators/sockets we end up with. */
+ return;
+ }
+
+ SSL_library_init();
+ SSL_load_error_strings();
+
+ initialized = _gf_true;
- if (this->private) {
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "double init attempted");
- return -1;
+#if OPENSSL_VERSION_NUMBER < 0x1010000f
+ int num_locks = CRYPTO_num_locks();
+ int i;
+
+ lock_array = GF_CALLOC(num_locks, sizeof(pthread_mutex_t),
+ gf_sock_mt_lock_array);
+ if (lock_array) {
+ for (i = 0; i < num_locks; ++i) {
+ pthread_mutex_init(&lock_array[i], NULL);
}
+#if OPENSSL_VERSION_NUMBER >= 0x1000000f
+ CRYPTO_THREADID_set_callback(threadid_func);
+#else /* older openssl */
+ CRYPTO_set_id_callback(legacy_threadid_func);
+#endif
+ CRYPTO_set_locking_callback(locking_func);
+ }
+#endif
+}
+
+static void __attribute__((destructor)) fini_openssl_mt(void)
+{
+#if OPENSSL_VERSION_NUMBER < 0x1010000f
+ int i;
+
+ if (!lock_array) {
+ return;
+ }
+
+ CRYPTO_set_locking_callback(NULL);
+#if OPENSSL_VERSION_NUMBER >= 0x1000000f
+ CRYPTO_THREADID_set_callback(NULL);
+#else /* older openssl */
+ CRYPTO_set_id_callback(NULL);
+#endif
+
+ for (i = 0; i < CRYPTO_num_locks(); ++i) {
+ pthread_mutex_destroy(&lock_array[i]);
+ }
+
+ GF_FREE(lock_array);
+ lock_array = NULL;
+#endif
+
+ ERR_free_strings();
+}
- priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_socket_private_t);
- if (!priv) {
- return -1;
+/* The function returns 0 if AES bit is enabled on the CPU */
+static int
+ssl_check_aes_bit(void)
+{
+ FILE *fp = fopen("/proc/cpuinfo", "r");
+ int ret = 1;
+ size_t len = 0;
+ char *line = NULL;
+ char *match = NULL;
+
+ GF_ASSERT(fp != NULL);
+
+ while (getline(&line, &len, fp) > 0) {
+ if (!strncmp(line, "flags", 5)) {
+ match = strstr(line, " aes");
+ if ((match != NULL) && ((match[4] == ' ') || (match[4] == 0))) {
+ ret = 0;
+ break;
+ }
}
+ }
- pthread_mutex_init (&priv->lock, NULL);
+ free(line);
+ fclose(fp);
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
- priv->nodelay = 1;
- priv->bio = 0;
- priv->windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
- INIT_LIST_HEAD (&priv->ioq);
+ return ret;
+}
- /* All the below section needs 'this->options' to be present */
- if (!this->options)
- goto out;
+static int
+ssl_setup_connection_params(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ char *optstr = NULL;
+ static int session_id = 1;
+ int32_t cert_depth = DEFAULT_VERIFY_DEPTH;
+ char *cipher_list = DEFAULT_CIPHER_LIST;
+ char *dh_param = DEFAULT_DH_PARAM;
+ char *ec_curve = DEFAULT_EC_CURVE;
+ gf_boolean_t dh_flag = _gf_false;
+
+ priv = this->private;
+
+ if (priv->ssl_ctx != NULL) {
+ gf_log(this->name, GF_LOG_TRACE, "found old SSL context!");
+ return 0;
+ }
- if (dict_get (this->options, "non-blocking-io")) {
- optstr = data_to_str (dict_get (this->options,
- "non-blocking-io"));
+ if (!priv->ssl_enabled && !priv->mgmt_ssl) {
+ return 0;
+ }
+
+ if (!ssl_check_aes_bit()) {
+ cipher_list = "AES128:" DEFAULT_CIPHER_LIST;
+ }
+
+ priv->ssl_own_cert = DEFAULT_CERT_PATH;
+ if (dict_get_str_sizen(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_OWN_CERT_OPT,
+ SSL_ENABLED_OPT);
+ }
+ priv->ssl_own_cert = optstr;
+ }
+ priv->ssl_own_cert = gf_strdup(priv->ssl_own_cert);
+
+ priv->ssl_private_key = DEFAULT_KEY_PATH;
+ if (dict_get_str_sizen(this->options, SSL_PRIVATE_KEY_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_PRIVATE_KEY_OPT,
+ SSL_ENABLED_OPT);
+ }
+ priv->ssl_private_key = optstr;
+ }
+ priv->ssl_private_key = gf_strdup(priv->ssl_private_key);
+
+ priv->ssl_ca_list = DEFAULT_CA_PATH;
+ if (dict_get_str_sizen(this->options, SSL_CA_LIST_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_CA_LIST_OPT,
+ SSL_ENABLED_OPT);
+ }
+ priv->ssl_ca_list = optstr;
+ }
+ priv->ssl_ca_list = gf_strdup(priv->ssl_ca_list);
+
+ optstr = NULL;
+ if (dict_get_str_sizen(this->options, SSL_CRL_PATH_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_CRL_PATH_OPT,
+ SSL_ENABLED_OPT);
+ }
+ if (strcasecmp(optstr, "NULL") == 0)
+ priv->crl_path = NULL;
+ else
+ priv->crl_path = gf_strdup(optstr);
+ }
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'non-blocking-io' takes only boolean options,"
- " not taking any action");
- tmp_bool = 1;
- }
+ if (!priv->mgmt_ssl) {
+ if (!dict_get_int32_sizen(this->options, SSL_CERT_DEPTH_OPT,
+ &cert_depth)) {
+ }
+ } else {
+ cert_depth = this->ctx->ssl_cert_depth;
+ }
+ gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG,
+ "SSL support for MGMT is %s IO path is %s certificate depth is %d "
+ "for peer %s",
+ (priv->mgmt_ssl ? "ENABLED" : "NOT enabled"),
+ (priv->ssl_enabled ? "ENABLED" : "NOT enabled"), cert_depth,
+ this->peerinfo.identifier);
+
+ if (!dict_get_str_sizen(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) {
+ gf_log(this->name, GF_LOG_INFO, "using cipher list %s", cipher_list);
+ }
+ if (!dict_get_str_sizen(this->options, SSL_DH_PARAM_OPT, &dh_param)) {
+ dh_flag = _gf_true;
+ gf_log(this->name, GF_LOG_INFO, "using DH parameters %s", dh_param);
+ }
+ if (!dict_get_str_sizen(this->options, SSL_EC_CURVE_OPT, &ec_curve)) {
+ gf_log(this->name, GF_LOG_INFO, "using EC curve %s", ec_curve);
+ }
+
+ if (priv->ssl_enabled || priv->mgmt_ssl) {
+ BIO *bio = NULL;
+
+#if HAVE_TLS_METHOD
+ priv->ssl_meth = (SSL_METHOD *)TLS_method();
+#elif HAVE_TLSV1_2_METHOD
+ priv->ssl_meth = (SSL_METHOD *)TLSv1_2_method();
+#else
+/*
+ * Nobody should use an OpenSSL so old it does not support TLS 1.2.
+ * If that is really required, build with -DUSE_INSECURE_OPENSSL
+ */
+#ifndef USE_INSECURE_OPENSSL
+#error Old and insecure OpenSSL, use -DUSE_INSECURE_OPENSSL to use it anyway
+#endif
+ /* SSLv23_method uses highest available protocol */
+ priv->ssl_meth = SSLv23_method();
+#endif
+ priv->ssl_ctx = SSL_CTX_new(priv->ssl_meth);
- if (!tmp_bool) {
- priv->bio = 1;
- gf_log (this->name, GF_LOG_WARNING,
- "disabling non-blocking IO");
- }
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_SSLv2);
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_SSLv3);
+#ifdef SSL_OP_NO_TICKET
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_TICKET);
+#endif
+#ifdef SSL_OP_NO_COMPRESSION
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_COMPRESSION);
+#endif
+ /* Upload file to bio wrapper only if dh param is configured
+ */
+ if (dh_flag) {
+ if ((bio = BIO_new_file(dh_param, "r")) == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to open %s, "
+ "DH ciphers are disabled",
+ dh_param);
+ }
}
- optstr = NULL;
+ if (bio != NULL) {
+#ifdef HAVE_OPENSSL_DH_H
+ DH *dh;
+ unsigned long err;
+
+ dh = PEM_read_bio_DHparams(bio, NULL, NULL, NULL);
+ BIO_free(bio);
+ if (dh != NULL) {
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_SINGLE_DH_USE);
+ SSL_CTX_set_tmp_dh(priv->ssl_ctx, dh);
+ DH_free(dh);
+ } else {
+ err = ERR_get_error();
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to read DH param from %s: %s "
+ "DH ciphers are disabled.",
+ dh_param, ERR_error_string(err, NULL));
+ }
+#else /* HAVE_OPENSSL_DH_H */
+ BIO_free(bio);
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL has no DH support");
+#endif /* HAVE_OPENSSL_DH_H */
+ }
- // By default, we enable NODELAY
- if (dict_get (this->options, "transport.socket.nodelay")) {
- optstr = data_to_str (dict_get (this->options,
- "transport.socket.nodelay"));
+ if (ec_curve != NULL) {
+#ifdef HAVE_OPENSSL_ECDH_H
+ EC_KEY *ecdh = NULL;
+ int nid;
+ unsigned long err;
+
+ nid = OBJ_sn2nid(ec_curve);
+ if (nid != 0)
+ ecdh = EC_KEY_new_by_curve_name(nid);
+
+ if (ecdh != NULL) {
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_SINGLE_ECDH_USE);
+ SSL_CTX_set_tmp_ecdh(priv->ssl_ctx, ecdh);
+ EC_KEY_free(ecdh);
+ } else {
+ err = ERR_get_error();
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to load EC curve %s: %s. "
+ "ECDH ciphers are disabled.",
+ ec_curve, ERR_error_string(err, NULL));
+ }
+#else /* HAVE_OPENSSL_ECDH_H */
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL has no ECDH support");
+#endif /* HAVE_OPENSSL_ECDH_H */
+ }
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'transport.socket.nodelay' takes only "
- "boolean options, not taking any action");
- tmp_bool = 1;
- }
- if (!tmp_bool) {
- priv->nodelay = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "disabling nodelay");
- }
+ /* This must be done after DH and ECDH setups */
+ if (SSL_CTX_set_cipher_list(priv->ssl_ctx, cipher_list) == 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to find any valid ciphers");
+ goto err;
}
- optstr = NULL;
- if (dict_get_str (this->options, "tcp-window-size",
- &optstr) == 0) {
- if (gf_string2bytesize (optstr, &windowsize) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format: %s", optstr);
- return -1;
- }
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
+
+ if (!SSL_CTX_use_certificate_chain_file(priv->ssl_ctx,
+ priv->ssl_own_cert)) {
+ gf_log(this->name, GF_LOG_ERROR, "could not load our cert at %s",
+ priv->ssl_own_cert);
+ ssl_dump_error_stack(this->name);
+ goto err;
}
- priv->windowsize = (int)windowsize;
+ if (!SSL_CTX_use_PrivateKey_file(priv->ssl_ctx, priv->ssl_private_key,
+ SSL_FILETYPE_PEM)) {
+ gf_log(this->name, GF_LOG_ERROR, "could not load private key at %s",
+ priv->ssl_private_key);
+ ssl_dump_error_stack(this->name);
+ goto err;
+ }
- optstr = NULL;
- /* Enable Keep-alive by default. */
- priv->keepalive = 1;
- priv->keepaliveintvl = 2;
- priv->keepaliveidle = 20;
- if (dict_get_str (this->options, "transport.socket.keepalive",
- &optstr) == 0) {
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'transport.socket.keepalive' takes only "
- "boolean options, not taking any action");
- tmp_bool = 1;
- }
+ if (!SSL_CTX_load_verify_locations(priv->ssl_ctx, priv->ssl_ca_list,
+ priv->crl_path)) {
+ gf_log(this->name, GF_LOG_ERROR, "could not load CA list");
+ goto err;
+ }
+
+ SSL_CTX_set_verify_depth(priv->ssl_ctx, cert_depth);
+
+ if (priv->crl_path)
+ ssl_set_crl_verify_flags(priv->ssl_ctx);
+
+ priv->ssl_session_id = session_id++;
+ SSL_CTX_set_session_id_context(priv->ssl_ctx,
+ (void *)&priv->ssl_session_id,
+ sizeof(priv->ssl_session_id));
+
+ SSL_CTX_set_verify(priv->ssl_ctx, SSL_VERIFY_PEER, 0);
+
+ /*
+ * Since glusterfs shares the same settings for client-side
+ * and server-side of SSL, we need to ignore any certificate
+ * usage specification (SSL client vs SSL server), otherwise
+ * SSL connexions will fail with 'unsupported cerritifcate"
+ */
+ SSL_CTX_set_purpose(priv->ssl_ctx, X509_PURPOSE_ANY);
+ }
+ return 0;
+
+err:
+ return -1;
+}
- if (!tmp_bool)
- priv->keepalive = 0;
+static int
+socket_init(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ gf_boolean_t tmp_bool = 0;
+ uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
+ char *optstr = NULL;
+ data_t *data;
+
+ if (this->private) {
+ gf_log_callingfn(this->name, GF_LOG_ERROR, "double init attempted");
+ return -1;
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_common_mt_socket_private_t);
+ if (!priv) {
+ return -1;
+ }
+
+ this->private = priv;
+ pthread_mutex_init(&priv->out_lock, NULL);
+ pthread_mutex_init(&priv->cond_lock, NULL);
+ pthread_cond_init(&priv->cond, NULL);
+
+ /*GF_REF_INIT (priv, socket_poller_mayday);*/
+
+ priv->sock = -1;
+ priv->idx = -1;
+ priv->connected = -1;
+ priv->nodelay = 1;
+ priv->bio = 0;
+ priv->ssl_accepted = _gf_false;
+ priv->ssl_connected = _gf_false;
+ priv->windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
+ INIT_LIST_HEAD(&priv->ioq);
+ pthread_mutex_init(&priv->notify.lock, NULL);
+ pthread_cond_init(&priv->notify.cond, NULL);
+
+ /* All the below section needs 'this->options' to be present */
+ if (!this->options)
+ goto out;
+
+ data = dict_get_sizen(this->options, "non-blocking-io");
+ if (data) {
+ optstr = data_to_str(data);
+
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'non-blocking-io' takes only boolean options,"
+ " not taking any action");
+ tmp_bool = 1;
}
- if (dict_get_uint32 (this->options,
- "transport.socket.keepalive-interval",
- &keepalive) == 0) {
- priv->keepaliveintvl = keepalive;
+ if (!tmp_bool) {
+ priv->bio = 1;
+ gf_log(this->name, GF_LOG_WARNING, "disabling non-blocking IO");
}
+ }
- if (dict_get_uint32 (this->options,
- "transport.socket.keepalive-time",
- &keepalive) == 0) {
- priv->keepaliveidle = keepalive;
+ optstr = NULL;
+
+ /* By default, we enable NODELAY */
+ data = dict_get_sizen(this->options, "transport.socket.nodelay");
+ if (data) {
+ optstr = data_to_str(data);
+
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.nodelay' takes only "
+ "boolean options, not taking any action");
+ tmp_bool = 1;
+ }
+ if (!tmp_bool) {
+ priv->nodelay = 0;
+ gf_log(this->name, GF_LOG_DEBUG, "disabling nodelay");
+ }
+ }
+
+ optstr = NULL;
+ if (dict_get_str_sizen(this->options, "tcp-window-size", &optstr) == 0) {
+ if (gf_string2uint64(optstr, &windowsize) != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "invalid number format: %s",
+ optstr);
+ return -1;
+ }
+ }
+
+ priv->windowsize = (int)windowsize;
+
+ optstr = NULL;
+ /* Enable Keep-alive by default. */
+ priv->keepalive = 1;
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ if (dict_get_str_sizen(this->options, "transport.socket.keepalive",
+ &optstr) == 0) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+ tmp_bool = 1;
}
- if (dict_get_uint32 (this->options,
- "transport.socket.listen-backlog",
- &backlog) == 0) {
- priv->backlog = backlog;
+ if (!tmp_bool)
+ priv->keepalive = 0;
+ }
+
+ if (dict_get_int32_sizen(this->options, "transport.tcp-user-timeout",
+ &(priv->timeout)) != 0)
+ priv->timeout = GF_NETWORK_TIMEOUT;
+ gf_log(this->name, GF_LOG_DEBUG, "Configured transport.tcp-user-timeout=%d",
+ priv->timeout);
+
+ if (priv->keepalive) {
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-time",
+ &(priv->keepaliveidle)) != 0) {
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
}
- optstr = NULL;
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-interval",
+ &(priv->keepaliveintvl)) != 0) {
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ }
- /* Check if socket read failures are to be logged */
- priv->read_fail_log = 1;
- if (dict_get (this->options, "transport.socket.read-fail-log")) {
- optstr = data_to_str (dict_get (this->options, "transport.socket.read-fail-log"));
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "'transport.socket.read-fail-log' takes only "
- "boolean options; logging socket read fails");
- }
- else if (tmp_bool == _gf_false) {
- priv->read_fail_log = 0;
- }
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-count",
+ &(priv->keepalivecnt)) != 0)
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.keepalivecnt=%d", priv->keepalivecnt);
+ }
+
+ if (dict_get_uint32(this->options, "transport.listen-backlog",
+ &(priv->backlog)) != 0) {
+ priv->backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
+ }
+
+ optstr = NULL;
+
+ /* Check if socket read failures are to be logged */
+ priv->read_fail_log = 1;
+ data = dict_get_sizen(this->options, "transport.socket.read-fail-log");
+ if (data) {
+ optstr = data_to_str(data);
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "'transport.socket.read-fail-log' takes only "
+ "boolean options; logging socket read fails");
+ } else if (tmp_bool == _gf_false) {
+ priv->read_fail_log = 0;
}
+ }
- optstr = NULL;
-out:
- this->private = priv;
+ priv->windowsize = (int)windowsize;
- return 0;
-}
+ priv->ssl_enabled = _gf_false;
+ if (dict_get_str_sizen(this->options, SSL_ENABLED_OPT, &optstr) == 0) {
+ if (gf_string2boolean(optstr, &priv->ssl_enabled) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "invalid value given for ssl-enabled boolean");
+ }
+ }
+ priv->mgmt_ssl = this->ctx->secure_mgmt;
+ priv->srvr_ssl = this->ctx->secure_srvr;
+ ssl_setup_connection_params(this);
+out:
+ this->private = priv;
+ return 0;
+}
void
-fini (rpc_transport_t *this)
+fini(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
+ socket_private_t *priv = NULL;
- if (!this)
- return;
+ if (!this)
+ return;
- priv = this->private;
- if (priv) {
- if (priv->sock != -1) {
- pthread_mutex_lock (&priv->lock);
- {
- __socket_ioq_flush (this);
- __socket_reset (this);
- }
- pthread_mutex_unlock (&priv->lock);
- }
- gf_log (this->name, GF_LOG_TRACE,
- "transport %p destroyed", this);
+ priv = this->private;
+ if (priv) {
+ if (priv->sock >= 0) {
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ __socket_ioq_flush(priv);
+ __socket_reset(this);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
+ }
+ gf_log(this->name, GF_LOG_TRACE, "transport %p destroyed", this);
+
+ pthread_mutex_destroy(&priv->out_lock);
+ pthread_mutex_destroy(&priv->cond_lock);
+ pthread_cond_destroy(&priv->cond);
- pthread_mutex_destroy (&priv->lock);
- GF_FREE (priv);
+ GF_ASSERT(priv->notify.in_progress == 0);
+ pthread_mutex_destroy(&priv->notify.lock);
+ pthread_cond_destroy(&priv->notify.cond);
+
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
}
- this->private = NULL;
-}
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ }
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ }
+ GF_FREE(priv);
+ }
+ this->private = NULL;
+}
int32_t
-init (rpc_transport_t *this)
+init(rpc_transport_t *this)
{
- int ret = -1;
+ int ret = -1;
- ret = socket_init (this);
+ init_openssl_mt();
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "socket_init() failed");
- }
+ ret = socket_init(this);
- return ret;
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed");
+ }
+
+ return ret;
}
struct volume_options options[] = {
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.socket.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.bind-address", "bind-address" },
- .type = GF_OPTION_TYPE_INTERNET_ADDRESS
- },
- { .key = {"transport.socket.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "transport.address-family",
- "address-family" },
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
- "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
-
- { .key = {"non-blocking-io"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"tcp-window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = GF_MIN_SOCKET_WINDOW_SIZE,
- .max = GF_MAX_SOCKET_WINDOW_SIZE
- },
- { .key = {"transport.socket.nodelay"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.lowlat"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.keepalive"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.keepalive-interval"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.keepalive-time"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.listen-backlog"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.read-fail-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} }
-};
+ {.key = {"remote-port", "transport.remote-port",
+ "transport.socket.remote-port"},
+ .type = GF_OPTION_TYPE_INT},
+ {.key = {"transport.socket.listen-port", "listen-port"},
+ .type = GF_OPTION_TYPE_INT},
+ {.key = {"transport.socket.bind-address", "bind-address"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS},
+ {.key = {"transport.socket.connect-path", "connect-path"},
+ .type = GF_OPTION_TYPE_ANY},
+ {.key = {"transport.socket.bind-path", "bind-path"},
+ .type = GF_OPTION_TYPE_ANY},
+ {.key = {"transport.socket.listen-path", "listen-path"},
+ .type = GF_OPTION_TYPE_ANY},
+ {.key = {"transport.address-family", "address-family"},
+ .value = {"inet", "inet6", "unix", "inet-sdp"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .type = GF_OPTION_TYPE_STR},
+ {.key = {"non-blocking-io"}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {"tcp-window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Option to set TCP SEND/RECV BUFFER SIZE",
+ .min = GF_MIN_SOCKET_WINDOW_SIZE,
+ .max = GF_MAX_SOCKET_WINDOW_SIZE},
+ {
+ .key = {"transport.listen-backlog"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .op_version = {GD_OP_VERSION_3_11_1},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "This option uses the value of backlog argument that "
+ "defines the maximum length to which the queue of "
+ "pending connections for socket fd may grow.",
+ .default_value = "1024",
+ },
+ {.key = {"transport.tcp-user-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = TOSTRING(GF_NETWORK_TIMEOUT)},
+ {.key = {"transport.socket.nodelay"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "1"},
+ {.key = {"transport.socket.keepalive"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {1},
+ .default_value = "1"},
+ {.key = {"transport.socket.keepalive-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = "2"},
+ {.key = {"transport.socket.keepalive-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = "20"},
+ {.key = {"transport.socket.keepalive-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = "9"},
+ {.key = {"transport.socket.read-fail-log"}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {SSL_ENABLED_OPT}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {SSL_OWN_CERT_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_PRIVATE_KEY_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CA_LIST_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CERT_DEPTH_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CIPHER_LIST_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_DH_PARAM_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_EC_CURVE_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CRL_PATH_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {OWN_THREAD_OPT}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {"ssl-own-cert"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .type = GF_OPTION_TYPE_STR,
+ .description = "SSL certificate. Ignored if SSL is not enabled."},
+ {.key = {"ssl-private-key"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .type = GF_OPTION_TYPE_STR,
+ .description = "SSL private key. Ignored if SSL is not enabled."},
+ {.key = {"ssl-ca-list"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .type = GF_OPTION_TYPE_STR,
+ .description = "SSL CA list. Ignored if SSL is not enabled."},
+ {.key = {"ssl-cert-depth"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Maximum certificate-chain depth. If zero, the "
+ "peer's certificate itself must be in the local "
+ "certificate list. Otherwise, there may be up to N "
+ "signing certificates between the peer's and the "
+ "local list. Ignored if SSL is not enabled."},
+ {.key = {"ssl-cipher-list"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Allowed SSL ciphers. Ignored if SSL is not enabled."},
+ {.key = {"ssl-dh-param"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "DH parameters file. Ignored if SSL is not enabled."},
+ {.key = {"ssl-ec-curve"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "ECDH curve name. Ignored if SSL is not enabled."},
+ {.key = {"ssl-crl-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Path to directory containing CRL. "
+ "Ignored if SSL is not enabled."},
+ {.key = {NULL}}};
diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
index 0c897bd2ec0..8a2eda70605 100644
--- a/rpc/rpc-transport/socket/src/socket.h
+++ b/rpc/rpc-transport/socket/src/socket.h
@@ -11,24 +11,24 @@
#ifndef _SOCKET_H
#define _SOCKET_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/x509v3.h>
+#include <openssl/x509_vfy.h>
+#ifdef HAVE_OPENSSL_DH_H
+#include <openssl/dh.h>
+#endif
+#ifdef HAVE_OPENSSL_ECDH_H
+#include <openssl/ecdh.h>
#endif
-#include "event.h"
#include "rpc-transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "globals.h"
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
#endif /* MAX_IOVEC */
-#define GF_DEFAULT_SOCKET_LISTEN_PORT GF_DEFAULT_BASE_PORT
+#define GF_DEFAULT_SOCKET_LISTEN_PORT GF_DEFAULT_BASE_PORT
#define RPC_MAX_FRAGMENT_SIZE 0x7fffffff
@@ -40,153 +40,237 @@
* setsockopt will fail. Having larger values might be beneficial for
* IB links.
*/
-#define GF_DEFAULT_SOCKET_WINDOW_SIZE (0)
-#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
-#define GF_MIN_SOCKET_WINDOW_SIZE (0)
-#define GF_USE_DEFAULT_KEEPALIVE (-1)
+#define GF_DEFAULT_SOCKET_WINDOW_SIZE (0)
+#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
+#define GF_MIN_SOCKET_WINDOW_SIZE (0)
+#define GF_USE_DEFAULT_KEEPALIVE (-1)
+
+#define GF_KEEPALIVE_TIME (20)
+#define GF_KEEPALIVE_INTERVAL (2)
+#define GF_KEEPALIVE_COUNT (9)
typedef enum {
- SP_STATE_NADA = 0,
- SP_STATE_COMPLETE,
- SP_STATE_READING_FRAGHDR,
- SP_STATE_READ_FRAGHDR,
- SP_STATE_READING_FRAG,
+ SP_STATE_NADA = 0,
+ SP_STATE_COMPLETE,
+ SP_STATE_READING_FRAGHDR,
+ SP_STATE_READ_FRAGHDR,
+ SP_STATE_READING_FRAG,
} sp_rpcrecord_state_t;
typedef enum {
- SP_STATE_RPCFRAG_INIT,
- SP_STATE_READING_MSGTYPE,
- SP_STATE_READ_MSGTYPE,
+ SP_STATE_RPCFRAG_INIT,
+ SP_STATE_READING_MSGTYPE,
+ SP_STATE_READ_MSGTYPE,
+ SP_STATE_NOTIFYING_XID
} sp_rpcfrag_state_t;
typedef enum {
- SP_STATE_SIMPLE_MSG_INIT,
- SP_STATE_READING_SIMPLE_MSG,
+ SP_STATE_SIMPLE_MSG_INIT,
+ SP_STATE_READING_SIMPLE_MSG,
} sp_rpcfrag_simple_msg_state_t;
typedef enum {
- SP_STATE_VECTORED_REQUEST_INIT,
- SP_STATE_READING_CREDBYTES,
- SP_STATE_READ_CREDBYTES, /* read credential data. */
- SP_STATE_READING_VERFBYTES,
- SP_STATE_READ_VERFBYTES, /* read verifier data */
- SP_STATE_READING_PROGHDR,
- SP_STATE_READ_PROGHDR,
- SP_STATE_READING_PROG,
+ SP_STATE_VECTORED_REQUEST_INIT,
+ SP_STATE_READING_CREDBYTES,
+ SP_STATE_READ_CREDBYTES, /* read credential data. */
+ SP_STATE_READING_VERFBYTES,
+ SP_STATE_READ_VERFBYTES, /* read verifier data */
+ SP_STATE_READING_PROGHDR,
+ SP_STATE_READ_PROGHDR,
+ SP_STATE_READING_PROGHDR_XDATA,
+ SP_STATE_READ_PROGHDR_XDATA, /* It's a bad "name" in the generic
+ RPC state machine, but greatly
+ aids code review (and xdata is
+ the only "consumer" of this state)
+ */
+ SP_STATE_READING_PROG,
} sp_rpcfrag_vectored_request_state_t;
typedef enum {
- SP_STATE_REQUEST_HEADER_INIT,
- SP_STATE_READING_RPCHDR1,
- SP_STATE_READ_RPCHDR1, /* read msg from beginning till and
- * including credlen
- */
+ SP_STATE_REQUEST_HEADER_INIT,
+ SP_STATE_READING_RPCHDR1,
+ SP_STATE_READ_RPCHDR1, /* read msg from beginning till and
+ * including credlen
+ */
} sp_rpcfrag_request_header_state_t;
struct ioq {
- union {
- struct list_head list;
- struct {
- struct ioq *next;
- struct ioq *prev;
- };
+ union {
+ struct list_head list;
+ struct {
+ struct ioq *next;
+ struct ioq *prev;
};
+ };
- uint32_t fraghdr;
- struct iovec vector[MAX_IOVEC];
- int count;
- struct iovec *pending_vector;
- int pending_count;
- struct iobref *iobref;
+ struct iovec vector[MAX_IOVEC];
+ struct iovec *pending_vector;
+ int count;
+ int pending_count;
+ struct iobref *iobref;
+ uint32_t fraghdr;
+ char _pad[4];
};
typedef struct {
- sp_rpcfrag_request_header_state_t header_state;
- sp_rpcfrag_vectored_request_state_t vector_state;
- int vector_sizer_state;
+ sp_rpcfrag_request_header_state_t header_state;
+ sp_rpcfrag_vectored_request_state_t vector_state;
+ int vector_sizer_state;
} sp_rpcfrag_request_state_t;
typedef enum {
- SP_STATE_VECTORED_REPLY_STATUS_INIT,
- SP_STATE_READING_REPLY_STATUS,
- SP_STATE_READ_REPLY_STATUS,
+ SP_STATE_VECTORED_REPLY_STATUS_INIT,
+ SP_STATE_READING_REPLY_STATUS,
+ SP_STATE_READ_REPLY_STATUS,
} sp_rpcfrag_vectored_reply_status_state_t;
typedef enum {
- SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT,
- SP_STATE_READING_PROC_HEADER,
- SP_STATE_READ_PROC_HEADER,
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT,
+ SP_STATE_READING_PROC_HEADER,
+ SP_STATE_READING_PROC_OPAQUE,
+ SP_STATE_READ_PROC_OPAQUE,
+ SP_STATE_READ_PROC_HEADER,
} sp_rpcfrag_vectored_reply_accepted_success_state_t;
typedef enum {
- SP_STATE_ACCEPTED_REPLY_INIT,
- SP_STATE_READING_REPLY_VERFLEN,
- SP_STATE_READ_REPLY_VERFLEN,
- SP_STATE_READING_REPLY_VERFBYTES,
- SP_STATE_READ_REPLY_VERFBYTES,
+ SP_STATE_ACCEPTED_REPLY_INIT,
+ SP_STATE_READING_REPLY_VERFLEN,
+ SP_STATE_READ_REPLY_VERFLEN,
+ SP_STATE_READING_REPLY_VERFBYTES,
+ SP_STATE_READ_REPLY_VERFBYTES,
} sp_rpcfrag_vectored_reply_accepted_state_t;
typedef struct {
- uint32_t accept_status;
- sp_rpcfrag_vectored_reply_status_state_t status_state;
- sp_rpcfrag_vectored_reply_accepted_state_t accepted_state;
- sp_rpcfrag_vectored_reply_accepted_success_state_t accepted_success_state;
+ uint32_t accept_status;
+ sp_rpcfrag_vectored_reply_status_state_t status_state;
+ sp_rpcfrag_vectored_reply_accepted_state_t accepted_state;
+ sp_rpcfrag_vectored_reply_accepted_success_state_t accepted_success_state;
} sp_rpcfrag_vectored_reply_state_t;
+struct gf_sock_incoming_frag {
+ char *fragcurrent;
+ uint32_t bytes_read;
+ uint32_t remaining_size;
+ struct iovec vector;
+ struct iovec *pending_vector;
+ union {
+ sp_rpcfrag_request_state_t request;
+ sp_rpcfrag_vectored_reply_state_t reply;
+ } call_body;
+
+ sp_rpcfrag_simple_msg_state_t simple_state;
+ sp_rpcfrag_state_t state;
+};
+
+#define GF_SOCKET_RA_MAX 1024
+
+struct gf_sock_incoming {
+ char *proghdr_base_addr;
+ struct iobuf *iobuf;
+ size_t iobuf_size;
+ struct gf_sock_incoming_frag frag;
+ struct iovec vector[2];
+ struct iovec payload_vector;
+ struct iobref *iobref;
+ rpc_request_info_t *request_info;
+ struct iovec *pending_vector;
+ int count;
+ int pending_count;
+ size_t total_bytes_read;
+
+ size_t ra_read;
+ size_t ra_max;
+ size_t ra_served;
+ char *ra_buf;
+ uint32_t fraghdr;
+ msg_type_t msg_type;
+ sp_rpcrecord_state_t record_state;
+ char _pad[4];
+};
+
typedef struct {
- int32_t sock;
- int32_t idx;
- unsigned char connected; // -1 = not connected. 0 = in progress. 1 = connected
- char bio;
- char connect_finish_log;
- char submit_log;
- union {
- struct list_head ioq;
- struct {
- struct ioq *ioq_next;
- struct ioq *ioq_prev;
- };
- };
+ union {
+ struct list_head ioq;
struct {
- sp_rpcrecord_state_t record_state;
- struct {
- char *fragcurrent;
- uint32_t bytes_read;
- uint32_t remaining_size;
- struct iovec vector;
- struct iovec *pending_vector;
- union {
- sp_rpcfrag_request_state_t request;
- sp_rpcfrag_vectored_reply_state_t reply;
- } call_body;
-
- sp_rpcfrag_simple_msg_state_t simple_state;
- sp_rpcfrag_state_t state;
- } frag;
- struct iobuf *iobuf;
- size_t iobuf_size;
- struct iovec vector[2];
- int count;
- struct iovec payload_vector;
- struct iobref *iobref;
- rpc_request_info_t *request_info;
- struct iovec *pending_vector;
- int pending_count;
- uint32_t fraghdr;
- char complete_record;
- msg_type_t msg_type;
- size_t total_bytes_read;
- } incoming;
- pthread_mutex_t lock;
- int windowsize;
- char lowlat;
- char nodelay;
- int keepalive;
- int keepaliveidle;
- int keepaliveintvl;
- uint32_t backlog;
- gf_boolean_t read_fail_log;
-} socket_private_t;
+ struct ioq *ioq_next;
+ struct ioq *ioq_prev;
+ };
+ };
+ pthread_mutex_t out_lock;
+ pthread_mutex_t cond_lock;
+ pthread_cond_t cond;
+ int windowsize;
+ int keepalive;
+ int keepaliveidle;
+ int keepaliveintvl;
+ int keepalivecnt;
+ int timeout;
+ int log_ctr;
+ int shutdown_log_ctr;
+ /* ssl_error_required is used only during the SSL connection setup
+ * phase.
+ * It holds the error code returned by SSL_get_error() and is used to
+ * arm the epoll event set for the required event for the specific fd.
+ */
+ int ssl_error_required;
+ int ssl_session_id;
+ GF_REF_DECL; /* refcount to keep track of socket_poller
+ threads */
+ struct {
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ uint64_t in_progress;
+ } notify;
+ int32_t sock;
+ int32_t idx;
+ int32_t gen;
+ uint32_t backlog;
+ SSL_METHOD *ssl_meth;
+ SSL_CTX *ssl_ctx;
+ BIO *ssl_sbio;
+ SSL *ssl_ssl;
+ char *ssl_own_cert;
+ char *ssl_private_key;
+ char *ssl_ca_list;
+ char *crl_path;
+ struct gf_sock_incoming incoming;
+ mgmt_ssl_t srvr_ssl;
+ /* -1 = not connected. 0 = in progress. 1 = connected */
+ char connected;
+ /* 1 = connect failed for reasons other than EINPROGRESS/ENOENT
+ see socket_connect for details */
+ char connect_failed;
+ char bio;
+ char connect_finish_log;
+ char submit_log;
+ char nodelay;
+ gf_boolean_t read_fail_log;
+ gf_boolean_t ssl_enabled; /* outbound I/O */
+ gf_boolean_t mgmt_ssl; /* outbound mgmt */
+ gf_boolean_t is_server;
+ gf_boolean_t use_ssl;
+ gf_boolean_t ssl_accepted; /* To indicate SSL_accept() */
+ gf_boolean_t ssl_connected; /* or SSL_connect() has been
+ * been completed on this socket.
+ * These are valid only when
+ * use_ssl is true.
+ */
+ /* SSL_CTX is created for each transport. Since we are now using non-
+ * blocking mechanism for SSL_accept() and SSL_connect(), the SSL
+ * context is created on the first EPOLLIN event which may lead to
+ * SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE and may not complete the
+ * SSL connection at the first attempt.
+ * ssl_context_created is a flag to note that we've created the SSL
+ * context for the connection so that we don't blindly create any more
+ * while !ssl_accepted or !ssl_connected.
+ */
+ gf_boolean_t ssl_context_created;
+ gf_boolean_t accepted; /* explicit flag to be set in
+ * socket_event_handler() for
+ * newly accepted socket
+ */
+ char _pad[4];
+} socket_private_t;
#endif
diff --git a/rpc/xdr/src/.gitignore b/rpc/xdr/src/.gitignore
new file mode 100644
index 00000000000..a0c8b7ca2b6
--- /dev/null
+++ b/rpc/xdr/src/.gitignore
@@ -0,0 +1,25 @@
+acl3-xdr.c
+acl3-xdr.h
+changelog-xdr.c
+changelog-xdr.h
+cli1-xdr.c
+cli1-xdr.h
+glusterd1-xdr.c
+glusterd1-xdr.h
+glusterfs3-xdr.c
+glusterfs3-xdr.h
+glusterfs4-xdr.c
+glusterfs4-xdr.h
+mount3udp.c
+mount3udp.h
+nlm4-xdr.c
+nlm4-xdr.h
+nlmcbk-xdr.c
+nlmcbk-xdr.h
+nsm-xdr.c
+nsm-xdr.h
+portmap-xdr.c
+portmap-xdr.h
+rpc-common-xdr.c
+rpc-common-xdr.h
+*-e
diff --git a/rpc/xdr/src/Makefile.am b/rpc/xdr/src/Makefile.am
index 7174815b841..0e9c377ec93 100644
--- a/rpc/xdr/src/Makefile.am
+++ b/rpc/xdr/src/Makefile.am
@@ -1,26 +1,85 @@
+if BUILD_GNFS
+ NFS_XDRS = nlm4-xdr.x nsm-xdr.x acl3-xdr.x mount3udp.x
+ NFS_SRCS = xdr-nfs3.c msg-nfs3.c
+ NFS_HDRS = xdr-nfs3.h msg-nfs3.h
+else
+ NFS_EXTRA_XDRS = nlm4-xdr.x nsm-xdr.x acl3-xdr.x mount3udp.x
+endif
+
+XDRGENFILES = glusterfs3-xdr.x glusterfs4-xdr.x cli1-xdr.x \
+ rpc-common-xdr.x glusterd1-xdr.x changelog-xdr.x \
+ portmap-xdr.x ${NFS_XDRS}
+
+XDRHEADERS = $(XDRGENFILES:.x=.h)
+XDRSOURCES = $(XDRGENFILES:.x=.c)
+
+EXTRA_DIST = $(XDRGENFILES) libgfxdr.sym ${NFS_EXTRA_XDRS}
+
lib_LTLIBRARIES = libgfxdr.la
-libgfxdr_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
-
-libgfxdr_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 \
- -D_GNU_SOURCE -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src
-
-libgfxdr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
-
-libgfxdr_la_SOURCES = xdr-generic.c rpc-common-xdr.c \
- glusterfs3-xdr.c \
- cli1-xdr.c \
- glusterd1-xdr.c \
- portmap-xdr.c \
- nlm4-xdr.c xdr-nfs3.c msg-nfs3.c nsm-xdr.c \
- nlmcbk-xdr.c
-
-noinst_HEADERS = xdr-generic.h rpc-common-xdr.h \
- glusterfs3-xdr.h glusterfs3.h \
- cli1-xdr.h \
- glusterd1-xdr.h \
- portmap-xdr.h \
- nlm4-xdr.h xdr-nfs3.h msg-nfs3.h nsm-xdr.h \
- nlmcbk-xdr.h
+libgfxdr_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
+
+libgfxdr_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_builddir)/rpc/xdr/src
+
+libgfxdr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+libgfxdr_la_LDFLAGS = -version-info $(LIBGFXDR_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/rpc/xdr/src/libgfxdr.sym
+
+libgfxdr_la_SOURCES = xdr-generic.c ${NFS_SRCS}
+nodist_libgfxdr_la_SOURCES = $(XDRSOURCES)
+
+libgfxdr_la_HEADERS = xdr-generic.h glusterfs3.h rpc-pragmas.h ${NFS_HDRS}
+nodist_libgfxdr_la_HEADERS = $(XDRHEADERS)
+
+libgfxdr_ladir = $(includedir)/glusterfs/rpc
+
+CLEANFILES = $(XDRSOURCES) $(XDRHEADERS)
+
+# trick automake into doing BUILT_SOURCES magic
+BUILT_SOURCES = $(XDRHEADERS) $(XDRSOURCES)
+
+xdrsrc=$(top_srcdir)/rpc/xdr/src
+xdrdst=$(top_builddir)/rpc/xdr/src
+
+# make's dependency resolution may mean that it decides to run
+# rpcgen again (unnecessarily), but as the .c file already exists,
+# rpcgen will exit with an error, resulting in a build error. We
+# could use a '-' (i.e. -@rpcgen ...) and suffer with noisy warnings
+# in the build. Or we do this crufty thing instead.
+$(XDRSOURCES): $(XDRGENFILES)
+ @if [ ! -e $(xdrdst)/$@ -o $(@:.c=.x) -nt $(xdrdst)/$@ ]; then \
+ rpcgen -c -o $(xdrdst)/$@ $(@:.c=.x) ;\
+ fi
+
+# d*mn sed in netbsd6 doesn't do -i (inline)
+# (why are we still running smoke on netbsd6 and not netbsd7?)
+$(XDRHEADERS): $(XDRGENFILES)
+ @if [ ! -e $(xdrdst)/$@ -o $(@:.h=.x) -nt $(xdrdst)/$@ ]; then \
+ rpcgen -h -o $(@:.h=.tmp) $(@:.h=.x) && \
+ sed -e '/#ifndef/ s/-/_/g' -e '/#define/ s/-/_/g' \
+ -e '/#endif/ s/-/_/' -e 's/TMP_/H_/g' \
+ $(@:.h=.tmp) > $(xdrdst)/$@ && \
+ rm -f $(@:.h=.tmp) ; \
+ fi
+
+
+# link .x files when doing out-of-tree builds
+# have to use .PHONY here to force it; all versions of make
+# will think the file already exists "here" by virtue of the
+# VPATH. And we have to have the .x file in $cwd in order to
+# have rpcgen generate "nice" #include directives
+# i.e. (nice):
+# #include "acl3-xdr.h"
+# versus (not nice):
+# #include "../../../../foo/src/rpc/xdr/src/acl3-xdr.h"
+.PHONY : $(XDRGENFILES)
+$(XDRGENFILES):
+ @if [ ! -e $@ ]; then ln -s $(xdrsrc)/$@ . ; fi;
+
+clean-local:
+ @if [ $(top_builddir) != $(top_srcdir) ]; then \
+ rm -f $(xdrdst)/*.x; \
+ fi
diff --git a/rpc/xdr/src/acl3-xdr.x b/rpc/xdr/src/acl3-xdr.x
new file mode 100644
index 00000000000..7f7364971e6
--- /dev/null
+++ b/rpc/xdr/src/acl3-xdr.x
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+%#include "xdr-nfs3.h"
+
+struct aclentry {
+ int type;
+ int uid;
+ int perm;
+};
+
+struct getaclargs {
+ netobj fh;
+ int mask;
+};
+
+struct getaclreply {
+ int status;
+ int attr_follows;
+ fattr3 attr;
+ int mask;
+ int aclcount;
+ aclentry aclentry<>;
+ int daclcount;
+ aclentry daclentry<>;
+};
+
+struct setaclargs {
+ netobj fh;
+ int mask;
+ int aclcount;
+ aclentry aclentry<>;
+ int daclcount;
+ aclentry daclentry<>;
+};
+
+struct setaclreply {
+ int status;
+ int attr_follows;
+ fattr3 attr;
+};
diff --git a/rpc/xdr/src/changelog-xdr.x b/rpc/xdr/src/changelog-xdr.x
new file mode 100644
index 00000000000..5956245d5ce
--- /dev/null
+++ b/rpc/xdr/src/changelog-xdr.x
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
+/* XDR: libgfchangelog -> changelog */
+
+struct changelog_probe_req {
+ unsigned int filter;
+ char sock[UNIX_PATH_MAX];
+};
+
+struct changelog_probe_rsp {
+ int op_ret;
+};
+
+/* XDR: changelog -> libgfchangelog */
+struct changelog_event_req {
+ /* sequence number for the buffer */
+ unsigned hyper seq;
+
+ /* time of dispatch */
+ unsigned hyper tv_sec;
+ unsigned hyper tv_usec;
+};
+
+struct changelog_event_rsp {
+ int op_ret;
+
+ /* ack'd buffers sequence number */
+ unsigned hyper seq;
+};
diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c
deleted file mode 100644
index 53e1807658a..00000000000
--- a/rpc/xdr/src/cli1-xdr.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "cli1-xdr.h"
-
-bool_t
-xdr_gf_cli_defrag_type (XDR *xdrs, gf_cli_defrag_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_defrag_status_t (XDR *xdrs, gf_defrag_status_t *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cluster_type (XDR *xdrs, gf1_cluster_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_replace_op (XDR *xdrs, gf1_cli_replace_op *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_op_commands (XDR *xdrs, gf1_op_commands *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_quota_type (XDR *xdrs, gf_quota_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_friends_list (XDR *xdrs, gf1_cli_friends_list *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_get_volume (XDR *xdrs, gf1_cli_get_volume *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_sync_volume (XDR *xdrs, gf1_cli_sync_volume *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_op_flags (XDR *xdrs, gf1_cli_op_flags *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_gsync_set (XDR *xdrs, gf1_cli_gsync_set *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_stats_op (XDR *xdrs, gf1_cli_stats_op *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_top_op (XDR *xdrs, gf1_cli_top_op *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_cli_status_type (XDR *xdrs, gf_cli_status_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_cli_req (XDR *xdrs, gf_cli_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_cli_rsp (XDR *xdrs, gf_cli_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_probe_req (XDR *xdrs, gf1_cli_probe_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_probe_rsp (XDR *xdrs, gf1_cli_probe_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_LONG(buf, objp->port);
- }
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->port = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_deprobe_req (XDR *xdrs, gf1_cli_deprobe_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->flags))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_deprobe_rsp (XDR *xdrs, gf1_cli_deprobe_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_peer_list_req (XDR *xdrs, gf1_cli_peer_list_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_peer_list_rsp (XDR *xdrs, gf1_cli_peer_list_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->friends.friends_val, (u_int *) &objp->friends.friends_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_fsm_log_req (XDR *xdrs, gf1_cli_fsm_log_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_fsm_log_rsp (XDR *xdrs, gf1_cli_fsm_log_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->fsm_log.fsm_log_val, (u_int *) &objp->fsm_log.fsm_log_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_getwd_req (XDR *xdrs, gf1_cli_getwd_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->unused))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_getwd_rsp (XDR *xdrs, gf1_cli_getwd_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->wd, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_mount_req (XDR *xdrs, gf1_cli_mount_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->label, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_mount_rsp (XDR *xdrs, gf1_cli_mount_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_umount_req (XDR *xdrs, gf1_cli_umount_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->lazy))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_umount_rsp (XDR *xdrs, gf1_cli_umount_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h
deleted file mode 100644
index b30fd740ae2..00000000000
--- a/rpc/xdr/src/cli1-xdr.h
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _CLI1_XDR_H_RPCGEN
-#define _CLI1_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-enum gf_cli_defrag_type {
- GF_DEFRAG_CMD_START = 1,
- GF_DEFRAG_CMD_STOP = 1 + 1,
- GF_DEFRAG_CMD_STATUS = 1 + 2,
- GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
- GF_DEFRAG_CMD_START_FORCE = 1 + 4,
-};
-typedef enum gf_cli_defrag_type gf_cli_defrag_type;
-
-enum gf_defrag_status_t {
- GF_DEFRAG_STATUS_NOT_STARTED = 0,
- GF_DEFRAG_STATUS_STARTED = 1,
- GF_DEFRAG_STATUS_STOPPED = 2,
- GF_DEFRAG_STATUS_COMPLETE = 3,
- GF_DEFRAG_STATUS_FAILED = 4,
-};
-typedef enum gf_defrag_status_t gf_defrag_status_t;
-
-enum gf1_cluster_type {
- GF_CLUSTER_TYPE_NONE = 0,
- GF_CLUSTER_TYPE_STRIPE = 0 + 1,
- GF_CLUSTER_TYPE_REPLICATE = 0 + 2,
- GF_CLUSTER_TYPE_STRIPE_REPLICATE = 0 + 3,
-};
-typedef enum gf1_cluster_type gf1_cluster_type;
-
-enum gf1_cli_replace_op {
- GF_REPLACE_OP_NONE = 0,
- GF_REPLACE_OP_START = 0 + 1,
- GF_REPLACE_OP_COMMIT = 0 + 2,
- GF_REPLACE_OP_PAUSE = 0 + 3,
- GF_REPLACE_OP_ABORT = 0 + 4,
- GF_REPLACE_OP_STATUS = 0 + 5,
- GF_REPLACE_OP_COMMIT_FORCE = 0 + 6,
-};
-typedef enum gf1_cli_replace_op gf1_cli_replace_op;
-
-enum gf1_op_commands {
- GF_OP_CMD_NONE = 0,
- GF_OP_CMD_START = 0 + 1,
- GF_OP_CMD_COMMIT = 0 + 2,
- GF_OP_CMD_STOP = 0 + 3,
- GF_OP_CMD_STATUS = 0 + 4,
- GF_OP_CMD_COMMIT_FORCE = 0 + 5,
-};
-typedef enum gf1_op_commands gf1_op_commands;
-
-enum gf_quota_type {
- GF_QUOTA_OPTION_TYPE_NONE = 0,
- GF_QUOTA_OPTION_TYPE_ENABLE = 0 + 1,
- GF_QUOTA_OPTION_TYPE_DISABLE = 0 + 2,
- GF_QUOTA_OPTION_TYPE_LIMIT_USAGE = 0 + 3,
- GF_QUOTA_OPTION_TYPE_REMOVE = 0 + 4,
- GF_QUOTA_OPTION_TYPE_LIST = 0 + 5,
- GF_QUOTA_OPTION_TYPE_VERSION = 0 + 6,
-};
-typedef enum gf_quota_type gf_quota_type;
-
-enum gf1_cli_friends_list {
- GF_CLI_LIST_ALL = 1,
-};
-typedef enum gf1_cli_friends_list gf1_cli_friends_list;
-
-enum gf1_cli_get_volume {
- GF_CLI_GET_VOLUME_ALL = 1,
- GF_CLI_GET_VOLUME = 1 + 1,
- GF_CLI_GET_NEXT_VOLUME = 1 + 2,
-};
-typedef enum gf1_cli_get_volume gf1_cli_get_volume;
-
-enum gf1_cli_sync_volume {
- GF_CLI_SYNC_ALL = 1,
-};
-typedef enum gf1_cli_sync_volume gf1_cli_sync_volume;
-
-enum gf1_cli_op_flags {
- GF_CLI_FLAG_OP_FORCE = 1,
-};
-typedef enum gf1_cli_op_flags gf1_cli_op_flags;
-
-enum gf1_cli_gsync_set {
- GF_GSYNC_OPTION_TYPE_NONE = 0,
- GF_GSYNC_OPTION_TYPE_START = 1,
- GF_GSYNC_OPTION_TYPE_STOP = 2,
- GF_GSYNC_OPTION_TYPE_CONFIG = 3,
- GF_GSYNC_OPTION_TYPE_STATUS = 4,
- GF_GSYNC_OPTION_TYPE_ROTATE = 5,
-};
-typedef enum gf1_cli_gsync_set gf1_cli_gsync_set;
-
-enum gf1_cli_stats_op {
- GF_CLI_STATS_NONE = 0,
- GF_CLI_STATS_START = 1,
- GF_CLI_STATS_STOP = 2,
- GF_CLI_STATS_INFO = 3,
- GF_CLI_STATS_TOP = 4,
-};
-typedef enum gf1_cli_stats_op gf1_cli_stats_op;
-
-enum gf1_cli_top_op {
- GF_CLI_TOP_NONE = 0,
- GF_CLI_TOP_OPEN = 0 + 1,
- GF_CLI_TOP_READ = 0 + 2,
- GF_CLI_TOP_WRITE = 0 + 3,
- GF_CLI_TOP_OPENDIR = 0 + 4,
- GF_CLI_TOP_READDIR = 0 + 5,
- GF_CLI_TOP_READ_PERF = 0 + 6,
- GF_CLI_TOP_WRITE_PERF = 0 + 7,
-};
-typedef enum gf1_cli_top_op gf1_cli_top_op;
-
-enum gf_cli_status_type {
- GF_CLI_STATUS_NONE = 0x0000,
- GF_CLI_STATUS_MEM = 0x0001,
- GF_CLI_STATUS_CLIENTS = 0x0002,
- GF_CLI_STATUS_INODE = 0x0004,
- GF_CLI_STATUS_FD = 0x0008,
- GF_CLI_STATUS_CALLPOOL = 0x0010,
- GF_CLI_STATUS_DETAIL = 0x0020,
- GF_CLI_STATUS_MASK = 0x00FF,
- GF_CLI_STATUS_VOL = 0x0100,
- GF_CLI_STATUS_ALL = 0x0200,
- GF_CLI_STATUS_BRICK = 0x0400,
- GF_CLI_STATUS_NFS = 0x0800,
- GF_CLI_STATUS_SHD = 0x1000,
-};
-typedef enum gf_cli_status_type gf_cli_status_type;
-
-struct gf_cli_req {
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_cli_req gf_cli_req;
-
-struct gf_cli_rsp {
- int op_ret;
- int op_errno;
- char *op_errstr;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_cli_rsp gf_cli_rsp;
-
-struct gf1_cli_probe_req {
- char *hostname;
- int port;
-};
-typedef struct gf1_cli_probe_req gf1_cli_probe_req;
-
-struct gf1_cli_probe_rsp {
- int op_ret;
- int op_errno;
- int port;
- char *hostname;
- char *op_errstr;
-};
-typedef struct gf1_cli_probe_rsp gf1_cli_probe_rsp;
-
-struct gf1_cli_deprobe_req {
- char *hostname;
- int port;
- int flags;
-};
-typedef struct gf1_cli_deprobe_req gf1_cli_deprobe_req;
-
-struct gf1_cli_deprobe_rsp {
- int op_ret;
- int op_errno;
- char *hostname;
- char *op_errstr;
-};
-typedef struct gf1_cli_deprobe_rsp gf1_cli_deprobe_rsp;
-
-struct gf1_cli_peer_list_req {
- int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf1_cli_peer_list_req gf1_cli_peer_list_req;
-
-struct gf1_cli_peer_list_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int friends_len;
- char *friends_val;
- } friends;
-};
-typedef struct gf1_cli_peer_list_rsp gf1_cli_peer_list_rsp;
-
-struct gf1_cli_fsm_log_req {
- char *name;
-};
-typedef struct gf1_cli_fsm_log_req gf1_cli_fsm_log_req;
-
-struct gf1_cli_fsm_log_rsp {
- int op_ret;
- int op_errno;
- char *op_errstr;
- struct {
- u_int fsm_log_len;
- char *fsm_log_val;
- } fsm_log;
-};
-typedef struct gf1_cli_fsm_log_rsp gf1_cli_fsm_log_rsp;
-
-struct gf1_cli_getwd_req {
- int unused;
-};
-typedef struct gf1_cli_getwd_req gf1_cli_getwd_req;
-
-struct gf1_cli_getwd_rsp {
- int op_ret;
- int op_errno;
- char *wd;
-};
-typedef struct gf1_cli_getwd_rsp gf1_cli_getwd_rsp;
-
-struct gf1_cli_mount_req {
- char *label;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf1_cli_mount_req gf1_cli_mount_req;
-
-struct gf1_cli_mount_rsp {
- int op_ret;
- int op_errno;
- char *path;
-};
-typedef struct gf1_cli_mount_rsp gf1_cli_mount_rsp;
-
-struct gf1_cli_umount_req {
- int lazy;
- char *path;
-};
-typedef struct gf1_cli_umount_req gf1_cli_umount_req;
-
-struct gf1_cli_umount_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_gf_cli_defrag_type (XDR *, gf_cli_defrag_type*);
-extern bool_t xdr_gf_defrag_status_t (XDR *, gf_defrag_status_t*);
-extern bool_t xdr_gf1_cluster_type (XDR *, gf1_cluster_type*);
-extern bool_t xdr_gf1_cli_replace_op (XDR *, gf1_cli_replace_op*);
-extern bool_t xdr_gf1_op_commands (XDR *, gf1_op_commands*);
-extern bool_t xdr_gf_quota_type (XDR *, gf_quota_type*);
-extern bool_t xdr_gf1_cli_friends_list (XDR *, gf1_cli_friends_list*);
-extern bool_t xdr_gf1_cli_get_volume (XDR *, gf1_cli_get_volume*);
-extern bool_t xdr_gf1_cli_sync_volume (XDR *, gf1_cli_sync_volume*);
-extern bool_t xdr_gf1_cli_op_flags (XDR *, gf1_cli_op_flags*);
-extern bool_t xdr_gf1_cli_gsync_set (XDR *, gf1_cli_gsync_set*);
-extern bool_t xdr_gf1_cli_stats_op (XDR *, gf1_cli_stats_op*);
-extern bool_t xdr_gf1_cli_top_op (XDR *, gf1_cli_top_op*);
-extern bool_t xdr_gf_cli_status_type (XDR *, gf_cli_status_type*);
-extern bool_t xdr_gf_cli_req (XDR *, gf_cli_req*);
-extern bool_t xdr_gf_cli_rsp (XDR *, gf_cli_rsp*);
-extern bool_t xdr_gf1_cli_probe_req (XDR *, gf1_cli_probe_req*);
-extern bool_t xdr_gf1_cli_probe_rsp (XDR *, gf1_cli_probe_rsp*);
-extern bool_t xdr_gf1_cli_deprobe_req (XDR *, gf1_cli_deprobe_req*);
-extern bool_t xdr_gf1_cli_deprobe_rsp (XDR *, gf1_cli_deprobe_rsp*);
-extern bool_t xdr_gf1_cli_peer_list_req (XDR *, gf1_cli_peer_list_req*);
-extern bool_t xdr_gf1_cli_peer_list_rsp (XDR *, gf1_cli_peer_list_rsp*);
-extern bool_t xdr_gf1_cli_fsm_log_req (XDR *, gf1_cli_fsm_log_req*);
-extern bool_t xdr_gf1_cli_fsm_log_rsp (XDR *, gf1_cli_fsm_log_rsp*);
-extern bool_t xdr_gf1_cli_getwd_req (XDR *, gf1_cli_getwd_req*);
-extern bool_t xdr_gf1_cli_getwd_rsp (XDR *, gf1_cli_getwd_rsp*);
-extern bool_t xdr_gf1_cli_mount_req (XDR *, gf1_cli_mount_req*);
-extern bool_t xdr_gf1_cli_mount_rsp (XDR *, gf1_cli_mount_rsp*);
-extern bool_t xdr_gf1_cli_umount_req (XDR *, gf1_cli_umount_req*);
-extern bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_gf_cli_defrag_type ();
-extern bool_t xdr_gf_defrag_status_t ();
-extern bool_t xdr_gf1_cluster_type ();
-extern bool_t xdr_gf1_cli_replace_op ();
-extern bool_t xdr_gf1_op_commands ();
-extern bool_t xdr_gf_quota_type ();
-extern bool_t xdr_gf1_cli_friends_list ();
-extern bool_t xdr_gf1_cli_get_volume ();
-extern bool_t xdr_gf1_cli_sync_volume ();
-extern bool_t xdr_gf1_cli_op_flags ();
-extern bool_t xdr_gf1_cli_gsync_set ();
-extern bool_t xdr_gf1_cli_stats_op ();
-extern bool_t xdr_gf1_cli_top_op ();
-extern bool_t xdr_gf_cli_status_type ();
-extern bool_t xdr_gf_cli_req ();
-extern bool_t xdr_gf_cli_rsp ();
-extern bool_t xdr_gf1_cli_probe_req ();
-extern bool_t xdr_gf1_cli_probe_rsp ();
-extern bool_t xdr_gf1_cli_deprobe_req ();
-extern bool_t xdr_gf1_cli_deprobe_rsp ();
-extern bool_t xdr_gf1_cli_peer_list_req ();
-extern bool_t xdr_gf1_cli_peer_list_rsp ();
-extern bool_t xdr_gf1_cli_fsm_log_req ();
-extern bool_t xdr_gf1_cli_fsm_log_rsp ();
-extern bool_t xdr_gf1_cli_getwd_req ();
-extern bool_t xdr_gf1_cli_getwd_rsp ();
-extern bool_t xdr_gf1_cli_mount_req ();
-extern bool_t xdr_gf1_cli_mount_rsp ();
-extern bool_t xdr_gf1_cli_umount_req ();
-extern bool_t xdr_gf1_cli_umount_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_CLI1_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index cb22080cce3..777cb0046a2 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -1,35 +1,76 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
enum gf_cli_defrag_type {
- GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_NONE = 0,
+ GF_DEFRAG_CMD_START,
GF_DEFRAG_CMD_STOP,
GF_DEFRAG_CMD_STATUS,
GF_DEFRAG_CMD_START_LAYOUT_FIX,
- GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */
-} ;
+ GF_DEFRAG_CMD_START_FORCE, /* used by remove-brick data migration */
+ GF_DEFRAG_CMD_START_TIER,
+ GF_DEFRAG_CMD_STATUS_TIER,
+ GF_DEFRAG_CMD_START_DETACH_TIER,
+ GF_DEFRAG_CMD_STOP_DETACH_TIER,
+ GF_DEFRAG_CMD_PAUSE_TIER,
+ GF_DEFRAG_CMD_RESUME_TIER,
+ GF_DEFRAG_CMD_DETACH_STATUS,
+ GF_DEFRAG_CMD_STOP_TIER,
+ GF_DEFRAG_CMD_DETACH_START,
+ GF_DEFRAG_CMD_DETACH_COMMIT,
+ GF_DEFRAG_CMD_DETACH_COMMIT_FORCE,
+ GF_DEFRAG_CMD_DETACH_STOP,
+ GF_DEFRAG_CMD_TYPE_MAX
+};
enum gf_defrag_status_t {
GF_DEFRAG_STATUS_NOT_STARTED,
GF_DEFRAG_STATUS_STARTED,
GF_DEFRAG_STATUS_STOPPED,
GF_DEFRAG_STATUS_COMPLETE,
- GF_DEFRAG_STATUS_FAILED
-} ;
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+ GF_DEFRAG_STATUS_MAX
+};
- enum gf1_cluster_type {
+enum gf1_cluster_type {
GF_CLUSTER_TYPE_NONE = 0,
GF_CLUSTER_TYPE_STRIPE,
GF_CLUSTER_TYPE_REPLICATE,
- GF_CLUSTER_TYPE_STRIPE_REPLICATE
-} ;
-
- enum gf1_cli_replace_op {
- GF_REPLACE_OP_NONE = 0,
- GF_REPLACE_OP_START,
- GF_REPLACE_OP_COMMIT,
- GF_REPLACE_OP_PAUSE,
- GF_REPLACE_OP_ABORT,
- GF_REPLACE_OP_STATUS,
- GF_REPLACE_OP_COMMIT_FORCE
-} ;
+ GF_CLUSTER_TYPE_STRIPE_REPLICATE,
+ GF_CLUSTER_TYPE_DISPERSE,
+ GF_CLUSTER_TYPE_TIER,
+ GF_CLUSTER_TYPE_MAX
+};
+
+enum gf_bitrot_type {
+ GF_BITROT_OPTION_TYPE_NONE = 0,
+ GF_BITROT_OPTION_TYPE_ENABLE,
+ GF_BITROT_OPTION_TYPE_DISABLE,
+ GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE,
+ GF_BITROT_OPTION_TYPE_SCRUB_FREQ,
+ GF_BITROT_OPTION_TYPE_SCRUB,
+ GF_BITROT_OPTION_TYPE_EXPIRY_TIME,
+ GF_BITROT_CMD_SCRUB_STATUS,
+ GF_BITROT_CMD_SCRUB_ONDEMAND,
+ GF_BITROT_OPTION_TYPE_SIGNER_THREADS,
+ GF_BITROT_OPTION_TYPE_MAX
+};
enum gf1_op_commands {
GF_OP_CMD_NONE = 0,
@@ -37,8 +78,12 @@
GF_OP_CMD_COMMIT,
GF_OP_CMD_STOP,
GF_OP_CMD_STATUS,
- GF_OP_CMD_COMMIT_FORCE
-} ;
+ GF_OP_CMD_COMMIT_FORCE,
+ GF_OP_CMD_DETACH_START,
+ GF_OP_CMD_DETACH_COMMIT,
+ GF_OP_CMD_DETACH_COMMIT_FORCE,
+ GF_OP_CMD_STOP_DETACH_TIER
+};
enum gf_quota_type {
GF_QUOTA_OPTION_TYPE_NONE = 0,
@@ -47,22 +92,34 @@ enum gf_quota_type {
GF_QUOTA_OPTION_TYPE_LIMIT_USAGE,
GF_QUOTA_OPTION_TYPE_REMOVE,
GF_QUOTA_OPTION_TYPE_LIST,
- GF_QUOTA_OPTION_TYPE_VERSION
+ GF_QUOTA_OPTION_TYPE_VERSION,
+ GF_QUOTA_OPTION_TYPE_ALERT_TIME,
+ GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT,
+ GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT,
+ GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT,
+ GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_LIST_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_UPGRADE,
+ GF_QUOTA_OPTION_TYPE_MAX
};
enum gf1_cli_friends_list {
- GF_CLI_LIST_ALL = 1
-} ;
+ GF_CLI_LIST_PEERS = 1,
+ GF_CLI_LIST_POOL_NODES = 2
+};
enum gf1_cli_get_volume {
GF_CLI_GET_VOLUME_ALL = 1,
GF_CLI_GET_VOLUME,
GF_CLI_GET_NEXT_VOLUME
-} ;
+};
enum gf1_cli_sync_volume {
GF_CLI_SYNC_ALL = 1
-} ;
+};
enum gf1_cli_op_flags {
GF_CLI_FLAG_OP_FORCE = 1
@@ -74,7 +131,11 @@ enum gf1_cli_gsync_set {
GF_GSYNC_OPTION_TYPE_STOP,
GF_GSYNC_OPTION_TYPE_CONFIG,
GF_GSYNC_OPTION_TYPE_STATUS,
- GF_GSYNC_OPTION_TYPE_ROTATE
+ GF_GSYNC_OPTION_TYPE_ROTATE,
+ GF_GSYNC_OPTION_TYPE_CREATE,
+ GF_GSYNC_OPTION_TYPE_DELETE,
+ GF_GSYNC_OPTION_TYPE_PAUSE,
+ GF_GSYNC_OPTION_TYPE_RESUME
};
enum gf1_cli_stats_op {
@@ -85,6 +146,19 @@ enum gf1_cli_stats_op {
GF_CLI_STATS_TOP = 4
};
+enum gf1_cli_info_op {
+ GF_CLI_INFO_NONE = 0,
+ GF_CLI_INFO_ALL = 1,
+ GF_CLI_INFO_INCREMENTAL = 2,
+ GF_CLI_INFO_CUMULATIVE = 3,
+ GF_CLI_INFO_CLEAR = 4
+};
+
+enum gf_cli_get_state_op {
+ GF_CLI_GET_STATE_DETAIL = 1,
+ GF_CLI_GET_STATE_VOLOPTS = 2
+};
+
enum gf1_cli_top_op {
GF_CLI_TOP_NONE = 0,
GF_CLI_TOP_OPEN,
@@ -99,68 +173,95 @@ enum gf1_cli_top_op {
/* The unconventional hex numbers help us perform
bit-wise operations which reduces complexity */
enum gf_cli_status_type {
- GF_CLI_STATUS_NONE = 0x0000,
- GF_CLI_STATUS_MEM = 0x0001, /*0000000000001*/
- GF_CLI_STATUS_CLIENTS = 0x0002, /*0000000000010*/
- GF_CLI_STATUS_INODE = 0x0004, /*0000000000100*/
- GF_CLI_STATUS_FD = 0x0008, /*0000000001000*/
- GF_CLI_STATUS_CALLPOOL = 0x0010, /*0000000010000*/
- GF_CLI_STATUS_DETAIL = 0x0020, /*0000000100000*/
- GF_CLI_STATUS_MASK = 0x00FF, /*0000011111111 Used to get the op*/
- GF_CLI_STATUS_VOL = 0x0100, /*0000100000000*/
- GF_CLI_STATUS_ALL = 0x0200, /*0001000000000*/
- GF_CLI_STATUS_BRICK = 0x0400, /*0010000000000*/
- GF_CLI_STATUS_NFS = 0x0800, /*0100000000000*/
- GF_CLI_STATUS_SHD = 0x1000 /*1000000000000*/
-};
-
- struct gf_cli_req {
- opaque dict<>;
-} ;
+ GF_CLI_STATUS_NONE = 0x000000,
+ GF_CLI_STATUS_MEM = 0x000001, /*000000000000001*/
+ GF_CLI_STATUS_CLIENTS = 0x000002, /*000000000000010*/
+ GF_CLI_STATUS_INODE = 0x000004, /*000000000000100*/
+ GF_CLI_STATUS_FD = 0x000008, /*000000000001000*/
+ GF_CLI_STATUS_CALLPOOL = 0x000010, /*000000000010000*/
+ GF_CLI_STATUS_DETAIL = 0x000020, /*000000000100000*/
+ GF_CLI_STATUS_TASKS = 0x000040, /*00000001000000*/
+ GF_CLI_STATUS_CLIENT_LIST = 0x000080, /*00000010000000*/
+ GF_CLI_STATUS_MASK = 0x0000FF, /*000000011111111 Used to get the op*/
+ GF_CLI_STATUS_VOL = 0x000100, /*00000000100000000*/
+ GF_CLI_STATUS_ALL = 0x000200, /*00000001000000000*/
+ GF_CLI_STATUS_BRICK = 0x000400, /*00000010000000000*/
+ GF_CLI_STATUS_NFS = 0x000800, /*00000100000000000*/
+ GF_CLI_STATUS_SHD = 0x001000, /*00001000000000000*/
+ GF_CLI_STATUS_QUOTAD = 0x002000, /*00010000000000000*/
+ GF_CLI_STATUS_SNAPD = 0x004000, /*00100000000000000*/
+ GF_CLI_STATUS_BITD = 0x008000, /*01000000000000000*/
+ GF_CLI_STATUS_SCRUB = 0x010000, /*10000000000000000*/
+ GF_CLI_STATUS_TIERD = 0x020000 /*100000000000000000*/
+};
- struct gf_cli_rsp {
- int op_ret;
- int op_errno;
- string op_errstr<>;
- opaque dict<>;
-} ;
+/* Identifiers for snapshot clis */
+enum gf1_cli_snapshot {
+ GF_SNAP_OPTION_TYPE_NONE = 0,
+ GF_SNAP_OPTION_TYPE_CREATE,
+ GF_SNAP_OPTION_TYPE_DELETE,
+ GF_SNAP_OPTION_TYPE_RESTORE,
+ GF_SNAP_OPTION_TYPE_ACTIVATE,
+ GF_SNAP_OPTION_TYPE_DEACTIVATE,
+ GF_SNAP_OPTION_TYPE_LIST,
+ GF_SNAP_OPTION_TYPE_STATUS,
+ GF_SNAP_OPTION_TYPE_CONFIG,
+ GF_SNAP_OPTION_TYPE_CLONE,
+ GF_SNAP_OPTION_TYPE_INFO
+};
- struct gf1_cli_probe_req {
- string hostname<>;
- int port;
-} ;
+enum gf1_cli_snapshot_info {
+ GF_SNAP_INFO_TYPE_ALL = 0,
+ GF_SNAP_INFO_TYPE_SNAP,
+ GF_SNAP_INFO_TYPE_VOL
+};
- struct gf1_cli_probe_rsp {
- int op_ret;
- int op_errno;
- int port;
- string hostname<>;
- string op_errstr<>;
-} ;
+enum gf1_cli_snapshot_config {
+ GF_SNAP_CONFIG_TYPE_NONE = 0,
+ GF_SNAP_CONFIG_TYPE_SET,
+ GF_SNAP_CONFIG_DISPLAY
+};
- struct gf1_cli_deprobe_req {
- string hostname<>;
- int port;
- int flags;
-} ;
+enum gf1_cli_snapshot_status {
+ GF_SNAP_STATUS_TYPE_ALL = 0,
+ GF_SNAP_STATUS_TYPE_SNAP,
+ GF_SNAP_STATUS_TYPE_VOL,
+ GF_SNAP_STATUS_TYPE_ITER
+};
+
+/* Changing order of GF_SNAP_DELETE_TYPE_VOL *
+ * and GF_SNAP_DELETE_TYPE_SNAP so that they don't *
+ * overlap with the enums of GF_SNAP_STATUS_TYPE_SNAP, *
+ * and GF_SNAP_STATUS_TYPE_VOL *
+ */
+enum gf1_cli_snapshot_delete {
+ GF_SNAP_DELETE_TYPE_ALL = 0,
+ GF_SNAP_DELETE_TYPE_VOL = 1,
+ GF_SNAP_DELETE_TYPE_SNAP = 2,
+ GF_SNAP_DELETE_TYPE_ITER = 3
+};
- struct gf1_cli_deprobe_rsp {
+struct gf_cli_req {
+ opaque dict<>;
+};
+
+ struct gf_cli_rsp {
int op_ret;
int op_errno;
- string hostname<>;
string op_errstr<>;
-} ;
+ opaque dict<>;
+};
struct gf1_cli_peer_list_req {
int flags;
opaque dict<>;
-} ;
+};
struct gf1_cli_peer_list_rsp {
int op_ret;
int op_errno;
opaque friends<>;
-} ;
+};
struct gf1_cli_fsm_log_req {
string name<>;
@@ -175,7 +276,7 @@ struct gf1_cli_fsm_log_rsp {
struct gf1_cli_getwd_req {
int unused;
-} ;
+};
struct gf1_cli_getwd_rsp {
int op_ret;
diff --git a/rpc/xdr/src/glusterd1-xdr.c b/rpc/xdr/src/glusterd1-xdr.c
deleted file mode 100644
index 28ab49b6f73..00000000000
--- a/rpc/xdr/src/glusterd1-xdr.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "glusterd1-xdr.h"
-
-bool_t
-xdr_glusterd_volume_status (XDR *xdrs, glusterd_volume_status *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_probe_req (XDR *xdrs, gd1_mgmt_probe_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_probe_rsp (XDR *xdrs, gd1_mgmt_probe_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->port);
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- objp->port = IXDR_GET_LONG(buf);
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_req (XDR *xdrs, gd1_mgmt_friend_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->vols.vols_val, (u_int *) &objp->vols.vols_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_rsp (XDR *xdrs, gd1_mgmt_friend_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_unfriend_req (XDR *xdrs, gd1_mgmt_unfriend_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_unfriend_rsp (XDR *xdrs, gd1_mgmt_unfriend_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_lock_req (XDR *xdrs, gd1_mgmt_cluster_lock_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_lock_rsp (XDR *xdrs, gd1_mgmt_cluster_lock_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_unlock_req (XDR *xdrs, gd1_mgmt_cluster_unlock_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_unlock_rsp (XDR *xdrs, gd1_mgmt_cluster_unlock_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_stage_op_req (XDR *xdrs, gd1_mgmt_stage_op_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->buf.buf_val, (u_int *) &objp->buf.buf_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_stage_op_rsp (XDR *xdrs, gd1_mgmt_stage_op_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op);
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- objp->op = IXDR_GET_LONG(buf);
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_commit_op_req (XDR *xdrs, gd1_mgmt_commit_op_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->buf.buf_val, (u_int *) &objp->buf.buf_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_commit_op_rsp (XDR *xdrs, gd1_mgmt_commit_op_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op);
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- objp->op = IXDR_GET_LONG(buf);
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_update (XDR *xdrs, gd1_mgmt_friend_update *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->friends.friends_val, (u_int *) &objp->friends.friends_len, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_update_rsp (XDR *xdrs, gd1_mgmt_friend_update_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_brick_op_req (XDR *xdrs, gd1_mgmt_brick_op_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->input.input_val, (u_int *) &objp->input.input_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_brick_op_rsp (XDR *xdrs, gd1_mgmt_brick_op_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->output.output_val, (u_int *) &objp->output.output_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/glusterd1-xdr.h b/rpc/xdr/src/glusterd1-xdr.h
deleted file mode 100644
index 89de04ebef2..00000000000
--- a/rpc/xdr/src/glusterd1-xdr.h
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _GLUSTERD1_XDR_H_RPCGEN
-#define _GLUSTERD1_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-enum glusterd_volume_status {
- GLUSTERD_STATUS_NONE = 0,
- GLUSTERD_STATUS_STARTED = 0 + 1,
- GLUSTERD_STATUS_STOPPED = 0 + 2,
-};
-typedef enum glusterd_volume_status glusterd_volume_status;
-
-struct gd1_mgmt_probe_req {
- u_char uuid[16];
- char *hostname;
- int port;
-};
-typedef struct gd1_mgmt_probe_req gd1_mgmt_probe_req;
-
-struct gd1_mgmt_probe_rsp {
- u_char uuid[16];
- char *hostname;
- int port;
- int op_ret;
- int op_errno;
- char *op_errstr;
-};
-typedef struct gd1_mgmt_probe_rsp gd1_mgmt_probe_rsp;
-
-struct gd1_mgmt_friend_req {
- u_char uuid[16];
- char *hostname;
- int port;
- struct {
- u_int vols_len;
- char *vols_val;
- } vols;
-};
-typedef struct gd1_mgmt_friend_req gd1_mgmt_friend_req;
-
-struct gd1_mgmt_friend_rsp {
- u_char uuid[16];
- char *hostname;
- int op_ret;
- int op_errno;
- int port;
-};
-typedef struct gd1_mgmt_friend_rsp gd1_mgmt_friend_rsp;
-
-struct gd1_mgmt_unfriend_req {
- u_char uuid[16];
- char *hostname;
- int port;
-};
-typedef struct gd1_mgmt_unfriend_req gd1_mgmt_unfriend_req;
-
-struct gd1_mgmt_unfriend_rsp {
- u_char uuid[16];
- char *hostname;
- int op_ret;
- int op_errno;
- int port;
-};
-typedef struct gd1_mgmt_unfriend_rsp gd1_mgmt_unfriend_rsp;
-
-struct gd1_mgmt_cluster_lock_req {
- u_char uuid[16];
-};
-typedef struct gd1_mgmt_cluster_lock_req gd1_mgmt_cluster_lock_req;
-
-struct gd1_mgmt_cluster_lock_rsp {
- u_char uuid[16];
- int op_ret;
- int op_errno;
-};
-typedef struct gd1_mgmt_cluster_lock_rsp gd1_mgmt_cluster_lock_rsp;
-
-struct gd1_mgmt_cluster_unlock_req {
- u_char uuid[16];
-};
-typedef struct gd1_mgmt_cluster_unlock_req gd1_mgmt_cluster_unlock_req;
-
-struct gd1_mgmt_cluster_unlock_rsp {
- u_char uuid[16];
- int op_ret;
- int op_errno;
-};
-typedef struct gd1_mgmt_cluster_unlock_rsp gd1_mgmt_cluster_unlock_rsp;
-
-struct gd1_mgmt_stage_op_req {
- u_char uuid[16];
- int op;
- struct {
- u_int buf_len;
- char *buf_val;
- } buf;
-};
-typedef struct gd1_mgmt_stage_op_req gd1_mgmt_stage_op_req;
-
-struct gd1_mgmt_stage_op_rsp {
- u_char uuid[16];
- int op;
- int op_ret;
- int op_errno;
- char *op_errstr;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gd1_mgmt_stage_op_rsp gd1_mgmt_stage_op_rsp;
-
-struct gd1_mgmt_commit_op_req {
- u_char uuid[16];
- int op;
- struct {
- u_int buf_len;
- char *buf_val;
- } buf;
-};
-typedef struct gd1_mgmt_commit_op_req gd1_mgmt_commit_op_req;
-
-struct gd1_mgmt_commit_op_rsp {
- u_char uuid[16];
- int op;
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- char *op_errstr;
-};
-typedef struct gd1_mgmt_commit_op_rsp gd1_mgmt_commit_op_rsp;
-
-struct gd1_mgmt_friend_update {
- u_char uuid[16];
- struct {
- u_int friends_len;
- char *friends_val;
- } friends;
- int port;
-};
-typedef struct gd1_mgmt_friend_update gd1_mgmt_friend_update;
-
-struct gd1_mgmt_friend_update_rsp {
- u_char uuid[16];
- int op;
- int op_ret;
- int op_errno;
-};
-typedef struct gd1_mgmt_friend_update_rsp gd1_mgmt_friend_update_rsp;
-
-struct gd1_mgmt_brick_op_req {
- char *name;
- int op;
- struct {
- u_int input_len;
- char *input_val;
- } input;
-};
-typedef struct gd1_mgmt_brick_op_req gd1_mgmt_brick_op_req;
-
-struct gd1_mgmt_brick_op_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int output_len;
- char *output_val;
- } output;
- char *op_errstr;
-};
-typedef struct gd1_mgmt_brick_op_rsp gd1_mgmt_brick_op_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_glusterd_volume_status (XDR *, glusterd_volume_status*);
-extern bool_t xdr_gd1_mgmt_probe_req (XDR *, gd1_mgmt_probe_req*);
-extern bool_t xdr_gd1_mgmt_probe_rsp (XDR *, gd1_mgmt_probe_rsp*);
-extern bool_t xdr_gd1_mgmt_friend_req (XDR *, gd1_mgmt_friend_req*);
-extern bool_t xdr_gd1_mgmt_friend_rsp (XDR *, gd1_mgmt_friend_rsp*);
-extern bool_t xdr_gd1_mgmt_unfriend_req (XDR *, gd1_mgmt_unfriend_req*);
-extern bool_t xdr_gd1_mgmt_unfriend_rsp (XDR *, gd1_mgmt_unfriend_rsp*);
-extern bool_t xdr_gd1_mgmt_cluster_lock_req (XDR *, gd1_mgmt_cluster_lock_req*);
-extern bool_t xdr_gd1_mgmt_cluster_lock_rsp (XDR *, gd1_mgmt_cluster_lock_rsp*);
-extern bool_t xdr_gd1_mgmt_cluster_unlock_req (XDR *, gd1_mgmt_cluster_unlock_req*);
-extern bool_t xdr_gd1_mgmt_cluster_unlock_rsp (XDR *, gd1_mgmt_cluster_unlock_rsp*);
-extern bool_t xdr_gd1_mgmt_stage_op_req (XDR *, gd1_mgmt_stage_op_req*);
-extern bool_t xdr_gd1_mgmt_stage_op_rsp (XDR *, gd1_mgmt_stage_op_rsp*);
-extern bool_t xdr_gd1_mgmt_commit_op_req (XDR *, gd1_mgmt_commit_op_req*);
-extern bool_t xdr_gd1_mgmt_commit_op_rsp (XDR *, gd1_mgmt_commit_op_rsp*);
-extern bool_t xdr_gd1_mgmt_friend_update (XDR *, gd1_mgmt_friend_update*);
-extern bool_t xdr_gd1_mgmt_friend_update_rsp (XDR *, gd1_mgmt_friend_update_rsp*);
-extern bool_t xdr_gd1_mgmt_brick_op_req (XDR *, gd1_mgmt_brick_op_req*);
-extern bool_t xdr_gd1_mgmt_brick_op_rsp (XDR *, gd1_mgmt_brick_op_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_glusterd_volume_status ();
-extern bool_t xdr_gd1_mgmt_probe_req ();
-extern bool_t xdr_gd1_mgmt_probe_rsp ();
-extern bool_t xdr_gd1_mgmt_friend_req ();
-extern bool_t xdr_gd1_mgmt_friend_rsp ();
-extern bool_t xdr_gd1_mgmt_unfriend_req ();
-extern bool_t xdr_gd1_mgmt_unfriend_rsp ();
-extern bool_t xdr_gd1_mgmt_cluster_lock_req ();
-extern bool_t xdr_gd1_mgmt_cluster_lock_rsp ();
-extern bool_t xdr_gd1_mgmt_cluster_unlock_req ();
-extern bool_t xdr_gd1_mgmt_cluster_unlock_rsp ();
-extern bool_t xdr_gd1_mgmt_stage_op_req ();
-extern bool_t xdr_gd1_mgmt_stage_op_rsp ();
-extern bool_t xdr_gd1_mgmt_commit_op_req ();
-extern bool_t xdr_gd1_mgmt_commit_op_rsp ();
-extern bool_t xdr_gd1_mgmt_friend_update ();
-extern bool_t xdr_gd1_mgmt_friend_update_rsp ();
-extern bool_t xdr_gd1_mgmt_brick_op_req ();
-extern bool_t xdr_gd1_mgmt_brick_op_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_GLUSTERD1_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x
index fc1bb58b4a8..b631dea3502 100644
--- a/rpc/xdr/src/glusterd1-xdr.x
+++ b/rpc/xdr/src/glusterd1-xdr.x
@@ -1,3 +1,18 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
enum glusterd_volume_status {
GLUSTERD_STATUS_NONE = 0,
GLUSTERD_STATUS_STARTED,
@@ -117,6 +132,7 @@ struct gd1_mgmt_brick_op_req {
string name<>;
int op;
opaque input<>;
+ opaque dict<>;
} ;
struct gd1_mgmt_brick_op_rsp {
@@ -125,3 +141,109 @@ struct gd1_mgmt_brick_op_rsp {
opaque output<>;
string op_errstr<>;
} ;
+
+struct gd1_mgmt_v3_lock_req {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_lock_rsp {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ opaque dict<>;
+ int op_ret;
+ int op_errno;
+} ;
+
+struct gd1_mgmt_v3_pre_val_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_pre_val_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_brick_op_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_brick_op_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_commit_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_commit_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_v3_post_commit_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_post_commit_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_v3_post_val_req {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_post_val_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_unlock_req {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_unlock_rsp {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ opaque dict<>;
+ int op_ret;
+ int op_errno;
+} ;
diff --git a/rpc/xdr/src/glusterfs3-xdr.c b/rpc/xdr/src/glusterfs3-xdr.c
deleted file mode 100644
index dd8281ee223..00000000000
--- a/rpc/xdr/src/glusterfs3-xdr.c
+++ /dev/null
@@ -1,1922 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "glusterfs3-xdr.h"
-
-bool_t
-xdr_gf_statfs (XDR *xdrs, gf_statfs *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->bsize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->frsize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->blocks))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->bfree))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->bavail))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->files))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ffree))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->favail))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->fsid))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namemax))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_proto_flock (XDR *xdrs, gf_proto_flock *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->whence))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->start))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_iatt (XDR *xdrs, gf_iatt *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
-
- } else {
- IXDR_PUT_U_LONG(buf, objp->mode);
- IXDR_PUT_U_LONG(buf, objp->ia_nlink);
- IXDR_PUT_U_LONG(buf, objp->ia_uid);
- IXDR_PUT_U_LONG(buf, objp->ia_gid);
- }
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- } else {
- IXDR_PUT_U_LONG(buf, objp->ia_atime);
- IXDR_PUT_U_LONG(buf, objp->ia_atime_nsec);
- IXDR_PUT_U_LONG(buf, objp->ia_mtime);
- IXDR_PUT_U_LONG(buf, objp->ia_mtime_nsec);
- IXDR_PUT_U_LONG(buf, objp->ia_ctime);
- IXDR_PUT_U_LONG(buf, objp->ia_ctime_nsec);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
-
- } else {
- objp->mode = IXDR_GET_U_LONG(buf);
- objp->ia_nlink = IXDR_GET_U_LONG(buf);
- objp->ia_uid = IXDR_GET_U_LONG(buf);
- objp->ia_gid = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- } else {
- objp->ia_atime = IXDR_GET_U_LONG(buf);
- objp->ia_atime_nsec = IXDR_GET_U_LONG(buf);
- objp->ia_mtime = IXDR_GET_U_LONG(buf);
- objp->ia_mtime_nsec = IXDR_GET_U_LONG(buf);
- objp->ia_ctime = IXDR_GET_U_LONG(buf);
- objp->ia_ctime_nsec = IXDR_GET_U_LONG(buf);
- }
- return TRUE;
- }
-
- if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_stat_req (XDR *xdrs, gfs3_stat_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_stat_rsp (XDR *xdrs, gfs3_stat_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readlink_req (XDR *xdrs, gfs3_readlink_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readlink_rsp (XDR *xdrs, gfs3_readlink_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->buf))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mknod_req (XDR *xdrs, gfs3_mknod_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->dev))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mknod_rsp (XDR *xdrs, gfs3_mknod_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mkdir_req (XDR *xdrs, gfs3_mkdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mkdir_rsp (XDR *xdrs, gfs3_mkdir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_unlink_req (XDR *xdrs, gfs3_unlink_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->xflags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_unlink_rsp (XDR *xdrs, gfs3_unlink_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rmdir_req (XDR *xdrs, gfs3_rmdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_int (xdrs, &objp->xflags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rmdir_rsp (XDR *xdrs, gfs3_rmdir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_symlink_req (XDR *xdrs, gfs3_symlink_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->linkname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_symlink_rsp (XDR *xdrs, gfs3_symlink_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rename_req (XDR *xdrs, gfs3_rename_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->oldgfid, 16))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->newgfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->oldbname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newbname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rename_rsp (XDR *xdrs, gfs3_rename_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preoldparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postoldparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prenewparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postnewparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_link_req (XDR *xdrs, gfs3_link_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->oldgfid, 16))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->newgfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newbname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_link_rsp (XDR *xdrs, gfs3_link_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_truncate_req (XDR *xdrs, gfs3_truncate_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_truncate_rsp (XDR *xdrs, gfs3_truncate_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_open_req (XDR *xdrs, gfs3_open_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_open_rsp (XDR *xdrs, gfs3_open_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_read_req (XDR *xdrs, gfs3_read_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_read_rsp (XDR *xdrs, gfs3_read_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lookup_req (XDR *xdrs, gfs3_lookup_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lookup_rsp (XDR *xdrs, gfs3_lookup_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_write_req (XDR *xdrs, gfs3_write_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_write_rsp (XDR *xdrs, gfs3_write_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_statfs_req (XDR *xdrs, gfs3_statfs_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_statfs_rsp (XDR *xdrs, gfs3_statfs_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_statfs (xdrs, &objp->statfs))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lk_req (XDR *xdrs, gfs3_lk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lk_rsp (XDR *xdrs, gfs3_lk_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_inodelk_req (XDR *xdrs, gfs3_inodelk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_finodelk_req (XDR *xdrs, gfs3_finodelk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_flush_req (XDR *xdrs, gfs3_flush_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsync_req (XDR *xdrs, gfs3_fsync_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->data))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsync_rsp (XDR *xdrs, gfs3_fsync_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setxattr_req (XDR *xdrs, gfs3_setxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetxattr_req (XDR *xdrs, gfs3_fsetxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_xattrop_req (XDR *xdrs, gfs3_xattrop_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_xattrop_rsp (XDR *xdrs, gfs3_xattrop_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fxattrop_req (XDR *xdrs, gfs3_fxattrop_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fxattrop_rsp (XDR *xdrs, gfs3_fxattrop_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_getxattr_req (XDR *xdrs, gfs3_getxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_getxattr_rsp (XDR *xdrs, gfs3_getxattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fgetxattr_req (XDR *xdrs, gfs3_fgetxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fgetxattr_rsp (XDR *xdrs, gfs3_fgetxattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_removexattr_req (XDR *xdrs, gfs3_removexattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fremovexattr_req (XDR *xdrs, gfs3_fremovexattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_opendir_req (XDR *xdrs, gfs3_opendir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_opendir_rsp (XDR *xdrs, gfs3_opendir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsyncdir_req (XDR *xdrs, gfs3_fsyncdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_int (xdrs, &objp->data))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdir_req (XDR *xdrs, gfs3_readdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdirp_req (XDR *xdrs, gfs3_readdirp_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_setvolume_req (XDR *xdrs, gf_setvolume_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_setvolume_rsp (XDR *xdrs, gf_setvolume_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_access_req (XDR *xdrs, gfs3_access_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mask))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_create_req (XDR *xdrs, gfs3_create_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
-
- } else {
- IXDR_PUT_U_LONG(buf, objp->flags);
- IXDR_PUT_U_LONG(buf, objp->mode);
- IXDR_PUT_U_LONG(buf, objp->umask);
- }
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
-
- } else {
- objp->flags = IXDR_GET_U_LONG(buf);
- objp->mode = IXDR_GET_U_LONG(buf);
- objp->umask = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_create_rsp (XDR *xdrs, gfs3_create_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_ftruncate_req (XDR *xdrs, gfs3_ftruncate_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_ftruncate_rsp (XDR *xdrs, gfs3_ftruncate_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fstat_req (XDR *xdrs, gfs3_fstat_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fstat_rsp (XDR *xdrs, gfs3_fstat_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_entrylk_req (XDR *xdrs, gfs3_entrylk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fentrylk_req (XDR *xdrs, gfs3_fentrylk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setattr_req (XDR *xdrs, gfs3_setattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stbuf))
- return FALSE;
- if (!xdr_int (xdrs, &objp->valid))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setattr_rsp (XDR *xdrs, gfs3_setattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpre))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpost))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetattr_req (XDR *xdrs, gfs3_fsetattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stbuf))
- return FALSE;
- if (!xdr_int (xdrs, &objp->valid))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetattr_rsp (XDR *xdrs, gfs3_fsetattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpre))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpost))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->len))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rchecksum_rsp (XDR *xdrs, gfs3_rchecksum_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_U_LONG(buf, objp->weak_checksum);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->weak_checksum = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_getspec_req (XDR *xdrs, gf_getspec_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->key, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_getspec_rsp (XDR *xdrs, gf_getspec_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->spec, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_log_req (XDR *xdrs, gf_log_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_bytes (xdrs, (char **)&objp->msg.msg_val, (u_int *) &objp->msg.msg_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_notify_req (XDR *xdrs, gf_notify_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_notify_rsp (XDR *xdrs, gf_notify_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_U_LONG(buf, objp->flags);
- }
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->flags = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_releasedir_req (XDR *xdrs, gfs3_releasedir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_release_req (XDR *xdrs, gfs3_release_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_common_rsp (XDR *xdrs, gf_common_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_dirlist (XDR *xdrs, gfs3_dirlist *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->d_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->d_off))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_type))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdir_rsp (XDR *xdrs, gfs3_readdir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_dirplist (XDR *xdrs, gfs3_dirplist *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->d_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->d_off))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_type))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdirp_rsp (XDR *xdrs, gfs3_readdirp_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_set_lk_ver_rsp (XDR *xdrs, gf_set_lk_ver_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->lk_ver))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_set_lk_ver_req (XDR *xdrs, gf_set_lk_ver_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->uid, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->lk_ver))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_event_notify_req (XDR *xdrs, gf_event_notify_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_event_notify_rsp (XDR *xdrs, gf_event_notify_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h
deleted file mode 100644
index ed29deea271..00000000000
--- a/rpc/xdr/src/glusterfs3-xdr.h
+++ /dev/null
@@ -1,1295 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _GLUSTERFS3_XDR_H_RPCGEN
-#define _GLUSTERFS3_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-struct gf_statfs {
- u_quad_t bsize;
- u_quad_t frsize;
- u_quad_t blocks;
- u_quad_t bfree;
- u_quad_t bavail;
- u_quad_t files;
- u_quad_t ffree;
- u_quad_t favail;
- u_quad_t fsid;
- u_quad_t flag;
- u_quad_t namemax;
-};
-typedef struct gf_statfs gf_statfs;
-
-struct gf_proto_flock {
- u_int type;
- u_int whence;
- u_quad_t start;
- u_quad_t len;
- u_int pid;
- struct {
- u_int lk_owner_len;
- char *lk_owner_val;
- } lk_owner;
-};
-typedef struct gf_proto_flock gf_proto_flock;
-
-struct gf_iatt {
- char ia_gfid[16];
- u_quad_t ia_ino;
- u_quad_t ia_dev;
- u_int mode;
- u_int ia_nlink;
- u_int ia_uid;
- u_int ia_gid;
- u_quad_t ia_rdev;
- u_quad_t ia_size;
- u_int ia_blksize;
- u_quad_t ia_blocks;
- u_int ia_atime;
- u_int ia_atime_nsec;
- u_int ia_mtime;
- u_int ia_mtime_nsec;
- u_int ia_ctime;
- u_int ia_ctime_nsec;
-};
-typedef struct gf_iatt gf_iatt;
-
-struct gfs3_stat_req {
- char gfid[16];
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_stat_req gfs3_stat_req;
-
-struct gfs3_stat_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_stat_rsp gfs3_stat_rsp;
-
-struct gfs3_readlink_req {
- char gfid[16];
- u_int size;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readlink_req gfs3_readlink_req;
-
-struct gfs3_readlink_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt buf;
- char *path;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readlink_rsp gfs3_readlink_rsp;
-
-struct gfs3_mknod_req {
- char pargfid[16];
- u_quad_t dev;
- u_int mode;
- u_int umask;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mknod_req gfs3_mknod_req;
-
-struct gfs3_mknod_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mknod_rsp gfs3_mknod_rsp;
-
-struct gfs3_mkdir_req {
- char pargfid[16];
- u_int mode;
- u_int umask;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mkdir_req gfs3_mkdir_req;
-
-struct gfs3_mkdir_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mkdir_rsp gfs3_mkdir_rsp;
-
-struct gfs3_unlink_req {
- char pargfid[16];
- char *bname;
- u_int xflags;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_unlink_req gfs3_unlink_req;
-
-struct gfs3_unlink_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_unlink_rsp gfs3_unlink_rsp;
-
-struct gfs3_rmdir_req {
- char pargfid[16];
- int xflags;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rmdir_req gfs3_rmdir_req;
-
-struct gfs3_rmdir_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rmdir_rsp gfs3_rmdir_rsp;
-
-struct gfs3_symlink_req {
- char pargfid[16];
- char *bname;
- u_int umask;
- char *linkname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_symlink_req gfs3_symlink_req;
-
-struct gfs3_symlink_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_symlink_rsp gfs3_symlink_rsp;
-
-struct gfs3_rename_req {
- char oldgfid[16];
- char newgfid[16];
- char *oldbname;
- char *newbname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rename_req gfs3_rename_req;
-
-struct gfs3_rename_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preoldparent;
- struct gf_iatt postoldparent;
- struct gf_iatt prenewparent;
- struct gf_iatt postnewparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rename_rsp gfs3_rename_rsp;
-
-struct gfs3_link_req {
- char oldgfid[16];
- char newgfid[16];
- char *newbname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_link_req gfs3_link_req;
-
-struct gfs3_link_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_link_rsp gfs3_link_rsp;
-
-struct gfs3_truncate_req {
- char gfid[16];
- u_quad_t offset;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_truncate_req gfs3_truncate_req;
-
-struct gfs3_truncate_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_truncate_rsp gfs3_truncate_rsp;
-
-struct gfs3_open_req {
- char gfid[16];
- u_int flags;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_open_req gfs3_open_req;
-
-struct gfs3_open_rsp {
- int op_ret;
- int op_errno;
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_open_rsp gfs3_open_rsp;
-
-struct gfs3_read_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- u_int flag;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_read_req gfs3_read_req;
-
-struct gfs3_read_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- u_int size;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_read_rsp gfs3_read_rsp;
-
-struct gfs3_lookup_req {
- char gfid[16];
- char pargfid[16];
- u_int flags;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lookup_req gfs3_lookup_req;
-
-struct gfs3_lookup_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lookup_rsp gfs3_lookup_rsp;
-
-struct gfs3_write_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- u_int flag;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_write_req gfs3_write_req;
-
-struct gfs3_write_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_write_rsp gfs3_write_rsp;
-
-struct gfs3_statfs_req {
- char gfid[16];
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_statfs_req gfs3_statfs_req;
-
-struct gfs3_statfs_rsp {
- int op_ret;
- int op_errno;
- struct gf_statfs statfs;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_statfs_rsp gfs3_statfs_rsp;
-
-struct gfs3_lk_req {
- char gfid[16];
- quad_t fd;
- u_int cmd;
- u_int type;
- struct gf_proto_flock flock;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lk_req gfs3_lk_req;
-
-struct gfs3_lk_rsp {
- int op_ret;
- int op_errno;
- struct gf_proto_flock flock;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lk_rsp gfs3_lk_rsp;
-
-struct gfs3_inodelk_req {
- char gfid[16];
- u_int cmd;
- u_int type;
- struct gf_proto_flock flock;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_inodelk_req gfs3_inodelk_req;
-
-struct gfs3_finodelk_req {
- char gfid[16];
- quad_t fd;
- u_int cmd;
- u_int type;
- struct gf_proto_flock flock;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_finodelk_req gfs3_finodelk_req;
-
-struct gfs3_flush_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_flush_req gfs3_flush_req;
-
-struct gfs3_fsync_req {
- char gfid[16];
- quad_t fd;
- u_int data;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsync_req gfs3_fsync_req;
-
-struct gfs3_fsync_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsync_rsp gfs3_fsync_rsp;
-
-struct gfs3_setxattr_req {
- char gfid[16];
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_setxattr_req gfs3_setxattr_req;
-
-struct gfs3_fsetxattr_req {
- char gfid[16];
- quad_t fd;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsetxattr_req gfs3_fsetxattr_req;
-
-struct gfs3_xattrop_req {
- char gfid[16];
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_xattrop_req gfs3_xattrop_req;
-
-struct gfs3_xattrop_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_xattrop_rsp gfs3_xattrop_rsp;
-
-struct gfs3_fxattrop_req {
- char gfid[16];
- quad_t fd;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fxattrop_req gfs3_fxattrop_req;
-
-struct gfs3_fxattrop_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fxattrop_rsp gfs3_fxattrop_rsp;
-
-struct gfs3_getxattr_req {
- char gfid[16];
- u_int namelen;
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_getxattr_req gfs3_getxattr_req;
-
-struct gfs3_getxattr_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_getxattr_rsp gfs3_getxattr_rsp;
-
-struct gfs3_fgetxattr_req {
- char gfid[16];
- quad_t fd;
- u_int namelen;
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fgetxattr_req gfs3_fgetxattr_req;
-
-struct gfs3_fgetxattr_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fgetxattr_rsp gfs3_fgetxattr_rsp;
-
-struct gfs3_removexattr_req {
- char gfid[16];
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_removexattr_req gfs3_removexattr_req;
-
-struct gfs3_fremovexattr_req {
- char gfid[16];
- quad_t fd;
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fremovexattr_req gfs3_fremovexattr_req;
-
-struct gfs3_opendir_req {
- char gfid[16];
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_opendir_req gfs3_opendir_req;
-
-struct gfs3_opendir_rsp {
- int op_ret;
- int op_errno;
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_opendir_rsp gfs3_opendir_rsp;
-
-struct gfs3_fsyncdir_req {
- char gfid[16];
- quad_t fd;
- int data;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsyncdir_req gfs3_fsyncdir_req;
-
-struct gfs3_readdir_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readdir_req gfs3_readdir_req;
-
-struct gfs3_readdirp_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_readdirp_req gfs3_readdirp_req;
-
-struct gf_setvolume_req {
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_setvolume_req gf_setvolume_req;
-
-struct gf_setvolume_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_setvolume_rsp gf_setvolume_rsp;
-
-struct gfs3_access_req {
- char gfid[16];
- u_int mask;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_access_req gfs3_access_req;
-
-struct gfs3_create_req {
- char pargfid[16];
- u_int flags;
- u_int mode;
- u_int umask;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_create_req gfs3_create_req;
-
-struct gfs3_create_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- u_quad_t fd;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_create_rsp gfs3_create_rsp;
-
-struct gfs3_ftruncate_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_ftruncate_req gfs3_ftruncate_req;
-
-struct gfs3_ftruncate_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_ftruncate_rsp gfs3_ftruncate_rsp;
-
-struct gfs3_fstat_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fstat_req gfs3_fstat_req;
-
-struct gfs3_fstat_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fstat_rsp gfs3_fstat_rsp;
-
-struct gfs3_entrylk_req {
- char gfid[16];
- u_int cmd;
- u_int type;
- u_quad_t namelen;
- char *name;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_entrylk_req gfs3_entrylk_req;
-
-struct gfs3_fentrylk_req {
- char gfid[16];
- quad_t fd;
- u_int cmd;
- u_int type;
- u_quad_t namelen;
- char *name;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fentrylk_req gfs3_fentrylk_req;
-
-struct gfs3_setattr_req {
- char gfid[16];
- struct gf_iatt stbuf;
- int valid;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_setattr_req gfs3_setattr_req;
-
-struct gfs3_setattr_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_setattr_rsp gfs3_setattr_rsp;
-
-struct gfs3_fsetattr_req {
- quad_t fd;
- struct gf_iatt stbuf;
- int valid;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsetattr_req gfs3_fsetattr_req;
-
-struct gfs3_fsetattr_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsetattr_rsp gfs3_fsetattr_rsp;
-
-struct gfs3_rchecksum_req {
- quad_t fd;
- u_quad_t offset;
- u_int len;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rchecksum_req gfs3_rchecksum_req;
-
-struct gfs3_rchecksum_rsp {
- int op_ret;
- int op_errno;
- u_int weak_checksum;
- struct {
- u_int strong_checksum_len;
- char *strong_checksum_val;
- } strong_checksum;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rchecksum_rsp gfs3_rchecksum_rsp;
-
-struct gf_getspec_req {
- u_int flags;
- char *key;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_getspec_req gf_getspec_req;
-
-struct gf_getspec_rsp {
- int op_ret;
- int op_errno;
- char *spec;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_getspec_rsp gf_getspec_rsp;
-
-struct gf_log_req {
- struct {
- u_int msg_len;
- char *msg_val;
- } msg;
-};
-typedef struct gf_log_req gf_log_req;
-
-struct gf_notify_req {
- u_int flags;
- char *buf;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_notify_req gf_notify_req;
-
-struct gf_notify_rsp {
- int op_ret;
- int op_errno;
- u_int flags;
- char *buf;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_notify_rsp gf_notify_rsp;
-
-struct gfs3_releasedir_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_releasedir_req gfs3_releasedir_req;
-
-struct gfs3_release_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_release_req gfs3_release_req;
-
-struct gf_common_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_common_rsp gf_common_rsp;
-
-struct gfs3_dirlist {
- u_quad_t d_ino;
- u_quad_t d_off;
- u_int d_len;
- u_int d_type;
- char *name;
- struct gfs3_dirlist *nextentry;
-};
-typedef struct gfs3_dirlist gfs3_dirlist;
-
-struct gfs3_readdir_rsp {
- int op_ret;
- int op_errno;
- struct gfs3_dirlist *reply;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readdir_rsp gfs3_readdir_rsp;
-
-struct gfs3_dirplist {
- u_quad_t d_ino;
- u_quad_t d_off;
- u_int d_len;
- u_int d_type;
- char *name;
- struct gf_iatt stat;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct gfs3_dirplist *nextentry;
-};
-typedef struct gfs3_dirplist gfs3_dirplist;
-
-struct gfs3_readdirp_rsp {
- int op_ret;
- int op_errno;
- struct gfs3_dirplist *reply;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readdirp_rsp gfs3_readdirp_rsp;
-
-struct gf_set_lk_ver_rsp {
- int op_ret;
- int op_errno;
- int lk_ver;
-};
-typedef struct gf_set_lk_ver_rsp gf_set_lk_ver_rsp;
-
-struct gf_set_lk_ver_req {
- char *uid;
- int lk_ver;
-};
-typedef struct gf_set_lk_ver_req gf_set_lk_ver_req;
-
-struct gf_event_notify_req {
- int op;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_event_notify_req gf_event_notify_req;
-
-struct gf_event_notify_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_event_notify_rsp gf_event_notify_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_gf_statfs (XDR *, gf_statfs*);
-extern bool_t xdr_gf_proto_flock (XDR *, gf_proto_flock*);
-extern bool_t xdr_gf_iatt (XDR *, gf_iatt*);
-extern bool_t xdr_gfs3_stat_req (XDR *, gfs3_stat_req*);
-extern bool_t xdr_gfs3_stat_rsp (XDR *, gfs3_stat_rsp*);
-extern bool_t xdr_gfs3_readlink_req (XDR *, gfs3_readlink_req*);
-extern bool_t xdr_gfs3_readlink_rsp (XDR *, gfs3_readlink_rsp*);
-extern bool_t xdr_gfs3_mknod_req (XDR *, gfs3_mknod_req*);
-extern bool_t xdr_gfs3_mknod_rsp (XDR *, gfs3_mknod_rsp*);
-extern bool_t xdr_gfs3_mkdir_req (XDR *, gfs3_mkdir_req*);
-extern bool_t xdr_gfs3_mkdir_rsp (XDR *, gfs3_mkdir_rsp*);
-extern bool_t xdr_gfs3_unlink_req (XDR *, gfs3_unlink_req*);
-extern bool_t xdr_gfs3_unlink_rsp (XDR *, gfs3_unlink_rsp*);
-extern bool_t xdr_gfs3_rmdir_req (XDR *, gfs3_rmdir_req*);
-extern bool_t xdr_gfs3_rmdir_rsp (XDR *, gfs3_rmdir_rsp*);
-extern bool_t xdr_gfs3_symlink_req (XDR *, gfs3_symlink_req*);
-extern bool_t xdr_gfs3_symlink_rsp (XDR *, gfs3_symlink_rsp*);
-extern bool_t xdr_gfs3_rename_req (XDR *, gfs3_rename_req*);
-extern bool_t xdr_gfs3_rename_rsp (XDR *, gfs3_rename_rsp*);
-extern bool_t xdr_gfs3_link_req (XDR *, gfs3_link_req*);
-extern bool_t xdr_gfs3_link_rsp (XDR *, gfs3_link_rsp*);
-extern bool_t xdr_gfs3_truncate_req (XDR *, gfs3_truncate_req*);
-extern bool_t xdr_gfs3_truncate_rsp (XDR *, gfs3_truncate_rsp*);
-extern bool_t xdr_gfs3_open_req (XDR *, gfs3_open_req*);
-extern bool_t xdr_gfs3_open_rsp (XDR *, gfs3_open_rsp*);
-extern bool_t xdr_gfs3_read_req (XDR *, gfs3_read_req*);
-extern bool_t xdr_gfs3_read_rsp (XDR *, gfs3_read_rsp*);
-extern bool_t xdr_gfs3_lookup_req (XDR *, gfs3_lookup_req*);
-extern bool_t xdr_gfs3_lookup_rsp (XDR *, gfs3_lookup_rsp*);
-extern bool_t xdr_gfs3_write_req (XDR *, gfs3_write_req*);
-extern bool_t xdr_gfs3_write_rsp (XDR *, gfs3_write_rsp*);
-extern bool_t xdr_gfs3_statfs_req (XDR *, gfs3_statfs_req*);
-extern bool_t xdr_gfs3_statfs_rsp (XDR *, gfs3_statfs_rsp*);
-extern bool_t xdr_gfs3_lk_req (XDR *, gfs3_lk_req*);
-extern bool_t xdr_gfs3_lk_rsp (XDR *, gfs3_lk_rsp*);
-extern bool_t xdr_gfs3_inodelk_req (XDR *, gfs3_inodelk_req*);
-extern bool_t xdr_gfs3_finodelk_req (XDR *, gfs3_finodelk_req*);
-extern bool_t xdr_gfs3_flush_req (XDR *, gfs3_flush_req*);
-extern bool_t xdr_gfs3_fsync_req (XDR *, gfs3_fsync_req*);
-extern bool_t xdr_gfs3_fsync_rsp (XDR *, gfs3_fsync_rsp*);
-extern bool_t xdr_gfs3_setxattr_req (XDR *, gfs3_setxattr_req*);
-extern bool_t xdr_gfs3_fsetxattr_req (XDR *, gfs3_fsetxattr_req*);
-extern bool_t xdr_gfs3_xattrop_req (XDR *, gfs3_xattrop_req*);
-extern bool_t xdr_gfs3_xattrop_rsp (XDR *, gfs3_xattrop_rsp*);
-extern bool_t xdr_gfs3_fxattrop_req (XDR *, gfs3_fxattrop_req*);
-extern bool_t xdr_gfs3_fxattrop_rsp (XDR *, gfs3_fxattrop_rsp*);
-extern bool_t xdr_gfs3_getxattr_req (XDR *, gfs3_getxattr_req*);
-extern bool_t xdr_gfs3_getxattr_rsp (XDR *, gfs3_getxattr_rsp*);
-extern bool_t xdr_gfs3_fgetxattr_req (XDR *, gfs3_fgetxattr_req*);
-extern bool_t xdr_gfs3_fgetxattr_rsp (XDR *, gfs3_fgetxattr_rsp*);
-extern bool_t xdr_gfs3_removexattr_req (XDR *, gfs3_removexattr_req*);
-extern bool_t xdr_gfs3_fremovexattr_req (XDR *, gfs3_fremovexattr_req*);
-extern bool_t xdr_gfs3_opendir_req (XDR *, gfs3_opendir_req*);
-extern bool_t xdr_gfs3_opendir_rsp (XDR *, gfs3_opendir_rsp*);
-extern bool_t xdr_gfs3_fsyncdir_req (XDR *, gfs3_fsyncdir_req*);
-extern bool_t xdr_gfs3_readdir_req (XDR *, gfs3_readdir_req*);
-extern bool_t xdr_gfs3_readdirp_req (XDR *, gfs3_readdirp_req*);
-extern bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*);
-extern bool_t xdr_gf_setvolume_rsp (XDR *, gf_setvolume_rsp*);
-extern bool_t xdr_gfs3_access_req (XDR *, gfs3_access_req*);
-extern bool_t xdr_gfs3_create_req (XDR *, gfs3_create_req*);
-extern bool_t xdr_gfs3_create_rsp (XDR *, gfs3_create_rsp*);
-extern bool_t xdr_gfs3_ftruncate_req (XDR *, gfs3_ftruncate_req*);
-extern bool_t xdr_gfs3_ftruncate_rsp (XDR *, gfs3_ftruncate_rsp*);
-extern bool_t xdr_gfs3_fstat_req (XDR *, gfs3_fstat_req*);
-extern bool_t xdr_gfs3_fstat_rsp (XDR *, gfs3_fstat_rsp*);
-extern bool_t xdr_gfs3_entrylk_req (XDR *, gfs3_entrylk_req*);
-extern bool_t xdr_gfs3_fentrylk_req (XDR *, gfs3_fentrylk_req*);
-extern bool_t xdr_gfs3_setattr_req (XDR *, gfs3_setattr_req*);
-extern bool_t xdr_gfs3_setattr_rsp (XDR *, gfs3_setattr_rsp*);
-extern bool_t xdr_gfs3_fsetattr_req (XDR *, gfs3_fsetattr_req*);
-extern bool_t xdr_gfs3_fsetattr_rsp (XDR *, gfs3_fsetattr_rsp*);
-extern bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*);
-extern bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*);
-extern bool_t xdr_gf_getspec_req (XDR *, gf_getspec_req*);
-extern bool_t xdr_gf_getspec_rsp (XDR *, gf_getspec_rsp*);
-extern bool_t xdr_gf_log_req (XDR *, gf_log_req*);
-extern bool_t xdr_gf_notify_req (XDR *, gf_notify_req*);
-extern bool_t xdr_gf_notify_rsp (XDR *, gf_notify_rsp*);
-extern bool_t xdr_gfs3_releasedir_req (XDR *, gfs3_releasedir_req*);
-extern bool_t xdr_gfs3_release_req (XDR *, gfs3_release_req*);
-extern bool_t xdr_gf_common_rsp (XDR *, gf_common_rsp*);
-extern bool_t xdr_gfs3_dirlist (XDR *, gfs3_dirlist*);
-extern bool_t xdr_gfs3_readdir_rsp (XDR *, gfs3_readdir_rsp*);
-extern bool_t xdr_gfs3_dirplist (XDR *, gfs3_dirplist*);
-extern bool_t xdr_gfs3_readdirp_rsp (XDR *, gfs3_readdirp_rsp*);
-extern bool_t xdr_gf_set_lk_ver_rsp (XDR *, gf_set_lk_ver_rsp*);
-extern bool_t xdr_gf_set_lk_ver_req (XDR *, gf_set_lk_ver_req*);
-extern bool_t xdr_gf_event_notify_req (XDR *, gf_event_notify_req*);
-extern bool_t xdr_gf_event_notify_rsp (XDR *, gf_event_notify_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_gf_statfs ();
-extern bool_t xdr_gf_proto_flock ();
-extern bool_t xdr_gf_iatt ();
-extern bool_t xdr_gfs3_stat_req ();
-extern bool_t xdr_gfs3_stat_rsp ();
-extern bool_t xdr_gfs3_readlink_req ();
-extern bool_t xdr_gfs3_readlink_rsp ();
-extern bool_t xdr_gfs3_mknod_req ();
-extern bool_t xdr_gfs3_mknod_rsp ();
-extern bool_t xdr_gfs3_mkdir_req ();
-extern bool_t xdr_gfs3_mkdir_rsp ();
-extern bool_t xdr_gfs3_unlink_req ();
-extern bool_t xdr_gfs3_unlink_rsp ();
-extern bool_t xdr_gfs3_rmdir_req ();
-extern bool_t xdr_gfs3_rmdir_rsp ();
-extern bool_t xdr_gfs3_symlink_req ();
-extern bool_t xdr_gfs3_symlink_rsp ();
-extern bool_t xdr_gfs3_rename_req ();
-extern bool_t xdr_gfs3_rename_rsp ();
-extern bool_t xdr_gfs3_link_req ();
-extern bool_t xdr_gfs3_link_rsp ();
-extern bool_t xdr_gfs3_truncate_req ();
-extern bool_t xdr_gfs3_truncate_rsp ();
-extern bool_t xdr_gfs3_open_req ();
-extern bool_t xdr_gfs3_open_rsp ();
-extern bool_t xdr_gfs3_read_req ();
-extern bool_t xdr_gfs3_read_rsp ();
-extern bool_t xdr_gfs3_lookup_req ();
-extern bool_t xdr_gfs3_lookup_rsp ();
-extern bool_t xdr_gfs3_write_req ();
-extern bool_t xdr_gfs3_write_rsp ();
-extern bool_t xdr_gfs3_statfs_req ();
-extern bool_t xdr_gfs3_statfs_rsp ();
-extern bool_t xdr_gfs3_lk_req ();
-extern bool_t xdr_gfs3_lk_rsp ();
-extern bool_t xdr_gfs3_inodelk_req ();
-extern bool_t xdr_gfs3_finodelk_req ();
-extern bool_t xdr_gfs3_flush_req ();
-extern bool_t xdr_gfs3_fsync_req ();
-extern bool_t xdr_gfs3_fsync_rsp ();
-extern bool_t xdr_gfs3_setxattr_req ();
-extern bool_t xdr_gfs3_fsetxattr_req ();
-extern bool_t xdr_gfs3_xattrop_req ();
-extern bool_t xdr_gfs3_xattrop_rsp ();
-extern bool_t xdr_gfs3_fxattrop_req ();
-extern bool_t xdr_gfs3_fxattrop_rsp ();
-extern bool_t xdr_gfs3_getxattr_req ();
-extern bool_t xdr_gfs3_getxattr_rsp ();
-extern bool_t xdr_gfs3_fgetxattr_req ();
-extern bool_t xdr_gfs3_fgetxattr_rsp ();
-extern bool_t xdr_gfs3_removexattr_req ();
-extern bool_t xdr_gfs3_fremovexattr_req ();
-extern bool_t xdr_gfs3_opendir_req ();
-extern bool_t xdr_gfs3_opendir_rsp ();
-extern bool_t xdr_gfs3_fsyncdir_req ();
-extern bool_t xdr_gfs3_readdir_req ();
-extern bool_t xdr_gfs3_readdirp_req ();
-extern bool_t xdr_gf_setvolume_req ();
-extern bool_t xdr_gf_setvolume_rsp ();
-extern bool_t xdr_gfs3_access_req ();
-extern bool_t xdr_gfs3_create_req ();
-extern bool_t xdr_gfs3_create_rsp ();
-extern bool_t xdr_gfs3_ftruncate_req ();
-extern bool_t xdr_gfs3_ftruncate_rsp ();
-extern bool_t xdr_gfs3_fstat_req ();
-extern bool_t xdr_gfs3_fstat_rsp ();
-extern bool_t xdr_gfs3_entrylk_req ();
-extern bool_t xdr_gfs3_fentrylk_req ();
-extern bool_t xdr_gfs3_setattr_req ();
-extern bool_t xdr_gfs3_setattr_rsp ();
-extern bool_t xdr_gfs3_fsetattr_req ();
-extern bool_t xdr_gfs3_fsetattr_rsp ();
-extern bool_t xdr_gfs3_rchecksum_req ();
-extern bool_t xdr_gfs3_rchecksum_rsp ();
-extern bool_t xdr_gf_getspec_req ();
-extern bool_t xdr_gf_getspec_rsp ();
-extern bool_t xdr_gf_log_req ();
-extern bool_t xdr_gf_notify_req ();
-extern bool_t xdr_gf_notify_rsp ();
-extern bool_t xdr_gfs3_releasedir_req ();
-extern bool_t xdr_gfs3_release_req ();
-extern bool_t xdr_gf_common_rsp ();
-extern bool_t xdr_gfs3_dirlist ();
-extern bool_t xdr_gfs3_readdir_rsp ();
-extern bool_t xdr_gfs3_dirplist ();
-extern bool_t xdr_gfs3_readdirp_rsp ();
-extern bool_t xdr_gf_set_lk_ver_rsp ();
-extern bool_t xdr_gf_set_lk_ver_req ();
-extern bool_t xdr_gf_event_notify_req ();
-extern bool_t xdr_gf_event_notify_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_GLUSTERFS3_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x
index ad261423d2e..1c99099a721 100644
--- a/rpc/xdr/src/glusterfs3-xdr.x
+++ b/rpc/xdr/src/glusterfs3-xdr.x
@@ -1,40 +1,63 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/glusterfs-fops.h>
+%#include "rpc-common-xdr.h"
+
#define GF_REQUEST_MAXGROUPS 16
struct gf_statfs {
- unsigned hyper bsize;
- unsigned hyper frsize;
- unsigned hyper blocks;
- unsigned hyper bfree;
- unsigned hyper bavail;
- unsigned hyper files;
- unsigned hyper ffree;
- unsigned hyper favail;
- unsigned hyper fsid;
- unsigned hyper flag;
- unsigned hyper namemax;
+ u_quad_t bsize;
+ u_quad_t frsize;
+ u_quad_t blocks;
+ u_quad_t bfree;
+ u_quad_t bavail;
+ u_quad_t files;
+ u_quad_t ffree;
+ u_quad_t favail;
+ u_quad_t fsid;
+ u_quad_t flag;
+ u_quad_t namemax;
};
+
struct gf_proto_flock {
unsigned int type;
unsigned int whence;
- unsigned hyper start;
- unsigned hyper len;
+ u_quad_t start;
+ u_quad_t len;
unsigned int pid;
opaque lk_owner<>;
} ;
+struct gf_proto_lease {
+ unsigned int cmd;
+ unsigned int lease_type;
+ opaque lease_id[16];
+ unsigned int lease_flags;
+} ;
struct gf_iatt {
opaque ia_gfid[16];
- unsigned hyper ia_ino; /* inode number */
- unsigned hyper ia_dev; /* backing device ID */
+ u_quad_t ia_ino; /* inode number */
+ u_quad_t ia_dev; /* backing device ID */
unsigned int mode; /* mode (type + protection )*/
unsigned int ia_nlink; /* Link count */
unsigned int ia_uid; /* user ID of owner */
unsigned int ia_gid; /* group ID of owner */
- unsigned hyper ia_rdev; /* device ID (if special file) */
- unsigned hyper ia_size; /* file size in bytes */
+ u_quad_t ia_rdev; /* device ID (if special file) */
+ u_quad_t ia_size; /* file size in bytes */
unsigned int ia_blksize; /* blocksize for filesystem I/O */
- unsigned hyper ia_blocks; /* number of 512B blocks allocated */
+ u_quad_t ia_blocks; /* number of 512B blocks allocated */
unsigned int ia_atime; /* last access time */
unsigned int ia_atime_nsec;
unsigned int ia_mtime; /* last modification time */
@@ -43,6 +66,22 @@ struct gf_iatt {
unsigned int ia_ctime_nsec;
};
+
+struct gfs3_cbk_cache_invalidation_req {
+ string gfid<>;
+ unsigned int event_type; /* Upcall event type */
+ unsigned int flags; /* or mask of events incase of inotify */
+ unsigned int expire_time_attr; /* the amount of time which client
+ * can cache this entry */
+ gf_iatt stat; /* Updated/current stat of the file/dir */
+ gf_iatt parent_stat; /* Updated stat of the parent dir
+ * needed in case of create, mkdir,
+ * unlink, rmdir, rename fops */
+ gf_iatt oldparent_stat; /* Updated stat of the oldparent dir
+ needed in case of rename fop */
+ opaque xdata<>; /* Extra data */
+};
+
struct gfs3_stat_req {
opaque gfid[16];
opaque xdata<>; /* Extra data */
@@ -50,7 +89,7 @@ struct gfs3_stat_req {
struct gfs3_stat_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
+ gf_iatt stat;
opaque xdata<>; /* Extra data */
} ;
@@ -63,7 +102,7 @@ struct gfs3_readlink_req {
struct gfs3_readlink_rsp {
int op_ret;
int op_errno;
- struct gf_iatt buf;
+ gf_iatt buf;
string path<>; /* NULL terminated */
opaque xdata<>; /* Extra data */
} ;
@@ -71,7 +110,7 @@ struct gfs3_readlink_req {
struct gfs3_mknod_req {
opaque pargfid[16];
- unsigned hyper dev;
+ u_quad_t dev;
unsigned int mode;
unsigned int umask;
string bname<>; /* NULL terminated */
@@ -80,9 +119,9 @@ struct gfs3_readlink_req {
struct gfs3_mknod_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -97,9 +136,9 @@ struct gfs3_readlink_req {
struct gfs3_mkdir_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
} ;
@@ -113,8 +152,8 @@ struct gfs3_readlink_req {
struct gfs3_unlink_rsp {
int op_ret;
int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -128,8 +167,8 @@ struct gfs3_readlink_req {
struct gfs3_rmdir_rsp {
int op_ret;
int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -144,9 +183,9 @@ struct gfs3_readlink_req {
struct gfs3_symlink_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -161,11 +200,11 @@ struct gfs3_readlink_req {
struct gfs3_rename_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preoldparent;
- struct gf_iatt postoldparent;
- struct gf_iatt prenewparent;
- struct gf_iatt postnewparent;
+ gf_iatt stat;
+ gf_iatt preoldparent;
+ gf_iatt postoldparent;
+ gf_iatt prenewparent;
+ gf_iatt postnewparent;
opaque xdata<>; /* Extra data */
};
@@ -179,22 +218,22 @@ struct gfs3_readlink_req {
struct gfs3_link_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
struct gfs3_truncate_req {
opaque gfid[16];
- unsigned hyper offset;
+ u_quad_t offset;
opaque xdata<>; /* Extra data */
};
struct gfs3_truncate_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
};
@@ -207,15 +246,15 @@ struct gfs3_readlink_req {
struct gfs3_open_rsp {
int op_ret;
int op_errno;
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
};
struct gfs3_read_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
unsigned int flag;
opaque xdata<>; /* Extra data */
@@ -223,7 +262,7 @@ struct gfs3_readlink_req {
struct gfs3_read_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
+ gf_iatt stat;
unsigned int size;
opaque xdata<>; /* Extra data */
} ;
@@ -238,8 +277,8 @@ struct gfs3_lookup_req {
struct gfs3_lookup_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
} ;
@@ -247,8 +286,8 @@ struct gfs3_lookup_req {
struct gfs3_write_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
unsigned int flag;
opaque xdata<>; /* Extra data */
@@ -256,8 +295,8 @@ struct gfs3_lookup_req {
struct gfs3_write_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
} ;
@@ -269,40 +308,60 @@ struct gfs3_lookup_req {
struct gfs3_statfs_rsp {
int op_ret;
int op_errno;
- struct gf_statfs statfs;
+ gf_statfs statfs;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_lk_req {
opaque gfid[16];
- hyper fd;
+ int64_t fd;
unsigned int cmd;
unsigned int type;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_lk_rsp {
int op_ret;
int op_errno;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
opaque xdata<>; /* Extra data */
} ;
+struct gfs3_lease_req {
+ opaque gfid[16];
+ gf_proto_lease lease;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_lease_rsp {
+ int op_ret;
+ int op_errno;
+ gf_proto_lease lease;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_recall_lease_req {
+ opaque gfid[16];
+ unsigned int lease_type;
+ opaque tid[16];
+ opaque xdata<>; /* Extra data */
+} ;
+
struct gfs3_inodelk_req {
opaque gfid[16];
unsigned int cmd;
unsigned int type;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
string volume<>;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_finodelk_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int cmd;
unsigned int type;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
string volume<>;
opaque xdata<>; /* Extra data */
} ;
@@ -310,22 +369,22 @@ struct gfs3_finodelk_req {
struct gfs3_flush_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsync_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int data;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsync_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
} ;
@@ -341,7 +400,7 @@ struct gfs3_finodelk_req {
struct gfs3_fsetxattr_req {
opaque gfid[16];
- hyper fd;
+ int64_t fd;
unsigned int flags;
opaque dict<>;
opaque xdata<>; /* Extra data */
@@ -366,7 +425,7 @@ struct gfs3_finodelk_req {
struct gfs3_fxattrop_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int flags;
opaque dict<>;
opaque xdata<>; /* Extra data */
@@ -396,7 +455,7 @@ struct gfs3_finodelk_req {
struct gfs3_fgetxattr_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int namelen;
string name<>;
opaque xdata<>; /* Extra data */
@@ -417,7 +476,7 @@ struct gfs3_finodelk_req {
struct gfs3_fremovexattr_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
string name<>;
opaque xdata<>; /* Extra data */
} ;
@@ -431,44 +490,35 @@ struct gfs3_finodelk_req {
struct gfs3_opendir_rsp {
int op_ret;
int op_errno;
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsyncdir_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
int data;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_readdir_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
opaque xdata<>; /* Extra data */
};
struct gfs3_readdirp_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
opaque dict<>;
} ;
- struct gf_setvolume_req {
- opaque dict<>;
-} ;
- struct gf_setvolume_rsp {
- int op_ret;
- int op_errno;
- opaque dict<>;
-} ;
-
struct gfs3_access_req {
opaque gfid[16];
unsigned int mask;
@@ -487,10 +537,10 @@ struct gfs3_create_req {
struct gfs3_create_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- unsigned hyper fd;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ u_quad_t fd;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
} ;
@@ -498,28 +548,28 @@ struct gfs3_create_rsp {
struct gfs3_ftruncate_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_ftruncate_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fstat_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fstat_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
+ gf_iatt stat;
opaque xdata<>; /* Extra data */
} ;
@@ -529,7 +579,7 @@ struct gfs3_fstat_req {
opaque gfid[16];
unsigned int cmd;
unsigned int type;
- unsigned hyper namelen;
+ u_quad_t namelen;
string name<>;
string volume<>;
opaque xdata<>; /* Extra data */
@@ -537,10 +587,10 @@ struct gfs3_fstat_req {
struct gfs3_fentrylk_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int cmd;
unsigned int type;
- unsigned hyper namelen;
+ u_quad_t namelen;
string name<>;
string volume<>;
opaque xdata<>; /* Extra data */
@@ -549,35 +599,85 @@ struct gfs3_fstat_req {
struct gfs3_setattr_req {
opaque gfid[16];
- struct gf_iatt stbuf;
+ gf_iatt stbuf;
int valid;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_setattr_rsp {
int op_ret;
int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
+ gf_iatt statpre;
+ gf_iatt statpost;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsetattr_req {
- hyper fd;
- struct gf_iatt stbuf;
+ quad_t fd;
+ gf_iatt stbuf;
int valid;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsetattr_rsp {
int op_ret;
int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
+ gf_iatt statpre;
+ gf_iatt statpost;
opaque xdata<>; /* Extra data */
} ;
+ struct gfs3_fallocate_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int flags;
+ u_quad_t offset;
+ u_quad_t size;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_fallocate_rsp {
+ int op_ret;
+ int op_errno;
+ gf_iatt statpre;
+ gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_discard_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_discard_rsp {
+ int op_ret;
+ int op_errno;
+ gf_iatt statpre;
+ gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_zerofill_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ opaque xdata<>;
+} ;
+
+ struct gfs3_zerofill_rsp {
+ int op_ret;
+ int op_errno;
+ gf_iatt statpre;
+ gf_iatt statpost;
+ opaque xdata<>;
+} ;
+
+
struct gfs3_rchecksum_req {
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int len;
opaque xdata<>; /* Extra data */
} ;
@@ -590,6 +690,44 @@ struct gfs3_fstat_req {
} ;
+struct gfs3_ipc_req {
+ int op;
+ opaque xdata<>;
+};
+
+struct gfs3_ipc_rsp {
+ int op_ret;
+ int op_errno;
+ opaque xdata<>;
+};
+
+
+struct gfs3_seek_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ int what;
+ opaque xdata<>;
+};
+
+struct gfs3_seek_rsp {
+ int op_ret;
+ int op_errno;
+ u_quad_t offset;
+ opaque xdata<>;
+};
+
+
+ struct gf_setvolume_req {
+ opaque dict<>;
+} ;
+ struct gf_setvolume_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+} ;
+
+
struct gf_getspec_req {
unsigned int flags;
string key<>;
@@ -602,10 +740,29 @@ struct gfs3_fstat_req {
opaque xdata<>; /* Extra data */
} ;
+ struct gf_get_volume_info_req {
+ opaque dict<>; /* Extra data */
+} ;
+ struct gf_get_volume_info_rsp {
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>; /* Extra data */
+} ;
+
+ struct gf_mgmt_hndsk_req {
+ opaque hndsk<>;
+} ;
+
+ struct gf_mgmt_hndsk_rsp {
+ int op_ret;
+ int op_errno;
+ opaque hndsk<>;
+} ;
struct gf_log_req {
- opaque msg<>;
-};
+ opaque msg<>;
+} ;
struct gf_notify_req {
unsigned int flags;
@@ -622,54 +779,48 @@ struct gfs3_fstat_req {
struct gfs3_releasedir_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_release_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
-struct gf_common_rsp {
- int op_ret;
- int op_errno;
- opaque xdata<>; /* Extra data */
-} ;
-
struct gfs3_dirlist {
- unsigned hyper d_ino;
- unsigned hyper d_off;
+ u_quad_t d_ino;
+ u_quad_t d_off;
unsigned int d_len;
unsigned int d_type;
string name<>;
- struct gfs3_dirlist *nextentry;
+ gfs3_dirlist *nextentry;
};
struct gfs3_readdir_rsp {
int op_ret;
int op_errno;
- struct gfs3_dirlist *reply;
+ gfs3_dirlist *reply;
opaque xdata<>; /* Extra data */
};
struct gfs3_dirplist {
- unsigned hyper d_ino;
- unsigned hyper d_off;
+ u_quad_t d_ino;
+ u_quad_t d_off;
unsigned int d_len;
unsigned int d_type;
string name<>;
- struct gf_iatt stat;
+ gf_iatt stat;
opaque dict<>;
- struct gfs3_dirplist *nextentry;
+ gfs3_dirplist *nextentry;
};
struct gfs3_readdirp_rsp {
int op_ret;
int op_errno;
- struct gfs3_dirplist *reply;
+ gfs3_dirplist *reply;
opaque xdata<>; /* Extra data */
};
@@ -694,3 +845,46 @@ struct gf_event_notify_rsp {
int op_errno;
opaque dict<>;
};
+
+
+struct gf_getsnap_name_uuid_req {
+ opaque dict<>;
+};
+
+struct gf_getsnap_name_uuid_rsp {
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+};
+
+struct gfs3_locklist {
+ gf_proto_flock flock;
+ string client_uid<>;
+ unsigned int lk_flags;
+ gfs3_locklist *nextentry;
+};
+
+struct gfs3_getactivelk_rsp {
+ int op_ret;
+ int op_errno;
+ gfs3_locklist *reply;
+ opaque xdata<>;
+};
+
+struct gfs3_getactivelk_req {
+ opaque gfid[16];
+ opaque xdata<>;
+};
+
+struct gfs3_setactivelk_rsp {
+ int op_ret;
+ int op_errno;
+ opaque xdata<>;
+};
+
+struct gfs3_setactivelk_req {
+ opaque gfid[16];
+ gfs3_locklist *request;
+ opaque xdata<>;
+};
diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h
index df0f1d138fa..86b3a4c0e5d 100644
--- a/rpc/xdr/src/glusterfs3.h
+++ b/rpc/xdr/src/glusterfs3.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _GLUSTERFS3_H
#define _GLUSTERFS3_H
@@ -25,256 +15,943 @@
#include "xdr-generic.h"
#include "glusterfs3-xdr.h"
-#include "iatt.h"
-
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-
-
-#define GF_O_ACCMODE 003
-#define GF_O_RDONLY 00
-#define GF_O_WRONLY 01
-#define GF_O_RDWR 02
-#define GF_O_CREAT 0100
-#define GF_O_EXCL 0200
-#define GF_O_NOCTTY 0400
-#define GF_O_TRUNC 01000
-#define GF_O_APPEND 02000
-#define GF_O_NONBLOCK 04000
-#define GF_O_SYNC 010000
-#define GF_O_ASYNC 020000
-
-#define GF_O_DIRECT 040000
-#define GF_O_DIRECTORY 0200000
-#define GF_O_NOFOLLOW 0400000
-#define GF_O_NOATIME 01000000
-#define GF_O_CLOEXEC 02000000
-
-#define GF_O_LARGEFILE 0100000
-
-#define GF_O_FMODE_EXEC 040
-
-#define XLATE_BIT(from, to, bit) do { \
- if (from & bit) \
- to = to | GF_##bit; \
- } while (0)
-
-#define UNXLATE_BIT(from, to, bit) do { \
- if (from & GF_##bit) \
- to = to | bit; \
- } while (0)
-
-#define XLATE_ACCESSMODE(from, to) do { \
- switch (from & O_ACCMODE) { \
- case O_RDONLY: to |= GF_O_RDONLY; \
- break; \
- case O_WRONLY: to |= GF_O_WRONLY; \
- break; \
- case O_RDWR: to |= GF_O_RDWR; \
- break; \
- } \
- } while (0)
-
-#define UNXLATE_ACCESSMODE(from, to) do { \
- switch (from & GF_O_ACCMODE) { \
- case GF_O_RDONLY: to |= O_RDONLY; \
- break; \
- case GF_O_WRONLY: to |= O_WRONLY; \
- break; \
- case GF_O_RDWR: to |= O_RDWR; \
- break; \
- } \
- } while (0)
+#include "glusterfs4-xdr.h"
+#include <glusterfs/iatt.h>
+#include "protocol-common.h"
+#include <glusterfs/upcall-utils.h>
+
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_encoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+
+#define GF_O_ACCMODE 003
+#define GF_O_RDONLY 00
+#define GF_O_WRONLY 01
+#define GF_O_RDWR 02
+#define GF_O_CREAT 0100
+#define GF_O_EXCL 0200
+#define GF_O_NOCTTY 0400
+#define GF_O_TRUNC 01000
+#define GF_O_APPEND 02000
+#define GF_O_NONBLOCK 04000
+#define GF_O_SYNC 010000
+#define GF_O_ASYNC 020000
+
+#define GF_O_DIRECT 040000
+#define GF_O_DIRECTORY 0200000
+#define GF_O_NOFOLLOW 0400000
+#define GF_O_NOATIME 01000000
+#define GF_O_CLOEXEC 02000000
+
+#define GF_O_LARGEFILE 0100000
+
+#define GF_O_FMODE_EXEC 040
+
+#define XLATE_BIT(from, to, bit) \
+ do { \
+ if (from & bit) \
+ to = to | GF_##bit; \
+ } while (0)
+
+#define UNXLATE_BIT(from, to, bit) \
+ do { \
+ if (from & GF_##bit) \
+ to = to | bit; \
+ } while (0)
+
+#define XLATE_ACCESSMODE(from, to) \
+ do { \
+ switch (from & O_ACCMODE) { \
+ case O_RDONLY: \
+ to |= GF_O_RDONLY; \
+ break; \
+ case O_WRONLY: \
+ to |= GF_O_WRONLY; \
+ break; \
+ case O_RDWR: \
+ to |= GF_O_RDWR; \
+ break; \
+ } \
+ } while (0)
+
+#define UNXLATE_ACCESSMODE(from, to) \
+ do { \
+ switch (from & GF_O_ACCMODE) { \
+ case GF_O_RDONLY: \
+ to |= O_RDONLY; \
+ break; \
+ case GF_O_WRONLY: \
+ to |= O_WRONLY; \
+ break; \
+ case GF_O_RDWR: \
+ to |= O_RDWR; \
+ break; \
+ } \
+ } while (0)
static inline uint32_t
-gf_flags_from_flags (uint32_t flags)
+gf_flags_from_flags(uint32_t flags)
{
- uint32_t gf_flags = 0;
-
- XLATE_ACCESSMODE (flags, gf_flags);
-
- XLATE_BIT (flags, gf_flags, O_CREAT);
- XLATE_BIT (flags, gf_flags, O_EXCL);
- XLATE_BIT (flags, gf_flags, O_NOCTTY);
- XLATE_BIT (flags, gf_flags, O_TRUNC);
- XLATE_BIT (flags, gf_flags, O_APPEND);
- XLATE_BIT (flags, gf_flags, O_NONBLOCK);
- XLATE_BIT (flags, gf_flags, O_SYNC);
- XLATE_BIT (flags, gf_flags, O_ASYNC);
-
- XLATE_BIT (flags, gf_flags, O_DIRECT);
- XLATE_BIT (flags, gf_flags, O_DIRECTORY);
- XLATE_BIT (flags, gf_flags, O_NOFOLLOW);
+ uint32_t gf_flags = 0;
+
+ XLATE_ACCESSMODE(flags, gf_flags);
+
+ XLATE_BIT(flags, gf_flags, O_CREAT);
+ XLATE_BIT(flags, gf_flags, O_EXCL);
+ XLATE_BIT(flags, gf_flags, O_NOCTTY);
+ XLATE_BIT(flags, gf_flags, O_TRUNC);
+ XLATE_BIT(flags, gf_flags, O_APPEND);
+ XLATE_BIT(flags, gf_flags, O_NONBLOCK);
+ XLATE_BIT(flags, gf_flags, O_SYNC);
+ XLATE_BIT(flags, gf_flags, O_ASYNC);
+
+ XLATE_BIT(flags, gf_flags, O_DIRECT);
+ XLATE_BIT(flags, gf_flags, O_DIRECTORY);
+ XLATE_BIT(flags, gf_flags, O_NOFOLLOW);
#ifdef O_NOATIME
- XLATE_BIT (flags, gf_flags, O_NOATIME);
+ XLATE_BIT(flags, gf_flags, O_NOATIME);
#endif
#ifdef O_CLOEXEC
- XLATE_BIT (flags, gf_flags, O_CLOEXEC);
+ XLATE_BIT(flags, gf_flags, O_CLOEXEC);
#endif
- XLATE_BIT (flags, gf_flags, O_LARGEFILE);
- XLATE_BIT (flags, gf_flags, O_FMODE_EXEC);
+ XLATE_BIT(flags, gf_flags, O_LARGEFILE);
+ XLATE_BIT(flags, gf_flags, O_FMODE_EXEC);
- return gf_flags;
+ return gf_flags;
}
static inline uint32_t
-gf_flags_to_flags (uint32_t gf_flags)
+gf_flags_to_flags(uint32_t gf_flags)
{
- uint32_t flags = 0;
-
- UNXLATE_ACCESSMODE (gf_flags, flags);
-
- UNXLATE_BIT (gf_flags, flags, O_CREAT);
- UNXLATE_BIT (gf_flags, flags, O_EXCL);
- UNXLATE_BIT (gf_flags, flags, O_NOCTTY);
- UNXLATE_BIT (gf_flags, flags, O_TRUNC);
- UNXLATE_BIT (gf_flags, flags, O_APPEND);
- UNXLATE_BIT (gf_flags, flags, O_NONBLOCK);
- UNXLATE_BIT (gf_flags, flags, O_SYNC);
- UNXLATE_BIT (gf_flags, flags, O_ASYNC);
-
- UNXLATE_BIT (gf_flags, flags, O_DIRECT);
- UNXLATE_BIT (gf_flags, flags, O_DIRECTORY);
- UNXLATE_BIT (gf_flags, flags, O_NOFOLLOW);
+ uint32_t flags = 0;
+
+ UNXLATE_ACCESSMODE(gf_flags, flags);
+
+ UNXLATE_BIT(gf_flags, flags, O_CREAT);
+ UNXLATE_BIT(gf_flags, flags, O_EXCL);
+ UNXLATE_BIT(gf_flags, flags, O_NOCTTY);
+ UNXLATE_BIT(gf_flags, flags, O_TRUNC);
+ UNXLATE_BIT(gf_flags, flags, O_APPEND);
+ UNXLATE_BIT(gf_flags, flags, O_NONBLOCK);
+ UNXLATE_BIT(gf_flags, flags, O_SYNC);
+ UNXLATE_BIT(gf_flags, flags, O_ASYNC);
+
+ UNXLATE_BIT(gf_flags, flags, O_DIRECT);
+ UNXLATE_BIT(gf_flags, flags, O_DIRECTORY);
+ UNXLATE_BIT(gf_flags, flags, O_NOFOLLOW);
#ifdef O_NOATIME
- UNXLATE_BIT (gf_flags, flags, O_NOATIME);
+ UNXLATE_BIT(gf_flags, flags, O_NOATIME);
#endif
#ifdef O_CLOEXEC
- UNXLATE_BIT (gf_flags, flags, O_CLOEXEC);
+ UNXLATE_BIT(gf_flags, flags, O_CLOEXEC);
#endif
- UNXLATE_BIT (gf_flags, flags, O_LARGEFILE);
- UNXLATE_BIT (gf_flags, flags, O_FMODE_EXEC);
+ UNXLATE_BIT(gf_flags, flags, O_LARGEFILE);
+ UNXLATE_BIT(gf_flags, flags, O_FMODE_EXEC);
- return flags;
+ return flags;
}
+static inline void
+gf_statfs_to_statfs(struct gf_statfs *gf_stat, struct statvfs *stat)
+{
+ if (!stat || !gf_stat)
+ return;
+
+ stat->f_bsize = (gf_stat->bsize);
+ stat->f_frsize = (gf_stat->frsize);
+ stat->f_blocks = (gf_stat->blocks);
+ stat->f_bfree = (gf_stat->bfree);
+ stat->f_bavail = (gf_stat->bavail);
+ stat->f_files = (gf_stat->files);
+ stat->f_ffree = (gf_stat->ffree);
+ stat->f_favail = (gf_stat->favail);
+ stat->f_fsid = (gf_stat->fsid);
+ stat->f_flag = (gf_stat->flag);
+ stat->f_namemax = (gf_stat->namemax);
+}
static inline void
-gf_statfs_to_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
+gf_statfs_from_statfs(struct gf_statfs *gf_stat, struct statvfs *stat)
{
- if (!stat || !gf_stat)
- return;
-
- stat->f_bsize = (gf_stat->bsize);
- stat->f_frsize = (gf_stat->frsize);
- stat->f_blocks = (gf_stat->blocks);
- stat->f_bfree = (gf_stat->bfree);
- stat->f_bavail = (gf_stat->bavail);
- stat->f_files = (gf_stat->files);
- stat->f_ffree = (gf_stat->ffree);
- stat->f_favail = (gf_stat->favail);
- stat->f_fsid = (gf_stat->fsid);
- stat->f_flag = (gf_stat->flag);
- stat->f_namemax = (gf_stat->namemax);
+ if (!stat || !gf_stat)
+ return;
+
+ gf_stat->bsize = stat->f_bsize;
+ gf_stat->frsize = stat->f_frsize;
+ gf_stat->blocks = stat->f_blocks;
+ gf_stat->bfree = stat->f_bfree;
+ gf_stat->bavail = stat->f_bavail;
+ gf_stat->files = stat->f_files;
+ gf_stat->ffree = stat->f_ffree;
+ gf_stat->favail = stat->f_favail;
+ gf_stat->fsid = stat->f_fsid;
+ gf_stat->flag = stat->f_flag;
+ gf_stat->namemax = stat->f_namemax;
}
+static inline void
+gf_proto_lease_to_lease(struct gf_proto_lease *gf_proto_lease,
+ struct gf_lease *gf_lease)
+{
+ if (!gf_lease || !gf_proto_lease)
+ return;
+
+ gf_lease->cmd = gf_proto_lease->cmd;
+ gf_lease->lease_type = gf_proto_lease->lease_type;
+ memcpy(gf_lease->lease_id, gf_proto_lease->lease_id, LEASE_ID_SIZE);
+}
static inline void
-gf_statfs_from_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
+gf_proto_lease_from_lease(struct gf_proto_lease *gf_proto_lease,
+ struct gf_lease *gf_lease)
{
- if (!stat || !gf_stat)
- return;
-
- gf_stat->bsize = stat->f_bsize;
- gf_stat->frsize = stat->f_frsize;
- gf_stat->blocks = stat->f_blocks;
- gf_stat->bfree = stat->f_bfree;
- gf_stat->bavail = stat->f_bavail;
- gf_stat->files = stat->f_files;
- gf_stat->ffree = stat->f_ffree;
- gf_stat->favail = stat->f_favail;
- gf_stat->fsid = stat->f_fsid;
- gf_stat->flag = stat->f_flag;
- gf_stat->namemax = stat->f_namemax;
+ if (!gf_lease || !gf_proto_lease)
+ return;
+
+ gf_proto_lease->cmd = gf_lease->cmd;
+ gf_proto_lease->lease_type = gf_lease->lease_type;
+ memcpy(gf_proto_lease->lease_id, gf_lease->lease_id, LEASE_ID_SIZE);
+}
+
+static inline int
+gf_proto_recall_lease_to_upcall(struct gfs3_recall_lease_req *recall_lease,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_recall_lease *tmp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, recall_lease, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_recall_lease *)gf_up_data->data;
+ tmp->lease_type = recall_lease->lease_type;
+ memcpy(gf_up_data->gfid, recall_lease->gfid, 16);
+ memcpy(tmp->tid, recall_lease->tid, 16);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(
+ THIS, tmp->dict, (recall_lease->xdata).xdata_val,
+ (recall_lease->xdata).xdata_len, ret, errno, out);
+out:
+ return ret;
+}
+
+static inline int
+gf_proto_recall_lease_from_upcall(xlator_t *this,
+ struct gfs3_recall_lease_req *recall_lease,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_recall_lease *tmp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, recall_lease, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_recall_lease *)gf_up_data->data;
+ recall_lease->lease_type = tmp->lease_type;
+ memcpy(recall_lease->gfid, gf_up_data->gfid, 16);
+ memcpy(recall_lease->tid, tmp->tid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, tmp->dict,
+ &(recall_lease->xdata).xdata_val,
+ (recall_lease->xdata).xdata_len, ret, out);
+out:
+ return ret;
}
static inline void
-gf_proto_flock_to_flock (struct gf_proto_flock *gf_proto_flock, struct gf_flock *gf_flock)
+gf_proto_flock_to_flock(struct gf_proto_flock *gf_proto_flock,
+ struct gf_flock *gf_flock)
{
- if (!gf_flock || !gf_proto_flock)
- return;
-
- gf_flock->l_type = gf_proto_flock->type;
- gf_flock->l_whence = gf_proto_flock->whence;
- gf_flock->l_start = gf_proto_flock->start;
- gf_flock->l_len = gf_proto_flock->len;
- gf_flock->l_pid = gf_proto_flock->pid;
- gf_flock->l_owner.len = gf_proto_flock->lk_owner.lk_owner_len;
- if (gf_flock->l_owner.len &&
- (gf_flock->l_owner.len < GF_MAX_LOCK_OWNER_LEN))
- memcpy (gf_flock->l_owner.data, gf_proto_flock->lk_owner.lk_owner_val,
- gf_flock->l_owner.len);
+ if (!gf_flock || !gf_proto_flock)
+ return;
+
+ gf_flock->l_type = gf_proto_flock->type;
+ gf_flock->l_whence = gf_proto_flock->whence;
+ gf_flock->l_start = gf_proto_flock->start;
+ gf_flock->l_len = gf_proto_flock->len;
+ gf_flock->l_pid = gf_proto_flock->pid;
+ gf_flock->l_owner.len = gf_proto_flock->lk_owner.lk_owner_len;
+ if (gf_flock->l_owner.len &&
+ (gf_flock->l_owner.len < GF_MAX_LOCK_OWNER_LEN))
+ memcpy(gf_flock->l_owner.data, gf_proto_flock->lk_owner.lk_owner_val,
+ gf_flock->l_owner.len);
}
+static inline void
+gf_proto_flock_from_flock(struct gf_proto_flock *gf_proto_flock,
+ struct gf_flock *gf_flock)
+{
+ if (!gf_flock || !gf_proto_flock)
+ return;
+
+ gf_proto_flock->type = (gf_flock->l_type);
+ gf_proto_flock->whence = (gf_flock->l_whence);
+ gf_proto_flock->start = (gf_flock->l_start);
+ gf_proto_flock->len = (gf_flock->l_len);
+ gf_proto_flock->pid = (gf_flock->l_pid);
+ gf_proto_flock->lk_owner.lk_owner_len = gf_flock->l_owner.len;
+ if (gf_flock->l_owner.len)
+ gf_proto_flock->lk_owner.lk_owner_val = gf_flock->l_owner.data;
+}
static inline void
-gf_proto_flock_from_flock (struct gf_proto_flock *gf_proto_flock, struct gf_flock *gf_flock)
+gf_stat_to_iatt(struct gf_iatt *gf_stat, struct iatt *iatt)
{
- if (!gf_flock || !gf_proto_flock)
- return;
-
- gf_proto_flock->type = (gf_flock->l_type);
- gf_proto_flock->whence = (gf_flock->l_whence);
- gf_proto_flock->start = (gf_flock->l_start);
- gf_proto_flock->len = (gf_flock->l_len);
- gf_proto_flock->pid = (gf_flock->l_pid);
- gf_proto_flock->lk_owner.lk_owner_len = gf_flock->l_owner.len;
- if (gf_flock->l_owner.len)
- gf_proto_flock->lk_owner.lk_owner_val = gf_flock->l_owner.data;
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(iatt->ia_gfid, gf_stat->ia_gfid, 16);
+ iatt->ia_ino = gf_stat->ia_ino;
+ iatt->ia_dev = gf_stat->ia_dev;
+ iatt->ia_type = ia_type_from_st_mode(gf_stat->mode);
+ iatt->ia_prot = ia_prot_from_st_mode(gf_stat->mode);
+ iatt->ia_nlink = gf_stat->ia_nlink;
+ iatt->ia_uid = gf_stat->ia_uid;
+ iatt->ia_gid = gf_stat->ia_gid;
+ iatt->ia_rdev = gf_stat->ia_rdev;
+ iatt->ia_size = gf_stat->ia_size;
+ iatt->ia_blksize = gf_stat->ia_blksize;
+ iatt->ia_blocks = gf_stat->ia_blocks;
+ iatt->ia_atime = gf_stat->ia_atime;
+ iatt->ia_atime_nsec = gf_stat->ia_atime_nsec;
+ iatt->ia_mtime = gf_stat->ia_mtime;
+ iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec;
+ iatt->ia_ctime = gf_stat->ia_ctime;
+ iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec;
}
static inline void
-gf_stat_to_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
+gf_stat_from_iatt(struct gf_iatt *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(gf_stat->ia_gfid, iatt->ia_gfid, 16);
+ gf_stat->ia_ino = iatt->ia_ino;
+ gf_stat->ia_dev = iatt->ia_dev;
+ gf_stat->mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+ gf_stat->ia_nlink = iatt->ia_nlink;
+ gf_stat->ia_uid = iatt->ia_uid;
+ gf_stat->ia_gid = iatt->ia_gid;
+ gf_stat->ia_rdev = iatt->ia_rdev;
+ gf_stat->ia_size = iatt->ia_size;
+ gf_stat->ia_blksize = iatt->ia_blksize;
+ gf_stat->ia_blocks = iatt->ia_blocks;
+ gf_stat->ia_atime = iatt->ia_atime;
+ gf_stat->ia_atime_nsec = iatt->ia_atime_nsec;
+ gf_stat->ia_mtime = iatt->ia_mtime;
+ gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec;
+ gf_stat->ia_ctime = iatt->ia_ctime;
+ gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec;
+}
+
+static inline int
+gf_proto_cache_invalidation_from_upcall(
+ xlator_t *this, gfs3_cbk_cache_invalidation_req *gf_c_req,
+ struct gf_upcall *gf_up_data)
{
- if (!iatt || !gf_stat)
- return;
-
- memcpy (iatt->ia_gfid, gf_stat->ia_gfid, 16);
- iatt->ia_ino = gf_stat->ia_ino ;
- iatt->ia_dev = gf_stat->ia_dev ;
- iatt->ia_type = ia_type_from_st_mode (gf_stat->mode) ;
- iatt->ia_prot = ia_prot_from_st_mode (gf_stat->mode) ;
- iatt->ia_nlink = gf_stat->ia_nlink ;
- iatt->ia_uid = gf_stat->ia_uid ;
- iatt->ia_gid = gf_stat->ia_gid ;
- iatt->ia_rdev = gf_stat->ia_rdev ;
- iatt->ia_size = gf_stat->ia_size ;
- iatt->ia_blksize = gf_stat->ia_blksize ;
- iatt->ia_blocks = gf_stat->ia_blocks ;
- iatt->ia_atime = gf_stat->ia_atime ;
- iatt->ia_atime_nsec = gf_stat->ia_atime_nsec ;
- iatt->ia_mtime = gf_stat->ia_mtime ;
- iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec ;
- iatt->ia_ctime = gf_stat->ia_ctime ;
- iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec ;
+ struct gf_upcall_cache_invalidation *gf_c_data = NULL;
+ int is_cache_inval = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_req, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ is_cache_inval = ((gf_up_data->event_type == GF_UPCALL_CACHE_INVALIDATION)
+ ? 1
+ : 0);
+ GF_VALIDATE_OR_GOTO(this->name, is_cache_inval, out);
+
+ gf_c_data = (struct gf_upcall_cache_invalidation *)gf_up_data->data;
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_data, out);
+
+ gf_c_req->gfid = uuid_utoa(gf_up_data->gfid);
+ gf_c_req->event_type = gf_up_data->event_type;
+ gf_c_req->flags = gf_c_data->flags;
+ gf_c_req->expire_time_attr = gf_c_data->expire_time_attr;
+ gf_stat_from_iatt(&gf_c_req->stat, &gf_c_data->stat);
+ gf_stat_from_iatt(&gf_c_req->parent_stat, &gf_c_data->p_stat);
+ gf_stat_from_iatt(&gf_c_req->oldparent_stat, &gf_c_data->oldp_stat);
+
+ ret = 0;
+ GF_PROTOCOL_DICT_SERIALIZE(this, gf_c_data->dict,
+ &(gf_c_req->xdata).xdata_val,
+ (gf_c_req->xdata).xdata_len, ret, out);
+out:
+ return ret;
}
+static inline int
+gf_proto_cache_invalidation_to_upcall(xlator_t *this,
+ gfs3_cbk_cache_invalidation_req *gf_c_req,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_cache_invalidation *gf_c_data = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_req, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ gf_c_data = (struct gf_upcall_cache_invalidation *)gf_up_data->data;
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_data, out);
+
+ ret = gf_uuid_parse(gf_c_req->gfid, gf_up_data->gfid);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "gf_uuid_parse(%s) failed",
+ gf_c_req->gfid);
+ gf_up_data->event_type = GF_UPCALL_EVENT_NULL;
+ goto out;
+ }
+
+ gf_up_data->event_type = gf_c_req->event_type;
+
+ gf_c_data->flags = gf_c_req->flags;
+ gf_c_data->expire_time_attr = gf_c_req->expire_time_attr;
+ gf_stat_to_iatt(&gf_c_req->stat, &gf_c_data->stat);
+ gf_stat_to_iatt(&gf_c_req->parent_stat, &gf_c_data->p_stat);
+ gf_stat_to_iatt(&gf_c_req->oldparent_stat, &gf_c_data->oldp_stat);
+
+ ret = 0;
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, gf_c_data->dict,
+ (gf_c_req->xdata).xdata_val,
+ (gf_c_req->xdata).xdata_len, ret, ret, out);
+
+ /* If no dict was sent, create an empty dict, so that each xlator
+ * need not check if empty then create new dict. Will be unref'd by the
+ * caller */
+ if (!gf_c_data->dict)
+ gf_c_data->dict = dict_new();
+out:
+ return ret;
+}
+
+static inline int
+gf_proto_inodelk_contention_to_upcall(struct gfs4_inodelk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_inodelk_contention *tmp = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_inodelk_contention *)gf_up_data->data;
+
+ gf_uuid_copy(gf_up_data->gfid, (unsigned char *)lc->gfid);
+
+ gf_proto_flock_to_flock(&lc->flock, &tmp->flock);
+ tmp->pid = lc->pid;
+ tmp->domain = lc->domain;
+ if ((tmp->domain != NULL) && (*tmp->domain == 0)) {
+ tmp->domain = NULL;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, tmp->xdata, lc->xdata.xdata_val,
+ lc->xdata.xdata_len, ret, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline int
+gf_proto_inodelk_contention_from_upcall(xlator_t *this,
+ struct gfs4_inodelk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_inodelk_contention *tmp = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_inodelk_contention *)gf_up_data->data;
+
+ gf_uuid_copy((unsigned char *)lc->gfid, gf_up_data->gfid);
+
+ gf_proto_flock_from_flock(&lc->flock, &tmp->flock);
+ lc->pid = tmp->pid;
+ lc->domain = (char *)tmp->domain;
+ if (lc->domain == NULL) {
+ lc->domain = "";
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, tmp->xdata, &lc->xdata.xdata_val,
+ lc->xdata.xdata_len, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline int
+gf_proto_entrylk_contention_to_upcall(struct gfs4_entrylk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_entrylk_contention *tmp = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_entrylk_contention *)gf_up_data->data;
+
+ gf_uuid_copy(gf_up_data->gfid, (unsigned char *)lc->gfid);
+
+ tmp->type = lc->type;
+ tmp->name = lc->name;
+ if ((tmp->name != NULL) && (*tmp->name == 0)) {
+ tmp->name = NULL;
+ }
+ tmp->pid = lc->pid;
+ tmp->domain = lc->domain;
+ if ((tmp->domain != NULL) && (*tmp->domain == 0)) {
+ tmp->domain = NULL;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, tmp->xdata, lc->xdata.xdata_val,
+ lc->xdata.xdata_len, ret, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline int
+gf_proto_entrylk_contention_from_upcall(xlator_t *this,
+ struct gfs4_entrylk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_entrylk_contention *tmp = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_entrylk_contention *)gf_up_data->data;
+
+ gf_uuid_copy((unsigned char *)lc->gfid, gf_up_data->gfid);
+
+ lc->type = tmp->type;
+ lc->name = (char *)tmp->name;
+ if (lc->name == NULL) {
+ lc->name = "";
+ }
+ lc->pid = tmp->pid;
+ lc->domain = (char *)tmp->domain;
+ if (lc->domain == NULL) {
+ lc->domain = "";
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, tmp->xdata, &lc->xdata.xdata_val,
+ lc->xdata.xdata_len, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline void
+gfx_mdata_iatt_to_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
+ struct mdata_iatt *mdata_iatt)
+{
+ if (!mdata_iatt || !gf_mdata_iatt)
+ return;
+ mdata_iatt->ia_atime = gf_mdata_iatt->ia_atime;
+ mdata_iatt->ia_atime_nsec = gf_mdata_iatt->ia_atime_nsec;
+ mdata_iatt->ia_mtime = gf_mdata_iatt->ia_mtime;
+ mdata_iatt->ia_mtime_nsec = gf_mdata_iatt->ia_mtime_nsec;
+ mdata_iatt->ia_ctime = gf_mdata_iatt->ia_ctime;
+ mdata_iatt->ia_ctime_nsec = gf_mdata_iatt->ia_ctime_nsec;
+}
+
+static inline void
+gfx_mdata_iatt_from_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
+ struct mdata_iatt *mdata_iatt)
+{
+ if (!mdata_iatt || !gf_mdata_iatt)
+ return;
+ gf_mdata_iatt->ia_atime = mdata_iatt->ia_atime;
+ gf_mdata_iatt->ia_atime_nsec = mdata_iatt->ia_atime_nsec;
+ gf_mdata_iatt->ia_mtime = mdata_iatt->ia_mtime;
+ gf_mdata_iatt->ia_mtime_nsec = mdata_iatt->ia_mtime_nsec;
+ gf_mdata_iatt->ia_ctime = mdata_iatt->ia_ctime;
+ gf_mdata_iatt->ia_ctime_nsec = mdata_iatt->ia_ctime_nsec;
+}
+
+static inline void
+gfx_stat_to_iattx(struct gfx_iattx *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(iatt->ia_gfid, gf_stat->ia_gfid, 16);
+
+ iatt->ia_flags = gf_stat->ia_flags;
+ iatt->ia_ino = gf_stat->ia_ino;
+ iatt->ia_dev = gf_stat->ia_dev;
+ iatt->ia_rdev = gf_stat->ia_rdev;
+ iatt->ia_size = gf_stat->ia_size;
+ iatt->ia_nlink = gf_stat->ia_nlink;
+ iatt->ia_uid = gf_stat->ia_uid;
+ iatt->ia_gid = gf_stat->ia_gid;
+ iatt->ia_blksize = gf_stat->ia_blksize;
+ iatt->ia_blocks = gf_stat->ia_blocks;
+ iatt->ia_atime = gf_stat->ia_atime;
+ iatt->ia_atime_nsec = gf_stat->ia_atime_nsec;
+ iatt->ia_mtime = gf_stat->ia_mtime;
+ iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec;
+ iatt->ia_ctime = gf_stat->ia_ctime;
+ iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec;
+ iatt->ia_btime = gf_stat->ia_btime;
+ iatt->ia_btime_nsec = gf_stat->ia_btime_nsec;
+ iatt->ia_attributes = gf_stat->ia_attributes;
+ iatt->ia_attributes_mask = gf_stat->ia_attributes_mask;
+
+ iatt->ia_type = ia_type_from_st_mode(gf_stat->mode);
+ iatt->ia_prot = ia_prot_from_st_mode(gf_stat->mode);
+}
static inline void
-gf_stat_from_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
+gfx_stat_from_iattx(struct gfx_iattx *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(gf_stat->ia_gfid, iatt->ia_gfid, 16);
+ gf_stat->ia_ino = iatt->ia_ino;
+ gf_stat->ia_dev = iatt->ia_dev;
+
+ gf_stat->ia_nlink = iatt->ia_nlink;
+ gf_stat->ia_uid = iatt->ia_uid;
+ gf_stat->ia_gid = iatt->ia_gid;
+ gf_stat->ia_rdev = iatt->ia_rdev;
+ gf_stat->ia_size = iatt->ia_size;
+ gf_stat->ia_blksize = iatt->ia_blksize;
+ gf_stat->ia_blocks = iatt->ia_blocks;
+ gf_stat->ia_atime = iatt->ia_atime;
+ gf_stat->ia_atime_nsec = iatt->ia_atime_nsec;
+ gf_stat->ia_mtime = iatt->ia_mtime;
+ gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec;
+ gf_stat->ia_ctime = iatt->ia_ctime;
+ gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec;
+
+ gf_stat->ia_flags = iatt->ia_flags;
+ gf_stat->ia_btime = iatt->ia_btime;
+ gf_stat->ia_btime_nsec = iatt->ia_btime_nsec;
+ gf_stat->ia_attributes = iatt->ia_attributes;
+ gf_stat->ia_attributes_mask = iatt->ia_attributes_mask;
+
+ gf_stat->mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+}
+
+/* dict_to_xdr () */
+static inline int
+dict_to_xdr(dict_t *this, gfx_dict *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int index = 0;
+ data_pair_t *dpair = NULL;
+ gfx_dict_pair *xpair = NULL;
+ ssize_t size = 0;
+
+ /* This is a failure as we expect destination to be valid */
+ if (!dict)
+ goto out;
+
+ /* This is OK as dictionary can be null, in which case, destination
+ should also know that it is NULL. */
+ if (!this) {
+ /* encode special meaning data here,
+ while decoding, you know it is NULL dict */
+ dict->count = -1;
+ /* everything else is normal */
+ dict->pairs.pairs_len = 0;
+ ret = 0;
+ goto out;
+ }
+
+ /* Do the whole operation in locked region */
+ LOCK(&this->lock);
+
+ dict->pairs.pairs_val = GF_CALLOC(1, (this->count * sizeof(gfx_dict_pair)),
+ gf_common_mt_char);
+ if (!dict->pairs.pairs_val)
+ goto out;
+
+ dpair = this->members_list;
+ for (i = 0; i < this->count; i++) {
+ xpair = &dict->pairs.pairs_val[index];
+
+ xpair->key.key_val = dpair->key;
+ xpair->key.key_len = strlen(dpair->key) + 1;
+ xpair->value.type = dpair->value->data_type;
+ switch (dpair->value->data_type) {
+ /* Add more type here */
+ case GF_DATA_TYPE_INT:
+ index++;
+ xpair->value.gfx_value_u.value_int = strtoll(dpair->value->data,
+ NULL, 0);
+ break;
+ case GF_DATA_TYPE_UINT:
+ index++;
+ xpair->value.gfx_value_u.value_uint = strtoull(
+ dpair->value->data, NULL, 0);
+ break;
+ case GF_DATA_TYPE_DOUBLE:
+ index++;
+ xpair->value.gfx_value_u.value_dbl = strtod(dpair->value->data,
+ NULL);
+ break;
+ case GF_DATA_TYPE_STR:
+ index++;
+ xpair->value.gfx_value_u.val_string
+ .val_string_val = dpair->value->data;
+ xpair->value.gfx_value_u.val_string
+ .val_string_len = dpair->value->len;
+ break;
+ case GF_DATA_TYPE_IATT:
+ index++;
+ gfx_stat_from_iattx(&xpair->value.gfx_value_u.iatt,
+ (struct iatt *)dpair->value->data);
+ break;
+ case GF_DATA_TYPE_MDATA:
+ index++;
+ gfx_mdata_iatt_from_mdata_iatt(
+ &xpair->value.gfx_value_u.mdata_iatt,
+ (struct mdata_iatt *)dpair->value->data);
+ break;
+ case GF_DATA_TYPE_GFUUID:
+ index++;
+ memcpy(&xpair->value.gfx_value_u.uuid, dpair->value->data,
+ sizeof(uuid_t));
+ break;
+
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ index++;
+ /* Ideally, each type of data stored in dictionary
+ should have type. A pointer type shouldn't be
+ sent on wire */
+
+ /* This is done for backward compatibility as dict is
+ heavily used for transporting data over wire.
+ Ideally, wherever there is an issue, fix and
+ move on */
+ xpair->value.gfx_value_u.other.other_val = dpair->value->data;
+ xpair->value.gfx_value_u.other.other_len = dpair->value->len;
+
+ /* Change this to INFO, after taking the above down */
+ gf_msg("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_DICT_SERIAL_FAILED,
+ "key '%s' would not be sent on wire in the future",
+ dpair->key);
+ break;
+ default:
+ /* Unknown type and ptr type is not sent on wire */
+ gf_msg("dict", GF_LOG_WARNING, EINVAL,
+ LG_MSG_DICT_SERIAL_FAILED,
+ "key '%s' is not sent on wire", dpair->key);
+ break;
+ }
+ dpair = dpair->next;
+ }
+
+ dict->pairs.pairs_len = index;
+ dict->count = index;
+
+ /* This is required mainly in the RPC layer to understand the
+ boundary for proper payload. Hence only send the size of
+ variable XDR size. ie, the formula should be:
+ xdr_size = total size - (xdr_size + count + pairs.pairs_len)) */
+ size = xdr_sizeof((xdrproc_t)xdr_gfx_dict, dict);
+
+ dict->xdr_size = (size > 12) ? (size - 12) : 0;
+
+ ret = 0;
+out:
+ /* this can be null here, so unlock only if its not null */
+ if (this)
+ UNLOCK(&this->lock);
+
+ return ret;
+}
+
+static inline int
+xdr_to_dict(gfx_dict *dict, dict_t **to)
{
- if (!iatt || !gf_stat)
- return;
-
- memcpy (gf_stat->ia_gfid, iatt->ia_gfid, 16);
- gf_stat->ia_ino = iatt->ia_ino ;
- gf_stat->ia_dev = iatt->ia_dev ;
- gf_stat->mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
- gf_stat->ia_nlink = iatt->ia_nlink ;
- gf_stat->ia_uid = iatt->ia_uid ;
- gf_stat->ia_gid = iatt->ia_gid ;
- gf_stat->ia_rdev = iatt->ia_rdev ;
- gf_stat->ia_size = iatt->ia_size ;
- gf_stat->ia_blksize = iatt->ia_blksize ;
- gf_stat->ia_blocks = iatt->ia_blocks ;
- gf_stat->ia_atime = iatt->ia_atime ;
- gf_stat->ia_atime_nsec = iatt->ia_atime_nsec ;
- gf_stat->ia_mtime = iatt->ia_mtime ;
- gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec ;
- gf_stat->ia_ctime = iatt->ia_ctime ;
- gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec ;
+ int ret = -1;
+ int index = 0;
+ char *key = NULL;
+ char *value = NULL;
+ gfx_dict_pair *xpair = NULL;
+ dict_t *this = NULL;
+ unsigned char *uuid = NULL;
+ struct iatt *iatt = NULL;
+ struct mdata_iatt *mdata_iatt = NULL;
+
+ if (!to || !dict)
+ goto out;
+
+ if (dict->count < 0) {
+ /* indicates NULL dict was passed for encoding */
+ ret = 0;
+ goto out;
+ }
+
+ this = dict_new();
+ if (!this)
+ goto out;
+
+ for (index = 0; index < dict->pairs.pairs_len; index++) {
+ ret = -1;
+ xpair = &dict->pairs.pairs_val[index];
+
+ key = xpair->key.key_val;
+ switch (xpair->value.type) {
+ /* Add more type here */
+ case GF_DATA_TYPE_INT:
+ ret = dict_set_int64(this, key,
+ xpair->value.gfx_value_u.value_int);
+ break;
+ case GF_DATA_TYPE_UINT:
+ ret = dict_set_uint64(this, key,
+ xpair->value.gfx_value_u.value_uint);
+ break;
+ case GF_DATA_TYPE_DOUBLE:
+ ret = dict_set_double(this, key,
+ xpair->value.gfx_value_u.value_dbl);
+ break;
+ case GF_DATA_TYPE_STR:
+ value = GF_MALLOC(
+ xpair->value.gfx_value_u.val_string.val_string_len + 1,
+ gf_common_mt_char);
+ if (!value) {
+ errno = ENOMEM;
+ goto out;
+ }
+ memcpy(value,
+ xpair->value.gfx_value_u.val_string.val_string_val,
+ xpair->value.gfx_value_u.val_string.val_string_len);
+ value[xpair->value.gfx_value_u.val_string.val_string_len] =
+ '\0';
+ free(xpair->value.gfx_value_u.val_string.val_string_val);
+ ret = dict_set_dynstr(this, key, value);
+ break;
+ case GF_DATA_TYPE_GFUUID:
+ uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid) {
+ errno = ENOMEM;
+ goto out;
+ }
+ memcpy(uuid, xpair->value.gfx_value_u.uuid, sizeof(uuid_t));
+ ret = dict_set_gfuuid(this, key, uuid, false);
+ break;
+ case GF_DATA_TYPE_IATT:
+ iatt = GF_CALLOC(1, sizeof(struct iatt), gf_common_mt_char);
+ if (!iatt) {
+ errno = ENOMEM;
+ goto out;
+ }
+ gfx_stat_to_iattx(&xpair->value.gfx_value_u.iatt, iatt);
+ ret = dict_set_iatt(this, key, iatt, false);
+ break;
+ case GF_DATA_TYPE_MDATA:
+ mdata_iatt = GF_CALLOC(1, sizeof(struct mdata_iatt),
+ gf_common_mt_char);
+ if (!mdata_iatt) {
+ errno = ENOMEM;
+ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "failed to allocate memory. key: %s", key);
+ ret = -1;
+ goto out;
+ }
+ gfx_mdata_iatt_to_mdata_iatt(
+ &xpair->value.gfx_value_u.mdata_iatt, mdata_iatt);
+ ret = dict_set_mdata(this, key, mdata_iatt, false);
+ if (ret != 0) {
+ GF_FREE(mdata_iatt);
+ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ LG_MSG_DICT_SET_FAILED,
+ "failed to set the key (%s)"
+ " into dict",
+ key);
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ value = GF_MALLOC(xpair->value.gfx_value_u.other.other_len + 1,
+ gf_common_mt_char);
+ if (!value) {
+ errno = ENOMEM;
+ goto out;
+ }
+ memcpy(value, xpair->value.gfx_value_u.other.other_val,
+ xpair->value.gfx_value_u.other.other_len);
+ value[xpair->value.gfx_value_u.other.other_len] = '\0';
+ free(xpair->value.gfx_value_u.other.other_val);
+ ret = dict_set_dynptr(this, key, value,
+ xpair->value.gfx_value_u.other.other_len);
+ break;
+ default:
+ ret = 0;
+ /* Unknown type and ptr type is not sent on wire */
+ break;
+ }
+ if (ret) {
+ gf_msg_debug(THIS->name, ENOMEM,
+ "failed to set the key (%s) into dict", key);
+ }
+ free(xpair->key.key_val);
+ }
+
+ free(dict->pairs.pairs_val);
+ ret = 0;
+
+ /* If everything is fine, assign the dictionary to target */
+ *to = this;
+ this = NULL;
+
+out:
+ if (this)
+ dict_unref(this);
+
+ return ret;
}
#endif /* !_GLUSTERFS3_H */
diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x
new file mode 100644
index 00000000000..d3b1d0dfaf0
--- /dev/null
+++ b/rpc/xdr/src/glusterfs4-xdr.x
@@ -0,0 +1,797 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/glusterfs-fops.h>
+%#include "glusterfs3-xdr.h"
+
+/* Need to consume iattx and new dict in all the fops */
+struct gfx_iattx {
+ opaque ia_gfid[16];
+
+ unsigned hyper ia_flags;
+ unsigned hyper ia_ino; /* inode number */
+ unsigned hyper ia_dev; /* backing device ID */
+ unsigned hyper ia_rdev; /* device ID (if special file) */
+ unsigned hyper ia_size; /* file size in bytes */
+ unsigned hyper ia_blocks; /* number of 512B blocks allocated */
+ unsigned hyper ia_attributes; /* chattr related:compressed, immutable,
+ * append only, encrypted etc.*/
+ unsigned hyper ia_attributes_mask; /* Mask for the attributes */
+
+ hyper ia_atime; /* last access time */
+ hyper ia_mtime; /* last modification time */
+ hyper ia_ctime; /* last status change time */
+ hyper ia_btime; /* creation time. Fill using statx */
+
+ unsigned int ia_atime_nsec;
+ unsigned int ia_mtime_nsec;
+ unsigned int ia_ctime_nsec;
+ unsigned int ia_btime_nsec;
+ unsigned int ia_nlink; /* Link count */
+ unsigned int ia_uid; /* user ID of owner */
+ unsigned int ia_gid; /* group ID of owner */
+ unsigned int ia_blksize; /* blocksize for filesystem I/O */
+ unsigned int mode; /* type of file and rwx mode */
+};
+
+struct gfx_mdata_iatt {
+ hyper ia_atime; /* last access time */
+ hyper ia_mtime; /* last modification time */
+ hyper ia_ctime; /* last status change time */
+
+ unsigned int ia_atime_nsec;
+ unsigned int ia_mtime_nsec;
+ unsigned int ia_ctime_nsec;
+};
+
+union gfx_value switch (int type) {
+ case GF_DATA_TYPE_INT:
+ hyper value_int;
+ case GF_DATA_TYPE_UINT:
+ unsigned hyper value_uint;
+ case GF_DATA_TYPE_DOUBLE:
+ double value_dbl;
+ case GF_DATA_TYPE_STR:
+ opaque val_string<>;
+ case GF_DATA_TYPE_IATT:
+ gfx_iattx iatt;
+ case GF_DATA_TYPE_GFUUID:
+ opaque uuid[16];
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ opaque other<>;
+ case GF_DATA_TYPE_MDATA:
+ gfx_mdata_iatt mdata_iatt;
+};
+
+/* AUTH */
+/* This is used in the rpc header part itself, And not program payload.
+ Avoid sending large data load here. Allowed maximum is 400 bytes.
+ Ref: http://tools.ietf.org/html/rfc5531#section-8.2
+ this is also handled in xdr-common.h
+*/
+struct auth_glusterfs_params_v3 {
+ int pid;
+ unsigned int uid;
+ unsigned int gid;
+
+ /* flags */
+ /* Makes sense to use it for each bits */
+ /* 0x1 == IS_INTERNAL? */
+ /* Another 31 bits are reserved */
+ unsigned int flags;
+
+ /* birth time of the frame / call */
+ unsigned int ctime_nsec; /* good to have 32bit for this */
+ unsigned hyper ctime_sec;
+
+ unsigned int groups<>;
+ opaque lk_owner<>;
+};
+
+struct gfx_dict_pair {
+ opaque key<>;
+ gfx_value value;
+};
+
+struct gfx_dict {
+ unsigned int xdr_size;
+ int count;
+ gfx_dict_pair pairs<>;
+};
+
+/* FOPS */
+struct gfx_common_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_common_iatt_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ gfx_iattx stat;
+};
+
+struct gfx_common_2iatt_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ gfx_iattx prestat;
+ gfx_iattx poststat;
+};
+
+struct gfx_common_3iatt_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx stat;
+ gfx_iattx preparent;
+ gfx_iattx postparent;
+};
+
+struct gfx_fsetattr_req {
+ opaque gfid[16];
+ hyper fd;
+ gfx_iattx stbuf;
+ int valid;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_rchecksum_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned int len;
+ unsigned int flags;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_icreate_req {
+ opaque gfid[16];
+ unsigned int mode;
+ gfx_dict xdata;
+};
+
+struct gfx_put_req {
+ opaque pargfid[16];
+ string bname<>;
+ unsigned int mode;
+ unsigned int umask;
+ unsigned int flag;
+ u_quad_t offset;
+ unsigned int size;
+ gfx_dict xattr;
+ gfx_dict xdata;
+};
+
+struct gfx_namelink_req {
+ opaque pargfid[16];
+ string bname<>;
+ gfx_dict xdata;
+};
+
+/* Define every fops */
+/* Changes from Version 3:
+ 1. Dict has its own type instead of being opaque
+ 2. Iattx instead of iatt on wire
+ 3. gfid has 4 extra bytes so it can be used for future
+*/
+struct gfx_stat_req {
+ opaque gfid[16];
+ gfx_dict xdata;
+};
+
+struct gfx_readlink_req {
+ opaque gfid[16];
+ unsigned int size;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_readlink_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx buf;
+ string path<>; /* NULL terminated */
+};
+
+struct gfx_mknod_req {
+ opaque pargfid[16];
+ u_quad_t dev;
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_mkdir_req {
+ opaque pargfid[16];
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_unlink_req {
+ opaque pargfid[16];
+ string bname<>; /* NULL terminated */
+ unsigned int xflags;
+ gfx_dict xdata; /* Extra data */
+};
+
+
+struct gfx_rmdir_req {
+ opaque pargfid[16];
+ int xflags;
+ string bname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_symlink_req {
+ opaque pargfid[16];
+ string bname<>;
+ unsigned int umask;
+ string linkname<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_rename_req {
+ opaque oldgfid[16];
+ opaque newgfid[16];
+ string oldbname<>; /* NULL terminated */
+ string newbname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_rename_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx stat;
+ gfx_iattx preoldparent;
+ gfx_iattx postoldparent;
+ gfx_iattx prenewparent;
+ gfx_iattx postnewparent;
+};
+
+
+ struct gfx_link_req {
+ opaque oldgfid[16];
+ opaque newgfid[16];
+ string newbname<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_truncate_req {
+ opaque gfid[16];
+ u_quad_t offset;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_open_req {
+ opaque gfid[16];
+ unsigned int flags;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_open_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ quad_t fd;
+};
+
+struct gfx_opendir_req {
+ opaque gfid[16];
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_read_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ unsigned int flag;
+ gfx_dict xdata; /* Extra data */
+};
+ struct gfx_read_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_iattx stat;
+ unsigned int size;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_lookup_req {
+ opaque gfid[16];
+ opaque pargfid[16];
+ unsigned int flags;
+ string bname<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+
+ struct gfx_write_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ unsigned int flag;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_statfs_req {
+ opaque gfid[16];
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_statfs_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gf_statfs statfs;
+} ;
+
+ struct gfx_lk_req {
+ opaque gfid[16];
+ int64_t fd;
+ unsigned int cmd;
+ unsigned int type;
+ gf_proto_flock flock;
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_lk_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gf_proto_flock flock;
+} ;
+
+struct gfx_lease_req {
+ opaque gfid[16];
+ gf_proto_lease lease;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_lease_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gf_proto_lease lease;
+} ;
+
+struct gfx_recall_lease_req {
+ opaque gfid[16];
+ unsigned int lease_type;
+ opaque tid[16];
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_inodelk_req {
+ opaque gfid[16];
+ unsigned int cmd;
+ unsigned int type;
+ gf_proto_flock flock;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_finodelk_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int cmd;
+ unsigned int type;
+ gf_proto_flock flock;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_flush_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_fsync_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int data;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_setxattr_req {
+ opaque gfid[16];
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+
+ struct gfx_fsetxattr_req {
+ opaque gfid[16];
+ int64_t fd;
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+
+ struct gfx_xattrop_req {
+ opaque gfid[16];
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_common_dict_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_dict dict;
+ gfx_iattx prestat;
+ gfx_iattx poststat;
+};
+
+
+ struct gfx_fxattrop_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_getxattr_req {
+ opaque gfid[16];
+ unsigned int namelen;
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_fgetxattr_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int namelen;
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_removexattr_req {
+ opaque gfid[16];
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_fremovexattr_req {
+ opaque gfid[16];
+ quad_t fd;
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_fsyncdir_req {
+ opaque gfid[16];
+ quad_t fd;
+ int data;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_readdir_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_readdirp_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ gfx_dict xdata;
+} ;
+
+
+struct gfx_access_req {
+ opaque gfid[16];
+ unsigned int mask;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+struct gfx_create_req {
+ opaque pargfid[16];
+ unsigned int flags;
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+struct gfx_create_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx stat;
+ u_quad_t fd;
+ gfx_iattx preparent;
+ gfx_iattx postparent;
+} ;
+
+struct gfx_ftruncate_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+struct gfx_fstat_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+struct gfx_entrylk_req {
+ opaque gfid[16];
+ unsigned int cmd;
+ unsigned int type;
+ u_quad_t namelen;
+ string name<>;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_fentrylk_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int cmd;
+ unsigned int type;
+ u_quad_t namelen;
+ string name<>;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_setattr_req {
+ opaque gfid[16];
+ gfx_iattx stbuf;
+ int valid;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_fallocate_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int flags;
+ u_quad_t offset;
+ u_quad_t size;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_discard_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_zerofill_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ gfx_dict xdata;
+} ;
+
+struct gfx_rchecksum_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ unsigned int flags;
+ unsigned int weak_checksum;
+ opaque strong_checksum<>;
+} ;
+
+
+struct gfx_ipc_req {
+ int op;
+ gfx_dict xdata;
+};
+
+
+struct gfx_seek_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ int what;
+ gfx_dict xdata;
+};
+
+struct gfx_seek_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ u_quad_t offset;
+};
+
+
+ struct gfx_setvolume_req {
+ gfx_dict dict;
+} ;
+
+ struct gfx_copy_file_range_req {
+ opaque gfid1[16];
+ opaque gfid2[16];
+ quad_t fd_in;
+ quad_t fd_out;
+ u_quad_t off_in;
+ u_quad_t off_out;
+ unsigned int size;
+ unsigned int flag;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_setvolume_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict dict;
+} ;
+
+
+ struct gfx_getspec_req {
+ unsigned int flags;
+ string key<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_getspec_rsp {
+ int op_ret;
+ int op_errno;
+ string spec<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_notify_req {
+ unsigned int flags;
+ string buf<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_notify_rsp {
+ int op_ret;
+ int op_errno;
+ unsigned int flags;
+ string buf<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_releasedir_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_release_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_dirlist {
+ u_quad_t d_ino;
+ u_quad_t d_off;
+ unsigned int d_len;
+ unsigned int d_type;
+ string name<>;
+ gfx_dirlist *nextentry;
+};
+
+
+struct gfx_readdir_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_dirlist *reply;
+};
+
+struct gfx_dirplist {
+ u_quad_t d_ino;
+ u_quad_t d_off;
+ unsigned int d_len;
+ unsigned int d_type;
+ string name<>;
+ gfx_iattx stat;
+ gfx_dict dict;
+ gfx_dirplist *nextentry;
+};
+
+struct gfx_readdirp_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_dirplist *reply;
+};
+
+struct gfx_set_lk_ver_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ int lk_ver;
+};
+
+struct gfx_set_lk_ver_req {
+ string uid<>;
+ int lk_ver;
+};
+
+struct gfx_event_notify_req {
+ int op;
+ gfx_dict dict;
+};
+
+
+struct gfx_getsnap_name_uuid_req {
+ gfx_dict dict;
+};
+
+struct gfx_getsnap_name_uuid_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict dict;
+ string op_errstr<>;
+};
+
+struct gfx_getactivelk_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ gfs3_locklist *reply;
+};
+
+struct gfx_getactivelk_req {
+ opaque gfid[16];
+ gfx_dict xdata;
+};
+
+struct gfx_setactivelk_req {
+ opaque gfid[16];
+ gfs3_locklist *request;
+ gfx_dict xdata;
+};
+
+struct gfs4_inodelk_contention_req {
+ opaque gfid[16];
+ struct gf_proto_flock flock;
+ unsigned int pid;
+ string domain<>;
+ opaque xdata<>;
+};
+
+struct gfs4_entrylk_contention_req {
+ opaque gfid[16];
+ unsigned int type;
+ unsigned int pid;
+ string name<>;
+ string domain<>;
+ opaque xdata<>;
+};
diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym
new file mode 100644
index 00000000000..8fa0e0ddd8a
--- /dev/null
+++ b/rpc/xdr/src/libgfxdr.sym
@@ -0,0 +1,350 @@
+xdr_auth_glusterfs_parms
+xdr_auth_glusterfs_parms_v2
+xdr_auth_glusterfs_params_v3
+xdr_changelog_event_req
+xdr_changelog_event_rsp
+xdr_changelog_probe_req
+xdr_changelog_probe_rsp
+xdr_dirpath
+xdr_free_exports_list
+xdr_free_mountlist
+xdr_gd1_mgmt_brick_op_req
+xdr_gd1_mgmt_brick_op_rsp
+xdr_gd1_mgmt_cluster_lock_req
+xdr_gd1_mgmt_cluster_lock_rsp
+xdr_gd1_mgmt_cluster_unlock_req
+xdr_gd1_mgmt_cluster_unlock_rsp
+xdr_gd1_mgmt_commit_op_req
+xdr_gd1_mgmt_commit_op_rsp
+xdr_gd1_mgmt_friend_req
+xdr_gd1_mgmt_friend_rsp
+xdr_gd1_mgmt_friend_update
+xdr_gd1_mgmt_friend_update_rsp
+xdr_gd1_mgmt_probe_req
+xdr_gd1_mgmt_probe_rsp
+xdr_gd1_mgmt_stage_op_req
+xdr_gd1_mgmt_stage_op_rsp
+xdr_gd1_mgmt_v3_brick_op_req
+xdr_gd1_mgmt_v3_brick_op_rsp
+xdr_gd1_mgmt_v3_commit_req
+xdr_gd1_mgmt_v3_commit_rsp
+xdr_gd1_mgmt_v3_post_commit_req
+xdr_gd1_mgmt_v3_post_commit_rsp
+xdr_gd1_mgmt_v3_lock_req
+xdr_gd1_mgmt_v3_lock_rsp
+xdr_gd1_mgmt_v3_post_val_req
+xdr_gd1_mgmt_v3_post_val_rsp
+xdr_gd1_mgmt_v3_pre_val_req
+xdr_gd1_mgmt_v3_pre_val_rsp
+xdr_gd1_mgmt_v3_unlock_req
+xdr_gd1_mgmt_v3_unlock_rsp
+xdr_gf1_cli_fsm_log_req
+xdr_gf1_cli_fsm_log_rsp
+xdr_gf1_cli_getwd_req
+xdr_gf1_cli_getwd_rsp
+xdr_gf1_cli_mount_req
+xdr_gf1_cli_mount_rsp
+xdr_gf1_cli_peer_list_req
+xdr_gf1_cli_peer_list_rsp
+xdr_gf1_cli_umount_req
+xdr_gf1_cli_umount_rsp
+xdr_gf_cli_req
+xdr_gf_cli_rsp
+xdr_gf_common_rsp
+xdr_gf_dump_req
+xdr_gf_dump_rsp
+xdr_gf_event_notify_req
+xdr_gf_event_notify_rsp
+xdr_gf_getsnap_name_uuid_req
+xdr_gf_getsnap_name_uuid_rsp
+xdr_gf_getspec_req
+xdr_gf_getspec_rsp
+xdr_gf_get_volume_info_req
+xdr_gf_get_volume_info_rsp
+xdr_gf_mgmt_hndsk_req
+xdr_gf_mgmt_hndsk_rsp
+xdr_gfs3_access_req
+xdr_gfs3_cbk_cache_invalidation_req
+xdr_gfs3_compound_req
+xdr_gfs3_compound_rsp
+xdr_gfs3_create_req
+xdr_gfs3_create_rsp
+xdr_gfs3_discard_req
+xdr_gfs3_discard_rsp
+xdr_gfs3_entrylk_req
+xdr_gfs3_fallocate_req
+xdr_gfs3_fallocate_rsp
+xdr_gfs3_fentrylk_req
+xdr_gfs3_fgetxattr_req
+xdr_gfs3_fgetxattr_rsp
+xdr_gfs3_finodelk_req
+xdr_gfs3_flush_req
+xdr_gfs3_fremovexattr_req
+xdr_gfs3_fsetattr_req
+xdr_gfs3_fsetattr_req_v2
+xdr_gfs3_fsetattr_rsp
+xdr_gfs3_fsetxattr_req
+xdr_gfs3_fstat_req
+xdr_gfs3_fstat_rsp
+xdr_gfs3_fsyncdir_req
+xdr_gfs3_fsync_req
+xdr_gfs3_fsync_rsp
+xdr_gfs3_ftruncate_req
+xdr_gfs3_ftruncate_rsp
+xdr_gfs3_fxattrop_req
+xdr_gfs3_fxattrop_rsp
+xdr_gfs3_getactivelk_req
+xdr_gfs3_getactivelk_rsp
+xdr_gfs3_getxattr_req
+xdr_gfs3_getxattr_rsp
+xdr_gfs3_inodelk_req
+xdr_gfs3_ipc_req
+xdr_gfs3_ipc_rsp
+xdr_gfs3_lease_req
+xdr_gfs3_lease_rsp
+xdr_gfs3_link_req
+xdr_gfs3_link_rsp
+xdr_gfs3_lk_req
+xdr_gfs3_lk_rsp
+xdr_gfs3_lookup_req
+xdr_gfs3_lookup_rsp
+xdr_gfs3_mkdir_req
+xdr_gfs3_mkdir_rsp
+xdr_gfs3_mknod_req
+xdr_gfs3_mknod_rsp
+xdr_gfs3_opendir_req
+xdr_gfs3_opendir_rsp
+xdr_gfs3_open_req
+xdr_gfs3_open_rsp
+xdr_gfs3_rchecksum_req
+xdr_gfs3_rchecksum_req_v2
+xdr_gfs3_rchecksum_rsp
+xdr_gfs3_readdirp_req
+xdr_gfs3_readdirp_rsp
+xdr_gfs3_readdir_req
+xdr_gfs3_readdir_rsp
+xdr_gfs3_readlink_req
+xdr_gfs3_readlink_rsp
+xdr_gfs3_read_req
+xdr_gfs3_read_rsp
+xdr_gfs3_recall_lease_req
+xdr_gfs3_releasedir_req
+xdr_gfs3_release_req
+xdr_gfs3_removexattr_req
+xdr_gfs3_rename_req
+xdr_gfs3_rename_rsp
+xdr_gfs3_rmdir_req
+xdr_gfs3_rmdir_rsp
+xdr_gfs3_seek_req
+xdr_gfs3_seek_rsp
+xdr_gfs3_setactivelk_req
+xdr_gfs3_setactivelk_rsp
+xdr_gfs3_setattr_req
+xdr_gfs3_setattr_rsp
+xdr_gfs3_setxattr_req
+xdr_gfs3_statfs_req
+xdr_gfs3_statfs_rsp
+xdr_gfs3_stat_req
+xdr_gfs3_stat_rsp
+xdr_gfs3_symlink_req
+xdr_gfs3_symlink_rsp
+xdr_gfs3_truncate_req
+xdr_gfs3_truncate_rsp
+xdr_gfs3_unlink_req
+xdr_gfs3_unlink_rsp
+xdr_gfs3_write_req
+xdr_gfs3_write_rsp
+xdr_gfs3_xattrop_req
+xdr_gfs3_xattrop_rsp
+xdr_gfs3_zerofill_req
+xdr_gfs3_zerofill_rsp
+xdr_gfs4_entrylk_contention_req
+xdr_gfs4_entrylk_contention_rsp
+xdr_gfs4_icreate_req
+xdr_gfs4_icreate_rsp
+xdr_gfs4_inodelk_contention_req
+xdr_gfs4_inodelk_contention_rsp
+xdr_gfs4_namelink_req
+xdr_gfs4_namelink_rsp
+xdr_gf_set_lk_ver_req
+xdr_gf_set_lk_ver_rsp
+xdr_gf_setvolume_req
+xdr_gf_setvolume_rsp
+xdr_gf_statedump
+xdr_length_round_up
+xdr_mon
+xdr_mountres3
+xdr_mountstat3
+xdr_nlm_sm_status
+xdr_pmap_brick_by_port_req
+xdr_pmap_brick_by_port_rsp
+xdr_pmap_port_by_brick_req
+xdr_pmap_port_by_brick_rsp
+xdr_pmap_signin_req
+xdr_pmap_signin_rsp
+xdr_pmap_signout_req
+xdr_pmap_signout_rsp
+xdr_serialize_access3res
+xdr_serialize_commit3res
+xdr_serialize_create3res
+xdr_serialize_exports
+xdr_serialize_fsinfo3res
+xdr_serialize_fsstat3res
+xdr_serialize_generic
+xdr_serialize_getaclreply
+xdr_serialize_getattr3res
+xdr_serialize_link3res
+xdr_serialize_lookup3res
+xdr_serialize_mkdir3res
+xdr_serialize_mknod3res
+xdr_serialize_mountlist
+xdr_serialize_mountres3
+xdr_serialize_mountstat3
+xdr_serialize_nlm4_res
+xdr_serialize_nlm4_shareres
+xdr_serialize_nlm4_testargs
+xdr_serialize_nlm4_testres
+xdr_serialize_pathconf3res
+xdr_serialize_read3res
+xdr_serialize_read3res_nocopy
+xdr_serialize_readdir3res
+xdr_serialize_readdirp3res
+xdr_serialize_readlink3res
+xdr_serialize_remove3res
+xdr_serialize_rename3res
+xdr_serialize_rmdir3res
+xdr_serialize_setaclreply
+xdr_serialize_setattr3res
+xdr_serialize_symlink3res
+xdr_serialize_write3res
+xdr_sm_stat
+xdr_sm_stat_res
+xdr_to_access3args
+xdr_to_commit3args
+xdr_to_create3args
+xdr_to_fsinfo3args
+xdr_to_fsstat3args
+xdr_to_generic
+xdr_to_getaclargs
+xdr_to_getattr3args
+xdr_to_link3args
+xdr_to_lookup3args
+xdr_to_mkdir3args
+xdr_to_mknod3args
+xdr_to_mountpath
+xdr_to_nlm4_cancelargs
+xdr_to_nlm4_freeallargs
+xdr_to_nlm4_lockargs
+xdr_to_nlm4_shareargs
+xdr_to_nlm4_testargs
+xdr_to_nlm4_unlockargs
+xdr_to_pathconf3args
+xdr_to_read3args
+xdr_to_readdir3args
+xdr_to_readdirp3args
+xdr_to_readlink3args
+xdr_to_remove3args
+xdr_to_rename3args
+xdr_to_rmdir3args
+xdr_to_setaclargs
+xdr_to_setattr3args
+xdr_to_symlink3args
+xdr_to_write3args
+xdr_vector_round_up
+xdr_gfx_read_rsp
+xdr_gfx_iattx
+xdr_gfx_mdata_iatt
+xdr_gfx_value
+xdr_gfx_dict_pair
+xdr_gfx_dict
+xdr_gfx_common_rsp
+xdr_gfx_common_iatt_rsp
+xdr_gfx_common_2iatt_rsp
+xdr_gfx_common_3iatt_rsp
+xdr_gfx_fsetattr_req
+xdr_gfx_rchecksum_req
+xdr_gfx_icreate_req
+xdr_gfx_namelink_req
+xdr_gfx_stat_req
+xdr_gfx_readlink_req
+xdr_gfx_readlink_rsp
+xdr_gfx_mknod_req
+xdr_gfx_mkdir_req
+xdr_gfx_unlink_req
+xdr_gfx_rmdir_req
+xdr_gfx_symlink_req
+xdr_gfx_rename_req
+xdr_gfx_rename_rsp
+xdr_gfx_link_req
+xdr_gfx_truncate_req
+xdr_gfx_open_req
+xdr_gfx_open_rsp
+xdr_gfx_opendir_req
+xdr_gfx_read_req
+xdr_gfx_read_rsp
+xdr_gfx_lookup_req
+xdr_gfx_write_req
+xdr_gfx_statfs_req
+xdr_gfx_statfs_rsp
+xdr_gfx_lk_req
+xdr_gfx_lk_rsp
+xdr_gfx_lease_req
+xdr_gfx_lease_rsp
+xdr_gfx_recall_lease_req
+xdr_gfx_inodelk_req
+xdr_gfx_finodelk_req
+xdr_gfx_flush_req
+xdr_gfx_fsync_req
+xdr_gfx_setxattr_req
+xdr_gfx_fsetxattr_req
+xdr_gfx_xattrop_req
+xdr_gfx_common_dict_rsp
+xdr_gfx_fxattrop_req
+xdr_gfx_getxattr_req
+xdr_gfx_fgetxattr_req
+xdr_gfx_removexattr_req
+xdr_gfx_fremovexattr_req
+xdr_gfx_fsyncdir_req
+xdr_gfx_readdir_req
+xdr_gfx_readdirp_req
+xdr_gfx_access_req
+xdr_gfx_create_req
+xdr_gfx_create_rsp
+xdr_gfx_ftruncate_req
+xdr_gfx_fstat_req
+xdr_gfx_entrylk_req
+xdr_gfx_fentrylk_req
+xdr_gfx_setattr_req
+xdr_gfx_fallocate_req
+xdr_gfx_discard_req
+xdr_gfx_zerofill_req
+xdr_gfx_rchecksum_rsp
+xdr_gfx_ipc_req
+xdr_gfx_seek_req
+xdr_gfx_seek_rsp
+xdr_gfx_setvolume_req
+xdr_gfx_setvolume_rsp
+xdr_gfx_getspec_req
+xdr_gfx_getspec_rsp
+xdr_gfx_notify_req
+xdr_gfx_notify_rsp
+xdr_gfx_releasedir_req
+xdr_gfx_release_req
+xdr_gfx_dirlist
+xdr_gfx_readdir_rsp
+xdr_gfx_dirplist
+xdr_gfx_readdirp_rsp
+xdr_gfx_set_lk_ver_rsp
+xdr_gfx_set_lk_ver_req
+xdr_gfx_event_notify_req
+xdr_gfx_getsnap_name_uuid_req
+xdr_gfx_getsnap_name_uuid_rsp
+xdr_gfx_getactivelk_rsp
+xdr_gfx_getactivelk_req
+xdr_gfx_setactivelk_req
+xdr_gfx_put_req
+xdr_compound_req_v2
+xdr_gfx_compound_req
+xdr_compound_rsp_v2
+xdr_gfx_compound_rsp
+xdr_gfx_copy_file_range_req
diff --git a/rpc/xdr/src/mount3udp.x b/rpc/xdr/src/mount3udp.x
index 90b5b741abe..e8366df400c 100644
--- a/rpc/xdr/src/mount3udp.x
+++ b/rpc/xdr/src/mount3udp.x
@@ -1,26 +1,24 @@
/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
/* This is used by rpcgen to auto generate the rpc stubs.
* mount3udp_svc.c is heavily modified though
*/
+%#include "xdr-nfs3.h"
+
const MNTUDPPATHLEN = 1024;
typedef string mntudpdirpath<MNTPATHLEN>;
diff --git a/rpc/xdr/src/msg-nfs3.c b/rpc/xdr/src/msg-nfs3.c
index 8078f23f097..d14a731b62a 100644
--- a/rpc/xdr/src/msg-nfs3.c
+++ b/rpc/xdr/src/msg-nfs3.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/uio.h>
#include <rpc/rpc.h>
#include <rpc/xdr.h>
@@ -32,35 +18,34 @@
#include "xdr-generic.h"
#include "xdr-common.h"
-
/* Decode the mount path from the network message in inmsg
* into the memory referenced by outpath.iov_base.
* The size allocated for outpath.iov_base is outpath.iov_len.
* The size of the path extracted from the message is returned.
*/
ssize_t
-xdr_to_mountpath (struct iovec outpath, struct iovec inmsg)
+xdr_to_mountpath(struct iovec outpath, struct iovec inmsg)
{
- XDR xdr;
- ssize_t ret = -1;
- char *mntpath = NULL;
+ XDR xdr;
+ ssize_t ret = -1;
+ char *mntpath = NULL;
- if ((!outpath.iov_base) || (!inmsg.iov_base))
- return -1;
+ if ((!outpath.iov_base) || (!inmsg.iov_base))
+ return -1;
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
+ xdrmem_create(&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
- mntpath = outpath.iov_base;
- if (!xdr_dirpath (&xdr, (dirpath *)&mntpath)) {
- ret = -1;
- goto ret;
- }
+ mntpath = outpath.iov_base;
+ if (!xdr_dirpath(&xdr, (dirpath *)&mntpath)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
ret:
- return ret;
+ return ret;
}
/* Translate the mountres3 structure in res into XDR format into memory
@@ -68,485 +53,429 @@ ret:
* Returns the number of bytes used in encoding into XDR format.
*/
ssize_t
-xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res)
+xdr_serialize_mountres3(struct iovec outmsg, mountres3 *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mountres3);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_mountres3);
}
-
ssize_t
-xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb)
+xdr_serialize_mountbody(struct iovec outmsg, mountbody *mb)
{
- return xdr_serialize_generic (outmsg, (void *)mb,
- (xdrproc_t)xdr_mountbody);
+ return xdr_serialize_generic(outmsg, (void *)mb, (xdrproc_t)xdr_mountbody);
}
ssize_t
-xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml)
+xdr_serialize_mountlist(struct iovec outmsg, mountlist *ml)
{
- return xdr_serialize_generic (outmsg, (void *)ml,
- (xdrproc_t)xdr_mountlist);
+ return xdr_serialize_generic(outmsg, (void *)ml, (xdrproc_t)xdr_mountlist);
}
-
ssize_t
-xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m)
+xdr_serialize_mountstat3(struct iovec outmsg, mountstat3 *m)
{
- return xdr_serialize_generic (outmsg, (void *)m,
- (xdrproc_t)xdr_mountstat3);
+ return xdr_serialize_generic(outmsg, (void *)m, (xdrproc_t)xdr_mountstat3);
}
-
ssize_t
-xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga)
+xdr_to_getattr3args(struct iovec inmsg, getattr3args *ga)
{
- return xdr_to_generic (inmsg, (void *)ga,
- (xdrproc_t)xdr_getattr3args);
+ return xdr_to_generic(inmsg, (void *)ga, (xdrproc_t)xdr_getattr3args);
}
-
ssize_t
-xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res)
+xdr_serialize_getattr3res(struct iovec outmsg, getattr3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_getattr3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_getattr3res);
}
-
ssize_t
-xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res)
+xdr_serialize_setattr3res(struct iovec outmsg, setattr3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_setattr3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_setattr3res);
}
-
ssize_t
-xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa)
+xdr_to_setattr3args(struct iovec inmsg, setattr3args *sa)
{
- return xdr_to_generic (inmsg, (void *)sa,
- (xdrproc_t)xdr_setattr3args);
+ return xdr_to_generic(inmsg, (void *)sa, (xdrproc_t)xdr_setattr3args);
}
-
ssize_t
-xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res)
+xdr_serialize_lookup3res(struct iovec outmsg, lookup3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_lookup3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_lookup3res);
}
-
ssize_t
-xdr_to_lookup3args (struct iovec inmsg, lookup3args *la)
+xdr_to_lookup3args(struct iovec inmsg, lookup3args *la)
{
- return xdr_to_generic (inmsg, (void *)la,
- (xdrproc_t)xdr_lookup3args);
+ return xdr_to_generic(inmsg, (void *)la, (xdrproc_t)xdr_lookup3args);
}
-
ssize_t
-xdr_to_access3args (struct iovec inmsg, access3args *ac)
+xdr_to_access3args(struct iovec inmsg, access3args *ac)
{
- return xdr_to_generic (inmsg,(void *)ac,
- (xdrproc_t)xdr_access3args);
+ return xdr_to_generic(inmsg, (void *)ac, (xdrproc_t)xdr_access3args);
}
-
ssize_t
-xdr_serialize_access3res (struct iovec outmsg, access3res *res)
+xdr_serialize_access3res(struct iovec outmsg, access3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_access3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_access3res);
}
-
ssize_t
-xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra)
+xdr_to_readlink3args(struct iovec inmsg, readlink3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_readlink3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_readlink3args);
}
-
ssize_t
-xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res)
+xdr_serialize_readlink3res(struct iovec outmsg, readlink3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readlink3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_readlink3res);
}
-
ssize_t
-xdr_to_read3args (struct iovec inmsg, read3args *ra)
+xdr_to_read3args(struct iovec inmsg, read3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_read3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_read3args);
}
-
ssize_t
-xdr_serialize_read3res (struct iovec outmsg, read3res *res)
+xdr_serialize_read3res(struct iovec outmsg, read3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_read3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_read3res);
}
ssize_t
-xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res)
+xdr_serialize_read3res_nocopy(struct iovec outmsg, read3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_read3res_nocopy);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_read3res_nocopy);
}
-
ssize_t
-xdr_to_write3args (struct iovec inmsg, write3args *wa)
+xdr_to_write3args(struct iovec inmsg, write3args *wa)
{
- return xdr_to_generic (inmsg, (void *)wa,(xdrproc_t)xdr_write3args);
+ return xdr_to_generic(inmsg, (void *)wa, (xdrproc_t)xdr_write3args);
}
-
ssize_t
-xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
- struct iovec *payload)
+xdr_to_write3args_nocopy(struct iovec inmsg, write3args *wa,
+ struct iovec *payload)
{
- return xdr_to_generic_payload (inmsg, (void *)wa,
- (xdrproc_t)xdr_write3args, payload);
+ return xdr_to_generic_payload(inmsg, (void *)wa, (xdrproc_t)xdr_write3args,
+ payload);
}
-
ssize_t
-xdr_serialize_write3res (struct iovec outmsg, write3res *res)
+xdr_serialize_write3res(struct iovec outmsg, write3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_write3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_write3res);
}
-
ssize_t
-xdr_to_create3args (struct iovec inmsg, create3args *ca)
+xdr_to_create3args(struct iovec inmsg, create3args *ca)
{
- return xdr_to_generic (inmsg, (void *)ca,
- (xdrproc_t)xdr_create3args);
+ return xdr_to_generic(inmsg, (void *)ca, (xdrproc_t)xdr_create3args);
}
-
ssize_t
-xdr_serialize_create3res (struct iovec outmsg, create3res *res)
+xdr_serialize_create3res(struct iovec outmsg, create3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_create3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_create3res);
}
-
ssize_t
-xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res)
+xdr_serialize_mkdir3res(struct iovec outmsg, mkdir3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mkdir3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_mkdir3res);
}
-
ssize_t
-xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma)
+xdr_to_mkdir3args(struct iovec inmsg, mkdir3args *ma)
{
- return xdr_to_generic (inmsg, (void *)ma,
- (xdrproc_t)xdr_mkdir3args);
+ return xdr_to_generic(inmsg, (void *)ma, (xdrproc_t)xdr_mkdir3args);
}
-
ssize_t
-xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa)
+xdr_to_symlink3args(struct iovec inmsg, symlink3args *sa)
{
- return xdr_to_generic (inmsg, (void *)sa,
- (xdrproc_t)xdr_symlink3args);
+ return xdr_to_generic(inmsg, (void *)sa, (xdrproc_t)xdr_symlink3args);
}
-
ssize_t
-xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res)
+xdr_serialize_symlink3res(struct iovec outmsg, symlink3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_symlink3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_symlink3res);
}
-
ssize_t
-xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma)
+xdr_to_mknod3args(struct iovec inmsg, mknod3args *ma)
{
- return xdr_to_generic (inmsg, (void *)ma,
- (xdrproc_t)xdr_mknod3args);
+ return xdr_to_generic(inmsg, (void *)ma, (xdrproc_t)xdr_mknod3args);
}
-
ssize_t
-xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res)
+xdr_serialize_mknod3res(struct iovec outmsg, mknod3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mknod3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_mknod3res);
}
-
ssize_t
-xdr_to_remove3args (struct iovec inmsg, remove3args *ra)
+xdr_to_remove3args(struct iovec inmsg, remove3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_remove3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_remove3args);
}
-
ssize_t
-xdr_serialize_remove3res (struct iovec outmsg, remove3res *res)
+xdr_serialize_remove3res(struct iovec outmsg, remove3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_remove3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_remove3res);
}
-
ssize_t
-xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra)
+xdr_to_rmdir3args(struct iovec inmsg, rmdir3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_rmdir3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_rmdir3args);
}
-
ssize_t
-xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res)
+xdr_serialize_rmdir3res(struct iovec outmsg, rmdir3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_rmdir3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_rmdir3res);
}
-
ssize_t
-xdr_serialize_rename3res (struct iovec outmsg, rename3res *res)
+xdr_serialize_rename3res(struct iovec outmsg, rename3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_rename3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_rename3res);
}
-
ssize_t
-xdr_to_rename3args (struct iovec inmsg, rename3args *ra)
+xdr_to_rename3args(struct iovec inmsg, rename3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_rename3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_rename3args);
}
-
ssize_t
-xdr_serialize_link3res (struct iovec outmsg, link3res *li)
+xdr_serialize_link3res(struct iovec outmsg, link3res *li)
{
- return xdr_serialize_generic (outmsg, (void *)li,
- (xdrproc_t)xdr_link3res);
+ return xdr_serialize_generic(outmsg, (void *)li, (xdrproc_t)xdr_link3res);
}
-
ssize_t
-xdr_to_link3args (struct iovec inmsg, link3args *la)
+xdr_to_link3args(struct iovec inmsg, link3args *la)
{
- return xdr_to_generic (inmsg, (void *)la, (xdrproc_t)xdr_link3args);
+ return xdr_to_generic(inmsg, (void *)la, (xdrproc_t)xdr_link3args);
}
-
ssize_t
-xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd)
+xdr_to_readdir3args(struct iovec inmsg, readdir3args *rd)
{
- return xdr_to_generic (inmsg, (void *)rd,
- (xdrproc_t)xdr_readdir3args);
+ return xdr_to_generic(inmsg, (void *)rd, (xdrproc_t)xdr_readdir3args);
}
-
ssize_t
-xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res)
+xdr_serialize_readdir3res(struct iovec outmsg, readdir3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readdir3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_readdir3res);
}
-
ssize_t
-xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp)
+xdr_to_readdirp3args(struct iovec inmsg, readdirp3args *rp)
{
- return xdr_to_generic (inmsg, (void *)rp,
- (xdrproc_t)xdr_readdirp3args);
+ return xdr_to_generic(inmsg, (void *)rp, (xdrproc_t)xdr_readdirp3args);
}
-
ssize_t
-xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res)
+xdr_serialize_readdirp3res(struct iovec outmsg, readdirp3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readdirp3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_readdirp3res);
}
-
ssize_t
-xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa)
+xdr_to_fsstat3args(struct iovec inmsg, fsstat3args *fa)
{
- return xdr_to_generic (inmsg, (void *)fa,
- (xdrproc_t)xdr_fsstat3args);
+ return xdr_to_generic(inmsg, (void *)fa, (xdrproc_t)xdr_fsstat3args);
}
-
ssize_t
-xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res)
+xdr_serialize_fsstat3res(struct iovec outmsg, fsstat3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_fsstat3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_fsstat3res);
}
ssize_t
-xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi)
+xdr_to_fsinfo3args(struct iovec inmsg, fsinfo3args *fi)
{
- return xdr_to_generic (inmsg, (void *)fi,
- (xdrproc_t)xdr_fsinfo3args);
+ return xdr_to_generic(inmsg, (void *)fi, (xdrproc_t)xdr_fsinfo3args);
}
-
ssize_t
-xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res)
+xdr_serialize_fsinfo3res(struct iovec outmsg, fsinfo3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_fsinfo3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_fsinfo3res);
}
-
ssize_t
-xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc)
+xdr_to_pathconf3args(struct iovec inmsg, pathconf3args *pc)
{
- return xdr_to_generic (inmsg, (void *)pc,
- (xdrproc_t)xdr_pathconf3args);}
-
+ return xdr_to_generic(inmsg, (void *)pc, (xdrproc_t)xdr_pathconf3args);
+}
ssize_t
-xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res)
+xdr_serialize_pathconf3res(struct iovec outmsg, pathconf3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_pathconf3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_pathconf3res);
}
-
ssize_t
-xdr_to_commit3args (struct iovec inmsg, commit3args *ca)
+xdr_to_commit3args(struct iovec inmsg, commit3args *ca)
{
- return xdr_to_generic (inmsg, (void *)ca,
- (xdrproc_t)xdr_commit3args);
+ return xdr_to_generic(inmsg, (void *)ca, (xdrproc_t)xdr_commit3args);
}
-
ssize_t
-xdr_serialize_commit3res (struct iovec outmsg, commit3res *res)
+xdr_serialize_commit3res(struct iovec outmsg, commit3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_commit3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_commit3res);
}
-
ssize_t
-xdr_serialize_exports (struct iovec outmsg, exports *elist)
+xdr_serialize_exports(struct iovec outmsg, exports *elist)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!outmsg.iov_base) || (!elist))
- return -1;
+ if ((!outmsg.iov_base) || (!elist))
+ return -1;
- xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
- XDR_ENCODE);
+ xdrmem_create(&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
+ XDR_ENCODE);
- if (!xdr_exports (&xdr, elist))
- goto ret;
+ if (!xdr_exports(&xdr, elist))
+ goto ret;
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
ret:
- return ret;
+ return ret;
+}
+
+ssize_t
+xdr_serialize_nfsstat3(struct iovec outmsg, nfsstat3 *s)
+{
+ return xdr_serialize_generic(outmsg, (void *)s, (xdrproc_t)xdr_nfsstat3);
+}
+
+ssize_t
+xdr_to_nlm4_testargs(struct iovec inmsg, nlm4_testargs *args)
+{
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_testargs);
+}
+
+ssize_t
+xdr_serialize_nlm4_testres(struct iovec outmsg, nlm4_testres *res)
+{
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_nlm4_testres);
}
+ssize_t
+xdr_to_nlm4_lockargs(struct iovec inmsg, nlm4_lockargs *args)
+{
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_lockargs);
+}
ssize_t
-xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s)
+xdr_serialize_nlm4_res(struct iovec outmsg, nlm4_res *res)
{
- return xdr_serialize_generic (outmsg, (void *)s,
- (xdrproc_t)xdr_nfsstat3);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_nlm4_res);
}
ssize_t
-xdr_to_nlm4_testargs (struct iovec inmsg, nlm4_testargs *args)
+xdr_to_nlm4_cancelargs(struct iovec inmsg, nlm4_cancargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_testargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_cancargs);
}
ssize_t
-xdr_serialize_nlm4_testres (struct iovec outmsg, nlm4_testres *res)
+xdr_to_nlm4_unlockargs(struct iovec inmsg, nlm4_unlockargs *args)
{
- return xdr_serialize_generic (outmsg, (void*)res,
- (xdrproc_t)xdr_nlm4_testres);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_unlockargs);
}
ssize_t
-xdr_to_nlm4_lockargs (struct iovec inmsg, nlm4_lockargs *args)
+xdr_to_nlm4_shareargs(struct iovec inmsg, nlm4_shareargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_lockargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_shareargs);
}
ssize_t
-xdr_serialize_nlm4_res (struct iovec outmsg, nlm4_res *res)
+xdr_serialize_nlm4_shareres(struct iovec outmsg, nlm4_shareres *res)
{
- return xdr_serialize_generic (outmsg, (void*)res,
- (xdrproc_t)xdr_nlm4_res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_nlm4_shareres);
}
ssize_t
-xdr_to_nlm4_cancelargs (struct iovec inmsg, nlm4_cancargs *args)
+xdr_serialize_nlm4_testargs(struct iovec outmsg, nlm4_testargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_cancargs);
+ return xdr_serialize_generic(outmsg, (void *)args,
+ (xdrproc_t)xdr_nlm4_testargs);
}
ssize_t
-xdr_to_nlm4_unlockargs (struct iovec inmsg, nlm4_unlockargs *args)
+xdr_to_nlm4_res(struct iovec inmsg, nlm4_res *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_unlockargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_res);
}
ssize_t
-xdr_to_nlm4_shareargs (struct iovec inmsg, nlm4_shareargs *args)
+xdr_to_nlm4_freeallargs(struct iovec inmsg, nlm4_freeallargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_shareargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_freeallargs);
}
ssize_t
-xdr_serialize_nlm4_shareres (struct iovec outmsg, nlm4_shareres *res)
+xdr_to_getaclargs(struct iovec inmsg, getaclargs *args)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_nlm4_shareres);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_getaclargs);
}
ssize_t
-xdr_serialize_nlm4_testargs (struct iovec outmsg, nlm4_testargs *args)
+xdr_to_setaclargs(struct iovec inmsg, setaclargs *args)
{
- return xdr_serialize_generic (outmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_testargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_setaclargs);
}
ssize_t
-xdr_to_nlm4_res (struct iovec inmsg, nlm4_res *args)
+xdr_serialize_getaclreply(struct iovec inmsg, getaclreply *res)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_res);
+ return xdr_serialize_generic(inmsg, (void *)res,
+ (xdrproc_t)xdr_getaclreply);
}
ssize_t
-xdr_to_nlm4_freeallargs (struct iovec inmsg, nlm4_freeallargs *args)
+xdr_serialize_setaclreply(struct iovec inmsg, setaclreply *res)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_freeallargs);
+ return xdr_serialize_generic(inmsg, (void *)res,
+ (xdrproc_t)xdr_setaclreply);
}
diff --git a/rpc/xdr/src/msg-nfs3.h b/rpc/xdr/src/msg-nfs3.h
index 701df300a6e..869ddc3524a 100644
--- a/rpc/xdr/src/msg-nfs3.h
+++ b/rpc/xdr/src/msg-nfs3.h
@@ -1,220 +1,219 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _MSG_NFS3_H_
#define _MSG_NFS3_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "xdr-nfs3.h"
#include "nlm4-xdr.h"
+#include "acl3-xdr.h"
#include <sys/types.h>
#include <sys/uio.h>
extern ssize_t
-xdr_to_mountpath (struct iovec outpath, struct iovec inmsg);
+xdr_to_mountpath(struct iovec outpath, struct iovec inmsg);
+
+extern ssize_t
+xdr_serialize_mountres3(struct iovec outmsg, mountres3 *res);
+
+extern ssize_t
+xdr_serialize_mountbody(struct iovec outmsg, mountbody *mb);
+
+extern ssize_t
+xdr_to_getattr3args(struct iovec inmsg, getattr3args *ga);
+
+extern ssize_t
+xdr_serialize_getattr3res(struct iovec outmsg, getattr3res *res);
extern ssize_t
-xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res);
+xdr_serialize_setattr3res(struct iovec outmsg, setattr3res *res);
extern ssize_t
-xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb);
+xdr_to_setattr3args(struct iovec inmsg, setattr3args *sa);
extern ssize_t
-xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga);
+xdr_serialize_lookup3res(struct iovec outmsg, lookup3res *res);
extern ssize_t
-xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res);
+xdr_to_lookup3args(struct iovec inmsg, lookup3args *la);
extern ssize_t
-xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res);
+xdr_to_access3args(struct iovec inmsg, access3args *ac);
extern ssize_t
-xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa);
+xdr_serialize_access3res(struct iovec outmsg, access3res *res);
extern ssize_t
-xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res);
+xdr_to_readlink3args(struct iovec inmsg, readlink3args *ra);
extern ssize_t
-xdr_to_lookup3args (struct iovec inmsg, lookup3args *la);
+xdr_serialize_readlink3res(struct iovec outmsg, readlink3res *res);
extern ssize_t
-xdr_to_access3args (struct iovec inmsg, access3args *ac);
+xdr_to_read3args(struct iovec inmsg, read3args *ra);
extern ssize_t
-xdr_serialize_access3res (struct iovec outmsg, access3res *res);
+xdr_serialize_read3res(struct iovec outmsg, read3res *res);
extern ssize_t
-xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra);
+xdr_serialize_read3res_nocopy(struct iovec outmsg, read3res *res);
extern ssize_t
-xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res);
+xdr_to_write3args(struct iovec inmsg, write3args *wa);
extern ssize_t
-xdr_to_read3args (struct iovec inmsg, read3args *ra);
+xdr_to_write3args_nocopy(struct iovec inmsg, write3args *wa,
+ struct iovec *payload);
extern ssize_t
-xdr_serialize_read3res (struct iovec outmsg, read3res *res);
+xdr_serialize_write3res(struct iovec outmsg, write3res *res);
extern ssize_t
-xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res);
+xdr_to_create3args(struct iovec inmsg, create3args *ca);
extern ssize_t
-xdr_to_write3args (struct iovec inmsg, write3args *wa);
+xdr_serialize_create3res(struct iovec outmsg, create3res *res);
extern ssize_t
-xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
- struct iovec *payload);
+xdr_serialize_mkdir3res(struct iovec outmsg, mkdir3res *res);
extern ssize_t
-xdr_serialize_write3res (struct iovec outmsg, write3res *res);
+xdr_to_mkdir3args(struct iovec inmsg, mkdir3args *ma);
extern ssize_t
-xdr_to_create3args (struct iovec inmsg, create3args *ca);
+xdr_to_symlink3args(struct iovec inmsg, symlink3args *sa);
extern ssize_t
-xdr_serialize_create3res (struct iovec outmsg, create3res *res);
+xdr_serialize_symlink3res(struct iovec outmsg, symlink3res *res);
extern ssize_t
-xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res);
+xdr_to_mknod3args(struct iovec inmsg, mknod3args *ma);
extern ssize_t
-xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma);
+xdr_serialize_mknod3res(struct iovec outmsg, mknod3res *res);
extern ssize_t
-xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa);
+xdr_to_remove3args(struct iovec inmsg, remove3args *ra);
extern ssize_t
-xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res);
+xdr_serialize_remove3res(struct iovec outmsg, remove3res *res);
extern ssize_t
-xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma);
+xdr_to_rmdir3args(struct iovec inmsg, rmdir3args *ra);
extern ssize_t
-xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res);
+xdr_serialize_rmdir3res(struct iovec outmsg, rmdir3res *res);
extern ssize_t
-xdr_to_remove3args (struct iovec inmsg, remove3args *ra);
+xdr_serialize_rename3res(struct iovec outmsg, rename3res *res);
extern ssize_t
-xdr_serialize_remove3res (struct iovec outmsg, remove3res *res);
+xdr_to_rename3args(struct iovec inmsg, rename3args *ra);
extern ssize_t
-xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra);
+xdr_serialize_link3res(struct iovec outmsg, link3res *li);
extern ssize_t
-xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res);
+xdr_to_link3args(struct iovec inmsg, link3args *la);
extern ssize_t
-xdr_serialize_rename3res (struct iovec outmsg, rename3res *res);
+xdr_to_readdir3args(struct iovec inmsg, readdir3args *rd);
extern ssize_t
-xdr_to_rename3args (struct iovec inmsg, rename3args *ra);
+xdr_serialize_readdir3res(struct iovec outmsg, readdir3res *res);
extern ssize_t
-xdr_serialize_link3res (struct iovec outmsg, link3res *li);
+xdr_to_readdirp3args(struct iovec inmsg, readdirp3args *rp);
extern ssize_t
-xdr_to_link3args (struct iovec inmsg, link3args *la);
+xdr_serialize_readdirp3res(struct iovec outmsg, readdirp3res *res);
extern ssize_t
-xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd);
+xdr_to_fsstat3args(struct iovec inmsg, fsstat3args *fa);
extern ssize_t
-xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res);
+xdr_serialize_fsstat3res(struct iovec outmsg, fsstat3res *res);
extern ssize_t
-xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp);
+xdr_to_fsinfo3args(struct iovec inmsg, fsinfo3args *fi);
extern ssize_t
-xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res);
+xdr_serialize_fsinfo3res(struct iovec outmsg, fsinfo3res *res);
extern ssize_t
-xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa);
+xdr_to_pathconf3args(struct iovec inmsg, pathconf3args *pc);
extern ssize_t
-xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res);
+xdr_serialize_pathconf3res(struct iovec outmsg, pathconf3res *res);
extern ssize_t
-xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi);
+xdr_to_commit3args(struct iovec inmsg, commit3args *ca);
extern ssize_t
-xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res);
+xdr_serialize_commit3res(struct iovec outmsg, commit3res *res);
extern ssize_t
-xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc);
+xdr_serialize_exports(struct iovec outmsg, exports *elist);
extern ssize_t
-xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res);
+xdr_serialize_mountlist(struct iovec outmsg, mountlist *ml);
extern ssize_t
-xdr_to_commit3args (struct iovec inmsg, commit3args *ca);
+xdr_serialize_mountstat3(struct iovec outmsg, mountstat3 *m);
extern ssize_t
-xdr_serialize_commit3res (struct iovec outmsg, commit3res *res);
+xdr_serialize_nfsstat3(struct iovec outmsg, nfsstat3 *s);
extern ssize_t
-xdr_serialize_exports (struct iovec outmsg, exports *elist);
+xdr_to_nlm4_testargs(struct iovec inmsg, nlm4_testargs *args);
extern ssize_t
-xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml);
+xdr_serialize_nlm4_testres(struct iovec outmsg, nlm4_testres *res);
extern ssize_t
-xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m);
+xdr_to_nlm4_lockargs(struct iovec inmsg, nlm4_lockargs *args);
extern ssize_t
-xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s);
+xdr_serialize_nlm4_res(struct iovec outmsg, nlm4_res *res);
extern ssize_t
-xdr_to_nlm4_testargs (struct iovec inmsg, nlm4_testargs *args);
+xdr_to_nlm4_cancelargs(struct iovec inmsg, nlm4_cancargs *args);
extern ssize_t
-xdr_serialize_nlm4_testres (struct iovec outmsg, nlm4_testres *res);
+xdr_to_nlm4_unlockargs(struct iovec inmsg, nlm4_unlockargs *args);
extern ssize_t
-xdr_to_nlm4_lockargs (struct iovec inmsg, nlm4_lockargs *args);
+xdr_to_nlm4_shareargs(struct iovec inmsg, nlm4_shareargs *args);
extern ssize_t
-xdr_serialize_nlm4_res (struct iovec outmsg, nlm4_res *res);
+xdr_serialize_nlm4_shareres(struct iovec outmsg, nlm4_shareres *res);
extern ssize_t
-xdr_to_nlm4_cancelargs (struct iovec inmsg, nlm4_cancargs *args);
+xdr_serialize_nlm4_testargs(struct iovec outmsg, nlm4_testargs *args);
extern ssize_t
-xdr_to_nlm4_unlockargs (struct iovec inmsg, nlm4_unlockargs *args);
+xdr_to_nlm4_res(struct iovec inmsg, nlm4_res *args);
extern ssize_t
-xdr_to_nlm4_shareargs (struct iovec inmsg, nlm4_shareargs *args);
+xdr_to_nlm4_freeallargs(struct iovec inmsg, nlm4_freeallargs *args);
extern ssize_t
-xdr_serialize_nlm4_shareres (struct iovec outmsg, nlm4_shareres *res);
+xdr_to_getaclargs(struct iovec inmsg, getaclargs *args);
extern ssize_t
-xdr_serialize_nlm4_testargs (struct iovec outmsg, nlm4_testargs *args);
+xdr_to_setaclargs(struct iovec inmsg, setaclargs *args);
extern ssize_t
-xdr_to_nlm4_res (struct iovec inmsg, nlm4_res *args);
+xdr_serialize_getaclreply(struct iovec inmsg, getaclreply *res);
extern ssize_t
-xdr_to_nlm4_freeallargs (struct iovec inmsg, nlm4_freeallargs *args);
+xdr_serialize_setaclreply(struct iovec inmsg, setaclreply *res);
#endif
diff --git a/rpc/xdr/src/nlm4-xdr.c b/rpc/xdr/src/nlm4-xdr.c
deleted file mode 100644
index 4cf6a51e8fd..00000000000
--- a/rpc/xdr/src/nlm4-xdr.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "nlm4-xdr.h"
-
-bool_t
-xdr_netobj (XDR *xdrs, netobj *objp)
-{
- if (!xdr_bytes (xdrs, (char **)&objp->n_bytes, (u_int *) &objp->n_len, MAXNETOBJ_SZ))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsh_mode (XDR *xdrs, fsh_mode *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsh_access (XDR *xdrs, fsh_access *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_stats (XDR *xdrs, nlm4_stats *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_stat (XDR *xdrs, nlm4_stat *objp)
-{
- if (!xdr_nlm4_stats (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_holder (XDR *xdrs, nlm4_holder *objp)
-{
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_uint32_t (xdrs, &objp->svid))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->oh))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_offset))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_len))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_lock (XDR *xdrs, nlm4_lock *objp)
-{
- if (!xdr_string (xdrs, &objp->caller_name, MAXNAMELEN))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->fh))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->oh))
- return FALSE;
- if (!xdr_uint32_t (xdrs, &objp->svid))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_offset))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_len))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_share (XDR *xdrs, nlm4_share *objp)
-{
- if (!xdr_string (xdrs, &objp->caller_name, MAXNAMELEN))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->fh))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->oh))
- return FALSE;
- if (!xdr_fsh_mode (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_fsh_access (xdrs, &objp->access))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_testrply (XDR *xdrs, nlm4_testrply *objp)
-{
- if (!xdr_nlm4_stats (xdrs, &objp->stat))
- return FALSE;
- switch (objp->stat) {
- case nlm4_denied:
- if (!xdr_nlm4_holder (xdrs, &objp->nlm4_testrply_u.holder))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_testres (XDR *xdrs, nlm4_testres *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_testrply (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_testargs (XDR *xdrs, nlm4_testargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_res (XDR *xdrs, nlm4_res *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_stat (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_lockargs (XDR *xdrs, nlm4_lockargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->block))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->reclaim))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_cancargs (XDR *xdrs, nlm4_cancargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->block))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_unlockargs (XDR *xdrs, nlm4_unlockargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_shareargs (XDR *xdrs, nlm4_shareargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_share (xdrs, &objp->share))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->reclaim))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_shareres (XDR *xdrs, nlm4_shareres *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_stats (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_int (xdrs, &objp->sequence))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_freeallargs (XDR *xdrs, nlm4_freeallargs *objp)
-{
- if (!xdr_string (xdrs, &objp->name, LM_MAXSTRLEN))
- return FALSE;
- if (!xdr_uint32_t (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-
-/*
-bool_t
-xdr_nlm_sm_status (XDR *xdrs, nlm_sm_status *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, LM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
-*/
diff --git a/rpc/xdr/src/nlm4-xdr.h b/rpc/xdr/src/nlm4-xdr.h
deleted file mode 100644
index f9d6efd4d61..00000000000
--- a/rpc/xdr/src/nlm4-xdr.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _NLM_H_RPCGEN
-#define _NLM_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAXNETOBJ_SZ 1024
-#define LM_MAXSTRLEN 1024
-#define MAXNAMELEN 1025
-
-/*
- * The following enums are actually bit encoded for efficient
- * boolean algebra.... DON'T change them.....
- */
-
-enum fsh_mode {
- fsm_DN = 0,
- fsm_DR = 1,
- fsm_DW = 2,
- fsm_DRW = 3,
-};
-typedef enum fsh_mode fsh_mode;
-
-enum fsh_access {
- fsa_NONE = 0,
- fsa_R = 1,
- fsa_W = 2,
- fsa_RW = 3,
-};
-typedef enum fsh_access fsh_access;
-/* definitions for NLM version 4 */
-
-enum nlm4_stats {
- nlm4_granted = 0,
- nlm4_denied = 1,
- nlm4_denied_nolock = 2,
- nlm4_blocked = 3,
- nlm4_denied_grace_period = 4,
- nlm4_deadlck = 5,
- nlm4_rofs = 6,
- nlm4_stale_fh = 7,
- nlm4_fbig = 8,
- nlm4_failed = 9,
-};
-typedef enum nlm4_stats nlm4_stats;
-
-struct nlm4_stat {
- nlm4_stats stat;
-};
-typedef struct nlm4_stat nlm4_stat;
-
-struct nlm4_holder {
- bool_t exclusive;
- u_int32_t svid;
- netobj oh;
- u_int64_t l_offset;
- u_int64_t l_len;
-};
-typedef struct nlm4_holder nlm4_holder;
-
-struct nlm4_lock {
- char *caller_name;
- netobj fh;
- netobj oh;
- u_int32_t svid;
- u_int64_t l_offset;
- u_int64_t l_len;
-};
-typedef struct nlm4_lock nlm4_lock;
-
-struct nlm4_share {
- char *caller_name;
- netobj fh;
- netobj oh;
- fsh_mode mode;
- fsh_access access;
-};
-typedef struct nlm4_share nlm4_share;
-
-struct nlm4_testrply {
- nlm4_stats stat;
- union {
- struct nlm4_holder holder;
- } nlm4_testrply_u;
-};
-typedef struct nlm4_testrply nlm4_testrply;
-
-struct nlm4_testres {
- netobj cookie;
- nlm4_testrply stat;
-};
-typedef struct nlm4_testres nlm4_testres;
-
-struct nlm4_testargs {
- netobj cookie;
- bool_t exclusive;
- struct nlm4_lock alock;
-};
-typedef struct nlm4_testargs nlm4_testargs;
-
-struct nlm4_res {
- netobj cookie;
- nlm4_stat stat;
-};
-typedef struct nlm4_res nlm4_res;
-
-struct nlm4_lockargs {
- netobj cookie;
- bool_t block;
- bool_t exclusive;
- struct nlm4_lock alock;
- bool_t reclaim;
- int state;
-};
-typedef struct nlm4_lockargs nlm4_lockargs;
-
-struct nlm4_cancargs {
- netobj cookie;
- bool_t block;
- bool_t exclusive;
- struct nlm4_lock alock;
-};
-typedef struct nlm4_cancargs nlm4_cancargs;
-
-struct nlm4_unlockargs {
- netobj cookie;
- struct nlm4_lock alock;
-};
-typedef struct nlm4_unlockargs nlm4_unlockargs;
-
-struct nlm4_shareargs {
- netobj cookie;
- nlm4_share share;
- bool_t reclaim;
-};
-typedef struct nlm4_shareargs nlm4_shareargs;
-
-struct nlm4_shareres {
- netobj cookie;
- nlm4_stats stat;
- int sequence;
-};
-typedef struct nlm4_shareres nlm4_shareres;
-
-struct nlm4_freeallargs {
- char *name;
- u_int32_t state;
-};
-typedef struct nlm4_freeallargs nlm4_freeallargs;
-
-
-#define NLM4_NULL 0
-#define NLM4_TEST 1
-#define NLM4_LOCK 2
-#define NLM4_CANCEL 3
-#define NLM4_UNLOCK 4
-#define NLM4_GRANTED 5
-#define NLM4_TEST_MSG 6
-#define NLM4_LOCK_MSG 7
-#define NLM4_CANCEL_MSG 8
-#define NLM4_UNLOCK_MSG 9
-#define NLM4_GRANTED_MSG 10
-#define NLM4_TEST_RES 11
-#define NLM4_LOCK_RES 12
-#define NLM4_CANCEL_RES 13
-#define NLM4_UNLOCK_RES 14
-#define NLM4_GRANTED_RES 15
-#define NLM4_SM_NOTIFY 16
-#define NLM4_SEVENTEEN 17
-#define NLM4_EIGHTEEN 18
-#define NLM4_NINETEEN 19
-#define NLM4_SHARE 20
-#define NLM4_UNSHARE 21
-#define NLM4_NM_LOCK 22
-#define NLM4_FREE_ALL 23
-#define NLM4_PROC_COUNT 24
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_netobj (XDR *, netobj*);
-extern bool_t xdr_fsh_mode (XDR *, fsh_mode*);
-extern bool_t xdr_fsh_access (XDR *, fsh_access*);
-extern bool_t xdr_nlm4_stats (XDR *, nlm4_stats*);
-extern bool_t xdr_nlm4_stat (XDR *, nlm4_stat*);
-extern bool_t xdr_nlm4_holder (XDR *, nlm4_holder*);
-extern bool_t xdr_nlm4_lock (XDR *, nlm4_lock*);
-extern bool_t xdr_nlm4_share (XDR *, nlm4_share*);
-extern bool_t xdr_nlm4_testrply (XDR *, nlm4_testrply*);
-extern bool_t xdr_nlm4_testres (XDR *, nlm4_testres*);
-extern bool_t xdr_nlm4_testargs (XDR *, nlm4_testargs*);
-extern bool_t xdr_nlm4_res (XDR *, nlm4_res*);
-extern bool_t xdr_nlm4_lockargs (XDR *, nlm4_lockargs*);
-extern bool_t xdr_nlm4_cancargs (XDR *, nlm4_cancargs*);
-extern bool_t xdr_nlm4_unlockargs (XDR *, nlm4_unlockargs*);
-extern bool_t xdr_nlm4_shareargs (XDR *, nlm4_shareargs*);
-extern bool_t xdr_nlm4_shareres (XDR *, nlm4_shareres*);
-extern bool_t xdr_nlm4_freeallargs (XDR *, nlm4_freeallargs*);
-
-#else /* K&R C */
-extern bool_t xdr_netobj ();
-extern bool_t xdr_fsh_mode ();
-extern bool_t xdr_fsh_access ();
-extern bool_t xdr_nlm4_stats ();
-extern bool_t xdr_nlm4_stat ();
-extern bool_t xdr_nlm4_holder ();
-extern bool_t xdr_nlm4_lock ();
-extern bool_t xdr_nlm4_share ();
-extern bool_t xdr_nlm4_testrply ();
-extern bool_t xdr_nlm4_testres ();
-extern bool_t xdr_nlm4_testargs ();
-extern bool_t xdr_nlm4_res ();
-extern bool_t xdr_nlm4_lockargs ();
-extern bool_t xdr_nlm4_cancargs ();
-extern bool_t xdr_nlm4_unlockargs ();
-extern bool_t xdr_nlm4_shareargs ();
-extern bool_t xdr_nlm4_shareres ();
-extern bool_t xdr_nlm4_freeallargs ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_NLM_H_RPCGEN */
diff --git a/rpc/xdr/src/nlm4.x b/rpc/xdr/src/nlm4-xdr.x
index a6de6f32bf4..847b0e64491 100644
--- a/rpc/xdr/src/nlm4.x
+++ b/rpc/xdr/src/nlm4-xdr.x
@@ -1,29 +1,27 @@
/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
/* .x file defined as according to the RFC */
+%#include "xdr-common.h"
+
const MAXNETOBJ_SZ = 1024;
const LM_MAXSTRLEN = 1024;
const MAXNAMELEN = 1025;
-typedef opaque netobj<MAXNETOBJ_SZ>;
+typedef opaque nlm4_netobj<MAXNETOBJ_SZ>;
#ifdef RPC_HDR
%/*
@@ -68,15 +66,15 @@ struct nlm4_stat {
struct nlm4_holder {
bool exclusive;
u_int32_t svid;
- netobj oh;
+ nlm4_netobj oh;
u_int64_t l_offset;
u_int64_t l_len;
};
struct nlm4_lock {
string caller_name<LM_MAXSTRLEN>;
- netobj fh;
- netobj oh;
+ nlm4_netobj fh;
+ nlm4_netobj oh;
u_int32_t svid;
u_int64_t l_offset;
u_int64_t l_len;
@@ -84,71 +82,71 @@ struct nlm4_lock {
struct nlm4_share {
string caller_name<LM_MAXSTRLEN>;
- netobj fh;
- netobj oh;
+ nlm4_netobj fh;
+ nlm4_netobj oh;
fsh_mode mode;
fsh_access access;
};
union nlm4_testrply switch (nlm4_stats stat) {
- case nlm_denied:
- struct nlm4_holder holder;
+ case nlm4_denied:
+ nlm4_holder holder;
default:
void;
};
struct nlm4_testres {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_testrply stat;
};
struct nlm4_testargs {
- netobj cookie;
+ nlm4_netobj cookie;
bool exclusive;
- struct nlm4_lock alock;
+ nlm4_lock alock;
};
struct nlm4_res {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_stat stat;
};
struct nlm4_lockargs {
- netobj cookie;
+ nlm4_netobj cookie;
bool block;
bool exclusive;
- struct nlm4_lock alock;
+ nlm4_lock alock;
bool reclaim; /* used for recovering locks */
- int state; /* specify local status monitor state */
+ int32_t state; /* specify local status monitor state */
};
struct nlm4_cancargs {
- netobj cookie;
+ nlm4_netobj cookie;
bool block;
bool exclusive;
- struct nlm4_lock alock;
+ nlm4_lock alock;
};
struct nlm4_unlockargs {
- netobj cookie;
- struct nlm4_lock alock;
+ nlm4_netobj cookie;
+ nlm4_lock alock;
};
struct nlm4_shareargs {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_share share;
bool reclaim;
};
struct nlm4_shareres {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_stats stat;
- int sequence;
+ int32_t sequence;
};
struct nlm4_freeallargs {
string name<LM_MAXSTRLEN>; /* client hostname */
- uint32 state; /* unused */
+ uint32_t state; /* unused */
};
/*
@@ -161,3 +159,9 @@ struct nlm_sm_status {
int state; /* new state */
opaque priv[16]; /* private data */
};
+
+program NLMCBK_PROGRAM {
+ version NLMCBK_V1 {
+ void NLMCBK_SM_NOTIFY(nlm_sm_status) = 16;
+ } = 1;
+} = 100021;
diff --git a/rpc/xdr/src/nlmcbk-xdr.c b/rpc/xdr/src/nlmcbk-xdr.c
deleted file mode 100644
index 26446ab1bf4..00000000000
--- a/rpc/xdr/src/nlmcbk-xdr.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "nlmcbk-xdr.h"
-
-bool_t
-xdr_nlm_sm_status (XDR *xdrs, nlm_sm_status *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, LM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/nlmcbk-xdr.h b/rpc/xdr/src/nlmcbk-xdr.h
deleted file mode 100644
index 4d6d670ab38..00000000000
--- a/rpc/xdr/src/nlmcbk-xdr.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _NLMCBK_H_RPCGEN
-#define _NLMCBK_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define LM_MAXSTRLEN 1024
-
-struct nlm_sm_status {
- char *mon_name;
- int state;
- char priv[16];
-};
-typedef struct nlm_sm_status nlm_sm_status;
-
-#define NLMCBK_PROGRAM 100021
-#define NLMCBK_V1 1
-
-#if defined(__STDC__) || defined(__cplusplus)
-#define NLMCBK_SM_NOTIFY 16
-extern void * nlmcbk_sm_notify_0(struct nlm_sm_status *, CLIENT *);
-extern void * nlmcbk_sm_notify_0_svc(struct nlm_sm_status *, struct svc_req *);
-extern int nlmcbk_program_0_freeresult (SVCXPRT *, xdrproc_t, caddr_t);
-
-#else /* K&R C */
-#define NLMCBK_SM_NOTIFY 16
-extern void * nlmcbk_sm_notify_0();
-extern void * nlmcbk_sm_notify_0_svc();
-extern int nlmcbk_program_0_freeresult ();
-#endif /* K&R C */
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_nlm_sm_status (XDR *, nlm_sm_status*);
-
-#else /* K&R C */
-extern bool_t xdr_nlm_sm_status ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_NLMCBK_H_RPCGEN */
diff --git a/rpc/xdr/src/nlmcbk.x b/rpc/xdr/src/nlmcbk.x
deleted file mode 100644
index 49901047eb3..00000000000
--- a/rpc/xdr/src/nlmcbk.x
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-const LM_MAXSTRLEN = 1024;
-
-struct nlm_sm_status {
- string mon_name<LM_MAXSTRLEN>; /* name of host */
- int state; /* new state */
- opaque priv[16]; /* private data */
-};
-
-program NLMCBK_PROGRAM {
- version NLMCBK_V0 {
- void NLMCBK_SM_NOTIFY(struct nlm_sm_status) = 1;
- } = 0;
-} = 1238477;
-
diff --git a/rpc/xdr/src/nsm-xdr.c b/rpc/xdr/src/nsm-xdr.c
deleted file mode 100644
index 8c41b4365a2..00000000000
--- a/rpc/xdr/src/nsm-xdr.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "nsm-xdr.h"
-
-bool_t
-xdr_sm_name (XDR *xdrs, sm_name *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_res (XDR *xdrs, res *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_sm_stat_res (XDR *xdrs, sm_stat_res *objp)
-{
- if (!xdr_res (xdrs, &objp->res_stat))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_sm_stat (XDR *xdrs, sm_stat *objp)
-{
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_my_id (XDR *xdrs, my_id *objp)
-{
- if (!xdr_string (xdrs, &objp->my_name, SM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->my_prog))
- return FALSE;
- if (!xdr_int (xdrs, &objp->my_vers))
- return FALSE;
- if (!xdr_int (xdrs, &objp->my_proc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mon_id (XDR *xdrs, mon_id *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
- return FALSE;
- if (!xdr_my_id (xdrs, &objp->my_id))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mon (XDR *xdrs, mon *objp)
-{
- if (!xdr_mon_id (xdrs, &objp->mon_id))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nsm_callback_status (XDR *xdrs, nsm_callback_status *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/nsm-xdr.h b/rpc/xdr/src/nsm-xdr.h
deleted file mode 100644
index 9de642c1572..00000000000
--- a/rpc/xdr/src/nsm-xdr.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _NSM_H_RPCGEN
-#define _NSM_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SM_MAXSTRLEN 1024
-
-struct sm_name {
- char *mon_name;
-};
-typedef struct sm_name sm_name;
-
-enum res {
- STAT_SUCC = 0,
- STAT_FAIL = 1,
-};
-typedef enum res res;
-
-struct sm_stat_res {
- res res_stat;
- int state;
-};
-typedef struct sm_stat_res sm_stat_res;
-
-struct sm_stat {
- int state;
-};
-typedef struct sm_stat sm_stat;
-
-struct my_id {
- char *my_name;
- int my_prog;
- int my_vers;
- int my_proc;
-};
-typedef struct my_id my_id;
-
-struct mon_id {
- char *mon_name;
- struct my_id my_id;
-};
-typedef struct mon_id mon_id;
-
-struct mon {
- struct mon_id mon_id;
- char priv[16];
-};
-typedef struct mon mon;
-
-struct nsm_callback_status {
- char *mon_name;
- int state;
- char priv[16];
-};
-typedef struct nsm_callback_status nsm_callback_status;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_sm_name (XDR *, sm_name*);
-extern bool_t xdr_res (XDR *, res*);
-extern bool_t xdr_sm_stat_res (XDR *, sm_stat_res*);
-extern bool_t xdr_sm_stat (XDR *, sm_stat*);
-extern bool_t xdr_my_id (XDR *, my_id*);
-extern bool_t xdr_mon_id (XDR *, mon_id*);
-extern bool_t xdr_mon (XDR *, mon*);
-extern bool_t xdr_nsm_callback_status (XDR *, nsm_callback_status*);
-
-#else /* K&R C */
-extern bool_t xdr_sm_name ();
-extern bool_t xdr_res ();
-extern bool_t xdr_sm_stat_res ();
-extern bool_t xdr_sm_stat ();
-extern bool_t xdr_my_id ();
-extern bool_t xdr_mon_id ();
-extern bool_t xdr_mon ();
-extern bool_t xdr_nsm_callback_status ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_NSM_H_RPCGEN */
diff --git a/rpc/xdr/src/nsm.x b/rpc/xdr/src/nsm-xdr.x
index 8f97b1aaa1f..7c16a741f1d 100644
--- a/rpc/xdr/src/nsm.x
+++ b/rpc/xdr/src/nsm-xdr.x
@@ -1,4 +1,19 @@
/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
+/*
* This defines the maximum length of the string
* identifying the caller.
*/
@@ -31,11 +46,11 @@ struct my_id {
struct mon_id {
string mon_name<SM_MAXSTRLEN>; /* name of the host to be monitored */
- struct my_id my_id;
+ my_id my_id;
};
struct mon {
- struct mon_id mon_id;
+ mon_id mon_id;
opaque priv[16]; /* private information */
};
diff --git a/rpc/xdr/src/portmap-xdr.c b/rpc/xdr/src/portmap-xdr.c
deleted file mode 100644
index 7033213c0bd..00000000000
--- a/rpc/xdr/src/portmap-xdr.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "portmap-xdr.h"
-
-bool_t
-xdr_pmap_port_by_brick_req (XDR *xdrs, pmap_port_by_brick_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_port_by_brick_rsp (XDR *xdrs, pmap_port_by_brick_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_LONG(buf, objp->status);
- IXDR_PUT_LONG(buf, objp->port);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->status = IXDR_GET_LONG(buf);
- objp->port = IXDR_GET_LONG(buf);
- }
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_brick_by_port_req (XDR *xdrs, pmap_brick_by_port_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_brick_by_port_rsp (XDR *xdrs, pmap_brick_by_port_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_LONG(buf, objp->status);
- }
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->status = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signup_req (XDR *xdrs, pmap_signup_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signup_rsp (XDR *xdrs, pmap_signup_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signin_req (XDR *xdrs, pmap_signin_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signin_rsp (XDR *xdrs, pmap_signin_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signout_req (XDR *xdrs, pmap_signout_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signout_rsp (XDR *xdrs, pmap_signout_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/portmap-xdr.h b/rpc/xdr/src/portmap-xdr.h
deleted file mode 100644
index 2686da287b4..00000000000
--- a/rpc/xdr/src/portmap-xdr.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _PORTMAP_XDR_H_RPCGEN
-#define _PORTMAP_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-struct pmap_port_by_brick_req {
- char *brick;
-};
-typedef struct pmap_port_by_brick_req pmap_port_by_brick_req;
-
-struct pmap_port_by_brick_rsp {
- int op_ret;
- int op_errno;
- int status;
- int port;
-};
-typedef struct pmap_port_by_brick_rsp pmap_port_by_brick_rsp;
-
-struct pmap_brick_by_port_req {
- int port;
-};
-typedef struct pmap_brick_by_port_req pmap_brick_by_port_req;
-
-struct pmap_brick_by_port_rsp {
- int op_ret;
- int op_errno;
- int status;
- char *brick;
-};
-typedef struct pmap_brick_by_port_rsp pmap_brick_by_port_rsp;
-
-struct pmap_signup_req {
- char *brick;
- int port;
-};
-typedef struct pmap_signup_req pmap_signup_req;
-
-struct pmap_signup_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct pmap_signup_rsp pmap_signup_rsp;
-
-struct pmap_signin_req {
- char *brick;
- int port;
-};
-typedef struct pmap_signin_req pmap_signin_req;
-
-struct pmap_signin_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct pmap_signin_rsp pmap_signin_rsp;
-
-struct pmap_signout_req {
- char *brick;
- int port;
-};
-typedef struct pmap_signout_req pmap_signout_req;
-
-struct pmap_signout_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct pmap_signout_rsp pmap_signout_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_pmap_port_by_brick_req (XDR *, pmap_port_by_brick_req*);
-extern bool_t xdr_pmap_port_by_brick_rsp (XDR *, pmap_port_by_brick_rsp*);
-extern bool_t xdr_pmap_brick_by_port_req (XDR *, pmap_brick_by_port_req*);
-extern bool_t xdr_pmap_brick_by_port_rsp (XDR *, pmap_brick_by_port_rsp*);
-extern bool_t xdr_pmap_signup_req (XDR *, pmap_signup_req*);
-extern bool_t xdr_pmap_signup_rsp (XDR *, pmap_signup_rsp*);
-extern bool_t xdr_pmap_signin_req (XDR *, pmap_signin_req*);
-extern bool_t xdr_pmap_signin_rsp (XDR *, pmap_signin_rsp*);
-extern bool_t xdr_pmap_signout_req (XDR *, pmap_signout_req*);
-extern bool_t xdr_pmap_signout_rsp (XDR *, pmap_signout_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_pmap_port_by_brick_req ();
-extern bool_t xdr_pmap_port_by_brick_rsp ();
-extern bool_t xdr_pmap_brick_by_port_req ();
-extern bool_t xdr_pmap_brick_by_port_rsp ();
-extern bool_t xdr_pmap_signup_req ();
-extern bool_t xdr_pmap_signup_rsp ();
-extern bool_t xdr_pmap_signin_req ();
-extern bool_t xdr_pmap_signin_rsp ();
-extern bool_t xdr_pmap_signout_req ();
-extern bool_t xdr_pmap_signout_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_PORTMAP_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/portmap-xdr.x b/rpc/xdr/src/portmap-xdr.x
index f60dcc76c8e..23515572b9f 100644
--- a/rpc/xdr/src/portmap-xdr.x
+++ b/rpc/xdr/src/portmap-xdr.x
@@ -1,3 +1,17 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
struct pmap_port_by_brick_req {
string brick<>;
@@ -23,20 +37,10 @@ struct pmap_brick_by_port_rsp {
};
-struct pmap_signup_req {
- string brick<>;
- int port;
-};
-
-struct pmap_signup_rsp {
- int op_ret;
- int op_errno;
-};
-
-
struct pmap_signin_req {
string brick<>;
int port;
+ int pid;
};
struct pmap_signin_rsp {
@@ -47,6 +51,7 @@ struct pmap_signin_rsp {
struct pmap_signout_req {
string brick<>;
int port;
+ int rdma_port;
};
struct pmap_signout_rsp {
diff --git a/rpc/xdr/src/rpc-common-xdr.c b/rpc/xdr/src/rpc-common-xdr.c
deleted file mode 100644
index 14ddea71537..00000000000
--- a/rpc/xdr/src/rpc-common-xdr.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "rpc-common-xdr.h"
-
-bool_t
-xdr_auth_glusterfs_parms_v2 (XDR *xdrs, auth_glusterfs_parms_v2 *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->pid);
- IXDR_PUT_U_LONG(buf, objp->uid);
- IXDR_PUT_U_LONG(buf, objp->gid);
- }
- if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
-
- } else {
- objp->pid = IXDR_GET_LONG(buf);
- objp->uid = IXDR_GET_U_LONG(buf);
- objp->gid = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_auth_glusterfs_parms (XDR *xdrs, auth_glusterfs_parms *objp)
-{
- register int32_t *buf;
- int i;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
- return FALSE;
- buf = XDR_INLINE (xdrs, (4 + 16 )* BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ngrps))
- return FALSE;
- if (!xdr_vector (xdrs, (char *)objp->groups, 16,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- } else {
- IXDR_PUT_U_LONG(buf, objp->pid);
- IXDR_PUT_U_LONG(buf, objp->uid);
- IXDR_PUT_U_LONG(buf, objp->gid);
- IXDR_PUT_U_LONG(buf, objp->ngrps);
- {
- register u_int *genp;
-
- for (i = 0, genp = objp->groups;
- i < 16; ++i) {
- IXDR_PUT_U_LONG(buf, *genp++);
- }
- }
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
- return FALSE;
- buf = XDR_INLINE (xdrs, (4 + 16 )* BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ngrps))
- return FALSE;
- if (!xdr_vector (xdrs, (char *)objp->groups, 16,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- } else {
- objp->pid = IXDR_GET_U_LONG(buf);
- objp->uid = IXDR_GET_U_LONG(buf);
- objp->gid = IXDR_GET_U_LONG(buf);
- objp->ngrps = IXDR_GET_U_LONG(buf);
- {
- register u_int *genp;
-
- for (i = 0, genp = objp->groups;
- i < 16; ++i) {
- *genp++ = IXDR_GET_U_LONG(buf);
- }
- }
- }
- return TRUE;
- }
-
- if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ngrps))
- return FALSE;
- if (!xdr_vector (xdrs, (char *)objp->groups, 16,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_dump_req (XDR *xdrs, gf_dump_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_prog_detail (XDR *xdrs, gf_prog_detail *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->progname, ~0))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->prognum))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->progver))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->next, sizeof (gf_prog_detail), (xdrproc_t) xdr_gf_prog_detail))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_dump_rsp (XDR *xdrs, gf_dump_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->prog, sizeof (gf_prog_detail), (xdrproc_t) xdr_gf_prog_detail))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/rpc-common-xdr.h b/rpc/xdr/src/rpc-common-xdr.h
deleted file mode 100644
index 66d9c3772fe..00000000000
--- a/rpc/xdr/src/rpc-common-xdr.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _RPC_COMMON_XDR_H_RPCGEN
-#define _RPC_COMMON_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-struct auth_glusterfs_parms_v2 {
- int pid;
- u_int uid;
- u_int gid;
- struct {
- u_int groups_len;
- u_int *groups_val;
- } groups;
- struct {
- u_int lk_owner_len;
- char *lk_owner_val;
- } lk_owner;
-};
-typedef struct auth_glusterfs_parms_v2 auth_glusterfs_parms_v2;
-
-struct auth_glusterfs_parms {
- u_quad_t lk_owner;
- u_int pid;
- u_int uid;
- u_int gid;
- u_int ngrps;
- u_int groups[16];
-};
-typedef struct auth_glusterfs_parms auth_glusterfs_parms;
-
-struct gf_dump_req {
- u_quad_t gfs_id;
-};
-typedef struct gf_dump_req gf_dump_req;
-
-struct gf_prog_detail {
- char *progname;
- u_quad_t prognum;
- u_quad_t progver;
- struct gf_prog_detail *next;
-};
-typedef struct gf_prog_detail gf_prog_detail;
-
-struct gf_dump_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_prog_detail *prog;
-};
-typedef struct gf_dump_rsp gf_dump_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_auth_glusterfs_parms_v2 (XDR *, auth_glusterfs_parms_v2*);
-extern bool_t xdr_auth_glusterfs_parms (XDR *, auth_glusterfs_parms*);
-extern bool_t xdr_gf_dump_req (XDR *, gf_dump_req*);
-extern bool_t xdr_gf_prog_detail (XDR *, gf_prog_detail*);
-extern bool_t xdr_gf_dump_rsp (XDR *, gf_dump_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_auth_glusterfs_parms_v2 ();
-extern bool_t xdr_auth_glusterfs_parms ();
-extern bool_t xdr_gf_dump_req ();
-extern bool_t xdr_gf_prog_detail ();
-extern bool_t xdr_gf_dump_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_RPC_COMMON_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/rpc-common-xdr.x b/rpc/xdr/src/rpc-common-xdr.x
index ca28f38b5db..baf8b4313c8 100644
--- a/rpc/xdr/src/rpc-common-xdr.x
+++ b/rpc/xdr/src/rpc-common-xdr.x
@@ -1,6 +1,23 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/glusterfs-fops.h>
+
/* This file has definition of few XDR structures which are
* not captured in any section specific file */
+%#include "xdr-common.h"
+
struct auth_glusterfs_parms_v2 {
int pid;
unsigned int uid;
@@ -10,7 +27,7 @@ struct auth_glusterfs_parms_v2 {
};
struct auth_glusterfs_parms {
- unsigned hyper lk_owner;
+ u_quad_t lk_owner;
unsigned int pid;
unsigned int uid;
unsigned int gid;
@@ -19,21 +36,31 @@ struct auth_glusterfs_parms {
};
struct gf_dump_req {
- unsigned hyper gfs_id;
+ u_quad_t gfs_id;
};
+struct gf_statedump {
+ unsigned int pid;
+};
struct gf_prog_detail {
string progname<>;
- unsigned hyper prognum;
- unsigned hyper progver;
+ u_quad_t prognum;
+ u_quad_t progver;
struct gf_prog_detail *next;
};
struct gf_dump_rsp {
- unsigned hyper gfs_id;
+ u_quad_t gfs_id;
int op_ret;
int op_errno;
struct gf_prog_detail *prog;
};
+
+
+struct gf_common_rsp {
+ int op_ret;
+ int op_errno;
+ opaque xdata<>; /* Extra data */
+} ;
diff --git a/rpc/xdr/src/rpc-pragmas.h b/rpc/xdr/src/rpc-pragmas.h
new file mode 100644
index 00000000000..4c54cf6f1df
--- /dev/null
+++ b/rpc/xdr/src/rpc-pragmas.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef GLUSTERFS_RPC_PRAGMAS_H
+#define GLUSTERFS_RPC_PRAGMAS_H
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#if !defined(__clang__)
+#if !defined(__NetBSD__)
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+#else
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-value"
+#endif
+#endif
+#endif
+
+#endif /* GLUSTERFS_RPC_PRAGMAS_H */
diff --git a/rpc/xdr/src/xdr-generic.c b/rpc/xdr/src/xdr-generic.c
index 5d5cf7197fb..20b54eb0a8a 100644
--- a/rpc/xdr/src/xdr-generic.c
+++ b/rpc/xdr/src/xdr-generic.c
@@ -1,134 +1,120 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include "xdr-generic.h"
-
ssize_t
-xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc)
+xdr_serialize_generic(struct iovec outmsg, void *res, xdrproc_t proc)
{
- ssize_t ret = -1;
- XDR xdr;
+ ssize_t ret = -1;
+ XDR xdr;
- if ((!outmsg.iov_base) || (!res) || (!proc))
- return -1;
+ if ((!outmsg.iov_base) || (!res) || (!proc))
+ return -1;
- xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
- XDR_ENCODE);
+ xdrmem_create(&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
+ XDR_ENCODE);
- if (!proc (&xdr, res)) {
- ret = -1;
- goto ret;
- }
+ if (!PROC(&xdr, res)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_encoded_length (xdr);
+ ret = xdr_encoded_length(xdr);
ret:
- return ret;
+ return ret;
}
-
ssize_t
-xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc)
+xdr_to_generic(struct iovec inmsg, void *args, xdrproc_t proc)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
+ if ((!inmsg.iov_base) || (!args) || (!proc))
+ return -1;
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
+ xdrmem_create(&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
+ if (!PROC(&xdr, args)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
ret:
- return ret;
+ return ret;
}
-
ssize_t
-xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
- struct iovec *pendingpayload)
+xdr_to_generic_payload(struct iovec inmsg, void *args, xdrproc_t proc,
+ struct iovec *pendingpayload)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
+ if ((!inmsg.iov_base) || (!args) || (!proc))
+ return -1;
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
+ xdrmem_create(&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
+ if (!PROC(&xdr, args)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
- if (pendingpayload) {
- pendingpayload->iov_base = xdr_decoded_remaining_addr (xdr);
- pendingpayload->iov_len = xdr_decoded_remaining_len (xdr);
- }
+ if (pendingpayload) {
+ pendingpayload->iov_base = xdr_decoded_remaining_addr(xdr);
+ pendingpayload->iov_len = xdr_decoded_remaining_len(xdr);
+ }
ret:
- return ret;
+ return ret;
}
ssize_t
-xdr_length_round_up (size_t len, size_t bufsize)
+xdr_length_round_up(size_t len, size_t bufsize)
{
- int roundup = 0;
+ int roundup = 0;
- roundup = len % XDR_BYTES_PER_UNIT;
- if (roundup > 0)
- roundup = XDR_BYTES_PER_UNIT - roundup;
+ roundup = len % XDR_BYTES_PER_UNIT;
+ if (roundup > 0)
+ roundup = XDR_BYTES_PER_UNIT - roundup;
- if ((roundup > 0) && ((roundup + len) <= bufsize))
- len += roundup;
+ if ((roundup > 0) && ((roundup + len) <= bufsize))
+ len += roundup;
- return len;
+ return len;
}
int
-xdr_bytes_round_up (struct iovec *vec, size_t bufsize)
+xdr_bytes_round_up(struct iovec *vec, size_t bufsize)
{
- vec->iov_len = xdr_length_round_up (vec->iov_len, bufsize);
- return 0;
+ vec->iov_len = xdr_length_round_up(vec->iov_len, bufsize);
+ return 0;
}
-
void
-xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count)
+xdr_vector_round_up(struct iovec *vec, int vcount, uint32_t count)
{
- uint32_t round_count = 0;
+ uint32_t round_count = 0;
- round_count = xdr_length_round_up (count, 1048576);
- round_count -= count;
- if (round_count == 0)
- return;
+ round_count = xdr_length_round_up(count, 1048576);
+ round_count -= count;
+ if (round_count == 0 || vcount <= 0)
+ return;
- vec[vcount-1].iov_len += round_count;
+ vec[vcount - 1].iov_len += round_count;
}
diff --git a/rpc/xdr/src/xdr-generic.h b/rpc/xdr/src/xdr-generic.h
index 24054e11c75..794dda508cc 100644
--- a/rpc/xdr/src/xdr-generic.h
+++ b/rpc/xdr/src/xdr-generic.h
@@ -1,58 +1,76 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _XDR_GENERIC_H
#define _XDR_GENERIC_H
#include <sys/uio.h>
-//#include <rpc/rpc.h>
#include <rpc/types.h>
#include <rpc/xdr.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_encoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define XDR_BYTES_PER_UNIT 4
+#define XDR_BYTES_PER_UNIT 4
-ssize_t
-xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc);
+/*
+ On OSX > 10.9
+ -------------
+ typedef bool_t (*xdrproc_t)(XDR *, void *, unsigned int);
+
+ On OSX < 10.9
+ ------------
+ typedef bool_t (*xdrproc_t)(XDR *, ...);
+
+ FreeBSD all versions
+ ------------
+ typedef bool_t (*xdrproc_t)(XDR *, ...);
+
+ NetBSD 6.1.4
+ -----------
+ typedef bool_t (*xdrproc_t)(XDR *, const void *);
+
+ Linux all versions
+ -----------
+ typedef bool_t (*xdrproc_t)(XDR *, void *,...);
+*/
+
+#if defined(__NetBSD__)
+#define PROC(xdr, res) proc(xdr, res)
+#else
+#define PROC(xdr, res) proc(xdr, res, 0)
+#endif
ssize_t
-xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc);
+xdr_serialize_generic(struct iovec outmsg, void *res, xdrproc_t proc);
ssize_t
-xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
- struct iovec *pendingpayload);
+xdr_to_generic(struct iovec inmsg, void *args, xdrproc_t proc);
+ssize_t
+xdr_to_generic_payload(struct iovec inmsg, void *args, xdrproc_t proc,
+ struct iovec *pendingpayload);
extern int
-xdr_bytes_round_up (struct iovec *vec, size_t bufsize);
+xdr_bytes_round_up(struct iovec *vec, size_t bufsize);
extern ssize_t
-xdr_length_round_up (size_t len, size_t bufsize);
+xdr_length_round_up(size_t len, size_t bufsize);
void
-xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count);
+xdr_vector_round_up(struct iovec *vec, int vcount, uint32_t count);
#endif /* !_XDR_GENERIC_H */
diff --git a/rpc/xdr/src/xdr-nfs3.c b/rpc/xdr/src/xdr-nfs3.c
index 35fca59ff80..cfccaaa89b8 100644
--- a/rpc/xdr/src/xdr-nfs3.c
+++ b/rpc/xdr/src/xdr-nfs3.c
@@ -1,1899 +1,1907 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#if !defined(__clang__)
+#if !defined(__NetBSD__)
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+#else
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-value"
+#endif
+#endif
+#endif
+
#include "xdr-nfs3.h"
-#include "mem-pool.h"
+#include <glusterfs/mem-pool.h>
#include "xdr-common.h"
-#if GF_DARWIN_HOST_OS
-#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
-#define xdr_uint32_t xdr_u_int32_t
-#define xdr_uint64_t xdr_u_int64_t
-#endif
-
bool_t
-xdr_uint64 (XDR *xdrs, uint64 *objp)
+xdr_uint64(XDR *xdrs, uint64 *objp)
{
- if (!xdr_uint64_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_int64 (XDR *xdrs, int64 *objp)
+xdr_int64(XDR *xdrs, int64 *objp)
{
- if (!xdr_int64_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_int64_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_uint32 (XDR *xdrs, uint32 *objp)
+xdr_uint32(XDR *xdrs, uint32 *objp)
{
- if (!xdr_uint32_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_int32 (XDR *xdrs, int32 *objp)
+xdr_int32(XDR *xdrs, int32 *objp)
{
- if (!xdr_int32_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_int32_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_filename3 (XDR *xdrs, filename3 *objp)
+xdr_filename3(XDR *xdrs, filename3 *objp)
{
- if (!xdr_string (xdrs, objp, ~0))
- return FALSE;
- return TRUE;
+ if (!xdr_string(xdrs, objp, ~0))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfspath3 (XDR *xdrs, nfspath3 *objp)
+xdr_nfspath3(XDR *xdrs, nfspath3 *objp)
{
- if (!xdr_string (xdrs, objp, ~0))
- return FALSE;
- return TRUE;
+ if (!xdr_string(xdrs, objp, ~0))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fileid3 (XDR *xdrs, fileid3 *objp)
+xdr_fileid3(XDR *xdrs, fileid3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_cookie3 (XDR *xdrs, cookie3 *objp)
+xdr_cookie3(XDR *xdrs, cookie3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_cookieverf3 (XDR *xdrs, cookieverf3 objp)
+xdr_cookieverf3(XDR *xdrs, cookieverf3 objp)
{
- if (!xdr_opaque (xdrs, objp, NFS3_COOKIEVERFSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_opaque(xdrs, objp, NFS3_COOKIEVERFSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_createverf3 (XDR *xdrs, createverf3 objp)
+xdr_createverf3(XDR *xdrs, createverf3 objp)
{
- if (!xdr_opaque (xdrs, objp, NFS3_CREATEVERFSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_opaque(xdrs, objp, NFS3_CREATEVERFSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_writeverf3 (XDR *xdrs, writeverf3 objp)
+xdr_writeverf3(XDR *xdrs, writeverf3 objp)
{
- if (!xdr_opaque (xdrs, objp, NFS3_WRITEVERFSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_opaque(xdrs, objp, NFS3_WRITEVERFSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_uid3 (XDR *xdrs, uid3 *objp)
+xdr_uid3(XDR *xdrs, uid3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_gid3 (XDR *xdrs, gid3 *objp)
+xdr_gid3(XDR *xdrs, gid3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_size3 (XDR *xdrs, size3 *objp)
+xdr_size3(XDR *xdrs, size3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_offset3 (XDR *xdrs, offset3 *objp)
+xdr_offset3(XDR *xdrs, offset3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mode3 (XDR *xdrs, mode3 *objp)
+xdr_mode3(XDR *xdrs, mode3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_count3 (XDR *xdrs, count3 *objp)
+xdr_count3(XDR *xdrs, count3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfsstat3 (XDR *xdrs, nfsstat3 *objp)
+xdr_nfsstat3(XDR *xdrs, nfsstat3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_ftype3 (XDR *xdrs, ftype3 *objp)
+xdr_ftype3(XDR *xdrs, ftype3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_specdata3 (XDR *xdrs, specdata3 *objp)
+xdr_specdata3(XDR *xdrs, specdata3 *objp)
{
- if (!xdr_uint32 (xdrs, &objp->specdata1))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->specdata2))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, &objp->specdata1))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->specdata2))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfs_fh3 (XDR *xdrs, nfs_fh3 *objp)
+xdr_nfs_fh3(XDR *xdrs, nfs_fh3 *objp)
{
- if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, NFS3_FHSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_bytes(xdrs, (char **)&objp->data.data_val,
+ (u_int *)&objp->data.data_len, NFS3_FHSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfstime3 (XDR *xdrs, nfstime3 *objp)
+xdr_nfstime3(XDR *xdrs, nfstime3 *objp)
{
- if (!xdr_uint32 (xdrs, &objp->seconds))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->nseconds))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, &objp->seconds))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->nseconds))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fattr3 (XDR *xdrs, fattr3 *objp)
+xdr_fattr3(XDR *xdrs, fattr3 *objp)
{
- if (!xdr_ftype3 (xdrs, &objp->type))
- return FALSE;
- if (!xdr_mode3 (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->nlink))
- return FALSE;
- if (!xdr_uid3 (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_gid3 (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->used))
- return FALSE;
- if (!xdr_specdata3 (xdrs, &objp->rdev))
- return FALSE;
- if (!xdr_uint64 (xdrs, &objp->fsid))
- return FALSE;
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->atime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->mtime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->ctime))
- return FALSE;
- return TRUE;
+ if (!xdr_ftype3(xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_mode3(xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->nlink))
+ return FALSE;
+ if (!xdr_uid3(xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_gid3(xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->used))
+ return FALSE;
+ if (!xdr_specdata3(xdrs, &objp->rdev))
+ return FALSE;
+ if (!xdr_uint64(xdrs, &objp->fsid))
+ return FALSE;
+ if (!xdr_fileid3(xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->atime))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->mtime))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->ctime))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_post_op_attr (XDR *xdrs, post_op_attr *objp)
+xdr_post_op_attr(XDR *xdrs, post_op_attr *objp)
{
- if (!xdr_bool (xdrs, &objp->attributes_follow))
- return FALSE;
- switch (objp->attributes_follow) {
- case TRUE:
- if (!xdr_fattr3 (xdrs, &objp->post_op_attr_u.attributes))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->attributes_follow))
+ return FALSE;
+ switch (objp->attributes_follow) {
+ case TRUE:
+ if (!xdr_fattr3(xdrs, &objp->post_op_attr_u.attributes))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_wcc_attr (XDR *xdrs, wcc_attr *objp)
+xdr_wcc_attr(XDR *xdrs, wcc_attr *objp)
{
- if (!xdr_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->mtime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->ctime))
- return FALSE;
- return TRUE;
+ if (!xdr_size3(xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->mtime))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->ctime))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_pre_op_attr (XDR *xdrs, pre_op_attr *objp)
+xdr_pre_op_attr(XDR *xdrs, pre_op_attr *objp)
{
- if (!xdr_bool (xdrs, &objp->attributes_follow))
- return FALSE;
- switch (objp->attributes_follow) {
- case TRUE:
- if (!xdr_wcc_attr (xdrs, &objp->pre_op_attr_u.attributes))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->attributes_follow))
+ return FALSE;
+ switch (objp->attributes_follow) {
+ case TRUE:
+ if (!xdr_wcc_attr(xdrs, &objp->pre_op_attr_u.attributes))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_wcc_data (XDR *xdrs, wcc_data *objp)
+xdr_wcc_data(XDR *xdrs, wcc_data *objp)
{
- if (!xdr_pre_op_attr (xdrs, &objp->before))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->after))
- return FALSE;
- return TRUE;
+ if (!xdr_pre_op_attr(xdrs, &objp->before))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->after))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_post_op_fh3 (XDR *xdrs, post_op_fh3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->handle_follows))
- return FALSE;
- switch (objp->handle_follows) {
- case TRUE:
- if (!xdr_nfs_fh3 (xdrs, &objp->post_op_fh3_u.handle))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
-}
-
-bool_t
-xdr_time_how (XDR *xdrs, time_how *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+xdr_post_op_fh3(XDR *xdrs, post_op_fh3 *objp)
+{
+ if (!xdr_bool(xdrs, &objp->handle_follows))
+ return FALSE;
+ switch (objp->handle_follows) {
+ case TRUE:
+ if (!xdr_nfs_fh3(xdrs, &objp->post_op_fh3_u.handle))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_time_how(XDR *xdrs, time_how *objp)
+{
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_set_mode3 (XDR *xdrs, set_mode3 *objp)
+xdr_set_mode3(XDR *xdrs, set_mode3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_mode3 (xdrs, &objp->set_mode3_u.mode))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_mode3(xdrs, &objp->set_mode3_u.mode))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_uid3 (XDR *xdrs, set_uid3 *objp)
+xdr_set_uid3(XDR *xdrs, set_uid3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_uid3 (xdrs, &objp->set_uid3_u.uid))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_uid3(xdrs, &objp->set_uid3_u.uid))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_gid3 (XDR *xdrs, set_gid3 *objp)
+xdr_set_gid3(XDR *xdrs, set_gid3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_gid3 (xdrs, &objp->set_gid3_u.gid))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_gid3(xdrs, &objp->set_gid3_u.gid))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_size3 (XDR *xdrs, set_size3 *objp)
+xdr_set_size3(XDR *xdrs, set_size3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_size3 (xdrs, &objp->set_size3_u.size))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_size3(xdrs, &objp->set_size3_u.size))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_atime (XDR *xdrs, set_atime *objp)
+xdr_set_atime(XDR *xdrs, set_atime *objp)
{
- if (!xdr_time_how (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case SET_TO_CLIENT_TIME:
- if (!xdr_nfstime3 (xdrs, &objp->set_atime_u.atime))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_time_how(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case SET_TO_CLIENT_TIME:
+ if (!xdr_nfstime3(xdrs, &objp->set_atime_u.atime))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_mtime (XDR *xdrs, set_mtime *objp)
+xdr_set_mtime(XDR *xdrs, set_mtime *objp)
{
- if (!xdr_time_how (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case SET_TO_CLIENT_TIME:
- if (!xdr_nfstime3 (xdrs, &objp->set_mtime_u.mtime))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_time_how(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case SET_TO_CLIENT_TIME:
+ if (!xdr_nfstime3(xdrs, &objp->set_mtime_u.mtime))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_sattr3 (XDR *xdrs, sattr3 *objp)
+xdr_sattr3(XDR *xdrs, sattr3 *objp)
{
- if (!xdr_set_mode3 (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_set_uid3 (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_set_gid3 (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_set_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_set_atime (xdrs, &objp->atime))
- return FALSE;
- if (!xdr_set_mtime (xdrs, &objp->mtime))
- return FALSE;
- return TRUE;
+ if (!xdr_set_mode3(xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_set_uid3(xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_set_gid3(xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_set_size3(xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_set_atime(xdrs, &objp->atime))
+ return FALSE;
+ if (!xdr_set_mtime(xdrs, &objp->mtime))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_diropargs3 (XDR *xdrs, diropargs3 *objp)
+xdr_diropargs3(XDR *xdrs, diropargs3 *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_filename3(xdrs, &objp->name))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_getattr3args (XDR *xdrs, getattr3args *objp)
+xdr_getattr3args(XDR *xdrs, getattr3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_getattr3resok (XDR *xdrs, getattr3resok *objp)
+xdr_getattr3resok(XDR *xdrs, getattr3resok *objp)
{
- if (!xdr_fattr3 (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_fattr3(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_getattr3res (XDR *xdrs, getattr3res *objp)
+xdr_getattr3res(XDR *xdrs, getattr3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_getattr3resok (xdrs, &objp->getattr3res_u.resok))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_getattr3resok(xdrs, &objp->getattr3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_sattrguard3 (XDR *xdrs, sattrguard3 *objp)
+xdr_sattrguard3(XDR *xdrs, sattrguard3 *objp)
{
- if (!xdr_bool (xdrs, &objp->check))
- return FALSE;
- switch (objp->check) {
- case TRUE:
- if (!xdr_nfstime3 (xdrs, &objp->sattrguard3_u.obj_ctime))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->check))
+ return FALSE;
+ switch (objp->check) {
+ case TRUE:
+ if (!xdr_nfstime3(xdrs, &objp->sattrguard3_u.obj_ctime))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_setattr3args (XDR *xdrs, setattr3args *objp)
+xdr_setattr3args(XDR *xdrs, setattr3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_sattr3 (xdrs, &objp->new_attributes))
- return FALSE;
- if (!xdr_sattrguard3 (xdrs, &objp->guard))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_sattr3(xdrs, &objp->new_attributes))
+ return FALSE;
+ if (!xdr_sattrguard3(xdrs, &objp->guard))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_setattr3resok (XDR *xdrs, setattr3resok *objp)
+xdr_setattr3resok(XDR *xdrs, setattr3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->obj_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_setattr3resfail (XDR *xdrs, setattr3resfail *objp)
+xdr_setattr3resfail(XDR *xdrs, setattr3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->obj_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_setattr3res (XDR *xdrs, setattr3res *objp)
+xdr_setattr3res(XDR *xdrs, setattr3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_setattr3resok (xdrs, &objp->setattr3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_setattr3resfail (xdrs, &objp->setattr3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_setattr3resok(xdrs, &objp->setattr3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_setattr3resfail(xdrs, &objp->setattr3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_lookup3args (XDR *xdrs, lookup3args *objp)
+xdr_lookup3args(XDR *xdrs, lookup3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->what))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->what))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_lookup3resok (XDR *xdrs, lookup3resok *objp)
+xdr_lookup3resok(XDR *xdrs, lookup3resok *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_lookup3resfail (XDR *xdrs, lookup3resfail *objp)
+xdr_lookup3resfail(XDR *xdrs, lookup3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_lookup3res (XDR *xdrs, lookup3res *objp)
+xdr_lookup3res(XDR *xdrs, lookup3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_lookup3resok (xdrs, &objp->lookup3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_lookup3resfail (xdrs, &objp->lookup3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_lookup3resok(xdrs, &objp->lookup3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_lookup3resfail(xdrs, &objp->lookup3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_access3args (XDR *xdrs, access3args *objp)
+xdr_access3args(XDR *xdrs, access3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->access))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->access))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_access3resok (XDR *xdrs, access3resok *objp)
+xdr_access3resok(XDR *xdrs, access3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->access))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->access))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_access3resfail (XDR *xdrs, access3resfail *objp)
+xdr_access3resfail(XDR *xdrs, access3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_access3res (XDR *xdrs, access3res *objp)
+xdr_access3res(XDR *xdrs, access3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_access3resok (xdrs, &objp->access3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_access3resfail (xdrs, &objp->access3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_access3resok(xdrs, &objp->access3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_access3resfail(xdrs, &objp->access3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_readlink3args (XDR *xdrs, readlink3args *objp)
+xdr_readlink3args(XDR *xdrs, readlink3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->symlink))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->symlink))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readlink3resok (XDR *xdrs, readlink3resok *objp)
+xdr_readlink3resok(XDR *xdrs, readlink3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
- return FALSE;
- if (!xdr_nfspath3 (xdrs, &objp->data))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->symlink_attributes))
+ return FALSE;
+ if (!xdr_nfspath3(xdrs, &objp->data))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readlink3resfail (XDR *xdrs, readlink3resfail *objp)
+xdr_readlink3resfail(XDR *xdrs, readlink3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->symlink_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readlink3res (XDR *xdrs, readlink3res *objp)
+xdr_readlink3res(XDR *xdrs, readlink3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readlink3resok (xdrs, &objp->readlink3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readlink3resfail (xdrs, &objp->readlink3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readlink3resok(xdrs, &objp->readlink3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readlink3resfail(xdrs, &objp->readlink3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_read3args (XDR *xdrs, read3args *objp)
+xdr_read3args(XDR *xdrs, read3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3(xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_read3resok_nocopy (XDR *xdrs, read3resok *objp)
+xdr_read3resok_nocopy(XDR *xdrs, read3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ if (!xdr_u_int(xdrs, (u_int *)&objp->data.data_len))
+ return FALSE;
+ return TRUE;
}
-
bool_t
-xdr_read3resok (XDR *xdrs, read3resok *objp)
+xdr_read3resok(XDR *xdrs, read3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ if (!xdr_bytes(xdrs, (char **)&objp->data.data_val,
+ (u_int *)&objp->data.data_len, ~0))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_read3resfail (XDR *xdrs, read3resfail *objp)
+xdr_read3resfail(XDR *xdrs, read3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ return TRUE;
}
-
bool_t
-xdr_read3res_nocopy (XDR *xdrs, read3res *objp)
+xdr_read3res_nocopy(XDR *xdrs, read3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_read3resok_nocopy (xdrs, &objp->read3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_read3resok_nocopy(xdrs, &objp->read3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_read3resfail(xdrs, &objp->read3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
-
bool_t
-xdr_read3res (XDR *xdrs, read3res *objp)
+xdr_read3res(XDR *xdrs, read3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_read3resok (xdrs, &objp->read3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_read3resok(xdrs, &objp->read3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_read3resfail(xdrs, &objp->read3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_stable_how (XDR *xdrs, stable_how *objp)
+xdr_stable_how(XDR *xdrs, stable_how *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_write3args (XDR *xdrs, write3args *objp)
+xdr_write3args(XDR *xdrs, write3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_stable_how (xdrs, &objp->stable))
- return FALSE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3(xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_stable_how(xdrs, &objp->stable))
+ return FALSE;
- /* Added specifically to avoid copies from the xdr buffer into
- * the write3args structure, which will also require an already
- * allocated buffer. That is not optimal.
- */
- if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
- return FALSE;
+ /* Added specifically to avoid copies from the xdr buffer into
+ * the write3args structure, which will also require an already
+ * allocated buffer. That is not optimal.
+ */
+ if (!xdr_u_int(xdrs, (u_int *)&objp->data.data_len))
+ return FALSE;
- /* The remaining bytes in the xdr buffer are the bytes that need to be
- * written. See how these bytes are extracted in the xdr_to_write3args
- * code path. Be careful, while using the write3args structure, since
- * only the data.data_len has been filled. The actual data is
- * extracted in xdr_to_write3args path.
- */
+ /* The remaining bytes in the xdr buffer are the bytes that need to be
+ * written. See how these bytes are extracted in the xdr_to_write3args
+ * code path. Be careful, while using the write3args structure, since
+ * only the data.data_len has been filled. The actual data is
+ * extracted in xdr_to_write3args path.
+ */
- /* if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
- return FALSE;
- */
- return TRUE;
+ /* if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *)
+ &objp->data.data_len, ~0)) return FALSE;
+ */
+ return TRUE;
}
bool_t
-xdr_write3resok (XDR *xdrs, write3resok *objp)
+xdr_write3resok(XDR *xdrs, write3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_stable_how (xdrs, &objp->committed))
- return FALSE;
- if (!xdr_writeverf3 (xdrs, objp->verf))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_stable_how(xdrs, &objp->committed))
+ return FALSE;
+ if (!xdr_writeverf3(xdrs, objp->verf))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_write3resfail (XDR *xdrs, write3resfail *objp)
+xdr_write3resfail(XDR *xdrs, write3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_write3res (XDR *xdrs, write3res *objp)
+xdr_write3res(XDR *xdrs, write3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_write3resok (xdrs, &objp->write3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_write3resfail (xdrs, &objp->write3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_write3resok(xdrs, &objp->write3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_write3resfail(xdrs, &objp->write3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_createmode3 (XDR *xdrs, createmode3 *objp)
+xdr_createmode3(XDR *xdrs, createmode3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_createhow3 (XDR *xdrs, createhow3 *objp)
+xdr_createhow3(XDR *xdrs, createhow3 *objp)
{
- if (!xdr_createmode3 (xdrs, &objp->mode))
- return FALSE;
- switch (objp->mode) {
- case UNCHECKED:
- case GUARDED:
- if (!xdr_sattr3 (xdrs, &objp->createhow3_u.obj_attributes))
- return FALSE;
- break;
- case EXCLUSIVE:
- if (!xdr_createverf3 (xdrs, objp->createhow3_u.verf))
- return FALSE;
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_createmode3(xdrs, &objp->mode))
+ return FALSE;
+ switch (objp->mode) {
+ case UNCHECKED:
+ case GUARDED:
+ if (!xdr_sattr3(xdrs, &objp->createhow3_u.obj_attributes))
+ return FALSE;
+ break;
+ case EXCLUSIVE:
+ if (!xdr_createverf3(xdrs, objp->createhow3_u.verf))
+ return FALSE;
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_create3args (XDR *xdrs, create3args *objp)
+xdr_create3args(XDR *xdrs, create3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_createhow3 (xdrs, &objp->how))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_createhow3(xdrs, &objp->how))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_create3resok (XDR *xdrs, create3resok *objp)
+xdr_create3resok(XDR *xdrs, create3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_create3resfail (XDR *xdrs, create3resfail *objp)
+xdr_create3resfail(XDR *xdrs, create3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_create3res (XDR *xdrs, create3res *objp)
+xdr_create3res(XDR *xdrs, create3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_create3resok (xdrs, &objp->create3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_create3resfail (xdrs, &objp->create3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_create3resok(xdrs, &objp->create3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_create3resfail(xdrs, &objp->create3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_mkdir3args (XDR *xdrs, mkdir3args *objp)
+xdr_mkdir3args(XDR *xdrs, mkdir3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_sattr3 (xdrs, &objp->attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_sattr3(xdrs, &objp->attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mkdir3resok (XDR *xdrs, mkdir3resok *objp)
+xdr_mkdir3resok(XDR *xdrs, mkdir3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mkdir3resfail (XDR *xdrs, mkdir3resfail *objp)
+xdr_mkdir3resfail(XDR *xdrs, mkdir3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mkdir3res (XDR *xdrs, mkdir3res *objp)
+xdr_mkdir3res(XDR *xdrs, mkdir3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_mkdir3resok (xdrs, &objp->mkdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_mkdir3resfail (xdrs, &objp->mkdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_mkdir3resok(xdrs, &objp->mkdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_mkdir3resfail(xdrs, &objp->mkdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_symlinkdata3 (XDR *xdrs, symlinkdata3 *objp)
+xdr_symlinkdata3(XDR *xdrs, symlinkdata3 *objp)
{
- if (!xdr_sattr3 (xdrs, &objp->symlink_attributes))
- return FALSE;
- if (!xdr_nfspath3 (xdrs, &objp->symlink_data))
- return FALSE;
- return TRUE;
+ if (!xdr_sattr3(xdrs, &objp->symlink_attributes))
+ return FALSE;
+ if (!xdr_nfspath3(xdrs, &objp->symlink_data))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3args (XDR *xdrs, symlink3args *objp)
+xdr_symlink3args(XDR *xdrs, symlink3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_symlinkdata3 (xdrs, &objp->symlink))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_symlinkdata3(xdrs, &objp->symlink))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3resok (XDR *xdrs, symlink3resok *objp)
+xdr_symlink3resok(XDR *xdrs, symlink3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3resfail (XDR *xdrs, symlink3resfail *objp)
+xdr_symlink3resfail(XDR *xdrs, symlink3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3res (XDR *xdrs, symlink3res *objp)
+xdr_symlink3res(XDR *xdrs, symlink3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_symlink3resok (xdrs, &objp->symlink3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_symlink3resfail (xdrs, &objp->symlink3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_symlink3resok(xdrs, &objp->symlink3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_symlink3resfail(xdrs, &objp->symlink3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_devicedata3 (XDR *xdrs, devicedata3 *objp)
+xdr_devicedata3(XDR *xdrs, devicedata3 *objp)
{
- if (!xdr_sattr3 (xdrs, &objp->dev_attributes))
- return FALSE;
- if (!xdr_specdata3 (xdrs, &objp->spec))
- return FALSE;
- return TRUE;
+ if (!xdr_sattr3(xdrs, &objp->dev_attributes))
+ return FALSE;
+ if (!xdr_specdata3(xdrs, &objp->spec))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknoddata3 (XDR *xdrs, mknoddata3 *objp)
+xdr_mknoddata3(XDR *xdrs, mknoddata3 *objp)
{
- if (!xdr_ftype3 (xdrs, &objp->type))
- return FALSE;
- switch (objp->type) {
- case NF3CHR:
- case NF3BLK:
- if (!xdr_devicedata3 (xdrs, &objp->mknoddata3_u.device))
- return FALSE;
- break;
- case NF3SOCK:
- case NF3FIFO:
- if (!xdr_sattr3 (xdrs, &objp->mknoddata3_u.pipe_attributes))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_ftype3(xdrs, &objp->type))
+ return FALSE;
+ switch (objp->type) {
+ case NF3CHR:
+ case NF3BLK:
+ if (!xdr_devicedata3(xdrs, &objp->mknoddata3_u.device))
+ return FALSE;
+ break;
+ case NF3SOCK:
+ case NF3FIFO:
+ if (!xdr_sattr3(xdrs, &objp->mknoddata3_u.pipe_attributes))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_mknod3args (XDR *xdrs, mknod3args *objp)
+xdr_mknod3args(XDR *xdrs, mknod3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_mknoddata3 (xdrs, &objp->what))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_mknoddata3(xdrs, &objp->what))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknod3resok (XDR *xdrs, mknod3resok *objp)
+xdr_mknod3resok(XDR *xdrs, mknod3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknod3resfail (XDR *xdrs, mknod3resfail *objp)
+xdr_mknod3resfail(XDR *xdrs, mknod3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknod3res (XDR *xdrs, mknod3res *objp)
+xdr_mknod3res(XDR *xdrs, mknod3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_mknod3resok (xdrs, &objp->mknod3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_mknod3resfail (xdrs, &objp->mknod3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_mknod3resok(xdrs, &objp->mknod3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_mknod3resfail(xdrs, &objp->mknod3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_remove3args (XDR *xdrs, remove3args *objp)
+xdr_remove3args(XDR *xdrs, remove3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_remove3resok (XDR *xdrs, remove3resok *objp)
+xdr_remove3resok(XDR *xdrs, remove3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_remove3resfail (XDR *xdrs, remove3resfail *objp)
+xdr_remove3resfail(XDR *xdrs, remove3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_remove3res (XDR *xdrs, remove3res *objp)
+xdr_remove3res(XDR *xdrs, remove3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_remove3resok (xdrs, &objp->remove3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_remove3resfail (xdrs, &objp->remove3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_remove3resok(xdrs, &objp->remove3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_remove3resfail(xdrs, &objp->remove3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_rmdir3args (XDR *xdrs, rmdir3args *objp)
+xdr_rmdir3args(XDR *xdrs, rmdir3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rmdir3resok (XDR *xdrs, rmdir3resok *objp)
+xdr_rmdir3resok(XDR *xdrs, rmdir3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rmdir3resfail (XDR *xdrs, rmdir3resfail *objp)
+xdr_rmdir3resfail(XDR *xdrs, rmdir3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rmdir3res (XDR *xdrs, rmdir3res *objp)
+xdr_rmdir3res(XDR *xdrs, rmdir3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_rmdir3resok (xdrs, &objp->rmdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_rmdir3resfail (xdrs, &objp->rmdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_rmdir3resok(xdrs, &objp->rmdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_rmdir3resfail(xdrs, &objp->rmdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_rename3args (XDR *xdrs, rename3args *objp)
+xdr_rename3args(XDR *xdrs, rename3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->from))
- return FALSE;
- if (!xdr_diropargs3 (xdrs, &objp->to))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->from))
+ return FALSE;
+ if (!xdr_diropargs3(xdrs, &objp->to))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rename3resok (XDR *xdrs, rename3resok *objp)
+xdr_rename3resok(XDR *xdrs, rename3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->fromdir_wcc))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->todir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rename3resfail (XDR *xdrs, rename3resfail *objp)
+xdr_rename3resfail(XDR *xdrs, rename3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->fromdir_wcc))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->todir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rename3res (XDR *xdrs, rename3res *objp)
+xdr_rename3res(XDR *xdrs, rename3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_rename3resok (xdrs, &objp->rename3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_rename3resfail (xdrs, &objp->rename3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_rename3resok(xdrs, &objp->rename3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_rename3resfail(xdrs, &objp->rename3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_link3args (XDR *xdrs, link3args *objp)
+xdr_link3args(XDR *xdrs, link3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_diropargs3 (xdrs, &objp->link))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_diropargs3(xdrs, &objp->link))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_link3resok (XDR *xdrs, link3resok *objp)
+xdr_link3resok(XDR *xdrs, link3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->linkdir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_link3resfail (XDR *xdrs, link3resfail *objp)
+xdr_link3resfail(XDR *xdrs, link3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->linkdir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_link3res (XDR *xdrs, link3res *objp)
+xdr_link3res(XDR *xdrs, link3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_link3resok (xdrs, &objp->link3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_link3resfail (xdrs, &objp->link3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_link3resok(xdrs, &objp->link3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_link3resfail(xdrs, &objp->link3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_readdir3args (XDR *xdrs, readdir3args *objp)
+xdr_readdir3args(XDR *xdrs, readdir3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_entry3 (XDR *xdrs, entry3 *objp)
+xdr_entry3(XDR *xdrs, entry3 *objp)
{
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entry3), (xdrproc_t) xdr_entry3))
- return FALSE;
- return TRUE;
+ if (!xdr_fileid3(xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_filename3(xdrs, &objp->name))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_pointer(xdrs, (char **)&objp->nextentry, sizeof(entry3),
+ (xdrproc_t)xdr_entry3))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_dirlist3 (XDR *xdrs, dirlist3 *objp)
+xdr_dirlist3(XDR *xdrs, dirlist3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entry3), (xdrproc_t) xdr_entry3))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)&objp->entries, sizeof(entry3),
+ (xdrproc_t)xdr_entry3))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdir3resok (XDR *xdrs, readdir3resok *objp)
+xdr_readdir3resok(XDR *xdrs, readdir3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_dirlist3 (xdrs, &objp->reply))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_dirlist3(xdrs, &objp->reply))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdir3resfail (XDR *xdrs, readdir3resfail *objp)
+xdr_readdir3resfail(XDR *xdrs, readdir3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdir3res (XDR *xdrs, readdir3res *objp)
+xdr_readdir3res(XDR *xdrs, readdir3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readdir3resok (xdrs, &objp->readdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readdir3resfail (xdrs, &objp->readdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readdir3resok(xdrs, &objp->readdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readdir3resfail(xdrs, &objp->readdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_readdirp3args (XDR *xdrs, readdirp3args *objp)
+xdr_readdirp3args(XDR *xdrs, readdirp3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->dircount))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->maxcount))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->dircount))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->maxcount))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_entryp3 (XDR *xdrs, entryp3 *objp)
+xdr_entryp3(XDR *xdrs, entryp3 *objp)
{
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->name_attributes))
- return FALSE;
- if (!xdr_post_op_fh3 (xdrs, &objp->name_handle))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
- return FALSE;
- return TRUE;
+ if (!xdr_fileid3(xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_filename3(xdrs, &objp->name))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->name_attributes))
+ return FALSE;
+ if (!xdr_post_op_fh3(xdrs, &objp->name_handle))
+ return FALSE;
+ if (!xdr_pointer(xdrs, (char **)&objp->nextentry, sizeof(entryp3),
+ (xdrproc_t)xdr_entryp3))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_dirlistp3 (XDR *xdrs, dirlistp3 *objp)
+xdr_dirlistp3(XDR *xdrs, dirlistp3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)&objp->entries, sizeof(entryp3),
+ (xdrproc_t)xdr_entryp3))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdirp3resok (XDR *xdrs, readdirp3resok *objp)
+xdr_readdirp3resok(XDR *xdrs, readdirp3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_dirlistp3 (xdrs, &objp->reply))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_dirlistp3(xdrs, &objp->reply))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdirp3resfail (XDR *xdrs, readdirp3resfail *objp)
+xdr_readdirp3resfail(XDR *xdrs, readdirp3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdirp3res (XDR *xdrs, readdirp3res *objp)
+xdr_readdirp3res(XDR *xdrs, readdirp3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readdirp3resok (xdrs, &objp->readdirp3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readdirp3resfail (xdrs, &objp->readdirp3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readdirp3resok(xdrs, &objp->readdirp3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readdirp3resfail(xdrs, &objp->readdirp3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_fsstat3args (XDR *xdrs, fsstat3args *objp)
+xdr_fsstat3args(XDR *xdrs, fsstat3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->fsroot))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsstat3resok (XDR *xdrs, fsstat3resok *objp)
+xdr_fsstat3resok(XDR *xdrs, fsstat3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->tbytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->fbytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->abytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->tfiles))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->ffiles))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->afiles))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->invarsec))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->tbytes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->fbytes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->abytes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->tfiles))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->ffiles))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->afiles))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->invarsec))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsstat3resfail (XDR *xdrs, fsstat3resfail *objp)
+xdr_fsstat3resfail(XDR *xdrs, fsstat3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsstat3res (XDR *xdrs, fsstat3res *objp)
+xdr_fsstat3res(XDR *xdrs, fsstat3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_fsstat3resok (xdrs, &objp->fsstat3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_fsstat3resfail (xdrs, &objp->fsstat3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_fsstat3resok(xdrs, &objp->fsstat3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_fsstat3resfail(xdrs, &objp->fsstat3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_fsinfo3args (XDR *xdrs, fsinfo3args *objp)
+xdr_fsinfo3args(XDR *xdrs, fsinfo3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->fsroot))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsinfo3resok (XDR *xdrs, fsinfo3resok *objp)
+xdr_fsinfo3resok(XDR *xdrs, fsinfo3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtpref))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtmult))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtpref))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtmult))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->dtpref))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->maxfilesize))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->time_delta))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->properties))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3resfail (XDR *xdrs, fsinfo3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3res (XDR *xdrs, fsinfo3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_fsinfo3resok (xdrs, &objp->fsinfo3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_fsinfo3resfail (xdrs, &objp->fsinfo3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3args (XDR *xdrs, pathconf3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3resok (XDR *xdrs, pathconf3resok *objp)
-{
- register int32_t *buf;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- } else {
- IXDR_PUT_BOOL(buf, objp->no_trunc);
- IXDR_PUT_BOOL(buf, objp->chown_restricted);
- IXDR_PUT_BOOL(buf, objp->case_insensitive);
- IXDR_PUT_BOOL(buf, objp->case_preserving);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- } else {
- objp->no_trunc = IXDR_GET_BOOL(buf);
- objp->chown_restricted = IXDR_GET_BOOL(buf);
- objp->case_insensitive = IXDR_GET_BOOL(buf);
- objp->case_preserving = IXDR_GET_BOOL(buf);
- }
- return TRUE;
- }
-
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->rtmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->rtpref))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->rtmult))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->wtmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->wtpref))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->wtmult))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->dtpref))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->maxfilesize))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->time_delta))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->properties))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_pathconf3resfail (XDR *xdrs, pathconf3resfail *objp)
+xdr_fsinfo3resfail(XDR *xdrs, fsinfo3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsinfo3res(XDR *xdrs, fsinfo3res *objp)
+{
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_fsinfo3resok(xdrs, &objp->fsinfo3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_fsinfo3resfail(xdrs, &objp->fsinfo3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_pathconf3res (XDR *xdrs, pathconf3res *objp)
+xdr_pathconf3args(XDR *xdrs, pathconf3args *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_pathconf3resok (xdrs, &objp->pathconf3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_pathconf3resfail (xdrs, &objp->pathconf3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_commit3args (XDR *xdrs, commit3args *objp)
+xdr_pathconf3resok(XDR *xdrs, pathconf3resok *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
+ register int32_t *buf;
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->name_max))
+ return FALSE;
+ buf = XDR_INLINE(xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_bool(xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_preserving))
+ return FALSE;
+ } else {
+ IXDR_PUT_BOOL(buf, objp->no_trunc);
+ IXDR_PUT_BOOL(buf, objp->chown_restricted);
+ IXDR_PUT_BOOL(buf, objp->case_insensitive);
+ IXDR_PUT_BOOL(buf, objp->case_preserving);
+ }
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->name_max))
+ return FALSE;
+ buf = XDR_INLINE(xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_bool(xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_preserving))
+ return FALSE;
+ } else {
+ objp->no_trunc = IXDR_GET_BOOL(buf);
+ objp->chown_restricted = IXDR_GET_BOOL(buf);
+ objp->case_insensitive = IXDR_GET_BOOL(buf);
+ objp->case_preserving = IXDR_GET_BOOL(buf);
+ }
+ return TRUE;
+ }
+
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->name_max))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_preserving))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_commit3resok (XDR *xdrs, commit3resok *objp)
+xdr_pathconf3resfail(XDR *xdrs, pathconf3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- if (!xdr_writeverf3 (xdrs, objp->verf))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_commit3resfail (XDR *xdrs, commit3resfail *objp)
+xdr_pathconf3res(XDR *xdrs, pathconf3res *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_pathconf3resok(xdrs, &objp->pathconf3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_pathconf3resfail(xdrs, &objp->pathconf3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_commit3res (XDR *xdrs, commit3res *objp)
+xdr_commit3args(XDR *xdrs, commit3args *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_commit3resok (xdrs, &objp->commit3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_commit3resfail (xdrs, &objp->commit3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3(xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fhandle3 (XDR *xdrs, fhandle3 *objp)
+xdr_commit3resok(XDR *xdrs, commit3resok *objp)
{
- if (!xdr_bytes (xdrs, (char **)&objp->fhandle3_val, (u_int *) &objp->fhandle3_len, FHSIZE3))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ if (!xdr_writeverf3(xdrs, objp->verf))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_dirpath (XDR *xdrs, dirpath *objp)
+xdr_commit3resfail(XDR *xdrs, commit3resfail *objp)
{
- if (!xdr_string (xdrs, objp, MNTPATHLEN))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_name (XDR *xdrs, name *objp)
+xdr_commit3res(XDR *xdrs, commit3res *objp)
{
- if (!xdr_string (xdrs, objp, MNTNAMLEN))
- return FALSE;
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_commit3resok(xdrs, &objp->commit3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_commit3resfail(xdrs, &objp->commit3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_mountstat3 (XDR *xdrs, mountstat3 *objp)
+xdr_fhandle3(XDR *xdrs, fhandle3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_bytes(xdrs, (char **)&objp->fhandle3_val,
+ (u_int *)&objp->fhandle3_len, FHSIZE3))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountres3_ok (XDR *xdrs, mountres3_ok *objp)
+xdr_dirpath(XDR *xdrs, dirpath *objp)
{
- if (!xdr_fhandle3 (xdrs, &objp->fhandle))
- return FALSE;
- if (!xdr_array (xdrs, (char **)&objp->auth_flavors.auth_flavors_val, (u_int *) &objp->auth_flavors.auth_flavors_len, ~0,
- sizeof (int), (xdrproc_t) xdr_int))
- return FALSE;
- return TRUE;
+ if (!xdr_string(xdrs, objp, MNTPATHLEN))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountres3 (XDR *xdrs, mountres3 *objp)
+xdr_name(XDR *xdrs, name *objp)
{
- if (!xdr_mountstat3 (xdrs, &objp->fhs_status))
- return FALSE;
- switch (objp->fhs_status) {
- case MNT3_OK:
- if (!xdr_mountres3_ok (xdrs, &objp->mountres3_u.mountinfo))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_string(xdrs, objp, MNTNAMLEN))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountlist (XDR *xdrs, mountlist *objp)
+xdr_mountstat3(XDR *xdrs, mountstat3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct mountbody), (xdrproc_t) xdr_mountbody))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountbody (XDR *xdrs, mountbody *objp)
+xdr_mountres3_ok(XDR *xdrs, mountres3_ok *objp)
{
- if (!xdr_name (xdrs, &objp->ml_hostname))
- return FALSE;
- if (!xdr_dirpath (xdrs, &objp->ml_directory))
- return FALSE;
- if (!xdr_mountlist (xdrs, &objp->ml_next))
- return FALSE;
- return TRUE;
+ if (!xdr_fhandle3(xdrs, &objp->fhandle))
+ return FALSE;
+ if (!xdr_array(xdrs, (char **)&objp->auth_flavors.auth_flavors_val,
+ (u_int *)&objp->auth_flavors.auth_flavors_len, ~0,
+ sizeof(int), (xdrproc_t)xdr_int))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_groups (XDR *xdrs, groups *objp)
+xdr_mountres3(XDR *xdrs, mountres3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct groupnode), (xdrproc_t) xdr_groupnode))
- return FALSE;
- return TRUE;
+ if (!xdr_mountstat3(xdrs, &objp->fhs_status))
+ return FALSE;
+ switch (objp->fhs_status) {
+ case MNT3_OK:
+ if (!xdr_mountres3_ok(xdrs, &objp->mountres3_u.mountinfo))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_groupnode (XDR *xdrs, groupnode *objp)
+xdr_mountlist(XDR *xdrs, mountlist *objp)
{
- if (!xdr_name (xdrs, &objp->gr_name))
- return FALSE;
- if (!xdr_groups (xdrs, &objp->gr_next))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)objp, sizeof(struct mountbody),
+ (xdrproc_t)xdr_mountbody))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_exports (XDR *xdrs, exports *objp)
+xdr_mountbody(XDR *xdrs, mountbody *objp)
{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct exportnode), (xdrproc_t) xdr_exportnode))
- return FALSE;
- return TRUE;
+ if (!xdr_name(xdrs, &objp->ml_hostname))
+ return FALSE;
+ if (!xdr_dirpath(xdrs, &objp->ml_directory))
+ return FALSE;
+ if (!xdr_mountlist(xdrs, &objp->ml_next))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_exportnode (XDR *xdrs, exportnode *objp)
+xdr_groups(XDR *xdrs, groups *objp)
{
- if (!xdr_dirpath (xdrs, &objp->ex_dir))
- return FALSE;
- if (!xdr_groups (xdrs, &objp->ex_groups))
- return FALSE;
- if (!xdr_exports (xdrs, &objp->ex_next))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)objp, sizeof(struct groupnode),
+ (xdrproc_t)xdr_groupnode))
+ return FALSE;
+ return TRUE;
}
-void
-xdr_free_exports_list (struct exportnode *first)
+bool_t
+xdr_groupnode(XDR *xdrs, groupnode *objp)
{
- struct exportnode *elist = NULL;
+ if (!xdr_name(xdrs, &objp->gr_name))
+ return FALSE;
+ if (!xdr_groups(xdrs, &objp->gr_next))
+ return FALSE;
+ return TRUE;
+}
- if (!first)
- return;
+bool_t
+xdr_exports(XDR *xdrs, exports *objp)
+{
+ if (!xdr_pointer(xdrs, (char **)objp, sizeof(struct exportnode),
+ (xdrproc_t)xdr_exportnode))
+ return FALSE;
+ return TRUE;
+}
- while (first) {
- elist = first->ex_next;
- if (first->ex_dir)
- GF_FREE (first->ex_dir);
+bool_t
+xdr_exportnode(XDR *xdrs, exportnode *objp)
+{
+ if (!xdr_dirpath(xdrs, &objp->ex_dir))
+ return FALSE;
+ if (!xdr_groups(xdrs, &objp->ex_groups))
+ return FALSE;
+ if (!xdr_exports(xdrs, &objp->ex_next))
+ return FALSE;
+ return TRUE;
+}
- if (first->ex_groups) {
- if (first->ex_groups->gr_name)
- GF_FREE (first->ex_groups->gr_name);
- GF_FREE (first->ex_groups);
- }
+static void
+xdr_free_groupnode(struct groupnode *group)
+{
+ if (!group)
+ return;
- GF_FREE (first);
- first = elist;
- }
+ if (group->gr_next)
+ xdr_free_groupnode(group->gr_next);
+ GF_FREE(group->gr_name);
+ GF_FREE(group);
}
-
void
-xdr_free_mountlist (mountlist ml)
+xdr_free_exports_list(struct exportnode *first)
{
- struct mountbody *next = NULL;
+ struct exportnode *elist = NULL;
- if (!ml)
- return;
+ if (!first)
+ return;
- while (ml) {
- GF_FREE (ml->ml_hostname);
- GF_FREE (ml->ml_directory);
- next = ml->ml_next;
- GF_FREE (ml);
- ml = next;
- }
+ while (first) {
+ elist = first->ex_next;
+ GF_FREE(first->ex_dir);
- return;
+ xdr_free_groupnode(first->ex_groups);
+
+ GF_FREE(first);
+ first = elist;
+ }
}
+void
+xdr_free_mountlist(mountlist ml)
+{
+ struct mountbody *next = NULL;
+
+ if (!ml)
+ return;
+
+ while (ml) {
+ GF_FREE(ml->ml_hostname);
+ GF_FREE(ml->ml_directory);
+ next = ml->ml_next;
+ GF_FREE(ml);
+ ml = next;
+ }
+
+ return;
+}
/* Free statements are based on the way sunrpc xdr decoding
* code performs memory allocations.
*/
void
-xdr_free_write3args_nocopy (write3args *wa)
+xdr_free_write3args_nocopy(write3args *wa)
{
- if (!wa)
- return;
+ if (!wa)
+ return;
- FREE (wa->file.data.data_val);
+ FREE(wa->file.data.data_val);
}
-
-
diff --git a/rpc/xdr/src/xdr-nfs3.h b/rpc/xdr/src/xdr-nfs3.h
index 9e5ccd186ca..b7f5abefffd 100644
--- a/rpc/xdr/src/xdr-nfs3.h
+++ b/rpc/xdr/src/xdr-nfs3.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _XDR_NFS3_H
@@ -23,25 +14,27 @@
#include <rpc/rpc.h>
#include <sys/types.h>
-#define NFS3_FHSIZE 64
-#define NFS3_COOKIEVERFSIZE 8
-#define NFS3_CREATEVERFSIZE 8
-#define NFS3_WRITEVERFSIZE 8
+#define NFS3_FHSIZE 64
+#define NFS3_COOKIEVERFSIZE 8
+#define NFS3_CREATEVERFSIZE 8
+#define NFS3_WRITEVERFSIZE 8
-#define NFS3_ENTRY3_FIXED_SIZE 24
-#define NFS3_POSTOPATTR_SIZE 88
-#define NFS3_READDIR_RESOK_SIZE (NFS3_POSTOPATTR_SIZE + sizeof (bool_t) + NFS3_COOKIEVERFSIZE)
+#define NFS3_ENTRY3_FIXED_SIZE 24
+#define NFS3_POSTOPATTR_SIZE 88
+#define NFS3_READDIR_RESOK_SIZE \
+ (NFS3_POSTOPATTR_SIZE + sizeof(bool_t) + NFS3_COOKIEVERFSIZE)
/* In size of post_op_fh3, the length of the file handle will have to be
* included separately since we have variable length fh. Here we only account
* for the field for handle_follows and for the file handle length field.
*/
-#define NFS3_POSTOPFH3_FIXED_SIZE (sizeof (bool_t) + sizeof (uint32_t))
+#define NFS3_POSTOPFH3_FIXED_SIZE (sizeof(bool_t) + sizeof(uint32_t))
/* Similarly, the size of the entry will have to include the variable length
* file handle and the length of the entry name.
*/
-#define NFS3_ENTRYP3_FIXED_SIZE (NFS3_ENTRY3_FIXED_SIZE + NFS3_POSTOPATTR_SIZE + NFS3_POSTOPFH3_FIXED_SIZE)
+#define NFS3_ENTRYP3_FIXED_SIZE \
+ (NFS3_ENTRY3_FIXED_SIZE + NFS3_POSTOPATTR_SIZE + NFS3_POSTOPFH3_FIXED_SIZE)
typedef uint64_t uint64;
typedef int64_t int64;
@@ -61,283 +54,284 @@ typedef uint64 offset3;
typedef uint32 mode3;
typedef uint32 count3;
-#define NFS3MODE_SETXUID 0x00800
-#define NFS3MODE_SETXGID 0x00400
-#define NFS3MODE_SAVESWAPTXT 0x00200
-#define NFS3MODE_ROWNER 0x00100
-#define NFS3MODE_WOWNER 0x00080
-#define NFS3MODE_XOWNER 0x00040
-#define NFS3MODE_RGROUP 0x00020
-#define NFS3MODE_WGROUP 0x00010
-#define NFS3MODE_XGROUP 0x00008
-#define NFS3MODE_ROTHER 0x00004
-#define NFS3MODE_WOTHER 0x00002
-#define NFS3MODE_XOTHER 0x00001
+#define NFS3MODE_SETXUID 0x00800
+#define NFS3MODE_SETXGID 0x00400
+#define NFS3MODE_SAVESWAPTXT 0x00200
+#define NFS3MODE_ROWNER 0x00100
+#define NFS3MODE_WOWNER 0x00080
+#define NFS3MODE_XOWNER 0x00040
+#define NFS3MODE_RGROUP 0x00020
+#define NFS3MODE_WGROUP 0x00010
+#define NFS3MODE_XGROUP 0x00008
+#define NFS3MODE_ROTHER 0x00004
+#define NFS3MODE_WOTHER 0x00002
+#define NFS3MODE_XOTHER 0x00001
enum nfsstat3 {
- NFS3_OK = 0,
- NFS3ERR_PERM = 1,
- NFS3ERR_NOENT = 2,
- NFS3ERR_IO = 5,
- NFS3ERR_NXIO = 6,
- NFS3ERR_ACCES = 13,
- NFS3ERR_EXIST = 17,
- NFS3ERR_XDEV = 18,
- NFS3ERR_NODEV = 19,
- NFS3ERR_NOTDIR = 20,
- NFS3ERR_ISDIR = 21,
- NFS3ERR_INVAL = 22,
- NFS3ERR_FBIG = 27,
- NFS3ERR_NOSPC = 28,
- NFS3ERR_ROFS = 30,
- NFS3ERR_MLINK = 31,
- NFS3ERR_NAMETOOLONG = 63,
- NFS3ERR_NOTEMPTY = 66,
- NFS3ERR_DQUOT = 69,
- NFS3ERR_STALE = 70,
- NFS3ERR_REMOTE = 71,
- NFS3ERR_BADHANDLE = 10001,
- NFS3ERR_NOT_SYNC = 10002,
- NFS3ERR_BAD_COOKIE = 10003,
- NFS3ERR_NOTSUPP = 10004,
- NFS3ERR_TOOSMALL = 10005,
- NFS3ERR_SERVERFAULT = 10006,
- NFS3ERR_BADTYPE = 10007,
- NFS3ERR_JUKEBOX = 10008,
+ NFS3_OK = 0,
+ NFS3ERR_PERM = 1,
+ NFS3ERR_NOENT = 2,
+ NFS3ERR_IO = 5,
+ NFS3ERR_NXIO = 6,
+ NFS3ERR_ACCES = 13,
+ NFS3ERR_EXIST = 17,
+ NFS3ERR_XDEV = 18,
+ NFS3ERR_NODEV = 19,
+ NFS3ERR_NOTDIR = 20,
+ NFS3ERR_ISDIR = 21,
+ NFS3ERR_INVAL = 22,
+ NFS3ERR_FBIG = 27,
+ NFS3ERR_NOSPC = 28,
+ NFS3ERR_ROFS = 30,
+ NFS3ERR_MLINK = 31,
+ NFS3ERR_NAMETOOLONG = 63,
+ NFS3ERR_NOTEMPTY = 66,
+ NFS3ERR_DQUOT = 69,
+ NFS3ERR_STALE = 70,
+ NFS3ERR_REMOTE = 71,
+ NFS3ERR_BADHANDLE = 10001,
+ NFS3ERR_NOT_SYNC = 10002,
+ NFS3ERR_BAD_COOKIE = 10003,
+ NFS3ERR_NOTSUPP = 10004,
+ NFS3ERR_TOOSMALL = 10005,
+ NFS3ERR_SERVERFAULT = 10006,
+ NFS3ERR_BADTYPE = 10007,
+ NFS3ERR_JUKEBOX = 10008,
+ NFS3ERR_END_OF_LIST = -1,
};
typedef enum nfsstat3 nfsstat3;
enum ftype3 {
- NF3REG = 1,
- NF3DIR = 2,
- NF3BLK = 3,
- NF3CHR = 4,
- NF3LNK = 5,
- NF3SOCK = 6,
- NF3FIFO = 7,
+ NF3REG = 1,
+ NF3DIR = 2,
+ NF3BLK = 3,
+ NF3CHR = 4,
+ NF3LNK = 5,
+ NF3SOCK = 6,
+ NF3FIFO = 7,
};
typedef enum ftype3 ftype3;
struct specdata3 {
- uint32 specdata1;
- uint32 specdata2;
+ uint32 specdata1;
+ uint32 specdata2;
};
typedef struct specdata3 specdata3;
struct nfs_fh3 {
- struct {
- u_int data_len;
- char *data_val;
- } data;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
};
typedef struct nfs_fh3 nfs_fh3;
struct nfstime3 {
- uint32 seconds;
- uint32 nseconds;
+ uint32 seconds;
+ uint32 nseconds;
};
typedef struct nfstime3 nfstime3;
struct fattr3 {
- ftype3 type;
- mode3 mode;
- uint32 nlink;
- uid3 uid;
- gid3 gid;
- size3 size;
- size3 used;
- specdata3 rdev;
- uint64 fsid;
- fileid3 fileid;
- nfstime3 atime;
- nfstime3 mtime;
- nfstime3 ctime;
+ ftype3 type;
+ mode3 mode;
+ uint32 nlink;
+ uid3 uid;
+ gid3 gid;
+ size3 size;
+ size3 used;
+ specdata3 rdev;
+ uint64 fsid;
+ fileid3 fileid;
+ nfstime3 atime;
+ nfstime3 mtime;
+ nfstime3 ctime;
};
typedef struct fattr3 fattr3;
struct post_op_attr {
- bool_t attributes_follow;
- union {
- fattr3 attributes;
- } post_op_attr_u;
+ bool_t attributes_follow;
+ union {
+ fattr3 attributes;
+ } post_op_attr_u;
};
typedef struct post_op_attr post_op_attr;
struct wcc_attr {
- size3 size;
- nfstime3 mtime;
- nfstime3 ctime;
+ size3 size;
+ nfstime3 mtime;
+ nfstime3 ctime;
};
typedef struct wcc_attr wcc_attr;
struct pre_op_attr {
- bool_t attributes_follow;
- union {
- wcc_attr attributes;
- } pre_op_attr_u;
+ bool_t attributes_follow;
+ union {
+ wcc_attr attributes;
+ } pre_op_attr_u;
};
typedef struct pre_op_attr pre_op_attr;
struct wcc_data {
- pre_op_attr before;
- post_op_attr after;
+ pre_op_attr before;
+ post_op_attr after;
};
typedef struct wcc_data wcc_data;
struct post_op_fh3 {
- bool_t handle_follows;
- union {
- nfs_fh3 handle;
- } post_op_fh3_u;
+ bool_t handle_follows;
+ union {
+ nfs_fh3 handle;
+ } post_op_fh3_u;
};
typedef struct post_op_fh3 post_op_fh3;
enum time_how {
- DONT_CHANGE = 0,
- SET_TO_SERVER_TIME = 1,
- SET_TO_CLIENT_TIME = 2,
+ DONT_CHANGE = 0,
+ SET_TO_SERVER_TIME = 1,
+ SET_TO_CLIENT_TIME = 2,
};
typedef enum time_how time_how;
struct set_mode3 {
- bool_t set_it;
- union {
- mode3 mode;
- } set_mode3_u;
+ bool_t set_it;
+ union {
+ mode3 mode;
+ } set_mode3_u;
};
typedef struct set_mode3 set_mode3;
struct set_uid3 {
- bool_t set_it;
- union {
- uid3 uid;
- } set_uid3_u;
+ bool_t set_it;
+ union {
+ uid3 uid;
+ } set_uid3_u;
};
typedef struct set_uid3 set_uid3;
struct set_gid3 {
- bool_t set_it;
- union {
- gid3 gid;
- } set_gid3_u;
+ bool_t set_it;
+ union {
+ gid3 gid;
+ } set_gid3_u;
};
typedef struct set_gid3 set_gid3;
struct set_size3 {
- bool_t set_it;
- union {
- size3 size;
- } set_size3_u;
+ bool_t set_it;
+ union {
+ size3 size;
+ } set_size3_u;
};
typedef struct set_size3 set_size3;
struct set_atime {
- time_how set_it;
- union {
- nfstime3 atime;
- } set_atime_u;
+ time_how set_it;
+ union {
+ nfstime3 atime;
+ } set_atime_u;
};
typedef struct set_atime set_atime;
struct set_mtime {
- time_how set_it;
- union {
- nfstime3 mtime;
- } set_mtime_u;
+ time_how set_it;
+ union {
+ nfstime3 mtime;
+ } set_mtime_u;
};
typedef struct set_mtime set_mtime;
struct sattr3 {
- set_mode3 mode;
- set_uid3 uid;
- set_gid3 gid;
- set_size3 size;
- set_atime atime;
- set_mtime mtime;
+ set_mode3 mode;
+ set_uid3 uid;
+ set_gid3 gid;
+ set_size3 size;
+ set_atime atime;
+ set_mtime mtime;
};
typedef struct sattr3 sattr3;
struct diropargs3 {
- nfs_fh3 dir;
- filename3 name;
+ nfs_fh3 dir;
+ filename3 name;
};
typedef struct diropargs3 diropargs3;
struct getattr3args {
- nfs_fh3 object;
+ nfs_fh3 object;
};
typedef struct getattr3args getattr3args;
struct getattr3resok {
- fattr3 obj_attributes;
+ fattr3 obj_attributes;
};
typedef struct getattr3resok getattr3resok;
struct getattr3res {
- nfsstat3 status;
- union {
- getattr3resok resok;
- } getattr3res_u;
+ nfsstat3 status;
+ union {
+ getattr3resok resok;
+ } getattr3res_u;
};
typedef struct getattr3res getattr3res;
struct sattrguard3 {
- bool_t check;
- union {
- nfstime3 obj_ctime;
- } sattrguard3_u;
+ bool_t check;
+ union {
+ nfstime3 obj_ctime;
+ } sattrguard3_u;
};
typedef struct sattrguard3 sattrguard3;
struct setattr3args {
- nfs_fh3 object;
- sattr3 new_attributes;
- sattrguard3 guard;
+ nfs_fh3 object;
+ sattr3 new_attributes;
+ sattrguard3 guard;
};
typedef struct setattr3args setattr3args;
struct setattr3resok {
- wcc_data obj_wcc;
+ wcc_data obj_wcc;
};
typedef struct setattr3resok setattr3resok;
struct setattr3resfail {
- wcc_data obj_wcc;
+ wcc_data obj_wcc;
};
typedef struct setattr3resfail setattr3resfail;
struct setattr3res {
- nfsstat3 status;
- union {
- setattr3resok resok;
- setattr3resfail resfail;
- } setattr3res_u;
+ nfsstat3 status;
+ union {
+ setattr3resok resok;
+ setattr3resfail resfail;
+ } setattr3res_u;
};
typedef struct setattr3res setattr3res;
struct lookup3args {
- diropargs3 what;
+ diropargs3 what;
};
typedef struct lookup3args lookup3args;
struct lookup3resok {
- nfs_fh3 object;
- post_op_attr obj_attributes;
- post_op_attr dir_attributes;
+ nfs_fh3 object;
+ post_op_attr obj_attributes;
+ post_op_attr dir_attributes;
};
typedef struct lookup3resok lookup3resok;
struct lookup3resfail {
- post_op_attr dir_attributes;
+ post_op_attr dir_attributes;
};
typedef struct lookup3resfail lookup3resfail;
struct lookup3res {
- nfsstat3 status;
- union {
- lookup3resok resok;
- lookup3resfail resfail;
- } lookup3res_u;
+ nfsstat3 status;
+ union {
+ lookup3resok resok;
+ lookup3resfail resfail;
+ } lookup3res_u;
};
typedef struct lookup3res lookup3res;
#define ACCESS3_READ 0x0001
@@ -348,104 +342,104 @@ typedef struct lookup3res lookup3res;
#define ACCESS3_EXECUTE 0x0020
struct access3args {
- nfs_fh3 object;
- uint32 access;
+ nfs_fh3 object;
+ uint32 access;
};
typedef struct access3args access3args;
struct access3resok {
- post_op_attr obj_attributes;
- uint32 access;
+ post_op_attr obj_attributes;
+ uint32 access;
};
typedef struct access3resok access3resok;
struct access3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct access3resfail access3resfail;
struct access3res {
- nfsstat3 status;
- union {
- access3resok resok;
- access3resfail resfail;
- } access3res_u;
+ nfsstat3 status;
+ union {
+ access3resok resok;
+ access3resfail resfail;
+ } access3res_u;
};
typedef struct access3res access3res;
struct readlink3args {
- nfs_fh3 symlink;
+ nfs_fh3 symlink;
};
typedef struct readlink3args readlink3args;
struct readlink3resok {
- post_op_attr symlink_attributes;
- nfspath3 data;
+ post_op_attr symlink_attributes;
+ nfspath3 data;
};
typedef struct readlink3resok readlink3resok;
struct readlink3resfail {
- post_op_attr symlink_attributes;
+ post_op_attr symlink_attributes;
};
typedef struct readlink3resfail readlink3resfail;
struct readlink3res {
- nfsstat3 status;
- union {
- readlink3resok resok;
- readlink3resfail resfail;
- } readlink3res_u;
+ nfsstat3 status;
+ union {
+ readlink3resok resok;
+ readlink3resfail resfail;
+ } readlink3res_u;
};
typedef struct readlink3res readlink3res;
struct read3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
};
typedef struct read3args read3args;
struct read3resok {
- post_op_attr file_attributes;
- count3 count;
- bool_t eof;
- struct {
- u_int data_len;
- char *data_val;
- } data;
+ post_op_attr file_attributes;
+ count3 count;
+ bool_t eof;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
};
typedef struct read3resok read3resok;
struct read3resfail {
- post_op_attr file_attributes;
+ post_op_attr file_attributes;
};
typedef struct read3resfail read3resfail;
struct read3res {
- nfsstat3 status;
- union {
- read3resok resok;
- read3resfail resfail;
- } read3res_u;
+ nfsstat3 status;
+ union {
+ read3resok resok;
+ read3resfail resfail;
+ } read3res_u;
};
typedef struct read3res read3res;
enum stable_how {
- UNSTABLE = 0,
- DATA_SYNC = 1,
- FILE_SYNC = 2,
+ UNSTABLE = 0,
+ DATA_SYNC = 1,
+ FILE_SYNC = 2,
};
typedef enum stable_how stable_how;
struct write3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
- stable_how stable;
- struct {
- u_int data_len;
- char *data_val;
- } data;
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
+ stable_how stable;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
};
typedef struct write3args write3args;
@@ -454,393 +448,395 @@ typedef struct write3args write3args;
* sizeof (nfs_fh3) rather than first trying to extract the fh size of the
* network followed by a sized-read of the file handle.
*/
-#define NFS3_WRITE3ARGS_SIZE (sizeof (uint32_t) + NFS3_FHSIZE + sizeof (offset3) + sizeof (count3) + sizeof (uint32_t))
+#define NFS3_WRITE3ARGS_SIZE \
+ (sizeof(uint32_t) + NFS3_FHSIZE + sizeof(offset3) + sizeof(count3) + \
+ sizeof(uint32_t))
struct write3resok {
- wcc_data file_wcc;
- count3 count;
- stable_how committed;
- writeverf3 verf;
+ wcc_data file_wcc;
+ count3 count;
+ stable_how committed;
+ writeverf3 verf;
};
typedef struct write3resok write3resok;
struct write3resfail {
- wcc_data file_wcc;
+ wcc_data file_wcc;
};
typedef struct write3resfail write3resfail;
struct write3res {
- nfsstat3 status;
- union {
- write3resok resok;
- write3resfail resfail;
- } write3res_u;
+ nfsstat3 status;
+ union {
+ write3resok resok;
+ write3resfail resfail;
+ } write3res_u;
};
typedef struct write3res write3res;
enum createmode3 {
- UNCHECKED = 0,
- GUARDED = 1,
- EXCLUSIVE = 2,
+ UNCHECKED = 0,
+ GUARDED = 1,
+ EXCLUSIVE = 2,
};
typedef enum createmode3 createmode3;
struct createhow3 {
- createmode3 mode;
- union {
- sattr3 obj_attributes;
- createverf3 verf;
- } createhow3_u;
+ createmode3 mode;
+ union {
+ sattr3 obj_attributes;
+ createverf3 verf;
+ } createhow3_u;
};
typedef struct createhow3 createhow3;
struct create3args {
- diropargs3 where;
- createhow3 how;
+ diropargs3 where;
+ createhow3 how;
};
typedef struct create3args create3args;
struct create3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct create3resok create3resok;
struct create3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct create3resfail create3resfail;
struct create3res {
- nfsstat3 status;
- union {
- create3resok resok;
- create3resfail resfail;
- } create3res_u;
+ nfsstat3 status;
+ union {
+ create3resok resok;
+ create3resfail resfail;
+ } create3res_u;
};
typedef struct create3res create3res;
struct mkdir3args {
- diropargs3 where;
- sattr3 attributes;
+ diropargs3 where;
+ sattr3 attributes;
};
typedef struct mkdir3args mkdir3args;
struct mkdir3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct mkdir3resok mkdir3resok;
struct mkdir3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct mkdir3resfail mkdir3resfail;
struct mkdir3res {
- nfsstat3 status;
- union {
- mkdir3resok resok;
- mkdir3resfail resfail;
- } mkdir3res_u;
+ nfsstat3 status;
+ union {
+ mkdir3resok resok;
+ mkdir3resfail resfail;
+ } mkdir3res_u;
};
typedef struct mkdir3res mkdir3res;
struct symlinkdata3 {
- sattr3 symlink_attributes;
- nfspath3 symlink_data;
+ sattr3 symlink_attributes;
+ nfspath3 symlink_data;
};
typedef struct symlinkdata3 symlinkdata3;
struct symlink3args {
- diropargs3 where;
- symlinkdata3 symlink;
+ diropargs3 where;
+ symlinkdata3 symlink;
};
typedef struct symlink3args symlink3args;
struct symlink3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct symlink3resok symlink3resok;
struct symlink3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct symlink3resfail symlink3resfail;
struct symlink3res {
- nfsstat3 status;
- union {
- symlink3resok resok;
- symlink3resfail resfail;
- } symlink3res_u;
+ nfsstat3 status;
+ union {
+ symlink3resok resok;
+ symlink3resfail resfail;
+ } symlink3res_u;
};
typedef struct symlink3res symlink3res;
struct devicedata3 {
- sattr3 dev_attributes;
- specdata3 spec;
+ sattr3 dev_attributes;
+ specdata3 spec;
};
typedef struct devicedata3 devicedata3;
struct mknoddata3 {
- ftype3 type;
- union {
- devicedata3 device;
- sattr3 pipe_attributes;
- } mknoddata3_u;
+ ftype3 type;
+ union {
+ devicedata3 device;
+ sattr3 pipe_attributes;
+ } mknoddata3_u;
};
typedef struct mknoddata3 mknoddata3;
struct mknod3args {
- diropargs3 where;
- mknoddata3 what;
+ diropargs3 where;
+ mknoddata3 what;
};
typedef struct mknod3args mknod3args;
struct mknod3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct mknod3resok mknod3resok;
struct mknod3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct mknod3resfail mknod3resfail;
struct mknod3res {
- nfsstat3 status;
- union {
- mknod3resok resok;
- mknod3resfail resfail;
- } mknod3res_u;
+ nfsstat3 status;
+ union {
+ mknod3resok resok;
+ mknod3resfail resfail;
+ } mknod3res_u;
};
typedef struct mknod3res mknod3res;
struct remove3args {
- diropargs3 object;
+ diropargs3 object;
};
typedef struct remove3args remove3args;
struct remove3resok {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct remove3resok remove3resok;
struct remove3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct remove3resfail remove3resfail;
struct remove3res {
- nfsstat3 status;
- union {
- remove3resok resok;
- remove3resfail resfail;
- } remove3res_u;
+ nfsstat3 status;
+ union {
+ remove3resok resok;
+ remove3resfail resfail;
+ } remove3res_u;
};
typedef struct remove3res remove3res;
struct rmdir3args {
- diropargs3 object;
+ diropargs3 object;
};
typedef struct rmdir3args rmdir3args;
struct rmdir3resok {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct rmdir3resok rmdir3resok;
struct rmdir3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct rmdir3resfail rmdir3resfail;
struct rmdir3res {
- nfsstat3 status;
- union {
- rmdir3resok resok;
- rmdir3resfail resfail;
- } rmdir3res_u;
+ nfsstat3 status;
+ union {
+ rmdir3resok resok;
+ rmdir3resfail resfail;
+ } rmdir3res_u;
};
typedef struct rmdir3res rmdir3res;
struct rename3args {
- diropargs3 from;
- diropargs3 to;
+ diropargs3 from;
+ diropargs3 to;
};
typedef struct rename3args rename3args;
struct rename3resok {
- wcc_data fromdir_wcc;
- wcc_data todir_wcc;
+ wcc_data fromdir_wcc;
+ wcc_data todir_wcc;
};
typedef struct rename3resok rename3resok;
struct rename3resfail {
- wcc_data fromdir_wcc;
- wcc_data todir_wcc;
+ wcc_data fromdir_wcc;
+ wcc_data todir_wcc;
};
typedef struct rename3resfail rename3resfail;
struct rename3res {
- nfsstat3 status;
- union {
- rename3resok resok;
- rename3resfail resfail;
- } rename3res_u;
+ nfsstat3 status;
+ union {
+ rename3resok resok;
+ rename3resfail resfail;
+ } rename3res_u;
};
typedef struct rename3res rename3res;
struct link3args {
- nfs_fh3 file;
- diropargs3 link;
+ nfs_fh3 file;
+ diropargs3 link;
};
typedef struct link3args link3args;
struct link3resok {
- post_op_attr file_attributes;
- wcc_data linkdir_wcc;
+ post_op_attr file_attributes;
+ wcc_data linkdir_wcc;
};
typedef struct link3resok link3resok;
struct link3resfail {
- post_op_attr file_attributes;
- wcc_data linkdir_wcc;
+ post_op_attr file_attributes;
+ wcc_data linkdir_wcc;
};
typedef struct link3resfail link3resfail;
struct link3res {
- nfsstat3 status;
- union {
- link3resok resok;
- link3resfail resfail;
- } link3res_u;
+ nfsstat3 status;
+ union {
+ link3resok resok;
+ link3resfail resfail;
+ } link3res_u;
};
typedef struct link3res link3res;
struct readdir3args {
- nfs_fh3 dir;
- cookie3 cookie;
- cookieverf3 cookieverf;
- count3 count;
+ nfs_fh3 dir;
+ cookie3 cookie;
+ cookieverf3 cookieverf;
+ count3 count;
};
typedef struct readdir3args readdir3args;
struct entry3 {
- fileid3 fileid;
- filename3 name;
- cookie3 cookie;
- struct entry3 *nextentry;
+ fileid3 fileid;
+ filename3 name;
+ cookie3 cookie;
+ struct entry3 *nextentry;
};
typedef struct entry3 entry3;
struct dirlist3 {
- entry3 *entries;
- bool_t eof;
+ entry3 *entries;
+ bool_t eof;
};
typedef struct dirlist3 dirlist3;
struct readdir3resok {
- post_op_attr dir_attributes;
- cookieverf3 cookieverf;
- dirlist3 reply;
+ post_op_attr dir_attributes;
+ cookieverf3 cookieverf;
+ dirlist3 reply;
};
typedef struct readdir3resok readdir3resok;
struct readdir3resfail {
- post_op_attr dir_attributes;
+ post_op_attr dir_attributes;
};
typedef struct readdir3resfail readdir3resfail;
struct readdir3res {
- nfsstat3 status;
- union {
- readdir3resok resok;
- readdir3resfail resfail;
- } readdir3res_u;
+ nfsstat3 status;
+ union {
+ readdir3resok resok;
+ readdir3resfail resfail;
+ } readdir3res_u;
};
typedef struct readdir3res readdir3res;
struct readdirp3args {
- nfs_fh3 dir;
- cookie3 cookie;
- cookieverf3 cookieverf;
- count3 dircount;
- count3 maxcount;
+ nfs_fh3 dir;
+ cookie3 cookie;
+ cookieverf3 cookieverf;
+ count3 dircount;
+ count3 maxcount;
};
typedef struct readdirp3args readdirp3args;
struct entryp3 {
- fileid3 fileid;
- filename3 name;
- cookie3 cookie;
- post_op_attr name_attributes;
- post_op_fh3 name_handle;
- struct entryp3 *nextentry;
+ fileid3 fileid;
+ filename3 name;
+ cookie3 cookie;
+ post_op_attr name_attributes;
+ post_op_fh3 name_handle;
+ struct entryp3 *nextentry;
};
typedef struct entryp3 entryp3;
struct dirlistp3 {
- entryp3 *entries;
- bool_t eof;
+ entryp3 *entries;
+ bool_t eof;
};
typedef struct dirlistp3 dirlistp3;
struct readdirp3resok {
- post_op_attr dir_attributes;
- cookieverf3 cookieverf;
- dirlistp3 reply;
+ post_op_attr dir_attributes;
+ cookieverf3 cookieverf;
+ dirlistp3 reply;
};
typedef struct readdirp3resok readdirp3resok;
struct readdirp3resfail {
- post_op_attr dir_attributes;
+ post_op_attr dir_attributes;
};
typedef struct readdirp3resfail readdirp3resfail;
struct readdirp3res {
- nfsstat3 status;
- union {
- readdirp3resok resok;
- readdirp3resfail resfail;
- } readdirp3res_u;
+ nfsstat3 status;
+ union {
+ readdirp3resok resok;
+ readdirp3resfail resfail;
+ } readdirp3res_u;
};
typedef struct readdirp3res readdirp3res;
struct fsstat3args {
- nfs_fh3 fsroot;
+ nfs_fh3 fsroot;
};
typedef struct fsstat3args fsstat3args;
struct fsstat3resok {
- post_op_attr obj_attributes;
- size3 tbytes;
- size3 fbytes;
- size3 abytes;
- size3 tfiles;
- size3 ffiles;
- size3 afiles;
- uint32 invarsec;
+ post_op_attr obj_attributes;
+ size3 tbytes;
+ size3 fbytes;
+ size3 abytes;
+ size3 tfiles;
+ size3 ffiles;
+ size3 afiles;
+ uint32 invarsec;
};
typedef struct fsstat3resok fsstat3resok;
struct fsstat3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct fsstat3resfail fsstat3resfail;
struct fsstat3res {
- nfsstat3 status;
- union {
- fsstat3resok resok;
- fsstat3resfail resfail;
- } fsstat3res_u;
+ nfsstat3 status;
+ union {
+ fsstat3resok resok;
+ fsstat3resfail resfail;
+ } fsstat3res_u;
};
typedef struct fsstat3res fsstat3res;
#define FSF3_LINK 0x0001
@@ -849,93 +845,93 @@ typedef struct fsstat3res fsstat3res;
#define FSF3_CANSETTIME 0x0010
struct fsinfo3args {
- nfs_fh3 fsroot;
+ nfs_fh3 fsroot;
};
typedef struct fsinfo3args fsinfo3args;
struct fsinfo3resok {
- post_op_attr obj_attributes;
- uint32 rtmax;
- uint32 rtpref;
- uint32 rtmult;
- uint32 wtmax;
- uint32 wtpref;
- uint32 wtmult;
- uint32 dtpref;
- size3 maxfilesize;
- nfstime3 time_delta;
- uint32 properties;
+ post_op_attr obj_attributes;
+ uint32 rtmax;
+ uint32 rtpref;
+ uint32 rtmult;
+ uint32 wtmax;
+ uint32 wtpref;
+ uint32 wtmult;
+ uint32 dtpref;
+ size3 maxfilesize;
+ nfstime3 time_delta;
+ uint32 properties;
};
typedef struct fsinfo3resok fsinfo3resok;
struct fsinfo3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct fsinfo3resfail fsinfo3resfail;
struct fsinfo3res {
- nfsstat3 status;
- union {
- fsinfo3resok resok;
- fsinfo3resfail resfail;
- } fsinfo3res_u;
+ nfsstat3 status;
+ union {
+ fsinfo3resok resok;
+ fsinfo3resfail resfail;
+ } fsinfo3res_u;
};
typedef struct fsinfo3res fsinfo3res;
struct pathconf3args {
- nfs_fh3 object;
+ nfs_fh3 object;
};
typedef struct pathconf3args pathconf3args;
struct pathconf3resok {
- post_op_attr obj_attributes;
- uint32 linkmax;
- uint32 name_max;
- bool_t no_trunc;
- bool_t chown_restricted;
- bool_t case_insensitive;
- bool_t case_preserving;
+ post_op_attr obj_attributes;
+ uint32 linkmax;
+ uint32 name_max;
+ bool_t no_trunc;
+ bool_t chown_restricted;
+ bool_t case_insensitive;
+ bool_t case_preserving;
};
typedef struct pathconf3resok pathconf3resok;
struct pathconf3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct pathconf3resfail pathconf3resfail;
struct pathconf3res {
- nfsstat3 status;
- union {
- pathconf3resok resok;
- pathconf3resfail resfail;
- } pathconf3res_u;
+ nfsstat3 status;
+ union {
+ pathconf3resok resok;
+ pathconf3resfail resfail;
+ } pathconf3res_u;
};
typedef struct pathconf3res pathconf3res;
struct commit3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
};
typedef struct commit3args commit3args;
struct commit3resok {
- wcc_data file_wcc;
- writeverf3 verf;
+ wcc_data file_wcc;
+ writeverf3 verf;
};
typedef struct commit3resok commit3resok;
struct commit3resfail {
- wcc_data file_wcc;
+ wcc_data file_wcc;
};
typedef struct commit3resfail commit3resfail;
struct commit3res {
- nfsstat3 status;
- union {
- commit3resok resok;
- commit3resfail resfail;
- } commit3res_u;
+ nfsstat3 status;
+ union {
+ commit3resok resok;
+ commit3resfail resfail;
+ } commit3res_u;
};
typedef struct commit3res commit3res;
#define MNTPATHLEN 1024
@@ -943,8 +939,8 @@ typedef struct commit3res commit3res;
#define FHSIZE3 NFS3_FHSIZE
typedef struct {
- u_int fhandle3_len;
- char *fhandle3_val;
+ u_int fhandle3_len;
+ char *fhandle3_val;
} fhandle3;
typedef char *dirpath;
@@ -952,255 +948,404 @@ typedef char *dirpath;
typedef char *name;
enum mountstat3 {
- MNT3_OK = 0,
- MNT3ERR_PERM = 1,
- MNT3ERR_NOENT = 2,
- MNT3ERR_IO = 5,
- MNT3ERR_ACCES = 13,
- MNT3ERR_NOTDIR = 20,
- MNT3ERR_INVAL = 22,
- MNT3ERR_NAMETOOLONG = 63,
- MNT3ERR_NOTSUPP = 10004,
- MNT3ERR_SERVERFAULT = 10006,
+ MNT3_OK = 0,
+ MNT3ERR_PERM = 1,
+ MNT3ERR_NOENT = 2,
+ MNT3ERR_IO = 5,
+ MNT3ERR_ACCES = 13,
+ MNT3ERR_NOTDIR = 20,
+ MNT3ERR_INVAL = 22,
+ MNT3ERR_NAMETOOLONG = 63,
+ MNT3ERR_NOTSUPP = 10004,
+ MNT3ERR_SERVERFAULT = 10006,
};
typedef enum mountstat3 mountstat3;
struct mountres3_ok {
- fhandle3 fhandle;
- struct {
- u_int auth_flavors_len;
- int *auth_flavors_val;
- } auth_flavors;
+ fhandle3 fhandle;
+ struct {
+ u_int auth_flavors_len;
+ int *auth_flavors_val;
+ } auth_flavors;
};
typedef struct mountres3_ok mountres3_ok;
struct mountres3 {
- mountstat3 fhs_status;
- union {
- mountres3_ok mountinfo;
- } mountres3_u;
+ mountstat3 fhs_status;
+ union {
+ mountres3_ok mountinfo;
+ } mountres3_u;
};
typedef struct mountres3 mountres3;
typedef struct mountbody *mountlist;
struct mountbody {
- name ml_hostname;
- dirpath ml_directory;
- mountlist ml_next;
+ name ml_hostname;
+ dirpath ml_directory;
+ mountlist ml_next;
};
typedef struct mountbody mountbody;
typedef struct groupnode *groups;
struct groupnode {
- name gr_name;
- groups gr_next;
+ name gr_name;
+ groups gr_next;
};
typedef struct groupnode groupnode;
typedef struct exportnode *exports;
struct exportnode {
- dirpath ex_dir;
- groups ex_groups;
- exports ex_next;
+ dirpath ex_dir;
+ groups ex_groups;
+ exports ex_next;
};
typedef struct exportnode exportnode;
-#define NFS_PROGRAM 100003
-#define NFS_V3 3
-
-#define NFS3_NULL 0
-#define NFS3_GETATTR 1
-#define NFS3_SETATTR 2
-#define NFS3_LOOKUP 3
-#define NFS3_ACCESS 4
-#define NFS3_READLINK 5
-#define NFS3_READ 6
-#define NFS3_WRITE 7
-#define NFS3_CREATE 8
-#define NFS3_MKDIR 9
-#define NFS3_SYMLINK 10
-#define NFS3_MKNOD 11
-#define NFS3_REMOVE 12
-#define NFS3_RMDIR 13
-#define NFS3_RENAME 14
-#define NFS3_LINK 15
-#define NFS3_READDIR 16
-#define NFS3_READDIRP 17
-#define NFS3_FSSTAT 18
-#define NFS3_FSINFO 19
-#define NFS3_PATHCONF 20
-#define NFS3_COMMIT 21
-#define NFS3_PROC_COUNT 22
-
-#define MOUNT_PROGRAM 100005
-#define MOUNT_V3 3
-#define MOUNT_V1 1
-
-#define MOUNT3_NULL 0
-#define MOUNT3_MNT 1
-#define MOUNT3_DUMP 2
-#define MOUNT3_UMNT 3
-#define MOUNT3_UMNTALL 4
-#define MOUNT3_EXPORT 5
-#define MOUNT3_PROC_COUNT 6
-
-#define MOUNT1_NULL 0
-#define MOUNT1_DUMP 2
-#define MOUNT1_UMNT 3
-#define MOUNT1_EXPORT 5
-#define MOUNT1_PROC_COUNT 6
+#define NFS_PROGRAM 100003
+#define NFS_V3 3
+
+#define NFS3_NULL 0
+#define NFS3_GETATTR 1
+#define NFS3_SETATTR 2
+#define NFS3_LOOKUP 3
+#define NFS3_ACCESS 4
+#define NFS3_READLINK 5
+#define NFS3_READ 6
+#define NFS3_WRITE 7
+#define NFS3_CREATE 8
+#define NFS3_MKDIR 9
+#define NFS3_SYMLINK 10
+#define NFS3_MKNOD 11
+#define NFS3_REMOVE 12
+#define NFS3_RMDIR 13
+#define NFS3_RENAME 14
+#define NFS3_LINK 15
+#define NFS3_READDIR 16
+#define NFS3_READDIRP 17
+#define NFS3_FSSTAT 18
+#define NFS3_FSINFO 19
+#define NFS3_PATHCONF 20
+#define NFS3_COMMIT 21
+#define NFS3_PROC_COUNT 22
+
+#define MOUNT_PROGRAM 100005
+#define MOUNT_V3 3
+#define MOUNT_V1 1
+
+#define MOUNT3_NULL 0
+#define MOUNT3_MNT 1
+#define MOUNT3_DUMP 2
+#define MOUNT3_UMNT 3
+#define MOUNT3_UMNTALL 4
+#define MOUNT3_EXPORT 5
+#define MOUNT3_PROC_COUNT 6
+
+#define MOUNT1_NULL 0
+#define MOUNT1_MNT 1
+#define MOUNT1_DUMP 2
+#define MOUNT1_UMNT 3
+#define MOUNT1_UMNTALL 4
+#define MOUNT1_EXPORT 5
+#define MOUNT1_PROC_COUNT 6
/* the xdr functions */
-extern bool_t xdr_uint64 (XDR *, uint64*);
-extern bool_t xdr_int64 (XDR *, int64*);
-extern bool_t xdr_uint32 (XDR *, uint32*);
-extern bool_t xdr_int32 (XDR *, int32*);
-extern bool_t xdr_filename3 (XDR *, filename3*);
-extern bool_t xdr_nfspath3 (XDR *, nfspath3*);
-extern bool_t xdr_fileid3 (XDR *, fileid3*);
-extern bool_t xdr_cookie3 (XDR *, cookie3*);
-extern bool_t xdr_cookieverf3 (XDR *, cookieverf3);
-extern bool_t xdr_createverf3 (XDR *, createverf3);
-extern bool_t xdr_writeverf3 (XDR *, writeverf3);
-extern bool_t xdr_uid3 (XDR *, uid3*);
-extern bool_t xdr_gid3 (XDR *, gid3*);
-extern bool_t xdr_size3 (XDR *, size3*);
-extern bool_t xdr_offset3 (XDR *, offset3*);
-extern bool_t xdr_mode3 (XDR *, mode3*);
-extern bool_t xdr_count3 (XDR *, count3*);
-extern bool_t xdr_nfsstat3 (XDR *, nfsstat3*);
-extern bool_t xdr_ftype3 (XDR *, ftype3*);
-extern bool_t xdr_specdata3 (XDR *, specdata3*);
-extern bool_t xdr_nfs_fh3 (XDR *, nfs_fh3*);
-extern bool_t xdr_nfstime3 (XDR *, nfstime3*);
-extern bool_t xdr_fattr3 (XDR *, fattr3*);
-extern bool_t xdr_post_op_attr (XDR *, post_op_attr*);
-extern bool_t xdr_wcc_attr (XDR *, wcc_attr*);
-extern bool_t xdr_pre_op_attr (XDR *, pre_op_attr*);
-extern bool_t xdr_wcc_data (XDR *, wcc_data*);
-extern bool_t xdr_post_op_fh3 (XDR *, post_op_fh3*);
-extern bool_t xdr_time_how (XDR *, time_how*);
-extern bool_t xdr_set_mode3 (XDR *, set_mode3*);
-extern bool_t xdr_set_uid3 (XDR *, set_uid3*);
-extern bool_t xdr_set_gid3 (XDR *, set_gid3*);
-extern bool_t xdr_set_size3 (XDR *, set_size3*);
-extern bool_t xdr_set_atime (XDR *, set_atime*);
-extern bool_t xdr_set_mtime (XDR *, set_mtime*);
-extern bool_t xdr_sattr3 (XDR *, sattr3*);
-extern bool_t xdr_diropargs3 (XDR *, diropargs3*);
-extern bool_t xdr_getattr3args (XDR *, getattr3args*);
-extern bool_t xdr_getattr3resok (XDR *, getattr3resok*);
-extern bool_t xdr_getattr3res (XDR *, getattr3res*);
-extern bool_t xdr_sattrguard3 (XDR *, sattrguard3*);
-extern bool_t xdr_setattr3args (XDR *, setattr3args*);
-extern bool_t xdr_setattr3resok (XDR *, setattr3resok*);
-extern bool_t xdr_setattr3resfail (XDR *, setattr3resfail*);
-extern bool_t xdr_setattr3res (XDR *, setattr3res*);
-extern bool_t xdr_lookup3args (XDR *, lookup3args*);
-extern bool_t xdr_lookup3resok (XDR *, lookup3resok*);
-extern bool_t xdr_lookup3resfail (XDR *, lookup3resfail*);
-extern bool_t xdr_lookup3res (XDR *, lookup3res*);
-extern bool_t xdr_access3args (XDR *, access3args*);
-extern bool_t xdr_access3resok (XDR *, access3resok*);
-extern bool_t xdr_access3resfail (XDR *, access3resfail*);
-extern bool_t xdr_access3res (XDR *, access3res*);
-extern bool_t xdr_readlink3args (XDR *, readlink3args*);
-extern bool_t xdr_readlink3resok (XDR *, readlink3resok*);
-extern bool_t xdr_readlink3resfail (XDR *, readlink3resfail*);
-extern bool_t xdr_readlink3res (XDR *, readlink3res*);
-extern bool_t xdr_read3args (XDR *, read3args*);
-extern bool_t xdr_read3resok (XDR *, read3resok*);
-extern bool_t xdr_read3resfail (XDR *, read3resfail*);
-extern bool_t xdr_read3res (XDR *, read3res*);
-extern bool_t xdr_read3res_nocopy (XDR *xdrs, read3res *objp);
-extern bool_t xdr_stable_how (XDR *, stable_how*);
-extern bool_t xdr_write3args (XDR *, write3args*);
-extern bool_t xdr_write3resok (XDR *, write3resok*);
-extern bool_t xdr_write3resfail (XDR *, write3resfail*);
-extern bool_t xdr_write3res (XDR *, write3res*);
-extern bool_t xdr_createmode3 (XDR *, createmode3*);
-extern bool_t xdr_createhow3 (XDR *, createhow3*);
-extern bool_t xdr_create3args (XDR *, create3args*);
-extern bool_t xdr_create3resok (XDR *, create3resok*);
-extern bool_t xdr_create3resfail (XDR *, create3resfail*);
-extern bool_t xdr_create3res (XDR *, create3res*);
-extern bool_t xdr_mkdir3args (XDR *, mkdir3args*);
-extern bool_t xdr_mkdir3resok (XDR *, mkdir3resok*);
-extern bool_t xdr_mkdir3resfail (XDR *, mkdir3resfail*);
-extern bool_t xdr_mkdir3res (XDR *, mkdir3res*);
-extern bool_t xdr_symlinkdata3 (XDR *, symlinkdata3*);
-extern bool_t xdr_symlink3args (XDR *, symlink3args*);
-extern bool_t xdr_symlink3resok (XDR *, symlink3resok*);
-extern bool_t xdr_symlink3resfail (XDR *, symlink3resfail*);
-extern bool_t xdr_symlink3res (XDR *, symlink3res*);
-extern bool_t xdr_devicedata3 (XDR *, devicedata3*);
-extern bool_t xdr_mknoddata3 (XDR *, mknoddata3*);
-extern bool_t xdr_mknod3args (XDR *, mknod3args*);
-extern bool_t xdr_mknod3resok (XDR *, mknod3resok*);
-extern bool_t xdr_mknod3resfail (XDR *, mknod3resfail*);
-extern bool_t xdr_mknod3res (XDR *, mknod3res*);
-extern bool_t xdr_remove3args (XDR *, remove3args*);
-extern bool_t xdr_remove3resok (XDR *, remove3resok*);
-extern bool_t xdr_remove3resfail (XDR *, remove3resfail*);
-extern bool_t xdr_remove3res (XDR *, remove3res*);
-extern bool_t xdr_rmdir3args (XDR *, rmdir3args*);
-extern bool_t xdr_rmdir3resok (XDR *, rmdir3resok*);
-extern bool_t xdr_rmdir3resfail (XDR *, rmdir3resfail*);
-extern bool_t xdr_rmdir3res (XDR *, rmdir3res*);
-extern bool_t xdr_rename3args (XDR *, rename3args*);
-extern bool_t xdr_rename3resok (XDR *, rename3resok*);
-extern bool_t xdr_rename3resfail (XDR *, rename3resfail*);
-extern bool_t xdr_rename3res (XDR *, rename3res*);
-extern bool_t xdr_link3args (XDR *, link3args*);
-extern bool_t xdr_link3resok (XDR *, link3resok*);
-extern bool_t xdr_link3resfail (XDR *, link3resfail*);
-extern bool_t xdr_link3res (XDR *, link3res*);
-extern bool_t xdr_readdir3args (XDR *, readdir3args*);
-extern bool_t xdr_entry3 (XDR *, entry3*);
-extern bool_t xdr_dirlist3 (XDR *, dirlist3*);
-extern bool_t xdr_readdir3resok (XDR *, readdir3resok*);
-extern bool_t xdr_readdir3resfail (XDR *, readdir3resfail*);
-extern bool_t xdr_readdir3res (XDR *, readdir3res*);
-extern bool_t xdr_readdirp3args (XDR *, readdirp3args*);
-extern bool_t xdr_entryp3 (XDR *, entryp3*);
-extern bool_t xdr_dirlistp3 (XDR *, dirlistp3*);
-extern bool_t xdr_readdirp3resok (XDR *, readdirp3resok*);
-extern bool_t xdr_readdirp3resfail (XDR *, readdirp3resfail*);
-extern bool_t xdr_readdirp3res (XDR *, readdirp3res*);
-extern bool_t xdr_fsstat3args (XDR *, fsstat3args*);
-extern bool_t xdr_fsstat3resok (XDR *, fsstat3resok*);
-extern bool_t xdr_fsstat3resfail (XDR *, fsstat3resfail*);
-extern bool_t xdr_fsstat3res (XDR *, fsstat3res*);
-extern bool_t xdr_fsinfo3args (XDR *, fsinfo3args*);
-extern bool_t xdr_fsinfo3resok (XDR *, fsinfo3resok*);
-extern bool_t xdr_fsinfo3resfail (XDR *, fsinfo3resfail*);
-extern bool_t xdr_fsinfo3res (XDR *, fsinfo3res*);
-extern bool_t xdr_pathconf3args (XDR *, pathconf3args*);
-extern bool_t xdr_pathconf3resok (XDR *, pathconf3resok*);
-extern bool_t xdr_pathconf3resfail (XDR *, pathconf3resfail*);
-extern bool_t xdr_pathconf3res (XDR *, pathconf3res*);
-extern bool_t xdr_commit3args (XDR *, commit3args*);
-extern bool_t xdr_commit3resok (XDR *, commit3resok*);
-extern bool_t xdr_commit3resfail (XDR *, commit3resfail*);
-extern bool_t xdr_commit3res (XDR *, commit3res*);
-extern bool_t xdr_fhandle3 (XDR *, fhandle3*);
-extern bool_t xdr_dirpath (XDR *, dirpath*);
-extern bool_t xdr_name (XDR *, name*);
-extern bool_t xdr_mountstat3 (XDR *, mountstat3*);
-extern bool_t xdr_mountres3_ok (XDR *, mountres3_ok*);
-extern bool_t xdr_mountres3 (XDR *, mountres3*);
-extern bool_t xdr_mountlist (XDR *, mountlist*);
-extern bool_t xdr_mountbody (XDR *, mountbody*);
-extern bool_t xdr_groups (XDR *, groups*);
-extern bool_t xdr_groupnode (XDR *, groupnode*);
-extern bool_t xdr_exports (XDR *, exports*);
-extern bool_t xdr_exportnode (XDR *, exportnode*);
-
-extern void xdr_free_exports_list (struct exportnode *first);
-extern void xdr_free_mountlist (mountlist ml);
-
-extern void xdr_free_write3args_nocopy (write3args *wa);
+extern bool_t
+xdr_uint64(XDR *, uint64 *);
+extern bool_t
+xdr_int64(XDR *, int64 *);
+extern bool_t
+xdr_uint32(XDR *, uint32 *);
+extern bool_t
+xdr_int32(XDR *, int32 *);
+extern bool_t
+xdr_filename3(XDR *, filename3 *);
+extern bool_t
+xdr_nfspath3(XDR *, nfspath3 *);
+extern bool_t
+xdr_fileid3(XDR *, fileid3 *);
+extern bool_t
+xdr_cookie3(XDR *, cookie3 *);
+extern bool_t
+xdr_cookieverf3(XDR *, cookieverf3);
+extern bool_t
+xdr_createverf3(XDR *, createverf3);
+extern bool_t
+xdr_writeverf3(XDR *, writeverf3);
+extern bool_t
+xdr_uid3(XDR *, uid3 *);
+extern bool_t
+xdr_gid3(XDR *, gid3 *);
+extern bool_t
+xdr_size3(XDR *, size3 *);
+extern bool_t
+xdr_offset3(XDR *, offset3 *);
+extern bool_t
+xdr_mode3(XDR *, mode3 *);
+extern bool_t
+xdr_count3(XDR *, count3 *);
+extern bool_t
+xdr_nfsstat3(XDR *, nfsstat3 *);
+extern bool_t
+xdr_ftype3(XDR *, ftype3 *);
+extern bool_t
+xdr_specdata3(XDR *, specdata3 *);
+extern bool_t
+xdr_nfs_fh3(XDR *, nfs_fh3 *);
+extern bool_t
+xdr_nfstime3(XDR *, nfstime3 *);
+extern bool_t
+xdr_fattr3(XDR *, fattr3 *);
+extern bool_t
+xdr_post_op_attr(XDR *, post_op_attr *);
+extern bool_t
+xdr_wcc_attr(XDR *, wcc_attr *);
+extern bool_t
+xdr_pre_op_attr(XDR *, pre_op_attr *);
+extern bool_t
+xdr_wcc_data(XDR *, wcc_data *);
+extern bool_t
+xdr_post_op_fh3(XDR *, post_op_fh3 *);
+extern bool_t
+xdr_time_how(XDR *, time_how *);
+extern bool_t
+xdr_set_mode3(XDR *, set_mode3 *);
+extern bool_t
+xdr_set_uid3(XDR *, set_uid3 *);
+extern bool_t
+xdr_set_gid3(XDR *, set_gid3 *);
+extern bool_t
+xdr_set_size3(XDR *, set_size3 *);
+extern bool_t
+xdr_set_atime(XDR *, set_atime *);
+extern bool_t
+xdr_set_mtime(XDR *, set_mtime *);
+extern bool_t
+xdr_sattr3(XDR *, sattr3 *);
+extern bool_t
+xdr_diropargs3(XDR *, diropargs3 *);
+extern bool_t
+xdr_getattr3args(XDR *, getattr3args *);
+extern bool_t
+xdr_getattr3resok(XDR *, getattr3resok *);
+extern bool_t
+xdr_getattr3res(XDR *, getattr3res *);
+extern bool_t
+xdr_sattrguard3(XDR *, sattrguard3 *);
+extern bool_t
+xdr_setattr3args(XDR *, setattr3args *);
+extern bool_t
+xdr_setattr3resok(XDR *, setattr3resok *);
+extern bool_t
+xdr_setattr3resfail(XDR *, setattr3resfail *);
+extern bool_t
+xdr_setattr3res(XDR *, setattr3res *);
+extern bool_t
+xdr_lookup3args(XDR *, lookup3args *);
+extern bool_t
+xdr_lookup3resok(XDR *, lookup3resok *);
+extern bool_t
+xdr_lookup3resfail(XDR *, lookup3resfail *);
+extern bool_t
+xdr_lookup3res(XDR *, lookup3res *);
+extern bool_t
+xdr_access3args(XDR *, access3args *);
+extern bool_t
+xdr_access3resok(XDR *, access3resok *);
+extern bool_t
+xdr_access3resfail(XDR *, access3resfail *);
+extern bool_t
+xdr_access3res(XDR *, access3res *);
+extern bool_t
+xdr_readlink3args(XDR *, readlink3args *);
+extern bool_t
+xdr_readlink3resok(XDR *, readlink3resok *);
+extern bool_t
+xdr_readlink3resfail(XDR *, readlink3resfail *);
+extern bool_t
+xdr_readlink3res(XDR *, readlink3res *);
+extern bool_t
+xdr_read3args(XDR *, read3args *);
+extern bool_t
+xdr_read3resok(XDR *, read3resok *);
+extern bool_t
+xdr_read3resfail(XDR *, read3resfail *);
+extern bool_t
+xdr_read3res(XDR *, read3res *);
+extern bool_t
+xdr_read3res_nocopy(XDR *xdrs, read3res *objp);
+extern bool_t
+xdr_stable_how(XDR *, stable_how *);
+extern bool_t
+xdr_write3args(XDR *, write3args *);
+extern bool_t
+xdr_write3resok(XDR *, write3resok *);
+extern bool_t
+xdr_write3resfail(XDR *, write3resfail *);
+extern bool_t
+xdr_write3res(XDR *, write3res *);
+extern bool_t
+xdr_createmode3(XDR *, createmode3 *);
+extern bool_t
+xdr_createhow3(XDR *, createhow3 *);
+extern bool_t
+xdr_create3args(XDR *, create3args *);
+extern bool_t
+xdr_create3resok(XDR *, create3resok *);
+extern bool_t
+xdr_create3resfail(XDR *, create3resfail *);
+extern bool_t
+xdr_create3res(XDR *, create3res *);
+extern bool_t
+xdr_mkdir3args(XDR *, mkdir3args *);
+extern bool_t
+xdr_mkdir3resok(XDR *, mkdir3resok *);
+extern bool_t
+xdr_mkdir3resfail(XDR *, mkdir3resfail *);
+extern bool_t
+xdr_mkdir3res(XDR *, mkdir3res *);
+extern bool_t
+xdr_symlinkdata3(XDR *, symlinkdata3 *);
+extern bool_t
+xdr_symlink3args(XDR *, symlink3args *);
+extern bool_t
+xdr_symlink3resok(XDR *, symlink3resok *);
+extern bool_t
+xdr_symlink3resfail(XDR *, symlink3resfail *);
+extern bool_t
+xdr_symlink3res(XDR *, symlink3res *);
+extern bool_t
+xdr_devicedata3(XDR *, devicedata3 *);
+extern bool_t
+xdr_mknoddata3(XDR *, mknoddata3 *);
+extern bool_t
+xdr_mknod3args(XDR *, mknod3args *);
+extern bool_t
+xdr_mknod3resok(XDR *, mknod3resok *);
+extern bool_t
+xdr_mknod3resfail(XDR *, mknod3resfail *);
+extern bool_t
+xdr_mknod3res(XDR *, mknod3res *);
+extern bool_t
+xdr_remove3args(XDR *, remove3args *);
+extern bool_t
+xdr_remove3resok(XDR *, remove3resok *);
+extern bool_t
+xdr_remove3resfail(XDR *, remove3resfail *);
+extern bool_t
+xdr_remove3res(XDR *, remove3res *);
+extern bool_t
+xdr_rmdir3args(XDR *, rmdir3args *);
+extern bool_t
+xdr_rmdir3resok(XDR *, rmdir3resok *);
+extern bool_t
+xdr_rmdir3resfail(XDR *, rmdir3resfail *);
+extern bool_t
+xdr_rmdir3res(XDR *, rmdir3res *);
+extern bool_t
+xdr_rename3args(XDR *, rename3args *);
+extern bool_t
+xdr_rename3resok(XDR *, rename3resok *);
+extern bool_t
+xdr_rename3resfail(XDR *, rename3resfail *);
+extern bool_t
+xdr_rename3res(XDR *, rename3res *);
+extern bool_t
+xdr_link3args(XDR *, link3args *);
+extern bool_t
+xdr_link3resok(XDR *, link3resok *);
+extern bool_t
+xdr_link3resfail(XDR *, link3resfail *);
+extern bool_t
+xdr_link3res(XDR *, link3res *);
+extern bool_t
+xdr_readdir3args(XDR *, readdir3args *);
+extern bool_t
+xdr_entry3(XDR *, entry3 *);
+extern bool_t
+xdr_dirlist3(XDR *, dirlist3 *);
+extern bool_t
+xdr_readdir3resok(XDR *, readdir3resok *);
+extern bool_t
+xdr_readdir3resfail(XDR *, readdir3resfail *);
+extern bool_t
+xdr_readdir3res(XDR *, readdir3res *);
+extern bool_t
+xdr_readdirp3args(XDR *, readdirp3args *);
+extern bool_t
+xdr_entryp3(XDR *, entryp3 *);
+extern bool_t
+xdr_dirlistp3(XDR *, dirlistp3 *);
+extern bool_t
+xdr_readdirp3resok(XDR *, readdirp3resok *);
+extern bool_t
+xdr_readdirp3resfail(XDR *, readdirp3resfail *);
+extern bool_t
+xdr_readdirp3res(XDR *, readdirp3res *);
+extern bool_t
+xdr_fsstat3args(XDR *, fsstat3args *);
+extern bool_t
+xdr_fsstat3resok(XDR *, fsstat3resok *);
+extern bool_t
+xdr_fsstat3resfail(XDR *, fsstat3resfail *);
+extern bool_t
+xdr_fsstat3res(XDR *, fsstat3res *);
+extern bool_t
+xdr_fsinfo3args(XDR *, fsinfo3args *);
+extern bool_t
+xdr_fsinfo3resok(XDR *, fsinfo3resok *);
+extern bool_t
+xdr_fsinfo3resfail(XDR *, fsinfo3resfail *);
+extern bool_t
+xdr_fsinfo3res(XDR *, fsinfo3res *);
+extern bool_t
+xdr_pathconf3args(XDR *, pathconf3args *);
+extern bool_t
+xdr_pathconf3resok(XDR *, pathconf3resok *);
+extern bool_t
+xdr_pathconf3resfail(XDR *, pathconf3resfail *);
+extern bool_t
+xdr_pathconf3res(XDR *, pathconf3res *);
+extern bool_t
+xdr_commit3args(XDR *, commit3args *);
+extern bool_t
+xdr_commit3resok(XDR *, commit3resok *);
+extern bool_t
+xdr_commit3resfail(XDR *, commit3resfail *);
+extern bool_t
+xdr_commit3res(XDR *, commit3res *);
+extern bool_t
+xdr_fhandle3(XDR *, fhandle3 *);
+extern bool_t
+xdr_dirpath(XDR *, dirpath *);
+extern bool_t
+xdr_name(XDR *, name *);
+extern bool_t
+xdr_mountstat3(XDR *, mountstat3 *);
+extern bool_t
+xdr_mountres3_ok(XDR *, mountres3_ok *);
+extern bool_t
+xdr_mountres3(XDR *, mountres3 *);
+extern bool_t
+xdr_mountlist(XDR *, mountlist *);
+extern bool_t
+xdr_mountbody(XDR *, mountbody *);
+extern bool_t
+xdr_groups(XDR *, groups *);
+extern bool_t
+xdr_groupnode(XDR *, groupnode *);
+extern bool_t
+xdr_exports(XDR *, exports *);
+extern bool_t
+xdr_exportnode(XDR *, exportnode *);
+
+extern void
+xdr_free_exports_list(struct exportnode *first);
+extern void
+xdr_free_mountlist(mountlist ml);
+
+extern void
+xdr_free_write3args_nocopy(write3args *wa);
#endif
diff --git a/run-tests-in-vagrant.sh b/run-tests-in-vagrant.sh
new file mode 100755
index 00000000000..a3f2ac7c72d
--- /dev/null
+++ b/run-tests-in-vagrant.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+
+###############################################################################
+# TODO: Support other OSes. #
+###############################################################################
+
+ORIGIN_DIR=$PWD
+autostart="no"
+destroy_after_test="no"
+os="fedora"
+destroy_now="no"
+run_tests_args=""
+redirect=">/dev/null 2>&1"
+ssh="no"
+custom_cflags=""
+
+
+pushd () {
+ command pushd "$@" >/dev/null
+}
+
+popd () {
+ command popd "$@" >/dev/null
+}
+
+usage() {
+ echo "Usage: $0 [...]"
+ echo ''
+ echo 'The options that this script accepts are:'
+ echo ''
+ echo '-a, --autostart configure the testVM to autostart on boot'
+ echo '--destroy-now cleanup the testVM'
+ echo '--destroy-after-test cleanup once the tests finishes'
+ echo '-h, --help show this help text'
+ echo '--os=<flavor> select the OS for the testVM (fedora, centos6)'
+ echo '--ssh ssh into the testVM'
+ echo '--verbose show what commands in the testVM are executed'
+ echo ''
+}
+
+function parse_args () {
+ args=`getopt \
+ --options ah \
+ --long autostart,os:,destroy-now,destroy-after-test,verbose,ssh,help \
+ -n 'run-tests-in-vagrant.sh' \
+ -- "$@"`
+ eval set -- "$args"
+ while true; do
+ case "$1" in
+ -a|--autostart) autostart="yes"; shift ;;
+ --destroy-after-test) destroy_after_test="yes"; shift ;;
+ --destroy-now) destroy_now="yes"; shift ;;
+ -h|--help) usage ; exit 0 ;;
+ --ssh) sshvm="yes"; shift ;;
+ --os)
+ case "$2" in
+ "") shift 2 ;;
+ *) os="$2" ; shift 2 ;;
+ esac ;;
+ --verbose) redirect=""; shift ;;
+ --) shift ; break ;;
+ *) echo "Internal error!" ; exit 1;;
+ esac
+ done
+ run_tests_args="$@"
+}
+
+function force_location()
+{
+ current_dir=$(dirname $0);
+
+ if [ ! -f ${current_dir}/tests/vagrant/vagrant-template-fedora/Vagrantfile ]; then
+ echo "Aborting."
+ echo "The tests/vagrant subdirectory seems to be missing."
+ echo "Please correct the problem and try again."
+ exit 1
+ fi
+}
+
+function vagrant_check()
+{
+ vagrant -v >/dev/null 2>&1;
+
+ if [ $? -ne 0 ]; then
+ echo "Aborting"
+ echo "Vagrant not found. Please install Vagrant and try again."
+ echo "On Fedora, run "dnf install vagrant vagrant-libvirt" "
+ exit 1
+ fi
+}
+
+function ansible_check()
+{
+ ansible --version >/dev/null 2>&1 ;
+
+ if [ $? -ne 0 ]; then
+ echo "Aborting"
+ echo "Ansible not found. Please install Ansible and try again."
+ echo "On Fedora, run "dnf install ansible" "
+ exit 1
+ fi
+}
+
+function set_branchname_from_git_branch()
+{
+ BRANCHNAME=`git rev-parse --abbrev-ref HEAD`
+ if [ $? -ne 0 ]; then
+ echo "Could not get branch name from git, will exit"
+ exit 1
+ fi
+}
+
+
+function destroy_vm()
+{
+ local retval=0
+
+ echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!CAUTION!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
+ echo "This will destroy VM and delete tests/vagrant/${BRANCHNAME} dir"
+ echo
+ while true; do
+ read -p "Do you want to continue?" yn
+ case $yn in
+ [Yy]* ) break;;
+ * ) echo "Did not get an yes, exiting."; exit 1 ;;
+ esac
+ done
+ if [ -d "tests/vagrant/${BRANCHNAME}" ]; then
+ pushd "tests/vagrant/${BRANCHNAME}"
+ eval vagrant destroy $redirect
+ popd
+ rm -rf "tests/vagrant/${BRANCHNAME}"
+ else
+ echo "Could not find vagrant dir for corresponding git branch, exiting"
+ retval=1
+ fi
+
+ return ${retval}
+}
+
+
+function create_vagrant_dir()
+{
+ mkdir -p tests/vagrant/$BRANCHNAME
+ if [ -d "tests/vagrant/vagrant-template-${os}" ]; then
+ echo "Copying tests/vagrant/vagrant-template-${os} dir to tests/vagrant/${BRANCHNAME} ...."
+ cp -R tests/vagrant/vagrant-template-${os}/* tests/vagrant/$BRANCHNAME
+ else
+ echo "Could not find template files for requested os $os, exiting"
+ exit 1
+ fi
+}
+
+
+function start_vm()
+{
+ echo "Doing vagrant up...."
+ pushd "tests/vagrant/${BRANCHNAME}"
+ eval vagrant up $redirect
+ if [ $? -eq 0 ]
+ then
+ popd
+ else
+ echo "Vagrant up failed, exiting....";
+ popd
+ exit 1
+ fi
+}
+
+function set_vm_attributes()
+{
+ if [ "x$autostart" == "xyes" ] ; then
+ virsh autostart ${BRANCHNAME}_vagrant-testVM
+ fi
+}
+
+function copy_source_code()
+{
+ echo "Copying source code from host machine to VM...."
+ pushd "tests/vagrant/${BRANCHNAME}"
+ vagrant ssh-config > ssh_config
+ rsync -az -e "ssh -F ssh_config" --rsync-path="sudo rsync" "$ORIGIN_DIR/." vagrant-testVM:/home/vagrant/glusterfs
+ if [ $? -eq 0 ]
+ then
+ popd
+ else
+ echo "Copy failed, exiting...."
+ popd
+ exit 1
+ fi
+}
+
+function compile_gluster()
+{
+ echo "Source compile and install Gluster...."
+ pushd "tests/vagrant/${BRANCHNAME}"
+ vagrant ssh -c "cd /home/vagrant/glusterfs ; sudo make clean $redirect" -- -t
+ vagrant ssh -c "cd /home/vagrant/glusterfs ; sudo ./autogen.sh $redirect" -- -t
+ if [ $? -ne 0 ]
+ then
+ echo "autogen failed, exiting...."
+ popd
+ exit 1
+ fi
+
+ # GCC on fedora complains about uninitialized variables and
+ # GCC on centos6 does not under don't warn on uninitialized variables flag.
+ if [ "x$os" == "fedora" ] ; then
+ custom_cflags="CFLAGS='-g -O0 -Werror -Wall -Wno-error=cpp -Wno-error=maybe-uninitialized'"
+ else
+ custom_cflags="CFLAGS='-g -O0 -Werror -Wall'"
+ fi
+
+
+ custom_cflags=
+ vagrant ssh -c "cd /home/vagrant/glusterfs ; \
+ sudo \
+ $custom_cflags \
+ ./configure \
+ --prefix=/usr \
+ --exec-prefix=/usr \
+ --bindir=/usr/bin \
+ --sbindir=/usr/sbin \
+ --sysconfdir=/etc \
+ --datadir=/usr/share \
+ --includedir=/usr/include \
+ --libdir=/usr/lib64 \
+ --libexecdir=/usr/libexec \
+ --localstatedir=/var \
+ --sharedstatedir=/var/lib \
+ --mandir=/usr/share/man \
+ --infodir=/usr/share/info \
+ --libdir=/usr/lib64 \
+ --enable-gnfs \
+ --enable-debug $redirect" -- -t
+ if [ $? -ne 0 ]
+ then
+ echo "configure failed, exiting...."
+ popd
+ exit 1
+ fi
+ # Test for missing dependencies based on the BuildRequires in the
+ # glusterfs.spec. If anything is missing, install them (and only then, dnf
+ # cache is a large download).
+ vagrant ssh -c "cd /home/vagrant/glusterfs; ( sudo dnf -C -y builddep --spec glusterfs.spec || sudo dnf -y builddep --spec glusterfs.spec ) $redirect" -- -t
+ vagrant ssh -c "cd /home/vagrant/glusterfs; sudo make -j install $redirect" -- -t
+ if [ $? -ne 0 ]
+ then
+ echo "make failed, exiting...."
+ popd
+ exit 1
+ fi
+ popd
+}
+
+function run_tests()
+{
+ local retval=0
+
+ pushd "tests/vagrant/${BRANCHNAME}"
+ vagrant ssh -c "cd /home/vagrant/glusterfs; sudo ./run-tests.sh $run_tests_args" -- -t
+ retval=$?
+ popd
+
+ return ${retval}
+}
+
+function ssh_into_vm_using_exec()
+{
+ pushd "tests/vagrant/${BRANCHNAME}"
+ exec vagrant ssh
+ popd
+}
+
+echo
+parse_args "$@"
+
+# Check environment for dependencies
+force_location
+vagrant_check
+ansible_check
+
+# We have one vm per git branch, query git branch
+set_branchname_from_git_branch
+
+if [ "x$destroy_now" == "xyes" ] ; then
+ destroy_vm
+ exit $?
+fi
+
+if [ "x$sshvm" == "xyes" ] ; then
+ ssh_into_vm_using_exec
+fi
+
+
+create_vagrant_dir
+start_vm
+set_vm_attributes
+
+
+
+copy_source_code
+compile_gluster
+run_tests
+RET=$?
+
+if [ "x$destroy_after_test" == "xyes" ] ; then
+ destroy_vm
+fi
+
+exit ${RET}
diff --git a/run-tests.sh b/run-tests.sh
new file mode 100755
index 00000000000..e2a1655d8e0
--- /dev/null
+++ b/run-tests.sh
@@ -0,0 +1,615 @@
+#!/bin/bash
+# Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+#
+
+# As many tests are designed to take values of variables from 'env.rc',
+# it is good to source the file. While it is also required to source the
+# file individually in each tests (as it should be possible to run the
+# tests separately), exporting variables from env.rc is not harmful if
+# done here
+
+source ./tests/env.rc
+
+export TZ=UTC
+force="no"
+head="yes"
+retry="yes"
+tests=""
+exit_on_failure="yes"
+skip_bad_tests="yes"
+skip_known_bugs="yes"
+result_output="/tmp/gluster_regression.txt"
+section_separator="========================================"
+run_timeout=200
+kill_after_time=5
+nfs_tests=$RUN_NFS_TESTS
+
+# Option below preserves log tarballs for each run of a test separately
+# named: <test>-iteration-<n>.tar
+# If set to any other value, then log tarball is just named after the test and
+# overwritten in each iteration (saves space)
+# named: <test>.tar
+# Use option -p to override default behavior
+skip_preserve_logs="yes"
+
+OSTYPE=$(uname -s)
+
+# Function for use in generating filenames with increasing "-<n>" index
+# In:
+# $1 basepath: Directory where file needs to be created
+# $2 filename: Name of the file sans extension
+# $3 extension: Extension string that would be appended to the generated
+# filename
+# Out:
+# string of next available filename with appended "-<n>"
+# Example:
+# Interested routines that want to create a file name, say foo-<n>.txt at
+# location /var/log/gluster would pass in "/var/log/gluster" "foo" "txt"
+# and be returned next available foo-<n> filename to create.
+# Notes:
+# Function will not accept empty extension, and will return the same name
+# over and over (which can be fixed when there is a need for it)
+function get_next_filename()
+{
+ local basepath=$1
+ local filename=$2
+ local extension=$3
+ local next=1
+ local tfilename="${filename}-${next}"
+ while [ -e "${basepath}/${tfilename}.${extension}" ]; do
+ next=$((next+1))
+ tfilename="${filename}-${next}"
+ done
+
+ echo "$tfilename"
+}
+
+# Tar the gluster logs and generate a tarball named after the first parameter
+# passed in to the function. Ideally the test name is passed to this function
+# to generate the required tarball.
+# Tarball name is further controlled by the variable skip_preserve_logs
+function tar_logs()
+{
+ t=$1
+
+ logdir=$(gluster --print-logdir)
+ basetarname=$(basename "$t" .t)
+
+ if [ -n "$logdir" ]
+ then
+ if [[ $skip_preserve_logs == "yes" ]]; then
+ savetarname=$(get_next_filename "${logdir}" \
+ "${basetarname}-iteration" "tar" \
+ | tail -1)
+ else
+ savetarname="$basetarname"
+ fi
+
+ # Can't use --exclude here because NetBSD doesn't have it.
+ # However, both it and Linux have -X to take patterns from
+ # a file, so use that.
+ (echo '*.tar'; echo .notar) > "${logdir}"/.notar \
+ && \
+ tar -cf "${logdir}"/"${savetarname}".tar -X "${logdir}"/.notar \
+ "${logdir}"/* 2> /dev/null \
+ && \
+ find "$logdir"/* -maxdepth 0 -name '*.tar' -prune \
+ -o -exec rm -rf '{}' ';'
+
+ echo "Logs preserved in tarball $savetarname.tar"
+ else
+ echo "Logs not preserved, as logdir is not set"
+ fi
+}
+
+function check_dependencies()
+{
+ ## Check all dependencies are present
+ MISSING=""
+
+ # Check for dbench
+ env dbench --usage > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING dbench"
+ fi
+
+ # Check for git
+ env git --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING git"
+ fi
+
+ # Check for nfs-utils (Linux-only: built-in NetBSD with different name)
+ if [ "x`uname -s`" = "xLinux" ] ; then
+ env mount.nfs -V > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING nfs-utils"
+ fi
+ fi
+
+ # Check for netstat
+ env netstat --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING netstat"
+ fi
+
+ # Check for the Perl Test Harness
+ env prove --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING perl-Test-Harness"
+ fi
+
+ which json_verify > /dev/null
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING json_verify"
+ fi
+
+ # Check for XFS programs (Linux Only: NetBSD does without)
+ if [ "x`uname -s`" = "xLinux" ] ; then
+ env mkfs.xfs -V > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING xfsprogs"
+ fi
+ fi
+
+ # Check for attr
+ env getfattr --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING attr"
+ fi
+
+ # Check for pidof
+ pidof pidof > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING pidof"
+ fi
+
+ # Check for netstat
+ env netstat --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING netstat"
+ fi
+
+ # check for psutil python package
+ test `uname -s` == "Darwin" || test `uname -s` == "FreeBSD" && {
+ pip show psutil | grep -q psutil >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING psutil"
+ fi
+ }
+
+ ## If dependencies are missing, warn the user and abort
+ if [ "x$MISSING" != "x" ]; then
+ test "x${force}" != "xyes" && echo "Aborting."
+ echo
+ echo "The following required tools are missing:"
+ echo
+ for pkg in $MISSING; do
+ echo " * $pkg"
+ done
+ echo
+ test "x${force}" = "xyes" && return
+ echo "Please install them and try again."
+ echo
+ exit 2
+ fi
+}
+
+function check_location()
+{
+ regression_testsdir=$(dirname $0);
+
+ if [ ! -f ${regression_testsdir}/tests/include.rc ]; then
+ echo "Aborting."
+ echo
+ echo "The tests/ subdirectory seems to be missing."
+ echo
+ echo "Please correct the problem and try again."
+ echo
+ exit 1
+ fi
+}
+
+function check_user()
+{
+ # If we're not running as root, warn the user and abort
+ MYUID=`/usr/bin/id -u`
+ if [ 0${MYUID} -ne 0 ]; then
+ echo "Aborting."
+ echo
+ echo "The GlusterFS Test Framework must be run as root."
+ echo
+ echo "Please change to the root user and try again."
+ echo
+ exit 3
+ fi
+}
+
+function match()
+{
+ # Patterns considered valid:
+ # 0. Empty means everything
+ # "" matches ** i.e all
+ # 1. full or partial file/directory names
+ # basic matches tests/basic
+ # basic/afr matches tests/basic/afr
+ # 2. globs
+ # basic/* matches all files and directories in basic
+ # basic/*/ matches subdirectories in basic (afr|ec)
+ # 3. numbered bug matching
+ # 884455 matches bugs/bug-884455.t
+ # 859927 matches bugs/859927, bugs/bug-859927.t
+ # 1015990 matches /bugs/bug-1015990-rep.t, bug-1015990.t
+ # ...lots of other cases accepted as well, since globbing is tricky.
+ local t=$1
+ shift
+ local a
+ local match=1
+ if [ -z "$@" ]; then
+ match=0
+ return $match
+ fi
+ for a in $@ ; do
+ case "$t" in
+ *$a*)
+ match=0
+ ;;
+ esac
+ done
+ return $match
+}
+
+# Tests can have comment lines with some comma separated values within them.
+# Key names used to determine test status are
+# G_TESTDEF_TEST_STATUS_CENTOS6
+# G_TESTDEF_TEST_STATUS_NETBSD7
+# Some examples:
+# G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=123456
+# G_TESTDEF_TEST_STATUS_CENTOS6=BRICK_MUX_BAD_TEST,BUG=123456
+# G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=4444444
+# G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=123456;555555
+# G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TESTS,BUG=1385758
+# You can change status of test to enabled or delete the line only if all the
+# bugs are closed or modified or if the patch fixes it.
+function get_test_status ()
+{
+ local test_name=$1
+ local host_os=""
+ local result=""
+
+ host_os=$(uname -s)
+
+ case "$host_os" in
+ # Leaving out the logic to determine the particular distro and version
+ # for later. Why does the key have the distro and version then?
+ # Because changing the key in all test files would be very big process
+ # updating just this function with a better logic much simpler.
+ Linux)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_CENTOS6" $test_name | \
+ awk -F"," {'print $1'} | awk -F"=" {'print $2'}) ;;
+ NetBSD)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_NETBSD7" $test_name | \
+ awk -F"," {'print $1'} | awk -F"=" {'print $2'}) ;;
+ *)
+ result="ENABLED" ;;
+ esac
+
+ echo "$result"
+
+}
+
+function get_bug_list_for_disabled_test ()
+{
+ local test_name=$1
+ local host_os=""
+ local result=""
+
+ host_os=$(uname -s)
+
+ case "$host_os" in
+ # Leaving out the logic to determine the particular distro and version
+ # for later. Why does the key have the distro and version then?
+ # Because changing the key in all test files would be very big process
+ # updating just this function with a better logic much simpler.
+ Linux)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_CENTOS6" $test_name | \
+ awk -F"," {'print $2'} | awk -F"=" {'print $2'}) ;;
+ NetBSD)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_NETBSD7" $test_name | \
+ awk -F"," {'print $2'} | awk -F"=" {'print $2'}) ;;
+ *)
+ result="0000000" ;;
+ esac
+
+ echo "$result"
+
+}
+
+function run_tests()
+{
+ RES=0
+ FLAKY=''
+ FAILED=''
+ TESTS_NEEDED_RETRY=''
+ GENERATED_CORE=''
+ total_tests=0
+ selected_tests=0
+ skipped_bad_tests=0
+ skipped_known_issue_tests=0
+ total_run_tests=0
+
+ # key = path of .t file; value = time taken to run the .t file
+ declare -A ELAPSEDTIMEMAP
+
+ # Test if -k is supported for timeout command
+ # This is not supported on centos6, but spuported on centos7
+ # The flags is required for running the command in both flavors
+ timeout_cmd_exists="yes"
+ timeout -k 1 10 echo "testing 'timeout' command"
+ if [ $? -ne 0 ]; then
+ timeout_cmd_exists="no"
+ fi
+
+ all_tests=($(find ${regression_testsdir}/tests -name '*.t' | sort))
+ all_tests_cnt=${#all_tests[@]}
+ for t in "${all_tests[@]}" ; do
+ old_cores=$(ls /*-*.core 2> /dev/null | wc -l)
+ total_tests=$((total_tests+1))
+ if match $t "$@" ; then
+ selected_tests=$((selected_tests+1))
+ echo
+ echo $section_separator "(${total_tests} / ${all_tests_cnt})" $section_separator
+ if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \
+ [[ $skip_bad_tests == "yes" ]]
+ then
+ skipped_bad_tests=$((skipped_bad_tests+1))
+ echo "Skipping bad test file $t"
+ echo "Reason: bug(s):" $(get_bug_list_for_disabled_test $t)
+ echo $section_separator$section_separator
+ echo
+ continue
+ fi
+ if [[ $(get_test_status $t) == "KNOWN_ISSUE" ]] && \
+ [[ $skip_known_bugs == "yes" ]]
+ then
+ skipped_known_issue_tests=$((skipped_known_issue_tests+1))
+ echo "Skipping test file $t due to known issue"
+ echo "Reason: bug(s):" $(get_bug_list_for_disabled_test $t)
+ echo $section_separator$section_separator
+ echo
+ continue
+ fi
+ if [[ $(get_test_status $t) == "NFS_TEST" ]] && \
+ [[ $nfs_tests == "no" ]]
+ then
+ echo "Skipping nfs test file $t"
+ echo $section_separator$section_separator
+ echo
+ continue
+ fi
+ total_run_tests=$((total_run_tests+1))
+ echo "[$(date +%H:%M:%S)] Running tests in file $t"
+ starttime="$(date +%s)"
+
+ local cmd_timeout=$run_timeout;
+ if [ ${timeout_cmd_exists} == "yes" ]; then
+ if [ $(grep -c "SCRIPT_TIMEOUT=" ${t}) == 1 ] ; then
+ cmd_timeout=$(grep "SCRIPT_TIMEOUT=" ${t} | cut -f2 -d'=');
+ echo "Timeout set is ${cmd_timeout}, default ${run_timeout}"
+ fi
+ timeout --foreground -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
+ else
+ prove -vmfe '/bin/bash' ${t}
+ fi
+ TMP_RES=$?
+ ELAPSEDTIMEMAP[$t]=`expr $(date +%s) - $starttime`
+ tar_logs "$t"
+
+ # timeout always return 124 if it is actually a timeout.
+ if ((${TMP_RES} == 124)); then
+ echo "${t} timed out after ${cmd_timeout} seconds"
+ fi
+
+ if [ ${TMP_RES} -ne 0 ] && [ "x${retry}" = "xyes" ] ; then
+ echo "$t: bad status $TMP_RES"
+ echo ""
+ echo " *********************************"
+ echo " * REGRESSION FAILED *"
+ echo " * Retrying failed tests in case *"
+ echo " * we got some spurious failures *"
+ echo " *********************************"
+ echo ""
+
+ if [ ${timeout_cmd_exists} == "yes" ]; then
+ timeout --foreground -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
+ else
+ prove -vmfe '/bin/bash' ${t}
+ fi
+ TMP_RES=$?
+ tar_logs "$t"
+
+ if ((${TMP_RES} == 124)); then
+ echo "${t} timed out after ${cmd_timeout} seconds"
+ fi
+
+ TESTS_NEEDED_RETRY="${TESTS_NEEDED_RETRY}${t} "
+ fi
+
+
+ if [ ${TMP_RES} -ne 0 ] ; then
+ if [[ "$t" == *"tests/000-flaky/"* ]]; then
+ FLAKY="${FLAKY}${t} "
+ echo "FAILURE -> SUCCESS: Flaky test"
+ TMP_RES=0
+ else
+ RES=${TMP_RES}
+ FAILED="${FAILED}${t} "
+ fi
+ fi
+
+ new_cores=$(ls /*-*.core 2> /dev/null | wc -l)
+ if [ x"$new_cores" != x"$old_cores" ]; then
+ core_diff=$((new_cores-old_cores))
+ echo "$t: $core_diff new core files"
+ RES=1
+ GENERATED_CORE="${GENERATED_CORE}${t} "
+ fi
+ echo "End of test $t"
+ echo $section_separator$section_separator
+ echo
+ if [ $RES -ne 0 ] && [ x"$exit_on_failure" = "xyes" ] ; then
+ break;
+ fi
+ fi
+ done
+ echo
+ echo "Run complete"
+ echo $section_separator$section_separator
+ echo "Number of tests found: $total_tests"
+ echo "Number of tests selected for run based on pattern: $selected_tests"
+ echo "Number of tests skipped as they were marked bad: $skipped_bad_tests"
+ echo "Number of tests skipped because of known_issues: $skipped_known_issue_tests"
+ echo "Number of tests that were run: $total_run_tests"
+
+ echo
+ echo "Tests ordered by time taken, slowest to fastest: "
+ echo $section_separator$section_separator
+ for key in "${!ELAPSEDTIMEMAP[@]}"
+ do
+ echo "$key - ${ELAPSEDTIMEMAP["$key"]} second"
+ done | sort -rn -k3
+
+ # initialize the output file
+ echo > "${result_output}"
+
+ # Output the errors into a file
+ if [ ${RES} -ne 0 ] ; then
+ FAILED=$( echo ${FAILED} | tr ' ' '\n' | sort -u )
+ FAILED_COUNT=$( echo -n "${FAILED}" | grep -c '^' )
+ echo -e "\n$FAILED_COUNT test(s) failed \n${FAILED}" >> "${result_output}"
+ GENERATED_CORE=$( echo ${GENERATED_CORE} | tr ' ' '\n' | sort -u )
+ GENERATED_CORE_COUNT=$( echo -n "${GENERATED_CORE}" | grep -c '^' )
+ echo -e "\n$GENERATED_CORE_COUNT test(s) generated core \n${GENERATED_CORE}" >> "${result_output}"
+ cat "${result_output}"
+ fi
+ TESTS_NEEDED_RETRY=$( echo ${TESTS_NEEDED_RETRY} | tr ' ' '\n' | sort -u )
+ RETRY_COUNT=$( echo -n "${TESTS_NEEDED_RETRY}" | grep -c '^' )
+ if [ ${RETRY_COUNT} -ne 0 ] ; then
+ echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}" >> "${result_output}"
+ fi
+
+ FLAKY_TESTS_FAILED=$( echo ${FLAKY} | tr ' ' '\n' | sort -u )
+ RETRY_COUNT=$( echo -n "${FLAKY_TESTS_FAILED}" | grep -c '^' )
+ if [ ${RETRY_COUNT} -ne 0 ] ; then
+ echo -e "\n${RETRY_COUNT} flaky test(s) marked as success even though they failed \n${FLAKY_TESTS_FAILED}" >> "${result_output}"
+ fi
+
+ echo
+ echo "Result is $RES"
+ echo
+ return ${RES}
+}
+
+function run_head_tests()
+{
+ [ -d ${regression_testsdir}/.git ] || return 0
+
+ # The git command needs $cwd to be within the repository, but run_tests
+ # needs it to be back where we started.
+ pushd $regression_testsdir
+ git_cmd="git diff-tree --no-commit-id --name-only --diff-filter=ACMRTUXB"
+ htests=$($git_cmd -r HEAD tests | grep '.t$')
+ popd
+ [ -n "$htests" ] || return 0
+
+ # Perhaps it's not ideal that we'll end up re-running these tests, but the
+ # gains from letting them fail fast in the first round should outweigh the
+ # losses from running them again in the second. OTOH, with so many of our
+ # tests being non-deterministic, maybe it doesn't hurt to give the newest
+ # tests an extra workout.
+ run_tests "$htests"
+}
+
+function show_usage ()
+{
+ cat <<EOF
+Usage: $0 <opts> [<glob>|<bzid>]...
+
+Options:
+
+-f force
+-h skip tests altering from HEAD
+-H run only tests altering from HEAD
+-r retry failed tests
+-R do not retry failed tests
+-c dont't exit on failure
+-b don't skip bad tests
+-k don't skip known bugs
+-p don't keep logs from preceding runs
+-o OUTPUT
+-t TIMEOUT
+-n skip NFS tests
+--help
+EOF
+}
+
+usage="no"
+
+function parse_args ()
+{
+ args=`getopt -u -l help frRcbkphHno:t: "$@"`
+ if ! [ $? -eq 0 ]; then
+ show_usage
+ exit 1
+ fi
+ set -- $args
+ while [ $# -gt 0 ]; do
+ case "$1" in
+ -f) force="yes" ;;
+ -h) head="no" ;;
+ -H) head="only" ;;
+ -r) retry="yes" ;;
+ -R) retry="no" ;;
+ -c) exit_on_failure="no" ;;
+ -b) skip_bad_tests="no" ;;
+ -k) skip_known_bugs="no" ;;
+ -p) skip_preserve_logs="no" ;;
+ -o) result_output="$2"; shift;;
+ -t) run_timeout="$2"; shift;;
+ -n) nfs_tests="no";;
+ --help) usage="yes" ;;
+ --) shift; break;;
+ esac
+ shift
+ done
+ tests="$@"
+}
+
+
+echo
+echo ... GlusterFS Test Framework ...
+echo
+
+# Get user options
+parse_args "$@"
+if [ x"$usage" == x"yes" ]; then
+ show_usage
+ exit 0
+fi
+
+# Make sure we're running as the root user
+check_user
+
+# Make sure the needed programs are available
+check_dependencies
+
+# Check we're running from the right location
+check_location
+
+# Run the tests
+if [ x"$head" != x"no" ]; then
+ run_head_tests || exit 1
+fi
+if [ x"$head" != x"only" ]; then
+ run_tests "$tests" || exit 1
+fi
diff --git a/scheduler/Makefile.am b/scheduler/Makefile.am
deleted file mode 100644
index 618fa7dd8b8..00000000000
--- a/scheduler/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = alu random nufa rr switch
-
-CLEANFILES =
diff --git a/scheduler/alu/src/Makefile.am b/scheduler/alu/src/Makefile.am
deleted file mode 100644
index eb7d0db07e0..00000000000
--- a/scheduler/alu/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-sched_LTLIBRARIES = alu.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-alu_la_LDFLAGS = -module -avoidversion
-
-alu_la_SOURCES = alu.c
-alu_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = alu.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/alu/src/alu-mem-types.h b/scheduler/alu/src/alu-mem-types.h
deleted file mode 100644
index 92702f28690..00000000000
--- a/scheduler/alu/src/alu-mem-types.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __ALU_MEM_TYPES_H__
-#define __ALU_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_alu_mem_types_ {
- gf_alu_mt_alu_threshold = gf_common_mt_end + 1,
- gf_alu_mt_alu_sched,
- gf_alu_mt_alu_limits,
- gf_alu_mt_alu_sched_struct,
- gf_alu_mt_alu_sched_node,
- gf_alu_mt_end
-};
-#endif
-
diff --git a/scheduler/alu/src/alu.c b/scheduler/alu/src/alu.c
deleted file mode 100644
index 58bef60ae9f..00000000000
--- a/scheduler/alu/src/alu.c
+++ /dev/null
@@ -1,1019 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-/* ALU code needs a complete re-write. This is one of the most important
- * part of GlusterFS and so needs more and more reviews and testing
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-#include <stdint.h>
-#include "stack.h"
-#include "alu.h"
-#include "alu-mem-types.h"
-
-#define ALU_DISK_USAGE_ENTRY_THRESHOLD_DEFAULT (1 * GF_UNIT_GB)
-#define ALU_DISK_USAGE_EXIT_THRESHOLD_DEFAULT (512 * GF_UNIT_MB)
-
-#define ALU_WRITE_USAGE_ENTRY_THRESHOLD_DEFAULT 25
-#define ALU_WRITE_USAGE_EXIT_THRESHOLD_DEFAULT 5
-
-#define ALU_READ_USAGE_ENTRY_THRESHOLD_DEFAULT 25
-#define ALU_READ_USAGE_EXIT_THRESHOLD_DEFAULT 5
-
-#define ALU_OPEN_FILES_USAGE_ENTRY_THRESHOLD_DEFAULT 1000
-#define ALU_OPEN_FILES_USAGE_EXIT_THRESHOLD_DEFAULT 100
-
-#define ALU_LIMITS_TOTAL_DISK_SIZE_DEFAULT 100
-
-#define ALU_REFRESH_INTERVAL_DEFAULT 5
-#define ALU_REFRESH_CREATE_COUNT_DEFAULT 5
-
-
-static int64_t
-get_stats_disk_usage (struct xlator_stats *this)
-{
- return this->disk_usage;
-}
-
-static int64_t
-get_stats_write_usage (struct xlator_stats *this)
-{
- return this->write_usage;
-}
-
-static int64_t
-get_stats_read_usage (struct xlator_stats *this)
-{
- return this->read_usage;
-}
-
-static int64_t
-get_stats_disk_speed (struct xlator_stats *this)
-{
- return this->disk_speed;
-}
-
-static int64_t
-get_stats_file_usage (struct xlator_stats *this)
-{
- /* Avoid warning "defined but not used" */
- (void) &get_stats_file_usage;
-
- return this->nr_files;
-}
-
-static int64_t
-get_stats_free_disk (struct xlator_stats *this)
-{
- if (this->total_disk_size > 0)
- return (this->free_disk * 100) / this->total_disk_size;
- return 0;
-}
-
-static int64_t
-get_max_diff_write_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->write_usage - min->write_usage);
-}
-
-static int64_t
-get_max_diff_read_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->read_usage - min->read_usage);
-}
-
-static int64_t
-get_max_diff_disk_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->disk_usage - min->disk_usage);
-}
-
-static int64_t
-get_max_diff_disk_speed (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->disk_speed - min->disk_speed);
-}
-
-static int64_t
-get_max_diff_file_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->nr_files - min->nr_files);
-}
-
-
-int
-alu_parse_options (xlator_t *xl, struct alu_sched *alu_sched)
-{
- data_t *order = dict_get (xl->options, "scheduler.alu.order");
- if (!order) {
- gf_log (xl->name, GF_LOG_ERROR,
- "option 'scheduler.alu.order' not specified");
- return -1;
- }
- struct alu_threshold *_threshold_fn;
- struct alu_threshold *tmp_threshold;
- data_t *entry_fn = NULL;
- data_t *exit_fn = NULL;
- char *tmp_str = NULL;
- char *order_str = strtok_r (order->data, ":", &tmp_str);
- /* Get the scheduling priority order, specified by the user. */
- while (order_str) {
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: order string: %s",
- order_str);
- if (strcmp (order_str, "disk-usage") == 0) {
- /* Disk usage */
- _threshold_fn =
- GF_CALLOC (1,
- sizeof (struct alu_threshold),
- gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_disk_usage;
- _threshold_fn->sched_value = get_stats_disk_usage;
- entry_fn =
- dict_get (xl->options,
- "scheduler.alu.disk-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.disk_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "disk-usage.entry-threshold\"",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.disk_usage = ALU_DISK_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_disk_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.disk-usage.exit-threshold");
- if (exit_fn) {
- if (gf_string2bytesize (exit_fn->data, &alu_sched->exit_limit.disk_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "disk-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- } else {
- alu_sched->exit_limit.disk_usage = ALU_DISK_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_disk_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- } else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu",
- GF_LOG_DEBUG, "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.disk_usage,
- alu_sched->exit_limit.disk_usage);
-
- } else if (strcmp (order_str, "write-usage") == 0) {
- /* Handle "write-usage" */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_write_usage;
- _threshold_fn->sched_value = get_stats_write_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.write-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.write_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of option scheduler.alu."
- "write-usage.entry-threshold",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.write_usage = ALU_WRITE_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_write_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.write-usage.exit-threshold");
- if (exit_fn) {
- if (gf_string2bytesize (exit_fn->data,
- &alu_sched->exit_limit.write_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\""
- " of \"option scheduler.alu."
- "write-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- } else {
- alu_sched->exit_limit.write_usage = ALU_WRITE_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_write_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- } else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log (xl->name, GF_LOG_DEBUG,
- "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.write_usage,
- alu_sched->exit_limit.write_usage);
-
- } else if (strcmp (order_str, "read-usage") == 0) {
- /* Read usage */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_read_usage;
- _threshold_fn->sched_value = get_stats_read_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.read-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.read_usage) != 0) {
- gf_log (xl->name,
- GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "read-usage.entry-threshold\"",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.read_usage = ALU_READ_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_read_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.read-usage.exit-threshold");
- if (exit_fn)
- {
- if (gf_string2bytesize (exit_fn->data,
- &alu_sched->exit_limit.read_usage) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "read-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->exit_limit.read_usage = ALU_READ_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_read_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.read_usage,
- alu_sched->exit_limit.read_usage);
-
- } else if (strcmp (order_str, "open-files-usage") == 0) {
- /* Open files counter */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_file_usage;
- _threshold_fn->sched_value = get_stats_file_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.open-files-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2uint64 (entry_fn->data,
- &alu_sched->entry_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "open-files-usage.entry-"
- "threshold\"", entry_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->entry_limit.nr_files = ALU_OPEN_FILES_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_file_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.open-files-usage.exit-threshold");
- if (exit_fn)
- {
- if (gf_string2uint64 (exit_fn->data,
- &alu_sched->exit_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "open-files-usage.exit-"
- "threshold\"", exit_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->exit_limit.nr_files = ALU_OPEN_FILES_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_file_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu", GF_LOG_DEBUG,
- "alu.c->alu_init: = %"PRIu64",%"PRIu64"",
- alu_sched->entry_limit.nr_files,
- alu_sched->exit_limit.nr_files);
-
- } else if (strcmp (order_str, "disk-speed-usage") == 0) {
- /* Disk speed */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_disk_speed;
- _threshold_fn->sched_value = get_stats_disk_speed;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.disk-speed-usage.entry-threshold");
- if (entry_fn) {
- gf_log ("alu", GF_LOG_DEBUG,
- "entry-threshold is given, "
- "value is constant");
- }
- _threshold_fn->entry_value = NULL;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.disk-speed-usage.exit-threshold");
- if (exit_fn) {
- gf_log ("alu", GF_LOG_DEBUG,
- "exit-threshold is given, "
- "value is constant");
- }
- _threshold_fn->exit_value = NULL;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
-
- } else {
- gf_log ("alu", GF_LOG_DEBUG,
- "%s, unknown option provided to scheduler",
- order_str);
- }
- order_str = strtok_r (NULL, ":", &tmp_str);
- }
-
- return 0;
-}
-
-static int32_t
-alu_init (xlator_t *xl)
-{
- struct alu_sched *alu_sched = NULL;
- struct alu_limits *_limit_fn = NULL;
- struct alu_limits *tmp_limits = NULL;
- uint32_t min_free_disk = 0;
- data_t *limits = NULL;
-
- alu_sched = GF_CALLOC (1, sizeof (struct alu_sched),
- gf_alu_mt_alu_sched);
- ERR_ABORT (alu_sched);
-
- {
- alu_parse_options (xl, alu_sched);
- }
-
- /* Get the limits */
-
- limits = dict_get (xl->options,
- "scheduler.limits.min-free-disk");
- if (limits) {
- _limit_fn = GF_CALLOC (1, sizeof (struct alu_limits),
- gf_alu_mt_alu_limits);
- ERR_ABORT (_limit_fn);
- _limit_fn->min_value = get_stats_free_disk;
- _limit_fn->cur_value = get_stats_free_disk;
- tmp_limits = alu_sched->limits_fn ;
- _limit_fn->next = tmp_limits;
- alu_sched->limits_fn = _limit_fn;
-
- if (gf_string2percent (limits->data,
- &min_free_disk) != 0) {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.limits."
- "min-free-disk\"", limits->data);
- return -1;
- }
- alu_sched->spec_limit.free_disk = min_free_disk;
-
- if (alu_sched->spec_limit.free_disk >= 100) {
- gf_log ("alu", GF_LOG_ERROR,
- "check the \"option scheduler."
- "limits.min-free-disk\", it should "
- "be percentage value");
- return -1;
- }
- alu_sched->spec_limit.total_disk_size = ALU_LIMITS_TOTAL_DISK_SIZE_DEFAULT; /* Its in % */
- gf_log ("alu", GF_LOG_DEBUG,
- "alu.limit.min-disk-free = %"PRId64"",
- _limit_fn->cur_value (&(alu_sched->spec_limit)));
- }
-
- limits = dict_get (xl->options,
- "scheduler.limits.max-open-files");
- if (limits) {
- // Update alu_sched->priority properly
- _limit_fn = GF_CALLOC (1, sizeof (struct alu_limits),
- gf_alu_mt_alu_limits);
- ERR_ABORT (_limit_fn);
- _limit_fn->max_value = get_stats_file_usage;
- _limit_fn->cur_value = get_stats_file_usage;
- tmp_limits = alu_sched->limits_fn ;
- _limit_fn->next = tmp_limits;
- alu_sched->limits_fn = _limit_fn;
- if (gf_string2uint64_base10 (limits->data,
- &alu_sched->spec_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format '%s' of option "
- "scheduler.limits.max-open-files",
- limits->data);
- return -1;
- }
-
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: limit.max-open-files = %"PRId64"",
- _limit_fn->cur_value (&(alu_sched->spec_limit)));
- }
-
-
- /* Stats refresh options */
- limits = dict_get (xl->options,
- "scheduler.refresh-interval");
- if (limits) {
- if (gf_string2time (limits->data,
- &alu_sched->refresh_interval) != 0) {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "option scheduler.refresh-interval",
- limits->data);
- return -1;
- }
- } else {
- alu_sched->refresh_interval = ALU_REFRESH_INTERVAL_DEFAULT;
- }
- gettimeofday (&(alu_sched->last_stat_fetch), NULL);
-
-
- limits = dict_get (xl->options,
- "scheduler.alu.stat-refresh.num-file-create");
- if (limits) {
- if (gf_string2uint32 (limits->data,
- &alu_sched->refresh_create_count) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "alu.stat-refresh.num-file-create\"",
- limits->data);
- return -1;
- }
- } else {
- alu_sched->refresh_create_count = ALU_REFRESH_CREATE_COUNT_DEFAULT;
- }
-
- {
- /* Build an array of child_nodes */
- struct alu_sched_struct *sched_array = NULL;
- xlator_list_t *trav_xl = xl->children;
- data_t *data = NULL;
- int32_t index = 0;
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- alu_sched->child_count = index;
- sched_array = GF_CALLOC (index, sizeof (struct alu_sched_struct), gf_alu_mt_alu_sched_struct);
- ERR_ABORT (sched_array);
- trav_xl = xl->children;
- index = 0;
- while (trav_xl) {
- sched_array[index].xl = trav_xl->xlator;
- sched_array[index].eligible = 1;
- index++;
- trav_xl = trav_xl->next;
- }
- alu_sched->array = sched_array;
-
- data = dict_get (xl->options,
- "scheduler.read-only-subvolumes");
- if (data) {
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = gf_strdup (data->data);
-
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- for (index = 1; index < alu_sched->child_count; index++) {
- if (strcmp (alu_sched->array[index -1].xl->name, child) == 0) {
- memcpy (&(alu_sched->array[index -1]),
- &(alu_sched->array[alu_sched->child_count -1]),
- sizeof (struct alu_sched_struct));
- alu_sched->child_count--;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- }
- }
-
- *((long *)xl->private) = (long)alu_sched;
-
- /* Initialize all the alu_sched structure's elements */
- {
- alu_sched->sched_nodes_pending = 0;
-
- alu_sched->min_limit.free_disk = 0x00FFFFFF;
- alu_sched->min_limit.disk_usage = 0xFFFFFFFF;
- alu_sched->min_limit.total_disk_size = 0xFFFFFFFF;
- alu_sched->min_limit.disk_speed = 0xFFFFFFFF;
- alu_sched->min_limit.write_usage = 0xFFFFFFFF;
- alu_sched->min_limit.read_usage = 0xFFFFFFFF;
- alu_sched->min_limit.nr_files = 0xFFFFFFFF;
- alu_sched->min_limit.nr_clients = 0xFFFFFFFF;
- }
-
- pthread_mutex_init (&alu_sched->alu_mutex, NULL);
- return 0;
-}
-
-static void
-alu_fini (xlator_t *xl)
-{
- if (!xl)
- return;
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- struct alu_limits *limit = alu_sched->limits_fn;
- struct alu_threshold *threshold = alu_sched->threshold_fn;
- void *tmp = NULL;
- pthread_mutex_destroy (&alu_sched->alu_mutex);
- GF_FREE (alu_sched->array);
- while (limit) {
- tmp = limit;
- limit = limit->next;
- GF_FREE (tmp);
- }
- while (threshold) {
- tmp = threshold;
- threshold = threshold->next;
- GF_FREE (tmp);
- }
- GF_FREE (alu_sched);
-}
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- struct alu_limits *limits_fn = alu_sched->limits_fn;
- int32_t idx = 0;
-
- pthread_mutex_lock (&alu_sched->alu_mutex);
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (alu_sched->array[idx].xl == (xlator_t *)cookie)
- break;
- }
- pthread_mutex_unlock (&alu_sched->alu_mutex);
-
- if (op_ret == -1) {
- alu_sched->array[idx].eligible = 0;
- } else {
- memcpy (&(alu_sched->array[idx].stats), trav_stats, sizeof (struct xlator_stats));
-
- /* Get stats from all the child node */
- /* Here check the limits specified by the user to
- consider the nodes to be used by scheduler */
- alu_sched->array[idx].eligible = 1;
- limits_fn = alu_sched->limits_fn;
- while (limits_fn){
- if (limits_fn->max_value &&
- (limits_fn->cur_value (trav_stats) >
- limits_fn->max_value (&(alu_sched->spec_limit)))) {
- alu_sched->array[idx].eligible = 0;
- }
- if (limits_fn->min_value &&
- (limits_fn->cur_value (trav_stats) <
- limits_fn->min_value (&(alu_sched->spec_limit)))) {
- alu_sched->array[idx].eligible = 0;
- }
- limits_fn = limits_fn->next;
- }
-
- /* Select minimum and maximum disk_usage */
- if (trav_stats->disk_usage > alu_sched->max_limit.disk_usage) {
- alu_sched->max_limit.disk_usage = trav_stats->disk_usage;
- }
- if (trav_stats->disk_usage < alu_sched->min_limit.disk_usage) {
- alu_sched->min_limit.disk_usage = trav_stats->disk_usage;
- }
-
- /* Select minimum and maximum disk_speed */
- if (trav_stats->disk_speed > alu_sched->max_limit.disk_speed) {
- alu_sched->max_limit.disk_speed = trav_stats->disk_speed;
- }
- if (trav_stats->disk_speed < alu_sched->min_limit.disk_speed) {
- alu_sched->min_limit.disk_speed = trav_stats->disk_speed;
- }
-
- /* Select minimum and maximum number of open files */
- if (trav_stats->nr_files > alu_sched->max_limit.nr_files) {
- alu_sched->max_limit.nr_files = trav_stats->nr_files;
- }
- if (trav_stats->nr_files < alu_sched->min_limit.nr_files) {
- alu_sched->min_limit.nr_files = trav_stats->nr_files;
- }
-
- /* Select minimum and maximum write-usage */
- if (trav_stats->write_usage > alu_sched->max_limit.write_usage) {
- alu_sched->max_limit.write_usage = trav_stats->write_usage;
- }
- if (trav_stats->write_usage < alu_sched->min_limit.write_usage) {
- alu_sched->min_limit.write_usage = trav_stats->write_usage;
- }
-
- /* Select minimum and maximum read-usage */
- if (trav_stats->read_usage > alu_sched->max_limit.read_usage) {
- alu_sched->max_limit.read_usage = trav_stats->read_usage;
- }
- if (trav_stats->read_usage < alu_sched->min_limit.read_usage) {
- alu_sched->min_limit.read_usage = trav_stats->read_usage;
- }
-
- /* Select minimum and maximum free-disk */
- if (trav_stats->free_disk > alu_sched->max_limit.free_disk) {
- alu_sched->max_limit.free_disk = trav_stats->free_disk;
- }
- if (trav_stats->free_disk < alu_sched->min_limit.free_disk) {
- alu_sched->min_limit.free_disk = trav_stats->free_disk;
- }
- }
-
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- /* This function schedules the file in one of the child nodes */
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- int32_t idx = 0;
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- for (idx = 0 ; idx < alu_sched->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- alu_sched->array[idx].xl, //cookie
- alu_sched->array[idx].xl,
- (alu_sched->array[idx].xl)->mops->stats,
- 0); //flag
- }
- return;
-}
-
-static void
-alu_update (xlator_t *xl)
-{
- struct timeval tv;
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
-
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (alu_sched->refresh_interval + alu_sched->last_stat_fetch.tv_sec)) {
- /* Update the stats from all the server */
- update_stat_array (xl);
- alu_sched->last_stat_fetch.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-alu_scheduler (xlator_t *xl, const void *path)
-{
- /* This function schedules the file in one of the child nodes */
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- int32_t sched_index = 0;
- int32_t sched_index_orig = 0;
- int32_t idx = 0;
-
- alu_update (xl);
-
- /* Now check each threshold one by one if some nodes are classified */
- {
- struct alu_threshold *trav_threshold = alu_sched->threshold_fn;
- struct alu_threshold *tmp_threshold = alu_sched->sched_method;
- struct alu_sched_node *tmp_sched_node;
-
- /* This pointer 'trav_threshold' contains function pointers according to spec file
- give by user, */
- while (trav_threshold) {
- /* This check is needed for seeing if already there are nodes in this criteria
- to be scheduled */
- if (!alu_sched->sched_nodes_pending) {
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (!alu_sched->array[idx].eligible) {
- continue;
- }
- if (trav_threshold->entry_value) {
- if (trav_threshold->diff_value (&(alu_sched->max_limit),
- &(alu_sched->array[idx].stats)) <
- trav_threshold->entry_value (&(alu_sched->entry_limit))) {
- continue;
- }
- }
- tmp_sched_node = GF_CALLOC (1, sizeof (struct alu_sched_node), gf_alu_mt_alu_sched_node);
- ERR_ABORT (tmp_sched_node);
- tmp_sched_node->index = idx;
- if (!alu_sched->sched_node) {
- alu_sched->sched_node = tmp_sched_node;
- } else {
- pthread_mutex_lock (&alu_sched->alu_mutex);
- tmp_sched_node->next = alu_sched->sched_node;
- alu_sched->sched_node = tmp_sched_node;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- alu_sched->sched_nodes_pending++;
- }
- } /* end of if (sched_nodes_pending) */
-
- /* This loop is required to check the eligible nodes */
- struct alu_sched_node *trav_sched_node;
- while (alu_sched->sched_nodes_pending) {
- trav_sched_node = alu_sched->sched_node;
- sched_index = trav_sched_node->index;
- if (alu_sched->array[sched_index].eligible)
- break;
- alu_sched->sched_node = trav_sched_node->next;
- GF_FREE (trav_sched_node);
- alu_sched->sched_nodes_pending--;
- }
- if (alu_sched->sched_nodes_pending) {
- /* There are some node in this criteria to be scheduled, no need
- * to sort and check other methods
- */
- if (tmp_threshold && tmp_threshold->exit_value) {
- /* verify the exit value && whether node is eligible or not */
- if (tmp_threshold->diff_value (&(alu_sched->max_limit),
- &(alu_sched->array[sched_index].stats)) >
- tmp_threshold->exit_value (&(alu_sched->exit_limit))) {
- /* Free the allocated info for the node :) */
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = trav_sched_node->next;
- GF_FREE (trav_sched_node);
- trav_sched_node = alu_sched->sched_node;
- alu_sched->sched_nodes_pending--;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- } else {
- /* if there is no exit value, then exit after scheduling once */
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = trav_sched_node->next;
- GF_FREE (trav_sched_node);
- trav_sched_node = alu_sched->sched_node;
- alu_sched->sched_nodes_pending--;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
-
- alu_sched->sched_method = tmp_threshold; /* this is the method used for selecting */
-
- /* */
- if (trav_sched_node) {
- tmp_sched_node = trav_sched_node;
- while (trav_sched_node->next) {
- trav_sched_node = trav_sched_node->next;
- }
- if (tmp_sched_node->next) {
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = tmp_sched_node->next;
- tmp_sched_node->next = NULL;
- trav_sched_node->next = tmp_sched_node;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- }
- /* return the scheduled node */
- return alu_sched->array[sched_index].xl;
- } /* end of if (pending_nodes) */
-
- tmp_threshold = trav_threshold;
- trav_threshold = trav_threshold->next;
- }
- }
-
- /* This is used only when there is everything seems ok, or no eligible nodes */
- sched_index_orig = alu_sched->sched_index;
- alu_sched->sched_method = NULL;
- while (1) {
- //lock
- pthread_mutex_lock (&alu_sched->alu_mutex);
- sched_index = alu_sched->sched_index++;
- alu_sched->sched_index = alu_sched->sched_index % alu_sched->child_count;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- //unlock
- if (alu_sched->array[sched_index].eligible)
- break;
- if (sched_index_orig == (sched_index + 1) % alu_sched->child_count) {
- gf_log ("alu", GF_LOG_WARNING, "No node is eligible to schedule");
- //lock
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_index++;
- alu_sched->sched_index = alu_sched->sched_index % alu_sched->child_count;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- //unlock
- break;
- }
- }
- return alu_sched->array[sched_index].xl;
-}
-
-/**
- * notify
- */
-void
-alu_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct alu_sched *alu_sched = NULL;
- int32_t idx = 0;
-
- alu_sched = (struct alu_sched *)*((long *)xl->private);
- if (!alu_sched)
- return;
-
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (alu_sched->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //alu_sched->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- alu_sched->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-int32_t
-alu_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_alu_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-struct sched_ops sched = {
- .init = alu_init,
- .fini = alu_fini,
- .update = alu_update,
- .schedule = alu_scheduler,
- .notify = alu_notify,
- .mem_acct_init = alu_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.alu.order", "alu.order" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.alu.disk-usage.entry-threshold",
- "alu.disk-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.disk-usage.exit-threshold",
- "alu.disk-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.write-usage.entry-threshold",
- "alu.write-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.write-usage.exit-threshold",
- "alu.write-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.read-usage.entry-threshold",
- "alu.read-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.read-usage.exit-threshold",
- "alu.read-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.open-files-usage.entry-threshold",
- "alu.open-files-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "scheduler.alu.open-files-usage.exit-threshold",
- "alu.open-files-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "scheduler.read-only-subvolumes",
- "alu.read-only-subvolumes" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.refresh-interval",
- "alu.refresh-interval",
- "alu.stat-refresh.interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "alu.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.alu.stat-refresh.num-file-create"
- "alu.stat-refresh.num-file-create"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {NULL}, }
-};
diff --git a/scheduler/alu/src/alu.h b/scheduler/alu/src/alu.h
deleted file mode 100644
index c716ad8e5e7..00000000000
--- a/scheduler/alu/src/alu.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _ALU_H
-#define _ALU_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "scheduler.h"
-
-struct alu_sched;
-
-struct alu_sched_struct {
- xlator_t *xl;
- struct xlator_stats stats;
- unsigned char eligible;
-};
-
-// Write better name for these functions
-struct alu_limits {
- struct alu_limits *next;
- int64_t (*max_value) (struct xlator_stats *); /* Max limit, specified by the user */
- int64_t (*min_value) (struct xlator_stats *); /* Min limit, specified by the user */
- int64_t (*cur_value) (struct xlator_stats *); /* Current values of variables got from stats call */
-};
-
-struct alu_threshold {
- struct alu_threshold *next;
- int64_t (*diff_value) (struct xlator_stats *max, struct xlator_stats *min); /* Diff b/w max and min */
- int64_t (*entry_value) (struct xlator_stats *); /* Limit specified user */
- int64_t (*exit_value) (struct xlator_stats *); /* Exit point for the limit */
- int64_t (*sched_value) (struct xlator_stats *); /* This will return the index of the child area */
-};
-
-struct alu_sched_node {
- struct alu_sched_node *next;
- int32_t index;
-};
-
-struct alu_sched {
- struct alu_limits *limits_fn;
- struct alu_threshold *threshold_fn;
- struct alu_sched_struct *array;
- struct alu_sched_node *sched_node;
- struct alu_threshold *sched_method;
- struct xlator_stats max_limit;
- struct xlator_stats min_limit;
- struct xlator_stats entry_limit;
- struct xlator_stats exit_limit;
- struct xlator_stats spec_limit; /* User given limit */
-
- pthread_mutex_t alu_mutex;
- struct timeval last_stat_fetch;
- uint32_t refresh_interval; /* in seconds */
- uint32_t refresh_create_count; /* num-file-create */
-
- int32_t sched_nodes_pending;
-
- int32_t sched_index; /* used for round robin scheduling in case of many nodes getting the criteria match. */
- int32_t child_count;
-
-};
-
-struct _alu_local_t {
- int32_t call_count;
-};
-
-typedef struct _alu_local_t alu_local_t;
-
-#endif /* _ALU_H */
diff --git a/scheduler/nufa/src/Makefile.am b/scheduler/nufa/src/Makefile.am
deleted file mode 100644
index 6eb3d39f1e2..00000000000
--- a/scheduler/nufa/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-sched_LTLIBRARIES = nufa.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-nufa_la_LDFLAGS = -module -avoidversion
-
-nufa_la_SOURCES = nufa.c
-nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/nufa/src/nufa-mem-types.h b/scheduler/nufa/src/nufa-mem-types.h
deleted file mode 100644
index 945dafa7c31..00000000000
--- a/scheduler/nufa/src/nufa-mem-types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __NUFA_MEM_TYPES_H__
-#define __NUFA_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_locks_mem_types_ {
- gf_nufa_mt_nufa_struct = gf_common_mt_end + 1,
- gf_nufa_mt_nufa_sched_struct,
- gf_nufa_mt_int32_t,
- gf_nufa_mt_end
-};
-#endif
-
diff --git a/scheduler/nufa/src/nufa.c b/scheduler/nufa/src/nufa.c
deleted file mode 100644
index 1bf477bdc2c..00000000000
--- a/scheduler/nufa/src/nufa.c
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-
-#include "scheduler.h"
-#include "common-utils.h"
-#include "nufa-mem-types.h"
-
-struct nufa_sched_struct {
- xlator_t *xl;
- struct timeval last_stat_fetch;
- int64_t free_disk;
- int32_t refresh_interval;
- unsigned char eligible;
-};
-
-struct nufa_struct {
- struct nufa_sched_struct *array;
- struct timeval last_stat_fetch;
-
- int32_t *local_array; /* Used to keep the index of the local xlators */
- int32_t local_xl_index; /* index in the above array */
- int32_t local_xl_count; /* Count of the local subvolumes */
-
- uint32_t refresh_interval;
- uint32_t min_free_disk;
-
- gf_lock_t nufa_lock;
- int32_t child_count;
- int32_t sched_index;
-};
-
-#define NUFA_LIMITS_MIN_FREE_DISK_DEFAULT 15
-#define NUFA_REFRESH_INTERVAL_DEFAULT 30
-
-static int32_t
-nufa_init (xlator_t *xl)
-{
- int32_t index = 0;
- data_t *local_name = NULL;
- data_t *data = NULL;
- xlator_list_t *trav_xl = xl->children;
- struct nufa_struct *nufa_buf = NULL;
-
- nufa_buf = GF_CALLOC (1, sizeof (struct nufa_struct),
- gf_nufa_mt_nufa_struct);
- ERR_ABORT (nufa_buf);
-
- data = dict_get (xl->options, "scheduler.limits.min-free-disk");
- if (data) {
- if (gf_string2percent (data->data,
- &nufa_buf->min_free_disk) != 0) {
- gf_log ("nufa", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option scheduler.limits.min-free-disk\"",
- data->data);
- return -1;
- }
- if (nufa_buf->min_free_disk >= 100) {
- gf_log ("nufa", GF_LOG_ERROR,
- "check \"option scheduler.limits.min-free-disk"
- "\", it should be percentage value");
- return -1;
- }
- } else {
- gf_log ("nufa", GF_LOG_WARNING,
- "No option for limit min-free-disk given, "
- "defaulting it to 15%%");
- nufa_buf->min_free_disk = NUFA_LIMITS_MIN_FREE_DISK_DEFAULT;
- }
- data = dict_get (xl->options, "scheduler.refresh-interval");
- if (data != NULL) {
- if (gf_string2time (data->data,
- &nufa_buf->refresh_interval) != 0) {
- gf_log ("nufa", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option scheduler.refresh-interval\"",
- data->data);
- return -1;
- }
- } else {
- gf_log ("nufa", GF_LOG_WARNING,
- "No option for scheduler.refresh-interval given, "
- "defaulting it to 30");
- nufa_buf->refresh_interval = NUFA_REFRESH_INTERVAL_DEFAULT;
- }
-
- /* Get the array built */
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- nufa_buf->child_count = index;
- nufa_buf->sched_index = 0;
- nufa_buf->array = GF_CALLOC (index, sizeof (struct nufa_sched_struct),
- gf_nufa_mt_nufa_sched_struct);
- ERR_ABORT (nufa_buf->array);
- nufa_buf->local_array = GF_CALLOC (index, sizeof (int32_t),
- gf_nufa_mt_int32_t);
- ERR_ABORT (nufa_buf->array);
- trav_xl = xl->children;
-
- local_name = dict_get (xl->options, "scheduler.local-volume-name");
- if (!local_name) {
- /* Error */
- gf_log ("nufa", GF_LOG_ERROR,
- "No 'local-volume-name' option given in volume file");
- GF_FREE (nufa_buf->array);
- GF_FREE (nufa_buf->local_array);
- GF_FREE (nufa_buf);
- return -1;
- }
-
- /* Get the array properly */
- index = 0;
- trav_xl = xl->children;
- while (trav_xl) {
- nufa_buf->array[index].xl = trav_xl->xlator;
- nufa_buf->array[index].eligible = 1;
- nufa_buf->array[index].free_disk = nufa_buf->min_free_disk;
- nufa_buf->array[index].refresh_interval =
- nufa_buf->refresh_interval;
-
- trav_xl = trav_xl->next;
- index++;
- }
-
- {
- int32_t array_index = 0;
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = gf_strdup (local_name->data);
-
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- /* Check if the local_volume specified is proper
- subvolume of unify */
- trav_xl = xl->children;
- index=0;
- while (trav_xl) {
- if (strcmp (child, trav_xl->xlator->name) == 0)
- break;
- trav_xl = trav_xl->next;
- index++;
- }
-
- if (!trav_xl) {
- /* entry for 'local-volume-name' is wrong,
- not present in subvolumes */
- gf_log ("nufa", GF_LOG_ERROR,
- "option 'scheduler.local-volume-name' "
- "%s is wrong", child);
- GF_FREE (nufa_buf->array);
- GF_FREE (nufa_buf->local_array);
- GF_FREE (nufa_buf);
- return -1;
- } else {
- nufa_buf->local_array[array_index++] = index;
- nufa_buf->local_xl_count++;
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- GF_FREE (childs_data);
- }
-
- LOCK_INIT (&nufa_buf->nufa_lock);
- *((long *)xl->private) = (long)nufa_buf; // put it at the proper place
- return 0;
-}
-
-static void
-nufa_fini (xlator_t *xl)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
-
- LOCK_DESTROY (&nufa_buf->nufa_lock);
- GF_FREE (nufa_buf->local_array);
- GF_FREE (nufa_buf->array);
- GF_FREE (nufa_buf);
-}
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- struct nufa_struct *nufa_struct = NULL;
- int32_t idx = 0;
- int32_t percent = 0;
-
- nufa_struct = (struct nufa_struct *)*((long *)xl->private);
- LOCK (&nufa_struct->nufa_lock);
- for (idx = 0; idx < nufa_struct->child_count; idx++) {
- if (nufa_struct->array[idx].xl->name == (char *)cookie)
- break;
- }
- UNLOCK (&nufa_struct->nufa_lock);
-
- if (op_ret == 0) {
- percent = ((trav_stats->free_disk * 100) /
- trav_stats->total_disk_size);
- if (nufa_struct->array[idx].free_disk > percent) {
- if (nufa_struct->array[idx].eligible)
- gf_log ("nufa", GF_LOG_CRITICAL,
- "node \"%s\" is _almost_ (%d %%) full",
- nufa_struct->array[idx].xl->name,
- 100 - percent);
- nufa_struct->array[idx].eligible = 0;
- } else {
- nufa_struct->array[idx].eligible = 1;
- }
- } else {
- nufa_struct->array[idx].eligible = 0;
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- /* This function schedules the file in one of the child nodes */
- int32_t idx;
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- for (idx = 0; idx < nufa_buf->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- nufa_buf->array[idx].xl->name,
- nufa_buf->array[idx].xl,
- (nufa_buf->array[idx].xl)->mops->stats,
- 0); //flag
- }
-
- return;
-}
-
-static void
-nufa_update (xlator_t *xl)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- struct timeval tv;
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (nufa_buf->refresh_interval
- + nufa_buf->last_stat_fetch.tv_sec)) {
- /* Update the stats from all the server */
- update_stat_array (xl);
- nufa_buf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-nufa_schedule (xlator_t *xl, const void *path)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- int32_t nufa_orig = nufa_buf->local_xl_index;
- int32_t rr;
-
- nufa_update (xl);
-
- while (1) {
- LOCK (&nufa_buf->nufa_lock);
- rr = nufa_buf->local_xl_index++;
- nufa_buf->local_xl_index %= nufa_buf->local_xl_count;
- UNLOCK (&nufa_buf->nufa_lock);
-
- /* if 'eligible' or there are _no_ eligible nodes */
- if (nufa_buf->array[nufa_buf->local_array[rr]].eligible) {
- /* Return the local node */
- return nufa_buf->array[nufa_buf->local_array[rr]].xl;
- }
- if ((rr + 1) % nufa_buf->local_xl_count == nufa_orig) {
- gf_log ("nufa", GF_LOG_CRITICAL,
- "No free space available on any local "
- "volumes, using RR scheduler");
- LOCK (&nufa_buf->nufa_lock);
- nufa_buf->local_xl_index++;
- nufa_buf->local_xl_index %= nufa_buf->local_xl_count;
- UNLOCK (&nufa_buf->nufa_lock);
- break;
- }
- }
-
- nufa_orig = nufa_buf->sched_index;
- while (1) {
- LOCK (&nufa_buf->nufa_lock);
- rr = nufa_buf->sched_index++;
- nufa_buf->sched_index = (nufa_buf->sched_index %
- nufa_buf->child_count);
- UNLOCK (&nufa_buf->nufa_lock);
-
- /* if 'eligible' or there are _no_ eligible nodes */
- if (nufa_buf->array[rr].eligible) {
- break;
- }
- if ((rr + 1) % nufa_buf->child_count == nufa_orig) {
- gf_log ("nufa", GF_LOG_CRITICAL,
- "No free space available on any server, "
- "using RR scheduler.");
- LOCK (&nufa_buf->nufa_lock);
- nufa_buf->sched_index++;
- nufa_buf->sched_index = (nufa_buf->sched_index %
- nufa_buf->child_count);
- UNLOCK (&nufa_buf->nufa_lock);
- break;
- }
- }
- return nufa_buf->array[rr].xl;
-}
-
-
-/**
- * notify
- */
-void
-nufa_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- int32_t idx = 0;
-
- if (!nufa_buf)
- return;
-
- for (idx = 0; idx < nufa_buf->child_count; idx++) {
- if (strcmp (nufa_buf->array[idx].xl->name,
- ((xlator_t *)data)->name) == 0)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //nufa_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- nufa_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-int32_t
-nufa_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_nufa_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-struct sched_ops sched = {
- .init = nufa_init,
- .fini = nufa_fini,
- .update = nufa_update,
- .schedule = nufa_schedule,
- .notify = nufa_notify,
- .mem_acct_init = nufa_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "nufa.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "nufa.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.local-volume-name",
- "nufa.local-volume-name" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {NULL} }
-};
-
diff --git a/scheduler/random/src/Makefile.am b/scheduler/random/src/Makefile.am
deleted file mode 100644
index 572181336c2..00000000000
--- a/scheduler/random/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-sched_LTLIBRARIES = random.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-random_la_LDFLAGS = -module -avoidversion
-
-random_la_SOURCES = random.c
-random_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = random.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/random/src/random-mem-types.h b/scheduler/random/src/random-mem-types.h
deleted file mode 100644
index ff30d9244fc..00000000000
--- a/scheduler/random/src/random-mem-types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __RANDOM_MEM_TYPES_H__
-#define __RANDOM_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_random_mem_types_ {
- gf_random_mt_random_struct = gf_common_mt_end + 1,
- gf_random_mt_random_sched_struct,
- gf_random_mt_end
-};
-#endif
-
diff --git a/scheduler/random/src/random.c b/scheduler/random/src/random.c
deleted file mode 100644
index 03a284b66bd..00000000000
--- a/scheduler/random/src/random.c
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <stdlib.h>
-#include <sys/time.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "random.h"
-#include "random-mem-types.h"
-
-#define RANDOM_LIMITS_MIN_FREE_DISK_DEFAULT 15
-#define RANDOM_REFRESH_INTERVAL_DEFAULT 10
-
-
-static int32_t
-random_init (xlator_t *xl)
-{
- struct random_struct *random_buf = NULL;
- xlator_list_t *trav_xl = xl->children;
- data_t *limit = NULL;
- int32_t index = 0;
-
- random_buf = GF_CALLOC (1, sizeof (struct random_struct),
- gf_random_mt_random_struct);
- ERR_ABORT (random_buf);
-
- /* Set the seed for the 'random' function */
- srandom ((uint32_t) time (NULL));
-
- limit = dict_get (xl->options, "scheduler.limits.min-free-disk");
- if (limit) {
- if (gf_string2percent (data_to_str (limit),
- &random_buf->min_free_disk) != 0) {
- gf_log ("random", GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "scheduler.limits.min-free-disk\"",
- limit->data);
- return -1;
- }
- if (random_buf->min_free_disk >= 100) {
- gf_log ("random", GF_LOG_ERROR,
- "check the \"option random.limits.min-free"
- "-disk\", it should be percentage value");
- return -1;
- }
-
- } else {
- gf_log ("random", GF_LOG_WARNING,
- "No option for limit min-free-disk given, "
- "defaulting it to 5%%");
- random_buf->min_free_disk =
- RANDOM_LIMITS_MIN_FREE_DISK_DEFAULT;
- }
-
- limit = dict_get (xl->options, "scheduler.refresh-interval");
- if (limit) {
- if (gf_string2time (data_to_str (limit),
- &random_buf->refresh_interval) != 0) {
- gf_log ("random", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option random.refresh-interval\"",
- limit->data);
- return -1;
- }
- } else {
- random_buf->refresh_interval = RANDOM_REFRESH_INTERVAL_DEFAULT;
- }
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- random_buf->child_count = index;
- random_buf->array = GF_CALLOC (index,
- sizeof (struct random_sched_struct),
- gf_random_mt_random_sched_struct);
- ERR_ABORT (random_buf->array);
- trav_xl = xl->children;
- index = 0;
-
- while (trav_xl) {
- random_buf->array[index].xl = trav_xl->xlator;
- random_buf->array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
- pthread_mutex_init (&random_buf->random_mutex, NULL);
-
- // put it at the proper place
- *((long *)xl->private) = (long)random_buf;
- return 0;
-}
-
-static void
-random_fini (xlator_t *xl)
-{
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- pthread_mutex_destroy (&random_buf->random_mutex);
- GF_FREE (random_buf->array);
- GF_FREE (random_buf);
-}
-
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- int32_t idx = 0;
- int32_t percent = 0;
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- pthread_mutex_lock (&random_buf->random_mutex);
- for (idx = 0; idx < random_buf->child_count; idx++) {
- if (strcmp (random_buf->array[idx].xl->name,
- (char *)cookie) == 0)
- break;
- }
- pthread_mutex_unlock (&random_buf->random_mutex);
-
- if (op_ret == 0) {
- percent = ((trav_stats->free_disk *100) /
- trav_stats->total_disk_size);
- if (random_buf->min_free_disk > percent) {
- random_buf->array[idx].eligible = 0;
- } else {
- random_buf->array[idx].eligible = 1;
- }
- } else {
- random_buf->array[idx].eligible = 0;
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- int32_t idx;
- struct random_struct *random_buf = NULL;
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- for (idx = 0; idx < random_buf->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- random_buf->array[idx].xl->name,
- random_buf->array[idx].xl,
- (random_buf->array[idx].xl)->mops->stats,
- 0);
- }
- return ;
-}
-
-static void
-random_update (xlator_t *xl)
-{
- struct timeval tv;
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- gettimeofday(&tv, NULL);
- if (tv.tv_sec > (random_buf->refresh_interval +
- random_buf->last_stat_entry.tv_sec)) {
- update_stat_array (xl);
- random_buf->last_stat_entry.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-random_schedule (xlator_t *xl, const void *path)
-{
- struct random_struct *random_buf = NULL;
- int32_t rand = 0;
- int32_t try = 0;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- rand = (int32_t) (1.0*random_buf->child_count *
- (random() / (RAND_MAX + 1.0)));
-
- random_update (xl);
-
- while (!random_buf->array[rand].eligible) {
- if (try++ > 100) {
- /* there is a chance of this becoming a
- infinite loop otherwise. */
- break;
- }
- rand = (int32_t) (1.0*random_buf->child_count *
- (random() / (RAND_MAX + 1.0)));
- }
- return random_buf->array[rand].xl;
-}
-
-int32_t
-random_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_random_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * notify
- */
-void
-random_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct random_struct *random_buf = NULL;
- int32_t idx = 0;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- if (!random_buf)
- return;
-
- for (idx = 0; idx < random_buf->child_count; idx++) {
- if (random_buf->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //random_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- random_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-struct sched_ops sched = {
- .init = random_init,
- .fini = random_fini,
- .update = random_update,
- .schedule = random_schedule,
- .notify = random_notify,
- .mem_acct_init = random_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "random.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "random.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = {NULL} }
-};
diff --git a/scheduler/random/src/random.h b/scheduler/random/src/random.h
deleted file mode 100644
index 154b1bdfb24..00000000000
--- a/scheduler/random/src/random.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RANDOM_H
-#define _RANDOM_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include <sys/time.h>
-#include "scheduler.h"
-
-struct random_sched_struct {
- xlator_t *xl;
- unsigned char eligible;
-};
-
-struct random_struct {
- int32_t child_count;
- uint32_t refresh_interval;
- uint32_t min_free_disk;
- struct timeval last_stat_entry;
- struct random_sched_struct *array;
- pthread_mutex_t random_mutex;
-};
-
-#endif /* _RANDOM_H */
diff --git a/scheduler/rr/Makefile.am b/scheduler/rr/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/scheduler/rr/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/scheduler/rr/src/Makefile.am b/scheduler/rr/src/Makefile.am
deleted file mode 100644
index 7e911c0eda8..00000000000
--- a/scheduler/rr/src/Makefile.am
+++ /dev/null
@@ -1,13 +0,0 @@
-sched_LTLIBRARIES = rr.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-rr_la_LDFLAGS = -module -avoidversion
-
-rr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-rr_la_SOURCES = rr.c rr-options.c
-noinst_HEADERS = rr.h rr-options.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/rr/src/rr-mem-types.h b/scheduler/rr/src/rr-mem-types.h
deleted file mode 100644
index cb05323cd77..00000000000
--- a/scheduler/rr/src/rr-mem-types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __RR_MEM_TYPES_H__
-#define __RR_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_rr_mem_types_ {
- gf_rr_mt_rr_t = gf_common_mt_end + 1,
- gf_rr_mt_rr_subvolume_t,
- gf_rr_mt_end
-};
-#endif
-
diff --git a/scheduler/rr/src/rr-options.c b/scheduler/rr/src/rr-options.c
deleted file mode 100644
index 505b1713e33..00000000000
--- a/scheduler/rr/src/rr-options.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "scheduler.h"
-#include "rr-options.h"
-
-#define RR_LIMITS_MIN_FREE_DISK_OPTION_STRING "scheduler.limits.min-free-disk"
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT 15
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_MIN 0
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_MAX 100
-
-#define RR_REFRESH_INTERVAL_OPTION_STRING "scheduler.refresh-interval"
-#define RR_REFRESH_INTERVAL_VALUE_DEFAULT 10
-
-#define RR_READ_ONLY_SUBVOLUMES_OPTION_STRING "scheduler.read-only-subvolumes"
-
-#define LOG_ERROR(args...) gf_log ("rr-options", GF_LOG_ERROR, ##args)
-#define LOG_WARNING(args...) gf_log ("rr-options", GF_LOG_WARNING, ##args)
-
-static int
-_rr_options_min_free_disk_validate (const char *value_string, uint32_t *n)
-{
- uint32_t value = 0;
-
- if (value_string == NULL)
- {
- return -1;
- }
-
- if (gf_string2percent (value_string, &value) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid number format [%s] of option [%s]",
- value_string,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING);
- return -1;
- }
-
- if ((value <= RR_LIMITS_MIN_FREE_DISK_VALUE_MIN) ||
- (value >= RR_LIMITS_MIN_FREE_DISK_VALUE_MAX))
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "out of range [%d] of option [%s]. Allowed range is 0 to 100.",
- value,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING);
- return -1;
- }
-
- *n = value;
-
- return 0;
-}
-
-static int
-_rr_options_refresh_interval_validate (const char *value_string, uint32_t *n)
-{
- uint32_t value = 0;
-
- if (value_string == NULL)
- {
- return -1;
- }
-
- if (gf_string2time (value_string, &value) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid number format [%s] of option [%s]",
- value_string,
- RR_REFRESH_INTERVAL_OPTION_STRING);
- return -1;
- }
-
- *n = value;
-
- return 0;
-}
-
-static int
-_rr_options_read_only_subvolumes_validate (const char *value_string,
- char ***volume_list,
- uint64_t *volume_count)
-{
- char **vlist = NULL;
- int vcount = 0;
- int i = 0;
-
- if (value_string == NULL || volume_list == NULL || volume_count)
- {
- return -1;
- }
-
- if (gf_strsplit (value_string,
- ", ",
- &vlist,
- &vcount) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid subvolume list [%s] of option [%s]",
- value_string,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING);
- return -1;
- }
-
- for (i = 0; i < vcount; i++)
- {
- if (gf_volume_name_validate (vlist[i]) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid subvolume name [%s] in [%s] of option [%s]",
- vlist[i],
- value_string,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING);
- goto free_exit;
- }
- }
-
- *volume_list = vlist;
- *volume_count = vcount;
-
- return 0;
-
- free_exit:
- for (i = 0; i < vcount; i++)
- {
- GF_FREE (vlist[i]);
- }
- GF_FREE (vlist);
-
- return -1;
-}
-
-int
-rr_options_validate (dict_t *options, rr_options_t *rr_options)
-{
- char *value_string = NULL;
-
- if (options == NULL || rr_options == NULL)
- {
- return -1;
- }
-
- if (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING))
- if (data_to_str (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING)))
- value_string = data_to_str (dict_get (options,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_min_free_disk_validate (value_string,
- &rr_options->min_free_disk) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = %d",
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,
- rr_options->min_free_disk);
- }
- else
- {
- rr_options->min_free_disk = RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT;
-
- gf_log ("rr", GF_LOG_DEBUG,
- "using %s = %d [default]",
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,
- rr_options->min_free_disk);
- }
-
- value_string = NULL;
- if (dict_get (options, RR_REFRESH_INTERVAL_OPTION_STRING))
- value_string = data_to_str (dict_get (options,
- RR_REFRESH_INTERVAL_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_refresh_interval_validate (value_string,
- &rr_options->refresh_interval) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = %d",
- RR_REFRESH_INTERVAL_OPTION_STRING,
- rr_options->refresh_interval);
- }
- else
- {
- rr_options->refresh_interval = RR_REFRESH_INTERVAL_VALUE_DEFAULT;
-
- gf_log ("rr", GF_LOG_DEBUG,
- "using %s = %d [default]",
- RR_REFRESH_INTERVAL_OPTION_STRING,
- rr_options->refresh_interval);
- }
-
- value_string = NULL;
- if (dict_get (options, RR_READ_ONLY_SUBVOLUMES_OPTION_STRING))
- value_string = data_to_str (dict_get (options,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_read_only_subvolumes_validate (value_string,
- &rr_options->read_only_subvolume_list,
- &rr_options->read_only_subvolume_count) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = [%s]",
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING,
- value_string);
- }
-
- return 0;
-}
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "rr.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "rr.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.read-only-subvolumes",
- "rr.read-only-subvolumes" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/scheduler/rr/src/rr-options.h b/scheduler/rr/src/rr-options.h
deleted file mode 100644
index 1b0a1ef3d90..00000000000
--- a/scheduler/rr/src/rr-options.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RR_OPTIONS_H
-#define _RR_OPTIONS_H
-
-struct rr_options
-{
- uint32_t min_free_disk;
- uint32_t refresh_interval;
- char **read_only_subvolume_list;
- uint64_t read_only_subvolume_count;
-};
-typedef struct rr_options rr_options_t;
-
-int rr_options_validate (dict_t *options, rr_options_t *rr_options);
-
-#endif
diff --git a/scheduler/rr/src/rr.c b/scheduler/rr/src/rr.c
deleted file mode 100644
index e7b556e671b..00000000000
--- a/scheduler/rr/src/rr.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-#include <stdlib.h>
-
-#include <stdint.h>
-
-#include "scheduler.h"
-
-#include "rr-options.h"
-#include "rr.h"
-#include "rr-mem-types.h"
-
-#define RR_MIN_FREE_DISK_NOT_REACHED 0
-#define RR_MIN_FREE_DISK_REACHED 1
-
-#define RR_SUBVOLUME_OFFLINE 0
-#define RR_SUBVOLUME_ONLINE 1
-
-#define LOG_ERROR(args...) gf_log ("rr", GF_LOG_ERROR, ##args)
-#define LOG_WARNING(args...) gf_log ("rr", GF_LOG_WARNING, ##args)
-#define LOG_CRITICAL(args...) gf_log ("rr", GF_LOG_CRITICAL, ##args)
-
-#define ROUND_ROBIN(index, count) ((index + 1) % count)
-
-static int
-_cleanup_rr (rr_t *rr)
-{
- int i;
-
- if (rr == NULL)
- {
- return -1;
- }
-
- if (rr->options.read_only_subvolume_list != NULL)
- {
- for (i = 0; i < rr->options.read_only_subvolume_count; i++)
- {
- GF_FREE (rr->options.read_only_subvolume_list[i]);
- }
- GF_FREE (rr->options.read_only_subvolume_list);
- }
-
- GF_FREE (rr->subvolume_list);
-
- GF_FREE (rr);
-
- return 0;
-}
-
-int
-rr_init (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
- dict_t *options = NULL;
- xlator_list_t *children = NULL;
- uint64_t children_count = 0;
- int i = 0;
- int j = 0;
-
- if (this_xl == NULL)
- {
- return -1;
- }
-
- if ((options = this_xl->options) == NULL)
- {
- return -1;
- }
-
- if ((children = this_xl->children) == NULL)
- {
- return -1;
- }
-
- if ((rr = GF_CALLOC (1, sizeof (rr_t), gf_rr_mt_rr_t)) == NULL)
- {
- return -1;
- }
-
- if (rr_options_validate (options, &rr->options) != 0)
- {
- GF_FREE (rr);
- return -1;
- }
-
- for (i = 0; i < rr->options.read_only_subvolume_count; i++)
- {
- char found = 0;
-
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- if (strcmp (rr->options.read_only_subvolume_list[i],
- children->xlator->name) == 0)
- {
- found = 1;
- break;
- }
- }
-
- if (!found)
- {
- LOG_ERROR ("read-only subvolume [%s] not found in volume list",
- rr->options.read_only_subvolume_list[i]);
- _cleanup_rr (rr);
- return -1;
- }
- }
-
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- children_count++;
- }
-
- /* bala: excluding read_only_subvolumes */
- if ((rr->subvolume_count = children_count -
- rr->options.read_only_subvolume_count) == 0)
- {
- LOG_ERROR ("no writable volumes found for scheduling");
- _cleanup_rr (rr);
- return -1;
- }
-
- if ((rr->subvolume_list = GF_CALLOC (rr->subvolume_count,
- sizeof (rr_subvolume_t),
- gf_rr_mt_rr_subvolume_t)) == NULL)
- {
- _cleanup_rr (rr);
- return -1;
- }
-
- i = 0;
- j = 0;
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- char found = 0;
-
- for (j = 0; j < rr->options.read_only_subvolume_count; j++)
- {
- if (strcmp (rr->options.read_only_subvolume_list[i],
- children->xlator->name) == 0)
- {
- found = 1;
- break;
- }
- }
-
- if (!found)
- {
- rr_subvolume_t *subvolume = NULL;
-
- subvolume = &rr->subvolume_list[i];
-
- subvolume->xl = children->xlator;
- subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED;
- subvolume->status = RR_SUBVOLUME_ONLINE;
-
- i++;
- }
- }
-
- rr->schedule_index = UINT64_MAX;
- rr->last_stat_fetched_time.tv_sec = 0;
- rr->last_stat_fetched_time.tv_usec = 0;
- pthread_mutex_init (&rr->mutex, NULL);
-
- *((long *)this_xl->private) = (long)rr;
-
- return 0;
-}
-
-void
-rr_fini (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
-
- if (this_xl == NULL)
- {
- return;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) != NULL)
- {
- pthread_mutex_destroy (&rr->mutex);
- _cleanup_rr (rr);
- this_xl->private = NULL;
- }
-
- return;
-}
-
-xlator_t *
-rr_schedule (xlator_t *this_xl, const void *path)
-{
- rr_t *rr = NULL;
- uint64_t next_schedule_index = 0;
- int i = 0;
-
- if (this_xl == NULL || path == NULL)
- {
- return NULL;
- }
-
- rr = (rr_t *) *((long *)this_xl->private);
- next_schedule_index = ROUND_ROBIN (rr->schedule_index,
- rr->subvolume_count);
-
- rr_update (this_xl);
-
- for (i = next_schedule_index; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&
- rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = 0; i < next_schedule_index; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&
- rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = next_schedule_index; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = 0; i < next_schedule_index; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- return NULL;
-}
-
-void
-rr_update (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
- struct timeval ctime = {0, 0};
- int i = 0;
-
- if (this_xl == NULL)
- {
- return ;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- return ;
- }
-
- if (gettimeofday (&ctime, NULL) != 0)
- {
- return ;
- }
-
- if (ctime.tv_sec > (rr->options.refresh_interval +
- rr->last_stat_fetched_time.tv_sec))
- {
- pthread_mutex_lock (&rr->mutex);
- rr->last_stat_fetched_time = ctime;
- pthread_mutex_unlock (&rr->mutex);
-
- for (i = 0; i < rr->subvolume_count; i++)
- {
- xlator_t *subvolume_xl = NULL;
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
-
- subvolume_xl = rr->subvolume_list[i].xl;
-
- pool = this_xl->ctx->pool;
-
- frame = create_frame (this_xl, pool);
-
- STACK_WIND_COOKIE (frame,
- rr_update_cbk,
- subvolume_xl->name,
- subvolume_xl,
- subvolume_xl->mops->stats,
- 0);
- }
- }
-
- return ;
-}
-
-int
-rr_update_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- rr_t *rr = NULL;
- rr_subvolume_t *subvolume = NULL;
- uint8_t free_disk_percent = 0;
- int i = 0;
-
- if (frame == NULL)
- {
- return -1;
- }
-
- if (cookie == NULL || this_xl == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if (op_ret == 0 && stats == NULL)
- {
- LOG_CRITICAL ("fatal! op_ret is 0 and stats is NULL. "
- "Please report this to <gluster-devel@nongnu.org>");
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- for (i = 0; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].xl->name == (char *) cookie)
- {
- subvolume = &rr->subvolume_list[i];
- break;
- }
- }
-
- if (subvolume == NULL)
- {
- LOG_ERROR ("unknown cookie [%s]", (char *) cookie);
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if (op_ret == 0)
- {
- free_disk_percent = (stats->free_disk * 100) / stats->total_disk_size;
- if (free_disk_percent > rr->options.min_free_disk)
- {
- if (subvolume->free_disk_status != RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED;
- pthread_mutex_unlock (&rr->mutex);
- LOG_WARNING ("subvolume [%s] is available with free space for scheduling",
- subvolume->xl->name);
- }
- }
- else
- {
- if (subvolume->free_disk_status != RR_MIN_FREE_DISK_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->free_disk_status = RR_MIN_FREE_DISK_REACHED;
- pthread_mutex_unlock (&rr->mutex);
- LOG_WARNING ("subvolume [%s] reached minimum disk space requirement",
- subvolume->xl->name);
- }
- }
- }
- else
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_OFFLINE;
- pthread_mutex_unlock (&rr->mutex);
- LOG_ERROR ("unable to get subvolume [%s] status information and "
- "scheduling is disabled",
- subvolume->xl->name);
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-void
-rr_notify (xlator_t *this_xl, int32_t event, void *data)
-{
- rr_t *rr = NULL;
- rr_subvolume_t *subvolume = NULL;
- xlator_t *subvolume_xl = NULL;
- int i = 0, ret = 0;
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
- dict_t *xattr = get_new_dict ();
- int32_t version[1] = {1};
-
- if (this_xl == NULL || data == NULL) {
- return ;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL) {
- return ;
- }
-
- subvolume_xl = (xlator_t *) data;
-
- for (i = 0; i < rr->subvolume_count; i++) {
- if (rr->subvolume_list[i].xl == subvolume_xl) {
- subvolume = &rr->subvolume_list[i];
- break;
- }
- }
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- /* Seeding, to be done only once */
- if (rr->first_time && (i == rr->subvolume_count)) {
- loc_t loc = {0,};
- xlator_t *trav = NULL;
-
- pool = this_xl->ctx->pool;
- frame = create_frame (this_xl, pool);
- ret = dict_set_bin (xattr, "trusted.glusterfs.scheduler.rr",
- version, sizeof (int32_t));
- if (-1 == ret) {
- gf_log (this_xl->name, GF_LOG_ERROR, "rr seed setting failed");
- }
- if (xattr)
- dict_ref (xattr);
-
- loc.path = gf_strdup ("/");
- for (trav = this_xl->parents->xlator; trav; trav = trav->parents->xlator) {
- if (trav->itable) {
- loc.inode = trav->itable->root;
- break;
- }
- }
- STACK_WIND (frame,
- rr_notify_cbk,
- (xlator_t *)data,
- ((xlator_t *)data)->fops->xattrop,
- &loc,
- GF_XATTROP_ADD_ARRAY,
- xattr);
-
- if (xattr)
- dict_unref (xattr);
-
- rr->first_time = 0;
- }
- if (subvolume) {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_ONLINE;
- pthread_mutex_unlock (&rr->mutex);
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- if (subvolume) {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_OFFLINE;
- pthread_mutex_unlock (&rr->mutex);
- }
- break;
- }
-
- return ;
-}
-
-int
-rr_notify_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr)
-{
- rr_t *rr = NULL;
- int32_t *index = NULL;
- int32_t ret = -1;
- void *tmp_index_ptr = NULL;
-
- if (frame == NULL)
- {
- return -1;
- }
-
- if ((this_xl == NULL) || (op_ret == -1))
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- ret = dict_get_bin (xattr, "trusted.glusterfs.scheduler.rr", &tmp_index_ptr);
- index = tmp_index_ptr;
- if (ret == 0)
- rr->schedule_index = (index[0] % rr->subvolume_count);
- else
- rr->schedule_index = 0;
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-struct sched_ops sched = {
- .init = rr_init,
- .fini = rr_fini,
- .update = rr_update,
- .schedule = rr_schedule,
- .notify = rr_notify
-};
-
diff --git a/scheduler/rr/src/rr.h b/scheduler/rr/src/rr.h
deleted file mode 100644
index 3bd95929b61..00000000000
--- a/scheduler/rr/src/rr.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RR_H
-#define _RR_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "scheduler.h"
-#include <stdint.h>
-#include <sys/time.h>
-
-struct rr_subvolume
-{
- xlator_t *xl;
- uint8_t free_disk_status;
- uint8_t status;
-};
-typedef struct rr_subvolume rr_subvolume_t;
-
-struct rr
-{
- rr_options_t options;
- rr_subvolume_t *subvolume_list;
- uint64_t subvolume_count;
- uint64_t schedule_index;
- struct timeval last_stat_fetched_time;
- pthread_mutex_t mutex;
- char first_time;
-};
-typedef struct rr rr_t;
-
-int rr_init (xlator_t *this_xl);
-void rr_fini (xlator_t *this_xl);
-xlator_t *rr_schedule (xlator_t *this_xl, const void *path);
-void rr_update (xlator_t *this_xl);
-int rr_update_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats);
-void rr_notify (xlator_t *this_xl, int32_t event, void *data);
-int rr_notify_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr);
-
-#endif /* _RR_H */
diff --git a/scheduler/switch/Makefile.am b/scheduler/switch/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/scheduler/switch/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/scheduler/switch/src/Makefile.am b/scheduler/switch/src/Makefile.am
deleted file mode 100644
index dc7d16d40d2..00000000000
--- a/scheduler/switch/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-sched_LTLIBRARIES = switch.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-switch_la_LDFLAGS = -module -avoidversion
-
-switch_la_SOURCES = switch.c
-switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/switch/src/switch-mem-types.h b/scheduler/switch/src/switch-mem-types.h
deleted file mode 100644
index 7039b035d0f..00000000000
--- a/scheduler/switch/src/switch-mem-types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __SWITCH_MEM_TYPES_H__
-#define __SWITCH_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_switch_mem_types_ {
- gf_switch_mt_switch_struct = gf_common_mt_end + 1,
- gf_switch_mt_switch_sched_struct,
- gf_switch_mt_switch_sched_array,
- gf_switch_mt_end
-};
-#endif
-
diff --git a/scheduler/switch/src/switch.c b/scheduler/switch/src/switch.c
deleted file mode 100644
index 1362e5cc994..00000000000
--- a/scheduler/switch/src/switch.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/time.h>
-#include <stdlib.h>
-#include <fnmatch.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "scheduler.h"
-#include "switch-mem-types.h"
-
-struct switch_sched_array {
- xlator_t *xl;
- int32_t eligible;
- int32_t considered;
-};
-
-/* Select one of this struct based on the path's pattern match */
-struct switch_sched_struct {
- struct switch_sched_struct *next;
- struct switch_sched_array *array;
- char path_pattern[256];
- int32_t node_index; /* Index of the node in
- this pattern. */
- int32_t num_child; /* Total num of child nodes
- with this pattern. */
-};
-
-struct switch_struct {
- struct switch_sched_struct *cond;
- struct switch_sched_array *array;
- pthread_mutex_t switch_mutex;
- int32_t child_count;
-};
-
-/* This function should return child node as '*:subvolumes' is inserterd */
-static xlator_t *
-switch_get_matching_xl (const char *path, struct switch_sched_struct *cond)
-{
- struct switch_sched_struct *trav = cond;
- char *pathname = gf_strdup (path);
- int index = 0;
-
- while (trav) {
- if (fnmatch (trav->path_pattern,
- pathname, FNM_NOESCAPE) == 0) {
- GF_FREE (pathname);
- trav->node_index %= trav->num_child;
- index = (trav->node_index++) % trav->num_child;
- return trav->array[index].xl;
- }
- trav = trav->next;
- }
- GF_FREE (pathname);
- return NULL;
-}
-
-static int32_t
-gf_unify_valid_child (const char *child,
- xlator_t *xl)
-{
- xlator_list_t *children = NULL, *prev = NULL;
- int32_t ret = 0;
-
- children = xl->children;
- do {
- if (!strcmp (child, children->xlator->name)) {
- ret = 1;
- break;
- }
-
- prev = children;
- children = prev->next;
- } while (children);
-
- return ret;
-}
-
-static int32_t
-switch_init (xlator_t *xl)
-{
- int32_t index = 0;
- data_t *data = NULL;
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = NULL;
- xlator_list_t *trav_xl = xl->children;
- struct switch_struct *switch_buf = NULL;
-
- switch_buf = GF_CALLOC (1, sizeof (struct switch_struct),
- gf_switch_mt_switch_struct);
- ERR_ABORT (switch_buf);
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- switch_buf->child_count = index;
- switch_buf->array = GF_CALLOC (index + 1,
- sizeof (struct switch_sched_struct),
- gf_switch_mt_switch_sched_struct);
- ERR_ABORT (switch_buf->array);
- trav_xl = xl->children;
- index = 0;
-
- while (trav_xl) {
- switch_buf->array[index].xl = trav_xl->xlator;
- switch_buf->array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
-
- data = dict_get (xl->options, "scheduler.read-only-subvolumes");
- if (data) {
- childs_data = gf_strdup (data->data);
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- for (index = 1;
- index < switch_buf->child_count; index++) {
- if (strcmp (switch_buf->array[index - 1].xl->name, child) == 0) {
- gf_log ("switch", GF_LOG_DEBUG,
- "Child '%s' is read-only",
- child);
- memcpy (&(switch_buf->array[index-1]),
- &(switch_buf->array[switch_buf->child_count - 1]),
- sizeof (struct switch_sched_array));
- switch_buf->child_count--;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- GF_FREE (childs_data);
- }
-
- data = dict_get (xl->options, "scheduler.local-volume-name");
- if (data) {
- /* Means, give preference to that node first */
- gf_log ("switch", GF_LOG_DEBUG,
- "local volume defined as %s", data->data);
-
- /* TODO: parse it properly, have an extra index to
- specify that first */
- }
-
- /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
- data = dict_get (xl->options, "scheduler.switch.case");
- if (data) {
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *switch_str = NULL;
- char *pattern = NULL;
- char *childs = NULL;
- struct switch_sched_struct *switch_opt = NULL;
- struct switch_sched_struct *trav = NULL;
- /* Get the pattern for considering switch case.
- "option block-size *avi:10MB" etc */
- switch_str = strtok_r (data->data, ";", &tmp_str);
- while (switch_str) {
- dup_str = gf_strdup (switch_str);
- switch_opt =
- GF_CALLOC (1, sizeof (struct switch_sched_struct), gf_switch_mt_switch_sched_struct);
- ERR_ABORT (switch_opt);
-
- /* Link it to the main structure */
- if (switch_buf->cond) {
- /* there are already few entries */
- trav = switch_buf->cond;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf->cond = switch_opt;
- }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- childs = strtok_r (NULL, ":", &tmp_str1);
- if (strncmp (pattern, "*", 2) == 0) {
- gf_log ("switch", GF_LOG_WARNING,
- "'*' pattern will be taken by default "
- "for all the unconfigured child nodes,"
- " hence neglecting current option");
- switch_str = strtok_r (NULL, ";", &tmp_str);
- GF_FREE (dup_str);
- continue;
- }
- memcpy (switch_opt->path_pattern,
- pattern, strlen (pattern));
- if (childs) {
- int32_t idx = 0;
- char *tmp1 = NULL;
- char *dup_childs = NULL;
- /* TODO: get the list of child nodes for
- the given pattern */
- dup_childs = gf_strdup (childs);
- child = strtok_r (dup_childs, ",", &tmp);
- while (child) {
- if (gf_unify_valid_child (child, xl)) {
- idx++;
- child = strtok_r (NULL, ",", &tmp);
- } else {
- gf_log ("switch", GF_LOG_ERROR,
- "%s is not a subvolume "
- "of %s. pattern can "
- "only be scheduled only"
- " to a subvolume of %s",
- child, xl->name,
- xl->name);
- return -1;
- }
- }
- GF_FREE (dup_childs);
- child = strtok_r (childs, ",", &tmp1);
- switch_opt->num_child = idx;
- switch_opt->array =
- GF_CALLOC (1, idx * sizeof (struct switch_sched_array), gf_switch_mt_switch_sched_array);
- ERR_ABORT (switch_opt->array);
- idx = 0;
- child = strtok_r (childs, ",", &tmp);
- while (child) {
- for (index = 1;
- index < switch_buf->child_count;
- index++) {
- if (strcmp (switch_buf->array[index - 1].xl->name,
- child) == 0) {
- gf_log ("switch",
- GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern, child);
- /*
- if (switch_buf->array[index-1].considered) {
- gf_log ("switch", GF_LOG_DEBUG,
- "ambiguity found, exiting");
- return -1;
- }
- */
- switch_opt->array[idx].xl = switch_buf->array[index-1].xl;
- switch_buf->array[index-1].considered = 1;
- idx++;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp1);
- }
- } else {
- /* error */
- gf_log ("switch", GF_LOG_ERROR,
- "Check \"scheduler.switch.case\" "
- "option in unify volume. Exiting");
- GF_FREE (switch_buf->array);
- GF_FREE (switch_buf);
- return -1;
- }
- GF_FREE (dup_str);
- switch_str = strtok_r (NULL, ";", &tmp_str);
- }
- }
- /* Now, all the pattern based considerations done, so for all the
- * remaining pattern, '*' to all the remaining child nodes
- */
- {
- struct switch_sched_struct *switch_opt = NULL;
- int32_t flag = 0;
- int32_t index = 0;
- for (index=0; index < switch_buf->child_count; index++) {
- /* check for considered flag */
- if (switch_buf->array[index].considered)
- continue;
- flag++;
- }
- if (!flag) {
- gf_log ("switch", GF_LOG_ERROR,
- "No nodes left for pattern '*'. Exiting.");
- return -1;
- }
- switch_opt = GF_CALLOC (1, sizeof (struct switch_sched_struct), gf_switch_mt_switch_sched_struct);
- ERR_ABORT (switch_opt);
- if (switch_buf->cond) {
- /* there are already few entries */
- struct switch_sched_struct *trav = switch_buf->cond;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf->cond = switch_opt;
- }
- /* Add the '*' pattern to the array */
- memcpy (switch_opt->path_pattern, "*", 2);
- switch_opt->num_child = flag;
- switch_opt->array =
- GF_CALLOC (1, flag * sizeof (struct switch_sched_array), gf_switch_mt_switch_sched_array);
- ERR_ABORT (switch_opt->array);
- flag = 0;
- for (index=0; index < switch_buf->child_count; index++) {
- /* check for considered flag */
- if (switch_buf->array[index].considered)
- continue;
- gf_log ("switch", GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern,
- switch_buf->array[index].xl->name);
- switch_opt->array[flag].xl =
- switch_buf->array[index].xl;
- switch_buf->array[index].considered = 1;
- flag++;
- }
- }
-
- pthread_mutex_init (&switch_buf->switch_mutex, NULL);
-
- // put it at the proper place
- *((long *)xl->private) = (long)switch_buf;
-
- return 0;
-}
-
-static void
-switch_fini (xlator_t *xl)
-{
- /* TODO: free all the allocated entries */
- struct switch_struct *switch_buf = NULL;
- switch_buf = (struct switch_struct *)*((long *)xl->private);
-
- pthread_mutex_destroy (&switch_buf->switch_mutex);
- GF_FREE (switch_buf->array);
- GF_FREE (switch_buf);
-}
-
-static xlator_t *
-switch_schedule (xlator_t *xl, const void *path)
-{
- struct switch_struct *switch_buf = NULL;
- switch_buf = (struct switch_struct *)*((long *)xl->private);
-
- return switch_get_matching_xl (path, switch_buf->cond);
-}
-
-
-/**
- * notify
- */
-void
-switch_notify (xlator_t *xl, int32_t event, void *data)
-{
- /* TODO: This should be checking in switch_sched_struct */
-#if 0
- struct switch_struct *switch_buf = NULL;
- int32_t idx = 0;
-
- switch_buf = (struct switch_struct *)*((long *)xl->private);
- if (!switch_buf)
- return;
-
- for (idx = 0; idx < switch_buf->child_count; idx++) {
- if (switch_buf->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- switch_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- switch_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-#endif
-}
-
-static void
-switch_update (xlator_t *xl)
-{
- return;
-}
-
-int32_t
-switch_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_switch_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
-}
-
-struct sched_ops sched = {
- .init = switch_init,
- .fini = switch_fini,
- .update = switch_update,
- .schedule = switch_schedule,
- .notify = switch_notify,
- .mem_acct_init = switch_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.read-only-subvolumes" ,
- "switch.read-only-subvolumes"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.local-volume-name",
- "switch.nufa.local-volume-name" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler.switch.case",
- "switch.case" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/site.h.in b/site.h.in
new file mode 100644
index 00000000000..eb2f062e60c
--- /dev/null
+++ b/site.h.in
@@ -0,0 +1,44 @@
+/*
+ * Guidelines for using this file vs. configure.ac
+ *
+ * (1) If it already exists in configure.ac, leave it there.
+ *
+ * (2) If it needs to take effect at configure (not compile) time, it *needs*
+ * to go in configure.ac.
+ *
+ * (3) If it affects file paths, which are the things most likely to be based
+ * on an OS or distribution's generic filesystem hierarchy and not on a
+ * particular package's definition (e.g. an RPM specfile), it should probably
+ * go in configure.ac.
+ *
+ * (4) If it affects default sizes, limits, thresholds, or modes of operation
+ * (e.g. IPv4 vs. IPv6), it should probably go here.
+ *
+ * (5) For anything else, is it more like the things in 3 or the things in 4?
+ * Which approach is more convenient for the people who are likely to use the
+ * new option(s)? Make your best guesses, confirm with others, and go with
+ * what works.
+ */
+
+#define SITE_H_ENABLE_LEAST_PRIORITY "on"
+#define SITE_H_MD_CACHE_TIMEOUT "1"
+#define SITE_H_NFS_DISABLE "on"
+
+/*
+ * As an example of how to use this file, here's what the Facebook version looks
+ * like:
+
+#define SITE_H_ENABLE_LEAST_PRIORITY "off"
+#define SITE_H_MD_CACHE_TIMEOUT "180"
+#define SITE_H_NFS_DISABLE "off"
+
+ * Each time we add a value here, we lessen the risk of values being
+ * inconsistent across production automation, test automation, and manual
+ * developer testing. We also save effort compared to updating values for each
+ * kind of external automation. To do the same thing with configure scripts or
+ * specfiles, we'd have to make much more complicated and less discoverable
+ * changes there.
+ *
+ * Other orgs are likely to have the same issues regarding their preferred
+ * settings, and likewise should add their favorites here as well.
+ */
diff --git a/smoke.sh b/smoke.sh
deleted file mode 100755
index a87908d794e..00000000000
--- a/smoke.sh
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/bin/bash
-
-set -e;
-
-M=/mnt;
-P=/build;
-H=$(hostname);
-T=600;
-V=patchy;
-
-
-function cleanup()
-{
- killall -15 glusterfs glusterfsd glusterd glusterd 2>&1 || true;
- killall -9 glusterfs glusterfsd glusterd glusterd 2>&1 || true;
- umount -l $M 2>&1 || true;
- rm -rf /var/lib/glusterd /etc/glusterd $P/export;
-}
-
-function start_fs()
-{
- mkdir -p $P/export;
- chmod 0755 $P/export;
-
- glusterd;
- gluster --mode=script volume create $V replica 2 $H:$P/export/export{1,2,3,4};
- gluster volume start $V;
- glusterfs -s $H --volfile-id $V $M;
-# mount -t glusterfs $H:/$V $M;
-}
-
-
-function run_tests()
-{
- cd $M;
-
- (sleep 1; dbench -s -t 60 10 >/dev/null) &
-
- (sleep 1; /opt/qa/tools/posix_compliance.sh) &
-
- wait %2
- wait %3
-
- rm -rf clients;
-
- cd -;
-}
-
-
-function watchdog ()
-{
- # insurance against hangs during the test
-
- sleep $1;
-
- echo "Kicking in watchdog after $1 secs";
-
- cleanup;
-}
-
-
-function finish ()
-{
- cleanup;
- kill %1;
-}
-
-function main ()
-{
- cleanup;
-
- watchdog $T &
-
- trap finish EXIT;
-
- set -x;
-
- start_fs;
-
- run_tests;
-}
-
-main "$@";
diff --git a/submit-for-review.sh b/submit-for-review.sh
new file mode 120000
index 00000000000..a21c0e2869a
--- /dev/null
+++ b/submit-for-review.sh
@@ -0,0 +1 @@
+rfc.sh \ No newline at end of file
diff --git a/swift/1.4.8/README b/swift/1.4.8/README
deleted file mode 100644
index 0ea91535ef7..00000000000
--- a/swift/1.4.8/README
+++ /dev/null
@@ -1,22 +0,0 @@
-Gluster Unified File and Object Storage allows files and directories created
-via gluster-native/nfs mount to be accessed as containers and objects. It is
-a plugin for OpenStack Swift project.
-
-Install
-* Clone the swift repo from git://github.com/openstack/swift.git
-* Apply the swift.diff present in glusterfs.git/swift/1.4.8 to the swift repo.
-* Create a directory named "plugins" under swift.git/swift directory.
-* Copy the contents of glusterfs.git/swift/1.4.8/plugins/ under swift.git/swift/
- except the conf directory.
-* Copy the contents of glusterfs.git/swift/1.4.5/plugins/conf under /etc/swift/.
-* Run python setup.py install
-
-Once this is done, you can access the GlusterFS volumes as Swift accounts.
-Add the Volume names with the user-name and its corresponding password to the
-/etc/swift/proxy-server.conf (follow the syntax used in the sample conf file).
-
-Command to start the servers
- swift-init main start
-
-Command to stop the servers
- swift-init main stop
diff --git a/swift/1.4.8/gluster-swift-plugin.spec b/swift/1.4.8/gluster-swift-plugin.spec
deleted file mode 100644
index 746f75c5f5d..00000000000
--- a/swift/1.4.8/gluster-swift-plugin.spec
+++ /dev/null
@@ -1,60 +0,0 @@
-############################################################################################################
-# Command to build rpms.#
-# $ rpmbuild -ta %{name}-%{version}-%{release}.tar.gz #
-############################################################################################################
-# Setting up the environment. #
-# * Create a directory %{name}-%{version} under $HOME/rpmbuild/SOURCES #
-# * Copy the contents of plugins directory into $HOME/rpmbuild/SOURCES/%{name}-%{version} #
-# * tar zcvf %{name}-%{version}-%{release}.tar.gz $HOME/rpmbuild/SOURCES/%{name}-%{version} %{name}.spec #
-# For more information refer #
-# http://fedoraproject.org/wiki/How_to_create_an_RPM_package #
-############################################################################################################
-
-%define _confdir /etc/swift
-%define _swiftdir /usr/lib/python2.6/site-packages/swift
-%define _ufo_version 1.0
-%define _ufo_release 3
-
-Summary : GlusterFS Unified File and Object Storage.
-Name : gluster-swift-plugin
-Version : %{_ufo_version}
-Release : %{_ufo_release}
-Group : Application/File
-Vendor : Red Hat Inc.
-Source0 : %{name}-%{version}-%{release}.tar.gz
-Packager : gluster-users@gluster.org
-License : Apache
-BuildArch: noarch
-Requires : memcached
-Requires : openssl
-Requires : python
-Requires : gluster-swift
-
-%description
-Gluster Unified File and Object Storage unifies NAS and object storage
-technology. This provides a system for data storage that enables users to access
-the same data as an object and as a file, simplifying management and controlling
-storage costs.
-
-%prep
-%setup -q
-
-%install
-rm -rf %{buildroot}
-
-mkdir -p %{buildroot}/%{_swiftdir}/plugins
-mkdir -p %{buildroot}/%{_confdir}/
-
-cp constraints.py %{buildroot}/%{_swiftdir}/plugins
-cp DiskDir.py %{buildroot}/%{_swiftdir}/plugins
-cp DiskFile.py %{buildroot}/%{_swiftdir}/plugins
-cp Glusterfs.py %{buildroot}/%{_swiftdir}/plugins
-cp __init__.py %{buildroot}/%{_swiftdir}/plugins
-cp utils.py %{buildroot}/%{_swiftdir}/plugins
-
-cp -r conf/* %{buildroot}/%{_confdir}/
-
-%files
-%defattr(-,root,root)
-%{_swiftdir}/plugins
-%{_confdir}/
diff --git a/swift/1.4.8/gluster-swift.spec b/swift/1.4.8/gluster-swift.spec
deleted file mode 100644
index 640b944e8cb..00000000000
--- a/swift/1.4.8/gluster-swift.spec
+++ /dev/null
@@ -1,396 +0,0 @@
-%if ! (0%{?fedora} > 12 || 0%{?rhel} > 5)
-%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
-%endif
-
-Name: gluster-swift
-Version: 1.4.8
-Release: 3%{?dist}
-Summary: OpenStack Object Storage (swift)
-
-Group: Development/Languages
-License: ASL 2.0
-URL: http://launchpad.net/swift
-Source0: http://launchpad.net/swift/essex/%{version}/+download/swift-%{version}.tar.gz
-Source1: %{name}-functions
-Source2: %{name}-account.init
-Source4: %{name}-container.init
-Source5: %{name}-object.init
-Source6: %{name}-proxy.init
-Patch0: openstack-swift-newdeps.patch
-Patch1: openstack-swift-docmod.patch
-Patch2: openstack-swift-nonet.patch
-Patch3: gluster.patch
-
-BuildRoot: %{_tmppath}/swift-%{version}-%{release}-root-%(%{__id_u} -n)
-
-BuildArch: noarch
-BuildRequires: dos2unix
-BuildRequires: python-devel
-BuildRequires: python-setuptools
-BuildRequires: python-netifaces
-BuildRequires: python-paste-deploy
-Requires: python-configobj
-Requires: python-eventlet >= 0.9.8
-Requires: python-greenlet >= 0.3.1
-Requires: python-paste-deploy
-Requires: python-simplejson
-Requires: python-webob1.0
-Requires: pyxattr
-Requires: python-setuptools
-Requires: python-netifaces
-Requires: python-netifaces
-
-Conflicts: openstack-swift
-
-Requires(post): chkconfig
-Requires(postun): initscripts
-Requires(preun): chkconfig
-Requires(pre): shadow-utils
-Obsoletes: openstack-swift-auth <= 1.4.0
-
-%description
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-Objects are written to multiple hardware devices in the data center, with the
-OpenStack software responsible for ensuring data replication and integrity
-across the cluster. Storage clusters can scale horizontally by adding new nodes,
-which are automatically configured. Should a node fail, OpenStack works to
-replicate its content from other active nodes. Because OpenStack uses software
-logic to ensure data replication and distribution across different devices,
-inexpensive commodity hard drives and servers can be used in lieu of more
-expensive equipment.
-
-%package account
-Summary: A swift account server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-
-%description account
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} account server.
-
-%package container
-Summary: A swift container server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-
-%description container
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} container server.
-
-%package object
-Summary: A swift object server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-Requires: rsync >= 3.0
-
-%description object
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} object server.
-
-%package proxy
-Summary: A swift proxy server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-
-%description proxy
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} proxy server.
-
-%package doc
-Summary: Documentation for %{name}
-Group: Documentation
-#%if 0%{?rhel} >= 6
-#BuildRequires: python-sphinx10 >= 1.0
-#%endif
-%if 0%{?fedora} >= 14
-BuildRequires: python-sphinx >= 1.0
-%endif
-# Required for generating docs
-BuildRequires: python-eventlet
-BuildRequires: python-simplejson
-BuildRequires: python-webob1.0
-BuildRequires: pyxattr
-
-%description doc
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains documentation files for %{name}.
-
-%prep
-%setup -q -n swift-%{version}
-%patch0 -p1 -b .newdeps
-%patch1 -p1 -b .docmod
-%patch2 -p1 -b .nonet
-%patch3 -p1 -b .gluster
-# Fix wrong-file-end-of-line-encoding warning
-dos2unix LICENSE
-
-%build
-%{__python} setup.py build
-# Fails unless we create the build directory
-mkdir -p doc/build
-# Build docs
-%if 0%{?fedora} >= 14
-%{__python} setup.py build_sphinx
-%endif
-#%if 0%{?rhel} >= 6
-#export PYTHONPATH="$( pwd ):$PYTHONPATH"
-#SPHINX_DEBUG=1 sphinx-1.0-build -b html doc/source doc/build/html
-#SPHINX_DEBUG=1 sphinx-1.0-build -b man doc/source doc/build/man
-#%endif
-# Fix hidden-file-or-dir warning
-#rm doc/build/html/.buildinfo
-
-%install
-rm -rf %{buildroot}
-%{__python} setup.py install -O1 --skip-build --root %{buildroot}
-# Init helper functions
-install -p -D -m 644 %{SOURCE1} %{buildroot}%{_datarootdir}/%{name}/functions
-# Init scripts
-install -p -D -m 755 %{SOURCE2} %{buildroot}%{_initrddir}/%{name}-account
-install -p -D -m 755 %{SOURCE4} %{buildroot}%{_initrddir}/%{name}-container
-install -p -D -m 755 %{SOURCE5} %{buildroot}%{_initrddir}/%{name}-object
-install -p -D -m 755 %{SOURCE6} %{buildroot}%{_initrddir}/%{name}-proxy
-# Remove tests
-rm -fr %{buildroot}/%{python_sitelib}/test
-# Misc other
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/account-server
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/container-server
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/object-server
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/proxy-server
-# Install pid directory
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/account-server
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/container-server
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/object-server
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/proxy-server
-
-%clean
-rm -rf %{buildroot}
-
-%pre
-getent group swift >/dev/null || groupadd -r swift -g 160
-getent passwd swift >/dev/null || \
-useradd -r -g swift -u 160 -d %{_sharedstatedir}/swift -s /sbin/nologin \
--c "OpenStack Swift Daemons" swift
-exit 0
-
-%post account
-/sbin/chkconfig --add %{name}-account
-
-%preun account
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-account stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-account
-fi
-
-%postun account
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-account condrestart >/dev/null 2>&1 || :
-fi
-
-%post container
-/sbin/chkconfig --add %{name}-container
-
-%preun container
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-container stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-container
-fi
-
-%postun container
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-container condrestart >/dev/null 2>&1 || :
-fi
-
-%post object
-/sbin/chkconfig --add %{name}-object
-
-%preun object
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-object stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-object
-fi
-
-%postun object
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-object condrestart >/dev/null 2>&1 || :
-fi
-
-%post proxy
-/sbin/chkconfig --add %{name}-proxy
-
-%preun proxy
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-proxy stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-proxy
-fi
-
-%postun proxy
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-proxy condrestart >/dev/null 2>&1 || :
-fi
-
-%files
-%defattr(-,root,root,-)
-%doc AUTHORS LICENSE README
-%doc etc/dispersion.conf-sample etc/drive-audit.conf-sample etc/object-expirer.conf-sample
-%doc etc/swift.conf-sample
-%dir %{_datarootdir}/%{name}/functions
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift
-%dir %{_sysconfdir}/swift
-%dir %{python_sitelib}/swift
-%{_bindir}/swift
-%{_bindir}/swift-account-audit
-%{_bindir}/swift-bench
-%{_bindir}/swift-drive-audit
-%{_bindir}/swift-get-nodes
-%{_bindir}/swift-init
-%{_bindir}/swift-ring-builder
-%{_bindir}/swift-dispersion-populate
-%{_bindir}/swift-dispersion-report
-%{_bindir}/swift-recon*
-%{_bindir}/swift-object-expirer
-%{_bindir}/swift-oldies
-%{_bindir}/swift-orphans
-%{_bindir}/swift-form-signature
-%{_bindir}/swift-temp-url
-%{python_sitelib}/swift/*.py*
-%{python_sitelib}/swift/common
-%{python_sitelib}/swift-%{version}-*.egg-info
-
-%files account
-%defattr(-,root,root,-)
-%doc etc/account-server.conf-sample
-%dir %{_initrddir}/%{name}-account
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/account-server
-%dir %{_sysconfdir}/swift/account-server
-%{_bindir}/swift-account-auditor
-%{_bindir}/swift-account-reaper
-%{_bindir}/swift-account-replicator
-%{_bindir}/swift-account-server
-%{python_sitelib}/swift/account
-
-
-%files container
-%defattr(-,root,root,-)
-%doc etc/container-server.conf-sample
-%dir %{_initrddir}/%{name}-container
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/container-server
-%dir %{_sysconfdir}/swift/container-server
-%{_bindir}/swift-container-auditor
-%{_bindir}/swift-container-server
-%{_bindir}/swift-container-replicator
-%{_bindir}/swift-container-updater
-%{_bindir}/swift-container-sync
-%{python_sitelib}/swift/container
-
-%files object
-%defattr(-,root,root,-)
-%doc etc/object-server.conf-sample etc/rsyncd.conf-sample
-%dir %{_initrddir}/%{name}-object
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/object-server
-%dir %{_sysconfdir}/swift/object-server
-%{_bindir}/swift-object-auditor
-%{_bindir}/swift-object-info
-%{_bindir}/swift-object-replicator
-%{_bindir}/swift-object-server
-%{_bindir}/swift-object-updater
-%{python_sitelib}/swift/obj
-
-%files proxy
-%defattr(-,root,root,-)
-%doc etc/proxy-server.conf-sample
-%dir %{_initrddir}/%{name}-proxy
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/proxy-server
-%dir %{_sysconfdir}/swift/proxy-server
-%{_bindir}/swift-proxy-server
-%{python_sitelib}/swift/proxy
-
-%files doc
-%defattr(-,root,root,-)
-%doc LICENSE
-#%doc doc/build/html
-
-%changelog
-* Thu Apr 26 2012 Anthony Towns <atowns@redhat.com> 1.4.8-2
-- Apply gluster patches
-- Rename to gluster-swift
-
-* Thu Mar 22 2012 Alan Pevec <apevec@redhat.com> 1.4.8-1
-- Update to 1.4.8
-
-* Fri Mar 09 2012 Alan Pevec <apevec@redhat.com> 1.4.7-1
-- Update to 1.4.7
-
-* Mon Feb 13 2012 Alan Pevec <apevec@redhat.com> 1.4.6-1
-- Update to 1.4.6
-
-* Thu Jan 12 2012 Alan Pevec <apevec@redhat.com> 1.4.4-2
-- add back /var/run/swift for el6
-
-* Wed Jan 04 2012 Alan Pevec <apevec@redhat.com> 1.4.4-1
-- Use updated parallel install versions of epel packages (pbrady)
-- Ensure the docs aren't built with the system glance module (pbrady)
-- Ensure we don't access the net when building docs (pbrady)
-- Update to 1.4.4
-
-* Wed Nov 23 2011 David Nalley <david@gnsa.us> -1.4.3-2
-* fixed some missing requires
-
-* Sat Nov 05 2011 David Nalley <david@gnsa.us> - 1.4.3-1
-- Update to 1.4.3
-- fix init script add, registration, deletion BZ 685155
-- fixing BR to facilitate epel6 building
-
-* Tue Aug 23 2011 David Nalley <david@gnsa.us> - 1.4.0-2
-- adding uid:gid for bz 732693
-
-* Wed Jun 22 2011 David Nalley <david@gnsa.us> - 1.4.1-1
-- Update to 1.4.0
-- change the name of swift binary from st to swift
-
-* Sat Jun 04 2011 David Nalley <david@gnsa.us> - 1.4.0-1
-- Update to 1.4.0
-
-* Fri May 20 2011 David Nalley <david@gnsa.us> - 1.3.0-1
-- Update to 1.3.0
-
-* Tue Feb 08 2011 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 1.1.0-2
-- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild
-
-* Sun Dec 05 2010 Silas Sewell <silas@sewell.ch> - 1.1.0-1
-- Update to 1.1.0
-
-* Sun Aug 08 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-5
-- Update for new Python macro guidelines
-- Use dos2unix instead of sed
-- Make gecos field more descriptive
-
-* Wed Jul 28 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-4
-- Rename to openstack-swift
-
-* Wed Jul 28 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-3
-- Fix return value in swift-functions
-
-* Tue Jul 27 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-2
-- Add swift user
-- Update init scripts
-
-* Sun Jul 18 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-1
-- Initial build
diff --git a/swift/1.4.8/plugins/DiskDir.py b/swift/1.4.8/plugins/DiskDir.py
deleted file mode 100644
index 28671be3cd9..00000000000
--- a/swift/1.4.8/plugins/DiskDir.py
+++ /dev/null
@@ -1,484 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from swift.plugins.utils import clean_metadata, dir_empty, rmdirs, mkdirs, \
- validate_account, validate_container, check_valid_account, is_marker, \
- get_container_details, get_account_details, create_container_metadata, \
- create_account_metadata, DEFAULT_GID, DEFAULT_UID, get_account_details, \
- validate_object, create_object_metadata, read_metadata, write_metadata
-
-from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
- check_mount
-
-from swift.plugins.utils import X_CONTENT_TYPE, X_CONTENT_LENGTH, X_TIMESTAMP,\
- X_PUT_TIMESTAMP, X_TYPE, X_ETAG, X_OBJECTS_COUNT, X_BYTES_USED, \
- X_CONTAINER_COUNT, CONTAINER
-
-from swift import plugins
-def strip_obj_storage_path(path, string='/mnt/gluster-object'):
- """
- strip /mnt/gluster-object
- """
- return path.replace(string, '').strip('/')
-
-DATADIR = 'containers'
-
-
-class DiskCommon(object):
- def is_deleted(self):
- return not os.path.exists(self.datadir)
-
- def filter_prefix(self, objects, prefix):
- """
- Accept sorted list.
- """
- found = 0
- filtered_objs = []
- for object_name in objects:
- if object_name.startswith(prefix):
- filtered_objs.append(object_name)
- found = 1
- else:
- if found:
- break
- return filtered_objs
-
- def filter_delimiter(self, objects, delimiter, prefix):
- """
- Accept sorted list.
- Objects should start with prefix.
- """
- filtered_objs=[]
- for object_name in objects:
- tmp_obj = object_name.replace(prefix, '', 1)
- sufix = tmp_obj.split(delimiter, 1)
- new_obj = prefix + sufix[0]
- if new_obj and new_obj not in filtered_objs:
- filtered_objs.append(new_obj)
-
- return filtered_objs
-
- def filter_marker(self, objects, marker):
- """
- TODO: We can traverse in reverse order to optimize.
- Accept sorted list.
- """
- filtered_objs=[]
- found = 0
- if objects[-1] < marker:
- return filtered_objs
- for object_name in objects:
- if object_name > marker:
- filtered_objs.append(object_name)
-
- return filtered_objs
-
- def filter_end_marker(self, objects, end_marker):
- """
- Accept sorted list.
- """
- filtered_objs=[]
- for object_name in objects:
- if object_name < end_marker:
- filtered_objs.append(object_name)
- else:
- break
-
- return filtered_objs
-
- def filter_limit(self, objects, limit):
- filtered_objs=[]
- for i in range(0, limit):
- filtered_objs.append(objects[i])
-
- return filtered_objs
-
- def update_account(self, metadata):
- acc_path = self.datadir
- write_metadata(acc_path, metadata)
- self.metadata = metadata
-
-class DiskDir(DiskCommon):
- """
- Manage object files on disk.
-
- :param path: path to devices on the node
- :param device: device name
- :param partition: partition on the device the object lives in
- :param account: account name for the object
- :param container: container name for the object
- :param obj: object name for the object
- :param keep_data_fp: if True, don't close the fp, otherwise close it
- :param disk_chunk_Size: size of chunks on file reads
- """
-
- def __init__(self, path, device, partition, account, container, logger,
- uid=DEFAULT_UID, gid=DEFAULT_GID, fs_object=None):
- self.root = path
- device = account
- if container:
- self.name = container
- else:
- self.name = None
- if self.name:
- self.datadir = os.path.join(path, account, self.name)
- else:
- self.datadir = os.path.join(path, device)
- self.account = account
- self.device_path = os.path.join(path, device)
- if not check_mount(path, device):
- check_valid_account(account, fs_object)
- self.logger = logger
- self.metadata = {}
- self.uid = int(uid)
- self.gid = int(gid)
- # Create a dummy db_file in /etc/swift
- self.db_file = '/etc/swift/db_file.db'
- if not os.path.exists(self.db_file):
- file(self.db_file, 'w+')
- self.dir_exists = os.path.exists(self.datadir)
- if self.dir_exists:
- try:
- self.metadata = read_metadata(self.datadir)
- except EOFError:
- create_container_metadata(self.datadir)
- else:
- return
- if container:
- if not self.metadata:
- create_container_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
- else:
- if not validate_container(self.metadata):
- create_container_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
- else:
- if not self.metadata:
- create_account_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
- else:
- if not validate_account(self.metadata):
- create_account_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
-
- def empty(self):
- return dir_empty(self.datadir)
-
- def delete(self):
- if self.empty():
- #For delete account.
- if os.path.ismount(self.datadir):
- clean_metadata(self.datadir)
- else:
- rmdirs(self.datadir)
- self.dir_exists = False
-
-
- def put_metadata(self, metadata):
- """
- Write metadata to directory/container.
- """
- write_metadata(self.datadir, metadata)
- self.metadata = metadata
-
- def put(self, metadata):
- """
- Create and write metatdata to directory/container.
- :param metadata: Metadata to write.
- """
- if not self.dir_exists:
- mkdirs(self.datadir)
-
- os.chown(self.datadir, self.uid, self.gid)
- write_metadata(self.datadir, metadata)
- self.metadata = metadata
- self.dir_exists = True
-
- def put_obj(self, content_length, timestamp):
- self.metadata[X_OBJECTS_COUNT] = int(self.metadata[X_OBJECTS_COUNT]) + 1
- self.metadata[X_PUT_TIMESTAMP] = timestamp
- self.metadata[X_BYTES_USED] = int(self.metadata[X_BYTES_USED]) + int(content_length)
- #TODO: define update_metadata instad of writing whole metadata again.
- self.put_metadata(self.metadata)
-
- def delete_obj(self, content_length):
- self.metadata[X_OBJECTS_COUNT] = int(self.metadata[X_OBJECTS_COUNT]) - 1
- self.metadata[X_BYTES_USED] = int(self.metadata[X_BYTES_USED]) - int(content_length)
- self.put_metadata(self.metadata)
-
- def put_container(self, container, put_timestamp, del_timestamp, object_count, bytes_used):
- """
- For account server.
- """
- self.metadata[X_OBJECTS_COUNT] = 0
- self.metadata[X_BYTES_USED] = 0
- self.metadata[X_CONTAINER_COUNT] = int(self.metadata[X_CONTAINER_COUNT]) + 1
- self.metadata[X_PUT_TIMESTAMP] = 1
- self.put_metadata(self.metadata)
-
- def delete_container(self, object_count, bytes_used):
- """
- For account server.
- """
- self.metadata[X_OBJECTS_COUNT] = 0
- self.metadata[X_BYTES_USED] = 0
- self.metadata[X_CONTAINER_COUNT] = int(self.metadata[X_CONTAINER_COUNT]) - 1
- self.put_metadata(self.metadata)
-
- def unlink(self):
- """
- Remove directory/container if empty.
- """
- if dir_empty(self.datadir):
- rmdirs(self.datadir)
-
- def list_objects_iter(self, limit, marker, end_marker,
- prefix, delimiter, path):
- """
- Returns tuple of name, created_at, size, content_type, etag.
- """
- if path:
- prefix = path = path.rstrip('/') + '/'
- delimiter = '/'
- if delimiter and not prefix:
- prefix = ''
-
- objects = []
- object_count = 0
- bytes_used = 0
- container_list = []
-
- objects, object_count, bytes_used = get_container_details(self.datadir)
-
- if int(self.metadata[X_OBJECTS_COUNT]) != object_count or \
- int(self.metadata[X_BYTES_USED]) != bytes_used:
- self.metadata[X_OBJECTS_COUNT] = object_count
- self.metadata[X_BYTES_USED] = bytes_used
- self.update_container(self.metadata)
-
- if objects:
- objects.sort()
-
- if objects and prefix:
- objects = self.filter_prefix(objects, prefix)
-
- if objects and delimiter:
- objects = self.filter_delimiter(objects, delimiter, prefix)
-
- if objects and marker:
- objects = self.filter_marker(objects, marker)
-
- if objects and end_marker:
- objects = self.filter_end_marker(objects, end_marker)
-
- if objects and limit:
- if len(objects) > limit:
- objects = self.filter_limit(objects, limit)
-
- if objects:
- for obj in objects:
- list_item = []
- list_item.append(obj)
- metadata = read_metadata(self.datadir + '/' + obj)
- if not metadata or not validate_object(metadata):
- metadata = create_object_metadata(self.datadir + '/' + obj)
- if metadata:
- list_item.append(metadata[X_TIMESTAMP])
- list_item.append(int(metadata[X_CONTENT_LENGTH]))
- list_item.append(metadata[X_CONTENT_TYPE])
- list_item.append(metadata[X_ETAG])
- container_list.append(list_item)
-
- return container_list
-
- def update_container(self, metadata):
- cont_path = self.datadir
- write_metadata(cont_path, metadata)
- self.metadata = metadata
-
- def update_object_count(self):
- objects = []
- object_count = 0
- bytes_used = 0
- objects, object_count, bytes_used = get_container_details(self.datadir)
-
-
- if int(self.metadata[X_OBJECTS_COUNT]) != object_count or \
- int(self.metadata[X_BYTES_USED]) != bytes_used:
- self.metadata[X_OBJECTS_COUNT] = object_count
- self.metadata[X_BYTES_USED] = bytes_used
- self.update_container(self.metadata)
-
- def update_container_count(self):
- containers = []
- container_count = 0
-
- containers, container_count = get_account_details(self.datadir)
-
- if int(self.metadata[X_CONTAINER_COUNT]) != container_count:
- self.metadata[X_CONTAINER_COUNT] = container_count
- self.update_account(self.metadata)
-
- def get_info(self, include_metadata=False):
- """
- Get global data for the container.
- :returns: dict with keys: account, container, created_at,
- put_timestamp, delete_timestamp, object_count, bytes_used,
- reported_put_timestamp, reported_delete_timestamp,
- reported_object_count, reported_bytes_used, hash, id,
- x_container_sync_point1, and x_container_sync_point2.
- If include_metadata is set, metadata is included as a key
- pointing to a dict of tuples of the metadata
- """
- # TODO: delete_timestamp, reported_put_timestamp
- # reported_delete_timestamp, reported_object_count,
- # reported_bytes_used, created_at
-
- metadata = {}
- if os.path.exists(self.datadir):
- metadata = read_metadata(self.datadir)
-
- data = {'account' : self.account, 'container' : self.name,
- 'object_count' : metadata.get(X_OBJECTS_COUNT, '0'),
- 'bytes_used' : metadata.get(X_BYTES_USED, '0'),
- 'hash': '', 'id' : '', 'created_at' : '1',
- 'put_timestamp' : metadata.get(X_PUT_TIMESTAMP, '0'),
- 'delete_timestamp' : '1',
- 'reported_put_timestamp' : '1', 'reported_delete_timestamp' : '1',
- 'reported_object_count' : '1', 'reported_bytes_used' : '1'}
- if include_metadata:
- data['metadata'] = metadata
- return data
-
- def put_object(self, name, timestamp, size, content_type,
- etag, deleted=0):
- # TODO: Implement the specifics of this func.
- pass
-
- def initialize(self, timestamp):
- pass
-
- def update_put_timestamp(self, timestamp):
- """
- Create the container if it doesn't exist and update the timestamp
- """
- if not os.path.exists(self.datadir):
- self.put(self.metadata)
-
- def delete_object(self, name, timestamp):
- # TODO: Implement the delete object
- pass
-
- def delete_db(self, timestamp):
- """
- Delete the container
- """
- self.unlink()
-
- def update_metadata(self, metadata):
- self.metadata.update(metadata)
- write_metadata(self.datadir, self.metadata)
-
-
-class DiskAccount(DiskDir):
- def __init__(self, root, account, fs_object = None):
- self.root = root
- self.account = account
- self.datadir = os.path.join(self.root, self.account)
- if not check_mount(root, account):
- check_valid_account(account, fs_object)
- self.metadata = read_metadata(self.datadir)
- if not self.metadata or not validate_account(self.metadata):
- self.metadata = create_account_metadata(self.datadir)
-
- def list_containers_iter(self, limit, marker, end_marker,
- prefix, delimiter):
- """
- Return tuple of name, object_count, bytes_used, 0(is_subdir).
- Used by account server.
- """
- if delimiter and not prefix:
- prefix = ''
- containers = []
- container_count = 0
- account_list = []
-
- containers, container_count = get_account_details(self.datadir)
-
- if int(self.metadata[X_CONTAINER_COUNT]) != container_count:
- self.metadata[X_CONTAINER_COUNT] = container_count
- self.update_account(self.metadata)
-
- if containers:
- containers.sort()
-
- if containers and prefix:
- containers = self.filter_prefix(containers, prefix)
-
- if containers and delimiter:
- containers = self.filter_delimiter(containers, delimiter, prefix)
-
- if containers and marker:
- containers = self.filter_marker(containers, marker)
-
- if containers and end_marker:
- containers = self.filter_end_marker(containers, end_marker)
-
- if containers and limit:
- if len(containers) > limit:
- containers = self.filter_limit(containers, limit)
-
- if containers:
- for cont in containers:
- list_item = []
- metadata = None
- list_item.append(cont)
- metadata = read_metadata(self.datadir + '/' + cont)
- if not metadata or not validate_container(metadata):
- metadata = create_container_metadata(self.datadir + '/' + cont)
-
- if metadata:
- list_item.append(metadata[X_OBJECTS_COUNT])
- list_item.append(metadata[X_BYTES_USED])
- list_item.append(0)
- account_list.append(list_item)
-
- return account_list
-
- def get_info(self, include_metadata=False):
- """
- Get global data for the account.
- :returns: dict with keys: account, created_at, put_timestamp,
- delete_timestamp, container_count, object_count,
- bytes_used, hash, id
- """
- metadata = {}
- if (os.path.exists(self.datadir)):
- metadata = read_metadata(self.datadir)
- if not metadata:
- metadata = create_account_metadata(self.datadir)
-
- data = {'account' : self.account, 'created_at' : '1',
- 'put_timestamp' : '1', 'delete_timestamp' : '1',
- 'container_count' : metadata.get(X_CONTAINER_COUNT, 0),
- 'object_count' : metadata.get(X_OBJECTS_COUNT, 0),
- 'bytes_used' : metadata.get(X_BYTES_USED, 0),
- 'hash' : '', 'id' : ''}
-
- if include_metadata:
- data['metadata'] = metadata
- return data
diff --git a/swift/1.4.8/plugins/DiskFile.py b/swift/1.4.8/plugins/DiskFile.py
deleted file mode 100644
index 6f77eaaa57a..00000000000
--- a/swift/1.4.8/plugins/DiskFile.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-from eventlet import tpool
-from tempfile import mkstemp
-from contextlib import contextmanager
-from swift.common.utils import normalize_timestamp, renamer
-from swift.plugins.utils import mkdirs, rmdirs, validate_object, \
- check_valid_account, create_object_metadata, do_open, \
- do_close, do_unlink, do_chown, do_stat, do_listdir, read_metadata,\
- write_metadata
-from swift.common.constraints import check_mount
-from swift.plugins.utils import X_CONTENT_TYPE, X_CONTENT_LENGTH, X_TIMESTAMP,\
- X_PUT_TIMESTAMP, X_TYPE, X_ETAG, X_OBJECTS_COUNT, X_BYTES_USED, \
- X_OBJECT_TYPE, FILE, DIR, MARKER_DIR, OBJECT, \
- DIR_TYPE, FILE_TYPE, DEFAULT_UID, DEFAULT_GID
-
-import logging
-from swift.obj.server import DiskFile
-
-
-DATADIR = 'objects'
-ASYNCDIR = 'async_pending'
-KEEP_CACHE_SIZE = (5 * 1024 * 1024)
-# keep these lower-case
-DISALLOWED_HEADERS = set('content-length content-type deleted etag'.split())
-
-
-class Gluster_DiskFile(DiskFile):
- """
- Manage object files on disk.
-
- :param path: path to devices on the node/mount path for UFO.
- :param device: device name/account_name for UFO.
- :param partition: partition on the device the object lives in
- :param account: account name for the object
- :param container: container name for the object
- :param obj: object name for the object
- :param keep_data_fp: if True, don't close the fp, otherwise close it
- :param disk_chunk_Size: size of chunks on file reads
- """
-
- def __init__(self, path, device, partition, account, container, obj,
- logger, keep_data_fp=False, disk_chunk_size=65536,
- uid=DEFAULT_UID, gid=DEFAULT_GID, fs_object = None):
- self.disk_chunk_size = disk_chunk_size
- device = account
- #Don't support obj_name ending/begining with '/', like /a, a/, /a/b/ etc
- obj = obj.strip('/')
- if '/' in obj:
- self.obj_path, self.obj = obj.rsplit('/', 1)
- else:
- self.obj_path = ''
- self.obj = obj
-
- if self.obj_path:
- self.name = '/'.join((container, self.obj_path))
- else:
- self.name = container
- #Absolute path for obj directory.
- self.datadir = os.path.join(path, device, self.name)
-
- self.device_path = os.path.join(path, device)
- if not check_mount(path, device):
- check_valid_account(account, fs_object)
-
- self.container_path = os.path.join(path, device, container)
- self.tmpdir = os.path.join(path, device, 'tmp')
- self.logger = logger
- self.metadata = {}
- self.meta_file = None
- self.data_file = None
- self.fp = None
- self.iter_etag = None
- self.started_at_0 = False
- self.read_to_eof = False
- self.quarantined_dir = None
- self.keep_cache = False
- self.is_dir = False
- self.is_valid = True
- self.uid = int(uid)
- self.gid = int(gid)
- if not os.path.exists(self.datadir + '/' + self.obj):
- return
-
- self.data_file = os.path.join(self.datadir, self.obj)
- self.metadata = read_metadata(self.datadir + '/' + self.obj)
- if not self.metadata:
- create_object_metadata(self.datadir + '/' + self.obj)
- self.metadata = read_metadata(self.datadir + '/' + self.obj)
-
- if not validate_object(self.metadata):
- create_object_metadata(self.datadir + '/' + self.obj)
- self.metadata = read_metadata(self.datadir + '/' +
- self.obj)
-
- self.filter_metadata()
-
- if os.path.isdir(self.datadir + '/' + self.obj):
- self.is_dir = True
- else:
- self.fp = do_open(self.data_file, 'rb')
- if not keep_data_fp:
- self.close(verify_file=False)
-
- def close(self, verify_file=True):
- """
- Close the file. Will handle quarantining file if necessary.
-
- :param verify_file: Defaults to True. If false, will not check
- file to see if it needs quarantining.
- """
- #Marker directory
- if self.is_dir:
- return
- if self.fp:
- do_close(self.fp)
- self.fp = None
-
- def is_deleted(self):
- """
- Check if the file is deleted.
-
- :returns: True if the file doesn't exist or has been flagged as
- deleted.
- """
- return not self.data_file
-
- def create_dir_object(self, dir_path):
- #TODO: if object already exists???
- if os.path.exists(dir_path) and not os.path.isdir(dir_path):
- self.logger.error("Deleting file %s", dir_path)
- do_unlink(dir_path)
- #If dir aleady exist just override metadata.
- mkdirs(dir_path)
- do_chown(dir_path, self.uid, self.gid)
- create_object_metadata(dir_path)
- return True
-
-
-
- def put_metadata(self, metadata):
- obj_path = self.datadir + '/' + self.obj
- write_metadata(obj_path, metadata)
- self.metadata = metadata
-
-
- def put(self, fd, tmppath, metadata, extension=''):
- """
- Finalize writing the file on disk, and renames it from the temp file to
- the real location. This should be called after the data has been
- written to the temp file.
-
- :params fd: file descriptor of the temp file
- :param tmppath: path to the temporary file being used
- :param metadata: dictionary of metadata to be written
- :param extention: extension to be used when making the file
- """
- #Marker dir.
- if extension == '.ts':
- return True
- if extension == '.meta':
- self.put_metadata(metadata)
- return True
- else:
- extension = ''
- if metadata[X_OBJECT_TYPE] == MARKER_DIR:
- self.create_dir_object(os.path.join(self.datadir, self.obj))
- self.put_metadata(metadata)
- self.data_file = self.datadir + '/' + self.obj
- return True
- #Check if directory already exists.
- if self.is_dir:
- self.logger.error('Directory already exists %s/%s' % \
- (self.datadir , self.obj))
- return False
- #metadata['name'] = self.name
- timestamp = normalize_timestamp(metadata[X_TIMESTAMP])
- write_metadata(tmppath, metadata)
- if X_CONTENT_LENGTH in metadata:
- self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
- tpool.execute(os.fsync, fd)
- if self.obj_path:
- dir_objs = self.obj_path.split('/')
- tmp_path = ''
- if len(dir_objs):
- for dir_name in dir_objs:
- if tmp_path:
- tmp_path = tmp_path + '/' + dir_name
- else:
- tmp_path = dir_name
- if not self.create_dir_object(os.path.join(self.container_path,
- tmp_path)):
- self.logger.error("Failed in subdir %s",\
- os.path.join(self.container_path,tmp_path))
- return False
-
- renamer(tmppath, os.path.join(self.datadir,
- self.obj + extension))
- do_chown(os.path.join(self.datadir, self.obj + extension), \
- self.uid, self.gid)
- self.metadata = metadata
- #self.logger.error("Meta %s", self.metadata)
- self.data_file = self.datadir + '/' + self.obj + extension
- return True
-
-
- def unlinkold(self, timestamp):
- """
- Remove any older versions of the object file. Any file that has an
- older timestamp than timestamp will be deleted.
-
- :param timestamp: timestamp to compare with each file
- """
- if self.metadata and self.metadata['X-Timestamp'] != timestamp:
- self.unlink()
-
- def unlink(self):
- """
- Remove the file.
- """
- #Marker dir.
- if self.is_dir:
- rmdirs(os.path.join(self.datadir, self.obj))
- if not os.path.isdir(os.path.join(self.datadir, self.obj)):
- self.metadata = {}
- self.data_file = None
- else:
- logging.error('Unable to delete dir %s' % os.path.join(self.datadir, self.obj))
- return
-
- for fname in do_listdir(self.datadir):
- if fname == self.obj:
- try:
- do_unlink(os.path.join(self.datadir, fname))
- except OSError, err:
- if err.errno != errno.ENOENT:
- raise
-
- #Remove entire path for object.
- #remove_dir_path(self.obj_path, self.container_path)
-
- self.metadata = {}
- self.data_file = None
-
- def get_data_file_size(self):
- """
- Returns the os.path.getsize for the file. Raises an exception if this
- file does not match the Content-Length stored in the metadata. Or if
- self.data_file does not exist.
-
- :returns: file size as an int
- :raises DiskFileError: on file size mismatch.
- :raises DiskFileNotExist: on file not existing (including deleted)
- """
- #Marker directory.
- if self.is_dir:
- return 0
- try:
- file_size = 0
- if self.data_file:
- file_size = os.path.getsize(self.data_file)
- if X_CONTENT_LENGTH in self.metadata:
- metadata_size = int(self.metadata[X_CONTENT_LENGTH])
- if file_size != metadata_size:
- self.metadata[X_CONTENT_LENGTH] = file_size
- self.update_object(self.metadata)
-
- return file_size
- except OSError, err:
- if err.errno != errno.ENOENT:
- raise
- raise DiskFileNotExist('Data File does not exist.')
-
- def update_object(self, metadata):
- obj_path = self.datadir + '/' + self.obj
- write_metadata(obj_path, metadata)
- self.metadata = metadata
-
- def filter_metadata(self):
- if X_TYPE in self.metadata:
- self.metadata.pop(X_TYPE)
- if X_OBJECT_TYPE in self.metadata:
- self.metadata.pop(X_OBJECT_TYPE)
-
- @contextmanager
- def mkstemp(self):
- """Contextmanager to make a temporary file."""
-
- if not os.path.exists(self.tmpdir):
- mkdirs(self.tmpdir)
- fd, tmppath = mkstemp(dir=self.tmpdir)
- try:
- yield fd, tmppath
- finally:
- try:
- os.close(fd)
- except OSError:
- pass
- try:
- os.unlink(tmppath)
- except OSError:
- pass
diff --git a/swift/1.4.8/plugins/Glusterfs.py b/swift/1.4.8/plugins/Glusterfs.py
deleted file mode 100644
index 5e191e1bd8f..00000000000
--- a/swift/1.4.8/plugins/Glusterfs.py
+++ /dev/null
@@ -1,131 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-import os, fcntl, time
-from ConfigParser import ConfigParser
-from swift.common.utils import TRUE_VALUES
-from hashlib import md5
-from swift.plugins.utils import mkdirs
-
-class Glusterfs(object):
- def __init__(self):
- self.name = 'glusterfs'
- self.fs_conf = ConfigParser()
- self.fs_conf.read(os.path.join('/etc/swift', 'fs.conf'))
- self.mount_path = self.fs_conf.get('DEFAULT', 'mount_path', '/mnt/gluster-object')
- self.auth_account = self.fs_conf.get('DEFAULT', 'auth_account', 'auth')
- self.mount_ip = self.fs_conf.get('DEFAULT', 'mount_ip', 'localhost')
- self.remote_cluster = self.fs_conf.get('DEFAULT', 'remote_cluster', False) in TRUE_VALUES
- self.object_only = self.fs_conf.get('DEFAULT', 'object_only', "no") in TRUE_VALUES
-
- def busy_wait(self, mount_path):
- # Iterate for definite number of time over a given
- # interval for successful mount
- for i in range(0, 5):
- if os.path.ismount(os.path.join(mount_path)):
- return True
- time.sleep(2)
- return False
-
- def mount(self, account):
- mount_path = os.path.join(self.mount_path, account)
- export = self.get_export_from_account_id(account)
-
- pid_dir = "/var/lib/glusterd/vols/%s/run/" %export
- pid_file = os.path.join(pid_dir, 'swift.pid');
-
- if not os.path.exists(pid_dir):
- mkdirs(pid_dir)
-
- fd = os.open(pid_file, os.O_CREAT|os.O_RDWR)
- with os.fdopen(fd, 'r+b') as f:
- try:
- fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
- except:
- ex = sys.exc_info()[1]
- if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
- # This means that some other process is mounting the
- # filesystem, so wait for the mount process to complete
- return self.busy_wait(mount_path)
-
- mnt_cmd = 'mount -t glusterfs %s:%s %s' % (self.mount_ip, export, \
- mount_path)
- if os.system(mnt_cmd) or not self.busy_wait(mount_path):
- raise Exception('Mount failed %s: %s' % (self.name, mnt_cmd))
- return False
- return True
-
- def unmount(self, mount_path):
- umnt_cmd = 'umount %s 2>> /dev/null' % mount_path
- if os.system(umnt_cmd):
- logging.error('Unable to unmount %s %s' % (mount_path, self.name))
-
- def get_export_list_local(self):
- export_list = []
- cmnd = 'gluster volume info'
-
- if os.system(cmnd + ' >> /dev/null'):
- raise Exception('Getting volume failed %s', self.name)
- return export_list
-
- fp = os.popen(cmnd)
- while True:
- item = fp.readline()
- if not item:
- break
- item = item.strip('\n').strip(' ')
- if item.lower().startswith('volume name:'):
- export_list.append(item.split(':')[1].strip(' '))
-
- return export_list
-
-
- def get_export_list_remote(self):
- export_list = []
- cmnd = 'ssh %s gluster volume info' % self.mount_ip
-
- if os.system(cmnd + ' >> /dev/null'):
- raise Exception('Getting volume info failed %s, make sure to have \
- passwordless ssh on %s', self.name, self.mount_ip)
- return export_list
-
- fp = os.popen(cmnd)
- while True:
- item = fp.readline()
- if not item:
- break
- item = item.strip('\n').strip(' ')
- if item.lower().startswith('volume name:'):
- export_list.append(item.split(':')[1].strip(' '))
-
- return export_list
-
- def get_export_list(self):
- if self.remote_cluster:
- return self.get_export_list_remote()
- else:
- return self.get_export_list_local()
-
- def get_export_from_account_id(self, account):
- if not account:
- print 'account is none, returning'
- raise AttributeError
-
- for export in self.get_export_list():
- if account == 'AUTH_' + export:
- return export
-
- raise Exception('No export found %s %s' % (account, self.name))
- return None
diff --git a/swift/1.4.8/plugins/__init__.py b/swift/1.4.8/plugins/__init__.py
deleted file mode 100644
index 3d98c960c14..00000000000
--- a/swift/1.4.8/plugins/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from Glusterfs import Glusterfs
diff --git a/swift/1.4.8/plugins/conf/account-server/1.conf b/swift/1.4.8/plugins/conf/account-server/1.conf
deleted file mode 100644
index 54cbf654003..00000000000
--- a/swift/1.4.8/plugins/conf/account-server/1.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6012
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster account-server
-
-[app:account-server]
-use = egg:swift#account
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[account-replicator]
-vm_test_mode = yes
-
-[account-auditor]
-
-[account-reaper]
diff --git a/swift/1.4.8/plugins/conf/account.builder b/swift/1.4.8/plugins/conf/account.builder
deleted file mode 100644
index 2943b9cfb49..00000000000
--- a/swift/1.4.8/plugins/conf/account.builder
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/account.ring.gz b/swift/1.4.8/plugins/conf/account.ring.gz
deleted file mode 100644
index e1a5e6ae25e..00000000000
--- a/swift/1.4.8/plugins/conf/account.ring.gz
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/container-server/1.conf b/swift/1.4.8/plugins/conf/container-server/1.conf
deleted file mode 100644
index 9e776838f58..00000000000
--- a/swift/1.4.8/plugins/conf/container-server/1.conf
+++ /dev/null
@@ -1,24 +0,0 @@
-[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6011
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster container-server
-
-[app:container-server]
-use = egg:swift#container
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[container-replicator]
-vm_test_mode = yes
-
-[container-updater]
-
-[container-auditor]
-
-[container-sync]
diff --git a/swift/1.4.8/plugins/conf/container.builder b/swift/1.4.8/plugins/conf/container.builder
deleted file mode 100644
index 6031d79df60..00000000000
--- a/swift/1.4.8/plugins/conf/container.builder
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/container.ring.gz b/swift/1.4.8/plugins/conf/container.ring.gz
deleted file mode 100644
index fdbcb18b2bf..00000000000
--- a/swift/1.4.8/plugins/conf/container.ring.gz
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/fs.conf b/swift/1.4.8/plugins/conf/fs.conf
deleted file mode 100644
index b6ec5121f9f..00000000000
--- a/swift/1.4.8/plugins/conf/fs.conf
+++ /dev/null
@@ -1,9 +0,0 @@
-[DEFAULT]
-mount_path = /mnt/gluster-object
-auth_account = auth
-#ip of the fs server.
-mount_ip = localhost
-#fs server need not be local, remote server can also be used,
-#set remote_cluster=yes for using remote server.
-remote_cluster = no
-object_only = no \ No newline at end of file
diff --git a/swift/1.4.8/plugins/conf/object-server/1.conf b/swift/1.4.8/plugins/conf/object-server/1.conf
deleted file mode 100644
index f191cefcf01..00000000000
--- a/swift/1.4.8/plugins/conf/object-server/1.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6010
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster object-server
-
-[app:object-server]
-use = egg:swift#object
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[object-replicator]
-vm_test_mode = yes
-
-[object-updater]
-
-[object-auditor]
diff --git a/swift/1.4.8/plugins/conf/object.builder b/swift/1.4.8/plugins/conf/object.builder
deleted file mode 100644
index ce45350595d..00000000000
--- a/swift/1.4.8/plugins/conf/object.builder
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/object.ring.gz b/swift/1.4.8/plugins/conf/object.ring.gz
deleted file mode 100644
index 73e88d58918..00000000000
--- a/swift/1.4.8/plugins/conf/object.ring.gz
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/proxy-server.conf b/swift/1.4.8/plugins/conf/proxy-server.conf
deleted file mode 100644
index 1fcde8e0d3a..00000000000
--- a/swift/1.4.8/plugins/conf/proxy-server.conf
+++ /dev/null
@@ -1,21 +0,0 @@
-[DEFAULT]
-bind_port = 8080
-user = root
-log_facility = LOG_LOCAL1
-
-[pipeline:main]
-pipeline = healthcheck cache tempauth proxy-server
-
-[app:proxy-server]
-use = egg:swift#proxy
-allow_account_management = true
-account_autocreate = true
-
-[filter:tempauth]
-use = egg:swift#tempauth
-
-[filter:healthcheck]
-use = egg:swift#healthcheck
-
-[filter:cache]
-use = egg:swift#memcache
diff --git a/swift/1.4.8/plugins/conf/swift.conf b/swift/1.4.8/plugins/conf/swift.conf
deleted file mode 100644
index f9864e35231..00000000000
--- a/swift/1.4.8/plugins/conf/swift.conf
+++ /dev/null
@@ -1,7 +0,0 @@
-[DEFAULT]
-Enable_plugin = yes
-
-[swift-hash]
-# random unique string that can never change (DO NOT LOSE)
-swift_hash_path_suffix = gluster
-
diff --git a/swift/1.4.8/plugins/constraints.py b/swift/1.4.8/plugins/constraints.py
deleted file mode 100644
index 6be853629fb..00000000000
--- a/swift/1.4.8/plugins/constraints.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import logging
-from swift.common.constraints import check_utf8, check_metadata
-
-from webob.exc import HTTPBadRequest, HTTPLengthRequired, \
- HTTPRequestEntityTooLarge
-
-
-#: Max file size allowed for objects
-MAX_FILE_SIZE = 0xffffffffffffffff
-#: Max length of the name of a key for metadata
-MAX_META_NAME_LENGTH = 128
-#: Max length of the value of a key for metadata
-MAX_META_VALUE_LENGTH = 256
-#: Max number of metadata items
-MAX_META_COUNT = 90
-#: Max overall size of metadata
-MAX_META_OVERALL_SIZE = 4096
-#: Max object name length
-MAX_OBJECT_NAME_LENGTH = 255
-#: Max object list length of a get request for a container
-CONTAINER_LISTING_LIMIT = 10000
-#: Max container list length of a get request for an account
-ACCOUNT_LISTING_LIMIT = 10000
-MAX_ACCOUNT_NAME_LENGTH = 255
-MAX_CONTAINER_NAME_LENGTH = 255
-
-def validate_obj_name(obj):
- if len(obj) > MAX_OBJECT_NAME_LENGTH:
- logging.error('Object name too long %s' % obj)
- return False
- if obj == '.' or obj == '..':
- logging.error('Object name cannot be . or .. %s' % obj)
- return False
-
- return True
-
-def check_object_creation(req, object_name):
- """
- Check to ensure that everything is alright about an object to be created.
-
- :param req: HTTP request object
- :param object_name: name of object to be created
- :raises HTTPRequestEntityTooLarge: the object is too large
- :raises HTTPLengthRequered: missing content-length header and not
- a chunked request
- :raises HTTPBadRequest: missing or bad content-type header, or
- bad metadata
- """
- if req.content_length and req.content_length > MAX_FILE_SIZE:
- return HTTPRequestEntityTooLarge(body='Your request is too large.',
- request=req, content_type='text/plain')
- if req.content_length is None and \
- req.headers.get('transfer-encoding') != 'chunked':
- return HTTPLengthRequired(request=req)
- if 'X-Copy-From' in req.headers and req.content_length:
- return HTTPBadRequest(body='Copy requests require a zero byte body',
- request=req, content_type='text/plain')
- for obj in object_name.split('/'):
- if not validate_obj_name(obj):
- return HTTPBadRequest(body='Invalid object name %s' %
- (obj), request=req,
- content_type='text/plain')
- if 'Content-Type' not in req.headers:
- return HTTPBadRequest(request=req, content_type='text/plain',
- body='No content type')
- if not check_utf8(req.headers['Content-Type']):
- return HTTPBadRequest(request=req, body='Invalid Content-Type',
- content_type='text/plain')
- if 'x-object-manifest' in req.headers:
- value = req.headers['x-object-manifest']
- container = prefix = None
- try:
- container, prefix = value.split('/', 1)
- except ValueError:
- pass
- if not container or not prefix or '?' in value or '&' in value or \
- prefix[0] == '/':
- return HTTPBadRequest(request=req,
- body='X-Object-Manifest must in the format container/prefix')
- return check_metadata(req, 'object')
-
diff --git a/swift/1.4.8/plugins/utils.py b/swift/1.4.8/plugins/utils.py
deleted file mode 100644
index d8fc7a43615..00000000000
--- a/swift/1.4.8/plugins/utils.py
+++ /dev/null
@@ -1,679 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import os
-import errno
-from hashlib import md5
-from swift.common.utils import normalize_timestamp
-from xattr import setxattr, removexattr, getxattr, removexattr
-import cPickle as pickle
-
-X_CONTENT_TYPE = 'Content-Type'
-X_CONTENT_LENGTH = 'Content-Length'
-X_TIMESTAMP = 'X-Timestamp'
-X_PUT_TIMESTAMP = 'X-PUT-Timestamp'
-X_TYPE = 'X-Type'
-X_ETAG = 'ETag'
-X_OBJECTS_COUNT = 'X-Object-Count'
-X_BYTES_USED = 'X-Bytes-Used'
-X_CONTAINER_COUNT = 'X-Container-Count'
-X_OBJECT_TYPE = 'X-Object-Type'
-DIR_TYPE = 'application/directory'
-ACCOUNT = 'Account'
-MOUNT_PATH = '/mnt/gluster-object'
-METADATA_KEY = 'user.swift.metadata'
-CONTAINER = 'container'
-DIR = 'dir'
-MARKER_DIR = 'marker_dir'
-FILE = 'file'
-DIR_TYPE = 'application/directory'
-FILE_TYPE = 'application/octet-stream'
-OBJECT = 'Object'
-OBJECT_TYPE = 'application/octet-stream'
-DEFAULT_UID = -1
-DEFAULT_GID = -1
-PICKLE_PROTOCOL = 2
-CHUNK_SIZE = 65536
-
-
-def mkdirs(path):
- """
- Ensures the path is a directory or makes it if not. Errors if the path
- exists but is a file or on permissions failure.
-
- :param path: path to create
- """
- if not os.path.isdir(path):
- try:
- do_makedirs(path)
- except OSError, err:
- #TODO: check, isdir will fail if mounted and volume stopped.
- #if err.errno != errno.EEXIST or not os.path.isdir(path)
- if err.errno != errno.EEXIST:
- raise
-
-def rmdirs(path):
- if os.path.isdir(path) and dir_empty(path):
- do_rmdir(path)
- else:
- logging.error("rmdirs failed dir may not be empty or not valid dir")
- return False
-
-def strip_obj_storage_path(path, string='/mnt/gluster-object'):
- """
- strip /mnt/gluster-object
- """
- return path.replace(string, '').strip('/')
-
-def do_mkdir(path):
- try:
- os.mkdir(path)
- except Exception, err:
- logging.exception("Mkdir failed on %s err: %s", path, str(err))
- if err.errno != errno.EEXIST:
- raise
- return True
-
-def do_makedirs(path):
- try:
- os.makedirs(path)
- except Exception, err:
- logging.exception("Makedirs failed on %s err: %s", path, str(err))
- if err.errno != errno.EEXIST:
- raise
- return True
-
-
-def do_listdir(path):
- try:
- buf = os.listdir(path)
- except Exception, err:
- logging.exception("Listdir failed on %s err: %s", path, str(err))
- raise
- return buf
-
-def do_chown(path, uid, gid):
- try:
- os.chown(path, uid, gid)
- except Exception, err:
- logging.exception("Chown failed on %s err: %s", path, str(err))
- raise
- return True
-
-def do_stat(path):
- try:
- #Check for fd.
- if isinstance(path, int):
- buf = os.fstat(path)
- else:
- buf = os.stat(path)
- except Exception, err:
- logging.exception("Stat failed on %s err: %s", path, str(err))
- raise
-
- return buf
-
-def do_open(path, mode):
- try:
- fd = open(path, mode)
- except Exception, err:
- logging.exception("Open failed on %s err: %s", path, str(err))
- raise
- return fd
-
-def do_close(fd):
- #fd could be file or int type.
- try:
- if isinstance(fd, int):
- os.close(fd)
- else:
- fd.close()
- except Exception, err:
- logging.exception("Close failed on %s err: %s", fd, str(err))
- raise
- return True
-
-def do_unlink(path, log = True):
- try:
- os.unlink(path)
- except Exception, err:
- if log:
- logging.exception("Unlink failed on %s err: %s", path, str(err))
- if err.errno != errno.ENOENT:
- raise
- return True
-
-def do_rmdir(path):
- try:
- os.rmdir(path)
- except Exception, err:
- logging.exception("Rmdir failed on %s err: %s", path, str(err))
- if err.errno != errno.ENOENT:
- raise
- return True
-
-def do_rename(old_path, new_path):
- try:
- os.rename(old_path, new_path)
- except Exception, err:
- logging.exception("Rename failed on %s to %s err: %s", old_path, new_path, \
- str(err))
- raise
- return True
-
-def do_setxattr(path, key, value):
- fd = None
- if not os.path.isdir(path):
- fd = do_open(path, 'rb')
- else:
- fd = path
- if fd or os.path.isdir(path):
- try:
- setxattr(fd, key, value)
- except Exception, err:
- logging.exception("setxattr failed on %s key %s err: %s", path, key, str(err))
- raise
- finally:
- if fd and not os.path.isdir(path):
- do_close(fd)
- else:
- logging.error("Open failed path %s", path)
- return False
- return True
-
-
-
-def do_getxattr(path, key, log = True):
- fd = None
- if not os.path.isdir(path):
- fd = do_open(path, 'rb')
- else:
- fd = path
- if fd or os.path.isdir(path):
- try:
- value = getxattr(fd, key)
- except Exception, err:
- if log:
- logging.exception("getxattr failed on %s key %s err: %s", path, key, str(err))
- raise
- finally:
- if fd and not os.path.isdir(path):
- do_close(fd)
- else:
- logging.error("Open failed path %s", path)
- return False
- return value
-
-def do_removexattr(path, key):
- fd = None
- if not os.path.isdir(path):
- fd = do_open(path, 'rb')
- else:
- fd = path
- if fd or os.path.isdir(path):
- try:
- removexattr(fd, key)
- except Exception, err:
- logging.exception("removexattr failed on %s key %s err: %s", path, key, str(err))
- raise
- finally:
- if fd and not os.path.isdir(path):
- do_close(fd)
- else:
- logging.error("Open failed path %s", path)
- return False
- return True
-
-def read_metadata(path):
- """
- Helper function to read the pickled metadata from a File/Directory .
-
- :param path: File/Directory to read metadata from.
-
- :returns: dictionary of metadata
- """
- metadata = ''
- key = 0
- while True:
- try:
- metadata += do_getxattr(path, '%s%s' % (METADATA_KEY, (key or '')),
- log = False)
- except Exception:
- break
- key += 1
- if metadata:
- return pickle.loads(metadata)
- else:
- return {}
-
-
-def write_metadata(path, metadata):
- """
- Helper function to write pickled metadata for a File/Directory.
-
- :param path: File/Directory path to write the metadata
- :param metadata: metadata to write
- """
- metastr = pickle.dumps(metadata, PICKLE_PROTOCOL)
- key = 0
- while metastr:
- do_setxattr(path, '%s%s' % (METADATA_KEY, key or ''), metastr[:254])
- metastr = metastr[254:]
- key += 1
-
-def clean_metadata(path):
- key = 0
- while True:
- value = do_getxattr(path, '%s%s' % (METADATA_KEY, (key or '')))
- do_removexattr(path, '%s%s' % (METADATA_KEY, (key or '')))
- key += 1
-
-
-def dir_empty(path):
- """
- Return true if directory/container is empty.
- :param path: Directory path.
- :returns: True/False.
- """
- if os.path.isdir(path):
- try:
- files = do_listdir(path)
- except Exception, err:
- logging.exception("listdir failed on %s err: %s", path, str(err))
- raise
- if not files:
- return True
- else:
- return False
- else:
- if not os.path.exists(path):
- return True
-
-
-def get_device_from_account(account):
- if account.startswith(RESELLER_PREFIX):
- device = account.replace(RESELLER_PREFIX, '', 1)
- return device
-
-def check_user_xattr(path):
- if not os.path.exists(path):
- return False
- do_setxattr(path, 'user.test.key1', 'value1')
- try:
- removexattr(path, 'user.test.key1')
- except Exception, err:
- logging.exception("removexattr failed on %s err: %s", path, str(err))
- #Remove xattr may fail in case of concurrent remove.
- return True
-
-
-def _check_valid_account(account, fs_object):
- mount_path = getattr(fs_object, 'mount_path', MOUNT_PATH)
-
- if os.path.ismount(os.path.join(mount_path, account)):
- return True
-
- if not check_account_exists(fs_object.get_export_from_account_id(account), fs_object):
- logging.error('Account not present %s', account)
- return False
-
- if not os.path.isdir(os.path.join(mount_path, account)):
- mkdirs(os.path.join(mount_path, account))
-
- if fs_object:
- if not fs_object.mount(account):
- return False
-
- return True
-
-def check_valid_account(account, fs_object):
- return _check_valid_account(account, fs_object)
-
-def validate_container(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_TYPE not in metadata.keys() or \
- X_TIMESTAMP not in metadata.keys() or \
- X_PUT_TIMESTAMP not in metadata.keys() or \
- X_OBJECTS_COUNT not in metadata.keys() or \
- X_BYTES_USED not in metadata.keys():
- #logging.error('Container error %s' % metadata)
- return False
-
- if metadata[X_TYPE] == CONTAINER:
- return True
-
- logging.error('Container error %s' % metadata)
- return False
-
-def validate_account(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_TYPE not in metadata.keys() or \
- X_TIMESTAMP not in metadata.keys() or \
- X_PUT_TIMESTAMP not in metadata.keys() or \
- X_OBJECTS_COUNT not in metadata.keys() or \
- X_BYTES_USED not in metadata.keys() or \
- X_CONTAINER_COUNT not in metadata.keys():
- #logging.error('Account error %s' % metadata)
- return False
-
- if metadata[X_TYPE] == ACCOUNT:
- return True
-
- logging.error('Account error %s' % metadata)
- return False
-
-def validate_object(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_TIMESTAMP not in metadata.keys() or \
- X_CONTENT_TYPE not in metadata.keys() or \
- X_ETAG not in metadata.keys() or \
- X_CONTENT_LENGTH not in metadata.keys() or \
- X_TYPE not in metadata.keys() or \
- X_OBJECT_TYPE not in metadata.keys():
- #logging.error('Object error %s' % metadata)
- return False
-
- if metadata[X_TYPE] == OBJECT:
- return True
-
- logging.error('Object error %s' % metadata)
- return False
-
-def is_marker(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_OBJECT_TYPE not in metadata.keys():
- logging.error('X_OBJECT_TYPE missing %s' % metadata)
- return False
-
- if metadata[X_OBJECT_TYPE] == MARKER_DIR:
- return True
- else:
- return False
-
-def _update_list(path, const_path, src_list, reg_file=True, object_count=0,
- bytes_used=0, obj_list=[]):
- obj_path = strip_obj_storage_path(path, const_path)
-
- for i in src_list:
- if obj_path:
- obj_list.append(os.path.join(obj_path, i))
- else:
- obj_list.append(i)
-
- object_count += 1
-
- if reg_file:
- bytes_used += os.path.getsize(path + '/' + i)
-
- return object_count, bytes_used
-
-def update_list(path, const_path, dirs=[], files=[], object_count=0,
- bytes_used=0, obj_list=[]):
- object_count, bytes_used = _update_list (path, const_path, files, True,
- object_count, bytes_used,
- obj_list)
- object_count, bytes_used = _update_list (path, const_path, dirs, False,
- object_count, bytes_used,
- obj_list)
- return object_count, bytes_used
-
-def get_container_details_from_fs(cont_path, const_path,
- memcache=None):
- """
- get container details by traversing the filesystem
- """
- bytes_used = 0
- object_count = 0
- obj_list=[]
- dir_list = []
-
- if os.path.isdir(cont_path):
- for (path, dirs, files) in os.walk(cont_path):
- object_count, bytes_used = update_list(path, const_path, dirs, files,
- object_count, bytes_used,
- obj_list)
-
- dir_list.append(path + ':' + str(do_stat(path).st_mtime))
-
- if memcache:
- memcache.set(strip_obj_storage_path(cont_path), obj_list)
- memcache.set(strip_obj_storage_path(cont_path) + '-dir_list',
- ','.join(dir_list))
- memcache.set(strip_obj_storage_path(cont_path) + '-cont_meta',
- [object_count, bytes_used])
-
- return obj_list, object_count, bytes_used
-
-def get_container_details_from_memcache(cont_path, const_path,
- memcache):
- """
- get container details stored in memcache
- """
-
- bytes_used = 0
- object_count = 0
- obj_list=[]
-
- dir_contents = memcache.get(strip_obj_storage_path(cont_path) + '-dir_list')
- if not dir_contents:
- return get_container_details_from_fs(cont_path, const_path,
- memcache=memcache)
-
- for i in dir_contents.split(','):
- path, mtime = i.split(':')
- if mtime != str(do_stat(path).st_mtime):
- return get_container_details_from_fs(cont_path, const_path,
- memcache=memcache)
-
- obj_list = memcache.get(strip_obj_storage_path(cont_path))
-
- object_count, bytes_used = memcache.get(strip_obj_storage_path(cont_path) + '-cont_meta')
-
- return obj_list, object_count, bytes_used
-
-def get_container_details(cont_path, memcache=None):
- """
- Return object_list, object_count and bytes_used.
- """
- if memcache:
- object_list, object_count, bytes_used = get_container_details_from_memcache(cont_path, cont_path,
- memcache=memcache)
- else:
- object_list, object_count, bytes_used = get_container_details_from_fs(cont_path, cont_path)
-
- return object_list, object_count, bytes_used
-
-def get_account_details_from_fs(acc_path, memcache=None):
- container_list = []
- container_count = 0
-
- if os.path.isdir(acc_path):
- for name in do_listdir(acc_path):
- if not os.path.isdir(acc_path + '/' + name) or \
- name.lower() == 'tmp' or name.lower() == 'async_pending':
- continue
- container_count += 1
- container_list.append(name)
-
- if memcache:
- memcache.set(strip_obj_storage_path(acc_path) + '_container_list', container_list)
- memcache.set(strip_obj_storage_path(acc_path)+'_mtime', str(do_stat(acc_path).st_mtime))
- memcache.set(strip_obj_storage_path(acc_path)+'_container_count', container_count)
-
- return container_list, container_count
-
-def get_account_details_from_memcache(acc_path, memcache=None):
- if memcache:
- mtime = memcache.get(strip_obj_storage_path(acc_path)+'_mtime')
- if not mtime or mtime != str(do_stat(acc_path).st_mtime):
- return get_account_details_from_fs(acc_path, memcache)
- container_list = memcache.get(strip_obj_storage_path(acc_path) + '_container_list')
- container_count = memcache.get(strip_obj_storage_path(acc_path)+'_container_count')
- return container_list, container_count
-
-
-def get_account_details(acc_path, memcache=None):
- """
- Return container_list and container_count.
- """
- if memcache:
- return get_account_details_from_memcache(acc_path, memcache)
- else:
- return get_account_details_from_fs(acc_path, memcache)
-
-
-
-def get_etag(path):
- etag = None
- if os.path.exists(path):
- etag = md5()
- if not os.path.isdir(path):
- fp = open(path, 'rb')
- if fp:
- while True:
- chunk = fp.read(CHUNK_SIZE)
- if chunk:
- etag.update(chunk)
- else:
- break
- fp.close()
-
- etag = etag.hexdigest()
-
- return etag
-
-
-def get_object_metadata(obj_path):
- """
- Return metadata of object.
- """
- metadata = {}
- if os.path.exists(obj_path):
- if not os.path.isdir(obj_path):
- metadata = {
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(obj_path)),
- X_CONTENT_TYPE: FILE_TYPE,
- X_ETAG: get_etag(obj_path),
- X_CONTENT_LENGTH: os.path.getsize(obj_path),
- X_TYPE: OBJECT,
- X_OBJECT_TYPE: FILE,
- }
- else:
- metadata = {
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(obj_path)),
- X_CONTENT_TYPE: DIR_TYPE,
- X_ETAG: get_etag(obj_path),
- X_CONTENT_LENGTH: 0,
- X_TYPE: OBJECT,
- X_OBJECT_TYPE: DIR,
- }
-
- return metadata
-
-def get_container_metadata(cont_path, memcache=None):
- objects = []
- object_count = 0
- bytes_used = 0
- objects, object_count, bytes_used = get_container_details(cont_path,
- memcache=memcache)
- metadata = {X_TYPE: CONTAINER,
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(cont_path)),
- X_PUT_TIMESTAMP: normalize_timestamp(os.path.getmtime(cont_path)),
- X_OBJECTS_COUNT: object_count,
- X_BYTES_USED: bytes_used}
- return metadata
-
-def get_account_metadata(acc_path, memcache=None):
- containers = []
- container_count = 0
- containers, container_count = get_account_details(acc_path, memcache)
- metadata = {X_TYPE: ACCOUNT,
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(acc_path)),
- X_PUT_TIMESTAMP: normalize_timestamp(os.path.getmtime(acc_path)),
- X_OBJECTS_COUNT: 0,
- X_BYTES_USED: 0,
- X_CONTAINER_COUNT: container_count}
- return metadata
-
-def restore_object(obj_path, metadata):
- meta = read_metadata(obj_path)
- if meta:
- meta.update(metadata)
- write_metadata(obj_path, meta)
- else:
- write_metadata(obj_path, metadata)
-
-def restore_container(cont_path, metadata):
- meta = read_metadata(cont_path)
- if meta:
- meta.update(metadata)
- write_metadata(cont_path, meta)
- else:
- write_metadata(cont_path, metadata)
-
-def restore_account(acc_path, metadata):
- meta = read_metadata(acc_path)
- if meta:
- meta.update(metadata)
- write_metadata(acc_path, meta)
- else:
- write_metadata(acc_path, metadata)
-
-def create_object_metadata(obj_path):
- meta = get_object_metadata(obj_path)
- restore_object(obj_path, meta)
- return meta
-
-def create_container_metadata(cont_path, memcache=None):
- meta = get_container_metadata(cont_path, memcache)
- restore_container(cont_path, meta)
- return meta
-
-def create_account_metadata(acc_path, memcache=None):
- meta = get_account_metadata(acc_path, memcache)
- restore_account(acc_path, meta)
- return meta
-
-
-def check_account_exists(account, fs_object):
- if account not in get_account_list(fs_object):
- logging.error('Account not exists %s' % account)
- return False
- else:
- return True
-
-def get_account_list(fs_object):
- account_list = []
- if fs_object:
- account_list = fs_object.get_export_list()
- return account_list
-
-
-def get_account_id(account):
- return RESELLER_PREFIX + md5(account + HASH_PATH_SUFFIX).hexdigest()
-
diff --git a/swift/1.4.8/swift.diff b/swift/1.4.8/swift.diff
deleted file mode 100644
index 8ed5070e11a..00000000000
--- a/swift/1.4.8/swift.diff
+++ /dev/null
@@ -1,797 +0,0 @@
-diff --git a/setup.py b/setup.py
-index d195d34..b5b5ca2 100644
---- a/setup.py
-+++ b/setup.py
-@@ -1,5 +1,6 @@
- #!/usr/bin/python
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -94,6 +95,7 @@ setup(
- 'tempurl=swift.common.middleware.tempurl:filter_factory',
- 'formpost=swift.common.middleware.formpost:filter_factory',
- 'name_check=swift.common.middleware.name_check:filter_factory',
-+ 'gluster=swift.common.middleware.gluster:filter_factory',
- ],
- },
- )
-diff --git a/swift/account/server.py b/swift/account/server.py
-index 800b3c0..cb17970 100644
---- a/swift/account/server.py
-+++ b/swift/account/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -31,7 +32,7 @@ import simplejson
-
- from swift.common.db import AccountBroker
- from swift.common.utils import get_logger, get_param, hash_path, \
-- normalize_timestamp, split_path, storage_directory
-+ normalize_timestamp, split_path, storage_directory, plugin_enabled
- from swift.common.constraints import ACCOUNT_LISTING_LIMIT, \
- check_mount, check_float, check_utf8
- from swift.common.db_replicator import ReplicatorRpc
-@@ -39,6 +40,8 @@ from swift.common.db_replicator import ReplicatorRpc
-
- DATADIR = 'accounts'
-
-+if plugin_enabled():
-+ from swift.plugins.DiskDir import DiskAccount
-
- class AccountController(object):
- """WSGI controller for the account server."""
-@@ -52,8 +55,12 @@ class AccountController(object):
- self.mount_check, logger=self.logger)
- self.auto_create_account_prefix = \
- conf.get('auto_create_account_prefix') or '.'
-+ self.fs_object = None
-
- def _get_account_broker(self, drive, part, account):
-+ if self.fs_object:
-+ return DiskAccount(self.root, account, self.fs_object);
-+
- hsh = hash_path(account)
- db_dir = storage_directory(DATADIR, part, hsh)
- db_path = os.path.join(self.root, drive, db_dir, hsh + '.db')
-@@ -121,9 +128,15 @@ class AccountController(object):
- if broker.is_deleted():
- return HTTPConflict(request=req)
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower().startswith('x-account-meta-'))
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
-+
- if metadata:
- broker.update_metadata(metadata)
- if created:
-@@ -153,6 +166,9 @@ class AccountController(object):
- broker.stale_reads_ok = True
- if broker.is_deleted():
- return HTTPNotFound(request=req)
-+ if self.fs_object and not self.fs_object.object_only:
-+ broker.list_containers_iter(None, None,None,
-+ None, None)
- info = broker.get_info()
- headers = {
- 'X-Account-Container-Count': info['container_count'],
-@@ -164,9 +180,16 @@ class AccountController(object):
- container_ts = broker.get_container_timestamp(container)
- if container_ts is not None:
- headers['X-Container-Timestamp'] = container_ts
-- headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '')
-+ if not self.fs_object:
-+ headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '')
-+ else:
-+ headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '')
-+
-+
- return HTTPNoContent(request=req, headers=headers)
-
- def GET(self, req):
-@@ -190,9 +213,15 @@ class AccountController(object):
- 'X-Account-Bytes-Used': info['bytes_used'],
- 'X-Timestamp': info['created_at'],
- 'X-PUT-Timestamp': info['put_timestamp']}
-- resp_headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '')
-+ if not self.fs_object:
-+ resp_headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '')
-+ else:
-+ resp_headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '')
-+
- try:
- prefix = get_param(req, 'prefix')
- delimiter = get_param(req, 'delimiter')
-@@ -224,6 +253,7 @@ class AccountController(object):
- content_type='text/plain', request=req)
- account_list = broker.list_containers_iter(limit, marker, end_marker,
- prefix, delimiter)
-+
- if out_content_type == 'application/json':
- json_pattern = ['"name":%s', '"count":%s', '"bytes":%s']
- json_pattern = '{' + ','.join(json_pattern) + '}'
-@@ -298,15 +328,29 @@ class AccountController(object):
- return HTTPNotFound(request=req)
- timestamp = normalize_timestamp(req.headers['x-timestamp'])
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower().startswith('x-account-meta-'))
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
- if metadata:
- broker.update_metadata(metadata)
- return HTTPNoContent(request=req)
-
-+ def plugin(self, env):
-+ if env.get('Gluster_enabled', False):
-+ self.fs_object = env.get('fs_object')
-+ self.root = env.get('root')
-+ self.mount_check = False
-+ else:
-+ self.fs_object = None
-+
- def __call__(self, env, start_response):
- start_time = time.time()
-+ self.plugin(env)
- req = Request(env)
- self.logger.txn_id = req.headers.get('x-trans-id', None)
- if not check_utf8(req.path_info):
-diff --git a/swift/common/middleware/gluster.py b/swift/common/middleware/gluster.py
-new file mode 100644
-index 0000000..341285d
---- /dev/null
-+++ b/swift/common/middleware/gluster.py
-@@ -0,0 +1,55 @@
-+# Copyright (c) 2011 Red Hat, Inc.
-+#
-+# Licensed under the Apache License, Version 2.0 (the "License");
-+# you may not use this file except in compliance with the License.
-+# You may obtain a copy of the License at
-+#
-+# http://www.apache.org/licenses/LICENSE-2.0
-+#
-+# Unless required by applicable law or agreed to in writing, software
-+# distributed under the License is distributed on an "AS IS" BASIS,
-+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-+# implied.
-+# See the License for the specific language governing permissions and
-+# limitations under the License.
-+
-+from swift.common.utils import get_logger, plugin_enabled
-+from swift import plugins
-+from ConfigParser import ConfigParser
-+
-+class Gluster_plugin(object):
-+ """
-+ Update the environment with keys that reflect Gluster_plugin enabled
-+ """
-+
-+ def __init__(self, app, conf):
-+ self.app = app
-+ self.conf = conf
-+ self.fs_name = 'Glusterfs'
-+ self.logger = get_logger(conf, log_route='gluster')
-+
-+ def __call__(self, env, start_response):
-+ if not plugin_enabled():
-+ return self.app(env, start_response)
-+ env['Gluster_enabled'] =True
-+ fs_object = getattr(plugins, self.fs_name, False)
-+ if not fs_object:
-+ raise Exception('%s plugin not found', self.fs_name)
-+
-+ env['fs_object'] = fs_object()
-+ fs_conf = ConfigParser()
-+ if fs_conf.read('/etc/swift/fs.conf'):
-+ try:
-+ env['root'] = fs_conf.get ('DEFAULT', 'mount_path')
-+ except NoSectionError, NoOptionError:
-+ self.logger.exception(_('ERROR mount_path not present'))
-+ return self.app(env, start_response)
-+
-+def filter_factory(global_conf, **local_conf):
-+ """Returns a WSGI filter app for use with paste.deploy."""
-+ conf = global_conf.copy()
-+ conf.update(local_conf)
-+
-+ def gluster_filter(app):
-+ return Gluster_plugin(app, conf)
-+ return gluster_filter
-diff --git a/swift/common/utils.py b/swift/common/utils.py
-index 47edce8..03701ce 100644
---- a/swift/common/utils.py
-+++ b/swift/common/utils.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -1138,3 +1139,11 @@ def streq_const_time(s1, s2):
- for (a, b) in zip(s1, s2):
- result |= ord(a) ^ ord(b)
- return result == 0
-+
-+def plugin_enabled():
-+ swift_conf = ConfigParser()
-+ swift_conf.read(os.path.join('/etc/swift', 'swift.conf'))
-+ try:
-+ return swift_conf.get('DEFAULT', 'Enable_plugin', 'no') in TRUE_VALUES
-+ except NoOptionError, NoSectionError:
-+ return False
-diff --git a/swift/container/server.py b/swift/container/server.py
-index 8a18cfd..93943a3 100644
---- a/swift/container/server.py
-+++ b/swift/container/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -31,7 +32,8 @@ from webob.exc import HTTPAccepted, HTTPBadRequest, HTTPConflict, \
-
- from swift.common.db import ContainerBroker
- from swift.common.utils import get_logger, get_param, hash_path, \
-- normalize_timestamp, storage_directory, split_path, validate_sync_to
-+ normalize_timestamp, storage_directory, split_path, validate_sync_to, \
-+ plugin_enabled
- from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
- check_mount, check_float, check_utf8
- from swift.common.bufferedhttp import http_connect
-@@ -40,6 +42,9 @@ from swift.common.db_replicator import ReplicatorRpc
-
- DATADIR = 'containers'
-
-+if plugin_enabled():
-+ from swift.plugins.DiskDir import DiskDir
-+
-
- class ContainerController(object):
- """WSGI Controller for the container server."""
-@@ -62,6 +67,7 @@ class ContainerController(object):
- ContainerBroker, self.mount_check, logger=self.logger)
- self.auto_create_account_prefix = \
- conf.get('auto_create_account_prefix') or '.'
-+ self.fs_object = None
-
- def _get_container_broker(self, drive, part, account, container):
- """
-@@ -73,6 +79,11 @@ class ContainerController(object):
- :param container: container name
- :returns: ContainerBroker object
- """
-+ if self.fs_object:
-+ return DiskDir(self.root, drive, part, account,
-+ container, self.logger,
-+ fs_object = self.fs_object)
-+
- hsh = hash_path(account, container)
- db_dir = storage_directory(DATADIR, part, hsh)
- db_path = os.path.join(self.root, drive, db_dir, hsh + '.db')
-@@ -211,10 +222,18 @@ class ContainerController(object):
- if broker.is_deleted():
- return HTTPConflict(request=req)
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-'))
-+ #Note: check the structure of req.headers
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+
- if metadata:
- if 'X-Container-Sync-To' in metadata:
- if 'X-Container-Sync-To' not in broker.metadata or \
-@@ -222,6 +241,7 @@ class ContainerController(object):
- broker.metadata['X-Container-Sync-To'][0]:
- broker.set_x_container_sync_points(-1, -1)
- broker.update_metadata(metadata)
-+
- resp = self.account_update(req, account, container, broker)
- if resp:
- return resp
-@@ -245,6 +265,11 @@ class ContainerController(object):
- broker.stale_reads_ok = True
- if broker.is_deleted():
- return HTTPNotFound(request=req)
-+
-+ if self.fs_object and not self.fs_object.object_only:
-+ broker.list_objects_iter(None, None, None, None,
-+ None, None)
-+
- info = broker.get_info()
- headers = {
- 'X-Container-Object-Count': info['object_count'],
-@@ -252,10 +277,17 @@ class ContainerController(object):
- 'X-Timestamp': info['created_at'],
- 'X-PUT-Timestamp': info['put_timestamp'],
- }
-- headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '' and (key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-')))
-+ if not self.fs_object:
-+ headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+ else:
-+ headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+
- return HTTPNoContent(request=req, headers=headers)
-
- def GET(self, req):
-@@ -268,6 +300,7 @@ class ContainerController(object):
- request=req)
- if self.mount_check and not check_mount(self.root, drive):
- return Response(status='507 %s is not mounted' % drive)
-+
- broker = self._get_container_broker(drive, part, account, container)
- broker.pending_timeout = 0.1
- broker.stale_reads_ok = True
-@@ -280,10 +313,17 @@ class ContainerController(object):
- 'X-Timestamp': info['created_at'],
- 'X-PUT-Timestamp': info['put_timestamp'],
- }
-- resp_headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '' and (key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-')))
-+ if not self.fs_object:
-+ resp_headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+ else:
-+ resp_headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+
- try:
- path = get_param(req, 'path')
- prefix = get_param(req, 'prefix')
-@@ -414,10 +454,17 @@ class ContainerController(object):
- return HTTPNotFound(request=req)
- timestamp = normalize_timestamp(req.headers['x-timestamp'])
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-'))
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+
- if metadata:
- if 'X-Container-Sync-To' in metadata:
- if 'X-Container-Sync-To' not in broker.metadata or \
-@@ -427,8 +474,19 @@ class ContainerController(object):
- broker.update_metadata(metadata)
- return HTTPNoContent(request=req)
-
-+ def plugin(self, env):
-+ if env.get('Gluster_enabled', False):
-+ self.fs_object = env.get('fs_object')
-+ if not self.fs_object:
-+ raise NoneTypeError
-+ self.root = env.get('root')
-+ self.mount_check = False
-+ else:
-+ self.fs_object = None
-+
- def __call__(self, env, start_response):
- start_time = time.time()
-+ self.plugin(env)
- req = Request(env)
- self.logger.txn_id = req.headers.get('x-trans-id', None)
- if not check_utf8(req.path_info):
-diff --git a/swift/obj/server.py b/swift/obj/server.py
-index 9cca16b..a45daff 100644
---- a/swift/obj/server.py
-+++ b/swift/obj/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -26,6 +27,7 @@ from hashlib import md5
- from tempfile import mkstemp
- from urllib import unquote
- from contextlib import contextmanager
-+from ConfigParser import ConfigParser
-
- from webob import Request, Response, UTC
- from webob.exc import HTTPAccepted, HTTPBadRequest, HTTPCreated, \
-@@ -37,16 +39,23 @@ from eventlet import sleep, Timeout, tpool
-
- from swift.common.utils import mkdirs, normalize_timestamp, \
- storage_directory, hash_path, renamer, fallocate, \
-- split_path, drop_buffer_cache, get_logger, write_pickle
-+ split_path, drop_buffer_cache, get_logger, write_pickle, \
-+ plugin_enabled
- from swift.common.bufferedhttp import http_connect
--from swift.common.constraints import check_object_creation, check_mount, \
-- check_float, check_utf8
-+if plugin_enabled():
-+ from swift.plugins.constraints import check_object_creation
-+ from swift.plugins.utils import X_TYPE, X_OBJECT_TYPE, FILE, DIR, MARKER_DIR, \
-+ OBJECT, DIR_TYPE, FILE_TYPE
-+else:
-+ from swift.common.constraints import check_object_creation
-+
-+from swift.common.constraints import check_mount, check_float, check_utf8
-+
- from swift.common.exceptions import ConnectionTimeout, DiskFileError, \
- DiskFileNotExist
- from swift.obj.replicator import tpooled_get_hashes, invalidate_hash, \
- quarantine_renamer
-
--
- DATADIR = 'objects'
- ASYNCDIR = 'async_pending'
- PICKLE_PROTOCOL = 2
-@@ -339,6 +348,9 @@ class DiskFile(object):
- raise
- raise DiskFileNotExist('Data File does not exist.')
-
-+if plugin_enabled():
-+ from swift.plugins.DiskFile import Gluster_DiskFile
-+
-
- class ObjectController(object):
- """Implements the WSGI application for the Swift Object Server."""
-@@ -377,6 +389,17 @@ class ObjectController(object):
- 'expiring_objects'
- self.expiring_objects_container_divisor = \
- int(conf.get('expiring_objects_container_divisor') or 86400)
-+ self.fs_object = None
-+
-+ def get_DiskFile_obj(self, path, device, partition, account, container, obj,
-+ logger, keep_data_fp=False, disk_chunk_size=65536):
-+ if self.fs_object:
-+ return Gluster_DiskFile(path, device, partition, account, container,
-+ obj, logger, keep_data_fp,
-+ disk_chunk_size, fs_object = self.fs_object);
-+ else:
-+ return DiskFile(path, device, partition, account, container,
-+ obj, logger, keep_data_fp, disk_chunk_size)
-
- def async_update(self, op, account, container, obj, host, partition,
- contdevice, headers_out, objdevice):
-@@ -493,7 +516,7 @@ class ObjectController(object):
- content_type='text/plain')
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
-
- if 'X-Delete-At' in file.metadata and \
-@@ -548,7 +571,7 @@ class ObjectController(object):
- if new_delete_at and new_delete_at < time.time():
- return HTTPBadRequest(body='X-Delete-At in past', request=request,
- content_type='text/plain')
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
- orig_timestamp = file.metadata.get('X-Timestamp')
- upload_expiration = time.time() + self.max_upload_time
-@@ -580,12 +603,29 @@ class ObjectController(object):
- if 'etag' in request.headers and \
- request.headers['etag'].lower() != etag:
- return HTTPUnprocessableEntity(request=request)
-- metadata = {
-- 'X-Timestamp': request.headers['x-timestamp'],
-- 'Content-Type': request.headers['content-type'],
-- 'ETag': etag,
-- 'Content-Length': str(os.fstat(fd).st_size),
-- }
-+ content_type = request.headers['content-type']
-+ if self.fs_object and not content_type:
-+ content_type = FILE_TYPE
-+ if not self.fs_object:
-+ metadata = {
-+ 'X-Timestamp': request.headers['x-timestamp'],
-+ 'Content-Type': request.headers['content-type'],
-+ 'ETag': etag,
-+ 'Content-Length': str(os.fstat(fd).st_size),
-+ }
-+ else:
-+ metadata = {
-+ 'X-Timestamp': request.headers['x-timestamp'],
-+ 'Content-Type': request.headers['content-type'],
-+ 'ETag': etag,
-+ 'Content-Length': str(os.fstat(fd).st_size),
-+ X_TYPE: OBJECT,
-+ X_OBJECT_TYPE: FILE,
-+ }
-+
-+ if self.fs_object and \
-+ request.headers['content-type'].lower() == DIR_TYPE:
-+ metadata.update({X_OBJECT_TYPE: MARKER_DIR})
- metadata.update(val for val in request.headers.iteritems()
- if val[0].lower().startswith('x-object-meta-') and
- len(val[0]) > 14)
-@@ -612,7 +652,7 @@ class ObjectController(object):
- 'x-timestamp': file.metadata['X-Timestamp'],
- 'x-etag': file.metadata['ETag'],
- 'x-trans-id': request.headers.get('x-trans-id', '-')},
-- device)
-+ (self.fs_object and account) or device)
- resp = HTTPCreated(request=request, etag=etag)
- return resp
-
-@@ -626,9 +666,9 @@ class ObjectController(object):
- content_type='text/plain')
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
-- file = DiskFile(self.devices, device, partition, account, container,
-- obj, self.logger, keep_data_fp=True,
-- disk_chunk_size=self.disk_chunk_size)
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
-+ obj, self.logger, keep_data_fp=True,
-+ disk_chunk_size=self.disk_chunk_size)
- if file.is_deleted() or ('X-Delete-At' in file.metadata and
- int(file.metadata['X-Delete-At']) <= time.time()):
- if request.headers.get('if-match') == '*':
-@@ -702,7 +742,7 @@ class ObjectController(object):
- return resp
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
- if file.is_deleted() or ('X-Delete-At' in file.metadata and
- int(file.metadata['X-Delete-At']) <= time.time()):
-@@ -744,7 +784,7 @@ class ObjectController(object):
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
- response_class = HTTPNoContent
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
- if 'x-if-delete-at' in request.headers and \
- int(request.headers['x-if-delete-at']) != \
-@@ -797,9 +837,18 @@ class ObjectController(object):
- raise hashes
- return Response(body=pickle.dumps(hashes))
-
-+ def plugin(self, env):
-+ if env.get('Gluster_enabled', False):
-+ self.fs_object = env.get('fs_object')
-+ self.devices = env.get('root')
-+ self.mount_check = False
-+ else:
-+ self.fs_object = None
-+
- def __call__(self, env, start_response):
- """WSGI Application entry point for the Swift Object Server."""
- start_time = time.time()
-+ self.plugin(env)
- req = Request(env)
- self.logger.txn_id = req.headers.get('x-trans-id', None)
- if not check_utf8(req.path_info):
-diff --git a/swift/proxy/server.py b/swift/proxy/server.py
-index 17613b8..d277d28 100644
---- a/swift/proxy/server.py
-+++ b/swift/proxy/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -53,11 +54,20 @@ from webob import Request, Response
-
- from swift.common.ring import Ring
- from swift.common.utils import cache_from_env, ContextPool, get_logger, \
-- get_remote_client, normalize_timestamp, split_path, TRUE_VALUES
-+ get_remote_client, normalize_timestamp, split_path, TRUE_VALUES, \
-+ plugin_enabled
- from swift.common.bufferedhttp import http_connect
--from swift.common.constraints import check_metadata, check_object_creation, \
-- check_utf8, CONTAINER_LISTING_LIMIT, MAX_ACCOUNT_NAME_LENGTH, \
-- MAX_CONTAINER_NAME_LENGTH, MAX_FILE_SIZE
-+
-+if plugin_enabled():
-+ from swift.plugins.constraints import check_object_creation, \
-+ MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, MAX_FILE_SIZE
-+else:
-+ from swift.common.constraints import check_object_creation, \
-+ MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, MAX_FILE_SIZE
-+
-+from swift.common.constraints import check_metadata, check_utf8, \
-+ CONTAINER_LISTING_LIMIT
-+
- from swift.common.exceptions import ChunkReadTimeout, \
- ChunkWriteTimeout, ConnectionTimeout
-
-diff --git a/test/__init__.py b/test/__init__.py
-index ef2ce31..363a051 100644
---- a/test/__init__.py
-+++ b/test/__init__.py
-@@ -6,8 +6,16 @@ import sys
- import os
- from ConfigParser import MissingSectionHeaderError
- from StringIO import StringIO
--
- from swift.common.utils import readconf
-+from swift.common.utils import plugin_enabled
-+if plugin_enabled():
-+ from swift.plugins.constraints import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-+else:
-+ from swift.common.constraints import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-
- setattr(__builtin__, '_', lambda x: x)
-
-diff --git a/test/functional/tests.py b/test/functional/tests.py
-index b25b4fd..8d12f58 100644
---- a/test/functional/tests.py
-+++ b/test/functional/tests.py
-@@ -31,6 +31,16 @@ import urllib
- from test import get_config
- from swift import Account, AuthenticationFailed, Connection, Container, \
- File, ResponseError
-+from test import plugin_enabled
-+if plugin_enabled():
-+ from test import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-+else:
-+ from test import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-+
-
- config = get_config()
-
-@@ -361,7 +371,7 @@ class TestContainer(Base):
- set_up = False
-
- def testContainerNameLimit(self):
-- limit = 256
-+ limit = MAX_CONTAINER_NAME_LENGTH
-
- for l in (limit-100, limit-10, limit-1, limit,
- limit+1, limit+10, limit+100):
-@@ -949,7 +959,7 @@ class TestFile(Base):
- self.assert_status(404)
-
- def testNameLimit(self):
-- limit = 1024
-+ limit = MAX_OBJECT_NAME_LENGTH
-
- for l in (1, 10, limit/2, limit-1, limit, limit+1, limit*2):
- file = self.env.container.file('a'*l)
-@@ -1093,7 +1103,7 @@ class TestFile(Base):
- self.assert_(file.read(hdrs={'Range': r}) == data[0:1000])
-
- def testFileSizeLimit(self):
-- limit = 5*2**30 + 2
-+ limit = MAX_FILE_SIZE
- tsecs = 3
-
- for i in (limit-100, limit-10, limit-1, limit, limit+1, limit+10,
-diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py
-index 075700e..5b6f32d 100644
---- a/test/unit/obj/test_server.py
-+++ b/test/unit/obj/test_server.py
-@@ -1355,7 +1355,7 @@ class TestObjectController(unittest.TestCase):
-
- def test_max_object_name_length(self):
- timestamp = normalize_timestamp(time())
-- req = Request.blank('/sda1/p/a/c/' + ('1' * 1024),
-+ req = Request.blank('/sda1/p/a/c/' + ('1' * MAX_OBJECT_NAME_LENGTH),
- environ={'REQUEST_METHOD': 'PUT'},
- headers={'X-Timestamp': timestamp,
- 'Content-Length': '4',
-diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py
-index 364370e..c17fe59 100644
---- a/test/unit/proxy/test_server.py
-+++ b/test/unit/proxy/test_server.py
-@@ -21,7 +21,6 @@ import os
- import sys
- import unittest
- from nose import SkipTest
--from ConfigParser import ConfigParser
- from contextlib import contextmanager
- from cStringIO import StringIO
- from gzip import GzipFile
-@@ -44,8 +43,18 @@ from swift.account import server as account_server
- from swift.container import server as container_server
- from swift.obj import server as object_server
- from swift.common import ring
--from swift.common.constraints import MAX_META_NAME_LENGTH, \
-- MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE, MAX_FILE_SIZE
-+from swift.common.utils import plugin_enabled
-+if plugin_enabled():
-+ from swift.plugins.constraints import MAX_META_NAME_LENGTH, \
-+ MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE, \
-+ MAX_FILE_SIZE, MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, \
-+ MAX_OBJECT_NAME_LENGTH
-+else:
-+ from swift.plugins.constraints import MAX_META_NAME_LENGTH, \
-+ MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE, \
-+ MAX_FILE_SIZE, MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, \
-+ MAX_OBJECT_NAME_LENGTH
-+
- from swift.common.utils import mkdirs, normalize_timestamp, NullLogger
- from swift.common.wsgi import monkey_patch_mimetools
-
-@@ -3207,7 +3216,8 @@ class TestContainerController(unittest.TestCase):
- def test_PUT_max_container_name_length(self):
- with save_globals():
- controller = proxy_server.ContainerController(self.app, 'account',
-- '1' * 256)
-+ '1' *
-+ MAX_CONTAINER_NAME_LENGTH,)
- self.assert_status_map(controller.PUT,
- (200, 200, 200, 201, 201, 201), 201,
- missing_container=True)
-@@ -3813,7 +3823,8 @@ class TestAccountController(unittest.TestCase):
- def test_PUT_max_account_name_length(self):
- with save_globals():
- self.app.allow_account_management = True
-- controller = proxy_server.AccountController(self.app, '1' * 256)
-+ controller = proxy_server.AccountController(self.app, '1' *
-+ MAX_ACCOUNT_NAME_LENGTH)
- self.assert_status_map(controller.PUT, (201, 201, 201), 201)
- controller = proxy_server.AccountController(self.app, '2' * 257)
- self.assert_status_map(controller.PUT, (201, 201, 201), 400)
diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
new file mode 100644
index 00000000000..a55fd3e5e6a
--- /dev/null
+++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
@@ -0,0 +1,294 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=600
+
+### Basic Non-root geo-rep setup test with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+##User and group to be used for non-root geo-rep setup
+usr="nroot"
+grp="ggroup"
+
+slave_url=$usr@$slave
+slave_vol=$GSV0
+ssh_url=$usr@$SH0
+
+############################################################
+#SETUP VOLUMES AND VARIABLES
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Mount master
+#TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+#TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+
+##########################################################
+#TEST FUNCTIONS
+
+function distribute_key_non_root()
+{
+ ${GLUSTER_LIBEXECDIR}/set_geo_rep_pem_keys.sh $usr $master $slave_vol
+ echo $?
+}
+
+
+function check_status_non_root()
+{
+ local search_key=$1
+ $GEOREP_CLI $master $slave_url status | grep -F "$search_key" | wc -l
+}
+
+
+function check_and_clean_group()
+{
+ if [ $(getent group $grp) ]
+ then
+ groupdel $grp;
+ echo $?
+ else
+ echo 0
+ fi
+}
+
+function clean_lock_files()
+{
+ if [ ! -f /etc/passwd.lock ];
+ then
+ rm -rf /etc/passwd.lock;
+ fi
+
+ if [ ! -f /etc/group.lock ];
+ then
+ rm -rf /etc/group.lock;
+ fi
+
+ if [ ! -f /etc/shadow.lock ];
+ then
+ rm -rf /etc/shadow.lock;
+ fi
+
+ if [ ! -f /etc/gshadow.lock ];
+ then
+ rm -rf /etc/gshadow.lock;
+ fi
+}
+
+
+###########################################################
+#SETUP NON-ROOT GEO REPLICATION
+
+##Create ggroup group
+##First test if group exists and then create new one
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
+
+##cleanup *.lock files
+
+clean_lock_files
+
+TEST /usr/sbin/groupadd $grp
+
+clean_lock_files
+##Del if exists and create non-root user and assign it to newly created group
+userdel -r -f $usr
+TEST /usr/sbin/useradd -G $grp $usr
+
+##Modify password for non-root user to have control over distributing ssh-key
+echo "$usr:pass" | chpasswd
+
+##Set up mountbroker root
+TEST gluster-mountbroker setup /var/mountbroker-root $grp
+
+##Associate volume and non-root user to the mountbroker
+TEST gluster-mountbroker add $slave_vol $usr
+
+##Check ssh setting for clear text passwords
+sed '/^PasswordAuthentication /{s/no/yes/}' -i /etc/ssh/sshd_config && grep '^PasswordAuthentication ' /etc/ssh/sshd_config && service sshd restart
+
+
+##Restart glusterd to reflect mountbroker changages
+TEST killall_gluster;
+TEST glusterd;
+TEST pidof glusterd;
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" brick_count ${META_VOL}
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" brick_count $GMV0
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" brick_count $GSV0
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+## Check status of mount-broker
+TEST gluster-mountbroker status
+
+
+##Setup password-less ssh for non-root user
+#sshpass -p "pass" ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url
+##Run ssh agent
+eval "$(ssh-agent -s)"
+PASS="pass"
+
+
+##Create a temp script to echo the SSH password, used by SSH_ASKPASS
+
+SSH_ASKPASS_SCRIPT=/tmp/ssh-askpass-script
+cat > ${SSH_ASKPASS_SCRIPT} <<EOL
+#!/bin/bash
+echo "${PASS}"
+EOL
+chmod u+x ${SSH_ASKPASS_SCRIPT}
+
+##set no display, necessary for ssh to use with setsid and SSH_ASKPASS
+export DISPLAY
+
+export SSH_ASKPASS=${SSH_ASKPASS_SCRIPT}
+
+DISPLAY=: setsid ssh-copy-id -o 'PreferredAuthentications=password' -o 'StrictHostKeyChecking=no' -i ~/.ssh/id_rsa.pub $ssh_url
+
+##Setting up PATH for gluster binaries in case of source installation
+##ssh -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $ssh_url "echo "export PATH=$PATH:/usr/local/sbin" >> ~/.bashrc"
+
+##Creating secret pem pub file
+TEST gluster-georep-sshkey generate
+
+##Create geo-rep non-root setup
+
+TEST $GEOREP_CLI $master $slave_url create push-pem
+
+#check for session creation
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_non_root "Created"
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave_url config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave_url config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+## Test for key distribution
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 distribute_key_non_root
+
+##Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave_url start
+
+## Meta volume is enabled so looking for 2 Active and 2 Passive sessions
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Active"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Passive"
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave_url pause
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave_url resume
+
+#Validate failure of volume stop when geo-rep is running
+TEST ! $CLI volume stop $GMV0
+
+#Negative test for ssh-port
+#Port should be integer and between 1-65535 range
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port -22
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port abc
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 6875943
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 4.5
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 22a
+
+#Config Set ssh-port to validate int validation
+TEST $GEOREP_CLI $master $slave config ssh-port 22
+
+#Hybrid directory rename test BZ#1763439
+
+TEST $GEOREP_CLI $master $slave_url config change_detector xsync
+#verify master and slave mount
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT "^1$" check_mounted ${master_mnt}
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT "^1$" check_mounted ${slave_mnt}
+
+#Create test data for hybrid crawl
+TEST mkdir ${master_mnt}/dir1
+TEST mkdir ${master_mnt}/dir1/dir2
+TEST mkdir ${master_mnt}/dir1/dir3
+TEST mkdir ${master_mnt}/hybrid_d1
+
+mv ${master_mnt}/hybrid_d1 ${master_mnt}/hybrid_rn_d1
+mv ${master_mnt}/dir1/dir2 ${master_mnt}/rn_dir2
+mv ${master_mnt}/dir1/dir3 ${master_mnt}/dir1/rn_dir3
+
+#Verify hybrid crawl data on slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_rn_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/rn_dir2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/rn_dir3
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave_url stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave_url delete
+
+#Cleanup authorized_keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' /home/$usr/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' /home/$usr/.ssh/authorized_keys
+
+#clear mountbroker
+gluster-mountbroker remove --user $usr
+gluster-mountbroker remove --volume $slave_vol
+
+#delete group and user created for non-root setup
+TEST userdel -r -f $usr
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
+
+##password script cleanup
+rm -rf /tmp/ssh-askpass-script
+
+
+cleanup;
+
diff --git a/tests/00-geo-rep/00-georep-verify-setup.t b/tests/00-geo-rep/00-georep-verify-setup.t
new file mode 100644
index 00000000000..0d46c04102d
--- /dev/null
+++ b/tests/00-geo-rep/00-georep-verify-setup.t
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=400
+GEO_REP_TIMEOUT=200
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Test invalid slave url
+TEST ! $GEOREP_CLI $master ${SH0}:${GSV0} create push-pem
+TEST ! $GEOREP_CLI $master ${SH0}:::${GSV0} create push-pem
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#get-state commamd shouldn't crash glusterd when geo-rep session is configured
+TEST $CLI get-state
+TEST pidof glusterd
+
+TEST $CLI get-state detail
+TEST pidof glusterd
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave pause
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave resume
+
+#Validate failure of volume stop when geo-rep is running
+TEST ! $CLI volume stop $GMV0
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/01-georep-glusterd-tests.t b/tests/00-geo-rep/01-georep-glusterd-tests.t
new file mode 100644
index 00000000000..47d5116af26
--- /dev/null
+++ b/tests/00-geo-rep/01-georep-glusterd-tests.t
@@ -0,0 +1,213 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+#Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+#Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+slave1=root@${SH0}::${GSV1}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+#create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 3 $H0:$B0/${GMV0}{1,2,3};
+
+#Negative testase: Create geo-rep session, master is not started
+TEST ! $GEOREP_CLI $master $slave create push-pem
+
+TEST $CLI volume start $GMV0
+
+#create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 3 $H0:$B0/${GSV0}{1,2,3};
+
+#Negative testcase: Create geo-rep session, slave is not started
+TEST ! $GEOREP_CLI $master $slave create push-pem
+
+TEST $CLI volume start $GSV0
+
+#create_and_start_slave1_volume
+TEST $CLI volume create $GSV1 replica 3 $H0:$B0/${GSV1}{1,2,3};
+TEST $CLI volume start $GSV1
+
+#Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+#Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+#Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION GLUSTERD TESTS WITH FANOUT SETUP
+############################################################
+
+#Negative testcase: Test invalid master
+TEST ! $GEOREP_CLI master1 ${SH0}::${GSV0} create push-pem
+
+#Negatvie testcase: Test invalid slave
+TEST ! $GEOREP_CLI $master ${SH0}::slave3 create push-pem
+
+##------------------- Session 1 Creation Begin-----------------##
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+##------------------- Session 1 Creation End-----------------##
+
+##------------------- Session 2 Creation Begin-----------------##
+#Create geo-rep session2
+TEST $GEOREP_CLI $master $slave1 create ssh-port 22 no-verify
+
+#Config gluster-command-dir for session2
+TEST $GEOREP_CLI $master $slave1 config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir for session2
+TEST $GEOREP_CLI $master $slave1 config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume for session2
+TEST $GEOREP_CLI $master $slave1 config use_meta_volume true
+##------------------- Session 2 Creation End-----------------##
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+#check geo-rep status without specifying master and slave volumes
+TEST $GEOREP_CLI status
+
+#Start_georep force
+TEST $GEOREP_CLI $master $slave1 start force
+
+#Negative testcase: Create the same session after start, fails
+#With root@ prefix
+TEST ! $GEOREP_CLI $master $slave1 create push-pem
+#Without root@ prefix
+TEST ! $GEOREP_CLI $master ${SH0}::${GSV1} create push-pem
+TEST $GEOREP_CLI $master $slave1 create push-pem force
+
+##------------------- Fanout status testcases Begin --------------##
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_fanout_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_fanout_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_fanout_status_detail_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_fanout_status_detail_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_all_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_all_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_all_status_detail_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_all_status_detail_num_rows "Passive"
+
+##------------------- Fanout status testcases End --------------##
+
+##------Checkpoint Testcase Begin---------------##
+#Write I/O
+echo "test data" > $M0/file1
+TEST $GEOREP_CLI $master $slave config checkpoint now
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_checkpoint_met $master $slave
+touch $M0
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 verify_checkpoint_met $master $slave
+##------Checkpoint Testcase End---------------##
+
+##------------------ Geo-rep config testcases Begin--------------------##
+TEST $GEOREP_CLI $master $slave config
+TEST ! $GEOREP_CLI $master $slave config arsync-options '-W'
+TEST $GEOREP_CLI $master $slave config rsync-options '-W'
+TEST $GEOREP_CLI $master $slave config rsync-options
+TEST $GEOREP_CLI $master $slave config \!rsync-options
+TEST $GEOREP_CLI $master $slave config sync-xattrs false
+##------------------ Geo-rep config testcases End --------------------##
+
+##---------------- Pause/Resume testcase Begin-------------##
+#Negative testcase: Resume geo-replication session when not paused
+TEST ! $GEOREP_CLI $master $slave1 resume
+TEST $GEOREP_CLI $master $slave1 resume force
+
+#Pause geo-replication session with root@
+TEST $GEOREP_CLI $master $slave1 pause force
+
+#Resume geo-replication session with root@
+TEST $GEOREP_CLI $master $slave1 resume force
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave1 stop force
+
+#Negative testcase: Resume geo-replication session after geo-rep stop
+TEST ! $GEOREP_CLI $master $slave1 resume
+##---------------- Pause/Resume testcase End-------------##
+
+##-----------------glusterd slave key/value upgrade testcase Begin ---------##
+#Upgrade test of slave key stored in glusterd info file
+src=$(grep slave2 /var/lib/glusterd/vols/$master/info)
+#Remove slave uuuid (last part after divided by : )
+dst=${src%:*}
+
+#Update glusterd info file with old slave format
+sed -i "s|$src|$dst|g" /var/lib/glusterd/vols/$master/info
+TEST ! grep $src /var/lib/glusterd/vols/$master/info
+
+#Restart glusterd to update in-memory volinfo
+TEST pkill glusterd
+TEST glusterd;
+TEST pidof glusterd
+
+#Start geo-rep and validate slave format is updated
+TEST $GEOREP_CLI $master $slave1 start force
+TEST grep $src /var/lib/glusterd/vols/$master/info
+##-----------------glusted slave key/value upgrade testcase End ---------##
+
+#Negative testcase: Delete Geo-rep 2 fails as geo-rep is running
+TEST ! $GEOREP_CLI $master $slave1 delete
+
+#Stop and Delete Geo-rep 2
+TEST $GEOREP_CLI $master $slave1 stop force
+TEST $GEOREP_CLI $master $slave1 delete reset-sync-time
+
+#Stop and Delete Geo-rep 1
+TEST $GEOREP_CLI $master $slave stop
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/bug-1600145.t b/tests/00-geo-rep/bug-1600145.t
new file mode 100644
index 00000000000..1d38bf92682
--- /dev/null
+++ b/tests/00-geo-rep/bug-1600145.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+SCRIPT_TIMEOUT=600
+TEST glusterd;
+TEST pidof glusterd
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2};
+gluster v set all cluster.brick-multiplex on
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+n=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Passive"
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+let expected=n+2
+TEST [ "$c" -eq "$expected" ]
+
+#Kill the "Active" brick
+brick=$($GEOREP_CLI $master $slave status | grep -F "Active" | awk {'print $3'})
+cat /proc/$brick_pid/net/unix | grep "changelog"
+TEST kill_brick $GMV0 $H0 $brick
+#Expect geo-rep status to be "Faulty"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Faulty"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+cat /proc/$brick_pid/net/unix | grep "changelog"
+ls -lrth /proc/$brick_pid/fd | grep "socket"
+c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+TEST [ "$c" -eq "$n" ]
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t
new file mode 100644
index 00000000000..26913f1d318
--- /dev/null
+++ b/tests/00-geo-rep/bug-1708603.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="gluster volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
+EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+echo y | $GEOREP_CLI $master $slave config ignore-deletes true
+EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
new file mode 100644
index 00000000000..c45d2ff62ce
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
@@ -0,0 +1,234 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=4
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 3 arbiter 1 $H0:$B0/${GMV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 3 arbiter 1 $H0:$B0/${GSV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#Test rsync-options set BUG:1629561
+TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave pause force
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave resume force
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
new file mode 100644
index 00000000000..d785aa59fc9
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
@@ -0,0 +1,258 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Negative test for ssh-port
+#Port should be integer and between 1-65535 range
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port -22
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port abc
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 6875943
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 4.5
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 22a
+
+#Config Set ssh-port to validate int validation
+TEST $GEOREP_CLI $master $slave config ssh-port 22
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST create_rename_symlink_case
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#Test rsync-options set BUG:1629561
+TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Test config upgrade BUG: 1707731
+config_file=$GLUSTERD_WORKDIR/geo-replication/${GMV0}_${SH0}_${GSV0}/gsyncd.conf
+cat >> $config_file<<EOL
+[peers ${GMV0} ${GSV0}]
+use_tarssh = true
+timeout = 1
+EOL
+TEST $GEOREP_CLI $master $slave stop
+TEST $GEOREP_CLI $master $slave start
+#verify that the config file is updated
+EXPECT "1" echo $(grep -Fc "vars" $config_file)
+EXPECT "1" echo $(grep -Fc "sync-method = tarssh" $config_file)
+EXPECT "1" echo $(grep -Fc "slave-timeout = 1" $config_file)
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
new file mode 100644
index 00000000000..8fed929ffca
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=4
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 3 arbiter 1 $H0:$B0/${GMV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 3 arbiter 1 $H0:$B0/${GSV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t
new file mode 100644
index 00000000000..feb2de74c90
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-rsync-ec.t b/tests/00-geo-rep/georep-basic-rsync-ec.t
new file mode 100644
index 00000000000..dd1f94edbc9
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-rsync-ec.t
@@ -0,0 +1,224 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distributed Disperse volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=10
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 disperse 3 redundancy 1 $H0:$B0/${GMV0}{0..5};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 disperse 3 redundancy 1 $H0:$B0/${GSV0}{0..5};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+#TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#Test rsync-options set BUG:1629561
+TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
+#rename with existing destination case BUG:1694820
+#TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-tarssh-ec.t b/tests/00-geo-rep/georep-basic-tarssh-ec.t
new file mode 100644
index 00000000000..987bd9391c8
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-tarssh-ec.t
@@ -0,0 +1,223 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distributed Disperse volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=10
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 disperse 3 redundancy 1 $H0:$B0/${GMV0}{0..5};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 disperse 3 redundancy 1 $H0:$B0/${GSV0}{0..5};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+#TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#rename with existing destination case BUG:1694820
+#TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-config-upgrade.t b/tests/00-geo-rep/georep-config-upgrade.t
new file mode 100644
index 00000000000..557461cd9c4
--- /dev/null
+++ b/tests/00-geo-rep/georep-config-upgrade.t
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+OLD_CONFIG_PATH=$(dirname $0)/gsyncd.conf.old
+WORKING_DIR=/var/lib/glusterd/geo-replication/master_127.0.0.1_slave
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Copy old config file
+mv -f $WORKING_DIR/gsyncd.conf $WORKING_DIR/gsyncd.conf.org
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+
+#Check if config get all updates config_file
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config get updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config sync-method
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config set updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config sync-xattrs false
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config reset updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config \!sync-xattrs
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if geo-rep start updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave start
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Stop geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-stderr-hang.t b/tests/00-geo-rep/georep-stderr-hang.t
new file mode 100644
index 00000000000..496f0e6577d
--- /dev/null
+++ b/tests/00-geo-rep/georep-stderr-hang.t
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 $H0:$B0/${GMV0}1;
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 $H0:$B0/${GSV0}1;
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Set changelog roll-over time to 45 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 45
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Set sync-jobs to 1
+TEST $GEOREP_CLI $master $slave config sync-jobs 1
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+touch $M0
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Changelog Crawl"
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data_hang "rsync_hang"
+TEST create_data "history_rsync"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Create tarssh hang data
+TEST create_data_hang "tarssh_hang"
+TEST create_data "history_tar"
+
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-upgrade.t b/tests/00-geo-rep/georep-upgrade.t
new file mode 100644
index 00000000000..7523068ed50
--- /dev/null
+++ b/tests/00-geo-rep/georep-upgrade.t
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+SCRIPT_TIMEOUT=500
+
+###############################################################################################
+#Before upgrade
+###############################################################################################
+brick=/bricks/brick1
+epoch1=$(date '+%s')
+sleep 1
+epoch2=$(date '+%s')
+mkdir -p /bricks/brick1/.glusterfs/changelogs/htime
+mkdir -p /bricks/brick1/.glusterfs/changelogs
+
+#multiple htime files(changelog enable/disable scenario)
+TEST touch /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1
+TEST touch /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2
+
+#changelog files
+TEST touch /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+TEST touch /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+
+htime_file1=/bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1
+htime_file2=/bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2
+
+#data inside htime files before upgrade
+data1=/bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+data2=/bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+
+#data inside htime files after upgrade
+updated_data1=/bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch1
+updated_data2=/bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch2
+
+echo -n $data1>$htime_file1
+echo -n $data2>$htime_file2
+
+echo "Before upgrade:"
+EXPECT '1' echo $(grep $data1 $htime_file1 | wc -l)
+EXPECT '1' echo $(grep $data2 $htime_file2 | wc -l)
+
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2 | wc -l)
+###############################################################################################
+#Upgrade
+###############################################################################################
+### This needed to be fixed as this very vague finding a file with name in '/'
+### multiple file with same name can exist
+### for temp fix picking only 1st result
+TEST upgrade_script=$(find / -type f -name glusterfs-georep-upgrade.py -print | head -n 1)
+TEST python3 $upgrade_script $brick
+
+###############################################################################################
+#After upgrade
+###############################################################################################
+echo "After upgrade:"
+EXPECT '1' echo $(grep $updated_data1 $htime_file1 | wc -l)
+EXPECT '1' echo $(grep $updated_data2 $htime_file2 | wc -l)
+
+#Check directory structure inside changelogs
+TEST ! ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1.bak | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m')` | wc -l)
+EXPECT '2' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch1 | wc -l)
+
+TEST ! ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2.bak | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m')`| wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch2 | wc -l)
+
+TEST rm -rf /bricks
diff --git a/tests/00-geo-rep/gsyncd.conf.old b/tests/00-geo-rep/gsyncd.conf.old
new file mode 100644
index 00000000000..519acaf8f3e
--- /dev/null
+++ b/tests/00-geo-rep/gsyncd.conf.old
@@ -0,0 +1,47 @@
+[__meta__]
+version = 2.0
+
+[peersrx . .]
+remote_gsyncd = /usr/local/libexec/glusterfs/gsyncd
+georep_session_working_dir = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/
+ssh_command_tar = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem
+changelog_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}-changes.log
+working_dir = /var/lib/misc/glusterfsd/${mastervol}/${eSlave}
+ignore_deletes = false
+pid_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
+state_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
+gluster_command_dir = /usr/local/sbin/
+gluster_params = aux-gfid-mount acl
+ssh_command = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/secret.pem
+state_detail_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status
+state_socket_unencoded = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket
+socketdir = /var/run/gluster
+log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}.log
+gluster_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}.gluster.log
+special_sync_mode = partial
+change_detector = changelog
+pid-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
+state-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
+
+[__section_order__]
+peersrx . . = 0
+peersrx . %5essh%3a = 2
+peersrx . = 3
+peers master slave = 4
+
+[peersrx . %5Essh%3A]
+remote_gsyncd = /nonexistent/gsyncd
+
+[peersrx .]
+gluster_command_dir = /usr/local/sbin/
+gluster_params = aux-gfid-mount acl
+log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log
+log_file_mbr = /var/log/glusterfs/geo-replication-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log
+gluster_log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log
+
+[peers master slave]
+session_owner = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
+master.stime_xattr_name = trusted.glusterfs.0732cbd1-3ec5-4920-ab0d-aa5a896d5214.07a9005c-ace4-4f67-b3c0-73938fb236c4.stime
+volume_id = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
+use_tarssh = true
+
diff --git a/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
new file mode 100644
index 00000000000..77d82a4996f
--- /dev/null
+++ b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
@@ -0,0 +1,203 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/file
+
+############ Healing using favorite-child-policy = ctime #################
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+# Umount to prevent further FOPS on the file, then find the brick with latest ctime.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+ctime1=`stat -c "%.Z" $B0/${V0}0/file`
+ctime2=`stat -c "%.Z" $B0/${V0}1/file`
+if (( $(echo "$ctime1 > $ctime2" | bc -l) )); then
+ LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+else
+ LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+fi
+TEST $CLI volume set $V0 cluster.favorite-child-policy ctime
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$B0_MD5" ]
+TEST [ "$LATEST_CTIME_MD5" == "$B1_MD5" ]
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+
+############ Healing using favorite-child-policy = mtime #################
+TEST $CLI volume set $V0 cluster.favorite-child-policy none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+#We know that the second brick has latest mtime.
+LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ]
+
+############ Healing using favorite-child-policy = size #################
+TEST $CLI volume set $V0 cluster.favorite-child-policy none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+#We know that the second brick has the bigger size file.
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ]
+
+############ Healing using favorite-child-policy = majority on replica-3 #################
+
+#Convert volume to replica-3
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST $CLI volume set $V0 cluster.favorite-child-policy none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+#We know that the second and third bricks agree with each other. Pick any one of them.
+MAJORITY_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]
+
+TEST force_umount $M0
+cleanup
diff --git a/tests/000-flaky/basic_changelog_changelog-snapshot.t b/tests/000-flaky/basic_changelog_changelog-snapshot.t
new file mode 100644
index 00000000000..f6cd0b04d47
--- /dev/null
+++ b/tests/000-flaky/basic_changelog_changelog-snapshot.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../snapshot.rc
+
+cleanup;
+ROLLOVER_TIME=3
+
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+BRICK_LOG=$(echo "$L1" | tr / - | sed 's/^-//g')
+TEST $CLI volume start $V0
+
+#Enable changelog
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Create snapshot
+S1="${V0}-snap1"
+
+mkdir $M0/RENAME
+mkdir $M0/LINK
+mkdir $M0/UNLINK
+mkdir $M0/RMDIR
+mkdir $M0/SYMLINK
+
+for i in {1..400} ; do touch $M0/RENAME/file$i; done
+for i in {1..400} ; do touch $M0/LINK/file$i; done
+for i in {1..400} ; do touch $M0/UNLINK/file$i; done
+for i in {1..400} ; do mkdir $M0/RMDIR/dir$i; done
+for i in {1..400} ; do touch $M0/SYMLINK/file$i; done
+
+#Write I/O in background
+for i in {1..400} ; do touch $M0/file$i 2>/dev/null; done &
+for i in {1..400} ; do mknod $M0/mknod-file$i p 2>/dev/null; done &
+for i in {1..400} ; do mkdir $M0/dir$i 2>/dev/null; done & 2>/dev/null
+for i in {1..400} ; do mv $M0/RENAME/file$i $M0/RENAME/rn-file$i 2>/dev/null; done &
+for i in {1..400} ; do ln $M0/LINK/file$i $M0/LINK/ln-file$i 2>/dev/null; done &
+for i in {1..400} ; do rm -f $M0/UNLINK/file$i 2>/dev/null; done &
+for i in {1..400} ; do rmdir $M0/RMDIR/dir$i 2>/dev/null; done &
+for i in {1..400} ; do ln -s $M0/SYMLINK/file$i $M0/SYMLINK/sym-file$i 2>/dev/null; done &
+
+sleep 1
+TEST $CLI snapshot create $S1 $V0 no-timestamp
+TEST snapshot_exists 0 $S1
+
+TEST grep '"Enabled changelog barrier"' /var/log/glusterfs/bricks/$BRICK_LOG.log
+TEST grep '"Disabled changelog barrier"' /var/log/glusterfs/bricks/$BRICK_LOG.log
+
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S1/$V0 $M1
+
+#Clean up
+TEST $CLI volume stop $V0 force
+cleanup;
diff --git a/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
new file mode 100644
index 00000000000..eb5d3305ac1
--- /dev/null
+++ b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../dht.rc
+
+
+# Check if every single rebalance process migrated some files
+
+function cluster_rebal_all_nodes_migrated_files {
+ val=0
+ a=$($CLI_1 volume rebalance $V0 status | grep "completed" | awk '{print $2}');
+ b=($a)
+ for i in "${b[@]}"
+ do
+ if [ "$i" -eq "0" ]; then
+ echo "false";
+ val=1;
+ fi
+ done
+ echo $val
+}
+
+cleanup
+
+TEST launch_cluster 3;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+
+#Start with a pure distribute volume (multiple bricks on the same node)
+TEST $CLI_1 volume create $V0 $H1:$B1/dist1 $H1:$B1/dist2 $H2:$B2/dist3 $H2:$B2/dist4
+
+TEST $CLI_1 volume start $V0
+$CLI_1 volume info $V0
+
+#TEST $CLI_1 volume set $V0 client-log-level DEBUG
+
+## Mount FUSE
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir1 2>/dev/null;
+TEST touch $M0/dir1/file-{1..500}
+
+## Add-brick and run rebalance to force file migration
+TEST $CLI_1 volume add-brick $V0 $H1:$B1/dist5 $H2:$B2/dist6
+
+#Start a rebalance
+TEST $CLI_1 volume rebalance $V0 start force
+
+#volume rebalance status should work
+#TEST $CLI_1 volume rebalance $V0 status
+#$CLI_1 volume rebalance $V0 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" cluster_rebalance_completed
+EXPECT "0" cluster_rebal_all_nodes_migrated_files
+$CLI_1 volume rebalance $V0 status
+
+
+TEST umount -f $M0
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+
+##############################################################
+
+# Next, a dist-rep volume
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/drep1 $H2:$B2/drep1 $H1:$B1/drep2 $H2:$B2/drep2
+
+TEST $CLI_1 volume start $V0
+$CLI_1 volume info $V0
+
+#TEST $CLI_1 volume set $V0 client-log-level DEBUG
+
+## Mount FUSE
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir1 2>/dev/null;
+TEST touch $M0/dir1/file-{1..500}
+
+## Add-brick and run rebalance to force file migration
+TEST $CLI_1 volume add-brick $V0 replica 2 $H1:$B1/drep3 $H2:$B2/drep3
+
+#Start a rebalance
+TEST $CLI_1 volume rebalance $V0 start force
+
+#volume rebalance status should work
+#TEST $CLI_1 volume rebalance $V0 status
+#$CLI_1 volume rebalance $V0 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" cluster_rebalance_completed
+#EXPECT "0" cluster_rebal_all_nodes_migrated_files
+$CLI_1 volume rebalance $V0 status
+
+
+TEST umount -f $M0
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+##############################################################
+
+# Next, a disperse volume
+TEST $CLI_1 volume create $V0 disperse 3 $H1:$B1/ec1 $H2:$B1/ec2 $H3:$B1/ec3 force
+
+TEST $CLI_1 volume start $V0
+$CLI_1 volume info $V0
+
+#TEST $CLI_1 volume set $V0 client-log-level DEBUG
+
+## Mount FUSE
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir1 2>/dev/null;
+TEST touch $M0/dir1/file-{1..500}
+
+## Add-brick and run rebalance to force file migration
+TEST $CLI_1 volume add-brick $V0 $H1:$B2/ec4 $H2:$B2/ec5 $H3:$B2/ec6
+
+#Start a rebalance
+TEST $CLI_1 volume rebalance $V0 start force
+
+#volume rebalance status should work
+#TEST $CLI_1 volume rebalance $V0 status
+#$CLI_1 volume rebalance $V0 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" cluster_rebalance_completed
+
+# this will not work unless EC is changed to return all node-uuids
+# comment this out once that patch is ready
+#EXPECT "0" cluster_rebal_all_nodes_migrated_files
+$CLI_1 volume rebalance $V0 status
+
+
+TEST umount -f $M0
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+##############################################################
+
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=1501388
diff --git a/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
new file mode 100644
index 00000000000..42808ce0c0e
--- /dev/null
+++ b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This test checks that partial failure of fop results in main fop failure only
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1
+TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1
+TEST cp $M0/b $M0/c
+TEST fallocate -p -l 101 $M0/c
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 debug.delay-gen posix;
+TEST $CLI volume set $V0 delay-gen.delay-duration 10000000;
+TEST $CLI volume set $V0 delay-gen.enable WRITE;
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 disperse.quorum-count 6
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}')
+truncate -s 12345 $M0/a & #While write is waiting for 5 seconds, introduce failure
+fallocate -p -l 101 $M0/b &
+sleep 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST wait
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+EXPECT "12345" stat --format=%s $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}')
+TEST [[ $cksum == $cksum_after_heal ]]
+cksum=$(dd if=$M0/c | md5sum | awk '{print $1}')
+cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}')
+TEST [[ $cksum == $cksum_after_heal ]]
+
+cleanup;
diff --git a/tests/000-flaky/basic_mount-nfs-auth.t b/tests/000-flaky/basic_mount-nfs-auth.t
new file mode 100644
index 00000000000..3d4a9cff00b
--- /dev/null
+++ b/tests/000-flaky/basic_mount-nfs-auth.t
@@ -0,0 +1,342 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# Our mount timeout must be as long as the time for a regular configuration
+# change to be acted upon *plus* AUTH_REFRESH_TIMEOUT, not one replacing the
+# other. Otherwise this process races vs. the one making the change we're
+# trying to test, which leads to spurious failures.
+MY_MOUNT_TIMEOUT=$((CONFIG_UPDATE_TIMEOUT+AUTH_REFRESH_INTERVAL))
+
+cleanup;
+## Check whether glusterd is running
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+H0IP=$(ip addr show |grep -w inet |grep -v 127.0.0.1|awk '{ print $2 }'| cut -d "/" -f 1)
+H0IP6=$(host $HOSTNAME | grep IPv6 | awk '{print $NF}')
+
+# Export variables for allow & deny
+EXPORT_ALLOW="/$V0 $H0(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+EXPORT_ALLOW_SLASH="/$V0/ $H0(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+EXPORT_DENY="/$V0 1.2.3.4(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+
+# Netgroup variables for allow & deny
+NETGROUP_ALLOW="ngtop ng1000\nng1000 ng999\nng999 ng1\nng1 ng2\nng2 ($H0,,)"
+NETGROUP_DENY="ngtop ng1000\nng1000 ng999\nng999 ng1\nng1 ng2\nng2 (1.2.3.4,,)"
+
+V0L1="$V0/L1"
+V0L2="$V0L1/L2"
+V0L3="$V0L2/L3"
+
+# Other variations for allow & deny
+EXPORT_ALLOW_RO="/$V0 $H0(sec=sys,ro,anonuid=0) @ngtop(sec=sys,ro,anonuid=0)"
+EXPORT_ALLOW_L1="/$V0L1 $H0(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+EXPORT_WILDCARD="/$V0 *(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+
+function build_dirs () {
+ mkdir -p $B0/b{0,1,2}/L1/L2/L3
+}
+
+function export_allow_this_host_ipv6 () {
+ printf "$EXPORT_ALLOW6\n" > "$GLUSTERD_WORKDIR"/nfs/exports
+}
+
+function export_allow_this_host () {
+ printf "$EXPORT_ALLOW\n" > ${NFSDIR}/exports
+}
+
+function export_allow_this_host_with_slash () {
+ printf "$EXPORT_ALLOW_SLASH\n" > ${NFSDIR}/exports
+}
+
+function export_deny_this_host () {
+ printf "$EXPORT_DENY\n" > ${NFSDIR}/exports
+}
+
+function export_allow_this_host_l1 () {
+ printf "$EXPORT_ALLOW_L1\n" >> ${NFSDIR}/exports
+}
+
+function export_allow_wildcard () {
+ printf "$EXPORT_WILDCARD\n" > ${NFSDIR}/exports
+}
+
+function export_allow_this_host_ro () {
+ printf "$EXPORT_ALLOW_RO\n" > ${NFSDIR}/exports
+}
+
+function netgroup_allow_this_host () {
+ printf "$NETGROUP_ALLOW\n" > ${NFSDIR}/netgroups
+}
+
+function netgroup_deny_this_host () {
+ printf "$NETGROUP_DENY\n" > ${NFSDIR}/netgroups
+}
+
+function create_vol () {
+ $CLI vol create $V0 $H0:$B0/b0
+}
+
+function setup_cluster() {
+ build_dirs # Build directories
+ export_allow_this_host # Allow this host in the exports file
+ netgroup_allow_this_host # Allow this host in the netgroups file
+
+ glusterd
+ create_vol # Create the volume
+}
+
+function check_mount_success {
+ mount_nfs $H0:/$1 $N0 nolock
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function check_mount_failure {
+ mount_nfs $H0:/$1 $N0 nolock
+ if [ $? -ne 0 ]; then
+ echo "Y"
+ else
+ local timeout=$UMOUNT_TIMEOUT
+ while ! umount_nfs $N0 && [$timeout -ne 0] ; do
+ timeout=$(( $timeout - 1 ))
+ sleep 1
+ done
+ fi
+}
+
+function small_write () {
+ dd if=/dev/zero of=$N0/test-small-write count=1 bs=1k 2>&1
+ if [ $? -ne 0 ]; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+function bg_write () {
+ dd if=/dev/zero of=$N0/test-bg-write count=1 bs=1k &
+ BG_WRITE_PID=$!
+}
+
+function big_write() {
+ dd if=/dev/zero of=$N0/test-big-write count=500 bs=1024k
+}
+
+function create () {
+ touch $N0/create-test
+}
+
+function stat_nfs () {
+ ls $N0/
+}
+
+# Restarts the NFS server
+function restart_nfs () {
+ local NFS_PID=$(cat $GLUSTERD_PIDFILEDIR/nfs/nfs.pid)
+
+ # kill the NFS-server if it is running
+ while ps -q ${NFS_PID} 2>&1 > /dev/null; do
+ kill ${NFS_PID}
+ sleep 0.5
+ done
+
+ # start-force starts the NFS-server again
+ $CLI vol start patchy force
+}
+
+setup_cluster
+
+# run preliminary tests
+TEST $CLI vol set $V0 nfs.disable off
+TEST $CLI vol start $V0
+
+# Get NFS state directory
+NFSDIR=$( $CLI volume get patchy nfs.mount-rmtab | \
+ awk '/^nfs.mount-rmtab/{print $2}' | \
+ xargs dirname )
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## NFS server starts with auth disabled
+## Do some tests to verify that.
+
+EXPECT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Disallow host
+TEST export_deny_this_host
+TEST netgroup_deny_this_host
+
+## Technically deauthorized this host, but since auth is disabled we should be
+## able to do mounts, writes, etc.
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Reauthorize this host
+export_allow_this_host
+netgroup_allow_this_host
+
+## Restart NFS with auth enabled
+$CLI vol stop $V0
+TEST $CLI vol set $V0 nfs.exports-auth-enable on
+$CLI vol start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Mount NFS
+EXPECT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS using the IPv6 export
+export_allow_this_host_ipv6
+EXPECT "Y" check_mount_success $V0
+
+## Disallow host
+TEST export_deny_this_host
+TEST netgroup_deny_this_host
+
+## Writes should not be allowed, host is not authorized
+EXPECT_WITHIN $AUTH_REFRESH_INTERVAL "N" small_write
+
+## Unmount so we can test mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Subsequent ounts should not be allowed, host is not authorized
+EXPECT "Y" check_mount_failure $V0
+
+## Reauthorize host
+TEST export_allow_this_host
+TEST netgroup_allow_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Allow host in netgroups but not in exports, host should be allowed
+TEST export_deny_this_host
+TEST netgroup_allow_this_host
+
+# wait for the mount authentication to rebuild
+sleep $[$AUTH_REFRESH_INTERVAL + 1]
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "Y" small_write
+TEST big_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Allow host in exports but not in netgroups, host should be allowed
+TEST export_allow_this_host
+TEST netgroup_deny_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Finally, reauth the host in export and netgroup, test mount & write
+TEST export_allow_this_host_l1
+TEST netgroup_allow_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0L1
+EXPECT "Y" small_write
+
+## Failover test: Restarting NFS and then doing a write should pass
+bg_write
+TEST restart_nfs
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST wait $BG_WRITE_PID
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Test deep mounts
+EXPECT "Y" check_mount_success $V0L1
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST export_allow_this_host_ro
+TEST netgroup_deny_this_host
+
+## Restart the nfs server to avoid spurious failure(BZ1256352)
+restart_nfs
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "N" small_write # Writes should not be allowed
+TEST ! create # Create should not be allowed
+TEST stat_nfs # Stat should be allowed
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST export_deny_this_host
+TEST netgroup_deny_this_host
+TEST export_allow_this_host_l1 # Allow this host at L1
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_failure $V0 #V0 shouldnt be allowed
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0L1 #V0L1 should be
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Test wildcard hosts
+TEST export_allow_wildcard
+
+# the $MY_MOUNT_TIMEOUT might not be long enough? restart should do
+restart_nfs
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT_WITHIN $AUTH_REFRESH_INTERVAL "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Test if path is parsed correctly
+## by mounting host:vol/ instead of host:vol
+EXPECT "Y" check_mount_success $V0/
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST export_allow_this_host_with_slash
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+EXPECT "Y" check_mount_success $V0/
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+
+## Turn off exports authentication
+$CLI vol stop $V0
+TEST $CLI vol set $V0 nfs.exports-auth-enable off
+$CLI vol start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST export_deny_this_host # Deny the host
+TEST netgroup_deny_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0 # Do a mount & test
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Turn back on the exports authentication
+$CLI vol stop $V0
+TEST $CLI vol set $V0 nfs.exports-auth-enable on
+$CLI vol start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Do a simple test to set the refresh time to 20 seconds
+TEST $CLI vol set $V0 nfs.auth-refresh-interval-sec 20
+
+## Do a simple test to see if the volume option exists
+TEST $CLI vol set $V0 nfs.auth-cache-ttl-sec 400
+
+## Finish up
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup
diff --git a/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
new file mode 100644
index 00000000000..5a88f97d726
--- /dev/null
+++ b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
+
+function count_up_bricks {
+ $CLI --xml volume status all | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST ! $CLI volume set all cluster.max-bricks-per-process -1
+TEST ! $CLI volume set all cluster.max-bricks-per-process foobar
+TEST $CLI volume set all cluster.max-bricks-per-process 3
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0..5}
+TEST $CLI volume start $V0
+
+EXPECT 2 count_brick_processes
+EXPECT 2 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+TEST $CLI volume add-brick $V0 $H0:$B0/brick6
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start
+TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+cleanup;
diff --git a/tests/000-flaky/bugs_distribute_bug-1117851.t b/tests/000-flaky/bugs_distribute_bug-1117851.t
new file mode 100644
index 00000000000..5980bf2fd4b
--- /dev/null
+++ b/tests/000-flaky/bugs_distribute_bug-1117851.t
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+create_files () {
+ for i in {1..1000}; do
+ orig=$(printf %s/abc%04d $1 $i)
+ real=$(printf %s/src%04d $1 $i)
+ # Make sure lots of these have linkfiles.
+ echo "This is file $i" > $orig
+ mv $orig $real
+ done
+ sync
+}
+
+move_files_inner () {
+ sfile=$M0/status_$(basename $1)
+ for i in {1..1000}; do
+ src=$(printf %s/src%04d $1 $i)
+ dst=$(printf %s/dst%04d $1 $i)
+ mv $src $dst 2> /dev/null
+ done
+ echo "done" > $sfile
+}
+
+move_files () {
+ #Create the status file here to prevent spurious failures
+ #caused by the file not being created in time by the
+ #background process
+ sfile=$M0/status_$(basename $1)
+ echo "running" > $sfile
+ move_files_inner $* &
+}
+
+check_files () {
+ errors=0
+ for i in {1..1000}; do
+ if [ ! -f $(printf %s/dst%04d $1 $i) ]; then
+ if [ -f $(printf %s/src%04d $1 $i) ]; then
+ echo "file $i didnt get moved" > /dev/stderr
+ else
+ echo "file $i is MISSING" > /dev/stderr
+ errors=$((errors+1))
+ fi
+ fi
+ done
+ if [ $((errors)) != 0 ]; then
+ : ls -l $1 > /dev/stderr
+ fi
+ return $errors
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+TEST create_files $M0
+
+## Mount FUSE with caching disabled (read-write) again
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M1;
+
+TEST move_files $M0
+TEST move_files $M1
+
+# It's regrettable that renaming 1000 files might take more than 30 seconds,
+# but on our test systems sometimes it does, so double the time from what we'd
+# use otherwise. There still seem to be some spurious failures, 1 in 20 when
+# this does not complete, added an additional 60 seconds to take false reports
+# out of the system, during test runs, especially on slower test systems.
+EXPECT_WITHIN 120 "done" cat $M0/status_0
+EXPECT_WITHIN 120 "done" cat $M1/status_1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+TEST check_files $M0
+
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/000-flaky/bugs_distribute_bug-1122443.t b/tests/000-flaky/bugs_distribute_bug-1122443.t
new file mode 100644
index 00000000000..abd37082b33
--- /dev/null
+++ b/tests/000-flaky/bugs_distribute_bug-1122443.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+make_files() {
+ mkdir $1 && \
+ ln -s ../ $1/symlink && \
+ mknod $1/special_b b 1 2 && \
+ mknod $1/special_c c 3 4 && \
+ mknod $1/special_u u 5 6 && \
+ mknod $1/special_p p && \
+ touch -h --date=@1 $1/symlink && \
+ touch -h --date=@2 $1/special_b &&
+ touch -h --date=@3 $1/special_c &&
+ touch -h --date=@4 $1/special_u &&
+ touch -h --date=@5 $1/special_p
+}
+
+bug_1113050_workaround() {
+ # Test if graph change has settled (bug-1113050?)
+ test=$(stat -c "%n:%Y" $1 2>&1 | tr '\n' ',')
+ if [ $? -eq 0 ] ; then
+ echo RECONNECTED
+ else
+ echo WAITING
+ fi
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+
+# Mount FUSE and create symlink
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST make_files $M0/subdir
+
+# Get mtime before migration
+BEFORE="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $BEFORE
+# Migrate brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}0"
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 commit
+
+# Get mtime after migration
+EXPECT_WITHIN 30 RECONNECTED bug_1113050_workaround $M0/subdir/symlink
+sleep 3
+AFTER="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $AFTER
+# Check if mtime is unchanged
+TEST [ "$AFTER" == "$BEFORE" ]
+
+cleanup
diff --git a/tests/000-flaky/bugs_glusterd_bug-857330/common.rc b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
new file mode 100644
index 00000000000..bd122eff18c
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
@@ -0,0 +1,57 @@
+. $(dirname $0)/../../include.rc
+
+UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
+
+TASK_ID=""
+COMMAND=""
+PATTERN=""
+
+function check-and-store-task-id()
+{
+ TASK_ID=""
+
+ local task_id=$($CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX")
+
+ if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then
+ return 1
+ fi
+
+ TASK_ID=$task_id
+ return 0;
+}
+
+function get-task-id()
+{
+ $CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX" | tail -n1
+
+}
+
+function check-and-store-task-id-xml()
+{
+ TASK_ID=""
+
+ local task_id=$($CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX")
+
+ if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then
+ return 1
+ fi
+
+ TASK_ID=$task_id
+ return 0;
+}
+
+function get-task-id-xml()
+{
+ $CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX"
+}
+
+function get-task-status()
+{
+ pattern=$1
+ val=1
+ test=$(gluster $COMMAND | grep -o $pattern 2>&1)
+ if [ $? -eq 0 ]; then
+ val=0
+ fi
+ echo $val
+}
diff --git a/tests/000-flaky/bugs_glusterd_bug-857330/normal.t b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
new file mode 100755
index 00000000000..6c1cf54ec3c
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/common.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+TEST $CLI volume info $V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
+
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
+ --multi -b 10 -d 10 -n 10 $M0;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+###############
+## Rebalance ##
+###############
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}3 $H0:$B0/${V0}4;
+
+COMMAND="volume rebalance $V0 start"
+PATTERN="ID:"
+TEST check-and-store-task-id
+
+COMMAND="volume status $V0"
+PATTERN="ID"
+EXPECT $TASK_ID get-task-id
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+###################
+## Replace-brick ##
+###################
+REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}5"
+
+TEST $CLI volume replace-brick $V0 $REP_BRICK_PAIR commit force;
+
+##################
+## Remove-brick ##
+##################
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}5 start"
+PATTERN="ID:"
+TEST check-and-store-task-id
+
+COMMAND="volume status $V0"
+PATTERN="ID"
+EXPECT $TASK_ID get-task-id
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}5 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}5 commit
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/000-flaky/bugs_glusterd_bug-857330/xml.t b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
new file mode 100755
index 00000000000..11785adacdb
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/common.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+TEST $CLI volume info $V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
+
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
+ --multi -b 10 -d 10 -n 10 $M0;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+###############
+## Rebalance ##
+###############
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}3 $H0:$B0/${V0}4;
+
+COMMAND="volume rebalance $V0 start"
+PATTERN="task-id"
+TEST check-and-store-task-id-xml
+
+COMMAND="volume status $V0"
+PATTERN="id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+## TODO: Add tests for rebalance stop
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+###################
+## Replace-brick ##
+###################
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}4 $H0:$B0/${V0}5 commit force
+
+##################
+## Remove-brick ##
+##################
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 start"
+PATTERN="task-id"
+TEST check-and-store-task-id-xml
+
+COMMAND="volume status $V0"
+PATTERN="id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 status"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+## TODO: Add tests for remove-brick stop
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 commit"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/000-flaky/bugs_glusterd_quorum-value-check.t b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
new file mode 100644
index 00000000000..a431b8c4fd4
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function check_quorum_nfs() {
+ local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')"
+ local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')"
+
+ if [ $qnfs = $qinfo ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+TEST $CLI volume set $V0 cluster.quorum-count 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+TEST $CLI volume set $V0 cluster.quorum-count 3
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+
+cleanup;
diff --git a/tests/000-flaky/bugs_nfs_bug-1116503.t b/tests/000-flaky/bugs_nfs_bug-1116503.t
new file mode 100644
index 00000000000..fc50021acc7
--- /dev/null
+++ b/tests/000-flaky/bugs_nfs_bug-1116503.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Verify that mounting NFS over UDP (MOUNT service only) works.
+#
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 nfs.mount-udp on
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp;
+TEST mkdir -p $N0/foo/bar
+TEST ls $N0/foo
+TEST ls $N0/foo/bar
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+
+cleanup;
diff --git a/tests/000-flaky/features_lock-migration_lkmigration-set-option.t b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
new file mode 100644
index 00000000000..1327ef3579f
--- /dev/null
+++ b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Test to check
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#Check lock-migration set option sanity
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 lock-migration on
+EXPECT "on" echo `$CLI volume info | grep lock-migration | awk '{print $2}'`
+TEST $CLI volume set $V0 lock-migration off
+EXPECT "off" echo `$CLI volume info | grep lock-migration | awk '{print $2}'`
+TEST ! $CLI volume set $V0 lock-migration garbage
+#make sure it is still off
+EXPECT "off" echo `$CLI volume info | grep lock-migration | awk '{print $2}'`
+
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+
+#create a afr volume and make sure option setting fails
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+TEST ! $CLI volume set $V0 lock-migration on
+
+cleanup;
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000000..09c98576987
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,43 @@
+Regression tests framework for GlusterFS
+========================================
+
+## Prereq
+- Build and install the version of glusterfs with your changes. Make
+ sure the installed version is accessible from $PATH.
+
+## Prereq for geo-rep regression tests.
+- Passwordless ssh on the test system to itself
+- arequal-checksum installed on the test-system.
+ You can find the repo here - https://github.com/raghavendrabhat/arequal
+
+## How-To
+- To mount glusterfs, NEVER use 'mount -t glusterfs', instead use
+ 'glusterfs -s ' method. This is because with the patch build setup
+ doesnot install the /sbin/mount.glusterfs necessary, where as the
+ glusterfs binary will be accessible with $PATH, and will pick the
+ right version.
+- (optional) Set environment variables to specify location of
+ export directories and mount points. Unless you have special
+ requirements, the defaults should just work. The variables
+ themselves can be found at the top of tests/include.rc. All
+ of them can be overriden with environment variables.
+
+## Usage
+- Execute `/usr/share/glusterfs/run-tests.sh` as root.
+
+- If you want to run individual tests located in `/usr/share/glusterfs/tests`
+ as opposed to the full test-suite, invoke it as
+ `/usr/share/glusterfs/run-tests.sh [pattern]*`, where pattern can be:
+ - the trailing parts of the full path of a test,
+ e.g. `tests/basic/mount.t`
+ - the name of a file or directory, e.g `self-heal.t` or `basic/`
+ - bug number, which will match against numbered bugs in the
+ `tests/bugs/` directory.
+ - a glob pattern (see `man 7 glob` for mor info on globs)
+
+- To execute single ".t" file, use "prove -vf /path/to/.t"
+- If some test cases fail, report to GlusterFS community at
+ `gluster-devel@gluster.org`.
+
+## Reminder
+- BE WARNED THAT THE TEST CASES DELETE ``GLUSTERD_WORKDIR`` * !!!
diff --git a/tests/afr.rc b/tests/afr.rc
new file mode 100644
index 00000000000..241789903ba
--- /dev/null
+++ b/tests/afr.rc
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+function create_brick_xattrop_entry {
+ local xattrop_dir=$(afr_get_index_path $1)
+ local base_entry=`ls $xattrop_dir|grep xattrop`
+ local gfid_str
+ local params=`echo "$@" | cut -d' ' -f2-`
+ echo $params
+
+ for file in $params
+ do
+ gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $1/$file))
+ ln $xattrop_dir/$base_entry $xattrop_dir/$gfid_str
+ done
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+# Check if given dir's self-heal is done
+function is_dir_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+# Check if the given file's self-heal is done
+function is_file_heal_done {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zxattr="000000000000000000000000"
+ local size1=$(stat -c "%s" $f1_path)
+ local size2=$(stat -c "%s" $f2_path)
+ local diff=$((size1-size2))
+ local x11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local x12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local x21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local x22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $x11 ]; then x11="000000000000000000000000"; fi
+ if [ -z $x12 ]; then x12="000000000000000000000000"; fi
+ if [ -z $x21 ]; then x21="000000000000000000000000"; fi
+ if [ -z $x22 ]; then x22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ if [ "${diff}${x11}${x12}${x21}${x22}${dirty1}${dirty2}" == "0${zxattr}${zxattr}${zxattr}${zxattr}${zxattr}${zxattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+#count the number of entries marked for self-heal
+#in brick $1's index
+
+function count_index_entries()
+{
+ ls $1/.glusterfs/indices/xattrop | wc -l
+}
+
+function afr_up_status()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ grep -E "^up = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'='
+}
+
+function get_quorum_type()
+{
+ local m="$1"
+ local v="$2"
+ local repl_id="$3"
+ cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}'
+}
+
+function afr_private_key_value()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ local key=$4
+#xargs at the end will strip leading spaces
+ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
+}
+
+function afr_anon_entry_count()
+{
+ local b=$1
+ ls $b/.glusterfs-anonymous-inode* | wc -l
+}
diff --git a/tests/basic/0symbol-check.t b/tests/basic/0symbol-check.t
new file mode 100755
index 00000000000..69485a516af
--- /dev/null
+++ b/tests/basic/0symbol-check.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+
+. $(dirname $0)/../include.rc
+
+buildscratch=""
+
+case $OSTYPE in
+Linux)
+ ;;
+*)
+ echo "Skip Linux specific test" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+esac
+
+# look in the usual places for the build tree
+if [ -d /build/scratch ]; then
+ buildscratch="/build/scratch"
+else
+ # might be in developer's tree
+ if [ -d ./libglusterfs/src/.libs ]; then
+ buildscratch="."
+ elif [ -d ../libglusterfs/src/.libs ]; then
+ buildscratch=".."
+ fi
+fi
+
+if [ -z ${buildscratch} ]; then
+ echo "could find build tree in /build/scratch, . or .." >&2
+ SKIP_TESTS
+ exit 0
+fi
+
+# check symbols
+
+rm -f ./.symbol-check-errors
+
+TEST find ${buildscratch} -name \*.o -exec ./tests/basic/symbol-check.sh {} \\\;
+
+TEST [ ! -e ./.symbol-check-errors ]
+
+rm -f ./.symbol-check-errors
+
+cleanup
diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t
new file mode 100644
index 00000000000..c847e22977f
--- /dev/null
+++ b/tests/basic/afr/add-brick-self-heal.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Add brick1
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+# New-brick should accuse the old-bricks (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
+
+# Check if pending xattr and dirty-xattr are set for newly-added-brick
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+
+cleanup;
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
new file mode 100644
index 00000000000..896ba0c9b2c
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#Test that anon-inode entry is not cleaned up as long as there exists at least
+#one valid entry
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/a $M0/b
+
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/a $M0/a-new
+TEST mv $M0/b $M0/b-new
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! ls $M0/a
+TEST ! ls $M0/b
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+#Make sure index heal doesn't happen after enabling heal
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+#Allow time for a scan
+sleep 5
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
+TEST rm -f $M0/a-new
+TEST stat $M0/b-new
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
+
+cleanup
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
new file mode 100644
index 00000000000..f4cf37a2fa0
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+#Tests that afr-anon-inode test cases work fine as expected
+#These are cases where in entry-heal/name-heal we dont know entry for an inode
+#so these inodes are kept in a special directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST mkdir -p $M0/d1/b $M0/d2/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/d2/a $M0/d1
+TEST mv $M0/d1/b $M0/d2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
+
+TEST ! ls $M0/$anon_inode_name
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
+
+#Test purging code path by shd
+TEST $CLI volume heal $V0 disable
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
+TEST ln $M0/del-file $M0/del-file-link
+TEST ln $M0/l0/file $M0/l1/file-link1
+TEST ln $M0/l0/file $M0/l2/file-link2
+TEST mkdir -p $M0/del-recursive-dir/d1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/del-file $M0/del-file-nolink
+TEST rm -rf $M0/del-recursive-dir
+TEST mv $M0/d1/a $M0/d2
+TEST mv $M0/l0/file $M0/l0/renamed-file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
+
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
+TEST ! stat $M0/del-file
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST ! stat $M0/del-file-nolink
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
+TEST ! stat $M0/del-recursive-dir
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+TEST ! stat $M0/d1/a
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST ! stat $M0/l0/file
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+TEST ! stat $M0/l1/file-link1
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
+TEST ! stat $M0/l2/file-link2
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
+
+#Simulate only anon-inodes present in all bricks
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
+
+#Test that shd doesn't cleanup anon-inodes when some bricks are down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+$CLI volume heal $V0
+sleep 5 #Allow time for completion of one scan
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
+
+#Test that rename indeed happened instead of rmdir/mkdir
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
+cleanup;
diff --git a/tests/basic/afr/afr-no-fsync.t b/tests/basic/afr/afr-no-fsync.t
new file mode 100644
index 00000000000..0966d9b0a11
--- /dev/null
+++ b/tests/basic/afr/afr-no-fsync.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+#Tests that sequential write workload doesn't lead to FSYNCs
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/a bs=1M count=500
+TEST ! "$CLI volume profile $V0 info incremental | grep FSYNC"
+
+cleanup;
diff --git a/tests/basic/afr/afr-read-hash-mode.t b/tests/basic/afr/afr-read-hash-mode.t
new file mode 100644
index 00000000000..eeff10d8ebd
--- /dev/null
+++ b/tests/basic/afr/afr-read-hash-mode.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function reads_brick_count {
+ $CLI volume profile $V0 info incremental | grep -w READ | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..2}
+
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+
+# Disable all caching
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+TEST dd if=/dev/urandom of=$M0/FILE bs=1M count=8
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# TEST if the option gives the intended behavior. The way we perform this test
+# is by performing reads from the mount and write to /dev/null. If the
+# read-hash-mode is 3, then for a given file, more than 1 brick should serve the
+# read-fops where as with the default read-hash-mode (i.e. 1), only 1 brick will.
+
+# read-hash-mode=1
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT "1" mount_get_option_value $M0 $V0-replicate-0 read-hash-mode
+TEST $CLI volume profile $V0 start
+TEST dd if=$M0/FILE of=/dev/null bs=1M
+count=`reads_brick_count`
+TEST [ $count -eq 1 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# read-hash-mode=3
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+TEST $CLI volume set $V0 cluster.read-hash-mode 3
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "3" mount_get_option_value $M0 $V0-replicate-0 read-hash-mode
+TEST $CLI volume profile $V0 info clear
+TEST dd if=$M0/FILE of=/dev/null bs=1M
+count=`reads_brick_count`
+TEST [ $count -eq 2 ]
+
+# Check that the arbiter did not serve any reads
+arbiter_reads=$($CLI volume top $V0 read brick $H0:$B0/${V0}2|grep FILE|awk '{print $1}')
+TEST [ -z $arbiter_reads ]
+
+cleanup;
diff --git a/tests/basic/afr/afr-seek.t b/tests/basic/afr/afr-seek.t
new file mode 100644
index 00000000000..c12ee011660
--- /dev/null
+++ b/tests/basic/afr/afr-seek.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SEEK=$(dirname $0)/seek
+build_tester $(dirname $0)/../seek.c -o ${SEEK}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0..2}
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST ${SEEK} create ${M0}/test 0 1 1048576 1
+# Determine underlying filesystem allocation block size
+BSIZE="$(($(${SEEK} scan ${M0}/test hole 0) * 2))"
+
+TEST ${SEEK} create ${M0}/test 0 ${BSIZE} $((${BSIZE} * 4 + 512)) ${BSIZE}
+
+EXPECT "^0$" ${SEEK} scan ${M0}/test data 0
+EXPECT "^$((${BSIZE} / 2))$" ${SEEK} scan ${M0}/test data $((${BSIZE} / 2))
+EXPECT "^$((${BSIZE} - 1))$" ${SEEK} scan ${M0}/test data $((${BSIZE} - 1))
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data ${BSIZE}
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 511))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 6))
+
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole 0
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} / 2))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} - 1))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole ${BSIZE}
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 6))
+
+rm -f ${SEEK}
+cleanup
+
+# Centos6 regression slaves seem to not support SEEK_DATA/SEEK_HOLE
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/afr/afr-up.t b/tests/basic/afr/afr-up.t
new file mode 100644
index 00000000000..428aac875e0
--- /dev/null
+++ b/tests/basic/afr/afr-up.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+#Tests that afr up/down works as expected
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,3,4,5,6}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "1" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+
+#kill two bricks in first replica and check that afr_up_status is 0 for it
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+cleanup;
diff --git a/tests/basic/afr/arbiter-add-brick.t b/tests/basic/afr/arbiter-add-brick.t
new file mode 100644
index 00000000000..77b93d9a210
--- /dev/null
+++ b/tests/basic/afr/arbiter-add-brick.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume and create file/dir.
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST mkdir $M0/dir1
+TEST dd if=/dev/urandom of=$M0/file1 bs=1024 count=1
+
+#Kill second brick and perform I/O to have pending heals.
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mkdir $M0/dir2
+TEST dd if=/dev/urandom of=$M0/file1 bs=1024 count=1024
+
+
+#convert replica 2 to arbiter volume
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#syntax check for add-brick.
+TEST ! $CLI volume add-brick $V0 replica 2 arbiter 1 $H0:$B0/${V0}2
+TEST ! $CLI volume add-brick $V0 replica 3 arbiter 2 $H0:$B0/${V0}2
+
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Trigger name heals from client. If we just rely on index heal, the first index
+#crawl on B0 fails for /, dir2 and /file either due to lock collision or files
+#not being present on the other 2 bricks yet. It is getting healed only in the
+#next crawl after priv->shd.timeout (600 seconds) or by manually launching
+#index heal again.
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST stat $M0/dir1
+TEST stat $M0/dir2
+TEST stat $M0/file1
+
+#Heal files
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#Perform I/O after add-brick
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST mkdir $M0/dir3
+TEST dd if=/dev/urandom of=$M0/file2 bs=1024 count=1024
+
+# File hierarchy must be same in all 3 bricks.
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
+
+#Mount serves the correct file size
+EXPECT "1048576" stat -c %s $M0/file1
+EXPECT "1048576" stat -c %s $M0/file2
+
+#Check file size in arbiter brick
+EXPECT "0" stat -c %s $B0/${V0}2/file1
+EXPECT "0" stat -c %s $B0/${V0}2/file2
+
+#Increasing replica count of arbiter volumes must not be allowed.
+TEST ! $CLI volume add-brick $V0 replica 4 $H0:$B0/${V0}3
+TEST ! $CLI volume add-brick $V0 replica 4 arbiter 1 $H0:$B0/${V0}3
+
+#Adding another distribute leg should succeed.
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}{3..5}
+TEST force_umount $M0
+cleanup;
diff --git a/tests/basic/afr/arbiter-cli.t b/tests/basic/afr/arbiter-cli.t
new file mode 100644
index 00000000000..ad79de79d02
--- /dev/null
+++ b/tests/basic/afr/arbiter-cli.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+# Negative test cases for arbiter volume creation should not crash.
+
+TEST glusterd;
+TEST pidof glusterd
+
+# No replica count.
+TEST ! $CLI volume create $V0 arbiter 3 $H0:$B0/${V0}{0,1,2}
+
+# replica count given after arbiter count.
+TEST ! $CLI volume create $V0 arbiter 1 replica 3 $H0:$B0/${V0}{0,1,2}
+
+# Incorrect values for replica and arbiter count.
+TEST ! $CLI volume create $V0 replica 3 arbiter 2 $H0:$B0/${V0}{0,1,2}
+
+# Correct setup
+# Only documented value is replica=2 and arbiter=1.
+TEST $CLI volume create $V0 replica 2 arbiter 1 $H0:$B0/${V0}{0,1,2}
+
+# Earlier documents mentioned 'replica 3 arbiter 1' as the valid option
+# Preserve backward compatibility till Oct, 2019.
+TEST $CLI volume create ${V0}-old replica 3 arbiter 1 $H0:$B0/${V0}-old{0,1,2}
+
+cleanup
diff --git a/tests/basic/afr/arbiter-mount.t b/tests/basic/afr/arbiter-mount.t
new file mode 100644
index 00000000000..404d334d2f9
--- /dev/null
+++ b/tests/basic/afr/arbiter-mount.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+#Check that mounting fails when only arbiter brick is up.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Doing `mount -t glusterfs $H0:$V0 $M0` fails right away but doesn't work on NetBSD
+# So check that stat <mount> fails instead.
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST ! stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -eq 0 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup
diff --git a/tests/basic/afr/arbiter-remove-brick.t b/tests/basic/afr/arbiter-remove-brick.t
new file mode 100644
index 00000000000..ec93c8758e4
--- /dev/null
+++ b/tests/basic/afr/arbiter-remove-brick.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create arbiter volume.
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#syntax check for remove-brick.
+TEST ! $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}0 force
+TEST ! $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}1 force
+
+#convert to replica 2 volume
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 force
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+TEST mkdir $M0/dir
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+
+#Mount serves the correct file size
+EXPECT "1048576" stat -c %s $M0/file
+
+#Check file size in bricks
+EXPECT "1048576" stat -c %s $B0/${V0}0/file
+EXPECT "1048576" stat -c %s $B0/${V0}1/file
+
+TEST force_umount $M0
+cleanup;
diff --git a/tests/basic/afr/arbiter-statfs.t b/tests/basic/afr/arbiter-statfs.t
new file mode 100644
index 00000000000..61cb9e1d04f
--- /dev/null
+++ b/tests/basic/afr/arbiter-statfs.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+#Test that statfs is not served from the arbiter brick.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+EXIT_EARLY=1
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 1G $B0/brick1
+TEST truncate -s 1G $B0/brick2
+#Arbiter brick is of a lesser size.
+TEST truncate -s 90M $B0/brick3
+LO1=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+LO3=`SETUP_LOOP $B0/brick3`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+TEST MOUNT_LOOP $LO3 $B0/${V0}3
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume start $V0
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+free_space=$(df -P $M0 | tail -1 | awk '{ print $4}')
+TEST [ $free_space -gt 100000 ]
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+UMOUNT_LOOP ${B0}/${V0}{1,2,3}
+rm -f ${B0}/brick{1,2,3}
+cleanup;
diff --git a/tests/basic/afr/arbiter.t b/tests/basic/afr/arbiter.t
new file mode 100644
index 00000000000..7c92a9fe6c9
--- /dev/null
+++ b/tests/basic/afr/arbiter.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+# Non arbiter replica 3 volumes should not have arbiter-count option enabled.
+TEST mkdir -p $B0/${V0}{0,1,2}
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ! stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+# Make sure we clean up *all the way* so we don't get "brick X is already part
+# of a volume" errors.
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+# Create and mount a replica 3 arbiter volume.
+TEST mkdir -p $B0/${V0}{0,1,2}
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
+EXPECT "1" cat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
+
+# Write data and metadata
+TEST `echo hello >> $M0/file`
+TEST setfattr -n user.name -v value1 $M0/file
+
+# Data I/O will fail if arbiter is the only source.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo "B0 is down, B1 and B2 are sources" >> $M0/file`
+TEST setfattr -n user.name -v value2 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "B2 is down, B3 is the only source, writes will fail" >> $M0/file
+EXPECT_NOT "0" echo $?
+TEST ! cat $M0/file
+# Though metadata IO could have been served from arbiter, we do not allow it
+# anymore as FOPS like getfattr could be overloaded to return iatt buffers for
+# use by other translators.
+TEST ! getfattr -n user.name $M0/file
+TEST ! setfattr -n user.name -v value3 $M0/file
+
+#shd should not data self-heal from arbiter to the sinks.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT '1' echo $(count_sh_entries $B0/$V0"1")
+EXPECT_WITHIN $HEAL_TIMEOUT '1' echo $(count_sh_entries $B0/$V0"2")
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST $CLI volume heal $V0
+EXPECT 0 get_pending_heal_count $V0
+
+# I/O can resume again.
+TEST cat $M0/file
+TEST getfattr -n user.name $M0/file
+TEST `echo append>> $M0/file`
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup
diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t
new file mode 100755
index 00000000000..1e9336184b5
--- /dev/null
+++ b/tests/basic/afr/client-side-heal.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+echo "some data" > $M0/datafile
+EXPECT 0 echo $?
+TEST touch $M0/mdatafile
+TEST touch $M0/mdatafile-backend-direct-modify
+TEST mkdir $M0/dir
+
+#Kill a brick and perform I/O to have pending heals.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" afr_child_up_status $V0 0
+
+#pending data heal
+echo "some more data" >> $M0/datafile
+EXPECT 0 echo $?
+
+#pending metadata heal
+TEST chmod +x $M0/mdatafile
+TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify
+
+#pending entry heal. Also causes pending metadata/data heals on file{1..5}
+TEST touch $M0/dir/file{1..5}
+
+EXPECT 8 get_pending_heal_count $V0
+
+#After brick comes back up, access from client should not trigger heals
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Medatada heal via explicit lookup must not happen
+TEST getfattr -d -m. -e hex $M0/mdatafile
+TEST ls $M0/mdatafile-backend-direct-modify
+
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+#Inode refresh must not trigger data metadata and entry heals.
+#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
+#Check that data heal does not happen.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST cat $M0/datafile
+#Check that entry heal does not happen.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/dir
+#No heal must have happened
+EXPECT 8 get_pending_heal_count $V0
+
+#Enable heal client side heal options and trigger heals
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Inode refresh must trigger data metadata and entry heals.
+#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/mdatafile-backend-direct-modify
+
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+
+TEST getfattr -d -m. -e hex $M0/mdatafile
+EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0
+
+TEST cat $M0/datafile
+EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/dir
+EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0
+
+TEST cat $M0/dir/file1
+TEST cat $M0/dir/file2
+TEST cat $M0/dir/file3
+TEST cat $M0/dir/file4
+TEST cat $M0/dir/file5
+
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+cleanup;
diff --git a/tests/basic/afr/data-self-heal.t b/tests/basic/afr/data-self-heal.t
new file mode 100644
index 00000000000..0f417b4a0ba
--- /dev/null
+++ b/tests/basic/afr/data-self-heal.t
@@ -0,0 +1,209 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+function create_xattrop_entry {
+ local xattrop_dir0=$(afr_get_index_path $B0/brick0)
+ local xattrop_dir1=$(afr_get_index_path $B0/brick1)
+ local base_entry_b0=`ls $xattrop_dir0`
+ local base_entry_b1=`ls $xattrop_dir1`
+ local gfid_str
+
+ for file in "$@"
+ do
+ gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/brick0/$file))
+ ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+ ln $xattrop_dir1/$base_entry_b1 $xattrop_dir1/$gfid_str
+ done
+}
+
+function is_heal_done {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local size1=$(stat -c "%s" $f1_path)
+ local size2=$(stat -c "%s" $f2_path)
+ local diff=$((size1-size2))
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ if [ "${diff}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}" == "0${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/brick0 $B0/brick1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+cd $M0
+TEST touch pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt split-brain.txt split-brain-all-dirty.txt split-brain-with-dirty.txt
+
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/brick0/pending-changelog
+TEST "echo abc > $B0/brick1/pending-changelog"
+
+TEST "echo abc > $B0/brick0/biggest-file-source.txt"
+TEST "echo abcd > $B0/brick1/biggest-file-source.txt"
+
+TEST "echo abc > $B0/brick0/biggest-file-more-prio-than-changelog.txt"
+TEST "echo abcd > $B0/brick1/biggest-file-more-prio-than-changelog.txt"
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/biggest-file-more-prio-than-changelog.txt
+
+TEST "echo abc > $B0/brick0/same-size-more-prio-to-changelog.txt"
+TEST "echo def > $B0/brick1/same-size-more-prio-to-changelog.txt"
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/same-size-more-prio-to-changelog.txt
+
+TEST "echo abc > $B0/brick0/size-and-witness-same.txt"
+TEST "echo def > $B0/brick1/size-and-witness-same.txt"
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/size-and-witness-same.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick1/size-and-witness-same.txt
+
+TEST "echo abc > $B0/brick0/split-brain.txt"
+TEST "echo def > $B0/brick1/split-brain.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/split-brain.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/split-brain.txt
+
+TEST "echo abc > $B0/brick0/split-brain-all-dirty.txt"
+TEST "echo def > $B0/brick1/split-brain-all-dirty.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/split-brain-all-dirty.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/split-brain-all-dirty.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/split-brain-all-dirty.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick1/split-brain-all-dirty.txt
+
+TEST "echo abc > $B0/brick0/split-brain-with-dirty.txt"
+TEST "echo def > $B0/brick1/split-brain-with-dirty.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/split-brain-with-dirty.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/split-brain-with-dirty.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick1/split-brain-with-dirty.txt
+
+TEST "echo def > $B0/brick1/self-accusing-vs-source.txt"
+TEST "echo abc > $B0/brick0/self-accusing-vs-source.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/self-accusing-vs-source.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-vs-source.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/self-accusing-vs-source.txt
+
+TEST "echo abc > $B0/brick0/self-accusing-both.txt"
+TEST "echo def > $B0/brick1/self-accusing-both.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/self-accusing-both.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/self-accusing-both.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/self-accusing-both.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-both.txt
+
+TEST "echo abc > $B0/brick0/self-accusing-vs-innocent.txt"
+TEST "echo def > $B0/brick1/self-accusing-vs-innocent.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-vs-innocent.txt
+
+TEST "echo abc > $B0/brick0/self-accusing-bigger-exists.txt"
+TEST "echo def > $B0/brick1/self-accusing-bigger-exists.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/self-accusing-bigger-exists.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000300000000000000000 $B0/brick0/self-accusing-bigger-exists.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/self-accusing-bigger-exists.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-bigger-exists.txt
+
+TEST "echo abc > $B0/brick0/size-more-prio-than-self-accused.txt"
+TEST "echo defg > $B0/brick1/size-more-prio-than-self-accused.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/size-more-prio-than-self-accused.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000300000000000000000 $B0/brick0/size-more-prio-than-self-accused.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/size-more-prio-than-self-accused.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/size-more-prio-than-self-accused.txt
+
+TEST "echo abc > $B0/brick0/v1-dirty.txt"
+TEST "echo def > $B0/brick1/v1-dirty.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/v1-dirty.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000100000000000000000 $B0/brick1/v1-dirty.txt
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/brick0
+EXPECT "1" count_index_entries $B0/brick1
+cd -
+
+#Create gfid hard links for all files before triggering index heals.
+create_xattrop_entry pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 split-brain.txt
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 split-brain-all-dirty.txt
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 split-brain-with-dirty.txt
+
+EXPECT "0" stat -c "%s" $M0/pending-changelog
+TEST cmp $B0/brick0/pending-changelog $B0/brick1/pending-changelog
+
+EXPECT "abcd" cat $M0/biggest-file-source.txt
+TEST cmp $B0/brick0/biggest-file-source.txt $B0/brick1/biggest-file-source.txt
+
+EXPECT "abcd" cat $M0/biggest-file-more-prio-than-changelog.txt
+TEST cmp $B0/brick0/biggest-file-more-prio-than-changelog.txt $B0/brick1/biggest-file-more-prio-than-changelog.txt
+
+EXPECT "abc" cat $M0/same-size-more-prio-to-changelog.txt
+TEST cmp $B0/brick0/same-size-more-prio-to-changelog.txt $B0/brick1/same-size-more-prio-to-changelog.txt
+
+EXPECT "(abc|def)" cat $M0/size-and-witness-same.txt
+TEST cmp $B0/brick0/size-and-witness-same.txt $B0/brick1/size-and-witness-same.txt
+
+TEST ! cat $M0/split-brain.txt
+TEST ! cat $M0/split-brain-all-dirty.txt
+TEST ! cat $M0/split-brain-with-dirty.txt
+
+EXPECT "abc" cat $M0/self-accusing-vs-source.txt
+TEST cmp $B0/brick0/self-accusing-vs-source.txt $B0/brick1/self-accusing-vs-source.txt
+
+EXPECT "(abc|def)" cat $M0/self-accusing-both.txt
+TEST cmp $B0/brick0/self-accusing-both.txt $B0/brick1/self-accusing-both.txt
+
+EXPECT "def" cat $M0/self-accusing-vs-innocent.txt
+TEST cmp $B0/brick0/self-accusing-vs-innocent.txt $B0/brick1/self-accusing-vs-innocent.txt
+
+EXPECT "abc" cat $M0/self-accusing-bigger-exists.txt
+TEST cmp $B0/brick0/self-accusing-bigger-exists.txt $B0/brick1/self-accusing-bigger-exists.txt
+
+EXPECT "defg" cat $M0/size-more-prio-than-self-accused.txt
+TEST cmp $B0/brick0/size-more-prio-than-self-accused.txt $B0/brick1/size-more-prio-than-self-accused.txt
+
+EXPECT "abc" cat $M0/v1-dirty.txt
+TEST cmp $B0/brick0/v1-dirty.txt $B0/brick1/v1-dirty.txt
+cleanup;
diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t
new file mode 100644
index 00000000000..6e0f18b88f8
--- /dev/null
+++ b/tests/basic/afr/durability-off.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+#This test tests that self-heals don't perform fsync when durability is turned
+#off
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 cluster.ensure-durability off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST dd of=$M0/a.txt if=/dev/zero bs=1024k count=1
+#NetBSD sends FSYNC so the counts go for a toss. Stop and start the volume.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#Test that fsyncs happen when durability is on
+TEST $CLI volume set $V0 cluster.ensure-durability on
+TEST $CLI volume set $V0 performance.strict-write-ordering on
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST dd of=$M0/a.txt if=/dev/zero bs=1024k count=1
+#NetBSD sends FSYNC so the counts go for a toss. Stop and start the volume.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT "^2$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
+
+cleanup;
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
new file mode 100644
index 00000000000..7bb6ee14193
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
@@ -0,0 +1,459 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+ stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod source_deletions_heal/block b 4 5
+TEST mknod source_deletions_heal/char c 1 5
+TEST touch source_deletions_heal/file
+TEST ln -s source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir source_deletions_heal/dir1
+TEST mkdir source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod source_deletions_me/block b 4 5
+TEST mknod source_deletions_me/char c 1 5
+TEST touch source_deletions_me/file
+TEST ln -s source_deletions_me/file source_deletions_me/slink
+TEST mkdir source_deletions_me/dir1
+TEST mkdir source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod source_self_accusing/block b 4 5
+TEST mknod source_self_accusing/char c 1 5
+TEST touch source_self_accusing/file
+TEST ln -s source_self_accusing/file source_self_accusing/slink
+TEST mkdir source_self_accusing/dir1
+TEST mkdir source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod source_creations_heal/block b 4 5
+TEST mknod source_creations_heal/char c 1 5
+TEST touch source_creations_heal/file
+TEST ln -s source_creations_heal/file source_creations_heal/slink
+TEST mkdir source_creations_heal/dir1
+TEST mkdir source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod source_creations_me/block b 4 5
+TEST mknod source_creations_me/char c 1 5
+TEST touch source_creations_me/file
+TEST ln -s source_creations_me/file source_creations_me/slink
+TEST mkdir source_creations_me/dir1
+TEST mkdir source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+#Anonymous directory shouldn't be created
+TEST mkdir $M0/rename-dir
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/rename-dir $M0/new-name
+TEST $CLI volume start $V0 force
+#'spb' is in split-brain so pending-heal-count will be 2
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
+EXPECT_NOT "$before_rename" echo $after_rename
+cleanup
diff --git a/tests/basic/afr/entry-self-heal.t b/tests/basic/afr/entry-self-heal.t
new file mode 100644
index 00000000000..0c1da7d211e
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal.t
@@ -0,0 +1,447 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+ stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod source_deletions_heal/block b 4 5
+TEST mknod source_deletions_heal/char c 1 5
+TEST touch source_deletions_heal/file
+TEST ln -s source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir source_deletions_heal/dir1
+TEST mkdir source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod source_deletions_me/block b 4 5
+TEST mknod source_deletions_me/char c 1 5
+TEST touch source_deletions_me/file
+TEST ln -s source_deletions_me/file source_deletions_me/slink
+TEST mkdir source_deletions_me/dir1
+TEST mkdir source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod source_self_accusing/block b 4 5
+TEST mknod source_self_accusing/char c 1 5
+TEST touch source_self_accusing/file
+TEST ln -s source_self_accusing/file source_self_accusing/slink
+TEST mkdir source_self_accusing/dir1
+TEST mkdir source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod source_creations_heal/block b 4 5
+TEST mknod source_creations_heal/char c 1 5
+TEST touch source_creations_heal/file
+TEST ln -s source_creations_heal/file source_creations_heal/slink
+TEST mkdir source_creations_heal/dir1
+TEST mkdir source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod source_creations_me/block b 4 5
+TEST mknod source_creations_me/char c 1 5
+TEST touch source_creations_me/file
+TEST ln -s source_creations_me/file source_creations_me/slink
+TEST mkdir source_creations_me/dir1
+TEST mkdir source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+cleanup
diff --git a/tests/basic/afr/gfid-heal.t b/tests/basic/afr/gfid-heal.t
new file mode 100644
index 00000000000..5e26e3307eb
--- /dev/null
+++ b/tests/basic/afr/gfid-heal.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+#gfid self-heal test on distributed replica. Make sure all the gfids are same
+#and the gfid exists on all the bricks
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_gfid_count {
+ getfattr -d -m. -e hex $B0/brick{0,1,2,3,4,5}/$1 2>&1 | grep trusted.gfid | grep -v gfid2path | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir $B0/brick{0,1,2,3}/{0..9}
+sleep 2 #to prevent is_fresh_file code path
+TEST stat $M0/{0..9}
+EXPECT 6 get_gfid_count 0
+EXPECT 6 get_gfid_count 1
+EXPECT 6 get_gfid_count 2
+EXPECT 6 get_gfid_count 3
+EXPECT 6 get_gfid_count 4
+EXPECT 6 get_gfid_count 5
+EXPECT 6 get_gfid_count 6
+EXPECT 6 get_gfid_count 7
+EXPECT 6 get_gfid_count 8
+EXPECT 6 get_gfid_count 9
+cleanup;
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-cli.t b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t
new file mode 100644
index 00000000000..b739ddc49cc
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+cd $M0
+
+##### Healing from latest mtime ######
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Sink based on mtime" > f1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Source based on mtime" > f1
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that first brick has the latest mtime
+LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain latest-mtime /f1
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5=$(md5sum $B0/${V0}1/f1 | awk '{print $1}')
+TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]
+
+
+##### Healing from bigger file ######
+
+TEST mkdir test
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Bigger file" > test/f2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Small file" > test/f2
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f2)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that second brick has the bigger file
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/test/f2 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain bigger-file /test/f2
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5=$(md5sum $B0/${V0}0/test/f2 | awk '{print $1}')
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ]
+
+
+#Add one more brick, and heal.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+
+##### Healing from source brick ######
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "We will consider these as sinks" > test/f3
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "We will take this as source" > test/f3
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3)
+TEST [ "$gfid_0" != "$gfid_1" ]
+TEST [ "$gfid_1" == "$gfid_2" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#We will try to heal the split-brain with bigger file option.
+#It should fail, since we have same file size in bricks 1 & 2.
+EXPECT "No bigger file for file /test/f3" $CLI volume heal $V0 split-brain bigger-file /test/f3
+
+#Now heal from taking the brick 0 as the source
+SOURCE_MD5=$(md5sum $B0/${V0}0/test/f3 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}0 /test/f3
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_0" == "$gfid_2" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5_1=$(md5sum $B0/${V0}1/test/f3 | awk '{print $1}')
+HEALED_MD5_2=$(md5sum $B0/${V0}2/test/f3 | awk '{print $1}')
+TEST [ "$SOURCE_MD5" == "$HEALED_MD5_1" ]
+TEST [ "$SOURCE_MD5" == "$HEALED_MD5_2" ]
+
+cd -
+cleanup;
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
new file mode 100644
index 00000000000..35e295dc170
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
@@ -0,0 +1,229 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+##### Healing with favorite-child-policy = mtime ######
+##### and self-heal-daemon ######
+
+TEST $CLI volume set $V0 favorite-child-policy mtime
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Sink based on mtime" > $M0/f1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Source based on mtime" > $M0/f1
+
+#Gfids of file f1 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that first brick has the latest mtime
+LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | cut -d\ -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}1/f1 | cut -d\ -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing with favorite-child-policy = ctime ######
+##### and self-heal-daemon ######
+
+#gfid split-brain resolution should work even when the granular-enrty-heal is
+#enabled
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST $CLI volume set $V0 favorite-child-policy ctime
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Sink based on ctime" > $M0/f2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Source based on ctime" > $M0/f2
+
+#Gfids of file f2 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f2)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#We know that second brick has the latest ctime
+LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/f2 | cut -d\ -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}0/f2 | cut -d\ -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ]
+
+
+#Add one more brick, and heal.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing using favorite-child-policy = size #####
+##### and client side heal #####
+
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Set the quorum-type to none, and create a gfid split brain
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Second smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Biggest among the three files" > $M0/f3
+
+#Bring back the down bricks.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Gfids of file f3 on all the bricks should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3)
+TEST [ "$gfid_0" != "$gfid_1" ]
+TEST [ "$gfid_0" != "$gfid_2" ]
+TEST [ "$gfid_1" != "$gfid_2" ]
+
+#We know that second brick has the bigger size file
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1)
+
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_2" == "$gfid_1" ]
+
+HEALED_MD5_1=$(md5sum $B0/${V0}0/f3 | cut -d\ -f1)
+HEALED_MD5_2=$(md5sum $B0/${V0}2/f3 | cut -d\ -f1)
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_1" ]
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_2" ]
+
+
+##### Healing using favorite-child-policy = majority #####
+##### and client side heal #####
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Does not agree with bricks 0 & 1" > $M0/f4
+
+TEST $CLI v start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Agree on bricks 0 & 1" > $M0/f4
+
+#Gfids of file f4 on bricks 0 & 1 should be same and bricks 0 & 2 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f4)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f4)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_0" != "$gfid_2" ]
+
+#We know that first and second bricks agree with each other. Pick any one of
+#them as source
+MAJORITY_MD5=$(md5sum $B0/${V0}0/f4 | cut -d\ -f1)
+
+#Bring back the down brick and heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
+TEST cat $M0/f4 #Trigger data heal via readv inode refresh
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4)
+TEST [ "$gfid_0" == "$gfid_2" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\ -f1)
+TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]
+
+cleanup;
diff --git a/tests/basic/afr/gfid-mismatch.t b/tests/basic/afr/gfid-mismatch.t
new file mode 100644
index 00000000000..fc15793cf5a
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+#Test that GFID mismatches result in EIO
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+# We can't count on brick0 getting a copy of the file immediately without this,
+# because (especially with multiplexing) it might not have *come up*
+# immediately.
+TEST $CLI volume set $V0 cluster.quorum-type auto
+TEST $GFS --volfile-id=$V0 -s $H0 $M0;
+
+#Test
+TEST touch $M0/file
+TEST setfattr -n trusted.gfid -v 0sBfz5vAdHTEK1GZ99qjqTIg== $B0/brick0/file
+TEST ! "find $M0/file"
+TEST ! "stat $M0/file"
+
+#Cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
diff --git a/tests/basic/afr/gfid-self-heal.t b/tests/basic/afr/gfid-self-heal.t
new file mode 100644
index 00000000000..5a530681186
--- /dev/null
+++ b/tests/basic/afr/gfid-self-heal.t
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+#Tests for files without gfids
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 nfs.disable on
+TEST touch $B0/${V0}{0,1}/{1,2,3,4}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+#Test that readdir returns entries even when no gfids are present
+EXPECT 4 echo $(ls $M0 | grep -v '^\.' | wc -l)
+sleep 2;
+#stat the files and check that the files have same gfids on the bricks now
+TEST stat $M0/1
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/1)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+TEST stat $M0/2
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/2)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+TEST stat $M0/3
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/3)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+TEST stat $M0/4
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/4)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/4)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+#Check gfid self-heal happens from one brick to other when a file has missing
+#gfid
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/a)
+TEST touch $B0/${V0}0/a
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST stat $M0/a
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/a)
+EXPECT $gfid_1 echo $gfid_0
+
+#Check gfid self-heal doesn't happen from one brick to other when type mismatch
+#is present for a name, without any xattrs
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/b
+TEST mkdir $B0/${V0}0/b
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! stat $M0/b
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/b)
+TEST "[[ -z \"$gfid_0\" ]]"
+
+#Check gfid assigning doesn't happen when there is type mismatch
+TEST touch $B0/${V0}1/c
+TEST mkdir $B0/${V0}0/c
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+TEST ! stat $M0/c
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/c)
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/c)
+TEST "[[ -z \"$gfid_1\" ]]"
+TEST "[[ -z \"$gfid_0\" ]]"
+
+#Check gfid assigning doesn't happen only when even one brick is down to prevent
+# gfid split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $B0/${V0}1/d
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+TEST ! stat $M0/d
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/d)
+TEST "[[ -z \"$gfid_1\" ]]"
+TEST $CLI volume start $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Check gfid self-heal doesn't happen from one brick to other when type mismatch
+#is present for a name without any pending xattrs
+#TEST kill_brick $V0 $H0 $B0/${V0}0
+#TEST touch $M0/e
+#TEST $CLI volume start $V0 force;
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#TEST kill_brick $V0 $H0 $B0/${V0}1
+#TEST mkdir $M0/e
+#TEST $CLI volume stop $V0 force;
+#TEST setfattr -x trusted.gfid $B0/${V0}1/e
+#TEST setfattr -x trusted.gfid $B0/${V0}0/e
+#TEST $CLI volume set $V0 cluster.entry-self-heal off
+#$CLI volume start $V0 force
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#TEST ! stat $M0/e
+#gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/e)
+#gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/e)
+#TEST "[[ -z \"$gfid_1\" ]]"
+#TEST "[[ -z \"$gfid_0\" ]]"
+
+#Check if lookup fails with gfid-mismatch of a file
+#is present for a name without any pending xattrs
+#TEST kill_brick $V0 $H0 $B0/${V0}0
+#TEST touch $M0/f
+#$CLI volume start $V0 force
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#TEST kill_brick $V0 $H0 $B0/${V0}1
+#TEST touch $M0/f
+#simulate no pending changelog
+#$CLI volume stop $V0 force
+#TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+#TEST setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0
+#$CLI volume start $V0 force
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#TEST ! stat $M0/f
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/add-brick.t b/tests/basic/afr/granular-esh/add-brick.t
new file mode 100644
index 00000000000..270cf1d32a6
--- /dev/null
+++ b/tests/basic/afr/granular-esh/add-brick.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Add brick1
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+
+# New-brick should accuse the old-bricks (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
+
+# Check if pending data, metadata and entry xattrs are set for newly-added-brick
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+
+# Also ensure we are not mistakenly tampering with the new-brick's changelog xattrs
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+# Check if dirty xattr is set for newly-added-brick
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+# Ensure all changelog xattrs are now back to zero.
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t
new file mode 100644
index 00000000000..10b6c6398da
--- /dev/null
+++ b/tests/basic/afr/granular-esh/cli.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+# Test that enabling the option should work on a newly created volume
+TEST $CLI volume set $V0 cluster.granular-entry-heal on
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
+
+#########################
+##### DISPERSE TEST #####
+#########################
+# Execute the same command on a disperse volume and make sure it fails.
+TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
+TEST $CLI volume start $V1
+TEST ! $CLI volume heal $V1 granular-entry-heal enable
+TEST ! $CLI volume heal $V1 granular-entry-heal disable
+
+######################
+### REPLICATE TEST ###
+######################
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+# Test that the volume-set way of enabling the option is disallowed
+TEST ! $CLI volume set $V0 granular-entry-heal on
+# Test that the volume-heal way of enabling the option is allowed
+TEST $CLI volume heal $V0 granular-entry-heal enable
+# Volume-reset of the option should be allowed
+TEST $CLI volume reset $V0 granular-entry-heal
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+EXPECT "enable" volume_option $V0 cluster.granular-entry-heal
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Disabling the option should work even when one or more bricks are down
+TEST $CLI volume heal $V0 granular-entry-heal disable
+# When a brick is down, 'enable' attempt should be failed
+TEST ! $CLI volume heal $V0 granular-entry-heal enable
+
+# Restart the killed brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# When all bricks are up, it should be possible to enable the option
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+# Kill brick-0 again
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Create files under root
+for i in {1..2}
+do
+ echo $i > $M0/f$i
+done
+
+# Test that the index associated with '/' is created on B1.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Check for successful creation of granular entry indices
+for i in {1..2}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Test if data was healed
+for i in {1..2}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Perform a volume-reset-all-options operation
+TEST $CLI volume reset $V0
+# Ensure that granular entry heal is also disabled
+EXPECT "no" volume_get_field $V0 cluster.granular-entry-heal
+EXPECT "off" volume_get_field $V0 cluster.entry-self-heal
+
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1399038
diff --git a/tests/basic/afr/granular-esh/conservative-merge.t b/tests/basic/afr/granular-esh/conservative-merge.t
new file mode 100644
index 00000000000..b170e47e0cb
--- /dev/null
+++ b/tests/basic/afr/granular-esh/conservative-merge.t
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST $GFS --volfile-id=$V0 -s $H0 $M0
+
+TEST mkdir $M0/dir
+gfid_dir=$(get_gfid_string $M0/dir)
+
+echo "1" > $M0/f1
+echo "2" > $M0/f2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST mkdir $M0/dir2
+gfid_dir2=$(get_gfid_string $M0/dir2)
+TEST unlink $M0/f1
+echo "3" > $M0/f3
+TEST mkdir $M0/dir/subdir
+gfid_subdir=$(get_gfid_string $M0/dir/subdir)
+echo "dir2-1" > $M0/dir2/f1
+echo "subdir-1" > $M0/dir/subdir/f1
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir2
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f3
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2/f1
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/subdir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir/f1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST mkdir $M0/dir3
+gfid_dir3=$(get_gfid_string $M0/dir3)
+# Root is now in split-brain.
+TEST mkdir $M0/dir/subdir2
+gfid_subdir2=$(get_gfid_string $M0/dir/subdir2)
+# /dir is now in split-brain.
+echo "4" > $M0/f4
+TEST unlink $M0/f2
+echo "dir3-1" > $M0/dir3/f1
+echo "subdir2-1" > $M0/dir/subdir2/f1
+
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/dir3
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f4
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir/subdir2
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir3
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir3/f1
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_subdir2
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_subdir2/f1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Make sure entry self-heal did the right thing in terms of impunging deleted
+# files in the event of a split-brain.
+TEST stat $M0/f1
+TEST stat $M0/f2
+
+# Test if data was healed
+for i in {1..4}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+TEST diff $B0/${V0}0/dir2/f1 $B0/${V0}1/dir2/f1
+EXPECT "dir2-1" cat $M0/dir2/f1
+
+TEST diff $B0/${V0}0/dir/subdir/f1 $B0/${V0}1/dir/subdir/f1
+EXPECT "subdir-1" cat $M0/dir/subdir/f1
+
+TEST diff $B0/${V0}0/dir3/f1 $B0/${V0}1/dir3/f1
+EXPECT "dir3-1" cat $M0/dir3/f1
+
+TEST diff $B0/${V0}0/dir/subdir2/f1 $B0/${V0}1/dir/subdir2/f1
+EXPECT "subdir2-1" cat $M0/dir/subdir2/f1
+
+# Verify that all name indices have been removed after a successful heal.
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir2
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/dir3
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f3
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f4
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/subdir
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir/subdir2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir3/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir2/f1
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir3
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir2
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/granular-esh.t b/tests/basic/afr/granular-esh/granular-esh.t
new file mode 100644
index 00000000000..de0e8f4290b
--- /dev/null
+++ b/tests/basic/afr/granular-esh/granular-esh.t
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=12
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create files under root
+for i in {1..4}
+do
+ echo $i > $M0/f$i
+done
+
+# Create a directory and few files under it
+TEST mkdir $M0/dir
+gfid_dir=$(get_gfid_string $M0/dir)
+
+for i in {1..3}
+do
+ echo $i > $M0/dir/f$i
+done
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Create more files
+for i in {5..6}
+do
+ echo $i > $M0/f$i
+done
+
+# Test that the index associated with '/' is created on B1.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Check for successful creation of granular entry indices
+for i in {5..6}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+# Delete an existing file
+TEST unlink $M0/f1
+
+# Rename an existing file
+TEST mv $M0/f2 $M0/f2_renamed
+
+# Create a hard link on f3
+TEST ln $M0/f3 $M0/link
+
+# Create a symlink on f4
+TEST ln -s $M0/f4 $M0/symlink
+
+# Check for successful creation of granular entry indices
+for i in {1..2}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2_renamed
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/link
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/symlink
+
+# Create a file and also delete it. This is to test deletion of stale indices during heal.
+TEST touch $M0/file_stale
+TEST unlink $M0/file_stale
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/file_stale
+
+# Create a directory and create its subdirs and files while a brick is down
+TEST mkdir -p $M0/newdir/newsubdir
+
+for i in {1..3}
+do
+ echo $i > $M0/newdir/f$i
+ echo $i > $M0/newdir/newsubdir/f$i
+done
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/newdir
+gfid_newdir=$(get_gfid_string $M0/newdir)
+gfid_newsubdir=$(get_gfid_string $M0/newdir/newsubdir)
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/newsubdir
+# Check if 'data' segment of the changelog is set for the newly created directories 'newdir' and 'newsubdir'
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}1/newdir trusted.afr.$V0-client-0 data
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}1/newdir/newsubdir trusted.afr.$V0-client-0 data
+
+# Test that removal of an entire sub-tree in the hierarchy works.
+TEST rm -rf $M0/dir
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f1
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f2
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Test if data was healed
+for i in {5..6}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+for i in {1..3}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/newdir/f$i $B0/${V0}1/newdir/f$i
+ TEST_IN_LOOP diff $B0/${V0}0/newdir/newsubdir/f$i $B0/${V0}1/newdir/newsubdir/f$i
+done
+
+# Verify that all the deleted names have been removed on the sink brick too by self-heal.
+TEST ! stat $B0/${V0}0/f1
+TEST ! stat $B0/${V0}0/f2
+TEST stat $B0/${V0}0/f2_renamed
+TEST stat $B0/${V0}0/symlink
+EXPECT "3" get_hard_link_count $B0/${V0}0/f3
+EXPECT "f4" readlink $B0/${V0}0/symlink
+TEST ! stat $B0/${V0}0/dir
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2_renamed
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/link
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/symlink
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/newdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/file_stale
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f3
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/f3
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir/f3
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t b/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t
new file mode 100644
index 00000000000..1b5421bf4b6
--- /dev/null
+++ b/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Create files under root
+for i in {1..2}
+do
+ echo $i > $M0/f$i
+done
+
+# Test that the index associated with '/' is created on B1.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Check for successful creation of granular entry indices
+for i in {1..2}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+# Now disable granular-entry-heal
+TEST $CLI volume heal $V0 granular-entry-heal disable
+
+# Start the brick that was down
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# Enable shd
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+# Now the indices created are granular but the heal is going to be of the
+# normal kind. We test to make sure that heal still completes fine and that
+# the stale granular indices are going to be deleted
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Test if data was healed
+for i in {1..2}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/replace-brick.t b/tests/basic/afr/granular-esh/replace-brick.t
new file mode 100644
index 00000000000..5fc7811a8d8
--- /dev/null
+++ b/tests/basic/afr/granular-esh/replace-brick.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Replace brick1
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
+
+# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
+
+# Check if data, metadata and entry segments of changelog are set for replaced-brick
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+
+# Also ensure we don't mistakenly tamper with the new brick's changelog xattrs
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1_new
+
+# Ensure the dirty xattr is set on the new brick.
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
+
+# To make sure that files were not lost from brick0
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
+# To make sure that data was not lost from brick0
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1_new
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+cleanup
diff --git a/tests/basic/afr/halo.t b/tests/basic/afr/halo.t
new file mode 100644
index 00000000000..3f61f5a0402
--- /dev/null
+++ b/tests/basic/afr/halo.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#Tests that halo basic functionality works as expected
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_up_child()
+{
+ if [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[0\]") ];
+ then
+ echo 0
+ elif [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[1\]") ]
+ then
+ echo 1
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.halo-enabled yes
+TEST $CLI volume set $V0 cluster.halo-max-replicas 1
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[0\]"
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[1\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+
+down_id=$((1-up_id))
+
+TEST kill_brick $V0 $H0 $B0/${V0}${up_id}
+#As max-replicas is configured to be 1, down_child should be up now
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${down_id}\]"
+
+#Bring the brick back up and the state should be restored
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+down_id=$((1-up_id))
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+cleanup;
diff --git a/tests/basic/afr/heal-info.t b/tests/basic/afr/heal-info.t
new file mode 100644
index 00000000000..46d65007c88
--- /dev/null
+++ b/tests/basic/afr/heal-info.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+#Test that parallel heal-info command execution doesn't result in spurious
+#entries with locking-scheme granular
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function heal_info_to_file {
+ while [ -f $M0/b.txt ]; do
+ $CLI volume heal $V0 info | grep -i number | grep -v 0 >> $1
+ done
+}
+
+function write_and_del_file {
+ dd of=$M0/a.txt if=/dev/zero bs=1024k count=100
+ rm -f $M0/b.txt
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 locking-scheme granular
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a.txt $M0/b.txt
+write_and_del_file &
+touch $B0/f1 $B0/f2
+heal_info_to_file $B0/f1 &
+heal_info_to_file $B0/f2 &
+wait
+EXPECT "^$" cat $B0/f1
+EXPECT "^$" cat $B0/f2
+
+cleanup;
diff --git a/tests/basic/afr/heal-quota.t b/tests/basic/afr/heal-quota.t
new file mode 100644
index 00000000000..96e23363da8
--- /dev/null
+++ b/tests/basic/afr/heal-quota.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#This file tests that heal succeeds even when quota is exceeded
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 10MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST touch $M0/a $M0/b
+dd if=/dev/zero of=$M0/b bs=1M count=7
+TEST kill_brick $V0 $H0 $B0/${V0}0
+dd if=/dev/zero of=$M0/a bs=1M count=12 #This shall fail
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+cleanup
diff --git a/tests/basic/afr/inodelk.t b/tests/basic/afr/inodelk.t
new file mode 100644
index 00000000000..a32aa8531b5
--- /dev/null
+++ b/tests/basic/afr/inodelk.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#This test tests that inodelk fails when quorum is not met. Also tests the
+#success case where inodelk is obtained and unlocks are done correctly.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+
+#Test success case
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+
+#If there is a problem with inodelk unlocking the following would hang.
+TEST mv $M0/dir2 $M0/dir1
+
+#Test failure case by bringing two of the bricks down
+#Test that the directory is not moved partially on some bricks but successful
+#on other subvol where quorum meets. Do that for both set of bricks
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! mv $M0/dir1 $M0/dir2
+
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST ! mv $M0/dir1 $M0/dir2
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+#Bring the bricks back up and try mv once more, it should succeed.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+TEST mv $M0/dir1 $M0/dir2
+cleanup;
+#Do similar tests on replica 2
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+#Because we don't know hashed subvol, do the same test twice bringing 1 brick
+#from each down, quorum calculation should allow it.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/dir1 $M0/dir2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+cleanup
diff --git a/tests/basic/afr/lk-quorum.t b/tests/basic/afr/lk-quorum.t
new file mode 100644
index 00000000000..3364d8a6a1b
--- /dev/null
+++ b/tests/basic/afr/lk-quorum.t
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for quorum-type option for replica 2
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'fixed' and quorum-count 2
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^fixed$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^2$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When any of the bricks is down lock/unlock should fail
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'fixed' and quorum-count 1
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When any of the bricks is down lock/unlock should succeed
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'auto'
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^auto$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When first brick is down lock/unlock should fail
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#When second brick is down lock/unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for replica 3
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#When any of the bricks is down lock/unlock should succeed
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#kill 3rd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#When any two of the bricks are down lock/unlock should fail
+#kill first,second bricks
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#kill 2nd,3rd bricks
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#kill 1st,3rd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+cleanup
diff --git a/tests/basic/afr/metadata-self-heal.t b/tests/basic/afr/metadata-self-heal.t
new file mode 100644
index 00000000000..275aecd2175
--- /dev/null
+++ b/tests/basic/afr/metadata-self-heal.t
@@ -0,0 +1,133 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+function is_heal_done {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local iatt1=$(stat -c "%g:%u:%A" $f1_path)
+ local iatt2=$(stat -c "%g:%u:%A" $f2_path)
+ local diff="1"
+ if [ "$iatt1" == "$iatt2" ]; then diff=0; fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ if [ "${diff}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}" == "0${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/brick0 $B0/brick1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+cd $M0
+
+TEST touch a
+
+#Test heal with pending xattrs
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST chmod 777 a
+TEST chown 100:100 a
+TEST setfattr -n trusted.abc -v 0x616263 a
+TEST setfattr -n trusted.def -v 0x646566 a
+permissions=$(stat -c "%A" a)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals a
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+EXPECT $permissions stat -c "%A" a
+EXPECT $permissions stat -c "%A" $B0/brick0/a
+EXPECT $permissions stat -c "%A" $B0/brick1/a
+
+EXPECT 100 stat -c "%g" a
+EXPECT 100 stat -c "%g" $B0/brick0/a
+EXPECT 100 stat -c "%g" $B0/brick1/a
+
+EXPECT 100 stat -c "%u" a
+EXPECT 100 stat -c "%u" $B0/brick0/a
+EXPECT 100 stat -c "%u" $B0/brick1/a
+
+EXPECT 616263 get_hex_xattr trusted.abc a
+EXPECT 616263 get_hex_xattr trusted.abc $B0/brick0/a
+EXPECT 616263 get_hex_xattr trusted.abc $B0/brick1/a
+
+EXPECT 646566 get_hex_xattr trusted.def a
+EXPECT 646566 get_hex_xattr trusted.def $B0/brick0/a
+EXPECT 646566 get_hex_xattr trusted.def $B0/brick1/a
+
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST setfattr -n trusted.abc -v 0x646566 a
+TEST setfattr -x trusted.def a
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals a
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 646566 get_hex_xattr trusted.abc a
+EXPECT 646566 get_hex_xattr trusted.abc $B0/brick0/a
+EXPECT 646566 get_hex_xattr trusted.abc $B0/brick1/a
+
+TEST ! getfattr -n trusted.def a
+TEST ! getfattr -n trusted.def $B0/brick0/a
+TEST ! getfattr -n trusted.def $B0/brick1/a
+
+#Test split-brain && iatt mismatch without any xattrs (this will be simulated)
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST touch b
+TEST touch c
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST chmod 777 b
+TEST chmod 777 c
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST chown 100:100 b
+TEST chown 100:100 c
+TEST $CLI volume stop $V0
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/brick1/c
+TEST setfattr -x trusted.afr.$V0-client-1 $B0/brick0/c
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals c
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 b
+
+cd -
+cleanup;
diff --git a/tests/basic/afr/name-self-heal.t b/tests/basic/afr/name-self-heal.t
new file mode 100644
index 00000000000..50fc2ecc6c2
--- /dev/null
+++ b/tests/basic/afr/name-self-heal.t
@@ -0,0 +1,112 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+#Check that when quorum is not enabled name-heal happens correctly
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/a
+TEST touch $M0/c
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/b
+TEST rm -f $M0/a
+TEST rm -f $M0/c
+TEST touch $M0/c #gfid mismatch case
+c_gfid=$(gf_get_gfid_xattr $B0/brick1/c)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! stat $M0/a
+TEST ! stat $B0/brick0/a
+TEST ! stat $B0/brick1/a
+
+TEST stat $M0/b
+TEST stat $B0/brick0/b
+TEST stat $B0/brick1/b
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]]
+
+TEST stat $M0/c
+TEST stat $B0/brick0/c
+TEST stat $B0/brick1/c
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]]
+
+cleanup;
+
+#Check that when quorum is enabled name-heal happens as expected
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/a
+TEST touch $M0/c
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/b
+TEST rm -f $M0/a
+TEST rm -f $M0/c
+TEST touch $M0/c #gfid mismatch case
+c_gfid=$(gf_get_gfid_xattr $B0/brick1/c)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! stat $M0/a
+TEST ! stat $B0/brick0/a
+TEST ! stat $B0/brick1/a
+TEST ! stat $B0/brick2/a
+
+TEST stat $M0/b
+TEST ! stat $B0/brick0/b #Name heal shouldn't be triggered
+TEST stat $B0/brick1/b
+TEST stat $B0/brick2/b
+
+TEST stat $M0/c
+TEST stat $B0/brick0/c
+TEST stat $B0/brick1/c
+TEST stat $B0/brick2/c
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]]
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0
+TEST stat $M0/b
+TEST stat $B0/brick0/b #Name heal should be triggered
+TEST stat $B0/brick1/b
+TEST stat $B0/brick2/b
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]]
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Missing parent xattrs cases
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST $CLI volume heal $V0 disable
+#In cases where a good parent doesn't have pending xattrs and a file,
+#name-heal will be triggered
+TEST gf_rm_file_and_gfid_link $B0/brick1 c
+TEST stat $M0/c
+TEST stat $B0/brick0/c
+TEST stat $B0/brick1/c
+TEST stat $B0/brick2/c
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]]
+cleanup
diff --git a/tests/basic/afr/quorum.t b/tests/basic/afr/quorum.t
new file mode 100644
index 00000000000..58116ba49f5
--- /dev/null
+++ b/tests/basic/afr/quorum.t
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+function test_write {
+ dd of=$M0/a if=/dev/urandom bs=1k count=1 2>&1 > /dev/null
+}
+
+#Tests for quorum-type option for replica 2
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+touch $M0/a
+echo abc > $M0/b
+
+TEST ! $CLI volume set $V0 cluster.quorum-type ""
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+EXPECT fixed volume_option $V0 cluster.quorum-type
+TEST $CLI volume set $V0 cluster.quorum-count 2
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT auto volume_option $V0 cluster.quorum-type
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+EXPECT none volume_option $V0 cluster.quorum-type
+TEST test_write
+#Default is 'none' for even number of bricks in replication
+TEST $CLI volume reset $V0 cluster.quorum-type
+TEST test_write
+EXPECT "abc" cat $M0/b
+
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for quorum-type option for replica 3
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+touch $M0/a
+echo abc > $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+EXPECT fixed volume_option $V0 cluster.quorum-type
+TEST $CLI volume set $V0 cluster.quorum-count 3
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT auto volume_option $V0 cluster.quorum-type
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+EXPECT none volume_option $V0 cluster.quorum-type
+TEST test_write
+#Default is 'auto' for odd number of bricks in replication
+TEST $CLI volume reset $V0 cluster.quorum-type
+EXPECT "^$" volume_option $V0 cluster.quorum-type
+TEST ! test_write
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST test_write
+cleanup;
diff --git a/tests/basic/afr/read-subvol-data.t b/tests/basic/afr/read-subvol-data.t
new file mode 100644
index 00000000000..39f43a15028
--- /dev/null
+++ b/tests/basic/afr/read-subvol-data.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+#Test if the source is selected based on data transaction for a regular file.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Test
+TEST $CLI volume set $V0 cluster.read-subvolume $V0-client-1
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST dd if=/dev/urandom of=$M0/afr_success_5.txt bs=1024k count=1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST dd if=/dev/urandom of=$M0/afr_success_5.txt bs=1024k count=10
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "10485760" echo `ls -l $M0/afr_success_5.txt | awk '{ print $5}'`
+
+#Cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
diff --git a/tests/basic/afr/read-subvol-entry.t b/tests/basic/afr/read-subvol-entry.t
new file mode 100644
index 00000000000..76b2dcf85b0
--- /dev/null
+++ b/tests/basic/afr/read-subvol-entry.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+#Test if the read child is selected based on entry transaction for directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Test
+TEST mkdir -p $M0/abc/def
+
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST kill_brick $V0 $H0 $B0/brick0
+
+TEST touch $M0/abc/def/ghi
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "ghi" echo `ls $M0/abc/def/`
+
+#Cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
new file mode 100644
index 00000000000..256ee2aafce
--- /dev/null
+++ b/tests/basic/afr/rename-data-loss.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#Self-heal tests
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST `echo "line1" >> file1`
+TEST mkdir dir1
+TEST mkdir dir2
+TEST mkdir -p dir1/dira/dirb
+TEST `echo "line1">>dir1/dira/dirb/file1`
+TEST mkdir delete_me
+TEST `echo "line1" >> delete_me/file1`
+
+#brick0 has witnessed the second write while brick1 is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST `echo "line2" >> file1`
+TEST `echo "line2" >> dir1/dira/dirb/file1`
+TEST `echo "line2" >> delete_me/file1`
+
+#Toggle the bricks that are up/down.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/brick0
+
+#Rename when the 'source' brick0 for data-selfheals is down.
+mv file1 file2
+mv dir1/dira dir2
+
+#Delete a dir when brick0 is down.
+rm -rf delete_me
+cd -
+
+#Bring everything up and trigger heal
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
+
+#Remount to avoid reading from caches
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "line2" tail -1 $M0/file2
+EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
+TEST ! stat $M0/delete_me/file1
+TEST ! stat $M0/delete_me
+
+anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/brick0/$anon_inode_name ]]
+TEST [[ -d $B0/brick1/$anon_inode_name ]]
+cleanup
diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t
new file mode 100644
index 00000000000..0360db71a2f
--- /dev/null
+++ b/tests/basic/afr/replace-brick-self-heal.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Replace brick1
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
+
+# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
+
+# Check if pending xattr and dirty-xattr are set for replaced-brick
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
+
+# To make sure that files were not lost from brick0
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
+# To make sure that data was not lost from brick0
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+cleanup;
diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t
new file mode 100644
index 00000000000..a741eee6e5e
--- /dev/null
+++ b/tests/basic/afr/resolve.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check that operations succeed after changing the disk of the brick while
+#a brick is down
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1;
+TEST cd $M0
+TEST mkdir -p a/b/c/d/e
+TEST cd a/b/c/d/e
+echo abc > g
+
+#Simulate disk replacement
+TEST kill_brick $V0 $H0 $B0/${V0}0
+rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a
+
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}0/.glusterfs && chmod 600 $B0/${V0}0/.glusterfs
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+#Test that the lookup returns ENOENT instead of ESTALE
+#If lookup returns ESTALE this command will fail with ESTALE
+TEST touch f
+
+#Test that ESTALE is ignored when there is a good copy
+EXPECT abc cat g
+
+#Simulate file changing only one mount
+#create the file on first mount
+echo ghi > $M0/b
+
+#re-create the file on other mount while one of the bricks is down.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M1/b
+echo jkl > $M1/b
+#Clear the extended attributes on the directory to create a scenario where
+#gfid-mismatch happened. This should result in EIO
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+# The kernel knows nothing about the tricks done to the volume, and the file
+# may still be in page cache. Wait for timeout.
+EXPECT_WITHIN 10 "^$" cat $M0/b
+cleanup
diff --git a/tests/basic/afr/root-squash-self-heal.t b/tests/basic/afr/root-squash-self-heal.t
new file mode 100644
index 00000000000..6e12098465a
--- /dev/null
+++ b/tests/basic/afr/root-squash-self-heal.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 server.root-squash on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes --use-readdirp=no $M0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo abc > $M0/a
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+find $M0 | xargs stat > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+cleanup
diff --git a/tests/basic/afr/self-heal.t b/tests/basic/afr/self-heal.t
new file mode 100644
index 00000000000..10fb152d046
--- /dev/null
+++ b/tests/basic/afr/self-heal.t
@@ -0,0 +1,244 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+AREQUAL_PATH=$(dirname $0)/../../utils
+AREQUAL_BIN=$AREQUAL_PATH/arequal-checksum
+CFLAGS=""
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+TEST [ -e $AREQUAL_BIN ]
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+###############################################################################
+#1.Test successful data, metadata and entry self-heal
+
+#Test
+TEST mkdir -p $M0/abc/def $M0/abc/ghi
+TEST dd if=/dev/urandom of=$M0/abc/file_abc.txt bs=1024k count=2 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/abc/def/file_abc_def_1.txt bs=1024k count=2 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/abc/def/file_abc_def_2.txt bs=1024k count=3 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/abc/ghi/file_abc_ghi.txt bs=1024k count=4 2>/dev/null
+
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST truncate -s 0 $M0/abc/def/file_abc_def_1.txt
+NEW_UID=36
+NEW_GID=36
+TEST chown $NEW_UID:$NEW_GID $M0/abc/def/file_abc_def_2.txt
+TEST rm -rf $M0/abc/ghi
+TEST mkdir -p $M0/def/ghi $M0/jkl/mno
+TEST dd if=/dev/urandom of=$M0/def/ghi/file1.txt bs=1024k count=2 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/def/ghi/file2.txt bs=1024k count=3 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/jkl/mno/file.txt bs=1024k count=4 2>/dev/null
+TEST chown $NEW_UID:$NEW_GID $M0/def/ghi/file2.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check all files created/deleted on brick1 are also replicated on brick 0
+#(i.e. no reverse heal has happened)
+TEST ls $B0/brick0/def/ghi/file1.txt
+TEST ls $B0/brick0/def/ghi/file2.txt
+TEST ls $B0/brick0/jkl/mno/file.txt
+TEST ! ls $B0/brick0/abc/ghi
+EXPECT "$NEW_UID$NEW_GID" stat -c %u%g $B0/brick0/abc/def/file_abc_def_2.txt
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#2.Test successful self-heal of different file types.
+
+#Test
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST rm -f $M0/file
+TEST mkdir $M0/file
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+TEST test -d $B0/brick0/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#3.Test successful self-heal of file permissions.
+
+#Test
+TEST touch $M0/file
+TEST chmod 666 $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST chmod 777 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT "777" stat -c %a $B0/brick0/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#4.Test successful self-heal of file ownership
+
+#Test
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+NEW_UID=36
+NEW_GID=36
+TEST chown $NEW_UID:$NEW_GID $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT "$NEW_UID$NEW_GID" stat -c %u%g $B0/brick0/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#5.File size test
+
+#Test
+TEST touch $M0/file
+TEST `echo "write1">$M0/file`
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST `echo "write2">>$M0/file`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST truncate -s 0 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT 0 stat -c %s $B0/brick1/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#6.GFID heal
+
+#Test
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST rm -f $M0/file
+TEST touch $M0/file
+GFID=$(gf_get_gfid_xattr $B0/brick1/file)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT "$GFID" gf_get_gfid_xattr $B0/brick0/file
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#7. Link/symlink heal
+
+#Test
+TEST touch $M0/file
+TEST ln $M0/file $M0/link_to_file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST rm -f $M0/link_to_file
+TEST ln -s $M0/file $M0/link_to_file
+TEST ln $M0/file $M0/hard_link_to_file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+TEST test -f $B0/brick0/hard_link_to_file
+TEST test -h $B0/brick0/link_to_file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#8. Heal xattrs set by application
+
+#Test
+TEST touch $M0/file
+TEST setfattr -n user.myattr_1 -v My_attribute_1 $M0/file
+TEST setfattr -n user.myattr_2 -v "My_attribute_2" $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST setfattr -n user.myattr_1 -v "My_attribute_1_modified" $M0/file
+TEST setfattr -n user.myattr_3 -v "My_attribute_3" $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+TEST diff <(echo "user.myattr_1=\"My_attribute_1_modified\"") <(getfattr -n user.myattr_1 $B0/brick1/file|grep user.myattr_1)
+TEST diff <(echo "user.myattr_3=\"My_attribute_3\"") <(getfattr -n user.myattr_3 $B0/brick1/file|grep user.myattr_3)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+TEST rm -rf $AREQUAL_BIN
+cleanup;
diff --git a/tests/basic/afr/self-heald.t b/tests/basic/afr/self-heald.t
new file mode 100644
index 00000000000..24c82777921
--- /dev/null
+++ b/tests/basic/afr/self-heald.t
@@ -0,0 +1,193 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+START_TIMESTAMP=`date +%s`
+
+function kill_multiple_bricks {
+ local vol=$1
+ local host=$2
+ local brickpath=$3
+
+ if [ $decide_kill == 0 ]
+ then
+ for ((i=0; i<=4; i=i+2)) do
+ TEST kill_brick $vol $host $brickpath/${vol}$i
+ done
+ else
+ for ((i=1; i<=5; i=i+2)) do
+ TEST kill_brick $vol $host $brickpath/${vol}$i
+ done
+ fi
+}
+function check_bricks_up {
+ local vol=$1
+ if [ $decide_kill == 0 ]
+ then
+ for ((i=0; i<=4; i=i+2)) do
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+ done
+ else
+ for ((i=1; i<=5; i=i+2)) do
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+ done
+ fi
+}
+
+function disconnected_brick_count {
+ local vol=$1
+ $CLI volume heal $vol info | \
+ egrep -i '(transport|Socket is not connected)' | wc -l
+}
+
+TESTS_EXPECTED_IN_LOOP=20
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+decide_kill=$((`date +"%j"|sed 's/^0*//'` % 2 ))
+
+kill_multiple_bricks $V0 $H0 $B0
+cd $M0
+HEAL_FILES=0
+for i in {1..10}
+do
+ dd if=/dev/urandom of=f bs=1024k count=10
+ HEAL_FILES=$(($HEAL_FILES+1)) #+1 for data/metadata self-heal of 'f'
+ mkdir a; cd a;
+ #+3 for metadata self-heal of 'a' one per subvolume of DHT
+ HEAL_FILES=$(($HEAL_FILES+3))
+done
+#+3 represents entry sh on "/", one per subvolume of DHT?
+HEAL_FILES=$(($HEAL_FILES + 3))
+
+cd ~
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+
+#When bricks are down, it says Transport End point Not connected for them
+EXPECT "3" disconnected_brick_count $V0
+
+#Create some stale indices and verify that they are not counted in heal info
+#TO create stale index create and delete files when one brick is down in
+#replica pair.
+for i in {11..20}; do echo abc > $M0/$i; done
+HEAL_FILES=$(($HEAL_FILES + 10)) #count extra 10 files
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+#delete the files now, so that stale indices will remain.
+for i in {11..20}; do rm -f $M0/$i; done
+#After deleting files they should not appear in heal info
+HEAL_FILES=$(($HEAL_FILES - 10))
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+
+
+TEST ! $CLI volume heal $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST ! $CLI volume heal $V0
+TEST ! $CLI volume heal $V0 full
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+sleep 5 #Until the heal-statistics command implementation
+#check that this heals the contents partially
+TEST [ $HEAL_FILES -gt $(get_pending_heal_count $V0) ]
+
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test that ongoing IO is not considered as Pending heal
+(dd if=/dev/zero of=$M0/file1 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid1=$!;
+(dd if=/dev/zero of=$M0/file2 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid2=$!;
+(dd if=/dev/zero of=$M0/file3 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid3=$!;
+(dd if=/dev/zero of=$M0/file4 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid4=$!;
+(dd if=/dev/zero of=$M0/file5 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid5=$!;
+EXPECT 0 get_pending_heal_count $V0
+kill -SIGTERM $back_pid1;
+kill -SIGTERM $back_pid2;
+kill -SIGTERM $back_pid3;
+kill -SIGTERM $back_pid4;
+kill -SIGTERM $back_pid5;
+wait >/dev/null 2>&1;
+
+#Test that volume heal info reports files even when self-heal
+#options are disabled
+TEST touch $M0/f
+TEST mkdir $M0/d
+#DATA
+TEST $CLI volume set $V0 cluster.data-self-heal off
+EXPECT "off" volume_option $V0 cluster.data-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+echo abc > $M0/f
+EXPECT 1 get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.data-self-heal on
+
+#METADATA
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+EXPECT "off" volume_option $V0 cluster.metadata-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+
+TEST chmod 777 $M0/f
+EXPECT 1 get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+
+#ENTRY
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+EXPECT "off" volume_option $V0 cluster.entry-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+TEST touch $M0/d/a
+# 4 if mtime/ctime is modified for d in bricks without a
+# 2 otherwise
+PENDING=$( get_pending_heal_count $V0 )
+TEST test $PENDING -eq 2 -o $PENDING -eq 4
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+check_bricks_up $V0
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Negative test cases
+#Fail volume does not exist case
+TEST ! $CLI volume heal fail info
+
+#Fail volume stopped case
+TEST $CLI volume stop $V0
+TEST ! $CLI volume heal $V0 info
+
+#Fail non-replicate volume info
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 $H0:$B0/${V0}{6}
+TEST $CLI volume start $V0
+TEST ! $CLI volume heal $V0 info
+
+# Check for non Linux systems that we did not mess with directory offsets
+TEST ! log_newer $START_TIMESTAMP "offset reused from another DIR"
+
+cleanup
diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t
new file mode 100644
index 00000000000..04b77c41de1
--- /dev/null
+++ b/tests/basic/afr/sparse-file-self-heal.t
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+#This file checks if self-heal of files with holes is working properly or not
+#bigger is 2M, big is 1M, small is anything less
+#Also tests if non-sparse files with zeroes in it are healed correctly w.r.t
+#disk usage.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 data-self-heal-algorithm full
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
+TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
+TEST truncate -s 1G $M0/FILE
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+#File with >128k size hole
+TEST truncate -s 1M $M0/big
+big_md5sum=$(md5sum $M0/big | awk '{print $1}')
+
+#File with <128k hole
+TEST truncate -s 0 $M0/small
+TEST truncate -s 64k $M0/small
+small_md5sum=$(md5sum $M0/small | awk '{print $1}')
+
+#Bigger file truncated to big size hole.
+TEST truncate -s 0 $M0/bigger2big
+TEST truncate -s 1M $M0/bigger2big
+bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
+
+#Big file truncated to Bigger size hole
+TEST truncate -s 2M $M0/big2bigger
+big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
+#Create a non-sparse file containing zeroes.
+TEST dd if=/dev/zero of=$M0/zeroedfile bs=1024 count=1024
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#If the file system of bricks is XFS and speculative preallocation is on,
+#dropping cahce should be done to free speculatively pre-allocated blocks
+#by XFS.
+drop_cache $M0
+
+big_md5sum_0=$(md5sum $B0/${V0}0/big | awk '{print $1}')
+small_md5sum_0=$(md5sum $B0/${V0}0/small | awk '{print $1}')
+bigger2big_md5sum_0=$(md5sum $B0/${V0}0/bigger2big | awk '{print $1}')
+big2bigger_md5sum_0=$(md5sum $B0/${V0}0/big2bigger | awk '{print $1}')
+
+EXPECT $big_md5sum echo $big_md5sum_0
+EXPECT $small_md5sum echo $small_md5sum_0
+EXPECT $big2bigger_md5sum echo $big2bigger_md5sum_0
+EXPECT $bigger2big_md5sum echo $bigger2big_md5sum_0
+
+
+EXPECT "1" has_holes $B0/${V0}0/big
+#Because self-heal writes the final chunk hole should not be there for
+#files < 128K
+EXPECT "0" has_holes $B0/${V0}0/small
+# Since source is smaller than sink, self-heal does blind copy so no holes will
+# be present
+EXPECT "0" has_holes $B0/${V0}0/bigger2big
+EXPECT "1" has_holes $B0/${V0}0/big2bigger
+
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
+
+#Check that the non-sparse file has the same file size on both bricks and that
+#the disk usage is greater than or equal to the file size. We could have checked
+#that the disk usage is just equal to the file size but XFS does speculative
+#preallocation due to which disk usage can be more than the file size.
+STAT_SIZE1=$(stat -c "%s" $B0/${V0}0/zeroedfile)
+STAT_SIZE2=$(stat -c "%s" $B0/${V0}1/zeroedfile)
+TEST [ $STAT_SIZE1 -eq $STAT_SIZE2 ]
+USED_KB1="$((`stat -c %b $B0/${V0}0/zeroedfile` * `stat -c %B $B0/${V0}0/zeroedfile`))"
+TEST [ $USED_KB1 -ge $STAT_SIZE1 ]
+USED_KB2="$((`stat -c %b $B0/${V0}1/zeroedfile` * `stat -c %B $B0/${V0}1/zeroedfile`))"
+TEST [ $USED_KB2 -ge $STAT_SIZE2 ]
+
+TEST rm -f $M0/*
+
+#check the same tests with diff self-heal
+TEST $CLI volume set $V0 data-self-heal-algorithm diff
+
+TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
+TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
+TEST truncate -s 1G $M0/FILE
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+#File with >128k size hole
+TEST truncate -s 1M $M0/big
+big_md5sum=$(md5sum $M0/big | awk '{print $1}')
+
+#File with <128k hole
+TEST truncate -s 0 $M0/small
+TEST truncate -s 64k $M0/small
+small_md5sum=$(md5sum $M0/small | awk '{print $1}')
+
+#Bigger file truncated to big size hole
+TEST truncate -s 0 $M0/bigger2big
+TEST truncate -s 1M $M0/bigger2big
+bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
+
+#Big file truncated to Bigger size hole
+TEST truncate -s 2M $M0/big2bigger
+big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
+#Create a non-sparse file containing zeroes.
+TEST dd if=/dev/zero of=$M0/zeroedfile bs=1024 count=1024
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#If the file system of bricks is XFS and speculative preallocation is on,
+#dropping cahce should be done to free speculatively pre-allocated blocks
+#by XFS.
+drop_cache $M0
+
+big_md5sum_0=$(md5sum $B0/${V0}0/big | awk '{print $1}')
+small_md5sum_0=$(md5sum $B0/${V0}0/small | awk '{print $1}')
+bigger2big_md5sum_0=$(md5sum $B0/${V0}0/bigger2big | awk '{print $1}')
+big2bigger_md5sum_0=$(md5sum $B0/${V0}0/big2bigger | awk '{print $1}')
+
+EXPECT $big_md5sum echo $big_md5sum_0
+EXPECT $small_md5sum echo $small_md5sum_0
+EXPECT $big2bigger_md5sum echo $big2bigger_md5sum_0
+EXPECT $bigger2big_md5sum echo $bigger2big_md5sum_0
+
+EXPECT "1" has_holes $B0/${V0}0/big
+EXPECT "1" has_holes $B0/${V0}0/big2bigger
+EXPECT "0" has_holes $B0/${V0}0/bigger2big
+EXPECT "0" has_holes $B0/${V0}0/small
+
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
+
+#Check that the non-sparse file has the same file size on both bricks and that
+#the disk usage is greater than or equal to the file size. We could have checked
+#that the disk usage is just equal to the file size but XFS does speculative
+#preallocation due to which disk usage can be more than the file size.
+STAT_SIZE1=$(stat -c "%s" $B0/${V0}0/zeroedfile)
+STAT_SIZE2=$(stat -c "%s" $B0/${V0}1/zeroedfile)
+TEST [ $STAT_SIZE1 -eq $STAT_SIZE2 ]
+USED_KB1="$((`stat -c %b $B0/${V0}0/zeroedfile` * `stat -c %B $B0/${V0}0/zeroedfile`))"
+TEST [ $USED_KB1 -ge $STAT_SIZE1 ]
+USED_KB2="$((`stat -c %b $B0/${V0}1/zeroedfile` * `stat -c %B $B0/${V0}1/zeroedfile`))"
+TEST [ $USED_KB2 -ge $STAT_SIZE2 ]
+
+cleanup
diff --git a/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
new file mode 100644
index 00000000000..7c249c4bcbd
--- /dev/null
+++ b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+#Test the client side split-brain resolution
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+
+count_files () {
+ ls $1 | wc -l
+}
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 1
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST mkdir $M0/data
+TEST touch $M0/data/file
+
+
+############ Client side healing using favorite-child-policy = mtime #################
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+mtime1=$(get_mtime $B0/${V0}0/data/file)
+mtime2=$(get_mtime $B0/${V0}1/data/file)
+if (( $(echo "$mtime1 > $mtime2" | bc -l) )); then
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+else
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+fi
+
+#file will be in split-brain
+cat $M0/data/file > /dev/null
+EXPECT "1" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+cat $M0/data/file > /dev/null
+EXPECT "0" echo $?
+M0_MD5=$(md5sum $M0/data/file | cut -d\ -f1)
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+TEST [ "$LATEST_MTIME_MD5" == "$M0_MD5" ]
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$B0_MD5" ]
+TEST [ "$LATEST_MTIME_MD5" == "$B1_MD5" ]
+
+############ Client side directory conservative merge #################
+TEST $CLI volume reset $V0 cluster.favorite-child-policy
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/data/test
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/data/test1
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#data dir will be in entry split-brain
+ls $M0/data > /dev/null
+EXPECT "2" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+
+
+ls $M0/data > /dev/null
+EXPECT "0" echo $?
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+#Entry Split-brain is gone, but data self-heal is pending on the files
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+cat $M0/data/test > /dev/null
+cat $M0/data/test1 > /dev/null
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+files=$(count_files $M0/data)
+EXPECT "3" echo $files
+
+TEST force_umount $M0
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+
+cleanup
diff --git a/tests/basic/afr/split-brain-heal-info.t b/tests/basic/afr/split-brain-heal-info.t
new file mode 100644
index 00000000000..2e4742fff08
--- /dev/null
+++ b/tests/basic/afr/split-brain-heal-info.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function volume_start_force()
+{
+ local vol=$1
+ TEST $CLI volume start $vol force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 1
+}
+
+TESTS_EXPECTED_IN_LOOP=15
+SPB_FILES=0
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dspb
+TEST mkdir $M0/mspb
+TEST mkdir $M0/espb
+TEST touch $M0/dspb/file
+
+#### Simlulate data-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo "abc" > $M0/dspb/file`
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST `echo "def" > $M0/dspb/file`
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+### Simulate metadata-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 757 $M0/mspb
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 747 $M0/mspb
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+#### Simulate entry-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
+TEST touch $M0/espb/a
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
+TEST mkdir $M0/espb/a
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+#Multiply by 2, for each brick in replica pair
+SPB_FILES=$(($SPB_FILES * 2))
+EXPECT "$SPB_FILES" afr_get_split_brain_count $V0
+cleanup;
diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t
new file mode 100644
index 00000000000..676788fce3f
--- /dev/null
+++ b/tests/basic/afr/split-brain-healing-ctime.t
@@ -0,0 +1,252 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_replicate_subvol_number {
+ local filename=$1
+ #get_backend_paths
+ if [ -f $B0/${V0}1/$filename ]
+ then
+ echo 0
+ elif [ -f $B0/${V0}3/$filename ]
+ then echo 1
+ else
+ echo -1
+ fi
+}
+
+cleanup;
+
+AREQUAL_PATH=$(dirname $0)/../../utils
+GET_MDATA_PATH=$(dirname $0)/../../utils
+CFLAGS=""
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+cd $M0
+for i in {1..10}
+do
+ echo "Initial content">>file$i
+done
+
+replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
+replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
+
+############ Create data split-brain in the files. ###########################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B1 is down">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}3
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B3 is down">>${replica_1_files_list[$file]}
+done
+
+SMALLER_FILE_SIZE=$(stat -c %s file1)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B2 is down">>${replica_0_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}4
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B4 is down">>${replica_1_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
+done
+
+BIGGER_FILE_SIZE=$(stat -c %s file1)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+############### Acessing the files should now give EIO. ###############################
+TEST ! cat file1
+TEST ! cat file2
+TEST ! cat file3
+TEST ! cat file4
+TEST ! cat file5
+TEST ! cat file6
+TEST ! cat file7
+TEST ! cat file8
+TEST ! cat file9
+TEST ! cat file10
+###################
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+################ Heal file1 using the bigger-file option ##############
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "0" echo $?
+EXPECT $BIGGER_FILE_SIZE stat -c %s file1
+
+################ Heal file2 using the bigger-file option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file2)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
+fi
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
+EXPECT "0" echo $?
+
+################ Heal file3 using the source-brick option ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file3)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file3
+
+################ Heal file4 using the source-brick option and it's gfid ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file4)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file4
+
+# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed
+# as part of metadata heal. So mtime would be same, hence it can't be healed
+# using 'latest-mtime' policy, use 'source-brick' option instead.
+################ Heal file5 using the source-brick option ##############
+subvolume=$(get_replicate_subvol_number file5)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5
+fi
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file5)
+
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
+
+################ Heal file6 using the source-brick option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file6)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR
+fi
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file6)
+
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
+
+################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
+EXPECT "0" echo $?
+
+################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
+EXPECT "0" echo $?
+
+############### Reading the files should now succeed. ###############################
+TEST cat file1
+TEST cat file2
+TEST cat file3
+TEST cat file4
+TEST cat file5
+TEST cat file6
+TEST cat file7
+TEST cat file8
+TEST cat file9
+TEST cat file10
+
+################ File contents on the bricks must be same. ################################
+TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs)
+TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs)
+
+############### Trying to heal files not in SB should fail. ###############################
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "1" echo $?
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+EXPECT "1" echo $?
+
+cd -
+TEST rm $AREQUAL_PATH/arequal-checksum
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup
diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
new file mode 100644
index 00000000000..315e815eb7e
--- /dev/null
+++ b/tests/basic/afr/split-brain-healing.t
@@ -0,0 +1,261 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_replicate_subvol_number {
+ local filename=$1
+ #get_backend_paths
+ if [ -f $B0/${V0}1/$filename ]
+ then
+ echo 0
+ elif [ -f $B0/${V0}3/$filename ]
+ then echo 1
+ else
+ echo -1
+ fi
+}
+
+cleanup;
+
+AREQUAL_PATH=$(dirname $0)/../../utils
+GET_MDATA_PATH=$(dirname $0)/../../utils
+CFLAGS=""
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 ctime off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+cd $M0
+for i in {1..10}
+do
+ echo "Initial content">>file$i
+done
+
+replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
+replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
+
+############ Create data split-brain in the files. ###########################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B1 is down">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}3
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B3 is down">>${replica_1_files_list[$file]}
+done
+
+SMALLER_FILE_SIZE=$(stat -c %s file1)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B2 is down">>${replica_0_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}4
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B4 is down">>${replica_1_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
+done
+
+BIGGER_FILE_SIZE=$(stat -c %s file1)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+############### Acessing the files should now give EIO. ###############################
+TEST ! cat file1
+TEST ! cat file2
+TEST ! cat file3
+TEST ! cat file4
+TEST ! cat file5
+TEST ! cat file6
+TEST ! cat file7
+TEST ! cat file8
+TEST ! cat file9
+TEST ! cat file10
+###################
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+################ Heal file1 using the bigger-file option ##############
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "0" echo $?
+EXPECT $BIGGER_FILE_SIZE stat -c %s file1
+
+################ Heal file2 using the bigger-file option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file2)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
+fi
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
+EXPECT "0" echo $?
+
+################ Heal file3 using the source-brick option ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file3)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file3
+
+################ Heal file4 using the source-brick option and it's gfid ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file4)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file4
+
+################ Heal file5 using the latest-mtime option ##############
+subvolume=$(get_replicate_subvol_number file5)
+if [ $subvolume == 0 ]
+then
+ mtime1=$(get_mtime $B0/${V0}1/file5)
+ mtime2=$(get_mtime $B0/${V0}2/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+elif [ $subvolume == 1 ]
+then
+ mtime1=$(get_mtime $B0/${V0}3/file5)
+ mtime2=$(get_mtime $B0/${V0}4/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+fi
+$CLI volume heal $V0 split-brain latest-mtime /file5
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $LATEST_MTIME -eq $mtime1_after_heal ]
+TEST [ $LATEST_MTIME -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file5)
+
+TEST [ $LATEST_MTIME -eq $mtime_mount_after_heal ]
+
+################ Heal file6 using the latest-mtime option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file6)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
+ mtime1=$(get_mtime $B0/${V0}1/file6)
+ mtime2=$(get_mtime $B0/${V0}2/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
+ mtime1=$(get_mtime $B0/${V0}3/file6)
+ mtime2=$(get_mtime $B0/${V0}4/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+fi
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+$CLI volume heal $V0 split-brain latest-mtime $GFIDSTR
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $LATEST_MTIME -eq $mtime1_after_heal ]
+TEST [ $LATEST_MTIME -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file6)
+
+TEST [ $LATEST_MTIME -eq $mtime_mount_after_heal ]
+
+################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
+EXPECT "0" echo $?
+
+################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
+EXPECT "0" echo $?
+
+############### Reading the files should now succeed. ###############################
+TEST cat file1
+TEST cat file2
+TEST cat file3
+TEST cat file4
+TEST cat file5
+TEST cat file6
+TEST cat file7
+TEST cat file8
+TEST cat file9
+TEST cat file10
+
+################ File contents on the bricks must be same. ################################
+TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs)
+TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs)
+
+############### Trying to heal files not in SB should fail. ###############################
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "1" echo $?
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+EXPECT "1" echo $?
+
+cd -
+TEST rm $AREQUAL_PATH/arequal-checksum
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup
diff --git a/tests/basic/afr/split-brain-open.t b/tests/basic/afr/split-brain-open.t
new file mode 100644
index 00000000000..9b2f2856047
--- /dev/null
+++ b/tests/basic/afr/split-brain-open.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume heal $V0 disable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST touch $M0/data-split-brain.txt
+
+#Create data split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+`echo "brick1_alive" > $M0/data-split-brain.txt`
+TEST [ $? == 0 ];
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+`echo "brick0_alive" > $M0/data-split-brain.txt`
+TEST [ $? == 0 ];
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+echo "all-alive" >> $M0/data-split-brain.txt
+TEST [ $? != 0 ];
+
+cleanup;
diff --git a/tests/basic/afr/split-brain-resolution.t b/tests/basic/afr/split-brain-resolution.t
new file mode 100644
index 00000000000..834237c96ec
--- /dev/null
+++ b/tests/basic/afr/split-brain-resolution.t
@@ -0,0 +1,105 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+function get_split_brain_status {
+ local path=$1
+ echo `getfattr -n replica.split-brain-status $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST `echo "some-data" > $M0/data-split-brain.txt`
+TEST `echo "some-data" > $M0/metadata-split-brain.txt`
+
+#Create data and metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST `echo "brick1_alive" > $M0/data-split-brain.txt`
+TEST setfattr -n user.test -v brick1 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST `echo "brick0_alive" > $M0/data-split-brain.txt`
+TEST setfattr -n user.test -v brick0 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 4 get_pending_heal_count $V0
+
+TEST ! cat $M0/data-split-brain.txt
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+
+#Inspect file in data-split-brain
+EXPECT "data-split-brain:yes metadata-split-brain:no Choices:patchy-client-0,patchy-client-1" get_split_brain_status $M0/data-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/data-split-brain.txt
+
+#Should now be able to read the contents of data-split-brain.txt
+EXPECT "brick0_alive" cat $M0/data-split-brain.txt
+
+TEST setfattr -n replica.split-brain-choice-timeout -v 10 $M0/
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/data-split-brain.txt
+
+#Should now be able to read the contents of data-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+
+#Inspect the file in metadata-split-brain
+EXPECT "data-split-brain:no metadata-split-brain:yes Choices:patchy-client-0,patchy-client-1" get_split_brain_status $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/metadata-split-brain.txt
+EXPECT "brick1" get_text_xattr user.test $M0/metadata-split-brain.txt
+
+#Check that setting split-brain-choice to "none" results in EIO again
+TEST setfattr -n replica.split-brain-choice -v none $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v none $M0/data-split-brain.txt
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+TEST ! cat $M0/data-split-brain.txt
+
+#Check that after timeout fops result in EIO again.
+#Set one minute timeout
+TEST setfattr -n replica.split-brain-choice-timeout -v 1 $M0/
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/data-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+#Wait until timeout completes and test that the fops fail again
+sleep 62
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+TEST ! cat $M0/data-split-brain.txt
+
+#Negative test cases should fail
+TEST ! setfattr -n replica.split-brain-choice -v $V0-client-4 $M0/data-split-brain.txt
+TEST ! setfattr -n replica.split-brain-heal-finalize -v $V0-client-4 $M0/metadata-split-brain.txt
+
+#Heal the files
+TEST setfattr -n replica.split-brain-heal-finalize -v $V0-client-0 $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-heal-finalize -v $V0-client-1 $M0/data-split-brain.txt
+
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+
+EXPECT 0 get_pending_heal_count $V0
+
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/afr/stale-file-lookup.t b/tests/basic/afr/stale-file-lookup.t
new file mode 100644
index 00000000000..f30422fd009
--- /dev/null
+++ b/tests/basic/afr/stale-file-lookup.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+#This file checks if stale file lookup fails or not.
+#A file is deleted when a brick was down. Before self-heal could happen to it
+#the file is accessed. It should fail.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST stat $B0/${V0}0/a
+TEST ! stat $B0/${V0}1/a
+TEST ! ls -l $M0/a
+
+cleanup
diff --git a/tests/basic/afr/ta-check-locks.t b/tests/basic/afr/ta-check-locks.t
new file mode 100644
index 00000000000..c0102c35b7b
--- /dev/null
+++ b/tests/basic/afr/ta-check-locks.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+#This test checks if all the locks on
+#ta file are being held and released properly
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+function get_lock_count_on_ta()
+{
+ tapid=`cat $B0/ta.pid`
+ local sfile=$(generate_statedump $tapid)
+ count=$(grep "inodelk-count" $sfile | cut -f2 -d'=' | tail -1)
+ ncount=$(grep "inodelk.inodelk" $sfile | grep "len=1" | wc -l)
+ echo "count = $count : ncount = $ncount"
+ if [ "$count" = "" ]
+ then
+ count=0
+ fi
+
+ if [ "$count" -eq "$ncount" ]
+ then
+ echo "$count"
+ else
+ echo "-1"
+ fi
+}
+
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+TEST ta_start_mount_process $M1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M1 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST ta_create_shd_volfile brick0 brick1 ta
+TEST ta_start_shd_process glustershd
+shd_pid=$(cat $B0/glustershd.pid)
+
+TEST touch $M0/a.txt
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "0" get_lock_count_on_ta
+
+TEST ta_kill_brick brick0
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "1" get_lock_count_on_ta
+
+echo "Hello" >> $M1/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+echo "xyz" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+chmod 0666 $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_lock_count_on_ta
+
+cleanup;
diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t
new file mode 100644
index 00000000000..3cfc16b9b8a
--- /dev/null
+++ b/tests/basic/afr/ta-read.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# Test read transaction logic for thin-arbiter.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/FILE
+TEST ls $B0/brick0/FILE
+TEST ls $B0/brick1/FILE
+TEST ! ls $B0/ta/FILE
+
+# Kill one brick and write to FILE.
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+echo "brick0 down">> $M0/FILE
+TEST [ $? -eq 0 ]
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+
+#Umount and mount to remove cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+# Read must be allowed since good brick is up.
+TEST cat $M0/FILE
+
+#Umount and mount to remove cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+# Toggle good and bad data brick processes.
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+# Read must now fail.
+TEST ! cat $M0/FILE
+
+# Bring all data bricks up, and kill TA.
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+TA_PID=$(ta_get_pid_by_brick_name ta)
+TEST [ -n $TA_PID ]
+TEST ta_kill_brick ta
+TA_PID=$(ta_get_pid_by_brick_name ta)
+TEST [ -z $TA_PID ]
+# Read must now succeed.
+TEST cat $M0/FILE
+cleanup;
diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t
new file mode 100644
index 00000000000..96ecfc678e0
--- /dev/null
+++ b/tests/basic/afr/ta-shd.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST ta_create_shd_volfile brick0 brick1 ta
+TEST ta_start_shd_process glustershd
+
+TEST touch $M0/a.txt
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+echo "Hello" >> $M0/a.txt
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
+
+#TODO: After the write txn changes are merged, take statedump of TA process and
+#check whether AFR_TA_DOM_NOTIFY lock is held by the client here. Take the
+#statedump again after line #38 to check AFR_TA_DOM_NOTIFY lock is released by
+#the SHD process.
+
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
+EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
+
+#Kill the previously up brick and try reading from other brick. Since the heal
+#has happened file content should be same.
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+#Umount and mount to remove cached data.
+TEST umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT "Hello" cat $M0/a.txt
+cleanup;
diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t
new file mode 100644
index 00000000000..096ca9f47cf
--- /dev/null
+++ b/tests/basic/afr/ta-write-on-bad-brick.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/a.txt
+TEST ls $B0/brick0/a.txt
+TEST ls $B0/brick1/a.txt
+TEST ! ls $B0/ta/a.txt
+
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+#Good Data brick is down. TA and bad brick are UP
+
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+# Good Data brick is UP. Bad and TA are down
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST ta_kill_brick ta
+TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+# Good and Bad data bricks are UP. TA is down
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+cleanup;
diff --git a/tests/basic/afr/ta.t b/tests/basic/afr/ta.t
new file mode 100644
index 00000000000..05d48431c95
--- /dev/null
+++ b/tests/basic/afr/ta.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/a.txt
+TEST ls $B0/brick0/a.txt
+TEST ls $B0/brick1/a.txt
+TEST ! ls $B0/ta/a.txt
+
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST touch $M0/b.txt
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1
+EXPECT "000000010000000200000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/b.txt
+#New entry mark lead to pending data on the file and on ta
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+TEST ! ls $B0/brick0/b.txt
+TEST ls $B0/brick1/b.txt
+
+#Try to create an entry while good brick is down and bad brick is UP. Should not create
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ! touch $M0/d.txt
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+
+TEST ta_kill_brick ta
+# Entry create must fail if only one brick is UP, even if that is a good brick.
+TEST ! touch $M0/c.txt
+TEST ! ls $B0/brick0/c.txt
+TEST ! ls $B0/brick1/c.txt
+
+cleanup;
diff --git a/tests/basic/afr/tarissue.t b/tests/basic/afr/tarissue.t
new file mode 100644
index 00000000000..83f7463130c
--- /dev/null
+++ b/tests/basic/afr/tarissue.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+TESTS_EXPECTED_IN_LOOP=10
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+
+#Create a distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume set $V0 cluster.consistent-metadata on
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 0
+TEST $CLI volume set $V0 nfs.rdirplus off
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+# Mount NFS
+mount_nfs $H0:/$V0 $N0 vers=3
+
+#Create files
+TEST mkdir -p $N0/nfs/dir1/dir2
+for i in {1..10}; do
+ TEST_IN_LOOP dd if=/dev/urandom of=$N0/nfs/dir1/dir2/file$i bs=1024k count=1
+done
+TEST tar cvf /tmp/dir1.tar.gz $N0/nfs/dir1
+
+TEST rm -f /tmp/dir1.tar.gz
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/basic/all_squash.t b/tests/basic/all_squash.t
new file mode 100644
index 00000000000..29766c50af7
--- /dev/null
+++ b/tests/basic/all_squash.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+# random uid/gid
+uid=22162
+gid=5845
+
+TEST $CLI volume set $V0 server.anonuid $uid;
+TEST $CLI volume set $V0 server.anongid $gid;
+
+# Ensure server.all-squash is disabled
+TEST $CLI volume set $V0 server.all-squash disable;
+
+# Tests for the fuse mount
+mkdir $M0/other;
+chown $uid:$gid $M0/other;
+
+TEST $CLI volume set $V0 server.all-squash enable;
+
+touch $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST touch $M0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $M0/other/file)" = "$uid:$gid" ];
+TEST mkdir $M0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $M0/other/dir)" = "$uid:$gid" ];
+
+TEST $CLI volume set $V0 server.all-squash disable;
+TEST rm -rf $M0/other;
+
+sleep 1;
+
+# tests for nfs mount
+mkdir $N0/other;
+chown $uid:$gid $N0/other;
+
+TEST $CLI volume set $V0 server.all-squash enable;
+
+touch $N0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST touch $N0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/file)" = "$uid:$gid" ];
+TEST mkdir $N0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/dir)" = "$uid:$gid" ];
+
+TEST $CLI volume set $V0 server.all-squash disable;
+TEST rm -rf $N0/other;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/cdc.t b/tests/basic/cdc.t
new file mode 100755
index 00000000000..8653a77207a
--- /dev/null
+++ b/tests/basic/cdc.t
@@ -0,0 +1,148 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function file_mime_type () {
+ mime_type=$(file --mime $1 2>/dev/null | sed '/^[^:]*: /s///')
+ echo $mime_type
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a volume with one brick
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+## Turn off performance translators
+## This is required for testing readv calls
+TEST $CLI volume set $V0 performance.io-cache off
+EXPECT 'off' volinfo_field $V0 'performance.io-cache'
+TEST $CLI volume set $V0 performance.quick-read off
+EXPECT 'off' volinfo_field $V0 'performance.quick-read'
+
+TEST $CLI volume set $V0 performance.strict-write-ordering on
+EXPECT 'on' volinfo_field $V0 'performance.strict-write-ordering'
+
+## Turn on cdc xlator by setting network.compression to on
+TEST $CLI volume set $V0 network.compression on
+EXPECT 'on' volinfo_field $V0 'network.compression'
+
+## Make sure that user cannot change network.compression.mode
+## This would break the cdc xlator if allowed!
+TEST ! $CLI volume set $V0 network.compression.mode client
+
+## Turn on network.compression.debug option
+## This will dump compressed data onto disk as gzip file
+## This is used to check if compression actually happened
+TEST $CLI volume set $V0 network.compression.debug on
+EXPECT 'on' volinfo_field $V0 'network.compression.debug'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+sleep 2
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+####################
+## Testing writev ##
+####################
+
+## Create a 1K file locally and find the md5sum
+TEST dd if=/dev/zero of=/tmp/cdc-orig count=1 bs=1k 2>/dev/null
+checksum[original-file]=`md5sum /tmp/cdc-orig | cut -d' ' -f1`
+
+## Copy the file to mountpoint and find its md5sum on brick
+TEST dd if=/tmp/cdc-orig of=$M0/cdc-server count=1 bs=1k 2>/dev/null
+checksum[brick-file]=`md5sum $B0/${V0}1/cdc-server | cut -d' ' -f1`
+
+## Uncompress the gzip dump file and find its md5sum
+# mime outputs for gzip are different for file version > 5.14
+TEST touch /tmp/gzipfile
+TEST gzip /tmp/gzipfile
+GZIP_MIME_TYPE=$(file_mime_type /tmp/gzipfile.gz)
+
+TEST rm -f /tmp/gzipfile.gz
+
+EXPECT "$GZIP_MIME_TYPE" echo $(file_mime_type /tmp/cdcdump.gz)
+
+TEST gunzip -f /tmp/cdcdump.gz
+checksum[dump-file-writev]=`md5sum /tmp/cdcdump | cut -d' ' -f1`
+
+## Check if all 3 checksums are same
+TEST test ${checksum[original-file]} = ${checksum[brick-file]}
+TEST test ${checksum[brick-file]} = ${checksum[dump-file-writev]}
+
+## Cleanup files
+TEST rm -f /tmp/cdcdump.gz
+
+###################
+## Testing readv ##
+###################
+
+## Copy file from mount point to client and find checksum
+TEST dd if=$M0/cdc-server of=/tmp/cdc-client count=1 bs=1k 2>/dev/null
+checksum[client-file]=`md5sum /tmp/cdc-client | cut -d' ' -f1`
+
+## Uncompress the gzip dump file and find its md5sum
+# mime outputs for gzip are different for file version > 5.14
+EXPECT "$GZIP_MIME_TYPE" echo $(file_mime_type /tmp/cdcdump.gz)
+
+TEST gunzip -f /tmp/cdcdump.gz
+checksum[dump-file-readv]=`md5sum /tmp/cdcdump | cut -d' ' -f1`
+
+## Check if all 3 checksums are same
+TEST test ${checksum[brick-file]} = ${checksum[client-file]}
+TEST test ${checksum[client-file]} = ${checksum[dump-file-readv]}
+
+## Cleanup files and unmount
+TEST rm -f /tmp/cdc* $M0/cdc*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+## Turn on network.compression.min-size and set it to 100 bytes
+## Compression should not take place if file size
+## is less than 100 bytes
+TEST $CLI volume set $V0 network.compression.min-size 100
+EXPECT '100' volinfo_field $V0 'network.compression.min-size'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+## Create a file of size 99 bytes on mountpoint
+## This is should not be compressed
+TEST dd if=/dev/zero of=$M0/cdc-small count=1 bs=99 2>/dev/null
+TEST ! test -e /tmp/cdcdump.gz
+
+## Cleanup files and unmount
+TEST rm -f /tmp/cdc* $M0/cdc*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+## Reset the network.compression options
+TEST $CLI volume reset $V0 network.compression.debug
+TEST $CLI volume reset $V0 network.compression.min-size
+TEST $CLI volume reset $V0 network.compression
+
+## Delete the volume
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-api.t b/tests/basic/changelog/changelog-api.t
new file mode 100644
index 00000000000..516c2f2f60d
--- /dev/null
+++ b/tests/basic/changelog/changelog-api.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+CHANGELOG_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $CHANGELOG_BIN_PATH/test-changelog-api.c -lgfchangelog
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+sleep 3;
+
+#Listen to changelog journal notifcations
+$CHANGELOG_BIN_PATH/test-changelog-api &
+for i in {1..12};do echo "data" > $M0/file$i 2>/dev/null; sleep 1;done &
+
+#Wait for changelogs to be in .processed directory
+sleep 12
+
+EXPECT "Y" processed_changelogs "/tmp/scratch_v1/.processed"
+TEST rm $CHANGELOG_BIN_PATH/test-changelog-api
+rm -rf /tmp/scratch_v1
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t
new file mode 100644
index 00000000000..ea952619652
--- /dev/null
+++ b/tests/basic/changelog/changelog-history.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+SCRIPT_TIMEOUT=300
+HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog
+
+time_before_enable1=$(date '+%s')
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+sleep 3
+time_before_enable2=$(date '+%s')
+
+sleep 3
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+sleep 3
+time_after_enable1=$(date '+%s')
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/file{1..10}
+
+sleep 3
+time_after_enable2=$(date '+%s')
+
+let time_future=time_after_enable2+600
+
+#Fails as start falls before changelog enable
+EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_before_enable1 $time_before_enable2
+
+#Fails as start falls before changelog enable
+EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_before_enable2 $time_after_enable1
+
+#Passes as start and end falls in same htime file
+EXPECT "0" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_after_enable2
+
+#Passes, gives the changelogs till continuous changelogs are available
+# but returns 1
+EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable2 $time_future
+
+#Disable and enable changelog
+TEST $CLI volume set $V0 changelog.changelog off
+sleep 6
+time_between_htime=$(date '+%s')
+sleep 6
+TEST $CLI volume set $V0 changelog.changelog on
+
+sleep 6
+touch $M0/test{1..10}
+time_in_sec_htime1=$(date '+%s')
+
+sleep 6
+touch $M0/test1{1..10}
+time_in_sec_htime2=$(date '+%s')
+
+sleep 3
+TEST $CLI volume set $V0 changelog.changelog off
+sleep 3
+time_after_disable=$(date '+%s')
+
+TEST $CLI volume set $V0 changelog.changelog on
+sleep 5
+
+#Passes, gives the changelogs till continuous changelogs are available
+# but returns 1
+EXPECT_WITHIN 10 "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
+
+#Fails as start falls between htime files
+EXPECT_WITHIN 10 "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
+
+#Passes as start and end falls in same htime file
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
+
+#Passes, gives the changelogs till continuous changelogs are available
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
+
+TEST rm $HISTORY_BIN_PATH/get-history
+rm -rf /tmp/scratch_v1/*
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-rename.t b/tests/basic/changelog/changelog-rename.t
new file mode 100644
index 00000000000..9a0ef527b5b
--- /dev/null
+++ b/tests/basic/changelog/changelog-rename.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=30
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/file1
+mv $M0/file1 $M0/rn_file1
+mkdir $M0/dir1
+mv $M0/dir1 $M0/rn_dir1
+
+EXPECT "2" check_changelog_op ${CHANGELOG_PATH_0} "RENAME"
+
+cleanup;
+
+#####Test on multiple subvolume#####
+#==========================================#
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/gluster_file
+mv $M0/gluster_file $M0/rn_gluster_file
+mkdir $M0/dir1
+mv $M0/dir1 $M0/rn_dir1
+
+EXPECT "2" check_changelog_op ${CHANGELOG_PATH_0} "RENAME"
+
+cleanup;
diff --git a/tests/basic/changelog/history-api.t b/tests/basic/changelog/history-api.t
new file mode 100644
index 00000000000..9e63118cef9
--- /dev/null
+++ b/tests/basic/changelog/history-api.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $HISTORY_BIN_PATH/test-history-api.c -lgfchangelog
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+sleep 3
+start=$(date '+%s')
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/file{1..10}
+
+for i in {1..12};do echo "data" > $M0/file$i; sleep 1;done
+end=$(date '+%s')
+sleep 2
+
+#Passes as start and end falls in same htime file
+EXPECT "0" $HISTORY_BIN_PATH/test-history-api $start $end
+
+#Wait for changelogs to be in .processed directory
+sleep 2
+
+EXPECT "Y" processed_changelogs "/tmp/scratch_v1/.history/.processed"
+TEST rm $HISTORY_BIN_PATH/test-history-api
+rm -rf /tmp/scratch_v1
+
+cleanup;
diff --git a/tests/basic/cloudsync-sanity.t b/tests/basic/cloudsync-sanity.t
new file mode 100644
index 00000000000..834ba96430c
--- /dev/null
+++ b/tests/basic/cloudsync-sanity.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+TEST $CLI volume set $V0 features.cloudsync enable;
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+# This test covers lookup, mkdir, mknod, symlink, link, rename,
+# create operations
+TEST $(dirname $0)/rpc-coverage.sh $M1
+
+
+TEST cp $(dirname ${0})/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t
new file mode 100644
index 00000000000..142237c5014
--- /dev/null
+++ b/tests/basic/ctime/ctime-ec-heal.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+echo "Initial content" > $M0/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+# Kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+echo "B3 is down" >> $M0/file1
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
+echo "Entry heal file" > $M0/entry_heal_file1
+mkdir $M0/entry_heal_dir1
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+TEST $CLI volume start $V0 force
+$CLI volume heal $V0
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t
new file mode 100644
index 00000000000..2b73bcdd103
--- /dev/null
+++ b/tests/basic/ctime/ctime-ec-rebalance.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# Create files
+mkdir $M0/dir1
+echo "test data" > $M0/dir1/file1
+
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Verify ctime xattr heal on directory
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
+
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-glfs-init.c b/tests/basic/ctime/ctime-glfs-init.c
new file mode 100644
index 00000000000..e4f197b8f30
--- /dev/null
+++ b/tests/basic/ctime/ctime-glfs-init.c
@@ -0,0 +1,68 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto err;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto err;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto err;
+ }
+
+ glfs_fini(fs);
+ fs = NULL;
+ return 0;
+err:
+ glfs_fini(fs);
+ fs = NULL;
+
+ return -1;
+}
diff --git a/tests/basic/ctime/ctime-glfs-init.t b/tests/basic/ctime/ctime-glfs-init.t
new file mode 100644
index 00000000000..56d7d6caee0
--- /dev/null
+++ b/tests/basic/ctime/ctime-glfs-init.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0;
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/ctime-glfs-init.c -lgfapi -lpthread
+
+TEST ./$(dirname $0)/ctime-glfs-init ${H0} $V0 $logdir/ctime-glfs-init.log
+
+cleanup_tester $(dirname $0)/ctime-glfs-init
+
+cleanup;
+
diff --git a/tests/basic/ctime/ctime-heal-symlinks.t b/tests/basic/ctime/ctime-heal-symlinks.t
new file mode 100644
index 00000000000..547b1807e94
--- /dev/null
+++ b/tests/basic/ctime/ctime-heal-symlinks.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+###############################################################################
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+EXPECT "hello_world" cat $B0/${V0}2/SOFTLINK
+
+cd -
+cleanup
+###############################################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+cd $M0
+EXPECT "hello_world" cat SOFTLINK
+
+cd -
+cleanup
+###############################################################################
diff --git a/tests/basic/ctime/ctime-mdata-legacy-files.t b/tests/basic/ctime/ctime-mdata-legacy-files.t
new file mode 100644
index 00000000000..2e782d5c99d
--- /dev/null
+++ b/tests/basic/ctime/ctime-mdata-legacy-files.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+###############################################################################
+#Replica volume
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr
+TEST $CLI volume set $V0 ctime off
+
+TEST "mkdir $M0/DIR"
+TEST "echo hello_world > $M0/DIR/FILE"
+
+#Verify absence of xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
+
+#Enable ctime
+TEST $CLI volume set $V0 ctime on
+sleep 3
+TEST stat $M0/DIR/FILE
+
+#Verify presence "trusted.glusterfs.mdata" xattr on backend
+#The lookup above should have created xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
+
+###############################################################################
+#Disperse Volume
+
+TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
+TEST $CLI volume set $V1 performance.stat-prefetch off
+TEST $CLI volume start $V1
+
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M1;
+
+#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr
+TEST $CLI volume set $V1 ctime off
+TEST "mkdir $M1/DIR"
+TEST "echo hello_world > $M1/DIR/FILE"
+
+#Verify absence of xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
+
+#Enable ctime
+TEST $CLI volume set $V1 ctime on
+sleep 3
+TEST stat $M1/DIR/FILE
+
+#Verify presence "trusted.glusterfs.mdata" xattr on backend
+#The lookup above should have created xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
+
+cleanup;
+###############################################################################
diff --git a/tests/basic/ctime/ctime-noatime.t b/tests/basic/ctime/ctime-noatime.t
new file mode 100644
index 00000000000..609ccbd72c1
--- /dev/null
+++ b/tests/basic/ctime/ctime-noatime.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+function atime_compare {
+ local atime=$1
+ local file_name=$2
+ local atime1=$(stat -c "%X" $file_name)
+
+ if [ $atime == $atime1 ]
+ then
+ echo "0"
+ else
+ echo "1"
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-after-open off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+atime1=$(stat -c "%X" FILE)
+
+TEST "cat FILE > /dev/null"
+EXPECT "0" atime_compare $atime1 FILE
+
+sleep 1
+
+TEST $CLI volume set $V0 noatime off
+TEST "cat FILE > /dev/null"
+EXPECT "1" atime_compare $atime1 FILE
+
+cd -
+cleanup
diff --git a/tests/basic/ctime/ctime-readdir.c b/tests/basic/ctime/ctime-readdir.c
new file mode 100644
index 00000000000..8760db29ae8
--- /dev/null
+++ b/tests/basic/ctime/ctime-readdir.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <assert.h>
+
+int
+main(int argc, char **argv)
+{
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ assert(argc == 2);
+ path = argv[1];
+
+ dir = opendir(path);
+ if (!dir) {
+ printf("opendir(%s) failed.\n", path);
+ return -1;
+ }
+
+ while ((entry = readdir(dir)) != NULL) {
+ }
+ if (dir)
+ closedir(dir);
+
+ return ret;
+}
diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t
new file mode 100644
index 00000000000..4564fc1b667
--- /dev/null
+++ b/tests/basic/ctime/ctime-readdir.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+TEST $CLI volume set $V0 performance.stat-prefetch on
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST mkdir $M0/dir0
+TEST "echo hello_world > $M0/dir0/FILE"
+
+ctime1=$(stat -c %Z $M0/dir0/FILE)
+echo "Mount change time: $ctime1"
+
+sleep 2
+
+#Write to back end directly to modify ctime of backend file
+TEST "echo write_from_backend >> $B0/brick1/dir0/FILE"
+TEST "echo write_from_backend >> $B0/brick2/dir0/FILE"
+TEST "echo write_from_backend >> $B0/brick3/dir0/FILE"
+echo "Backend change time"
+echo "brick1: $(stat -c %Z $B0/brick1/dir0/FILE)"
+echo "brick2: $(stat -c %Z $B0/brick2/dir0/FILE)"
+echo "brick3: $(stat -c %Z $B0/brick3/dir0/FILE)"
+
+#Stop and start to hit the case of no inode for readdir
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST build_tester $(dirname $0)/ctime-readdir.c
+
+#Do readdir
+TEST ./$(dirname $0)/ctime-readdir $M0/dir0
+
+EXPECT "$ctime1" stat -c %Z $M0/dir0/FILE
+echo "Mount change time after readdir $(stat -c %Z $M0/dir0/FILE)"
+
+cleanup_tester $(dirname $0)/ctime-readdir
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t
new file mode 100644
index 00000000000..20517c74971
--- /dev/null
+++ b/tests/basic/ctime/ctime-rep-heal.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+echo "Initial content" > $M0/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+# Kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+echo "B3 is down" >> $M0/file1
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
+echo "Entry heal file" > $M0/entry_heal_file1
+mkdir $M0/entry_heal_dir1
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+TEST $CLI volume start $V0 force
+$CLI volume heal $V0
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t
new file mode 100644
index 00000000000..866cf87e6cb
--- /dev/null
+++ b/tests/basic/ctime/ctime-rep-rebalance.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Verify ctime xattr heal on directory
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
+
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t
new file mode 100644
index 00000000000..540e57aec83
--- /dev/null
+++ b/tests/basic/ctime/ctime-utimesat.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-after-open off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+touch $M0/FILE
+
+atime=$(stat -c "%.X" $M0/FILE)
+EXPECT $atime stat -c "%.Y" $M0/FILE
+EXPECT $atime stat -c "%.Z" $M0/FILE
+
+cleanup
diff --git a/tests/basic/distribute/brick-down.t b/tests/basic/distribute/brick-down.t
new file mode 100644
index 00000000000..522ccc07210
--- /dev/null
+++ b/tests/basic/distribute/brick-down.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+. $(dirname $0)/../../dht.rc
+
+# Test 1 overview:
+# ----------------
+# Test whether lookups are sent after a brick comes up again
+#
+# 1. Create a 3 brick pure distribute volume
+# 2. Fuse mount the volume so the layout is set on the root
+# 3. Kill one brick and try to create a directory which hashes to that brick.
+# It should fail with EIO.
+# 4. Restart the brick that was killed.
+# 5. Do not remount the volume. Try to create the same directory as in step 3.
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# We want the lookup to reach DHT
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+# Mount using FUSE and lookup the mount so a layout is set on the brick root
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+
+TEST mkdir $M0/level1
+
+# Find a dirname that will hash to the brick we are going to kill
+hashed=$V0-client-1
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0 "dir-"
+roottestdir=$fn_return_val
+
+hashed=$V0-client-1
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0/level1 "dir-"
+level1testdir=$fn_return_val
+
+
+TEST kill_brick $V0 $H0 $B0/$V0-2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-2
+
+TEST $CLI volume status $V0
+
+
+# Unmount and mount the volume again so dht has an incomplete in memory layout
+
+umount -f $M0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+
+mkdir $M0/$roottestdir
+TEST [ $? -ne 0 ]
+
+mkdir $M0/level1/$level1testdir
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-2
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+
+mkdir $M0/$roottestdir
+TEST [ $? -eq 0 ]
+
+mkdir $M0/$level1/level1testdir
+TEST [ $? -eq 0 ]
+
+# Cleanup
+cleanup
+
+
diff --git a/tests/basic/distribute/bug-1265677-use-readdirp.t b/tests/basic/distribute/bug-1265677-use-readdirp.t
new file mode 100644
index 00000000000..eef8affc8b9
--- /dev/null
+++ b/tests/basic/distribute/bug-1265677-use-readdirp.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks use-readdirp disable/enable for dht
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..1}
+TEST $CLI volume set $V0 nfs.disable yes
+TEST $CLI volume set $V0 dht.force-readdirp yes
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.force-readdirp no
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0;
+TEST mkdir $M0/d
+TEST touch $M0/d/{1..10}
+
+TEST $CLI volume profile $V0 start
+#Clear all the fops till now
+TEST $CLI volume profile $V0 info
+
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT_NOT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIRP | wc -l)
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIR | wc -l)
+
+TEST $CLI volume set $V0 dht.force-readdirp no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-dht use-readdirp
+
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIRP | wc -l)
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT_NOT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIR | wc -l)
+
+cleanup
diff --git a/tests/basic/distribute/debug-xattrs.t b/tests/basic/distribute/debug-xattrs.t
new file mode 100644
index 00000000000..6d87c0e8671
--- /dev/null
+++ b/tests/basic/distribute/debug-xattrs.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview: Test the virtual xattrs dht provides for debugging
+
+# Test 1 : "dht.file.hashed-subvol.<filename>"
+# Get the hashed subvolume for file1 in dir1 using xattr
+# Create file1 in dir1
+# Check if the file is created in the brick returned by xattr
+
+hashdebugxattr="dht.file.hashed-subvol."
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{0..3}
+TEST $CLI volume start $V0
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+# Test 1 : "dht.file.hashed-subvol.<filename>"
+# Get the hashed subvolume for file1 in dir1 using xattr
+# Create file1 in dir1
+# Check if the file is created in the brick returned by xattr
+# Create a directory on $M0
+
+TEST mkdir $M0/dir1
+
+xattrname=$hashdebugxattr"file1"
+
+hashed=$(getfattr --only-values -n "$xattrname" $M0/dir1)
+
+# Get the brick path for $hashed
+brickpath=$(cat "$M0/.meta/graphs/active/$hashed/options/remote-subvolume")
+brickpath=$brickpath"/dir1/file1"
+
+# Create the file for which we checked the xattr
+TEST touch $M0/dir1/file1
+TEST stat $brickpath
+
+# Non-existent directory
+TEST ! getfattr --only-values -n "$xattrname" $M0/dir2
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/dir-heal.t b/tests/basic/distribute/dir-heal.t
new file mode 100644
index 00000000000..851f765b245
--- /dev/null
+++ b/tests/basic/distribute/dir-heal.t
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test 1 overview:
+# ----------------
+#
+# 1. Kill one brick of the volume.
+# 2. Create directories and change directory properties.
+# 3. Bring up the brick and access the directory
+# 4. Check the permissions and xattrs on the backend
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# We want the lookup to reach DHT
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+# Mount using FUSE , kill a brick and create directories
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+cd $M0
+
+TEST kill_brick $V0 $H0 $B0/$V0-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
+
+TEST mkdir dir{1..4}
+
+# No change for dir1
+# Change permissions for dir2
+# Set xattr on dir3
+# Change permissions and set xattr on dir4
+
+TEST chmod 777 $M0/dir2
+
+TEST setfattr -n "user.test" -v "test" $M0/dir3
+
+TEST chmod 777 $M0/dir4
+TEST setfattr -n "user.test" -v "test" $M0/dir4
+
+
+# Start all bricks
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+stat $M0/dir* > /dev/null
+
+# Check that directories have been created on the brick that was killed
+
+TEST ls $B0/$V0-1/dir1
+
+TEST ls $B0/$V0-1/dir2
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
+
+TEST ls $B0/$V0-1/dir3
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
+
+
+TEST ls $B0/$V0-1/dir4
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
+
+
+TEST rm -rf $M0/*
+
+cd
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+# Test 2 overview:
+# ----------------
+# 1. Create directories with all bricks up.
+# 2. Kill a brick and change directory properties and set user xattr.
+# 2. Bring up the brick and access the directory
+# 3. Check the permissions and xattrs on the backend
+
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+cd $M0
+TEST mkdir dir{1..4}
+
+TEST kill_brick $V0 $H0 $B0/$V0-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
+
+# No change for dir1
+# Change permissions for dir2
+# Set xattr on dir3
+# Change permissions and set xattr on dir4
+
+TEST chmod 777 $M0/dir2
+
+TEST setfattr -n "user.test" -v "test" $M0/dir3
+
+TEST chmod 777 $M0/dir4
+TEST setfattr -n "user.test" -v "test" $M0/dir4
+
+
+# Start all bricks
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+stat $M0/dir* > /dev/null
+
+# Check directories on the brick that was killed
+
+TEST ls $B0/$V0-1/dir2
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
+
+TEST ls $B0/$V0-1/dir3
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
+
+
+TEST ls $B0/$V0-1/dir4
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
+cd
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/file-create.t b/tests/basic/distribute/file-create.t
new file mode 100644
index 00000000000..41b662eefe2
--- /dev/null
+++ b/tests/basic/distribute/file-create.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+. $(dirname $0)/../../dht.rc
+
+# Test overview: Test file creation in various scenarios
+
+
+# Test 1 : "dht.file.hashed-subvol.<filename>"
+# Get the hashed subvolume for file1 in dir1 using xattr
+# Create file1 in dir1
+# Check if the file is created in the brick returned by xattr
+
+hashdebugxattr="dht.file.hashed-subvol."
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+# We want fixed size bricks to test min-free-disk
+
+# Create 2 loop devices, one per brick.
+TEST truncate -s 25M $B0/brick1
+TEST truncate -s 25M $B0/brick2
+
+TEST L1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $L1
+
+TEST L2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $L2
+
+
+TEST mkdir -p $B0/${V0}{1,2}
+
+TEST MOUNT_LOOP $L1 $B0/${V0}1
+TEST MOUNT_LOOP $L2 $B0/${V0}2
+
+
+# Create a plain distribute volume with 2 subvols.
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+EXPECT "Started" volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 cluster.min-free-disk 40%
+#TEST $CLI volume set $V0 client-log-level DEBUG
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+
+######################################################
+# Test 1 : Test file creation on correct hashed subvol
+######################################################
+
+hashed="$V0-client-0"
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0/dir1 "big-"
+firstfile=$fn_return_val
+
+#Create a large file to fill up $hashed past the min-free-disk limits
+TEST dd if=/dev/zero of=$M0/dir1/$firstfile bs=1M count=15
+
+brickpath_0=$(cat "$M0/.meta/graphs/active/$hashed/options/remote-subvolume")
+brickpath_1=$(cat "$M0/.meta/graphs/active/$V0-client-1/options/remote-subvolume")
+
+TEST stat "$brickpath_0/dir1/$firstfile"
+EXPECT "0" is_dht_linkfile "$brickpath_0/dir1/$firstfile"
+
+
+######################################################
+# Test 2: Create a file which hashes to the subvol which has crossed
+# the min-free-disk limit. It should be created on the other subvol
+######################################################
+
+# DHT only checks disk usage every second. Create a new file and introduce a
+# delay here to ensure DHT updates the in memory disk usage
+sleep 2
+TEST dd if=/dev/zero of=$M0/dir1/file-2 bs=1024 count=1
+
+# Find a file that will hash to $hash_subvol
+TEST dht_first_filename_with_hashsubvol $hashed $M0/dir1 "newfile-"
+newfile=$fn_return_val
+echo $newfile
+
+# Create $newfile - it should be created on the other subvol as its hash subvol
+# has crossed the min-free-disk limit
+TEST dd if=/dev/zero of=$M0/dir1/$newfile bs=1024 count=20
+TEST stat "$brickpath_0/dir1/$newfile"
+EXPECT "1" is_dht_linkfile "$brickpath_0/dir1/$newfile"
+
+
+#TEST rm -rf $M0/dir1/$firstfile
+#TEST rm -rf $M0/dir1/$newfile
+
+
+######################################################
+# Test 3: Test dht_filter_loc_subvol_key
+######################################################
+
+TEST dht_first_filename_with_hashsubvol $V0-client-1 $M0/dir1 "filter-"
+newfile=$fn_return_val
+echo $newfile
+TEST dd if=/dev/zero of="$M0/dir1/$newfile@$V0-dht:$hashed" bs=1024 count=20
+TEST stat $M0/dir1/$newfile
+TEST stat "$brickpath_0/dir1/$newfile"
+EXPECT "1" is_dht_linkfile "$brickpath_1/dir1/$newfile"
+
+
+force_umount $M0
+TEST $CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/file-rename.t b/tests/basic/distribute/file-rename.t
new file mode 100644
index 00000000000..63111b8ad8f
--- /dev/null
+++ b/tests/basic/distribute/file-rename.t
@@ -0,0 +1,1021 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview:
+# Test all combinations of src-hashed/src-cached/dst-hashed/dst-cached
+
+hashdebugxattr="dht.file.hashed-subvol."
+
+function get_brick_index {
+ local inpath=$1
+ brickroot=$(getfattr -m . -n trusted.glusterfs.pathinfo $inpath | tr ' ' '\n' | sed -n 's/<POSIX(\(.*\)):.*:.*>.*/\1/p')
+ echo ${brickroot:(-1)}
+}
+
+function get_brick_path_for_subvol {
+ local in_subvol=$1
+ local in_brickpath
+
+ in_brickpath=$(cat "$M0/.meta/graphs/active/$in_subvol/options/remote-subvolume")
+ echo $in_brickpath
+
+}
+
+#Checks that file exists only on hashed and/or cached
+function file_existence_check
+{
+ local in_file_path=$1
+ local in_hashed=$2
+ local in_cached=$3
+ local in_client_subvol
+ local in_brickpath
+ local ret
+
+ for i in {0..3}
+ do
+ in_client_subvol="$V0-client-$i"
+ in_brickpath=$(cat "$M0/.meta/graphs/active/$in_client_subvol/options/remote-subvolume")
+ stat "$in_brickpath/$in_file_path" 2>/dev/null
+ ret=$?
+ # Either the linkto or the data file must exist on the hashed
+ if [ "$in_client_subvol" == "$in_hashed" ]; then
+ if [ $ret -ne 0 ]; then
+ return 1
+ fi
+ continue
+ fi
+
+ # If the cached is non-null, we expect the file to exist on it
+ if [ "$in_client_subvol" == "$in_cached" ]; then
+ if [ $ret -ne 0 ]; then
+ return 1
+ fi
+ continue
+ fi
+
+ if [ $ret -eq 0 ]; then
+ return 2
+ fi
+ done
+ return 0
+}
+
+
+# Check if file exists on any of the bricks of the volume
+function file_does_not_exist
+{
+ local inpath=$1
+ for i in `seq 0 3`
+ do
+ file_path=$B0/$V0-$i/$inpath
+ if [ -f "$file_path" ]; then
+ echo "1"
+ return 1
+ fi
+ done
+ return 0
+}
+
+
+# Input: filename dirpath
+function get_hash_subvol
+{
+ hash_subvol=$(getfattr --only-values -n "$hashdebugxattr$1" $2 2>/dev/null)
+}
+
+
+
+# Find the first filename that hashes to a subvol
+# other than $1
+
+function first_filename_with_diff_hashsubvol
+{
+ local in_subvol=$1
+ local in_path=$2
+ local file_pattern=$3
+ local in_hash_subvol
+
+ for i in {1..100}
+ do
+ dstfilename="$file_pattern$i"
+ in_hash_subvol=$(get_hash_subvol "$dstfilename" "$in_path")
+ echo $in_hash_subvol
+ if [ "$in_subvol" != "$in_hash_subvol" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+# Find the first filename that hashes to the same subvol
+# as $1
+function first_filename_with_same_hashsubvol
+{
+ local in_subvol=$1
+ local in_path=$2
+ local in_hash_subvol
+ local file_pattern=$3
+
+ for i in {1..100}
+ do
+ dstfilename="$file_pattern$i"
+ get_hash_subvol "$dstfilename" "$in_path"
+ in_hash_subvol=$hash_subvol
+# echo $in_hash_subvol
+ if [ "$in_subvol" == "$in_hash_subvol" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+function file_is_linkto
+{
+ local brick_filepath=$1
+
+ test=$(stat $brick_filepath 2>&1)
+ if [ $? -ne 0 ]; then
+ echo "2"
+ return
+ fi
+
+ test=$(getfattr -n trusted.glusterfs.dht.linkto -e text $brick_filepath 2>&1)
+
+ if [ $? -eq 0 ]; then
+ echo "1"
+ else
+ echo "0"
+ fi
+}
+
+
+
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+
+# We need at least 4 bricks to test all combinations of hashed and
+# cached files
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{0..3}
+TEST $CLI volume start $V0
+
+# Mount using FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+
+################################################################
+# The first set of tests are those where the Dst file does not exist
+# dst-cached = NULL
+#
+###############################################################
+
+################### Test 1 ####################################
+#
+# src-hashed = src-cached = dst-hashed
+# dst-cached = null
+# src-file = src-1
+
+echo " **** Test 1 **** "
+
+src_file="src-1"
+
+TEST mkdir $M0/test-1
+TEST touch $M0/test-1/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-1
+src_hashed=$hash_subvol
+#echo "Hashed subvol for $src_file: " $src_hashed
+
+# Find a file name that hashes to the same subvol as $src_file
+TEST first_filename_with_same_hashsubvol "$src_hashed" "$M0/test-1" "dst-"
+#echo "dst-file name: " $dstfilename
+dst_hashed=$src_hashed
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-1/$src_file $M0/test-1/$dstfilename
+
+# Expected:
+# dst file is accessible from the mount point
+# dst file exists only on the hashed brick.
+# no linkto files on any bricks
+# src files do not exist
+
+
+TEST stat $M0/test-1/$dstfilename 2>/dev/null
+TEST file_existence_check test-1/$dstfilename $src_hashed
+TEST file_does_not_exist test-1/$src_file
+EXPECT "0" file_is_linkto $src_hash_brick/test-1/$dstfilename
+
+
+################### Test 2 ####################################
+
+# src-hashed = src-cached != dst-hashed
+# dst-cached = null
+
+echo " **** Test 2 **** "
+
+src_file="src-1"
+
+TEST mkdir $M0/test-2
+TEST touch $M0/test-2/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-2
+src_hashed=$hash_subvol
+#echo "Hashed subvol for $src_file: " $src_hashed
+
+# Find a file name that hashes to a diff hashed subvol than $src_file
+TEST first_filename_with_diff_hashsubvol "$src_hashed" "$M0/test-2" "dst-"
+echo "dst-file name: " $dstfilename
+TEST get_hash_subvol $dstfilename $M0/test-2
+dst_hashed=$hash_subvol
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+dst_hash_brick=$(get_brick_path_for_subvol $dst_hashed)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-2/$src_file $M0/test-2/$dstfilename
+
+
+# Expected:
+# dst file is accessible from the mount point
+# dst data file on src_hashed and dst linkto file on dst_hashed
+# src files do not exist
+
+
+TEST stat $M0/test-2/$dstfilename 2>/dev/null
+TEST file_existence_check test-2/$dstfilename $dst_hashed $src_hashed
+TEST file_does_not_exist test-2/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-2/$dstfilename
+EXPECT "0" file_is_linkto $src_hash_brick/test-2/$dstfilename
+
+################### Test 3 ####################################
+
+# src-hashed = dst-hashed != src-cached
+
+echo " **** Test 3 **** "
+
+src_file0="abc-1"
+
+# 1. Create src file with src_cached != src_hashed
+TEST mkdir $M0/test-3
+TEST touch $M0/test-3/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-3
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_diff_hashsubvol "$src_cached" "$M0/test-3" "src-"
+echo "dst-file name: " $dstfilename
+src_file=$dstfilename
+
+TEST mv $M0/test-3/$src_file0 $M0/test-3/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-3
+src_hashed=$hash_subvol
+
+
+# 2. Rename src to dst
+TEST first_filename_with_same_hashsubvol "$src_hashed" "$M0/test-3" "dst-"
+#echo "dst-file name: " $dstfilename
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-3/$src_file $M0/test-3/$dstfilename
+
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-3/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-3/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+TEST file_existence_check test-3/$dstfilename $src_hashed $src_cached
+
+EXPECT "1" file_is_linkto $src_hash_brick/test-3/$dstfilename
+EXPECT "0" file_is_linkto $src_cached_brick/test-3/$dstfilename
+
+
+
+################### Test 4 ####################################
+
+# src-cached = dst-hashed != src-hashed
+
+echo " **** Test 4 **** "
+
+src_file0="abc-1"
+
+# 1. Create src file with src_cached != src_hashed
+TEST mkdir $M0/test-4
+TEST touch $M0/test-4/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-4
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_diff_hashsubvol "$src_cached" "$M0/test-4" "src-"
+src_file=$dstfilename
+
+TEST mv $M0/test-4/$src_file0 $M0/test-4/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-4
+src_hashed=$hash_subvol
+
+
+# 2. Rename src to dst
+TEST first_filename_with_same_hashsubvol "$src_cached" "$M0/test-4" "dst-"
+#echo "dst-file name: " $dstfilename
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-4/$src_file $M0/test-4/$dstfilename
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-4/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-4/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+TEST file_existence_check test-4/$dstfilename $src_cached
+
+EXPECT "0" file_is_linkto $src_cached_brick/test-4/$dstfilename
+
+
+################### Test 5 ####################################
+
+# src-cached != src-hashed
+# src-hashed != dst-hashed
+# src-cached != dst-hashed
+
+
+echo " **** Test 5 **** "
+
+# 1. Create src and dst files
+
+TEST mkdir $M0/test-5
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-5" "abc-"
+src_file0=$dstfilename
+
+TEST touch $M0/test-5/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-5
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-5" "src-"
+src_file=$dstfilename
+
+TEST mv $M0/test-5/$src_file0 $M0/test-5/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-5
+src_hashed=$hash_subvol
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-5" "dst-"
+#echo "dst-file name: " $dstfilename
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+
+# 2. Rename src to dst
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-5/$src_file $M0/test-5/$dstfilename
+
+
+# 3. Validate
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-5/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-5/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+
+EXPECT "0" file_is_linkto $src_cached_brick/test-5/$dstfilename
+EXPECT "1" file_is_linkto $dst_hash_brick/test-5/$dstfilename
+
+
+########################################################################
+#
+# The Dst file exists
+#
+########################################################################
+
+################### Test 6 ####################################
+
+# src_hash = src_cached
+# dst_hash = dst_cached
+# dst_hash = src_hash
+
+
+TEST mkdir $M0/test-6
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-6" "src-"
+src_file=$dstfilename
+
+TEST touch $M0/test-6/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-6" "dst-"
+dst_file=$dstfilename
+
+TEST touch $M0/test-6/$dst_file
+
+
+# 2. Rename src to dst
+
+TEST mv $M0/test-6/$src_file $M0/test-6/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-6/$dst_file 2>/dev/null
+TEST file_existence_check test-6/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-6/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-6/$dst_file
+
+
+################### Test 7 ####################################
+
+# src_hash = src_cached
+# dst_hash = dst_cached
+# dst_hash != src_hash
+
+
+echo " **** Test 7 **** "
+
+TEST mkdir $M0/test-7
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-7" "src-"
+src_file=$dstfilename
+
+TEST touch $M0/test-7/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-7" "dst-"
+dst_file=$dstfilename
+
+TEST touch $M0/test-7/$dst_file
+
+
+# 2. Rename src to dst
+
+TEST mv $M0/test-7/$src_file $M0/test-7/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+src_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-7/$dst_file 2>/dev/null
+TEST file_existence_check test-7/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-7/$src_file
+
+EXPECT "0" file_is_linkto $src_hash_brick/test-7/$dst_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-7/$dst_file
+
+
+################### Test 8 ####################################
+
+# src_hash = src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash
+# dst_cached != src_hash
+
+echo " **** Test 8 **** "
+
+TEST mkdir $M0/test-8
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-8" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-8/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-8" "dst0-"
+dst_file0=$dstfilename
+TEST touch $M0/test-8/$dst_file0
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-8" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-8/$dst_file0 $M0/test-8/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-8/$src_file $M0/test-8/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+src_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-8/$dst_file 2>/dev/null
+TEST file_existence_check test-8/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-8/$src_file
+
+EXPECT "0" file_is_linkto $src_hash_brick/test-8/$dst_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-8/$dst_file
+
+################### Test 9 ####################################
+
+# src_hash = src_cached = dst_hash
+# dst_hash != dst_cached
+
+echo " **** Test 9 **** "
+
+TEST mkdir $M0/test-9
+
+
+# 1. Create src and dst files
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-9" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-9/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-9" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-9/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-9" "dst-"
+dst_file=$dstfilename
+
+TEST mv $M0/test-9/$dst0_file $M0/test-9/$dst_file
+
+# 2. Rename the file
+
+mv $M0/test-9/$src_file $M0/test-9/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-9/$dst_file 2>/dev/null
+TEST file_existence_check test-9/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-9/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-9/$dst_file
+
+
+################### Test 10 ####################################
+
+# src_hash = src_cached = dst_cached
+# dst_hash != dst_cached
+
+echo " **** Test 10 **** "
+
+TEST mkdir $M0/test-10
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-10" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-10/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-10" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-10/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-10" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-10/$dst0_file $M0/test-10/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-10/$src_file $M0/test-10/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-10/$dst_file 2>/dev/null
+TEST file_existence_check test-10/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-10/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-10/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-10/$dst_file
+
+
+################### Test 11 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached = src_cached
+
+echo " **** Test 11 **** "
+
+TEST mkdir $M0/test-11
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-11" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-11/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-11" "src-"
+src_file=$dstfilename
+
+mv $M0/test-11/$src0_file $M0/test-11/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-11" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-11/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-11/$src_file $M0/test-11/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-11/$dst_file 2>/dev/null
+TEST file_existence_check test-11/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-11/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-11/$dst_file
+
+
+################### Test 12 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached = src_hash
+
+echo " **** Test 12 **** "
+
+TEST mkdir $M0/test-12
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-12" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-12/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-12" "src-"
+src_file=$dstfilename
+
+mv $M0/test-12/$src0_file $M0/test-12/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-12" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-12/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-12/$src_file $M0/test-12/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-12/$dst_file 2>/dev/null
+TEST file_existence_check test-12/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-12/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-12/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-12/$dst_file
+
+################### Test 13 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached
+# dst_hash != src_cached
+# dst_hash != src_hash
+
+echo " **** Test 13 **** "
+
+TEST mkdir $M0/test-13
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-13" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-13/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-13" "src-"
+src_file=$dstfilename
+
+mv $M0/test-13/$src0_file $M0/test-13/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-13" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-13/$dst_file
+
+# 2. Rename the file
+
+mv $M0/test-13/$src_file $M0/test-13/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-13/$dst_file 2>/dev/null
+TEST file_existence_check test-13/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-13/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-13/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-13/$dst_file
+
+
+################### Test 14 ####################################
+
+# src_hash != src_cached
+# dst_hash = src_hash
+# dst_cached = src_cached
+
+echo " **** Test 14 **** "
+
+TEST mkdir $M0/test-14
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-14" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-14/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-14" "src-"
+src_file=$dstfilename
+
+mv $M0/test-14/$src0_file $M0/test-14/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-14" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-14/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-14" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-14/$dst0_file $M0/test-14/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-14/$src_file $M0/test-14/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-14/$dst_file 2>/dev/null
+TEST file_existence_check test-14/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-14/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-14/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-14/$dst_file
+
+################### Test 15 ####################################
+
+# src_hash != src_cached
+# dst_hash != src_hash
+# dst_hash != src_cached
+# dst_cached = src_cached
+
+echo " **** Test 15 **** "
+
+TEST mkdir $M0/test-15
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-15" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-15/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-15" "src-"
+src_file=$dstfilename
+
+mv $M0/test-15/$src0_file $M0/test-15/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-15" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-15/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-15" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-15/$dst0_file $M0/test-15/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-15/$src_file $M0/test-15/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-15/$dst_file 2>/dev/null
+TEST file_existence_check test-15/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-15/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-15/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-15/$dst_file
+
+
+
+################### Test 16 ####################################
+
+# src_hash != src_cached
+# dst_hash = src_cached
+# dst_cached = src_hash
+
+echo " **** Test 16 **** "
+
+TEST mkdir $M0/test-16
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-16" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-16/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-16" "src-"
+src_file=$dstfilename
+
+mv $M0/test-16/$src0_file $M0/test-16/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-16" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-16/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-16" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-16/$dst0_file $M0/test-16/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-16/$src_file $M0/test-16/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-16/$dst_file 2>/dev/null
+TEST file_existence_check test-16/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-16/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-16/$dst_file
+
+
+################### Test 17 ####################################
+
+# src_hash != src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash != src_cached
+# dst_cached = src_hash
+
+
+echo " **** Test 17 **** "
+
+TEST mkdir $M0/test-17
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-17" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-17/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-17" "src-"
+src_file=$dstfilename
+
+mv $M0/test-17/$src0_file $M0/test-17/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-17" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-17/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-17" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-17/$dst0_file $M0/test-17/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-17/$src_file $M0/test-17/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-17/$dst_file 2>/dev/null
+TEST file_existence_check test-17/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-17/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-17/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-17/$dst_file
+
+
+################### Test 18 ####################################
+
+# src_hash != src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash != src_cached != dst_cached
+
+
+echo " **** Test 18 **** "
+
+TEST mkdir $M0/test-18
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-18" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-18/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-18" "src-"
+src_file=$dstfilename
+
+mv $M0/test-18/$src0_file $M0/test-18/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-18" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-18/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-3" "$M0/test-18" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-18/$dst0_file $M0/test-18/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-18/$src_file $M0/test-18/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-3")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-18/$dst_file 2>/dev/null
+TEST file_existence_check test-18/$dst_file "$V0-client-3" "$V0-client-0"
+TEST file_does_not_exist test-18/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-18/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-18/$dst_file
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/force-migration.t b/tests/basic/distribute/force-migration.t
new file mode 100644
index 00000000000..f6c4997a505
--- /dev/null
+++ b/tests/basic/distribute/force-migration.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This tests checks if the file migration fails with force-migration
+#option set to off.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST touch $M0/file
+#This rename creates a link file for tile in the other brick.
+TEST mv $M0/file $M0/tile
+#Lets keep writing to the file which will have a open fd
+dd if=/dev/zero of=$M0/tile bs=1b &
+bg_pid=$!
+#Now rebalance will try to skip the file
+TEST $CLI volume set $V0 force-migration off
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+skippedcount=`gluster v rebalance $V0 status | awk 'NR==3{print $6}'`
+TEST [[ $skippedcount -eq 1 ]]
+#file should be migrated now
+TEST $CLI volume set $V0 force-migration on
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+skippedcount=`gluster v rebalance $V0 status | awk 'NR==3{print $6}'`
+rebalancedcount=`gluster v rebalance $V0 status | awk 'NR==3{print $2}'`
+TEST [[ $skippedcount -eq 0 ]]
+TEST [[ $rebalancedcount -eq 1 ]]
+kill -9 $bg_pid > /dev/null 2>&1
+wait > /dev/null 2>&1
+cleanup
+#Bad test because we are not sure writes are happening at the time of
+#rebalance. We need to write a test case which makes sure client
+#writes happen during rebalance. One way would be to set S+T bits on
+#src and write to file from client and then start rebalance. Currently
+#marking this as bad test.
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+
diff --git a/tests/basic/distribute/lookup.t b/tests/basic/distribute/lookup.t
new file mode 100644
index 00000000000..f757bd99fd9
--- /dev/null
+++ b/tests/basic/distribute/lookup.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview:
+# Check that non-privileged users can also clean up stale linkto files
+#
+# 1. Use the current parallel-readdir behaviour of changing the DHT child subvols
+# in the graph to generate stale linkto files
+# 2. Access the file with the stale linkto file as a non-root user
+# 3. This should now succeed (returned EIO before commit 3fb1df7870e03c9de)
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+
+ls $M0/FILE-1
+EXPECT "2" echo $?
+
+
+# Create a file and a directory on $M0
+TEST dd if=/dev/urandom of=$M0/FILE-1 count=1 bs=16k
+TEST mkdir $M0/dir1
+
+ls $M0/FILE-1
+EXPECT "0" echo $?
+
+ls $M0/dir1
+EXPECT "0" echo $?
+
+#Use a fresh mount so as to trigger a fresh lookup
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+
+TEST ls $M1/FILE-1
+EXPECT "0" echo $?
+
+
+ls $M1/dir1
+EXPECT "0" echo $?
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/non-root-unlink-stale-linkto.t b/tests/basic/distribute/non-root-unlink-stale-linkto.t
new file mode 100644
index 00000000000..d6c866ffc8e
--- /dev/null
+++ b/tests/basic/distribute/non-root-unlink-stale-linkto.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview:
+# Check that non-privileged users can also clean up stale linkto files
+#
+# 1. Use the current parallel-readdir behaviour of changing the DHT child subvols
+# in the graph to generate stale linkto files
+# 2. Access the file with the stale linkto file as a non-root user
+# 3. This should now succeed (returned EIO before commit 3fb1df7870e03c9de)
+
+USERNAME=user11
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.parallel-readdir on
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+# Create a file for testing
+TEST dd if=/dev/urandom of=$M0/FILE-1 count=1 bs=16k
+
+#Rename to create a linkto file
+TEST mv $M0/FILE-1 $M0/FILE-2
+
+# This should change the graph and cause the linkto values to become stale
+TEST $CLI volume set $V0 performance.parallel-readdir off
+
+$CLI volume set $V0 allow-insecure on
+
+
+TEST useradd -m $USERNAME
+
+#Use a fresh mount so as to trigger a lookup everywhere
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+TEST run_cmd_as_user $USERNAME "ls $M1/FILE-2"
+
+
+# Cleanup
+TEST userdel --force $USERNAME
+cleanup
+
diff --git a/tests/basic/distribute/spare_file_rebalance.t b/tests/basic/distribute/spare_file_rebalance.t
new file mode 100644
index 00000000000..061c02f7392
--- /dev/null
+++ b/tests/basic/distribute/spare_file_rebalance.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#------------------------------------------------------------
+
+# Test case - Create sparse files on MP and verify
+# file info after rebalance
+#------------------------------------------------------------
+
+# Create some sparse files and get their size
+TEST cd $M0;
+dd if=/dev/urandom of=sparse_file bs=10k count=1 seek=2M
+cp --sparse=always sparse_file sparse_file_3;
+
+# Add a 3rd brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3;
+
+# Trigger rebalance
+TEST $CLI volume rebalance $V0 start force;
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed;
+
+# Compare original and rebalanced files
+TEST cd $B0/${V0}2
+TEST cmp sparse_file $B0/${V0}3/sparse_file_3
+EXPECT_WITHIN 30 "";
+
+cleanup;
diff --git a/tests/basic/distribute/throttle-rebal.t b/tests/basic/distribute/throttle-rebal.t
new file mode 100644
index 00000000000..f4823cf4f21
--- /dev/null
+++ b/tests/basic/distribute/throttle-rebal.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Test to check
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check rebal-throttle set option sanity
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+function set_throttle {
+ local level=$1
+ $CLI volume set $V0 cluster.rebal-throttle $level 2>&1 |grep -oE 'success|failed'
+}
+
+#Determine number of cores
+cores=$(cat /proc/cpuinfo | grep processor | wc -l)
+if [ "$cores" == "" ]; then
+ echo "Could not get number of cores available"
+fi
+
+THROTTLE_LEVEL="lazy"
+EXPECT "success" set_throttle $THROTTLE_LEVEL
+EXPECT "$THROTTLE_LEVEL" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+THROTTLE_LEVEL="normal"
+EXPECT "success" set_throttle $THROTTLE_LEVEL
+EXPECT "$THROTTLE_LEVEL" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+
+THROTTLE_LEVEL="aggressive"
+EXPECT "success" set_throttle $THROTTLE_LEVEL
+EXPECT "$THROTTLE_LEVEL" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+THROTTLE_LEVEL="garbage"
+EXPECT "failed" set_throttle $THROTTLE_LEVEL
+
+#check if throttle-level is still aggressive
+EXPECT "aggressive" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+EXPECT "success" set_throttle $cores
+
+#Setting thorttle number to be more than the number of cores should fail
+THORTTLE_LEVEL=$((cores+1))
+TEST echo $THORTTLE_LEVEL
+EXPECT "failed" set_throttle $THROTTLE_LEVEL
+EXPECT "$cores" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/ec/dht-rename.t b/tests/basic/ec/dht-rename.t
new file mode 100644
index 00000000000..81b41ff4c78
--- /dev/null
+++ b/tests/basic/ec/dht-rename.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks notify part of ec
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST touch $M0/1
+TEST mv $M0/1 $M0/10
+cleanup
diff --git a/tests/basic/ec/ec-1468261.t b/tests/basic/ec/ec-1468261.t
new file mode 100644
index 00000000000..77d704cf880
--- /dev/null
+++ b/tests/basic/ec/ec-1468261.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# This test case verifies handling node down scenario with optimistic
+# changelog enabled on EC volume.
+###
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Verify that all is good
+TEST mkdir $M0/test_dir
+TEST touch $M0/test_dir/file
+sleep 2
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir
+
+#Kill two bricks and touch a file
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+TEST touch $M0/test_dir/new_file
+sleep 2
+
+#Dirty should be set on up bricks
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir
+
+#Bring up the down bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#remove mount point contents
+TEST rm -rf $M0"/*" 2>/dev/null
+
+# unmount and remount the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+#Create a tar file
+TEST mkdir /tmp/test_dir
+seq 1 3000 | xargs -n 1 -P 20 -I {} dd if=/dev/urandom of=/tmp/test_dir/file-{} bs=10K count=1
+tar -cf /tmp/test_dir.tar /tmp/test_dir/ 2>/dev/null
+rm -rf /tmp/test_dir/
+
+#Untar the tar file
+tar -C $M0 -xf /tmp/test_dir.tar 2>/dev/null&
+
+#Kill 1st and 2nd brick
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Stop untaring
+TEST kill %1
+rm -f /tmp/test_dir.tar
+
+#Bring up the down bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Kill 3rd and 4th brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#remove mount point contents
+#this will fail if things are wrong
+TEST rm -rf $M0"/*" 2>/dev/null
+
+cleanup
diff --git a/tests/basic/ec/ec-3-1.t b/tests/basic/ec/ec-3-1.t
new file mode 100644
index 00000000000..511ca6420a2
--- /dev/null
+++ b/tests/basic/ec/ec-3-1.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=3
+REDUNDANCY=1
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=217
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-4-1.t b/tests/basic/ec/ec-4-1.t
new file mode 100644
index 00000000000..3f0d0c72e44
--- /dev/null
+++ b/tests/basic/ec/ec-4-1.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=4
+REDUNDANCY=1
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=253
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-5-2.t b/tests/basic/ec/ec-5-2.t
new file mode 100644
index 00000000000..6d9e91b0f58
--- /dev/null
+++ b/tests/basic/ec/ec-5-2.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=5
+REDUNDANCY=2
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=289
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-6-2.t b/tests/basic/ec/ec-6-2.t
new file mode 100644
index 00000000000..b4451f905a1
--- /dev/null
+++ b/tests/basic/ec/ec-6-2.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=6
+REDUNDANCY=2
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=325
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-anonymous-fd.t b/tests/basic/ec/ec-anonymous-fd.t
new file mode 100644
index 00000000000..a61628182ef
--- /dev/null
+++ b/tests/basic/ec/ec-anonymous-fd.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/file1
+TEST fd_write $fd1 testing
+TEST cat $M0/file1
+TEST rm -rf $M0/file1
+TEST fd_write $fd1 testing
+TEST fd_write $fd1 testing
+TEST fd_write $fd1 testing
+
+EXPECT "^2$" num_entries $B0/${V0}0/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}1/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}2/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}3/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}4/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}5/.glusterfs/unlink/
+TEST fd_close $fd1;
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}0/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}1/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}2/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}3/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}4/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}5/.glusterfs/unlink/
+
+cleanup;
diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t
new file mode 100644
index 00000000000..29778a4f818
--- /dev/null
+++ b/tests/basic/ec/ec-background-heals.t
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks background heals option
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.eager-lock off
+TEST $CLI volume set $V0 disperse.other-eager-lock off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST touch $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file shouldn't heal the file
+EXPECT "abc" cat $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+TEST $CLI volume set $V0 disperse.background-heals 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "128" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+#Accessing file should heal the file now
+EXPECT "abc" cat $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test above test cases with reset instead of setting background-heals to 1
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 1024
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1024" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST $CLI volume set $V0 disperse.background-heals 0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 200 #Changing qlength shouldn't affect anything now
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file shouldn't heal the file
+EXPECT "abc" cat $M0/a
+sleep 3
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+TEST $CLI volume reset $V0 disperse.background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "8" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "200" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+#Accessing file should heal the file now
+EXPECT "abc" cat $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test that disabling background-heals still drains the queue
+TEST $CLI volume set $V0 disperse.background-heals 1
+TEST touch $M0/{a,b,c,d}
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "200" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST truncate -s 1GB $M0/a
+echo abc > $M0/b
+echo abc > $M0/c
+echo abc > $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST chown root:root $M0/{a,b,c,d}
+TEST $CLI volume set $V0 disperse.background-heals 0
+EXPECT_NOT "0" mount_get_option_value $M0 $V0-disperse-0 heal-waiters
+TEST truncate -s 0 $M0/a # This completes the heal fast ;-)
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test that background heals get rejected on meeting background-qlen limit
+TEST $CLI volume set $V0 disperse.background-heals 1
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST truncate -s 1GB $M0/a
+echo abc > $M0/b
+echo abc > $M0/c
+echo abc > $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST chown root:root $M0/{a,b,c,d}
+EXPECT "0" mount_get_option_value $M0 $V0-disperse-0 heal-waiters
+cleanup
diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c
new file mode 100644
index 00000000000..8be23c10eaf
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = malloc(count + 1);
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+int
+write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ fprintf(stderr, "failed to create iov");
+ goto out;
+ }
+
+ ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
+out:
+ if (ret < 0) {
+ fprintf(stderr, "glfs_pwritev failed, %d", errno);
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ char volume_cmd[4096] = {0};
+
+ if (argc != 4) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = write_sync(fs, fd, 16);
+ if (ret < 0) {
+ fprintf(stderr, "write_sync failed\n");
+ }
+
+ snprintf(volume_cmd, sizeof(volume_cmd),
+ "gluster --mode=script volume stop %s", argv[2]);
+ /*Stop the volume so that update-size-version fails*/
+ system(volume_cmd);
+ sleep(8); /* 3 seconds more than eager-lock-timeout*/
+ snprintf(volume_cmd, sizeof(volume_cmd),
+ "gluster --mode=script volume start %s", argv[2]);
+ system(volume_cmd);
+ sleep(8); /*wait for bricks to come up*/
+ ret = glfs_fsync(fd, NULL, NULL);
+ if (ret == 0) {
+ fprintf(stderr, "fsync succeeded on a BADFD\n");
+ exit(1);
+ }
+
+ ret = glfs_close(fd);
+ if (ret == 0) {
+ fprintf(stderr, "flush succeeded on a BADFD\n");
+ exit(1);
+ }
+ ret = 0;
+
+out:
+ unlink("/tmp/ec-badfd.log");
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t
new file mode 100755
index 00000000000..56feb47f115
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/file
+
+TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2
+TEST $(dirname $0)/ec-badfd $H0 $V0 /file
+cleanup_tester $(dirname ${0})/ec-badfd
+
+cleanup;
diff --git a/tests/basic/ec/ec-common b/tests/basic/ec/ec-common
new file mode 100644
index 00000000000..152e3b51236
--- /dev/null
+++ b/tests/basic/ec/ec-common
@@ -0,0 +1,145 @@
+SIZE_LIST="1048576 1000 12345 0"
+
+LAST_BRICK=$(($DISPERSE - 1))
+
+CHUNK_SIZE=512
+
+function fragment_size
+{
+ local fragments=$(($DISPERSE - $REDUNDANCY))
+ local block_size=$(($CHUNK_SIZE * $fragments))
+ local size=$(($1 + $block_size - 1))
+
+ echo $((( $size - ( $size ) % $block_size ) / $fragments))
+}
+
+cleanup
+
+tmp=`mktemp -d -t ${0##*/}.XXXXXX`
+if [ ! -d $tmp ]; then
+ exit 1
+fi
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{0..$LAST_BRICK}
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$tmp/small bs=1024 count=1
+TEST dd if=/dev/urandom of=$tmp/big bs=1024 count=4096
+
+cs_small=$(sha1sum $tmp/small | awk '{ print $1 }')
+cs_big=$(sha1sum $tmp/big | awk '{ print $1 }')
+cp $tmp/small $tmp/small1
+for size in $SIZE_LIST; do
+ truncate -s $size $tmp/small1
+ eval cs_small_truncate[$size]=$(sha1sum $tmp/small1 | awk '{ print $1 }')
+done
+cp $tmp/big $tmp/big1
+for size in $SIZE_LIST; do
+ truncate -s $size $tmp/big1
+ eval cs_big_truncate[$size]=$(sha1sum $tmp/big1 | awk '{ print $1 }')
+done
+
+TEST df -h $M0
+TEST stat $M0
+
+for idx in `seq 0 $LAST_BRICK`; do
+ brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
+done
+
+TEST stat $M0/
+TEST mkdir $M0/dir1
+TEST [ -d $M0/dir1 ]
+TEST touch $M0/file1
+TEST [ -f $M0/file1 ]
+
+for dir in . dir1; do
+ TEST cp $tmp/small $M0/$dir/small
+ TEST [ -f $M0/$dir/small ]
+ fsize=$(fragment_size 1024)
+ EXPECT "1024" stat -c "%s" $M0/$dir/small
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
+ done
+
+ EXPECT "$cs_small" echo $(sha1sum $M0/$dir/small | awk '{ print $1 }')
+
+ TEST cp $tmp/big $M0/$dir/big
+ TEST [ -f $M0/$dir/big ]
+ fsize=$(fragment_size 4194304)
+ EXPECT "4194304" stat -c "%s" $M0/$dir/big
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
+ done
+
+ EXPECT "$cs_big" echo $(sha1sum $M0/$dir/big | awk '{ print $1 }')
+
+# Give enough time for current operations to complete. Otherwise the
+# following kill_brick can cause data corruption and self-heal will be
+# needed, but this script is not prepared to handle self-healing.
+ sleep 2
+
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST kill_brick $V0 $H0 $B0/$V0$idx
+
+ EXPECT "1024" stat -c "%s" $M0/$dir/small
+ EXPECT "4194304" stat -c "%s" $M0/$dir/big
+
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+ done
+
+ for size in $SIZE_LIST; do
+ TEST truncate -s $size $M0/$dir/small
+ TEST [ -f $M0/$dir/small ]
+ fsize=$(fragment_size $size)
+ EXPECT "$size" stat -c "%s" $M0/$dir/small
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
+ done
+
+ EXPECT "${cs_small_truncate[$size]}" echo $(sha1sum $M0/$dir/small | awk '{ print $1 }')
+
+ TEST truncate -s $size $M0/$dir/big
+ TEST [ -f $M0/$dir/big ]
+ EXPECT "$size" stat -c "%s" $M0/$dir/big
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
+ done
+
+ EXPECT "${cs_big_truncate[$size]}" echo $(sha1sum $M0/$dir/big | awk '{ print $1 }')
+ done
+
+ TEST rm -f $M0/$dir/small
+ TEST [ ! -e $M0/$dir/small ]
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/$dir/small ]
+ done
+
+ TEST rm -f $M0/$dir/big
+ TEST [ ! -e $M0/$dir/big ]
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/$dir/big ]
+ done
+done
+
+TEST rmdir $M0/dir1
+TEST [ ! -e $M0/dir1 ]
+for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/dir1 ]
+done
+
+TEST rm -f $M0/file1
+TEST [ ! -e $M0/file1 ]
+for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/file1 ]
+done
+
+rm -rf $tmp
+
+cleanup
diff --git a/tests/basic/ec/ec-cpu-extensions.t b/tests/basic/ec/ec-cpu-extensions.t
new file mode 100644
index 00000000000..c9af27ea234
--- /dev/null
+++ b/tests/basic/ec/ec-cpu-extensions.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+DISPERSE=18
+REDUNDANCY=2
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=96
+
+function check_contents
+{
+ local src=$1
+ local cs=$2
+
+ TEST cp $src $M0/file
+ TEST [ -f $M0/file ]
+
+ for ext in none x64 sse avx; do
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+ TEST $CLI volume set $V0 disperse.cpu-extensions $ext
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+
+ EXPECT "$cs" echo $(sha1sum $M0/file | awk '{ print $1 }')
+ done
+
+ TEST rm -f $M0/file
+}
+
+cleanup
+
+tmp=`mktemp -p ${LOGDIR} -d -t ${0##*/}.XXXXXX`
+if [ ! -d $tmp ]; then
+ exit 1
+fi
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{1..$DISPERSE}
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume set $V0 disperse.read-policy round-robin
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+TEST dd if=/dev/urandom of=$tmp/file bs=1048576 count=1
+cs_file=$(sha1sum $tmp/file | awk '{ print $1 }')
+
+for ext in none x64 sse avx; do
+ TEST $CLI volume set $V0 disperse.cpu-extensions $ext
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+
+ check_contents $tmp/file $cs_file
+
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+done
+
+TEST rm -rf $tmp
+
+cleanup
diff --git a/tests/basic/ec/ec-data-heal.t b/tests/basic/ec/ec-data-heal.t
new file mode 100755
index 00000000000..2672661c6b1
--- /dev/null
+++ b/tests/basic/ec/ec-data-heal.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test checks data corruption after heal while IO is going on
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+############ Start IO ###########
+TEST touch $M0/file
+#start background IO on file
+dd if=/dev/urandom of=$M0/file conv=fdatasync &
+iopid=$(echo $!)
+
+
+############ Kill and start brick0 for heal ###########
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+#sleep so that data can be written which will be healed later
+sleep 10
+TEST $CLI volume start $V0 force
+##wait for heal info to become 0 and kill IO
+EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+kill $iopid
+EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############### Check md5sum #########################
+
+## unmount and mount get md5sum after killing brick0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum0=`md5sum $M0/file | awk '{print $1}'`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+## unmount and mount get md5sum after killing brick1
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum1=`md5sum $M0/file | awk '{print $1}'`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+## unmount and mount get md5sum after killing brick2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum2=`md5sum $M0/file | awk '{print $1}'`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# compare all the three md5sums
+EXPECT "$mdsum0" echo $mdsum1
+EXPECT "$mdsum0" echo $mdsum2
+EXPECT "$mdsum1" echo $mdsum2
+
+cleanup
diff --git a/tests/basic/ec/ec-dirty-flags.t b/tests/basic/ec/ec-dirty-flags.t
new file mode 100644
index 00000000000..68e66103f08
--- /dev/null
+++ b/tests/basic/ec/ec-dirty-flags.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This checks if the fop keeps the dirty flags settings correctly after
+# finishing the fop.
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+cd $M0
+for i in {1..1000}; do dd if=/dev/zero of=file-${i} bs=512k count=2; done
+cd -
+EXPECT "^0$" get_pending_heal_count $V0
+
+cleanup
diff --git a/tests/basic/ec/ec-discard.t b/tests/basic/ec/ec-discard.t
new file mode 100644
index 00000000000..001f4498c86
--- /dev/null
+++ b/tests/basic/ec/ec-discard.t
@@ -0,0 +1,205 @@
+#!/bin/bash
+#
+# Test discard functionality
+#
+# Test that basic discard (hole punch) functionality works via the fallocate
+# command line tool. Hole punch deallocates a region of a file, creating a hole
+# and a zero-filled data region. We verify that hole punch works, frees blocks
+# and that subsequent reads do not read stale data (caches are invalidated).
+#
+# NOTE: fuse fallocate is known to be broken with regard to cache invalidation
+# up to 3.9.0 kernels. Therefore, FOPEN_KEEP_CACHE is not used in this
+# test (opens will invalidate the fuse cache).
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fallocate.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Check for fallocate and hole punch support
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+
+#Write some data, punch a hole and verify the file content changes
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -p -o 512k -l 128k $M0/file
+TEST ! cmp $M0/file.copy.pre $M0/file
+TEST rm -f $M0/file $M0/file.copy.pre
+
+#Allocate some blocks, punch a hole and verify block allocation
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %B $M0/file`
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -ge 1048576 ]
+TEST fallocate -p -o 512k -l 128k $M0/file
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -lt $((933889)) ]
+TEST unlink $M0/file
+
+###Punch hole test cases without fallocate
+##With write
+#Touching starting boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 0 -l 500 $B0/test_file
+TEST fallocate -p -o 0 -l 500 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 500 -l 1548 $B0/test_file
+TEST fallocate -p -o 500 -l 1548 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Not touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 500 -l 1000 $B0/test_file
+TEST fallocate -p -o 500 -l 1000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Over boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1500 -l 1000 $B0/test_file
+TEST fallocate -p -o 1500 -l 1000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+###Punch hole test cases with fallocate
+##Without write
+
+#Zero size
+TEST dd if=/dev/urandom of=$M0/test_file bs=1024 count=8
+TEST ! fallocate -p -o 1500 -l 0 $M0/test_file
+
+#Negative size
+TEST ! fallocate -p -o 1500 -l -100 $M0/test_file
+TEST rm -f $M0/test_file
+
+#Touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 2048 $B0/test_file
+TEST fallocate -p -o 2048 -l 2048 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Touching boundary,multiple stripe
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 4096 $B0/test_file
+TEST fallocate -p -o 2048 -l 4096 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+##With write
+
+#Size ends in boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 600 -l 3496 $B0/test_file
+TEST fallocate -p -o 600 -l 3496 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset at boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 3072 $B0/test_file
+TEST fallocate -p -o 2048 -l 3072 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset and Size not at boundary covering a stripe
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1500 -l 3000 $B0/test_file
+TEST fallocate -p -o 1500 -l 3000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset and Size not at boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1000 -l 3072 $B0/test_file
+TEST fallocate -p -o 1000 -l 3072 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Data Corruption Tests
+#Kill brick1 and brick2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Bring up the bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Kill brick3 and brick4
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Bring up the bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Kill brick5 and brick6
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+cleanup
diff --git a/tests/basic/ec/ec-fallocate.t b/tests/basic/ec/ec-fallocate.t
new file mode 100644
index 00000000000..1b827eed7df
--- /dev/null
+++ b/tests/basic/ec/ec-fallocate.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Run several commands to verify basic fallocate functionality. We verify that
+# fallocate creates and allocates blocks to a file. We also verify that the keep
+# size option does not modify the file size.
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# check for fallocate support before continuing the test
+require_fallocate -l 1m -n $M0/file && rm -f $M0/file
+
+# fallocate a file and verify blocks are allocated
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+TEST [ $(($blksz * $nblks)) -eq 1048576 ]
+
+TEST unlink $M0/file
+
+# truncate a file to a fixed size, fallocate and verify that the size does not
+# change
+TEST truncate -s 1M $M0/file
+TEST fallocate -l 2m -n $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+sz=`stat -c %s $M0/file`
+TEST [ $sz -eq 1048576 ]
+# Note that gluster currently incorporates a hack to limit the number of blocks
+# reported as allocated to the file by the file size. We have allocated beyond the
+# file size here. Just check for non-zero allocation to avoid setting a land mine
+# for if/when that behavior might change.
+TEST [ ! $(($blksz * $nblks)) -eq 0 ]
+TEST unlink $M0/file
+
+# write some data, fallocate within and outside the range
+# and check for data corruption.
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -o 512k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post
+TEST cmp $M0/file.copy.pre $M0/file.copy.post
+TEST fallocate -o 1000k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post2
+TEST ! cmp $M0/file.copy.pre $M0/file.copy.post2
+TEST truncate -s 1M $M0/file.copy.post2
+TEST cmp $M0/file.copy.pre $M0/file.copy.post2
+TEST unlink $M0/file
+
+#Make sure offset/size are modified so that 3 blocks are allocated
+TEST touch $M0/f1
+TEST fallocate -o 1280 -l 1024 $M0/f1
+EXPECT "^2304$" stat -c "%s" $M0/f1
+EXPECT "^1536$" stat -c "%s" $B0/${V0}0/f1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/ec/ec-fast-fgetxattr.c b/tests/basic/ec/ec-fast-fgetxattr.c
new file mode 100644
index 00000000000..bf982151861
--- /dev/null
+++ b/tests/basic/ec/ec-fast-fgetxattr.c
@@ -0,0 +1,129 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int cbk_complete = 0;
+ssize_t cbk_ret_val = 0;
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = malloc(count + 1);
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+void
+write_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ fprintf(stderr, "glfs_write failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+write_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ fprintf(stderr, "failed to create iov");
+ goto out;
+ }
+
+ ret = glfs_pwritev_async(glfd, &iov, 1, 0, flags, write_async_cbk, NULL);
+out:
+ if (ret < 0) {
+ fprintf(stderr, "glfs_pwritev async failed");
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ char buf[1024] = {0};
+
+ if (argc != 4) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, "/tmp/ec-fgetxattr.log", 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR | O_TRUNC);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = write_async(fs, fd, 16);
+ if (ret) {
+ fprintf(stderr, "write_async failed\n");
+ }
+
+ sleep(1);
+ ret = glfs_fgetxattr(fd, "trusted.glusterfs.abc", buf, sizeof buf);
+ while (cbk_complete != 1) {
+ /* ret will be -ve as xattr doesn't exist, and fgetxattr should
+ * return waaaayyy before writev */
+ ret = 0;
+ sleep(1);
+ }
+ if (cbk_ret_val < 0) {
+ fprintf(stderr, "cbk_ret_val is -ve\n");
+ ret = -1;
+ }
+ glfs_close(fd);
+
+out:
+ unlink("/tmp/ec-fgetxattr.log");
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/ec/ec-fast-fgetxattr.t b/tests/basic/ec/ec-fast-fgetxattr.t
new file mode 100755
index 00000000000..eb12fa4a0ba
--- /dev/null
+++ b/tests/basic/ec/ec-fast-fgetxattr.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume set $V0 brick-log-level DEBUG
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.delay-duration 10000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable read,write
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/file
+
+# Perform two writes to make sure io-threads have enough threads to perform
+# things in parallel when the test execution happens.
+echo abc > $M0/file1 &
+echo abc > $M0/file2 &
+wait
+
+TEST build_tester $(dirname $0)/ec-fast-fgetxattr.c -lgfapi -Wall -O2
+TEST $(dirname $0)/ec-fast-fgetxattr $H0 $V0 /file
+cleanup_tester $(dirname ${0})/ec-fast-fgetxattr
+
+cleanup;
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t
new file mode 100644
index 00000000000..04fdd802c62
--- /dev/null
+++ b/tests/basic/ec/ec-fix-openfd.t
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+# This test checks for open fd heal on EC
+
+#Create Volume
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.read-after-open yes
+TEST $CLI volume set $V0 performance.lazy-open no
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Touch a file
+TEST touch "$M0/test_file"
+
+#Kill a brick
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Open the file in write mode
+TEST fd=`fd_available`
+TEST fd_open $fd 'rw' "$M0/test_file"
+
+#Bring up the killed brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+sleep 1
+
+#Test the fd count
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}2 test_file
+
+#Write to file
+dd iflag=fullblock if=/dev/urandom bs=1024 count=2 >&$fd 2>/dev/null
+
+#Test the fd count
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+
+#Close fd
+TEST fd_close $fd
+
+#Stop the volume
+TEST $CLI volume stop $V0
+
+#Start the volume
+TEST $CLI volume start $V0
+
+#Kill brick1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum0=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum1=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick3
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum2=`get_md5_sum "$M0/test_file"`
+
+#compare the md5sum
+EXPECT "$md5sum0" echo $md5sum1
+EXPECT "$md5sum0" echo $md5sum2
+EXPECT "$md5sum1" echo $md5sum2
+
+cleanup
diff --git a/tests/basic/ec/ec-internal-xattrs.t b/tests/basic/ec/ec-internal-xattrs.t
new file mode 100644
index 00000000000..5ef0502e016
--- /dev/null
+++ b/tests/basic/ec/ec-internal-xattrs.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks internal xattr handling in ec
+TESTS_EXPECTED_IN_LOOP=11
+
+cleanup
+function get_ec_xattrs
+{
+ getfattr -d -m. -e hex $1 | grep trusted.ec
+}
+
+function get_xattr_count
+{
+ getfattr -d -m. -e hex $1 | grep "trusted" | wc -l
+}
+
+declare -a xattrs=("trusted.ec.config" "trusted.ec.size" "trusted.ec.version" "trusted.ec.heal")
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+# Wait until all 6 children have been recognized by the ec xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/a
+
+#Check that internal xattrs are not modifiable or readable
+for x in "${xattrs[@]}"; do
+ TEST_IN_LOOP ! setfattr -n $x "abc" $M0/a
+ TEST_IN_LOOP ! setfattr -x $x "abc" $M0/a
+ if [ $x != "trusted.ec.heal" ];
+ then
+ TEST_IN_LOOP ! getfattr -n $x $M0/a
+ fi
+done
+
+TEST ! get_ec_xattrs $M0/a
+TEST setfattr -n trusted.abc -v 0x616263 $M0/a
+EXPECT "616263" get_hex_xattr trusted.abc $M0/a
+EXPECT "1" get_xattr_count $M0/a
+cleanup
diff --git a/tests/basic/ec/ec-new-entry.t b/tests/basic/ec/ec-new-entry.t
new file mode 100644
index 00000000000..be97aecd8e2
--- /dev/null
+++ b/tests/basic/ec/ec-new-entry.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+function get_md5sum {
+ md5sum $1 | awk '{print $1}'
+}
+
+#after replace-brick immediately trusted.ec.version will be absent, so if it
+#is present we can assume that heal attempted on root
+function root_heal_attempted {
+ if [ -z $(get_hex_xattr trusted.ec.version $1) ];
+ then
+ echo "N";
+ else
+ echo "Y";
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+touch $M0/{1..10}
+touch $M0/11
+TEST mknod $M0/char c 1 5
+TEST mknod $M0/block b 4 5
+for i in {1..10}; do dd if=/dev/zero of=$M0/$i bs=1M count=1; done
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" root_heal_attempted $B0/${V0}6
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+#ls -l gives "Total" line so number of lines will be 1 more
+EXPECT "^14$" num_entries $B0/${V0}6
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}6/char
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}6/block
+ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}6
+file_md5sum=$(get_md5sum $M0/1)
+empty_md5sum=$(get_md5sum $M0/11)
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo $file_md5sum
+EXPECT "$file_md5sum" get_md5sum $M0/1
+EXPECT "$file_md5sum" get_md5sum $M0/2
+EXPECT "$file_md5sum" get_md5sum $M0/3
+EXPECT "$file_md5sum" get_md5sum $M0/4
+EXPECT "$file_md5sum" get_md5sum $M0/5
+EXPECT "$file_md5sum" get_md5sum $M0/6
+EXPECT "$file_md5sum" get_md5sum $M0/7
+EXPECT "$file_md5sum" get_md5sum $M0/8
+EXPECT "$file_md5sum" get_md5sum $M0/9
+EXPECT "$file_md5sum" get_md5sum $M0/10
+EXPECT "$empty_md5sum" get_md5sum $M0/11
+
+cleanup;
diff --git a/tests/basic/ec/ec-notify.t b/tests/basic/ec/ec-notify.t
new file mode 100644
index 00000000000..53290b7c798
--- /dev/null
+++ b/tests/basic/ec/ec-notify.t
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks notify part of ec
+
+# We *know* some of these mounts will succeed but not be actually usable
+# (terrible idea IMO), so speed things up and eliminate some noise by
+# overriding this function.
+_GFS () {
+ glusterfs "$@"
+}
+
+ec_up_brick_count () {
+ local bricknum
+ for bricknum in $(seq 0 2); do
+ brick_up_status $V0 $H0 $B0/$V0$bricknum
+ done | grep -E '^1$' | wc -l
+}
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" ec_up_brick_count
+
+#First time mount tests.
+# When all the bricks are up, mount should succeed and up-children
+# count should be 3
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# When the volume is stopped mount succeeds and up-children will be 0
+TEST $CLI volume stop $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+# Wait for 5 seconds even after that up_count should show 0
+sleep 5;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" ec_child_up_count $V0 0
+TEST ! stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# When 2 bricks are up, mount should succeed and up-children
+# count should be 2
+
+TEST $CLI volume start $V0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" ec_up_brick_count
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+TEST stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# When only 1 brick is up mount should fail.
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ec_up_brick_count
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+# Wait for 5 seconds even after that up_count should show 1
+sleep 5
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_count $V0 0
+TEST ! stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Mount already succeeded. Test that the brick up down are leading to correct
+# state changes in ec.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" ec_up_brick_count
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+
+# kill 1 brick and the up_count should become 2, fops should still succeed
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+TEST touch $M0/b
+
+# kill one more brick and the up_count should become 1, fops should fail
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_count $V0 0
+TEST ! touch $M0/c
+
+# kill one more brick and the up_count should become 0, fops should still fail
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" ec_child_up_count $V0 0
+TEST ! touch $M0/c
+
+# Bring up all the bricks up and see that up_count is 3 and fops are succeeding
+# again.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/c
+
+cleanup
diff --git a/tests/basic/ec/ec-optimistic-changelog.t b/tests/basic/ec/ec-optimistic-changelog.t
new file mode 100644
index 00000000000..a372cd39a64
--- /dev/null
+++ b/tests/basic/ec/ec-optimistic-changelog.t
@@ -0,0 +1,153 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks optimistic-change-log option
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.optimistic-change-log off
+TEST $CLI volume set $V0 disperse.eager-lock off
+TEST $CLI volume set $V0 disperse.other-eager-lock off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+
+TEST $CLI volume set $V0 disperse.background-heals 1
+TEST touch $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}2
+
+
+
+### optimistic-change-log = off ; All bricks good. Test file operation
+echo abc > $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; Kill one brick . Test file operation
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file should heal the file now
+EXPECT "abc" cat $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; All bricks good. Test entry operation
+TEST touch $M0/b
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; All bricks good. Test metadata operation
+TEST chmod 0777 $M0/b
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; Kill one brick. Test entry operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST touch $M0/c
+EXPECT 4 get_pending_heal_count $V0 #two for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0 2>&1 > /dev/null
+getfattr -d -m. -e hex $M0/c 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; Kill one brick. Test metadata operation
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST chmod 0777 $M0/c
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0/c 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
+
+### optimistic-change-log = on ; All bricks good. Test file operation
+
+echo abc > $M0/aa
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; Kill one brick. Test file operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/aa
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file should heal the file now
+getfattr -d -m. -e hex $M0/aa 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; All bricks good. Test entry operation
+
+TEST touch $M0/bb
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; All bricks good. Test metadata operation
+
+TEST chmod 0777 $M0/bb
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; Kill one brick. Test entry operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST touch $M0/cc
+EXPECT 4 get_pending_heal_count $V0 #two for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0 2>&1 > /dev/null
+getfattr -d -m. -e hex $M0/cc 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; Kill one brick. Test metadata operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST chmod 0777 $M0/cc
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0/cc 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+cleanup
diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t
new file mode 100644
index 00000000000..9310ebbb8f2
--- /dev/null
+++ b/tests/basic/ec/ec-quorum-count.t
@@ -0,0 +1,167 @@
+ #!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+
+#Should fail on non-disperse volume
+TEST ! $CLI volume set $V1 disperse.quorum-count 5
+
+#Should succeed on a valid range
+TEST ! $CLI volume set $V0 disperse.quorum-count 0
+TEST ! $CLI volume set $V0 disperse.quorum-count -0
+TEST ! $CLI volume set $V0 disperse.quorum-count abc
+TEST ! $CLI volume set $V0 disperse.quorum-count 10abc
+TEST ! $CLI volume set $V0 disperse.quorum-count 1
+TEST ! $CLI volume set $V0 disperse.quorum-count 2
+TEST ! $CLI volume set $V0 disperse.quorum-count 3
+TEST $CLI volume set $V0 disperse.quorum-count 4
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Test that the option is reflected in the mount
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count
+TEST $CLI volume reset $V0 disperse.quorum-count
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
+TEST $CLI volume set $V0 disperse.quorum-count 6
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count
+
+TEST touch $M0/a
+TEST touch $M0/data
+TEST setfattr -n trusted.def -v def $M0/a
+TEST touch $M0/src
+TEST touch $M0/del-me
+TEST mkdir $M0/dir1
+TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct
+TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
+TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file
+#modify operations should fail as the file is not in quorum
+TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
+TEST kill_brick $V0 $H0 $B0/${V0}0
+#Read should succeed even when quorum-count is not met
+TEST dd if=$M0/read-file of=/dev/null iflag=direct
+TEST ! touch $M0/a2
+TEST ! mkdir $M0/dir2
+TEST ! mknod $M0/b2 b 4 5
+TEST ! ln -s $M0/a $M0/symlink
+TEST ! ln $M0/a $M0/link
+TEST ! mv $M0/src $M0/dst
+TEST ! rm -f $M0/del-me
+TEST ! rmdir $M0/dir1
+TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc
+TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc
+TEST ! truncate -s 0 $M0/a
+TEST ! setfattr -n trusted.abc -v abc $M0/a
+TEST ! setfattr -x trusted.def $M0/a
+TEST ! chmod +x $M0/a
+TEST ! fallocate -l 2m -n $M0/a
+TEST ! fallocate -p -l 512k $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+# reset the option and check whether the default redundancy count is
+# accepted or not.
+TEST $CLI volume reset $V0 disperse.quorum-count
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
+TEST touch $M0/a1
+TEST touch $M0/data1
+TEST setfattr -n trusted.def -v def $M0/a1
+TEST touch $M0/src1
+TEST touch $M0/del-me1
+TEST mkdir $M0/dir11
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/a21
+TEST mkdir $M0/dir21
+TEST mknod $M0/b21 b 4 5
+TEST ln -s $M0/a1 $M0/symlink1
+TEST ln $M0/a1 $M0/link1
+TEST mv $M0/src1 $M0/dst1
+TEST rm -f $M0/del-me1
+TEST rmdir $M0/dir11
+TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc
+TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc
+TEST truncate -s 0 $M0/a1
+TEST setfattr -n trusted.abc -v abc $M0/a1
+TEST setfattr -x trusted.def $M0/a1
+TEST chmod +x $M0/a1
+TEST fallocate -l 2m -n $M0/a1
+TEST fallocate -p -l 512k $M0/a1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST touch $M0/a2
+TEST touch $M0/data2
+TEST setfattr -n trusted.def -v def $M0/a1
+TEST touch $M0/src2
+TEST touch $M0/del-me2
+TEST mkdir $M0/dir12
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! touch $M0/a22
+TEST ! mkdir $M0/dir22
+TEST ! mknod $M0/b22 b 4 5
+TEST ! ln -s $M0/a2 $M0/symlink2
+TEST ! ln $M0/a2 $M0/link2
+TEST ! mv $M0/src2 $M0/dst2
+TEST ! rm -f $M0/del-me2
+TEST ! rmdir $M0/dir12
+TEST ! dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc
+TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc
+TEST ! truncate -s 0 $M0/a2
+TEST ! setfattr -n trusted.abc -v abc $M0/a2
+TEST ! setfattr -x trusted.def $M0/a2
+TEST ! chmod +x $M0/a2
+TEST ! fallocate -l 2m -n $M0/a2
+TEST ! fallocate -p -l 512k $M0/a2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+# Set quorum-count to 5 and kill 1 brick and the fops should pass
+TEST $CLI volume set $V0 disperse.quorum-count 5
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count
+TEST touch $M0/a3
+TEST touch $M0/data3
+TEST setfattr -n trusted.def -v def $M0/a3
+TEST touch $M0/src3
+TEST touch $M0/del-me3
+TEST mkdir $M0/dir13
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a31
+TEST mkdir $M0/dir31
+TEST mknod $M0/b31 b 4 5
+TEST ln -s $M0/a3 $M0/symlink3
+TEST ln $M0/a3 $M0/link3
+TEST mv $M0/src3 $M0/dst3
+TEST rm -f $M0/del-me3
+TEST rmdir $M0/dir13
+TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc
+TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc
+TEST truncate -s 0 $M0/a3
+TEST setfattr -n trusted.abc -v abc $M0/a3
+TEST setfattr -x trusted.def $M0/a3
+TEST chmod +x $M0/a3
+TEST fallocate -l 2m -n $M0/a3
+TEST fallocate -p -l 512k $M0/a3
+TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct
+cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}')
+TEST [[ $cksum_before_heal == $cksum_after_heal ]]
+cleanup;
diff --git a/tests/basic/ec/ec-read-mask.t b/tests/basic/ec/ec-read-mask.t
new file mode 100644
index 00000000000..ddb556f2973
--- /dev/null
+++ b/tests/basic/ec/ec-read-mask.t
@@ -0,0 +1,114 @@
+ #!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Empty read-mask should fail
+TEST ! $GFS --xlator-option=*.ec-read-mask="" -s $H0 --volfile-id $V0 $M0
+
+#Less than 4 number of bricks should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option=*.ec-read-mask="0:1:2" -s $H0 --volfile-id $V0 $M0
+
+#ids greater than 5 should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:6" -s $H0 --volfile-id $V0 $M0
+
+#ids less than 0 should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:-1:2:5" -s $H0 --volfile-id $V0 $M0
+
+#read-mask with non-alphabet or comma should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5:abc" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5a" -s $H0 --volfile-id $V0 $M0
+
+#mount with at least 4 read-mask-ids and all of them valid should pass
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5:4:3" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^111111$" ec_option_value $V0 $M0 0 read-mask
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+
+TEST dd if=/dev/urandom of=$M0/a bs=1M count=1
+md5=$(md5sum $M0/a | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Read on the file should fail if any of the read-mask is down when number of
+#ids is data-count
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}5
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+#Read on file should succeed when non-read-mask bricks are down
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+#Deliberately corrupt chunks 3: 4 and check that reads still give correct data
+TEST dd if=/dev/zero of=$B0/${V0}3/a bs=256k count=1
+TEST dd if=/dev/zero of=$B0/${V0}4/a bs=256k count=1
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t
new file mode 100644
index 00000000000..fe6fe6576e7
--- /dev/null
+++ b/tests/basic/ec/ec-read-policy.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+#Disable all caching
+TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+#TEST volume operations work fine
+
+EXPECT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
+TEST $CLI volume set $V0 disperse.read-policy round-robin
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
+
+#TEST if the option gives the intended behavior. The way we perform this test
+#is by performing reads from the mount and write to /dev/null. If the
+#read-policy is round-robin, then all bricks should have read-fop where as
+#with gfid-hash number of bricks with reads should be equal to (num-bricks - redundancy)
+#count
+
+TEST $CLI volume profile $V0 start
+TEST dd if=/dev/zero of=$M0/1 bs=1M count=4
+#Perform reads now from file on the mount, this only tests dispatch_min
+TEST dd if=$M0/1 of=/dev/null bs=1M count=4
+#TEST that reads are executed on all bricks
+rr_reads=$($CLI volume profile $V0 info cumulative| grep -w READ | wc -l)
+EXPECT "^6$" echo $rr_reads
+TEST $CLI volume profile $V0 info clear
+
+TEST $CLI volume set $V0 disperse.read-policy gfid-hash
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
+
+#Perform reads now from file on the mount, this only tests dispatch_min
+TEST dd if=$M0/1 of=/dev/null bs=1M count=4
+#TEST that reads are executed on all bricks
+gh_reads=$($CLI volume profile $V0 info cumulative| grep -w READ | wc -l)
+EXPECT "^4$" echo $gh_reads
+
+cleanup;
diff --git a/tests/basic/ec/ec-readdir.t b/tests/basic/ec/ec-readdir.t
new file mode 100644
index 00000000000..fad101bcabe
--- /dev/null
+++ b/tests/basic/ec/ec-readdir.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks that readdir works fine on distributed disperse volume
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
+
+TEST mkdir $M0/d
+TEST touch $M0/d/{1..100}
+EXPECT "100" echo $(ls $M0/d/* | wc -l)
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST rm -rf $M0/d/{1..100}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
+#Do it 3 times so that even with all load balancing, readdir never falls
+#on stale bricks
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
+#Do the same test above for creation of entries
+TEST mkdir $M0/d1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST touch $M0/d1/{1..100}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
+#Do it 3 times so that even with all load balancing, readdir never falls
+#on stale bricks
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
+cleanup
diff --git a/tests/basic/ec/ec-rebalance.t b/tests/basic/ec/ec-rebalance.t
new file mode 100644
index 00000000000..6cda3a3e4be
--- /dev/null
+++ b/tests/basic/ec/ec-rebalance.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# This will test the rebalance failure reported in 1447559
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 lookup-optimize on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# Create files
+for i in {1..10}
+do
+ dd if=/dev/urandom of=$M0/file$i bs=1024k count=1
+done
+
+md5_1=$(md5sum $M0/file1 | awk '{print $1}')
+md5_2=$(md5sum $M0/file2 | awk '{print $1}')
+md5_3=$(md5sum $M0/file3 | awk '{print $1}')
+md5_4=$(md5sum $M0/file4 | awk '{print $1}')
+md5_5=$(md5sum $M0/file5 | awk '{print $1}')
+md5_6=$(md5sum $M0/file6 | awk '{print $1}')
+md5_7=$(md5sum $M0/file7 | awk '{print $1}')
+md5_8=$(md5sum $M0/file8 | awk '{print $1}')
+md5_9=$(md5sum $M0/file9 | awk '{print $1}')
+md5_10=$(md5sum $M0/file10 | awk '{print $1}')
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{3..5}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Remount to avoid any caches
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT "$md5_1" echo $(md5sum $M0/file1 | awk '{print $1}')
+EXPECT "$md5_2" echo $(md5sum $M0/file2 | awk '{print $1}')
+EXPECT "$md5_3" echo $(md5sum $M0/file3 | awk '{print $1}')
+EXPECT "$md5_4" echo $(md5sum $M0/file4 | awk '{print $1}')
+EXPECT "$md5_5" echo $(md5sum $M0/file5 | awk '{print $1}')
+EXPECT "$md5_6" echo $(md5sum $M0/file6 | awk '{print $1}')
+EXPECT "$md5_7" echo $(md5sum $M0/file7 | awk '{print $1}')
+EXPECT "$md5_8" echo $(md5sum $M0/file8 | awk '{print $1}')
+EXPECT "$md5_9" echo $(md5sum $M0/file9 | awk '{print $1}')
+EXPECT "$md5_10" echo $(md5sum $M0/file10 | awk '{print $1}')
+
+cleanup;
diff --git a/tests/basic/ec/ec-reset-brick.t b/tests/basic/ec/ec-reset-brick.t
new file mode 100644
index 00000000000..f1a625df4ff
--- /dev/null
+++ b/tests/basic/ec/ec-reset-brick.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+mkdir $M0/dir
+touch $M0/dir/{1..10}
+
+mkdir $M0/dir/dir1
+touch $M0/dir/dir1/{1..10}
+
+#kill brick process
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}5 start
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+#reset-brick by removing all the data and create dir again
+rm -rf $B0/${V0}5
+mkdir $B0/${V0}5
+
+#start brick process and heal by commiting reset-brick
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}5 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+EXPECT "^12$" num_entries $B0/${V0}5/dir
+EXPECT "^11$" num_entries $B0/${V0}5/dir/dir1
+
+ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}5
+
+cleanup;
diff --git a/tests/basic/ec/ec-root-heal.t b/tests/basic/ec/ec-root-heal.t
new file mode 100644
index 00000000000..11ea7cdf9d4
--- /dev/null
+++ b/tests/basic/ec/ec-root-heal.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+touch $M0/{1..10}
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
+
+# active heal
+TEST $CLI volume heal $V0 full
+#ls -l gives "Total" line so number of lines will be 1 more
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+EXPECT "^11$" num_entries $B0/${V0}6
+ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}6
+
+cleanup;
diff --git a/tests/basic/ec/ec-seek.t b/tests/basic/ec/ec-seek.t
new file mode 100644
index 00000000000..5a7d31b9f8f
--- /dev/null
+++ b/tests/basic/ec/ec-seek.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SEEK=$(dirname $0)/seek
+build_tester $(dirname $0)/../seek.c -o ${SEEK}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0..2}
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+EXPECT '3' brick_count $V0
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST ${SEEK} create ${M0}/test 0 1 1048576 1
+# Determine underlying filesystem allocation block size
+BSIZE="$(($(${SEEK} scan ${M0}/test hole 0) * 2))"
+
+TEST ${SEEK} create ${M0}/test 0 ${BSIZE} $((${BSIZE} * 4 + 512)) ${BSIZE}
+
+EXPECT "^0$" ${SEEK} scan ${M0}/test data 0
+EXPECT "^$((${BSIZE} / 2))$" ${SEEK} scan ${M0}/test data $((${BSIZE} / 2))
+EXPECT "^$((${BSIZE} - 1))$" ${SEEK} scan ${M0}/test data $((${BSIZE} - 1))
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data ${BSIZE}
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 511))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 6))
+
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole 0
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} / 2))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} - 1))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole ${BSIZE}
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 6))
+
+rm -f ${SEEK}
+cleanup
+
+# Centos6 regression slaves seem to not support SEEK_DATA/SEEK_HOLE
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/ec/ec-stripe.t b/tests/basic/ec/ec-stripe.t
new file mode 100644
index 00000000000..98b92294feb
--- /dev/null
+++ b/tests/basic/ec/ec-stripe.t
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# These tests will check the stripe cache functionality of
+# disperse volume
+
+test_index=0
+stripe_count=4
+loop_test=0
+
+TESTS_EXPECTED_IN_LOOP=182
+
+function get_mount_stripe_cache {
+ local sd=$1
+ local field=$2
+ local val=$(grep "$field" $sd | cut -f2 -d'=' | tail -1)
+ echo $val
+}
+
+function get_stripes_in_cache {
+ local target=$1
+ local count=$2
+ local c=0
+ for (( c=0; c<$count; c++ ))
+ do
+ let x=102+$c*1024
+ echo yy | dd of=$target oflag=seek_bytes,sync seek=$x conv=notrunc
+ if [ $? != 0 ]
+ then
+ break
+ fi
+ done
+ echo "$c"
+}
+# tests in this loop = 7
+function mount_get_test_files {
+ let test_index+=1
+ let loop_test+=7
+ echo "Test Case $test_index"
+ local stripe_count=$1
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+ TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=20
+ TEST cp $B0/test_file $M0/test_file
+ TEST dd if=/dev/urandom of=$B0/misc_file bs=1024 count=20
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "$stripe_count" get_stripes_in_cache $B0/test_file $stripe_count
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "$stripe_count" get_stripes_in_cache $M0/test_file $stripe_count
+}
+
+#check_statedump_md5sum (hitcount misscount)
+#tests in this loop = 4
+function check_statedump_md5sum {
+ statedump=$(generate_mount_statedump $V0)
+ let loop_test+=4
+ sleep 1
+ nhits=$(get_mount_stripe_cache $statedump "hits")
+ nmisses=$(get_mount_stripe_cache $statedump "misses")
+ EXPECT "$1" echo $nhits
+ EXPECT "$2" echo $nmisses
+ TEST md5_sum=`get_md5_sum $B0/test_file`
+ EXPECT $md5_sum get_md5_sum $M0/test_file
+}
+
+#tests in this loop = 2
+function clean_file_unmount {
+ let loop_test+=2
+ TEST rm -f $B0/test_file $M0/test_file $B0/misc_file
+ cleanup_mount_statedump $V0
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+}
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 disperse.stripe-cache 8
+TEST $CLI volume start $V0
+
+### 1 - offset and size in one stripes ####
+
+mount_get_test_files $stripe_count
+# This should have 4 hits on cached stripes
+get_stripes_in_cache $M0/test_file $stripe_count
+check_statedump_md5sum 4 4
+clean_file_unmount
+
+### 2 - Length less than a stripe size, covering two stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 3 -Length exactly equal to the stripe size, covering a single stripe ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+check_statedump_md5sum 0 4
+clean_file_unmount
+
+### 4 - Length exactly equal to the stripe size, covering two stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=2048 count=1 oflag=seek_bytes,sync seek=1024 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=2048 count=1 oflag=seek_bytes,sync seek=1024 conv=notrunc
+check_statedump_md5sum 0 4
+clean_file_unmount
+
+### 5 - Length greater than a stripe size, covering two stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1030 count=1 oflag=seek_bytes,sync seek=500 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1030 count=1 oflag=seek_bytes,sync seek=500 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 6 - Length greater than a stripe size, covering three stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=2078 count=1 oflag=seek_bytes,sync seek=1000 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=2078 count=1 oflag=seek_bytes,sync seek=1000 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 7 - Discard range - all stripe from cache should be invalidated complete stripes ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 0 -l 5120 $B0/test_file
+TEST fallocate -p -o 0 -l 5120 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=6 oflag=seek_bytes,sync seek=1030 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=6 oflag=seek_bytes,sync seek=1030 conv=notrunc
+check_statedump_md5sum 5 11
+clean_file_unmount
+
+### 8 - Discard range - starts in the middle of stripe, ends on the middle of next stripe####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 500 -l 1024 $B0/test_file
+TEST fallocate -p -o 500 -l 1024 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+check_statedump_md5sum 10 6
+clean_file_unmount
+
+### 9 - Discard range - starts in the middle of stripe, ends on the middle of 3rd stripe#####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 500 -l 2048 $B0/test_file
+TEST fallocate -p -o 500 -l 2048 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+check_statedump_md5sum 9 7
+clean_file_unmount
+
+### 10 - Discard range - starts and end within one stripe ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 500 -l 100 $B0/test_file
+TEST fallocate -p -o 500 -l 100 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+check_statedump_md5sum 1 4
+clean_file_unmount
+
+### 11 - Discard range - starts and end in one complete stripe ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 0 -l 1024 $B0/test_file
+TEST fallocate -p -o 0 -l 1024 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=512 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=512 conv=notrunc
+check_statedump_md5sum 1 5
+clean_file_unmount
+
+### 12 - Discard range - starts and end two complete stripe ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 0 -l 2048 $B0/test_file
+TEST fallocate -p -o 0 -l 2048 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=4 oflag=seek_bytes,sync seek=300 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=4 oflag=seek_bytes,sync seek=300 conv=notrunc
+check_statedump_md5sum 5 7
+clean_file_unmount
+
+### 13 - Truncate to invalidate all the stripe in cache ####
+
+mount_get_test_files $stripe_count
+TEST truncate -s 0 $B0/test_file
+TEST truncate -s 0 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
+check_statedump_md5sum 4 4
+clean_file_unmount
+
+### 14 - Truncate to invalidate all but one the stripe in cache ####
+
+mount_get_test_files $stripe_count
+TEST truncate -s 500 $B0/test_file
+TEST truncate -s 500 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=525 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=525 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 15 - Truncate to invalidate all but one the stripe in cache ####
+mount_get_test_files $stripe_count
+TEST truncate -s 2148 $B0/test_file
+TEST truncate -s 2148 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1000 count=1 oflag=seek_bytes,sync seek=2050 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1000 count=1 oflag=seek_bytes,sync seek=2050 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+echo "Total loop tests $loop_test"
+cleanup
diff --git a/tests/basic/ec/ec-up.t b/tests/basic/ec/ec-up.t
new file mode 100644
index 00000000000..d54e7e1d022
--- /dev/null
+++ b/tests/basic/ec/ec-up.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+#Tests that ec subvolume is up/down correctly
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse-data 2 redundancy 1 $H0:$B0/${V0}{0,1,3,4,5,6}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "1" ec_up_status $V0 $M0 0
+EXPECT "1" ec_up_status $V0 $M0 1
+
+#kill two bricks in first disperse subvolume and check that ec_up_status is 0 for it
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ec_up_status $V0 $M0 0
+EXPECT "1" ec_up_status $V0 $M0 1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ec_up_status $V0 $M0 0
+EXPECT "1" ec_up_status $V0 $M0 1
+cleanup;
diff --git a/tests/basic/ec/ec.t b/tests/basic/ec/ec.t
new file mode 100644
index 00000000000..cc882771501
--- /dev/null
+++ b/tests/basic/ec/ec.t
@@ -0,0 +1,259 @@
+#!/bin/bash
+
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST_USER=test-ec-user
+TEST_UID=27341
+
+function my_getfattr {
+ getfattr --only-values -e text $* 2> /dev/null
+}
+
+function get_rep_count {
+ v=$(my_getfattr -n trusted.jbr.rep-count $1)
+ #echo $v > /dev/tty
+ echo $v
+}
+
+function create_file {
+ dd if=/dev/urandom of=$1 bs=4k count=$2 conv=sync 2> /dev/null
+}
+
+function setup_perm_file {
+ mkdir $1/perm_dir || return 1
+ chown ${TEST_USER} $1/perm_dir || return 1
+ su ${TEST_USER} -c "touch $1/perm_dir/perm_file" || return 1
+ return 0
+}
+
+# Functions to check repair for specific operation types.
+
+function check_create_write {
+ for b in $*; do
+ cmp $tmpdir/create-write $b/create-write || return 1
+ done
+ return 0
+}
+
+function check_truncate {
+ truncate -s 8192 $tmpdir/truncate
+ for b in $*; do
+ cmp $tmpdir/truncate $b/truncate || return 1
+ done
+ return 0
+}
+
+function check_hard_link {
+ stat $M0/hard-link-1
+ stat $M0/hard-link-2
+ for b in $*; do
+ inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1)
+ inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1)
+ if [ "$inum1" != "$inum2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_soft_link {
+ stat $M0/soft-link
+ for b in $*; do
+ if [ "$(readlink $b/soft-link)" != "soft-link-tgt" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_unlink {
+ stat $M0/unlink
+ for b in $*; do
+ if [ -e $b/unlink ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_mkdir {
+ getfattr -m. -d $M0/mkdir
+ for b in $*; do
+ if [ ! -d $b/mkdir ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_rmdir {
+ getfattr -m. -d $M0/rmdir
+ for b in $*; do
+ if [ -e $b/rmdir ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_setxattr {
+ getfattr -d -m. -e hex $M0/setxattr
+ for b in $*; do
+ v=$(my_getfattr -n user.foo $b/setxattr)
+ if [ "$v" != "ash_nazg_durbatuluk" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_removexattr {
+ getfattr -d -m. -e hex $M0/removexattr
+ for b in $*; do
+ my_getfattr -n user.bar $b/removexattr 2> /dev/null
+ if [ $? -eq 0 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_perm_file {
+ stat $M0/perm_dir/perm_file
+ getfattr -m. -d $M0/perm_dir
+ b1=$1
+ shift 1
+ ftext=$(stat -c "%u %g %a" $b1/perm_dir/perm_file)
+ #echo "first u/g/a = $ftext" > /dev/tty
+ for b in $*; do
+ btext=$(stat -c "%u %g %a" $b/perm_dir/perm_file)
+ #echo " next u/a/a = $btext" > /dev/tty
+ if [ x"$btext" != x"$ftext" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+cleanup
+
+TEST useradd -o -M -u ${TEST_UID} ${TEST_USER}
+push_trapfunc "userdel --force ${TEST_USER}"
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+TEST $CLI volume create $V0 disperse 10 redundancy 2 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+EXPECT '10' brick_count $V0
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+# Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0
+
+# Create local files for comparisons etc.
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+push_trapfunc "rm -rf $tmpdir"
+TEST create_file $tmpdir/create-write 10
+TEST create_file $tmpdir/truncate 10
+
+# Prepare files and directories we'll need later.
+TEST cp $tmpdir/truncate $M0/
+TEST touch $M0/hard-link-1
+TEST touch $M0/unlink
+TEST mkdir $M0/rmdir
+TEST touch $M0/setxattr
+TEST touch $M0/removexattr
+TEST setfattr -n user.bar -v "ash_nazg_gimbatul" $M0/removexattr
+
+sleep 2
+
+# Kill a couple of bricks and allow some time for things to settle.
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}8
+sleep 10
+
+# Test create+write
+TEST cp $tmpdir/create-write $M0/
+# Test truncate
+TEST truncate -s 8192 $M0/truncate
+# Test hard link
+TEST ln $M0/hard-link-1 $M0/hard-link-2
+# Test soft link
+TEST ln -s soft-link-tgt $M0/soft-link
+# Test unlink
+TEST rm $M0/unlink
+# Test rmdir
+TEST rmdir $M0/rmdir
+# Test mkdir
+TEST mkdir $M0/mkdir
+# Test setxattr
+TEST setfattr -n user.foo -v "ash_nazg_durbatuluk" $M0/setxattr
+# Test removexattr
+TEST setfattr -x user.bar $M0/removexattr
+# Test uid/gid behavior
+TEST setup_perm_file $M0
+
+sleep 2
+
+# Unmount/remount so that create/write and truncate don't see cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS -s $H0 --volfile-id $V0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "8" ec_child_up_count $V0 0
+
+# Test create/write and truncate *before* the bricks are brought back.
+TEST check_create_write $M1
+TEST check_truncate $M1
+
+# Restart the bricks and allow repair to occur.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0
+
+# Unmount/remount again, same reason as before.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0
+
+# Make sure everything is as it should be. Most tests check for consistency
+# between the bricks and the front end. This is not valid for disperse, so we
+# check the mountpoint state instead.
+
+TEST check_create_write $M0
+TEST check_truncate $M0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_unlink $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_rmdir $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mkdir $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_setxattr $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_removexattr $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_perm_file $B0/${V0}{0..9}
+
+cleanup
diff --git a/tests/basic/ec/gfapi-ec-open-truncate.c b/tests/basic/ec/gfapi-ec-open-truncate.c
new file mode 100644
index 00000000000..fb16807003a
--- /dev/null
+++ b/tests/basic/ec/gfapi-ec-open-truncate.c
@@ -0,0 +1,171 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = calloc(count + 1, sizeof(fillchar));
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ int ret = 0;
+ int i = 0;
+ int count = 200;
+ struct iovec iov = {0};
+ int flags = O_RDWR;
+ int bricksup = 0;
+ int fdopen = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ /* Brick is down and we are opening a file to trigger fd heal. */
+ /* Bypass Write-behind */
+ glfd = glfs_open(fs, "a", O_WRONLY | O_TRUNC | O_SYNC);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_open_truncate failed");
+ exit(1);
+ }
+ system("gluster --mode=script volume start patchy force");
+ /*CHILD_UP_TIMEOUT is 20 seconds*/
+ for (i = 0; i < 20; i++) {
+ ret = system(
+ "[ $(gluster --mode=script volume status patchy | "
+ "grep \" Y \" | awk '{print $(NF-1)}' | wc -l) == 3 ]");
+ if (WIFEXITED(ret) && WEXITSTATUS(ret)) {
+ printf("Ret value of system: %d\n, ifexited: %d, exitstatus: %d",
+ ret, WIFEXITED(ret), WEXITSTATUS(ret));
+ sleep(1);
+ continue;
+ }
+ printf("Number of loops: %d\n", i);
+ bricksup = 1;
+ break;
+ }
+ if (!bricksup) {
+ system("gluster --mode=script volume status patchy");
+ LOG_ERR("Bricks didn't come up\n");
+ exit(1);
+ }
+
+ /*Not sure how to check that the child-up reached EC, so sleep 3 for now*/
+ sleep(3);
+ ret = fill_iov(&iov, 'a', 200);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ exit(1);
+ }
+
+ /*write will trigger re-open*/
+ ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_test_function failed");
+ exit(1);
+ }
+ /*Check reopen happened by checking for open-fds on the brick*/
+ for (i = 0; i < 20; i++) {
+ ret = system(
+ "[ $(for i in $(pgrep glusterfsd); do ls -l /proc/$i/fd | grep "
+ "\"[.]glusterfs\" | grep -v \".glusterfs/[0-9a-f][0-9a-f]\" | grep "
+ "-v health_check; done | wc -l) == 3 ]");
+ if (WIFEXITED(ret) && WEXITSTATUS(ret)) {
+ printf("Ret value of system: %d\n, ifexited: %d, exitstatus: %d",
+ ret, WIFEXITED(ret), WEXITSTATUS(ret));
+ sleep(1);
+ continue;
+ }
+ fdopen = 1;
+ break;
+ }
+
+ if (!fdopen) {
+ LOG_ERR("fd reopen didn't succeed");
+ exit(1);
+ }
+
+ return 0;
+}
diff --git a/tests/basic/ec/gfapi-ec-open-truncate.t b/tests/basic/ec/gfapi-ec-open-truncate.t
new file mode 100644
index 00000000000..e22562c6ea3
--- /dev/null
+++ b/tests/basic/ec/gfapi-ec-open-truncate.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This .t tests the functionality of open-fd-heal when opened with O_TRUNC.
+#If re-open is not done with O_TRUNC then the test will pass.
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 disperse 3 ${H0}:$B0/brick{1,2,3}
+EXPECT 'Created' volinfo_field $V0 'Status'
+#Disable heals to prevent any chance of heals masking the problem
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+TEST $CLI volume set $V0 performance.write-behind off
+
+#We need truncate fop to go through before pre-op completes for the write-fop
+#which triggers open-fd heal. Otherwise truncate won't be allowed on 'bad' brick
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.enable fxattrop
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST kill_brick $V0 $H0 $B0/brick1
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ec-open-truncate.c -lgfapi
+
+TEST $CLI volume profile $V0 info clear
+TEST ./$(dirname $0)/gfapi-ec-open-truncate ${H0} $V0 $logdir/gfapi-ec-open-truncate.log
+
+EXPECT "^2$" echo $($CLI volume profile $V0 info incremental | grep -i truncate | wc -l)
+cleanup_tester $(dirname $0)/gfapi-ec-open-truncate
+
+cleanup
diff --git a/tests/basic/ec/heal-info.t b/tests/basic/ec/heal-info.t
new file mode 100644
index 00000000000..1549d5fcdb0
--- /dev/null
+++ b/tests/basic/ec/heal-info.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks if heal info works as expected or not
+
+function create_files {
+ for i in {21..1000};
+ do
+ dd if=/dev/zero of=$M0/$i bs=1M count=1 2>&1 > /dev/null;
+ done
+ rm -f $M0/lock
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --direct-io-mode=yes $M0;
+# Wait until all 6 childs have been recognized by the ec xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#heal info should give zero entries to be healed when I/O is going on
+dd if=/dev/zero of=$M0/a bs=1M count=2048 &
+dd_pid=$!
+sleep 3 #Wait for I/O to proceed for some time
+EXPECT "^0$" get_pending_heal_count $V0
+kill -9 $dd_pid
+touch $M0/lock
+create_files &
+
+total_heal_count=0
+while [ -f $M0/lock ];
+do
+ heal_count=$(get_pending_heal_count $V0)
+ total_heal_count=$((heal_count+total_heal_count))
+done
+EXPECT "^0$" echo $total_heal_count
+
+#When only data heal is required it should print it
+#There is no easy way to create this using commands so assigning xattrs directly
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}0/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}1/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}2/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}3/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}4/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000010000000000000000 $B0/${V0}5/1000
+index_path=$B0/${V0}5/.glusterfs/indices/xattrop/$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}5/1000))
+while [ -f $index_path ]; do :; done
+TEST touch $index_path
+EXPECT "^1$" get_pending_heal_count $V0
+TEST rm -f $M0/1000
+
+#When files/directories need heal test that it prints them
+TEST touch $M0/{1..10}
+TEST kill_brick $V0 $H0 $B0/${V0}0
+for i in {11..20};
+do
+ echo abc > $M0/$i #Data + entry + metadata heal
+done
+for i in {1..10};
+do
+ chmod +x $M0/$i;
+done
+
+EXPECT "^105$" get_pending_heal_count $V0
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1533815
diff --git a/tests/basic/ec/lock-contention.t b/tests/basic/ec/lock-contention.t
new file mode 100644
index 00000000000..8f86cee16ad
--- /dev/null
+++ b/tests/basic/ec/lock-contention.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# This test verifies that when 'lock-notify-contention' option is enabled,
+# locks xlator actually sends an upcall notification that causes the acquired
+# lock from one client to be released before it's supposed to when another
+# client accesses the file.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function elapsed_time() {
+ local start="`date +%s`"
+
+ if [[ "test" == `cat "$1"` ]]; then
+ echo "$((`date +%s` - ${start}))"
+ fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 features.locks-notify-contention off
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 6
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 6
+TEST $CLI volume start $V0
+
+TEST $GFS --direct-io-mode=yes --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
+
+TEST $GFS --direct-io-mode=yes --volfile-id=/$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M1
+
+TEST $(echo "test" >$M0/file)
+
+# With locks-notify-contention set to off, accessing the file from another
+# client should take 6 seconds. Checking against 3 seconds to be safe.
+elapsed="$(elapsed_time $M1/file)"
+TEST [[ ${elapsed} -ge 3 ]]
+
+elapsed="$(elapsed_time $M0/file)"
+TEST [[ ${elapsed} -ge 3 ]]
+
+TEST $CLI volume set $V0 features.locks-notify-contention on
+
+# With locks-notify-contention set to on, accessing the file from another
+# client should be fast. Checking against 3 seconds to be safe.
+elapsed="$(elapsed_time $M1/file)"
+TEST [[ ${elapsed} -le 3 ]]
+
+elapsed="$(elapsed_time $M0/file)"
+TEST [[ ${elapsed} -le 3 ]]
+
+cleanup
diff --git a/tests/basic/ec/nfs.t b/tests/basic/ec/nfs.t
new file mode 100755
index 00000000000..3f51a640ef7
--- /dev/null
+++ b/tests/basic/ec/nfs.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# The test below fails with "bs=1024k count=1k", but passes when "oflag=direct"
+# is used. There also does not seem to be an issue on systems with sufficient
+# memory. Reducing the "count" prevents hangs too.
+TEST dd if=/dev/zero of=$N0/test bs=1024k count=32
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup
diff --git a/tests/basic/ec/quota.t b/tests/basic/ec/quota.t
new file mode 100755
index 00000000000..c9612c8b76a
--- /dev/null
+++ b/tests/basic/ec/quota.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse $H0:$B0/${V0}{0..2}
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST mkdir -p $M0/test
+
+TEST $CLI volume quota $V0 enable
+
+TEST $CLI volume quota $V0 limit-usage /test 10MB
+
+EXPECT "10.0MB" quota_hard_limit "/test";
+EXPECT "80%" quota_soft_limit "/test";
+
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST ! $QDD $M0/test/file1.txt 256 48
+TEST rm $M0/test/file1.txt
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test"
+
+TEST $QDD $M0/test/file2.txt 256 32
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test"
+
+TEST rm $M0/test/file2.txt
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test"
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+cleanup;
diff --git a/tests/basic/ec/self-heal-read-write-fail.t b/tests/basic/ec/self-heal-read-write-fail.t
new file mode 100644
index 00000000000..0ba591b5bb2
--- /dev/null
+++ b/tests/basic/ec/self-heal-read-write-fail.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test verifies that self-heal fails when read/write fails as part of heal
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo abc >> $M0/a
+
+# Umount the volume to force all pending writes to reach the bricks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Load error-gen and fail read fop and test that heal fails
+TEST $CLI volume stop $V0 #Stop volume so that error-gen can be loaded
+TEST $CLI volume set $V0 debug.error-gen posix
+TEST $CLI volume set $V0 debug.error-fops read
+TEST $CLI volume set $V0 debug.error-number EBADF
+TEST $CLI volume set $V0 debug.error-failure 100
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST ! getfattr -n trusted.ec.heal $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+#fail write fop and test that heal fails
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 debug.error-fops write
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST ! getfattr -n trusted.ec.heal $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume stop $V0 #Stop volume so that error-gen can be disabled
+TEST $CLI volume reset $V0 debug.error-gen
+TEST $CLI volume reset $V0 debug.error-fops
+TEST $CLI volume reset $V0 debug.error-number
+TEST $CLI volume reset $V0 debug.error-failure
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST getfattr -n trusted.ec.heal $M0/a
+EXPECT "^0$" get_pending_heal_count $V0
+
+#Test that heal worked as expected by forcing read from brick0
+#remount to make sure data is not served from any cache
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT "abc" cat $M0/a
+
+cleanup
diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t
new file mode 100644
index 00000000000..6329bb60248
--- /dev/null
+++ b/tests/basic/ec/self-heal.t
@@ -0,0 +1,235 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks self-healing feature of dispersed volumes
+
+cleanup
+
+function check_mount_dir
+{
+ getfattr -d -m. -e hex $M0 2>&1 > /dev/null
+ for i in {1..20}; do
+ ls -l $M0/ | grep "dir1"
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+function check_size
+{
+ cat $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%s" $i/$1`
+ if [ "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_mode
+{
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%A" $i/$1`
+ if [ "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_date
+{
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%Y" $i/$1`
+ if [ "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_xattr
+{
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ getfattr -n $2 $i/$1 2>/dev/null
+ if [ $? -eq 0 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_dir
+{
+ getfattr -m. -d $M0/dir1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ if [ ! -d $i/dir1 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_soft_link
+{
+ getfattr -d -m. -e hex $M0/test3 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ if [ ! -h $i/test3 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_hard_link
+{
+ getfattr -d -m. -e hex $M0/test4 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%h" $i/test4`
+ if [ "$res" != "3" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+tmp=`mktemp -d -t ${0##*/}.XXXXXX`
+if [ ! -d $tmp ]; then
+ exit 1
+fi
+
+TESTS_EXPECTED_IN_LOOP=194
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 client-log-level DEBUG
+#Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp'
+TEST $CLI volume set $V0 performance.write-behind off
+#md-cache can return stale stat due to default timeout being 1 sec
+TEST $CLI volume set $V0 performance.stat-prefetch off
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+#direct-io-mode is to make sure 'cat' leads to READ fop which triggers heal
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --direct-io-mode=yes $M0;
+# Wait until all 6 childs have been recognized by the ec xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$tmp/test bs=1024 count=1024
+
+cs=$(sha1sum $tmp/test | awk '{ print $1 }')
+
+TEST df -h $M0
+TEST stat $M0
+
+for idx in {0..5}; do
+ brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
+done
+
+TEST cp $tmp/test $M0/test
+TEST chmod 644 $M0/test
+TEST touch -d "@946681200" $M0/test
+EXPECT "-rw-r--r--" stat -c "%A" $M0/test
+EXPECT "946681200" stat -c "%Y" $M0/test
+
+for idx1 in {0..5}; do
+ TEST chmod 666 ${brick[$idx1]}/test
+ TEST truncate -s 0 ${brick[$idx1]}/test
+ TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test
+ EXPECT "-rw-r--r--" stat -c "%A" $M0/test
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"
+done
+
+for idx1 in {0..4}; do
+ for idx2 in `seq $(($idx1 + 1)) 5`; do
+ if [ $idx1 -ne $idx2 ]; then
+ TEST chmod 666 ${brick[$idx1]}/test
+ TEST chmod 600 ${brick[$idx2]}/test
+ TEST truncate -s 0 ${brick[$idx1]}/test
+ TEST truncate -s 2097152 ${brick[$idx2]}/test
+ TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test
+ TEST setfattr -n user.test -v "test2" ${brick[$idx2]}/test
+ EXPECT "-rw-r--r--" stat -c "%A" $M0/test
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"
+ fi
+ done
+done
+
+sleep 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST cp $tmp/test $M0/test2
+EXPECT "1048576" stat -c "%s" $M0/test2
+TEST chmod 777 $M0/test2
+EXPECT "-rwxrwxrwx" stat -c "%A" $M0/test2
+
+TEST mkdir $M0/dir1
+TEST ls -al $M0/dir1
+
+TEST ln -s test2 $M0/test3
+TEST [ -h $M0/test3 ]
+
+TEST ln $M0/test2 $M0/test4
+TEST [ -f $M0/test4 ]
+EXPECT "2" stat -c "%h" $M0/test2
+EXPECT "2" stat -c "%h" $M0/test4
+
+sleep 2
+
+TEST $CLI volume start $V0 force
+# Wait until the killed bricks have been started and recognized by the ec
+# xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST check_mount_dir
+
+EXPECT "1048576" stat -c "%s" $M0/test2
+EXPECT "-rwxrwxrwx" stat -c "%A" $M0/test2
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test2 "262144"
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test2 "-rwxrwxrwx"
+
+TEST ls -al $M0/dir1
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_dir
+
+TEST [ -h $M0/test3 ]
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link
+
+EXPECT "2" stat -c "%h" $M0/test4
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link
+
+TEST rm -rf $tmp
+
+cleanup
diff --git a/tests/basic/ec/statedump.t b/tests/basic/ec/statedump.t
new file mode 100644
index 00000000000..8d311ec3d6d
--- /dev/null
+++ b/tests/basic/ec/statedump.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/${V0}{0..2}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" ec_child_up_status $V0 0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 2
+
+cleanup
diff --git a/tests/basic/exports_parsing.t b/tests/basic/exports_parsing.t
new file mode 100644
index 00000000000..da88bbcb2cc
--- /dev/null
+++ b/tests/basic/exports_parsing.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+EXP_FILES=$(dirname $0)/../configfiles
+
+cleanup
+
+function test_good_file ()
+{
+ glusterfsd --print-exports $1
+}
+
+function test_long_netgroup()
+{
+ glusterfsd --print-exports $1 2>&1 | sed -n 1p
+}
+
+function test_bad_line ()
+{
+ glusterfsd --print-exports $1 2>&1 | sed -n 1p
+}
+
+function test_big_file ()
+{
+ glusterfsd --print-exports $1 | sed -n 3p
+}
+
+function test_bad_opt ()
+{
+ glusterfsd --print-exports $1 2>&1 | sed -n 1p
+}
+
+function check_export_line() {
+ if [ "$1" == "$2" ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ return
+}
+
+export_result=$(test_good_file $EXP_FILES/exports)
+EXPECT "Y" check_export_line '/test @test(rw,anonuid=0,sec=sys,) 10.35.11.31(rw,anonuid=0,sec=sys,) ' "$export_result"
+
+export_result=$(test_good_file $EXP_FILES/exports-v6)
+EXPECT "Y" check_export_line '/test @test(rw,anonuid=0,sec=sys,) 2401:db00:11:1:face:0:3d:0(rw,anonuid=0,sec=sys,) ' "$export_result"
+
+EXPECT_KEYWORD "Error parsing netgroups for:" test_bad_line $EXP_FILES/bad_exports
+EXPECT_KEYWORD "Error parsing netgroups for:" test_long_netgroup $EXP_FILES/bad_exports
+
+EXPECT_KEYWORD "HDCDTY43SXOAH1TNUKB23MO9DE574W(rw,anonuid=0,sec=sys,)" test_big_file $EXP_FILES/big_exports
+
+EXPECT_KEYWORD "Could not find any valid options" test_bad_opt $EXP_FILES/exports_bad_opt
+
+cleanup
diff --git a/tests/basic/fencing/afr-lock-heal-advanced.c b/tests/basic/fencing/afr-lock-heal-advanced.c
new file mode 100644
index 00000000000..e202ccd5b29
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-advanced.c
@@ -0,0 +1,227 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *logfile_fp;
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!logfile_fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(logfile_fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_client(char *hostname, char *volname, char *log_file)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+glfs_fd_t *
+open_file(glfs_t *fs, char *fname)
+{
+ glfs_fd_t *fd = NULL;
+
+ fd = glfs_creat(fs, fname, O_CREAT, 0644);
+ if (!fd) {
+ LOG_ERR("glfs_creat", errno);
+ goto out;
+ }
+out:
+ return fd;
+}
+
+int
+acquire_mandatory_lock(glfs_t *fs, glfs_fd_t *fd)
+{
+ struct flock lock;
+ int ret = 0;
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+perform_test(glfs_t *fs, char *file1, char *file2)
+{
+ int ret = 0;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *buf = "0123456789";
+
+ fd1 = open_file(fs, file1);
+ if (!fd1) {
+ ret = -1;
+ goto out;
+ }
+ fd2 = open_file(fs, file2);
+ if (!fd2) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Kill one brick from the .t.*/
+ pause();
+
+ ret = acquire_mandatory_lock(fs, fd1);
+ if (ret) {
+ goto out;
+ }
+ ret = acquire_mandatory_lock(fs, fd2);
+ if (ret) {
+ goto out;
+ }
+
+ /* Bring the brick up and let the locks heal. */
+ pause();
+ /*At this point, the .t would have killed and brought back 2 bricks, marking
+ * the fd bad.*/
+
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret > 0) {
+ /* Write is supposed to fail with EBADFD*/
+ LOG_ERR("glfs_write", ret);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fd1)
+ glfs_close(fd1);
+ if (fd2)
+ glfs_close(fd2);
+ return ret;
+}
+
+static void
+sigusr1_handler(int signo)
+{
+ /*Signal caught. Just continue with the execution.*/
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname1 = NULL;
+ char *fname2 = NULL;
+
+ if (argc != 7) {
+ fprintf(stderr,
+ "Expect following args %s <host> <volname> <file1> <file2> "
+ "<log file "
+ "location> <log_file_suffix>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ fname1 = argv[3];
+ fname2 = argv[4];
+
+ /*Use SIGUSR1 and pause()as a means of hitting break-points this program
+ *when signalled from the .t test case.*/
+ if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) {
+ LOG_ERR("SIGUSR1 handler error", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[5], "lock-heal.c", argv[6], "log");
+ logfile_fp = fopen(log_file, "w");
+ if (!logfile_fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[5], "glfs-client", argv[6], "log");
+ fs = setup_client(hostname, volname, log_file);
+ if (!fs) {
+ LOG_ERR("setup_client", errno);
+ return -1;
+ }
+
+ ret = perform_test(fs, fname1, fname2);
+
+error:
+ if (fs) {
+ /*glfs_fini(fs)*/; // glfs fini path is racy and crashes the program
+ }
+
+ fclose(logfile_fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/afr-lock-heal-advanced.t b/tests/basic/fencing/afr-lock-heal-advanced.t
new file mode 100644
index 00000000000..8a5b5989b5e
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-advanced.t
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+PROCESS_UP_TIMEOUT=90
+
+function is_gfapi_program_alive()
+{
+ pid=$1
+ ps -p $pid
+ if [ $? -eq 0 ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function get_active_lock_count {
+ brick=$1
+ i1=$2
+ i2=$3
+ pattern="ACTIVE.*client-${brick: -1}"
+
+ sdump=$(generate_brick_statedump $V0 $H0 $brick)
+ lock_count1="$(egrep "$i1" $sdump -A3| egrep "$pattern"|uniq|wc -l)"
+ lock_count2="$(egrep "$i2" $sdump -A3| egrep "$pattern"|uniq|wc -l)"
+ echo "$((lock_count1+lock_count2))"
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+TEST build_tester $(dirname $0)/afr-lock-heal-advanced.c -lgfapi -ggdb
+
+#------------------------------------------------------------------------------
+# Use more than 1 fd from same client so that list_for_each_* loops are executed more than once.
+$(dirname $0)/afr-lock-heal-advanced $H0 $V0 "/FILE1" "/FILE2" $logdir C1&
+client_pid=$!
+TEST [ $client_pid ]
+
+TEST sleep 5 # By now, the client would have opened an fd on FILE1 and FILE2 and waiting for a SIGUSR1.
+EXPECT "Y" is_gfapi_program_alive $client_pid
+
+gfid_str1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE1))
+inode1="FILE1|gfid:$gfid_str1"
+gfid_str2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE2))
+inode2="FILE2|gfid:$gfid_str2"
+
+# Kill brick-3 and let client-1 take lock on both files.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill -SIGUSR1 $client_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client_pid
+
+# Check lock is present on brick-1 and brick-2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 $inode1 $inode2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}1 $inode1 $inode2
+
+# Restart brick-3 and check that the lock has healed on it.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd.
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}2 $inode1 $inode2
+
+#------------------------------------------------------------------------------
+# Kill same brick before heal completes the first time and check it completes the second time.
+TEST $CLI volume set $V0 delay-gen locks
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable finodelk
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume reset $V0 delay-gen
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 $inode1 $inode2
+
+#------------------------------------------------------------------------------
+# Kill 2 bricks and bring it back. The fds must be marked bad.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+# TODO: `gluster v statedump $V0 client localhost:$client_pid` is not working,
+# so sleep for 20 seconds for the client to connect to connect to the bricks.
+TEST sleep $CHILD_UP_TIMEOUT
+
+# Try to write to FILE1 from the .c; it must fail.
+TEST kill -SIGUSR1 $client_pid
+wait $client_pid
+ret=$?
+TEST [ $ret == 0 ]
+
+cleanup_tester $(dirname $0)/afr-lock-heal-advanced
+cleanup;
diff --git a/tests/basic/fencing/afr-lock-heal-basic.c b/tests/basic/fencing/afr-lock-heal-basic.c
new file mode 100644
index 00000000000..768c9e57181
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-basic.c
@@ -0,0 +1,182 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *logfile_fp;
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!logfile_fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(logfile_fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_client(char *hostname, char *volname, char *log_file)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+int
+acquire_mandatory_lock(glfs_t *fs, char *fname)
+{
+ struct flock lock;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+
+ fd = glfs_creat(fs, fname, O_CREAT, 0644);
+ if (!fd) {
+ if (errno != EEXIST) {
+ LOG_ERR("glfs_creat", errno);
+ ret = -1;
+ goto out;
+ }
+ fd = glfs_open(fs, fname, O_RDWR | O_NONBLOCK);
+ if (!fd) {
+ LOG_ERR("glfs_open", errno);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ pause();
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ pause();
+
+out:
+ if (fd) {
+ glfs_close(fd);
+ }
+
+ return ret;
+}
+
+static void
+sigusr1_handler(int signo)
+{
+ /*Signal caught. Just continue with the execution.*/
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname = NULL;
+
+ if (argc != 6) {
+ fprintf(stderr,
+ "Expect following args %s <host> <volname> <file> <log file "
+ "location> <log_file_suffix>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ fname = argv[3];
+
+ /*Use SIGUSR1 and pause()as a means of hitting break-points this program
+ *when signalled from the .t test case.*/
+ if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) {
+ LOG_ERR("SIGUSR1 handler error", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[4], "lock-heal-basic.c", argv[5],
+ "log");
+ logfile_fp = fopen(log_file, "w");
+ if (!logfile_fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[4], "glfs-client", argv[5], "log");
+ fs = setup_client(hostname, volname, log_file);
+ if (!fs) {
+ LOG_ERR("setup_client", errno);
+ return -1;
+ }
+
+ ret = acquire_mandatory_lock(fs, fname);
+
+error:
+ if (fs) {
+ /*glfs_fini(fs)*/; // glfs fini path is racy and crashes the program
+ }
+
+ fclose(logfile_fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/afr-lock-heal-basic.t b/tests/basic/fencing/afr-lock-heal-basic.t
new file mode 100644
index 00000000000..69131af085d
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-basic.t
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function is_gfapi_program_alive()
+{
+ pid=$1
+ ps -p $pid
+ if [ $? -eq 0 ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+TEST build_tester $(dirname $0)/afr-lock-heal-basic.c -lgfapi -ggdb
+
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C1&
+client1_pid=$!
+TEST [ $client1_pid ]
+
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C2&
+client2_pid=$!
+TEST [ $client2_pid ]
+
+TEST sleep 5 # By now, the 2 clients would have opened an fd on FILE and waiting for a SIGUSR1.
+EXPECT "Y" is_gfapi_program_alive $client1_pid
+EXPECT "Y" is_gfapi_program_alive $client2_pid
+
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE))
+inode="FILE|gfid:$gfid_str"
+
+# Kill brick-3 and let client-1 take lock on the file.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill -SIGUSR1 $client1_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client1_pid
+
+# Check lock is present on brick-1 and brick-2
+b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0)
+c1_lock_on_b1="$(egrep "$inode" $b1_sdump -A3| egrep 'ACTIVE.*client-0'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1)
+c1_lock_on_b2="$(egrep "$inode" $b2_sdump -A3| egrep 'ACTIVE.*client-1'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b2" ]
+
+# Restart brick-3 and check that the lock has healed on it.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal.
+
+b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2)
+c1_lock_on_b3="$(egrep "$inode" $b3_sdump -A3| egrep 'ACTIVE.*client-2'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b3" ]
+
+# Kill brick-1 and let client-2 preempt the lock on bricks 2 and 3.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill -SIGUSR1 $client2_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client2_pid
+
+# Restart brick-1 and let lock healing complete.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal.
+
+# Check that all bricks now have locks from client 2 only.
+b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0)
+c2_lock_on_b1="$(egrep "$inode" $b1_sdump -A3| egrep 'ACTIVE.*client-0'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1)
+c2_lock_on_b2="$(egrep "$inode" $b2_sdump -A3| egrep 'ACTIVE.*client-1'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2)
+c2_lock_on_b3="$(egrep "$inode" $b3_sdump -A3| egrep 'ACTIVE.*client-2'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b2" ]
+TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b3" ]
+TEST [ "$c2_lock_on_b1" != "$c1_lock_on_b1" ]
+
+#Let the client programs run and exit.
+TEST kill -SIGUSR1 $client1_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client1_pid
+TEST kill -SIGUSR1 $client2_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client2_pid
+
+cleanup_tester $(dirname $0)/afr-lock-heal-basic
+cleanup;
diff --git a/tests/basic/fencing/fence-basic.c b/tests/basic/fencing/fence-basic.c
new file mode 100644
index 00000000000..4aa452e19b0
--- /dev/null
+++ b/tests/basic/fencing/fence-basic.c
@@ -0,0 +1,229 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NO_INIT 1
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *fp;
+char *buf = "0123456789";
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(fp, "\n%d %s : returned error (%s)\n", __LINE__, func, \
+ strerror(err)); \
+ fflush(fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_file, int flag)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(fp, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ if (flag == NO_INIT)
+ goto out;
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+/* test plan
+ *
+ * - take mandatory lock from client 1
+ * - preempt mandatory lock from client 2
+ * - write from client 1 which should fail
+ */
+
+int
+test(glfs_t *fs1, glfs_t *fs2, char *fname)
+{
+ struct flock lock;
+ int ret = 0;
+ glfs_fd_t *fd1, *fd2 = NULL;
+
+ fd1 = glfs_creat(fs1, fname, O_RDWR, 0777);
+ if (ret) {
+ LOG_ERR("glfs_creat", errno);
+ ret = -1;
+ goto out;
+ }
+
+ fd2 = glfs_open(fs2, fname, O_RDWR | O_NONBLOCK);
+ if (ret) {
+ LOG_ERR("glfs_open", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd1, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd1, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret != 10) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* write should fail */
+ ret = glfs_write(fd2, buf, 10, 0);
+ if (ret != -1) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* preempt mandatory lock from client 1*/
+ ret = glfs_file_lock(fd2, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ /* write should succeed from client 2 */
+ ret = glfs_write(fd2, buf, 10, 0);
+ if (ret == -1) {
+ LOG_ERR("glfs_write", errno);
+ goto out;
+ }
+
+ /* write should fail from client 1 */
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret == 10) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd1) {
+ glfs_close(fd1);
+ }
+
+ if (fd2) {
+ glfs_close(fd2);
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs1 = NULL;
+ glfs_t *fs2 = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname = "/file";
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+
+ sprintf(log_file, "%s/%s", argv[3], "fence-basic.log");
+ fp = fopen(log_file, "w");
+ if (!fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s", argv[3], "glfs-client-1.log");
+ fs1 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs1) {
+ LOG_ERR("setup_new_client", errno);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s", argv[3], "glfs-client-2.log");
+ fs2 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs2) {
+ LOG_ERR("setup_new_client", errno);
+ ret = -1;
+ goto error;
+ }
+
+ ret = test(fs1, fs2, fname);
+
+error:
+ if (fs1) {
+ glfs_fini(fs1);
+ }
+
+ if (fs2) {
+ glfs_fini(fs2);
+ }
+
+ fclose(fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/fence-basic.t b/tests/basic/fencing/fence-basic.t
new file mode 100755
index 00000000000..30f379e7b20
--- /dev/null
+++ b/tests/basic/fencing/fence-basic.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 diagnostics.client-log-flush-timeout 30
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/fence-basic.c -lgfapi -ggdb
+TEST $(dirname $0)/fence-basic $H0 $V0 $logdir
+
+cleanup_tester $(dirname $0)/fence-basic
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fencing/fencing-crash-conistency.t b/tests/basic/fencing/fencing-crash-conistency.t
new file mode 100644
index 00000000000..0c69411e90c
--- /dev/null
+++ b/tests/basic/fencing/fencing-crash-conistency.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# with lock enforcement flag write should fail with out lock
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST touch $M0/file
+
+#write should pass
+TEST "echo "test" > $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enable mandatory locking
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+#write should pass
+TEST "echo "test" >> $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enforce lock on the file
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#write should fail
+TEST ! "echo "test" >> $M0/file"
+TEST ! "truncate -s 0 $M0/file"
+
+#remove lock enforcement flag
+TEST setfattr -x trusted.glusterfs.enforce-mandatory-lock $M0/file
+
+#write should pass
+TEST "echo "test" >> $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enforce lock on the file
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+#kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume start $V0 force
+
+# wait one second for the brick to come online
+sleep 2
+#write should fail (lock xlator gets lock enforcement info from disk)
+TEST ! "echo "test" >> $M0/file"
+TEST ! "truncate -s 0 $M0/file"
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fencing/test-fence-option.t b/tests/basic/fencing/test-fence-option.t
new file mode 100644
index 00000000000..115cbe7dbdf
--- /dev/null
+++ b/tests/basic/fencing/test-fence-option.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# with lock enforcement flag write should fail with out lock
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST touch $M0/file
+
+#setfattr for mandatory-enforcement will fail
+TEST ! setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#enable mandatory locking
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+
+#setfattr will fail
+TEST ! setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#set lock-enforcement option
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+#setfattr should succeed
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t
new file mode 100644
index 00000000000..cea8aa737c0
--- /dev/null
+++ b/tests/basic/fop-sampling.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+SAMPLE_FILE="$(gluster --print-logdir)/samples/glusterfs_${V0}.samp"
+
+function print_cnt() {
+ local FOP_TYPE=$1
+ local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMPLE_FILE} | wc -l)
+ echo $FOP_CNT
+}
+
+# Verify we got non-zero counts for stats/lookup/readdir
+check_samples() {
+ STAT_CNT=$(print_cnt STAT)
+ if [ "$STAT_CNT" -le "0" ]; then
+ echo "STAT count is zero"
+ return
+ fi
+
+ LOOKUP_CNT=$(print_cnt LOOKUP)
+ if [ "$LOOKUP_CNT" -le "0" ]; then
+ echo "LOOKUP count is zero"
+ return
+ fi
+
+ READDIR_CNT=$(print_cnt READDIR)
+ if [ "$READDIR_CNT" -le "0" ]; then
+ echo "READDIR count is zero"
+ return
+ fi
+
+ echo "OK"
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 diagnostics.latency-measurement on
+TEST $CLI volume set $V0 diagnostics.count-fop-hits on
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 2
+TEST $CLI volume set $V0 diagnostics.fop-sample-buf-size 65535
+TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1
+TEST $CLI volume set $V0 diagnostics.stats-dnscache-ttl-sec 3600
+
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+for i in {1..5}
+do
+ dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1
+done
+
+TEST ls -l $M0
+EXPECT_WITHIN 6 "OK" check_samples
+
+cleanup
diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c
new file mode 100644
index 00000000000..ef00aa0f088
--- /dev/null
+++ b/tests/basic/fops-sanity.c
@@ -0,0 +1,1033 @@
+/* Copyright (c) 2014 Red Hat, Inc. All rights reserved.
+
+* This copyrighted material is made available to anyone wishing
+* to use, modify, copy, or redistribute it subject to the terms
+* and conditions of the GNU General Public License version 2.
+
+* This program is distributed in the hope that it will be
+* useful, but WITHOUT ANY WARRANTY; without even the implied
+* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+* PURPOSE. See the GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public
+* License along with this program; if not, write to the Free
+* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+* Boston, MA 02110-1301, USA.
+*/
+
+/* Filesystem basic sanity check, tests all (almost) fops. */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <errno.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/sysmacros.h>
+
+#ifndef linux
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#endif
+
+/* for fd based fops after unlink */
+int
+fd_based_fops_1(char *filename);
+/* for fd based fops before unlink */
+int
+fd_based_fops_2(char *filename);
+/* fops based on fd after dup */
+int
+dup_fd_based_fops(char *filename);
+/* for fops based on path */
+int
+path_based_fops(char *filename);
+/* for fops which operate on directory */
+int
+dir_based_fops(char *filename);
+/* for fops which operate in link files (symlinks) */
+int
+link_based_fops(char *filename);
+/* to test open syscall with open modes available. */
+int
+test_open_modes(char *filename);
+/* generic function which does open write and read. */
+int
+generic_open_read_write(char *filename, int flag, mode_t mode);
+
+#define OPEN_MODE 0666
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int result = 0;
+ char filename[255] = {
+ 0,
+ };
+
+ if (argc > 1)
+ strcpy(filename, argv[1]);
+ else
+ strcpy(filename, "temp-xattr-test-file");
+
+ ret = fd_based_fops_1(strcat(filename, "_1"));
+ if (ret < 0) {
+ fprintf(stderr, "fd based file operation 1 failed\n");
+ result |= ret;
+ } else {
+ fprintf(stdout, "fd based file operation 1 passed\n");
+ }
+
+ ret = fd_based_fops_2(strcat(filename, "_2"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "fd based file operation 2 failed\n");
+ } else {
+ fprintf(stdout, "fd based file operation 2 passed\n");
+ }
+
+ ret = dup_fd_based_fops(strcat(filename, "_3"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "dup fd based file operation failed\n");
+ } else {
+ fprintf(stdout, "dup fd based file operation passed\n");
+ }
+
+ ret = path_based_fops(strcat(filename, "_4"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "path based file operation failed\n");
+ } else {
+ fprintf(stdout, "path based file operation passed\n");
+ }
+
+ ret = dir_based_fops(strcat(filename, "_5"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "directory based file operation failed\n");
+ } else {
+ fprintf(stdout, "directory based file operation passed\n");
+ }
+
+ ret = link_based_fops(strcat(filename, "_5"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "link based file operation failed\n");
+ } else {
+ fprintf(stdout, "link based file operation passed\n");
+ }
+
+ ret = test_open_modes(strcat(filename, "_5"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "testing modes of `open' call failed\n");
+ } else {
+ fprintf(stdout, "testing modes of `open' call passed\n");
+ }
+ return result;
+}
+
+/* Execute all possible fops on a fd which is unlinked */
+int
+fd_based_fops_1(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ int result = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+ if (fd < 0) {
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ return ret;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ fprintf(stderr, "read returning junk\n");
+ result |= ret;
+ }
+
+ ret = ftruncate(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ /*
+ * These metadata operations fail at the moment because kernel doesn't
+ * pass the client fd in the operation.
+ * The following bug tracks this change.
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1084422
+ * ret = fchmod (fd, 0640);
+ * if (ret < 0) {
+ * fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+ * result |= ret;
+ * }
+
+ * ret = fchown (fd, 10001, 10001);
+ * if (ret < 0) {
+ * fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+ * result |= ret;
+ * }
+
+ * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
+ * if (ret < 0) {
+ * fprintf (stderr, "fsetxattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+
+ * ret = flistxattr (fd, NULL, 0);
+ * if (ret <= 0) {
+ * fprintf (stderr, "flistxattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+
+ * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
+ * if (ret <= 0) {
+ * fprintf (stderr, "fgetxattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+
+ * ret = fremovexattr (fd, "trusted.xattr-test");
+ * if (ret < 0) {
+ * fprintf (stderr, "fremovexattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+ */
+
+ if (fd)
+ close(fd);
+ return result;
+}
+
+int
+fd_based_fops_2(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ int result = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+ if (fd < 0) {
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ return ret;
+ }
+
+ ret = ftruncate(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(wstr, "This is my second string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret < 0) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ fprintf(stderr, "read returning junk\n");
+ result |= ret;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchmod(fd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchown(fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = flistxattr(fd, NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fremovexattr(fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ if (fd)
+ close(fd);
+ unlink(filename);
+
+ return result;
+}
+
+int
+path_based_fops(char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ int result = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char newfilename[255] = {
+ 0,
+ };
+ char *hardlink = "linkfile-hard.txt";
+ char *symlnk = "linkfile-soft.txt";
+ char buf[1024] = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+ ret = truncate(filename, 0);
+ if (ret < 0) {
+ fprintf(stderr, "truncate failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = stat(filename, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chmod(filename, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chown(filename, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chown failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = listxattr(filename, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = removexattr(filename, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = access(filename, R_OK | W_OK);
+ if (ret < 0) {
+ fprintf(stderr, "access failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = link(filename, hardlink);
+ if (ret < 0) {
+ fprintf(stderr, "link failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink(hardlink);
+
+ ret = symlink(filename, symlnk);
+ if (ret < 0) {
+ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = readlink(symlnk, buf, sizeof(buf));
+ if (ret < 0) {
+ fprintf(stderr, "readlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink(symlnk);
+
+ /* Create a character special file */
+ ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3));
+ if (ret < 0) {
+ fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink("cspecial");
+
+ ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5));
+ if (ret < 0) {
+ fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink("bspecial");
+
+#ifdef linux
+ ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
+#else
+ ret = mkfifo("fifo", 0);
+#endif
+ if (ret < 0) {
+ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink("fifo");
+
+#ifdef linux
+ ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
+ if (ret < 0) {
+ fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+#else
+ {
+ int s;
+ const char *pathname = "sock";
+ struct sockaddr_un addr;
+
+ s = socket(PF_LOCAL, SOCK_STREAM, 0);
+ memset(&addr, 0, sizeof(addr));
+ strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
+ ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+ if (ret < 0) {
+ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ close(s);
+ }
+#endif
+ unlink("sock");
+
+ strcpy(newfilename, filename);
+ strcat(newfilename, "_new");
+ ret = rename(filename, newfilename);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink(newfilename);
+
+ if (fd)
+ close(fd);
+
+ unlink(filename);
+ return result;
+}
+
+int
+dup_fd_based_fops(char *filename)
+{
+ int fd = 0;
+ int result = 0;
+ int newfd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+ if (fd < 0) {
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ return ret;
+ }
+
+ newfd = dup(fd);
+ if (newfd < 0) {
+ fprintf(stderr, "dup failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ close(fd);
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(newfd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lseek(newfd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = read(newfd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ fprintf(stderr, "read returning junk\n");
+ result |= ret;
+ }
+
+ ret = ftruncate(newfd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fstat(newfd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchmod(newfd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchown(newfd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fdatasync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = flistxattr(newfd, NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fremovexattr(newfd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ if (newfd)
+ close(newfd);
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+ return result;
+}
+
+int
+dir_based_fops(char *dirname)
+{
+ int ret = -1;
+ int result = 0;
+ DIR *dp = NULL;
+ char buff[255] = {
+ 0,
+ };
+ struct dirent *dbuff = {
+ 0,
+ };
+ struct stat stbuff = {
+ 0,
+ };
+ char newdname[255] = {
+ 0,
+ };
+ char *cwd = NULL;
+
+ ret = mkdir(dirname, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+ dp = opendir(dirname);
+ if (dp == NULL) {
+ fprintf(stderr, "opendir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ dbuff = readdir(dp);
+ if (NULL == dbuff) {
+ fprintf(stderr, "readdir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = closedir(dp);
+ if (ret < 0) {
+ fprintf(stderr, "closedir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = stat(dirname, &stbuff);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chmod(dirname, 0744);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chown(dirname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = listxattr(dirname, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = removexattr(dirname, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "/../");
+ ret = chdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "chdir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ cwd = getcwd(buff, 255);
+ if (NULL == cwd) {
+ fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "new");
+ ret = rename(dirname, newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = rmdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ rmdir(dirname);
+ return result;
+}
+
+int
+link_based_fops(char *filename)
+{
+ int ret = -1;
+ int result = 0;
+ int fd = 0;
+ char newname[255] = {
+ 0,
+ };
+ char linkname[255] = {
+ 0,
+ };
+ struct stat lstbuf = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+ strcpy(newname, filename);
+ strcat(newname, "_hlink");
+ ret = link(filename, newname);
+ if (ret < 0) {
+ fprintf(stderr, "link failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(linkname, filename);
+ strcat(linkname, "_slink");
+ ret = symlink(newname, linkname);
+ if (ret < 0) {
+ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lstat(linkname, &lstbuf);
+ if (ret < 0) {
+ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lchown(linkname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "lchown failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = llistxattr(linkname, NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lremovexattr(linkname, "trusted.lxattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ if (fd)
+ close(fd);
+ unlink(linkname);
+ unlink(newname);
+ return result;
+}
+
+int
+test_open_modes(char *filename)
+{
+ int ret = -1;
+ int result = 0;
+
+ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
+ result |= ret;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
+ result |= ret;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_WRONLY, 0);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_WRONLY failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDWR, 0);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDWR failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDONLY, 0);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDONLY failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
+ result |= ret;
+ }
+
+#if 0 /* undefined behaviour, unable to reliably test */
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_TRUNC|O_RDONLY, 0);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_TRUNC|O_RDONLY failed\n");
+ result |= ret;
+ }
+#endif
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC,
+ OPEN_MODE);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
+ result |= ret;
+ }
+
+ return result;
+}
+
+int
+generic_open_read_write(char *filename, int flag, mode_t mode)
+{
+ int fd = 0;
+ int ret = -1;
+ char wstring[50] = {
+ 0,
+ };
+ char rstring[50] = {
+ 0,
+ };
+
+ fd = open(filename, flag, mode);
+ if (fd < 0) {
+ if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) {
+ unlink(filename);
+ return 0;
+ } else {
+ fprintf(stderr, "open failed: %s\n", strerror(errno));
+ return -1;
+ }
+ }
+
+ strcpy(wstring, "My string to write\n");
+ ret = write(fd, wstring, strlen(wstring));
+ if (ret <= 0) {
+ if (errno != EBADF) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+ }
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+
+ ret = read(fd, rstring, strlen(wstring));
+ if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY &&
+ flag != (O_TRUNC | O_WRONLY)) {
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+
+ /* Compare the rstring with wstring. But we do not want to return
+ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
+ * O_TRUNC|O_RDONLY. Because in that case we are not writing
+ * anything to the file.*/
+
+ ret = memcmp(wstring, rstring, strlen(wstring));
+ if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY &&
+ flag != (O_CREAT | O_WRONLY) &&
+ !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY ||
+ flag == (O_TRUNC | O_RDONLY))) {
+ fprintf(stderr, "read is returning junk\n");
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+
+ close(fd);
+ unlink(filename);
+ return 0;
+}
diff --git a/tests/basic/fops-sanity.t b/tests/basic/fops-sanity.t
new file mode 100755
index 00000000000..8962f3a2fba
--- /dev/null
+++ b/tests/basic/fops-sanity.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#mount on a random dir
+TEST glusterfs --entry-timeout=3600 --attribute-timeout=3600 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=yes
+
+build_tester $(dirname $0)/fops-sanity.c
+
+TEST cp $(dirname $0)/fops-sanity $M0
+cd $M0
+TEST ./fops-sanity $V0
+cd -
+TEST rm -f $(dirname $0)/fops-sanity
+cleanup;
diff --git a/tests/basic/fuse/Makefile b/tests/basic/fuse/Makefile
new file mode 100644
index 00000000000..c446d253228
--- /dev/null
+++ b/tests/basic/fuse/Makefile
@@ -0,0 +1,12 @@
+CFLAGS = -Wall -g
+LDFLAGS =
+
+BINARIES = seek
+
+%: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+
+all: $(BINARIES)
+
+clean:
+ -$(RM) $(BINARIES)
diff --git a/tests/basic/fuse/active-io-graph-switch.t b/tests/basic/fuse/active-io-graph-switch.t
new file mode 100644
index 00000000000..6ec3e1fcbfa
--- /dev/null
+++ b/tests/basic/fuse/active-io-graph-switch.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=12
+
+function perform_io_on_mount {
+ local m="$1"
+ local f="$2"
+ local lockfile="$3"
+ while [ -f "$m/$lockfile" ];
+ do
+ dd if=/dev/zero of=$m/$f bs=1M count=1
+ done
+}
+
+function perform_graph_switch {
+ for i in {1..3}
+ do
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch off
+ sleep 3
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch on
+ sleep 3
+ done
+}
+
+function count_files {
+ ls $M0 | wc -l
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Repeat the tests with reader-thread-count
+TEST $GFS --reader-thread-count=10 --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+cleanup
diff --git a/tests/basic/fuse/seek.c b/tests/basic/fuse/seek.c
new file mode 100644
index 00000000000..30943ad0f33
--- /dev/null
+++ b/tests/basic/fuse/seek.c
@@ -0,0 +1,82 @@
+/* seek.c - use lseek() to find holes in a file
+ *
+ * Author: Niels de Vos <ndevos@redhat.com>
+ */
+
+/* needed for SEEK_HOLE/SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main(int argc, char **argv)
+{
+ int ret = EXIT_SUCCESS;
+ int fd = -1;
+ char *filename = NULL;
+ struct stat st = {
+ 0,
+ };
+ off_t hole_start = 0;
+ off_t hole_end = 0;
+
+ if (argc != 2) {
+ fprintf(stderr, "Invalid argument, use %s <file>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ filename = argv[1];
+
+ fd = open(filename, O_RDONLY);
+ if (fd <= 0) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (fstat(fd, &st)) {
+ perror("fstat");
+ return EXIT_FAILURE;
+ }
+
+ while (hole_end < st.st_size) {
+ hole_start = lseek(fd, hole_end, SEEK_HOLE);
+ if (hole_start == -1 && errno == ENXIO) {
+ /* no more holes */
+ break;
+ } else if (hole_start == -1 && errno == ENOTSUP) {
+ /* SEEK_HOLE is not supported */
+ perror("lseek(SEEK_HOLE)");
+ ret = EXIT_FAILURE;
+ break;
+ } else if (hole_start == -1) {
+ perror("no more holes");
+ break;
+ }
+
+ hole_end = lseek(fd, hole_start, SEEK_DATA);
+ if (hole_end == -1 && errno == ENXIO) {
+ /* no more data */
+ break;
+ } else if (hole_end == -1 && errno == ENOTSUP) {
+ /* SEEK_DATA is not supported */
+ perror("lseek(SEEK_DATA)");
+ ret = EXIT_FAILURE;
+ break;
+ }
+
+ printf("HOLE found: %ld - %ld%s\n", hole_start, hole_end,
+ (hole_end == st.st_size) ? " (EOF)" : "");
+ }
+
+ close(fd);
+
+ return ret;
+}
diff --git a/tests/basic/geo-replication/marker-xattrs.t b/tests/basic/geo-replication/marker-xattrs.t
new file mode 100755
index 00000000000..7e5ea8eebec
--- /dev/null
+++ b/tests/basic/geo-replication/marker-xattrs.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a replicated volume
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2,3,4,5}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+
+TEST touch $M0
+
+vol_uuid=$(get_volume_mark $M1)
+xtime=trusted.glusterfs.$vol_uuid.xtime
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+TEST getfattr -d -m. -e hex $M1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+## Start and create a disperse volume
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}-{0,1,2}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" mount_get_option_value $M0 $V0-disperse-0 childs_up
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" mount_get_option_value $M1 $V0-disperse-0 childs_up
+
+TEST touch $M0
+
+vol_uuid=$(get_volume_mark $M1)
+xtime=trusted.glusterfs.$vol_uuid.xtime
+stime=trusted.glusterfs.$vol_uuid.stime
+
+stime_val=$(getfattr -e hex -n $xtime $B0/${V0}-1 | grep ${xtime}= | cut -f2 -d'=')
+TEST "setfattr -n $stime -v $stime_val $B0/${V0}-1"
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+TEST "getfattr -n $stime $M1 | grep -q ${stime}="
+
+TEST getfattr -d -m. -e hex $M1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup
diff --git a/tests/basic/gfapi/Makefile b/tests/basic/gfapi/Makefile
new file mode 100644
index 00000000000..1c5cf03ca3d
--- /dev/null
+++ b/tests/basic/gfapi/Makefile
@@ -0,0 +1,22 @@
+## compiles against the *system* version of libgfapi,
+## but not the libgfapi for the testcases
+
+CFLAGS = -Wall -g $(shell pkg-config --cflags glusterfs-api)
+LDFLAGS = $(shell pkg-config --libs glusterfs-api)
+
+BINARIES = upcall-cache-invalidate libgfapi-fini-hang anonymous_fd seek \
+ bug1283983 bug1291259 gfapi-ssl-test gfapi-load-volfile \
+ mandatory-lock-optimal
+
+%: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+
+all: check-pkgconfig $(BINARIES)
+
+clean:
+ -$(RM) $(BINARIES)
+
+.phony: check-pkgconfig
+
+check-pkgconfig:
+ pkg-config --exists glusterfs-api
diff --git a/tests/basic/gfapi/anonymous_fd.t b/tests/basic/gfapi/anonymous_fd.t
new file mode 100755
index 00000000000..bc0fb0fa2f0
--- /dev/null
+++ b/tests/basic/gfapi/anonymous_fd.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/anonymous_fd_read_write.c -lgfapi -o $(dirname $0)/anonymous_fd
+TEST ./$(dirname $0)/anonymous_fd $H0 $V0 $logdir/anonymous_fd.log
+
+cleanup_tester $(dirname $0)/anonymous_fd
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/anonymous_fd_read_write.c b/tests/basic/gfapi/anonymous_fd_read_write.c
new file mode 100644
index 00000000000..fc276ca4310
--- /dev/null
+++ b/tests/basic/gfapi/anonymous_fd_read_write.c
@@ -0,0 +1,106 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ struct glfs_object *root = NULL, *file_obj = NULL;
+ struct stat sb = {
+ 0,
+ };
+ char readbuf[32], writebuf[32];
+ char *filename = "file.txt";
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ ret = -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr, "glfs_h_lookupat: error on lookup of / ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ file_obj = glfs_h_creat(fs, root, filename, O_CREAT, 0644, &sb);
+ if (file_obj == NULL) {
+ fprintf(stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ filename, root, strerror(errno));
+ goto out;
+ }
+
+ /* test read/write based on anonymous fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+
+ ret = glfs_h_anonymous_write(fs, file_obj, writebuf, 32, 0);
+ if (ret < 0)
+ LOG_ERR("glfs_h_anonymous_write", ret);
+
+ ret = glfs_h_anonymous_read(fs, file_obj, readbuf, 32, 0);
+ if (ret < 0)
+ LOG_ERR("glfs_h_anonymous_read", ret);
+
+ if (memcmp(readbuf, writebuf, 32)) {
+ fprintf(stderr, "Failed to read what I wrote: %s %s\n", readbuf,
+ writebuf);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (file_obj)
+ glfs_h_close(file_obj);
+
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug-1241104.c b/tests/basic/gfapi/bug-1241104.c
new file mode 100644
index 00000000000..78c87595a71
--- /dev/null
+++ b/tests/basic/gfapi/bug-1241104.c
@@ -0,0 +1,93 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <sys/wait.h>
+
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i, status = 0;
+ glfs_fd_t *fd = NULL;
+ char *filename = "file_tmp";
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ struct flock lock = {
+ 0,
+ };
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ fd = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+ fprintf(stderr, "glfs-create fd - %d\n", fd);
+
+ /* validate locks for negative range */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 10;
+ lock.l_len = -9;
+
+ ret = glfs_posix_lock(fd, F_SETLK, &lock);
+ LOG_ERR("glfs_posix_lock", ret);
+
+err:
+ glfs_close(fd);
+ LOG_ERR("glfs_close", ret);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug-1241104.t b/tests/basic/gfapi/bug-1241104.t
new file mode 100755
index 00000000000..e7f4759c3d5
--- /dev/null
+++ b/tests/basic/gfapi/bug-1241104.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/bug-1241104.c -lgfapi
+
+TEST ./$(dirname $0)/bug-1241104 $H0 $V0 $logdir/bug-1241104.log
+
+cleanup_tester $(dirname $0)/bug-1241104
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/bug-1507896.c b/tests/basic/gfapi/bug-1507896.c
new file mode 100644
index 00000000000..1cc20849c2b
--- /dev/null
+++ b/tests/basic/gfapi/bug-1507896.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new(fs)", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server(fs)", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging(fs)", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init(fs)", ret, out);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ if (ret)
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+ return ret;
+}
diff --git a/tests/basic/gfapi/bug-1507896.t b/tests/basic/gfapi/bug-1507896.t
new file mode 100644
index 00000000000..4764e650232
--- /dev/null
+++ b/tests/basic/gfapi/bug-1507896.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/bug-1507896.c -lgfapi
+
+TEST ./$(dirname $0)/bug-1507896 $H0 $V0 $logdir/bug-1507896.log
+
+#volume name precedding with '/'
+TEST ! ./$(dirname $0)/bug-1507896 $H0 /$V0 $logdir/bug-1507896.log
+
+#volume name passed with any special characters
+TEST ! ./$(dirname $0)/bug-1507896 $H0 test@_$V0 $logdir/bug-1507896.log
+
+cleanup_tester $(dirname $0)/bug-1507896
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/bug1283983.c b/tests/basic/gfapi/bug1283983.c
new file mode 100644
index 00000000000..b920013d0e0
--- /dev/null
+++ b/tests/basic/gfapi/bug1283983.c
@@ -0,0 +1,122 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
+ ret, errno); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+#define LOG_IF_NO_ERR(func, ret) \
+ do { \
+ if (ret == 0) { \
+ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ char *filename = "/a1";
+ char *filename2 = "/a2";
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_upcall *cbk = NULL;
+ char *logfile = NULL;
+ char *volname = NULL;
+ int cnt = 1;
+ struct glfs_upcall_inode *in_arg = NULL;
+ struct glfs_object *root = NULL, *leaf = NULL;
+
+ fprintf(stderr, "Starting libgfapi_fini\n");
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1] volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ sleep(2);
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ leaf = glfs_h_creat(fs, root, filename, O_RDWR, 0644, &sb);
+ if (!leaf) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat leaf", ret);
+ }
+ fprintf(stderr, "glfs_h_create leaf - %p\n", leaf);
+
+ leaf = glfs_h_lookupat(fs, root, filename2, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ ret = glfs_h_rename(fs, root, filename, root, filename2);
+ LOG_ERR("glfs_rename", ret);
+
+ while (cnt++ < 5) {
+ ret = glfs_h_poll_upcall(fs, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ /* There should not be any upcalls sent */
+ if (glfs_upcall_get_reason(cbk) != GLFS_UPCALL_EVENT_NULL) {
+ fprintf(stderr, "Error: Upcall received(%d)\n",
+ glfs_upcall_get_reason(cbk));
+ exit(1);
+ }
+
+ glfs_free(cbk);
+ }
+
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug1283983.sh b/tests/basic/gfapi/bug1283983.sh
new file mode 100755
index 00000000000..931a0b55935
--- /dev/null
+++ b/tests/basic/gfapi/bug1283983.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+build_tester $(dirname $0)/bug1283983.c -lgfapi
+
+TEST ./$(dirname $0)/bug1283983 $H0 $V0 $logdir/bug1283983.log
+
+## There shouldn't be any NULL gfid messages logged
+TEST ! cat $logdir/bug1283983.log | grep "upcall" | grep "00000000-0000-0000-0000-000000000000"
+
+cleanup_tester $(dirname $0)/bug1283983
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/bug1291259.c b/tests/basic/gfapi/bug1291259.c
new file mode 100644
index 00000000000..cd7bc65268b
--- /dev/null
+++ b/tests/basic/gfapi/bug1291259.c
@@ -0,0 +1,181 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
+ ret, errno); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+#define LOG_IF_NO_ERR(func, ret) \
+ do { \
+ if (ret == 0) { \
+ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+#define GLAPI_UUID_LENGTH 16
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_t *fs2 = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ char *filename = "/a1";
+ char *filename2 = "/a2";
+ struct stat sb = {
+ 0,
+ };
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ int cnt = 1;
+ int upcall_received = 0;
+ struct glfs_upcall *cbk = NULL;
+ struct glfs_object *root = NULL, *leaf = NULL;
+ unsigned char globjhdl[GFAPI_HANDLE_LENGTH];
+ unsigned char globjhdl2[GFAPI_HANDLE_LENGTH];
+
+ fprintf(stderr, "Starting libgfapi_fini\n");
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ /* This does not block, but enables caching of events. Real
+ * applications like NFS-Ganesha run this in a thread before activity
+ * on the fs (through this instance) happens. */
+ ret = glfs_h_poll_upcall(fs, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ fs2 = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs2, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs2, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs2);
+ LOG_ERR("glfs_init", ret);
+
+ sleep(2);
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ root = glfs_h_lookupat(fs2, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_creat(fs2, root, filename, O_RDWR, 0644, &sb);
+ if (!leaf) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat leaf", ret);
+ }
+ fprintf(stderr, "glfs_h_create leaf - %p\n", leaf);
+
+ while (cnt++ < 5 && !upcall_received) {
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_upcall_inode *in_arg = NULL;
+
+ ret = glfs_h_poll_upcall(fs, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+ if (ret)
+ goto retry;
+
+ reason = glfs_upcall_get_reason(cbk);
+ fprintf(stderr, "Upcall received(%d)\n", reason);
+
+ if (reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ struct glfs_object *object = NULL;
+
+ in_arg = glfs_upcall_get_event(cbk);
+ object = glfs_upcall_inode_get_object(in_arg);
+
+ ret = glfs_h_extract_handle(root, globjhdl + GLAPI_UUID_LENGTH,
+ GFAPI_HANDLE_LENGTH);
+ LOG_ERR("glfs_h_extract_handle", (ret != 16));
+
+ ret = glfs_h_extract_handle(object, globjhdl2 + GLAPI_UUID_LENGTH,
+ GFAPI_HANDLE_LENGTH);
+ LOG_ERR("glfs_h_extract_handle", (ret != 16));
+
+ if (memcmp(globjhdl + GLAPI_UUID_LENGTH,
+ globjhdl2 + GLAPI_UUID_LENGTH, 16)) {
+ fprintf(stderr, "Error: gfid mismatch\n");
+ exit(1);
+ }
+ upcall_received = 1;
+ }
+
+ retry:
+ if (!upcall_received)
+ sleep(1); /* glfs_h_poll_upcall() does not block */
+
+ if (!ret) {
+ glfs_free(cbk);
+ cbk = NULL;
+ }
+ }
+
+ if (!upcall_received) {
+ fprintf(stderr, "Error: Upcall not received\n");
+ exit(1);
+ }
+
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug1291259.t b/tests/basic/gfapi/bug1291259.t
new file mode 100755
index 00000000000..999cda2da3a
--- /dev/null
+++ b/tests/basic/gfapi/bug1291259.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/bug1291259.c -lgfapi
+
+TEST ./$(dirname $0)/bug1291259 $H0 $V0 $logdir/bug1291259.log
+
+cleanup_tester $(dirname $0)/bug1291259
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1405301
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1405301
diff --git a/tests/basic/gfapi/bug1613098.c b/tests/basic/gfapi/bug1613098.c
new file mode 100644
index 00000000000..ee67e97a034
--- /dev/null
+++ b/tests/basic/gfapi/bug1613098.c
@@ -0,0 +1,96 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define ACL_TYPE_ACCESS (0x8000)
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *filename = "file_tmp";
+ struct glfs_object *object = NULL;
+ acl_t acl = NULL;
+ struct stat sb;
+
+ if (argc != 3) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd = glfs_creat(fs, filename, flags, 0044);
+ if (fd == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+ glfs_close(fd);
+
+ object = glfs_h_lookupat(fs, NULL, filename, NULL, 0);
+ if (object == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_h_lookupat", ret, out);
+ }
+
+ ret = glfs_chown(fs, filename, 99, 99);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_chown", ret, out);
+
+ ret = glfs_setfsuid(99);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_setfsuid", ret, out);
+
+ ret = glfs_setfsgid(99);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_setfsgid", ret, out);
+
+ acl = glfs_h_acl_get(fs, object, ACL_TYPE_ACCESS);
+ if (acl == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_h_acl_get", ret, out);
+ }
+
+ ret = glfs_h_acl_set(fs, object, ACL_TYPE_ACCESS, acl);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_h_acl_get", ret, out);
+out:
+ glfs_setfsuid(0);
+ glfs_setfsgid(0);
+
+ if (object)
+ glfs_h_close(object);
+
+ if (fs)
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/bug1613098.t b/tests/basic/gfapi/bug1613098.t
new file mode 100755
index 00000000000..e4acc2b76bf
--- /dev/null
+++ b/tests/basic/gfapi/bug1613098.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/bug1613098.c -lgfapi
+
+TEST ./$(dirname $0)/bug1613098 $V0 $logdir/bug1613098.log
+
+cleanup_tester $(dirname $0)/bug1613098
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-async-calls-test.c b/tests/basic/gfapi/gfapi-async-calls-test.c
new file mode 100644
index 00000000000..55835b14709
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-async-calls-test.c
@@ -0,0 +1,494 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+int cbk_complete = 0;
+int cbk_ret_val = -1;
+
+void
+cbk_check()
+{
+ while (cbk_complete != 1) {
+ sleep(1);
+ }
+ if (cbk_ret_val < 0) {
+ fprintf(stderr, "cbk_ret_val is -ve\n");
+ }
+}
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = malloc(count + 1);
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+void
+pwritev_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pwritev failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pwritev_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *write_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ write_cookie = strdup("write_cookie");
+ ret = glfs_pwritev_async(glfd, &iov, 1, 0, flags, pwritev_async_cbk,
+ &write_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_pwritev async failed");
+ }
+ return ret;
+}
+
+void
+pwrite_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pwrite_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pwrite_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ char *buf2 = "ten bytes!";
+ void *write_cookie = strdup("write_cookie");
+ ret = glfs_pwrite_async(glfd, buf1, 10, 0, flags, pwrite_async_cbk,
+ &write_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_pwrite_async failed");
+ }
+ return ret;
+}
+
+void
+writev_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_writev_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+writev_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *write_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ write_cookie = strdup("write_cookie");
+ ret = glfs_writev_async(glfd, &iov, 1, flags, writev_async_cbk,
+ &write_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_writev_async failed");
+ }
+ return ret;
+}
+
+void
+write_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_write_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+write_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ char *buf2 = "ten bytes!";
+ void *write_cookie = strdup("write_cookie");
+ ret = glfs_write_async(glfd, buf1, 10, flags, write_async_cbk,
+ &write_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_write_async failed");
+ }
+ return ret;
+}
+
+void
+preadv_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_preadv_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+preadv_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *read_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ read_cookie = strdup("preadv_cookie");
+ ret = glfs_preadv_async(glfd, &iov, 1, 0, flags, preadv_async_cbk,
+ &read_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_preadv async failed");
+ }
+ return ret;
+}
+
+void
+pread_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pread_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pread_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ void *read_cookie = strdup("read_cookie");
+ ret = glfs_pread_async(glfd, buf1, 10, 0, flags, pread_async_cbk,
+ &read_cookie);
+ if (ret < 0) {
+ LOG_ERR("glfs_pread_async failed");
+ }
+
+ return ret;
+}
+
+void
+readv_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_readv_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+readv_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *read_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ read_cookie = strdup("read_cookie");
+ ret = glfs_readv_async(glfd, &iov, 1, flags, readv_async_cbk, &read_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_readv_async failed");
+ }
+ return ret;
+}
+
+void
+read_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_read_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+read_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ void *read_cookie = strdup("read_cookie");
+ ret = glfs_read_async(glfd, buf1, 10, flags, read_async_cbk, &read_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_read_async failed");
+ }
+ return ret;
+}
+
+void
+fsync_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+void
+fdatasync_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+void
+ftruncate_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_ftruncate_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+ const char *filename = "glfs_test.txt";
+ int flags = (O_RDWR | O_CREAT);
+ glfs_fd_t *glfd = NULL;
+ int count = 200;
+ void *data = strdup("Sample_text");
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ exit(1);
+ }
+
+ ret = pwritev_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_pwritev_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = writev_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_writev_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = write_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_write_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = preadv_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_preadv_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = pread_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_pread_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = readv_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_readv_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = read_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_read_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_fsync(glfd, NULL, NULL);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync failed");
+ exit(1);
+ }
+
+ ret = glfs_fdatasync(glfd, NULL, NULL);
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync failed");
+ exit(1);
+ }
+
+ ret = glfs_fsync_async(glfd, fsync_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync_async failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_fdatasync_async(glfd, fdatasync_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync_async failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_ftruncate_async(glfd, 4, ftruncate_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_ftruncate_async failed");
+ exit(1);
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs close failed");
+ }
+
+ ret = glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-async-calls-test.t b/tests/basic/gfapi/gfapi-async-calls-test.t
new file mode 100644
index 00000000000..9e5cd7c92cc
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-async-calls-test.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+EXIT_EARLY=1;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-async-calls-test.c -lgfapi
+
+
+TEST ./$(dirname $0)/gfapi-async-calls-test ${H0} $V0 $logdir/gfapi-async-calls-test.log
+
+cleanup_tester $(dirname $0)/gfapi-async-calls-test
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-copy-file-range.t b/tests/basic/gfapi/gfapi-copy-file-range.t
new file mode 100644
index 00000000000..a56d3a58e07
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-copy-file-range.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+mkfs.xfs 2>&1 | grep reflink
+if [ $? -ne 0 ]; then
+ SKIP_TESTS
+ exit
+fi
+
+
+TEST glusterd
+
+TEST truncate -s 2G $B0/xfs_image
+# for now, a xfs filesystem with reflink support is created.
+# In future, better to make changes in MKFS_LOOP so that,
+# once can create a xfs filesystem with reflink enabled in
+# generic and simple way, instead of doing below steps each
+# time.
+TEST mkfs.xfs -f -i size=512 -m reflink=1 $B0/xfs_image;
+
+TEST mkdir $B0/bricks
+TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks
+
+# Just a single brick volume. More test cases need to be
+# added in future for distribute, replicate,
+# distributed replicate and distributed replicated sharded
+# volumes.
+TEST $CLI volume create $V0 $H0:$B0/bricks/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/urandom of=$M0/file bs=1M count=555;
+
+# check for the existence of the created file
+TEST stat $M0/file;
+
+# grab the size of the file
+SRC_SIZE=$(stat -c %s $M0/file);
+
+logdir=`gluster --print-logdir`
+
+# TODO:
+# For now, do not call copy-file-range utility. This is because,
+# the regression machines are centos-7 based which does not have
+# copy_file_range API available. So, instead of this testcase
+# causing regression failures, for now, this is just a dummy test
+# case. Uncomment the below tests (until volume stop) when there
+# is support for copy_file_range in the regression machines.
+#
+
+TEST build_tester $(dirname $0)/glfs-copy-file-range.c -lgfapi
+
+TEST ./$(dirname $0)/glfs-copy-file-range $H0 $V0 $logdir/gfapi-copy-file-range.log /file /new
+
+# check whether the destination file is created or not
+TEST stat $M0/new
+
+# check the size of the destination file
+DST_SIZE=$(stat -c %s $M0/new);
+
+# The sizes of the source and destination should be same.
+# Atleast it ensures that, copy_file_range API is working
+# as expected. Whether the actual cloning happened via reflink
+# or a read/write happened is different matter.
+TEST [ $SRC_SIZE == $DST_SIZE ];
+
+cleanup_tester $(dirname $0)/glfs-copy-file-range
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+UMOUNT_LOOP $B0/bricks;
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-dup.c b/tests/basic/gfapi/gfapi-dup.c
new file mode 100644
index 00000000000..028108e4590
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-dup.c
@@ -0,0 +1,84 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ const char *filename = "file_tmp";
+ const char *buff =
+ "An opinion should be the result of thought, "
+ "not a substitute for it.";
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ ret = glfs_write(fd1, buff, strlen(buff), flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+
+ fd2 = glfs_dup(fd1);
+ if (fd2 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_dup", ret, out);
+ }
+
+ ret = glfs_lseek(fd2, 0, SEEK_SET);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_lseek", ret, out);
+
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fd2 != NULL)
+ glfs_close(fd2);
+ if (fs) {
+ ret = glfs_fini(fs);
+ if (ret)
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-dup.t b/tests/basic/gfapi/gfapi-dup.t
new file mode 100755
index 00000000000..849b106f90f
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-dup.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-dup.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-dup $H0 $V0 $logdir/gfapi-dup.log
+
+cleanup_tester $(dirname $0)/gfapi-dup
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
new file mode 100644
index 00000000000..2e666be7ec7
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/sync
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi
+
+
+#Launch a program to keep doing writes on an fd
+./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync &
+p=$!
+sleep 1 #Let some writes go through
+#Check if graph switch will lead to any pending markers for ever
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+
+TEST rm -f $M0/sync #Make sure the glfd is closed
+TEST wait #Wait for background process to die
+#Goal is to check if there is permanent FOOL changelog
+sleep 5
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty
+
+cleanup_tester $(dirname $0)/gfapi-async-calls-test
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c
new file mode 100644
index 00000000000..91b59cea02b
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-keep-writing.c
@@ -0,0 +1,129 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *logfile, const char *syncfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+ struct stat buf = {0};
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ while (glfs_stat(fs, syncfile, &buf) == 0) {
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *syncfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+ syncfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, logfile, syncfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-load-volfile.c b/tests/basic/gfapi/gfapi-load-volfile.c
new file mode 100644
index 00000000000..fbfc6045cd7
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-load-volfile.c
@@ -0,0 +1,65 @@
+/*
+ * Create a glfs instance based on a .vol file
+ *
+ * This is used to measure memory leaks by initializing a graph through a .vol
+ * file and destroying it again.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <glusterfs/api/glfs.h>
+
+#define PROGNAME "gfapi-load-volfile"
+
+void
+usage(FILE *output)
+{
+ fprintf(output, "Usage: " PROGNAME " <volfile>\n");
+}
+
+void
+main(int argc, char **argv)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ if (argc != 2) {
+ usage(stderr);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "-h")) {
+ usage(stdout);
+ exit(EXIT_SUCCESS);
+ }
+
+ fs = glfs_new(PROGNAME);
+ if (!fs) {
+ perror("glfs_new failed");
+ exit(EXIT_FAILURE);
+ }
+
+ glfs_set_logging(fs, PROGNAME ".log", 9);
+
+ ret = glfs_set_volfile(fs, argv[1]);
+ if (ret) {
+ perror("glfs_set_volfile failed");
+ ret = EXIT_FAILURE;
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret) {
+ perror("glfs_init failed");
+ ret = EXIT_FAILURE;
+ goto out;
+ }
+
+ ret = EXIT_SUCCESS;
+out:
+ glfs_fini(fs);
+
+ exit(ret);
+}
diff --git a/tests/basic/gfapi/gfapi-load-volfile.t b/tests/basic/gfapi/gfapi-load-volfile.t
new file mode 100644
index 00000000000..d914cacd819
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-load-volfile.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create ${V0} ${H0}:${B0}/brick0
+EXPECT 'Created' volinfo_field ${V0} 'Status'
+
+TEST $CLI volume start ${V0}
+EXPECT 'Started' volinfo_field ${V0} 'Status'
+
+TEST build_tester $(dirname ${0})/gfapi-load-volfile.c -lgfapi
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick0,g" \
+ $(dirname ${0})/protocol-client.vol.in \
+ > $(dirname ${0})/protocol-client.vol
+
+TEST ./$(dirname ${0})/gfapi-load-volfile \
+ $(dirname ${0})/protocol-client.vol
+
+cleanup_tester $(dirname ${0})/gfapi-load-volfile
+cleanup_tester $(dirname ${0})/protocol-client.vol
+
+cleanup
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
new file mode 100644
index 00000000000..7beb8dd1fe4
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
@@ -0,0 +1,127 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *volfile,
+ const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile(fs, volfile);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *volfile, const char *logfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+
+ fs = init_glfs(hostname, volname, volfile, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *volfile = NULL;
+ char *logfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ volfile = argv[3];
+ logfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, volfile, logfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
new file mode 100755
index 00000000000..8e94df9d321
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../ssl.rc
+
+cleanup;
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,off,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST create_self_signed_certs
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ssl-load-volfile-test.c -lgfapi
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test with management encryption (No I/O encryption)
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,on,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+cleanup_tester $(dirname $0)/gfapi-ssl-load-volfile-test
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
+# NetBSD build scripts are not up to date therefore this test
+# is failing in NetBSD. Therefore skipping the test in NetBSD
+# as of now.
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/gfapi/gfapi-ssl-test.c b/tests/basic/gfapi/gfapi-ssl-test.c
new file mode 100644
index 00000000000..a27b5233702
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-test.c
@@ -0,0 +1,124 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *logfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ ret = glfs_test_function(hostname, volname, logfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-ssl-test.t b/tests/basic/gfapi/gfapi-ssl-test.t
new file mode 100755
index 00000000000..937fcc83a4c
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-test.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../ssl.rc
+
+cleanup;
+
+TEST create_self_signed_certs
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ssl-test.c -lgfapi
+
+# Run test without I/O or management encryption
+TEST ./$(dirname $0)/gfapi-ssl-test $H0 $V0 $logdir/gfapi-ssl-test.log
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test with management encryption (No I/O encryption)
+TEST ./$(dirname $0)/gfapi-ssl-test $H0 $V0 $logdir/gfapi-ssl-test.log
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test without I/O or management encryption
+TEST ./$(dirname $0)/gfapi-ssl-test $H0 $V0 $logdir/gfapi-ssl-test.log
+
+cleanup_tester $(dirname $0)/gfapi-ssl-test
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
+# NetBSD build scripts are not up to date therefore this test
+# is failing in NetBSD. Therefore skipping the test in NetBSD
+# as of now.
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/gfapi/gfapi-statx-basic.c b/tests/basic/gfapi/gfapi-statx-basic.c
new file mode 100644
index 00000000000..a4943fa0fd1
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-statx-basic.c
@@ -0,0 +1,184 @@
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GOTO_LABEL_ON_FALSE(compstr, ret, label) \
+ do { \
+ if (ret == false) { \
+ fprintf(stderr, "%s : comparison failed!\n", compstr); \
+ goto label; \
+ } \
+ } while (0)
+
+#define WRITE_SIZE 513
+#define TRUNC_SIZE 4096
+
+/* Using private function and hence providing a forward declation in sync with
+code in glfs-internal.h */
+int
+glfs_statx(struct glfs *fs, const char *path, unsigned int mask,
+ struct glfs_stat *statxbuf);
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *filename = "file_tmp";
+ const char buff[WRITE_SIZE];
+ struct stat sb;
+ unsigned int mask;
+ struct glfs_stat statx;
+ bool bret;
+
+ if (argc != 3) {
+ fprintf(stderr, "Invalid argument\n");
+ fprintf(stderr, "Usage: %s <volname> <logfile>\n", argv[0]);
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ ret = glfs_truncate(fs, filename, TRUNC_SIZE);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_truncate", ret, out);
+
+ ret = glfs_write(fd1, buff, WRITE_SIZE, flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+
+ ret = glfs_fstat(fd1, &sb);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_fstat", ret, out);
+
+ if (sb.st_size != TRUNC_SIZE) {
+ fprintf(stderr, "wrong size %jd should be %jd\n", (intmax_t)sb.st_size,
+ (intmax_t)2048);
+ ret = -1;
+ goto out;
+ }
+
+ glfs_close(fd1);
+ fd1 = NULL;
+
+ /* TEST 1: Invalid mask to statx */
+ mask = 0xfafadbdb;
+ ret = glfs_statx(fs, filename, mask, NULL);
+ if (ret == 0 || ((ret == -1) && (errno != EINVAL))) {
+ fprintf(stderr,
+ "Invalid args passed, but error returned is"
+ " incorrect (ret - %d, errno - %d)\n",
+ ret, errno);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+ /* TEST 2: Call statx and validate fields against prior fstat data */
+ /* NOTE: This fails, as iatt->ia_flags are not carried through the stack,
+ * for example if mdc_to_iatt is invoked to serve cached stat, we will loose
+ * the flags. */
+ mask = GLFS_STAT_ALL;
+ ret = glfs_statx(fs, filename, mask, &statx);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_statx", ret, out);
+
+ if ((statx.glfs_st_mask & GLFS_STAT_BASIC_STATS) != GLFS_STAT_BASIC_STATS) {
+ fprintf(stderr, "Invalid glfs_st_mask, expecting 0x%x got 0x%x\n",
+ GLFS_STAT_ALL, statx.glfs_st_mask);
+ ret = -1;
+ goto out;
+ }
+
+ bret = (sb.st_ino == statx.glfs_st_ino);
+ GOTO_LABEL_ON_FALSE("(sb.st_ino == statx.glfs_st_ino)", bret, out);
+
+ bret = (sb.st_mode == statx.glfs_st_mode);
+ GOTO_LABEL_ON_FALSE("(sb.st_mode == statx.glfs_st_mode)", bret, out);
+
+ bret = (sb.st_nlink == statx.glfs_st_nlink);
+ GOTO_LABEL_ON_FALSE("(sb.st_nlink == statx.glfs_st_nlink)", bret, out);
+
+ bret = (sb.st_uid == statx.glfs_st_uid);
+ GOTO_LABEL_ON_FALSE("(sb.st_uid == statx.glfs_st_uid)", bret, out);
+
+ bret = (sb.st_gid == statx.glfs_st_gid);
+ GOTO_LABEL_ON_FALSE("(sb.st_gid == statx.glfs_st_gid)", bret, out);
+
+ bret = (sb.st_size == statx.glfs_st_size);
+ GOTO_LABEL_ON_FALSE("(sb.st_size == statx.glfs_st_size)", bret, out);
+
+ bret = (sb.st_blksize == statx.glfs_st_blksize);
+ GOTO_LABEL_ON_FALSE("(sb.st_blksize == statx.glfs_st_blksize)", bret, out);
+
+ bret = (sb.st_blocks == statx.glfs_st_blocks);
+ GOTO_LABEL_ON_FALSE("(sb.st_blocks == statx.glfs_st_blocks)", bret, out);
+
+ bret = (!memcmp(&sb.st_atim, &statx.glfs_st_atime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_atim == statx.glfs_st_atime)", bret, out);
+
+ bret = (!memcmp(&sb.st_mtim, &statx.glfs_st_mtime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_mtim == statx.glfs_st_mtime)", bret, out);
+
+ bret = (!memcmp(&sb.st_ctim, &statx.glfs_st_ctime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_ctim == statx.glfs_st_ctime)", bret, out);
+
+ /* TEST 3: Check if partial masks are accepted */
+ mask = GLFS_STAT_TYPE | GLFS_STAT_UID | GLFS_STAT_GID;
+ ret = glfs_statx(fs, filename, mask, &statx);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_statx", ret, out);
+
+ /* We currently still return all stats, as is acceptable based on the API
+ * definition in the header (and in statx as well) */
+ if ((statx.glfs_st_mask & GLFS_STAT_BASIC_STATS) != GLFS_STAT_BASIC_STATS) {
+ fprintf(stderr, "Invalid glfs_st_mask, expecting 0x%x got 0x%x\n",
+ GLFS_STAT_ALL, statx.glfs_st_mask);
+ ret = -1;
+ goto out;
+ }
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fs) {
+ (void)glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-statx-basic.t b/tests/basic/gfapi/gfapi-statx-basic.t
new file mode 100755
index 00000000000..d9acbce2f99
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-statx-basic.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# NOTE: Test is passing due to very specific volume configuration
+# Disable md-cache, as it does not save and return ia_flags from iatt
+# This is possibly going to be true of other xlators as well (ec/afr), need to
+# ensure these are fixed, or hack statx to return all basic attrs anyway.
+TEST $CLI volume set $V0 performance.md-cache-timeout 0
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/gfapi-statx-basic.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-statx-basic $V0 $logdir/gfapi-statx-basic.log
+
+cleanup_tester $(dirname $0)/gfapi-statx-basic
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-trunc.c b/tests/basic/gfapi/gfapi-trunc.c
new file mode 100644
index 00000000000..769f6cfa1d9
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-trunc.c
@@ -0,0 +1,89 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define WRITE_SIZE 4096
+#define TRUNC_SIZE 1234
+/* Make sure TRUNC_SIZE is smaller than WRITE_SIZE at compile time. */
+typedef char _size_check[WRITE_SIZE - TRUNC_SIZE];
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *filename = "file_tmp";
+ const char buff[WRITE_SIZE];
+ struct stat sb;
+
+ if (argc != 3) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ ret = glfs_write(fd1, buff, WRITE_SIZE, flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+
+ ret = glfs_truncate(fs, filename, TRUNC_SIZE);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_truncate", ret, out);
+
+ ret = glfs_fstat(fd1, &sb);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_fstat", ret, out);
+
+ if (sb.st_size != TRUNC_SIZE) {
+ fprintf(stderr, "wrong size %jd should be %jd\n", (intmax_t)sb.st_size,
+ (intmax_t)2048);
+ ret = -1;
+ }
+
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fs) {
+ /*
+ * If this fails (as it does on Special Snowflake NetBSD for no
+ * good reason), it shouldn't affect the result of the test.
+ */
+ (void)glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-trunc.t b/tests/basic/gfapi/gfapi-trunc.t
new file mode 100644
index 00000000000..4943a6be898
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-trunc.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/gfapi-trunc.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-trunc $V0 $logdir/gfapi-trunc.log
+
+cleanup_tester $(dirname $0)/gfapi-trunc
+
+cleanup;
diff --git a/tests/basic/gfapi/glfd-lkowner.c b/tests/basic/gfapi/glfd-lkowner.c
new file mode 100644
index 00000000000..ec0429dc3c4
--- /dev/null
+++ b/tests/basic/gfapi/glfd-lkowner.c
@@ -0,0 +1,214 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+char lownera[8] = "lownera", lownerb[8] = "lownerb";
+char lownerc[8] = "lownerc";
+
+int
+lock_test(glfs_fd_t *glfd1, glfs_fd_t *glfd2, bool should_fail, int l1_start,
+ int l1_len, char *l1_owner, int lo1_len, int l2_start, int l2_len,
+ char *l2_owner, int lo2_len)
+{
+ int ret = -1, f_ret = -1;
+ struct flock lock1 =
+ {
+ 0,
+ },
+ lock2 = {
+ 0,
+ };
+
+lock1:
+ if (!glfd1)
+ goto lock2;
+
+ /* lock on glfd1 */
+ lock1.l_type = F_WRLCK;
+ lock1.l_whence = SEEK_SET;
+ lock1.l_start = l1_start;
+ lock1.l_len = l1_len;
+
+ ret = glfs_fd_set_lkowner(glfd1, l1_owner, lo1_len);
+ LOG_ERR("glfs_fd_set_lkowner on glfd1", ret);
+
+ ret = glfs_posix_lock(glfd1, F_SETLK, &lock1);
+ LOG_ERR("glfs_posix_lock on glfd1", ret);
+
+lock2:
+ if (!glfd2)
+ goto out;
+
+ /* lock on glfd2 */
+ lock2.l_type = F_WRLCK;
+ lock2.l_whence = SEEK_SET;
+ lock2.l_start = l2_start;
+ lock2.l_len = l2_len;
+
+ ret = glfs_fd_set_lkowner(glfd2, l2_owner, lo2_len);
+ LOG_ERR("glfs_fd_set_lkowner on glfd2", ret);
+
+ ret = glfs_posix_lock(glfd2, F_SETLK, &lock2);
+
+ if (should_fail && ret) {
+ f_ret = 0;
+ } else if (!ret && !should_fail) {
+ f_ret = 0;
+ } else {
+ f_ret = -1;
+ }
+out:
+ fprintf(stderr,
+ "Lock test on glfd1 (start(%d), len(%d),"
+ " lk_owner(%s)) and glfd2 (start(%d), len(%d), "
+ "lk_owner(%s)) - expected(%s) - result(%s)\n",
+ l1_start, l1_len, l1_owner, l2_start, l2_len, l2_owner,
+ (should_fail ? "FAIL" : "SUCCESS"), (ret ? "FAIL" : "SUCCESS"));
+ return f_ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i, status = 0;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd3 = NULL;
+ char *filename = "file_tmp";
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ fd1 = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd1 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+ fprintf(stderr, "glfs-create fd1 - %d\n", fd1);
+
+ fd2 = glfs_dup(fd1);
+ fprintf(stderr, "glfs-dup fd2 - %d\n", fd2);
+
+ fd3 = glfs_open(fs, filename, O_RDWR | O_SYNC);
+ if (fd2 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_open", ret);
+ }
+ fprintf(stderr, "glfs-open fd3 - %d\n", fd3);
+
+ /* TEST 1: Conflicting ranges, same lk_owner
+ * lock1 (0, 10, lownera)
+ * lock2 (5, 10, lownera)
+ * Expected: should not fail but get merged
+ */
+ ret = lock_test(fd1, fd2, false, 0, 10, lownera, 8, 5, 10, lownera, 8);
+ LOG_ERR("==== glfs_lock_test_1", ret);
+
+ /* TEST 2: Conflicting ranges, different lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (5, 10, lownerb)
+ * Expected: should fail and not get merged
+ */
+ ret = lock_test(NULL, fd2, true, 0, 10, lownera, 8, 5, 10, lownerb, 8);
+ LOG_ERR("==== glfs_lock_test_2", ret);
+
+ /* TEST 3: Different ranges, same lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (30, 10, lownera)
+ * Expected: should not fail
+ */
+ ret = lock_test(NULL, fd2, false, 0, 10, lownera, 8, 30, 10, lownera, 8);
+ LOG_ERR("==== glfs_lock_test_3", ret);
+
+ /* TEST 4: Conflicting ranges, different lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (50, 10, lownerb)
+ * Expected: should not fail
+ */
+ ret = lock_test(NULL, fd2, false, 0, 10, lownera, 8, 50, 10, lownerb, 8);
+ LOG_ERR("==== glfs_lock_test_4", ret);
+
+ /* TEST 5: Close fd1 & retry TEST2
+ * lock1 (not applicable)
+ * lock2 (5, 10, lownerb)
+ * Expected: should succeed now
+ */
+ ret = glfs_close(fd1);
+ LOG_ERR("glfs_close", ret);
+
+ ret = lock_test(NULL, fd2, false, 0, 10, lownera, 8, 5, 10, lownerb, 8);
+ LOG_ERR("==== glfs_lock_test_5", ret);
+
+ /* TEST 6: Check closing fd1 doesn't flush fd2 locks
+ * retry TEST 4 but with fd2 and fd3.
+ * lock1 (50, 10, lownerb) - already taken
+ * lock2 (55, 10, lownerc)
+ * Expected: should fail
+ */
+ ret = lock_test(NULL, fd3, true, 50, 10, lownerb, 8, 55, 10, lownerc, 8);
+ LOG_ERR("==== glfs_lock_test_6", ret);
+
+err:
+ ret = glfs_close(fd2);
+ LOG_ERR("glfs_close", ret);
+
+ ret = glfs_close(fd3);
+ LOG_ERR("glfs_close", ret);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/glfd-lkowner.t b/tests/basic/gfapi/glfd-lkowner.t
new file mode 100755
index 00000000000..ad7b0260a14
--- /dev/null
+++ b/tests/basic/gfapi/glfd-lkowner.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfd-lkowner.c -lgfapi
+
+TEST ./$(dirname $0)/glfd-lkowner $H0 $V0 $logdir/glfd-lkowner.log
+
+cleanup_tester $(dirname $0)/glfd-lkowner
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/glfs-copy-file-range.c b/tests/basic/gfapi/glfs-copy-file-range.c
new file mode 100644
index 00000000000..1c5fd81fc87
--- /dev/null
+++ b/tests/basic/gfapi/glfs-copy-file-range.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+#include <libgen.h>
+
+static void
+cleanup(glfs_t *fs)
+{
+ if (!fs)
+ return;
+#if 0
+ /* glfs fini path is still racy and crashing the program. Since
+ * this program any way has to die, we are not going to call fini
+ * in the released versions. i.e. final builds. For all
+ * internal testing lets enable this so that glfs_fini code
+ * path becomes stable. */
+ glfs_fini (fs);
+#endif
+}
+
+int
+main(int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ char *logfilepath = NULL;
+ char *path_src = NULL;
+ char *path_dst = NULL;
+ glfs_fd_t *glfd_in = NULL;
+ glfs_fd_t *glfd_out = NULL;
+ char *volfile_server = NULL;
+
+ struct stat stbuf = {
+ 0,
+ };
+ struct glfs_stat stat_src = {
+ 0,
+ };
+ struct glfs_stat prestat_dst = {
+ 0,
+ };
+ struct glfs_stat poststat_dst = {
+ 0,
+ };
+ size_t len;
+
+ if (argc < 6) {
+ printf("%s <volume> <log file path> <source> <destination>", argv[0]);
+ ret = -1;
+ goto out;
+ }
+
+ volfile_server = argv[1];
+ volname = argv[2];
+ logfilepath = argv[3];
+ path_src = argv[4];
+ path_dst = argv[5];
+
+ if (path_src[0] != '/') {
+ fprintf(stderr, "source path %s is not absolute", path_src);
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (path_dst[0] != '/') {
+ fprintf(stderr, "destination path %s is not absolute", path_dst);
+ errno = EINVAL;
+ goto out;
+ }
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ ret = -errno;
+ fprintf(stderr, "Not able to initialize volume '%s'", volname);
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to set the volfile server, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfilepath, 7);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to set the log file path, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ ret = -errno;
+ if (errno == ENOENT) {
+ fprintf(stderr, "Volume %s does not exist", volname);
+ } else {
+ fprintf(stderr,
+ "%s: Not able to fetch "
+ "volfile from glusterd",
+ volname);
+ }
+ goto out;
+ }
+
+ glfd_in = glfs_open(fs, path_src, O_RDONLY | O_NONBLOCK);
+ if (!glfd_in) {
+ ret = -errno;
+ goto out;
+ } else {
+ printf("OPEN_SRC: opening %s is success\n", path_src);
+ }
+
+ glfd_out = glfs_creat(fs, path_dst, O_RDWR, 0644);
+ if (!glfd_out) {
+ fprintf(stderr,
+ "FAILED_DST_OPEN: failed to "
+ "open (create) %s (%s)\n",
+ path_dst, strerror(errno));
+ ret = -errno;
+ goto out;
+ } else {
+ printf("OPEN_DST: opening %s is success\n", path_dst);
+ }
+
+ ret = glfs_fstat(glfd_in, &stbuf);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ } else {
+ printf("FSTAT_SRC: fstat on %s is success\n", path_dst);
+ }
+
+ len = stbuf.st_size;
+
+ do {
+ ret = glfs_copy_file_range(glfd_in, NULL, glfd_out, NULL, len, 0,
+ &stat_src, &prestat_dst, &poststat_dst);
+ if (ret == -1) {
+ fprintf(stderr, "copy_file_range failed with %s\n",
+ strerror(errno));
+ ret = -errno;
+ break;
+ } else {
+ printf("copy_file_range successful\n");
+ len -= ret;
+ }
+ } while (len > 0);
+
+out:
+ if (glfd_in)
+ glfs_close(glfd_in);
+ if (glfd_out)
+ glfs_close(glfd_out);
+
+ cleanup(fs);
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfs_h_creat_open.c b/tests/basic/gfapi/glfs_h_creat_open.c
new file mode 100644
index 00000000000..7672561e73f
--- /dev/null
+++ b/tests/basic/gfapi/glfs_h_creat_open.c
@@ -0,0 +1,118 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
+ ret, errno); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+#define LOG_IF_NO_ERR(func, ret) \
+ do { \
+ if (ret == 0) { \
+ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ struct glfs_object *root = NULL, *leaf = NULL;
+ glfs_fd_t *fd = NULL;
+ char *filename = "/ro-file";
+ struct stat sb = {
+ 0,
+ };
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char buf[32] = "abcdefghijklmnopqrstuvwxyz012345";
+
+ fprintf(stderr, "Starting glfs_h_creat_open\n");
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ sleep(2);
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ leaf = glfs_h_creat_open(fs, root, filename, O_RDONLY, 00444, &sb, &fd);
+ if (!leaf || !fd) {
+ ret = -1;
+ LOG_ERR("glfs_h_creat leaf", ret);
+ }
+ fprintf(stderr, "glfs_h_create_open leaf - %p\n", leaf);
+
+ ret = glfs_write(fd, buf, 32, 0);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_write: error writing to file %s, %s\n", filename,
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ LOG_ERR("glfs_h_getattrs", ret);
+
+ if (sb.st_size != 32) {
+ fprintf(stderr, "glfs_write: post size mismatch\n");
+ goto out;
+ }
+
+ fprintf(stderr, "Successfully opened and written to a read-only file \n");
+out:
+ if (fd)
+ glfs_close(fd);
+
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/glfs_h_creat_open.t b/tests/basic/gfapi/glfs_h_creat_open.t
new file mode 100755
index 00000000000..f24ae7395be
--- /dev/null
+++ b/tests/basic/gfapi/glfs_h_creat_open.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfs_h_creat_open.c -lgfapi
+
+TEST ./$(dirname $0)/glfs_h_creat_open $H0 $V0 $logdir/glfs.log
+
+cleanup_tester $(dirname $0)/glfs_h_creat_open
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/glfs_sysrq.c b/tests/basic/gfapi/glfs_sysrq.c
new file mode 100644
index 00000000000..13e06be6df2
--- /dev/null
+++ b/tests/basic/gfapi/glfs_sysrq.c
@@ -0,0 +1,60 @@
+/** glfs_sysrq.c
+ *
+ * Simple test application to run all glfs_syqrq() debugging calls.
+ *
+ * Usage: ./glfs_sysrq <host> <volume> <logfile>
+ */
+#include <errno.h>
+#include <stdio.h>
+
+#include <glusterfs/api/glfs.h>
+
+int
+main(int argc, char *argv[])
+{
+ /* cmdline arguments */
+ char *host = NULL;
+ char *volume = NULL;
+ char *logfile = NULL;
+
+ /* other variables */
+ glfs_t *fs = NULL;
+ int ret = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Usage: %s <host> <volume> <logfile>\n", argv[0]);
+ return -1;
+ }
+
+ host = argv[1];
+ volume = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volume);
+ if (!fs) {
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", host, 24007);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ return -1;
+ }
+
+ /* checking of the results is easier in the script running this test */
+ glfs_sysrq(fs, GLFS_SYSRQ_HELP);
+ glfs_sysrq(fs, GLFS_SYSRQ_STATEDUMP);
+
+ glfs_fini(fs);
+
+ return 0;
+}
diff --git a/tests/basic/gfapi/glfs_sysrq.t b/tests/basic/gfapi/glfs_sysrq.t
new file mode 100755
index 00000000000..d1a0e9bc248
--- /dev/null
+++ b/tests/basic/gfapi/glfs_sysrq.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Run glfs_sysrq, a gfapi applications calling all glfs_sysrq() commands.
+# Each command generates a specific log message, or something else that can be
+# tested for existance.
+#
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+logdir=$(gluster --print-logdir)
+
+# clear all statedumps
+cleanup_statedump
+TEST ! test -e $statedumpdir/*.dump.*
+# vim friendly command */
+
+build_tester $(dirname $0)/glfs_sysrq.c -lgfapi
+TEST $(dirname $0)/glfs_sysrq $H0 $V0 $logdir/glfs_sysrq.log
+
+# check for the help message in the log
+TEST grep -q '"(H)elp"' $logdir/glfs_sysrq.log
+
+# see if there is a statedump
+TEST test -e $statedumpdir/*.dump.*
+# vim friendly command */
+
+cleanup_tester $(dirname $0)/glfs_sysrq
+cleanup
diff --git a/tests/basic/gfapi/glfs_xreaddirplus_r.c b/tests/basic/gfapi/glfs_xreaddirplus_r.c
new file mode 100644
index 00000000000..0c4c79123eb
--- /dev/null
+++ b/tests/basic/gfapi/glfs_xreaddirplus_r.c
@@ -0,0 +1,242 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <time.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR(func, bool_var, ret, label) \
+ do { \
+ if (!bool_var) { \
+ fprintf(stderr, "%s : returned error (%s)\n", func, \
+ strerror(errno)); \
+ ret = -1; \
+ goto label; \
+ } \
+ } while (0)
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+
+void
+assimilatetime(struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+/*
+ * Returns '%' difference between ts1 & ts2
+ */
+int
+comparetime(struct timespec ts1, struct timespec ts2)
+{
+ uint64_t ts1_n, ts2_n;
+ int pct = 0;
+
+ ts1_n = (ts1.tv_sec * 1000000000) + ts1.tv_nsec;
+ ts2_n = (ts2.tv_sec * 1000000000) + ts2.tv_nsec;
+
+ pct = ((ts1_n - ts2_n) * 100) / ts1_n;
+
+ return pct;
+}
+
+int
+old_readdir(glfs_t *fs)
+{
+ struct glfs_object *root = NULL;
+ struct glfs_fd *fd = NULL;
+ struct stat *sb = NULL;
+ char buf[512];
+ struct dirent *entry = NULL;
+ int ret = -1;
+ struct glfs_object *glhandle = NULL;
+
+ if (!fs)
+ return -1;
+
+ root = glfs_h_lookupat(fs, NULL, "/", sb, 0);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_h_lookupat", !!root, ret, out);
+
+ fd = glfs_opendir(fs, "/");
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_opendir", !!fd, ret, out);
+
+ while (glfs_readdir_r(fd, (struct dirent *)buf, &entry), entry) {
+ if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, "..")) {
+ glhandle = glfs_h_lookupat(fs, root, "/", sb, 0);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_h_lookupat", !!glhandle,
+ ret, out);
+ }
+ }
+
+ glfs_closedir(fd);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+new_xreaddirplus(glfs_t *fs)
+{
+ struct glfs_fd *fd = NULL;
+ struct stat *sb = NULL;
+ int ret = -1;
+ uint32_t rflags = (GFAPI_XREADDIRP_STAT | GFAPI_XREADDIRP_HANDLE);
+ struct glfs_xreaddirp_stat *xstat = NULL;
+ struct dirent de;
+ struct dirent *pde = NULL;
+ struct glfs_object *glhandle = NULL;
+
+ if (!fs)
+ return -1;
+
+ fd = glfs_opendir(fs, "/");
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_opendir", !!fd, ret, out);
+
+ ret = glfs_xreaddirplus_r(fd, rflags, &xstat, &de, &pde);
+ while (ret > 0 && pde != NULL) {
+ if (xstat) {
+ sb = glfs_xreaddirplus_get_stat(xstat);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_xreaddirplus_get_stat",
+ !!sb, ret, out);
+
+ if (strcmp(de.d_name, ".") && strcmp(de.d_name, "..")) {
+ glhandle = glfs_xreaddirplus_get_object(xstat);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR(
+ "glfs_xreaddirplus_get_object", !!glhandle, ret, out);
+ }
+ }
+
+ if (xstat) {
+ glfs_free(xstat);
+ xstat = NULL;
+ }
+
+ ret = glfs_xreaddirplus_r(fd, rflags, &xstat, &de, &pde);
+
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_xreaddirp_r", ret, out);
+ }
+
+ if (xstat)
+ glfs_free(xstat);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ char *my_file = "file_";
+ char my_file_name[MAXPATHNAME];
+ uint32_t flags = O_RDWR | O_SYNC;
+ struct glfs_fd *fd = NULL;
+ int i = 0;
+ int pct = 0;
+ struct timespec timestamp = {0, 0}, st_timestamp, ed_timestamp;
+ struct timespec otimestamp = {0, 0}, ost_timestamp, oed_timestamp;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_new", !!fs, ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s%d", my_file, i);
+
+ fd = glfs_creat(fs, my_file_name, flags, 0644);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_creat", !!fd, ret, out);
+
+ glfs_close(fd);
+ }
+
+ /* measure performance using old readdir call and new xreaddirplus call and
+ * compare */
+ ret = clock_gettime(CLOCK_REALTIME, &ost_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ ret = old_readdir(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("old_readdir", ret, out);
+
+ ret = clock_gettime(CLOCK_REALTIME, &oed_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ assimilatetime(&otimestamp, ost_timestamp, oed_timestamp);
+
+ printf("\tOverall time using readdir:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ otimestamp.tv_sec, otimestamp.tv_nsec);
+
+ ret = clock_gettime(CLOCK_REALTIME, &st_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ ret = new_xreaddirplus(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("new_xreaddirplus", ret, out);
+
+ ret = clock_gettime(CLOCK_REALTIME, &ed_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ assimilatetime(&timestamp, st_timestamp, ed_timestamp);
+
+ printf("\tOverall time using xreaddirplus:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ timestamp.tv_sec, timestamp.tv_nsec);
+
+ pct = comparetime(otimestamp, timestamp);
+ printf("There is improvement by %d%%\n", pct);
+
+ ret = 0;
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ if (ret)
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfs_xreaddirplus_r.t b/tests/basic/gfapi/glfs_xreaddirplus_r.t
new file mode 100755
index 00000000000..d21a00c66f2
--- /dev/null
+++ b/tests/basic/gfapi/glfs_xreaddirplus_r.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfs_xreaddirplus_r.c -lgfapi
+
+TEST $(dirname $0)/glfs_xreaddirplus_r $H0 $V0 $logdir/glfs_xreaddirplus_r.log
+
+cleanup_tester $(dirname $0)/glfs_xreaddirplus_r
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/gfapi/glfsxmp-coverage.c b/tests/basic/gfapi/glfsxmp-coverage.c
new file mode 100644
index 00000000000..51650023efd
--- /dev/null
+++ b/tests/basic/gfapi/glfsxmp-coverage.c
@@ -0,0 +1,1900 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+
+#define TEST_STR_LEN 2048
+
+int
+test_dirops(glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char buf[2048];
+ struct dirent *entry = NULL;
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ fprintf(stderr, "Entries:\n");
+ while (glfs_readdir_r(fd, (struct dirent *)buf, &entry), entry) {
+ fprintf(stderr, "%s: %lu\n", entry->d_name, glfs_telldir(fd));
+ }
+
+ /* Should internally call fsyncdir(), hopefully */
+ glfs_fsync(fd, NULL, NULL);
+
+ glfs_closedir(fd);
+ return 0;
+}
+
+int
+test_xattr(glfs_t *fs)
+{
+ char *filename = "/filename2";
+ char *linkfile = "/linkfile";
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ char *ptr;
+ int ret;
+
+ ret = glfs_setxattr(fs, filename, "user.testkey", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_setxattr(fs, filename, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_getxattr(fs, filename, "user.testkey", buf, 512);
+ fprintf(stderr, "getxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_listxattr(fs, filename, buf, 512);
+ fprintf(stderr, "listxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_symlink(fs, "filename", linkfile);
+ fprintf(stderr, "symlink(%s %s): %s\n", filename, linkfile,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_readlink(fs, linkfile, buf, 512);
+ fprintf(stderr, "readlink(%s) : %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lsetxattr(fs, filename, "user.testkey3", "testval", 8, 0);
+ fprintf(stderr, "lsetxattr(%s) : %d (%s)\n", linkfile, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_llistxattr(fs, linkfile, buf, 512);
+ fprintf(stderr, "llistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lgetxattr(fs, filename, "user.testkey3", buf, 512);
+ fprintf(stderr, "lgetxattr(%s): %d (%s)\n", linkfile, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_removexattr(fs, filename, "user.testkey2");
+ fprintf(stderr, "removexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ fd = glfs_open(fs, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_fsetxattr(fd, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_fgetxattr(fd, "user.testkey2", buf, 512);
+ fprintf(stderr, "fgetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_flistxattr(fd, buf, 512);
+ fprintf(stderr, "flistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_fremovexattr(fd, "user.testkey2");
+ fprintf(stderr, "fremovexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_chdir(glfs_t *fs)
+{
+ int ret = -1;
+ char *dir = "/dir";
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *linkdir2 = "/linkdir2";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir(fs, topdir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", topdir, strerror(errno));
+ if (ret)
+ return -1;
+
+ ret = glfs_mkdir(fs, dir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", dir, strerror(errno));
+ if (ret)
+ return -1;
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink(fs, "topdir", linkdir);
+ if (ret) {
+ fprintf(stderr, "symlink(%s, %s): %s\n", topdir, linkdir,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_chdir(fs, linkdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", linkdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (respath) {
+ fprintf(stderr, "realpath(%s) worked unexpectedly: %s\n", subdir,
+ respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir(fs, subdir, 0755);
+ if (ret) {
+ fprintf(stderr, "mkdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(%s): %s\n", subdir, strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir(fs, subdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(/linkdir/subdir): %s\n", strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat(struct stat *sb)
+{
+ printf("Dumping stat information:\n");
+ printf("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK:
+ printf("block device\n");
+ break;
+ case S_IFCHR:
+ printf("character device\n");
+ break;
+ case S_IFDIR:
+ printf("directory\n");
+ break;
+ case S_IFIFO:
+ printf("FIFO/pipe\n");
+ break;
+ case S_IFLNK:
+ printf("symlink\n");
+ break;
+ case S_IFREG:
+ printf("regular file\n");
+ break;
+ case S_IFSOCK:
+ printf("socket\n");
+ break;
+ default:
+ printf("unknown?\n");
+ break;
+ }
+
+ printf("I-node number: %ld\n", (long)sb->st_ino);
+
+ printf("Mode: %lo (octal)\n",
+ (unsigned long)sb->st_mode);
+
+ printf("Link count: %ld\n", (long)sb->st_nlink);
+ printf("Ownership: UID=%ld GID=%ld\n", (long)sb->st_uid,
+ (long)sb->st_gid);
+
+ printf("Preferred I/O block size: %ld bytes\n", (long)sb->st_blksize);
+ printf("File size: %lld bytes\n", (long long)sb->st_size);
+ printf("Blocks allocated: %lld\n", (long long)sb->st_blocks);
+
+ printf("Last status change: %s", ctime(&sb->st_ctime));
+ printf("Last file access: %s", ctime(&sb->st_atime));
+ printf("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle(unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++) {
+ printf(":%02x:", glid[i]);
+ }
+ printf("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat(struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle(unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+void
+test_h_unlink(void)
+{
+ char *my_dir = "unlinkdir";
+ char *my_file = "file.txt";
+ char *my_subdir = "dir1";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+ *subdir = NULL, *subleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_unlink tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subdir = glfs_h_mkdir(fs, dir, my_subdir, 0755, &sb);
+ if (subdir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subleaf = glfs_h_creat(fs, subdir, my_file, O_CREAT, 0644, &sb);
+ if (subleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non empty directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+ my_subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, subdir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid errno "
+ ",%d, %s\n",
+ my_file, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid "
+ "errno ,%d, %s\n",
+ my_subdir, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, parent, my_dir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_dir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_unlink tests: PASSED\n");
+
+out:
+ if (dir)
+ glfs_h_close(dir);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (subdir)
+ glfs_h_close(subdir);
+ if (subleaf)
+ glfs_h_close(subleaf);
+ if (parent)
+ glfs_h_close(parent);
+
+ return;
+}
+
+void
+test_h_getsetattrs(void)
+{
+ char *my_dir = "attrdir";
+ char *my_file = "attrfile.txt";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb, retsb;
+ int ret, valid;
+ struct timespec timestamp;
+
+ printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_getattrs(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+ /* TODO: Compare stat information */
+
+ retsb.st_mode = 00666;
+ retsb.st_uid = 1000;
+ retsb.st_gid = 1001;
+ ret = clock_gettime(CLOCK_REALTIME, &timestamp);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ retsb.st_atim = timestamp;
+ retsb.st_mtim = timestamp;
+ valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+ GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+ peek_stat(&retsb);
+
+ ret = glfs_h_setattrs(fs, dir, &retsb, valid);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_setattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ memset(&retsb, 0, sizeof(struct stat));
+ ret = glfs_h_stat(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_stat: error %s: from (%p),%s\n", my_dir, dir,
+ strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+
+ printf("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dir)
+ glfs_h_close(dir);
+
+ return;
+}
+
+void
+test_h_truncate(void)
+{
+ char *my_dir = "truncatedir";
+ char *my_file = "file.txt";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+ struct stat sb;
+ glfs_fd_t *fd = NULL;
+ char buf[32];
+ off_t offset = 0;
+ int ret = 0;
+
+ printf("glfs_h_truncate tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n", my_file,
+ strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ memcpy(buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, buf, 32, 0);
+
+ /* run tests */
+ /* truncate lower */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate higher */
+ offset = 32;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate equal */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_truncate tests: PASSED\n");
+out:
+ if (fd)
+ glfs_close(fd);
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return;
+}
+
+void
+test_h_links(void)
+{
+ char *my_dir = "linkdir";
+ char *my_file = "file.txt";
+ char *my_symlnk = "slnk.txt";
+ char *my_lnk = "lnk.txt";
+ char *linksrc_dir = "dir1";
+ char *linktgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct glfs_object *ln1 = NULL;
+ struct stat sb;
+ int ret;
+ char *buf = NULL;
+
+ printf("glfs_h_link(s) tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, linksrc_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linksrc_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, linktgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linktgt_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+ ln1 = glfs_h_symlink(fs, parent, my_symlnk, "./file.txt", &sb);
+ if (ln1 == NULL) {
+ fprintf(stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+ my_symlnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ buf = calloc(1024, sizeof(char));
+ if (buf == NULL) {
+ fprintf(stderr, "Error allocating memory\n");
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_h_readlink(fs, ln1, buf, 1024);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+ my_symlnk, ln1, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ if (!(strncmp(buf, my_symlnk, strlen(my_symlnk)))) {
+ fprintf(stderr,
+ "glfs_h_readlink: error mismatch in link name: actual %s: "
+ "retrieved %s\n",
+ my_symlnk, buf);
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+ ret = glfs_h_link(fs, leaf, parent, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ /* link: /testdir/linkdir/dir1/file.txt to ../dir2/slnk.txt */
+ ret = glfs_h_link(fs, dleaf, dirtgt, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, dirtgt, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ printf("glfs_h_link(s) tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+ if (ln1)
+ glfs_h_close(ln1);
+ if (buf)
+ free(buf);
+
+ return;
+}
+
+void
+test_h_rename(void)
+{
+ char *my_dir = "renamedir";
+ char *my_file = "file.txt";
+ char *src_dir = "dir1";
+ char *tgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_rename tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, src_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ src_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, tgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ tgt_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* Rename file.txt -> file1.txt */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "file1.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file1.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1/file.txt -> file.txt */
+ ret = glfs_h_rename(fs, dirsrc, "file.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+ src_dir, "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file1.txt -> file.txt (exists) */
+ ret = glfs_h_rename(fs, parent, "file1.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1 -> dir3 */
+ ret = glfs_h_rename(fs, parent, "dir1", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir1",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir2 ->dir3 (exists) */
+ ret = glfs_h_rename(fs, parent, "dir2", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir2",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file.txt -> dir3 (fail) */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "dir3");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "file.txt", "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir3 -> file.txt (fail) */
+ ret = glfs_h_rename(fs, parent, "dir3", parent, "file.txt");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "dir3", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_rename tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+
+ return;
+}
+
+void
+assimilatetime(struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+void
+test_h_performance(void)
+{
+ char *my_dir = "perftest", *full_dir_path = "/testdir/perftest";
+ char *my_file = "file_", my_file_name[MAXPATHNAME];
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb;
+ int ret, i;
+ struct glfs_fd *fd;
+ struct timespec c_ts = {0, 0}, c_ts_st, c_ts_ed;
+ struct timespec o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+ printf("glfs_h_performance tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* create performance */
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s%d", my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_lookupat(fs, dir, my_file_name, &sb, 0);
+ if (leaf != NULL) {
+ fprintf(stderr, "glfs_h_lookup: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file_name, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (handle based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+
+ /* create using path */
+ c_ts.tv_sec = o_ts.tv_sec = 0;
+ c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+ sprintf(my_file_name, "%s1", full_dir_path);
+ ret = glfs_mkdir(fs, my_file_name, 0755);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_mkdir: error creating %s: from (%p),%s\n", my_dir,
+ parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s1/%sn%d", full_dir_path, my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_stat(fs, my_file_name, &sb);
+ if (ret == 0) {
+ fprintf(stderr, "glfs_stat: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, my_file_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_close(fd);
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (path based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+out:
+ return;
+}
+
+int
+test_handleops(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL, *tmp = NULL;
+ char readbuf[32], writebuf[32];
+ unsigned char leaf_handle[GFAPI_HANDLE_LENGTH];
+
+ char *full_leaf_name = "/testdir/testfile.txt", *leaf_name = "testfile.txt",
+ *relative_leaf_name = "testdir/testfile.txt";
+ char *leaf_name1 = "testfile1.txt";
+ char *full_newparent_name = "/testdir/dir1", *newparent_name = "dir1";
+ char *full_newnod_name = "/testdir/nod1", *newnod_name = "nod1";
+
+ /* Initialize test area */
+ ret = glfs_mkdir(fs, full_parent_name, 0755);
+ if (ret != 0 && errno != EEXIST) {
+ fprintf(stderr, "%s: (%p) %s\n", full_parent_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ } else if (ret != 0) {
+ printf("Found test directory %s to be existing\n", full_parent_name);
+ printf("Cleanup test directory and restart tests\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, full_leaf_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "%s: (%p) %s\n", full_leaf_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ glfs_close(fd);
+
+ printf("Initialized the test area, within volume %s\n", argv[1]);
+
+ /* Handle based APIs test area */
+
+ /* glfs_lookupat test */
+ printf("glfs_h_lookupat tests: In Progress\n");
+ /* start at root of the volume */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a parent within root */
+ parent = glfs_h_lookupat(fs, root, parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a leaf/child within the parent */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check absolute paths */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_lookupat(fs, NULL, full_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ /* check multiple component paths */
+ leaf = glfs_h_lookupat(fs, root, relative_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ relative_leaf_name, parent, strerror(errno));
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check symlinks in path */
+
+ /* TODO: -ve test cases */
+ /* parent invalid
+ * path invalid
+ * path does not exist after some components
+ * no parent, but relative path
+ * parent and full path? -ve?
+ */
+
+ printf("glfs_h_lookupat tests: PASSED\n");
+
+ /* glfs_openat test */
+ printf("glfs_h_open tests: In Progress\n");
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 10, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", readbuf, writebuf);
+ glfs_close(fd);
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_close(fd);
+
+ printf("glfs_h_open tests: PASSED\n");
+
+ /* Create tests */
+ printf("glfs_h_creat tests: In Progress\n");
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_creat: existing file, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ tmp = glfs_h_creat(fs, root, parent_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+ fprintf(stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close(tmp);
+ tmp = NULL;
+ }
+ }
+
+ /* TODO: Other combinations and -ve cases as applicable */
+ printf("glfs_h_creat tests: PASSED\n");
+
+ /* extract handle and create from handle test */
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: In "
+ "Progress\n");
+ /* TODO: Change the lookup to create below for a GIFD recovery failure,
+ * that needs to be fixed */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name1, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_extract_handle(leaf, leaf_handle, GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_handle(leaf_handle);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_create_from_handle(fs, leaf_handle, GFAPI_HANDLE_LENGTH, &sb);
+ if (leaf == NULL) {
+ fprintf(
+ stderr,
+ "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ leaf_name1, leaf_handle, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 0, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", writebuf, writebuf);
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ glfs_close(fd);
+ goto out;
+ }
+
+ glfs_close(fd);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+ /* Mkdir tests */
+ printf("glfs_h_mkdir tests: In Progress\n");
+
+ ret = glfs_rmdir(fs, full_newparent_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_rmdir: Failed for %s: %s\n", full_newparent_name,
+ strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newparent_name, parent, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mkdir tests: PASSED\n");
+
+ /* Mknod tests */
+ printf("glfs_h_mknod tests: In Progress\n");
+ ret = glfs_unlink(fs, full_newnod_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_unlink: Failed for %s: %s\n", full_newnod_name,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, S_IFIFO, 0, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newnod_name, parent, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* TODO: create op on a FIFO node hangs, need to check and fix
+ tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || errno != EINVAL) {
+ fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno =
+ %s\n", tmp, strerror (errno)); printf ("glfs_h_creat/mknod tests:
+ FAILED\n"); if (tmp != NULL) { glfs_h_close(tmp); tmp = NULL;
+ }
+ } */
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, 0644, 0, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mknod tests: PASSED\n");
+
+ /* unlink tests */
+ test_h_unlink();
+
+ /* TODO: opendir tests */
+
+ /* getattr tests */
+ test_h_getsetattrs();
+
+ /* TODO: setattr tests */
+
+ /* truncate tests */
+ test_h_truncate();
+
+ /* link tests */
+ test_h_links();
+
+ /* rename tests */
+ test_h_rename();
+
+ /* performance tests */
+ test_h_performance();
+
+ /* END: New APIs test area */
+
+out:
+ /* Cleanup glfs handles */
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return ret;
+}
+
+int
+test_write_apis(glfs_t *fs)
+{
+ /* Add more content here */
+ /* Some apis we can get are */
+ /*
+ 0. glfs_set_xlator_option()
+
+ Read/Write combinations:
+ . glfs_{p,}readv/{p,}writev
+ . glfs_pread/pwrite
+
+ tests/basic/gfapi/gfapi-async-calls-test.c
+ . glfs_read_async/write_async
+ . glfs_pread_async/pwrite_async
+ . glfs_readv_async/writev_async
+ . glfs_preadv_async/pwritev_async
+
+ . ftruncate/ftruncate_async
+ . fsync/fsync_async
+ . fdatasync/fdatasync_async
+
+ */
+
+ glfs_fd_t *fd = NULL;
+ char *filename = "/filename2";
+ int flags = O_RDWR;
+ char *buf = "some bytes!";
+ char writestr[TEST_STR_LEN];
+ struct iovec iov = {&writestr, TEST_STR_LEN};
+ int ret, i;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+
+ fd = glfs_open(fs, filename, flags);
+ if (!fd)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_writev(fd, &iov, 1, flags);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwrite(fd, buf, 10, 4, flags, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwritev(fd, &iov, 1, 4, flags);
+ if (ret < 0) {
+ fprintf(stderr, "pwritev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_fsync(fd, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "fsync(%s): %d (%s)\n", filename, ret, strerror(errno));
+ }
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_metadata_ops(glfs_t *fs, glfs_t *fs2)
+{
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[11] = "helloworld";
+
+ char *filename = "/filename2";
+ int ret;
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ if (!fd)
+ fprintf(stderr, "creat(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ fd2 = glfs_open(fs2, filename, O_RDWR);
+ if (!fd2)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_write(fd, writebuf, 11, 0);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ glfs_fsync(fd, NULL, NULL);
+
+ glfs_lseek(fd2, 5, SEEK_SET);
+
+ ret = glfs_read(fd2, readbuf, 32, 0);
+
+ printf("read %d, %s", ret, readbuf);
+
+ /* get stat */
+ ret = glfs_fstat(fd2, &sb);
+ if (ret)
+ fprintf(stderr, "fstat(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_access(fs, filename, R_OK);
+ if (ret)
+ fprintf(stderr, "access(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_fallocate(fd2, 1024, 1024, 1024);
+ if (ret)
+ fprintf(stderr, "fallocate(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_discard(fd2, 1024, 512);
+ if (ret)
+ fprintf(stderr, "discard(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_zerofill(fd2, 2048, 1024);
+ if (ret)
+ fprintf(stderr, "zerofill(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ /* set stat */
+ /* TODO: got some errors, need to fix */
+ ret = glfs_fsetattr(fd2, &gsb);
+
+ glfs_close(fd);
+ glfs_close(fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod(fs, filename, S_IFIFO, 0);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rename(fs, filename, "/filename4");
+ if (ret)
+ fprintf(stderr, "rename(%s): (%d) %s\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_unlink(fs, "/filename4");
+ if (ret)
+ fprintf(stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror(errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir(fs, filename, 0);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rmdir(fs, filename);
+ if (ret)
+ fprintf(stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror(errno));
+}
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs2 = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+ char volumeid[64];
+
+ char *filename = "/filename2";
+
+ if ((argc < 2) || (argc > 3)) {
+ printf("Usage:\n\t%s <volname> <hostname>\n\t%s <volfile-path>",
+ argv[0], argv[0]);
+ return -1;
+ }
+
+ if (argc == 2) {
+ /* Generally glfs_new() requires volume name as an argument */
+ fs = glfs_new("test-only");
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile(fs, argv[1]);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile failed\n");
+ } else {
+ fs = glfs_new(argv[1]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+ // ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+ ret = glfs_set_volfile_server(fs, "tcp", argv[2], 24007);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile_server failed\n");
+ }
+
+ /* Change this to relevant file when running locally */
+ ret = glfs_set_logging(fs, "/dev/stderr", 5);
+ if (ret)
+ fprintf(stderr, "glfs_set_logging failed\n");
+
+ ret = glfs_init(fs);
+ if (ret)
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ if (ret)
+ goto out;
+
+ /* no major use for getting the volume id in this test, done for coverage */
+ ret = glfs_get_volumeid(fs, volumeid, 64);
+ if (ret) {
+ fprintf(stderr, "glfs_get_volumeid: returned %d\n", ret);
+ }
+
+ sleep(2);
+
+ if (argc == 2) {
+ /* Generally glfs_new() requires volume name as an argument */
+ fs2 = glfs_new("test_only_volume");
+ if (!fs2) {
+ fprintf(stderr, "glfs_new(fs2): returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile(fs2, argv[1]);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile failed(fs2)\n");
+ } else {
+ fs2 = glfs_new(argv[1]);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new(fs2): returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile_server(fs2, "tcp", argv[2], 24007);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile_server failed(fs2)\n");
+ }
+
+ ret = glfs_set_statedump_path(fs2, "/tmp");
+ if (ret) {
+ fprintf(stderr, "glfs_set_statedump_path: %s\n", strerror(errno));
+ }
+
+ ret = glfs_init(fs2);
+ if (ret)
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ test_metadata_ops(fs, fs2);
+
+ test_dirops(fs);
+
+ test_xattr(fs);
+
+ test_chdir(fs);
+
+ test_handleops(argc, argv);
+ // done
+
+ /* Test some extra apis */
+ test_write_apis(fs);
+
+ glfs_statvfs(fs, "/", &sfs);
+
+ glfs_unset_volfile_server(fs, "tcp", argv[2], 24007);
+
+ glfs_fini(fs);
+ glfs_fini(fs2);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfsxmp.t b/tests/basic/gfapi/glfsxmp.t
new file mode 100644
index 00000000000..b3e6645c0f5
--- /dev/null
+++ b/tests/basic/gfapi/glfsxmp.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+$CLI system getspec $V0 > fubar.vol
+
+TEST cp $(dirname $0)/glfsxmp-coverage.c ./glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+
+TEST ./glfsxmp fubar.vol
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/basic/gfapi/libgfapi-fini-hang.c b/tests/basic/gfapi/libgfapi-fini-hang.c
new file mode 100644
index 00000000000..37800e3188b
--- /dev/null
+++ b/tests/basic/gfapi/libgfapi-fini-hang.c
@@ -0,0 +1,62 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ char readbuf[32];
+ char *logname = NULL;
+ char *hostname = NULL;
+ char *volname = NULL;
+
+ fprintf(stderr, "Starting libgfapi_fini\n");
+
+ if (argc < 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logname = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ exit(1);
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 0);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logname, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ /* Do not call glfs_init.
+ * glfs_fini() shouldn't hang in that case*/
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/libgfapi-fini-hang.t b/tests/basic/gfapi/libgfapi-fini-hang.t
new file mode 100755
index 00000000000..ba262a943ee
--- /dev/null
+++ b/tests/basic/gfapi/libgfapi-fini-hang.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+function check_process () {
+ ps -p $1
+ if [ $? -eq 1 ] ; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=$(gluster --print-logdir)
+
+TEST build_tester $(dirname $0)/libgfapi-fini-hang.c -o $M0/libgfapi-fini-hang -lgfapi
+TEST cd $M0
+ ./libgfapi-fini-hang $H0 $V0 $logdir/libgfapi-fini-hang.log &
+PID=$!
+
+# check if the process "libgfapi-fini-hang" exits with in $PROCESS_UP_TIMEOUT
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_process $PID
+
+# Kill the process if present
+TEST ! kill -9 $PID
+
+TEST rm -f $M0/libgfapi-fini-hang
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/mandatory-lock-optimal.c b/tests/basic/gfapi/mandatory-lock-optimal.c
new file mode 100644
index 00000000000..34fef8d0b80
--- /dev/null
+++ b/tests/basic/gfapi/mandatory-lock-optimal.c
@@ -0,0 +1,532 @@
+/* Pre-requisites:-
+ *
+ * 1. Make sure that performance translators are switched off while running this
+ * test.
+ * 2. Perform the following volume set operation:
+ * # gluster volume set <VOLNAME> locks.mandatory-locking optimal
+ * 3. For installation under non-standard paths, export LD_LIBRARY_PATH to
+ * automatically load exact libgfapi.so and compile this C file as follows:
+ * $ gcc mandatory-lock-optimal.c -lgfapi -I <include path for api/glfs.h> -L
+ * <include path for libgfapi shared library>
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+
+#define TOTAL_TEST_COUNT 8
+
+/* C1 = Client 1 : C2 = Client 2 : C3 = Client 3 :
+ * fs1, fd1 are associated with C1. Similarly fs2, fd2 for C2
+ * and fs3, fd3 for C3 */
+
+FILE *fp;
+glfs_t *fs1, *fs2, *fs3;
+glfs_fd_t *fd, *fd1, *fd2, *fd3;
+struct flock lock;
+char buf1[10], *buf2 = "ten bytes!", *fname = "/mand.lock";
+int ret, test_count;
+off_t offset;
+
+/* run_test_1 () : C1 takes byte range mandatory read lock.
+ C2 attempts to read from a conflicting range.
+ Expected result : Read from C2 should pass.
+
+ * run_test_2 () : C1 takes byte range mandatory read lock.
+ C2 attempts write to a conflicting range.
+ Expected result : Write from C2 should fail with EAGAIN.
+
+ * run_test_3 () : C1 takes byte range advisory write lock.
+ C2 attempts to read from a conflicting range.
+ Expected result : Read from C2 should pass.
+
+ * run_test_4 () : C1 takes byte range advisory write lock.
+ C2 attempts write to a conflicting range.
+ Expected result : Write from C2 should pass.
+
+ * run_test_5 () : C1 takes byte range advisory read lock.
+ C2 attempts to open the same file with O_TRUNC.
+ Expected result : Open from C2 should pass.
+
+ * run_test_6 () : C1 takes byte range mandatory read lock.
+ C2 attempts to open the same file with O_TRUNC.
+ Expected result : Open from C2 should fail with EAGAIN.
+
+ * run_test_7 () : C1 takes byte range mandatory read lock.
+ C2 attempts ftruncate on a conflicting range.
+ Expected result : Write from C2 should fail with EAGAIN.
+
+ * run_test_8 () : C1 takes byte range advisory read lock.
+ C2 takes byte range mandatory read lock
+ within the byte range for which C1 already
+ holds an advisory lock so as to perform a
+ basic split/merge. C3 repositions fd3 to
+ start of C2's byte range mandatory lock
+ offset and attempts a write. Then it again
+ repositions fd3 to one byte past C2's byte
+ range mandatoy lock and again attempts a write.
+ Expected result : First write should fail with EAGAIN.
+ Second write should pass. */
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!fp) \
+ fprintf(stderr, "\n%s : returned error (%s)\n", func, \
+ strerror(err)); \
+ else \
+ fprintf(fp, "\n%s : returned error (%s)\n", func, strerror(err)); \
+ cleanup_and_exit(err); \
+ } while (0)
+
+void
+cleanup_and_exit(int exit_status)
+{
+ if (exit_status || test_count != TOTAL_TEST_COUNT) {
+ fprintf(fp, "\nAborting due to some test failures.\n");
+ exit_status = 1;
+ } else
+ fprintf(fp, "\nAll tests ran successfully.\n");
+ if (fp)
+ fclose(fp);
+ if (fd)
+ glfs_close(fd);
+ if (fd1)
+ glfs_close(fd1);
+ if (fd2)
+ glfs_close(fd2);
+
+ glfs_unlink(fs1, fname);
+
+ if (fs1)
+ glfs_fini(fs1);
+ if (fs2)
+ glfs_fini(fs2);
+
+ exit(exit_status);
+}
+
+glfs_t *
+new_client_create(char *hostname, char *volname, char *logfile_name)
+{
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs)
+ LOG_ERR("glfs_new", errno);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret)
+ LOG_ERR("glfs_set_volfile_server", errno);
+
+ ret = glfs_set_logging(fs, logfile_name, 7);
+ if (ret)
+ LOG_ERR("glfs_set_logging", errno);
+
+ ret = glfs_init(fs);
+ if (ret)
+ LOG_ERR("glfs_init", errno);
+
+ return fs;
+}
+
+void
+run_test_1(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ /* On successful read, 0 is returned as there is no content inside the
+ * file
+ */
+ ret = glfs_read(fd2, buf1, 10, 0);
+ if (ret)
+ LOG_ERR("glfs_read", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_2(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_write(fd2, buf2, 10, 0);
+ if (ret == 10 || errno != EAGAIN)
+ LOG_ERR("glfs_write", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_3(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ /* Still there is no content inside file. So following read should
+ * return 0
+ */
+ ret = glfs_read(fd2, buf1, 10, 0);
+ if (ret)
+ LOG_ERR("glfs_read", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_4(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_write(fd2, buf2, 10, 0);
+ if (ret != 10)
+ LOG_ERR("glfs_write", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_5(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK | O_TRUNC);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_6(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK | O_TRUNC);
+ if (fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_7(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDWR | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_ftruncate(fd2, 4, NULL, NULL);
+ if (ret == 0 || errno != EAGAIN)
+ LOG_ERR("glfs_ftruncate", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_8(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 10L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 5L;
+ lock.l_len = 2L;
+
+ ret = glfs_file_lock(fd2, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd3 = glfs_open(fs3, fname, O_RDWR | O_NONBLOCK);
+ if (!fd3)
+ LOG_ERR("glfs_open", errno);
+
+ offset = glfs_lseek(fd3, 5L, SEEK_SET);
+ if (offset != 5)
+ LOG_ERR("glfs_lseek", errno);
+
+ ret = glfs_write(fd3, buf2, 10, 0);
+ if (ret == 10 || errno != EAGAIN)
+ LOG_ERR("glfs_write", errno);
+
+ offset = glfs_lseek(fd3, 8L, SEEK_SET);
+ if (offset != 8)
+ LOG_ERR("glfs_lseek", errno);
+
+ ret = glfs_write(fd3, buf2, 10, 0);
+ if (ret != 10)
+ LOG_ERR("glfs_write", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ ret = glfs_close(fd3);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd3 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char logfile[50];
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Usage: %s <server ip/hostname> <volume name> <test log "
+ "directory>\n",
+ argv[0]);
+ return 0;
+ }
+
+ sprintf(logfile, "%s/%s", argv[3], "mandatory-lock-optimal-test.log");
+ fp = fopen(logfile, "w");
+ if (!fp) {
+ fprintf(stderr, "\n%s\n", logfile);
+ LOG_ERR("Log file creation", errno);
+ }
+
+ sprintf(logfile, "%s/%s", argv[3], "glfs-client-1.log");
+ fs1 = new_client_create(argv[1], argv[2], logfile);
+ if (!fs1)
+ LOG_ERR("client-1 creation", EINVAL);
+
+ sprintf(logfile, "%s/%s", argv[3], "glfs-client-2.log");
+ fs2 = new_client_create(argv[1], argv[2], logfile);
+ if (!fs2)
+ LOG_ERR("client-2 creation", EINVAL);
+
+ sprintf(logfile, "%s/%s", argv[3], "glfs-client-3.log");
+ fs3 = new_client_create(argv[1], argv[2], logfile);
+ if (!fs3)
+ LOG_ERR("client-3 creation", EINVAL);
+
+ fd = glfs_creat(fs1, fname, O_RDWR, 0644);
+ if (!fd)
+ LOG_ERR("glfs_creat", errno);
+
+ test_count = 0;
+
+ run_test_1(1);
+ run_test_2(2);
+ run_test_3(3);
+ run_test_4(4);
+ run_test_5(5);
+ run_test_6(6);
+ run_test_7(7);
+ run_test_8(8);
+
+ cleanup_and_exit(0);
+
+ return 0;
+}
diff --git a/tests/basic/gfapi/mandatory-lock-optimal.t b/tests/basic/gfapi/mandatory-lock-optimal.t
new file mode 100644
index 00000000000..27062e1f6c2
--- /dev/null
+++ b/tests/basic/gfapi/mandatory-lock-optimal.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+
+# Create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+logdir=`gluster --print-logdir`
+
+# Switch off performance translators
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+
+# Enable optimal mandatory-locking mode and restart the volume
+TEST $CLI volume set $V0 locks.mandatory-locking optimal
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+# Compile and run the test program
+TEST build_tester $(dirname $0)/mandatory-lock-optimal.c -lgfapi
+TEST ./$(dirname $0)/mandatory-lock-optimal $H0 $V0 $logdir
+
+# Cleanup the environment
+cleanup_tester $(dirname $0)/mandatory-lock-optimal
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/basic/gfapi/protocol-client-ssl.vol.in b/tests/basic/gfapi/protocol-client-ssl.vol.in
new file mode 100644
index 00000000000..cdc0c9d0671
--- /dev/null
+++ b/tests/basic/gfapi/protocol-client-ssl.vol.in
@@ -0,0 +1,15 @@
+#
+# This .vol file expects that there is
+#
+# 1. GlusterD listening on @@HOSTNAME@@
+# 2. a volume that provides a brick on @@BRICKPATH@@
+# 3. the volume with the brick has been started
+#
+volume test
+ type protocol/client
+ option remote-host @@HOSTNAME@@
+ option remote-subvolume @@BRICKPATH@@
+ option transport-type socket
+ option transport.socket.ssl-enabled @@SSL@@
+end-volume
+
diff --git a/tests/basic/gfapi/protocol-client.vol.in b/tests/basic/gfapi/protocol-client.vol.in
new file mode 100644
index 00000000000..ef35001e29f
--- /dev/null
+++ b/tests/basic/gfapi/protocol-client.vol.in
@@ -0,0 +1,14 @@
+#
+# This .vol file expects that there is
+#
+# 1. GlusterD listening on @@HOSTNAME@@
+# 2. a volume that provides a brick on @@BRICKPATH@@
+# 3. the volume with the brick has been started
+#
+volume test
+ type protocol/client
+ option remote-host @@HOSTNAME@@
+ option remote-subvolume @@BRICKPATH@@
+ option transport-type socket
+end-volume
+
diff --git a/tests/basic/gfapi/seek.c b/tests/basic/gfapi/seek.c
new file mode 100644
index 00000000000..85ea9b88141
--- /dev/null
+++ b/tests/basic/gfapi/seek.c
@@ -0,0 +1,99 @@
+/* seek.c - use glfs_lseek() to find holes in a file
+ *
+ * Author: Niels de Vos <ndevos@redhat.com>
+ */
+
+/* needed for SEEK_HOLE/SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ char *filename = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ struct stat st = {
+ 0,
+ };
+ off_t hole_start = 0;
+ off_t hole_end = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument, use %s <hostname> <vol> <file>\n",
+ argv[0]);
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ filename = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ perror("glfs_new() returned NULL");
+ return 1;
+ }
+
+ if (glfs_set_volfile_server(fs, "tcp", hostname, 24007)) {
+ perror("glfs_set_volfile_server");
+ return 1;
+ }
+
+ if (glfs_init(fs)) {
+ perror("glfs_init");
+ return 1;
+ }
+
+ fd = glfs_open(fs, filename, O_RDONLY);
+ if (fd <= 0) {
+ perror("glfs_open");
+ return 1;
+ }
+
+ if (glfs_fstat(fd, &st)) {
+ perror("glfs_fstat");
+ return 1;
+ }
+
+ while (hole_end < st.st_size) {
+ hole_start = glfs_lseek(fd, hole_end, SEEK_HOLE);
+ if (hole_start == -1 && errno == ENXIO)
+ /* no more holes */
+ break;
+ if (hole_start == -1) {
+ perror("no more holes");
+ break;
+ }
+
+ hole_end = glfs_lseek(fd, hole_start, SEEK_DATA);
+ if (hole_end == -1 && errno == ENXIO) {
+ /* no more data */
+ break;
+ }
+
+ printf("HOLE found: %ld - %ld%s\n", hole_start, hole_end,
+ (hole_end == st.st_size) ? " (EOF)" : "");
+ }
+
+ glfs_close(fd);
+
+ if (fs) {
+ glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/sink.t b/tests/basic/gfapi/sink.t
new file mode 100644
index 00000000000..53af2ecf62d
--- /dev/null
+++ b/tests/basic/gfapi/sink.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST build_tester $(dirname ${0})/gfapi-load-volfile.c -lgfapi
+TEST ./$(dirname ${0})/gfapi-load-volfile $(dirname $0)/sink.vol
+
+cleanup_tester $(dirname ${0})/gfapi-load-volfile
+
+cleanup
diff --git a/tests/basic/gfapi/sink.vol b/tests/basic/gfapi/sink.vol
new file mode 100644
index 00000000000..d1c92261448
--- /dev/null
+++ b/tests/basic/gfapi/sink.vol
@@ -0,0 +1,24 @@
+#
+# The sink xlator does not do any memory allocations. It only passes the FOPs
+# through to the next xlator.
+#
+# For testing, there is no next xlator needed, we are only interested in the
+# resource usage of the Gluster core when gfapi is used.
+#
+# Note: The sink xlator does not handle any calls. Mounting is possible, but
+# any I/O needs additional functionality in the sink xlator.
+#
+volume sink
+ type debug/sink
+ # an option is required, otherwise the graph parsing fails
+ option an-option-is-required yes
+end-volume
+
+#
+# It is possible to test the resource usage of other xlators by adding them in
+# the graph before the "sink".
+#
+#volume mdcache-sink
+# type performance/md-cache
+# subvolumes sink
+#end-volume
diff --git a/tests/basic/gfapi/upcall-cache-invalidate.c b/tests/basic/gfapi/upcall-cache-invalidate.c
new file mode 100644
index 00000000000..078286a8956
--- /dev/null
+++ b/tests/basic/gfapi/upcall-cache-invalidate.c
@@ -0,0 +1,209 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_t *fs2 = NULL;
+ glfs_t *fs_tmp = NULL;
+ glfs_t *fs_tmp2 = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd_tmp = NULL;
+ glfs_fd_t *fd_tmp2 = NULL;
+ char readbuf[32];
+ char *filename = "file_tmp";
+ char *writebuf = NULL;
+ char *vol_id = NULL;
+ unsigned int cnt = 1;
+ struct glfs_upcall *cbk = NULL;
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ /* This does not block, but enables caching of events. Real
+ * applications like NFS-Ganesha run this in a thread before activity
+ * on the fs (through this instance) happens. */
+ ret = glfs_h_poll_upcall(fs_tmp, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ fs2 = glfs_new(volname);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new fs2: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs2, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server-fs2", ret);
+
+ ret = glfs_set_logging(fs2, logfile, 7);
+ LOG_ERR("glfs_set_logging-fs2", ret);
+
+ ret = glfs_init(fs2);
+ LOG_ERR("glfs_init-fs2", ret);
+
+ fd = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+ fprintf(stderr, "glfs-create fd - %d\n", fd);
+
+ fd2 = glfs_open(fs2, filename, O_SYNC | O_RDWR | O_CREAT);
+ if (fd2 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_open-fs2", ret);
+ }
+ fprintf(stderr, "glfs-open fd2 - %d\n", fd2);
+
+ do {
+ if (cnt % 2) {
+ fd_tmp = fd;
+ fs_tmp = fs;
+ fd_tmp2 = fd2;
+ fs_tmp2 = fs2;
+ } else {
+ fd_tmp = fd2;
+ fs_tmp = fs2;
+ fd_tmp2 = fd;
+ fs_tmp2 = fs;
+ }
+
+ /* WRITE on fd_tmp */
+ writebuf = malloc(10);
+ if (writebuf) {
+ memcpy(writebuf, "abcd", 4);
+ ret = glfs_write(fd_tmp, writebuf, 4, 0);
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_write", ret);
+ } else {
+ fprintf(stderr, "glfs_write succeeded\n");
+ }
+ free(writebuf);
+ } else {
+ fprintf(stderr, "Could not allocate writebuf\n");
+ return -1;
+ }
+
+ /* READ on fd_tmp2 */
+ ret = glfs_lseek(fd_tmp2, 0, SEEK_SET);
+ LOG_ERR("glfs_lseek", ret);
+
+ memset(readbuf, 0, sizeof(readbuf));
+ ret = glfs_pread(fd_tmp2, readbuf, 4, 0, 0, NULL);
+
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_pread", ret);
+ } else {
+ fprintf(stderr, "glfs_read: %s\n", readbuf);
+ }
+
+ /* Open() fops seem to be not performed on server side until
+ * there are I/Os on that fd
+ */
+ if (cnt > 2) {
+ struct glfs_upcall_inode *in_arg = NULL;
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_object *object = NULL;
+ uint64_t flags = 0;
+ uint64_t expire = 0;
+
+ ret = glfs_h_poll_upcall(fs_tmp, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ reason = glfs_upcall_get_reason(cbk);
+
+ /* Expect 'GLFS_INODE_INVALIDATE' upcall event. */
+ if (reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ in_arg = glfs_upcall_get_event(cbk);
+
+ object = glfs_upcall_inode_get_object(in_arg);
+ flags = glfs_upcall_inode_get_flags(in_arg);
+ expire = glfs_upcall_inode_get_expire(in_arg);
+
+ fprintf(stderr,
+ " upcall event type - %d,"
+ " object(%p), flags(%d), "
+ " expire_time_attr(%d)\n",
+ reason, object, flags, expire);
+ } else {
+ fprintf(stderr, "Didn't receive upcall notify event");
+ ret = -1;
+ goto err;
+ }
+
+ glfs_free(cbk);
+ }
+
+ sleep(5);
+ } while (++cnt < 5);
+
+err:
+ glfs_close(fd);
+ LOG_ERR("glfs_close", ret);
+
+ glfs_close(fd2);
+ LOG_ERR("glfs_close-fd2", ret);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ if (fs2) {
+ ret = glfs_fini(fs2);
+ fprintf(stderr, "glfs_fini(fs2) returned %d \n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/upcall-cache-invalidate.t b/tests/basic/gfapi/upcall-cache-invalidate.t
new file mode 100755
index 00000000000..5fd6a3332e7
--- /dev/null
+++ b/tests/basic/gfapi/upcall-cache-invalidate.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/upcall-cache-invalidate.c -lgfapi
+
+TEST ./$(dirname $0)/upcall-cache-invalidate $H0 $V0 $logdir/upcall-cache-invalidate.log
+
+cleanup_tester $(dirname $0)/upcall-cache-invalidate
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/upcall-register-api.c b/tests/basic/gfapi/upcall-register-api.c
new file mode 100644
index 00000000000..53ce0ecdb68
--- /dev/null
+++ b/tests/basic/gfapi/upcall-register-api.c
@@ -0,0 +1,286 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int upcall_recv = 0;
+
+void
+up_async_invalidate(struct glfs_upcall *up_arg, void *data)
+{
+ struct glfs_upcall_inode *in_arg = NULL;
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_object *object = NULL;
+ uint64_t flags = 0;
+ uint64_t expire = 0;
+
+ if (!up_arg)
+ return;
+
+ reason = glfs_upcall_get_reason(up_arg);
+
+ /* Expect 'GLFS_INODE_INVALIDATE' upcall event. */
+
+ if (reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ in_arg = glfs_upcall_get_event(up_arg);
+
+ object = glfs_upcall_inode_get_object(in_arg);
+ flags = glfs_upcall_inode_get_flags(in_arg);
+ expire = glfs_upcall_inode_get_expire(in_arg);
+
+ fprintf(stderr,
+ " upcall event type - %d,"
+ " object(%p), flags(%d), "
+ " expire_time_attr(%d)\n",
+ reason, object, flags, expire);
+ upcall_recv++;
+ }
+
+ glfs_free(up_arg);
+ return;
+}
+
+int
+perform_io(glfs_t *fs, glfs_t *fs2, int cnt)
+{
+ glfs_t *fs_tmp = NULL;
+ glfs_t *fs_tmp2 = NULL;
+ glfs_fd_t *fd_tmp = NULL;
+ glfs_fd_t *fd_tmp2 = NULL;
+ char readbuf[32];
+ char *writebuf = NULL;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *filename = "file_tmp";
+ int ret = -1;
+
+ if (!fs || !fs2)
+ return -1;
+
+ /* Create file from fs and open it from fs2 */
+ fd = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+
+ fd2 = glfs_open(fs2, filename, O_SYNC | O_RDWR | O_CREAT);
+ if (fd2 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_open-fs2", ret);
+ }
+
+ do {
+ if (cnt % 2) {
+ fd_tmp = fd;
+ fs_tmp = fs;
+ fd_tmp2 = fd2;
+ fs_tmp2 = fs2;
+ } else {
+ fd_tmp = fd2;
+ fs_tmp = fs2;
+ fd_tmp2 = fd;
+ fs_tmp2 = fs;
+ }
+
+ /* WRITE on fd_tmp */
+ writebuf = malloc(10);
+ if (writebuf) {
+ memcpy(writebuf, "abcd", 4);
+ ret = glfs_write(fd_tmp, writebuf, 4, 0);
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_write", ret);
+ }
+ free(writebuf);
+ } else {
+ fprintf(stderr, "Could not allocate writebuf\n");
+ return -1;
+ }
+
+ /* READ on fd_tmp2 */
+ ret = glfs_lseek(fd_tmp2, 0, SEEK_SET);
+ LOG_ERR("glfs_lseek", ret);
+
+ memset(readbuf, 0, sizeof(readbuf));
+ ret = glfs_pread(fd_tmp2, readbuf, 4, 0, 0, NULL);
+
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_pread", ret);
+ }
+
+ sleep(2);
+ } while (--cnt > 0);
+
+ sleep(2);
+
+ ret = 0;
+err:
+ glfs_close(fd);
+
+ glfs_close(fd2);
+
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_t *fs2 = NULL;
+ int ret = 0, i;
+ char *vol_id = NULL;
+ unsigned int cnt = 5;
+ struct glfs_upcall *cbk = NULL;
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ int up_events = GLFS_EVENT_ANY;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ /* Initialize fs */
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ /* Initialize fs2 */
+ fs2 = glfs_new(volname);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new fs2: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs2, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server-fs2", ret);
+
+ ret = glfs_set_logging(fs2, logfile, 7);
+ LOG_ERR("glfs_set_logging-fs2", ret);
+
+ ret = glfs_init(fs2);
+ LOG_ERR("glfs_init-fs2", ret);
+
+ /* Register Upcalls */
+ ret = glfs_upcall_register(fs, up_events, up_async_invalidate, NULL);
+
+ /* Check if the return mask contains the event */
+ if (!(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_register return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ ret = glfs_upcall_register(fs2, up_events, up_async_invalidate, NULL);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_register return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ /* Perform I/O */
+ ret = perform_io(fs, fs2, cnt);
+ LOG_ERR("perform_io", ret);
+
+ if (upcall_recv == 0) {
+ fprintf(stderr, "Upcalls are not received.\n");
+ ret = -1;
+ } else {
+ fprintf(stderr, "Received %d upcalls as expected\n", upcall_recv);
+ ret = 0;
+ }
+
+ sleep(5); /* to flush out previous upcalls if any */
+
+ /* Now unregister and check there are no upcall events received */
+ ret = glfs_upcall_unregister(fs, up_events);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_unregister return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ ret = glfs_upcall_unregister(fs2, up_events);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_unregister return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ upcall_recv = 0;
+
+ ret = perform_io(fs, fs2, cnt);
+ LOG_ERR("perform_io", ret);
+
+ if (upcall_recv != 0) {
+ fprintf(stderr, "%d upcalls received even after unregister.\n",
+ upcall_recv);
+ ret = -1;
+ } else {
+ fprintf(stderr,
+ "Post unregister, no upcalls received as"
+ " expected\n");
+ ret = 0;
+ }
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ if (fs2) {
+ ret = glfs_fini(fs2);
+ fprintf(stderr, "glfs_fini(fs2) returned %d\n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/upcall-register-api.t b/tests/basic/gfapi/upcall-register-api.t
new file mode 100755
index 00000000000..a46234ed7af
--- /dev/null
+++ b/tests/basic/gfapi/upcall-register-api.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/upcall-register-api.c -lgfapi
+
+TEST ./$(dirname $0)/upcall-register-api $H0 $V0 $logdir/upcall-register-api.log
+
+cleanup_tester $(dirname $0)/upcall-register-api
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfid-access.t b/tests/basic/gfid-access.t
new file mode 100644
index 00000000000..19b6564e676
--- /dev/null
+++ b/tests/basic/gfid-access.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount;
+TEST mkdir $M0/a
+TEST touch $M0/b
+a_gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+b_gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+
+#Operations on Directory
+TEST setfattr -n trusted.abc -v abc $M0/a
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/a)
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$a_gfid_str)
+TEST setfattr -x trusted.abc $M0/a
+TEST ! getfattr -n trusted.abc $M0/a
+TEST ! getfattr -n trusted.abc $M0/.gfid/$a_gfid_str
+TEST chmod 0777 $M0/a
+EXPECT "777" stat -c "%a" $M0/a
+EXPECT "777" stat -c "%a" $M0/.gfid/$a_gfid_str
+
+TEST setfattr -n trusted.abc -v def $M0/.gfid/$a_gfid_str
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/a)
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$a_gfid_str)
+TEST setfattr -x trusted.abc $M0/.gfid/$a_gfid_str
+TEST ! getfattr -n trusted.abc $M0/a
+TEST ! getfattr -n trusted.abc $M0/.gfid/$a_gfid_str
+TEST chmod 0777 $M0/.gfid/$a_gfid_str
+EXPECT "777" stat -c "%a" $M0/a
+EXPECT "777" stat -c "%a" $M0/.gfid/$a_gfid_str
+
+#Entry operations on directory
+#Test that virtual directories are not allowed to be deleted.
+TEST ! mkdir $M0/.gfid
+TEST ! rmdir $M0/.gfid
+TEST ! touch $M0/.gfid
+TEST ! rm -f $M0/.gfid
+TEST ! mv $M0/.gfid $M0/dont-rename
+TEST ! ln -s $M0/symlink $M0/.gfid
+TEST ! ln $M0/.gfid $M0/hlink
+TEST ! mknod $M0/.gfid b 0 0
+
+#Test that first level directory/file creations inside .gfid are not allowed.
+tmpfile=$(mktemp)
+TEST ! mkdir $M0/.gfid/a
+TEST ! touch $M0/.gfid/a
+TEST ! mv $tmpfile $M0/.gfid
+TEST ! mv $M0/a $M0/.gfid
+TEST ! mknod $M0/.gfid/b b 0 0
+rm -f $tmpfile
+
+#Operations on File
+TEST setfattr -n trusted.abc -v abc $M0/b
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/b)
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$b_gfid_str)
+TEST setfattr -x trusted.abc $M0/b
+TEST ! getfattr -n trusted.abc $M0/b
+TEST ! getfattr -n trusted.abc $M0/.gfid/$b_gfid_str
+TEST chmod 0777 $M0/b
+EXPECT "777" stat -c "%a" $M0/b
+EXPECT "777" stat -c "%a" $M0/.gfid/$b_gfid_str
+
+TEST setfattr -n trusted.abc -v def $M0/.gfid/$b_gfid_str
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/b)
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$b_gfid_str)
+TEST setfattr -x trusted.abc $M0/.gfid/$b_gfid_str
+TEST ! getfattr -n trusted.abc $M0/b
+TEST ! getfattr -n trusted.abc $M0/.gfid/$b_gfid_str
+TEST chmod 0777 $M0/.gfid/$b_gfid_str
+EXPECT "777" stat -c "%a" $M0/b
+EXPECT "777" stat -c "%a" $M0/.gfid/$b_gfid_str
+
+cleanup
diff --git a/tests/basic/gfproxy.t b/tests/basic/gfproxy.t
new file mode 100755
index 00000000000..7aa8b70b793
--- /dev/null
+++ b/tests/basic/gfproxy.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+function file_exists
+{
+ if [ -f $1 ]; then echo "Y"; else echo "N"; fi
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 config.gfproxyd enable
+TEST $CLI volume set $V0 failover-hosts "127.0.0.1,192.168.122.215,192.168.122.90"
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0
+
+sleep 2
+
+REGULAR_CLIENT_VOLFILE="/var/lib/glusterd/vols/${V0}/trusted-${V0}.tcp-fuse.vol"
+GFPROXY_CLIENT_VOLFILE="/var/lib/glusterd/vols/${V0}/trusted-${V0}.tcp-gfproxy-fuse.vol"
+GFPROXYD_VOLFILE="/var/lib/glusterd/vols/${V0}/${V0}.gfproxyd.vol"
+
+# Client volfile must exist
+TEST [ -f $GFPROXY_CLIENT_VOLFILE ]
+
+# write-behind translators must exist
+TEST grep "performance/write-behind" $GFPROXY_CLIENT_VOLFILE
+
+# Make sure we didn't screw up the existing client
+TEST grep "performance/write-behind" $REGULAR_CLIENT_VOLFILE
+TEST grep "cluster/replicate" $REGULAR_CLIENT_VOLFILE
+TEST grep "cluster/distribute" $REGULAR_CLIENT_VOLFILE
+
+TEST [ -f $GFPROXYD_VOLFILE ]
+
+TEST grep "cluster/replicate" $GFPROXYD_VOLFILE
+TEST grep "cluster/distribute" $GFPROXYD_VOLFILE
+
+# write-behind must *not* exist
+TEST ! grep "performance/write-behind" $GFPROXYD_VOLFILE
+
+# Test that we can start the server and the client
+TEST glusterfs --thin-client --volfile-id=patchy --volfile-server=$H0 -l /var/log/glusterfs/${V0}-gfproxy-client.log $M0
+sleep 2
+TEST grep gfproxy-client/${V0} /proc/mounts
+
+# Write data to the mount and checksum it
+TEST dd if=/dev/urandom bs=1M count=10 of=/tmp/testfile1
+md5=$(md5sum /tmp/testfile1 | awk '{print $1}')
+TEST cp -v /tmp/testfile1 $M0/testfile1
+TEST [ "$(md5sum $M0/testfile1 | awk '{print $1}')" == "$md5" ]
+
+rm /tmp/testfile1
+
+dd if=/dev/zero of=$M0/bigfile bs=1K count=10240 &
+BG_STRESS_PID=$!
+
+TEST wait $BG_STRESS_PID
+
+# Perform graph change and make sure the gfproxyd restarts
+TEST $CLI volume set $V0 stat-prefetch off
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/bigfile
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=1501392
diff --git a/tests/basic/global-threading.t b/tests/basic/global-threading.t
new file mode 100644
index 00000000000..f7d34044b09
--- /dev/null
+++ b/tests/basic/global-threading.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# Test if the given process has a number of threads of a given type between
+# min and max.
+function check_threads() {
+ local pid="${1}"
+ local pattern="${2}"
+ local min="${3}"
+ local max="${4-}"
+ local count
+
+ count="$(ps hH -o comm ${pid} | grep "${pattern}" | wc -l)"
+ if [[ ${min} -gt ${count} ]]; then
+ return 1
+ fi
+ if [[ ! -z "${max}" && ${max} -lt ${count} ]]; then
+ return 1
+ fi
+
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+
+# Glusterd shouldn't use any thread
+TEST check_threads $(get_glusterd_pid) glfs_tpw 0 0
+TEST check_threads $(get_glusterd_pid) glfs_iotwr 0 0
+
+TEST pkill -9 glusterd
+
+TEST glusterd --global-threading
+
+# Glusterd shouldn't use global threads, even if enabled
+TEST check_threads $(get_glusterd_pid) glfs_tpw 0 0
+TEST check_threads $(get_glusterd_pid) glfs_iotwr 0 0
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/b{0,1}
+
+# Normal configuration using io-threads on bricks
+TEST $CLI volume set $V0 config.global-threading off
+TEST $CLI volume set $V0 performance.iot-pass-through off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume start $V0
+
+# There shouldn't be global threads
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_tpw 0 0
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_tpw 0 0
+
+# There should be at least 1 io-thread
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_iotwr 1
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_iotwr 1
+
+# Self-heal should be using global threads
+TEST check_threads $(get_shd_process_pid) glfs_tpw 1
+TEST check_threads $(get_shd_process_pid) glfs_iotwr 0 0
+
+TEST $CLI volume stop $V0
+
+# Configuration with global threads on bricks
+TEST $CLI volume set $V0 config.global-threading on
+TEST $CLI volume set $V0 performance.iot-pass-through on
+TEST $CLI volume start $V0
+
+# There should be at least 1 global thread
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_tpw 1
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_tpw 1
+
+# There shouldn't be any io-thread worker threads
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_iotwr 0 0
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_iotwr 0 0
+
+# Normal configuration using io-threads on clients
+TEST $CLI volume set $V0 performance.iot-pass-through off
+TEST $CLI volume set $V0 performance.client-io-threads on
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+# There shouldn't be global threads
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_tpw 0 0
+
+# There should be at least 1 io-thread
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_iotwr 1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Configuration with global threads on clients
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --global-threading $M0
+
+# There should be at least 1 global thread
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_tpw 1
+
+# There shouldn't be io-threads
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_iotwr 0 0
+
+# Some basic volume access checks with global-threading enabled everywhere
+TEST mkdir ${M0}/dir
+TEST dd if=/dev/zero of=${M0}/dir/file bs=128k count=8
+
+cleanup
diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t
new file mode 100644
index 00000000000..46d0dac2fce
--- /dev/null
+++ b/tests/basic/glusterd-restart-shd-mux.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=20
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 3); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Stop the glusterd
+TEST pkill glusterd
+#Only stopping glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+#Reboot a node scenario
+TEST pkill gluster
+#Only stopped glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+for i in $(seq 1 3); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/glusterd/arbiter-volume-probe.t b/tests/basic/glusterd/arbiter-volume-probe.t
new file mode 100644
index 00000000000..cb05f4ada42
--- /dev/null
+++ b/tests/basic/glusterd/arbiter-volume-probe.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#This tests if the arbiter-count is transferred to the other peer.
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+kill_glusterd 2
+$CLI_1 volume create $V0 replica 3 arbiter 1 $H0:$B0/b{1..3}
+TEST $glusterd_2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field_1 $V0 "Number of Bricks"
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field_2 $V0 "Number of Bricks"
+
+cleanup;
diff --git a/tests/basic/glusterd/check-cloudsync-ancestry.t b/tests/basic/glusterd/check-cloudsync-ancestry.t
new file mode 100644
index 00000000000..ff6ffee8db7
--- /dev/null
+++ b/tests/basic/glusterd/check-cloudsync-ancestry.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# When shard and cloudsync xlators enabled on a volume, shard xlator
+# should be an ancestor of cloudsync. This testcase is to check this condition.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+
+volfile=$(gluster system:: getwd)"/vols/$V0/trusted-$V0.tcp-fuse.vol"
+
+#Test that both shard and cloudsync are not loaded
+EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+#Enable shard and cloudsync in that order and check if volfile is correct
+TEST $CLI volume set $V0 shard on
+TEST $CLI volume set $V0 cloudsync on
+
+#Test that both shard and cloudsync are loaded
+EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
+
+#Disable shard and cloudsync
+TEST $CLI volume set $V0 shard off
+TEST $CLI volume set $V0 cloudsync off
+
+#Test that both shard and cloudsync are not loaded
+EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+#Enable cloudsync and shard in that order and check if volfile is correct
+TEST $CLI volume set $V0 cloudsync on
+TEST $CLI volume set $V0 shard on
+
+#Test that both shard and cloudsync are loaded
+EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
+
+cleanup;
diff --git a/tests/basic/glusterd/disperse-create.t b/tests/basic/glusterd/disperse-create.t
new file mode 100644
index 00000000000..db8a621d48e
--- /dev/null
+++ b/tests/basic/glusterd/disperse-create.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This command tests the volume create command validation for disperse volumes.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 $H0:$B0/b4 $H0:$B0/b5 $H0:$B0/b6
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/b7 $H0:$B0/b8 $H0:$B0/b9
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse-data 2 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse-data 2 redundancy 1 $H0:$B0/b11 $H0:$B0/b12 $H0:$B0/b13
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse-data 2 redundancy 1 $H0:$B0/b11 $H0:$B0/b12 $H0:$B0/b13
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 disperse-data 2 $H0:$B0/b14 $H0:$B0/b15 $H0:$B0/b16
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 disperse-data 2 redundancy 1 $H0:$B0/b17 $H0:$B0/b18 $H0:$B0/b19
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+# -ve test cases
+#Key-words appearing more than once
+TEST ! $CLI volume create $V0 disperse 3 disperse 3 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse-data 2 disperse-data 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 redundancy 1 redundancy 1 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+
+#Minimum counts test
+TEST ! $CLI volume create $V0 disperse 2 $H0:$B0/b20 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse-data 1 redundancy 0 $H0:$B0/b20 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse 4 disperse-data 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b23 $H0:$B0/b24
+TEST ! $CLI volume create $V0 redundancy 0 $H0:$B0/b20 $H0:$B0/b22
+
+#Wrong count n != k+m
+TEST ! $CLI volume create $V0 disperse 4 disperse-data 4 redundancy 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+#Num bricks is not multiple of disperse count
+TEST ! $CLI volume create $V0 disperse 6 disperse-data 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+#Redundancy > data
+TEST ! $CLI volume create $V0 disperse 6 disperse-data 2 redundancy 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse 4 disperse-data 2 redundancy 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+#Replica + Disperse
+TEST ! $CLI volume create $V0 disperse 4 replica 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse-data 2 replica 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22 $H0:$B0/b23
+TEST ! $CLI volume create $V0 redundancy 2 replica 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 replica 2 disperse 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 replica 2 disperse-data 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22 $H0:$B0/b23
+TEST ! $CLI volume create $V0 replica 2 redundancy 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+
+cleanup
diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t
new file mode 100644
index 00000000000..7dae3c3f0fb
--- /dev/null
+++ b/tests/basic/glusterd/heald.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test contains volume heal commands handled by glusterd.
+# Covers enable/disable at the moment. Will be enhanced later to include
+# the other commands as well.
+
+function is_pid_running {
+ local pid=$1
+ num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l`
+ echo $num
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+#Commands should fail when volume doesn't exist
+TEST ! $CLI volume heal non-existent-volume enable
+TEST ! $CLI volume heal non-existent-volume disable
+
+# Glustershd shouldn't be running as long as there are no replicate/disperse
+# volumes
+TEST $CLI volume create dist $H0:$B0/dist
+TEST $CLI volume start dist
+TEST "[ -z $(get_shd_process_pid dist)]"
+TEST ! $CLI volume heal dist enable
+TEST ! $CLI volume heal dist disable
+
+# Commands should work on replicate/disperse volume.
+TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+TEST "[ -z $(get_shd_process_pid r2)]"
+TEST $CLI volume start r2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
+TEST $CLI volume heal r2 enable
+EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
+volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol"
+EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
+pid=$( get_shd_process_pid r2 )
+TEST $CLI volume heal r2 disable
+EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
+EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+EXPECT "1" is_pid_running $pid
+
+# Commands should work on disperse volume.
+TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
+TEST $CLI volume start ec2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
+TEST $CLI volume heal ec2 enable
+EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol"
+EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
+pid=$(get_shd_process_pid ec2)
+TEST $CLI volume heal ec2 disable
+EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+EXPECT "1" is_pid_running $pid
+
+#Check that shd graph is rewritten correctly on volume stop/start
+EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
+
+EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
+TEST $CLI volume stop r2
+EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
+TEST $CLI volume stop ec2
+# When both the volumes are stopped glustershd volfile is not modified just the
+# process is stopped
+TEST "[ -z $(get_shd_process_pid dist) ]"
+TEST "[ -z $(get_shd_process_pid ec2) ]"
+
+TEST $CLI volume start r2
+EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
+
+TEST $CLI volume set r2 self-heal-daemon on
+TEST $CLI volume set r2 cluster.self-heal-daemon off
+TEST ! $CLI volume set ec2 self-heal-daemon off
+TEST ! $CLI volume set ec2 cluster.self-heal-daemon on
+TEST ! $CLI volume set dist self-heal-daemon off
+TEST ! $CLI volume set dist cluster.self-heal-daemon on
+
+TEST $CLI volume set ec2 disperse-self-heal-daemon off
+TEST $CLI volume set ec2 cluster.disperse-self-heal-daemon on
+TEST ! $CLI volume set r2 disperse-self-heal-daemon on
+TEST ! $CLI volume set r2 cluster.disperse-self-heal-daemon off
+TEST ! $CLI volume set dist disperse-self-heal-daemon off
+TEST ! $CLI volume set dist cluster.disperse-self-heal-daemon on
+
+cleanup
diff --git a/tests/basic/glusterd/thin-arbiter-volume-probe.t b/tests/basic/glusterd/thin-arbiter-volume-probe.t
new file mode 100644
index 00000000000..acc6943806d
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume-probe.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#This tests if the thin-arbiter-count is transferred to the other peer.
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+kill_glusterd 2
+$CLI_1 volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{1..3}
+TEST $glusterd_2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+EXPECT "1 x 2 = 2" volinfo_field_1 $V0 "Number of Bricks"
+EXPECT "1 x 2 = 2" volinfo_field_2 $V0 "Number of Bricks"
+
+cleanup;
diff --git a/tests/basic/glusterd/thin-arbiter-volume.t b/tests/basic/glusterd/thin-arbiter-volume.t
new file mode 100644
index 00000000000..4e813890a45
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../ volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+#This command tests the volume create command validation for thin-arbiter volumes.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/a.txt
+TEST ls $B0/b1/a.txt
+TEST ls $B0/b2/a.txt
+TEST ! ls $B0/b3/a.txt
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{4..8}
+EXPECT "2 x 2 = 4" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+
+TEST rm -rf $B0/b{1..3}
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+TEST killall -15 glusterd
+TEST glusterd
+TEST pidof glusterd
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+cleanup
+
diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t
new file mode 100644
index 00000000000..e11cfed509a
--- /dev/null
+++ b/tests/basic/glusterd/volfile_server_switch.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+
+cleanup;
+
+# * How this test works ?
+# 1. create a 3 node cluster
+# 2. add them to trusted pool
+# 3. create a volume and start
+# 4. mount the volume with all 3 backup-volfile servers
+# 5. kill glusterd in node 1
+# 6. make changes to volume using node 2, using 'volume set' here
+# 7. check whether those notifications are received by client
+
+TEST launch_cluster 3;
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume status $V0;
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --volfile-server=$H3 $M0
+
+TEST kill_glusterd 1
+
+TEST $CLI_2 volume set $V0 performance.write-behind off
+
+# make sure by this time directory will be created
+# TODO: suggest ideal time to wait
+sleep 5
+
+count=$(find $M0/.meta/graphs/* -maxdepth 0 -type d -iname "*" | wc -l)
+TEST [ "$count" -gt "1" ]
+
+cleanup;
diff --git a/tests/basic/glusterd/volume-brick-count.t b/tests/basic/glusterd/volume-brick-count.t
new file mode 100644
index 00000000000..dc1a5278f4f
--- /dev/null
+++ b/tests/basic/glusterd/volume-brick-count.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function test_volume_config()
+{
+ volname=$1
+ type_string=$2
+ brickCount=$3
+ distCount=$4
+ replicaCount=$5
+ arbiterCount=$6
+ disperseCount=$7
+ redundancyCount=$8
+
+ EXPECT "$type_string" volinfo_field $volname "Number of Bricks"
+ EXPECT "$brickCount" get-xml "volume info $volname" "brickCount"
+ EXPECT "$distCount" get-xml "volume info $volname" "distCount"
+ EXPECT "$replicaCount" get-xml "volume info $volname" "replicaCount"
+ EXPECT "$arbiterCount" get-xml "volume info $volname" "arbiterCount"
+ EXPECT "$disperseCount" get-xml "volume info $volname" "disperseCount"
+ EXPECT "$redundancyCount" get-xml "volume info $volname" "redundancyCount"
+}
+
+# This command tests the volume create command and number of bricks for different volume types.
+cleanup;
+TESTS_EXPECTED_IN_LOOP=56
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create ${V0}_1 replica 3 arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+test_volume_config "${V0}_1" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+
+TEST $CLI volume create ${V0}_2 replica 3 arbiter 1 $H0:$B0/b{4..9}
+test_volume_config "${V0}_2" "2 x \(2 \+ 1\) = 6" "6" "2" "3" "1" "0" "0"
+
+
+TEST $CLI volume create ${V0}_3 replica 3 arbiter 1 $H0:$B0/b{10..12}
+test_volume_config "${V0}_3" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+TEST killall -15 glusterd
+TEST glusterd
+TEST pidof glusterd
+test_volume_config "${V0}_3" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+
+TEST $CLI volume create ${V0}_4 replica 3 $H0:$B0/b{13..15}
+test_volume_config "${V0}_4" "1 x 3 = 3" "3" "1" "3" "0" "0" "0"
+
+TEST $CLI volume create ${V0}_5 replica 3 $H0:$B0/b{16..21}
+test_volume_config "${V0}_5" "2 x 3 = 6" "6" "2" "3" "0" "0" "0"
+
+TEST $CLI volume create ${V0}_6 disperse 3 redundancy 1 $H0:$B0/b{22..24}
+test_volume_config "${V0}_6" "1 x \(2 \+ 1\) = 3" "3" "1" "1" "0" "3" "1"
+
+TEST $CLI volume create ${V0}_7 disperse 3 redundancy 1 $H0:$B0/b{25..30}
+test_volume_config "${V0}_7" "2 x \(2 \+ 1\) = 6" "6" "2" "1" "0" "3" "1"
+
+TEST $CLI volume create ${V0}_8 $H0:$B0/b{31..33}
+test_volume_config "${V0}_8" "3" "3" "3" "1" "0" "0" "0"
+
+cleanup
diff --git a/tests/basic/glusterfsd-args.t b/tests/basic/glusterfsd-args.t
new file mode 100644
index 00000000000..2dd84b8c29e
--- /dev/null
+++ b/tests/basic/glusterfsd-args.t
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+EXPECT $GLUSTER_LIBEXECDIR glusterfsd --print-libexecdir
diff --git a/tests/basic/graph-cleanup-brick-down-shd-mux.t b/tests/basic/graph-cleanup-brick-down-shd-mux.t
new file mode 100644
index 00000000000..3c621cdcc26
--- /dev/null
+++ b/tests/basic/graph-cleanup-brick-down-shd-mux.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#kill one brick and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill an entire subvol and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+#wait for some time to create a race sceanrio
+sleep 1
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill all bricks and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+#wait for some time to create a race sceanrio
+sleep 2
+
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+cleanup
diff --git a/tests/basic/hardlink-limit.t b/tests/basic/hardlink-limit.t
new file mode 100644
index 00000000000..ee65c650b59
--- /dev/null
+++ b/tests/basic/hardlink-limit.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 storage.max-hardlinks 3
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST dd if=/dev/zero of=$M0/testfile count=1
+
+# max-hardlinks is 3, should be able to create 2 links.
+TEST link $M0/testfile $M0/testfile.link1
+TEST link $M0/testfile $M0/testfile.link2
+
+# But not 3.
+TEST ! link $M0/testfile $M0/testfile.link3
+# If we remove one...
+TEST rm $M0/testfile.link1
+# Now we can add one.
+TEST link $M0/testfile $M0/testfile.link3
+
+# But not another
+TEST ! link $M0/testfile $M0/testfile.link4
+
+# Unless we disable the limit...
+TEST $CLI volume set $V0 storage.max-hardlinks 0
+TEST link $M0/testfile $M0/testfile.link4
+
+cleanup;
diff --git a/tests/basic/inode-leak.t b/tests/basic/inode-leak.t
new file mode 100644
index 00000000000..e112fdddf8a
--- /dev/null
+++ b/tests/basic/inode-leak.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+TEST cp -rf /etc $M0
+TEST find $M0
+TEST rm -rf $M0/*
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+cleanup
+
+# Mainly marking it as known-issue as it is taking a *lot* of time.
+# Revert back if we are below an hour in regression runs.
+# Or consider running only in nightly regressions.
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/inode-quota-enforcing.t b/tests/basic/inode-quota-enforcing.t
new file mode 100644
index 00000000000..d666395dab1
--- /dev/null
+++ b/tests/basic/inode-quota-enforcing.t
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=9
+
+TEST glusterd
+
+# --------------------------------------------------
+# Create, start and mount a volume with single brick
+# --------------------------------------------------
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST mkdir -p $M0/test_dir
+
+#--------------------------------------------------------
+# Enable quota of the volume and set hard and soft timeout
+#------------------------------------------------------
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+TEST $CLI volume quota $V0 soft-timeout 0
+EXPECT '0' volinfo_field $V0 'features.soft-timeout'
+TEST $CLI volume quota $V0 hard-timeout 0
+EXPECT '0' volinfo_field $V0 'features.hard-timeout'
+
+#-------------------------------------------------------
+# Set quota limits on the directory and
+# verify if the limits are being reflected properly
+#------------------------------------------------------
+
+TEST $CLI volume quota $V0 limit-objects /test_dir 10
+EXPECT "10" quota_object_list_field "/test_dir" 2
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+EXPECT "100.0MB" quota_list_field "/test_dir" 2
+
+#------------------------------------------------------
+# Check the quota enforcement mechanism for object count
+#-------------------------------------------------------
+
+# Try creating 9 files and it should succeed as object limit
+# is set to 10, since directory where limit is set is accounted
+# as well.
+
+for i in {1..9}; do
+ #TEST_IN_LOOP touch $M0/test_dir/test$i.txt
+ TEST_IN_LOOP $QDD $M0/test_dir/test$i.txt 256 4
+done
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "9" quota_object_list_field "/test_dir" 4
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "9.0MB" quotausage "/test_dir"
+
+# Check available limit
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0" quota_object_list_field "/test_dir" 6
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "91.0MB" quota_list_field "/test_dir" 5
+
+# Check if hard-limit exceeded
+EXPECT "Yes" quota_object_list_field "/test_dir" 8
+
+# Check if soft-limit exceeded
+EXPECT "Yes" quota_object_list_field "/test_dir" 7
+
+# Creation of 11th file should throw out an error
+TEST ! touch $M0/test_dir/test11.txt
+
+#-------------------------------------------------------
+# remove quota limits on the directory and
+# verify if the limit show 'N/A' and displayes only the usage
+#------------------------------------------------------
+TEST $CLI volume quota $V0 remove-objects /test_dir
+EXPECT "N/A" quota_object_list_field "/test_dir" 2
+EXPECT "9" quota_object_list_field "/test_dir" 4
+
+TEST $CLI volume quota $V0 remove /test_dir
+EXPECT "N/A" quota_list_field "/test_dir" 2
+EXPECT "9.0MB" quotausage "/test_dir" 4
+
+# Set back the limits
+TEST $CLI volume quota $V0 limit-objects /test_dir 10
+EXPECT "10" quota_object_list_field "/test_dir" 2
+
+# Remove all files and verify the file count
+TEST rm -rf $M0/test_dir/test*
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0" quota_object_list_field "/test_dir" 4
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332021
diff --git a/tests/basic/ios-dump.t b/tests/basic/ios-dump.t
new file mode 100644
index 00000000000..0cfbdc6ae7c
--- /dev/null
+++ b/tests/basic/ios-dump.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function check_brick_inter_stats() {
+ local counter="$1"
+ local inter_cnt=""
+
+ inter_cnt=$(grep -h "\".*inter.*$counter\"" \
+ /var/lib/glusterd/stats/glusterfsd*.dump 2>/dev/null |
+ grep -v '\"0.0000\"' | wc -l)
+ if (( $inter_cnt == 3 )); then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 5
+TEST $CLI volume set $V0 diagnostics.count-fop-hits on
+TEST $CLI volume set $V0 diagnostics.latency-measurement on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# Generate some FOPs
+cd $M0
+for i in {1..10}; do
+ mkdir a
+ cd a
+ for g in {1..10}; do
+ dd if=/dev/zero of=test$g bs=128k count=1
+ done
+done
+
+EXPECT_WITHIN 30 "Y" check_brick_inter_stats fop.weighted_latency_ave_usec
+
+cleanup
diff --git a/tests/basic/jbr/jbr-volgen.t b/tests/basic/jbr/jbr-volgen.t
new file mode 100644
index 00000000000..f368710c158
--- /dev/null
+++ b/tests/basic/jbr/jbr-volgen.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+volfiles=${GLUSTERD_WORKDIR}/vols/${V0}/
+check_brick_volfiles () {
+ for vf in ${volfiles}${V0}.$(hostname).*.vol; do
+ grep -qs experimental/jbr $vf || return
+ # At least for now, nothing else would put a client translator
+ # in a brick volfile.
+ grep -qs protocol/client $vf || return
+ done
+ echo "OK"
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume set $V0 cluster.jbr on
+
+# Check that the client volfile got modified properly.
+TEST grep -qs experimental/jbrc ${volfiles}${V0}.tcp-fuse.vol
+
+# Check that the brick volfiles got modified as well.
+EXPECT "OK" check_brick_volfiles
+
+# Put things back and make sure the "undo" worked.
+TEST $CLI volume set $V0 cluster.jbr off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+echo hello > $M0/probe
+EXPECT hello cat ${B0}/${V0}1/probe
+EXPECT hello cat ${B0}/${V0}2/probe
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/basic/jbr/jbr.t b/tests/basic/jbr/jbr.t
new file mode 100755
index 00000000000..605344b5a7e
--- /dev/null
+++ b/tests/basic/jbr/jbr.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fdl.rc
+
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+TEST $CLI_1 volume create $V0 replica 3 $H1:$L1 $H2:$L2 $H3:$L3
+TEST $CLI_1 volume set $V0 cluster.jbr on
+TEST $CLI_1 volume set $V0 cluster.jbr.quorum-percent 100
+TEST $CLI_1 volume set $V0 features.fdl on
+#TEST $CLI_1 volume set $V0 diagnostics.brick-log-level DEBUG
+TEST $CLI_1 volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H1 --entry-timeout=0 $M0;
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" jbrc_child_up_status $V0 0
+
+echo "file" > $M0/file1
+TEST stat $L1/file1
+TEST stat $L2/file1
+TEST stat $L3/file1
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/basic/logchecks-messages.h b/tests/basic/logchecks-messages.h
new file mode 100644
index 00000000000..bf364848ec7
--- /dev/null
+++ b/tests/basic/logchecks-messages.h
@@ -0,0 +1,105 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _LOGCHECKS_MESSAGES_H_
+#define _LOGCHECKS_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for redability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check acorss the code if the message ID macro in question is reused
+ * anywhere. If reused then then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE 1000
+#define GLFS_NUM_MESSAGES 19
+#define GLFS_MSGID_END (GLFS_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messaged with message IDs */
+#define glfs_msg_start_x GLFS_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+#define logchecks_msg_1 \
+ (GLFS_COMP_BASE + 1), \
+ "Informational: Testing logging" \
+ " in gluster"
+#define logchecks_msg_2 \
+ (GLFS_COMP_BASE + 2), \
+ "Informational: Format testing:" \
+ " %d:%s:%x"
+#define logchecks_msg_3 \
+ (GLFS_COMP_BASE + 3), \
+ "Critical: Testing logging" \
+ " in gluster"
+#define logchecks_msg_4 \
+ (GLFS_COMP_BASE + 4), \
+ "Critical: Format testing:" \
+ " %d:%s:%x"
+#define logchecks_msg_5 (GLFS_COMP_BASE + 5), "Critical: Rotated the log"
+#define logchecks_msg_6 (GLFS_COMP_BASE + 6), "Critical: Flushed the log"
+#define logchecks_msg_7 (GLFS_COMP_BASE + 7), "Informational: gf_msg_callingfn"
+#define logchecks_msg_8 \
+ (GLFS_COMP_BASE + 8), \
+ "Informational: " \
+ "gf_msg_callingfn: Format testing: %d:%s:%x"
+#define logchecks_msg_9 (GLFS_COMP_BASE + 9), "Critical: gf_msg_callingfn"
+#define logchecks_msg_10 \
+ (GLFS_COMP_BASE + 10), \
+ "Critical: " \
+ "gf_msg_callingfn: Format testing: %d:%s:%x"
+#define logchecks_msg_11 (GLFS_COMP_BASE + 11), "=========================="
+#define logchecks_msg_12 \
+ (GLFS_COMP_BASE + 12), \
+ "Test 1: Only stderr and" \
+ " partial syslog"
+#define logchecks_msg_13 \
+ (GLFS_COMP_BASE + 13), \
+ "Test 2: Only checklog and" \
+ " partial syslog"
+#define logchecks_msg_14 \
+ (GLFS_COMP_BASE + 14), \
+ "Test 5: Changing to" \
+ " traditional format"
+#define logchecks_msg_15 \
+ (GLFS_COMP_BASE + 15), \
+ "Test 6: Changing log level" \
+ " to critical and above"
+#define logchecks_msg_16 (GLFS_COMP_BASE + 16), "Test 7: Only to syslog"
+#define logchecks_msg_17 \
+ (GLFS_COMP_BASE + 17), \
+ "Test 8: Only to syslog," \
+ " traditional format"
+#define logchecks_msg_18 \
+ (GLFS_COMP_BASE + 18), \
+ "Test 9: Only to syslog," \
+ " only critical and above"
+#define logchecks_msg_19 \
+ (GLFS_COMP_BASE + 19), \
+ "Pre init message, not to be" \
+ " seen in logs"
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_component_MESSAGES_H_ */ \ No newline at end of file
diff --git a/tests/basic/logchecks.c b/tests/basic/logchecks.c
new file mode 100644
index 00000000000..df0be28ace0
--- /dev/null
+++ b/tests/basic/logchecks.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/logging.h>
+
+#include "logchecks-messages.h"
+#include "../../libglusterfs/src/logging.h"
+
+glusterfs_ctx_t *ctx = NULL;
+
+#define TEST_FILENAME "/tmp/logchecks.log"
+#define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf"
+
+int
+go_log_vargs(gf_loglevel_t level, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ gf_msg_vplain(level, fmt, ap);
+ va_end(ap);
+
+ return 0;
+}
+
+int
+go_log(void)
+{
+ /*** gf_msg ***/
+ gf_msg("logchecks", GF_LOG_INFO, 0, logchecks_msg_1);
+ gf_msg("logchecks", GF_LOG_INFO, 22, logchecks_msg_2, 42, "Forty-Two", 42);
+ /* change criticality */
+ gf_msg("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_3);
+ gf_msg("logchecks", GF_LOG_CRITICAL, 22, logchecks_msg_4, 42, "Forty-Two",
+ 42);
+
+ /*** msg_nomem ***/
+ gf_msg_nomem("logchecks", GF_LOG_ALERT, 555);
+ gf_msg_nomem("logchecks", GF_LOG_INFO, 555);
+
+ /*** msg_plain ***/
+ gf_msg_plain(GF_LOG_INFO,
+ "Informational: gf_msg_plain with"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+ gf_msg_plain(GF_LOG_ALERT,
+ "Alert: gf_msg_plain with"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+
+ /*** msg_vplain ***/
+ go_log_vargs(GF_LOG_INFO, "Informational: gf_msg_vplain: No args!!!");
+ go_log_vargs(GF_LOG_INFO,
+ "Informational: gf_msg_vplain: Some"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+ go_log_vargs(GF_LOG_INFO, "Critical: gf_msg_vplain: No args!!!");
+ go_log_vargs(GF_LOG_INFO,
+ "Critical: gf_msg_vplain: Some"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+
+ /*** msg_plain_nomem ***/
+ gf_msg_plain_nomem(GF_LOG_INFO, "Informational: gf_msg_plain_nomem");
+ gf_msg_plain_nomem(GF_LOG_ALERT, "Alert: gf_msg_plain_nomem");
+
+ /*** msg_backtrace_nomem ***/
+ // TODO: Need to create a stack depth and then call
+ gf_msg_backtrace_nomem(GF_LOG_INFO, 5);
+ gf_msg_backtrace_nomem(GF_LOG_ALERT, 5);
+
+ /*** gf_msg_callingfn ***/
+ // TODO: Need to create a stack depth and then call
+ gf_msg_callingfn("logchecks", GF_LOG_INFO, 0, logchecks_msg_7);
+ gf_msg_callingfn("logchecks", GF_LOG_INFO, 0, logchecks_msg_8, 42,
+ "Forty-Two", 42);
+ gf_msg_callingfn("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_9);
+ gf_msg_callingfn("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_10, 42,
+ "Forty-Two", 42);
+
+ /*** gf_msg_debug ***/
+ gf_msg_debug("logchecks", 0, "Debug: Hello World!!!");
+ gf_msg_debug("logchecks", 22, "Debug: With args %d:%s:%x", 42, "Forty-Two",
+ 42);
+
+ /*** gf_msg_trace ***/
+ gf_msg_trace("logchecks", 0, "Trace: Hello World!!!");
+ gf_msg_trace("logchecks", 22, "Trace: With args %d:%s:%x", 42, "Forty-Two",
+ 42);
+
+ /*** gf_msg_backtrace ***/
+ // TODO: Test with lower callstr values to check truncation
+
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+
+ unlink(GF_LOG_CONTROL_FILE);
+ creat(GF_LOG_CONTROL_FILE, O_RDONLY);
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ return -1;
+
+ ret = glusterfs_globals_init(ctx);
+ if (ret) {
+ printf("Error from glusterfs_globals_init [%s]\n", strerror(errno));
+ return ret;
+ }
+
+ /* Pre init test, message should not be printed */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_19);
+
+ THIS->ctx = ctx;
+
+ /* TEST 1: messages before initializing the log, goes to stderr
+ * and syslog based on criticality */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_12);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 2: messages post initialization, goes to glusterlog and
+ * syslog based on severity */
+ ret = gf_log_init(ctx, TEST_FILENAME, "logchecks");
+ if (ret != 0) {
+ printf("Error from gf_log_init [%s]\n", strerror(errno));
+ return -1;
+ }
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_13);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 3: Test rotation */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_logrotate(0);
+ gf_msg("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_5);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 4: Check flush, nothing noticeable should occur :) */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_flush();
+ gf_msg("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_6);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 5: Change format */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_logformat(gf_logformat_traditional);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_14);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 6: Change level */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_loglevel(ctx, GF_LOG_CRITICAL);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_15);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* Reset to run with syslog */
+ gf_log_set_logformat(gf_logformat_withmsgid);
+ gf_log_set_loglevel(ctx, GF_LOG_INFO);
+
+ /* Run tests with logger changed to syslog */
+ /* TEST 7: No more gluster logs */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_logger(gf_logger_syslog);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_16);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 8: Change format */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_logformat(gf_logformat_traditional);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_14);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 9: Change level */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_loglevel(ctx, GF_LOG_CRITICAL);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_15);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ // TODO: signal crash prints, but not yet feasible here
+ // TODO: Graph printing
+ // TODO: Multi threaded logging
+
+ /* Close out the logging */
+ gf_log_fini(ctx);
+ gf_log_globals_fini();
+
+ unlink(GF_LOG_CONTROL_FILE);
+ unlink(TEST_FILENAME);
+
+ return 0;
+}
diff --git a/tests/basic/md-cache/bug-1317785.t b/tests/basic/md-cache/bug-1317785.t
new file mode 100644
index 00000000000..5076e3612ac
--- /dev/null
+++ b/tests/basic/md-cache/bug-1317785.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cache-swift-metadata on
+EXPECT 'on' volinfo_field $V0 'performance.cache-swift-metadata'
+
+TEST $CLI volume set $V0 cache-swift-metadata off
+EXPECT 'off' volinfo_field $V0 'performance.cache-swift-metadata'
+
+TEST $CLI volume set $V0 performance.cache-capability-xattrs off
+EXPECT 'off' volinfo_field $V0 'performance.cache-capability-xattrs'
+
+TEST $CLI volume set $V0 performance.cache-capability-xattrs on
+EXPECT 'on' volinfo_field $V0 'performance.cache-capability-xattrs'
+
+TEST $CLI volume set $V0 performance.cache-ima-xattrs off
+EXPECT 'off' volinfo_field $V0 'performance.cache-ima-xattrs'
+
+TEST $CLI volume set $V0 performance.cache-ima-xattrs on
+EXPECT 'on' volinfo_field $V0 'performance.cache-ima-xattrs'
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/md-cache/bug-1418249.t b/tests/basic/md-cache/bug-1418249.t
new file mode 100755
index 00000000000..85a4f58ec10
--- /dev/null
+++ b/tests/basic/md-cache/bug-1418249.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 group metadata-cache
+EXPECT 'on' volinfo_field $V0 'performance.cache-invalidation'
+EXPECT '600' volinfo_field $V0 'performance.md-cache-timeout'
+EXPECT 'on' volinfo_field $V0 'performance.stat-prefetch'
+EXPECT '600' volinfo_field $V0 'features.cache-invalidation-timeout'
+EXPECT 'on' volinfo_field $V0 'features.cache-invalidation'
+EXPECT '200000' volinfo_field $V0 'network.inode-lru-limit'
+cleanup;
diff --git a/tests/basic/meta.t b/tests/basic/meta.t
new file mode 100755
index 00000000000..0bac3c6797d
--- /dev/null
+++ b/tests/basic/meta.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..9};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+# verify json validity
+
+TEST json_verify < $M0/.meta/frames;
+
+TEST json_verify < $M0/.meta/cmdline;
+
+TEST json_verify < $M0/.meta/version;
+
+# default log level (INFO) is 7
+TEST grep -q 7 $M0/.meta/logging/loglevel;
+
+# check for attribute_timeout exposed through state dump
+TEST grep -q attribute_timeout $M0/.meta/master/private;
+
+# check for mount point specified as an option
+TEST grep -q $M0 $M0/.meta/master/options/mountpoint;
+
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/metadisp/fsyncdir.c b/tests/basic/metadisp/fsyncdir.c
new file mode 100644
index 00000000000..62b532b9ce4
--- /dev/null
+++ b/tests/basic/metadisp/fsyncdir.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDONLY | O_DIRECTORY);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (rename(argv[2], argv[3]) == (-1)) {
+ perror("rename");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/ftruncate.c b/tests/basic/metadisp/ftruncate.c
new file mode 100644
index 00000000000..c9185212c31
--- /dev/null
+++ b/tests/basic/metadisp/ftruncate.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDWR);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (ftruncate(pfd, 0) == (-1)) {
+ perror("ftruncate");
+ return EXIT_FAILURE;
+ }
+
+ if (write(pfd, "hello", 5) == (-1)) {
+ perror("write");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/fxattr.c b/tests/basic/metadisp/fxattr.c
new file mode 100644
index 00000000000..e552057778a
--- /dev/null
+++ b/tests/basic/metadisp/fxattr.c
@@ -0,0 +1,107 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+static char MY_XATTR[] = "user.fxtest";
+static char *PROGRAM;
+#define CONSUME(v) \
+ do { \
+ if (!argc) { \
+ fprintf(stderr, "missing argument\n"); \
+ return EXIT_FAILURE; \
+ } \
+ v = argv[0]; \
+ ++argv; \
+ --argc; \
+ } while (0)
+
+static int
+do_get(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+ char buf[1024];
+
+ CONSUME(value);
+
+ ret = fgetxattr(fd, MY_XATTR, buf, sizeof(buf));
+ if (ret == (-1)) {
+ perror("fgetxattr");
+ return EXIT_FAILURE;
+ }
+
+ if (strncmp(buf, value, ret) != 0) {
+ fprintf(stderr, "data mismatch\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_set(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+
+ CONSUME(value);
+
+ ret = fsetxattr(fd, MY_XATTR, value, strlen(value), 0);
+ if (ret == (-1)) {
+ perror("fsetxattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_remove(int argc, char **argv, int fd)
+{
+ int ret;
+
+ ret = fremovexattr(fd, MY_XATTR);
+ if (ret == (-1)) {
+ perror("femovexattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd;
+ char *path;
+ char *cmd;
+
+ CONSUME(PROGRAM);
+ CONSUME(path);
+ CONSUME(cmd);
+
+ fd = open(path, O_RDWR);
+ if (fd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (strcmp(cmd, "get") == 0) {
+ return do_get(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "set") == 0) {
+ return do_set(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "remove") == 0) {
+ return do_remove(argc, argv, fd);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/gfs-fsetxattr.c b/tests/basic/metadisp/gfs-fsetxattr.c
new file mode 100644
index 00000000000..63578bc528f
--- /dev/null
+++ b/tests/basic/metadisp/gfs-fsetxattr.c
@@ -0,0 +1,141 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int gfapi = 1;
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ char *basename = NULL;
+ char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+
+ if (argc != 5) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file> <basename>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+ basename = argv[4];
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&dir1, "%s-dir", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = glfs_mkdir(fs, dir1, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, dir1);
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_closedir(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&filename1, "%s-file", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&filename2, "%s-file-renamed", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ fd = glfs_creat(fs, filename1, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rename(fs, filename1, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_lstat(fs, filename2, &sb);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.filefattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+}
diff --git a/tests/basic/metadisp/metadisp.t b/tests/basic/metadisp/metadisp.t
new file mode 100644
index 00000000000..894ffe07226
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.t
@@ -0,0 +1,316 @@
+#!/usr/bin/env bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+# Considering `--enable-metadisp` is an option for `./configure`,
+# which is disabled by default, this test will never pass regression.
+# But to see the value of this test, run below after configuring
+# with above option :
+# `prove -vmfe '/bin/bash' tests/basic/metadisp/metadisp.t`
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST
+
+cleanup;
+
+TEST mkdir -p $B0/b0/{0,1}
+
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/0
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/1
+
+TEST $GFS --volfile=$(dirname $0)/metadisp.vol --volfile-id=$V0 $M0;
+
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+
+# each drive should get 40 files
+TEST [ $(dir -1 $B0/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(dir -1 $B0/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# now write some data to a file
+echo "hello" > $M0/3
+filename=$$
+echo "hello" > /tmp/metadisp-write-${filename}
+checksum=$(md5sum /tmp/metadisp-write-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+
+# check that the backend file exists on b1
+gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/3))
+TEST [ $(dir -1 $B0/b0/1/$gfid | wc -l) -eq 1 ]
+
+# check that the backend file matches the frontend
+TEST [ "$(md5sum $B0/b0/1/$gfid | awk '{print $1}')" == "$checksum" ]
+
+# delete the file
+TEST rm $M0/3
+
+# ensure the frontend and backend files are cleaned up
+TEST ! -e $M0/3
+TEST ! [ stat $B0/b*/*/$gfid ]
+
+# Test TRUNCATE + WRITE flow
+echo "hello" | tee $M0/4
+echo "goo" | tee $M0/4
+filename=$$
+echo "goo" | tee /tmp/metadisp-truncate-${filename}
+checksum=$(md5sum /tmp/metadisp-truncate-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+
+# cleanup all the other files.
+TEST rm -v $M0/1 $M0/2 $M0/{4..${NUM_FILES}}
+TEST rm $M0/such_rename
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE flow
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# Test UNLINK flow
+# No drives should have any files
+TEST rm -v $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE + WRITE + READ flow
+filename=$$
+dd if=/dev/urandom of=/tmp/${filename} bs=1M count=10
+checksum=$(md5sum /tmp/${filename} | awk '{print $1}')
+TEST cp -v /tmp/${filename} $M0/1
+TEST cp -v /tmp/${filename} $M0/2
+TEST cp -v /tmp/${filename} $M0/3
+TEST cp -v /tmp/${filename} $M0/4
+TEST [ "$(md5sum $M0/1 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/2 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test TRUNCATE + WRITE flow
+TEST dd if=/dev/zero of=$M0/1 bs=1M count=20
+
+# Check that readdir stats the files properly and we get the correct sizes
+TEST [ $(find $M0 -size +9M | wc -l) -eq 4 ];
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+# Still flaky, so disabled until it can be debugged.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+# Test rename over an existing file.
+ok=yes
+for i in $(seq 0 9); do
+ echo foo > $M0/src$i
+ echo bar > $M0/dst$i
+done
+for i in $(seq 0 9); do
+ mv $M0/src$i $M0/dst$i
+done
+for i in $(seq 0 9); do
+ nfiles=$(cat $B0/b0/*/dst$i | wc -l)
+ if [ "$nfiles" = "2" ]; then
+ echo "COLLISION on dst$i"
+ (ls -l $B0/b0/*/dst$i; cat $B0/b0/*/dst$i) | sed "/^/s// /"
+ ok=no
+ fi
+done
+EXPECT "yes" echo $ok
+
+# Test rename of a directory.
+count_copies () {
+ ls -d $B0/b?/?/$1 2> /dev/null | wc -l
+}
+TEST mkdir $M0/foo_dir
+EXPECT 1 count_copies foo_dir
+EXPECT 0 count_copies bar_dir
+TEST mv $M0/foo_dir $M0/bar_dir
+EXPECT 0 count_copies foo_dir
+EXPECT 1 count_copies bar_dir
+
+for x in $(seq 0 99); do
+ touch $M0/target$x
+ ln -s $M0/target$x $M0/link$x
+done
+on_0=$(ls $B0/b*/0/link* | wc -l)
+on_1=$(ls $B0/b*/1/link* | wc -l)
+TEST [ "$on_0" -eq 100 ]
+TEST [ "$on_1" -eq 0 ]
+TEST [ "$(ls -l $M0/link* | wc -l)" = 100 ]
+
+# Test (hard) link.
+_test_hardlink () {
+ local b
+ local has_src
+ local has_dst
+ local src_inum
+ local dst_inum
+ touch $M0/hardsrc$1
+ ln $M0/hardsrc$1 $M0/harddst$1
+ for b in $B0/b{0}/{0,1}; do
+ [ -f $b/hardsrc$1 ]; has_src=$?
+ [ -f $b/harddst$1 ]; has_dst=$?
+ if [ "$has_src" != "$has_dst" ]; then
+ echo "MISSING $b/hardxxx$1 $has_src $has_dst"
+ return
+ fi
+ if [ "$has_src$has_dst" = "00" ]; then
+ src_inum=$(stat -c '%i' $b/hardsrc$1)
+ dst_inum=$(stat -c '%i' $b/harddst$1)
+ if [ "$dst_inum" != "$src_inum" ]; then
+ echo "MISMATCH $b/hardxx$i $src_inum $dst_inum"
+ return
+ fi
+ fi
+ done
+ echo "OK"
+}
+
+test_hardlink () {
+ local result=$(_test_hardlink $*)
+ # [ "$result" = "OK" ] || echo $result > /dev/tty
+ echo $result
+}
+
+# Do this multiple times to make sure colocation isn't a fluke.
+EXPECT "OK" test_hardlink 0
+EXPECT "OK" test_hardlink 1
+EXPECT "OK" test_hardlink 2
+EXPECT "OK" test_hardlink 3
+EXPECT "OK" test_hardlink 4
+EXPECT "OK" test_hardlink 5
+EXPECT "OK" test_hardlink 6
+EXPECT "OK" test_hardlink 7
+EXPECT "OK" test_hardlink 8
+EXPECT "OK" test_hardlink 9
+
+# Test remove hardlink source. ensure deleting one file
+# doesn't delete the data unless link-count is 1
+TEST mkdir $M0/hardlink
+TEST touch $M0/hardlink/fileA
+echo "data" >> $M0/hardlink/fileA
+checksum=$(md5sum $M0/hardlink/fileA | awk '{print $1}')
+TEST ln $M0/hardlink/fileA $M0/hardlink/fileB
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 2 ]
+TEST rm $M0/hardlink/fileA
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 1 ]
+TEST [ "$(md5sum $M0/hardlink/fileB | awk '{print $1}')" == "$checksum" ]
+
+#
+# FIXME: statfs values look ok but the test is bad
+#
+# Test statfs. If we're doing it right, the numbers for the mountpoint should be
+# double those for the brick filesystem times the number of bricks,
+# but unless we're on a completely idle
+# system (which never happens) the numbers can change even while this function
+# runs and that would trip us up. Do a sloppy comparison to deal with that.
+#compare_fields () {
+# val1=$(df $1 | grep / | awk "{print \$$3}")
+# val2=$(df $2 | grep / | awk "{print \$$3}")
+# [ "$val2" -gt "$(((val1/(29/10))*19/10))" -a "$val2" -lt "$(((val1/(31/10))*21/10))" ]
+#}
+
+#brick_df=$(df $B0 | grep /)
+#mount_df=$(df $M0 | grep /)
+#TEST compare_fields $B0 $M0 2 # Total blocks
+#TEST compare_fields $B0 $M0 3 # Used
+#TEST compare_fields $B0 $M0 4 # Available
+
+# Test removexattr.
+#RXATTR_FILE=$(get_file_not_on_disk0 rxtest)
+#TEST setfattr -n user.foo -v bar $M0/$RXATTR_FILE
+#TEST getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+#TEST setfattr -x user.foo $M0/$RXATTR_FILE
+#TEST ! getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+
+# Test fsyncdir. We can't really test whether it's doing the right thing,
+# but we can test that it doesn't fail and we can hand-check that it's calling
+# down to all of the disks instead of just one.
+#
+# P.S. There's no fsyncdir test in the rest of Gluster, so who even knows if
+# other translators are handling it correctly?
+
+#FSYNCDIR_EXE=$(dirname $0)/fsyncdir
+#build_tester ${FSYNCDIR_EXE}.c
+#TEST touch $M0/fsyncdir_src
+#TEST $FSYNCDIR_EXE $M0 $M0/fsyncdir_src $M0/fsyncdir_dst
+#TEST rm -f $FSYNCDIR_EXE
+
+# Test fsetxattr, fgetxattr, fremovexattr (in that order).
+FXATTR_FILE=$M0/fxfile1
+TEST touch $FXATTR_FILE
+FXATTR_EXE=$(dirname $0)/fxattr
+build_tester ${FXATTR_EXE}.c
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE set value1
+TEST getfattr -n user.fxtest $FXATTR_FILE
+TEST setfattr -n user.fxtest -v value2 $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE get value2
+TEST $FXATTR_EXE $FXATTR_FILE remove
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST rm -f $FXATTR_EXE
+
+# Test ftruncate
+FTRUNCATE_EXE=$(dirname $0)/ftruncate
+build_tester ${FTRUNCATE_EXE}.c
+FTRUNCATE_FILE=$M0/ftfile1
+TEST dd if=/dev/urandom of=$FTRUNCATE_FILE count=1 bs=1MB
+TEST $FTRUNCATE_EXE $FTRUNCATE_FILE
+#gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/ftfile1))
+
+# Test fallocate, discard, zerofill. Actually we don't so much check that these
+# *work* as that they don't throw any errors (especially ENOENT because the
+# file's not on disk zero).
+FALLOC_FILE=fatest1
+TEST touch $M0/$FALLOC_FILE
+TEST fallocate -l $((4096*5)) $M0/$FALLOC_FILE
+TEST fallocate -p -o 4096 -l 4096 $M0/$FALLOC_FILE
+# This actually fails with "operation not supported" on most filesystems, so
+# don't leave it enabled except to test changes.
+#TEST fallocate -z -o $((4096*3)) -l 4096 $M0/$FALLOC_FILE
+
+#cleanup;
diff --git a/tests/basic/metadisp/metadisp.vol b/tests/basic/metadisp/metadisp.vol
new file mode 100644
index 00000000000..58ae2f6f2a8
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.vol
@@ -0,0 +1,14 @@
+volume posix-0
+ type storage/posix
+ option directory /d/backends/b0/0
+end-volume
+
+volume posix-1
+ type storage/posix
+ option directory /d/backends/b0/1
+end-volume
+
+volume metadisp-0
+ type features/metadisp
+ subvolumes posix-0 posix-1
+end-volume
diff --git a/tests/basic/mgmt_v3-locks.t b/tests/basic/mgmt_v3-locks.t
new file mode 100644
index 00000000000..9d766f40602
--- /dev/null
+++ b/tests/basic/mgmt_v3-locks.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function volume_count {
+ local cli=$1;
+ if [ $cli -eq '1' ] ; then
+ $CLI_1 volume info | grep 'Volume Name' | wc -l;
+ else
+ $CLI_2 volume info | grep 'Volume Name' | wc -l;
+ fi
+}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function two_diff_vols_create {
+ # Both volume creates should be successful
+ $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1 $H3:$B3/$V1 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function two_diff_vols_start {
+ # Both volume starts should be successful
+ $CLI_1 volume start $V0 &
+ PID_1=$!
+
+ $CLI_2 volume start $V1 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function two_diff_vols_stop_force {
+ # Force stop, so that if rebalance from the
+ # remove bricks is in progress, stop can
+ # still go ahead. Both volume stops should
+ # be successful
+ $CLI_1 volume stop $V0 force &
+ PID_1=$!
+
+ $CLI_2 volume stop $V1 force &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function same_vol_remove_brick {
+
+ # Running two same vol commands at the same time can result in
+ # two success', two failures, or one success and one failure, all
+ # of which are valid. The only thing that shouldn't happen is a
+ # glusterd crash.
+
+ local vol=$1
+ local brick=$2
+ $CLI_1 volume remove-brick $1 $2 start &
+ PID=$!
+ $CLI_2 volume remove-brick $1 $2 start
+ wait $PID
+}
+
+cleanup;
+
+TEST launch_cluster 3;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers
+
+two_diff_vols_create
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+two_diff_vols_start
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+same_vol_remove_brick $V0 $H2:$B2/$V0
+# Checking glusterd crashed or not after same volume remove brick
+# on both nodes.
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers
+
+same_vol_remove_brick $V1 $H2:$B2/$V1
+# Checking glusterd crashed or not after same volume remove brick
+# on both nodes.
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers
+
+$CLI_1 volume set $V0 diagnostics.client-log-level DEBUG &
+PID=$!
+$CLI_1 volume set $V1 diagnostics.client-log-level DEBUG
+wait $PID
+kill_glusterd 3
+$CLI_1 volume status $V0
+$CLI_2 volume status $V1
+$CLI_1 peer status
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+TEST $glusterd_3
+$CLI_1 volume status $V0
+$CLI_2 volume status $V1
+$CLI_1 peer status
+cleanup;
diff --git a/tests/basic/mount-options.disabled b/tests/basic/mount-options.disabled
new file mode 100644
index 00000000000..a04c8686276
--- /dev/null
+++ b/tests/basic/mount-options.disabled
@@ -0,0 +1,143 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd --xlator-option=*.rpc-auth-allow-insecure=on
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 server.allow-insecure on
+TEST $CLI volume start $V0
+
+#test all the options available to see if the mount succeeds with those options
+#or not. This does not test functionality. This is added to prevent options
+#being removed in future breaking backward-compatibility.
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --entry-timeout=0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile=$GLUSTERD_WORKDIR/vols/$V0/${V0}-fuse.vol $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --log-file=/tmp/a.txt --log-level=DEBUG $M0
+EXPECT_NOT "0" wc -l /tmp/a.txt
+TEST grep " D " /tmp/a.txt
+TEST rm -f /tmp/a.txt
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --acl
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --enable-ino32
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --read-only
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --selinux
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --worm
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-check
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --dump-fuse=/tmp/a.txt
+EXPECT "0" stat /tmp/a.txt
+TEST rm -f /tmp/a.txt
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=-1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=abc
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=16
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=abc
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=-1
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion-threshold=12
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion-threshold=abc
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion=threshold=-1
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=10
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=abc
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=-1
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --pid-file=/tmp/a.txt
+EXPECT_NOT "0" wc -l /tmp/a.txt
+TEST rm -f /tmp/a.txt
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=24007
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=2400
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-transport=tcp
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-transport=ib-verbs
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --auto-invalidation=off
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=socket
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volume-name=$V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volume-name=abcd
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --invalid-option
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --invalid-option=abc
+cleanup;
diff --git a/tests/basic/mount.t b/tests/basic/mount.t
new file mode 100755
index 00000000000..3a3d7cc9d8d
--- /dev/null
+++ b/tests/basic/mount.t
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+TEST $CLI volume set $V0 nfs.disable false
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Make volume tightly consistent for metdata
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+## Check consistent "rw" option
+TEST 'mount -t $MOUNT_TYPE_FUSE | grep -E "^$H0:$V0 "|$GREP_MOUNT_OPT_RW';
+TEST 'grep -E "^$H0:$V0 .+ ,?rw,?" /proc/mounts';
+
+## Mount FUSE with caching disabled (read-only)
+TEST $GFS --read-only -s $H0 --volfile-id $V0 $M1;
+
+## Check consistent "ro" option
+TEST 'mount -t $MOUNT_TYPE_FUSE | grep -E "^$H0:$V0 "|$GREP_MOUNT_OPT_RO';
+TEST 'grep -E "^$H0:$V0 .+ ,?ro(,.+)?" /proc/mounts';
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Test for consistent views between NFS and FUSE mounts
+## write access to $M1 should fail
+TEST ! stat $M0/newfile;
+TEST ! touch $M1/newfile;
+TEST touch $M0/newfile;
+TEST stat $M1/newfile;
+TEST stat $N0/newfile;
+TEST ! rm -f $M1/newfile;
+TEST rm -f $N0/newfile;
+TEST ! stat $M0/newfile;
+TEST ! stat $M1/newfile;
+
+# No need to check for status here right now
+$(dirname $0)/rpc-coverage.sh $N0 >/dev/null
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/mpx-compat.t b/tests/basic/mpx-compat.t
new file mode 100644
index 00000000000..baf629dbf9b
--- /dev/null
+++ b/tests/basic/mpx-compat.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+#This test tests that self-heals don't perform fsync when durability is turned
+#off
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
+
+function count_processes {
+ # It would generally be a good idea to use "pgrep -x" to ensure an
+ # exact match, but the version of pgrep we have on NetBSD (a.k.a.
+ # the worst operating system ever) doesn't support that option.
+ # Fortunately, "glusterfsd" isn't the prefix of any other name,
+ # so this works anyway. For now.
+ pgrep glusterfsd | wc -w
+}
+
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex yes
+
+# Create two vanilla volumes.
+TEST $CLI volume create $V0 $H0:$B0/brick-${V0}-{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick-${V1}-{0,1}
+
+# Enable brick log-level to DEBUG
+gluster v set $V0 diagnostics.brick-log-level DEBUG
+
+# Start both.
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+# There should be only one process for compatible volumes. We can't use
+# EXPECT_WITHIN here because it could transiently see one process as two are
+# coming up, and yield a false positive.
+sleep $PROCESS_UP_TIMEOUT
+EXPECT "1" count_processes
+EXPECT 1 count_brick_pids
+
+# Make the second volume incompatible with the first.
+TEST $CLI volume stop $V1
+TEST $CLI volume set $V1 server.manage-gids no
+TEST $CLI volume start $V1
+
+# There should be two processes this time (can't share protocol/server).
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" count_processes
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/multiple-volume-shd-mux.t b/tests/basic/multiple-volume-shd-mux.t
new file mode 100644
index 00000000000..d7cfbaec85f
--- /dev/null
+++ b/tests/basic/multiple-volume-shd-mux.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=16
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+for i in $(seq 1 3); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+#Delete the volumes
+for i in $(seq 1 3); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+ TEST $CLI volume delete ${V0}_afr$i
+ TEST $CLI volume delete ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t
new file mode 100644
index 00000000000..2f558a6824b
--- /dev/null
+++ b/tests/basic/multiplex.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
+
+function count_up_bricks {
+ $CLI --xml volume status $V0 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI --xml volume status $V0 | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 features.trash enable
+
+TEST $CLI volume start $V0
+# Without multiplexing, there would be two.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V0
+#Testing the volume set command introduced for protocol/server
+TEST $CLI volume set $V0 transport.listen-backlog 1024
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 count_brick_processes
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+TEST kill_brick $V0 $H0 $B0/brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_up_bricks
+# Make sure the whole process didn't go away.
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Killing the first brick is a bit more of a challenge due to socket-path
+# issues.
+TEST kill_brick $V0 $H0 $B0/brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_up_bricks
+EXPECT 1 count_brick_processes
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Make sure that the two bricks show the same PID.
+EXPECT 1 count_brick_pids
+
+# Do a quick test to make sure that the bricks are acting as separate bricks
+# even though they're in the same process.
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+for i in $(seq 10 99); do
+ echo hello > $M0/file$i
+done
+nbrick0=$(ls $B0/brick0/file?? | wc -l)
+nbrick1=$(ls $B0/brick1/file?? | wc -l)
+TEST [ $((nbrick0 + nbrick1)) -eq 90 ]
+TEST [ $((nbrick0 * nbrick1)) -ne 0 ]
+
+pkill gluster
+TEST glusterd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_brick_processes
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/namespace.t b/tests/basic/namespace.t
new file mode 100644
index 00000000000..d1bbe7eea29
--- /dev/null
+++ b/tests/basic/namespace.t
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# These hashes are a result of calling SuperFastHash
+# on the corresponding folder names.
+NAMESPACE_HASH=28153613
+NAMESPACE2_HASH=3926991974
+NAMESPACE3_HASH=3493960770
+
+function check_brick_multiplex() {
+ local ret=$($CLI volume info|grep "cluster.brick-multiplex"|cut -d" " -f2)
+ local cnt="$(ls /var/log/glusterfs/bricks|wc -l)"
+ local bcnt="$(brick_count)"
+
+ if [ $bcnt -ne 1 ]; then
+ if [ -z $ret ]; then
+ ret="no"
+ fi
+
+ if [ $ret = "on" ] || [ $cnt -eq 1 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ else
+ echo "N"
+ fi
+}
+
+function check_samples() {
+ local FOP_TYPE=$1
+ local NS_HASH=$2
+ local FILE=$3
+ local BRICK=$4
+ local GFID="$(getfattr -n trusted.gfid -e text --only-values $B0/$BRICK$FILE | xxd -p)"
+ local val="$(check_brick_multiplex)"
+
+ if [ $val = "Y" ]; then
+ BRICK="${V0}0"
+ fi
+
+ grep -i "ns_$OP" /var/log/glusterfs/bricks/d-backends-$BRICK.log |
+ grep -- $NS_HASH | sed 's/\-//g' | grep -- $GFID
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+
+TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.nfs.stat-prefetch off
+TEST $CLI volume set $V0 cluster.read-subvolume-index 0
+TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG
+TEST $CLI volume set $V0 features.tag-namespaces on
+TEST $CLI volume set $V0 storage.build-pgfid on
+TEST $CLI volume start $V0
+
+sleep 2
+
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+################################
+# Paths in the samples #
+################################
+
+mkdir -p $N0/namespace
+
+# subvol_1 = bar, subvol_2 = foo, subvol_3 = hey
+# Test create, write (tagged by loc, fd respectively).
+touch $N0/namespace/{bar,foo,hey}
+echo "garbage" > $N0/namespace/bar
+echo "garbage" > $N0/namespace/foo
+echo "garbage" > $N0/namespace/hey
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE_HASH /namespace/bar patchy0
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE_HASH /namespace/foo patchy3
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE_HASH /namespace/hey patchy6
+EXPECT_WITHIN 10 "Y" check_samples WRITEV $NAMESPACE_HASH /namespace/bar patchy0
+EXPECT_WITHIN 10 "Y" check_samples WRITEV $NAMESPACE_HASH /namespace/foo patchy3
+EXPECT_WITHIN 10 "Y" check_samples WRITEV $NAMESPACE_HASH /namespace/hey patchy6
+
+# Test stat (tagged by loc)
+stat $N0/namespace/bar &> /dev/null
+stat $N0/namespace/foo &> /dev/null
+stat $N0/namespace/hey &> /dev/null
+EXPECT_WITHIN 10 "Y" check_samples STAT $NAMESPACE_HASH /namespace/bar patchy0
+EXPECT_WITHIN 10 "Y" check_samples STAT $NAMESPACE_HASH /namespace/foo patchy3
+EXPECT_WITHIN 10 "Y" check_samples STAT $NAMESPACE_HASH /namespace/hey patchy6
+
+EXPECT_WITHIN 10 "Y" umount_nfs $N0;
+sleep 1
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+cat $N0/namespace/bar &> /dev/null
+EXPECT_WITHIN 10 "Y" check_samples READ $NAMESPACE_HASH /namespace/bar patchy0
+
+dir $N0/namespace &> /dev/null
+EXPECT_WITHIN 10 "Y" check_samples LOOKUP $NAMESPACE_HASH /namespace patchy0
+
+mkdir -p $N0/namespace{2,3}
+EXPECT_WITHIN 10 "Y" check_samples MKDIR $NAMESPACE2_HASH /namespace2 patchy0
+EXPECT_WITHIN 10 "Y" check_samples MKDIR $NAMESPACE3_HASH /namespace3 patchy0
+
+touch $N0/namespace2/file
+touch $N0/namespace3/file
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE2_HASH /namespace2/file patchy0
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE3_HASH /namespace3/file patchy0
+
+truncate -s 0 $N0/namespace/bar
+EXPECT_WITHIN 10 "Y" check_samples TRUNCATE $NAMESPACE_HASH /namespace/bar patchy0
+
+ln -s $N0/namespace/foo $N0/namespace/foo_link
+EXPECT_WITHIN 10 "Y" check_samples SYMLINK $NAMESPACE_HASH /namespace/foo patchy3
+
+open $N0/namespace/hey
+EXPECT_WITHIN 10 "Y" check_samples OPEN $NAMESPACE_HASH /namespace/hey patchy6
+
+cleanup;
diff --git a/tests/basic/netgroup_parsing.t b/tests/basic/netgroup_parsing.t
new file mode 100644
index 00000000000..cf8d871f1f8
--- /dev/null
+++ b/tests/basic/netgroup_parsing.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+NG_FILES=$(dirname $0)/../configfiles
+cleanup;
+
+function test_ng_1 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 1p
+}
+
+function test_ng_2 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 2p
+}
+
+function test_ng_3 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 3p
+}
+
+function test_ng_4 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 4p
+}
+
+function test_bad_ng ()
+{
+ glusterfsd --print-netgroups $1 2>&1 | sed -n 1p
+}
+
+function test_large_file ()
+{
+ # The build system needs this path for the test to pass.
+ # This is an important test because this file is ~1800 lines
+ # longs and is a "real-world" netgroups file.
+ glusterfsd --print-netgroups ~/opsfiles/storage/netgroup/netgroup | sed -n 1p
+}
+
+function test_empty_ng ()
+{
+ glusterfsd --print-netgroups $1 2>&1 | sed -n 2p
+}
+
+EXPECT_KEYWORD "ng3 (dev-1763.prn-2.example.com,,)" test_ng_1 $NG_FILES/netgroups
+EXPECT_KEYWORD "ng2 (dev1763.prn2.example.com,,)" test_ng_2 $NG_FILES/netgroups
+EXPECT_KEYWORD "ng1 ng2 (dev1763.prn2.example.com,,)" test_ng_3 $NG_FILES/netgroups
+EXPECT_KEYWORD "asdf ng1 ng2 (dev1763.prn2.example.com,,)" test_ng_4 $NG_FILES/netgroups
+# TODO: get a real-world large netgroup file
+#EXPECT_KEYWORD "wikipedia001.07.prn1 (wikipedia003.prn1.example.com,,)(wikipedia002.prn1.example.com,,)(wikipedia001.prn1.example.com,,)" test_large_file
+EXPECT_KEYWORD "Parse error" test_bad_ng $NG_FILES/bad_netgroups
+EXPECT_KEYWORD "No netgroups were specified except for the parent" test_empty_ng $NG_FILES/bad_netgroups
+
+cleanup;
diff --git a/tests/basic/nl-cache.t b/tests/basic/nl-cache.t
new file mode 100755
index 00000000000..90c778c8a88
--- /dev/null
+++ b/tests/basic/nl-cache.t
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..4}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 group nl-cache
+EXPECT '600' volinfo_field $V0 'performance.nl-cache-timeout'
+EXPECT 'on' volinfo_field $V0 'performance.nl-cache'
+EXPECT '600' volinfo_field $V0 'features.cache-invalidation-timeout'
+EXPECT 'on' volinfo_field $V0 'features.cache-invalidation'
+EXPECT '200000' volinfo_field $V0 'network.inode-lru-limit'
+TEST $CLI volume set $V0 nl-cache-positive-entry on
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST ! ls $M0/file2
+TEST touch $M0/file1
+TEST ! ls $M0/file2
+TEST touch $M0/file2
+TEST ls $M0/file2
+TEST rm $M0/file2
+TEST rm $M0/file1
+
+TEST mkdir $M0/dir1
+TEST ! ls -l $M0/dir1/file
+TEST mkdir $M0/dir1/dir2
+TEST ! ls -l $M0/dir1/file
+TEST ! ls -l $M0/dir1/dir2/file
+TEST ls -l $M0/dir1/dir2
+TEST rmdir $M0/dir1/dir2
+TEST rmdir $M0/dir1
+
+TEST ! ls -l $M0/file2
+TEST touch $M1/file2
+TEST ls -l $M0/file2
+TEST rm $M1/file2
+
+TEST ! ls -l $M0/dir1
+TEST mkdir $M1/dir1
+TEST ls -l $M0/dir1
+TEST ! ls -l $M0/dir1/file1
+TEST mkdir $M1/dir1/dir2
+TEST ! ls -l $M0/dir1/file1
+TEST ls -l $M0/dir1/dir2
+TEST ! ls -l $M1/dir1/file1
+
+TEST touch $M0/dir1/file
+TEST ln $M0/dir1/file $M0/dir1/file_link
+TEST ls -l $M1/dir1/file
+TEST ls -l $M1/dir1/file_link
+TEST rm $M0/dir1/file
+TEST rm $M0/dir1/file_link
+TEST rmdir $M0/dir1/dir2
+TEST rmdir $M0/dir1
+
+#Check mknod
+TEST ! ls -l $M0/dir
+TEST mkdir $M0/dir
+TEST mknod -m 0666 $M0/dir/block b 4 5
+TEST mknod -m 0666 $M0/dir/char c 1 5
+TEST mknod -m 0666 $M0/dir/fifo p
+TEST rm $M0/dir/block
+TEST rm $M0/dir/char
+TEST rm $M0/dir/fifo
+
+#Check getxattr
+TEST touch $M0/file1
+TEST getfattr -d -m. -e hex $M0/file1
+TEST getfattr -n "glusterfs.get_real_filename:file1" $M0;
+TEST getfattr -n "glusterfs.get_real_filename:FILE1" $M0;
+TEST ! getfattr -n "glusterfs.get_real_filename:FILE2" $M0;
+
+#Check statedump
+TEST generate_mount_statedump $V0 $M0
+TEST cleanup_mount_statedump $V0
+
+#Check reconfigure
+TEST $CLI volume reset $V0 nl-cache-timeout
+TEST $CLI volume reset $V0 nl-cache-positive-entry
+TEST $CLI volume reset $V0 nl-cache-limit
+TEST $CLI volume reset $V0 nl-cache-pass-through
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/nufa.t b/tests/basic/nufa.t
new file mode 100644
index 00000000000..cb09fc5bbbf
--- /dev/null
+++ b/tests/basic/nufa.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume set $V0 nfs.disable false
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume set $V0 nufa on;
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-only)
+TEST $GFS --read-only -s $H0 --volfile-id $V0 $M1;
+
+## Wait for volume to register with rpc.mountd
+sleep 5;
+
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup;
diff --git a/tests/basic/op_errnos.t b/tests/basic/op_errnos.t
new file mode 100755
index 00000000000..9c48d7a02ad
--- /dev/null
+++ b/tests/basic/op_errnos.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../snapshot.rc
+
+function get-op_errno-xml()
+{
+ $CLI $1 --xml | xmllint --format - | grep opErrno | sed 's/\(<opErrno>\|<\/opErrno>\)//g'
+}
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+EXPECT 0 get-op_errno-xml "snapshot create snap1 $V0 no-timestamp"
+EXPECT 30806 get-op_errno-xml "snapshot create snap1 imaginary_volume"
+EXPECT 30807 get-op_errno-xml "snapshot delete imaginary_snap"
+EXPECT 30809 get-op_errno-xml "snapshot restore snap1"
+TEST $CLI volume stop $V0
+EXPECT 30810 get-op_errno-xml "snapshot create snap1 $V0"
+TEST $CLI volume start $V0
+EXPECT 30811 get-op_errno-xml "snapshot clone $V0 snap1"
+EXPECT 30812 get-op_errno-xml "snapshot create snap1 $V0 no-timestamp"
+
+EXPECT 0 get-op_errno-xml "snapshot delete snap1"
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t
new file mode 100644
index 00000000000..5e865d602e2
--- /dev/null
+++ b/tests/basic/open-behind/open-behind.t
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+WD="$(dirname "${0}")"
+
+. ${WD}/../../include.rc
+. ${WD}/../../volume.rc
+
+function assign() {
+ local _assign_var="${1}"
+ local _assign_value="${2}"
+
+ printf -v "${_assign_var}" "%s" "${_assign_value}"
+}
+
+function pipe_create() {
+ local _pipe_create_var="${1}"
+ local _pipe_create_name
+ local _pipe_create_fd
+
+ _pipe_create_name="$(mktemp -u)"
+ mkfifo "${_pipe_create_name}"
+ exec {_pipe_create_fd}<>"${_pipe_create_name}"
+ rm "${_pipe_create_name}"
+
+ assign "${_pipe_create_var}" "${_pipe_create_fd}"
+}
+
+function pipe_close() {
+ local _pipe_close_fd="${!1}"
+
+ exec {_pipe_close_fd}>&-
+}
+
+function tester_start() {
+ declare -ag tester
+ local tester_in
+ local tester_out
+
+ pipe_create tester_in
+ pipe_create tester_out
+
+ ${WD}/tester <&${tester_in} >&${tester_out} &
+
+ tester=("$!" "${tester_in}" "${tester_out}")
+}
+
+function tester_send() {
+ declare -ag tester
+ local tester_res
+ local tester_extra
+
+ echo "${*}" >&${tester[1]}
+
+ read -t 3 -u ${tester[2]} tester_res tester_extra
+ echo "${tester_res} ${tester_extra}"
+ if [[ "${tester_res}" == "OK" ]]; then
+ return 0
+ fi
+
+ return 1
+}
+
+function tester_stop() {
+ declare -ag tester
+ local tester_res
+
+ tester_send "quit"
+
+ tester_res=0
+ if ! wait ${tester[0]}; then
+ tester_res=$?
+ fi
+
+ unset tester
+
+ return ${tester_res}
+}
+
+function count_open() {
+ local file="$(realpath "${B0}/${V0}/${1}")"
+ local count="0"
+ local inode
+ local ref
+
+ inode="$(stat -c %i "${file}")"
+
+ for fd in /proc/${BRICK_PID}/fd/*; do
+ ref="$(readlink "${fd}")"
+ if [[ "${ref}" == "${B0}/${V0}/"* ]]; then
+ if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then
+ count="$((${count} + 1))"
+ fi
+ fi
+ done
+
+ echo "${count}"
+}
+
+cleanup
+
+TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0}
+TEST ${CLI} volume set ${V0} flush-behind off
+TEST ${CLI} volume set ${V0} write-behind off
+TEST ${CLI} volume set ${V0} quick-read off
+TEST ${CLI} volume set ${V0} stat-prefetch on
+TEST ${CLI} volume set ${V0} io-cache off
+TEST ${CLI} volume set ${V0} open-behind on
+TEST ${CLI} volume set ${V0} lazy-open off
+TEST ${CLI} volume set ${V0} read-after-open off
+TEST ${CLI} volume start ${V0}
+
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})"
+
+TEST touch "${M0}/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_start
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT_WITHIN 5 "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${CLI} volume set ${V0} lazy-open on
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+sleep 2
+EXPECT "0" count_open "/test"
+TEST tester_send fd write 0 "test"
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+EXPECT "test" tester_send fd read 0 64
+# Even though read-after-open is disabled, use-anonymous-fd is also disabled,
+# so reads need to open the file first.
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+TEST tester_send fd open 1 "${M0}/test"
+EXPECT "2" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "1" count_open "/test"
+TEST tester_send fd close 1
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${CLI} volume set ${V0} read-after-open on
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+EXPECT "test" tester_send fd read 0 64
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST tester_stop
+
+cleanup
diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c
new file mode 100644
index 00000000000..00f02bc5b0a
--- /dev/null
+++ b/tests/basic/open-behind/tester-fd.c
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "tester.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+static int32_t
+fd_open(context_t *ctx, command_t *cmd)
+{
+ obj_t *obj;
+ int32_t fd;
+
+ obj = cmd->args[0].obj.ref;
+
+ fd = open(cmd->args[1].str.data, O_RDWR);
+ if (fd < 0) {
+ return error(errno, "open() failed");
+ }
+
+ obj->type = OBJ_TYPE_FD;
+ obj->fd = fd;
+
+ out_ok("%d", fd);
+
+ return 0;
+}
+
+static int32_t
+fd_close(context_t *ctx, command_t *cmd)
+{
+ obj_t *obj;
+
+ obj = cmd->args[0].obj.ref;
+ obj->type = OBJ_TYPE_NONE;
+
+ if (close(obj->fd) != 0) {
+ return error(errno, "close() failed");
+ }
+
+ out_ok();
+
+ return 0;
+}
+
+static int32_t
+fd_write(context_t *ctx, command_t *cmd)
+{
+ ssize_t len, ret;
+
+ len = strlen(cmd->args[1].str.data);
+ ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len);
+ if (ret < 0) {
+ return error(errno, "write() failed");
+ }
+
+ out_ok("%zd", ret);
+
+ return 0;
+}
+
+static int32_t
+fd_read(context_t *ctx, command_t *cmd)
+{
+ char data[cmd->args[1].num.value + 1];
+ ssize_t ret;
+
+ ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value);
+ if (ret < 0) {
+ return error(errno, "read() failed");
+ }
+
+ data[ret] = 0;
+
+ out_ok("%zd %s", ret, data);
+
+ return 0;
+}
+
+command_t fd_commands[] = {
+ {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))},
+ {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))},
+ {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))},
+ {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))},
+ CMD_END};
diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c
new file mode 100644
index 00000000000..b2da71c8385
--- /dev/null
+++ b/tests/basic/open-behind/tester.c
@@ -0,0 +1,444 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "tester.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+static void *
+mem_alloc(size_t size)
+{
+ void *ptr;
+
+ ptr = malloc(size);
+ if (ptr == NULL) {
+ error(ENOMEM, "Failed to allocate memory (%zu bytes)", size);
+ }
+
+ return ptr;
+}
+
+static void
+mem_free(void *ptr)
+{
+ free(ptr);
+}
+
+static bool
+buffer_create(context_t *ctx, size_t size)
+{
+ ctx->buffer.base = mem_alloc(size);
+ if (ctx->buffer.base == NULL) {
+ return false;
+ }
+
+ ctx->buffer.size = size;
+ ctx->buffer.len = 0;
+ ctx->buffer.pos = 0;
+
+ return true;
+}
+
+static void
+buffer_destroy(context_t *ctx)
+{
+ mem_free(ctx->buffer.base);
+ ctx->buffer.size = 0;
+ ctx->buffer.len = 0;
+}
+
+static int32_t
+buffer_get(context_t *ctx)
+{
+ ssize_t len;
+
+ if (ctx->buffer.pos >= ctx->buffer.len) {
+ len = read(0, ctx->buffer.base, ctx->buffer.size);
+ if (len < 0) {
+ return error(errno, "read() failed");
+ }
+ if (len == 0) {
+ return 0;
+ }
+
+ ctx->buffer.len = len;
+ ctx->buffer.pos = 0;
+ }
+
+ return ctx->buffer.base[ctx->buffer.pos++];
+}
+
+static int32_t
+str_skip_spaces(context_t *ctx, int32_t current)
+{
+ while ((current > 0) && (current != '\n') && isspace(current)) {
+ current = buffer_get(ctx);
+ }
+
+ return current;
+}
+
+static int32_t
+str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current)
+{
+ uint32_t len;
+
+ current = str_skip_spaces(ctx, current);
+
+ len = 0;
+ while ((size > 0) && (current > 0) && (current != '\n') &&
+ !isspace(current)) {
+ len++;
+ *buffer++ = current;
+ size--;
+ current = buffer_get(ctx);
+ }
+
+ if (len == 0) {
+ return error(ENODATA, "Expecting a token");
+ }
+
+ if (size == 0) {
+ return error(ENOBUFS, "Token too long");
+ }
+
+ *buffer = 0;
+
+ return current;
+}
+
+static int32_t
+str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value,
+ int32_t current)
+{
+ char text[32], *ptr;
+ uint64_t num;
+
+ current = str_token(ctx, text, sizeof(text), current);
+ if (current > 0) {
+ num = strtoul(text, &ptr, 0);
+ if ((*ptr != 0) || (num < min) || (num > max)) {
+ return error(ERANGE, "Invalid number");
+ }
+ *value = num;
+ }
+
+ return current;
+}
+
+static int32_t
+str_eol(context_t *ctx, int32_t current)
+{
+ current = str_skip_spaces(ctx, current);
+ if (current != '\n') {
+ return error(EINVAL, "Expecting end of command");
+ }
+
+ return current;
+}
+
+static void
+str_skip(context_t *ctx, int32_t current)
+{
+ while ((current > 0) && (current != '\n')) {
+ current = buffer_get(ctx);
+ }
+}
+
+static int32_t
+cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current)
+{
+ obj_t *obj;
+ uint64_t id;
+
+ current = str_number(ctx, 0, ctx->obj_count, &id, current);
+ if (current <= 0) {
+ return current;
+ }
+
+ obj = &ctx->objs[id];
+ if (obj->type != arg->obj.type) {
+ if (obj->type != OBJ_TYPE_NONE) {
+ return error(EBUSY, "Object is in use");
+ }
+ return error(ENOENT, "Object is not defined");
+ }
+
+ arg->obj.ref = obj;
+
+ return current;
+}
+
+static int32_t
+cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current)
+{
+ return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value,
+ current);
+}
+
+static int32_t
+cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current)
+{
+ return str_token(ctx, arg->str.data, arg->str.size, current);
+}
+
+static int32_t
+cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current)
+{
+ arg_t *arg;
+
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_OBJ:
+ current = cmd_parse_obj(ctx, arg, current);
+ break;
+ case ARG_TYPE_NUM:
+ current = cmd_parse_num(ctx, arg, current);
+ break;
+ case ARG_TYPE_STR:
+ current = cmd_parse_str(ctx, arg, current);
+ break;
+ default:
+ return error(EINVAL, "Unknown argument type");
+ }
+ }
+
+ if (current < 0) {
+ return current;
+ }
+
+ current = str_eol(ctx, current);
+ if (current <= 0) {
+ return error(EINVAL, "Syntax error");
+ }
+
+ return cmd->handler(ctx, cmd);
+}
+
+static int32_t
+cmd_parse(context_t *ctx, command_t *cmds)
+{
+ char text[32];
+ command_t *cmd;
+ int32_t current;
+
+ cmd = cmds;
+ do {
+ current = str_token(ctx, text, sizeof(text), buffer_get(ctx));
+ if (current <= 0) {
+ return current;
+ }
+
+ while (cmd->name != NULL) {
+ if (strcmp(cmd->name, text) == 0) {
+ if (cmd->handler != NULL) {
+ return cmd_parse_args(ctx, cmd, current);
+ }
+ cmd = cmd->cmds;
+ break;
+ }
+ cmd++;
+ }
+ } while (cmd->name != NULL);
+
+ str_skip(ctx, current);
+
+ return error(ENOTSUP, "Unknown command");
+}
+
+static void
+cmd_fini(context_t *ctx, command_t *cmds)
+{
+ command_t *cmd;
+ arg_t *arg;
+
+ for (cmd = cmds; cmd->name != NULL; cmd++) {
+ if (cmd->handler == NULL) {
+ cmd_fini(ctx, cmd->cmds);
+ } else {
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_STR:
+ mem_free(arg->str.data);
+ arg->str.data = NULL;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
+
+static bool
+cmd_init(context_t *ctx, command_t *cmds)
+{
+ command_t *cmd;
+ arg_t *arg;
+
+ for (cmd = cmds; cmd->name != NULL; cmd++) {
+ if (cmd->handler == NULL) {
+ if (!cmd_init(ctx, cmd->cmds)) {
+ return false;
+ }
+ } else {
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_STR:
+ arg->str.data = mem_alloc(arg->str.size);
+ if (arg->str.data == NULL) {
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool
+objs_create(context_t *ctx, uint32_t count)
+{
+ uint32_t i;
+
+ ctx->objs = mem_alloc(sizeof(obj_t) * count);
+ if (ctx->objs == NULL) {
+ return false;
+ }
+ ctx->obj_count = count;
+
+ for (i = 0; i < count; i++) {
+ ctx->objs[i].type = OBJ_TYPE_NONE;
+ }
+
+ return true;
+}
+
+static int32_t
+objs_destroy(context_t *ctx)
+{
+ uint32_t i;
+ int32_t err;
+
+ err = 0;
+ for (i = 0; i < ctx->obj_count; i++) {
+ if (ctx->objs[i].type != OBJ_TYPE_NONE) {
+ err = error(ENOTEMPTY, "Objects not destroyed");
+ break;
+ }
+ }
+
+ mem_free(ctx->objs);
+ ctx->objs = NULL;
+ ctx->obj_count = 0;
+
+ return err;
+}
+
+static context_t *
+init(size_t size, uint32_t objs, command_t *cmds)
+{
+ context_t *ctx;
+
+ ctx = mem_alloc(sizeof(context_t));
+ if (ctx == NULL) {
+ goto failed;
+ }
+
+ if (!buffer_create(ctx, size)) {
+ goto failed_ctx;
+ }
+
+ if (!objs_create(ctx, objs)) {
+ goto failed_buffer;
+ }
+
+ if (!cmd_init(ctx, cmds)) {
+ goto failed_objs;
+ }
+
+ ctx->active = true;
+
+ return ctx;
+
+failed_objs:
+ cmd_fini(ctx, cmds);
+ objs_destroy(ctx);
+failed_buffer:
+ buffer_destroy(ctx);
+failed_ctx:
+ mem_free(ctx);
+failed:
+ return NULL;
+}
+
+static int32_t
+fini(context_t *ctx, command_t *cmds)
+{
+ int32_t ret;
+
+ cmd_fini(ctx, cmds);
+ buffer_destroy(ctx);
+
+ ret = objs_destroy(ctx);
+
+ ctx->active = false;
+
+ return ret;
+}
+
+static int32_t
+exec_quit(context_t *ctx, command_t *cmd)
+{
+ ctx->active = false;
+
+ return 0;
+}
+
+static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)},
+ {"quit", exec_quit, CMD_ARGS()},
+ CMD_END};
+
+int32_t
+main(int32_t argc, char *argv[])
+{
+ context_t *ctx;
+ int32_t res;
+
+ ctx = init(1024, 16, commands);
+ if (ctx == NULL) {
+ return 1;
+ }
+
+ do {
+ res = cmd_parse(ctx, commands);
+ if (res < 0) {
+ out_err(-res);
+ }
+ } while (ctx->active);
+
+ res = fini(ctx, commands);
+ if (res >= 0) {
+ out_ok();
+ return 0;
+ }
+
+ out_err(-res);
+
+ return 1;
+}
diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h
new file mode 100644
index 00000000000..64e940c78fc
--- /dev/null
+++ b/tests/basic/open-behind/tester.h
@@ -0,0 +1,145 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __TESTER_H__
+#define __TESTER_H__
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+
+enum _obj_type;
+typedef enum _obj_type obj_type_t;
+
+enum _arg_type;
+typedef enum _arg_type arg_type_t;
+
+struct _buffer;
+typedef struct _buffer buffer_t;
+
+struct _obj;
+typedef struct _obj obj_t;
+
+struct _context;
+typedef struct _context context_t;
+
+struct _arg;
+typedef struct _arg arg_t;
+
+struct _command;
+typedef struct _command command_t;
+
+enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD };
+
+enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR };
+
+struct _buffer {
+ char *base;
+ uint32_t size;
+ uint32_t len;
+ uint32_t pos;
+};
+
+struct _obj {
+ obj_type_t type;
+ union {
+ int32_t fd;
+ };
+};
+
+struct _context {
+ obj_t *objs;
+ buffer_t buffer;
+ uint32_t obj_count;
+ bool active;
+};
+
+struct _arg {
+ arg_type_t type;
+ union {
+ struct {
+ obj_type_t type;
+ obj_t *ref;
+ } obj;
+ struct {
+ uint64_t value;
+ uint64_t min;
+ uint64_t max;
+ } num;
+ struct {
+ uint32_t size;
+ char *data;
+ } str;
+ };
+};
+
+struct _command {
+ const char *name;
+ int32_t (*handler)(context_t *ctx, command_t *cmd);
+ union {
+ arg_t *args;
+ command_t *cmds;
+ };
+};
+
+#define msg(_stream, _fmt, _args...) \
+ do { \
+ fprintf(_stream, _fmt "\n", ##_args); \
+ fflush(_stream); \
+ } while (0)
+
+#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args)
+#define msg_err(_err, _fmt, _args...) \
+ ({ \
+ int32_t __msg_err = (_err); \
+ msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \
+ ##_args); \
+ -__msg_err; \
+ })
+
+#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args)
+#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args)
+#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args)
+
+#define out_ok(_args...) msg_out("OK " _args)
+#define out_err(_err) msg_out("ERR %d", _err)
+
+#define ARG_END \
+ { \
+ ARG_TYPE_NONE \
+ }
+
+#define CMD_ARGS1(_x, _args...) \
+ .args = (arg_t[]) { _args }
+#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END)
+
+#define CMD_SUB(_cmds) .cmds = _cmds
+
+#define CMD_END \
+ { \
+ NULL, NULL, CMD_SUB(NULL) \
+ }
+
+#define ARG_VAL(_type) \
+ { \
+ ARG_TYPE_OBJ, .obj = {.type = _type } \
+ }
+#define ARG_NUM(_min, _max) \
+ { \
+ ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \
+ }
+#define ARG_STR(_size) \
+ { \
+ ARG_TYPE_STR, .str = {.size = _size } \
+ }
+
+extern command_t fd_commands[];
+
+#endif /* __TESTER_H__ */ \ No newline at end of file
diff --git a/tests/basic/open-fd-snap-delete.t b/tests/basic/open-fd-snap-delete.t
new file mode 100644
index 00000000000..a9f47cac19d
--- /dev/null
+++ b/tests/basic/open-fd-snap-delete.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../snapshot.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+# start glusterd
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume set $V0 nfs.disable false
+
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+# Create file and directory
+TEST touch $M0/f1
+TEST mkdir $M0/dir
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable;
+
+for i in {1..10} ; do echo "file" > $M0/dir/file$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+for i in {11..20} ; do echo "file" > $M0/dir/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $M0/.snaps/snap2/dir/file11;
+TEST fd_cat $fd1
+
+TEST $CLI snapshot delete snap2;
+
+TEST ! fd_cat $fd1;
+
+# the return value of this command (i.e. fd_close) depetends
+# mainly on how the release operation on a file descriptor is
+# handled in snapview-server process. As of now snapview-server
+# returns 0 for the release operation. And it is similar to how
+# posix xlator does. So, as of now the expectation is to receive
+# success for the close operation.
+TEST fd_close $fd1;
+
+# This check is mainly to ensure that the snapshot daemon
+# (snapd) is up and running. If it is not running, the following
+# stat would receive ENOTCONN.
+
+TEST stat $M0/.snaps/snap1/dir/file1
+
+TEST $CLI snapshot delete snap1;
+
+TEST rm -rf $M0/*;
+
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup
diff --git a/tests/basic/peer-parsing.t b/tests/basic/peer-parsing.t
new file mode 100644
index 00000000000..813b65e2ae1
--- /dev/null
+++ b/tests/basic/peer-parsing.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+PEER_DIR="$GLUSTERD_WORKDIR"/peers
+TEST mkdir -p $PEER_DIR
+
+declare -i HOST_NUM=100
+
+create_random_peer_files() {
+ for i in $(seq 0 9); do
+ local peer_uuid=$(uuidgen)
+ # The rules for quoting and variable substitution in
+ # here documents would force this to be even less
+ # readable that way.
+ (
+ echo "state=1"
+ echo "uuid=$peer_uuid"
+ echo "hostname=127.0.0.$HOST_NUM"
+ ) > $PEER_DIR/$peer_uuid
+ HOST_NUM+=1
+ done
+}
+
+create_non_peer_file() {
+ echo "random stuff" > $PEER_DIR/not_a_peer_file
+}
+
+create_malformed_peer_file() {
+ echo "more random stuff" > $PEER_DIR/$(uuidgen)
+}
+
+# We create lots of files, in batches, to ensure that our bogus ones are
+# properly interspersed with the valid ones.
+
+TEST create_random_peer_files
+TEST create_non_peer_file
+TEST create_random_peer_files
+TEST create_malformed_peer_file
+TEST create_random_peer_files
+
+# There should be 30 peers, not counting the two bogus files.
+TEST glusterd
+N_PEERS=$($CLI peer status | grep ^Uuid: | wc -l)
+TEST [ "$N_PEERS" = "30" ]
+
+# For extra credit, check the logs for messages about bogus files.
+
+cleanup
+
+
+
diff --git a/tests/basic/pgfid-feat.t b/tests/basic/pgfid-feat.t
new file mode 100644
index 00000000000..a7baeec7b7a
--- /dev/null
+++ b/tests/basic/pgfid-feat.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function get_ancestry_path() {
+ local path=$1
+ local ancestry=$(getfattr --absolute-names -e text -n glusterfs.ancestry.path "$M0/$path" | grep "^glusterfs.ancestry.path" | cut -d"=" -f2 | tr -d \");
+ echo $ancestry;
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume set $V0 build-pgfid on;
+
+TEST $CLI volume start $V0;
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/a;
+TEST touch $M0/a/b;
+
+EXPECT "/a/b" get_ancestry_path "/a/b";
+
+TEST $CLI volume set $V0 build-pgfid off;
+EXPECT "" get_ancestry_path "/a/b";
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/playground/template-xlator-sanity.t b/tests/basic/playground/template-xlator-sanity.t
new file mode 100755
index 00000000000..1c665502bfe
--- /dev/null
+++ b/tests/basic/playground/template-xlator-sanity.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/single-brick
+cat > $B0/template.vol <<EOF
+volume posix
+ type storage/posix
+ option directory $B0/single-brick
+end-volume
+
+volume template
+ type playground/template
+ subvolumes posix
+ option dummy 13
+end-volume
+EOF
+
+TEST glusterfs -f $B0/template.vol $M0
+
+TEST $(dirname $0)/../rpc-coverage.sh --no-locks $M0
+
+# Take statedump to get maximum code coverage
+pid=$(ps auxww | grep glusterfs | grep -E "template.vol" | awk '{print $2}' | head -1)
+
+TEST generate_statedump $pid
+
+# For monitor output
+kill -USR2 $pid
+
+# Handle SIGHUP and reconfigure
+sed -i -e '/s/dummy 13/dummy 42/g' $B0/template.vol
+kill -HUP $pid
+
+# for calling 'fini()'
+kill -TERM $pid
+
+force_umount $M0
+
+cleanup;
diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t
new file mode 100644
index 00000000000..0e4a1bb409f
--- /dev/null
+++ b/tests/basic/posix/shared-statfs.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+#Test that statfs is not served from posix backend FS.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+LO1=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+
+total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}')
+#Account for rounding error
+brick_blocks_two_percent_less=$((total_brick_blocks*98/100))
+# Create a subdir in mountpoint and use that for volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
+# Keeping the size less than 200M mainly because XFS will use
+# some storage in brick to keep its own metadata.
+TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+# From the same mount point, share another 2 bricks with the volume
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
+TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+cleanup;
diff --git a/tests/basic/posix/zero-fill-enospace.c b/tests/basic/posix/zero-fill-enospace.c
new file mode 100644
index 00000000000..b1f142c6be9
--- /dev/null
+++ b/tests/basic/posix/zero-fill-enospace.c
@@ -0,0 +1,67 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ off_t size = 0;
+
+ if (argc != 6) {
+ fprintf(stderr,
+ "Syntax: %s <host> <volname> <file-path> <log-file> <size>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, argv[4], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ size = strtol(argv[5], NULL, 10);
+ if (size < 0) {
+ fprintf(stderr, "Wrong size %s", argv[5]);
+ goto out;
+ }
+ ret = glfs_zerofill(fd, 0, size);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_zerofill: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/basic/posix/zero-fill-enospace.t b/tests/basic/posix/zero-fill-enospace.t
new file mode 100644
index 00000000000..ac2e61b10cf
--- /dev/null
+++ b/tests/basic/posix/zero-fill-enospace.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST truncate -s 100M $B0/brick1
+
+TEST L1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $L1
+
+TEST mkdir -p $B0/${V0}1
+
+TEST MOUNT_LOOP $L1 $B0/${V0}1
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+TEST touch $M0/foo
+TEST build_tester $(dirname $0)/zero-fill-enospace.c -lgfapi -Wall -O2
+TEST ! $(dirname $0)/zero-fill-enospace $H0 $V0 /foo `gluster --print-logdir`/glfs-$V0.log 104857600
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}1
+rm -f ${B0}/brick1
+
+cleanup
diff --git a/tests/basic/posixonly.t b/tests/basic/posixonly.t
new file mode 100755
index 00000000000..4844818fcc3
--- /dev/null
+++ b/tests/basic/posixonly.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/posixonly
+cat > $B0/posixonly.vol <<EOF
+volume poisxonly
+ type storage/posix
+ option directory $B0/posixonly
+end-volume
+EOF
+
+TEST glusterfs -f $B0/posixonly.vol $M0;
+
+TEST touch $M0/filename;
+TEST stat $M0/filename;
+TEST mkdir $M0/dirname;
+TEST stat $M0/dirname;
+TEST touch $M0/dirname/filename;
+TEST stat $M0/dirname/filename;
+TEST ln $M0/dirname/filename $M0/dirname/linkname;
+TEST chown 100:100 $M0/dirname/filename;
+TEST chown 100:100 $M0/dirname;
+TEST rm -rf $M0/filename $M0/dirname;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/quick-read-with-upcall.t b/tests/basic/quick-read-with-upcall.t
new file mode 100644
index 00000000000..dfb751dfcdb
--- /dev/null
+++ b/tests/basic/quick-read-with-upcall.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+ #. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 --direct-io-mode=enable $M0;
+TEST glusterfs -s $H0 --volfile-id $V0 --direct-io-mode=enable $M1;
+
+D0="test-message0";
+D1="test-message1";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/test.txt" "$D0"
+EXPECT "$D0" cat $M0/test.txt
+EXPECT "$D0" cat $M1/test.txt
+
+TEST write_to "$M0/test.txt" "$D1"
+EXPECT "$D1" cat $M0/test.txt
+EXPECT "$D0" cat $M1/test.txt
+
+sleep 1
+
+# TODO: This line normally fails
+EXPECT "$D1" cat $M1/test.txt
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 performance.quick-read-cache-timeout 15
+TEST $CLI volume set $V0 performance.md-cache-timeout 15
+
+TEST write_to "$M0/test1.txt" "$D0"
+EXPECT "$D0" cat $M0/test1.txt
+EXPECT "$D0" cat $M1/test1.txt
+
+TEST write_to "$M0/test1.txt" "$D1"
+EXPECT "$D1" cat $M0/test1.txt
+EXPECT "$D0" cat $M1/test1.txt
+
+sleep 1
+EXPECT "$D0" cat $M1/test1.txt
+
+sleep 30
+EXPECT "$D1" cat $M1/test1.txt
+
+TEST $CLI volume set $V0 performance.quick-read-cache-invalidation on
+TEST $CLI volume set $V0 performance.cache-invalidation on
+
+TEST write_to "$M0/test2.txt" "$D0"
+EXPECT "$D0" cat $M0/test2.txt
+EXPECT "$D0" cat $M1/test2.txt
+
+TEST write_to "$M0/test2.txt" "$D1"
+EXPECT "$D1" cat $M0/test2.txt
+EXPECT "$D1" cat $M1/test2.txt
diff --git a/tests/basic/quota-ancestry-building.t b/tests/basic/quota-ancestry-building.t
new file mode 100755
index 00000000000..fcb39ee31f5
--- /dev/null
+++ b/tests/basic/quota-ancestry-building.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+# This tests quota enforcing on an inode without any path information.
+# This should cover anon-fd type of workload as well.
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=8
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0;
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1B
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $M0/$deep
+
+TEST touch $M0/$deep/file1 $M0/$deep/file2 $M0/$deep/file3 $M0/$deep/file4
+
+TEST fd_open 3 'w' "$M0/$deep/file1"
+TEST fd_open 4 'w' "$M0/$deep/file2"
+TEST fd_open 5 'w' "$M0/$deep/file3"
+TEST fd_open 6 'w' "$M0/$deep/file4"
+
+# consume all quota
+TEST ! $QDD $M0/$deep/file 256 4
+
+# simulate name-less lookups for re-open where the parent information is lost.
+# Stopping and starting the bricks will trigger client re-open which happens on
+# a gfid without any parent information. Since no operations are performed on
+# the fds {3..6} every-xl will be under the impression that they are good fds
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+for i in $(seq 3 6); do
+# failing writes indicate that we are enforcing quota set on /
+TEST_IN_LOOP ! fd_write $i "content"
+TEST_IN_LOOP sync
+done
+
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332020
diff --git a/tests/basic/quota-anon-fd-nfs.t b/tests/basic/quota-anon-fd-nfs.t
new file mode 100755
index 00000000000..9e6675af6ec
--- /dev/null
+++ b/tests/basic/quota-anon-fd-nfs.t
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+. $(dirname $0)/../fileio.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=16
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+
+# The test makes use of inode-lru-limit to hit a scenario, where we
+# find an inode whose ancestry is not there. Following is the
+# hypothesis (which is confirmed by seeing logs indicating that
+# codepath has been executed, but not through a good understanding of
+# NFS internals).
+
+# At the end of an fop, the reference count of an inode would be
+# zero. The inode (and its ancestry) persists in memory only
+# because of non-zero lookup count. These looked up inodes are put
+# in an lru queue of size 1 (here). So, there can be at most one
+# such inode in memory.
+
+# NFS Server makes use of anonymous fds. So, if it cannot find
+# valid fd, it does a nameless lookup. This gives us an inode
+# whose ancestry is NULL. When a write happens on this inode,
+# quota-enforcer/marker finds a NULL ancestry and asks
+# storage/posix to build it.
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 noac,soft,nolock,vers=3;
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4
+
+TEST fd_open 3 'w' "$N0/$deep/file1"
+TEST fd_open 4 'w' "$N0/$deep/file2"
+TEST fd_open 5 'w' "$N0/$deep/file3"
+TEST fd_open 6 'w' "$N0/$deep/file4"
+
+# consume all quota
+echo "Hello" > $N0/$deep/new_file_1
+echo "World" >> $N0/$deep/new_file_1
+echo 1 >> $N0/$deep/new_file_1
+echo 2 >> $N0/$deep/new_file_1
+
+# Try to create a 1M file which should fail
+TEST ! $QDD $N0/$deep/new_file_2 256 4
+
+
+# At the end of each fop in server, reference count of the
+# inode associated with each of the file above drops to zero and hence
+# put into lru queue. Since lru-limit is set to 1, an fop next file
+# will displace the current inode from itable. This will ensure that
+# when writes happens on same fd, fd resolution results in
+# nameless lookup from server and quota_writev encounters an fd
+# associated with an inode whose parent is not present in itable.
+
+for j in $(seq 1 2); do
+ for i in $(seq 3 6); do
+ # failing writes indicate that we are enforcing quota set on /
+ # even with anonymous fds.
+ TEST_IN_LOOP ! fd_write $i "content"
+ TEST_IN_LOOP sync
+ done
+done
+
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
+
+$CLI volume statedump $V0 all
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# This is ugly, but there seems to be a latent race between other actions and
+# stopping the volume. The visible symptom is that "umount -l" (run from
+# gf_umount_lazy in glusterd) hangs. This happens pretty consistently with the
+# new mem-pool code, though it's not really anything to do with memory pools -
+# just with changed timing. Adding the sleep here makes it work consistently.
+#
+# If anyone else wants to debug the race condition, feel free.
+sleep 3
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/quota-nfs.t b/tests/basic/quota-nfs.t
new file mode 100755
index 00000000000..de94a950a7f
--- /dev/null
+++ b/tests/basic/quota-nfs.t
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+TEST $QDD $N0/$deep/file 256 40
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 20MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST $QDD $N0/$deep/newfile_1 256 20
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/newfile_1
+
+#Unmount and mount to flush the data
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+TEST mount_nfs $H0:/$V0 $N0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/newfile_1
+
+# wait for write behind to complete.
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" quotausage "/"
+
+# Try to create a 100Mb file which should fail
+TEST ! $QDD $N0/$deep/newfile_2 256 400
+TEST rm -f $N0/$deep/newfile_2
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/quota-rename.t b/tests/basic/quota-rename.t
new file mode 100644
index 00000000000..37438689d9f
--- /dev/null
+++ b/tests/basic/quota-rename.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# This regression test tries to ensure renaming a directory with content, and
+# no limit set, is accounted properly, when moved into a directory with quota
+# limit set.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST mkdir -p $M0/dir/dir1
+TEST $CLI volume quota $V0 limit-objects /dir 20
+
+TEST mkdir $M0/dir/dir1/d{1..5}
+TEST touch $M0/dir/dir1/f{1..5}
+TEST mv $M0/dir/dir1 $M0/dir/dir2
+
+#Number of files under /dir is 5
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "5" quota_object_list_field "/dir" 4
+
+#Number of directories under /dir is 7
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "7" quota_object_list_field "/dir" 5
+
+cleanup;
diff --git a/tests/basic/quota.c b/tests/basic/quota.c
new file mode 100644
index 00000000000..809ceb8e54c
--- /dev/null
+++ b/tests/basic/quota.c
@@ -0,0 +1,89 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+ssize_t
+nwrite(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = 0;
+ ssize_t written = 0;
+
+ for (written = 0; written != count; written += ret) {
+ ret = write(fd, buf + written, count - written);
+ if (ret < 0) {
+ if (errno == EINTR)
+ ret = 0;
+ else
+ goto out;
+ }
+ }
+
+ ret = written;
+out:
+ return ret;
+}
+
+int
+file_write(char *filename, int bs, int count)
+{
+ int fd = 0;
+ int ret = -1;
+ int i = 0;
+ char *buf = NULL;
+
+ bs = bs * 1024;
+
+ buf = (char *)malloc(bs);
+ if (buf == NULL)
+ goto out;
+
+ memset(buf, 0, bs);
+
+ fd = open(filename, O_RDWR | O_CREAT | O_SYNC, 0600);
+ while (i < count) {
+ ret = nwrite(fd, buf, bs);
+ if (ret == -1) {
+ close(fd);
+ goto out;
+ }
+ i++;
+ }
+
+ ret = fdatasync(fd);
+ if (ret) {
+ close(fd);
+ goto out;
+ }
+
+ ret = close(fd);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+out:
+ if (buf)
+ free(buf);
+ return ret;
+}
+
+int
+main(int argc, char **argv)
+{
+ if (argc != 4) {
+ printf("Usage: %s <filename> <block size in k> <count>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ if (file_write(argv[1], atoi(argv[2]), atoi(argv[3])) < 0) {
+ perror("write failed");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/quota.t b/tests/basic/quota.t
new file mode 100755
index 00000000000..46d1bafff84
--- /dev/null
+++ b/tests/basic/quota.t
@@ -0,0 +1,236 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+. $(dirname $0)/../dht.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=19
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir -p $M0/test_dir/in_test_dir
+
+## --------------------------------------------------------------------------
+## Verify quota commands and check if quota-deem-statfs is enabled by default
+## --------------------------------------------------------------------------
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+
+TEST $CLI volume quota $V0 limit-usage /test_dir/in_test_dir 150MB
+
+EXPECT "150.0MB" quota_hard_limit "/test_dir/in_test_dir";
+EXPECT "80%" quota_soft_limit "/test_dir/in_test_dir";
+
+TEST $CLI volume quota $V0 remove /test_dir/in_test_dir
+
+EXPECT "100.0MB" quota_hard_limit "/test_dir";
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 10MB
+EXPECT "10.0MB" quota_hard_limit "/test_dir";
+EXPECT "80%" quota_soft_limit "/test_dir";
+
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+## ------------------------------
+## Verify quota enforcement
+## -----------------------------
+
+# Try to create a 12MB file which should fail
+TEST ! $QDD $M0/test_dir/1.txt 256 48
+TEST rm $M0/test_dir/1.txt
+
+# wait for marker's accounting to complete
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test_dir"
+
+TEST $QDD $M0/test_dir/2.txt 256 32
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test_dir"
+
+# Checking internal xattr
+# This confirms that pgfid is also filtered
+TEST ! "getfattr -d -e hex -m . $M0/test_dir/2.txt | grep pgfid ";
+# just check for quota xattr are visible or not
+TEST ! "getfattr -d -e hex -m . $M0/test_dir | grep quota";
+
+# setfattr should fail
+TEST ! setfattr -n trusted.glusterfs.quota.limit-set -v 10 $M0/test_dir;
+
+# remove xattr should fail
+TEST ! setfattr -x trusted.glusterfs.quota.limit-set $M0/test_dir;
+
+TEST rm $M0/test_dir/2.txt
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test_dir"
+
+## rename tests
+TEST $QDD $M0/test_dir/2 256 32
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test_dir"
+TEST mv $M0/test_dir/2 $M0/test_dir/0
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test_dir"
+TEST rm $M0/test_dir/0
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test_dir"
+
+## rename tests under different directories
+TEST mkdir -p $M0/1/2;
+TEST $CLI volume quota $V0 limit-usage /1/2 100MB 70%;
+
+# The corresponding write(3) should fail with EDQUOT ("Disk quota exceeded")
+TEST ! $QDD $M0/1/2/file 256 408
+
+TEST mkdir -p $M0/1/3;
+TEST $QDD $M0/1/3/file 256 408
+
+#The corresponding rename(3) should fail with EDQUOT ("Disk quota exceeded")
+TEST ! mv $M0/1/3/ $M0/1/2/3_mvd;
+
+## ---------------------------
+
+## ------------------------------
+## Check if presence of nfs mount results in ESTALE errors for I/O
+# on a fuse mount. Note: Quota command internally uses a fuse mount,
+# though this may change.
+## -----------------------------
+
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+
+TEST $CLI volume quota $V0 limit-usage /test_dir/in_test_dir 150MB
+
+EXPECT "150.0MB" quota_hard_limit "/test_dir/in_test_dir";
+## -----------------------------
+
+
+###################################################
+## ------------------------------------------------
+## <Test quota functionality in add-brick scenarios>
+## ------------------------------------------------
+###################################################
+QUOTALIMIT=100
+QUOTALIMITROOT=2048
+TESTDIR="addbricktest"
+
+rm -rf $M0/*;
+
+## <Create directories and test>
+## -----------------------------
+# 41-42
+TEST mkdir $M0/$TESTDIR
+TEST mkdir $M0/$TESTDIR/dir{1..10};
+
+
+# 43-52
+## <set limits>
+## -----------------------------
+TEST $CLI volume quota $V0 limit-usage / "$QUOTALIMITROOT"MB;
+for i in {1..10}; do
+ TEST_IN_LOOP $CLI volume quota $V0 limit-usage /$TESTDIR/dir$i \
+ "$QUOTALIMIT"MB;
+done
+## </Enable quota and set limits>
+
+#53-62
+for i in `seq 1 9`; do
+ TEST_IN_LOOP $QDD "$M0/$TESTDIR/dir1/10MBfile$i" 256 40
+done
+
+# 63-64
+## <Add brick and start rebalance>
+## -------------------------------
+TEST $CLI volume add-brick $V0 $H0:$B0/brick{3,4}
+TEST $CLI volume rebalance $V0 start;
+
+## Wait for rebalance
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+## <Try creating data beyond limit>
+## --------------------------------
+for i in `seq 1 200`; do
+ $QDD of="$M0/$TESTDIR/dir1/1MBfile$i" 256 4\
+ 2>&1 | egrep -v '(No space left|Disc quota exceeded)'
+done
+
+# 65
+## <Test whether quota limit crossed more than 10% of limit>
+## ---------------------------------------------------------
+USED_KB=`du -ks $M0/$TESTDIR/dir1 | cut -f1`;
+USED_MB=$(($USED_KB/1024));
+TEST [ $USED_MB -le $((($QUOTALIMIT * 110) / 100)) ]
+
+# 66-67
+## <Test the xattrs healed to new brick>
+## -------------------------------------
+TEST getfattr -d -m "trusted.glusterfs.quota.limit-set" -e hex \
+ --absolute-names $B0/brick{3,4}/$TESTDIR/dir{1..10};
+# Test on root.
+TEST getfattr -d -m "trusted.glusterfs.quota.limit-set" -e hex \
+ --absolute-names $B0/brick{3,4};
+
+## -------------------------------------------------
+## </Test quota functionality in add-brick scenarios>
+## -------------------------------------------------
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+## ---------------------------
+## Test quota volume options
+## ---------------------------
+TEST $CLI volume reset $V0
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 force
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 features.quota-deem-statfs
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs off
+EXPECT 'off' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume quota $V0 disable
+EXPECT 'off' volinfo_field $V0 'features.quota'
+EXPECT 'off' volinfo_field $V0 'features.inode-quota'
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume stop $V0;
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332045
diff --git a/tests/basic/quota_aux_mount.t b/tests/basic/quota_aux_mount.t
new file mode 100755
index 00000000000..78d7f47e373
--- /dev/null
+++ b/tests/basic/quota_aux_mount.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+##-------------------------------------------------------------
+## Tests to verify that aux mount is unmounted after each quota
+## command executes.
+##-------------------------------------------------------------
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir -p $M0/test_dir/
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 150MB
+EXPECT "1" get_limit_aux
+TEST $CLI volume quota $V0 limit-objects /test_dir 10
+EXPECT "1" get_limit_aux
+EXPECT "150.0MB" quota_hard_limit "/test_dir";
+EXPECT "1" get_list_aux
+EXPECT "10" quota_object_hard_limit "/test_dir";
+EXPECT "1" get_list_aux
+
+TEST $CLI volume quota $V0 remove /test_dir/
+EXPECT "1" get_limit_aux
+TEST $CLI volume quota $V0 remove-objects /test_dir
+EXPECT "1" get_limit_aux
+
+TEST $CLI volume quota $V0 disable
+
+TEST $CLI volume stop $V0;
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1447344
diff --git a/extras/rpc-coverage.sh b/tests/basic/rpc-coverage.sh
index 9148a73e241..6203f0ac7cb 100755
--- a/extras/rpc-coverage.sh
+++ b/tests/basic/rpc-coverage.sh
@@ -47,8 +47,11 @@
#set -e;
set -o pipefail;
+# pull compatibility functions (e.g.: stat replacement if not running Linux)
+. $(dirname $0)/../include.rc
+
function fail() {
- echo "$*: failed.";
+ echo "$*: failed.";
exit 1;
}
@@ -71,8 +74,8 @@ function test_statfs()
{
local size;
- size=$(stat -f -c '%s' $PFX/dir/file);
- test "x$size" != "x0" || fail "statfs"
+ mode=$(stat -c '%a' $PFX/dir/file);
+ test "x$mode" == "x644" || fail "statfs"
}
@@ -85,8 +88,8 @@ function test_open()
function test_write()
{
- dd if=/dev/zero of=$PFX/dir/file bs=65536 count=16 2>/dev/null;
- test $(stat -c '%s' $PFX/dir/file) == 1048576 || fail "open"
+ dd if=/dev/zero of=$PFX/dir/file bs=65536 count=16
+ test $(stat -c '%s' $PFX/dir/file) == 1048576 || fail "write"
}
@@ -114,7 +117,9 @@ function test_fstat()
local msg;
export PFX;
- msg=$(sh -c 'tail -f $PFX/dir/file --pid=$$ & sleep 1 && echo hooha > $PFX/dir/file && sleep 1');
+ echo hooha > $PFX/dir/file
+ sleep 1
+ msg=$(sh -c 'tail $PFX/dir/file')
test "x$msg" == "xhooha" || fail "fstat"
}
@@ -139,7 +144,8 @@ function test_symlink()
{
local msg;
- ln -s $PFX/dir/file $PFX/dir/symlink;
+ ( cd $PFX/dir && ln -s file symlink; )
+
test "$(stat -c '%F' $PFX/dir/symlink)" == "symbolic link" || fail "Creation of symlink"
msg=$(cat $PFX/dir/symlink);
@@ -352,21 +358,21 @@ function test_utimes()
function test_locks()
{
- exec 200>$PFX/dir/lockfile || fail "exec"
+ exec 100>$PFX/dir/lockfile || fail "exec"
## exclusive locks test
- flock -e 200 || fail "flock -e"
- ! flock -n -e $PFX/dir/lockfile -c true || fail "! flock -n -e"
+ flock -x 100 || fail "flock -x"
+ ! flock -n -x $PFX/dir/lockfile -c true || fail "! flock -n -x"
! flock -n -s $PFX/dir/lockfile -c true || fail "! flock -n -s"
- flock -u 200 || fail "flock -u"
+ flock -u 100 || fail "flock -u"
## shared locks test
- flock -s 200 || fail "flock -s"
- ! flock -n -e $PFX/dir/lockfile -c true || fail "! flock -n -e"
+ flock -s 100 || fail "flock -s"
+ ! flock -n -x $PFX/dir/lockfile -c true || fail "! flock -n -x"
flock -n -s $PFX/dir/lockfile -c true || fail "! flock -n -s"
- flock -u 200 || fail "flock -u"
+ flock -u 100 || fail "flock -u"
- exec 200>&- || fail "exec"
+ exec 100>&- || fail "exec"
}
@@ -413,9 +419,15 @@ function test_rmdir()
rm -rf $PFX || fail "rm -rf"
}
+function test_statvfs()
+{
+ df $DIR 2>&1 || fail "df"
+}
+
function run_tests()
{
+ test_statvfs;
test_mkdir;
test_create;
test_statfs;
@@ -430,13 +442,15 @@ function run_tests()
test_rename;
test_chmod;
test_chown;
- test_utimes;
- test_locks;
test_readdir;
test_setxattr;
test_listxattr;
test_getxattr;
test_removexattr;
+ if [ "$run_lock_tests" = "1" ]; then
+ test_locks;
+ fi
+ test_utimes;
test_unlink;
test_rmdir;
}
@@ -447,14 +461,19 @@ function _init()
DIR=$(pwd);
}
-
+run_lock_tests=1
function parse_cmdline()
{
if [ "x$1" == "x" ] ; then
- echo "Usage: $0 /path/mount"
+ echo "Usage: $0 [--no-locks] /path/mount"
exit 1
fi
+ if [ "$1" == "--no-locks" ] ; then
+ run_lock_tests=0
+ shift
+ fi
+
DIR=$1;
if [ ! -d "$DIR" ] ; then
diff --git a/tests/basic/rpc-coverage.t b/tests/basic/rpc-coverage.t
new file mode 100755
index 00000000000..2c1bcd5a63a
--- /dev/null
+++ b/tests/basic/rpc-coverage.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '9' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/rpc-coverage.sh $M1
+cleanup;
diff --git a/tests/basic/sdfs-sanity.t b/tests/basic/sdfs-sanity.t
new file mode 100644
index 00000000000..16d0bed866f
--- /dev/null
+++ b/tests/basic/sdfs-sanity.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+TEST $CLI volume set $V0 features.sdfs enable;
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+# This test covers lookup, mkdir, mknod, symlink, link, rename,
+# create operations
+TEST $(dirname $0)/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/basic/seek.c b/tests/basic/seek.c
new file mode 100644
index 00000000000..54fa6f463af
--- /dev/null
+++ b/tests/basic/seek.c
@@ -0,0 +1,182 @@
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+
+static char buffer[65536];
+
+static int
+parse_int(const char *text, size_t *value)
+{
+ char *ptr;
+ size_t val;
+
+ val = strtoul(text, &ptr, 0);
+ if (*ptr != 0) {
+ return 0;
+ }
+
+ *value = val;
+
+ return 1;
+}
+
+static int
+fill_area(int fd, off_t offset, size_t size)
+{
+ size_t len;
+ ssize_t res;
+
+ while (size > 0) {
+ len = sizeof(buffer);
+ if (len > size) {
+ len = size;
+ }
+ res = pwrite(fd, buffer, len, offset);
+ if (res < 0) {
+ fprintf(stderr, "pwrite(%d, %p, %lu, %lu) failed: %d\n", fd, buffer,
+ size, offset, errno);
+ return 0;
+ }
+ if (res != len) {
+ fprintf(stderr,
+ "pwrite(%d, %p, %lu, %lu) didn't wrote all "
+ "data: %lu/%lu\n",
+ fd, buffer, size, offset, res, len);
+ return 0;
+ }
+ offset += len;
+ size -= len;
+ }
+
+ return 1;
+}
+
+static void
+syntax(void)
+{
+ fprintf(stderr, "Syntax: seek create <path> <offset> <size> [...]\n");
+ fprintf(stderr, " seek scan <path> data|hole <offset>\n");
+}
+
+static int
+seek_create(const char *path, int argc, char *argv[])
+{
+ size_t off, size;
+ int fd;
+ int ret = 1;
+
+ fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "Failed to create the file\n");
+ goto out;
+ }
+
+ while (argc > 0) {
+ if (!parse_int(argv[0], &off) || !parse_int(argv[1], &size)) {
+ syntax();
+ goto out_close;
+ }
+ if (!fill_area(fd, off, size)) {
+ goto out_close;
+ }
+ argv += 2;
+ argc -= 2;
+ }
+
+ ret = 0;
+
+out_close:
+ close(fd);
+out:
+ return ret;
+}
+
+static int
+seek_scan(const char *path, const char *type, const char *pos)
+{
+ size_t off, res;
+ int fd, whence;
+ int ret = 1;
+
+ if (strcmp(type, "data") == 0) {
+ whence = SEEK_DATA;
+ } else if (strcmp(type, "hole") == 0) {
+ whence = SEEK_HOLE;
+ } else {
+ syntax();
+ goto out;
+ }
+
+ if (!parse_int(pos, &off)) {
+ syntax();
+ goto out;
+ }
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "Failed to open the file\n");
+ goto out;
+ }
+
+ res = lseek(fd, off, whence);
+ if (res == (off_t)-1) {
+ if (errno != ENXIO) {
+ fprintf(stderr, "seek(%d, %lu, %d) failed: %d\n", fd, off, whence,
+ errno);
+ goto out_close;
+ }
+ fprintf(stdout, "ENXIO\n");
+ } else {
+ fprintf(stdout, "%lu\n", res);
+ }
+
+ ret = 0;
+
+out_close:
+ close(fd);
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 1;
+
+ memset(buffer, 0x55, sizeof(buffer));
+
+ if (argc < 3) {
+ syntax();
+ goto out;
+ }
+
+ if (strcmp(argv[1], "create") == 0) {
+ if (((argc - 3) & 1) != 0) {
+ syntax();
+ goto out;
+ }
+ ret = seek_create(argv[2], argc - 3, argv + 3);
+ } else if (strcmp(argv[1], "scan") == 0) {
+ if (argc != 5) {
+ syntax();
+ goto out;
+ }
+ ret = seek_scan(argv[2], argv[3], argv[4]);
+ } else {
+ syntax();
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/basic/shd-mux-afr.t b/tests/basic/shd-mux-afr.t
new file mode 100644
index 00000000000..cf300c148bb
--- /dev/null
+++ b/tests/basic/shd-mux-afr.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Create a one more volume
+TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_1
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_1 cluster.eager-lock off
+TEST $CLI volume set ${V0}_1 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_1 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_1
+TEST $CLI volume delete ${V0}_1
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+cleanup
diff --git a/tests/basic/shd-mux-ec.t b/tests/basic/shd-mux-ec.t
new file mode 100644
index 00000000000..ef4d65018d3
--- /dev/null
+++ b/tests/basic/shd-mux-ec.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Now create a ec volume and check mux works
+TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_2
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_2 cluster.eager-lock off
+TEST $CLI volume set ${V0}_2 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_2 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_2
+TEST $CLI volume delete ${V0}_2
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to zero for ec related threads
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t
new file mode 100644
index 00000000000..ed73fd1d14a
--- /dev/null
+++ b/tests/basic/stats-dump.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 diagnostics.latency-measurement on
+TEST $CLI volume set $V0 diagnostics.count-fop-hits on
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 1
+TEST $CLI volume set $V0 performance.nfs.io-threads on
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mount_nfs $H0:/$V0 $N0 nolock,soft,intr
+
+for i in {1..10};do
+ dd if=/dev/zero of=$M0/fuse_testfile$i bs=4k count=100
+done
+
+for i in {1..10};do
+ dd if=/dev/zero of=$N0/nfs_testfile$i bs=4k count=100
+done
+
+# Wait for one dump interval to be done, some seconds past 1 that is the dump
+# interval set
+sleep 2
+
+# Change the dump interval to 0, so that when reading the file contents we
+# do not get them truncated by the next interval that is overwriting the latest
+# stats data
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 0
+
+# Verify we have non-zero write counts from the bricks, gNFSd
+# and the FUSE mount.
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfs_patchy.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy0.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump|tail -1|cut -d: -f2) != "0," ]
+
+# Test that io-stats is getting queue sizes from io-threads
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy0.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump
+
+cleanup;
diff --git a/tests/basic/symbol-check.sh b/tests/basic/symbol-check.sh
new file mode 100755
index 00000000000..0f8243ca731
--- /dev/null
+++ b/tests/basic/symbol-check.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+syscalls=$'access\nchmod\nchown\nclose\nclosedir\ncreat64\n\
+fallocate64\nfchmod\nfchown\nfdatasync\nfgetxattr\nflistxattr\n\
+fremovexattr\nfsetxattr\nfsync\nftruncate64\n__fxstat64\n\
+__fxstatat64\nlchown\nlgetxattr\nlinkat\nllistxattr\nlremovexattr\n\
+lseek64\nlsetxattr\n__lxstat64\nmkdir\nmkdirat\nopenat64\nopendir\n\
+pread64\npwrite64\npreadv64\npwritev64\nread\nreaddir64\nreadlink\n\
+readv\nrename\nrmdir\n statvfs64\nsymlink\n\truncate64\nunlink\n\
+utimeswrite\nwritev\n\__xmknod\n__xstat64'
+
+syscalls32=$'creat\nfallocate\nftruncate\n__fxstat\n__fxstatat\n\
+lseek\n__lxstat\nopenat\nreaddir\nstatvfs\ntruncate\nstat\n\
+preadv\npwritev\npread\npwrite'
+
+glibccalls=$'tmpfile'
+
+exclude_files=$'/libglusterfs/src/.libs/libglusterfs_la-syscall.o\n\
+/libglusterfs/src/.libs/libglusterfs_la-gen_uuid.o\n\
+/contrib/fuse-util/fusermount.o\n\
+/contrib/fuse-util/mount_util.o\n\
+/contrib/fuse-util/mount-common.o\n\
+/xlators/mount/fuse/src/.libs/mount.o\n\
+/xlators/mount/fuse/src/.libs/mount-common.o\n\
+/xlators/features/qemu-block/src/.libs/block.o\n\
+/xlators/features/qemu-block/src/.libs/cutils.o\n\
+/xlators/features/qemu-block/src/.libs/oslib-posix.o'
+
+function main()
+{
+ for exclude_file in ${exclude_files}; do
+ if [[ ${1} = *${exclude_file} ]]; then
+ exit 0
+ fi
+ done
+
+ local retval=0
+ local t
+ t=$(nm "${1}" | grep " U " | sed -e "s/ //g" -e "s/ U //g")
+
+ for symy in ${t}; do
+
+ for symx in ${syscalls}; do
+
+ if [[ ${symx} = "${symy}" ]]; then
+
+ case ${symx} in
+ "creat64") sym="creat";;
+ "fallocate64") sym="fallocate";;
+ "ftruncate64") sym="ftruncate";;
+ "lseek64") sym="lseek";;
+ "preadv64") sym="preadv";;
+ "pwritev64") sym="pwritev";;
+ "pread64") sym="pread";;
+ "pwrite64") sym="pwrite";;
+ "openat64") sym="openat";;
+ "readdir64") sym="readdir";;
+ "truncate64") sym="truncate";;
+ "__statvfs64") sym="statvfs";;
+ "__fxstat64") sym="fstat";;
+ "__fxstatat64") sym="fstatat";;
+ "__lxstat64") sym="lstat";;
+ "__xmknod") sym="mknod";;
+ "__xstat64") sym="stat";;
+ *) sym=${symx};;
+ esac
+
+ echo "${1} should call sys_${sym}, not ${sym}" >&2
+ retval=1
+ fi
+
+ done
+
+ for symx in ${syscalls32}; do
+
+ if [[ ${symx} = "${symy}" ]]; then
+
+ echo "${1} was not compiled with -D_FILE_OFFSET_BITS=64" >&2
+ retval=1
+ fi
+ done
+
+ symy_glibc=$(echo "${symy}" | sed -e "s/@@GLIBC.*//g")
+ # Eliminate false positives, check if we have a GLIBC symbol in 'y'
+ if [[ ${symy} != "${symy_glibc}" ]]; then
+ for symx in ${glibccalls}; do
+
+ if [[ ${symx} = "${symy_glibc}" ]]; then
+
+ case ${symx} in
+ "tmpfile") alt="mkstemp";;
+ *) alt="none";;
+ esac
+
+ if [[ ${alt} = "none" ]]; then
+ echo "${1} should not call ${symy_glibc}";
+ else
+ echo "${1} should use ${alt} instead of ${symy_glibc}" >&2;
+ fi
+
+ retval=1
+ fi
+ done
+ fi
+
+ done
+
+ if [ ${retval} = 1 ]; then
+ touch ./.symbol-check-errors
+ fi
+ exit ${retval}
+}
+
+main "$@"
diff --git a/tests/basic/trace.t b/tests/basic/trace.t
new file mode 100755
index 00000000000..01e7c9e0a25
--- /dev/null
+++ b/tests/basic/trace.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/single-brick
+cat > $B0/template.vol <<EOF
+volume posix
+ type storage/posix
+ option directory $B0/single-brick
+end-volume
+
+volume trace
+ type debug/trace
+ option log-file yes
+ option log-history yes
+ subvolumes posix
+end-volume
+EOF
+
+TEST glusterfs -f $B0/template.vol $M0
+
+TEST $(dirname $0)/rpc-coverage.sh --no-locks $M0
+
+# Take statedump to get maximum code coverage
+pid=$(ps auxww | grep glusterfs | grep -E "template.vol" | awk '{print $2}' | head -1)
+
+TEST generate_statedump $pid
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Now, use the glusterd way of enabling trace
+TEST glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume set $V0 debug.trace marker
+TEST $CLI volume set $V0 debug.log-file yes
+#TEST $CLI volume set $V0 debug.log-history yes
+
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/rpc-coverage.sh --no-locks $M1
+cp $(dirname ${0})/gfapi/glfsxmp-coverage.c ./glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0 > /dev/null
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup;
diff --git a/tests/basic/uss.t b/tests/basic/uss.t
new file mode 100644
index 00000000000..09dd00ef995
--- /dev/null
+++ b/tests/basic/uss.t
@@ -0,0 +1,421 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../snapshot.rc
+. $(dirname $0)/../fileio.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function check_readonly()
+{
+ $@ 2>&1 | grep -q 'Read-only file system'
+ return $?
+}
+
+function lookup()
+{
+ ls $1
+ if [ "$?" == "0" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+TESTS_EXPECTED_IN_LOOP=10
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume set $V0 nfs.disable false
+
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+# Create file and hard-links
+TEST touch $M0/f1
+TEST mkdir $M0/dir
+TEST ln $M0/f1 $M0/f2
+TEST ln $M0/f1 $M0/dir/f3
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable;
+
+TEST ! $CLI snapshot create snap1 $V0 no-timestamp description "";
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+########### Test inode numbers ###########
+s1_f1_ino=$(STAT_INO $M0/.snaps/snap1/f1)
+TEST [ $s1_f1_ino != 0 ]
+
+# Inode number of f1 should be same as f2 f3 within snapshot
+EXPECT $s1_f1_ino STAT_INO $M0/.snaps/snap1/f2
+EXPECT $s1_f1_ino STAT_INO $M0/.snaps/snap1/dir/f3
+EXPECT $s1_f1_ino STAT_INO $M0/dir/.snaps/snap1/f3
+
+# Inode number of f1 in snap1 should be different from f1 in snap2
+tmp_ino=$(STAT_INO $M0/.snaps/snap2/f1)
+TEST [ $s1_f1_ino != $tmp_ino ]
+
+# Inode number of f1 in snap1 should be different from f1 in regular volume
+tmp_ino=$(STAT_INO $M0/f1)
+TEST [ $s1_f1_ino != $tmp_ino ]
+
+# Directory inode of snap1 should be different in each sub-dir
+s1_ino=$(STAT_INO $M0/.snaps/snap1)
+tmp_ino=$(STAT_INO $M0/dir/.snaps/snap1)
+TEST [ $s1_ino != $tmp_ino ]
+##########################################
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap4 $V0 no-timestamp;
+## Test that features.uss takes only options enable/disable and throw error for
+## any other argument.
+for i in {1..10}; do
+ RANDOM_STRING=$(uuidgen | tr -dc 'a-zA-Z' | head -c 8)
+ TEST_IN_LOOP ! $CLI volume set $V0 features.uss $RANDOM_STRING
+done
+
+## Test that features.snapshot-directory:
+## contains only '0-9a-z-_'
+# starts with dot (.)
+# value cannot exceed 255 characters
+## and throws error for any other argument.
+TEST ! $CLI volume set $V0 features.snapshot-directory a/b
+TEST ! $CLI volume set $V0 features.snapshot-directory snaps
+TEST ! $CLI volume set $V0 features.snapshot-directory -a
+TEST ! $CLI volume set $V0 features.snapshot-directory .
+TEST ! $CLI volume set $V0 features.snapshot-directory ..
+TEST ! $CLI volume set $V0 features.snapshot-directory .123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# test 15
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" count_snaps $M0
+
+NUM_SNAPS=$(ls $M0/.snaps | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ]
+TEST ls $M0/.snaps/snap1;
+TEST ls $M0/.snaps/snap2;
+TEST ls $M0/.snaps/snap3;
+TEST ls $M0/.snaps/snap4;
+
+TEST ls $M0/.snaps/snap3/dir1;
+TEST ls $M0/.snaps/snap3/dir2;
+
+TEST ls $M0/.snaps/snap4/dir1;
+TEST ls $M0/.snaps/snap4/dir2;
+
+TEST ls $M0/dir1/.snaps/
+TEST ! ls $M0/dir1/.snaps/snap1;
+TEST ! ls $M0/dir2/.snaps/snap2;
+TEST ls $M0/dir1/.snaps/snap3;
+TEST ls $M0/dir2/.snaps/snap4;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $M0/.snaps/snap1/file1;
+TEST fd_cat $fd1
+
+# opening fd with in write mode for snapshot files should fail
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $M0/.snaps/snap1/file2;
+
+# lookup on .snaps in the snapshot world should fail
+TEST ! stat $M0/.snaps/snap1/.snaps
+
+# creating new entries in snapshots should fail
+TEST check_readonly mkdir $M0/.snaps/new
+TEST check_readonly touch $M0/.snaps/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $M0/dir1/.snaps/snap3/foo1
+
+TEST fd_cat $fd3;
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3
+
+
+# similar tests on nfs mount
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+#test 44
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+NUM_SNAPS=$(ls $N0/.snaps | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ];
+
+TEST stat $N0/.snaps/snap1;
+TEST stat $N0/.snaps/snap2;
+
+TEST ls -l $N0/.snaps;
+
+# readdir + lookup on each entry
+TEST ls -l $N0/.snaps/snap1;
+TEST ls -l $N0/.snaps/snap2;
+
+# readdir + access each entry by doing stat. If snapview-server has not
+# filled the fs instance and handle in the inode context of the entry as
+# part of readdirp, then when stat comes (i.e fop comes directly without
+# a previous lookup), snapview-server should do a lookup of the entry via
+# gfapi call and fill in the fs instance + handle information in the inode
+# context
+TEST ls $N0/.snaps/snap3/;
+TEST stat $N0/.snaps/snap3/dir1;
+TEST stat $N0/.snaps/snap3/dir2;
+
+TEST ls -l $N0/.snaps/snap3/dir1;
+TEST ls -l $N0/.snaps/snap3/dir2;
+TEST ls -l $N0/.snaps/snap4/dir1;
+TEST ls -l $N0/.snaps/snap4/dir2;
+
+TEST ! ls -l $N0/dir1/.snaps/snap1;
+TEST ! ls -l $N0/dir2/.snaps/snap2;
+TEST ls -l $N0/dir1/.snaps/snap3;
+TEST ls -l $N0/dir2/.snaps/snap4;
+
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $N0/.snaps/snap1/file1;
+TEST fd_cat $fd1
+
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $N0/.snaps/snap1/file2;
+
+TEST ! stat $N0/.snaps/snap1/.stat
+
+TEST check_readonly mkdir $N0/.snaps/new
+
+TEST check_readonly touch $N0/.snaps/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $N0/dir1/.snaps/snap3/foo1
+
+TEST fd_cat $fd3;
+
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3;
+
+# test 73
+TEST $CLI volume set $V0 "features.snapshot-directory" .history
+
+#snapd client might take fraction of time to compare the volfile from glusterd
+#hence a EXPECT_WITHIN is a better choice here
+EXPECT_WITHIN 2 "Y" lookup "$M0/.history";
+
+NUM_SNAPS=$(ls $M0/.history | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ]
+
+TEST ls $M0/.history/snap1;
+TEST ls $M0/.history/snap2;
+TEST ls $M0/.history/snap3;
+TEST ls $M0/.history/snap4;
+
+TEST ls $M0/.history/snap3/dir1;
+TEST ls $M0/.history/snap3/dir2;
+
+TEST ls $M0/.history/snap4/dir1;
+TEST ls $M0/.history/snap4/dir2;
+
+TEST ls $M0/dir1/.history/
+TEST ! ls $M0/dir1/.history/snap1;
+TEST ! ls $M0/dir2/.history/snap2;
+TEST ls $M0/dir1/.history/snap3;
+TEST ls $M0/dir2/.history/snap4;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $M0/.history/snap1/file1;
+TEST fd_cat $fd1
+
+# opening fd with in write mode for snapshot files should fail
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $M0/.history/snap1/file2;
+
+# lookup on .history in the snapshot world should fail
+TEST ! stat $M0/.history/snap1/.history
+
+# creating new entries in snapshots should fail
+TEST check_readonly mkdir $M0/.history/new
+TEST check_readonly touch $M0/.history/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $M0/dir1/.history/snap3/foo1
+
+TEST fd_cat $fd3;
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3
+
+
+# similar tests on nfs mount
+# test 103
+TEST ls $N0/.history;
+
+NUM_SNAPS=$(ls $N0/.history | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ];
+
+TEST ls -l $N0/.history/snap1;
+TEST ls -l $N0/.history/snap2;
+TEST ls -l $N0/.history/snap3;
+TEST ls -l $N0/.history/snap4;
+
+TEST ls -l $N0/.history/snap3/dir1;
+TEST ls -l $N0/.history/snap3/dir2;
+
+TEST ls -l $N0/.history/snap4/dir1;
+TEST ls -l $N0/.history/snap4/dir2;
+
+TEST ! ls -l $N0/dir1/.history/snap1;
+TEST ! ls -l $N0/dir2/.history/snap2;
+TEST ls -l $N0/dir1/.history/snap3;
+TEST ls -l $N0/dir2/.history/snap4;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $N0/.history/snap1/file1;
+TEST fd_cat $fd1
+
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $N0/.history/snap1/file2;
+
+TEST ! stat $N0/.history/snap1/.stat
+
+TEST check_readonly mkdir $N0/.history/new
+
+TEST check_readonly touch $N0/.history/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $N0/dir1/.history/snap3/foo1
+
+TEST fd_cat $fd3;
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3;
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+#test 131
+TEST $CLI snapshot create snap5 $V0 no-timestamp
+TEST ls $M0/.history;
+
+function count_snaps
+{
+ local mount_point=$1;
+ local num_snaps;
+
+ num_snaps=$(ls $mount_point/.history | wc -l);
+
+ echo $num_snaps;
+}
+
+EXPECT_WITHIN 30 "5" count_snaps $M0;
+
+# deletion of a snapshot and creation of a new snapshot with same name
+# should not create problems. The data that was supposed to be present
+# in the deleted snapshot need not be present in the new snapshot just
+# because the name is same. Ex:
+# 1) Create a file "aaa"
+# 2) Create a snapshot snap6
+# 3) stat the file "aaa" in snap6 and it should succeed
+# 4) delete the file "aaa"
+# 5) Delete the snapshot snap6
+# 6) Create a snapshot snap6
+# 7) stat the file "aaa" in snap6 and it should fail now
+
+echo "aaa" > $M0/aaa;
+
+TEST $CLI snapshot create snap6 $V0 no-timestamp
+
+TEST ls $M0/.history;
+
+EXPECT_WITHIN 30 "6" count_snaps $M0;
+
+EXPECT_WITHIN 10 "Y" lookup $M0/.history/snap6/aaa
+
+TEST rm -f $M0/aaa;
+
+TEST $CLI snapshot delete snap6;
+
+# drop the caches so that, the dentry for "snap6" is
+# is forgotten from the client cache.
+drop_cache $M0
+
+EXPECT_WITHIN 30 "5" count_snaps $M0;
+
+# This should fail, as snap6 just got deleted.
+TEST ! stat $M0/.history/snap6
+
+TEST $CLI snapshot create snap6 $V0 no-timestamp
+
+TEST ls $M0/.history;
+
+EXPECT_WITHIN 30 "6" count_snaps $M0;
+
+TEST ls $M0/.history/snap6/;
+
+TEST ! stat $M0/.history/snap6/aaa;
+
+TEST stat $M0
+
+# done with the tests start cleaning up of things
+TEST $CLI volume set $V0 features.uss disable
+
+TEST $CLI snapshot delete snap6;
+
+TEST $CLI snapshot delete snap5;
+
+TEST $CLI snapshot delete snap4;
+
+TEST $CLI snapshot delete snap3;
+
+TEST $CLI snapshot delete snap2;
+
+TEST $CLI snapshot delete snap1;
+
+# nfs client has been already unmounted at line 333
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/volfile-sanity.t b/tests/basic/volfile-sanity.t
new file mode 100644
index 00000000000..ef2f9344468
--- /dev/null
+++ b/tests/basic/volfile-sanity.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+killall glusterd
+
+# Client by default tries to connect to port 24007
+# So, start server on that port, and you can see
+# client successfully working.
+TEST $GFS --xlator-option "${V0}-server.transport.socket.listen-port=24007" \
+ -f /var/lib/glusterd/vols/${V0}/${V0}.${H0}.*.vol
+TEST $GFS -f /var/lib/glusterd/vols/${V0}/${V0}.tcp-fuse.vol $M0
+
+TEST $(df -h $M0 | grep -q ${V0})
+TEST $(cat /proc/mounts | grep -q $M0)
+
+TEST ! stat $M0/newfile;
+TEST touch $M0/newfile;
+TEST rm $M0/newfile;
+
+cleanup;
diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
new file mode 100644
index 00000000000..102de22468e
--- /dev/null
+++ b/tests/basic/volume-scale-shd-mux.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=6
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8};
+#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
+#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+
+for i in $(seq 1 2); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+shd_pid=$(get_shd_mux_pid $V0)
+TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10
+TEST $CLI volume start ${V0}_distribute1
+
+#Creating a non-replicate/non-ec volume should not have any effect in shd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+EXPECT "^${shd_pid}$" get_shd_mux_pid $V0
+
+TEST mkdir $B0/add/
+#Now convert the distributed volume to replicate
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#scale down the volume
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Before stopping the process, make sure there is no pending clenup threads hanging
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST rm -rf $B0/add/2 $B0/add/3
+
+#Now convert the distributed volume back to replicate and make sure that a new shd is spawned
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3};
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "afr_shd_index_healer"
+
+#Now convert the replica volume to distribute again and make sure the shd is now stopped
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+TEST rm -rf $B0/add/
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1708929
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1708929
diff --git a/tests/basic/volume-snap-scheduler.t b/tests/basic/volume-snap-scheduler.t
new file mode 100644
index 00000000000..a638c5cc46a
--- /dev/null
+++ b/tests/basic/volume-snap-scheduler.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $V0
+
+## Create, start and mount meta_volume as
+## snap_scheduler expects shared storage to be enabled.
+## This test is very basic in nature not creating any snapshot
+## and purpose is to validate snap scheduling commands.
+
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##function to check status
+function check_status_scheduler()
+{
+ local key=$1
+ snap_scheduler.py status | grep -F "$key" | wc -l
+}
+
+##Basic snap_scheduler command test init/enable/disable/list
+
+TEST snap_scheduler.py init
+
+TEST snap_scheduler.py enable
+
+EXPECT 1 check_status_scheduler "Enabled"
+
+TEST snap_scheduler.py disable
+
+EXPECT 1 check_status_scheduler "Disabled"
+
+TEST snap_scheduler.py list
+
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/volume-snapshot-clone.t b/tests/basic/volume-snapshot-clone.t
new file mode 100755
index 00000000000..e6da9d7ddca
--- /dev/null
+++ b/tests/basic/volume-snapshot-clone.t
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../snapshot.rc
+
+
+V1="patchy2"
+
+function create_volumes() {
+ $CLI_1 volume create $V0 $H1:$L1 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H2:$L2 $H3:$L3 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function create_snapshots() {
+
+ $CLI_1 snapshot create $1 $2 no-timestamp&
+ PID_1=$!
+
+ wait $PID_1
+}
+
+
+
+function delete_snapshot() {
+ $CLI_1 snapshot delete $1 &
+ PID_1=$!
+
+ wait $PID_1
+}
+
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+create_volumes
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+start_volumes 2
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+TEST $CLI_1 snapshot config activate-on-create enable
+
+#Snapshot Operations
+create_snapshots ${V0}_snap ${V0};
+create_snapshots ${V1}_snap ${V1};
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+sleep 5
+TEST $CLI_1 snapshot clone ${V0}_clone ${V0}_snap
+TEST $CLI_1 snapshot clone ${V1}_clone ${V1}_snap
+
+EXPECT 'Created' volinfo_field ${V0}_clone 'Status';
+EXPECT 'Created' volinfo_field ${V1}_clone 'Status';
+
+TEST $CLI_1 volume start ${V0}_clone force;
+TEST $CLI_1 volume start ${V1}_clone force;
+
+EXPECT 'Started' volinfo_field ${V0}_clone 'Status';
+EXPECT 'Started' volinfo_field ${V1}_clone 'Status';
+
+
+TEST glusterfs -s $H1 --volfile-id=/${V0}_clone $M0
+TEST glusterfs -s $H2 --volfile-id=/${V1}_clone $M1
+
+TEST touch $M0/file1
+TEST touch $M1/file1
+
+TEST echo "Hello world" $M0/file1
+TEST echo "Hello world" $M1/file1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST kill_glusterd 2;
+sleep 15
+TEST $glusterd_2;
+sleep 15
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field ${V0}_clone 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field ${V1}_clone 'Status';
+
+TEST $CLI_1 volume stop ${V0}_clone
+TEST $CLI_1 volume stop ${V1}_clone
+
+TEST $CLI_1 volume delete ${V0}_clone
+TEST $CLI_1 volume delete ${V1}_clone
+
+TEST $CLI_1 snapshot clone ${V0}_clone ${V0}_snap
+TEST $CLI_1 snapshot clone ${V1}_clone ${V1}_snap
+
+EXPECT 'Created' volinfo_field ${V0}_clone 'Status';
+EXPECT 'Created' volinfo_field ${V1}_clone 'Status';
+
+#Clean up
+stop_force_volumes 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT 'Stopped' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT 'Stopped' volinfo_field $V1 'Status';
+
+TEST delete_snapshot ${V0}_snap
+TEST delete_snapshot ${V1}_snap
+
+TEST ! snapshot_exists 1 ${V0}_snap
+TEST ! snapshot_exists 1 ${V1}_snap
+
+delete_volumes 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V1
+
+cleanup;
diff --git a/tests/basic/volume-snapshot-xml.t b/tests/basic/volume-snapshot-xml.t
new file mode 100755
index 00000000000..ff63b54538d
--- /dev/null
+++ b/tests/basic/volume-snapshot-xml.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+# Snapshot config xmls
+EXPECT "enable" get-xml "snapshot config activate-on-create enable" "activateOnCreate"
+EXPECT "100" get-xml "snapshot config $V0 snap-max-hard-limit 100" "newHardLimit"
+EXPECT "70" get-xml "snapshot config snap-max-soft-limit 70" "newSoftLimit"
+EXPECT "enable" get-xml "snapshot config auto-delete enable" "autoDelete"
+
+# Snapshot create, activate, deactivate xmls
+EXPECT "snap1" get-xml "snapshot create snap1 $V0 no-timestamp" "name"
+EXPECT "snap1" get-xml "snapshot deactivate snap1" "name"
+EXPECT "snap1" get-xml "snapshot activate snap1" "name"
+EXPECT "snap2" get-xml "snapshot create snap2 $V0 no-timestamp" "name"
+
+# Snapshot info xmls
+EXPECT "2" get-xml "snapshot info" "count"
+EXPECT "Started" get-xml "snapshot info" "status"
+EXPECT "2" get-xml "snapshot info volume $V0" "count"
+EXPECT "Started" get-xml "snapshot info volume $V0" "status"
+EXPECT "1" get-xml "snapshot info snap1" "count"
+EXPECT "2" get-xml "snapshot info snap1" "snapCount"
+EXPECT "Started" get-xml "snapshot info snap1" "status"
+
+# Snapshot list xmls
+EXPECT "2" get-xml "snapshot list" "count"
+EXPECT "snap2" get-xml "snapshot list $V0" "snapshot"
+
+# Snapshot status xmls
+EXPECT "snap2" get-xml "snapshot status" "name"
+EXPECT "snap2" get-xml "snapshot deactivate snap2" "name"
+#XPECT "N/A" get-xml "snapshot status" "pid"
+EXPECT "snap1" get-xml "snapshot status snap1" "name"
+EXPECT "Yes" get-xml "snapshot status snap1" "brick_running"
+
+# Snapshot restore xmls
+TEST $CLI volume stop $V0
+EXPECT "snap2" get-xml "snapshot restore snap2" "name"
+EXPECT "30807" get-xml "snapshot restore snap2" "opErrno"
+EXPECT "0" get-xml "snapshot restore snap1" "opErrno"
+
+# Snapshot delete xmls
+TEST $CLI volume start $V0 force
+EXPECT "snap1" get-xml "snapshot create snap1 $V0 no-timestamp" "name"
+EXPECT "snap2" get-xml "snapshot create snap2 $V0 no-timestamp" "name"
+EXPECT "snap3" get-xml "snapshot create snap3 $V0 no-timestamp" "name"
+EXPECT "Success" get-xml "snapshot delete snap3" "status"
+EXPECT "Success" get-xml "snapshot delete all" "status"
+EXPECT "0" get-xml "snapshot list" "count"
+#XPECT "snap1" get-xml "snapshot create snap1 $V0 no-timestamp" "name"
+#XPECT "snap2" get-xml "snapshot create snap2 $V0 no-timestamp" "name"
+#XPECT "snap3" get-xml "snapshot create snap3 $V0 no-timestamp" "name"
+#XPECT "Success" get-xml "snapshot delete volume $V0" "status"
+#XPECT "0" get-xml "snapshot list" "count"
+
+# Snapshot clone xmls
+# Snapshot clone xml is broken. Once it is fixed it will be added here.
+
+cleanup;
diff --git a/tests/basic/volume-snapshot.t b/tests/basic/volume-snapshot.t
new file mode 100755
index 00000000000..dd938b4064a
--- /dev/null
+++ b/tests/basic/volume-snapshot.t
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../snapshot.rc
+
+
+V1="patchy2"
+
+function create_volumes() {
+ $CLI_1 volume create $V0 $H1:$L1 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H2:$L2 $H3:$L3 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function create_snapshots() {
+ $CLI_1 snapshot create ${V0}_snap ${V0} no-timestamp &
+ PID_1=$!
+
+ $CLI_1 snapshot create ${V1}_snap ${V1} no-timestamp &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function create_snapshots_with_timestamp() {
+ $CLI_1 snapshot create ${V0}_snap1 ${V0}&
+ PID_1=$!
+ $CLI_1 snapshot create ${V1}_snap1 ${V1}&
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+
+function activate_snapshots() {
+ $CLI_1 snapshot activate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot activate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function deactivate_snapshots() {
+ $CLI_1 snapshot deactivate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot deactivate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function delete_snapshots() {
+ $CLI_1 snapshot delete $1 &
+ PID_1=$!
+
+ $CLI_1 snapshot delete $2 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function restore_snapshots() {
+ $CLI_1 snapshot restore ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot restore ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+create_volumes
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+start_volumes 2
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+TEST $CLI_1 snapshot config activate-on-create enable
+
+#Snapshot Operations
+create_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+EXPECT '1' volinfo_field $V0 'Snapshot Count';
+EXPECT '1' volinfo_field $V1 'Snapshot Count';
+EXPECT "1" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "1" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+deactivate_snapshots
+
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
+activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+deactivate_snapshots
+activate_snapshots
+
+TEST snapshot_exists 1 ${V0}_snap
+TEST snapshot_exists 1 ${V1}_snap
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 100
+TEST $CLI_1 snapshot config $V1 snap-max-hard-limit 100
+
+TEST glusterfs -s $H1 --volfile-id=/snaps/${V0}_snap/${V0} $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H2 --volfile-id=/snaps/${V1}_snap/${V1} $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#create timestamp appended snaps
+create_snapshots_with_timestamp;
+new_name1=`$CLI_1 snapshot list ${V0} | grep ${V0}_snap1`;
+new_name2=`$CLI_1 snapshot list ${V1} | grep ${V1}_snap1`;
+
+EXPECT '2' volinfo_field $V0 'Snapshot Count';
+EXPECT '2' volinfo_field $V1 'Snapshot Count';
+EXPECT "2" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "2" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+EXPECT_NOT "{V0}_snap1" echo $new_name1;
+EXPECT_NOT "{V1}_snap1" echo $new_name1;
+delete_snapshots $new_name1 $new_name2;
+
+EXPECT '1' volinfo_field $V0 'Snapshot Count';
+EXPECT '1' volinfo_field $V1 'Snapshot Count';
+EXPECT "1" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "1" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+#Clean up
+stop_force_volumes 2
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+EXPECT 'Stopped' volinfo_field $V1 'Status';
+
+restore_snapshots
+TEST ! snapshot_exists 1 ${V0}_snap
+TEST ! snapshot_exists 1 ${V1}_snap
+
+EXPECT '0' volinfo_field $V0 'Snapshot Count';
+EXPECT '0' volinfo_field $V1 'Snapshot Count';
+EXPECT "0" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "0" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+delete_volumes 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V1
+
+cleanup;
diff --git a/tests/basic/volume-status.t b/tests/basic/volume-status.t
new file mode 100644
index 00000000000..01d7ebf6c07
--- /dev/null
+++ b/tests/basic/volume-status.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+function gluster_client_list_status () {
+ gluster volume status $V0 client-list | sed -n '/Name/','/total/'p | wc -l
+}
+
+function gluster_fd_status () {
+ gluster volume status $V0 fd | sed -n '/Brick :/ p' | wc -l
+}
+
+function gluster_inode_status () {
+ gluster volume status $V0 inode | sed -n '/Connection / p' | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+TEST touch $M0/file{1..20}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" gluster_fd_status
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "768" gluster_inode_status
+
+##Disabling this test until the client-list command works for brick-multiplexing
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "7" gluster_client_list_status
+
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+TEST $CLI volume status all
+TEST $CLI volume status $V0
+
+function test_nfs_cmds () {
+ local ret=0
+ declare -a nfs_cmds=("clients" "mem" "inode" "callpool")
+ for cmd in ${nfs_cmds[@]}; do
+ $CLI volume status $V0 nfs $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 nfs $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+function test_shd_cmds () {
+ local ret=0
+ declare -a shd_cmds=("mem" "inode" "callpool")
+ for cmd in ${shd_cmds[@]}; do
+ $CLI volume status $V0 shd $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 shd $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+function test_brick_cmds () {
+ local ret=0
+ declare -a cmds=("detail" "clients" "mem" "inode" "fd" "callpool")
+ for cmd in ${cmds[@]}; do
+ for i in {1..2}; do
+ $CLI volume status $V0 $H0:$B0/${V0}$i $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 $H0:$B0/${V0}$i $cmd --xml
+ (( ret += $? ))
+ done
+ done
+ return $ret
+}
+
+function test_status_cmds () {
+ local ret=0
+ declare -a cmds=("detail" "clients" "mem" "inode" "fd" "callpool" "tasks" "client-list")
+ for cmd in ${cmds[@]}; do
+ $CLI volume status $V0 $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+TEST test_shd_cmds;
+TEST test_nfs_cmds;
+TEST test_brick_cmds;
+TEST test_status_cmds;
+
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup;
diff --git a/tests/basic/volume.t b/tests/basic/volume.t
new file mode 100644
index 00000000000..27fe093d07d
--- /dev/null
+++ b/tests/basic/volume.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{9,10,11};
+EXPECT '9' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{1,2,3} force;
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume top $V0 read-perf bs 4096 count 1000
+TEST $CLI volume top $V0 write-perf bs 1048576 count 2
+
+TEST touch $M0/foo
+
+# statedump path should be a directory, setting it to a file path should fail
+
+TEST ! $CLI v set $V0 server.statedump-path $M0/foo;
+EXPECT '/var/run/gluster' $CLI v get $V0 server.statedump-path
+
+#set the statedump path to an existing ditectory which should succeed
+TEST mkdir $D0/level;
+TEST $CLI v set $V0 server.statedump-path $D0/level
+EXPECT '/level' volinfo_field $V0 'server.statedump-path'
+
+ret=$(ls $D0/level | wc -l);
+TEST [ $ret == 0 ]
+TEST $CLI v statedump $V0;
+ret=$(ls $D0/level | wc -l);
+TEST ! [ $ret == 0 ]
+
+#set the statedump path to a non - existing directory which should fail
+TEST ! $CLI v set $V0 server.statedump-path /root/test
+EXPECT '/level' volinfo_field $V0 'server.statedump-path'
+
+TEST rm -rf $D0/level
+
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status'
+
+TEST $CLI volume delete $V0
+TEST ! $CLI volume info $V0
+
+cleanup;
diff --git a/tests/basic/xlator-pass-through-sanity.t b/tests/basic/xlator-pass-through-sanity.t
new file mode 100644
index 00000000000..e996be89260
--- /dev/null
+++ b/tests/basic/xlator-pass-through-sanity.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume set $V0 performance.io-cache-pass-through enable;
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+# This test covers lookup, mkdir, mknod, symlink, link, rename,
+# create operations
+TEST $(dirname $0)/rpc-coverage.sh $M1
+
+cleanup;
diff --git a/tests/bitrot/br-signer-threads-config-1797869.t b/tests/bitrot/br-signer-threads-config-1797869.t
new file mode 100644
index 00000000000..657ef3eedaf
--- /dev/null
+++ b/tests/bitrot/br-signer-threads-config-1797869.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+
+function get_bitd_count_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l
+}
+
+function get_bitd_count_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l
+}
+
+function get_bitd_pid_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | awk '{print $2}'
+}
+
+function get_bitd_pid_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | awk '{print $2}'
+}
+
+function get_signer_th_count_1 {
+ ps -eL | grep $(get_bitd_pid_1) | grep glfs_brpobj | wc -l
+}
+
+function get_signer_th_count_2 {
+ ps -eL | grep $(get_bitd_pid_2) | grep glfs_brpobj | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 $H1:$B1
+TEST $CLI_1 volume create $V1 $H2:$B2
+EXPECT 'Created' volinfo_field_1 $V0 'Status';
+EXPECT 'Created' volinfo_field_1 $V1 'Status';
+
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume start $V1
+EXPECT 'Started' volinfo_field_1 $V0 'Status';
+EXPECT 'Started' volinfo_field_1 $V1 'Status';
+
+#Enable bitrot
+TEST $CLI_1 volume bitrot $V0 enable
+TEST $CLI_1 volume bitrot $V1 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
+
+old_bitd_pid_1=$(get_bitd_pid_1)
+old_bitd_pid_2=$(get_bitd_pid_2)
+TEST $CLI_1 volume bitrot $V0 signer-threads 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
+EXPECT_NOT "$old_bitd_pid_1" get_bitd_pid_1;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
+EXPECT "$old_bitd_pid_2" get_bitd_pid_2;
+
+old_bitd_pid_1=$(get_bitd_pid_1)
+old_bitd_pid_2=$(get_bitd_pid_2)
+TEST $CLI_1 volume bitrot $V1 signer-threads 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_signer_th_count_2
+EXPECT_NOT "$old_bitd_pid_2" get_bitd_pid_2;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
+EXPECT "$old_bitd_pid_1" get_bitd_pid_1;
+
+cleanup;
diff --git a/tests/bitrot/br-state-check.t b/tests/bitrot/br-state-check.t
new file mode 100644
index 00000000000..2142275699e
--- /dev/null
+++ b/tests/bitrot/br-state-check.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+SCRIPT_TIMEOUT=350
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3;
+TEST $CLI volume start $V0;
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_scrubd_count
+
+## perform a series of scrub related state change tests. As of now, there'
+## no way to check if a given change has been correctly acknowledged by
+## the scrub process as there isn't an _interface_ to check scrub internal
+## state (yet). What's been verified here is scrub state machine execution
+## w.r.t. locking and faults.
+
+## 0x0: verify scrub rescheduling
+TEST $CLI volume bitrot $V0 scrub-frequency monthly
+TEST $CLI volume bitrot $V0 scrub-frequency daily
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+
+## 0x1: test reschedule after pause/resume
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume bitrot $V0 scrub-frequency daily
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x2: test reschedule w/ an offline brick
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+TEST $CLI volume bitrot $V0 scrub-throttle aggressive
+
+## 0x3: test pause/resume w/ an offline brick
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume bitrot $V0 scrub-frequency monthly
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x4: test "start" from a paused scrub state
+
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume start $V0 force
+
+## 0x4a: try pausing an already paused scrub
+TEST ! $CLI volume bitrot $V0 scrub pause
+
+## 0x4b: perform configuration changes
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x5: test cleanup upon brick going offline
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+## 0x6: test cleanup upon brick going offline when srubber is paused
+## (initially paused and otherwise)
+
+## 0x6a: initially paused case
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x6b: paused _after_ execution
+TEST $CLI volume start $V0 force
+TEST $CLI volume bitrot $V0 scrub pause
+TEST kill_brick $V0 $H0 $B0/${V0}2
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1332473
diff --git a/tests/bitrot/br-stub.c b/tests/bitrot/br-stub.c
new file mode 100644
index 00000000000..1111f710f59
--- /dev/null
+++ b/tests/bitrot/br-stub.c
@@ -0,0 +1,195 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <errno.h>
+
+#include "bit-rot-object-version.h"
+
+/* NOTE: no size discovery */
+int
+brstub_validate_version(char *bpath, unsigned long version)
+{
+ int ret = 0;
+ int match = 0;
+ size_t xsize = 0;
+ br_version_t *xv = NULL;
+
+ xsize = sizeof(br_version_t);
+
+ xv = calloc(1, xsize);
+ if (!xv) {
+ match = -1;
+ goto err;
+ }
+
+ ret = getxattr(bpath, "trusted.bit-rot.version", xv, xsize);
+ if (ret < 0) {
+ if (errno == ENODATA)
+ match = -2;
+ goto err;
+ }
+
+ if (xv->ongoingversion != version) {
+ match = -3;
+ fprintf(stderr, "ongoingversion: %lu\n", xv->ongoingversion);
+ }
+ free(xv);
+
+err:
+ return match;
+}
+
+int
+brstub_write_validation(char *filp, char *bpath, unsigned long startversion)
+{
+ int fd1 = 0;
+ int fd2 = 0;
+ int ret = 0;
+ char *string = "string\n";
+
+ /* read only check */
+ fd1 = open(filp, O_RDONLY);
+ if (fd1 < 0)
+ goto err;
+ close(fd1);
+
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != -2)
+ goto err;
+
+ /* single open (write/) check */
+ fd1 = open(filp, O_RDWR);
+ if (fd1 < 0)
+ goto err;
+
+ ret = write(fd1, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+ /**
+ * Fsync is done so that the write call has properly reached the
+ * disk. For fuse mounts write-behind xlator would have held the
+ * writes with itself and for nfs, client would have held the
+ * write in its cache. So write fop would not have triggered the
+ * versioning as it would have not reached the bit-rot-stub.
+ */
+ fsync(fd1);
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != 0)
+ goto err;
+ ret = write(fd1, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+ fsync(fd1); /* let it reach the disk */
+
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != 0)
+ goto err;
+
+ close(fd1);
+
+ /**
+ * Well, this is not a _real_ test per se . For this test to pass
+ * the inode should not get a forget() in the interim. Therefore,
+ * perform this test asap.
+ */
+
+ /* multi open (write/) check */
+ fd1 = open(filp, O_RDWR);
+ if (fd1 < 0)
+ goto err;
+ fd2 = open(filp, O_WRONLY);
+ if (fd1 < 0)
+ goto err;
+
+ ret = write(fd1, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+
+ ret = write(fd2, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+
+ /* probably do a syncfs() */
+ fsync(fd1);
+ fsync(fd2);
+
+ close(fd1);
+ close(fd2);
+
+ /**
+ * incremented once per write()/write().../close()/close() sequence
+ */
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != 0)
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
+
+int
+brstub_new_object_validate(char *filp, char *brick)
+{
+ int ret = 0;
+ char *fname = NULL;
+ char bpath[PATH_MAX] = {
+ 0,
+ };
+
+ fname = basename(filp);
+ if (!fname)
+ goto err;
+
+ (void)snprintf(bpath, PATH_MAX, "%s/%s", brick, fname);
+
+ printf("Validating initial version..\n");
+ ret = brstub_validate_version(bpath, 2);
+ if (ret != -2) /* version _should_ be missing */
+ goto err;
+
+ printf("Validating version on modifications..\n");
+ ret = brstub_write_validation(filp, bpath, 2);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ char *filp = NULL;
+ char *brick = NULL;
+
+ if (argc != 3) {
+ printf("Usage: %s <path> <brick>\n", argv[0]);
+ goto err;
+ }
+
+ filp = argv[1];
+ brick = argv[2];
+
+ printf("Validating object version [%s]\n", filp);
+ ret = brstub_new_object_validate(filp, brick);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
diff --git a/tests/bitrot/br-stub.t b/tests/bitrot/br-stub.t
new file mode 100644
index 00000000000..cc0319afac9
--- /dev/null
+++ b/tests/bitrot/br-stub.t
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+STUB_SOURCE=$(dirname $0)/br-stub.c
+STUB_EXEC=$(dirname $0)/br-stub
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume (B=2)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+TEST $CLI volume set $V0 nfs.disable false
+
+## Turn off write-behind (write-behind clubs writes together)
+TEST $CLI volume set $V0 performance.write-behind off
+#EXPECT 'off' volinfo_field $V0 'performance.open-behind'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## enable bitrot
+TEST $CLI volume bitrot $V0 enable;
+
+## Wait for gluster nfs to come up
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Build stub C source
+build_tester $STUB_SOURCE -o $STUB_EXEC -I$(dirname $0)/../../xlators/features/bit-rot/src/stub
+TEST [ -e $STUB_EXEC ]
+
+## create & check version
+fname="$M0/filezero"
+touch $fname;
+backpath=$(get_backend_paths $fname)
+
+TEST $STUB_EXEC $fname $(dirname $backpath)
+
+rm -f $fname;
+
+## test nfs
+fname="$N0/filezero"
+touch $fname; # backpath remains same..
+
+TEST $STUB_EXEC $fname $(dirname $backpath)
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+##cleanups..
+rm -f $STUB_EXEC
+
+cleanup;
diff --git a/tests/bitrot/bug-1207627-bitrot-scrub-status.t b/tests/bitrot/bug-1207627-bitrot-scrub-status.t
new file mode 100644
index 00000000000..a361986fdaf
--- /dev/null
+++ b/tests/bitrot/bug-1207627-bitrot-scrub-status.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+## Test case for bitrot scrub status BZ:1207627
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Setting scrubber frequency daily
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+
+## Setting scrubber throttle value lazy
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'lazy' scrub_status $V0 'Scrub impact'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'hourly' scrub_status $V0 'Scrub frequency'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Set expiry-timeout to 1 sec
+TEST $CLI volume set $V0 features.expiry-time 1
+
+##Mount $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
+
+##Corrupt the file
+TEST `echo "corrupt" >> /$B0/${V0}1/FILE1`
+
+## Ondemand scrub
+TEST $CLI volume bitrot $V0 scrub ondemand
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.bad-file' check_for_xattr 'trusted.bit-rot.bad-file' "/$B0/${V0}1/FILE1"
+
+cleanup;
diff --git a/tests/bitrot/bug-1221914.t b/tests/bitrot/bug-1221914.t
new file mode 100644
index 00000000000..7f6c10c50df
--- /dev/null
+++ b/tests/bitrot/bug-1221914.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+STUB_SOURCE=$(dirname $0)/br-stub.c
+STUB_EXEC=$(dirname $0)/br-stub
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume (B=2)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+
+## Turn off write-behind (write-behind clubs writes together)
+TEST $CLI volume set $V0 performance.write-behind off
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Enable bitrot
+TEST $CLI volume bitrot $V0 enable;
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+## Build stub C source
+build_tester $STUB_SOURCE -o $STUB_EXEC -I$(dirname $0)/../../xlators/features/bit-rot/src/stub
+TEST [ -e $STUB_EXEC ]
+
+## create & check version
+fname="$M0/filezero"
+$PYTHON -c "import os,stat; os.mknod('${fname}', stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR | stat.S_IROTH | stat.S_IRGRP)"
+
+backpath=$(get_backend_paths $fname)
+
+TEST $STUB_EXEC $fname $(dirname $backpath)
+
+rm -f $fname;
+
+##cleanups..
+rm -f $STUB_EXEC
+
+cleanup;
diff --git a/tests/bitrot/bug-1244613.t b/tests/bitrot/bug-1244613.t
new file mode 100644
index 00000000000..57b86a94ac0
--- /dev/null
+++ b/tests/bitrot/bug-1244613.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+. $(dirname $0)/../fileio.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=16
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+# The test makes use of inode-lru-limit to hit a scenario, where we
+# find an inode whose ancestry is not there. Following is the
+# hypothesis (which is confirmed by seeing logs indicating that
+# codepath has been executed, but not through a good understanding of
+# NFS internals).
+
+# At the end of an fop, the reference count of an inode would be
+# zero. The inode (and its ancestry) persists in memory only
+# because of non-zero lookup count. These looked up inodes are put
+# in an lru queue of size 1 (here). So, there can be at most one
+# such inode in memory.
+
+# NFS Server makes use of anonymous fds. So, if it cannot find
+# valid fd, it does a nameless lookup. This gives us an inode
+# whose ancestry is NULL. When a write happens on this inode,
+# quota-enforcer/marker finds a NULL ancestry and asks
+# storage/posix to build it.
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Enable bitrot
+TEST $CLI volume bitrot $V0 enable;
+
+## Wait for gluster nfs to come up
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0;
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4
+
+TEST fd_open 3 'w' "$N0/$deep/file1"
+TEST fd_open 4 'w' "$N0/$deep/file2"
+TEST fd_open 5 'w' "$N0/$deep/file3"
+TEST fd_open 6 'w' "$N0/$deep/file4"
+
+# consume all quota
+echo "Hello" > $N0/$deep/new_file_1
+echo "World" >> $N0/$deep/new_file_1
+echo 1 >> $N0/$deep/new_file_1
+echo 2 >> $N0/$deep/new_file_1
+
+
+# At the end of each fop in server, reference count of the
+# inode associated with each of the file above drops to zero and hence
+# put into lru queue. Since lru-limit is set to 1, an fop next file
+# will displace the current inode from itable. This will ensure that
+# when writes happens on same fd, fd resolution results in
+# nameless lookup from server and encounters an fd
+# associated with an inode whose parent is not present in itable.
+
+for j in $(seq 1 2); do
+ for i in $(seq 3 6); do
+ TEST_IN_LOOP fd_write $i "content"
+ TEST_IN_LOOP sync
+ done
+done
+
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
+
+$CLI volume statedump $V0 all
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bitrot/bug-1294786.t b/tests/bitrot/bug-1294786.t
new file mode 100644
index 00000000000..5b4b6ddb4d3
--- /dev/null
+++ b/tests/bitrot/bug-1294786.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+
+function get_bitd_count_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l
+}
+
+function get_bitd_count_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l
+}
+
+function get_node_uuid {
+ getfattr -n trusted.glusterfs.node-uuid --only-values $M0/FILE1 2>/dev/null
+}
+
+cleanup;
+
+TEST launch_cluster 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1 $H2:$B2
+EXPECT 'Created' volinfo_field_1 $V0 'Status';
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field_1 $V0 'Status';
+
+uuid1=$($CLI_1 system:: uuid get | awk '{print $2}')
+uuid2=$($CLI_2 system:: uuid get | awk '{print $2}')
+
+##Mount $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H1 $M0
+
+#Enable bitrot
+TEST $CLI_1 volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+TEST `echo "5678" > $M0/FILE2`
+gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1)
+gfid2=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE2)
+
+EXPECT "$uuid1" get_node_uuid;
+
+#Corrupt file from back-end
+TEST stat $B1/FILE1
+TEST stat $B1/FILE2
+echo "Corrupted data" >> $B1/FILE1
+echo "Corrupted data" >> $B1/FILE2
+#Manually set bad-file xattr since we can't wait for an hour for scrubber.
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B1/FILE1
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B1/FILE2
+TEST touch "$B1/.glusterfs/quarantine/$gfid1"
+TEST chmod 000 "$B1/.glusterfs/quarantine/$gfid1"
+TEST touch "$B1/.glusterfs/quarantine/$gfid2"
+TEST chmod 000 "$B1/.glusterfs/quarantine/$gfid2"
+EXPECT "4" get_quarantine_count "$B1";
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field_1 $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
+#Trigger lookup so that bitrot xlator marks file as bad in its inode context.
+TEST stat $M0/FILE1
+TEST stat $M0/FILE2
+
+EXPECT "$uuid2" get_node_uuid;
+
+#BUG 1308961
+#Remove bad files from mount, it should be removed from quarantine directory.
+TEST rm -f $M0/FILE1
+TEST ! stat "$B1/.glusterfs/quarantine/$gfid1"
+
+#BUG 1308961
+#Set network.inode-lru-limit to 5 and exceed the limit by creating 10 other files.
+#The bad entry from quarantine directory should not be removed.
+TEST $CLI_1 volume set $V0 network.inode-lru-limit 5
+for i in {1..10}
+do
+ echo "1234" > $M0/file_$i
+done
+TEST stat "$B1/.glusterfs/quarantine/$gfid2"
+
+cleanup;
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
new file mode 100644
index 00000000000..6af5124e86e
--- /dev/null
+++ b/tests/bitrot/bug-1373520.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create a disperse volume
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+#Disable self heal daemon as it races in this test with lookup on volume
+#stop and start.
+$CLI volume set $V0 self-heal-daemon off
+
+#Disable few perf xlators to get the first lookup on the brick
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.force-readdirp off
+TEST $CLI volume set $V0 dht.force-readdirp off
+
+#Mount the volume
+TEST $GFS -s $H0 --use-readdirp=no --attribute-timeout=0 --entry-timeout=0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Enable bitrot
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+#Create hardlink
+TEST `ln $M0/FILE1 $M0/HL_FILE1`
+
+#Corrupt file from back-end
+TEST stat $B0/${V0}5/FILE1
+SIZE=$(stat -c %s $B0/${V0}5/FILE1)
+echo "Corrupted data" >> $B0/${V0}5/FILE1
+gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1)
+
+#Manually set bad-file xattr
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/${V0}5/FILE1
+TEST touch "$B0/${V0}5/.glusterfs/quarantine/$gfid1"
+TEST chmod 000 "$B0/${V0}5/.glusterfs/quarantine/$gfid1"
+EXPECT "3" get_quarantine_count "$B0/${V0}5";
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Delete file and all links from backend
+TEST rm -rf $(find $B0/${V0}5 -inum $(stat -c %i $B0/${V0}5/FILE1))
+
+#New mount for recovery
+TEST $GFS -s $H0 --use-readdirp=no --attribute-timeout=0 --entry-timeout=0 --volfile-id $V0 $M1
+
+$CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume heal $V0
+
+#Access files
+TEST cat $M1/FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" path_size $B0/${V0}5/FILE1
+TEST cat $M1/HL_FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" path_size $B0/${V0}5/HL_FILE1
+
+cleanup;
diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
new file mode 100644
index 00000000000..f27374211fe
--- /dev/null
+++ b/tests/bitrot/bug-1700078.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+## Turn off quick-read so that it wont cache the contents
+# of the file in lookup. For corrupted files, it might
+# end up in reads being served from the cache instead of
+# an error.
+TEST $CLI volume set $V0 performance.quick-read off
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Set expiry-timeout to 1 sec
+TEST $CLI volume set $V0 features.expiry-time 1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+## Turn off quick-read xlator so that, the contents are not served from the
+# quick-read cache.
+TEST $CLI volume set $V0 performance.quick-read off
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
+
+##disable bitrot
+TEST $CLI volume bitrot $V0 disable
+
+## modify the file
+TEST `echo "write" >> $M0/FILE1`
+
+# unmount and remount when the file has to be accessed.
+# This is to ensure that, when the remount happens,
+# and the file is read, its contents are served from the
+# brick instead of cache.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+##enable bitrot
+TEST $CLI volume bitrot $V0 enable
+
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
+# oneshot crawler to finish its crawling and sign the file properly.
+sleep 2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Ondemand scrub
+TEST $CLI volume bitrot $V0 scrub ondemand
+
+# the scrub ondemand CLI command, just ensures that
+# the scrubber has received the ondemand scrub directive
+# and started. sleep for 2 seconds for scrubber to finish
+# crawling and marking file(s) as bad (if if finds that
+# corruption has happened) which are filesystem operations.
+sleep 2
+
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST cat $M0/FILE1
+
+cleanup;
diff --git a/tests/bitrot/bug-internal-xattrs-check-1243391.t b/tests/bitrot/bug-internal-xattrs-check-1243391.t
new file mode 100644
index 00000000000..bc9c12520b2
--- /dev/null
+++ b/tests/bitrot/bug-internal-xattrs-check-1243391.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume (B=2)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+echo "123" >> $M0/file;
+
+TEST ! setfattr -n "trusted.glusterfs.set-signature" -v "123" $M0/file;
+TEST ! setfattr -n "trusted.glusterfs.get-signature" -v "123" $M0/file;
+
+# sign xattr
+TEST ! setfattr -n "trusted.bit-rot.signature" -v "123" $M0/file;
+TEST ! setfattr -x "trusted.bit-rot.signature" $M0/file;
+
+# versioning xattr
+TEST ! setfattr -n "trusted.bit-rot.version" -v "123" $M0/file;
+TEST ! setfattr -x "trusted.bit-rot.version" $M0/file;
+
+# bad file xattr
+TEST ! setfattr -n "trusted.bit-rot.bad-file" -v "123" $M0/file;
+TEST ! setfattr -x "trusted.bit-rot.bad-file" $M0/file;
+
+cleanup;
diff --git a/tests/bugs/access-control/bug-1051896.c b/tests/bugs/access-control/bug-1051896.c
new file mode 100644
index 00000000000..31799d97a71
--- /dev/null
+++ b/tests/bugs/access-control/bug-1051896.c
@@ -0,0 +1,94 @@
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <utime.h>
+#include <sys/acl.h>
+
+int
+do_setfacl(const char *path, const char *options, const char *textacl)
+{
+ int r;
+ int type;
+ acl_t acl;
+ int dob;
+ int dok;
+ int dom;
+ struct stat st;
+ char textmode[30];
+
+ r = 0;
+ dob = strchr(options, 'b') != (char *)NULL;
+ dok = strchr(options, 'k') != (char *)NULL;
+ dom = strchr(options, 'm') != (char *)NULL;
+ if ((dom && !textacl) ||
+ (!dom && (textacl || (!dok && !dob) || strchr(options, 'd')))) {
+ errno = EBADRQC; /* "bad request" */
+ r = -1;
+ } else {
+ if (dob || dok) {
+ r = acl_delete_def_file(path);
+ }
+ if (dob && !r) {
+ if (!stat(path, &st)) {
+ sprintf(textmode, "u::%c%c%c,g::%c%c%c,o::%c%c%c",
+ (st.st_mode & 0400 ? 'r' : '-'),
+ (st.st_mode & 0200 ? 'w' : '-'),
+ (st.st_mode & 0100 ? 'x' : '-'),
+ (st.st_mode & 0040 ? 'r' : '-'),
+ (st.st_mode & 0020 ? 'w' : '-'),
+ (st.st_mode & 0010 ? 'x' : '-'),
+ (st.st_mode & 004 ? 'r' : '-'),
+ (st.st_mode & 002 ? 'w' : '-'),
+ (st.st_mode & 001 ? 'x' : '-'));
+ acl = acl_from_text(textmode);
+ if (acl) {
+ r = acl_set_file(path, ACL_TYPE_ACCESS, acl);
+ acl_free(acl);
+ } else
+ r = -1;
+ } else
+ r = -1;
+ }
+ if (!r && dom) {
+ if (strchr(options, 'd'))
+ type = ACL_TYPE_DEFAULT;
+ else
+ type = ACL_TYPE_ACCESS;
+ acl = acl_from_text(textacl);
+ if (acl) {
+ r = acl_set_file(path, type, acl);
+ acl_free(acl);
+ } else
+ r = -1;
+ }
+ }
+ if (r)
+ r = -errno;
+ return r;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int rc = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "usage: ./setfacl_test <path> <options> <textacl>\n");
+ return 0;
+ }
+ rc = do_setfacl(argv[1], argv[2], argv[3]);
+ if (rc != 0) {
+ fprintf(stderr, "do_setfacl failed: %s\n", strerror(errno));
+ return rc;
+ }
+ return 0;
+}
diff --git a/tests/bugs/access-control/bug-1051896.t b/tests/bugs/access-control/bug-1051896.t
new file mode 100644
index 00000000000..870ede7db21
--- /dev/null
+++ b/tests/bugs/access-control/bug-1051896.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on ACL which are not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --acl -s $H0 --volfile-id $V0 $M0;
+
+TEST touch $M0/file1;
+
+TEST $CC $(dirname $0)/bug-1051896.c -o $(dirname $0)/bug-1051896 -lacl
+TEST ! $(dirname $0)/bug-1051896 $M0/file1 m 'u::r,u::w,g::r--,o::r--'
+TEST rm -f $(dirname $0)/bug-1051896
+
+cleanup
diff --git a/tests/bugs/access-control/bug-1387241.c b/tests/bugs/access-control/bug-1387241.c
new file mode 100644
index 00000000000..e2e843a2fda
--- /dev/null
+++ b/tests/bugs/access-control/bug-1387241.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int
+main(int argc, char *argv[])
+{
+ int ret = EXIT_FAILURE;
+ int fd = open(argv[1], O_RDONLY | O_TRUNC);
+
+ if (fd) {
+ ret = EXIT_SUCCESS;
+ close(fd);
+ }
+
+ return ret;
+}
diff --git a/tests/bugs/access-control/bug-1387241.t b/tests/bugs/access-control/bug-1387241.t
new file mode 100644
index 00000000000..2efd80547d6
--- /dev/null
+++ b/tests/bugs/access-control/bug-1387241.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on ACL which are not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
+
+#cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --acl -s $H0 --volfile-id $V0 $M0;
+
+TEST touch $M0/file1;
+
+TEST $CC $(dirname $0)/bug-1387241.c -o $(dirname $0)/bug-1387241
+
+TEST $(dirname $0)/bug-1387241 $M0/file1
+
+TEST rm -f $(dirname $0)/bug-1387241
+
+#cleanup
diff --git a/tests/bugs/access-control/bug-887098-gmount-crash.t b/tests/bugs/access-control/bug-887098-gmount-crash.t
new file mode 100644
index 00000000000..ba9937bd5bf
--- /dev/null
+++ b/tests/bugs/access-control/bug-887098-gmount-crash.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+MOUNT_PID=$(get_mount_process_pid $V0)
+
+for i in {1..25};
+do
+ mkdir $M0/tmp_$i && cat /etc/hosts > $M0/tmp_$i/file
+ cp -RPp $M0/tmp_$i $M0/newtmp_$i && cat /etc/hosts > $M0/newtmp_$i/newfile
+done
+
+EXPECT "$MOUNT_PID" get_mount_process_pid $V0
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/access-control/bug-958691.t b/tests/bugs/access-control/bug-958691.t
new file mode 100644
index 00000000000..8b70607bdbb
--- /dev/null
+++ b/tests/bugs/access-control/bug-958691.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+# Tests for the fuse mount
+TEST mkdir $M0/dir;
+TEST chmod 1777 $M0/dir;
+TEST touch $M0/dir/file{1,2};
+
+TEST $CLI volume set $V0 server.root-squash enable;
+
+mv $M0/dir/file1 $M0/dir/file11 2>/dev/null;
+TEST [ $? -ne 0 ];
+
+TEST $CLI volume set $V0 server.root-squash disable;
+TEST rm -rf $M0/dir;
+
+sleep 1;
+
+# tests for nfs mount
+TEST mkdir $N0/dir;
+TEST chmod 1777 $N0/dir;
+TEST touch $N0/dir/file{1,2};
+
+TEST $CLI volume set $V0 server.root-squash enable;
+
+mv $N0/dir/file1 $N0/dir/file11 2>/dev/null;
+TEST [ $? -ne 0 ];
+
+TEST $CLI volume set $V0 server.root-squash disable;
+TEST rm -rf $N0/dir;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t b/tests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t
new file mode 100755
index 00000000000..691ebc303e4
--- /dev/null
+++ b/tests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+## Test case for bitrot
+## bitd daemon should not start on the node which dont have any brick
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+function get_bitd_count {
+ ps auxw | grep glusterfs | grep bitd.pid | grep -v grep | wc -l
+}
+
+## Start a 2 node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+## Creating a volume which is having brick only on one node
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H1:$B1/${V0}1
+
+## Start the volume
+TEST $CLI_1 volume start $V0
+
+## Enable bitrot on volume from 2nd node.
+TEST $CLI_2 volume bitrot $V0 enable
+
+## Bitd daemon should be running on the node which is having brick. Here node1
+## only have brick so bitrot daemon count value should be 1.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Bitd daemon should not run on 2nd node and it should not create bitrot
+## volfile on this node. Below test case it to check whether its creating bitrot
+## volfile or not for 2nd node which dont have any brick.
+## Get current working directory of 2nd node which dont have any brick and do
+## stat on bitrot volfile.
+
+cur_wrk_dir2=$($CLI_2 system:: getwd)
+TEST ! stat $cur_wrk_dir2/bitd/bitd-server.vol
+
+
+## Bitd daemon should run on 1st node and it should create bitrot
+## volfile on this node. Below test case it to check whether its creating bitrot
+## volfile or not for 1st node which is having brick.
+## Get current working directory of 1st node which have brick and do
+## stat on bitrot volfile.
+
+cur_wrk_dir1=$($CLI_1 system:: getwd)
+TEST stat $cur_wrk_dir1/bitd/bitd-server.vol
+
+cleanup;
diff --git a/tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t b/tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t
new file mode 100644
index 00000000000..919ffc3ba62
--- /dev/null
+++ b/tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for bitrot
+## On restarting glusterd should not reset bitrot tunable value to default
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Set bitrot scrub-throttle value to lazy
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+
+## Set bitrot scrub-frequency value to monthly
+TEST $CLI volume bitrot $V0 scrub-frequency monthly
+
+## Set bitrot scrubber to pause state
+TEST $CLI volume bitrot $V0 scrub pause
+
+## restart glusterd process
+pkill glusterd;
+TEST glusterd;
+TEST pidof glusterd;
+
+## All the bitrot scrub tunable value should come back again.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+EXPECT 'lazy' volinfo_field $V0 'features.scrub-throttle';
+EXPECT 'monthly' volinfo_field $V0 'features.scrub-freq';
+EXPECT 'pause' volinfo_field $V0 'features.scrub';
+EXPECT 'on' volinfo_field $V0 'features.bitrot';
+
+cleanup;
diff --git a/tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t b/tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t
new file mode 100644
index 00000000000..6101910666c
--- /dev/null
+++ b/tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+## Test case for bitrot
+## gluster volume status command should show status of bitrot daemon
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start a 2 node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+## Lets create and start the volume
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H2:$B2/${V0}1
+TEST $CLI_1 volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI_1 volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_bitd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_scrubd_count
+
+## From node 1 Gluster volume status command should show the status of bitrot
+## daemon of all the nodes. there are 2 nodes in a cluster with having brick
+## ${V0}1 and ${V0}2 . So there should be 2 bitrot daemon running.
+
+bitd=$($CLI_1 volume status $V0 | grep "Bitrot Daemon" | grep -v grep | wc -l)
+TEST [ "$bitd" -eq 2 ];
+
+
+
+## From node 2 Gluster volume status command should show the status of Scrubber
+## daemon of all the nodes. There are 2 nodes in a cluster with having brick
+## ${V0}1 and ${V0}2 . So there should be 2 Scrubber daemon running.
+
+scrub=$($CLI_2 volume status $V0 | grep "Scrubber Daemon" | grep -v grep | \
+ wc -l)
+TEST [ "$scrub" -eq 2 ];
+
+
+
+## From node 1 Gluster volume status command should print status of only
+## scrubber daemon. There should be total 2 scrubber daemon running, one daemon
+## for each node
+
+scrub=$($CLI_1 volume status $V0 scrub | grep "Scrubber Daemon" | \
+ grep -v grep | wc -l)
+TEST [ "$scrub" -eq 2 ];
+
+
+
+## From node 2 Gluster volume status command should print status of only
+## bitd daemon. There should be total 2 bitd daemon running, one daemon
+## for each node
+
+bitd=$($CLI_2 volume status $V0 bitd | grep "Bitrot Daemon" | \
+ grep -v grep | wc -l)
+TEST [ "$bitd" -eq 2 ];
+
+
+cleanup;
diff --git a/tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t b/tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t
new file mode 100644
index 00000000000..4fe02dc7f63
--- /dev/null
+++ b/tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for bitrot.
+## volume info should not show 'features.scrub: resume' if scrub process is
+## resumed from paused state.
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Set bitrot scrubber process to pause state
+TEST $CLI volume bitrot $V0 scrub pause
+
+## gluster volume info command should show scrub process pause.
+EXPECT 'pause' volinfo_field $V0 'features.scrub';
+
+
+## Resume scrub process on volume $V0
+TEST $CLI volume bitrot $V0 scrub resume
+
+## gluster volume info command should show scrub process Active
+EXPECT 'Active' volinfo_field $V0 'features.scrub';
+
+
+## Disable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 disable
+
+## gluster volume info command should show scrub process Inactive
+EXPECT 'Inactive' volinfo_field $V0 'features.scrub';
+
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t b/tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t
new file mode 100644
index 00000000000..b15b908d21a
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+## Test case for bitrot BZ:1210684
+## Bitrot scrub pause/resume option should give proper error if scrubber is
+## already pause/resume and admin try to perform same operation on a volume
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Pause scrubber operation on volume $V0
+TEST $CLI volume bitrot $V0 scrub pause
+
+## Pausing scrubber again should not success and should give error
+TEST ! $CLI volume bitrot $V0 scrub pause
+
+## Resume scrubber operation on volume $V0
+TEST $CLI volume bitrot $V0 scrub resume
+
+## Resuming scrubber again should not success and should give error
+TEST ! $CLI volume bitrot $V0 scrub resume
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1227996.t b/tests/bugs/bitrot/bug-1227996.t
new file mode 100644
index 00000000000..121c7b5f279
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1227996.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+## Test case for bitrot
+## Tunable object signing waiting time value for bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+SLEEP_TIME=3
+
+cleanup;
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# wait a bit for oneshot crawler to finish
+sleep $SLEEP_TIME
+
+## Set object expiry time value
+TEST $CLI volume bitrot $V0 signing-time $SLEEP_TIME
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# create and check object signature
+fname="$M0/filezero"
+echo "ZZZ" > $fname
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+backpath=$(get_backend_paths $fname)
+TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+
+## for now just remove the signature xattr to test for signing
+## upon truncate()
+TEST setfattr -x trusted.bit-rot.signature $backpath
+
+## overwrite the file (truncate(), write())
+echo "XYX" > $fname
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+# test for new signature
+TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1228680.t b/tests/bugs/bitrot/bug-1228680.t
new file mode 100644
index 00000000000..23db9d5e208
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1228680.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for bitrot
+## Tunable object signing waiting time value for bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+SLEEP_TIME=3
+
+cleanup;
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# wait a bit for oneshot crawler to finish
+sleep $SLEEP_TIME
+
+## negative test
+TEST ! $CLI volume bitrot $V0 signing-time -100
+
+## Set object expiry time value 5 second.
+TEST $CLI volume bitrot $V0 signing-time $SLEEP_TIME
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# create and check object signature
+fname="$M0/filezero"
+echo "ZZZ" > $fname
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+backpath=$(get_backend_paths $fname)
+TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t b/tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t
new file mode 100644
index 00000000000..471471f4b6b
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+## Test case for bitrot BZ:1229134
+## gluster volume set <VOLNAME> bitrot * command succeeds,
+## which is not supported to enable bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}
+TEST $CLI volume start $V0
+
+## 'gluster volume set <VOLNAME>' command for bitrot should failed.
+TEST ! $CLI volume set $V0 bitrot enable
+TEST ! $CLI volume set $V0 bitrot disable
+TEST ! $CLI volume set $V0 scrub-frequency daily
+TEST ! $CLI volume set $V0 scrub pause
+TEST ! $CLI volume set $V0 scrub-throttle lazy
+
+
+## 'gluster volume bitrot <VOLNAME> *' command for bitrot should succeeds.
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume bitrot $V0 scrub-frequency daily
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+
+cleanup;
+
diff --git a/tests/bugs/bitrot/bug-1245981.t b/tests/bugs/bitrot/bug-1245981.t
new file mode 100644
index 00000000000..f3955256b01
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1245981.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+## Test case for bitrot
+## Tunable object signing waiting time value for bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+SLEEP_TIME=5
+
+cleanup;
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# wait a bit for oneshot crawler to finish
+sleep 2;
+
+## Set object expiry time value
+TEST $CLI volume bitrot $V0 signing-time $SLEEP_TIME
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# create and check object signature
+fname="$M0/filezero"
+echo "ZZZ" > $fname
+echo "123" > $M0/new_file;
+
+touch $M0/1
+touch $M0/2
+touch $M0/3
+touch $M0/4
+touch $M0/5
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+backpath=$(get_backend_paths $fname)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
+
+backpath=$(get_backend_paths $M0/new_file)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1288490.t b/tests/bugs/bitrot/bug-1288490.t
new file mode 100644
index 00000000000..5f67f4a6ec5
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1288490.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume start $V0
+
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST dd if=/dev/urandom of=$M0/FILE bs=1024 count=1
+
+# corrupt data -- append 2 bytes
+echo -n "~~" >> $B0/brick0/FILE
+# manually set bad-file xattr
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/brick0/FILE
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# trigger lookup
+TEST stat $M0/FILE
+
+# extend the file
+TEST dd if=/dev/urandom of=$M0/FILE bs=1024 count=1 oflag=append conv=notrunc
+
+# check backend file size
+EXPECT "1026" stat -c "%s" $B0/brick0/FILE
+EXPECT "2048" stat -c "%s" $B0/brick1/FILE
+
+# check file size on mount
+EXPECT "2048" stat -c "%s" $M0/FILE
+
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/bug-1064147.t b/tests/bugs/bug-1064147.t
new file mode 100755
index 00000000000..27ffde4eb44
--- /dev/null
+++ b/tests/bugs/bug-1064147.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+#------------------------------------------------------------
+
+# Test case 1 - Subvolume down + Healing
+#------------------------------------------------------------
+# Kill 2nd brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+
+# Change root permissions
+TEST chmod 444 $M0
+
+# Store permission for comparision
+TEST permission_new=`stat -c "%A" $M0`
+
+# Bring up the killed brick process
+TEST $CLI volume start $V0 force
+
+# Perform lookup
+sleep 5
+TEST ls $M0
+
+# Check brick permissions
+TEST brick_perm=`stat -c "%A" $B0/${V0}2`
+TEST [ ${brick_perm} = ${permission_new} ]
+#------------------------------------------------------------
+
+# Test case 2 - Add-brick + Healing
+#------------------------------------------------------------
+# Change root permissions
+TEST chmod 777 $M0
+
+# Store permission for comparision
+TEST permission_new_2=`stat -c "%A" $M0`
+
+# Add a 3rd brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+
+# Perform lookup
+sleep 5
+TEST ls $M0
+
+# Check permissions on the new brick
+TEST brick_perm2=`stat -c "%A" $B0/${V0}3`
+
+TEST [ ${brick_perm2} = ${permission_new_2} ]
+
+cleanup;
diff --git a/tests/bugs/bug-1110262.t b/tests/bugs/bug-1110262.t
new file mode 100644
index 00000000000..90b101fc98d
--- /dev/null
+++ b/tests/bugs/bug-1110262.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../traps.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 nfs.disable false
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+#do some operation on mount, so that kill_brick is guaranteed to be
+#done _after_ first lookup on root and dht has a proper layout on
+#it. Otherwise mkdir done in later stages of script might fail due to
+#lack of layout on "/" as dht-self-heal won't proceed if any of its
+#subvolumes are down.
+TEST ls $M0
+#kill one of the brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+
+cleanup_user_group () {
+ userdel --force dev
+ groupdel QA
+}
+push_trapfunc cleanup_user_group
+
+#create a user and group
+TEST useradd dev
+TEST groupadd QA
+
+#create a new directory now with special user, group and mode bits
+mkdir -m 7777 $M0/dironedown
+TEST chown dev $M0/dironedown
+TEST chgrp QA $M0/dironedown
+
+#store the permissions for comparision
+permission_onedown=`ls -l $M0 | grep dironedown | awk '{print $1}'`
+
+#Now bring up the brick process
+TEST $CLI volume start $V0 force
+
+#The updation of directory attrs happens on the revalidate path. Hence, atmax on
+#2 lookups the update will happen.
+sleep 5
+TEST ls $M0/dironedown;
+
+#check directory that was created post brick going down
+TEST brick_perm=`ls -l $B0/${V0}2 | grep dironedown | awk '{print $1}'`
+TEST echo $brick_perm;
+TEST [ ${brick_perm} = ${permission_onedown} ]
+uid=`ls -l $B0/${V0}2 | grep dironedown | awk '{print $3}'`
+TEST echo $uid
+TEST [ $uid = dev ]
+gid=`ls -l $B0/${V0}2 | grep dironedown | awk '{print $4}'`
+TEST echo $gid
+TEST [ $gid = QA ]
+
+cleanup
diff --git a/tests/bugs/bug-1138841.t b/tests/bugs/bug-1138841.t
new file mode 100644
index 00000000000..abec5e89d56
--- /dev/null
+++ b/tests/bugs/bug-1138841.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a volume and set auth.allow using cidr format ip
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 auth.allow 127.0.0.1/20
+TEST $CLI volume start $V0
+
+
+## mount the volume and create a file on the mount point
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST touch $M0/tmp1
+
+## Stop the volume and do the cleanup
+
+TEST $CLI volume stop $V0
+cleanup
diff --git a/tests/bugs/bug-1258069.t b/tests/bugs/bug-1258069.t
new file mode 100755
index 00000000000..b87ecbf2fe8
--- /dev/null
+++ b/tests/bugs/bug-1258069.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir -p $N0/a/b/c
+TEST umount_nfs $N0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+rmdir $M0/a/b/c
+mkdir $M0/a/b/c
+TEST mount_nfs $H0:/$V0/a/b/c $N0 nolock
+TEST umount_nfs $N0
+TEST umount $M0
+
+cleanup
diff --git a/tests/bugs/bug-1368312.t b/tests/bugs/bug-1368312.t
new file mode 100644
index 00000000000..c60d562bbd7
--- /dev/null
+++ b/tests/bugs/bug-1368312.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+function compare_get_split_brain_status {
+ local path=$1
+ local choice=$2
+ echo `getfattr -n replica.split-brain-status $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' | grep $choice
+ if [ $? -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST mkdir $M0/tmp1
+
+#Create metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST chmod 666 $M0/tmp1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST chmod 757 $M0/tmp1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+EXPECT 2 get_pending_heal_count $V0
+
+
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST chmod 755 $M0/tmp1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+
+TEST chmod 766 $M0/tmp1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 5
+
+EXPECT 4 get_pending_heal_count $V0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 765 $M0/tmp1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST chmod 756 $M0/tmp1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 6 get_pending_heal_count $V0
+
+cd $M0
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-0
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-1
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-2
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-3
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-4
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-5
+
+cd -
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/bug-1371806.t b/tests/bugs/bug-1371806.t
new file mode 100644
index 00000000000..08180525650
--- /dev/null
+++ b/tests/bugs/bug-1371806.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function set_fattr {
+ for i in `seq 1 10`
+ do
+ setfattr -n user.foo -v "newabc" ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+##First set user.foo xattr with value abc on all dirs
+
+TEST setfattr -n user.foo -v "abc" ./tmp{1..10}
+EXPECT "abc" get_getfattr ./tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}5/tmp{1..10}
+
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count
+
+succ=fail=0
+## set user.foo xattr with value newabc after kill one brick
+set_fattr
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+cd -
+TEST umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+## At this point dht code will heal xattr on down brick only for those dirs
+## hashed subvol was up at the time of update xattr
+TEST stat ./tmp{1..10}
+
+## Count the user.foo xattr value with abc on mount point and compare with fail value
+count=`getfattr -n user.foo ./tmp{1..10} | grep "user.foo" | grep -iw "abc" | wc -l`
+EXPECT "$fail" echo $count
+
+## Count the user.foo xattr value with newabc on mount point and compare with succ value
+count=`getfattr -n user.foo ./tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+## Count the user.foo xattr value with abc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "abc" | wc -l`
+EXPECT "$fail" echo $count
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+
+cd -
+cleanup
+exit
diff --git a/tests/bugs/bug-1371806_1.t b/tests/bugs/bug-1371806_1.t
new file mode 100644
index 00000000000..df19a8c1c2a
--- /dev/null
+++ b/tests/bugs/bug-1371806_1.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function remove_mds_xattr {
+
+ for i in `seq 1 10`
+ do
+ setfattr -x trusted.glusterfs.dht.mds $1/tmp${i} 2> /dev/null
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+##Remove internal mds xattr from all directory
+remove_mds_xattr $B0/${V0}0
+remove_mds_xattr $B0/${V0}1
+remove_mds_xattr $B0/${V0}2
+remove_mds_xattr $B0/${V0}3
+
+cd -
+umount $M0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+cd $M0
+
+TEST setfattr -n user.foo -v "abc" ./tmp{1..10}
+EXPECT "abc" get_getfattr ./tmp{1..10}
+
+cd -
+cleanup
diff --git a/tests/bugs/bug-1371806_2.t b/tests/bugs/bug-1371806_2.t
new file mode 100644
index 00000000000..e6aa8e7c1ad
--- /dev/null
+++ b/tests/bugs/bug-1371806_2.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function remove_mds_xattr {
+
+ for i in `seq 1 10`
+ do
+ setfattr -x trusted.glusterfs.dht.mds $1/tmp${i} 2> /dev/null
+ done
+}
+
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+cd $M0
+TEST mkdir tmp{1..10}
+
+##Remove internal mds xattr from all directory
+remove_mds_xattr $B0/${V0}0
+remove_mds_xattr $B0/${V0}1
+remove_mds_xattr $B0/${V0}2
+remove_mds_xattr $B0/${V0}3
+
+##First set user.foo xattr with value abc on all dirs
+
+TEST setfattr -n user.foo -v "abc" ./tmp{1..10}
+EXPECT "abc" get_getfattr ./tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}0/tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}1/tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}2/tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}3/tmp{1..10}
+
+cd -
+TEST umount $M0
+
+cd -
+cleanup
+exit
diff --git a/tests/bugs/bug-1371806_3.t b/tests/bugs/bug-1371806_3.t
new file mode 100644
index 00000000000..cb13f37c737
--- /dev/null
+++ b/tests/bugs/bug-1371806_3.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function set_fattr {
+ for i in `seq 1 10`
+ do
+ setfattr -n user.foo -v "newabc" ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" online_brick_count
+
+succ=fail=0
+## set user.foo xattr with value newabc after kill one brick
+set_fattr
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" online_brick_count
+
+cd -
+TEST umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+
+cd $M0
+## At this point dht code will heal xattr on down brick only for those dirs
+## hashed subvol was up at the time of update xattr
+TEST stat ./tmp{1..10}
+
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}3/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+
+cd -
+cleanup
+exit
diff --git a/tests/bugs/bug-1371806_acl.t b/tests/bugs/bug-1371806_acl.t
new file mode 100644
index 00000000000..c39165628cc
--- /dev/null
+++ b/tests/bugs/bug-1371806_acl.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST useradd tmpuser
+
+function set_facl_user {
+ for i in `seq 1 10`
+ do
+ setfacl -m u:tmpuser:rw ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+function set_facl_default {
+ for i in `seq 1 10`
+ do
+ setfacl -m d:o:rw ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ1=$((succ1+1))
+ else
+ fail1=$((fail1+1))
+ fi
+ done
+}
+
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+TEST $CLI volume start $V0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+TEST glusterfs --volfile-id=$V0 --acl --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+TEST setfacl -m u:tmpuser:rwx ./tmp{1..10}
+count=`getfacl -p $M0/tmp{1..10} | grep -c "user:tmpuser:rwx"`
+EXPECT "10" echo $count
+TEST setfacl -m d:o:rwx ./tmp{1..10}
+count=`getfacl -p $M0/tmp{1..10} | grep -c "default:other::rwx"`
+EXPECT "10" echo $count
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "user:tmpuser:rwx"`
+EXPECT "10" echo $count
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "default:other::rwx"`
+EXPECT "10" echo $count
+
+
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count
+
+succ=fail=0
+## Update acl attributes on dir after kill one brick
+set_facl_user
+succ1=fail1=0
+set_facl_default
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+cd -
+TEST umount $M0
+TEST glusterfs --volfile-id=$V0 --acl --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+## At this point dht will heal xatts on down brick only for those hashed_subvol
+## was up at the time of updated xattrs
+TEST stat ./tmp{1..10}
+
+# Make sure to send a write and read on the file inside mount
+echo "helloworld" > ./tmp1/file
+TEST cat ./tmp1/file
+
+## Compare succ value with updated acl attributes
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "user:tmpuser:rw-"`
+EXPECT "$succ" echo $count
+
+
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "default:other::rw-"`
+EXPECT "$succ1" echo $count
+
+cd -
+userdel --force tmpuser
+
+cleanup
diff --git a/tests/bugs/bug-1584517.t b/tests/bugs/bug-1584517.t
new file mode 100644
index 00000000000..7f48015a034
--- /dev/null
+++ b/tests/bugs/bug-1584517.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+#This test case verifies attributes (uid/gid/perm) for the
+#directory are healed after stop/start brick. To verify the same
+#test case change attributes of the directory after down a DHT subvolume
+#and one AFR children. After start the volume with force and run lookup
+#operation attributes should be healed on started bricks at the backend.
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+TEST useradd dev -M
+TEST groupadd QA
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir $M0/dironedown
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count
+
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" online_brick_count
+
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" online_brick_count
+
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "2" online_brick_count
+
+TEST chown dev $M0/dironedown
+TEST chgrp QA $M0/dironedown
+TEST chmod 777 $M0/dironedown
+
+#store the permissions for comparision
+permission_onedown=`ls -l $M0 | grep dironedown | awk '{print $1}'`
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Run lookup two times to hit revalidate code path in dht
+# to heal user attr
+
+TEST ls $M0/dironedown
+
+#check attributes those were created post brick going down
+TEST brick_perm=`ls -l $B0/${V0}3 | grep dironedown | awk '{print $1}'`
+TEST echo $brick_perm
+TEST [ ${brick_perm} = ${permission_onedown} ]
+uid=`ls -l $B0/${V0}3 | grep dironedown | awk '{print $3}'`
+TEST echo $uid
+TEST [ $uid = dev ]
+gid=`ls -l $B0/${V0}3 | grep dironedown | awk '{print $4}'`
+TEST echo $gid
+TEST [ $gid = QA ]
+
+TEST umount $M0
+userdel --force dev
+groupdel QA
+
+cleanup
+exit
+
diff --git a/tests/bugs/bug-1620580.t b/tests/bugs/bug-1620580.t
new file mode 100644
index 00000000000..0c74d4a6089
--- /dev/null
+++ b/tests/bugs/bug-1620580.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+#do some operation on mount, so that kill_brick is guaranteed to be
+#done _after_ first lookup on root
+
+TEST ls $M0
+TEST touch $M0/file
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+# Case of Same volume name, but different bricks
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{3,4};
+TEST $CLI volume start $V0;
+
+# Give time for 'reconnect' to happen
+sleep 4
+
+TEST ! ls $M0
+TEST ! touch $M0/file1
+
+# Case of Same brick, but different volume (ie, recreated).
+TEST $CLI volume create $V1 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V1;
+
+# Give time for 'reconnect' to happen
+sleep 4
+TEST ! ls $M0
+TEST ! touch $M0/file2
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST $CLI volume stop $V1
+TEST $CLI volume delete $V1
+
+# Case of Same brick, but different volume (but same volume name)
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0;
+
+# Give time for 'reconnect' to happen
+sleep 4
+TEST ! ls $M0
+TEST ! touch $M0/file3
+
+
+cleanup
diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t
new file mode 100644
index 00000000000..5bf93c92f94
--- /dev/null
+++ b/tests/bugs/bug-1694920.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_write $fd1 "data"
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
+TEST $CLI volume set $V0 client.strict-locks on
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST ! fd_write $fd1 "data"
+TEST fd_close $fd1
+
+cleanup
diff --git a/tests/bugs/bug-1702299.t b/tests/bugs/bug-1702299.t
new file mode 100644
index 00000000000..1cff2ed5d3d
--- /dev/null
+++ b/tests/bugs/bug-1702299.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function set_fattr {
+ for i in `seq 1 10`
+ do
+ setfattr -n user.foo -v "newabc" ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+succ=fail=0
+## set user.foo xattr with value newabc after kill one brick
+set_fattr
+count=10
+EXPECT "$succ" echo $count
+count=0
+EXPECT "$fail" echo $count
+
+cd -
+
+# Add-brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5}
+
+cd $M0
+## At this point dht code will heal xattr on down brick only for those dirs
+## hashed subvol was up at the time of update xattr
+TEST stat ./tmp{1..10}
+
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}4/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+
+cd -
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/bug-863068.t b/tests/bugs/bug-863068.t
deleted file mode 100644
index 56041b15dd3..00000000000
--- a/tests/bugs/bug-863068.t
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../include.rc
-
-cleanup;
-
-## This function get the No. of entries for
-## gluster volume heal volnmae info healed command for brick1 and brick2
-## and compare the initial value (Before volume heal full) and final value
-## (After gluster volume heal vol full) and compare.
-
-function getdiff()
-{
- val=10
- if [ "$1" == "$3" ]
- then
- if [ "$2" == "$4" ]
- then
- val=0
- else
- val=20
- fi
- fi
-
- echo $val
-}
-
-
-TEST glusterd
-TEST pidof glusterd
-TEST $CLI volume info;
-TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
-TEST $CLI volume start $V0;
-mount -t glusterfs $H0:/$V0 $M0;
-B0_hiphenated=`echo $B0 | tr '/' '-'`
-kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid` ;
-
-mkdir $M0/{a,b,c};
-echo "GLUSTERFS" >> $M0/a/file;
-
-TEST $CLI volume start $V0 force;
-sleep 5
-TEST $CLI volume heal $V0 full;
-sleep 5
-
-##First Brick Initial(Before full type self heal) value
-FBI=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1`
-
-##Second Brick Initial Value
-SBI=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1`
-TEST $CLI volume heal $V0 full;
-
-sleep 5
-
-##First Brick Final value
-##Number of entries from output of <gluster volume heal volname info healed>
-
-FBF=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1`
-
-##Second Brick Final Value
-SBF=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1`
-
-##get the difference of values
-EXPECT "0" getdiff $FBI $SBI $FBF $SBF;
-
-## Tests after this comment checks for the background self heal
-
-TEST mkdir $M0/d
-kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid` ;
-TEST $CLI volume set $V0 self-heal-daemon off
-dd if=/dev/random of=$M0/d/file1 bs=100M count=1 2>/dev/null;
-TEST $CLI volume start $V0 force
-sleep 3
-TEST ls -l $M0/d
-
-cleanup;
diff --git a/tests/bugs/changelog/bug-1208470.t b/tests/bugs/changelog/bug-1208470.t
new file mode 100755
index 00000000000..526f8f20612
--- /dev/null
+++ b/tests/bugs/changelog/bug-1208470.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+## Testcase:
+## Avoid creating any EMPTY changelog(over the changelog rollover time)
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../changelog.rc
+cleanup;
+
+## override current changelog rollover-time
+## to avoid sleeping for long duration.
+CL_RO_TIME=5
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/$V0"1";
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Set changelog ON
+TEST $CLI volume set $V0 changelog.changelog on;
+
+EXPECT 1 count_changelog_files $B0/${V0}1
+
+## Set changelog rollover time
+TEST $CLI volume set $V0 changelog.rollover-time $CL_RO_TIME;
+
+## Wait for changelog rollover time
+sleep $CL_RO_TIME
+
+## NO additional empty changelogs created
+EXPECT 1 count_changelog_files $B0/${V0}1
diff --git a/tests/bugs/changelog/bug-1211327.t b/tests/bugs/changelog/bug-1211327.t
new file mode 100644
index 00000000000..a849ec3981f
--- /dev/null
+++ b/tests/bugs/changelog/bug-1211327.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+#Testcase:
+#On brick restart, new HTIME.TSTAMP file should not be created.
+#But on changelog disable/enable HTIME.TSTAMP should be created.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../changelog.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/$V0"1";
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 changelog.changelog on;
+##Let changelog init complete before killing gluster processes
+sleep 1
+
+TEST killall_gluster;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" online_brick_count
+
+TEST glusterd;
+TEST pidof glusterd;
+
+##Let the brick processes starts
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+##On brick restart only one HTIME should be found.
+EXPECT 1 count_htime_files;
+
+##On changelog disable/enable, new HTIME should be created.
+TEST $CLI volume set $V0 changelog.changelog off;
+TEST $CLI volume set $V0 changelog.changelog on;
+EXPECT 2 count_htime_files;
+
+cleanup;
diff --git a/tests/bugs/changelog/bug-1225542.t b/tests/bugs/changelog/bug-1225542.t
new file mode 100644
index 00000000000..a646aa88014
--- /dev/null
+++ b/tests/bugs/changelog/bug-1225542.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+#Testcase:
+#On snapshot, notify changelog reconfigure upon explicit rollover
+#irrespective of any failures and send error back to barrier if any.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+## Create a volume
+TEST $CLI volume create $V0 $H0:$L1
+
+## Start volume and verify
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 changelog.changelog on
+##Wait for changelog init to complete.
+sleep 1
+
+## Take snapshot
+TEST $CLI snapshot create snap1 $V0
+
+cleanup;
diff --git a/tests/bugs/changelog/bug-1321955.t b/tests/bugs/changelog/bug-1321955.t
new file mode 100644
index 00000000000..9e3752b1728
--- /dev/null
+++ b/tests/bugs/changelog/bug-1321955.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog enable
+TEST $CLI volume set $V0 changelog.capture-del-path on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+#Create files
+TEST touch $M0/file1
+TEST mkdir $M0/dir1
+TEST touch $M0/dir1/file1
+
+#Check for presence of files
+TEST stat $B0/${V0}0/dir1/file1
+TEST stat $B0/${V0}1/dir1/file1
+TEST stat $B0/${V0}0/file1
+TEST stat $B0/${V0}1/file1
+
+#Kill brick1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#Del dir1/file1
+TEST rm -f $M0/dir1/file1
+
+#file1 should be present in brick1 and not in brick0
+TEST ! stat $B0/${V0}0/dir1/file1
+TEST stat $B0/${V0}1/dir1/file1
+
+#Bring up the brick which is down
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+#Initiate heal
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#dir1/file1 in brick1 should be deleted
+TEST ! stat $B0/${V0}1/dir1/file1
+
+#file1 under root should not be deleted in brick1
+TEST stat $B0/${V0}1/file1
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1004218.t b/tests/bugs/cli/bug-1004218.t
new file mode 100644
index 00000000000..ab8307d6405
--- /dev/null
+++ b/tests/bugs/cli/bug-1004218.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Test if only a single xml document is generated by 'status all'
+# when a volume is not started
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create ${V0}1 $H0:$B0/${V0}1{1,2}
+TEST $CLI volume create ${V0}2 $H0:$B0/${V0}2{1,2}
+
+TEST $CLI volume start ${V0}1
+
+function test_status_all ()
+{
+ $CLI volume status all --xml | xmllint -format -
+}
+
+TEST test_status_all
+
+TEST $CLI volume stop ${V0}1
+
+cleanup
diff --git a/tests/bugs/cli/bug-1022905.t b/tests/bugs/cli/bug-1022905.t
new file mode 100644
index 00000000000..ee629e970d9
--- /dev/null
+++ b/tests/bugs/cli/bug-1022905.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Create a volume
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Volume start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Enable a protected and a resettable/unprotected option
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+## Reset cmd resets only unprotected option(s), succeeds.
+TEST $CLI volume reset $V0;
+
+## Set an unprotected option
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+## Now 1 protected and 1 unprotected options are set
+## Reset force should succeed
+TEST $CLI volume reset $V0 force;
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1030580.t b/tests/bugs/cli/bug-1030580.t
new file mode 100644
index 00000000000..ac8b1d8f6db
--- /dev/null
+++ b/tests/bugs/cli/bug-1030580.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1024k count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+# Increasing the json stats dump time interval, so that it doesn't mess with the test.
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 3600
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# Clear the profile info uptill now.
+TEST $CLI volume profile $V0 info clear
+
+# Verify 'volume profile info' prints both cumulative and incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 0 '
+
+# Verify 'volume profile info incremental' prints incremental stats only
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+# Verify 'volume profile info cumulative' prints cumulative stats only
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info cumulative)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 0 incremental_stat_count "$output" '.*'
+
+# Verify the 'volume profile info cumulative' command above didn't alter
+# the interval id
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 2 '
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1047378.t b/tests/bugs/cli/bug-1047378.t
new file mode 100644
index 00000000000..33ee6be3d8b
--- /dev/null
+++ b/tests/bugs/cli/bug-1047378.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST "echo volume list | $CLI --xml | xmllint --format -"
+
+cleanup
diff --git a/tests/bugs/cli/bug-1047416.t b/tests/bugs/cli/bug-1047416.t
new file mode 100644
index 00000000000..864301034c9
--- /dev/null
+++ b/tests/bugs/cli/bug-1047416.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1024k count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+# Increasing the json stats dump time interval, so that it doesn't mess with the test.
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 3600
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# Clear the profile info uptill now.
+TEST $CLI volume profile $V0 info clear
+
+# Verify 'volume profile info' prints both cumulative and incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 0 '
+
+# Verify 'volume profile info peek' prints both cumulative and incremental stats
+# without clearing incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info peek)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info peek)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+# Verify 'volume profile info incremental peek' prints incremental stats only
+# without clearing incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental peek)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental peek)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+# Verify 'volume profile info clear' clears both incremental and cumulative stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info clear)
+EXPECT 2 cleared_stat_count "$output"
+
+output=$($CLI volume profile $V0 info)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 0 '
+EXPECT 4 data_read_count "$output" ' 0 '
+EXPECT 4 data_written_count "$output" ' 0 '
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1077682.t b/tests/bugs/cli/bug-1077682.t
new file mode 100644
index 00000000000..eab5d86d04b
--- /dev/null
+++ b/tests/bugs/cli/bug-1077682.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume start $V0
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}1
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" \
+"$H0:$B0/${V0}3"
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 commit
+TEST killall glusterd
+TEST glusterd
+
+cleanup
diff --git a/tests/bugs/cli/bug-1087487.t b/tests/bugs/cli/bug-1087487.t
new file mode 100755
index 00000000000..0659ffab684
--- /dev/null
+++ b/tests/bugs/cli/bug-1087487.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function rebalance_start {
+ $CLI volume rebalance $1 start | head -1;
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume start $V0
+
+EXPECT "volume rebalance: $V0: success: Rebalance on $V0 has \
+been started successfully. Use rebalance status command to \
+check status of the rebalance process." rebalance_start $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1113476.t b/tests/bugs/cli/bug-1113476.t
new file mode 100644
index 00000000000..fc7dbca537d
--- /dev/null
+++ b/tests/bugs/cli/bug-1113476.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+function volinfo_validate ()
+{
+ local var=$1
+ $CLI volume info $V0 | grep "^$var" | sed 's/.*: //'
+}
+
+cleanup;
+
+TEST verify_lvm_version
+TEST glusterd
+TEST pidof glusterd
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT '' volinfo_validate 'auto-delete'
+
+TEST $CLI snapshot config snap-max-hard-limit 100
+TEST $CLI snapshot config $V0 snap-max-hard-limit 50
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT '' volinfo_validate 'auto-delete'
+
+TEST $CLI snapshot config snap-max-soft-limit 50
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT '' volinfo_validate 'auto-delete'
+
+TEST $CLI snapshot config auto-delete enable
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT 'enable' volinfo_validate 'auto-delete'
+
+cleanup;
+
+
diff --git a/tests/bugs/cli/bug-1169302.c b/tests/bugs/cli/bug-1169302.c
new file mode 100644
index 00000000000..7c6b5fbf856
--- /dev/null
+++ b/tests/bugs/cli/bug-1169302.c
@@ -0,0 +1,79 @@
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int keep_running = 1;
+
+void
+stop_running(int sig)
+{
+ if (sig == SIGTERM)
+ keep_running = 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ char *filename = NULL;
+ char *logfile = NULL;
+ char *host = NULL;
+
+ if (argc != 5) {
+ return -1;
+ }
+
+ host = argv[2];
+ logfile = argv[3];
+ filename = argv[4];
+
+ /* setup signal handler for exiting */
+ signal(SIGTERM, stop_running);
+
+ fs = glfs_new(argv[1]);
+ if (!fs) {
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", host, 24007);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ return -1;
+ }
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ if (!fd) {
+ return -1;
+ }
+
+ /* sleep until SIGTERM has been received */
+ while (keep_running) {
+ sleep(1);
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tests/bugs/cli/bug-1169302.t b/tests/bugs/cli/bug-1169302.t
new file mode 100755
index 00000000000..19660e033a8
--- /dev/null
+++ b/tests/bugs/cli/bug-1169302.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+cleanup
+
+#setup cluster and test volume
+TEST launch_cluster 3; # start 3-node virtual cluster
+TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+
+# test CLI parameter acceptance
+TEST $CLI_1 volume statedump $V0
+TEST $CLI_2 volume statedump $V0
+TEST $CLI_3 volume statedump $V0
+TEST ! $CLI_1 volume statedump $V0 client $H2:0
+TEST ! $CLI_2 volume statedump $V0 client $H2:-1
+TEST $CLI_3 volume statedump $V0 client $H2:765
+TEST ! $CLI_1 volume statedump $V0 client $H2:
+TEST ! $CLI_2 volume statedump $V0 client
+TEST ! $CLI_3 volume statedump $V0 client $H2 $GFAPI_PID
+
+# build and run a gfapi appliction for triggering a statedump
+logdir=`gluster --print-logdir`
+STATEDUMP_TIMEOUT=60
+
+build_tester $(dirname $0)/bug-1169302.c -lgfapi
+$(dirname $0)/bug-1169302 $V0 $H1 $logdir/bug-1169302.log testfile & GFAPI_PID=$!
+
+cleanup_statedump
+
+# Take the statedump of the process connected to $H1, it should match the
+# hostname or IP-address with the connection from the bug-1169302 executable.
+# In our CI it seems not possible to use $H0, 'localhost', $(hostname --fqdn)
+# or even "127.0.0.1"....
+sleep 2
+host=`netstat -nap | grep $GFAPI_PID | grep 24007 | awk '{print $4}' | cut -d: -f1`
+TEST $CLI_3 volume statedump $V0 client $host:$GFAPI_PID
+EXPECT_WITHIN $STATEDUMP_TIMEOUT "Y" path_exists $statedumpdir/glusterdump.$GFAPI_PID*
+
+kill $GFAPI_PID
+
+cleanup_statedump
+cleanup_tester $(dirname $0)/bug-1169302
+cleanup \ No newline at end of file
diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
new file mode 100755
index 00000000000..e719fc59033
--- /dev/null
+++ b/tests/bugs/cli/bug-1320388.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test enables management ssl and then test the
+# heal info command.
+
+for d in /etc/ssl /etc/openssl /usr/local/etc/openssl ; do
+ if test -d $d ; then
+ SSL_BASE=$d
+ break
+ fi
+done
+
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+cleanup;
+rm -f $SSL_BASE/glusterfs.*
+touch "$GLUSTERD_WORKDIR"/secure-access
+
+TEST openssl genrsa -out $SSL_KEY 2048
+TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ln $SSL_CERT $SSL_CA
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 disperse.eager-lock off
+TEST $CLI volume set $V0 disperse.other-eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^6$" ec_child_up_count $V0 0
+touch $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}5
+echo abc > $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^5$" get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^6$" ec_child_up_count $V0 0
+TEST gluster volume heal $V0 info
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 #One for each active brick
+cleanup;
diff --git a/tests/bugs/cli/bug-1353156-get-state-cli-validations.t b/tests/bugs/cli/bug-1353156-get-state-cli-validations.t
new file mode 100644
index 00000000000..a4556c9c997
--- /dev/null
+++ b/tests/bugs/cli/bug-1353156-get-state-cli-validations.t
@@ -0,0 +1,147 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+ODIR="/var/tmp/gdstates/"
+NOEXDIR="/var/tmp/gdstatesfoo/"
+
+function get_daemon_not_supported_part {
+ echo $1
+}
+
+function get_usage_part {
+ echo $7
+}
+
+function get_directory_doesnt_exist_part {
+ echo $1
+}
+
+function get_parsing_arguments_part {
+ echo $1
+}
+
+function positive_test {
+ local text=$("$@")
+ echo $text > /dev/stderr
+ (echo -n $text | grep -qs ' state dumped to ') || return 1
+ local opath=$(echo -n $text | awk '{print $5}')
+ [ -r $opath ] || return 1
+ rm -f $opath
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST mkdir -p $ODIR
+
+push_trapfunc rm -rf $ODIR
+
+TEST $CLI volume create $V0 disperse $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+TEST $CLI volume start $V0
+
+TEST setup_lvm 1
+TEST $CLI volume create $V1 $H0:$L1;
+TEST $CLI volume start $V1
+
+TEST $CLI snapshot create ${V1}_snap $V1
+
+TEST positive_test $CLI get-state
+
+TEST positive_test $CLI get-state glusterd
+
+TEST ! $CLI get-state glusterfsd;
+ERRSTR=$($CLI get-state glusterfsd 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST positive_test $CLI get-state file gdstate
+
+TEST positive_test $CLI get-state glusterd file gdstate
+
+TEST ! $CLI get-state glusterfsd file gdstate;
+ERRSTR=$($CLI get-state glusterfsd file gdstate 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST positive_test $CLI get-state odir $ODIR
+
+TEST positive_test $CLI get-state glusterd odir $ODIR
+
+TEST positive_test $CLI get-state odir $ODIR file gdstate
+
+TEST positive_test $CLI get-state glusterd odir $ODIR file gdstate
+
+TEST positive_test $CLI get-state detail
+
+TEST positive_test $CLI get-state glusterd detail
+
+TEST positive_test $CLI get-state odir $ODIR detail
+
+TEST positive_test $CLI get-state glusterd odir $ODIR detail
+
+TEST positive_test $CLI get-state glusterd odir $ODIR file gdstate detail
+
+TEST positive_test $CLI get-state volumeoptions
+
+TEST positive_test $CLI get-state glusterd volumeoptions
+
+TEST positive_test $CLI get-state odir $ODIR volumeoptions
+
+TEST positive_test $CLI get-state glusterd odir $ODIR volumeoptions
+
+TEST positive_test $CLI get-state glusterd odir $ODIR file gdstate volumeoptions
+
+TEST ! $CLI get-state glusterfsd odir $ODIR;
+ERRSTR=$($CLI get-state glusterfsd odir $ODIR 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state glusterfsd odir $ODIR file gdstate;
+ERRSTR=$($CLI get-state glusterfsd odir $ODIR file gdstate 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state glusterfsd odir $NOEXDIR file gdstate;
+ERRSTR=$($CLI get-state glusterfsd odir $NOEXDIR file gdstate 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state odir $NOEXDIR;
+ERRSTR=$($CLI get-state odir $NOEXDIR 2>&1 >/dev/null);
+EXPECT 'Failed' get_directory_doesnt_exist_part $ERRSTR;
+
+TEST ! $CLI get-state odir $NOEXDIR file gdstate;
+ERRSTR=$($CLI get-state odir $NOEXDIR 2>&1 >/dev/null);
+EXPECT 'Failed' get_directory_doesnt_exist_part $ERRSTR;
+
+TEST ! $CLI get-state foo bar;
+ERRSTR=$($CLI get-state foo bar 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd foo bar;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd detail file gdstate;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd foo bar detail;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd volumeoptions file gdstate;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd foo bar volumeoptions;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1378842-volume-get-all.t b/tests/bugs/cli/bug-1378842-volume-get-all.t
new file mode 100644
index 00000000000..be41f25b000
--- /dev/null
+++ b/tests/bugs/cli/bug-1378842-volume-get-all.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume set all server-quorum-ratio 80
+
+# Execute volume get without having an explicit option, this should fail
+TEST ! $CLI volume get all
+
+# Execute volume get with an explicit global option
+TEST $CLI volume get all server-quorum-ratio
+EXPECT '80' volume_get_field all 'cluster.server-quorum-ratio'
+
+# Execute volume get with 'all'
+TEST $CLI volume get all all
+
+cleanup;
+
diff --git a/tests/bugs/cli/bug-764638.t b/tests/bugs/cli/bug-764638.t
new file mode 100644
index 00000000000..ffc613409d6
--- /dev/null
+++ b/tests/bugs/cli/bug-764638.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI pool list;
+TEST $CLI pool list --xml;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-822830.t b/tests/bugs/cli/bug-822830.t
new file mode 100755
index 00000000000..a9904854110
--- /dev/null
+++ b/tests/bugs/cli/bug-822830.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting nfs.rpc-auth-reject as 192.{}.1.2
+TEST ! $CLI volume set $V0 nfs.rpc-auth-reject 192.{}.1.2
+EXPECT '' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+# Setting nfs.rpc-auth-allow as a.a.
+TEST ! $CLI volume set $V0 nfs.rpc-auth-allow a.a.
+EXPECT '' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+## Setting nfs.rpc-auth-reject as 192.*..*
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.*..*
+EXPECT '192.*..*' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+# Setting nfs.rpc-auth-allow as a.a
+TEST $CLI volume set $V0 nfs.rpc-auth-allow a.a
+EXPECT 'a.a' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-allow as *.redhat.com
+TEST $CLI volume set $V0 nfs.rpc-auth-allow *.redhat.com
+EXPECT '\*.redhat.com' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-allow as 192.168.10.[1-5]
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.10.[1-5]
+EXPECT '192.168.10.\[1-5]' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-allow as 192.168.70.?
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.70.?
+EXPECT '192.168.70.?' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-reject as 192.168.10.5/16
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.10.5/16
+EXPECT '192.168.10.5/16' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+## Setting nfs.rpc-auth-reject as 192.*.*
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.*.*
+EXPECT '192.*.*' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-867252.t b/tests/bugs/cli/bug-867252.t
new file mode 100644
index 00000000000..ccc33d82a0f
--- /dev/null
+++ b/tests/bugs/cli/bug-867252.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2;
+EXPECT '2' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force;
+EXPECT '1' brick_count $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-921215.t b/tests/bugs/cli/bug-921215.t
new file mode 100755
index 00000000000..02532562cff
--- /dev/null
+++ b/tests/bugs/cli/bug-921215.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This is test case for bug no 921215 "Can not create volume with a . in the name"
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST ! $CLI volume create $V0.temp replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+cleanup;
diff --git a/tests/bugs/cli/bug-949298.t b/tests/bugs/cli/bug-949298.t
new file mode 100644
index 00000000000..e0692f0c157
--- /dev/null
+++ b/tests/bugs/cli/bug-949298.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI --xml volume info $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-961307.t b/tests/bugs/cli/bug-961307.t
new file mode 100644
index 00000000000..602a6e34bce
--- /dev/null
+++ b/tests/bugs/cli/bug-961307.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+REPLICA=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
+TEST $CLI volume start $V0
+
+var1=$($CLI volume remove-brick $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 start 2>&1)
+var2="volume remove-brick start: failed: Volume $H0:$B0/${V0}-00 does not exist"
+
+EXPECT "$var2" echo "$var1"
+cleanup;
diff --git a/tests/bugs/cli/bug-969193.t b/tests/bugs/cli/bug-969193.t
new file mode 100755
index 00000000000..dd6d7cdf100
--- /dev/null
+++ b/tests/bugs/cli/bug-969193.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Test that "system getspec" works without op_version problems.
+
+. $(dirname $0)/../../include.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 $H0:$B0/brick1
+TEST $CLI system getspec $V0
+cleanup;
diff --git a/tests/bugs/cli/bug-977246.t b/tests/bugs/cli/bug-977246.t
new file mode 100644
index 00000000000..bb8d6328e8b
--- /dev/null
+++ b/tests/bugs/cli/bug-977246.t
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# This test checks if address validation, correctly catches hostnames
+# with consective dots, such as 'example..org', as invalid
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume info $V0
+TEST $CLI volume start $V0
+
+TEST ! $CLI volume set $V0 auth.allow example..org
+
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-982174.t b/tests/bugs/cli/bug-982174.t
new file mode 100644
index 00000000000..067e5b97c17
--- /dev/null
+++ b/tests/bugs/cli/bug-982174.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Test to check
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check if incorrect log-level keywords does not crash the CLI
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+function set_log_level_status {
+ local level=$1
+ $CLI volume set $V0 diagnostics.client-log-level $level 2>&1 |grep -oE 'success|failed'
+}
+
+
+LOG_LEVEL="trace"
+EXPECT "failed" set_log_level_status $LOG_LEVEL
+
+
+LOG_LEVEL="error-gen"
+EXPECT "failed" set_log_level_status $LOG_LEVEL
+
+
+LOG_LEVEL="TRACE"
+EXPECT "success" set_log_level_status $LOG_LEVEL
+
+EXPECT "$LOG_LEVEL" echo `$CLI volume info | grep diagnostics | awk '{print $2}'`
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-983317-volume-get.t b/tests/bugs/cli/bug-983317-volume-get.t
new file mode 100644
index 00000000000..c793bbc9f0c
--- /dev/null
+++ b/tests/bugs/cli/bug-983317-volume-get.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Set a volume option
+TEST $CLI volume set $V0 open-behind on
+TEST $CLI volume start $V0
+
+TEST $CLI volume set all server-quorum-ratio 80
+
+TEST $CLI volume set $V0 user.metadata 'dummy'
+
+# Execute volume get without having an explicit option, this should fail
+TEST ! $CLI volume get $V0
+
+# Execute volume get with an explicit option
+TEST $CLI volume get $V0 open-behind
+EXPECT 'on' volume_get_field $V0 'open-behind'
+
+# Execute volume get with 'all"
+TEST $CLI volume get $V0 all
+
+# Check if volume get can display correct global options values as well
+EXPECT '80' volume_get_field $V0 'server-quorum-ratio'
+
+# Check user.* options can also be retrived using volume get
+EXPECT 'dummy' volume_get_field $V0 'user.metadata'
+
+TEST $CLI volume set all brick-multiplex enable
+EXPECT 'enable' volume_get_field $V0 'brick-multiplex'
+
+TEST $CLI volume set all brick-multiplex disable
+EXPECT 'disable' volume_get_field $V0 'brick-multiplex'
+
+#setting an cluster level option for single volume should fail
+TEST ! $CLI volume set $V0 brick-multiplex enable
+
diff --git a/tests/bugs/core/949327.t b/tests/bugs/core/949327.t
new file mode 100644
index 00000000000..6b8033a5c85
--- /dev/null
+++ b/tests/bugs/core/949327.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function tmp_file_count()
+{
+ echo $(ls -lh /tmp/tmp.* 2>/dev/null | wc -l)
+}
+
+
+old_count=$(tmp_file_count);
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+new_count=$(tmp_file_count);
+
+TEST [ "$old_count" -eq "$new_count" ]
+
+cleanup
diff --git a/tests/bugs/core/brick-mux-fd-cleanup.t b/tests/bugs/core/brick-mux-fd-cleanup.t
new file mode 100644
index 00000000000..de11c177b8a
--- /dev/null
+++ b/tests/bugs/core/brick-mux-fd-cleanup.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This .t tests that the fds from client are closed on brick when gluster volume
+#stop is executed in brick-mux setup.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+function keep_fd_open {
+#This function has to be run as background job because opening the fd in
+#foreground and running commands is leading to flush calls on these fds
+#which is making it very difficult to create the race where fds will be left
+#open even after the brick dies.
+ exec 5>$M1/a
+ exec 6>$M1/b
+ while [ -f $M0/a ]; do sleep 1; done
+}
+
+function count_open_files {
+ local brick_pid="$1"
+ local pattern="$2"
+ ls -l /proc/$brick_pid/fd | grep -i "$pattern" | wc -l
+}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{2,3}
+#Have same configuration on both bricks so that they are multiplexed
+#Delay flush fop for a second
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume heal $V1 disable
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.enable flush
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+TEST $CLI volume set $V1 delay-gen posix
+TEST $CLI volume set $V1 delay-gen.enable flush
+TEST $CLI volume set $V1 delay-gen.delay-percentage 100
+TEST $CLI volume set $V1 delay-gen.delay-duration 1000000
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0
+TEST $GFS -s $H0 --volfile-id=$V1 --direct-io-mode=enable $M1
+
+TEST touch $M0/a
+keep_fd_open &
+TEST $CLI volume profile $V1 start
+brick_pid=$(get_brick_pid $V1 $H0 $B0/${V1}2)
+TEST count_open_files $brick_pid "$B0/${V1}2/a"
+TEST count_open_files $brick_pid "$B0/${V1}2/b"
+TEST count_open_files $brick_pid "$B0/${V1}3/a"
+TEST count_open_files $brick_pid "$B0/${V1}3/b"
+
+#If any other flush fops are introduced into the system other than the one at
+#cleanup it interferes with the race, so test for it
+EXPECT "^0$" echo "$($CLI volume profile $V1 info incremental | grep -i flush | wc -l)"
+#Stop the volume
+TEST $CLI volume stop $V1
+
+#Wait for cleanup resources or volume V1
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/b"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/b"
+
+TEST rm -f $M0/a #Exit keep_fd_open()
+wait
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup
diff --git a/tests/bugs/core/bug-1110917.t b/tests/bugs/core/bug-1110917.t
new file mode 100644
index 00000000000..c4b04fbf2c7
--- /dev/null
+++ b/tests/bugs/core/bug-1110917.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST $CLI volume set $V0 changelog on
+TEST $CLI volume set $V0 changelog.fsync-interval 1
+
+# perform I/O on the background
+f=$(basename `mktemp -t ${0##*/}.XXXXXX`)
+dd if=/dev/urandom of=$M0/$f count=100000 bs=4k &
+
+# this is the best we can do without inducing _error points_ in the code
+# without the patch reconfigre() would hang...
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+
+TEST $CLI volume set $V0 changelog off
+TEST $CLI volume set $V0 changelog on
+TEST $CLI volume set $V0 changelog off
+TEST $CLI volume set $V0 changelog on
+
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+
+# if there's a deadlock, this would hang
+wait;
+
+cleanup;
diff --git a/tests/bugs/core/bug-1111557.t b/tests/bugs/core/bug-1111557.t
new file mode 100644
index 00000000000..4ed45761bce
--- /dev/null
+++ b/tests/bugs/core/bug-1111557.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0}
+TEST $CLI volume set $V0 diagnostics.brick-log-buf-size 0
+TEST ! $CLI volume set $V0 diagnostics.brick-log-buf-size -0
+cleanup
diff --git a/tests/bugs/core/bug-1117951.t b/tests/bugs/core/bug-1117951.t
new file mode 100644
index 00000000000..b484fee2fe4
--- /dev/null
+++ b/tests/bugs/core/bug-1117951.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+
+# Running with a locale not using '.' as decimal separator should work
+export LC_NUMERIC=sv_SE.utf8
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# As should a locale using '.' as a decimal separator
+export LC_NUMERIC=C
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup
diff --git a/tests/bugs/core/bug-1119582.t b/tests/bugs/core/bug-1119582.t
new file mode 100644
index 00000000000..c30057c2b2c
--- /dev/null
+++ b/tests/bugs/core/bug-1119582.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume set $V0 features.uss disable;
+
+TEST killall glusterd;
+
+rm -f $GLUSTERD_WORKDIR/vols/$V0/snapd.info
+
+TEST glusterd
+
+cleanup ;
diff --git a/tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t b/tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t
new file mode 100644
index 00000000000..d26aa561321
--- /dev/null
+++ b/tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Test
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST touch $M0/file
+TEST setfattr -n user.attribute1 $M0/file
+TEST getfattr -n user.attribute1 $M0/file
+cleanup
+
diff --git a/tests/bugs/core/bug-1168803-snapd-option-validation-fix.t b/tests/bugs/core/bug-1168803-snapd-option-validation-fix.t
new file mode 100755
index 00000000000..89b015d6374
--- /dev/null
+++ b/tests/bugs/core/bug-1168803-snapd-option-validation-fix.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+## Test case for BZ-1168803 - snapd option validation should not fail if the
+#snapd is not running
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 features.uss enable
+
+## Now set another volume option, this should not fail
+TEST $CLI volume set $V0 performance.io-cache off
+
+## start the volume
+TEST $CLI volume start $V0
+
+## Kill snapd daemon and then try to stop the volume which should not fail
+kill $(ps aux | grep glusterfsd | grep snapd | awk '{print $2}')
+
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
new file mode 100755
index 00000000000..a1b9a851bf7
--- /dev/null
+++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+FILE_COUNT=500
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.shd-wait-qlength 100
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done
+TEST kill_brick $V0 $H0 $B0/${V0}1
+for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT "$FILE_COUNT" get_pending_heal_count $V0
+TEST $CLI volume set $V0 self-heal-daemon on
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+TEST $CLI volume set $V0 self-heal-daemon off
+EXPECT_NOT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 self-heal-daemon on
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST umount $M0
+cleanup;
diff --git a/tests/bugs/core/bug-1421721-mpx-toggle.t b/tests/bugs/core/bug-1421721-mpx-toggle.t
new file mode 100644
index 00000000000..231be5b81a0
--- /dev/null
+++ b/tests/bugs/core/bug-1421721-mpx-toggle.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+write_a_file () {
+ echo $1 > $2
+}
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}[0,1]
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+TEST write_a_file "hello" $M0/a_file
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+
+TEST $CLI volume set all cluster.brick-multiplex off
+TEST $CLI volume start $V0
+
+cleanup
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
new file mode 100644
index 00000000000..2793d7008e1
--- /dev/null
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=800
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+NUM_VOLS=15
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried. That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail. Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate. After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures. Arguably,
+# those are spurious because we will eventually catch up. We're just not
+# ready *yet*. More to the point, even if the errors aren't spurious that's
+# not what we're testing right now. Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+ printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ local brick_base=$(get_brick_base $1)
+ local cmd="$CLI volume create $vol_name replica 3"
+ local b
+ for b in $(seq 0 5); do
+ local this_brick=${brick_base}/brick$b
+ mkdir -p $this_brick
+ cmd="$cmd $H0:$this_brick"
+ done
+ TEST $cmd
+ TEST $CLI volume start $vol_name
+ # check for 6 bricks and 1 shd daemon to be up and running
+ EXPECT_WITHIN 120 7 count_up_bricks $vol_name
+ local mount_point=$(get_mount_point $1)
+ mkdir -p $mount_point
+ TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+ local v
+ for v in $(seq 1 $NUM_VOLS); do
+ local mount_point=$(get_mount_point $v)
+ force_umount $mount_point
+ rm -rf $mount_point
+ local vol_name=$(printf "%s-vol%02d" $V0 $v)
+ $CLI volume stop $vol_name
+ $CLI volume delete $vol_name
+ rm -rf $(get_brick_base $1) &
+ done &> /dev/null
+ wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=84
+for i in $(seq 1 $NUM_VOLS); do
+ starttime="$(date +%s)";
+
+ create_volume $i
+ TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+ # Unmounting to reduce memory footprint on regression hosts
+ mnt_point=$(get_mount_point $i)
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+ endtime=$(expr $(date +%s) - $starttime)
+
+ echo "Memory Used after $i volumes : $(pmap -x $(pgrep glusterfsd) | grep total)"
+ echo "Thread Count after $i volumes: $(ps -T -p $(pgrep glusterfsd) | wc -l)"
+ echo "Time taken : ${endtime} seconds"
+done
+
+echo "=========="
+echo "List of all the threads in the Brick process"
+ps -T -p $(pgrep glusterfsd)
+echo "=========="
+
+# Kill glusterd, and wait a bit for all traces to disappear.
+TEST killall -9 glusterd
+sleep 5
+TEST killall -9 glusterfsd
+sleep 5
+
+# Restart glusterd. This is where the brick daemon supposedly dumps core,
+# though I (jdarcy) have yet to see that. Again, give it a while to settle,
+# just to be sure.
+TEST glusterd
+
+cleanup_func
+trap - EXIT
+cleanup
diff --git a/tests/bugs/core/bug-1650403.t b/tests/bugs/core/bug-1650403.t
new file mode 100644
index 00000000000..43d09bc8bd9
--- /dev/null
+++ b/tests/bugs/core/bug-1650403.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=500
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+NUM_VOLS=5
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried. That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail. Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate. After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures. Arguably,
+# those are spurious because we will eventually catch up. We're just not
+# ready *yet*. More to the point, even if the errors aren't spurious that's
+# not what we're testing right now. Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+ printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ local brick_base=$(get_brick_base $1)
+ local cmd="$CLI volume create $vol_name replica 3"
+ local b
+ for b in $(seq 0 5); do
+ local this_brick=${brick_base}/brick$b
+ mkdir -p $this_brick
+ cmd="$cmd $H0:$this_brick"
+ done
+ TEST $cmd
+ TEST $CLI volume start $vol_name
+ # check for 6 bricks and 1 shd daemon to be up and running
+ EXPECT_WITHIN 120 7 count_up_bricks $vol_name
+ local mount_point=$(get_mount_point $1)
+ mkdir -p $mount_point
+ TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+ local v
+ for v in $(seq 1 $NUM_VOLS); do
+ local mount_point=$(get_mount_point $v)
+ force_umount $mount_point
+ rm -rf $mount_point
+ local vol_name=$(printf "%s-vol%02d" $V0 $v)
+ $CLI volume stop $vol_name
+ $CLI volume delete $vol_name
+ rm -rf $(get_brick_base $1) &
+ done &> /dev/null
+ wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=24
+for i in $(seq 1 $NUM_VOLS); do
+ create_volume $i
+ TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+ # Unmounting to reduce memory footprint on regression hosts
+ mnt_point=$(get_mount_point $i)
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+done
+
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ]
+start=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glustershd process"
+for i in $(seq 1 50); do
+ pmap -x $glustershd_pid | grep total
+ for j in $(seq 1 $NUM_VOLS); do
+ vol_name=$(printf "%s-vol%02d" $V0 $j)
+ gluster v set $vol_name cluster.self-heal-daemon off > /dev/null
+ gluster v set $vol_name cluster.self-heal-daemon on > /dev/null
+ done
+done
+
+end=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 10M it means some leak in reconfigure
+# code path
+
+TEST [ $diff -lt 10000 ]
+
+trap - EXIT
+cleanup
diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
new file mode 100644
index 00000000000..1acbaa8dc0b
--- /dev/null
+++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+cleanup
+
+#bug-1444596 - validating brick mux
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V1
+# At the time initialize brick daemon it always keeps open
+# standard fd's (0, 1 , 2) so after stop 1 volume fd's should
+# be open
+nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
+TEST [ $((nofds)) -eq 3 ]
+
+cleanup
diff --git a/tests/bugs/core/bug-834465.c b/tests/bugs/core/bug-834465.c
new file mode 100644
index 00000000000..33dd270b112
--- /dev/null
+++ b/tests/bugs/core/bug-834465.c
@@ -0,0 +1,60 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1;
+ char *filename = NULL;
+ struct flock lock = {
+ 0,
+ };
+ int i = 0;
+ int ret = -1;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <filename> ", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 1;
+ lock.l_len = 1;
+
+ while (i < 100) {
+ lock.l_type = F_WRLCK;
+ ret = fcntl(fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ lock.l_type = F_UNLCK;
+ ret = fcntl(fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ i++;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/core/bug-834465.t b/tests/bugs/core/bug-834465.t
new file mode 100755
index 00000000000..996248d4116
--- /dev/null
+++ b/tests/bugs/core/bug-834465.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+
+#memory-accounting is enabled by default
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+sdump1=$(generate_mount_statedump $V0);
+nalloc1=0
+grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump1
+if [ $? -eq '0' ]
+then
+ nalloc1=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump1 | grep -E "^num_allocs" | cut -d '=' -f2`
+fi
+
+build_tester $(dirname $0)/bug-834465.c
+
+TEST $(dirname $0)/bug-834465 $M0/testfile
+
+sdump2=$(generate_mount_statedump $V0);
+nalloc2=0
+grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2
+if [ $? -eq '0' ]
+then
+ nalloc2=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2 | grep -E "^num_allocs" | cut -d '=' -f2`
+fi
+
+TEST [ $nalloc1 -eq $nalloc2 ];
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-834465
+cleanup_mount_statedump $V0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/core/bug-845213.t b/tests/bugs/core/bug-845213.t
new file mode 100644
index 00000000000..136e4126d14
--- /dev/null
+++ b/tests/bugs/core/bug-845213.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Create and start a volume with aio enabled
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 remote-dio enable;
+TEST $CLI volume set $V0 network.remote-dio disable;
+
+cleanup;
+
diff --git a/tests/bugs/core/bug-903336.t b/tests/bugs/core/bug-903336.t
new file mode 100644
index 00000000000..b52c1a4758e
--- /dev/null
+++ b/tests/bugs/core/bug-903336.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST setfattr -n trusted.io-stats-dump -v /tmp $M0
+cleanup
diff --git a/tests/bugs/core/bug-908146.t b/tests/bugs/core/bug-908146.t
new file mode 100755
index 00000000000..327be6e54bc
--- /dev/null
+++ b/tests/bugs/core/bug-908146.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+
+TEST touch $M0/a
+
+exec 4>"$M0/a"
+exec 5>"$M1/a"
+EXPECT "2" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+exec 4>&-
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+exec 5>&-
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+cleanup
diff --git a/tests/bugs/core/bug-913544.t b/tests/bugs/core/bug-913544.t
new file mode 100644
index 00000000000..af421722590
--- /dev/null
+++ b/tests/bugs/core/bug-913544.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#simulate a split-brain of a file and do truncate. This should not crash the mount point
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST touch a
+#simulate no-changelog data split-brain
+echo "abc" > $B0/${V0}1/a
+echo "abcd" > $B0/${V0}0/a
+TEST truncate -s 0 a
+TEST ls
+cd
+
+cleanup
diff --git a/tests/bugs/core/bug-924075.t b/tests/bugs/core/bug-924075.t
new file mode 100755
index 00000000000..61ce0f18286
--- /dev/null
+++ b/tests/bugs/core/bug-924075.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#FIXME: there is another patch which moves the following function into
+#include.rc
+function process_leak_count ()
+{
+ local pid=$1;
+ return $(ls -lh /proc/$pid/fd | grep "(deleted)" | wc -l)
+}
+
+TEST glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+TEST $CLI volume start $V0;
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+mount_pid=$(get_mount_process_pid $V0);
+TEST process_leak_count $mount_pid;
+
+cleanup;
diff --git a/tests/bugs/core/bug-927616.t b/tests/bugs/core/bug-927616.t
new file mode 100755
index 00000000000..18257131ac7
--- /dev/null
+++ b/tests/bugs/core/bug-927616.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.open-behind off;
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+TEST mkdir $M0/dir;
+
+mkdir $M0/other;
+cp /etc/passwd $M0/;
+cp $M0/passwd $M0/file;
+chmod 600 $M0/file;
+
+chown -R nfsnobody:nfsnobody $M0/dir;
+
+TEST $CLI volume set $V0 server.root-squash on;
+
+sleep 1;
+
+# tests should fail.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume set $V0 server.root-squash off;
+
+sleep 1;
+
+# tests should pass.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -eq 0 ]
+touch $N0/bar 2>/dev/null;
+TEST [ $? -eq 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -eq 0 ]
+mkdir $N0/old 2>/dev/null;
+TEST [ $? -eq 0 ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/core/bug-949242.t b/tests/bugs/core/bug-949242.t
new file mode 100644
index 00000000000..5e916cbdbe6
--- /dev/null
+++ b/tests/bugs/core/bug-949242.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Bug 949242 - Test basic fallocate functionality.
+#
+# Run several commands to verify basic fallocate functionality. We verify that
+# fallocate creates and allocates blocks to a file. We also verify that the keep
+# size option does not modify the file size.
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fallocate.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# check for fallocate support before continuing the test
+require_fallocate -l 1m -n $M0/file && rm -f $M0/file
+
+# fallocate a file and verify blocks are allocated
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+TEST [ $(($blksz * $nblks)) -eq 1048576 ]
+
+TEST unlink $M0/file
+
+# truncate a file to a fixed size, fallocate and verify that the size does not
+# change
+TEST truncate -s 1M $M0/file
+TEST fallocate -l 2m -n $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+sz=`stat -c %s $M0/file`
+TEST [ $sz -eq 1048576 ]
+# Note that gluster currently incorporates a hack to limit the number of blocks
+# reported as allocated to the file by the file size. We have allocated beyond the
+# file size here. Just check for non-zero allocation to avoid setting a land mine
+# for if/when that behavior might change.
+TEST [ ! $(($blksz * $nblks)) -eq 0 ]
+
+TEST unlink $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/core/bug-986429.t b/tests/bugs/core/bug-986429.t
new file mode 100644
index 00000000000..e512301775f
--- /dev/null
+++ b/tests/bugs/core/bug-986429.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+## This tests failover achieved by providing multiple
+## servers from the trusted pool for fetching volume
+## specification
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s non-existent -s $H0 --volfile-id=/$V0 $M0
+
+cleanup;
diff --git a/tests/bugs/core/io-stats-1322825.t b/tests/bugs/core/io-stats-1322825.t
new file mode 100755
index 00000000000..53f2d040daa
--- /dev/null
+++ b/tests/bugs/core/io-stats-1322825.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Test details:
+# This is to test that the io-stat-dump xattr is not set on the brick,
+# against the path that is used to trigger the stats dump.
+# Additionally it also tests if as many io-stat dumps are generated as there
+# are io-stat xlators in the graphs, which is 2 by default
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+# Covering replication and distribution in the test
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..4}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some activity for the stats to produce something useful
+TEST $CLI volume profile $V0 start
+TEST mkdir $M0/dir1
+
+# Generate the stat dump across the io-stat instances
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0
+
+# Check if $M0 is clean w.r.t xattr information
+# TODO: if there are better ways to check we really get no attr error, please
+# correct the following.
+getfattr -n trusted.io-stats-dump $B0/${V0}1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}2 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}3 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}4 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+
+# Check if we have 5 io-stat files in /tmp
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
+# Cleanup the 5 generated files
+rm -f /var/run/gluster/io-stats-1322825*
+
+# Rinse and repeat above for a directory
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0/dir1
+getfattr -n trusted.io-stats-dump $B0/${V0}1/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}2/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}3/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}4/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
+rm -f /var/run/gluster/io-stats-1322825*
+
+cleanup;
diff --git a/tests/bugs/core/log-bug-1362520.t b/tests/bugs/core/log-bug-1362520.t
new file mode 100755
index 00000000000..cde854c3349
--- /dev/null
+++ b/tests/bugs/core/log-bug-1362520.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $@ $M0
+
+#Get the client log file
+log_wd=$(gluster --print-logdir)
+log_id=${M0:1} # Remove initial slash
+log_id=${log_id//\//-} # Replace remaining slashes with dashes
+log_file=$log_wd/$log_id.log
+
+#Set the client xlator log-level to TRACE and check if the TRACE logs get
+#printed
+TEST setfattr -n trusted.glusterfs.$V0-client-0.set-log-level -v TRACE $M0
+TEST ! stat $M0/xyz
+grep -q " T \[rpc-clnt\.c" $log_file
+res=$?
+EXPECT '0' echo $res
+
+#Set the client xlator log-level to INFO and make sure the TRACE logs do
+#not get printed
+echo > $log_file
+TEST setfattr -n trusted.glusterfs.$V0-client-0.set-log-level -v INFO $M0
+TEST ! stat $M0/xyz
+grep -q " T \[rpc-clnt\.c" $log_file
+res=$?
+EXPECT_NOT '0' echo $res
+
+cleanup;
diff --git a/tests/bugs/ctime/issue-832.t b/tests/bugs/ctime/issue-832.t
new file mode 100755
index 00000000000..740f731ab73
--- /dev/null
+++ b/tests/bugs/ctime/issue-832.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+#Trigger trusted.glusterfs.mdata setting codepath and see things work as expected
+cleanup
+
+TEST_USER=test-ctime-user
+TEST_UID=27341
+
+TEST useradd -o -M -u ${TEST_UID} ${TEST_USER}
+push_trapfunc "userdel --force ${TEST_USER}"
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+$GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+echo abc > $M0/test
+TEST chmod 755 $M0/
+TEST chmod 744 $M0/test
+TEST setfattr -x trusted.glusterfs.mdata $B0/$V0/test
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+$GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+su ${TEST_USER} -c "cat $M0/test"
+TEST getfattr -n trusted.glusterfs.mdata $B0/$V0/test
+
+cleanup
diff --git a/tests/bugs/distribute/bug-1042725.t b/tests/bugs/distribute/bug-1042725.t
new file mode 100644
index 00000000000..5497eb8bc00
--- /dev/null
+++ b/tests/bugs/distribute/bug-1042725.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create files
+TEST mkdir $M0/foo
+TEST touch $M0/foo/{1..20}
+for file in {1..20}; do
+ ln $M0/foo/$file $M0/foo/${file}_linkfile;
+done
+
+#Stop one of the brick
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}1
+
+rm -rf $M0/foo 2>/dev/null
+TEST stat $M0/foo
+
+touch $M0/foo/{1..20} 2>/dev/null
+touch $M0/foo/{1..20}_linkfile 2>/dev/null
+
+TEST $CLI volume start $V0 force;
+sleep 5
+function verify_duplicate {
+ count=`ls $M0/foo | sort | uniq --repeated | grep [0-9] -c`
+ echo $count
+}
+EXPECT 0 verify_duplicate
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1066798.t b/tests/bugs/distribute/bug-1066798.t
new file mode 100755
index 00000000000..03de970a637
--- /dev/null
+++ b/tests/bugs/distribute/bug-1066798.t
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=200
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+############################################################
+#TEST_PLAN#
+#Create a file
+#Store the hashed brick information
+#Create hard links to it
+#Remove the hashed brick
+#Check now all the hardlinks are migrated in to "OTHERBRICK"
+#Check also in mount point for all the files
+#check there is no failures and skips for migration
+############################################################
+
+TEST touch $M0/file1;
+
+file_perm=`ls -l $M0/file1 | grep file1 | awk '{print $1}'`;
+
+if [ -f $B0/${V0}1/file1 ]
+then
+ HASHED=$B0/${V0}1
+ OTHER=$B0/${V0}2
+else
+ HASHED=$B0/${V0}2
+ OTHER=$B0/${V0}1
+fi
+
+#create hundred hard links
+for i in {1..50};
+do
+TEST_IN_LOOP ln $M0/file1 $M0/link$i;
+done
+
+
+TEST $CLI volume remove-brick $V0 $H0:${HASHED} start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:${HASHED}";
+
+#check consistency in mount point
+#And also check all the links are migrated to OTHER
+for i in {1..50}
+do
+TEST_IN_LOOP [ -f ${OTHER}/link${i} ];
+TEST_IN_LOOP [ -f ${M0}/link${i} ];
+done;
+
+#check in OTHER that all the files has proper permission (Means no
+#linkto files)
+
+for i in {1..50}
+do
+link_perm=`ls -l $OTHER | grep -w link${i} | awk '{print $1}'`;
+TEST_IN_LOOP [ "${file_perm}" == "${link_perm}" ]
+
+done
+
+#check that remove-brick status should not have any failed or skipped files
+
+var=`$CLI volume remove-brick $V0 $H0:${HASHED} status | grep completed`
+
+TEST [ `echo $var | awk '{print $5}'` = "0" ]
+TEST [ `echo $var | awk '{print $6}'` = "0" ]
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/distribute/bug-1086228.t b/tests/bugs/distribute/bug-1086228.t
new file mode 100755
index 00000000000..e14ea572b61
--- /dev/null
+++ b/tests/bugs/distribute/bug-1086228.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0;
+TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+echo "D" > $M0/file1;
+TEST chmod +st $M0/file1;
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}"3"
+TEST $CLI volume rebalance $V0 start force
+
+EXPECT_WITHIN "10" "0" rebalance_completed
+count=0
+for i in `ls $B0/$V0"3"`;
+ do
+ var=`stat -c %A $B0/$V0"3"/$i | cut -c 4`;
+ echo $B0/$V0"3"/$i $var
+ if [ "$var" != "S" ]; then
+ count=$((count + 1))
+ fi
+ done
+
+TEST [[ $count == 0 ]]
+cleanup
diff --git a/tests/bugs/distribute/bug-1088231.t b/tests/bugs/distribute/bug-1088231.t
new file mode 100755
index 00000000000..8d4d1db73a5
--- /dev/null
+++ b/tests/bugs/distribute/bug-1088231.t
@@ -0,0 +1,173 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.randomize-hash-range-by-gfid on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+TEST mkdir $M0/a
+
+
+## Bug Description: In case of dht_discover code path, which is triggered
+## when lookup done is nameless lookup, at the end of the lookup, even if
+## it finds that self-heal is needed to fix-the layout it wont heal because
+## healing code path is not added under nameless lookup.
+
+## What to test: With Patch, Even in case of nameless lookup, if layout
+## needs to be fixed, the it will be fixed wherever lookup is successful
+## and it will not create any directory for subvols having ENOENT as it is
+## nameless lookup.
+
+gfid_with_hyphen=`getfattr -n glusterfs.gfid.string $M0/a 2>/dev/null \
+ | grep glusterfs.gfid.string | cut -d '"' -f 2`
+
+TEST setfattr -x trusted.glusterfs.dht $B0/$V0"0"/a
+
+## new healing code don't attempt healing if inode is already
+## populated. So, unmount and remount before we do stat.
+TEST umount $M0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+
+TEST stat $M0/.gfid/$gfid_with_hyphen
+
+## Assuming that we have two bricks, we can have two permutations of layout
+## Case 1: Brick - A Brick - B
+## 0 - 50 51-100
+##
+## Case 2: Brick - A Brick - B
+## 51 - 100 0 - 50
+##
+## To ensure layout is assigned properly, the following tests should be
+## performed.
+##
+## Case 1: Layout_b0_s = 0; Layout_b0_e = 50, Layout_b1_s=51,
+## Layout_b1_e = 100;
+##
+## layout_b1_s = layout_b0_e + 1;
+## layout_b0_s = layout_b1_e + 1; but b0_s is 0, so change to 101
+## then compare
+## Case 2: Layout_b0_s = 51, Layout_b0_e = 100, Layout_b1_s=0,
+## Layout_b1_e = 51
+##
+## layout_b0_s = Layout_b1_e + 1;
+## layout_b1_s = Layout_b0_e + 1; but b1_s is 0, so chage to 101.
+
+
+##Extract Layout
+echo `get_layout $B0/$V0"0"/a`
+echo `get_layout $B0/$V0"1"/a`
+layout_b0_s=`get_layout $B0/$V0"0"/a | cut -c19-26`
+layout_b0_e=`get_layout $B0/$V0"0"/a | cut -c27-34`
+layout_b1_s=`get_layout $B0/$V0"1"/a | cut -c19-26`
+layout_b1_e=`get_layout $B0/$V0"1"/a | cut -c27-34`
+
+
+##Add 0X to perform Hex arithematic
+layout_b0_s="0x"$layout_b0_s
+layout_b0_e="0x"$layout_b0_e
+layout_b1_s="0x"$layout_b1_s
+layout_b1_e="0x"$layout_b1_e
+
+
+## Logic of converting starting layout "0" to "Max_value of layout + 1"
+comp1=$(($layout_b0_s + 0))
+if [ "$comp1" == "0" ];then
+ comp1=4294967296
+fi
+
+comp2=$(($layout_b1_s + 0))
+if [ "$comp2" == "0" ];then
+ comp2=4294967296
+fi
+
+diff1=$(($layout_b0_e + 1))
+diff2=$(($layout_b1_e + 1))
+
+
+healed=0
+
+if [ "$comp1" == "$diff1" ] && [ "$comp2" == "$diff2" ]; then
+ healed=$(($healed + 1))
+fi
+
+if [ "$comp1" == "$diff2" ] && [ "$comp2" == "$diff1" ]; then
+ healed=$(($healed + 1))
+fi
+
+TEST [ $healed == 1 ]
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.randomize-hash-range-by-gfid on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+TEST mkdir $M0/a
+
+gfid_with_hyphen=`getfattr -n glusterfs.gfid.string $M0/a 2>/dev/null \
+ | grep glusterfs.gfid.string | cut -d '"' -f 2`
+
+TEST setfattr -x trusted.glusterfs.dht $B0/$V0"0"/a
+TEST setfattr -x trusted.glusterfs.dht $B0/$V0"1"/a
+
+## new healing code don't attempt healing if inode is already
+## populated. So, unmount and remount before we do stat.
+TEST umount $M0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+
+TEST stat $M0/.gfid/$gfid_with_hyphen
+
+##Extract Layout
+
+layout_b0_s=`get_layout $B0/$V0"0"/a | cut -c19-26`
+layout_b0_e=`get_layout $B0/$V0"0"/a | cut -c27-34`
+layout_b1_s=`get_layout $B0/$V0"1"/a | cut -c19-26`
+layout_b1_e=`get_layout $B0/$V0"1"/a | cut -c27-34`
+
+
+##Add 0X to perform Hex arithematic
+layout_b0_s="0x"$layout_b0_s
+layout_b0_e="0x"$layout_b0_e
+layout_b1_s="0x"$layout_b1_s
+layout_b1_e="0x"$layout_b1_e
+
+
+
+## Logic of converting starting layout "0" to "Max_value of layout + 1"
+comp1=$(($layout_b0_s + 0))
+if [ "$comp1" == "0" ];then
+ comp1=4294967296
+fi
+
+comp2=$(($layout_b1_s + 0))
+if [ "$comp2" == "0" ];then
+ comp2=4294967296
+fi
+
+diff1=$(($layout_b0_e + 1))
+diff2=$(($layout_b1_e + 1))
+
+
+healed=0
+
+if [ "$comp1" == "$diff1" ] && [ "$comp2" == "$diff2" ]; then
+ healed=$(($healed + 1))
+fi
+
+if [ "$comp1" == "$diff2" ] && [ "$comp2" == "$diff1" ]; then
+ healed=$(($healed + 1))
+fi
+
+TEST [ $healed == 1 ]
+cleanup
+
diff --git a/tests/bugs/distribute/bug-1099890.t b/tests/bugs/distribute/bug-1099890.t
new file mode 100644
index 00000000000..1a19ba880c0
--- /dev/null
+++ b/tests/bugs/distribute/bug-1099890.t
@@ -0,0 +1,130 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+## TO-DO: Fix the following once the dht du refresh interval issue is fixed:
+## 1. Do away with sleep(1).
+## 2. Do away with creation of empty files.
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd;
+TEST pidof glusterd;
+
+# Create 2 loop devices, one per brick.
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+
+TEST L1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $L1
+
+TEST L2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $L2
+
+TEST mkdir -p $B0/${V0}{1,2}
+
+TEST MOUNT_LOOP $L1 $B0/${V0}1
+TEST MOUNT_LOOP $L2 $B0/${V0}2
+
+# Create a plain distribute volume with 2 subvols.
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+EXPECT "Started" volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable;
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+
+TEST $CLI volume quota $V0 limit-usage / 150MB;
+
+TEST $CLI volume set $V0 cluster.min-free-disk 50%
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+# Make sure quota-deem-statfs is working as expected
+EXPECT "150M" echo `df -h $M0 -P | tail -1 | awk {'print $2'}`
+
+# Create a new file 'foo' under the root of the volume, which hashes to subvol-0
+# of DHT, that consumes 40M
+TEST $QDD $M0/foo 256 160
+
+TEST stat $B0/${V0}1/foo
+TEST ! stat $B0/${V0}2/foo
+
+# Create a new file 'bar' under the root of the volume, which hashes to subvol-1
+# of DHT, that consumes 40M
+TEST $QDD $M0/bar 256 160
+
+TEST ! stat $B0/${V0}1/bar
+TEST stat $B0/${V0}2/bar
+
+# Touch a zero-byte file on the root of the volume to make sure the statfs data
+# on DHT is refreshed
+sleep 1;
+TEST touch $M0/empty1;
+
+# At this point, the available space on each subvol {60M,60M} is greater than
+# their min-free-disk {50M,50M}, but if this bug still exists, then
+# the total available space on the volume as perceived by DHT should be less
+# than min-free-disk, i.e.,
+#
+# consumed space returned per subvol by quota = (40M + 40M) = 80M
+#
+# Therefore, consumed space per subvol computed by DHT WITHOUT the fix would be:
+# (80M/150M)*100 = 53%
+#
+# Available space per subvol as perceived by DHT with the bug = 47%
+# which is less than min-free-disk
+
+# Now I create a file that hashes to subvol-1 (counting from 0) of DHT.
+# If this bug still exists,then DHT should be routing this creation to subvol-0.
+# If this bug is fixed, then DHT should be routing the creation to subvol-1 only
+# as it has more than min-free-disk space available.
+
+TEST $QDD $M0/file 1 1
+sleep 1;
+TEST ! stat $B0/${V0}1/file
+TEST stat $B0/${V0}2/file
+
+# Touch another zero-byte file on the root of the volume to refresh statfs
+# values stored by DHT.
+
+TEST touch $M0/empty2;
+
+# Now I create a new file that hashes to subvol-0, at the end of which, there
+# will be less than min-free-disk space available on it.
+TEST $QDD $M0/fil 256 80
+sleep 1;
+TEST stat $B0/${V0}1/fil
+TEST ! stat $B0/${V0}2/fil
+
+# Touch to refresh statfs info cached by DHT
+
+TEST touch $M0/empty3;
+
+# Now I create a file that hashes to subvol-0 but since it has less than
+# min-free-disk space available, its data will be cached on subvol-1.
+
+TEST $QDD $M0/zz 256 20
+
+TEST stat $B0/${V0}1/zz
+TEST stat $B0/${V0}2/zz
+
+EXPECT "$V0-client-1" dht_get_linkto_target "$B0/${V0}1/zz"
+
+EXPECT "1" is_dht_linkfile "$B0/${V0}1/zz"
+
+force_umount $M0
+TEST $CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+
+rm -f $QDD
+cleanup
diff --git a/tests/bugs/distribute/bug-1125824.t b/tests/bugs/distribute/bug-1125824.t
new file mode 100755
index 00000000000..7e401092273
--- /dev/null
+++ b/tests/bugs/distribute/bug-1125824.t
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+create_files () {
+ for i in {1..10}; do
+ orig=$(printf %s/file%04d $1 $i)
+ echo "This is file $i" > $orig
+ done
+ for i in {1..10}; do
+ mkdir $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+create_dirs () {
+ for i in {1..10}; do
+ mkdir $(printf %s/dir%04d $1 $i)
+ create_files $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+stat_files () {
+ for i in {1..10}; do
+ orig=$(printf %s/file%04d $1 $i)
+ stat $orig
+ done
+ for i in {1..10}; do
+ stat $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+stat_dirs () {
+ for i in {1..10}; do
+ stat $(printf %s/dir%04d $1 $i)
+ stat_files $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0
+
+# Create and poulate the NFS inode tables
+TEST create_dirs $N0
+TEST stat_dirs $N0
+
+# add-bricks changing the state of the volume where some bricks
+# would have some directories and others would not
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6,7,8}
+
+# Post this dht_access was creating a mess for directories which is fixed
+# with this commit. The issues could range from getting ENOENT or
+# ESTALE or entries missing to directories not having complete
+# layouts.
+TEST cd $N0
+TEST ls -lR
+
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+# tests to check post rebalance if layouts and entires are fine and
+# accessible by NFS to clear the volume
+TEST ls -lR
+rm -rf ./*
+# There are additional bugs where NFS+DHT does not delete all entries
+# on an rm -rf, so we do an additional rm -rf to ensure all is done
+# and we are facing this transient issue, rather than a bad directory
+# layout that is cached in memory
+TEST rm -rf ./*
+
+# Get out of the mount, so that umount can work
+TEST cd /
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1161156.t b/tests/bugs/distribute/bug-1161156.t
new file mode 100755
index 00000000000..2b9e15407ca
--- /dev/null
+++ b/tests/bugs/distribute/bug-1161156.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# Testing with NFS for no particular reason
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+mydir="dir"
+TEST mkdir -p $N0/$mydir
+TEST mkdir -p $N0/newdir
+
+TEST $QDD $N0/$mydir/file 256 40
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 20MB
+TEST $CLI volume quota $V0 limit-usage /newdir 5MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST $QDD $N0/$mydir/newfile_1 256 20
+# wait for write behind to complete.
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" quotausage "/"
+TEST ! $QDD $N0/$mydir/newfile_2 256 40
+
+# Test rename within a directory. It should pass even when the
+# corresponding directory quota is filled.
+TEST mv $N0/dir/file $N0/dir/newfile_3
+
+# rename should fail here with disk quota exceeded
+TEST ! mv $N0/dir/newfile_3 $N0/newdir/
+
+umount_nfs $N0
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1161311.t b/tests/bugs/distribute/bug-1161311.t
new file mode 100755
index 00000000000..62796068928
--- /dev/null
+++ b/tests/bugs/distribute/bug-1161311.t
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=350
+
+# This tests for hard link preservation for files that are linked, when the
+# file is undergoing migration
+
+# --- Improvements and other tests ---
+## Fail rebalance of the large file for which links are created during P1/2
+### phases of migration
+## Start with multiple hard links to the file and then create more during P1/2
+### phases of migration
+## Test the same with NFS as the mount rather than FUSE
+## Create links when file is under P2 of migration specifically
+## Test with quota, to error out during hard link creation (if possible)
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+TEST truncate -s 10GB $B0/brick1
+TEST truncate -s 10GB $B0/brick2
+TEST truncate -s 10GB $B0/brick3
+
+TEST LO1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $LO1
+
+TEST LO2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $LO2
+
+TEST LO3=`SETUP_LOOP $B0/brick3`
+TEST MKFS_LOOP $LO3
+
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3
+
+
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+TEST MOUNT_LOOP $LO3 $B0/${V0}3
+
+checksticky () {
+ i=0;
+ while [ ! -k $1 ]; do
+ sleep 1
+ i=$((i+1));
+ # Try for 10 seconds to get the sticky bit state
+ # else fail the test, as we may never see it
+ if [[ $i == 10 ]]; then
+ return $i
+ fi
+ echo "Waiting... $i"
+ done
+ echo "Done... got out @ $i"
+ return 0
+}
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '3' brick_count $V0
+
+TEST $CLI volume set $V0 parallel-readdir on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+# Create a directories to hold the links
+TEST mkdir $M0/dir1
+TEST mkdir -p $M0/dir2/dir3
+
+# Create a large file (8 GB), so that rebalance takes time
+# Since we really don't care about the contents of the file, we use fallocate
+# to generate the file much faster. We could also use truncate, which is even
+# faster, but rebalance could take advantage of an sparse file and migrate it
+# in an optimized way, but we don't want a fast migration.
+TEST fallocate -l 8G $M0/dir1/FILE2
+
+# Rename the file to create a linkto, for rebalance to
+# act on the file
+## FILE1 and FILE2 hashes are, 678b1c4a e22c1ada, so they fall
+## into separate bricks when brick count is 3
+TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1
+
+brick_loc=$(get_backend_paths $M0/dir1/FILE1)
+
+# unmount and remount the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+# Start the rebalance
+TEST $CLI volume rebalance $V0 start force
+
+# Wait for FILE to get the sticky bit on, so that file is under
+# active rebalance, before creating the links
+TEST checksticky $brick_loc
+
+# Create the links
+## FILE3 FILE5 FILE7 have hashes, c8c91469 566d26ce 22ce7eba
+## Which fall into separate bricks on a 3 brick layout
+cd $M0
+TEST ln ./dir1/FILE1 ./dir1/FILE7
+TEST ln ./dir1/FILE1 ./dir1/FILE5
+TEST ln ./dir1/FILE1 ./dir1/FILE3
+
+TEST ln ./dir1/FILE1 ./dir2/FILE7
+TEST ln ./dir1/FILE1 ./dir2/FILE5
+TEST ln ./dir1/FILE1 ./dir2/FILE3
+
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE7
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE5
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE3
+cd /
+
+# Ideally for this test to have done its job, the file should still be
+# under migration, so check the sticky bit again
+TEST checksticky $brick_loc
+
+# Wait for rebalance to complete
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+# Check if all files are clean and migrated right
+## stat on the original file should show linkcount of 10
+linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
+TEST [[ $linkcountsrc == 10 ]]
+
+## inode and size of every file should be same as original file
+inodesrc=$(stat -c %i $M0/dir1/FILE1)
+TEST [[ $(stat -c %i $M0/dir1/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir1/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir1/FILE7) == $inodesrc ]]
+
+TEST [[ $(stat -c %i $M0/dir2/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/FILE7) == $inodesrc ]]
+
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE7) == $inodesrc ]]
+
+# Check, newer link creations
+cd $M0
+TEST ln ./dir1/FILE1 ./FILE1
+TEST ln ./dir2/FILE3 ./FILE3
+TEST ln ./dir2/dir3/FILE5 ./FILE5
+TEST ln ./dir1/FILE7 ./FILE7
+cd /
+linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
+TEST [[ $linkcountsrc == 14 ]]
+
+
+# Stop the volume
+TEST $CLI volume stop $V0;
+
+UMOUNT_LOOP ${B0}/${V0}{1..3}
+rm -f ${B0}/brick{1..3}
+cleanup;
diff --git a/tests/bugs/distribute/bug-1190734.t b/tests/bugs/distribute/bug-1190734.t
new file mode 100644
index 00000000000..9256088f7a0
--- /dev/null
+++ b/tests/bugs/distribute/bug-1190734.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+BRICK_COUNT=3
+FILE_COUNT=100
+FILE_COUNT_TIME=5
+
+function create_files {
+ rm -rf $2
+ mkdir $2
+ for i in `seq 1 $1`; do
+ touch $2/file_$i
+ done
+}
+
+function get_file_count {
+ ls $1/file_[0-9]* | wc -l
+}
+
+function reset {
+ $CLI volume stop $V0
+ ${UMOUNT_F} $1
+ $CLI volume delete $V0
+}
+
+function start_mount_fuse {
+ $CLI volume start $V0
+ [ $? -ne 0 ] && return 1
+
+ $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ [ $? -ne 0 ] && return 1
+
+ create_files $FILE_COUNT $M0/$1
+ [ $? -ne 0 ] && return 1
+
+ return 0
+}
+
+function start_mount_nfs {
+ $CLI volume start $V0
+ [ $? -ne 0 ] && return 1
+
+ sleep 3
+ mount_nfs $H0:/$V0 $N0
+ [ $? -ne 0 ] && return 1
+
+ create_files $FILE_COUNT $N0/$1
+ [ $? -ne 0 ] && return 1
+
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+# Test 1-2 Create repliacted volume
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
+TEST $CLI volume set $V0 nfs.disable false
+
+# ------- test 1: AFR, fuse + remove bricks
+
+TEST start_mount_fuse test1
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $M0/test1
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} commit
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $M0/test1
+reset $M0
+
+# ------- test 2: AFR, nfs + remove bricks
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST start_mount_nfs test2
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $N0/test2
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} commit
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $N0/test2
+reset $N0
+
+cleanup
diff --git a/tests/bugs/distribute/bug-1193636.c b/tests/bugs/distribute/bug-1193636.c
new file mode 100644
index 00000000000..ea3f79a4e06
--- /dev/null
+++ b/tests/bugs/distribute/bug-1193636.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define MY_XATTR_NAME "user.ftest"
+#define MY_XATTR_VAL "ftestval"
+
+void
+usage(void)
+{
+ printf("Usage : bug-1193636 <filename> <xattr_name> <op>\n");
+ printf(" op : 0 - set, 1 - remove\n");
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd;
+ int err = 0;
+ char *xattr_name = NULL;
+ int op = 0;
+
+ if (argc != 4) {
+ usage();
+ exit(1);
+ }
+
+ op = atoi(argv[3]);
+
+ if ((op != 0) && (op != 1)) {
+ printf("Invalid operation specified.\n");
+ usage();
+ exit(1);
+ }
+
+ xattr_name = argv[2];
+
+ fd = open(argv[1], O_RDWR);
+ if (fd == -1) {
+ printf("Failed to open file %s\n", argv[1]);
+ exit(1);
+ }
+
+ if (!op) {
+ err = fsetxattr(fd, xattr_name, MY_XATTR_VAL, strlen(MY_XATTR_VAL) + 1,
+ XATTR_CREATE);
+
+ if (err) {
+ printf("Failed to set xattr %s: %m\n", xattr_name);
+ exit(1);
+ }
+
+ } else {
+ err = fremovexattr(fd, xattr_name);
+
+ if (err) {
+ printf("Failed to remove xattr %s: %m\n", xattr_name);
+ exit(1);
+ }
+ }
+
+ close(fd);
+
+ return 0;
+}
diff --git a/tests/bugs/distribute/bug-1193636.t b/tests/bugs/distribute/bug-1193636.t
new file mode 100644
index 00000000000..b377910336e
--- /dev/null
+++ b/tests/bugs/distribute/bug-1193636.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+checksticky () {
+ i=0;
+ while [ ! -k $1 ]; do
+ sleep 1
+ i=$((i+1));
+ if [[ $i == 10 ]]; then
+ return $i
+ fi
+ echo "Waiting... $i"
+ done
+ echo "done ...got out @ $i"
+ return 0
+}
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+
+# Create a large file (1GB), so that rebalance takes time
+dd if=/dev/zero of=$M0/dir1/FILE2 bs=64k count=10240
+
+# Rename the file to create a linkto, for rebalance to
+# act on the file
+TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1
+
+brick_loc=$(get_backend_paths $M0/dir1/FILE1)
+
+build_tester $(dirname $0)/bug-1193636.c
+
+TEST $CLI volume rebalance $V0 start force
+
+TEST checksticky $brick_loc
+
+TEST setfattr -n "user.test1" -v "test1" $M0/dir1/FILE1
+TEST setfattr -n "user.test2" -v "test1" $M0/dir1/FILE1
+TEST setfattr -n "user.test3" -v "test1" $M0/dir1/FILE1
+
+TEST $(dirname $0)/bug-1193636 $M0/dir1/FILE1 user.fsetx 0
+TEST $(dirname $0)/bug-1193636 $M0/dir1/FILE1 user.fremx 0
+
+TEST getfattr -n "user.fremx" $M0/dir1/FILE1
+TEST setfattr -x "user.test2" $M0/dir1/FILE1
+
+
+TEST $(dirname $0)/bug-1193636 $M0/dir1/FILE1 user.fremx 1
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+TEST getfattr -n "user.fsetx" $M0/dir1/FILE1
+TEST getfattr -n "user.test1" $M0/dir1/FILE1
+TEST ! getfattr -n "user.test2" $M0/dir1/FILE1
+TEST ! getfattr -n "user.fremx" $M0/dir1/FILE1
+TEST getfattr -n "user.test3" $M0/dir1/FILE1
+
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1204140.t b/tests/bugs/distribute/bug-1204140.t
new file mode 100755
index 00000000000..050c069ea30
--- /dev/null
+++ b/tests/bugs/distribute/bug-1204140.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST touch $M0/new.txt;
+TEST getfattr -n "glusterfs.get_real_filename:NEW.txt" $M0;
+TEST ! getfattr -n "glusterfs.get_realfilename:NEXT.txt" $M0;
+
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1247563.t b/tests/bugs/distribute/bug-1247563.t
new file mode 100644
index 00000000000..a2fc722896f
--- /dev/null
+++ b/tests/bugs/distribute/bug-1247563.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+is_sticky_set () {
+ echo $1
+ if [ -k $1 ];
+ then
+ echo "yes"
+ else
+ echo "no"
+ fi
+}
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs --acl -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+
+echo "Testing pacls on rebalance" > $M0/dir1/FILE1
+
+FPATH1=`find $B0/ -name FILE1`
+
+# Rename the file to create a linkto, for rebalance to
+# act on the file
+
+TEST mv $M0/dir1/FILE1 $M0/dir1/FILE2
+FPATH2=`find $B0/ -perm 1000 -name FILE2`
+
+setfacl -m user:root:rwx $M0/dir1/FILE2
+
+# Run rebalance without the force option to skip
+# the file migration
+TEST $CLI volume rebalance $V0 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Check that the file has been skipped,i.e., the linkto still exists
+EXPECT "yes" is_sticky_set $FPATH2
+
+
+#The linkto file should not have any posix acls set
+COUNT=`getfacl $FPATH2 |grep -c "user:root:rwx"`
+EXPECT "0" echo $COUNT
+
+cleanup;
+
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/distribute/bug-1368012.t b/tests/bugs/distribute/bug-1368012.t
new file mode 100644
index 00000000000..0b626353aab
--- /dev/null
+++ b/tests/bugs/distribute/bug-1368012.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_permission {
+ stat -c "%A" $1
+}
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
+TEST $CLI volume set $V0 performance.stat-prefetch off
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+##Test case: Add-brick
+#------------------------------------------------------------
+#change permission of both root
+TEST chmod 444 $M0
+
+#store permission for comparision
+TEST permission_root=`stat -c "%A" $M0`
+TEST echo $permission_root
+#Add-brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count
+
+#Allow one lookup to happen
+TEST ls $M0
+#Generate another lookup
+echo 3 > /proc/sys/vm/drop_caches
+TEST ls $M0
+#check root permission
+EXPECT_WITHIN "5" $permission_root get_permission $M0
+#check permission on the new-brick
+EXPECT $permission_root get_permission $B0/${V0}3
+cleanup
diff --git a/tests/bugs/distribute/bug-1389697.t b/tests/bugs/distribute/bug-1389697.t
new file mode 100644
index 00000000000..0d428b8d9d2
--- /dev/null
+++ b/tests/bugs/distribute/bug-1389697.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+
+cleanup
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/b1 $H1:$B1/b2 $H2:$B2/b3
+TEST $CLI_1 volume start $V0
+
+#Start a fix-layout
+TEST $CLI_1 volume rebalance $V0 fix-layout start
+
+#volume rebalance status should work
+TEST $CLI_1 volume rebalance $V0 status
+$CLI_1 volume rebalance $V0 status
+
+val=$($CLI_1 volume rebalance $V0 status |grep "fix-layout" 2>&1)
+val=$?
+TEST [ $val -eq 0 ];
+
+#Start a remove brick for the brick on H2
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/b3 start
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/b3 status
+
+#Check remove brick status from H1
+$CLI_1 volume remove-brick $V0 $H2:$B2/b3 status |grep "fix-layout" 2>&1
+val=$?
+TEST [ $val -eq 1 ];
+
+$CLI_1 volume remove-brick $V0 $H2:$B2/b3 status
+$CLI_2 volume remove-brick $V0 $H2:$B2/b3 status
+
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/b3 stop
+
+cleanup
diff --git a/tests/bugs/distribute/bug-1543279.t b/tests/bugs/distribute/bug-1543279.t
new file mode 100644
index 00000000000..47b8b4a4a95
--- /dev/null
+++ b/tests/bugs/distribute/bug-1543279.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+TESTS_EXPECTED_IN_LOOP=44
+SCRIPT_TIMEOUT=600
+
+rename_files() {
+ MOUNT=$1
+ ITERATIONS=$2
+ for i in $(seq 1 $ITERATIONS); do uuid="`uuidgen`"; echo "some data" > $MOUNT/test$uuid; mv $MOUNT/test$uuid $MOUNT/test -f || return $?; done
+}
+
+run_test_for_volume() {
+ VOLUME=$1
+ ITERATIONS=$2
+ TEST_IN_LOOP $CLI volume start $VOLUME
+
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M0
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M1
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M2
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M3
+
+ rename_files $M0 $ITERATIONS &
+ M0_RENAME_PID=$!
+
+ rename_files $M1 $ITERATIONS &
+ M1_RENAME_PID=$!
+
+ rename_files $M2 $ITERATIONS &
+ M2_RENAME_PID=$!
+
+ rename_files $M3 $ITERATIONS &
+ M3_RENAME_PID=$!
+
+ TEST_IN_LOOP wait $M0_RENAME_PID
+ TEST_IN_LOOP wait $M1_RENAME_PID
+ TEST_IN_LOOP wait $M2_RENAME_PID
+ TEST_IN_LOOP wait $M3_RENAME_PID
+
+ TEST_IN_LOOP $CLI volume stop $VOLUME
+ TEST_IN_LOOP $CLI volume delete $VOLUME
+ umount $M0 $M1 $M2 $M3
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..8} force
+run_test_for_volume $V0 200
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..8} force
+run_test_for_volume $V0 200
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..8} force
+run_test_for_volume $V0 200
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} force
+run_test_for_volume $V0 200
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/distribute/bug-1600379.t b/tests/bugs/distribute/bug-1600379.t
new file mode 100644
index 00000000000..8d2f6154100
--- /dev/null
+++ b/tests/bugs/distribute/bug-1600379.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+#------------------------------------------------------------
+
+# Test case - Remove xattr from killed brick on lookup
+#------------------------------------------------------------
+# Create a dir and set custom xattr
+TEST mkdir $M0/testdir
+TEST setfattr -n user.attr -v val $M0/testdir
+xattr_val=`getfattr -d $B0/${V0}2/testdir | awk '{print $1}'`;
+TEST ${xattr_val}='user.attr="val"';
+
+# Kill 2nd brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+
+# Remove custom xattr
+TEST setfattr -x user.attr $M0/testdir
+
+# Bring up the killed brick process
+TEST $CLI volume start $V0 force
+
+# Perform lookup
+sleep 5
+TEST ls $M0/testdir
+
+# Check brick xattrs
+xattr_val_2=`getfattr -d $B0/${V0}2/testdir`;
+TEST [ ${xattr_val_2} = ''] ;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1667804.t b/tests/bugs/distribute/bug-1667804.t
new file mode 100644
index 00000000000..3f7c43111d7
--- /dev/null
+++ b/tests/bugs/distribute/bug-1667804.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+function confirm_all_linkto_files ()
+{
+ inpath=$1
+ for infile in $inpath/*
+ do
+ echo $infile
+ ret1=$(is_dht_linkfile $infile)
+ if [ "$ret1" -eq 0 ]; then
+ echo "$infile is not a linkto file"
+ echo 0
+ return
+ fi
+ done
+ echo 1
+}
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create files and rename them in order to create linkto files
+TEST mkdir -p $M0/dir0/dir1
+TEST touch $M0/dir0/dir1/file-{1..50}
+
+for i in {1..50}; do
+ mv $M0/dir0/dir1/file-$i $M0/dir0/dir1/nfile-$i;
+done
+
+#Remove the second brick to force the creation of linkto files
+#on the removed brick
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}2"
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+EXPECT "1" confirm_all_linkto_files $B0/${V0}2/dir0/dir1
+
+#Modify the xattrs of the linkto files on the removed brick to point to itself.
+
+target=$(cat $M0/.meta/graphs/active/$V0-dht/subvolumes/1/name)
+
+setfattr -n trusted.glusterfs.dht.linkto -v "$target\0" $B0/${V0}2/dir0/dir1/nfile*
+
+
+TEST rm -rf $M0/dir0
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t
new file mode 100755
index 00000000000..219ce51c8a9
--- /dev/null
+++ b/tests/bugs/distribute/bug-1786679.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+
+# create 2 subvols
+# create a dir
+# create a file
+# change layout
+# remove the file
+# execute create from a different mount
+# Without the patch, the file will be present on both of the bricks
+
+cleanup
+
+function get_layout () {
+
+layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'`
+
+echo $layout
+
+}
+
+function set_layout()
+{
+ setfattr -n "trusted.glusterfs.dht" -v $1 $2
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK1=$B0/${V0}-0
+BRICK2=$B0/${V0}-1
+
+TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
+TEST $CLI volume start $V0
+
+# Mount FUSE and create symlink
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+TEST ! stat "$BRICK1/dir/file"
+TEST stat "$BRICK2/dir/file"
+
+layout1="$(get_layout "$BRICK1/dir")"
+layout2="$(get_layout "$BRICK2/dir")"
+
+TEST set_layout $layout1 "$BRICK2/dir"
+TEST set_layout $layout2 "$BRICK1/dir"
+
+TEST rm $M0/dir/file -f
+TEST gluster v set $V0 client-log-level DEBUG
+
+#Without the patch in place, this client will create the file in $BRICK2
+#which will lead to two files being on both the bricks when a new client
+#create the file with the same name
+TEST touch $M0/dir/file
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+TEST touch $M1/dir/file
+
+TEST stat "$BRICK1/dir/file"
+TEST ! stat "$BRICK2/dir/file"
+
+cleanup
diff --git a/tests/bugs/distribute/bug-853258.t b/tests/bugs/distribute/bug-853258.t
new file mode 100755
index 00000000000..6817d9e2cd3
--- /dev/null
+++ b/tests/bugs/distribute/bug-853258.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 cluster.weighted-rebalance off
+
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0
+
+# Get the original values.
+xattrs=""
+for i in $(seq 0 2); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+# Expand the volume and force assignment of new ranges.
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0
+
+for i in $(seq 0 3); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+overlap=$( $PYTHON $(dirname $0)/overlap.py $xattrs)
+# 2863311531 = 0xaaaaaaab = 2/3 overlap
+TEST [ "$overlap" -ge 2863311531 ]
+
+cleanup
diff --git a/tests/bugs/distribute/bug-860663.c b/tests/bugs/distribute/bug-860663.c
new file mode 100644
index 00000000000..ca0c31ffe8f
--- /dev/null
+++ b/tests/bugs/distribute/bug-860663.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <err.h>
+#include <sys/param.h>
+
+int main(argc, argv) int argc;
+char **argv;
+{
+ char *basepath;
+ char path[MAXPATHLEN + 1];
+ unsigned int count;
+ int i, fd;
+
+ if (argc != 3)
+ errx(1, "usage: %s path count", argv[0]);
+
+ basepath = argv[1];
+ count = atoi(argv[2]);
+
+ if (count > 999999)
+ errx(1, "count too big");
+
+ if (strlen(basepath) > MAXPATHLEN - 6)
+ errx(1, "path too long");
+
+ for (i = 0; i < count; i++) {
+ (void)sprintf(path, "%s%06d", basepath, i);
+
+ fd = open(path, O_CREAT | O_RDWR, 0644);
+ if (fd == -1)
+ err(1, "create %s failed", path);
+
+ if (close(fd) != 0)
+ warn("close %s failed", path);
+ }
+
+ return 0;
+}
diff --git a/tests/bugs/distribute/bug-860663.t b/tests/bugs/distribute/bug-860663.t
new file mode 100644
index 00000000000..59b8223ef3f
--- /dev/null
+++ b/tests/bugs/distribute/bug-860663.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+function file_count()
+{
+ val=1
+
+ if [ "$1" == "$2" ]
+ then
+ val=0
+ fi
+ echo $val
+}
+
+BRICK_COUNT=3
+
+build_tester $(dirname $0)/bug-860663.c
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST $(dirname $0)/bug-860663 $M0/files 1000
+
+ORIG_FILE_COUNT=`ls -l $M0 | wc -l`;
+TEST [ $ORIG_FILE_COUNT -ge 1000 ]
+
+# Kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout failed" fix-layout_status_field $V0;
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+# Unmount and remount to make sure we're doing fresh lookups.
+TEST umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+NEW_FILE_COUNT=`ls -l $M0 | wc -l`;
+
+EXPECT "0" file_count $ORIG_FILE_COUNT $NEW_FILE_COUNT
+
+rm -f $(dirname $0)/bug-860663
+cleanup;
diff --git a/tests/bugs/distribute/bug-862967.t b/tests/bugs/distribute/bug-862967.t
new file mode 100644
index 00000000000..2fb0848bd7c
--- /dev/null
+++ b/tests/bugs/distribute/bug-862967.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function uid_gid_compare()
+{
+ val=1
+
+ if [ "$1" == "$3" ]
+ then
+ if [ "$2" == "$4" ]
+ then
+ val=0
+ fi
+ fi
+ echo "$val"
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --gid-timeout=-1 -s $H0 --volfile-id $V0 $M0;
+
+# change dir permissions
+mkdir $M0/dir;
+chown 1:1 $M0/dir;
+
+# Kill a brick process
+
+kill_brick $V0 $H0 $B0/${V0}2
+# change dir ownership
+NEW_UID=36;
+NEW_GID=36;
+chown $NEW_UID:$NEW_GID $M0/dir;
+
+# bring the brick back up
+TEST $CLI volume start $V0 force
+
+sleep 10;
+
+ls -l $M0/dir;
+
+# check if uid/gid is healed on backend brick which was taken down
+BACKEND_UID=`stat -c %u $B0/${V0}2/dir`;
+BACKEND_GID=`stat -c %g $B0/${V0}2/dir`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-882278.t b/tests/bugs/distribute/bug-882278.t
new file mode 100755
index 00000000000..8cb51474720
--- /dev/null
+++ b/tests/bugs/distribute/bug-882278.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup
+
+# Is there a good reason to require --fqdn elsewhere? It's worse than useless
+# here.
+H0=$(hostname -s)
+
+function recreate {
+ # The rm is necessary so we don't get fooled by leftovers from old runs.
+ rm -rf $1 && mkdir -p $1
+}
+
+function count_lines {
+ grep "$1" $2/* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+TEST recreate ${B0}/${V0}-0
+TEST recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1}
+TEST $CLI volume set $V0 cluster.nufa on
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1"
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0
+
+## Create a bunch of test files.
+for i in $(seq 0 99); do
+ echo hello > $(printf $M0/file%02d $i)
+done
+
+## Make sure the files went to the right place. There might be link files in
+## the other brick, but they won't have any contents.
+EXPECT "0" count_lines hello ${B0}/${V0}-0
+EXPECT "100" count_lines hello ${B0}/${V0}-1
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-884455.t b/tests/bugs/distribute/bug-884455.t
new file mode 100755
index 00000000000..59413cd5408
--- /dev/null
+++ b/tests/bugs/distribute/bug-884455.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+function layout_compare()
+{
+ res=0
+
+ if [ "$1" == "$2" ]
+ then
+ res=1
+ fi
+ if [ "$1" == "$3" ]
+ then
+ res=1
+ fi
+ if [ "$2" == "$3" ]
+ then
+ res=1
+ fi
+
+ echo $res
+}
+
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2`
+ layout2=`getfattr -n trusted.glusterfs.dht -e hex $2 2>&1|grep dht |cut -d = -f2`
+ layout3=`getfattr -n trusted.glusterfs.dht -e hex $3 2>&1|grep dht |cut -d = -f2`
+
+ ret=$(layout_compare $layout1 $layout2 $layout3)
+
+ if [ $ret -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+## set subvols-per-dir option
+TEST $CLI volume set $V0 subvols-per-directory 2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir{1..10} 2>/dev/null;
+
+## Add-brick n run rebalance to force re-write of layout
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2
+sleep 5;
+
+## trigger dir self heal on client
+TEST ls -l $M0 2>/dev/null;
+
+TEST $CLI volume rebalance $V0 start force
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+## check for layout overlaps.
+EXPECT "0" get_layout $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+EXPECT "0" get_layout $B0/${V0}0/dir1 $B0/${V0}1/dir1 $B0/${V0}2/dir1
+EXPECT "0" get_layout $B0/${V0}0/dir2 $B0/${V0}1/dir2 $B0/${V0}2/dir2
+EXPECT "0" get_layout $B0/${V0}0/dir3 $B0/${V0}1/dir3 $B0/${V0}2/dir3
+EXPECT "0" get_layout $B0/${V0}0/dir4 $B0/${V0}1/dir4 $B0/${V0}2/dir4
+EXPECT "0" get_layout $B0/${V0}0/dir5 $B0/${V0}1/dir5 $B0/${V0}2/dir5
+EXPECT "0" get_layout $B0/${V0}0/dir6 $B0/${V0}1/dir6 $B0/${V0}2/dir6
+EXPECT "0" get_layout $B0/${V0}0/dir7 $B0/${V0}1/dir7 $B0/${V0}2/dir7
+EXPECT "0" get_layout $B0/${V0}0/dir8 $B0/${V0}1/dir8 $B0/${V0}2/dir8
+EXPECT "0" get_layout $B0/${V0}0/dir9 $B0/${V0}1/dir9 $B0/${V0}2/dir9
+EXPECT "0" get_layout $B0/${V0}0/dir10 $B0/${V0}1/dir10 $B0/${V0}2/dir10
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-884597.t b/tests/bugs/distribute/bug-884597.t
new file mode 100755
index 00000000000..d6a2c65f370
--- /dev/null
+++ b/tests/bugs/distribute/bug-884597.t
@@ -0,0 +1,173 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+BRICK_COUNT=3
+function uid_gid_compare()
+{
+ val=1
+
+ if [ "$1" == "$3" ]
+ then
+ if [ "$2" == "$4" ]
+ then
+ val=0
+ fi
+ fi
+ echo "$val"
+}
+
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/$i
+
+chown $NEW_UID:$NEW_GID $M0/$i
+## rename till file gets a linkfile
+
+has_link=0
+while [ $i -lt 100 ]
+do
+ mv $M0/$i $M0/$(( $i+1 ))
+ if [ $? -ne 0 ]
+ then
+ break
+ fi
+ let i++
+ file_has_linkfile $i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+TEST [ $has_link -eq 2 ]
+
+get_hashed_brick $i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+# remove linkfile from backend, and trigger a lookup heal. uid/gid should match
+rm -rf $B0/${V0}$cached/$i
+
+# without a unmount, we are not able to trigger a lookup based heal
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+lookup=`ls -l $M0/$i 2>/dev/null`
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+# create hardlinks. Make sure a linkfile gets created
+
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/file
+chown $NEW_UID:$NEW_GID $M0/file;
+
+## ln till file gets a linkfile
+
+has_link=0
+while [ $i -lt 100 ]
+do
+ ln $M0/file $M0/link$i
+ if [ $? -ne 0 ]
+ then
+ break
+ fi
+ file_has_linkfile link$i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+ let i++
+done
+
+TEST [ $has_link -eq 2 ]
+
+get_hashed_brick link$i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/link$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/link$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+## UID/GID creation as different user
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/user_file1
+TEST chown $NEW_UID:$NEW_GID $M0/user_file1;
+
+## Give permission on volume, so that different users can perform rename
+
+TEST chmod 0777 $M0
+
+## Add a user known as ABC and perform renames
+TEST `useradd -M ABC 2>/dev/null`
+
+TEST cd $M0
+## rename as different user till file gets a linkfile
+
+has_link=0
+while [ $i -lt 100 ]
+do
+ su -m ABC -c "mv $M0/user_file$i $M0/user_file$(( $i+1 ))"
+ if [ $? -ne 0 ]
+ then
+ break
+ fi
+ let i++
+ file_has_linkfile user_file$i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+TEST [ $has_link -eq 2 ]
+
+## del user ABC
+TEST userdel ABC
+
+get_hashed_brick user_file$i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/user_file$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/user_file$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+cleanup;
diff --git a/tests/bugs/distribute/bug-907072.t b/tests/bugs/distribute/bug-907072.t
new file mode 100755
index 00000000000..a4d98831380
--- /dev/null
+++ b/tests/bugs/distribute/bug-907072.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3};
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/test;
+
+# Extract the layout sans the commit hash
+OLD_LAYOUT0=`get_layout $B0/${V0}0/test | cut -c11-34`;
+OLD_LAYOUT1=`get_layout $B0/${V0}1/test | cut -c11-34`;
+OLD_LAYOUT2=`get_layout $B0/${V0}2/test | cut -c11-34`;
+OLD_LAYOUT3=`get_layout $B0/${V0}3/test | cut -c11-34`;
+
+TEST killall glusterfsd;
+
+# Delete directory on one brick
+TEST rm -rf $B0/${V}1/test;
+
+# And only layout xattr on another brick
+TEST setfattr -x trusted.glusterfs.dht $B0/${V0}2/test;
+
+TEST $CLI volume start $V0 force;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST stat $M0/test;
+
+# Extract the layout sans the commit hash
+NEW_LAYOUT0=`get_layout $B0/${V0}0/test | cut -c11-34`;
+NEW_LAYOUT1=`get_layout $B0/${V0}1/test | cut -c11-34`;
+NEW_LAYOUT2=`get_layout $B0/${V0}2/test | cut -c11-34`;
+NEW_LAYOUT3=`get_layout $B0/${V0}3/test | cut -c11-34`;
+
+EXPECT $OLD_LAYOUT0 echo $NEW_LAYOUT0;
+EXPECT $OLD_LAYOUT1 echo $NEW_LAYOUT1;
+EXPECT $OLD_LAYOUT2 echo $NEW_LAYOUT2;
+EXPECT $OLD_LAYOUT3 echo $NEW_LAYOUT3;
diff --git a/tests/bugs/distribute/bug-912564.t b/tests/bugs/distribute/bug-912564.t
new file mode 100755
index 00000000000..d437728f83b
--- /dev/null
+++ b/tests/bugs/distribute/bug-912564.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+# Test that the rsync and "extra" regexes cause rename-in-place without
+# creating linkfiles, when they're supposed to. Without the regex we'd have a
+# 1/4 chance of each file being assigned to the right place, so with 16 files
+# we have a 1/2^32 chance of getting the correct result by accident.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function count_linkfiles {
+ local i
+ local count=0
+ for i in $(seq $2 $3); do
+ x=$(find $1$i -perm -1000 | wc -l)
+ # Divide by two because of the .glusterfs links.
+ count=$((count+x/2))
+ done
+ echo $count
+}
+
+# This function only exists to get around quoting difficulties in TEST.
+function set_regex {
+ $CLI volume set $1 cluster.extra-hash-regex '^foo(.+)bar$'
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+
+# Mount it.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Make sure the rsync regex works, by verifying that no linkfiles are
+# created.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf .%s.%d $fn $RANDOM)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Make sure that linkfiles *are* created for normal files.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf foo%sbar $fn)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -ne "0" ]
+
+# Make sure that setting an extra regex suppresses the linkfiles.
+TEST set_regex $V0
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf foo%sbar $fn)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Re-test the rsync regex, to make sure the extra one didn't break it.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf .%s.%d $fn $RANDOM)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+cleanup
diff --git a/tests/bugs/distribute/bug-915554.t b/tests/bugs/distribute/bug-915554.t
new file mode 100755
index 00000000000..1f59008c56f
--- /dev/null
+++ b/tests/bugs/distribute/bug-915554.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Bug <915554>
+#
+# This test checks for a condition where a rebalance migrates a file and does
+# not preserve the original file size. This can occur due to hole preservation
+# logic in the file migration code. If a file size is aligned to a disk sector
+# boundary (512b) and the tail portion of the file is zero-filled, the file
+# may end up truncated to the end of the last data region in the file.
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK_COUNT=3
+# create, start and mount a two brick DHT volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --gid-timeout=-1 -s $H0 --volfile-id $V0 $M0;
+
+i=1
+# Write some data to a file and extend such that the file is sparse to a sector
+# aligned boundary.
+echo test > $M0/$i
+TEST truncate -s 1M $M0/$i
+
+# cache the original size
+SIZE1=`stat -c %s $M0/$i`
+
+# rename till file gets a linkfile
+
+while [ $i -ne 0 ]
+do
+ test=`mv $M0/$i $M0/$(( $i+1 )) 2>/dev/null`
+ if [ $? -ne 0 ]
+ then
+ echo "rename failed"
+ break
+ fi
+ let i++
+ file_has_linkfile $i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+# start a rebalance (force option to overide checks) to trigger migration of
+# file
+
+TEST $CLI volume rebalance $V0 start force
+
+# check if rebalance has completed for up to 15 secs
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+# validate the file size after the migration
+SIZE2=`stat -c %s $M0/$i`
+
+TEST [ $SIZE1 -eq $SIZE2 ]
+
+TEST rm -f $M0/$i
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-921408.t b/tests/bugs/distribute/bug-921408.t
new file mode 100755
index 00000000000..559114bb85a
--- /dev/null
+++ b/tests/bugs/distribute/bug-921408.t
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+wait_check_status ()
+{
+ n=0
+ while [ $n -lt $1 ]
+ do
+ ret=$(rebalance_completed)
+ if [ $ret == "0" ]
+ then
+ return 0;
+ else
+ sleep 1
+ n=`expr $n + 1`;
+ fi
+ done
+ return 1;
+}
+
+addbr_rebal_till_layout_change()
+{
+ val=1
+ l=$1
+ i=1
+ while [ $i -lt 5 ]
+ do
+ $CLI volume add-brick $V0 $H0:$B0/${V0}$l &>/dev/null
+ $CLI volume rebalance $V0 fix-layout start &>/dev/null
+ wait_check_status $REBALANCE_TIMEOUT
+ if [ $? -eq 1 ]
+ then
+ break
+ fi
+ NEW_LAYOUT=`get_layout $B0/${V0}0 | cut -c11-34`
+ if [ $OLD_LAYOUT == $NEW_LAYOUT ]
+ then
+ i=`expr $i + 1`;
+ l=`expr $l + 1`;
+ else
+ val=0
+ break
+ fi
+ done
+ return $val
+}
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 subvols-per-directory 1
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/test
+TEST touch $M0/test/test
+
+fd=`fd_available`
+TEST fd_open $fd "rw" $M0/test/test
+
+OLD_LAYOUT=`get_layout $B0/${V0}0 | cut -c11-34`
+
+addbr_rebal_till_layout_change 1
+
+TEST [ $? -eq 0 ]
+
+for i in $(seq 1 1000)
+do
+ ls -l $M0/ >/dev/null
+ ret=$?
+ if [ $ret != 0 ]
+ then
+ break
+ fi
+done
+
+TEST [ $ret == 0 ];
+TEST fd_close $fd;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/distribute/bug-924265.t b/tests/bugs/distribute/bug-924265.t
new file mode 100755
index 00000000000..67c21de97cb
--- /dev/null
+++ b/tests/bugs/distribute/bug-924265.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Test that setting cluster.dht-xattr-name works, and that DHT consistently
+# uses the specified name instead of the default.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# We only care about the exit code, so keep it quiet.
+function silent_getfattr {
+ getfattr $* &> /dev/null
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+
+# Create a volume and set the option.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 cluster.dht-xattr-name trusted.foo.bar
+
+# Start and mount the volume.
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Create a directory and make sure it has the right xattr.
+mkdir $M0/test
+TEST ! silent_getfattr -n trusted.glusterfs.dht $B0/${V0}0/test
+TEST silent_getfattr -n trusted.foo.bar $B0/${V0}0/test
+
+cleanup
diff --git a/tests/bugs/distribute/bug-961615.t b/tests/bugs/distribute/bug-961615.t
new file mode 100644
index 00000000000..00938e8fa9b
--- /dev/null
+++ b/tests/bugs/distribute/bug-961615.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test tests that an extra fd_unref does not happen in rebalance
+#migration completion check code path in dht
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST touch $M0/1
+#This rename creates a link file for 10 in the other volume.
+TEST mv $M0/1 $M0/10
+#Lets keep writing to the file which will trigger rebalance completion check
+dd if=/dev/zero of=$M0/10 bs=1k &
+bg_pid=$!
+#Now rebalance force will migrate file '10'
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+#If the bug exists mount would have crashed by now
+TEST ls $M0
+kill -9 $bg_pid > /dev/null 2>&1
+wait > /dev/null 2>&1
+cleanup
diff --git a/tests/bugs/distribute/bug-973073.t b/tests/bugs/distribute/bug-973073.t
new file mode 100755
index 00000000000..cdb785abac0
--- /dev/null
+++ b/tests/bugs/distribute/bug-973073.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+## Steps followed are one described in bugzilla
+
+cleanup;
+
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1`
+
+ if [ $? -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+
+## remove-brick status == rebalance_status
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" remove_brick_completed
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+TEST mkdir $M0/dir 2>/dev/null;
+
+EXPECT "0" get_layout $B0/${V0}2/dir
+cleanup;
diff --git a/tests/bugs/distribute/issue-1327.t b/tests/bugs/distribute/issue-1327.t
new file mode 100755
index 00000000000..acd8c8c6614
--- /dev/null
+++ b/tests/bugs/distribute/issue-1327.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK1=$B0/${V0}-0
+BRICK2=$B0/${V0}-1
+
+TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+
+#remove dir from one of the brick
+TEST rmdir $BRICK2/dir
+
+#safe cache timeout for lookup to be triggered
+sleep 2
+
+TEST ls $M0/dir
+
+TEST stat $BRICK2/dir
+
+cleanup
diff --git a/tests/bugs/distribute/overlap.py b/tests/bugs/distribute/overlap.py
new file mode 100755
index 00000000000..2813979787b
--- /dev/null
+++ b/tests/bugs/distribute/overlap.py
@@ -0,0 +1,59 @@
+
+from __future__ import print_function
+import sys
+
+def calculate_one (ov, nv):
+ old_start = int(ov[18:26], 16)
+ old_end = int(ov[26:34], 16)
+ new_start = int(nv[18:26], 16)
+ new_end = int(nv[26:34], 16)
+ if (new_end < old_start) or (new_start > old_end):
+ #print '%s, %s -> ZERO' % (ov, nv)
+ return 0
+ all_start = max(old_start, new_start)
+ all_end = min(old_end, new_end)
+ #print '%s, %s -> %08x' % (ov, nv, all_end - all_start + 1)
+ return all_end - all_start + 1
+
+def calculate_all (values):
+ total = 0
+ nv_index = len(values) // 2
+ for old_val in values[:nv_index]:
+ new_val = values[nv_index]
+ nv_index += 1
+ total += calculate_one(old_val, new_val)
+ return total
+
+"""
+test1_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ '0x000000000000000055555555aaaaaaa9', # second third
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+]
+
+test2_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ # Next two are (incorrectly) swapped.
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+ '0x000000000000000055555555aaaaaaa9', # second third
+]
+
+print '%08x' % calculate_one(test1_vals[0], test1_vals[3])
+print '%08x' % calculate_one(test1_vals[1], test1_vals[4])
+print '%08x' % calculate_one(test1_vals[2], test1_vals[5])
+print '= %08x' % calculate_all(test1_vals)
+print '%08x' % calculate_one(test2_vals[0], test2_vals[3])
+print '%08x' % calculate_one(test2_vals[1], test2_vals[4])
+print '%08x' % calculate_one(test2_vals[2], test2_vals[5])
+print '= %08x' % calculate_all(test2_vals)
+"""
+
+if __name__ == '__main__':
+ # Return decimal so bash can reason about it.
+ print('%d' % calculate_all(sys.argv[1:]))
diff --git a/tests/bugs/ec/bug-1161621.t b/tests/bugs/ec/bug-1161621.t
new file mode 100644
index 00000000000..84361e440dc
--- /dev/null
+++ b/tests/bugs/ec/bug-1161621.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 5 redundancy 2 $H0:$B0/${V0}{0..4}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+push_trapfunc "rm -rf $tmpdir"
+
+TEST dd if=/dev/urandom of=$tmpdir/file bs=1234 count=20
+cs=$(sha1sum $tmpdir/file | awk '{ print $1 }')
+# Test O_APPEND on create
+TEST dd if=$tmpdir/file of=$M0/file bs=1234 count=10 oflag=append
+# Test O_APPEND on open
+TEST dd if=$tmpdir/file of=$M0/file bs=1234 skip=10 oflag=append conv=notrunc
+EXPECT "$cs" echo $(sha1sum $M0/file | awk '{ print $1 }')
+
+# Fill a file with ff (I don't use 0's because empty holes created by an
+# incorrect offset will be returned as 0's and won't be detected)
+dd if=/dev/zero bs=24680 count=1000 | tr '\0' '\377' >$tmpdir/shared
+cs=$(sha1sum $tmpdir/shared | awk '{ print $1 }')
+# Test concurrent writes to the same file using O_APPEND
+dd if=$tmpdir/shared of=$M0/shared bs=123400 count=100 oflag=append conv=notrunc &
+dd if=$tmpdir/shared of=$M1/shared bs=123400 count=100 oflag=append conv=notrunc &
+wait
+
+EXPECT "24680000" stat -c "%s" $M0/shared
+EXPECT "$cs" echo $(sha1sum $M0/shared | awk '{ print $1 }')
+
+cleanup
diff --git a/tests/bugs/ec/bug-1161886.c b/tests/bugs/ec/bug-1161886.c
new file mode 100644
index 00000000000..1f12650ea6d
--- /dev/null
+++ b/tests/bugs/ec/bug-1161886.c
@@ -0,0 +1,53 @@
+
+#include <stdio.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+
+ if (argc != 4) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, "/dev/null", 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR | O_TRUNC);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+ glfs_close(fd);
+
+ ret = 0;
+
+out:
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/bugs/ec/bug-1161886.t b/tests/bugs/ec/bug-1161886.t
new file mode 100644
index 00000000000..e7322210a3e
--- /dev/null
+++ b/tests/bugs/ec/bug-1161886.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function check_ec_size
+{
+ local res
+
+ for i in {0..2}; do
+ res=$(( `getfattr -n trusted.ec.size -e hex $B0/$V0$i/$1 | sed -n "s/^trusted.ec.size=//p"` ))
+ if [ "x$res" == "x" -o "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/${V0}{0..2}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST dd if=/dev/zero of=$M0/file1 bs=4k count=1
+TEST mv $M0/file1 $M0/file2
+TEST ! stat $M0/file1
+TEST stat $M0/file2
+
+TEST build_tester $(dirname $0)/bug-1161886.c -lgfapi -Wall -O2
+TEST $(dirname $0)/bug-1161886 $H0 $V0 /file2
+EXPECT "^0$" stat -c "%s" $M0/file2
+EXPECT "^Y$" check_ec_size file2 0
+
+rm -f $(dirname $0)/bug-1161886
+
+cleanup
diff --git a/tests/bugs/ec/bug-1179050.t b/tests/bugs/ec/bug-1179050.t
new file mode 100644
index 00000000000..ea2d7b39838
--- /dev/null
+++ b/tests/bugs/ec/bug-1179050.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/${V0}{0..2}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST $CLI volume clear-locks $V0 / kind all inode
+
+sleep 1
+
+EXPECT "3" ec_child_up_count $V0 0
+
+cleanup
diff --git a/tests/bugs/ec/bug-1187474.t b/tests/bugs/ec/bug-1187474.t
new file mode 100644
index 00000000000..e6344c26e73
--- /dev/null
+++ b/tests/bugs/ec/bug-1187474.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function check_dir()
+{
+ local count
+
+ count=`ls $1 | grep "dir.[0-9]*" | wc -l`
+ if [[ $count -eq 100 ]]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST mkdir $M0/dir.{1..100}
+
+sleep 2
+
+EXPECT "Y" check_dir $N0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup
diff --git a/tests/bugs/ec/bug-1188145.t b/tests/bugs/ec/bug-1188145.t
new file mode 100644
index 00000000000..aa3a59bc62f
--- /dev/null
+++ b/tests/bugs/ec/bug-1188145.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function create_dirs()
+{
+ local stop=$1
+ local idx
+ local res
+
+ res=0
+ idx=1
+ while [[ -f ${stop} ]]; do
+ mkdir $M0/${idx}
+ if [[ $? -ne 0 ]]; then
+ res=1
+ break;
+ fi
+ idx=$(( idx + 1 ))
+ done
+
+ return ${res}
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+name=`mktemp -t ${0##*/}.XXXXXX`
+create_dirs ${name} &
+pid=$!
+
+sleep 2
+TEST $CLI volume set $V0 uss on
+sleep 5
+TEST $CLI volume set $V0 uss off
+sleep 5
+
+TEST rm -f ${name}
+TEST wait ${pid}
+
+cleanup
diff --git a/tests/bugs/ec/bug-1227869.t b/tests/bugs/ec/bug-1227869.t
new file mode 100644
index 00000000000..00fad825fae
--- /dev/null
+++ b/tests/bugs/ec/bug-1227869.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 $H0:$B0/${V0}{1..3}
+EXPECT "Created" volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 100MB
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST $QDD $M0/file 256 40
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+EXPECT "0" echo $(df -k $M0 | grep -q '10240 '; echo $?)
+EXPECT "0" echo $(df -k $M0 | grep -q '92160 '; echo $?)
+
+rm -f $QDD
+cleanup
diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t
new file mode 100644
index 00000000000..9181e73ec19
--- /dev/null
+++ b/tests/bugs/ec/bug-1236065.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+SCRIPT_TIMEOUT=400
+
+cleanup
+
+ec_test_dir=$M0/test
+
+function ec_test_generate_src()
+{
+ mkdir -p $ec_test_dir
+ for i in `seq 0 19`; do
+ dd if=/dev/zero of=$ec_test_dir/$i.c bs=1024 count=2
+ done
+}
+
+function ec_test_make()
+{
+ for i in `ls *.c`; do
+ file=`basename $i`
+ filename=${file%.*}
+ cp $i $filename.o
+ done
+}
+
+## step 1
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 7 redundancy 3 $H0:$B0/${V0}{0..6}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" ec_child_up_count $V0 0
+
+## step 2
+TEST ec_test_generate_src
+
+cd $ec_test_dir
+TEST ec_test_make
+
+## step 3
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT '5' online_brick_count
+
+TEST rm -f *.o
+TEST ec_test_make
+
+## step 4
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count
+
+# active heal
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -f *.o
+TEST ec_test_make
+
+## step 5
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT '5' online_brick_count
+
+TEST rm -f *.o
+TEST ec_test_make
+
+EXPECT '5' online_brick_count
+
+## step 6
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count
+
+# self-healing
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -f *.o
+TEST ec_test_make
+
+TEST pidof glusterd
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT '7' online_brick_count
+## cleanup
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
+
+cleanup;
diff --git a/tests/bugs/ec/bug-1251446.t b/tests/bugs/ec/bug-1251446.t
new file mode 100644
index 00000000000..5779c2e973b
--- /dev/null
+++ b/tests/bugs/ec/bug-1251446.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 4 redundancy 1 $H0:$B0/${V0}{0..3}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$M0/test1 bs=1024k count=2
+cs=$(sha1sum $M0/test1 | awk '{ print $1 }')
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT '3' online_brick_count
+
+TEST cp $M0/test1 $M0/test2
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
+
+TEST $CLI volume start $V0 force
+EXPECT '4' online_brick_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "699392" stat -c "%s" $B0/${V0}0/test2
+
+# force cache clear
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT '3' online_brick_count
+
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
+
+## cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/ec/bug-1304988.t b/tests/bugs/ec/bug-1304988.t
new file mode 100755
index 00000000000..334e5c25932
--- /dev/null
+++ b/tests/bugs/ec/bug-1304988.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test renames files from dir to test and vice versa in an infinite loop
+# at the same time add-brick and rebalance starts which should NOT be hanged
+
+cleanup;
+
+function rename_files {
+while :
+do
+ for i in {1..100}; do mv $M0/dir/file-$i $M0/test/newfile-$i; done
+ for i in {1..100}; do mv $M0/test/newfile-$i $M0/dir/file-$i; done
+done
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+mkdir $M0/dir
+mkdir $M0/test
+touch $M0/dir/file-{1..100}
+rename_files &
+back_pid1=$!;
+echo "Started rename $back_pid1"
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..11}
+TEST $CLI volume rebalance $V0 start force
+
+#Test if rebalance completed with success.
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+echo "rebalance done..."
+kill $back_pid1
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332022
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1332022
diff --git a/tests/bugs/ec/bug-1547662.t b/tests/bugs/ec/bug-1547662.t
new file mode 100644
index 00000000000..5748218587e
--- /dev/null
+++ b/tests/bugs/ec/bug-1547662.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Immediately after replace-brick, trusted.ec.version will be absent, so if it
+# is present we can assume that heal was started on root
+function root_heal_attempted {
+ if [ -z $(get_hex_xattr trusted.ec.version $1) ]; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST ${CLI} volume create ${V0} disperse 6 redundancy 2 ${H0}:${B0}/${V0}{0..5}
+TEST ${CLI} volume start ${V0}
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+
+TEST mkdir ${M0}/base
+TEST mkdir ${M0}/base/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+
+TEST ${CLI} volume replace-brick ${V0} ${H0}:${B0}/${V0}5 ${H0}:${B0}/${V0}6 commit force
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "Y" glustershd_up_status
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count_shd ${V0} 0
+EXPECT_WITHIN ${HEAL_TIMEOUT} "Y" root_heal_attempted ${B0}/${V0}6
+EXPECT_WITHIN ${HEAL_TIMEOUT} "^0$" get_pending_heal_count ${V0}
+EXPECT "^127$" echo $(find ${B0}/${V0}6/base -type d | wc -l)
+
+cleanup;
diff --git a/tests/bugs/ec/bug-1699866-check-reopen-fd.t b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
new file mode 100644
index 00000000000..4386d010318
--- /dev/null
+++ b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST mkdir -p $M0/dir
+
+fd="$(fd_available)"
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+TEST fd_open ${fd} rw $M0/dir/test
+TEST fd_write ${fd} "test1"
+TEST $CLI volume replace-brick ${V0} $H0:$B0/${V0}0 $H0:$B0/${V0}0_1 commit force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST fd_write ${fd} "test2"
+TEST fd_close ${fd}
+
+cleanup
diff --git a/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
new file mode 100644
index 00000000000..67fdb184b46
--- /dev/null
+++ b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function do_ls() {
+ local dir="${1}"
+ local i
+
+ for i in {1..50}; do
+ ls -l $M0/${dir} >/dev/null &
+ ls -l $M1/${dir} >/dev/null &
+ ls -l $M2/${dir} >/dev/null &
+ ls -l $M3/${dir} >/dev/null &
+ done
+ wait
+}
+
+function measure_time() {
+ {
+ LC_ALL=C
+ time -p "${@}"
+ } 2>&1 | awk '/^real/ { print $2 * 1000 }'
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 features.locks-notify-contention on
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 10
+TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 10
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M3
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file.{1..10}
+
+# Run multiple 'ls' concurrently from multiple clients so that they collide and
+# cause partial locks.
+TEST [[ $(measure_time do_ls dir) -lt 10000 ]]
+
+cleanup
diff --git a/tests/bugs/error-gen/bug-767095.t b/tests/bugs/error-gen/bug-767095.t
new file mode 100755
index 00000000000..6cc254f559d
--- /dev/null
+++ b/tests/bugs/error-gen/bug-767095.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+dump_dir='/tmp/gerrit_glusterfs'
+TEST mkdir -p $dump_dir;
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 error-gen posix;
+TEST $CLI volume set $V0 server.statedump-path $dump_dir;
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST PID=`gluster --xml volume status patchy | grep -A 8 patchy1 | grep '<pid>' | cut -d '>' -f 2 | cut -d '<' -f 1`
+TEST kill -USR1 $PID;
+sleep 2;
+for file_name in $(ls $dump_dir)
+do
+ TEST grep "error-gen.priv" $dump_dir/$file_name;
+done
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+TEST rm -rf $dump_dir;
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-1030208.t b/tests/bugs/fuse/bug-1030208.t
new file mode 100644
index 00000000000..526283cf101
--- /dev/null
+++ b/tests/bugs/fuse/bug-1030208.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#Test case: Hardlink test
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create a file and perform fop on a DIR
+TEST touch $M0/foo
+TEST ls $M0/
+
+#Create hardlink
+TEST ln $M0/foo $M0/bar
+
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-1126048.c b/tests/bugs/fuse/bug-1126048.c
new file mode 100644
index 00000000000..19165ecf6f7
--- /dev/null
+++ b/tests/bugs/fuse/bug-1126048.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+/*
+ * This function opens a file and to trigger migration failure, unlinks the
+ * file and performs graph switch (cmd passed in argv). If everything goes fine,
+ * fsync should fail without crashing the mount process.
+ */
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ int fd = 0;
+ char *cmd = argv[1];
+ struct stat stbuf = {
+ 0,
+ };
+
+ printf("cmd is: %s\n", cmd);
+ fd = open("a.txt", O_CREAT | O_RDWR, 0644);
+ if (fd < 0)
+ printf("open failed: %s\n", strerror(errno));
+
+ ret = unlink("a.txt");
+ if (ret < 0)
+ printf("unlink failed: %s\n", strerror(errno));
+ if (write(fd, "abc", 3) < 0)
+ printf("Not able to print %s\n", strerror(errno));
+ system(cmd);
+ sleep(1); /* No way to confirm graph switch so sleep 1 */
+ ret = fstat(fd, &stbuf);
+ if (ret < 0)
+ printf("fstat failed %\n", strerror(errno));
+ ret = fsync(fd);
+ if (ret < 0)
+ printf("Not able to fsync %s\n", strerror(errno));
+ return 0;
+}
diff --git a/tests/bugs/fuse/bug-1126048.t b/tests/bugs/fuse/bug-1126048.t
new file mode 100755
index 00000000000..5e2ed293cd3
--- /dev/null
+++ b/tests/bugs/fuse/bug-1126048.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+function grep_for_ebadf {
+ $M0/bug-1126048 "gluster --mode=script --wignore volume add-brick $V0 $H0:$B0/brick2" | grep -i "Bad file descriptor"
+}
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=yes
+
+build_tester $(dirname $0)/bug-1126048.c
+
+TEST cp $(dirname $0)/bug-1126048 $M0
+cd $M0
+TEST grep_for_ebadf
+TEST ls -l $M0
+cd -
+TEST rm -f $(dirname $0)/bug-1126048
+cleanup;
diff --git a/tests/bugs/fuse/bug-1283103.t b/tests/bugs/fuse/bug-1283103.t
new file mode 100644
index 00000000000..56612534cb9
--- /dev/null
+++ b/tests/bugs/fuse/bug-1283103.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=1283103
+#
+# Test that it is possible to set and get security.*
+# xattrs other thatn security.selinux irrespective of
+# whether the mount was done with --selinux. This is
+# for example important for Samba to be able to store
+# the Windows-level acls in the security.NTACL xattr
+# when the acl_xattr vfs module is used.
+#
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TESTFILE="$M0/testfile"
+TEST touch ${TESTFILE}
+
+TEST echo "setfattr -n security.foobar -v value ${TESTFILE}"
+TEST setfattr -n security.foobar -v value ${TESTFILE}
+TEST getfattr -n security.foobar ${TESTFILE}
+TEST setfattr -x security.foobar ${TESTFILE}
+
+# can not currently test the security.selinux xattrs
+# since the kernel intercepts them.
+# see https://bugzilla.redhat.com/show_bug.cgi?id=1272868
+#TEST ! getfattr -n security.selinux ${TESTFILE}
+#TEST ! setfattr -n security.selinux -v value ${TESTFILE}
+
+TEST umount $M0
+
+# Mount FUSE with selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 --selinux $M0
+
+TEST setfattr -n security.foobar -v value ${TESTFILE}
+TEST getfattr -n security.foobar ${TESTFILE}
+TEST setfattr -x security.foobar ${TESTFILE}
+
+# can not currently test the security.selinux xattrs
+# since the kernel intercepts them.
+# see https://bugzilla.redhat.com/show_bug.cgi?id=1272868
+#TEST setfattr -n security.selinux -v value ${TESTFILE}
+#TEST getfattr -n security.selinux ${TESTFILE}
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-1309462.t b/tests/bugs/fuse/bug-1309462.t
new file mode 100644
index 00000000000..975d72d82ed
--- /dev/null
+++ b/tests/bugs/fuse/bug-1309462.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=1309462
+# Test the new fuse mount option --capability.
+# Set/get xattr on security.capability should be sent
+# down from fuse, only if --selinux or --capability option
+# is used for mounting.
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TESTFILE="$M0/testfile"
+TEST touch ${TESTFILE}
+
+TEST ! setfattr -n security.capability -v value ${TESTFILE}
+TEST ! getfattr -n security.capability ${TESTFILE}
+
+TEST umount $M0
+
+# Mount FUSE with selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 --selinux $M0
+
+TEST setfattr -n security.capability -v value ${TESTFILE}
+TEST getfattr -n security.capability ${TESTFILE}
+TEST setfattr -x security.capability ${TESTFILE}
+
+TEST umount $M0
+
+# Mount FUSE with capability:
+TEST glusterfs -s $H0 --volfile-id $V0 --capability $M0
+
+TEST setfattr -n security.capability -v value ${TESTFILE}
+TEST getfattr -n security.capability ${TESTFILE}
+TEST setfattr -x security.capability ${TESTFILE}
+
+TEST umount $M0
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1581735
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1581735 \ No newline at end of file
diff --git a/tests/bugs/fuse/bug-1336818.t b/tests/bugs/fuse/bug-1336818.t
new file mode 100644
index 00000000000..53286521742
--- /dev/null
+++ b/tests/bugs/fuse/bug-1336818.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+#Test case: OOM score adjust
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# Prepare
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# Basic check
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check valid value (< 0)
+TEST glusterfs --oom-score-adj=-1000 -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check valid value (> 0)
+TEST glusterfs --oom-score-adj=1000 -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check valid value (= 0)
+TEST glusterfs --oom-score-adj=0 -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check invalid value (no value given)
+TEST ! glusterfs --oom-score-adj -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (< OOM_SCORE_ADJ_MIN)
+TEST ! glusterfs --oom-score-adj=-1001 -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (> OOM_SCORE_ADJ_MAX)
+TEST ! glusterfs --oom-score-adj=1001 -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (float)
+TEST ! glusterfs --oom-score-adj=12.34 -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (non-integer string)
+TEST ! glusterfs --oom-score-adj=qwerty -s $H0 --volfile-id $V0 $M0
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-858215.t b/tests/bugs/fuse/bug-858215.t
new file mode 100755
index 00000000000..95999f6ad24
--- /dev/null
+++ b/tests/bugs/fuse/bug-858215.t
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume set $V0 nfs.disable off
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --event-history=on -s $H0 --volfile-id $V0 $M0;
+
+## Test for checking whether the fops have been saved in the event-history
+TEST ! stat $M0/newfile;
+TEST touch $M0/newfile;
+TEST stat $M0/newfile;
+TEST rm $M0/newfile;
+
+nfs_pid=$(cat $GLUSTERD_PIDFILEDIR/nfs/nfs.pid || echo -1);
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ];
+pids=$(pidof glusterfs);
+for i in $pids
+do
+ if [ $i -ne $nfs_pid ] && [ $i -ne $glustershd_pid ]; then
+ mount_pid=$i;
+ break;
+ fi
+done
+
+dump_dir='/tmp/gerrit_glusterfs'
+cat >$statedumpdir/glusterdump.options <<EOF
+all=yes
+path=$dump_dir
+EOF
+
+TEST mkdir -p $dump_dir;
+TEST kill -USR1 $mount_pid;
+sleep 2;
+for file_name in $(ls $dump_dir)
+do
+ TEST grep -q "xlator.mount.fuse.history" $dump_dir/$file_name;
+done
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+TEST rm -rf $dump_dir;
+TEST rm $statedumpdir/glusterdump.options;
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-858488-min-free-disk.t b/tests/bugs/fuse/bug-858488-min-free-disk.t
new file mode 100644
index 00000000000..635dc04d1e6
--- /dev/null
+++ b/tests/bugs/fuse/bug-858488-min-free-disk.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create partitions for bricks
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 200M $B0/brick2
+TEST LO1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $LO1
+TEST LO2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+## Real test starts here
+## ----------------------------------------------------------------------------
+
+MINFREEDISKVALUE=90%
+
+## Set min free disk to MINFREEDISKVALUE percent
+TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE
+
+## We need to have file name to brick map based on hash.
+## We will use this info in test case 0.
+i=1
+CONTINUE=2
+BRICK1FILE=0
+BRICK2FILE=0
+while [[ $CONTINUE -ne 0 ]]
+do
+ dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1
+
+ if [[ -e $B0/${V0}1/file$i.data && $BRICK1FILE = "0" ]]
+ then
+ BRICK1FILE=file$i.data
+ CONTINUE=$(( $CONTINUE - 1 ))
+ fi
+
+ if [[ -e $B0/${V0}2/file$i.data && $BRICK2FILE = "0" ]]
+ then
+ BRICK2FILE=file$i.data
+ CONTINUE=$(( $CONTINUE - 1 ))
+ fi
+
+ rm $M0/file$i.data
+ let i++
+done
+
+
+## Bring free space on one of the bricks to less than minfree value by
+## creating one big file.
+dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1
+
+#Lets find out where it was created
+if [ -f $B0/${V0}1/fillonebrick.data ]
+then
+ FILETOCREATE=$BRICK1FILE
+ OTHERBRICK=$B0/${V0}2
+else
+ FILETOCREATE=$BRICK2FILE
+ OTHERBRICK=$B0/${V0}1
+fi
+
+##--------------------------------TEST CASE 0-----------------------------------
+## If we try to create a file which should go into full brick as per hash, it
+## should go into the other brick instead.
+
+## Before that let us create files just to make gluster refresh the stat
+## Using touch so it should not change the disk usage stats
+for k in {1..20};
+do
+ touch $M0/dummyfile$k
+done
+
+dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1
+TEST [ -e $OTHERBRICK/$FILETOCREATE ]
+## Done testing, lets clean up
+TEST rm -rf $M0/*
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+$CLI volume delete $V0;
+
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-924726.t b/tests/bugs/fuse/bug-924726.t
new file mode 100755
index 00000000000..2d3c7680798
--- /dev/null
+++ b/tests/bugs/fuse/bug-924726.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+TESTS_EXPECTED_IN_LOOP=10
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_socket_count() {
+ netstat -nap | grep $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST ls $M0
+
+GLFS_MNT_PID=`ps ax | grep "glusterfs -s $H0 \-\-volfile\-id $V0 $M0" | sed -e "s/^ *\([0-9]*\).*/\1/g"`
+
+SOCKETS_BEFORE_SWITCH=`netstat -nap | grep $GLFS_MNT_PID | grep ESTABLISHED | wc -l`
+
+for i in $(seq 1 5); do
+ TEST_IN_LOOP $CLI volume set $V0 performance.write-behind off;
+ sleep 1;
+ TEST_IN_LOOP $CLI volume set $V0 performance.write-behind on;
+ sleep 1;
+done
+
+SOCKETS_AFTER_SWITCH=`netstat -nap | grep $GLFS_MNT_PID | grep ESTABLISHED | wc -l`
+
+# currently active graph is not cleaned up till some operation on
+# mount-point. Hence there is one extra graph.
+TEST [ $SOCKETS_AFTER_SWITCH = `expr $SOCKETS_BEFORE_SWITCH + 1` ]
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-963678.t b/tests/bugs/fuse/bug-963678.t
new file mode 100644
index 00000000000..006181f26e1
--- /dev/null
+++ b/tests/bugs/fuse/bug-963678.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Bug 963678 - Test discard functionality
+#
+# Test that basic discard (hole punch) functionality works via the fallocate
+# command line tool. Hole punch deallocates a region of a file, creating a hole
+# and a zero-filled data region. We verify that hole punch works, frees blocks
+# and that subsequent reads do not read stale data (caches are invalidated).
+#
+# NOTE: fuse fallocate is known to be broken with regard to cache invalidation
+# up to 3.9.0 kernels. Therefore, FOPEN_KEEP_CACHE is not used in this
+# test (opens will invalidate the fuse cache).
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fallocate.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# check for fallocate and hole punch support
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+
+# allocate some blocks, punch a hole and verify block allocation
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %B $M0/file`
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -ge 1048576 ]
+TEST fallocate -p -o 512k -l 128k $M0/file
+
+nblks=`stat -c %b $M0/file`
+# allow some room for xattr blocks
+TEST [ $(($blksz * $nblks)) -lt $((917504 + 16384)) ]
+TEST unlink $M0/file
+
+# write some data, punch a hole and verify the file content changes
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -p -o 512k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post
+TEST ! cmp $M0/file.copy.pre $M0/file.copy.post
+TEST unlink $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-983477.t b/tests/bugs/fuse/bug-983477.t
new file mode 100755
index 00000000000..41ddd9e55a9
--- /dev/null
+++ b/tests/bugs/fuse/bug-983477.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script checks if use-readdirp option works as accepted in mount options
+
+function get_use_readdirp_value {
+ local vol=$1
+ local statedump=$(generate_mount_statedump $vol)
+ sleep 1
+ local val=$(grep "use_readdirp=" $statedump | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $val
+}
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+#If readdirp is enabled statedump should reflect it
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=yes
+TEST cd $M0
+EXPECT "1" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#If readdirp is enabled statedump should reflect it
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=no
+TEST cd $M0
+EXPECT "0" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Since args are optional on this argument just specifying "--use-readdirp" should also turn it `on` not `off`
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp
+TEST cd $M0
+EXPECT "1" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#By default it is enabled.
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST cd $M0
+EXPECT "1" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Invalid values for use-readdirp should not be accepted
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=please-fail
+
+cleanup
diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t
new file mode 100644
index 00000000000..ffa6df54144
--- /dev/null
+++ b/tests/bugs/fuse/bug-985074.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Bug 985074 - Verify stale inode/dentry mappings are cleaned out.
+#
+# This test verifies that an inode/dentry mapping for a file removed via a
+# separate mount point is cleaned up appropriately. We create a file and hard
+# link from client 1. Next we remove the link via client 2. Finally, from client
+# 1 we attempt to rename the original filename to the name of the just removed
+# hard link.
+#
+# If the inode is not unlinked properly, the removed directory entry can resolve
+# to an inode (on the client that never saw the rm) that ends up passed down
+# through the lookup call. If md-cache holds valid metadata on the inode (due to
+# a large timeout value or recent lookup on the valid name), it is tricked into
+# returning a successful lookup that should have returned ENOENT. This manifests
+# as an error from the mv command in the following test sequence because file
+# and file.link resolve to the same file:
+#
+# # mv /mnt/glusterfs/0/file /mnt/glusterfs/0/file.link
+# mv: `/mnt/glusterfs/0/file' and `/mnt/glusterfs/0/file.link' are the same file
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0
+
+TEST touch $M0/file
+TEST ln $M0/file $M0/file.link
+TEST ls -ali $M0 $M1
+TEST rm -f $M1/file.link
+TEST ls -ali $M0 $M1
+
+TEST mv $M0/file $M0/file.link
+TEST stat $M0/file.link
+TEST ! stat $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t
new file mode 100755
index 00000000000..a51b1bc7267
--- /dev/null
+++ b/tests/bugs/fuse/many-groups-for-acl.t
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup
+
+# prepare the users and groups
+NEW_USER=bug1246275
+NEW_UID=1246275
+NEW_GID=1246275
+LAST_GID=1246403
+NEW_GIDS=${NEW_GID}
+
+# OS-specific overrides
+case $OSTYPE in
+NetBSD|Darwin)
+ # no ACLs, and only NGROUPS_MAX=16 secondary groups are supported
+ SKIP_TESTS
+ exit 0
+ ;;
+FreeBSD)
+ # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups
+ ;;
+Linux)
+ # NGROUPS_MAX=65536, we can afford 200 groups
+ ;;
+*)
+ ;;
+esac
+
+# create a user that belongs to many groups
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupadd -o -g ${GID} ${NEW_USER}-${GID}
+ NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}"
+done
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+
+# Linux < 3.8 exports only first 32 gids of pid to userspace
+kernel_exports_few_gids=0
+if [ "$OSTYPE" = Linux ] && \
+ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then
+ kernel_exports_few_gids=1
+fi
+
+# preparation done, start the tests
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create ${V0} ${H0}:${B0}/${V0}1
+TEST $CLI volume set $V0 nfs.disable off
+# disable manage-gids on the server-side for now, gets enabled later
+TEST $CLI volume set ${V0} server.manage-gids off
+TEST $CLI volume start ${V0}
+
+# This is just a synchronization hack to make sure the bricks are
+# up before going on.
+EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available
+
+# mount the volume with POSIX ACL support, without --resolve-gids
+TEST glusterfs --acl --volfile-id=/${V0} --volfile-server=${H0} ${M0}
+
+# create some directories for testing
+TEST mkdir ${M0}/first-32-gids-1
+TEST setfacl -m g:${NEW_UID}:rwx ${M0}/first-32-gids-1
+TEST mkdir ${M0}/first-32-gids-2
+TEST setfacl -m g:$[NEW_UID+16]:rwx ${M0}/first-32-gids-2
+TEST mkdir ${M0}/gid-64
+TEST setfacl -m g:$[NEW_UID+64]:rwx ${M0}/gid-64
+TEST mkdir ${M0}/gid-120
+TEST setfacl -m g:$[NEW_UID+120]:rwx ${M0}/gid-120
+
+su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-1/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null"
+TEST [ $? -eq $kernel_exports_few_gids ]
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null"
+TEST [ $? -ne 0 ]
+
+# unmount and remount with --resolve-gids
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+TEST glusterfs --acl --resolve-gids --volfile-id=/${V0} --volfile-server=${H0} ${M0}
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-64/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null"
+TEST [ $? -ne 0 ]
+
+# enable server-side resolving of the groups
+# stopping and starting is not really needed, but it prevents races
+TEST $CLI volume stop ${V0}
+TEST $CLI volume set ${V0} server.manage-gids on
+TEST $CLI volume start ${V0}
+EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available
+
+# unmount and remount to prevent more race conditions on test systems
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+TEST glusterfs --acl --resolve-gids --volfile-id=/${V0} --volfile-server=${H0} ${M0}
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-120/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+# cleanup
+userdel --force ${NEW_USER}
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupdel ${NEW_USER}-${GID}
+done
+
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+
+TEST ${CLI} volume stop ${V0}
+TEST ${CLI} volume delete ${V0}
+
+cleanup
diff --git a/tests/bugs/fuse/setup.sh b/tests/bugs/fuse/setup.sh
new file mode 100755
index 00000000000..d44d8dba027
--- /dev/null
+++ b/tests/bugs/fuse/setup.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $@ $M0
+
+echo "Gluster started and volume '$V0' mounted under '$M0'"
diff --git a/tests/bugs/fuse/teardown.sh b/tests/bugs/fuse/teardown.sh
new file mode 100755
index 00000000000..5d57974e1f9
--- /dev/null
+++ b/tests/bugs/fuse/teardown.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
diff --git a/tests/bugs/geo-replication/bug-1111490.t b/tests/bugs/geo-replication/bug-1111490.t
new file mode 100644
index 00000000000..9686fb5a0ef
--- /dev/null
+++ b/tests/bugs/geo-replication/bug-1111490.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+
+# mount with auxiliary gfid mount
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount
+
+# create file with specific gfid
+uuid=`uuidgen`
+TEST chown 10:10 $M0
+EXPECT "File creation OK" $PYTHON $(dirname $0)/../../utils/gfid-access.py \
+ $M0 ROOT file0 $uuid file 10 10 0644
+
+# check gfid
+EXPECT "$uuid" getfattr --only-values -n glusterfs.gfid.string $M0/file0
+
+# unmount and mount again so as to start with a fresh inode table
+# or use another mount...
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount
+
+# touch the file again (gfid-access.py handles errno)
+EXPECT "File creation OK" $PYTHON $(dirname $0)/../../utils/gfid-access.py \
+ $M0 ROOT file0 $uuid file 10 10 0644
+
+cleanup;
diff --git a/tests/bugs/geo-replication/bug-1296496.t b/tests/bugs/geo-replication/bug-1296496.t
new file mode 100644
index 00000000000..a157be7849a
--- /dev/null
+++ b/tests/bugs/geo-replication/bug-1296496.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a replicated volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+
+TEST touch $M0
+
+vol_uuid=$(gluster vol info $V0 | grep "Volume ID" | awk '{print $3}')
+
+xtime="trusted.glusterfs.$vol_uuid.xtime"
+
+#TEST xtime
+TEST ! getfattr -n $xtime $M0
+TEST getfattr -n $xtime $B0/${V0}-0
+TEST getfattr -n $xtime $B0/${V0}-1
+
+#TEST stime
+slave_uuid=$(uuidgen)
+stime="trusted.glusterfs.$vol_uuid.$slave_uuid.stime"
+TEST setfattr -n $stime -v "0xFFFE" $B0/${V0}-0
+TEST setfattr -n $stime -v "0xFFFF" $B0/${V0}-1
+
+TEST ! getfattr -n $stime $M0
+TEST getfattr -n $stime $M1
+
+cleanup;
diff --git a/tests/bugs/geo-replication/bug-877293.t b/tests/bugs/geo-replication/bug-877293.t
new file mode 100755
index 00000000000..c5205e8109e
--- /dev/null
+++ b/tests/bugs/geo-replication/bug-877293.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a replicated volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+
+TEST touch $M0
+
+vol_uuid=`getfattr -n trusted.glusterfs.volume-mark -ehex $M1 | sed -n 's/^trusted.glusterfs.volume-mark=0x//p' | cut -b5-36 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'`
+xtime=trusted.glusterfs.$vol_uuid.xtime
+
+TEST "getfattr -n $xtime $B0/${V0}-0 | grep -q ${xtime}="
+
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup
diff --git a/tests/bugs/gfapi/bug-1032894.t b/tests/bugs/gfapi/bug-1032894.t
new file mode 100644
index 00000000000..88d110136e2
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1032894.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check stale indices are deleted as part of self-heal-daemon crawl.
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+cd $M0
+TEST mkdir a
+cd a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+# Create stale indices
+for i in {1..10}; do echo abc > $i; done
+for i in {1..10}; do rm -f $i; done
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+
+#Since maximum depth of the directory structure that needs healin is 2
+#Trigger two self-heals. That should make sure the heal is complete
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_index_count $B0/${V0}1
+cleanup
diff --git a/tests/bugs/gfapi/bug-1093594.c b/tests/bugs/gfapi/bug-1093594.c
new file mode 100644
index 00000000000..f7a06dd5ba8
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1093594.c
@@ -0,0 +1,311 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define WRITE_SIZE (128 * 1024)
+#define READ_WRITE_LOOP 100
+#define FOP_LOOP_COUNT 20
+#define TEST_CASE_LOOP 20
+
+int gfapi = 1;
+static int extension = 1;
+
+static int
+large_number_of_fops(glfs_t *fs)
+{
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL;
+ char *buf = NULL;
+ struct stat sb = {
+ 0,
+ };
+
+ for (i = 0; i < FOP_LOOP_COUNT; i++) {
+ ret = asprintf(&dir1, "dir%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ extension++;
+
+ ret = glfs_mkdir(fs, dir1, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, dir1);
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_closedir(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rmdir(fs, dir1);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&filename1, "file%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&filename2, "file-%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ extension++;
+
+ fd = glfs_creat(fs, filename1, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rename(fs, filename1, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_lstat(fs, filename2, &sb);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_unlink(fs, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+ }
+}
+
+static int
+large_read_write(glfs_t *fs)
+{
+ int ret = 0;
+ int j = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *filename = NULL;
+ char *buf = NULL;
+
+ ret = asprintf(&filename, "filerw%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ extension++;
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename, fd, strerror(errno));
+ return -1;
+ }
+
+ buf = (char *)malloc(WRITE_SIZE);
+ memset(buf, '-', WRITE_SIZE);
+
+ for (j = 0; j < READ_WRITE_LOOP; j++) {
+ ret = glfs_write(fd, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Write(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ return ret;
+ }
+ }
+
+ fd1 = glfs_open(fs, filename, O_RDWR);
+ if (fd1 < 0) {
+ fprintf(stderr, "Open(%s): %d (%s)\n", filename, ret, strerror(errno));
+ return -1;
+ }
+
+ glfs_lseek(fd1, 0, SEEK_SET);
+ for (j = 0; j < READ_WRITE_LOOP; j++) {
+ ret = glfs_read(fd1, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Read(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ return ret;
+ }
+ }
+
+ for (j = 0; j < READ_WRITE_LOOP; j++) {
+ ret = glfs_write(fd1, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Write(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ return ret;
+ }
+ }
+
+ glfs_close(fd);
+ glfs_close(fd1);
+ ret = glfs_unlink(fs, filename);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ free(buf);
+ free(filename);
+}
+
+static int
+volfile_change(const char *volname)
+{
+ int ret = 0;
+ char *cmd = NULL, *cmd1 = NULL;
+
+ ret = asprintf(&cmd, "gluster volume set %s stat-prefetch off", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&cmd1, "gluster volume set %s stat-prefetch on", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "stat-prefetch off on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system(cmd1);
+ if (ret < 0) {
+ fprintf(stderr, "stat-prefetch on on (%s) failed", volname);
+ return ret;
+ }
+
+ free(cmd);
+ free(cmd1);
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <hostname> <Vol> <log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+
+ for (i = 0; i < TEST_CASE_LOOP; i++) {
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = large_number_of_fops(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = large_read_write(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = volfile_change(argv[2]);
+ if (ret < 0)
+ return -1;
+
+ ret = large_number_of_fops(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = large_read_write(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+ }
+ return 0;
+}
diff --git a/tests/bugs/gfapi/bug-1093594.t b/tests/bugs/gfapi/bug-1093594.t
new file mode 100755
index 00000000000..0c220b3b007
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1093594.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/bug-1093594.c -lgfapi
+TEST $(dirname $0)/bug-1093594 $H0 $V0 $logdir/bug-1093594.log
+
+cleanup_tester $(dirname $0)/bug-1093594
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1371541
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1371541
diff --git a/tests/bugs/gfapi/bug-1319374-THIS-crash.t b/tests/bugs/gfapi/bug-1319374-THIS-crash.t
new file mode 100755
index 00000000000..8d3db421c88
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1319374-THIS-crash.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 diagnostics.client-log-flush-timeout 30
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/bug-1319374.c -lgfapi
+TEST $(dirname $0)/bug-1319374 $H0 $V0 $logdir/bug-1319374.log
+
+cleanup_tester $(dirname $0)/bug-1319374
+
+cleanup;
diff --git a/tests/bugs/gfapi/bug-1319374.c b/tests/bugs/gfapi/bug-1319374.c
new file mode 100644
index 00000000000..ea0dfb6b0f2
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1319374.c
@@ -0,0 +1,131 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NO_INIT 1
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_file, int flag)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ if (flag == NO_INIT)
+ goto out;
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs1 = NULL;
+ glfs_t *fs2 = NULL;
+ glfs_t *fs3 = NULL;
+ char *volname = NULL;
+ char *log_file = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ log_file = argv[3];
+
+ fs1 = setup_new_client(hostname, volname, log_file, NO_INIT);
+ if (!fs1) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ fs2 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs2) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ fs3 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs3) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_fini(fs3);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ /* The crash is seen in gf_log_flush_timeout_cbk(), and this gets
+ * triggered when 30s timer expires, hence the sleep of 31s
+ */
+ sleep(31);
+ ret = glfs_fini(fs2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs1);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_fini(fs1);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/bugs/gfapi/bug-1447266/1460514.c b/tests/bugs/gfapi/bug-1447266/1460514.c
new file mode 100644
index 00000000000..c721559a668
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/1460514.c
@@ -0,0 +1,150 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ struct glfs_object *root = NULL, *dir = NULL, *subdir = NULL;
+ struct stat sb = {
+ 0,
+ };
+ char *dirname = "dir";
+ char *subdirname = "subdir";
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ unsigned char subdir_handle[GFAPI_HANDLE_LENGTH] = {'\0'};
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("first attempt glfs_init", ret);
+
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr, "glfs_h_lookupat: error on lookup of / ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+ dir = glfs_h_mkdir(fs, root, dirname, 0644, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on directory creation dir ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+ subdir = glfs_h_mkdir(fs, root, subdirname, 0644, &sb);
+ if (subdir == NULL) {
+ fprintf(stderr,
+ "glfs_h_mkdir: error on directory creation subdir ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+ ret = glfs_h_extract_handle(subdir, subdir_handle, GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ subdirname, strerror(errno));
+ goto out;
+ }
+
+ glfs_h_close(subdir);
+ subdir = NULL;
+ glfs_h_close(dir);
+ dir = NULL;
+
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("second attempt glfs_init", ret);
+
+ subdir = glfs_h_create_from_handle(fs, subdir_handle, GFAPI_HANDLE_LENGTH,
+ &sb);
+ if (subdir == NULL) {
+ fprintf(
+ stderr,
+ "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ subdirname, subdir_handle, strerror(errno));
+ goto out;
+ }
+ dir = glfs_h_lookupat(fs, subdir, "..", &sb, 0);
+ if (dir == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on directory lookup dir using .. ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+
+out:
+ if (subdir)
+ glfs_h_close(subdir);
+ if (dir)
+ glfs_h_close(dir);
+
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/bugs/gfapi/bug-1447266/1460514.t b/tests/bugs/gfapi/bug-1447266/1460514.t
new file mode 100644
index 00000000000..594af75cae2
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/1460514.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/1460514.c -lgfapi -o $(dirname $0)/1460514
+TEST ./$(dirname $0)/1460514 $H0 $V0 $logdir/1460514.log
+
+cleanup_tester $(dirname $0)/1460514
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/gfapi/bug-1447266/bug-1447266.c b/tests/bugs/gfapi/bug-1447266/bug-1447266.c
new file mode 100644
index 00000000000..2b7e2d627fe
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/bug-1447266.c
@@ -0,0 +1,107 @@
+#include <glusterfs/api/glfs.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#define TOTAL_ARGS 4
+int
+main(int argc, char *argv[])
+{
+ char *cwd = (char *)malloc(PATH_MAX * sizeof(char *));
+ char *resolved = NULL;
+ char *result = NULL;
+ char *buf = NULL;
+ struct stat st;
+ char *path = NULL;
+ int ret;
+
+ if (argc != TOTAL_ARGS) {
+ printf(
+ "Please give all required command line args.\n"
+ "Format : <volname> <server_ip> <path_name>\n");
+ goto out;
+ }
+
+ glfs_t *fs = glfs_new(argv[1]);
+
+ if (fs == NULL) {
+ printf("glfs_new: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[2], 24007);
+ if (ret) {
+ printf("glfs_set_volfile_server: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ path = argv[3];
+
+ ret = glfs_set_logging(fs, "/tmp/gfapi.log", 7);
+ if (ret) {
+ printf("glfs_set_logging: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret) {
+ printf("glfs_init: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ sleep(1);
+
+ ret = glfs_chdir(fs, path);
+ if (ret) {
+ printf("glfs_chdir: %s\n", strerror(errno));
+ goto out;
+ }
+
+ buf = glfs_getcwd(fs, cwd, PATH_MAX);
+ if (cwd == NULL) {
+ printf("glfs_getcwd: %s\n", strerror(errno));
+ goto out;
+ }
+
+ printf("\ncwd = %s\n\n", cwd);
+
+ result = glfs_realpath(fs, path, resolved);
+ if (result == NULL) {
+ printf("glfs_realpath: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_stat(fs, path, &st);
+ if (ret) {
+ printf("glfs_stat: %s\n", strerror(errno));
+ goto out;
+ }
+ if (cwd)
+ free(cwd);
+
+ result = glfs_realpath(fs, path, resolved);
+ if (result == NULL) {
+ printf("glfs_realpath: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_fini(fs);
+ if (ret) {
+ printf("glfs_fini: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ printf("\n");
+out:
+ return ret;
+}
diff --git a/tests/bugs/gfapi/bug-1447266/bug-1447266.t b/tests/bugs/gfapi/bug-1447266/bug-1447266.t
new file mode 100644
index 00000000000..45547f4f0e7
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/bug-1447266.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume set $V0 nfs.disable false
+
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+# Create file and hard-links
+TEST touch $M0/f1
+TEST mkdir $M0/dir
+TEST ln $M0/f1 $M0/f2
+TEST ln $M0/f1 $M0/dir/f3
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable;
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+TEST build_tester $(dirname $0)/bug-1447266.c -lgfapi
+
+#Testing strts from here-->
+
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/../."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/../.."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/dir/../."
+#Since dir1 is not present, this test should fail
+TEST ! $(dirname $0)/bug-1447266 $V0 $H0 "/dir/../dir1"
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/dir/.."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps"
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/."
+#Since snap3 is not present, this test should fail
+TEST ! $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/.././snap3"
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/../."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/./snap1/./../snap1/dir/."
+
+cleanup_tester $(dirname $0)/bug-1447266
+cleanup;
diff --git a/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c
new file mode 100644
index 00000000000..d151784627c
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c
@@ -0,0 +1,112 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_WRONLY | O_CREAT | O_TRUNC;
+ int do_write = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *dirname = "/some_dir1";
+ const char *filename = "/some_dir1/testfile";
+ const char *short_filename = "testfile";
+ struct stat sb;
+ char buf[512];
+ struct dirent *entry = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ fprintf(stderr, "Usage: %s <volname> <logfile> <do-write [0/1]\n",
+ argv[0]);
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+ do_write = atoi(argv[3]);
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ ret = glfs_mkdir(fs, dirname, 0755);
+ if (ret && errno != EEXIST)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_mkdir", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ if (do_write) {
+ ret = glfs_write(fd1, "hello world", 11, flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+ }
+
+ fd2 = glfs_opendir(fs, dirname);
+ if (fd2 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_opendir", ret, out);
+ }
+
+ do {
+ ret = glfs_readdirplus_r(fd2, &sb, (struct dirent *)buf, &entry);
+ if (entry != NULL) {
+ if (!strcmp(entry->d_name, short_filename)) {
+ if (sb.st_mode == 0) {
+ fprintf(
+ stderr,
+ "Mode bits are incorrect: d_name - %s, st_mode - %jd\n",
+ entry->d_name, (intmax_t)sb.st_mode);
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+ } while (entry != NULL);
+
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fd2 != NULL)
+ glfs_closedir(fd2);
+
+ if (fs) {
+ /*
+ * If this fails (as it does on Special Snowflake NetBSD for no
+ * good reason), it shouldn't affect the result of the test.
+ */
+ (void)glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t
new file mode 100644
index 00000000000..ac59aeeb47b
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/gfapi-bz1630804.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-bz1630804 $V0 $logdir/gfapi-bz1630804.log 0
+TEST ./$(dirname $0)/gfapi-bz1630804 $V0 $logdir/gfapi-bz1630804.log 1
+
+cleanup_tester $(dirname $0)/gfapi-trunc
+
+cleanup;
diff --git a/tests/bugs/gfapi/glfs_vol_set_IO_ERR.c b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.c
new file mode 100644
index 00000000000..f38f01144d3
--- /dev/null
+++ b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.c
@@ -0,0 +1,163 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define WRITE_SIZE (128)
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_fileile)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_fileile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+ return fs;
+error:
+ return NULL;
+}
+
+int
+write_something(glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char *buf = NULL;
+ int ret = 0;
+ int j = 0;
+
+ fd = glfs_creat(fs, "filename", O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", "filename", fd, strerror(errno));
+ return -1;
+ }
+
+ buf = (char *)malloc(WRITE_SIZE);
+ memset(buf, '-', WRITE_SIZE);
+
+ for (j = 0; j < 4; j++) {
+ ret = glfs_write(fd, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Write(%s): %d (%s)\n", "filename", ret,
+ strerror(errno));
+ return ret;
+ }
+ glfs_lseek(fd, 0, SEEK_SET);
+ }
+ return 0;
+}
+
+static int
+volfile_change(const char *volname)
+{
+ int ret = 0;
+ char *cmd = NULL, *cmd1 = NULL;
+
+ ret = asprintf(&cmd, "gluster volume set %s quick-read on", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&cmd1, "gluster volume set %s quick-read off", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "quick-read off on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system(cmd1);
+ if (ret < 0) {
+ fprintf(stderr, "quick-read on on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "quick-read off on (%s) failed", volname);
+ return ret;
+ }
+
+ free(cmd);
+ free(cmd1);
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char buf[100];
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ fs = setup_new_client(argv[1], argv[2], argv[3]);
+ if (!fs)
+ goto error;
+
+ ret = volfile_change(argv[2]);
+ if (ret < 0)
+ goto error;
+
+ /* This is required as volfile change takes a while to reach this
+ * gfapi client and precess the graph change. Without this the issue
+ * cannot be reproduced as in cannot be tested.
+ */
+ sleep(10);
+
+ ret = write_something(fs);
+ if (ret < 0)
+ goto error;
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/bugs/gfapi/glfs_vol_set_IO_ERR.t b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.t
new file mode 100755
index 00000000000..5893ef273bd
--- /dev/null
+++ b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfs_vol_set_IO_ERR.c -lgfapi
+TEST $(dirname $0)/glfs_vol_set_IO_ERR $H0 $V0 $logdir/glfs_vol_set_IO_ERR.log
+
+cleanup_tester $(dirname $0)/glfs_vol_set_IO_ERR
+cleanup;
diff --git a/tests/bugs/glusterd/859927/repl.t b/tests/bugs/glusterd/859927/repl.t
new file mode 100755
index 00000000000..6e7c23b5b1d
--- /dev/null
+++ b/tests/bugs/glusterd/859927/repl.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for data-self-heal-algorithm option
+function create_setup_for_self_heal {
+ file=$1
+ kill_brick $V0 $H0 $B0/${V0}1
+ dd of=$file if=/dev/urandom bs=1024k count=1 2>&1 > /dev/null
+ $CLI volume start $V0 force
+}
+
+function test_write {
+ dd of=$M0/a if=/dev/urandom bs=1k count=1 2>&1 > /dev/null
+}
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0;
+
+touch $M0/a
+
+TEST $CLI volume set $V0 cluster.data-self-heal-algorithm full
+EXPECT full volume_option $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+cat $file > /dev/null 2>&1
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST $CLI volume set $V0 cluster.data-self-heal-algorithm diff
+EXPECT diff volume_option $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+cat $file > /dev/null 2>&1
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST $CLI volume reset $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+cat $file > /dev/null 2>&1
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST ! $CLI volume set $V0 cluster.data-self-heal-algorithm ""
+
+cleanup;
diff --git a/tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t b/tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t
new file mode 100644
index 00000000000..95d0eb69ac1
--- /dev/null
+++ b/tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t
@@ -0,0 +1,110 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+
+#bug-1102656 - validating volume top command
+
+TEST $CLI volume top $V0 open
+TEST ! $CLI volume top $V0 open brick $H0:/tmp/brick
+TEST $CLI volume top $V0 read
+
+TEST $CLI volume status
+
+#bug- 1002556
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}3
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks';
+
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}3 force
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+TEST killall glusterd
+TEST glusterd
+
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+#bug-1406411- fail-add-brick-when-replica-count-changes
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#add-brick should fail
+TEST ! $CLI_NO_FORCE volume add-brick $V0 replica 3 $H0:$B0/${V0}3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}3
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{1,2};
+TEST $CLI volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2
+TEST kill_brick $V1 $H0 $B0/${V1}1
+
+#add-brick should fail
+TEST ! $CLI_NO_FORCE volume add-brick $V1 replica 2 $H0:$B0/${V1}{3,4}
+
+TEST $CLI volume start $V1 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2
+
+TEST $CLI volume add-brick $V1 replica 2 $H0:$B0/${V1}{3,4}
+
+#bug-905307 - validate cluster.post-op-delay-secs option
+
+#Strings should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs abc
+
+#-ve ints should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs -1
+
+#INT_MAX+1 should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs 2147483648
+
+#floats should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs 1.25
+
+#min val 0 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 0
+EXPECT "0" volume_option $V0 cluster.post-op-delay-secs
+
+#max val 2147483647 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 2147483647
+EXPECT "2147483647" volume_option $V0 cluster.post-op-delay-secs
+
+#some middle val in range 2147 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 2147
+EXPECT "2147" volume_option $V0 cluster.post-op-delay-secs
+
+#bug-1265479 - validate-replica-volume-options
+
+#Setting data-self-heal option on for distribute-replicate volume
+TEST $CLI volume set $V1 data-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.data-self-heal';
+TEST $CLI volume set $V1 cluster.data-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.data-self-heal';
+
+#Setting metadata-self-heal option on for distribute-replicate volume
+TEST $CLI volume set $V1 metadata-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.metadata-self-heal';
+TEST $CLI volume set $V1 cluster.metadata-self-heal on
+
+#Setting entry-self-heal option on for distribute-replicate volume
+TEST $CLI volume set $V1 entry-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.entry-self-heal';
+TEST $CLI volume set $V1 cluster.entry-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.entry-self-heal';
+
+cleanup
diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
new file mode 100644
index 00000000000..b6af487a791
--- /dev/null
+++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+function count_N/A_brick_pids {
+ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -- '\-1' | sort | uniq | wc -l
+}
+
+function check_peers {
+ $CLI_2 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 3
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume set all cluster.brick-multiplex on
+#bug-1609163 - bricks of normal volume should not attach to bricks of gluster_shared_storage volume
+
+##Create, start and mount meta_volume i.e., shared_storage
+TEST $CLI_1 volume create $META_VOL replica 3 $H1:$B1/${META_VOL}1 $H2:$B2/${META_VOL}1 $H3:$B3/${META_VOL}1
+TEST $CLI_1 volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H1 --volfile-id $META_VOL $META_MNT
+
+TEST $CLI_1 volume info gluster_shared_storage
+
+EXPECT 3 count_brick_processes
+
+#create and start a new volume
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/${V0}{1..3} $H2:$B2/${V0}{1..3}
+TEST $CLI_1 volume start $V0
+
+# bricks of normal volume should not attach to bricks of gluster_shared_storage volume
+EXPECT 5 count_brick_processes
+
+#bug-1549996 - stale brick processes on the nodes after volume deletion
+
+TEST $CLI_1 volume create $V1 replica 3 $H1:$B1/${V1}{1..3} $H2:$B2/${V1}{1..3}
+TEST $CLI_1 volume start $V1
+
+EXPECT 5 count_brick_processes
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume stop $V1
+
+EXPECT 3 count_brick_processes
+
+TEST $CLI_1 volume stop $META_VOL
+
+TEST $CLI_1 volume delete $META_VOL
+TEST $CLI_1 volume delete $V0
+TEST $CLI_1 volume delete $V1
+
+#bug-1773856 - Brick process fails to come up with brickmux on
+
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 $H3:$B3/${V0}1 force
+TEST $CLI_1 volume start $V0
+
+
+EXPECT 3 count_brick_processes
+
+#create and start a new volume
+TEST $CLI_1 volume create $V1 $H1:$B1/${V1}2 $H2:$B2/${V1}2 $H3:$B3/${V1}2 force
+TEST $CLI_1 volume start $V1
+
+EXPECT 3 count_brick_processes
+
+V2=patchy2
+TEST $CLI_1 volume create $V2 $H1:$B1/${V2}3 $H2:$B2/${V2}3 $H3:$B3/${V2}3 force
+TEST $CLI_1 volume start $V2
+
+EXPECT 3 count_brick_processes
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+
+TEST kill_node 1
+
+sleep 10
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+$CLI_2 volume set $V0 performance.readdir-ahead on
+$CLI_2 volume set $V1 performance.readdir-ahead on
+
+TEST $glusterd_1;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids
+
+cleanup;
diff --git a/tests/bugs/glusterd/brick-mux-validation.t b/tests/bugs/glusterd/brick-mux-validation.t
new file mode 100644
index 00000000000..61b0455f9a8
--- /dev/null
+++ b/tests/bugs/glusterd/brick-mux-validation.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+#bug-1451248 - validate brick mux after glusterd reboot
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+EXPECT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
+
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{1..3}
+TEST $CLI volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+
+#bug-1560957 - brick status goes offline after remove-brick followed by add-brick
+
+pkill glusterd
+TEST glusterd
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 force
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1_new force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+
+#bug-1446172 - reset brick with brick multiplexing enabled
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1_new start
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 5 online_brick_count
+EXPECT 1 count_brick_processes
+
+# Negative case with brick killed but volume-id xattr present
+TEST ! $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit
+
+# reset-brick commit force should work and should bring up the brick
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1_new $H0:$B0/${V0}1_new commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+EXPECT 1 count_brick_processes
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 $M1;
+# Create files
+for i in {1..5}
+do
+ echo $i > $M1/file$i.txt
+done
+
+TEST $CLI volume reset-brick $V1 $H0:$B0/${V1}1 start
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 5 online_brick_count
+EXPECT 1 count_brick_processes
+
+# Simulate reset disk
+for i in {1..5}
+do
+ rm -rf $B0/${V1}1/file$i.txt
+done
+
+setfattr -x trusted.glusterfs.volume-id $B0/${V1}1
+setfattr -x trusted.gfid $B0/${V1}1
+
+# Test reset-brick commit. Using CLI_IGNORE_PARTITION since normal CLI uses
+# the --wignore flag that essentially makes the command act like "commit force"
+TEST $CLI_IGNORE_PARTITION volume reset-brick $V1 $H0:$B0/${V1}1 $H0:$B0/${V1}1 commit
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+EXPECT 1 count_brick_processes
+
+cleanup;
diff --git a/tests/bugs/glusterd/brick-mux.t b/tests/bugs/glusterd/brick-mux.t
new file mode 100644
index 00000000000..927940534c1
--- /dev/null
+++ b/tests/bugs/glusterd/brick-mux.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+cleanup
+
+#bug-1444596 - validating brick mux
+
+TEST glusterd -LDEBUG
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+#bug-1499509 - stop all the bricks when a brick process is killed
+kill -9 $(pgrep glusterfsd)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 online_brick_count
+
+TEST $CLI volume start $V0 force
+TEST $CLI volume start $V1 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume set $V1 performance.io-cache-size 32MB
+TEST $CLI volume stop $V1
+TEST $CLI volume start $V1
+
+#Check No. of brick processes after change option
+EXPECT 2 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd should not be NA
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 2 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd should not be NA
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 2 count_brick_processes
+
+#bug-1444596_brick_mux_posix_hlth_chk_status
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $H0:$B0/brick{0,1}
+
+#Check No. of brick processes after remove brick from back-end
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 online_brick_count
+
+TEST glusterfs -s $H0 --volfile-id $V1 $M0
+TEST touch $M0/file{1..10}
+
+pkill glusterd
+TEST glusterd -LDEBUG
+sleep 5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 online_brick_count
+
+cleanup
+
diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t
new file mode 100644
index 00000000000..0be31dac768
--- /dev/null
+++ b/tests/bugs/glusterd/brick-order-check-add-brick.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3 -NO_DEBUG -NO_FORCE
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 replica 3 $H1:$L1/$V0 $H2:$L2/$V0 $H3:$L3/$V0
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#add-brick with or without mentioning the replica count should not fail
+TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}_1 $H2:$L2/${V0}_1 $H3:$L3/${V0}_1
+EXPECT '2 x 3 = 6' volinfo_field $V0 'Number of Bricks'
+
+TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_2 $H2:$L2/${V0}_2 $H3:$L3/${V0}_2
+EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
+
+#adding bricks from same host should fail the brick order check
+TEST ! $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5
+EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
+
+#adding bricks from same host with force should succeed
+TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force
+EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks'
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#Add-brick with Increasing replica count
+TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
+#Add-brick with Increasing replica count from same host should fail
+TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3
+
+#adding multiple bricks from same host should fail the brick order check
+TEST ! $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9}
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1070734.t b/tests/bugs/glusterd/bug-1070734.t
new file mode 100755
index 00000000000..0afcb3b37b3
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1070734.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0;
+
+############################################################################
+#TEST-PLAN:
+#Create a directory DIR and a file inside DIR
+#check the hash brick of the file
+#delete the directory for recreating later after remove-brick
+#remove the brick where the files hashed to
+#After remove-brick status says complete go on creating the same directory \
+#DIR and file
+#Check if the file now falls into the other brick
+#Check if the other brick gets the full layout and the remove brick gets \
+#the zeroed layout
+############################################################################
+
+TEST mkdir $N0/DIR;
+
+TEST touch $N0/DIR/file;
+
+if [ -f $B0/${V0}1/DIR/file ]
+then
+ HASHED=$B0/${V0}1;
+ OTHERBRICK=$B0/${V0}2;
+else
+ HASHED=$B0/${V0}2;
+ OTHERBRICK=$B0/${V0}1;
+fi
+
+TEST rm -f $N0/DIR/file;
+TEST rmdir $N0/DIR;
+TEST $CLI volume remove-brick $V0 $H0:${HASHED} start;
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" \
+"$H0:${HASHED}";
+
+TEST mkdir $N0/DIR;
+TEST touch $N0/DIR/file;
+
+#Check now the file should fall in to OTHERBRICK
+TEST [ -f ${OTHERBRICK}/DIR/file ]
+
+#Check the DIR on HASHED should have got zeroed layout and the \
+#OTHERBRICK should have got full layout
+shorter_layout () {
+ dht_get_layout $1 | cut -c 19-
+}
+EXPECT "0000000000000000" shorter_layout $HASHED/DIR ;
+EXPECT "00000000ffffffff" shorter_layout $OTHERBRICK/DIR;
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1085330-and-bug-916549.t b/tests/bugs/glusterd/bug-1085330-and-bug-916549.t
new file mode 100644
index 00000000000..892a30d74ea
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1085330-and-bug-916549.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+STR="1234567890"
+volname="Vol"
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+#testcase: bug-1085330
+
+# Construct volname string such that its more than 256 characters
+for i in {1..30}
+do
+ volname+=$STR
+done
+# Now $volname is more than 256 chars
+
+TEST ! $CLI volume create $volname $H0:$B0/${volname}{1,2};
+
+TEST $CLI volume info;
+
+# Construct brick string such that its more than 256 characters
+volname="Vol1234"
+brick="brick"
+for i in {1..30}
+do
+ brick+=$STR
+done
+# Now $brick1 is more than 256 chars
+
+TEST ! $CLI volume create $volname $H0:$B0/$brick;
+
+TEST $CLI volume info;
+
+# Now try to create a volume with couple of bricks (strlen(volname) = 128 &
+# strlen(brick1) = 128
+# Command should still fail as strlen(volp path) > 256
+
+volname="Volume-0"
+brick="brick-00"
+STR="12345678"
+
+for i in {1..15}
+do
+ volname+=$STR
+ brick+=$STR
+done
+TEST ! $CLI volume create $volname $H0:$B0/$brick;
+
+TEST $CLI volume info;
+
+# test case with brick path as 255 and a trailing "/"
+brick=""
+STR1="12345678"
+volname="vol"
+
+for i in {1..31}
+do
+ brick+=$STR1
+done
+brick+="123456/"
+
+echo $brick | wc -c
+# Now $brick is exactly 255 chars, but at end a trailing space
+# This will still fail as volfpath exceeds more than _POSIX_MAX chars
+
+TEST ! $CLI volume create $volname $H0:$B0/$brick;
+
+TEST $CLI volume info;
+
+# Positive test case
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+TEST $CLI volume info;
+
+TEST $CLI volume start $V0;
+
+#testcase: bug-916549
+
+pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/);
+brick_pid=$(cat $GLUSTERD_PIDFILEDIR/vols/$V0/$pid_file);
+
+kill -SIGKILL $brick_pid;
+TEST $CLI volume start $V0 force;
+TEST process_leak_count $(pidof glusterd);
+
+cleanup
+
diff --git a/tests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t b/tests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t
new file mode 100755
index 00000000000..9ac9d8fedd9
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+## Lets create partitions for bricks
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 200M $B0/brick2
+TEST truncate -s 200M $B0/brick3
+TEST truncate -s 200M $B0/brick4
+
+
+TEST LO1=`SETUP_LOOP $B0/brick1`
+TEST LO2=`SETUP_LOOP $B0/brick2`
+TEST LO3=`SETUP_LOOP $B0/brick3`
+TEST LO4=`SETUP_LOOP $B0/brick4`
+
+
+TEST MKFS_LOOP $LO1
+TEST MKFS_LOOP $LO2
+TEST MKFS_LOOP $LO3
+TEST MKFS_LOOP $LO4
+
+TEST mkdir -p ${B0}/${V0}{0..3}
+
+
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
+TEST MOUNT_LOOP $LO4 $B0/${V0}3
+
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+
+CLI_1_WITHOUT_WIGNORE=$(echo $CLI_1 | sed 's/ --wignore//')
+
+
+# Creating volume with non resolvable host name
+TEST ! $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick redhat:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick redhat:$B0/${V0}3/brick
+
+
+#Workaround for Bug:1091935
+#Failure to create volume above leaves 1st brick with xattrs.
+rm -rf $B0/${V0}{0..3}/brick;
+
+
+# Creating distribute-replica volume with bad brick order. It will fail
+# due to bad brick order.
+TEST ! $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick $H1:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick $H1:$B0/${V0}3/brick
+
+
+
+#Workaround for Bug:1091935
+#Failure to create volume above leaves 1st brick with xattrs.
+rm -rf $B0/${V0}{0..3}/brick;
+
+
+
+# Test for positive case, volume create should pass for
+# resolved hostnames and bricks in order.
+TEST $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick $H2:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick $H2:$B0/${V0}3/brick
+
+# Delete the volume as we want to reuse bricks
+TEST $CLI_1_WITHOUT_WIGNORE volume delete $V0
+
+
+# Now with force at the end of command it will bypass brick-order check
+# for replicate or distribute-replicate volume. and it will create volume
+TEST $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick $H1:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick $H1:$B0/${V0}3/brick force
+
+# Need to cleanup the loop back devices.
+UMOUNT_LOOP ${B0}/${V0}{0..3}
+rm -f ${B0}/brick{1..4}
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
new file mode 100644
index 00000000000..7be076caaf3
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+## Test case for stopping all running daemons service on peer detach.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start a 2 node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+
+## Creating and starting volume
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H1:$B1/${V0}1
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume set $V0 nfs.disable off
+
+## To Do: Add test case for quota and snapshot daemon. Currently quota
+## Daemon is not working in cluster framework. And sanpd daemon
+## Start only in one node in cluster framework. Add test case
+## once patch http://review.gluster.org/#/c/11666/ merged,
+
+## We are having 2 node "nfs" daemon should run on both node.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_nfs_count
+
+## Detach 2nd node from the cluster.
+TEST $CLI_1 peer detach $H2;
+
+
+## After detaching 2nd node we will have only 1 nfs and quota daemon running.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_nfs_count
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t b/tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t
new file mode 100644
index 00000000000..c229d4371b6
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create volume V0 and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Lets create volume V1 and start the volume
+TEST $CLI volume create $V1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V1
+
+## Enable quota on 2nd volume
+TEST $CLI volume quota $V1 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_quotad_count
+
+## Killing all gluster process
+pkill gluster;
+
+## there should not be any quota daemon running after killing quota process
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_quotad_count
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Quotad daemon should start on restarting the glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_quotad_count
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t b/tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t
new file mode 100644
index 00000000000..967595e4dbb
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#Tests for add new line in peers file
+function add_new_line_to_peer_file {
+ UUID_NAME=$($CLI_1 peer status | grep Uuid)
+ PEER_ID=$(echo $UUID_NAME | cut -c 7-)
+ GD_WD=$($CLI_1 system getwd)
+ GD_WD+=/peers/
+ PATH_TO_PEER_FILE=$GD_WD$PEER_ID
+ sed -i '1s/^/\n/gm; $s/$/\n/gm' $PATH_TO_PEER_FILE
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+add_new_line_to_peer_file
+
+TEST kill_glusterd 1
+TEST $glusterd_1
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1595320.t b/tests/bugs/glusterd/bug-1595320.t
new file mode 100644
index 00000000000..c10e11821a1
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1595320.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup
+
+function count_up_bricks {
+ $CLI --xml volume status $V0 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+# Setup 3 LVMS
+LVM_PREFIX="test"
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+# Start glusterd
+TEST glusterd
+TEST pidof glusterd
+
+# Create volume and enable brick multiplexing
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3
+TEST $CLI v set all cluster.brick-multiplex on
+
+# Start the volume
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Kill volume ungracefully
+brick_pid=`pgrep glusterfsd`
+
+# Make sure every brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+
+b1_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-1*.pid)
+b2_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-2*.pid)
+b3_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-3*.pid)
+
+kill -9 $brick_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_brick_processes
+
+# Unmount 3rd brick root from node
+brick_root=$L3
+_umount_lv 3
+
+# Start the volume only 2 brick should be start
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+brick_pid=`pgrep glusterfsd`
+
+# Make sure only two brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 0 ]
+
+# Mount the brick root
+TEST mkdir -p $brick_root
+TEST mount -t xfs -o nouuid /dev/test_vg_3/brick_lvm $brick_root
+
+# Replace brick_pid file to test brick_attach code
+TEST cp $b1_pid_file $b3_pid_file
+
+# Start the volume all brick should be up
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Make sure every brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t
new file mode 100644
index 00000000000..e1c1eb2ceb9
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1696046.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function count_up_bricks {
+ $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+logdir=`gluster --print-logdir`
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume start $V1;
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V1
+
+EXPECT 1 count_brick_processes
+
+# Mount V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+function client-log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+function brick-log-file-name()
+{
+ logfilename=$B0"/"$V0"1.log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`client-log-file-name`
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+brick_log_file=$logdir"/bricks/"`brick-log-file-name`
+nofdlog=$(cat $brick_log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+## Set brick-log-level to DEBUG
+TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG
+
+# Do some operation
+touch $M0/file1
+
+# Check debug message debug message should be exist only for V0
+# Server xlator is common in brick_mux so after enabling DEBUG log
+# some debug message should be available for other xlators like posix
+
+brick_log_file=$logdir"/bricks/"`brick-log-file-name`
+nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l)
+TEST [ $((nofdlog)) -ne 0 ]
+
+#Check if any debug log exist in client-log file
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+## Set brick-log-level to INFO
+TEST $CLI volume set $V0 diagnostics.brick-log-level INFO
+
+## Set client-log-level to DEBUG
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+# Do some operation
+touch $M0/file2
+
+nofdlog=$(cat $brick_log_file | grep " D " | grep file2 | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -ne 0 ]
+
+# Unmount V0
+TEST umount $M0
+
+#Mount V1
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#do some operation
+touch $M0/file3
+
+
+# DEBUG log level is enabled only for V0 so no debug message should be available
+# in log specific to file2 creation except for server xlator, server xlator is
+# common xlator in brick mulitplex
+nofdlog=$(cat $brick_log_file | grep file3 | grep -v server | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+# Unmount V1
+TEST umount $M0
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
new file mode 100644
index 00000000000..bb8d4f46eb8
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1699339.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+NUM_VOLS=15
+
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name}
+ TEST $CLI_1 volume start $vol_name
+}
+
+TEST launch_cluster 3
+TEST $CLI_1 volume set all cluster.brick-multiplex on
+
+# The option accepts the value in the range from 5 to 200
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4
+
+TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=28
+for i in $(seq 1 $NUM_VOLS); do
+ starttime="$(date +%s)";
+ create_volume $i
+done
+
+TEST kill_glusterd 1
+
+TESTS_EXPECTED_IN_LOOP=4
+for i in `seq 1 3 15`
+do
+vol1=$(printf "%s-vol%02d" $V0 $i)
+TEST $CLI_2 volume set $vol1 performance.readdir-ahead on
+done
+
+# Bring back 1st glusterd
+TEST $glusterd_1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TESTS_EXPECTED_IN_LOOP=4
+for i in `seq 1 3 15`
+do
+vol1=$(printf "%s-vol%02d" $V0 $i)
+EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead
+done
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1720566.t b/tests/bugs/glusterd/bug-1720566.t
new file mode 100644
index 00000000000..99bcf6ff785
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1720566.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+
+cleanup;
+V0="TestLongVolnamec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9abcd"
+V1="TestLongVolname3102bd28a16c49440bd5210e4ec4d5d93102bd28a16c49440bd5210e4ec4d5d933102bd28a16c49440bd5210e4ebbcd"
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+$CLI_1 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1
+EXPECT 'Created' cluster_volinfo_field 1 $V1 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V1
+EXPECT 'Started' cluster_volinfo_field 1 $V1 'Status';
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V1 $M1;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST mkdir $M1/dir{1..4};
+TEST touch $M1/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}_1 $H2:$B2/${V0}_1
+TEST $CLI_1 volume add-brick $V1 $H1:$B1/${V1}_1 $H2:$B2/${V1}_1
+
+
+TEST $CLI_1 volume rebalance $V0 start
+TEST $CLI_1 volume rebalance $V1 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V1
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-824753-file-locker.c b/tests/bugs/glusterd/bug-824753-file-locker.c
new file mode 100644
index 00000000000..f5dababad30
--- /dev/null
+++ b/tests/bugs/glusterd/bug-824753-file-locker.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1;
+ int ret = -1;
+ char command[2048] = "";
+ char filepath[255] = "";
+ struct flock fl;
+
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 7;
+ fl.l_len = 1;
+ fl.l_pid = getpid();
+
+ snprintf(filepath, 255, "%s/%s", argv[4], argv[5]);
+
+ fd = open(filepath, O_RDWR);
+
+ if (fd == -1)
+ return -1;
+
+ if (fcntl(fd, F_SETLKW, &fl) == -1) {
+ return -1;
+ }
+
+ snprintf(command, sizeof(command),
+ "gluster volume clear-locks %s /%s kind all posix 0,7-1 |"
+ " grep %s | awk -F'..: ' '{print $1}' | grep %s:%s/%s",
+ argv[1], argv[5], argv[2], argv[2], argv[3], argv[1]);
+
+ ret = system(command);
+ close(fd);
+
+ if (ret)
+ return -1;
+ else
+ return 0;
+}
diff --git a/tests/bugs/glusterd/bug-824753.t b/tests/bugs/glusterd/bug-824753.t
new file mode 100755
index 00000000000..b969e28f35e
--- /dev/null
+++ b/tests/bugs/glusterd/bug-824753.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+touch $M0/file1;
+
+TEST $CC -g $(dirname $0)/bug-824753-file-locker.c -o $(dirname $0)/file-locker
+
+TEST $(dirname $0)/file-locker $V0 $H0 $B0 $M0 file1
+
+## Finish up
+TEST rm -f $(dirname $0)/file-locker
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-948729/bug-948729-force.t b/tests/bugs/glusterd/bug-948729/bug-948729-force.t
new file mode 100644
index 00000000000..f4f71f9a1e2
--- /dev/null
+++ b/tests/bugs/glusterd/bug-948729/bug-948729-force.t
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+V1=patchy1
+V2=patchy2
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+B3=/d/backends/3
+B4=/d/backends/4
+B5=/d/backends/5
+B6=/d/backends/6
+
+mkdir -p $B3 $B4 $B5 $B6
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+TEST truncate -s 16M $B4/brick4
+TEST truncate -s 16M $B5/brick5
+TEST truncate -s 16M $B6/brick6
+
+TEST LD1=`SETUP_LOOP $B1/brick1`
+TEST MKFS_LOOP $LD1
+TEST LD2=`SETUP_LOOP $B2/brick2`
+TEST MKFS_LOOP $LD2
+TEST LD3=`SETUP_LOOP $B3/brick3`
+TEST MKFS_LOOP $LD3
+TEST LD4=`SETUP_LOOP $B4/brick4`
+TEST MKFS_LOOP $LD4
+TEST LD5=`SETUP_LOOP $B5/brick5`
+TEST MKFS_LOOP $LD5
+TEST LD6=`SETUP_LOOP $B6/brick6`
+TEST MKFS_LOOP $LD6
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0 $B4/$V0 $B5/$V0 $B6/$V0
+
+TEST MOUNT_LOOP $LD1 $B1/$V0
+TEST MOUNT_LOOP $LD2 $B2/$V0
+TEST MOUNT_LOOP $LD3 $B3/$V0
+TEST MOUNT_LOOP $LD4 $B4/$V0
+TEST MOUNT_LOOP $LD5 $B5/$V0
+TEST MOUNT_LOOP $LD6 $B6/$V0
+
+#Case 0: Parent directory of the brick is absent
+TEST ! $CLI1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2 force
+
+#Case 1: File system root is being used as brick directory
+TEST $CLI1 volume create $V0 $H1:$B5/$V0 $H2:$B6/$V0 force
+
+#Case 2: Brick directory contains only one component
+TEST $CLI1 volume create $V1 $H1:/$uuid1 $H2:/$uuid2 force
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $CLI1 volume create $V2 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2 force
+
+#add-brick tests
+TEST ! $CLI1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/brick3 force
+TEST $CLI1 volume add-brick $V0 $H1:$B3/$V0 force
+TEST $CLI1 volume add-brick $V1 $H1:/$uuid3 force
+TEST $CLI1 volume add-brick $V2 $H1:$B4/$V0/brick3 force
+
+#####replace-brick tests
+#FIX-ME: replace-brick does not work with the newly introduced cluster test
+#####framework
+
+rmdir /$uuid1 /$uuid2 /$uuid3;
+
+$CLI volume stop $V0
+$CLI volume stop $V1
+$CLI volume stop $V2
+
+UMOUNT_LOOP $B1/$V0
+UMOUNT_LOOP $B2/$V0
+UMOUNT_LOOP $B3/$V0
+UMOUNT_LOOP $B4/$V0
+UMOUNT_LOOP $B5/$V0
+UMOUNT_LOOP $B6/$V0
+
+rm -f $B1/brick1
+rm -f $B2/brick2
+rm -f $B3/brick3
+rm -f $B4/brick4
+rm -f $B5/brick5
+rm -f $B6/brick6
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t b/tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t
new file mode 100644
index 00000000000..18bf9a1c4b6
--- /dev/null
+++ b/tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+B3=/d/backends/3
+mkdir -p $B3
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+
+TEST LD1=`SETUP_LOOP $B1/brick1`
+TEST MKFS_LOOP $LD1
+TEST LD2=`SETUP_LOOP $B2/brick2`
+TEST MKFS_LOOP $LD2
+TEST LD3=`SETUP_LOOP $B3/brick3`
+TEST MKFS_LOOP $LD3
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0
+
+TEST MOUNT_LOOP $LD1 $B1/$V0
+TEST MOUNT_LOOP $LD2 $B2/$V0
+TEST MOUNT_LOOP $LD3 $B3/$V0
+
+cli1=$(echo $CLI1 | sed 's/ --wignore//')
+
+#Case 0: Parent directory of the brick is absent
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2
+
+#Case 1: File system root being used as brick directory
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+
+#Case 2: Brick directory contains only one component
+TEST ! $cli1 volume create $V0 $H1:/$uuid1 $H2:/$uuid2
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $cli1 volume create $V0 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2
+
+#add-brick tests
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/brick3
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0
+TEST ! $cli1 volume add-brick $V0 $H1:/$uuid3
+TEST $cli1 volume add-brick $V0 $H1:$B3/$V0/brick3
+
+#####replace-brick tests
+#FIX-ME : replace-brick does not currently work in the newly introduced
+#####cluster test framework
+
+$CLI1 volume stop $V0
+
+UMOUNT_LOOP $B1/$V0
+UMOUNT_LOOP $B2/$V0
+UMOUNT_LOOP $B3/$V0
+
+rm -f $B1/brick1
+rm -f $B2/brick2
+rm -f $B3/brick3
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-948729/bug-948729.t b/tests/bugs/glusterd/bug-948729/bug-948729.t
new file mode 100644
index 00000000000..2b574aa1a14
--- /dev/null
+++ b/tests/bugs/glusterd/bug-948729/bug-948729.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+B3=/d/backends/3
+
+mkdir -p $B3
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+
+TEST LD1=`SETUP_LOOP $B1/brick1`
+TEST MKFS_LOOP $LD1
+TEST LD2=`SETUP_LOOP $B2/brick2`
+TEST MKFS_LOOP $LD2
+TEST LD3=`SETUP_LOOP $B3/brick3`
+TEST MKFS_LOOP $LD3
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0
+
+TEST MOUNT_LOOP $LD1 $B1/$V0
+TEST MOUNT_LOOP $LD2 $B2/$V0
+TEST MOUNT_LOOP $LD3 $B3/$V0
+
+#Tests without options 'mode=script' and 'wignore'
+cli1=$(echo $CLI1 | sed 's/ --mode=script//')
+cli1=$(echo $cli1 | sed 's/ --wignore//')
+#Case 0: Parent directory of the brick is absent
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2
+
+#Case 1: File system root being used as brick directory
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+
+#Case 2: Brick directory contains only one component
+TEST ! $cli1 volume create $V0 $H1:/$uuid1 $H2:/$uuid2
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $cli1 volume create $V0 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2
+
+#add-brick tests
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/b3
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0
+TEST ! $cli1 volume add-brick $V0 $H1:/$uuid3
+TEST $cli1 volume add-brick $V0 $H1:$B3/$V0/brick3
+
+#####replace-brick tests
+#FIX-ME: Replace-brick does not work currently in the newly introduced cluster
+#####test framework.
+
+$CLI1 volume stop $V0
+
+UMOUNT_LOOP $B1/$V0
+UMOUNT_LOOP $B2/$V0
+UMOUNT_LOOP $B3/$V0
+
+rm -f $B1/brick1
+rm -f $B2/brick2
+rm -f $B3/brick3
+
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-949930.t b/tests/bugs/glusterd/bug-949930.t
new file mode 100644
index 00000000000..9a6d38fa37f
--- /dev/null
+++ b/tests/bugs/glusterd/bug-949930.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+V1=patchy2
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume start $V0;
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{1,2};
+TEST $CLI volume set $V1 nfs.disable off
+TEST $CLI volume start $V1;
+
+TEST ! $CLI volume set $V0 performance.nfs.read-ahead blah
+EXPECT '' volume_option $V0 performance.nfs.read-ahead
+
+TEST $CLI volume set $V0 performance.nfs.read-ahead on
+EXPECT "on" volume_option $V0 performance.nfs.read-ahead
+
+EXPECT '' volume_option $V1 performance.nfs.read-ahead
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/check_elastic_server.t b/tests/bugs/glusterd/check_elastic_server.t
new file mode 100644
index 00000000000..41d2140aa2b
--- /dev/null
+++ b/tests/bugs/glusterd/check_elastic_server.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+function cluster_rebalance_status {
+ local vol=$1
+ $CLI_2 volume status | grep -iw "Rebalance" -A 5 | grep "Status" | sed 's/.*: //'
+}
+
+cleanup;
+TEST launch_cluster 4;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+TEST $CLI_1 peer probe $H4;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+#Mount invalid volume
+TEST ! glusterfs -s $H1 --volfile-id=$V0_NA $M0;
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume remove-brick $V0 $H1:$B1/$V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_remove_brick_status_completed_field "$V0 $H1:$B1/$V0"
+
+TEST $CLI_1 volume remove-brick $V0 $H1:$B1/$V0 commit
+
+kill_glusterd 1
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+TEST $CLI_2 volume add-brick $V0 $H3:$B3/$V0
+
+TEST $CLI_2 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status $V0
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+TEST $CLI_2 volume add-brick $V0 $H4:$B4/$V0
+
+TEST $CLI_2 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status $V0
+kill_glusterd 2
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/daemon-log-level-option.t b/tests/bugs/glusterd/daemon-log-level-option.t
new file mode 100644
index 00000000000..66e55e3d758
--- /dev/null
+++ b/tests/bugs/glusterd/daemon-log-level-option.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+function Info_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " I " | wc -l
+}
+
+function Warning_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " W " | wc -l
+}
+
+function Debug_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " D " | wc -l
+}
+
+function Trace_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " T " | wc -l
+}
+
+cleanup;
+
+# Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+# set cluster.daemon-log-level option to DEBUG
+TEST $CLI volume set all cluster.daemon-log-level DEBUG
+
+#Create a 3X2 distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+# stop the volume and remove glustershd log
+TEST $CLI volume stop $V0
+rm -f /var/log/glusterfs/glustershd.log
+
+# set cluster.daemon-log-level option to INFO and start the volume
+TEST $CLI volume set all cluster.daemon-log-level INFO
+TEST $CLI volume start $V0
+
+# log should not have any debug messages
+EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+# stop the volume and remove glustershd log
+TEST $CLI volume stop $V0
+rm -f /var/log/glusterfs/glustershd.log
+
+# set cluster.daemon-log-level option to WARNING and start the volume
+TEST $CLI volume set all cluster.daemon-log-level WARNING
+TEST $CLI volume start $V0
+
+# log should not have any info messages
+EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any debug messages
+EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+# stop the volume and remove glustershd log
+TEST $CLI volume stop $V0
+rm -f /var/log/glusterfs/glustershd.log
+
+# set cluster.daemon-log-level option to ERROR and start the volume
+TEST $CLI volume set all cluster.daemon-log-level ERROR
+TEST $CLI volume start $V0
+
+# log should not have any info messages
+EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any warning messages
+EXPECT 0 Warning_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any debug messages
+EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+cleanup
diff --git a/tests/bugs/glusterd/df-results-post-replace-brick-operations.t b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t
new file mode 100644
index 00000000000..04f75889388
--- /dev/null
+++ b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+TEST truncate -s 100M $B0/brick3
+TEST truncate -s 100M $B0/brick4
+TEST truncate -s 100M $B0/brick5
+
+LO1=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+
+LO2=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+
+LO3=`SETUP_LOOP $B0/brick3`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+
+LO4=`SETUP_LOOP $B0/brick4`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO4
+
+LO5=`SETUP_LOOP $B0/brick5`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO5
+
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3 $B0/${V0}4 $B0/${V0}5
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+TEST MOUNT_LOOP $LO3 $B0/${V0}3
+TEST MOUNT_LOOP $LO4 $B0/${V0}4
+TEST MOUNT_LOOP $LO5 $B0/${V0}5
+
+# create a subdirectory in mount point and use it for volume creation
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}3/brick1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count
+
+# mount the volume and check the size at mount point
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
+
+# perform replace brick operations
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}4/brick1 commit force
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}5/brick1 commit force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# check for the size at mount point, it should be same as previous
+total_space_new=$(df -P $M0 | tail -1 | awk '{ print $2}')
+TEST [ $total_space -eq $total_space_new ]
diff --git a/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t b/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t
new file mode 100644
index 00000000000..8001359e6b3
--- /dev/null
+++ b/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t
@@ -0,0 +1,71 @@
+#! /bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup
+
+TEST launch_cluster 3
+
+TEST $CLI_1 peer probe $H2
+
+#bug-1109741 - validate mgmt handshake
+
+TEST ! $CLI_3 peer probe $H1
+
+GD1_WD=$($CLI_1 system getwd)
+OP_VERS_ORIG=$(grep 'operating-version' ${GD1_WD}/glusterd.info | cut -d '=' -f 2)
+
+TEST $CLI_3 system uuid get # Needed for glusterd.info to be created
+
+GD3_WD=$($CLI_3 system getwd)
+TEST sed -rnie "'s/(operating-version=)\w+/\130600/gip'" ${GD3_WD}/glusterd.info
+
+TEST kill_glusterd 3
+TEST start_glusterd 3
+
+TEST ! $CLI_3 peer probe $H1
+
+OP_VERS_NEW=$(grep 'operating-version' ${GD1_WD}/glusterd.info | cut -d '=' -f 2)
+TEST [[ $OP_VERS_ORIG == $OP_VERS_NEW ]]
+
+#bug-948686 - volume sync after bringing up the killed node
+
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 3
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/$V0 $H1:$B1/${V0}_1 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
+TEST glusterfs --volfile-server=$H1 --volfile-id=$V0 $M0
+
+#kill a node
+TEST kill_node 3
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers 2
+
+#modify volume config to see change in volume-sync
+TEST $CLI_1 volume set $V0 write-behind off
+#add some files to the volume to see effect of volume-heal cmd
+TEST touch $M0/{1..100};
+TEST $CLI_1 volume stop $V0;
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 'Stopped' volinfo_field_1 $V0 'Status'
+
+TEST $glusterd_3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 3
+
+sleep 5
+TEST $CLI_3 volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
+TEST $CLI_2 volume stop $V0;
+TEST $CLI_2 volume delete $V0;
+
+cleanup
diff --git a/tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t b/tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t
new file mode 100644
index 00000000000..99272e14245
--- /dev/null
+++ b/tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+function peer_count {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+#bug-1454418 - Setting Port number in specific range
+sysctl net.ipv4.ip_local_reserved_ports="24007-24008,32765-32768,49152-49156"
+
+TEST launch_cluster 4;
+
+#bug-1223213
+
+# Fool the cluster to operate with 3.5 version even though binary's op-version
+# is > 3.5. This is to ensure 3.5 code path is hit to test that volume status
+# works when a node is upgraded from 3.5 to 3.7 or higher as mgmt_v3 lock is
+# been introduced in 3.6 version and onwards
+
+GD1_WD=$($CLI_1 system getwd)
+$CLI_1 system uuid get
+Old_op_version=$(cat ${GD1_WD}/glusterd.info | grep operating-version | cut -d '=' -f 2)
+
+TEST sed -rnie "'s/(operating-version=)\w+/\130500/gip'" ${GD1_WD}/glusterd.info
+
+TEST kill_glusterd 1
+TEST start_glusterd 1
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+
+TEST `sed -i "s/"30500"/${Old_op_version}/g" ${GD1_WD}/glusterd.info`
+
+TEST kill_glusterd 1
+TEST start_glusterd 1
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2
+
+#bug-1454418
+sysctl net.ipv4.ip_local_reserved_ports="
+"
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+#bug-888752 - volume status --xml from peer in the cluster
+
+TEST $CLI_1 volume status $V0 $H2:$B2/$V0 --xml
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume create $V1 $H1:$B1/$V1
+
+# bug - 1635820
+# rebooting a node which doen't host bricks for any one volume
+# peer should not go into rejected state
+TEST kill_glusterd 2
+TEST start_glusterd 2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2
+
+TEST $CLI_1 volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
+
+TEST $CLI_1 volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V1 'Status'
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+TEST $CLI_1 peer probe $H4;
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count 1
+
+#bug-1173414 - validate mgmt-v3-remote-lock-failure
+
+for i in {1..20}
+do
+$CLI_1 volume set $V0 diagnostics.client-log-level DEBUG &
+$CLI_1 volume set $V1 barrier on
+$CLI_2 volume set $V0 diagnostics.client-log-level DEBUG &
+$CLI_2 volume set $V1 barrier on
+done
+
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count 1
+TEST $CLI_1 volume status
+TEST $CLI_2 volume status
+
+#bug-1293414 - validate peer detach
+
+# peers hosting bricks cannot be detached
+TEST ! $CLI_4 peer detach $H1
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count 1
+
+# peer not hosting bricks should be detachable
+TEST $CLI_4 peer detach $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+#bug-1344407 - deleting a volume when peer is down should fail
+
+#volume should be stopped before deletion
+TEST $CLI_1 volume stop $V0
+
+TEST kill_glusterd 2
+TEST ! $CLI_1 volume delete $V0
+
+cleanup
diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
new file mode 100644
index 00000000000..b89ca22415e
--- /dev/null
+++ b/tests/bugs/glusterd/optimized-basic-testcases.t
@@ -0,0 +1,305 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+function get_opret_value () {
+ local VOL=$1
+ $CLI volume info $VOL --xml | sed -ne 's/.*<opRet>\([-0-9]*\)<\/opRet>/\1/p'
+}
+
+function check_brick()
+{
+ vol=$1;
+ num=$2
+ $CLI volume info $V0 | grep "Brick$num" | awk '{print $2}';
+}
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+function get_brick_host_uuid()
+{
+ local vol=$1;
+ local uuid_regex='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
+ local host_uuid_list=$($CLI volume info $vol --xml | grep "brick.uuid" | grep -o -E "$uuid_regex");
+
+ echo $host_uuid_list | awk '{print $1}'
+}
+
+function generate_statedump_and_check_for_glusterd_info {
+ pid=`pidof glusterd`
+ #remove old stale statedumps
+ cleanup_statedump $pid
+ kill -USR1 $pid
+ #Wait till the statedump is generated
+ sleep 1
+ fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.")
+ cat $statedumpdir/$fname | grep "xlator.glusterd.priv" | wc -l
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+#bug-1238135-lazy-daemon-initialization-on-demand
+
+GDWD=$($CLI system getwd)
+
+# glusterd.info file will be created on either first peer probe or volume
+# creation, hence we expect file to be not present in this case
+TEST ! -e $GDWD/glusterd.info
+
+#bug-913487 - setting volume options before creation of volume should fail
+
+TEST ! $CLI volume set $V0 performance.open-behind off;
+TEST pidof glusterd;
+
+#bug-1433578 - glusterd should not crash after probing a invalid peer
+
+TEST ! $CLI peer probe invalid-peer
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+#bug-1786478 - default volume option after volume reset
+addr_family=`volinfo_field $V0 'transport.address-family'`
+TEST $CLI volume reset $V0
+EXPECT $addr_family volinfo_field $V0 'transport.address-family'
+
+#bug-955588 - uuid validation
+
+uuid=`grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=`
+EXPECT $uuid get_brick_host_uuid $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+#bug-958790 - set options from file
+
+touch $GLUSTERD_WORKDIR/groups/test
+echo "read-ahead=off" > $GLUSTERD_WORKDIR/groups/test
+echo "open-behind=off" >> $GLUSTERD_WORKDIR/groups/test
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 group test
+EXPECT "off" volume_option $V0 performance.read-ahead
+EXPECT "off" volume_option $V0 performance.open-behind
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#bug-1321836 - validate opret value for non existing volume
+
+EXPECT 0 get_opret_value $V0
+EXPECT -1 get_opret_value "novol"
+
+EXPECT '2' brick_count $V0
+
+#bug-862834 - validate brick status
+
+EXPECT "$H0:$B0/${V0}1" check_brick $V0 '1';
+EXPECT "$H0:$B0/${V0}2" check_brick $V0 '2';
+
+TEST ! $CLI volume create $V1 $H0:$B0/${V1}0 $H0:$B0/${V0}1;
+
+#bug-1482344 - setting volume-option-at-cluster-level should not result in glusterd crash
+
+TEST ! $CLI volume set all transport.listen-backlog 128
+
+# Check the volume info output, if glusterd would have crashed then this command
+# will fail
+TEST $CLI volume info $V0;
+
+#bug-1002556 and bug-1199451 - command should retrieve current op-version of the node
+TEST $CLI volume get all cluster.op-version
+
+#bug-1315186 - reject-lowering-down-op-version
+
+OP_VERS_ORIG=$(grep 'operating-version' ${GDWD}/glusterd.info | cut -d '=' -f 2)
+OP_VERS_NEW=`expr $OP_VERS_ORIG-1`
+
+TEST ! $CLI volume set all $V0 cluster.op-version $OP_VERS_NEW
+
+#bug-1022055 - validate log rotate command
+
+TEST ! $CLI volume log rotate $V0;
+TEST $CLI volume log $V0 rotate;
+
+#bug-1092841 - validating barrier enable/disable
+
+TEST $CLI volume barrier $V0 enable;
+TEST ! $CLI volume barrier $V0 enable;
+
+TEST $CLI volume barrier $V0 disable;
+TEST ! $CLI volume barrier $V0 disable;
+
+#bug-1095097 - validate volume profile command
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info
+
+#bug-839595 - validate server-quorum options
+
+TEST $CLI volume set $V0 cluster.server-quorum-type server
+EXPECT "server" volume_option $V0 cluster.server-quorum-type
+TEST $CLI volume set $V0 cluster.server-quorum-type none
+EXPECT "none" volume_option $V0 cluster.server-quorum-type
+TEST $CLI volume reset $V0 cluster.server-quorum-type
+TEST ! $CLI volume set $V0 cluster.server-quorum-type abc
+TEST ! $CLI volume set all cluster.server-quorum-type none
+TEST ! $CLI volume set $V0 cluster.server-quorum-ratio 100
+
+TEST ! $CLI volume set all cluster.server-quorum-ratio abc
+TEST ! $CLI volume set all cluster.server-quorum-ratio -1
+TEST ! $CLI volume set all cluster.server-quorum-ratio 100.0000005
+TEST $CLI volume set all cluster.server-quorum-ratio 0
+EXPECT "0" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 100
+EXPECT "100" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 0.0000005
+EXPECT "0.0000005" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 100%
+EXPECT "100%" volume_option $V0 cluster.server-quorum-ratio
+
+#bug-1265479 - validate-distributed-volume-options
+
+#Setting data-self-heal option on for distribute volume
+TEST ! $CLI volume set $V0 data-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.data-self-heal';
+TEST ! $CLI volume set $V0 cluster.data-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.data-self-heal';
+
+#Setting metadata-self-heal option on for distribute volume
+TEST ! $CLI volume set $V0 metadata-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.metadata-self-heal';
+TEST ! $CLI volume set $V0 cluster.metadata-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.metadata-self-heal';
+
+#Setting entry-self-heal option on for distribute volume
+TEST ! $CLI volume set $V0 entry-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.entrydata-self-heal';
+TEST ! $CLI volume set $V0 cluster.entry-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.entrydata-self-heal';
+
+#bug-1163108 - validate min-free-disk-option
+
+## Setting invalid value for option cluster.min-free-disk should fail
+TEST ! $CLI volume set $V0 min-free-disk ""
+TEST ! $CLI volume set $V0 min-free-disk 143.!/12
+TEST ! $CLI volume set $V0 min-free-disk 123%
+TEST ! $CLI volume set $V0 min-free-disk 194.34%
+
+## Setting fractional value as a size (unit is byte) for option
+## cluster.min-free-disk should fail
+TEST ! $CLI volume set $V0 min-free-disk 199.051
+TEST ! $CLI volume set $V0 min-free-disk 111.999
+
+## Setting valid value for option cluster.min-free-disk should pass
+TEST $CLI volume set $V0 min-free-disk 12%
+TEST $CLI volume set $V0 min-free-disk 56.7%
+TEST $CLI volume set $V0 min-free-disk 120
+TEST $CLI volume set $V0 min-free-disk 369.0000
+
+#bug-1179175-uss-option-validation
+
+## Set features.uss option with non-boolean value. These non-boolean value
+## for features.uss option should fail.
+TEST ! $CLI volume set $V0 features.uss abcd
+TEST ! $CLI volume set $V0 features.uss #$#$
+TEST ! $CLI volume set $V0 features.uss 2324
+
+## Setting other options with valid value. These options should succeed.
+TEST $CLI volume set $V0 barrier enable
+TEST $CLI volume set $V0 ping-timeout 60
+
+## Set features.uss option with valid boolean value. It should succeed.
+TEST $CLI volume set $V0 features.uss enable
+TEST $CLI volume set $V0 features.uss disable
+
+
+## Setting other options with valid value. These options should succeed.
+TEST $CLI volume set $V0 barrier enable
+TEST $CLI volume set $V0 ping-timeout 60
+
+#bug-1209329 - daemon-svcs-on-reset-volume
+
+##enable the bitrot and verify bitd is running or not
+TEST $CLI volume bitrot $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.bitrot'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+##Do reset force which set the bitrot options to default
+TEST $CLI volume reset $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_bitd_count
+
+##enable the uss option and verify snapd is running or not
+TEST $CLI volume set $V0 features.uss on
+EXPECT 'on' volinfo_field $V0 'features.uss'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_snapd_count
+
+##Do reset force which set the uss options to default
+TEST $CLI volume reset $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_snapd_count
+
+##verify initial nfs disabled by default
+EXPECT "0" get_nfs_count
+
+##enable nfs and verify
+TEST $CLI volume set $V0 nfs.disable off
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+EXPECT "1" get_nfs_count
+
+##Do reset force which set the nfs.option to default
+TEST $CLI volume reset $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_nfs_count
+
+##enable the uss option and verify snapd is running or not
+TEST $CLI volume set $V0 features.uss on
+EXPECT 'on' volinfo_field $V0 'features.uss'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_snapd_count
+
+##Disable the uss option using set command and verify snapd
+TEST $CLI volume set $V0 features.uss off
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_snapd_count
+
+##enable nfs.disable and verify
+TEST $CLI volume set $V0 nfs.disable on
+EXPECT 'on' volinfo_field $V0 'nfs.disable'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_nfs_count
+
+## disable nfs.disable option using set command
+TEST $CLI volume set $V0 nfs.disable off
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_nfs_count
+
+TEST $CLI volume info;
+TEST $CLI volume create $V1 $H0:$B0/${V1}1
+TEST $CLI volume start $V1
+pkill glusterd;
+pkill glusterfsd;
+TEST glusterd
+TEST $CLI volume status $V1
+
+#bug-853601 - Avoid using /var/lib/glusterd as a brick
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd force
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd/abc
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd/abc force
+mkdir -p /xyz/var/lib/glusterd/abc
+
+#bug 1716812 - volfile should be created with transport type both
+TEST $CLI volume create "test" transport tcp,rdma $H0:/xyz/var/lib/glusterd/abc
+EXPECT 'Created' volinfo_field "test" 'Status';
+
+#While taking a statedump, there is a TRY_LOCK on call_frame, which might may cause
+#failure. So Adding a EXPECT_WITHIN
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info
+
+cleanup_statedump `pidof glusterd`
+cleanup
diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t
new file mode 100644
index 00000000000..3cc3351b43b
--- /dev/null
+++ b/tests/bugs/glusterd/quorum-validation.t
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+TEST launch_cluster 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H2:$B2/${V0}1
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+TEST $CLI_1 volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+#bug-1177132 - sync server quorum options when a node is brought up
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 52
+
+#Bring down 2nd glusterd
+TEST kill_glusterd 2
+EXPECT_WITHIN $PROBE_TIMEOUT 0 peer_count
+
+#bug-1104642 - sync server quorum options when a node is brought up
+#set the volume all options from the 1st glusterd
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 80
+
+# Now quorum is not meet. Add-brick, Remove-brick, volume-set command
+#(Command based on syncop framework)should fail
+TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2
+TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start
+TEST ! $CLI_1 volume set $V0 barrier enable
+
+#quorum is not met, rebalance/profile start should fail
+TEST ! $CLI_1 volume rebalance $V0 start
+TEST ! $CLI_1 volume profile $V0 start
+
+#bug-1690753 - Volume stop when quorum not met is successful
+TEST ! $CLI_1 volume stop $V0
+
+#Bring back the 2nd glusterd
+TEST $glusterd_2
+
+#verify whether the value has been synced
+EXPECT_WITHIN $PROBE_TIMEOUT "80" volinfo_field_1 all cluster.server-quorum-ratio
+EXPECT_WITHIN $PROBE_TIMEOUT '1' peer_count
+EXPECT_WITHIN $PROBE_TIMEOUT "80" volinfo_field_2 all cluster.server-quorum-ratio
+
+# Now quorum is meet.
+# Add-brick, Remove-brick, volume-set command should success
+TEST $CLI_1 volume add-brick $V0 $H2:$B2/${V0}2
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0}2 start
+TEST $CLI_1 volume set $V0 barrier enable
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0}2 stop
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+## Stop the volume
+TEST $CLI_1 volume stop $V0
+
+## Bring down 2nd glusterd
+TEST kill_glusterd 2
+
+## Now quorum is not meet. Starting volume on 1st node should not success
+TEST ! $CLI_1 volume start $V0
+
+## Bring back 2nd glusterd
+TEST $glusterd_2
+
+# After 2nd glusterd come back, there will be 2 nodes in a cluster
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+## Now quorum is meet. Starting volume on 1st node should be success.
+TEST $CLI_1 volume start $V0
+
+# Now re-execute the same profile command and this time it should succeed
+TEST $CLI_1 volume profile $V0 start
+
+#bug-1352277
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type none
+
+# Bring down all the gluster processes
+TEST killall_gluster
+
+#bring back 1st glusterd and check whether the brick process comes back
+TEST $glusterd_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+
+#enabling quorum should bring down the brick
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+
+TEST $glusterd_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+#bug-1367478 - brick processes should not be up when quorum is not met
+TEST $CLI_1 volume create $V1 $H1:$B1/${V1}1 $H2:$B2/${V1}2
+TEST $CLI_1 volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H1 $B1/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H2 $B2/${V1}2
+
+# Restart 2nd glusterd
+TEST kill_glusterd 2
+TEST $glusterd_2
+
+# Check if all bricks are up
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H1 $B1/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H2 $B2/${V1}2
+
+cleanup
diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t
new file mode 100644
index 00000000000..469ec6cd48e
--- /dev/null
+++ b/tests/bugs/glusterd/rebalance-in-cluster.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+function rebalance_status_field_1 {
+ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p
+}
+
+cleanup;
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+#bug-1231437
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
+
+TEST $CLI_1 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+
+#bug - 1764119 - rebalance status should display detailed info when any of the node is dowm
+TEST kill_glusterd 2
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0
+
+TEST start_glusterd 2
+#bug-1245142
+
+$CLI_1 volume rebalance $V0 start &
+#kill glusterd2 after requst sent, so that call back is called
+#with rpc->status fail ,so roughly 1sec delay is introduced to get this scenario.
+sleep 1
+kill_glusterd 2
+#check glusterd commands are working after rebalance start command
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/rebalance-operations-in-single-node.t b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
new file mode 100644
index 00000000000..ef85887f440
--- /dev/null
+++ b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_rebalanced_info()
+{
+ local rebal_info_key=$2
+ $CLI volume rebalance $1 status | awk '{print $'$rebal_info_key'}' |sed -n 3p| sed 's/ *$//g'
+}
+
+volname="StartMigrationDuringRebalanceTest"
+TEST glusterd
+TEST pidof glusterd;
+
+TEST $CLI volume info;
+TEST $CLI volume create $volname $H0:$B0/${volname}{1..4};
+TEST $CLI volume start $volname;
+
+#bug-1046308 - validate rebalance on a specified volume name
+TEST $CLI volume rebalance $volname start;
+
+#bug-1089668 - validation of rebalance status and remove brick status
+#bug-963541 - after remove brick start rebalance/remove brick start without commiting should fail
+
+TEST ! $CLI volume remove-brick $volname $H0:$B0/${volname}1 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $volname
+
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}1 start
+TEST ! $CLI volume rebalance $volname start
+TEST ! $CLI volume rebalance $volname status
+TEST ! $CLI volume remove-brick $volname $H0:$B0/${volname}2 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field \
+"$volname" "$H0:$B0/${volname}1"
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}1 commit
+
+TEST $CLI volume rebalance $volname start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $volname
+TEST $CLI volume rebalance $volname stop
+
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}2 start
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}2 stop
+
+#bug-1351021-rebalance-info-post-glusterd-restart
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0;
+
+#Mount volume and create data
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST mkdir $M0/dir{1..10}
+TEST touch $M0/dir{1..10}/file{1..10}
+
+# Add-brick and start rebalance
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}4
+TEST $CLI volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Rebalance info before glusterd restart
+OLD_REBAL_FILES=$(get_rebalanced_info $V0 2)
+OLD_SIZE=$(get_rebalanced_info $V0 3)
+OLD_SCANNED=$(get_rebalanced_info $V0 4)
+OLD_FAILURES=$(get_rebalanced_info $V0 5)
+OLD_SKIPPED=$(get_rebalanced_info $V0 6)
+
+
+pkill glusterd;
+pkill glusterfsd;
+TEST glusterd
+
+#Rebalance info after glusterd restart
+NEW_REBAL_FILES=$(get_rebalanced_info $V0 2)
+NEW_SIZE=$(get_rebalanced_info $V0 3)
+NEW_SCANNED=$(get_rebalanced_info $V0 4)
+NEW_FAILURES=$(get_rebalanced_info $V0 5)
+NEW_SKIPPED=$(get_rebalanced_info $V0 6)
+#Check rebalance info before and after glusterd restart
+TEST [ $OLD_REBAL_FILES == $NEW_REBAL_FILES ]
+TEST [ $OLD_SIZE == $NEW_SIZE ]
+TEST [ $OLD_SCANNED == $NEW_SCANNED ]
+TEST [ $OLD_FAILURES == $NEW_FAILURES ]
+TEST [ $OLD_SKIPPED == $NEW_SKIPPED ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#bug-1004744 - validation of rebalance fix layout
+
+TEST $CLI volume start $V0 force
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+for i in `seq 11 20`;
+do
+ mkdir $M0/dir_$i
+ echo file>$M0/dir_$i/file_$i
+ for j in `seq 1 100`;
+ do
+ mkdir $M0/dir_$i/dir_$j
+ echo file>$M0/dir_$i/dir_$j/file_$j
+ done
+done
+
+#add 2 bricks
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{5,6};
+
+#perform rebalance fix-layout
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0;
+
+#bug-1075087 - rebalance post add brick
+TEST mkdir $M0/dir{21..30};
+TEST touch $M0/dir{21..30}/files{1..10};
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{7,8}
+
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN 180 "completed" rebalance_status_field $V0
+
+TEST pkill gluster
+TEST glusterd
+TEST pidof glusterd
+
+# status should be "completed" immediate after glusterd has respawned.
+EXPECT_WITHIN 20 "completed" rebalance_status_field $V0
+
+cleanup
diff --git a/tests/bugs/glusterd/remove-brick-in-cluster.t b/tests/bugs/glusterd/remove-brick-in-cluster.t
new file mode 100644
index 00000000000..de94220a906
--- /dev/null
+++ b/tests/bugs/glusterd/remove-brick-in-cluster.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+TEST launch_cluster 2;
+
+#bug-1047955 - remove brick from new peer in cluster
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/${V0}{1,2,3,4}
+TEST $CLI_1 volume start $V0;
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_2 volume remove-brick $V0 $H1:$B1/${V0}{3,4} start;
+TEST $CLI_2 volume info
+
+#bug-964059 - volume status post remove brick start
+TEST $CLI_1 volume create $V1 $H1:$B1/${V1}0 $H2:$B2/${V1}1
+TEST $CLI_1 volume start $V1
+TEST $CLI_1 volume remove-brick $V1 $H2:$B2/${V1}1 start
+TEST $CLI_1 volume status
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+#bug-1230121 - decrease replica count by remove-brick and increse by add-brick
+## Creating a 2x3 replicate volume
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/brick1 $H2:$B2/brick2 \
+ $H1:$B1/brick3 $H2:$B2/brick4 \
+ $H1:$B1/brick5 $H2:$B2/brick6
+
+## Start the volume
+TEST $CLI_1 volume start $V0
+
+## Shrinking volume replica 2x3 to 2x2 by performing remove-brick operation.
+TEST $CLI_1 volume remove-brick $V0 replica 2 $H1:$B1/brick1 $H2:$B2/brick6 force
+
+## Shrinking volume replica 2x2 to 1x2 by performing remove-brick operation
+TEST $CLI_1 volume remove-brick $V0 replica 2 $H1:$B1/brick3 $H2:$B2/brick2 force
+
+## Shrinking volume replica from 1x2 to 1x1 by performing remove-brick operation
+TEST $CLI_1 volume remove-brick $V0 replica 1 $H1:$B1/brick5 force
+
+
+### Expanding volume replica by performing add-brick operation.
+
+## Expend volume replica from 1x1 to 1x2 by performing add-brick operation
+TEST $CLI_1 volume add-brick $V0 replica 2 $H1:$B1/brick5 force
+
+## Expend volume replica from 1x2 to 2x2 by performing add-brick operation
+TEST $CLI_1 volume add-brick $V0 replica 2 $H1:$B1/brick3 $H2:$B2/brick2 force
+
+## Expend volume replica from 2x2 to 2x3 by performing add-brick operation
+TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$B1/brick1 $H2:$B2/brick6 force
+
+cleanup
+
diff --git a/tests/bugs/glusterd/remove-brick-testcases.t b/tests/bugs/glusterd/remove-brick-testcases.t
new file mode 100644
index 00000000000..2f982d5266f
--- /dev/null
+++ b/tests/bugs/glusterd/remove-brick-testcases.t
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..5}
+TEST $CLI volume start $V0
+
+#bug-1225716 - remove-brick on a brick which is down should fail
+#kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+
+#remove-brick start should fail as the brick is down
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+#remove-brick start should succeed as the brick is up
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}1"
+
+#kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+
+#remove-brick commit should pass even if the brick is down
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit
+
+#bug-1121584 - brick-existing-validation-for-remove-brick-status-stop
+## Start remove-brick operation on the volume
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+
+## By giving non existing brick for remove-brick status/stop command should
+## give error.
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/ABCD status
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/ABCD stop
+
+## By giving brick which is part of volume for remove-brick status/stop command
+## should print statistics of remove-brick operation or stop remove-brick
+## operation.
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 status
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+#bug-878004 - validate remove brick force
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force;
+EXPECT '3' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 force;
+EXPECT '2' brick_count $V0
+
+#bug-1027171 - Do not allow commit if the bricks are not decommissioned
+#Remove bricks and commit without starting
+function remove_brick_commit_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}4 commit 2>&1 |grep -oE "success|decommissioned"
+}
+EXPECT "decommissioned" remove_brick_commit_status;
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+
+#Create a 2X3 distributed-replicate volume
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+#Try to reduce replica count with start option
+function remove_brick_start_status {
+ $CLI volume remove-brick $V0 replica 2 \
+ $H0:$B0/${V0}3 $H0:$B0/${V0}6 start 2>&1 |grep -oE "success|failed"
+}
+EXPECT "failed" remove_brick_start_status;
+
+#Remove bricks with commit option
+function remove_brick_commit_status2 {
+ $CLI volume remove-brick $V0 replica 2 \
+ $H0:$B0/${V0}3 $H0:$B0/${V0}6 commit 2>&1 |
+ grep -oE "success|decommissioned"
+}
+EXPECT "decommissioned" remove_brick_commit_status2;
+TEST $CLI volume info $V0
+
+#bug-1040408 - reduce replica count of distributed replicate volume
+
+# Reduce to 2x2 volume by specifying bricks in reverse order
+function remove_brick_status {
+ $CLI volume remove-brick $V0 replica 2 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}3 force 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_status;
+TEST $CLI volume info $V0
+
+#bug-1120647 - remove brick validation
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{4..5} start
+EXPECT_WITHIN 10 "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}5"
+EXPECT_WITHIN 10 "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}4"
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{4..5} commit
+TEST $CLI volume remove-brick $V0 replica 1 $H0:$B0/${V0}2 force
+
+cleanup
diff --git a/tests/bugs/glusterd/remove-brick-validation.t b/tests/bugs/glusterd/remove-brick-validation.t
new file mode 100644
index 00000000000..a0ff4ff6a24
--- /dev/null
+++ b/tests/bugs/glusterd/remove-brick-validation.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function peer_count {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+## start a 3 node virtual cluster
+TEST launch_cluster 3;
+
+## peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+
+#testcase: bug-1245045-remove-brick-validation
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+kill_glusterd 2
+
+#remove-brick should fail as the peer hosting the brick is down
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+
+TEST $glusterd_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+#volume status should work
+TEST $CLI_2 volume status
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+kill_glusterd 2
+
+#remove-brick commit should fail as the peer hosting the brick is down
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit
+
+TEST $glusterd_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+#volume status should work
+TEST $CLI_2 volume status
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop
+
+kill_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+
+TEST start_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+TEST $CLI_3 volume status
+
+cleanup
diff --git a/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t b/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t
new file mode 100644
index 00000000000..00beab59137
--- /dev/null
+++ b/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a 3X2 distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+#bug-974007 - remove multiple replica pairs in a single brick command
+# Mount FUSE and create files
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file{1..10}
+
+# Remove bricks from two sub-volumes to make it a 1x2 vol.
+# Bricks in question are given in a random order but from the same subvols.
+function remove_brick_start_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}5 start 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_start_status;
+
+# Wait for rebalance to complete
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}6 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}5"
+
+# Check commit status
+function remove_brick_commit_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}5 commit 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_commit_status;
+
+
+# Check the volume type
+EXPECT "Replicate" echo `$CLI volume info |grep Type |awk '{print $2}'`
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#bug-961669 - remove brick start should fail when reducing the replica count
+
+#Create a 3x3 dist-rep volume
+TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{0,1,2,3,4,5,6,7,8};
+TEST $CLI volume start $V1
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "9" brick_count ${V1}
+
+# Mount FUSE and create file/directory
+TEST glusterfs -s $H0 --volfile-id $V1 $M0
+TEST touch $M0/zerobytefile.txt
+TEST mkdir $M0/test_dir
+TEST dd if=/dev/zero of=$M0/file bs=1024 count=1024
+
+function remove_brick_start {
+ $CLI volume remove-brick $V1 replica 2 $H0:$B0/${V1}{1,4,7} start 2>&1|grep -oE 'success|failed'
+}
+
+function remove_brick {
+ $CLI volume remove-brick $V1 replica 2 $H0:$B0/${V1}{1,4,7} force 2>&1|grep -oE 'success|failed'
+}
+
+#remove-brick start variant
+#Actual message displayed at cli is:
+#"volume remove-brick start: failed: Rebalancing not needed when reducing replica count. Try without the 'start' option"
+EXPECT "failed" remove_brick_start;
+
+#remove-brick commit-force
+#Actual message displayed at cli is:
+#"volume remove-brick commit force: success"
+EXPECT "success" remove_brick
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/bugs/glusterd/replace-brick-operations.t b/tests/bugs/glusterd/replace-brick-operations.t
new file mode 100644
index 00000000000..044aa3d6c6d
--- /dev/null
+++ b/tests/bugs/glusterd/replace-brick-operations.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for BZ: 1094119 Remove replace-brick support from gluster
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# Start glusterd
+TEST glusterd
+TEST pidof glusterd
+
+## Lets create and start volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0
+
+#bug-1094119-remove-replace-brick-support-from-glusterd
+
+## Now with this patch replace-brick only accept following commad
+## volume replace-brick <VOLNAME> <SOURCE-BRICK> <NEW-BRICK> {commit force}
+## Apart form this replace brick command will failed.
+
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 status
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 abort
+
+
+## replace-brick commit force command should success
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 commit force
+
+#bug-1242543-replace-brick validation
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Replace brick1 without killing
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST kill_brick $V0 $H0 $B0/${V0}1_new
+
+# Replace brick1 after killing the brick
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1_new $H0:$B0/${V0}1_newer commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+cleanup;
diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
new file mode 100644
index 00000000000..e6e65c48456
--- /dev/null
+++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function shd_up_status_1 {
+ $CLI_1 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function shd_up_status_2 {
+ $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function get_shd_pid_2 {
+ $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $8}'
+}
+
+cleanup;
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+TEST launch_cluster 3
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B0/${V0} $H2:$B0/${V0}
+TEST $CLI_1 volume start $V0
+
+#testcase: bug-1507466 - validate reset-brick commit force
+# Negative case with brick not killed && volume-id xattrs present
+TEST ! $CLI_1 volume reset-brick $V0 $H1:$B0/${V0} $H1:$B0/${V0} commit force
+
+TEST $CLI_1 volume reset-brick $V0 $H1:$B0/${V0} start
+# Now test if reset-brick commit force works
+TEST $CLI_1 volume reset-brick $V0 $H1:$B0/${V0} $H1:$B0/${V0} commit force
+
+#testcase: bug-1383893 - shd should not come up after restarting the peer glusterd
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B0/${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B0/${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
+
+# Bring down shd on 2nd node
+kill -15 $(get_shd_pid_2)
+
+# Bring down glusterd on 1st node
+TEST kill_glusterd 1
+
+#Bring back 1st glusterd
+TEST $glusterd_1
+
+# We need to wait till PROCESS_UP_TIMEOUT and then check shd service started
+#on node 2, because once glusterd regains quorum, it will restart all volume
+#level daemons
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
+
+cleanup;
diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
new file mode 100644
index 00000000000..a871e112d87
--- /dev/null
+++ b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
@@ -0,0 +1,54 @@
+#! /bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+count=`$CLI_3 peer status | grep 'Peer in Cluster (Connected)' | wc -l`
+echo $count
+}
+
+function check_shd {
+ps aux | grep $1 | grep glustershd | wc -l
+}
+
+cleanup
+
+
+TEST launch_cluster 6
+
+TESTS_EXPECTED_IN_LOOP=25
+for i in $(seq 2 6); do
+ hostname="H$i"
+ TEST $CLI_1 peer probe ${!hostname}
+done
+
+
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers;
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i
+ TEST $CLI_1 volume start ${V0}_$i force
+
+done
+
+#kill a node
+TEST kill_node 3
+
+TEST $glusterd_3;
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3
+
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume stop ${V0}_$i
+ TEST $CLI_1 volume delete ${V0}_$i
+
+done
+
+for i in $(seq 1 6); do
+ hostname="H$i"
+ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname}
+done
+cleanup
diff --git a/tests/bugs/glusterd/snapshot-operations.t b/tests/bugs/glusterd/snapshot-operations.t
new file mode 100644
index 00000000000..4705577d741
--- /dev/null
+++ b/tests/bugs/glusterd/snapshot-operations.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 3;
+TEST setup_lvm 3;
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$L1 $H2:$L2
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#bug-1318591 - skip-non-directories-inside-vols
+
+b="B1"
+TEST touch ${!b}/glusterd/vols/file
+
+TEST $CLI_1 snapshot create snap1 $V0 no-timestamp;
+
+TEST touch ${!b}/glusterd/snaps/snap1/file
+
+#bug-1322145 - peer hosting snapshotted bricks should not be detachable
+
+kill_glusterd 2
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume replace-brick $V0 $H2:$L2 $H3:$L3 commit force
+
+# peer hosting snapshotted bricks should not be detachable
+TEST ! $CLI_1 peer detach $H2
+
+TEST killall_gluster
+TEST $glusterd_1
+TEST $glusterd_2
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/sync-post-glusterd-restart.t b/tests/bugs/glusterd/sync-post-glusterd-restart.t
new file mode 100644
index 00000000000..de3dff715ab
--- /dev/null
+++ b/tests/bugs/glusterd/sync-post-glusterd-restart.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function volume_get_field()
+{
+ local vol=$1
+ local field=$2
+ $CLI_2 volume get $vol $field | tail -1 | awk '{print $2}'
+}
+
+cleanup
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume set $V0 performance.readdir-ahead on
+
+# Bring down 2nd glusterd
+TEST kill_glusterd 2
+
+##bug-1420637 and bug-1323287 - sync post glusterd restart
+
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 60
+TEST $CLI_1 volume set $V0 performance.readdir-ahead off
+TEST $CLI_1 volume set $V0 performance.write-behind off
+
+# Bring back 2nd glusterd
+TEST $glusterd_2
+
+# After 2nd glusterd come back, there will be 2 nodes in a cluster
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+#bug-1420637-volume sync post glusterd restart
+
+EXPECT_WITHIN $PROBE_TIMEOUT "60" volinfo_field_2 all cluster.server-quorum-ratio
+EXPECT_WITHIN $PROBE_TIMEOUT "off" volinfo_field_2 $V0 performance.readdir-ahead
+
+#bug-1323287
+EXPECT_WITHIN $PROBE_TIMEOUT 'off' volume_get_field $V0 'write-behind'
+
+#bug-1213295 - volume stop should not crash glusterd post glusterd restart
+
+TEST $CLI_2 volume stop $V0
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1
+
+cleanup
diff --git a/tests/bugs/glusterd/validating-options-for-replicated-volume.t b/tests/bugs/glusterd/validating-options-for-replicated-volume.t
new file mode 100644
index 00000000000..ddc80b17870
--- /dev/null
+++ b/tests/bugs/glusterd/validating-options-for-replicated-volume.t
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#bug-1314649 - validate group virt
+TEST $CLI volume set $V0 group virt;
+
+#bug-765230 - remove-quota-related-option-after-disabling-quota
+## setting soft-timeout as 20
+TEST $CLI volume set $V0 features.soft-timeout 20
+EXPECT '20' volinfo_field $V0 'features.soft-timeout';
+
+## enabling features.quota-deem-statfs
+TEST ! $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+## enabling quota
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+
+## eetting soft-timeout as 20
+TEST $CLI volume set $V0 features.soft-timeout 20
+EXPECT '20' volinfo_field $V0 'features.soft-timeout';
+
+## enabling features.quota-deem-statfs
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+## disabling quota
+TEST $CLI volume quota $V0 disable
+EXPECT 'off' volinfo_field $V0 'features.quota'
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+EXPECT '' volinfo_field $V0 'features.soft-timeout'
+
+## setting soft-timeout as 30
+TEST $CLI volume set $V0 features.soft-timeout 30
+EXPECT '30' volinfo_field $V0 'features.soft-timeout';
+
+## disabling features.quota-deem-statfs
+TEST ! $CLI volume set $V0 features.quota-deem-statfs off
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST ! $CLI volume set $V0 statedump-path ""
+TEST ! $CLI volume set $V0 statedump-path " "
+TEST $CLI volume set $V0 statedump-path "/home/"
+EXPECT "/home/" volume_option $V0 server.statedump-path
+
+TEST ! $CLI volume set $V0 background-self-heal-count ""
+TEST ! $CLI volume set $V0 background-self-heal-count " "
+TEST $CLI volume set $V0 background-self-heal-count 10
+EXPECT "10" volume_option $V0 cluster.background-self-heal-count
+
+TEST ! $CLI volume set $V0 io-cache-size ""
+TEST ! $CLI volume set $V0 io-cache-size " "
+TEST $CLI volume set $V0 io-cache-size 64MB
+EXPECT "64MB" volume_option $V0 performance.io-cache-size
+
+TEST ! $CLI volume set $V0 quick-read-cache-size ""
+TEST ! $CLI volume set $V0 quick-read-cache-size " "
+TEST $CLI volume set $V0 quick-read-cache-size 512MB
+EXPECT "512MB" volume_option $V0 performance.quick-read-cache-size
+
+TEST ! $CLI volume set $V0 self-heal-daemon ""
+TEST ! $CLI volume set $V0 self-heal-daemon " "
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT "on" volume_option $V0 cluster.self-heal-daemon
+
+TEST ! $CLI volume set $V0 read-subvolume ""
+TEST ! $CLI volume set $V0 read-subvolume " "
+TEST $CLI volume set $V0 read-subvolume $V0-client-0
+EXPECT "$V0-client-0" volume_option $V0 cluster.read-subvolume
+
+TEST ! $CLI volume set $V0 data-self-heal-algorithm ""
+TEST ! $CLI volume set $V0 data-self-heal-algorithm " "
+TEST ! $CLI volume set $V0 data-self-heal-algorithm on
+TEST $CLI volume set $V0 data-self-heal-algorithm full
+EXPECT "full" volume_option $V0 cluster.data-self-heal-algorithm
+
+TEST ! $CLI volume set $V0 min-free-inodes ""
+TEST ! $CLI volume set $V0 min-free-inodes " "
+TEST $CLI volume set $V0 min-free-inodes 60%
+EXPECT "60%" volume_option $V0 cluster.min-free-inodes
+
+TEST ! $CLI volume set $V0 min-free-disk ""
+TEST ! $CLI volume set $V0 min-free-disk " "
+TEST $CLI volume set $V0 min-free-disk 60%
+EXPECT "60%" volume_option $V0 cluster.min-free-disk
+
+TEST $CLI volume set $V0 min-free-disk 120
+EXPECT "120" volume_option $V0 cluster.min-free-disk
+
+TEST ! $CLI volume set $V0 frame-timeout ""
+TEST ! $CLI volume set $V0 frame-timeout " "
+TEST $CLI volume set $V0 frame-timeout 0
+EXPECT "0" volume_option $V0 network.frame-timeout
+
+TEST ! $CLI volume set $V0 auth.allow ""
+TEST ! $CLI volume set $V0 auth.allow " "
+TEST $CLI volume set $V0 auth.allow 192.168.122.1
+EXPECT "192.168.122.1" volume_option $V0 auth.allow
+
+#bug-782095 - validate performance cache min/max size value
+
+## setting performance cache min size as 2MB
+TEST $CLI volume set $V0 performance.cache-min-file-size 2MB
+EXPECT '2MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## setting performance cache max size as 20MB
+TEST $CLI volume set $V0 performance.cache-max-file-size 20MB
+EXPECT '20MB' volinfo_field $V0 'performance.cache-max-file-size';
+
+## trying to set performance cache min size as 25MB
+TEST ! $CLI volume set $V0 performance.cache-min-file-size 25MB
+EXPECT '2MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## able to set performance cache min size as long as its lesser than max size
+TEST $CLI volume set $V0 performance.cache-min-file-size 15MB
+EXPECT '15MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## trying it out with only cache-max-file-size in CLI as 10MB
+TEST ! $CLI volume set $V0 cache-max-file-size 10MB
+EXPECT '20MB' volinfo_field $V0 'performance.cache-max-file-size';
+
+## finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup
diff --git a/tests/bugs/glusterd/validating-server-quorum.t b/tests/bugs/glusterd/validating-server-quorum.t
new file mode 100644
index 00000000000..ae7d83fd81c
--- /dev/null
+++ b/tests/bugs/glusterd/validating-server-quorum.t
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_fs {
+ df $1 &> /dev/null
+ echo $?
+}
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 3
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Lets create the volume
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/${V0}1 $H2:$B2/${V0}2 $H3:$B3/${V0}3
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+
+# Start the volume
+TEST $CLI_1 volume start $V0
+
+#bug-1345727 - bricks should be down when quorum is not met
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H3 $B3/${V0}3
+
+# Bring down glusterd on 2nd node
+TEST kill_glusterd 2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST kill_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 0 peer_count
+
+# Server quorum is not met. Brick on 1st node must be down
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+
+# Set quorum ratio 95. means 95 % or more than 95% nodes of total available node
+# should be available for performing volume operation.
+# i.e. Server-side quorum is met if the number of nodes that are available is
+# greater than or equal to 'quorum-ratio' times the number of nodes in the
+# cluster
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 95
+
+#bug-1483058 - replace-brick should fail when quorum is not met
+TEST ! $CLI_1 volume replace-brick $V0 $H2:$B2/${V0}2 $H1:$B1/${V0}2_new commit force
+
+#Bring back 2nd glusterd
+TEST $glusterd_2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+# Server quorum is still not met. Bricks should be down on 1st and 2nd nodes
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2
+
+# Bring back 3rd glusterd
+TEST $glusterd_3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Server quorum is met now. Bricks should be up on all nodes
+# Check from 3rd instance of glusterd so that the 3rd node finishes all its
+# handshake and then report back the brick status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H2 $B2/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H3 $B3/${V0}3
+
+# Check from 1st instance of glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H2 $B2/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H3 $B3/${V0}3
+
+# TODO : Because commit fe71ee7 introduced a delay of 1 sec to wait for shd connect and
+# disconnect events to be serially processed during a restart of shd daemon,
+# this introduced a race where while releasing big lock, if any command sneaks
+# and acquires the big lock, it might be able to work on a volinfo which is
+# stale. We need to find a better way to fix this.
+
+sleep 3
+
+# quorum is met. replace-brick will execute successfully
+EXPECT_WITHIN $PEER_SYNC_TIMEOUT 0 attempt_replace_brick 1 $V0 $H2:$B2/${V0}2 $H2:$B2/${V0}2_new
+
+TEST $CLI_1 volume reset all
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2_new
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H3 $B3/${V0}3
+
+
+#bug-913555 - volume should become unwritable when quorum does not met
+
+TEST glusterfs --volfile-server=$H1 --volfile-id=$V0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0;
+
+# Kill one pseudo-node, make sure the others survive and volume stays up.
+TEST kill_node 3;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2_new
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0;
+
+# Kill another pseudo-node, make sure the last one dies and volume goes down.
+TEST kill_node 2;
+EXPECT_WITHIN $PROBE_TIMEOUT 0 check_peers
+#two glusterfsds of the other two glusterds must be dead
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_fs $M0;
+
+TEST $glusterd_2;
+TEST $glusterd_3;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0;
+
+cleanup
diff --git a/tests/bugs/glusterfs-server/bug-852147.t b/tests/bugs/glusterfs-server/bug-852147.t
new file mode 100755
index 00000000000..75db2a26e05
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-852147.t
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+logdir=`gluster --print-logdir`"/bricks"
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+touch $M0/file1;
+
+TEST $CLI volume set $V0 performance.cache-max-file-size 20MB
+TEST $CLI volume set $V0 performance.cache-min-file-size 10MB
+
+EXPECT "20MB" volinfo_field $V0 'performance.cache-max-file-size';
+EXPECT "10MB" volinfo_field $V0 'performance.cache-min-file-size';
+
+#Performing volume reset and verifying.
+TEST $CLI volume reset $V0
+EXPECT "" volinfo_field $V0 'performance.cache-max-file-size';
+EXPECT "" volinfo_field $V0 'performance.cache-min-file-size';
+
+#Verifying vlolume-profile start, info and stop
+EXPECT "Starting volume profile on $V0 has been successful " $CLI volume profile $V0 start
+
+function vol_prof_info()
+{
+ $CLI volume profile $V0 info | grep Brick | wc -l
+}
+EXPECT "6" vol_prof_info
+
+EXPECT "Stopping volume profile on $V0 has been successful " $CLI volume profile $V0 stop
+
+function log-file-name()
+{
+ logfilename=$B0"/"$V0"1.log"
+ echo ${logfilename:1} | tr / -
+}
+
+function file-size()
+{
+ ls -lrt $1 | awk '{print $5}'
+}
+
+#Finding the current log file's size
+log_file=$logdir"/"`log-file-name`
+log_file_size=`file-size $log_file`
+
+#Removing the old backup log files
+ren_file=$log_file".*"
+rm -rf $ren_file
+
+#Initiating log rotate
+TEST $CLI volume log $V0 rotate
+
+#Capturing new log file's size
+new_file_size=`file-size $log_file`
+
+#Verifying the size of the new log file and the creation of the backup log file
+TEST ! [ $new_file_size -eq $log_file_size ]
+TEST ls -lrt $ren_file
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-861542.t b/tests/bugs/glusterfs-server/bug-861542.t
new file mode 100755
index 00000000000..60d1b132fb4
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-861542.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+# Distributed volume with a single brick was chosen solely for the ease of
+#implementing the test case (to be precise, for the ease of extracting the port number).
+TEST $CLI volume create $V0 $H0:$B0/brick0;
+
+TEST $CLI volume start $V0;
+
+function port_field()
+{
+ local vol=$1;
+ local opt=$2;
+ if [ $opt -eq '0' ]; then
+ $CLI volume status $vol | grep "brick0" | awk '{print $3}';
+ else
+ $CLI volume status $vol detail | grep "^TCP Port " | awk '{print $4}';
+ fi
+}
+
+function xml_port_field()
+{
+ local vol=$1;
+ local opt=$2;
+ $CLI --xml volume status $vol $opt | tr -d '\n' |\
+#Find the first occurrence of the string between <port> and </port>
+ sed -rn 's/<port>/&###/;s/<\/port>/###&/;s/^.*###(.*)###.*$/\1/p'
+}
+
+TEST $CLI volume status $V0;
+TEST $CLI volume status $V0 detail;
+TEST $CLI --xml volume status $V0;
+TEST $CLI --xml volume status $V0 detail;
+
+# Kill the brick process. After this, port number for the killed (in this case brick) process must be "N/A".
+kill `cat $GLUSTERD_PIDFILEDIR/vols/$V0/$H0-d-backends-brick0.pid`
+
+EXPECT "N/A" port_field $V0 '0'; # volume status
+EXPECT "N/A" port_field $V0 '1'; # volume status detail
+
+EXPECT "N/A" xml_port_field $V0 '';
+EXPECT "N/A" xml_port_field $V0 'detail';
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-864222.t b/tests/bugs/glusterfs-server/bug-864222.t
new file mode 100755
index 00000000000..01a7a4e3afd
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-864222.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+
+TEST ls
+
+TEST $CLI volume set $V0 nfs.enable-ino32 on
+# Main test. This should pass.
+TEST ls
+
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+cleanup
+
diff --git a/tests/bugs/glusterfs-server/bug-873549.t b/tests/bugs/glusterfs-server/bug-873549.t
new file mode 100644
index 00000000000..8b5534728fd
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-873549.t
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd -LDEBUG;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume set $V0 performance.quick-read-cache-size 512MB
+TEST $CLI volume start $V0
+TEST $CLI volume statedump $V0 all
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t
new file mode 100755
index 00000000000..300000bcf2c
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-877992.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd -LDEBUG
+TEST pidof glusterd
+
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function hooks_prep ()
+{
+ local event=$1
+ touch /tmp/pre.out /tmp/post.out
+ touch $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ touch $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+
+ printf "#! /bin/bash\necho "$event"Pre > /tmp/pre.out\n" > $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ printf "#! /bin/bash\necho "$event"Post > /tmp/post.out\n" > $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+ chmod a+x $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ chmod a+x $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+}
+
+function hooks_cleanup ()
+{
+ local event=$1
+ rm /tmp/pre.out /tmp/post.out
+ rm $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ rm $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+}
+
+## Verify volume is created and its hooks script ran
+hooks_prep 'create'
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'createPre' cat /tmp/pre.out;
+# Spost.sh comes after S10selinux-label-brick.sh under create post hook script
+# list. So consider the delay in setting SELinux context on bricks
+EXPECT_WITHIN 5 'createPost' cat /tmp/post.out;
+hooks_cleanup 'create'
+
+
+## Start volume and verify that its hooks script ran
+hooks_prep 'start'
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN 5 'startPre' cat /tmp/pre.out;
+EXPECT_WITHIN 5 'startPost' cat /tmp/post.out;
+hooks_cleanup 'start'
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t
new file mode 100755
index 00000000000..db2cf3c050b
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-887145.t
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.open-behind off;
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+
+useradd tmp_user 2>/dev/null 1>/dev/null;
+mkdir $M0/dir;
+mkdir $M0/other;
+cp /etc/passwd $M0/;
+cp $M0/passwd $M0/file;
+chmod 600 $M0/file;
+
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+grep nfsnobody /etc/passwd > /dev/null
+if [ $? -eq 1 ]; then
+usr=nobody
+grp=nobody
+else
+usr=nfsnobody
+grp=nfsnobody
+fi
+chown -R $usr:$grp $M0/dir;
+chown -R tmp_user:tmp_user $M0/other;
+
+TEST $CLI volume set $V0 server.root-squash on;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+# create files and directories in the root of the glusterfs and nfs mount
+# which is owned by root and hence the right behavior is getting EACCESS
+# as the fops are executed as nfsnobody/nobody.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+cp $M0/file $M0/tmp_file 2>/dev/null;
+TEST [ $? -ne 0 ]
+cp $N0/file $N0/tmp_file 2>/dev/null;
+TEST [ $? -ne 0 ]
+cat $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+# here read should be allowed because eventhough file "passwd" is owned
+# by root, the permissions if the file allow other users to read it.
+cat $M0/passwd 1>/dev/null;
+TEST [ $? -eq 0 ]
+cat $N0/passwd 1>/dev/null;
+TEST [ $? -eq 0 ]
+
+# create files and directories should succeed as the fops are being executed
+# inside the directory owned by nfsnobody/nobody
+TEST touch $M0/dir/file;
+TEST touch $N0/dir/foo;
+TEST mkdir $M0/dir/new;
+TEST mkdir $N0/dir/other;
+TEST rm -f $M0/dir/file $M0/dir/foo;
+TEST rmdir $N0/dir/*;
+
+# create files and directories here should fail as other directory is owned
+# by tmp_user.
+touch $M0/other/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/other/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/other/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/other/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+userdel tmp_user;
+rm -rf /home/tmp_user;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-889996.t b/tests/bugs/glusterfs-server/bug-889996.t
new file mode 100644
index 00000000000..d7d25c42933
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-889996.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+rm -rf $B0/${V0}1;
+
+TEST ! $CLI volume start $V0;
+EXPECT 0 online_brick_count;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-904300.t b/tests/bugs/glusterfs-server/bug-904300.t
new file mode 100755
index 00000000000..95d5d381c8b
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-904300.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+# 1-8
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir $N0/dir1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+#
+# Case 1: Allow "dir1" to be mounted only from 127.0.0.1
+# 9-12
+TEST $CLI volume set $V0 export-dir \""/dir1(127.0.0.1)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0/dir1 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+#
+# Case 2: Allow "dir1" to be mounted only from 8.8.8.8. This is
+# a negative test case therefore the mount should fail.
+# 13-16
+TEST $CLI volume set $V0 export-dir \""/dir1(8.8.8.8)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST ! mount_nfs $H0:/$V0/dir1 $N0 nolock
+TEST ! umount $N0
+
+
+# Case 3: Variation of test case1. Here we are checking with hostname
+# instead of ip address.
+# 17-20
+TEST $CLI volume set $V0 export-dir \""/dir1($H0)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0/dir1 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# Case 4: Variation of test case1. Here we are checking with IP range
+# 21-24
+TEST $CLI volume set $V0 export-dir \""/dir1(127.0.0.0/24)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0/dir1 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+## Finish up
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-905864.c b/tests/bugs/glusterfs-server/bug-905864.c
new file mode 100644
index 00000000000..f70003736e7
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-905864.c
@@ -0,0 +1,83 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+pthread_t th[5] = {0};
+void
+flock_init(struct flock *f, short int type, off_t start, off_t len)
+{
+ f->l_type = type;
+ f->l_start = start;
+ f->l_len = len;
+}
+
+int
+flock_range_in_steps(int fd, int is_set, short l_type, int start, int end,
+ int step)
+{
+ int ret = 0;
+ int i = 0;
+ struct flock f = {
+ 0,
+ };
+
+ for (i = start; i + step < end; i += step) {
+ flock_init(&f, l_type, i, step);
+ ret = fcntl(fd, (is_set) ? F_SETLKW : F_GETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+void *
+random_locker(void *arg)
+{
+ int fd = *(int *)arg;
+ int i = 0;
+ int is_set = 0;
+
+ /* use thread id to choose GETLK or SETLK operation*/
+ is_set = pthread_self() % 2;
+ (void)flock_range_in_steps(fd, is_set, F_WRLCK, 0, 400, 1);
+
+ return NULL;
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd = -1;
+ int ret = 1;
+ int i = 0;
+ char *fname = NULL;
+
+ if (argc < 2)
+ goto out;
+
+ fname = argv[1];
+ fd = open(fname, O_RDWR);
+ if (fd == -1) {
+ perror("open");
+ goto out;
+ }
+
+ ret = flock_range_in_steps(fd, 1, F_WRLCK, 0, 2000, 2);
+ for (i = 0; i < 5; i++) {
+ pthread_create(&th[i], NULL, random_locker, (void *)&fd);
+ }
+ ret = flock_range_in_steps(fd, 1, F_WRLCK, 0, 2000, 2);
+ for (i = 0; i < 5; i++) {
+ pthread_join(th[i], NULL);
+ }
+out:
+ if (fd != -1)
+ close(fd);
+
+ return ret;
+}
diff --git a/tests/bugs/glusterfs-server/bug-905864.t b/tests/bugs/glusterfs-server/bug-905864.t
new file mode 100644
index 00000000000..44923a85333
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-905864.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M1;
+
+TEST touch $M0/file1;
+
+#following C program tries open up race(s) if any, in F_GETLK/F_SETLKW codepaths
+#of locks xlator
+TEST $CC -pthread -g3 $(dirname $0)/bug-905864.c -o $(dirname $0)/bug-905864
+
+$(dirname $0)/bug-905864 $M0/file1 &
+$(dirname $0)/bug-905864 $M1/file1;
+wait
+
+TEST rm -f $(dirname $0)/bug-905864
+EXPECT $(brick_count $V0) online_brick_count
+
+cleanup
diff --git a/tests/bugs/glusterfs-server/bug-912297.t b/tests/bugs/glusterfs-server/bug-912297.t
new file mode 100755
index 00000000000..08f5dcea9b9
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-912297.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting owner-uid as -12
+TEST ! $CLI volume set $V0 owner-uid -12
+EXPECT '' volinfo_field $V0 'storage.owner-uid'
+
+## Setting owner-gid as -5
+TEST ! $CLI volume set $V0 owner-gid -5
+EXPECT '' volinfo_field $V0 'storage.owner-gid'
+
+## Setting owner-uid as 36
+TEST $CLI volume set $V0 owner-uid 36
+EXPECT '36' volinfo_field $V0 'storage.owner-uid'
+
+## Setting owner-gid as 36
+TEST $CLI volume set $V0 owner-gid 36
+EXPECT '36' volinfo_field $V0 'storage.owner-gid'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-1482528.t b/tests/bugs/glusterfs/bug-1482528.t
new file mode 100644
index 00000000000..3adf260bdcd
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-1482528.t
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $@ $M0
+
+TEST touch $M0/default.txt
+EXPECT "644" stat -c %a $M0/default.txt
+
+TEST chmod 0444 $M0/default.txt
+EXPECT "444" stat -c %a $M0/default.txt
+
+TEST mkdir $M0/default
+EXPECT "755" stat -c %a $M0/default
+
+TEST chmod 0444 $M0/default
+EXPECT "444" stat -c %a $M0/default
+
+TEST mkfifo $M0/mkfifo
+EXPECT "644" stat -c %a $M0/mkfifo
+
+TEST mknod $M0/dmknod b 4 5
+EXPECT "644" stat -c %a $M0/dmknod
+
+#Set the create-directory-mask and create-mask options
+TEST $CLI volume set $V0 storage.create-directory-mask 0444
+TEST $CLI volume set $V0 storage.create-mask 0444
+
+TEST mkdir $M0/create-directory
+EXPECT "444" stat -c %a $M0/create-directory
+
+TEST touch $M0/create-mask.txt
+EXPECT "444" stat -c %a $M0/create-mask.txt
+
+TEST chmod 0777 $M0/create-mask.txt
+EXPECT "444" stat -c %a $M0/create-mask.txt
+
+TEST chmod 0400 $M0/create-mask.txt
+EXPECT "400" stat -c %a $M0/create-mask.txt
+
+TEST chmod 0777 $M0/create-directory
+EXPECT "444" stat -c %a $M0/create-directory
+
+TEST chmod 0400 $M0/create-directory
+EXPECT "400" stat -c %a $M0/create-directory
+
+TEST mkfifo $M0/cfifo
+EXPECT "444" stat -c %a $M0/cfifo
+
+TEST chmod 0777 $M0/cfifo
+EXPECT "444" stat -c %a $M0/cfifo
+
+TEST mknod $M0/cmknod b 4 5
+EXPECT "444" stat -c %a $M0/cmknod
+
+#set force-create-mode and force-directory-mode options
+TEST $CLI volume set $V0 storage.force-create-mode 0777
+TEST $CLI volume set $V0 storage.force-directory-mode 0333
+
+TEST touch $M0/force-create-mode.txt
+EXPECT "777" stat -c %a $M0/force-create-mode.txt
+
+TEST mkdir $M0/force-directory
+EXPECT "777" stat -c %a $M0/force-directory
+
+TEST chmod 0222 $M0/force-create-mode.txt
+EXPECT "777" stat -c %a $M0/force-create-mode.txt
+
+TEST chmod 0222 $M0/force-directory
+EXPECT "333" stat -c %a $M0/force-directory
+
+TEST mkdir $M0/link
+TEST ln -s $M0/force-create-mode.txt $M0/link
+EXPECT "777" stat -c %a $M0/link/force-create-mode.txt
+
+TEST ln $M0/force-create-mode.txt $M0/link/fc.txt
+EXPECT "777" stat -c %a $M0/link/fc.txt
+
+TEST setfacl -m o:r $M0/force-create-mode.txt
+EXPECT "777" stat -c %a $M0/force-create-mode.txt
+
+TEST ln -s $M0/force-directory $M0/link
+EXPECT "777" stat -c %a $M0/link/force-directory
+
+TEST mkfifo $M0/ffifo
+EXPECT "777" stat -c %a $M0/ffifo
+
+TEST mknod $M0/mknod b 4 5
+EXPECT "777" stat -c %a $M0/mknod
diff --git a/tests/bugs/glusterfs/bug-811493.t b/tests/bugs/glusterfs/bug-811493.t
new file mode 100755
index 00000000000..98f7c121a02
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-811493.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI system uuid reset;
+
+uuid1=$(grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f 2 -d "=");
+
+TEST $CLI system uuid reset;
+uuid2=$(grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f 2 -d "=");
+
+TEST [ $uuid1 != $uuid2 ]
+
+cleanup
diff --git a/tests/bugs/glusterfs/bug-844688.t b/tests/bugs/glusterfs/bug-844688.t
new file mode 100755
index 00000000000..65f41b342a5
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-844688.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function check_callstack_log {
+ local text=$1
+ statedump_file=$(generate_mount_statedump $V0);
+ grep $text $statedump_file 2>/dev/null 1>/dev/null;
+ if [ $? -eq 0 ]; then
+ echo "1";
+ else
+ echo "0";
+ fi;
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+mount_pid=$(get_mount_process_pid $V0);
+# enable dumping of call stack creation and frame creation times in statedump
+# monitoring is enabled by default
+
+# We want to make sure that there is a pending frame in gluster stack.
+# For that we are creating a blocking lock scenario.
+
+TEST touch $M0/lockfile;
+# Open two fd's on the same file
+exec 8>$M0/lockfile;
+exec 9>$M0/lockfile;
+
+# First flock will succeed and the second one will block, hence the background run.
+flock -x 8 ;
+flock -x 9 &
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" check_callstack_log "callstack-creation-time";
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" check_callstack_log "frame-creation-time";
+
+flock -u 8
+flock -u 9;
+
+# Closing the fd's
+exec 8>&-
+exec 9>&-
+
+TEST rm -f $M0/lockfile;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -f $statedumpdir/glusterdump.$mount_pid.*;
+cleanup
diff --git a/tests/bugs/glusterfs/bug-848251.t b/tests/bugs/glusterfs/bug-848251.t
new file mode 100644
index 00000000000..69ffe680f7f
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-848251.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+
+TEST $CLI volume start $V0;
+
+#enable quota
+TEST $CLI volume quota $V0 enable;
+
+#mount on a random dir
+TEST MOUNTDIR="/tmp/$RANDOM"
+TEST mkdir $MOUNTDIR
+TEST glusterfs -s $H0 --volfile-id=$V0 $MOUNTDIR
+
+function set_quota(){
+ mkdir "$MOUNTDIR/$name"
+ $CLI volume quota $V0 limit-usage /$name 50KB
+}
+
+function quota_list(){
+ $CLI volume quota $V0 list | grep -- /$name | awk '{print $3}'
+}
+
+TEST name=":d1"
+#file name containing ':' in the start
+TEST set_quota
+EXPECT "80%" quota_list
+
+TEST name=":d1/d:1"
+#file name containing ':' in between
+TEST set_quota
+EXPECT "80%" quota_list
+
+TEST name=":d1/d:1/d1:"
+#file name containing ':' in the end
+TEST set_quota
+EXPECT "80%" quota_list
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+TEST rm -rf $MOUNTDIR
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-853690.t b/tests/bugs/glusterfs/bug-853690.t
new file mode 100755
index 00000000000..59facfcddb0
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-853690.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+#
+# Bug 853690 - Test that short writes do not lead to corruption.
+#
+# Mismanagement of short writes in AFR leads to corruption and immediately
+# detectable split-brain. Write a file to a replica volume using error-gen
+# to cause short writes on one replica.
+#
+# Short writes are also possible during heal. If ignored, the files are marked
+# consistent and silently differ. After reading the file, cause a lookup, wait
+# for self-heal and verify that the afr xattrs do not match.
+#
+########
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test{1,2}
+
+# Our graph is a two brick replica with 100% frequency of short writes on one
+# side of the replica. This guarantees a single write fop leads to an out-of-sync
+# situation.
+cat > $B0/test.vol <<EOF
+volume test-posix-0
+ type storage/posix
+ option directory $B0/test1
+end-volume
+
+volume test-error-0
+ type debug/error-gen
+ option failure 100
+ option enable writev
+ option error-no GF_ERROR_SHORT_WRITE
+ subvolumes test-posix-0
+end-volume
+
+volume test-locks-0
+ type features/locks
+ subvolumes test-error-0
+end-volume
+
+volume test-posix-1
+ type storage/posix
+ option directory $B0/test2
+end-volume
+
+volume test-locks-1
+ type features/locks
+ subvolumes test-posix-1
+end-volume
+
+volume test-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 0
+ subvolumes test-locks-0 test-locks-1
+end-volume
+EOF
+
+TEST glusterd
+
+TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0
+
+# Send a single write, guaranteed to be short on one replica, and attempt to
+# read the data back. Failure to detect the short write results in different
+# file sizes and immediate split-brain (EIO).
+TEST dd if=/dev/urandom of=$M0/file bs=128k count=1
+TEST dd if=$M0/file of=/dev/null bs=128k count=1
+########
+#
+# Test self-heal with short writes...
+#
+########
+
+# Cause a lookup and wait a few seconds for posterity. This self-heal also fails
+# due to a short write.
+TEST ls $M0/file
+# Verify the attributes on the healthy replica do not reflect consistency with
+# the other replica.
+xa=`getfattr -n trusted.afr.test-locks-0 -e hex $B0/test2/file 2>&1 | grep = | cut -f2 -d=`
+EXPECT_NOT 0x000000000000000000000000 echo $xa
+
+TEST rm -f $M0/file
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -f $B0/test.vol
+rm -rf $B0/test1 $B0/test2
+
+cleanup;
+
diff --git a/tests/bugs/glusterfs/bug-856455.t b/tests/bugs/glusterfs/bug-856455.t
new file mode 100644
index 00000000000..d02b39bda8e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-856455.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+function query_pathinfo()
+{
+ local path=$1;
+ local retval;
+
+ local pathinfo=$(getfattr -n trusted.glusterfs.pathinfo $path);
+ retval=$(echo $pathinfo | grep -o 'POSIX' | wc -l);
+ echo $retval
+}
+
+TEST touch $M0/f00f;
+TEST mkdir $M0/f00d;
+
+# verify pathinfo for a file and directory
+EXPECT 1 query_pathinfo $M0/f00f;
+EXPECT $BRICK_COUNT query_pathinfo $M0/f00d;
+
+# Kill a brick process and then query for pathinfo
+# for directories pathinfo should list backend patch from available (up) subvolumes
+
+kill_brick $V0 $H0 ${B0}/${V0}1
+
+EXPECT `expr $BRICK_COUNT - 1` query_pathinfo $M0/f00d;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-860297.t b/tests/bugs/glusterfs/bug-860297.t
new file mode 100644
index 00000000000..c2d21553f68
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-860297.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 $H0:$B0/brick1
+setfattr -x trusted.glusterfs.volume-id $B0/brick1
+## If Extended attribute trusted.glusterfs.volume-id is not present
+## then volume should not be able to start
+TEST ! $CLI volume start $V0;
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-861015-index.t b/tests/bugs/glusterfs/bug-861015-index.t
new file mode 100644
index 00000000000..74ffc45bf2e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-861015-index.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+cd $M0
+HEAL_FILES=0
+for i in {1..10}
+do
+ echo "abc" > $i
+ HEAL_FILES=$(($HEAL_FILES+1))
+done
+HEAL_FILES=$(($HEAL_FILES+3)) #count brick root distribute-subvol num of times
+
+cd ~
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+TEST rm -f $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume heal $V0 info
+#Only root dir should be present now in the indices
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}1
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}3
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}5
+cleanup
diff --git a/tests/bugs/glusterfs/bug-861015-log.t b/tests/bugs/glusterfs/bug-861015-log.t
new file mode 100644
index 00000000000..2f3e0ad14f4
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-861015-log.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+log_wd=$(gluster --print-logdir)
+TEST glusterd
+TEST pidof glusterd
+rm -f $log_wd/glustershd.log
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd $M0
+for i in {1..10}
+do
+ dd if=/dev/urandom of=f bs=1024k count=10 2>/dev/null
+done
+
+cd ~
+TEST $CLI volume heal $V0 info
+function count_inode_link_failures {
+ logfile=$1
+ grep "inode link failed on the inode" $logfile | wc -l
+}
+EXPECT "0" count_inode_link_failures $log_wd/glustershd.log
+cleanup
diff --git a/tests/bugs/glusterfs/bug-866459.t b/tests/bugs/glusterfs/bug-866459.t
new file mode 100644
index 00000000000..a6c767b4b0e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-866459.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Create and start a volume with aio enabled
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 linux-aio on
+TEST $CLI volume set $V0 background-self-heal-count 0
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+dd of=$M0/a if=/dev/urandom bs=1024k count=1 2>&1 > /dev/null
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+## Bring a brick down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+## Rewrite the file
+dd of=$M0/a if=/dev/urandom bs=1024k count=1 2>&1 > /dev/null
+TEST $CLI volume start $V0 force
+## Wait for the brick to give CHILD_UP in client protocol
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+md5offile2=`md5sum $B0/${V0}2/a | awk '{print $1}'`
+
+##trigger self-heal
+ls -l $M0/a
+
+EXPECT "$md5offile2" echo `md5sum $B0/${V0}1/a | awk '{print $1}'`
+
+## Finish up
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-867253.t b/tests/bugs/glusterfs/bug-867253.t
new file mode 100644
index 00000000000..8c3c39baace
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-867253.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+function file_count()
+{
+ val=1
+
+ if [ "$1" == "0" ]
+ then
+ if [ "$2" == "0" ]
+ then
+ val=0
+ fi
+ fi
+ echo $val
+}
+
+BRICK_COUNT=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+## Mount nfs, with nocache option
+TEST mount_nfs $H0:/$V0 $M0 nolock,noac;
+
+touch $M0/files{1..1000};
+
+# Kill a brick process
+kill_brick $V0 $H0 $B0/${V0}0
+
+drop_cache $M0
+
+ls -l $M0 >/dev/null;
+
+NEW_FILE_COUNT=`echo $?`;
+
+TEST $CLI volume start $V0 force
+
+# Kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+
+drop_cache $M0
+
+ls -l $M0 >/dev/null;
+
+NEW_FILE_COUNT1=`echo $?`;
+
+EXPECT "0" file_count $NEW_FILE_COUNT $NEW_FILE_COUNT1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup
diff --git a/tests/bugs/glusterfs/bug-869724.t b/tests/bugs/glusterfs/bug-869724.t
new file mode 100644
index 00000000000..ca5bb17081c
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-869724.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Make volume tightly consistent for metdata
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+touch $M0/test;
+build_tester $(dirname $0)/getlk_owner.c
+
+TEST $(dirname $0)/getlk_owner $M0/test;
+
+rm -f $(dirname $0)/getlk_owner
+cleanup;
+
diff --git a/tests/bugs/glusterfs/bug-872923.t b/tests/bugs/glusterfs/bug-872923.t
new file mode 100755
index 00000000000..00e02c89cbe
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-872923.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+cd $N0
+mkdir test_hardlink_self_heal;
+cd test_hardlink_self_heal;
+
+for i in `seq 1 5`;
+do
+ mkdir dir.$i;
+ for j in `seq 1 10`;
+ do
+ dd if=/dev/zero of=dir.$i/file.$j bs=1k count=$j > /dev/null 2>&1;
+ done;
+done;
+
+cd ..
+TEST kill_brick $V0 $H0 $B0/brick0
+cd test_hardlink_self_heal;
+
+RET=0
+for i in `seq 1 5`;
+do
+ for j in `seq 1 10`;
+ do
+ ln dir.$i/file.$j dir.$i/link_file.$j > /dev/null 2>&1;
+ RET=$?
+ if [ $RET -ne 0 ]; then
+ break;
+ fi
+ done ;
+ if [ $RET -ne 0 ]; then
+ break;
+ fi
+done;
+
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+EXPECT "0" echo $RET;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t
new file mode 100644
index 00000000000..db71cc0f6fe
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-873962-spb.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+touch $M0/a
+
+exec 5<$M0/a
+
+kill_brick $V0 $H0 $B0/${V0}0
+echo "hi" > $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+kill_brick $V0 $H0 $B0/${V0}1
+echo "bye" > $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST ! cat $M0/a #To mark split-brain
+
+TEST ! read -u 5 line
+exec 5<&-
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-873962.t b/tests/bugs/glusterfs/bug-873962.t
new file mode 100755
index 00000000000..7faa9998159
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-873962.t
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+#AFR TEST-IDENTIFIER SPLIT-BRAIN
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+
+# If we allow self-heal to happen in the background, we'll get spurious
+# failures - especially at the point labeled "FAIL HERE" but
+# occasionally elsewhere. This behavior is very timing-dependent. It
+# doesn't show up in Jenkins, but it does on JD's and KP's machines, and
+# it got sharply worse because of an unrelated fsync change (6ae6f3d)
+# which changed timing. Putting anything at the FAIL HERE marker tends
+# to make it go away most of the time on affected machines, even if the
+# "anything" is unrelated.
+#
+# What's going on is that the I/O on the first mountpoint is allowed to
+# complete even though self-heal is still in progress and the state on
+# disk does not reflect its result. In fact, the state changes during
+# self-heal create the appearance of split brain when the second I/O
+# comes in, so that fails even though we haven't actually been in split
+# brain since the manual xattr operations. By disallowing background
+# self-heal, we ensure that the second I/O can't happen before self-heal
+# is complete, because it has to follow the first I/O which now has to
+# follow self-heal.
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+#Make sure self-heal is not triggered when the bricks are re-started
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+TEST touch $M0/a
+TEST touch $M0/b
+TEST touch $M0/c
+TEST touch $M0/d
+echo "1" > $M0/b
+echo "1" > $M0/d
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "1" > $M0/a
+echo "1" > $M0/c
+TEST setfattr -n trusted.mdata -v abc $M0/b
+TEST setfattr -n trusted.mdata -v abc $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "2" > $M0/a
+echo "2" > $M0/c
+TEST setfattr -n trusted.mdata -v def $M0/b
+TEST setfattr -n trusted.mdata -v def $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M1 --direct-io-mode=enable
+
+#Files are in split-brain, so open should fail
+TEST ! cat $M0/a;
+TEST ! cat $M1/a;
+TEST ! cat $M0/b;
+TEST ! cat $M1/b;
+
+#Reset split-brain status
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a;
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/b;
+
+#The operations should do self-heal and give correct output
+EXPECT "2" cat $M0/a;
+# FAIL HERE - see comment about cluster.self-heal-background-count above.
+EXPECT "2" cat $M1/a;
+TEST dd if=$M0/b of=/dev/null bs=1024k
+EXPECT "def" getfattr -n trusted.mdata --only-values $M0/b 2>/dev/null
+EXPECT "def" getfattr -n trusted.mdata --only-values $M1/b 2>/dev/null
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M1 --direct-io-mode=enable
+
+#Files are in split-brain, so open should fail
+TEST ! cat $M0/c
+TEST ! cat $M1/c
+TEST ! cat $M0/d
+TEST ! cat $M1/d
+
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d
+
+#The operations should NOT do self-heal but give correct output
+EXPECT "2" cat $M0/c
+EXPECT "2" cat $M1/c
+EXPECT "1" cat $M0/d
+EXPECT "1" cat $M1/d
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-879490.t b/tests/bugs/glusterfs/bug-879490.t
new file mode 100755
index 00000000000..fb8d4263919
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-879490.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function peer_probe()
+{
+ $CLI peer probe a.b.c.d --xml | xmllint --format - | grep "<opErrstr>"
+}
+
+EXPECT " <opErrstr>Probe returned with Transport endpoint is not connected</opErrstr>" peer_probe
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-879494.t b/tests/bugs/glusterfs/bug-879494.t
new file mode 100755
index 00000000000..12ee466b33a
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-879494.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function peer_probe()
+{
+ $CLI peer detach a.b.c.d --xml | xmllint --format - | grep "<opErrstr>"
+}
+
+EXPECT " <opErrstr>a.b.c.d is not part of cluster</opErrstr>" peer_probe
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-892730.t b/tests/bugs/glusterfs/bug-892730.t
new file mode 100755
index 00000000000..a76961134c5
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-892730.t
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Bug 892730 - Verify that afr handles EIO errors from the brick properly.
+#
+# The associated bug describes a problem where EIO errors returned from the
+# local filesystem of a brick that is part of a replica volume are exposed to
+# the user. This test simulates such failures and verifies that the volume
+# operates as expected.
+#
+########
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test{1,2}
+
+# The graph is a two brick replica with error-gen enabled on the second brick
+# and configured to return EIO lookup errors 100% of the time. This simulates
+# a brick with a crashed or shut down local filesystem. Note that the order in
+# which errors occur is a factor in reproducing the original bug (error-gen
+# must be enabled in the second brick for this test to be effective).
+
+cat > $B0/test.vol <<EOF
+volume test-posix-0
+ type storage/posix
+ option directory $B0/test1
+end-volume
+
+volume test-locks-0
+ type features/locks
+ subvolumes test-posix-0
+end-volume
+
+volume test-posix-1
+ type storage/posix
+ option directory $B0/test2
+end-volume
+
+volume test-error-1
+ type debug/error-gen
+ option failure 100
+ option enable lookup
+ option error-no EIO
+ subvolumes test-posix-1
+end-volume
+
+volume test-locks-1
+ type features/locks
+ subvolumes test-error-1
+end-volume
+
+volume test-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 0
+ subvolumes test-locks-0 test-locks-1
+end-volume
+EOF
+
+TEST glusterd
+
+TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0
+
+# We should be able to create and remove a file without interference from the
+# "broken" brick.
+
+TEST touch $M0/file
+TEST rm $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -f $B0/test.vol
+rm -rf $B0/test1 $B0/test2
+
+cleanup;
+
diff --git a/tests/bugs/glusterfs/bug-893338.t b/tests/bugs/glusterfs/bug-893338.t
new file mode 100644
index 00000000000..b915d3e791e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-893338.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+## Test for symlink success
+TEST touch $M0/reg_file
+TEST ln -s $M0/reg_file $M0/symlink
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-893378.t b/tests/bugs/glusterfs/bug-893378.t
new file mode 100755
index 00000000000..444dafb44bc
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-893378.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+BRICK_COUNT=3
+
+function file_has_linkfile()
+{
+ i=0
+ j=0
+ while [ $i -lt $BRICK_COUNT ]
+ do
+ stat=`stat $B0/${V0}$i/$1 2>/dev/null`
+ if [ $? -eq 0 ]
+ then
+ let j++
+ let "BRICK${j}=$i"
+
+ fi
+ let i++
+ done
+ return $j
+}
+
+function get_cached_brick()
+{
+ i=1
+ while [ $i -lt 3 ]
+ do
+ test=`getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$BRICK$i 2>&1`
+ if [ $? -eq 1 ]
+ then
+ cached=$BRICK"$i"
+ i=$(( $i+3 ))
+ fi
+ let i++
+ done
+
+ return $cached
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+## create a linkfile on subvolume 0
+TEST touch $M0/1
+TEST mv $M0/1 $M0/2
+
+file_has_linkfile 2
+has_link=$?
+if [ $has_link -eq 2 ]
+then
+ get_cached_brick
+ CACHED=$?
+ # Kill a brick process
+ kill_brick $V0 $H0 $B0/${V0}$CACHED
+fi
+
+## trigger a lookup
+ls -l $M0/2 2>/dev/null
+
+## fail dd if file exists.
+
+dd if=/dev/zero of=$M0/2 bs=1 count=1 conv=excl 2>/dev/null
+EXPECT "1" echo $?
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-895235.t b/tests/bugs/glusterfs/bug-895235.t
new file mode 100644
index 00000000000..ac9caae9561
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-895235.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+
+TEST gluster volume profile $V0 start
+TEST dd of=$M0/a if=/dev/zero bs=1024k count=1 oflag=append
+finodelk_max_latency=$($CLI volume profile $V0 info | grep FINODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ -z $finodelk_max_latency ]
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-896431.t b/tests/bugs/glusterfs/bug-896431.t
new file mode 100755
index 00000000000..61f71141713
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-896431.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting cluster.subvols-per-directory as -5
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory -5
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory -5
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 0
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 0
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 0
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 4 (the total number of bricks)
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 4
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 4
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 2 (the total number of subvolumes)
+TEST $CLI volume set $V0 cluster.subvols-per-directory 2
+EXPECT '2' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 1
+TEST $CLI volume set $V0 subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a pure replicate volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 8 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Replicate' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting cluster.subvols-per-directory as 8 for a replicate volume
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 1 for a replicate volume
+TEST $CLI volume set $V0 cluster.subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST $CLI volume set $V0 subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-902610.t b/tests/bugs/glusterfs/bug-902610.t
new file mode 100755
index 00000000000..112c947e116
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-902610.t
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Layout-spread set to 3, but subvols up are 2. So layout should split 50-50
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2`
+ layout1_s=$(echo $layout1 | cut -c 19-26)
+ layout1_e=$(echo $layout1 | cut -c 27-34)
+ #echo "layout1 from $layout1_s to $layout1_e" > /dev/tty
+ layout2=`getfattr -n trusted.glusterfs.dht -e hex $2 2>&1|grep dht |cut -d = -f2`
+ layout2_s=$(echo $layout2 | cut -c 19-26)
+ layout2_e=$(echo $layout2 | cut -c 27-34)
+ #echo "layout2 from $layout2_s to $layout2_e" > /dev/tty
+
+ if [ x"$layout2_s" = x"00000000" ]; then
+ # Reverse so we only have the real logic in one place.
+ tmp_s=$layout1_s
+ tmp_e=$layout1_e
+ layout1_s=$layout2_s
+ layout1_e=$layout2_e
+ layout2_s=$tmp_s
+ layout2_e=$tmp_e
+ fi
+
+ # Figure out where the join point is.
+ target=$( $PYTHON -c "print('%08x' % (0x$layout1_e + 1))")
+ #echo "target for layout2 = $target" > /dev/tty
+
+ # The second layout should cover everything that the first doesn't.
+ if [ x"$layout2_s" = x"$target" -a x"$layout2_e" = x"ffffffff" ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+BRICK_COUNT=4
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+## set subvols-per-dir option
+TEST $CLI volume set $V0 subvols-per-directory 3
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0 --entry-timeout=0 --attribute-timeout=0;
+
+TEST ls -l $M0
+
+## kill 2 bricks to bring down available subvol < spread count
+kill_brick $V0 $H0 $B0/${V0}2
+kill_brick $V0 $H0 $B0/${V0}3
+
+mkdir $M0/dir1 2>/dev/null
+
+get_layout $B0/${V0}0/dir1 $B0/${V0}1/dir1
+EXPECT "0" echo $?
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-906646.t b/tests/bugs/glusterfs/bug-906646.t
new file mode 100644
index 00000000000..37b8fe5c8eb
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-906646.t
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+REPLICA=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+function xattr_query_check()
+{
+ local path=$1
+ local xa_name=$2
+
+ local ret=$(getfattr -n $xa_name $path 2>&1 | grep -o "$xa_name: No such attribute" | wc -l)
+ echo $ret
+}
+
+function set_xattr()
+{
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+function remove_xattr()
+{
+ local path=$1
+ local xa_name=$2
+
+ setfattr -x $xa_name $path
+ echo $?
+}
+
+f=f00f
+pth=$M0/$f
+
+TEST touch $pth
+
+# fetch backend paths
+backend_paths=`get_backend_paths $pth`
+
+# convert it into and array
+backend_paths_array=($backend_paths)
+
+# setxattr xattr for this file
+EXPECT 0 set_xattr $pth "trusted.name" "test"
+
+# confirm the set on backend
+EXPECT 0 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 0 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+brick_path=`echo ${backend_paths_array[0]} | sed -n 's/\(.*\)\/'$f'/\1/p'`
+brick_id=`$CLI volume info $V0 | grep "Brick[[:digit:]]" | grep -n $brick_path | cut -f1 -d:`
+
+# Kill a brick process
+TEST kill_brick $V0 $H0 $brick_path
+
+# remove the xattr from the mount point
+EXPECT 0 remove_xattr $pth "trusted.name"
+
+# we killed ${backend_paths[0]} - so expect the xattr to be there
+# on the backend there
+EXPECT 0 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+# restart the brick process
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+# check backends - xattr should not be present anywhere
+EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+cleanup;
diff --git a/tests/bugs/glusterfs/getlk_owner.c b/tests/bugs/glusterfs/getlk_owner.c
new file mode 100644
index 00000000000..cbe277318c1
--- /dev/null
+++ b/tests/bugs/glusterfs/getlk_owner.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define GETLK_OWNER_CHECK(f, cp, label) \
+ do { \
+ switch (f.l_type) { \
+ case F_RDLCK: \
+ case F_WRLCK: \
+ ret = 1; \
+ goto label; \
+ case F_UNLCK: \
+ if (!are_flocks_sane(&f, &cp)) { \
+ ret = 1; \
+ goto label; \
+ } \
+ break; \
+ } \
+ } while (0)
+
+void
+flock_init(struct flock *f, short int type, off_t start, off_t len)
+{
+ f->l_type = type;
+ f->l_start = start;
+ f->l_len = len;
+}
+
+int
+flock_cp(struct flock *dst, struct flock *src)
+{
+ memcpy((void *)dst, (void *)src, sizeof(struct flock));
+}
+
+int
+are_flocks_sane(struct flock *src, struct flock *cpy)
+{
+ return ((src->l_whence == cpy->l_whence) &&
+ (src->l_start == cpy->l_start) && (src->l_len == cpy->l_len));
+}
+
+/*
+ * Test description:
+ * SETLK (0,3), F_WRLCK
+ * SETLK (3,3), F_WRLCK
+ *
+ * the following GETLK requests must return flock struct unmodified
+ * except for l_type to F_UNLCK
+ * GETLK (3,3), F_WRLCK
+ * GETLK (3,3), F_RDLCK
+ *
+ * */
+
+int
+main(int argc, char **argv)
+{
+ int fd = -1;
+ int ret = 1;
+ char *fname = NULL;
+ struct flock f = {
+ 0,
+ };
+ struct flock cp = {
+ 0,
+ };
+
+ if (argc < 2)
+ goto out;
+
+ fname = argv[1];
+ fd = open(fname, O_RDWR);
+ if (fd == -1) {
+ perror("open");
+ goto out;
+ }
+
+ flock_init(&f, F_WRLCK, 0, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_SETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ goto out;
+ }
+ if (!are_flocks_sane(&f, &cp)) {
+ ret = 1;
+ goto out;
+ }
+
+ flock_init(&f, F_WRLCK, 3, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_SETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ goto out;
+ }
+ if (!are_flocks_sane(&f, &cp)) {
+ ret = 1;
+ goto out;
+ }
+
+ flock_init(&f, F_WRLCK, 3, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_GETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ return 1;
+ }
+ GETLK_OWNER_CHECK(f, cp, out);
+
+ flock_init(&f, F_RDLCK, 3, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_GETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ return 1;
+ }
+ GETLK_OWNER_CHECK(f, cp, out);
+
+out:
+ if (fd != -1)
+ close(fd);
+ return ret;
+}
diff --git a/tests/bugs/heal-symlinks.t b/tests/bugs/heal-symlinks.t
new file mode 100644
index 00000000000..ecd2b525be1
--- /dev/null
+++ b/tests/bugs/heal-symlinks.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+cleanup;
+
+###############################################################################
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+EXPECT "hello_world" cat $B0/${V0}2/SOFTLINK
+
+cd -
+cleanup
+###############################################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+cd $M0
+EXPECT "hello_world" cat SOFTLINK
+
+cd -
+cleanup
+###############################################################################
diff --git a/tests/bugs/index/bug-1559004-EMLINK-handling.t b/tests/bugs/index/bug-1559004-EMLINK-handling.t
new file mode 100644
index 00000000000..5596fa56c4c
--- /dev/null
+++ b/tests/bugs/index/bug-1559004-EMLINK-handling.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+function create_fake_links() {
+ local dst="$1"
+ local dir="$2"
+ local end=0
+ local start=0
+ local src
+
+ src="$(ls ${dst}/.glusterfs/indices/${dir}/${dir}-* | head -1)"
+ mkdir -p ${dst}/.glusterfs/dummy/${dir}
+ while ln ${src} ${dst}/.glusterfs/dummy/${dir}/link-${end}; do
+ end="$((${end} + 1))"
+ done
+
+ if [[ ${end} -gt 50 ]]; then
+ start="$((${end} - 50))"
+ fi
+ if [[ ${end} -gt 0 ]]; then
+ end="$((${end} - 1))"
+ fi
+
+ for i in $(seq ${start} ${end}); do
+ rm -f ${dst}/.glusterfs/dummy/${dir}/link-${i}
+ done
+}
+
+function count_fake_links() {
+ local dst="$1"
+ local dir="$2"
+
+ echo "$(find ${dst}/.glusterfs/dummy/${dir}/ -name "link-*" | wc -l)"
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST touch $B0/ext4-1
+TEST touch $B0/ext4-2
+TEST touch $B0/ext4-3
+TEST truncate -s 2GB $B0/ext4-1
+TEST truncate -s 2GB $B0/ext4-2
+TEST truncate -s 2GB $B0/ext4-3
+
+TEST mkfs.ext4 -F $B0/ext4-1
+TEST mkfs.ext4 -F $B0/ext4-2
+TEST mkfs.ext4 -F $B0/ext4-3
+TEST mkdir $B0/ext41
+TEST mkdir $B0/ext42
+TEST mkdir $B0/ext43
+TEST mount -t ext4 -o loop $B0/ext4-1 $B0/ext41
+TEST mount -t ext4 -o loop $B0/ext4-2 $B0/ext42
+TEST mount -t ext4 -o loop $B0/ext4-3 $B0/ext43
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/ext4{1,2,3}
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 granular-entry-heal enable
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/ext41
+
+# Make sure indices exist and are initialized
+TEST touch $M0/dummy
+
+# Create enough hard links on bricks to make it fail faster. This is much
+# faster than creating ~70000 files on a volume.
+create_fake_links $B0/ext42 xattrop &
+create_fake_links $B0/ext42 entry-changes &
+wait
+count_xattrop="$(count_fake_links $B0/ext42 xattrop)"
+count_entry="$(count_fake_links $B0/ext42 entry-changes)"
+
+TEST mkdir $M0/d{1..10}
+TEST touch $M0/d{1..10}/{1..10}
+
+#On ext4 max number of hardlinks is ~65k, so there should be 2 base index files
+EXPECT "^2$" echo $(ls $B0/ext42/.glusterfs/indices/xattrop | grep xattrop | wc -l)
+EXPECT "^2$" echo $(ls $B0/ext42/.glusterfs/indices/entry-changes | grep entry-changes | wc -l)
+
+#Number of hardlinks: count_xattrop/count_entry for fake links, 101 for files,
+# 10 for dirs and 2 for base-indices and root-dir for xattrop
+EXPECT "$((${count_xattrop} + 114))" echo $(ls -l $B0/ext42/.glusterfs/indices/xattrop | grep xattrop | awk '{sum+=$2} END{print sum}')
+EXPECT "$((${count_entry} + 113))" echo $(ls -l $B0/ext42/.glusterfs/indices/entry-changes | grep entry-changes | awk '{sum+=$2} END{print sum}')
+
+cleanup
diff --git a/tests/bugs/io-cache/bug-858242.c b/tests/bugs/io-cache/bug-858242.c
new file mode 100644
index 00000000000..ac87a15533e
--- /dev/null
+++ b/tests/bugs/io-cache/bug-858242.c
@@ -0,0 +1,84 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main(int argc, char *argv[])
+{
+ char *filename = NULL, *volname = NULL, *cmd = NULL;
+ char buffer[1024] = {
+ 0,
+ };
+ int fd = -1;
+ int ret = -1;
+ struct stat statbuf = {
+ 0,
+ };
+
+ if (argc != 3) {
+ fprintf(stderr, "usage: %s <file-name> <volname>\n", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ volname = argv[2];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ ret = write(fd, "test-content", 12);
+ if (ret < 0) {
+ fprintf(stderr, "write failed (%s)", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed (%s)", strerror(errno));
+ goto out;
+ }
+
+ ret = fstat(fd, &statbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed (%s)", strerror(errno));
+ goto out;
+ }
+
+ ret = asprintf(&cmd, "gluster --mode=script volume stop %s force", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "stopping volume (%s) failed", volname);
+ goto out;
+ }
+
+ sleep(3);
+
+ ret = read(fd, buffer, 1024);
+ if (ret >= 0) {
+ fprintf(stderr,
+ "read should've returned error, "
+ "but is successful\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/tests/bugs/io-cache/bug-858242.t b/tests/bugs/io-cache/bug-858242.t
new file mode 100755
index 00000000000..0c8ffb6ab30
--- /dev/null
+++ b/tests/bugs/io-cache/bug-858242.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 performance.quick-read off
+
+#mount on a random dir
+TEST glusterfs --entry-timeout=3600 --attribute-timeout=3600 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=yes
+
+build_tester $(dirname $0)/bug-858242.c
+
+TEST $(dirname $0)/bug-858242 $M0/testfile $V0
+
+TEST rm -rf $(dirname $0)/858242
+cleanup;
+
diff --git a/tests/bugs/io-cache/bug-read-hang.c b/tests/bugs/io-cache/bug-read-hang.c
new file mode 100644
index 00000000000..e1fae97e7e8
--- /dev/null
+++ b/tests/bugs/io-cache/bug-read-hang.c
@@ -0,0 +1,125 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NO_INIT 1
+
+int count = 0;
+void
+read_cbk(glfs_fd_t *fd, ssize_t ret, void *data)
+{
+ count++;
+}
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_file, int flag)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ if (flag == NO_INIT)
+ goto out;
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ struct glfs_fd *fd = NULL;
+ char *volname = NULL;
+ char *log_file = NULL;
+ char *hostname = NULL;
+ char *buf = NULL;
+ struct stat stat;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ log_file = argv[3];
+
+ fs = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ glfs_readdirplus(fd, &stat);
+
+ fd = glfs_open(fs, "/test", O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto error;
+ }
+
+ buf = (char *)malloc(5);
+
+ ret = glfs_pread(fd, buf, 5, 0, 0, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Read(%s): %d (%s)\n", "test", ret, strerror(errno));
+ return ret;
+ }
+
+ free(buf);
+ glfs_close(fd);
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/bugs/io-cache/bug-read-hang.t b/tests/bugs/io-cache/bug-read-hang.t
new file mode 100755
index 00000000000..f8efe281723
--- /dev/null
+++ b/tests/bugs/io-cache/bug-read-hang.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2};
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+TEST $CLI volume set $V0 open-behind off
+
+logdir=`gluster --print-logdir`
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+echo "Hello" > $M0/test
+
+TEST build_tester $(dirname $0)/bug-read-hang.c -lgfapi
+TEST $(dirname $0)/bug-read-hang $H0 $V0 $logdir/bug-read-hang.log
+
+cleanup_tester $(dirname $0)/bug-read-hang
+
+cleanup;
diff --git a/tests/bugs/io-stats/bug-1598548.t b/tests/bugs/io-stats/bug-1598548.t
new file mode 100755
index 00000000000..19b0c053d08
--- /dev/null
+++ b/tests/bugs/io-stats/bug-1598548.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+checkdumpthread () {
+ local brick_pid=$(get_brick_pid $1 $2 $3)
+ local thread_count=$(gstack $brick_pid | grep -c _ios_dump_thread)
+ echo $thread_count
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+
+TEST $CLI volume profile $V0 start
+EXPECT 0 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 3
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 10
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 0
+EXPECT 0 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 7
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 0
+EXPECT 0 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 11
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+cleanup;
diff --git a/tests/bugs/logging/bug-823081.t b/tests/bugs/logging/bug-823081.t
new file mode 100755
index 00000000000..bd1965d2d49
--- /dev/null
+++ b/tests/bugs/logging/bug-823081.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+cmd_log_history="cmd_history.log"
+V1=patchy2
+
+TEST glusterd
+TEST pidof glusterd
+
+logdir=`gluster --print-logdir`
+function set_tail ()
+{
+ vol=$1;
+ tail_success="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 : SUCCESS"
+ tail_failure="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 : FAILED : Volume $vol already exists"
+ tail_success_force="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 force : SUCCESS"
+ tail_failure_force="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 force : FAILED : Volume $vol already exists"
+}
+
+set_tail $V0;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_success\" ]]
+
+TEST ! $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_failure\" ]]
+
+set_tail $V1;
+TEST gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_success_force\" ]]
+
+TEST ! gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_failure_force\" ]]
+
+cleanup;
diff --git a/tests/bugs/md-cache/afr-stale-read.t b/tests/bugs/md-cache/afr-stale-read.t
new file mode 100755
index 00000000000..7cee5afe27e
--- /dev/null
+++ b/tests/bugs/md-cache/afr-stale-read.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2};
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 read-subvolume $V0-client-0
+TEST $CLI volume set $V0 performance.quick-read off
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+#Write some data from M0 and read it from M1,
+#so that M1 selects a read subvol, and caches the lookup
+TEST `echo "one" > $M0/file1`
+EXPECT "one" cat $M1/file1
+
+#Fail few writes from M0 on brick-0, as a result of this failure
+#upcall in brick-0 will invalidate the read subvolume of M1.
+TEST chattr +i $B0/${V0}1/file1
+TEST `echo "two" > $M0/file1`
+TEST `echo "three" > $M0/file1`
+TEST `echo "four" > $M0/file1`
+TEST `echo "five" > $M0/file1`
+
+EXPECT_WITHIN $MDC_TIMEOUT "five" cat $M1/file1
+TEST chattr -i $B0/${V0}1/file1
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1211863.t b/tests/bugs/md-cache/bug-1211863.t
new file mode 100755
index 00000000000..ba9bde9fee8
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1211863.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Start the volume
+TEST $CLI volume start $V0
+
+## 4. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.xattr-cache-list "user.*"
+
+## 6. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 8. Create a file
+TEST touch $M0/file1
+
+## 9. Setxattr from mount-0
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+## 10. Getxattr from mount-1, this should return the correct value as it is a fresh getxattr
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 11. Now modify the same xattr from mount-0 again
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+## 12. Since the xattr is already cached in mount-1 it returns the old xattr
+ #value, until the timeout (600)
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 13. Unmount to clean all the cache
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+
+## 19. Restart the volume to restart the bick process
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+## 21. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 23. Repeat the tests 11-14, but this time since cache invalidation is on,
+ #the getxattr will reflect the new value
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+sleep 2; #There can be a very very small window where the next getxattr
+ #reaches md-cache, before the cache-invalidation caused by previous
+ #setxattr, reaches md-cache. Hence sleeping for 2 sec.
+ #Also it should not be > 600.
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz"
+
+TEST $CLI volume set $V0 cache-samba-metadata off
+EXPECT 'off' volinfo_field $V0 'performance.cache-samba-metadata'
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1211863_unlink.t b/tests/bugs/md-cache/bug-1211863_unlink.t
new file mode 100755
index 00000000000..34392ed919f
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1211863_unlink.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## Start the volume
+TEST $CLI volume start $V0
+
+## Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+## Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## Create files and directories from M0
+TEST mkdir $M0/dir1
+TEST touch $M0/dir1/file{1..5}
+
+## Lookup few files from M1, so that md-cache cahces
+TEST ls -l $M1/dir1/file2
+TEST ls -l $M1/dir1/file3
+
+## Remove the looked up file from M0
+TEST rm $M0/dir1/file2
+TEST mv $M0/dir1/file3 $M0/dir1/file6
+
+## Check if the files are not visible from both M0 and M1
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M0/dir1/file2
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M0/dir1/file3
+EXPECT_WITHIN $MDC_TIMEOUT "Y" path_exists $M0/dir1/file6
+
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M1/dir1/file2
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M1/dir1/file3
+EXPECT_WITHIN $MDC_TIMEOUT "Y" path_exists $M1/dir1/file6
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1476324.t b/tests/bugs/md-cache/bug-1476324.t
new file mode 100644
index 00000000000..c34f412a15e
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1476324.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/file1
+
+TEST "setfattr -n user.DOSATTRIB -v 0sAAOW $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M0/file1 -e base64 | grep -q 0sAAOW"
+
+TEST "setfattr -n user.DOSATTRIB -v 0x00ff $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M0/file1 -e hex | grep -q 0x00ff"
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1632503.t b/tests/bugs/md-cache/bug-1632503.t
new file mode 100755
index 00000000000..aeb57f65639
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1632503.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TESTS_EXPECTED_IN_LOOP=5
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.md-cache-statfs on
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+for i in $(seq 1 5); do
+ TEST_IN_LOOP df $M0;
+done
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1726205.t b/tests/bugs/md-cache/bug-1726205.t
new file mode 100644
index 00000000000..795130e9bd8
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1726205.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 group samba
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/file
+TEST "setfattr -n "user.DosStream.Zone.Identifier:\$DATA" -v '\0' $M0/file"
+TEST "getfattr -n "user.DosStream.Zone.Identifier:\$DATA" -e hex $M0/file | grep -q 0x00"
+
+cleanup;
diff --git a/tests/bugs/md-cache/setxattr-prepoststat.t b/tests/bugs/md-cache/setxattr-prepoststat.t
new file mode 100755
index 00000000000..01fa768299c
--- /dev/null
+++ b/tests/bugs/md-cache/setxattr-prepoststat.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+
+TEST $CLI volume set $V0 group metadata-cache
+TEST $CLI volume set $V0 performance.xattr-cache-list "user.*"
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST touch $M0/file1
+TEST `echo "abakjshdjahskjdhakjhdskjac" >> $M0/file1`
+size=`stat -c '%s' $M0/file1`
+
+## Setxattr from mount-0
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+EXPECT $size stat -c '%s' $M0/file1
+
+## Getxattr from mount-1, this should return the correct value
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+TEST "setfattr -x user.DOSATTRIB $M1/file1"
+EXPECT $size stat -c '%s' $M1/file1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup;
diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t
new file mode 100755
index 00000000000..2f53172e24c
--- /dev/null
+++ b/tests/bugs/nfs/bug-1053579.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+# prepare the users and groups
+NEW_USER=bug1053579
+NEW_UID=1053579
+NEW_GID=1053579
+LAST_GID=1053779
+NEW_GIDS=${NEW_GID}
+
+# OS-specific overrides
+case $OSTYPE in
+NetBSD|Darwin)
+ # only NGROUPS_MAX=16 secondary groups are supported
+ LAST_GID=1053593
+ ;;
+FreeBSD)
+ # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups
+ ;;
+Linux)
+ # NGROUPS_MAX=65536, we can afford 200 groups
+ ;;
+*)
+ ;;
+esac
+
+# create a user that belongs to many groups
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupadd -o -g ${GID} ${NEW_USER}-${GID}
+ NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}"
+done
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+
+# preparation done, start the tests
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 nfs.server-aux-gids on
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+# mount the volume
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+# the actual test, this used to crash
+su -m ${NEW_USER} -c "stat $N0/. > /dev/null"
+TEST [ $? -eq 0 ]
+
+# create a file that only a user in a high-group can access
+echo 'Hello World!' > $N0/README
+chgrp ${LAST_GID} $N0/README
+chmod 0640 $N0/README
+
+#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+su -m ${NEW_USER} -c "cat $N0/README"
+ret=$?
+
+case $OSTYPE in
+Linux) # Linux NFS fails with big GID
+ if [ $ret -ne 0 ] ; then
+ res="Y"
+ else
+ res="N"
+ fi
+ ;;
+*) # Other systems should cope better
+ if [ $ret -eq 0 ] ; then
+ res="Y"
+ else
+ res="N"
+ fi
+ ;;
+esac
+TEST [ "x$res" = "xY" ]
+
+# This passes only on build.gluster.org, not reproducible on other machines?!
+#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null"
+#TEST [ $? -ne 0 ]
+
+# enable server.manage-gids and things should work
+TEST $CLI volume set $V0 server.manage-gids on
+
+su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+TEST [ $? -eq 0 ]
+su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null"
+TEST [ $? -eq 0 ]
+
+# cleanup
+userdel --force ${NEW_USER}
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupdel ${NEW_USER}-${GID}
+done
+
+rm -f $N0/README
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
new file mode 100644
index 00000000000..c360db4c91c
--- /dev/null
+++ b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir -p $N0/foo
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+TEST mount_nfs $H0:/$V0/foo $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+cleanup
diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
new file mode 100644
index 00000000000..dea609ed193
--- /dev/null
+++ b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
@@ -0,0 +1,126 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume info;
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+mkdir $N0/dir1;
+mkdir $N0/dir2;
+pushd $N0/ ;
+
+##link created using relative path
+ln -s dir1 symlink1;
+
+##relative path contains ".."
+ln -s ../dir1 dir2/symlink2;
+
+##link created using absolute path
+ln -s $N0/dir1 symlink3;
+
+##link pointing to another symlinks
+ln -s symlink1 symlink4
+ln -s symlink3 symlink5
+
+##dead links
+ln -s does/not/exist symlink6
+
+##link which contains ".." points out of glusterfs
+ln -s ../../ symlink7
+
+##links pointing to unauthorized area
+ln -s .glusterfs symlink8
+
+popd ;
+
+##Umount the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via directory
+TEST mount_nfs $H0:/$V0/dir1 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink1
+TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink2
+TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink3 should fail
+TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock;
+
+## Mount and umount NFS via symlink4
+TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink5 should fail
+TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock;
+
+## Mount NFS via symlink6 should fail
+TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock;
+
+## Mount NFS via symlink7 should fail
+TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock;
+
+## Mount NFS via symlink8 should fail
+TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock;
+
+##Similar check for udp mount
+$CLI volume stop $V0
+TEST $CLI volume set $V0 nfs.mount-udp on
+$CLI volume start $V0
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount and umount NFS via directory
+TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink1
+TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink2
+TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink3 should fail
+TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp;
+
+## Mount and umount NFS via symlink4
+TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink5 should fail
+TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp;
+
+## Mount NFS via symlink6 should fail
+TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp;
+
+##symlink7 is not check here, because in udp mount ../../ resolves into root '/'
+
+## Mount NFS via symlink8 should fail
+TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp;
+
+rm -rf $H0:$B0/
+cleanup;
diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t
new file mode 100644
index 00000000000..45a22e79336
--- /dev/null
+++ b/tests/bugs/nfs/bug-1161092-nfs-acls.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+
+TEST touch $N0/file1
+TEST chmod 700 $N0/file1
+TEST getfacl $N0/file1
+
+TEST $CLI volume set $V0 root-squash on
+TEST getfacl $N0/file1
+
+TEST umount_nfs $H0:/$V0 $N0
+TEST mount_nfs $H0:/$V0 $N0
+TEST getfacl $N0/file1
+
+## Before killing daemon to avoid deadlocks
+umount_nfs $N0
+
+cleanup;
+
diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t
new file mode 100755
index 00000000000..c4f51a2d446
--- /dev/null
+++ b/tests/bugs/nfs/bug-1166862.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# When nfs.mount-rmtab is disabled, it should not get updated.
+#
+# Based on: bug-904065.t
+#
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# count the lines of a file, return 0 if the file does not exist
+function count_lines()
+{
+ if [ -n "$1" ]
+ then
+ $@ 2>/dev/null | wc -l
+ else
+ echo 0
+ fi
+}
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# glusterfs/nfs needs some time to start up in the background
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# disable the rmtab by settting it to the magic "/-" value
+TEST $CLI volume set $V0 nfs.mount-rmtab /-
+
+# before mounting the rmtab should be empty
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+# showmount should list one client
+EXPECT '1' count_lines showmount --no-headers $H0
+
+# unmount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# after resetting the option, the rmtab should get updated again
+TEST $CLI volume reset $V0 nfs.mount-rmtab
+
+# before mounting the rmtab should be empty
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+# removing a mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+cleanup
diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c
new file mode 100644
index 00000000000..d4099244176
--- /dev/null
+++ b/tests/bugs/nfs/bug-1210338.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int fd = -1;
+
+ fd = open(argv[1], O_CREAT | O_EXCL, 0644);
+
+ if (fd == -1) {
+ fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1],
+ strerror(errno));
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd > 0)
+ close(fd);
+
+ return ret;
+}
diff --git a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t
new file mode 100644
index 00000000000..b5c9245affd
--- /dev/null
+++ b/tests/bugs/nfs/bug-1210338.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+NFS_SOURCE=$(dirname $0)/bug-1210338.c
+NFS_EXEC=$(dirname $0)/excl_create
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+build_tester $NFS_SOURCE -o $NFS_EXEC
+TEST [ -e $NFS_EXEC ]
+
+TEST $NFS_EXEC $N0/my_file
+
+rm -f $NFS_EXEC;
+
+cleanup
diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t
new file mode 100755
index 00000000000..a2fb0e68ff0
--- /dev/null
+++ b/tests/bugs/nfs/bug-1302948.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+# TEST the nfs.rdirplus option
+. $(dirname $0)/../../include.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 nfs.rdirplus off
+TEST $CLI volume set $V0 nfs.rdirplus on
+cleanup
diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t
new file mode 100755
index 00000000000..5ccee722ed9
--- /dev/null
+++ b/tests/bugs/nfs/bug-847622.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on ACL which are not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+
+# simple getfacl setfacl commands
+TEST touch testfile
+TEST setfacl -m u:14:r testfile
+TEST getfacl testfile
+
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+cleanup
+
diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t
new file mode 100755
index 00000000000..dca315a3d01
--- /dev/null
+++ b/tests/bugs/nfs/bug-877885.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+TEST touch $M0/file
+TEST mkdir $M0/dir
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+
+rm -rf * &
+
+TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock;
+
+cd;
+
+kill %1;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1
+
+cleanup
diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t
new file mode 100755
index 00000000000..0eba86e7ee8
--- /dev/null
+++ b/tests/bugs/nfs/bug-904065.t
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# This test does not use 'showmount' from the nfs-utils package, it would
+# require setting up a portmapper (either rpcbind or portmap, depending on the
+# Linux distribution used for testing). The persistancy of the rmtab should not
+# affect the current showmount outputs, so existing regression tests should be
+# sufficient.
+#
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# count the lines of a file, return 0 if the file does not exist
+function count_lines()
+{
+ if [ -e "$1" ]
+ then
+ wc -l < $1
+ else
+ echo 0
+ fi
+}
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# glusterfs/nfs needs some time to start up in the background
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# before mounting the rmtab should be empty
+EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+# the output would looks similar to:
+#
+# hostname-0=172.31.122.104
+# mountpoint-0=/ufo
+#
+EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+# duplicate mounts should not be recorded (client could have crashed)
+TEST mount_nfs $H0:/$V0 $N1 nolock
+EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+# removing a mount should (even if there are two) should remove the entry
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1
+EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+# unmounting the other mount should work flawlessly
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# we'll create a fake rmtab here, similar to how an other storage server would do
+# using an invalid IP address to prevent (unlikely) collisions on the test-machine
+cat << EOF > $M0/rmtab
+hostname-0=127.0.0.256
+mountpoint-0=/ufo
+EOF
+EXPECT '2' count_lines $M0/rmtab
+
+# reconfigure merges the rmtab with the one on the volume
+TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab
+
+# glusterfs/nfs needs some time to restart
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# Apparently "is_nfs_export_available" might return even if the export is
+# not, in fact, available. (eyeroll) Give it a bit of extra time.
+#
+# TBD: fix the broken shell function instead of working around it here
+sleep 5
+
+# a new mount should be added to the rmtab, not overwrite exiting ones
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT '2' count_lines $M0/rmtab
+
+# TODO: nfs/reconfigure() is never called and is therefor disabled. When the
+# NFS-server supports reloading and does not get restarted anymore, we should
+# add a test that includes the merging of entries in the old rmtab with the new
+# rmtab.
+
+cleanup
diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t
new file mode 100755
index 00000000000..bd279157c25
--- /dev/null
+++ b/tests/bugs/nfs/bug-915280.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$N0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1
+TEST touch $N0/testfile
+
+TEST $CLI volume set $V0 debug.error-gen client
+TEST $CLI volume set $V0 debug.error-fops stat
+TEST $CLI volume set $V0 debug.error-failure 100
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+pid_file=$(read_nfs_pidfile);
+
+getfacl $N0/testfile 2>/dev/null
+
+nfs_pid=$(get_nfs_pid);
+if [ ! $nfs_pid ]
+then
+ nfs_pid=0;
+fi
+
+TEST [ $nfs_pid -eq $pid_file ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t
new file mode 100755
index 00000000000..61be4844e51
--- /dev/null
+++ b/tests/bugs/nfs/bug-970070.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+# TEST the nfs.acl option
+. $(dirname $0)/../../include.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 nfs.acl off
+TEST $CLI volume set $V0 nfs.acl on
+cleanup
diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t
new file mode 100755
index 00000000000..975c46f85a4
--- /dev/null
+++ b/tests/bugs/nfs/bug-974972.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+#This script checks that nfs mount does not fail lookup on files with split-brain
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0
+TEST touch $N0/1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}1
+echo abc > $N0/1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}0
+echo def > $N0/1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+
+#Lookup should not fail
+TEST ls $N0/1
+TEST ! cat $N0/1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+cleanup
diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t
new file mode 100644
index 00000000000..c6c9c35d60a
--- /dev/null
+++ b/tests/bugs/nfs/showmount-many-clients.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# The nfs.rpc-auth-allow volume option is used to generate the list of clients
+# that are displayed as able to mount the export. The "group" in the export
+# should be a list of all clients, identified by "name". In previous versions,
+# the "name" was the copied string from nfs.rpc-auth-allow. This is not
+# correct, as the volume option should be parsed and split into different
+# groups.
+#
+# When the single string is passed, this testcase fails when the
+# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting
+# the groups into their own structures, this testcase passes.
+#
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+
+CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,)
+TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS}
+TEST $CLI volume set $V0 nfs.rpc-auth-reject all
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# glusterfs/nfs needs some time to start up in the background
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# showmount should not timeout (no reply is sent on error)
+TEST showmount -e $H0
+
+cleanup
diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py
new file mode 100755
index 00000000000..eb507e1d30b
--- /dev/null
+++ b/tests/bugs/nfs/socket-as-fifo.py
@@ -0,0 +1,33 @@
+#
+# Create a unix domain socket and test if it is a socket (and not a fifo/pipe).
+#
+# Author: Niels de Vos <ndevos@redhat.com>
+#
+
+from __future__ import print_function
+import os
+import stat
+import sys
+import socket
+
+ret = 1
+
+if len(sys.argv) != 2:
+ print('Usage: %s <socket>' % (sys.argv[0]))
+ sys.exit(ret)
+
+path = sys.argv[1]
+
+sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+sock.bind(path)
+
+stbuf = os.stat(path)
+mode = stbuf.st_mode
+
+if stat.S_ISSOCK(mode):
+ ret = 0
+
+sock.close()
+os.unlink(path)
+
+sys.exit(ret)
diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t
new file mode 100644
index 00000000000..d9b9e959ce3
--- /dev/null
+++ b/tests/bugs/nfs/socket-as-fifo.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# this is the actual test
+TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket
+
+TEST umount_nfs $N0
+
+cleanup
diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t
new file mode 100644
index 00000000000..6a114877ac7
--- /dev/null
+++ b/tests/bugs/nfs/subdir-trailing-slash.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Verify that mounting a subdir over NFS works, even with a trailing /
+#
+# For example:
+# mount -t nfs server.example.com:/volume/subdir/
+#
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir -p $N0/subdir
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup
diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t
new file mode 100755
index 00000000000..2a940091ad9
--- /dev/null
+++ b/tests/bugs/nfs/zero-atime.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If
+# not set, these values are 0 and cause a atime/mtime set to the Epoch.
+#
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# create a file for testing
+TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+
+# timezone in UTC results in atime=0 if not set correctly
+TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat
+TEST [ "$(stat --format=%X $M0/small)" != "0" ]
+
+TEST rm $M0/small
+
+cleanup
diff --git a/tests/bugs/nl-cache/bug-1451588.t b/tests/bugs/nl-cache/bug-1451588.t
new file mode 100755
index 00000000000..cf07d04c5cc
--- /dev/null
+++ b/tests/bugs/nl-cache/bug-1451588.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..4}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 performance.nl-cache on
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+
+TEST ! stat $M0/.gfid/1901b1a0-c612-46ee-b45a-e8345d5a0b48
+
+cleanup;
+G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/posix/bug-1034716.t b/tests/bugs/posix/bug-1034716.t
new file mode 100644
index 00000000000..d36f8b598f4
--- /dev/null
+++ b/tests/bugs/posix/bug-1034716.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create a file and perform fop on a DIR
+TEST touch $M0/foo
+
+function xattr_query_check() {
+ local path=$1
+
+ local ret=`getfattr -m . -d $path 2>&1 | grep -c 'trusted.glusterfs'`
+ echo $ret
+}
+
+function set_xattr() {
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+function remove_xattr() {
+ local path=$1
+ local xa_name=$2
+
+ setfattr -x $xa_name $path
+ echo $?
+}
+
+EXPECT 0 xattr_query_check $M0/
+EXPECT 0 xattr_query_check $M0/foo
+
+EXPECT 1 set_xattr $M0/ 'trusted.glusterfs.volume-id' 'foo'
+EXPECT 1 remove_xattr $M0/ 'trusted.glusterfs.volume-id'
+
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t b/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t
new file mode 100755
index 00000000000..3839c6e3380
--- /dev/null
+++ b/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_uid() {
+ stat -c '%u' $1;
+}
+
+function get_gid() {
+ stat -c '%g' $1;
+}
+
+function check_stat() {
+ stat $1
+ echo $?
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+EXPECT 0 get_uid $M0;
+EXPECT 0 get_gid $M0;
+
+TEST chown 100:101 $M0;
+
+EXPECT 100 get_uid $M0;
+EXPECT 101 get_gid $M0;
+
+TEST $CLI volume stop $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" online_brick_count
+
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" check_stat $M0
+EXPECT 100 get_uid $M0;
+EXPECT 101 get_gid $M0;
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1113960.t b/tests/bugs/posix/bug-1113960.t
new file mode 100755
index 00000000000..ee42de2a092
--- /dev/null
+++ b/tests/bugs/posix/bug-1113960.t
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+NUM_DIRS=50
+NUM_FILES=10
+
+create_dirs () {
+ for (( i=1; i<=$NUM_DIRS; i+=1));do
+ mkdir $1/olddir$i
+ for (( j=0; j<$NUM_FILES; j+=1));do
+ echo "This is file $j in dir $i" > $1/olddir$i/file$j;
+ done
+ done
+ echo "0" > $M0/status
+}
+
+move_dirs () {
+ old_path="$1"
+ new_path="$1"
+
+ #Create a deep directory
+ for (( i=$NUM_DIRS; i>=2; i-=1));do
+ mv $1/olddir$i $1/olddir`expr $i - 1` > /dev/null 2>&1;
+ done
+
+ #Start renaming files and dirs so the paths change for the
+ #posix_handle_path calculations
+
+ for (( i=1; i<=$NUM_DIRS; i+=1));do
+ old_path="$new_path/olddir$i"
+ new_path="$new_path/longernamedir$i"
+ mv $old_path $new_path;
+
+ for (( j=0; j<$NUM_FILES; j+=1));do
+ mv $new_path/file$j $new_path/newfile$j > /dev/null 2>&1;
+ done
+ done
+ echo "done" > $M0/status
+}
+
+ls_loop () {
+ #Loop until the move_dirs function is done
+ for (( i=0; i<=500; i+=1 )); do
+ ls -lR $1 > /dev/null 2>&1
+ done
+}
+
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+TEST create_dirs $M0
+
+## Mount FUSE with caching disabled (read-write) again
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M1;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M2;
+
+(ls_loop $M1)&
+ls_pid1=$!
+
+(ls_loop $M2)&
+ls_pid2=$!
+
+
+#Start moving/renaming the directories so the paths change
+TEST move_dirs $M0
+
+EXPECT_WITHIN 180 "done" cat $M0/status
+
+#Kill the ls processes
+
+kill -SIGTERM $ls_pid1
+kill -SIGTERM $ls_pid2
+
+
+#Check if all bricks are still up
+EXPECT '4' online_brick_count $V0
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1122028.t b/tests/bugs/posix/bug-1122028.t
new file mode 100755
index 00000000000..492668cf1dc
--- /dev/null
+++ b/tests/bugs/posix/bug-1122028.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+# Create a 1x1 distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+# Start volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Mount volume over FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/a
+TEST ln $M0/dir/a $M0/dir/b
+
+# Confirm hardlinks
+inum1=$(ls -i $M0/dir/a | cut -d' ' -f1)
+inum2=$(ls -i $M0/dir/b | cut -d' ' -f1)
+TEST [ "$inum1" = "$inum2" ]
+
+# Turn on build-pgfid
+TEST $CLI volume set $V0 build-pgfid on
+EXPECT 'on' volinfo_field $V0 'storage.build-pgfid'
+
+# Unlink files
+TEST unlink $M0/dir/a
+TEST unlink $M0/dir/b
+
+# Unmount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+# Delete the volume
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup
diff --git a/tests/bugs/posix/bug-1175711.c b/tests/bugs/posix/bug-1175711.c
new file mode 100644
index 00000000000..8ab193c4014
--- /dev/null
+++ b/tests/bugs/posix/bug-1175711.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <assert.h>
+
+int
+main(int argc, char **argv)
+{
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ assert(argc == 2);
+ path = argv[1];
+
+ dir = opendir(path);
+ if (!dir) {
+ printf("opendir(%s) failed.\n", path);
+ return -1;
+ }
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ while ((entry = readdir(dir)) != NULL) {
+ if (entry->d_type == DT_UNKNOWN) {
+ printf("d_type found to be DT_UNKNOWN\n");
+ ret = -1;
+ break;
+ }
+ }
+#endif
+
+ if (dir)
+ closedir(dir);
+
+ return ret;
+}
diff --git a/tests/bugs/posix/bug-1175711.t b/tests/bugs/posix/bug-1175711.t
new file mode 100755
index 00000000000..f4162544d92
--- /dev/null
+++ b/tests/bugs/posix/bug-1175711.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# Create, start and mount the volume.
+TEST glusterd;
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume start $V0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# Compile the test program
+TEST $CC -Wall $(dirname $0)/bug-1175711.c -o $(dirname $0)/bug-1175711
+
+# Create directory and some entries inside them.
+mkdir -p $M0/dir-bug-1175711
+mkdir -p $M0/dir-bug-1175711/DT_DIR
+touch $M0/dir-bug-1175711/DT_REG
+
+# Invoke the test program and pass path of directory to it.
+TEST $(dirname $0)/bug-1175711 $M0/dir-bug-1175711
+
+# Unmount, stop and delete the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1360679.t b/tests/bugs/posix/bug-1360679.t
new file mode 100644
index 00000000000..fbb9d027ddb
--- /dev/null
+++ b/tests/bugs/posix/bug-1360679.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+function create_unlink_entry {
+ for i in {0..1}
+ do
+ mkdir -p $B0/${V0}$i/.glusterfs/unlink/{1..3}/{1..10}/1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/file-1 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/file-2 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/1/file-1 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/2/file-1 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/3/file-1 bs=1M count=1
+ ln $B0/${V0}$i/.glusterfs/unlink/file-1 $B0/${V0}$i/.glusterfs/unlink/file-link
+ ln -s $B0/${V0}$i/.glusterfs/unlink/1 $B0/${V0}$i/.glusterfs/unlink/link
+ ln -s $B0/${V0}$i/.glusterfs/unlink/2 $B0/${V0}$i/.glusterfs/unlink/link-2
+ done
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $CLI volume stop $V0
+create_unlink_entry
+TEST $CLI volume start $V0
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}0/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}1/.glusterfs/unlink/
+cleanup;
diff --git a/tests/bugs/posix/bug-1619720.t b/tests/bugs/posix/bug-1619720.t
new file mode 100755
index 00000000000..bfd304dc809
--- /dev/null
+++ b/tests/bugs/posix/bug-1619720.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+
+# Test steps:
+# The test checks to make sure that the trusted.pgfid.xx xattr is set on
+# both the linkto and data files post the final rename.
+# The test creates files file-1 and file-3 so that src_hashed = dst_hashed,
+# src_cached = dst_cached and xxx_hashed != xxx_cached.
+# It then renames file-1 to file-3 which triggers the posix_mknod call
+# which updates the trusted.pgfid.xx xattr.
+
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 storage.build-pgfid on
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/tmp
+
+
+
+# Not the best way to do this but I need files which hash to the same subvol and
+# whose cached subvols are the same.
+# In a 2 subvol distributed volume, file-{1,3} hash to the same subvol.
+# file-2 will hash to the other subvol
+
+TEST touch $M0/tmp/file-2
+pgfid_xattr_name=$(getfattr -m "trusted.pgfid.*" $B0/${V0}1/tmp/file-2 | grep "trusted.pgfid")
+echo $pgfid_xattr_name
+
+
+TEST mv $M0/tmp/file-2 $M0/tmp/file-1
+TEST touch $M0/tmp/file-2
+TEST mv $M0/tmp/file-2 $M0/tmp/file-3
+
+# At this point, both the file-1 and file-3 data files exist on one subvol
+# and both linkto files on the other
+
+TEST mv -f $M0/tmp/file-1 $M0/tmp/file-3
+
+
+TEST getfattr -n $pgfid_xattr_name $B0/${V0}0/tmp/file-3
+TEST getfattr -n $pgfid_xattr_name $B0/${V0}1/tmp/file-3
+
+# Not required for the test but an extra check if required.
+# The linkto file was not renamed Without the fix.
+#TEST mv $M0/tmp/file-3 $M0/tmp/file-6
+cleanup;
diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t
new file mode 100644
index 00000000000..4d08b69b9b0
--- /dev/null
+++ b/tests/bugs/posix/bug-1651445.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup
+
+TEST verify_lvm_version
+TEST glusterd
+TEST pidof glusterd
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+TEST $CLI volume create $V0 replica 3 $H0:$L{1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+#Setting the size in bytes
+TEST $CLI volume set $V0 storage.reserve 40MB
+
+#wait 5s to reset disk_space_full flag
+sleep 5
+
+TEST dd if=/dev/zero of=$M0/a bs=100M count=1
+TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+# Wait 5s to update disk_space_full flag because thread check disk space
+# after every 5s
+
+sleep 5
+# setup_lvm create lvm partition of 150M and 40M are reserve so after
+# consuming more than 110M next dd should fail
+TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+TEST dd if=/dev/urandom of=$M0/a bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+
+rm -rf $M0/*
+
+#Setting the size in percent and repeating the above steps
+TEST $CLI volume set $V0 storage.reserve 40
+
+sleep 5
+
+TEST dd if=/dev/zero of=$M0/a bs=80M count=1
+TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+sleep 5
+TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/posix/bug-765380.t b/tests/bugs/posix/bug-765380.t
new file mode 100644
index 00000000000..384b8022a42
--- /dev/null
+++ b/tests/bugs/posix/bug-765380.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+REPLICA=2
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}00 $H0:$B0/${V0}01 $H0:$B0/${V0}10 $H0:$B0/${V0}11
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+function count_hostname_or_uuid_from_pathinfo()
+{
+ pathinfo=$(getfattr -n trusted.glusterfs.pathinfo $M0/f00f)
+ echo $pathinfo | grep -o $1 | wc -l
+}
+
+TEST touch $M0/f00f
+
+EXPECT $REPLICA count_hostname_or_uuid_from_pathinfo $H0
+
+# turn on node-uuid-pathinfo option
+TEST $CLI volume set $V0 node-uuid-pathinfo on
+
+# do not expext hostname as part of the pathinfo string
+EXPECT 0 count_hostname_or_uuid_from_pathinfo $H0
+
+uuid=$(grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=)
+
+# ... but expect the uuid $REPLICA times
+EXPECT $REPLICA count_hostname_or_uuid_from_pathinfo $uuid
+
+cleanup;
diff --git a/tests/bugs/posix/bug-990028.t b/tests/bugs/posix/bug-990028.t
new file mode 100755
index 00000000000..bef36a8897d
--- /dev/null
+++ b/tests/bugs/posix/bug-990028.t
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=153
+
+function __init()
+{
+ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume info;
+
+ TEST $CLI volume create $V0 $H0:$B0/brick
+
+ EXPECT 'Created' volinfo_field $V0 'Status';
+
+ TEST $CLI volume start $V0
+
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+ TEST $CLI volume quota $V0 enable
+}
+
+#CASE-1
+#checking pgfid under same directory
+function links_in_same_directory()
+{
+ # create a file file1
+ TEST touch $M0/file1
+
+ # create 50 hardlinks for file1
+ for i in `seq 2 50`; do
+ TEST_IN_LOOP ln $M0/file1 $M0/file$i
+ done
+
+ # store the pgfid of file1 in PGFID_FILE1 [should be 50 now (0x000000032)]
+ PGFID_FILE1=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/file1 2>&1 | grep "trusted.pgfid" | gawk -F '=' '{print $2}'`
+
+ # compare the pgfid(link value ) of each hard links are equal or not
+ for i in `seq 2 50`; do
+ TEMP=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/file$i 2>&1 | grep "trusted.pgfid" | gawk -F '=' '{print $2}'`
+ TEST_IN_LOOP [ $PGFID_FILE1 = $TEMP ]
+ done
+
+ # check if no of links value is 50 or not
+ TEST [ $PGFID_FILE1 = "0x00000032" ]
+
+ # unlink file 2 to 50
+ for i in `seq 2 50`; do
+ TEST_IN_LOOP unlink $M0/file$i;
+ done
+
+ # now check if pgfid value is 1 or not
+ PGFID_FILE1=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/file1 2>&1 | grep "trusted.pgfid" | gawk -F '=' '{print $2}'`;
+
+ TEST [ $PGFID_FILE1 = "0x00000001" ]
+
+ TEST rm -f $M0/*
+}
+
+##checking pgfid under diff directories
+function links_across_directories()
+{
+ TEST mkdir $M0/dir1 $M0/dir2;
+
+ # create a file in dir1
+ TEST touch $M0/dir1/file1;
+
+ # create hard link for file1 in dir2
+ TEST ln $M0/dir1/file1 $M0/dir2/file2;
+
+ #first check is to find whether there are two pgfids or not
+ LINES=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir1/file1 2>&1 | grep "trusted.pgfid" | wc -l`
+ TEST [ $LINES = 2 ]
+
+ for i in $(seq 1 2); do
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir$i/file$i 2>&1 | grep "trusted.pgfid" | awk -v n=$i 'NR==n' | cut -d'=' -f2`
+ TEST_IN_LOOP [ $HL = "0x00000001" ]
+ done
+
+ #now unlink file2 and check the pgfid of file1
+ #1. no. of pgfid should be one
+ #2. no. of hard link should be one
+ TEST unlink $M0/dir2/file2
+
+ LINES=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir1/file1 2>&1 | grep "trusted.pgfid" | wc -l`
+ TEST [ $LINES == 1 ]
+
+ #next to check is to whether they contain hard link value of one or not
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir1/file1 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+ TEST [ $HL = "0x00000001" ]
+
+ #rename file under same directory
+
+ TEST touch $M0/r_file1
+ PGFID_rfile1=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/r_file1 2>&1 | grep "trusted.pgfid"`
+
+ #cross check whether hard link count is one
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/r_file1 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+
+ TEST [ $HL = "0x00000001" ]
+
+ #now rename the file to r_file1
+ TEST mv $M0/r_file1 $M0/r_file2
+
+ #now check the pgfid hard link count is still one or not
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/r_file2 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+
+ TEST [ $HL = "0x00000001" ]
+
+ #now move the file to a different directory where it has no hard link and check
+ TEST mkdir $M0/dir3;
+ TEST mv $M0/r_file2 $M0/dir3;
+
+ #now check the pgfid has changed or not and hard limit is one or not
+ PGFID_newDir=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir3/r_file2 2>&1 | grep "trusted.pgfid"`
+
+ #now the older pgfid and new pgfid shouldn't match
+ TEST [ $PGFID_rfile1 != $PGFID_newDir ]
+
+ HL=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir3/r_file2 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+ TEST [ $HL = "0x00000001" ]
+
+ TEST touch $M0/dir1/rl_file_1
+ ln $M0/dir1/rl_file_1 $M0/dir2/rl_file_2
+ mv $M0/dir1/rl_file_1 $M0/dir2
+
+ #now the there should be just one pgfid for both files
+ for i in $(seq 1 2); do
+ NL=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_$i 2>&1 | grep "trusted.pgfid"|wc -l `
+ TEST_IN_LOOP [ $HL = "0x00000001" ]
+ done
+
+ #now pgfid of both files should match
+ P_rl_file_1=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_1 2>&1 | grep "trusted.pgfid"`
+ P_rl_file_2=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_2 2>&1 | grep "trusted.pgfid"`
+ TEST [ $P_rl_file_1 = $P_rl_file_2 ]
+
+ #now the no of hard link should be two for both rl_file_1 and rl_file_2
+ for i in $(seq 1 2); do
+ HL=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_$i 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+ TEST_IN_LOOP [ $HL = "0x00000002" ]
+ done
+
+ TEST rm -rf $M0/*
+}
+
+__init;
+links_in_same_directory;
+links_across_directories;
+TEST $CLI volume stop $V0
+
+cleanup
diff --git a/tests/bugs/posix/bug-gfid-path.t b/tests/bugs/posix/bug-gfid-path.t
new file mode 100644
index 00000000000..1bbbe9f0670
--- /dev/null
+++ b/tests/bugs/posix/bug-gfid-path.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# This test case is for the bug where, even though a file is
+# created when gfid2path option is turned off (default is ON),
+# getfattr of "glusterfs.gfidtopath" was succeeding for that
+# file. Ideally the getfattr should fail, as the file does not
+# have its path(s) stored as a extended attribute (because it
+# was created when gfid2path option was off)
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir
+TEST mkdir $M0/new
+TEST mkdir $M0/3
+
+TEST touch $M0/dir/file
+
+# except success as by default gfid2path is enabled
+# and the glusterfs.gfidtopath xattr should give the
+# path of the object as the value
+
+TEST getfattr -n glusterfs.gfidtopath $M0/dir/file
+
+# turn off gfid2path feature
+TEST $CLI volume set $V0 storage.gfid2path off
+
+TEST touch $M0/new/foo
+
+# again enable gfid2path. This has to be enabled before
+# trying the getfattr. Because, glusterfs.gfidtopath xattr
+# request is handled only if gfid2path is enabled. If not,
+# then getxattr on glusterfs.gfid2path fails anyways. In this
+# context we want getfattr to fail, because the file was created
+# when gfid2path feature was disabled and not because gfid2path
+# feature itself is disabled.
+TEST $CLI volume set $V0 storage.gfid2path on
+
+# getfattr should fail as it is attempted on a file
+# which does not have its path stored as a xattr
+# (because file got created after disabling gfid2path)
+TEST ! getfattr -n glusterfs.gfidtopath $M0/new/foo;
+
+
+
+TEST touch $M0/3/new
+
+# should be successful
+TEST getfattr -n glusterfs.gfidtopath $M0/3/new
+
+TEST rm -rf $M0/*
+
+cleanup;
diff --git a/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c
new file mode 100644
index 00000000000..4ed3181d48f
--- /dev/null
+++ b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c
@@ -0,0 +1,104 @@
+#include <glusterfs/api/glfs.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <hostname> <Vol> <log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "glfs_opendir failed with (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fremovexattr(fd, "trusted.gfid");
+ if (ret == 0 || errno != EPERM) {
+ fprintf(stderr,
+ "glfs_fremovexattr gfid exited with ret: "
+ "%d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fremovexattr(fd, "trusted.glusterfs.volume-id");
+ if (ret == 0 || errno != EPERM) {
+ fprintf(stderr,
+ "glfs_fremovexattr volume-id exited with ret: "
+ "%d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "trusted.abc", "abc", 3, 0);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_fsetxattr trusted.abc exited with ret: "
+ "%d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fremovexattr(fd, "trusted.abc");
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_fremovexattr trusted.abc exited with "
+ "ret: %d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ (void)glfs_closedir(fd);
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+}
diff --git a/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t
new file mode 100755
index 00000000000..b9fd44ae0d7
--- /dev/null
+++ b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume start $V0;
+logdir=`gluster --print-logdir`
+
+
+TEST build_tester $(dirname $0)/disallow-gfid-volumeid-fremovexattr.c -lgfapi
+TEST $(dirname $0)/disallow-gfid-volumeid-fremovexattr $H0 $V0 $logdir/disallow-gfid-volumeid-fremovexattr.log
+
+cleanup_tester $(dirname $0)/disallow-gfid-volumeid-fremovexattr
+cleanup;
diff --git a/tests/bugs/posix/disallow-gfid-volumeid-removexattr.t b/tests/bugs/posix/disallow-gfid-volumeid-removexattr.t
new file mode 100644
index 00000000000..d26eb21ccc5
--- /dev/null
+++ b/tests/bugs/posix/disallow-gfid-volumeid-removexattr.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+#This test checks that gfid/volume-id removexattrs are not allowed.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST ! setfattr -x trusted.gfid $M0
+TEST ! setfattr -x trusted.glusterfs.volume-id $M0
+TEST setfattr -n trusted.abc -v abc $M0
+TEST setfattr -x trusted.abc $M0
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-1321578.t b/tests/bugs/protocol/bug-1321578.t
new file mode 100644
index 00000000000..83904817467
--- /dev/null
+++ b/tests/bugs/protocol/bug-1321578.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+check_mounted () {
+ df | grep $1 | wc -l
+}
+
+CHECK_MOUNT_TIMEOUT=7
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/$V0
+
+# Set auth.allow to dummy hostname so it *doesn't* include ourselves.
+TEST $CLI volume set $V0 auth.allow example.org
+TEST $CLI volume start $V0
+
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 | sed -e /username/d -e /password/d > fubar.vol
+
+# This mount should fail because auth.allow doesn't include us.
+TEST $GFS -f fubar.vol $M0
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Add tests when only username is present, but not password
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+# This mount should fail because auth.allow doesn't include our password.
+TEST $GFS -f fubar.vol $M0
+
+# If we had DONT_EXPECT_WITHIN we could use that, but we don't.
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Now, add a test for login failure when server doesn't have the password entry
+# Add tests when only username is present, but not password
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 > fubar.vol
+TEST $CLI volume stop $V0
+
+sed -i -e '/password /d' /var/lib/glusterd/vols/$V0/$V0.*$V0.vol
+
+TEST $CLI volume start $V0
+
+# This mount should fail because auth.allow doesn't include our password.
+TEST $GFS -f fubar.vol $M0
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Set auth.allow to include us. This mount should therefore succeed.
+TEST $CLI volume set $V0 auth.allow $H0
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+TEST $GFS -f fubar.vol $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 1 check_mounted $M0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Set auth.reject to include us. This mount should therefore fail.
+TEST $CLI volume stop $V0
+
+TEST $CLI volume set $V0 auth.allow "\*"
+TEST $CLI volume set $V0 auth.reject $H0
+TEST $CLI volume start $V0
+
+# Do this, so login module is not in picture
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+TEST $GFS -f fubar.vol $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+cleanup
diff --git a/tests/bugs/protocol/bug-1390914.t b/tests/bugs/protocol/bug-1390914.t
new file mode 100644
index 00000000000..e3dab92de5a
--- /dev/null
+++ b/tests/bugs/protocol/bug-1390914.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+cleanup;
+
+#test that fops are not wound on anon-fd when fd is not open on that brick
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST fd_open 200 'w' "$M0/1"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#lk should only happen on 2 bricks, if there is a bug, it will plant a lock
+#with anon-fd on first-brick which will never be released because flush won't
+#be wound below server xlator for anon-fd
+TEST flock -x -n 200
+TEST fd_close 200
+
+TEST fd_open 200 'w' "$M0/1"
+#this lock will fail if there is a stale lock
+TEST flock -x -n 200
+TEST fd_close 200
+cleanup;
diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t
new file mode 100644
index 00000000000..a78c0eb7111
--- /dev/null
+++ b/tests/bugs/protocol/bug-1433815-auth-allow.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+check_mounted () {
+ df | grep $1 | wc -l
+}
+
+get_addresses () {
+ ip addr | sed -n '/.*inet \([0-9.]*\).*/s//\1/p' | tr '\n' ','
+}
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/$V0
+
+# Set auth.allow so it *doesn't* include ourselves.
+TEST $CLI volume set $V0 auth.allow 1.2.3.4
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 | sed -e /username/d -e /password/d > fubar.vol
+
+# This mount should fail because auth.allow doesn't include us.
+TEST $GFS -f fubar.vol $M0
+# If we had DONT_EXPECT_WITHIN we could use that, but we don't.
+sleep 10
+EXPECT 0 check_mounted $M0
+
+# Set auth.allow to include us. This mount should therefore succeed.
+TEST $CLI volume set $V0 auth.allow "$(get_addresses)"
+TEST $GFS -f fubar.vol $M0
+sleep 10
+EXPECT 1 check_mounted $M0
+
+cleanup
diff --git a/tests/bugs/protocol/bug-762989.t b/tests/bugs/protocol/bug-762989.t
new file mode 100755
index 00000000000..7d201b78b58
--- /dev/null
+++ b/tests/bugs/protocol/bug-762989.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+# Skip the entire test if ip_local_reserved_ports does not exist
+if [ ! -f /proc/sys/net/ipv4/ip_local_reserved_ports ] ; then
+ echo "Skip test on /proc/sys/net/ipv4/ip_local_reserved_ports, "\
+ "which does not exists on this system" >&2
+ SKIP_TESTS
+ exit 0
+fi
+
+## reserve port 1023
+older_ports=$(cat /proc/sys/net/ipv4/ip_local_reserved_ports);
+echo "1023" > /proc/sys/net/ipv4/ip_local_reserved_ports;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+## Wait for volume to register with rpc.mountd
+sleep 6;
+## check if port 1023 (which has been reserved) is used by the gluster processes
+op=$(netstat -ntp | grep gluster | grep -w 1023);
+EXPECT "" echo $op;
+
+#set the reserved ports to the older values
+echo $older_ports > /proc/sys/net/ipv4/ip_local_reserved_ports
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-808400-dist.t b/tests/bugs/protocol/bug-808400-dist.t
new file mode 100755
index 00000000000..0df972585c0
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-dist.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-808400-fcntl.c b/tests/bugs/protocol/bug-808400-fcntl.c
new file mode 100644
index 00000000000..a703ca5c120
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-fcntl.c
@@ -0,0 +1,124 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+#ifndef linux
+#define fstat64(fd, st) fstat(fd, st)
+#endif
+
+int
+run_child(char *filename)
+{
+ int fd = -1, ret = -1;
+ struct flock lock = {
+ 0,
+ };
+ int ppid = 0;
+
+ fd = open(filename, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "open failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ ppid = getppid();
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+
+ ret = fcntl(fd, F_GETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "GETLK failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ if ((lock.l_type == F_UNLCK) || (ppid != lock.l_pid)) {
+ fprintf(stderr,
+ "no locks present, though parent has held "
+ "one\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1, ret = -1, status = 0;
+ char *filename = NULL, *cmd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ struct flock lock = {
+ 0,
+ };
+
+ if (argc != 3) {
+ fprintf(stderr,
+ "Usage: %s <filename> "
+ "<gluster-cmd-to-trigger-graph-switch>\n",
+ argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ cmd = argv[2];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+
+ ret = fcntl(fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "fcntl failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ system(cmd);
+
+ /* wait till graph switch completes */
+ ret = fstat64(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat64 failure (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ sleep(10);
+
+ /* By now old-graph would be disconnected and locks should be cleaned
+ * up if they are not migrated. Check that by trying to acquire a lock
+ * on a new fd opened by another process on same file.
+ */
+ ret = fork();
+ if (ret == 0) {
+ ret = run_child(filename);
+ } else {
+ wait(&status);
+ if (WIFEXITED(status)) {
+ ret = WEXITSTATUS(status);
+ } else {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/protocol/bug-808400-flock.c b/tests/bugs/protocol/bug-808400-flock.c
new file mode 100644
index 00000000000..54a507cc227
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-flock.c
@@ -0,0 +1,100 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+#ifndef linux
+#define fstat64(fd, st) fstat(fd, st)
+#endif
+
+int
+run_child(char *filename)
+{
+ int fd = -1, ret = -1;
+
+ fd = open(filename, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "open failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flock(fd, LOCK_EX | LOCK_NB);
+ if ((ret == 0) || (errno != EWOULDBLOCK)) {
+ fprintf(stderr,
+ "no locks present, though parent has held "
+ "one\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1, ret = -1, status = 0;
+ char *filename = NULL, *cmd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+
+ if (argc != 3) {
+ fprintf(stderr,
+ "Usage: %s <filename> "
+ "<gluster-cmd-to-trigger-graph-switch>\n",
+ argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ cmd = argv[2];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ ret = flock(fd, LOCK_EX);
+ if (ret < 0) {
+ fprintf(stderr, "flock failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ system(cmd);
+
+ /* wait till graph switch completes */
+ ret = fstat64(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat64 failure (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ sleep(10);
+
+ /* By now old-graph would be disconnected and locks should be cleaned
+ * up if they are not migrated. Check that by trying to acquire a lock
+ * on a new fd opened by another process on same file
+ */
+ ret = fork();
+ if (ret == 0) {
+ ret = run_child(filename);
+ } else {
+ wait(&status);
+ if (WIFEXITED(status)) {
+ ret = WEXITSTATUS(status);
+ } else {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/protocol/bug-808400-repl.t b/tests/bugs/protocol/bug-808400-repl.t
new file mode 100755
index 00000000000..611e5ec93b7
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-repl.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-808400.t b/tests/bugs/protocol/bug-808400.t
new file mode 100755
index 00000000000..4ae1722fca2
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#mount on a random dir
+TEST MOUNTDIR="/tmp/$RANDOM"
+TEST mkdir $MOUNTDIR
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+TEST rm -rf $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/quick-read/bug-846240.t b/tests/bugs/quick-read/bug-846240.t
new file mode 100755
index 00000000000..bb997e10013
--- /dev/null
+++ b/tests/bugs/quick-read/bug-846240.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M1;
+
+TEST touch $M0/testfile;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# open the file with the fd as 4
+TEST fd=`fd_available`;
+TEST fd_open $fd 'w' "$M0/testfile";
+
+# remove the file from the other mount point. If unlink is sent from
+# $M0 itself, then the file will be actually opened by open-behind which
+# we dont want for this testcase
+TEST rm -f $M1/testfile;
+
+# below command opens the file and writes to the file.
+# upon open, open-behind unwinds the open call with success.
+# now when write comes, open-behind actually opens the file
+# and then sends write on the fd. But before sending open itself,
+# the file would have been removed from the mount $M1. open() gets error
+# and the write call which is put into a stub (open had to be sent first)
+# should unwind with the error received in the open call.
+TEST ! fd_write $fd data
+
+TEST fd_close $fd;
+
+TEST rm -rf $MOUNTDIR/*
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/quick-read/bz1523599/bz1523599.t b/tests/bugs/quick-read/bz1523599/bz1523599.t
new file mode 100755
index 00000000000..5027efe8e9a
--- /dev/null
+++ b/tests/bugs/quick-read/bz1523599/bz1523599.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/test_bz1523599.c -lgfapi -o $(dirname $0)/test_bz1523599
+TEST ./$(dirname $0)/test_bz1523599 0 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 1 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 0 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 2 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+
+cleanup_tester $(dirname $0)/test_bz1523599
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
diff --git a/tests/bugs/quick-read/bz1523599/test_bz1523599.c b/tests/bugs/quick-read/bz1523599/test_bz1523599.c
new file mode 100644
index 00000000000..5076a9447f3
--- /dev/null
+++ b/tests/bugs/quick-read/bz1523599/test_bz1523599.c
@@ -0,0 +1,198 @@
+/*
+ * ./test_bz1523599 0 vm140-111 gv0 test211 log
+ * ./test_bz1523599 1 vm140-111 gv0 test211 log
+ * Open - Discard - Read - Then check read information to see if the initial
+ * TEST_STR_LEN/2 bytes read zero
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <sys/uio.h>
+
+#define TEST_STR_LEN 2048
+
+enum fallocate_flag {
+ TEST_WRITE,
+ TEST_DISCARD,
+ TEST_ZEROFILL,
+};
+
+void
+print_str(char *str, int len)
+{
+ int i, addr;
+
+ printf("%07x\t", 0);
+ for (i = 0; i < len; i++) {
+ printf("%02x", str[i]);
+ if (i) {
+ if ((i + 1) % 16 == 0)
+ printf("\n%07x\t", i + 1);
+ else if ((i + 1) % 4 == 0)
+ printf(" ");
+ }
+ }
+ printf("\n");
+}
+
+int
+test_read(char *str, int total_length, int len_zero)
+{
+ int i;
+ int ret = 0;
+
+ for (i = 0; i < len_zero; i++) {
+ if (str[i]) {
+ fprintf(stderr, "char at position %d not zeroed out\n", i);
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ for (i = len_zero; i < total_length; i++) {
+ if (str[i] != 0x11) {
+ fprintf(stderr, "char at position %d does not contain pattern\n",
+ i);
+ ret = -EIO;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int opcode;
+ char *host_name, *volume_name, *file_path, *glfs_log_path;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ off_t offset = 0;
+ size_t len_zero = TEST_STR_LEN / 2;
+ char writestr[TEST_STR_LEN];
+ char readstr[TEST_STR_LEN];
+ struct iovec iov = {&readstr, TEST_STR_LEN};
+ int i;
+ int ret = 1;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+ for (i = 0; i < TEST_STR_LEN; i++)
+ readstr[i] = 0x22;
+
+ if (argc != 6) {
+ fprintf(
+ stderr,
+ "Syntax: %s <test type> <host> <volname> <file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ opcode = atoi(argv[1]);
+ host_name = argv[2];
+ volume_name = argv[3];
+ file_path = argv[4];
+ glfs_log_path = argv[5];
+
+ fs = glfs_new(volume_name);
+ if (!fs) {
+ perror("glfs_new");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", host_name, 24007);
+ if (ret != 0) {
+ perror("glfs_set_volfile_server");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, glfs_log_path, 7);
+ if (ret != 0) {
+ perror("glfs_set_logging");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ perror("glfs_init");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, file_path, O_RDWR, 0777);
+ if (fd == NULL) {
+ perror("glfs_creat");
+ ret = -1;
+ goto out;
+ }
+
+ switch (opcode) {
+ case TEST_WRITE:
+ fprintf(stderr, "Test Write\n");
+ ret = glfs_write(fd, writestr, TEST_STR_LEN, 0);
+ if (ret < 0) {
+ perror("glfs_write");
+ goto out;
+ } else if (ret != TEST_STR_LEN) {
+ fprintf(stderr, "insufficient data written %d \n", ret);
+ ret = -EIO;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ case TEST_DISCARD:
+ fprintf(stderr, "Test Discard\n");
+ ret = glfs_discard(fd, offset, len_zero);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Operation not supported\n");
+ ret = 0;
+ goto out;
+ }
+ perror("glfs_discard");
+ goto out;
+ }
+ goto test_read;
+ case TEST_ZEROFILL:
+ fprintf(stderr, "Test Zerofill\n");
+ ret = glfs_zerofill(fd, offset, len_zero);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Operation not supported\n");
+ ret = 0;
+ goto out;
+ }
+ perror("glfs_zerofill");
+ goto out;
+ }
+ goto test_read;
+ default:
+ ret = -1;
+ fprintf(stderr, "Incorrect test code %d\n", opcode);
+ goto out;
+ }
+
+test_read:
+ ret = glfs_readv(fd, &iov, 1, 0);
+ if (ret < 0) {
+ perror("glfs_readv");
+ goto out;
+ }
+
+ /* printf("Read str\n"); print_str(readstr, TEST_STR_LEN); printf("\n"); */
+ ret = test_read(readstr, TEST_STR_LEN, len_zero);
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+
+ if (ret)
+ return -1;
+
+ return 0;
+}
diff --git a/tests/bugs/quota/afr-quota-xattr-mdata-heal.t b/tests/bugs/quota/afr-quota-xattr-mdata-heal.t
new file mode 100644
index 00000000000..ebfa5545728
--- /dev/null
+++ b/tests/bugs/quota/afr-quota-xattr-mdata-heal.t
@@ -0,0 +1,140 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1MB
+TEST mkdir $M0/d
+TEST $CLI volume quota $V0 limit-usage /d 1MB
+TEST touch $M0/d/a
+echo abc > $M0/d/a
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "512Bytes" quotausage "/"
+
+#Set the acl xattrs directly on backend, for some reason on mount it gives error
+acl_access_val="0x0200000001000600ffffffff04000400ffffffff10000400ffffffff20000400ffffffff"
+acl_file_val="0x0000000400000001ffffffff0006000000000004ffffffff0004000000000010ffffffff0004000000000020ffffffff00040000"
+TEST setfattr -n system.posix_acl_access -v $acl_access_val $B0/${V0}0/d
+TEST setfattr -n trusted.SGI_ACL_FILE -v $acl_file_val $B0/${V0}0/d
+TEST setfattr -n system.posix_acl_access -v $acl_access_val $B0/${V0}1/d
+TEST setfattr -n trusted.SGI_ACL_FILE -v $acl_file_val $B0/${V0}1/d
+TEST setfattr -n trusted.foo -v "baz" $M0/d
+TEST setfattr -n trusted.foo -v "baz" $M0/d/a
+TEST setfattr -n trusted.foo1 -v "baz1" $M0/d
+TEST setfattr -n trusted.foo1 -v "baz1" $M0/d/a
+TEST setfattr -n trusted.foo3 -v "unchanged" $M0/d
+TEST setfattr -n trusted.foo3 -v "unchanged" $M0/d/a
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+#Induce metadata self-heal
+TEST setfattr -n trusted.foo -v "bar" $M0/d
+TEST setfattr -n trusted.foo -v "bar" $M0/d/a
+TEST setfattr -x trusted.foo1 $M0/d
+TEST setfattr -x trusted.foo1 $M0/d/a
+TEST setfattr -n trusted.foo2 -v "bar2" $M0/d
+TEST setfattr -n trusted.foo2 -v "bar2" $M0/d/a
+d_quota_contri=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.*.contri")
+d_quota_dirty=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.dirty")
+d_quota_limit=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.limit-set")
+d_quota_size=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.size")
+
+a_pgfid=$(getfattr -d -m . -e hex $B0/${V0}1/d/a | grep -E "trusted.pgfid.")
+
+#Change internal xattrs in the backend, later check that they are not healed
+TEST setfattr -n trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri -v 0x0000000000000400 $B0/${V0}0/d
+TEST setfattr -n trusted.glusterfs.quota.dirty -v 0x0000000000000400 $B0/${V0}0/d
+TEST setfattr -n trusted.glusterfs.quota.limit-set -v 0x0000000000000400 $B0/${V0}0/d #This will be healed, this is external xattr
+TEST setfattr -n trusted.glusterfs.quota.size -v 0x0000000000000400 $B0/${V0}0/d
+TEST setfattr -n $(echo $a_pgfid | cut -f1 -d'=') -v "orphan" $B0/${V0}0/d/a
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#Check external xattrs match
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}0/d | grep "trusted.glusterfs.quota.limit-set")
+
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep "trusted.glusterfs.quota.limit-set")
+
+#Test that internal xattrs on B0 are not healed
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri)
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.dirty)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.limit-set) #This will be healed, this is external xattr
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.size)
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.SGI_ACL_FILE)
+EXPECT "orphan" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep $(echo $a_pgfid | cut -f1 -d'='))
+
+#Test that xattrs didn't go bad in source
+EXPECT "$d_quota_contri" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.*.contri")
+EXPECT "$d_quota_dirty" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.dirty")
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.limit-set")
+EXPECT "$d_quota_size" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.size")
+EXPECT "$a_pgfid" echo $(getfattr -d -m . -e hex $B0/${V0}1/d/a | grep -E "trusted.pgfid.")
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep trusted.SGI_ACL_FILE)
+
+#Do a lookup and it shouldn't trigger metadata self-heal and heal xattrs
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}0/d | grep "trusted.glusterfs.quota.limit-set")
+
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep "trusted.glusterfs.quota.limit-set")
+
+#Test that internal xattrs on B0 are not healed
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri)
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.dirty)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.limit-set) #This will be healed, this is external xattr
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.size)
+EXPECT "orphan" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep $(echo $a_pgfid | cut -f1 -d'='))
+
+#Test that xattrs didn't go bad in source
+EXPECT "$d_quota_contri" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.*.contri")
+EXPECT "$d_quota_dirty" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.dirty")
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.limit-set")
+EXPECT "$d_quota_size" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.size")
+EXPECT "$a_pgfid" echo $(getfattr -d -m . -e hex $B0/${V0}1/d/a | grep -E "trusted.pgfid.")
+
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.SGI_ACL_FILE)
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep trusted.SGI_ACL_FILE)
+cleanup
diff --git a/tests/bugs/quota/bug-1035576.t b/tests/bugs/quota/bug-1035576.t
new file mode 100644
index 00000000000..cbc1b69ebb3
--- /dev/null
+++ b/tests/bugs/quota/bug-1035576.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script tests that self-heal of limit-set xattr is happening on a directory
+#but self-heal of quota.size xattr is not happening
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+#Lets disable perf-xls so that lookup would reach afr
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume quota $V0 enable
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST mkdir $M0/a
+TEST $CLI volume quota $V0 limit-usage /a 1GB
+echo abc > $M0/a/f
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+quota_limit_val1=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}1/a)
+quota_size_val1=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}1/a)
+
+#Trigger entry,metadata self-heal
+TEST ls $M0/a
+
+quota_limit_val0=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}0/a)
+quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
+
+#Test that limit-set xattr is healed
+TEST [ $quota_limit_val0 == $quota_limit_val1 ]
+
+#Only entry, metadata self-heal is done quota size value should not be same
+TEST [ $quota_size_val0 != $quota_size_val1 ]
+TEST cat $M0/a/f
+
+#Now that data self-heal is done quota size value should be same
+quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
+TEST [ $quota_size_val0 == $quota_size_val1 ]
+cleanup
diff --git a/tests/bugs/quota/bug-1038598.t b/tests/bugs/quota/bug-1038598.t
new file mode 100644
index 00000000000..108e14cb8d8
--- /dev/null
+++ b/tests/bugs/quota/bug-1038598.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable
+sleep 5
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir -p $M0/test_dir
+TEST $CLI volume quota $V0 limit-usage /test_dir 10MB 50
+
+EXPECT "10.0MB" quota_hard_limit "/test_dir";
+EXPECT "50%" quota_soft_limit "/test_dir";
+
+TEST $QDD $M0/test_dir/file1.txt 256 16
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "4.0MB" quotausage "/test_dir";
+EXPECT 'No' quota_sl_exceeded "/test_dir";
+EXPECT 'No' quota_hl_exceeded "/test_dir";
+
+TEST $QDD $M0/test_dir/file1.txt 256 24
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "6.0MB" quotausage "/test_dir";
+EXPECT 'Yes' quota_sl_exceeded "/test_dir";
+EXPECT 'No' quota_hl_exceeded "/test_dir";
+
+#set timeout to 0 so that quota gets enforced without any lag
+TEST $CLI volume set $V0 features.hard-timeout 0
+TEST $CLI volume set $V0 features.soft-timeout 0
+
+TEST ! $QDD $M0/test_dir/file1.txt 256 60
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT 'Yes' quota_sl_exceeded "/test_dir";
+EXPECT 'Yes' quota_hl_exceeded "/test_dir";
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1087198.t b/tests/bugs/quota/bug-1087198.t
new file mode 100644
index 00000000000..618a46b957d
--- /dev/null
+++ b/tests/bugs/quota/bug-1087198.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+## The script tests the logging of the quota in the bricks after reaching soft
+## limit of the configured limit.
+##
+## Steps:
+## 1. Create and mount the volume
+## 2. Enable quota and set the limit on 2 directories
+## 3. Write some data to cross the limit
+## 4. Grep the string expected in brick logs
+## 5. Wait for 10 seconds (alert timeout is set to 10s)
+## 6. Repeat 3 and 4.
+## 7. Cleanup
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+#1
+## Step 1
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick{1..4};
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 noac,nolock
+
+QUOTA_LIMIT_DIR="quota_limit_dir"
+BRICK_LOG_DIR="`gluster --print-logdir`/bricks"
+
+#9
+TEST mkdir $N0/$QUOTA_LIMIT_DIR
+
+#10
+## Step 2
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 alert-time 10
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+# Set limit to 200KB (204800B)
+TEST $CLI volume quota $V0 limit-usage / 204800B
+TEST $CLI volume quota $V0 limit-usage /$QUOTA_LIMIT_DIR 100KB
+
+#16
+## Step 3 and 4
+TEST $QDD $N0/$QUOTA_LIMIT_DIR/95KB_file 1 95
+#Uncomment below TEST once the bug# 1202292 is fixed
+#TEST grep -e "\"Usage crossed soft limit:.*used by /$QUOTA_LIMIT_DIR\"" -- $BRICK_LOG_DIR/*
+
+TEST $QDD $N0/100KB_file 1 100
+#Uncomment below TEST once the bug# 1202292 is fixed
+#TEST grep -e "\"Usage crossed soft limit:.*used by /\"" -- $BRICK_LOG_DIR/*
+
+#20
+## Step 5
+TEST sleep 10
+
+## Step 6
+TEST $QDD $N0/$QUOTA_LIMIT_DIR/1KB_file 1 1
+TEST grep -e "\"Usage is above soft limit:.*used by /$QUOTA_LIMIT_DIR\"" -- $BRICK_LOG_DIR/*
+
+#23
+TEST $QDD $N0/1KB_file 1 1
+TEST grep -e "\"Usage is above soft limit:.*used by /\"" -- $BRICK_LOG_DIR/*
+
+#25
+## Step 7
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1104692.t b/tests/bugs/quota/bug-1104692.t
new file mode 100755
index 00000000000..9640996135f
--- /dev/null
+++ b/tests/bugs/quota/bug-1104692.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST mkdir -p $M0/limit_one/limit_two/limit_three $M0/limit_four \
+ $M0/limit_one/limit_five
+
+TEST $CLI volume set $V0 server.root-squash on
+TEST $CLI volume quota $V0 enable
+
+TEST $CLI volume quota $V0 limit-usage / 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one/limit_two 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one/limit_two/limit_three 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_four 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one/limit_five 1GB
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1153964.t b/tests/bugs/quota/bug-1153964.t
new file mode 100644
index 00000000000..2e449d3ba00
--- /dev/null
+++ b/tests/bugs/quota/bug-1153964.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function rename_loop()
+{
+ local i=0
+ local limit=$1
+ while [ $i -lt $limit ]
+ do
+ j=$[$i + 1]
+ mv $N0/test_dir/file$i $N0/test_dir/file$j
+ if [ "$?" != "0" ]
+ then
+ return 1
+ fi
+ i=$[$i + 1]
+ done
+ return 0
+}
+
+function createFile_and_checkLimit()
+{
+ local count_val=$1;
+ dd if=/dev/zero of="$N0/test_dir/file0" bs=1048576 count=$count_val
+ sleep 3
+ if [ -f $N0/test_dir/file0 ]
+ then
+ rename_loop 10
+ if [ "$?" == "0" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ fi
+}
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+TEST mkdir -p $N0/test_dir/
+
+# Try to rename file under various case and check if
+# quota limit exceeds or not.
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+# Case1 : If used size is less than hard-limit size
+# Create a 600MB file
+EXPECT 'Y' createFile_and_checkLimit 60
+
+TEST rm -rf $N0/test_dir/*
+# Case2 : If used size is equal to hard-limit size
+# Create a 100MB file
+EXPECT 'Y' createFile_and_checkLimit 100
+
+TEST rm -rf $N0/test_dir/*
+# Case3 : If used size is greater than hard-limit size
+# Create a 110MB file
+EXPECT 'Y' createFile_and_checkLimit 110
+
+# remove this directory as it has been created as part
+# of above testcase
+TEST rm -rf $N0/test_dir/
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1178130.t b/tests/bugs/quota/bug-1178130.t
new file mode 100644
index 00000000000..ccd6b792cf8
--- /dev/null
+++ b/tests/bugs/quota/bug-1178130.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# This regression test tries to ensure renaming a directory with content, and
+# no limit set, is accounted properly, when moved into a directory with quota
+# limit set.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 limit-usage / 500MB
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST $QDD $M0/file 256 40
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/file $M0/file2
+TEST $CLI volume start $V0 force;
+
+#wait for self heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" STAT "$B0/${V0}2/file2"
+
+#usage should remain same after rename and self-heal operation
+EXPECT "10.0MB" quotausage "/"
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1235182.t b/tests/bugs/quota/bug-1235182.t
new file mode 100644
index 00000000000..6091146cb97
--- /dev/null
+++ b/tests/bugs/quota/bug-1235182.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# This regression test tries to ensure renaming a directory with content, and
+# no limit set, is accounted properly, when moved into a directory with quota
+# limit set.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 limit-usage / 1GB
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST mkdir $M0/1
+$QDD $M0/1/f1 256 400&
+PID=$!
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/1/f1
+TESTS_EXPECTED_IN_LOOP=150
+for i in {1..50}; do
+ ii=`expr $i + 1`;
+ touch $M0/$i/f$ii
+ echo Hello > $M0/$i/f$ii
+
+ #rename within same dir
+ TEST_IN_LOOP mv -f $M0/$i/f$i $M0/$i/f$ii;
+
+ #rename to different dir
+ TEST_IN_LOOP mkdir $M0/$ii
+ TEST_IN_LOOP mv -f $M0/$i/f$ii $M0/$ii/f$ii;
+ stat $M0/$ii/f$ii >/dev/null
+done
+
+echo "Wait for process with pid $PID to complete"
+wait $PID
+echo "Process with pid $PID finished"
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/51/f51
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "100.0MB" quotausage "/"
+
+rm -f $QDD
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/quota/bug-1243798.t b/tests/bugs/quota/bug-1243798.t
new file mode 100644
index 00000000000..fa6abeb08fb
--- /dev/null
+++ b/tests/bugs/quota/bug-1243798.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 noac,nolock
+
+TEST mkdir -p $N0/dir1/dir2
+TEST touch $N0/dir1/dir2/file
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 limit-objects /dir1 10
+
+TEST stat $N0/dir1/dir2/file
+
+sleep 2
+
+#Remove size and contri xattr from /dir1
+#Remove contri xattr from /dir1/dir2
+setfattr -x trusted.glusterfs.quota.size.1 $B0/$V0/dir1
+setfattr -x trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri.1 $B0/$V0/dir1
+contri=$(getfattr -d -m . -e hex $B0/$V0/dir1/dir2 | grep contri | awk -F= '{print $1}')
+setfattr -x $contri $B0/$V0/dir1/dir2
+
+#Initiate healing by writing to a file
+echo Hello > $N0/dir1/dir2/file
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "2" quota_object_list_field "/dir1" 5
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t b/tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t
new file mode 100644
index 00000000000..3b55e739bf9
--- /dev/null
+++ b/tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# This test ensures that 'gluster volume reset' command do not remove
+# features.quota-deem-statfs, features.quota.
+# Also, tests that 'gluster volume set features.quota-deem-statfs' can be
+# turned on/off when quota is enabled.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${v0}{1,2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 force
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 features.quota-deem-statfs
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs off
+EXPECT 'off' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume quota $V0 disable
+EXPECT 'off' volinfo_field $V0 'features.quota'
+EXPECT 'off' volinfo_field $V0 'features.inode-quota'
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+cleanup;
+
diff --git a/tests/bugs/quota/bug-1260545.t b/tests/bugs/quota/bug-1260545.t
new file mode 100644
index 00000000000..46808022f01
--- /dev/null
+++ b/tests/bugs/quota/bug-1260545.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 limit-usage / 11MB
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST $QDD $M0/f1 256 40
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+if [ -f "$B0/${V0}1/f1" ]; then
+ HASHED="$B0/${V0}1"
+ OTHER="$B0/${V0}2"
+else
+ HASHED="$B0/${V0}2"
+ OTHER="$B0/${V0}1"
+fi
+
+TEST $CLI volume remove-brick $V0 $H0:${HASHED} start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:${HASHED}";
+
+#check consistency in mount point and also check that file is migrated to OTHER
+TEST [ -f "$OTHER/f1" ];
+TEST [ -f "$M0/f1" ];
+
+#check that remove-brick status should not have any failed or skipped files
+var=`$CLI volume remove-brick $V0 $H0:${HASHED} status | grep completed`
+TEST [ `echo $var | awk '{print $5}'` = "0" ]
+TEST [ `echo $var | awk '{print $6}'` = "0" ]
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+rm -f $QDD
+cleanup;
diff --git a/tests/bugs/quota/bug-1287996.t b/tests/bugs/quota/bug-1287996.t
new file mode 100644
index 00000000000..2f46ee1ca2d
--- /dev/null
+++ b/tests/bugs/quota/bug-1287996.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume quota $V0 enable
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1292020.t b/tests/bugs/quota/bug-1292020.t
new file mode 100644
index 00000000000..b70047ae3f9
--- /dev/null
+++ b/tests/bugs/quota/bug-1292020.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function write_sample_data () {
+ dd if=/dev/zero of=$M0/f1 bs=256k count=400 2>&1 |
+ egrep -i 'exceeded|no space' && echo 'passed'
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0;
+TEST $CLI volume quota $V0 enable;
+TEST $CLI volume quota $V0 limit-usage / 1
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+
+EXPECT_WITHIN 30 "passed" write_sample_data
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+cleanup;
diff --git a/tests/bugs/quota/bug-1293601.t b/tests/bugs/quota/bug-1293601.t
new file mode 100644
index 00000000000..741758b73f5
--- /dev/null
+++ b/tests/bugs/quota/bug-1293601.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count
+TEST $CLI volume quota $V0 enable
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..512}; do
+ dd if=/dev/zero of=$M0/f$i bs=1k count=1
+done
+
+mkdir $M0/dir1
+for i in {513..1024}; do
+ dd if=/dev/zero of=$M0/dir1/f$i bs=1k count=1
+done
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "1.0MB" quotausage "/"
+
+TEST $CLI volume quota $V0 disable
+TEST $CLI volume quota $V0 enable
+
+EXPECT_WITHIN 60 "1.0MB" quotausage "/"
+
+cleanup;
diff --git a/tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t b/tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t
new file mode 100644
index 00000000000..cc27d04656f
--- /dev/null
+++ b/tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+#Test case: This test checks when a volume is made read-only though volume set
+# and bricks are not restarted no write operations can be performed on
+# this volume
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0;
+TEST $CLI volume start $V0
+
+# Mount FUSE and create file/directory, create should succeed as the read-only
+# is off by default
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/zerobytefile1.txt
+TEST mkdir $M0/test_dir1
+TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024
+
+# turn on read-only option through volume set
+TEST gluster volume set $V0 read-only on
+
+# worm feature can't be enabled if read-only is enabled
+TEST ! gluster volume set $V0 worm on
+
+# turn off read-only option through volume set
+TEST gluster volume set $V0 read-only off
+
+# turn on worm option through volume set
+TEST gluster volume set $V0 worm on
+
+# read-only feature can't be enabled if worm is enabled
+TEST ! gluster volume set $V0 read-only on
+
+
+TEST gluster volume set $V0 worm off
+TEST gluster volume set $V0 read-only on
+
+# Check whether read-operations can be performed or not
+TEST cat $M0/file1
+
+# All write operations should fail now
+TEST ! touch $M0/zerobytefile2.txt
+TEST ! mkdir $M0/test_dir2
+TEST ! dd if=/dev/zero of=$M0/file2 bs=1024 count=1024
+
+# turn off read-only option through volume set
+TEST gluster volume set $V0 read-only off
+
+# All write operations should succeed now
+TEST touch $M0/zerobytefile2.txt
+TEST mkdir $M0/test_dir2
+TEST dd if=/dev/zero of=$M0/file2 bs=1024 count=1024
+
+# Turn on worm
+TEST gluster volume set $V0 worm on
+
+# unlink should fail now
+TEST ! rm -rf $M0/zerobytefile2.txt
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1390050.c b/tests/bugs/readdir-ahead/bug-1390050.c
new file mode 100644
index 00000000000..9578df2dd90
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1390050.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <string.h>
+#include <errno.h>
+
+int
+main(int argc, char *argv[])
+{
+ const char *glfs_dir = NULL, *filepath = NULL;
+ DIR *dirfd = NULL;
+ int filefd = 0, ret = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ size_t size_before_write = 0;
+
+ glfs_dir = argv[1];
+ filepath = argv[2];
+ dirfd = opendir(glfs_dir);
+ if (dirfd == NULL) {
+ fprintf(stderr, "opening directory failed (%s)\n", strerror(errno));
+ goto err;
+ }
+
+ filefd = open(filepath, O_RDWR);
+ if (filefd < 0) {
+ fprintf(stderr, "open failed on path %s (%s)\n", filepath,
+ strerror(errno));
+ goto err;
+ }
+
+ ret = stat(filepath, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed on path %s (%s)\n", filepath,
+ strerror(errno));
+ goto err;
+ }
+
+ size_before_write = stbuf.st_size;
+
+ ret = write(filefd, "testdata", strlen("testdata123") + 1);
+ if (ret <= 0) {
+ fprintf(stderr, "write failed (%s)\n", strerror(errno));
+ goto err;
+ }
+
+ while (readdir(dirfd)) {
+ /* do nothing */
+ }
+
+ ret = stat(filepath, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed on path %s (%s)\n", strerror(errno));
+ goto err;
+ }
+
+ if (stbuf.st_size == size_before_write) {
+ fprintf(stderr,
+ "file size (%lu) has not changed even after "
+ "its written to\n",
+ stbuf.st_size);
+ goto err;
+ }
+
+ return 0;
+err:
+ return -1;
+}
diff --git a/tests/bugs/readdir-ahead/bug-1390050.t b/tests/bugs/readdir-ahead/bug-1390050.t
new file mode 100644
index 00000000000..ab1d7d4ead9
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1390050.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B{0..1}/$V0
+TEST $CLI volume set $V0 readdir-ahead on
+
+DIRECTORY="$M0/subdir1/subdir2"
+
+#Make sure md-cache has large timeout to hold stat from readdirp_cbk in its cache
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+rm -rf $M0/*
+TEST mkdir -p $DIRECTORY
+rm -rf $DIRECTORY/*
+TEST touch $DIRECTORY/file{0..10}
+rdd_tester=$(dirname $0)/rdd-tester
+TEST build_tester $(dirname $0)/bug-1390050.c -o $rdd_tester
+TEST $rdd_tester $DIRECTORY $DIRECTORY/file4
+rm -f $rdd_tester
+cleanup;
+
diff --git a/tests/bugs/readdir-ahead/bug-1436090.t b/tests/bugs/readdir-ahead/bug-1436090.t
new file mode 100755
index 00000000000..e0877f15684
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1436090.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+TEST mkdir $M0/dir1
+
+# Create a large file (4 GB), so that rebalance takes time
+# Since we really don't care about the contents of the file, we use fallocate
+# to generate the file much faster. We could also use truncate, which is even
+# faster, but rebalance could take advantage of an sparse file and migrate it
+# in an optimized way, but we don't want a fast migration.
+TEST fallocate -l 4G $M0/dir1/foo
+
+TEST mv $M0/dir1/foo $M0/dir1/bar
+
+TEST $CLI_1 volume rebalance $V0 start force
+TEST ! $CLI_1 volume set $V0 parallel-readdir on
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
+TEST $CLI_1 volume set $V0 parallel-readdir on
+TEST mv $M0/dir1/bar $M0/dir1/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+TEST $CLI_1 volume rebalance $V0 start force
+TEST ln $M0/dir1/foo $M0/dir1/bar
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1439640.t b/tests/bugs/readdir-ahead/bug-1439640.t
new file mode 100755
index 00000000000..dcd54076444
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1439640.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B{0..1}/$V0
+TEST $CLI volume set $V0 readdir-ahead on
+TEST $CLI volume start $V0
+
+TEST ! $CLI volume set $V0 parallel-readdir sdf
+
+TEST $CLI volume set $V0 parallel-readdir off
+TEST $CLI volume set $V0 parallel-readdir on
+
+TEST ! $CLI volume set $V0 rda-cache-limit 0
+TEST ! $CLI volume set $V0 rda-cache-limit -634
+TEST ! $CLI volume set $V0 rda-cache-limit 87adh
+TEST ! $CLI volume set $V0 parallel-readdir sdf
+
+TEST ! $CLI volume set $V0 rda-request-size 0
+TEST ! $CLI volume set $V0 rda-request-size -634
+TEST ! $CLI volume set $V0 rda-request-size 87adh
+
+TEST $CLI volume set $V0 rda-cache-limit 10MB
+TEST $CLI volume set $V0 rda-request-size 128KB
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1446516.t b/tests/bugs/readdir-ahead/bug-1446516.t
new file mode 100755
index 00000000000..2bf6520d861
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1446516.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..4}
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 parallel-readdir on
+
+TEST $CLI volume set $V0 rda-cache-limit 4GB
+
+TEST $CLI volume set $V0 parallel-readdir off
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1512437.t b/tests/bugs/readdir-ahead/bug-1512437.t
new file mode 100755
index 00000000000..50eaa7d6696
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1512437.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 parallel-readdir on
+TEST $CLI volume set $V0 readdir-optimize on
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST mkdir -p $M0/subdir1/subdir2;
+umount $M0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+count=`ls -1 $M0/subdir1 | wc -l`
+TEST [ $count -eq 1 ]
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t b/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t
new file mode 100644
index 00000000000..6adfc17c92c
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 readdir-ahead on #on by default as of writing this .t.
+TEST $CLI volume set $V0 consistent-metadata on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST touch $M0/FILE
+echo "abc" >> $M0/FILE
+EXPECT "^0$" echo $?
+EXPECT "abc" cat $M0/FILE
+echo "truncate" >$M0/FILE
+EXPECT "^0$" echo $?
+EXPECT "truncate" cat $M0/FILE
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/886998/strict-readdir.t b/tests/bugs/replicate/886998/strict-readdir.t
new file mode 100644
index 00000000000..63fe313b201
--- /dev/null
+++ b/tests/bugs/replicate/886998/strict-readdir.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+function num_files_in_dir {
+ d=$1
+ ls $d | sort | uniq | wc -l
+}
+
+#Basic sanity tests for readdir functionality
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r2d2_0 $H0:$B0/r2d2_1 $H0:$B0/r2d2_2 $H0:$B0/r2d2_3
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0
+
+TEST touch $M0/{1..100}
+EXPECT "100" num_files_in_dir $M0
+
+TEST kill_brick $V0 $H0 $B0/r2d2_0
+TEST kill_brick $V0 $H0 $B0/r2d2_2
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/r2d2_1
+TEST kill_brick $V0 $H0 $B0/r2d2_3
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+TEST $CLI volume set $V0 cluster.strict-readdir on
+EXPECT "on" volinfo_field $V0 cluster.strict-readdir
+TEST kill_brick $V0 $H0 $B0/r2d2_0
+TEST kill_brick $V0 $H0 $B0/r2d2_2
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/r2d2_1
+TEST kill_brick $V0 $H0 $B0/r2d2_3
+EXPECT "100" num_files_in_dir $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1015990-rep.t b/tests/bugs/replicate/bug-1015990-rep.t
new file mode 100755
index 00000000000..ab8166e372a
--- /dev/null
+++ b/tests/bugs/replicate/bug-1015990-rep.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST kill_brick $V0 $H0 $B0/$V0"3"
+
+for i in {1..100}; do echo "STRING" > $M0/File$i; done
+
+# Check shd is connected to all up bricks before running statistics command.
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+command_output=$(gluster volume heal $V0 statistics heal-count replica $H0:$B0/$V0"1")
+substring="Number of entries:"
+count=0
+while read -r line;
+do
+ if [[ "$line" == *$substring* ]]
+ then
+ value=$(echo $line | cut -f 2 -d :)
+ count=$(($count + $value))
+ fi
+
+done <<< "$command_output"
+
+brick_2_entries_count=$count
+xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2")
+EXPECT $brick_2_entries_count echo $xattrop_count_brick_2
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1015990.t b/tests/bugs/replicate/bug-1015990.t
new file mode 100755
index 00000000000..a8b12f2c202
--- /dev/null
+++ b/tests/bugs/replicate/bug-1015990.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+sleep 5
+TEST kill_brick $V0 $H0 $B0/$V0"3"
+sleep 5
+
+for i in {1..100}; do echo "STRING" > $M0/File$i; done
+
+brick_2_sh_entries=$(count_sh_entries $B0/$V0"2")
+brick_4_sh_entries=$(count_sh_entries $B0/$V0"4")
+
+
+command_output=$(gluster volume heal $V0 statistics heal-count)
+
+
+substring="Number of entries:"
+count=0
+while read -r line;
+do
+ if [[ "$line" == *$substring* ]]
+ then
+ value=$(echo $line | cut -f 2 -d :)
+ count=$(($count + $value))
+ fi
+
+done <<< "$command_output"
+
+brick_2_entries_count=$(($count-$value))
+brick_4_entries_count=$value
+
+xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2")
+xattrop_count_brick_4=$(count_sh_entries $B0/$V0"4")
+EXPECT $brick_2_entries_count echo $xattrop_count_brick_2
+EXPECT $brick_4_entries_count echo $xattrop_count_brick_4
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0
+
+cleanup;
+
diff --git a/tests/bugs/replicate/bug-1032927.t b/tests/bugs/replicate/bug-1032927.t
new file mode 100644
index 00000000000..eb663d03fed
--- /dev/null
+++ b/tests/bugs/replicate/bug-1032927.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This tests if pathinfo getxattr fails when one of the bricks is down
+#Lets hope it doesn't
+
+cleanup;
+function get_pathinfo_in_loop {
+ failed=0
+ for i in {1..1000}
+ do
+ getfattr -n trusted.glusterfs.pathinfo $M0 2>/dev/null
+ if [ $? -ne 0 ]; then failed=1;break; fi
+ done
+ return $failed
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#when one of the bricks is down getfattr of pathinfo should not fail
+#Lets just do the test for 1000 times to see if we hit the race
+TEST get_pathinfo_in_loop
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1037501.t b/tests/bugs/replicate/bug-1037501.t
new file mode 100755
index 00000000000..ce079555b50
--- /dev/null
+++ b/tests/bugs/replicate/bug-1037501.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function write_file()
+{
+ path="$1"; shift
+ echo "$*" > "$path"
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+TEST `echo "TEST-FILE" > $M0/File`
+TEST `mkdir $M0/Dir`
+TEST `ln $M0/File $M0/Link`
+TEST `mknod $M0/FIFO p`
+
+TEST $CLI volume add-brick $V0 replica 4 $H0:$B0/$V0-3 force
+TEST $CLI volume add-brick $V0 replica 5 $H0:$B0/$V0-4 force
+TEST $CLI volume add-brick $V0 replica 6 $H0:$B0/$V0-5 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/File
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/Link
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/Dir
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/FIFO
+
+EXPECT 10 stat -c '%s' $B0/$V0-0/File
+EXPECT 10 stat -c '%s' $B0/$V0-1/File
+EXPECT 10 stat -c '%s' $B0/$V0-2/File
+EXPECT 10 stat -c '%s' $B0/$V0-3/File
+EXPECT 10 stat -c '%s' $B0/$V0-4/File
+EXPECT 10 stat -c '%s' $B0/$V0-5/File
+
+EXPECT 3 stat -c '%h' $B0/$V0-0/Link
+EXPECT 3 stat -c '%h' $B0/$V0-1/Link
+EXPECT 3 stat -c '%h' $B0/$V0-2/Link
+EXPECT 3 stat -c '%h' $B0/$V0-3/Link
+EXPECT 3 stat -c '%h' $B0/$V0-4/Link
+EXPECT 3 stat -c '%h' $B0/$V0-5/Link
+
+EXPECT 'directory' stat -c '%F' $B0/$V0-0/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-1/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-2/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-3/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-4/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-5/Dir
+
+EXPECT 'fifo' stat -c '%F' $B0/$V0-0/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-1/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-2/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-3/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-4/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-5/FIFO
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1046624.t b/tests/bugs/replicate/bug-1046624.t
new file mode 100755
index 00000000000..e2762ea6764
--- /dev/null
+++ b/tests/bugs/replicate/bug-1046624.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume set $V0 background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST ${GFS} --volfile-server=$H0 --volfile-id=$V0 --use-readdirp=no $M0
+
+TEST `echo "TEST-FILE" > $M0/File`
+TEST `mkdir $M0/Dir`
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
+
+TEST `ln -s $M0/File $M0/Link1`
+TEST `ln -s $M0/Dir $M0/Link2`
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST `find $M0/ 2>/dev/null 1>/dev/null`
+TEST `find $M0/ | xargs stat 2>/dev/null 1>/dev/null`
+
+TEST stat $B0/${V0}-0/Link1
+TEST stat $B0/${V0}-0/Link2
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1058797.t b/tests/bugs/replicate/bug-1058797.t
new file mode 100644
index 00000000000..598062a0dab
--- /dev/null
+++ b/tests/bugs/replicate/bug-1058797.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+#Test that the setuid bit is healed correctly.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+#Basic checks
+TEST glusterd
+
+#Create a 1x2 replica volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1};
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+# FUSE mount;create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file
+
+#Kill brick1 and set S_ISUID and S_ISGID bits from mount point
+kill_brick $V0 $H0 $B0/brick1
+TEST chmod +x,+s $M0/file
+
+#Get file permissions from backend brick0 and verify that S_ISUID is indeed set
+file_permissions1=`ls -l $B0/brick0/file | awk '{print $1}'| cut -d. -f1 | cut -d- -f2,3,4,5,6`
+setuid_bit1=`echo $file_permissions1 | cut -b3`
+EXPECT "s" echo $setuid_bit1
+
+#Restart volume and do lookup from mount to trigger heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST dd if=$M0/file of=/dev/null
+
+#Get file permissions from healed brick1 and verify that S_ISUID is indeed set
+file_permissions2=`ls -l $B0/brick1/file | awk '{print $1}' | cut -d. -f1 | cut -d- -f2,3,4,5,6`
+setuid_bit2=`echo $file_permissions2 | cut -b3`
+EXPECT "s" echo $setuid_bit2
+
+#Also compare the entire permission string,just to be sure
+EXPECT $file_permissions1 echo $file_permissions2
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1101647.t b/tests/bugs/replicate/bug-1101647.t
new file mode 100644
index 00000000000..708bc1a1e29
--- /dev/null
+++ b/tests/bugs/replicate/bug-1101647.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+#Create base entry in indices/xattrop
+echo "Data">$M0/file
+
+TEST $CLI volume heal $V0
+#Entries from indices/xattrop should not be cleared after a heal.
+EXPECT 1 count_index_entries $B0/$V0"1"
+EXPECT 1 count_index_entries $B0/$V0"2"
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "More data">>$M0/file
+
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count|grep "Number of entries:"|head -n1|awk '{print $4}'`
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t
new file mode 100644
index 00000000000..c7509f33cc2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1130892.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Create a 1X2 replica
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Disable self-heal daemon
+TEST gluster volume set $V0 self-heal-daemon off
+
+# Enable Client side heal
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+# Disable all perf-xlators
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+# Volume start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# FUSE Mount
+TEST ${GFS} -s $H0 --volfile-id $V0 $M0
+
+# Create files and dirs
+TEST mkdir -p $M0/one/two/
+TEST `echo "Carpe diem" > $M0/one/two/three`
+
+# Simulate disk-replacement
+TEST kill_brick $V0 $H0 $B0/${V0}-1
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
+TEST rm -rf $B0/${V0}-1/one
+TEST rm -rf $B0/${V0}-1/.glusterfs
+
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs
+
+# Start force
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/one
+
+sleep 1
+
+# Check pending xattrs
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
+
+TEST gluster volume set $V0 self-heal-daemon on
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1132102.t b/tests/bugs/replicate/bug-1132102.t
new file mode 100644
index 00000000000..c7dbbf818aa
--- /dev/null
+++ b/tests/bugs/replicate/bug-1132102.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+#This tests that mknod and create fops mark necessary pending changelog
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd $M0
+TEST mkfifo fifo
+TEST mknod block b 0 0
+TEST touch a
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/fifo trusted.afr.$V0-client-0 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/fifo trusted.afr.$V0-client-0 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/fifo trusted.afr.$V0-client-0 metadata
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/block trusted.afr.$V0-client-0 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/block trusted.afr.$V0-client-0 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/block trusted.afr.$V0-client-0 metadata
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a trusted.afr.$V0-client-0 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a trusted.afr.$V0-client-0 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a trusted.afr.$V0-client-0 metadata
+cleanup
diff --git a/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t b/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t
new file mode 100644
index 00000000000..b69a38ae788
--- /dev/null
+++ b/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+#### Test iatt and user xattr heal from lookup code path ####
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+cd $M0
+TEST touch file
+TEST setfattr -n user.attribute1 -v "value" $B0/brick0/file
+TEST kill_brick $V0 $H0 $B0/brick2
+TEST chmod +x file
+iatt=$(stat -c "%g:%u:%A" file)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT 2 get_pending_heal_count $V0
+
+#Trigger metadataheal
+TEST stat file
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#iattrs must be matching
+iatt1=$(stat -c "%g:%u:%A" $B0/brick0/file)
+iatt2=$(stat -c "%g:%u:%A" $B0/brick1/file)
+iatt3=$(stat -c "%g:%u:%A" $B0/brick2/file)
+EXPECT $iatt echo $iatt1
+EXPECT $iatt echo $iatt2
+EXPECT $iatt echo $iatt3
+
+#xattrs must be matching
+xatt1_cnt=$(getfattr -d $B0/brick0/file|wc|awk '{print $1}')
+xatt2_cnt=$(getfattr -d $B0/brick1/file|wc|awk '{print $1}')
+xatt3_cnt=$(getfattr -d $B0/brick2/file|wc|awk '{print $1}')
+EXPECT "$xatt1_cnt" echo $xatt2_cnt
+EXPECT "$xatt1_cnt" echo $xatt3_cnt
+
+#changelogs must be zero
+xattr1=$(get_hex_xattr trusted.afr.$V0-client-2 $B0/brick0/file)
+xattr2=$(get_hex_xattr trusted.afr.$V0-client-2 $B0/brick1/file)
+EXPECT "000000000000000000000000" echo $xattr1
+EXPECT "000000000000000000000000" echo $xattr2
+
+cd -
+cleanup;
diff --git a/tests/bugs/replicate/bug-1139230.t b/tests/bugs/replicate/bug-1139230.t
new file mode 100644
index 00000000000..9ceac6c4f4e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1139230.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Create a 1X2 replica
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Volume start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# FUSE Mount
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir -p $M0/one
+
+# Kill a brick
+TEST kill_brick $V0 $H0 $B0/${V0}-1
+
+TEST `echo "A long" > $M0/one/two`
+
+# Start force
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two
+
+# Pending xattrs should be set for all the bricks once self-heal is done
+# Check pending xattrs
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one trusted.afr.dirty
+
+TEST `echo "time ago" > $M0/one/three`
+
+# Pending xattrs should be set for all the bricks once transaction is done
+# Check pending xattrs
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one/three trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one/three trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one/three trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one/three trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one/three trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one/three trusted.afr.dirty
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1180545.t b/tests/bugs/replicate/bug-1180545.t
new file mode 100644
index 00000000000..5e40edd6c38
--- /dev/null
+++ b/tests/bugs/replicate/bug-1180545.t
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+#Create gfid split-brain of directory and check if conservative merge
+#completes successfully.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+function check_sh_entries() {
+ local expected="$1"
+ local count=
+ local good="0"
+ shift
+
+ for i in $*; do
+ count="$(count_sh_entries $i)"
+ if [[ "x${count}" == "x${expected}" ]]; then
+ good="$((good + 1))"
+ fi
+ done
+ if [[ "x${good}" != "x${last_good}" ]]; then
+ last_good="${good}"
+# This triggers a sweep of the heal index. However if more than one brick
+# tries to heal the same directory at the same time, one of them will take
+# the lock and the other will give up, waiting for the next heal cycle, which
+# is set to 60 seconds (the minimum valid value). So, each time we detect
+# that one brick has completed the heal, we trigger another heal.
+ $CLI volume heal $V0
+ fi
+
+ echo "${good}"
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 cluster.heal-timeout 60
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Create files with alternate brick down. One file has gfid mismatch.
+TEST mkdir $M0/DIR
+
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST touch $M0/DIR/FILE
+TEST touch $M0/DIR/file{1..5}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/DIR/FILE
+TEST touch $M0/DIR/file{6..10}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Trigger heal and verify number of entries in backend
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+last_good=""
+
+EXPECT_WITHIN $HEAL_TIMEOUT "2" check_sh_entries 2 $B0/brick{0,1}
+
+#Two entries for DIR and two for FILE
+EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
+TEST diff <(ls $B0/brick0/DIR) <(ls $B0/brick1/DIR)
+cleanup
diff --git a/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
new file mode 100644
index 00000000000..fe8e992e8f8
--- /dev/null
+++ b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+#Stale entries in xattrop folder for files which do not need heal must be removed during the next index heal crawl.
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0;
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST `echo hello>$M0/datafile`
+TEST touch $M0/mdatafile
+
+#Create split-brain and reset the afr xattrs, so that we have only the entry inside xattrop folder.
+#This is to simulate the case where the brick crashed just before pre-op happened, but index xlator created the entry inside xattrop folder.
+
+#Create data, metadata SB.
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST stat $M0/datafile
+TEST `echo append>>$M0/datafile`
+TEST chmod +x $M0/mdatafile
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST kill_brick $V0 $H0 $B0/$V0"0"
+TEST stat $M0/datafile
+TEST `echo append>>$M0/datafile`
+TEST chmod -x $M0/mdatafile
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST ! cat $M0/datafile
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/$V0"0"
+EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/$V0"1"
+
+#Reset xattrs and trigger heal.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/datafile
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/datafile
+
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/mdatafile
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/mdatafile
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $B0/$V0"0"
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $B0/$V0"1"
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t b/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t
new file mode 100644
index 00000000000..6ff471fbf15
--- /dev/null
+++ b/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+#Allow readdirs to proceed on directories that are in split-brain
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file{1..5}
+
+#Create entry split-brain
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
+TEST touch $M0/dir/FILE
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "^Y$" force_umount $M0
+TEST $CLI volume start $V0 force
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST kill_brick $V0 $H0 $B0/$V0"0"
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
+TEST touch $M0/dir/FILE
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "^Y$" force_umount $M0
+TEST $CLI volume start $V0 force
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
+
+cd $M0/dir
+EXPECT "6" echo $(ls | wc -l)
+TEST ! cat FILE
+TEST `echo hello>hello.txt`
+cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1238398-split-brain-resolution.t b/tests/bugs/replicate/bug-1238398-split-brain-resolution.t
new file mode 100644
index 00000000000..8ef3aae979f
--- /dev/null
+++ b/tests/bugs/replicate/bug-1238398-split-brain-resolution.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+function get_split_brain_status {
+ local path=$1
+ echo `getfattr -n replica.split-brain-status $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST `echo "some-data" > $M0/metadata-split-brain.txt`
+
+#Create metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 666 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST chmod 757 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 2 get_pending_heal_count $V0
+
+#Inspect the file in metadata-split-brain
+EXPECT "data-split-brain:no metadata-split-brain:yes Choices:patchy-client-0,patchy-client-1" get_split_brain_status $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+
+EXPECT "757" stat -c %a $M0/metadata-split-brain.txt
+
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/metadata-split-brain.txt
+EXPECT "666" stat -c %a $M0/metadata-split-brain.txt
+
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/replicate/bug-1238508-self-heal.t b/tests/bugs/replicate/bug-1238508-self-heal.t
new file mode 100644
index 00000000000..24fb07d31f0
--- /dev/null
+++ b/tests/bugs/replicate/bug-1238508-self-heal.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+# Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir $M0/olddir;
+TEST `echo "some-data" > $M0/olddir/oldfile`
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/olddir/oldfile $M0/olddir/newfile;
+TEST mv $M0/olddir $M0/newdir;
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Test if the files are present on both the bricks
+EXPECT "newdir" ls $B0/${V0}0/
+EXPECT "newdir" ls $B0/${V0}1/
+EXPECT "newfile" ls $B0/${V0}0/newdir/
+EXPECT "newfile" ls $B0/${V0}1/newdir/
+
+# Test if gfid-link files in .glusterfs also provide correct info
+brick0gfid=$(gf_get_gfid_backend_file_path $B0/${V0}0 newdir)
+brick1gfid=$(gf_get_gfid_backend_file_path $B0/${V0}1 newdir)
+EXPECT "newfile" ls $brick0gfid
+EXPECT "newfile" ls $brick1gfid
+
+# Test if the files are accessible from the mount
+EXPECT "newdir" ls $M0/
+EXPECT "newfile" ls $M0/newdir
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1250170-fsync.c b/tests/bugs/replicate/bug-1250170-fsync.c
new file mode 100644
index 00000000000..21fd96594aa
--- /dev/null
+++ b/tests/bugs/replicate/bug-1250170-fsync.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+int
+main(int argc, char **argv)
+{
+ char *file = NULL;
+ int fd = -1;
+ char *buffer = NULL;
+ size_t buf_size = 0;
+ size_t written = 0;
+ int ret = 0;
+ off_t offset = 0;
+ int i = 0;
+ int loop_count = 5;
+
+ if (argc < 2) {
+ printf("Usage:%s <filename>\n", argv[0]);
+ return -1;
+ }
+
+ file = argv[1];
+ buf_size = 1024;
+ buffer = malloc(buf_size);
+ if (!buffer) {
+ perror("malloc");
+ return -1;
+ }
+ memset(buffer, 'R', buf_size);
+
+ fd = open(file, O_WRONLY);
+ if (fd == -1) {
+ perror("open");
+ return -1;
+ }
+
+ for (i = 0; i < loop_count; i++) {
+ ret = write(fd, buffer, buf_size);
+ if (ret == -1) {
+ perror("write");
+ return ret;
+ } else {
+ written += ret;
+ }
+ offset = lseek(fd, 0, SEEK_SET);
+ }
+
+ free(buffer);
+ return 0;
+}
diff --git a/tests/bugs/replicate/bug-1250170-fsync.t b/tests/bugs/replicate/bug-1250170-fsync.t
new file mode 100644
index 00000000000..7a3fdbf8bb5
--- /dev/null
+++ b/tests/bugs/replicate/bug-1250170-fsync.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST gcc $(dirname $0)/bug-1250170-fsync.c -o $(dirname $0)/bug-1250170-fsync
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST gluster volume profile $V0 start
+#Perform 5 non-sequential writes.
+TEST $(dirname $0)/bug-1250170-fsync $M0/file
+
+#Run profile info initially to filter out the interval statistics in the
+#subsequent runs.
+TEST $CLI volume profile $V0 info
+#We get only cumulative statistics.
+write_count=$($CLI volume profile $V0 info | grep WRITE |awk '{count += $8} END {print count}')
+fsync_count=$($CLI volume profile $V0 info | grep FSYNC |awk '{count += $8} END {print count}')
+
+EXPECT "5" echo $write_count
+TEST [ -z $fsync_count ]
+
+TEST $CLI volume profile $V0 stop
+TEST umount $M0
+rm -f $(dirname $0)/bug-1250170-fsync
+cleanup
diff --git a/tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t b/tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t
new file mode 100644
index 00000000000..884b7892954
--- /dev/null
+++ b/tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Negative case with brick not killed && volume-id xattrs present
+TEST ! $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Negative case with brick killed but volume-id xattr present
+TEST ! $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit
+
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1 start
+# Simulated reset disk
+for i in {1..5}
+do
+ rm -rf $B0/${V0}{1}/file$i.txt
+done
+for i in {6..10}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Now reset the brick
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+EXPECT "10" echo $(ls $B0/${V0}1 | wc -l)
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1292379.t b/tests/bugs/replicate/bug-1292379.t
new file mode 100644
index 00000000000..be1bf699173
--- /dev/null
+++ b/tests/bugs/replicate/bug-1292379.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.eager-lock off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST wfd=`fd_available`
+TEST fd_open $wfd "w" $M0/a
+
+TEST fd_write $wfd "abcd"
+
+# Kill brick-0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# While brick-0 is down, rename 'a' to 'b'
+TEST mv $M0/a $M0/b
+
+TEST fd_write $wfd "lmn"
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_write $wfd "pqrs"
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST fd_write $wfd "xyz"
+TEST fd_close $wfd
+
+md5sum_b0=$(md5sum $B0/${V0}0/b | awk '{print $1}')
+
+EXPECT "$md5sum_b0" echo `md5sum $B0/${V0}1/b | awk '{print $1}'`
+
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1297695.t b/tests/bugs/replicate/bug-1297695.t
new file mode 100644
index 00000000000..d5c1a214fe2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1297695.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function write_to_file {
+ dd of=$M0/dir/file if=/dev/urandom bs=1024k count=128 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo 'abc' > $M0/dir/file`
+
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+write_to_file &
+#Test if the MAX [F]INODELK fop latency is of the order of seconds.
+EXPECT "^1$" get_pending_heal_count $V0
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z $inodelk_max_latency ]
+cleanup
diff --git a/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t b/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t
new file mode 100644
index 00000000000..780ddb9250c
--- /dev/null
+++ b/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Test that for files in metadata-split-brain, we do not wind even a single read.
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 700 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 777 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+lines=`cat $M0/file|wc|awk '{print $1}'`
+EXPECT 0 echo $lines
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1325792.t b/tests/bugs/replicate/bug-1325792.t
new file mode 100644
index 00000000000..73190e5f341
--- /dev/null
+++ b/tests/bugs/replicate/bug-1325792.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}0 | grep -A 1 ${V0}0 | grep "entries" | wc -l`
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}1 | grep -A 1 ${V0}1 | grep "entries" | wc -l`
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}2 | grep -A 1 ${V0}2 | grep "entries" | wc -l`
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}3 | grep -A 1 ${V0}3 | grep "entries" | wc -l`
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1335652.t b/tests/bugs/replicate/bug-1335652.t
new file mode 100644
index 00000000000..653a1b05ce2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1335652.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 shard on
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Kill the zero'th brick so that 1st and 2nd get marked dirty
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST dd if=/dev/urandom of=$M0/file bs=10MB count=20
+
+#At any point value of dirty should not be greater than 0 on source bricks
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1/.shard
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2/.shard
+
+rm -rf $M0/file;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t b/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t
new file mode 100644
index 00000000000..6d177a7d3f8
--- /dev/null
+++ b/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a 2x2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r11 $H0:$B0/r12 $H0:$B0/r21 $H0:$B0/r22;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## for release > 3.7 , gluster nfs is off by default
+TEST $CLI vol set $V0 nfs.disable off;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST mount_nfs $H0:/$V0 $N0;
+
+## create some directories and files inside mount
+mkdir $N0/io;
+for j in {1..10}; do mkdir $N0/io/b$j; for k in {1..10}; do touch $N0/io/b$j/c$k; done done
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/r11 $H0:$B0/r12 start;
+
+TEST mkdir $N0/dir1;
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/r11 $H0:$B0/r12"
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/r11 $H0:$B0/r12 commit;
+
+TEST mkdir $N0/dir2;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1341650.t b/tests/bugs/replicate/bug-1341650.t
new file mode 100644
index 00000000000..610342ca5bd
--- /dev/null
+++ b/tests/bugs/replicate/bug-1341650.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume set $V0 quota-deem-statfs on
+TEST $CLI volume set $V0 soft-timeout 0
+TEST $CLI volume set $V0 hard-timeout 0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST mkdir $M0/dir
+TEST $CLI volume quota $V0 limit-objects /dir 10
+
+TEST touch $M0/dir/file1
+TEST touch $M0/dir/file2
+TEST touch $M0/dir/file3
+TEST touch $M0/dir/file4
+TEST touch $M0/dir/file5
+
+# Kill 3rd brick and create entries
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST touch $M0/dir/file6
+TEST touch $M0/dir/file7
+TEST touch $M0/dir/file8
+TEST touch $M0/dir/file9
+
+# Quota object limit is reached. Remove object for create to succeed.
+TEST ! touch $M0/dir/file10
+
+TEST rm $M0/dir/file1
+TEST touch $M0/dir/file10
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}0/dir
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1/dir
+
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Check that no convervative merge happened.file1 must not be present on any brick.
+TEST ! stat $B0/${V0}0/dir/file1
+TEST ! stat $B0/${V0}1/dir/file1
+TEST ! stat $B0/${V0}2/dir/file1
+
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1363721.t b/tests/bugs/replicate/bug-1363721.t
new file mode 100644
index 00000000000..0ed34d8a4f4
--- /dev/null
+++ b/tests/bugs/replicate/bug-1363721.t
@@ -0,0 +1,118 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+FILE_UPDATE_TIMEOUT=20
+cleanup
+
+function size_increased {
+ local file=$1
+ local size=$2
+ local new_size=$(stat -c%s $file)
+ if [ $new_size -gt $size ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function has_write_failed {
+ local pid=$1
+ if [ -d /proc/$pid ]; then echo "N"; else echo "Y"; fi
+}
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --direct-io-mode=enable $M0
+
+cd $M0
+
+# Start writing to a file.
+(dd if=/dev/urandom of=$M0/file1 bs=1k 2>/dev/null 1>/dev/null)&
+dd_pid=$!
+
+# Let IO happen
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 0
+
+# Now kill the zeroth brick
+kill_brick $V0 $H0 $B0/${V0}0
+
+# Let IO continue
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 $(stat -c%s file1)
+
+# Now bring the brick back up
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Let IO continue
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 $(stat -c%s file1)
+
+# Now kill the first brick
+kill_brick $V0 $H0 $B0/${V0}1
+
+# Let IO continue
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 $(stat -c%s file1)
+
+# Now bring the brick back up
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# Let IO continue for 3 seconds
+sleep 3
+
+# Now kill the second brick
+kill_brick $V0 $H0 $B0/${V0}2
+
+# At this point the write should have been failed.
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "Y" has_write_failed $dd_pid
+
+# Also make sure that the second brick is never an accused.
+
+md5sum_2=$(md5sum $B0/${V0}2/file1 | awk '{print $1}')
+
+EXPECT_NOT "$md5sum_2" echo `md5sum $B0/${V0}0/file1 | awk '{print $1}'`
+EXPECT_NOT "$md5sum_2" echo `md5sum $B0/${V0}1/file1 | awk '{print $1}'`
+
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/file1 trusted.afr.dirty data
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/file1 trusted.afr.dirty data
+
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/file1 trusted.afr.$V0-client-2 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/file1 trusted.afr.$V0-client-2 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file1 trusted.afr.$V0-client-2 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/file1 trusted.afr.$V0-client-2 metadata
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/file1 trusted.afr.$V0-client-2 metadata
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file1 trusted.afr.$V0-client-2 metadata
+
+# Now bring the brick back up
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Enable shd
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$md5sum_2" echo `md5sum $B0/${V0}0/file1 | awk '{print $1}'`
+EXPECT "$md5sum_2" echo `md5sum $B0/${V0}1/file1 | awk '{print $1}'`
+EXPECT "$md5sum_2" echo `md5sum $B0/${V0}2/file1 | awk '{print $1}'`
+
+cd ~
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1365455.t b/tests/bugs/replicate/bug-1365455.t
new file mode 100644
index 00000000000..1953e2a9327
--- /dev/null
+++ b/tests/bugs/replicate/bug-1365455.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function check_size
+{
+ for i in {1..10}; do
+ size1=`stat -c %s $B0/${V0}0/tmp$i`
+ size2=`stat -c %s $B0/${V0}1/tmp$i`
+ if [[ $size1 -eq 0 ]] || [[ $size2 -eq 0 ]] || [[ $size1 -ne $size2 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+for i in {1..10}
+do
+ echo abc > $M0/tmp$i
+done
+
+
+# Add Another brick
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}1
+
+#Check if self heal daemon has come up
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+
+#Check if self heal daemon is able to see all bricks
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+
+#Check size of files on bricks
+TEST check_size
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
new file mode 100644
index 00000000000..d049d95ef9a
--- /dev/null
+++ b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/data.txt
+TEST touch $M0/mdata.txt
+
+#Create data and metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024
+TEST setfattr -n user.value -v value1 $M0/mdata.txt
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024
+TEST setfattr -n user.value -v value2 $M0/mdata.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+## Check that the file still in split-brain,
+ ## I/O fails
+ cat $M0/data.txt > /dev/null
+ EXPECT "1" echo $?
+ ## pending xattrs blame each other.
+ brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/data.txt)
+ brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST [ $brick0_pending -ne "000000000000000000000000" ]
+ TEST [ $brick1_pending -ne "000000000000000000000000" ]
+
+ ## I/O fails
+ getfattr -n user.value $M0/mdata.txt
+ EXPECT "1" echo $?
+ brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/mdata.txt)
+ brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/mdata.txt)
+ TEST [ $brick0_pending -ne "000000000000000000000000" ]
+ TEST [ $brick1_pending -ne "000000000000000000000000" ]
+
+## Let us use mtime as fav-child policy. So brick0 will be source.
+ # Set dirty (data part) on the sink brick to check if it is reset later along with the pending xattr.
+ TEST setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/${V0}1/data.txt
+ # Set dirty (metadata part) on the sink brick to check if it is reset later along with the pending xattr.
+ TEST setfattr -n trusted.afr.dirty -v 0x000000000000000100000000 $B0/${V0}1/mdata.txt
+
+ TEST $CLI volume set $V0 favorite-child-policy mtime
+
+ # Reading the file should be allowed and sink brick xattrs must be reset.
+ cat $M0/data.txt > /dev/null
+ EXPECT "0" echo $?
+ TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt)
+ TEST [ $brick1_dirty -eq "000000000000000000000000" ]
+ TEST [ $brick1_pending -eq "000000000000000000000000" ]
+
+ # Accessing the file should be allowed and sink brick xattrs must be reset.
+ EXPECT "value2" echo $(getfattr --only-values -n user.value $M0/mdata.txt)
+ TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt)
+ TEST [ $brick1_dirty -eq "000000000000000000000000" ]
+ TEST [ $brick1_pending -eq "000000000000000000000000" ]
+
+#Enable shd and heal the file.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT 0 get_pending_heal_count $V0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1402730.t b/tests/bugs/replicate/bug-1402730.t
new file mode 100644
index 00000000000..c7866df463b
--- /dev/null
+++ b/tests/bugs/replicate/bug-1402730.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 granular-entry-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0
+
+TEST mkdir -p $M0/a/b/c -p
+cd $M0/a/b/c
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+rm -rf $B0/${V0}2/*
+rm -rf $B0/${V0}2/.glusterfs
+
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}2/.glusterfs && chmod 600 $B0/${V0}2/.glusterfs
+
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST touch file
+
+GFID_C=$(get_gfid_string $M0/a/b/c)
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$GFID_C/file
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$GFID_C/file
+
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/a/b/c trusted.afr.$V0-client-2 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a/b/c trusted.afr.$V0-client-2 entry
+
+cd ~
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1408712.t b/tests/bugs/replicate/bug-1408712.t
new file mode 100644
index 00000000000..9499a598ef1
--- /dev/null
+++ b/tests/bugs/replicate/bug-1408712.t
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=12
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume heal $V0 granular-entry-heal enable
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 performance.flush-behind off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+
+TEST cd $M0
+TEST dd if=/dev/zero of=file bs=1M count=8
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}0
+
+TEST "dd if=/dev/zero bs=1M count=8 >> file"
+
+FILE_GFID=$(get_gfid_string $M0/file)
+
+# Test that the index associated with '/.shard' is created on B1 and B2.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+# Check for successful creation of granular entry indices
+for i in {2..3}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+ TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+TEST cd ~
+TEST md5sum $M1/file
+
+# Test that the index associated with '/.shard' and the created shards do not disappear on B1 and B2.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+for i in {2..3}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+ TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+# Start the brick that was down
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Enable shd
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+
+for i in {2..3}
+do
+ TEST_IN_LOOP ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+ TEST_IN_LOOP ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t b/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t
new file mode 100644
index 00000000000..d0e2fee8bcd
--- /dev/null
+++ b/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/file
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=20
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! dd if=$M0/file of=/dev/null
+SOURCE_BRICK_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+
+# Various fav-child policies must not heal the file when some bricks are down.
+TEST $CLI volume set $V0 favorite-child-policy size
+TEST ! dd if=$M0/file of=/dev/null
+TEST $CLI volume set $V0 favorite-child-policy ctime
+TEST ! dd if=$M0/file of=/dev/null
+TEST $CLI volume set $V0 favorite-child-policy mtime
+TEST ! dd if=$M0/file of=/dev/null
+TEST $CLI volume set $V0 favorite-child-policy majority
+TEST ! dd if=$M0/file of=/dev/null
+
+# CLI/mount based split-brain resolution must also not work.
+TEST ! $CLI volume heal $V0 split-brain bigger-file /file
+TEST ! $CLI volume heal $V0 split-brain mtime /file
+TEST ! $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file1
+
+TEST ! getfattr -n replica.split-brain-status $M0/file
+TEST ! setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/file
+
+# Bring all bricks back up and launch heal.
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT 0 get_pending_heal_count $V0
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+B2_MD5=$(md5sum $B0/${V0}2/file | cut -d\ -f1)
+TEST [ "$SOURCE_BRICK_MD5" == "$B1_MD5" ]
+TEST [ "$SOURCE_BRICK_MD5" == "$B2_MD5" ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
new file mode 100644
index 00000000000..10ce0131f4f
--- /dev/null
+++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
@@ -0,0 +1,79 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Disable self-heal-daemon, client-side-heal and set quorum-type to none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.quorum-type none
+
+#Kill bricks 0 & 1 and create a file to have pending entry for 0 & 1 on brick 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "file 1" >> $M0/f1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Kill bricks 1 & 2 and create a file to have pending entry for 1 & 2 on brick 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "file 2" >> $M0/f2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Kill bricks 2 & 0 and create a file to have pending entry for 2 & 0 on brick 1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "file 3" >> $M0/f3
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# We were killing one brick and checking that entry heal does not reset the
+# pending xattrs for the down brick. Now that we need all bricks to be up for
+# entry heal, I'm removing that test from the .t
+
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST ls $M0
+TEST cat $M0/f1
+TEST cat $M0/f2
+TEST cat $M0/f3
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+#Check whether all the bricks contains all the 3 files.
+EXPECT "3" echo $(ls $B0/${V0}0 | wc -l)
+EXPECT "3" echo $(ls $B0/${V0}1 | wc -l)
+EXPECT "3" echo $(ls $B0/${V0}2 | wc -l)
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
new file mode 100644
index 00000000000..cdcaf62c925
--- /dev/null
+++ b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+NEW_USER=bug1438255
+NEW_UID=1438255
+NEW_GID=1438255
+
+TEST groupadd -o -g ${NEW_GID} ${NEW_USER}-${NEW_GID}
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -K MAIL_DIR=/dev/null ${NEW_USER}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST touch $M0/FILE
+TEST kill_brick $V0 $H0 $B0/${V0}2
+chown $NEW_UID:$NEW_GID $M0/FILE
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# setfattr done as NEW_USER fails on 3rd brick with EPERM but suceeds on
+# the first 2 and hence on the mount.
+su -m bug1438255 -c "setfattr -n user.myattr -v myvalue $M0/FILE"
+TEST [ $? -eq 0 ]
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+# Brick 3 does not have any self-blaming pending xattr.
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
+
+TEST userdel --force ${NEW_USER}
+TEST groupdel ${NEW_USER}-${NEW_GID}
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/replicate/bug-1448804-check-quorum-type-values.t b/tests/bugs/replicate/bug-1448804-check-quorum-type-values.t
new file mode 100644
index 00000000000..5bacf3edcfe
--- /dev/null
+++ b/tests/bugs/replicate/bug-1448804-check-quorum-type-values.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..1}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Default quorum-type for replica 2 is none. quorum-count is zero but it is not displayed.
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+# Convert to replica-3.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Default quorum-type for replica 3 is auto. quorum-count is INT_MAX but it is not displayed.
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+# Change the type to fixed.
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+# We haven't set quorum-count yet, so it takes the default value of zero in reconfigure() and hence the quorum-type is displayed as none.
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+# set quorum-count and check.
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "fixed" get_quorum_type $M0 $V0 0
+EXPECT "1" echo `cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count|awk '{print $3}'`
+
+# reset to default values.
+TEST $CLI volume reset $V0 cluster.quorum-type
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1473026.t b/tests/bugs/replicate/bug-1473026.t
new file mode 100644
index 00000000000..efb3ffa0d39
--- /dev/null
+++ b/tests/bugs/replicate/bug-1473026.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1473026
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1473026
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0;
+
+#kill one brick (this has some issue)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#kill the brick to be replaced
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# We know this command would fail because file system is read only now
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}4 commit force
+
+TEST pkill glusterd
+
+# Glusterd should start but the volume info and brick volfiles don't match
+TEST glusterd
+TEST pidof glusterd
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t b/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t
new file mode 100644
index 00000000000..bb858a8a63d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST mkdir -p $M0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 $M0/d2
+gfid_d1=$(gf_get_gfid_xattr $B0/${V0}0/d1)
+gfid_d2=$(gf_get_gfid_xattr $B0/${V0}0/d2)
+gfid_dir=$(gf_get_gfid_xattr $B0/${V0}0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789)
+
+gfid_str_d1=$(gf_gfid_xattr_to_str $gfid_d1)
+gfid_str_d2=$(gf_gfid_xattr_to_str $gfid_d2)
+gfid_str_d3=$(gf_gfid_xattr_to_str $gfid_dir)
+
+# Kill 3rd brick and rename the dir from mount.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 $M0/d2
+
+# Bring it back and trigger heal.
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check that .glusterfs symlink for dir exists and points to d2/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+TEST linkname=$(readlink $B0/${V0}2/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3)
+EXPECT "dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" basename $linkname
+TEST parent_dir_gfid_str=$(echo $linkname|cut -d / -f5)
+EXPECT $gfid_str_d2 echo $parent_dir_gfid_str
+
+TEST rmdir $M0/d2/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+
+TEST ! stat $B0/${V0}0/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3
+TEST ! stat $B0/${V0}1/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3
+TEST ! stat $B0/${V0}2/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3
+cleanup;
diff --git a/tests/bugs/replicate/bug-1480525.t b/tests/bugs/replicate/bug-1480525.t
new file mode 100644
index 00000000000..7c63bb2e4ea
--- /dev/null
+++ b/tests/bugs/replicate/bug-1480525.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+EXPECT_NOT "-1" echo `cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep read_child |awk '{print $3}'`
+TEST $CLI volume set $V0 choose-local off
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "-1" echo `cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep read_child |awk '{print $3}'`
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
new file mode 100644
index 00000000000..8a79febf4b4
--- /dev/null
+++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST $CLI volume set $V0 self-heal-daemon off
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+
+#------------------------------------------------------------------------------#
+TEST touch $M0/f1
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
+# brick crash at the point where file got created but no xattrs were set.
+TEST setfattr -x trusted.gfid $B0/${V0}1/f1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 2
+
+# Assume there were no pending xattrs on parent dir due to 1st brick crashing
+# too. Then name heal from client must heal the gfid.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST stat $M0/f1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+#------------------------------------------------------------------------------#
+TEST mkdir $M0/dir
+TEST touch $M0/dir/f2
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2)
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
+
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
+# brick crash at the point where file got created but no xattrs were set.
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+#Now simulate setting of pending entry xattr on parent dir of 1st brick.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
+create_brick_xattrop_entry $B0/${V0}0 dir
+
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 2
+
+#Trigger entry-heal via shd
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+#------------------------------------------------------------------------------#
+cleanup;
diff --git a/tests/bugs/replicate/bug-1498570-client-iot-graph-check.t b/tests/bugs/replicate/bug-1498570-client-iot-graph-check.t
new file mode 100644
index 00000000000..2b3b3040228
--- /dev/null
+++ b/tests/bugs/replicate/bug-1498570-client-iot-graph-check.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+TESTS_EXPECTED_IN_LOOP=21
+function reset_cluster
+{
+ cleanup
+ TEST glusterd
+ TEST pidof glusterd
+
+}
+function check_iot_option
+{
+ local enabled=$1
+ local is_loaded_in_graph=$2
+
+ EXPECT "$enabled" volume_get_field $V0 client-io-threads
+ IOT_STRING="volume\ $V0-io-threads"
+ grep "$IOT_STRING" $GLUSTERD_WORKDIR/vols/$V0/trusted-$V0.tcp-fuse.vol
+ TEST ret=$?
+ EXPECT_NOT "$is_loaded_in_graph" echo $ret
+}
+
+reset_cluster
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+check_iot_option on 1
+
+reset_cluster
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+check_iot_option off 0
+
+reset_cluster
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}1
+check_iot_option off 0
+TEST $CLI volume remove-brick $V0 replica 1 $H0:$B0/${V0}1 force
+check_iot_option on 1
+
+reset_cluster
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 client-io-threads on
+check_iot_option on 1
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}5 force
+check_iot_option on 1
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1539358-split-brain-detection.t b/tests/bugs/replicate/bug-1539358-split-brain-detection.t
new file mode 100755
index 00000000000..7b71a7a9e7d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1539358-split-brain-detection.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+###############################################################################yy
+# Case of 2 bricks blaming the third and the third blaming the other two.
+
+TEST `echo "hello" >> $M0/file`
+
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST `echo "append" >> $M0/file`
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/file trusted.afr.$V0-client-1 data
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 data
+CLIENT_MD5=$(md5sum $M0/file | cut -d\ -f1)
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/$V0"1"/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/$V0"1"/file
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+B2_MD5=$(md5sum $B0/${V0}2/file | cut -d\ -f1)
+TEST [ "$CLIENT_MD5" == "$B0_MD5" ]
+TEST [ "$CLIENT_MD5" == "$B1_MD5" ]
+TEST [ "$CLIENT_MD5" == "$B2_MD5" ]
+
+TEST rm $M0/file
+
+###############################################################################yy
+# Case of each brick blaming the next one in a cyclic manner
+
+TEST `echo "hello" >> $M0/file`
+# Mark cyclic xattrs and modify file content directly on the bricks.
+TEST $CLI volume set $V0 self-heal-daemon off
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/$V0"0"/file
+setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/$V0"0"/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/$V0"1"/file
+setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/$V0"1"/file
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/$V0"2"/file
+setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/$V0"2"/file
+
+TEST `echo "ab" >> $B0/$V0"0"/file`
+TEST `echo "cdef" >> $B0/$V0"1"/file`
+TEST `echo "ghi" >> $B0/$V0"2"/file`
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file))
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+B2_MD5=$(md5sum $B0/${V0}2/file | cut -d\ -f1)
+TEST [ "$B0_MD5" == "$B1_MD5" ]
+TEST [ "$B0_MD5" == "$B2_MD5" ]
+###############################################################################yy
+cleanup
diff --git a/tests/bugs/replicate/bug-1561129-enospc.t b/tests/bugs/replicate/bug-1561129-enospc.t
new file mode 100644
index 00000000000..1b402fcc781
--- /dev/null
+++ b/tests/bugs/replicate/bug-1561129-enospc.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+#Tests that sequential write workload doesn't lead to FSYNCs
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST truncate -s 128M $B0/xfs_image
+TEST mkfs.xfs -f $B0/xfs_image
+TEST mkdir $B0/bricks
+TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/bricks/brick{0,1,3}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Write 50MB of data, which will try to consume 50x3=150MB on $B0/bricks.
+# Before that, we hit ENOSPC in pre-op cbk, which should not crash the mount.
+TEST ! dd if=/dev/zero of=$M0/a bs=1M count=50
+TEST stat $M0/a
+cleanup;
diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
new file mode 100644
index 00000000000..49c4dea4e9c
--- /dev/null
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function create_files {
+ local i=1
+ while (true)
+ do
+ dd if=/dev/zero of=$M0/file$i bs=1M count=10
+ if [ -e $B0/${V0}0/file$i ] || [ -e $B0/${V0}1/file$i ]; then
+ ((i++))
+ else
+ break
+ fi
+ done
+ echo $i
+}
+
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick0
+TEST truncate -s 100M $B0/brick1
+#Have the 3rd brick of a higher size to test the scenario of entry transaction
+#passing on only one brick and not on other bricks.
+TEST truncate -s 110M $B0/brick2
+LO1=`SETUP_LOOP $B0/brick0`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+LO3=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+i=$(create_files)
+TEST ! ls $B0/${V0}0/file$i
+TEST ! ls $B0/${V0}1/file$i
+TEST ls $B0/${V0}2/file$i
+dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2)
+TEST [ "$dirty" != "000000000000000000000000" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST rm -f $M0/file1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+UMOUNT_LOOP ${B0}/${V0}{0,1,2}
+rm -f ${B0}/brick{0,1,2}
+cleanup;
diff --git a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t
new file mode 100644
index 00000000000..c6e5459e9a8
--- /dev/null
+++ b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function check_gfid_and_link_count
+{
+ local file=$1
+
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
+ TEST [ ! -z $file_gfid_b0 ]
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
+
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
+}
+TESTS_EXPECTED_IN_LOOP=30
+
+##############################################################################
+# Test on 1x3 volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+
+# Create files directly in the backend on different bricks
+echo $RANDOM >> $B0/${V0}0/file1
+echo $RANDOM >> $B0/${V0}1/file2
+echo $RANDOM >> $B0/${V0}2/file3
+
+# To prevent is_fresh_file code path
+sleep 2
+
+# Access them from mount to trigger name + gfid heal.
+TEST stat $M0/file1
+TEST stat $M0/file2
+TEST stat $M0/file3
+
+# Launch index heal to complete any pending data/metadata heals.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check each file has a gfid and the .glusterfs hardlink
+check_gfid_and_link_count file1
+check_gfid_and_link_count file2
+check_gfid_and_link_count file3
+
+TEST rm $M0/file1
+TEST rm $M0/file2
+TEST rm $M0/file3
+cleanup;
+
+##############################################################################
+# Test on 1x (2+1) volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+
+# Create files directly in the backend on different bricks
+echo $RANDOM >> $B0/${V0}0/file1
+echo $RANDOM >> $B0/${V0}1/file2
+touch $B0/${V0}2/file3
+
+# To prevent is_fresh_file code path
+sleep 2
+
+# Access them from mount to trigger name + gfid heal.
+TEST stat $M0/file1
+TEST stat $M0/file2
+
+# Though file is created on all 3 bricks, lookup will fail as arbiter blames the
+# other 2 bricks and ariter is not 'readable'.
+TEST ! stat $M0/file3
+
+# Launch index heal to complete any pending data/metadata heals.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Now file3 should be accesible from mount.
+TEST stat $M0/file3
+
+# Check each file has a gfid and the .glusterfs hardlink
+check_gfid_and_link_count file1
+check_gfid_and_link_count file2
+check_gfid_and_link_count file3
+
+TEST rm $M0/file1
+TEST rm $M0/file2
+TEST rm $M0/file3
+cleanup;
diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t
new file mode 100644
index 00000000000..86bfecb1a9e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+# Test to check dirs having dirty xattr do not show up in info split-brain.
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+EXPECT "1" count_index_entries $B0/${V0}2
+
+TEST mkdir $M0/dirty_dir
+TEST mkdir $M0/pending_dir
+
+# Set dirty xattrs on all bricks to simulate the case where entry transaction
+# succeeded only the pre-op phase.
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir
+create_brick_xattrop_entry $B0/${V0}0 dirty_dir
+# Should not show up as split-brain.
+EXPECT "0" afr_get_split_brain_count $V0
+
+# replace/reset brick case where the new brick has dirty and the other 2 bricks
+# blame it should not be reported as split-brain.
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2
+create_brick_xattrop_entry $B0/${V0}0 "/"
+# Should not show up as split-brain.
+EXPECT "0" afr_get_split_brain_count $V0
+
+# Set pending xattrs on all bricks blaming each other to simulate the case of
+# entry split-brain.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir
+create_brick_xattrop_entry $B0/${V0}0 pending_dir
+# Should show up as split-brain.
+EXPECT "1" afr_get_split_brain_count $V0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1637249-gfid-heal.t b/tests/bugs/replicate/bug-1637249-gfid-heal.t
new file mode 100644
index 00000000000..e824f14531e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1637249-gfid-heal.t
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+###############################################################################
+
+# Test for gfid + name heal when there is no 'source' brick, i.e. parent dir
+# xattrs are in split-brain or have dirty xattrs.
+
+TEST mkdir $M0/dir_pending
+TEST dd if=/dev/urandom of=$M0/dir_pending/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_pending/dir11
+TEST mkdir $M0/dir_dirty
+TEST touch $M0/dir_dirty/file2
+
+# Set pending entry xattrs on dir_pending and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir_pending
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/dir_pending
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+# Set dirty entry xattrs on dir_dirty and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dir_dirty
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir_dirty/file2)
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_dirty/file2
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+# Create a file under dir_pending directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_pending/file3
+
+# Create a file under dir_pending directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_pending/file4
+TEST touch $B0/${V0}1/dir_pending/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_pending/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_pending/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_pending/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+TEST stat $M0/dir_dirty/file2
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir_dirty/file2
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+TEST stat $M0/dir_pending/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_pending/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_pending/file3
+
+TEST stat $M0/dir_pending/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file4
+###############################################################################
+
+# Test for gfid + name heal when all bricks are 'source', i.e. parent dir
+# does not have any pending or dirty xattrs.
+
+TEST mkdir $M0/dir_clean
+TEST dd if=/dev/urandom of=$M0/dir_clean/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_clean/dir11
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+# Create a file under dir_clean directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_clean/file3
+
+# Create a file under dir_clean directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_clean/file4
+TEST touch $B0/${V0}1/dir_clean/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_clean/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_clean/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_clean/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+TEST stat $M0/dir_clean/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_clean/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_clean/file3
+
+TEST stat $M0/dir_clean/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file4
+###############################################################################
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
new file mode 100644
index 00000000000..d7d1f285e01
--- /dev/null
+++ b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+# Test to check that data self-heal does not leave any stale lock.
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+
+# Kill arbiter brick and write to FILE.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "arbiter down" >> $M0/FILE
+EXPECT 2 get_pending_heal_count $V0
+
+# Bring it back up and let heal complete.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# write to the FILE must succeed.
+echo "this must succeed" >> $M0/FILE
+TEST [ $? -eq 0 ]
+cleanup;
diff --git a/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t b/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t
new file mode 100644
index 00000000000..63f72e86bf6
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST mkdir dir
+
+#Create metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 757 dir
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 747 dir
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Use size as fav-child policy.
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+
+#Enable shd and heal the file.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+
+b1c1dir=$(afr_get_specific_changelog_xattr $B0/${V0}0/dir \
+ trusted.afr.$V0-client-1 "metadata")
+b2c0dir=$(afr_get_specific_changelog_xattr $B0/${V0}1/dir \
+ trusted.afr.$V0-client-0 "metadata")
+
+EXPECT "00000001" echo $b1c1dir
+EXPECT "00000001" echo $b2c0dir
+
+#Finish up
+TEST force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t b/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t
new file mode 100755
index 00000000000..319736e1157
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/file
+
+############ Healing using favorite-child-policy = size and size of bricks is same #################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT_NOT "^0$" echo $?
+
+#We know that both bricks have same size file
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT_NOT "^0$" echo $?
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup
+
diff --git a/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t
new file mode 100644
index 00000000000..783016dc3c0
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+# Conversion from 2x1 to 2x3
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/dir
+TEST dd if=/dev/urandom of=$M0/dir/file bs=100K count=5
+file_md5sum=$(md5sum $M0/dir/file | awk '{print $1}')
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{2..5}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+
+# Trigger heal and wait for for it to complete
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check whether the directory & file are healed to the newly added bricks
+TEST ls $B0/${V0}2/dir
+TEST ls $B0/${V0}3/dir
+TEST ls $B0/${V0}4/dir
+TEST ls $B0/${V0}5/dir
+
+TEST [ $file_md5sum == $(md5sum $B0/${V0}4/dir/file | awk '{print $1}') ]
+TEST [ $file_md5sum == $(md5sum $B0/${V0}5/dir/file | awk '{print $1}') ]
+
+
+# Conversion from 2x1 to 2x(2+1)
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1}
+EXPECT 'Created' volinfo_field $V1 'Status';
+TEST $CLI volume start $V1
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1
+
+TEST $GFS --volfile-id=$V1 --volfile-server=$H0 $M1;
+TEST mkdir $M1/dir
+TEST dd if=/dev/urandom of=$M1/dir/file bs=100K count=5
+file_md5sum=$(md5sum $M1/dir/file | awk '{print $1}')
+
+TEST $CLI volume add-brick $V1 replica 3 arbiter 1 $H0:$B0/${V1}{2..5}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 5
+
+# Trigger heal and wait for for it to complete
+TEST $CLI volume heal $V1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V1
+
+# Check whether the directory & file are healed to the newly added bricks
+TEST ls $B0/${V1}2/dir
+TEST ls $B0/${V1}3/dir
+TEST ls $B0/${V1}4/dir
+TEST ls $B0/${V1}5/dir
+
+EXPECT "0" stat -c %s $B0/${V1}5/dir/file
+TEST [ $file_md5sum == $(md5sum $B0/${V1}4/dir/file | awk '{print $1}') ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t
new file mode 100644
index 00000000000..b180f0e1239
--- /dev/null
+++ b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/dir
+TEST "echo abc > $M0/file1"
+TEST "echo uvw > $M0/file2"
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST "echo def > $M0/file1"
+TEST "echo xyz > $M0/file2"
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Rename file1 and read it. Read should be served from the 3rd brick
+TEST mv $M0/file1 $M0/file3
+EXPECT "def" cat $M0/file3
+
+# Create a link to file2 and read it. Read should be served from the 3rd brick
+TEST ln $M0/file2 $M0/dir/file4
+EXPECT "xyz" cat $M0/dir/file4
+EXPECT "xyz" cat $M0/file2
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
new file mode 100644
index 00000000000..78581e99614
--- /dev/null
+++ b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}2/.glusterfs/changelogs"
+ROLLOVER_TIME=100
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/file1 bs=128K count=5
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume profile $V0 info
+truncate_count=$($CLI volume profile $V0 info | grep TRUNCATE | awk '{count += $8} END {print count}')
+
+EXPECT "1" echo $truncate_count
+EXPECT "1" check_changelog_op ${CHANGELOG_PATH_0} "^ D "
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
new file mode 100755
index 00000000000..869cdb94bda
--- /dev/null
+++ b/tests/bugs/replicate/bug-1696599-io-hang.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+#Tests that local structures in afr are removed from granted/blocked list of
+#locks when inodelk fails on all bricks
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume set $V0 delay-gen locks
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable finodelk
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file
+#Trigger write and stop bricks so inodelks fail on all bricks leading to
+#lock failure condition
+echo abc >> $M0/file &
+
+TEST $CLI volume stop $V0
+TEST $CLI volume reset $V0 delay-gen
+wait
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+#Test that only one write succeeded, this tests that delay-gen worked as
+#expected
+echo abc >> $M0/file
+EXPECT "abc" cat $M0/file
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
new file mode 100644
index 00000000000..76d1f2170f2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume heal $V0 disable
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+###############################################################################
+# Case of 2 bricks blaming the third and the third blaming the other two.
+
+TEST mkdir $M0/dir
+
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata
+CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir)
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir)
+
+TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ]
+TEST setfattr -x user.metadata $M0/dir
+
+###############################################################################
+# Case of each brick blaming the next one in a cyclic manner
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# Mark cyclic xattrs and modify metadata directly on the bricks.
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file
+
+setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file))
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+TEST rm -f $M0/dir/file
+
+###############################################################################
+# Case of 2 bricks having quorum blaming and the other having only one blaming.
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir/file
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+
+# B0 must blame B2
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+
+# Modify the metadata directly on the bricks B1 & B2.
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+
+###############################################################################
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
new file mode 100644
index 00000000000..0aeaaafc84c
--- /dev/null
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 disable
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+
+##########################################################################################
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
+
+TEST touch $M0/dir/file
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+#B0 and B2 must blame B1
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Remove the gfid xattr and the link file on one brick.
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+
+# Wait for 2 second to force posix to consider that this is a valid file but
+# without gfid.
+sleep 2
+TEST $CLI volume heal $V0
+
+# Heal should not fail as the file is missing gfid xattr and the link file,
+# which is not actually the gfid or type mismatch.
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+rm -f $M0/dir/file
+
+
+###########################################################################################
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
+
+TEST $CLI volume heal $V0 disable
+TEST touch $M0/dir/file
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+#B0 and B2 must blame B1
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Remove the gfid xattr and the link file on two bricks.
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+
+# Wait for 2 second to force posix to consider that this is a valid file but
+# without gfid.
+sleep 2
+TEST $CLI volume heal $V0
+
+# Heal should not fail as the file is missing gfid xattr and the link file,
+# which is not actually the gfid or type mismatch.
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1728770-pass-xattrs.t b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
new file mode 100644
index 00000000000..159c4fcc6a1
--- /dev/null
+++ b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+function fop_on_bad_disk {
+ local path=$1
+ mkdir $path/dir{1..1000} 2>/dev/null
+ mv $path/dir1 $path/newdir
+ touch $path/foo.txt
+ echo $?
+}
+
+function ls_fop_on_bad_disk {
+ local path=$1
+ ls $path
+ echo $?
+}
+
+TEST init_n_bricks 6;
+TEST setup_lvm 6;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3 $H0:$L4 $H0:$L5 $H0:$L6;
+TEST $CLI volume set $V0 health-check-interval 1000;
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+#corrupt last disk
+dd if=/dev/urandom of=/dev/mapper/patchy_snap_vg_6-brick_lvm bs=512K count=200 status=progress && sync
+
+
+# Test the disk is now returning EIO for touch and ls
+EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^1$" fop_on_bad_disk "$L6"
+EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^2$" ls_fop_on_bad_disk "$L6"
+
+TEST touch $M0/foo{1..100}
+TEST $CLI volume remove-brick $V0 replica 3 $H0:$L4 $H0:$L5 $H0:$L6 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$L4 $H0:$L5 $H0:$L6";
+
+#check that remove-brick status should not have any failed or skipped files
+var=`$CLI volume remove-brick $V0 $H0:$L4 $H0:$L5 $H0:$L6 status | grep completed`
+TEST [ `echo $var | awk '{print $5}'` = "0" ]
+TEST [ `echo $var | awk '{print $6}'` = "0" ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
new file mode 100644
index 00000000000..14dfae89135
--- /dev/null
+++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function time_stamps_match {
+ path=$1
+ mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
+ atime_source_b0=$(get_atime $B0/${V0}0/$path)
+ mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
+ atime_source_b2=$(get_atime $B0/${V0}2/$path)
+ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
+ atime_sink_b1=$(get_atime $B0/${V0}1/$path)
+
+ #The same brick must be the source of heal for both atime and mtime.
+ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \
+ ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]]
+ then
+ echo "Y"
+ else
+ echo "Mtimes: $mtime_source_b0:$mtime_sink_b1:$mtime_source_b2 Atimes: $atime_source_b0:$atime_sink_b1:$atime_source_b2"
+ fi
+
+}
+
+function mtimes_match {
+ path=$1
+ mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
+ mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
+ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
+
+ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1) || \
+ ( $mtime_source_b2 -eq $mtime_sink_b1) ]]
+ then
+ echo "Y"
+ else
+ echo "Mtimes: $mtime_source_b0:$mtime_sink_b1:$mtime_source_b2"
+ fi
+
+}
+
+# Test that the parent dir's timestamps are restored during entry-heal.
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+###############################################################################
+TEST mkdir $M0/DIR
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/DIR/FILE
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+EXPECT "Y" time_stamps_match DIR
+ctime_source1=$(get_ctime $B0/${V0}0/$path)
+ctime_source2=$(get_ctime $B0/${V0}2/$path)
+ctime_sink=$(get_ctime $B0/${V0}1/$path)
+TEST [ $ctime_source1 -eq $ctime_sink ]
+TEST [ $ctime_source2 -eq $ctime_sink ]
+
+
+###############################################################################
+# Repeat the test with ctime feature disabled.
+TEST $CLI volume set $V0 features.ctime off
+TEST mkdir $M0/DIR2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/DIR2/FILE
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+#Executing parallel heal may lead to changing atime after heal. So better
+#to test just the mtime
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+EXPECT "Y" mtimes_match DIR2
+
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup;
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
new file mode 100644
index 00000000000..011535066f9
--- /dev/null
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+function get_cumulative_opendir_count {
+#sed command prints content between Cumulative and Interval, this keeps content from Cumulative stats
+ $CLI volume profile $V0 info |sed -n '/^Cumulative/,/^Interval/p'|grep OPENDIR| awk '{print $8}'|tr -d '\n'
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST ! $CLI volume heal $V0
+
+# Enable shd and verify that index crawl is triggered immediately.
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume heal $V0 enable
+# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode
+EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
+
+# Check that a change in heal-timeout is honoured immediately.
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+sleep 10
+# Two crawls must have happened.
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
+
+# shd must not heal if it is disabled and heal-timeout is changed.
+TEST $CLI volume heal $V0 disable
+#Wait for configuration update and any opendir fops to complete
+sleep 10
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume set $V0 cluster.heal-timeout 6
+#Better to wait for more than 6 seconds to account for configuration updates
+sleep 10
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+TEST [ -z $COUNT ]
+cleanup;
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
new file mode 100644
index 00000000000..96279084065
--- /dev/null
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+function check_gfid_and_link_count
+{
+ local file=$1
+
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
+ TEST [ ! -z $file_gfid_b0 ]
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
+
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
+}
+TESTS_EXPECTED_IN_LOOP=18
+
+################################################################################
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 disable
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST `echo "File 1 " > $M0/dir/file1`
+TEST touch $M0/dir/file{2..4}
+
+# Remove file2 from 1st & 3rd bricks
+TEST rm -f $B0/$V0"0"/dir/file2
+TEST rm -f $B0/$V0"2"/dir/file2
+
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm -f $B0/$V0"0"/dir/file3
+TEST rm -f $B0/$V0"1"/dir/file3
+
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
+
+# B0 and B2 blame each other
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir on first brick.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# All the files must be present on all the bricks after conservative merge and
+# should have the gfid xattr and the .glusterfs hardlink.
+check_gfid_and_link_count dir/file1
+check_gfid_and_link_count dir/file2
+check_gfid_and_link_count dir/file3
+check_gfid_and_link_count dir/file4
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
new file mode 100644
index 00000000000..c1bdf34ee6d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard enable
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Create split-brain by setting afr xattrs/gfids manually.
+#file1 is non-sharded and will be in data split-brain.
+#file2 will have one shard which will be in data split-brain.
+#file3 will have one shard which will be in gfid split-brain.
+#file4 will have one shard which will be in data & metadata split-brain.
+TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024 oflag=direct
+TEST dd if=/dev/zero of=$M0/file2 bs=1M count=6 oflag=direct
+TEST dd if=/dev/zero of=$M0/file3 bs=1M count=6 oflag=direct
+TEST dd if=/dev/zero of=$M0/file4 bs=1M count=6 oflag=direct
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#-------------------------------------------------------------------------------
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/file1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/file1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/file1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/file1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/file1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/file1
+
+#-------------------------------------------------------------------------------
+gfid_f2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file2))
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
+
+#-------------------------------------------------------------------------------
+TESTS_EXPECTED_IN_LOOP=5
+function assign_new_gfid {
+ brickpath=$1
+ filename=$2
+ gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/$filename))
+ gfid_shard=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/.shard/$gfid.1))
+
+ TEST rm $brickpath/.glusterfs/${gfid_shard:0:2}/${gfid_shard:2:2}/$gfid_shard
+ TEST setfattr -x trusted.gfid $brickpath/.shard/$gfid.1
+ new_gfid=$(get_random_gfid)
+ new_gfid_str=$(gf_gfid_xattr_to_str $new_gfid)
+ TEST setfattr -n trusted.gfid -v $new_gfid $brickpath/.shard/$gfid.1
+ TEST mkdir -p $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}
+ TEST ln $brickpath/.shard/$gfid.1 $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}/$new_gfid_str
+}
+assign_new_gfid $B0/$V0"1" file3
+assign_new_gfid $B0/$V0"2" file3
+
+#-------------------------------------------------------------------------------
+gfid_f4=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file4))
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
+
+#-------------------------------------------------------------------------------
+#Add entry to xattrop dir on first brick and check for split-brain.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+
+gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1
+
+gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1
+
+gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3))
+gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1
+
+gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1
+
+#-------------------------------------------------------------------------------
+#gfid split-brain won't show up in split-brain count.
+EXPECT "3" afr_get_split_brain_count $V0
+EXPECT_NOT "^0$" get_pending_heal_count $V0
+
+#Resolve split-brains
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file1
+GFIDSTR="gfid:$gfid_f2_shard1"
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f3.1
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f4.1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
new file mode 100644
index 00000000000..7e24eaec03d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup
+
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0..2}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST touch $M0/a
+sleep 1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/a
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+mtime0=$(get_mtime $B0/brick0/a)
+mtime1=$(get_mtime $B0/brick1/a)
+TEST [ $mtime0 -eq $mtime1 ]
+
+ctime0=$(get_ctime $B0/brick0/a)
+ctime1=$(get_ctime $B0/brick1/a)
+TEST [ $ctime0 -eq $ctime1 ]
+
+###############################################################################
+# Repeat the test with ctime feature disabled.
+TEST $CLI volume set $V0 features.ctime off
+
+TEST touch $M0/b
+sleep 1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/b
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+mtime2=$(get_mtime $B0/brick0/b)
+mtime3=$(get_mtime $B0/brick1/b)
+TEST [ $mtime2 -eq $mtime3 ]
+
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t
new file mode 100644
index 00000000000..94b465181fa
--- /dev/null
+++ b/tests/bugs/replicate/bug-1801624-entry-heal.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 heal-timeout 5
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+echo "Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+
+# Re-create the file when a brick is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST rm $M0/FILE
+echo "New Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
+
+# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices.
+$CLI volume heal $V0 # CLI will fail but heal is launched anyway.
+TEST sleep 5 # give index heal a chance to do one run.
+brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/)
+brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/)
+TEST [ $brick0_pending -eq "000000000000000000000002" ]
+TEST [ $brick2_pending -eq "000000000000000000000002" ]
+EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST cat $M0/FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-765564.t b/tests/bugs/replicate/bug-765564.t
new file mode 100644
index 00000000000..098d225018f
--- /dev/null
+++ b/tests/bugs/replicate/bug-765564.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+#returns success if 'olddir' is absent
+#'olddir' must be absent in both replicas
+function rm_succeeded () {
+ local dir1=$1
+ [[ -d $H0:$B0/${V0}-0/$dir1 || -d $H0:$B0/${V0}-1/$dir1 ]] && return 0
+ return 1
+}
+
+# returns successes if 'newdir' is present
+#'newdir' must be present in both replicas
+function mv_succeeded () {
+ local dir1=$1
+ [[ -d $H0:$B0/${V0}-0/$dir1 && -d $H0:$B0/${V0}-1/$dir1 ]] && return 1
+ return 0
+}
+
+# returns zero on success
+# Only one of rm and mv can succeed. This is captured by the XOR below
+
+function chk_backend_consistency(){
+ local dir1=$1
+ local dir2=$2
+ local rm_status=rm_succeeded $dir1
+ local mv_status=mv_succeeded $dir2
+ [[ ( $rm_status && ! $mv_status ) || ( ! $rm_status && $mv_status ) ]] && return 0
+ return 1
+}
+
+#concurrent removal/rename of dirs
+function rm_mv_correctness () {
+ ret=0
+ for i in {1..100}; do
+ mkdir $M0/"dir"$i
+ rmdir $M0/"dir"$i &
+ mv $M0/"dir"$i $M0/"adir"$i &
+ wait
+ tmp_ret=$(chk_backend_consistency "dir"$i "adir"$i)
+ (( ret += tmp_ret ))
+ rm -rf $M0/"dir"$i
+ rm -rf $M0/"adir"$i
+ done
+ return $ret
+}
+
+TEST touch $M0/a;
+TEST mv $M0/a $M0/b;
+
+#test rename fop when one of the bricks is down
+kill_brick ${V0} ${H0} ${B0}/${V0}-1;
+TEST touch $M0/h;
+TEST mv $M0/h $M0/1;
+
+TEST $CLI volume start $V0 force;
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1;
+find $M0 2>/dev/null 1>/dev/null;
+find $M0 | xargs stat 2>/dev/null 1>/dev/null;
+
+TEST rm_mv_correctness;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
+
diff --git a/tests/bugs/replicate/bug-767585-gfid.t b/tests/bugs/replicate/bug-767585-gfid.t
new file mode 100755
index 00000000000..4176aabb544
--- /dev/null
+++ b/tests/bugs/replicate/bug-767585-gfid.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Test cases to perform gfid-self-heal
+#file 'a' should be assigned a fresh gfid
+#file 'b' should be healed with gfid1 from brick1
+#file 'c' should be healed with gfid2 from brick2
+
+gfid1="0x8428b7193a764bf8be8046fb860b8993"
+gfid2="0x85ad91afa2f74694bf52c3326d048209"
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+touch $B0/${V0}0/a $B0/${V0}1/a
+touch $B0/${V0}0/b $B0/${V0}1/b
+touch $B0/${V0}0/c $B0/${V0}1/c
+
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}0/b
+TEST setfattr -n trusted.gfid -v $gfid2 $B0/${V0}1/c
+
+sleep 2
+
+TEST stat $M0/a
+TEST stat $M0/b
+TEST stat $M0/c
+
+TEST gf_get_gfid_xattr $B0/${V0}0/a
+TEST gf_get_gfid_xattr $B0/${V0}1/a
+
+EXPECT "$gfid1" gf_get_gfid_xattr $B0/${V0}0/b
+EXPECT "$gfid1" gf_get_gfid_xattr $B0/${V0}1/b
+
+EXPECT "$gfid2" gf_get_gfid_xattr $B0/${V0}0/c
+EXPECT "$gfid2" gf_get_gfid_xattr $B0/${V0}1/c
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-802417.t b/tests/bugs/replicate/bug-802417.t
new file mode 100755
index 00000000000..f213439401e
--- /dev/null
+++ b/tests/bugs/replicate/bug-802417.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function write_file()
+{
+ path="$1"; shift
+ echo "$*" > "$path"
+}
+
+cleanup;
+
+#####################################################
+# We are currently not triggering data heal unless all bricks of the replica are
+# up. We will need to modify this .t once the fix for preventing stale reads
+# being served to clients for files in spurious split-brains is done. Spurious
+# split-brains here means afr xattrs indicates sbrain but it is actually not.
+# Self-heal will heal such files automatically but before the heal completes,
+# reads can be served which needs fixing.
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+######################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizably stale data.
+TEST write_file $M0/a_file "old_data"
+
+## Kill two of the bricks and write some newer data.
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST write_file $M0/a_file "new_data"
+
+## Bring all the bricks up and kill one so we do a partial self-heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST dd if=${M0}/a_file of=/dev/null
+
+
+obs_path_0=${B0}/${V0}-0/a_file
+obs_path_1=${B0}/${V0}-1/a_file
+obs_path_2=${B0}/${V0}-2/a_file
+
+tgt_xattr_0="trusted.afr.${V0}-client-0"
+tgt_xattr_1="trusted.afr.${V0}-client-1"
+tgt_xattr_2="trusted.afr.${V0}-client-2"
+
+actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0x000000000000000000000000" \
+afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1
+
+actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_2)
+EXPECT "0x000000030000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_1)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_2)
+EXPECT "0x000000010000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_1)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_2)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-821056.t b/tests/bugs/replicate/bug-821056.t
new file mode 100644
index 00000000000..81186d86309
--- /dev/null
+++ b/tests/bugs/replicate/bug-821056.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+touch $M0/a
+
+#Open file with fd as 5
+exec 5>$M0/a
+realpath=$(gf_get_gfid_backend_file_path $B0/${V0}0 "a")
+
+kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+
+kill_brick $V0 $H0 $B0/${V0}0
+TEST gf_rm_file_and_gfid_link $B0/${V0}0 "a"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+ls -l $M0/a > /dev/null 2>&1 #Make sure the file is re-created
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/a
+
+for i in {1..1024}; do
+ echo "open sesame" >&5
+done
+
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+#close the fd
+exec 5>&-
+
+#Check that anon-fd based file is not leaking.
+EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+cleanup;
diff --git a/tests/bugs/replicate/bug-830665.t b/tests/bugs/replicate/bug-830665.t
new file mode 100755
index 00000000000..68180424803
--- /dev/null
+++ b/tests/bugs/replicate/bug-830665.t
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+function recreate {
+ rm -rf $1 && mkdir -p $1
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+recreate ${B0}/${V0}-0
+recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+TEST $CLI volume set $V0 nfs.disable false
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+#EXPECT_WITHIN fails the test if the command it executes fails. This function
+#returns "" when the file doesn't exist
+function friendly_cat {
+ if [ ! -f $1 ];
+ then
+ echo "";
+ else
+ cat $1;
+ fi
+}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure stat-prefetch doesn't prevent self-heal checks.
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Create some files and directories
+echo "test_data" > $N0/a_file;
+mkdir $N0/a_dir;
+echo "more_test_data" > $N0/a_dir/another_file;
+
+## Unmount and stop the volume.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+TEST $CLI volume stop $V0;
+
+# Recreate the brick. Note that because of http://review.gluster.org/#change,4202
+# we need to preserve and restore the volume ID or else the brick (and thus the
+# entire not-very-HA-any-more volume) won't start. When that bug is fixed, we can
+# remove the [gs]etxattr calls.
+volid=$(getfattr -e hex -n trusted.glusterfs.volume-id $B0/${V0}-0 2> /dev/null \
+ | grep = | cut -d= -f2)
+rm -rf $B0/${V0}-0;
+mkdir $B0/${V0}-0;
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}-0/.glusterfs && chmod 600 $B0/${V0}-0/.glusterfs
+
+setfattr -n trusted.glusterfs.volume-id -v $volid $B0/${V0}-0
+
+## Restart and remount. Note that we use actimeo=0 so that the stat calls
+## we need for self-heal don't get blocked by the NFS client.
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock,actimeo=0;
+
+## The Linux NFS client has a really charming habit of caching stuff right
+## after mount, even though we set actimeo=0 above. Life would be much easier
+## if NFS developers cared as much about correctness as they do about shaving
+## a few seconds off of benchmarks.
+ls -l $N0 &> /dev/null;
+sleep 5;
+
+## Force entry self-heal.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST gluster volume heal $V0 full
+#ls -lR $N0 > /dev/null;
+
+## Do NOT check through the NFS mount here. That will force a new self-heal
+## check, but we want to test whether self-heal already happened.
+
+## Make sure everything's in order on the recreated brick.
+EXPECT_WITHIN $HEAL_TIMEOUT 'test_data' friendly_cat $B0/${V0}-0/a_file;
+EXPECT_WITHIN $HEAL_TIMEOUT 'more_test_data' friendly_cat $B0/${V0}-0/a_dir/another_file;
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-859581.t b/tests/bugs/replicate/bug-859581.t
new file mode 100755
index 00000000000..d8b45a257a1
--- /dev/null
+++ b/tests/bugs/replicate/bug-859581.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --direct-io-mode=yes --use-readdirp=no --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+mkdir -p $M0/dir1/dir2
+
+TEST rm -f $(gf_get_gfid_backend_file_path $B0/${V0}1 "dir1")
+TEST rmdir $B0/${V0}1/dir1/dir2
+
+TEST stat $M0/dir1/dir2
+
+TEST [ -d $B0/${V0}1/dir1/dir2 ]
+TEST [ ! -d $(gf_get_gfid_backend_file_path $B0/${V0}1 "dir1") ]
+
+# Stop the volume to flush caches and force symlink recreation
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+# Till now, protocol/server was not doing inode linking as part of readdirp.
+# But pas part of user servicable snapshots patcth, changes to do inode linking
+# in protocol/server in readdirp, were introduced. So now to make sure
+# the gfid handle of dir1 is healed, explicit lookup has to be sent on it.
+# Otherwise, whenever ls -l is done just on the mount point $M0, lookup on the
+# entries received as part of readdirp, is not sent, because the inodes for
+# those entries were linked as part of readdirp itself. i.e instead of doing
+# "ls -l $M0", it has to be the below command.
+ls -l $M0/dir1;
+
+TEST [ -h $(gf_get_gfid_backend_file_path $B0/${V0}1 "dir1") ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
+
diff --git a/tests/bugs/replicate/bug-865825.t b/tests/bugs/replicate/bug-865825.t
new file mode 100755
index 00000000000..ffb2e0f6437
--- /dev/null
+++ b/tests/bugs/replicate/bug-865825.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.quick-read off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizable contents.
+echo "test_data" > $M0/a_file;
+
+## Unmount.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Mess with the flags as though brick-0 accuses brick-2 while brick-1 is
+## missing its brick-2 changelog altogether.
+value=0x000000010000000000000000
+setfattr -n trusted.afr.${V0}-client-2 -v $value $B0/${V0}-0/a_file
+setfattr -x trusted.afr.${V0}-client-2 $B0/${V0}-1/a_file
+echo "wrong_data" > $B0/${V0}-2/a_file
+
+gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+gluster volume heal $V0 full
+
+## Make sure brick 2 now has the correct contents.
+EXPECT_WITHIN $HEAL_TIMEOUT "test_data" cat $B0/${V0}-2/a_file
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-880898.t b/tests/bugs/replicate/bug-880898.t
new file mode 100644
index 00000000000..660d34ca25f
--- /dev/null
+++ b/tests/bugs/replicate/bug-880898.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+pkill glusterfs
+uuid=""
+for line in $(cat $GLUSTERD_WORKDIR/glusterd.info)
+do
+ if [[ $line == UUID* ]]
+ then
+ uuid=`echo $line | sed -r 's/^.{5}//'`
+ fi
+done
+
+#Command execution should fail reporting that the bricks are not running.
+TEST ! $CLI volume heal $V0 info
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-884328.t b/tests/bugs/replicate/bug-884328.t
new file mode 100644
index 00000000000..acc8e542240
--- /dev/null
+++ b/tests/bugs/replicate/bug-884328.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST check_option_help_presence "cluster.quorum-type"
+TEST check_option_help_presence "cluster.quorum-count"
+cleanup;
diff --git a/tests/bugs/replicate/bug-886998.t b/tests/bugs/replicate/bug-886998.t
new file mode 100644
index 00000000000..bcac235ff09
--- /dev/null
+++ b/tests/bugs/replicate/bug-886998.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# This tests that the replicate trash directory(.landfill) has following
+# properties.
+# Note: This is to have backward compatibility with 3.3 glusterfs
+# In the latest releases this dir is present inside .glusterfs of brick.
+# 1) lookup of trash dir fails
+# 2) readdir does not show this directory
+# 3) Self-heal does not do any self-heal of these directories.
+gfid1="0xc2e75dde97f346e7842d1076a8e699f8"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+
+TEST mkdir $B0/${V0}1/.landfill
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}1/.landfill
+TEST mkdir $B0/${V0}0/.landfill
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}0/.landfill
+
+TEST ! stat $M0/.landfill
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+
+TEST rmdir $B0/${V0}0/.landfill
+#Force a conservative merge and it should not create .landfill
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}0/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/
+
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/
+
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+TEST ! stat $B0/${V0}0/.landfill
+TEST stat $B0/${V0}1/.landfill
+
+#TEST that the dir is not deleted even when xattrs suggest to delete
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}0/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/
+
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/
+
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+TEST ! stat $B0/${V0}0/.landfill
+TEST stat $B0/${V0}1/.landfill
+cleanup;
diff --git a/tests/bugs/replicate/bug-888174.t b/tests/bugs/replicate/bug-888174.t
new file mode 100644
index 00000000000..8c70265513d
--- /dev/null
+++ b/tests/bugs/replicate/bug-888174.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This tests if flush, fsync wakes up the delayed post-op or not.
+#If it is not woken up, INODELK from the next command waits
+#for post-op-delay secs. There would be pending changelog even after the command
+#completes.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+
+TEST $CLI volume set $V0 cluster.eager-lock on
+
+TEST $CLI volume set $V0 performance.flush-behind off
+EXPECT "off" volume_option $V0 performance.flush-behind
+
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 3
+EXPECT "3" volume_option $V0 cluster.post-op-delay-secs
+
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0
+
+#Check that INODELK MAX latency is not in the order of seconds
+TEST gluster volume profile $V0 start
+for i in {1..5}
+do
+ echo hi > $M0/a
+done
+#Test if the MAX INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ -z $inodelk_max_latency ]
+
+TEST dd of=$M0/a if=/dev/urandom bs=1024k count=10 conv=fsync
+#Check for no trace of pending changelog. Flush should make sure of it.
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.dirty
+
+
+dd of=$M0/a if=/dev/urandom bs=1024k count=1024 2>/dev/null &
+p=$!
+#trigger graph switches, tests for fsync not leaving any pending flags
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+kill -TERM $p
+#wait for dd to exit
+wait > /dev/null 2>&1
+
+#Goal is to check if there is permanent FOOL changelog
+sleep 5
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.dirty
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-913051.t b/tests/bugs/replicate/bug-913051.t
new file mode 100644
index 00000000000..6794995e6fe
--- /dev/null
+++ b/tests/bugs/replicate/bug-913051.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+#Test that afr opens the file on the bricks that were offline at the time of
+# open after the brick comes online. This tests for writev, readv triggering
+# open-fd-fix in afr.
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST $GFS --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/a
+TEST touch $M0/dir/b
+echo abc > $M0/dir/b
+
+TEST wfd=`fd_available`
+TEST fd_open $wfd "w" $M0/dir/a
+TEST rfd=`fd_available`
+TEST fd_open $rfd "r" $M0/dir/b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#attempt self-heal so that the files are created on brick-0
+
+TEST dd if=$M0/dir/a of=/dev/null bs=1024k
+TEST dd if=$M0/dir/b of=/dev/null bs=1024k
+
+#trigger writev for attempting open-fd-fix in afr
+TEST fd_write $wfd "open sesame"
+
+#trigger readv for attempting open-fd-fix in afr
+TEST fd_cat $rfd
+
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpatha"
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpathb"
+
+TEST fd_close $wfd
+TEST fd_close $rfd
+cleanup;
diff --git a/tests/bugs/replicate/bug-916226.t b/tests/bugs/replicate/bug-916226.t
new file mode 100644
index 00000000000..893905f9a47
--- /dev/null
+++ b/tests/bugs/replicate/bug-916226.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir{1..10};
+TEST touch $M0/dir{1..10}/files{1..10};
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}4 $H0:/$B0/${V0}5
+
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-918437-sh-mtime.t b/tests/bugs/replicate/bug-918437-sh-mtime.t
new file mode 100644
index 00000000000..6a194b14a9b
--- /dev/null
+++ b/tests/bugs/replicate/bug-918437-sh-mtime.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_mtime {
+ local f=$1
+ stat $f | grep Modify | awk '{print $2 $3}' | cut -f1 -d'.'
+}
+
+function file_exists {
+ if [ -f $1 ]; then echo "Y"; else echo "N"; fi
+}
+cleanup;
+
+## Tests if mtime is correct after self-heal.
+TEST glusterd
+TEST pidof glusterd
+TEST mkdir -p $B0/gfs0/brick0{1,2}
+TEST $CLI volume create $V0 replica 2 transport tcp $H0:$B0/gfs0/brick01 $H0:$B0/gfs0/brick02
+TEST $CLI volume set $V0 nfs.disable on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable
+# file 'a' is healed from brick02 to brick01 where as file 'b' is healed from
+# brick01 to brick02
+
+TEST cp -p /etc/passwd $M0/a
+TEST cp -p /etc/passwd $M0/b
+
+#Store mtimes before self-heals
+TEST modify_atstamp=$(get_mtime $B0/gfs0/brick02/a)
+TEST modify_btstamp=$(get_mtime $B0/gfs0/brick02/b)
+
+TEST $CLI volume stop $V0
+TEST gf_rm_file_and_gfid_link $B0/gfs0/brick01 a
+TEST gf_rm_file_and_gfid_link $B0/gfs0/brick02 b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+#TODO remove these 2 lines once heal-full is fixed in v2.
+TEST stat $M0/a
+TEST stat $M0/b
+
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" file_exists $B0/gfs0/brick01/a
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" file_exists $B0/gfs0/brick02/b
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+
+size=`stat -c '%s' /etc/passwd`
+EXPECT $size stat -c '%s' $B0/gfs0/brick01/a
+
+TEST modify_atstamp1=$(get_mtime $B0/gfs0/brick01/a)
+TEST modify_atstamp2=$(get_mtime $B0/gfs0/brick02/a)
+EXPECT $modify_atstamp echo $modify_atstamp1
+EXPECT $modify_atstamp echo $modify_atstamp2
+
+TEST modify_btstamp1=$(get_mtime $B0/gfs0/brick01/b)
+TEST modify_btstamp2=$(get_mtime $B0/gfs0/brick02/b)
+EXPECT $modify_btstamp echo $modify_btstamp1
+EXPECT $modify_btstamp echo $modify_btstamp2
+cleanup;
diff --git a/tests/bugs/replicate/bug-921231.t b/tests/bugs/replicate/bug-921231.t
new file mode 100755
index 00000000000..81504612f63
--- /dev/null
+++ b/tests/bugs/replicate/bug-921231.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test writes to same file with 2 fds and tests that cluster.eager-lock is not
+# causing extra delay because of post-op-delay-secs
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1024k count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+write_to_file &
+write_to_file &
+wait
+#Test if the MAX [F]INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z $inodelk_max_latency ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-957877.t b/tests/bugs/replicate/bug-957877.t
new file mode 100644
index 00000000000..bcce7e3c9e7
--- /dev/null
+++ b/tests/bugs/replicate/bug-957877.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/f1
+TEST setfattr -n "user.foo" -v "test" $M0/f1
+
+BRICK=$B0"/${V0}1"
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for self-heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $BRICK;
+
+TEST getfattr -n "user.foo" $B0/${V0}0/f1;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-976800.t b/tests/bugs/replicate/bug-976800.t
new file mode 100644
index 00000000000..27f8b27619e
--- /dev/null
+++ b/tests/bugs/replicate/bug-976800.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks if there are any open fds on the brick
+# even after the file is closed on the mount. This particular
+# test tests dd with "fsync" to check afr's fsync codepath
+cleanup;
+
+function is_fd_open {
+ local v=$1
+ local h=$2
+ local b=$3
+ local bpid=$(get_brick_pid $v $h $b)
+ ls -l /proc/$bpid/fd | grep -w "\-> $b/1"
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 flush-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST dd of=$M0/1 if=/dev/zero bs=1k count=1 conv=fsync
+TEST ! is_fd_open $V0 $H0 $B0/${V0}0
+cleanup;
diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t
new file mode 100755
index 00000000000..9a8f36c956c
--- /dev/null
+++ b/tests/bugs/replicate/bug-977797.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume set $V0 quick-read off
+TEST $CLI volume set $V0 read-ahead off
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 io-cache off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+
+TEST mkdir -p $M0/a
+TEST `echo "GLUSTERFS" > $M0/a/file`
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+TEST chown root $M0/a
+TEST chown root $M0/a/file
+TEST `echo "GLUSTER-FILE-SYSTEM" > $M0/a/file`
+TEST mkdir $M0/a/b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0;
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"2"
+
+TEST chmod 757 $M0/a
+TEST chmod 757 $M0/a/file
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1;
+
+#Trigger entry heal of $M0/a
+getfattr -n user.nosuchattr $M0/a
+dd if=$M0/a/file of=/dev/null bs=1024k
+#read fails, but heal is triggered.
+TEST [ $? -ne 0 ]
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-0 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-1 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-0 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-1 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-0 "entry"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-1 "entry"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry"
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-978794.t b/tests/bugs/replicate/bug-978794.t
new file mode 100644
index 00000000000..8e43e74bf79
--- /dev/null
+++ b/tests/bugs/replicate/bug-978794.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+
+# This test opens 100 fds and triggers graph switches to check if fsync
+# as part of graph-switch causes crash or not.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/{1..100}
+for i in {1..100}; do fd[$i]=`fd_available`; fd_open ${fd[$i]} 'w' $M0/$i; done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{2,3}
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+TEST cat $M0/{1..100}
+for i in {1..100}; do fd_write ${fd[$i]} 'abc'; done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5}
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+for i in {1..100}; do fd_write ${fd[$i]} 'abc'; done
+TEST cat $M0/{1..100}
+cleanup
diff --git a/tests/bugs/replicate/bug-979365.t b/tests/bugs/replicate/bug-979365.t
new file mode 100755
index 00000000000..c09c7d51772
--- /dev/null
+++ b/tests/bugs/replicate/bug-979365.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script checks that ensure-durability option enables/disables afr
+#sending fsyncs
+cleanup;
+
+function num_fsyncs {
+ $CLI volume profile $V0 info | grep -w FSYNC | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability on
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd of=$M0/a if=/dev/zero bs=1024k count=10
+#fsyncs take a while to complete.
+sleep 5
+
+# There can be zero or more fsyncs, depending on the order
+# in which the writes reached the server, in turn deciding
+# whether they were treated as "appending" writes or not.
+
+TEST [[ $(num_fsyncs) -ge 0 ]]
+#Stop the volume to erase the profile info of old operations
+TEST $CLI volume profile $V0 stop
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+#Disable ensure-durability now to disable fsyncs in afr.
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume start $V0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume profile $V0 start
+TEST dd of=$M0/a if=/dev/zero bs=1024k count=10
+#fsyncs take a while to complete.
+sleep 5
+TEST [[ $(num_fsyncs) -eq 0 ]]
+
+cleanup
diff --git a/tests/bugs/replicate/bug-986905.t b/tests/bugs/replicate/bug-986905.t
new file mode 100755
index 00000000000..f4f7386ebc4
--- /dev/null
+++ b/tests/bugs/replicate/bug-986905.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script checks if hardlinks that are created while a brick is down are
+#healed properly.
+
+cleanup;
+function get_inum {
+ ls -i $1 | awk '{print $1}'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a
+TEST ln $M0/a $M0/link_a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ls -l $M0
+inum=$(get_inum $B0/${V0}0/a)
+EXPECT "$inum" get_inum $B0/${V0}0/link_a
+cleanup
diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
new file mode 100644
index 00000000000..fab94b71b27
--- /dev/null
+++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function create_bricks {
+ TEST truncate -s 100M $B0/brick0
+ TEST truncate -s 100M $B0/brick1
+ TEST truncate -s 20M $B0/brick2
+ LO1=`SETUP_LOOP $B0/brick0`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO1
+ LO2=`SETUP_LOOP $B0/brick1`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO2
+ LO3=`SETUP_LOOP $B0/brick2`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO3
+ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+ TEST MOUNT_LOOP $LO1 $B0/${V0}0
+ TEST MOUNT_LOOP $LO2 $B0/${V0}1
+ TEST MOUNT_LOOP $LO3 $B0/${V0}2
+}
+
+function create_files {
+ local i=1
+ while (true)
+ do
+ touch $M0/file$i
+ if [ -e $B0/${V0}2/file$i ];
+ then
+ ((i++))
+ else
+ break
+ fi
+ done
+}
+
+TESTS_EXPECTED_IN_LOOP=13
+
+#Arbiter volume: Check for ENOSPC when arbiter brick becomes full#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((echo "Test" > $M0/file-3) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
+
+#Replica-3 volume: Check for ENOSPC when one of the brick becomes full#
+#Keeping the third brick of lower size to simulate disk full scenario#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((cat /dev/zero > $M0/file1) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t
new file mode 100644
index 00000000000..d3b0c504c80
--- /dev/null
+++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+echo "Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+
+# Change permission on brick-0: simulates the case where there is metadata
+# mismatch but no pending xattrs. This brick will become the source for heal.
+TEST chmod +x $B0/$V0"0"/FILE
+
+# Add gfid to xattrop
+xattrop_b0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_b0`
+gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE))
+TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Brick-0 should contain xattrs blaming other 2 bricks.
+# The values will be zero because heal is over.
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/FILE
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}0/FILE
+
+# Brick-1 and Brick-2 must not contain any afr xattrs.
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}2/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}2/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
+
+# check permission bits.
+EXPECT '755' stat -c %a $B0/${V0}0/FILE
+EXPECT '755' stat -c %a $B0/${V0}1/FILE
+EXPECT '755' stat -c %a $B0/${V0}2/FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/ta-inode-refresh-read.t b/tests/bugs/replicate/ta-inode-refresh-read.t
new file mode 100644
index 00000000000..6dd6ff7f163
--- /dev/null
+++ b/tests/bugs/replicate/ta-inode-refresh-read.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Test read transaction inode refresh logic for thin-arbiter.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+# Set afr xlator options to choose brick0 as read-subvol.
+sed -i '/iam-self-heal-daemon/a \ option read-subvolume-index 0' $B0/mount.vol
+TEST [ $? -eq 0 ]
+sed -i '/iam-self-heal-daemon/a \ option choose-local false' $B0/mount.vol
+TEST [ $? -eq 0 ]
+
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/FILE
+TEST ls $B0/brick0/FILE
+TEST ls $B0/brick1/FILE
+TEST ! ls $B0/ta/FILE
+TEST setfattr -n user.name -v ravi $M0/FILE
+
+# Remove gfid hardlink from brick0 which is the read-subvol for FILE.
+# This triggers inode refresh up on a getfattr and eventually calls
+# afr_ta_read_txn(). Without this patch, afr_ta_read_txn() will again query
+# brick0 causing getfattr to fail.
+TEST rm -f $(gf_get_gfid_backend_file_path $B0/brick0 FILE)
+TEST getfattr -n user.name $M0/FILE
+
+cleanup;
diff --git a/tests/bugs/rpc/bug-1043886.t b/tests/bugs/rpc/bug-1043886.t
new file mode 100755
index 00000000000..c1ea7a71e8b
--- /dev/null
+++ b/tests/bugs/rpc/bug-1043886.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount volume as NFS export
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+# just a random uid/gid
+uid=22162
+gid=5845
+
+mkdir $N0/other;
+chown $uid:$gid $N0/other;
+
+TEST $CLI volume set $V0 server.root-squash on;
+TEST $CLI volume set $V0 server.anonuid $uid;
+TEST $CLI volume set $V0 server.anongid $gid;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+# create files and directories in the root of the glusterfs and nfs mount
+# which is owned by root and hence the right behavior is getting EACCESS
+# as the fops are executed as nfsnobody.
+touch $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+# Here files and directories should be getting created as other directory is owned
+# by tmp_user as server.anonuid and server.anongid have the value of tmp_user uid and gid
+TEST touch $M0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/file)" = "$uid:$gid" ];
+TEST mkdir $M0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/dir)" = "$uid:$gid" ];
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/rpc/bug-847624.t b/tests/bugs/rpc/bug-847624.t
new file mode 100755
index 00000000000..fe8fc982887
--- /dev/null
+++ b/tests/bugs/rpc/bug-847624.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+#1
+TEST glusterd
+TEST pidof glusterd
+#3
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.drc on
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+#7
+TEST dbench -t 10 10
+TEST rm -rf $N0/*
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+#10
+TEST $CLI volume set $V0 nfs.drc-size 10000
+cleanup
diff --git a/tests/bugs/rpc/bug-884452.t b/tests/bugs/rpc/bug-884452.t
new file mode 100644
index 00000000000..c161a68190d
--- /dev/null
+++ b/tests/bugs/rpc/bug-884452.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/{1..10000}
+
+RUN_LS_LOOP_FILE="$M0/run-ls-loop"
+function ls-loop
+{
+ while [ -f $RUN_LS_LOOP_FILE ]; do
+ ls -lR $M0 1>/dev/null 2>&1
+ done;
+}
+
+touch $RUN_LS_LOOP_FILE
+ls-loop &
+
+function vol-status-loop
+{
+ for i in {1..1000}; do
+ $CLI volume status $V0 clients >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ done;
+
+ return 0
+}
+
+TEST vol-status-loop
+
+rm -f $RUN_LS_LOOP_FILE
+wait
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/bugs/rpc/bug-921072.t b/tests/bugs/rpc/bug-921072.t
new file mode 100755
index 00000000000..ae7eb0101bc
--- /dev/null
+++ b/tests/bugs/rpc/bug-921072.t
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+#1
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# based on ip addresses (1-4)
+# case 1: allow only localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 2: allow only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#11
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+# case 3: reject only localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 4: reject only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+
+
+# NEED TO CHECK BOTH IP AND NAME BASED AUTH.
+# CASES WITH NFS.ADDR-NAMELOOKUP ON (5-12)
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#20
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 5: allow only localhost
+TEST $CLI volume set $V0 nfs.rpc-auth-allow localhost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 6: allow only somehost
+TEST $CLI volume set $V0 nfs.rpc-auth-allow somehost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 7: reject only localhost
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-reject localhost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#30
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 8: reject only somehost
+TEST $CLI volume set $V0 nfs.rpc-auth-reject somehost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# based on ip addresses: repeat of cases 1-4
+# case 9: allow only localhost ip
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+TEST mkdir -p $N0/subdir
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 10: allow a non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#41
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 11: reject only localhost ip
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+TEST ! mount_nfs localhost:/$V0/subdir $N0 nolock
+
+# case 12: reject only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST mount_nfs localhost:/$V0/subdir $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop --mode=script $V0
+#52
+TEST $CLI volume delete --mode=script $V0
+cleanup
diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t
new file mode 100755
index 00000000000..40acdc2fdc7
--- /dev/null
+++ b/tests/bugs/rpc/bug-954057.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This script checks if use-readdirp option works as accepted in mount options
+
+# Note on re-reading $M0/new after enabling root-squash:
+# Since we have readen it once, the file is present in various caches.
+# In order to actually fail on second attempt we must:
+# 1) drop kernel cache
+# 2) make sure FUSE does not cache the entry. This is also
+# in the kernel, but not flushed by a failed umount.
+# Using $GFS enforces this because it sets --entry-timeout=0
+# 3) make sure reading new permissins does not produce stale
+# information from glusterfs metadata cache. Setting volume
+# option performance.stat-prefetch off enforces that.
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST mkdir $M0/nobody
+grep nfsnobody /etc/passwd > /dev/null
+if [ $? -eq 1 ]; then
+usr=nobody
+grp=nobody
+else
+usr=nfsnobody
+grp=nfsnobody
+fi
+TEST chown $usr:$grp $M0/nobody
+TEST `echo "file" >> $M0/file`
+TEST cp $M0/file $M0/new
+TEST chmod 700 $M0/new
+TEST cat $M0/new
+
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 server.root-squash enable
+drop_cache $M0
+TEST ! mkdir $M0/other
+TEST mkdir $M0/nobody/other
+TEST cat $M0/file
+TEST ! cat $M0/new
+TEST `echo "nobody" >> $M0/nobody/file`
+
+#mount the client without root-squashing
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes $M1
+TEST mkdir $M1/m1_dir
+TEST `echo "file" >> $M1/m1_file`
+TEST cp $M0/file $M1/new
+TEST chmod 700 $M1/new
+TEST cat $M1/new
+
+TEST $CLI volume set $V0 server.root-squash disable
+TEST mkdir $M0/other
+TEST cat $M0/new
+
+cleanup
diff --git a/tests/bugs/shard/bug-1245547.t b/tests/bugs/shard/bug-1245547.t
new file mode 100644
index 00000000000..3c46785d10f
--- /dev/null
+++ b/tests/bugs/shard/bug-1245547.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+#Create a file.
+TEST touch $M0/foo
+#Write some data into it.
+TEST `echo "abc" > $M0/foo`
+
+#This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+
+#Create a file 'bar' with holes.
+TEST touch $M0/bar
+TEST truncate -s 10G $M0/bar
+#Unlink on such a file should succeed.
+TEST unlink $M0/bar
+
+#Create a file 'baz' with holes.
+TEST touch $M0/baz
+TEST truncate -s 10G $M0/baz
+#Rename with a sharded existing dest that has holes must succeed.
+TEST mv -f $M0/foo $M0/baz
+
+cleanup
diff --git a/tests/bugs/shard/bug-1248887.t b/tests/bugs/shard/bug-1248887.t
new file mode 100644
index 00000000000..2c51f7ce0e8
--- /dev/null
+++ b/tests/bugs/shard/bug-1248887.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+#Create a file.
+TEST touch $M0/foo
+#Write some data into it.
+TEST `echo "abc" > $M0/foo`
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/foo
+EXPECT "0000000000000004000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/foo
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/foo
+EXPECT "0000000000000004000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/foo
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo "abc" >> $M0/foo`
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/foo
+EXPECT "0000000000000008000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/foo
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/foo
+EXPECT "0000000000000008000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/foo
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/foo
+EXPECT "0000000000000008000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/foo
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1250855.t b/tests/bugs/shard/bug-1250855.t
new file mode 100644
index 00000000000..b8bc3b42513
--- /dev/null
+++ b/tests/bugs/shard/bug-1250855.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=40
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST mkdir $M0/dir
+
+for i in {1..20}; do
+ TEST_IN_LOOP touch $M0/dir/$i;
+done
+
+TEST $CLI volume set $V0 features.shard on
+
+TEST ls $M0
+TEST ls $M0/dir
+
+for i in {1..10}; do
+ TEST_IN_LOOP mv $M0/dir/$i $M0/dir/$i-sharded;
+done
+
+for i in {11..20}; do
+ TEST_IN_LOOP unlink $M0/dir/$i;
+done
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1251824.t b/tests/bugs/shard/bug-1251824.t
new file mode 100644
index 00000000000..d81685d01de
--- /dev/null
+++ b/tests/bugs/shard/bug-1251824.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST useradd -M test_user 2>/dev/null
+
+# Create 3 files as root.
+TEST touch $M0/foo
+TEST touch $M0/bar
+TEST touch $M0/baz
+TEST touch $M0/qux
+TEST mkdir $M0/dir
+
+# Change ownership to non-root on foo and bar.
+TEST chown test_user:test_user $M0/foo
+TEST chown test_user:test_user $M0/bar
+
+# Write 6M of data on foo as non-root, 2M overflowing into block-1.
+TEST run_cmd_as_user test_user "dd if=/dev/zero of=$M0/foo bs=1M count=6"
+
+# Ensure owner and group are root on the block-1 shard.
+gfid_foo=$(get_gfid_string $M0/foo)
+
+EXPECT "root" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %U`
+EXPECT "root" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %G`
+
+#Ensure /.shard is owned by root.
+EXPECT "root" echo `find $B0/${V0}0 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}0 -name .shard | xargs stat -c %G`
+EXPECT "root" echo `find $B0/${V0}1 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}1 -name .shard | xargs stat -c %G`
+EXPECT "root" echo `find $B0/${V0}2 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}2 -name .shard | xargs stat -c %G`
+EXPECT "root" echo `find $B0/${V0}3 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}3 -name .shard | xargs stat -c %G`
+
+# Write 6M of data on bar as root.
+TEST dd if=/dev/zero of=$M0/bar bs=1M count=6
+
+# Ensure owner and group are root on the block-1 shard.
+gfid_bar=$(get_gfid_string $M0/bar)
+
+EXPECT "root" echo `find $B0 -name $gfid_bar.1 | xargs stat -c %U`
+EXPECT "root" echo `find $B0 -name $gfid_bar.1 | xargs stat -c %G`
+
+# Write 6M of data on baz as root.
+TEST dd if=/dev/zero of=$M0/baz bs=1M count=6
+
+gfid_baz=$(get_gfid_string $M0/baz)
+
+# Ensure owner and group are root on the block-1 shard.
+EXPECT "root" echo `find $B0 -name $gfid_baz.1 | xargs stat -c %U`
+EXPECT "root" echo `find $B0 -name $gfid_baz.1 | xargs stat -c %G`
+
+# Test to ensure unlink from an unauthorized user does not lead to only
+# the shards under /.shard getting unlinked while that on the base file fails
+# with EPERM/ACCES.
+
+TEST ! run_cmd_as_user test_user "unlink $M0/baz"
+TEST find $B0/*/.shard/$gfid_baz.1
+
+# Test to ensure rename of a file where the dest file exists and is sharded,
+# from an unauthorized user does not lead to only the shards under /.shard
+# getting unlinked while that on the base file fails with EPERM/ACCES.
+
+TEST ! run_cmd_as_user test_user "mv -f $M0/qux $M0/baz"
+TEST find $B0/*/.shard/$gfid_baz.1
+TEST stat $M0/qux
+
+# Shard translator executes steps in the following order while doing a truncate
+# to a lower size:
+# 1) unlinking shards under /.shard first with frame->root->{uid,gid} being 0,
+# 2) truncate the original file by the right amount.
+# The following two tests are towards ensuring that truncate attempt from an
+# unauthorised user doesn't result in only the shards under /.shard getting
+# removed (since they're being performed as root) while step 2) above fails,
+# leaving the file in an inconsistent state.
+
+TEST ! run_cmd_as_user test_user "truncate -s 1M $M0/baz"
+TEST find $B0/*/.shard/$gfid_baz.1
+
+# Perform a cp as non-root user. This should trigger readv() which will trigger
+# reads on first shard of "foo" under /.shard, and this must not fail if shard
+# translator correctly sets frame->root->uid,gid to 0 before reading off the
+# first shard, since it's owned by root.
+TEST chown test_user:test_user $M0/dir
+TEST run_cmd_as_user test_user "cp $M0/foo $M0/dir/quux"
+
+md5sum_foo=$(md5sum $M0/foo | awk '{print $1}')
+EXPECT "$md5sum_foo" echo `md5sum $M0/dir/quux | awk '{print $1}'`
+
+userdel test_user
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1256580.t b/tests/bugs/shard/bug-1256580.t
new file mode 100644
index 00000000000..279fcc54e48
--- /dev/null
+++ b/tests/bugs/shard/bug-1256580.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+
+# Create "file" with holes.
+TEST truncate -s 6M $M0/dir/file
+EXPECT '6291456' stat -c %s $M0/dir/file
+
+# Perform writes that do not cross the 6M boundary
+TEST dd if=/dev/zero of=$M0/dir/file bs=1024 seek=3072 count=2048 conv=notrunc
+
+# Ensure that the file size is 6M (as opposed to 8M that would appear in the
+# presence of this bug).
+EXPECT '6291456' stat -c %s $M0/dir/file
+
+#Extend the write beyond EOF such that it again creates a hole of 1M size
+TEST dd if=/dev/zero of=$M0/dir/file bs=1024 seek=7168 count=2048 conv=notrunc
+
+# Ensure that the file size is not greater than 9M.
+EXPECT '9437184' stat -c %s $M0/dir/file
+cleanup
diff --git a/tests/bugs/shard/bug-1258334.t b/tests/bugs/shard/bug-1258334.t
new file mode 100644
index 00000000000..94ed822aae8
--- /dev/null
+++ b/tests/bugs/shard/bug-1258334.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/foo
+TEST touch $M0/dir/bar
+TEST touch $M0/dir/new
+
+TEST truncate -s 14M $M0/dir/foo
+TEST truncate -s 14M $M0/dir/bar
+
+# Perform writes that fall on the 2nd block of "foo" (counting from 0)
+TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 seek=10240 count=2048 conv=notrunc
+
+# Perform writes that fall on the 2nd block of "bar" (counting from 0)
+TEST dd if=/dev/zero of=$M0/dir/bar bs=1024 seek=10240 count=2048 conv=notrunc
+
+# Now unlink "foo". If the bug exists, it should fail with EINVAL.
+TEST unlink $M0/dir/foo
+
+# Now rename "new" to "bar". If the bug exists, it should fail with EINVAL.
+TEST mv -f $M0/dir/new $M0/dir/bar
+
+TEST dd if=/dev/zero of=$M0/dir/new bs=1024 count=5120
+
+# Now test that this fix does not break unlink of files without holes
+TEST unlink $M0/dir/new
+
+cleanup
diff --git a/tests/bugs/shard/bug-1259651.t b/tests/bugs/shard/bug-1259651.t
new file mode 100644
index 00000000000..72856fdbaad
--- /dev/null
+++ b/tests/bugs/shard/bug-1259651.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file_plain
+
+# Create "file_plain" with holes.
+TEST truncate -s 12M $M0/dir/file_plain
+
+# Perform writes on it that would create holes.
+TEST dd if=/dev/zero of=$M0/dir/file_plain bs=1024 seek=10240 count=1024 conv=notrunc
+
+md5sum_file_plain=$(md5sum $M0/dir/file_plain | awk '{print $1}')
+
+# Now enable sharding on the volume.
+TEST $CLI volume set $V0 features.shard on
+
+# Create a sharded file called "file_sharded"
+TEST touch $M0/dir/file_sharded
+
+# Truncate it to make it sparse
+TEST truncate -s 12M $M0/dir/file_sharded
+
+# Perform writes on it that would create holes in block-0 and block-1.
+TEST dd if=/dev/zero of=$M0/dir/file_sharded bs=1024 seek=10240 count=1024 conv=notrunc
+
+# If this bug is fixed, md5sum of file_sharded and file_plain should be same.
+EXPECT "$md5sum_file_plain" echo `md5sum $M0/dir/file_sharded | awk '{print $1}'`
+
+cleanup
diff --git a/tests/bugs/shard/bug-1260637.t b/tests/bugs/shard/bug-1260637.t
new file mode 100644
index 00000000000..21008ee19dd
--- /dev/null
+++ b/tests/bugs/shard/bug-1260637.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create a file.
+TEST touch $M0/foo
+
+# Check that the shard xattrs are set in the backend.
+TEST getfattr -n trusted.glusterfs.shard.block-size $B0/${V0}0/foo
+TEST getfattr -n trusted.glusterfs.shard.file-size $B0/${V0}0/foo
+
+# Verify that shard xattrs are not exposed on the mount.
+TEST ! getfattr -n trusted.glusterfs.shard.block-size $M0/foo
+TEST ! getfattr -n trusted.glusterfs.shard.file-size $M0/foo
+
+# Verify that shard xattrs cannot be set from the mount.
+TEST ! setfattr -n trusted.glusterfs.shard.block-size -v "123" $M0/foo
+TEST ! setfattr -n trusted.glusterfs.shard.file-size -v "123" $M0/foo
+
+# Verify that shard xattrs cannot be removed from the mount.
+TEST ! setfattr -x trusted.glusterfs.shard.block-size $M0/foo
+TEST ! setfattr -x trusted.glusterfs.shard.file-size $M0/foo
+
+# Verify that shard xattrs are not listed when listxattr is triggered.
+TEST ! "getfattr -d -m . $M0/foo | grep shard"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1261773.t b/tests/bugs/shard/bug-1261773.t
new file mode 100644
index 00000000000..46d5a8b91c9
--- /dev/null
+++ b/tests/bugs/shard/bug-1261773.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST check_option_help_presence "features.shard"
+TEST check_option_help_presence "features.shard-block-size"
+
+cleanup
diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t
new file mode 100644
index 00000000000..66e896ad0c4
--- /dev/null
+++ b/tests/bugs/shard/bug-1272986.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+# $M0 is where the reads will be done and $M1 is where files will be created,
+# written to, etc.
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+
+# Write some data into a file, such that its size crosses the shard block size.
+TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct
+
+md5sum1_reader=$(md5sum $M0/file | awk '{print $1}')
+
+EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'`
+
+# Append some more data into the file.
+TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct
+
+md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}')
+
+# Test to see if the reader refreshes its cache correctly as part of the reads
+# triggered through md5sum. If it does, then the md5sum on the reader and writer
+# must match.
+EXPECT "$md5sum2_reader" echo `md5sum $M1/file | awk '{print $1}'`
+
+cleanup
diff --git a/tests/bugs/shard/bug-1342298.t b/tests/bugs/shard/bug-1342298.t
new file mode 100644
index 00000000000..ecd7720e8db
--- /dev/null
+++ b/tests/bugs/shard/bug-1342298.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+echo a > $M0/a
+TEST dd if=$M0/a of=/dev/null bs=4096 count=1 iflag=direct
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1468483.t b/tests/bugs/shard/bug-1468483.t
new file mode 100644
index 00000000000..e462b8d54d5
--- /dev/null
+++ b/tests/bugs/shard/bug-1468483.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 16MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST dd if=/dev/zero conv=fsync of=$M0/foo bs=1M count=100
+
+#This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+
+gfid_foo=$(get_gfid_string $M0/foo)
+
+TEST stat $B0/${V0}0/.shard/$gfid_foo.1
+TEST stat $B0/${V0}0/.shard/$gfid_foo.2
+TEST stat $B0/${V0}0/.shard/$gfid_foo.3
+TEST stat $B0/${V0}0/.shard/$gfid_foo.4
+TEST stat $B0/${V0}0/.shard/$gfid_foo.5
+TEST stat $B0/${V0}0/.shard/$gfid_foo.6
+
+# For a file with 7 shards, there should be 7 fsyncs on the brick. Without this
+# fix, I was seeing only 1 fsync (on the base shard alone).
+
+EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'`
+
+useradd -M test_user 2>/dev/null
+
+TEST touch $M0/bar
+
+# Change ownership to non-root on bar.
+TEST chown test_user:test_user $M0/bar
+
+TEST $CLI volume profile $V0 stop
+TEST $CLI volume profile $V0 start
+
+# Write 100M of data on bar as non-root.
+TEST run_cmd_as_user test_user "dd if=/dev/zero conv=fsync of=$M0/bar bs=1M count=100"
+
+EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'`
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+userdel test_user
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1488546.t b/tests/bugs/shard/bug-1488546.t
new file mode 100644
index 00000000000..60480dc55e5
--- /dev/null
+++ b/tests/bugs/shard/bug-1488546.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 md-cache-timeout 60
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/zero of=$M0/file bs=1M count=20
+TEST ln $M0/file $M0/linkey
+
+EXPECT "20971520" stat -c %s $M0/linkey
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t
new file mode 100644
index 00000000000..2f9f165aa63
--- /dev/null
+++ b/tests/bugs/shard/bug-1568521-EEXIST.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+FILE_COUNT_TIME=5
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+# Unlink a temporary file to trigger creation of .remove_me
+TEST touch $M0/tmp
+TEST unlink $M0/tmp
+
+TEST stat $B0/${V0}0/.shard/.remove_me
+TEST stat $B0/${V0}1/.shard/.remove_me
+
+TEST dd if=/dev/zero of=$M0/dir/file bs=1024 count=9216
+gfid_file=$(get_gfid_string $M0/dir/file)
+
+# Create marker file from the backend to simulate ENODATA.
+touch $B0/${V0}0/.shard/.remove_me/$gfid_file
+touch $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
+# and confirm that the correct values are set when the actual unlink takes place
+
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
+sleep 2
+
+TEST unlink $M0/dir/file
+TEST ! stat $B0/${V0}0/dir/file
+TEST ! stat $B0/${V0}1/dir/file
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_file
+
+##############################
+### Repeat test for rename ###
+##############################
+
+TEST touch $M0/src
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+
+# Create marker file from the backend to simulate ENODATA.
+touch $B0/${V0}0/.shard/.remove_me/$gfid_dst
+touch $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
+# and confirm that the correct values are set when the actual unlink takes place
+
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
+sleep 2
+
+TEST mv -f $M0/src $M0/dir/dst
+TEST ! stat $B0/${V0}0/src
+TEST ! stat $B0/${V0}1/src
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+
+cleanup
diff --git a/tests/bugs/shard/bug-1568521.t b/tests/bugs/shard/bug-1568521.t
new file mode 100644
index 00000000000..167fb635ac8
--- /dev/null
+++ b/tests/bugs/shard/bug-1568521.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+
+function delete_files {
+ local mountpoint=$1;
+ local success=0;
+ local value=$2
+ for i in {1..500}; do
+ unlink $mountpoint/file-$i 2>/dev/null 1>/dev/null
+ if [ $? -eq 0 ]; then
+ echo $2 >> $B0/output.txt
+ fi
+ done
+ echo $success
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+for i in {1..500}; do
+ dd if=/dev/urandom of=$M0/file-$i bs=1M count=2
+done
+
+for i in {1..500}; do
+ stat $M1/file-$i > /dev/null
+done
+
+delete_files $M0 0 &
+delete_files $M1 1 &
+wait
+
+success1=$(grep 0 $B0/output.txt | wc -l);
+success2=$(grep 1 $B0/output.txt | wc -l);
+
+echo "Success1 is $success1";
+echo "Success2 is $success2";
+
+success_total=$((success1 + success2));
+
+EXPECT 500 echo $success_total
+
+cleanup
diff --git a/tests/bugs/shard/bug-1605056-2.t b/tests/bugs/shard/bug-1605056-2.t
new file mode 100644
index 00000000000..a9c10fec3ea
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056-2.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Write into another file bar to ensure all of foo's shards are evicted from lru list of $M0
+TEST dd if=/dev/zero of=$M0/bar bs=1M count=104
+
+# Delete foo from $M0. If there's a bug, the mount will crash.
+TEST unlink $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1605056.t b/tests/bugs/shard/bug-1605056.t
new file mode 100644
index 00000000000..c2329ea79f8
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Read the file from $M1, indirectly filling up the lru list.
+TEST `cat $M1/foo > /dev/null`
+statedump=$(generate_mount_statedump $V0 $M1)
+sleep 1
+EXPECT "25" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+# Delete foo from $M0.
+TEST unlink $M0/foo
+
+# Send stat on foo from $M1 to force $M1 to "forget" inode associated with foo.
+# Now the ghost shards associated with "foo" are still in lru list of $M1.
+TEST ! stat $M1/foo
+
+# Let's force the ghost shards of "foo" out of lru list by looking up more shards
+# through I/O on a file named "bar" from $M1. This should crash if the base inode
+# had been destroyed by now.
+
+TEST dd if=/dev/zero of=$M1/bar bs=1M count=104
+
+###############################################
+#### Now for some inode ref-leak tests ... ####
+###############################################
+
+# Expect there to be 29 active inodes - 26 belonging to "bar", 1 for .shard,
+# 1 for .shard/remove_me and 1 for '/'
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr 26 + 3` get_mount_active_size_value $V0 $M1
+
+TEST rm -f $M1/bar
+EXPECT_WITHIN $SHARD_COUNT_TIME 3 get_mount_active_size_value $V0 $M1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
new file mode 100644
index 00000000000..8d3a67a36be
--- /dev/null
+++ b/tests/bugs/shard/bug-1669077.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# If the bug still exists, client should crash during fallocate below
+TEST fallocate -l 200M $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
new file mode 100644
index 00000000000..3e4a65af19a
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 120
+TEST $CLI volume set $V0 features.shard-deletion-rate 120
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
+
+# Create a file
+TEST touch $M0/file1
+
+# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit
+TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/bug-1696136
+
+cleanup
diff --git a/tests/bugs/shard/bug-1696136.c b/tests/bugs/shard/bug-1696136.c
new file mode 100644
index 00000000000..cb650535b09
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136.c
@@ -0,0 +1,122 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+enum fallocate_flag {
+ TEST_FALLOCATE_NONE,
+ TEST_FALLOCATE_KEEP_SIZE,
+ TEST_FALLOCATE_ZERO_RANGE,
+ TEST_FALLOCATE_PUNCH_HOLE,
+ TEST_FALLOCATE_MAX,
+};
+
+int
+get_fallocate_flag(int opcode)
+{
+ int ret = 0;
+
+ switch (opcode) {
+ case TEST_FALLOCATE_NONE:
+ ret = 0;
+ break;
+ case TEST_FALLOCATE_KEEP_SIZE:
+ ret = FALLOC_FL_KEEP_SIZE;
+ break;
+ case TEST_FALLOCATE_ZERO_RANGE:
+ ret = FALLOC_FL_ZERO_RANGE;
+ break;
+ case TEST_FALLOCATE_PUNCH_HOLE:
+ ret = FALLOC_FL_PUNCH_HOLE;
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 1;
+ int opcode = -1;
+ off_t offset = 0;
+ size_t len = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 8) {
+ fprintf(stderr,
+ "Syntax: %s <host> <volname> <opcode> <offset> <len> "
+ "<file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, argv[7], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ opcode = atoi(argv[3]);
+ opcode = get_fallocate_flag(opcode);
+ if (opcode < 0) {
+ fprintf(stderr, "get_fallocate_flag: invalid flag \n");
+ goto out;
+ }
+
+ /* Note that off_t is signed but size_t isn't. */
+ offset = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
+
+ fd = glfs_open(fs, argv[6], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = glfs_fallocate(fd, opcode, offset, len);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_unlink(fs, argv[6]);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink: returned %d\n", ret);
+ goto out;
+ }
+ /* Sleep for 3s to give enough time for background deletion to complete
+ * during which if the bug exists, the process will crash.
+ */
+ sleep(3);
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/bug-1696136.t b/tests/bugs/shard/bug-1696136.t
new file mode 100644
index 00000000000..b6dc858f083
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 120
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
+
+# Create a file
+TEST touch $M0/file1
+
+# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit
+TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/bug-1696136
+
+cleanup
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
new file mode 100644
index 00000000000..f6e50376a58
--- /dev/null
+++ b/tests/bugs/shard/bug-1705884.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+require_fallocate -l 1m $M0/file
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 200M $M0/foo
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
+TEST truncate -s 0 $M0/foo
+EXPECT "0" stat -c %b $M0/foo
+TEST fallocate -l 100M $M0/foo
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1738419.t b/tests/bugs/shard/bug-1738419.t
new file mode 100644
index 00000000000..8d0a31d9754
--- /dev/null
+++ b/tests/bugs/shard/bug-1738419.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 network.remote-dio off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/zero of=$M0/metadata bs=501 count=1
+
+EXPECT "501" echo $("dd" if=$M0/metadata bs=4096 count=1 of=/dev/null iflag=direct 2>&1 | awk '/bytes/ {print $1}')
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-shard-discard.c b/tests/bugs/shard/bug-shard-discard.c
new file mode 100644
index 00000000000..6fa93fb89d1
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-discard.c
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ off_t off = 0;
+ size_t len = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 7) {
+ fprintf(
+ stderr,
+ "Syntax: %s <host> <volname> <file-path> <off> <len> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, argv[6], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ /* Note that off_t is signed but size_t isn't. */
+ off = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
+
+ ret = glfs_discard(fd, off, len);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_discard: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t
new file mode 100644
index 00000000000..910ade14801
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-discard.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+FILE_COUNT_TIME=5
+
+function get_shard_count {
+ ls $1/$2.* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create a file.
+TEST touch $M0/foo
+TEST dd if=/dev/urandom of=$M0/foo bs=1M count=10
+
+# This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+TEST stat $B0/${V0}2/.shard
+TEST stat $B0/${V0}3/.shard
+
+#Note the size of the file, it should be 10M
+EXPECT '10485760' stat -c %s $M0/foo
+
+gfid_foo=$(get_gfid_string $M0/foo)
+
+TEST build_tester $(dirname $0)/bug-shard-discard.c -lgfapi -Wall -O2
+#Call discard on the file at off=7M and len=3M
+TEST $(dirname $0)/bug-shard-discard $H0 $V0 /foo 7340032 3145728 `gluster --print-logdir`/glfs-$V0.log
+
+#Ensure that discard doesn't change the original size of the file.
+EXPECT '10485760' stat -c %s $M0/foo
+
+# Ensure that the last shard is all zero'd out
+EXPECT "1" file_all_zeroes `find $B0 -name $gfid_foo.2`
+EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1`
+
+# Now unlink the file. And ensure that all shards associated with the file are cleaned up
+TEST unlink $M0/foo
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}0/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}1/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}2/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}3/.shard $gfid_foo
+TEST ! stat $M0/foo
+
+#clean up everything
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+TEST rm -f $(dirname $0)/bug-shard-discard
+
+cleanup
diff --git a/tests/bugs/shard/bug-shard-zerofill.c b/tests/bugs/shard/bug-shard-zerofill.c
new file mode 100644
index 00000000000..ed4c8c54dc2
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-zerofill.c
@@ -0,0 +1,60 @@
+#include <stdio.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+
+ if (argc != 5) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, argv[4], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ /* Zero-fill "foo" with 10MB of data */
+ ret = glfs_zerofill(fd, 0, 10485760);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_zerofill: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/bug-shard-zerofill.t b/tests/bugs/shard/bug-shard-zerofill.t
new file mode 100644
index 00000000000..4a919a24b99
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-zerofill.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create a file.
+TEST touch $M0/foo
+
+gfid_foo=$(get_gfid_string $M0/foo)
+
+TEST build_tester $(dirname $0)/bug-shard-zerofill.c -lgfapi -Wall -O2
+TEST $(dirname $0)/bug-shard-zerofill $H0 $V0 /foo `gluster --print-logdir`/glfs-$V0.log
+
+# This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+TEST stat $B0/${V0}2/.shard
+TEST stat $B0/${V0}3/.shard
+
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %s`
+EXPECT "2097152" echo `find $B0 -name $gfid_foo.2 | xargs stat -c %s`
+
+EXPECT "1" file_all_zeroes $M0/foo
+
+TEST `echo "abc" >> $M0/foo`
+
+EXPECT_NOT "1" file_all_zeroes $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+TEST rm -f $(dirname $0)/bug-shard-zerofill
+
+cleanup
diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t
new file mode 100644
index 00000000000..923a4d8d747
--- /dev/null
+++ b/tests/bugs/shard/configure-lru-limit.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created, 24 of them under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=100
+
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+
+# Base shard is never added to this list. So all other shards should make up for 24 inodes in lru list
+EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+
+rm -f $statedump
+
+# Test to ensure there's no "reconfiguration" of the value once set.
+TEST $CLI volume set $V0 features.shard-lru-limit 30
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t
new file mode 100644
index 00000000000..ba22d2b74fe
--- /dev/null
+++ b/tests/bugs/shard/issue-1243.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST $CLI volume set $V0 md-cache-timeout 10
+
+# Write data into a file such that its size crosses shard-block-size
+TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Execute a setxattr on the file.
+TEST setfattr -n trusted.libvirt -v some-value $M0/foo
+
+# Size of the file should be the aggregated size, not the shard-block-size
+EXPECT '8388608' stat -c %s $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Execute a removexattr on the file.
+TEST setfattr -x trusted.libvirt $M0/foo
+
+# Size of the file should be the aggregated size, not the shard-block-size
+EXPECT '8388608' stat -c %s $M0/foo
+cleanup
diff --git a/tests/bugs/shard/issue-1281.t b/tests/bugs/shard/issue-1281.t
new file mode 100644
index 00000000000..9704caa8944
--- /dev/null
+++ b/tests/bugs/shard/issue-1281.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+#Open a file and store descriptor in fd = 5
+exec 5>$M0/foo
+
+#Unlink the same file which is opened in prev step
+TEST unlink $M0/foo
+
+#Write something on the file using the open fd = 5
+echo "issue-1281" >&5
+
+#Write on the descriptor should be succesful
+EXPECT 0 echo $?
+
+#Close the fd = 5
+exec 5>&-
+
+cleanup
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
new file mode 100644
index 00000000000..bbe82c0e5b2
--- /dev/null
+++ b/tests/bugs/shard/issue-1425.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 20M $M0/foo
+gfid_new=$(get_gfid_string $M0/foo)
+
+# Check for the base shard
+TEST stat $M0/foo
+TEST stat $B0/${V0}0/foo
+
+# There should be 4 associated shards
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/foo
+
+TEST ! stat $M0/foo
+TEST ! stat $B0/${V0}0/foo
+
+# There should be no shards now
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+cleanup
diff --git a/tests/bugs/shard/parallel-truncate-read.t b/tests/bugs/shard/parallel-truncate-read.t
new file mode 100644
index 00000000000..4de876f58f6
--- /dev/null
+++ b/tests/bugs/shard/parallel-truncate-read.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+#This test will crash if shard's LRU contains a shard's inode even after the
+#inode is forgotten. Minimum time for crash to happen I saw was 180 seconds
+
+. $(dirname $0)/../../include.rc
+
+function keep_writing {
+ cd $M0;
+ while [ -f /tmp/parallel-truncate-read ]
+ do
+ dd if=/dev/zero of=file1 bs=1M count=16
+ done
+ cd
+}
+
+function keep_reading {
+ cd $M0;
+ while [ -f /tmp/parallel-truncate-read ]
+ do
+ cat file1 > /dev/null
+ done
+ cd
+}
+
+cleanup;
+
+TEST touch /tmp/parallel-truncate-read
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+keep_writing &
+keep_reading &
+sleep 180
+TEST rm -f /tmp/parallel-truncate-read
+wait
+#test that the mount is operational
+TEST stat $M0
+
+cleanup;
diff --git a/tests/bugs/shard/shard-append-test.c b/tests/bugs/shard/shard-append-test.c
new file mode 100644
index 00000000000..c7debb2b182
--- /dev/null
+++ b/tests/bugs/shard/shard-append-test.c
@@ -0,0 +1,183 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+/*This test tests that shard xlator handles offset in appending writes
+ * correctly. This test performs writes of 1025 bytes 1025 times, in 5 threads
+ * with different threads. The buffer to be written is same character repeated
+ * 1025 times in the buffer for a thread. At the end it reads the buffer till
+ * end of file and tests that the read of 1025 bytes is always same character
+ * and the content read is 5*1025*1025 size. 1025 bytes is chosen because it
+ * will lead to write on more than one shard at some point when the size is
+ * going over the initial shard*/
+pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+int thread_data = '1';
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+void *
+write_data(void *data)
+{
+ char buf[1025] = {0};
+ glfs_fd_t *glfd = NULL;
+ glfs_t *fs = data;
+ int i = 0;
+
+ pthread_mutex_lock(&lock);
+ {
+ memset(buf, thread_data, sizeof(buf));
+ thread_data++;
+ }
+ pthread_mutex_unlock(&lock);
+
+ for (i = 0; i < 1025; i++) {
+ glfd = glfs_creat(fs, "parallel-write.txt", O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR | O_SYNC);
+ if (!glfd) {
+ LOG_ERR("Failed to create file");
+ exit(1);
+ }
+
+ if (glfs_write(glfd, buf, sizeof(buf), 0) < 0) {
+ LOG_ERR("Failed to write to file");
+ exit(1);
+ }
+ if (glfs_close(glfd) != 0) {
+ LOG_ERR("Failed to close file");
+ exit(1);
+ }
+ }
+ return NULL;
+}
+
+int
+main(int argc, char *argv[])
+{
+ pthread_t tid[5] = {0};
+ char buf[1025] = {0};
+ char cmp_buf[1025] = {0};
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ ssize_t bytes_read = 0;
+ ssize_t total_bytes_read = 0;
+ int i = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ pthread_create(&tid[i], NULL, write_data, fs);
+ }
+
+ for (i = 0; i < 5; i++) {
+ pthread_join(tid[i], NULL);
+ }
+ glfd = glfs_open(fs, "parallel-write.txt", O_RDONLY);
+ if (!glfd) {
+ LOG_ERR("Failed to open file for reading");
+ exit(1);
+ }
+
+ while ((bytes_read = glfs_read(glfd, buf, sizeof(buf), 0)) > 0) {
+ if (bytes_read != sizeof(buf)) {
+ fprintf(stderr,
+ "Didn't read complete data read: %zd "
+ "expected: %lu",
+ bytes_read, sizeof(buf));
+ exit(1);
+ }
+
+ total_bytes_read += bytes_read;
+ if (buf[0] < '1' || buf[0] >= thread_data) {
+ fprintf(stderr, "Invalid character found: %c", buf[0]);
+ exit(1);
+ }
+ memset(cmp_buf, buf[0], sizeof(cmp_buf));
+ if (memcmp(cmp_buf, buf, sizeof(cmp_buf))) {
+ LOG_ERR("Data corrupted");
+ exit(1);
+ }
+ memset(cmp_buf, 0, sizeof(cmp_buf));
+ }
+
+ if (total_bytes_read != 5 * 1025 * 1025) {
+ fprintf(stderr,
+ "Failed to read what is written, read; %zd, "
+ "expected %zu",
+ total_bytes_read, 5 * 1025 * 1025);
+ exit(1);
+ }
+
+ if (glfs_close(glfd) != 0) {
+ LOG_ERR("Failed to close");
+ exit(1);
+ }
+ return 0;
+}
diff --git a/tests/bugs/shard/shard-append-test.t b/tests/bugs/shard/shard-append-test.t
new file mode 100644
index 00000000000..f8719f2a2c1
--- /dev/null
+++ b/tests/bugs/shard/shard-append-test.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+
+#Uncomment the following line after shard-queuing is implemented
+#TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0;
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/shard-append-test.c -lgfapi -lpthread
+
+TEST ./$(dirname $0)/shard-append-test ${H0} $V0 $logdir/shard-append-test.log
+
+cleanup_tester $(dirname $0)/shard-append-test
+
+cleanup;
diff --git a/tests/bugs/shard/shard-fallocate.c b/tests/bugs/shard/shard-fallocate.c
new file mode 100644
index 00000000000..cb0714e8564
--- /dev/null
+++ b/tests/bugs/shard/shard-fallocate.c
@@ -0,0 +1,113 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+enum fallocate_flag {
+ TEST_FALLOCATE_NONE,
+ TEST_FALLOCATE_KEEP_SIZE,
+ TEST_FALLOCATE_ZERO_RANGE,
+ TEST_FALLOCATE_PUNCH_HOLE,
+ TEST_FALLOCATE_MAX,
+};
+
+int
+get_fallocate_flag(int opcode)
+{
+ int ret = 0;
+
+ switch (opcode) {
+ case TEST_FALLOCATE_NONE:
+ ret = 0;
+ break;
+ case TEST_FALLOCATE_KEEP_SIZE:
+ ret = FALLOC_FL_KEEP_SIZE;
+ break;
+ case TEST_FALLOCATE_ZERO_RANGE:
+ ret = FALLOC_FL_ZERO_RANGE;
+ break;
+ case TEST_FALLOCATE_PUNCH_HOLE:
+ ret = FALLOC_FL_PUNCH_HOLE;
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 1;
+ int opcode = -1;
+ off_t offset = 0;
+ size_t len = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 8) {
+ fprintf(stderr,
+ "Syntax: %s <host> <volname> <opcode> <offset> <len> "
+ "<file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, argv[7], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ opcode = atoi(argv[3]);
+ opcode = get_fallocate_flag(opcode);
+ if (opcode < 0) {
+ fprintf(stderr, "get_fallocate_flag: invalid flag \n");
+ goto out;
+ }
+
+ /* Note that off_t is signed but size_t isn't. */
+ offset = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
+
+ fd = glfs_open(fs, argv[6], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = glfs_fallocate(fd, opcode, offset, len);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
new file mode 100644
index 00000000000..3fd181be690
--- /dev/null
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SHARD_COUNT_TIME=5
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
+
+ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
+TEST rm -f $M0/one-plus-five-shards
+# Expect 5 inodes less. But one inode more than before because .remove_me would be created.
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 $M0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t
new file mode 100644
index 00000000000..990ca69a8b1
--- /dev/null
+++ b/tests/bugs/shard/unlinks-and-renames.t
@@ -0,0 +1,333 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+# The aim of this test script is to exercise the various codepaths of unlink
+# and rename fops in sharding and make sure they work fine.
+#
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+#################################################
+################### UNLINK ######################
+#################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/foo
+TEST touch $M0/dir/new
+
+##########################################
+##### 01. Unlink with /.shard absent #####
+##########################################
+
+TEST truncate -s 5M $M0/dir/foo
+TEST ! stat $B0/${V0}0/.shard
+TEST ! stat $B0/${V0}1/.shard
+# Test to ensure that unlink doesn't fail due to absence of /.shard
+gfid_foo=$(get_gfid_string $M0/dir/foo)
+TEST unlink $M0/dir/foo
+TEST stat $B0/${V0}0/.shard/.remove_me
+TEST stat $B0/${V0}1/.shard/.remove_me
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
+
+######################################################
+##### 02. Unlink of a sharded file without holes #####
+######################################################
+
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/new bs=1024 count=9216
+gfid_new=$(get_gfid_string $M0/dir/new)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_new.1
+TEST stat $B0/${V0}1/.shard/$gfid_new.1
+TEST stat $B0/${V0}0/.shard/$gfid_new.2
+TEST stat $B0/${V0}1/.shard/$gfid_new.2
+TEST unlink $M0/dir/new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_new
+TEST ! stat $M0/dir/new
+TEST ! stat $B0/${V0}0/dir/new
+TEST ! stat $B0/${V0}1/dir/new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
+
+###########################################
+##### 03. Unlink with /.shard present #####
+###########################################
+
+TEST truncate -s 5M $M0/dir/foo
+gfid_foo=$(get_gfid_string $M0/dir/foo)
+# Ensure its shards are absent.
+TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
+# Test to ensure that unlink of a sparse file works fine.
+TEST unlink $M0/dir/foo
+TEST ! stat $B0/${V0}0/dir/foo
+TEST ! stat $B0/${V0}1/dir/foo
+TEST ! stat $M0/dir/foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
+
+#################################################################
+##### 04. Unlink of a file with only one block (the zeroth) #####
+#################################################################
+
+TEST touch $M0/dir/foo
+gfid_foo=$(get_gfid_string $M0/dir/foo)
+TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 count=1024
+# Test to ensure that unlink of a file with only base shard works fine.
+TEST unlink $M0/dir/foo
+TEST ! stat $B0/${V0}0/dir/foo
+TEST ! stat $B0/${V0}1/dir/foo
+TEST ! stat $M0/dir/foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
+
+########################################################
+##### 05. Unlink of a sharded file with hard-links #####
+########################################################
+
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/original bs=1024 count=9216
+gfid_original=$(get_gfid_string $M0/dir/original)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_original.1
+TEST stat $B0/${V0}1/.shard/$gfid_original.1
+TEST stat $B0/${V0}0/.shard/$gfid_original.2
+TEST stat $B0/${V0}1/.shard/$gfid_original.2
+# Create a hard link.
+TEST ln $M0/dir/original $M0/link
+# Now delete the original file.
+TEST unlink $M0/dir/original
+TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_original
+TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_original
+# Ensure the shards are still intact.
+TEST stat $B0/${V0}0/.shard/$gfid_original.1
+TEST stat $B0/${V0}1/.shard/$gfid_original.1
+TEST stat $B0/${V0}0/.shard/$gfid_original.2
+TEST stat $B0/${V0}1/.shard/$gfid_original.2
+TEST ! stat $M0/dir/original
+TEST stat $M0/link
+TEST stat $B0/${V0}0/link
+TEST stat $B0/${V0}1/link
+# Now delete the last link.
+TEST unlink $M0/link
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_original
+# Ensure that the shards are all cleaned up.
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_original
+TEST ! stat $M0/link
+TEST ! stat $B0/${V0}0/link
+TEST ! stat $B0/${V0}1/link
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
+
+#################################################
+################### RENAME ######################
+#################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/src
+TEST touch $M0/dir/dst
+
+##########################################
+##### 06. Rename with /.shard absent #####
+##########################################
+
+TEST truncate -s 5M $M0/dir/dst
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+TEST ! stat $B0/${V0}0/.shard
+TEST ! stat $B0/${V0}1/.shard
+# Test to ensure that rename doesn't fail due to absence of /.shard
+TEST mv -f $M0/dir/src $M0/dir/dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+######################################################
+##### 07. Rename to a sharded file without holes #####
+######################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST stat $B0/${V0}1/.shard/$gfid_dst.2
+TEST mv -f $M0/dir/src $M0/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+#######################################################
+##### 08. Rename of dst file with /.shard present #####
+#######################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+TEST truncate -s 5M $M0/dir/dst
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Test to ensure that rename into a sparse file works fine.
+TEST mv -f $M0/dir/src $M0/dir/dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+###################################################################
+##### 09. Rename of dst file with only one block (the zeroth) #####
+###################################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=1024
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Test to ensure that rename into a file with only base shard works fine.
+TEST mv -f $M0/dir/src $M0/dir/dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+############################################################
+##### 10. Rename to a dst sharded file with hard-links #####
+############################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST stat $B0/${V0}1/.shard/$gfid_dst.2
+# Create a hard link.
+TEST ln $M0/dir/dst $M0/link
+# Now rename src to the dst.
+TEST mv -f $M0/dir/src $M0/dir/dst
+# Ensure the shards are still intact.
+TEST stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST stat $B0/${V0}1/.shard/$gfid_dst.2
+TEST ! stat $M0/dir/src
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
+TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
+# Now rename another file to the last link.
+TEST touch $M0/dir/src2
+TEST mv -f $M0/dir/src2 $M0/link
+# Ensure that the shards are all cleaned up.
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
+TEST ! stat $M0/dir/src2
+TEST ! stat $B0/${V0}0/dir/src2
+TEST ! stat $B0/${V0}1/dir/src2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+##############################################################
+##### 11. Rename with non-existent dst and a sharded src #####
+##############################################################l
+
+TEST touch $M0/dir/src
+TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216
+gfid_src=$(get_gfid_string $M0/dir/src)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_src.1
+TEST stat $B0/${V0}1/.shard/$gfid_src.1
+TEST stat $B0/${V0}0/.shard/$gfid_src.2
+TEST stat $B0/${V0}1/.shard/$gfid_src.2
+# Now rename src to the dst.
+TEST mv $M0/dir/src $M0/dir/dst2
+
+TEST stat $B0/${V0}0/.shard/$gfid_src.1
+TEST stat $B0/${V0}1/.shard/$gfid_src.1
+TEST stat $B0/${V0}0/.shard/$gfid_src.2
+TEST stat $B0/${V0}1/.shard/$gfid_src.2
+TEST ! stat $M0/dir/src
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $M0/dir/dst2
+TEST stat $B0/${V0}0/dir/dst2
+TEST stat $B0/${V0}1/dir/dst2
+
+#############################################################################
+##### 12. Rename with non-existent dst and a sharded src with no shards #####
+#############################################################################
+
+TEST touch $M0/dir/src
+TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=1024
+gfid_src=$(get_gfid_string $M0/dir/src)
+TEST ! stat $B0/${V0}0/.shard/$gfid_src.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_src.1
+# Now rename src to the dst.
+TEST mv $M0/dir/src $M0/dir/dst1
+TEST ! stat $M0/dir/src
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $M0/dir/dst1
+TEST stat $B0/${V0}0/dir/dst1
+TEST stat $B0/${V0}1/dir/dst1
+
+cleanup
diff --git a/tests/bugs/shard/zero-flag.t b/tests/bugs/shard/zero-flag.t
new file mode 100644
index 00000000000..1f39787ab9f
--- /dev/null
+++ b/tests/bugs/shard/zero-flag.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+require_fallocate -z -l 512k $M0/file && rm -f $M0/file
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST build_tester $(dirname $0)/shard-fallocate.c -lgfapi -Wall -O2
+
+# On file1 confirm that when fallocate's offset + len > cur file size,
+# the new file size will increase.
+TEST touch $M0/tmp
+TEST `echo 'abcdefghijklmnopqrstuvwxyz' > $M0/tmp`
+TEST touch $M0/file1
+
+gfid_file1=$(get_gfid_string $M0/file1)
+
+TEST $(dirname $0)/shard-fallocate $H0 $V0 "0" "0" "6291456" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT '6291456' stat -c %s $M0/file1
+
+# This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+TEST stat $B0/${V0}2/.shard
+TEST stat $B0/${V0}3/.shard
+
+EXPECT "2097152" echo `find $B0 -name $gfid_file1.1 | xargs stat -c %s`
+EXPECT "1" file_all_zeroes $M0/file1
+
+
+# On file2 confirm that fallocate to already allocated region of the
+# file does not change the content of the file.
+TEST truncate -s 6M $M0/file2
+TEST dd if=$M0/tmp of=$M0/file2 bs=1 seek=3145728 count=26 conv=notrunc
+md5sum_file2=$(md5sum $M0/file2 | awk '{print $1}')
+
+TEST $(dirname $0)/shard-fallocate $H0 $V0 "0" "3145728" "26" /file2 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT '6291456' stat -c %s $M0/file2
+EXPECT "$md5sum_file2" echo `md5sum $M0/file2 | awk '{print $1}'`
+
+# On file3 confirm that fallocate to a region of the file that consists
+#of holes creates a new shard in its place, fallocates it and there is no
+#change in the file content seen by the application.
+TEST touch $M0/file3
+
+gfid_file3=$(get_gfid_string $M0/file3)
+
+TEST dd if=$M0/tmp of=$M0/file3 bs=1 seek=9437184 count=26 conv=notrunc
+TEST ! stat $B0/$V0*/.shard/$gfid_file3.1
+TEST stat $B0/$V0*/.shard/$gfid_file3.2
+md5sum_file3=$(md5sum $M0/file3 | awk '{print $1}')
+EXPECT "1048602" echo `find $B0 -name $gfid_file3.2 | xargs stat -c %s`
+
+TEST $(dirname $0)/shard-fallocate $H0 $V0 "0" "5242880" "1048576" /file3 `gluster --print-logdir`/glfs-$V0.log
+EXPECT "$md5sum_file3" echo `md5sum $M0/file3 | awk '{print $1}'`
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/shard-fallocate
+cleanup
diff --git a/tests/bugs/snapshot/bug-1045333.t b/tests/bugs/snapshot/bug-1045333.t
new file mode 100755
index 00000000000..6c0b995b5b0
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1045333.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+
+S1="${V0}-snap1" #Create snapshot with name contains hyphen(-)
+S2="-${V0}-snap2" #Create snapshot with name starts with hyphen(-)
+#Create snapshot with a long name
+S3="${V0}_single_gluster_volume_is_accessible_by_multiple_clients_offline_snapshot_is_a_long_name"
+
+TEST $CLI snapshot create $S1 $V0 no-timestamp
+TEST snapshot_exists 0 $S1
+
+TEST $CLI snapshot create $S2 $V0 no-timestamp
+TEST snapshot_exists 0 $S2
+
+TEST $CLI snapshot create $S3 $V0 no-timestamp
+TEST snapshot_exists 0 $S3
+
+
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S1/$V0 $M0
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S2/$V0 $M1
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S3/$V0 $M2
+
+#Clean up
+#TEST $CLI snapshot delete $S1
+#TEST $CLI snapshot delete $S2
+#TEST $CLI snapshot delete $S3
+
+TEST $CLI volume stop $V0 force
+#TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1049834.t b/tests/bugs/snapshot/bug-1049834.t
new file mode 100755
index 00000000000..29c75cc7f96
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1049834.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#Setting the snap-max-hard-limit to 4
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 4
+PID_1=$!
+wait $PID_1
+
+#Creating 3 snapshots on the volume (which is the soft-limit)
+TEST create_n_snapshots $V0 3 $V0_snap
+TEST snapshot_n_exists $V0 3 $V0_snap
+
+#Creating the 4th snapshot on the volume and expecting it to be created
+# but with the deletion of the oldest snapshot i.e 1st snapshot
+TEST $CLI_1 snapshot create ${V0}_snap4 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap4
+TEST ! snapshot_exists 1 ${V0}_snap1
+TEST $CLI_1 snapshot delete ${V0}_snap4
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+#Deleting the 4 snaps
+#TEST delete_n_snapshots $V0 4 $V0_snap
+#TEST ! snapshot_n_exists $V0 4 $V0_snap
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1064768.t b/tests/bugs/snapshot/bug-1064768.t
new file mode 100644
index 00000000000..53140a0e13e
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1064768.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info
+TEST $CLI volume profile $V0 stop
+
+TEST $CLI volume status
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Stopped' volinfo_field $V0 'Status';
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1087203.t b/tests/bugs/snapshot/bug-1087203.t
new file mode 100644
index 00000000000..035be098576
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1087203.t
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../cluster.rc
+
+function get_volume_info ()
+{
+ local var=$1
+ $CLI_1 volume info $V0 | grep "^$var" | sed 's/.*: //'
+}
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2
+EXPECT "$V0" get_volume_info 'Volume Name';
+EXPECT 'Created' get_volume_info 'Status';
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' get_volume_info 'Status';
+
+
+# Setting system limit
+TEST $CLI_1 snapshot config snap-max-hard-limit 100
+
+# Volume limit cannot exceed system limit, as limit is set to 100,
+# this should fail.
+TEST ! $CLI_1 snapshot config $V0 snap-max-hard-limit 101
+
+# Following are the invalid cases
+TEST ! $CLI_1 snapshot config $V0 snap-max-hard-limit a10
+TEST ! $CLI_1 snapshot config snap-max-hard-limit 10a
+TEST ! $CLI_1 snapshot config snap-max-hard-limit 10%
+TEST ! $CLI_1 snapshot config snap-max-soft-limit 50%1
+TEST ! $CLI_1 snapshot config snap-max-soft-limit 0111
+TEST ! $CLI_1 snapshot config snap-max-hard-limit OXA
+TEST ! $CLI_1 snapshot config snap-max-hard-limit 11.11
+TEST ! $CLI_1 snapshot config snap-max-soft-limit 50%
+TEST ! $CLI_1 snapshot config snap-max-hard-limit -100
+TEST ! $CLI_1 snapshot config snap-max-soft-limit -90
+
+# Soft limit cannot be assigned to volume
+TEST ! $CLI_1 snapshot config $V0 snap-max-soft-limit 10
+
+# Valid case
+TEST $CLI_1 snapshot config snap-max-soft-limit 50
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 10
+
+# Validating auto-delete feature
+# Make sure auto-delete is disabled by default
+EXPECT 'disable' snap_config CLI_1 'auto-delete'
+
+# Test for invalid value for auto-delete
+TEST ! $CLI_1 snapshot config auto-delete test
+
+TEST $CLI_1 snapshot config snap-max-hard-limit 6
+TEST $CLI_1 snapshot config snap-max-soft-limit 50
+
+# Create 4 snapshots
+snap_index=1
+snap_count=4
+TEST snap_create CLI_1 $V0 $snap_index $snap_count
+
+# If auto-delete is disabled then oldest snapshot
+# should not be deleted automatically.
+EXPECT '4' get_snap_count CLI_1;
+
+TEST snap_delete CLI_1 $snap_index $snap_count;
+
+# After all those 4 snaps are deleted, There will not be any snaps present
+EXPECT '0' get_snap_count CLI_1;
+
+TEST $CLI_1 snapshot config auto-delete enable
+
+# auto-delete is already enabled, Hence expect a failure.
+TEST ! $CLI_1 snapshot config auto-delete on
+
+# Testing other boolean values with auto-delete
+TEST $CLI_1 snapshot config auto-delete off
+EXPECT 'off' snap_config CLI_1 'auto-delete'
+
+TEST $CLI_1 snapshot config auto-delete true
+EXPECT 'true' snap_config CLI_1 'auto-delete'
+
+# Try to create 4 snaps again, As auto-delete is enabled
+# oldest snap should be deleted and snapcount should be 3
+
+TEST snap_create CLI_1 $V0 $snap_index $snap_count;
+EXPECT '3' get_snap_count CLI_1;
+
+TEST $CLI_1 snapshot config auto-delete disable
+EXPECT 'disable' snap_config CLI_1 'auto-delete'
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1090042.t b/tests/bugs/snapshot/bug-1090042.t
new file mode 100755
index 00000000000..98531a9751e
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1090042.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+TEST glusterd;
+
+TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+
+TEST kill_brick $V0 $H0 $L1;
+
+#Normal snap create should fail
+TEST ! $CLI snapshot create ${V0}_snap1 $V0 no-timestamp;
+TEST ! snapshot_exists 0 ${V0}_snap1;
+
+#With changes introduced in BZ #1184344 force snap create should fail too
+TEST ! $CLI snapshot create ${V0}_snap1 $V0 no-timestamp force;
+TEST ! snapshot_exists 0 ${V0}_snap1;
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1109770.t b/tests/bugs/snapshot/bug-1109770.t
new file mode 100644
index 00000000000..22511995937
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1109770.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap4 $V0 no-timestamp;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+TEST $CLI volume set $V0 features.uss disable;
+
+SNAPD_PID=$(ps auxww | grep snapd | grep -v grep | awk '{print $2}');
+
+TEST ! [ $SNAPD_PID -gt 0 ];
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+TEST $CLI volume stop $V0;
+
+SNAPD_PID=$(ps auxww | grep snapd | grep -v grep | awk '{print $2}');
+
+TEST ! [ $SNAPD_PID -gt 0 ];
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1109889.t b/tests/bugs/snapshot/bug-1109889.t
new file mode 100644
index 00000000000..5fdc7dc9506
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1109889.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+MOUNT_PID=$(get_mount_process_pid $V0 $M0)
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot config activate-on-create enable
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap4 $V0 no-timestamp;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+#let snapd get started properly and client connect to snapd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+
+SNAPD_PID=$(ps auxww | grep snapd | grep -v grep | awk '{print $2}');
+
+TEST [ $SNAPD_PID -gt 0 ];
+
+TEST stat $M0/.snaps;
+
+kill -KILL $SNAPD_PID;
+
+# let snapd die properly
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" snap_client_connected_status $V0
+
+TEST ! stat $M0/.snaps;
+
+TEST $CLI volume start $V0 force;
+
+# let client get the snapd port from glusterd and connect
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/.snaps
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1111041.t b/tests/bugs/snapshot/bug-1111041.t
new file mode 100755
index 00000000000..efda9688d8b
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1111041.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+function is_snapd_running {
+ $CLI volume status $1 | grep "Snapshot Daemon" | wc -l;
+}
+
+function snapd_pid {
+ $CLI volume status $V0 | grep "Snapshot Daemon" | awk {'print $8'}
+}
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume start $V0;
+
+EXPECT "0" is_snapd_running $v0
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT "1" is_snapd_running $V0
+
+SNAPD_PID=$(snapd_pid);
+
+TEST [ $SNAPD_PID -gt 0 ]
+
+kill -9 $SNAPD_PID
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^N/A$" snapd_pid
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1112613.t b/tests/bugs/snapshot/bug-1112613.t
new file mode 100644
index 00000000000..e566de056bc
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1112613.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+V1="patchy2"
+
+TEST verify_lvm_version;
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume create $V1 $H2:$L2
+TEST $CLI_1 volume start $V1
+
+# Create 3 snapshots for volume $V0
+snap_count=3
+snap_index=1
+TEST snap_create CLI_1 $V0 $snap_index $snap_count;
+
+# Create 3 snapshots for volume $V1
+snap_count=4
+snap_index=11
+TEST snap_create CLI_1 $V1 $snap_index $snap_count;
+
+EXPECT '3' get_snap_count CLI_1 $V0;
+EXPECT '4' get_snap_count CLI_1 $V1;
+EXPECT '7' get_snap_count CLI_1
+
+TEST $CLI_1 snapshot delete volume $V0
+EXPECT '0' get_snap_count CLI_1 $V0;
+EXPECT '4' get_snap_count CLI_1 $V1;
+EXPECT '4' get_snap_count CLI_1
+
+TEST $CLI_1 snapshot delete all
+EXPECT '0' get_snap_count CLI_1 $V0;
+EXPECT '0' get_snap_count CLI_1 $V1;
+EXPECT '0' get_snap_count CLI_1
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1113975.t b/tests/bugs/snapshot/bug-1113975.t
new file mode 100644
index 00000000000..86c1739fb46
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1113975.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+TEST $CLI volume stop $V0
+
+TEST $CLI snapshot restore snap1;
+
+TEST $CLI snapshot restore snap2;
+
+TEST $CLI volume start $V0
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t b/tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t
new file mode 100644
index 00000000000..c5a285eb775
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 2
+TEST setup_lvm 2
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2
+TEST $CLI volume start $V0
+
+# enable uss and mount the volume
+TEST $CLI volume set $V0 features.uss enable
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# create 10 snapshots and check if all are being reflected
+# in the USS world
+gluster snapshot config activate-on-create enable
+for i in {1..10}; do $CLI snapshot create snap$i $V0 no-timestamp; done
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 10 uss_count_snap_displayed $M0
+
+# snapshots should not be displayed after deactivation
+for i in {1..10}; do $CLI snapshot deactivate snap$i --mode=script; done
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 uss_count_snap_displayed $M0
+
+# activate all the snapshots and check if all the activated snapshots
+# are displayed again
+for i in {1..10}; do $CLI snapshot activate snap$i --mode=script; done
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 10 uss_count_snap_displayed $M0
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1157991.t b/tests/bugs/snapshot/bug-1157991.t
new file mode 100755
index 00000000000..f626ef2b705
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1157991.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+EXPECT 'Stopped' snapshot_status snap1;
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+EXPECT 'Started' snapshot_status snap2;
+
+#Clean up
+TEST $CLI snapshot delete snap1
+TEST $CLI snapshot delete snap2
+
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1162462.t b/tests/bugs/snapshot/bug-1162462.t
new file mode 100755
index 00000000000..5c2e4fe37f0
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1162462.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 features.uss enable;
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+mkdir $M0/test
+echo "file1" > $M0/file1
+ln -s $M0/file1 $M0/test/file_symlink
+ls -l $M0/ > /dev/null
+ls -l $M0/test/ > /dev/null
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+$CLI snapshot activate snap1;
+EXPECT 'Started' snapshot_status snap1;
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+ls $M0/.snaps/snap1/test/ > /dev/null
+ls -l $M0/.snaps/snap1/test/ > /dev/null
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+
+TEST $CLI snapshot delete snap1;
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1162498.t b/tests/bugs/snapshot/bug-1162498.t
new file mode 100755
index 00000000000..a97e4429ee7
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1162498.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+TEST mkdir $M0/xyz
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+TEST rmdir $M0/xyz
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp
+TEST $CLI snapshot create snap4 $V0 no-timestamp
+
+TEST mkdir $M0/xyz
+TEST ls $M0/xyz/.snaps/
+
+TEST $CLI volume stop $V0
+TEST $CLI snapshot restore snap2
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+#Dir xyz exists in snap1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/xyz
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" count_snaps $M0/xyz
+TEST mkdir $M0/abc
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" count_snaps $M0/abc
+
+#Clean up
+TEST $CLI snapshot delete snap1
+TEST $CLI snapshot delete snap3
+TEST $CLI snapshot delete snap4
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1166197.t b/tests/bugs/snapshot/bug-1166197.t
new file mode 100755
index 00000000000..b070ae271ba
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1166197.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+CURDIR=`pwd`
+
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir $N0/testdir
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/testdir/.snaps
+
+TEST cd $N0/testdir
+TEST cd .snaps
+TEST ls
+
+TEST $CLI snapshot deactivate snap2
+TEST ls
+
+TEST cd $CURDIR
+
+#Clean up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+TEST $CLI snapshot delete snap1
+TEST $CLI snapshot delete snap2
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t b/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t
new file mode 100644
index 00000000000..52a7a790b97
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t
@@ -0,0 +1,205 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# This function returns a value "Y" if user can execute
+# the given command. Else it will return "N"
+# @arg-1 : Name of the user
+# @arg-2 : Path of the file
+# @arg-3 : command to be executed
+function check_if_permitted () {
+ local usr=$1
+ local path=$2
+ local cmd=$3
+ local var
+ local ret
+ var=$(su - $usr -c "$cmd $path")
+ ret=$?
+
+ if [ "$cmd" == "cat" ]
+ then
+ if [ "$var" == "Test" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ else
+ if [ "$ret" == "0" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ fi
+}
+
+# Create a directory in /tmp to specify which directory to make
+# as home directory for user
+home_dir=$(mktemp -d)
+chmod 777 $home_dir
+
+function get_new_user() {
+ local temp=$(uuidgen | tr -dc 'a-zA-Z' | head -c 8)
+ id $temp
+ if [ "$?" == "0" ]
+ then
+ get_new_user
+ else
+ echo $temp
+ fi
+}
+
+function create_user() {
+ local user=$1
+ local group=$2
+
+ if [ "$group" == "" ]
+ then
+ /usr/sbin/useradd -d $home_dir/$user $user
+ else
+ /usr/sbin/useradd -d $home_dir/$user -G $group $user
+ fi
+
+ return $?
+}
+
+cleanup;
+
+TEST setup_lvm 1
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+# Mount the volume as both fuse and nfs mount
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# Create 2 user
+user1=$(get_new_user)
+create_user $user1
+user2=$(get_new_user)
+create_user $user2
+
+# create a file for which only user1 has access
+echo "Test" > $M0/README
+chown $user1 $M0/README
+chmod 700 $M0/README
+
+# enable uss and take a snapshot
+TEST $CLI volume set $V0 uss enable
+TEST $CLI snapshot config activate-on-create on
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+
+# try to access the file using user1 account.
+# It should succeed with both normal mount and snapshot world.
+# There is time delay in which snapd might not have got the notification
+# from glusterd about snapshot create hence using "EXPECT_WITHIN"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $M0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $N0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $M0/.snaps/snap1/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $N0/.snaps/snap1/README cat
+
+
+# try to access the file using user2 account
+# It should fail from both normal mount and snapshot world
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $M0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $N0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $M0/.snaps/snap1/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $N0/.snaps/snap1/README cat
+
+# We need to test another scenario where user belonging to one group
+# tries to access files from user belonging to another group
+# instead of using the already created users and making the test case look complex
+# I thought of using two different users.
+
+# The test case written below does the following things
+# 1) Create 2 users (user{3,4}), belonging to 2 different groups (group{3,4})
+# 2) Take a snapshot "snap2"
+# 3) Create a file for which only users belonging to group3 have
+# permission to read
+# 4) Test various combinations of Read-Write, Fuse-NFS mount, User{3,4,5}
+# from both normal mount, and USS world.
+
+echo "Test" > $M0/file3
+
+chmod 740 $M0/file3
+
+group3=$(get_new_user)
+groupadd $group3
+
+group4=$(get_new_user)
+groupadd $group4
+
+user3=$(get_new_user)
+create_user $user3 $group3
+
+user4=$(get_new_user)
+create_user $user4 $group4
+
+user5=$(get_new_user)
+create_user $user5
+
+chgrp $group3 $M0/file3
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $M0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $M0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $M0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $M0/.snaps/snap2/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $N0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $N0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $N0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $N0/.snaps/snap2/file3 "echo Hello >"
+
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/.snaps/snap2/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/.snaps/snap2/file3 "echo Hello >"
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/.snaps/snap2/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/.snaps/snap2/file3 "echo Hello >"
+
+# cleanup
+/usr/sbin/userdel -f -r $user1
+/usr/sbin/userdel -f -r $user2
+/usr/sbin/userdel -f -r $user3
+/usr/sbin/userdel -f -r $user4
+/usr/sbin/userdel -f -r $user5
+
+#cleanup all the home directory which is created as part of this test case
+if [ -d "$home_dir" ]
+then
+ rm -rf $home_dir
+fi
+
+
+groupdel $group3
+groupdel $group4
+
+TEST $CLI snapshot delete all
+
+cleanup;
+
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/snapshot/bug-1168875.t b/tests/bugs/snapshot/bug-1168875.t
new file mode 100644
index 00000000000..9737784fd84
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1168875.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+function check_entry_point_exists ()
+{
+ local entry_point=$1;
+ local _path=$2;
+
+ ls -a $_path | grep $entry_point;
+
+ if [ $? -eq 0 ]; then
+ echo 'Y';
+ else
+ echo 'N';
+ fi
+}
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --xlator-option *-snapview-client.snapdir-entry-path=/dir $M0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $N0;
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+mkdir $M0/dir;
+
+for i in {1..10} ; do echo "file" > $M0/dir/file$i ; done
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+TEST $CLI snapshot activate snap1;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $N0/dir
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $N0/dir1
+
+TEST $CLI volume set $V0 features.show-snapshot-directory enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_entry_point_exists ".snaps" $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir1
+
+TEST $CLI volume set $V0 features.show-snapshot-directory disable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir1
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1178079.t b/tests/bugs/snapshot/bug-1178079.t
new file mode 100644
index 00000000000..a1a6b0b9d49
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1178079.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume set $V0 features.uss on;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST touch $M0/file;
+
+TEST getfattr -d -m . -e hex $M0/file;
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t b/tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t
new file mode 100644
index 00000000000..addc05917d8
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 1
+
+TEST $CLI_1 volume create $V0 $H1:$L1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Quota is not working with cluster test framework
+# Need to check why, Until then commenting out this
+#TEST $CLI_1 volume quota $V0 enable
+#EXPECT 'on' volinfo_field $V0 'features.quota'
+
+TEST $CLI_1 snapshot create ${V0}_snap $V0
+EXPECT '1' get_snap_count CLI_1 $V0
+
+TEST $CLI_1 volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status'
+
+TEST $CLI_1 snapshot restore $($CLI_1 snapshot list)
+EXPECT '0' get_snap_count CLI_1 $V0
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1205592.t b/tests/bugs/snapshot/bug-1205592.t
new file mode 100644
index 00000000000..f7d99345c8d
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1205592.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST launch_cluster 3
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2 $H3:$L3
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+
+kill_glusterd 3
+# If glusterd-quorum is not met then snapshot-create should fail
+TEST ! $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1227646.t b/tests/bugs/snapshot/bug-1227646.t
new file mode 100644
index 00000000000..9b73dfdb32f
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1227646.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+#TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume create $V0 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+TEST $CLI volume stop $V0
+TEST $CLI snapshot restore snap1;
+TEST $CLI volume start $V0
+
+TEST pkill gluster
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI volume stop $V0
+
+cleanup ;
+
diff --git a/tests/bugs/snapshot/bug-1232430.t b/tests/bugs/snapshot/bug-1232430.t
new file mode 100755
index 00000000000..50411b1dbfc
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1232430.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd -LDEBUG;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1/brick_dir
+TEST $CLI volume start $V0
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+
+TEST $CLI snapshot delete snap1
+
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+cleanup
diff --git a/tests/bugs/snapshot/bug-1250387.t b/tests/bugs/snapshot/bug-1250387.t
new file mode 100755
index 00000000000..c9039e37f73
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1250387.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 1;
+TEST setup_lvm 1;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+EXPECT "description" get-cmd-field-xml "snapshot info snap1" "description"
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1260848.t b/tests/bugs/snapshot/bug-1260848.t
new file mode 100644
index 00000000000..6455d8297b2
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1260848.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 uss on
+TEST $CLI volume start $V0
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+TEST df -h $N0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1275616.t b/tests/bugs/snapshot/bug-1275616.t
new file mode 100755
index 00000000000..dcaeae30f90
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1275616.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+TEST $CLI snapshot config activate-on-create enable
+
+TEST $CLI snapshot config $V0 snap-max-hard-limit 100
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+
+TEST $CLI snapshot config $V0 snap-max-hard-limit 150
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+TEST $CLI snapshot config $V0 snap-max-hard-limit 200
+TEST $CLI snapshot create snap3 $V0 no-timestamp
+EXPECT '197' snap_info_volume CLI "Snaps Available" $V0;
+
+TEST $CLI volume stop $V0
+
+# Restore the snapshots and verify the snap-max-hard-limit
+# and the Snaps Available
+TEST $CLI snapshot restore snap1
+EXPECT '98' snap_info_volume CLI "Snaps Available" $V0;
+EXPECT '100' snap_config_volume CLI 'snap-max-hard-limit' $V0
+
+TEST $CLI snapshot restore snap2
+EXPECT '149' snap_info_volume CLI "Snaps Available" $V0;
+EXPECT '150' snap_config_volume CLI 'snap-max-hard-limit' $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Yes" get_snap_brick_status snap3
+
+#Take a clone and verify it inherits snapshot's snap-max-hard-limit
+TEST $CLI snapshot clone clone1 snap3
+
+EXPECT '149' snap_info_volume CLI "Snaps Available" $V0;
+EXPECT '150' snap_config_volume CLI 'snap-max-hard-limit' $V0
+
+EXPECT '200' snap_info_volume CLI "Snaps Available" clone1
+EXPECT '200' snap_config_volume CLI 'snap-max-hard-limit' clone1
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1279327.t b/tests/bugs/snapshot/bug-1279327.t
new file mode 100644
index 00000000000..4e4be6eeea6
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1279327.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+TEST $CLI volume quota $V0 enable
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+TEST $CLI snapshot activate snap1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Yes" get_snap_brick_status snap1
+
+#Take a clone and verify it inherits snapshot's snap-max-hard-limit
+TEST $CLI snapshot clone clone1 snap1
+TEST $CLI volume start clone1
+EXPECT 'Started' volinfo_field clone1 'Status';
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1316437.t b/tests/bugs/snapshot/bug-1316437.t
new file mode 100644
index 00000000000..300c03c97f5
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1316437.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST glusterd
+
+# Intentionally not carving lvms for this as we will not be taking
+# snapshots in this testcase
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume start $V0;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+killall glusterd glusterfsd glusterfs
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 'N' check_if_snapd_exist
+
+glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t b/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t
new file mode 100644
index 00000000000..488bd462a01
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../include.rc
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=2
+
+TEST verify_lvm_version
+TEST init_n_bricks 2
+TEST setup_lvm 2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$L1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume create $V1 $H0:$L2
+EXPECT 'Created' volinfo_field $V1 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V1
+EXPECT 'Started' volinfo_field $V1 'Status'
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI snapshot create ${V0}_snap $V0 no-timestamp
+TEST $CLI snapshot create ${V1}_snap $V1 no-timestamp
+
+# Simulate a node reboot by unmounting the brick, snap_brick and followed by
+# deleting the brick. Now once glusterd restarts, it should be able to construct
+# and remount the snap brick
+snap_bricks=`gluster snap status | grep "Brick Path" | awk -F ":" '{print $3}'`
+
+TEST $CLI volume stop $V1
+TEST $CLI snapshot restore ${V1}_snap;
+
+pkill gluster
+for snap_brick in $snap_bricks
+do
+ echo "Unmounting snap brick" $snap_brick
+ EXPECT_WITHIN_TEST_IN_LOOP $UMOUNT_TIMEOUT "Y" force_umount $snap_brick
+done
+
+rm -rf $snap_brick
+
+TEST glusterd
+TEST pidof glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $L1
+
+cleanup
+
diff --git a/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
new file mode 100755
index 00000000000..f4e4e6ec4d2
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../ssl.rc
+
+function file_exists
+{
+ if [ -f $1 ]; then echo "Y"; else echo "N"; fi
+}
+
+function volume_online_brick_count
+{
+ $CLI volume status $V0 | awk '$1 == "Brick" && $6 != "N/A" { print $6}' | wc -l;
+}
+
+function total_online_bricks
+{
+ # This will count snapd, which isn't really a brick, but callers can
+ # account for that so it's OK.
+ find $GLUSTERD_PIDFILEDIR -name '*.pid' | wc -l
+}
+
+cleanup;
+
+# Initialize the test setup
+TEST setup_lvm 1;
+
+TEST create_self_signed_certs
+
+# Start glusterd
+TEST glusterd
+TEST pidof glusterd;
+#EST $CLI volume set all cluster.brick-multiplex on
+
+# Create and start the volume
+TEST $CLI volume create $V0 $H0:$L1/b1;
+
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" total_online_bricks
+
+# Mount the volume and create some files
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+TEST touch $M0/file;
+
+# Enable activate-on-create
+TEST $CLI snapshot config activate-on-create enable;
+
+# Create a snapshot
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" total_online_bricks
+
+TEST $CLI volume set $V0 features.uss enable;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+# Volume set can trigger graph switch therefore chances are we send this
+# req to old graph. Old graph will not have .snaps. Therefore we should
+# wait for some time.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+killall_gluster
+
+TEST glusterd
+TEST pidof glusterd;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+# Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+# Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
+
+TEST $CLI snapshot delete all
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
new file mode 100644
index 00000000000..04a85db0c1a
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
@@ -0,0 +1,133 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function create_snapshots() {
+ $CLI_1 snapshot create ${V0}_snap ${V0} no-timestamp &
+ PID_1=$!
+
+ $CLI_1 snapshot create ${V1}_snap ${V1} no-timestamp &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function activate_snapshots() {
+ $CLI_1 snapshot activate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot activate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function deactivate_snapshots() {
+ $CLI_1 snapshot deactivate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot deactivate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+cleanup;
+
+TEST verify_lvm_version;
+# Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+# Create volumes
+TEST $CLI_1 volume create $V0 $H1:$L1
+TEST $CLI_2 volume create $V1 $H2:$L2 $H3:$L3
+
+# Start volumes
+TEST $CLI_1 volume start $V0
+TEST $CLI_2 volume start $V1
+
+TEST $CLI_1 snapshot config activate-on-create enable
+
+# Snapshot Operations
+create_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+deactivate_snapshots
+
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
+activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+# This Function will get snap id form snap info command and will
+# check for mount point in system against snap id.
+function mounted_snaps
+{
+ snap_id=`$CLI_1 snap info $1_snap | grep "Snap Volume Name" |
+ awk -F ":" '{print $2}'`
+ echo `mount | grep $snap_id | wc -l`
+}
+
+EXPECT "1" mounted_snaps ${V0}
+EXPECT "2" mounted_snaps ${V1}
+
+deactivate_snapshots
+
+EXPECT "0" mounted_snaps ${V0}
+EXPECT "0" mounted_snaps ${V1}
+
+# This part of test is designed to validate that updates are properly being
+# handled during handshake.
+
+activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+kill_glusterd 2
+
+deactivate_snapshots
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
+TEST start_glusterd 2
+
+# Updates form friend should reflect as snap was deactivated while glusterd
+# process was inactive and mount point should also not exist.
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1}
+
+# It might be possible that the import snap synctask is still updating the data,
+# we need to allow a buffer time to be on the safer side
+sleep 2
+
+kill_glusterd 2
+activate_snapshots
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+TEST start_glusterd 2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+# Updates form friend should reflect as snap was activated while glusterd
+# process was inactive and mount point should exist.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" mounted_snaps ${V1}
+
+cleanup;
+# run first!
+#G_TESTDEF_TEST_STATUS_CENTOS6=BRICK_MUX_BAD_TEST,BUG=1743069
diff --git a/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t
new file mode 100644
index 00000000000..53b274e8819
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1/B1 $H2:$L2/B1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 snapshot delete ${V0}_snap1
+TEST ! snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 start
+TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 $H1:$L1/B1 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $L1/B1
+
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 snapshot delete ${V0}_snap1
+TEST ! snapshot_exists 1 ${V0}_snap1
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1597662.t b/tests/bugs/snapshot/bug-1597662.t
new file mode 100644
index 00000000000..f582930476a
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1597662.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+
+snap_path=/var/run/gluster/snaps
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+$CLI snapshot activate snap1;
+
+EXPECT 'Started' snapshot_status snap1;
+
+# This Function will check for entry /var/run/gluster/snaps/<snap-name>
+# against snap-name
+
+function is_snap_path
+{
+ echo `ls $snap_path | grep snap1 | wc -l`
+}
+
+# snap is active so snap_path should exist
+EXPECT "1" is_snap_path
+
+$CLI snapshot deactivate snap1;
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} 'Stopped' snapshot_status snap1
+# snap is deactivated so snap_path should not exist
+EXPECT "0" is_snap_path
+
+# activate snap again
+$CLI snapshot activate snap1;
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} 'Started' snapshot_status snap1
+
+# snap is active so snap_path should exist
+EXPECT "1" is_snap_path
+
+# delete snap now
+TEST $CLI snapshot delete snap1;
+
+# snap is deleted so snap_path should not exist
+EXPECT "0" is_snap_path
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t b/tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t
new file mode 100644
index 00000000000..a2c004e435e
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../cluster.rc
+
+function get_volume_info ()
+{
+ local var=$1
+ $CLI_1 volume info $V0 | grep "^$var" | sed 's/.*: //'
+}
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2
+EXPECT "$V0" get_volume_info 'Volume Name';
+EXPECT 'Created' get_volume_info 'Status';
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' get_volume_info 'Status';
+
+
+# Setting system limit
+TEST $CLI_1 snapshot config activate-on-create enable
+
+TEST $CLI_1 snapshot create snap1 $V0 no-timestamp description "test"
+TEST kill_glusterd 1
+#deactivate snapshot for changing snap version, so that handshake will
+#happen when glusterd is restarted
+TEST $CLI_2 snapshot deactivate snap1
+TEST start_glusterd 1
+
+#Wait till handshake complete
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} 'Stopped' snapshot_status snap1
+
+#Delete the snapshot, without this fix, delete will lead to assertion failure
+$CLI_1 snapshot delete all
+EXPECT '0' get_snap_count CLI_1;
+cleanup;
+
diff --git a/tests/bugs/stripe/bug-1002207.t b/tests/bugs/stripe/bug-1002207.t
new file mode 100644
index 00000000000..c58a6e20ab8
--- /dev/null
+++ b/tests/bugs/stripe/bug-1002207.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id=$V0 $M0;
+TEST dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024;
+
+function xattr_query_check()
+{
+ local path=$1
+ local xa_name=$2
+
+ local ret=$(getfattr -n $xa_name $path 2>&1 | grep -o "$xa_name: No such attribute" | wc -l)
+ echo $ret
+}
+
+function set_xattr()
+{
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+EXPECT 0 set_xattr $M0/file$i.data "trusted.name" "testofafairlylongxattrstringthatbutnotlongenoughtofailmemoryallocation"
+EXPECT 0 xattr_query_check $M0/file$i.data "trusted.name"
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/stripe/bug-1111454.t b/tests/bugs/stripe/bug-1111454.t
new file mode 100644
index 00000000000..1509dd7b1a2
--- /dev/null
+++ b/tests/bugs/stripe/bug-1111454.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#symlink resolution should succeed
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 stripe 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+TEST ln -s file $M0/dir/symlinkfile
+TEST ls -lR $M0
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/trace/bug-797171.t b/tests/bugs/trace/bug-797171.t
new file mode 100755
index 00000000000..b823e477229
--- /dev/null
+++ b/tests/bugs/trace/bug-797171.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+TEST $CLI volume set $V0 debug.trace marker;
+TEST $CLI volume set $V0 debug.log-history on
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+touch $M0/{1..22};
+rm -f $M0/*;
+
+pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/);
+brick_pid=$(cat $GLUSTERD_PIDFILEDIR/vols/$V0/$pid_file);
+
+mkdir $statedumpdir/statedump_tmp/;
+echo "path=$statedumpdir/statedump_tmp" > $statedumpdir/glusterdump.options;
+echo "all=yes" >> $statedumpdir/glusterdump.options;
+
+TEST $CLI volume statedump $V0 history;
+
+file_name=$(ls $statedumpdir/statedump_tmp);
+TEST grep "xlator.debug.trace.history" $statedumpdir/statedump_tmp/$file_name;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -rf $statedumpdir/statedump_tmp;
+rm -f $statedumpdir/glusterdump.options;
+
+cleanup;
diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t
new file mode 100755
index 00000000000..8070bc1b83c
--- /dev/null
+++ b/tests/bugs/transport/bug-873367.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SSL_BASE=/etc/ssl
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+cleanup;
+rm -f $SSL_BASE/glusterfs.*
+mkdir -p $B0/1
+mkdir -p $M0
+
+TEST openssl genrsa -out $SSL_KEY 2048
+TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ln $SSL_CERT $SSL_CA
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume set $V0 server.ssl on
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 ssl.certificate-depth 6
+TEST $CLI volume set $V0 ssl.cipher-list HIGH
+TEST $CLI volume set $V0 auth.ssl-allow Anyone
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+echo some_data > $M0/data_file
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# If the bug is not fixed, the next mount will fail.
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+EXPECT some_data cat $M0/data_file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/unclassified/bug-1034085.t b/tests/bugs/unclassified/bug-1034085.t
new file mode 100644
index 00000000000..aacaa24d642
--- /dev/null
+++ b/tests/bugs/unclassified/bug-1034085.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+#Test case: Check the creation of indices/xattrop dir as soon as brick comes up.
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+#Create a volume
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST mkdir -p $B0/${V0}-0/.glusterfs/indices/
+TEST touch $B0/${V0}-0/.glusterfs/indices/xattrop
+
+#Volume start should not work when xattrop dir not created
+TEST ! $CLI volume start $V0;
+
+TEST rm $B0/${V0}-0/.glusterfs/indices/xattrop
+
+#Volume start should work now
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#Check for the existence of indices/xattrop dir
+TEST [ -d $B0/${V0}-0/.glusterfs/indices/xattrop/ ];
+
+cleanup;
diff --git a/tests/bugs/unclassified/bug-1357397.t b/tests/bugs/unclassified/bug-1357397.t
new file mode 100644
index 00000000000..e2ec6f4d253
--- /dev/null
+++ b/tests/bugs/unclassified/bug-1357397.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 features.trash on
+TEST $CLI volume set $V0 features.trash-internal-op on
+
+TEST [ -e $B0/${V0}1/.trashcan ]
+
+TEST [ -e $B0/${V0}1/.trashcan/internal_op ]
+
+TEST $CLI volume stop $V0
+
+rm -rf $B0/${V0}1/.trashcan/internal_op
+
+TEST [ ! -e $B0/${V0}1/.trashcan/internal_op ]
+
+TEST $CLI volume start $V0 force
+
+TEST [ -e $B0/${V0}1/.trashcan/internal_op ]
+
+cleanup
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1385758
diff --git a/tests/bugs/unclassified/bug-874498.t b/tests/bugs/unclassified/bug-874498.t
new file mode 100644
index 00000000000..2aa9b168a8a
--- /dev/null
+++ b/tests/bugs/unclassified/bug-874498.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+TEST $CLI volume start $V0;
+
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+kill_brick $V0 $H0 $B0/brick1
+
+echo "GLUSTER FILE SYSTEM" > $M0/FILE1
+echo "GLUSTER FILE SYSTEM" > $M0/FILE2
+
+FILEN=$B0"/brick2"
+XATTROP=$FILEN/.glusterfs/indices/xattrop
+
+function get_gfid()
+{
+path_of_file=$1
+
+gfid_value=`getfattr -d -m . $path_of_file -e hex 2>/dev/null | grep trusted.gfid | grep -v gfid2path | cut --complement -c -15 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'`
+
+echo $gfid_value
+}
+
+GFID_ROOT=`get_gfid $B0/brick2`
+GFID_FILE1=`get_gfid $B0/brick2/FILE1`
+GFID_FILE2=`get_gfid $B0/brick2/FILE2`
+
+
+count=0
+for i in `ls $XATTROP`
+do
+ if [ "$i" == "$GFID_ROOT" ] || [ "$i" == "$GFID_FILE1" ] || [ "$i" == "$GFID_FILE2" ]
+ then
+ count=$(( count + 1 ))
+ fi
+done
+
+EXPECT "3" echo $count
+
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+
+##Expected number of entries are 0 in the .glusterfs/indices/xattrop directory
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $FILEN;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/unclassified/bug-991622.t b/tests/bugs/unclassified/bug-991622.t
new file mode 100644
index 00000000000..17b37a7767d
--- /dev/null
+++ b/tests/bugs/unclassified/bug-991622.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+#This tests that no fd leaks are observed in unlink/rename in open-behind
+function leaked_fds {
+ ls -l /proc/$(get_brick_pid $V0 $H0 $B0/$V0)/fd | grep deleted
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 open-behind on
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' "$M0/testfile1"
+TEST fd_write $fd1 "content"
+
+TEST fd2=`fd_available`
+TEST fd_open $fd2 'w' "$M0/testfile2"
+TEST fd_write $fd2 "content"
+
+TEST touch $M0/a
+TEST rm $M0/testfile1
+TEST mv $M0/a $M0/testfile2
+TEST fd_close $fd1
+TEST fd_close $fd2
+TEST ! leaked_fds
+cleanup;
diff --git a/tests/bugs/upcall/bug-1227204.t b/tests/bugs/upcall/bug-1227204.t
new file mode 100755
index 00000000000..fc393b1837f
--- /dev/null
+++ b/tests/bugs/upcall/bug-1227204.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# This regression test tries to ensure that quota limit-usage set work with
+# features.cache-invalidation on.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume set $V0 features.cache-invalidation on;
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir -p $M0/1/2;
+TEST $CLI volume quota $V0 limit-usage /1/2 100MB 70%;
+
+TEST $CLI volume status $V0
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1369430.t b/tests/bugs/upcall/bug-1369430.t
new file mode 100755
index 00000000000..f53c17a1495
--- /dev/null
+++ b/tests/bugs/upcall/bug-1369430.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Start the volume
+TEST $CLI volume start $V0
+
+## 4. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+## 8. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 10. Create directory and files from the M0
+TEST mkdir $M0/dir1
+TEST touch $M0/dir1/file{1..5}
+
+## 12. Access the files via readdirp, from M1
+TEST ls -l $M1/dir1/
+
+# Change the stat of one of the files from M0 and wait for it to
+# invalidate the md-cache of another mount M0
+echo "hello" > $M0/dir1/file2
+sleep 2;
+
+## 13. Expct non zero size when stat from M1
+EXPECT_NOT "0" stat -c %s $M1/dir1/file2
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1394131.t b/tests/bugs/upcall/bug-1394131.t
new file mode 100755
index 00000000000..b371ce4e682
--- /dev/null
+++ b/tests/bugs/upcall/bug-1394131.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 indexing on
+
+## 6. Start the volume
+TEST $CLI volume start $V0
+
+## 7. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+## 8. Create directory and files from the M0
+TEST touch $M0/file1
+TEST mv $M0/file1 $M0/file2
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1422776.t b/tests/bugs/upcall/bug-1422776.t
new file mode 100755
index 00000000000..cb249ce1cd2
--- /dev/null
+++ b/tests/bugs/upcall/bug-1422776.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 indexing on
+
+## Start the volume
+TEST $CLI volume start $V0
+TEST $CLI volume quota $V0 enable
+
+## Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+## Create directory and files from the M0
+TEST touch $M0/file1
+TEST mv $M0/file1 $M0/file2
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1458127.t b/tests/bugs/upcall/bug-1458127.t
new file mode 100755
index 00000000000..e844f37f1d3
--- /dev/null
+++ b/tests/bugs/upcall/bug-1458127.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 performance.nl-cache on
+TEST $CLI volume set $V0 nl-cache-positive-entry on
+TEST $CLI volume set $V0 nl-cache-timeout 2
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 2
+TEST $CLI volume set $V0 md-cache-timeout 20
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/xyz
+#Wait until upcall clears the fact that M0 had accessed dir
+sleep 4
+TEST mv $M0/dir/xyz $M0/dir/xyz1
+TEST ! ls $M0/dir/file1
+TEST touch $M1/dir/file1
+TEST ls $M0/dir/file1
+TEST ls $M0/dir/file1
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-upcall-stat.t b/tests/bugs/upcall/bug-upcall-stat.t
new file mode 100755
index 00000000000..0ba944ec441
--- /dev/null
+++ b/tests/bugs/upcall/bug-upcall-stat.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1};
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+#TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+## 5. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST $CLI volume profile $V0 start
+
+## 8. Create a file
+TEST touch $M0/file1
+
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+sleep 2; #There can be a very very small window where the next getxattr
+ #reaches md-cache, before the cache-invalidation caused by previous
+ #setxattr, reaches md-cache. Hence sleeping for 2 sec.
+ #Also it should not be > 600.
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz"
+
+$CLI volume profile $V0 info | grep -q CI_XATTR
+EXPECT '0' echo $?
diff --git a/tests/bugs/write-behind/bug-1058663.c b/tests/bugs/write-behind/bug-1058663.c
new file mode 100644
index 00000000000..aedf97d7487
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1058663.c
@@ -0,0 +1,123 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#define FILE_SIZE 1048576
+
+/* number of tests to run */
+#define RUN_LOOP 1000
+
+/* number of SIGBUS before exiting */
+#define MAX_SIGBUS 1
+static int expect_sigbus;
+static int sigbus_received;
+
+/* test for truncate()/seek()/write()/mmap()
+ * There should ne no SIGBUS triggered.
+ */
+void
+seek_write(char *filename)
+{
+ int fd;
+ uint8_t *map;
+ int i;
+
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ lseek(fd, FILE_SIZE - 1, SEEK_SET);
+ write(fd, "\xff", 1);
+
+ map = mmap(NULL, FILE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
+ for (i = 0; i < (FILE_SIZE - 1); i++) {
+ if (map[i] != 0) /* should never be true */
+ abort();
+ }
+ munmap(map, FILE_SIZE);
+
+ close(fd);
+}
+
+int
+read_after_eof(char *filename)
+{
+ int ret = 0;
+ int fd;
+ char *data;
+ uint8_t *map;
+
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ lseek(fd, FILE_SIZE - 1, SEEK_SET);
+ write(fd, "\xff", 1);
+
+ /* trigger verify that reading after EOF fails */
+ ret = read(fd, data, FILE_SIZE / 2);
+ if (ret != 0)
+ return 1;
+
+ /* map an area of 1 byte after FILE_SIZE */
+ map = mmap(NULL, 1, PROT_READ, MAP_PRIVATE, fd, FILE_SIZE);
+ /* map[0] is an access after EOF, it should trigger SIGBUS */
+ if (map[0] != 0)
+ /* it is expected that we exit before we get here */
+ if (!sigbus_received)
+ return 1;
+ munmap(map, FILE_SIZE);
+
+ close(fd);
+
+ return ret;
+}
+
+/* signal handler for SIGBUS */
+void
+catch_sigbus(int signum)
+{
+ switch (signum) {
+#ifdef __NetBSD__
+ /* Depending on architecture, we can get SIGSEGV */
+ case SIGSEGV: /* FALLTHROUGH */
+#endif
+ case SIGBUS:
+ sigbus_received++;
+ if (!expect_sigbus)
+ exit(EXIT_FAILURE);
+ if (sigbus_received >= MAX_SIGBUS)
+ exit(EXIT_SUCCESS);
+ break;
+ default:
+ printf("Unexpected signal received: %d\n", signum);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int i = 0;
+
+ if (argc == 1) {
+ printf("Usage: %s <filename>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+#ifdef __NetBSD__
+ /* Depending on architecture, we can get SIGSEGV */
+ signal(SIGSEGV, catch_sigbus);
+#endif
+ signal(SIGBUS, catch_sigbus);
+
+ /* the next test should not trigger SIGBUS */
+ expect_sigbus = 0;
+ for (i = 0; i < RUN_LOOP; i++) {
+ seek_write(argv[1]);
+ }
+
+ /* the next test should trigger SIGBUS */
+ expect_sigbus = 1;
+ if (read_after_eof(argv[1]))
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/bugs/write-behind/bug-1058663.t b/tests/bugs/write-behind/bug-1058663.t
new file mode 100644
index 00000000000..a900a6d7afa
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1058663.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# compile the test program and run it
+TEST $CC $(dirname $0)/bug-1058663.c -o $(dirname $0)/bug-1058663;
+TEST $(dirname $0)/bug-1058663 $M0/bug-1058663.bin;
+TEST rm -f $(dirname $0)/M0/bug-1058663.bin;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/write-behind/bug-1279730.c b/tests/bugs/write-behind/bug-1279730.c
new file mode 100644
index 00000000000..706ae67b102
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1279730.c
@@ -0,0 +1,149 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1, ret = -1, len = 0;
+ char *path = NULL,
+ buf[128] =
+ {
+ 0,
+ },
+ *cmd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ int write_to_child[2] =
+ {
+ 0,
+ },
+ write_to_parent[2] = {
+ 0,
+ };
+
+ path = argv[1];
+ cmd = argv[2];
+
+ assert(argc == 3);
+
+ ret = pipe(write_to_child);
+ if (ret < 0) {
+ fprintf(stderr,
+ "creation of write-to-child pipe failed "
+ "(%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = pipe(write_to_parent);
+ if (ret < 0) {
+ fprintf(stderr,
+ "creation of write-to-parent pipe failed "
+ "(%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = fork();
+ switch (ret) {
+ case 0:
+ close(write_to_child[1]);
+ close(write_to_parent[0]);
+
+ /* child, wait for instructions to execute command */
+ ret = read(write_to_child[0], buf, 128);
+ if (ret < 0) {
+ fprintf(stderr, "child: read on pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ system(cmd);
+
+ ret = write(write_to_parent[1], "1", 2);
+ if (ret < 0) {
+ fprintf(stderr, "child: write to pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+ break;
+
+ case -1:
+ fprintf(stderr, "fork failed (%s)\n", strerror(errno));
+ goto out;
+
+ default:
+ close(write_to_parent[1]);
+ close(write_to_child[0]);
+
+ fd = open(path, O_CREAT | O_RDWR | O_APPEND, S_IRWXU);
+ if (fd < 0) {
+ fprintf(stderr, "open failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ len = strlen("test-content") + 1;
+ ret = write(fd, "test-content", len);
+
+ if (ret < len) {
+ fprintf(stderr, "write failed %d (%s)\n", ret, strerror(errno));
+ }
+
+ ret = pread(fd, buf, 128, 0);
+ if ((ret == len) && (strcmp(buf, "test-content") == 0)) {
+ fprintf(stderr,
+ "read should've failed as previous "
+ "write would've failed with EDQUOT, but its "
+ "successful");
+ ret = -1;
+ goto out;
+ }
+
+ ret = write(write_to_child[1], "1", 2);
+ if (ret < 0) {
+ fprintf(stderr, "parent: write to pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = read(write_to_parent[0], buf, 128);
+ if (ret < 0) {
+ fprintf(stderr, "parent: read from pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ /* this will force a sync on cached-write and now that quota
+ limit is increased, sync will be successful. ignore return
+ value as fstat would fail with EDQUOT (picked up from
+ cached-write because of previous sync failure.
+ */
+ fstat(fd, &stbuf);
+
+ ret = pread(fd, buf, 128, 0);
+ if (ret != len) {
+ fprintf(stderr,
+ "post cmd read failed %d (data:%s) "
+ "(error:%s)\n",
+ ret, buf, strerror(errno));
+ goto out;
+ }
+
+ if (strcmp(buf, "test-content")) {
+ fprintf(stderr, "wrong data (%s)\n", buf);
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/write-behind/bug-1279730.t b/tests/bugs/write-behind/bug-1279730.t
new file mode 100755
index 00000000000..20447c349d5
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1279730.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume start $V0;
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 4
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# compile the test program and run it
+TEST $CC -O0 -g3 $(dirname $0)/bug-1279730.c -o $(dirname $0)/bug-1279730
+
+TEST $(dirname $0)/bug-1279730 $M0/file "\"$CLI volume quota $V0 limit-usage / 1024\""
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1279730
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1279730
+
diff --git a/tests/bugs/write-behind/issue-884.c b/tests/bugs/write-behind/issue-884.c
new file mode 100644
index 00000000000..e9c33b351ad
--- /dev/null
+++ b/tests/bugs/write-behind/issue-884.c
@@ -0,0 +1,267 @@
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+
+#include <glusterfs/api/glfs.h>
+
+/* Based on a reproducer by Stefan Ring. It seems to be quite sensible to any
+ * timing modification, so the code has been maintained as is, only with minor
+ * changes. */
+
+struct glfs *glfs;
+
+pthread_mutex_t the_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t the_cond = PTHREAD_COND_INITIALIZER;
+
+typedef struct _my_aiocb {
+ int64_t size;
+ volatile int64_t seq;
+ int which;
+} my_aiocb;
+
+typedef struct _worker_data {
+ my_aiocb cb;
+ struct iovec iov;
+ int64_t offset;
+} worker_data;
+
+typedef struct {
+ worker_data wdata[2];
+
+ volatile unsigned busy;
+} all_data_t;
+
+all_data_t all_data;
+
+static void
+completion_fnc(struct glfs_fd *fd, ssize_t ret, struct glfs_stat *pre,
+ struct glfs_stat *post, void *arg)
+{
+ void *the_thread;
+ my_aiocb *cb = (my_aiocb *)arg;
+ long seq = cb->seq;
+
+ assert(ret == cb->size);
+
+ pthread_mutex_lock(&the_mutex);
+ pthread_cond_broadcast(&the_cond);
+
+ all_data.busy &= ~(1 << cb->which);
+ cb->seq = -1;
+
+ the_thread = (void *)pthread_self();
+ printf("worker %d is done from thread %p, seq %ld!\n", cb->which,
+ the_thread, seq);
+
+ pthread_mutex_unlock(&the_mutex);
+}
+
+static void
+init_wdata(worker_data *data, int which)
+{
+ data->cb.which = which;
+ data->cb.seq = -1;
+
+ data->iov.iov_base = malloc(1024 * 1024);
+ memset(data->iov.iov_base, 6,
+ 1024 * 1024); /* tail part never overwritten */
+}
+
+static void
+init()
+{
+ all_data.busy = 0;
+
+ init_wdata(&all_data.wdata[0], 0);
+ init_wdata(&all_data.wdata[1], 1);
+}
+
+static void
+do_write(struct glfs_fd *fd, int content, int size, int64_t seq,
+ worker_data *wdata, const char *name)
+{
+ int ret;
+
+ wdata->cb.size = size;
+ wdata->cb.seq = seq;
+
+ if (content >= 0)
+ memset(wdata->iov.iov_base, content, size);
+ wdata->iov.iov_len = size;
+
+ pthread_mutex_lock(&the_mutex);
+ printf("(%d) dispatching write \"%s\", offset %lx, len %x, seq %ld\n",
+ wdata->cb.which, name, (long)wdata->offset, size, (long)seq);
+ pthread_mutex_unlock(&the_mutex);
+ ret = glfs_pwritev_async(fd, &wdata->iov, 1, wdata->offset, 0,
+ completion_fnc, &wdata->cb);
+ assert(ret >= 0);
+}
+
+#define IDLE 0 // both workers must be idle
+#define ANY 1 // use any worker, other one may be busy
+
+int
+get_worker(int waitfor, int64_t excl_seq)
+{
+ int which;
+
+ pthread_mutex_lock(&the_mutex);
+
+ while (waitfor == IDLE && (all_data.busy & 3) != 0 ||
+ waitfor == ANY &&
+ ((all_data.busy & 3) == 3 ||
+ excl_seq >= 0 && (all_data.wdata[0].cb.seq == excl_seq ||
+ all_data.wdata[1].cb.seq == excl_seq)))
+ pthread_cond_wait(&the_cond, &the_mutex);
+
+ if (!(all_data.busy & 1))
+ which = 0;
+ else
+ which = 1;
+
+ all_data.busy |= (1 << which);
+
+ pthread_mutex_unlock(&the_mutex);
+
+ return which;
+}
+
+static int
+doit(struct glfs_fd *fd)
+{
+ int ret;
+ int64_t seq = 0;
+ int64_t offset = 0; // position in file, in blocks
+ int64_t base = 0x1000; // where to place the data, in blocks
+
+ int async_mode = ANY;
+
+ init();
+
+ for (;;) {
+ int which;
+ worker_data *wdata;
+
+ // for growing to the first offset
+ for (;;) {
+ int gap = base + 0x42 - offset;
+ if (!gap)
+ break;
+ if (gap > 80)
+ gap = 80;
+
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = offset << 9;
+ do_write(fd, 0, gap << 9, seq++, wdata, "gap-filling");
+
+ offset += gap;
+ }
+
+ // 8700
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x42) << 9;
+ do_write(fd, 1, 62 << 9, seq++, wdata, "!8700");
+
+ // 8701
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x42) << 9;
+ do_write(fd, 2, 55 << 9, seq++, wdata, "!8701");
+
+ // 8702
+ which = get_worker(async_mode, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x79) << 9;
+ do_write(fd, 3, 54 << 9, seq++, wdata, "!8702");
+
+ // 8703
+ which = get_worker(async_mode, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0xaf) << 9;
+ do_write(fd, 4, 81 << 9, seq++, wdata, "!8703");
+
+ // 8704
+ // this writes both 5s and 6s
+ // the range of 5s is the one that overwrites 8703
+
+ which = get_worker(async_mode, seq - 1);
+ wdata = &all_data.wdata[which];
+
+ memset(wdata->iov.iov_base, 5, 81 << 9);
+ wdata->offset = (base + 0xaf) << 9;
+ do_write(fd, -1, 1623 << 9, seq++, wdata, "!8704");
+
+ offset = base + 0x706;
+ base += 0x1000;
+ if (base >= 0x100000)
+ break;
+ }
+
+ printf("done!\n");
+ fflush(stdout);
+
+ pthread_mutex_lock(&the_mutex);
+
+ while ((all_data.busy & 3) != 0)
+ pthread_cond_wait(&the_cond, &the_mutex);
+
+ pthread_mutex_unlock(&the_mutex);
+
+ ret = glfs_close(fd);
+ assert(ret >= 0);
+ /*
+ ret = glfs_fini(glfs);
+ assert(ret >= 0);
+ */
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret;
+ int open_flags = O_RDWR | O_DIRECT | O_TRUNC;
+ struct glfs_fd *fd;
+
+ glfs = glfs_new(argv[1]);
+ if (!glfs) {
+ printf("glfs_new!\n");
+ goto out;
+ }
+ ret = glfs_set_volfile_server(glfs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ printf("set_volfile!\n");
+ goto out;
+ }
+ ret = glfs_init(glfs);
+ if (ret) {
+ printf("init!\n");
+ goto out;
+ }
+ fd = glfs_open(glfs, argv[2], open_flags);
+ if (!fd) {
+ printf("open!\n");
+ goto out;
+ }
+ srand(time(NULL));
+ return doit(fd);
+out:
+ return 1;
+}
diff --git a/tests/bugs/write-behind/issue-884.t b/tests/bugs/write-behind/issue-884.t
new file mode 100755
index 00000000000..2bcf7d15265
--- /dev/null
+++ b/tests/bugs/write-behind/issue-884.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test tries to detect a race condition in write-behind. It's based on a
+# reproducer written by Stefan Ring that is able to hit it sometimes. On my
+# system, it happened around 10% of the runs. This means that if this bug
+# appears again, this test will fail once every 10 runs. Most probably this
+# failure will be hidden by the automatic test retry of the testing framework.
+#
+# Please, if this test fails, it needs to be analyzed in detail.
+
+function run() {
+ "${@}" >/dev/null
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+# This makes it easier to hit the issue
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+build_tester $(dirname $0)/issue-884.c -lgfapi
+
+TEST touch $M0/testfile
+
+# This program generates a file of 535694336 bytes with a fixed pattern
+TEST run $(dirname $0)/issue-884 $V0 testfile
+
+# This is the md5sum of the expected pattern without corruption
+EXPECT "ad105f9349345a70fc697632cbb5eec8" echo "$(md5sum $B0/$V0/testfile | awk '{ print $1; }')"
+
+cleanup
diff --git a/tests/changelog.rc b/tests/changelog.rc
new file mode 100644
index 00000000000..ffad2e80632
--- /dev/null
+++ b/tests/changelog.rc
@@ -0,0 +1,9 @@
+
+function count_htime_files {
+ ls -l $B0/$V0"1"/.glusterfs/changelogs/htime/ | grep HTIME | wc -l
+}
+
+function count_changelog_files {
+ # Where $1 is the brick name passed
+ ls -l $1/.glusterfs/changelogs/ | grep CHANGELOG | wc -l
+}
diff --git a/tests/cleanup.sh b/tests/cleanup.sh
new file mode 100644
index 00000000000..29fd6929fac
--- /dev/null
+++ b/tests/cleanup.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+. $(dirname $0)/include.rc
+cleanup
diff --git a/tests/cluster.rc b/tests/cluster.rc
new file mode 100644
index 00000000000..34f5b02398f
--- /dev/null
+++ b/tests/cluster.rc
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+CLUSTER_PFX="127.1.1"; # ".x" for each glusterd
+CLUSTER_COUNT=1; # Just initial definition
+
+function launch_cluster() {
+ local count=$1;
+
+ CLUSTER_COUNT=$count;
+
+ define_backends $count;
+ define_hosts $count;
+ define_glusterds $count $2;
+ define_clis $count $3;
+
+ start_glusterds;
+}
+
+
+function define_backends() {
+ local b;
+
+ for i in `seq 1 $count`; do
+ eval "B$i=$B0/$i";
+ done
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ mkdir -p ${!b}/glusterd;
+ mkdir -p ${!b}/run;
+ done
+}
+
+
+function define_glusterds() {
+ local count=$1;
+ local h;
+ local b;
+ local wopt;
+ local bopt;
+ local popt;
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ h="H$i";
+ wopt="management.working-directory=${!b}/glusterd";
+ ropt="management.run-directory=${!b}/run/gluster";
+ bopt="management.transport.socket.bind-address=${!h}";
+ popt="--pid-file=${!b}/glusterd.pid";
+ sopt="management.glusterd-sockfile=${!b}/glusterd/gd.sock"
+ #Get the logdir
+ logdir=`gluster --print-logdir`
+ clopt="management.cluster-test-mode=${logdir}/$i";
+ #Fetch the testcases name and prefix the glusterd log with it
+ logfile=`echo ${0##*/}`_glusterd$i.log
+ lopt="--log-file=$logdir/$i/$logfile"
+ if [ "$2" == "-LDEBUG" ]; then
+ eval "glusterd_$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ eval "glusterd$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ else
+ eval "glusterd_$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ eval "glusterd$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ fi
+ done
+}
+
+function start_glusterd() {
+ local g
+ local index=$1
+
+ g="glusterd_${index}"
+ ${!g}
+}
+
+function start_glusterds() {
+ for i in `seq 1 $CLUSTER_COUNT`; do
+ start_glusterd $i
+ done
+}
+
+
+function kill_glusterd() {
+ local index=$1;
+ local b;
+ local pidfile;
+
+ b="B$index";
+ pidfile="${!b}/glusterd.pid";
+
+ kill `cat $pidfile`;
+}
+
+function restart_glusterd() {
+ local index=$1
+ local b
+ local pidfile
+ local g
+
+ b="B$index"
+ pidfile="${!b}/glusterd.pid"
+
+ kill `cat $pidfile`
+
+ g="glusterd_${index}"
+ ${!g}
+}
+
+function kill_node() {
+ local index=$1;
+ local h;
+
+ h="H$index";
+
+ terminate_pids $(ps -ef | grep gluster | grep ${!h} | awk '{print $2}')
+ find $B0/$index/glusterd/vols -name '*.pid' | xargs rm -f
+}
+
+
+function define_hosts() {
+ local count=$1;
+
+ for i in `seq 1 $count`; do
+ eval "H_$i=${CLUSTER_PFX}.$i"
+ eval "H$i=${CLUSTER_PFX}.$i";
+ case $OSTYPE in
+ NetBSD)
+ ifconfig lo0 alias ${CLUSTER_PFX}.$i 2>/dev/null
+ ;;
+ *)
+ ;;
+ esac
+ done
+}
+
+
+function define_clis() {
+ local count=$1;
+ local h;
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ #get the logdir
+ logdir=`gluster --print-logdir`
+ #Fetch the testcases name and prefix the cli log with it
+ logfile=`echo ${0##*/}`_cli$i.log
+ lopt="--log-file=$logdir/$logfile"
+ logfile1=`echo ${0##*/}`_cli_$i.log
+ lopt1="--log-file=$logdir/$logfile1"
+
+
+ if [ "$2" == "-NO_FORCE" ]; then
+ eval "CLI_$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
+ eval "CLI$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
+ else
+ eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
+ eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
+ fi
+ done
+}
+
+function peer_count() {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function attempt_replace_brick {
+ local cli_no=$1
+ local vol=$2;
+ local src_brick=$3;
+ local dst_brick=$4;
+
+ eval \$CLI_$cli_no volume replace-brick $vol $src_brick $dst_brick commit force;
+ echo $?
+}
+
+function cluster_rebalance_status_field {
+ #The rebalance status can be up to 3 words, (e.g.:'fix-layout in progress'), hence the awk-print $7 thru $9.
+ #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(e.g.:'completed 3.00').
+ #So we trim the numbers out with `tr`. Finally remove the trailing white spaces with sed. What we get is one of the
+ #strings in the 'cli_vol_task_status_str' char array of cli-rpc-ops.c
+
+ eval \$CLI_$1 volume rebalance $2 status | awk '{print $7,$8,$9}' |sed -n 3p |tr -d '[^0-9+\.]'|sed 's/ *$//g'
+}
+
+function cluster_volinfo_field()
+{
+ local vol=$2;
+ local field=$3;
+ eval \$CLI_$1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function volinfo_field_1()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function volinfo_field_2()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_2 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function cluster_brick_up_status {
+ local vol=$2
+ local host=$3
+ local brick=$4
+ eval \$CLI_$1 volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'
+}
+
+function cluster_remove_brick_status_completed_field {
+ local vol=$1
+ local brick_list=$2
+ $CLI_1 volume remove-brick $vol $brick_list status | awk '{print $7}' | sed -n 3p
+}
diff --git a/tests/common-utils.rc b/tests/common-utils.rc
new file mode 100644
index 00000000000..2be4076e8b6
--- /dev/null
+++ b/tests/common-utils.rc
@@ -0,0 +1,7 @@
+
+function run_cmd_as_user {
+ local user=$1
+ shift
+ su -m $user -c "$*" || return 1
+ return 0
+}
diff --git a/tests/configfiles/bad_exports b/tests/configfiles/bad_exports
new file mode 100644
index 00000000000..6fd18d9415a
--- /dev/null
+++ b/tests/configfiles/bad_exports
@@ -0,0 +1,9 @@
+#$Id$
+#/0838586658093758013308385866580937580133083858665809375801330838586658093758013308385866580937580133083858665809375801330838586658093758013308385866580937580133 @test(sec=sys,rw,anonuid=0) 10.35.11.32(sec=sys,rw,anonuid=0)
+
+/test @test(sec=sys,rw,anonuid=0) shreyas.facebook.com(sec=sys,rw,anonuid=0) shreyas.s(sec=sys,rw,anonuid=595)
+çççßåß僃
+/asdf @ObVyg571RJaorkGbgVerI9esrck8yiVD7NVqqJvj2H9DuPH3SzHnYLIXjd4zZjuuh2N0O0bYYQf7VYNrYHoxc1llgRU1iEsQRy2XaWnUlhTHKVEL8tt1TrbZCi8qXyg0l058rTnW4msvU5hW83ESwyOE4bBSz4VsW0sJaVd8Gv4waZqojemLN8AIlAoChVOfP1yhuAP1298ejkaf2fjhdfa4t4effhgadff342fdddgasdg42gahgdmnui24290hfjdjadgdkjhg2nvncms(sec=sys,rw,anonuid=1)
+#/vol/root -sec=sys,rw=@storage.prn1:@storage.ash4:@storage.frc1,anon=0
+#/vol/home107 -sec=sys,rw,nosuid,root=@storage.prn1:@storage.ash4:@storage.frc1:@hr.ash3:@hr.prn1:ldap001.prn1.facebook.com:ldap001.frc1.facebook.com
+#/vol/home109 -sec=sys,rw,nosuid,root=@storage.prn1:@storage.ash4:@storage.frc1:@hr.ash3:@hr.prn1:ldap001.prn1.facebook.com:ldap001.frc1.facebook.com
diff --git a/tests/configfiles/bad_netgroups b/tests/configfiles/bad_netgroups
new file mode 100644
index 00000000000..ea27edfef10
--- /dev/null
+++ b/tests/configfiles/bad_netgroups
@@ -0,0 +1,5 @@
+asdf ng1
+ng1 ng2
+ng2 (dev1763.prn2.facebook.com, ,)
+
+emptyng
diff --git a/tests/configfiles/big_exports b/tests/configfiles/big_exports
new file mode 100644
index 00000000000..9ca5d655664
--- /dev/null
+++ b/tests/configfiles/big_exports
@@ -0,0 +1,10 @@
+/75213U8JV58PBY7F0VFGJ080MH3K71 @ZXV3UE7WJSCZSPMPAYUBACCZUOD0XY(sec=sys,rw,anonuid=0) 9PAC2KCTKRIH62CPGAMAUAJGLVQNS3(sec=sys,rw,anonuid=0)
+/O4DYT8D6QVS9EKEHTYOPTYL6IWU4DN @KLBH3LB3UN5LWDWPPQEQWEHYVL3K0A(sec=sys,rw,anonuid=0) B37PXMCQMY5IQPDGV08XC7ITYT650V(sec=sys,rw,anonuid=0)
+/OFHJLTKZMDAN28Q9IQQQIPFUEZ2YAN @UY3K3B8C05OQ4OTX42VXQKJ2CGJ8QX(sec=sys,rw,anonuid=0) AM0ET70HT6YND7D8RKG446LEOW40EC(sec=sys,rw,anonuid=0)
+/3VDZ2JHFQ2JGF2GQGYQH38UPAW6A6T @DEPUVDYZOJFCSQ7KD07NVPAFGEG7YJ(sec=sys,rw,anonuid=0) 5HI538NCEYF7KY7HC1F69UBWFVTIGA(sec=sys,rw,anonuid=0)
+/4ZI3ZRJUNQM21ZM8VB891X4ZCUHK7E @7U8TNSZ55AWJAOPAIV67OGPWLGM4JV(sec=sys,rw,anonuid=0) 8698JR9V4KKENE7UGYHV3T4XG9K0NH(sec=sys,rw,anonuid=0)
+/A4CSZ2FQ3VYPT9R0HYN3QVQ7TK9IHI @G2Z45H649YZ9WNC3OSU7STCLT3VWT9(sec=sys,rw,anonuid=0) 65CA94Z7JXZ0F0JB5EP95I6FBJT673(sec=sys,rw,anonuid=0)
+/G91PI0EX5TUYSX91IAH49M1GEMNKSP @O5IFIYJUENNNK16U0FK0QCDE0DK9G2(sec=sys,rw,anonuid=0) A8AZTTWC7BMTV8YW8XE4R57WUOSUMZ(sec=sys,rw,anonuid=0)
+/YCZFA0ALYC284R60E7QXQN7AVSILFO @7OGJV2J1NOII7UOGN12SUNRW3XBWWG(sec=sys,rw,anonuid=0) HDCDTY43SXOAH1TNUKB23MO9DE574W(sec=sys,rw,anonuid=0)
+/VBGB57O8R87B9N4E8QPGU6D55DVZE5 @F95KY58VAUOUX30QKIN16U987UU9BE(sec=sys,rw,anonuid=0) WGSH35L15FT2IC0IT9PTCU8SCYW9W4(sec=sys,rw,anonuid=0)
+/NTHST2FDSP35BKEEIOQIQX38722AN0 @T9BXSDXF2N5HVOM8P1BN0Q5IQ6RC34(sec=sys,rw,anonuid=0) OLJR1KXJRY14UEZNV1LP7RV68KPIW7(sec=sys,rw,anonuid=0)
diff --git a/tests/configfiles/exports b/tests/configfiles/exports
new file mode 100644
index 00000000000..82ba450403e
--- /dev/null
+++ b/tests/configfiles/exports
@@ -0,0 +1 @@
+/test @test(sec=sys,rw,anonuid=0) 10.35.11.31(sec=sys,rw,anonuid=0)
diff --git a/tests/configfiles/exports-v6 b/tests/configfiles/exports-v6
new file mode 100644
index 00000000000..426b1ef5705
--- /dev/null
+++ b/tests/configfiles/exports-v6
@@ -0,0 +1 @@
+/test @test(rw,anonuid=0,sec=sys,) 2401:db00:11:1:face:0:3d:0(rw,anonuid=0,sec=sys,)
diff --git a/tests/configfiles/exports_bad_opt b/tests/configfiles/exports_bad_opt
new file mode 100644
index 00000000000..70a56890e4c
--- /dev/null
+++ b/tests/configfiles/exports_bad_opt
@@ -0,0 +1 @@
+/groot asdf(r) @ngtop(r)
diff --git a/tests/configfiles/netgroups b/tests/configfiles/netgroups
new file mode 100644
index 00000000000..f1f5fcdc145
--- /dev/null
+++ b/tests/configfiles/netgroups
@@ -0,0 +1,4 @@
+asdf ng1
+ng1 ng2
+ng2 (dev1763.prn2.example.com,,)
+ng3 (dev-1763.prn-2.example.com,,)
diff --git a/tests/dht.rc b/tests/dht.rc
new file mode 100644
index 00000000000..6918ebde04b
--- /dev/null
+++ b/tests/dht.rc
@@ -0,0 +1,174 @@
+#!/bin/bash
+
+dhthashdebugxattr="dht.file.hashed-subvol."
+
+function get_layout()
+{
+ getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | cut -d = -f2
+}
+
+## populates $BRICK1 and $BRICK2 with hashed/cached subvolume. These will be
+## used by get_cached_brick and get_hashed_brick
+
+function file_has_linkfile()
+{
+ k=0
+ l=0
+ while [ $k -lt $BRICK_COUNT ]
+ do
+ stat=$(stat $B0/${V0}$k/$1 2>/dev/null)
+ if [ $? -eq 0 ]
+ then
+ let l++
+ let "BRICK${l}=$k"
+
+ fi
+ let k++
+ done
+ return $l
+}
+
+function get_cached_brick()
+{
+ i=1
+ brick=$BRICK1
+ while [ $i -lt 3 ]
+ do
+ test=$(getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$brick/$1 2>&1)
+ if [ $? -eq 1 ]
+ then
+ cached=$brick
+ i=$(( $i+3 ))
+ fi
+ brick=$BRICK1
+ let i++
+ done
+
+ return $cached
+}
+
+function get_hashed_brick()
+{
+ j=1
+ brick=$BRICK1
+ while [ $j -lt 3 ]
+ do
+ test=$(getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$brick/$1 2>&1)
+ if [ $? -eq 0 ]
+ then
+ hashed=$brick
+ j=$(( $j+3 ))
+ fi
+ brick=$BRICK2
+ let j++
+ done
+
+ return $hashed
+}
+
+
+function cluster_rebalance_completed()
+{
+ val=1
+
+ # Rebalance status will be either "failed" or "completed"
+
+ test=$($CLI_1 volume rebalance $V0 status | grep "in progress" 2>&1)
+ if [ $? -ne 0 ]
+ then
+ val=0
+ fi
+
+ echo $val
+ # Do not *return* the value here. If it's non-zero, that will cause
+ # EXPECT_WITHIN (e.g. in bug-884455.t) to return prematurely, leading to
+ # a spurious test failure. Nothing else checks the return value anyway
+ # (they all check the output) so there's no need for it to be non-zero
+ # just because grep didn't find what we want.
+}
+
+function rebalance_completed()
+{
+ val=1
+ test=$($CLI volume rebalance $V0 status | grep localhost | grep "completed" 2>&1)
+ if [ $? -eq 0 ]
+ then
+ val=0
+ fi
+
+ echo $val
+ # Do not *return* the value here. If it's non-zero, that will cause
+ # EXPECT_WITHIN (e.g. in bug-884455.t) to return prematurely, leading to
+ # a spurious test failure. Nothing else checks the return value anyway
+ # (they all check the output) so there's no need for it to be non-zero
+ # just because grep didn't find what we want.
+}
+
+function remove_brick_completed()
+{
+ val=1
+ test=$(gluster volume remove-brick $V0 $H0:$B0/${V0}2 status | grep localhost | grep "completed" 2>&1)
+ if [ $? -eq 0 ]
+ then
+ val=0
+ fi
+
+ echo $val
+}
+
+function dht_get_linkto_target()
+{
+ local path=$1;
+ echo $(getfattr -e text --only-values --absolute-names -n trusted.glusterfs.dht.linkto $path)
+}
+
+function is_dht_linkfile()
+{
+ local path=$1
+ retval=0
+ local output=$(stat -c %a $path)
+ if [ $output -eq 1000 ]; then
+ retval=1
+ fi
+
+ echo $retval
+ return $retval
+}
+
+
+# Given an existing directory on the volume, get the hashed subvol for a file
+# in that directory
+# Input: filename dirpath_on_mount
+
+function dht_get_hash_subvol()
+{
+ local hashed_subvol
+ hashed_subvol=$(getfattr --only-values -n "$dhthashdebugxattr$1" $2 2>/dev/null)
+ echo $hashed_subvol
+}
+
+
+# Find the first filename that hashes to the same subvol
+# as $1
+# Input: subvol_name dirpath_on_mount file_pattern
+
+function dht_first_filename_with_hashsubvol()
+{
+ local in_subvol=$1
+ local in_path=$2
+ local in_hash_subvol
+ local file_pattern=$3
+ local filename
+
+ for i in {1..50}
+ do
+ filename="$file_pattern$i"
+ in_hash_subvol=$(dht_get_hash_subvol "$filename" "$in_path")
+ # echo $in_hash_subvol
+ if [ "$in_subvol" == "$in_hash_subvol" ]; then
+ fn_return_val=$filename
+ return 0
+ fi
+ done
+ return 1
+}
diff --git a/tests/ec.rc b/tests/ec.rc
new file mode 100644
index 00000000000..f18752fc99a
--- /dev/null
+++ b/tests/ec.rc
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+function ec_up_status()
+{
+ local v=$1
+ local m=$2
+ local ec_id=$3
+ grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='
+}
+
+function ec_option_value()
+{
+ local v=$1
+ local m=$2
+ local ec_id=$3
+ local opt=$4
+ grep -E "^$opt =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}'
+}
diff --git a/tests/env.rc.in b/tests/env.rc.in
new file mode 100644
index 00000000000..0478d66aec6
--- /dev/null
+++ b/tests/env.rc.in
@@ -0,0 +1,42 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+
+PATH=@bindir@:@sbindir@:$PATH
+export PATH
+
+GLUSTERD_PIDFILEDIR=@localstatedir@/run/gluster
+export GLUSTERD_PIDFILEDIR
+
+LD_LIBRARY_PATH=@libdir@:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH
+
+LIBRARY_PATH=@libdir@:$LIBRARY_PATH
+export LIBRARY_PATH
+
+CPATH=@includedir@:$CPATH
+export CPATH
+
+GLUSTERD_WORKDIR=@GLUSTERD_WORKDIR@
+export GLUSTERD_WORKDIR
+
+PKG_CONFIG_PATH=@pkgconfigdir@:$PKG_CONFIG_PATH
+export PKG_CONFIG_PATH
+
+PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
+export PYTHONPATH
+
+PYTHON=@PYTHON@
+export PYTHON
+
+PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
+export PYTHONPATH
+
+GLUSTER_CMD_DIR=@sbindir@
+export GLUSTER_CMD_DIR
+
+GLUSTER_LIBEXECDIR=@GLUSTERFS_LIBEXECDIR@
+export GLUSTER_LIBEXECDIR
+
+RUN_NFS_TESTS=@BUILD_GNFS@
+export RUN_NFS_TESTS
diff --git a/tests/experimental/.gitignore b/tests/experimental/.gitignore
new file mode 100644
index 00000000000..803908720af
--- /dev/null
+++ b/tests/experimental/.gitignore
@@ -0,0 +1,7 @@
+# Initial commit, git needs a file to add a directory
+# First tests to appear here, should remove this file
+# Directives:
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
diff --git a/tests/fallocate.rc b/tests/fallocate.rc
new file mode 100644
index 00000000000..d117ba9c051
--- /dev/null
+++ b/tests/fallocate.rc
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Helper to verify a given fallocate command is supported and skip a test
+# otherwise. Older versions of the fallocate utility might not support all modes
+# (i.e., discard) and older versions of fuse might not support the associated
+# fallocate requests.
+
+function require_fallocate()
+{
+ output=`fallocate $* 2>&1`
+ ret=$?
+ if [ ! $ret -eq 0 ] && ([[ $output == *unsupported* ]] ||
+ [[ $output == *invalid* ]] ||
+ [[ $output == *"not supported"* ]])
+ then
+ SKIP_TESTS
+ exit
+ fi
+}
diff --git a/tests/fdl.rc b/tests/fdl.rc
new file mode 100644
index 00000000000..df58305b923
--- /dev/null
+++ b/tests/fdl.rc
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+log_base=$($CLI --print-logdir)
+log_id=${B0}/${V0}-0
+log_id=${log_id:1} # Remove initial slash
+log_id=${log_id//\//-} # Replace remaining slashes with dashes
+FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl
+FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl
+
+check_logfile() {
+ [ $(gf_logdump $FDL_META_FILE $FDL_DATA_FILE | grep $1 | wc -l) -ge $2 ]
+}
diff --git a/tests/features/delay-gen.t b/tests/features/delay-gen.t
new file mode 100755
index 00000000000..72e6dbb7697
--- /dev/null
+++ b/tests/features/delay-gen.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable read,write
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI volume profile $V0 start
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+TEST dd if=/dev/zero of=$M0/1 count=1 bs=128k oflag=sync
+
+#Write should take at least a second
+write_max_latency=$($CLI volume profile $V0 info | grep WRITE | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+#Create should not take a second
+create_max_latency=$($CLI volume profile $V0 info | grep CREATE | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ ! -z $write_max_latency ];
+TEST [ -z $create_max_latency ];
+
+# Not providing a particular fop will make it test everything
+TEST $CLI volume reset $V0 delay-gen.enable
+TEST $CLI volume set $V0 delay-gen.delay-duration 100
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0 >/dev/null
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+$(dirname $0)/../basic/rpc-coverage.sh $M0 >/dev/null
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=1501397
diff --git a/tests/features/dh1024.pem b/tests/features/dh1024.pem
new file mode 100644
index 00000000000..fe514bd4ee5
--- /dev/null
+++ b/tests/features/dh1024.pem
@@ -0,0 +1,5 @@
+-----BEGIN DH PARAMETERS-----
+MIGHAoGBAL2k+efZ6g50PpL41G96IaRw2OTH921yhHMNSXBE/K+R6oTkJFcNJs1N
+q+a1Ko2xCBDa5MgvudqWep6PvE06rzEaJPW8ITdu8j3Eo9T1rorJ3CctpE/CaRl2
+7v4DNe+Mho6q1MPlG5PfXEZWgbT7tjn/Y6lwD/B2CoMzAx+4DXgbAgEC
+-----END DH PARAMETERS-----
diff --git a/tests/features/fdl-overflow.t b/tests/features/fdl-overflow.t
new file mode 100644
index 00000000000..34b941d2f2a
--- /dev/null
+++ b/tests/features/fdl-overflow.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fdl.rc
+
+_check_sizes () {
+ local n=0
+ local sz
+ local total_sz=0
+
+ # We don't care about the sizes of the meta files. That would be
+ # embedding too much of the implementation into the test.
+ n=$(ls ${log_base}/${log_id}-meta-*.jnl | wc -l)
+ [ $n = 2 ] || return 1
+
+ # We *do* care about the sizes of the data files, which should exactly
+ # reflect the amount of data written via dd.
+ n=0
+ while read sz name; do
+ G_LOG "found journal ${name} size ${sz}MB"
+ n=$((n+1))
+ total_sz=$((total_sz+sz))
+ done < <(du -sm ${log_base}/${log_id}-data-*.jnl)
+ [ $n = 2 ] || return 1
+ # On our CentOS and NetBSD regression-test systems, but not on my Fedora
+ # development system, each file ends up being slightly larger than its
+ # data size because of metadata, and 'du' rounds that up to a full extra
+ # megabyte. We'll allow either result, because what we're really
+ # looking for is a complete failure to roll over from one file to
+ # another at the appropriate size.
+ [ $total_sz = 20 -o $total_sz = $((n+20)) ] || return 1
+
+ return 0
+}
+
+check_sizes () {
+ set -x
+ _check_sizes
+ ret=$?
+ set +x
+ return ret
+}
+
+if [ x"$OSTYPE" = x"NetBSD" ]; then
+ CREAT_OFLAG="creat,"
+else
+ CREAT_OFLAG=""
+fi
+
+TEST rm -f ${log_base}/${log_id}-*.log
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 changelog.changelog off
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and unmount/stop so we can see log sizes.
+TEST dd if=/dev/zero of=$M0/twentyMB bs=1048576 count=20 \
+ oflag=${CREAT_OFLAG}sync
+TEST umount $M0
+TEST $CLI volume stop $V0
+
+TEST _check_sizes
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/features/fdl.t b/tests/features/fdl.t
new file mode 100644
index 00000000000..5a3c13fc850
--- /dev/null
+++ b/tests/features/fdl.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fdl.rc
+
+if [ x"$OSTYPE" = x"NetBSD" ]; then
+ CREAT_OFLAG="creat,"
+else
+ CREAT_OFLAG=""
+fi
+
+TEST rm -f $FDL_META_FILE $FDL_DATA_FILE
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 changelog.changelog off
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and unmount.
+TEST mkdir -p $M0/abc/def
+TEST dd if=/dev/zero of=$M0/abc/def/ghi bs=128 count=2 \
+ oflag=${CREAT_OFLAG}sync
+TEST chmod 314 $M0/abc/def/ghi
+TEST rm -rf $M0/abc
+TEST umount $M0
+
+# Check that gf_logdump works, and shows the ops we just issued. There will be
+# more SETATTR ops than the one corresponding to our chmod, because some are
+# issued internally. We have to guess a bit about where the log will be.
+TEST check_logfile GF_FOP_MKDIR 2
+TEST check_logfile GF_FOP_CREATE 1
+TEST check_logfile GF_FOP_WRITE 2
+TEST check_logfile GF_FOP_SETATTR 1
+TEST check_logfile GF_FOP_UNLINK 1
+TEST check_logfile GF_FOP_RMDIR 2
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t
new file mode 100644
index 00000000000..b8717e30dfb
--- /dev/null
+++ b/tests/features/flock_interrupt.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0;
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/testfile;
+
+echo > got_lock
+flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok > got_lock; } &
+
+EXPECT_WITHIN 4 ok cat got_lock;
+
+## Finish up
+rm -f got_lock;
+cleanup;
diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t
new file mode 100644
index 00000000000..dd6be2d5397
--- /dev/null
+++ b/tests/features/fuse-lru-limit.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "2" online_brick_count
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+mkdir ${M0}/dir-{1..9}
+for i in {1..9}; do
+ for j in {1..1000}; do
+ echo "Test file" > ${M0}/dir-$i/file-$j;
+ done;
+done
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be 9000+
+TEST [ $lc -ge 9000 ]
+
+TEST umount $M0
+
+TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0
+
+TEST find $M0
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be <1000
+# Not sure if there are any possibilities of buffer need.
+TEST [ $lc -le 1000 ]
+
+TEST rm -rf $M0/*
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+cleanup
diff --git a/tests/features/glfs-lease-recall.c b/tests/features/glfs-lease-recall.c
new file mode 100644
index 00000000000..9a60f9beec1
--- /dev/null
+++ b/tests/features/glfs-lease-recall.c
@@ -0,0 +1,372 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/* Few rules:
+ * 1. A client may have multiple lease keys, but a lease key cannot be shared by
+ * multiple clients.
+ * 2. Lease key can be set before open, or in glfs_lease request. A lease key
+ * set like this is valid for the lifetime of the fd, i.e. a fd cannot have
+ * multiple lease key. But a lease key can be shared across multiple fds.
+ */
+glfs_t *client1 = NULL, *client2 = NULL;
+glfs_fd_t *fd1 = NULL;
+FILE *log_file = NULL;
+char lid1[GLFS_LEASE_ID_SIZE] = "lid1-clnt1",
+ lid2[GLFS_LEASE_ID_SIZE] = "lid2-clnt2";
+char lid3[GLFS_LEASE_ID_SIZE] = "lid3-clnt2", lid4[GLFS_LEASE_ID_SIZE] = {
+ 0,
+};
+char *volname = NULL, *glfs_log_file = NULL;
+int upcall_recv = 0;
+
+#define MAX_CLIENTS 4
+#define MAX_FDS 4
+#define TEST_FILE "/test/lease"
+#define SHUD_PASS 0
+#define SHUD_FAIL -1
+#define NONE 0
+
+static void
+recall_cbk(struct glfs_lease lease, void *data);
+
+static int
+set_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, fd);
+ if (ret < 0) {
+ fprintf(log_file, "\n RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RD_LEASE");
+ return ret;
+}
+
+static int
+set_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n RW_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RW_LEASE");
+ return ret;
+}
+
+static int
+get_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_GET_LEASE;
+ lease.lease_type = -1;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n GET_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ if (lease.lease_type == GLFS_RD_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE");
+ else if (lease.lease_type == GLFS_RW_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RW_LEASE");
+ else if (lease.lease_type == 3)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE|RW_LEASE");
+ else if (lease.lease_type == 0)
+ fprintf(log_file, "\n Esisting Lease: NONE");
+ else
+ fprintf(log_file, "\n Existing lease type:%d", lease.lease_type);
+ return lease.lease_type;
+}
+
+static int
+unlk_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RW_LESAE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RW_LEASE");
+ return ret;
+}
+
+static int
+unlk_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RD_LEASE");
+ return ret;
+}
+
+void
+up_async_lease_recall(struct glfs_upcall *up_arg, void *data)
+{
+ struct glfs_upcall_lease *in_arg = NULL;
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_object *object = NULL;
+ uint64_t flags = 0;
+ uint64_t expire = 0;
+
+ if (!up_arg)
+ return;
+
+ reason = glfs_upcall_get_reason(up_arg);
+
+ /* Expect 'GLFS_UPCALL_RECALL_LEASE' upcall event. */
+
+ if (reason == GLFS_UPCALL_RECALL_LEASE) {
+ in_arg = glfs_upcall_get_event(up_arg);
+
+ object = glfs_upcall_lease_get_object(in_arg);
+
+ fprintf(log_file,
+ " upcall event type - %d,"
+ " object(%p)\n",
+ reason, object);
+ upcall_recv = 1;
+ }
+
+ glfs_free(up_arg);
+ return;
+}
+
+glfs_t *
+setup_new_client(char *volname, char *log_fileile)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ int up_events = GLFS_EVENT_ANY;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(log_file, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_fileile, 7);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ /* Register Upcalls */
+ ret = glfs_upcall_register(fs, up_events, up_async_lease_recall, NULL);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_RECALL_LEASE)) {
+ fprintf(stderr,
+ "glfs_upcall_register return doesn't contain"
+ " upcall event - GLFS_EVENT_RECALL_LEASE\n");
+ goto error;
+ }
+
+ return fs;
+error:
+ if (fs)
+ glfs_fini(fs);
+ return NULL;
+}
+
+#define OPEN(client, flags, fd, lease_id) \
+ do { \
+ int ret_val = 0; \
+ ret_val = glfs_setfsleaseid(lease_id); \
+ if (ret_val) { \
+ fprintf(log_file, \
+ "\nglfs_setfsleaseid failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ fd = glfs_open(client, TEST_FILE, flags); \
+ if (fd == NULL) { \
+ fprintf(log_file, "\nglfs_open failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ } while (0)
+
+#define VERIFY_RESULT(test_case, ret, value) \
+ do { \
+ if (ret != value) { \
+ fprintf(log_file, \
+ "\n Testcase %d failed, ret = %d, value=%d\n", \
+ test_case, ret, value); \
+ goto error; /*test unsuccessful*/ \
+ } \
+ fprintf(log_file, "\n Testcase %d Succeeded\n", test_case); \
+ } while (0)
+
+static void
+recall_cbk(struct glfs_lease lease, void *data)
+{
+ int ret = -1;
+ char ld[GLFS_LEASE_ID_SIZE] = "";
+
+ fprintf(log_file, "\nRECALL received on lease_id:(%s)", lease.lease_id);
+ memcpy(ld, lease.lease_id, GLFS_LEASE_ID_SIZE);
+ ret = unlk_write_lease((glfs_fd_t *)data, ld);
+ VERIFY_RESULT(500, ret, SHUD_PASS);
+error:
+ return;
+}
+
+static int
+testcase_recall_conflict_lease()
+{
+ struct glfs_object *obj = NULL;
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+ struct glfs_lease lease = {
+ 0,
+ };
+
+ fprintf(log_file,
+ "\n Basic test case for conflicting lease causing recall");
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, lid2, GLFS_LEASE_ID_SIZE);
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ /* reset counter */
+ upcall_recv = 0;
+
+ obj = glfs_h_lookupat(client2, NULL, TEST_FILE, NULL, 0);
+ ret = glfs_h_lease(client2, obj, &lease);
+ VERIFY_RESULT(2, ret, SHUD_FAIL);
+
+ sleep(3);
+ /* should recv upcall */
+ VERIFY_RESULT(6, !upcall_recv, SHUD_PASS);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = glfs_h_close(obj);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ int x = 0;
+ ssize_t xattr_size = -1;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <Vol> <glfs client log file> "
+ "<testcase log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ log_file = fopen(argv[3], "w");
+ if (!log_file)
+ goto error;
+
+ volname = argv[1];
+ glfs_log_file = argv[2];
+
+ /* Setup 2 clients */
+ client1 = setup_new_client(volname, glfs_log_file);
+ client2 = setup_new_client(volname, glfs_log_file);
+
+ ret = testcase_recall_conflict_lease();
+ VERIFY_RESULT(101, ret, SHUD_PASS);
+
+ glfs_fini(client1);
+ glfs_fini(client2);
+
+ fclose(log_file);
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/features/glfs-lease.c b/tests/features/glfs-lease.c
new file mode 100644
index 00000000000..e82cd875b38
--- /dev/null
+++ b/tests/features/glfs-lease.c
@@ -0,0 +1,717 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/* Few rules:
+ * 1. A client may have multiple lease keys, but a lease key cannot be shared by
+ * multiple clients.
+ * 2. Lease key can be set before open, or in glfs_lease request. A lease key
+ * set like this is valid for the lifetime of the fd, i.e. a fd cannot have
+ * multiple lease key. But a lease key can be shared across multiple fds.
+ */
+glfs_t *client1 = NULL, *client2 = NULL, *client3 = NULL, *client4 = NULL;
+glfs_fd_t *fd1 = NULL, *fd2 = NULL, *fd3 = NULL, *fd4 = NULL;
+FILE *log_file = NULL;
+char lid1[GLFS_LEASE_ID_SIZE] = "lid1-clnt1",
+ lid2[GLFS_LEASE_ID_SIZE] = "lid2-clnt2";
+char lid3[GLFS_LEASE_ID_SIZE] = "lid3-clnt2", lid4[GLFS_LEASE_ID_SIZE] = {
+ 0,
+};
+char *volname = NULL, *glfs_log_file = NULL;
+
+#define MAX_CLIENTS 4
+#define MAX_FDS 4
+#define TEST_FILE "/test/lease"
+#define SHUD_PASS 0
+#define SHUD_FAIL -1
+#define NONE 0
+
+static void
+recall_cbk(struct glfs_lease lease, void *data);
+
+static int
+set_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, fd);
+ if (ret < 0) {
+ fprintf(log_file, "\n RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RD_LEASE");
+ return ret;
+}
+
+static int
+set_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n RW_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RW_LEASE");
+ return ret;
+}
+
+static int
+get_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_GET_LEASE;
+ lease.lease_type = -1;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n GET_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ if (lease.lease_type == GLFS_RD_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE");
+ else if (lease.lease_type == GLFS_RW_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RW_LEASE");
+ else if (lease.lease_type == 3)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE|RW_LEASE");
+ else if (lease.lease_type == 0)
+ fprintf(log_file, "\n Esisting Lease: NONE");
+ else
+ fprintf(log_file, "\n Existing lease type:%d", lease.lease_type);
+ return lease.lease_type;
+}
+
+static int
+unlk_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RW_LESAE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RW_LEASE");
+ return ret;
+}
+
+static int
+unlk_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RD_LEASE");
+ return ret;
+}
+
+glfs_t *
+setup_new_client(char *volname, char *log_fileile)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(log_file, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_fileile, 7);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+ return fs;
+error:
+ return NULL;
+}
+
+#define OPEN(client, flags, fd, lease_id) \
+ do { \
+ int ret_val = 0; \
+ ret_val = glfs_setfsleaseid(lease_id); \
+ if (ret_val) { \
+ fprintf(log_file, \
+ "\nglfs_setfsleaseid failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ fd = glfs_open(client, TEST_FILE, flags); \
+ if (fd == NULL) { \
+ fprintf(log_file, "\nglfs_open failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ } while (0)
+
+#define VERIFY_RESULT(test_case, ret, value) \
+ do { \
+ if (ret != value) { \
+ fprintf(log_file, \
+ "\n Testcase %d failed, ret = %d, value=%d\n", \
+ test_case, ret, value); \
+ goto error; /*test unsuccessful*/ \
+ } \
+ fprintf(log_file, "\n Testcase %d Succeeded\n", test_case); \
+ } while (0)
+
+static void
+recall_cbk(struct glfs_lease lease, void *data)
+{
+ int ret = -1;
+ char ld[GLFS_LEASE_ID_SIZE] = "";
+
+ fprintf(log_file, "\nRECALL received on lease_id:(%s)", lease.lease_id);
+ memcpy(ld, lease.lease_id, GLFS_LEASE_ID_SIZE);
+ ret = unlk_write_lease((glfs_fd_t *)data, ld);
+ VERIFY_RESULT(500, ret, SHUD_PASS);
+error:
+ return;
+}
+
+static int
+testcase1_rd_lease()
+{
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for Read lease:");
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDONLY, fd1, lid1);
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_FAIL);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, GLFS_RD_LEASE);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, SHUD_FAIL);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase2_wr_lease()
+{
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for Write lease:");
+ /* Open fd on client 1 in WRonly mode */
+ OPEN(client1, O_WRONLY, fd1, lid1);
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_FAIL);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, GLFS_RW_LEASE);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(7, ret, SHUD_FAIL);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase3_rd_wr_lease()
+{
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for Read Write lease:");
+ /* Open fd on client 1 in WRonly mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, (GLFS_RW_LEASE | GLFS_RD_LEASE));
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, GLFS_RD_LEASE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(7, ret, NONE);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase4_rd_lease_multi_clnt()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for multi client Read lease:");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDONLY, fd1, lid1);
+
+ /* Open fd on client 2 in RW mode */
+ OPEN(client2, O_RDONLY, fd2, lid2);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = set_read_lease(fd2, lid2);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, GLFS_RD_LEASE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_read_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = get_lease(fd2, lid2);
+ VERIFY_RESULT(7, ret, NONE);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(9, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase5_openfd_multi_lid()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd3 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for multi lid openfd check:");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDONLY, fd1, lid1);
+
+ /* Open fd on client 2 in RW mode */
+ OPEN(client2, O_RDWR, fd2, lid2);
+ OPEN(client2, O_RDWR, fd3, lid2);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(
+ 1, ret,
+ SHUD_FAIL); /*As there are other openfds in WR mode from diff lid*/
+
+ ret = set_write_lease(fd2, lid2);
+ VERIFY_RESULT(
+ 2, ret, SHUD_FAIL); /*As thers is another fd in RD mode from diff lid */
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd2, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_write_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(6, ret, SHUD_PASS);
+
+ ret = glfs_close(fd3);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase6_openfd_same_lid()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd3 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for same lid openfd check:");
+
+ /* Open fd on client 2 in RW mode */
+ OPEN(client1, O_RDWR, fd1, lid2);
+ OPEN(client1, O_RDWR, fd2, lid2);
+
+ ret = set_write_lease(fd1, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd2, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = set_read_lease(fd2, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_write_lease(fd1, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = unlk_read_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = unlk_write_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(6, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase7_rd_multi_lid()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for multi lease id Read lease:");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client2, O_RDONLY, fd1, lid2);
+
+ /* Open fd on client 2 in RD mode */
+ OPEN(client2, O_RDONLY, fd2, lid3);
+
+ ret = set_read_lease(fd1, lid2);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = set_read_lease(fd2, lid3);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid2);
+ VERIFY_RESULT(3, ret, GLFS_RD_LEASE);
+
+ ret = unlk_read_lease(fd1, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_read_lease(fd2, lid3);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid2);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = get_lease(fd2, lid3);
+ VERIFY_RESULT(7, ret, NONE);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(9, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase8_client_disconnect()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for client disconnect cleanup");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, GLFS_RD_LEASE);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, (GLFS_RD_LEASE | GLFS_RW_LEASE));
+
+ ret = glfs_fini(client1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ /* Open fd on client 2 in RD mode */
+ OPEN(client2, O_RDONLY, fd2, lid3);
+
+ ret = get_lease(fd2, lid3);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ client1 = setup_new_client(volname, glfs_log_file);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase9_recall_conflict_lease()
+{
+ struct glfs_object *obj = NULL;
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+ struct glfs_lease lease = {
+ 0,
+ };
+
+ fprintf(log_file,
+ "\n Basic test case for conflicting lease causing recall");
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, lid2, GLFS_LEASE_ID_SIZE);
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ obj = glfs_h_lookupat(client2, NULL, TEST_FILE, NULL, 0);
+ ret = glfs_h_lease(client2, obj, &lease);
+ VERIFY_RESULT(2, ret, SHUD_FAIL);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ sleep(3);
+ ret = glfs_h_close(obj);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase10_recall_open_conflict()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for conflicting open causing recall");
+
+ /* Open fd on client 1 in RW mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ /* Open fd on client 1 in RW mode */
+ OPEN(client2, O_RDWR, fd2, lid2);
+
+ /* TODO: Check for recall cbk functionality */
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ int x = 0;
+ ssize_t xattr_size = -1;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <Vol> <glfs client log file> "
+ "<testcase log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ log_file = fopen(argv[3], "w");
+ if (!log_file)
+ goto error;
+
+ volname = argv[1];
+ glfs_log_file = argv[2];
+
+ /* Setup 3 clients */
+ client1 = setup_new_client(volname, glfs_log_file);
+ client2 = setup_new_client(volname, glfs_log_file);
+ client3 = setup_new_client(volname, glfs_log_file);
+
+ ret = testcase1_rd_lease();
+ VERIFY_RESULT(101, ret, SHUD_PASS);
+
+ ret = testcase2_wr_lease();
+ VERIFY_RESULT(102, ret, SHUD_PASS);
+
+ ret = testcase3_rd_wr_lease();
+ VERIFY_RESULT(103, ret, SHUD_PASS);
+
+ ret = testcase4_rd_lease_multi_clnt();
+ VERIFY_RESULT(104, ret, SHUD_PASS);
+
+ ret = testcase5_openfd_multi_lid();
+ VERIFY_RESULT(105, ret, SHUD_PASS);
+
+ ret = testcase6_openfd_same_lid();
+ VERIFY_RESULT(106, ret, SHUD_PASS);
+
+ ret = testcase7_rd_multi_lid();
+ VERIFY_RESULT(107, ret, SHUD_PASS);
+
+ ret = testcase8_client_disconnect();
+ VERIFY_RESULT(108, ret, SHUD_PASS);
+
+ ret = testcase9_recall_conflict_lease();
+ VERIFY_RESULT(109, ret, SHUD_PASS);
+
+ ret = testcase10_recall_open_conflict();
+ VERIFY_RESULT(110, ret, SHUD_PASS);
+
+ glfs_fini(client1);
+ glfs_fini(client2);
+ glfs_fini(client3);
+
+ fclose(log_file);
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/features/glfs-lease.t b/tests/features/glfs-lease.t
new file mode 100755
index 00000000000..6ef6da05043
--- /dev/null
+++ b/tests/features/glfs-lease.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 leases on
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume set $V0 write-behind on
+TEST $CLI volume start $V0
+
+logdir=`gluster --print-logdir`
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/test
+TEST touch $M0/test/lease
+
+build_tester $(dirname $0)/glfs-lease.c -lgfapi
+build_tester $(dirname $0)/glfs-lease-recall.c -lgfapi
+TEST $(dirname $0)/glfs-lease $V0 $logdir/glfs-lease.log $logdir/lease-test.log
+TEST $(dirname $0)/glfs-lease-recall $V0 $logdir/glfs-lease-recall.log $logdir/lease-test-recall.log
+
+TEST $CLI volume set $V0 leases off
+
+cleanup_tester $(dirname $0)/glfs-lease
+cleanup;
diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t
new file mode 100644
index 00000000000..067eb1b7486
--- /dev/null
+++ b/tests/features/interrupt.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=4
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/open_and_sleep.c
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+function test_interrupt {
+ local handlebool="$1"
+ local logpattern="$2"
+
+ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --fuse-flush-handle-interrupt=$handlebool --log-level=DEBUG $M0
+
+ # If the test helper fails (which is considered a setup error, not failure of the test
+ # case itself), kill will be invoked without argument, and that will be the actual
+ # error which is caught.
+ TEST "./$(dirname $0)/open_and_sleep $M0/testfile-$handlebool | { sleep 0.1; xargs -n1 kill -INT; }"
+
+ TEST "grep -E '$logpattern' $log_file"
+ # Basic sanity check, making sure filesystem has not crashed.
+ TEST test -f $M0/testfile-$handlebool
+}
+
+# Theoretically FLUSH might finish before INTERRUPT is handled,
+# in which case we'd get the "no handler found" message instead of
+# "interrupt handler triggered" (but it's unlikely).
+# If that's observed, the pattern can be changed to
+# 'FLUSH.*interrupt handler triggered|[I]NTERRUPT.*no handler found'
+# to fix the test.
+test_interrupt yes '[F]LUSH.*interrupt handler triggered'
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+test_interrupt no '[I]NTERRUPT.*no handler found'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup_tester $(dirname $0)/open_and_sleep;
+cleanup;
diff --git a/tests/features/ipc.t b/tests/features/ipc.t
new file mode 100755
index 00000000000..5c92287eaa1
--- /dev/null
+++ b/tests/features/ipc.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=GH269
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=GH269
+
+cleanup;
+mkdir -p $B0/1
+mkdir -p $M0
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume start $V0
+
+# Find OS-dependent EOPNOTSUPP value from system headers
+EOPNOTSUPP=$( echo '#include <errno.h>\\EOPNOTSUPP\\' | tr '\\' '\n' | \
+ cc -E -c - | tail -1 )
+
+# liglusterfs embbeds its own UUID implementation. The function name
+# may be the same as in built(in implementation from libc, but with
+# different prototype. In that case, we must make sure python will
+# use libglusterfs's version, and dlopen() does not make any guarantee
+# on this. By preloading libglusterfs.so before launching python, we
+# ensure libglusterfs's UUID functions will be used.
+LD_PRELOAD=${prefix}/lib/libglusterfs.so
+export LD_PRELOAD
+
+# This is a pretty lame test. Basically we just want to make sure that we
+# get all the way through the translator stacks on client and server to get a
+# simple error (EOPNOTSUPP) instead of a crash, RPC error, etc.
+EXPECT ${EOPNOTSUPP} $PYTHON $(dirname $0)/ipctest.py $H0 $V0
+
+unset LD_PRELOAD
+cleanup;
diff --git a/tests/features/ipctest.py b/tests/features/ipctest.py
new file mode 100755
index 00000000000..f6f699cf5c4
--- /dev/null
+++ b/tests/features/ipctest.py
@@ -0,0 +1,28 @@
+
+from __future__ import print_function
+import ctypes
+
+api = ctypes.CDLL("libgfapi.so", mode=ctypes.RTLD_GLOBAL)
+
+api.glfs_ipc.argtypes = [ ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_void_p ]
+api.glfs_ipc.restype = ctypes.c_int
+
+def do_ipc (host, volume):
+ fs = api.glfs_new(volume)
+ #api.glfs_set_logging(fs, "/dev/stderr", 7)
+ api.glfs_set_volfile_server(fs, "tcp", host, 24007)
+
+ api.glfs_init(fs)
+ ret = api.glfs_ipc(fs, 1470369258, 0, 0)
+ api.glfs_fini(fs)
+
+ return ret
+
+if __name__ == "__main__":
+ import sys
+
+ try:
+ res = do_ipc(*sys.argv[1:3])
+ print(res)
+ except:
+ print("IPC failed (volume not started?)")
diff --git a/tests/features/lock_revocation.t b/tests/features/lock_revocation.t
new file mode 100644
index 00000000000..67bc13159f9
--- /dev/null
+++ b/tests/features/lock_revocation.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+logdir=$(gluster --print-logdir)
+BRICK_LOGFILES="$logdir/bricks/d-backends-brick?.log"
+rm -f $BRICK_LOGFILES &> /dev/null
+
+# Test that lock revocation works
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+function deadlock_fop() {
+ local MNT=$1
+ for i in {1..1000}; do
+ dd if=/dev/zero of=$MNT/testfile bs=1k count=10 &> /dev/null
+ if grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null; then
+ break
+ fi
+ done
+}
+
+function monkey_unlock() {
+ grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null && echo SUCCESS
+ return 0
+}
+
+function append_to_file() {
+ local FILE_PATH=$1
+ echo "hello" >> $FILE_PATH
+ return 0
+}
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 features.locks-monkey-unlocking on
+TEST $CLI volume set $V0 features.locks-revocation-secs 2
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 -s $H0 $M0;
+TEST $GFS --volfile-id=$V0 -s $H0 $M1;
+
+# Deadlock writes to a file using monkey unlocking
+deadlock_fop $M0 &
+EXPECT_WITHIN 60 "SUCCESS" monkey_unlock
+
+# Sleep > unlock timeout and attempt to write to the file
+sleep 3
+TEST append_to_file $M1/testfile
+
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1369401
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1448364
diff --git a/tests/features/mandatory-lock-forced.c b/tests/features/mandatory-lock-forced.c
new file mode 100644
index 00000000000..4028d6c6eaf
--- /dev/null
+++ b/tests/features/mandatory-lock-forced.c
@@ -0,0 +1,143 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+
+#define LOG_ERR(func, err) \
+ do { \
+ fprintf(stderr, "%s : returned error (%s)\n", func, strerror(err)); \
+ exit(err); \
+ } while (0)
+
+int fd;
+struct flock lock;
+char *buf = "ten bytes!";
+char *fname = "/mnt/glusterfs/0/mand.lock";
+int open_flags, child, err, status, blocked = 0;
+
+int
+do_child(char *argv[])
+{
+ /* Initialize file open flags */
+ if (strcmp(argv[2], "BLOCK") == 0)
+ open_flags = O_RDWR;
+ else if (strcmp(argv[2], "TRUNC") == 0)
+ open_flags = O_RDWR | O_TRUNC | O_NONBLOCK;
+ else if (strcmp(argv[2], "NONE") == 0)
+ open_flags = O_RDWR | O_NONBLOCK;
+ else
+ LOG_ERR("Invalid option:", EINVAL);
+
+ /* Open the file */
+ fd = open(fname, open_flags);
+ if (fd == -1)
+ LOG_ERR("Child open", errno);
+
+ /* Perform the file operation*/
+ if (strcmp(argv[3], "READ") == 0) {
+ buf = NULL;
+ err = read(fd, buf, 10);
+ if (err == -1)
+ LOG_ERR("Child read", errno);
+ } else if (strcmp(argv[3], "WRITE") == 0) {
+ err = write(fd, buf, 10);
+ if (err == -1)
+ LOG_ERR("Child write", errno);
+ } else if (strcmp(argv[3], "FTRUNCATE") == 0) {
+ err = ftruncate(fd, 5);
+ if (err)
+ LOG_ERR("Child ftruncate", errno);
+ } else
+ LOG_ERR("Invalid operation:", EINVAL);
+
+ /* Close child fd */
+ err = close(fd);
+ if (err)
+ LOG_ERR("Child close", errno);
+
+ /* Exit success */
+ exit(0);
+}
+
+int
+main(int argc, char *argv[])
+{
+ if (argc < 4) {
+ fprintf(stderr,
+ "Wrong usage: Use as ./mandatory-lock "
+ "<RD_LCK/WR_LCK> <BLOCK/TRUNC/NONE> "
+ "<READ/WRITE/FTRUNCATE\n");
+ exit(EINVAL);
+ }
+ /* Create an empty lock file */
+ fd = open(fname, O_CREAT | O_RDWR, 0755);
+ if (fd == -1)
+ LOG_ERR("Parent create", errno);
+
+ /* Determine the type of lock */
+ if (strcmp(argv[1], "RD_LCK") == 0)
+ lock.l_type = F_RDLCK;
+ else if (strcmp(argv[1], "WR_LCK") == 0)
+ lock.l_type = F_WRLCK;
+ else
+ LOG_ERR("Parent lock type", EINVAL);
+
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 0L;
+
+ /* Let parent acquire the initial lock */
+ err = fcntl(fd, F_SETLK, &lock);
+ if (err)
+ LOG_ERR("Parent lock", errno);
+
+ /* Now fork a child */
+ child = fork();
+ if (child == 0)
+ /* Perform the child operations */
+ do_child(argv);
+ else {
+ /* If blocking mode, then sleep for 2 seconds
+ * and wait for the child */
+ if (strcmp(argv[2], "NONE") != 0) {
+ sleep(2);
+ if (waitpid(child, &status, WNOHANG) == 0)
+ blocked = 1;
+ /* Release the parent lock so that the
+ * child can terminate */
+ lock.l_type = F_UNLCK;
+ err = fcntl(fd, F_SETLK, &lock);
+ if (err)
+ LOG_ERR("Parent unlock", errno);
+ }
+
+ /* Wait for child to finish */
+ waitpid(child, &status, 0);
+
+ /* Close the parent fd */
+ err = close(fd);
+ if (err)
+ LOG_ERR("Parent close", errno);
+
+ /* Remove the lock file*/
+ err = unlink(fname);
+ if (err)
+ LOG_ERR("Parent unlink", errno);
+
+ /* If not blocked, exit with child exit status*/
+ errno = WEXITSTATUS(status);
+
+ /* If blocked, exit with corresponding
+ * error code */
+ if (blocked)
+ errno = EWOULDBLOCK;
+
+ if (errno != 0)
+ printf("%s\n", strerror(errno));
+
+ exit(errno);
+ }
+}
diff --git a/tests/features/mandatory-lock-forced.t b/tests/features/mandatory-lock-forced.t
new file mode 100644
index 00000000000..563669c6774
--- /dev/null
+++ b/tests/features/mandatory-lock-forced.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+# Start glusterd [1]
+TEST glusterd
+
+# Create and verify the volume information [2-4]
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Turn off the performance translators [5-9]
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+# Start and mount the volume [10-11]
+TEST $CLI volume start $V0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+build_tester $(dirname $0)/mandatory-lock-forced.c -o $(dirname $0)/mandatory-lock
+
+# Various read/write tests without enabling mandatory-locking [12-18]
+$(dirname $0)/mandatory-lock RD_LCK NONE READ
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock RD_LCK NONE WRITE
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock WR_LCK NONE READ
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock WR_LCK NONE WRITE
+TEST [ $? -eq 0 ]
+
+# Specifies O_TRUNC during open
+$(dirname $0)/mandatory-lock RD_LCK TRUNC READ
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock RD_LCK NONE FTRUNCATE
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock RD_LCK BLOCK WRITE
+TEST [ $? -eq 0 ]
+
+# Enable mandatory-locking [19]
+TEST $CLI volume set $V0 mandatory-locking forced
+
+# Restart the volume to take the change into effect [20-23]
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+# Repeat the above tests with mandatory-locking [24-30]
+$(dirname $0)/mandatory-lock RD_LCK NONE READ
+TEST [ $? -eq 0 ]
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK NONE WRITE
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock WR_LCK NONE READ
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock WR_LCK NONE WRITE
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK TRUNC READ
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK NONE FTRUNCATE
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK BLOCK WRITE
+
+rm -rf $(dirname $0)/mandatory-lock
+
+cleanup
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1326464
diff --git a/tests/features/nuke.t b/tests/features/nuke.t
new file mode 100755
index 00000000000..f1f5f9f90ab
--- /dev/null
+++ b/tests/features/nuke.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+create_files () {
+ mkdir $1
+ for i in $(seq 0 99); do
+ mkdir $1/dir$i
+ for j in $(seq 0 99); do
+ touch $1/dir$i/file$j
+ done
+ done
+}
+
+count_files () {
+ ls $1 | wc -l
+}
+
+LANDFILL=$B0/${V0}1/.glusterfs/landfill
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+TEST create_files $M0/foo
+TEST [ $(count_files $LANDFILL) = "0" ]
+
+# This should immediately send the whole directory to the landfill.
+TEST setfattr -n glusterfs.dht.nuke -v trinity $M0/foo
+
+# Make sure the directory's not visible on the mountpoint, and is visible in
+# the brick's landfill.
+TEST ! ls $M0/foo
+TEST [ $(count_files $LANDFILL) = "1" ]
+
+# Make sure the janitor thread cleans it up in a timely fashion.
+EXPECT_WITHIN 60 "0" count_files $LANDFILL
+
+cleanup
diff --git a/tests/features/open_and_sleep.c b/tests/features/open_and_sleep.c
new file mode 100644
index 00000000000..7d0e22a2503
--- /dev/null
+++ b/tests/features/open_and_sleep.c
@@ -0,0 +1,27 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ pid_t pid;
+ int fd;
+
+ if (argc >= 2) {
+ fd = open(argv[1], O_RDWR | O_CREAT, 0644);
+ if (fd == -1) {
+ fprintf(stderr, "cannot open/create %s\n", argv[1]);
+ return 1;
+ }
+ }
+
+ pid = getpid();
+ printf("%d\n", pid);
+ fflush(stdout);
+
+ for (;;)
+ sleep(1);
+
+ return 0;
+}
diff --git a/tests/features/openssl.cnf.in b/tests/features/openssl.cnf.in
new file mode 100644
index 00000000000..1fce34b11b9
--- /dev/null
+++ b/tests/features/openssl.cnf.in
@@ -0,0 +1,41 @@
+[ req ]
+distinguished_name = req_distinguished_name
+x509_extensions = v3_ca
+[ req_distinguished_name ]
+commonName = Common Name
+commonName_max = 64
+[ v3_ca ]
+subjectKeyIdentifier = hash
+authorityKeyIdentifier = keyid:always,issuer:always
+basicConstraints = CA:true
+[ ca ]
+default_ca = CA_default
+[ CA_default ]
+dir = @TMPDIR@
+certs = $dir/certs
+crl_dir = $dir/crl
+database = $dir/index.txt
+unique_subjecta = no
+new_certs_dir = $dir/newcerts
+certificate = $dir/ca.crt
+serial = $dir/serial
+crl = $dir/crl.pem
+private_key = $dir/self.key
+x509_extensions = usr_cert
+name_opt = ca_default
+cert_opt = ca_default
+default_days = 365
+default_crl_days = 30
+crl_extensions = crl_ext
+default_md = sha256
+preserve = no
+policy = policy_test
+[ policy_test ]
+commonName = supplied
+[ usr_cert ]
+basicConstraints = CA:FALSE
+subjectKeyIdentifier = hash
+authorityKeyIdentifier = keyid,issuer:always
+crlDistributionPoints = URI:file://@TMPDIR@/crl.pem
+[ crl_ext ]
+authorityKeyIdentifier = keyid:always,issuer:always
diff --git a/tests/features/readdir-ahead.t b/tests/features/readdir-ahead.t
new file mode 100755
index 00000000000..75223df61bf
--- /dev/null
+++ b/tests/features/readdir-ahead.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Test basic readdir-ahead functionality. Verify that readdir-ahead can be
+# enabled, create a set of files and run some ls tests.
+#
+###
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 readdir-ahead on
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/test
+for i in $(seq 0 99)
+do
+ touch $M0/test/$i
+done
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 100 ]
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 100 ]
+
+TEST rm -rf $M0/test/*
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 0 ]
+
+TEST rmdir $M0/test
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/features/recon.t b/tests/features/recon.t
new file mode 100644
index 00000000000..82ef6fd755d
--- /dev/null
+++ b/tests/features/recon.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fdl.rc
+
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+push_trapfunc "rm -rf $tmpdir"
+
+write_file () {
+ echo "peekaboo" > $1
+}
+
+TEST rm -f $FDL_META_FILE $FDL_DATA_FILE
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and then copy off the journal files for later.
+TEST mkdir -p $M0/abc/def
+TEST write_file $M0/abc/def/ghi
+#EST chmod 314 $M0/abc/def/ghi
+cp ${FDL_META_FILE} ${FDL_DATA_FILE} ${tmpdir}
+
+# Get back to an empty state and unmount.
+TEST rm -rf $M0/abc
+TEST umount $M0
+
+# Make sure we really are in an empty state. Otherwise the tests below could
+# pass just because we never cleaned up in the first place.
+TEST [ ! -d ${B0}/${V0}-0/abc ]
+
+# Create a stub volfile.
+vol_file=${GLUSTERD_WORKDIR}/vols/${V0}/${V0}.${H0}.${log_id}.vol
+vol_id_line=$(grep volume-id ${vol_file})
+cat > ${tmpdir}/recon.vol << EOF
+volume recon-posix
+ type storage/posix
+ option directory ${B0}/${V0}-0
+${vol_id_line}
+end-volume
+EOF
+
+TEST gf_recon ${tmpdir}/recon.vol ${tmpdir}/$(basename ${FDL_META_FILE}) \
+ ${tmpdir}/$(basename ${FDL_DATA_FILE})
+
+TEST [ -d ${B0}/${V0}-0/abc/def ]
+EXPECT "peekaboo" cat ${B0}/${V0}-0/abc/def/ghi
+# TBD: test permissions, xattrs
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
new file mode 100755
index 00000000000..497083e5a3a
--- /dev/null
+++ b/tests/features/ssl-authz.t
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+ping_file () {
+ echo hello > $1 2> /dev/null
+}
+for d in /etc/ssl /etc/openssl /usr/local/etc/openssl ; do
+ if test -d $d ; then
+ SSL_BASE=$d
+ break
+ fi
+done
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+cleanup;
+rm -f $SSL_BASE/glusterfs.*
+mkdir -p $B0/1
+mkdir -p $M0
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI v set all cluster.brick-multiplex on
+# Construct a cipher list that excludes CBC because of POODLE.
+# http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566
+#
+# Since this is a bit opaque, here's what it does:
+# (1) Get the ciphers matching a normal cipher-list spec
+# (2) Delete any colon-separated entries containing "CBC"
+# (3) Collapse adjacent colons from deleted entries
+# (4) Remove colons at the beginning or end
+function valid_ciphers {
+ openssl ciphers 'HIGH:!SSLv2' | sed \
+ -e '/[^:]*CBC[^:]*/s///g' \
+ -e '/::*/s//:/g' \
+ -e '/^:/s///' \
+ -e '/:$/s///'
+}
+
+TEST openssl genrsa -out $SSL_KEY 2048
+TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ln $SSL_CERT $SSL_CA
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force
+TEST $CLI volume set $V0 server.ssl on
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers)
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
+
+# This mount should SUCCEED because ssl-allow=* by default. This effectively
+# disables SSL authorization, though authentication and encryption might still
+# be enabled.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST ping_file $M0/before
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+glusterfsd_pid=`pgrep glusterfsd`
+TEST [ $glusterfsd_pid != 0 ]
+start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glusterfsd process"
+for i in $(seq 1 100); do
+ gluster v heal $V0 info >/dev/null
+done
+#Wait to cleanup memory
+sleep 10
+end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 15M some leak in SSL code path
+
+TEST [ $diff -lt 15000 ]
+
+
+# Set ssl-allow to a wildcard that includes our identity.
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 auth.ssl-allow Any*
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
+
+# This mount should SUCCEED because we match the wildcard.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST ping_file $M0/before
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Set ssl-allow to include the identity we've created.
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 auth.ssl-allow Anyone
+TEST $CLI volume start $V0
+
+# This mount should SUCCEED because this specific identity is allowed.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST ping_file $M0/before
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Change the authorized user name. Note that servers don't pick up changes
+# automagically like clients do, so we have to stop/start ourselves.
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 auth.ssl-allow NotYou
+TEST $CLI volume start $V0
+
+# This mount should FAIL because the identity given by our certificate does not
+# match the allowed user. In other words, authentication works (they know who
+# we are) but authorization doesn't (we're not the right person).
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Looks like /*/bin/glusterfs isn't returning error status correctly (again).
+# We may get an unusable mount where ping will fail, or no mount at all,
+# where ping will write to the mount point instead of the mounted filesystem.
+# In order to avoid spurious failures, create a file by ping and check it
+# is absent from the brick.
+ping_file $M0/after
+TEST test -f $B0/1/before
+TEST ! test -f $B0/1/after
+
+cleanup;
diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
new file mode 100644
index 00000000000..b70fe360e02
--- /dev/null
+++ b/tests/features/ssl-ciphers.t
@@ -0,0 +1,244 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+brick_port() {
+ $CLI --xml volume status $1 | sed -n '/.*<port>\([0-9]*\).*/s//\1/p'
+}
+
+wait_mount() {
+ i=1
+ while [ $i -lt $CONFIG_UPDATE_TIMEOUT ] ; do
+ sleep 1
+ i=$(( $i + 1 ))
+ mounted=`mount|awk -v m=$1 '
+ BEGIN {r = "N";}
+ ($3 == m) {r = "Y"; exit;}
+ END {print r;}
+ '`
+ if [ "x${mounted}" = "xY" ] ; then
+ ls $M0 2>/dev/null || continue
+ break;
+ fi
+ done
+
+ if [ "x${mounted}" = "xY" ] ; then
+ ls $M0 2>/dev/null || mounted="N"
+ fi
+
+ echo $mounted
+}
+
+openssl_connect() {
+ ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA"
+ ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR"
+ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null"
+ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
+ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+#Validate the cipher to pass EXPECT test case before call openssl_connect
+check_cipher() {
+ cmd="echo "" | openssl s_client $@ 2> /dev/null"
+ cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
+ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+cleanup;
+mkdir -p $B0
+mkdir -p $M0
+
+TMPDIR=`mktemp -d /tmp/${0##*/}.XXXXXX`
+TEST test -d $TMPDIR
+
+SSL_KEY=$TMPDIR/self.key
+SSL_CSR=$TMPDIR/self.csr
+SSL_CERT=$TMPDIR/self.crt
+SSL_CA=$TMPDIR/ca.crt
+SSL_CFG=$TMPDIR/openssl.cnf
+SSL_CRL=$TMPDIR/crl.pem
+
+sed "s|@TMPDIR@|${TMPDIR}|" `pwd`/`dirname $0`/openssl.cnf.in > $SSL_CFG
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null
+TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \
+ -subj /CN=CA -out $SSL_CA
+TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \
+ -subj /CN=$H0 -out $SSL_CSR
+
+echo "01" > $TMPDIR/serial
+TEST touch $TMPDIR/index.txt $TMPDIR/index.txx.attr
+TEST mkdir -p $TMPDIR/certs $TMPDIR/newcerts $TMPDIR/crl
+TEST openssl ca -batch -config $SSL_CFG -in $SSL_CSR -out $SSL_CERT 2>&1
+
+touch $SSL_CRL
+CRLHASH=`openssl x509 -hash -fingerprint -noout -in $SSL_CA|sed -n '1s/$/.r0/p'`
+ln -sf $SSL_CRL $TMPDIR/$CRLHASH
+TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1
+
+
+TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume set $V0 server.ssl on
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 ssl.private-key $SSL_KEY
+TEST $CLI volume set $V0 ssl.own-cert $SSL_CERT
+TEST $CLI volume set $V0 ssl.ca-list $SSL_CA
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+BRICK_PORT=`brick_port $V0`
+
+# Test we can connect
+EXPECT "Y" openssl_connect -connect $H0:$BRICK_PORT
+
+# Test SSLv2 protocol fails
+EXPECT "N" openssl_connect -ssl2 -connect $H0:$BRICK_PORT
+
+# Test SSLv3 protocol fails
+EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT
+
+# Test TLSv1 protocol based on openssl version
+cmd="openssl version"
+ver=$(eval $cmd | awk -F " " '{print $2}' | grep "^1.1")
+if [ "x${ver}" = "x" ]; then
+ supp="N"
+else
+ supp="Y"
+fi
+EXPECT "${supp}" openssl_connect -tls1 -connect $H0:$BRICK_PORT
+
+# Test a HIGH CBC cipher
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+
+# Test EECDH
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+# test MD5 fails
+cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
+
+# test RC4 fails
+cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
+
+# test eNULL fails
+cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
+
+# test SHA2
+cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
+
+# test GCM
+cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
+
+# Test DH fails without DH params
+cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
+
+# Test DH with DH params
+TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem
+EXPECT "`pwd`/`dirname $0`/dh1024.pem" volume_option $V0 ssl.dh-param
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+EXPECT "Y" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
+
+# Test the cipher-list option
+TEST $CLI volume set $V0 ssl.cipher-list AES256-SHA
+EXPECT AES256-SHA volume_option $V0 ssl.cipher-list
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
+
+# Test the ec-curve option
+TEST $CLI volume set $V0 ssl.cipher-list EECDH:EDH:!TLSv1
+EXPECT EECDH:EDH:!TLSv1 volume_option $V0 ssl.cipher-list
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+TEST $CLI volume set $V0 ssl.ec-curve invalid
+EXPECT invalid volume_option $V0 ssl.ec-curve
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+TEST $CLI volume set $V0 ssl.ec-curve secp521r1
+EXPECT secp521r1 volume_option $V0 ssl.ec-curve
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+# test revocation
+TEST $CLI volume set $V0 ssl.crl-path $TMPDIR
+EXPECT $TMPDIR volume_option $V0 ssl.crl-path
+$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT "Y" wait_mount $M0
+TEST_FILE=`mktemp $M0/${0##*/}.XXXXXX`
+TEST test -f $TEST_FILE
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST openssl ca -batch -config $SSL_CFG -revoke $SSL_CERT 2>&1
+TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1
+
+# Failed once revoked
+# Although client fails to mount without restarting the server after crl-path
+# is set when no actual crl file is found on the client, it would also fail
+# when server is restarted for the same reason. Since the socket initialization
+# code is the same for client and server, the crl verification flags need to
+# be turned off for the client to avoid SSL searching for CRLs in the
+# ssl.crl-path. If no CRL files are found in the ssl.crl-path, SSL fails the
+# connect() attempt on the client.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT "N" wait_mount $M0
+TEST ! test -f $TEST_FILE
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Succeed with CRL disabled
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 ssl.crl-path NULL
+EXPECT NULL volume_option $V0 ssl.crl-path
+TEST $CLI volume start $V0
+$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT "Y" wait_mount $M0
+TEST test -f $TEST_FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -rf $TMPDIR
+cleanup;
diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t
new file mode 100644
index 00000000000..a02bd6befc4
--- /dev/null
+++ b/tests/features/subdir-mount.t
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 --volume-name ${V0}-dht $M0;
+
+TEST ! stat $M0/subdir1;
+TEST mkdir $M0/subdir1;
+TEST ! stat $M0/subdir2;
+TEST mkdir $M0/subdir2;
+TEST ! stat $M0/subdir1/subdir1.1;
+TEST mkdir $M0/subdir1/subdir1.1;
+TEST ! stat $M0/subdir1/subdir1.1/subdir1.2;
+TEST mkdir $M0/subdir1/subdir1.1/subdir1.2;
+
+# mount volume/subdir1
+TEST $GFS --subdir-mount /subdir1 -s $H0 --volfile-id $V0 --volume-name ${V0}-dht $M1;
+
+TEST touch $M0/topfile;
+TEST ! stat $M1/topfile;
+
+TEST touch $M1/subdir1_file;
+TEST ! stat $M0/subdir1_file;
+TEST stat $M0/subdir1/subdir1_file;
+
+# mount volume/subdir2
+TEST $GFS --subdir-mount /subdir2 -s $H0 --volfile-id $V0 $M2;
+
+TEST ! stat $M2/topfile;
+
+TEST touch $M2/subdir2_file;
+TEST ! stat $M0/subdir2_file;
+TEST ! stat $M1/subdir2_file;
+TEST stat $M0/subdir2/subdir2_file;
+
+# umount $M1 / $M2
+TEST umount $M1
+TEST umount $M2
+
+# mount non-existing subdir ; this works with mount.glusterfs,
+# but with glusterfs, the script doesn't returns error.
+#TEST ! $GFS --subdir-mount subdir_not_there -s $H0 --volfile-id $V0 $M1;
+
+# mount subdir with depth
+TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2;
+TEST ! stat $M2/topfile;
+TEST touch $M2/subdir1.2_file;
+TEST ! stat $M0/subdir1.2_file;
+TEST stat $M0/subdir1/subdir1.1/subdir1.2/subdir1.2_file;
+
+TEST umount $M2
+
+# Lets validate the options # Not having '*' in here as there was some
+# problem with option validation with this
+TEST $CLI volume set $V0 auth.allow 192.168.1.1
+
+TEST $CLI volume set $V0 auth.allow "192.168.1.1,10.10.\*.\*,::1"
+
+TEST $CLI volume set $V0 auth.allow "/subdir1\(1.2.3.4\),/\(192.168.10.2\|192.168.11.1\),/subdir2\(1.2.3.4\)"
+
+# directories should be absolute
+TEST ! $CLI volume set $V0 auth.allow "subdir2\(1.2.3.4\)"
+
+# support subdir inside subdir
+TEST $CLI volume set $V0 auth.allow '/subdir1/subdir1.1/subdir1.2/\(1.2.3.4\|::1\),/\(192.168.10.1\|192.168.11.1\),/subdir2\(1.2.3.4\)'
+
+TEST $CLI volume stop $V0
+
+TEST $CLI volume start $V0
+
+TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2
+TEST stat $M2
+
+initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+# mount shouldn't fail even after add-brick
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6};
+
+# Wait to execute create-subdir-mounts.sh script by glusterd
+newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+while [ $newcnt -eq $initcnt ]
+do
+ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+ sleep 1
+done
+
+# Existing mount should still be active
+mount_inode=$(stat --format "%i" "$M2")
+TEST test "$mount_inode" == "1"
+
+TEST umount $M2
+
+# Now the exported subdirs should be automatically healed due to
+# hook scripts. Check if the mount is successful.
+TEST $GFS --subdir-mount /subdir2 -s $H0 --volfile-id $V0 $M2
+mount_inode=$(stat --format "%i" "$M2")
+TEST test "$mount_inode" == "1"
+
+TEST umount $M0
+TEST umount $M2
+
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+## This should clean the mountpoints
+cleanup;
diff --git a/tests/features/trash.t b/tests/features/trash.t
new file mode 100755
index 00000000000..da5b50bc85a
--- /dev/null
+++ b/tests/features/trash.t
@@ -0,0 +1,247 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+cleanup
+
+test_mount() {
+ volume=$1
+ mount=$2
+ test_dir=$3
+ RETVAL=0
+ glusterfs -s $H0 --volfile-id $volume $mount --attribute-timeout=0
+
+ if [ "x$test_dir" = "x" ] ; then return $RETVAL; fi
+
+ timeout=0
+ while [ $timeout -lt $PROCESS_UP_TIMEOUT ] ; do
+ timeout=$(( $timeout + 1 ))
+ test -d $test_dir
+ RETVAL=$?
+ if [ $RETVAL -eq 0 ] ; then break ; fi
+ sleep 1
+ done
+
+ return $RETVAL
+}
+
+start_vol() {
+ volume=$1
+ mount=$2
+ test_dir=$3
+ $CLI volume start $volume
+ test_mount $volume $mount $test_dir
+ RETVAL=$?
+ return $RETVAL
+}
+
+create_files() {
+ echo 'Hi' > $1
+ echo 'Hai' > $2
+}
+
+file_exists () {
+ vol=$1
+ shift
+ for file in `ls $B0/${vol}1/$@ 2> /dev/null` ; do
+ test -e ${file} && return 0
+ done
+ for file in `ls $B0/${vol}2/$@ 2> /dev/null` ; do
+ test -e ${file} && return 0
+ done
+
+ return 1
+}
+
+unlink_op() {
+
+ rm -f $M0/$1
+ ls $M0/.trashcan/1/2/3 &> /dev/null
+ sleep 2
+
+ test ! -e $M0/$1
+ wildcard_exists $M0/.trashcan/$1*
+
+ # remove from trashcan
+ rm -f $M0/.trashcan/$1*
+ wildcard_not_exists $M0/.trashcan/$1*
+}
+
+truncate_op() {
+ truncate -s 2 $M0/$1
+ ls $M0/.trashcan/1/2/3 &> /dev/null
+ sleep 2
+
+ test -e $M0/$1
+ test $(ls -l $M0/$1 | awk '{print $5}') -eq 2 &> /dev/null
+ wildcard_exists $M0/.trashcan/$1*
+ test $(ls -l $M0/.trashcan/$1*|awk '{print $5}') -eq $2 &> /dev/null
+
+ # truncate from trashcan
+ truncate -s 1 $M0/.trashcan/$1*
+ test $(ls $M0/.trashcan/$1* | wc -l) -eq 1
+}
+
+wildcard_exists() {
+ test -e $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+wildcard_not_exists() {
+ test ! -e $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+# testing glusterd
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+# creating distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# checking volume status
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+EXPECT '2' brick_count $V0
+
+# test without enabling trash translator
+TEST start_vol $V0 $M0
+
+# test on enabling trash translator
+TEST $CLI volume set $V0 features.trash on
+EXPECT 'on' volinfo_field $V0 'features.trash'
+
+# files directly under mount point
+create_files $M0/file1 $M0/file2
+TEST file_exists $V0 file1 file2
+
+# perform unlink
+TEST unlink_op file1
+
+# perform truncate
+TEST truncate_op file2 4
+
+# create files directory hierarchy and check
+mkdir -p $M0/1/2/3
+create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
+TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
+
+# perform unlink
+TEST unlink_op 1/2/3/foo1
+
+# perform truncate
+TEST truncate_op 1/2/3/foo2 4
+
+# create a directory for eliminate pattern
+mkdir $M0/a
+
+# set the eliminate pattern
+TEST $CLI volume set $V0 features.trash-eliminate-path /a
+EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
+
+# create two files and check
+create_files $M0/a/test1 $M0/a/test2
+TEST file_exists $V0 a/test1 a/test2
+
+# remove from eliminate pattern
+rm -f $M0/a/test1
+EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
+
+# truncate from eliminate path
+truncate -s 2 $M0/a/test2
+TEST [ -e $M0/a/test2 ]
+TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
+EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
+
+# set internal op on
+TEST $CLI volume set $V0 features.trash-internal-op on
+EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
+
+# again create two files and check
+create_files $M0/inop1 $M0/inop2
+TEST file_exists $V0 inop1 inop2
+
+# perform unlink
+TEST unlink_op inop1
+
+# perform truncate
+TEST truncate_op inop2 4
+
+# remove one brick and restart the volume
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST start_vol $V0 $M0 $M0/.trashcan
+
+# again create two files and check
+create_files $M0/rebal1 $M0/rebal2
+TEST file_exists $V0 rebal1 rebal2
+
+# add one brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+TEST [ -d $B0/${V0}3 ]
+
+
+# perform rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+#Find out which file was migrated to the new brick
+file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
+
+# check whether rebalance was succesful
+EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
+EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# force required in case rebalance is not over
+TEST $CLI volume stop $V0 force
+
+# create a replicated volume
+TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
+
+# checking volume status
+EXPECT "$V1" volinfo_field $V1 'Volume Name'
+EXPECT 'Replicate' volinfo_field $V1 'Type'
+EXPECT 'Created' volinfo_field $V1 'Status'
+EXPECT '2' brick_count $V1
+
+# enable trash with options and start the replicate volume by disabling automatic self-heal
+TEST $CLI volume set $V1 features.trash on
+TEST $CLI volume set $V1 features.trash-internal-op on
+EXPECT 'on' volinfo_field $V1 'features.trash'
+EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
+TEST start_vol $V1 $M1 $M1/.trashcan
+
+# mount and check for trash directory
+TEST [ -d $M1/.trashcan/internal_op ]
+
+# create a file and check
+touch $M1/self
+TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
+
+# kill one brick and delete the file from mount point
+kill_brick $V1 $H0 $B0/${V1}1
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+rm -f $M1/self
+EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
+
+# check renaming of trash directory through cli
+TEST $CLI volume set $V0 trash-dir abc
+TEST start_vol $V0 $M0 $M0/abc
+TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
+EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
+
+# ensure that rename and delete operation on trash directory fails
+rm -rf $M0/abc/internal_op
+TEST [ -e $M0/abc/internal_op ]
+rm -rf $M0/abc/
+TEST [ -e $M0/abc ]
+mv $M0/abc $M0/trash
+TEST [ -e $M0/abc ]
+
+cleanup
diff --git a/tests/features/unhashed-auto.t b/tests/features/unhashed-auto.t
new file mode 100755
index 00000000000..0a6bbfbb07d
--- /dev/null
+++ b/tests/features/unhashed-auto.t
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+NFILES=100
+
+touch_files () {
+ for i in $(seq 1 $NFILES); do
+ touch $(printf $M0/dir/file%02d $i)
+ done
+}
+
+count_files () {
+ found=0
+ for i in $(seq 1 $NFILES); do
+ if [ -f $(printf $M0/dir/file%02d $i) ]; then
+ found=$((found+1))
+ fi
+ done
+ echo "found $found files" > /dev/tty
+ echo $found
+}
+
+wait_for_rebalance () {
+ while true; do
+ tmp=$(rebalance_completed)
+ if [ $tmp -eq 1 ]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+}
+
+get_xattr () {
+ cmd="getfattr --absolute-names --only-values -n trusted.glusterfs.dht"
+ $cmd $1 | od -tx1 -An | tr -d ' '
+}
+
+get_xattr_hash () {
+ cmd="getfattr --absolute-names --only-values -n trusted.glusterfs.dht"
+ $cmd $1 | od -tx1 -An | awk '{printf("%s%s%s%s\n", $1, $2, $3, $4);}'
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 cluster.lookup-optimize ON
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Create some files for later tests.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch_files
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Add a brick and do the fix-layout part of rebalance to update directory layouts
+# (including their directory commit hashes).
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+EXPECT '3' brick_count $V0
+TEST $CLI volume rebalance $V0 fix-layout start
+TEST wait_for_rebalance
+
+# Now for the sneaky part. *Undo* the part of rebalance that updated the volume
+# commit hash, forcing a false match between that and the directory commit hashes.
+TEST setfattr -x trusted.glusterfs.dht.commithash $B0/${V0}1
+TEST setfattr -x trusted.glusterfs.dht.commithash $B0/${V0}2
+TEST setfattr -x trusted.glusterfs.dht.commithash $B0/${V0}3
+
+# Mount and check that we do *not* see all of the files. This indicates that we
+# correctly skipped the broadcast lookup that would have found them.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST [ $(count_files) -ne 100 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Do the fix-layout again to generate a new volume commit hash.
+TEST $CLI volume rebalance $V0 fix-layout start
+TEST wait_for_rebalance
+
+# Mount and check that we *do* see all of the files. This indicates that we saw
+# the mismatch and did the broadcast lookup this time.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST [ $(count_files) -eq 100 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Do a *full* rebalance and verify that the directory commit hash changed.
+old_val=$(get_xattr $B0/${V0}1/dir)
+TEST $CLI volume rebalance $V0 start
+TEST wait_for_rebalance
+new_val=$(get_xattr $B0/${V0}1/dir)
+TEST [ ! x"$old_val" = x"$new_val" ]
+
+# Force an anomoly on an existing layout and heal it
+## The healed layout should not carry a commit-hash (or should carry 1 in the
+## commit-hash)
+TEST setfattr -x trusted.glusterfs.dht $B0/${V0}1/dir
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST [ -d $M0/dir ]
+new_hash=$(get_xattr_hash $B0/${V0}1/dir)
+TEST [ x"$new_hash" = x"00000001" ]
+new_hash=$(get_xattr_hash $B0/${V0}2/dir)
+TEST [ x"$new_hash" = x"00000001" ]
+
+# Unset the option and check that newly created directories get 1 in the
+# disk layout
+TEST $CLI volume set $V0 cluster.lookup-optimize off
+TEST mkdir $M0/dir1
+new_hash=$(get_xattr_hash $B0/${V0}1/dir1)
+TEST [ x"$new_hash" = x"00000001" ]
+new_hash=$(get_xattr_hash $B0/${V0}2/dir1)
+TEST [ x"$new_hash" = x"00000001" ]
+
+
+cleanup
diff --git a/tests/features/weighted-rebalance.t b/tests/features/weighted-rebalance.t
new file mode 100755
index 00000000000..0d730406fd0
--- /dev/null
+++ b/tests/features/weighted-rebalance.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+NFILES=1000
+
+touch_files () {
+ for i in $(seq 1 $NFILES); do
+ touch $(printf $M0/dir/file%02d $i) 2> /dev/null
+ done
+}
+
+count_files () {
+ found=0
+ for i in $(seq 1 $NFILES); do
+ if [ -f $(printf $1/dir/file%02d $i) ]; then
+ found=$((found+1))
+ fi
+ done
+ echo $found
+}
+
+get_xattr () {
+ cmd="getfattr --absolute-names --only-values -n trusted.glusterfs.dht"
+ $cmd $1 | od -tx1 -An | tr -d ' '
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir ${B0}/${V0}{1,2}
+
+TEST truncate -s $((40*1024*1024)) ${B0}/disk1
+TEST MKFS_LOOP -i 512 ${B0}/disk1
+TEST MOUNT_LOOP ${B0}/disk1 ${B0}/${V0}1
+
+TEST truncate -s $((80*1024*1024)) ${B0}/disk2
+TEST MKFS_LOOP -i 512 ${B0}/disk2
+TEST MOUNT_LOOP ${B0}/disk2 ${B0}/${V0}2
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Create some files for later tests.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch_files
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Check that the larger brick got more of the files.
+nfiles=$(count_files ${B0}/${V0}2)
+#echo $nfiles $(get_xattr ${B0}/${V0}1) $(get_xattr ${B0}/${V0}2) 3>&2 2>&1 1>&3 3>&-
+TEST [ $nfiles -ge 580 ]
+
+# Turn off the size-weighted rebalance.
+TEST $CLI volume set $V0 cluster.weighted-rebalance off
+
+# Rebalance again and check that the distribution is even again.
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+nfiles=$(count_files ${B0}/${V0}2)
+#echo $nfiles $(get_xattr ${B0}/${V0}1) $(get_xattr ${B0}/${V0}2) 3>&2 2>&1 1>&3 3>&-
+TEST [ $nfiles -le 580 ]
+
+$CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/disk{1,2}
+
+cleanup
diff --git a/tests/features/worm.t b/tests/features/worm.t
new file mode 100755
index 00000000000..40b08cdee02
--- /dev/null
+++ b/tests/features/worm.t
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+## Tests for the volume level WORM
+TEST `echo "File 1" > $M0/file1`
+TEST touch $M0/file2
+
+## Enable the volume level WORM
+TEST $CLI volume set $V0 features.worm 1
+TEST ! mv $M0/file1 $M0/file11
+TEST `echo "block" > $M0/file2`
+
+## Disable the volume level WORM and delete the legacy files
+TEST $CLI volume set $V0 features.worm 0
+TEST rm -f $M0/*
+
+## Enable file level WORM
+TEST $CLI volume set $V0 features.worm-file-level 1
+TEST $CLI volume set $V0 features.default-retention-period 10
+TEST $CLI volume set $V0 features.auto-commit-period 5
+
+## Tests for manual transition to WORM/Retained state
+TEST `echo "worm 1" > $M0/file1`
+TEST chmod 0444 $M0/file1
+sleep 5
+TEST `echo "line 1" > $M0/file1`
+TEST ! mv $M0/file1 $M0/file2
+sleep 10
+TEST ! link $M0/file1 $M0/file2
+sleep 5
+TEST rm -f $M0/file1
+
+## Test for mv over WORM/Reatined state
+TEST `echo "worm 1" > $M0/file1`
+TEST chmod 0444 $M0/file1
+TEST `echo "worm 2" > $M0/file2`
+TEST ! mv $M0/file2 $M0/file1
+TEST rm -f $M0/file2
+sleep 10
+TEST rm -f $M0/file1
+
+## Test for state transition over write.
+TEST `echo "worm 1" > $M0/file3`
+sleep 5
+TEST `echo "worm 2" >> $M0/file3`
+EXPECT 'worm 1' cat $M0/file3
+TEST ! rm -f $M0/file3
+
+## Test for checking if Worm files are undeletable after setting worm-files-deletable as 0.
+TEST $CLI volume set $V0 features.worm-files-deletable 0
+TEST `echo "worm 1" > $M0/file4`
+TEST chmod 0444 $M0/file4
+sleep 10
+TEST `echo "worm 1" >> $M0/file4`
+TEST ! rm -f $M0/file4
+
+## Test for state transition if auto-commit-period is 0
+TEST $CLI volume set $V0 features.auto-commit-period 0
+TEST `echo "worm 1" > $M0/file5`
+EXPECT '3/10/0' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file5)
+EXPECT 'worm 1' cat $M0/file5
+TEST ! rm -f $M0/file5
+TEST $CLI volume set $V0 features.auto-commit-period 5
+
+## Test for checking if retention-period is updated on increasing the access time of a WORM-RETAINED file.
+TEST $CLI volume set $V0 features.worm-files-deletable 1
+TEST `echo "worm 1" >> $M0/file1`
+initial_timestamp=$(date +%s)
+current_time_seconds=$(date +%S | sed 's/^0*//' );
+TEST chmod 0444 $M0/file1
+EXPECT '3/10/5' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+changed_timestamp=$(date +%Y%m%d%H%M --date '60 seconds');
+seconds_diff=`expr 60 - $((current_time_seconds))`
+TEST `touch -a -t "${changed_timestamp}" $M0/file1`
+EXPECT "3/$seconds_diff/5" echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+sleep $seconds_diff
+TEST `echo "worm 2" >> $M0/file1`
+EXPECT "$initial_timestamp" echo $(stat --printf %X $M0/file1)
+
+
+## Test for checking if retention-period is updated on decreasing the access time of a WORM-RETAINED file
+TEST $CLI volume set $V0 features.default-retention-period 120
+initial_timestamp=$(date +%s)
+current_time_seconds=$(date +%S | sed 's/^0*//' );
+TEST chmod 0444 $M0/file1
+EXPECT '3/120/5' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+changed_timestamp=$(date +%Y%m%d%H%M --date '60 seconds');
+seconds_diff=`expr 60 - $((current_time_seconds))`
+TEST `touch -a -t "${changed_timestamp}" $M0/file1`
+EXPECT "3/$seconds_diff/5" echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+sleep $seconds_diff
+TEST `echo "worm 4" >> $M0/file1`
+EXPECT "$initial_timestamp" echo $(stat --printf %X $M0/file1)
+TEST rm -f $M0/file1
+
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status'
+
+cleanup;
diff --git a/tests/features/worm_sh.t b/tests/features/worm_sh.t
new file mode 100644
index 00000000000..da7afc084b2
--- /dev/null
+++ b/tests/features/worm_sh.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+## Enable file level WORM
+TEST $CLI volume set $V0 features.worm-file-level 1
+TEST $CLI volume set $V0 features.default-retention-period 100
+TEST $CLI volume set $V0 features.auto-commit-period 5
+
+## Tests for manual transition to WORM/Retained state
+TEST `echo "worm1" > $M0/file1`
+TEST chmod 0444 $M0/file1
+sleep 5
+TEST `echo "worm2" > $M0/file2`
+TEST chmod 0444 $M0/file2
+sleep 5
+TEST `echo "worm3" > $M0/file3`
+TEST chmod 0444 $M0/file3
+sleep 5
+
+## Stopp one of the bricks
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+## Manipulate the WORMed-Files
+TEST $CLI volume set $V0 features.worm-file-level 0
+sleep 5
+
+TEST chmod 0777 $M0/file1
+TEST `echo "test" >> $M0/file1`
+TEST `echo "test" >> $M0/file3`
+TEST `rm -rf $M0/file2`
+
+## Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file3
+sleep 5
+
+## Enable file level WORM again
+TEST $CLI volume set $V0 features.worm-file-level 1
+
+## Restart volume and trigger self-heal
+TEST $CLI volume stop $V0 force
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1 $B0/${V0}1/file1
+TEST diff $B0/${V0}0/file3 $B0/${V0}1/file3
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1/file3
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file3
+
+cleanup;
diff --git a/tests/fileio.rc b/tests/fileio.rc
new file mode 100644
index 00000000000..ec0767832e8
--- /dev/null
+++ b/tests/fileio.rc
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+function fd_available() {
+ for i in {1..65536}; do
+ if [ ! -e /proc/$$/fd/$i ]; then
+ echo $i;
+ return 0;
+ fi
+ done
+
+ return 1;
+}
+
+function fd_open() {
+ local fd=$1;
+ local mode=$2
+ local path=$3;
+
+ case $mode in
+ r)
+ eval "exec $fd<$path";;
+ w)
+ eval "exec $fd>$path";;
+ rw)
+ eval "exec $fd<>$path";;
+ *)
+ false;;
+ esac
+}
+
+
+function fd_cat() {
+ local fd=$1;
+
+ eval "cat <&$fd";
+}
+
+
+function fd_write() {
+ local fd=$1;
+ shift;
+ local msg="$@";
+
+ eval "echo $@ >&$fd";
+}
+
+
+function fd_close() {
+ local fd=$1;
+
+ eval "exec $fd>&-";
+}
+
+
+function fd_based_example() {
+ TEST fd=`fd_available`;
+ TEST fd_open $fd "rw" $M0/filename;
+ TEST fd_cat $fd; # print existing stuff
+ TEST fd_write $fd "new stuff"; # append
+ TEST fd_close $fd;
+}
diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
new file mode 100644
index 00000000000..9ba4262730e
--- /dev/null
+++ b/tests/geo-rep.rc
@@ -0,0 +1,495 @@
+GEO_REP_TIMEOUT=120
+CHECK_MOUNT_TIMEOUT=50
+#check for mount point
+function check_mounted () {
+ df | grep $1 | wc -l
+}
+
+function check_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master $slave status | grep -F "$search_key" | wc -l
+}
+
+function check_fanout_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master status | grep -F "$search_key" | wc -l
+}
+
+function check_fanout_status_detail_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master status detail | grep -F "$search_key" | wc -l
+}
+
+function check_all_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI status | grep -F "$search_key" | wc -l
+}
+
+function check_all_status_detail_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI status detail | grep -F "$search_key" | wc -l
+}
+
+function verify_checkpoint_met()
+{
+ local master=$1
+ local slave=$2
+ $GEOREP_CLI $master $slave status detail| grep -F "Yes" | wc -l
+}
+
+function check_keys_distributed()
+{
+ local search_key=$(cat /var/lib/glusterd/geo-replication/master_slave_common_secret.pem.pub)
+ grep -F "$search_key" ~/.ssh/authorized_keys > /dev/null
+ echo $?
+}
+
+function check_common_secret_file()
+{
+ stat /var/lib/glusterd/geo-replication/master_slave_common_secret.pem.pub
+ echo $?
+}
+
+function create_rename_symlink_case()
+{
+ mkdir ${mastermnt}/MUL_REN_SYMLINK
+ cd ${mastermnt}/MUL_REN_SYMLINK
+ mkdir sym_dir1
+ ln -s "sym_dir1" sym1
+ mv sym1 sym2
+ mv sym2 sym3
+ mv sym3 sym4
+ cd -
+}
+
+function create_data()
+{
+ prefix=$1
+
+ # GF_FOP_MKNOD
+ # GF_FOP_MKDIR
+ # GF_FOP_UNLINK
+ # GF_FOP_RMDIR
+ # GF_FOP_SYMLINK
+ # GF_FOP_RENAME
+ # GF_FOP_LINK
+ # GF_FOP_SETXATTR
+ # GF_FOP_REMOVEXATTR
+ # GF_FOP_CREATE
+ # GF_FOP_SETATTR
+
+ # Regular file + data
+ echo "HelloWorld!" > ${master_mnt}/${prefix}_f1
+ touch ${master_mnt}/${prefix}_f2
+ touch ${master_mnt}/${prefix}_f3
+
+ # non-ascii filename test
+ echo "Hello non-ascii" > ${master_mnt}/${prefix}_f1_ಸಂತಸ
+ touch ${master_mnt}/${prefix}_f2_ಸಂತಸ
+ touch ${master_mnt}/${prefix}_f3_ಸಂತಸ
+
+ # dir
+ mkdir ${master_mnt}/${prefix}_d1
+ mkdir ${master_mnt}/${prefix}_d2
+ mkdir ${master_mnt}/${prefix}_d3
+
+ # non-ascii dir and filename test
+ mkdir ${master_mnt}/${prefix}_d1_ನನà³à²¨
+ mkdir ${master_mnt}/${prefix}_d2_ಸಂತಸ
+ mkdir ${master_mnt}/${prefix}_d3_ಸಂತಸ
+ echo "Hello non-ascii" > ${master_mnt}/${prefix}_d1_ನನà³à²¨/ಸಂತಸ
+
+ # Hardlink + non-ascii name
+ ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1
+ ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1_ಸಂತಸ
+
+ # Symlink
+ cd ${master_mnt}
+ ln -s ${prefix}_f1 ${prefix}_sl1
+ ln -s ${prefix}_f1 ${prefix}_sl1_ಸಂತಸ
+ cd -
+
+ # UNLINK
+ rm ${master_mnt}/${prefix}_f2
+ rm ${master_mnt}/${prefix}_f2_ಸಂತಸ
+
+ # RMDIR
+ rmdir ${master_mnt}/${prefix}_d2
+ rmdir ${master_mnt}/${prefix}_d2_ಸಂತಸ
+
+ # Rename - File
+ mv ${master_mnt}/${prefix}_f3 ${master_mnt}/${prefix}_f4
+ mv ${master_mnt}/${prefix}_f3_ಸಂತಸ ${master_mnt}/${prefix}_f4_ಸಂತಸ
+
+ # Rename - Dir
+ mv ${master_mnt}/${prefix}_d3 ${master_mnt}/${prefix}_d4
+ mv ${master_mnt}/${prefix}_d3_ಸಂತಸ ${master_mnt}/${prefix}_d4_ಸಂತಸ
+
+ # chown
+ touch ${master_mnt}/${prefix}_chown_f1
+ chown 1000:1000 ${master_mnt}/${prefix}_chown_f1
+ touch ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ
+ chown 1000:1000 ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ
+}
+
+function create_data_hang()
+{
+ prefix=$1
+ mkdir ${master_mnt}/${prefix}
+ cd ${master_mnt}/${prefix}
+ # ~1k files is required with 1 sync-job and hang happens if
+ # stderr buffer of tar/ssh executed with Popen is full (i.e., 64k).
+ # 64k is hit when ~800 files were not found while syncing data
+ # from master. So around 1k files is required to hit the condition.
+ for i in {1..1000}
+ do
+ echo "test data" > file$i
+ mv -f file$i file
+ done
+ cd -
+}
+
+function chown_file_ok()
+{
+ local file_owner=$(stat --format "%u:%g" "$1")
+ if test "X$file_owner" != "X1000:1000"; then echo 1; else echo 0; fi
+}
+
+function regular_file_ok()
+{
+ local file_type=$(stat --format "%F" "$1")
+ if test "X$file_type" != "Xregular file"; then echo 1; else echo 0; fi
+}
+
+function directory_ok()
+{
+ file_type=$(stat --format "%F" "$1")
+ if test "X$file_type" != "Xdirectory"; then echo 1; else echo 0; fi
+}
+
+function unlink_ok()
+{
+ stat "$1" > /dev/null 2>&1
+ rc=$?
+ echo $rc
+}
+
+function hardlink_file_ok()
+{
+ orig_file=$1
+ link_file=$2
+
+ orig_inode=$(stat --format "%i" "$orig_file")
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ link_inode=$(stat --format "%i" "$link_file")
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ if test $orig_inode != $link_inode; then
+ echo 1
+ else
+ echo 0
+ fi
+ fi
+ fi
+}
+
+function data_ok()
+{
+ path=$1
+ data1="$2"
+ data2=$(cat $path)
+ echo "data1:$data1"
+ echo "data2:$data2"
+ if test "X$data1" != "X$data2"; then
+ echo 1
+ else
+ echo 0
+ fi
+}
+
+function arequal_checksum()
+{
+ master=$1
+ slave=$2
+ ret=$(diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l)
+ echo x$ret
+}
+
+function symlink_ok()
+{
+ local orig_file_name=$1
+ local symlink_file=$2
+
+ local file_type=$(stat --format "%F" "$symlink_file")
+ if test "X$file_type" != "Xsymbolic link"; then
+ echo 1
+ else
+ local fname=$(readlink $symlink_file)
+ if test "X$fname" != "X$orig_file_name"; then
+ echo 2
+ else
+ echo 0
+ fi
+ fi
+
+}
+
+function rename_file_ok()
+{
+ old_name=$1
+ new_name=$2
+
+ if [ -f $old_name ]; then
+ echo 1
+ elif [ ! -f $new_name ]; then
+ echo 2
+ else
+ echo 0
+ fi
+}
+
+function rename_dir_ok()
+{
+ old_name=$1
+ new_name=$2
+
+ if [ -d $old_name ]; then
+ echo 1
+ elif [ ! -d $new_name ]; then
+ echo 2
+ else
+ echo 0
+ fi
+}
+
+function create_georep_session()
+{
+ $CLI system:: execute gsec_create
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ $CLI volume geo-rep $master $slave create push-pem
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ echo 0
+ fi
+ fi
+}
+
+# logrotate_simulate should be called (rotate_count + 1) times to cause
+# an unlink and a gfid re-allocation.
+# remember to keep the file name and rotate_count the same across the
+# calls
+function logrotate_simulate()
+{
+ file_name=$1
+ declare -i rotate_count=$2
+
+ while [ $rotate_count -ge 0 ]; do
+ source_file="$file_name.$((rotate_count))"
+ if [ $rotate_count -eq 0 ]; then
+ source_file="$file_name"
+ fi
+ if [ -f "${source_file}" ]; then
+ mv "${source_file}" "$file_name.$((rotate_count+1))"
+ fi
+ ((rotate_count--))
+ done
+
+ # logrotate causes gfid to be rellocated to a new file created
+ # after an unlink and a blind rename later causes georep session
+ # to go Faulty
+ # this should not happen if source basename on slave is tested
+ # to be linked with its own gfid as on master, before invoking
+ # the rename syscall
+ touch $file_name
+ rotate_count=$2
+ unlink_file_name="$file_name.$((rotate_count+1))"
+ unlink $unlink_file_name 2>/dev/null
+}
+
+function create_rename()
+{
+ file_name=$1
+ echo $file_name > $file_name
+ mv $file_name $file_name.bak
+}
+
+function create_rename_ok()
+{
+ file_name=$1
+ # after a log replay, we don't expect the original file
+ # to be recreated i.e. a dangling entry without a corresponding
+ # back-end gfid link should not exist on the slave
+ if [ -f "$file_name" ]; then
+ echo 1
+ else
+ echo 0
+ fi
+}
+
+function hardlink_rename()
+{
+ file_name=$1
+ echo $file_name > $file_name
+ ln $file_name $file_name.hl
+ mv $file_name.hl $file_name.hl1
+}
+
+function hardlink_rename_ok()
+{
+ file_name=$1
+ # the hardlink file should not exist on the slave after renaming
+ # to one of its links on changelog reprocessing
+ if [ ! -f "$file_name" ]; then
+ echo 1
+ elif [ ! -f "$file_name.hl1" ]; then
+ echo 2
+ elif [ -f "$file_name.hl" ]; then
+ echo 3
+ else
+ echo 0
+ fi
+}
+
+function create_symlink_rename_mkdir_data()
+{
+ mkdir ${master_mnt}/symlink_test1
+ touch ${master_mnt}/symlink_test1/file1
+ ln -s "./file1" ${master_mnt}/symlink_test1/sym_link
+ mv ${master_mnt}/symlink_test1/sym_link ${master_mnt}/symlink_test1/rn_sym_link
+ mkdir ${master_mnt}/symlink_test1/sym_link
+}
+function verify_symlink_rename_mkdir_data()
+{
+ sym_dir=$1
+ if [ ! -f $sym_dir/file1 ]; then
+ echo 1
+ elif [ ! -h $sym_dir/rn_sym_link ]; then
+ echo 2
+ elif [ ! -d $sym_dir/sym_link ]; then
+ echo 3
+ else
+ echo 0
+ fi
+}
+
+function create_rsnapshot_data()
+{
+ rm -rf /tmp/rsnapshot_symlinkbug
+ mkdir /tmp/rsnapshot_symlinkbug
+ ln -f -s /does/not/exist /tmp/rsnapshot_symlinkbug/a_symlink
+ rsync -a /tmp/rsnapshot_symlinkbug ${master_mnt}/
+ cp -al ${master_mnt}/rsnapshot_symlinkbug ${master_mnt}/rsnapshot_symlinkbug.0
+ ln -f -s /does/not/exist2 /tmp/rsnapshot_symlinkbug/a_symlink
+ rsync -a /tmp/rsnapshot_symlinkbug ${master_mnt}/
+ cp -al ${master_mnt}/rsnapshot_symlinkbug ${master_mnt}/rsnapshot_symlinkbug.1
+}
+
+function verify_rsnapshot_data()
+{
+ dir="$1/rsnapshot_symlinkbug"
+ dir0="$1/rsnapshot_symlinkbug.0"
+ dir1="$1/rsnapshot_symlinkbug.1"
+ if [ ! -d "$dir" ]; then
+ echo 1
+ elif [ ! -h $dir/a_symlink ]; then
+ echo 2
+ elif test "X$(readlink $dir/a_symlink)" != "X/does/not/exist2"; then
+ echo 3
+ elif [ ! -h $dir0/a_symlink ]; then
+ echo 4
+ elif test "X$(readlink $dir0/a_symlink)" != "X/does/not/exist"; then
+ echo 5
+ elif [ ! -h $dir1/a_symlink ]; then
+ echo 6
+ elif test "X$(readlink $dir1/a_symlink)" != "X/does/not/exist2"; then
+ echo 7
+ else
+ echo 0
+ fi
+}
+
+function create_hardlink_rename_data()
+{
+ dir=${master_mnt}/hardlink_rename_issue
+ mkdir $dir
+ echo "test_data" > $dir/f1
+ ln $dir/f1 $dir/f2
+ mv $dir/f2 $dir/f3
+ unlink $dir/f1
+}
+
+function verify_hardlink_rename_data()
+{
+ dir=$1/hardlink_rename_issue
+ if [ ! -d $dir ]; then
+ echo 1
+ elif [ -f $dir/f1 ]; then
+ echo 2
+ elif [ -f $dir/f2 ]; then
+ echo 3
+ elif [ ! -f $dir/f3 ]; then
+ echo 4
+ elif test "Xtest_data" != "X$(cat $dir/f3)"; then
+ echo 5
+ else
+ echo 0
+ fi
+}
+
+function check_slave_read_only()
+{
+ volum=$1
+ gluster volume info $1 | grep 'features.read-only: on'
+ echo $?
+}
+
+function create_rename_with_existing_destination()
+{
+ dir=$1/rename_with_existing_destination
+ mkdir $dir
+ for i in {1..5}
+ do
+ echo "Data_set$i" > $dir/data_set$i
+ mv $dir/data_set$i $dir/data_set -f
+ done
+}
+
+function verify_rename_with_existing_destination()
+{
+ dir=$1/rename_with_existing_destination
+
+ if [ ! -d $dir ]; then
+ echo 1
+ elif [ ! -f $dir/data_set ]; then
+ echo 2
+ elif [ -f $dir/data_set1 ]; then
+ echo 3
+ elif [ -f $dir/data_set2 ]; then
+ echo 4
+ elif [ -f $dir/data_set3 ]; then
+ echo 5
+ elif [ -f $dir/data_set4 ]; then
+ echo 6
+ elif [ -f $dir/data_set5 ]; then
+ echo 7
+ elif test "XData_set5" != "X$(cat $dir/data_set)"; then
+ echo 8
+ else
+ echo 0
+ fi
+}
diff --git a/tests/gfid2path/block-mount-access.t b/tests/gfid2path/block-mount-access.t
new file mode 100644
index 00000000000..b1726ad9604
--- /dev/null
+++ b/tests/gfid2path/block-mount-access.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a 2*2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start the volume
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+#CREATE
+fname=$M0/file1
+touch $fname;
+backpath=$B0/${V0}1/file1
+
+pgfid="00000000-0000-0000-0000-000000000001"
+
+#Check for the presence of xattr
+key="trusted.gfid2path"
+gfid2path_xattr=$(getfattr -h -d -m. $backpath 2>/dev/null | grep -a $key | cut -f1 -d'=')
+
+#Check getxattr
+TEST ! getfattr -h -n $gfid2path_xattr $M0/file1
+
+#Check listgetxattr
+EXPECT_NOT $gfid2path_xattr get_xattr_key $key $M0/file1
+
+#Check removexattr
+TEST ! setfattr -h -x $gfid2path_xattr $M0/file1
+
+#Check setxattr
+TEST ! setfattr -h -n "trusted.gfid2path.d16e15bafe6e4257" -v "$pgfid/file2" $M0/file1
+
+#Cleanup
+cleanup;
diff --git a/tests/gfid2path/get-gfid-to-path.t b/tests/gfid2path/get-gfid-to-path.t
new file mode 100644
index 00000000000..dea95f4c9f8
--- /dev/null
+++ b/tests/gfid2path/get-gfid-to-path.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a 1*2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start the volume
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --aux-gfid-mount --volfile-id=$V0 $M0;
+
+root_gfid="00000000-0000-0000-0000-000000000001"
+
+#Check for ROOT
+EXPECT "/" get_gfid2path $M0/.gfid/$root_gfid
+
+#CREATE
+fname=$M0/file1
+touch $fname;
+
+#Get gfid of file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/file1)
+
+#Get path from virt xattr
+EXPECT "/file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create hardlink and get path
+ln $fname $M0/hl_file1
+EXPECT "/file1" get_gfid2path $M0/.gfid/$gfid
+EXPECT "/hl_file1" get_gfid2path $M0/.gfid/$gfid
+
+#Rename and get path
+mv $fname $M0/rn_file1
+EXPECT "/hl_file1" get_gfid2path $M0/.gfid/$gfid
+EXPECT "/rn_file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create symlink and get path
+ln -s $fname $M0/sym_file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/sym_file1)
+EXPECT "/sym_file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create dir and get path
+mkdir -p $M0/dir1/dir2
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/dir1/dir2)
+EXPECT "/dir1/dir2" get_gfid2path $M0/.gfid/$gfid
+
+#Create file under dir2 and get path
+touch $M0/dir1/dir2/file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/dir1/dir2/file1)
+EXPECT "/dir1/dir2/file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create hardlink under dir2 and get path
+ln $M0/dir1/dir2/file1 $M0/dir1/hl_file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/dir1/dir2/file1)
+EXPECT "/dir1/dir2/file1" get_gfid2path $M0/.gfid/$gfid
+EXPECT "/dir1/hl_file1" get_gfid2path $M0/.gfid/$gfid
+
+cleanup;
diff --git a/tests/gfid2path/gfid2path_fuse.t b/tests/gfid2path/gfid2path_fuse.t
new file mode 100644
index 00000000000..d0fe1fc16ae
--- /dev/null
+++ b/tests/gfid2path/gfid2path_fuse.t
@@ -0,0 +1,166 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+CLI_SETGFID2PATH="gluster-setgfid2path";
+
+cleanup;
+
+XXHSUM_SOURCE="$(dirname $0)/../../contrib/xxhash/xxhsum.c $(dirname $0)/../../contrib/xxhash/xxhash.c"
+XXHSUM_EXEC=$(dirname $0)/xxhsum
+
+## Build xxhsum C source
+build_tester $XXHSUM_SOURCE -o $XXHSUM_EXEC -I$(dirname $0)/../../contrib/xxhash
+TEST [ -e $XXHSUM_EXEC ]
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a single brick volume (B=1)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+## disable gfid2path
+TEST $CLI volume set $V0 gfid2path disable
+
+pgfid="00000000-0000-0000-0000-000000000001"
+xxh64_file=$B0/${V0}1/xxh64_file
+
+# Create a file before enabling gfid2path
+fname=$M0/before_file1
+touch $fname;
+backpath=$B0/${V0}1/before_file1
+
+# Set gfid2path xattr
+TEST $CLI_SETGFID2PATH $backpath
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/before_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+## enable gfid2path
+TEST $CLI volume set $V0 gfid2path enable
+
+#CREATE
+fname=$M0/file1
+touch $fname;
+backpath=$B0/${V0}1/file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#MKNOD
+fname=$M0/mknod_file1
+mknod $fname p;
+backpath=$B0/${V0}1/mknod_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/mknod_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#LINK
+fname1=$M0/file1
+fname2=$M0/hl_file1
+ln $fname1 $fname2
+backpath1=$B0/${V0}1/file1
+backpath2=$B0/${V0}1/hl_file1
+
+#Check for the presence of two xattrs
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath1
+
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath2
+
+#RENAME
+fname1=$M0/file1
+fname2=$M0/rn_file1
+mv $fname1 $fname2
+backpath=$B0/${V0}1/rn_file1
+
+#Check for the presence of new xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#UNLINK
+fname1=$M0/hl_file1
+rm -f $fname1
+fname2=$M0/rn_file1
+backpath=$B0/${V0}1/rn_file1
+
+#Check removal of xattr
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#SYMLINK
+fname=rn_file1
+sym_fname=$M0/sym_file1
+ln -s $fname $sym_fname
+backpath=$B0/${V0}1/sym_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/sym_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#FINAL UNLINK
+fname=$M0/rn_file1
+sym_fname=$M0/sym_file1
+mknod_fname=$M0/mknod_file1
+
+rm -f $fname
+rm -f $sym_fname
+rm -f $mknod_fname
+TEST ! stat $fname
+TEST ! stat $sym_fname
+TEST ! stat $mknod_fname
+
+#Cleanups
+rm -f $STUB_EXEC
+cleanup;
diff --git a/tests/gfid2path/gfid2path_nfs.t b/tests/gfid2path/gfid2path_nfs.t
new file mode 100644
index 00000000000..d1ea7df2f4d
--- /dev/null
+++ b/tests/gfid2path/gfid2path_nfs.t
@@ -0,0 +1,152 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+XXHSUM_SOURCE="$(dirname $0)/../../contrib/xxhash/xxhsum.c $(dirname $0)/../../contrib/xxhash/xxhash.c"
+XXHSUM_EXEC=$(dirname $0)/xxhsum
+
+## Build xxhsum C source
+build_tester $XXHSUM_SOURCE -o $XXHSUM_EXEC -I$(dirname $0)/../../contrib/xxhash
+TEST [ -e $XXHSUM_EXEC ]
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a single brick volume (B=1)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+TEST $CLI volume set $V0 nfs.disable false
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Mount the volume
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+
+pgfid="00000000-0000-0000-0000-000000000001"
+xxh64_file=$B0/${V0}1/xxh64_file
+
+#CREATE
+fname=$N0/file1
+touch $fname;
+backpath=$B0/${V0}1/file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#MKNOD
+fname=$N0/mknod_file1
+mknod $fname p;
+backpath=$B0/${V0}1/mknod_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/mknod_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#LINK
+fname1=$N0/file1
+fname2=$N0/hl_file1
+ln $fname1 $fname2
+backpath1=$B0/${V0}1/file1
+backpath2=$B0/${V0}1/hl_file1
+
+#Check for the presence of two xattrs
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath1
+
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath2
+
+#RENAME
+fname1=$N0/file1
+fname2=$N0/rn_file1
+mv $fname1 $fname2
+backpath=$B0/${V0}1/rn_file1
+
+#Check for the presence of new xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#UNLINK
+fname1=$N0/hl_file1
+rm -f $fname1
+fname2=$N0/rn_file1
+backpath=$B0/${V0}1/rn_file1
+
+#Check removal of xattr
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#SYMLINK
+fname=rn_file1
+sym_fname=$N0/sym_file1
+ln -s $fname $sym_fname
+backpath=$B0/${V0}1/sym_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/sym_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#FINAL UNLINK
+fname=$N0/rn_file1
+sym_fname=$N0/sym_file1
+mknod_fname=$N0/mknod_file1
+
+rm -f $fname
+rm -f $sym_fname
+rm -f $mknod_fname
+TEST ! stat $fname
+TEST ! stat $sym_fname
+TEST ! stat $mknod_fname
+
+#Cleanups
+rm -f $STUB_EXEC
+cleanup;
diff --git a/tests/glusterfind/glusterfind-basic.t b/tests/glusterfind/glusterfind-basic.t
new file mode 100644
index 00000000000..ccb33fb1fc8
--- /dev/null
+++ b/tests/glusterfind/glusterfind-basic.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+##create .keys
+mkdir -p /var/lib/glusterd/glusterfind/.keys
+
+#create_and_start test_volume
+TEST $CLI volume create test-vol $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+TEST gluster volume start test-vol
+
+##Mount test-vol
+TEST glusterfs -s $H0 --volfile-id test-vol $M0
+
+TEST timestamp1=$(date +'%s')
+
+##Create files and dirs inside the mount point
+TEST mkdir -p $M0/dir1
+TEST touch $M0/file1
+
+##Glusterfind Create
+TEST glusterfind create sess_vol1 test-vol --force
+
+##################################################################################
+#Incremental crawl
+##################################################################################
+##Glusterfind Pre
+TEST glusterfind pre sess_vol1 test-vol output_file.txt
+
+#Glusterfind Post
+TEST glusterfind post sess_vol1 test-vol
+
+##Glusterfind List
+EXPECT '1' echo $(glusterfind list | grep sess_vol1 | wc -l)
+
+TEST timestamp2=$(date +'%s')
+
+##Glusterfind Query
+TEST glusterfind query test-vol --since-time $timestamp1 --end-time $timestamp2 output_file.txt
+
+#################################################################################
+#Full Crawl
+#################################################################################
+##Glusterfind Pre
+TEST glusterfind pre sess_vol1 test-vol output_file.txt --full --regenerate-outfile
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##Glusterfind Query commands
+TEST glusterfind query test-vol --full output_file.txt
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##using tag, full crawl
+TEST glusterfind query test-vol --full --tag-for-full-find NEW output_file.txt
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##using -field-separator option, full crawl
+glusterfind query test-vol --full output_file.txt --field-separator "=="
+EXPECT '1' echo $(grep 'NEW==dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW==file1' output_file.txt | wc -l)
+
+##Adding or Replacing a Brick from an Existing Glusterfind Session
+TEST gluster volume add-brick test-vol $H0:$B0/b4 force
+
+##To make existing session work after brick add
+TEST glusterfind create sess_vol test-vol --force
+EXPECT '1' echo $(glusterfind list | grep sess_vol1 | wc -l)
+
+##glusterfind delete
+TEST glusterfind delete sess_vol test-vol
+
+rm -rf output_file.txt
+cleanup;
diff --git a/tests/include.rc b/tests/include.rc
new file mode 100644
index 00000000000..0dc7d830449
--- /dev/null
+++ b/tests/include.rc
@@ -0,0 +1,1350 @@
+
+checkpoint_time="$(date +%s%N)"
+
+M0=${M0:=/mnt/glusterfs/0}; # 0th mount point for FUSE
+M1=${M1:=/mnt/glusterfs/1}; # 1st mount point for FUSE
+M2=${M2:=/mnt/glusterfs/2}; # 2nd mount point for FUSE
+M3=${M3:=/mnt/glusterfs/3}; # 3rd mount point for FUSE
+N0=${N0:=/mnt/nfs/0}; # 0th mount point for NFS
+N1=${N1:=/mnt/nfs/1}; # 1st mount point for NFS
+V0=${V0:=patchy}; # volume name to use in tests
+V1=${V1:=patchy1}; # volume name to use in tests
+GMV0=${GMV0:=master}; # master volume name to use in geo-rep tests
+GSV0=${GSV0:=slave}; # slave volume name to use in geo-rep tests
+GSV1=${GSV1:=slave1}; # slave volume name to use in geo-rep tests
+B0=${B0:=/d/backends}; # top level of brick directories
+WORKDIRS="$B0 $M0 $M1 $M2 $M3 $N0 $N1"
+
+ROOT_GFID="00000000-0000-0000-0000-000000000001"
+DOT_SHARD_GFID="be318638-e8a0-4c6d-977d-7a937aa84806"
+
+META_VOL=${META_VOL:=gluster_shared_storage}; # shared gluster storage volume used by snapshot scheduler, nfs ganesha and geo-rep.
+META_MNT=${META_MNT:=/var/run/gluster/shared_storage}; # Mount point of shared gluster volume.
+
+CC=cc
+OSTYPE=$(uname -s)
+
+env_dir=$(dirname $0)
+while true; do
+ ENV_RC=${env_dir}/env.rc
+ if [ -f ${ENV_RC} ]; then
+ break
+ fi
+ new_dir=$(dirname $env_dir)
+ if [ x"$new_dir" = x"$old_dir" ]; then
+ ENV_RC="/not/found"
+ break
+ fi
+ old_dir=$env_dir
+ env_dir=$new_dir
+done
+
+if [ ! -f $ENV_RC ]; then
+ echo "Aborting." | tee /dev/stderr
+ echo | tee /dev/stderr
+ echo "env.rc not found" | tee /dev/stderr
+ echo | tee /dev/stderr
+ echo "Please correct the problem and try again." | tee /dev/stderr
+ echo | tee /dev/stderr
+ exit 1
+fi
+. $ENV_RC
+
+H0=${H0:=`hostname`}; # hostname
+MOUNT_TYPE_FUSE="fuse.glusterfs"
+GREP_MOUNT_OPT_RO="grep (ro"
+GREP_MOUNT_OPT_RW="grep (rw"
+UMOUNT_F="umount -f"
+
+PATH=$PATH:${PWD}/tests/utils
+
+case $OSTYPE in
+Linux)
+ H0=${H0:=`hostname --fqdn`}; # hostname
+ ;;
+NetBSD)
+ MOUNT_TYPE_FUSE="puffs|perfuse|fuse.glusterfs"
+ GREP_MOUNT_OPT_RO="grep (read-only"
+ GREP_MOUNT_OPT_RW="grep -v (read-only"
+ UMOUNT_F="umount -f -R"
+ ;;
+*)
+ ;;
+esac
+
+DEBUG=${DEBUG:=0} # turn on debugging?
+
+PROCESS_DOWN_TIMEOUT=5
+PROCESS_UP_TIMEOUT=45
+NFS_EXPORT_TIMEOUT=20
+CHILD_UP_TIMEOUT=20
+PROBE_TIMEOUT=60
+PEER_SYNC_TIMEOUT=20
+REBALANCE_TIMEOUT=600
+REOPEN_TIMEOUT=20
+HEAL_TIMEOUT=80
+IO_HEAL_TIMEOUT=120
+MARKER_UPDATE_TIMEOUT=20
+JANITOR_TIMEOUT=60
+UMOUNT_TIMEOUT=5
+CONFIG_UPDATE_TIMEOUT=5
+AUTH_REFRESH_INTERVAL=10
+GRAPH_SWITCH_TIMEOUT=10
+UNLINK_TIMEOUT=5
+MDC_TIMEOUT=5
+IO_WAIT_TIMEOUT=5
+DISK_FAIL_TIMEOUT=80
+
+LOGDIR=$(gluster --print-logdir)
+
+statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump
+
+CLI="gluster --mode=script --wignore";
+CLI_NO_FORCE="gluster --mode=script";
+
+# CLI_IGNORE_PARTITION makes sure that the warning related to bricks being on
+# root partition is ignored while running the command in a "no force" mode
+CLI_IGNORE_PARTITION="gluster --mode=script --wignore-partition"
+
+function wait_delay() {
+ local delay="$1"
+ local interval="$2"
+ shift 2
+ local deadline="$(($(date +%s%N) + ${delay}000000000))"
+
+ $*
+ while [[ $? -ne 0 ]]; do
+ if [[ $(date +%s%N) -ge ${deadline} ]]; then
+ return 1
+ fi
+ sleep ${interval}
+ $*
+ done
+
+ return 0
+}
+
+_GFS () {
+ glusterfs "$@"
+ local mount_ret=$?
+ if [ $mount_ret != 0 ]; then
+ return $mount_ret
+ fi
+ local mount_point=${!#}
+ local i=0
+ while true; do
+ touch $mount_point/xy_zzy 2> /dev/null && break
+ i=$((i+1))
+ [ $i -lt 100 ] || break
+ sleep 0.1
+ done
+ rm -f $mount_point/xy_zzy
+ return $mount_ret
+}
+GFS="_GFS --attribute-timeout=0 --entry-timeout=0";
+
+mkdir -p $WORKDIRS
+
+case $OSTYPE in
+FreeBSD | Darwin)
+wc () {
+ if test "x$1" = "x-l"; then
+ awk '{ lines++ } END {print lines}'
+ fi
+ if test "x$1" = "x-w"; then
+ awk '{ words += NF } END {print words}' }
+ fi
+ if test "x$1" = "x-c"; then
+ awk '{ chars += length($0) + 1 } END {print chars}'
+ fi
+ if test "x$1" = "x-m"; then
+ awk '{ chars += length($0) + 1 } END {print chars}'
+ fi
+}
+;;
+NetBSD)
+wc() {
+ /usr/bin/wc $@ | sed 's/^ *\([0-9]*\).*$/\1/g'
+}
+;;
+esac
+
+testcnt=`egrep '^[[:space:]]*(EXPECT|EXPECT_NOT|TEST|EXPECT_WITHIN|EXPECT_KEYWORD)[[:space:]]' $0 | wc -l`
+expect_tests=`egrep '^[[:space:]]*TESTS_EXPECTED_IN_LOOP[[:space:]]*' $0`
+
+x_ifs=$IFS
+IFS=$'\n'
+for line in $expect_tests; do
+ expect_tests=`echo $line | cut -f 2 -d =`
+ testcnt=`expr $testcnt + $expect_tests`
+done
+IFS=$x_ifs
+
+echo "1..`echo $testcnt`"
+
+t=1
+
+function dbg()
+{
+ [ "x$DEBUG" = "x0" ] || echo "$*" >&2;
+}
+
+function G_LOG()
+{
+ local g_log_logdir;
+ g_log_logdir=`$CLI --print-logdir`
+ test -d $g_log_logdir
+ if [ $? != 0 ]; then
+ return
+ fi
+ local g_log_string;
+ g_log_string="++++++++++ G_LOG:$0: TEST: $@ ++++++++++"
+ g_log_string="`date -u +["%F %T.%6N"]`:$g_log_string"
+ local g_log_filename
+ for g_log_filename in `find $g_log_logdir/ -type f -name \*.log`;
+ do
+ echo "$g_log_string" >> "$g_log_filename"
+ done
+}
+
+function test_header()
+{
+ dbg "=========================";
+ dbg "TEST $t (line $TESTLINE): $*";
+ saved_cmd="$*"
+ start_time="$(date +%s%N)"
+}
+
+
+function test_footer()
+{
+ RET=$?
+ local lineno=$1
+ local err=$2
+ local end_time
+ local elapsed1
+ local elapsed2
+
+ end_time="$(date +%s%N)"
+ elapsed1="$(((start_time - checkpoint_time) / 1000000))"
+ elapsed2="$(((end_time - start_time) / 1000000))"
+ checkpoint_time="$end_time"
+ if [ $RET -eq 0 ]; then
+ printf "ok %3d [%7d/%7d] <%4d> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd";
+ else
+ printf "not ok %3d [%7d/%7d] <%4d> '%s' -> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd" "$err"
+ if [ "$EXIT_EARLY" = "1" ]; then
+ cleanup
+ exit $RET
+ fi
+ fi
+
+ dbg "RESULT $t: $RET";
+
+ t=`expr $t + 1`;
+}
+
+function test_expect_footer()
+{
+ local lineno=$1
+ local e=$2
+ local a=$3
+ local err=""
+
+ if ! [[ "$a" =~ $e ]]; then
+ err="Got \"$a\" instead of \"$e\""
+ fi
+ [[ "$a" =~ $e ]];
+
+ test_footer "$lineno" "$err";
+}
+
+function _EXPECT()
+{
+ TESTLINE=$1;
+ shift;
+ local a=""
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ e="$1";
+ shift;
+ a=$("$@" | tail -1)
+
+ if [ "x$e" = "x" ] ; then
+ test_expect_footer "$TESTLINE" "x$e" "x$a";
+ else
+ test_expect_footer "$TESTLINE" "$e" "$a";
+ fi
+}
+
+function test_expect_not_footer()
+{
+ local lineno=$1
+ local e=$2
+ local a=$3
+ local err=""
+
+ if [[ "$a" =~ $e ]]; then
+ err="Got \"$a\" when not expecting it"
+ fi
+
+ ! [[ "$a" =~ $e ]];
+ test_footer "$lineno" "$err";
+}
+
+function _EXPECT_NOT()
+{
+ TESTLINE=$1;
+ shift;
+ local a=""
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ e="$1";
+ shift;
+ a=$("$@" | tail -1)
+
+ if [ "x$e" = "x" ] ; then
+ test_expect_not_footer "$TESTLINE" "x$e" "x$a";
+ else
+ test_expect_not_footer "$TESTLINE" "$e" "$a";
+ fi
+}
+
+function _EXPECT_KEYWORD()
+{
+ TESTLINE=$1;
+ shift;
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ e="$1";
+ shift;
+ "$@" | tail -1 | grep -q "$e"
+
+ test_footer "$TESTLINE";
+}
+
+function _TEST()
+{
+ TESTLINE=$1;
+ shift;
+ local redirect=""
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ if [ "$1" = "!" ]; then
+ redirect="2>&1"
+ fi
+
+ eval "$@" >/dev/null $redirect
+
+ test_footer "$TESTLINE";
+}
+
+#This function should be used carefully.
+#The expected regex, given to this function, should be
+#used within ^ and $ to match exactly with the output of
+#command.
+function _EXPECT_WITHIN()
+{
+ TESTLINE=$1
+ shift;
+
+ local timeout=$1
+ shift;
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@"
+
+ e=$1;
+ a="";
+ shift;
+
+ local endtime="$(( ${timeout}000000000 + $(date +%s%N) ))"
+
+ # We *want* this to be globally visible.
+ EW_RETRIES=0
+
+ while [[ "$(date +%s%N)" < "$endtime" ]]; do
+ a=$("$@" | tail -1 ; exit ${PIPESTATUS[0]})
+ ## Check command success
+ if [ $? -ne 0 ]; then
+ break;
+ fi
+ ## Check match success
+ if [[ "$a" =~ $e ]]; then
+ break;
+ fi
+ sleep 0.25;
+ EW_RETRIES=$((EW_RETRIES+1))
+ done
+
+ if [ "x$e" = "x" ] ; then
+ test_expect_footer "$TESTLINE" "x$e" "x$a";
+ else
+ test_expect_footer "$TESTLINE" "$e" "$a";
+ fi
+}
+
+
+function SKIP_TESTS()
+{
+ dbg "Skipping tests $t-$testcnt";
+ while [ $t -le $testcnt ]; do
+ true ; test_footer;
+ done
+}
+
+
+function _TEST_IN_LOOP()
+{
+ testcnt=`expr $testcnt + 1`;
+ _TEST $@
+}
+
+function _EXPECT_WITHIN_TEST_IN_LOOP()
+{
+ testcnt=`expr $testcnt + 1`;
+ _EXPECT_WITHIN $@
+}
+
+which killall > /dev/null || {
+ killall() {
+ pkill $@
+ }
+}
+
+which pidof > /dev/null || {
+ pidof() {
+ $PYTHON pidof.py $@
+ }
+}
+
+stat -c %s /dev/null > /dev/null 2>&1 || {
+ stat() {
+ local format=""
+ local f=""
+
+ if [ "x$1" = "x-c" ] ; then
+ oformat=$2
+ shift
+ shift
+ files=$@
+ else
+ files=$@
+ fi
+
+ for f in $files ; do
+ format=$oformat
+
+ # %t/%T should return 0 for non devices.
+ case "${format}" in
+ *%t*|*%T*)
+ `which stat` -f '%HT' $f | grep -q 'Device$' || \
+ format=`echo "${format}" | sed 's/%t/0/g; s/%T/0/g;'`
+ ;;
+ *)
+ ;;
+ esac
+
+ if [ "x${format}" = "x" ] ; then
+ `which stat` $f
+ else
+ cmd=""
+ case $format in
+ *%u*) cmd="${cmd} s/%u/`$( which stat ) -f %u $f`/g;" ;&
+ *%g*) cmd="${cmd} s/%g/`$( which stat ) -f %g $f`/g;" ;&
+ *%a*) cmd="${cmd} s/%a/`$( which stat ) -f %p $f |
+ sed 's/^..//; s/^0//'`/g;" ;&
+ *%A*) cmd="${cmd} s/%A/`ls -ld $f|awk '{print $1}'`/g;" ;&
+ *%s*) cmd="${cmd} s/%s/`$( which stat ) -f %z $f`/g;" ;&
+ *%h*) cmd="${cmd} s/%h/`$( which stat ) -f %l $f`/g;" ;&
+ *%F*) cmd="${cmd} s/%F/`$( which stat ) -f %HT $f | sed '
+ s/Directory/directory/;
+ s/Fifo File/fifo/;
+ s/Symbolic Link/symbolic link/;
+ s/Regular File/regular file/;
+ s/Block Device/block special file/;
+ s/Character Device/character special file/;
+ ' | sed \"$(
+ test -s $f || echo 's/regular file/regular empty file/g'
+ )\"`/g;" ;&
+ *%n*) cmd="${cmd} s|%n|`$( which stat ) -f %N $f`|g;" ;&
+ *%Y*) cmd="${cmd} s/%Y/`$( which stat ) -f %m $f`/g;" ;&
+ *%X*) cmd="${cmd} s/%X/`$( which stat ) -f %a $f`/g;" ;&
+ *%Z*) cmd="${cmd} s/%Z/`$( which stat ) -f %c $f`/g;" ;&
+ *%.Z*) cmd="${cmd} s/%.Z/`$( which stat ) -f %.9Fc $f`/g;" ;&
+ *%b*) cmd="${cmd} s/%b/`$( which stat ) -f %b $f`/g;" ;&
+ *%B*) cmd="${cmd} s/%B/512/g;" ;&
+ *%t*) cmd="${cmd} s/%t/`$( which stat ) -f %XHr $f`/g;" ;&
+ *%T*) cmd="${cmd} s/%T/`$( which stat ) -f %XLr $f`/g;" ;&
+ esac
+
+ `which stat` -f "`echo $format|sed \"$cmd\"`" $f
+ fi
+ done
+ }
+}
+
+function signal_pids() {
+ local sig="$1"
+ shift
+ local pids=($*)
+
+ if [[ ${#pids[@]} -gt 0 ]]; then
+ kill -${sig} ${pids[@]} 2>/dev/null || true
+ fi
+}
+
+function check_pids() {
+ local pids=($*)
+ local tmp=()
+ local pid
+
+ for pid in "${pids[@]}"; do
+ kill -0 "${pid}" 2>/dev/null && tmp+=(${pid})
+ done
+
+ echo "${tmp[@]}"
+}
+
+function pids_alive() {
+ local pids=($*)
+
+ if [[ "$(check_pids ${pids[@]})" != "" ]]; then
+ return 1;
+ fi
+
+ return 0
+}
+
+function terminate_pids() {
+ local pids=($*)
+
+ signal_pids TERM ${pids[@]}
+ wait_delay ${PROCESS_DOWN_TIMEOUT} 0.1 pids_alive ${pids[@]}
+ if [[ $? -ne 0 ]]; then
+ pids=($(check_pids ${pids[@]}))
+ signal_pids KILL ${pids[@]}
+ wait_delay 1 0.1 pids_alive ${pids[@]}
+ if [[ $? -ne 0 ]]; then
+ return 2
+ fi
+
+ return 1
+ fi
+
+ return 0
+}
+
+function process_pids() {
+ local proc
+ local pids=()
+
+ for proc in $*; do
+ pids+=($(pgrep ${proc}))
+ done
+
+ echo "${pids[@]}"
+}
+
+## Lock files should get automatically removed once "usradd" or "groupadd"
+## command finishes. But sometimes we encounter situations (bugs) where
+## some of these files may not get properly unlocked after the execution of
+## the command. In that case, when we execute useradd next time, it may show
+## the error “cannot lock /etc/password†or “unable to lock group fileâ€.
+## So, to avoid any such errors, check for any lock files under /etc.
+## and remove those.
+
+function remove_lock_files()
+{
+ if [ ! -f /etc/passwd.lock ];
+ then
+ rm -rf /etc/passwd.lock;
+ fi
+
+ if [ ! -f /etc/group.lock ];
+ then
+ rm -rf /etc/group.lock;
+ fi
+
+ if [ ! -f /etc/shadow.lock ];
+ then
+ rm -rf /etc/shadow.lock;
+ fi
+
+ if [ ! -f /etc/gshadow.lock ];
+ then
+ rm -rf /etc/gshadow.lock;
+ fi
+}
+
+
+function cleanup()
+{
+ local end_time
+
+ # Prepare flags for umount
+ case `uname -s` in
+ Linux)
+ flag="-l"
+ ;;
+ NetBSD)
+ flag="-f -R"
+ ;;
+ FreeBSD|Darwin)
+ flag="-f"
+ ;;
+ *)
+ flag=""
+ ;;
+ esac
+
+ # Clean up lock files.
+ remove_lock_files
+
+ # Clean up all client mounts
+ for m in `mount | grep fuse.glusterfs | awk '{print $3}'`; do
+ umount $flag $m
+ done
+
+ # Unmount all well known mount points
+ umount $flag $M0 2>/dev/null || umount -f $M0 2>/dev/null || true;
+ umount $flag $M1 2>/dev/null || umount -f $M1 2>/dev/null || true;
+ umount $flag $M2 2>/dev/null || umount -f $M2 2>/dev/null || true;
+ umount $flag $N0 2>/dev/null || umount -f $N0 2>/dev/null || true;
+ umount $flag $N1 2>/dev/null || umount -f $N1 2>/dev/null || true;
+
+
+ # unmount all stale mounts from /tmp, This is a temporary work around
+ # till the stale mount in /tmp is found.
+ umount $flag /tmp/mnt* 2>/dev/null
+
+
+ # Send SIGTERM to all gluster processes and rpc.statd that are still running
+ terminate_pids $(process_pids glusterfs glusterfsd glusterd rpc.statd)
+
+ test x"$OSTYPE" = x"NetBSD" && pkill -9 perfused || true
+
+ # unregister nfs and related services from portmapper/rpcbind
+ ## nfs
+ rpcinfo -d 100003 3 2>/dev/null || true;
+ ## mountd
+ rpcinfo -d 100005 1 2>/dev/null || true;
+ rpcinfo -d 100005 3 2>/dev/null || true;
+ ## nlockmgr
+ rpcinfo -d 100021 1 2>/dev/null || true;
+ rpcinfo -d 100021 4 2>/dev/null || true;
+ ## nfs_acl
+ rpcinfo -d 100227 3 2>/dev/null || true;
+
+ # unmount brick filesystems after killing daemons
+ MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'`
+ for m in $MOUNTPOINTS;
+ do
+ umount $flag $m
+ done
+
+ # Cleanup lvm
+ type cleanup_lvm &>/dev/null && cleanup_lvm || true;
+
+ # Destroy loop devices
+ # TODO: This should be a function DESTROY_LOOP
+ case `uname -s` in
+ Linux)
+ LOOPDEVICES=`losetup -a | grep "$B0/" | \
+ awk '{print $1}' | tr -d :`
+ for l in $LOOPDEVICES;
+ do
+ losetup -d $l
+ done
+ ;;
+ NetBSD)
+ # cleanup loopback device with unmounted backing store
+ for vnd in /dev/vnd* ; do
+ vnconfig -l ${vnd} 2>&1 | \
+ grep -q 'Bad file descriptor' && vnconfig -u ${vnd}
+ done
+
+ vnd=`vnconfig -l | \
+ awk '!/not in use/{printf("%s%s:%d ", $1, $2, $5);}'`
+ for l in ${vnd} ; do
+ dev=${l%%:*}
+ tmp=${l#*:}
+ fs=${tmp%%:*}
+ inode=${tmp#*:}
+ file=`find -x ${fs} -inum ${inode} -print -exit`
+ echo ${file} | grep "$B0/" && \
+ LOOPDEVICES="${LOOPDEVICES} $dev"
+ done
+ for l in $LOOPDEVICES;
+ do
+ vnconfig -u $l
+ done
+ ;;
+ *)
+ echo "`uname -s` loopback device supportmissing"
+ ;;
+ esac
+
+ # remove contents of "GLUSTERD_WORKDIR" except hooks and groups
+ # directories.
+ if [ -n $GLUSTERD_WORKDIR ]
+ then
+ find $GLUSTERD_WORKDIR/* -maxdepth 0 -name 'hooks' -prune \
+ -o -name 'groups' -prune -o -exec rm -rf '{}' ';'
+ else
+ echo "GLUSTERD_WORKDIR is not set"
+ fi
+
+ # Complete cleanup time
+ rm -rf "$B0/*" "/etc/glusterd/*";
+ rm -rf $WORKDIRS
+ find $GLUSTERD_PIDFILEDIR -name "*.pid" | xargs rm -rf
+ leftover=""
+ for d in $WORKDIRS ; do
+ if test -d $d ; then
+ leftover="$leftover $d"
+ fi
+ done
+ if [ "x$leftover" != "x" ] ; then
+ echo "Aborting."
+ echo
+ echo "$d could not be deleted, here are the left over items"
+ for d in $leftover; do
+ find $d -exec ls -ld {} \;
+ done
+ echo
+ echo "Please correct the problem and try again."
+ echo
+ return 1;
+ fi >&2
+
+ mkdir -p $WORKDIRS
+ # This is usually the last thing a test script calls, so our return
+ # value becomes their exit value. While it's not great for the mkdir
+ # above to fail, promoting that into a failure of the whole test (and
+ # thus of an entire regression-test run) seems a bit excessive. Make
+ # sure we return good status anyway.
+
+ return 0
+}
+
+function force_terminate () {
+ local ret=$?;
+ >&2 echo -e "\nreceived external"\
+ "signal --`kill -l $ret`--, calling 'cleanup' ...\n";
+ cleanup;
+ exit $ret;
+}
+
+trap force_terminate INT TERM HUP
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function cleanup_tester ()
+{
+ local exe=$1
+ rm -f $exe
+}
+
+function build_tester ()
+{
+ local cfile=$1
+ local fname=$(basename "$cfile")
+ local ext="${fname##*.}"
+ local execname="${fname%.*}"
+ shift
+ local cflags=$*
+ if [ `echo $cflags | grep -c "lgfapi" ` -gt 0 ]
+ then
+ cflags="$cflags $(pkg-config glusterfs-api --cflags-only-I --libs-only-L)"
+ fi
+ $CC -g -o $(dirname $cfile)/$execname $cfile $cflags
+}
+
+function process_leak_count ()
+{
+ local pid=$1;
+ return $(ls -lh /proc/$pid/fd | grep "(deleted)"| wc -l)
+}
+
+which truncate > /dev/null || {
+ truncate() {
+ local nocreate=0
+ local ioblocks=0
+ local fileref=""
+ local newsize=""
+
+ args=`getopt xor:s: $*`
+ if [ $? -ne 0 ]; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2
+ fi
+ set -- $args
+ while [ $# -gt 0 ]; do
+ case "$1" in
+ -c)
+ nocreate=1;
+ ;;
+ -o)
+ ioblocks=1;
+ echo "Unimplemented -o option"
+ exit 2
+ ;;
+ -r)
+ fileref=$2;
+ shift;
+ ;;
+ -s)
+ newsize=$2;
+ shift;
+ ;;
+ --)
+ shift;
+ break;
+ ;;
+ *)
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ ;;
+ esac
+ shift
+ done
+
+ if [ "x$newsize" = "x" -a "x$fileref" = "x" ] ; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ fi
+
+ if [ "x$newsize" != "x" -a "x$fileref" != "x" ] ; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ fi
+
+ if [ "x$newsize" != "x" ] ; then
+ echo $newsize | grep -q '^[-_<>%/]' && {
+ echo "Unimplemented prefix in ${newsize}"
+ exit 2;
+ }
+
+ echo $newsize | egrep -q '[TPEZY]B?$' && {
+ echo "Unit not implemented for ${newsize}"
+ exit 2;
+ }
+
+ case $newsize in
+ *KB)
+ newsize=$(( ${newsize/KB/} * 1000 ))
+ ;;
+ *K)
+ newsize=$(( ${newsize/K/} * 1024 ))
+ ;;
+ *MB)
+ newsize=$(( ${newsize/MB/} * 1000 * 1000 ))
+ ;;
+ *M)
+ newsize=$(( ${newsize/M/} * 1024 * 1024 ))
+ ;;
+ *GB)
+ newsize=$(( ${newsize/GB/} * 1000 * 1000 * 1000 ))
+ ;;
+ *G)
+ newsize=$(( ${newsize/G/} * 1024 * 1024 * 1024 ))
+ ;;
+ esac
+
+ fi
+
+ if [ "x$fileref" != "x" ] ; then
+ if [ ! -f $fileref ] ; then
+ echo "File does not exists: ${fileref}"
+ exit 2;
+ fi
+ newsize=`ls -l ${fileref}|awk '{print $5}'`
+ fi
+
+ if [ $# -eq 0 ]; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ fi
+
+ for f in $* ; do
+ if [ "x$nocreate" = "x1" -a ! -f $f ] ; then
+ continue;
+ fi
+
+ dd bs=1 seek=$newsize if=/dev/null of=$f msgfmt=quiet
+ done
+ }
+}
+
+which md5sum > /dev/null || {
+ md5sum() {
+ for f in $* ; do
+ md5 $f | awk -F'[() ]' '{printf("%s %s\n", $6, $3)}'
+ done
+ }
+}
+
+which setfattr > /dev/null || {
+ setfattr() {
+ $PYTHON setfattr.py $@
+ }
+}
+
+which getfattr > /dev/null || {
+ getfattr() {
+ $PYTHON getfattr.py $@
+ }
+}
+
+which sha1sum > /dev/null || {
+ sha1sum() {
+ case $OSTYPE in
+ Darwin)
+ for f in $* ; do
+ openssl sha1 $f | awk -F'[() ]' '{printf("%s %s\n", $4, $2)}'
+ done
+ ;;
+ NetBSD | FreeBSD)
+ for f in $* ; do
+ sha1 $f | awk -F'[() ]' '{printf("%s %s\n", $6, $3)}'
+ done
+ ;;
+ esac
+ }
+}
+
+userdel --help 2>/dev/null | grep -q -- '--force' || {
+ userdel() {
+ if [ "x$1" = "x--force" ]; then
+ user=$2
+ else
+ user=$1
+ fi
+ eval "$( which userdel ) $user"
+ }
+}
+
+useradd --help 2>/dev/null | grep -q -- '--no-create-home' || {
+ useradd() {
+ # Just remove -M (do not create home) which is the default
+ # other options are identical
+ args=`echo $*|sed 's/-M//'`
+ eval "$( which useradd ) $args"
+ }
+}
+
+userdel --help 2>/dev/null | grep -q -- '--force' || {
+ userdel() {
+ if [ "x$1" = "x--force" ]; then
+ user=$2
+ else
+ user=$1
+ fi
+ eval "$( which userdel ) $user"
+ }
+}
+
+useradd --help 2>/dev/null | grep -q -- '--no-create-home' || {
+ useradd() {
+ # Just remove -M (do not create home) which is the default
+ # other options are identical
+ args=`echo $*|sed 's/-M//'`
+ eval "$( which useradd ) $args"
+ }
+}
+
+DBG_TEST () {
+ read -p "execute \"$*\"? " x;
+ case $x in
+ 'y')
+ _TEST "$@"
+ ;;
+ 'q')
+ exit 0
+ ;;
+ *)
+ echo "skipping"
+ ;;
+ esac
+}
+
+alias EXPECT='_EXPECT $LINENO'
+alias EXPECT_NOT='_EXPECT_NOT $LINENO'
+if [ -n "$GF_INTERACTIVE" ]; then
+ alias TEST='DBG_TEST $LINENO'
+else
+ alias TEST='_TEST $LINENO'
+fi
+alias EXPECT_WITHIN='_EXPECT_WITHIN $LINENO'
+alias EXPECT_KEYWORD='_EXPECT_KEYWORD $LINENO'
+alias TEST_IN_LOOP='_TEST_IN_LOOP $LINENO'
+alias EXPECT_WITHIN_TEST_IN_LOOP='_EXPECT_WITHIN_TEST_IN_LOOP $LINENO'
+shopt -s expand_aliases
+
+if [ x"$OSTYPE" = x"Linux" ]; then
+ alias dd="dd status=none"
+elif [ x"$OSTYPE" = x"NetBSD" ]; then
+ alias dd="dd msgfmt=quiet"
+fi
+# MacOS doesn't seem to support either option. Doing nothing at all is
+# probably the safest option there and on anything we don't recognize, but
+# if you want to reduce the noise level and know the correct option for
+# your favorite platform please feel free to add it here.
+
+function SETUP_LOOP ()
+{
+ if [ $# != 1 ] ; then
+ echo "SETUP_LOOP usage" >&2
+ return 1;
+ fi
+
+ backend=$1
+
+ case ${OSTYPE} in
+ Linux)
+ losetup --find --show ${backend}
+ ;;
+ NetBSD)
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${backend}
+ echo ${vnd}
+ ;;
+ *)
+ echo "Please define SETUP_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function MKFS_LOOP ()
+{
+ args=`getopt i: $*`
+ if [ $? -ne 0 ] ; then
+ echo "MKFS_LOOP usage" >&2
+ return 1;
+ fi
+ set -- ${args}
+
+ isize=""
+ while test $# -gt 0; do
+ case "$1" in
+ -i) isize=$2; shift ;;
+ --) shift; break ;;
+ esac
+ shift
+ done
+
+ dev=$1
+
+ case ${OSTYPE} in
+ Linux)
+ test "x${isize}" != "x" && isize="-i size=${isize}"
+ mkfs.xfs -f ${isize} ${dev}
+ ;;
+ NetBSD)
+ test "x${isize}" != "x" && isize="-i ${isize}"
+
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${dev}
+ else
+ vnd=${dev}
+ fi
+ newfs ${isize} /dev/r${vnd}a
+ ;;
+ *)
+ echo "Please define MKFS_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+# usage: log_newer timestamp "string"
+# search in glusterfs logs for "string" logged after timestamp seconds
+# since the Epoch (usually obtained by date +%s)
+log_newer()
+{
+ ts=$1
+ msg=$2
+ logdir=`$CLI --print-logdir`
+
+ local x_ifs=$IFS
+ IFS="["
+ for date in `grep -hr "$msg" $logdir | grep -v "G_LOG" | awk -F '[\]]' '{print $1}'` ; do
+ if [ `date -d "$date" +%s` -gt $ts ] ; then
+ IFS=$x_ifs
+ return 0;
+ fi
+ done 2>/dev/null
+ IFS=$x_ifs
+ return 1
+}
+
+function MOUNT_LOOP ()
+{
+ if [ $# != 2 ] ; then
+ echo "MOUNT_LOOP usage" >&2
+ return 1;
+ fi
+
+ dev=$1
+ target=$2
+
+ case ${OSTYPE} in
+ Linux)
+ echo ${dev} | grep -q '^/dev/loop'
+ if [ $? -eq 0 ] ; then
+ mount -t xfs ${dev} ${target}
+ else
+ mount -o loop ${dev} ${target}
+ fi
+ ;;
+ NetBSD)
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ fi
+
+ mount /dev/${dev}a ${target} >&2
+ if [ $? -ne 0 ] ; then
+ echo "failed to mount /dev/${dev}a on ${target}" >&2
+ return 1;
+ fi
+
+ mkdir -p ${target}/.attribute/system ${target}/.attribute/user
+ mount -u -o extattr ${target} >&2
+
+ ;;
+ *)
+ echo "Please define MOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function UMOUNT_LOOP ()
+{
+ case ${OSTYPE} in
+ Linux)
+ force_umount $*
+ ;;
+ NetBSD)
+ for target in $* ; do
+ dev=`mount | awk -v target=${target} '($3 == target) {print $1}'`
+ force_umount ${target}
+ echo ${dev} | grep -q '^/dev/vnd'
+ if [ $? -eq 0 ] ; then
+ dev=`echo ${dev} | sed 's|^/dev/||; s|a$||'`
+ vnconfig -u ${dev}
+ else
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ if [ "x${dev}" != "x" ] ; then
+ vnconfig -u ${dev}
+ fi
+ fi
+ done
+ ;;
+ *)
+ echo "Please define UMOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function SETUP_LOOP ()
+{
+ if [ $# != 1 ] ; then
+ echo "SETUP_LOOP usage" >&2
+ return 1;
+ fi
+
+ backend=$1
+
+ case ${OSTYPE} in
+ Linux)
+ losetup --find --show ${backend}
+ ;;
+ NetBSD)
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${backend}
+ echo ${vnd}
+ ;;
+ *)
+ echo "Please define SETUP_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function MKFS_LOOP ()
+{
+ args=`getopt i: $*`
+ if [ $? -ne 0 ] ; then
+ echo "MKFS_LOOP usage" >&2
+ return 1;
+ fi
+ set -- ${args}
+
+ isize=""
+ while test $# -gt 0; do
+ case "$1" in
+ -i) isize=$2; shift ;;
+ --) shift; break ;;
+ esac
+ shift
+ done
+
+ dev=$1
+
+ case ${OSTYPE} in
+ Linux)
+ test "x${isize}" != "x" && isize="-i size=${isize}"
+ mkfs.xfs -f ${isize} ${dev}
+ ;;
+ NetBSD)
+ test "x${isize}" != "x" && isize="-i ${isize}"
+
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${dev}
+ else
+ vnd=${dev}
+ fi
+ newfs ${isize} /dev/r${vnd}a
+ ;;
+ *)
+ echo "Please define MKFS_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function MOUNT_LOOP ()
+{
+ if [ $# != 2 ] ; then
+ echo "MOUNT_LOOP usage" >&2
+ return 1;
+ fi
+
+ dev=$1
+ target=$2
+
+ case ${OSTYPE} in
+ Linux)
+ echo ${dev} | grep -q '^/dev/loop'
+ if [ $? -eq 0 ] ; then
+ mount -t xfs ${dev} ${target}
+ else
+ mount -o loop ${dev} ${target}
+ fi
+ ;;
+ NetBSD)
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ fi
+
+ mount /dev/${dev}a ${target} >&2
+ if [ $? -ne 0 ] ; then
+ echo "failed to mount /dev/${dev}a on ${target}" >&2
+ return 1;
+ fi
+
+ mkdir -p ${target}/.attribute/system ${target}/.attribute/user
+ mount -u -o extattr ${target} >&2
+
+ ;;
+ *)
+ echo "Please define MOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function UMOUNT_LOOP ()
+{
+ case ${OSTYPE} in
+ Linux)
+ force_umount $*
+ ;;
+ NetBSD)
+ for target in $* ; do
+ dev=`mount | awk -v target=${target} '($3 == target) {print $1}'`
+ force_umount ${target}
+ echo ${dev} | grep -q '^/dev/vnd'
+ if [ $? -eq 0 ] ; then
+ dev=`echo ${dev} | sed 's|^/dev/||; s|a$||'`
+ vnconfig -u ${dev}
+ else
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ if [ "x${dev}" != "x" ] ; then
+ vnconfig -u ${dev}
+ fi
+ fi
+ done
+ ;;
+ *)
+ echo "Please define UMOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function STAT()
+{
+ stat $1
+ echo $?
+}
+
+function STAT_INO()
+{
+ local ino=$(stat -c '%i' $1)
+ if [ $? -eq 0 ]; then
+ echo $ino
+ else
+ echo 0
+ fi
+}
+
+function get_md5_sum()
+{
+ local file=$1;
+ md5_sum=$(md5sum $file | awk '{print $1}');
+ echo $md5_sum
+}
diff --git a/tests/line-coverage/afr-heal-info.t b/tests/line-coverage/afr-heal-info.t
new file mode 100644
index 00000000000..182665917c4
--- /dev/null
+++ b/tests/line-coverage/afr-heal-info.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#Test that parallel heal-info command execution doesn't result in spurious
+#entries with locking-scheme granular
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+
+function write_and_del_file {
+ dd of=$M0/a.txt if=/dev/zero bs=1024k count=100
+ rm -f $M0/b.txt
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 locking-scheme granular
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a.txt $M0/b.txt
+write_and_del_file &
+touch $B0/f1 $B0/f2
+
+# All above is similar to basic/afr/heal-info.t
+
+TEST $CLI volume heal $V0 enable
+TEST $CLI volume heal $V0 info --xml
+TEST $CLI volume heal $V0 info summary
+TEST $CLI volume heal $V0 info summary --xml
+TEST $CLI volume heal $V0 info split-brain
+TEST $CLI volume heal $V0 info split-brain --xml
+
+TEST $CLI volume heal $V0 statistics heal-count
+
+# It may fail as the file is not in splitbrain
+$CLI volume heal $V0 split-brain latest-mtime /a.txt
+
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume stop $V0
+cleanup;
diff --git a/tests/line-coverage/arbiter-coverage.t b/tests/line-coverage/arbiter-coverage.t
new file mode 100755
index 00000000000..82b470141b5
--- /dev/null
+++ b/tests/line-coverage/arbiter-coverage.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 arbiter 1 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+$(dirname $0)/../basic/rpc-coverage.sh $M1 >/dev/null
+./glfsxmp $V0 $H0 >/dev/null
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+## Finish up
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/line-coverage/cli-peer-and-volume-operations.t b/tests/line-coverage/cli-peer-and-volume-operations.t
new file mode 100644
index 00000000000..0cf8dbe81f9
--- /dev/null
+++ b/tests/line-coverage/cli-peer-and-volume-operations.t
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../volume.rc
+
+function peer_count {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup
+
+TEST launch_cluster 3
+
+TEST $CLI_1 system uuid reset
+
+## basic peer commands
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2
+
+#probe a unreachable node
+TEST kill_glusterd 3
+TEST ! $CLI_1 peer probe $H3
+
+#detach a node which is not a part of cluster
+TEST ! $CLI_1 peer detach $H3
+TEST ! $CLI_1 peer detach $H3 force
+
+TEST start_glusterd 3
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+
+# probe a node which is already part of cluster
+TEST $CLI_1 peer probe $H3
+
+#probe an invalid address
+TEST ! $CLI_1 peer probe 1024.1024.1024.1024
+
+TEST $CLI_1 pool list
+
+TEST $CLI_1 --help
+TEST $CLI_1 --version
+TEST $CLI_1 --print-logdir
+TEST $CLI_1 --print-statedumpdir
+
+# try unrecognised command
+TEST ! $CLI_1 volume
+TEST pidof glusterd
+
+## all help commands
+TEST $CLI_1 global help
+TEST $CLI_1 help
+
+TEST $CLI_1 peer help
+TEST $CLI_1 volume help
+TEST $CLI_1 volume bitrot help
+TEST $CLI_1 volume quota help
+TEST $CLI_1 snapshot help
+
+## volume operations
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+# create a volume with already existing volume name
+TEST ! $CLI_1 volume create $V0 $H1:$B1/$V1 $H2:$B2/$V1
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+# Mount the volume and create files
+TEST glusterfs -s $H1 --volfile-id $V0 $M1
+TEST touch $M1/file{1..100}
+
+#fails because $V0 is not shd compatible
+TEST ! $CLI_1 volume status $V0 shd
+
+#test explicitly provided options
+TEST $CLI_1 --timeout=120 --log-level=INFO volume status
+
+#changing timezone to a different one, to check localtime logging feature
+TEST export TZ='Asia/Kolkata'
+TEST restart_glusterd 1
+
+#localtime logging enable
+TEST $CLI_1 volume set all cluster.localtime-logging enable
+EXPECT '1' logging_time_check $LOGDIR
+
+#localtime logging disable
+TEST $CLI_1 volume set all cluster.localtime-logging disable
+EXPECT '0' logging_time_check $LOGDIR
+
+#changing timezone back to original timezone
+TEST export TZ='UTC'
+
+#negative tests for volume options
+#'set' option to enable quota/inode-quota is now depreciated
+TEST ! $CLI_1 volume set $V0 quota enable
+TEST ! $CLI_1 volume set $V0 inode-quota enable
+
+#invalid transport type 'rcp'
+TEST ! $CLI_1 volume set $V0 config.transport rcp
+
+#'op-version' option is not valid for a single volume
+TEST ! $CLI_1 volume set $V0 cluster.op-version 72000
+
+#'op-version' option can't be used with any other option
+TEST ! $CLI_1 volume set all cluster.localtime-logging disable cluster.op-version 72000
+
+#invalid format of 'op-version'
+TEST ! $CLI_1 volume set all cluster.op-version 72-000
+
+#provided 'op-version' value is greater than max allowed op-version
+op_version=$($CLI_1 volume get all cluster.max-op-version | awk 'NR==3 {print$2}')
+op_version=$((op_version+1000)) #this can be any number greater than 0
+TEST ! $CLI_1 volume set all cluster.op-version $op_version
+
+#provided 'op-verison' value cannot be less than the current cluster op-version value
+TEST ! $CLI_1 volume set all cluster.op-version 00000
+
+# system commnds
+TEST $CLI_1 system help
+TEST $CLI_1 system uuid get
+TEST $CLI_1 system getspec $V0
+TEST $CLI_1 system getwd
+TEST $CLI_1 system fsm log
+
+# Both these may fail, but it covers xdr functions and some
+# more code in cli/glusterd
+$CLI_1 system:: mount test local:/$V0
+$CLI_1 system:: umount $M0 lazy
+$CLI_1 system:: copy file options
+$CLI_1 system:: portmap brick2port $H0:$B0/brick
+$CLI_1 system:: uuid reset
+
+cleanup
diff --git a/tests/line-coverage/cli-volume-top-profile-coverage.t b/tests/line-coverage/cli-volume-top-profile-coverage.t
new file mode 100644
index 00000000000..35713c26faa
--- /dev/null
+++ b/tests/line-coverage/cli-volume-top-profile-coverage.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+# Creating cluster
+TEST launch_cluster 3
+
+# Probing peers
+TEST $CLI_1 peer probe $H2
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+
+# Creating a volume and starting it.
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+TEST glusterfs -s $H1 --volfile-id $V0 $M1
+TEST touch $M1/file{1..100}
+
+# Testing volume top command with and without xml output
+function test_volume_top_cmds () {
+ local ret=0
+ declare -a top_cmds=("read" "open" "write" "opendir" "readdir")
+ for cmd in ${top_cmds[@]}; do
+ $CLI_1 volume top $V0 $cmd
+ (( ret += $? ))
+ $CLI_1 volume top $V0 clear
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd --xml
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd brick $H1:$B1/$V0
+ (( ret += $? ))
+ $CLI_1 volume top $V0 clear brick $H1:$B1/$V0
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd brick $H1:$B1/$V0 --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+# Testing volume profile command with and without xml
+function test_volume_profile_cmds () {
+ local ret=0
+ declare -a profile_cmds=("start" "info" "info peek" "info cumulative" "info clear" "info incremental peek" "stop")
+ for cmd in "${profile_cmds[@]}"; do
+ $CLI_1 volume profile $V0 $cmd
+ (( ret += $? ))
+ $CLI_1 volume profile $V0 $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+TEST test_volume_top_cmds;
+TEST test_volume_profile_cmds;
+
+cleanup
diff --git a/tests/line-coverage/errorgen-coverage.t b/tests/line-coverage/errorgen-coverage.t
new file mode 100755
index 00000000000..f4622428d79
--- /dev/null
+++ b/tests/line-coverage/errorgen-coverage.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+# Because I have added 10 iterations of rpc-coverage and glfsxmp for errorgen
+SCRIPT_TIMEOUT=600
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume set $V0 error-gen posix;
+TEST $CLI volume set $V0 debug.error-failure 3%;
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+for i in $(seq 1 10); do
+ # as there is error-gen, there can be errors, so no
+ # need to test for success of below two commands
+ $(dirname $0)/../basic/rpc-coverage.sh $M1 >/dev/null
+ ./glfsxmp $V0 $H0 >/dev/null
+done
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+## Finish up
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/line-coverage/log-and-brick-ops-negative-case.t b/tests/line-coverage/log-and-brick-ops-negative-case.t
new file mode 100644
index 00000000000..d86cb452282
--- /dev/null
+++ b/tests/line-coverage/log-and-brick-ops-negative-case.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+#create volumes
+TEST $CLI volume create ${V0}_1 $H0:$B0/v{1..2}
+
+TEST $CLI volume create ${V0}_2 replica 3 arbiter 1 $H0:$B0/v{3..5}
+
+TEST $CLI volume create ${V0}_3 disperse 3 redundancy 1 $H0:$B0/v{6..8}
+TEST $CLI volume start ${V0}_3
+EXPECT 'Started' volinfo_field ${V0}_3 'Status'
+
+TEST $CLI volume create ${V0}_4 replica 3 $H0:$B0/v{9..14}
+TEST $CLI volume start ${V0}_4
+EXPECT 'Started' volinfo_field ${V0}_4 'Status'
+
+#log rotate option
+#provided volume does not exist
+TEST ! $CLI volume log ${V0}_5 rotate
+
+#volume must be started before using log rotate option
+TEST ! $CLI volume log ${V0}_1 rotate
+TEST $CLI volume start ${V0}_1
+EXPECT 'Started' volinfo_field ${V0}_1 'Status'
+
+#incorrect brick provided for the volume
+TEST ! $CLI volume log ${V0}_1 rotate $H0:$B0/v15
+
+#add-brick operations
+#volume must be in started to state to increase replica count
+TEST ! $CLI volume add-brick ${V0}_2 replica 4 $H0:$B0/v15
+TEST $CLI volume start ${V0}_2
+EXPECT 'Started' volinfo_field ${V0}_2 'Status'
+
+#incorrect number of bricks for a replica 4 volume
+TEST ! $CLI volume add-brick ${V0}_1 replica 4 $H0:$B0/v15
+
+#replica count provided is less than the current replica count
+TEST ! $CLI volume add-brick ${V0}_2 replica 2 $H0:$B0/v15
+
+#dispersed to replicated dispersed not possible
+TEST ! $CLI volume add-brick ${V0}_3 replica 2 $H0:$B0/v15
+
+#remove-brick operations
+#replica count option provided for dispersed vol
+TEST ! $CLI volume remove-brick ${V0}_3 replica 2 $H0:$B0/v8 start
+
+#given replica count is greater than the current replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 4 $H0:$B0/v5 start
+
+#number of bricks to be removed, must be a multiple of replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 3 $H0:$B0/v{3..4} start
+
+#less number of bricks given to reduce the replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 1 $H0:$B0/v3 start
+
+#bricks should be from different subvol
+TEST ! $CLI volume remove-brick ${V0}_4 replica 2 $H0:$B0/v{13..14} start
+
+#arbiter must be removed to reduce replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 1 $H0:$B0/v{3..4} start
+
+#removal of bricks is not allowed without reducing the replica count explicitly
+TEST ! $CLI volume remove-brick ${V0}_2 replica 3 $H0:$B0/v{3..5} start
+
+#incorrect brick for given vol
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v15 start
+
+#removing all the bricks are not allowed
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v{1..2} start
+
+#volume must not be stopped state while removing bricks
+TEST $CLI volume stop ${V0}_1
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v1 start
+
+cleanup \ No newline at end of file
diff --git a/tests/line-coverage/meta-max-coverage.t b/tests/line-coverage/meta-max-coverage.t
new file mode 100755
index 00000000000..1cc07610aa7
--- /dev/null
+++ b/tests/line-coverage/meta-max-coverage.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1
+
+TEST stat $M1/.meta/
+
+# expect failures in rpc-coverage.sh execution.
+res=$($(dirname $0)/../basic/rpc-coverage.sh $M1/.meta)
+
+
+# Expect errors here, hence no need to 'check for success'
+for file in $(find $M1/.meta type f -print); do
+ cat $file >/dev/null
+ echo 1>$file
+ echo hello>$file
+done
+
+TEST umount $M1
+
+cleanup;
diff --git a/tests/line-coverage/namespace-linecoverage.t b/tests/line-coverage/namespace-linecoverage.t
new file mode 100644
index 00000000000..8de6a0f279b
--- /dev/null
+++ b/tests/line-coverage/namespace-linecoverage.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.read-subvolume-index 0
+TEST $CLI volume set $V0 features.tag-namespaces on
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 storage.build-pgfid on
+
+sleep 2
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+
+mkdir -p $M1/namespace
+
+# subvol_1 = bar, subvol_2 = foo, subvol_3 = hey
+# Test create, write (tagged by loc, fd respectively).
+touch $M1/namespace/{bar,foo,hey}
+
+open $M1/namespace/hey
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/old-protocol.t b/tests/line-coverage/old-protocol.t
new file mode 100755
index 00000000000..5676e5636db
--- /dev/null
+++ b/tests/line-coverage/old-protocol.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+file="/var/lib/glusterd/vols/$V0/trusted-$V0.tcp-fuse.vol"
+sed -i -e 's$send-gids true$send-gids true\n option testing.old-protocol true$g' $file
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/quiesce-coverage.t b/tests/line-coverage/quiesce-coverage.t
new file mode 100755
index 00000000000..ca29343451e
--- /dev/null
+++ b/tests/line-coverage/quiesce-coverage.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+file="/var/lib/glusterd/vols/$V0/trusted-$V0.tcp-fuse.vol"
+
+cat >> ${file} <<EOF
+
+volume quiesce
+ type features/quiesce
+ subvolumes ${V0}
+end-volume
+EOF
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/shard-coverage.t b/tests/line-coverage/shard-coverage.t
new file mode 100644
index 00000000000..1797999c146
--- /dev/null
+++ b/tests/line-coverage/shard-coverage.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick
+TEST $CLI volume set $V0 features.shard on
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# It is good to copy the file locally and build it, so the scope remains
+# inside tests directory.
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/line-coverage/some-features-in-libglusterfs.t b/tests/line-coverage/some-features-in-libglusterfs.t
new file mode 100644
index 00000000000..5719c4e039c
--- /dev/null
+++ b/tests/line-coverage/some-features-in-libglusterfs.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function grep_string {
+ local f=$1
+ local string=$2
+ # The output of test script also shows up in log. Ignore them.
+ echo $(grep ${string} ${f} | grep -v "++++++" | wc -l)
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0;
+
+log_file="$(gluster --print-logdir)/gluster.log"
+## Mount FUSE
+TEST $GFS -s $H0 --log-file $log_file --volfile-id $V0 $M1
+
+## Cover 'monitoring.c' here
+pgrep 'glusterfs' | xargs kill -USR2
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:USR2'
+
+## Also cover statedump
+pgrep 'glusterfs' | xargs kill -USR1
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:USR1'
+
+## Also cover SIGHUP
+pgrep 'glusterfs' | xargs kill -HUP
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:HUP'
+
+## Also cover SIGTERM
+pgrep 'glusterfs' | xargs kill -TERM
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'cleanup_and_exit'
+
+# Previous call should make umount of the process.
+# force_umount $M1
+
+# TODO: below section is commented out, mainly as our regression treats the test
+# as failure because sending ABRT signal will cause the process to dump core.
+# Our regression treats the test as failure, if there is a core.
+# FIXME: figure out a way to run this test, because this part of the code gets
+# executed only when there is coredump, and it is critical for debugging, to
+# keep it working always.
+
+# # Restart client
+# TEST $GFS -s $H0 --log-file $log_file --volfile-id $V0 $M1
+#
+# ## Also cover SIGABRT
+# pgrep 'glusterfs ' | xargs kill -ABRT
+#
+# TEST [ 1 -eq $(grep 'pending frames' $log_file | wc -l) ]
+
+TEST rm $log_file
+
+cleanup;
diff --git a/tests/line-coverage/volfile-with-all-graph-syntax.t b/tests/line-coverage/volfile-with-all-graph-syntax.t
new file mode 100644
index 00000000000..b137432cceb
--- /dev/null
+++ b/tests/line-coverage/volfile-with-all-graph-syntax.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+ option directory $B0/test
+ option multiple-line-string "I am
+ testing a feature of volfile graph.l"
+ option single-line-string "this is running on $H0"
+ option option-with-back-tick `date +%Y%M%d`
+end-volume
+EOF
+
+# This should succeed, but it will have some unknown options, which is OK.
+TEST glusterfs -f $B0/test.vol $M0;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+ type storage/posix
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ option test and test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ subvolumes
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+ new-option key value
+ option directory $B0/test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+cleanup;
diff --git a/tests/nfs.rc b/tests/nfs.rc
new file mode 100644
index 00000000000..2140f311c33
--- /dev/null
+++ b/tests/nfs.rc
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Due to portmap registration NFS takes some time to
+# export all volumes. Therefore tests should start only
+# after exports are visible by showmount command. This
+# routine will check if showmount shows the exports or not
+#
+function is_nfs_export_available ()
+{
+ local vol=$1
+
+ if [ "$vol" == "" ]; then
+ vol=$V0
+ fi
+
+ exp=$(showmount -e localhost 2> /dev/null | grep $vol | wc -l)
+ echo "$exp"
+}
+
+function mount_nfs ()
+{
+ local e=$1
+ local m=$2
+ local opt=$3
+ if [ ! -z "$opt" ]; then opt=",$opt"; fi
+ opt="soft,intr,vers=3$opt"
+
+ nopt=""
+ for o in ${opt//,/ }; do
+ case $OSTYPE in
+ NetBSD)
+ test "x${nopt}" = "x" && nopt="tcp,-R=2,"
+
+ case $o in
+ nolock|noac|actimeo=*|mountproto=udp)
+ continue
+ ;;
+ proto=tcp)
+ o="tcp"
+ ;;
+ vers=3)
+ o="nfsv3"
+ ;;
+ retry=*)
+ o=${o/retry=/-R}
+ ;;
+ timeo=*)
+ o=${o/timeo=/-t}
+ ;;
+ retrans=*)
+ o=${o/retrans=/-x}
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ *)
+ ;;
+ esac
+ if [ ! -z "$nopt" ]; then nopt="${nopt},"; fi
+ nopt="${nopt}$o"
+ done
+
+ mount -t nfs -o $nopt $e $m
+}
+
+function umount_nfs {
+ ${UMOUNT_F} $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
diff --git a/tests/performance/open-behind.t b/tests/performance/open-behind.t
new file mode 100755
index 00000000000..002a98a8352
--- /dev/null
+++ b/tests/performance/open-behind.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info 2>/dev/null;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.flush-behind off;
+
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+D0="hello-this-is-a-test-message0";
+F0="test-file0";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/$F0" "$D0";
+EXPECT "$D0" cat $M1/$F0;
+
+# open-behind delays open and uses anonymous fds for fops like
+# fstat and readv. So after creating the file, if volume is restarted
+# then later when the file is read, because of the use of anonymous fds
+# volume top open will show number of files opened as 0.
+TEST $CLI volume stop $V0;
+sleep 1;
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-1
+
+cat $M1/$F0 >/dev/null;
+
+string=$(gluster volume top $V0 open | grep -w "$F0");
+
+EXPECT "" echo $string;
+
+TEST $CLI volume set $V0 performance.open-behind off;
+
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "2" num_graphs $M0;
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "2" num_graphs $M1;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-1
+
+D1="hello-this-is-a-test-message1";
+F1="test-file1";
+
+TEST write_to "$M0/$F1" "$D1";
+EXPECT "$D1" cat $M1/$F1;
+
+EXPECT "$D0" cat $M1/$F0;
+
+$CLI volume top $V0 open | grep -w "$F0" >/dev/null 2>&1
+TEST [ $? -eq 0 ];
+
+cleanup;
diff --git a/tests/performance/quick-read.t b/tests/performance/quick-read.t
new file mode 100644
index 00000000000..26ec175df0d
--- /dev/null
+++ b/tests/performance/quick-read.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+D0="hello-this-is-a-test-message0";
+F0="test-file0";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/$F0" "$D0";
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+
+sleep 1;
+
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+
+TEST $CLI volume set $V0 performance.quick-read off;
+
+D1="hello-this-is-a-test-message1";
+F1="test-file1";
+
+TEST write_to "$M0/$F1" "$D1";
+EXPECT "$D1" cat $M0/$F1;
+
+EXPECT "$D0" cat $M1/$F0;
+
+cleanup;
diff --git a/tests/snapshot.rc b/tests/snapshot.rc
new file mode 100644
index 00000000000..f6da514f826
--- /dev/null
+++ b/tests/snapshot.rc
@@ -0,0 +1,484 @@
+#!/bin/bash
+
+LVM_DEFINED=0
+LVM_PREFIX="patchy_snap"
+LVM_COUNT=0
+VHD_SIZE="300M"
+
+#This function will init B# bricks
+#This is used when launch_cluster is
+#not called to init B#. Call it before
+#setup_lvm
+function init_n_bricks() {
+ local count=$1
+ for i in `seq 1 $count`; do
+ eval "B$i=/d/backends/$i"
+ done
+}
+
+count_snaps () {
+ ls $1/.snaps | wc -l
+}
+
+function init_lvm() {
+ if [ "$1" == "" ]; then
+ echo "Error: Invalid argument supplied"
+ return 1
+ fi
+ LVM_COUNT=$1
+
+ if [ "$2" != "" ]; then
+ VHD_SIZE=$2
+ fi
+
+ local b
+ local i
+
+ if [ "$B1" = "" ]; then
+ B1=$B0
+ fi
+
+ for i in `seq 1 $LVM_COUNT`; do
+ b="B$i"
+ if [ "${!b}" = "" ]; then
+ echo "Error: $b not defined."
+ echo "Please run launch_cluster with atleast $LVM_COUNT nodes"
+ return 1
+ fi
+
+ eval "L$i=${!b}/${LVM_PREFIX}_mnt"
+ l="L$i"
+ mkdir -p ${!l}
+ if [ $? -ne 0 ]; then
+ echo "Error: failed to create dir ${!l}"
+ return 1
+ fi
+
+ eval "VG$i=${LVM_PREFIX}_vg_${i}"
+ done
+
+ LVM_DEFINED=1
+ return 0
+}
+
+function verify_lvm_version() {
+ if `/sbin/lvcreate --help | grep -q thin`; then
+ return 0;
+ fi
+ return 1;
+}
+
+function setup_lvm() {
+ init_lvm $@ || return 1
+ _setup_lvm
+ return 0
+}
+
+function cleanup_lvm() {
+ pkill gluster
+ sleep 2
+
+ if [ "$LVM_DEFINED" = "1" ]; then
+ _cleanup_lvm >/dev/null 2>&1
+ fi
+
+ _cleanup_lvm_again >/dev/null 2>&1
+ # TODO Delete cleanup has open bug
+ # once fixed delete this
+ mount | grep "run/gluster/snaps" | awk '{print $3}' | xargs umount 2> /dev/null
+ mount | grep "patchy_snap" | awk '{print $3}' | xargs umount 2> /dev/null
+ \rm -rf /var/run/gluster/snaps/*
+ lvscan | grep "/dev/patchy_snap" | awk '{print $2}'| xargs lvremove -f 2> /dev/null
+ vgs | grep patchy_snap | awk '{print $1}' | xargs vgremove -f 2>/dev/null
+ \rm -rf /dev/patchy*
+ return 0
+}
+
+# Find out how this file was sourced, source traps.rc the same way, and use
+# push_trapfunc to make sure cleanup_lvm gets called before we exit.
+. $(dirname ${BASH_SOURCE[0]})/traps.rc
+push_trapfunc cleanup_lvm
+
+########################################################
+# Private Functions
+########################################################
+function _setup_lvm() {
+ local count=$LVM_COUNT
+ local b
+ local i
+
+ for i in `seq 1 $count`; do
+ b="B$i"
+
+ _create_vhd ${!b} $i
+ _create_lv ${!b} $i
+ _mount_lv $i
+ done
+}
+
+function _cleanup_lvm() {
+ local count=$LVM_COUNT
+ local b
+ local i
+
+ for i in `seq 1 $count`; do
+ b="B$i"
+ _umount_lv $i
+ _remove_lv $i
+ _remove_vhd ${!b}
+ done
+}
+
+function _cleanup_lvm_again() {
+ local file
+
+ mount | grep $LVM_PREFIX | awk '{print $3}' | xargs -r ${UMOUNT_F}
+
+ /sbin/vgs | grep $LVM_PREFIX | awk '{print $1}' | xargs -r vgremove -f
+
+ find $B0 -name "${LVM_PREFIX}_loop" | xargs -r losetup -d
+
+ find $B0 -name "${LVM_PREFIX}*" | xargs -r rm -rf
+
+ find /run/gluster/snaps -name "*${LVM_PREFIX}*" | xargs -r rm -rf
+
+ for file in `ls /run/gluster/snaps`; do
+ find /run/gluster/snaps/$file -mmin -2 | xargs -r rm -rf
+ done
+}
+
+########################################################
+########################################################
+function _create_vhd() {
+ local dir=$1
+ local num=$2
+ local loop_num=`expr $2 + 8`
+
+ fallocate -l${VHD_SIZE} $dir/${LVM_PREFIX}_vhd
+ mknod -m660 $dir/${LVM_PREFIX}_loop b 7 $loop_num
+ /sbin/losetup $dir/${LVM_PREFIX}_loop $dir/${LVM_PREFIX}_vhd
+}
+
+function _create_lv() {
+ local dir=$1
+ local num=$2
+ local vg="VG$num"
+ local thinpoolsize="200M"
+ local virtualsize="150M"
+
+ /sbin/pvcreate $dir/${LVM_PREFIX}_loop
+ /sbin/vgcreate ${!vg} $dir/${LVM_PREFIX}_loop
+
+ /sbin/lvcreate -L ${thinpoolsize} -T /dev/${!vg}/thinpool
+ /sbin/lvcreate -V ${virtualsize} -T /dev/${!vg}/thinpool -n brick_lvm
+
+ mkfs.xfs -f /dev/${!vg}/brick_lvm
+}
+
+function _mount_lv() {
+ local num=$1
+ local vg="VG$num"
+ local l="L$num"
+
+ mount -t xfs -o nouuid /dev/${!vg}/brick_lvm ${!l}
+}
+
+function _umount_lv() {
+ local num=$1
+ local l="L$num"
+
+ ${UMOUNT_F} ${!l} 2>/dev/null || true
+ rmdir ${!l} 2>/dev/null || true
+}
+
+function _remove_lv() {
+ local num=$1
+ local vg="VG$num"
+
+ vgremove -f ${!vg}
+}
+
+function _remove_vhd() {
+ local dir=$1
+
+ losetup -d $dir/${LVM_PREFIX}_loop
+ rm -f $dir/${LVM_PREFIX}_loop
+ rm -f $dir/${LVM_PREFIX}_vhd
+}
+
+########################################################
+# Utility Functions
+########################################################
+function snapshot_exists() {
+ local clitype=$1
+ local snapname=$2
+ local cli=$CLI
+ if [ "$clitype" == "1" ]; then
+ cli=$CLI_1;
+ fi
+ if [ "$clitype" == "2" ]; then
+ cli=$CLI_2;
+ fi
+ $cli snapshot list | egrep -q "^$snapname\$"
+ return $?
+}
+
+#Create N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be created>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps are created ,
+# if not will return exit code of last failed
+# snap create command.
+function create_n_snapshots() {
+ local cli=$1
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ $CLI_1 snapshot create $snap_name$i ${vol} no-timestamp &
+ PID_1=$!
+ wait $PID_1
+ ret=$?
+ if [ "$ret" != "0" ]; then
+ break
+ fi
+ done
+ return $ret
+}
+
+
+#Delete N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be deleted>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps are Delete,
+# if not will return exit code of last failed
+# snap delete command.
+function delete_n_snapshots() {
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ $CLI_1 snapshot delete $snap_name$i &
+ PID_1=$!
+ wait $PID_1
+ temp=$?
+ if [ "$temp" != "0" ]; then
+ ret=$temp
+ fi
+ done
+ return $ret
+}
+
+#Check for the existance of N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be checked>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps exists,
+# if not will return exit code of last failed
+# snapshot_exists().
+function snapshot_n_exists() {
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ snapshot_exists 1 $snap_name$i
+ ret=$?
+ if [ "$ret" != "0" ]; then
+ break
+ fi
+ done
+ return $ret
+}
+
+#Check for the status of snapshot for a volume
+#Arg1 : <Snap Name>
+function snapshot_status()
+{
+ local snap=$1;
+ local cli=$CLI_1;
+ if [ "$cli" = "" ]; then
+ cli=$CLI
+ fi
+
+ #TODO: Right now just fetches the status of the single snap volume.
+ #When snapshot will have multiple snap volumes, should have a
+ #cummulative logic for status
+ $cli snapshot info $snap | grep "Status" | sed 's/.*: //';
+}
+
+
+#Check the different status of a particular snapshot
+#Arg1 : <Snap name>
+#Arg2 : <Filed in status>
+#Arg3 : <Expected value>
+function snapshot_snap_status()
+{
+
+ local snap=$1;
+ local cli=$CLI_1;
+ local field=$2;
+ local expected=$3;
+ if [ "$cli" = "" ]; then
+ cli=$CLI
+ fi
+ for i in $($cli snapshot status $snap | grep "$field" | \
+ cut -d ':' -f2 | awk '{print $1}') ;
+ do
+ if [ "$i" != "$expected" ]; then
+ echo "Failed"
+ return 1;
+ fi;
+ done;
+echo "Success"
+return 0;
+}
+
+
+# TODO: Cleanup code duplication
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function volume_exists() {
+ $CLI_1 volume info $1 > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+# arg-1 : From which node the command should be trigerred
+# Ex : $CLI_1, $CLI_2, etc.
+# arg-2 : Volume name
+# arg-3 : Starting index for the snapname "snap$i"
+# arg-4 : Number of snapshots to be taken
+function snap_create()
+{
+ eval local cli_index=\$$1
+ local volname=$2
+ local i=$3
+ local limit=$[$i + $4]
+
+ while [ $i -lt $limit ]
+ do
+ $cli_index snapshot create snap$i $volname no-timestamp
+ i=$[$i+1]
+ done
+}
+
+# arg-1 : From which node the command should be trigerred
+# Ex : $CLI_1. $CLI_2, etc.
+# arg-2 : Volume name.
+function get_snap_count()
+{
+ eval local cli_index=\$$1
+ local volname=$2
+
+
+ if [ -z "$2" ]
+ then
+ $cli_index snapshot list | grep -v "No snapshots present"\
+ | wc -l
+ else
+ $cli_index snapshot list $volname\
+ | grep -v "No snapshots present"\
+ | wc -l
+ fi
+}
+
+# arg-1 : From which node the command should be trigerred
+# Ex : $CLI_1, $CLI_2, etc.
+# arg-2 : Starting index for the snapname "snap$i"
+# arg-3 : Number of snapshots to be deleted.
+function snap_delete()
+{
+ eval local cli_index=\$$1
+ local i=$2
+ local limit=$[$i + $3]
+
+ while [ $i -lt $limit ]
+ do
+ $cli_index snapshot delete snap$i
+ i=$[$i+1]
+ done
+}
+
+# arg-1 : From which node the command should be triggered
+# Ex : $CLI_1, $CLI_2, etc.
+# arg-2 : key value
+function snap_config()
+{
+ eval local cli_index=\$$1
+ local var=$2
+ $cli_index snapshot config | grep "^$var" | sed 's/.*: //'
+}
+
+function check_if_snapd_exist() {
+ local pid
+ pid=$(ps aux | grep "snapd" | grep -v grep | awk '{print $2}')
+ if [ -n "$pid" ]; then echo "Y"; else echo "N"; fi
+}
+
+# returns number of snapshot being displayed in ".snaps" directory
+function uss_count_snap_displayed() {
+ local path=$1
+ ls $path/.snaps | wc -l
+}
+
+function snap_info_volume()
+{
+ eval local cli_index=\$$1
+ local var=$2
+ local vol=$3
+ $cli_index snapshot info volume $vol | grep "^$var" | sed 's/.*: //'
+}
+
+function snap_config_volume()
+{
+ eval local cli_index=\$$1
+ local var=$2
+ local vol=$3
+ $cli_index snapshot config $vol| grep "^$var" | sed 's/.*: //'
+}
+
+#return specific fields of xml output
+function get-cmd-field-xml()
+{
+ local cli=$CLI_1;
+ if [ "$cli" = "" ]; then
+ cli=$CLI
+ fi
+
+ COMMAND=$1
+ PATTERN=$2
+
+ $cli $COMMAND --xml | xmllint --format - | grep $PATTERN
+}
+
+function get_snap_brick_status()
+{
+ local snap=$1;
+
+ $CLI snapshot status $snap | grep "Brick Running" | sed 's/.*: //';
+}
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on LVM which is not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
diff --git a/tests/ssl.rc b/tests/ssl.rc
new file mode 100644
index 00000000000..b1ccc4c8d38
--- /dev/null
+++ b/tests/ssl.rc
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+for d in /etc/ssl /etc/openssl /usr/local/etc/openssl ; do
+ if test -d $d ; then
+ SSL_BASE=$d
+ break
+ fi
+done
+
+if [ ! -d "$SSL_BASE" ]; then
+ echo "Skip test! SSL certificate path missing in the system" >&2
+ SKIP_TESTS
+ exit 0
+fi
+
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+
+# Create self-signed certificates
+function create_self_signed_certs (){
+ openssl genrsa -out $SSL_KEY 2048
+ openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+ return $?
+}
+
+function cleanup_certs () {
+ rm -f $SSL_BASE/glusterfs.*
+}
+
+push_trapfunc cleanup_certs
+
+cleanup_certs
diff --git a/tests/thin-arbiter.rc b/tests/thin-arbiter.rc
new file mode 100644
index 00000000000..e26d91b1907
--- /dev/null
+++ b/tests/thin-arbiter.rc
@@ -0,0 +1,613 @@
+declare -A PORTMAP
+PORTCURR=49152
+function ta_create_ta_and_volfile()
+{
+ local b=$B0/$1
+ mkdir -p $b/.glusterfs/indices
+cat > $B0/ta.vol <<EOF
+volume ta-posix
+ type storage/posix
+ option directory $b
+end-volume
+
+volume ta-thin-arbiter
+ type features/thin-arbiter
+ subvolumes ta-posix
+end-volume
+
+volume ta-locks
+ type features/locks
+ option notify-contention yes
+ subvolumes ta-thin-arbiter
+end-volume
+
+volume ta-upcall
+ type features/upcall
+ option cache-invalidation off
+ subvolumes ta-locks
+end-volume
+
+volume ta-io-threads
+ type performance/io-threads
+ subvolumes ta-upcall
+end-volume
+
+volume ta-index
+ type features/index
+ option xattrop-pending-watchlist trusted.afr.ta-
+ option xattrop-dirty-watchlist trusted.afr.dirty
+ option index-base $b/.glusterfs/indices
+ subvolumes ta-io-threads
+end-volume
+
+volume ta-io-stats
+ type debug/io-stats
+ option count-fop-hits off
+ option latency-measurement off
+ option log-level WARNING
+ option unique-id $b
+ subvolumes ta-index
+end-volume
+
+volume ta-server
+ type protocol/server
+ option transport.listen-backlog 10
+ option transport.socket.keepalive-count 9
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option auth.addr.$b.allow *
+ option auth-path $b
+ option transport.address-family inet
+ option transport-type tcp
+ subvolumes ta-io-stats
+end-volume
+EOF
+}
+
+function ta_create_brick_and_volfile()
+{
+ local b=$B0/$1
+ mkdir -p $b/.glusterfs/indices
+cat > $B0/${1}.vol <<EOF
+volume ${V0}-posix
+ type storage/posix
+ option directory $b
+end-volume
+
+volume ${V0}-locks
+ type features/locks
+ subvolumes ${V0}-posix
+end-volume
+
+volume ${V0}-leases
+ type features/leases
+ option leases off
+ subvolumes ${V0}-locks
+end-volume
+
+volume ${V0}-upcall
+ type features/upcall
+ option cache-invalidation off
+ subvolumes ${V0}-leases
+end-volume
+
+volume ${V0}-io-threads
+ type performance/io-threads
+ subvolumes ${V0}-upcall
+end-volume
+
+volume ${V0}-index
+ type features/index
+ option xattrop-pending-watchlist trusted.afr.${V0}-
+ option xattrop-dirty-watchlist trusted.afr.dirty
+ option index-base $b/.glusterfs/indices
+ subvolumes ${V0}-io-threads
+end-volume
+
+volume $b
+ type debug/io-stats
+ option count-fop-hits off
+ option latency-measurement off
+ option log-level INFO
+ option unique-id $b
+ subvolumes ${V0}-index
+end-volume
+
+volume ${V0}-server
+ type protocol/server
+ option transport.listen-backlog 1024
+ option transport.socket.keepalive-count 9
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option auth.addr.$b.allow *
+ option auth-path $b
+ option auth.login.459d48e8-2a92-4f11-89f2-077b29f6f86d.password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option auth.login.$b.allow 459d48e8-2a92-4f11-89f2-077b29f6f86d
+ option transport.address-family inet
+ option transport-type tcp
+ subvolumes $b
+end-volume
+EOF
+}
+
+function ta_set_port_by_name()
+{
+ if [ -z ${PORTMAP[$1]} ]
+ then
+ PORTMAP[$1]=$PORTCURR
+ PORTCURR=$((PORTCURR+1))
+ fi
+}
+
+function ta_start_brick_process()
+{
+ ta_set_port_by_name $1
+ local p=${PORTMAP[$1]}
+ if glusterfs -p $B0/${1}.pid --volfile=$B0/${1}.vol -l $(gluster --print-logdir)/${1}.log --xlator-option ${V0}-server.listen-port=$p
+ then
+ cat $B0/${1}.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_start_ta_process()
+{
+ ta_set_port_by_name $1
+ local p=${PORTMAP[$1]}
+ if glusterfs -p $B0/${1}.pid --volfile=$B0/${1}.vol -l $(gluster --print-logdir)/${1}.log --xlator-option ta-server.listen-port=$p
+ then
+ cat $B0/${1}.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_start_mount_process()
+{
+ mkdir -p $1
+ identifier=$(echo $1 | tr / .)
+ if glusterfs --entry-timeout=0 --attribute-timeout=0 -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1
+ then
+ cat $B0/$identifier.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_get_mount_pid()
+{
+ local mount_path=$1
+ identifier=$(echo $mount_path | tr / .)
+ cat $B0/${identifier}.pid
+}
+
+function ta_create_mount_volfile()
+{
+ local b0=$B0/$1
+ local b1=$B0/$2
+ local ta=$B0/$3
+ local b0_port=${PORTMAP[$1]}
+ local b1_port=${PORTMAP[$2]}
+ local ta_port=${PORTMAP[$3]}
+cat > $B0/mount.vol <<EOF
+volume ${V0}-client-0
+ type protocol/client
+ option remote-host $H0
+ option client-bind-insecure off
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.socket.ssl-enabled off
+ option remote-subvolume $b0
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-count 9
+ option transport-type tcp
+ option ping-timeout 42
+ option send-gids on
+ option remote-port $b0_port
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+end-volume
+
+volume ${V0}-client-1
+ type protocol/client
+ option remote-host $H0
+ option client-bind-insecure off
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.socket.ssl-enabled off
+ option remote-subvolume $b1
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-count 9
+ option transport-type tcp
+ option ping-timeout 42
+ option send-gids on
+ option remote-port $b1_port
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+end-volume
+
+volume ${V0}-thin-arbiter-client
+ type protocol/client
+ option client-bind-insecure off
+ option transport.socket.ssl-enabled off
+ option remote-subvolume $ta
+ option ping-timeout 42
+ option remote-host $H0
+ option send-gids on
+ option transport.socket.keepalive-interval 2
+ option remote-port $ta_port
+ option transport-type tcp
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-time 20
+ option transport.socket.keepalive-count 9
+end-volume
+
+volume ${V0}-replicate-0
+ type cluster/replicate
+ option afr-dirty-xattr trusted.afr.dirty
+ option iam-self-heal-daemon off
+ option afr-pending-xattr ${V0}-client-0,${V0}-client-1,${V0}-ta-2
+ option thin-arbiter $H0:$ta
+ subvolumes ${V0}-client-0 ${V0}-client-1 ${V0}-thin-arbiter-client
+end-volume
+
+volume ${V0}-distribute
+ type cluster/distribute
+ option rebal-throttle normal
+ option force-migration off
+ option lookup-optimize on
+ option weighted-rebalance on
+ option write-freq-threshold 0
+ option assert-no-child-down off
+ option lock-migration off
+ option lookup-unhashed on
+ option randomize-hash-range-by-gfid off
+ option unhashed-sticky-bit off
+ option use-readdirp on
+ option readdir-optimize off
+ option xattr-name trusted.glusterfs.dht
+ option read-freq-threshold 0
+ option min-free-disk 10%
+ option min-free-inodes 5%
+ option rebalance-stats off
+ subvolumes ${V0}-replicate-0
+end-volume
+
+volume ${V0}-write-behind
+ type performance/write-behind
+ option strict-O_DIRECT off
+ option strict-write-ordering off
+ option resync-failed-syncs-after-fsync off
+ option aggregate-size 128KB
+ option flush-behind on
+ option cache-size 1MB
+ option trickling-writes on
+ subvolumes ${V0}-distribute
+end-volume
+
+volume ${V0}-read-ahead
+ type performance/read-ahead
+ option force-atime-update false
+ option page-count 4
+ option page-size 131072
+ option pass-through false
+ subvolumes ${V0}-write-behind
+end-volume
+
+volume ${V0}-readdir-ahead
+ type performance/readdir-ahead
+ option rda-low-wmark 4096
+ option rda-high-wmark 128KB
+ option rda-cache-limit 10MB
+ option parallel-readdir off
+ option pass-through false
+ option rda-request-size 131072
+ subvolumes ${V0}-read-ahead
+end-volume
+
+volume ${V0}-io-cache
+ type performance/io-cache
+ option cache-timeout 1
+ option cache-size 32MB
+ option min-file-size 0
+ option max-file-size 0
+ option pass-through false
+ subvolumes ${V0}-readdir-ahead
+end-volume
+
+volume ${V0}-quick-read
+ type performance/quick-read
+ option cache-invalidation false
+ option ctime-invalidation false
+ option cache-size 128MB
+ option cache-timeout 1
+ option max-file-size 64KB
+ subvolumes ${V0}-io-cache
+end-volume
+
+volume ${V0}-open-behind
+ type performance/open-behind
+ option use-anonymous-fd yes
+ option lazy-open yes
+ option read-after-open no
+ option pass-through false
+ subvolumes ${V0}-quick-read
+end-volume
+
+volume ${V0}-md-cache
+ type performance/md-cache
+ option pass-through false
+ option cache-capability-xattrs true
+ option cache-posix-acl false
+ option cache-swift-metadata true
+ option cache-samba-metadata false
+ option md-cache-timeout 1
+ option force-readdirp true
+ option cache-invalidation false
+ option md-cache-statfs off
+ option cache-selinux false
+ option cache-ima-xattrs true
+ subvolumes ${V0}-open-behind
+end-volume
+
+volume ${V0}-io-threads
+ type performance/io-threads
+ option normal-prio-threads 16
+ option enable-least-priority on
+ option idle-time 120
+ option cleanup-disconnected-reqs off
+ option pass-through false
+ option thread-count 16
+ option high-prio-threads 16
+ option low-prio-threads 16
+ option least-prio-threads 1
+ subvolumes ${V0}-md-cache
+end-volume
+
+volume ${V0}
+ type debug/io-stats
+ option client-logger gluster-log
+ option client-log-buf-size 5
+ option latency-measurement off
+ option client-log-level INFO
+ option brick-log-level INFO
+ option count-fop-hits off
+ option sys-log-level CRITICAL
+ option brick-log-format with-msg-id
+ option brick-log-buf-size 5
+ option dump-fd-stats off
+ option ios-dump-interval 0
+ option ios-dump-format json
+ option client-log-format with-msg-id
+ option log-buf-size 5
+ option log-flush-timeout 120
+ option client-log-flush-timeout 120
+ option ios-sample-interval 0
+ option ios-sample-buf-size 65535
+ option brick-logger gluster-log
+ option ios-dnscache-ttl-sec 86400
+ option brick-log-flush-timeout 120
+ option unique-id /no/such/path
+ subvolumes ${V0}-io-threads
+end-volume
+EOF
+}
+
+function ta_kill_brick()
+{
+ local p=$(cat $B0/${1}.pid)
+ echo > $B0/${1}.pid
+ kill -9 $p
+}
+
+function ta_get_pid_by_brick_name()
+{
+ cat $B0/${1}.pid
+}
+
+function ta_up_status()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ grep -E "^up = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'='
+}
+
+function ta_create_shd_volfile()
+{
+ local b0=$B0/$1
+ local b1=$B0/$2
+ local ta=$B0/$3
+ local b0_port=${PORTMAP[$1]}
+ local b1_port=${PORTMAP[$2]}
+ local ta_port=${PORTMAP[$3]}
+cat > $B0/glustershd.vol <<EOF
+volume ${V0}-replicate-0-client-0
+ type protocol/client
+ option send-gids on
+ option transport.socket.keepalive-interval 2
+ option remote-host $H0
+ option remote-subvolume $b0
+ option ping-timeout 42
+ option client-bind-insecure off
+ option transport.socket.own-thread off
+ option frame-timeout 1800
+ option non-blocking-io off
+ option transport.socket.keepalive 1
+ option transport.socket.keepalive-count 9
+ option transport.tcp-user-timeout 0
+ option transport.socket.nodelay 1
+ option transport.socket.keepalive-time 20
+ option transport.socket.read-fail-log off
+ option transport-type tcp
+ option filter-O_DIRECT disable
+ option event-threads 2
+ option transport.listen-backlog 1024
+ option transport.socket.ssl-enabled off
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+ option remote-port $b0_port
+end-volume
+
+volume ${V0}-replicate-0-client-1
+ type protocol/client
+ option remote-host $H0
+ option transport.socket.keepalive-time 20
+ option transport.socket.keepalive-count 9
+ option transport.socket.own-thread off
+ option transport.socket.ssl-enabled off
+ option transport-type tcp
+ option remote-subvolume $b1
+ option event-threads 2
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option transport.socket.nodelay 1
+ option transport.socket.read-fail-log off
+ option frame-timeout 1800
+ option ping-timeout 42
+ option client-bind-insecure off
+ option filter-O_DIRECT disable
+ option send-gids on
+ option non-blocking-io off
+ option transport.listen-backlog 1024
+ option transport.socket.keepalive-interval 2
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+ option remote-port $b1_port
+end-volume
+
+volume ${V0}-replicate-0-thin-arbiter-client
+ type protocol/client
+ option frame-timeout 1800
+ option event-threads 2
+ option transport.listen-backlog 1024
+ option transport.socket.nodelay 1
+ option transport.socket.keepalive-count 9
+ option transport.socket.ssl-enabled off
+ option transport-type tcp
+ option remote-subvolume $ta
+ option filter-O_DIRECT disable
+ option non-blocking-io off
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+ option remote-host $H0
+ option send-gids on
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-time 20
+ option ping-timeout 42
+ option client-bind-insecure off
+ option transport.socket.keepalive 1
+ option transport.socket.own-thread off
+ option remote-port $ta_port
+end-volume
+
+volume ${V0}-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 8
+ option metadata-self-heal on
+ option data-change-log on
+ option entrylk-trace off
+ option iam-self-heal-daemon yes
+ option afr-dirty-xattr trusted.afr.dirty
+ option heal-timeout 10
+ option read-hash-mode 1
+ option metadata-splitbrain-forced-heal off
+ option thin-arbiter $H0:$ta
+ option shd-max-threads 1
+ option afr-pending-xattr ${V0}-client-0,${V0}-client-1,${V0}-ta-2
+ option halo-max-latency 5
+ option halo-max-replicas 99999
+ option entry-change-log on
+ option halo-nfsd-max-latency 5
+ option inodelk-trace off
+ option pre-op-compat on
+ option eager-lock on
+ option self-heal-readdir-size 1KB
+ option ensure-durability on
+ option locking-scheme full
+ option halo-enabled False
+ option heal-wait-queue-length 128
+ option entry-self-heal on
+ option self-heal-daemon on
+ option quorum-reads no
+ option shd-wait-qlength 1024
+ option choose-local true
+ option halo-min-replicas 2
+ option data-self-heal on
+ option metadata-change-log on
+ option consistent-metadata no
+ option full-lock yes
+ option use-compound-fops no
+ option halo-shd-max-latency 99999
+ option quorum-type none
+ option favorite-child-policy none
+ option read-subvolume-index -1
+ option optimistic-change-log on
+ option iam-nfs-daemon off
+ option post-op-delay-secs 1
+ option granular-entry-heal no
+ option consistent-io no
+ option data-self-heal-window-size 1
+ subvolumes ${V0}-replicate-0-client-0 ${V0}-replicate-0-client-1 ${V0}-replicate-0-thin-arbiter-client
+end-volume
+
+volume glustershd
+ type debug/io-stats
+ option log-buf-size 5
+ option ios-dump-format json
+ option latency-measurement off
+ option sys-log-level CRITICAL
+ option brick-log-level INFO
+ option client-logger gluster-log
+ option client-log-format with-msg-id
+ option brick-log-format with-msg-id
+ option client-log-buf-size 5
+ option log-flush-timeout 120
+ option ios-dump-interval 0
+ option ios-sample-interval 0
+ option ios-dnscache-ttl-sec 86400
+ option count-fop-hits off
+ option client-log-level INFO
+ option brick-logger gluster-log
+ option brick-log-buf-size 5
+ option ios-sample-buf-size 65535
+ option client-log-flush-timeout 120
+ option brick-log-flush-timeout 120
+ option unique-id /no/such/path
+ option dump-fd-stats off
+ subvolumes ${V0}-replicate-0
+end-volume
+EOF
+}
+
+function ta_start_shd_process()
+{
+ if glusterfs -p $B0/${1}.pid --volfile=$B0/${1}.vol -l $(gluster --print-logdir)/${1}.log --process-name=glustershd
+ then
+ cat $B0/${1}.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_mount_child_up_status()
+{
+ local mount_path=$1
+ #brick_id is (brick-num in volume info - 1)
+ local vol=$2
+ local brick_id=$3
+ local pid=$(ta_get_mount_pid $mount_path)
+ local fpath=$(generate_statedump $pid)
+ up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
diff --git a/tests/traps.rc b/tests/traps.rc
new file mode 100644
index 00000000000..f011960c97e
--- /dev/null
+++ b/tests/traps.rc
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Make sure this only gets included/executed once. Unfortunately, bash doesn't
+# usually distinguish between values that are unset and values that are null.
+# To work around that, we declare TRAPFUNCS to be a one-element array right at
+# the start, but that one element is : which is defined to do nothing.
+
+if [ ${#TRAPFUNCS[@]} = 0 ]; then
+ TRAPFUNCS=(:)
+
+ push_trapfunc () {
+ TRAPFUNCS[${#TRAPFUNCS[@]}]="$@"
+ }
+
+ execute_trapfuncs () {
+ for i in "${TRAPFUNCS[@]}"; do
+ $i
+ done
+ }
+
+ trap execute_trapfuncs EXIT
+fi
diff --git a/tests/utils/arequal-checksum.c b/tests/utils/arequal-checksum.c
new file mode 100644
index 00000000000..b51a054162b
--- /dev/null
+++ b/tests/utils/arequal-checksum.c
@@ -0,0 +1,633 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#define _XOPEN_SOURCE 500
+
+#include <ftw.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <argp.h>
+
+/*
+ * FTW_ACTIONRETVAL is a GNU libc extension. It is used here to skip
+ * hierarchies. On other systems we will still walk the tree, ignoring
+ * entries.
+ */
+#ifndef FTW_ACTIONRETVAL
+#define FTW_ACTIONRETVAL 0
+#endif
+
+int debug = 0;
+
+typedef struct {
+ char test_directory[4096];
+ char **ignored_directory;
+ unsigned int directories_ignored;
+} arequal_config_t;
+
+static arequal_config_t arequal_config;
+
+static error_t
+arequal_parse_opts(int key, char *arg, struct argp_state *_state);
+
+static struct argp_option arequal_options[] = {
+ {"ignore", 'i', "IGNORED", 0, "entry in the given path to be ignored"},
+ {"path", 'p', "PATH", 0, "path where arequal has to be run"},
+ {0, 0, 0, 0, 0}};
+
+#define DBG(fmt...) \
+ do { \
+ if (debug) { \
+ fprintf(stderr, "D "); \
+ fprintf(stderr, fmt); \
+ } \
+ } while (0)
+
+void
+add_to_list(char *arg);
+void
+get_absolute_path(char directory[], char *arg);
+
+static int
+roof(int a, int b)
+{
+ return ((((a) + (b)-1) / ((b) ? (b) : 1)) * (b));
+}
+
+void
+add_to_list(char *arg)
+{
+ char *string = NULL;
+ int index = 0;
+
+ index = arequal_config.directories_ignored - 1;
+ string = strdup(arg);
+
+ if (!arequal_config.ignored_directory) {
+ arequal_config.ignored_directory = calloc(1, sizeof(char *));
+ } else
+ arequal_config.ignored_directory = realloc(
+ arequal_config.ignored_directory, sizeof(char *) * (index + 1));
+
+ arequal_config.ignored_directory[index] = string;
+}
+
+static error_t
+arequal_parse_opts(int key, char *arg, struct argp_state *_state)
+{
+ switch (key) {
+ case 'i': {
+ arequal_config.directories_ignored++;
+ add_to_list(arg);
+ } break;
+ case 'p': {
+ if (arg[0] == '/')
+ strcpy(arequal_config.test_directory, arg);
+ else
+ get_absolute_path(arequal_config.test_directory, arg);
+
+ if (arequal_config
+ .test_directory[strlen(arequal_config.test_directory) -
+ 1] == '/')
+ arequal_config
+ .test_directory[strlen(arequal_config.test_directory) - 1] =
+ '\0';
+ } break;
+
+ case ARGP_KEY_NO_ARGS:
+ break;
+ case ARGP_KEY_ARG:
+ break;
+ case ARGP_KEY_END:
+ if (_state->argc == 1) {
+ argp_usage(_state);
+ }
+ }
+
+ return 0;
+}
+
+void
+get_absolute_path(char directory[], char *arg)
+{
+ char cwd[4096] = {
+ 0,
+ };
+
+ if (getcwd(cwd, sizeof(cwd)) == NULL)
+ printf("some error in getting cwd\n");
+
+ if (strcmp(arg, ".") != 0) {
+ if (cwd[strlen(cwd)] != '/')
+ cwd[strlen(cwd)] = '/';
+ strcat(cwd, arg);
+ }
+ strcpy(directory, cwd);
+}
+
+static struct argp argp = {
+ arequal_options, arequal_parse_opts, "",
+ "arequal - Tool which calculates the checksum of all the entries"
+ "present in a given directory"};
+
+/* All this runs in single thread, hence using 'global' variables */
+
+unsigned long long avg_uid_file = 0;
+unsigned long long avg_uid_dir = 0;
+unsigned long long avg_uid_symlink = 0;
+unsigned long long avg_uid_other = 0;
+
+unsigned long long avg_gid_file = 0;
+unsigned long long avg_gid_dir = 0;
+unsigned long long avg_gid_symlink = 0;
+unsigned long long avg_gid_other = 0;
+
+unsigned long long avg_mode_file = 0;
+unsigned long long avg_mode_dir = 0;
+unsigned long long avg_mode_symlink = 0;
+unsigned long long avg_mode_other = 0;
+
+unsigned long long global_ctime_checksum = 0;
+
+unsigned long long count_dir = 0;
+unsigned long long count_file = 0;
+unsigned long long count_symlink = 0;
+unsigned long long count_other = 0;
+
+unsigned long long checksum_file1 = 0;
+unsigned long long checksum_file2 = 0;
+unsigned long long checksum_dir = 0;
+unsigned long long checksum_symlink = 0;
+unsigned long long checksum_other = 0;
+
+unsigned long long
+checksum_path(const char *path)
+{
+ unsigned long long csum = 0;
+ unsigned long long *nums = 0;
+ int len = 0;
+ int cnt = 0;
+
+ len = roof(strlen(path), sizeof(csum));
+ cnt = len / sizeof(csum);
+
+ nums = __builtin_alloca(len);
+ memset(nums, 0, len);
+ strcpy((char *)nums, path);
+
+ while (cnt) {
+ csum ^= *nums;
+ nums++;
+ cnt--;
+ }
+
+ return csum;
+}
+
+int
+checksum_md5(const char *path, const struct stat *sb)
+{
+ uint64_t this_data_checksum = 0;
+ FILE *filep = NULL;
+ char *cmd = NULL;
+ char strvalue[17] = {
+ 0,
+ };
+ int ret = -1;
+ int len = 0;
+ const char *pos = NULL;
+ char *cpos = NULL;
+
+ /* Have to escape single-quotes in filename.
+ * First, calculate the size of the buffer I'll need.
+ */
+ for (pos = path; *pos; pos++) {
+ if (*pos == '\'')
+ len += 4;
+ else
+ len += 1;
+ }
+
+ cmd = malloc(sizeof(char) * (len + 20));
+ cmd[0] = '\0';
+
+ /* Now, build the command with single quotes escaped. */
+
+ cpos = cmd;
+#if defined(linux)
+ strcpy(cpos, "md5sum '");
+ cpos += 8;
+#elif defined(__NetBSD__)
+ strcpy(cpos, "md5 -n '");
+ cpos += 8;
+#elif defined(__FreeBSD__) || defined(__APPLE__)
+ strcpy(cpos, "md5 -q '");
+ cpos += 8;
+#else
+#error "Please add system-specific md5 command"
+#endif
+
+ /* Add the file path, with every single quotes replaced with this sequence:
+ * '\''
+ */
+
+ for (pos = path; *pos; pos++) {
+ if (*pos == '\'') {
+ strcpy(cpos, "'\\''");
+ cpos += 4;
+ } else {
+ *cpos = *pos;
+ cpos++;
+ }
+ }
+
+ /* Add on the trailing single-quote and null-terminate. */
+ strcpy(cpos, "'");
+
+ filep = popen(cmd, "r");
+ if (!filep) {
+ perror(path);
+ goto out;
+ }
+
+ if (fread(strvalue, sizeof(char), 16, filep) != 16) {
+ fprintf(stderr, "%s: short read\n", path);
+ goto out;
+ }
+
+ this_data_checksum = strtoull(strvalue, NULL, 16);
+ if (-1 == this_data_checksum) {
+ fprintf(stderr, "%s: %s\n", strvalue, strerror(errno));
+ goto out;
+ }
+ checksum_file1 ^= this_data_checksum;
+
+ if (fread(strvalue, sizeof(char), 16, filep) != 16) {
+ fprintf(stderr, "%s: short read\n", path);
+ goto out;
+ }
+
+ this_data_checksum = strtoull(strvalue, NULL, 16);
+ if (-1 == this_data_checksum) {
+ fprintf(stderr, "%s: %s\n", strvalue, strerror(errno));
+ goto out;
+ }
+ checksum_file2 ^= this_data_checksum;
+
+ ret = 0;
+out:
+ if (filep)
+ pclose(filep);
+
+ if (cmd)
+ free(cmd);
+
+ return ret;
+}
+
+int
+checksum_filenames(const char *path, const struct stat *sb)
+{
+ DIR *dirp = NULL;
+ struct dirent *entry = NULL;
+ unsigned long long csum = 0;
+ int i = 0;
+ int found = 0;
+
+ dirp = opendir(path);
+ if (!dirp) {
+ perror(path);
+ goto out;
+ }
+
+ errno = 0;
+ while ((entry = readdir(dirp))) {
+ /* do not calculate the checksum of the entries which user has
+ told to ignore and proceed to other siblings.*/
+ if (arequal_config.ignored_directory) {
+ for (i = 0; i < arequal_config.directories_ignored; i++) {
+ if ((strcmp(entry->d_name,
+ arequal_config.ignored_directory[i]) == 0)) {
+ found = 1;
+ DBG("ignoring the entry %s\n", entry->d_name);
+ break;
+ }
+ }
+ if (found == 1) {
+ found = 0;
+ continue;
+ }
+ }
+ csum = checksum_path(entry->d_name);
+ checksum_dir ^= csum;
+ }
+
+ if (errno) {
+ perror(path);
+ goto out;
+ }
+
+out:
+ if (dirp)
+ closedir(dirp);
+
+ return 0;
+}
+
+int
+process_file(const char *path, const struct stat *sb)
+{
+ int ret = 0;
+
+ count_file++;
+
+ avg_uid_file ^= sb->st_uid;
+ avg_gid_file ^= sb->st_gid;
+ avg_mode_file ^= sb->st_mode;
+
+ ret = checksum_md5(path, sb);
+
+ return ret;
+}
+
+int
+process_dir(const char *path, const struct stat *sb)
+{
+ unsigned long long csum = 0;
+
+ count_dir++;
+
+ avg_uid_dir ^= sb->st_uid;
+ avg_gid_dir ^= sb->st_gid;
+ avg_mode_dir ^= sb->st_mode;
+
+ csum = checksum_filenames(path, sb);
+
+ checksum_dir ^= csum;
+
+ return 0;
+}
+
+int
+process_symlink(const char *path, const struct stat *sb)
+{
+ int ret = 0;
+ char buf[4096] = {
+ 0,
+ };
+ unsigned long long csum = 0;
+
+ count_symlink++;
+
+ avg_uid_symlink ^= sb->st_uid;
+ avg_gid_symlink ^= sb->st_gid;
+ avg_mode_symlink ^= sb->st_mode;
+
+ ret = readlink(path, buf, 4096);
+ if (ret < 0) {
+ perror(path);
+ goto out;
+ }
+
+ DBG("readlink (%s) => %s\n", path, buf);
+
+ csum = checksum_path(buf);
+
+ DBG("checksum_path (%s) => %llx\n", buf, csum);
+
+ checksum_symlink ^= csum;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+process_other(const char *path, const struct stat *sb)
+{
+ count_other++;
+
+ avg_uid_other ^= sb->st_uid;
+ avg_gid_other ^= sb->st_gid;
+ avg_mode_other ^= sb->st_mode;
+
+ checksum_other ^= sb->st_rdev;
+
+ return 0;
+}
+
+static int
+ignore_entry(const char *bname, const char *dname)
+{
+ int i;
+
+ for (i = 0; i < arequal_config.directories_ignored; i++) {
+ if (strcmp(bname, arequal_config.ignored_directory[i]) == 0 &&
+ strncmp(arequal_config.test_directory, dname,
+ strlen(arequal_config.test_directory)) == 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+process_entry(const char *path, const struct stat *sb, int typeflag,
+ struct FTW *ftwbuf)
+{
+ int ret = 0;
+ char *name = NULL;
+ char *bname = NULL;
+ char *dname = NULL;
+ int i = 0;
+
+ /* The if condition below helps in ignoring some directories in
+ the given path. If the name of the entry is one of the directory
+ names that the user told to ignore, then that directory will not
+ be processed and will return FTW_SKIP_SUBTREE to nftw which will
+ not crawl this directory and move on to other siblings.
+ Note that for nftw to recognize FTW_SKIP_TREE, FTW_ACTIONRETVAL
+ should be passed as an argument to nftw.
+
+ This mainly helps in calculating the checksum of network filesystems
+ (client-server), where the server might have some hidden directories
+ for managing the filesystem. So to calculate the sanity of filesystem
+ one has to get the checksum of the client and then the export directory
+ of server by telling arequal to ignore some of the directories which
+ are not part of the namespace.
+ */
+
+ if (arequal_config.ignored_directory) {
+#ifndef FTW_SKIP_SUBTREE
+ char *cp;
+
+ name = strdup(path);
+ dname = dirname(name);
+
+ for (cp = strtok(name, "/"); cp; cp = strtok(NULL, "/")) {
+ if (ignore_entry(cp, dname)) {
+ DBG("ignoring %s\n", path);
+ if (name)
+ free(name);
+ return 0;
+ }
+ }
+#else /* FTW_SKIP_SUBTREE */
+ name = strdup(path);
+
+ name[strlen(name)] = '\0';
+
+ bname = strrchr(name, '/');
+ if (bname)
+ bname++;
+
+ dname = dirname(name);
+ if (ignore_entry(bname, dname)) {
+ DBG("ignoring %s\n", bname);
+ ret = FTW_SKIP_SUBTREE;
+ if (name)
+ free(name);
+ return ret;
+ }
+#endif /* FTW_SKIP_SUBTREE */
+ }
+
+ DBG("processing entry %s\n", path);
+
+ switch ((S_IFMT & sb->st_mode)) {
+ case S_IFDIR:
+ ret = process_dir(path, sb);
+ break;
+ case S_IFREG:
+ ret = process_file(path, sb);
+ break;
+ case S_IFLNK:
+ ret = process_symlink(path, sb);
+ break;
+ default:
+ ret = process_other(path, sb);
+ break;
+ }
+
+ if (name)
+ free(name);
+ return ret;
+}
+
+int
+display_counts(FILE *fp)
+{
+ fprintf(fp, "\n");
+ fprintf(fp, "Entry counts\n");
+ fprintf(fp, "Regular files : %lld\n", count_file);
+ fprintf(fp, "Directories : %lld\n", count_dir);
+ fprintf(fp, "Symbolic links : %lld\n", count_symlink);
+ fprintf(fp, "Other : %lld\n", count_other);
+ fprintf(fp, "Total : %lld\n",
+ (count_file + count_dir + count_symlink + count_other));
+
+ return 0;
+}
+
+int
+display_checksums(FILE *fp)
+{
+ fprintf(fp, "\n");
+ fprintf(fp, "Checksums\n");
+ fprintf(fp, "Regular files : %llx%llx\n", checksum_file1, checksum_file2);
+ fprintf(fp, "Directories : %llx\n", checksum_dir);
+ fprintf(fp, "Symbolic links : %llx\n", checksum_symlink);
+ fprintf(fp, "Other : %llx\n", checksum_other);
+ fprintf(fp, "Total : %llx\n",
+ (checksum_file1 ^ checksum_file2 ^ checksum_dir ^ checksum_symlink ^
+ checksum_other));
+
+ return 0;
+}
+
+int
+display_metadata(FILE *fp)
+{
+ fprintf(fp, "\n");
+ fprintf(fp, "Metadata checksums\n");
+ fprintf(fp, "Regular files : %llx\n",
+ (avg_uid_file + 13) * (avg_gid_file + 11) * (avg_mode_file + 7));
+ fprintf(fp, "Directories : %llx\n",
+ (avg_uid_dir + 13) * (avg_gid_dir + 11) * (avg_mode_dir + 7));
+ fprintf(fp, "Symbolic links : %llx\n",
+ (avg_uid_symlink + 13) * (avg_gid_symlink + 11) *
+ (avg_mode_symlink + 7));
+ fprintf(fp, "Other : %llx\n",
+ (avg_uid_other + 13) * (avg_gid_other + 11) * (avg_mode_other + 7));
+
+ return 0;
+}
+
+int
+display_stats(FILE *fp)
+{
+ display_counts(fp);
+
+ display_metadata(fp);
+
+ display_checksums(fp);
+
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ int i = 0;
+
+ ret = argp_parse(&argp, argc, argv, 0, 0, NULL);
+ if (ret != 0) {
+ fprintf(stderr, "parsing arguments failed\n");
+ return -2;
+ }
+
+ /* Use FTW_ACTIONRETVAL to take decision on what to do depending upon */
+ /* the return value of the callback function */
+ /* (process_entry in this case) */
+ ret = nftw(arequal_config.test_directory, process_entry, 30,
+ FTW_ACTIONRETVAL | FTW_PHYS | FTW_MOUNT);
+ if (ret != 0) {
+ fprintf(stderr, "ftw (%s) returned %d (%s), terminating\n", argv[1],
+ ret, strerror(errno));
+ return 1;
+ }
+
+ display_stats(stdout);
+
+ if (arequal_config.ignored_directory) {
+ for (i = 0; i < arequal_config.directories_ignored; i++) {
+ if (arequal_config.ignored_directory[i])
+ free(arequal_config.ignored_directory[i]);
+ }
+ free(arequal_config.ignored_directory);
+ }
+
+ return 0;
+}
diff --git a/tests/utils/changelog/changelog.h b/tests/utils/changelog/changelog.h
new file mode 100644
index 00000000000..1502b689eb4
--- /dev/null
+++ b/tests/utils/changelog/changelog.h
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+struct gf_brick_spec;
+
+/**
+ * Max bit shiter for event selection
+ */
+#define CHANGELOG_EV_SELECTION_RANGE 5
+
+#define CHANGELOG_OP_TYPE_JOURNAL (1 << 0)
+#define CHANGELOG_OP_TYPE_OPEN (1 << 1)
+#define CHANGELOG_OP_TYPE_CREATE (1 << 2)
+#define CHANGELOG_OP_TYPE_RELEASE (1 << 3)
+#define CHANGELOG_OP_TYPE_BR_RELEASE \
+ (1 << 4) /* logical release (last close()), \
+ sent by bitrot stub */
+#define CHANGELOG_OP_TYPE_MAX (1 << CHANGELOG_EV_SELECTION_RANGE)
+
+struct ev_open {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_creat {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_release {
+ unsigned char gfid[16];
+};
+
+struct ev_release_br {
+ unsigned long version;
+ unsigned char gfid[16];
+ int32_t sign_info;
+};
+
+struct ev_changelog {
+ char path[PATH_MAX];
+};
+
+typedef struct changelog_event {
+ unsigned int ev_type;
+
+ union {
+ struct ev_open open;
+ struct ev_creat create;
+ struct ev_release release;
+ struct ev_changelog journal;
+ struct ev_release_br releasebr;
+ } u;
+} changelog_event_t;
+
+#define CHANGELOG_EV_SIZE (sizeof(changelog_event_t))
+
+/**
+ * event callback, connected & disconnection defs
+ */
+typedef void(CALLBACK)(void *, char *, void *, changelog_event_t *);
+typedef void *(INIT)(void *, struct gf_brick_spec *);
+typedef void(FINI)(void *, char *, void *);
+typedef void(CONNECT)(void *, char *, void *);
+typedef void(DISCONNECT)(void *, char *, void *);
+
+struct gf_brick_spec {
+ char *brick_path;
+ unsigned int filter;
+
+ INIT *init;
+ FINI *fini;
+ CALLBACK *callback;
+ CONNECT *connected;
+ DISCONNECT *disconnected;
+
+ void *ptr;
+};
+
+/* API set */
+
+int
+gf_changelog_register(char *brick_path, char *scratch_dir, char *log_file,
+ int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan();
+
+int
+gf_changelog_start_fresh();
+
+ssize_t
+gf_changelog_next_change(char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done(char *file);
+
+/* newer flexible API */
+int
+gf_changelog_init(void *xl);
+
+int
+gf_changelog_register_generic(struct gf_brick_spec *bricks, int count,
+ int ordered, char *logfile, int lvl, void *xl);
+
+int
+gf_history_changelog(char *changelog_dir, unsigned long start,
+ unsigned long end, int n_parallel,
+ unsigned long *actual_end);
+int
+gf_history_changelog_scan();
+ssize_t
+gf_history_changelog_next_change(char *bufptr, size_t maxlen);
+int
+gf_history_changelog_done(char *file);
+#endif
diff --git a/tests/utils/changelog/get-history.c b/tests/utils/changelog/get-history.c
new file mode 100644
index 00000000000..9963ab76958
--- /dev/null
+++ b/tests/utils/changelog/get-history.c
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ unsigned long end_ts = 0;
+ int start = 0;
+ int end = 0;
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ start = atoi(argv[1]);
+ end = atoi(argv[2]);
+
+ ret = gf_history_changelog("/d/backends/patchy0/.glusterfs/changelogs",
+ start, end, 3, &end_ts);
+ if (ret < 0) {
+ printf("-3");
+ fflush(stdout);
+ return -1;
+ } else if (ret == 1) {
+ printf("1");
+ fflush(stdout);
+ return 0;
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return 0;
+}
diff --git a/tests/utils/changelog/test-changelog-api.c b/tests/utils/changelog/test-changelog-api.c
new file mode 100644
index 00000000000..f4eb066b630
--- /dev/null
+++ b/tests/utils/changelog/test-changelog-api.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 5 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ /* get changes for brick "/d/backends/patchy0" */
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ while (1) {
+ i = 0;
+ nr_changes = gf_changelog_scan();
+ if (nr_changes < 0) {
+ printf("-4");
+ fflush(stdout);
+ return -1;
+ }
+
+ if (nr_changes == 0)
+ goto next;
+
+ while ((changes = gf_changelog_next_change(fbuf, PATH_MAX)) > 0) {
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_changelog_done(fbuf);
+ if (ret) {
+ printf("-5");
+ fflush(stdout);
+ return -1;
+ }
+ }
+
+ if (changes == -1) {
+ printf("-6");
+ fflush(stdout);
+ return -1;
+ }
+
+ next:
+ sleep(2);
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return ret;
+}
diff --git a/tests/utils/changelog/test-history-api.c b/tests/utils/changelog/test-history-api.c
new file mode 100644
index 00000000000..d78e387df10
--- /dev/null
+++ b/tests/utils/changelog/test-history-api.c
@@ -0,0 +1,111 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ int i = 0;
+ unsigned long end_ts = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ int start = 0;
+ int end = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ start = atoi(argv[1]);
+ end = atoi(argv[2]);
+
+ ret = gf_history_changelog("/d/backends/patchy0/.glusterfs/changelogs",
+ start, end, 3, &end_ts);
+ if (ret < 0) {
+ printf("-3");
+ fflush(stdout);
+ return -1;
+ } else if (ret == 1) {
+ printf("1");
+ fflush(stdout);
+ return 0;
+ }
+
+ while (1) {
+ nr_changes = gf_history_changelog_scan();
+ if (nr_changes < 0) {
+ printf("-4");
+ fflush(stdout);
+ return -1;
+ }
+
+ if (nr_changes == 0) {
+ goto out;
+ }
+
+ while ((changes = gf_history_changelog_next_change(fbuf, PATH_MAX)) >
+ 0) {
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_history_changelog_done(fbuf);
+ if (ret) {
+ printf("-5");
+ fflush(stdout);
+ return -1;
+ }
+ }
+ if (changes == -1) {
+ printf("-6");
+ fflush(stdout);
+ return -1;
+ }
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return 0;
+}
diff --git a/tests/utils/changelogparser.py b/tests/utils/changelogparser.py
new file mode 100644
index 00000000000..3b8f81d1bad
--- /dev/null
+++ b/tests/utils/changelogparser.py
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+"""
+Why?
+
+Converts this
+
+GlusterFS Changelog | version: v1.1 | encoding : 2
+E0b99ef11-4b79-4cd0-9730-b5a0e8c4a8c0^@4^@16877^@0^@0^@00000000-0000-0000-0000-
+000000000001/dir1^@Ec5250af6-720e-4bfe-b938-827614304f39^@23^@33188^@0^@0^@0b99
+ef11-4b79-4cd0-9730-b5a0e8c4a8c0/hello.txt^@Dc5250af6-720e-4bfe-b938-827614304f
+39^@Dc5250af6-720e-4bfe-b938-827614304f39^@
+
+
+to human readable :)
+
+E 0b99ef11-4b79-4cd0-9730-b5a0e8c4a8c0 MKDIR 16877 0 000000000-0000-0000-0000
+ -000000000001/dir1
+E c5250af6-720e-4bfe-b938-827614304f39 CREATE 33188 0 0 0b99ef11-4b79-4cd0-9730
+ -b5a0e8c4a8c0/hello.txt
+D c5250af6-720e-4bfe-b938-827614304f39
+D c5250af6-720e-4bfe-b938-827614304f39
+
+
+"""
+import sys
+import codecs
+
+ENTRY = 'E'
+META = 'M'
+DATA = 'D'
+SEP = "\x00"
+
+GF_FOP = [
+ "NULL", "STAT", "READLINK", "MKNOD", "MKDIR", "UNLINK",
+ "RMDIR", "SYMLINK", "RENAME", "LINK", "TRUNCATE", "OPEN",
+ "READ", "WRITE", "STATFS", "FLUSH", "FSYNC", "SETXATTR",
+ "GETXATTR", "REMOVEXATTR", "OPENDIR", "FSYNCDIR", "ACCESS",
+ "CREATE", "FTRUNCATE", "FSTAT", "LK", "LOOKUP", "READDIR",
+ "INODELK", "FINODELK", "ENTRYLK", "FENTRYLK", "XATTROP",
+ "FXATTROP", "FSETXATTR", "FGETXATTR", "RCHECKSUM", "SETATTR",
+ "FSETATTR", "READDIRP", "GETSPEC", "FORGET", "RELEASE",
+ "RELEASEDIR", "FREMOVEXATTR", "FALLOCATE", "DISCARD", "ZEROFILL"]
+
+
+class NumTokens_V11(object):
+ E = 7
+ M = 3
+ D = 2
+ NULL = 3
+ MKNOD = 7
+ MKDIR = 7
+ UNLINK = 4
+ RMDIR = 4
+ SYMLINK = 4
+ RENAME = 5
+ LINK = 4
+ SETXATTR = 3
+ REMOVEXATTR = 3
+ CREATE = 7
+ SETATTR = 3
+ FTRUNCATE = 3
+ FXATTROP = 3
+
+
+class NumTokens_V12(NumTokens_V11):
+ UNLINK = 5
+ RMDIR = 5
+
+
+class Version:
+ V11 = "v1.1"
+ V12 = "v1.2"
+
+
+class Record(object):
+ def __init__(self, **kwargs):
+ self.ts = kwargs.get("ts", None)
+ self.fop_type = kwargs.get("fop_type", None)
+ self.gfid = kwargs.get("gfid", None)
+ self.path = kwargs.get("path", None)
+ self.fop = kwargs.get("fop", None)
+ self.path1 = kwargs.get("path1", None)
+ self.path2 = kwargs.get("path2", None)
+ self.mode = kwargs.get("mode", None)
+ self.uid = kwargs.get("uid", None)
+ self.gid = kwargs.get("gid", None)
+
+ def create_mknod_mkdir(self, **kwargs):
+ self.path = kwargs.get("path", None)
+ self.fop = kwargs.get("fop", None)
+ self.mode = kwargs.get("mode", None)
+ self.uid = kwargs.get("uid", None)
+ self.gid = kwargs.get("gid", None)
+
+ def metadata(self, **kwargs):
+ self.fop = kwargs.get("fop", None)
+
+ def rename(self, **kwargs):
+ self.fop = kwargs.get("fop", None)
+ self.path1 = kwargs.get("path1", None)
+ self.path2 = kwargs.get("path2", None)
+
+ def link_symlink_unlink_rmdir(self, **kwargs):
+ self.path = kwargs.get("path", None)
+ self.fop = kwargs.get("fop", None)
+
+ def __unicode__(self):
+ if self.fop_type == "D":
+ return u"{ts} {fop_type} {gfid}".format(**self.__dict__)
+ elif self.fop_type == "M":
+ return u"{ts} {fop_type} {gfid} {fop}".format(**self.__dict__)
+ elif self.fop_type == "E":
+ if self.fop in ["CREATE", "MKNOD", "MKDIR"]:
+ return (u"{ts} {fop_type} {gfid} {fop} "
+ u"{path} {mode} {uid} {gid}".format(**self.__dict__))
+ elif self.fop == "RENAME":
+ return (u"{ts} {fop_type} {gfid} {fop} "
+ u"{path1} {path2}".format(**self.__dict__))
+ elif self.fop in ["LINK", "SYMLINK", "UNLINK", "RMDIR"]:
+ return (u"{ts} {fop_type} {gfid} {fop} "
+ u"{path}".format(**self.__dict__))
+ else:
+ return repr(self.__dict__)
+ else:
+ return repr(self.__dict__)
+
+ def __str__(self):
+ if sys.version_info >= (3,):
+ return self.__unicode__()
+ else:
+ return unicode(self).encode('utf-8')
+
+
+def get_num_tokens(data, tokens, version=Version.V11):
+ if version == Version.V11:
+ cls_numtokens = NumTokens_V11
+ elif version == Version.V12:
+ cls_numtokens = NumTokens_V12
+ else:
+ sys.stderr.write("Unknown Changelog Version\n")
+ sys.exit(1)
+
+ if data[tokens[0]] in [ENTRY, META]:
+ if len(tokens) >= 3:
+ return getattr(cls_numtokens, GF_FOP[int(data[tokens[2]])])
+ else:
+ return None
+ else:
+ return getattr(cls_numtokens, data[tokens[0]])
+
+
+def process_record(data, tokens, changelog_ts, callback):
+ if data[tokens[0]] in [ENTRY, META]:
+ try:
+ tokens[2] = GF_FOP[int(data[tokens[2]])]
+ except ValueError:
+ tokens[2] = "NULL"
+
+ if not changelog_ts:
+ ts1 = int(changelog_ts)
+ else:
+ ts1=""
+ record = Record(ts=ts1, fop_type=data[tokens[0]],
+ gfid=data[tokens[1]])
+ if data[tokens[0]] == META:
+ record.metadata(fop=tokens[2])
+ elif data[tokens[0]] == ENTRY:
+ if tokens[2] in ["CREATE", "MKNOD", "MKDIR"]:
+ record.create_mknod_mkdir(fop=tokens[2],
+ path=data[tokens[6]],
+ mode=int(data[tokens[3]]),
+ uid=int(data[tokens[4]]),
+ gid=int(data[tokens[5]]))
+ elif tokens[2] == "RENAME":
+ record.rename(fop=tokens[2],
+ path1=data[tokens[3]],
+ path2=data[tokens[4]])
+ if tokens[2] in ["LINK", "SYMLINK", "UNLINK", "RMDIR"]:
+ record.link_symlink_unlink_rmdir(fop=tokens[2],
+ path=data[tokens[3]])
+ callback(record)
+
+
+def default_callback(record):
+ sys.stdout.write(u"{0}\n".format(record))
+
+
+def parse(filename, callback=default_callback):
+ data = None
+ tokens = []
+ changelog_ts = filename.rsplit(".")[-1]
+ with codecs.open(filename, mode="rb", encoding="utf-8") as f:
+ # GlusterFS Changelog | version: v1.1 | encoding : 2
+ header = f.readline()
+ version = header.split()[4]
+
+ data = f.readline()
+
+ slice_start = 0
+ in_record = False
+
+ prev_char = ""
+ next_char = ""
+ for i, c in enumerate(data):
+ next_char = ""
+ if len(data) >= (i + 2):
+ next_char = data[i+1]
+
+ if not in_record and c in [ENTRY, META, DATA]:
+ tokens.append(slice(slice_start, i+1))
+ slice_start = i+1
+ in_record = True
+ continue
+
+ if c == SEP and ((prev_char != SEP and next_char == SEP) or
+ (prev_char == SEP and next_char != SEP) or
+ (prev_char != SEP and next_char != SEP)):
+ tokens.append(slice(slice_start, i))
+ slice_start = i+1
+
+ num_tokens = get_num_tokens(data, tokens, version)
+
+ if num_tokens == len(tokens):
+ process_record(data, tokens, changelog_ts, callback)
+ in_record = False
+ tokens = []
+
+ prev_char = c
+
+ # process last record
+ if slice_start < (len(data) - 1):
+ tokens.append(slice(slice_start, len(data)))
+ process_record(data, tokens, changelog_ts, callback)
+ tokens = []
+
+parse(sys.argv[1])
diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py
new file mode 100755
index 00000000000..04736e9c73b
--- /dev/null
+++ b/tests/utils/create-files.py
@@ -0,0 +1,593 @@
+
+# This script was developed by Vijaykumar Koppad (vkoppad@redhat.com)
+# The latest version of this script can found at
+# http://github.com/vijaykumar-koppad/crefi
+
+from __future__ import with_statement
+import os
+import re
+import sys
+import time
+import errno
+import xattr
+import string
+import random
+import logging
+import tarfile
+import argparse
+
+datsiz = 0
+timr = 0
+
+def get_ascii_upper_alpha_digits():
+ if sys.version_info > (3,0):
+ return string.ascii_uppercase+string.digits
+ else:
+ return string.uppercase+string.digits
+
+def setLogger(filename):
+ global logger
+ logger = logging.getLogger(filename)
+ logger.setLevel(logging.DEBUG)
+ return
+
+
+def setupLogger(filename):
+ logger = logging.getLogger(filename)
+ logger.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(asctime)s - %(message)s')
+ ch = logging.StreamHandler()
+ ch.setLevel(logging.INFO)
+ ch.setFormatter(formatter)
+ logger.addHandler(ch)
+ return logger
+
+
+def os_rd(src, size):
+ global datsiz
+ fd = os.open(src, os.O_RDONLY)
+ data = os.read(fd, size)
+ os.close(fd)
+ datsiz = datsiz + size
+ return data
+
+
+def os_wr(dest, data):
+ global timr
+ st = time.time()
+ fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
+ os.write(fd, data)
+ os.close(fd)
+ ed = time.time()
+ timr = timr+(ed-st)
+ return
+
+
+def create_sparse_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ else:
+ size = size
+ data = os_rd("/dev/zero", size)
+ os_wr(fil, data)
+ return
+
+
+def create_binary_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ else:
+ size = size
+ data = os_rd("/dev/urandom", size)
+ os_wr(fil, data)
+ return
+
+
+def create_txt_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ if size < 500*1024:
+ data = os_rd("/etc/services", size)
+ os_wr(fil, data)
+ else:
+ data = os_rd("/etc/services", 512*1024)
+ file_size = 0
+ fd = os.open(fil, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
+ while file_size < size:
+ os.write(fd, data)
+ file_size += 500*1024
+ os.close(fd)
+ return
+
+
+def create_tar_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ else:
+ size = size
+ data = os_rd("/dev/urandom", size)
+ os_wr(fil, data)
+ tar = tarfile.open(fil+".tar.gz", "w:gz")
+ tar.add(fil)
+ tar.close()
+ os.unlink(fil)
+ return
+
+
+def get_filename(flen):
+ size = flen
+ char = get_ascii_upper_alpha_digits()
+ st = ''.join(random.choice(char) for i in range(size))
+ ti = str((hex(int(str(time.time()).split('.')[0])))[2:])
+ return ti+"%%"+st
+
+
+def text_files(files, file_count, inter, size, mins, maxs, rand,
+ flen, randname, dir_path):
+ global datsiz, timr
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_txt_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def sparse_files(files, file_count, inter, size, mins, maxs,
+ rand, flen, randname, dir_path):
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_sparse_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def binary_files(files, file_count, inter, size, mins, maxs,
+ rand, flen, randname, dir_path):
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_binary_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def tar_files(files, file_count, inter, size, mins, maxs,
+ rand, flen, randname, dir_path):
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_tar_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def setxattr_files(files, randname, dir_path):
+ char = get_ascii_upper_alpha_digits()
+ if not randname:
+ for k in range(files):
+ v = ''.join(random.choice(char) for i in range(10))
+ n = "user."+v
+ xattr.setxattr(dir_path+"/"+"file"+str(k), n, v)
+ else:
+ dirs = os.listdir(dir_path+"/")
+ for fil in dirs:
+ v = ''.join(random.choice(char) for i in range(10))
+ n = "user."+v
+ xattr.setxattr(dir_path+"/"+fil, n, v)
+ return
+
+
+def rename_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ os.rename(dir_path + "/" + "file" + str(k),
+ dir_path + "/" + "file" + str(files+k))
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ if not os.path.isdir(fil):
+ newfil = get_filename(flen)
+ os.rename(dir_path + "/" + fil,
+ dir_path + "/" + newfil)
+ return
+
+
+def truncate_files(files, mins, maxs, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ byts = random.randint(mins, maxs)
+ fd = os.open(dir_path + "/" + "file" + str(k), os.O_WRONLY)
+ os.ftruncate(fd, byts)
+ os.close(fd)
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ if not os.path.isdir(dir_path+"/"+fil):
+ byts = random.randint(mins, maxs)
+ fd = os.open(dir_path+"/"+fil, os.O_WRONLY)
+ os.ftruncate(fd, byts)
+ os.close(fd)
+ return
+
+
+def chmod_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ mod = random.randint(0, 511)
+ os.chmod(dir_path+"/"+"file"+str(k), mod)
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ mod = random.randint(0, 511)
+ os.chmod(dir_path+"/"+fil, mod)
+ return
+
+def random_og(path):
+ u = random.randint(1025, 65536)
+ g = -1
+ os.chown(path, u, g)
+
+def chown_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ random_og(dir_path+"/"+"file"+str(k))
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ random_og(dir_path+"/"+fil)
+ return
+
+
+def chgrp_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ random_og(dir_path+"/"+"file"+str(k))
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ random_og(dir_path+"/"+fil)
+ return
+
+
+def symlink_files(files, flen, randname, dir_path):
+ try:
+ os.makedirs(dir_path+"/"+"symlink_to_files")
+ except OSError as ex:
+ if ex.errno is not errno.EEXIST:
+ raise
+ if not randname:
+ for k in range(files):
+ src_file = "file"+str(k)
+ os.symlink(dir_path+"/"+src_file,
+ dir_path+"/"+"symlink_to_files/file"+str(k)+"_sym")
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ newfil = get_filename(flen)
+ os.symlink(dir_path+"/"+fil,
+ dir_path+"/"+"symlink_to_files/"+newfil)
+ return
+
+
+def hardlink_files(files, flen, randname, dir_path):
+ try:
+ os.makedirs(dir_path+"/"+"hardlink_to_files")
+ except OSError as ex:
+ if ex.errno is not errno.EEXIST:
+ raise
+ if not randname:
+ for k in range(files):
+ src_file = "file"+str(k)
+ os.link(dir_path+"/"+src_file,
+ dir_path+"/"+"hardlink_to_files/file"+str(k)+"_hard")
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ if not os.path.isdir(dir_path+"/"+fil):
+ newfil = get_filename(flen)
+ os.link(dir_path+"/"+fil,
+ dir_path+"/"+"hardlink_to_files/"+newfil)
+ return
+
+
+def human2bytes(size):
+ size_short = {
+ 1024: ['K', 'KB', 'KiB', 'k', 'kB', 'kiB'],
+ 1024*1024: ['M', 'MB', 'MiB'],
+ 1024*1024*1024: ['G', 'GB', 'GiB']
+ }
+ num = re.search('(\d+)', size).group()
+ ext = size[len(num):]
+ num = int(num)
+ if ext == '':
+ return num
+ for value, keys in size_short.items():
+ if ext in keys:
+ size = num*value
+ return size
+
+
+def bytes2human(byts):
+ abbr = {
+ 1 << 30: "GB",
+ 1 << 20: "MB",
+ 1 << 10: "KB",
+ 1: "bytes"
+ }
+ if byts == 1:
+ return '1 bytes'
+ for factor, suffix in abbr.items():
+ if byts >= factor:
+ break
+ return "%.3f %s" % (byts / factor, suffix)
+
+
+def multipledir(mnt_pnt, brdth, depth, files, fop, file_type="text",
+ inter="1000", size="100K", mins="10K", maxs="500K",
+ rand=False, l=10, randname=False):
+ files_count = 1
+ size = human2bytes(size)
+ maxs = human2bytes(maxs)
+ mins = human2bytes(mins)
+ for i in range(brdth):
+ dir_path = mnt_pnt
+ for j in range(depth):
+ dir_path = dir_path+"/"+"level"+str(j)+str(i)
+ try:
+ os.makedirs(dir_path)
+ except OSError as ex:
+ if ex.errno is not errno.EEXIST:
+ raise
+
+ if fop == "create":
+ logger.info("Entering the directory level"+str(j)+str(i))
+ if file_type == "text":
+ files_count = text_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ elif file_type == "sparse":
+ files_count = sparse_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ elif file_type == "binary":
+ files_count = binary_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ elif file_type == "tar":
+ files_count = tar_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ else:
+ logger.error("Not a valid file type")
+ sys.exit(1)
+
+ elif fop == "rename":
+ logger.info("Started renaming files for the files 0 to " +
+ str(files)+" in the directory level"+str(j) +
+ str(i)+" ...")
+ rename_files(files, l, randname, dir_path)
+ logger.info("Finished renaming files for the files 0 to " +
+ str(files)+" in the directory level"+str(j)+str(i))
+
+ elif fop == "chmod":
+ logger.info("Started changing permission of files for the " +
+ "files 0 to "+str(files)+" in the directory level"
+ + str(j)+str(i)+" ...")
+ chmod_files(files, l, randname, dir_path)
+ logger.info("Finished changing permission of files for " +
+ "the files 0 to "+str(files) +
+ " in the directory level"+str(j)+str(i))
+
+ elif fop == "chown":
+ logger.info("Started changing ownership of files for the " +
+ "files 0 to " + str(files) +
+ " in the directory level"+str(j)+str(i)+" ...")
+ chown_files(files, l, randname, dir_path)
+ logger.info("Finished changing ownership of files for " +
+ "the files 0 to "+str(files) +
+ " in the directory level"+str(j)+str(i))
+
+ elif fop == "chgrp":
+ logger.info("Started changing group ownership of files for " +
+ "the files 0 to " + str(files) +
+ " in the directory level"+str(j)+str(i)+" ...")
+ chgrp_files(files, l, randname, dir_path)
+ logger.info("Finished changing group ownership of files for " +
+ "the files 0 to "+str(files) +
+ " in the directory level"+str(j)+str(i))
+
+ elif fop == "symlink":
+ logger.info("Started creating symlink to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ symlink_files(files, l, randname, dir_path)
+ logger.info("Finished creating symlink to the files 0 to " +
+ str(files) + " in the directory level" +
+ str(j)+str(i))
+
+ elif fop == "hardlink":
+ logger.info("Started creating hardlink to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ hardlink_files(files, l, randname, dir_path)
+ logger.info("Finished creating hardlink to the files 0 to " +
+ str(files) + " in the directory level" +
+ str(j)+str(i))
+
+ elif fop == "truncate":
+ logger.info("Started truncating the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ truncate_files(files, mins, maxs, randname, dir_path)
+ logger.info("Finished truncating the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i))
+
+ elif fop == "setxattr":
+ logger.info("Started setxattr to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ setxattr_files(files, randname, dir_path)
+ logger.info("Finished setxattr to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i))
+
+ if fop == "create":
+ thrpt = datsiz / timr
+ logger.info("finished creating files with throughput ---- " +
+ bytes2human(thrpt)+"ps")
+
+
+def singledir(mnt_pnt, files, fop, file_type="text", inter="1000", size="100K",
+ mins="10K", maxs="500K", rand=False, l=10, randname=False):
+
+ files_count = 1
+ size = human2bytes(size)
+ maxs = human2bytes(maxs)
+ mins = human2bytes(mins)
+ if fop == "create":
+ if file_type == "text":
+ files_count = text_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ elif file_type == "sparse":
+ files_count = sparse_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ elif file_type == "binary":
+ files_count = binary_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ elif file_type == "tar":
+ files_count = tar_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ else:
+ logger.info("Not a valid file type")
+ sys.exit(1)
+ thrpt = datsiz / timr
+ logger.info("finished creating files with avg throughput ---- " +
+ bytes2human(thrpt)+"ps")
+
+ elif fop == "rename":
+ logger.info("Started renaming files for the files 0 to " +
+ str(files) + "...")
+ rename_files(files, l, randname, mnt_pnt)
+ logger.info("Finished renaming files for the files 0 to "+str(files))
+
+ elif fop == "chmod":
+ logger.info("Started changing permission for the files 0 to " +
+ str(files)+" ...")
+ chmod_files(files, l, randname, mnt_pnt)
+ logger.info("Finished changing permission files for the files 0 to " +
+ str(files))
+
+ elif fop == "chown":
+ logger.info("Started changing ownership for the files 0 to " +
+ str(files)+"...")
+ chown_files(files, l, randname, mnt_pnt)
+ logger.info("Finished changing ownership for the files 0 to " +
+ str(files))
+
+ elif fop == "chgrp":
+ logger.info("Started changing group ownership for the files 0 to " +
+ str(files)+"...")
+ chgrp_files(files, l, randname, mnt_pnt)
+ logger.info("Finished changing group ownership for the files 0 to " +
+ str(files))
+
+ elif fop == "symlink":
+ logger.info("Started creating symlink to the files 0 to " +
+ str(files)+"...")
+ symlink_files(files, l, randname, mnt_pnt)
+ logger.info("Finished creating symlink to the files 0 to " +
+ str(files))
+
+ elif fop == "hardlink":
+ logger.info("Started creating hardlink to the files 0 to " +
+ str(files)+"...")
+ hardlink_files(files, l, randname, mnt_pnt)
+ logger.info("Finished creating hardlink to the files 0 to " +
+ str(files))
+
+ elif fop == "truncate":
+ logger.info("Started truncating the files 0 to " + str(files)+"...")
+ truncate_files(files, mins, maxs, randname, mnt_pnt)
+ logger.info("Finished truncating the files 0 to " + str(files))
+
+ elif fop == "setxattr":
+ logger.info("Started setxattr to the files 0 to " + str(files)+"...")
+ setxattr_files(files, randname, mnt_pnt)
+ logger.info("Finished setxattr to the files 0 to " + str(files))
+
+
+if __name__ == '__main__':
+ usage = "usage: %prog [option] <MNT_PT>"
+ parser = argparse.ArgumentParser(formatter_class=argparse.
+ ArgumentDefaultsHelpFormatter)
+ parser.add_argument("-n", dest="files", type=int, default=100,
+ help="number of files in each level ")
+ parser.add_argument("--size", action="store", default="100k",
+ help="size of the files to be used ")
+ parser.add_argument("--random", action="store_true", default=False,
+ help="random size of the file between --min and --max")
+ parser.add_argument("--max", action="store", default="500K",
+ help="maximum size of the files, if random is True")
+ parser.add_argument("--min", action="store", default="10K",
+ help="minimum size of the files, if random is True")
+ parser.add_argument("--single", action="store_true", dest="dir",
+ default=True, help="create files in single directory")
+ parser.add_argument("--multi", action="store_false", dest="dir",
+ help="create files in multiple directories")
+ parser.add_argument("-b", dest="brdth", type=int, default=5,
+ help="number of directories in one level(works " +
+ "with --multi) ")
+ parser.add_argument("-d", dest="depth", type=int, default=5,
+ help="number of levels of directories (works " +
+ "with --multi) ")
+ parser.add_argument("-l", dest="flen", type=int, default=10,
+ help="number of bytes for filename ( Used only when " +
+ "randname is enabled) ")
+ parser.add_argument("-t", action="store", dest="file_type",
+ default="text", choices=["text", "sparse", "binary",
+ "tar"],
+ help="type of the file to be created ()")
+ parser.add_argument("-I", dest="inter", type=int, default=100,
+ help="print number files created of interval")
+ parser.add_argument("--fop", action="store", dest="fop", default="create",
+ choices=["create", "rename", "chmod", "chown", "chgrp",
+ "symlink", "hardlink", "truncate",
+ "setxattr"],
+ help="fop to be performed on the files")
+ parser.add_argument("-R", dest="randname", action="store_false",
+ default=True, help="To disable random file name " +
+ "(default: Enabled)")
+ parser.add_argument("mntpnt", help="Mount point")
+
+ args = parser.parse_args()
+ logger = setupLogger("testlost")
+ args.mntpnt = os.path.abspath(args.mntpnt)
+
+ if args.dir:
+ singledir(args.mntpnt, args.files, args.fop, args.file_type,
+ args.inter, args.size, args.min, args.max,
+ args.random, args.flen, args.randname)
+ else:
+ multipledir(args.mntpnt, args.brdth, args.depth, args.files,
+ args.fop, args.file_type, args.inter, args.size,
+ args.min, args.max, args.random, args.flen,
+ args.randname)
diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c
new file mode 100644
index 00000000000..e9f54717263
--- /dev/null
+++ b/tests/utils/get-mdata-xattr.c
@@ -0,0 +1,152 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdlib.h>
+#include <endian.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <errno.h>
+
+typedef struct gf_timespec_disk {
+ uint64_t tv_sec;
+ uint64_t tv_nsec;
+} gf_timespec_disk_t;
+
+/* posix_mdata_t on disk structure */
+typedef struct __attribute__((__packed__)) posix_mdata_disk {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ gf_timespec_disk_t ctime;
+ gf_timespec_disk_t mtime;
+ gf_timespec_disk_t atime;
+} posix_mdata_disk_t;
+
+/* In memory representation posix metadata xattr */
+typedef struct {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ struct timespec ctime;
+ struct timespec mtime;
+ struct timespec atime;
+} posix_mdata_t;
+
+#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata"
+
+/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order
+ */
+static inline void
+posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
+{
+ out->version = in->version;
+ out->flags = be64toh(in->flags);
+
+ out->ctime.tv_sec = be64toh(in->ctime.tv_sec);
+ out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec);
+
+ out->mtime.tv_sec = be64toh(in->mtime.tv_sec);
+ out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec);
+
+ out->atime.tv_sec = be64toh(in->atime.tv_sec);
+ out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+}
+
+/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
+static int
+posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata)
+{
+ size_t size = -1;
+ char *value = NULL;
+ char gfid_str[64] = {0};
+
+ char *key = GF_XATTR_MDATA_KEY;
+
+ if (!metadata || !real_path) {
+ goto err;
+ }
+
+ /* Get size */
+ size = lgetxattr(real_path, key, NULL, 0);
+ if (size == -1) {
+ goto err;
+ }
+
+ value = calloc(size + 1, sizeof(char));
+ if (!value) {
+ goto err;
+ }
+
+ /* Get xattr value */
+ size = lgetxattr(real_path, key, value, size);
+ if (size == -1) {
+ goto err;
+ }
+ posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value);
+
+out:
+ if (value)
+ free(value);
+ return 0;
+err:
+ if (value)
+ free(value);
+ return -1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ posix_mdata_t metadata;
+ uint64_t result;
+
+ if (argc != 3) {
+ /*
+ Usage: get_mdata_xattr -c|-m|-a <file-name>
+ where -c --> ctime
+ -m --> mtime
+ -a --> atime
+ */
+ printf("-1");
+ goto err;
+ }
+
+ if (posix_fetch_mdata_xattr(argv[2], &metadata)) {
+ printf("-1");
+ goto err;
+ }
+
+ switch (argv[1][1]) {
+ case 'c':
+ result = metadata.ctime.tv_sec;
+ break;
+ case 'm':
+ result = metadata.mtime.tv_sec;
+ break;
+ case 'a':
+ result = metadata.atime.tv_sec;
+ break;
+ default:
+ printf("-1");
+ goto err;
+ }
+ printf("%" PRIu64, result);
+ fflush(stdout);
+ return 0;
+err:
+ fflush(stdout);
+ return -1;
+}
diff --git a/tests/utils/getfattr.py b/tests/utils/getfattr.py
new file mode 100755
index 00000000000..3eb40e1c887
--- /dev/null
+++ b/tests/utils/getfattr.py
@@ -0,0 +1,133 @@
+
+from __future__ import print_function
+import os
+import sys
+from optparse import OptionParser
+
+import xattr
+
+def handle_textencoding(attr):
+ ### required for Python's handling of NULL strings.
+ attr_null_replace = (attr.encode('hex').decode('hex')).replace('\x00',
+ '\\000')
+ return attr_null_replace
+
+def getfattr(path, option):
+ attr = xattr.getxattr(path, option.name)
+ encoded_attr = attr
+
+ if option.encoding == "text":
+ ## special case handle it.
+ encoded_attr = handle_textencoding(attr)
+ else:
+ encoded_attr = attr.encode(option.encoding)
+
+ if option.onlyvalues:
+ print (encoded_attr)
+ return
+
+ print_getfattr (path, option, encoded_attr)
+ return
+
+def print_getfattr (path, option, encoded_attr=None):
+ if encoded_attr:
+ if option.encoding == "hex":
+ print(("%s=0x%s" % (option.name, encoded_attr)))
+ elif option.encoding == "base64":
+ print(("%s=0s%s" % (option.name, encoded_attr)))
+ else:
+ print(("%s=\"%s\"" % (option.name, encoded_attr)))
+ else:
+ print(option.name)
+
+ return
+
+def print_header (path, absnames):
+ if absnames:
+ print(("# file: %s" % path))
+ else:
+ print ("getfattr: Removing leading '/' from absolute path names")
+ print(("# file: %s" % path[1:]))
+
+if __name__ == '__main__':
+ usage = "usage: %prog [-n name|-d] [-e en] [-m pattern] path...."
+ parser = OptionParser(usage=usage)
+ parser.add_option("-n", action="store", dest="name", type="string",
+ help="Dump the value of the named extended attribute"
+ " extended attribute.")
+ parser.add_option("-d", action="store_true", dest="dump",
+ help="Dump the values of all extended attributes"
+ " associated with pathname.")
+ parser.add_option("-e", action="store", dest="encoding", type="string",
+ default="base64",
+ help="Encode values after retrieving"
+ " them. Valid values of [en] are `text`, `hex`,"
+ " and `base64`. Values encoded as text strings are"
+ " enclosed in double quotes (\"), while strings"
+ " encoded as hexadecimal and base64 are prefixed with"
+ " 0x and 0s, respectively.")
+ parser.add_option("-m", action="store", dest="pattern", type="string",
+ help="Only include attributes with names matching the"
+ " regular expression pattern. The default value for"
+ " pattern is \"^user\\.\", which includes all the"
+ " attributes in the user namespace. Specify \"-\" for"
+ " including all attributes. Refer to attr(5) for a more"
+ " detailed discussion of namespaces.")
+ parser.add_option("--absolute-names", action="store_true", dest="absnames",
+ help="Do not strip leading slash characters ('/')."
+ " The default behaviour is to strip leading slash characters.")
+ parser.add_option("--only-values", action="store_true", dest="onlyvalues",
+ help="Dump out the raw extended attribute value(s)"
+ " without encoding them.")
+
+ (option, args) = parser.parse_args()
+ if not args:
+ print ("Usage: getfattr [-hRLP] [-n name|-d] [-e en] [-m pattern]"
+ " path...")
+ print ("Try `getfattr --help' for more information.")
+ sys.exit(1)
+
+ if option.dump and option.name:
+ print ("-d and -n are mutually exclusive...")
+ sys.exit(1)
+
+ if option.pattern and option.name:
+ print ("-m and -n are mutually exclusive...")
+ sys.exit(1)
+
+ if option.encoding:
+ if (not (option.encoding.strip() == "hex" or
+ option.encoding.strip() == "base64" or
+ option.encoding.strip() == "text")):
+ print(("unrecognized encoding parameter... %s, please use"
+ " `text`, `base64` or `hex`" % option.encoding))
+ sys.exit(1)
+
+ args[0] = os.path.abspath(args[0])
+
+ if option.name:
+ print_header(args[0], option.absnames)
+ try:
+ getfattr(args[0], option)
+ except KeyError as err:
+ print(("Invalid key %s" % err))
+ sys.exit(1)
+ except IOError as err:
+ print (err)
+ sys.exit(1)
+
+ if option.pattern:
+ print_header(args[0], option.absnames)
+ try:
+ xattrs = xattr.listxattr(args[0])
+ for attr in xattrs:
+ if option.dump:
+ option.name = attr.encode('utf-8')
+ getfattr(args[0], option)
+ else:
+ option.name = attr.encode('utf-8')
+ print_getfattr(args[0], option, None)
+
+ except IOError as err:
+ print (err)
+ sys.exit(1)
diff --git a/tests/utils/gfid-access.py b/tests/utils/gfid-access.py
new file mode 100755
index 00000000000..c35c1223df6
--- /dev/null
+++ b/tests/utils/gfid-access.py
@@ -0,0 +1,124 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import os
+import sys
+import stat
+import time
+import struct
+import random
+import libcxattr
+
+from errno import EEXIST
+
+Xattr = libcxattr.Xattr()
+
+def umask():
+ return os.umask(0)
+
+def _fmt_mknod(l):
+ return "!II%dsI%dsIII" % (37, l+1)
+
+def _fmt_mkdir(l):
+ return "!II%dsI%dsII" % (37, l+1)
+
+def _fmt_symlink(l1, l2):
+ return "!II%dsI%ds%ds" % (37, l1+1, l2+1)
+
+
+if sys.version_info > (3,):
+ def entry_pack_reg(gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(_fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ # mkdir
+ def entry_pack_dir(gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(_fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+ def entry_pack_symlink(gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(_fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+
+else:
+ def entry_pack_reg(gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(_fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_dir(gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(_fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+
+ def entry_pack_symlink(gf, bn, lnk, mo, uid, gid):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(_fmt_symlink(blen, llen),
+ uid, gid, gf, mo, bn, lnk)
+
+if __name__ == '__main__':
+ if len(sys.argv) < 9:
+ print(("USAGE: %s <mount> <pargfid|ROOT> <filename> <GFID> <file type>"
+ " <uid> <gid> <file permission(octal str)>" % (sys.argv[0])))
+ sys.exit(-1) # nothing to do
+ mtpt = sys.argv[1]
+ pargfid = sys.argv[2]
+ fname = sys.argv[3]
+ randomgfid = sys.argv[4]
+ ftype = sys.argv[5]
+ uid = int(sys.argv[6])
+ gid = int(sys.argv[7])
+ perm = int(sys.argv[8], 8)
+
+ os.chdir(mtpt)
+ if pargfid == 'ROOT':
+ pargfid = '.gfid/00000000-0000-0000-0000-000000000001'
+ else:
+ pargfid = '.gfid/' + pargfid
+
+ blob = None
+
+ # entry op: use non-zero uid/gid (to catch gfid-access xlator bugs)
+ if ftype == 'file':
+ mode = stat.S_IFREG | perm
+ blob = entry_pack_reg(randomgfid, fname, mode, uid, gid)
+ elif ftype =='dir':
+ mode = stat.S_IFDIR | perm
+ blob = entry_pack_dir(randomgfid, fname, mode, uid, gid)
+ else: # not yet...
+ sys.exit(-1)
+
+ if blob == None:
+ sys.exit(-1)
+ try:
+ Xattr.lsetxattr(pargfid, 'glusterfs.gfid.newfile', blob)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if not ex.errno in [EEXIST]:
+ raise
+ sys.exit(-1)
+ print("File creation OK")
+ sys.exit(0)
diff --git a/tests/utils/libcxattr.py b/tests/utils/libcxattr.py
new file mode 100644
index 00000000000..3f3ed1fffbb
--- /dev/null
+++ b/tests/utils/libcxattr.py
@@ -0,0 +1,108 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+from ctypes import CDLL, c_int
+from py2py3 import bytearray_to_str, gr_create_string_buffer
+from py2py3 import gr_query_xattr, gr_lsetxattr, gr_lremovexattr
+
+
+class Xattr(object):
+
+ """singleton that wraps the extended attributes system
+ interface for python using ctypes
+
+ Just implement it to the degree we need it, in particular
+ - we need just the l*xattr variants, ie. we never want symlinks to be
+ followed
+ - don't need size discovery for getxattr, as we always know the exact
+ sizes we expect
+ """
+
+ if sys.hexversion >= 0x02060000:
+ from ctypes import DEFAULT_MODE
+ libc = CDLL("libc.so.6", DEFAULT_MODE, None, True)
+ else:
+ libc = CDLL("libc.so.6")
+
+ @classmethod
+ def geterrno(cls):
+ if sys.hexversion >= 0x02060000:
+ from ctypes import get_errno
+ return get_errno()
+ # breaks on NetBSD
+ return c_int.in_dll(cls.libc, 'errno').value
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _query_xattr(cls, path, siz, syscall, *a):
+ if siz:
+ buf = gr_create_string_buffer(siz)
+ else:
+ buf = None
+ ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
+ if ret == -1:
+ cls.raise_oserr()
+ if siz:
+ # py2 and py3 compatibility. Convert bytes array
+ # to string
+ result = bytearray_to_str(buf.raw)
+ return result[:ret]
+ else:
+ return ret
+
+ @classmethod
+ def lgetxattr(cls, path, attr, siz=0):
+ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
+
+ @classmethod
+ def lgetxattr_buf(cls, path, attr):
+ """lgetxattr variant with size discovery"""
+ size = cls.lgetxattr(path, attr)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return ''
+ return cls.lgetxattr(path, attr, size)
+
+ @classmethod
+ def llistxattr(cls, path, siz=0):
+ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
+ if isinstance(ret, str):
+ ret = ret.strip('\0')
+ ret = ret.split('\0') if ret else []
+ return ret
+
+ @classmethod
+ def lsetxattr(cls, path, attr, val):
+ ret = gr_lsetxattr(cls, path, attr, val)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def lremovexattr(cls, path, attr):
+ ret = gr_lremovexattr(cls, path, attr)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def llistxattr_buf(cls, path):
+ """listxattr variant with size discovery"""
+ size = cls.llistxattr(path)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return []
+ return cls.llistxattr(path, size)
diff --git a/tests/utils/pidof.py b/tests/utils/pidof.py
new file mode 100755
index 00000000000..4b7071c0a48
--- /dev/null
+++ b/tests/utils/pidof.py
@@ -0,0 +1,45 @@
+
+from __future__ import print_function
+import sys
+
+try:
+ import psutil
+except ImportError:
+ print("Please install psutil --> pip install psutil")
+ sys.exit(1)
+
+def pmap_find(p, name):
+ for m in p.memory_maps(grouped=True):
+ if m.path.endswith("%s.so" % name):
+ return True
+ continue
+ return False
+
+def pidof(processname):
+ for p in psutil.process_iter():
+ if p.pid == 0:
+ continue
+ if "gluster" in processname:
+ if processname == "glusterd" and pmap_find(p, "glusterd"):
+ print((p.pid))
+ if processname == "glusterfs" and pmap_find(p, "client"):
+ print((p.pid))
+ if processname == "glusterfsd" and pmap_find(p, "posix-acl"):
+ print((p.pid))
+ continue
+ if processname.strip() == p.name():
+ print((p.pid))
+
+def main(argv):
+ if len(argv) < 2:
+ sys.stderr.write("Usage: %s <processname>\n" % (argv[0],))
+ return 1
+ try:
+ pidof(argv[1])
+ except Exception as err:
+ print(err)
+ sys.stderr.write("Please be root - %s\n" % err);
+ sys.exit(1)
+
+if __name__ == "__main__":
+ main(sys.argv)
diff --git a/tests/utils/py2py3.py b/tests/utils/py2py3.py
new file mode 100644
index 00000000000..63aca10fd26
--- /dev/null
+++ b/tests/utils/py2py3.py
@@ -0,0 +1,186 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import sys
+import os
+import stat
+import struct
+from ctypes import create_string_buffer
+
+def umask():
+ return os.umask(0)
+
+if sys.version_info >= (3,):
+ def pipe():
+ (r, w) = os.pipe()
+ os.set_inheritable(r, True)
+ os.set_inheritable(w, True)
+ return (r, w)
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ # Raw conversion of string to bytes. This is required to convert
+ # back the string into bytearray(c char array) to use in struc
+ # pack/unpacking. Again encode/decode can't be used as it
+ # converts it alters size.
+ def str_to_bytearray(string):
+ return bytes([ord(c) for c in string])
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path.encode(), size, syscall,
+ attr.encode())
+ else:
+ return cls._query_xattr(path.encode(), size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
+ len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path.encode(), attr.encode())
+
+ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
+ return cls._get_api('gf_changelog_register')(brick.encode(),
+ path.encode(),
+ log_file.encode(),
+ log_level, retries)
+
+ def gr_cl_done(cls, clfile):
+ return cls._get_api('gf_changelog_done')(clfile.encode())
+
+ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
+ actual_end):
+ return cls._get_api('gf_history_changelog')(changelog_path.encode(),
+ start, end, num_parallel,
+ actual_end)
+
+ def gr_cl_history_done(cls, clfile):
+ return cls._get_api('gf_history_changelog_done')(clfile.encode())
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+else:
+ def pipe():
+ (r, w) = os.pipe()
+ return (r, w)
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ # Raw conversion of string to bytearray
+ def str_to_bytearray(string):
+ return string
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path, size, syscall, attr)
+ else:
+ return cls._query_xattr(path, size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path, attr)
+
+ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
+ return cls._get_api('gf_changelog_register')(brick, path, log_file,
+ log_level, retries)
+
+ def gr_cl_done(cls, clfile):
+ return cls._get_api('gf_changelog_done')(clfile)
+
+ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
+ actual_end):
+ return cls._get_api('gf_history_changelog')(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gr_cl_history_done(cls, clfile):
+ return cls._get_api('gf_history_changelog_done')(clfile)
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
diff --git a/tests/utils/setfattr.py b/tests/utils/setfattr.py
new file mode 100755
index 00000000000..8b7b6abacc0
--- /dev/null
+++ b/tests/utils/setfattr.py
@@ -0,0 +1,77 @@
+
+import os
+import sys
+from optparse import OptionParser
+
+import xattr
+
+def convert(string):
+ tmp_string = string
+ if (string[0] == '0' and
+ (string[1] == 's' or
+ string[1] == 'S')):
+ tmp_string = string.strip('%s%s' %
+ (string[0],
+ string[1]))
+ return tmp_string.decode('base64')
+
+ if (string[0] == '0' and
+ (string[1] == 'x' or
+ string[1] == 'X')):
+ tmp_string = string.split('%s%s' %
+ (string[0],
+ string[1]))
+ return tmp_string[1].decode('hex')
+
+ return tmp_string
+
+if __name__ == '__main__':
+ usage = "usage: %prog [-n name] [-v value] [-x name]"
+ parser = OptionParser(usage=usage)
+ parser.add_option("-n", action="store", dest="name", type="string",
+ help="Specifies the name of the extended attribute to set.")
+ parser.add_option("-v", action="store", dest="value", type="string",
+ help="Specifies the new value of the extended attribute."
+ " There are three methods available for encoding the value."
+ " If the given string is enclosed in double quotes, the"
+ " inner string is treated as text. In that case,"
+ " backslashes and double quotes have special meanings"
+ " and need to be escaped by a preceding backslash. Any"
+ " control characters can be encoded as a backslash"
+ " followed by three digits as its ASCII code in octal."
+ " If the given string begins with 0x or 0X, it expresses"
+ " a hexadecimal number. If the given string begins with"
+ " 0s or 0S, base64 encoding is expected.")
+ parser.add_option("-x", action="store", dest="xname", type="string",
+ help="Remove the named extended attribute entirely.")
+
+ (option, args) = parser.parse_args()
+ if not args:
+ print ("Usage: setfattr {-n name} [-v value] file...")
+ print (" setfattr {-x name} file...")
+ print ("Try `setfattr --help' for more information.")
+ sys.exit(1)
+
+ if option.name and option.xname:
+ print ("-n and -x are mutually exclusive...")
+ sys.exit(1)
+
+ if option.name:
+ if option.value is None:
+ print ("-n option requires -v value...")
+
+ args[0] = os.path.abspath(args[0])
+
+ if option.name and option.value:
+ try:
+ xattr.setxattr(args[0], option.name, convert(option.value))
+ except Exception as err:
+ print (err)
+ sys.exit(1)
+
+ if option.xname:
+ try:
+ xattr.removexattr(args[0], option.xname)
+ except Exception as err:
+ print (err)
+ sys.exit(1)
diff --git a/tests/utils/testn.sh b/tests/utils/testn.sh
new file mode 100755
index 00000000000..079351d8529
--- /dev/null
+++ b/tests/utils/testn.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# Use this script to identify the command and line-number of test-cases.
+#
+
+if [ -z "${1}" -a -z "${2}" ]
+then
+ echo "Usage: ${0} path/to/test/case.t testnumber"
+ exit 1
+elif [ -z "${2}" ]
+then
+ echo "ERROR: The second parameter to ${0} should be a number."
+ exit 2
+fi
+
+awk '{print FNR " " $0}' ${1} | egrep '^[[:digit:]]+[[:space:]]*(EXPECT|TEST|EXPECT_WITHIN|EXPECT_KEYWORD)' | sed -n ${2}p
diff --git a/tests/vagrant/vagrant-template-centos6/Vagrantfile b/tests/vagrant/vagrant-template-centos6/Vagrantfile
new file mode 100644
index 00000000000..b276f90768d
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/Vagrantfile
@@ -0,0 +1,55 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+Vagrant.configure("2") do |config|
+ config.vm.define "vagrant-testVM" do |testvm|
+ testvm.vm.box = "raghavendra-talur/gluster-dev-centos6"
+ testvm.vm.hostname = "vagrant-testVM"
+ #testvm.ssh.insert_key = false
+ testvm.vm.synced_folder ".", "/vagrant", disabled: true
+
+ host = RbConfig::CONFIG['host_os']
+ # Give VM 1/4 system memory & access to all cpu cores on the host
+ if host =~ /darwin/
+ cpus = `sysctl -n hw.ncpu`.to_i
+ # sysctl returns Bytes and we need to convert to MB
+ mem = `sysctl -n hw.memsize`.to_i / 1024 / 1024 / 4
+ elsif host =~ /linux/
+ cpus = `nproc`.to_i
+ # meminfo shows KB and we need to convert to MB
+ mem = `grep 'MemTotal' /proc/meminfo | sed -e 's/MemTotal://' -e 's/ kB//'`.to_i / 1024 / 4
+ else # sorry Windows folks, I can't help you
+ cpus = 2
+ mem = 1024
+ end
+
+ # Define basic config for VM, memory, cpu, storage pool
+ testvm.vm.provider "libvirt" do |lv|
+ lv.storage_pool_name = "default"
+ lv.memory = mem
+ lv.cpus = cpus
+
+
+ # We need a brick partition, lets have a 5G disk for that.
+ # If you need more bricks, just add more letters to the
+ # string below.
+ "b".split("").each do |i|
+ lv.storage :file,
+ #:path => "",
+ #:allow_existing => "",
+ :device => "vd#{i}",
+ :size => "5G",
+ :type => "qcow2",
+ :bus => "virtio",
+ :cache => "default"
+ end
+ end
+
+ # Let's provision
+ testvm.vm.provision "ansible", run: "always" do |setup|
+ setup.verbose = "v"
+ setup.playbook = "setup.yml"
+ end
+
+ end
+end
diff --git a/tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml
new file mode 100644
index 00000000000..0e4c83244cc
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: stop and disable kernel nfs
+ service: name=nfs state=stopped enabled=no
diff --git a/tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml
new file mode 100644
index 00000000000..84dd252b65b
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: remove IPv6 address for localhost
+ shell: sed -i '/::1/s/localhost //' /etc/hosts
+
+- name: add IPv4 address for localhost
+ shell: sed -i '/127.0.0.1/s/$/ localhost/' /etc/hosts
diff --git a/tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..bf3eff077b4
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml
@@ -0,0 +1,92 @@
+---
+- name: install deltarpm
+ yum: name=deltarpm state=present
+
+- name: remove samba3
+ shell: yum -y remove samba*
+
+- name: update system
+ shell: yum -y update
+
+- name: install epel repo
+ yum: name=http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm state=present
+
+- name: install other packages
+ yum: name={{ item }} state=present
+ with_items:
+ - attr
+ - autoconf
+ - automake
+ - bc
+ - bison
+ #- libcmocka-devel
+ - dbench
+ - dos2unix
+ - e2fsprogs
+ - findutils
+ - flex
+ - fuse-devel
+ - fuse-libs
+ - gcc
+ - gdb
+ - git
+ - glib2-devel
+ #- hostname
+ - libacl-devel
+ - libaio-devel
+ - libattr-devel
+ - libibverbs-devel
+ - librdmacm-devel
+ - libselinux-python
+ - libsemanage-python
+ - libtool
+ - libxml2-devel
+ - lvm2-devel
+ - make
+ #- man-db
+ - mock
+ - net-tools
+ #- nfs-ganesha-gluster
+ - nfs-utils
+ - openssh-server
+ - openssl-devel
+ - perl-Test-Harness
+ - pkgconfig
+ #- procps-ng
+ - psmisc
+ - python-devel
+ - python-eventlet
+ - python-netifaces
+ - python-paste-deploy
+ - python-setuptools
+ - python-simplejson
+ - python-sphinx
+ - python-webob
+ - pyxattr
+ - readline-devel
+ - rpm-build
+ - screen
+ - strace
+ - supervisor
+ - systemtap-sdt-devel
+ - sqlite-devel
+ - samba4*
+ - userspace-rcu-devel
+ - vim
+ - wget
+ - which
+ - xfsprogs
+ - yajl-devel
+
+- name: install dev help packages, not required by Gluster
+ yum: name={{ item }} state=present
+ with_items:
+ - cgdb
+ - clang
+ - lsof
+ - perf
+ - sysstat
+ - systemtap
+ - systemtap-runtime
+ - tcpdump
+ - valgrind
diff --git a/tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml
new file mode 100644
index 00000000000..768cb0e8668
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: disable iptables, need to add specific rules later
+ shell: iptables -F
diff --git a/tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml
new file mode 100644
index 00000000000..c8e1209937e
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Add mock user; required for rpm.t
+ user: name=mock group=mock
diff --git a/tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml
new file mode 100644
index 00000000000..6b3f6b8d3ea
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: Format backend
+ filesystem: fstype=xfs dev=/dev/vdb
+
+- name: Add entry to fstab and mount
+ mount: name=/d src=/dev/vdb fstype=xfs state=mounted
diff --git a/tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..c91efa9ba7c
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml
@@ -0,0 +1,4 @@
+---
+- name: Erase gluster packages, keep dependencies; we will source install
+ shell: rpm -ev --nodeps `rpm -qa | grep ^gluster`
+ ignore_errors: True
diff --git a/tests/vagrant/vagrant-template-centos6/setup.yml b/tests/vagrant/vagrant-template-centos6/setup.yml
new file mode 100644
index 00000000000..520f1cdb019
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/setup.yml
@@ -0,0 +1,15 @@
+---
+- hosts: all
+ sudo: true
+ roles:
+ #Installing packages invoke dnf and metadata download takes a long time.
+ #The box used in Vagrantfile has all the packages installed.
+ #Refer to main.yml file in install-pkgs role to get list of packages.
+ #install-pkgs role is hence disabled by default.
+ #- install-pkgs
+ - remove-gluster-pkgs
+ - prepare-brick
+ - mock-user
+ - iptables
+ - fix-localhost
+ - daemon-services
diff --git a/tests/vagrant/vagrant-template-fedora/Vagrantfile b/tests/vagrant/vagrant-template-fedora/Vagrantfile
new file mode 100644
index 00000000000..df806c7aaee
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/Vagrantfile
@@ -0,0 +1,56 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+Vagrant.configure("2") do |config|
+ config.vm.define "vagrant-testVM" do |testvm|
+ testvm.vm.box = "gluster-dev-fedora"
+ testvm.vm.box_url = "http://download.gluster.org/pub/gluster/glusterfs/vagrant/gluster-dev-fedora/boxes/gluster-dev-fedora.json"
+ testvm.vm.hostname = "vagrant-testVM"
+ #testvm.ssh.insert_key = false
+ testvm.vm.synced_folder ".", "/vagrant", disabled: true
+
+ host = RbConfig::CONFIG['host_os']
+ # Give VM 1/4 system memory & access to all cpu cores on the host
+ if host =~ /darwin/
+ cpus = `sysctl -n hw.ncpu`.to_i
+ # sysctl returns Bytes and we need to convert to MB
+ mem = `sysctl -n hw.memsize`.to_i / 1024 / 1024 / 4
+ elsif host =~ /linux/
+ cpus = `nproc`.to_i
+ # meminfo shows KB and we need to convert to MB
+ mem = `grep 'MemTotal' /proc/meminfo | sed -e 's/MemTotal://' -e 's/ kB//'`.to_i / 1024 / 4
+ else # sorry Windows folks, I can't help you
+ cpus = 2
+ mem = 1024
+ end
+
+ # Define basic config for VM, memory, cpu, storage pool
+ testvm.vm.provider "libvirt" do |lv|
+ lv.storage_pool_name = "default"
+ lv.memory = mem
+ lv.cpus = cpus
+
+
+ # We need a brick partition, lets have a 5G disk for that.
+ # If you need more bricks, just add more letters to the
+ # string below.
+ "b".split("").each do |i|
+ lv.storage :file,
+ #:path => "",
+ #:allow_existing => "",
+ :device => "vd#{i}",
+ :size => "5G",
+ :type => "qcow2",
+ :bus => "virtio",
+ :cache => "default"
+ end
+ end
+
+ # Let's provision
+ testvm.vm.provision "ansible", run: "always" do |setup|
+ setup.verbose = "v"
+ setup.playbook = "setup.yml"
+ end
+
+ end
+end
diff --git a/tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml
new file mode 100644
index 00000000000..98d077b1f2e
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: stop and disable kernel nfs
+ service: name=nfs-server state=stopped enabled=no
diff --git a/tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml
new file mode 100644
index 00000000000..84dd252b65b
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: remove IPv6 address for localhost
+ shell: sed -i '/::1/s/localhost //' /etc/hosts
+
+- name: add IPv4 address for localhost
+ shell: sed -i '/127.0.0.1/s/$/ localhost/' /etc/hosts
diff --git a/tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..2512034cdd7
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml
@@ -0,0 +1,85 @@
+---
+- name: install deltarpm
+ dnf: name=deltarpm state=present
+
+- name: update system
+ shell: dnf update -y
+
+- name: install other packages
+ dnf: name={{ item }} state=present
+ with_items:
+ - attr
+ - autoconf
+ - automake
+ - bc
+ - bison
+ - libcmocka-devel
+ - cifs-utils
+ - dbench
+ - dos2unix
+ - e2fsprogs
+ - findutils
+ - flex
+ - fuse-devel
+ - fuse-libs
+ - gcc
+ - gdb
+ - git
+ - glib2-devel
+ - hostname
+ - libacl-devel
+ - libaio-devel
+ - libattr-devel
+ - libibverbs-devel
+ - librdmacm-devel
+ - libtool
+ - libxml2-devel
+ - lvm2-devel
+ - make
+ - man-db
+ - mock
+ - net-tools
+ - nfs-ganesha-gluster
+ - nfs-utils
+ - openssh-server
+ - openssl-devel
+ - perl-Test-Harness
+ - pkgconfig
+ - procps-ng
+ - psmisc
+ - python-devel
+ - python-eventlet
+ - python-netifaces
+ - python-paste-deploy
+ - python-setuptools
+ - python-simplejson
+ - python-sphinx
+ - python-webob
+ - pyxattr
+ - readline-devel
+ - rpm-build
+ - screen
+ - strace
+ - supervisor
+ - systemtap-sdt-devel
+ - sqlite-devel
+ - samba*
+ - userspace-rcu-devel
+ - vim
+ - wget
+ - which
+ - xfsprogs
+ - yajl-devel
+
+- name: install dev help packages, not required by Gluster
+ dnf: name={{ item }} state=present
+ with_items:
+ - cgdb
+ - clang
+ - lsof
+ - perf
+ - sysstat
+ - systemtap
+ - systemtap-runtime
+ - tcpdump
+ - valgrind
diff --git a/tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml
new file mode 100644
index 00000000000..768cb0e8668
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: disable iptables, need to add specific rules later
+ shell: iptables -F
diff --git a/tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml
new file mode 100644
index 00000000000..c8e1209937e
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Add mock user; required for rpm.t
+ user: name=mock group=mock
diff --git a/tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml
new file mode 100644
index 00000000000..6b3f6b8d3ea
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: Format backend
+ filesystem: fstype=xfs dev=/dev/vdb
+
+- name: Add entry to fstab and mount
+ mount: name=/d src=/dev/vdb fstype=xfs state=mounted
diff --git a/tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..c91efa9ba7c
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml
@@ -0,0 +1,4 @@
+---
+- name: Erase gluster packages, keep dependencies; we will source install
+ shell: rpm -ev --nodeps `rpm -qa | grep ^gluster`
+ ignore_errors: True
diff --git a/tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml
new file mode 100644
index 00000000000..c9ba9618428
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Allow gfapi in Samba to bind to other ports than well known smb ports
+ seboolean: name=samba_load_libgfapi state=yes persistent=yes
diff --git a/tests/vagrant/vagrant-template-fedora/setup.yml b/tests/vagrant/vagrant-template-fedora/setup.yml
new file mode 100644
index 00000000000..fc42a8157f3
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/setup.yml
@@ -0,0 +1,16 @@
+---
+- hosts: all
+ sudo: true
+ roles:
+ #Installing packages invoke dnf and metadata download takes a long time.
+ #The box used in Vagrantfile has all the packages installed.
+ #Refer to main.yml file in install-pkgs role to get list of packages.
+ #install-pkgs role is hence disabled by default.
+ #- install-pkgs
+ - remove-gluster-pkgs
+ - prepare-brick
+ - mock-user
+ - selinux
+ - iptables
+ - fix-localhost
+ - daemon-services
diff --git a/tests/volume.rc b/tests/volume.rc
new file mode 100644
index 00000000000..b38848c0e52
--- /dev/null
+++ b/tests/volume.rc
@@ -0,0 +1,1002 @@
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function volume_get_field()
+{
+ local vol=$1
+ local field=$2
+ $CLI volume get $vol $field | tail -1 | awk '{print $2}'
+}
+
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+function check_brick_status() {
+ cmd="gluster --xml volume status"
+ local daemon=$1
+
+ if [[ -z $daemon ]]
+ then
+ echo `$cmd | grep '<status>1' | wc -l`
+ else
+ echo `$cmd | grep -A 5 ${daemon} | grep '<status>1' | wc -l`
+ fi
+}
+
+function online_brick_count ()
+{
+ local v1=0
+ local v2=0
+ local v3=0
+ local v4=0
+ local v5=0
+ local tot=0
+
+ #First count total Number of bricks and then subtract daemon status
+ v1=`check_brick_status`
+ v2=`check_brick_status "Self-heal"`
+ v3=`check_brick_status "Quota"`
+ v4=`check_brick_status "Snapshot"`
+ v5=`check_brick_status "Tier"`
+ v6=`check_brick_status "Scrubber"`
+ v7=`check_brick_status "Bitrot"`
+
+ tot=$((v1-v2-v3-v4-v5-v6-v7))
+ echo $tot
+
+}
+
+
+function brick_up_status {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ $CLI volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'
+}
+
+function volume_option()
+{
+ local vol=$1
+ local key=$2
+ $CLI volume info $vol | egrep "^$key: " | cut -f2 -d' ';
+}
+
+function rebalance_status_field {
+ $CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p
+}
+
+function fix-layout_status_field {
+ #The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4.
+ #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
+ #So we trim the numbers out with `tr`. Finally remove the trailing white spaces with sed. What we get is one of the
+ #strings in the 'cli_vol_task_status_str' char array of cli-rpc-ops.c
+
+ $CLI volume rebalance $1 status | awk '{print $2,$3,$4}' |sed -n 3p |tr -d '[^0-9+\.]'|sed 's/ *$//g'
+}
+
+function remove_brick_status_completed_field {
+ local vol=$1
+ local brick_list=$2
+ $CLI volume remove-brick $vol $brick_list status | awk '{print $7}' | sed -n 3p
+}
+
+function get_mount_process_pid {
+ local vol=$1
+ local mnt=$2
+ ps auxww | grep glusterfs | grep -E "volfile-id[ =]/?$vol .*$mnt" | awk '{print $2}' | head -1
+}
+
+function get_nfs_pid ()
+{
+ ps auxww | grep "volfile-id\ gluster\/nfs" | awk '{print $2}' | head -1
+}
+
+function read_nfs_pidfile ()
+{
+ echo `cat $GLUSTERD_PIDFILEDIR/nfs/nfs.pid`
+}
+
+function cleanup_statedump {
+ pid=$1
+ rm -f $statedumpdir/*$pid.dump.*
+ #.vimrc friendly comment */
+}
+
+function wait_statedump_ready {
+ local maxtime="${1}000000000"
+ local pid="$2"
+ local deadline="$(($(date +%s%N) + maxtime))"
+ local fname
+
+ while [[ "$(date +%s%N)" < "$deadline" ]]; do
+ fname="$statedumpdir/$(ls $statedumpdir | grep -E "\.$pid\.dump\.")"
+ if [[ -f "$fname" ]]; then
+ grep "^DUMP-END-TIME" "$fname" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ echo $fname
+ return
+ fi
+ fi
+ sleep 0.1
+ done
+
+ echo "nostatedump"
+}
+
+function generate_statedump {
+ pid=$1
+ #remove old stale statedumps
+ cleanup_statedump $pid
+ kill -USR1 $pid
+ wait_statedump_ready 3 $pid
+}
+
+function generate_mount_statedump {
+ local vol=$1
+ local mnt=$2
+ generate_statedump $(get_mount_process_pid $vol $mnt)
+}
+
+function cleanup_mount_statedump {
+ local vol=$1
+ cleanup_statedump $(get_mount_process_pid $vol)
+}
+
+function snap_client_connected_status {
+ local vol=$1
+ local fpath=$(generate_mount_statedump $vol)
+ up=$(grep -a -A1 xlator.protocol.client.$vol-snapd-client.priv $fpath | tail -1 | cut -f 2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
+
+function _jbrc_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ local gen_state_dump=$3
+ local fpath=$($gen_state_dump $vol)
+ up=$(grep -a -B1 child_$brick_id=$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
+
+function jbrc_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _jbrc_child_up_status $vol $brick_id generate_mount_statedump
+}
+
+function _afr_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ local gen_state_dump=$3
+ local fpath=$($gen_state_dump $vol)
+ up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
+
+function afr_child_up_status_meta {
+ local mnt=$1
+ local repl=$2
+ local child=$3
+ grep -E "^child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
+}
+
+function client_connected_status_meta {
+ local mnt=$1
+ local client=$2
+ grep "connected" $mnt/.meta/graphs/active/$client/private | awk '{print $3}'
+}
+
+function afr_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_mount_statedump
+}
+
+function ec_get_info {
+ local vol=$1
+ local dist_id=$2
+ local key=$3
+ local fpath=$4
+ local value=$(sed -n "/^\[cluster\/disperse\.$vol-disperse-$dist_id\]/,/^\[/{s/^$key=\(.*\)/\1/p;}" $fpath | head -1)
+ rm -f $fpath
+ echo "$value"
+}
+
+function ec_child_up_status {
+ local vol=$1
+ local dist_id=$2
+ local brick_id=$(($3 + 1))
+ local mnt=$4
+ local mask=$(ec_get_info $vol $dist_id "childs_up_mask" $(generate_mount_statedump $vol $mnt))
+ echo "${mask: -$brick_id:1}"
+}
+
+function ec_child_up_count {
+ local vol=$1
+ local dist_id=$2
+ local mnt=$3
+ ec_get_info $vol $dist_id "childs_up" $(generate_mount_statedump $vol $mnt)
+}
+
+function ec_child_up_status_shd {
+ local vol=$1
+ local dist_id=$2
+ local brick_id=$(($3 + 1))
+ local mask=$(ec_get_info $vol $dist_id "childs_up_mask" $(generate_shd_statedump $vol))
+ echo "${mask: -$brick_id:1}"
+}
+
+function ec_child_up_count_shd {
+ local vol=$1
+ local dist_id=$2
+ ec_get_info $vol $dist_id "childs_up" $(generate_shd_statedump $vol)
+}
+
+function get_shd_process_pid {
+ local vol=$1
+ ps auxww | grep "process-name\ glustershd" | awk '{print $2}' | head -1
+}
+
+function generate_shd_statedump {
+ local vol=$1
+ generate_statedump $(get_shd_process_pid $vol)
+}
+
+function generate_nfs_statedump {
+ generate_statedump $(get_nfs_pid)
+}
+
+function generate_brick_statedump {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ generate_statedump $(get_brick_pid $vol $host $brick)
+}
+
+function afr_child_up_status_in_shd {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_shd_statedump
+}
+
+function afr_child_up_status_in_nfs {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_nfs_statedump
+}
+
+function nfs_up_status {
+ gluster volume status | grep "NFS Server" | awk '{print $7}'
+}
+
+function glustershd_up_status {
+ gluster volume status | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function quotad_up_status {
+ gluster volume status | grep "Quota Daemon" | awk '{print $7}'
+}
+
+function get_glusterd_pid {
+ pgrep '^glusterd$' | head -1
+}
+
+function get_brick_pidfile {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local brick_hiphenated=$(echo $brick | tr '/' '-')
+ echo $GLUSTERD_PIDFILEDIR/vols/$vol/${host}${brick_hiphenated}.pid
+}
+
+function get_brick_pid {
+ cat $(get_brick_pidfile $*)
+}
+
+function kill_brick {
+ local vol=$1
+ local host=$2
+ local brick=$3
+
+ local pidfile=$(get_brick_pidfile $vol $host $brick)
+ local cmdline="/proc/$(cat $pidfile)/cmdline"
+ local socket=$(cat $cmdline | tr '\0' '\n' | grep '\.socket$')
+
+ gf_attach -d $socket $brick
+
+ local deadline="$(($(date +%s%N) + ${PROCESS_UP_TIMEOUT}000000000))"
+ while [[ "$(date +%s%N)" < "$deadline" ]]; do
+ if [[ "$(brick_up_status $vol $host $brick)" == "0" ]]; then
+ break
+ fi
+ done
+}
+
+function check_option_help_presence {
+ local option=$1
+ $CLI volume set help | grep "^Option:" | grep -w $option
+}
+
+function afr_get_changelog_xattr {
+ local file=$1
+ local xkey=$2
+ local xval=$(getfattr -n $xkey -e hex $file 2>/dev/null | grep "$xkey" | cut -f2 -d'=')
+ if [ -z $xval ]; then
+ xval="0x000000000000000000000000"
+ fi
+ echo $xval
+}
+
+function get_pending_heal_count {
+ local vol=$1
+ gluster volume heal $vol info | grep "Number of entries" | awk '{ sum+=$4} END {print sum}'
+}
+
+function afr_get_split_brain_count {
+ local vol=$1
+ gluster volume heal $vol info split-brain | grep "Number of entries in split-brain" | awk '{ sum+=$6} END {print sum}'
+}
+
+function afr_get_index_path {
+ local brick_path=$1
+ echo "$brick_path/.glusterfs/indices/xattrop"
+}
+
+function afr_get_num_indices_in_brick {
+ local brick_path=$1
+ echo $(ls $(afr_get_index_path $brick_path) | grep -v xattrop | wc -l)
+}
+
+function gf_get_gfid_xattr {
+ file=$1
+ getfattr -n trusted.gfid -e hex $file 2>/dev/null | grep "trusted.gfid" | cut -f2 -d'='
+}
+
+function gf_gfid_xattr_to_str {
+ xval=$1
+ echo "${xval:2:8}-${xval:10:4}-${xval:14:4}-${xval:18:4}-${xval:22:12}"
+}
+
+function get_text_xattr {
+ local key=$1
+ local path=$2
+ getfattr -h -d -m. -e text $path 2>/dev/null | grep -a $key | cut -f2 -d'='
+}
+
+function get_gfid2path {
+ local path=$1
+ getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null
+}
+
+function get_mdata {
+ local path=$1
+ getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'='
+}
+
+function get_mdata_count {
+ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l
+}
+
+function get_mdata_uniq_count {
+ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | uniq | wc -l
+}
+
+function get_xattr_key {
+ local key=$1
+ local path=$2
+ getfattr -h -d -m. -e text $path 2>/dev/null | grep -a $key | cut -f1 -d'='
+}
+
+function gf_check_file_opened_in_brick {
+ vol=$1
+ host=$2
+ brick=$3
+ realpath=$4
+ ls -l /proc/$(get_brick_pid $vol $host $brick)/fd | grep "${realpath}$" 2>&1 > /dev/null
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function gf_get_gfid_backend_file_path {
+ brickpath=$1
+ filepath_in_brick=$2
+ gfid=$(gf_get_gfid_xattr "$brickpath/$filepath_in_brick")
+ gfidstr=$(gf_gfid_xattr_to_str $gfid)
+ echo "$brickpath/.glusterfs/${gfidstr:0:2}/${gfidstr:2:2}/$gfidstr"
+}
+
+function gf_rm_file_and_gfid_link {
+ brickpath=$1
+ filepath_in_brick=$2
+ rm -f $(gf_get_gfid_backend_file_path $brickpath $filepath_in_brick)
+ rm -f "$brickpath/$filepath_in_brick"
+}
+
+
+function gd_is_replace_brick_completed {
+ local host=$1
+ local vol=$2
+ local src_brick=$3
+ local dst_brick=$4
+ $CLI volume replace-brick $vol $src_brick $dst_brick status | grep -i "Migration complete"
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function dht_get_layout {
+ local my_xa=trusted.glusterfs.dht
+ getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2
+}
+
+function afr_get_specific_changelog_xattr ()
+{
+ local path=$1
+ local key=$2
+ local type=$3
+ local specific_changelog=""
+
+ changelog_xattr=$(afr_get_changelog_xattr "$path" "$key")
+ if [ "$type" == "data" ]; then
+ specific_changelog=${changelog_xattr:2:8}
+ elif [ "$type" == "metadata" ]; then
+ specific_changelog=${changelog_xattr:10:8}
+ elif [ "$type" == "entry" ]; then
+ specific_changelog=${changelog_xattr:18:8}
+ else
+ specific_changlog="error"
+ fi
+
+ echo $specific_changelog
+}
+##
+ # query pathinfo xattr and extract POSIX pathname(s)
+ ##
+function get_backend_paths {
+ local path=$1
+
+ getfattr -m . -n trusted.glusterfs.pathinfo $path | tr ' ' '\n' | sed -n 's/<POSIX.*:.*:\(.*\)>.*/\1/p'
+}
+
+#Gets the xattr value in hex, also removed 0x in front of the value
+function get_hex_xattr {
+ local key=$1
+ local path=$2
+ getfattr -d -m. -e hex $2 2>/dev/null | grep $1 | cut -f2 -d'=' | cut -f2 -d'x'
+}
+
+function cumulative_stat_count {
+ echo "$1" | grep "Cumulative Stats:" | wc -l
+}
+
+function incremental_stat_count {
+ echo "$1" | grep "Interval$2Stats:" | wc -l
+}
+
+function cleared_stat_count {
+ echo "$1" | grep "Cleared stats." | wc -l
+}
+
+function data_read_count {
+ echo "$1" | grep "Data Read:$2bytes" | wc -l
+}
+
+function data_written_count {
+ echo "$1" | grep "Data Written:$2bytes" | wc -l
+}
+
+function has_holes {
+ if [ $((`stat -c '%b*%B-%s' $1`)) -lt 0 ];
+ then
+ echo "1"
+ else
+ echo "0"
+ fi
+}
+
+function do_volume_operations() {
+ local operation=$1
+ local count=$2
+ local force=$3
+
+ local pids=()
+ local cli
+ local v
+
+ for i in `seq 1 $count`; do
+ cli="CLI_$i"
+ v="V`expr $i - 1`"
+ ${!cli} volume $operation ${!v} $force &
+ pids[$i]=$!
+ done
+
+ for i in `seq 1 $count`; do
+ wait ${pids[$i]}
+ done
+}
+
+function start_volumes() {
+ do_volume_operations start $1
+}
+
+function stop_volumes() {
+ do_volume_operations stop $1
+}
+
+function start_force_volumes() {
+ do_volume_operations start $1 force
+}
+
+function stop_force_volumes() {
+ do_volume_operations stop $1 force
+}
+
+function delete_volumes() {
+ do_volume_operations delete $1
+}
+
+function volume_exists() {
+ $CLI volume info $1 > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function killall_gluster() {
+ terminate_pids $(process_pids gluster)
+ find $GLUSTERD_PIDFILEDIR -name '*.pid' | xargs rm -f
+}
+
+function afr_get_index_count {
+ local brick=$1
+ ls $1/.glusterfs/indices/xattrop | grep -v xattrop | wc -l
+}
+
+function landfill_entry_count {
+ local brick=$1
+ ls $brick/.glusterfs/landfill | wc -l
+}
+
+function path_exists {
+ stat $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+function path_size {
+ local size=$(stat -c %s $1)
+ if [ $? -eq 0 ]; then echo $size; else echo ""; fi
+}
+
+function force_umount {
+ ${UMOUNT_F} $*
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+function assign_gfid {
+ local gfid=$1
+ local file=$2
+ setfattr -n trusted.gfid -v $1 $2
+}
+
+function get_random_gfid {
+ echo "0x"$(uuidgen | awk -F '-' 'BEGIN {OFS=""} {print $1,$2,$3,$4,$5}')
+}
+
+function volgen_volume_exists {
+ local volfile="$1"
+ local xl_vol="$2"
+ local xl_type="$3"
+ local xl_feature="$4"
+ xl=$(sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d" $volfile)
+ if [ -z "$xl" ];
+ then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+function volgen_volume_option {
+ local volfile="$1"
+ local xl_vol="$2"
+ local xl_type="$3"
+ local xl_feature="$4"
+ local xl_option="$5"
+ sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d;/option $xl_option/!d" $volfile | grep " $xl_option " | awk '{print $3}'
+}
+
+function mount_get_option_value {
+ local m=$1
+ local subvol=$2
+ local key=$3
+
+ grep -w "$3" $m/.meta/graphs/active/$subvol/private | awk '{print $3}'
+}
+
+function get_volume_mark {
+ getfattr -n trusted.glusterfs.volume-mark -ehex $1 | sed -n 's/^trusted.glusterfs.volume-mark=0x//p' | cut -b5-36 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'
+}
+
+# setup geo-rep in a single a node.
+
+function setup_georep {
+
+ $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+
+ $CLI volume start $GMV0
+
+ $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+
+ $CLI volume start $GSV0
+
+ $CLI system:: execute gsec_create
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 create push-pem
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 start
+
+ sleep 80 # after start geo-rep takes a minute to get stable
+
+}
+
+
+# stop and delete geo-rep session
+
+function cleanup_georep {
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 stop
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 delete
+}
+
+function num_graphs
+{
+ local mountpoint=$1
+ echo `ls $mountpoint/.meta/graphs/ | grep -v active | wc -l`
+}
+
+function get_aux()
+{
+##Check if a auxiliary mount is there
+local aux_suffix=$1
+local rundir=$(gluster --print-statedumpdir)
+local pidfile="${rundir}/${V0}$aux_suffix.pid"
+if [ -f $pidfile ];
+then
+ local pid=$(cat ${rundir}/${V0}.pid)
+ pidof glusterfs 2>&1 | grep -w $pid > /dev/null
+
+ if [ $? -eq 0 ]
+ then
+ echo "0"
+ else
+ echo "1"
+ fi
+else
+ echo "1"
+fi
+}
+
+function get_list_aux()
+{
+# check for quota list aux mount
+ get_aux "_quota_list"
+}
+
+function get_limit_aux()
+{
+# check for quota list aux mount
+ get_aux "_quota_limit"
+}
+
+function check_for_xattr {
+ local xattr=$1
+ local filepath=$2
+ getfattr -n $xattr $filepath 2>/dev/null | grep "$xattr" | cut -f1 -d'='
+}
+
+function get_bitd_count {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | wc -l
+}
+
+function get_scrubd_count {
+ ps auxww | grep glusterfs | grep scrub.pid | grep -v grep | wc -l
+}
+
+function get_quarantine_count {
+ ls -l "$1/.glusterfs/quarantine" | wc -l
+}
+
+function get_quotad_count {
+ ps auxww | grep glusterfs | grep quotad.pid | grep -v grep | wc -l
+}
+
+function get_nfs_count {
+ ps auxww | grep glusterfs | grep nfs.pid | grep -v grep | wc -l
+}
+
+function get_snapd_count {
+ ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l
+}
+
+function drop_cache() {
+ case $OSTYPE in
+ Linux)
+ echo 3 > /proc/sys/vm/drop_caches
+ ;;
+ *)
+ # fail but flush caches
+ ( cd $1 && umount $1 2>/dev/null )
+ ;;
+ esac
+}
+
+function quota_list_field () {
+ local QUOTA_PATH=$1
+ local FIELD=$2
+ local awk_arg="{print \$$FIELD}"
+
+ $CLI volume quota $V0 list $QUOTA_PATH | grep $QUOTA_PATH | awk "$awk_arg"
+}
+
+function quota_object_list_field () {
+ local QUOTA_PATH=$1
+ local FIELD=$2
+ local awk_arg="{print \$$FIELD}"
+
+ $CLI volume quota $V0 list-objects $QUOTA_PATH | grep $QUOTA_PATH | awk "$awk_arg"
+}
+
+function quotausage()
+{
+ quota_list_field $1 4
+}
+
+function quota_hard_limit()
+{
+ quota_list_field $1 2
+}
+
+function quota_soft_limit()
+{
+ quota_list_field $1 3
+}
+
+function quota_sl_exceeded()
+{
+ quota_list_field $1 6
+}
+
+function quota_hl_exceeded()
+{
+ quota_list_field $1 7
+
+}
+
+function quota_object_hard_limit()
+{
+ quota_object_list_field $1 2
+}
+
+function scrub_status()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume bitrot $vol scrub status | grep "^$field: " | sed 's/.*: //';
+}
+
+function get_gfid_string {
+ local path=$1;
+ getfattr -n glusterfs.gfid.string $1 2>/dev/null \
+ | grep glusterfs.gfid.string | cut -d '"' -f 2
+}
+
+function file_all_zeroes {
+ < $1 tr -d '\0' | read -n 1 || echo 1
+}
+
+function get_hard_link_count {
+ local path=$1;
+ stat -c %h $path
+}
+
+function count_sh_entries()
+{
+ ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l
+}
+
+function check_brick_multiplex() {
+ cnt="$(ls /var/log/glusterfs/bricks|wc -l)"
+ local ret=$($CLI volume info|grep "cluster.brick-multiplex"|cut -d" " -f2)
+ local bcnt="$(brick_count)"
+
+ if [ $bcnt -ne 1 ]; then
+ if [ "$ret" = "on" ] || [ $cnt -eq 1 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ else
+ echo "N"
+ fi
+}
+
+function get_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local val="$(check_brick_multiplex)"
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ if [ $val == "N" ]; then
+ count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ else
+ count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ fi
+# If no information is found for a given gfid, it means it has not been
+# accessed, so it doesn't have any open fd. In this case we return 0.
+ count="${count:-0}"
+ rm -f $statedump
+ echo $count
+}
+
+
+function get_active_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local val="$(check_brick_multiplex)"
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ if [ $val == "N" ]; then
+ count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ else
+ count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ fi
+ rm -f $statedump
+ echo $count
+}
+
+function get_mount_active_size_value {
+ local vol=$1
+ local mount=$2
+ local statedump=$(generate_mount_statedump $vol $mount)
+ local val=$(grep "active_size" $statedump | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $val
+}
+
+function get_mount_lru_size_value {
+ local vol=$1
+ local mount=$2
+ local statedump=$(generate_mount_statedump $vol $mount)
+ local val=$(grep "lru_size" $statedump | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $val
+}
+
+function check_changelog_op {
+ local clog_path=$1
+ local op=$2
+
+ $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l
+}
+
+function processed_changelogs {
+ local processed_dir=$1
+ count=$(ls -l $processed_dir | grep CHANGELOG | wc -l)
+ if [ $count -gt 0 ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function volgen_check_ancestry {
+ #Returns Y if ancestor_xl is an ancestor of $child_xl according to the volfile
+ local volfile="$1"
+
+ local child_xl_type="$2"
+ local child_xl="$3"
+
+ local ancestor_xl_type="$4"
+ local ancestor_xl="$5"
+
+ child_linenum=$(awk '/type $child_xl_type\/$child_xl/ {print FNR}' $volfile)
+ ancestor_linenum=$(awk '/type $ancestor_xl_type\/$ancestor_xl/ {print FNR}' $volfile)
+
+ if [ $child_linenum -lt $ancestor_linenum ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function get_shd_mux_pid {
+ local volume=$1
+ pid=`$CLI volume status $volume shd | awk '/Self-heal/{print $8}'`
+ echo $pid
+}
+
+function shd_count {
+ ps aux | grep "glustershd" | grep -v grep | wc -l
+}
+
+function number_healer_threads_shd {
+ local pid=$(get_shd_mux_pid $1)
+ pstack $pid | grep $2 | wc -l
+}
+
+function get_mtime {
+ local time=$(get-mdata-xattr -m $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %Y $1)
+ else
+ echo $time
+ fi
+}
+
+function get_ctime {
+ local time=$(get-mdata-xattr -c $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %Z $1)
+ else
+ echo $time
+ fi
+}
+
+function get_atime {
+ local time=$(get-mdata-xattr -a $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %X $1)
+ else
+ echo $time
+ fi
+}
+
+function get-xml()
+{
+ $CLI $1 --xml | xmllint --format - | grep $2 | sed 's/\(<"$2">\|<\/"$2">\)//g'
+}
+
+function logging_time_check()
+{
+ local logdir=$1
+ local logfile=`echo ${0##*/}`_glusterd1.log
+
+ cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l
+}
diff --git a/tools/Makefile.am b/tools/Makefile.am
new file mode 100644
index 00000000000..5808a3728cd
--- /dev/null
+++ b/tools/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = gfind_missing_files glusterfind setgfid2path
+
+CLEANFILES =
diff --git a/tools/gfind_missing_files/Makefile.am b/tools/gfind_missing_files/Makefile.am
new file mode 100644
index 00000000000..181fe7091f3
--- /dev/null
+++ b/tools/gfind_missing_files/Makefile.am
@@ -0,0 +1,32 @@
+gfindmissingfilesdir = $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files
+
+if WITH_SERVER
+gfindmissingfiles_SCRIPTS = gfind_missing_files.sh gfid_to_path.sh \
+ gfid_to_path.py
+endif
+
+EXTRA_DIST = gfind_missing_files.sh gfid_to_path.sh \
+ gfid_to_path.py
+
+if WITH_SERVER
+gfindmissingfiles_PROGRAMS = gcrawler
+endif
+
+gcrawler_SOURCES = gcrawler.c
+gcrawler_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+gcrawler_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+if WITH_SERVER
+uninstall-local:
+ rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+
+install-data-local:
+ rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+ ln -s $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files
+endif
+
+CLEANFILES =
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c
new file mode 100644
index 00000000000..4acbe92bc8f
--- /dev/null
+++ b/tools/gfind_missing_files/gcrawler.c
@@ -0,0 +1,581 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <assert.h>
+#include <glusterfs/locking.h>
+
+#include <glusterfs/compat.h>
+#include <glusterfs/list.h>
+#include <glusterfs/syscall.h>
+
+#define THREAD_MAX 32
+#define BUMP(name) INC(name, 1)
+#define DEFAULT_WORKERS 4
+
+#define NEW(x) \
+ { \
+ x = calloc(1, sizeof(typeof(*x))); \
+ }
+
+#define err(x...) fprintf(stderr, x)
+#define out(x...) fprintf(stdout, x)
+#define dbg(x...) \
+ do { \
+ if (debug) \
+ fprintf(stdout, x); \
+ } while (0)
+#define tout(x...) \
+ do { \
+ out("[%ld] ", pthread_self()); \
+ out(x); \
+ } while (0)
+#define terr(x...) \
+ do { \
+ err("[%ld] ", pthread_self()); \
+ err(x); \
+ } while (0)
+#define tdbg(x...) \
+ do { \
+ dbg("[%ld] ", pthread_self()); \
+ dbg(x); \
+ } while (0)
+
+int debug = 0;
+const char *slavemnt = NULL;
+int workers = 0;
+
+struct stats {
+ unsigned long long int cnt_skipped_gfids;
+};
+
+pthread_spinlock_t stats_lock;
+
+struct stats stats_total;
+int stats = 0;
+
+#define INC(name, val) \
+ do { \
+ if (!stats) \
+ break; \
+ pthread_spin_lock(&stats_lock); \
+ { \
+ stats_total.cnt_##name += val; \
+ } \
+ pthread_spin_unlock(&stats_lock); \
+ } while (0)
+
+void
+stats_dump()
+{
+ if (!stats)
+ return;
+
+ out("-------------------------------------------\n");
+ out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids);
+ out("-------------------------------------------\n");
+}
+
+struct dirjob {
+ struct list_head list;
+
+ char *dirname;
+
+ struct dirjob *parent;
+ int ret; /* final status of this subtree */
+ int refcnt; /* how many dirjobs have this as parent */
+
+ pthread_spinlock_t lock;
+};
+
+struct xwork {
+ pthread_t cthreads[THREAD_MAX]; /* crawler threads */
+ int count;
+ int idle;
+ int stop;
+
+ struct dirjob crawl;
+
+ struct dirjob *rootjob; /* to verify completion in xwork_fini() */
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+};
+
+struct dirjob *
+dirjob_ref(struct dirjob *job)
+{
+ pthread_spin_lock(&job->lock);
+ {
+ job->refcnt++;
+ }
+ pthread_spin_unlock(&job->lock);
+
+ return job;
+}
+
+void
+dirjob_free(struct dirjob *job)
+{
+ assert(list_empty(&job->list));
+
+ pthread_spin_destroy(&job->lock);
+ free(job->dirname);
+ free(job);
+}
+
+void
+dirjob_ret(struct dirjob *job, int err)
+{
+ int ret = 0;
+ int refcnt = 0;
+ struct dirjob *parent = NULL;
+
+ pthread_spin_lock(&job->lock);
+ {
+ refcnt = --job->refcnt;
+ job->ret = (job->ret || err);
+ }
+ pthread_spin_unlock(&job->lock);
+
+ if (refcnt == 0) {
+ ret = job->ret;
+
+ if (ret)
+ terr("Failed: %s (%d)\n", job->dirname, ret);
+ else
+ tdbg("Finished: %s\n", job->dirname);
+
+ parent = job->parent;
+ if (parent)
+ dirjob_ret(parent, ret);
+
+ dirjob_free(job);
+ job = NULL;
+ }
+}
+
+struct dirjob *
+dirjob_new(const char *dir, struct dirjob *parent)
+{
+ struct dirjob *job = NULL;
+
+ NEW(job);
+ if (!job)
+ return NULL;
+
+ job->dirname = strdup(dir);
+ if (!job->dirname) {
+ free(job);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&job->list);
+ pthread_spin_init(&job->lock, PTHREAD_PROCESS_PRIVATE);
+ job->ret = 0;
+
+ if (parent)
+ job->parent = dirjob_ref(parent);
+
+ job->refcnt = 1;
+
+ return job;
+}
+
+void
+xwork_addcrawl(struct xwork *xwork, struct dirjob *job)
+{
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ list_add_tail(&job->list, &xwork->crawl.list);
+ pthread_cond_broadcast(&xwork->cond);
+ }
+ pthread_mutex_unlock(&xwork->mutex);
+}
+
+int
+xwork_add(struct xwork *xwork, const char *dir, struct dirjob *parent)
+{
+ struct dirjob *job = NULL;
+
+ job = dirjob_new(dir, parent);
+ if (!job)
+ return -1;
+
+ xwork_addcrawl(xwork, job);
+
+ return 0;
+}
+
+struct dirjob *
+xwork_pick(struct xwork *xwork, int block)
+{
+ struct dirjob *job = NULL;
+ struct list_head *head = NULL;
+
+ head = &xwork->crawl.list;
+
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ for (;;) {
+ if (xwork->stop)
+ break;
+
+ if (!list_empty(head)) {
+ job = list_entry(head->next, typeof(*job), list);
+ list_del_init(&job->list);
+ break;
+ }
+
+ if (((xwork->count * 2) == xwork->idle) &&
+ list_empty(&xwork->crawl.list)) {
+ /* no outstanding jobs, and no
+ active workers
+ */
+ tdbg("Jobless. Terminating\n");
+ xwork->stop = 1;
+ pthread_cond_broadcast(&xwork->cond);
+ break;
+ }
+
+ if (!block)
+ break;
+
+ xwork->idle++;
+ pthread_cond_wait(&xwork->cond, &xwork->mutex);
+ xwork->idle--;
+ }
+ }
+ pthread_mutex_unlock(&xwork->mutex);
+
+ return job;
+}
+
+int
+skip_name(const char *dirname, const char *name)
+{
+ if (strcmp(name, ".") == 0)
+ return 1;
+
+ if (strcmp(name, "..") == 0)
+ return 1;
+
+ if (strcmp(name, "changelogs") == 0)
+ return 1;
+
+ if (strcmp(name, "health_check") == 0)
+ return 1;
+
+ if (strcmp(name, "indices") == 0)
+ return 1;
+
+ if (strcmp(name, "landfill") == 0)
+ return 1;
+
+ return 0;
+}
+
+int
+skip_stat(struct dirjob *job, const char *name)
+{
+ if (job == NULL)
+ return 0;
+
+ if (strcmp(job->dirname, ".glusterfs") == 0) {
+ tdbg(
+ "Directly adding directories under .glusterfs "
+ "to global list: %s\n",
+ name);
+ return 1;
+ }
+
+ if (job->parent != NULL) {
+ if (strcmp(job->parent->dirname, ".glusterfs") == 0) {
+ tdbg(
+ "Directly adding directories under .glusterfs/XX "
+ "to global list: %s\n",
+ name);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int
+xworker_do_crawl(struct xwork *xwork, struct dirjob *job)
+{
+ DIR *dirp = NULL;
+ int ret = -1;
+ int boff;
+ int plen;
+ char *path = NULL;
+ struct dirjob *cjob = NULL;
+ struct stat statbuf = {
+ 0,
+ };
+ struct dirent *entry;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char gfid_path[PATH_MAX] = {
+ 0,
+ };
+
+ plen = strlen(job->dirname) + 256 + 2;
+ path = alloca(plen);
+
+ tdbg("Entering: %s\n", job->dirname);
+
+ dirp = sys_opendir(job->dirname);
+ if (!dirp) {
+ terr("opendir failed on %s (%s)\n", job->dirname, strerror(errno));
+ goto out;
+ }
+
+ boff = sprintf(path, "%s/", job->dirname);
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(dirp, scratch);
+ if (!entry || errno != 0) {
+ if (errno != 0) {
+ err("readdir(%s): %s\n", job->dirname, strerror(errno));
+ ret = errno;
+ goto out;
+ }
+ break;
+ }
+
+ if (entry->d_ino == 0)
+ continue;
+
+ if (skip_name(job->dirname, entry->d_name))
+ continue;
+
+ /* It is sure that, children and grandchildren of .glusterfs
+ * are directories, just add them to global queue.
+ */
+ if (skip_stat(job, entry->d_name)) {
+ strncpy(path + boff, entry->d_name, (plen - boff));
+ cjob = dirjob_new(path, job);
+ if (!cjob) {
+ err("dirjob_new(%s): %s\n", path, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ xwork_addcrawl(xwork, cjob);
+ continue;
+ }
+
+ (void)snprintf(gfid_path, sizeof(gfid_path), "%s/.gfid/%s", slavemnt,
+ entry->d_name);
+ ret = sys_lstat(gfid_path, &statbuf);
+
+ if (ret && errno == ENOENT) {
+ out("%s\n", entry->d_name);
+ BUMP(skipped_gfids);
+ }
+
+ if (ret && errno != ENOENT) {
+ err("stat on slave failed(%s): %s\n", gfid_path, strerror(errno));
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ if (dirp)
+ (void)sys_closedir(dirp);
+
+ return ret;
+}
+
+void *
+xworker_crawl(void *data)
+{
+ struct xwork *xwork = data;
+ struct dirjob *job = NULL;
+ int ret = -1;
+
+ while ((job = xwork_pick(xwork, 0))) {
+ ret = xworker_do_crawl(xwork, job);
+ dirjob_ret(job, ret);
+ }
+
+ return NULL;
+}
+
+int
+xwork_fini(struct xwork *xwork, int stop)
+{
+ int i = 0;
+ int ret = 0;
+ void *tret = 0;
+
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ xwork->stop = (xwork->stop || stop);
+ pthread_cond_broadcast(&xwork->cond);
+ }
+ pthread_mutex_unlock(&xwork->mutex);
+
+ for (i = 0; i < xwork->count; i++) {
+ pthread_join(xwork->cthreads[i], &tret);
+ tdbg("CThread id %ld returned %p\n", xwork->cthreads[i], tret);
+ }
+
+ if (debug) {
+ assert(xwork->rootjob->refcnt == 1);
+ dirjob_ret(xwork->rootjob, 0);
+ }
+
+ if (stats)
+ pthread_spin_destroy(&stats_lock);
+
+ return ret;
+}
+
+int
+xwork_init(struct xwork *xwork, int count)
+{
+ int i = 0;
+ int ret = 0;
+ struct dirjob *rootjob = NULL;
+
+ if (stats)
+ pthread_spin_init(&stats_lock, PTHREAD_PROCESS_PRIVATE);
+
+ pthread_mutex_init(&xwork->mutex, NULL);
+ pthread_cond_init(&xwork->cond, NULL);
+
+ INIT_LIST_HEAD(&xwork->crawl.list);
+
+ rootjob = dirjob_new(".glusterfs", NULL);
+ if (debug)
+ xwork->rootjob = dirjob_ref(rootjob);
+
+ xwork_addcrawl(xwork, rootjob);
+
+ xwork->count = count;
+ for (i = 0; i < count; i++) {
+ ret = pthread_create(&xwork->cthreads[i], NULL, xworker_crawl, xwork);
+ if (ret)
+ break;
+ tdbg("Spawned crawler %d thread %ld\n", i, xwork->cthreads[i]);
+ }
+
+ return ret;
+}
+
+int
+xfind(const char *basedir)
+{
+ struct xwork xwork;
+ int ret = 0;
+ char *cwd = NULL;
+
+ ret = chdir(basedir);
+ if (ret) {
+ err("%s: %s\n", basedir, strerror(errno));
+ return ret;
+ }
+
+ cwd = getcwd(0, 0);
+ if (!cwd) {
+ err("getcwd(): %s\n", strerror(errno));
+ return -1;
+ }
+
+ tdbg("Working directory: %s\n", cwd);
+ free(cwd);
+
+ memset(&xwork, 0, sizeof(xwork));
+
+ ret = xwork_init(&xwork, workers);
+ if (ret == 0)
+ xworker_crawl(&xwork);
+
+ ret = xwork_fini(&xwork, ret);
+ stats_dump();
+
+ return ret;
+}
+
+static char *
+parse_and_validate_args(int argc, char *argv[])
+{
+ char *basedir = NULL;
+ struct stat d = {
+ 0,
+ };
+ int ret = -1;
+#ifndef __FreeBSD__
+ unsigned char volume_id[16];
+#endif /* __FreeBSD__ */
+ char *slv_mnt = NULL;
+
+ if (argc != 4) {
+ err("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n",
+ argv[0]);
+ return NULL;
+ }
+
+ basedir = argv[1];
+ ret = sys_lstat(basedir, &d);
+ if (ret) {
+ err("%s: %s\n", basedir, strerror(errno));
+ return NULL;
+ }
+
+#ifndef __FreeBSD__
+ ret = sys_lgetxattr(basedir, "trusted.glusterfs.volume-id", volume_id, 16);
+ if (ret != 16) {
+ err("%s:Not a valid brick path.\n", basedir);
+ return NULL;
+ }
+#endif /* __FreeBSD__ */
+
+ slv_mnt = argv[2];
+ ret = sys_lstat(slv_mnt, &d);
+ if (ret) {
+ err("%s: %s\n", slv_mnt, strerror(errno));
+ return NULL;
+ }
+ slavemnt = argv[2];
+
+ workers = atoi(argv[3]);
+ if (workers <= 0)
+ workers = DEFAULT_WORKERS;
+
+ return basedir;
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *basedir = NULL;
+
+ basedir = parse_and_validate_args(argc, argv);
+ if (!basedir)
+ return 1;
+
+ xfind(basedir);
+
+ return 0;
+}
diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py
new file mode 100644
index 00000000000..01e08a9494a
--- /dev/null
+++ b/tools/gfind_missing_files/gfid_to_path.py
@@ -0,0 +1,162 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+import os
+import xattr
+import uuid
+import re
+import errno
+
+CHANGELOG_SEARCH_MAX_TRY = 31
+DEC_CTIME_START = 5
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+MAX_NUM_CHANGELOGS_TRY = 2
+
+
+def output_not_found(gfid):
+ # Write GFID to stderr
+ sys.stderr.write("%s\n" % gfid)
+
+
+def output_success(path):
+ # Write converted Path to Stdout
+ sys.stdout.write("%s\n" % path)
+
+
+def full_dir_path(gfid):
+ out_path = ""
+ while True:
+ path = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ path_readlink = os.readlink(path)
+ pgfid = os.path.dirname(path_readlink)
+ out_path = os.path.join(os.path.basename(path_readlink), out_path)
+ if pgfid == "../../00/00/%s" % ROOT_GFID:
+ out_path = os.path.join("./", out_path)
+ break
+ gfid = os.path.basename(pgfid)
+ return out_path
+
+
+def find_path_from_changelog(fd, gfid):
+ """
+ In given Changelog File, finds using following pattern
+ <T><GFID>\x00<TYPE>\x00<MODE>\x00<UID>\x00<GID>\x00<PARGFID>/<BASENAME>
+ Pattern search finds PARGFID and BASENAME, Convert PARGFID to Path
+ Using readlink and add basename to form Full path.
+ """
+ content = fd.read()
+
+ pattern = "E%s" % gfid
+ pattern += "\x00(3|23)\x00\d+\x00\d+\x00\d+\x00([^\x00]+)/([^\x00]+)"
+ pat = re.compile(pattern)
+ match = pat.search(content)
+
+ if match:
+ pgfid = match.group(2)
+ basename = match.group(3)
+ if pgfid == ROOT_GFID:
+ return os.path.join("./", basename)
+ else:
+ full_path_parent = full_dir_path(pgfid)
+ if full_path_parent:
+ return os.path.join(full_path_parent, basename)
+
+ return None
+
+
+def gfid_to_path(gfid):
+ """
+ Try readlink, if it is directory it succeeds.
+ Get ctime of the GFID file, Decrement by 5 sec
+ Search for Changelog filename, Since Changelog file generated
+ every 15 sec, Search and get immediate next Changelog after the file
+ Creation. Get the Path by searching in Changelog file.
+ Get the resultant file's GFID and Compare with the input, If these
+ GFIDs are different then Some thing is changed(May be Rename)
+ """
+ gfid = gfid.strip()
+ gpath = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ try:
+ output_success(full_dir_path(gfid))
+ return
+ except OSError:
+ # Not an SymLink
+ pass
+
+ try:
+ ctime = int(os.stat(gpath).st_ctime)
+ ctime -= DEC_CTIME_START
+ except (OSError, IOError):
+ output_not_found(gfid)
+ return
+
+ path = None
+ found_changelog = False
+ changelog_parse_try = 0
+ for i in range(CHANGELOG_SEARCH_MAX_TRY):
+ cl = os.path.join(".glusterfs/changelogs", "CHANGELOG.%s" % ctime)
+
+ try:
+ with open(cl, "rb") as f:
+ changelog_parse_try += 1
+ found_changelog = True
+ path = find_path_from_changelog(f, gfid)
+ if not path and changelog_parse_try < MAX_NUM_CHANGELOGS_TRY:
+ ctime += 1
+ continue
+ break
+ except (IOError, OSError) as e:
+ if e.errno == errno.ENOENT:
+ ctime += 1
+ else:
+ break
+
+ if not found_changelog:
+ output_not_found(gfid)
+ return
+
+ if not path:
+ output_not_found(gfid)
+ return
+ gfid1 = str(uuid.UUID(bytes=xattr.get(path, "trusted.gfid")))
+ if gfid != gfid1:
+ output_not_found(gfid)
+ return
+
+ output_success(path)
+
+
+def main():
+ num_arguments = 3
+ if not sys.stdin.isatty():
+ num_arguments = 2
+
+ if len(sys.argv) != num_arguments:
+ sys.stderr.write("Invalid arguments\nUsage: "
+ "%s <BRICK_PATH> <GFID_FILE>\n" % sys.argv[0])
+ sys.exit(1)
+
+ path = sys.argv[1]
+
+ if sys.stdin.isatty():
+ gfid_list = os.path.abspath(sys.argv[2])
+ os.chdir(path)
+ with open(gfid_list) as f:
+ for gfid in f:
+ gfid_to_path(gfid)
+ else:
+ os.chdir(path)
+ for gfid in sys.stdin:
+ gfid_to_path(gfid)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/gfind_missing_files/gfid_to_path.sh b/tools/gfind_missing_files/gfid_to_path.sh
new file mode 100644
index 00000000000..ebe817ac2f3
--- /dev/null
+++ b/tools/gfind_missing_files/gfid_to_path.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+
+E_BADARGS=65
+
+
+gfid_to_path ()
+{
+ brick_dir=$1;
+ gfid_file=$(readlink -e $2);
+
+ current_dir=$(pwd);
+ cd $brick_dir;
+
+ while read gfid
+ do
+ to_search=`echo .glusterfs/${gfid:0:2}"/"${gfid:2:2}"/"$gfid`;
+ find . -samefile $to_search | grep -v $to_search;
+ done < $gfid_file;
+
+ cd $current_dir;
+}
+
+
+main ()
+{
+ if [ $# -ne 2 ]
+ then
+ echo "Usage: `basename $0` BRICK_DIR GFID_FILE";
+ exit $E_BADARGS;
+ fi
+
+ gfid_to_path $1 $2;
+}
+
+main "$@";
diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
new file mode 100644
index 00000000000..e7aaa0b5dd4
--- /dev/null
+++ b/tools/gfind_missing_files/gfind_missing_files.sh
@@ -0,0 +1,119 @@
+#!/bin/sh
+
+## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+
+BRICKPATH= #Brick path of gluster volume
+SLAVEHOST= #Slave hostname
+SLAVEVOL= #Slave volume
+SLAVEMNT= #Slave gluster volume mount point
+WORKERS=4 #Default number of worker threads
+
+out()
+{
+ echo "$@";
+}
+
+fatal()
+{
+ out FATAL "$@";
+ exit 1
+}
+
+ping_host ()
+{
+ ### Use bash internal socket support
+ {
+ exec 400<>/dev/tcp/$1/$2
+ if [ $? -ne '0' ]; then
+ return 1;
+ else
+ exec 400>&-
+ return 0;
+ fi
+ } 1>&2 2>/dev/null
+}
+
+mount_slave()
+{
+ local i; # inode number
+ SSH_PORT=22
+
+ SLAVEMNT=`mktemp -d`
+ [ "x$SLAVEMNT" = "x" ] && fatal "Could not mktemp directory";
+ [ -d "$SLAVEMNT" ] || fatal "$SLAVEMNT not a directory";
+
+ ping_host ${SLAVEHOST} $SSH_PORT
+ if [ $? -ne 0 ]; then
+ echo "$SLAVEHOST not reachable.";
+ exit 1;
+ fi;
+
+ glusterfs --volfile-id=$SLAVEVOL --aux-gfid-mount --volfile-server=$SLAVEHOST $SLAVEMNT;
+ i=$(stat -c '%i' $SLAVEMNT);
+ [ "x$i" = "x1" ] || fatal "Could not mount volume $2 on $SLAVEMNT Please check host and volume exists";
+}
+
+parse_cli()
+{
+ if [ "$#" -ne 4 ]; then
+ echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
+ exit 1
+ else
+ BRICKPATH=$1;
+ SLAVEHOST=$2;
+ SLAVEVOL=$3;
+ OUTFILE=$4;
+
+ mount_slave;
+ echo "Slave volume is mounted at ${SLAVEMNT}"
+ echo
+ fi
+}
+
+main()
+{
+ parse_cli "$@";
+
+ echo "Calling crawler...";
+ path=$(readlink -e $0)
+ $(dirname $path)/gcrawler ${BRICKPATH} ${SLAVEMNT} ${WORKERS} > ${OUTFILE}
+
+ #Clean up the mount
+ umount $SLAVEMNT;
+ rmdir $SLAVEMNT;
+
+ echo "Crawl Complete."
+ num_files_missing=$(wc -l ${OUTFILE} | awk '{print $1}')
+ if [ $num_files_missing -eq 0 ]
+ then
+ echo "Total Missing File Count : 0"
+ exit 0;
+ fi
+
+ echo "gfids of skipped files are available in the file ${OUTFILE}"
+ echo
+ echo "Starting gfid to path conversion"
+
+ #Call python script to convert gfids to full pathname
+ INFILE=$(readlink -e ${OUTFILE})
+ python $(dirname $path)/gfid_to_path.py ${BRICKPATH} ${INFILE} 1> ${OUTFILE}_pathnames 2> ${OUTFILE}_gfids
+ echo "Path names of skipped files are available in the file ${OUTFILE}_pathnames"
+
+ gfid_to_path_failures=$(wc -l ${OUTFILE}_gfids | awk '{print $1}')
+ if [ $gfid_to_path_failures -gt 0 ]
+ then
+ echo "WARNING: Unable to convert some GFIDs to Paths, GFIDs logged to ${OUTFILE}_gfids"
+ echo "Use $(dirname $path)/gfid_to_path.sh <brick-path> ${OUTFILE}_gfids to convert those GFIDs to Path"
+ fi
+
+ #Output
+ echo "Total Missing File Count : $(wc -l ${OUTFILE} | awk '{print $1}')"
+}
+
+main "$@";
diff --git a/tools/glusterfind/Makefile.am b/tools/glusterfind/Makefile.am
new file mode 100644
index 00000000000..f17dbdb228e
--- /dev/null
+++ b/tools/glusterfind/Makefile.am
@@ -0,0 +1,24 @@
+SUBDIRS = src
+
+EXTRA_DIST = S57glusterfind-delete-post.py glusterfind
+
+if WITH_SERVER
+bin_SCRIPTS = glusterfind
+endif
+
+CLEANFILES = $(bin_SCRIPTS)
+
+if WITH_SERVER
+deletehookscriptsdir = $(GLUSTERFS_LIBEXECDIR)/glusterfind/
+deletehookscripts_SCRIPTS = S57glusterfind-delete-post.py
+
+uninstall-local:
+ rm -f $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
+
+install-data-local:
+ $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/glusterfind/.keys
+ $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/
+ rm -f $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
+ ln -s $(GLUSTERFS_LIBEXECDIR)/glusterfind/S57glusterfind-delete-post.py \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
+endif
diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py
new file mode 100755
index 00000000000..5beece220f0
--- /dev/null
+++ b/tools/glusterfind/S57glusterfind-delete-post.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python3
+import os
+import shutil
+from errno import ENOENT
+from subprocess import Popen, PIPE
+from argparse import ArgumentParser
+
+
+DEFAULT_GLUSTERD_WORKDIR = "/var/lib/glusterd"
+
+
+def handle_rm_error(func, path, exc_info):
+ if exc_info[1].errno == ENOENT:
+ return
+
+ raise exc_info[1]
+
+
+def get_glusterd_workdir():
+ p = Popen(["gluster", "system::", "getwd"],
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+
+ out, _ = p.communicate()
+
+ if p.returncode == 0:
+ return out.strip()
+ else:
+ return DEFAULT_GLUSTERD_WORKDIR
+
+
+def get_args():
+ parser = ArgumentParser(description="Volume delete post hook script")
+ parser.add_argument("--volname")
+ return parser.parse_args()
+
+
+def main():
+ args = get_args()
+ glusterfind_dir = os.path.join(get_glusterd_workdir(), "glusterfind")
+
+ # Check all session directories, if any directory found for
+ # the deleted volume, cleanup all the session directories
+ try:
+ ls_glusterfind_dir = os.listdir(glusterfind_dir)
+ except OSError:
+ ls_glusterfind_dir = []
+
+ for session in ls_glusterfind_dir:
+ # don't blow away the keys directory
+ if session == ".keys":
+ continue
+
+ # Possible session directory
+ volume_session_path = os.path.join(glusterfind_dir,
+ session,
+ args.volname)
+ if os.path.exists(volume_session_path):
+ shutil.rmtree(volume_session_path, onerror=handle_rm_error)
+
+ # Try to Remove directory, if any other dir exists for different
+ # volume, then rmdir will fail with ENOTEMPTY which is fine
+ try:
+ os.rmdir(os.path.join(glusterfind_dir, session))
+ except (OSError, IOError):
+ pass
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/glusterfind/glusterfind.in b/tools/glusterfind/glusterfind.in
new file mode 100644
index 00000000000..ca154b625dd
--- /dev/null
+++ b/tools/glusterfind/glusterfind.in
@@ -0,0 +1,18 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/')
+sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/glusterfind')
+
+from glusterfind.main import main
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/glusterfind/src/Makefile.am b/tools/glusterfind/src/Makefile.am
new file mode 100644
index 00000000000..43b6141b01c
--- /dev/null
+++ b/tools/glusterfind/src/Makefile.am
@@ -0,0 +1,16 @@
+glusterfinddir = $(GLUSTERFS_LIBEXECDIR)/glusterfind
+
+if WITH_SERVER
+glusterfind_PYTHON = conf.py utils.py __init__.py \
+ main.py libgfchangelog.py changelogdata.py gfind_py2py3.py
+
+glusterfind_SCRIPTS = changelog.py nodeagent.py \
+ brickfind.py
+
+glusterfind_DATA = tool.conf
+endif
+
+EXTRA_DIST = changelog.py nodeagent.py brickfind.py \
+ tool.conf changelogdata.py
+
+CLEANFILES =
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py
new file mode 100644
index 00000000000..1753698b5fa
--- /dev/null
+++ b/tools/glusterfind/src/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py
new file mode 100644
index 00000000000..73b6350188d
--- /dev/null
+++ b/tools/glusterfind/src/brickfind.py
@@ -0,0 +1,118 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+import sys
+import logging
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import time
+
+from utils import mkdirp, setup_logger, create_file, output_write, find
+import conf
+
+
+PROG_DESCRIPTION = """
+Changelog Crawler
+"""
+
+logger = logging.getLogger()
+
+
+def brickfind_crawl(brick, args):
+ if brick.endswith("/"):
+ brick = brick[0:len(brick)-1]
+
+ working_dir = os.path.dirname(args.outfile)
+ mkdirp(working_dir, exit_on_err=True, logger=logger)
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+
+ with open(args.outfile, "a+") as fout:
+ brick_path_len = len(brick)
+
+ def output_callback(path, filter_result, is_dir):
+ path = path.strip()
+ path = path[brick_path_len+1:]
+
+ if args.type == "both":
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
+ else:
+ if (is_dir and args.type == "d") or (
+ (not is_dir) and args.type == "f"):
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
+
+ ignore_dirs = [os.path.join(brick, dirname)
+ for dirname in
+ conf.get_opt("brick_ignore_dirs").split(",")]
+
+ find(brick, callback_func=output_callback,
+ ignore_dirs=ignore_dirs)
+
+ fout.flush()
+ os.fsync(fout.fileno())
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+
+ parser.add_argument("session", help="Session Name")
+ parser.add_argument("volume", help="Volume Name")
+ parser.add_argument("node", help="Node Name")
+ parser.add_argument("brick", help="Brick Name")
+ parser.add_argument("outfile", help="Output File")
+ parser.add_argument("tag", help="Tag to prefix file name with")
+ parser.add_argument("--only-query", help="Only query, No session update",
+ action="store_true")
+ parser.add_argument("--debug", help="Debug", action="store_true")
+ parser.add_argument("--no-encode",
+ help="Do not encode path in outfile",
+ action="store_true")
+ parser.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both')
+ parser.add_argument("--field-separator", help="Field separator",
+ default=" ")
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = _get_args()
+ session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+ status_file_pre = status_file + ".pre"
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "brickfind.log")
+ setup_logger(logger, log_file, args.debug)
+
+ time_to_update = int(time.time())
+ brickfind_crawl(args.brick, args)
+ if not args.only_query:
+ with open(status_file_pre, "w") as f:
+ f.write(str(time_to_update))
+ sys.exit(0)
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
new file mode 100644
index 00000000000..a5e9ea4288f
--- /dev/null
+++ b/tools/glusterfind/src/changelog.py
@@ -0,0 +1,469 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+import sys
+import time
+import xattr
+import logging
+from gfind_py2py3 import bytearray_to_str
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import hashlib
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import codecs
+
+import libgfchangelog
+from utils import mkdirp, symlink_gfid_to_path
+from utils import fail, setup_logger, find
+from utils import get_changelog_rollover_time
+from utils import output_path_prepare
+from changelogdata import ChangelogData
+import conf
+
+
+CHANGELOG_LOG_LEVEL = 9
+CHANGELOG_CONN_RETRIES = 5
+CHANGELOGAPI_NUM_WORKERS = 3
+PROG_DESCRIPTION = """
+Changelog Crawler
+"""
+history_turns = 0
+history_turn_time = 0
+
+logger = logging.getLogger()
+
+
+def pgfid_to_path(brick, changelog_data):
+ """
+ For all the pgfids in table, converts into path using recursive
+ readlink.
+ """
+ # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
+ for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}):
+ # In case of Data/Metadata only, pgfid1 will not be there
+ if row[0] == "":
+ continue
+
+ try:
+ path = symlink_gfid_to_path(brick, row[0])
+ path = output_path_prepare(path, args)
+ changelog_data.gfidpath_set_path1(path, row[0])
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+
+ # pgfid2 to path2 in case of RENAME
+ for row in changelog_data.gfidpath_get_distinct("pgfid2",
+ {"type": "RENAME",
+ "path2": ""}):
+ # Only in case of Rename pgfid2 exists
+ if row[0] == "":
+ continue
+
+ try:
+ path = symlink_gfid_to_path(brick, row[0])
+ path = output_path_prepare(path, args)
+ changelog_data.gfidpath_set_path2(path, row[0])
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+
+
+def populate_pgfid_and_inodegfid(brick, changelog_data):
+ """
+ For all the DATA/METADATA modifications GFID,
+ If symlink, directly convert to Path using Readlink.
+ If not symlink, try to get PGFIDs via xattr query and populate it
+ to pgfid table, collect inodes in inodegfid table
+ """
+ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+ gfid = row[3].strip()
+ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ if os.path.islink(p):
+ # It is a Directory if GFID backend path is symlink
+ try:
+ path = symlink_gfid_to_path(brick, gfid)
+ path = output_path_prepare(path, args)
+ changelog_data.gfidpath_update({"path1": path},
+ {"gfid": gfid})
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+ else:
+ try:
+ # INODE and GFID to inodegfid table
+ changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.pgfid."):
+ # PGFID in pgfid table
+ changelog_data.pgfid_add(x_str.split(".")[-1])
+ except (IOError, OSError):
+ # All OS Errors ignored, since failures will be logged
+ # in End. All GFIDs present in gfidpath table
+ continue
+
+
+def enum_hard_links_using_gfid2path(brick, gfid, args):
+ hardlinks = []
+ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ if not os.path.isdir(p):
+ # we have a symlink or a normal file
+ try:
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.gfid2path."):
+ # get the value for the xattr i.e. <PGFID>/<BN>
+ v = xattr.getxattr(p, x_str)
+ v_str = bytearray_to_str(v)
+ pgfid, bn = v_str.split(os.sep)
+ try:
+ path = symlink_gfid_to_path(brick, pgfid)
+ fullpath = os.path.join(path, bn)
+ fullpath = output_path_prepare(fullpath, args)
+ hardlinks.append(fullpath)
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+ except (IOError, OSError):
+ pass
+ return hardlinks
+
+
+def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
+ path = ""
+ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+ gfid = row[3].strip()
+ logger.debug("Processing gfid %s" % gfid)
+ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
+
+ path = ",".join(hardlinks)
+
+ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
+
+
+def gfid_to_path_using_pgfid(brick, changelog_data, args):
+ """
+ For all the pgfids collected, Converts to Path and
+ does readdir on those directories and looks up inodegfid
+ table for matching inode number.
+ """
+ populate_pgfid_and_inodegfid(brick, changelog_data)
+
+ # If no GFIDs needs conversion to Path
+ if not changelog_data.inodegfid_exists({"converted": 0}):
+ return
+
+ def inode_filter(path):
+ # Looks in inodegfid table, if exists returns
+ # inode number else None
+ try:
+ st = os.lstat(path)
+ except (OSError, IOError):
+ st = None
+
+ if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
+ return st.st_ino
+
+ return None
+
+ # Length of brick path, to remove from output path
+ brick_path_len = len(brick)
+
+ def output_callback(path, inode):
+ # For each path found, encodes it and updates path1
+ # Also updates converted flag in inodegfid table as 1
+ path = path.strip()
+ path = path[brick_path_len+1:]
+
+ path = output_path_prepare(path, args)
+
+ changelog_data.append_path1(path, inode)
+ changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})
+
+ ignore_dirs = [os.path.join(brick, dirname)
+ for dirname in
+ conf.get_opt("brick_ignore_dirs").split(",")]
+
+ for row in changelog_data.pgfid_get():
+ try:
+ path = symlink_gfid_to_path(brick, row[0])
+ find(os.path.join(brick, path),
+ callback_func=output_callback,
+ filter_func=inode_filter,
+ ignore_dirs=ignore_dirs,
+ subdirs_crawl=False)
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+
+
+def gfid_to_path_using_batchfind(brick, changelog_data):
+ # If all the GFIDs converted using gfid_to_path_using_pgfid
+ if not changelog_data.inodegfid_exists({"converted": 0}):
+ return
+
+ def inode_filter(path):
+ # Looks in inodegfid table, if exists returns
+ # inode number else None
+ try:
+ st = os.lstat(path)
+ except (OSError, IOError):
+ st = None
+
+ if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
+ return st.st_ino
+
+ return None
+
+ # Length of brick path, to remove from output path
+ brick_path_len = len(brick)
+
+ def output_callback(path, inode):
+ # For each path found, encodes it and updates path1
+ # Also updates converted flag in inodegfid table as 1
+ path = path.strip()
+ path = path[brick_path_len+1:]
+ path = output_path_prepare(path, args)
+
+ changelog_data.append_path1(path, inode)
+
+ ignore_dirs = [os.path.join(brick, dirname)
+ for dirname in
+ conf.get_opt("brick_ignore_dirs").split(",")]
+
+ # Full Namespace Crawl
+ find(brick, callback_func=output_callback,
+ filter_func=inode_filter,
+ ignore_dirs=ignore_dirs)
+
+
+def parse_changelog_to_db(changelog_data, filename, args):
+ """
+ Parses a Changelog file and populates data in gfidpath table
+ """
+ with codecs.open(filename, encoding="utf-8") as f:
+ changelogfile = os.path.basename(filename)
+ for line in f:
+ data = line.strip().split(" ")
+ if data[0] == "E" and data[2] in ["CREATE", "MKNOD", "MKDIR"]:
+ # CREATE/MKDIR/MKNOD
+ changelog_data.when_create_mknod_mkdir(changelogfile, data)
+ elif data[0] in ["D", "M"]:
+ # DATA/META
+ if not args.only_namespace_changes:
+ changelog_data.when_data_meta(changelogfile, data)
+ elif data[0] == "E" and data[2] in ["LINK", "SYMLINK"]:
+ # LINK/SYMLINK
+ changelog_data.when_link_symlink(changelogfile, data)
+ elif data[0] == "E" and data[2] == "RENAME":
+ # RENAME
+ changelog_data.when_rename(changelogfile, data)
+ elif data[0] == "E" and data[2] in ["UNLINK", "RMDIR"]:
+ # UNLINK/RMDIR
+ changelog_data.when_unlink_rmdir(changelogfile, data)
+
+
+def get_changes(brick, hash_dir, log_file, start, end, args):
+ """
+ Makes use of libgfchangelog's history API to get changelogs
+ containing changes from start and end time. Further collects
+ the modified gfids from the changelogs and writes the list
+ of gfid to 'gfid_list' file.
+ """
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+
+ # Get previous session
+ try:
+ with open(status_file) as f:
+ start = int(f.read().strip())
+ except (ValueError, OSError, IOError):
+ start = args.start
+
+ try:
+ libgfchangelog.cl_init()
+ libgfchangelog.cl_register(brick, hash_dir, log_file,
+ CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
+ except libgfchangelog.ChangelogException as e:
+ fail("%s Changelog register failed: %s" % (brick, e), logger=logger)
+
+ # Output files to record GFIDs and GFID to Path failure GFIDs
+ changelog_data = ChangelogData(args.outfile, args)
+
+ # Changelogs path(Hard coded to BRICK/.glusterfs/changelogs
+ cl_path = os.path.join(brick, ".glusterfs/changelogs")
+
+ # Fail if History fails for requested Start and End
+ try:
+ actual_end = libgfchangelog.cl_history_changelog(
+ cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
+ except libgfchangelog.ChangelogException as e:
+ fail("%s: %s Historical Changelogs not available: %s" %
+ (args.node, brick, e), logger=logger)
+
+ logger.info("[1/4] Starting changelog parsing ...")
+ try:
+ # scan followed by getchanges till scan returns zero.
+ # history_scan() is blocking call, till it gets the number
+ # of changelogs to process. Returns zero when no changelogs
+ # to be processed. returns positive value as number of changelogs
+ # to be processed, which will be fetched using
+ # history_getchanges()
+ changes = []
+ while libgfchangelog.cl_history_scan() > 0:
+ changes = libgfchangelog.cl_history_getchanges()
+
+ for change in changes:
+ # Ignore if last processed changelog comes
+ # again in list
+ if change.endswith(".%s" % start):
+ continue
+ try:
+ parse_changelog_to_db(changelog_data, change, args)
+ libgfchangelog.cl_history_done(change)
+ except IOError as e:
+ logger.warn("Error parsing changelog file %s: %s" %
+ (change, e))
+
+ changelog_data.commit()
+ except libgfchangelog.ChangelogException as e:
+ fail("%s Error during Changelog Crawl: %s" % (brick, e),
+ logger=logger)
+
+ logger.info("[1/4] Finished changelog parsing.")
+
+ # Convert all pgfid available from Changelogs
+ logger.info("[2/4] Starting 'pgfid to path' conversions ...")
+ pgfid_to_path(brick, changelog_data)
+ changelog_data.commit()
+ logger.info("[2/4] Finished 'pgfid to path' conversions.")
+
+ # Convert all gfids recorded for data and metadata to all hardlink paths
+ logger.info("[3/4] Starting 'gfid2path' conversions ...")
+ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
+ changelog_data.commit()
+ logger.info("[3/4] Finished 'gfid2path' conversions.")
+
+ # If some GFIDs fail to get converted from previous step,
+ # convert using find
+ logger.info("[4/4] Starting 'gfid to path using batchfind' "
+ "conversions ...")
+ gfid_to_path_using_batchfind(brick, changelog_data)
+ changelog_data.commit()
+ logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")
+
+ return actual_end
+
+
+def changelog_crawl(brick, start, end, args):
+ """
+ Init function, prepares working dir and calls Changelog query
+ """
+ if brick.endswith("/"):
+ brick = brick[0:len(brick)-1]
+
+ # WORKING_DIR/BRICKHASH/OUTFILE
+ working_dir = os.path.dirname(args.outfile)
+ brickhash = hashlib.sha1(brick.encode())
+ brickhash = str(brickhash.hexdigest())
+ working_dir = os.path.join(working_dir, brickhash)
+
+ mkdirp(working_dir, exit_on_err=True, logger=logger)
+
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "changelog.%s.log" % brickhash)
+
+ logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
+ % (brick, start, end))
+ return get_changes(brick, working_dir, log_file, start, end, args)
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+
+ parser.add_argument("session", help="Session Name")
+ parser.add_argument("volume", help="Volume Name")
+ parser.add_argument("node", help="Node Name")
+ parser.add_argument("brick", help="Brick Name")
+ parser.add_argument("outfile", help="Output File")
+ parser.add_argument("start", help="Start Time", type=int)
+ parser.add_argument("end", help="End Time", type=int)
+ parser.add_argument("--only-query", help="Query mode only (no session)",
+ action="store_true")
+ parser.add_argument("--debug", help="Debug", action="store_true")
+ parser.add_argument("--no-encode",
+ help="Do not encode path in outfile",
+ action="store_true")
+ parser.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser.add_argument("--type",default="both")
+ parser.add_argument("-N", "--only-namespace-changes",
+ help="List only namespace changes",
+ action="store_true")
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = _get_args()
+ mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "changelog.log")
+ setup_logger(logger, log_file, args.debug)
+
+ session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+ status_file_pre = status_file + ".pre"
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+
+ end = -1
+ if args.only_query:
+ start = args.start
+ end = args.end
+ else:
+ try:
+ with open(status_file) as f:
+ start = int(f.read().strip())
+ except (ValueError, OSError, IOError):
+ start = args.start
+
+ # end time is optional; so a -1 may be sent to use the default method of
+ # identifying the end time
+ if end == -1:
+ end = int(time.time()) - get_changelog_rollover_time(args.volume)
+
+ logger.info("%s Started Changelog Crawl - Start: %s End: %s" % (args.brick,
+ start,
+ end))
+ actual_end = changelog_crawl(args.brick, start, end, args)
+ if not args.only_query:
+ with open(status_file_pre, "w") as f:
+ f.write(str(actual_end))
+
+ logger.info("%s Finished Changelog Crawl - End: %s" % (args.brick,
+ actual_end))
+ sys.exit(0)
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
new file mode 100644
index 00000000000..641593cf4b1
--- /dev/null
+++ b/tools/glusterfind/src/changelogdata.py
@@ -0,0 +1,440 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sqlite3
+import os
+
+from utils import RecordType, unquote_plus_space_newline
+from utils import output_path_prepare
+
+
+class OutputMerger(object):
+ """
+ Class to merge the output files collected from
+ different nodes
+ """
+ def __init__(self, db_path, all_dbs):
+ self.conn = sqlite3.connect(db_path)
+ self.cursor = self.conn.cursor()
+ self.cursor_reader = self.conn.cursor()
+ query = "DROP TABLE IF EXISTS finallist"
+ self.cursor.execute(query)
+
+ query = """
+ CREATE TABLE finallist(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ts VARCHAR,
+ type VARCHAR,
+ gfid VARCHAR,
+ path1 VARCHAR,
+ path2 VARCHAR,
+ UNIQUE (type, path1, path2) ON CONFLICT IGNORE
+ )
+ """
+ self.cursor.execute(query)
+
+ # If node database exists, read each db and insert into
+ # final table. Ignore if combination of TYPE PATH1 PATH2
+ # already exists
+ for node_db in all_dbs:
+ if os.path.exists(node_db):
+ conn = sqlite3.connect(node_db)
+ cursor = conn.cursor()
+ query = """
+ SELECT ts, type, gfid, path1, path2
+ FROM gfidpath
+ WHERE path1 != ''
+ ORDER BY id ASC
+ """
+ for row in cursor.execute(query):
+ self.add_if_not_exists(row[0], row[1], row[2],
+ row[3], row[4])
+
+ self.conn.commit()
+
+ def add_if_not_exists(self, ts, ty, gfid, path1, path2=""):
+ # Adds record to finallist only if not exists
+ query = """
+ INSERT INTO finallist(ts, type, gfid, path1, path2)
+ VALUES(?, ?, ?, ?, ?)
+ """
+ self.cursor.execute(query, (ts, ty, gfid, path1, path2))
+
+ def get(self):
+ query = """SELECT type, path1, path2 FROM finallist
+ ORDER BY ts ASC, id ASC"""
+ return self.cursor_reader.execute(query)
+
+ def get_failures(self):
+ query = """
+ SELECT gfid
+ FROM finallist
+ WHERE path1 = '' OR (path2 = '' AND type = 'RENAME')
+ """
+ return self.cursor_reader.execute(query)
+
+
+class ChangelogData(object):
+ def __init__(self, dbpath, args):
+ self.conn = sqlite3.connect(dbpath)
+ self.cursor = self.conn.cursor()
+ self.cursor_reader = self.conn.cursor()
+ self._create_table_gfidpath()
+ self._create_table_pgfid()
+ self._create_table_inodegfid()
+ self.args = args
+ self.path_sep = "/"
+
+ def _create_table_gfidpath(self):
+ drop_table = "DROP TABLE IF EXISTS gfidpath"
+ self.cursor.execute(drop_table)
+
+ create_table = """
+ CREATE TABLE gfidpath(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ts VARCHAR,
+ type VARCHAR,
+ gfid VARCHAR(40),
+ pgfid1 VARCHAR(40) DEFAULT '',
+ bn1 VARCHAR(500) DEFAULT '',
+ pgfid2 VARCHAR(40) DEFAULT '',
+ bn2 VARCHAR(500) DEFAULT '',
+ path1 VARCHAR DEFAULT '',
+ path2 VARCHAR DEFAULT ''
+ )
+ """
+ self.cursor.execute(create_table)
+
+ create_index = """
+ CREATE INDEX gfid_index ON gfidpath(gfid);
+ """
+ self.cursor.execute(create_index)
+
+ def _create_table_inodegfid(self):
+ drop_table = "DROP TABLE IF EXISTS inodegfid"
+ self.cursor.execute(drop_table)
+
+ create_table = """
+ CREATE TABLE inodegfid(
+ inode INTEGER PRIMARY KEY,
+ gfid VARCHAR(40),
+ converted INTEGER DEFAULT 0,
+ UNIQUE (inode, gfid) ON CONFLICT IGNORE
+ )
+ """
+ self.cursor.execute(create_table)
+
+ def _create_table_pgfid(self):
+ drop_table = "DROP TABLE IF EXISTS pgfid"
+ self.cursor.execute(drop_table)
+
+ create_table = """
+ CREATE TABLE pgfid(
+ pgfid VARCHAR(40) PRIMARY KEY,
+ UNIQUE (pgfid) ON CONFLICT IGNORE
+ )
+ """
+ self.cursor.execute(create_table)
+
+ def _get(self, tablename, filters):
+ # SELECT * FROM <TABLENAME> WHERE <CONDITION>
+ params = []
+ query = "SELECT * FROM %s WHERE 1=1" % tablename
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ return self.cursor_reader.execute(query, params)
+
+ def _get_distinct(self, tablename, distinct_field, filters):
+ # SELECT DISTINCT <COL> FROM <TABLENAME> WHERE <CONDITION>
+ params = []
+ query = "SELECT DISTINCT %s FROM %s WHERE 1=1" % (distinct_field,
+ tablename)
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ return self.cursor_reader.execute(query, params)
+
+ def _delete(self, tablename, filters):
+ # DELETE FROM <TABLENAME> WHERE <CONDITIONS>
+ query = "DELETE FROM %s WHERE 1=1" % tablename
+ params = []
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ self.cursor.execute(query, params)
+
+ def _add(self, tablename, data):
+ # INSERT INTO <TABLENAME>(<col1>, <col2>..) VALUES(?,?..)
+ query = "INSERT INTO %s(" % tablename
+ fields = []
+ params = []
+ for key, value in data.items():
+ fields.append(key)
+ params.append(value)
+
+ values_substitute = len(fields)*["?"]
+ query += "%s) VALUES(%s)" % (",".join(fields),
+ ",".join(values_substitute))
+ self.cursor.execute(query, params)
+
+ def _update(self, tablename, data, filters):
+ # UPDATE <TABLENAME> SET col1 = ?,.. WHERE col1=? AND ..
+ params = []
+ update_fields = []
+ for key, value in data.items():
+ update_fields.append("%s = ?" % key)
+ params.append(value)
+
+ query = "UPDATE %s SET %s WHERE 1 = 1" % (tablename,
+ ", ".join(update_fields))
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ self.cursor.execute(query, params)
+
+ def _exists(self, tablename, filters):
+ if not filters:
+ return False
+
+ query = "SELECT COUNT(1) FROM %s WHERE 1=1" % tablename
+ params = []
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ self.cursor.execute(query, params)
+ row = self.cursor.fetchone()
+ return True if row[0] > 0 else False
+
+ def gfidpath_add(self, changelogfile, ty, gfid, pgfid1="", bn1="",
+ pgfid2="", bn2="", path1="", path2=""):
+ self._add("gfidpath", {
+ "ts": changelogfile.split(".")[-1],
+ "type": ty,
+ "gfid": gfid,
+ "pgfid1": pgfid1,
+ "bn1": bn1,
+ "pgfid2": pgfid2,
+ "bn2": bn2,
+ "path1": path1,
+ "path2": path2
+ })
+
+ def gfidpath_update(self, data, filters):
+ self._update("gfidpath", data, filters)
+
+ def gfidpath_delete(self, filters):
+ self._delete("gfidpath", filters)
+
+ def gfidpath_exists(self, filters):
+ return self._exists("gfidpath", filters)
+
+ def gfidpath_get(self, filters={}):
+ return self._get("gfidpath", filters)
+
+ def gfidpath_get_distinct(self, distinct_field, filters={}):
+ return self._get_distinct("gfidpath", distinct_field, filters)
+
+ def pgfid_add(self, pgfid):
+ self._add("pgfid", {
+ "pgfid": pgfid
+ })
+
+ def pgfid_update(self, data, filters):
+ self._update("pgfid", data, filters)
+
+ def pgfid_get(self, filters={}):
+ return self._get("pgfid", filters)
+
+ def pgfid_get_distinct(self, distinct_field, filters={}):
+ return self._get_distinct("pgfid", distinct_field, filters)
+
+ def pgfid_exists(self, filters):
+ return self._exists("pgfid", filters)
+
+ def inodegfid_add(self, inode, gfid, converted=0):
+ self._add("inodegfid", {
+ "inode": inode,
+ "gfid": gfid,
+ "converted": converted
+ })
+
+ def inodegfid_update(self, data, filters):
+ self._update("inodegfid", data, filters)
+
+ def inodegfid_get(self, filters={}):
+ return self._get("inodegfid", filters)
+
+ def inodegfid_get_distinct(self, distinct_field, filters={}):
+ return self._get_distinct("inodegfid", distinct_field, filters)
+
+ def inodegfid_exists(self, filters):
+ return self._exists("inodegfid", filters)
+
+ def append_path1(self, path, inode):
+ # || is for concatenate in SQL
+ query = """UPDATE gfidpath SET path1 = path1 || ',' || ?
+ WHERE gfid IN (SELECT gfid FROM inodegfid WHERE inode = ?)"""
+ self.cursor.execute(query, (path, inode))
+
+ def gfidpath_set_path1(self, path1, pgfid1):
+ # || is for concatenate in SQL
+ if path1 == "":
+ update_str1 = "? || bn1"
+ update_str2 = "? || bn2"
+ else:
+ update_str1 = "? || '{0}' || bn1".format(self.path_sep)
+ update_str2 = "? || '{0}' || bn2".format(self.path_sep)
+
+ query = """UPDATE gfidpath SET path1 = %s
+ WHERE pgfid1 = ?""" % update_str1
+ self.cursor.execute(query, (path1, pgfid1))
+
+ # Set Path2 if pgfid1 and pgfid2 are same
+ query = """UPDATE gfidpath SET path2 = %s
+ WHERE pgfid2 = ?""" % update_str2
+ self.cursor.execute(query, (path1, pgfid1))
+
+ def gfidpath_set_path2(self, path2, pgfid2):
+ # || is for concatenate in SQL
+ if path2 == "":
+ update_str = "? || bn2"
+ else:
+ update_str = "? || '{0}' || bn2".format(self.path_sep)
+
+ query = """UPDATE gfidpath SET path2 = %s
+ WHERE pgfid2 = ?""" % update_str
+ self.cursor.execute(query, (path2, pgfid2))
+
+ def when_create_mknod_mkdir(self, changelogfile, data):
+ # E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>
+ # Add the Entry to DB
+ pgfid1, bn1 = data[6].split("/", 1)
+
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+
+ self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
+
+ def when_rename(self, changelogfile, data):
+ # E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>
+ pgfid1, bn1 = data[3].split("/", 1)
+ pgfid2, bn2 = data[4].split("/", 1)
+
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+ bn2 = unquote_plus_space_newline(bn2).strip()
+
+ if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1}):
+ # If <OLD_PGFID>/<BNAME> is same as CREATE, Update
+ # <NEW_PGFID>/<BNAME> in NEW.
+ self.gfidpath_update({"pgfid1": pgfid2, "bn1": bn2},
+ {"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1})
+ elif self.gfidpath_exists({"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1}):
+ # If we are renaming file back to original name then just
+ # delete the entry since it will effectively be a no-op
+ if self.gfidpath_exists({"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1,
+ "pgfid1": pgfid2, "bn1": bn2}):
+ self.gfidpath_delete({"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1})
+ else:
+ # If <OLD_PGFID>/<BNAME> is same as <PGFID2>/<BN2>
+ # (may be previous RENAME)
+ # then UPDATE <NEW_PGFID>/<BNAME> as <PGFID2>/<BN2>
+ self.gfidpath_update({"pgfid2": pgfid2, "bn2": bn2},
+ {"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1})
+ else:
+ # Else insert as RENAME
+ self.gfidpath_add(changelogfile, RecordType.RENAME, data[1],
+ pgfid1, bn1, pgfid2, bn2)
+
+ if self.gfidpath_exists({"gfid": data[1], "type": "MODIFY"}):
+ # If MODIFY exists already for that GFID, remove it and insert
+ # again so that MODIFY entry comes after RENAME entry
+ # Output will have MODIFY <NEWNAME>
+ self.gfidpath_delete({"gfid": data[1], "type": "MODIFY"})
+ self.gfidpath_add(changelogfile, RecordType.MODIFY, data[1])
+
+ def when_link_symlink(self, changelogfile, data):
+ # E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
+ # Add as New record in Db as Type NEW
+ pgfid1, bn1 = data[3].split("/", 1)
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+
+ self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
+
+ def when_data_meta(self, changelogfile, data):
+ # If GFID row exists, Ignore else Add to Db
+ if not self.gfidpath_exists({"gfid": data[1], "type": "NEW"}) and \
+ not self.gfidpath_exists({"gfid": data[1], "type": "MODIFY"}):
+ self.gfidpath_add(changelogfile, RecordType.MODIFY, data[1])
+
+ def when_unlink_rmdir(self, changelogfile, data):
+ # E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
+ pgfid1, bn1 = data[3].split("/", 1)
+
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+
+ deleted_path = data[4] if len(data) == 5 else ""
+ if deleted_path != "":
+ deleted_path = unquote_plus_space_newline(deleted_path)
+ deleted_path = output_path_prepare(deleted_path, self.args)
+
+ if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1}):
+ # If path exists in table as NEW with same GFID
+ # Delete that row
+ self.gfidpath_delete({"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1})
+ else:
+ # Else Record as DELETE
+ self.gfidpath_add(changelogfile, RecordType.DELETE, data[1],
+ pgfid1, bn1, path1=deleted_path)
+
+ # Update path1 as deleted_path if pgfid1 and bn1 is same as deleted
+ self.gfidpath_update({"path1": deleted_path}, {"gfid": data[1],
+ "pgfid1": pgfid1,
+ "bn1": bn1})
+
+ # Update path2 as deleted_path if pgfid2 and bn2 is same as deleted
+ self.gfidpath_update({"path2": deleted_path}, {
+ "type": RecordType.RENAME,
+ "gfid": data[1],
+ "pgfid2": pgfid1,
+ "bn2": bn1})
+
+ # If deleted directory is parent for somebody
+ query1 = """UPDATE gfidpath SET path1 = ? || '{0}' || bn1
+ WHERE pgfid1 = ? AND path1 != ''""".format(self.path_sep)
+ self.cursor.execute(query1, (deleted_path, data[1]))
+
+ query1 = """UPDATE gfidpath SET path2 = ? || '{0}' || bn1
+ WHERE pgfid2 = ? AND path2 != ''""".format(self.path_sep)
+ self.cursor.execute(query1, (deleted_path, data[1]))
+
+ def commit(self):
+ self.conn.commit()
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
new file mode 100644
index 00000000000..3849ba5dd1f
--- /dev/null
+++ b/tools/glusterfind/src/conf.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+try:
+ from ConfigParser import ConfigParser
+except ImportError:
+ from configparser import ConfigParser
+
+config = ConfigParser()
+config.read(os.path.join(os.path.dirname(os.path.abspath(__file__)),
+ "tool.conf"))
+
+
+def list_change_detectors():
+ return dict(config.items("change_detectors")).keys()
+
+
+def get_opt(opt):
+ return config.get("vars", opt)
+
+
+def get_change_detector(opt):
+ return config.get("change_detectors", opt)
diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py
new file mode 100644
index 00000000000..87324fbf350
--- /dev/null
+++ b/tools/glusterfind/src/gfind_py2py3.py
@@ -0,0 +1,88 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import os
+import sys
+from ctypes import create_string_buffer
+
+if sys.version_info >= (3,):
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ # def bytearray_to_str(byte_arr):
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path.encode(), start, end, num_parallel,
+ actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick.encode(), path.encode(), log_file.encode(),
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile.encode())
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep))
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep))
+
+
+else:
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile)
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep).encode())
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep).encode())
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
new file mode 100644
index 00000000000..513bb101e93
--- /dev/null
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref
+from ctypes.util import find_library
+from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer
+from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register
+from gfind_py2py3 import gfind_history_changelog_done
+
+
+class ChangelogException(OSError):
+ pass
+
+libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True)
+
+
+def raise_oserr(prefix=None):
+ errn = get_errno()
+ prefix_or_empty = prefix + ": " if prefix else ""
+ raise ChangelogException(errn, prefix_or_empty + os.strerror(errn))
+
+
+def cl_init():
+ ret = libgfc.gf_changelog_init(None)
+ if ret == -1:
+ raise_oserr(prefix="gf_changelog_init")
+
+
+def cl_register(brick, path, log_file, log_level, retries=0):
+ ret = gfind_changelog_register(libgfc, brick, path, log_file,log_level, retries)
+ if ret == -1:
+ raise_oserr(prefix="gf_changelog_register")
+
+
+def cl_history_scan():
+ ret = libgfc.gf_history_changelog_scan()
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_scan")
+
+ return ret
+
+
+def cl_history_changelog(changelog_path, start, end, num_parallel):
+ actual_end = c_ulong()
+ ret = gfind_history_changelog(libgfc,changelog_path, start, end,
+ num_parallel,
+ byref(actual_end))
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog")
+
+ return actual_end.value
+
+
+def cl_history_startfresh():
+ ret = libgfc.gf_history_changelog_start_fresh()
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_start_fresh")
+
+
+def cl_history_getchanges():
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+
+ changes = []
+ buf = gf_create_string_buffer(4096)
+
+ while True:
+ ret = libgfc.gf_history_changelog_next_change(buf, 4096)
+ if ret in (0, -1):
+ break
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_next_change")
+
+ return sorted(changes, key=clsort)
+
+
+def cl_history_done(clfile):
+ ret = gfind_history_changelog_done(libgfc, clfile)
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_done")
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
new file mode 100644
index 00000000000..4b5466d0114
--- /dev/null
+++ b/tools/glusterfind/src/main.py
@@ -0,0 +1,921 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+from errno import ENOENT, ENOTEMPTY
+import time
+from multiprocessing import Process
+import os
+import xml.etree.cElementTree as etree
+from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action
+from gfind_py2py3 import gfind_write_row, gfind_write
+import logging
+import shutil
+import tempfile
+import signal
+from datetime import datetime
+import codecs
+import re
+
+from utils import execute, is_host_local, mkdirp, fail
+from utils import setup_logger, human_time, handle_rm_error
+from utils import get_changelog_rollover_time, cache_output, create_file
+import conf
+from changelogdata import OutputMerger
+
+PROG_DESCRIPTION = """
+GlusterFS Incremental API
+"""
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+
+logger = logging.getLogger()
+vol_statusStr = ""
+gtmpfilename = None
+g_pid_nodefile_map = {}
+
+
+class StoreAbsPath(Action):
+ def __init__(self, option_strings, dest, nargs=None, **kwargs):
+ super(StoreAbsPath, self).__init__(option_strings, dest, **kwargs)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ setattr(namespace, self.dest, os.path.abspath(values))
+
+
+def get_pem_key_path(session, volume):
+ return os.path.join(conf.get_opt("session_dir"),
+ session,
+ volume,
+ "%s_%s_secret.pem" % (session, volume))
+
+
+def node_cmd(host, host_uuid, task, cmd, args, opts):
+ """
+ Runs command via ssh if host is not local
+ """
+ try:
+ localdir = is_host_local(host_uuid)
+
+ # this is so to avoid deleting the ssh keys on local node which
+ # otherwise cause ssh password prompts on the console (race conditions)
+ # mode_delete() should be cleaning up the session tree
+ if localdir and task == "delete":
+ return
+
+ pem_key_path = get_pem_key_path(args.session, args.volume)
+
+ if not localdir:
+ # prefix with ssh command if not local node
+ cmd = ["ssh",
+ "-oNumberOfPasswordPrompts=0",
+ "-oStrictHostKeyChecking=no",
+ # We force TTY allocation (-t -t) so that Ctrl+C is handed
+ # through; see:
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1382236
+ # Note that this turns stderr of the remote `cmd`
+ # into stdout locally.
+ "-t",
+ "-t",
+ "-i", pem_key_path,
+ "root@%s" % host] + cmd
+
+ (returncode, err, out) = execute(cmd, logger=logger)
+ if returncode != 0:
+ # Because the `-t -t` above turns the remote stderr into
+ # local stdout, we need to log both stderr and stdout
+ # here to print all error messages.
+ fail("%s - %s failed; stdout (including remote stderr):\n"
+ "%s\n"
+ "stderr:\n"
+ "%s" % (host, task, out, err),
+ returncode,
+ logger=logger)
+
+ if opts.get("copy_outfile", False) and not localdir:
+ cmd_copy = ["scp",
+ "-oNumberOfPasswordPrompts=0",
+ "-oStrictHostKeyChecking=no",
+ "-i", pem_key_path,
+ "root@%s:/%s" % (host, opts.get("node_outfile")),
+ os.path.dirname(opts.get("node_outfile"))]
+ execute(cmd_copy, exit_msg="%s - Copy command failed" % host,
+ logger=logger)
+ except KeyboardInterrupt:
+ sys.exit(2)
+
+
+def run_cmd_nodes(task, args, **kwargs):
+ global g_pid_nodefile_map
+ nodes = get_nodes(args.volume)
+ pool = []
+ for num, node in enumerate(nodes):
+ host, brick = node[1].split(":")
+ host_uuid = node[0]
+ cmd = []
+ opts = {}
+
+ # tmpfilename is valid only for tasks: pre, query and cleanup
+ tmpfilename = kwargs.get("tmpfilename", "BADNAME")
+
+ node_outfile = os.path.join(conf.get_opt("working_dir"),
+ args.session, args.volume,
+ tmpfilename,
+ "tmp_output_%s" % num)
+
+ if task == "pre":
+ if vol_statusStr != "Started":
+ fail("Volume %s is not online" % args.volume,
+ logger=logger)
+
+ # If Full backup is requested or start time is zero, use brickfind
+ change_detector = conf.get_change_detector("changelog")
+ tag = None
+ if args.full:
+ change_detector = conf.get_change_detector("brickfind")
+ tag = args.tag_for_full_find.strip()
+ if tag == "":
+ tag = '""' if not is_host_local(host_uuid) else ""
+
+ # remote file will be copied into this directory
+ mkdirp(os.path.dirname(node_outfile),
+ exit_on_err=True, logger=logger)
+
+ FS = args.field_separator
+ if not is_host_local(host_uuid):
+ FS = "'" + FS + "'"
+
+ cmd = [change_detector,
+ args.session,
+ args.volume,
+ host,
+ brick,
+ node_outfile] + \
+ ([str(kwargs.get("start")), str(kwargs.get("end"))]
+ if not args.full else []) + \
+ ([tag] if tag is not None else []) + \
+ ["--output-prefix", args.output_prefix] + \
+ (["--debug"] if args.debug else []) + \
+ (["--no-encode"] if args.no_encode else []) + \
+ (["--only-namespace-changes"] if args.only_namespace_changes
+ else []) + \
+ (["--type", args.type]) + \
+ (["--field-separator", FS] if args.full else [])
+
+ opts["node_outfile"] = node_outfile
+ opts["copy_outfile"] = True
+ elif task == "query":
+ # If Full backup is requested or start time is zero, use brickfind
+ tag = None
+ change_detector = conf.get_change_detector("changelog")
+ if args.full:
+ change_detector = conf.get_change_detector("brickfind")
+ tag = args.tag_for_full_find.strip()
+ if tag == "":
+ tag = '""' if not is_host_local(host_uuid) else ""
+
+ # remote file will be copied into this directory
+ mkdirp(os.path.dirname(node_outfile),
+ exit_on_err=True, logger=logger)
+
+ FS = args.field_separator
+ if not is_host_local(host_uuid):
+ FS = "'" + FS + "'"
+
+ cmd = [change_detector,
+ args.session,
+ args.volume,
+ host,
+ brick,
+ node_outfile] + \
+ ([str(kwargs.get("start")), str(kwargs.get("end"))]
+ if not args.full else []) + \
+ ([tag] if tag is not None else []) + \
+ ["--only-query"] + \
+ ["--output-prefix", args.output_prefix] + \
+ (["--debug"] if args.debug else []) + \
+ (["--no-encode"] if args.no_encode else []) + \
+ (["--only-namespace-changes"]
+ if args.only_namespace_changes else []) + \
+ (["--type", args.type]) + \
+ (["--field-separator", FS] if args.full else [])
+
+ opts["node_outfile"] = node_outfile
+ opts["copy_outfile"] = True
+ elif task == "cleanup":
+ # After pre/query run, cleanup the working directory and other
+ # temp files. Remove the directory to which node_outfile has
+ # been copied in main node
+ try:
+ os.remove(node_outfile)
+ except (OSError, IOError):
+ logger.warn("Failed to cleanup temporary file %s" %
+ node_outfile)
+ pass
+
+ cmd = [conf.get_opt("nodeagent"),
+ "cleanup",
+ args.session,
+ args.volume,
+ os.path.dirname(node_outfile)] + \
+ (["--debug"] if args.debug else [])
+ elif task == "create":
+ if vol_statusStr != "Started":
+ fail("Volume %s is not online" % args.volume,
+ logger=logger)
+
+ # When glusterfind create, create session directory in
+ # each brick nodes
+ cmd = [conf.get_opt("nodeagent"),
+ "create",
+ args.session,
+ args.volume,
+ brick,
+ kwargs.get("time_to_update")] + \
+ (["--debug"] if args.debug else []) + \
+ (["--reset-session-time"] if args.reset_session_time
+ else [])
+ elif task == "post":
+ # Rename pre status file to actual status file in each node
+ cmd = [conf.get_opt("nodeagent"),
+ "post",
+ args.session,
+ args.volume,
+ brick] + \
+ (["--debug"] if args.debug else [])
+ elif task == "delete":
+ # When glusterfind delete, cleanup all the session files/dirs
+ # from each node.
+ cmd = [conf.get_opt("nodeagent"),
+ "delete",
+ args.session,
+ args.volume] + \
+ (["--debug"] if args.debug else [])
+
+ if cmd:
+ p = Process(target=node_cmd,
+ args=(host, host_uuid, task, cmd, args, opts))
+ p.start()
+ pool.append(p)
+ g_pid_nodefile_map[p.pid] = node_outfile
+
+ for num, p in enumerate(pool):
+ p.join()
+ if p.exitcode != 0:
+ logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
+ if task in ["create", "delete"]:
+ fail("Command %s failed in %s" % (task, nodes[num][1]))
+ elif task == "pre" or task == "query":
+ if args.disable_partial:
+ sys.exit(1)
+ else:
+ del g_pid_nodefile_map[p.pid]
+
+
+@cache_output
+def get_nodes(volume):
+ """
+ Get the gluster volume info xml output and parse to get
+ the brick details.
+ """
+ global vol_statusStr
+
+ cmd = ["gluster", 'volume', 'info', volume, "--xml"]
+ _, data, _ = execute(cmd,
+ exit_msg="Failed to Run Gluster Volume Info",
+ logger=logger)
+ tree = etree.fromstring(data)
+
+ # Test to check if volume has been deleted after session creation
+ count_el = tree.find('volInfo/volumes/count')
+ if int(count_el.text) == 0:
+ fail("Unable to get volume details", logger=logger)
+
+ # this status is used in caller: run_cmd_nodes
+ vol_statusStr = tree.find('volInfo/volumes/volume/statusStr').text
+ vol_typeStr = tree.find('volInfo/volumes/volume/typeStr').text
+
+ nodes = []
+ volume_el = tree.find('volInfo/volumes/volume')
+ try:
+ brick_elems = []
+ if vol_typeStr == "Tier":
+ brick_elems.append('bricks/hotBricks/brick')
+ brick_elems.append('bricks/coldBricks/brick')
+ else:
+ brick_elems.append('bricks/brick')
+
+ for elem in brick_elems:
+ for b in volume_el.findall(elem):
+ nodes.append((b.find('hostUuid').text,
+ b.find('name').text))
+ except (ParseError, AttributeError, ValueError) as e:
+ fail("Failed to parse Volume Info: %s" % e, logger=logger)
+
+ return nodes
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+ subparsers = parser.add_subparsers(dest="mode")
+ subparsers.required = True
+
+ # create <SESSION> <VOLUME> [--debug] [--force]
+ parser_create = subparsers.add_parser('create')
+ parser_create.add_argument("session", help="Session Name")
+ parser_create.add_argument("volume", help="Volume Name")
+ parser_create.add_argument("--debug", help="Debug", action="store_true")
+ parser_create.add_argument("--force", help="Force option to recreate "
+ "the session", action="store_true")
+ parser_create.add_argument("--reset-session-time",
+ help="Reset Session Time to Current Time",
+ action="store_true")
+
+ # delete <SESSION> <VOLUME> [--debug]
+ parser_delete = subparsers.add_parser('delete')
+ parser_delete.add_argument("session", help="Session Name")
+ parser_delete.add_argument("volume", help="Volume Name")
+ parser_delete.add_argument("--debug", help="Debug", action="store_true")
+
+ # list [--session <SESSION>] [--volume <VOLUME>]
+ parser_list = subparsers.add_parser('list')
+ parser_list.add_argument("--session", help="Session Name", default="")
+ parser_list.add_argument("--volume", help="Volume Name", default="")
+ parser_list.add_argument("--debug", help="Debug", action="store_true")
+
+ # pre <SESSION> <VOLUME> <OUTFILE>
+ # [--output-prefix <OUTPUT_PREFIX>] [--full]
+ parser_pre = subparsers.add_parser('pre')
+ parser_pre.add_argument("session", help="Session Name")
+ parser_pre.add_argument("volume", help="Volume Name")
+ parser_pre.add_argument("outfile", help="Output File", action=StoreAbsPath)
+ parser_pre.add_argument("--debug", help="Debug", action="store_true")
+ parser_pre.add_argument("--no-encode",
+ help="Do not encode path in output file",
+ action="store_true")
+ parser_pre.add_argument("--full", help="Full find", action="store_true")
+ parser_pre.add_argument("--disable-partial", help="Disable Partial find, "
+ "Fail when one node fails", action="store_true")
+ parser_pre.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser_pre.add_argument("--regenerate-outfile",
+ help="Regenerate outfile, discard the outfile "
+ "generated from last pre command",
+ action="store_true")
+ parser_pre.add_argument("-N", "--only-namespace-changes",
+ help="List only namespace changes",
+ action="store_true")
+ parser_pre.add_argument("--tag-for-full-find",
+ help="Tag prefix for file names emitted during"
+ " a full find operation; default: \"NEW\"",
+ default="NEW")
+ parser_pre.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
+ parser_pre.add_argument("--field-separator", help="Field separator string",
+ default=" ")
+
+ # query <VOLUME> <OUTFILE> --since-time <SINCE_TIME>
+ # [--output-prefix <OUTPUT_PREFIX>] [--full]
+ parser_query = subparsers.add_parser('query')
+ parser_query.add_argument("volume", help="Volume Name")
+ parser_query.add_argument("outfile", help="Output File",
+ action=StoreAbsPath)
+ parser_query.add_argument("--since-time", help="UNIX epoch time since "
+ "which listing is required", type=int)
+ parser_query.add_argument("--end-time", help="UNIX epoch time up to "
+ "which listing is required", type=int)
+ parser_query.add_argument("--no-encode",
+ help="Do not encode path in output file",
+ action="store_true")
+ parser_query.add_argument("--full", help="Full find", action="store_true")
+ parser_query.add_argument("--debug", help="Debug", action="store_true")
+ parser_query.add_argument("--disable-partial", help="Disable Partial find,"
+ " Fail when one node fails", action="store_true")
+ parser_query.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser_query.add_argument("-N", "--only-namespace-changes",
+ help="List only namespace changes",
+ action="store_true")
+ parser_query.add_argument("--tag-for-full-find",
+ help="Tag prefix for file names emitted during"
+ " a full find operation; default: \"NEW\"",
+ default="NEW")
+ parser_query.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
+ parser_query.add_argument("--field-separator",
+ help="Field separator string",
+ default=" ")
+
+ # post <SESSION> <VOLUME>
+ parser_post = subparsers.add_parser('post')
+ parser_post.add_argument("session", help="Session Name")
+ parser_post.add_argument("volume", help="Volume Name")
+ parser_post.add_argument("--debug", help="Debug", action="store_true")
+
+ return parser.parse_args()
+
+
+def ssh_setup(args):
+ pem_key_path = get_pem_key_path(args.session, args.volume)
+
+ if not os.path.exists(pem_key_path):
+ # Generate ssh-key
+ cmd = ["ssh-keygen",
+ "-N",
+ "",
+ "-f",
+ pem_key_path]
+ execute(cmd,
+ exit_msg="Unable to generate ssh key %s"
+ % pem_key_path,
+ logger=logger)
+
+ logger.info("Ssh key generated %s" % pem_key_path)
+
+ try:
+ shutil.copyfile(pem_key_path + ".pub",
+ os.path.join(conf.get_opt("session_dir"),
+ ".keys",
+ "%s_%s_secret.pem.pub" % (args.session,
+ args.volume)))
+ except (IOError, OSError) as e:
+ fail("Failed to copy public key to %s: %s"
+ % (os.path.join(conf.get_opt("session_dir"), ".keys"), e),
+ logger=logger)
+
+ # Copy pub file to all nodes
+ cmd = ["gluster",
+ "system::",
+ "copy",
+ "file",
+ "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
+
+ execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger)
+
+ logger.info("Distributed ssh key to all nodes of Volume")
+
+ # Add to authorized_keys file in each node
+ cmd = ["gluster",
+ "system::",
+ "execute",
+ "add_secret_pub",
+ "root",
+ "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
+ execute(cmd,
+ exit_msg="Failed to add ssh keys to authorized_keys file",
+ logger=logger)
+
+ logger.info("Ssh key added to authorized_keys of Volume nodes")
+
+
+def enable_volume_options(args):
+ execute(["gluster", "volume", "set",
+ args.volume, "build-pgfid", "on"],
+ exit_msg="Failed to set volume option build-pgfid on",
+ logger=logger)
+ logger.info("Volume option set %s, build-pgfid on" % args.volume)
+
+ execute(["gluster", "volume", "set",
+ args.volume, "changelog.changelog", "on"],
+ exit_msg="Failed to set volume option "
+ "changelog.changelog on", logger=logger)
+ logger.info("Volume option set %s, changelog.changelog on"
+ % args.volume)
+
+ execute(["gluster", "volume", "set",
+ args.volume, "changelog.capture-del-path", "on"],
+ exit_msg="Failed to set volume option "
+ "changelog.capture-del-path on", logger=logger)
+ logger.info("Volume option set %s, changelog.capture-del-path on"
+ % args.volume)
+
+
+def write_output(outfile, outfilemerger, field_separator):
+ with codecs.open(outfile, "a", encoding="utf-8") as f:
+ for row in outfilemerger.get():
+ # Multiple paths in case of Hardlinks
+ paths = row[1].split(",")
+ row_2_rep = None
+ for p in paths:
+ if p == "":
+ continue
+ p_rep = p.replace("//", "/")
+ if not row_2_rep:
+ row_2_rep = row[2].replace("//", "/")
+ if p_rep == row_2_rep:
+ continue
+
+ if row_2_rep and row_2_rep != "":
+ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep)
+
+ else:
+ gfind_write(f, row[0], field_separator, p_rep)
+
+def validate_volume(volume):
+ cmd = ["gluster", 'volume', 'info', volume, "--xml"]
+ _, data, _ = execute(cmd,
+ exit_msg="Failed to Run Gluster Volume Info",
+ logger=logger)
+ try:
+ tree = etree.fromstring(data)
+ statusStr = tree.find('volInfo/volumes/volume/statusStr').text
+ except (ParseError, AttributeError) as e:
+ fail("Invalid Volume: Check the Volume name! %s" % e)
+ if statusStr != "Started":
+ fail("Volume %s is not online" % volume)
+
+# The rules for a valid session name.
+SESSION_NAME_RULES = {
+ 'min_length': 2,
+ 'max_length': 256, # same as maximum volume length
+ # Specifies all alphanumeric characters, underscore, hyphen.
+ 'valid_chars': r'0-9a-zA-Z_-',
+}
+
+
+# checks valid session name, fail otherwise
+def validate_session_name(session):
+ # Check for minimum length
+ if len(session) < SESSION_NAME_RULES['min_length']:
+ fail('session_name must be at least ' +
+ str(SESSION_NAME_RULES['min_length']) + ' characters long.')
+ # Check for maximum length
+ if len(session) > SESSION_NAME_RULES['max_length']:
+ fail('session_name must not exceed ' +
+ str(SESSION_NAME_RULES['max_length']) + ' characters length.')
+
+ # Matches strings composed entirely of characters specified within
+ if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] +
+ ']+$', session):
+ fail('Session name can only contain these characters: ' +
+ SESSION_NAME_RULES['valid_chars'])
+
+
+def mode_create(session_dir, args):
+ validate_session_name(args.session)
+
+ logger.debug("Init is called - Session: %s, Volume: %s"
+ % (args.session, args.volume))
+ mkdirp(session_dir, exit_on_err=True, logger=logger)
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ status_file = os.path.join(session_dir, args.volume, "status")
+
+ if os.path.exists(status_file) and not args.force:
+ fail("Session %s already created" % args.session, logger=logger)
+
+ if not os.path.exists(status_file) or args.force:
+ ssh_setup(args)
+ enable_volume_options(args)
+
+ # Add Rollover time to current time to make sure changelogs
+ # will be available if we use this time as start time
+ time_to_update = int(time.time()) + get_changelog_rollover_time(
+ args.volume)
+
+ run_cmd_nodes("create", args, time_to_update=str(time_to_update))
+
+ if not os.path.exists(status_file) or args.reset_session_time:
+ with open(status_file, "w") as f:
+ f.write(str(time_to_update))
+
+ sys.stdout.write("Session %s created with volume %s\n" %
+ (args.session, args.volume))
+
+ sys.exit(0)
+
+
+def mode_query(session_dir, args):
+ global gtmpfilename
+ global g_pid_nodefile_map
+
+ # Verify volume status
+ cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
+ _, data, _ = execute(cmd,
+ exit_msg="Failed to Run Gluster Volume Info",
+ logger=logger)
+ try:
+ tree = etree.fromstring(data)
+ statusStr = tree.find('volInfo/volumes/volume/statusStr').text
+ except (ParseError, AttributeError) as e:
+ fail("Invalid Volume: %s" % e, logger=logger)
+
+ if statusStr != "Started":
+ fail("Volume %s is not online" % args.volume, logger=logger)
+
+ mkdirp(session_dir, exit_on_err=True, logger=logger)
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)
+
+ # Configure cluster for pasword-less SSH
+ ssh_setup(args)
+
+ # Enable volume options for changelog capture
+ enable_volume_options(args)
+
+ # Test options
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
+ if not args.since_time and not args.end_time and not args.full:
+ fail("Please specify either {--since-time and optionally --end-time} "
+ "or --full", logger=logger)
+
+ if args.since_time and args.end_time and args.full:
+ fail("Please specify either {--since-time and optionally --end-time} "
+ "or --full, but not both",
+ logger=logger)
+
+ if args.end_time and not args.since_time:
+ fail("Please specify --since-time as well", logger=logger)
+
+ # Start query command processing
+ start = -1
+ end = -1
+ if args.since_time:
+ start = args.since_time
+ if args.end_time:
+ end = args.end_time
+ else:
+ start = 0 # --full option is handled separately
+
+ logger.debug("Query is called - Session: %s, Volume: %s, "
+ "Start time: %s, End time: %s"
+ % ("default", args.volume, start, end))
+
+ prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
+ gtmpfilename = prefix + next(tempfile._get_candidate_names())
+
+ run_cmd_nodes("query", args, start=start, end=end,
+ tmpfilename=gtmpfilename)
+
+ # Merger
+ if args.full:
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
+ else:
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+ os.remove(args.outfile + ".db")
+ except (IOError, OSError):
+ pass
+
+ run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
+
+ sys.stdout.write("Generated output file %s\n" % args.outfile)
+
+
+def mode_pre(session_dir, args):
+ global gtmpfilename
+ global g_pid_nodefile_map
+
+ """
+ Read from Session file and write to session.pre file
+ """
+ endtime_to_update = int(time.time()) - get_changelog_rollover_time(
+ args.volume)
+ status_file = os.path.join(session_dir, args.volume, "status")
+ status_file_pre = status_file + ".pre"
+
+ mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)
+
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
+
+ # If Pre status file exists and running pre command again
+ if os.path.exists(status_file_pre) and not args.regenerate_outfile:
+ fail("Post command is not run after last pre, "
+ "use --regenerate-outfile")
+
+ start = 0
+ try:
+ with open(status_file) as f:
+ start = int(f.read().strip())
+ except ValueError:
+ pass
+ except (OSError, IOError) as e:
+ fail("Error Opening Session file %s: %s"
+ % (status_file, e), logger=logger)
+
+ logger.debug("Pre is called - Session: %s, Volume: %s, "
+ "Start time: %s, End time: %s"
+ % (args.session, args.volume, start, endtime_to_update))
+
+ prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
+ gtmpfilename = prefix + next(tempfile._get_candidate_names())
+
+ run_cmd_nodes("pre", args, start=start, end=-1, tmpfilename=gtmpfilename)
+
+ # Merger
+ if args.full:
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
+ else:
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+ os.remove(args.outfile + ".db")
+ except (IOError, OSError):
+ pass
+
+ run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
+
+ with open(status_file_pre, "w") as f:
+ f.write(str(endtime_to_update))
+
+ sys.stdout.write("Generated output file %s\n" % args.outfile)
+
+
+def mode_post(session_dir, args):
+ """
+ If pre session file exists, overwrite session file
+ If pre session file does not exists, return ERROR
+ """
+ status_file = os.path.join(session_dir, args.volume, "status")
+ logger.debug("Post is called - Session: %s, Volume: %s"
+ % (args.session, args.volume))
+ status_file_pre = status_file + ".pre"
+
+ if os.path.exists(status_file_pre):
+ run_cmd_nodes("post", args)
+ os.rename(status_file_pre, status_file)
+ sys.stdout.write("Session %s with volume %s updated\n" %
+ (args.session, args.volume))
+ sys.exit(0)
+ else:
+ fail("Pre script is not run", logger=logger)
+
+
+def mode_delete(session_dir, args):
+ run_cmd_nodes("delete", args)
+ shutil.rmtree(os.path.join(session_dir, args.volume),
+ onerror=handle_rm_error)
+ sys.stdout.write("Session %s with volume %s deleted\n" %
+ (args.session, args.volume))
+
+ # If the session contains only this volume, then cleanup the
+ # session directory. If a session contains multiple volumes
+ # then os.rmdir will fail with ENOTEMPTY
+ try:
+ os.rmdir(session_dir)
+ except OSError as e:
+ if not e.errno == ENOTEMPTY:
+ logger.warn("Failed to delete session directory: %s" % e)
+
+
+def mode_list(session_dir, args):
+ """
+ List available sessions to stdout, if session name is set
+ only list that session.
+ """
+ if args.session:
+ if not os.path.exists(os.path.join(session_dir, args.session)):
+ fail("Invalid Session", logger=logger)
+ sessions = [args.session]
+ else:
+ sessions = []
+ for d in os.listdir(session_dir):
+ if d != ".keys":
+ sessions.append(d)
+
+ output = []
+ for session in sessions:
+ # Session Volume Last Processed
+ volnames = os.listdir(os.path.join(session_dir, session))
+
+ for volname in volnames:
+ if args.volume and args.volume != volname:
+ continue
+
+ status_file = os.path.join(session_dir, session, volname, "status")
+ last_processed = None
+ try:
+ with open(status_file) as f:
+ last_processed = f.read().strip()
+ except (OSError, IOError) as e:
+ if e.errno == ENOENT:
+ continue
+ else:
+ raise
+ output.append((session, volname, last_processed))
+
+ if output:
+ sys.stdout.write("%s %s %s\n" % ("SESSION".ljust(25),
+ "VOLUME".ljust(25),
+ "SESSION TIME".ljust(25)))
+ sys.stdout.write("-"*75)
+ sys.stdout.write("\n")
+ for session, volname, last_processed in output:
+ sess_time = 'Session Corrupted'
+ if last_processed:
+ try:
+ sess_time = human_time(last_processed)
+ except TypeError:
+ sess_time = 'Session Corrupted'
+ sys.stdout.write("%s %s %s\n" % (session.ljust(25),
+ volname.ljust(25),
+ sess_time.ljust(25)))
+
+ if not output:
+ if args.session or args.volume:
+ fail("Invalid Session", logger=logger)
+ else:
+ sys.stdout.write("No sessions found.\n")
+
+
+def main():
+ global gtmpfilename
+
+ args = None
+
+ try:
+ args = _get_args()
+ mkdirp(conf.get_opt("session_dir"), exit_on_err=True)
+
+ # force the default session name if mode is "query"
+ if args.mode == "query":
+ args.session = "default"
+
+ if args.mode == "list":
+ session_dir = conf.get_opt("session_dir")
+ else:
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+
+ if not os.path.exists(session_dir) and \
+ args.mode not in ["create", "list", "query"]:
+ fail("Invalid session %s" % args.session)
+
+ # volume involved, validate the volume first
+ if args.mode not in ["list"]:
+ validate_volume(args.volume)
+
+
+ # "default" is a system defined session name
+ if args.mode in ["create", "post", "pre", "delete"] and \
+ args.session == "default":
+ fail("Invalid session %s" % args.session)
+
+ vol_dir = os.path.join(session_dir, args.volume)
+ if not os.path.exists(vol_dir) and args.mode not in \
+ ["create", "list", "query"]:
+ fail("Session %s not created with volume %s" %
+ (args.session, args.volume))
+
+ mkdirp(os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "cli.log")
+ setup_logger(logger, log_file, args.debug)
+
+ # globals() will have all the functions already defined.
+ # mode_<args.mode> will be the function name to be called
+ globals()["mode_" + args.mode](session_dir, args)
+ except KeyboardInterrupt:
+ if args is not None:
+ if args.mode == "pre" or args.mode == "query":
+ # cleanup session
+ if gtmpfilename is not None:
+ # no more interrupts until we clean up
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
+ run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
+
+ # Interrupted, exit with non zero error code
+ sys.exit(2)
diff --git a/tools/glusterfind/src/nodeagent.py b/tools/glusterfind/src/nodeagent.py
new file mode 100644
index 00000000000..679daa6fa76
--- /dev/null
+++ b/tools/glusterfind/src/nodeagent.py
@@ -0,0 +1,139 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import shutil
+import sys
+import os
+import logging
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+from errno import ENOTEMPTY
+
+from utils import setup_logger, mkdirp, handle_rm_error
+import conf
+
+logger = logging.getLogger()
+
+
+def mode_cleanup(args):
+ working_dir = os.path.join(conf.get_opt("working_dir"),
+ args.session,
+ args.volume,
+ args.tmpfilename)
+
+ mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "changelog.log")
+
+ setup_logger(logger, log_file)
+
+ try:
+ shutil.rmtree(working_dir, onerror=handle_rm_error)
+ except (OSError, IOError) as e:
+ logger.error("Failed to delete working directory: %s" % e)
+ sys.exit(1)
+
+
+def mode_create(args):
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+
+ if not os.path.exists(status_file) or args.reset_session_time:
+ with open(status_file, "w") as f:
+ f.write(args.time_to_update)
+
+ sys.exit(0)
+
+
+def mode_post(args):
+ session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ status_file_pre = status_file + ".pre"
+
+ if os.path.exists(status_file_pre):
+ os.rename(status_file_pre, status_file)
+ sys.exit(0)
+
+
+def mode_delete(args):
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+ shutil.rmtree(os.path.join(session_dir, args.volume),
+ onerror=handle_rm_error)
+
+ # If the session contains only this volume, then cleanup the
+ # session directory. If a session contains multiple volumes
+ # then os.rmdir will fail with ENOTEMPTY
+ try:
+ os.rmdir(session_dir)
+ except OSError as e:
+ if not e.errno == ENOTEMPTY:
+ logger.warn("Failed to delete session directory: %s" % e)
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description="Node Agent")
+ subparsers = parser.add_subparsers(dest="mode")
+
+ parser_cleanup = subparsers.add_parser('cleanup')
+ parser_cleanup.add_argument("session", help="Session Name")
+ parser_cleanup.add_argument("volume", help="Volume Name")
+ parser_cleanup.add_argument("tmpfilename", help="Temporary File Name")
+ parser_cleanup.add_argument("--debug", help="Debug", action="store_true")
+
+ parser_session_create = subparsers.add_parser('create')
+ parser_session_create.add_argument("session", help="Session Name")
+ parser_session_create.add_argument("volume", help="Volume Name")
+ parser_session_create.add_argument("brick", help="Brick Path")
+ parser_session_create.add_argument("time_to_update", help="Time to Update")
+ parser_session_create.add_argument("--reset-session-time",
+ help="Reset Session Time",
+ action="store_true")
+ parser_session_create.add_argument("--debug", help="Debug",
+ action="store_true")
+
+ parser_post = subparsers.add_parser('post')
+ parser_post.add_argument("session", help="Session Name")
+ parser_post.add_argument("volume", help="Volume Name")
+ parser_post.add_argument("brick", help="Brick Path")
+ parser_post.add_argument("--debug", help="Debug",
+ action="store_true")
+
+ parser_delete = subparsers.add_parser('delete')
+ parser_delete.add_argument("session", help="Session Name")
+ parser_delete.add_argument("volume", help="Volume Name")
+ parser_delete.add_argument("--debug", help="Debug",
+ action="store_true")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = _get_args()
+
+ # globals() will have all the functions already defined.
+ # mode_<args.mode> will be the function name to be called
+ globals()["mode_" + args.mode](args)
diff --git a/tools/glusterfind/src/tool.conf.in b/tools/glusterfind/src/tool.conf.in
new file mode 100644
index 00000000000..a80f4a784c0
--- /dev/null
+++ b/tools/glusterfind/src/tool.conf.in
@@ -0,0 +1,10 @@
+[vars]
+session_dir=@GLUSTERD_WORKDIR@/glusterfind/
+working_dir=@GLUSTERFSD_MISCDIR@/glusterfind/
+log_dir=/var/log/glusterfs/glusterfind/
+nodeagent=@GLUSTERFS_LIBEXECDIR@/glusterfind/nodeagent.py
+brick_ignore_dirs=.glusterfs,.trashcan
+
+[change_detectors]
+changelog=@GLUSTERFS_LIBEXECDIR@/glusterfind/changelog.py
+brickfind=@GLUSTERFS_LIBEXECDIR@/glusterfind/brickfind.py \ No newline at end of file
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
new file mode 100644
index 00000000000..906ebd8f252
--- /dev/null
+++ b/tools/glusterfind/src/utils.py
@@ -0,0 +1,267 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+from subprocess import PIPE, Popen
+from errno import EEXIST, ENOENT
+import xml.etree.cElementTree as etree
+import logging
+import os
+from datetime import datetime
+
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+DEFAULT_CHANGELOG_INTERVAL = 15
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
+
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+cache_data = {}
+
+
+class RecordType(object):
+ NEW = "NEW"
+ MODIFY = "MODIFY"
+ RENAME = "RENAME"
+ DELETE = "DELETE"
+
+
+def cache_output(func):
+ def wrapper(*args, **kwargs):
+ global cache_data
+ if cache_data.get(func.__name__, None) is None:
+ cache_data[func.__name__] = func(*args, **kwargs)
+
+ return cache_data[func.__name__]
+ return wrapper
+
+
+def handle_rm_error(func, path, exc_info):
+ if exc_info[1].errno == ENOENT:
+ return
+
+ raise exc_info[1]
+
+
+def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
+ ignore_dirs=[], subdirs_crawl=True):
+ if path in ignore_dirs:
+ return
+
+ # Capture filter_func output and pass it to callback function
+ filter_result = filter_func(path)
+ if filter_result is not None:
+ callback_func(path, filter_result, os.path.isdir(path))
+
+ for p in os.listdir(path):
+ full_path = os.path.join(path, p)
+
+ is_dir = os.path.isdir(full_path)
+ if is_dir:
+ if subdirs_crawl:
+ find(full_path, callback_func, filter_func, ignore_dirs)
+ else:
+ filter_result = filter_func(full_path)
+ if filter_result is not None:
+ callback_func(full_path, filter_result)
+ else:
+ filter_result = filter_func(full_path)
+ if filter_result is not None:
+ callback_func(full_path, filter_result, is_dir)
+
+
+def output_write(f, path, prefix=".", encode=False, tag="",
+ field_separator=" "):
+ if path == "":
+ return
+
+ if prefix != ".":
+ path = os.path.join(prefix, path)
+
+ if encode:
+ path = quote_plus_space_newline(path)
+
+ # set the field separator
+ FS = "" if tag == "" else field_separator
+
+ f.write("%s%s%s\n" % (tag.strip(), FS, path))
+
+
+def human_time(ts):
+ return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+def setup_logger(logger, path, debug=False):
+ if debug:
+ logger.setLevel(logging.DEBUG)
+ else:
+ logger.setLevel(logging.INFO)
+
+ # create the logging file handler
+ fh = logging.FileHandler(path)
+
+ formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
+ "[%(module)s - %(lineno)s:%(funcName)s] "
+ "- %(message)s")
+
+ fh.setFormatter(formatter)
+
+ # add handler to logger object
+ logger.addHandler(fh)
+
+
+def create_file(path, exit_on_err=False, logger=None):
+ """
+ If file exists overwrite. Print error to stderr and exit
+ if exit_on_err is set, else raise the exception. Consumer
+ should handle the exception.
+ """
+ try:
+ open(path, 'w').close()
+ except (OSError, IOError) as e:
+ if exit_on_err:
+ fail("Failed to create file %s: %s" % (path, e), logger=logger)
+ else:
+ raise
+
+
+def mkdirp(path, exit_on_err=False, logger=None):
+ """
+ Try creating required directory structure
+ ignore EEXIST and raise exception for rest of the errors.
+ Print error in stderr and exit if exit_on_err is set, else
+ raise exception.
+ """
+ try:
+ os.makedirs(path)
+ except (OSError, IOError) as e:
+ if e.errno == EEXIST and os.path.isdir(path):
+ pass
+ else:
+ if exit_on_err:
+ fail("Fail to create dir %s: %s" % (path, e), logger=logger)
+ else:
+ raise
+
+
+def fail(msg, code=1, logger=None):
+ """
+ Write error to stderr and exit
+ """
+ if logger:
+ logger.error(msg)
+ sys.stderr.write("%s\n" % msg)
+ sys.exit(code)
+
+
+def execute(cmd, exit_msg=None, logger=None):
+ """
+ If failure_msg is not None then return returncode, out and error.
+ If failure msg is set, write to stderr and exit.
+ """
+ p = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True)
+
+ (out, err) = p.communicate()
+ if p.returncode != 0 and exit_msg is not None:
+ fail("%s: %s" % (exit_msg, err), p.returncode, logger=logger)
+
+ return (p.returncode, out, err)
+
+
+def symlink_gfid_to_path(brick, gfid):
+ """
+ Each directories are symlinked to file named GFID
+ in .glusterfs directory of brick backend. Using readlink
+ we get PARGFID/basename of dir. readlink recursively till
+ we get PARGFID as ROOT_GFID.
+ """
+ if gfid == ROOT_GFID:
+ return ""
+
+ out_path = ""
+ while True:
+ path = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ path_readlink = os.readlink(path)
+ pgfid = os.path.dirname(path_readlink)
+ out_path = os.path.join(os.path.basename(path_readlink), out_path)
+ if pgfid == "../../00/00/%s" % ROOT_GFID:
+ break
+ gfid = os.path.basename(pgfid)
+ return out_path
+
+
+@cache_output
+def get_my_uuid():
+ cmd = ["gluster", "system::", "uuid", "get", "--xml"]
+ rc, out, err = execute(cmd)
+
+ if rc != 0:
+ return None
+
+ tree = etree.fromstring(out)
+ uuid_el = tree.find("uuidGenerate/uuid")
+ return uuid_el.text
+
+
+def is_host_local(host_uuid):
+ # Get UUID only if it is not done previously
+ # else Cache the UUID value
+ my_uuid = get_my_uuid()
+ if my_uuid == host_uuid:
+ return True
+
+ return False
+
+
+def get_changelog_rollover_time(volumename):
+ cmd = ["gluster", "volume", "get", volumename,
+ "changelog.rollover-time", "--xml"]
+ rc, out, err = execute(cmd)
+
+ if rc != 0:
+ return DEFAULT_CHANGELOG_INTERVAL
+
+ try:
+ tree = etree.fromstring(out)
+ val = tree.find('volGetopts/Opt/Value').text
+ if val is not None:
+ # Filter the value by split, as it may be 'X (DEFAULT)'
+ # and we only need 'X'
+ return int(val.split(' ', 1)[0])
+ except ParseError:
+ return DEFAULT_CHANGELOG_INTERVAL
+
+
+def output_path_prepare(path, args):
+ """
+ If Prefix is set, joins to Path, removes ending slash
+ and encodes it.
+ """
+ if args.output_prefix != ".":
+ path = os.path.join(args.output_prefix, path)
+ if path.endswith("/"):
+ path = path[0:len(path)-1]
+
+ if args.no_encode:
+ return path
+ else:
+ return quote_plus_space_newline(path)
+
+
+def unquote_plus_space_newline(s):
+ return s.replace(SPACE_ESCAPE_CHAR, " ")\
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+
+def quote_plus_space_newline(s):
+ return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+ .replace(" ", SPACE_ESCAPE_CHAR)\
+ .replace("\n", NEWLINE_ESCAPE_CHAR)
diff --git a/tools/setgfid2path/Makefile.am b/tools/setgfid2path/Makefile.am
new file mode 100644
index 00000000000..c14787a80ce
--- /dev/null
+++ b/tools/setgfid2path/Makefile.am
@@ -0,0 +1,5 @@
+SUBDIRS = src
+
+EXTRA_DIST = gluster-setgfid2path.8
+
+man8_MANS = gluster-setgfid2path.8
diff --git a/tools/setgfid2path/gluster-setgfid2path.8 b/tools/setgfid2path/gluster-setgfid2path.8
new file mode 100644
index 00000000000..2e228ca8514
--- /dev/null
+++ b/tools/setgfid2path/gluster-setgfid2path.8
@@ -0,0 +1,54 @@
+
+.\" Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+.\" This file is part of GlusterFS.
+.\"
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
+.\"
+.\"
+.TH gluster-setgfid2path 8 "Command line utility to set GFID to Path Xattrs"
+.SH NAME
+gluster-setgfid2path - Gluster tool to set GFID to Path xattrs
+.SH SYNOPSIS
+.B gluster-setgfid2path
+.IR file
+.SH DESCRIPTION
+New feature introduced with Gluster release 3.12, to find full path from GFID.
+This feature can be enabled using Volume set command \fBgluster volume set
+<VOLUME> storage.gfid2path enable\fR
+.PP
+Once \fBgfid2path\fR feature is enabled, it starts recording the necessary
+xattrs required for the feature. But it will not add xattrs for the already
+existing files. This tool provides facility to update the gfid2path xattrs for
+the given file path.
+
+.SH EXAMPLES
+To add xattrs of a single file,
+.PP
+.nf
+.RS
+gluster-setgfid2path /bricks/b1/hello.txt
+.RE
+.fi
+.PP
+To set xattr for all the existing files, run the below script on each bricks.
+.PP
+.nf
+.RS
+BRICK=/bricks/b1
+find $BRICK -type d \\( -path "${BRICK}/.trashcan" -o -path \\
+ "${BRICK}/.glusterfs" \\) -prune -o -type f \\
+ -exec gluster-setgfid2path {} \\;
+.RE
+.fi
+.PP
+.SH SEE ALSO
+.nf
+\fBgluster\fR(8)
+\fR
+.fi
+.SH COPYRIGHT
+.nf
+Copyright(c) 2017 Red Hat, Inc. <http://www.redhat.com>
diff --git a/tools/setgfid2path/src/Makefile.am b/tools/setgfid2path/src/Makefile.am
new file mode 100644
index 00000000000..7316d117070
--- /dev/null
+++ b/tools/setgfid2path/src/Makefile.am
@@ -0,0 +1,16 @@
+gluster_setgfid2pathdir = $(sbindir)
+
+if WITH_SERVER
+gluster_setgfid2path_PROGRAMS = gluster-setgfid2path
+endif
+
+gluster_setgfid2path_SOURCES = main.c
+
+gluster_setgfid2path_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+gluster_setgfid2path_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/tools/setgfid2path/src/main.c b/tools/setgfid2path/src/main.c
new file mode 100644
index 00000000000..4320a7b2481
--- /dev/null
+++ b/tools/setgfid2path/src/main.c
@@ -0,0 +1,130 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <libgen.h>
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
+
+#define MAX_GFID2PATH_LINK_SUP 500
+#define GFID_SIZE 16
+#define GFID_XATTR_KEY "trusted.gfid"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ struct stat st;
+ char *dname = NULL;
+ char *bname = NULL;
+ ssize_t ret_size = 0;
+ uuid_t pgfid_raw = {
+ 0,
+ };
+ char pgfid[36 + 1] = "";
+ char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+ 0,
+ };
+ char pgfid_bname[1024] = {
+ 0,
+ };
+ char *key = NULL;
+ char *val = NULL;
+ size_t key_size = 0;
+ size_t val_size = 0;
+ const char *file_path = NULL;
+ char *file_path1 = NULL;
+ char *file_path2 = NULL;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: setgfid2path <file-path>\n");
+ return -1;
+ }
+
+ ret = sys_lstat(argv[1], &st);
+ if (ret != 0) {
+ fprintf(stderr, "Invalid File Path\n");
+ return -1;
+ }
+
+ if (st.st_nlink >= MAX_GFID2PATH_LINK_SUP) {
+ fprintf(stderr,
+ "Number of Hardlink support exceeded. "
+ "max=%d\n",
+ MAX_GFID2PATH_LINK_SUP);
+ return -1;
+ }
+
+ file_path = argv[1];
+ file_path1 = strdup(file_path);
+ file_path2 = strdup(file_path);
+
+ dname = dirname(file_path1);
+ bname = basename(file_path2);
+
+ /* Get GFID of Parent directory */
+ ret_size = sys_lgetxattr(dname, GFID_XATTR_KEY, pgfid_raw, GFID_SIZE);
+ if (ret_size != GFID_SIZE) {
+ fprintf(stderr, "Failed to get GFID of parent directory. dir=%s\n",
+ dname);
+ ret = -1;
+ goto out;
+ }
+
+ /* Convert to UUID format */
+ if (uuid_utoa_r(pgfid_raw, pgfid) == NULL) {
+ fprintf(stderr,
+ "Failed to format GFID of parent directory. "
+ "dir=%s GFID=%s\n",
+ dname, pgfid_raw);
+ ret = -1;
+ goto out;
+ }
+
+ /* Find xxhash for PGFID/BaseName */
+ snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", pgfid, bname);
+ gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname),
+ GF_XXHSUM64_DEFAULT_SEED, xxh64);
+
+ key_size = SLEN(GFID2PATH_XATTR_KEY_PREFIX) + GF_XXH64_DIGEST_LENGTH * 2 +
+ 1;
+ key = alloca(key_size);
+ snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64);
+
+ val_size = UUID_CANONICAL_FORM_LEN + NAME_MAX + 2;
+ val = alloca(val_size);
+ snprintf(val, val_size, "%s/%s", pgfid, bname);
+
+ /* Set the Xattr, ignore if same key xattr already exists */
+ ret = sys_lsetxattr(file_path, key, val, strlen(val), XATTR_CREATE);
+ if (ret == -1) {
+ if (errno == EEXIST) {
+ printf("Xattr already exists, ignoring..\n");
+ ret = 0;
+ goto out;
+ }
+
+ fprintf(stderr, "Failed to set gfid2path xattr. errno=%d\n error=%s",
+ errno, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ printf("Success. file=%s key=%s value=%s\n", file_path, key, val);
+
+out:
+ if (file_path1 != NULL)
+ free(file_path1);
+
+ if (file_path2 != NULL)
+ free(file_path2);
+
+ return ret;
+}
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index b1643d26ca0..ef20cbb64fa 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,13 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system
+if BUILD_GNFS
+ GNFS_DIR = nfs
+endif
+
+DIST_SUBDIRS = cluster storage protocol performance debug features \
+ mount nfs mgmt system playground meta
+
+SUBDIRS = cluster storage protocol performance debug features \
+ mount ${GNFS_DIR} mgmt system playground meta
+
+EXTRA_DIST = xlator.sym
CLEANFILES =
diff --git a/xlators/bindings/Makefile.am b/xlators/bindings/Makefile.am
deleted file mode 100644
index f7766580257..00000000000
--- a/xlators/bindings/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = $(BINDINGS_SUBDIRS)
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
deleted file mode 100644
index c0b9141c667..00000000000
--- a/xlators/bindings/python/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-
-xlator_PROGRAMS = python.so
-
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings
-
-python_PYTHON = gluster.py glustertypes.py glusterstack.py
-
-pythondir = $(xlatordir)/python
-
-python_so_SOURCES = python.c
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- $(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
-
-AM_LDFLAGS = $(PYTHON_LDFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
deleted file mode 100644
index ee0eb131011..00000000000
--- a/xlators/bindings/python/src/gluster.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-from glusterstack import *
-import sys
-import inspect
-
-libglusterfs = CDLL("libglusterfs.so")
-_gf_log = libglusterfs._gf_log
-_gf_log.restype = c_int32
-_gf_log.argtypes = [c_char_p, c_char_p, c_char_p, c_int32, c_int, c_char_p]
-
-gf_log_loglevel = c_int.in_dll(libglusterfs, "gf_log_loglevel")
-
-GF_LOG_NONE = 0
-GF_LOG_CRITICAL = 1
-GF_LOG_ERROR = 2
-GF_LOG_WARNING = 3
-GF_LOG_DEBUG = 4
-
-def gf_log(module, level, fmt, *params):
- if level <= gf_log_loglevel:
- frame = sys._getframe(1)
- _gf_log(module, frame.f_code.co_filename, frame.f_code.co_name,
- frame.f_lineno, level, fmt, *params)
-
-class ComplexTranslator(object):
- def __init__(self, xlator):
- self.xlator = xlator_t.from_address(xlator)
-
- def __getattr__(self, item):
- return getattr(self.xlator, item)
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
deleted file mode 100644
index ba24c81652e..00000000000
--- a/xlators/bindings/python/src/glusterstack.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-
-libc = CDLL("libc.so.6")
-calloc = libc.calloc
-calloc.argtypes = [c_int, c_int]
-calloc.restype = c_void_p
-
-# TODO: Can these be done in C somehow?
-def stack_wind(frame, rfn, obj, fn, *params):
- """Frame is a frame object"""
- _new = cast(calloc(1, sizeof(call_frame_t)), POINTER(call_frame_t))
- _new[0].root = frame.root
- _new[0].next = frame.root[0].frames.next
- _new[0].prev = pointer(frame.root[0].frames)
- if frame.root[0].frames.next:
- frame.root[0].frames.next[0].prev = _new
- frame.root[0].frames.next = _new
- _new[0].this = obj
- # TODO: Type checking like tmp_cbk?
- _new[0].ret = rfn
- _new[0].parent = pointer(frame)
- _new[0].cookie = cast(_new, c_void_p)
- # TODO: Initialize lock
- #_new.lock.init()
- frame.ref_count += 1
- fn(_new, obj, *params)
-
-def stack_unwind(frame, *params):
- """Frame is a frame object"""
- fn = frame[0].ret
- parent = frame[0].parent[0]
- parent.ref_count -= 1
-
- op_ret = params[0]
- op_err = params[1]
- params = params[2:]
- fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
- op_ret, op_err, *params)
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
deleted file mode 100644
index e9069d07c72..00000000000
--- a/xlators/bindings/python/src/glustertypes.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-import collections
-
-#
-# Forward declaration of some gluster types
-#
-class call_frame_t(Structure):
- pass
-
-class call_ctx_t(Structure):
- pass
-
-class call_pool_t(Structure):
- pass
-
-class xlator_t(Structure):
- def _getFirstChild(self):
- return self.children[0].xlator
- firstChild = property(_getFirstChild)
-
-class xlator_list_t(Structure):
- pass
-
-class xlator_fops(Structure):
- pass
-
-class xlator_mops(Structure):
- pass
-
-class glusterfs_ctx_t(Structure):
- pass
-
-class list_head(Structure):
- pass
-
-class dict_t(Structure):
- pass
-
-class inode_table_t(Structure):
- pass
-
-class fd_t(Structure):
- pass
-
-class iovec(Structure):
- _fields_ = [
- ("iov_base", c_void_p),
- ("iov_len", c_size_t),
- ]
-
- def __init__(self, s):
- self.iov_base = cast(c_char_p(s), c_void_p)
- self.iov_len = len(s)
-
- def getBytes(self):
- return string_at(self.iov_base, self.iov_len)
-
-# This is a pthread_spinlock_t
-# TODO: what happens to volatile-ness?
-gf_lock_t = c_int
-
-uid_t = c_uint32
-gid_t = c_uint32
-pid_t = c_int32
-
-off_t = c_int64
-
-#
-# Function pointer types
-#
-ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t),
- POINTER(xlator_t), c_int32, c_int32)
-
-fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t))
-init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t))
-event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p)
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head)),
- ]
-
-call_frame_t._fields_ = [
- ("root", POINTER(call_ctx_t)),
- ("parent", POINTER(call_frame_t)),
- ("next", POINTER(call_frame_t)),
- ("prev", POINTER(call_frame_t)),
- ("local", c_void_p),
- ("this", POINTER(xlator_t)),
- ("ret", ret_fn_t),
- ("ref_count", c_int32),
- ("lock", gf_lock_t),
- ("cookie", c_void_p),
- ("op", c_int32),
- ("type", c_int8),
- ]
-
-call_ctx_t._fields_ = [
- ("all_frames", list_head),
- ("trans", c_void_p),
- ("pool", call_pool_t),
- ("unique", c_uint64),
- ("state", c_void_p),
- ("uid", uid_t),
- ("gid", gid_t),
- ("pid", pid_t),
- ("frames", call_frame_t),
- ("req_refs", POINTER(dict_t)),
- ("rsp_refs", POINTER(dict_t)),
- ]
-
-xlator_t._fields_ = [
- ("name", c_char_p),
- ("type", c_char_p),
- ("next", POINTER(xlator_t)),
- ("prev", POINTER(xlator_t)),
- ("parent", POINTER(xlator_t)),
- ("children", POINTER(xlator_list_t)),
- ("fops", POINTER(xlator_fops)),
- ("mops", POINTER(xlator_mops)),
- ("fini", fini_fn_t),
- ("init", init_fn_t),
- ("notify", event_notify_fn_t),
- ("options", POINTER(dict_t)),
- ("ctx", POINTER(glusterfs_ctx_t)),
- ("itable", POINTER(inode_table_t)),
- ("ready", c_char),
- ("private", c_void_p),
- ]
-
-xlator_list_t._fields_ = [
- ("xlator", POINTER(xlator_t)),
- ("next", POINTER(xlator_list_t)),
- ]
-
-fop_functions = collections.defaultdict(lambda: c_void_p)
-fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod',
- 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access',
- 'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink',
- 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush',
- 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir',
- 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir',
- # TODO: Call backs?
- ]
-
-fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec), c_int32,
- off_t)
-
-fop_functions['writev'] = fop_writev_t
-xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names]
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
deleted file mode 100644
index 3310a211513..00000000000
--- a/xlators/bindings/python/src/python.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-typedef struct
-{
- char *scriptname;
- PyObject *pXlator;
- PyObject *pScriptModule;
- PyObject *pGlusterModule;
- PyThreadState *pInterp;
-
- PyObject *pFrameType, *pVectorType, *pFdType;
-} python_private_t;
-
-int32_t
-python_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- python_private_t *priv = (python_private_t *)this->private;
- gf_log("python", GF_LOG_DEBUG, "In writev");
- if (PyObject_HasAttrString(priv->pXlator, "writev"))
- {
-
- PyObject *retval = PyObject_CallMethod(priv->pXlator, "writev",
- "O O O i l",
- PyObject_CallMethod(priv->pFrameType, "from_address", "O&", PyLong_FromVoidPtr, frame),
- PyObject_CallMethod(priv->pFdType, "from_address", "O&", PyLong_FromVoidPtr, fd),
- PyObject_CallMethod(priv->pVectorType, "from_address", "O&", PyLong_FromVoidPtr, vector),
- count,
- offset);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- }
- Py_XDECREF(retval);
- }
- else
- {
- return default_writev(frame, this, fd, vector, count, offset);
- }
- return 0;
-}
-
-struct xlator_fops fops = {
- .writev = python_writev
-};
-
-static PyObject *
-AnonModule_FromFile (const char* fname)
-{
- // Get the builtins
- PyThreadState* pThread = PyThreadState_Get();
- PyObject *pBuiltins = pThread->interp->builtins;
-
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return NULL;
- }
-
- // Create a new dictionary for running code in
- PyObject *pModuleDict = PyDict_New();
- PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins);
- Py_INCREF(pBuiltins);
-
- // Run the file in the new context
- FILE* fp = fopen(fname, "r");
- PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict);
- fclose(fp);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
-
- // Create an object to hold the new context
- PyRun_String("class ModuleWrapper(object):\n\tpass\n", Py_single_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
- PyObject *pModule = PyRun_String("ModuleWrapper()", Py_eval_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_XDECREF(pModule);
- return NULL;
- }
-
- // Set the new context's dictionary to the one we used to run the code
- // inside
- PyObject_SetAttrString(pModule, "__dict__", pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_DECREF(pModule);
- return NULL;
- }
-
- return pModule;
-}
-
-int32_t
-init (xlator_t *this)
-{
- // This is ok to call more than once per process
- Py_InitializeEx(0);
-
- if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
- "FATAL: python should have exactly one child");
- return -1;
- }
-
- python_private_t *priv = CALLOC (sizeof (python_private_t), 1);
- ERR_ABORT (priv);
-
- data_t *scriptname = dict_get (this->options, "scriptname");
- if (scriptname) {
- priv->scriptname = data_to_str(scriptname);
- } else {
- gf_log("python", GF_LOG_ERROR,
- "FATAL: python requires the scriptname parameter");
- return -1;
- }
-
- priv->pInterp = Py_NewInterpreter();
-
- // Adjust python's path
- PyObject *syspath = PySys_GetObject("path");
- PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- gf_log("python", GF_LOG_DEBUG,
- "Loading gluster module");
-
- priv->pGlusterModule = PyImport_ImportModule("gluster");
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return -1;
- }
-
- priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t");
- priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t");
- priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
-
- gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
- priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
- if (!priv->pScriptModule || PyErr_Occurred())
- {
- gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname);
- PyErr_Print();
- return -1;
- }
-
- if (!PyObject_HasAttrString(priv->pScriptModule, "xlator"))
- {
- gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname);
- return -1;
- }
- gf_log("python", GF_LOG_DEBUG, "Instantiating translator");
- priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&",
- PyLong_FromVoidPtr, this);
- if (PyErr_Occurred() || !priv->pXlator)
- {
- PyErr_Print();
- return -1;
- }
-
- this->private = priv;
-
- gf_log ("python", GF_LOG_DEBUG, "python xlator loaded");
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- python_private_t *priv = (python_private_t*)(this->private);
- Py_DECREF(priv->pXlator);
- Py_DECREF(priv->pScriptModule);
- Py_DECREF(priv->pGlusterModule);
- Py_DECREF(priv->pFrameType);
- Py_DECREF(priv->pFdType);
- Py_DECREF(priv->pVectorType);
- Py_EndInterpreter(priv->pInterp);
- return;
-}
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
deleted file mode 100644
index 507455c856a..00000000000
--- a/xlators/bindings/python/src/testxlator.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-
-"""
-This is a test translator written in python.
-
-Important things to note:
- This file must be import-able from glusterfsd. This probably means
- setting PYTHONPATH to where this file is located.
-
- This file must have a top-level xlator class object that will be
- used to instantiate individual translators.
-"""
-from gluster import *
-
-class MyXlator(ComplexTranslator):
- name = "MyXlator"
- def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
- stack_unwind(frame, op_ret, op_errno, buf)
- return 0
-
- def writev(self, frame, fd, vector, count, offset):
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
- # TODO: Use cookie to pass this to writev_cbk
- old_count = vector.iov_len
-
- data = vector.getBytes().encode("zlib")
-
- vector = iovec(data)
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
-
- @ret_fn_t
- def rfn(frame, prev, this, op_ret, op_errno, *params):
- if len(params) == 0:
- params = [0]
- return self.writev_cbk(frame, prev, old_count, op_errno, *params)
-
- stack_wind(frame, rfn, self.firstChild,
- self.firstChild[0].fops[0].writev, fd, vector, count, offset)
- return 0
-
-xlator = MyXlator
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index 0990822a7d3..8e067d5ab58 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = stripe afr dht
+SUBDIRS = afr dht ec
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 95db5dd9645..610819b28fc 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,35 +1,35 @@
-xlator_LTLIBRARIES = afr.la pump.la
+xlator_LTLIBRARIES = afr.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
- afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
- afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
- afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-lk-common.c \
+ afr-read-txn.c \
$(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
-afr_la_SOURCES = $(afr_common_source) afr.c
-afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
+ afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
+ afr-self-heal-name.c
-pump_la_LDFLAGS = -module -avoidversion
-pump_la_SOURCES = $(afr_common_source) pump.c
-pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+afr_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
+afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
- afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
- afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
- afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
- $(top_builddir)/glusterfsd/src/glusterfsd.h
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
+ afr-common.c afr-self-heald.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h afr-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/rpc-lib/src -shared -nostartfiles $(GF_CFLAGS)
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
- rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 0e1b6dce682..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -15,29 +15,20 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-#include "inode.h"
-
-#include "fd.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/list.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/events.h>
+#include <glusterfs/upcall-utils.h>
#include "afr-inode-read.h"
#include "afr-inode-write.h"
@@ -45,4022 +36,7843 @@
#include "afr-dir-write.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heald.h"
-#include "pump.h"
-
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict);
-void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
-{
- int i = 0;
-
- for (i = 0; i < child_count; i++)
- dst[i] = src[i];
-}
+#include "afr-messages.h"
-void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+int32_t
+afr_quorum_errno(afr_private_t *priv)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- path, priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
- ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
- "lookup", path);
- }
+ return ENOTCONN;
}
-int
-afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
- dict_t *xattr_req, loc_t *loc, void **gfid_req)
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
{
- int ret = -ENOMEM;
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
- GF_ASSERT (gfid_req);
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
- *gfid_req = NULL;
- local->xattr_req = dict_new ();
- if (!local->xattr_req)
- goto out;
- if (xattr_req)
- dict_copy (xattr_req, local->xattr_req);
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
- afr_xattr_req_prepare (this, local->xattr_req, loc->path);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
}
-
- ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
}
+ }
- ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to get the gfid from dict", loc->path);
- *gfid_req = NULL;
- } else {
- if (loc->parent != NULL)
- dict_del (local->xattr_req, "gfid-req");
- }
- ret = 0;
-out:
- return ret;
+ return _gf_false;
}
void
-afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
{
- inode_t *inode = NULL;
+ int i = 0;
- inode = loc->inode;
- if (inode && !uuid_is_null (inode->gfid))
- uuid_copy (dst, inode->gfid);
- else if (!uuid_is_null (loc->gfid))
- uuid_copy (dst, loc->gfid);
- else if (new && !uuid_is_null (new))
- uuid_copy (dst, new);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ replies[i] = 1;
+ } else {
+ replies[i] = 0;
+ }
+ }
}
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno)
-{
- int i = 0;
- int errno_count = 0;
- int child = 0;
+afr_fav_child_reset_sink_xattrs(void *opaque);
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (child_errno[child] == op_errno)
- errno_count++;
- }
- return errno_count;
-}
+int
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque);
-int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
-{
- int ret = 0;
- uuid_t *pgfid = NULL;
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this);
- GF_ASSERT (gfid);
+int
+afr_dom_lock_acquire_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ local->cont.lk.dom_lock_op_ret[i] = op_ret;
+ local->cont.lk.dom_lock_op_errno[i] = op_errno;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to acquire %s on %s",
+ uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+ priv->children[i]->name);
+ } else {
+ local->cont.lk.dom_locked_nodes[i] = 1;
+ }
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
- pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
- if (!pgfid) {
- ret = -1;
- goto out;
+int
+afr_dom_lock_acquire(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+
+ priv = frame->this->private;
+ local = frame->local;
+ local->cont.lk.dom_locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+ if (!local->cont.lk.dom_locked_nodes) {
+ return -ENOMEM;
+ }
+ local->cont.lk.dom_lock_op_ret = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_ret),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_ret) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ local->cont.lk.dom_lock_op_errno = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_errno),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_errno) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ flock.l_type = F_WRLCK;
+
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLK, &flock, NULL);
+
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL))
+ goto blocking_lock;
+
+ /*If any of the bricks returned EAGAIN, we still need blocking locks.*/
+ if (AFR_COUNT(local->cont.lk.dom_locked_nodes, priv->child_count) !=
+ priv->child_count) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.dom_lock_op_ret[i] == -1 &&
+ local->cont.lk.dom_lock_op_errno[i] == EAGAIN)
+ goto blocking_lock;
}
+ }
- uuid_copy (*pgfid, gfid);
+ return 0;
- ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
-
-out:
- if (ret && pgfid)
- GF_FREE (pgfid);
+blocking_lock:
+ afr_dom_lock_release(frame);
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLKW, &flock, NULL);
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL)) {
+ afr_dom_lock_release(frame);
+ return -afr_quorum_errno(priv);
+ }
- return ret;
+ return 0;
}
-afr_inode_ctx_t*
-afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
+int
+afr_dom_lock_release_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- size_t size = 0;
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
- GF_ASSERT (child_count > 0);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to release %s on %s", local->loc.path,
+ AFR_LK_HEAL_DOM, priv->children[i]->name);
+ }
+ local->cont.lk.dom_locked_nodes[i] = 0;
- if (!addr) {
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto out;
- size = sizeof (*ctx->fresh_children);
- ctx->fresh_children = GF_CALLOC (child_count, size,
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto out;
- } else {
- ctx = (afr_inode_ctx_t*) (long) addr;
- }
- ret = 0;
-out:
- if (ret && ctx) {
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
- ctx = NULL;
- }
- return ctx;
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
}
void
-afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+afr_dom_lock_release(call_frame_t *frame)
{
- GF_ASSERT (inode);
- GF_ASSERT (params);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = frame->this->private;
+ locked_on = local->cont.lk.dom_locked_nodes;
+ if (AFR_COUNT(locked_on, priv->child_count) == 0)
+ return;
+ flock.l_type = F_UNLCK;
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- uint64_t ctx_addr = 0;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ AFR_ONLIST(locked_on, frame, afr_dom_lock_release_cbk, finodelk,
+ AFR_LK_HEAL_DOM, local->fd, F_SETLK, &flock, NULL);
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- goto unlock;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_GET_READ_CTX:
- fresh_children = params->u.read_ctx.children;
- read_child = (int32_t)(ctx->masks &
- AFR_ICTX_READ_CHILD_MASK);
- params->u.read_ctx.read_child = read_child;
- if (!fresh_children)
- goto unlock;
- for (i = 0; i < priv->child_count; i++)
- fresh_children[i] = ctx->fresh_children[i];
- break;
- case AFR_INODE_GET_OPENDIR_DONE:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
- params->u.value = _gf_true;
- break;
- case AFR_INODE_GET_SPLIT_BRAIN:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK)
- params->u.value = _gf_true;
- ;
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- }
-unlock:
- UNLOCK (&inode->lock);
+ return;
}
-gf_boolean_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+static void
+afr_lk_heal_info_cleanup(afr_lk_heal_info_t *info)
{
- afr_inode_params_t params = {0};
-
- params.op = AFR_INODE_GET_SPLIT_BRAIN;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ if (!info)
+ return;
+ if (info->xdata_req)
+ dict_unref(info->xdata_req);
+ if (info->fd)
+ fd_unref(info->fd);
+ GF_FREE(info->locked_nodes);
+ GF_FREE(info->child_up_event_gen);
+ GF_FREE(info->child_down_event_gen);
+ GF_FREE(info);
}
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
+static int
+afr_add_lock_to_saved_locks(call_frame_t *frame, xlator_t *this)
{
- afr_inode_params_t params = {0};
-
- params.op = AFR_INODE_GET_OPENDIR_DONE;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -ENOMEM;
+
+ info = GF_CALLOC(sizeof(*info), 1, gf_afr_mt_lk_heal_info_t);
+ if (!info) {
+ goto cleanup;
+ }
+ INIT_LIST_HEAD(&info->pos);
+ info->fd = fd_ref(local->fd);
+ info->cmd = local->cont.lk.cmd;
+ info->pid = frame->root->pid;
+ info->flock = local->cont.lk.user_flock;
+ info->xdata_req = dict_copy_with_ref(local->xdata_req, NULL);
+ if (!info->xdata_req) {
+ goto cleanup;
+ }
+ info->lk_owner = frame->root->lk_owner;
+ info->locked_nodes = GF_MALLOC(
+ sizeof(*info->locked_nodes) * priv->child_count, gf_afr_mt_char);
+ if (!info->locked_nodes) {
+ goto cleanup;
+ }
+ memcpy(info->locked_nodes, local->cont.lk.locked_nodes,
+ sizeof(*info->locked_nodes) * priv->child_count);
+ info->child_up_event_gen = GF_CALLOC(sizeof(*info->child_up_event_gen),
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!info->child_up_event_gen) {
+ goto cleanup;
+ }
+ info->child_down_event_gen = GF_CALLOC(sizeof(*info->child_down_event_gen),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!info->child_down_event_gen) {
+ goto cleanup;
+ }
+
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(local->fd, this);
+ if (fd_ctx)
+ fd_ctx->lk_heal_info = info;
+ }
+ UNLOCK(&local->fd->lock);
+ if (!fd_ctx) {
+ goto cleanup;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&info->pos, &priv->saved_locks);
+ }
+ UNLOCK(&priv->lock);
+
+ return 0;
+cleanup:
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to add lock to healq",
+ uuid_utoa(local->fd->inode->gfid));
+ if (info) {
+ afr_lk_heal_info_cleanup(info);
+ if (fd_ctx) {
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx->lk_heal_info = NULL;
+ }
+ UNLOCK(&local->fd->lock);
+ }
+ }
+ return ret;
}
-
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+static int
+afr_remove_lock_from_saved_locks(afr_local_t *local, xlator_t *this)
{
- afr_inode_params_t params = {0};
-
- params.op = AFR_INODE_GET_READ_CTX;
- params.u.read_ctx.children = fresh_children;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.read_ctx.read_child;
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = local->cont.lk.user_flock;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -EINVAL;
+
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx || !fd_ctx->lk_heal_info) {
+ goto out;
+ }
+
+ info = fd_ctx->lk_heal_info;
+ if ((info->flock.l_start != flock.l_start) ||
+ (info->flock.l_whence != flock.l_whence) ||
+ (info->flock.l_len != flock.l_len)) {
+ /*TODO: Compare lkowners too.*/
+ goto out;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ UNLOCK(&priv->lock);
+
+ afr_lk_heal_info_cleanup(info);
+ fd_ctx->lk_heal_info = NULL;
+ ret = 0;
+out:
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to remove lock from healq",
+ uuid_utoa(local->fd->inode->gfid));
+ return ret;
}
-void
-afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
+int
+afr_lock_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
-
- remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
- mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = remaining_mask | mask;
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed to heal lock on child %d for %s", i,
+ uuid_utoa(local->fd->inode->gfid));
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
}
-void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
+int
+afr_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- int i = 0;
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+ } else {
+ local->cont.lk.getlk_rsp[i] = *lock;
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- afr_inode_ctx_set_read_child (ctx, read_child);
- for (i = 0; i < child_count; i++) {
- if (fresh_children)
- ctx->fresh_children[i] = fresh_children[i];
- else
- ctx->fresh_children[i] = -1;
- }
+static gf_boolean_t
+afr_does_lk_owner_match(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ afr_local_t *local = frame->local;
+ struct gf_flock flock = {
+ 0,
+ };
+ gf_boolean_t ret = _gf_true;
+ char *wind_on = alloca0(priv->child_count);
+ unsigned char *success_replies = alloca0(priv->child_count);
+ local->cont.lk.getlk_rsp = GF_CALLOC(sizeof(*local->cont.lk.getlk_rsp),
+ priv->child_count, gf_afr_mt_gf_lock);
+
+ flock = info->flock;
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ wind_on[i] = 1;
+ }
+
+ AFR_ONLIST(wind_on, frame, afr_getlk_cbk, lk, info->fd, F_GETLK, &flock,
+ info->xdata_req);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) == 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret != 0)
+ continue;
+ if (local->cont.lk.getlk_rsp[i].l_type == F_UNLCK)
+ continue;
+ /*TODO: Do we really need to compare lkowner if F_UNLCK is true?*/
+ if (!is_same_lkowner(&local->cont.lk.getlk_rsp[i].l_owner,
+ &info->lk_owner)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+out:
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->cont.lk.getlk_rsp);
+ local->cont.lk.getlk_rsp = NULL;
+ return ret;
}
-void
-afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
- int32_t child_count)
+static void
+afr_mark_fd_bad(fd_t *fd, xlator_t *this)
{
- int i = 0;
- int32_t read_child = -1;
+ afr_fd_ctx_t *fd_ctx = NULL;
- GF_ASSERT (stale_children);
- for (i = 0; i < child_count; i++) {
- if (stale_children[i] == -1)
- break;
- afr_children_rm_child (ctx->fresh_children,
- stale_children[i], child_count);
+ if (!fd)
+ return;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ if (fd_ctx) {
+ fd_ctx->is_fd_bad = _gf_true;
+ fd_ctx->lk_heal_info = NULL;
}
- read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
- if (!afr_is_child_present (ctx->fresh_children, child_count,
- read_child))
- afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
+ }
+ UNLOCK(&fd->lock);
}
-void
-afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+static void
+afr_add_lock_to_lkhealq(afr_private_t *priv, afr_lk_heal_info_t *info)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
-
- remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = remaining_mask | mask;
+ LOCK(&priv->lock);
+ {
+ list_del(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
}
-void
-afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
+static void
+afr_lock_heal_do(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
-
- if (set) {
- remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = remaining_mask | mask;
- } else {
- ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- }
+ int i = 0;
+ int op_errno = 0;
+ int32_t *current_event_gen = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ char *wind_on = alloca0(priv->child_count);
+ gf_boolean_t retry = _gf_true;
+
+ frame->root->pid = info->pid;
+ lk_owner_copy(&frame->root->lk_owner, &info->lk_owner);
+
+ op_errno = -afr_dom_lock_acquire(frame);
+ if ((op_errno != 0)) {
+ goto release;
+ }
+
+ if (!afr_does_lk_owner_match(frame, priv, info)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_LK_HEAL_DOM,
+ "Ignoring lock heal for %s since lk-onwers mismatch. "
+ "Lock possibly pre-empted by another client.",
+ uuid_utoa(info->fd->inode->gfid));
+ goto release;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ continue;
+ wind_on[i] = 1;
+ }
+
+ current_event_gen = alloca(priv->child_count);
+ memcpy(current_event_gen, info->child_up_event_gen,
+ priv->child_count * sizeof *current_event_gen);
+ AFR_ONLIST(wind_on, frame, afr_lock_heal_cbk, lk, info->fd, info->cmd,
+ &info->flock, info->xdata_req);
+
+ LOCK(&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret != 0)) {
+ continue;
+ }
+
+ if ((current_event_gen[i] == info->child_up_event_gen[i]) &&
+ (current_event_gen[i] > info->child_down_event_gen[i])) {
+ info->locked_nodes[i] = 1;
+ retry = _gf_false;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->saved_locks);
+ } else {
+ /*We received subsequent child up/down events while heal was in
+ * progress; don't mark child as healed. Attempt again on the
+ * new child up*/
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_LK_HEAL_DOM,
+ "Event gen mismatch: skipped healing lock on child %d "
+ "for %s.",
+ i, uuid_utoa(info->fd->inode->gfid));
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+release:
+ afr_dom_lock_release(frame);
+ if (retry)
+ afr_add_lock_to_lkhealq(priv, info);
+ return;
}
-void
-afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
-{
- GF_ASSERT (inode);
- GF_ASSERT (params);
-
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx_addr = 0;
- gf_boolean_t set = _gf_false;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- int32_t *stale_children = NULL;
-
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_SET_READ_CTX:
- read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.children;
- afr_inode_ctx_set_read_ctx (ctx, read_child,
- fresh_children,
- priv->child_count);
- break;
- case AFR_INODE_RM_STALE_CHILDREN:
- stale_children = params->u.read_ctx.children;
- afr_inode_ctx_rm_stale_children (ctx,
- stale_children,
- priv->child_count);
- break;
- case AFR_INODE_SET_OPENDIR_DONE:
- afr_inode_ctx_set_opendir_done (ctx);
- break;
- case AFR_INODE_SET_SPLIT_BRAIN:
- set = params->u.value;
- afr_inode_ctx_set_splitbrain (ctx, set);
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
- }
-unlock:
- UNLOCK (&inode->lock);
+static int
+afr_lock_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ STACK_DESTROY(frame->root);
+ return 0;
}
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+static int
+afr_lock_heal(void *opaque)
{
- afr_inode_params_t params = {0};
-
- params.op = AFR_INODE_SET_SPLIT_BRAIN;
- params.u.value = set;
- afr_inode_set_ctx (this, inode, &params);
+ call_frame_t *frame = (call_frame_t *)opaque;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+ struct list_head healq = {
+ 0,
+ };
+ int ret = 0;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ return ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&healq);
+ LOCK(&priv->lock);
+ {
+ list_splice_init(&priv->lk_healq, &healq);
+ }
+ UNLOCK(&priv->lock);
+
+ list_for_each_entry_safe(info, tmp, &healq, pos)
+ {
+ GF_ASSERT((AFR_COUNT(info->locked_nodes, priv->child_count) <
+ priv->child_count));
+ ((afr_local_t *)(iter_frame->local))->fd = fd_ref(info->fd);
+ afr_lock_heal_do(iter_frame, priv, info);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = ENOTCONN;
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOTCONN,
+ AFR_MSG_LK_HEAL_DOM,
+ "Aborting processing of lk_healq."
+ "Healing will be reattempted on next child up for locks "
+ "that are still in quorum.");
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&healq, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+ break;
+ }
+ }
+
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
}
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
+static int
+__afr_lock_heal_synctask(xlator_t *this, afr_private_t *priv, int child)
{
- afr_inode_params_t params = {0};
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
- params.op = AFR_INODE_SET_OPENDIR_DONE;
- afr_inode_set_ctx (this, inode, &params);
-}
+ if (priv->shd.iamshd)
+ return 0;
-void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children)
-{
- afr_inode_params_t params = {0};
- afr_private_t *priv = NULL;
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_up_event_gen[child] = priv->event_generation;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
- priv = this->private;
- GF_ASSERT (read_child >= 0);
- GF_ASSERT (fresh_children);
- GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
- read_child));
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return -1;
- params.op = AFR_INODE_SET_READ_CTX;
- params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.children = fresh_children;
- afr_inode_set_ctx (this, inode, &params);
+ ret = synctask_new(this->ctx->env, afr_lock_heal, afr_lock_heal_done, frame,
+ frame);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_LK_HEAL_DOM,
+ "Failed to launch lock heal synctask");
+
+ return ret;
}
-void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
- int32_t *stale_children)
+static int
+__afr_mark_pending_lk_heal(xlator_t *this, afr_private_t *priv, int child)
{
- afr_inode_params_t params = {0};
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
- GF_ASSERT (stale_children);
-
- params.op = AFR_INODE_RM_STALE_CHILDREN;
- params.u.read_ctx.children = stale_children;
- afr_inode_set_ctx (this, inode, &params);
+ if (priv->shd.iamshd)
+ return 0;
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_down_event_gen[child] = priv->event_generation;
+ if (info->locked_nodes[child] == 1)
+ info->locked_nodes[child] = 0;
+ if (!afr_has_quorum(info->locked_nodes, this, NULL)) {
+ /* Since the lock was lost on quorum no. of nodes, we should
+ * not attempt to heal it anymore. Some other client could have
+ * acquired the lock, modified data and released it and this
+ * client wouldn't know about it if we heal it.*/
+ afr_mark_fd_bad(info->fd, this);
+ list_del(&info->pos);
+ afr_lk_heal_info_cleanup(info);
+ /* We're not winding an unlock on the node where the lock is still
+ * present because when fencing logic switches over to the new
+ * client (since we marked the fd bad), it should preempt any
+ * existing lock. */
+ }
+ }
+ return 0;
}
gf_boolean_t
-afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno)
{
- gf_boolean_t source_xattrs = _gf_false;
-
- GF_ASSERT (child < child_count);
-
- if ((child >= 0) && (child < child_count) &&
- sources[child]) {
- source_xattrs = _gf_true;
- }
- return source_xattrs;
+ if (priv->consistent_io && local->call_count != priv->child_count) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are not up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ return _gf_false;
+ }
+ return _gf_true;
}
gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child)
+afr_is_lock_mode_mandatory(dict_t *xdata)
{
- gf_boolean_t success_child = _gf_false;
- int i = 0;
+ int ret = 0;
+ uint32_t lk_mode = GF_LK_ADVISORY;
- GF_ASSERT (child < child_count);
+ ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_mode);
+ if (!ret && lk_mode == GF_LK_MANDATORY)
+ return _gf_true;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (child == success_children[i]) {
- success_child = _gf_true;
- break;
- }
- }
- return success_child;
+ return _gf_false;
}
-gf_boolean_t
-afr_is_read_child (int32_t *success_children, int32_t *sources,
- int32_t child_count, int32_t child)
+call_frame_t *
+afr_copy_frame(call_frame_t *base)
{
- gf_boolean_t success_child = _gf_false;
- gf_boolean_t source = _gf_false;
+ afr_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int op_errno = 0;
- GF_ASSERT (success_children);
- GF_ASSERT (child_count > 0);
+ frame = copy_frame(base);
+ if (!frame)
+ return NULL;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local) {
+ AFR_STACK_DESTROY(frame);
+ return NULL;
+ }
- success_child = afr_is_child_present (success_children, child_count,
- child);
- if (!success_child)
- goto out;
- if (NULL == sources) {
- source = _gf_true;
- goto out;
- }
- source = afr_is_source_child (sources, child_count, child);
-out:
- return (success_child && source);
+ return frame;
}
-/* If sources is NULL the xattrs are assumed to be of source for all
- * success_children.
- */
-int
-afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources)
+/* Check if an entry or inode could be undergoing a transaction. */
+gf_boolean_t
+afr_is_possibly_under_txn(afr_transaction_type type, afr_local_t *local,
+ xlator_t *this)
{
- int32_t read_child = -1;
- int i = 0;
+ int i = 0;
+ int tmp = 0;
+ afr_private_t *priv = NULL;
+ GF_UNUSED char *key = NULL;
+ int keylen = 0;
+
+ priv = this->private;
+
+ if (type == AFR_ENTRY_TRANSACTION) {
+ key = GLUSTERFS_PARENT_ENTRYLK;
+ keylen = SLEN(GLUSTERFS_PARENT_ENTRYLK);
+ } else if (type == AFR_DATA_TRANSACTION) {
+ /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
+ * pl_inodelk_xattr_fill supports separate keys for different
+ * domains.*/
+ key = GLUSTERFS_INODELK_COUNT;
+ keylen = SLEN(GLUSTERFS_INODELK_COUNT);
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].xdata)
+ continue;
+ if (dict_get_int32n(local->replies[i].xdata, key, keylen, &tmp) == 0)
+ if (tmp)
+ return _gf_true;
+ }
- GF_ASSERT (success_children);
+ return _gf_false;
+}
- read_child = prev_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+static void
+afr_inode_ctx_destroy(afr_inode_ctx_t *ctx)
+{
+ int i = 0;
- read_child = config_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ if (!ctx)
+ return;
- for (i = 0; i < child_count; i++) {
- read_child = success_children[i];
- if (read_child < 0)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
- }
- read_child = -1;
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ GF_FREE(ctx->pre_op_done[i]);
+ }
-out:
- return read_child;
+ GF_FREE(ctx);
}
-/* This function should be used when all the success_children are sources
- */
-void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child)
-{
- int read_child = -1;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- read_child = afr_select_read_child_from_policy (fresh_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- NULL);
- if (read_child >= 0)
- afr_inode_set_read_ctx (this, inode, read_child,
- fresh_children);
-}
-
-/* afr_next_call_child ()
- * This is a common function used by all the read-type fops
- * This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child)
+int
+__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
{
- int next_index = 0;
- int32_t next_call_child = -1;
-
- GF_ASSERT (last_index);
-
- next_index = *last_index;
-retry:
- next_index++;
- if ((next_index >= child_count) ||
- (fresh_children[next_index] == -1))
- goto out;
- if ((fresh_children[next_index] == read_child) ||
- (!child_up[fresh_children[next_index]]))
- goto retry;
- *last_index = next_index;
- next_call_child = fresh_children[next_index];
+ uint64_t ctx_int = 0;
+ int ret = -1;
+ int i = -1;
+ int num_locks = -1;
+ afr_inode_ctx_t *ictx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_private_t *priv = this->private;
+
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ret == 0) {
+ *ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ return 0;
+ }
+
+ ictx = GF_CALLOC(1, sizeof(afr_inode_ctx_t), gf_afr_mt_inode_ctx_t);
+ if (!ictx)
+ goto out;
+
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ ictx->pre_op_done[i] = GF_CALLOC(sizeof *ictx->pre_op_done[i],
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!ictx->pre_op_done[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ num_locks = sizeof(ictx->lock) / sizeof(afr_lock_t);
+ for (i = 0; i < num_locks; i++) {
+ lock = &ictx->lock[i];
+ INIT_LIST_HEAD(&lock->post_op);
+ INIT_LIST_HEAD(&lock->frozen);
+ INIT_LIST_HEAD(&lock->waiting);
+ INIT_LIST_HEAD(&lock->owners);
+ }
+
+ ctx_int = (uint64_t)(uintptr_t)ictx;
+ ret = __inode_ctx_set(inode, this, &ctx_int);
+ if (ret) {
+ goto out;
+ }
+
+ ictx->spb_choice = -1;
+ ictx->read_subvol = 0;
+ ictx->write_subvol = 0;
+ ictx->lock_count = 0;
+ ret = 0;
+ *ctx = ictx;
out:
- return next_call_child;
+ if (ret) {
+ afr_inode_ctx_destroy(ictx);
+ }
+ return ret;
}
- /* This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
+/*
+ * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
+ *
+ * |<---------- 64bit ------------>|
+ * 63 32 31 16 15 0
+ * | EVENT_GEN | DATA | METADATA |
+ *
+ *
+ * METADATA (bit-0 .. bit-15): bitmap representing subvolumes from which
+ * metadata can be attempted to be read.
+ *
+ * bit-0 => priv->subvolumes[0]
+ * bit-1 => priv->subvolumes[1]
+ * ... etc. till bit-15
+ *
+ * DATA (bit-16 .. bit-31): bitmap representing subvolumes from which data
+ * can be attempted to be read.
+ *
+ * bit-16 => priv->subvolumes[0]
+ * bit-17 => priv->subvolumes[1]
+ * ... etc. till bit-31
+ *
+ * EVENT_GEN (bit-32 .. bit-63): event generation (i.e priv->event_generation)
+ * when DATA and METADATA was last updated.
+ *
+ * If EVENT_GEN is < priv->event_generation,
+ * or is 0, it means afr_inode_refresh() needs
+ * to be called to recalculate the bitmaps.
*/
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index)
-{
- int ret = 0;
- afr_private_t *priv = NULL;
- int i = 0;
- GF_ASSERT (child_up);
- GF_ASSERT (call_child);
- GF_ASSERT (last_index);
- GF_ASSERT (fresh_children);
+int
+__afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ inode_t *inode)
+{
+ int i = 0;
+ int txn_type = 0;
+ int count = 0;
+ int index = -1;
+ uint16_t datamap_old = 0;
+ uint16_t metadatamap_old = 0;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint16_t tmp_map = 0;
+ uint16_t mask = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ txn_type = local->transaction.type;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ val = local->inode_ctx->write_subvol;
+ else
+ val = local->inode_ctx->read_subvol;
+
+ metadatamap_old = metadatamap = (val & 0x000000000000ffff);
+ datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ tmp_map = datamap;
+ else if (txn_type == AFR_METADATA_TRANSACTION)
+ tmp_map = metadatamap;
+
+ count = gf_bits_count(tmp_map);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i])
+ continue;
+
+ mask = 1 << i;
+ if (txn_type == AFR_METADATA_TRANSACTION)
+ metadatamap &= ~mask;
+ else if (txn_type == AFR_DATA_TRANSACTION)
+ datamap &= ~mask;
+ }
+
+ switch (txn_type) {
+ case AFR_METADATA_TRANSACTION:
+ if ((metadatamap_old != 0) && (metadatamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ metadatamap |= (1 << index);
+ }
+ if (metadatamap_old != metadatamap) {
+ __afr_inode_need_refresh_set(inode, this);
+ }
+ break;
+
+ case AFR_DATA_TRANSACTION:
+ if ((datamap_old != 0) && (datamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ datamap |= (1 << index);
+ }
+ if (datamap_old != datamap)
+ __afr_inode_need_refresh_set(inode, this);
+ break;
- if (read_child < 0) {
- ret = -EIO;
- goto out;
- }
- priv = this->private;
- *call_child = -1;
- *last_index = -1;
+ default:
+ break;
+ }
- if (child_up[read_child]) {
- *call_child = read_child;
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (fresh_children[i] == -1)
- break;
- if (child_up[fresh_children[i]]) {
- *call_child = fresh_children[i];
- ret = 0;
- break;
- }
- }
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
- if (*call_child == -1) {
- ret = -ENOTCONN;
- goto out;
- }
+ if (txn_type == AFR_DATA_TRANSACTION)
+ local->inode_ctx->write_subvol = val;
+ local->inode_ctx->read_subvol = val;
- *last_index = i;
- }
-out:
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
- "last_index: %d", ret, *call_child, *last_index);
- return ret;
+ return 0;
}
-void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+gf_boolean_t
+afr_is_symmetric_error(call_frame_t *frame, xlator_t *this)
{
- unsigned int i = 0;
-
- if (!xattr)
- goto out;
- for (i = 0; i < child_count; i++) {
- if (xattr[i]) {
- dict_unref (xattr[i]);
- xattr[i] = NULL;
- }
- }
-out:
- return;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int i_errno = 0;
+ gf_boolean_t matching_errors = _gf_true;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != -1) {
+ /* Operation succeeded on at least one subvol,
+ so it is not a failed-everywhere situation.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+ i_errno = local->replies[i].op_errno;
+
+ if (i_errno == ENOTCONN) {
+ /* ENOTCONN is not a symmetric error. We do not
+ know if the operation was performed on the
+ backend or not.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+
+ if (!op_errno) {
+ op_errno = i_errno;
+ } else if (op_errno != i_errno) {
+ /* Mismatching op_errno's */
+ matching_errors = _gf_false;
+ break;
+ }
+ }
+
+ return matching_errors;
}
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+int
+afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame, inode_t *inode)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- if (sh->buf)
- GF_FREE (sh->buf);
-
- if (sh->parentbufs)
- GF_FREE (sh->parentbufs);
+ /* If this transaction saw no failures, then exit. */
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) == 0)
+ return 0;
- if (sh->inode)
- inode_unref (sh->inode);
+ if (afr_is_symmetric_error(frame, this))
+ return 0;
- if (sh->xattr) {
- afr_reset_xattr (sh->xattr, priv->child_count);
- GF_FREE (sh->xattr);
- }
+ LOCK(&inode->lock);
+ {
+ ret = __afr_set_in_flight_sb_status(this, local, inode);
+ }
+ UNLOCK(&inode->lock);
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ return ret;
+}
- afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
- afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
+int
+__afr_inode_read_subvol_get_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ int i = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ priv = this->private;
+
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
+ return ret;
- if (sh->sources)
- GF_FREE (sh->sources);
+ val = ctx->read_subvol;
- if (sh->success)
- GF_FREE (sh->success);
+ metadatamap = (val & 0x000000000000ffff);
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+ for (i = 0; i < priv->child_count; i++) {
+ if (metadata)
+ metadata[i] = (metadatamap >> i) & 1;
+ if (data)
+ data[i] = (datamap >> i) & 1;
+ }
- if (sh->healing_fd) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
+ if (event_p)
+ *event_p = event;
+ return ret;
+}
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
+int
+__afr_inode_read_subvol_set_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int event)
+{
+ afr_private_t *priv = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int i = 0;
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
- if (sh->success_children)
- GF_FREE (sh->success_children);
+ priv = this->private;
- if (sh->fresh_children)
- GF_FREE (sh->fresh_children);
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
- if (sh->fresh_parent_dirs)
- GF_FREE (sh->fresh_parent_dirs);
+ for (i = 0; i < priv->child_count; i++) {
+ if (data[i])
+ datamap |= (1 << i);
+ if (metadata[i])
+ metadatamap |= (1 << i);
+ }
- loc_wipe (&sh->parent_loc);
- loc_wipe (&sh->lookup_loc);
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
- if (sh->checksum)
- GF_FREE (sh->checksum);
+ ctx->read_subvol = val;
- if (sh->write_needed)
- GF_FREE (sh->write_needed);
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
+ ret = 0;
+out:
+ return ret;
}
-
-void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+int
+__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- priv = this->private;
+ priv = this->private;
- afr_matrix_cleanup (local->pending, priv->child_count);
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_get_small(inode, this, data, metadata,
+ event_p);
+ else
+ /* TBD: allocate structure with array and read from it */
+ ret = -1;
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ return ret;
+}
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+int
+__afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
+ return ret;
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ *spb_choice = ctx->spb_choice;
+ return 0;
+}
+int
+__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
- GF_FREE (local->transaction.pre_op);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->transaction.eager_lock);
+ priv = this->private;
- GF_FREE (local->transaction.basename);
- GF_FREE (local->transaction.new_basename);
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_set_small(inode, this, data, metadata,
+ event);
+ else
+ ret = -1;
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
+ return ret;
}
-
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
+int
+__afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
+ int spb_choice)
{
- afr_private_t * priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- if (!local)
- return;
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
- afr_local_sh_cleanup (local, this);
+ ctx->spb_choice = spb_choice;
- afr_local_transaction_cleanup (local, this);
+ ret = 0;
+out:
+ return ret;
+}
- priv = this->private;
+int
+afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
+{
+ int ret = -1;
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (local->fd)
- fd_unref (local->fd);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get(inode, this, data, metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+int
+afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ unsigned char *data = alloca0(priv->child_count);
+ unsigned char *metadata = alloca0(priv->child_count);
+ int data_count = 0;
+ int metadata_count = 0;
+ int event_generation = 0;
+ int ret = 0;
+
+ ret = afr_inode_read_subvol_get(inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ return -EIO;
+
+ data_count = AFR_COUNT(data, priv->child_count);
+ metadata_count = AFR_COUNT(metadata, priv->child_count);
+
+ if (inode->ia_type == IA_IFDIR) {
+ /* For directories, allow even if it is in data split-brain. */
+ if (type == AFR_METADATA_TRANSACTION || local->op == GF_FOP_STAT ||
+ local->op == GF_FOP_FSTAT) {
+ if (!metadata_count)
+ return -EIO;
+ }
+ } else {
+ /* For files, abort in case of data/metadata split-brain. */
+ if (!data_count || !metadata_count) {
+ return -EIO;
+ }
+ }
+
+ if (type == AFR_METADATA_TRANSACTION && readable)
+ memcpy(readable, metadata, priv->child_count * sizeof *metadata);
+ if (type == AFR_DATA_TRANSACTION && readable) {
+ if (!data_count)
+ memcpy(readable, local->child_up,
+ priv->child_count * sizeof *readable);
+ else
+ memcpy(readable, data, priv->child_count * sizeof *data);
+ }
+ if (event_p)
+ *event_p = event_generation;
+ return 0;
+}
- if (local->dict)
- dict_unref (local->dict);
+static int
+afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
+{
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_get(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- if (local->child_up)
- GF_FREE (local->child_up);
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame with out local->replies. So in that case, frame is passed as
+ * null, hence this function will handle the frame NULL case.
+ */
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
+{
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (local->child_errno)
- GF_FREE (local->child_errno);
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
- if (local->fresh_children)
- GF_FREE (local->fresh_children);
+ priv = this->private;
- if (local->fd_open_on)
- GF_FREE (local->fd_open_on);
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- afr_reset_xattr (local->cont.lookup.xattrs,
- priv->child_count);
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
+out:
+ return ret;
+}
+int
+afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
+{
+ int ret = -1;
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set(inode, this, data, metadata, event);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- if (local->cont.lookup.postparents)
- GF_FREE (local->cont.lookup.postparents);
+int
+afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice)
+{
+ int ret = -1;
- if (local->cont.lookup.bufs)
- GF_FREE (local->cont.lookup.bufs);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (local->cont.lookup.success_children)
- GF_FREE (local->cont.lookup.success_children);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_set(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- if (local->cont.lookup.sources)
- GF_FREE (local->cont.lookup.sources);
- }
+/* The caller of this should perform afr_inode_refresh, if this function
+ * returns _gf_true
+ */
+gf_boolean_t
+afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
+ int event_gen2)
+{
+ gf_boolean_t need_refresh = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- { /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
- }
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- { /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
- }
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto unlock;
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- if (local->cont.create.params)
- dict_unref (local->cont.create.params);
- }
+ need_refresh = ctx->need_refresh;
+ /* Hoping that the caller will do inode_refresh followed by
+ * this, hence setting the need_refresh to false */
+ ctx->need_refresh = _gf_false;
+ }
+unlock:
+ UNLOCK(&inode->lock);
- { /* mknod */
- if (local->cont.mknod.params)
- dict_unref (local->cont.mknod.params);
- }
+ if (event_gen1 != event_gen2)
+ need_refresh = _gf_true;
+out:
+ return need_refresh;
+}
- { /* mkdir */
- if (local->cont.mkdir.params)
- dict_unref (local->cont.mkdir.params);
- }
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
- { /* symlink */
- if (local->cont.symlink.params)
- dict_unref (local->cont.symlink.params);
- }
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
+ ctx->need_refresh = _gf_true;
+ }
- { /* writev */
- GF_FREE (local->cont.writev.vector);
- }
+ return ret;
+}
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
- { /* fsetxattr */
- if (local->cont.fsetxattr.dict)
- dict_unref (local->cont.fsetxattr.dict);
- }
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- { /* removexattr */
- GF_FREE (local->cont.removexattr.name);
- }
- { /* xattrop */
- if (local->cont.xattrop.xattr)
- dict_unref (local->cont.xattrop.xattr);
- }
- { /* fxattrop */
- if (local->cont.fxattrop.xattr)
- dict_unref (local->cont.fxattrop.xattr);
- }
- { /* symlink */
- GF_FREE (local->cont.symlink.linkpath);
- }
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_need_refresh_set(inode, this);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- { /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
- }
+int
+afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ if (!inode)
+ return ret;
- { /* readdirp */
- if (local->cont.readdir.dict)
- dict_unref (local->cont.readdir.dict);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0 || !ctx) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to cancel split-brain choice timer.");
+ goto out;
+ }
+ ctx->spb_choice = -1;
+ if (ctx->timer) {
+ gf_timer_call_cancel(this->ctx, ctx->timer);
+ ctx->timer = NULL;
}
+ ret = 0;
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- if (local->xdata_req)
- dict_unref (local->xdata_req);
+void
+afr_set_split_brain_choice_cbk(void *data)
+{
+ inode_t *inode = data;
+ xlator_t *this = THIS;
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_invalidate(inode);
+ inode_unref(inode);
+ return;
}
-
int
-afr_frame_return (call_frame_t *frame)
+afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
{
- afr_local_t *local = NULL;
- int call_count = 0;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ inode_t *inode = NULL;
+ loc_t *loc = NULL;
+ xlator_t *this = NULL;
+ afr_spbc_timeout_t *data = opaque;
+ struct timespec delta = {
+ 0,
+ };
+ gf_boolean_t timer_set = _gf_false;
+ gf_boolean_t timer_cancelled = _gf_false;
+ gf_boolean_t timer_reset = _gf_false;
+ int old_spb_choice = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+ priv = this->private;
+
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ delta.tv_sec = priv->spb_choice_timeout;
+ delta.tv_nsec = 0;
+
+ if (!loc->inode) {
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (!(data->d_spb || data->m_spb)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Cannot set "
+ "replica.split-brain-choice on %s. File is"
+ " not in data/metadata split-brain.",
+ uuid_utoa(loc->gfid));
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /*
+ * we're ref'ing the inode before LOCK like it is done elsewhere in the
+ * code. If we ref after LOCK, coverity complains of possible deadlocks.
+ */
+ inode = inode_ref(loc->inode);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to get inode_ctx for %s", loc->name);
+ goto post_unlock;
+ }
- local = frame->local;
+ old_spb_choice = ctx->spb_choice;
+ ctx->spb_choice = data->spb_child_index;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ /* Possible changes in spb-choice :
+ * valid to -1 : cancel timer and unref
+ * valid to valid : cancel timer and inject new one
+ * -1 to -1 : unref and do not do anything
+ * -1 to valid : inject timer
+ */
- return call_count;
+ /* ctx->timer is NULL iff previous value of
+ * ctx->spb_choice is -1
+ */
+ if (ctx->timer) {
+ if (ctx->spb_choice == -1) {
+ if (!gf_timer_call_cancel(this->ctx, ctx->timer)) {
+ ctx->timer = NULL;
+ timer_cancelled = _gf_true;
+ }
+ /* If timer cancel failed here it means that the
+ * previous cbk will be executed which will set
+ * spb_choice to -1. So we can consider the
+ * 'valid to -1' case to be a success
+ * (i.e. ret = 0) and goto unlock.
+ */
+ goto unlock;
+ }
+ goto reset_timer;
+ } else {
+ if (ctx->spb_choice == -1)
+ goto unlock;
+ goto set_timer;
+ }
+
+ reset_timer:
+ ret = gf_timer_call_cancel(this->ctx, ctx->timer);
+ if (ret != 0) {
+ /* We need to bail out now instead of launching a new
+ * timer. Otherwise the cbk of the previous timer event
+ * will cancel the new ctx->timer.
+ */
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = EAGAIN;
+ goto unlock;
+ }
+ ctx->timer = NULL;
+ timer_reset = _gf_true;
+
+ set_timer:
+ ctx->timer = gf_timer_call_after(this->ctx, delta,
+ afr_set_split_brain_choice_cbk, inode);
+ if (!ctx->timer) {
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = ENOMEM;
+ }
+ if (!timer_reset && ctx->timer)
+ timer_set = _gf_true;
+ if (timer_reset && !ctx->timer)
+ timer_cancelled = _gf_true;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+post_unlock:
+ if (!timer_set)
+ inode_unref(inode);
+ if (timer_cancelled)
+ inode_unref(inode);
+ /*
+ * We need to invalidate the inode to prevent the kernel from serving
+ * reads from an older cached value despite a change in spb_choice to
+ * a new value.
+ */
+ inode_invalidate(inode);
+out:
+ GF_FREE(data);
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ return 0;
}
int
-afr_set_elem_count_get (unsigned char *elems, int child_count)
+afr_accused_fill(xlator_t *this, dict_t *xdata, unsigned char *accused,
+ afr_transaction_type type)
{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++)
- if (elems[i])
- ret++;
- return ret;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int idx = afr_index_for_transaction_type(type);
+ void *pending_raw = NULL;
+ int pending[3];
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr(xdata, priv->pending_key[i], &pending_raw);
+ if (ret) /* no pending flags */
+ continue;
+ memcpy(pending, pending_raw, sizeof(pending));
+
+ if (ntoh32(pending[idx]))
+ accused[i] = 1;
+ }
+
+ return 0;
}
-/**
- * up_children_count - return the number of children that are up
- */
+int
+afr_accuse_smallfiles(xlator_t *this, struct afr_reply *replies,
+ unsigned char *data_accused)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t maxsize = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))
+ continue;
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size > maxsize)
+ maxsize = replies[i].poststat.ia_size;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ if (replies[i].poststat.ia_size < maxsize)
+ data_accused[i] = 1;
+ }
+
+ return 0;
+}
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count)
+int
+afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *data_accused, unsigned char *metadata_accused,
+ unsigned char *data_readable,
+ unsigned char *metadata_readable, struct afr_reply *replies)
{
- return afr_set_elem_count_get (child_up, child_count);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+ int ret = 0;
+ ia_type_t ia_type = IA_INVAL;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
+ if (AFR_IS_ARBITER_BRICK(priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies) { /* Lookup */
+ if (!replies[i].valid || replies[i].op_ret == -1 ||
+ (replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ xdata = replies[i].xdata;
+ ia_type = replies[i].poststat.ia_type;
+ } else { /* pre-op xattrop */
+ xdata = local->transaction.changelog_xdata[i];
+ ia_type = inode->ia_type;
+ }
+
+ if (!xdata)
+ continue; /* mkdir_cbk sends NULL xdata_rsp. */
+ afr_accused_fill(this, xdata, data_accused,
+ (ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION
+ : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill(this, xdata, metadata_accused,
+ AFR_METADATA_TRANSACTION);
+ }
+
+ if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR &&
+ /* We want to accuse small files only when we know for
+ * sure that there is no IO happening. Otherwise, the
+ * ia_sizes obtained in post-refresh replies may
+ * mismatch due to a race between inode-refresh and
+ * ongoing writes, causing spurious heal launches*/
+ !afr_is_possibly_under_txn(AFR_DATA_TRANSACTION, local, this)) {
+ afr_accuse_smallfiles(this, replies, data_accused);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+ return ret;
}
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count)
+int
+afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal)
{
- return afr_set_elem_count_get (children, child_count);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ ret = afr_readables_fill(frame, this, inode, data_accused, metadata_accused,
+ data_readable, metadata_readable, replies);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (start_heal && priv->child_up[i] &&
+ (data_accused[i] || metadata_accused[i])) {
+ *start_heal = _gf_true;
+ break;
+ }
+ }
+ afr_inode_read_subvol_set(inode, this, data_readable, metadata_readable,
+ event_generation);
+ return ret;
}
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count)
+int
+afr_refresh_selfheal_done(int ret, call_frame_t *heal, void *opaque)
{
- return afr_set_elem_count_get (pre_op, child_count);
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return 0;
}
-gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
+int
+afr_inode_refresh_err(call_frame_t *frame, xlator_t *this)
{
- uint64_t ctx = 0;
- int32_t ret = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int err = 0;
- GF_ASSERT (loc);
- GF_ASSERT (this);
- GF_ASSERT (loc->inode);
+ local = frame->local;
+ priv = this->private;
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (0 == ret)
- return _gf_false;
- return _gf_true;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && !local->replies[i].op_ret) {
+ err = 0;
+ goto ret;
+ }
+ }
+
+ err = afr_final_errno(local, priv);
+ret:
+ return err;
}
-void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+gf_boolean_t
+afr_selfheal_enabled(const xlator_t *this)
{
- GF_ASSERT (loc);
- GF_ASSERT (buf);
+ const afr_private_t *priv = this->private;
- uuid_copy (loc->gfid, buf->ia_gfid);
- if (postparent)
- uuid_copy (loc->pargfid, postparent->ia_gfid);
+ return priv->data_self_heal || priv->metadata_self_heal ||
+ priv->entry_self_heal;
}
int
-afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
+afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
{
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
- dict_t **xattr = NULL;
- int32_t *success_children = NULL;
- int32_t *sources = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
- int ret = 0;
- int i = 0;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int event_generation = 0;
+ int read_subvol = -1;
+ int ret = 0;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err)
+ goto refresh_done;
+
+ if (local->op == GF_FOP_LOOKUP)
+ goto refresh_done;
+
+ ret = afr_inode_get_readable(frame, inode, this, local->readable,
+ &event_generation, local->transaction.type);
+
+ if (ret == -EIO) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = EIO;
+ goto refresh_done;
+ }
+ read_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (read_subvol == -1) {
+ err = EIO;
+ goto refresh_done;
+ }
+
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame) {
+ err = EIO;
+ goto refresh_done;
+ }
+ heal_local = heal_frame->local;
+ heal_local->xdata_req = dict_new();
+ if (!heal_local->xdata_req) {
+ err = EIO;
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
+ }
+ heal_local->heal_frame = frame;
+ ret = synctask_new(this->ctx->env, afr_fav_child_reset_sink_xattrs,
+ afr_fav_child_reset_sink_xattrs_cbk, heal_frame,
+ heal_frame);
+ return 0;
+ }
- GF_ASSERT (local);
+refresh_done:
+ afr_local_replies_wipe(local, this->private);
+ local->refreshfn(frame, this, err);
- buf = &local->cont.lookup.buf;
- postparent = &local->cont.lookup.postparent;
- xattr = &local->cont.lookup.xattr;
- priv = this->private;
+ return 0;
+}
- read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
- local->fresh_children);
- if (read_child < 0) {
- ret = -1;
- goto out;
- }
- success_children = local->cont.lookup.success_children;
- sources = local->cont.lookup.sources;
- memset (sources, 0, sizeof (*sources) * priv->child_count);
- afr_children_intersection_get (local->fresh_children, success_children,
- sources, priv->child_count);
- if (!sources[read_child]) {
- read_child = -1;
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i]) {
- read_child = i;
- break;
- }
- }
- }
- if (read_child < 0) {
- ret = -1;
- goto out;
+int
+afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
+{
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t start_heal = _gf_false;
+ afr_local_t *heal_local = NULL;
+ unsigned char *success_replies = NULL;
+ int ret = 0;
+
+ if (error != 0) {
+ goto refresh_done;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->thin_arbiter_count && local->is_read_txn &&
+ AFR_COUNT(success_replies, priv->child_count) != priv->child_count) {
+ /* We need to query the good bricks and/or thin-arbiter.*/
+ if (success_replies[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (success_replies[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
+ error = EINVAL;
+ goto refresh_done;
+ }
+
+ if (!afr_has_quorum(success_replies, this, frame)) {
+ error = afr_final_errno(frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
+
+ ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal);
+
+ if (ret && afr_selfheal_enabled(this) && start_heal) {
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame)
+ goto refresh_done;
+ heal_local = heal_frame->local;
+ heal_local->refreshinode = inode_ref(local->refreshinode);
+ heal_local->heal_frame = heal_frame;
+ if (!afr_throttled_selfheal(heal_frame, this)) {
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
+ }
+ }
+
+refresh_done:
+ afr_txn_refresh_done(frame, this, error);
+
+ return 0;
+}
+
+void
+afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ dict_t *xdata, struct iatt *par)
+{
+ afr_local_t *local = NULL;
+ int call_child = (long)cookie;
+ int8_t need_heal = 1;
+ int call_count = 0;
+ int ret = 0;
+
+ local = frame->local;
+ local->replies[call_child].valid = 1;
+ local->replies[call_child].op_ret = op_ret;
+ local->replies[call_child].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[call_child].poststat = *buf;
+ if (par)
+ local->replies[call_child].postparent = *par;
+ if (xdata)
+ local->replies[call_child].xdata = dict_ref(xdata);
+ }
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get link count");
}
- gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
- read_child);
- if (!*xattr)
- *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
- *buf = local->cont.lookup.bufs[read_child];
- *postparent = local->cont.lookup.postparents[read_child];
+ }
- if (IA_INVAL == local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type = buf->ia_type;
+ local->replies[call_child].need_heal = need_heal;
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ ret = afr_inode_refresh_err(frame, this);
+ if (ret) {
+ gf_msg_debug(this->name, ret, "afr_inode_refresh_err failed");
}
-out:
- return ret;
+ afr_inode_refresh_done(frame, this, ret);
+ }
}
-static void
-afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
-{
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
- int ret = -1;
- uint32_t parent_entrylk = 0;
-
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
- GF_ASSERT (child_index >= 0);
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
- ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
- &parent_entrylk);
- if (!ret)
- local->cont.lookup.parent_entrylk += parent_entrylk;
+int
+afr_inode_refresh_subvol_with_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *par)
+{
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, par);
+ return 0;
}
-static void
-afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
- dict_t *xattr)
+int
+afr_inode_refresh_subvol_with_lookup(call_frame_t *frame, xlator_t *this, int i,
+ inode_t *inode, uuid_t gfid, dict_t *xdata)
{
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
+ loc_t loc = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ loc.inode = inode;
+ if (gf_uuid_is_null(inode->gfid) && gfid) {
+ /* To handle setattr/setxattr on yet to be linked inode from
+ * dht */
+ gf_uuid_copy(loc.gfid, gfid);
+ } else {
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ }
+
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_lookup_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &loc, xdata);
+ return 0;
+}
- if (afr_sh_has_metadata_pending (xattr, this)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
+int
+afr_inode_refresh_subvol_with_fstat_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, NULL);
+ return 0;
+}
- if (afr_sh_has_entry_pending (xattr, this)) {
- local->self_heal.do_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
+int
+afr_inode_refresh_subvol_with_fstat(call_frame_t *frame, xlator_t *this, int i,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- if (afr_sh_has_data_pending (xattr, this)) {
- local->self_heal.do_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
- }
+ priv = this->private;
+ local = frame->local;
+
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_fstat_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fstat, local->fd, xdata);
+ return 0;
}
-static void
-afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
- struct iatt *buf, struct iatt *lookup_buf)
+int
+afr_inode_refresh_do(call_frame_t *frame, xlator_t *this)
{
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- gf_log (this->name, GF_LOG_DEBUG,
- "permissions differ for %s ", local->loc.path);
- local->self_heal.do_metadata_self_heal = _gf_true;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *wind_subvols = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ wind_subvols = alloca0(priv->child_count);
+
+ afr_local_replies_wipe(local, priv);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ afr_inode_refresh_done(frame, this, EINVAL);
+ return 0;
}
+ }
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG,
- "ownership differs for %s ", local->loc.path);
- }
+ xdata = dict_new();
+ if (!xdata) {
+ afr_inode_refresh_done(frame, this, ENOMEM);
+ return 0;
+ }
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "size differs for %s ", local->loc.path);
- local->self_heal.do_data_self_heal = _gf_true;
- }
+ ret = afr_xattr_req_prepare(this, xdata);
+ if (ret != 0) {
+ dict_unref(xdata);
+ afr_inode_refresh_done(frame, this, -ret);
+ return 0;
+ }
- if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
- /* mismatching gfid */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume", local->loc.path);
- }
-}
+ ret = dict_set_sizen_str_sizen(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
-static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
- gf_boolean_t split_brain)
-{
- GF_ASSERT (local);
- GF_ASSERT (this);
-
- if ((local->success_count > 0) && (local->enoent_count > 0)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_entry_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entries are missing in lookup of %s.",
- local->loc.path);
- //If all self-heals are needed no need to check for other rules
- goto out;
- }
+ ret = dict_set_str_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Unable to set inodelk-dom-count in dict ");
+ }
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG,
- "split brain detected during lookup of %s.",
- local->loc.path);
+ if (local->fd) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] && fd_ctx->opened_on[i] == AFR_FD_OPENED)
+ wind_subvols[i] = 1;
}
+ } else {
+ memcpy(wind_subvols, local->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ }
-out:
- return;
-}
+ local->call_count = AFR_COUNT(wind_subvols, priv->child_count);
-gf_boolean_t
-afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
-{
- GF_ASSERT (sh);
- GF_ASSERT (priv);
+ call_count = local->call_count;
+ if (!call_count) {
+ dict_unref(xdata);
+ if (local->fd && AFR_COUNT(local->child_up, priv->child_count))
+ afr_inode_refresh_done(frame, this, EBADFD);
+ else
+ afr_inode_refresh_done(frame, this, ENOTCONN);
+ return 0;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_subvols[i])
+ continue;
- return (sh->do_gfid_self_heal
- || sh->do_missing_entry_self_heal
- || (afr_data_self_heal_enabled (priv->data_self_heal) &&
- sh->do_data_self_heal)
- || (priv->metadata_self_heal && sh->do_metadata_self_heal)
- || (priv->entry_self_heal && sh->do_entry_self_heal));
-}
+ if (local->fd)
+ afr_inode_refresh_subvol_with_fstat(frame, this, i, xdata);
+ else
+ afr_inode_refresh_subvol_with_lookup(
+ frame, this, i, local->refreshinode, local->refreshgfid, xdata);
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type)
-{
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ if (!--call_count)
+ break;
+ }
- GF_ASSERT (ia_type != IA_INVAL);
+ dict_unref(xdata);
- if (IA_ISDIR (ia_type)) {
- type = AFR_ENTRY_TRANSACTION;
- } else if (IA_ISREG (ia_type)) {
- type = AFR_DATA_TRANSACTION;
- }
- return type;
+ return 0;
}
int
-afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
- int32_t *read_child)
+afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t refreshfn)
{
- ia_type_t ia_type = IA_INVAL;
- int32_t source = -1;
- int ret = -1;
- dict_t **xattrs = NULL;
- int32_t *success_children = NULL;
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ afr_local_t *local = NULL;
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (local->success_count > 0);
+ local = frame->local;
- success_children = local->cont.lookup.success_children;
- /*We can take the success_children[0] only because we already
- *handle the conflicting children other wise, we could select the
- *read_child based on wrong file type
- */
- ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
- type = afr_transaction_type_get (ia_type);
- xattrs = local->cont.lookup.xattrs;
- source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
- type);
- if (source < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
- "for %s", local->loc.path);
- goto out;
- }
+ local->refreshfn = refreshfn;
- gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
- source, local->loc.path);
- *read_child = source;
- ret = 0;
-out:
- return ret;
-}
+ if (local->refreshinode) {
+ inode_unref(local->refreshinode);
+ local->refreshinode = NULL;
+ }
-static inline gf_boolean_t
-afr_is_transaction_running (afr_local_t *local)
-{
- GF_ASSERT (local->fop == GF_FOP_LOOKUP);
- return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
-}
+ local->refreshinode = inode_ref(inode);
-void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed))
-{
- afr_local_t *local = NULL;
- char sh_type_str[256] = {0,};
- char *bg = "";
-
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (inode);
- GF_ASSERT (ia_type != IA_INVAL);
+ if (gfid)
+ gf_uuid_copy(local->refreshgfid, gfid);
+ else
+ gf_uuid_clear(local->refreshgfid);
- local = frame->local;
- local->self_heal.background = background;
- local->self_heal.type = ia_type;
- local->self_heal.unwind = unwind;
- local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
-
- afr_self_heal_type_str_get (&local->self_heal,
- sh_type_str,
- sizeof (sh_type_str));
-
- if (background)
- bg = "background";
- gf_log (this->name, GF_LOG_DEBUG,
- "%s %s self-heal triggered. path: %s, reason: %s", bg,
- sh_type_str, local->loc.path, reason);
-
- afr_self_heal (frame, this, inode);
-}
-
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
- struct iatt *bufs, unsigned int child_count,
- const char *path)
-{
- unsigned int gfid_miss_count = 0;
- int i = 0;
- struct iatt *child1 = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if (uuid_is_null (child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
- " on subvolume %d", path, success_children[i]);
- gfid_miss_count++;
- }
- }
+ afr_inode_refresh_do(frame, this);
- return gfid_miss_count;
+ return 0;
}
-static int
-afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
+int
+afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req)
{
- int32_t *success_children = NULL;
- afr_private_t *priv = NULL;
- struct iatt *bufs = NULL;
- int miss_count = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
+ priv = this->private;
- miss_count = afr_gfid_missing_count (this->name, success_children,
- bufs, priv->child_count,
- local->loc.path);
- return miss_count;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64(xattr_req, priv->pending_key[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Unable to set dict value for %s", priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_uint64(xattr_req, AFR_DIRTY,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "failed to set dirty "
+ "query flag");
+ }
+
+ ret = dict_set_int32_sizen(xattr_req, "list-xattr", 1);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set list-xattr in dict ");
+ }
+
+ return ret;
}
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name)
-{
- gf_boolean_t conflicting = _gf_false;
- int i = 0;
- struct iatt *child1 = NULL;
- struct iatt *child2 = NULL;
- uuid_t *gfid = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
- gfid = &child1->ia_gfid;
-
- if (i == 0)
- continue;
-
- child2 = &bufs[success_children[i-1]];
- if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
- "differs on subvolumes (%d, %d)", path,
- success_children[i-1], success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
- if (!gfid || uuid_is_null (child1->ia_gfid))
- continue;
- if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
- " on subvolume %d", path, success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
- }
+int
+afr_lookup_xattr_req_prepare(afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc)
+{
+ int ret = -ENOMEM;
+
+ if (!local->xattr_req)
+ local->xattr_req = dict_new();
+
+ if (!local->xattr_req)
+ goto out;
+
+ if (xattr_req && (xattr_req != local->xattr_req))
+ dict_copy(xattr_req, local->xattr_req);
+
+ ret = afr_xattr_req_prepare(this, local->xattr_req);
+
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32(local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_set_sizen_str_sizen(local->xattr_req, "link-count",
+ GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ ret = 0;
out:
- return conflicting;
+ return ret;
}
-/* afr_update_gfid_from_iatts: This function should be called only if the
- * iatts are not conflicting.
- */
-void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children, unsigned int child_count)
-{
- uuid_t *gfid = NULL;
- int i = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
- gfid = &bufs[child].ia_gfid;
- } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
- if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
- GF_ASSERT (0);
- goto out;
- }
- }
- }
- if (gfid && (!uuid_is_null (*gfid)))
- uuid_copy (uuid, *gfid);
-out:
- return;
+int
+afr_least_pending_reads_child(afr_private_t *priv, unsigned char *readable)
+{
+ int i = 0;
+ int child = -1;
+ int64_t read_iter = -1;
+ int64_t pending_read = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i])
+ continue;
+ read_iter = GF_ATOMIC_GET(priv->pending_reads[i]);
+ if (child == -1 || read_iter < pending_read) {
+ pending_read = read_iter;
+ child = i;
+ }
+ }
+
+ return child;
}
-static gf_boolean_t
-afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
+static int32_t
+afr_least_latency_child(afr_private_t *priv, unsigned char *readable)
{
- afr_private_t *priv = NULL;
- gf_boolean_t conflict = _gf_false;
+ int32_t i = 0;
+ int child = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
- priv = this->private;
- conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name);
- return conflict;
+ if (child == -1 ||
+ priv->child_latency[i] < priv->child_latency[child]) {
+ child = i;
+ }
+ }
+ return child;
}
-gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal)
+static int32_t
+afr_least_latency_times_pending_reads_child(afr_private_t *priv,
+ unsigned char *readable)
{
- return !strcmp (data_self_heal, "open");
+ int32_t i = 0;
+ int child = -1;
+ int64_t pending_read = 0;
+ int64_t latency = -1;
+ int64_t least_latency = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ pending_read = GF_ATOMIC_GET(priv->pending_reads[i]);
+ latency = (pending_read + 1) * priv->child_latency[i];
+
+ if (child == -1 || latency < least_latency) {
+ least_latency = latency;
+ child = i;
+ }
+ }
+ return child;
}
-gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal)
+int
+afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
+ unsigned char *readable)
{
- gf_boolean_t enabled = _gf_false;
-
- if (gf_string2boolean (data_self_heal, &enabled) == -1) {
- enabled = !strcmp (data_self_heal, "open");
- GF_ASSERT (enabled);
- }
-
- return enabled;
+ uuid_t gfid_copy = {
+ 0,
+ };
+ pid_t pid;
+ int child = -1;
+
+ switch (priv->hash_mode) {
+ case AFR_READ_POLICY_FIRST_UP:
+ break;
+ case AFR_READ_POLICY_GFID_HASH:
+ gf_uuid_copy(gfid_copy, args->gfid);
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_GFID_PID_HASH:
+ if (args->ia_type != IA_IFDIR) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and
+ * returns a constant-length value that's sure to be
+ * shorter than a UUID. It's still very unlikely to be
+ * the same across clients, so it still provides good
+ * mixing. We're not trying for perfection here. All we
+ * need is a low probability that multiple clients
+ * won't converge on the same subvolume.
+ */
+ gf_uuid_copy(gfid_copy, args->gfid);
+ pid = getpid();
+ *(pid_t *)gfid_copy ^= pid;
+ }
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_LESS_LOAD:
+ child = afr_least_pending_reads_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LEAST_LATENCY:
+ child = afr_least_latency_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LOAD_LATENCY_HYBRID:
+ child = afr_least_latency_times_pending_reads_child(priv, readable);
+ break;
+ }
+
+ return child;
}
-static void
-afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
+int
+afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args)
{
- int i = 0;
- struct iatt *bufs = NULL;
- dict_t **xattr = NULL;
- afr_private_t *priv = NULL;
- int32_t child1 = -1;
- int32_t child2 = -1;
- afr_self_heal_t *sh = NULL;
- gf_boolean_t split_brain = _gf_false;
-
- priv = this->private;
- sh = &local->self_heal;
-
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- split_brain = split_brain || local->cont.lookup.possible_spb;
- afr_detect_self_heal_by_lookup_status (local, this, split_brain);
-
- if (afr_lookup_gfid_missing_count (local, this))
- local->self_heal.do_gfid_self_heal = _gf_true;
-
- if (_gf_true == afr_lookup_conflicting_entries (local, this))
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- else
- afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
- local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count);
-
- bufs = local->cont.lookup.bufs;
- for (i = 1; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i-1];
- child2 = local->cont.lookup.success_children[i];
- afr_detect_self_heal_by_iatt (local, this,
- &bufs[child1], &bufs[child2]);
- }
-
- xattr = local->cont.lookup.xattrs;
- for (i = 0; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i];
- afr_lookup_set_self_heal_params_by_xattr (local, this,
- xattr[child1]);
- }
- if (afr_open_only_data_self_heal (priv->data_self_heal)
- && !split_brain)
- sh->do_data_self_heal = _gf_false;
+ int i = 0;
+ int read_subvol = -1;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t local_args = {
+ 0,
+ };
+
+ priv = this->private;
+
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
+ return priv->read_child;
+
+ if (inode_is_linked(inode)) {
+ gf_uuid_copy(local_args.gfid, inode->gfid);
+ local_args.ia_type = inode->ia_type;
+ } else if (args) {
+ local_args = *args;
+ }
+
+ /* second preference - use hashed mode */
+ read_subvol = afr_hash_child(&local_args, priv, readable);
+ if (read_subvol >= 0 && readable[read_subvol])
+ return read_subvol;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i])
+ return i;
+ }
+
+ /* no readable subvolumes, either split brain or all subvols down */
+
+ return -1;
}
int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed)
+afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
{
- afr_local_t *local = NULL;
- int ret = -1;
- dict_t *xattr = NULL;
-
- local = frame->local;
+ int ret = -1;
- if (op_ret == -1) {
- local->op_ret = -1;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
-
- goto out;
- } else {
- local->op_ret = 0;
- }
-
- afr_lookup_done_success_action (frame, this, _gf_true);
- xattr = local->cont.lookup.xattr;
- if (xattr) {
- ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
- "sh-failed to %d", local->loc.path, sh_failed);
-
- if (local->self_heal.actual_sh_started == _gf_true &&
- sh_failed == 0) {
- ret = dict_set_int32 (xattr, "actual-sh-done", 1);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
- " set actual-sh-done to %d",
- local->loc.path,
- local->self_heal.actual_sh_started);
- }
+ if (type == AFR_METADATA_TRANSACTION)
+ ret = afr_inode_read_subvol_get(inode, this, 0, readable, event_p);
+ else
+ ret = afr_inode_read_subvol_get(inode, this, readable, 0, event_p);
+ return ret;
+}
- }
-out:
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode, &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+void
+afr_readables_intersect_get(inode_t *inode, xlator_t *this, int *event,
+ unsigned char *intersection)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ unsigned char *intersect = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ intersect = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ event);
+
+ AFR_INTERSECT(intersect, data_readable, metadata_readable,
+ priv->child_count);
+ if (intersection)
+ memcpy(intersection, intersect,
+ sizeof(*intersection) * priv->child_count);
+}
- return 0;
+int
+afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables, int *event_p,
+ afr_transaction_type type, afr_read_subvol_args_t *args)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *readable = NULL;
+ unsigned char *intersection = NULL;
+ int subvol = -1;
+ int event = 0;
+
+ priv = this->private;
+
+ readable = alloca0(priv->child_count);
+ intersection = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_type_get(inode, this, readable, &event, type);
+
+ afr_readables_intersect_get(inode, this, &event, intersection);
+
+ if (AFR_COUNT(intersection, priv->child_count) > 0)
+ subvol = afr_read_subvol_select_by_policy(inode, this, intersection,
+ args);
+ else
+ subvol = afr_read_subvol_select_by_policy(inode, this, readable, args);
+ if (subvol_p)
+ *subvol_p = subvol;
+ if (event_p)
+ *event_p = event;
+ if (readables)
+ memcpy(readables, readable, sizeof(*readables) * priv->child_count);
+ return subvol;
}
-//TODO: At the moment only lookup needs this, so not doing any checks, in the
-// future we will have to do fop specific operations
void
-afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
+afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_local_t *sh_local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- struct iatt *lookup_bufs = NULL;
- struct iatt *lookup_parentbufs = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- local = sh->orig_frame->local;
- lookup_bufs = local->cont.lookup.bufs;
- lookup_parentbufs = local->cont.lookup.postparents;
- priv = this->private;
+ priv = this->private;
- memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
- memcpy (lookup_parentbufs, sh->parentbufs,
- priv->child_count * sizeof (*sh->parentbufs));
+ afr_matrix_cleanup(local->pending, priv->child_count);
- afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- local->cont.lookup.xattr = NULL;
- }
+ GF_FREE(local->internal_lock.lower_locked_nodes);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
- }
+ afr_lockees_cleanup(&local->internal_lock);
- afr_reset_children (local->cont.lookup.success_children,
- priv->child_count);
- afr_children_copy (local->cont.lookup.success_children,
- sh->fresh_children, priv->child_count);
-}
+ GF_FREE(local->transaction.pre_op);
-static void
-afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
- gf_boolean_t *sh_launched)
-{
- unsigned int up_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- char *reason = NULL;
-
- GF_ASSERT (sh_launched);
- *sh_launched = _gf_false;
- priv = this->private;
- local = frame->local;
-
- up_count = afr_up_children_count (local->child_up, priv->child_count);
- if (up_count == 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Only 1 child up - do not attempt to detect self heal");
- goto out;
+ GF_FREE(local->transaction.pre_op_sources);
+ if (local->transaction.changelog_xdata) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.changelog_xdata[i])
+ continue;
+ dict_unref(local->transaction.changelog_xdata[i]);
}
+ GF_FREE(local->transaction.changelog_xdata);
+ }
- afr_lookup_set_self_heal_params (local, this);
- if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_transaction_running (local))
- goto out;
+ GF_FREE(local->transaction.failed_subvols);
- reason = "lookup detected pending operations";
- afr_launch_self_heal (frame, this, local->cont.lookup.inode,
- _gf_true, local->cont.lookup.buf.ia_type,
- reason, afr_post_gfid_sh_success,
- afr_self_heal_lookup_unwind);
- *sh_launched = _gf_true;
- }
-out:
- return;
+ GF_FREE(local->transaction.basename);
+ GF_FREE(local->transaction.new_basename);
+
+ loc_wipe(&local->transaction.parent_loc);
+ loc_wipe(&local->transaction.new_parent_loc);
}
void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count)
-{
- unsigned int i = 0;
- unsigned int j = 0;
-
- GF_ASSERT (success_children);
- GF_ASSERT (sources);
- GF_ASSERT (fresh_children);
-
- afr_reset_children (fresh_children, child_count);
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- success_children[i])) {
- fresh_children[j] = success_children[i];
- j++;
- }
- }
+afr_reply_wipe(struct afr_reply *reply)
+{
+ if (reply->xdata) {
+ dict_unref(reply->xdata);
+ reply->xdata = NULL;
+ }
+
+ if (reply->xattr) {
+ dict_unref(reply->xattr);
+ reply->xattr = NULL;
+ }
}
-static int
-afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
+void
+afr_replies_wipe(struct afr_reply *replies, int count)
{
- afr_private_t *priv = NULL;
+ int i = 0;
+
+ for (i = 0; i < count; i++) {
+ afr_reply_wipe(&replies[i]);
+ }
+}
- GF_ASSERT (read_child >= 0);
+void
+afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv)
+{
+ if (!local->replies)
+ return;
- priv = this->private;
- afr_get_fresh_children (local->cont.lookup.success_children,
- local->cont.lookup.sources,
- local->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
- local->fresh_children);
+ afr_replies_wipe(local->replies, priv->child_count);
- return 0;
+ memset(local->replies, 0, sizeof(*local->replies) * priv->child_count);
}
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict)
+static gf_boolean_t
+afr_fop_lock_is_unlock(call_frame_t *frame)
{
- int32_t read_child = -1;
- int32_t ret = -1;
- afr_local_t *local = NULL;
- gf_boolean_t fresh_lookup = _gf_false;
+ afr_local_t *local = frame->local;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) &&
+ (local->cont.inodelk.in_cmd == F_SETLKW ||
+ local->cont.inodelk.in_cmd == F_SETLK))
+ return _gf_true;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
- local = frame->local;
- fresh_lookup = local->cont.lookup.fresh_lookup;
+static gf_boolean_t
+afr_lk_is_unlock(int32_t cmd, struct gf_flock *flock)
+{
+ switch (cmd) {
+ case F_RESLK_UNLCK:
+ return _gf_true;
+ break;
- if (local->loc.parent == NULL)
- fail_conflict = _gf_true;
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
- if (afr_lookup_conflicting_entries (local, this)) {
- if (fail_conflict == _gf_false)
- ret = 0;
- goto out;
- }
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ if (F_UNLCK == flock->l_type)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (!afr_is_transaction_running (local) || fresh_lookup) {
- if (read_child < 0)
- goto out;
+void
+afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- ret = afr_lookup_set_read_ctx (local, this, read_child);
- if (ret)
- goto out;
- }
+ if (!frame || !frame->this || !frame->local || !frame->this->private)
+ return;
- ret = afr_lookup_build_response_params (local, this);
- if (ret)
- goto out;
- afr_update_loc_gfids (&local->loc,
- &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
+ if (*op_ret < 0)
+ return;
- ret = 0;
-out:
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- }
- return ret;
-}
+ /* Failing inodelk/entrylk/lk here is not a good idea because we
+ * need to cleanup the locks on the other bricks if we choose to fail
+ * the fop here. The brick may go down just after unwind happens as well
+ * so anyways the fop will fail when the next fop is sent so leaving
+ * it like this for now.*/
+ local = frame->local;
+ switch (local->op) {
+ case GF_FOP_LOOKUP:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_LK:
+ return;
+ default:
+ break;
+ }
-int
-afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int i = 0;
- int child = 0;
- int lsubvol = -1;
+ priv = frame->this->private;
+ if (!priv->consistent_io)
+ return;
- priv = this->private;
- success_children = local->cont.lookup.success_children;
- bufs = local->cont.lookup.bufs;
- for (i = 0; i < priv->child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if (uuid_is_null (bufs[child].ia_gfid))
- continue;
- if (lsubvol < 0) {
- lsubvol = child;
- } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
- lsubvol = child;
- } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
- (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
- lsubvol = child;
- }
- }
- return lsubvol;
-}
+ if (local->event_generation &&
+ (local->event_generation != priv->event_generation))
+ goto inconsistent;
-void
-afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
- int subvol)
-{
- afr_private_t *priv = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int i = 0;
- int child = 0;
-
- priv = this->private;
- success_children = local->cont.lookup.success_children;
- bufs = local->cont.lookup.bufs;
- memcpy (local->fresh_children, success_children,
- sizeof (*success_children) * priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- child = local->fresh_children[i];
- if (child == -1)
- break;
- if (child == subvol)
- continue;
- if (uuid_is_null (bufs[child].ia_gfid) &&
- (bufs[child].ia_type == bufs[subvol].ia_type))
- continue;
- afr_children_rm_child (success_children, child,
- priv->child_count);
- local->success_count--;
- }
- afr_reset_children (local->fresh_children, priv->child_count);
+ return;
+inconsistent:
+ *op_ret = -1;
+ *op_errno = ENOTCONN;
}
void
-afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+afr_local_cleanup(afr_local_t *local, xlator_t *this)
{
- int lsubvol = 0;
-
- if (!afr_lookup_conflicting_entries (local, this))
- goto out;
+ afr_private_t *priv = NULL;
- lsubvol = afr_lookup_get_latest_subvol (local, this);
- if (lsubvol < 0)
- goto out;
- afr_lookup_mark_other_entries_stale (local, this, lsubvol);
-out:
+ if (!local)
return;
-}
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this)
-{
- int unwind = 1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- gf_boolean_t sh_launched = _gf_false;
- gf_boolean_t fail_conflict = _gf_false;
- int gfid_miss_count = 0;
- int enotconn_count = 0;
- int up_children_count = 0;
-
- priv = this->private;
- local = frame->local;
+ syncbarrier_destroy(&local->barrier);
- if (local->op_ret < 0)
- goto unwind;
-
- if (local->cont.lookup.parent_entrylk && local->success_count > 1)
- afr_succeed_lookup_on_latest_iatt (local, this);
-
- gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
- enotconn_count = priv->child_count - up_children_count;
- if ((gfid_miss_count == local->success_count) &&
- (enotconn_count > 0)) {
- local->op_ret = -1;
- local->op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
- "LOOKUP on a file without gfid is not allowed when "
- "some of the children are down", local->loc.path);
- goto unwind;
- }
-
- if ((gfid_miss_count == local->success_count) &&
- uuid_is_null (local->cont.lookup.gfid_req)) {
- local->op_ret = -1;
- local->op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
- local->loc.path);
- goto unwind;
- }
-
- if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
- fail_conflict = _gf_true;
- ret = afr_lookup_done_success_action (frame, this, fail_conflict);
- if (ret)
- goto unwind;
- uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+ afr_local_transaction_cleanup(local, this);
- afr_lookup_perform_self_heal (frame, this, &sh_launched);
- if (sh_launched) {
- unwind = 0;
- goto unwind;
- }
+ priv = this->private;
- unwind:
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
- }
-}
+ loc_wipe(&local->loc);
+ loc_wipe(&local->newloc);
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
+ if (local->fd)
+ fd_unref(local->fd);
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno)
-{
- gf_boolean_t ret = _gf_true;
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
- /* Nothing should overwrite ENOENT, except ESTALE/EIO*/
- else if ((old_errno == ENOENT) && (new_errno != ESTALE)
- && (new_errno != EIO))
- ret = _gf_false;
+ if (local->dict)
+ dict_unref(local->dict);
- return ret;
-}
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->replies);
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count)
-{
- int i = 0;
- int32_t op_errno = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (afr_error_more_important (op_errno, child_errno[child]))
- op_errno = child_errno[child];
- }
- return op_errno;
+ GF_FREE(local->child_up);
+
+ GF_FREE(local->read_attempted);
+
+ GF_FREE(local->readable);
+ GF_FREE(local->readable2);
+
+ if (local->inode)
+ inode_unref(local->inode);
+
+ if (local->parent)
+ inode_unref(local->parent);
+
+ if (local->parent2)
+ inode_unref(local->parent2);
+
+ if (local->refreshinode)
+ inode_unref(local->refreshinode);
+
+ { /* getxattr */
+ GF_FREE(local->cont.getxattr.name);
+ }
+
+ { /* lk */
+ GF_FREE(local->cont.lk.locked_nodes);
+ GF_FREE(local->cont.lk.dom_locked_nodes);
+ GF_FREE(local->cont.lk.dom_lock_op_ret);
+ GF_FREE(local->cont.lk.dom_lock_op_errno);
+ }
+
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref(local->cont.create.fd);
+ if (local->cont.create.params)
+ dict_unref(local->cont.create.params);
+ }
+
+ { /* mknod */
+ if (local->cont.mknod.params)
+ dict_unref(local->cont.mknod.params);
+ }
+
+ { /* mkdir */
+ if (local->cont.mkdir.params)
+ dict_unref(local->cont.mkdir.params);
+ }
+
+ { /* symlink */
+ if (local->cont.symlink.params)
+ dict_unref(local->cont.symlink.params);
+ }
+
+ { /* writev */
+ GF_FREE(local->cont.writev.vector);
+ if (local->cont.writev.iobref)
+ iobref_unref(local->cont.writev.iobref);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref(local->cont.setxattr.dict);
+ }
+
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref(local->cont.fsetxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE(local->cont.removexattr.name);
+ }
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref(local->cont.xattrop.xattr);
+ }
+ { /* symlink */
+ GF_FREE(local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ GF_FREE(local->cont.opendir.checksum);
+ }
+
+ { /* open */
+ if (local->cont.open.fd)
+ fd_unref(local->cont.open.fd);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref(local->cont.readdir.dict);
+ }
+
+ { /* inodelk */
+ GF_FREE(local->cont.inodelk.volume);
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ }
+
+ { /* entrylk */
+ GF_FREE(local->cont.entrylk.volume);
+ GF_FREE(local->cont.entrylk.basename);
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ }
+
+ if (local->xdata_req)
+ dict_unref(local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
}
-static void
-afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+int
+afr_frame_return(call_frame_t *frame)
{
- GF_ASSERT (local);
- if (op_errno == ENOENT)
- local->enoent_count++;
+ afr_local_t *local = NULL;
+ int call_count = 0;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local = frame->local;
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
+ LOCK(&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ return call_count;
}
-static void
-afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
- inode_t *inode)
+static char *afr_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
+
+gf_boolean_t
+afr_is_xattr_ignorable(char *key)
{
- afr_private_t *priv = NULL;
- GF_ASSERT (inode);
+ int i = 0;
- if (!__is_root_gfid (inode->gfid))
- goto out;
- if (!afr_is_fresh_lookup (&local->loc, this))
- goto out;
- priv = this->private;
- if ((priv->first_lookup)) {
- gf_log (this->name, GF_LOG_INFO, "added root inode");
- priv->root_inode = inode_ref (inode);
- priv->first_lookup = 0;
- }
-out:
- return;
+ if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)))
+ return _gf_true;
+ for (i = 0; afr_ignore_xattrs[i]; i++) {
+ if (!strcmp(key, afr_ignore_xattrs[i]))
+ return _gf_true;
+ }
+ return _gf_false;
}
-static void
-afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
- struct iatt *buf, struct iatt *postparent)
+static gf_boolean_t
+afr_xattr_match_needed(dict_t *this, char *key1, data_t *value1, void *data)
{
- GF_ASSERT (child_index >= 0);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparents[child_index] = *postparent;
- local->cont.lookup.bufs[child_index] = *buf;
-}
+ /* Ignore all non-disk (i.e. virtual) xattrs right away. */
+ if (!gf_is_valid_xattr_namespace(key1))
+ return _gf_false;
-static void
-afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
- inode_t *inode, struct iatt *buf)
-{
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.buf = *buf;
- afr_set_root_inode_on_first_lookup (local, this, inode);
-}
+ /* Ignore on-disk xattrs that AFR doesn't need to heal. */
+ if (!afr_is_xattr_ignorable(key1))
+ return _gf_true;
-static void
-afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE) {
- local->op_ret = op_ret;
- local->op_errno = 0;
- }
- afr_lookup_handle_first_success (local, this, inode, buf);
- }
- afr_lookup_update_lk_counts (local, this,
- child_index, xattr);
+ return _gf_false;
+}
- afr_lookup_cache_args (local, child_index, xattr,
- buf, postparent);
- local->cont.lookup.success_children[local->success_count] = child_index;
- local->success_count++;
+gf_boolean_t
+afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2)
+{
+ return are_dicts_equal(dict1, dict2, afr_xattr_match_needed, NULL);
}
-int
-afr_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+static int
+afr_get_parent_read_subvol(xlator_t *this, inode_t *parent,
+ struct afr_reply *replies, unsigned char *readable)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
+ int i = 0;
+ int par_read_subvol = -1;
+ int par_read_subvol_iter = -1;
+ afr_private_t *priv = NULL;
- child_index = (long) cookie;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- local = frame->local;
+ if (parent)
+ par_read_subvol = afr_data_subvol_get(parent, this, NULL, NULL, NULL,
+ NULL);
- if (op_ret == -1) {
- afr_lookup_handle_error (local, op_ret, op_errno);
- goto unlock;
- }
- afr_lookup_handle_success (local, this, child_index, op_ret,
- op_errno, inode, buf, xattr,
- postparent);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- }
-unlock:
- UNLOCK (&frame->lock);
+ if (replies[i].op_ret < 0)
+ continue;
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_lookup_done (frame, this);
+ if (par_read_subvol_iter == -1) {
+ par_read_subvol_iter = i;
+ continue;
}
- return 0;
+ if ((par_read_subvol_iter != par_read_subvol) && readable[i])
+ par_read_subvol_iter = i;
+
+ if (i == par_read_subvol)
+ par_read_subvol_iter = i;
+ }
+ /* At the end of the for-loop, the only reason why @par_read_subvol_iter
+ * could be -1 is when this LOOKUP has failed on all sub-volumes.
+ * So it is okay to send an arbitrary subvolume (0 in this case)
+ * as parent read subvol.
+ */
+ if (par_read_subvol_iter == -1)
+ par_read_subvol_iter = 0;
+
+ return par_read_subvol_iter;
}
int
-afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
+afr_read_subvol_decide(inode_t *inode, xlator_t *this,
+ afr_read_subvol_args_t *args, unsigned char *readable)
{
- int ret = -ENOMEM;
- struct iatt *iatts = NULL;
- int32_t *success_children = NULL;
- int32_t *sources = NULL;
+ int event = 0;
+ afr_private_t *priv = NULL;
+ unsigned char *intersection = NULL;
- GF_ASSERT (local);
- local->cont.lookup.xattrs = GF_CALLOC (child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
- if (NULL == local->cont.lookup.xattrs)
- goto out;
+ priv = this->private;
+ intersection = alloca0(priv->child_count);
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.postparents = iatts;
+ afr_readables_intersect_get(inode, this, &event, intersection);
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.bufs = iatts;
+ if (AFR_COUNT(intersection, priv->child_count) <= 0) {
+ /* TODO: If we have one brick with valid data_readable and
+ * another with metadata_readable, try to send an iatt with
+ * valid bits from both.*/
+ return -1;
+ }
- success_children = afr_children_create (child_count);
- if (NULL == success_children)
- goto out;
- local->cont.lookup.success_children = success_children;
+ memcpy(readable, intersection, sizeof(*readable) * priv->child_count);
- local->fresh_children = afr_children_create (child_count);
- if (NULL == local->fresh_children)
- goto out;
+ return afr_read_subvol_select_by_policy(inode, this, intersection, args);
+}
- sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
- if (NULL == sources)
- goto out;
- local->cont.lookup.sources = sources;
+static inline int
+afr_first_up_child(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- ret = 0;
-out:
- return ret;
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (local->replies[i].valid && local->replies[i].op_ret == 0)
+ return i;
+ return -1;
}
-int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+static void
+afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
+ unsigned char *success_replies,
+ unsigned char *data_readable, int *read_subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- void *gfid_req = NULL;
- int ret = -1;
- int i = 0;
- int call_count = 0;
- uint64_t ctx = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_subvol = -1;
+ int child_count = -1;
- priv = this->private;
+ if (*read_subvol != -1)
+ return;
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ priv = this->private;
+ local = frame->local;
+ child_count = priv->child_count;
+
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
+ (AFR_COUNT(success_replies, child_count) == child_count)) {
+ *read_subvol = spb_subvol;
+ } else if (!priv->quorum_count ||
+ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
+ *read_subvol = afr_first_up_child(frame, this);
+ } else if (priv->quorum_count &&
+ afr_has_quorum(data_readable, this, NULL)) {
+ /* read_subvol is guaranteed to be valid if we hit this path. */
+ *read_subvol = afr_first_up_child(frame, this);
+ } else {
+ /* If quorum is enabled and we do not have a
+ readable yet, it means all good copies are down.
+ */
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR,
+ "no read "
+ "subvols for %s",
+ local->loc.path);
+ }
+ if (*read_subvol >= 0)
+ dict_del_sizen(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
+}
+static void
+afr_lookup_done(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ int par_read_subvol = 0;
+ int ret = -1;
+ unsigned char *readable = NULL;
+ unsigned char *success_replies = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {
+ 0,
+ };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
+ inode_t *parent = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+ char *gfid_heal_msg = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+ parent = local->loc.parent;
+
+ locked_entry = afr_is_possibly_under_txn(AFR_ENTRY_TRANSACTION, local,
+ this);
+
+ readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(parent, this, readable, NULL, &event);
+ par_read_subvol = afr_get_parent_read_subvol(this, parent, replies,
+ readable);
+
+ /* First, check if we have a gfid-change from somewhere,
+ If so, propagate that so that a fresh lookup can be
+ issued
+ */
+ if (local->cont.lookup.needs_fresh_lookup) {
local->op_ret = -1;
+ local->op_errno = ESTALE;
+ goto error;
+ }
- frame->local = local;
- local->fop = GF_FOP_LOOKUP;
+ op_errno = afr_final_errno(frame->local, this->private);
+ local->op_errno = op_errno;
- loc_copy (&local->loc, loc);
- ret = loc_path (&local->loc, NULL);
- if (ret < 0) {
- op_errno = EINVAL;
- goto out;
- }
+ read_subvol = -1;
+ afr_fill_success_replies(local, priv, success_replies);
- if (!strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
- goto out;
- }
-
- ret = inode_ctx_get (local->loc.inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- local->read_child_index = afr_inode_get_read_ctx (this,
- local->loc.inode,
- NULL);
- } else {
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
- local->cont.lookup.fresh_lookup = _gf_true;
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
+ continue;
}
- local->child_up = memdup (priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- if (NULL == local->child_up) {
- op_errno = ENOMEM;
- goto out;
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+ gf_uuid_copy(read_gfid, replies[i].poststat.ia_gfid);
+ ia_type = replies[i].poststat.ia_type;
+ local->op_ret = 0;
}
+ }
- ret = afr_lookup_cont_init (local, priv->child_count);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
+ if (read_subvol == -1)
+ goto error;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ continue;
+ }
+
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid, read_gfid))
+ continue;
+
+ can_interpret = _gf_false;
+
+ if (locked_entry)
+ continue;
+
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
+
+ /* If we were called from glfsheal and there is still a gfid
+ * mismatch, succeed the lookup and let glfsheal print the
+ * response via gfid-heal-msg.*/
+ if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
+ &gfid_heal_msg))
+ goto cant_interpret;
+
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto error;
+ }
+
+ /* Forth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ read_subvol = -1;
+ memset(readable, 0, sizeof(*readable) * priv->child_count);
+ if (can_interpret) {
+ if (!afr_has_quorum(success_replies, this, NULL))
+ goto cant_interpret;
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ gf_uuid_copy(args.gfid, read_gfid);
+ args.ia_type = ia_type;
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ read_subvol = afr_read_subvol_decide(local->inode, this, &args,
+ readable);
+ if (read_subvol == -1)
+ goto cant_interpret;
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
-
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- call_count = local->call_count;
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
- goto out;
+ } else {
+ cant_interpret:
+ afr_attempt_readsubvol_set(frame, this, success_replies, readable,
+ &read_subvol);
+ if (read_subvol == -1) {
+ goto error;
}
+ }
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
+ afr_handle_quota_size(frame, this);
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
- &gfid_req);
+ afr_set_need_heal(this, local);
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ goto error;
+ }
+
+ ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
+ if (!ret) {
+ ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
+ "gfid-heal-msg", gfid_heal_msg);
if (ret) {
- local->op_errno = -ret;
- goto out;
- }
- afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
- &local->loc);
- local->fop = GF_FOP_LOOKUP;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, local->xattr_req);
- if (!--call_count)
- break;
- }
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-heal-msg dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+ }
+
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[par_read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
+}
- ret = 0;
-out:
- if (ret)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others
+ */
- return 0;
+int
+afr_higher_errno(int32_t old_errno, int32_t new_errno)
+{
+ if (old_errno == ENODATA || new_errno == ENODATA)
+ return ENODATA;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
+
+ return new_errno;
}
+int
+afr_final_errno(afr_local_t *local, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret >= 0)
+ continue;
+ tmp_errno = local->replies[i].op_errno;
+ op_errno = afr_higher_errno(op_errno, tmp_errno);
+ }
+
+ return op_errno;
+}
+
+static int32_t
+afr_local_discovery_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ priv = this->private;
+ child_index = (int32_t)(long)cookie;
+
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = glusterfs_is_local_pathinfo(pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ priv->local[child_index] = 1;
+ /* Don't set arbiter as read child. */
+ if (AFR_IS_ARBITER_BRICK(priv, child_index))
+ goto out;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_LOCAL_CHILD,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+
+ priv->read_child = child_index;
+ }
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+afr_attempt_local_discovery(xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this, this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
-/* {{{ open */
+ tmploc.gfid[sizeof(tmploc.gfid) - 1] = 1;
+ STACK_WIND_COOKIE(newframe, afr_local_discovery_cbk,
+ (void *)(long)child_index, priv->children[child_index],
+ priv->children[child_index]->fops->getxattr, &tmploc,
+ GF_XATTR_PATHINFO_KEY, NULL);
+}
int
-__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+afr_lookup_sh_metadata_wrap(void *opaque)
{
- afr_private_t * priv = NULL;
- int ret = -1;
- uint64_t ctx = 0;
- afr_fd_ctx_t * fd_ctx = NULL;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ replies = local->replies;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ first = i;
+ break;
+ }
+ if (first == -1)
+ goto out;
+
+ if (afr_selfheal_metadata_by_stbuf(this, &replies[first].poststat))
+ goto out;
+
+ afr_local_replies_wipe(local, this->private);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ if (local->xattr_req) {
+ dict_copy(local->xattr_req, dict);
+ }
+
+ ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ if (loc_is_nameless(&local->loc)) {
+ ret = afr_selfheal_unlocked_discover_on(frame, local->inode,
+ local->loc.gfid, local->replies,
+ local->child_up, dict);
+ } else {
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, dict);
+ }
+ if (inode)
+ inode_unref(inode);
+out:
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+ if (dict)
+ dict_unref(dict);
- priv = this->private;
+ return 0;
+}
- ret = __fd_ctx_get (fd, this, &ctx);
+gf_boolean_t
+afr_is_pending_set(xlator_t *this, dict_t *xdata, int type)
+{
+ int idx = -1;
+ afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int *pending_int = NULL;
+ int i = 0;
- if (ret == 0)
- goto out;
+ priv = this->private;
+ idx = afr_index_for_transaction_type(type);
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto out;
- }
+ if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw) == 0) {
+ if (pending_raw) {
+ pending_int = pending_raw;
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto out;
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
}
+ }
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
+ pending_int = pending_raw;
+
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t start = _gf_false;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+
+ if (!priv->metadata_self_heal)
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (first == -1) {
+ first = i;
+ stbuf = replies[i].poststat;
+ continue;
}
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto out;
+ if (afr_is_pending_set(this, replies[i].xdata,
+ AFR_METADATA_TRANSACTION)) {
+ /* Let shd do the heal so that lookup is not blocked
+ * on getting metadata lock/doing the heal */
+ start = _gf_false;
+ break;
}
- fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_piggyback) {
- ret = -ENOMEM;
- goto out;
+ if (gf_uuid_compare(stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
+ start = _gf_false;
+ break;
+ }
+ if (!IA_EQUAL(stbuf, replies[i].poststat, type)) {
+ start = _gf_false;
+ break;
}
- fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_acquired) {
- ret = -ENOMEM;
- goto out;
+ /*Check if iattrs need heal*/
+ if ((!IA_EQUAL(stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, prot))) {
+ start = _gf_true;
+ continue;
}
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ /*Check if xattrs need heal*/
+ if (!afr_xattrs_are_equal(replies[first].xdata, replies[i].xdata))
+ start = _gf_true;
+ }
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto out;
- }
+ return start;
+}
- pthread_mutex_init (&fd_ctx->delay_lock, NULL);
- INIT_LIST_HEAD (&fd_ctx->paused_calls);
- INIT_LIST_HEAD (&fd_ctx->entries);
- fd_ctx->call_child = -1;
+int
+afr_lookup_metadata_heal_check(call_frame_t *frame, xlator_t *this)
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
+{
+ call_frame_t *heal = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ if (!afr_can_start_metadata_self_heal(frame, this))
+ goto out;
+
+ heal = afr_frame_create(this, &ret);
+ if (!heal) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_lookup_sh_metadata_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto out;
+ return ret;
out:
- return ret;
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return ret;
}
-
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+afr_lookup_selfheal_wrap(void *opaque)
{
- int ret = -1;
-
- LOCK (&fd->lock);
- {
- ret = __afr_fd_ctx_set (this, fd);
- }
- UNLOCK (&fd->lock);
-
- return ret;
+ int ret = 0;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ uuid_t pargfid = {
+ 0,
+ };
+
+ local = frame->local;
+ this = frame->this;
+ loc_pargfid(&local->loc, pargfid);
+
+ ret = afr_selfheal_name(frame->this, pargfid, local->loc.name,
+ &local->cont.lookup.gfid_req, local->xattr_req);
+ if (ret == -EIO)
+ goto unwind;
+
+ afr_local_replies_wipe(local, this->private);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, local->xattr_req);
+ if (inode)
+ inode_unref(inode);
+
+ afr_lookup_metadata_heal_check(frame, this);
+ return 0;
+
+unwind:
+ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ return 0;
}
-/* {{{ flush */
-
int
-afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- int call_count = -1;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *heal = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t name_state_mismatch = _gf_false;
+ struct afr_reply *replies = NULL;
+ int ret = 0;
+ unsigned char *par_readables = NULL;
+ unsigned char *success = NULL;
+ int32_t op_errno = 0;
+ uuid_t gfid = {0};
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+ par_readables = alloca0(priv->child_count);
+ success = alloca0(priv->child_count);
+
+ ret = afr_inode_read_subvol_get(local->loc.parent, this, par_readables,
+ NULL, NULL);
+ if (ret < 0 || AFR_COUNT(par_readables, priv->child_count) == 0) {
+ /* In this case set par_readables to all 1 so that name_heal
+ * need checks at the end of this function will flag missing
+ * entry when name state mismatches*/
+ memset(par_readables, 1, priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == 0) {
+ if (gf_uuid_is_null(gfid)) {
+ gf_uuid_copy(gfid, replies[i].poststat.ia_gfid);
+ }
+ success[i] = 1;
+ } else {
+ if ((replies[i].op_errno != ENOTCONN) &&
+ (replies[i].op_errno != ENOENT) &&
+ (replies[i].op_errno != ESTALE)) {
+ op_errno = replies[i].op_errno;
+ }
+ }
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
- }
+ /*gfid is missing, needs heal*/
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ goto name_heal;
+ }
- local->op_errno = op_errno;
+ if (first == -1) {
+ first = i;
+ continue;
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ if (replies[i].op_ret != replies[first].op_ret) {
+ name_state_mismatch = _gf_true;
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND(flush, frame, local->op_ret,
- local->op_errno, NULL);
+ if (replies[i].op_ret == 0) {
+ /* Rename after this lookup may succeed if we don't do
+ * a name-heal and the destination may not have pending xattrs
+ * to indicate which name is good and which is bad so always do
+ * this heal*/
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid, gfid)) {
+ goto name_heal;
+ }
+ }
+ }
- return 0;
+ if (name_state_mismatch) {
+ if (!priv->quorum_count)
+ goto name_heal;
+ if (!afr_has_quorum(success, this, NULL))
+ goto name_heal;
+ if (op_errno)
+ goto name_heal;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (par_readables[i] && replies[i].op_ret < 0 &&
+ replies[i].op_errno != ENOTCONN) {
+ goto name_heal;
+ }
+ }
+ }
+
+ goto metadata_heal;
+
+name_heal:
+ heal = afr_frame_create(this, NULL);
+ if (!heal)
+ goto metadata_heal;
+
+ ret = synctask_new(this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret) {
+ AFR_STACK_DESTROY(heal);
+ goto metadata_heal;
+ }
+ return ret;
+
+metadata_heal:
+ ret = afr_lookup_metadata_heal_check(frame, this);
+
+ return ret;
}
int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+afr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int op_errno = 0;
- int call_count = -1;
- int i = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ /*
+ * On revalidate lookup if the gfid-changed, afr should unwind the fop
+ * with ESTALE so that a fresh lookup will be sent by the top xlator.
+ * So remember it.
+ */
+ if (xdata && dict_get_sizen(xdata, "gfid-changed"))
+ local->cont.lookup.needs_fresh_lookup = _gf_true;
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_entry_heal(frame, this);
+ }
+
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+static void
+afr_discover_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
+ int ret = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *success_replies = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ data_readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) > 0)
+ local->op_ret = 0;
+
+ if (local->op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(frame->local, this->private);
+ goto error;
+ }
- ret = afr_local_init(local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (!afr_has_quorum(success_replies, this, frame))
+ goto unwind;
- local->fd = fd_ref(fd);
- call_count = local->call_count;
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
- /*
- * Ideally we should synchronize flush against completion of writing
- * the delayed changelog, but for now we just push it out first...
- */
- afr_delayed_changelog_wake_up(this, fd);
+ read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
+ data_readable);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd, NULL);
+unwind:
+ afr_attempt_readsubvol_set(frame, this, success_replies, data_readable,
+ &read_subvol);
+ if (read_subvol == -1)
+ goto error;
- if (!--call_count)
- break;
- }
- }
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ }
+
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
+}
- ret = 0;
+static int
+afr_ta_id_file_check(void *opaque)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ this = opaque;
+ priv = this->private;
+
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ goto out;
+ }
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, &stbuf,
+ 0, 0, 0);
+ if (ret == 0) {
+ goto out;
+ } else if (ret == -ENOENT) {
+ fd = fd_create(loc.inode, getpid());
+ if (!fd)
+ goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(dict, "gfid-req", gfid, true);
+ ret = syncop_create(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ O_RDWR, 0664, fd, &stbuf, dict, NULL);
+ }
out:
- if (ret < 0)
- AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ if (ret == 0) {
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to lookup/create thin-arbiter id file.");
+ }
+ if (dict)
+ dict_unref(dict);
+ if (fd)
+ fd_unref(fd);
+ loc_wipe(&loc);
+
+ return 0;
+}
- return 0;
+static int
+afr_ta_id_file_check_cbk(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ return 0;
}
-/* }}} */
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv->thin_arbiter_count)
+ goto unwind;
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto unwind;
+
+ ret = synctask_new(this->ctx->env, afr_ta_id_file_check,
+ afr_ta_id_file_check_cbk, NULL, this);
+ if (ret)
+ goto unwind;
+unwind:
+ afr_discover_unwind(frame, this);
+}
+int
+afr_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
+
+ if (local->do_discovery && (op_ret == 0))
+ afr_attempt_local_discovery(this, child_index);
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_metadata_heal_check(frame, this);
+ }
+
+ return 0;
+}
int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+afr_discover_do(call_frame_t *frame, xlator_t *this, int err)
{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_discover_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0)
- goto out;
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
+}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+int
+afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int event = 0;
- if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ priv = this->private;
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- list_del_init (&paused_call->call_list);
- GF_FREE (paused_call);
- }
+ if (__is_root_gfid(loc->inode->gfid)) {
+ if (!priv->root_inode)
+ priv->root_inode = inode_ref(loc->inode);
+
+ if (priv->choose_local && !priv->did_discovery) {
+ /* Logic to detect which subvolumes of AFR are
+ local, in order to prefer them for reads
+ */
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
- if (fd_ctx->lock_piggyback)
- GF_FREE (fd_ctx->lock_piggyback);
+ local->op = GF_FOP_LOOKUP;
- if (fd_ctx->lock_acquired)
- GF_FREE (fd_ctx->lock_acquired);
+ loc_copy(&local->loc, loc);
- pthread_mutex_destroy (&fd_ctx->delay_lock);
+ local->inode = inode_ref(loc->inode);
- GF_FREE (fd_ctx);
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
}
+ }
-out:
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ afr_discover_do(frame, this, 0);
return 0;
+ }
+
+ afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+ afr_discover_do(frame, this, 0);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
+int
+afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
+{
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err < 0) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_lookup_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
+}
+
+/*
+ * afr_lookup()
+ *
+ * The goal here is to figure out what the element getting looked up is.
+ * i.e what is the GFID, inode type and a conservative estimate of the
+ * inode attributes are.
+ *
+ * As we lookup, operations may be underway on the entry name and the
+ * inode. In lookup() we are primarily concerned only with the entry
+ * operations. If the entry is getting unlinked or renamed, we detect
+ * what operation is underway by querying for on-going transactions and
+ * pending self-healing on the entry through xdata.
+ *
+ * If the entry is a file/dir, it may need self-heal and/or in a
+ * split-brain condition. Lookup is not the place to worry about these
+ * conditions. Outcast marking will naturally handle them in the read
+ * paths.
+ *
+ * Here is a brief goal of what we are trying to achieve:
+ *
+ * - LOOKUP on all subvolumes concurrently, querying on-going transaction
+ * and pending self-heal info from the servers.
+ *
+ * - If all servers reply the same inode type and GFID, the overall call
+ * MUST be a success.
+ *
+ * - If inode types or GFIDs mismatch, and there IS either an on-going
+ * transaction or pending self-heal, inspect what the nature of the
+ * transaction or pending heal is, and select the appropriate subvolume's
+ * reply as the winner.
+ *
+ * - If inode types or GFIDs mismatch, and there are no on-going transactions
+ * or pending self-heal on the entry name on any of the servers, fail the
+ * lookup with EIO. Something has gone wrong beyond reasonable action.
+ */
int
-afr_release (xlator_t *this, fd_t *fd)
+afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int event = 0;
+ int ret = 0;
+
+ if (loc_is_nameless(loc)) {
+ if (xattr_req)
+ dict_del_sizen(xattr_req, "gfid-req");
+ afr_discover(frame, this, loc, xattr_req);
+ return 0;
+ }
- priv = this->private;
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
+ }
- afr_cleanup_fd_ctx (this, fd);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
- list) {
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- if (locked_fd->fd == fd) {
- list_del_init (&locked_fd->list);
- GF_FREE (locked_fd);
- }
+ local->op = GF_FOP_LOOKUP;
+ loc_copy(&local->loc, loc);
+
+ local->inode = inode_ref(loc->inode);
+
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req",
+ &local->cont.lookup.gfid_req);
+ if (ret == 0) {
+ dict_del_sizen(local->xattr_req, "gfid-req");
}
+ }
- return 0;
+ afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+ afr_lookup_do(frame, this, 0);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ return 0;
}
+void
+_afr_cleanup_fd_ctx(xlator_t *this, afr_fd_ctx_t *fd_ctx)
+{
+ afr_private_t *priv = this->private;
-/* {{{ fsync */
+ if (fd_ctx->lk_heal_info) {
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ afr_lk_heal_info_cleanup(fd_ctx->lk_heal_info);
+ fd_ctx->lk_heal_info = NULL;
+ }
+ GF_FREE(fd_ctx->opened_on);
+ GF_FREE(fd_ctx);
+ return;
+}
int
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd)
{
- afr_local_t *local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
- local = frame->local;
+ ret = fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ if (fd_ctx) {
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ }
- if (op_ret == 0) {
- local->op_ret = 0;
+out:
+ return 0;
+}
- if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+int
+afr_release(xlator_t *this, fd_t *fd)
+{
+ afr_cleanup_fd_ctx(this, fd);
- if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ return 0;
+}
- local->success_count++;
- }
+afr_fd_ctx_t *
+__afr_fd_ctx_get(fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ ret = __fd_ctx_get(fd, this, &ctx);
- call_count = afr_frame_return (frame);
+ if (ret < 0) {
+ ret = __afr_fd_ctx_set(this, fd);
+ if (ret < 0)
+ goto out;
- if (call_count == 0) {
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf,
- NULL);
- }
+ ret = __fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+ }
- return 0;
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
+out:
+ return fd_ctx;
}
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ }
+ UNLOCK(&fd->lock);
+
+ return fd_ctx;
+}
int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+__afr_fd_ctx_set(xlator_t *this, fd_t *fd)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(fd, out);
- priv = this->private;
+ priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ ret = __fd_ctx_get(fd, this, &ctx);
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (ret == 0)
+ goto out;
- call_count = local->call_count;
+ fd_ctx = GF_CALLOC(1, sizeof(afr_fd_ctx_t), gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
- local->fd = fd_ref (fd);
+ fd_ctx->opened_on = GF_CALLOC(sizeof(*fd_ctx->opened_on), priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsync_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync, xdata);
- if (!--call_count)
- break;
- }
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_is_anonymous(fd))
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ else
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
- ret = 0;
+ fd_ctx->readdir_subvol = -1;
+ fd_ctx->lk_heal_info = NULL;
+
+ ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx);
+ if (ret)
+ gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd);
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ if (ret && fd_ctx)
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ return ret;
}
-/* }}} */
-
-/* {{{ fsync */
+/* {{{ flush */
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+int
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
+ LOCK(&frame->lock);
+ {
+ if (op_ret != -1) {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
}
- UNLOCK (&frame->lock);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- if (call_count == 0)
- AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, xdata);
+ return 0;
+}
- return 0;
+static int
+afr_flush_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_flush_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->flush,
+ local->fd, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
+afr_local_t *
+afr_wakeup_same_fd_delayed_op(xlator_t *this, afr_lock_t *lock, fd_t *fd)
+{
+ afr_local_t *local = NULL;
+
+ if (lock->delay_timer) {
+ local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ if (fd == local->fd) {
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ local = NULL;
+ } else {
+ lock->delay_timer = NULL;
+ }
+ } else {
+ local = NULL;
+ }
+ }
+
+ return local;
+}
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+void
+afr_delayed_changelog_wake_resume(xlator_t *this, inode_t *inode,
+ call_stub_t *stub)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_local_t *metadata_local = NULL;
+ afr_local_t *data_local = NULL;
+ LOCK(&inode->lock);
+ {
+ (void)__afr_inode_ctx_get(this, inode, &ctx);
+ lock = &ctx->lock[AFR_DATA_TRANSACTION];
+ data_local = afr_wakeup_same_fd_delayed_op(this, lock, stub->args.fd);
+ lock = &ctx->lock[AFR_METADATA_TRANSACTION];
+ metadata_local = afr_wakeup_same_fd_delayed_op(this, lock,
+ stub->args.fd);
+ }
+ UNLOCK(&inode->lock);
+
+ if (data_local) {
+ data_local->transaction.resume_stub = stub;
+ } else if (metadata_local) {
+ metadata_local->transaction.resume_stub = stub;
+ } else {
+ call_resume(stub);
+ }
+ if (data_local) {
+ afr_delayed_changelog_wake_up_cbk(data_local);
+ }
+ if (metadata_local) {
+ afr_delayed_changelog_wake_up_cbk(metadata_local);
+ }
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+int
+afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = ENOMEM;
- priv = this->private;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ local->op = GF_FOP_FLUSH;
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->fd = fd_ref(fd);
- call_count = local->call_count;
+ stub = fop_flush_stub(frame, afr_flush_wrapper, fd, xdata);
+ if (!stub)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsyncdir_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync, xdata);
- if (!--call_count)
- break;
- }
- }
+ afr_delayed_changelog_wake_resume(this, fd->inode, stub);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
- return 0;
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ return 0;
}
-/* }}} */
-
-/* {{{ xattrop */
-
-int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
+int
+afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- if (!local->cont.xattrop.xattr)
- local->cont.xattrop.xattr = dict_ref (xattr);
- local->op_ret = 0;
- }
+ local = frame->local;
- local->op_errno = op_errno;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
}
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- if (call_count == 0)
- AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- local->cont.xattrop.xattr, xdata);
+ if (call_count == 0)
+ AFR_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- return 0;
+ return 0;
}
-
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+int
+afr_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FSYNCDIR;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ call_count = local->call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND(frame, afr_fsyncdir_cbk, priv->children[i],
+ priv->children[i]->fops->fsyncdir, fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ return 0;
+out:
+ AFR_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
- priv = this->private;
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+/* }}} */
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this);
- call_count = local->call_count;
+static gf_boolean_t
+afr_is_conflicting_lock_present(int32_t op_ret, int32_t op_errno)
+{
+ if (op_ret == -1 && op_errno == EAGAIN)
+ return _gf_true;
+ return _gf_false;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr, xdata);
- if (!--call_count)
- break;
- }
- }
+static void
+afr_fop_lock_unwind(call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ switch (op) {
+ case GF_FOP_INODELK:
+ AFR_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FINODELK:
+ AFR_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ AFR_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ AFR_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, xdata);
+ break;
+ default:
+ break;
+ }
+}
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+static void
+afr_fop_lock_wind(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t (*lock_cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *))
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = child_index;
+
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->inodelk,
+ (const char *)local->cont.inodelk.volume, &local->loc,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_FINODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->finodelk,
+ (const char *)local->cont.inodelk.volume, local->fd,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->entrylk, local->cont.entrylk.volume,
+ &local->loc, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fentrylk, local->cont.entrylk.volume,
+ local->fd, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ default:
+ break;
+ }
}
-/* }}} */
+void
+afr_fop_lock_proceed(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
-/* {{{ fxattrop */
+ local = frame->local;
+ priv = frame->this->private;
+
+ if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) {
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return;
+ }
+ /* At least one child is up */
+ /*
+ * Non-blocking locks also need to be serialized. Otherwise there is
+ * a chance that both the mounts which issued same non-blocking inodelk
+ * may endup not acquiring the lock on any-brick.
+ * Ex: Mount1 and Mount2
+ * request for full length lock on file f1. Mount1 afr may acquire the
+ * partial lock on brick-1 and may not acquire the lock on brick-2
+ * because Mount2 already got the lock on brick-2, vice versa. Since
+ * both the mounts only got partial locks, afr treats them as failure in
+ * gaining the locks and unwinds with EAGAIN errno.
+ */
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop_lock_state = AFR_FOP_LOCK_SERIAL;
+ afr_local_replies_wipe(local, priv);
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = local->cont.inodelk.in_cmd;
+ local->cont.inodelk.flock = local->cont.inodelk.in_flock;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = local->cont.entrylk.in_cmd;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
+ }
+ afr_serialized_lock_wind(frame, frame->this);
+}
+
+static int32_t
+afr_unlock_partial_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
-int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ uuid_t gfid = {0};
- int call_count = -1;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ if (local->fd)
+ gf_uuid_copy(gfid, local->fd->inode->gfid);
+ else
+ loc_gfid(&local->loc, gfid);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "%s: Failed to unlock %s on %s "
+ "with lk_owner: %s",
+ uuid_utoa(gfid), gf_fop_list[local->op],
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_proceed(frame);
+
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- if (!local->cont.fxattrop.xattr)
- local->cont.fxattrop.xattr = dict_ref (xattr);
+static int32_t
+afr_unlock_locks_and_proceed(call_frame_t *frame, xlator_t *this,
+ int call_count)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ if (call_count == 0) {
+ afr_fop_lock_proceed(frame);
+ goto out;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ local->call_count = call_count;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.flock.l_type = F_UNLCK;
+ local->cont.inodelk.cmd = F_SETLK;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_UNLOCK;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ break;
+ default:
+ break;
+ }
- local->op_ret = 0;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (local->replies[i].op_ret == -1)
+ continue;
- call_count = afr_frame_return (frame);
+ afr_fop_lock_wind(frame, this, i, afr_unlock_partial_lock_cbk);
- if (call_count == 0)
- AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- local->cont.fxattrop.xattr, xdata);
+ if (!--call_count)
+ break;
+ }
- return 0;
+out:
+ return 0;
}
-
int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+afr_fop_lock_done(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ int i = 0;
+ int lock_count = 0;
+ unsigned char *success = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
+ success = alloca0(priv->child_count);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (local->replies[i].op_ret == 0) {
+ lock_count++;
+ success[i] = 1;
+ }
- call_count = local->call_count;
+ if (local->op_ret == -1 && local->op_errno == EAGAIN)
+ continue;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr, xdata);
- if (!--call_count)
- break;
- }
+ if ((local->replies[i].op_ret == -1) &&
+ (local->replies[i].op_errno == EAGAIN)) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
+ continue;
}
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
+ if (local->replies[i].op_ret == 0)
+ local->op_ret = 0;
-/* }}} */
+ local->op_errno = local->replies[i].op_errno;
+ }
+ if (afr_fop_lock_is_unlock(frame))
+ goto unwind;
-int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) {
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else if (priv->quorum_count && !afr_has_quorum(success, this, NULL)) {
+ local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno(priv);
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else {
+ goto unwind;
+ }
+
+ return 0;
+unwind:
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
+static int
+afr_common_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret == 0 && xdata) {
+ local->replies[child_index].xdata = dict_ref(xdata);
+ LOCK(&frame->lock);
{
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
+ if (!local->xdata_rsp)
+ local->xdata_rsp = dict_ref(xdata);
}
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ UNLOCK(&frame->lock);
+ }
+ return 0;
+}
- if (call_count == 0)
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno, xdata);
+static int32_t
+afr_serialized_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- return 0;
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long)cookie;
+ int next_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+
+ for (next_child = child_index + 1; next_child < priv->child_count;
+ next_child++) {
+ if (local->child_up[next_child])
+ break;
+ }
+
+ if (afr_is_conflicting_lock_present(op_ret, op_errno) ||
+ (next_child == priv->child_count)) {
+ afr_fop_lock_done(frame, this);
+ } else {
+ afr_fop_lock_wind(frame, this, next_child, afr_serialized_lock_cbk);
+ }
+
+ return 0;
}
-
-int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata)
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ local = frame->local;
- priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ afr_fop_lock_wind(frame, this, i, afr_serialized_lock_cbk);
+ break;
+ }
+ }
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+static int32_t
+afr_parallel_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+{
+ int call_count = 0;
- call_count = local->call_count;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock, xdata);
-
- if (!--call_count)
- break;
- }
- }
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_done(frame, this);
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+static int
+afr_parallel_lock_wind(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ afr_fop_lock_wind(frame, this, i, afr_parallel_lock_cbk);
+ if (!--call_count)
+ break;
+ }
+ return 0;
+}
-int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-
+static int
+afr_fop_handle_lock(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = frame->local;
+ int op_errno = 0;
- local = frame->local;
+ if (!afr_fop_lock_is_unlock(frame)) {
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = F_SETLK;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_LOCK_NB;
+ break;
+ default:
+ break;
+ }
+ }
- local->op_errno = op_errno;
+ if (local->xdata_req) {
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
}
- UNLOCK (&frame->lock);
+ }
- call_count = afr_frame_return (frame);
+ local->fop_lock_state = AFR_FOP_LOCK_PARALLEL;
+ afr_parallel_lock_wind(frame, this);
+out:
+ return -op_errno;
+}
- if (call_count == 0)
- AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno, xdata);
+static int32_t
+afr_handle_inodelk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (flock->l_type != F_UNLCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+
+ local->cont.inodelk.volume = gf_strdup(volume);
+ if (!local->cont.inodelk.volume) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->cont.inodelk.in_cmd = cmd;
+ local->cont.inodelk.cmd = cmd;
+ local->cont.inodelk.in_flock = *flock;
+ local->cont.inodelk.flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+int32_t
+afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ afr_handle_inodelk(frame, this, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
+ return 0;
+}
int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
+afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_handle_inodelk(frame, this, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+static int
+afr_handle_entrylk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (cmd != ENTRYLK_UNLOCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+ local->cont.entrylk.cmd = cmd;
+ local->cont.entrylk.in_cmd = cmd;
+ local->cont.entrylk.type = type;
+ local->cont.entrylk.volume = gf_strdup(volume);
+ local->cont.entrylk.basename = gf_strdup(basename);
+ if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
+ return 0;
+}
- priv = this->private;
+int
+afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+int
+afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
+ return 0;
+}
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+int
+afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ struct statvfs *buf = NULL;
- call_count = local->call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock, xdata);
-
- if (!--call_count)
- break;
+ LOCK(&frame->lock);
+ {
+ if (op_ret != 0) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+
+ local->op_ret = op_ret;
+
+ buf = &local->cont.statfs.buf;
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < buf->f_bavail) {
+ *buf = *statvfs;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+ } else {
+ *buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+unlock:
+ call_count = --local->call_count;
+ UNLOCK(&frame->lock);
- ret = 0;
+ if (call_count == 0)
+ AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf, local->xdata_rsp);
+
+ return 0;
+}
+
+int
+afr_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_STATFS;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX])
+ local->call_count--;
+ call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ STACK_WIND(frame, afr_statfs_cbk, priv->children[i],
+ priv->children[i]->fops->statfs, loc, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
- return 0;
+ AFR_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
+int32_t
+afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ int call_count = -1;
+ int child_index = (long)cookie;
+
+ local = frame->local;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ }
+
+ return 0;
+}
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_lk_unlock(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ call_count = afr_locked_nodes_count(local->cont.lk.locked_nodes,
+ priv->child_count);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ return 0;
+ }
- call_count = afr_frame_return (frame);
+ local->call_count = call_count;
- if (call_count == 0)
- AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno, xdata);
+ local->cont.lk.user_flock.l_type = F_UNLCK;
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND_COOKIE(frame, afr_lk_unlock_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
+}
int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
+afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ child_index = (long)cookie;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ afr_lk_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
- call_count = local->call_count;
+ afr_lk_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
-
- if (!--call_count)
- break;
- }
- }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+int
+afr_lk_transaction_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ return 0;
+}
+int
+afr_lk_txn_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ child_index = (long)cookie;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+int
+afr_lk_txn_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int child_index = (long)cookie;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+ return 0;
+}
+int
+afr_lk_transaction(void *opaque)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *wind_on = NULL;
+ int op_errno = 0;
+ int i = 0;
+ int ret = 0;
+
+ frame = (call_frame_t *)opaque;
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+
+ if (priv->arbiter_count || priv->child_count != 3) {
+ op_errno = ENOTSUP;
+ gf_msg(frame->this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Lock healing supported only for replica 3 volumes.",
+ uuid_utoa(local->fd->inode->gfid));
+ goto err;
+ }
+
+ op_errno = -afr_dom_lock_acquire(frame); // Released during
+ // AFR_STACK_UNWIND
+ if (op_errno != 0) {
+ goto err;
+ }
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.dom_locked_nodes, this, NULL)) {
+ op_errno = afr_final_errno(local, priv);
+ goto err;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && local->cont.lk.dom_locked_nodes[i])
+ wind_on[i] = 1;
+ }
+ AFR_ONLIST(wind_on, frame, afr_lk_txn_wind_cbk, lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ goto unlock;
+ } else {
+ if (local->cont.lk.user_flock.l_type == F_UNLCK)
+ ret = afr_remove_lock_from_saved_locks(local, this);
+ else
+ ret = afr_add_lock_to_saved_locks(frame, this);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
- local = frame->local;
+ return 0;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+unlock:
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+ AFR_ONLIST(local->cont.lk.locked_nodes, frame, afr_lk_txn_unlock_cbk, lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock, NULL);
+err:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return -1;
+}
- local->op_errno = op_errno;
+int
+afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ int i = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_LK;
+ if (!afr_lk_is_unlock(cmd, flock)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+ }
+
+ local->cont.lk.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref(fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.user_flock = *flock;
+ local->cont.lk.ret_flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ if (afr_is_lock_mode_mandatory(xdata)) {
+ ret = synctask_new(this->ctx->env, afr_lk_transaction,
+ afr_lk_transaction_cbk, frame, frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
}
- UNLOCK (&frame->lock);
+ return 0;
+ }
- call_count = afr_frame_return (frame);
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i],
+ priv->children[i]->fops->lk, fd, cmd, flock,
+ local->xdata_req);
- if (call_count == 0)
- AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno, xdata);
+ return 0;
+out:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+int32_t
+afr_lease_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+ call_count = afr_frame_return(frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno, lease,
+ xdata);
+
+ return 0;
+}
int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+afr_lease_unlock(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ call_count = afr_locked_nodes_count(local->cont.lease.locked_nodes,
+ priv->child_count);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
+ return 0;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->call_count = call_count;
- call_count = local->call_count;
+ local->cont.lease.user_lease.cmd = GF_UNLK_LEASE;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
-
- if (!--call_count)
- break;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lease.locked_nodes[i]) {
+ STACK_WIND(frame, afr_lease_unlock_cbk, priv->children[i],
+ priv->children[i]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, NULL);
+
+ if (!--call_count)
+ break;
}
+ }
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs, dict_t *xdata)
+afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
+ local = frame->local;
+ priv = this->private;
- if (op_ret == -1)
- local->op_errno = op_errno;
+ child_index = (long)cookie;
- }
- UNLOCK (&frame->lock);
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
- call_count = afr_frame_return (frame);
+ afr_lease_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lease.locked_nodes[child_index] = 1;
+ local->cont.lease.ret_lease = *lease;
+ }
+
+ child_index++;
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, xdata);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lease.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
- if (call_count == 0)
- AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf, xdata);
+ afr_lease_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
+ }
- return 0;
+ return 0;
}
-
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
+int
+afr_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
- afr_local_t * local = NULL;
- int i = 0;
- int ret = -1;
- int call_count = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ priv = this->private;
- priv = this->private;
- child_count = priv->child_count;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ local->op = GF_FOP_LEASE;
+ local->cont.lease.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lease.locked_nodes),
+ gf_afr_mt_char);
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (!local->cont.lease.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- call_count = local->call_count;
+ loc_copy(&local->loc, loc);
+ local->cont.lease.user_lease = *lease;
+ local->cont.lease.ret_lease = *lease;
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc, xdata);
- if (!--call_count)
- break;
- }
- }
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)0, priv->children[0],
+ priv->children[0]->fops->lease, loc, lease, xdata);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
+ AFR_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
+int
+afr_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
-
- local = frame->local;
- call_count = afr_frame_return (frame);
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = 0;
+ gf_boolean_t failed = _gf_false;
+ gf_boolean_t succeeded = _gf_false;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ call_count = afr_frame_return(frame);
+ if (call_count)
+ goto out;
+ /* If any of the subvolumes failed with other than ENOTCONN
+ * return error else return success unless all the subvolumes
+ * failed.
+ * TODO: In case of failure, we need to unregister the xattrs
+ * from the other subvolumes where it succeeded (once upcall
+ * fixes the Bz-1371622)*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno != ENOTCONN) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ if (local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ failed = _gf_true;
+ break;
+ }
+ if (local->replies[i].op_ret == 0) {
+ succeeded = _gf_true;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ if (!local->xdata_rsp && local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+ }
+
+ if (!succeeded && !failed) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock, xdata);
+ AFR_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- return 0;
+out:
+ return 0;
}
-
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+int
+afr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_cnt = -1;
- local = frame->local;
- priv = this->private;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind_default;
- if (call_count == 0) {
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
- return 0;
- }
+ VALIDATE_OR_GOTO(this->private, err);
+ priv = this->private;
- local->call_count = call_count;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto err;
- local->cont.lk.user_flock.l_type = F_UNLCK;
+ call_cnt = local->call_count;
+ if (xdata) {
for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.user_flock, NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ if (dict_set_int8(xdata, priv->pending_key[i], 0) < 0)
+ goto err;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ STACK_WIND_COOKIE(frame, afr_ipc_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->ipc, op,
+ xdata);
+ if (!--call_cnt)
+ break;
+ }
+ return 0;
+
+err:
+ if (op_errno == -1)
+ op_errno = errno;
+ AFR_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
+
+ return 0;
+
+wind_default:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
}
-
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+int
+afr_forget(xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int child_index = -1;
-/* int ret = 0; */
+ uint64_t ctx_int = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
+ return 0;
- local = frame->local;
- priv = this->private;
+ ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ afr_inode_ctx_destroy(ctx);
+ return 0;
+}
- child_index = (long) cookie;
+int
+afr_priv_dump(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf(key, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ sprintf(key, "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ sprintf(key, "pending_reads[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64,
+ GF_ATOMIC_GET(priv->pending_reads[i]));
+ sprintf(key, "child_latency[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
+ }
+ gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+ gf_proc_dump_write("heal-wait-queue-length", "%d", priv->heal_wait_qlen);
+ gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
+ gf_proc_dump_write("background-self-heal-count", "%d",
+ priv->background_self_heal_count);
+ gf_proc_dump_write("healers", "%d", priv->healers);
+ gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ gf_proc_dump_write("quorum-type", "auto");
+ } else if (priv->quorum_count == 0) {
+ gf_proc_dump_write("quorum-type", "none");
+ } else {
+ gf_proc_dump_write("quorum-type", "fixed");
+ gf_proc_dump_write("quorum-count", "%d", priv->quorum_count);
+ }
+ gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this, NULL));
+ if (priv->thin_arbiter_count) {
+ gf_proc_dump_write("ta_child_up", "%d", priv->ta_child_up);
+ gf_proc_dump_write("ta_bad_child_index", "%d",
+ priv->ta_bad_child_index);
+ gf_proc_dump_write("ta_notify_dom_lock_offset", "%" PRId64,
+ priv->ta_notify_dom_lock_offset);
+ }
+
+ return 0;
+}
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
- afr_lk_unlock (frame, this);
- return 0;
- }
+static int
+afr_find_child_index(xlator_t *this, xlator_t *child)
+{
+ afr_private_t *priv = NULL;
+ int child_count = -1;
+ int i = -1;
+
+ priv = this->private;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
+
+ for (i = 0; i < child_count; i++) {
+ if ((xlator_t *)child == priv->children[i])
+ break;
+ }
+
+ return i;
+}
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.locked_nodes[child_index] = 1;
- local->cont.lk.ret_flock = *lock;
- }
+int
+__afr_get_up_children_count(afr_private_t *priv)
+{
+ int up_children = 0;
+ int i = 0;
- child_index++;
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock, xdata);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
+ return up_children;
+}
- AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock, NULL);
- } else {
- /* locking has succeeded on all nodes that are up */
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
- /* temporarily
- ret = afr_mark_locked_nodes (this, local->fd,
- local->cont.lk.locked_nodes);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked nodes info in fdctx");
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
+ }
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
+ return 0;
+ }
+ return 1;
+}
- ret = afr_save_locked_fd (this, local->fd);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked fd");
+glusterfs_event_t
+__afr_transform_event_from_state(xlator_t *this)
+{
+ int i = 0;
+ int up_children = 0;
+ afr_private_t *priv = this->private;
+
+ if (__get_heard_from_all_status(this))
+ /* have_heard_from_all. Let afr_notify() do the propagation. */
+ return GF_EVENT_MAXVAL;
+
+ up_children = __afr_get_up_children_count(priv);
+ /* Treat the children with pending notification, as having sent a
+ * GF_EVENT_CHILD_DOWN. i.e. set the event as GF_EVENT_SOME_DESCENDENT_DOWN,
+ * as done in afr_notify() */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i])
+ continue;
+ priv->last_event[i] = GF_EVENT_SOME_DESCENDENT_DOWN;
+ priv->child_up[i] = 0;
+ }
+
+ if (up_children)
+ /* We received at least one child up */
+ return GF_EVENT_CHILD_UP;
+ else
+ return GF_EVENT_CHILD_DOWN;
+
+ return GF_EVENT_MAXVAL;
+}
- */
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
- }
+static void
+afr_notify_cbk(void *data)
+{
+ xlator_t *this = data;
+ afr_private_t *priv = this->private;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+
+ LOCK(&priv->lock);
+ {
+ if (!priv->timer) {
+ /*
+ * Either child_up/child_down is already sent to parent.
+ * This is a spurious wake up.
+ */
+ goto unlock;
+ }
+ priv->timer = NULL;
+ event = __afr_transform_event_from_state(this);
+ if (event != GF_EVENT_MAXVAL)
+ propagate = _gf_true;
+ }
+unlock:
+ UNLOCK(&priv->lock);
+ if (propagate)
+ default_notify(this, event, NULL);
+}
- return 0;
+static void
+__afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
+{
+ struct timespec delay = {
+ 0,
+ };
+
+ gf_msg_debug(this->name, 0, "Initiating child-down timer");
+ delay.tv_sec = 10;
+ delay.tv_nsec = 0;
+ priv->timer = gf_timer_call_after(this->ctx, delay, afr_notify_cbk, this);
+ if (priv->timer == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_TIMER_CREATE_FAIL,
+ "Cannot create timer for delayed initialization");
+ }
}
+static int
+find_best_down_child(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t best_child = -1;
+ int64_t best_latency = INT64_MAX;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] < best_latency) {
+ best_child = i;
+ best_latency = priv->child_latency[i];
+ }
+ }
+ if (best_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found best down child (%d) @ %" PRId64 " ms latency",
+ best_child, best_latency);
+ }
+ return best_child;
+}
int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+find_worst_up_child(xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t worst_child = -1;
+ int64_t worst_latency = INT64_MIN;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] > worst_latency) {
+ worst_child = i;
+ worst_latency = priv->child_latency[i];
+ }
+ }
+ if (worst_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found worst up child (%d) @ %" PRId64 " ms latency",
+ worst_child, worst_latency);
+ }
+ return worst_child;
+}
- priv = this->private;
+void
+__afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
+ int64_t halo_max_latency_msec, int32_t *event,
+ int64_t child_latency_msec)
+{
+ afr_private_t *priv = NULL;
+ int up_children = 0;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ priv = this->private;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ priv->child_latency[idx] = child_latency_msec;
+ gf_msg_debug(child_xlator->name, 0, "Client ping @ %" PRId64 " ms",
+ child_latency_msec);
+ if (priv->shd.iamshd)
+ return;
- local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes),
- gf_afr_mt_char);
+ up_children = __afr_get_up_children_count(priv);
- if (!local->cont.lk.locked_nodes) {
- op_errno = ENOMEM;
- goto out;
+ if (child_latency_msec > halo_max_latency_msec &&
+ priv->child_up[idx] == 1 && up_children > priv->halo_min_replicas) {
+ if ((up_children - 1) < priv->halo_min_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Overriding halo threshold, "
+ "min replicas: %d",
+ priv->halo_min_replicas);
+ } else {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "exceeds halo threshold (%" PRId64
+ "), "
+ "marking child down.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
+ }
+ } else if (child_latency_msec < halo_max_latency_msec &&
+ priv->child_up[idx] == 0) {
+ if (up_children < priv->halo_max_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "below halo threshold (%" PRId64
+ "), "
+ "marking child up.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
+ } else {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Not marking child %d up, "
+ "max replicas (%d) reached.",
+ idx, priv->halo_max_replicas);
}
+ }
+}
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.user_flock = *flock;
- local->cont.lk.ret_flock = *flock;
-
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock, xdata);
+static int64_t
+afr_get_halo_latency(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int64_t halo_max_latency_msec = 0;
+
+ priv = this->private;
+
+ if (priv->shd.iamshd) {
+ halo_max_latency_msec = priv->shd.halo_max_latency_msec;
+ } else if (priv->nfsd.iamnfsd) {
+ halo_max_latency_msec = priv->nfsd.halo_max_latency_msec;
+ } else {
+ halo_max_latency_msec = priv->halo_max_latency_msec;
+ }
+ gf_msg_debug(this->name, 0, "Using halo latency %" PRId64,
+ halo_max_latency_msec);
+ return halo_max_latency_msec;
+}
- ret = 0;
+void
+__afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
+ const int idx, int64_t child_latency_msec,
+ int32_t *event, int32_t *call_psh,
+ int32_t *up_child)
+{
+ afr_private_t *priv = NULL;
+ int up_children = 0;
+ int worst_up_child = -1;
+ int64_t halo_max_latency_msec = afr_get_halo_latency(this);
+
+ priv = this->private;
+
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->event_generation++;
+ }
+ priv->child_up[idx] = 1;
+
+ *call_psh = 1;
+ *up_child = idx;
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * If this is an _actual_ CHILD_UP event, we
+ * want to set the child_latency to MAX to indicate
+ * the child needs ping data to be available before doing child-up
+ */
+ if (!priv->halo_enabled)
+ goto out;
+
+ if (child_latency_msec < 0) {
+ /*set to INT64_MAX-1 so that it is found for best_down_child*/
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
+ }
+
+ /*
+ * Handle the edge case where we exceed
+ * halo_min_replicas and we've got a child which is
+ * marked up as it was helping to satisfy the
+ * halo_min_replicas even though it's latency exceeds
+ * halo_max_latency_msec.
+ */
+ if (up_children > priv->halo_min_replicas) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child >= 0 &&
+ priv->child_latency[worst_up_child] > halo_max_latency_msec) {
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "doesn't meet halo threshold (%" PRId64
+ "), and > "
+ "halo_min_replicas (%d)",
+ worst_up_child, halo_max_latency_msec,
+ priv->halo_min_replicas);
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ }
+ }
+
+ if (up_children > priv->halo_max_replicas && !priv->shd.iamshd) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child < 0) {
+ worst_up_child = idx;
+ }
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "up_children (%d) > halo_max_replicas (%d)",
+ worst_up_child, up_children, priv->halo_max_replicas);
+ }
out:
- if (ret < 0)
- AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
- return 0;
+ if (up_children == 1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
+ "Subvolume '%s' came back up; "
+ "going online.",
+ child_xlator->name);
+ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_UP;
+ }
+
+ priv->last_event[idx] = *event;
}
-int
-afr_forget (xlator_t *this, inode_t *inode)
+void
+__afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
+ int64_t child_latency_msec, int32_t *event,
+ int32_t *call_psh, int32_t *up_child)
{
- uint64_t ctx_addr = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- inode_ctx_get (inode, this, &ctx_addr);
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int up_children = 0;
+ int down_children = 0;
+ int best_down_child = -1;
+
+ priv = this->private;
+
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->event_generation++;
+ }
+
+ /*
+ * If this is an _actual_ CHILD_DOWN event, we
+ * want to set the child_latency to < 0 to indicate
+ * the child is really disconnected.
+ */
+ if (child_latency_msec < 0) {
+ priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
+ }
+ priv->child_up[idx] = 0;
+
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * Handle the edge case where we need to find the
+ * next best child (to mark up) as marking this child
+ * down would cause us to fall below halo_min_replicas.
+ * We will also force the SHD to heal this child _now_
+ * as we want it to be up to date if we are going to
+ * begin using it synchronously.
+ */
+ if (priv->halo_enabled && up_children < priv->halo_min_replicas) {
+ best_down_child = find_best_down_child(this);
+ if (best_down_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Swapping out child %d for "
+ "child %d to satisfy halo_min_replicas (%d).",
+ idx, best_down_child, priv->halo_min_replicas);
+ priv->child_up[best_down_child] = 1;
+ *call_psh = 1;
+ *up_child = best_down_child;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are down. Going "
+ "offline until at least one of them "
+ "comes back up.");
+ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ }
+ priv->last_event[idx] = *event;
+}
- if (!ctx_addr)
- goto out;
+void
+afr_ta_lock_release_synctask(xlator_t *this)
+{
+ call_frame_t *ta_frame = NULL;
+ int ret = 0;
- ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
-out:
- return 0;
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ return;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_release_notify_lock_for_ta,
+ afr_ta_lock_release_done, ta_frame, this);
+ if (ret) {
+ STACK_DESTROY(ta_frame->root);
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to release "
+ "AFR_TA_DOM_NOTIFY lock.");
+ }
}
-int
-afr_priv_dump (xlator_t *this)
+static void
+afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
{
- afr_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
+ struct gf_upcall_inodelk_contention *lc = NULL;
+ unsigned int inmem_count = 0;
+ unsigned int onwire_count = 0;
+ afr_private_t *priv = this->private;
+ lc = upcall->data;
- GF_ASSERT (this);
- priv = this->private;
+ if (strcmp(lc->domain, AFR_TA_DOM_NOTIFY) != 0)
+ return;
- GF_ASSERT (priv);
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_write("child_count", "%u", priv->child_count);
- gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
- for (i = 0; i < priv->child_count; i++) {
- sprintf (key, "child_up[%d]", i);
- gf_proc_dump_write(key, "%d", priv->child_up[i]);
- sprintf (key, "pending_key[%d]", i);
- gf_proc_dump_write(key, "%s", priv->pending_key[i]);
- }
- gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
- gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
- gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
- gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
- gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
- gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
- gf_proc_dump_write("read_child", "%d", priv->read_child);
- gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
- gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+ if (priv->shd.iamshd) {
+ /* shd should ignore AFR_TA_DOM_NOTIFY release requests. */
+ return;
+ }
+ LOCK(&priv->lock);
+ {
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Ignore multiple release requests from shds.*/
+ UNLOCK(&priv->lock);
+ return;
+ }
+ priv->release_ta_notify_dom_lock = _gf_true;
+ inmem_count = priv->ta_in_mem_txn_count;
+ onwire_count = priv->ta_on_wire_txn_count;
+ }
+ UNLOCK(&priv->lock);
+ if (inmem_count || onwire_count)
+ /* lock release will happen in txn code path after
+ * in-memory or on-wire txns are over.*/
+ return;
- return 0;
+ afr_ta_lock_release_synctask(this);
}
-
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
-
-static int
-find_child_index (xlator_t *this, xlator_t *child)
+static void
+afr_handle_upcall_event(xlator_t *this, struct gf_upcall *upcall)
{
- afr_private_t *priv = NULL;
- int i = -1;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ afr_private_t *priv = this->private;
+ inode_t *inode = NULL;
+ inode_table_t *itable = NULL;
+ int i = 0;
+
+ switch (upcall->event_type) {
+ case GF_UPCALL_INODELK_CONTENTION:
+ afr_handle_inodelk_contention(this, upcall);
+ break;
+ case GF_UPCALL_CACHE_INVALIDATION:
+ up_ci = (struct gf_upcall_cache_invalidation *)upcall->data;
+
+ /* Since md-cache will be aggressively filtering
+ * lookups, the stale read issue will be more
+ * pronounced. Hence when a pending xattr is set notify
+ * all the md-cache clients to invalidate the existing
+ * stat cache and send the lookup next time */
+ if (!up_ci->dict)
+ break;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!dict_get(up_ci->dict, priv->pending_key[i]))
+ continue;
+ up_ci->flags |= UP_INVAL_ATTR;
+ itable = ((xlator_t *)this->graph->top)->itable;
+ /*Internal processes may not have itable for
+ *top xlator*/
+ if (itable)
+ inode = inode_find(itable, upcall->gfid);
+ if (inode)
+ afr_inode_need_refresh_set(inode, this);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+}
- priv = this->private;
+int32_t
+afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *child_xlator = NULL;
+ int i = -1;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = -1;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t had_quorum = _gf_false;
+ gf_boolean_t has_quorum = _gf_false;
+ int64_t halo_max_latency_msec = 0;
+ int64_t child_latency_msec = -1;
+
+ child_xlator = (xlator_t *)data;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ /* parent xlators don't need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = afr_find_child_index(this, child_xlator);
+ if (idx < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
+ "Received child_up from invalid subvolume");
+ goto out;
+ }
+
+ had_quorum = priv->quorum_count &&
+ afr_has_quorum(priv->child_up, this, NULL);
+ if (event == GF_EVENT_CHILD_PING) {
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency(this);
+
+ /* Calculates the child latency and sets event
+ */
+ LOCK(&priv->lock);
+ {
+ __afr_handle_ping_event(this, child_xlator, idx,
+ halo_max_latency_msec, &event,
+ child_latency_msec);
+ }
+ UNLOCK(&priv->lock);
+ } else {
+ LOCK(&priv->lock);
+ {
+ priv->child_latency[idx] = child_latency_msec;
+ }
+ UNLOCK(&priv->lock);
}
+ }
- return i;
-}
+ if (event == GF_EVENT_CHILD_PING) {
+ /* This is the only xlator that handles PING, no reason to
+ * propagate.
+ */
+ goto out;
+ }
-int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, void *data2)
-{
- afr_private_t *priv = NULL;
- int i = -1;
- int up_children = 0;
- int down_children = 0;
- int propagate = 0;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- int idx = -1;
- int ret = -1;
- int call_psh = 0;
- int up_child = AFR_ALL_CHILDREN;
- dict_t *input = NULL;
- dict_t *output = NULL;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- had_heard_from_all = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i]) {
- had_heard_from_all = 0;
- }
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
}
+ UNLOCK(&priv->lock);
- /* parent xlators dont need to know about every child_up, child_down
- * because of afr ha. If all subvolumes go down, child_down has
- * to be triggered. In that state when 1 subvolume comes up child_up
- * needs to be triggered. dht optimizes revalidate lookup by sending
- * it only to one of its subvolumes. When child up/down happens
- * for afr's subvolumes dht should be notified by child_modified. The
- * subsequent revalidate lookup happens on all the dht's subvolumes
- * which triggers afr self-heals if any.
- */
- idx = find_child_index (this, data);
- if (idx < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Received child_up "
- "from invalid subvolume");
- goto out;
+ if (!had_heard_from_all) {
+ ret = -1;
+ } else {
+ input = data;
+ output = data2;
+ ret = afr_xl_op(this, input, output);
}
+ goto out;
+ }
+
+ if (event == GF_EVENT_UPCALL) {
+ afr_handle_upcall_event(this, data);
+ }
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
switch (event) {
- case GF_EVENT_CHILD_UP:
- LOCK (&priv->lock);
- {
- /*
- * This only really counts if the child was never up
- * (value = -1) or had been down (value = 0). See
- * comment at GF_EVENT_CHILD_DOWN for a more detailed
- * explanation.
- */
- if (priv->child_up[idx] != 1) {
- priv->up_count++;
- }
- priv->child_up[idx] = 1;
-
- call_psh = 1;
- up_child = idx;
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 1)
- up_children++;
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_INFO,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
- } else {
- event = GF_EVENT_CHILD_MODIFIED;
- }
-
- priv->last_event[idx] = event;
+ case GF_EVENT_PARENT_UP:
+ __afr_launch_notify_timer(this, priv);
+ propagate = 1;
+ break;
+ case GF_EVENT_CHILD_UP:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 1;
+ priv->ta_event_gen++;
+ break;
}
- UNLOCK (&priv->lock);
-
+ __afr_handle_child_up_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_lock_heal_synctask(this, priv, idx);
break;
- case GF_EVENT_CHILD_DOWN:
- LOCK (&priv->lock);
- {
- /*
- * If a brick is down when we start, we'll get a
- * CHILD_DOWN to indicate its initial state. There
- * was never a CHILD_UP in this case, so if we
- * increment "down_count" the difference between than
- * and "up_count" will no longer be the number of
- * children that are currently up. This has serious
- * implications e.g. for quorum enforcement, so we
- * don't increment these values unless the event
- * represents an actual state transition between "up"
- * (value = 1) and anything else.
- */
- if (priv->child_up[idx] == 1) {
- priv->down_count++;
- }
- priv->child_up[idx] = 0;
-
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 0)
- down_children++;
- if (down_children == priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
- } else {
- event = GF_EVENT_CHILD_MODIFIED;
- }
-
- priv->last_event[idx] = event;
+ case GF_EVENT_CHILD_DOWN:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 0;
+ priv->ta_event_gen++;
+ afr_ta_locked_priv_invalidate(priv);
+ break;
}
- UNLOCK (&priv->lock);
-
+ __afr_handle_child_down_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_mark_pending_lk_heal(this, priv, idx);
break;
- case GF_EVENT_CHILD_CONNECTING:
- LOCK (&priv->lock);
- {
- priv->last_event[idx] = event;
- }
- UNLOCK (&priv->lock);
+ case GF_EVENT_CHILD_CONNECTING:
+ priv->last_event[idx] = event;
break;
- case GF_EVENT_TRANSLATOR_OP:
- input = data;
- output = data2;
- ret = afr_xl_op (this, input, output);
- goto out;
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ priv->last_event[idx] = event;
break;
-
- default:
+ default:
propagate = 1;
break;
}
+ have_heard_from_all = __get_heard_from_all_status(this);
+ if (!had_heard_from_all && have_heard_from_all) {
+ if (priv->timer) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
- /* have all subvolumes reported status once by now? */
- have_heard_from_all = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i])
- have_heard_from_all = 0;
+ if (priv->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (priv->quorum_count) {
+ has_quorum = afr_has_quorum(priv->child_up, this, NULL);
+ if (!had_quorum && has_quorum) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
+ "Client-quorum is met");
+ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ if (had_quorum && !has_quorum) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify(this, event, data);
+
+ if ((!had_heard_from_all) || call_psh) {
+ /* Launch self-heal on all local subvolumes if:
+ * a) We have_heard_from_all for the first time
+ * b) Already heard from everyone, but we now got a child-up
+ * event.
+ */
+ if (have_heard_from_all) {
+ afr_selfheal_childup(this, priv);
}
+ }
+out:
+ return ret;
+}
- /* if all subvols have reported status, no need to hide anything
- or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
- propagate = 1;
+int
+afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+ int __ret = -1;
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ __ret = syncbarrier_init(&local->barrier);
+ if (__ret) {
+ if (op_errno)
+ *op_errno = __ret;
+ goto out;
+ }
+
+ local->child_up = GF_MALLOC(priv->child_count * sizeof(*local->child_up),
+ gf_afr_mt_char);
+ if (!local->child_up) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy(local->child_up, priv->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ local->call_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (local->call_count == 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+
+ local->event_generation = priv->event_generation;
+
+ local->read_attempted = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->read_attempted) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable2 = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable2) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->read_subvol = -1;
+
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->need_full_crawl = _gf_false;
+ if (priv->thin_arbiter_count) {
+ local->ta_child_up = priv->ta_child_up;
+ local->ta_failed_subvol = AFR_CHILD_UNKNOWN;
+ local->read_txn_query_child = AFR_CHILD_UNKNOWN;
+ local->ta_event_gen = priv->ta_event_gen;
+ local->fop_state = TA_SUCCESS;
+ }
+ local->is_new_entry = _gf_false;
+
+ INIT_LIST_HEAD(&local->healer);
+ return 0;
+out:
+ return -1;
+}
- if (!had_heard_from_all && have_heard_from_all) {
- /* This is the first event which completes aggregation
- of events from all subvolumes. If at least one subvol
- had come up, propagate CHILD_UP, but only this time
- */
- event = GF_EVENT_CHILD_DOWN;
-
- LOCK (&priv->lock);
- {
- up_children = afr_up_children_count (priv->child_up,
- priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
- event = GF_EVENT_CHILD_UP;
- break;
- }
-
- if (priv->last_event[i] ==
- GF_EVENT_CHILD_CONNECTING) {
- event = GF_EVENT_CHILD_CONNECTING;
- /* continue to check other events for CHILD_UP */
- }
- }
- }
- UNLOCK (&priv->lock);
- }
+int
+afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count)
+{
+ int ret = -ENOMEM;
- ret = 0;
- if (propagate)
- ret = default_notify (this, event, data);
- if (call_psh && priv->shd.iamshd)
- afr_proactive_self_heal ((void*) (long) up_child);
+ lk->lower_locked_nodes = GF_CALLOC(sizeof(*lk->lower_locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->lower_locked_nodes)
+ goto out;
+
+ lk->lock_op_ret = -1;
+ lk->lock_op_errno = EUCLEAN;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-int
-afr_first_up_child (unsigned char *child_up, size_t child_count)
+void
+afr_matrix_cleanup(int32_t **matrix, unsigned int m)
{
- int ret = -1;
- int i = 0;
+ int i = 0;
- GF_ASSERT (child_up);
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE(matrix[i]);
+ }
- for (i = 0; i < child_count; i++) {
- if (child_up[i]) {
- ret = i;
- break;
- }
- }
+ GF_FREE(matrix);
+out:
+ return;
+}
- return ret;
+int32_t **
+afr_matrix_create(unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC(sizeof(*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC(sizeof(*matrix[i]), n, gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup(matrix, m);
+ return NULL;
}
int
-afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+afr_transaction_local_init(afr_local_t *local, xlator_t *this)
{
- int ret = -1;
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&local->transaction.wait_list);
+ INIT_LIST_HEAD(&local->transaction.owner_list);
+ INIT_LIST_HEAD(&local->ta_waitq);
+ INIT_LIST_HEAD(&local->ta_onwireq);
+ ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOMEM;
+ local->pre_op_compat = priv->pre_op_compat;
+
+ local->transaction.pre_op = GF_CALLOC(sizeof(*local->transaction.pre_op),
+ priv->child_count, gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
+ local->transaction.changelog_xdata = GF_CALLOC(
+ sizeof(*local->transaction.changelog_xdata), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!local->transaction.changelog_xdata)
+ goto out;
+
+ if (priv->arbiter_count == 1) {
+ local->transaction.pre_op_sources = GF_CALLOC(
+ sizeof(*local->transaction.pre_op_sources), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op_sources)
+ goto out;
+ }
+
+ local->transaction.failed_subvols = GF_CALLOC(
+ sizeof(*local->transaction.failed_subvols), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.failed_subvols)
+ goto out;
+
+ local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
-
- local->child_up = GF_CALLOC (priv->child_count,
- sizeof (*local->child_up),
- gf_afr_mt_char);
- if (!local->child_up) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
+void
+afr_set_low_priority(call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
- memcpy (local->child_up, priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- if (local->call_count == 0) {
- gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
- if (op_errno)
- *op_errno = ENOTCONN;
- goto out;
- }
+void
+afr_priv_destroy(afr_private_t *priv)
+{
+ int i = 0;
+ int child_count = -1;
- local->child_errno = GF_CALLOC (priv->child_count,
- sizeof (*local->child_errno),
- gf_afr_mt_int32_t);
- if (!local->child_errno) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
+ if (!priv)
+ goto out;
- ret = 0;
+ GF_FREE(priv->sh_domain);
+ GF_FREE(priv->last_event);
+
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
+ if (priv->pending_key) {
+ for (i = 0; i < child_count; i++)
+ GF_FREE(priv->pending_key[i]);
+ }
+
+ GF_FREE(priv->pending_reads);
+ GF_FREE(priv->local);
+ GF_FREE(priv->pending_key);
+ GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
+ GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
+ GF_FREE(priv->child_latency);
+ LOCK_DESTROY(&priv->lock);
+
+ GF_FREE(priv);
out:
- return ret;
+ return;
}
-int
-afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
- transaction_lk_type_t lk_type)
+int **
+afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat)
{
- int ret = -ENOMEM;
+ int i = 0;
+ int **changelog = NULL;
+ int idx = -1;
+ int m_idx = 0;
+ int d_idx = 0;
+ int ret = 0;
+
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ idx = afr_index_from_ia_type(iat);
+
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ continue;
+
+ changelog[i][m_idx] = hton32(1);
+ if (idx != -1)
+ changelog[i][idx] = hton32(1);
+ /* If the newentry marking is on a newly created directory,
+ * then mark it with the full-heal indicator.
+ */
+ if ((IA_ISDIR(iat)) && (priv->esh_granular))
+ changelog[i][d_idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
+out:
+ return changelog;
+}
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
+static dict_t *
+afr_set_heal_info(char *status)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
- lk->entry_locked_nodes = GF_CALLOC (sizeof (*lk->entry_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->entry_locked_nodes)
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+ if (ret)
+ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
+out:
+ /* Any error other than EINVAL, dict_set_dynstr frees status */
+ if (ret == -ENOMEM || ret == -EINVAL) {
+ GF_FREE(status);
+ }
+
+ if (ret && dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return dict;
+}
- lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->locked_nodes)
- goto out;
+static gf_boolean_t
+afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = this->private;
+ int *dirty = alloca0(priv->child_count * sizeof(int));
+ int i = 0;
- lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->lower_locked_nodes)
- goto out;
+ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dirty[i] > 1)
+ return _gf_true;
+ }
- lk->lock_op_ret = -1;
- lk->lock_op_errno = EUCLEAN;
- lk->transaction_lk_type = lk_type;
+ return _gf_false;
+}
- ret = 0;
-out:
- return ret;
+static gf_boolean_t
+afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type)
+{
+ gf_boolean_t data_chk = _gf_false;
+ gf_boolean_t mdata_chk = _gf_false;
+ gf_boolean_t entry_chk = _gf_false;
+
+ switch (ia_type) {
+ case IA_IFDIR:
+ mdata_chk = _gf_true;
+ entry_chk = _gf_true;
+ break;
+ case IA_IFREG:
+ mdata_chk = _gf_true;
+ data_chk = _gf_true;
+ break;
+ default:
+ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
+ mdata_chk = _gf_true;
+ break;
+ }
+
+ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_DATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_METADATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_ENTRY_TRANSACTION)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
}
-void
-afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+static int
+afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
{
- int i = 0;
+ int ret = -1;
+ GF_UNUSED int ret1 = 0;
+ int i = 0;
+ int io_domain_lk_count = 0;
+ int shd_domain_lk_count = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
+
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
+ continue;
+ if (!io_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
+ }
+ if (!shd_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
+ }
+ }
+
+ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+ ret = 0;
+ } else {
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
+ }
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /*needs heal. Just set a non -ve value so that it is
+ assumed as the source index.*/
+ }
+ }
+ return ret;
+}
+
+/*return EIO, EAGAIN or pending*/
+int
+afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
+{
+ int ret = -1;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ gf_boolean_t dsh = _gf_false;
+ gf_boolean_t msh = _gf_false;
+ gf_boolean_t esh = _gf_false;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *valid_on = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+ sources = alloca0(sizeof(*sources) * priv->child_count);
+ sinks = alloca0(sizeof(*sinks) * priv->child_count);
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
+ &esh, replies);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ valid_on[i] = 1;
+ }
+ }
+ if (msh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (dsh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (esh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
+out:
+ *data_selfheal = dsh;
+ *entry_selfheal = esh;
+ *metadata_selfheal = msh;
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return ret;
+}
- if (!matrix)
+int
+afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ unsigned char pending = 0;
+ dict_t *dict = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ inode_t *inode = NULL;
+ char *substr = NULL;
+ char *status = NULL;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+
+ /*Use frame with lk-owner set*/
+ heal_frame = afr_frame_create(frame->this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
+
+ if (ret == -ENOMEM) {
+ ret = -1;
+ goto out;
+ }
+
+ if (pending & PFLAG_PENDING) {
+ gf_asprintf(&substr, "-pending");
+ if (!substr)
+ goto out;
+ }
+
+ if (ret == -EIO) {
+ ret = gf_asprintf(&status, "split-brain%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret == -EAGAIN) {
+ ret = gf_asprintf(&status, "possibly-healing%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret >= 0) {
+ /* value of ret = source index
+ * so ret >= 0 and at least one of the 3 booleans set to
+ * true means a source is identified; heal is required.
+ */
+ if (!data_selfheal && !entry_selfheal && !metadata_selfheal) {
+ status = gf_strdup("no-heal");
+ if (!status) {
+ ret = -1;
goto out;
- for (i = 0; i < m; i++) {
- GF_FREE (matrix[i]);
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ }
+ } else if (ret < 0) {
+ /* Apart from above checked -ve ret values, there are
+ * other possible ret values like ENOTCONN
+ * (returned when number of valid replies received are
+ * less than 2)
+ * in which case heal is required when one of the
+ * selfheal booleans is set.
+ */
+ if (data_selfheal || entry_selfheal || metadata_selfheal) {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
+ }
+
+ ret = 0;
+ op_errno = 0;
- GF_FREE (matrix);
out:
- return;
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ GF_FREE(substr);
+ return ret;
}
-int32_t**
-afr_matrix_create (unsigned int m, unsigned int n)
+int
+_afr_is_split_brain(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies, afr_transaction_type type,
+ gf_boolean_t *spb)
{
- int32_t **matrix = NULL;
- int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ int sources_count = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ witness = alloca0(priv->child_count * sizeof(*witness));
+
+ ret = afr_selfheal_find_direction(frame, this, replies, type,
+ priv->child_up, sources, sinks, witness,
+ NULL);
+ if (ret)
+ return ret;
- matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
- if (!matrix)
- goto out;
+ sources_count = AFR_COUNT(sources, priv->child_count);
+ if (!sources_count)
+ *spb = _gf_true;
- for (i = 0; i < m; i++) {
- matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
- gf_afr_mt_int32_t);
- if (!matrix[i])
- goto out;
- }
- return matrix;
-out:
- afr_matrix_cleanup (matrix, m);
- return NULL;
+ return ret;
}
int
-afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb)
{
- int child_up_count = 0;
- int ret = -ENOMEM;
- afr_private_t *priv = NULL;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
- priv = this->private;
- ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
- AFR_TRANSACTION_LK);
- if (ret < 0)
- goto out;
+ priv = this->private;
- ret = -ENOMEM;
- child_up_count = afr_up_children_count (local->child_up,
- priv->child_count);
- if (priv->optimistic_change_log && child_up_count == priv->child_count)
- local->optimistic_change_log = 1;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
- local->first_up_child = afr_first_up_child (local->child_up,
- priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies);
+ if (ret)
+ goto out;
- local->transaction.eager_lock =
- GF_CALLOC (sizeof (*local->transaction.eager_lock),
- priv->child_count,
- gf_afr_mt_int32_t);
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
- if (!local->transaction.eager_lock)
- goto out;
+ ret = _afr_is_split_brain(frame, this, replies, AFR_DATA_TRANSACTION,
+ d_spb);
+ if (ret)
+ goto out;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
+ ret = _afr_is_split_brain(frame, this, replies, AFR_METADATA_TRANSACTION,
+ m_spb);
+out:
+ if (replies) {
+ afr_replies_wipe(replies, priv->child_count);
+ replies = NULL;
+ }
+ return ret;
+}
+
+int
+afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ GF_FREE(opaque);
+ return 0;
+}
- if (local->fd) {
- local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->fd_open_on)
- goto out;
+int
+afr_get_split_brain_status(void *opaque)
+{
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ int ret = -1;
+ int op_errno = 0;
+ int i = 0;
+ char *choices = NULL;
+ char *status = NULL;
+ dict_t *dict = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ afr_spb_status_t *data = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ this = frame->this;
+ loc = data->loc;
+ priv = this->private;
+ children = priv->children;
+
+ inode = afr_inode_find(this, loc->gfid);
+ if (!inode)
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* Calculation for string length :
+ * (child_count X length of child-name) + SLEN(" Choices :")
+ * child-name consists of :
+ * a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX
+ * b) strlen("-client-00,") assuming 16 replicas
+ */
+ choices = alloca0(priv->child_count * (256 + SLEN("-client-00,")) +
+ SLEN(" Choices:"));
+
+ ret = afr_is_split_brain(frame, this, inode, loc->gfid, &d_spb, &m_spb);
+ if (ret) {
+ op_errno = -ret;
+ if (ret == -EAGAIN) {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SBRAIN_HEAL_NO_GO_MSG);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ AFR_MSG_DICT_SET_FAILED,
+ "Failed to set GF_AFR_SBRAIN_STATUS in dict");
+ }
+ }
+ ret = -1;
+ goto out;
+ }
+
+ if (d_spb || m_spb) {
+ sprintf(choices, " Choices:");
+ for (i = 0; i < priv->child_count; i++) {
+ strcat(choices, children[i]->name);
+ strcat(choices, ",");
}
+ choices[strlen(choices) - 1] = '\0';
- local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->transaction.pre_op)
- goto out;
+ ret = gf_asprintf(&status,
+ "data-split-brain:%s "
+ "metadata-split-brain:%s%s",
+ (d_spb) ? "yes" : "no", (m_spb) ? "yes" : "no",
+ choices);
- local->pending = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!local->pending)
- goto out;
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_dynstr_sizen(dict, GF_AFR_SBRAIN_STATUS, status);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SFILE_NOT_UNDER_DATA);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ }
- local->transaction.child_errno =
- GF_CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- local->transaction.erase_pending = 1;
+ ret = 0;
+out:
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ return ret;
+}
+int32_t
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret = 0;
+ int op_errno = 0;
+ dict_t *dict = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+
+ local = frame->local;
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ heal_frame = afr_frame_create(this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+ /*Initiate heal with heal_frame with lk-owner set so that inodelk/entrylk
+ * work correctly*/
+ ret = afr_selfheal_do(heal_frame, this, loc->gfid);
+
+ if (ret == 1 || ret == 2) {
+ ret = dict_set_sizen_str_sizen(dict, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set sh-fail-msg in dict");
ret = 0;
+ goto out;
+ } else {
+ if (local->xdata_rsp) {
+ /* 'sh-fail-msg' has been set in the dict during self-heal.*/
+ dict_copy(local->xdata_rsp, dict);
+ ret = 0;
+ } else if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ }
+ }
+
out:
- return ret;
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ if (local->op == GF_FOP_GETXATTR)
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ else if (local->op == GF_FOP_SETXATTR)
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+int
+afr_get_child_index_from_name(xlator_t *this, char *name)
+{
+ afr_private_t *priv = this->private;
+ int index = -1;
+
+ for (index = 0; index < priv->child_count; index++) {
+ if (!strcmp(priv->children[index]->name, name))
+ goto out;
+ }
+ index = -1;
+out:
+ return index;
}
void
-afr_reset_children (int32_t *fresh_children, int32_t child_count)
+afr_priv_need_heal_set(afr_private_t *priv, gf_boolean_t need_heal)
{
- unsigned int i = 0;
- for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
+ LOCK(&priv->lock);
+ {
+ priv->need_heal = need_heal;
+ }
+ UNLOCK(&priv->lock);
}
-int32_t*
-afr_children_create (int32_t child_count)
+void
+afr_set_need_heal(xlator_t *this, afr_local_t *local)
+{
+ int i = 0;
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].need_heal) {
+ need_heal = _gf_true;
+ break;
+ }
+ }
+ afr_priv_need_heal_set(priv, need_heal);
+ return;
+}
+
+gf_boolean_t
+afr_get_need_heal(xlator_t *this)
{
- int32_t *children = NULL;
- int i = 0;
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_true;
+
+ LOCK(&priv->lock);
+ {
+ need_heal = priv->need_heal;
+ }
+ UNLOCK(&priv->lock);
+ return need_heal;
+}
+
+int
+afr_get_msg_id(char *op_type)
+{
+ if (!strcmp(op_type, GF_AFR_REPLACE_BRICK))
+ return AFR_MSG_REPLACE_BRICK_STATUS;
+ else if (!strcmp(op_type, GF_AFR_ADD_BRICK))
+ return AFR_MSG_ADD_BRICK_STATUS;
+ return -1;
+}
+
+int
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *heal_frame,
+ void *opaque)
+{
+ call_frame_t *txn_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ xlator_t *this = NULL;
+
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ local = txn_frame->local;
+ this = txn_frame->this;
+
+ /* Refresh the inode agan and proceed with the transaction.*/
+ afr_inode_refresh(txn_frame, this, local->inode, NULL, local->refreshfn);
+
+ AFR_STACK_DESTROY(heal_frame);
+
+ return 0;
+}
- GF_ASSERT (child_count > 0);
+int
+afr_fav_child_reset_sink_xattrs(void *opaque)
+{
+ call_frame_t *heal_frame = NULL;
+ call_frame_t *txn_frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *txn_local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int ret = 0;
+
+ heal_frame = (call_frame_t *)opaque;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ txn_local = txn_frame->local;
+ this = txn_frame->this;
+ inode = txn_local->inode;
+ priv = this->private;
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_DATA_TRANSACTION, &d_spb);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_METADATA_TRANSACTION, &m_spb);
+
+ /* Take appropriate locks and reset sink xattrs. */
+ if (d_spb) {
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ {
+ if (ret < priv->child_count)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
+ }
+ data_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ }
+
+ if (m_spb) {
+ memset(locked_on, 0, sizeof(*locked_on) * priv->child_count);
+ memset(undid_pending, 0, sizeof(*undid_pending) * priv->child_count);
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ {
+ if (ret < priv->child_count)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
+ }
+ mdata_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ }
+
+ return ret;
+}
- children = GF_CALLOC (child_count, sizeof (*children),
- gf_afr_mt_int32_t);
- if (NULL == children)
+/*
+ * Concatenates the xattrs in local->replies separated by a delimiter.
+ */
+int
+afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
+ char *buf, const char *default_str,
+ int32_t *serz_len, char delimiter)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *xattr = NULL;
+ int i = 0;
+ int len = 0;
+ int keylen = 0;
+ size_t str_len = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+
+ keylen = strlen(local->cont.getxattr.name);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret) {
+ str_len = strlen(default_str);
+ buf = strncat(buf, default_str, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
+ } else {
+ ret = dict_get_strn(local->replies[i].xattr,
+ local->cont.getxattr.name, keylen, &xattr);
+ if (ret) {
+ gf_msg("TEST", GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "Failed to get the node_uuid of brick "
+ "%d",
+ i);
goto out;
- for (i = 0; i < child_count; i++)
- children[i] = -1;
+ }
+ str_len = strlen(xattr);
+ buf = strncat(buf, xattr, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
+ }
+ }
+ buf[--len] = '\0'; /*remove the last delimiter*/
+ if (serz_len)
+ *serz_len = ++len;
+ ret = 0;
+
out:
- return children;
+ return ret;
}
-void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count)
-{
- gf_boolean_t child_found = _gf_false;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- child_found = _gf_true;
- break;
- }
- }
+uint64_t
+afr_write_subvol_get(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ uint64_t write_subvol = 0;
- if (!child_found) {
- GF_ASSERT (i < child_count);
- children[i] = child;
- }
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ write_subvol = local->inode_ctx->write_subvol;
+ UNLOCK(&local->inode->lock);
+
+ return write_subvol;
}
-void
-afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
-{
- int i = 0;
-
- GF_ASSERT ((child >= 0) && (child < child_count));
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- if (i != (child_count - 1))
- memmove (children + i, children + i + 1,
- sizeof (*children)*(child_count - i - 1));
- children[child_count - 1] = -1;
- break;
- }
- }
+int
+afr_write_subvol_set(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int event = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ data_accused = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ event = local->event_generation;
+
+ afr_readables_fill(frame, this, local->inode, data_accused,
+ metadata_accused, data_readable, metadata_readable,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_readable[i])
+ datamap |= (1 << i);
+ if (metadata_readable[i])
+ metadatamap |= (1 << i);
+ }
+
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
+
+ LOCK(&local->inode->lock);
+ {
+ if (local->inode_ctx->write_subvol == 0 &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ local->inode_ctx->write_subvol = val;
+ }
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
}
int
-afr_get_children_count (int32_t *children, unsigned int child_count)
+afr_write_subvol_reset(call_frame_t *frame, xlator_t *this)
{
- int count = 0;
- int i = 0;
+ afr_local_t *local = NULL;
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- count++;
- }
- return count;
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ GF_ASSERT(local->inode_ctx->lock_count > 0);
+ local->inode_ctx->lock_count--;
+
+ if (!local->inode_ctx->lock_count)
+ local->inode_ctx->write_subvol = 0;
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
}
-void
-afr_set_low_priority (call_frame_t *frame)
+int
+afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode)
{
- frame->root->pid = LOW_PRIO_PROC_PID;
+ int ret = 0;
+
+ local->inode = inode_ref(inode);
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, local->inode, &local->inode_ctx);
+ }
+ UNLOCK(&local->inode->lock);
+ if (ret < 0) {
+ gf_msg_callingfn(
+ this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_INODE_CTX_GET_FAILED,
+ "Error getting inode ctx %s", uuid_utoa(local->inode->gfid));
+ }
+ return ret;
}
-int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags)
+gf_boolean_t
+afr_ta_is_fop_called_from_synctask(xlator_t *this)
{
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
+ struct synctask *task = NULL;
+ gf_lkowner_t tmp_owner = {
+ 0,
+ };
- GF_ASSERT (fd && fd->inode);
- ret = afr_fd_ctx_set (this, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set fd ctx for fd=%p", fd);
- goto out;
- }
+ task = synctask_get();
+ if (!task)
+ return _gf_false;
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- goto out;
- }
+ set_lk_owner_from_ptr(&tmp_owner, (void *)this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child] = AFR_FD_OPENED;
- if (!IA_ISDIR (fd->inode->ia_type)) {
- fd_ctx->flags = flags;
- }
- ret = 0;
+ if (!is_same_lkowner(&tmp_owner, &task->frame->root->lk_owner))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+int
+afr_ta_post_op_lock(xlator_t *this, loc_t *loc)
+{
+ int ret = 0;
+ uuid_t gfid = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
+ gf_boolean_t locked = _gf_false;
+ struct gf_flock flock1 = {
+ 0,
+ };
+ struct gf_flock flock2 = {
+ 0,
+ };
+ int32_t cmd = 0;
+
+ /* Clients must take AFR_TA_DOM_NOTIFY lock only when the previous lock
+ * has been released in afr_notify due to upcall notification from shd.
+ */
+ GF_ASSERT(priv->ta_notify_dom_lock_offset == 0);
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock1.l_type = F_WRLCK;
+
+ while (!locked) {
+ if (priv->shd.iamshd) {
+ cmd = F_SETLKW;
+ flock1.l_start = 0;
+ flock1.l_len = 0;
+ } else {
+ cmd = F_SETLK;
+ gf_uuid_generate(gfid);
+ flock1.l_start = gfid_to_ino(gfid);
+ if (flock1.l_start < 0)
+ flock1.l_start = -flock1.l_start;
+ flock1.l_len = 1;
+ }
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, cmd, &flock1, NULL, NULL);
+ if (!ret) {
+ locked = _gf_true;
+ priv->ta_notify_dom_lock_offset = flock1.l_start;
+ } else if (ret == -EAGAIN) {
+ continue;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get "
+ "AFR_TA_DOM_NOTIFY lock on %s.",
+ loc->name);
+ goto out;
+ }
+ }
+
+ flock2.l_type = F_WRLCK;
+ flock2.l_start = 0;
+ flock2.l_len = 0;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLKW, &flock2, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get AFR_TA_DOM_MODIFY lock on %s.", loc->name);
+ flock1.l_type = F_UNLCK;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1, NULL,
+ NULL);
+ }
out:
- return ret;
+ return ret;
+}
+
+int
+afr_ta_post_op_unlock(xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = 0;
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock.l_type = F_UNLCK;
+ flock.l_start = 0;
+ flock.l_len = 0;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_MODIFY lock.");
+ goto out;
+ }
+
+ if (!priv->shd.iamshd)
+ /* Mounts (clients) will not release the AFR_TA_DOM_NOTIFY lock
+ * in post-op as they use it as a notification mechanism. When
+ * shd sends a lock request on TA during heal, the clients will
+ * receive a lock-contention upcall notification upon which they
+ * will release the AFR_TA_DOM_NOTIFY lock after completing the
+ * in flight I/O.*/
+ goto out;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+ }
+out:
+ return ret;
+}
+
+call_frame_t *
+afr_ta_frame_create(xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ void *lk_owner = NULL;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+ lk_owner = (void *)this;
+ afr_set_lk_owner(frame, this, lk_owner);
+ return frame;
}
gf_boolean_t
-afr_have_quorum (char *logname, afr_private_t *priv)
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)
{
- unsigned int quorum = 0;
+ int data_count = 0;
- GF_VALIDATE_OR_GOTO(logname,priv,out);
+ data_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (data_count == 2) {
+ return _gf_true;
+ } else if (data_count == 1 && local->ta_child_up) {
+ return _gf_true;
+ }
- quorum = priv->quorum_count;
- if (quorum != AFR_QUORUM_AUTO) {
- return (priv->up_count >= (priv->down_count + quorum));
- }
+ return _gf_false;
+}
- quorum = priv->child_count / 2 + 1;
- if (priv->up_count >= (priv->down_count + quorum)) {
- return _gf_true;
- }
+static gf_boolean_t
+afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
- /*
- * Special case for even numbers of nodes: if we have exactly half
- * and that includes the first ("senior-most") node, then that counts
- * as quorum even if it wouldn't otherwise. This supports e.g. N=2
- * while preserving the critical property that there can only be one
- * such group.
- */
- if ((priv->child_count % 2) == 0) {
- quorum = priv->child_count / 2;
- if (priv->up_count >= (priv->down_count + quorum)) {
- if (priv->child_up[0]) {
- return _gf_true;
- }
- }
- }
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT)
+ return _gf_false;
-out:
+ local = frame->local;
+
+ if (local->op != GF_FOP_LOOKUP)
+ /* TODO:If the replica count is being increased on a plain distribute
+ * volume that was never mounted, we need to allow setxattr on '/' with
+ * GF_CLIENT_PID_NO_ROOT_SQUASH to accomodate for DHT layout setting */
+ return _gf_false;
+
+ if (local->inode == NULL)
+ return _gf_false;
+
+ if (!__is_root_gfid(local->inode->gfid))
return _gf_false;
+
+ return _gf_true;
+}
+
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame, const unsigned int up_children_count)
+{
+ if (frame && (up_children_count > 0) &&
+ afr_is_add_replica_mount_lookup_on_root(frame))
+ return _gf_true;
+
+ return _gf_false;
}
void
-afr_priv_destroy (afr_private_t *priv)
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this)
{
- int i = 0;
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ unsigned char *success_replies = NULL;
- if (!priv)
- goto out;
- inode_unref (priv->root_inode);
- GF_FREE (priv->shd.pos);
- GF_FREE (priv->shd.pending);
- GF_FREE (priv->shd.inprogress);
-// for (i = 0; i < priv->child_count; i++)
-// if (priv->shd.timer && priv->shd.timer[i])
-// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
- GF_FREE (priv->shd.timer);
-
- if (priv->shd.healed)
- eh_destroy (priv->shd.healed);
-
- if (priv->shd.heal_failed)
- eh_destroy (priv->shd.heal_failed);
-
- if (priv->shd.split_brain)
- eh_destroy (priv->shd.split_brain);
-
- GF_FREE (priv->last_event);
- if (priv->pending_key) {
- for (i = 0; i < priv->child_count; i++)
- GF_FREE (priv->pending_key[i]);
- }
- GF_FREE (priv->pending_key);
- GF_FREE (priv->children);
- GF_FREE (priv->child_up);
- LOCK_DESTROY (&priv->lock);
- LOCK_DESTROY (&priv->read_child_lock);
- pthread_mutex_destroy (&priv->mutex);
- GF_FREE (priv);
-out:
- return;
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->quorum_count && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_errno = afr_final_errno(local, priv);
+ if (!local->op_errno)
+ local->op_errno = afr_quorum_errno(priv);
+ local->op_ret = -1;
+ }
}
-int
-xlator_subvolume_count (xlator_t *this)
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
{
- int i = 0;
- xlator_list_t *list = NULL;
+ int *pending = NULL;
+ int ret = 0;
+ int i = 0;
+
+ ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
+ if (ret == 0) {
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ /* Not doing a ntoh32(pending) as we just want to check
+ * if it is non-zero or not. */
+ if (pending[i]) {
+ return _gf_true;
+ }
+ }
+ }
- for (list = this->children; list; list = list->next)
- i++;
- return i;
+ return _gf_false;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index c6628db6b95..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -8,739 +8,339 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "checksum.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "afr.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno, int32_t sh_failed)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
-
- return 0;
-}
-
-
-gf_boolean_t
-__checksums_differ (uint32_t *checksum, int child_count,
- unsigned char *child_up)
-{
- int ret = _gf_false;
- int i = 0;
- uint32_t cksum = 0;
- gf_boolean_t activate_check = _gf_false;
-
- for (i = 0; i < child_count; i++) {
- if (!child_up[i])
- continue;
- if (_gf_false == activate_check) {
- cksum = checksum[i];
- activate_check = _gf_true;
- continue;
- }
-
- if (cksum != checksum[i]) {
- ret = _gf_true;
- break;
- }
-
- cksum = checksum[i];
- }
-
- return ret;
-}
-
+#include "afr-transaction.h"
int32_t
-afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- char *reason = NULL;
- int child_index = 0;
- uint32_t entry_cksum = 0;
- int call_count = 0;
- off_t last_offset = 0;
- inode_t *inode = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to do opendir on %s",
- local->loc.path, priv->children[child_index]->name);
- local->op_ret = -1;
- local->op_ret = op_errno;
- goto out;
- }
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: no entries found in %s",
- local->loc.path, priv->children[child_index]->name);
- goto out;
- }
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+ child_index = (long)cookie;
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
- strlen (entry->d_name));
- local->cont.opendir.checksum[child_index] ^= entry_cksum;
- }
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- /* read more entries */
-
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset, NULL);
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
+ AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
+ }
- return 0;
-
-out:
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count,
- local->child_up)) {
-
- sh->do_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
-
- reason = "checksums of directory differ";
- afr_launch_self_heal (frame, this, inode, _gf_false,
- inode->ia_type, reason, NULL,
- afr_examine_dir_sh_unwind);
- } else {
- afr_set_opendir_done (this, inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
- }
- }
-
- return 0;
+ return 0;
}
-
int
-afr_examine_dir (call_frame_t *frame, xlator_t *this)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = -1;
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
- local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.opendir.checksum),
- gf_afr_mt_int32_t);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- call_count = afr_up_children_count (local->child_up, priv->child_count);
+ local->op = GF_FOP_OPENDIR;
- local->call_count = call_count;
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->readdir,
- local->fd, 131072, 0, NULL);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int32_t up_children_count = 0;
- int ret = -1;
- int call_count = -1;
- int32_t child_index = 0;
-
- priv = this->private;
- local = frame->local;
- child_index = (long) cookie;
-
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
- }
-
- local->op_errno = op_errno;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (local->op_ret != 0)
- goto out;
-
- if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anomalies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- /* do the unwind */
- goto out;
- }
- }
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ goto out;
- return 0;
+ loc_copy(&local->loc, loc);
-out:
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
+ local->fd = fd_ref(fd);
+ local->fd_ctx = fd_ctx;
- return 0;
-}
+ call_count = local->call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_opendir_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, loc, fd, NULL);
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int child_count = 0;
- int i = 0;
- int ret = -1;
- int call_count = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- child_count = priv->child_count;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- local->fd = fd_ref (fd);
-
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_opendir_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd, NULL);
-
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
}
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
-
- return 0;
+ AFR_STACK_UNWIND(opendir, frame, -1, op_errno, fd, NULL);
+ return 0;
}
-
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
-
-
-struct entry_name {
- char *name;
- struct list_head list;
-};
-
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
+static int
+afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
+ int gen = 0;
+ int entry_read_subvol = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ &gen);
+
+ if (gen != priv->event_generation || !data_readable[par_read_subvol] ||
+ !metadata_readable[par_read_subvol])
+ return -1;
+
+ /* Once the control reaches the following statement, it means that the
+ * parent's read subvol is perfectly readable. So calling
+ * either afr_data_subvol_get() or afr_metadata_subvol_get() would
+ * yield the same result. Hence, choosing afr_data_subvol_get() below.
+ */
+
+ if (!priv->consistent_metadata)
+ return 0;
-out:
- return ret;
+ /* For an inode fetched through readdirp which is yet to be linked,
+ * inode ctx would not be initialised (yet). So this function returns
+ * -1 above due to gen being 0, which is why it is OK to pass NULL for
+ * read_subvol_args here.
+ */
+ entry_read_subvol = afr_data_subvol_get(inode, this, NULL, NULL, NULL,
+ NULL);
+ if (entry_read_subvol != par_read_subvol)
+ return -1;
+
+ return 0;
}
-
static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t validate_subvol = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+
+ need_heal = afr_get_need_heal(this);
+ validate_subvol = need_heal | priv->consistent_metadata;
+
+ list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
+ {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
+ continue;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ list_del_init(&entry->list);
+ list_add_tail(&entry->list, &entries->list);
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
+ if (!validate_subvol)
+ continue;
- list_add (&n->list, &fd_ctx->entries);
+ if (entry->inode) {
+ ret = afr_validate_read_subvol(entry->inode, this, subvol);
+ if (ret == -1) {
+ inode_unref(entry->inode);
+ entry->inode = NULL;
+ continue;
+ }
}
+ }
}
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
+int32_t
+afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ INIT_LIST_HEAD(&entries.list);
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
+ local = frame->local;
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
- }
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- return offset;
-}
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
+ if (op_ret >= 0)
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
-static void
-afr_forget_entries (fd_t *fd)
-{
- struct entry_name *entry = NULL;
- struct entry_name *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
+ AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ gf_dirent_free(&entries);
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ return 0;
}
-
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+int
+afr_readdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- local = frame->local;
-
- if (op_ret == -1)
- goto out;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
-
-out:
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ local->op_errno = EINVAL;
+ local->op_ret = -1;
+ }
+
+ if (subvol == -1 || !fd_ctx) {
+ AFR_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, 0, 0);
return 0;
+ }
+
+ fd_ctx->readdir_subvol = subvol;
+
+ if (local->op == GF_FOP_READDIR)
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ else
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
}
-
-int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+int
+afr_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t next_call_child = -1;
- int ret = 0;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- int32_t *last_index = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
- int32_t call_child = -1;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- if ((priv->readdir_failover == _gf_false) && (op_ret < 0))
- goto out;
-
- read_child = (long) cookie;
- last_index = &local->cont.readdir.last_index;
- fresh_children = local->fresh_children;
-
- /* the value of the last_index changes if afr_next_call_child is
- * called. So to find the call_child of this callback use last_index
- * before the next_call_child call.
- */
- if (*last_index == -1)
- call_child = read_child;
- else
- call_child = fresh_children[*last_index];
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (op_ret == -1) {
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index,
- read_child);
- if (next_call_child < 0)
- goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- next_call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readdirp,
- local->fd,
- local->cont.readdir.size, 0,
- local->cont.readdir.dict);
- return 0;
- }
- }
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that will make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[call_child]->name);
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[call_child],
- children[call_child]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset,
- local->cont.readdir.dict);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
-
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ local->op = whichop;
+ local->fd = fd_ref(fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict) ? dict_ref(dict) : NULL;
+
+ subvol = fd_ctx->readdir_subvol;
+
+ if (offset == 0 || subvol == -1) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn(frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_readdir_wind(frame, this, subvol);
+ }
+
+ return 0;
out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
-
- return 0;
+ AFR_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx) {
- op_errno = EBADF;
- goto out;
- }
-
- if ((offset == 0) || (fd_ctx->call_child == -1)) {
- fd_ctx->call_child = call_child;
- } else if ((priv->readdir_failover == _gf_false) &&
- (call_child != fd_ctx->call_child)) {
- op_errno = EBADF;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
- local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
-
- if (priv->strict_readdir) {
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
- fd_ctx->last_tried = call_child;
- }
-
- if (whichop == GF_FOP_READDIR)
- STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir, fd,
- size, offset, dict);
- else
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, fd,
- size, offset, dict);
-
- return 0;
-out:
- AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
- return 0;
-}
-
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
-int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
-{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
- return 0;
+ return 0;
}
-
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd)
+afr_releasedir(xlator_t *this, fd_t *fd)
{
- afr_forget_entries (fd);
- afr_cleanup_fd_ctx (this, fd);
+ afr_cleanup_fd_ctx(this, fd);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 09456d15949..773e925ec6c 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -11,26 +11,23 @@
#ifndef __DIR_READ_H__
#define __DIR_READ_H__
-
int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd);
+afr_releasedir(xlator_t *this, fd_t *fd);
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, dict_t *xdata);
-
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata);
int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, dict_t *dict);
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict);
int32_t
-afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, dict_t *xdata);
-
+afr_checksum(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 1349fd089a9..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -8,1236 +8,930 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
#include "afr.h"
#include "afr-transaction.h"
+void
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this);
+
int
-afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
+afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno)
{
- int ret = -1;
- char *child_path = NULL;
-
- if (!child->parent) {
- if (op_errno)
- *op_errno = EINVAL;
- goto out;
- }
-
- child_path = gf_strdup (child->path);
- if (!child_path) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
- parent->path = dirname (child_path);
- parent->inode = inode_ref (child->parent);
- uuid_copy (parent->gfid, child->pargfid);
-
- ret = 0;
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup(child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->path = gf_strdup(dirname(child_path));
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->inode = inode_ref(child->parent);
+ gf_uuid_copy(parent->gfid, child->pargfid);
+
+ ret = 0;
out:
- return ret;
-}
+ GF_FREE(child_path);
-/* {{{ create */
+ return ret;
+}
-int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
- AFR_STACK_UNWIND (create, main_frame,
- local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent,
- local->xdata_rsp);
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ }
+
+ if (local->inode) {
+ if (local->op != GF_FOP_RENAME && local->op != GF_FOP_LINK)
+ afr_replies_interpret(frame, this, local->inode, NULL);
+
+ inode_read_subvol = afr_data_subvol_get(local->inode, this, NULL, NULL,
+ NULL, &args);
+ }
+
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get(local->parent, this, NULL,
+ local->readable, NULL, NULL);
+
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get(local->parent2, this, NULL,
+ local->readable2, NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ afr_pick_error_xdata(local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_need_refresh_set(local->inode, this);
+ if (local->parent)
+ afr_inode_need_refresh_set(local->parent, this);
+ if (local->parent2)
+ afr_inode_need_refresh_set(local->parent2, this);
+ continue;
+ }
+
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ if (local->xdata_rsp) {
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ if (local->replies[i].xdata)
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ continue;
+ }
+
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ }
+
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ }
+ }
}
-
-int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
+static void
+__afr_dir_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set ctx on fd=%p", fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- fd_ctx->flags = local->cont.create.flags;
-
- if (local->success_count == 0) {
- local->cont.create.buf = *buf;
- if (xdata)
- local->xdata_rsp = dict_ref(xdata);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
-
- local->cont.create.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
-
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed(frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+
+ return;
}
-
-int
-afr_create_wind (call_frame_t *frame, xlator_t *this)
+static int
+__afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ LOCK(&frame->lock);
+ {
+ __afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf,
+ preparent, postparent, preparent2, postparent2,
+ xdata);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (call_count == 0) {
+ __afr_dir_write_finalize(frame, this);
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->umask,
- local->cont.create.fd,
- local->xdata_req);
- if (!--call_count)
- break;
- }
+ if (afr_txn_nothing_failed(frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata && afr_needs_changelog_update(local))
+ afr_zero_fill_stat(local);
+ local->transaction.unwind(frame, this);
}
- return 0;
-}
-
-
-int
-afr_create_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_mark_entry_pending_changelog(frame, this);
- AFR_STACK_DESTROY (frame);
+ afr_transaction_resume(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+afr_mark_new_entry_changelog_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(create,out);
+ int call_count = 0;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
+ call_count = afr_frame_return(frame);
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
- local->transaction.unwind = afr_create_unwind;
+ if (call_count == 0)
+ AFR_STACK_DESTROY(frame);
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
-out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- }
-
- return 0;
+ return 0;
}
-/* }}} */
-
-/* {{{ mknod */
-
-int
-afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
+void
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
- AFR_STACK_UNWIND (mknod, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent,
- NULL);
- }
-
- return 0;
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame)
+ goto out;
+
+ new_local = AFR_FRAME_INIT(new_frame, op_errno);
+ if (!new_local)
+ goto out;
+
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ pending = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count++;
+ continue;
+ }
+ pending[i] = 1;
+ }
+
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ if (!changelog)
+ goto out;
+
+ new_local->pending = changelog;
+ gf_uuid_copy(new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref(local->inode);
+
+ new_local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending[i])
+ continue;
+
+ STACK_WIND_COOKIE(new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->xattrop, &new_local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ if (!--call_count)
+ break;
+ }
+
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY(new_frame);
+ if (xattr)
+ dict_unref(xattr);
+ return;
}
-
-int
-afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+void
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_count = 0;
+ int failed_count = 0;
+ unsigned char *success_replies = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- child_index = (long) cookie;
+ if (local->op_ret < 0)
+ return;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD &&
+ local->op != GF_FOP_MKDIR)
+ return;
- if (op_ret != -1) {
- local->op_ret = op_ret;
+ pre_op_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
- if (local->success_count == 0)
- local->cont.mknod.buf = *buf;
+ /* FOP succeeded on all bricks. */
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
+ /* FOP did not suceed on quorum no. of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
- local->cont.mknod.inode = inode;
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+ return;
+ }
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
+ afr_mark_new_entry_changelog(frame, this);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return;
}
+/* {{{ create */
-int32_t
-afr_mknod_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_create_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev,
- local->umask,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_mknod_done (call_frame_t *frame, xlator_t *this)
+afr_create_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t dev, mode_t umask, dict_t *params)
+afr_create_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(mknod,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
-
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
- local->transaction.unwind = afr_mknod_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
-out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_create_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create, &local->loc,
+ local->cont.create.flags, local->cont.create.mode,
+ local->umask, local->cont.create.fd, local->xdata_req);
+ return 0;
}
-/* }}} */
-
-/* {{{ mkdir */
-
-
int
-afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
- AFR_STACK_UNWIND (mkdir, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent,
- NULL);
- }
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+
+ local->fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!local->fd_ctx)
+ goto out;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->fd_ctx->flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref(fd);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_create_wind;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
+/* }}} */
+
+/* {{{ mknod */
int
-afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_mknod_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mkdir.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local->cont.mkdir.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+afr_mknod_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode,
- local->umask,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_mkdir_done (call_frame_t *frame, xlator_t *this)
+afr_mknod_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_mknod_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod, &local->loc,
+ local->cont.mknod.mode, local->cont.mknod.dev,
+ local->umask, local->xdata_req);
+ return 0;
}
-
int
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(mkdir,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mkdir.mode = mode;
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
-
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
-/* {{{ link */
-
+/* {{{ mkdir */
int
-afr_link_unwind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
- AFR_STACK_UNWIND (link, main_frame,
- local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent,
- NULL);
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ local = frame->local;
+
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+afr_mkdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->cont.link.inode = inode;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
+int
+afr_mkdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_mkdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask, local->xdata_req);
+ return 0;
}
-
int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
+
+ if (!xdata || !dict_get_sizen(xdata, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_GFID_NULL,
+ "mkdir: %s is received "
+ "without gfid-req %p",
+ loc->path, xdata);
+ goto out;
+ }
+
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ AFR_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+/* }}} */
- local->call_count = call_count;
+/* {{{ link */
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc, local->xdata_req);
+int
+afr_link_unwind(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_link_done (call_frame_t *frame, xlator_t *this)
+afr_link_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = frame->local;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- local->transaction.unwind (frame, this);
+int
+afr_link_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_link_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- priv = this->private;
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- QUORUM_CHECK(link,out);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(newloc->parent);
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ if (!local->xdata_req)
+ goto out;
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_LINK;
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
- local->transaction.unwind = afr_link_unwind;
+ local->transaction.wind = afr_link_wind;
+ local->transaction.unwind = afr_link_unwind;
- ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
- &op_errno);
- if (ret)
- goto out;
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (newloc->path);
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ symlink */
-
-int
-afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
-{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
- AFR_STACK_UNWIND (symlink, main_frame,
- local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent,
- NULL);
- }
-
- return 0;
-}
-
-
int
-afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_symlink_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.symlink.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc,
- local->umask,
- local->xdata_req);
-
- if (!--call_count)
- break;
-
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_symlink_done (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_symlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc, local->umask,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(symlink,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.symlink.linkpath = gf_strdup (linkpath);
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
-
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
- local->transaction.unwind = afr_symlink_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.symlink.linkpath = gf_strdup(linkpath);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.unwind = afr_symlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1245,217 +939,118 @@ out:
/* {{{ rename */
int
-afr_rename_unwind (call_frame_t *frame, xlator_t *this)
+afr_rename_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
- AFR_STACK_UNWIND (rename, main_frame,
- local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent,
- NULL);
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
+afr_rename_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc, NULL);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
+afr_rename_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rename_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(rename,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
-
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
- local->transaction.unwind = afr_rename_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
- &op_errno);
- if (ret)
- goto out;
- ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
+
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(oldloc->parent);
+ local->parent2 = inode_ref(newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RENAME;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.unwind = afr_rename_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc(&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(oldloc->path);
+ local->transaction.new_basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this,
+ AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (rename, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1463,395 +1058,205 @@ out:
/* {{{ unlink */
int
-afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
+afr_unlink_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (unlink, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-
-int
-afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == local->read_child_index) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
-
-int32_t
-afr_unlink_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc, local->xflag,
- local->xdata_req);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_unlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink, &local->loc,
+ local->xflag, local->xdata_req);
+ return 0;
}
-
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
+int
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(unlink,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
- local->xflag = xflag;
- if (xdata)
- local->xdata_req = dict_ref (xdata);
-
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
- local->transaction.unwind = afr_unlink_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->xflag = xflag;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_UNLINK;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.unwind = afr_unlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
- NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ rmdir */
-
-
int
-afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (rmdir, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-
-int
-afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
- afr_transaction_fop_failed (frame, this, child_index);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
-
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rmdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir, &local->loc,
+ local->cont.rmdir.flags, local->xdata_req);
+ return 0;
}
-
int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(rmdir,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->cont.rmdir.flags = flags;
- loc_copy (&local->loc, loc);
-
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
- local->transaction.unwind = afr_rmdir_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.rmdir.flags = flags;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RMDIR;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.unwind = afr_rmdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 02f0a3682d9..1d88c3b9b26 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -12,36 +12,35 @@
#define __DIR_WRITE_H__
int32_t
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata);
int32_t
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
int32_t
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata);
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
int32_t
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int32_t
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 40e5abd3a03..c5521704de2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
#include <fnmatch.h>
@@ -16,1351 +15,1880 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/quota-common-utils.h>
+
+#include "afr-transaction.h"
+#include "afr-messages.h"
-/**
- * Common algorithm for inode read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: access, stat, fstat, readlink, getxattr
- */
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
+
+int
+afr_handle_quota_size(call_frame_t *frame, xlator_t *this)
+{
+ unsigned char *readable = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0;
+ int ret = 0;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t max_size = {
+ 0,
+ };
+ int readable_cnt = 0;
+ int read_subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+
+ readable = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(local->inode, this, readable, 0, 0);
+
+ readable_cnt = AFR_COUNT(readable, priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ ret = quota_dict_get_meta(replies[i].xdata, QUOTA_SIZE_KEY,
+ SLEN(QUOTA_SIZE_KEY), &size);
+ if (ret == -1)
+ continue;
+ if (read_subvol == -1)
+ read_subvol = i;
+ if (size.size > max_size.size ||
+ (size.file_count + size.dir_count) >
+ (max_size.file_count + max_size.dir_count))
+ read_subvol = i;
+
+ if (size.size > max_size.size)
+ max_size.size = size.size;
+ if (size.file_count > max_size.file_count)
+ max_size.file_count = size.file_count;
+ if (size.dir_count > max_size.dir_count)
+ max_size.dir_count = size.dir_count;
+ }
+
+ if (max_size.size == 0 && max_size.file_count == 0 &&
+ max_size.dir_count == 0)
+ return read_subvol;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ quota_dict_set_meta(replies[i].xdata, QUOTA_SIZE_KEY, &max_size,
+ IA_IFDIR);
+ }
+
+ return read_subvol;
+}
/* {{{ access */
-int32_t
-afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+afr_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.access.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->access,
- &local->loc, local->cont.access.mask,
- NULL);
- }
+ local = frame->local;
-out:
- if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
- }
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ afr_read_txn_continue(frame, this, (long)cookie);
return 0;
-}
-
+ }
-int32_t
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
+ AFR_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ return 0;
+}
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+int
+afr_access_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- children = priv->children;
+ priv = this->private;
+ local = frame->local;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(access, frame, local->op_ret, local->op_errno, 0);
+ return 0;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ STACK_WIND_COOKIE(frame, afr_access_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->access, &local->loc,
+ local->cont.access.mask, local->xdata_req);
+ return 0;
+}
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+int
+afr_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int mask,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.access.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->op = GF_FOP_ACCESS;
+ loc_copy(&local->loc, loc);
+ local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ afr_read_txn(frame, this, loc->inode, afr_access_wind,
+ AFR_METADATA_TRANSACTION);
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->access,
- loc, mask, xdata);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
- return 0;
-}
+ AFR_STACK_UNWIND(access, frame, -1, op_errno, NULL);
+ return 0;
+}
/* }}} */
/* {{{ stat */
-int32_t
-afr_stat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+int
+afr_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- local = frame->local;
-
- if (op_ret == -1) {
- last_index = &local->cont.stat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->stat,
- &local->loc, NULL);
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
-out:
- if (unwind) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
- }
+ AFR_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ return 0;
}
-
-int32_t
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+int
+afr_stat_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ local = frame->local;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, 0, 0);
+ return 0;
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ STACK_WIND_COOKIE(
+ frame, afr_stat_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->stat, &local->loc, local->xdata_req);
+ return 0;
+}
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+int
+afr_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.stat.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_STAT;
+ loc_copy(&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc, xdata);
+ afr_read_txn(frame, this, loc->inode, afr_stat_wind, AFR_DATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
/* }}} */
/* {{{ fstat */
-int32_t
-afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
+int
+afr_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.fstat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fstat,
- local->fd, NULL);
- }
+ local = frame->local;
-out:
- if (unwind) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- }
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ afr_read_txn_continue(frame, this, (long)cookie);
return 0;
-}
-
-
-int32_t
-afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = 0;
- int ret = -1;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- VALIDATE_OR_GOTO (fd->inode, out);
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+int
+afr_fstat_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ priv = this->private;
+ local = frame->local;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, 0, 0);
+ return 0;
+ }
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
+ STACK_WIND_COOKIE(
+ frame, afr_fstat_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->fstat, local->fd, local->xdata_req);
+ return 0;
+}
+int32_t
+afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.fstat.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->op = GF_FOP_FSTAT;
+ local->fd = fd_ref(fd);
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- local->fd = fd_ref (fd);
+ afr_fix_open(fd, this);
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd, xdata);
+ afr_read_txn(frame, this, fd->inode, afr_fstat_wind, AFR_DATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
/* }}} */
/* {{{ readlink */
-int32_t
-afr_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf, dict_t *xdata)
+int
+afr_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.readlink.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readlink,
- &local->loc,
- local->cont.readlink.size, NULL);
- }
+ local = frame->local;
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
- xdata);
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ afr_read_txn_continue(frame, this, (long)cookie);
return 0;
-}
+ }
+ AFR_STACK_UNWIND(readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
+ return 0;
+}
-int32_t
-afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata)
+int
+afr_readlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(readlink, frame, local->op_ret, local->op_errno, 0, 0,
+ 0);
+ return 0;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ STACK_WIND_COOKIE(frame, afr_readlink_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readlink, &local->loc,
+ local->cont.readlink.size, local->xdata_req);
+ return 0;
+}
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readlink.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+int
+afr_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
- loc_copy (&local->loc, loc);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.readlink.size = size;
+ local->op = GF_FOP_READLINK;
+ loc_copy(&local->loc, loc);
+ local->cont.readlink.size = size;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readlink,
- loc, size, xdata);
+ afr_read_txn(frame, this, loc->inode, afr_readlink_wind,
+ AFR_DATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
+ AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
+ return 0;
+}
/* }}} */
/* {{{ getxattr */
struct _xattr_key {
- char *key;
- struct list_head list;
+ char *key;
+ struct list_head list;
};
-
-void
-__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
- void *data)
+int
+__gather_xattr_keys(dict_t *dict, char *key, data_t *value, void *data)
{
- struct list_head * list = data;
- struct _xattr_key * xkey = NULL;
+ struct list_head *list = data;
+ struct _xattr_key *xkey = NULL;
- if (!strncmp (key, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) {
+ xkey = GF_MALLOC(sizeof(*xkey), gf_afr_mt_xattr_key);
+ if (!xkey)
+ return -1;
- xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
- if (!xkey)
- return;
+ xkey->key = key;
+ INIT_LIST_HEAD(&xkey->list);
- xkey->key = key;
- INIT_LIST_HEAD (&xkey->list);
-
- list_add_tail (&xkey->list, list);
- }
+ list_add_tail(&xkey->list, list);
+ }
+ return 0;
}
-
void
-__filter_xattrs (dict_t *dict)
+afr_filter_xattrs(dict_t *dict)
{
- struct list_head keys = {0,};
- struct _xattr_key *key = NULL;
- struct _xattr_key *tmp = NULL;
+ struct list_head keys = {
+ 0,
+ };
+ struct _xattr_key *key = NULL;
+ struct _xattr_key *tmp = NULL;
- INIT_LIST_HEAD (&keys);
+ INIT_LIST_HEAD(&keys);
- dict_foreach (dict, __gather_xattr_keys,
- (void *) &keys);
+ dict_foreach(dict, __gather_xattr_keys, (void *)&keys);
- list_for_each_entry_safe (key, tmp, &keys, list) {
- dict_del (dict, key->key);
+ list_for_each_entry_safe(key, tmp, &keys, list)
+ {
+ dict_del(dict, key->key);
- list_del_init (&key->list);
+ list_del_init(&key->list);
- GF_FREE (key);
- }
+ GF_FREE(key);
+ }
}
+static gf_boolean_t
+afr_getxattr_ignorable_errnos(int32_t op_errno)
+{
+ if (op_errno == ENODATA || op_errno == ENOTSUP || op_errno == ERANGE ||
+ op_errno == ENAMETOOLONG)
+ return _gf_true;
-
-int32_t
-afr_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+ return _gf_false;
+}
+int
+afr_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name,
- NULL);
- }
+ if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- }
+ if (dict)
+ afr_filter_xattrs(dict);
+
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+int
+afr_getxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_getxattr_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->getxattr, &local->loc,
+ local->cont.getxattr.name, local->xdata_req);
+ return 0;
}
int32_t
-afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
+ dict_t *xdata)
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int32_t
-afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- dict_t *xattr = NULL;
- char *tmp_report = NULL;
- char lk_summary[1024] = {0,};
- int serz_len = 0;
- int32_t callcnt = 0;
- long int cky = 0;
- int ret = 0;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1)
- local->child_errno[cky] = op_errno;
-
- if (!local->dict)
- local->dict = dict_new ();
- if (local->dict) {
- ret = dict_get_str (dict, local->cont.getxattr.name,
- &tmp_report);
- if (ret)
- goto unlock;
- ret = dict_set_dynstr (local->dict,
- children[cky]->name,
- gf_strdup (tmp_report));
- if (ret)
- goto unlock;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {
+ 0,
+ };
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (local->dict) {
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
+ if (ret)
+ goto unlock;
}
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- xattr = dict_new ();
- if (!xattr) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- ret = dict_serialize_value_with_delim (local->dict,
- lk_summary,
- &serz_len, '\n');
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error serializing dictionary");
- goto unwind;
- }
- if (serz_len == -1)
- snprintf (lk_summary, sizeof (lk_summary),
- "No locks cleared.");
- ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
- gf_strdup (lk_summary));
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error setting dictionary");
- goto unwind;
- }
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim(local->dict, lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ op_errno = afr_final_errno(local, priv);
+
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+ if (xattr)
+ dict_unref(xattr);
+ }
- unwind:
- // Updating child_errno with more recent 'events'
- local->child_errno[cky] = op_errno;
- op_errno = afr_resultant_errno_get (NULL, local->child_errno,
- priv->child_count);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+ return ret;
+}
- if (xattr)
- dict_unref (xattr);
+int32_t
+afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {
+ 0,
+ };
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long)cookie;
+
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (local->dict) {
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim(local->dict, lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
}
- return ret;
+ op_errno = afr_final_errno(local, priv);
+
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref(xattr);
+ }
+
+ return ret;
}
/**
* node-uuid cbk uses next child querying mechanism
*/
int32_t
-afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_node_uuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int curr_call_child = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
- priv = this->private;
- children = priv->children;
+ priv = this->private;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all children and yet no success; give up !
+ */
+ curr_call_child = (int)((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_msg_debug(this->name, op_errno,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ unwind = 0;
+ STACK_WIND_COOKIE(
+ frame, afr_getxattr_node_uuid_cbk, (void *)(long)curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr, &local->loc,
+ local->cont.getxattr.name, local->xdata_req);
+ }
+
+unwind:
+ if (unwind)
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+/**
+ * list-node-uuids cbk returns the list of node_uuids for the subvolume.
+ */
+int32_t
+afr_getxattr_list_node_uuids_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr_serz = NULL;
+ long cky = 0;
+ int32_t tlen = 0;
+
+ local = frame->local;
+ priv = this->private;
+ cky = (long)cookie;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ local->replies[cky].valid = 1;
+ local->replies[cky].op_ret = op_ret;
+ local->replies[cky].op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto unlock;
+
+ local->op_ret = 0;
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ local->replies[cky].xattr = dict_ref(dict);
+ }
+
+unlock:
+ UNLOCK(&frame->lock);
- if (op_ret == -1) { /** query the _next_ child */
-
- /**
- * _current_ becomes _next_
- * If done with all childs and yet no success; give up !
- */
- curr_call_child = (int) ((long)cookie);
- if (++curr_call_child == priv->child_count)
- goto unwind;
-
- gf_log (this->name, GF_LOG_WARNING,
- "op_ret (-1): Re-querying afr-child (%d/%d)",
- curr_call_child, priv->child_count);
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
- (void *) (long) curr_call_child,
- children[curr_call_child],
- children[curr_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name,
- NULL);
+ if (!callcnt) {
+ if (local->op_ret != 0) {
+ /* All bricks gave an error. */
+ local->op_errno = afr_final_errno(local, priv);
+ goto unwind;
}
- unwind:
- if (unwind)
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+ /*Since we store the UUID0_STR as node uuid for down bricks and
+ *for non zero op_ret, assigning length to priv->child_count
+ *number of uuids*/
+ local->cont.getxattr.xattr_len = (SLEN(UUID0_STR) + 2) *
+ priv->child_count;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- return 0;
+ xattr_serz = GF_CALLOC(local->cont.getxattr.xattr_len, sizeof(char),
+ gf_common_mt_char);
+
+ if (!xattr_serz) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ ret = afr_serialize_xattrs_with_delimiter(frame, this, xattr_serz,
+ UUID0_STR, &tlen, ' ');
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ GF_FREE(xattr_serz);
+ goto unwind;
+ }
+ ret = dict_set_dynstr_sizen(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set node_uuid key in dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ } else {
+ local->op_ret = local->cont.getxattr.xattr_len - 1;
+ local->op_errno = 0;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->dict, local->xdata_rsp);
+ }
+
+ return ret;
}
int32_t
-afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *xattr = NULL;
- char *xattr_serz = NULL;
- char xattr_cky[1024] = {0,};
- dict_t *nxattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
-
- if (!frame || !frame->local || !this) {
- gf_log ("", GF_LOG_ERROR, "possible NULL deref");
- goto out;
+ int idx = (long)cookie;
+ int call_count = 0;
+ afr_local_t *local = frame->local;
+ int read_subvol = -1;
+
+ local->replies[idx].valid = 1;
+ local->replies[idx].op_ret = op_ret;
+ local->replies[idx].op_errno = op_errno;
+ if (dict)
+ local->replies[idx].xdata = dict_ref(dict);
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ local->inode = inode_ref(local->loc.inode);
+ read_subvol = afr_handle_quota_size(frame, this);
+ if (read_subvol != -1) {
+ op_ret = local->replies[read_subvol].op_ret;
+ op_errno = local->replies[read_subvol].op_errno;
+ dict = local->replies[read_subvol].xdata;
}
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+ LOCK(&frame->lock);
+ {
local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (!dict || (op_ret < 0))
- goto out;
-
- if (!local->dict)
- local->dict = dict_new ();
-
- if (local->dict) {
- ret = dict_get_str (dict,
- local->cont.getxattr.name,
- &xattr);
- if (ret)
- goto out;
-
- xattr = gf_strdup (xattr);
-
- (void)snprintf (xattr_cky, 1024, "%s-%ld",
- local->cont.getxattr.name, cky);
- ret = dict_set_dynstr (local->dict,
- xattr_cky, xattr);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot set xattr cookie key");
- goto out;
- }
-
- local->cont.getxattr.xattr_len += strlen (xattr) + 1;
- }
- }
- out:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->cont.getxattr.xattr_len)
- goto unwind;
-
- nxattr = dict_new ();
- if (!nxattr)
- goto unwind;
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.xattr_len += (padding + 2);
-
- xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
- sizeof (char), gf_common_mt_char);
-
- if (!xattr_serz)
- goto unwind;
-
- /* the xlator info */
- (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
- this->name);
-
- /* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict,
- xattr_serz + strlen (xattr_serz),
- &tlen, ' ');
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing"
- " dictionary");
- goto unwind;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
}
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
- /* closing part */
- *(xattr_serz + padding + tlen) = ')';
- *(xattr_serz + padding + tlen + 1) = '\0';
+ op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
- xattr_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
- " key in dict");
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy(lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy(xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
- xdata);
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
+ local->op_ret = -1;
+ goto unwind;
+ }
- if (nxattr)
- dict_unref (nxattr);
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
}
- return ret;
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref(lockinfo);
+
+ return 0;
}
-static gf_boolean_t
-afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk)
+int32_t
+afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- gf_boolean_t is_spl = _gf_true;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
- GF_ASSERT (cbk);
- if (!cbk) {
- is_spl = _gf_false;
- goto out;
+ LOCK(&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
}
- if (!strcmp (name, GF_XATTR_PATHINFO_KEY))
- *cbk = afr_getxattr_pathinfo_cbk;
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
- else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
- strlen (GF_XATTR_CLRLK_CMD)))
- *cbk = afr_getxattr_clrlk_cbk;
- else
- is_spl = _gf_false;
+ if (!dict) {
+ goto unlock;
+ }
-out:
- return is_spl;
-}
+ op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
-static void
-afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
- const char *name, loc_t *loc,
- fop_getxattr_cbk_t cbk)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int i = 0;
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
- priv = this->private;
- children = priv->children;
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy(lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy(xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- local = frame->local;
- local->call_count = priv->child_count;
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
+ local->op_ret = -1;
+ goto unwind;
+ }
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, cbk,
- (void *) (long) i,
- children[i], children[i]->fops->getxattr,
- loc, name, NULL);
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
}
- return;
+
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref(lockinfo);
+
+ return 0;
}
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- xlator_t **sub_volumes = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
- fop_getxattr_cbk_t cbk = NULL;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ int keylen = 0;
+ char xattr_cky[1024] = {
+ 0,
+ };
+ int xattr_cky_len = 0;
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (!dict || (op_ret < 0))
+ goto unlock;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict)
+ goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- children = priv->children;
+ xattr = gf_strdup(xattr);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
- loc_copy (&local->loc, loc);
- if (!name)
- goto no_name;
+ nxattr = dict_new();
+ if (!nxattr)
+ goto unwind;
- local->cont.getxattr.name = gf_strdup (name);
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no data present for key %s",
- loc->path, name);
- op_errno = ENODATA;
- goto out;
- }
- if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len,
+ gf_common_mt_char);
- local->marker.call_count = priv->child_count;
+ if (!xattr_serz)
+ goto unwind;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+ /* the xlator info */
+ int xattr_serz_len = sprintf(
+ xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name);
- *(sub_volumes + i) = trav->xlator;
- }
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim(
+ local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
+ if (ret) {
+ GF_FREE(xattr_serz);
+ goto unwind;
+ }
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_UUID_TYPE,
- priv->vol_uuid)) {
-
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- return 0;
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
}
- /*
- * if we are doing getxattr with pathinfo as the key then we
- * collect information from all childs
- */
- if (afr_is_special_xattr (name, &cbk)) {
- afr_getxattr_frm_all_children (this, frame, name,
- loc, cbk);
- return 0;
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno,
+ nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref(nxattr);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {
+ 0,
+ };
+ int keylen = 0;
+ int xattr_cky_len = 0;
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
- if (XATTR_IS_NODE_UUID (name)) {
- i = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
- (void *) (long) i,
- children[i],
- children[i]->fops->getxattr,
- loc, name, xdata);
- return 0;
+ if (!dict || (op_ret < 0))
+ goto unlock;
+
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict)
+ goto unlock;
}
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- if (*priv->vol_uuid) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
- local->marker.call_count = priv->child_count;
-
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
-
- *(sub_volumes + i) = trav->xlator;
-
- }
-
- if (cluster_getmarkerattr (frame, this, loc,
- name, local,
- afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
-
- return 0;
- }
+ xattr = gf_strdup(xattr);
+
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
}
-no_name:
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len,
+ gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ int xattr_serz_len = sprintf(
+ xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim(
+ local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
+ if (ret) {
+ GF_FREE(xattr_serz);
+ goto unwind;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
}
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->getxattr,
- loc, name, xdata);
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref(nxattr);
+ }
- ret = 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return ret;
}
-/* {{{ fgetxattr */
+static int
+afr_aggregate_stime_xattr(dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_max_stime(THIS, data, key, value);
+ return ret;
+}
int32_t
-afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_common_getxattr_stime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
- local = frame->local;
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref(dict, NULL);
+ else
+ dict_foreach(dict, afr_aggregate_stime_xattr, local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->dict, xdata);
+ }
+
+out:
+ return 0;
+}
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fgetxattr,
- local->fd,
- local->cont.getxattr.name,
- NULL);
+static gf_boolean_t
+afr_is_special_xattr(const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
+{
+ gf_boolean_t is_spl = _gf_true;
+
+ GF_ASSERT(cbk);
+ if (!cbk || !name) {
+ is_spl = _gf_false;
+ goto out;
+ }
+
+ if (!strcmp(name, GF_XATTR_PATHINFO_KEY) ||
+ !strcmp(name, GF_XATTR_USER_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
}
+ } else if (!strncmp(name, GF_XATTR_LOCKINFO_KEY,
+ SLEN(GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch(GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else if (strcmp(name, QUOTA_SIZE_KEY) == 0) {
+ *cbk = afr_getxattr_quota_size_cbk;
+ } else if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) {
+ *cbk = afr_getxattr_list_node_uuids_cbk;
+ } else {
+ is_spl = _gf_false;
+ }
out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ return is_spl;
+}
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
- xdata);
+static void
+afr_getxattr_all_subvols(xlator_t *this, call_frame_t *frame, const char *name,
+ loc_t *loc, fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ // local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) {
+ GF_FREE(local->cont.getxattr.name);
+ local->cont.getxattr.name = gf_strdup(GF_XATTR_NODE_UUID_KEY);
+ }
+
+ // If up-children count is 0, afr_local_init would have failed already
+ // and the call would have unwound so not handling it here.
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->getxattr, loc,
+ local->cont.getxattr.name, NULL);
+ if (!--call_count)
+ break;
}
+ }
+ return;
+}
- return 0;
+int
+afr_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
+{
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+
+ memcpy(subvols, priv->children, sizeof(*subvols) * priv->child_count);
+
+ if (type == MARKER_XTIME_TYPE) {
+ /*Don't error out on ENOENT/ENOTCONN */
+ gauge[MCNT_NOTFOUND] = 0;
+ gauge[MCNT_ENOTCONN] = 0;
+ }
+ return priv->child_count;
}
-int32_t
-afr_fgetxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+static int
+afr_handle_heal_xattrs(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *heal_op)
+{
+ int ret = -1;
+ afr_spb_status_t *data = NULL;
+
+ if (!strcmp(heal_op, GF_HEAL_INFO)) {
+ afr_get_heal_info(frame, this, loc);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp(heal_op, GF_AFR_HEAL_SBRAIN)) {
+ afr_heal_splitbrain_file(frame, this, loc);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp(heal_op, GF_AFR_SBRAIN_STATUS)) {
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spb_status_t);
+ if (!data) {
+ ret = 1;
+ goto out;
+ }
+ data->frame = frame;
+ data->loc = loc;
+ ret = synctask_new(this->ctx->env, afr_get_split_brain_status,
+ afr_get_split_brain_status_cbk, NULL, data);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "Failed to create"
+ " synctask. Unable to fetch split-brain status"
+ " for %s.",
+ loc->name);
+ ret = 1;
+ goto out;
+ }
+ goto out;
+ }
+
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL);
+ if (data)
+ GF_FREE(data);
+ ret = 0;
+ }
+ return ret;
+}
+int32_t
+afr_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
{
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ priv = this->private;
+
+ children = priv->children;
+
+ loc_copy(&local->loc, loc);
+
+ local->op = GF_FOP_GETXATTR;
+
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup(name);
+
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (!strncmp(name, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) {
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid,
+ afr_getxattr_unwind,
+ afr_marker_populate_args) == 0)
+ return 0;
+
+ ret = afr_handle_heal_xattrs(frame, this, &local->loc, name);
+ if (ret == 0)
+ return 0;
+
+ /*
+ * Heal daemons don't have IO threads ... and as a result they
+ * send this getxattr down and eventually crash :(
+ */
+ op_errno = -1;
+ GF_CHECK_XATTR_KEY_AND_GOTO(name, IO_THREADS_QUEUE_SIZE_KEY, op_errno, out);
+
+ /*
+ * Special xattrs which need responses from all subvols
+ */
+ if (afr_is_special_xattr(name, &cbk, 0)) {
+ afr_getxattr_all_subvols(this, frame, name, loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID(name)) {
+ i = 0;
+ STACK_WIND_COOKIE(frame, afr_getxattr_node_uuid_cbk, (void *)(long)i,
+ children[i], children[i]->fops->getxattr, loc, name,
+ xdata);
return 0;
+ }
+
+no_name:
+
+ afr_read_txn(frame, this, local->loc.inode, afr_getxattr_wind,
+ AFR_METADATA_TRANSACTION);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+/* {{{ fgetxattr */
+
int32_t
-afr_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+afr_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
+ afr_local_t *local = NULL;
+ local = frame->local;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- children = priv->children;
+ if (dict)
+ afr_filter_xattrs(dict);
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- frame->local = local;
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
- op_ret = afr_local_init (local, priv, &op_errno);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
+ return 0;
+}
+
+int
+afr_fgetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->fd = fd_ref (fd);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ local = frame->local;
+ priv = this->private;
- /* pathinfo gets handled only in getxattr() */
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ return 0;
+ }
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ STACK_WIND_COOKIE(frame, afr_fgetxattr_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fgetxattr, local->fd,
+ local->cont.getxattr.name, local->xdata_req);
+ return 0;
+}
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
+static void
+afr_fgetxattr_all_subvols(xlator_t *this, call_frame_t *frame,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ // local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ // If up-children count is 0, afr_local_init would have failed already
+ // and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fgetxattr, local->fd,
+ local->cont.getxattr.name, NULL);
+ if (!--call_count)
+ break;
}
+ }
- STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fgetxattr,
- fd, name, xdata);
+ return;
+}
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+int
+afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FGETXATTR;
+ local->fd = fd_ref(fd);
+ if (name) {
+ local->cont.getxattr.name = gf_strdup(name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
}
+ }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr(name, &cbk, 1)) {
+ afr_fgetxattr_all_subvols(this, frame, cbk);
return 0;
-}
+ }
+
+ afr_fix_open(fd, this);
+ afr_read_txn(frame, this, fd->inode, afr_fgetxattr_wind,
+ AFR_METADATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
/* }}} */
/* {{{ readv */
-/**
- * read algorithm:
- *
- * if the user has specified a read subvolume, use it
- * otherwise -
- * use the inode number to hash it to one of the subvolumes, and
- * read from there (to balance read load)
- *
- * if any of the above read's fail, try the children in sequence
- * beginning at the beginning
- */
+int
+afr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
-int32_t
-afr_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref, dict_t *xdata)
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
+
+ AFR_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, buf, iobref,
+ xdata);
+ return 0;
+}
+
+int
+afr_readv_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t *fresh_children = NULL;
- int32_t read_child = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, 0, 0, 0,
+ 0, 0);
+ return 0;
+ }
- children = priv->children;
+ STACK_WIND_COOKIE(
+ frame, afr_readv_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->readv, local->fd, local->cont.readv.size,
+ local->cont.readv.offset, local->cont.readv.flags, local->xdata_req);
+ return 0;
+}
- local = frame->local;
+int
+afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.readv.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset,
- local->cont.readv.flags,
- NULL);
- }
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_READ;
+ local->fd = fd_ref(fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ afr_fix_open(fd, this);
+ afr_read_txn(frame, this, fd->inode, afr_readv_wind, AFR_DATA_TRANSACTION);
+
+ return 0;
out:
- if (unwind) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref, xdata);
- }
+ AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
- return 0;
+ return 0;
}
+/* }}} */
-int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+/* {{{ seek */
+
+int
+afr_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- children = priv->children;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ afr_local_t *local = NULL;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = frame->local;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readv.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- local->fd = fd_ref (fd);
+ AFR_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata);
+ return 0;
+}
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
- local->cont.readv.flags = flags;
+int
+afr_seek_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset, flags, xdata);
+ local = frame->local;
+ priv = this->private;
- ret = 0;
-out:
- if (ret < 0) {
- AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
- NULL, NULL);
- }
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(seek, frame, local->op_ret, local->op_errno, 0, NULL);
return 0;
+ }
+
+ STACK_WIND_COOKIE(
+ frame, afr_seek_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->seek, local->fd, local->cont.seek.offset,
+ local->cont.seek.what, local->xdata_req);
+ return 0;
}
+int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_SEEK;
+ local->fd = fd_ref(fd);
+ local->cont.seek.offset = offset;
+ local->cont.seek.what = what;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ afr_fix_open(fd, this);
+
+ afr_read_txn(frame, this, fd->inode, afr_seek_wind, AFR_DATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL);
+
+ return 0;
+}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index e4091a793e0..8c982bc7e6f 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -12,31 +12,34 @@
#define __INODE_READ_H__
int32_t
-afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask, dict_t *xdata);
+afr_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata);
+afr_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
int32_t
-afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata);
+afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
int32_t
-afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata);
+afr_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
+afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata);
+afr_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata);
int32_t
-afr_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata);
-
+afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+int
+afr_handle_quota_size(call_frame_t *frame, xlator_t *this);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 02e36b13aba..1d6e4f3570a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -8,2101 +8,2558 @@
cases as published by the Free Software Foundation.
*/
-
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-
+#include <glusterfs/glusterfs.h>
#include "afr.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include "protocol-common.h"
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
-/* {{{ writev */
-
-int
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf,
- NULL);
- }
- return 0;
+ int i = 0;
+ int ret = 0;
+ int read_subvol = 0;
+ struct iatt *stbuf = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
+
+ /*This code needs to stay till DHT sends fops on linked
+ * inodes*/
+ if (!inode_is_linked(local->inode)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ if (!gf_uuid_is_null(local->replies[i].poststat.ia_gfid)) {
+ gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ } else {
+ ret = dict_get_bin(local->replies[i].xdata,
+ DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ if (ret)
+ continue;
+ gf_uuid_copy(args.gfid, stbuf->ia_gfid);
+ args.ia_type = stbuf->ia_type;
+ break;
+ }
+ }
+ }
+
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+ read_subvol = afr_metadata_subvol_get(local->inode, this, NULL,
+ local->readable, NULL, &args);
+ } else {
+ read_subvol = afr_data_subvol_get(local->inode, this, NULL,
+ local->readable, NULL, &args);
+ }
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ afr_pick_error_xdata(local, priv, local->inode, local->readable, NULL,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0)
+ continue;
+
+ /* Order of checks in the compound conditional
+ below is important.
+
+ - Highest precedence: largest op_ret
+ - Next precedence: if all op_rets are equal, read subvol
+ - Least precedence: any succeeded subvol
+ */
+ if ((local->op_ret < local->replies[i].op_ret) ||
+ ((local->op_ret == local->replies[i].op_ret) &&
+ (i == read_subvol))) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.inode_wfop.prebuf = local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf = local->replies[i].poststat;
+
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ if (local->replies[i].xattr) {
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
+ local->xattr_rsp = dict_ref(local->replies[i].xattr);
+ }
+ }
+ }
+
+ afr_set_in_flight_sb_status(this, frame, local->inode);
+out:
+ return;
}
-
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+static void
+__afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int read_child = 0;
-
- local = frame->local;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+
+ if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1)
+ op_ret = iov_length(local->cont.writev.vector,
+ local->cont.writev.count);
+
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xattr)
+ local->replies[child_index].xattr = dict_ref(xattr);
+ } else {
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
+
+ return;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+static int
+__afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ priv = this->private;
+ local = frame->local;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
+ LOCK(&frame->lock);
+ {
+ __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, xattr, xdata);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
- }
+ if (call_count == 0) {
+ __afr_inode_write_finalize(frame, this);
- local->op_errno = op_errno;
+ if (afr_txn_nothing_failed(frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata && afr_needs_changelog_update(local))
+ afr_zero_fill_stat(local);
+ local->transaction.unwind(frame, this);
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ afr_transaction_resume(frame, this);
+ }
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
- return 0;
+ return 0;
}
-int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
+/* {{{ writev */
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+void
+afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame)
+{
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+ if (src_local->xdata_rsp)
+ dst_local->xdata_rsp = dict_ref(src_local->xdata_rsp);
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+void
+afr_writev_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
- local->call_count = call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.flags,
- local->cont.writev.iobref,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ if (priv->consistent_metadata)
+ afr_zero_fill_stat(local);
- return 0;
+ AFR_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
}
-
int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
+afr_transaction_writev_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ call_frame_t *fop_frame = NULL;
- local = frame->local;
+ fop_frame = afr_transaction_detach_fop_frame(frame);
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
+ if (fop_frame) {
+ afr_writev_copy_outvars(frame, fop_frame);
+ afr_writev_unwind(fop_frame, this);
+ }
+ return 0;
+}
- local->transaction.unwind (frame, this);
+static void
+afr_writev_handle_short_writes(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
+}
- AFR_STACK_DESTROY (frame);
+void
+afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int ret = 0;
+ afr_local_t *local = frame->local;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
+ int32_t num_inodelks = 0;
+
+ LOCK(&frame->lock);
+ {
+ __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, NULL, xdata);
+ if (op_ret == -1 || !xdata)
+ goto unlock;
+
+ write_is_append = 0;
+ ret = dict_get_uint32(xdata, GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+
+ ret = dict_get_uint32(xdata, GLUSTERFS_ACTIVE_FD_COUNT, &open_fd_count);
+ if (ret < 0)
+ goto unlock;
+ if (open_fd_count > local->open_fd_count) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
- return 0;
+ ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
+ &num_inodelks);
+ if (ret < 0)
+ goto unlock;
+ if (num_inodelks > local->num_inodelks) {
+ local->num_inodelks = num_inodelks;
+ local->update_num_inodelks = _gf_true;
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
}
+void
+afr_process_post_writev(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write(this, local);
+
+ __afr_inode_write_finalize(frame, this);
+
+ afr_writev_handle_short_writes(frame, this);
+
+ if (local->update_open_fd_count)
+ local->inode_ctx->open_fd_count = local->open_fd_count;
+ if (local->update_num_inodelks &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ lock->num_inodelks = local->num_inodelks;
+ }
+}
int
-afr_do_writev (call_frame_t *frame, xlator_t *this)
+afr_writev_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- call_frame_t *transaction_frame = NULL;
- afr_local_t *local = NULL;
- int op_ret = -1;
- int op_errno = 0;
-
- local = frame->local;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, prebuf,
+ postbuf, xdata);
- transaction_frame->local = local;
- frame->local = NULL;
+ call_count = afr_frame_return(frame);
- local->op = GF_FOP_WRITE;
+ if (call_count == 0) {
+ afr_process_post_writev(frame, this);
- local->success_count = 0;
-
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
-
- local->transaction.main_frame = frame;
- if (local->fd->flags & O_APPEND) {
- local->transaction.start = 0;
- local->transaction.len = 0;
+ if (!afr_txn_nothing_failed(frame, this)) {
+ // Don't unwind until post-op is complete
+ afr_transaction_resume(frame, this);
} else {
- local->transaction.start = local->cont.writev.offset;
- local->transaction.len = iov_length (local->cont.writev.vector,
- local->cont.writev.count);
- }
-
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
- }
-
- return 0;
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame(frame);
+ afr_writev_copy_outvars(frame, fop_frame);
+ afr_transaction_resume(frame, this);
+ afr_writev_unwind(fop_frame, this);
+ }
+ }
+ return 0;
}
static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
-{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
-
- if ((!fd) || (!fd->inode))
- return -1;
-
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
-
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
-
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
-
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
-
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
- }
-
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
-
- return 0;
+afr_arbiter_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ static char byte = 0xFF;
+ static struct iovec vector = {&byte, 1};
+ int32_t count = 1;
+
+ STACK_WIND_COOKIE(
+ frame, afr_writev_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol], priv->children[subvol]->fops->writev, local->fd,
+ &vector, count, local->cont.writev.offset, local->cont.writev.flags,
+ local->cont.writev.iobref, local->xdata_req);
+
+ return 0;
}
-afr_fd_paused_call_t*
-afr_paused_call_create (call_frame_t *frame)
+int
+afr_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_fd_paused_call_t *paused_call = NULL;
-
- local = frame->local;
- GF_ASSERT (local->fop_call_continue);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- paused_call = GF_CALLOC (1, sizeof (*paused_call),
- gf_afr_fd_paused_call_t);
- if (paused_call) {
- INIT_LIST_HEAD (&paused_call->call_list);
- paused_call->frame = frame;
- }
+ local = frame->local;
+ priv = this->private;
- return paused_call;
+ if (AFR_IS_ARBITER_BRICK(priv, subvol)) {
+ afr_arbiter_writev_wind(frame, this, subvol);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_writev_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev, local->fd,
+ local->cont.writev.vector, local->cont.writev.count,
+ local->cont.writev.offset, local->cont.writev.flags,
+ local->cont.writev.iobref, local->xdata_req);
+ return 0;
}
-static int
-afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx)
+int
+afr_do_writev(call_frame_t *frame, xlator_t *this)
{
- afr_fd_paused_call_t *paused_call = NULL;
- int ret = 0;
-
- paused_call = afr_paused_call_create (frame);
- if (paused_call)
- list_add (&paused_call->call_list, &fd_ctx->paused_calls);
- else
- ret = -ENOMEM;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = frame->local;
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ if (!AFR_FRAME_INIT(frame, op_errno))
+ goto out;
+
+ local->op = GF_FOP_WRITE;
+
+ local->transaction.wind = afr_writev_wind;
+ local->transaction.unwind = afr_transaction_writev_unwind;
+
+ local->transaction.main_frame = frame;
+
+ if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency guarantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+ } else {
+ local->transaction.start = local->cont.writev.offset;
+ local->transaction.len = iov_length(local->cont.writev.vector,
+ local->cont.writev.count);
+ }
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return ret;
+ AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-static void
-afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this)
+int
+afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- char *reason = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = ENOMEM;
+ int ret = -1;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.writev.vector = iov_dup(vector, count);
+ if (!local->cont.writev.vector)
+ goto out;
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref(iobref);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ if (dict_set_uint32(local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+ this->name)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_uint32(local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags | flags) & (O_SYNC | O_DSYNC));
+
+ afr_fix_open(fd, this);
+
+ afr_do_writev(frame, this);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
+ return 0;
+}
- sh->do_missing_entry_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_data_self_heal = _gf_true;
+/* }}} */
- reason = "subvolume came online";
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-}
+/* {{{ truncate */
int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
+afr_truncate_unwind(call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
- int i = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- inode_t *inode = NULL;
- gf_boolean_t need_self_heal = _gf_false;
- int *need_open = NULL;
- int need_open_count = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- gf_boolean_t fop_continue = _gf_true;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- GF_ASSERT (local->fd);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- inode = local->fd->inode;
- //gfid is not set in rebalance, that case needs to be handled.
- if (fd_is_anonymous (local->fd) ||
- !inode || uuid_is_null (inode->gfid)) {
- fop_continue = _gf_true;
- goto out;
- }
+ AFR_STACK_UNWIND(truncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- if (pause_fop)
- GF_ASSERT (local->fop_call_continue);
+int
+afr_truncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- ret = afr_prepare_loc (frame, local->fd);
- if (ret < 0) {
- //File does not exist we cant open it.
- ret = 0;
- goto out;
- }
+ local = frame->local;
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- ret = -EINVAL;
- goto out;
- }
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->up_count < priv->up_count) {
- need_self_heal = _gf_true;
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) &&
- local->child_up[i]) {
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
- if (!need_open)
- need_open = GF_CALLOC (priv->child_count,
- sizeof (*need_open),
- gf_afr_mt_int32_t);
- need_open[i] = 1;
- need_open_count++;
- } else if (pause_fop && local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING)) {
- local->fop_paused = _gf_true;
- }
- }
-
- if (local->fop_paused) {
- GF_ASSERT (pause_fop);
- gf_log (this->name, GF_LOG_INFO, "Pause fd %p",
- local->fd);
- ret = afr_pause_fd_fop (frame, this, fd_ctx);
- if (ret)
- goto unlock;
- fop_continue = _gf_false;
- }
- }
-unlock:
- UNLOCK (&local->fd->lock);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s",
- local->loc.path);
- fop_continue = _gf_false;
- goto out;
- }
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- if (need_self_heal)
- afr_trigger_open_fd_self_heal (frame, this);
+int
+afr_truncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (!need_open_count)
- goto out;
+ local = frame->local;
+ priv = this->private;
- gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd);
- afr_fix_open (frame, this, fd_ctx, need_open_count, need_open);
- fop_continue = _gf_false;
-out:
- if (need_open)
- GF_FREE (need_open);
- if (fop_continue && local->fop_call_continue)
- local->fop_call_continue (frame, this);
- return ret;
+ STACK_WIND_COOKIE(frame, afr_truncate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->truncate, &local->loc,
+ local->cont.truncate.offset, local->xdata_req);
+ return 0;
}
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(writev,out);
+ local->cont.truncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->transaction.wind = afr_truncate_wind;
+ local->transaction.unwind = afr_truncate_unwind;
- local->cont.writev.vector = iov_dup (vector, count);
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.flags = flags;
- local->cont.writev.iobref = iobref_ref (iobref);
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_writev;
+ local->op = GF_FOP_TRUNCATE;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
- ret = 0;
+ /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
/* }}} */
-/* {{{ truncate */
+/* {{{ ftruncate */
int
-afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(ftruncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+afr_ftruncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- return 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
}
-
int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_ftruncate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->ftruncate, local->fd,
+ local->cont.ftruncate.offset, local->xdata_req);
+ return 0;
}
-
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata)
+afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(truncate,out);
+ local->cont.ftruncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (!local->xdata_req)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->op = GF_FOP_FTRUNCATE;
- local->cont.truncate.offset = offset;
+ local->transaction.wind = afr_ftruncate_wind;
+ local->transaction.unwind = afr_ftruncate_unwind;
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
- local->transaction.unwind = afr_truncate_unwind;
+ local->transaction.main_frame = frame;
- loc_copy (&local->loc, loc);
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
- local->transaction.main_frame = frame;
- local->transaction.start = offset;
- local->transaction.len = 0;
+ afr_fix_open(fd, this);
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
- ret = 0;
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ AFR_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
/* }}} */
-/* {{{ ftruncate */
-
+/* {{{ setattr */
int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_setattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(setattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_setattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
+ postop, NULL, xdata);
}
-
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+afr_setattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd,
- local->cont.ftruncate.offset,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_setattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setattr, &local->loc,
+ &local->cont.setattr.in_buf, local->cont.setattr.valid,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_setattr_wind;
+ local->transaction.unwind = afr_setattr_unwind;
+
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_SETATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* {{{ fsetattr */
int
-afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
-
- local = frame->local;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- transaction_frame->local = local;
- frame->local = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local->op = GF_FOP_FTRUNCATE;
+ local = frame->local;
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
- local->transaction.unwind = afr_ftruncate_unwind;
-
- local->transaction.main_frame = frame;
-
- local->transaction.start = local->cont.ftruncate.offset;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
- NULL, NULL);
- }
+ AFR_STACK_UNWIND(fsetattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- return 0;
+int
+afr_fsetattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
+ postop, NULL, xdata);
}
+int
+afr_fsetattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fsetattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetattr, local->fd,
+ &local->cont.fsetattr.in_buf, local->cont.fsetattr.valid,
+ local->xdata_req);
+ return 0;
+}
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata)
+afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(ftruncate,out);
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->transaction.wind = afr_fsetattr_wind;
+ local->transaction.unwind = afr_fsetattr_unwind;
- local->cont.ftruncate.offset = offset;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_ftruncate;
+ local->op = GF_FOP_FSETATTR;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_fix_open(fd, this);
- ret = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-/* }}} */
-
-/* {{{ setattr */
+/* {{{ setxattr */
int
-afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_setxattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(setxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
}
+int
+afr_setxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
+}
int
-afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+afr_setxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_setxattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setxattr, &local->loc,
+ local->cont.setxattr.dict, local->cont.setxattr.flags,
+ local->xdata_req);
+ return 0;
+}
- local = frame->local;
- priv = this->private;
+int
+afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i, ret = 0;
+ char *op_type = NULL;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ local = frame->local;
+ priv = this->private;
+ i = (long)cookie;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ ret = dict_get_str_sizen(local->xdata_req, "replicate-brick-op", &op_type);
+ if (ret)
+ goto out;
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ gf_smsg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
+ op_ret ? op_errno : 0, AFR_MSG_SET_PEND_XATTR, "name=%s",
+ priv->children[i]->name, "op_ret=%s",
+ op_ret ? "failed" : "succeeded", NULL);
- local->success_count++;
+out:
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+int
+afr_emptyb_set_pending_changelog(call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_nodes)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0, i = 0;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- call_count = afr_frame_return (frame);
+ AFR_ONLIST(locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk,
+ xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, local->xattr_req,
+ NULL);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ /* It is sufficient if xattrop was successful on one child */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- return 0;
+ if (local->replies[i].op_ret == 0) {
+ ret = 0;
+ goto out;
+ } else {
+ ret = afr_higher_errno(ret, local->replies[i].op_errno);
+ }
+ }
+out:
+ return -ret;
}
-
-int32_t
-afr_setattr_wind (call_frame_t *frame, xlator_t *this)
+static int
+_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ int empty_index, afr_transaction_type type,
+ char *op_type, const int op_type_len)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ int count = 0;
+ int ret = -ENOMEM;
+ int idx = -1;
+ int d_idx = -1;
+ unsigned char *locked_nodes = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ locked_nodes = alloca0(priv->child_count);
+
+ idx = afr_index_for_transaction_type(type);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ local->pending[empty_index][idx] = hton32(1);
+
+ if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION))
+ local->pending[empty_index][d_idx] = hton32(1);
+
+ local->xdata_req = dict_new();
+ if (!local->xdata_req)
+ goto out;
+
+ ret = dict_set_nstrn(local->xdata_req, "replicate-brick-op",
+ SLEN("replicate-brick-op"), op_type, op_type_len);
+ if (ret)
+ goto out;
+
+ local->xattr_req = dict_new();
+ if (!local->xattr_req)
+ goto out;
+
+ ret = afr_set_pending_dict(priv, local->xattr_req, local->pending);
+ if (ret < 0)
+ goto out;
+
+ if (AFR_ENTRY_TRANSACTION == type) {
+ count = afr_selfheal_entrylk(frame, this, loc->inode, this->name, NULL,
+ locked_nodes);
+ } else {
+ count = afr_selfheal_inodelk(frame, this, loc->inode, this->name,
+ LLONG_MAX - 1, 0, locked_nodes);
+ }
+
+ if (!count) {
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
+ NULL);
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ ret = afr_emptyb_set_pending_changelog(frame, this, locked_nodes);
+ if (ret)
+ goto unlock;
+ ret = 0;
+unlock:
+ if (AFR_ENTRY_TRANSACTION == type) {
+ afr_selfheal_unentrylk(frame, this, loc->inode, this->name, NULL,
+ locked_nodes, NULL);
+ } else {
+ afr_selfheal_uninodelk(frame, this, loc->inode, this->name,
+ LLONG_MAX - 1, 0, locked_nodes);
+ }
+out:
+ return ret;
+}
- local->call_count = call_count;
+void
+afr_brick_args_cleanup(void *opaque)
+{
+ afr_empty_brick_args_t *data = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc,
- &local->cont.setattr.in_buf,
- local->cont.setattr.valid,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ data = opaque;
+ loc_wipe(&data->loc);
+ GF_FREE(data);
+}
- return 0;
+int
+_afr_handle_empty_brick_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ afr_brick_args_cleanup(opaque);
+ return 0;
}
+int
+_afr_handle_empty_brick(void *opaque)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int empty_index = -1;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *op_type = NULL;
+ int op_type_len = 0;
+ afr_empty_brick_args_t *data = NULL;
+ call_frame_t *op_frame = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ empty_index = data->empty_index;
+ if (!data->op_type)
+ goto out;
+
+ op_frame = copy_frame(frame);
+ if (!op_frame) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_type = data->op_type;
+ op_type_len = strlen(op_type);
+ this = op_frame->this;
+ priv = this->private;
+
+ afr_set_lk_owner(op_frame, this, op_frame->root);
+ local = AFR_FRAME_INIT(op_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, &data->loc);
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, AFR_MSG_NEW_BRICK, "name=%s",
+ priv->children[empty_index]->name, NULL);
+
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_METADATA_TRANSACTION, op_type,
+ op_type_len);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ dict_unref(local->xdata_req);
+ dict_unref(local->xattr_req);
+ afr_matrix_cleanup(local->pending, priv->child_count);
+ local->pending = NULL;
+ local->xattr_req = NULL;
+ local->xdata_req = NULL;
+
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_ENTRY_TRANSACTION, op_type,
+ op_type_len);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (op_frame) {
+ AFR_STACK_DESTROY(op_frame);
+ }
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ return 0;
+}
int
-afr_setattr_done (call_frame_t *frame, xlator_t *this)
+afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *data)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(local->xdata_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str_sizen(local->xdata_req, "child-name", data);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ /* set spb choice to -1 whether heal succeeds or not:
+ * If heal succeeds : spb-choice should be set to -1 as
+ * it is no longer valid; file is not
+ * in split-brain anymore.
+ * If heal doesn't succeed:
+ * spb-choice should be set to -1
+ * otherwise reads will be served
+ * from spb-choice which is misleading.
+ */
+ ret = afr_inode_split_brain_choice_set(loc->inode, this, -1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ NULL);
+ afr_heal_splitbrain_file(frame, this, loc);
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- local = frame->local;
+int
+afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len)
+{
+ int spb_child_index = -1;
+ char *spb_child_str = NULL;
- local->transaction.unwind (frame, this);
+ spb_child_str = alloca0(len + 1);
+ memcpy(spb_child_str, value, len);
- AFR_STACK_DESTROY (frame);
+ if (!strcmp(spb_child_str, "none"))
+ return -2;
- return 0;
+ spb_child_index = afr_get_child_index_from_name(this, spb_child_str);
+ if (spb_child_index < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "subvol=%s", spb_child_str, NULL);
+ }
+ return spb_child_index;
}
-
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
+afr_can_set_split_brain_choice(void *opaque)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(setattr,out);
+ afr_spbc_timeout_t *data = opaque;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ int ret = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+
+ ret = afr_is_split_brain(frame, this, loc->inode, loc->gfid, &data->d_spb,
+ &data->m_spb);
+
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, "gfid=%s",
+ uuid_utoa(loc->gfid), NULL);
+ return ret;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+int
+afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
+{
+ void *choice_value = NULL;
+ void *resolve_value = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_spbc_timeout_t *data = NULL;
+ int len = 0;
+ int spb_child_index = -1;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ priv = this->private;
+
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &choice_value, &len);
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &resolve_value,
+ &len);
+ if (!choice_value && !resolve_value) {
+ ret = -1;
+ goto out;
+ }
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local) {
+ ret = 1;
+ goto out;
+ }
+
+ local->op = GF_FOP_SETXATTR;
+
+ if (choice_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, choice_value,
+ len);
+ if (spb_child_index < 0) {
+ /* Case where value was "none" */
+ if (spb_child_index == -2)
+ spb_child_index = -1;
+ else {
+ ret = 1;
+ op_errno = EINVAL;
goto out;
+ }
}
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->cont.setattr.in_buf = *buf;
- local->cont.setattr.valid = valid;
-
- local->transaction.fop = afr_setattr_wind;
- local->transaction.done = afr_setattr_done;
- local->transaction.unwind = afr_setattr_unwind;
-
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spbc_timeout_t);
+ if (!data) {
+ ret = 1;
+ goto out;
+ }
+ data->spb_child_index = spb_child_index;
+ data->frame = frame;
+ loc_copy(&local->loc, loc);
+ data->loc = &local->loc;
+ ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice,
+ afr_set_split_brain_choice, NULL, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "name=%s", loc->name, NULL);
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (resolve_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, resolve_value,
+ len);
+ if (spb_child_index < 0) {
+ ret = 1;
+ goto out;
+ }
+ afr_split_brain_resolve_do(frame, this, loc,
+ priv->children[spb_child_index]->name);
ret = 0;
+ }
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
- }
-
- return 0;
+ /* key was correct but value was invalid when ret == 1 */
+ if (ret == 1) {
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ if (data)
+ GF_FREE(data);
+ ret = 0;
+ }
+ return ret;
}
-/* {{{ fsetattr */
-
int
-afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_handle_spb_choice_timeout(xlator_t *this, call_frame_t *frame, dict_t *dict)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ uint64_t timeout = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf,
- NULL);
- }
+ ret = dict_get_uint64(dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout);
+ if (!ret) {
+ priv->spb_choice_timeout = timeout * 60;
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ }
- return 0;
+ return ret;
}
-
int
-afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ int ret = -1;
+ int ab_ret = -1;
+ int empty_index = -1;
+ int op_errno = EPERM;
+ char *empty_brick = NULL;
+ char *op_type = NULL;
+ afr_empty_brick_args_t *data = NULL;
+
+ ret = dict_get_str_sizen(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
+ if (!ret)
+ op_type = GF_AFR_REPLACE_BRICK;
+
+ ab_ret = dict_get_str_sizen(dict, GF_AFR_ADD_BRICK, &empty_brick);
+ if (!ab_ret)
+ op_type = GF_AFR_ADD_BRICK;
+
+ if (ret && ab_ret)
+ goto out;
+
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {
+ gf_smsg(this->name, GF_LOG_ERROR, EPERM, AFR_MSG_INTERNAL_ATTR,
+ "op_type=%s", op_type, NULL);
+ ret = 1;
+ goto out;
+ }
+ empty_index = afr_get_child_index_from_name(this, empty_brick);
+
+ if (empty_index < 0) {
+ /* Didn't belong to this replica pair
+ * Just do a no-op
+ */
+ AFR_STACK_UNWIND(setxattr, frame, 0, 0, NULL);
+ return 0;
+ } else {
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_empty_brick_t);
+ if (!data) {
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ data->frame = frame;
+ loc_copy(&data->loc, loc);
+ data->empty_index = empty_index;
+ data->op_type = op_type;
+ ret = synctask_new(this->ctx->env, _afr_handle_empty_brick,
+ _afr_handle_empty_brick_cbk, NULL, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ NULL);
+ ret = 1;
+ op_errno = ENOMEM;
+ afr_brick_args_cleanup(data);
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ ret = 0;
+ }
+ return ret;
+}
- local = frame->local;
- priv = this->private;
+static int
+afr_handle_special_xattr(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
+{
+ int ret = -1;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ ret = afr_handle_split_brain_commands(this, frame, loc, dict);
+ if (ret == 0)
+ goto out;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ ret = afr_handle_spb_choice_timeout(this, frame, dict);
+ if (ret == 0)
+ goto out;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ /* Applicable for replace-brick and add-brick commands */
+ ret = afr_handle_empty_brick(this, frame, loc, dict);
+out:
+ return ret;
+}
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+int
+afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
- local->success_count++;
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ ret = afr_handle_special_xattr(this, frame, loc, dict);
+ if (ret == 0)
+ return 0;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- call_count = afr_frame_return (frame);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local->cont.setxattr.dict = dict_ref(dict);
+ local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- return 0;
-}
+ if (!local->xdata_req)
+ goto out;
+ local->transaction.wind = afr_setxattr_wind;
+ local->transaction.unwind = afr_setxattr_unwind;
-int32_t
-afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- local = frame->local;
- priv = this->private;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local->op = GF_FOP_SETXATTR;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- local->call_count = call_count;
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetattr,
- local->fd,
- &local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+/* {{{ fsetxattr */
int
-afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_UNWIND(fsetxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- return 0;
+int
+afr_fsetxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
+afr_fsetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fsetxattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetxattr, local->fd,
+ local->cont.fsetxattr.dict, local->cont.fsetxattr.flags,
+ local->xdata_req);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+int
+afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- priv = this->private;
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
- QUORUM_CHECK(fsetattr,out);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->cont.fsetxattr.dict = dict_ref(dict);
+ local->cont.fsetxattr.flags = flags;
- local->cont.fsetattr.in_buf = *buf;
- local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- local->transaction.fop = afr_fsetattr_wind;
- local->transaction.done = afr_fsetattr_done;
- local->transaction.unwind = afr_fsetattr_unwind;
+ if (!local->xdata_req)
+ goto out;
- local->fd = fd_ref (fd);
+ local->transaction.wind = afr_fsetxattr_wind;
+ local->transaction.unwind = afr_fsetxattr_unwind;
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->op = GF_FOP_FSETXATTR;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- ret = 0;
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+ return 0;
}
+/* }}} */
-/* {{{ setxattr */
-
+/* {{{ removexattr */
int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_removexattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(removexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
}
-
int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+afr_removexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
-
int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+afr_removexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_removexattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->removexattr, &local->loc,
+ local->cont.removexattr.name, local->xdata_req);
+ return 0;
}
int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- data_pair_t *trav = NULL;
- int ret = -1;
- int op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (this, out);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- trav, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- trav, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(setxattr,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ local->cont.removexattr.name = gf_strdup(name);
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
+ local->transaction.wind = afr_removexattr_wind;
+ local->transaction.unwind = afr_removexattr_unwind;
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_REMOVEXATTR;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
}
-/* {{{ fsetxattr */
-
-
+/* ffremovexattr */
int
-afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (fsetxattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(fremovexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
}
+int
+afr_fremovexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
+}
int
-afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_fremovexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+ STACK_WIND_COOKIE(frame, afr_fremovexattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fremovexattr, local->fd,
+ local->cont.removexattr.name, local->xdata_req);
+ return 0;
+}
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
+int
+afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
- call_count = afr_frame_return (frame);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- return 0;
-}
+ local->cont.removexattr.name = gf_strdup(name);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+ if (!local->xdata_req)
+ goto out;
-int
-afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ local->transaction.wind = afr_fremovexattr_wind;
+ local->transaction.unwind = afr_fremovexattr_unwind;
- local = frame->local;
- priv = this->private;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local->op = GF_FOP_FREMOVEXATTR;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->call_count = call_count;
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetxattr,
- local->fd,
- local->cont.fsetxattr.dict,
- local->cont.fsetxattr.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
-}
+ AFR_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
int
-afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
+afr_fallocate_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(fallocate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
int
-afr_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+afr_fallocate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = EINVAL;
- data_pair_t *trav = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- trav, op_errno, out);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- trav, op_errno, out);
+int
+afr_fallocate_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fallocate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fallocate, local->fd,
+ local->cont.fallocate.mode, local->cont.fallocate.offset,
+ local->cont.fallocate.len, local->xdata_req);
+ return 0;
+}
- if (ret)
- goto out;
+int
+afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- priv = this->private;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- QUORUM_CHECK(fsetxattr,out);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- transaction_frame->local = local;
+ if (!local->xdata_req)
+ goto out;
- local->op_ret = -1;
+ local->op = GF_FOP_FALLOCATE;
- local->cont.fsetxattr.dict = dict_ref (dict);
- local->cont.fsetxattr.flags = flags;
+ local->transaction.wind = afr_fallocate_wind;
+ local->transaction.unwind = afr_fallocate_unwind;
- local->transaction.fop = afr_fsetxattr_wind;
- local->transaction.done = afr_fsetxattr_done;
- local->transaction.unwind = afr_fsetxattr_unwind;
+ local->transaction.main_frame = frame;
- local->fd = fd_ref (fd);
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_fix_open(fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
-
-/* {{{ removexattr */
-
+/* {{{ discard */
int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_discard_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_discard_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- local = frame->local;
- priv = this->private;
+int
+afr_discard_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_discard_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->discard, local->fd,
+ local->cont.discard.offset, local->cont.discard.len,
+ local->xdata_req);
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+int
+afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
- call_count = afr_frame_return (frame);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- return 0;
-}
+ if (!local->xdata_req)
+ goto out;
+ local->op = GF_FOP_DISCARD;
-int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ local->transaction.wind = afr_discard_wind;
+ local->transaction.unwind = afr_discard_unwind;
- local = frame->local;
- priv = this->private;
+ local->transaction.main_frame = frame;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ afr_fix_open(fd, this);
- local->call_count = call_count;
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* {{{ zerofill */
int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+afr_zerofill_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- VALIDATE_OR_GOTO (this, out);
+int
+afr_zerofill_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_zerofill_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->zerofill, local->fd,
+ local->cont.zerofill.offset, local->cont.zerofill.len,
+ local->xdata_req);
+ return 0;
+}
- GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
- name, op_errno, out);
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
- name, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- priv = this->private;
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- QUORUM_CHECK(removexattr,out);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->op = GF_FOP_ZEROFILL;
- local->cont.removexattr.name = gf_strdup (name);
+ local->transaction.wind = afr_zerofill_wind;
+ local->transaction.unwind = afr_zerofill_unwind;
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ local->transaction.main_frame = frame;
- loc_copy (&local->loc, loc);
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = len;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_fix_open(fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-/* ffremovexattr */
-int
-afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+/* }}} */
- if (main_frame) {
- AFR_STACK_UNWIND (fremovexattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
- return 0;
+int32_t
+afr_xattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, xattr, xdata);
}
-
int
-afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_xattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_xattrop_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &local->loc,
+ local->cont.xattrop.optype, local->cont.xattrop.xattr,
+ local->xdata_req);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_xattrop_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(xattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp);
+ return 0;
+}
int32_t
-afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
+afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->call_count = call_count;
+ local->cont.xattrop.xattr = dict_ref(xattr);
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fremovexattr,
- local->fd,
- local->cont.removexattr.name,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ local->transaction.wind = afr_xattrop_wind;
+ local->transaction.unwind = afr_xattrop_unwind;
- return 0;
-}
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
+ local->op = GF_FOP_XATTROP;
-int
-afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->transaction.unwind (frame, this);
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- AFR_STACK_DESTROY (frame);
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+int32_t
+afr_fxattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, xattr, xdata);
+}
int
-afr_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+afr_fxattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fxattrop_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fxattrop, local->fd,
+ local->cont.xattrop.optype, local->cont.xattrop.xattr,
+ local->xdata_req);
+ return 0;
+}
- GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
- name, op_errno, out);
+int
+afr_fxattrop_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
- name, op_errno, out);
+ local = frame->local;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- priv = this->private;
+ AFR_STACK_UNWIND(fxattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp);
+ return 0;
+}
- QUORUM_CHECK(fremovexattr, out);
+int32_t
+afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.xattrop.xattr = dict_ref(xattr);
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ local->transaction.wind = afr_fxattrop_wind;
+ local->transaction.unwind = afr_fxattrop_unwind;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_FXATTROP;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ AFR_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+int
+afr_fsync_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = frame->local;
- transaction_frame->local = local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- local->op_ret = -1;
+ AFR_STACK_UNWIND(fsync, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
- local->cont.removexattr.name = gf_strdup (name);
+ return 0;
+}
- local->transaction.fop = afr_fremovexattr_wind;
- local->transaction.done = afr_fremovexattr_done;
- local->transaction.unwind = afr_fremovexattr_unwind;
+int
+afr_fsync_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- local->fd = fd_ref (fd);
+int
+afr_fsync_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local = frame->local;
+ priv = this->private;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ STACK_WIND_COOKIE(frame, afr_fsync_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsync, local->fd,
+ local->cont.fsync.datasync, local->xdata_req);
+ return 0;
+}
- op_ret = 0;
+int
+afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ if (xdata) {
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
+ local->xdata_req = dict_new();
+ }
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_FSYNC;
+ local->cont.fsync.datasync = datasync;
+
+ if (afr_fd_has_witnessed_unstable_write(this, fd->inode)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
+ local->transaction.wind = afr_fsync_wind;
+ local->transaction.unwind = afr_fsync_unwind;
+
+ local->transaction.main_frame = frame;
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index ed11079fd1b..a787069b7a1 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -12,60 +12,83 @@
#define __INODE_WRITE_H__
int32_t
-afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *xdata);
+afr_chmod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dict_t *xdata);
int32_t
-afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
+afr_chown(call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid, gid_t gid,
+ dict_t *xdata);
int
-afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
+afr_fchown(call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, gid_t gid,
+ dict_t *xdata);
int32_t
-afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode, dict_t *xdata);
+afr_fchmod(call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode,
+ dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata);
+afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata);
int32_t
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata);
+afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
int32_t
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata);
+afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
int32_t
-afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2], dict_t *xdata);
+afr_utimens(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct timespec tv[2], dict_t *xdata);
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
+afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata);
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
+afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int32_t valid, dict_t *xdata);
int32_t
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-afr_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
+afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata);
+afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
int32_t
-afr_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata);
+afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+int
+afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+
+int32_t
+afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int32_t
+afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int
+afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 5e61be4d4df..bc8eabe0f43 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -8,2270 +8,784 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-messages.h"
#include <signal.h>
-
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
-#define LOCKED_LOWER 0x2 /* for lower path */
-
-#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->inodelk_trace) \
- break; \
- afr_trace_inodelk_in (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->inodelk_trace) \
- break; \
- afr_trace_inodelk_out (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->entrylk_trace) \
- break; \
- afr_trace_entrylk_in (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->entrylk_trace) \
- break; \
- afr_trace_entrylk_out (frame, this, params); \
- } while (0);
-
-int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
-
-static int
-afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
-
-static uint64_t afr_lock_number = 1;
-
-static uint64_t
-get_afr_lock_number ()
-{
- return (++afr_lock_number);
-}
-
-int
-afr_set_lock_number (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_number = get_afr_lock_number ();
-
- return 0;
-}
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
+#define LOCKED_LOWER 0x2 /* for lower path */
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
+afr_lockee_cleanup(afr_lockee_t *lockee)
{
- gf_log (this->name, GF_LOG_TRACE,
- "Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)lk_owner);
+ if (lockee->fd) {
+ fd_unref(lockee->fd);
+ lockee->fd = NULL;
+ } else {
+ loc_wipe(&lockee->loc);
+ }
- set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
-}
+ GF_FREE(lockee->basename);
+ lockee->basename = NULL;
+ GF_FREE(lockee->locked_nodes);
+ lockee->locked_nodes = NULL;
-static int
-is_afr_lock_selfheal (afr_local_t *local)
-{
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
-
- int_lock = &local->internal_lock;
-
- switch (int_lock->selfheal_lk_type) {
- case AFR_DATA_SELF_HEAL_LK:
- case AFR_METADATA_SELF_HEAL_LK:
- ret = 1;
- break;
- case AFR_ENTRY_SELF_HEAL_LK:
- ret = 0;
- break;
- }
-
- return ret;
-
-}
-
-int32_t
-internal_lock_count (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t call_count = 0;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- if (local->fd) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
- }
-
- return call_count;
-}
-
-static void
-afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, gf_lkowner_t *owner)
-{
- char *cmd_str = NULL;
- char *type_str = NULL;
-
- switch (cmd) {
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- case F_GETLK:
- cmd_str = "GETLK";
- break;
-
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
- case F_SETLK:
- cmd_str = "SETLK";
- break;
-
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- case F_SETLKW:
- cmd_str = "SETLKW";
- break;
-
- default:
- cmd_str = "<null>";
- break;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- type_str = "READ";
- break;
- case F_WRLCK:
- type_str = "WRITE";
- break;
- case F_UNLCK:
- type_str = "UNLOCK";
- break;
- default:
- type_str = "UNKNOWN";
- break;
- }
-
- snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- lkowner_utoa (owner));
-
-}
-
-static void
-afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
- int child_index)
-{
- snprintf (str, size, "path=%s, fd=%p, child=%d",
- loc->path ? loc->path : "<nul>",
- fd ? fd : NULL,
- child_index);
+ return;
}
void
-afr_print_entrylk (char *str, int size, const char *basename,
- gf_lkowner_t *owner)
-{
- snprintf (str, size, "Basename=%s, lk-owner=%s",
- basename ? basename : "<nul>",
- lkowner_utoa (owner));
-}
-
-static void
-afr_print_verdict (int op_ret, int op_errno, char *str)
-{
- if (op_ret < 0) {
- if (op_errno == EAGAIN)
- strcpy (str, "EAGAIN");
- else
- strcpy (str, "FAILED");
- }
- else
- strcpy (str, "GRANTED");
-}
-
-static void
-afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
- char *lock_call_type_str,
- afr_internal_lock_t *int_lock)
-{
- switch (lock_call_type) {
- case AFR_INODELK_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_INODELK_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_INODELK_SELFHEAL");
- break;
- case AFR_INODELK_NB_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_INODELK_NB_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_INODELK_NB_SELFHEAL");
- break;
- case AFR_ENTRYLK_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_ENTRYLK_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_ENTRYLK_SELFHEAL");
- break;
- case AFR_ENTRYLK_NB_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_SELFHEAL");
- break;
- default:
- strcpy (lock_call_type_str, "UNKNOWN");
- break;
- }
-
-}
-
-static void
-afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
- int op_ret, int op_errno, int32_t child_index)
+afr_lockees_cleanup(afr_internal_lock_t *int_lock)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- char lockee[256];
- char lock_call_type_str[256];
- char verdict[16];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ int i = 0;
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- afr_print_verdict (op_ret, op_errno, verdict);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
- (unsigned long long) int_lock->lock_number);
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_lockee_cleanup(&int_lock->lockee[i]);
+ }
+ return;
}
-
-static void
-afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
- int32_t cmd, int32_t child_index)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
- lock, lockee,
- (unsigned long long) int_lock->lock_number);
-
+int
+afr_entry_lockee_cmp(const void *l1, const void *l2)
+{
+ const afr_lockee_t *r1 = l1;
+ const afr_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid((loc_t *)&r1->loc, gfid1);
+ loc_gfid((loc_t *)&r2->loc, gfid2);
+ ret = gf_uuid_compare(gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp(r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
}
-static void
-afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
- lock, lockee,
- (unsigned long long) int_lock->lock_number);
-}
+int
+afr_lock_blocking(call_frame_t *frame, xlator_t *this, int child_index);
-static void
-afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename,
- int op_ret, int op_errno, int32_t child_index)
+void
+afr_set_lk_owner(call_frame_t *frame, xlator_t *this, void *lk_owner)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
- char verdict[16];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- afr_print_verdict (op_ret, op_errno, verdict);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ gf_msg_trace(this->name, 0, "Setting lk-owner=%llu",
+ (unsigned long long)(unsigned long)lk_owner);
+ set_lk_owner_from_ptr(&frame->root->lk_owner, lk_owner);
}
-static int
-transaction_lk_op (afr_local_t *local)
+int32_t
+internal_lock_count(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
-
- int_lock = &local->internal_lock;
-
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is for a transaction");
- ret = 1;
- }
- else if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is for a self heal");
-
- ret = 0;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t call_count = 0;
+ int i = 0;
- if (ret == -1)
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is not set");
+ local = frame->local;
+ priv = this->private;
- return ret;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
+ }
+ return call_count;
}
-static int
-is_afr_lock_transaction (afr_local_t *local)
+int
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count)
{
- int ret = 0;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- ret = 1;
- break;
+ GF_ASSERT(int_lock->lockee_count < AFR_LOCKEE_COUNT_MAX);
+ loc_copy(&lockee->loc, loc);
+ lockee->basename = (basename) ? gf_strdup(basename) : NULL;
+ if (basename && !lockee->basename)
+ goto out;
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- ret = 0;
- break;
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
- }
+ if (!lockee->locked_nodes)
+ goto out;
- return ret;
+ ret = 0;
+ int_lock->lockee_count++;
+out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
-static int
-initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
+int
+afr_add_inode_lockee(afr_local_t *local, int child_count)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- int i = 0;
+ if (local->fd) {
+ lockee->fd = fd_ref(local->fd);
+ } else {
+ loc_copy(&lockee->loc, &local->loc);
+ }
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
- int_lock->entrylk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
- }
+ if (!lockee->locked_nodes)
+ goto out;
- return 0;
+ ret = 0;
+ int_lock->lockee_count++;
+out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
static int
-initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
+initialize_internal_lock_variables(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ int i = 0;
- return 0;
-}
-
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
-{
- int ret = 0;
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
+ int_lock->lock_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+ int_lock->lk_attempted_count = 0;
- if (ret == 0)
- ret = strcmp (b1, b2);
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset(int_lock->lockee[i].locked_nodes, 0,
+ sizeof(*int_lock->lockee[i].locked_nodes) * priv->child_count);
+ }
- if (ret <= 0)
- return l1;
- else
- return l2;
+ return 0;
}
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
-
+afr_lockee_locked_nodes_count(afr_internal_lock_t *int_lock)
{
- int i = 0;
- int call_count = 0;
+ int call_count = 0;
+ int i = 0;
- for (i = 0; i < child_count; i++) {
- if (locked_nodes[i] & LOCKED_YES)
- call_count++;
- }
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
- return call_count;
+ return call_count;
}
-/* FIXME: What if UNLOCK fails */
-static int32_t
-afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int call_count = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "All internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- }
-
- return 0;
-}
+int
+afr_locked_nodes_count(unsigned char *locked_nodes, int child_count)
-static int32_t
-afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = (long)cookie;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
-
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on %d "
- "unlock by %s", local->loc.path, child_index,
- lkowner_utoa (&frame->root->lk_owner));
- }
+ int i = 0;
+ int call_count = 0;
+ for (i = 0; i < child_count; i++) {
+ if (locked_nodes[i] & LOCKED_YES)
+ call_count++;
+ }
- int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
-
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
-
- return 0;
-
+ return call_count;
}
-static int
-afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = NULL;
- int call_count = 0;
- int i = 0;
- int piggyback = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = F_UNLCK;
-
- full_flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
- priv->child_count);
-
- int_lock->lk_call_count = call_count;
-
- if (!call_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- if (local->fd)
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->inode_locked_nodes[i] & LOCKED_YES)
- != LOCKED_YES)
- continue;
-
- if (local->fd) {
- flock_use = &flock;
- if (!local->transaction.eager_lock[i]) {
- goto wind;
- }
-
- piggyback = 0;
-
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->lock_piggyback[i]) {
- fd_ctx->lock_piggyback[i]--;
- piggyback = 1;
- } else {
- fd_ctx->lock_acquired[i]--;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (piggyback) {
- afr_unlock_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0, NULL);
- if (!--call_count)
- break;
- continue;
- }
-
- flock_use = &full_flock;
- wind:
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, flock_use, F_SETLK,
- i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, flock_use, NULL);
-
- if (!--call_count)
- break;
-
- } else {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock, NULL);
-
- if (!--call_count)
- break;
- }
- }
-out:
- return 0;
-}
-
-static int32_t
-afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- int32_t child_index = (long)cookie;
-
- local = frame->local;
-
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unlock failed on %d, reason: %s",
- local->loc.path, child_index, strerror (op_errno));
- }
-
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
-
- return 0;
-}
-
-static int
-afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
+static void
+afr_log_locks_failure(call_frame_t *frame, char *where, char *what,
+ int op_errno)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
-
- if (!--call_count)
- break;
- }
- }
+ xlator_t *this = frame->this;
+ gf_lkowner_t *lk_owner = &frame->root->lk_owner;
+ afr_local_t *local = frame->local;
+ const char *fop = NULL;
+ char *gfid = NULL;
+ const char *name = NULL;
-out:
- return 0;
+ fop = gf_fop_list[local->op];
+ switch (local->transaction.type) {
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ switch (local->op) {
+ case GF_FOP_LINK:
+ gfid = uuid_utoa(local->newloc.pargfid);
+ name = local->newloc.name;
+ break;
+ default:
+ gfid = uuid_utoa(local->loc.pargfid);
+ name = local->loc.name;
+ break;
+ }
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do entry %s with lk-owner:%s on %s "
+ "while attempting %s on {pgfid:%s, name:%s}.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid, name);
+ break;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ gfid = uuid_utoa(local->inode->gfid);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do inode %s with lk-owner:%s on %s "
+ "while attempting %s on gfid:%s.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid);
+ break;
+ }
}
static int32_t
-afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int lockee_num = 0;
+ int call_count = 0;
+ int child_index = 0;
+ int ret = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ lockee_num = (int)((long)cookie) / priv->child_count;
+ child_index = (int)((long)cookie) % priv->child_count;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- int_lock->lock_op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if ((op_ret == -1) &&
- (op_errno == ENOSYS)) {
- afr_unlock (frame, this);
- } else {
- if (op_ret == 0) {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
- }
- afr_lock_blocking (frame, this, child_index + 1);
- }
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ afr_log_locks_failure(frame, priv->children[child_index]->name,
+ "unlock", op_errno);
+ }
- return 0;
-}
+ int_lock->lockee[lockee_num].locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1)
+ ret = afr_write_subvol_reset(frame, this);
-static int32_t
-afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ LOCK(&frame->lock);
+ {
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK(&frame->lock);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
+ if (call_count == 0) {
+ int_lock->lock_cbk(frame, this);
+ }
+ return ret;
}
-static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_copy_locked_nodes (frame, this);
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
-
-out:
- return 0;
-}
-
-static int32_t
-afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long)cookie);
-
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-}
-
-static int
-afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
+void
+afr_internal_lock_wind(call_frame_t *frame,
+ int32_t (*cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *),
+ void *cookie, int child, int lockee_num,
+ gf_boolean_t blocking, gf_boolean_t unlock)
+{
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ entrylk_cmd cmd = ENTRYLK_LOCK_NB;
+ int32_t cmd1 = F_SETLK;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ switch (local->transaction.type) {
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
- break;
- }
-
- return 0;
-
-}
-
-int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
- struct gf_flock flock = {0,};
- uint64_t ctx = 0;
- int ret = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
-
- if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
-
- afr_copy_locked_nodes (frame, this);
-
- afr_unlock (frame, this);
-
- return 0;
- }
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index] ||
- !local->fd_open_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
- }
-
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
-
- afr_copy_locked_nodes (frame, this);
-
- afr_unlock(frame, this);
-
- return 0;
-
- }
-
- if ((child_index == priv->child_count)
- || (int_lock->lock_count == int_lock->lk_expected_count)) {
-
- /* we're done locking */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "we're done locking");
-
- afr_copy_locked_nodes (frame, this);
-
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- return 0;
- }
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
-
- if (local->fd) {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLKW,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock, NULL);
-
- } else {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLKW,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock, NULL);
- }
-
- break;
-
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
+ if (unlock) {
+ cmd = ENTRYLK_UNLOCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd = ENTRYLK_LOCK;
+ }
+
+ if (local->fd) {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->fentrylk,
+ int_lock->domain,
+ int_lock->lockee[lockee_num].fd,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_num].loc,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ }
+ break;
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
-
- break;
- }
-
- case AFR_ENTRY_TRANSACTION:
- if (local->fd) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- } else {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- }
-
- break;
- }
-
- return 0;
-}
-
-int32_t
-afr_blocking_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int up_count = 0;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- initialize_inodelk_variables (frame, this);
- break;
+ flock = int_lock->lockee[lockee_num].flock;
+ if (unlock) {
+ flock.l_type = F_UNLCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd1 = F_SETLKW;
+ }
+
+ if (local->fd) {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->finodelk, int_lock->domain,
+ int_lock->lockee[lockee_num].fd, cmd1, &flock, NULL);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->inodelk, int_lock->domain,
+ &int_lock->lockee[lockee_num].loc, cmd1, &flock, NULL);
+ }
+ break;
+ }
+}
- case AFR_ENTRY_RENAME_TRANSACTION:
- up_count = afr_up_children_count (local->child_up,
- priv->child_count);
- int_lock->lk_expected_count = 2 * up_count;
- //fallthrough
- case AFR_ENTRY_TRANSACTION:
- initialize_entrylk_variables (frame, this);
+static int
+afr_unlock_now(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
+ int lockee_num = 0;
+ int i = -1;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ call_count = afr_lockee_locked_nodes_count(int_lock);
+
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count) {
+ gf_msg_trace(this->name, 0, "No internal locks unlocked");
+ int_lock->lock_cbk(frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_num = i / priv->child_count;
+ child_index = i % priv->child_count;
+ if (int_lock->lockee[lockee_num].locked_nodes[child_index] &
+ LOCKED_YES) {
+ afr_internal_lock_wind(frame, afr_unlock_common_cbk,
+ (void *)(long)i, child_index, lockee_num,
+ _gf_false, _gf_true);
+ if (!--call_count)
break;
}
+ }
- afr_lock_blocking (frame, this, 0);
-
- return 0;
+out:
+ return 0;
}
static int32_t
-afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
-
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
-
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
+afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long)cookie;
+ int child_index = 0;
+ int lockee_num = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ child_index = ((int)cky) % priv->child_count;
+ lockee_num = ((int)cky) / priv->child_count;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ }
+
+ local->op_errno = op_errno;
+ int_lock->lock_op_errno = op_errno;
+ }
+
+ int_lock->lk_attempted_count++;
+ }
+ UNLOCK(&frame->lock);
+
+ if ((op_ret == -1) && (op_errno == ENOSYS)) {
+ afr_unlock_now(frame, this);
+ } else {
+ if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
+ if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->lock_count++;
}
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index] |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
+ UNLOCK(&local->inode->lock);
+ }
}
+ afr_lock_blocking(frame, this, cky + 1);
+ }
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "Last locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (int_lock->entrylk_lock_count ==
- int_lock->lk_expected_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_log (this->name, GF_LOG_TRACE,
- "%d servers locked. Trying again with blocking calls",
- int_lock->lock_count);
-
- afr_unlock(frame, this);
- }
- }
-
- return 0;
+ return 0;
}
-void
-afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx,
- size_t child_count)
+static gf_boolean_t
+_is_lock_wind_needed(afr_local_t *local, int child_index)
{
- int i = 0;
-
- GF_ASSERT (local->fd_open_on);
-
- memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count);
- for (i = 0; i < child_count; i++)
- if (fd_ctx->opened_on[i] == AFR_FD_OPENED)
- local->fd_open_on[i] = 1;
-}
-
-int
-afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int32_t call_count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- initialize_entrylk_variables (frame, this);
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- return -1;
- }
-
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
+ if (!local->child_up[child_index])
+ return _gf_false;
- if (!call_count) {
- gf_log (this->name, GF_LOG_INFO,
- "fd not open on any subvolumes. aborting.");
- afr_unlock (frame, this);
- goto out;
- }
-
- /* Send non-blocking entrylk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i]) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- this->name, local->fd,
- basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
- NULL);
- }
- }
- } else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
- NULL);
-
- if (!--call_count)
- break;
-
- }
- }
- }
-out:
- return 0;
+ return _gf_true;
}
-int32_t
-afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static gf_boolean_t
+is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- afr_fd_ctx_t *fd_ctx = NULL;
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int child = 0;
+ int nlockee = 0;
+ int lockee_count = 0;
+ gf_boolean_t ret = _gf_true;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+ lockee_count = int_lock->lockee_count;
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
-
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
- } else {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
-
- if (local->transaction.eager_lock &&
- local->transaction.eager_lock[child_index] && local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- /* piggybacked */
-
- if (op_ret == 1) {
- /* piggybacked */
- } else if (op_ret == 0) {
- /* lock acquired from server */
- LOCK (&local->fd->lock);
- {
- fd_ctx->lock_acquired[child_index]++;
- }
- UNLOCK (&local->fd->lock);
- }
- }
- }
-
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "Last inode locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- int_lock->lk_expected_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_log (this->name, GF_LOG_TRACE,
- "%d servers locked. Trying again with blocking calls",
- int_lock->lock_count);
-
- afr_unlock(frame, this);
- }
+ if (int_lock->lock_count == 0) {
+ afr_log_locks_failure(frame, "any subvolume", "lock",
+ int_lock->lock_op_errno);
+ return _gf_false;
+ }
+ /* For FOPS that take multiple sets of locks (mkdir, rename),
+ * there must be at least one brick on which the locks from
+ * all lock sets were successful. */
+ for (child = 0; child < priv->child_count; child++) {
+ ret = _gf_true;
+ for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+ if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES))
+ ret = _gf_false;
}
+ if (ret)
+ return ret;
+ }
+ if (!ret)
+ afr_log_locks_failure(frame, "all", "lock", int_lock->lock_op_errno);
- return 0;
+ return ret;
}
int
-afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int32_t call_count = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = NULL;
- int piggyback = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
-
- full_flock.l_type = int_lock->lk_flock.l_type;
-
- initialize_inodelk_variables (frame, this);
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- ret = -1;
- goto out;
- }
-
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- if (!call_count) {
- gf_log (this->name, GF_LOG_INFO,
- "fd not open on any subvolumes. aborting.");
- afr_unlock (frame, this);
- goto out;
- }
-
- /* Send non-blocking inodelk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i] || !local->fd_open_on[i])
- continue;
-
- flock_use = &flock;
- if (!priv->eager_lock) {
- goto wind;
- }
-
- piggyback = 0;
- local->transaction.eager_lock[i] = 1;
-
- afr_set_delayed_post_op (frame, this);
-
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->lock_acquired[i]) {
- fd_ctx->lock_piggyback[i]++;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (piggyback) {
- /* (op_ret == 1) => indicate piggybacked lock */
- afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0, NULL);
- if (!--call_count)
- break;
- continue;
- }
- flock_use = &full_flock;
- wind:
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, flock_use, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, flock_use, NULL);
-
- if (!--call_count)
- break;
- }
- } else {
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock, NULL);
-
- if (!--call_count)
- break;
- }
- }
-out:
- return ret;
-}
-
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
+afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+ int child_index = 0;
+ int lockee_num = 0;
- return count;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_num = cookie / priv->child_count;
-}
+ if (local->fd) {
+ ret = fd_ctx_get(local->fd, this, &ctx);
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p", local->fd);
- return count;
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
-}
+ afr_unlock_now(frame, this);
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
+ return 0;
}
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if (!is_blocking_locks_count_sufficient(frame, this)) {
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- if (!--call_count)
- break;
+ afr_unlock_now(frame, this);
- }
+ return 0;
}
+ }
-out:
- return 0;
-
-}
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ /* we're done locking */
+ gf_msg_debug(this->name, 0, "we're done locking");
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk(frame, this);
return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
+ }
+ if (!_is_lock_wind_needed(local, child_index)) {
+ afr_lock_blocking(frame, this, cookie + 1);
return 0;
-}
+ }
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
+ afr_internal_lock_wind(frame, afr_lock_cbk, (void *)(long)cookie,
+ child_index, lockee_num, _gf_true, _gf_false);
+ return 0;
}
int32_t
-afr_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (transaction_lk_op (local)) {
- if (is_afr_lock_transaction (local))
- afr_unlock_inodelk (frame, this);
- else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
- } else {
- if (is_afr_lock_selfheal (local))
- afr_unlock_inodelk (frame, this);
- else
- afr_unlock_entrylk (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes)
+afr_blocking_lock(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- priv = this->private;
-
- ret = afr_fd_ctx_set (this, fd);
- if (ret)
- goto out;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to get the fd ctx");
- goto out;
- }
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- GF_ASSERT (fdctx->locked_on);
-
- memcpy (fdctx->locked_on, locked_nodes,
- priv->child_count);
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int up_count = 0;
-out:
- return ret;
-}
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
-static int
-__is_fd_saved (xlator_t *this, fd_t *fd)
-{
- afr_locked_fd_t *locked_fd = NULL;
- afr_private_t *priv = NULL;
- int found = 0;
+ up_count = AFR_COUNT(local->child_up, priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count =
+ (int_lock->lockee_count * up_count);
+ initialize_internal_lock_variables(frame, this);
- priv = this->private;
+ afr_lock_blocking(frame, this, 0);
- list_for_each_entry (locked_fd, &priv->saved_fds, list) {
- if (locked_fd->fd == fd) {
- found = 1;
- break;
- }
- }
-
- return found;
+ return 0;
}
-static int
-__afr_save_locked_fd (xlator_t *this, fd_t *fd)
+static int32_t
+afr_nb_internal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- int ret = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int child_index = 0;
+ int lockee_num = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
- gf_afr_mt_locked_fd);
- if (!locked_fd) {
- ret = -1;
- goto out;
- }
-
- locked_fd->fd = fd;
- INIT_LIST_HEAD (&locked_fd->list);
-
- list_add_tail (&locked_fd->list, &priv->saved_fds);
-
-out:
- return ret;
-}
-
-int
-afr_save_locked_fd (xlator_t *this, fd_t *fd)
-{
- afr_private_t *priv = NULL;
- int ret = 0;
+ child_index = ((long)cookie) % priv->child_count;
+ lockee_num = ((long)cookie) / priv->child_count;
- priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- pthread_mutex_lock (&priv->mutex);
+ if (op_ret == 0 && local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
{
- if (__is_fd_saved (this, fd)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fd=%p already saved", fd);
- goto unlock;
- }
-
- ret = __afr_save_locked_fd (this, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "fd=%p could not be saved", fd);
- goto unlock;
- }
+ local->inode_ctx->lock_count++;
}
-unlock:
- pthread_mutex_unlock (&priv->mutex);
-
- return ret;
-}
-
-static int
-afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
-
- local = frame->local;
-
- locked_fd = local->locked_fd;
-
- STACK_DESTROY (frame->root);
- afr_local_cleanup (local, this);
-
- afr_save_locked_fd (this, locked_fd->fd);
-
- return 0;
-
-}
-
-static int
-afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
-{
- afr_fd_ctx_t *fdctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t tmp = 0;
- int i = 0;
- int source_child = -1;
- int ret = 0;
+ UNLOCK(&local->inode->lock);
+ }
- priv = this->private;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- for (i = 0; i < priv->child_count; i++) {
- if (fdctx->locked_on[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found lock recovery source=%d", i);
- source_child = i;
- break;
- }
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+ "subvolume does not support "
+ "locking. please load features/locks"
+ " xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
}
-out:
- return source_child;
-
-}
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK(&frame->lock);
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata);
-int32_t
-afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "lock recovery failed");
- goto cleanup;
+ if (call_count == 0) {
+ gf_msg_trace(this->name, 0, "Last locking reply received");
+ /* all locks successful. Proceed to call FOP */
+ if (int_lock->lock_count == int_lock->lk_expected_count) {
+ gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk");
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk(frame, this);
}
+ /* Not all locks were successful. Unlock and try locking
+ again, this time with serially blocking locks */
+ else {
+ gf_msg_trace(this->name, 0,
+ "%d servers locked. Trying again "
+ "with blocking calls",
+ int_lock->lock_count);
- source_child = local->source_child;
-
- memcpy (&flock, lock, sizeof (*lock));
-
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock, NULL);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-int
-afr_recover_lock (call_frame_t *frame, xlator_t *this,
- struct gf_flock *flock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t lock_recovery_child = 0;
-
- priv = this->private;
- local = frame->local;
-
- lock_recovery_child = local->lock_recovery_child;
-
- frame->root->lk_owner = flock->l_owner;
-
- STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
- (void *) (long) lock_recovery_child,
- priv->children[lock_recovery_child],
- priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock, NULL);
-
- return 0;
-}
-
-static int
-is_afr_lock_eol (struct gf_flock *lock)
-{
- int ret = 0;
-
- if ((lock->l_type == GF_LK_EOL))
- ret = 1;
-
- return ret;
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Failed to get locks on fd");
- goto cleanup;
+ afr_unlock_now(frame, this);
}
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "Got a lock on fd");
-
- if (is_afr_lock_eol (lock)) {
- gf_log (this->name, GF_LOG_INFO,
- "Reached EOL on locks on fd");
- goto cleanup;
- }
-
- afr_recover_lock (frame, this, lock);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
-
- return 0;
+ return 0;
}
-static int
-afr_lock_recovery (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- priv = this->private;
- local = frame->local;
-
- fd = local->fd;
-
- source_child = afr_get_source_lock_recovery (this, fd);
- if (source_child < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not recover locks due to lock "
- "split brain");
- ret = -1;
- goto out;
+int
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int child = 0;
+ int lockee_num = 0;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ initialize_internal_lock_variables(frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p", local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock_now(frame, this);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ call_count = int_lock->lockee_count * internal_lock_count(frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock_now(frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking lock calls only on up children
+ and where the fd has been opened */
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ child = i % priv->child_count;
+ lockee_num = i / priv->child_count;
+ if (local->child_up[child]) {
+ afr_internal_lock_wind(frame, afr_nb_internal_lock_cbk,
+ (void *)(long)i, child, lockee_num,
+ _gf_false, _gf_false);
+ if (!--call_count)
+ break;
}
-
- local->source_child = source_child;
-
- /* the flock can be zero filled as we're querying incrementally
- the locks held on the fd.
- */
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock, NULL);
-
+ }
out:
- return ret;
-}
-
-
-static int
-afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- fdctx->opened_on[child_index] = AFR_FD_OPENED;
-
-out:
- return ret;
+ return ret;
}
int32_t
-afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
-{
- int32_t child_index = (long )cookie;
- int ret = 0;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Reopen during lock-recovery failed");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Open succeeded => proceed to recover locks");
-
- ret = afr_lock_recovery (frame, this);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Lock recovery failed");
- goto cleanup;
- }
-
- ret = afr_mark_fd_opened (this, fd, child_index);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Marking fd open failed");
- goto cleanup;
- }
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-static int
-afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- uint64_t tmp = 0;
- afr_fd_ctx_t *fdctx = NULL;
- loc_t loc = {0,};
- int32_t child_index = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (local && local->fd);
-
- ret = fd_ctx_get (local->fd, this, &tmp);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get the context of fd",
- uuid_utoa (local->fd->inode->gfid));
- fdctx = (afr_fd_ctx_t *) (long) tmp;
- /* TODO: instead we should return from the function */
- GF_ASSERT (fdctx);
-
- child_index = local->lock_recovery_child;
-
- inode_path (local->fd->inode, NULL, (char **)&loc.path);
- loc.name = strrchr (loc.path, '/');
- loc.inode = inode_ref (local->fd->inode);
- loc.parent = inode_parent (local->fd->inode, 0, NULL);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
- (void *)(long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd, NULL);
-
- return 0;
-}
-
-static int
-is_fd_opened (fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, THIS, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
- ret = 1;
-
-out:
- return ret;
-}
-
-int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
-{
- call_frame_t *frame = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- int ret = -1;
- struct list_head locks_list = {0,};
- int32_t op_errno = 0;
-
-
- priv = this->private;
-
- if (list_empty (&priv->saved_fds))
- goto out;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
-
- frame->local = local;
-
- INIT_LIST_HEAD (&locks_list);
-
- pthread_mutex_lock (&priv->mutex);
- {
- list_splice_init (&priv->saved_fds, &locks_list);
- }
- pthread_mutex_unlock (&priv->mutex);
-
- list_for_each_entry_safe (locked_fd, tmp,
- &locks_list, list) {
-
- list_del_init (&locked_fd->list);
-
- local->fd = fd_ref (locked_fd->fd);
- local->lock_recovery_child = child_index;
- local->locked_fd = locked_fd;
-
- if (!is_fd_opened (locked_fd->fd, child_index)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting open before lock "
- "recovery");
- afr_lock_recovery_preopen (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting lock recovery "
- "without a preopen");
- afr_lock_recovery (frame, this);
- }
- }
+afr_unlock(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+
+ if (!local->transaction.eager_lock_on)
+ goto out;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ list_del_init(&local->transaction.owner_list);
+ if (list_empty(&lock->owners) && list_empty(&lock->post_op)) {
+ local->transaction.do_eager_unlock = _gf_true;
+ /*TODO: Need to get metadata use on_disk and inherit/uninherit
+ *GF_ASSERT (!local->inode_ctx->on_disk[local->transaction.type]);
+ *GF_ASSERT (!local->inode_ctx->inherited[local->transaction.type]);
+ */
+ GF_ASSERT(lock->release);
+ }
+ }
+ UNLOCK(&local->inode->lock);
+ if (!local->transaction.do_eager_unlock) {
+ local->internal_lock.lock_cbk(frame, this);
+ return 0;
+ }
out:
- if ((ret < 0) && frame)
- AFR_STACK_DESTROY (frame);
- return ret;
-}
-
-void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
- unsigned int child_count)
-{
- afr_local_t *dst_local = NULL;
- afr_local_t *src_local = NULL;
- afr_internal_lock_t *dst_lock = NULL;
- afr_internal_lock_t *src_lock = NULL;
-
- dst_local = dst->local;
- dst_lock = &dst_local->internal_lock;
- src_local = src->local;
- src_lock = &src_local->internal_lock;
- if (src_lock->inode_locked_nodes) {
- memcpy (dst_lock->inode_locked_nodes,
- src_lock->inode_locked_nodes,
- sizeof (*dst_lock->inode_locked_nodes) * child_count);
- memset (src_lock->inode_locked_nodes, 0,
- sizeof (*src_lock->inode_locked_nodes) * child_count);
- }
-
- dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
- dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
- dst_lock->inodelk_lock_count = src_lock->inodelk_lock_count;
- src_lock->inodelk_lock_count = 0;
+ afr_unlock_now(frame, this);
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 7e684801fae..816065fb57a 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -8,40 +8,31 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __AFR_MEM_TYPES_H__
#define __AFR_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_afr_mem_types_ {
- gf_afr_mt_iovec = gf_common_mt_end + 1,
- gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_private_t,
- gf_afr_mt_int32_t,
- gf_afr_mt_char,
- gf_afr_mt_xattr_key,
- gf_afr_mt_dict_t,
- gf_afr_mt_xlator_t,
- gf_afr_mt_iatt,
- gf_afr_mt_int,
- gf_afr_mt_afr_node_character,
- gf_afr_mt_sh_diff_loop_state,
- gf_afr_mt_uint8_t,
- gf_afr_mt_loc_t,
- gf_afr_mt_entry_name,
- gf_afr_mt_pump_priv,
- gf_afr_mt_locked_fd,
- gf_afr_mt_inode_ctx_t,
- gf_afr_fd_paused_call_t,
- gf_afr_mt_crawl_data_t,
- gf_afr_mt_brick_pos_t,
- gf_afr_mt_shd_bool_t,
- gf_afr_mt_shd_timer_t,
- gf_afr_mt_shd_event_t,
- gf_afr_mt_time_t,
- gf_afr_mt_pos_data_t,
- gf_afr_mt_end
+ gf_afr_mt_afr_fd_ctx_t = gf_common_mt_end + 1,
+ gf_afr_mt_afr_private_t,
+ gf_afr_mt_int32_t,
+ gf_afr_mt_char,
+ gf_afr_mt_xattr_key,
+ gf_afr_mt_dict_t,
+ gf_afr_mt_xlator_t,
+ gf_afr_mt_afr_node_character,
+ gf_afr_mt_inode_ctx_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_subvol_healer_t,
+ gf_afr_mt_spbc_timeout_t,
+ gf_afr_mt_spb_status_t,
+ gf_afr_mt_empty_brick_t,
+ gf_afr_mt_child_latency_t,
+ gf_afr_mt_atomic_t,
+ gf_afr_mt_lk_heal_info_t,
+ gf_afr_mt_gf_lock,
+ gf_afr_mt_end
};
#endif
-
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
new file mode 100644
index 00000000000..e73fd997765
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -0,0 +1,167 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _AFR_MESSAGES_H_
+#define _AFR_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET, AFR_MSG_QUORUM_OVERRIDE,
+ AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP, AFR_MSG_SUBVOLS_DOWN,
+ AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN, AFR_MSG_OPEN_FAIL,
+ AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS, AFR_MSG_GFID_NULL,
+ AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
+ AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
+ AFR_MSG_INTERNAL_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
+ AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR, AFR_MSG_SELF_HEAL_INFO,
+ AFR_MSG_READ_SUBVOL_ERROR, AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD, AFR_MSG_INVALID_DATA,
+ AFR_MSG_INVALID_ARG, AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
+ AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
+ AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS, AFR_MSG_NO_CHANGELOG,
+ AFR_MSG_TIMER_CREATE_FAIL, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ AFR_MSG_INODE_CTX_GET_FAILED, AFR_MSG_THIN_ARB,
+ AFR_MSG_THIN_ARB_XATTROP_FAILED, AFR_MSG_THIN_ARB_LOC_POP_FAILED,
+ AFR_MSG_GET_PEND_VAL, AFR_MSG_THIN_ARB_SKIP_SHD, AFR_MSG_UNKNOWN_SET,
+ AFR_MSG_NO_XL_ID, AFR_MSG_SELF_HEAL_INFO_START,
+ AFR_MSG_SELF_HEAL_INFO_FINISH, AFR_MSG_INCRE_COUNT,
+ AFR_MSG_ADD_TO_OUTPUT_FAILED, AFR_MSG_SET_TIME_FAILED,
+ AFR_MSG_GFID_MISMATCH_DETECTED, AFR_MSG_GFID_HEAL_MSG,
+ AFR_MSG_THIN_ARB_LOOKUP_FAILED, AFR_MSG_DICT_CREATE_FAILED,
+ AFR_MSG_NO_MAJORITY_TO_RESOLVE, AFR_MSG_TYPE_MISMATCH,
+ AFR_MSG_SIZE_POLICY_NOT_APPLICABLE, AFR_MSG_NO_CHILD_SELECTED,
+ AFR_MSG_INVALID_CHILD, AFR_MSG_RESOLVE_CONFLICTING_DATA,
+ SERROR_GETTING_SRC_BRICK, SNO_DIFF_IN_MTIME, SNO_BIGGER_FILE,
+ SALL_BRICKS_UP_TO_RESOLVE, AFR_MSG_UNLOCK_FAILED, AFR_MSG_POST_OP_FAILED,
+ AFR_MSG_TA_FRAME_CREATE_FAILED, AFR_MSG_SET_KEY_XATTROP_FAILED,
+ AFR_MSG_BLOCKING_ENTRYLKS_FAILED, AFR_MSG_FOP_FAILED,
+ AFR_MSG_CLEAN_UP_FAILED, AFR_MSG_UNABLE_TO_FETCH, AFR_MSG_XATTR_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_REPLICA, AFR_MSG_INODE_CTX_FAILED,
+ AFR_MSG_LOOKUP_FAILED, AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_RELEASE_LOCK_FAILED, AFR_MSG_CLEAR_TIME_SPLIT_BRAIN,
+ AFR_MSG_READ_FAILED, AFR_MSG_LAUNCH_FAILED, AFR_MSG_READ_SUBVOL_NOT_UP,
+ AFR_MSG_LK_HEAL_DOM, AFR_MSG_NEW_BRICK, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, AFR_MSG_HEALER_SPAWN_FAILED,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, AFR_MSG_NULL_DEREF, AFR_MSG_SET_PEND_XATTR,
+ AFR_MSG_INTERNAL_ATTR);
+
+#define AFR_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define AFR_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define AFR_MSG_HEALER_SPAWN_FAILED_STR "Healer spawn failed"
+#define AFR_MSG_ADD_CRAWL_EVENT_FAILED_STR "Adding crawl event failed"
+#define AFR_MSG_INVALID_ARG_STR "Invalid argument"
+#define AFR_MSG_INDEX_DIR_GET_FAILED_STR "unable to get index-dir on "
+#define AFR_MSG_THIN_ARB_LOOKUP_FAILED_STR "Failed lookup on file"
+#define AFR_MSG_DICT_CREATE_FAILED_STR "Failed to create dict."
+#define AFR_MSG_THIN_ARB_XATTROP_FAILED_STR "Xattrop failed."
+#define AFR_MSG_THIN_ARB_LOC_POP_FAILED_STR \
+ "Failed to populate loc for thin-arbiter"
+#define AFR_MSG_GET_PEND_VAL_STR "Error getting value of pending"
+#define AFR_MSG_THIN_ARB_SKIP_SHD_STR "I am not the god shd. skipping."
+#define AFR_MSG_UNKNOWN_SET_STR "Unknown set"
+#define AFR_MSG_NO_XL_ID_STR "xl does not have id"
+#define AFR_MSG_SELF_HEAL_INFO_START_STR "starting full sweep on"
+#define AFR_MSG_SELF_HEAL_INFO_FINISH_STR "finished full sweep on"
+#define AFR_MSG_INCRE_COUNT_STR "Could not increment the counter."
+#define AFR_MSG_ADD_TO_OUTPUT_FAILED_STR "Could not add to output"
+#define AFR_MSG_SET_TIME_FAILED_STR "Could not set time"
+#define AFR_MSG_GFID_HEAL_MSG_STR "Error setting gfid-heal-msg dict"
+#define AFR_MSG_NO_MAJORITY_TO_RESOLVE_STR \
+ "No majority to resolve gfid split brain"
+#define AFR_MSG_GFID_MISMATCH_DETECTED_STR "Gfid mismatch dectected"
+#define AFR_MSG_SELF_HEAL_INFO_STR "performing selfheal"
+#define AFR_MSG_TYPE_MISMATCH_STR "TYPE mismatch"
+#define AFR_MSG_SIZE_POLICY_NOT_APPLICABLE_STR \
+ "Size policy is not applicable to directories."
+#define AFR_MSG_NO_CHILD_SELECTED_STR \
+ "No child selected by favorite-child policy"
+#define AFR_MSG_INVALID_CHILD_STR "Invalid child"
+#define AFR_MSG_RESOLVE_CONFLICTING_DATA_STR \
+ "selected as authentic to resolve conflicting data"
+#define SERROR_GETTING_SRC_BRICK_STR "Error getting the source brick"
+#define SNO_DIFF_IN_MTIME_STR "No difference in mtime"
+#define SNO_BIGGER_FILE_STR "No bigger file"
+#define SALL_BRICKS_UP_TO_RESOLVE_STR \
+ "All the bricks should be up to resolve the gfid split brain"
+#define AFR_MSG_UNLOCK_FAILED_STR "Failed to unlock"
+#define AFR_MSG_POST_OP_FAILED_STR "Post-op on thin-arbiter failed"
+#define AFR_MSG_TA_FRAME_CREATE_FAILED_STR "Failed to create ta_frame"
+#define AFR_MSG_SET_KEY_XATTROP_FAILED_STR "Could not set key during xattrop"
+#define AFR_MSG_BLOCKING_ENTRYLKS_FAILED_STR "Blocking entrylks failed"
+#define AFR_MSG_FSYNC_FAILED_STR "fsync failed"
+#define AFR_MSG_QUORUM_FAIL_STR "quorum is not met"
+#define AFR_MSG_FOP_FAILED_STR "Failing Fop"
+#define AFR_MSG_INVALID_SUBVOL_STR "not a subvolume"
+#define AFR_MSG_VOL_MISCONFIGURED_STR "Volume is dangling"
+#define AFR_MSG_CHILD_MISCONFIGURED_STR \
+ "replicate translator needs more than one subvolume defined"
+#define AFR_MSG_CLEAN_UP_FAILED_STR "Failed to clean up healer threads"
+#define AFR_MSG_QUORUM_OVERRIDE_STR "overriding quorum-count"
+#define AFR_MSG_UNABLE_TO_FETCH_STR \
+ "Unable to fetch afr-pending-xattr option from volfile. Falling back to " \
+ "using client translator names"
+#define AFR_MSG_NULL_DEREF_STR "possible NULL deref"
+#define AFR_MSG_XATTR_SET_FAILED_STR "Cannot set xattr cookie key"
+#define AFR_MSG_SPLIT_BRAIN_STATUS_STR "Failed to create synctask"
+#define AFR_MSG_SUBVOLS_DOWN_STR "All subvolumes are not up"
+#define AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR_STR \
+ "Failed to cancel split-brain choice"
+#define AFR_MSG_SPLIT_BRAIN_REPLICA_STR \
+ "Cannot set replica. File is not in data/metadata split-brain"
+#define AFR_MSG_INODE_CTX_FAILED_STR "Failed to get inode_ctx"
+#define AFR_MSG_READ_SUBVOL_ERROR_STR "no read subvols"
+#define AFR_MSG_LOCAL_CHILD_STR "selecting local read-child"
+#define AFR_MSG_LOOKUP_FAILED_STR "Failed to lookup/create thin-arbiter id file"
+#define AFR_MSG_TIMER_CREATE_FAIL_STR \
+ "Cannot create timer for delayed initialization"
+#define AFR_MSG_SUBVOL_UP_STR "Subvolume came back up; going online"
+#define AFR_MSG_ALL_SUBVOLS_DOWN_STR \
+ "All subvolumes are down. Going offline until atleast one of them is up"
+#define AFR_MSG_RELEASE_LOCK_FAILED_STR "Failed to release lock"
+#define AFR_MSG_INVALID_CHILD_UP_STR "Received child_up from invalid subvolume"
+#define AFR_MSG_QUORUM_MET_STR "Client-quorum is met"
+#define AFR_MSG_EXPUNGING_FILE_OR_DIR_STR "expunging file or dir"
+#define AFR_MSG_SELF_HEAL_FAILED_STR "Invalid"
+#define AFR_MSG_SPLIT_BRAIN_STR "Skipping conservative mergeon the file"
+#define AFR_MSG_CLEAR_TIME_SPLIT_BRAIN_STR "clear time split brain"
+#define AFR_MSG_READ_FAILED_STR "Failing read since good brick is down"
+#define AFR_MSG_LAUNCH_FAILED_STR "Failed to launch synctask"
+#define AFR_MSG_READ_SUBVOL_NOT_UP_STR \
+ "read subvolume in this generation is not up"
+#define AFR_MSG_INTERNAL_LKS_FAILED_STR \
+ "Unable to work with lk-owner while attempting fop"
+#define AFR_MSG_LOCK_XLATOR_NOT_LOADED_STR \
+ "subvolume does not support locking. please load features/locks xlator " \
+ "on server."
+#define AFR_MSG_FD_CTX_GET_FAILED_STR "unable to get fd ctx"
+#define AFR_MSG_INFO_COMMON_STR "fd not open on any subvolumes, aborting."
+#define AFR_MSG_REPLACE_BRICK_STATUS_STR "Couldn't acquire lock on any child."
+#define AFR_MSG_NEW_BRICK_STR "New brick"
+#define AFR_MSG_SPLIT_BRAIN_SET_FAILED_STR \
+ "Failed to set split-brain choice to -1"
+#define AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED_STR \
+ "Failed to determine split-brain. Aborting split-brain-choice set"
+#define AFR_MSG_OPEN_FAIL_STR "Failed to open subvolume"
+#define AFR_MSG_SET_PEND_XATTR_STR "Set of pending xattr"
+#define AFR_MSG_INTERNAL_ATTR_STR "is an internal extended attribute"
+#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 35090d80e4b..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -8,438 +8,346 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "fd.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_stale_child_up (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- afr_private_t *priv = NULL;
- int up = -1;
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
- priv = this->private;
-
- if (!local->fresh_children)
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
- afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
- if (priv->child_count == afr_get_children_count (local->fresh_children,
- priv->child_count))
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- if (afr_is_child_present (local->fresh_children,
- priv->child_count, i))
- continue;
- up = i;
- break;
- }
-out:
- return up;
-}
+#include "afr-transaction.h"
-void
-afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_fd_fixable(fd_t *fd)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- int st_child = -1;
- char reason[64] = {0};
-
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- if (!IA_ISREG (inode->ia_type))
- goto out;
-
- st_child = afr_stale_child_up (local, this);
- if (st_child < 0)
- goto out;
-
- sh->do_data_self_heal = _gf_true;
- sh->do_metadata_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_missing_entry_self_heal = _gf_true;
-
- snprintf (reason, sizeof (reason), "stale subvolume %d detected",
- st_child);
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-out:
- return;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous(fd))
+ return _gf_false;
+ else if (gf_uuid_is_null(fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
int
-afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_open_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = frame->local;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
- AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd, xdata);
- return 0;
-}
+ afr_local_t *local = frame->local;
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
+ local->cont.open.fd, xdata);
+ return 0;
+}
int
-afr_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = (long) cookie;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->success_count++;
-
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- local->cont.open.flags);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
- }
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
- STACK_WIND (frame, afr_open_ftruncate_cbk,
- this, this->fops->ftruncate,
- fd, 0, NULL);
- } else {
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
- AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd, xdata);
- }
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
+ if (local->op_ret == -1) {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ } else if (fd_ctx->flags & O_TRUNC) {
+ STACK_WIND(frame, afr_open_ftruncate_cbk, this,
+ this->fops->ftruncate, fd, 0, NULL);
+ } else {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
+ local->cont.open.fd, local->xdata_rsp);
}
+ }
- return 0;
+ return 0;
}
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+afr_open_continue(call_frame_t *frame, xlator_t *this, int err)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int ret = -1;
- int32_t call_count = 0;
- int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
- //We can't let truncation to happen outside transaction.
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
-
- if (flags & (O_CREAT|O_TRUNC)) {
- QUORUM_CHECK(open,out);
- }
-
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- gf_log (this->name, GF_LOG_WARNING,
- "failed to open as split brain seen, returning EIO");
- op_errno = EIO;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = frame->local;
+ priv = this->private;
- call_count = local->call_count;
- loc_copy (&local->loc, loc);
-
- local->cont.open.flags = flags;
-
- local->fd = fd_ref (fd);
+ if (err) {
+ AFR_STACK_UNWIND(open, frame, -1, err, NULL, NULL);
+ } else {
+ local->call_count = AFR_COUNT(local->child_up, priv->child_count);
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- loc, wind_flags, fd, xdata);
-
- if (!--call_count)
- break;
- }
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_open_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->open, &local->loc,
+ (local->cont.open.flags & ~O_TRUNC),
+ local->cont.open.fd, local->xdata_req);
+ if (!--call_count)
+ break;
+ }
}
+ }
+ return 0;
+}
- ret = 0;
+int
+afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_subvol = 0;
+ int event_generation = 0;
+ int ret = 0;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ // We can't let truncation to happen outside transaction.
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_OPEN;
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ local->inode = inode_ref(loc->inode);
+ loc_copy(&local->loc, loc);
+ local->fd_ctx = fd_ctx;
+ fd_ctx->flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ local->cont.open.flags = flags;
+ local->cont.open.fd = fd_ref(fd);
+
+ ret = afr_inode_get_readable(frame, local->inode, this, NULL,
+ &event_generation, AFR_DATA_TRANSACTION);
+ if ((ret < 0) &&
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
+ afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
+ afr_open_continue);
+ } else {
+ afr_open_continue(frame, this, 0);
+ }
+
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
+ AFR_STACK_UNWIND(open, frame, -1, op_errno, fd, NULL);
- return 0;
+ return 0;
}
-//NOTE: this function should be called with holding the lock on
-//fd to which fd_ctx belongs
-void
-afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx,
- struct list_head *list)
+int
+afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- gf_boolean_t call = _gf_false;
-
- priv = this->private;
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- call = _gf_true;
- call_local = paused_call->frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (call_local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING))
- call = _gf_false;
- }
-
- if (call) {
- list_del_init (&paused_call->call_list);
- list_add (&paused_call->call_list, list);
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long)cookie;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret >= 0) {
+ gf_msg_debug(this->name, 0,
+ "fd for %s opened "
+ "successfully on subvolume %s",
+ local->loc.path, priv->children[child_index]->name);
+ } else {
+ gf_smsg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
+ AFR_MSG_OPEN_FAIL, "path=%s", local->loc.path, "subvolume=%s",
+ priv->children[child_index]->name, NULL);
+ }
+
+ fd_ctx = local->fd_ctx;
+
+ LOCK(&local->fd->lock);
+ {
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
+ }
+ UNLOCK(&local->fd->lock);
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY(frame);
+
+ return 0;
}
-void
-afr_resume_calls (xlator_t *this, struct list_head *list)
+static int
+afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
-
- list_for_each_entry_safe (paused_call, tmp, list, call_list) {
- list_del_init (&paused_call->call_list);
- call_local = paused_call->frame->local;
- call_local->fop_call_continue (paused_call->frame, this);
- GF_FREE (paused_call);
+ afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int count = 0;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ return 0;
+
+ LOCK(&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
+ priv->child_up[i]) {
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ need_open[i] = 1;
+ count++;
+ } else {
+ need_open[i] = 0;
+ }
}
+ }
+ UNLOCK(&fd->lock);
+
+ return count;
}
-int
-afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+void
+afr_fix_open(fd_t *fd, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- struct list_head paused_calls = {0};
- gf_boolean_t fop_paused = _gf_false;
- fd_t *local_fd = NULL;
-
- priv = this->private;
- local = frame->local;
- fop_paused = local->fop_paused;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *need_open = NULL;
+ int call_count = 0;
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO, "fd for %s opened "
- "successfully on subvolume %s", local->loc.path,
- priv->children[child_index]->name);
- }
+ priv = this->private;
- local_fd = fd_ref (local->fd);
- call_count = afr_frame_return (frame);
- //Note: Do not access any thing using the frame outside call_count 0
+ if (!afr_is_fd_fixable(fd))
+ goto out;
- //Note: No frame locking needed for this block of code
- fd_ctx = afr_fd_ctx_get (local_fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", local_fd);
- goto out;
- }
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ goto out;
- LOCK (&local_fd->lock);
- {
- if (op_ret >= 0) {
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- } else {
- fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
- }
- if (call_count == 0) {
- INIT_LIST_HEAD (&paused_calls);
- afr_get_resumable_calls (this, fd_ctx, &paused_calls);
- }
- }
- UNLOCK (&local_fd->lock);
-out:
- if (call_count == 0) {
- afr_resume_calls (this, &paused_calls);
- //If the fop is paused then resume_calls will continue the fop
- if (fop_paused)
- goto done;
-
- if (local->fop_call_continue)
- local->fop_call_continue (frame, this);
- else
- AFR_STACK_DESTROY (frame);
- }
+ need_open = alloca0(priv->child_count);
-done:
- fd_unref (local_fd);
- return 0;
-}
+ call_count = afr_fd_ctx_need_open(fd, this, need_open);
+ if (!call_count)
+ goto out;
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- call_frame_t *open_frame = NULL;
- afr_local_t *open_local = NULL;
- int ret = -1;
- ia_type_t ia_type = IA_INVAL;
- int32_t op_errno = 0;
-
- GF_ASSERT (fd_ctx);
- GF_ASSERT (need_open_count > 0);
- GF_ASSERT (need_open);
-
- local = frame->local;
- priv = this->private;
- if (!local->fop_call_continue) {
- open_frame = copy_frame (frame);
- if (!open_frame) {
- ret = -ENOMEM;
- goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (open_frame->local, out);
- open_local = open_frame->local;
- ret = afr_local_init (open_local, priv, &op_errno);
- if (ret < 0)
- goto out;
- loc_copy (&open_local->loc, &local->loc);
- open_local->fd = fd_ref (local->fd);
- } else {
- ret = 0;
- open_frame = frame;
- open_local = local;
- }
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
- open_local->call_count = need_open_count;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
- need_open_count);
+ local->loc.inode = inode_ref(fd->inode);
+ ret = loc_path(&local->loc, NULL);
+ if (ret < 0)
+ goto out;
- ia_type = open_local->fd->inode->ia_type;
- GF_ASSERT (ia_type != IA_INVAL);
- for (i = 0; i < priv->child_count; i++) {
- if (!need_open[i])
- continue;
- if (IA_IFDIR == ia_type) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opening fd for dir %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &open_local->loc, open_local->fd,
- NULL);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "opening fd for file %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
- (void *)(long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &open_local->loc, fd_ctx->flags,
- open_local->fd, NULL);
- }
+ local->fd = fd_ref(fd);
+ local->fd_ctx = fd_ctx;
+
+ local->call_count = call_count;
+
+ gf_msg_debug(this->name, 0, "need open count: %d", call_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!need_open[i])
+ continue;
+ if (IA_IFDIR == fd->inode->ia_type) {
+ gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, &local->loc,
+ local->fd, NULL);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "opening fd for file %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->open,
+ &local->loc, fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
- op_errno = 0;
- ret = 0;
+
+ if (!--call_count)
+ break;
+ }
+
+ return;
out:
- if (op_errno)
- ret = -op_errno;
- if (ret && open_frame)
- AFR_STACK_DESTROY (open_frame);
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
new file mode 100644
index 00000000000..6fc2c75145c
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -0,0 +1,494 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "afr.h"
+#include "afr-transaction.h"
+#include "afr-messages.h"
+
+void
+afr_pending_read_increment(afr_private_t *priv, int child_index)
+{
+ if (child_index < 0 || child_index > priv->child_count)
+ return;
+
+ GF_ATOMIC_INC(priv->pending_reads[child_index]);
+}
+
+void
+afr_pending_read_decrement(afr_private_t *priv, int child_index)
+{
+ if (child_index < 0 || child_index > priv->child_count)
+ return;
+
+ GF_ATOMIC_DEC(priv->pending_reads[child_index]);
+}
+
+void
+afr_read_txn_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ afr_pending_read_decrement(priv, local->read_subvol);
+ local->read_subvol = subvol;
+ afr_pending_read_increment(priv, subvol);
+ local->readfn(frame, this, subvol);
+}
+
+int
+afr_read_txn_next_subvol(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->readable[i]) {
+ /* don't even bother trying here.
+ just mark as attempted and move on. */
+ local->read_attempted[i] = 1;
+ continue;
+ }
+
+ if (!local->read_attempted[i]) {
+ subvol = i;
+ break;
+ }
+ }
+
+ /* If no more subvols were available for reading, we leave
+ @subvol as -1, which is an indication we have run out of
+ readable subvols. */
+ if (subvol != -1)
+ local->read_attempted[subvol] = 1;
+ afr_read_txn_wind(frame, this, subvol);
+
+ return 0;
+}
+
+static int
+afr_ta_read_txn_done(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ STACK_DESTROY(ta_frame->root);
+ return 0;
+}
+
+static int
+afr_ta_read_txn(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ int read_subvol = -1;
+ int query_child = AFR_CHILD_UNKNOWN;
+ int possible_bad_child = AFR_CHILD_UNKNOWN;
+ int ret = 0;
+ int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ dict_t *xdata_req = NULL;
+ dict_t *xdata_rsp = NULL;
+ int **pending = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ frame = (call_frame_t *)opaque;
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+ query_child = local->read_txn_query_child;
+
+ if (query_child == AFR_CHILD_ZERO) {
+ possible_bad_child = AFR_CHILD_ONE;
+ } else if (query_child == AFR_CHILD_ONE) {
+ possible_bad_child = AFR_CHILD_ZERO;
+ } else {
+ /*read_txn_query_child is AFR_CHILD_UNKNOWN*/
+ goto out;
+ }
+
+ /* Ask the query_child to see if it blames the possibly bad one. */
+ xdata_req = dict_new();
+ if (!xdata_req)
+ goto out;
+
+ pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!pending)
+ goto out;
+
+ ret = afr_set_pending_dict(priv, xdata_req, pending);
+ if (ret < 0)
+ goto out;
+
+ if (local->fd) {
+ ret = syncop_fxattrop(priv->children[query_child], local->fd,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ } else {
+ ret = syncop_xattrop(priv->children[query_child], &local->loc,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ }
+ if (ret || !xdata_rsp) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed xattrop for gfid %s on %s",
+ uuid_utoa(local->inode->gfid),
+ priv->children[query_child]->name);
+ op_errno = -ret;
+ goto out;
+ }
+
+ if (afr_ta_dict_contains_pending_xattr(xdata_rsp, priv,
+ possible_bad_child)) {
+ read_subvol = query_child;
+ goto out;
+ }
+ dict_unref(xdata_rsp);
+ xdata_rsp = NULL;
+
+ /* It doesn't. So query thin-arbiter to see if it blames any data brick. */
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ goto out;
+ }
+ flock.l_type = F_WRLCK; /*start and length are already zero. */
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, &loc, F_SETLKW, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed to get AFR_TA_DOM_MODIFY lock on %s.",
+ uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ if (ret || !xdata_rsp) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed xattrop on %s.", uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ op_errno = -ret;
+ goto unlock;
+ }
+
+ if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, query_child)) {
+ read_subvol = query_child;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Failing read for gfid %s since good brick %s is down",
+ uuid_utoa(local->inode->gfid),
+ priv->children[possible_bad_child]->name);
+ op_errno = EIO;
+ }
+
+unlock:
+ flock.l_type = F_UNLCK;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, &loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed to unlock AFR_TA_DOM_MODIFY lock on "
+ "%s.",
+ uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ }
+out:
+ if (xdata_req)
+ dict_unref(xdata_req);
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+ if (pending)
+ afr_matrix_cleanup(pending, priv->child_count);
+ loc_wipe(&loc);
+
+ if (read_subvol == -1) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ afr_read_txn_wind(frame, this, read_subvol);
+ return ret;
+}
+
+void
+afr_ta_read_txn_synctask(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *ta_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ goto out;
+ }
+ ret = synctask_new(this->ctx->env, afr_ta_read_txn, afr_ta_read_txn_done,
+ ta_frame, frame);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to launch "
+ "afr_ta_read_txn synctask for gfid %s.",
+ uuid_utoa(local->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ STACK_DESTROY(ta_frame->root);
+ goto out;
+ }
+ return;
+out:
+ afr_read_txn_wind(frame, this, -1);
+}
+
+int
+afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
+ inode_t *inode = NULL;
+ int ret = -1;
+ int spb_subvol = -1;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err) {
+ if (!priv->thin_arbiter_count)
+ goto readfn;
+ if (err != EINVAL)
+ goto readfn;
+ /* We need to query the good bricks and/or thin-arbiter.*/
+ afr_ta_read_txn_synctask(frame, this);
+ return 0;
+ }
+
+ read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable,
+ NULL);
+ if (read_subvol == -1) {
+ err = EIO;
+ goto readfn;
+ }
+
+ if (local->read_attempted[read_subvol]) {
+ afr_read_txn_next_subvol(frame, this);
+ return 0;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+readfn:
+ if (read_subvol == -1) {
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
+ }
+
+ if (read_subvol == -1) {
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
+ }
+ afr_read_txn_wind(frame, this, read_subvol);
+
+ return 0;
+}
+
+int
+afr_read_txn_continue(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local->refreshed) {
+ local->refreshed = _gf_true;
+ afr_inode_refresh(frame, this, local->inode, NULL,
+ afr_read_txn_refresh_done);
+ } else {
+ afr_read_txn_next_subvol(frame, this);
+ }
+
+ return 0;
+}
+
+/* afr_read_txn_wipe:
+
+ clean internal variables in @local in order to make
+ it possible to call afr_read_txn() multiple times from
+ the same frame
+*/
+
+void
+afr_read_txn_wipe(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->readfn = NULL;
+
+ if (local->inode)
+ inode_unref(local->inode);
+
+ for (i = 0; i < priv->child_count; i++) {
+ local->read_attempted[i] = 0;
+ local->readable[i] = 0;
+ }
+}
+
+/*
+ afr_read_txn:
+
+ This is the read transaction function. The way it works:
+
+ - Determine read-subvolume from inode ctx.
+
+ - If read-subvolume's generation was stale, refresh ctx once by
+ calling afr_inode_refresh()
+
+ Else make an attempt to read on read-subvolume.
+
+ - If attempted read on read-subvolume fails, refresh ctx once
+ by calling afr_inode_refresh()
+
+ - After ctx refresh, query read-subvolume freshly and attempt
+ read once.
+
+ - If read fails, try every other readable[] subvolume before
+ finally giving up. readable[] elements are set by afr_inode_refresh()
+ based on dirty and pending flags.
+
+ - If file is in split brain in the backend, generation will be
+ kept 0 by afr_inode_refresh() and readable[] will be set 0 for
+ all elements. Therefore reads always fail.
+*/
+
+int
+afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data = NULL;
+ unsigned char *metadata = NULL;
+ int read_subvol = -1;
+ int event_generation = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+ data = alloca0(priv->child_count);
+ metadata = alloca0(priv->child_count);
+
+ afr_read_txn_wipe(frame, this);
+
+ local->readfn = readfn;
+ local->inode = inode_ref(inode);
+ local->is_read_txn = _gf_true;
+ local->transaction.type = type;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_quorum_errno(priv);
+ goto read;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &local->op_errno)) {
+ local->op_ret = -1;
+ goto read;
+ }
+
+ if (priv->thin_arbiter_count && !afr_ta_has_quorum(priv, local)) {
+ local->op_ret = -1;
+ local->op_errno = -afr_quorum_errno(priv);
+ goto read;
+ }
+
+ if (priv->thin_arbiter_count &&
+ AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ if (local->child_up[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (local->child_up[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
+ afr_ta_read_txn_synctask(frame, this);
+ return 0;
+ }
+
+ ret = afr_inode_read_subvol_get(inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ /* very first transaction on this inode */
+ goto refresh;
+ AFR_INTERSECT(local->readable, data, metadata, priv->child_count);
+
+ gf_msg_debug(this->name, 0,
+ "%s: generation now vs cached: %d, "
+ "%d",
+ uuid_utoa(inode->gfid), local->event_generation,
+ event_generation);
+ if (afr_is_inode_refresh_reqd(inode, this, local->event_generation,
+ event_generation))
+ /* servers have disconnected / reconnected, and possibly
+ rebooted, very likely changing the state of freshness
+ of copies */
+ goto refresh;
+
+ read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable,
+ NULL);
+
+ if (read_subvol < 0 || read_subvol > priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "Unreadable subvolume %d found "
+ "with event generation %d for gfid %s.",
+ read_subvol, event_generation, uuid_utoa(inode->gfid));
+ goto refresh;
+ }
+
+ if (!local->child_up[read_subvol]) {
+ /* should never happen, just in case */
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR,
+ "subvolume %d is the "
+ "read subvolume in this generation, but is not up",
+ read_subvol);
+ goto refresh;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+
+read:
+ afr_read_txn_wind(frame, this, read_subvol);
+
+ return 0;
+
+refresh:
+ afr_inode_refresh(frame, this, inode, NULL, afr_read_txn_refresh_done);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
deleted file mode 100644
index 448178bc753..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ /dev/null
@@ -1,810 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#include <openssl/md5.h>
-#include "glusterfs.h"
-#include "afr.h"
-#include "xlator.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-/*
- This file contains the various self-heal algorithms
-*/
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno);
-static int
-sh_destroy_frame (call_frame_t *frame, xlator_t *this)
-{
- if (!frame)
- goto out;
-
- AFR_STACK_DESTROY (frame);
-out:
- return 0;
-}
-
-static void
-sh_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
- if (sh_priv)
- GF_FREE (sh_priv);
-}
-
-static int
-sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *last_loop_frame)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
- if (sh_priv) {
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
- }
-
- sh_private_cleanup (sh_frame, this);
- if (sh->op_failed) {
- GF_ASSERT (!last_loop_frame);
- //loop_finish should have happened and the old_loop should be NULL
- gf_log (this->name, GF_LOG_DEBUG,
- "self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (sh_frame, this);
- } else {
- GF_ASSERT (last_loop_frame);
- if (diff_blocks == total_blocks) {
- gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
- "completed on %s",local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
- }
-
- sh->old_loop_frame = last_loop_frame;
- local->self_heal.algo_completion_cbk (sh_frame, this);
- }
-
- return 0;
-}
-
-int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- if (!loop_frame)
- goto out;
-
- loop_local = loop_frame->local;
- if (loop_local) {
- loop_sh = &loop_local->self_heal;
- }
-
- if (loop_sh && loop_sh->data_lock_held) {
- afr_sh_data_unlock (loop_frame, this,
- sh_destroy_frame);
- } else {
- sh_destroy_frame (loop_frame, this);
- }
-out:
- return 0;
-}
-
-static int
-sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
-
- gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- loop_sh->data_lock_held = _gf_true;
- loop_sh->sh_data_algo_start (loop_frame, this);
- return 0;
-}
-
-static int
-sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
-{
- call_frame_t *sh_frame = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
- sh_frame = loop_sh->sh_frame;
-
- gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
- sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
- return 0;
-}
-
-static int
-sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *old_loop_frame, call_frame_t **loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- afr_private_t *priv = NULL;
-
- GF_ASSERT (sh_frame);
- GF_ASSERT (loop_frame);
-
- *loop_frame = NULL;
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- new_loop_frame = copy_frame (sh_frame);
- if (!new_loop_frame)
- goto out;
- //We want the frame to have same lk_owner as sh_frame
- //so that locks translator allows conflicting locks
- new_loop_local = afr_local_copy (local, this);
- if (!new_loop_local)
- goto out;
- new_loop_frame->local = new_loop_local;
-
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->sources = memdup (sh->sources,
- priv->child_count * sizeof (*sh->sources));
- if (!new_loop_sh->sources)
- goto out;
- new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*new_loop_sh->write_needed),
- gf_afr_mt_char);
- if (!new_loop_sh->write_needed)
- goto out;
- new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
- gf_afr_mt_uint8_t);
- if (!new_loop_sh->checksum)
- goto out;
- new_loop_sh->inode = inode_ref (sh->inode);
- new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
- new_loop_sh->source = sh->source;
- new_loop_sh->active_sinks = sh->active_sinks;
- new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
- new_loop_sh->file_has_holes = sh->file_has_holes;
- new_loop_sh->old_loop_frame = old_loop_frame;
- new_loop_sh->sh_frame = sh_frame;
- *loop_frame = new_loop_frame;
- return 0;
-out:
- sh_destroy_frame (new_loop_frame, this);
- return -ENOMEM;
-}
-
-static int
-sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
- call_frame_t *old_loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- int ret = 0;
-
- GF_ASSERT (sh_frame);
-
- local = sh_frame->local;
- sh = &local->self_heal;
-
- ret = sh_loop_frame_create (sh_frame, this, old_loop_frame,
- &new_loop_frame);
- if (ret)
- goto out;
- new_loop_local = new_loop_frame->local;
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->offset = offset;
- new_loop_sh->block_size = sh->block_size;
- afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
- _gf_true, sh_loop_lock_success, sh_loop_lock_failure);
- return 0;
-out:
- sh->op_failed = 1;
- if (old_loop_frame)
- sh_loop_finish (old_loop_frame, this);
- sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
- return 0;
-}
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- LOCK (&sh_priv->lock);
- {
- if (!is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((!sh->eof_reached) && (0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- sh_priv->offset += block_size;
- sh_priv->loops_running++;
-
- if (!is_first_call)
- break;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (0 == loop) {
- //loop finish does unlock, but the erasing of the pending
- //xattrs needs to happen before that so do not finish the loop
- if (is_driver_done && !sh->op_failed)
- goto driver_done;
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- }
-
- //If we have more loops to form we should finish previous loop after
- //the next loop lock
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- sh_loop_driver (sh_frame, this, _gf_false, NULL);
- } else {
- gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
- "for offset %"PRId64, offset);
-
- sh_loop_start (sh_frame, this, offset, old_loop_frame);
- old_loop_frame = NULL;
- offset += block_size;
- }
- }
-
-driver_done:
- if (is_driver_done) {
- sh_loop_driver_done (sh_frame, this, old_loop_frame);
- }
- return 0;
-}
-
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (loop_frame) {
- loop_local = loop_frame->local;
- if (loop_local)
- loop_sh = &loop_local->self_heal;
- if (loop_sh)
- gf_log (this->name, GF_LOG_TRACE, "loop for offset "
- "%"PRId64" returned", loop_sh->offset);
- }
-
- if (op_ret == -1) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- if (loop_frame) {
- sh_loop_finish (loop_frame, this);
- loop_frame = NULL;
- }
- }
-
- sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
-
- return 0;
-}
-
-static int
-sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index, loop_sh->offset);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- afr_sh_set_error (loop_sh, op_errno);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- sh_loop_return (sh_frame, this, loop_frame,
- loop_sh->op_ret, loop_sh->op_errno);
- }
-
- return 0;
-}
-
-static void
-sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
- afr_private_t *priv)
-{
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (!strcmp (sh->algo->name, "diff"))
- return;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- /* full self-heal guarantees there exists atleast 1 file with size 0
- * That means for other files we can preserve holes that come after
- * its size before 'trim'
- */
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->write_needed[i] &&
- ((loop_sh->offset + 1) > sh->buf[i].ia_size))
- loop_sh->write_needed[i] = 0;
- }
-}
-
-static int
-sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- int i = 0;
- int call_count = 0;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, loop_local->loc.path, loop_sh->offset);
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- sh->op_failed = 1;
- gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
- "for %s reason :%s", sh->source,
- sh_local->loc.path, strerror (errno));
- } else {
- sh->eof_reached = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
- sh_local->loc.path);
- }
- sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
- goto out;
- }
-
- if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
- sh_prune_writes_needed (sh_frame, loop_frame, priv);
-
- call_count = sh_number_of_writes_needed (loop_sh->write_needed,
- priv->child_count);
- if (call_count == 0) {
- sh_loop_return (sh_frame, this, loop_frame, 0, 0);
- goto out;
- }
- loop_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!loop_sh->write_needed[i])
- continue;
- STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- loop_sh->healing_fd, vector, count,
- loop_sh->offset, 0, iobref, NULL);
-
- if (!--call_count)
- break;
- }
-
-out:
- return 0;
-}
-
-
-static int
-sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->readv,
- loop_sh->healing_fd, loop_sh->block_size,
- loop_sh->offset, 0, NULL);
-
- return 0;
-}
-
-
-static int
-sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int child_index = 0;
- int call_count = 0;
- int i = 0;
- int write_needed = 0;
-
- priv = this->private;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = sh->private;
-
- child_index = (long) cookie;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "checksum on %s failed on subvolume %s (%s)",
- sh_local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
- strong_checksum, MD5_DIGEST_LENGTH);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
- loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
- MD5_DIGEST_LENGTH)) {
- /*
- Checksums differ, so this block
- must be written to this sink
- */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checksum on subvolume %s at offset %"
- PRId64" differs from that on source",
- priv->children[i]->name, loop_sh->offset);
-
- write_needed = loop_sh->write_needed[i] = 1;
- }
- }
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->total_blocks++;
- if (write_needed)
- sh_priv->diff_blocks++;
- }
- UNLOCK (&sh_priv->lock);
-
- if (write_needed && !sh->op_failed) {
- sh_loop_read (loop_frame, this);
- } else {
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- }
- }
-
- return 0;
-}
-
-static int
-sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- call_count = loop_sh->active_sinks + 1; /* sinks and source */
-
- loop_local->call_count = call_count;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size, NULL);
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size, NULL);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-static int
-sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
- loop_sh->write_needed[i] = 1;
- }
- sh_loop_read (loop_frame, this);
- return 0;
-}
-
-afr_sh_algo_private_t*
-afr_sh_priv_init ()
-{
- afr_sh_algo_private_t *sh_priv = NULL;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv)
- goto out;
-
- LOCK_INIT (&sh_priv->lock);
-out:
- return sh_priv;
-}
-
-void
-afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src,
- unsigned int child_count)
-{
- afr_local_t *dst_local = NULL;
- afr_self_heal_t *dst_sh = NULL;
- afr_local_t *src_local = NULL;
- afr_self_heal_t *src_sh = NULL;
-
- dst_local = dst->local;
- dst_sh = &dst_local->self_heal;
- src_local = src->local;
- src_sh = &src_local->self_heal;
- GF_ASSERT (src_sh->data_lock_held);
- GF_ASSERT (!dst_sh->data_lock_held);
- afr_lk_transfer_datalock (dst, src, child_count);
- src_sh->data_lock_held = _gf_false;
- dst_sh->data_lock_held = _gf_true;
-}
-
-int
-afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
- afr_sh_algo_fn sh_data_algo_start)
-{
- call_frame_t *first_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = 0;
- afr_private_t *priv = NULL;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- sh->sh_data_algo_start = sh_data_algo_start;
- local->call_count = 0;
- ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
- if (ret)
- goto out;
- afr_sh_transfer_lock (first_loop_frame, sh_frame, priv->child_count);
- sh->private = afr_sh_priv_init ();
- if (!sh->private) {
- ret = -1;
- goto out;
- }
- sh_loop_driver (sh_frame, this, _gf_true, first_loop_frame);
- ret = 0;
-out:
- if (ret) {
- sh->op_failed = 1;
- sh_loop_driver_done (sh_frame, this, NULL);
- }
- return 0;
-}
-
-int
-afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
- return 0;
-}
-
-int
-afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
- return 0;
-}
-
-struct afr_sh_algorithm afr_self_heal_algorithms[] = {
- {.name = "full", .fn = afr_sh_algo_full},
- {.name = "diff", .fn = afr_sh_algo_diff},
- {0, 0},
-};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
deleted file mode 100644
index 6b20789b1bb..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
-#define __AFR_SELF_HEAL_ALGORITHM_H__
-
-typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
- xlator_t *this);
-
-struct afr_sh_algorithm {
- const char *name;
- afr_sh_algo_fn fn;
-};
-
-extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-typedef struct {
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-
- int32_t total_blocks;
- int32_t diff_blocks;
-} afr_sh_algo_private_t;
-
-#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 22b49bc08a4..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,2413 +8,2927 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "byte-order.h"
-
#include "afr.h"
-#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
-#include "pump.h"
-
-void
-afr_sh_reset (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+#include <glusterfs/byte-order.h>
+#include "protocol-common.h"
+#include "afr-messages.h"
+#include <glusterfs/events.h>
- memset (sh->child_errno, 0,
- sizeof (*sh->child_errno) * priv->child_count);
- memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
- memset (sh->parentbufs, 0,
- sizeof (*sh->parentbufs) * priv->child_count);
- memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
- memset (sh->locked_nodes, 0,
- sizeof (*sh->locked_nodes) * priv->child_count);
- sh->active_sinks = 0;
-
- afr_reset_xattr (sh->xattr, priv->child_count);
-}
-
-//Intersection[child]=1 if child is part of intersection
void
-afr_children_intersection_get (int32_t *set1, int32_t *set2,
- int *intersection, unsigned int child_count)
-{
- int i = 0;
+afr_heal_synctask(xlator_t *this, afr_local_t *local);
- memset (intersection, 0, sizeof (*intersection) * child_count);
- for (i = 0; i < child_count; i++) {
- intersection[i] = afr_is_child_present (set1, child_count, i)
- && afr_is_child_present (set2, child_count,
- i);
+int
+afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies, int source,
+ unsigned char *sources, void *gfid, int *gfid_idx)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_on = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ dict_t *xdata = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ int i = 0;
+
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+ if (source >= 0 && replies[source].valid && replies[source].op_ret == 0)
+ ia_type = replies[source].poststat.ia_type;
+
+ if (ia_type != IA_INVAL)
+ goto heal;
+
+ /* If ia_type is still invalid, it means either
+ * (a)'source' was -1, i.e. parent dir pending xattrs are in split-brain
+ * (or) (b) The parent dir pending xattrs are all zeroes (i.e. all bricks
+ * are sources) and the 'source' we selected earlier might be the one where
+ * the file is not actually present.
+ *
+ * In both cases, let us pick a brick with a successful reply and use its
+ * ia_type.
+ * */
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ /* case (a) above. */
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ } else {
+ /* case (b) above. */
+ if (i == source)
+ continue;
+ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
}
-}
+ }
-/**
- * select_source - select a source and return it
- */
+heal:
+ /* gfid heal on those subvolumes that do not have gfid associated
+ * with the inode and update those replies.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
-int
-afr_sh_select_source (int sources[], int child_count)
-{
- int i = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
+ if (gf_uuid_is_null(gfid) &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
+ replies[i].poststat.ia_type == ia_type)
+ gfid = replies[i].poststat.ia_gfid;
- return -1;
-}
+ if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
+ replies[i].poststat.ia_type != ia_type)
+ continue;
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
+ wind_on[i] = 1;
+ }
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (AFR_COUNT(wind_on, priv->child_count) == 0)
+ return 0;
+ xdata = dict_new();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_gfuuid(xdata, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ local = frame->local;
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
+
+ AFR_ONLIST(wind_on, frame, afr_selfheal_discover_cbk, lookup, &loc, xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ afr_reply_wipe(&replies[i]);
+ afr_reply_copy(&replies[i], &local->replies[i]);
+ }
+ if (gfid_idx && (*gfid_idx == -1)) {
+ /*Pick a brick where the gifd heal was successful.*/
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
- sh->success[i] = 1;
- }
+ if (!wind_on[i])
+ continue;
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid)) {
+ *gfid_idx = i;
+ break;
+ }
}
- sh->active_sinks = active_sinks;
+ }
+out:
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
+ ret = -afr_final_errno(local, priv);
+ }
+ loc_wipe(&loc);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ if (xdata)
+ dict_unref(xdata);
+
+ return ret;
}
int
-afr_sh_source_count (int sources[], int child_count)
+afr_gfid_sbrain_source_from_src_brick(xlator_t *this, struct afr_reply *replies,
+ char *src_brick)
{
- int i = 0;
- int nsource = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (strcmp(priv->children[i]->name, src_brick) == 0)
+ return i;
+ }
+ return -1;
}
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
-{
- sh->op_ret = -1;
- if (afr_error_more_important (sh->op_errno, op_errno))
- sh->op_errno = op_errno;
-}
-
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
+int
+afr_selfheal_gfid_mismatch_by_majority(struct afr_reply *replies,
+ int child_count)
{
- afr_private_t * priv = this->private;
- char *buf = NULL;
- char *ptr = NULL;
- int i = 0;
- int j = 0;
+ int j = 0;
+ int i = 0;
+ int votes;
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
- for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_DEBUG,
- "pending_matrix: %s", buf);
+ votes = 1;
+ for (j = i + 1; j < child_count; j++) {
+ if ((!gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[j].poststat.ia_gfid)))
+ votes++;
+ if (votes > child_count / 2)
+ return i;
}
+ }
- GF_FREE (buf);
+ return -1;
}
-void
-afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
+int
+afr_gfid_sbrain_source_from_bigger_file(struct afr_reply *replies,
+ int child_count)
{
- int i = 0;
- int j = 0;
-
- GF_ASSERT (pending_matrix);
+ int i = 0;
+ int src = -1;
+ uint64_t size = 0;
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (size < replies[i].poststat.ia_size) {
+ src = i;
+ size = replies[i].poststat.ia_size;
+ } else if (replies[i].poststat.ia_size == size) {
+ src = -1;
}
+ }
+ return src;
}
-void
-afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- size_t child_count)
-{
- int i = 0;
- int j = 0;
-
- GF_ASSERT (pending_matrix);
- GF_ASSERT (ignorant_subvols);
-
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
- }
- }
+int
+afr_gfid_sbrain_source_from_latest_mtime(struct afr_reply *replies,
+ int child_count)
+{
+ int i = 0;
+ int src = -1;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+ if ((mtime < replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) {
+ src = i;
+ mtime = replies[i].poststat.ia_mtime;
+ mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ } else if ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec == replies[i].poststat.ia_mtime_nsec)) {
+ src = -1;
+ }
+ }
+ return src;
}
int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
-
- afr_init_pending_matrix (pending_matrix, child_count);
-
- for (i = 0; i < child_count; i++) {
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], pending_key[j],
- &pending_raw);
-
- if (ret != 0) {
- /*
- * There is no xattr present. This means this
- * subvolume should be considered an 'ignorant'
- * subvolume.
- */
-
- if (ignorant_subvols)
- ignorant_subvols[i] = 1;
- continue;
- }
-
- memcpy (pending, pending_raw, sizeof(pending));
- k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
+afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+ unsigned char *locked_on, int *src, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ char g1[64] = {
+ 0,
+ };
+ char g2[64] = {
+ 0,
+ };
+ int up_count = 0;
+ int heal_op = -1;
+ int ret = -1;
+ char *src_brick = NULL;
+
+ *src = -1;
+ priv = this->private;
+ up_count = AFR_COUNT(locked_on, priv->child_count);
+ if (up_count != priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "All the bricks should be up to resolve the gfid split "
+ "barin");
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SALL_BRICKS_UP_TO_RESOLVE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "Error setting"
+ " gfid-heal-msg dict");
+ }
+ goto out;
+ }
+
+ if (xdata) {
+ ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op);
+ if (ret)
+ goto fav_child;
+ } else {
+ goto fav_child;
+ }
+
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ *src = afr_gfid_sbrain_source_from_bigger_file(replies,
+ priv->child_count);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_BIGGER_FILE);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_BIGGER_FILE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ " setting gfid-heal-msg dict");
}
- }
-
- return ret;
-}
-
-typedef enum {
- AFR_NODE_INVALID,
- AFR_NODE_INNOCENT,
- AFR_NODE_FOOL,
- AFR_NODE_WISE,
-} afr_node_type;
+ }
+ break;
+
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ *src = afr_gfid_sbrain_source_from_latest_mtime(replies,
+ priv->child_count);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_DIFF_IN_MTIME);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_DIFF_IN_MTIME);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ "setting gfid-heal-msg dict");
+ }
+ }
+ break;
+
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str_sizen(xdata, "child-name", &src_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Error getting the source "
+ "brick");
+ break;
+ }
+ *src = afr_gfid_sbrain_source_from_src_brick(this, replies,
+ src_brick);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SERROR_GETTING_SRC_BRICK);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SERROR_GETTING_SRC_BRICK);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ " setting gfid-heal-msg dict");
+ }
+ }
+ break;
-typedef struct {
- afr_node_type type;
- int wisdom;
-} afr_node_character;
+ default:
+ break;
+ }
+ goto out;
+
+fav_child:
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ *src = afr_sh_fav_by_size(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ *src = afr_sh_fav_by_mtime(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ *src = afr_sh_fav_by_ctime(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ if (priv->child_count != 2)
+ *src = afr_selfheal_gfid_mismatch_by_majority(
+ replies, priv->child_count);
+ else
+ *src = -1;
+
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No majority to resolve "
+ "gfid split brain");
+ }
+ break;
+ default:
+ break;
+ }
+out:
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and"
+ " %s on %s.",
+ uuid_utoa(pargfid), bname,
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1),
+ priv->children[child_idx]->name,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=gfid;file="
+ "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
+ "child-%d=%s;gfid-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid),
+ bname, child_idx, priv->children[child_idx]->name, child_idx,
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
+ return -1;
+ }
+ return 0;
+}
-static int
-afr_sh_is_innocent (int32_t *array, int child_count)
+int
+afr_selfheal_post_op_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- int i = 0;
- int ret = 1; /* innocent until proven guilty */
-
- for (i = 0; i < child_count; i++) {
- if (array[i]) {
- ret = 0;
- break;
- }
- }
+ afr_local_t *local = NULL;
- return ret;
-}
+ local = frame->local;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ syncbarrier_wake(&local->barrier);
-static int
-afr_sh_is_fool (int32_t *array, int i, int child_count)
-{
- return array[i]; /* fool if accuses itself */
+ return 0;
}
-
-static int
-afr_sh_is_wise (int32_t *array, int i, int child_count)
+int
+afr_selfheal_post_op(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata)
{
- return !array[i]; /* wise if does not accuse itself */
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ priv = this->private;
+ local = frame->local;
-static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
- int child_count)
-{
- int i = 0;
- int ret = 1;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_INNOCENT) {
- ret = 0;
- break;
- }
- }
+ local->op_ret = 0;
- return ret;
-}
+ STACK_WIND(frame, afr_selfheal_post_op_cbk, priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ syncbarrier_wait(&local->barrier, 1);
+ if (local->op_ret < 0)
+ ret = -local->op_errno;
-static int
-afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- ret = 1;
- break;
- }
- }
+ loc_wipe(&loc);
+ local->op_ret = 0;
- return ret;
+ return ret;
}
+int
+afr_check_stale_error(struct afr_reply *replies, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+ int stale_count = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ tmp_errno = replies[i].op_errno;
+ if (tmp_errno == ENOENT || tmp_errno == ESTALE) {
+ op_errno = afr_higher_errno(op_errno, tmp_errno);
+ stale_count++;
+ }
+ }
+ if (stale_count != priv->child_count)
+ return -ENOTCONN;
+ else
+ return -op_errno;
+}
-/*
- * The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
- * Only wise nodes with wisdom 1 are sources.
- *
- * If no nodes with wisdom 1 exist, a split-brain has occurred.
- */
-
-static void
-afr_sh_compute_wisdom (int32_t *pending_matrix[],
- afr_node_character characters[], int child_count)
+int
+afr_sh_generic_fop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- int i = 0;
- int j = 0;
+ int i = (long)cookie;
+ afr_local_t *local = NULL;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- characters[i].wisdom = 1;
-
- for (j = 0; j < child_count; j++) {
- if ((characters[j].type == AFR_NODE_WISE)
- && pending_matrix[j][i]) {
-
- characters[i].wisdom = 0;
- }
- }
- }
- }
-}
+ local = frame->local;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (pre)
+ local->replies[i].prestat = *pre;
+ if (post)
+ local->replies[i].poststat = *post;
+ if (xdata)
+ local->replies[i].xdata = dict_ref(xdata);
-static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
- int child_count)
-{
- int i = 0;
- int ret = 1;
+ syncbarrier_wake(&local->barrier);
- for (i = 0; i < child_count; i++) {
- if ((characters[i].type == AFR_NODE_WISE)
- && characters[i].wisdom == 1) {
+ return 0;
+}
- /* There is atleast one bona-fide wise node */
- ret = 0;
- break;
- }
+int
+afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies)
+{
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
+ &replies[source].poststat,
+ (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
+ NULL);
+
+ loc_wipe(&loc);
+
+ return 0;
+}
+
+dict_t *
+afr_selfheal_output_xattr(xlator_t *this, gf_boolean_t is_full_crawl,
+ afr_transaction_type type, int *output_dirty,
+ int **output_matrix, int subvol,
+ int **full_heal_mtx_out)
+{
+ int j = 0;
+ int idx = 0;
+ int d_idx = 0;
+ int ret = 0;
+ int *raw = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ idx = afr_index_for_transaction_type(type);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ xattr = dict_new();
+ if (!xattr)
+ return NULL;
+
+ /* clear dirty */
+ raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32(output_dirty[subvol]);
+ ret = dict_set_bin(xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE(raw);
+ goto err;
+ }
+
+ /* clear/set pending */
+ for (j = 0; j < priv->child_count; j++) {
+ raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32(output_matrix[subvol][j]);
+ if (is_full_crawl)
+ raw[d_idx] = hton32(full_heal_mtx_out[subvol][j]);
+
+ ret = dict_set_bin(xattr, priv->pending_key[j], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE(raw);
+ goto err;
}
+ }
- return ret;
+ return xattr;
+err:
+ if (xattr)
+ dict_unref(xattr);
+ return NULL;
}
+int
+afr_selfheal_undo_pending(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int j = 0;
+ unsigned char *pending = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int **full_heal_mtx_in = NULL;
+ int **full_heal_mtx_out = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ pending = alloca0(priv->child_count);
+
+ input_dirty = alloca0(priv->child_count * sizeof(int));
+ input_matrix = ALLOC_MATRIX(priv->child_count, int);
+ full_heal_mtx_in = ALLOC_MATRIX(priv->child_count, int);
+ full_heal_mtx_out = ALLOC_MATRIX(priv->child_count, int);
+ output_dirty = alloca0(priv->child_count * sizeof(int));
+ output_matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
-static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
- int child_count)
-{
- int nsources = 0;
- int i = 0;
+ afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix);
- for (i = 0; i < child_count; i++) {
- if (characters[i].wisdom == 1) {
- sources[i] = 1;
- nsources++;
- }
- }
+ if (local->need_full_crawl)
+ afr_selfheal_extract_xattr(this, replies, AFR_DATA_TRANSACTION, NULL,
+ full_heal_mtx_in);
- return nsources;
-}
+ for (i = 0; i < priv->child_count; i++)
+ if (sinks[i] && !healed_sinks[i])
+ pending[i] = 1;
-static void
-afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int j = 0;
- int witness = 0;
-
- GF_ASSERT (witnesses);
- GF_ASSERT (pending_matrix);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- witness = 0;
- for (j = 0; j < child_count; j++) {
- if (i == j)
- continue;
- witness += pending_matrix[i][j];
- }
- witnesses[i] = witness;
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (pending[j]) {
+ output_matrix[i][j] = 1;
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = 1;
+ } else if (locked_on[j]) {
+ output_matrix[i][j] = -input_matrix[i][j];
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
+ }
}
-}
+ }
-static int32_t
-afr_find_biggest_witness_among_fools (int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int biggest_witness = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ output_dirty[i] = -input_dirty[i];
+ }
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ /* perform post-op only on subvols we had locked
+ and inspected on.
+ */
+ continue;
+ if (undid_pending[i])
+ /* We already unset the pending xattrs in
+ * _afr_fav_child_reset_sink_xattrs(). */
+ continue;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
+ xattr = afr_selfheal_output_xattr(this, local->need_full_crawl, type,
+ output_dirty, output_matrix, i,
+ full_heal_mtx_out);
+ if (!xattr) {
+ continue;
+ }
- if (biggest_witness < witnesses[i])
- biggest_witness = witnesses[i];
+ if ((type == AFR_ENTRY_TRANSACTION) && (priv->esh_granular)) {
+ if (xdata && dict_set_int8(xdata, GF_XATTROP_PURGE_INDEX, 1))
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set"
+ " dict value for %s",
+ GF_XATTROP_PURGE_INDEX);
}
- return biggest_witness;
-}
-int
-afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count, int32_t witness)
-{
- int i = 0;
- int nsources = 0;
+ afr_selfheal_post_op(frame, this, inode, i, xattr, xdata);
+ dict_unref(xattr);
+ }
- GF_ASSERT (sources);
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
+ if (xdata)
+ dict_unref(xdata);
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
+ return 0;
+}
- if (witness == witnesses[i]) {
- sources[i] = 1;
- nsources++;
- }
- }
- return nsources;
+void
+afr_reply_copy(struct afr_reply *dst, struct afr_reply *src)
+{
+ dict_t *xdata = NULL;
+
+ dst->valid = src->valid;
+ dst->op_ret = src->op_ret;
+ dst->op_errno = src->op_errno;
+ dst->prestat = src->prestat;
+ dst->poststat = src->poststat;
+ dst->preparent = src->preparent;
+ dst->postparent = src->postparent;
+ dst->preparent2 = src->preparent2;
+ dst->postparent2 = src->postparent2;
+ if (src->xdata)
+ xdata = dict_ref(src->xdata);
+ else
+ xdata = NULL;
+ if (dst->xdata)
+ dict_unref(dst->xdata);
+ dst->xdata = xdata;
+ if (xdata && dict_get_str_boolean(xdata, "fips-mode-rchecksum",
+ _gf_false) == _gf_true) {
+ memcpy(dst->checksum, src->checksum, SHA256_DIGEST_LENGTH);
+ } else {
+ memcpy(dst->checksum, src->checksum, MD5_DIGEST_LENGTH);
+ }
+ dst->fips_mode_rchecksum = src->fips_mode_rchecksum;
}
-static int
-afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
- afr_node_character *characters,
- int child_count)
+void
+afr_replies_copy(struct afr_reply *dst, struct afr_reply *src, int count)
{
- int32_t biggest_witness = 0;
- int nsources = 0;
- int32_t *witnesses = NULL;
+ int i = 0;
- GF_ASSERT (child_count > 0);
-
- witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
- gf_afr_mt_int32_t);
- if (NULL == witnesses) {
- nsources = -1;
- goto out;
- }
+ if (dst == src)
+ return;
- afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
- child_count);
- biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
- characters,
- child_count);
- nsources = afr_mark_fool_as_source_by_witness (sources, witnesses,
- characters, child_count,
- biggest_witness);
-out:
- if (witnesses)
- GF_FREE (witnesses);
- return nsources;
+ for (i = 0; i < count; i++) {
+ afr_reply_copy(&dst[i], &src[i]);
+ }
}
int
-afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *success_children,
- unsigned int child_count, uint32_t uid)
-{
- int i = 0;
- int nsources = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (-1 == success_children[i])
- break;
-
- child = success_children[i];
- if (uid == bufs[child].ia_uid) {
- sources[child] = 1;
- nsources++;
- }
- }
- return nsources;
-}
+afr_selfheal_fill_dirty(xlator_t *this, int *dirty, int subvol, int idx,
+ dict_t *xdata)
+{
+ void *pending_raw = NULL;
+ int pending[3] = {
+ 0,
+ };
-int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count)
-{
- int i = 0;
- int smallest = -1;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (-1 == success_children[i])
- break;
- child = success_children[i];
- if ((smallest == -1) ||
- (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
- smallest = child;
- }
- }
- return smallest;
-}
+ if (!dirty)
+ return 0;
-static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int smallest = 0;
+ if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw))
+ return -1;
- smallest = afr_get_child_with_lowest_uid (bufs, success_children,
- child_count);
- if (smallest < 0) {
- nsources = -1;
- goto out;
- }
- nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- success_children, child_count,
- bufs[smallest].ia_uid);
-out:
- return nsources;
-}
+ if (!pending_raw)
+ return -1;
-int
-afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
- struct iatt *bufs)
-{
- afr_private_t *priv = NULL;
- int i = 0;
- int child = -1;
- int read_child = -1;
+ memcpy(pending, pending_raw, sizeof(pending));
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (read_child < 0)
- read_child = child;
- else if (bufs[read_child].ia_size < bufs[child].ia_size)
- read_child = child;
- }
- return read_child;
+ dirty[subvol] = ntoh32(pending[idx]);
+
+ return 0;
}
int
-afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int i = 0;
- int child = 0;
- gf_boolean_t sink_exists = _gf_false;
- gf_boolean_t source_exists = _gf_false;
- int source = -1;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (!bufs[child].ia_size) {
- sink_exists = _gf_true;
- continue;
- }
- if (!source_exists) {
- source_exists = _gf_true;
- source = child;
- continue;
- }
- if (bufs[source].ia_size != bufs[child].ia_size) {
- nsources = -1;
- goto out;
- }
- }
- if (!source_exists && !sink_exists) {
- nsources = -1;
- goto out;
- }
+afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,
+ dict_t *xdata)
+{
+ int i = 0;
+ void *pending_raw = NULL;
+ int pending[3] = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
- if (!source_exists || !sink_exists)
- goto out;
+ priv = this->private;
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (bufs[child].ia_size) {
- sources[child] = 1;
- nsources++;
- }
- }
-out:
- return nsources;
-}
+ if (!matrix)
+ return 0;
-char *
-afr_get_character_str (afr_node_type type)
-{
- char *character = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
- switch (type) {
- case AFR_NODE_INNOCENT:
- character = "innocent";
- break;
- case AFR_NODE_FOOL:
- character = "fool";
- break;
- case AFR_NODE_WISE:
- character = "wise";
- break;
- default:
- character = "invalid";
- break;
- }
- return character;
-}
+ if (!pending_raw)
+ continue;
-afr_node_type
-afr_find_child_character_type (int32_t *pending_row, int32_t child,
- unsigned int child_count)
-{
- afr_node_type type = AFR_NODE_INVALID;
+ memcpy(pending, pending_raw, sizeof(pending));
- GF_ASSERT ((child >= 0) && (child < child_count));
+ matrix[subvol][i] = ntoh32(pending[idx]);
+ }
- if (afr_sh_is_innocent (pending_row, child_count))
- type = AFR_NODE_INNOCENT;
- else if (afr_sh_is_fool (pending_row, child, child_count))
- type = AFR_NODE_FOOL;
- else if (afr_sh_is_wise (pending_row, child, child_count))
- type = AFR_NODE_WISE;
- return type;
+ return 0;
}
int
-afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type,
- int32_t *subvol_status, gf_boolean_t ignore_ignorant)
+afr_selfheal_extract_xattr(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix)
{
- afr_private_t *priv = NULL;
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
- int nsources = -1;
- unsigned char *ignorant_subvols = NULL;
- unsigned int child_count = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ dict_t *xdata = NULL;
+ int idx = -1;
- priv = this->private;
- child_count = priv->child_count;
+ idx = afr_index_for_transaction_type(type);
- if (afr_get_children_count (success_children, priv->child_count) == 0)
- goto out;
+ priv = this->private;
- if (!ignore_ignorant) {
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
- child_count, gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
- afr_build_pending_matrix (priv->pending_key, pending_matrix,
- ignorant_subvols, xattr, type,
- priv->child_count);
+ if (!replies[i].xdata)
+ continue;
- if (!ignore_ignorant)
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- priv->child_count);
- sh_type = afr_self_heal_type_for_transaction (type);
- if (AFR_SELF_HEAL_INVALID == sh_type)
- goto out;
+ xdata = replies[i].xdata;
- afr_sh_print_pending_matrix (pending_matrix, this);
+ afr_selfheal_fill_dirty(this, dirty, i, idx, xdata);
+ afr_selfheal_fill_matrix(this, matrix, i, idx, xdata);
+ }
- nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
- sh_type, success_children, subvol_status);
-out:
- GF_FREE (ignorant_subvols);
- return nsources;
+ return 0;
}
+/*
+ * If by chance there are multiple sources with differing sizes, select
+ * the largest file as the source.
+ *
+ * This can happen if data was directly modified in the backend or for snapshots
+ */
void
-afr_find_character_types (afr_node_character *characters,
- int32_t **pending_matrix, int32_t *success_children,
- unsigned int child_count)
-{
- afr_node_type type = AFR_NODE_INVALID;
- int child = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- type = afr_find_child_character_type (pending_matrix[child],
- child, child_count);
- characters[child].type = type;
+afr_mark_largest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t size = 0;
+
+ /* Find source with biggest file size */
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
+ }
+ if (size <= replies[i].poststat.ia_size) {
+ size = replies[i].poststat.ia_size;
}
+ }
+
+ /* Mark sources with less size as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (size > replies[i].poststat.ia_size)
+ sources[i] = 0;
+ }
}
void
-afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
- unsigned int child_count)
-{
- int i = 0;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- sources[success_children[i]] = 1;
- }
+afr_mark_latest_mtime_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
+ }
+ if ((mtime < replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) {
+ mtime = replies[i].poststat.ia_mtime;
+ mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if ((mtime > replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec > replies[i].poststat.ia_mtime_nsec))) {
+ sources[i] = 0;
+ }
+ }
}
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
-int
-afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
- struct iatt *bufs, afr_self_heal_type type,
- int32_t *success_children, int32_t *subvol_status)
-{
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
- afr_node_character *characters = NULL;
- int nsources = -1;
- unsigned int child_count = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- child_count = priv->child_count;
- characters = GF_CALLOC (sizeof (afr_node_character),
- child_count, gf_afr_mt_afr_node_character);
- if (!characters)
- goto out;
+void
+afr_mark_active_sinks(xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
- this = THIS;
-
- /* start clean */
- memset (sources, 0, sizeof (*sources) * child_count);
- nsources = 0;
- afr_find_character_types (characters, pending_matrix, success_children,
- child_count);
- if (afr_sh_all_nodes_innocent (characters, child_count)) {
- switch (type) {
- case AFR_SELF_HEAL_METADATA:
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- success_children,
- child_count,
- sources);
- break;
- case AFR_SELF_HEAL_DATA:
- nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
- success_children,
- child_count,
- sources);
- if ((nsources < 0) && subvol_status)
- *subvol_status |= SPLIT_BRAIN;
- break;
- default:
- break;
- }
- goto out;
- }
+ priv = this->private;
- if (afr_sh_wise_nodes_exist (characters, child_count)) {
- afr_sh_compute_wisdom (pending_matrix, characters, child_count);
-
- if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- if (subvol_status)
- *subvol_status |= SPLIT_BRAIN;
- nsources = -1;
- } else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
- characters,
- child_count);
- }
- } else {
- if (subvol_status)
- *subvol_status |= ALL_FOOLS;
- nsources = afr_mark_biggest_of_fools_as_source (sources,
- pending_matrix,
- characters,
- child_count);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] && locked_on[i])
+ sinks[i] = 1;
+ else
+ sinks[i] = 0;
+ }
+}
-out:
- if (nsources == 0)
- afr_mark_success_children_sources (sources, success_children,
- child_count);
- GF_FREE (characters);
+gf_boolean_t
+afr_dict_contains_heal_op(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ dict_t *xdata_req = NULL;
+ int ret = 0;
+ int heal_op = -1;
+
+ local = frame->local;
+ xdata_req = local->xdata_req;
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
+ if (ret)
+ return _gf_false;
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp)
+ return _gf_true;
+ }
+ ret = dict_set_sizen_str_sizen(local->xdata_rsp, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
- gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
- return nsources;
+ return _gf_true;
}
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type)
+gf_boolean_t
+afr_can_decide_split_brain_source_sinks(struct afr_reply *replies,
+ int child_count)
{
- int i = 0;
- int j = 0;
+ int i = 0;
- afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
- xattr, type, priv->child_count);
- for (i = 0; i < priv->child_count; i++)
- for (j = 0; j < priv->child_count; j++)
- delta_matrix[i][j] = -delta_matrix[i][j];
+ for (i = 0; i < child_count; i++)
+ if (replies[i].valid != 1 || replies[i].op_ret != 0)
+ return _gf_false;
+
+ return _gf_true;
}
+int
+afr_mark_split_brain_source_sinks_by_heal_op(
+ call_frame_t *frame, xlator_t *this, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type, int heal_op)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int i = 0;
+ char *name = NULL;
+ int source = -1;
+
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ if (sources[i] || !sinks[i] || !healed_sinks[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ ret = -1;
+ goto out;
+ }
+ }
+ xdata_rsp = local->xdata_rsp;
+
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRAIN_HEAL_NO_GO_MSG);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++)
+ if (locked_on[i])
+ sources[i] = 1;
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_largest_file_as_source(this, sources, replies);
+ if (AFR_COUNT(sources, priv->child_count) != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_BIGGER_FILE);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_latest_mtime_file_as_source(this, sources, replies);
+ if (AFR_COUNT(sources, priv->child_count) != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_DIFF_IN_MTIME);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str_sizen(xdata_req, "child-name", &name);
+ if (ret)
+ goto out;
+ source = afr_get_child_index_from_name(this, name);
+ if (source < 0) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SINVALID_BRICK_NAME);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ if (locked_on[source] != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRICK_IS_NOT_UP);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ sources[source] = 1;
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+ sinks[source] = 0;
+ healed_sinks[source] = 0;
+ ret = source;
+out:
+ if (ret < 0)
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ return ret;
+}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
-{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
-
- for (j = 0; j < child_count; j++) {
- pending = GF_CALLOC (sizeof (int32_t), 3,
- gf_afr_mt_int32_t);
-
- if (!pending)
- continue;
- /* 3 = data+metadata+entry */
-
- k = afr_index_for_transaction_type (type);
-
- pending[k] = hton32 (delta_matrix[i][j]);
-
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
- pending,
- 3 * sizeof (int32_t));
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_WARNING,
- "Unable to set dict value.");
+afr_sh_fav_by_majority(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode)
+{
+ afr_private_t *priv;
+ int vote_count = -1;
+ int fav_child = -1;
+ int i = 0;
+ int k = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s mtime_sec = %" PRId64 ", size = %" PRIu64
+ " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_size, uuid_utoa(inode->gfid));
+ vote_count = 0;
+ for (k = 0; k < priv->child_count; k++) {
+ if ((replies[k].poststat.ia_mtime ==
+ replies[i].poststat.ia_mtime) &&
+ (replies[k].poststat.ia_size ==
+ replies[i].poststat.ia_size)) {
+ vote_count++;
}
+ }
+ if (vote_count > priv->child_count / 2) {
+ fav_child = i;
+ break;
+ }
}
- return 0;
+ }
+ return fav_child;
}
-
+/*
+ * afr_sh_fav_by_mtime: Choose favorite child by mtime.
+ */
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
-
- if (pending[j])
- return 1;
- }
+afr_sh_fav_by_mtime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_mtime = 0;
+ uint32_t cmp_mtime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s mtime = %" PRId64
+ ", mtime_nsec = %d for "
+ "gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_mtime_nsec,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_mtime > cmp_mtime) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_mtime == cmp_mtime) &&
+ (replies[i].poststat.ia_mtime_nsec > cmp_mtime_nsec)) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
+}
- return 0;
+/*
+ * afr_sh_fav_by_ctime: Choose favorite child by ctime.
+ */
+int
+afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_ctime = 0;
+ uint32_t cmp_ctime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s ctime = %" PRId64
+ ", ctime_nsec = %d for "
+ "gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_ctime,
+ replies[i].poststat.ia_ctime_nsec,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_ctime > cmp_ctime) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_ctime == cmp_ctime) &&
+ (replies[i].poststat.ia_ctime_nsec > cmp_ctime_nsec)) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
}
+/*
+ * afr_sh_fav_by_size: Choose favorite child by size
+ * when not all files are of zero size.
+ */
+int
+afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint64_t cmp_sz = 0;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid) {
+ continue;
+ }
+ gf_msg_debug(this->name, 0,
+ "Child:%s file size = %" PRIu64 " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_size,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_type == IA_IFDIR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Cannot perform selfheal on %s. "
+ "Size policy is not applicable to directories.",
+ uuid_utoa(inode->gfid));
+ break;
+ }
+ if (replies[i].poststat.ia_size > cmp_sz) {
+ cmp_sz = replies[i].poststat.ia_size;
+ fav_child = i;
+ } else if (replies[i].poststat.ia_size == cmp_sz) {
+ fav_child = -1;
+ }
+ }
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No bigger file");
+ }
+ return fav_child;
+}
int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this)
+afr_sh_get_fav_by_policy(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
- priv = this->private;
+ priv = this->private;
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ return -1;
+ }
+
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ fav_child = afr_sh_fav_by_size(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "SIZE";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ fav_child = afr_sh_fav_by_ctime(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "CTIME";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ fav_child = afr_sh_fav_by_mtime(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "MTIME";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ fav_child = afr_sh_fav_by_majority(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "MAJORITY";
+ }
+ break;
+ case AFR_FAV_CHILD_NONE:
+ default:
+ break;
+ }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ return fav_child;
+}
- if (ret != 0)
- return 0;
+int
+afr_mark_split_brain_source_sinks_by_policy(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+ char mtime_str[256];
+ char ctime_str[256];
+ char *policy_str = NULL;
+ struct tm *tm_ptr;
+ time_t time;
+
+ priv = this->private;
+
+ fav_child = afr_sh_get_fav_by_policy(this, replies, inode, &policy_str);
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "No child selected by favorite-child policy.");
+ } else if (fav_child > priv->child_count - 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Invalid child (%d) "
+ "selected by policy %s.",
+ fav_child, policy_str);
+ } else if (fav_child >= 0) {
+ time = replies[fav_child].poststat.ia_mtime;
+ tm_ptr = localtime(&time);
+ strftime(mtime_str, sizeof(mtime_str), "%Y-%m-%d %H:%M:%S", tm_ptr);
+ time = replies[fav_child].poststat.ia_ctime;
+ tm_ptr = localtime(&time);
+ strftime(ctime_str, sizeof(ctime_str), "%Y-%m-%d %H:%M:%S", tm_ptr);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Source %s selected as authentic to resolve conflicting data "
+ "in file (gfid:%s) by %s (%" PRIu64
+ " bytes @ %s mtime, %s "
+ "ctime).",
+ priv->children[fav_child]->name, uuid_utoa(inode->gfid),
+ policy_str, replies[fav_child].poststat.ia_size, mtime_str,
+ ctime_str);
+
+ sources[fav_child] = 1;
+ sinks[fav_child] = 0;
+ healed_sinks[fav_child] = 0;
+ }
+ return fav_child;
+}
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+gf_boolean_t
+afr_is_file_empty_on_all_children(afr_private_t *priv,
+ struct afr_reply *replies)
+{
+ int i = 0;
- if (pending[j])
- return 1;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!replies[i].valid) || (replies[i].op_ret != 0) ||
+ (replies[i].poststat.ia_size != 0))
+ return _gf_false;
+ }
- return 0;
+ return _gf_true;
}
-
int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
+afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ int source = -1;
+ int i = 0;
+ afr_private_t *priv = this->private;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ if ((AFR_COUNT(locked_on, priv->child_count) < priv->child_count) ||
+ (afr_success_count(replies, priv->child_count) < priv->child_count))
+ return -1;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ if (type == AFR_DATA_TRANSACTION) {
+ if (!afr_is_file_empty_on_all_children(priv, replies))
+ return -1;
+ goto mark;
+ }
+
+ /*For AFR_METADATA_TRANSACTION, metadata must be same on all bricks.*/
+ stbuf = replies[0].poststat;
+ for (i = 1; i < priv->child_count; i++) {
+ if ((!IA_EQUAL(stbuf, replies[i].poststat, type)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, prot)))
+ return -1;
+ }
+ for (i = 1; i < priv->child_count; i++) {
+ if (!afr_xattrs_are_equal(replies[0].xdata, replies[i].xdata))
+ return -1;
+ }
+
+mark:
+ /* data/metadata is same on all bricks. Pick one of them as source. Rest
+ * are sinks.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ source = i;
+ sources[i] = 1;
+ sinks[i] = 0;
+ healed_sinks[i] = 0;
+ continue;
+ }
+ sources[i] = 0;
+ sinks[i] = 1;
+ healed_sinks[i] = 1;
+ }
+
+ return source;
+}
+
+/* Return a source depending on the type of heal_op, and set sources[source],
+ * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
+ * only if the following condition is met:
+ * ∀i((i ∈ locked_on[] ∧ i=1)==>(sources[i]=0 ∧ sinks[i]=1 ∧ healed_sinks[i]=1))
+ * i.e. for each locked node, sources[node] is 0; healed_sinks[node] and
+ * sinks[node] are 1. This should be the case if the file is in split-brain.
+ */
+int
+afr_mark_split_brain_source_sinks(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ int heal_op = -1;
+ int ret = -1;
+ int source = -1;
+
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ source = afr_mark_source_sinks_if_file_empty(
+ this, sources, sinks, healed_sinks, locked_on, replies, type);
+ if (source >= 0)
+ return source;
+
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
+ if (ret)
+ goto autoheal;
+
+ source = afr_mark_split_brain_source_sinks_by_heal_op(
+ frame, this, sources, sinks, healed_sinks, locked_on, replies, type,
+ heal_op);
+ return source;
+
+autoheal:
+ /* Automatically heal if fav_child_policy is set. */
+ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) {
+ source = afr_mark_split_brain_source_sinks_by_policy(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, type);
+ if (source != -1) {
+ ret = dict_set_int32_sizen(xdata_req, "fav-child-policy", 1);
+ if (ret)
+ return -1;
+ }
+ }
+
+ return source;
+}
- if (ret != 0)
- return 0;
+int
+_afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!dict_get_sizen(local->xdata_req, "fav-child-policy"))
+ return 0;
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
- if (pending[j])
- return 1;
- }
+ input_dirty = alloca0(priv->child_count * sizeof(int));
+ input_matrix = ALLOC_MATRIX(priv->child_count, int);
+ output_dirty = alloca0(priv->child_count * sizeof(int));
+ output_matrix = ALLOC_MATRIX(priv->child_count, int);
- return 0;
-}
+ afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix);
-int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
+ output_dirty[i] = -input_dirty[i];
+ output_matrix[i][source] = -input_matrix[i][source];
+ }
- local = frame->local;
- sh = &local->self_heal;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] || !locked_on[i])
+ continue;
+ xattr = afr_selfheal_output_xattr(this, _gf_false, type, output_dirty,
+ output_matrix, i, NULL);
- afr_sh_reset (frame, this);
+ afr_selfheal_post_op(frame, this, inode, i, xattr, xdata);
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_DEBUG,
- "split brain found, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- }
+ undid_pending[i] = 1;
+ dict_unref(xattr);
+ }
- if (sh->op_failed) {
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
- }
+ if (xdata)
+ dict_unref(xdata);
- return 0;
+ return 0;
}
-
-static int
-afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_does_witness_exist(xlator_t *this, uint64_t *witness)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
+ int i = 0;
+ afr_private_t *priv = NULL;
- int_lock->lock_cbk = afr_sh_missing_entries_done;
- afr_unlock (frame, this);
+ priv = this->private;
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
}
-int
-afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
+unsigned int
+afr_get_quorum_count(afr_private_t *priv)
{
- int ret = -ENOMEM;
- sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
- gf_afr_mt_iatt);
- if (!sh->buf)
- goto out;
- sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
- gf_afr_mt_iatt);
- if (!sh->parentbufs)
- goto out;
- sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
- gf_afr_mt_int);
- if (!sh->child_errno)
- goto out;
- sh->success_children = afr_children_create (child_count);
- if (!sh->success_children)
- goto out;
- sh->fresh_children = afr_children_create (child_count);
- if (!sh->fresh_children)
- goto out;
- sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
- gf_afr_mt_dict_t);
- if (!sh->xattr)
- goto out;
- ret = 0;
-out:
- return ret;
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ return priv->child_count / 2 + 1;
+ } else {
+ return priv->quorum_count;
+ }
}
void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc)
-{
- int child_index = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- sh->buf[child_index] = *buf;
- sh->parentbufs[child_index] = *postparent;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume"
- " %s => -1 (%s)", loc->path,
- priv->children[child_index]->name,
- strerror (op_errno));
- local->self_heal.child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
+afr_selfheal_post_op_failure_accounting(afr_private_t *priv, char *accused,
+ unsigned char *sources,
+ unsigned char *locked_on)
+{
+ int i = 0;
+ unsigned int quorum_count = 0;
+
+ if (AFR_COUNT(sources, priv->child_count) != 0)
return;
-}
-gf_boolean_t
-afr_valid_ia_type (ia_type_t ia_type)
-{
- switch (ia_type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- case IA_IFDIR:
- return _gf_true;
- default:
- return _gf_false;
+ quorum_count = afr_get_quorum_count(priv);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((accused[i] < quorum_count) && locked_on[i]) {
+ sources[i] = 1;
}
- return _gf_false;
+ }
+ return;
}
-int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, call_frame_t **impunge_frame)
-{
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t op_errno = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
- call_frame_t *new_frame = NULL;
+/*
+ * This function determines if a self-heal is required for a given inode,
+ * and if needed, in what direction.
+ *
+ * locked_on[] is the array representing servers which have been locked and
+ * from which xattrs have been fetched for analysis.
+ *
+ * The output of the function is by filling the arrays sources[] and sinks[].
+ *
+ * sources[i] is set if i'th server is an eligible source for a selfheal.
+ *
+ * sinks[i] is set if i'th server needs to be healed.
+ *
+ * if sources[0..N] are all set, there is no need for a selfheal.
+ *
+ * if sinks[0..N] are all set, the inode is in split brain.
+ *
+ */
- op_errno = ENOMEM;
- priv = this->private;
- new_frame = copy_frame (frame);
- if (!new_frame) {
- goto out;
+int
+afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ uint64_t *witness, unsigned char *pflag)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ int *dirty = NULL; /* Denotes if dirty xattr is set */
+ int **matrix = NULL; /* Changelog matrix */
+ char *accused = NULL; /* Accused others without any self-accusal */
+ char *pending = NULL; /* Have pending operations on others */
+ char *self_accused = NULL; /* Accused itself */
+
+ priv = this->private;
+
+ dirty = alloca0(priv->child_count * sizeof(int));
+ accused = alloca0(priv->child_count);
+ pending = alloca0(priv->child_count);
+ self_accused = alloca0(priv->child_count);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+ memset(witness, 0, sizeof(*witness) * priv->child_count);
+
+ /* First construct the pending matrix for further analysis */
+ afr_selfheal_extract_xattr(this, replies, type, dirty, matrix);
+
+ if (pflag) {
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j])
+ *pflag |= PFLAG_PENDING;
+ if (*pflag)
+ break;
}
+ }
+
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
+ /* Treat this just like locks not being acquired */
+ return -ENOTCONN;
+ }
+
+ /* short list all self-accused */
+ for (i = 0; i < priv->child_count; i++) {
+ if (matrix[i][i])
+ self_accused[i] = 1;
+ }
+
+ /* Next short list all accused to exclude them from being sources */
+ /* Self-accused can't accuse others as they are FOOLs */
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j]) {
+ if (!self_accused[i])
+ accused[j] += 1;
+ if (i != j)
+ pending[i] += 1;
+ }
+ }
+ }
+
+ /* Short list all non-accused as sources */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!accused[i] && locked_on[i])
+ sources[i] = 1;
+ else
+ sources[i] = 0;
+ }
+
+ /* Everyone accused by non-self-accused sources are sinks */
+ memset(sinks, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ sinks[j] = 1;
+ }
+ }
+
+ /* For breaking ties provide with number of fops they witnessed */
+
+ /*
+ * count the pending fops witnessed from itself to others when it is
+ * self-accused
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (i == j)
+ continue;
+ witness[i] += matrix[i][j];
+ }
+ }
+
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
+ afr_selfheal_post_op_failure_accounting(priv, accused, sources,
+ locked_on);
+
+ /* If no sources, all locked nodes are sinks - split brain */
+ if (AFR_COUNT(sources, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ sinks[i] = 1;
+ }
+ if (pflag)
+ *pflag |= PFLAG_SBRAIN;
+ }
+
+ /* One more class of witness similar to dirty in v2 is where no pending
+ * exists but we have self-accusing markers. This can happen in afr-v1
+ * if the brick crashes just after doing xattrop on self but
+ * before xattrop on the other xattrs on the brick in pre-op. */
+ if (AFR_COUNT(pending, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i])
+ witness[i] += matrix[i][i];
+ }
+ } else {
+ /* In afr-v1 if a file is self-accused and has pending
+ * operations on others then it is similar to 'dirty' in afr-v2.
+ * Consider such cases as witness.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i] && pending[i])
+ witness[i] += matrix[i][i];
+ }
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
-
- local = frame->local;
- new_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_source;
- impunge_local->child_up = memdup (local->child_up,
- sizeof (*local->child_up) *
- priv->child_count);
- if (!impunge_local->child_up)
- goto out;
-
- impunge_local->pending = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!impunge_local->pending)
- goto out;
+ /* count the number of dirty fops witnessed */
+ for (i = 0; i < priv->child_count; i++)
+ witness[i] += dirty[i];
- ret = afr_sh_common_create (impunge_sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- op_errno = 0;
- *impunge_frame = new_frame;
-out:
- if (op_errno && new_frame)
- AFR_STACK_DESTROY (new_frame);
- return -op_errno;
+ return 0;
}
void
-afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
- struct iatt *buf,
- struct iatt *postparent,
- afr_impunge_done_cbk_t impunge_done)
-{
- call_frame_t *impunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int ret = 0;
- unsigned int enoent_count = 0;
- afr_private_t *priv = NULL;
- int i = 0;
- int32_t op_errno = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (!enoent_count) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- goto out;
- }
- sh->impunge_done = impunge_done;
- ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
- if (ret)
- goto out;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- loc_copy (&impunge_local->loc, &local->loc);
- ret = afr_build_parent_loc (&impunge_sh->parent_loc,
- &impunge_local->loc, &op_errno);
- if (ret) {
- ret = -op_errno;
- goto out;
- }
- impunge_local->call_count = enoent_count;
- impunge_sh->entrybuf = sh->buf[sh->source];
- impunge_sh->parentbuf = sh->parentbufs[sh->source];
- for (i = 0; i < priv->child_count; i++) {
- if (!impunge_local->child_up[i]) {
- impunge_sh->child_errno[i] = ENOTCONN;
- continue;
- }
- if (sh->child_errno[i] != ENOENT) {
- impunge_sh->child_errno[i] = EEXIST;
- continue;
- }
- }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] != ENOENT)
- continue;
- afr_sh_entry_impunge_create (impunge_frame, this, i);
- enoent_count--;
- }
- GF_ASSERT (!enoent_count);
- return;
-out:
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
- "reason: %s", local->loc.path, strerror (-ret));
- sh->op_failed = 1;
- }
- afr_sh_missing_entries_finish (frame, this);
+afr_log_selfheal(uuid_t gfid, xlator_t *this, int ret, char *type, int source,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ char *status = NULL;
+ char *sinks_str = NULL;
+ char *p = NULL;
+ char *sources_str = NULL;
+ char *q = NULL;
+ afr_private_t *priv = NULL;
+ gf_loglevel_t loglevel = GF_LOG_NONE;
+ int i = 0;
+
+ priv = this->private;
+ sinks_str = alloca0(priv->child_count * 8);
+ p = sinks_str;
+ sources_str = alloca0(priv->child_count * 8);
+ q = sources_str;
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i])
+ p += sprintf(p, "%d ", i);
+ if (sources[i]) {
+ if (source == i) {
+ q += sprintf(q, "[%d] ", i);
+ } else {
+ q += sprintf(q, "%d ", i);
+ }
+ }
+ }
+
+ if (ret < 0) {
+ status = "Failed";
+ loglevel = GF_LOG_DEBUG;
+ } else {
+ status = "Completed";
+ loglevel = GF_LOG_INFO;
+ }
+
+ gf_msg(this->name, loglevel, 0, AFR_MSG_SELF_HEAL_INFO,
+ "%s %s selfheal on %s. "
+ "sources=%s sinks=%s",
+ status, type, uuid_utoa(gfid), sources_str, sinks_str);
}
int
-afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ GF_UNUSED int ret = -1;
+ int8_t need_heal = 1;
- local = frame->local;
- sh = &local->self_heal;
- if (op_ret < 0)
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
- return 0;
-}
+ local = frame->local;
+ i = (long)cookie;
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- buf = &sh->buf[sh->source];
- postparent = &sh->parentbufs[sh->source];
-
- type = buf->ia_type;
- if (!afr_valid_ia_type (type)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unknown file type: 0%o", local->loc.path, type);
- local->govinda_gOvinda = 1;
- afr_sh_missing_entries_finish (frame, this);
- goto out;
- }
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (buf)
+ local->replies[i].poststat = *buf;
+ if (parbuf)
+ local->replies[i].postparent = *parbuf;
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ }
- afr_sh_missing_entry_call_impunge_recreate (frame, this,
- buf, postparent,
- afr_sh_create_entry_cbk);
-out:
- return 0;
+ local->replies[i].need_heal = need_heal;
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
}
-void
-afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t ia_type = IA_INVAL;
- int32_t nsources = 0;
- loc_t *loc = NULL;
- int32_t subvol_status = 0;
- afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
- gf_boolean_t split_brain = _gf_false;
- int read_child = -1;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- loc = &local->loc;
-
- if (op_ret < 0) {
- if (op_errno == EIO)
- local->govinda_gOvinda = 1;
- // EIO can happen if finding the fresh parent dir failed
- goto out;
- }
+inode_t *
+afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr)
+{
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
- //now No chance for the ia_type to conflict
- ia_type = sh->buf[sh->success_children[0]].ia_type;
- txn_type = afr_transaction_type_get (ia_type);
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children, txn_type,
- &subvol_status, _gf_false);
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
- " in missing entry self-heal, continuing with the rest"
- " of the self-heals", local->loc.path);
- if (subvol_status & SPLIT_BRAIN) {
- split_brain = _gf_true;
- switch (txn_type) {
- case AFR_DATA_TRANSACTION:
- nsources = 1;
- sh->sources[sh->success_children[0]] = 1;
- break;
- case AFR_ENTRY_TRANSACTION:
- read_child = afr_get_no_xattr_dir_read_child
- (this,
- sh->success_children,
- sh->buf);
- sh->sources[read_child] = 1;
- nsources = 1;
- break;
- default:
- op_errno = EIO;
- goto out;
- }
- } else {
- op_errno = EIO;
- goto out;
- }
- }
+ local = frame->local;
+ priv = frame->this->private;
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- sh->source = sh->fresh_children[0];
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- op_errno = EIO;
- goto out;
- }
+ xattr_req = dict_new();
+ if (!xattr_req)
+ return NULL;
- if (sh->gfid_sh_success_cbk)
- sh->gfid_sh_success_cbk (frame, this);
- sh->type = sh->buf[sh->source].ia_type;
- if (uuid_is_null (loc->inode->gfid))
- uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
- if (split_brain) {
- afr_sh_missing_entries_finish (frame, this);
- } else {
- sh_missing_entries_create (frame, this);
- }
- return;
-out:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
+ if (xattr)
+ dict_copy(xattr, xattr_req);
+
+ if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
+ dict_unref(xattr_req);
+ return NULL;
+ }
+
+ inode = inode_new(parent->table);
+ if (!inode) {
+ dict_unref(xattr_req);
+ return NULL;
+ }
+
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
+
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
+
+ afr_replies_copy(replies, local->replies, priv->child_count);
+
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
+
+ return inode;
}
static int
-afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
- op_errno, inode, buf, xattr,
- postparent, &sh->lookup_loc);
- call_count = afr_frame_return (frame);
-
- if (call_count)
- goto out;
- op_ret = -1;
- if (!sh->success_count) {
- op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
- priv->child_count);
- gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
- "reason %s", sh->lookup_loc.path,
- strerror (op_errno));
- goto done;
- }
+afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
- (afr_conflicting_iattrs (sh->buf, sh->success_children,
- priv->child_count,
- sh->lookup_loc.path, this->name))) {
- op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
- (afr_gfid_missing_count (this->name, sh->success_children,
- sh->buf, priv->child_count,
- sh->lookup_loc.path))) {
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
- op_ret = 0;
+ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
+ if (ret)
+ return ret;
-done:
- sh->lookup_done (frame, this, op_ret, op_errno);
-out:
- return 0;
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ ret = dict_set_uint32(dict, key1, 1);
+ if (ret)
+ return ret;
+
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+ ret = dict_set_uint32(dict, key2, 1);
+ if (ret)
+ return ret;
+
+ return 0;
}
int
-afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->post_remove_call);
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "purge entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- LOCK (&frame->lock);
- {
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- }
- UNLOCK (&frame->lock);
- }
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- sh->post_remove_call (frame, this);
- return 0;
-}
+afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on, dict_t *dict)
+{
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = frame->this->private;
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ return -ENOMEM;
+ if (dict)
+ dict_copy(dict, xattr_req);
+
+ if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
+ dict_unref(xattr_req);
+ return -ENOMEM;
+ }
+
+ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) {
+ dict_unref(xattr_req);
+ return -1;
+ }
-void
-afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *parentbuf,
- afr_expunge_done_cbk_t expunge_done)
-{
- call_frame_t *expunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int32_t op_errno = 0;
- int ret = 0;
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- goto out;
- }
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, gfid);
- AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
+ AFR_ONLIST(discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- local = frame->local;
- sh = &local->self_heal;
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- loc_copy (&expunge_local->loc, &local->loc);
- ret = afr_build_parent_loc (&expunge_sh->parent_loc,
- &expunge_local->loc, &op_errno);
- if (ret) {
- ret = -op_errno;
- goto out;
- }
- sh->expunge_done = expunge_done;
- afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
- parentbuf);
- return;
-out:
- gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
- local->loc.path, strerror (op_errno));
- expunge_done (frame, this, child_index, -1, op_errno);
-}
+ afr_replies_copy(replies, local->replies, priv->child_count);
-void
-afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
- int32_t *fresh_children,
- unsigned int child_count)
-{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (afr_is_child_present (success_children, child_count, i) &&
- !afr_is_child_present (fresh_children, child_count, i)) {
- sh->child_errno[i] = ENOENT;
- GF_ASSERT (sh->xattr[i]);
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
+
+ return 0;
}
int
-afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
+afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ dict_t *dict = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
- if (sh->op_failed) {
- afr_sh_missing_entries_finish (frame, this);
- } else {
- if (afr_gfid_missing_count (this->name, sh->fresh_children,
- sh->buf, priv->child_count,
- local->loc.path)) {
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req,
- AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
- } else {
- //No need to set gfid so goto missing entries lookup done
- //Behave as if you have done the lookup
- afr_sh_remove_stale_lookup_info (sh,
- sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_children_copy (sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
- }
- }
- return 0;
+ if (local->xattr_req)
+ dict = local->xattr_req;
+
+ return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
+ local->child_up, dict);
}
-gf_boolean_t
-afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
+unsigned int
+afr_success_count(struct afr_reply *replies, unsigned int count)
{
- afr_self_heal_t *sh = NULL;
-
- sh = &local->self_heal;
+ int i = 0;
+ unsigned int success = 0;
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
-
- return _gf_false;
+ for (i = 0; i < count; i++)
+ if (replies[i].valid && replies[i].op_ret == 0)
+ success++;
+ return success;
}
-gf_boolean_t
-afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
+int
+afr_selfheal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- sh = &local->self_heal;
+ local = frame->local;
+ i = (long)cookie;
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_children, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- return _gf_false;
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
}
-void
-afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
- gf_boolean_t purge_condition (afr_local_t *local,
- afr_private_t *priv,
- int child))
+int
+afr_locked_fill(call_frame_t *frame, xlator_t *this, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (purge_condition (local, priv, i))
- call_count++;
- }
+ local = frame->local;
+ priv = this->private;
- if (call_count == 0) {
- sh->post_remove_call (frame, this);
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ locked_on[i] = 1;
+ count++;
+ } else {
+ locked_on[i] = 0;
}
+ }
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!purge_condition (local, priv, i))
- continue;
- gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
- "on %d", local->loc.path, i);
- afr_sh_call_entry_expunge_remove (frame, this,
- (long) i, &sh->buf[i],
- &sh->parentbufs[i],
- afr_sh_remove_entry_cbk);
- }
-out:
- return;
+ return count;
}
-void
-afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_tryinodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
- local = frame->local;
- sh = &local->self_heal;
- sh->post_remove_call = afr_sh_missing_entries_finish;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
+
+ loc_wipe(&loc);
+
+ return afr_locked_fill(frame, this, locked_on);
}
-void
-afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- sh->post_remove_call = afr_sh_purge_stale_entries_done;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- for (i = 0; i < priv->child_count; i++) {
- if (afr_is_child_present (sh->fresh_children,
- priv->child_count, i))
- continue;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- if ((!local->child_up[i]) || sh->child_errno[i] != 0)
- continue;
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
- GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
- uuid_is_null (sh->buf[i].ia_gfid));
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_uninodelk(frame, this, inode, dom, off, size,
+ locked_on);
- if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
- (uuid_compare (sh->buf[i].ia_gfid,
- sh->entrybuf.ia_gfid)))
- continue;
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW,
+ &flock, NULL);
+ break;
+ }
+ }
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
+ loc_wipe(&loc);
- }
- afr_sh_purge_entry_common (frame, this,
- afr_sh_purge_stale_entry_condition);
+ return afr_locked_fill(frame, this, locked_on);
}
-void
-afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
- struct iatt *save,
- unsigned int child_count)
-{
- int i = 0;
- int child = 0;
- gf_boolean_t saved = _gf_false;
-
- GF_ASSERT (save);
- //if iatt buf with gfid exists sets it
- for (i = 0; i < child_count; i++) {
- child = children[i];
- if (child == -1)
- break;
- *save = bufs[child];
- saved = _gf_true;
- if (!uuid_is_null (save->ia_gfid))
- break;
+static void
+afr_get_lock_and_eagain_counts(afr_private_t *priv, struct afr_reply *replies,
+ int *lock_count, int *eagain_count)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == 0) {
+ (*lock_count)++;
+ } else if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ (*eagain_count)++;
}
- GF_ASSERT (saved);
+ }
}
-void
-afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
- unsigned int child_count)
+/*Do blocking locks if number of locks acquired is majority and there were some
+ * EAGAINs. Useful for odd-way replication*/
+int
+afr_selfheal_tie_breaker_inodelk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on)
{
- afr_children_intersection_get (sh->success_children,
- sh->fresh_parent_dirs,
- sh->sources, child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, child_count);
- memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
-}
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
-void
-afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t fresh_child_enoents = 0;
- int32_t fresh_parent_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0)
- goto fail;
- afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
- fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
- priv->child_count);
- //we need the enoent count of the subvols present in fresh_parent_dirs
- fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
- sh->child_errno,
- priv->child_count, ENOENT);
- if (fresh_child_enoents == fresh_parent_count) {
- gf_log (this->name, GF_LOG_INFO, "Deleting stale file %s",
- local->loc.path);
- afr_sh_set_error (sh, ENOENT);
- sh->op_failed = 1;
- afr_sh_purge_entry (frame, this);
- } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
- priv->child_count, local->loc.path,
- this->name)) {
- afr_sh_save_child_iatts_from_policy (sh->fresh_children,
- sh->buf, &sh->entrybuf,
- priv->child_count);
- afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
- sh->fresh_children,
- priv->child_count);
- afr_sh_purge_stale_entry (frame, this);
- } else {
- op_errno = EIO;
- local->govinda_gOvinda = 1;
- goto fail;
- }
+ priv = this->private;
+ local = frame->local;
- return;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
-fail:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
-}
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
-static void
-afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int enoent_count = 0;
- int nsources = 0;
- int source = -1;
- int32_t subvol_status = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0)
- goto out;
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count > 0) {
- gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
- " in missing entry self-heal, aborting missing-entry "
- "self-heal",
- local->loc.path);
- afr_sh_missing_entries_finish (frame, this);
- return;
- }
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION, &subvol_status,
- _gf_true);
- if ((subvol_status & ALL_FOOLS) ||
- (subvol_status & SPLIT_BRAIN)) {
- gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
- "merge", sh->parent_loc.path);
- afr_mark_success_children_sources (sh->sources,
- sh->success_children,
- priv->child_count);
- } else if (nsources < 0) {
- gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
- "of %s, in missing entry self-heal, aborting "
- "self-heal", local->loc.path);
- op_errno = EIO;
- goto out;
- }
+ afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count,
+ &eagain_count);
- source = afr_sh_select_source (sh->sources, priv->child_count);
- if (source == -1) {
- GF_ASSERT (0);
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- op_errno = EIO;
- goto out;
- }
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_parent_dirs, priv->child_count);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_children_lookup_done, NULL, 0,
- NULL);
- return;
+ if (lock_count > priv->child_count / 2 && eagain_count) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_uninodelk(frame, this, inode, dom, off, size, locked_on);
-out:
- afr_sh_set_error (sh, op_errno);
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
- return;
-}
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW,
+ &flock, NULL);
+ }
-void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
-{
- int i = 0;
+ loc_wipe(&loc);
- for (i = 0; i < child_count; i++) {
- memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
- memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
- sh->child_errno[i] = 0;
- }
- memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
- sh->success_count = 0;
- afr_reset_children (sh->success_children, child_count);
- afr_reset_children (sh->fresh_children, child_count);
- afr_reset_xattr (sh->xattr, child_count);
- loc_wipe (&sh->lookup_loc);
+ return afr_locked_fill(frame, this, locked_on);
}
-/* afr self-heal state will be lost if this call is made
- * please check the afr_sh_common_reset that is called in this function
- */
int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
- int32_t flags, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- afr_xattr_req_prepare (this, xattr_req, loc->path);
- if (gfid) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s with gfid: %s",
- loc->path, uuid_utoa (gfid));
- GF_ASSERT (!uuid_is_null (gfid));
- afr_set_dict_gfid (xattr_req, gfid);
- }
- }
+afr_selfheal_uninodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on)
+{
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
- afr_sh_common_reset (sh, priv->child_count);
- sh->lookup_done = lookup_done;
- loc_copy (&sh->lookup_loc, loc);
- sh->lookup_flags = flags;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s on subvolume %s",
- loc->path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame,
- afr_sh_common_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, xattr_req);
-
- if (!--call_count)
- break;
- }
- }
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- if (xattr_req)
- dict_unref (xattr_req);
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- return 0;
-}
+ AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, inodelk, dom, &loc,
+ F_SETLK, &flock, NULL);
+ loc_wipe(&loc);
+ return 0;
+}
int
-afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
+afr_selfheal_tryentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ loc_t loc = {
+ 0,
+ };
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- sh->op_failed = -1;
- afr_sh_missing_entries_done (frame, this);
- } else {
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &sh->parent_loc,
- afr_sh_find_fresh_parents,
- NULL, AFR_LOOKUP_FAIL_CONFLICTS,
- NULL);
- }
+ loc_wipe(&loc);
- return 0;
+ return afr_locked_fill(frame, this, locked_on);
}
int
-afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this)
+afr_selfheal_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ loc_t loc = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- sh = &local->self_heal;
- int_lock = &local->internal_lock;
+ priv = this->private;
+ local = frame->local;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- afr_sh_missing_entries_done (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req, AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on,
+ NULL);
+
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
}
+ }
- return 0;
+ loc_wipe(&loc);
+
+ return afr_locked_fill(frame, this, locked_on);
}
int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk)
+afr_selfheal_tie_breaker_entrylk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
+ priv = this->private;
+ local = frame->local;
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- afr_set_lock_number (frame, this);
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- int_lock->lk_basename = base_name;
- int_lock->lk_loc = loc;
- int_lock->lock_cbk = lock_cbk;
+ afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count,
+ &eagain_count);
- afr_nonblocking_entrylk (frame, this);
+ if (lock_count > priv->child_count / 2 && eagain_count) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on, NULL);
- return 0;
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ }
+
+ loc_wipe(&loc);
+
+ return afr_locked_fill(frame, this, locked_on);
}
-static int
-afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
+int
+afr_selfheal_unentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
- int32_t op_errno = 0;
+ loc_t loc = {
+ 0,
+ };
- local = frame->local;
- sh = &local->self_heal;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "attempting to recreate missing entries for path=%s",
- local->loc.path);
+ AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
- ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno);
- if (ret)
- goto out;
+ loc_wipe(&loc);
- afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
- lock_cbk);
- return 0;
-out:
- int_lock = &local->internal_lock;
- int_lock->lock_op_ret = -1;
- lock_cbk (frame, this);
- return 0;
+ return 0;
}
-static int
-afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_data_set(xlator_t *this, dict_t *xdata)
{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_conflicting_sh_cbk);
- return 0;
+ return afr_is_pending_set(this, xdata, AFR_DATA_TRANSACTION);
}
-static int
-afr_self_heal_gfids (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_metadata_set(xlator_t *this, dict_t *xdata)
{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_gfid_sh_cbk);
- return 0;
+ return afr_is_pending_set(this, xdata, AFR_METADATA_TRANSACTION);
}
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+gf_boolean_t
+afr_is_entry_set(xlator_t *this, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
-
- priv = this->private;
-
- sh = &l->self_heal;
-
- lc = mem_get0 (this->local_pool);
- if (!lc)
- goto out;
-
- shc = &lc->self_heal;
-
- shc->unwind = sh->unwind;
- shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
- shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
- shc->do_gfid_self_heal = sh->do_gfid_self_heal;
- shc->do_data_self_heal = sh->do_data_self_heal;
- shc->do_metadata_self_heal = sh->do_metadata_self_heal;
- shc->do_entry_self_heal = sh->do_entry_self_heal;
- shc->forced_merge = sh->forced_merge;
- shc->background = sh->background;
- shc->type = sh->type;
-
- uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
-
- lc->child_up = memdup (l->child_up,
- sizeof (*lc->child_up) * priv->child_count);
- if (l->xattr_req)
- lc->xattr_req = dict_ref (l->xattr_req);
-
- if (l->cont.lookup.inode)
- lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
- if (l->cont.lookup.xattr)
- lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- sizeof (*lc->internal_lock.inode_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.entry_locked_nodes)
- lc->internal_lock.entry_locked_nodes =
- memdup (l->internal_lock.entry_locked_nodes,
- sizeof (*lc->internal_lock.entry_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.entry_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- sizeof (*lc->internal_lock.locked_nodes) * priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
+ return afr_is_pending_set(this, xdata, AFR_ENTRY_TRANSACTION);
+}
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+/*
+ * This function inspects the looked up replies (in an unlocked manner)
+ * and decides whether a locked verification and possible healing is
+ * required or not. It updates the three booleans for each type
+ * of healing. If the boolean flag gets set to FALSE, then we are sure
+ * no healing is required. If the boolean flag gets set to TRUE then
+ * we have to proceed with locked reinspection.
+ */
+int
+afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies_dst)
+{
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int i = 0;
+ int valid_cnt = 0;
+ struct iatt first = {
+ 0,
+ };
+ int first_idx = 0;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ inode = afr_inode_find(this, gfid);
+ if (!inode)
+ goto out;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1)
+ continue;
+
+ /* The data segment of the changelog can be non-zero to indicate
+ * the directory needs a full heal. So the check below ensures
+ * it's not a directory before setting the data_selfheal boolean.
+ */
+ if (data_selfheal && !IA_ISDIR(replies[i].poststat.ia_type) &&
+ afr_is_data_set(this, replies[i].xdata))
+ *data_selfheal = _gf_true;
+
+ if (metadata_selfheal && afr_is_metadata_set(this, replies[i].xdata))
+ *metadata_selfheal = _gf_true;
+
+ if (entry_selfheal && afr_is_entry_set(this, replies[i].xdata))
+ *entry_selfheal = _gf_true;
+
+ valid_cnt++;
+ if (valid_cnt == 1) {
+ first = replies[i].poststat;
+ first_idx = i;
+ continue;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, type)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "TYPE mismatch %d vs %d on %s for gfid:%s",
+ (int)first.ia_type, (int)replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;"
+ "type=file;gfid=%s;"
+ "ia_type-%d=%s;ia_type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(replies[i].poststat.ia_gfid), first_idx,
+ gf_inode_type_to_str(first.ia_type), i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type));
+ ret = -EIO;
+ goto out;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, uid)) {
+ gf_msg_debug(this->name, 0,
+ "UID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)first.ia_uid, (int)replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, gid)) {
+ gf_msg_debug(this->name, 0,
+ "GID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)first.ia_uid, (int)replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, prot)) {
+ gf_msg_debug(this->name, 0,
+ "MODE mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)st_mode_from_ia(first.ia_prot, 0),
+ (int)st_mode_from_ia(replies[i].poststat.ia_prot, 0),
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (IA_ISREG(first.ia_type) &&
+ !IA_EQUAL(first, replies[i].poststat, size)) {
+ gf_msg_debug(this->name, 0,
+ "SIZE mismatch "
+ "%lld vs %lld on %s for gfid:%s",
+ (long long)first.ia_size,
+ (long long)replies[i].poststat.ia_size,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (data_selfheal)
+ *data_selfheal = _gf_true;
+ }
+ }
+
+ if (valid_cnt > 0 && link_inode) {
+ *link_inode = inode_link(inode, NULL, NULL, &first);
+ if (!*link_inode) {
+ ret = -EINVAL;
+ goto out;
+ }
+ } else if (valid_cnt < 2) {
+ ret = afr_check_stale_error(replies, priv);
+ goto out;
+ }
+
+ ret = 0;
out:
- return lc;
+ if (replies && replies_dst)
+ afr_replies_copy(replies_dst, replies, priv->child_count);
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+
+ return ret;
}
-int
-afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
+inode_t *
+afr_inode_find(xlator_t *this, uuid_t gfid)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_local_t * orig_frame_local = NULL;
- afr_self_heal_t * orig_frame_sh = NULL;
- char sh_type_str[256] = {0,};
- gf_boolean_t split_brain = _gf_false;
+ inode_table_t *table = NULL;
+ inode_t *inode = NULL;
- priv = this->private;
- local = bgsh_frame->local;
- sh = &local->self_heal;
+ table = this->itable;
+ if (!table)
+ return NULL;
- if (local->govinda_gOvinda || sh->mdata_spb || sh->data_spb)
- split_brain = _gf_true;
+ inode = inode_find(table, gfid);
+ if (inode)
+ return inode;
- afr_set_split_brain (this, sh->inode, split_brain);
+ inode = inode_new(table);
+ if (!inode)
+ return NULL;
- afr_self_heal_type_str_get (sh, sh_type_str,
- sizeof(sh_type_str));
- if (sh->op_failed) {
- gf_loglevel_t loglevel = GF_LOG_ERROR;
- if (priv->shd.iamshd)
- loglevel = GF_LOG_DEBUG;
+ gf_uuid_copy(inode->gfid, gfid);
- gf_log (this->name, loglevel, "background %s self-heal "
- "failed on %s", sh_type_str, local->loc.path);
+ return inode;
+}
- } else {
- gf_log (this->name, GF_LOG_DEBUG, "background %s self-heal "
- "completed on %s", sh_type_str, local->loc.path);
+call_frame_t *
+afr_frame_create(xlator_t *this, int32_t *op_errno)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ pid_t pid = GF_CLIENT_PID_SELF_HEALD;
- }
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ return NULL;
+ }
- FRAME_SU_UNDO (bgsh_frame, afr_local_t);
+ local = AFR_FRAME_INIT(frame, (*op_errno));
+ if (!local) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
- if (!sh->unwound && sh->unwind) {
- orig_frame_local = sh->orig_frame->local;
- orig_frame_sh = &orig_frame_local->self_heal;
- orig_frame_sh->actual_sh_started = _gf_true;
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- sh->op_failed);
- }
+ syncopctx_setfspid(&pid);
- if (sh->background) {
- LOCK (&priv->lock);
- {
- priv->background_self_heals_started--;
- }
- UNLOCK (&priv->lock);
- }
+ frame->root->pid = pid;
- AFR_STACK_DESTROY (bgsh_frame);
+ afr_set_lk_owner(frame, this, frame->root);
- return 0;
+ return frame;
}
int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t op_errno = 0;
- int ret = 0;
- afr_self_heal_t *orig_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- loc_t *loc = NULL;
-
- local = frame->local;
- orig_sh = &local->self_heal;
- priv = this->private;
-
- GF_ASSERT (local->loc.path);
-
- gf_log (this->name, GF_LOG_TRACE,
- "performing self heal on %s (metadata=%d data=%d entry=%d)",
- local->loc.path,
- local->self_heal.do_metadata_self_heal,
- local->self_heal.do_data_self_heal,
- local->self_heal.do_entry_self_heal);
-
- op_errno = ENOMEM;
- sh_frame = copy_frame (frame);
- if (!sh_frame)
- goto out;
- afr_set_lk_owner (sh_frame, this, sh_frame->root);
- afr_set_low_priority (sh_frame);
+afr_selfheal_newentry_mark(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry)
+{
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int **changelog = NULL;
- sh_local = afr_local_copy (local, this);
- if (!sh_local)
- goto out;
- sh_frame->local = sh_local;
- sh = &sh_local->self_heal;
+ priv = this->private;
- sh->inode = inode_ref (inode);
- sh->orig_frame = frame;
+ gf_uuid_copy(inode->gfid, replies[source].poststat.ia_gfid);
- sh->completion_cbk = afr_self_heal_completion_cbk;
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
- sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
- gf_afr_mt_char);
- if (!sh->success)
- goto out;
- sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
- gf_afr_mt_int);
- if (!sh->sources)
- goto out;
- sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
- priv->child_count,
- gf_afr_mt_int);
- if (!sh->locked_nodes)
- goto out;
+ changelog = afr_mark_pending_changelog(priv, newentry, xattr,
+ replies[source].poststat.ia_type);
- sh->pending_matrix = afr_matrix_create (priv->child_count,
- priv->child_count);
- if (!sh->pending_matrix)
- goto out;
+ if (!changelog) {
+ ret = -ENOMEM;
+ goto out;
+ }
- sh->delta_matrix = afr_matrix_create (priv->child_count,
- priv->child_count);
- if (!sh->delta_matrix)
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret |= afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ }
+out:
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
- sh->fresh_parent_dirs = afr_children_create (priv->child_count);
- if (!sh->fresh_parent_dirs)
- goto out;
- ret = afr_sh_common_create (sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+int
+afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
+{
+ int ret = -1;
+ int entry_ret = 1;
+ int metadata_ret = 1;
+ int data_ret = 1;
+ int or_ret = 0;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode,
+ &data_selfheal, &metadata_selfheal,
+ &entry_selfheal, NULL);
+ if (ret)
+ goto out;
+
+ if (!(data_selfheal || metadata_selfheal || entry_selfheal)) {
+ ret = 2;
+ goto out;
+ }
+
+ if (inode->ia_type == IA_IFREG) {
+ ret = afr_selfheal_data_open(this, inode, &fd);
+ if (!fd) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ if (data_selfheal && priv->data_self_heal)
+ data_ret = afr_selfheal_data(frame, this, fd);
+
+ if (metadata_selfheal && priv->metadata_self_heal)
+ metadata_ret = afr_selfheal_metadata(frame, this, inode);
+
+ if (entry_selfheal && priv->entry_self_heal)
+ entry_ret = afr_selfheal_entry(frame, this, inode);
+
+ or_ret = (data_ret | metadata_ret | entry_ret);
+
+ if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO)
+ ret = -EIO;
+ else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1)
+ ret = 1;
+ else if (or_ret < 0)
+ ret = or_ret;
+ else
+ ret = 0;
- if (local->self_heal.background) {
- LOCK (&priv->lock);
- {
- if (priv->background_self_heals_started
- < priv->background_self_heal_count) {
- priv->background_self_heals_started++;
+out:
+ if (inode)
+ inode_unref(inode);
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+/*
+ * This is the entry point for healing a given GFID. The return values for this
+ * function are as follows:
+ * '0' if the self-heal is successful
+ * '1' if the afr-xattrs are non-zero (due to on-going IO) and no heal is needed
+ * '2' if the afr-xattrs are all-zero and no heal is needed
+ * $errno if the heal on the gfid failed.
+ */
+int
+afr_selfheal(xlator_t *this, uuid_t gfid)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
- } else {
- local->self_heal.background = _gf_false;
- sh->background = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
- }
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ return ret;
- if (!local->loc.parent) {
- sh->do_missing_entry_self_heal = _gf_false;
- sh->do_gfid_self_heal = _gf_false;
- }
+ local = frame->local;
+ local->xdata_req = dict_new();
- FRAME_SU_DO (sh_frame, afr_local_t);
- if (sh->do_missing_entry_self_heal) {
- afr_self_heal_conflicting_entries (sh_frame, this);
- } else if (sh->do_gfid_self_heal) {
- GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
- afr_self_heal_gfids (sh_frame, this);
- } else {
- loc = &sh_local->loc;
- if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
- if (!uuid_is_null (inode->gfid))
- GF_ASSERT (!uuid_compare (inode->gfid,
- sh->sh_gfid_req));
- uuid_copy (loc->gfid, sh->sh_gfid_req);
- }
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
+ ret = afr_selfheal_do(frame, this, gfid);
- afr_sh_missing_entries_done (sh_frame, this);
- }
- op_errno = 0;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
-out:
- if (op_errno) {
- orig_sh->unwind (frame, this, -1, op_errno, 1);
- if (sh_frame)
- AFR_STACK_DESTROY (sh_frame);
- }
- return 0;
+ return ret;
}
-void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size)
+afr_local_t *
+__afr_dequeue_heals(afr_private_t *priv)
{
- GF_ASSERT (str && (size > strlen (" missing-entry gfid "
- "meta-data data entry")));
+ afr_local_t *local = NULL;
- if (self_heal_p->do_metadata_self_heal) {
- snprintf (str, size, " meta-data");
- }
+ if (list_empty(&priv->heal_waiting))
+ goto none;
+ if ((priv->background_self_heal_count > 0) &&
+ (priv->healers >= priv->background_self_heal_count))
+ goto none;
- if (self_heal_p->do_data_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " data");
- }
+ local = list_entry(priv->heal_waiting.next, afr_local_t, healer);
+ priv->heal_waiters--;
+ GF_ASSERT(priv->heal_waiters >= 0);
+ list_del_init(&local->healer);
+ list_add(&local->healer, &priv->healing);
+ priv->healers++;
+ return local;
+none:
+ gf_msg_debug(THIS->name, 0,
+ "Nothing dequeued. "
+ "Num healers: %d, Num Waiters: %d",
+ priv->healers, priv->heal_waiters);
+ return NULL;
+}
- if (self_heal_p->do_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " entry");
- }
+int
+afr_refresh_selfheal_wrap(void *opaque)
+{
+ call_frame_t *heal_frame = opaque;
+ afr_local_t *local = heal_frame->local;
+ int ret = 0;
- if (self_heal_p->do_missing_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str),
- " missing-entry");
- }
+ ret = afr_selfheal(heal_frame->this, local->refreshinode->gfid);
+ return ret;
+}
- if (self_heal_p->do_gfid_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " gfid");
- }
+int
+afr_refresh_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ call_frame_t *heal_frame = opaque;
+ xlator_t *this = heal_frame->this;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = heal_frame->local;
+
+ LOCK(&priv->lock);
+ {
+ list_del_init(&local->healer);
+ priv->healers--;
+ GF_ASSERT(priv->healers >= 0);
+ local = __afr_dequeue_heals(priv);
+ }
+ UNLOCK(&priv->lock);
+
+ AFR_STACK_DESTROY(heal_frame);
+
+ if (local)
+ afr_heal_synctask(this, local);
+ return 0;
}
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type)
+void
+afr_heal_synctask(xlator_t *this, afr_local_t *local)
{
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+ int ret = 0;
+ call_frame_t *heal_frame = NULL;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_DATA;
- break;
- case AFR_METADATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_METADATA;
- break;
- case AFR_ENTRY_TRANSACTION:
- sh_type = AFR_SELF_HEAL_ENTRY;
- break;
- case AFR_ENTRY_RENAME_TRANSACTION:
- GF_ASSERT (0);
- break;
+ heal_frame = local->heal_frame;
+ ret = synctask_new(this->ctx->env, afr_refresh_selfheal_wrap,
+ afr_refresh_heal_done, heal_frame, heal_frame);
+ if (ret < 0)
+ /* Heal not launched. Will be queued when the next inode
+ * refresh happens and shd hasn't healed it yet. */
+ afr_refresh_heal_done(ret, heal_frame, heal_frame);
+}
+
+gf_boolean_t
+afr_throttled_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ gf_boolean_t can_heal = _gf_true;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+
+ LOCK(&priv->lock);
+ {
+ if ((priv->background_self_heal_count > 0) &&
+ (priv->heal_wait_qlen + priv->background_self_heal_count) >
+ (priv->heal_waiters + priv->healers)) {
+ list_add_tail(&local->healer, &priv->heal_waiting);
+ priv->heal_waiters++;
+ local = __afr_dequeue_heals(priv);
+ } else {
+ can_heal = _gf_false;
}
- return sh_type;
+ }
+ UNLOCK(&priv->lock);
+
+ if (can_heal) {
+ if (local)
+ afr_heal_synctask(this, local);
+ else
+ gf_msg_debug(this->name, 0,
+ "Max number of heals are "
+ "pending, background self-heal rejected.");
+ }
+
+ return can_heal;
}
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type)
{
- int ret = -1;
- uuid_t pargfid = {0};
+ int source = -1;
+ int i = 0;
- if (!child)
- goto out;
+ /* Give preference to local child to save on bandwidth */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->local[i] && sources[i]) {
+ if ((type == AFR_DATA_TRANSACTION) && AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
- if (!uuid_is_null (parent->inode->gfid))
- uuid_copy (pargfid, parent->inode->gfid);
- else if (!uuid_is_null (parent->gfid))
- uuid_copy (pargfid, parent->gfid);
+ source = i;
+ goto out;
+ }
+ }
- if (uuid_is_null (pargfid))
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ goto out;
+ }
+ }
+out:
+ return source;
+}
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
}
+ }
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
- uuid_copy (child->pargfid, pargfid);
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (!child->inode) {
- ret = -1;
- goto out;
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
}
+ goto out;
+ }
- ret = 0;
out:
- if ((ret == -1) && child)
- loc_wipe (child);
-
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
deleted file mode 100644
index 0ef5edc9226..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __AFR_SELF_HEAL_COMMON_H__
-#define __AFR_SELF_HEAL_COMMON_H__
-
-#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
-#define AFR_SH_MIN_PARTICIPANTS 2
-
-typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
-
-typedef enum {
- AFR_LOOKUP_FAIL_CONFLICTS = 1,
- AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
-} afr_lookup_flags_t;
-
-int
-afr_sh_select_source (int sources[], int child_count);
-
-int
-afr_sh_source_count (int sources[], int child_count);
-
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
-
-int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count);
-
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type);
-
-int
-afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
- struct iatt *bufs, afr_self_heal_type type,
- int32_t *success_children, int32_t *subvol_status);
-
-int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
-
-void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size);
-
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type);
-
-int
-afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type,
- int32_t *subvol_status, gf_boolean_t ignore_ignorant);
-void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
-
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc);
-
-int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
- int32_t flags, dict_t *xdata);
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf,
- struct iatt *parentbuf);
-int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk);
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index);
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk);
-afr_local_t *
-afr_local_copy (afr_local_t *l, xlator_t *this);
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len, gf_boolean_t block,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler);
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
-typedef int
-(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata);
-int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, call_frame_t **impunge_frame);
-void
-afr_sh_reset (call_frame_t *frame, xlator_t *this);
-
-void
-afr_children_intersection_get (int32_t *set1, int32_t *set2,
- int *intersection, unsigned int child_count);
-int
-afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
- struct iatt *bufs);
-#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 16ec50871e6..37bcc2b3f9e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,1419 +8,884 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
+#include <glusterfs/byte-order.h>
+#include "protocol-common.h"
+#include "afr-messages.h"
+#include <glusterfs/events.h>
-int
-afr_sh_data_fail (call_frame_t *frame, xlator_t *this);
-
-static inline gf_boolean_t
-afr_sh_data_proceed (unsigned int success_count)
-{
- return (success_count >= AFR_SH_MIN_PARTICIPANTS);
+#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
+static int
+__checksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, uint32_t weak, uint8_t *strong, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ struct afr_reply *replies = NULL;
+ int i = (long)cookie;
+
+ local = frame->local;
+ replies = local->replies;
+
+ replies[i].valid = 1;
+ replies[i].op_ret = op_ret;
+ replies[i].op_errno = op_errno;
+ if (xdata) {
+ replies[i].buf_has_zeroes = dict_get_str_boolean(
+ xdata, "buf-has-zeroes", _gf_false);
+ replies[i].fips_mode_rchecksum = dict_get_str_boolean(
+ xdata, "fips-mode-rchecksum", _gf_false);
+ }
+ if (strong) {
+ if (replies[i].fips_mode_rchecksum) {
+ memcpy(local->replies[i].checksum, strong, SHA256_DIGEST_LENGTH);
+ } else {
+ memcpy(local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
+ }
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+static gf_boolean_t
+__afr_can_skip_data_block_heal(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size, struct iatt *poststat)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_subvols = NULL;
+ gf_boolean_t checksum_match = _gf_true;
+ struct afr_reply *replies = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+
+ xdata = dict_new();
+ if (!xdata)
+ goto out;
+ if (dict_set_int32_sizen(xdata, "check-zero-filled", 1)) {
+ dict_unref(xdata);
+ goto out;
+ }
+
+ wind_subvols = alloca0(priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || healed_sinks[i])
+ wind_subvols[i] = 1;
+ }
+
+ AFR_ONLIST(wind_subvols, frame, __checksum_cbk, rchecksum, fd, offset, size,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+
+ if (!replies[source].valid || replies[source].op_ret != 0)
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+ if (replies[i].valid) {
+ if (memcmp(replies[source].checksum, replies[i].checksum,
+ replies[source].fips_mode_rchecksum
+ ? SHA256_DIGEST_LENGTH
+ : MD5_DIGEST_LENGTH)) {
+ checksum_match = _gf_false;
+ break;
+ }
+ }
+ }
+
+ if (checksum_match) {
+ if (HAS_HOLES(poststat))
+ return _gf_true;
+
+ /* For non-sparse files, we might be better off writing the
+ * zeroes to sinks to avoid mismatch of disk-usage in bricks. */
+ if (local->replies[source].buf_has_zeroes)
+ return _gf_false;
+ else
+ return _gf_true;
+ }
+out:
+ return _gf_false;
+}
+
+static gf_boolean_t
+__afr_is_sink_zero_filled(xlator_t *this, fd_t *fd, size_t size, off_t offset,
+ int sink)
+{
+ afr_private_t *priv = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec *iovec = NULL;
+ int count = 0;
+ int ret = 0;
+ gf_boolean_t zero_filled = _gf_false;
+
+ priv = this->private;
+ ret = syncop_readv(priv->children[sink], fd, size, offset, 0, &iovec,
+ &count, &iobref, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ret = iov_0filled(iovec, count);
+ if (!ret)
+ zero_filled = _gf_true;
+out:
+ if (iovec)
+ GF_FREE(iovec);
+ if (iobref)
+ iobref_unref(iobref);
+ return zero_filled;
}
-extern int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->completion_cbk (frame, this);
+static int
+__afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct afr_reply *replies, int type)
+{
+ struct iovec *iovec = NULL;
+ int count = 0;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = syncop_readv(priv->children[source], fd, size, offset, 0, &iovec,
+ &count, &iobref, NULL, NULL, NULL);
+ if (ret <= 0)
+ return ret;
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+
+ /*
+ * TODO: Use fiemap() and discard() to heal holes
+ * in the future.
+ *
+ * For now,
+ *
+ * - if the source had any holes at all,
+ * AND
+ * - if we are writing past the original file size
+ * of the sink
+ * AND
+ * - is NOT the last block of the source file. if
+ * the block contains EOF, it has to be written
+ * in order to set the file size even if the
+ * last block is 0-filled.
+ * AND
+ * - if the read buffer is filled with only 0's
+ *
+ * then, skip writing to this source. We don't depend
+ * on the write to happen to update the size as we
+ * have performed an ftruncate() upfront anyways.
+ */
+#define is_last_block(o, b, s) ((s >= o) && (s <= (o + b)))
+ if (HAS_HOLES((&replies[source].poststat)) &&
+ offset >= replies[i].poststat.ia_size &&
+ !is_last_block(offset, size, replies[source].poststat.ia_size) &&
+ (iov_0filled(iovec, count) == 0))
+ continue;
+
+ /* Avoid filling up sparse regions of the sink with 0-filled
+ * writes.*/
+ if (type == AFR_SELFHEAL_DATA_FULL &&
+ HAS_HOLES((&replies[source].poststat)) &&
+ ((offset + size) <= replies[i].poststat.ia_size) &&
+ (iov_0filled(iovec, count) == 0) &&
+ __afr_is_sink_zero_filled(this, fd, size, offset, i)) {
+ continue;
+ }
+
+ ret = syncop_writev(priv->children[i], fd, iovec, count, offset, iobref,
+ 0, NULL, NULL, NULL, NULL);
+ if (ret != iov_length(iovec, count)) {
+ /* write() failed on this sink. unset the corresponding
+ member in sinks[] (which is healed_sinks[] in the
+ caller) so that this server does NOT get considered
+ as successfully healed.
+ */
+ healed_sinks[i] = 0;
+ }
+ }
+ if (iovec)
+ GF_FREE(iovec);
+ if (iobref)
+ iobref_unref(iobref);
+
+ return ret;
+}
+
+static gf_boolean_t
+afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source,
+ unsigned char *healed_sinks)
+{
+ afr_private_t *priv = this->private;
+ int i = 0;
+
+ if (!locked_on[source])
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && locked_on[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
}
-
-int
-afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static int
+afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks, off_t offset,
+ size_t size, int type, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "flush failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- }
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ unsigned char *data_lock = NULL;
- return 0;
-}
+ priv = this->private;
+ data_lock = alloca0(priv->child_count);
-int
-afr_sh_data_close (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- return 0;
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
+ data_lock);
+ {
+ if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) {
+ ret = -ENOTCONN;
+ goto unlock;
}
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
- gf_log (this->name, GF_LOG_DEBUG,
- "closing fd of %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- sh->healing_fd, NULL);
-
- if (!--call_count)
- break;
+ if (type == AFR_SELFHEAL_DATA_DIFF &&
+ __afr_can_skip_data_block_heal(frame, this, fd, source,
+ healed_sinks, offset, size,
+ &replies[source].poststat)) {
+ ret = 0;
+ goto unlock;
}
- return 0;
+ ret = __afr_selfheal_data_read_write(
+ frame, this, fd, source, healed_sinks, offset, size, replies, type);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, offset, size,
+ data_lock);
+ return ret;
}
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+static int
+afr_selfheal_data_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks)
{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_finish (frame, this);
- }
+ local = frame->local;
+ priv = this->private;
+ if (!priv->ensure_durability)
return 0;
-}
-
-int
-afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
- int source = 0;
- int32_t valid = 0;
- struct iatt stbuf = {0,};
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- GF_ASSERT (0);
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid, NULL);
- if (!--call_count)
- break;
- }
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, fsync, fd, 0, NULL);
- return 0;
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret != 0)
+ /* fsync() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ return 0;
}
-int
-afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+static int
+afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks,
+ int source, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int i = 0;
- GF_ASSERT (sh->source == child_index);
- if (op_ret != -1) {
- sh->buf[child_index] = *buf;
- afr_sh_data_setattr (frame, this);
- } else {
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
- "time-stamps after self-heal", local->loc.path);
- afr_sh_data_fail (frame, this);
+ if (priv->data_self_heal_algorithm == AFR_SELFHEAL_DATA_DYNAMIC) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] && i != source)
+ continue;
+ if (replies[i].poststat.ia_size) {
+ type = AFR_SELFHEAL_DATA_DIFF;
+ break;
+ }
}
-
- return 0;
-}
-
-/*
- * If there are any writes after the self-heal is triggered then the
- * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
- * stat on the source and then set the [am]times
- */
-int
-afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->fstat,
- sh->healing_fd, NULL);
- return 0;
-}
-
-//Fun fact, lock_cbk is being used for both lock & unlock
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->data_lock_held);
-
- sh->data_lock_held = _gf_false;
- int_lock->lock_cbk = lock_cbk;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing data selfheal of %s", local->loc.path);
-
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
-
- return 0;
+ } else {
+ type = priv->data_self_heal_algorithm;
+ }
+ return type;
}
-int
-afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ unsigned char *healed_sinks, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ off_t off = 0;
+ size_t block = 0;
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int ret = -1;
+ call_frame_t *iter_frame = NULL;
+ unsigned char arbiter_sink_status = 0;
- local = frame->local;
- sh = &local->self_heal;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid));
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing failed data selfheal of %s", local->loc.path);
+ priv = this->private;
+ if (priv->arbiter_count) {
+ arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
+ healed_sinks[ARBITER_BRICK_INDEX] = 0;
+ }
- sh->op_failed = 1;
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
- return 0;
-}
+ block = 128 * 1024 * priv->data_self_heal_window_size;
-int
-afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr, dict_t *xdata)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change "
- "log failed on %s for subvol %s, reason: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- if (sh->old_loop_frame)
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- afr_sh_data_fail (frame, this);
- goto out;
- }
- if (!IA_ISREG (sh->type)) {
- afr_sh_data_finish (frame, this);
- goto out;
- }
- GF_ASSERT (sh->old_loop_frame);
- afr_sh_data_lock (frame, this, 0, 0, _gf_true,
- afr_post_sh_big_lock_success,
- afr_post_sh_big_lock_failure);
- }
-out:
- return 0;
-}
-
-int
-afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
- gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s",
- lkowner_utoa (&frame->root->lk_owner));
- afr_sh_print_pending_matrix (sh->delta_matrix, this);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
+ type = afr_data_self_heal_type_get(priv, healed_sinks, source, replies);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ ret = -ENOMEM;
+ goto out;
+ }
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
+ for (off = 0; off < replies[source].poststat.ia_size; off += block) {
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ ret = -ENOTCONN;
+ goto out;
}
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
+ ret = afr_selfheal_data_block(iter_frame, this, fd, source,
+ healed_sinks, off, block, type, replies);
+ if (ret < 0)
+ goto out;
- GF_ASSERT (call_count);
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i],
- NULL);
- if (!--call_count)
- break;
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = -ENOTCONN;
+ goto out;
}
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
+ ret = afr_selfheal_data_fsync(frame, this, fd, healed_sinks);
- return 0;
-}
-
-
-static struct afr_sh_algorithm *
-sh_algo_from_name (xlator_t *this, char *name)
-{
- int i = 0;
-
- while (afr_self_heal_algorithms[i].name) {
- if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
- return &afr_self_heal_algorithms[i];
- }
-
- i++;
- }
+out:
+ if (arbiter_sink_status)
+ healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
- return NULL;
+ if (iter_frame)
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
}
-
static int
-sh_zero_byte_files_exist (afr_local_t *local, int child_count)
+__afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks, uint64_t size)
{
- int i = 0;
- int ret = 0;
- afr_self_heal_t *sh = NULL;
-
- sh = &local->self_heal;
- for (i = 0; i < child_count; i++) {
- if (!local->child_up[i] || sh->child_errno[i])
- continue;
- if (sh->buf[i].ia_size == 0) {
- ret = 1;
- break;
- }
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- return ret;
-}
+ local = frame->local;
+ priv = this->private;
+ /* This will send truncate on the arbiter brick as well if it is marked as
+ * sink. If changelog is enabled on the volume it captures truncate as a
+ * data transactions on the arbiter brick. This will help geo-rep to
+ * properly sync the data from master to slave if arbiter is the ACTIVE
+ * brick during syncing and which had got some entries healed for data as
+ * part of self heal.
+ */
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, size,
+ NULL);
-struct afr_sh_algorithm *
-afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- struct afr_sh_algorithm * algo = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
-
- if (algo == NULL) {
- /* option not set, so fall back on heuristics */
-
- if (sh_zero_byte_files_exist (local, priv->child_count)
- || (sh->file_size <= (priv->data_self_heal_window_size *
- this->ctx->page_size))) {
-
- /*
- * If the file does not exist on one of the subvolumes,
- * or a zero-byte file exists (created by entry self-heal)
- * the entire content has to be copied anyway, so there
- * is no benefit from using the "diff" algorithm.
- *
- * If the file size is about the same as page size,
- * the entire file can be read and written with a few
- * (pipelined) STACK_WINDs, which will be faster
- * than "diff" which has to read checksums and then
- * read and write.
- */
-
- algo = sh_algo_from_name (this, "full");
-
- } else {
- algo = sh_algo_from_name (this, "diff");
- }
- }
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret == -1)
+ /* truncate() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
- return algo;
+ return 0;
}
-
-int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_has_source_witnesses(xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- struct afr_sh_algorithm *sh_algo = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
+ int i = 0;
+ afr_private_t *priv = NULL;
- sh->algo_completion_cbk = afr_sh_data_erase_pending;
- sh->algo_abort_cbk = afr_sh_data_fail;
+ priv = this->private;
- sh_algo = afr_sh_data_pick_algo (frame, this);
-
- sh->algo = sh_algo;
- sh_algo->fn (frame, this);
-
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
}
-int
-afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+static gf_boolean_t
+afr_does_size_mismatch(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int call_count = 0;
- int child_index = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
- }
- UNLOCK (&frame->lock);
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt *min = NULL;
+ struct iatt *max = NULL;
- call_count = afr_frame_return (frame);
+ priv = this->private;
- if (call_count == 0) {
- if (sh->op_failed)
- afr_sh_data_fail (frame, this);
- else
- afr_sh_data_sync_prepare (frame, this);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- return 0;
-}
+ if (replies[i].op_ret < 0)
+ continue;
+ if (!sources[i])
+ continue;
-int
-afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
+ if (AFR_IS_ARBITER_BRICK(priv, i) && (replies[i].poststat.ia_size == 0))
+ continue;
+ if (!min)
+ min = &replies[i].poststat;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ if (!max)
+ max = &replies[i].poststat;
- sources = sh->sources;
- call_count = sh->active_sinks;
+ if (min->ia_size > replies[i].poststat.ia_size)
+ min = &replies[i].poststat;
- local->call_count = call_count;
+ if (max->ia_size < replies[i].poststat.ia_size)
+ max = &replies[i].poststat;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size,
- NULL);
-
- if (!--call_count)
- break;
- }
+ if (min && max) {
+ if (min->ia_size != max->ia_size)
+ return _gf_true;
+ }
- return 0;
+ return _gf_false;
}
-int
-afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
+static void
+afr_mark_biggest_witness_as_source(xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- afr_private_t *priv = NULL;
- int ret = 0;
- int i = 0;
-
- priv = this->private;
- sh->source = afr_sh_select_source (sh->sources, priv->child_count);
- if (sh->source < 0) {
- ret = -1;
- goto out;
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t biggest_witness = 0;
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == sh->source || sh->child_errno[i])
- continue;
+ priv = this->private;
+ /* Find source with biggest witness count */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (biggest_witness < witness[i])
+ biggest_witness = witness[i];
+ }
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[sh->source]))
- sh->sources[i] = 0;
- }
+ /* Mark files with less witness count as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (witness[i] < biggest_witness)
+ sources[i] = 0;
+ }
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-out:
- return ret;
+ return;
}
-void
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+/* This is a tie breaker function. Only one source be assigned here */
+static void
+afr_mark_newest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int source = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- sh->block_size = this->ctx->page_size;
- sh->file_size = sh->buf[source].ia_size;
-
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
-
- if (sh->background && sh->unwind && !sh->unwound) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- sh->op_failed);
- sh->unwound = _gf_true;
- }
-
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[sh->source]->name,
- sh->active_sinks);
-
- sh->actual_sh_started = _gf_true;
- afr_sh_data_trim_sinks (frame, this);
-}
-
-int
-afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int ret = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
- lkowner_utoa (&frame->root->lk_owner));
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION, NULL, _gf_true);
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to "
- "resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ uint32_t max_ctime = 0;
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible "
- "split-brain). Please delete the file from all but "
- "the preferred subvolume.", local->loc.path);
+ priv = this->private;
+ /* Find source with latest ctime */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
- sh->data_spb = _gf_true;
- afr_sh_data_fail (frame, this);
- return 0;
+ if (max_ctime <= replies[i].poststat.ia_ctime) {
+ source = i;
+ max_ctime = replies[i].poststat.ia_ctime;
}
+ }
- sh->data_spb = _gf_false;
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- if (sh->sync_done) {
- afr_sh_data_setattr (frame, this);
- } else {
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- afr_sh_data_fix (frame, this);
- }
- return 0;
-}
-
-int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type)
-{
- afr_private_t *priv = NULL;
- int read_child = -1;
- int32_t **pending_matrix = NULL;
- int32_t *sources = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int32_t nsources = 0;
- int32_t prev_read_child = -1;
- int32_t config_read_child = -1;
- int32_t subvol_status = 0;
-
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
-
- pending_matrix = afr_matrix_create (priv->child_count,
- priv->child_count);
- if (NULL == pending_matrix)
- goto out;
-
- sources = local->cont.lookup.sources;
- memset (sources, 0, sizeof (*sources) * priv->child_count);
-
- nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
- sources, success_children, txn_type,
- &subvol_status, _gf_false);
- if (subvol_status & SPLIT_BRAIN) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Possible split-brain",
- local->loc.path);
- switch (txn_type) {
- case AFR_DATA_TRANSACTION:
- local->cont.lookup.possible_spb = _gf_true;
- nsources = 1;
- sources[success_children[0]] = 1;
- break;
- case AFR_ENTRY_TRANSACTION:
- read_child = afr_get_no_xattr_dir_read_child (this,
- success_children,
- bufs);
- sources[read_child] = 1;
- nsources = 1;
- break;
- default:
- break;
- }
- }
- if (nsources < 0)
- goto out;
-
- prev_read_child = local->read_child_index;
- config_read_child = priv->read_child;
- read_child = afr_select_read_child_from_policy (success_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- sources);
-out:
- afr_matrix_cleanup (pending_matrix, priv->child_count);
- gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
- read_child);
- return read_child;
-}
-
-int
-afr_sh_data_special_file_fix (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count ; i++)
- if (1 == local->child_up[i])
- sh->success[i] = 1;
-
- afr_sh_data_erase_pending (frame, this);
-
- return 0;
+ /* Only mark one of the files as source to break ties */
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ sources[source] = 1;
}
-int
-afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fstat of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->buf[child_index] = *buf;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- } else {
- gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed "
- "on %s, reason %s", local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- /* Previous versions of glusterfs might have set
- * the pending data xattrs which need to be erased
- */
- if (!afr_sh_data_proceed (sh->success_count)) {
- gf_log (this->name, GF_LOG_ERROR, "inspecting metadata "
- "succeeded on < %d children, aborting "
- "self-heal for %s", AFR_SH_MIN_PARTICIPANTS,
- local->loc.path);
- afr_sh_data_fail (frame, this);
- goto out;
- }
- if (IA_ISREG (sh->type))
- afr_sh_data_fxattrop_fstat_done (frame, this);
- else
- afr_sh_data_special_file_fix (frame, this);
- }
-out:
- return 0;
-}
-
-
-int
-afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- int child = 0;
- int32_t *fstat_children = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- fstat_children = memdup (sh->success_children,
- sizeof (*fstat_children) * priv->child_count);
- if (!fstat_children) {
- afr_sh_data_fail (frame, this);
- goto out;
- }
- call_count = sh->success_count;
- local->call_count = call_count;
+static int
+__afr_selfheal_data_finalize_source(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ unsigned char *undid_pending, struct afr_reply *replies, uint64_t *witness)
+{
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count) {
+ /* split brain */
+ source = afr_mark_split_brain_source_sinks(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, AFR_DATA_TRANSACTION);
+ if (source < 0) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=data;"
+ "file=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
+ return -EIO;
+ }
+
+ _afr_fav_child_reset_sink_xattrs(
+ frame, this, inode, source, healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION, locked_on, replies);
+ goto out;
+ }
+
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
+
+ /* If there are no witnesses/size-mismatches on sources we are done*/
+ if (!afr_does_size_mismatch(this, sources, replies) &&
+ !afr_has_source_witnesses(this, sources, witness))
+ goto out;
+
+ afr_mark_largest_file_as_source(this, sources, replies);
+ afr_mark_biggest_witness_as_source(this, sources, witness);
+ afr_mark_newest_file_as_source(this, sources, replies);
+ if (priv->arbiter_count)
+ /* Choose non-arbiter brick as source for empty files. */
+ afr_mark_source_sinks_if_file_empty(this, sources, sinks, healed_sinks,
+ locked_on, replies,
+ AFR_DATA_TRANSACTION);
- memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- child = fstat_children[i];
- if (child == -1)
- break;
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) child,
- priv->children[child],
- priv->children[child]->fops->fstat,
- sh->healing_fd, NULL);
- --call_count;
- }
- GF_ASSERT (!call_count);
out:
- if (fstat_children)
- GF_FREE (fstat_children);
- return 0;
-}
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ source = afr_choose_source_by_policy(priv, sources, AFR_DATA_TRANSACTION);
-void
-afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fxattrop of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->xattr[child_index] = dict_ref (xattr);
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- } else {
- gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s "
- "failed on %s, reason %s", local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
+ return source;
}
+/*
+ * __afr_selfheal_data_prepare:
+ *
+ * This function inspects the on-disk xattrs and determines which subvols
+ * are sources and sinks.
+ *
+ * The return value is the index of the subvolume to be used as the source
+ * for self-healing, or -1 if no healing is necessary/split brain.
+ */
int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
-{
- int call_count = -1;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
- op_errno, xattr);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- if (!afr_sh_data_proceed (sh->success_count)) {
- gf_log (this->name, GF_LOG_ERROR, "%s, inspecting "
- "change log succeeded on < %d children",
- local->loc.path, AFR_SH_MIN_PARTICIPANTS);
- afr_sh_data_fail (frame, this);
- goto out;
- }
- afr_sh_data_fstat (frame, this);
- }
-out:
- return 0;
-}
-
-
-int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
+__afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *pflag)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- int32_t *zero_pending = NULL;
- int call_count = 0;
- int i = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (!xattr_req) {
- ret = -1;
- goto out;
- }
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
- for (i = 0; i < priv->child_count; i++) {
- zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
- gf_afr_mt_int32_t);
- if (!zero_pending) {
- ret = -1;
- goto out;
- }
- ret = dict_set_dynptr (xattr_req, priv->pending_key[i],
- zero_pending,
- 3 * sizeof (*zero_pending));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value");
- goto out;
- } else {
- zero_pending = NULL;
- }
- }
+ priv = this->private;
- afr_reset_xattr (sh->xattr, priv->child_count);
- afr_reset_children (sh->success_children, priv->child_count);
- memset (sh->child_errno, 0,
- sizeof (*sh->child_errno) * priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req, NULL);
-
- if (!--call_count)
- break;
- }
- }
-
-out:
- if (xattr_req)
- dict_unref (xattr_req);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
- if (ret) {
- if (zero_pending)
- GF_FREE (zero_pending);
- afr_sh_data_fail (frame, this);
- }
+ if (ret)
+ return ret;
- return 0;
-}
+ witness = alloca0(priv->child_count * sizeof(*witness));
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, locked_on, sources,
+ sinks, witness, pflag);
+ if (ret)
+ return ret;
-int
-afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
- local = frame->local;
- sh = &local->self_heal;
+ source = __afr_selfheal_data_finalize_source(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ undid_pending, replies, witness);
+ if (source < 0)
+ return -EIO;
- sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this);
- return 0;
+ return source;
}
-int
-afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
- "failed for %s. by %s",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
-
- sh->data_lock_failure_handler (frame, this);
- } else {
+static int
+__afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+ gf_boolean_t did_sh = _gf_true;
+ gf_boolean_t is_arbiter_the_only_sink = _gf_false;
+ gf_boolean_t empty_file = _gf_false;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "self-heal as only %d number "
+ "of subvolumes "
+ "could be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data_prepare(frame, this, fd->inode, data_lock,
+ sources, sinks, healed_sinks,
+ undid_pending, locked_replies, NULL);
+ if (ret < 0)
+ goto unlock;
+
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ source = ret;
+
+ if (AFR_IS_ARBITER_BRICK(priv, source)) {
+ empty_file = afr_is_file_empty_on_all_children(priv,
+ locked_replies);
+ if (empty_file)
+ goto restore_time;
+
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_truncate_sinks(
+ frame, this, fd, healed_sinks,
+ locked_replies[source].poststat.ia_size);
+ if (ret < 0)
+ goto unlock;
+
+ if (priv->arbiter_count &&
+ AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
+ healed_sinks[ARBITER_BRICK_INDEX]) {
+ is_arbiter_the_only_sink = _gf_true;
+ goto restore_time;
+ }
+ ret = 0;
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock);
+ if (ret < 0)
+ goto out;
+
+ if (!did_sh)
+ goto out;
+
+ ret = afr_selfheal_data_do(frame, this, fd, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+restore_time:
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
+
+ if (!is_arbiter_the_only_sink && !empty_file) {
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ did_sh = _gf_false;
+ goto skip_undo_pending;
+ }
+ }
+ ret = afr_selfheal_undo_pending(
+ frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION, locked_replies, data_lock);
+skip_undo_pending:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock);
+out:
- gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
- "done for %s by %s. Proceding to self-heal",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+ if (did_sh)
+ afr_log_selfheal(fd->inode->gfid, this, ret, "data", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
- sh->data_lock_success_handler (frame, this);
- }
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
- return 0;
+ return ret;
}
int
-afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "failed for %s. by %s",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
-
- if (!sh->data_lock_block) {
- sh->data_lock_failure_handler(frame, this);
- } else {
- int_lock->lock_cbk =
- afr_sh_data_post_blocking_inodelk_cbk;
- afr_blocking_lock (frame, this);
- }
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "done for %s by %s. Proceeding to self-heal",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
- sh->data_lock_success_handler (frame, this);
- }
-
- return 0;
-}
+ afr_local_t *local = NULL;
+ int i = (long)cookie;
-int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t len)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ local = frame->local;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
+ syncbarrier_wake(&local->barrier);
- afr_set_lock_number (frame, this);
-
- int_lock->lk_flock.l_start = start;
- int_lock->lk_flock.l_len = len;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
-
- afr_nonblocking_inodelk (frame, this);
-
- return 0;
+ return 0;
}
int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data_open(xlator_t *this, inode_t *inode, fd_t **fd)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- sh->data_lock_held = _gf_true;
- sh->sync_done = _gf_true;
- afr_sh_data_fxattrop (frame, this);
- return 0;
-}
+ int ret = 0;
+ fd_t *fd_tmp = NULL;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
-int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- afr_sh_set_timestamps (frame, this);
- return 0;
-}
+ fd_tmp = fd_create(inode, 0);
+ if (!fd_tmp)
+ return -ENOMEM;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len, gf_boolean_t block,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ fd_unref(fd_tmp);
+ goto out;
+ }
+ local = frame->local;
- local = frame->local;
- sh = &local->self_heal;
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_data_open_cbk, open, &loc,
+ O_RDWR | O_LARGEFILE, fd_tmp, NULL);
- sh->data_lock_success_handler = success_handler;
- sh->data_lock_failure_handler = failure_handler;
- sh->data_lock_block = block;
- return afr_sh_data_lock_rec (frame, this, start, len);
-}
+ ret = -ENOTCONN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
-int
-afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
- gf_boolean_t block = _gf_true;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "open of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- /*
- * The read and write self-heal trigger codepaths do not provide
- * an unwind callback. We run a trylock in these codepaths
- * because we are sensitive to locking latency.
- */
- block = sh->unwind ? _gf_true : _gf_false;
- afr_sh_data_lock (frame, this, 0, 0, block,
- afr_sh_data_big_lock_success,
- afr_sh_data_fail);
+ if (local->replies[i].op_ret < 0) {
+ ret = -local->replies[i].op_errno;
+ continue;
}
- return 0;
-}
+ ret = 0;
+ break;
+ }
+ if (ret < 0) {
+ fd_unref(fd_tmp);
+ goto out;
+ } else {
+ fd_bind(fd_tmp);
+ }
+
+ *fd = fd_tmp;
+out:
+ loc_wipe(&loc);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
+}
int
-afr_sh_data_open (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd)
{
- int i = 0;
- int call_count = 0;
- fd_t *fd = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+ inode_t *inode = fd->inode;
- call_count = afr_up_children_count (local->child_up, priv->child_count);
- local->call_count = call_count;
+ priv = this->private;
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
+ locked_on = alloca0(priv->child_count);
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if(!local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- O_RDWR|O_LARGEFILE, fd, NULL);
-
- if (!--call_count)
- break;
+ ret = afr_selfheal_tie_breaker_inodelk(frame, this, inode, priv->sh_domain,
+ 0, 0, locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "self-heal as only %d number of "
+ "subvolumes could be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
}
- return 0;
-}
-
-
-int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
-
- local = frame->local;
- sh = &local->self_heal;
-
- /* Self-heal completion cbk changes inode split-brain status based on
- * govinda_gOvinda, mdata_spb, data_spb values. Initialize data_spb
- * with current split-brain status. If for some reason self-heal
- * fails(locking phase etc), it makes sure we retain the split-brain
- * status before this self-heal started.
- */
- sh->data_spb = afr_is_split_brain (this, sh->inode);
- if (sh->do_data_self_heal &&
- afr_data_self_heal_enabled (priv->data_self_heal)) {
- afr_sh_data_open (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "not doing data self heal on %s",
- local->loc.path);
- afr_sh_data_done (frame, this);
- }
+ ret = __afr_selfheal_data(frame, this, fd, locked_on);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
- return 0;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 874e8ede4ea..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,2409 +8,1269 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "inode.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
- do {\
- _local = _frame->local;\
- _sh = &_local->self_heal;\
- _sh_frame = _sh->sh_frame;\
- _sh_local = _sh_frame->local;\
- _sh_sh = &_sh_local->self_heal;\
- } while (0);
-
-int
-afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
- int child_index);
-int
-afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_entry_done;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "finishing entry selfheal of %s", local->loc.path);
-
- afr_sh_entry_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr, dict_t *xdata)
-{
- long i = 0;
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *orig_local = NULL;
- call_frame_t *orig_frame = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
-
- afr_children_add_child (sh->fresh_children, i, priv->child_count);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to erase pending xattrs on %s (%s)",
- local->loc.path, priv->children[i]->name,
- strerror (op_errno));
- }
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->source == -1) {
- //this happens if the forced merge option is set
- read_child = sh->fresh_children[0];
- } else {
- read_child = sh->source;
- }
- afr_inode_set_read_ctx (this, sh->inode, read_child,
- sh->fresh_children);
- orig_frame = sh->orig_frame;
- orig_local = orig_frame->local;
-
- if (sh->source != -1) {
- orig_local->cont.lookup.buf.ia_nlink = sh->buf[sh->source].ia_nlink;
- }
-
- afr_sh_entry_finish (frame, this);
- }
-
- return 0;
-}
-
+#include <glusterfs/byte-order.h>
+#include "afr-transaction.h"
+#include "afr-messages.h"
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/events.h>
int
-afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->entries_skipped) {
- need_unwind = 1;
- sh->op_failed = _gf_true;
- goto out;
- }
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i],
- NULL);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ gf_uuid_copy(loc.pargfid, dir->gfid);
+ loc.name = name;
+
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ }
out:
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
+ loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
- return 0;
+ return ret;
}
-
-
-static int
-next_active_source (call_frame_t *frame, xlator_t *this,
- int current_active_source)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int source = -1;
- int next_active_source = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- source = sh->source;
-
- if (source != -1) {
- if (current_active_source != source)
- next_active_source = source;
- goto out;
- }
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_source)) {
-
- next_active_source = i;
- break;
- }
- }
-out:
- return next_active_source;
-}
-
-
-
-static int
-next_active_sink (call_frame_t *frame, xlator_t *this,
- int current_active_sink)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int next_active_sink = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_sink)) {
-
- next_active_sink = i;
- break;
- }
- }
-
- return next_active_sink;
-}
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-int
-afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = (long) cookie;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setattr on parent directory of %s on subvolume %s failed: %s",
- expunge_local->loc.path,
- priv->children[active_src]->name, strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int32_t valid = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
-
- active_src = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "removed %s on %s",
- expunge_local->loc.path,
- priv->children[active_src]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "removing %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- }
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->setattr,
- &expunge_sh->parent_loc,
- &expunge_sh->parentbuf,
- valid, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "expunging file %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->unlink,
- &expunge_local->loc, 0, NULL);
-
- return 0;
-}
-
-
-
int
-afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "expunging directory %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->rmdir,
- &expunge_local->loc, 1, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf,
- struct iatt *parentbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int type = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- loc_t *loc = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- loc = &expunge_local->loc;
-
- type = buf->ia_type;
- if (loc->parent && uuid_is_null (loc->parent->gfid))
- uuid_copy (loc->pargfid, parentbuf->ia_gfid);
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
- break;
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
case IA_IFDIR:
- afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
- break;
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- expunge_local->loc.path,
- priv->children[active_src]->name, type);
- goto out;
- break;
- }
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
- return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, -1, EINVAL);
-
- return 0;
+ loc_wipe(&loc);
+ return ret;
}
-
int
-afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
+afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ unsigned char *sources, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "lookup of %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ loc_t srcloc = {
+ 0,
+ };
+ loc_t anonloc = {
+ 0,
+ };
+ xlator_t *this = frame->this;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ struct iatt *iatt = NULL;
+ char *linkname = NULL;
+ mode_t mode = 0;
+ struct iatt newent = {
+ 0,
+ };
+ unsigned char *newentry = NULL;
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
+
+ priv = this->private;
+ iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
+ if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Invalid ia_type (%d) or gfid(%s). source brick=%d, "
+ "pargfid=%s, name=%s",
+ iatt->ia_type, iatt_uuid_str, source,
+ uuid_utoa_r(dir->gfid, dir_uuid_str), name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata)
+ return -ENOMEM;
+ newentry = alloca0(priv->child_count);
+ loc.parent = inode_ref(dir);
+ gf_uuid_copy(loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
+
+ ret = afr_selfheal_entry_delete(this, dir, name, inode, dst, replies);
+ if (ret)
+ goto out;
+
+ ret = dict_set_gfuuid(xdata, "gfid-req", replies[source].poststat.ia_gfid,
+ true);
+ if (ret)
+ goto out;
+
+ srcloc.inode = inode_ref(inode);
+ gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
+ newentry[dst] = 1;
+ ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
+ sources, newentry);
+ if (ret)
+ goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
+ }
+
+ mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+
+ switch (iatt->ia_type) {
+ case IA_IFDIR:
+ ret = syncop_mkdir(priv->children[dst], &loc, mode, 0, xdata, NULL);
+ break;
+ case IA_IFLNK:
+ if (!newentry[dst]) {
+ ret = syncop_link(priv->children[dst], &srcloc, &loc, &newent,
+ NULL, NULL);
+ } else {
+ ret = syncop_readlink(priv->children[source], &srcloc,
+ &linkname, 4096, NULL, NULL);
+ if (ret <= 0)
+ goto out;
+ ret = syncop_symlink(priv->children[dst], &loc, linkname, NULL,
+ xdata, NULL);
+ }
+ break;
+ default:
+ ret = dict_set_int32_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
goto out;
- }
-
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf,
- postparent);
+ ret = syncop_mknod(
+ priv->children[dst], &loc, mode,
+ makedev(ia_major(iatt->ia_rdev), ia_minor(iatt->ia_rdev)),
+ &newent, xdata, NULL);
+ break;
+ }
- return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
+ if (xdata)
+ dict_unref(xdata);
+ GF_FREE(linkname);
+ loc_wipe(&loc);
+ loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
+ return ret;
}
-
-int
-afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &expunge_local->loc, NULL);
-
- return 0;
-}
-
-int
-afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
+static int
+__afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int need_expunge = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = expunge_sh->active_source;
- source = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1 && op_errno == ENOENT)
- need_expunge = 1;
- else if (op_ret == -1)
- goto out;
-
- if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) &&
- !uuid_is_null (buf->ia_gfid) &&
- (uuid_compare (expunge_sh->entrybuf.ia_gfid, buf->ia_gfid) != 0)) {
- char uuidbuf1[64];
- char uuidbuf2[64];
- gf_log (this->name, GF_LOG_DEBUG,
- "entry %s found on %s with mismatching gfid (%s/%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- uuid_utoa_r (expunge_sh->entrybuf.ia_gfid, uuidbuf1),
- uuid_utoa_r (buf->ia_gfid, uuidbuf2));
- need_expunge = 1;
- }
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
- if (need_expunge) {
- gf_log (this->name, GF_LOG_INFO,
- "Entry %s is missing on %s and deleting from "
- "replica's other bricks",
- expunge_local->loc.path,
- priv->children[source]->name);
+ priv = this->private;
- if (postparent)
- expunge_sh->parentbuf = *postparent;
+ if (!replies[source].valid)
+ return -EIO;
- afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
-
- return 0;
- }
+ /* Skip healing this entry if the last lookup on it failed for reasons
+ * other than ENOENT.
+ */
+ if ((replies[source].op_ret < 0) && (replies[source].op_errno != ENOENT))
+ return -replies[source].op_errno;
-out:
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- expunge_local->loc.path,
- priv->children[source]->name);
+ if (replies[source].op_ret == 0) {
+ ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
+ source, sources,
+ &replies[source].poststat.ia_gfid, NULL);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if (replies[source].op_ret == -1 &&
+ replies[source].op_errno == ENOENT) {
+ ret = afr_selfheal_entry_delete(this, fd->inode, name, inode, i,
+ replies);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "looking up %s under %s failed (%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- strerror (op_errno));
- }
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[source].poststat.ia_gfid))
+ continue;
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *expunge_frame = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int source = 0;
- int op_errno = 0;
- char *name = NULL;
- int op_ret = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- source = sh->source;
- sh->expunge_done = afr_sh_entry_expunge_entry_done;
-
- name = entry->d_name;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existence of %s under %s",
- name, local->loc.path);
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
-
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- expunge_sh->active_source = active_src;
- expunge_sh->entrybuf = entry->d_stat;
- loc_copy (&expunge_sh->parent_loc, &local->loc);
-
- ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
- name);
- if (ret != 0) {
- op_errno = EINVAL;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", expunge_local->loc.path,
- priv->children[source]->name);
-
- STACK_WIND_COOKIE (expunge_frame,
- afr_sh_entry_expunge_entry_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->lookup,
- &expunge_local->loc, NULL);
-
- ret = 0;
-out:
- if (ret == -1)
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_expunge_all (frame, this);
- return 0;
+ ret = afr_selfheal_recreate_entry(frame, i, source, sources,
+ fd->inode, name, inode, replies);
}
+ if (ret < 0)
+ break;
+ }
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_expunge_entry (frame, this, entry);
- }
-
- return 0;
+ return ret;
}
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s to expunge entries",
- local->loc.path);
- goto out;
- }
-
- active_src = next_active_sink (frame, this, sh->active_source);
- sh->active_source = active_src;
-
- if (sh->op_failed) {
- goto out;
- }
-
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "expunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
-
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-out:
- afr_sh_entry_impunge_all (frame, this);
- return 0;
-
-}
-
-
-int
-afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- if (op_ret < 0)
- sh->entries_skipped = _gf_true;
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this);
-
- return 0;
-}
-
-void
-afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
+ struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid,
+ char *bname, int src_idx,
+ unsigned char *locked_on, int *src)
{
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
-
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, op_ret, op_errno);
+ int i = 0;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ void *gfid = NULL;
+ ia_type_t ia_type = IA_INVAL;
+
+ priv = this->private;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ ia_type = replies[src_idx].poststat.ia_type;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == src_idx)
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
+ src_idx = i;
+ ia_type = replies[src_idx].poststat.ia_type;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ continue;
+ }
+
+ if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
+ (ia_type == replies[i].poststat.ia_type)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, src_idx, i, locked_on, src,
+ NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Skipping conservative merge on the "
+ "file.");
+ return ret;
+ }
+
+ if (ia_type != replies[i].poststat.ia_type) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch detected "
+ "for <gfid:%s>/%s>, %s on %s and %s on %s. "
+ "Skipping conservative merge on the file.",
+ uuid_utoa(pargfid), bname,
+ gf_inode_type_to_str(replies[i].poststat.ia_type),
+ priv->children[i]->name,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
+ "%d=%s;child-%d=%s;type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
+ return -1;
+ }
+ }
+
+ return 0;
}
-int
-afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop,
- dict_t *xdata)
+static int
+__afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr done for %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "setattr (%s) on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- call_count = afr_frame_return (impunge_frame);
- if (call_count == 0) {
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- 0, op_errno);
- }
+ int ret = 0;
+ int i = 0;
+ int source = -1;
+ int src = -1;
+ afr_private_t *priv = NULL;
- return 0;
-}
+ priv = this->private;
-int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop,
- dict_t *xdata)
-{
- int call_count = 0;
- afr_local_t *setattr_local = NULL;
-
- setattr_local = setattr_frame->local;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr on parent directory (%s) failed: %s",
- setattr_local->loc.path, strerror (op_errno));
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ source = i;
+ break;
}
+ }
- call_count = afr_frame_return (setattr_frame);
- if (call_count == 0)
- AFR_STACK_DESTROY (setattr_frame);
+ if (source == -1) {
+ /* entry got deleted in the mean time? */
return 0;
-}
-
-int
-afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *setattr_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- int32_t op_errno = 0;
- int child_index = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_sh->entrybuf.ia_uid,
- impunge_sh->entrybuf.ia_gid);
-
- setattr_frame = copy_frame (impunge_frame);
- if (!setattr_frame) {
- op_errno = ENOMEM;
- goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out);
- setattr_local = setattr_frame->local;
- call_count = afr_errno_count (NULL, impunge_sh->child_errno,
- priv->child_count, 0);
- loc_copy (&setattr_local->loc, &impunge_sh->parent_loc);
- impunge_local->call_count = call_count;
- setattr_local->call_count = call_count;
+ }
+
+ /* Set all the sources as 1, otheriwse newentry_mark won't be set */
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ sources[i] = 1;
+ }
+ }
+
+ ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
+ source, sources,
+ &replies[source].poststat.ia_gfid, NULL);
+ if (ret)
+ return ret;
+
+ /* In case of type mismatch / unable to resolve gfid mismatch on the
+ * entry, return -1.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch(
+ this, replies, inode, fd->inode->gfid, name, source, locked_on, &src);
+
+ if (ret < 0)
+ return ret;
+ if (src != -1) {
+ source = src;
for (i = 0; i < priv->child_count; i++) {
- if (impunge_sh->child_errno[i])
- continue;
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- STACK_WIND_COOKIE (setattr_frame,
- afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->setattr,
- &setattr_local->loc,
- &impunge_sh->parentbuf, valid, NULL);
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->setattr,
- &impunge_local->loc,
- &impunge_sh->entrybuf, valid, NULL);
- call_count--;
- }
- GF_ASSERT (!call_count);
- return 0;
-out:
- if (setattr_frame)
- AFR_STACK_DESTROY (setattr_frame);
- afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno);
- return 0;
-}
-
-int
-afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to perform xattrop on %s (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
+ if (i != src && replies[i].valid &&
+ gf_uuid_compare(replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid)) {
+ sources[i] = 0;
+ }
}
+ }
- afr_sh_entry_impunge_setattr (impunge_frame, this);
- return 0;
-out:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
-void
-afr_sh_prepare_new_entry_pending_matrix (int32_t **pending,
- int *child_errno,
- struct iatt *buf,
- unsigned int child_count)
-{
- int midx = 0;
- int idx = 0;
- int i = 0;
-
- midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- if (IA_ISDIR (buf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else if (IA_ISREG (buf->ia_type))
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- else
- idx = -1;
- for (i = 0; i < child_count; i++) {
- if (child_errno[i])
- continue;
- pending[i][midx] = hton32 (1);
- if (idx == -1)
- continue;
- pending[i][idx] = hton32 (1);
+ if (src != -1) {
+ if (!gf_uuid_compare(replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid))
+ continue;
+ } else if (replies[i].op_errno != ENOENT) {
+ continue;
}
-}
-
-int
-afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
- xlator_t *this)
-{
- int active_src = 0;
- dict_t *xattr = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t op_errno = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- afr_sh_prepare_new_entry_pending_matrix (impunge_local->pending,
- impunge_sh->child_errno,
- &impunge_sh->entrybuf,
- priv->child_count);
- xattr = dict_new ();
- if (!xattr) {
- op_errno = ENOMEM;
- goto out;
- }
-
- afr_set_pending_dict (priv, xattr, impunge_local->pending);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ ret |= afr_selfheal_recreate_entry(frame, i, source, sources, fd->inode,
+ name, inode, replies);
+ }
- if (xattr)
- dict_unref (xattr);
- return 0;
-out:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- return 0;
+ return ret;
}
-int
-afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+__afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- impunge_sh->child_errno[child_index] = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "creation of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- } else {
- impunge_sh->child_errno[child_index] = 0;
- }
-
- call_count = afr_frame_return (impunge_frame);
- if (call_count == 0) {
- if (!afr_errno_count (NULL, impunge_sh->child_errno,
- priv->child_count, 0)) {
- // new_file creation failed every where
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- goto out;
- }
- afr_sh_entry_impunge_perform_xattrop (impunge_frame, this);
- }
-out:
- return 0;
+ int ret = -1;
+
+ if (source < 0)
+ ret = __afr_selfheal_merge_dirent(frame, this, fd, name, inode, sources,
+ healed_sinks, locked_on, replies);
+ else
+ ret = __afr_selfheal_heal_dirent(frame, this, fd, name, inode, source,
+ sources, healed_sinks, locked_on,
+ replies);
+ return ret;
}
-int
-afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static gf_boolean_t
+is_full_heal_marker_present(xlator_t *this, dict_t *xdata, int idx)
{
- int call_count = 0;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
- //For symlinks impunge is attempted un-conditionally
- //So the file can already exist.
- if ((op_ret < 0) && (op_errno == EEXIST))
- op_ret = 0;
- }
-
- call_count = afr_frame_return (impunge_frame);
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ int i = 0;
+ int pending[3] = {
+ 0,
+ };
+ void *pending_raw = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!xdata)
+ return _gf_false;
+
+ /* Iterate over each of the priv->pending_keys[] elements and then
+ * see if any of them have data segment non-zero. If they do, return
+ * true. Else return false.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+
+ if (!pending_raw)
+ continue;
+
+ memcpy(pending, pending_raw, sizeof(pending));
+ if (ntoh32(pending[idx]))
+ return _gf_true;
+ }
+
+ return _gf_false;
}
-int
-afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+static gf_boolean_t
+afr_need_full_heal(xlator_t *this, struct afr_reply *replies, int source,
+ unsigned char *healed_sinks, afr_transaction_type type)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- loc_t *loc = NULL;
- struct iatt *buf = NULL;
- loc_t oldloc = {0};
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- loc = &impunge_local->loc;
- buf = &impunge_sh->entrybuf;
-
- oldloc.inode = inode_ref (loc->inode);
- uuid_copy (oldloc.gfid, buf->ia_gfid);
- gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s",
- loc->path, priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->link,
- &oldloc, loc, NULL);
- loc_wipe (&oldloc);
+ int i = 0;
+ int idx = 0;
+ afr_private_t *priv = NULL;
- return 0;
-}
+ priv = this->private;
-int
-afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- if (op_ret < 0) {
- afr_sh_entry_impunge_create_file (impunge_frame, this,
- (long)cookie);
- } else {
- afr_sh_entry_impunge_hardlink (impunge_frame, this,
- (long)cookie);
- }
- return 0;
-}
+ if (!priv->esh_granular)
+ return _gf_true;
-int
-afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame,
- xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_self_heal_t *sh = NULL;
- loc_t *loc = NULL;
- dict_t *xattr_req = NULL;
- loc_t oldloc = {0};
- int ret = -1;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- loc = &impunge_local->loc;
-
- xattr_req = dict_new ();
- if (!xattr_req)
- goto out;
- oldloc.inode = inode_ref (loc->inode);
- uuid_copy (oldloc.gfid, stbuf->ia_gfid);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lookup,
- &oldloc, xattr_req);
- ret = 0;
-out:
- if (xattr_req)
- dict_unref (xattr_req);
- loc_wipe (&oldloc);
- if (ret)
- sh->impunge_done (frame, this, -1, ENOMEM);
- return 0;
-}
+ if (type != AFR_ENTRY_TRANSACTION)
+ return _gf_true;
-int
-afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- int ret = 0;
+ priv = this->private;
+ idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
- priv = this->private;
- impunge_local = impunge_frame->local;
+ /* If there is a clear source, check whether the full-heal-indicator
+ * is present in its xdata. Otherwise, we need to examine all the
+ * participating bricks and then figure if *even* one of them has a
+ * full-heal-indicator.
+ */
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing file %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ if (source != -1) {
+ if (is_full_heal_marker_present(this, replies[source].xdata, idx))
+ return _gf_true;
+ }
- dict = dict_new ();
- if (!dict)
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+ /* else ..*/
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mknod,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- makedev (ia_major (stbuf->ia_rdev),
- ia_minor (stbuf->ia_rdev)), 0, dict);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
- if (dict)
- dict_unref (dict);
+ if (is_full_heal_marker_present(this, replies[i].xdata, idx))
+ return _gf_true;
+ }
- return 0;
+ return _gf_false;
}
-
-
-int
-afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
+static int
+__afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ uint64_t *witness)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
-
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+ int i = 0;
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return 0;
- }
-
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
+ priv = this->private;
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing directory %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ sources_count = AFR_COUNT(sources, priv->child_count);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mkdir,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- 0, dict);
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count || afr_does_witness_exist(this, witness)) {
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return -1;
+ }
- if (dict)
- dict_unref (dict);
+ source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, const char *linkname)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- struct iatt *buf = NULL;
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- buf = &impunge_local->cont.symlink.buf;
-
- dict = dict_new ();
- if (!dict) {
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, ENOMEM);
- goto out;
+ /*If the selected source does not blame any other brick, then mark
+ * everything as sink to trigger conservative merge.
+ */
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
}
+ return -1;
+ }
- GF_ASSERT (!uuid_is_null (buf->ia_gfid));
- ret = afr_set_dict_gfid (dict, buf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO,
- "%s: dict set gfid failed",
- impunge_local->loc.path);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing symlink %s -> %s on %s",
- impunge_local->loc.path, linkname,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc, 0, dict);
-
- if (dict)
- dict_unref (dict);
-out:
- return 0;
+ return source;
}
-
int
-afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ unsigned char *pflag)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "unlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
- impunge_sh->linkname);
-
- return 0;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+ if (ret)
+ return ret;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, locked_on, sources,
+ sinks, witness, pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_entry_finalize_source(this, sources, healed_sinks,
+ locked_on, replies, witness);
+
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
+
+ return ret;
}
-
-int
-afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+static int
+afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *parent_idx_inode,
+ xlator_t *subvol, gf_boolean_t full_crawl)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking symlink %s with wrong target on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->unlink,
- &impunge_local->loc, 0, NULL);
-
+ int ret = 0;
+ int source = -1;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ inode_t *inode = NULL;
+ struct afr_reply *replies = NULL;
+ struct afr_reply *par_replies = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
return 0;
+ }
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref(xattr);
+ return -1;
+ }
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ locked_on = alloca0(priv->child_count);
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+ par_replies = alloca0(priv->child_count * sizeof(*par_replies));
+
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes "
+ " could be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, this->name);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, locked_on,
+ sources, sinks, healed_sinks,
+ par_replies, &source, NULL);
+ if (ret < 0)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, fd->inode, name, replies,
+ locked_on, xattr);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_dirent(frame, this, fd, name, inode, source,
+ sources, healed_sinks, locked_on,
+ replies);
+
+ if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
+ inode->ia_type);
+ /* Why is ret force-set to 0? We do not care about
+ * index purge failing for full heal as it is quite
+ * possible during replace-brick that not all files
+ * and directories have their name indices present in
+ * entry-changes/.
+ */
+ ret = 0;
+ }
+ }
+
+unlock:
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, locked_on,
+ NULL);
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ if (par_replies)
+ afr_replies_wipe(par_replies, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
-
-int
-afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf, dict_t *xdata)
+static inode_t *
+afr_shd_entry_changes_index_inode(xlator_t *this, xlator_t *subvol,
+ uuid_t pargfid)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- /* symlink doesn't exist on the sink */
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- afr_sh_entry_impunge_symlink (impunge_frame, this,
- child_index, impunge_sh->linkname);
- return 0;
- }
-
-
- /* symlink exists on the sink, so check if targets match */
-
- if (strcmp (linkname, impunge_sh->linkname) == 0) {
- /* targets match, nothing to do */
-
- goto out;
- } else {
- /*
- * Hah! Sneaky wolf in sheep's clothing!
- */
- afr_sh_entry_impunge_symlink_unlink (impunge_frame, this,
- child_index);
- return 0;
- }
+ int ret = -1;
+ void *index_gfid = NULL;
+ loc_t rootloc = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr(subvol, &rootloc, &xattr,
+ GF_XATTROP_ENTRY_CHANGES_GFID, NULL, NULL);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr(xattr, GF_XATTROP_ENTRY_CHANGES_GFID, &index_gfid);
+ if (ret) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_copy(loc.pargfid, index_gfid);
+ loc.name = gf_strdup(uuid_utoa(pargfid));
+
+ ret = syncop_lookup(subvol, &loc, &iatt, NULL, NULL, NULL);
+ if (ret < 0) {
+ errno = -ret;
+ goto out;
+ }
+
+ inode = inode_link(loc.inode, NULL, NULL, &iatt);
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&rootloc);
+ GF_FREE((char *)loc.name);
+ loc_wipe(&loc);
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ return inode;
}
-
-int
-afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checking symlink target of %s on %s",
- impunge_local->loc.path, priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf, dict_t *xdata)
+static int
+afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int child)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_sh->linkname = gf_strdup (linkname);
- afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index);
-
- return 0;
-
-out:
- LOCK (&impunge_frame->lock);
+ int ret = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ off_t offset = 0;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t mismatch = _gf_false;
+ afr_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ INIT_LIST_HEAD(&entries.list);
+
+ local = frame->local;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame)
+ return -ENOMEM;
+
+ loc.inode = afr_shd_entry_changes_index_inode(this, subvol,
+ fd->inode->gfid);
+
+ while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry(entry, &entries.list, list)
{
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
- impunge_local->cont.symlink.buf = *stbuf;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096, NULL);
-
- return 0;
-}
-
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- call_frame_t *frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t type = IA_INVAL;
- int active_src = 0;
- struct iatt *buf = NULL;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf,
- &impunge_sh->parentbuf);
-
- buf = &impunge_sh->entrybuf;
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- afr_sh_entry_impunge_check_hardlink (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- sh->impunge_done (frame, this, -1, EINVAL);
- break;
- }
+ offset = entry->d_off;
- return 0;
-}
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
-int
-afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- call_frame_t *frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t type = IA_INVAL;
- int active_src = 0;
- struct iatt *buf = NULL;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- buf = &impunge_sh->entrybuf;
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
+ ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
+ loc.inode, subvol,
+ local->need_full_crawl);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = -ENOTCONN;
break;
- case IA_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- sh->impunge_done (frame, this, -1, EINVAL);
+ }
+
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+ }
+ if (ret)
break;
}
- return 0;
-}
-
-gf_boolean_t
-afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child,
- unsigned int child_count)
-{
- gf_boolean_t recreate = _gf_false;
-
- GF_ASSERT (impunge_sh->child_errno);
-
- if (child == impunge_sh->active_source)
- goto out;
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
- if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
- recreate = _gf_true;
- goto out;
- }
+ loc_wipe(&loc);
- if (impunge_sh->child_errno[child] == ENOENT)
- recreate = _gf_true;
-out:
- return recreate;
+ AFR_STACK_DESTROY(iter_frame);
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
}
-unsigned int
-afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
- unsigned int child_count)
+static int
+afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data)
{
- int count = 0;
- int i = 0;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ afr_granular_esh_args_t *args = data;
+
+ /* Look up the actual inode associated with entry. If the lookup returns
+ * ESTALE or ENOENT, then it means we have a stale index. Remove it.
+ * This is analogous to the check in afr_shd_index_heal() except that
+ * here it is achieved through LOOKUP and in afr_shd_index_heal() through
+ * a GETXATTR.
+ */
+
+ loc.inode = inode_new(args->xl->itable);
+ loc.parent = inode_ref(args->heal_fd->inode);
+ gf_uuid_copy(loc.pargfid, loc.parent->gfid);
+ loc.name = entry->d_name;
+
+ ret = syncop_lookup(args->xl, &loc, &iatt, NULL, NULL, NULL);
+ if ((ret == -ENOENT) || (ret == -ESTALE)) {
+ /* The name indices under the pgfid index dir are guaranteed
+ * to be regular files. Hence the hardcoding.
+ */
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ ret = 0;
+ goto out;
+ }
+ /* TBD: afr_shd_zero_xattrop? */
- for (i = 0; i < child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, i, child_count))
- count++;
- }
+ ret = afr_selfheal_entry_dirent(args->frame, args->xl, args->heal_fd,
+ entry->d_name, parent->inode, subvol,
+ _gf_false);
+ AFR_STACK_RESET(args->frame);
+ if (args->frame->local == NULL)
+ ret = -ENOTCONN;
- return count;
-}
+ if (ret == -1)
+ args->mismatch = _gf_true;
-int
-afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
- xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- unsigned int recreate_count = 0;
- int i = 0;
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- impunge_sh->entrybuf = impunge_sh->buf[active_src];
- impunge_sh->parentbuf = impunge_sh->parentbufs[active_src];
- recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
- priv->child_count);
- if (!recreate_count) {
- afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0);
- goto out;
- }
- impunge_local->call_count = recreate_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!impunge_local->child_up[i]) {
- impunge_sh->child_errno[i] = ENOTCONN;
- continue;
- }
- if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
- impunge_sh->child_errno[i] = EEXIST;
- continue;
- }
- }
- for (i = 0; i < priv->child_count; i++) {
- if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count))
- continue;
- (void)afr_sh_entry_impunge_create (impunge_frame, this, i);
- recreate_count--;
- }
- GF_ASSERT (!recreate_count);
out:
- return 0;
+ loc_wipe(&loc);
+ return 0;
}
-void
-afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- unsigned int gfid_miss_count = 0;
- unsigned int children_up_count = 0;
- uuid_t gfid = {0};
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
-
- if (op_ret < 0)
- goto done;
- if (impunge_sh->child_errno[active_src]) {
- op_ret = -1;
- op_errno = impunge_sh->child_errno[active_src];
- goto done;
- }
-
- gfid_miss_count = afr_gfid_missing_count (this->name,
- impunge_sh->success_children,
- impunge_sh->buf, priv->child_count,
- impunge_local->loc.path);
- children_up_count = afr_up_children_count (impunge_local->child_up,
- priv->child_count);
- if ((gfid_miss_count == children_up_count) &&
- (children_up_count < priv->child_count)) {
- op_ret = -1;
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
- "gfid should not be assigned in this state for %s",
- impunge_local->loc.path);
- goto done;
- }
-
- if (gfid_miss_count) {
- afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
- impunge_sh->success_children,
- priv->child_count);
- if (uuid_is_null (gfid)) {
- sh->entries_skipped = _gf_true;
- gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry "
- "self-heal because of gfid absence",
- impunge_local->loc.path);
- goto done;
- }
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, gfid,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
- } else {
- afr_sh_entry_call_impunge_recreate (impunge_frame, this);
- }
- return;
-done:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
- return;
-}
-
-int
-afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
+static int
+afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int subvol_idx, gf_boolean_t is_src)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int ret = -1;
- call_frame_t *impunge_frame = NULL;
- afr_local_t *impunge_local = NULL;
- int active_src = 0;
- int op_errno = 0;
- int op_ret = -1;
-
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- sh->impunge_done = afr_sh_entry_impunge_entry_done;
-
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existence of %s under %s",
- entry->d_name, local->loc.path);
-
- ret = afr_impunge_frame_create (frame, this, active_src,
- &impunge_frame);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ afr_granular_esh_args_t args = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[subvol_idx];
+
+ args.frame = afr_copy_frame(frame);
+ if (!args.frame)
+ goto out;
+ args.xl = this;
+ /* args.heal_fd represents the fd associated with the original directory
+ * on which entry heal is being attempted.
+ */
+ args.heal_fd = fd;
+
+ /* @subvol here represents the subvolume of AFR where
+ * indices/entry-changes/<pargfid> will be processed
+ */
+ loc.inode = afr_shd_entry_changes_index_inode(this, subvol,
+ fd->inode->gfid);
+ if (!loc.inode) {
+ /* If granular heal failed on the sink (as it might sometimes
+ * because it is the src that would mostly contain the granular
+ * changelogs and the sink's entry-changes would be empty),
+ * do not treat heal as failure.
+ */
+ if (is_src)
+ ret = -errno;
+ else
+ ret = 0;
+ goto out;
+ }
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
- entry->d_name);
- loc_copy (&impunge_sh->parent_loc, &local->loc);
- if (ret != 0) {
- op_errno = ENOMEM;
- goto out;
- }
+ ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args,
+ afr_selfheal_entry_granular_dirent);
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS, NULL);
+ loc_wipe(&loc);
- op_ret = 0;
+ if (args.mismatch == _gf_true)
+ ret = -1;
out:
- if (ret) {
- if (impunge_frame)
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, op_ret, op_errno);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_impunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_impunge_entry (frame, this, entry);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int32_t active_src = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
- gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd",
- local->loc.path, sh->offset);
-
- STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset, NULL);
-
- return 0;
+ if (args.frame)
+ AFR_STACK_DESTROY(args.frame);
+ return ret;
}
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- active_src = next_active_source (frame, this, sh->active_source);
- sh->active_source = active_src;
-
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- afr_sh_entry_erase_pending (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "impunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
-
- afr_sh_entry_impunge_subvol (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- sh->active_source = -1;
- afr_sh_entry_expunge_all (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ unsigned char *sources, unsigned char *healed_sinks)
{
- int i = 0;
- int call_count = 0;
-
- int source = -1;
- int *sources = NULL;
-
- fd_t *fd = NULL;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = local->self_heal.source;
- sources = local->self_heal.sources;
-
- sh->block_size = 65536; //131072
- sh->offset = 0;
-
- call_count = sh->active_sinks;
- if (source != -1)
- call_count++;
-
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- if (source != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (source)",
- local->loc.path, priv->children[source]->name);
-
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->opendir,
- &local->loc, fd, NULL);
- call_count--;
- }
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (sink)",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, fd, NULL);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
+ int i = 0;
+ int ret = 0;
+ gf_boolean_t mismatch = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing entry selfheal on %s", uuid_utoa(fd->inode->gfid));
+
+ for (i = 0; i < priv->child_count; i++) {
+ /* Expunge */
+ if (!healed_sinks[i])
+ continue;
+
+ if (!local->need_full_crawl)
+ /* Why call afr_selfheal_entry_granular() on a "healed sink",
+ * given that it is the source that contains the granular
+ * indices?
+ * If the index for this directory is non-existent or empty on
+ * this subvol (=> clear sink), then it will return early
+ * without failure status.
+ * If the index is non-empty and it is yet a 'healed sink', then
+ * it is due to a split-brain in which case we anyway need to
+ * crawl the indices/entry-changes/pargfid directory.
+ */
+ ret = afr_selfheal_entry_granular(frame, this, fd, i, _gf_false);
+ else
+ ret = afr_selfheal_entry_do_subvol(frame, this, fd, i);
-int
-afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (source != -1)
- sh->success[source] = 1;
-
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for self-heal on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- if (source == -1 && sh->active_sinks < 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "cannot sync with 0 sources and 1 sink on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
}
+ if (ret)
+ break;
+ }
- if (source != -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing directory %s from subvolume %s to "
- "%d other",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
+ if (!ret && source != -1) {
+ /* Impunge */
+ if (local->need_full_crawl)
+ ret = afr_selfheal_entry_do_subvol(frame, this, fd, source);
else
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s found. "
- "merging all entries as a conservative decision",
- local->loc.path);
-
- sh->actual_sh_started = _gf_true;
- afr_sh_entry_open (frame, this);
-
- return 0;
+ ret = afr_selfheal_entry_granular(frame, this, fd, source,
+ _gf_true);
+ }
+
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
}
-
-void
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+__afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int nsources = 0;
- int32_t subvol_status = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_entry_finish (frame, this);
- goto out;
- }
-
- if (sh->forced_merge) {
- sh->source = -1;
- goto heal;
- }
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION, &subvol_status,
- _gf_true);
- if ((subvol_status & ALL_FOOLS) ||
- (subvol_status & SPLIT_BRAIN)) {
- gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
- "merge", local->loc.path);
- source = -1;
- memset (sh->sources, 0,
- sizeof (*sh->sources) * priv->child_count);
- } else if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_entry_finish (frame, this);
- return;
- } else {
- source = afr_sh_select_source (sh->sources, priv->child_count);
- }
-
- sh->source = source;
-
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- if (sh->source >= 0)
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-
-heal:
- afr_sh_entry_sync_prepare (frame, this);
+ int ret = -1;
+ int source = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *postop_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t did_sh = _gf_true;
+
+ priv = this->private;
+ local = frame->local;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+ postop_lock = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, this->name);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies, &source, NULL);
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ local->need_full_crawl = afr_need_full_heal(
+ this, locked_replies, source, healed_sinks, AFR_ENTRY_TRANSACTION);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, data_lock,
+ NULL);
+ if (ret < 0)
+ goto out;
+
+ if (!did_sh)
+ goto out;
+
+ ret = afr_selfheal_entry_do(frame, this, fd, source, sources, healed_sinks);
+ if (ret)
+ goto out;
+
+ /* Take entrylks in xlator domain before doing post-op (undo-pending) in
+ * entry self-heal. This is to prevent a parallel name self-heal on
+ * an entry under @fd->inode from reading pending xattrs while it is
+ * being modified by SHD after entry sh below, given that
+ * name self-heal takes locks ONLY in xlator domain and is free to read
+ * pending changelog in the absence of the following locking.
+ */
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ postop_lock);
+ {
+ if (AFR_CMP(data_lock, postop_lock, priv->child_count) != 0) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "post-op after entry self-heal as %d "
+ "sub-volumes, as opposed to %d, "
+ "could be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret,
+ AFR_COUNT(data_lock, priv->child_count), this->name);
+ ret = -ENOTCONN;
+ goto postop_unlock;
+ }
+
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
+ ret = afr_selfheal_undo_pending(
+ frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
+ }
+postop_unlock:
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL,
+ postop_lock, NULL);
out:
- return;
+ if (did_sh)
+ afr_log_selfheal(fd->inode->gfid, this, ret, "entry", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
+ return ret;
}
-int
-afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
+static fd_t *
+afr_selfheal_data_opendir(xlator_t *this, inode_t *inode)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
- "failed for %s.", local->loc.path);
- sh->op_failed = 1;
- afr_sh_entry_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
- "for %s. Proceeding to FOP", local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_entry_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
- }
-
- return 0;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ fd_t *fd = NULL;
+
+ fd = fd_create(inode, 0);
+ if (!fd)
+ return NULL;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ ret = syncop_opendir(this, &loc, fd, NULL, NULL);
+ if (ret) {
+ fd_unref(fd);
+ fd = NULL;
+ } else {
+ fd_bind(fd);
+ }
+
+ loc_wipe(&loc);
+ return fd;
}
int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
+afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
-
- priv = this->private;
- local = frame->local;
-
- if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entrylk (frame, this, &local->loc, NULL,
- afr_sh_post_nonblocking_entry_cbk);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to completion on %s",
- local->loc.path);
- afr_sh_entry_done (frame, this);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ fd = afr_selfheal_data_opendir(this, inode);
+ if (!fd)
+ return -EIO;
+
+ locked_on = alloca0(priv->child_count);
+
+ ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
+ NULL, locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, priv->sh_domain);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry(frame, this, fd, locked_on);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL, locked_on,
+ NULL);
+
+ if (fd)
+ fd_unref(fd);
+
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index e013d5c4714..03f43bad16e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,602 +8,539 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
+#include <glusterfs/byte-order.h>
+#include "protocol-common.h"
+#include <glusterfs/events.h>
+#define AFR_HEAL_ATTR (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE)
-int
-afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
+static gf_boolean_t
+_afr_ignorable_key_match(dict_t *d, char *k, data_t *val, void *mdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- afr_sh_reset (frame, this);
- if (sh->mdata_spb) {
- gf_log (this->name, GF_LOG_DEBUG,
- "split-brain detected, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
- } else {
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- }
-
- return 0;
+ return afr_is_xattr_ignorable(k);
}
-int
-afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_metadata_done;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_sh_inode_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr, dict_t *xdata)
+void
+afr_delete_ignorable_xattrs(dict_t *xattr)
{
- afr_local_t *local = NULL;
- int call_count = 0;
- long i = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
- }
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- }
- afr_sh_metadata_finish (frame, this);
- }
-
- return 0;
+ dict_foreach_match(xattr, _afr_ignorable_key_match, NULL,
+ dict_remove_foreach_fn, NULL);
}
-
int
-afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
+__afr_selfheal_metadata_do(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
- if (!erase_xattr)
- return -ENOMEM;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ dict_t *old_xattr = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing metadata selfheal on %s", uuid_utoa(inode->gfid));
+
+ ret = syncop_getxattr(priv->children[source], &loc, &xattr, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+ ret = -EIO;
+ goto out;
+ }
+
+ afr_delete_ignorable_xattrs(xattr);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_xattr) {
+ dict_unref(old_xattr);
+ old_xattr = NULL;
}
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
+ if (!healed_sinks[i])
+ continue;
+
+ ret = syncop_setattr(priv->children[i], &loc,
+ &locked_replies[source].poststat, AFR_HEAL_ATTR,
+ NULL, NULL, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+
+ ret = syncop_getxattr(priv->children[i], &loc, &old_xattr, 0, NULL,
+ NULL);
+ if (old_xattr) {
+ afr_delete_ignorable_xattrs(old_xattr);
+ ret = syncop_removexattr(priv->children[i], &loc, "", old_xattr,
+ NULL);
+ if (ret)
+ healed_sinks[i] = 0;
}
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i],
- NULL);
- if (!--call_count)
- break;
- }
+ ret = syncop_setxattr(priv->children[i], &loc, xattr, 0, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+ }
+ ret = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setting attributes failed for %s on %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->success[child_index] = 0;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_erase_pending (frame, this);
+out:
+ loc_wipe(&loc);
+ if (xattr)
+ dict_unref(xattr);
+ if (old_xattr)
+ dict_unref(old_xattr);
- return 0;
+ return ret;
}
-
-int
-afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+static uint64_t
+mtime_ns(struct iatt *ia)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
-
- return 0;
-}
-
+ uint64_t ret;
-int
-afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ ret = (((uint64_t)(ia->ia_mtime)) * 1000000000) +
+ (uint64_t)(ia->ia_mtime_nsec);
- return 0;
+ return ret;
}
-
-int
-afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+/*
+ * When directory content is modified, [mc]time is updated. On
+ * Linux, the filesystem does it, while at least on NetBSD, the
+ * kernel file-system independent code does it. This means that
+ * when entries are added while bricks are down, the kernel sends
+ * a SETATTR [mc]time which will cause metadata split brain for
+ * the directory. In this case, clear the split brain by finding
+ * the source with the most recent modification date.
+ */
+static int
+afr_dirtime_splitbrain_source(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int active_sinks = 0;
- int call_count = 0;
- int i = 0;
-
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- /*
- * 2 calls per sink - setattr, setxattr
- */
- if (xattr)
- call_count = active_sinks * 2;
- else
- call_count = active_sinks;
-
- local->call_count = call_count;
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- stbuf.ia_uid = sh->buf[source].ia_uid;
- stbuf.ia_gid = sh->buf[source].ia_gid;
-
- stbuf.ia_type = sh->buf[source].ia_type;
- stbuf.ia_prot = sh->buf[source].ia_prot;
-
- valid = GF_SET_ATTR_MODE |
- GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- for (i = 0; i < priv->child_count; i++) {
- if (call_count == 0) {
- break;
- }
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing metadata of %s from %s to %s",
- local->loc.path, priv->children[source]->name,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid, NULL);
-
- call_count--;
-
- if (!xattr)
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0, NULL);
- call_count--;
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ struct iatt source_ia;
+ struct iatt child_ia;
+ uint64_t mtime = 0;
+ int i;
+ int ret = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ if (mtime_ns(&replies[i].poststat) <= mtime)
+ continue;
+
+ mtime = mtime_ns(&replies[i].poststat);
+ source = i;
+ }
+
+ if (source == -1)
+ goto out;
+
+ source_ia = replies[source].poststat;
+ if (source_ia.ia_type != IA_IFDIR)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ child_ia = replies[i].poststat;
+
+ if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+ !IA_EQUAL(source_ia, child_ia, type) ||
+ !IA_EQUAL(source_ia, child_ia, prot) ||
+ !IA_EQUAL(source_ia, child_ia, uid) ||
+ !IA_EQUAL(source_ia, child_ia, gid) ||
+ !afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata))
+ goto out;
+ }
+
+ /*
+ * Metadata split brain is just about [amc]time
+ * We return our source.
+ */
+ ret = source;
+out:
+ return ret;
}
-
-int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+static int
+__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *sources)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- int i;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
- local->loc.path, priv->children[source]->name,
- strerror (op_errno));
-
- afr_sh_metadata_sync (frame, this, NULL);
- } else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
- afr_sh_metadata_sync (frame, this, xattr);
+ int ret = 0;
+ int i = 0;
+ int m_idx = 0;
+ afr_private_t *priv = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ raw[m_idx] = 1;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ ret = -1;
+ goto out;
}
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_metadata_finish (frame, this);
- return 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
+ "Failed to set pending metadata xattr on child %d for %s", i,
+ uuid_utoa(inode->gfid));
+ goto out;
}
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
-
- sh->actual_sh_started = _gf_true;
- STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
- priv->children[source],
- priv->children[source]->fops->getxattr,
- &local->loc, NULL, NULL);
+ afr_replies_wipe(replies, priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
- return 0;
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
}
-
-void
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+/*
+ * Look for mismatching uid/gid or mode or user xattrs even if
+ * AFR xattrs don't say so, and pick one arbitrarily as winner. */
+
+static int
+__afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_metadata_finish (frame, this);
- goto out;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt srcstat = {
+ 0,
+ };
+ int source = -1;
+ int sources_count = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count) {
+ source = afr_mark_split_brain_source_sinks(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, AFR_METADATA_TRANSACTION);
+ if (source >= 0) {
+ _afr_fav_child_reset_sink_xattrs(
+ frame, this, inode, source, healed_sinks, undid_pending,
+ AFR_METADATA_TRANSACTION, locked_on, replies);
+ goto out;
}
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_METADATA_TRANSACTION, NULL, _gf_false);
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_WARNING,
- "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
- sh->mdata_spb = _gf_true;
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
- sh->mdata_spb = _gf_false;
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- goto out;
+ /* If this is a directory mtime/ctime only split brain
+ use the most recent */
+ source = afr_dirtime_splitbrain_source(frame, this, replies, locked_on);
+ if (source != -1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SPLIT_BRAIN,
+ "clear time "
+ "split brain on %s",
+ uuid_utoa(replies[source].poststat.ia_gfid));
+ sources[source] = 1;
+ healed_sinks[source] = 0;
+ goto out;
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_metadata_finish (frame, this);
- goto out;
+ if (!priv->metadata_splitbrain_forced_heal) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;"
+ "type=metadata;file=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
+ return -EIO;
}
- sh->source = source;
-
- /* detect changes not visible through pending flags -- JIC */
+ /* Metadata split brain, select one subvol
+ arbitrarily */
for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
-
- if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
+ if (locked_on[i] && healed_sinks[i]) {
+ sources[i] = 1;
+ healed_sinks[i] = 0;
+ break;
+ }
}
-
- if ((!IA_ISREG (sh->buf[source].ia_type)) &&
- (!IA_ISDIR (sh->buf[source].ia_type))) {
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
+ }
+
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
+
+ source = afr_choose_source_by_policy(priv, sources,
+ AFR_METADATA_TRANSACTION);
+ srcstat = replies[source].poststat;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] || i == source)
+ continue;
+ if (!IA_EQUAL(srcstat, replies[i].poststat, type) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, uid) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, gid) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, prot)) {
+ gf_msg_debug(this->name, 0,
+ "%s: iatt mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa(replies[source].poststat.ia_gfid), source,
+ i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
}
-
- afr_sh_metadata_sync_prepare (frame, this);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] || i == source)
+ continue;
+ if (!afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata)) {
+ gf_msg_debug(this->name, 0,
+ "%s: xattr mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa(replies[source].poststat.ia_gfid), source,
+ i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ if ((sources_count == priv->child_count) && (source > -1) &&
+ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
+ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
+ replies, sources);
+ if (ret < 0)
+ return ret;
+ }
out:
- return;
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return source;
}
int
-afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
- xlator_t *this)
+__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *pflag)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
- "inodelks failed for %s.", local->loc.path);
- gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
- "failed for %s.", local->loc.path);
- afr_sh_metadata_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
- "inodelks done for %s. Proceeding to FOP",
- local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_metadata_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+ if (ret)
+ return ret;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, locked_on,
+ sources, sinks, witness, pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ /* If any source has witness, pick first
+ * witness source and make everybody else sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ if (source != -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != source && sources[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
}
+ }
- return 0;
+ source = __afr_selfheal_metadata_finalize_source(
+ frame, this, inode, sources, sinks, healed_sinks, undid_pending,
+ locked_on, replies);
+
+ if (source < 0)
+ return -EIO;
+
+ return source;
}
int
-afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
+afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ gf_boolean_t did_sh = _gf_true;
+ int source = -1;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- afr_set_lock_number (frame, this);
+ ret = __afr_selfheal_metadata_prepare(
+ frame, this, inode, data_lock, sources, sinks, healed_sinks,
+ undid_pending, locked_replies, NULL);
+ if (ret < 0)
+ goto unlock;
- int_lock->lk_flock.l_start = LLONG_MAX - 1;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
+ source = ret;
- afr_nonblocking_inodelk (frame, this);
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
- return 0;
+ ret = __afr_selfheal_metadata_do(frame, this, inode, source,
+ healed_sinks, locked_replies);
+ if (ret)
+ goto unlock;
+
+ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
+ locked_replies);
+
+ ret = afr_selfheal_undo_pending(
+ frame, this, inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_METADATA_TRANSACTION, locked_replies, data_lock);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+ data_lock);
+
+ if (did_sh)
+ afr_log_selfheal(inode->gfid, this, ret, "metadata", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
+ return ret;
}
-
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
+afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
-
- local = frame->local;
- sh = &local->self_heal;
-
- /* Self-heal completion cbk changes inode split-brain status based on
- * govinda_gOvinda, mdata_spb, data_spb values. Initialize mdata_spb
- * with current split-brain status. If for some reason self-heal
- * fails(locking phase etc), it makes sure we retain the split-brain
- * status before this self-heal started.
- */
- sh->mdata_spb = afr_is_split_brain (this, sh->inode);
-
- if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) {
- afr_sh_metadata_lock (frame, this);
- } else {
- afr_sh_metadata_done (frame, this);
- }
-
- return 0;
+ inode_t *inode = NULL;
+ inode_t *link_inode = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ if (gf_uuid_is_null(stbuf->ia_gfid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ inode = inode_new(this->itable);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ link_inode = inode_link(inode, NULL, NULL, stbuf);
+ if (!link_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = afr_selfheal_metadata(frame, this, link_inode);
+out:
+ if (inode)
+ inode_unref(inode);
+ if (link_inode)
+ inode_unref(link_inode);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
new file mode 100644
index 00000000000..834aac86d48
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -0,0 +1,616 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/events.h>
+#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
+
+int
+__afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies, void *gfid,
+ unsigned char *locked_on, int source,
+ unsigned char *sources, gf_boolean_t is_gfid_absent,
+ int *gfid_idx)
+{
+ int ret = 0;
+ int up_count = 0;
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_uuid_copy(parent->gfid, pargfid);
+
+ if (is_gfid_absent) {
+ /* Ensure all children of AFR are up before performing gfid heal, to
+ * guard against the possibility of gfid split brain. */
+
+ up_count = AFR_COUNT(priv->child_up, priv->child_count);
+ if (up_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+
+ locked_count = AFR_COUNT(locked_on, priv->child_count);
+ if (locked_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ ret = afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
+ sources, gfid, gfid_idx);
+
+out:
+ return ret;
+}
+
+int
+__afr_selfheal_name_impunge(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid, const char *bname,
+ inode_t *inode, struct afr_reply *replies,
+ int gfid_idx)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ unsigned char *sources = NULL;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+
+ gf_uuid_copy(parent->gfid, pargfid);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[gfid_idx].poststat.ia_gfid) == 0) {
+ sources[i] = 1;
+ continue;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+
+ ret |= afr_selfheal_recreate_entry(frame, i, gfid_idx, sources, parent,
+ bname, inode, replies);
+ }
+
+ return ret;
+}
+
+int
+__afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret)
+ continue;
+
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
+ }
+
+ return ret;
+}
+
+static gf_boolean_t
+afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies)
+{
+ int i = 0;
+ int first_idx = -1;
+ gf_boolean_t need_heal = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA))
+ need_heal = _gf_true;
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret)
+ need_heal = _gf_true;
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid))
+ need_heal = _gf_true;
+
+ if ((replies[i].op_ret == 0) &&
+ (gf_uuid_is_null(replies[i].poststat.ia_gfid)))
+ need_heal = _gf_true;
+ }
+
+ return need_heal;
+}
+
+static int
+afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ uuid_t pargfid, const char *bname)
+{
+ int i = 0;
+ int type_idx = -1;
+ ia_type_t inode_type = IA_INVAL;
+ ia_type_t inode_type1 = IA_INVAL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (inode_type == IA_INVAL) {
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ continue;
+ }
+ inode_type1 = replies[i].poststat.ia_type;
+ if (sources[i] || source == -1) {
+ if ((sources[type_idx] || source == -1) &&
+ (inode_type != inode_type1)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch for <gfid:%s>/%s: "
+ "%s on %s and %s on %s",
+ uuid_utoa(pargfid), bname,
+ gf_inode_type_to_str(inode_type1),
+ priv->children[i]->name,
+ gf_inode_type_to_str(inode_type),
+ priv->children[type_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s;count=2;"
+ "child-%d=%s;type-%d=%s;child-%d=%s;"
+ "type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name,
+ i, gf_inode_type_to_str(inode_type1), type_idx,
+ priv->children[type_idx]->name, type_idx,
+ gf_inode_type_to_str(inode_type));
+ return -EIO;
+ }
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ }
+ }
+ return 0;
+}
+
+static int
+afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ int *gfid_idx, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ unsigned char *locked_on, dict_t *xdata)
+{
+ int i = 0;
+ int gfid_idx_iter = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ void *gfid1 = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (!gfid) {
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ continue;
+ }
+
+ gfid1 = &replies[i].poststat.ia_gfid;
+ if (sources[i] || source == -1) {
+ if ((sources[gfid_idx_iter] || source == -1) &&
+ gf_uuid_compare(gfid, gfid1)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, gfid_idx_iter, i,
+ locked_on, gfid_idx, xdata);
+ if (!ret && *gfid_idx >= 0) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ "GFID split-brain resolved");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-"
+ "heal-msg dict");
+ }
+ return ret;
+ }
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ }
+ }
+
+ *gfid_idx = gfid_idx_iter;
+ return 0;
+}
+
+static gf_boolean_t
+afr_selfheal_name_source_empty_check(xlator_t *this, struct afr_reply *replies,
+ unsigned char *sources, int source)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+
+ priv = this->private;
+
+ if (source == -1) {
+ source_is_empty = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+
+ if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT)
+ continue;
+
+ source_is_empty = _gf_false;
+ break;
+ }
+out:
+ return source_is_empty;
+}
+
+int
+__afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies,
+ void *gfid_req, dict_t *xdata)
+{
+ int gfid_idx = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t is_gfid_absent = _gf_false;
+
+ need_heal = afr_selfheal_name_need_heal_check(this, replies);
+ if (!need_heal)
+ return 0;
+
+ source_is_empty = afr_selfheal_name_source_empty_check(this, replies,
+ sources, source);
+ if (source_is_empty) {
+ ret = __afr_selfheal_name_expunge(this, parent, pargfid, bname, inode,
+ replies);
+ if (ret == -EIO)
+ ret = -1;
+ return ret;
+ }
+
+ ret = afr_selfheal_name_type_mismatch_check(this, replies, source, sources,
+ pargfid, bname);
+ if (ret)
+ return ret;
+
+ ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources,
+ &gfid_idx, pargfid, bname,
+ inode, locked_on, xdata);
+ if (ret)
+ return ret;
+
+ if (gfid_idx == -1) {
+ if (!gfid_req || gf_uuid_is_null(gfid_req))
+ return -1;
+ gfid = gfid_req;
+ } else {
+ gfid = &replies[gfid_idx].poststat.ia_gfid;
+ if (source == -1)
+ /* Either entry split-brain or dirty xattrs are present on parent.*/
+ source = gfid_idx;
+ }
+
+ is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
+ ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
+ replies, gfid, locked_on, source, sources,
+ is_gfid_absent, &gfid_idx);
+ if (ret || (gfid_idx < 0))
+ return ret;
+
+ ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
+ inode, replies, gfid_idx);
+ if (ret == -EIO)
+ ret = -1;
+
+ return ret;
+}
+
+int
+__afr_selfheal_name_finalize_source(xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on, uint64_t *witness)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count || afr_does_witness_exist(this, witness)) {
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return -1;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ return source;
+}
+
+int
+__afr_selfheal_name_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ int *source_p)
+{
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_unlocked_discover(frame, parent, pargfid, replies);
+ if (ret)
+ goto out;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, locked_on, sources,
+ sinks, witness, NULL);
+ if (ret)
+ goto out;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_name_finalize_source(this, sources, healed_sinks,
+ locked_on, witness);
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
+
+out:
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+
+ return ret;
+}
+
+int
+afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, void *gfid_req,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *locked_on = NULL;
+ int source = -1;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ dict_t *xattr = NULL;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref(xattr);
+ return -1;
+ }
+
+ priv = this->private;
+
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
+ locked_on);
+ {
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_prepare(frame, this, parent, pargfid,
+ locked_on, sources, sinks,
+ healed_sinks, &source);
+ if (ret)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ locked_on, xattr);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode,
+ sources, sinks, healed_sinks, source,
+ locked_on, replies, gfid_req, xdata);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on,
+ NULL);
+ if (inode)
+ inode_unref(inode);
+
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
+}
+
+int
+afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ const char *bname, gf_boolean_t *need_heal)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+ int first_idx = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ local->child_up, NULL);
+ if (!inode)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid)) {
+ *need_heal = _gf_true;
+ break;
+ }
+ }
+
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return 0;
+}
+
+int
+afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
+ void *gfid_req, dict_t *xdata)
+{
+ inode_t *parent = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ gf_boolean_t need_heal = _gf_false;
+
+ parent = afr_inode_find(this, pargfid);
+ if (!parent)
+ goto out;
+
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ goto out;
+
+ ret = afr_selfheal_name_unlocked_inspect(frame, this, parent, pargfid,
+ bname, &need_heal);
+ if (ret)
+ goto out;
+
+ if (need_heal) {
+ ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname,
+ gfid_req, xdata);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (parent)
+ inode_unref(parent);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 627a2115a09..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,42 +8,370 @@
cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEAL_H__
-#define __AFR_SELF_HEAL_H__
+#ifndef _AFR_SELFHEAL_H
+#define _AFR_SELFHEAL_H
-#include <sys/stat.h>
+/* Perform fop on all UP subvolumes and wait for all callbacks to return */
-#define FILETYPE_DIFFERS(buf1,buf2) ((buf1)->ia_type != (buf2)->ia_type)
-#define PERMISSION_DIFFERS(buf1,buf2) (st_mode_from_ia ((buf1)->ia_prot, (buf1)->ia_type) != st_mode_from_ia ((buf2)->ia_prot, (buf2)->ia_type))
-#define OWNERSHIP_DIFFERS(buf1,buf2) (((buf1)->ia_uid != (buf2)->ia_uid) || ((buf1)->ia_gid != (buf2)->ia_gid))
-#define SIZE_DIFFERS(buf1,buf2) ((buf1)->ia_size != (buf2)->ia_size)
+#define AFR_ONALL(frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0, __count = 0; \
+ unsigned char *__child_up = alloca(__priv->child_count); \
+ \
+ memcpy(__child_up, __priv->child_up, \
+ sizeof(*__child_up) * __priv->child_count); \
+ __count = AFR_COUNT(__child_up, __priv->child_count); \
+ \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__child_up[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local->barrier, __count); \
+ } while (0)
-#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
+/* Perform fop on all subvolumes represented by list[] array and wait
+ for all callbacks to return */
+
+#define AFR_ONLIST(list, frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ int __count = 0; \
+ unsigned char *__list = alloca(__priv->child_count); \
+ \
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
+ __count = AFR_COUNT(__list, __priv->child_count); \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__list[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local->barrier, __count); \
+ } while (0)
+
+#define AFR_SEQ(frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__priv->child_up[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ syncbarrier_wait(&__local->barrier, 1); \
+ } \
+ } while (0)
+
+#define ALLOC_MATRIX(n, type) \
+ ({ \
+ int __i; \
+ type **__ptr = alloca(n * sizeof(type *)); \
+ \
+ for (__i = 0; __i < n; __i++) \
+ __ptr[__i] = alloca0(n * sizeof(type)); \
+ __ptr; \
+ })
+
+#define IA_EQUAL(f, s, field) \
+ (memcmp(&(f.ia_##field), &(s.ia_##field), sizeof(s.ia_##field)) == 0)
+
+#define SBRAIN_HEAL_NO_GO_MSG \
+ "Failed to obtain replies from all bricks of " \
+ "the replica (are they up?). Cannot resolve split-brain."
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SNO_BIGGER_FILE "No bigger file"
+#define SNO_DIFF_IN_MTIME "No difference in mtime"
+#define SUSE_SOURCE_BRICK_TO_HEAL \
+ "Use source-brick option to heal metadata" \
+ " split-brain"
+#define SINVALID_BRICK_NAME "Invalid brick name"
+#define SBRICK_IS_NOT_UP "Brick is not up"
+#define SBRICK_NOT_CONNECTED "Brick is not connected"
+#define SLESS_THAN2_BRICKS_in_REP "< 2 bricks in replica are up"
+#define SBRICK_IS_REMOTE "Brick is remote"
+#define SSTARTED_SELF_HEAL "Started self-heal"
+#define SOP_NOT_SUPPORTED "Operation Not Supported"
+#define SFILE_NOT_UNDER_DATA \
+ "The file is not under data or metadata " \
+ "split-brain"
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SALL_BRICKS_UP_TO_RESOLVE \
+ "All the bricks should be up to resolve the" \
+ " gfid split brain"
+#define SERROR_GETTING_SRC_BRICK "Error getting the source brick"
+int
+afr_selfheal(xlator_t *this, uuid_t gfid);
+
+gf_boolean_t
+afr_throttled_selfheal(call_frame_t *frame, xlator_t *this);
+
+int
+afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req,
+ dict_t *xdata);
+
+int
+afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd);
+
+int
+afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+int
+afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+int
+afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies, int source,
+ unsigned char *sources, void *gfid, int *gfid_idx);
+
+int
+afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
+
+int
+afr_selfheal_tryinodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
+
+int
+afr_selfheal_tie_breaker_inodelk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on);
+
+int
+afr_selfheal_uninodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on);
+
+int
+afr_selfheal_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_tryentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_tie_breaker_entrylk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on);
+
+int
+afr_selfheal_unentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata);
+
+int
+afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on, dict_t *dict);
+inode_t *
+afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr);
+
+int
+afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ uint64_t *witness, unsigned char *flag);
+int
+afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,
+ dict_t *xdata);
+
+int
+afr_selfheal_extract_xattr(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix);
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this);
+afr_sh_generic_fop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies);
int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_undo_pending(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on);
int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
+afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ unsigned char *sources, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies);
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this);
+afr_selfheal_post_op(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata);
+
+call_frame_t *
+afr_frame_create(xlator_t *this, int32_t *op_errno);
+
+inode_t *
+afr_inode_find(xlator_t *this, uuid_t gfid);
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
+afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf);
+void
+afr_reply_copy(struct afr_reply *dst, struct afr_reply *src);
+
+void
+afr_replies_copy(struct afr_reply *dst, struct afr_reply *src, int count);
+
+int
+afr_selfheal_newentry_mark(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry);
+
+unsigned int
+afr_success_count(struct afr_reply *replies, unsigned int count);
+
+void
+afr_log_selfheal(uuid_t gfid, xlator_t *this, int ret, char *type, int source,
+ unsigned char *sources, unsigned char *healed_sinks);
+
+void
+afr_mark_largest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies);
+void
+afr_mark_active_sinks(xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks);
+
+gf_boolean_t
+afr_dict_contains_heal_op(call_frame_t *frame);
+gf_boolean_t
+afr_can_decide_split_brain_source_sinks(struct afr_reply *replies,
+ int child_count);
int
-afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
+afr_mark_split_brain_source_sinks(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_sh_get_fav_by_policy(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str);
+
+int
+_afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies);
+
+int
+afr_get_child_index_from_name(xlator_t *this, char *name);
+
+gf_boolean_t
+afr_does_witness_exist(xlator_t *this, uint64_t *witness);
+
+int
+__afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *flag);
+
+int
+__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *flag);
+int
+__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ unsigned char *flag);
int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type);
-#endif /* __AFR_SELF_HEAL_H__ */
+afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid);
+
+int
+afr_selfheal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+afr_locked_fill(call_frame_t *frame, xlator_t *this, unsigned char *locked_on);
+int
+afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type);
+
+int
+afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf);
+
+int
+afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+int
+afr_sh_fav_by_mtime(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+int
+afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+
+int
+afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+ unsigned char *locked_on, int *src, dict_t *xdata);
+int
+afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type);
+
+gf_boolean_t
+afr_is_file_empty_on_all_children(afr_private_t *priv,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
+#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index d5773040770..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,1184 +8,1709 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "afr.h"
-#include "syncop.h"
+#include "afr-self-heal.h"
#include "afr-self-heald.h"
-#include "afr-self-heal-common.h"
#include "protocol-common.h"
-#include "event-history.h"
+#include <glusterfs/syncop-utils.h>
+#include "afr-messages.h"
+#include <glusterfs/byte-order.h>
+
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#define AFR_STATISTICS_HISTORY_SIZE 50
+
+#define ASSERT_LOCAL(this, healer) \
+ if (!afr_shd_is_subvol_local(this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break(healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ }
+
+#define NTH_INDEX_HEALER(this, n) \
+ &((((afr_private_t *)this->private))->shd.index_healers[n])
+#define NTH_FULL_HEALER(this, n) \
+ &((((afr_private_t *)this->private))->shd.full_healers[n])
+
+char *
+afr_subvol_name(xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
-#define AFR_POLL_TIMEOUT 600
+ priv = this->private;
+ if (subvol < 0 || subvol > priv->child_count)
+ return NULL;
-typedef enum {
- STOP_CRAWL_ON_SINGLE_SUBVOL = 1
-} afr_crawl_flags_t;
+ return priv->children[subvol]->name;
+}
-typedef enum {
- HEAL = 1,
- INFO
-} shd_crawl_op;
+void
+afr_destroy_crawl_event_data(void *data)
+{
+ return;
+}
-typedef struct shd_dump {
- dict_t *dict;
- xlator_t *this;
- int child;
-} shd_dump_t;
+void
+afr_destroy_shd_event_data(void *data)
+{
+ shd_event_t *shd_event = data;
-typedef struct shd_event_ {
- int child;
- char *path;
-} shd_event_t;
+ if (!shd_event)
+ return;
+ GF_FREE(shd_event->path);
-typedef struct shd_pos_ {
- int child;
- xlator_t *this;
- afr_child_pos_t pos;
-} shd_pos_t;
+ return;
+}
-typedef int
-(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+gf_boolean_t
+afr_shd_is_subvol_local(xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = this->itable->root;
+ gf_uuid_copy(loc.gfid, loc.inode->gfid);
+ priv = this->private;
+ syncop_is_subvol_local(priv->children[subvol], &loc, &is_local);
+ return is_local;
+}
-void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
- process_entry_cbk_t process_entry, void *op_data,
- gf_boolean_t exclusive, int crawl_flags,
- afr_crawl_done_cbk_t crawl_done);
+int
+__afr_shd_healer_wait(struct subvol_healer *healer)
+{
+ afr_private_t *priv = NULL;
+ struct timespec wait_till = {
+ 0,
+ };
+ int ret = 0;
-static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+ priv = healer->this->private;
-int
-afr_syncop_find_child_position (void *data);
+disabled_loop:
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
-static int
-_loc_assign_gfid_path (loc_t *loc)
-{
- int ret = -1;
- char gfid_path[64] = {0};
-
- if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
- ret = inode_path (loc->inode, NULL, (char**)&loc->path);
- } else if (!uuid_is_null (loc->gfid)) {
- snprintf (gfid_path, sizeof (gfid_path), "<gfid:%s>",
- uuid_utoa (loc->gfid));
- loc->path = gf_strdup (gfid_path);
- if (loc->path)
- ret = 0;
- }
- return ret;
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
+
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+ if (!priv->shd.enabled)
+ goto disabled_loop;
+
+ return ret;
}
-void
-shd_cleanup_event (void *event)
+int
+afr_shd_healer_wait(struct subvol_healer *healer)
{
- shd_event_t *shd_event = event;
+ int ret = 0;
- if (!shd_event)
- goto out;
- if (shd_event->path)
- GF_FREE (shd_event->path);
- GF_FREE (shd_event);
-out:
- return;
+ pthread_mutex_lock(&healer->mutex);
+ {
+ ret = __afr_shd_healer_wait(healer);
+ }
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
}
-int
-afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+gf_boolean_t
+safe_break(struct subvol_healer *healer)
{
- int i = 0;
- int ret = -1;
- for (i = 0; i < child_count; i++) {
- if (shd->pos[i] == AFR_POS_LOCAL) {
- ret = i;
- break;
- }
- }
- return ret;
+ gf_boolean_t ret = _gf_false;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->rerun)
+ goto unlock;
+
+ healer->running = _gf_false;
+ ret = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
}
-static int
-_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+inode_t *
+afr_shd_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid)
{
- int ret = 0;
-
- uuid_copy (loc->pargfid, parent->inode->gfid);
- loc->path = "";
- loc->name = name;
- loc->parent = inode_ref (parent->inode);
- if (!loc->parent) {
- loc->path = NULL;
- loc_wipe (loc);
- ret = -1;
- }
- return ret;
+ int ret = 0;
+ uint64_t val = IA_INVAL;
+ dict_t *xdata = NULL;
+ dict_t *rsp_dict = NULL;
+ inode_t *inode = NULL;
+
+ xdata = dict_new();
+ if (!xdata)
+ goto out;
+
+ ret = dict_set_int8(xdata, GF_INDEX_IA_TYPE_GET_REQ, 1);
+ if (ret)
+ goto out;
+
+ ret = syncop_inode_find(this, subvol, gfid, &inode, xdata, &rsp_dict);
+ if (ret < 0)
+ goto out;
+
+ if (rsp_dict) {
+ ret = dict_get_uint64(rsp_dict, GF_INDEX_IA_TYPE_GET_RSP, &val);
+ if (ret)
+ goto out;
+ }
+ ret = inode_ctx_set2(inode, subvol, 0, &val);
+out:
+ if (ret && inode) {
+ inode_unref(inode);
+ inode = NULL;
+ }
+ if (xdata)
+ dict_unref(xdata);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return inode;
}
-int
-_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path,
- struct timeval *tv, gf_boolean_t dyn)
+inode_t *
+afr_shd_index_inode(xlator_t *this, xlator_t *subvol, char *vgfid)
{
- //subkey not used for now
- int ret = -1;
- uint64_t count = 0;
- char key[256] = {0};
- int xl_id = 0;
+ loc_t rootloc = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
- ret = dict_get_int32 (output, this->name, &xl_id);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
- goto out;
- }
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
- snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
- ret = dict_get_uint64 (output, key, &count);
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, vgfid, NULL, NULL);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
- snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
- if (dyn)
- ret = dict_set_dynstr (output, key, path);
- else
- ret = dict_set_str (output, key, path);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
- path);
- goto out;
- }
+ ret = dict_get_ptr(xattr, vgfid, &index_gfid);
+ if (ret)
+ goto out;
- if (!tv)
- goto inc_count;
- snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
- child, count);
- ret = dict_set_uint32 (output, key, tv->tv_sec);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
- path);
- goto out;
- }
+ gf_msg_debug(this->name, 0, "%s dir gfid for %s: %s", vgfid, subvol->name,
+ uuid_utoa(index_gfid));
+
+ inode = afr_shd_inode_find(this, subvol, index_gfid);
-inc_count:
- snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
- ret = dict_set_uint64 (output, key, count + 1);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
- goto out;
- }
- ret = 0;
out:
- return ret;
+ loc_wipe(&rootloc);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return inode;
}
int
-_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
- char **fpath)
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type)
{
- dict_t *xattr = NULL;
- char *path = NULL;
- int ret = -1;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
- ret = syncop_getxattr (readdir_xl, child, &xattr,
- GFID_TO_PATH_KEY);
- if (ret)
- goto out;
- ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get path for "
- "gfid %s", uuid_utoa (child->gfid));
- goto out;
- }
- path = gf_strdup (path);
- if (!path) {
- ret = -1;
- goto out;
- }
- ret = 0;
-out:
- if (!ret)
- *fpath = path;
- if (xattr)
- dict_unref (xattr);
- return ret;
+ loc.parent = inode_ref(inode);
+ loc.name = name;
+
+ if (IA_ISDIR(type))
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ else
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+
+ loc_wipe(&loc);
+ return ret;
}
-int
-_add_event_to_dict (circular_buffer_t *cb, void *data)
+void
+afr_shd_zero_xattrop(xlator_t *this, uuid_t gfid)
{
- int ret = 0;
- shd_dump_t *dump_data = NULL;
- shd_event_t *shd_event = NULL;
+ call_frame_t *frame = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ int i = 0;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+
+ priv = this->private;
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ goto out;
+ inode = afr_inode_find(this, gfid);
+ if (!inode)
+ goto out;
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+ ret = dict_set_static_bin(xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ }
+
+ /*Send xattrop to all bricks. Doing a lookup to see if bricks are up or
+ * has valid repies for this gfid seems a bit of an overkill.*/
+ for (i = 0; i < priv->child_count; i++)
+ afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
- dump_data = data;
- shd_event = cb->data;
- if (shd_event->child != dump_data->child)
- goto out;
- ret = _add_path_to_dict (dump_data->this, dump_data->dict,
- dump_data->child, shd_event->path, &cb->tv,
- _gf_false);
out:
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ if (inode)
+ inode_unref(inode);
+ if (xattr)
+ dict_unref(xattr);
+ return;
}
int
-_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child)
+afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent,
+ const char *bname)
{
- shd_dump_t dump_data = {0};
+ int ret = -1;
- dump_data.this = this;
- dump_data.dict = dict;
- dump_data.child = child;
- eh_dump (eh, &dump_data, _add_event_to_dict);
- return 0;
+ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL);
+
+ return ret;
}
int
-_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
- gf_dirent_t *entry,
- loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+afr_shd_selfheal(struct subvol_healer *healer, int child, uuid_t gfid)
{
- dict_t *output = NULL;
- xlator_t *readdir_xl = NULL;
- int ret = -1;
- char *path = NULL;
-
- if (uuid_is_null (childloc->gfid))
- goto out;
+ int ret = 0;
+ eh_t *eh = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ this = healer->this;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = &healer->crawl_event;
+
+ subvol = priv->children[child];
+
+ // If this fails with ENOENT/ESTALE index is stale
+ ret = syncop_gfid_to_path(this->itable, subvol, gfid, &path);
+ if (ret < 0)
+ return ret;
- output = crawl_data->op_data;
- readdir_xl = crawl_data->readdir_xl;
+ ret = afr_selfheal(this, gfid);
+
+ LOCK(&priv->lock);
+ {
+ if (ret == -EIO) {
+ eh = shd->split_brain;
+ crawl_event->split_brain_count++;
+ } else if (ret < 0) {
+ crawl_event->heal_failed_count++;
+ } else if (ret == 0) {
+ crawl_event->healed_count++;
+ }
+ }
+ UNLOCK(&priv->lock);
- ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path);
- if (ret)
- goto out;
+ if (eh) {
+ shd_event = GF_CALLOC(1, sizeof(*shd_event), gf_afr_mt_shd_event_t);
+ if (!shd_event)
+ goto out;
- ret = _add_path_to_dict (this, output, crawl_data->child, path, NULL,
- _gf_true);
-out:
- if (ret && path)
- GF_FREE (path);
- return ret;
-}
+ shd_event->child = child;
+ shd_event->path = path;
-void
-_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
- loc_t *parent, char *fname)
-{
- int ret = 0;
- loc_t index_loc = {0};
+ if (eh_save_history(eh, shd_event) < 0)
+ goto out;
- ret = _build_index_loc (this, &index_loc, fname, parent);
- if (ret)
- goto out;
- gf_log (this->name, GF_LOG_DEBUG, "Removing stale index "
- "for %s on %s", index_loc.name, readdir_xl->name);
- ret = syncop_unlink (readdir_xl, &index_loc);
- if (ret && (errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove"
- " index on %s - %s", index_loc.name,
- readdir_xl->name, strerror (errno));
- }
- index_loc.path = NULL;
- loc_wipe (&index_loc);
+ shd_event = NULL;
+ path = NULL;
+ }
out:
- return;
+ GF_FREE(shd_event);
+ GF_FREE(path);
+ return ret;
}
void
-_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
- int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
- afr_crawl_data_t *crawl_data)
+afr_shd_sweep_prepare(struct subvol_healer *healer)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- eh_t *eh = NULL;
- char *path = NULL;
- shd_event_t *event = NULL;
- int32_t sh_failed = 0;
- gf_boolean_t split_brain = 0;
- int32_t actual_sh_done = 0;
- priv = this->private;
- shd = &priv->shd;
- if (crawl_data->crawl == INDEX) {
- if ((op_ret < 0) && (op_errno == ENOENT)) {
- _remove_stale_index (this, crawl_data->readdir_xl,
- parent, uuid_utoa (child->gfid));
- goto out;
- }
- ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
- child, &path);
- if (ret)
- goto out;
- } else {
- path = gf_strdup (child->path);
- if (!path) {
- ret = -1;
- goto out;
- }
- }
+ crawl_event_t *event = NULL;
- if (xattr_rsp) {
- ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
- ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done);
- }
-
- split_brain = afr_is_split_brain (this, child->inode);
+ event = &healer->crawl_event;
- if ((op_ret < 0 && op_errno == EIO) || split_brain) {
- eh = shd->split_brain;
- } else if ((op_ret < 0) || sh_failed) {
- eh = shd->heal_failed;
- } else if (actual_sh_done == 1) {
- eh = shd->healed;
- }
+ event->healed_count = 0;
+ event->split_brain_count = 0;
+ event->heal_failed_count = 0;
- ret = -1;
+ event->start_time = gf_time();
+ event->end_time = 0;
+ _mask_cancellation();
+}
- if (eh != NULL) {
- event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
- if (!event)
- goto out;
- event->child = crawl_data->child;
- event->path = path;
+void
+afr_shd_sweep_done(struct subvol_healer *healer)
+{
+ crawl_event_t *event = NULL;
+ crawl_event_t *history = NULL;
+ afr_self_heald_t *shd = NULL;
- ret = eh_save_history (eh, event);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save "
- "to event history, (%d, %s)", path, op_ret,
- strerror (op_errno));
+ event = &healer->crawl_event;
+ shd = &(((afr_private_t *)healer->this->private)->shd);
- goto out;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ",
- path);
+ event->end_time = gf_time();
+ history = gf_memdup(event, sizeof(*event));
+ event->start_time = 0;
- }
- ret = 0;
-out:
- if (ret && path)
- GF_FREE (path);
+ if (!history)
return;
+
+ if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
+ GF_FREE(history);
+ _unmask_cancellation();
}
int
-_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
- loc_t *child, loc_t *parent, struct iatt *iattr)
+afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- struct iatt parentbuf = {0};
- int ret = 0;
- dict_t *xattr_rsp = NULL;
-
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
-
- ret = syncop_lookup (this, child, NULL,
- iattr, &xattr_rsp, &parentbuf);
- _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp,
- crawl_data);
- if (xattr_rsp)
- dict_unref (xattr_rsp);
- return ret;
-}
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = NULL;
+ uuid_t gfid = {0};
+ int ret = 0;
+ uint64_t val = IA_INVAL;
-static int
-afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
-{
- GF_FREE (data);
- STACK_DESTROY (sync_frame->root);
+ priv = healer->this->private;
+ if (!priv->shd.enabled)
+ return -EBUSY;
+
+ gf_msg_debug(healer->this->name, 0, "got entry: %s from %s", entry->d_name,
+ priv->children[healer->subvol]->name);
+
+ ret = gf_uuid_parse(entry->d_name, gfid);
+ if (ret)
return 0;
-}
-void
-_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
-{
- afr_start_crawl (this, child, crawl, _self_heal_entry,
- NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
- afr_crawl_done);
+ inode_ctx_get2(parent->inode, subvol, NULL, &val);
+
+ ret = afr_shd_selfheal(healer, healer->subvol, gfid);
+
+ if (ret == -ENOENT || ret == -ESTALE)
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
+
+ if (ret == 2)
+ /* If bricks crashed in pre-op after creating indices/xattrop
+ * link but before setting afr changelogs, we end up with stale
+ * xattrop links but zero changelogs. Remove such entries by
+ * sending a post-op with zero changelogs.
+ */
+ afr_shd_zero_xattrop(healer->this, gfid);
+
+ return 0;
}
-gf_boolean_t
-_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
+int
+afr_shd_index_sweep(struct subvol_healer *healer, char *vgfid)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- gf_boolean_t proceed = _gf_false;
- char *msg = NULL;
-
- priv = this->private;
- shd = &priv->shd;
- if (!shd->enabled) {
- msg = "Self-heal daemon is not enabled";
- gf_log (this->name, GF_LOG_DEBUG, "%s", msg);
- goto out;
- }
- if (!priv->child_up[child]) {
- gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , "
- "subvol went down", priv->children[child]->name);
- msg = "Brick is Not connected";
- goto out;
- }
+ loc_t loc = {0};
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ dict_t *xdata = NULL;
+ call_frame_t *frame = NULL;
+
+ priv = healer->this->private;
+ subvol = priv->children[healer->subvol];
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ loc.inode = afr_shd_index_inode(healer->this, subvol, vgfid);
+ if (!loc.inode) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_INDEX_DIR_GET_FAILED, "unable to get index-dir on %s",
+ subvol->name);
+ ret = -errno;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int32_sizen(xdata, "get-gfid-type", 1)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ healer, afr_shd_index_heal, xdata,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+
+ if (ret == 0)
+ ret = healer->crawl_event.healed_count;
- if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl as "
- "< 2 children are up");
- msg = "< 2 bricks in replica are running";
- goto out;
- }
- }
- proceed = _gf_true;
out:
- if (reason)
- *reason = msg;
- return proceed;
+ loc_wipe(&loc);
+
+ if (xdata)
+ dict_unref(xdata);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
}
int
-_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
- shd_crawl_op op, dict_t *output)
+afr_shd_index_sweep_all(struct subvol_healer *healer)
{
- afr_private_t *priv = NULL;
- char *status = NULL;
- char *subkey = NULL;
- char key[256] = {0};
- shd_pos_t pos_data = {0};
- int op_ret = -1;
- int xl_id = -1;
- int i = 0;
- int ret = 0;
- int crawl_flags = 0;
-
- priv = this->private;
- if (op == HEAL)
- crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
-
- ret = dict_get_int32 (output, this->name, &xl_id);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
- "translator-id is not available");
- goto out;
- }
- pos_data.this = this;
- subkey = "status";
- for (i = 0; i < priv->child_count; i++) {
- if (_crawl_proceed (this, i, crawl_flags, &status)) {
- pos_data.child = i;
- ret = synctask_new (this->ctx->env,
- afr_syncop_find_child_position,
- NULL, NULL, &pos_data);
- if (ret) {
- status = "Not able to find brick location";
- } else if (pos_data.pos == AFR_POS_REMOTE) {
- status = "brick is remote";
- } else {
- op_ret = 0;
- if (op == HEAL) {
- status = "Started self-heal";
- _do_self_heal_on_subvol (this, i,
- crawl);
- } else {
- status = "";
- afr_start_crawl (this, i, INDEX,
- _add_summary_to_dict,
- output, _gf_false, 0,
- NULL);
- }
- }
- snprintf (key, sizeof (key), "%d-%d-%s", xl_id,
- i, subkey);
- ret = dict_set_str (output, key, status);
- if (!op_ret && (crawl == FULL))
- break;
- }
- snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i, subkey);
- ret = dict_set_str (output, key, status);
- }
+ int ret = 0;
+ int count = 0;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_INDEX_GFID);
+ if (ret < 0)
+ goto out;
+ count = ret;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_DIRTY_GFID);
+ if (ret < 0)
+ goto out;
+ count += ret;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_ENTRY_CHANGES_GFID);
+ if (ret < 0)
+ goto out;
+ count += ret;
out:
- return op_ret;
+ if (ret < 0)
+ return ret;
+ else
+ return count;
}
int
-_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
- dict_t *output)
+afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output);
+ struct subvol_healer *healer = data;
+ xlator_t *this = healer->this;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
+ if (!priv->shd.enabled)
+ return -EBUSY;
+
+ afr_shd_selfheal_name(healer, healer->subvol, parent->inode->gfid,
+ entry->d_name);
+
+ afr_shd_selfheal(healer, healer->subvol, entry->d_stat.ia_gfid);
+
+ return 0;
}
int
-_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+afr_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
- return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output);
+ afr_private_t *priv = NULL;
+ loc_t loc = {0};
+
+ priv = healer->this->private;
+ loc.inode = inode;
+ return syncop_ftw(priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, afr_shd_full_heal);
}
int
-_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+afr_shd_fill_ta_loc(xlator_t *this, loc_t *loc)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+
+ priv = this->private;
+ loc->parent = inode_ref(this->itable->root);
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ loc->inode = inode_new(loc->parent->table);
+ GF_CHECK_ALLOC(loc->inode, ret, out);
+
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto assign_gfid;
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], loc, &stbuf,
+ 0, 0, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed lookup on file %s.", loc->name);
+ goto out;
+ }
+
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+
+assign_gfid:
+ gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ ret = 0;
- priv = this->private;
- shd = &priv->shd;
+out:
+ if (ret)
+ loc_wipe(loc);
- for (i = 0; i < priv->child_count; i++) {
- if (shd->pos[i] != AFR_POS_LOCAL)
- continue;
- _add_eh_to_dict (this, eh, dict, i);
- }
- return 0;
+ return ret;
}
int
-afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+_afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
{
- gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
- int ret = 0;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int xl_id = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_GET_FAILED,
+ "Failed to create dict.");
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], &raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret)
+ goto out;
+ }
- priv = this->private;
- shd = &priv->shd;
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, xdata, NULL);
+ if (ret || !(*xdata)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Xattrop failed on %s.", loc->name);
+ }
- ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
- if (ret)
- goto out;
- ret = dict_get_int32 (input, this->name, &xl_id);
- if (ret)
- goto out;
- ret = dict_set_int32 (output, this->name, xl_id);
- if (ret)
- goto out;
- switch (op) {
- case GF_AFR_OP_HEAL_INDEX:
- ret = _do_self_heal_on_local_subvols (this, INDEX, output);
- break;
- case GF_AFR_OP_HEAL_FULL:
- ret = _do_self_heal_on_local_subvols (this, FULL, output);
- break;
- case GF_AFR_OP_INDEX_SUMMARY:
- (void)_get_index_summary_on_local_subvols (this, output);
- ret = 0;
- break;
- case GF_AFR_OP_HEALED_FILES:
- ret = _add_all_subvols_eh_to_dict (this, shd->healed, output);
- break;
- case GF_AFR_OP_HEAL_FAILED_FILES:
- ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed,
- output);
- break;
- case GF_AFR_OP_SPLIT_BRAIN_FILES:
- ret = _add_all_subvols_eh_to_dict (this, shd->split_brain,
- output);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
- break;
- }
out:
- dict_del (output, this->name);
- return ret;
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
void
-afr_poll_self_heal (void *data)
+afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, struct subvol_healer *healer,
+ dict_t **xdata)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- struct timeval timeout = {0};
- xlator_t *this = NULL;
- long child = (long)data;
- gf_timer_t *old_timer = NULL;
- gf_timer_t *new_timer = NULL;
- shd_pos_t pos_data = {0};
- int ret = 0;
-
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
-
- if (shd->pos[child] == AFR_POS_UNKNOWN) {
- pos_data.this = this;
- pos_data.child = child;
- ret = synctask_new (this->ctx->env,
- afr_syncop_find_child_position,
- NULL, NULL, &pos_data);
- if (!ret)
- shd->pos[child] = pos_data.pos;
- }
- if (shd->enabled && (shd->pos[child] == AFR_POS_LOCAL))
- _do_self_heal_on_subvol (this, child, INDEX);
- timeout.tv_sec = AFR_POLL_TIMEOUT;
- timeout.tv_usec = 0;
- //notify and previous timer should be synchronized.
- LOCK (&priv->lock);
- {
- old_timer = shd->timer[child];
- if (shd->pos[child] == AFR_POS_REMOTE)
- goto unlock;
- shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
- afr_poll_self_heal,
- data);
- new_timer = shd->timer[child];
- }
-unlock:
- UNLOCK (&priv->lock);
-
- if (old_timer)
- gf_timer_call_cancel (this->ctx, old_timer);
- if (!new_timer && (shd->pos[child] != AFR_POS_REMOTE)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Could not create self-heal polling timer for %s",
- priv->children[child]->name);
+ int ret = 0;
+
+ loc_wipe(loc);
+ if (afr_shd_fill_ta_loc(this, loc)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc->name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = afr_ta_post_op_lock(this, loc);
+ if (ret)
+ goto out;
+
+ ret = _afr_shd_ta_get_xattrs(this, loc, xdata);
+ if (ret) {
+ if (*xdata) {
+ dict_unref(*xdata);
+ *xdata = NULL;
}
- return;
+ }
+
+ afr_ta_post_op_unlock(this, loc);
+
+out:
+ if (ret)
+ healer->rerun = 1;
}
-static int
-afr_local_child_poll_self_heal (int ret, call_frame_t *sync_frame, void *data)
+int
+afr_shd_ta_unset_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
{
- afr_self_heald_t *shd = NULL;
- shd_pos_t *pos_data = data;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ gf_boolean_t need_xattrop = _gf_false;
+ void *pending_raw = NULL;
+ int *raw = NULL;
+ int pending[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+ int val = 0;
+ int ret = -1;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t);
+ if (!raw) {
+ goto out;
+ }
- if (ret)
- goto out;
+ ret = dict_get_ptr(*xdata, priv->pending_key[i], &pending_raw);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "Error getting value "
+ "of pending key %s",
+ priv->pending_key[i]);
+ GF_FREE(raw);
+ goto out;
+ }
+
+ memcpy(pending, pending_raw, sizeof(pending));
+ for (j = 0; j < AFR_NUM_CHANGE_LOGS; j++) {
+ val = ntoh32(pending[j]);
+ if (val) {
+ if (i == healer) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_THIN_ARB,
+ "I am "
+ "not the good shd. Skipping. "
+ "SHD = %d.",
+ healer);
+ ret = 0;
+ GF_FREE(raw);
+ goto out;
+ }
+ need_xattrop = _gf_true;
+ raw[j] = hton32(-val);
+ }
+ }
+
+ ret = dict_set_bin(xattr, priv->pending_key[i], raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ GF_FREE(raw);
+ goto out;
+ }
+
+ if (need_xattrop)
+ break;
+ }
+
+ if (!need_xattrop) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Xattrop failed.");
- priv = pos_data->this->private;
- shd = &priv->shd;
- shd->pos[pos_data->child] = pos_data->pos;
- if (pos_data->pos != AFR_POS_REMOTE)
- afr_poll_self_heal ((void*)(long)pos_data->child);
out:
- GF_FREE (data);
- return 0;
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
void
-afr_proactive_self_heal (void *data)
+afr_shd_ta_check_and_unset_xattrs(xlator_t *this, loc_t *loc,
+ struct subvol_healer *healer,
+ dict_t *pre_crawl_xdata)
{
- xlator_t *this = NULL;
- long child = (long)data;
- shd_pos_t *pos_data = NULL;
- int ret = 0;
+ int ret_lock = 0;
+ int ret = 0;
+ dict_t *post_crawl_xdata = NULL;
- this = THIS;
+ ret_lock = afr_ta_post_op_lock(this, loc);
+ if (ret_lock)
+ goto unref;
- //Position of brick could have changed and it could be local now.
- //Compute the position again
- pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
- if (!pos_data)
- goto out;
- pos_data->this = this;
- pos_data->child = child;
- ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
- afr_local_child_poll_self_heal, NULL, pos_data);
- if (ret)
- goto out;
-out:
- return;
+ ret = _afr_shd_ta_get_xattrs(this, loc, &post_crawl_xdata);
+ if (ret)
+ goto unref;
+
+ if (!are_dicts_equal(pre_crawl_xdata, post_crawl_xdata, NULL, NULL)) {
+ ret = -1;
+ goto unref;
+ }
+
+ ret = afr_shd_ta_unset_xattrs(this, loc, &post_crawl_xdata, healer->subvol);
+
+unref:
+ if (post_crawl_xdata) {
+ dict_unref(post_crawl_xdata);
+ post_crawl_xdata = NULL;
+ }
+
+ if (ret || ret_lock)
+ healer->rerun = 1;
+
+ if (!ret_lock)
+ afr_ta_post_op_unlock(this, loc);
}
-int
-_link_inode_update_loc (xlator_t *this, loc_t *loc, struct iatt *iattr)
+gf_boolean_t
+afr_bricks_available_for_heal(afr_private_t *priv)
{
- inode_t *link_inode = NULL;
- int ret = -1;
+ int up_children = 0;
- link_inode = inode_link (loc->inode, NULL, NULL, iattr);
- if (link_inode == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "inode link failed "
- "on the inode (%s)", uuid_utoa (iattr->ia_gfid));
- goto out;
+ up_children = __afr_get_up_children_count(priv);
+ if (up_children < 2) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
+static gf_boolean_t
+afr_shd_ta_needs_heal(xlator_t *this, struct subvol_healer *healer)
+{
+ dict_t *xdata = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+ gf_boolean_t need_heal = _gf_false;
+
+ priv = this->private;
+
+ ret = afr_shd_fill_ta_loc(this, &loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ healer->rerun = 1;
+ goto out;
+ }
+
+ if (_afr_shd_ta_get_xattrs(this, &loc, &xdata)) {
+ healer->rerun = 1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (afr_ta_dict_contains_pending_xattr(xdata, priv, i)) {
+ need_heal = _gf_true;
+ break;
}
- inode_unref (loc->inode);
- loc->inode = link_inode;
- ret = 0;
+ }
+
out:
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+
+ return need_heal;
}
-int
-afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
- loc_t *dirloc)
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- void *index_gfid = NULL;
- loc_t rootloc = {0};
- struct iatt iattr = {0};
- struct iatt parent = {0};
- int ret = 0;
- xlator_t *readdir_xl = crawl_data->readdir_xl;
-
- priv = this->private;
- if (crawl_data->crawl == FULL) {
- afr_build_root_loc (this, dirloc);
- } else {
- afr_build_root_loc (this, &rootloc);
- ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
- GF_XATTROP_INDEX_GFID);
- if (ret < 0)
- goto out;
- ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "failed to get index "
- "dir gfid on %s", readdir_xl->name);
- goto out;
- }
- if (!index_gfid) {
- gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
- "on %s", readdir_xl->name);
- ret = -1;
- goto out;
- }
- uuid_copy (dirloc->gfid, index_gfid);
- dirloc->path = "";
- dirloc->inode = inode_new (priv->root_inode->table);
- ret = syncop_lookup (readdir_xl, dirloc, NULL,
- &iattr, NULL, &parent);
- if (ret < 0) {
- if (errno != ENOENT) {
- gf_log (this->name, GF_LOG_ERROR, "lookup "
- "failed on index dir on %s - (%s)",
- readdir_xl->name, strerror (errno));
- }
- goto out;
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
}
- ret = _link_inode_update_loc (this, dirloc, &iattr);
- if (ret)
- goto out;
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
}
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
out:
- if (xattr)
- dict_unref (xattr);
- loc_wipe (&rootloc);
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
}
-int
-afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
- loc_t *dirloc)
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
{
- fd_t *fd = NULL;
- int ret = 0;
-
- if (crawl_data->crawl == FULL) {
- fd = fd_create (dirloc->inode, crawl_data->pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", dirloc->path);
- ret = -1;
- goto out;
- }
-
- ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s", dirloc->path);
- goto out;
- }
- } else {
- fd = fd_anonymous (dirloc->inode);
- }
- ret = 0;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
out:
- if (!ret)
- *dirfd = fd;
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
}
-xlator_t*
-afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
+void *
+afr_shd_index_healer(void *data)
{
- afr_private_t *priv = this->private;
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ dict_t *pre_crawl_xdata = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ healer = data;
+ THIS = this = healer->this;
+ priv = this->private;
+
+ for (;;) {
+ afr_shd_healer_wait(healer);
+
+ if (!afr_bricks_available_for_heal(priv))
+ continue;
+
+ ASSERT_LOCAL(this, healer);
+ priv->local[healer->subvol] = healer->local;
+
+ if (priv->thin_arbiter_count) {
+ if (afr_shd_ta_needs_heal(this, healer))
+ afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
+ }
- if (crawl_data->crawl == FULL) {
- return this;
- } else {
- return priv->children[crawl_data->child];
+ do {
+ gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+
+ afr_shd_sweep_prepare(healer);
+
+ ret = afr_shd_index_sweep_all(healer);
+
+ afr_shd_sweep_done(healer);
+ /*
+ As long as at least one gfid was
+ healed, keep retrying. We may have
+ just healed a directory and thereby
+ created entries for other gfids which
+ could not be healed thus far.
+ */
+
+ gf_msg_debug(this->name, 0, "finished index sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+ /*
+ Give a pause before retrying to avoid a busy loop
+ in case the only entry in index is because of
+ an ongoing I/O.
+ */
+ sleep(1);
+ } while (ret > 0);
+
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
}
- return NULL;
-}
-int
-afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
- gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
-{
- int ret = -1;
- afr_private_t *priv = NULL;
+ if (ret == 0 && pre_crawl_xdata &&
+ !healer->crawl_event.heal_failed_count) {
+ afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
+ pre_crawl_xdata);
+ }
- priv = this->private;
- if (crawl_data->crawl == FULL) {
- ret = afr_build_child_loc (this, child, parent, entry->d_name);
- } else {
- child->inode = inode_new (priv->root_inode->table);
- if (!child->inode)
- goto out;
- uuid_parse (entry->d_name, child->gfid);
- ret = _loc_assign_gfid_path (child);
+ if (pre_crawl_xdata) {
+ dict_unref(pre_crawl_xdata);
+ pre_crawl_xdata = NULL;
}
-out:
- return ret;
+ }
+
+ return NULL;
}
-static int
-_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- off_t *offset, afr_crawl_data_t *crawl_data)
+void *
+afr_shd_full_healer(void *data)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- loc_t entry_loc = {0};
- fd_t *fd = NULL;
- struct iatt iattr = {0};
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (!_crawl_proceed (this, crawl_data->child,
- crawl_data->crawl_flags, NULL)) {
- ret = -1;
- goto out;
- }
- *offset = entry->d_off;
- if (IS_ENTRY_CWD (entry->d_name) ||
- IS_ENTRY_PARENT (entry->d_name))
- continue;
- if ((crawl_data->crawl == FULL) &&
- uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
- "gfid present skipping",
- parentloc->path, entry->d_name);
- continue;
- }
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
- loc_wipe (&entry_loc);
- ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
- entry, crawl_data);
- if (ret)
- goto out;
-
- ret = crawl_data->process_entry (this, crawl_data, entry,
- &entry_loc, parentloc, &iattr);
-
- if (ret)
- continue;
-
- ret = _link_inode_update_loc (this, &entry_loc, &iattr);
- if (ret)
- goto out;
- if (crawl_data->crawl == INDEX)
- continue;
-
- if (!IA_ISDIR (iattr.ia_type))
- continue;
- fd = NULL;
- ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
- if (ret)
- continue;
- ret = _crawl_directory (fd, &entry_loc, crawl_data);
- if (fd)
- fd_unref (fd);
+ for (;;) {
+ pthread_mutex_lock(&healer->mutex);
+ {
+ run = __afr_shd_healer_wait(healer);
+ if (!run)
+ healer->running = _gf_false;
}
- ret = 0;
-out:
- loc_wipe (&entry_loc);
- return ret;
+ pthread_mutex_unlock(&healer->mutex);
+
+ if (!run)
+ break;
+
+ ASSERT_LOCAL(this, healer);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "starting full sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+
+ afr_shd_sweep_prepare(healer);
+
+ afr_shd_full_sweep(healer, this->itable->root);
+
+ afr_shd_sweep_done(healer);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "finished full sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+ }
+
+ return NULL;
}
-static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
+int
+afr_shd_healer_init(xlator_t *this, struct subvol_healer *healer)
{
- xlator_t *this = NULL;
- off_t offset = 0;
- gf_dirent_t entries;
- int ret = 0;
- gf_boolean_t free_entries = _gf_false;
- xlator_t *readdir_xl = crawl_data->readdir_xl;
-
- INIT_LIST_HEAD (&entries.list);
- this = THIS;
-
- GF_ASSERT (loc->inode);
-
- if (crawl_data->crawl == FULL)
- gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
- else
- gf_log (this->name, GF_LOG_DEBUG, "crawling INDEX %s",
- uuid_utoa (loc->gfid));
-
- while (1) {
- if (crawl_data->crawl == FULL)
- ret = syncop_readdirp (readdir_xl, fd, 131072, offset,
- NULL, &entries);
- else
- ret = syncop_readdir (readdir_xl, fd, 131072, offset,
- &entries);
- if (ret <= 0)
- break;
- ret = 0;
- free_entries = _gf_true;
+ int ret = 0;
- if (!_crawl_proceed (this, crawl_data->child,
- crawl_data->crawl_flags, NULL)) {
- ret = -1;
- goto out;
- }
- if (list_empty (&entries.list))
- goto out;
+ ret = pthread_mutex_init(&healer->mutex, NULL);
+ if (ret)
+ goto out;
- ret = _process_entries (this, loc, &entries, &offset,
- crawl_data);
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- }
- ret = 0;
+ ret = pthread_cond_init(&healer->cond, NULL);
+ if (ret)
+ goto out;
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+ healer->local = _gf_false;
out:
- if (free_entries)
- gf_dirent_free (&entries);
- return ret;
+ return ret;
}
-static char*
-position_str_get (afr_child_pos_t pos)
+int
+afr_shd_healer_spawn(xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
{
- switch (pos) {
- case AFR_POS_UNKNOWN:
- return "unknown";
- case AFR_POS_LOCAL:
- return "local";
- case AFR_POS_REMOTE:
- return "remote";
+ int ret = 0;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ } else {
+ ret = gf_thread_create(&healer->thread, NULL, threadfn, healer,
+ "shdheal");
+ if (ret)
+ goto unlock;
+ healer->running = 1;
}
- return NULL;
+
+ healer->rerun = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
}
int
-afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos)
+afr_shd_full_healer_spawn(xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- dict_t *xattr_rsp = NULL;
- loc_t loc = {0};
- int ret = 0;
- char *node_uuid = NULL;
+ return afr_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
+ afr_shd_full_healer);
+}
- priv = this->private;
- shd = &priv->shd;
+int
+afr_shd_index_healer_spawn(xlator_t *this, int subvol)
+{
+ return afr_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
+ afr_shd_index_healer);
+}
- afr_build_root_loc (this, &loc);
+int
+afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
+ crawl_event_t *crawl_event)
+{
+ int ret = 0;
+ uint64_t count = 0;
+ char key[128] = {0};
+ int keylen = 0;
+ char suffix[64] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = 0;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+ int child = -1;
+
+ child = crawl_event->child;
+ healed_count = crawl_event->healed_count;
+ split_brain_count = crawl_event->split_brain_count;
+ heal_failed_count = crawl_event->heal_failed_count;
+ crawl_type = crawl_event->crawl_type;
+
+ if (!crawl_event->start_time)
+ goto out;
+
+ start_time_str = gf_strdup(ctime(&crawl_event->start_time));
+
+ if (crawl_event->end_time)
+ end_time_str = gf_strdup(ctime(&crawl_event->end_time));
+
+ ret = dict_get_int32(output, this->name, &xl_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "xl does not have id");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64(output, key, &count);
+
+ snprintf(suffix, sizeof(suffix), "%d-%d-%" PRIu64, xl_id, child, count);
+ snprintf(key, sizeof(key), "statistics_healed_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_count to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics_sb_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, split_brain_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_split_brain_count to output");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_crawl_type-%s", suffix);
+ ret = dict_set_strn(output, key, keylen, crawl_type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_type to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, heal_failed_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_failed_count to output");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_strt_time-%s", suffix);
+ ret = dict_set_dynstrn(output, key, keylen, start_time_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_start_time to output");
+ goto out;
+ } else {
+ start_time_str = NULL;
+ }
+
+ if (!end_time_str)
+ progress = 1;
+ else
+ progress = 0;
+
+ keylen = snprintf(key, sizeof(key), "statistics_end_time-%s", suffix);
+ if (!end_time_str)
+ end_time_str = gf_strdup("Could not determine the end time");
+ ret = dict_set_dynstrn(output, key, keylen, end_time_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_end_time to output");
+ goto out;
+ } else {
+ end_time_str = NULL;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_inprogress-%s", suffix);
+
+ ret = dict_set_int32n(output, key, keylen, progress);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_inprogress to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_set_uint64(output, key, count + 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not increment the counter.");
+ goto out;
+ }
+out:
+ GF_FREE(start_time_str);
+ GF_FREE(end_time_str);
+ return ret;
+}
- ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
- GF_XATTR_NODE_UUID_KEY);
+int
+afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv)
+{
+ int ret = -1;
+ uint64_t count = 0;
+ char key[64] = {0};
+ int keylen = 0;
+ char xl_id_child_str[32] = {0};
+ int xl_id = 0;
+
+ ret = dict_get_int32(output, this->name, &xl_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "xl does not have id");
+ goto out;
+ }
+
+ snprintf(xl_id_child_str, sizeof(xl_id_child_str), "%d-%d", xl_id, child);
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
+ ret = dict_get_uint64(output, key, &count);
+
+ keylen = snprintf(key, sizeof(key), "%s-%" PRIu64, xl_id_child_str, count);
+ ret = dict_set_dynstrn(output, key, keylen, path);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Could not add to output", path);
+ goto out;
+ }
+
+ if (tv) {
+ snprintf(key, sizeof(key), "%s-%" PRIu64 "-time", xl_id_child_str,
+ count);
+ ret = dict_set_uint32(output, key, tv->tv_sec);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s - "
- "(%s)", priv->children[child]->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Could not set time", path);
+ goto out;
}
+ }
- ret = dict_get_str (xattr_rsp, GF_XATTR_NODE_UUID_KEY, &node_uuid);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "node-uuid key not found on "
- "child %s", priv->children[child]->name);
- goto out;
- }
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
- if (!strcmp (node_uuid, shd->node_uuid))
- *pos = AFR_POS_LOCAL;
- else
- *pos = AFR_POS_REMOTE;
+ ret = dict_set_uint64(output, key, count + 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not increment count");
+ goto out;
+ }
- gf_log (this->name, GF_LOG_DEBUG, "child %s is %s",
- priv->children[child]->name, position_str_get (*pos));
+ ret = 0;
out:
- if (ret)
- *pos = AFR_POS_UNKNOWN;
- loc_wipe (&loc);
- return ret;
+ return ret;
}
int
-afr_syncop_find_child_position (void *data)
+afr_add_shd_event(circular_buffer_t *cb, void *data)
{
- shd_pos_t *pos_data = data;
- int ret = 0;
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ shd_event = cb->data;
+
+ if (!shd->index_healers[shd_event->child].local)
+ return 0;
- ret = afr_find_child_position (pos_data->this, pos_data->child,
- &pos_data->pos);
- return ret;
+ path = gf_strdup(shd_event->path);
+ if (!path)
+ return -ENOMEM;
+
+ afr_shd_dict_add_path(this, output, shd_event->child, path, &cb->tv);
+ return 0;
}
-static int
-afr_dir_crawl (void *data)
+int
+afr_add_crawl_event(circular_buffer_t *cb, void *data)
{
- xlator_t *this = NULL;
- int ret = -1;
- xlator_t *readdir_xl = NULL;
- fd_t *fd = NULL;
- loc_t dirloc = {0};
- afr_crawl_data_t *crawl_data = data;
-
- this = THIS;
-
- if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags,
- NULL))
- goto out;
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = cb->data;
+
+ if (!shd->index_healers[crawl_event->child].local)
+ return 0;
- readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
- if (!readdir_xl)
- goto out;
- crawl_data->readdir_xl = readdir_xl;
+ afr_shd_dict_add_crawl_event(this, output, crawl_event);
- ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc);
- if (ret)
- goto out;
+ return 0;
+}
- ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc);
+int
+afr_selfheal_daemon_init(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = afr_shd_healer_init(this, &shd->index_healers[i]);
if (ret)
- goto out;
-
- ret = _crawl_directory (fd, &dirloc, crawl_data);
+ goto out;
+ }
+
+ shd->full_healers = GF_CALLOC(sizeof(*shd->full_healers), priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = afr_shd_healer_init(this, &shd->full_healers[i]);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
- readdir_xl->name);
- else
- gf_log (this->name, GF_LOG_DEBUG, "Crawl completed "
- "on %s", readdir_xl->name);
- if (crawl_data->crawl == INDEX)
- dirloc.path = NULL;
+ goto out;
+ }
+
+ shd->split_brain = eh_new(AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->split_brain)
+ goto out;
+
+ shd->statistics = GF_CALLOC(sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!shd->statistics)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->statistics[i] = eh_new(AFR_STATISTICS_HISTORY_SIZE, _gf_false,
+ afr_destroy_crawl_event_data);
+ if (!shd->statistics[i])
+ goto out;
+ shd->full_healers[i].crawl_event.child = i;
+ shd->full_healers[i].crawl_event.crawl_type = "FULL";
+ shd->index_healers[i].crawl_event.child = i;
+ shd->index_healers[i].crawl_event.crawl_type = "INDEX";
+ }
+
+ ret = 0;
out:
- if (fd)
- fd_unref (fd);
- if (crawl_data->crawl == INDEX)
- dirloc.path = NULL;
- loc_wipe (&dirloc);
- return ret;
+ return ret;
}
-static int
-afr_dir_exclusive_crawl (void *data)
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- gf_boolean_t crawl = _gf_false;
- int ret = 0;
- int child = -1;
- xlator_t *this = NULL;
- afr_crawl_data_t *crawl_data = data;
-
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
- child = crawl_data->child;
-
- LOCK (&priv->lock);
- {
- if (shd->inprogress[child]) {
- if (shd->pending[child] != FULL)
- shd->pending[child] = crawl_data->crawl;
- } else {
- shd->inprogress[child] = _gf_true;
- crawl = _gf_true;
- }
- }
- UNLOCK (&priv->lock);
+ int subvol = 0;
- if (!crawl) {
- gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
- "for %s", priv->children[child]->name);
- goto out;
- }
+ if (!priv->shd.iamshd)
+ return;
+ for (subvol = 0; subvol < priv->child_count; subvol++)
+ if (priv->child_up[subvol])
+ afr_shd_index_healer_spawn(this, subvol);
- do {
- afr_dir_crawl (data);
- LOCK (&priv->lock);
- {
- if (shd->pending[child] != NONE) {
- crawl_data->crawl = shd->pending[child];
- shd->pending[child] = NONE;
- } else {
- shd->inprogress[child] = _gf_false;
- crawl = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
- } while (crawl);
-out:
- return ret;
+ return;
}
-void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
- process_entry_cbk_t process_entry, void *op_data,
- gf_boolean_t exclusive, int crawl_flags,
- afr_crawl_done_cbk_t crawl_done)
+int
+afr_shd_get_index_count(xlator_t *this, int i, uint64_t *count)
{
- afr_private_t *priv = NULL;
- call_frame_t *frame = NULL;
- afr_crawl_data_t *crawl_data = NULL;
- int ret = 0;
- int (*crawler) (void*) = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ loc_t rootloc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int ret = -1;
- priv = this->private;
+ priv = this->private;
+ subvol = priv->children[i];
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
- afr_set_lk_owner (frame, this, frame->root);
- afr_set_low_priority (frame);
- crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
- gf_afr_mt_crawl_data_t);
- if (!crawl_data)
- goto out;
- crawl_data->process_entry = process_entry;
- crawl_data->child = idx;
- crawl_data->pid = frame->root->pid;
- crawl_data->crawl = crawl;
- crawl_data->op_data = op_data;
- crawl_data->crawl_flags = crawl_flags;
- gf_log (this->name, GF_LOG_DEBUG, "starting crawl %d for %s",
- crawl_data->crawl, priv->children[idx]->name);
-
- if (exclusive)
- crawler = afr_dir_exclusive_crawl;
- else
- crawler = afr_dir_crawl;
- ret = synctask_new (this->ctx->env, crawler,
- crawl_done, frame, crawl_data);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Could not create the "
- "task for %d ret %d", idx, ret);
-out:
- return;
-}
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, GF_XATTROP_INDEX_COUNT,
+ NULL, NULL);
+ if (ret < 0)
+ goto out;
-void
-afr_build_root_loc (xlator_t *this, loc_t *loc)
-{
- afr_private_t *priv = NULL;
+ ret = dict_get_uint64(xattr, GF_XATTROP_INDEX_COUNT, count);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&rootloc);
- priv = this->private;
- loc->path = gf_strdup ("/");
- loc->name = "";
- loc->inode = inode_ref (priv->root_inode);
- uuid_copy (loc->gfid, loc->inode->gfid);
+ return ret;
}
int
-afr_set_root_gfid (dict_t *dict)
+afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
{
- uuid_t gfid;
- int ret = 0;
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct subvol_healer *healer = NULL;
+ int i = 0;
+ char key[64];
+ int keylen = 0;
+ int this_name_len = 0;
+ int op_ret = 0;
+ uint64_t cnt = 0;
+
+#define AFR_SET_DICT_AND_LOG(name, output, key, keylen, dict_str, \
+ dict_str_len) \
+ { \
+ int ret; \
+ \
+ ret = dict_set_nstrn(output, key, keylen, dict_str, dict_str_len); \
+ if (ret) { \
+ gf_smsg(name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, \
+ "key=%s", key, "value=%s", dict_str, NULL); \
+ } \
+ }
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32_sizen(input, "xl-op", (int32_t *)&op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=xl-op", NULL);
+ goto out;
+ }
+ this_name_len = strlen(this->name);
+ ret = dict_get_int32n(input, this->name, this_name_len, &xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=%s", this->name, NULL);
+ goto out;
+ }
+ ret = dict_set_int32n(output, this->name, this_name_len, xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "key=%s", this->name, NULL);
+ goto out;
+ }
+ switch (op) {
+ case GF_SHD_OP_HEAL_INDEX:
+ op_ret = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ op_ret = -1;
+ } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
+ op_ret = -1;
+ } else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
+ } else {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
- memset (gfid, 0, 16);
- gfid[15] = 1;
+ ret = afr_shd_index_healer_spawn(this, i);
- ret = afr_set_dict_gfid (dict, gfid);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
+ }
+ }
+ break;
+ case GF_SHD_OP_HEAL_FULL:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->full_healers[i];
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
+ } else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
+ } else {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
- return ret;
-}
+ ret = afr_shd_full_healer_spawn(this, i);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
+ op_ret = 0;
+ }
+ }
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ /* this case has been handled in glfs-heal.c */
+ break;
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ eh_dump(shd->split_brain, output, afr_add_shd_event);
+ break;
+ case GF_SHD_OP_STATISTICS:
+ for (i = 0; i < priv->child_count; i++) {
+ eh_dump(shd->statistics[i], output, afr_add_crawl_event);
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->index_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+ }
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i]) {
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id,
+ i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ } else {
+ snprintf(key, sizeof(key), "%d-%d-hardlinks", xl_id, i);
+ ret = afr_shd_get_index_count(this, i, &cnt);
+ if (ret == 0) {
+ ret = dict_set_uint64(output, key, cnt);
+ }
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_SET_FAILED, NULL);
+ }
+ op_ret = 0;
+ }
+ }
+
+ break;
+
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "op=%d",
+ op, NULL);
+ break;
+ }
+out:
+ dict_deln(output, this->name, this_name_len);
+ return op_ret;
+
+#undef AFR_SET_DICT_AND_LOG
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 8e608459684..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,38 +8,68 @@
cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEALD_H__
-#define __AFR_SELF_HEALD_H__
-#include "xlator.h"
+#ifndef _AFR_SELF_HEALD_H
+#define _AFR_SELF_HEALD_H
-#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
-#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
-#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
-#define AFR_ALL_CHILDREN -1
+#include <pthread.h>
-typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
- afr_crawl_type_t crawl;
- xlator_t *readdir_xl;
- void *op_data;
- int crawl_flags;
- int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
- gf_dirent_t *entry, loc_t *child, loc_t *parent,
- struct iatt *iattr);
-} afr_crawl_data_t;
+typedef struct {
+ char *path;
+ int child;
+} shd_event_t;
-typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
- gf_dirent_t *entry, loc_t *child, loc_t *parent,
- struct iatt *iattr);
+typedef struct {
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
-void afr_build_root_loc (xlator_t *this, loc_t *loc);
+ /* If start_time is 0, it means crawler is not in progress
+ and stats are not valid */
+ time_t start_time;
+ /* If start_time is NOT 0 and end_time is 0, it means
+ cralwer is in progress */
+ time_t end_time;
+ char *crawl_type;
+ int child;
+} crawl_event_t;
-int afr_set_root_gfid (dict_t *dict);
+struct subvol_healer {
+ xlator_t *this;
+ crawl_event_t crawl_event;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
+};
-void
-afr_proactive_self_heal (void *data);
+typedef struct {
+ struct subvol_healer *index_healers;
+ struct subvol_healer *full_healers;
+
+ eh_t *split_brain;
+ eh_t **statistics;
+ int timeout;
+ uint32_t max_threads;
+ uint32_t wait_qlength;
+ uint32_t halo_max_latency_msec;
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
+} afr_self_heald_t;
+
+int
+afr_selfheal_daemon_init(xlator_t *this);
+
+int
+afr_xl_op(xlator_t *this, dict_t *input, dict_t *output);
+
+int
+afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ char **path_p);
int
-afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
-#endif /* __AFR_SELF_HEALD_H__ */
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type);
+#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index b29e11fafc4..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -8,1460 +8,2920 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "timer.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/timer.h>
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
#include <signal.h>
+typedef enum {
+ AFR_TRANSACTION_PRE_OP,
+ AFR_TRANSACTION_POST_OP,
+} afr_xattrop_type_t;
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path
- of RENAME */
-#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
+static void
+afr_lock_resume_shared(struct list_head *list);
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this);
-afr_fd_ctx_t *
-__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
-{
- uint64_t ctx = 0;
- int ret = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int i = 0;
- afr_private_t *priv = NULL;
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno);
- priv = this->private;
+void
+__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared);
- ret = __fd_ctx_get (fd, this, &ctx);
+void
+afr_changelog_post_op_do(call_frame_t *frame, xlator_t *this);
- if (ret < 0 && fd_is_anonymous (fd)) {
- ret = __afr_fd_ctx_set (this, fd);
- if (ret < 0)
- goto out;
+int
+afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this);
- ret = __fd_ctx_get (fd, this, &ctx);
- if (ret < 0)
- goto out;
+gf_boolean_t
+afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- for (i = 0; i < priv->child_count; i++)
- fd_ctx->opened_on[i] = AFR_FD_OPENED;
- }
+gf_boolean_t
+afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-out:
- return fd_ctx;
-}
+int
+afr_changelog_call_count(afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
+ unsigned int child_count);
+int
+afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op);
+static void
+afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this)
-{
- afr_fd_ctx_t *fd_ctx = NULL;
+static int
+afr_ta_post_op_do(void *opaque);
- LOCK(&fd->lock);
- {
- fd_ctx = __afr_fd_ctx_get (fd, this);
- }
- UNLOCK(&fd->lock);
+static int
+afr_ta_post_op_synctask(xlator_t *this, afr_local_t *local);
- return fd_ctx;
-}
+static int
+afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this);
+
+static void
+afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno);
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv)
+{
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ priv->ta_notify_dom_lock_offset = 0;
+}
static void
-afr_save_lk_owner (call_frame_t *frame)
+afr_ta_process_waitq(xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t *entry = NULL;
+ afr_private_t *priv = this->private;
+ struct list_head waitq = {
+ 0,
+ };
+
+ INIT_LIST_HEAD(&waitq);
+ LOCK(&priv->lock);
+ list_splice_init(&priv->ta_waitq, &waitq);
+ UNLOCK(&priv->lock);
+ list_for_each_entry(entry, &waitq, ta_waitq)
+ {
+ afr_ta_decide_post_op_state(entry->transaction.frame, this);
+ }
+}
- local = frame->local;
+int
+afr_ta_lock_release_done(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ afr_ta_process_waitq(ta_frame->this);
+ STACK_DESTROY(ta_frame->root);
+ return 0;
+}
- local->saved_lk_owner = frame->root->lk_owner;
+int
+afr_release_notify_lock_for_ta(void *opaque)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = -1;
+
+ this = (xlator_t *)opaque;
+ priv = this->private;
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate loc for thin-arbiter.");
+ goto out;
+ }
+ flock.l_type = F_UNLCK;
+ flock.l_start = priv->ta_notify_dom_lock_offset;
+ flock.l_len = 1;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+ }
+
+ LOCK(&priv->lock);
+ {
+ afr_ta_locked_priv_invalidate(priv);
+ }
+ UNLOCK(&priv->lock);
+out:
+ loc_wipe(&loc);
+ return ret;
}
+void
+afr_zero_fill_stat(afr_local_t *local)
+{
+ if (!local)
+ return;
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION) {
+ gf_zero_fill_stat(&local->cont.inode_wfop.prebuf);
+ gf_zero_fill_stat(&local->cont.inode_wfop.postbuf);
+ } else if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ gf_zero_fill_stat(&local->cont.dir_fop.buf);
+ gf_zero_fill_stat(&local->cont.dir_fop.preparent);
+ gf_zero_fill_stat(&local->cont.dir_fop.postparent);
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION)
+ return;
+ gf_zero_fill_stat(&local->cont.dir_fop.prenewparent);
+ gf_zero_fill_stat(&local->cont.dir_fop.postnewparent);
+ }
+}
-static void
-afr_restore_lk_owner (call_frame_t *frame)
+/* In case of errors afr needs to choose which xdata from lower xlators it needs
+ * to unwind with. The way it is done is by checking if there are
+ * any good subvols which failed. Give preference to errnos other than
+ * ENOTCONN even if the child is source */
+void
+afr_pick_error_xdata(afr_local_t *local, afr_private_t *priv, inode_t *inode1,
+ unsigned char *readable1, inode_t *inode2,
+ unsigned char *readable2)
{
- afr_local_t * local = NULL;
+ int s = -1; /*selection*/
+ int i = 0;
+ unsigned char *readable = NULL;
+
+ if (local->xdata_rsp) {
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ readable = alloca0(priv->child_count * sizeof(*readable));
+ if (inode2 && readable2) { /*rename fop*/
+ AFR_INTERSECT(readable, readable1, readable2, priv->child_count);
+ } else {
+ memcpy(readable, readable1, sizeof(*readable) * priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (local->replies[i].op_errno == ENOTCONN)
+ continue;
+
+ /*Order is important in the following condition*/
+ if ((s < 0) || (!readable[s] && readable[i]))
+ s = i;
+ }
+
+ if (s != -1 && local->replies[s].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[s].xdata);
+ } else if (s == -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- local = frame->local;
+ if (local->replies[i].op_ret >= 0)
+ continue;
- frame->root->lk_owner = local->saved_lk_owner;
+ if (!local->replies[i].xdata)
+ continue;
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ break;
+ }
+ }
}
+gf_boolean_t
+afr_needs_changelog_update(afr_local_t *local)
+{
+ if (local->transaction.type == AFR_DATA_TRANSACTION)
+ return _gf_true;
+ if (!local->optimistic_change_log)
+ return _gf_true;
+ return _gf_false;
+}
-static void
-__mark_all_pending (int32_t *pending[], int child_count,
- afr_transaction_type type)
+gf_boolean_t
+afr_changelog_has_quorum(afr_local_t *local, xlator_t *this)
{
- int i = 0;
- int j = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ unsigned char *success_children = NULL;
+
+ priv = this->private;
+ success_children = alloca0(priv->child_count);
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (1);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i]) {
+ success_children[i] = 1;
}
+ }
+
+ if (afr_has_quorum(success_children, this, NULL)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
}
+gf_boolean_t
+afr_is_write_subvol_valid(call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t write_subvol = 0;
+ unsigned char *writable = NULL;
+ uint16_t datamap = 0;
+
+ local = frame->local;
+ priv = this->private;
+ writable = alloca0(priv->child_count);
+
+ write_subvol = afr_write_subvol_get(frame, this);
+ datamap = (write_subvol & 0x00000000ffff0000) >> 16;
+ for (i = 0; i < priv->child_count; i++) {
+ if (datamap & (1 << i))
+ writable[i] = 1;
+
+ if (writable[i] && !local->transaction.failed_subvols[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
-static void
-__mark_child_dead (int32_t *pending[], int child_count, int child,
- afr_transaction_type type)
+int
+afr_transaction_fop(call_frame_t *frame, xlator_t *this)
{
- int j = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ unsigned char *failed_subvols = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ failed_subvols = local->transaction.failed_subvols;
+ call_count = priv->child_count -
+ AFR_COUNT(failed_subvols, priv->child_count);
+ /* Fail if pre-op did not succeed on quorum no. of bricks. */
+ if (!afr_changelog_has_quorum(local, this) || !call_count) {
+ local->op_ret = -1;
+ /* local->op_errno is already captured in changelog cbk. */
+ afr_transaction_resume(frame, this);
+ return 0;
+ }
+
+ /* Fail if at least one writeable brick isn't up.*/
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !afr_is_write_subvol_valid(frame, this)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ afr_transaction_resume(frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] && !failed_subvols[i]) {
+ local->transaction.wind(frame, this, i);
- j = afr_index_for_transaction_type (type);
+ if (!--call_count)
+ break;
+ }
+ }
- pending[child][j] = 0;
+ return 0;
}
+int
+afr_transaction_done(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t unwind = _gf_false;
+ afr_lock_t *lock = NULL;
+ afr_local_t *lock_local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (priv->consistent_metadata) {
+ LOCK(&frame->lock);
+ {
+ unwind = (local->transaction.main_frame != NULL);
+ }
+ UNLOCK(&frame->lock);
+ if (unwind) /*It definitely did post-op*/
+ afr_zero_fill_stat(local);
+ }
+
+ if (local->transaction.do_eager_unlock) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ lock->acquired = _gf_false;
+ lock->release = _gf_false;
+ list_splice_init(&lock->frozen, &lock->waiting);
+ if (list_empty(&lock->waiting))
+ goto unlock;
+ lock_local = list_entry(lock->waiting.next, afr_local_t,
+ transaction.wait_list);
+ list_del_init(&lock_local->transaction.wait_list);
+ list_add(&lock_local->transaction.owner_list, &lock->owners);
+ }
+ unlock:
+ UNLOCK(&local->inode->lock);
+ }
+ if (lock_local) {
+ afr_lock(lock_local->transaction.frame,
+ lock_local->transaction.frame->this);
+ }
+ local->transaction.unwind(frame, this);
+
+ GF_ASSERT(list_empty(&local->transaction.owner_list));
+ GF_ASSERT(list_empty(&local->transaction.wait_list));
+ AFR_STACK_DESTROY(frame);
+
+ return 0;
+}
static void
-__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t *each = NULL;
+
+ while (!list_empty(list)) {
+ each = list_entry(list->next, afr_local_t, transaction.wait_list);
+ list_del_init(&each->transaction.wait_list);
+ each->op_ret = -1;
+ each->op_errno = local->op_errno;
+ afr_transaction_done(each->transaction.frame,
+ each->transaction.frame->this);
+ }
+}
- local = frame->local;
+static void
+afr_handle_lock_acquire_failure(afr_local_t *local)
+{
+ struct list_head shared;
+ afr_lock_t *lock = NULL;
- if (!local->fd)
- return;
+ if (!local->transaction.eager_lock_on)
+ goto out;
- fd_ctx = afr_fd_ctx_get (local->fd, this);
+ lock = &local->inode_ctx->lock[local->transaction.type];
- if (!fd_ctx)
- goto out;
+ INIT_LIST_HEAD(&shared);
+ LOCK(&local->inode->lock);
+ {
+ lock->release = _gf_true;
+ list_splice_init(&lock->waiting, &shared);
+ }
+ UNLOCK(&local->inode->lock);
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]++;
- }
- UNLOCK (&local->fd->lock);
+ afr_lock_fail_shared(local, &shared);
+ local->transaction.do_eager_unlock = _gf_true;
out:
- return;
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ afr_unlock(local->transaction.frame, local->transaction.frame->this);
}
+call_frame_t *
+afr_transaction_detach_fop_frame(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *fop_frame = NULL;
+
+ local = frame->local;
+
+ afr_handle_inconsistent_fop(frame, &local->op_ret, &local->op_errno);
+ LOCK(&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK(&frame->lock);
+
+ return fop_frame;
+}
static void
-__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+afr_save_lk_owner(call_frame_t *frame)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (!local->fd)
- return;
+ local->saved_lk_owner = frame->root->lk_owner;
+}
- fd_ctx = afr_fd_ctx_get (local->fd, this);
+static void
+afr_restore_lk_owner(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
- if (!fd_ctx)
- goto out;
+ local = frame->local;
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]--;
- }
- UNLOCK (&local->fd->lock);
-out:
- return;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
-static void
-__mark_non_participant_children (int32_t *pending[], int child_count,
- unsigned char *participants,
- afr_transaction_type type)
+void
+__mark_all_success(call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int j = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i;
- j = afr_index_for_transaction_type (type);
- for (i = 0; i < child_count; i++) {
- if (!participants[i])
- pending[i][j] = 0;
- }
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ local->transaction.failed_subvols[i] = 0;
+ }
}
+void
+afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_transaction_type type = -1;
+ dict_t *xdata = NULL;
+ int **matrix = NULL;
+ int idx = -1;
+ int i = 0;
+ int j = 0;
+
+ priv = this->private;
+ local = frame->local;
+ type = local->transaction.type;
+ idx = afr_index_for_transaction_type(type);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.changelog_xdata[i])
+ continue;
+ xdata = local->transaction.changelog_xdata[i];
+ afr_selfheal_fill_matrix(this, matrix, i, idx, xdata);
+ }
+
+ memset(local->transaction.pre_op_sources, 1, priv->child_count);
+
+ /*If lock or pre-op failed on a brick, it is not a source. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->transaction.pre_op_sources[i] = 0;
+ }
+
+ /* If brick is blamed by others, it is not a source. */
+ for (i = 0; i < priv->child_count; i++)
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j] != 0)
+ local->transaction.pre_op_sources[j] = 0;
+}
-static void
-__mark_all_success (int32_t *pending[], int child_count,
- afr_transaction_type type)
+void
+afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
{
- int i;
- int j;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_sources_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ afr_compute_pre_op_sources(frame, this);
+ pre_op_sources_count = AFR_COUNT(local->transaction.pre_op_sources,
+ priv->child_count);
+
+ /* If arbiter is the only source, do not proceed. */
+ if (pre_op_sources_count < 2 &&
+ local->transaction.pre_op_sources[ARBITER_BRICK_INDEX]) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
+ }
+
+ afr_transaction_fop(frame, this);
+
+ return;
+}
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (-1);
+int
+afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = 0;
+ int failure_count = 0;
+ struct list_head shared;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ INIT_LIST_HEAD(&shared);
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !local->transaction.inherited) {
+ ret = afr_write_subvol_set(frame, this);
+ if (ret) {
+ /*act as if operation failed on all subvols*/
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
}
+ }
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+ just now, before OP */
+ afr_changelog_pre_op_update(frame, this);
+
+ if (!local->transaction.eager_lock_on || local->transaction.inherited)
+ goto fop;
+ failure_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+ if (failure_count == priv->child_count) {
+ afr_handle_lock_acquire_failure(local);
+ return 0;
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ lock->acquired = _gf_true;
+ __afr_transaction_wake_shared(local, &shared);
+ }
+ UNLOCK(&local->inode->lock);
+ }
+
+fop:
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner(frame);
+ frame->root->lk_owner = local->transaction.main_frame->root->lk_owner;
+
+ if (priv->arbiter_count == 1) {
+ afr_txn_arbitrate_fop(frame, this);
+ } else {
+ afr_transaction_fop(frame, this);
+ }
+
+ afr_lock_resume_shared(&shared);
+ return 0;
}
-
-static int
-__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
+int
+afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int **pending)
{
- int ret = 0;
+ int i = 0;
+ int ret = 0;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- if (priv->data_change_log)
- ret = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ /* 3 = data+metadata+entry */
- break;
+ if (ret)
+ break;
+ }
- case AFR_METADATA_TRANSACTION:
- if (priv->metadata_change_log)
- ret = 1;
+ return ret;
+}
+static void
+afr_ta_dom_lock_check_and_release(afr_ta_fop_state_t fop_state, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ unsigned int inmem_count = 0;
+ unsigned int onwire_count = 0;
+ gf_boolean_t release = _gf_false;
+
+ LOCK(&priv->lock);
+ {
+ /*Once we get notify lock release upcall notification,
+ if any of the fop state counters are non-zero, we will
+ not release the lock.
+ */
+ onwire_count = priv->ta_on_wire_txn_count;
+ inmem_count = priv->ta_in_mem_txn_count;
+ switch (fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ onwire_count = --priv->ta_on_wire_txn_count;
+ break;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ case TA_INFO_IN_MEMORY_FAILED:
+ inmem_count = --priv->ta_in_mem_txn_count;
+ break;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ GF_ASSERT(0);
+ break;
+ case TA_SUCCESS:
break;
+ }
+ release = priv->release_ta_notify_dom_lock;
+ }
+ UNLOCK(&priv->lock);
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- if (priv->entry_change_log)
- ret = 1;
+ if (inmem_count != 0 || release == _gf_false || onwire_count != 0)
+ return;
- break;
+ afr_ta_lock_release_synctask(this);
+}
+
+static void
+afr_ta_process_onwireq(afr_ta_fop_state_t fop_state, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *entry = NULL;
+ int bad_child = AFR_CHILD_UNKNOWN;
+
+ struct list_head onwireq = {
+ 0,
+ };
+ INIT_LIST_HEAD(&onwireq);
+
+ LOCK(&priv->lock);
+ {
+ bad_child = priv->ta_bad_child_index;
+ if (bad_child == AFR_CHILD_UNKNOWN) {
+ /*The previous on-wire ta_post_op was a failure. Just dequeue
+ *one element to wind on-wire again. */
+ entry = list_entry(priv->ta_onwireq.next, afr_local_t, ta_onwireq);
+ list_del_init(&entry->ta_onwireq);
+ } else {
+ /* Prepare to process all fops based on bad_child_index. */
+ list_splice_init(&priv->ta_onwireq, &onwireq);
}
+ }
+ UNLOCK(&priv->lock);
- return ret;
+ if (entry) {
+ afr_ta_post_op_synctask(this, entry);
+ return;
+ } else {
+ while (!list_empty(&onwireq)) {
+ entry = list_entry(onwireq.next, afr_local_t, ta_onwireq);
+ list_del_init(&entry->ta_onwireq);
+ if (entry->ta_failed_subvol == bad_child) {
+ afr_post_op_handle_success(entry->transaction.frame, this);
+ } else {
+ afr_post_op_handle_failure(entry->transaction.frame, this, EIO);
+ }
+ }
+ }
}
+int
+afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+
+ if (priv->thin_arbiter_count) {
+ /*fop should not come here with TA_WAIT_FOR_NOTIFY_LOCK_REL state */
+ afr_ta_dom_lock_check_and_release(local->fop_state, this);
+ }
+
+ /* Fail the FOP if post-op did not succeed on quorum no. of bricks. */
+ if (!afr_changelog_has_quorum(local, this)) {
+ local->op_ret = -1;
+ /*local->op_errno is already captured in changelog cbk*/
+ }
+
+ if (local->transaction.resume_stub) {
+ call_resume(local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+
+ int_lock->lock_cbk = afr_transaction_done;
+ afr_unlock(frame, this);
+
+ return 0;
+}
-static int
-__fop_changelog_needed (call_frame_t *frame, xlator_t *this)
+static void
+afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int op_ret = 0;
- afr_transaction_type type = -1;
+ afr_local_t *local = frame->local;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- priv = this->private;
- local = frame->local;
- type = local->transaction.type;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_THIN_ARB,
+ "Failing %s for gfid %s. Fop state is:%d", gf_fop_list[local->op],
+ uuid_utoa(local->inode->gfid), local->fop_state);
- if (__changelog_enabled (priv, type)) {
- switch (local->op) {
+ afr_changelog_post_op_done(frame, this);
+}
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- op_ret = 1;
- break;
+unsigned char *
+afr_locked_nodes_get(afr_transaction_type type, afr_internal_lock_t *int_lock)
+{
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ return int_lock->lockee[0].locked_nodes;
+}
- case GF_FOP_FLUSH:
- op_ret = 0;
- break;
+int
+afr_changelog_call_count(afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
+ unsigned int child_count)
+{
+ int i = 0;
+ int call_count = 0;
- default:
- op_ret = 1;
- }
+ for (i = 0; i < child_count; i++) {
+ if (pre_op_subvols[i] && !failed_subvols[i]) {
+ call_count++;
}
+ }
- return op_ret;
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
+
+ return call_count;
}
-int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+gf_boolean_t
+afr_txn_nothing_failed(call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (priv->thin_arbiter_count) {
+ /* We need to perform post-op even if 1 data brick was down
+ * before the txn started.*/
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count))
+ return _gf_false;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ local->transaction.failed_subvols[i])
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- /* 3 = data+metadata+entry */
+void
+afr_handle_symmetric_errors(call_frame_t *frame, xlator_t *this)
+{
+ if (afr_is_symmetric_error(frame, this))
+ __mark_all_success(frame, this);
+}
- if (ret < 0)
- goto out;
+gf_boolean_t
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame)
+{
+ unsigned int quorum_count = 0;
+ afr_private_t *priv = NULL;
+ unsigned int up_children_count = 0;
+
+ priv = this->private;
+ up_children_count = AFR_COUNT(subvols, priv->child_count);
+
+ if (afr_lookup_has_quorum(frame, up_children_count))
+ return _gf_true;
+
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ /*
+ * Special case for auto-quorum with an even number of nodes.
+ *
+ * A replica set with even count N can only handle the same
+ * number of failures as odd N-1 before losing "vanilla"
+ * quorum, and the probability of more simultaneous failures is
+ * actually higher. For example, with a 1% chance of failure
+ * we'd have a 0.03% chance of two simultaneous failures with
+ * N=3 but a 0.06% chance with N=4. However, the special case
+ * is necessary for N=2 because there's no real quorum in that
+ * case (i.e. can't normally survive *any* failures). In that
+ * case, we treat the first node as a tie-breaker, allowing
+ * quorum to be retained in some cases while still honoring the
+ * all-important constraint that there can not simultaneously
+ * be two partitioned sets of nodes each believing they have
+ * quorum. Of two equally sized sets, the one without that
+ * first node will lose.
+ *
+ * It turns out that the special case is beneficial for higher
+ * values of N as well. Continuing the example above, the
+ * probability of losing quorum with N=4 and this type of
+ * quorum is (very) slightly lower than with N=3 and vanilla
+ * quorum. The difference becomes even more pronounced with
+ * higher N. Therefore, even though such replica counts are
+ * unlikely to be seen in practice, we might as well use the
+ * "special" quorum then as well.
+ */
+ if ((up_children_count * 2) == priv->child_count) {
+ return subvols[0];
}
+ }
-out:
- return ret;
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ quorum_count = priv->child_count / 2 + 1;
+ } else {
+ quorum_count = priv->quorum_count;
+ }
+
+ if (up_children_count >= quorum_count)
+ return _gf_true;
+
+ return _gf_false;
}
+static gf_boolean_t
+afr_has_fop_quorum(call_frame_t *frame)
+{
+ xlator_t *this = frame->this;
+ afr_local_t *local = frame->local;
+ unsigned char *locked_nodes = NULL;
-static int
-afr_set_piggyback_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
- afr_transaction_type type)
-{
- int i = 0;
- int ret = 0;
- int *arr = NULL;
- int index = 0;
- size_t pending_xattr_size = 3 * sizeof (int32_t);
- /* 3 = data+metadata+entry */
+ locked_nodes = afr_locked_nodes_get(local->transaction.type,
+ &local->internal_lock);
+ return afr_has_quorum(locked_nodes, this, NULL);
+}
- index = afr_index_for_transaction_type (type);
+static gf_boolean_t
+afr_has_fop_cbk_quorum(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ unsigned char *success = alloca0(priv->child_count);
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ if (!local->transaction.failed_subvols[i])
+ success[i] = 1;
+ }
+
+ return afr_has_quorum(success, this, NULL);
+}
- for (i = 0; i < priv->child_count; i++) {
- arr = GF_CALLOC (1, pending_xattr_size,
- gf_afr_mt_char);
- if (!arr) {
- ret = -1;
- goto out;
- }
+gf_boolean_t
+afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ gf_boolean_t need_dirty = _gf_false;
- memcpy (arr, pending[i], pending_xattr_size);
+ local = frame->local;
- arr[index] = hton32 (ntoh32(arr[index]) + 1);
+ if (!priv->quorum_count || !local->optimistic_change_log)
+ return _gf_false;
- ret = dict_set_bin (xattr, priv->pending_key[i],
- arr, pending_xattr_size);
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ return _gf_false;
- if (ret < 0)
- goto out;
- }
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) ==
+ priv->child_count)
+ return _gf_false;
-out:
- return ret;
-}
+ if (!afr_has_fop_cbk_quorum(frame))
+ need_dirty = _gf_true;
+ return need_dirty;
+}
-int
-afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
+void
+afr_handle_quorum(call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ const char *file = NULL;
+ uuid_t gfid = {0};
- switch (type) {
- case AFR_DATA_TRANSACTION:
- ret = priv->child_count;
- break;
+ local = frame->local;
+ priv = frame->this->private;
- case AFR_METADATA_TRANSACTION:
- ret = priv->child_count;
- break;
+ if (priv->quorum_count == 0)
+ return;
+
+ /* If the fop already failed return right away to preserve errno */
+ if (local->op_ret == -1)
+ return;
+
+ /*
+ * Network split may happen just after the fops are unwound, so check
+ * if the fop succeeded in a way it still follows quorum. If it doesn't,
+ * mark the fop as failure, mark the changelogs so it reflects that
+ * failure.
+ *
+ * Scenario:
+ * There are 3 mounts on 3 machines(node1, node2, node3) all writing to
+ * single file. Network split happened in a way that node1 can't see
+ * node2, node3. Node2, node3 both of them can't see node1. Now at the
+ * time of sending write all the bricks are up. Just after write fop is
+ * wound on node1, network split happens. Node1 thinks write fop failed
+ * on node2, node3 so marks pending changelog for those 2 extended
+ * attributes on node1. Node2, node3 thinks writes failed on node1 so
+ * they mark pending changelog for node1. When the network is stable
+ * again the file already is in split-brain. These checks prevent
+ * marking pending changelog on other subvolumes if the fop doesn't
+ * succeed in a way it is still following quorum. So with this fix what
+ * is happening is, node1 will have all pending changelog(FOOL) because
+ * the write succeeded only on node1 but failed on node2, node3 so
+ * instead of marking pending changelogs on node2, node3 it just treats
+ * the fop as failure and goes into DIRTY state. Where as node2, node3
+ * say they are sources and have pending changelog to node1 so there is
+ * no split-brain with the fix. The problem is eliminated completely.
+ */
+
+ if (afr_has_fop_cbk_quorum(frame))
+ return;
+ if (afr_need_dirty_marking(frame, this))
+ goto set_response;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ afr_transaction_fop_failed(frame, frame->this, i);
+ }
+
+set_response:
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno(priv);
+
+ if (local->fd) {
+ gf_uuid_copy(gfid, local->fd->inode->gfid);
+ file = uuid_utoa(gfid);
+ } else {
+ loc_path(&local->loc, local->loc.name);
+ file = local->loc.path;
+ }
+
+ gf_msg(frame->this->name, GF_LOG_WARNING, local->op_errno,
+ AFR_MSG_QUORUM_FAIL, "%s: Failing %s as quorum is not met", file,
+ gf_fop_list[local->op]);
+
+ switch (local->transaction.type) {
case AFR_ENTRY_TRANSACTION:
case AFR_ENTRY_RENAME_TRANSACTION:
- ret = priv->child_count;
- break;
- }
-
- return ret;
+ afr_pick_error_xdata(local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+ break;
+ default:
+ afr_pick_error_xdata(local, priv, local->inode, local->readable,
+ NULL, NULL);
+ break;
+ }
}
-/* {{{ pending */
-
-int32_t
-afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+int
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->parent = inode_ref(priv->root_inode);
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* Except afr_ta_id_file_check() which is path based, all other gluster
+ * FOPS need gfid.*/
+ return -EINVAL;
+ }
+ gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ loc->inode = inode_new(loc->parent->table);
+ if (!loc->inode) {
+ loc_wipe(loc);
+ return -ENOMEM;
+ }
+ return 0;
+}
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
+static int
+afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *txn_frame = NULL;
+ afr_ta_fop_state_t fop_state;
+
+ local = (afr_local_t *)opaque;
+ fop_state = local->fop_state;
+ txn_frame = local->transaction.frame;
+ this = frame->this;
+
+ if (ret == 0) {
+ /*Mark pending xattrs on the up data brick.*/
+ afr_post_op_handle_success(txn_frame, this);
+ } else {
+ afr_post_op_handle_failure(txn_frame, this, -ret);
+ }
+
+ STACK_DESTROY(frame->root);
+ afr_ta_process_onwireq(fop_state, this);
+
+ return 0;
+}
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
+int **
+afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, afr_local_t *local)
+{
+ int **changelog = NULL;
+ int idx = 0;
+ int ret = 0;
+ int i;
+
+ if (local->is_new_entry == _gf_true) {
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ } else {
+ idx = afr_index_for_transaction_type(local->transaction.type);
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog) {
+ goto out;
}
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ changelog[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
}
+ }
- return 0;
+out:
+ return changelog;
}
+static void
+afr_ta_locked_xattrop_validate(afr_private_t *priv, afr_local_t *local,
+ gf_boolean_t *valid)
+{
+ if (priv->ta_event_gen > local->ta_event_gen) {
+ /* We can't trust the ta's response anymore.*/
+ afr_ta_locked_priv_invalidate(priv);
+ *valid = _gf_false;
+ return;
+ }
+ return;
+}
-void
-afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
- inode_t *inode, afr_transaction_type type)
-{
- int i = -1;
- int count = 0;
- int read_child = -1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int **pending = NULL;
- int idx = 0;
- int32_t *stale_children = NULL;
- int32_t *fresh_children = NULL;
- gf_boolean_t rm_stale_children = _gf_false;
-
- idx = afr_index_for_transaction_type (type);
-
- priv = this->private;
- local = frame->local;
- pending = local->pending;
-
- if (local->op_ret < 0)
- goto out;
- fresh_children = local->fresh_children;
- read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
- if (read_child < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Possible split-brain "
- "for %s", uuid_utoa (inode->gfid));
- goto out;
+static int
+afr_ta_post_op_do(void *opaque)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ dict_t *xattr = NULL;
+ unsigned char *pending = NULL;
+ int **changelog = NULL;
+ int failed_subvol = -1;
+ int success_subvol = -1;
+ loc_t loc = {
+ 0,
+ };
+ int i = 0;
+ int ret = 0;
+ gf_boolean_t valid = _gf_true;
+
+ local = (afr_local_t *)opaque;
+ this = local->transaction.frame->this;
+ priv = this->private;
+
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate loc for thin-arbiter.");
+ goto out;
+ }
+
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pending = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i]) {
+ pending[i] = 1;
+ failed_subvol = i;
+ } else {
+ success_subvol = i;
+ }
+ }
+
+ changelog = afr_set_changelog_xattr(priv, pending, xattr, local);
+
+ if (!changelog) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = afr_ta_post_op_lock(this, &loc);
+ if (ret)
+ goto out;
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s failed for gfid %s.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ }
+ LOCK(&priv->lock);
+ {
+ if (ret == 0) {
+ priv->ta_bad_child_index = failed_subvol;
+ } else if (ret == -EINVAL) {
+ priv->ta_bad_child_index = success_subvol;
+ ret = -EIO; /* TA failed the fop. Return EIO to application. */
}
- for (i = 0; i < priv->child_count; i++) {
- if (!afr_is_child_present (fresh_children,
- priv->child_count, i))
- continue;
- if (pending[i][idx])
- continue;
- /* child is down or op failed on it */
- if (!stale_children)
- stale_children = afr_children_create (priv->child_count);
- if (!stale_children)
- goto out;
+ afr_ta_locked_xattrop_validate(priv, local, &valid);
+ }
+ UNLOCK(&priv->lock);
+ if (valid == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s for gfid %s invalidated due "
+ "to event-gen mismatch.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ ret = -EIO;
+ }
+
+ afr_ta_post_op_unlock(this, &loc);
+out:
+ if (xattr)
+ dict_unref(xattr);
- rm_stale_children = _gf_true;
- stale_children[count++] = i;
- gf_log (this->name, GF_LOG_DEBUG, "Removing stale child "
- "%d for %s", i, uuid_utoa (inode->gfid));
- }
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
- if (!rm_stale_children)
- goto out;
+ loc_wipe(&loc);
- afr_inode_rm_stale_children (this, inode, stale_children);
-out:
- if (stale_children)
- GF_FREE (stale_children);
- return;
+ return ret;
}
-unsigned char*
-afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
+static int
+afr_ta_post_op_synctask(xlator_t *this, afr_local_t *local)
{
- unsigned char *locked_nodes = NULL;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->inode_locked_nodes;
- break;
+ call_frame_t *ta_frame = NULL;
+ int ret = 0;
+
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ goto err;
+ }
+ ret = synctask_new(this->ctx->env, afr_ta_post_op_do, afr_ta_post_op_done,
+ ta_frame, local);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to launch post-op on thin arbiter for gfid %s",
+ uuid_utoa(local->inode->gfid));
+ STACK_DESTROY(ta_frame->root);
+ goto err;
+ }
+
+ return ret;
+err:
+ afr_changelog_post_op_fail(local->transaction.frame, this, ENOMEM);
+ return ret;
+}
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- locked_nodes = int_lock->entry_locked_nodes;
- break;
+static void
+afr_ta_set_fop_state(afr_private_t *priv, afr_local_t *local,
+ int *on_wire_count)
+{
+ LOCK(&priv->lock);
+ {
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Put the fop in waitq until notify dom lock is released.*/
+ local->fop_state = TA_WAIT_FOR_NOTIFY_LOCK_REL;
+ list_add_tail(&local->ta_waitq, &priv->ta_waitq);
+ } else if (priv->ta_bad_child_index == AFR_CHILD_UNKNOWN) {
+ /* Post-op on thin-arbiter to decide success/failure. */
+ local->fop_state = TA_GET_INFO_FROM_TA_FILE;
+ *on_wire_count = ++priv->ta_on_wire_txn_count;
+ if (*on_wire_count > 1) {
+ /*Avoid sending multiple on-wire post-ops on TA*/
+ list_add_tail(&local->ta_onwireq, &priv->ta_onwireq);
+ }
+ } else if (local->ta_failed_subvol == priv->ta_bad_child_index) {
+ /* Post-op on TA not needed as the fop failed on the in-memory bad
+ * brick. Just mark pending xattrs on the good data brick.*/
+ local->fop_state = TA_INFO_IN_MEMORY_SUCCESS;
+ priv->ta_in_mem_txn_count++;
+ } else {
+ /* Post-op on TA not needed as the fop succeeded only on the
+ * in-memory bad data brick and not the good one. Fail the fop.*/
+ local->fop_state = TA_INFO_IN_MEMORY_FAILED;
+ priv->ta_in_mem_txn_count++;
}
- return locked_nodes;
+ }
+ UNLOCK(&priv->lock);
}
-int
-afr_changelog_pre_op_call_count (afr_transaction_type type,
- afr_internal_lock_t *int_lock,
- unsigned int child_count)
+static void
+afr_ta_fill_failed_subvol(afr_private_t *priv, afr_local_t *local)
{
- int call_count = 0;
- unsigned char *locked_nodes = NULL;
+ int i = 0;
- locked_nodes = afr_locked_nodes_get (type, int_lock);
- GF_ASSERT (locked_nodes);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i]) {
+ local->ta_failed_subvol = i;
+ break;
+ }
+ }
+}
- call_count = afr_locked_children_count (locked_nodes, child_count);
- if (type == AFR_ENTRY_RENAME_TRANSACTION)
- call_count *= 2;
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- return call_count;
+ local = frame->local;
+ if (local->is_new_entry == _gf_true) {
+ afr_mark_new_entry_changelog(frame, this);
+ }
+ afr_changelog_post_op_do(frame, this);
+
+ return;
}
-int
-afr_changelog_post_op_call_count (afr_transaction_type type,
- unsigned char *pre_op,
- unsigned int child_count)
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno)
{
- int call_count = 0;
+ afr_changelog_post_op_fail(frame, this, op_errno);
- call_count = afr_pre_op_done_children_count (pre_op, child_count);
- if (type == AFR_ENTRY_RENAME_TRANSACTION)
- call_count *= 2;
+ return;
+}
- return call_count;
+static void
+afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int on_wire_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ afr_ta_set_fop_state(priv, local, &on_wire_count);
+
+ switch (local->fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ if (on_wire_count == 1)
+ afr_ta_post_op_synctask(this, local);
+ /*else, fop is queued in ta_onwireq.*/
+ break;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ /*Post releasing the notify lock, we will act on this queue*/
+ break;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ afr_post_op_handle_success(frame, this);
+ break;
+ case TA_INFO_IN_MEMORY_FAILED:
+ afr_post_op_handle_failure(frame, this, EIO);
+ break;
+ default:
+ break;
+ }
+ return;
}
-int
-afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = this->private;
- afr_internal_lock_t *int_lock = NULL;
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- dict_t **xattr = NULL;
- int piggyback = 0;
- int index = 0;
- int nothing_failed = 1;
- int32_t changelog = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- __mark_non_participant_children (local->pending, priv->child_count,
- local->transaction.pre_op,
- local->transaction.type);
-
- if (local->fd)
- afr_transaction_rm_stale_children (frame, this,
- local->fd->inode,
- local->transaction.type);
-
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
+static void
+afr_handle_failure_using_thin_arbiter(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+
+ afr_ta_fill_failed_subvol(priv, local);
+ gf_msg_debug(this->name, 0,
+ "Fop failed on data brick (%s) for gfid=%s. "
+ "ta info needed to decide fop result.",
+ priv->children[local->ta_failed_subvol]->name,
+ uuid_utoa(local->inode->gfid));
+ afr_ta_decide_post_op_state(frame, this);
+}
+
+void
+afr_changelog_post_op_do(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ int i = 0;
+ int ret = 0;
+ int idx = 0;
+ int nothing_failed = 1;
+ gf_boolean_t need_undirty = _gf_false;
+
+ afr_handle_quorum(frame, this);
+ local = frame->local;
+ idx = afr_index_for_transaction_type(local->transaction.type);
+
+ xattr = dict_new();
+ if (!xattr) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ nothing_failed = afr_txn_nothing_failed(frame, this);
+
+ if (afr_changelog_pre_op_uninherit(frame, this))
+ need_undirty = _gf_false;
+ else
+ need_undirty = _gf_true;
+
+ if (local->op_ret < 0 && !nothing_failed) {
+ if (afr_need_dirty_marking(frame, this)) {
+ local->dirty[idx] = hton32(1);
+ goto set_dirty;
}
- call_count = afr_changelog_post_op_call_count (local->transaction.type,
- local->transaction.pre_op,
- priv->child_count);
- local->call_count = call_count;
+ afr_changelog_post_op_done(frame, this);
+ goto out;
+ }
+
+ if (nothing_failed && !need_undirty) {
+ afr_changelog_post_op_done(frame, this);
+ goto out;
+ }
+
+ if (local->transaction.in_flight_sb) {
+ afr_changelog_post_op_fail(frame, this,
+ local->transaction.in_flight_sb_errno);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->pending[i][idx] = hton32(1);
+ }
+
+ ret = afr_set_pending_dict(priv, xattr, local->pending);
+ if (ret < 0) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ if (need_undirty)
+ local->dirty[idx] = hton32(-1);
+ else
+ local->dirty[idx] = hton32(0);
+
+set_dirty:
+ ret = dict_set_static_bin(xattr, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ afr_changelog_do(frame, this, xattr, afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+out:
+ if (xattr)
+ dict_unref(xattr);
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
+ return;
+}
- if (call_count == 0) {
- /* no child is up */
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- goto out;
+static int
+afr_changelog_post_op_now(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int failed_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (priv->thin_arbiter_count) {
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+ if (failed_count == 1) {
+ afr_handle_failure_using_thin_arbiter(frame, this);
+ return 0;
+ } else {
+ /* Txn either succeeded or failed on both data bricks. Let
+ * post_op_do handle it as the case might be. */
}
+ }
- /* check if something has failed, to handle piggybacking */
- nothing_failed = 1;
- index = afr_index_for_transaction_type (local->transaction.type);
+ afr_changelog_post_op_do(frame, this);
+ return 0;
+}
+
+gf_boolean_t
+afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+ ctx = local->inode_ctx;
+
+ type = afr_index_for_transaction_type(local->transaction.type);
+ if (type != AFR_DATA_TRANSACTION)
+ return !local->transaction.dirtied;
+
+ if (local->transaction.no_uninherit)
+ return _gf_false;
+
+ /* This function must be idempotent. So check if we
+ were called before and return the same answer again.
+
+ It is important to keep this function idempotent for
+ the call in afr_changelog_post_op_safe() to not have
+ side effects on the call from afr_changelog_post_op_now()
+ */
+ if (local->transaction.uninherit_done)
+ return local->transaction.uninherit_value;
+
+ LOCK(&local->inode->lock);
+ {
for (i = 0; i < priv->child_count; i++) {
- if (local->pending[i][index] == 0) {
- nothing_failed = 0;
- break;
- }
+ if (local->transaction.pre_op[i] != ctx->pre_op_done[type][i]) {
+ ret = !local->transaction.dirtied;
+ goto unlock;
+ }
}
- index = afr_index_for_transaction_type (local->transaction.type);
- if (local->optimistic_change_log &&
- local->transaction.type != AFR_DATA_TRANSACTION) {
- /* if nothing_failed, then local->pending[..] == {0 .. 0} */
- for (i = 0; i < priv->child_count; i++) {
- changelog = ntoh32 (local->pending[i][index]);
- local->pending[i][index] = hton32 (changelog + 1);
- }
+ if (ctx->inherited[type]) {
+ ret = _gf_true;
+ ctx->inherited[type]--;
+ } else if (ctx->on_disk[type]) {
+ ret = _gf_false;
+ ctx->on_disk[type]--;
+ } else {
+ /* ASSERT */
+ ret = _gf_false;
}
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
- ret = afr_set_pending_dict (priv, xattr[i], local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- break;
- }
-
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_piggyback[i]) {
- fdctx->pre_op_piggyback[i]--;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (piggyback && !nothing_failed)
- ret = afr_set_piggyback_dict (priv, xattr[i],
- local->pending,
- local->transaction.type);
-
- if (nothing_failed && piggyback) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i], NULL);
- } else {
- __mark_pre_op_undone_on_fd (frame, this, i);
- STACK_WIND_COOKIE (frame,
- afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
+ if (!ctx->inherited[type] && !ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ ctx->pre_op_done[type][i] = 0;
+ }
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- } else {
- STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- call_count--;
- }
+ local->transaction.uninherit_done = _gf_true;
+ local->transaction.uninherit_value = ret;
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
+ return ret;
+}
- ret = afr_set_pending_dict (priv, xattr[i], local->pending);
+gf_boolean_t
+afr_changelog_pre_op_inherit(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+ local = frame->local;
+ priv = this->private;
- /* fall through */
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return _gf_false;
- case AFR_ENTRY_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
- }
+ type = afr_index_for_transaction_type(local->transaction.type);
- if (!--call_count)
- break;
+ LOCK(&local->inode->lock);
+ {
+ if (!local->inode_ctx->on_disk[type]) {
+ /* nothing to inherit yet */
+ ret = _gf_false;
+ goto unlock;
}
-out:
for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ if (local->transaction.pre_op[i] !=
+ local->inode_ctx->pre_op_done[type][i]) {
+ /* either inherit exactly, or don't */
+ ret = _gf_false;
+ goto unlock;
+ }
}
- return 0;
-}
+ local->inode_ctx->inherited[type]++;
+
+ ret = _gf_true;
+ local->transaction.inherited = _gf_true;
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
+
+ return ret;
+}
-int32_t
-afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+gf_boolean_t
+afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = this->private;
- int call_count = -1;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- switch (op_ret) {
- case 0:
- __mark_pre_op_done_on_fd (frame, this, child_index);
- //fallthrough we need to mark the pre_op
- case 1:
- local->transaction.pre_op[child_index] = 1;
- /* special op_ret for piggyback */
- break;
- case -1:
- if (op_errno == ENOTSUP) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop not supported by %s",
- priv->children[child_index]->name);
- local->op_ret = -1;
-
- } else if (!child_went_down (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop failed on child %s: %s",
- priv->children[child_index]->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- break;
- }
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION)
+ return _gf_false;
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ if (local->transaction.inherited)
+ /* was already inherited in afr_changelog_pre_op */
+ return _gf_false;
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOTSUP)) {
- local->transaction.resume (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
-
- /* Perform fops with the lk-owner from top xlator.
- * Eg: lk-owner of posix-lk and flush should be same,
- * flush cant clear the posix-lks without that lk-owner.
- */
- afr_save_lk_owner (frame);
- frame->root->lk_owner =
- local->transaction.main_frame->root->lk_owner;
-
- local->transaction.fop (frame, this);
+ if (!local->transaction.dirtied)
+ return _gf_false;
+
+ if (!afr_txn_nothing_failed(frame, this))
+ return _gf_false;
+
+ type = afr_index_for_transaction_type(local->transaction.type);
+
+ ret = _gf_false;
+
+ LOCK(&local->inode->lock);
+ {
+ if (!local->inode_ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ local->inode_ctx->pre_op_done[type][i] =
+ (!local->transaction.failed_subvols[i]);
+ } else {
+ for (i = 0; i < priv->child_count; i++)
+ if (local->inode_ctx->pre_op_done[type][i] !=
+ (!local->transaction.failed_subvols[i])) {
+ local->transaction.no_uninherit = 1;
+ goto unlock;
}
}
+ local->inode_ctx->on_disk[type]++;
- return 0;
+ ret = _gf_true;
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
+
+ return ret;
}
int
-afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
+afr_changelog_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_private_t * priv = this->private;
- int i = 0;
- int ret = 0;
- int call_count = 0;
- dict_t **xattr = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- afr_local_t *local = NULL;
- int piggyback = 0;
- afr_internal_lock_t *int_lock = NULL;
- unsigned char *locked_nodes = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ child_index = (long)cookie;
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
- }
-
- call_count = afr_changelog_pre_op_call_count (local->transaction.type,
- int_lock,
- priv->child_count);
- if (call_count == 0) {
- local->internal_lock.lock_cbk =
- local->transaction.done;
- afr_unlock (frame, this);
- goto out;
- }
+ if (xattr)
+ local->transaction.changelog_xdata[child_index] = dict_ref(xattr);
- local->call_count = call_count;
+ call_count = afr_frame_return(frame);
- __mark_all_pending (local->pending, priv->child_count,
- local->transaction.type);
+ if (call_count == 0) {
+ local->transaction.changelog_resume(frame, this);
+ }
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
+ return 0;
+}
- locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
- for (i = 0; i < priv->child_count; i++) {
- if (!locked_nodes[i])
+void
+afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
+ dict_t **xdata, dict_t **newloc_xdata)
+{
+ int i = 0;
+ int ret = 0;
+ char *key = NULL;
+ int keylen = 0;
+ const char *name = NULL;
+ dict_t *xdata1 = NULL;
+ dict_t *xdata2 = NULL;
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_entry_key_set = _gf_true;
+
+ local = frame->local;
+ this = THIS;
+ priv = this->private;
+
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ goto out;
+
+ if (!priv->esh_granular)
+ goto out;
+
+ xdata1 = dict_new();
+ if (!xdata1)
+ goto out;
+
+ name = local->loc.name;
+ if (local->op == GF_FOP_LINK)
+ name = local->newloc.name;
+
+ switch (op) {
+ case AFR_TRANSACTION_PRE_OP:
+ key = GF_XATTROP_ENTRY_IN_KEY;
+ break;
+ case AFR_TRANSACTION_POST_OP:
+ if (afr_txn_nothing_failed(frame, this)) {
+ key = GF_XATTROP_ENTRY_OUT_KEY;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i])
continue;
- ret = afr_set_pending_dict (priv, xattr[i], local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- break;
- }
-
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_done[i]) {
- fdctx->pre_op_piggyback[i]++;
- piggyback = 1;
- fdctx->hit++;
- } else {
- fdctx->miss++;
- }
- }
- UNLOCK (&local->fd->lock);
-
- afr_set_delayed_post_op (frame, this);
-
- if (piggyback)
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
+ need_entry_key_set = _gf_false;
+ break;
}
- break;
+ /* If the transaction itself did not fail and there
+ * are no failed subvolumes, check whether the fop
+ * failed due to a symmetric error. If it did, do
+ * not set the ENTRY_OUT xattr which would end up
+ * deleting a name index which was created possibly by
+ * an earlier entry txn that may have failed on some
+ * of the sub-volumes.
+ */
+ if (local->op_ret)
+ need_entry_key_set = _gf_false;
+ } else {
+ key = GF_XATTROP_ENTRY_IN_KEY;
+ }
+ break;
+ }
+
+ if (need_entry_key_set) {
+ keylen = strlen(key);
+ ret = dict_set_strn(xdata1, key, keylen, (char *)name);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set %s key during xattrop",
+ uuid_utoa(local->loc.pargfid), local->loc.name, key);
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ xdata2 = dict_new();
+ if (!xdata2)
+ goto out;
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- } else {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
-
- call_count--;
- }
+ ret = dict_set_strn(xdata2, key, keylen,
+ (char *)local->newloc.name);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set %s key during "
+ "xattrop",
+ uuid_utoa(local->newloc.pargfid), local->newloc.name,
+ key);
+ }
+ }
+
+ *xdata = xdata1;
+ *newloc_xdata = xdata2;
+ xdata1 = xdata2 = NULL;
+out:
+ if (xdata1)
+ dict_unref(xdata1);
+ return;
+}
+int
+afr_changelog_prepare(xlator_t *this, call_frame_t *frame, int *call_count,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op, dict_t **xdata,
+ dict_t **newloc_xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
+ local = frame->local;
+ priv = this->private;
- ret = afr_set_pending_dict (priv, xattr[i], local->pending);
+ *call_count = afr_changelog_call_count(
+ local->transaction.type, local->transaction.pre_op,
+ local->transaction.failed_subvols, priv->child_count);
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+ if (*call_count == 0) {
+ changelog_resume(frame, this);
+ return -1;
+ }
- /* fall through */
+ afr_changelog_populate_xdata(frame, op, xdata, newloc_xdata);
+ local->call_count = *call_count;
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
- }
+ local->transaction.changelog_resume = changelog_resume;
+ return 0;
+}
- if (!--call_count)
- break;
- }
-out:
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+int
+afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume, afr_xattrop_type_t op)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ dict_t *newloc_xdata = NULL;
+ int i = 0;
+ int call_count = 0;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.changelog_xdata[i]) {
+ dict_unref(local->transaction.changelog_xdata[i]);
+ local->transaction.changelog_xdata[i] = NULL;
}
+ }
+
+ ret = afr_changelog_prepare(this, frame, &call_count, changelog_resume, op,
+ &xdata, &newloc_xdata);
+ if (ret)
return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i] ||
+ local->transaction.failed_subvols[i])
+ continue;
-int
-afr_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ if (!local->fd) {
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->xattrop,
+ &local->loc, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fxattrop,
+ local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ }
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
- local = frame->local;
- int_lock = &local->internal_lock;
+ STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr, newloc_xdata);
+ call_count--;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking inodelks failed.");
- local->transaction.done (frame, this);
- } else {
+ /* fall through */
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
+ case AFR_ENTRY_TRANSACTION:
+ if (local->fd)
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fxattrop,
+ local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ else
+ STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ break;
}
- return 0;
-}
+ if (!--call_count)
+ break;
+ }
+ if (xdata)
+ dict_unref(xdata);
+ if (newloc_xdata)
+ dict_unref(newloc_xdata);
+ return 0;
+}
-int
-afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+static void
+afr_init_optimistic_changelog_for_txn(xlator_t *this, afr_local_t *local)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking inodelks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_post_blocking_inodelk_cbk;
- afr_blocking_lock (frame, this);
- } else {
+ priv = this->private;
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ locked_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ if (priv->optimistic_change_log && locked_count == priv->child_count)
+ local->optimistic_change_log = 1;
- return 0;
+ return;
}
-
int
-afr_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+afr_changelog_pre_op(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ int op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ unsigned char *locked_nodes = NULL;
+ int idx = -1;
+ gf_boolean_t pre_nop = _gf_true;
+ dict_t *xdata_req = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ idx = afr_index_for_transaction_type(local->transaction.type);
+
+ locked_nodes = afr_locked_nodes_get(local->transaction.type, int_lock);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_nodes[i]) {
+ local->transaction.pre_op[i] = 1;
+ call_count++;
+ } else {
+ local->transaction.failed_subvols[i] = 1;
+ }
+ }
+
+ afr_init_optimistic_changelog_for_txn(this, local);
+
+ if (afr_changelog_pre_op_inherit(frame, this))
+ goto next;
+
+ /* This condition should not be met with present code, as
+ * transaction.done will be called if locks are not acquired on even a
+ * single node.
+ */
+ if (call_count == 0) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ /* Check if the fop can be performed on at least
+ * quorum number of nodes.
+ */
+ if (priv->quorum_count && !afr_has_fop_quorum(frame)) {
+ op_errno = int_lock->lock_op_errno;
+ if (op_errno == 0)
+ op_errno = afr_quorum_errno(priv);
+ goto err;
+ }
+
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (call_count < priv->child_count)
+ pre_nop = _gf_false;
+
+ /* Set an all-zero pending changelog so that in the cbk, we can get the
+ * current on-disk values. In a replica 3 volume with arbiter enabled,
+ * these values are needed to arrive at a go/ no-go of the fop phase to
+ * avoid ending up in split-brain.*/
+
+ ret = afr_set_pending_dict(priv, xdata_req, local->pending);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (afr_needs_changelog_update(local)) {
+ local->dirty[idx] = hton32(1);
+
+ ret = dict_set_static_bin(xdata_req, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ pre_nop = _gf_false;
+ local->transaction.dirtied = 1;
+ }
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking entrylks failed.");
- local->transaction.done (frame, this);
- } else {
+ if (pre_nop)
+ goto next;
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ if (!local->pre_op_compat) {
+ dict_copy(xdata_req, local->xdata_req);
+ goto next;
+ }
- return 0;
-}
+ afr_changelog_do(frame, this, xdata_req, afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
+ if (xdata_req)
+ dict_unref(xdata_req);
-int
-afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ return 0;
+next:
+ afr_transaction_perform_fop(frame, this);
- local = frame->local;
- int_lock = &local->internal_lock;
+ if (xdata_req)
+ dict_unref(xdata_req);
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_post_blocking_entrylk_cbk;
- afr_blocking_lock (frame, this);
- } else {
+ return 0;
+err:
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ afr_handle_lock_acquire_failure(local);
- return 0;
+ if (xdata_req)
+ dict_unref(xdata_req);
+
+ return 0;
}
+int
+afr_post_nonblocking_lock_cbk(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Non blocking locks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_internal_lock_finish;
+ afr_blocking_lock(frame, this);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Non blocking locks done. Proceeding to FOP");
+
+ afr_internal_lock_finish(frame, this);
+ }
+
+ return 0;
+}
int
-afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this)
+afr_post_blocking_rename_cbk(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking entrylks failed.");
- local->transaction.done (frame, this);
- } else {
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INTERNAL_LKS_FAILED,
+ "Blocking entrylks failed.");
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
- return 0;
-}
+ afr_transaction_done(frame, this);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish(frame, this);
+ }
+ return 0;
+}
int
-afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this)
+afr_post_lower_unlock_cbk(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- GF_ASSERT (!int_lock->higher_locked);
+ GF_ASSERT(!int_lock->higher_locked);
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
+ int_lock->lock_cbk = afr_post_blocking_rename_cbk;
+ afr_blocking_lock(frame, this);
- return 0;
+ return 0;
}
-
int
-afr_set_transaction_flock (afr_local_t *local)
+afr_set_transaction_flock(xlator_t *this, afr_local_t *local,
+ afr_lockee_t *lockee)
{
- afr_internal_lock_t *int_lock = NULL;
-
- int_lock = &local->internal_lock;
-
- int_lock->lk_flock.l_len = local->transaction.len;
- int_lock->lk_flock.l_start = local->transaction.start;
- int_lock->lk_flock.l_type = F_WRLCK;
-
- return 0;
+ afr_private_t *priv = NULL;
+ struct gf_flock *flock = NULL;
+
+ priv = this->private;
+ flock = &lockee->flock;
+
+ if ((priv->arbiter_count || local->transaction.eager_lock_on ||
+ priv->full_lock) &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ /*Lock entire file to avoid network split brains.*/
+ flock->l_len = 0;
+ flock->l_start = 0;
+ } else {
+ flock->l_len = local->transaction.len;
+ flock->l_start = local->transaction.start;
+ }
+ flock->l_type = F_WRLCK;
+
+ return 0;
}
int
-afr_lock_rec (call_frame_t *frame, xlator_t *this)
+afr_lock(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->lock_cbk = afr_post_nonblocking_lock_cbk;
+ int_lock->domain = this->name;
- switch (local->transaction.type) {
+ switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- afr_set_transaction_flock (local);
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_set_transaction_flock(this, local, &int_lock->lockee[i]);
+ }
- int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
-
- afr_nonblocking_inodelk (frame, this);
- break;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_basename = local->transaction.basename;
+ if (local->transaction.parent_loc.path)
+ int_lock->lk_loc = &local->transaction.parent_loc;
+ else
+ GF_ASSERT(local->fd);
+ break;
case AFR_ENTRY_RENAME_TRANSACTION:
+ break;
+ }
+ afr_lock_nonblocking(frame, this);
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
- break;
+ return 0;
+}
- case AFR_ENTRY_TRANSACTION:
- int_lock->lk_basename = local->transaction.basename;
- if (&local->transaction.parent_loc)
- int_lock->lk_loc = &local->transaction.parent_loc;
- else
- GF_ASSERT (local->fd);
+static gf_boolean_t
+afr_locals_overlap(afr_local_t *local1, afr_local_t *local2)
+{
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
+}
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk (frame, this);
- break;
+gf_boolean_t
+afr_has_lock_conflict(afr_local_t *local, gf_boolean_t waitlist_check)
+{
+ afr_local_t *each = NULL;
+ afr_lock_t *lock = NULL;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ list_for_each_entry(each, &lock->owners, transaction.owner_list)
+ {
+ if (afr_locals_overlap(each, local)) {
+ return _gf_true;
}
+ }
- return 0;
+ if (!waitlist_check)
+ return _gf_false;
+ list_for_each_entry(each, &lock->waiting, transaction.wait_list)
+ {
+ if (afr_locals_overlap(each, local)) {
+ return _gf_true;
+ }
+ }
+ return _gf_false;
}
-
-int
-afr_lock (call_frame_t *frame, xlator_t *this)
+/* }}} */
+static void
+afr_copy_inodelk_vars(afr_internal_lock_t *dst, afr_internal_lock_t *src,
+ xlator_t *this, int lockee_num)
{
- afr_set_lock_number (frame, this);
-
- return afr_lock_rec (frame, this);
+ afr_private_t *priv = this->private;
+ afr_lockee_t *sl = &src->lockee[lockee_num];
+ afr_lockee_t *dl = &dst->lockee[lockee_num];
+
+ dst->domain = src->domain;
+ dl->flock.l_len = sl->flock.l_len;
+ dl->flock.l_start = sl->flock.l_start;
+ dl->flock.l_type = sl->flock.l_type;
+ dl->locked_count = sl->locked_count;
+ memcpy(dl->locked_nodes, sl->locked_nodes,
+ priv->child_count * sizeof(*dl->locked_nodes));
}
+void
+__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared)
+{
+ gf_boolean_t conflict = _gf_false;
+ afr_local_t *each = NULL;
+ afr_lock_t *lock = &local->inode_ctx->lock[local->transaction.type];
+
+ while (!conflict) {
+ if (list_empty(&lock->waiting))
+ return;
+ each = list_entry(lock->waiting.next, afr_local_t,
+ transaction.wait_list);
+ if (afr_has_lock_conflict(each, _gf_false)) {
+ conflict = _gf_true;
+ }
+ if (conflict && !list_empty(&lock->owners))
+ return;
+ afr_copy_inodelk_vars(&each->internal_lock, &local->internal_lock,
+ each->transaction.frame->this, 0);
+ list_move_tail(&each->transaction.wait_list, shared);
+ list_add_tail(&each->transaction.owner_list, &lock->owners);
+ }
+}
-/* }}} */
+static void
+afr_lock_resume_shared(struct list_head *list)
+{
+ afr_local_t *each = NULL;
+
+ while (!list_empty(list)) {
+ each = list_entry(list->next, afr_local_t, transaction.wait_list);
+ list_del_init(&each->transaction.wait_list);
+ afr_changelog_pre_op(each->transaction.frame,
+ each->transaction.frame->this);
+ }
+}
int
-afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
+afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
-
- if (__fop_changelog_needed (frame, this)) {
- afr_changelog_pre_op (frame, this);
+ afr_local_t *local = frame->local;
+ afr_lock_t *lock = NULL;
+
+ local->internal_lock.lock_cbk = NULL;
+ if (!local->transaction.eager_lock_on) {
+ if (local->internal_lock.lock_op_ret < 0) {
+ afr_transaction_done(frame, this);
+ return 0;
+ }
+ afr_changelog_pre_op(frame, this);
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (local->internal_lock.lock_op_ret < 0) {
+ afr_handle_lock_acquire_failure(local);
} else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
-
+ lock->event_generation = local->event_generation;
+ afr_changelog_pre_op(frame, this);
+ }
+ }
- /* Perform fops with the lk-owner from top xlator.
- * Eg: lk-owner of posix-lk and flush should be same,
- * flush cant clear the posix-lks without that lk-owner.
- */
- afr_save_lk_owner (frame);
- frame->root->lk_owner =
- local->transaction.main_frame->root->lk_owner;
+ return 0;
+}
- local->transaction.fop (frame, this);
+gf_boolean_t
+afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this)
+{
+ afr_lock_t *lock = NULL;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+
+ /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
+ * is taken mount2 opened the same file, it won't be able to
+ * perform any {meta,}data operations until mount1 releases eager-lock.
+ * To avoid such scenario do not enable eager-lock for this transaction
+ * if open-fd-count is > 1 for metadata transactions and if num-inodelks > 1
+ * for data transactions
+ */
+
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+ if (local->inode_ctx->open_fd_count > 1) {
+ return _gf_true;
+ }
+ } else if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ if (lock->num_inodelks > 1) {
+ return _gf_true;
}
+ }
- return 0;
+ return _gf_false;
}
+gf_boolean_t
+afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
+ int delay)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+ gf_boolean_t res = _gf_false;
+
+ local = frame->local;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+
+ if (!afr_txn_nothing_failed(frame, this)) {
+ lock->release = _gf_true;
+ goto out;
+ }
+
+ if (afr_are_conflicting_ops_waiting(local, this)) {
+ lock->release = _gf_true;
+ goto out;
+ }
+
+ if (!list_empty(&lock->owners))
+ goto out;
+ else
+ GF_ASSERT(list_empty(&lock->waiting));
+
+ if (lock->release) {
+ goto out;
+ }
+
+ if (!delay) {
+ goto out;
+ }
+
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
+ * they are fine too*/
+ goto out;
+ }
+
+ res = _gf_true;
+out:
+ return res;
+}
void
-afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+afr_delayed_changelog_wake_up_cbk(void *data)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- /* call this function from any of the related optimizations
- which benefit from delaying post op are enabled, namely:
+ afr_lock_t *lock = NULL;
+ afr_local_t *local = data;
+ afr_local_t *timer_local = NULL;
+ struct list_head shared;
+
+ INIT_LIST_HEAD(&shared);
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ if (list_empty(&lock->owners) && (local == timer_local)) {
+ GF_ASSERT(list_empty(&lock->waiting));
+ /*Last owner*/
+ lock->release = _gf_true;
+ lock->delay_timer = NULL;
+ }
+ }
+ UNLOCK(&local->inode->lock);
+ afr_changelog_post_op_now(local->transaction.frame,
+ local->transaction.frame->this);
+}
- - changelog piggybacking
- - eager locking
- */
+/* SET operation */
+int
+afr_fd_report_unstable_write(xlator_t *this, afr_local_t *local)
+{
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&local->inode->lock);
- priv = this->private;
- if (!priv)
- return;
+ return 0;
+}
- if (!priv->post_op_delay_secs)
- return;
+/* TEST and CLEAR operation */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write(xlator_t *this, inode_t *inode)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t witness = _gf_false;
- local = frame->local;
- if (!local)
- return;
+ LOCK(&inode->lock);
+ {
+ (void)__afr_inode_ctx_get(this, inode, &ctx);
- if (!local->fd)
- return;
+ if (ctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ ctx->witnessed_unstable_write = _gf_false;
+ }
+ }
+ UNLOCK(&inode->lock);
- if (local->op == GF_FOP_WRITE)
- local->delayed_post_op = _gf_true;
+ return witness;
}
-
-gf_boolean_t
-is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+int
+afr_changelog_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- afr_local_t *local = NULL;
- gf_boolean_t res = _gf_false;
+ afr_private_t *priv = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
- local = frame->local;
- if (!local)
- goto out;
+ priv = this->private;
+ local = frame->local;
- if (!local->delayed_post_op)
- goto out;
+ if (op_ret != 0) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, AFR_MSG_FSYNC_FAILED,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name, gf_fop_list[local->op]);
- res = _gf_true;
-out:
- return res;
-}
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
+ call_count = afr_frame_return(frame);
-void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd);
+ if (call_count == 0)
+ afr_changelog_post_op_now(frame, this);
-void
-afr_delayed_changelog_wake_up_cbk (void *data)
+ return 0;
+}
+
+int
+afr_changelog_fsync(call_frame_t *frame, xlator_t *this)
{
- fd_t *fd = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
- fd = data;
+ local = frame->local;
+ priv = this->private;
- afr_delayed_changelog_wake_up (THIS, fd);
-}
+ call_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
-void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd)
-{
- afr_fd_ctx_t *fd_ctx = NULL;
- call_frame_t *prev_frame = NULL;
- struct timeval delta = {0, };
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- return;
-
- delta.tv_sec = priv->post_op_delay_secs;
- delta.tv_usec = 0;
-
- pthread_mutex_lock (&fd_ctx->delay_lock);
- {
- prev_frame = fd_ctx->delay_frame;
- fd_ctx->delay_frame = NULL;
- if (fd_ctx->delay_timer)
- gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
- fd_ctx->delay_timer = NULL;
- if (!frame)
- goto unlock;
- fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
- afr_delayed_changelog_wake_up_cbk,
- fd);
- fd_ctx->delay_frame = frame;
- }
-unlock:
- pthread_mutex_unlock (&fd_ctx->delay_lock);
+ local->call_count = call_count;
- if (prev_frame) {
- afr_changelog_post_op_now (prev_frame, this);
- }
-}
+ xdata = dict_new();
+ if (xdata) {
+ ret = dict_set_int32_sizen(xdata, "batch-fsync", 1);
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
-void
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ STACK_WIND_COOKIE(frame, afr_changelog_fsync_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fsync,
+ local->fd, 1, xdata);
+ if (!--call_count)
+ break;
+ }
- local = frame->local;
+ if (xdata)
+ dict_unref(xdata);
- if (is_afr_delayed_changelog_post_op_needed (frame, this))
- afr_delayed_changelog_post_op (this, frame, local->fd);
- else
- afr_changelog_post_op_now (frame, this);
+ return 0;
}
-
-void
-afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+int
+afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this)
{
- afr_delayed_changelog_post_op (this, NULL, fd);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
+
+ if (afr_changelog_pre_op_uninherit(frame, this) &&
+ afr_txn_nothing_failed(frame, this)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write(this, local->inode)) {
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
+
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync(frame, this);
+ } else {
+ afr_changelog_post_op_now(frame, this);
+ }
+
+ return 0;
}
+void
+afr_changelog_post_op(call_frame_t *frame, xlator_t *this)
+{
+ struct timespec delta = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = frame->local;
+ afr_lock_t *lock = NULL;
+ gf_boolean_t post_op = _gf_true;
+ struct list_head shared;
+
+ priv = this->private;
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ INIT_LIST_HEAD(&shared);
+ if (!local->transaction.eager_lock_on)
+ goto out;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ list_del_init(&local->transaction.owner_list);
+ list_add(&local->transaction.owner_list, &lock->post_op);
+ __afr_transaction_wake_shared(local, &shared);
+
+ if (!afr_is_delayed_changelog_post_op_needed(frame, this,
+ delta.tv_sec)) {
+ if (list_empty(&lock->owners))
+ lock->release = _gf_true;
+ goto unlock;
+ }
-int
-afr_transaction_resume (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
- fd = local->fd;
-
- if (fd)
- /* The wake up needs to happen independent of
- what type of fop arrives here. If it was
- a write, then it has already inherited the
- lock and changelog. If it was not a write,
- then the presumption of the optimization (of
- optimizing for successive write operations)
- fails.
- */
- afr_delayed_changelog_wake_up (this, fd);
-
- afr_restore_lk_owner (frame);
-
- if (__fop_changelog_needed (frame, this)) {
- afr_changelog_post_op (frame, this);
+ GF_ASSERT(lock->delay_timer == NULL);
+ lock->delay_timer = gf_timer_call_after(
+ this->ctx, delta, afr_delayed_changelog_wake_up_cbk, local);
+ if (!lock->delay_timer) {
+ lock->release = _gf_true;
} else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
+ post_op = _gf_false;
}
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
- return 0;
+ if (!list_empty(&shared)) {
+ afr_lock_resume_shared(&shared);
+ }
+
+out:
+ if (post_op) {
+ if (!local->transaction.eager_lock_on || lock->release) {
+ afr_changelog_post_op_safe(frame, this);
+ } else {
+ afr_changelog_post_op_now(frame, this);
+ }
+ }
}
+int
+afr_transaction_resume(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ afr_restore_lk_owner(frame);
+
+ afr_handle_symmetric_errors(frame, this);
+
+ if (!local->pre_op_compat)
+ /* new mode, pre-op was done along
+ with OP */
+ afr_changelog_pre_op_update(frame, this);
+
+ afr_changelog_post_op(frame, this);
+
+ return 0;
+}
/**
* afr_transaction_fop_failed - inform that an fop failed
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed(call_frame_t *frame, xlator_t *this, int child_index)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- __mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
+ local->transaction.failed_subvols[child_index] = 1;
}
-gf_boolean_t
-_does_transaction_conflict_with_delayed_post_op (call_frame_t *frame)
-{
- afr_local_t *local = frame->local;
- //check if it is going to compete with inode lock from the fd
- if (local->fd)
- return _gf_false;
- if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
- (local->transaction.type == AFR_METADATA_TRANSACTION))
- return _gf_true;
+static gf_boolean_t
+__need_previous_lock_unlocked(afr_local_t *local)
+{
+ afr_lock_t *lock = NULL;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (!lock->acquired)
return _gf_false;
+ if (lock->acquired && lock->event_generation != local->event_generation)
+ return _gf_true;
+ return _gf_false;
}
-int
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+void
+__afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock,
+ gf_boolean_t *do_pre_op, afr_local_t **timer_local)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- fd_t *fd = NULL;
+ afr_lock_t *lock = NULL;
+ afr_local_t *owner_local = NULL;
+ xlator_t *this = local->transaction.frame->this;
+
+ local->transaction.eager_lock_on = _gf_true;
+ afr_set_lk_owner(local->transaction.frame, this, local->inode);
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (__need_previous_lock_unlocked(local)) {
+ if (!list_empty(&lock->owners)) {
+ lock->release = _gf_true;
+ } else if (lock->delay_timer) {
+ lock->release = _gf_true;
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ /* It will be put in frozen list
+ * in the code flow below*/
+ } else {
+ *timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ lock->delay_timer = NULL;
+ }
+ }
+ }
+
+ if (lock->release) {
+ list_add_tail(&local->transaction.wait_list, &lock->frozen);
+ *take_lock = _gf_false;
+ goto out;
+ }
+
+ if (lock->delay_timer) {
+ *take_lock = _gf_false;
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ list_add_tail(&local->transaction.wait_list, &lock->frozen);
+ } else {
+ *timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ afr_copy_inodelk_vars(&local->internal_lock,
+ &(*timer_local)->internal_lock, this, 0);
+ lock->delay_timer = NULL;
+ *do_pre_op = _gf_true;
+ list_add_tail(&local->transaction.owner_list, &lock->owners);
+ }
+ goto out;
+ }
+
+ if (!list_empty(&lock->owners)) {
+ if (!lock->acquired || afr_has_lock_conflict(local, _gf_true)) {
+ list_add_tail(&local->transaction.wait_list, &lock->waiting);
+ *take_lock = _gf_false;
+ goto out;
+ }
+ owner_local = list_entry(lock->owners.next, afr_local_t,
+ transaction.owner_list);
+ afr_copy_inodelk_vars(&local->internal_lock,
+ &owner_local->internal_lock, this, 0);
+ *take_lock = _gf_false;
+ *do_pre_op = _gf_true;
+ }
+
+ if (lock->acquired)
+ GF_ASSERT(!(*take_lock));
+ list_add_tail(&local->transaction.owner_list, &lock->owners);
+out:
+ return;
+}
- local = frame->local;
- priv = this->private;
+void
+afr_transaction_start(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t take_lock = _gf_true;
+ gf_boolean_t do_pre_op = _gf_false;
+ afr_local_t *timer_local = NULL;
+
+ priv = this->private;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION &&
+ local->transaction.type != AFR_METADATA_TRANSACTION)
+ goto lock_phase;
+
+ if (!priv->eager_lock)
+ goto lock_phase;
+
+ LOCK(&local->inode->lock);
+ {
+ __afr_eager_lock_handle(local, &take_lock, &do_pre_op, &timer_local);
+ }
+ UNLOCK(&local->inode->lock);
+lock_phase:
+ if (!local->transaction.eager_lock_on) {
+ afr_set_lk_owner(local->transaction.frame, this,
+ local->transaction.frame->root);
+ }
+
+ if (take_lock) {
+ afr_lock(local->transaction.frame, this);
+ } else if (do_pre_op) {
+ afr_changelog_pre_op(local->transaction.frame, this);
+ }
+ /*Always call delayed_changelog_wake_up_cbk after calling pre-op above
+ * so that any inheriting can happen*/
+ if (timer_local)
+ afr_delayed_changelog_wake_up_cbk(timer_local);
+}
- afr_transaction_local_init (local, this);
+int
+afr_write_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
+{
+ afr_local_t *local = frame->local;
+
+ if (err) {
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
+ goto fail;
+ }
+
+ afr_transaction_start(local, this);
+ return 0;
+fail:
+ local->transaction.unwind(frame, this);
+ AFR_STACK_DESTROY(frame);
+ return 0;
+}
- local->transaction.resume = afr_transaction_resume;
- local->transaction.type = type;
+int
+afr_transaction_lockee_init(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_private_t *priv = frame->this->private;
+ int ret = 0;
- if (local->fd && priv->eager_lock &&
- local->transaction.type == AFR_DATA_TRANSACTION)
- afr_set_lk_owner (frame, this, local->fd);
- else
- afr_set_lk_owner (frame, this, frame->root);
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ ret = afr_add_inode_lockee(local, priv->child_count);
+ break;
- if (_does_transaction_conflict_with_delayed_post_op (frame) &&
- local->loc.inode) {
- fd = fd_lookup (local->loc.inode, frame->root->pid);
- if (fd) {
- afr_delayed_changelog_wake_up (this, fd);
- fd_unref (fd);
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = afr_add_entry_lockee(local, &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ if (local->op == GF_FOP_RENAME) {
+ ret = afr_add_entry_lockee(
+ local, &local->transaction.new_parent_loc,
+ local->transaction.new_basename, priv->child_count);
+ if (ret) {
+ goto out;
}
- }
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- afr_internal_lock_finish (frame, this);
- } else {
- afr_lock (frame, this);
- }
+ if (local->newloc.inode &&
+ IA_ISDIR(local->newloc.inode->ia_type)) {
+ ret = afr_add_entry_lockee(local, &local->newloc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+ } else if (local->op == GF_FOP_RMDIR) {
+ ret = afr_add_entry_lockee(local, &local->loc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+
+ if (int_lock->lockee_count > 1) {
+ qsort(int_lock->lockee, int_lock->lockee_count,
+ sizeof(*int_lock->lockee), afr_entry_lockee_cmp);
+ }
+ break;
+ }
+out:
+ return ret;
+}
- return 0;
+int
+afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int event_generation = 0;
+
+ local = frame->local;
+ priv = this->private;
+ local->transaction.frame = frame;
+
+ local->transaction.type = type;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &ret)) {
+ ret = -ret; /*op_errno to ret conversion*/
+ goto out;
+ }
+
+ if (priv->thin_arbiter_count && !afr_ta_has_quorum(priv, local)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
+ ret = afr_transaction_local_init(local, this);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_transaction_lockee_init(frame);
+ if (ret)
+ goto out;
+
+ if (type != AFR_METADATA_TRANSACTION) {
+ goto txn_start;
+ }
+
+ ret = afr_inode_get_readable(frame, local->inode, this, local->readable,
+ &event_generation, type);
+ if (ret < 0 ||
+ afr_is_inode_refresh_reqd(local->inode, this, priv->event_generation,
+ event_generation)) {
+ afr_inode_refresh(frame, this, local->inode, local->loc.gfid,
+ afr_write_txn_refresh_done);
+ ret = 0;
+ goto out;
+ }
+
+txn_start:
+ ret = 0;
+ afr_transaction_start(local, this);
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 0f1f054059e..beefa26f4a6 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -11,24 +11,65 @@
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+#include "afr.h"
+
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
- int child_index);
+afr_transaction_fop_failed(call_frame_t *frame, xlator_t *this,
+ int child_index);
+
+int32_t
+afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type);
int
-afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int32_t **pending);
-int32_t
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+void
+afr_delayed_changelog_wake_up(xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success(call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_txn_nothing_failed(call_frame_t *frame, xlator_t *this);
+
+int
+afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending);
+afr_read_txn_continue(call_frame_t *frame, xlator_t *this, int subvol);
+
void
-afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+afr_pending_read_increment(afr_private_t *priv, int child_index);
void
-afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+afr_pending_read_decrement(afr_private_t *priv, int child_index);
+call_frame_t *
+afr_transaction_detach_fop_frame(call_frame_t *frame);
+gf_boolean_t
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame);
+gf_boolean_t
+afr_needs_changelog_update(afr_local_t *local);
+void
+afr_zero_fill_stat(afr_local_t *local);
+
+void
+afr_pick_error_xdata(afr_local_t *local, afr_private_t *priv, inode_t *inode1,
+ unsigned char *readable1, inode_t *inode2,
+ unsigned char *readable2);
+int
+afr_transaction_resume(call_frame_t *frame, xlator_t *this);
+
+int
+afr_lock(call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up_cbk(void *data);
+
+int
+afr_release_notify_lock_for_ta(void *opaque);
+
+int
+afr_ta_lock_release_done(int ret, call_frame_t *ta_frame, void *opaque);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 8c7f452dcd9..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -15,607 +15,1330 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "afr-common.c"
-
-#define SHD_INODE_LRU_LIMIT 2048
-#define AFR_EH_HEALED_LIMIT 1024
-#define AFR_EH_HEAL_FAIL_LIMIT 1024
-#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#include "afr-messages.h"
struct volume_options options[];
+static char *afr_favorite_child_policies[AFR_FAV_CHILD_POLICY_MAX + 1] = {
+ [AFR_FAV_CHILD_NONE] = "none",
+ [AFR_FAV_CHILD_BY_SIZE] = "size",
+ [AFR_FAV_CHILD_BY_CTIME] = "ctime",
+ [AFR_FAV_CHILD_BY_MTIME] = "mtime",
+ [AFR_FAV_CHILD_BY_MAJORITY] = "majority",
+ [AFR_FAV_CHILD_POLICY_MAX] = NULL,
+};
+
int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
+notify(xlator_t *this, int32_t event, void *data, ...)
{
- int ret = -1;
- va_list ap;
- void *data2 = NULL;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- va_start (ap, data);
- data2 = va_arg (ap, dict_t*);
- va_end (ap);
- ret = afr_notify (this, event, data, data2);
+ va_start(ap, data);
+ data2 = va_arg(ap, dict_t *);
+ va_end(ap);
+ ret = afr_notify(this, event, data, data2);
- return ret;
+ return ret;
}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+ if (!this)
+ return ret;
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
+ ret = xlator_mem_acct_init(this, gf_afr_mt_end + 1);
+ if (ret != 0) {
return ret;
-}
+ }
+ return ret;
+}
int
-xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
+xlator_subvolume_index(xlator_t *this, xlator_t *subvol)
{
- int index = -1;
- int i = 0;
- xlator_list_t *list = NULL;
-
- list = this->children;
-
- while (list) {
- if (subvol == list->xlator ||
- strcmp (subvol->name, list->xlator->name) == 0) {
- index = i;
- break;
- }
- list = list->next;
- i++;
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ list = this->children;
+
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp(subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
}
+ list = list->next;
+ i++;
+ }
- return index;
+ return index;
}
-void
-fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
+static void
+fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype,
+ dict_t *options)
{
- if (priv->quorum_count && strcmp(qtype,"fixed")) {
- gf_log(this->name,GF_LOG_WARNING,
- "quorum-type %s overriding quorum-count %u",
- qtype, priv->quorum_count);
- }
- if (!strcmp(qtype,"none")) {
- priv->quorum_count = 0;
- }
- else if (!strcmp(qtype,"auto")) {
- priv->quorum_count = AFR_QUORUM_AUTO;
- }
+ if (dict_get_sizen(options, "quorum-type") == NULL) {
+ /* If user doesn't configure anything enable auto-quorum if the
+ * replica has more than two subvolumes */
+ if (priv->child_count > 2)
+ qtype = "auto";
+ }
+
+ if (priv->quorum_count && strcmp(qtype, "fixed")) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE,
+ "quorum-type %s overriding quorum-count %u", qtype,
+ priv->quorum_count);
+ }
+
+ if (!strcmp(qtype, "none")) {
+ priv->quorum_count = 0;
+ } else if (!strcmp(qtype, "auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
+ }
}
int
-reconfigure (xlator_t *this, dict_t *options)
+afr_set_favorite_child_policy(afr_private_t *priv, char *policy)
{
- afr_private_t *priv = NULL;
- xlator_t *read_subvol = NULL;
- int ret = -1;
- int index = -1;
- char *qtype = NULL;
+ int index = -1;
- priv = this->private;
+ index = gf_get_index_by_elem(afr_favorite_child_policies, policy);
+ if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
+ return -1;
- GF_OPTION_RECONF ("background-self-heal-count",
- priv->background_self_heal_count, options, uint32,
- out);
+ priv->fav_child_policy = index;
- GF_OPTION_RECONF ("metadata-self-heal",
- priv->metadata_self_heal, options, bool, out);
+ return 0;
+}
- GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
- out);
+static void
+set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
+{
+ if (!algo) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ } else if (strcmp(algo, "full") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp(algo, "diff") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ }
+}
- GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
- bool, out);
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
- GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
- out);
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int timeout_old = 0;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *data_self_heal = NULL;
+ char *data_self_heal_algorithm = NULL;
+ char *locking_scheme = NULL;
+ gf_boolean_t consistent_io = _gf_false;
+ gf_boolean_t choose_local_old = _gf_false;
+ gf_boolean_t enabled_old = _gf_false;
- GF_OPTION_RECONF ("data-self-heal-window-size",
- priv->data_self_heal_window_size, options,
- uint32, out);
+ priv = this->private;
- GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
- bool, out);
+ GF_OPTION_RECONF("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, options, bool, out);
- GF_OPTION_RECONF ("metadata-change-log",
- priv->metadata_change_log, options, bool, out);
+ GF_OPTION_RECONF("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32, out);
- GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
- bool, out);
+ GF_OPTION_RECONF("heal-wait-queue-length", priv->heal_wait_qlen, options,
+ uint32, out);
- GF_OPTION_RECONF ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, options, str, out);
+ GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options,
+ bool, out);
- GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
+ GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
+ out);
- if (read_subvol) {
- index = xlator_subvolume_index (this, read_subvol);
- if (index == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
- priv->read_child = index;
- }
+ GF_OPTION_RECONF("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options, uint32, out);
- GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
- GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
- GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
- uint32, out);
- fix_quorum_options(this,priv,qtype);
+ GF_OPTION_RECONF("data-self-heal-algorithm", data_self_heal_algorithm,
+ options, str, out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
- uint32, out);
- GF_OPTION_RECONF ("readdir-failover", priv->readdir_failover, options,
- bool, out);
- ret = 0;
-out:
- return ret;
+ GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out);
-}
+ GF_OPTION_RECONF("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ options, uint32, out);
+ GF_OPTION_RECONF("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ options, uint32, out);
-static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
- "as the 'favorite child'. This means that if a discrepancy in the content "
- "or attributes (ownership, permission, etc.) of a file is detected among "
- "the subvolumes, the file on '%s' will be considered the definitive "
- "version and its contents will OVERWRITE the contents of the file on other "
- "subvolumes. All versions of the file except that on '%s' "
- "WILL BE LOST.";
+ GF_OPTION_RECONF("halo-max-latency", priv->halo_max_latency_msec, options,
+ uint32, out);
+ GF_OPTION_RECONF("halo-max-replicas", priv->halo_max_replicas, options,
+ uint32, out);
-int32_t
-init (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int child_count = 0;
- xlator_list_t *trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
- xlator_t *read_subvol = NULL;
- xlator_t *fav_child = NULL;
- char *qtype = NULL;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "replicate translator needs more than one "
- "subvolume defined.");
- return -1;
- }
+ GF_OPTION_RECONF("halo-min-replicas", priv->halo_min_replicas, options,
+ uint32, out);
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- }
+ GF_OPTION_RECONF("read-subvolume", read_subvol, options, xlator, out);
- this->private = GF_CALLOC (1, sizeof (afr_private_t),
- gf_afr_mt_afr_private_t);
- if (!this->private)
- goto out;
+ choose_local_old = priv->choose_local;
+ GF_OPTION_RECONF("choose-local", priv->choose_local, options, bool, out);
- priv = this->private;
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
- //lock recovery is not done in afr
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
+ if (choose_local_old != priv->choose_local) {
+ priv->read_child = -1;
+ if (choose_local_old == _gf_false)
+ priv->did_discovery = _gf_false;
+ }
+
+ GF_OPTION_RECONF("read-hash-mode", priv->hash_mode, options, uint32, out);
+
+ if (read_subvol) {
+ index = xlator_subvolume_index(this, read_subvol);
+ if (index == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("read-subvolume-index", read_subvol_index, options, int32,
+ out);
+
+ if (read_subvol_index > -1) {
+ index = read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out);
+ GF_OPTION_RECONF("locking-scheme", locking_scheme, options, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out);
+ GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool,
+ out);
+
+ GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF("optimistic-change-log", priv->optimistic_change_log,
+ options, bool, out);
+ GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
+ fix_quorum_options(this, priv, qtype, options);
+ if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+
+ GF_OPTION_RECONF("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, options,
+ size_uint64, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+
+ enabled_old = priv->shd.enabled;
+ GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
+
+ GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
+ out);
+
+ timeout_old = priv->shd.timeout;
+ GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
+
+ GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
+ bool, out);
+
+ GF_OPTION_RECONF("shd-max-threads", priv->shd.max_threads, options, uint32,
+ out);
+
+ GF_OPTION_RECONF("shd-wait-qlength", priv->shd.wait_qlength, options,
+ uint32, out);
+
+ GF_OPTION_RECONF("favorite-child-policy", fav_child_policy, options, str,
+ out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
+
+ priv->did_discovery = _gf_false;
+
+ GF_OPTION_RECONF("consistent-io", consistent_io, options, bool, out);
+ if (priv->quorum_count != 0)
+ consistent_io = _gf_false;
+ priv->consistent_io = consistent_io;
+
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+ afr_selfheal_childup(this, priv);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- child_count = xlator_subvolume_count (this);
+static int
+afr_pending_xattrs_init(afr_private_t *priv, xlator_t *this)
+{
+ int ret = -1;
+ int i = 0;
+ char *ptr = NULL;
+ char *ptr1 = NULL;
+ char *xattrs_list = NULL;
+ xlator_list_t *trav = NULL;
+ int child_count = -1;
+
+ trav = this->children;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the
+ * name of the thin arbiter file for persistence across add/
+ * removal of DHT subvols.*/
+ child_count++;
+ }
+
+ GF_OPTION_INIT("afr-pending-xattr", xattrs_list, str, out);
+ priv->pending_key = GF_CALLOC(sizeof(*priv->pending_key), child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!xattrs_list) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG,
+ "Unable to fetch afr-pending-xattr option from volfile."
+ " Falling back to using client translator names. ");
- priv->child_count = child_count;
+ while (i < child_count) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ trav = trav->next;
+ i++;
+ }
+ ret = 0;
+ goto out;
+ }
+ ptr = ptr1 = gf_strdup(xattrs_list);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0, ptr = strtok(ptr, ","); ptr; ptr = strtok(NULL, ",")) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ ptr);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ i++;
+ }
+ ret = 0;
- priv->read_child = -1;
+out:
+ GF_FREE(ptr1);
+ return ret;
+}
- GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
- if (read_subvol) {
- priv->read_child = xlator_subvolume_index (this, read_subvol);
- if (priv->read_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
- }
+void
+afr_ta_init(afr_private_t *priv)
+{
+ priv->thin_arbiter_count = 1;
+ priv->child_count--;
+ priv->ta_child_up = 0;
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->ta_notify_dom_lock_offset = 0;
+ priv->ta_in_mem_txn_count = 0;
+ priv->ta_on_wire_txn_count = 0;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ INIT_LIST_HEAD(&priv->ta_waitq);
+ INIT_LIST_HEAD(&priv->ta_onwireq);
+ gf_uuid_clear(priv->ta_gfid);
+}
- priv->favorite_child = -1;
- GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
- if (fav_child) {
- priv->favorite_child = xlator_subvolume_index (this, fav_child);
- if (priv->favorite_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- fav_child->name);
- goto out;
- }
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, fav_child->name,
- fav_child->name, fav_child->name);
+int32_t
+init(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *thin_arbiter = NULL;
+ char *data_self_heal = NULL;
+ char *locking_scheme = NULL;
+ char *data_self_heal_algorithm = NULL;
+
+ if (!this->children) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED,
+ "replicate translator needs more than one "
+ "subvolume defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_VOL_MISCONFIGURED,
+ "Volume is dangling.");
+ }
+
+ this->private = GF_CALLOC(1, sizeof(afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&priv->saved_locks);
+ INIT_LIST_HEAD(&priv->lk_healq);
+ LOCK_INIT(&priv->lock);
+
+ child_count = xlator_subvolume_count(this);
+
+ priv->child_count = child_count;
+
+ priv->read_child = -1;
+
+ GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out);
+ GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out);
+ if (thin_arbiter && strlen(thin_arbiter) > 0) {
+ afr_ta_init(priv);
+ }
+ INIT_LIST_HEAD(&priv->healing);
+ INIT_LIST_HEAD(&priv->heal_waiting);
+
+ priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT;
+
+ GF_OPTION_INIT("afr-dirty-xattr", priv->afr_dirty, str, out);
+
+ GF_OPTION_INIT("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, bool, out);
+
+ GF_OPTION_INIT("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index(this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
+ }
+ }
+ GF_OPTION_INIT("read-subvolume-index", read_subvol_index, int32, out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", read_subvol_index);
+ goto out;
}
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT("choose-local", priv->choose_local, bool, out);
+ priv->pending_reads = GF_CALLOC(sizeof(*priv->pending_reads),
+ priv->child_count, gf_afr_mt_atomic_t);
- GF_OPTION_INIT ("background-self-heal-count",
- priv->background_self_heal_count, uint32, out);
+ GF_OPTION_INIT("read-hash-mode", priv->hash_mode, uint32, out);
- GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
+ priv->favorite_child = -1;
- GF_OPTION_INIT ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, str, out);
+ GF_OPTION_INIT("favorite-child-policy", fav_child_policy, str, out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
- GF_OPTION_INIT ("data-self-heal-window-size",
- priv->data_self_heal_window_size, uint32, out);
+ GF_OPTION_INIT("shd-max-threads", priv->shd.max_threads, uint32, out);
- GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
- out);
+ GF_OPTION_INIT("shd-wait-qlength", priv->shd.wait_qlength, uint32, out);
- GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
+ GF_OPTION_INIT("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
- GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
+ GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
- GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
+ GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
+ out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
- out);
+ GF_OPTION_INIT("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
- GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
+ GF_OPTION_INIT("metadata-self-heal", priv->metadata_self_heal, bool, out);
- GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
- bool, out);
+ GF_OPTION_INIT("entry-self-heal", priv->entry_self_heal, bool, out);
- GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
+ GF_OPTION_INIT("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ uint32, out);
- GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
+ GF_OPTION_INIT("halo-max-latency", priv->halo_max_latency_msec, uint32,
+ out);
+ GF_OPTION_INIT("halo-max-replicas", priv->halo_max_replicas, uint32, out);
+ GF_OPTION_INIT("halo-min-replicas", priv->halo_min_replicas, uint32, out);
- GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
+ GF_OPTION_INIT("halo-enabled", priv->halo_enabled, bool, out);
- GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
- GF_OPTION_INIT ("quorum-type", qtype, str, out);
- GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
- fix_quorum_options(this,priv,qtype);
+ GF_OPTION_INIT("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ uint32, out);
- GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
- GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out);
+ GF_OPTION_INIT("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out);
- priv->wait_count = 1;
+ GF_OPTION_INIT("optimistic-change-log", priv->optimistic_change_log, bool,
+ out);
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out);
+ GF_OPTION_INIT("locking-scheme", locking_scheme, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_INIT("full-lock", priv->full_lock, bool, out);
+ GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out);
- for (i = 0; i < child_count; i++)
- priv->child_up[i] = -1; /* start with unknown state.
- this initialization needed
- for afr_notify() to work
- reliably
- */
+ GF_OPTION_INIT("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT("quorum-type", qtype, str, out);
+ GF_OPTION_INIT("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64,
+ out);
+ fix_quorum_options(this, priv, qtype, this->options);
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT("ensure-durability", priv->ensure_durability, bool, out);
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("self-heal-daemon", priv->shd.enabled, bool, out);
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
- AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- ret = -ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
+ GF_OPTION_INIT("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT("heal-timeout", priv->shd.timeout, int32, out);
- priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
- gf_afr_mt_int32_t);
- if (!priv->last_event) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
+ GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
- /* keep more local here as we may need them for self-heal etc */
- this->local_pool = mem_pool_new (afr_local_t, 512);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- priv->first_lookup = 1;
- priv->root_inode = NULL;
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
+ if (priv->quorum_count != 0)
+ priv->consistent_io = _gf_false;
- if (!priv->shd.iamshd) {
- ret = 0;
- goto out;
- }
+ priv->wait_count = 1;
+ priv->local = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char);
+ if (!priv->local) {
ret = -ENOMEM;
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_brick_pos_t);
- if (!priv->shd.pos)
- goto out;
+ goto out;
+ }
- priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
- gf_afr_mt_int32_t);
- if (!priv->shd.pending)
- goto out;
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
- child_count, gf_afr_mt_shd_bool_t);
- if (!priv->shd.inprogress)
- goto out;
- priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
- gf_afr_mt_shd_timer_t);
- if (!priv->shd.timer)
- goto out;
+ priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false);
- if (!priv->shd.healed)
- goto out;
+ priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
+ gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false);
- if (!priv->shd.heal_failed)
- goto out;
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /*Initialize to -ve ping timeout so that they are not considered
+ * in child-up events until ping-event comes*/
+ for (i = 0; i < child_count; i++)
+ priv->child_latency[i] = -1;
+
+ priv->children = GF_CALLOC(sizeof(xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = afr_pending_xattrs_init(priv, this);
+ if (ret)
+ goto out;
+
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+ trav = trav->next;
+ i++;
+ }
+
+ ret = gf_asprintf(&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT, this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
- priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false);
- if (!priv->shd.split_brain)
- goto out;
+ priv->last_event = GF_CALLOC(child_count, sizeof(*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
- this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
- if (!this->itable)
- goto out;
- priv->root_inode = inode_ref (this->itable->root);
- GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out);
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (priv->shd.iamshd) {
+ ret = afr_selfheal_daemon_init(this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
- ret = 0;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new(afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->root_inode = NULL;
+
+ ret = 0;
out:
- return ret;
+ return ret;
+}
+void
+afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = -1;
+
+ if (!healer)
+ return;
+
+ if (healer->running) {
+ /*
+ * If there are any resources to cleanup, We need
+ * to do that gracefully using pthread_cleanup_push
+ */
+ ret = gf_thread_cleanup_xint(healer->thread);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Failed to clean up healer threads.");
+ healer->thread = 0;
+ }
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
}
-
-int
-fini (xlator_t *this)
+void
+afr_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ afr_self_heald_t *shd = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ if (shd->statistics[i])
+ eh_destroy(shd->statistics[i]);
+ }
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+ GF_FREE(shd->statistics);
+ if (shd->split_brain)
+ eh_destroy(shd->split_brain);
+}
+void
+fini(xlator_t *this)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- this->private = NULL;
- afr_priv_destroy (priv);
- if (this->itable);//I dont see any destroy func
+ priv = this->private;
- return 0;
-}
+ afr_selfheal_daemon_fini(this);
+ GF_ASSERT(list_empty(&priv->saved_locks));
+
+ LOCK(&priv->lock);
+ if (priv->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ UNLOCK(&priv->lock);
+
+ if (this->local_pool != NULL) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+ this->private = NULL;
+ afr_priv_destroy(priv);
+ if (this->itable) {
+ inode_table_destroy(this->itable);
+ this->itable = NULL;
+ }
+
+ return;
+}
struct xlator_fops fops = {
- .lookup = afr_lookup,
- .open = afr_open,
- .lk = afr_lk,
- .flush = afr_flush,
- .statfs = afr_statfs,
- .fsync = afr_fsync,
- .fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
- .inodelk = afr_inodelk,
- .finodelk = afr_finodelk,
- .entrylk = afr_entrylk,
- .fentrylk = afr_fentrylk,
-
- /* inode read */
- .access = afr_access,
- .stat = afr_stat,
- .fstat = afr_fstat,
- .readlink = afr_readlink,
- .getxattr = afr_getxattr,
- .fgetxattr = afr_fgetxattr,
- .readv = afr_readv,
-
- /* inode write */
- .writev = afr_writev,
- .truncate = afr_truncate,
- .ftruncate = afr_ftruncate,
- .setxattr = afr_setxattr,
- .fsetxattr = afr_fsetxattr,
- .setattr = afr_setattr,
- .fsetattr = afr_fsetattr,
- .removexattr = afr_removexattr,
- .fremovexattr = afr_fremovexattr,
-
- /* dir read */
- .opendir = afr_opendir,
- .readdir = afr_readdir,
- .readdirp = afr_readdirp,
-
- /* dir write */
- .create = afr_create,
- .mknod = afr_mknod,
- .mkdir = afr_mkdir,
- .unlink = afr_unlink,
- .rmdir = afr_rmdir,
- .link = afr_link,
- .symlink = afr_symlink,
- .rename = afr_rename,
+ .lookup = afr_lookup,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsyncdir = afr_fsyncdir,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+ .ipc = afr_ipc,
+ .lease = afr_lease,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
+ .readv = afr_readv,
+ .seek = afr_seek,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
+ .setattr = afr_setattr,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .fsync = afr_fsync,
+
+ /*inode open*/
+ .opendir = afr_opendir,
+ .open = afr_open,
+
+ /* dir read */
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
};
-
struct xlator_dumpops dumpops = {
- .priv = afr_priv_dump,
+ .priv = afr_priv_dump,
};
-
struct xlator_cbks cbks = {
- .release = afr_release,
- .releasedir = afr_releasedir,
- .forget = afr_forget,
+ .release = afr_release,
+ .releasedir = afr_releasedir,
+ .forget = afr_forget,
};
-
struct volume_options options[] = {
- { .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"background-self-heal-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .default_value = "16",
- },
- { .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "",
- .value = {"1", "on", "yes", "true", "enable",
- "0", "off", "no", "false", "disable",
- "open"},
- .default_value = "on",
- },
- { .key = {"data-self-heal-algorithm"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "",
- .description = "Select between \"full\", \"diff\". The "
- "\"full\" algorithm copies the entire file from "
- "source to sink. The \"diff\" algorithm copies to "
- "sink only those blocks whose checksums don't match "
- "with those of source.",
- .value = { "diff", "full", "" }
- },
- { .key = {"data-self-heal-window-size"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 1024,
- .default_value = "1",
- .description = "Maximum number blocks per file for which self-heal "
- "process would be applied simultaneously."
- },
- { .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"optimistic-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"strict-readdir"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"inodelk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"entrylk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"eager-lock"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"iam-self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"quorum-type"},
- .type = GF_OPTION_TYPE_STR,
- .value = { "none", "auto", "fixed", "" },
- .default_value = "none",
- .description = "If value is \"fixed\" only allow writes if "
- "quorum-count bricks are present. If value is "
- "\"auto\" only allow writes if more than half of "
- "bricks, or exactly half including the first, are "
- "present.",
- },
- { .key = {"quorum-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = INT_MAX,
- .default_value = 0,
- .description = "If quorum-type is \"fixed\" only allow writes if "
- "this many bricks or present. Other quorum types "
- "will OVERWRITE this value.",
- },
- { .key = {"node-uuid"},
- .type = GF_OPTION_TYPE_STR,
- .description = "Local glusterd uuid string",
- },
- { .key = {"post-op-delay-secs"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = INT_MAX,
- .default_value = "1",
- .description = "Time interval induced artificially before "
- "post-operation phase of the transaction to "
- "enhance overlap of adjacent write operations.",
- },
- { .key = {"readdir-failover"},
- .type = GF_OPTION_TYPE_BOOL,
- .description = "readdir(p) will not failover if this option is off",
- .default_value = "on",
- },
- { .key = {NULL} },
+ {.key = {"read-subvolume"},
+ .type = GF_OPTION_TYPE_XLATOR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"},
+ {.key = {"read-subvolume-index"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"},
+ {.key = {"read-hash-mode"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 5,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option.\n"
+ "0 = first readable child of AFR, starting from 1st child.\n"
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume).\n"
+ "2 = hash by GFID of file and client PID.\n"
+ "3 = brick having the least outstanding read requests.\n"
+ "4 = brick having the least network ping latency.\n"
+ "5 = Hybrid mode between 3 and 4, ie least value among "
+ "network-latency multiplied by outstanding-read-requests."},
+ {
+ .key = {"choose-local"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
+ },
+ {.key = {"background-self-heal-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 256,
+ .default_value = "8",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of per client self-heal "
+ "jobs that can perform parallel heals in the "
+ "background."},
+ {.key = {"halo-shd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for shd halo replication in msec."},
+ {.key = {"halo-enabled"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Enable Halo (geo) replication mode."},
+ {.key = {"halo-nfsd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for nfsd halo replication in msec."},
+ {.key = {"halo-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = AFR_HALO_MAX_LATENCY,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for halo replication in msec."},
+ {.key = {"halo-max-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The maximum number of halo replicas; replicas"
+ " beyond this value will be written asynchronously"
+ "via the SHD."},
+ {.key = {"halo-min-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "2",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The minimmum number of halo replicas, before adding "
+ "out of region replicas."},
+ {.key = {"heal-wait-queue-length"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
+ .default_value = "128",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of heals that can be queued"
+ " for the parallel background self heal jobs."},
+ {.key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false",
+ "disable", "open"},
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."},
+ {.key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = {"diff", "full"}},
+ {.key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number blocks per file for which self-heal "
+ "process would be applied simultaneously."},
+ {.key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."},
+ {.key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."},
+ {.key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."},
+ {.key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"},
+ {.key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"},
+ {.key = {"pre-op-compat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Use separate pre-op xattrop() FOP rather than "
+ "overloading xdata of the OP"},
+ {.key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "Enable/Disable eager lock for replica volume. "
+ "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and we revert to a blocking locks mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempts to acquire a full lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking locks as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
+ },
+ {.key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica_heal_enable_disable*/
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."},
+ {.key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."},
+ {.key = {"iam-nfs-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of an NFS daemon "
+ "or not."},
+ {
+ .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "auto", "fixed"},
+ .default_value = "none",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-type*/
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-count*/
+ /*.validate_fn = validate_quorum_count*/
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks are present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ {
+ .key = {"quorum-reads"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option has been removed. Reads are not allowed "
+ "if quorum is not met.",
+ },
+ {
+ .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ {
+ .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ {
+ .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .default_value = "1KB",
+ },
+ {
+ .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {3},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
+ {
+ .key = {"afr-dirty-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = AFR_DIRTY_DEFAULT,
+ },
+ {.key = {"afr-pending-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are used to "
+ "capture information on pending heals."},
+ {
+ .key = {"metadata-splitbrain-forced-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 5,
+ .max = INT_MAX,
+ .default_value = "600",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"},
+ {
+ .key = {"consistent-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, readdirp will force "
+ "lookups on those entries read whose read child is "
+ "not the same as that of the parent. This will "
+ "guarantee that all read operations on a file serve "
+ "attributes from the same subvol as long as it holds "
+ " a good copy of the file/dir.",
+ },
+ {.key = {"arbiter-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "subset of child_count. Has to be 0 or 1."},
+ {
+ .key = {"thin-arbiter"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "contains host:path of thin abriter brick",
+ },
+ {.key = {"shd-max-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 64,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number of parallel heals SHD can do per "
+ "local brick. This can substantially lower heal times"
+ ", but can also crush your bricks if you don't have "
+ "the storage hardware to support this."},
+ {
+ .key = {"shd-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 655536,
+ .default_value = "1024",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to control number of heals"
+ " that can wait in SHD per subvolume",
+ },
+ {
+ .key = {"locking-scheme"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"full", "granular"},
+ .default_value = "full",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is set to granular, self-heal will "
+ "stop being compatible with afr-v1, which helps afr "
+ "be more granular while self-healing",
+ },
+ {.key = {"full-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .op_version = {GD_OP_VERSION_3_13_2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "If this option is disabled, then the IOs will take "
+ "range locks same as versions till 3.13.1."},
+ {
+ .key = {"granular-entry-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, self-heal will resort to "
+ "granular way of recording changelogs and doing entry "
+ "self-heal.",
+ },
+ {
+ .key = {"favorite-child-policy"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "size", "ctime", "mtime", "majority"},
+ .default_value = "none",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to automatically resolve "
+ "split-brains using various policies without user "
+ "intervention. \"size\" picks the file with the "
+ "biggest size as the source. \"ctime\" and \"mtime\" "
+ "pick the file with the latest ctime and mtime "
+ "respectively as the source. \"majority\" picks a file"
+ " with identical mtime and size in more than half the "
+ "number of bricks in the replica.",
+ },
+ {
+ .key = {"consistent-io"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If this option is enabled, i/o will fail even if "
+ "one of the bricks is down in the replicas",
+ },
+ {.key = {"use-compound-fops"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_4},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "replicate",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 944f8455b88..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -8,1067 +8,1416 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __AFR_H__
#define __AFR_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "call-stub.h"
-#include "compat-errno.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/compat-errno.h>
#include "afr-mem-types.h"
-#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
-#include "timer.h"
+#include <glusterfs/timer.h>
+#include <glusterfs/syncop.h>
+
+#include "afr-self-heald.h"
+#include "afr-messages.h"
-#define AFR_XATTR_PREFIX "trusted.afr"
+#define SHD_INODE_LRU_LIMIT 1
#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
+#define AFR_DIRTY_DEFAULT AFR_XATTR_PREFIX ".dirty"
+#define AFR_DIRTY (((afr_private_t *)(THIS->private))->afr_dirty)
+
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
+#define AFR_DEFAULT_SPB_CHOICE_TIMEOUT 300 /*in seconds*/
+
+#define ARBITER_BRICK_INDEX 2
+#define THIN_ARBITER_BRICK_INDEX 2
+#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify"
+#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
+
+#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
+
+#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
+
+#define PFLAG_PENDING (1 << 0)
+#define PFLAG_SBRAIN (1 << 1)
+
+typedef int (*afr_lock_cbk_t)(call_frame_t *frame, xlator_t *this);
+
+typedef int (*afr_read_txn_wind_t)(call_frame_t *frame, xlator_t *this,
+ int subvol);
+
+typedef int (*afr_inode_refresh_cbk_t)(call_frame_t *frame, xlator_t *this,
+ int err);
+
+typedef int (*afr_changelog_resume_t)(call_frame_t *frame, xlator_t *this);
+
+#define AFR_COUNT(array, max) \
+ ({ \
+ int __i; \
+ int __res = 0; \
+ for (__i = 0; __i < max; __i++) \
+ if (array[__i]) \
+ __res++; \
+ __res; \
+ })
+#define AFR_INTERSECT(dst, src1, src2, max) \
+ ({ \
+ int __i; \
+ for (__i = 0; __i < max; __i++) \
+ dst[__i] = src1[__i] && src2[__i]; \
+ })
+#define AFR_CMP(a1, a2, len) \
+ ({ \
+ int __cmp = 0; \
+ int __i; \
+ for (__i = 0; __i < len; __i++) \
+ if (a1[__i] != a2[__i]) { \
+ __cmp = 1; \
+ break; \
+ } \
+ __cmp; \
+ })
+#define AFR_IS_ARBITER_BRICK(priv, index) \
+ ((priv->arbiter_count == 1) && (index == ARBITER_BRICK_INDEX))
+
+#define AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(ret, errnum) \
+ do { \
+ local->op_ret = ret; \
+ local->op_errno = errnum; \
+ if (local->op_errno == EIO) \
+ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, \
+ AFR_MSG_SPLIT_BRAIN, \
+ "Failing %s on gfid %s: " \
+ "split-brain observed.", \
+ gf_fop_list[local->op], uuid_utoa(local->inode->gfid)); \
+ } while (0)
+
+#define AFR_ERROR_OUT_IF_FDCTX_INVALID(__fd, __this, __error, __label) \
+ do { \
+ afr_fd_ctx_t *__fd_ctx = NULL; \
+ __fd_ctx = afr_fd_ctx_get(__fd, __this); \
+ if (__fd_ctx && __fd_ctx->is_fd_bad) { \
+ __error = EBADF; \
+ goto __label; \
+ } \
+ } while (0)
-struct _pump_private;
+typedef enum {
+ AFR_READ_POLICY_FIRST_UP,
+ AFR_READ_POLICY_GFID_HASH,
+ AFR_READ_POLICY_GFID_PID_HASH,
+ AFR_READ_POLICY_LESS_LOAD,
+ AFR_READ_POLICY_LEAST_LATENCY,
+ AFR_READ_POLICY_LOAD_LATENCY_HYBRID,
+} afr_read_hash_mode_t;
-typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
+typedef enum {
+ AFR_FAV_CHILD_NONE,
+ AFR_FAV_CHILD_BY_SIZE,
+ AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_BY_MTIME,
+ AFR_FAV_CHILD_BY_MAJORITY,
+ AFR_FAV_CHILD_POLICY_MAX,
+} afr_favorite_child_policy;
-typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int32_t op_error, int32_t op_errno);
-typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
+typedef enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+ AFR_SELFHEAL_DATA_DYNAMIC,
+} afr_data_self_heal_type_t;
-typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
-typedef void (*afr_lookup_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+typedef enum {
+ AFR_CHILD_UNKNOWN = -1,
+ AFR_CHILD_ZERO,
+ AFR_CHILD_ONE,
+ AFR_CHILD_THIN_ARBITER,
+} afr_child_index;
typedef enum {
- AFR_POS_UNKNOWN,
- AFR_POS_LOCAL,
- AFR_POS_REMOTE
-} afr_child_pos_t;
+ TA_WAIT_FOR_NOTIFY_LOCK_REL, /*FOP came after notify domain lock upcall
+ notification and waiting for its release.*/
+ TA_GET_INFO_FROM_TA_FILE, /*FOP needs post-op on ta file to get
+ *info about which brick is bad.*/
+ TA_INFO_IN_MEMORY_SUCCESS, /*Bad brick info is in memory and fop failed
+ *on BAD brick - Success*/
+ TA_INFO_IN_MEMORY_FAILED, /*Bad brick info is in memory and fop failed
+ *on GOOD brick - Failed*/
+ TA_SUCCESS, /*FOP succeeded on both data bricks.*/
+} afr_ta_fop_state_t;
+
+struct afr_nfsd {
+ uint32_t halo_max_latency_msec;
+ gf_boolean_t iamnfsd;
+};
+
+typedef struct _afr_lk_heal_info {
+ fd_t *fd;
+ int32_t cmd;
+ struct gf_flock flock;
+ dict_t *xdata_req;
+ unsigned char *locked_nodes;
+ struct list_head pos;
+ gf_lkowner_t lk_owner;
+ pid_t pid;
+ int32_t *child_up_event_gen;
+ int32_t *child_down_event_gen;
+} afr_lk_heal_info_t;
+
+typedef struct _afr_private {
+ gf_lock_t lock; /* to guard access to child_count, etc */
+ unsigned int child_count; /* total number of children */
+ unsigned int arbiter_count; /*subset of child_count.
+ Has to be 0 or 1.*/
+
+ xlator_t **children;
+
+ inode_t *root_inode;
+
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
+ /* For thin-arbiter. */
+ uuid_t ta_gfid;
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
+ int ta_bad_child_index;
+ int ta_event_gen;
+ unsigned int ta_in_mem_txn_count;
+ unsigned int ta_on_wire_txn_count;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+
+ unsigned char *anon_inode;
+ unsigned char *child_up;
+ unsigned char *halo_child_up;
+ int64_t *child_latency;
+ unsigned char *local;
+
+ char **pending_key;
+
+ afr_data_self_heal_type_t data_self_heal_algorithm;
+ unsigned int data_self_heal_window_size; /* max number of pipelined
+ read/writes */
+
+ struct list_head heal_waiting; /*queue for files that need heal*/
+ uint32_t heal_wait_qlen; /*configurable queue length for heal_waiting*/
+ int32_t heal_waiters; /* No. of elements currently in wait queue.*/
+
+ struct list_head healing; /* queue for files that are undergoing
+ background heal*/
+ uint32_t background_self_heal_count; /*configurable queue length for
+ healing queue*/
+ int32_t healers; /* No. of elements currently undergoing background
+ heal*/
+
+ gf_boolean_t release_ta_notify_dom_lock;
+
+ gf_boolean_t metadata_self_heal; /* on/off */
+ gf_boolean_t entry_self_heal; /* on/off */
+
+ gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
+ int read_child; /* read-subvolume */
+ gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
+
+ gf_timer_t *timer; /* launched when parent up is received */
+
+ unsigned int wait_count; /* # of servers to wait for success */
+
+ unsigned char ta_child_up;
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ gf_boolean_t pre_op_compat; /* on/off */
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
+
+ off_t ta_notify_dom_lock_offset;
+ afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
+ resolution of split-brains.*/
+ afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
+
+ int32_t *last_event;
+
+ /* @event_generation: Keeps count of number of events received which can
+ potentially impact consistency decisions. The events are CHILD_UP
+ and CHILD_DOWN, when we have to recalculate the freshness/staleness
+ of copies to detect if changes had happened while the other server
+ was down. CHILD_DOWN and CHILD_UP can also be received on network
+ disconnect/reconnects and not necessarily server going down/up.
+ Recalculating freshness/staleness on network events is equally
+ important as we might have had a network split brain.
+ */
+ uint32_t event_generation;
+ char vol_uuid[UUID_SIZE + 1];
+
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ gf_boolean_t ensure_durability;
+ gf_boolean_t halo_enabled;
+ gf_boolean_t consistent_metadata;
+ gf_boolean_t need_heal;
+ gf_boolean_t granular_locks;
+ uint64_t sh_readdir_size;
+ char *sh_domain;
+ char *afr_dirty;
+
+ uint64_t spb_choice_timeout;
+
+ afr_self_heald_t shd;
+ struct afr_nfsd nfsd;
+
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
+
+ gf_boolean_t full_lock;
+ gf_boolean_t esh_granular;
+ gf_boolean_t consistent_io;
+ gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
+
+ /*For lock healing.*/
+ struct list_head saved_locks;
+ struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
+} afr_private_t;
typedef enum {
- SPLIT_BRAIN = 1,
- ALL_FOOLS = 2
-} afr_subvol_status_t;
+ AFR_DATA_TRANSACTION, /* truncate, write, ... */
+ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
+ AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
+ AFR_ENTRY_RENAME_TRANSACTION, /* rename */
+} afr_transaction_type;
+
+/*
+ xattr format: trusted.afr.volume = [x y z]
+ x - data pending
+ y - metadata pending
+ z - entry pending
+*/
+
+static inline int
+afr_index_for_transaction_type(afr_transaction_type type)
+{
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ return 0;
+
+ case AFR_METADATA_TRANSACTION:
+ return 1;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ return 2;
+ }
+
+ return -1; /* make gcc happy */
+}
+
+static inline int
+afr_index_from_ia_type(ia_type_t type)
+{
+ switch (type) {
+ case IA_IFDIR:
+ return afr_index_for_transaction_type(AFR_ENTRY_TRANSACTION);
+ case IA_IFREG:
+ return afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+ default:
+ return -1;
+ }
+}
+
+typedef struct {
+ struct gf_flock flock;
+ loc_t loc;
+ fd_t *fd;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
+
+} afr_lockee_t;
+
+int
+afr_entry_lockee_cmp(const void *l1, const void *l2);
+
+typedef struct {
+ loc_t *lk_loc;
+
+ afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+
+ const char *lk_basename;
+ const char *lower_basename;
+ const char *higher_basename;
+
+ unsigned char *lower_locked_nodes;
+
+ afr_lock_cbk_t lock_cbk;
+
+ int lockee_count;
+
+ int32_t lk_call_count;
+ int32_t lk_expected_count;
+ int32_t lk_attempted_count;
+
+ int32_t lock_op_ret;
+ int32_t lock_op_errno;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_count;
+ char lower_locked;
+ char higher_locked;
+} afr_internal_lock_t;
+
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ dict_t *xattr; /*For xattrop*/
+ dict_t *xdata;
+ struct iatt poststat;
+ struct iatt postparent;
+ struct iatt prestat;
+ struct iatt preparent;
+ struct iatt preparent2;
+ struct iatt postparent2;
+ int32_t op_errno;
+ /* For rchecksum */
+ uint8_t checksum[SHA256_DIGEST_LENGTH];
+ gf_boolean_t buf_has_zeroes;
+ gf_boolean_t fips_mode_rchecksum;
+ /* For lookup */
+ int8_t need_heal;
+};
typedef enum {
- AFR_INODE_SET_READ_CTX = 1,
- AFR_INODE_RM_STALE_CHILDREN,
- AFR_INODE_SET_OPENDIR_DONE,
- AFR_INODE_SET_SPLIT_BRAIN,
- AFR_INODE_GET_READ_CTX,
- AFR_INODE_GET_OPENDIR_DONE,
- AFR_INODE_GET_SPLIT_BRAIN,
-} afr_inode_op_t;
-
-typedef struct afr_inode_params_ {
- afr_inode_op_t op;
- union {
- gf_boolean_t value;
- struct {
- int32_t read_child;
- int32_t *children;
- } read_ctx;
- } u;
-} afr_inode_params_t;
-
-typedef struct afr_inode_ctx_ {
- uint64_t masks;
- int32_t *fresh_children;//increasing order of latency
-} afr_inode_ctx_t;
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
+
+typedef struct {
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+ int flags;
+
+ /* the subvolume on which the latest sequence of readdirs (starting
+ at offset 0) has begun. Till the next readdir request with 0 offset
+ arrives, we continue to read off this subvol.
+ */
+ int readdir_subvol;
+ /* lock-healing related members. */
+ gf_boolean_t is_fd_bad;
+ afr_lk_heal_info_t *lk_heal_info;
+
+} afr_fd_ctx_t;
typedef enum {
- NONE,
- INDEX,
- FULL,
-} afr_crawl_type_t;
-
-typedef struct afr_self_heald_ {
- gf_boolean_t enabled;
- gf_boolean_t iamshd;
- afr_crawl_type_t *pending;
- gf_boolean_t *inprogress;
- afr_child_pos_t *pos;
- gf_timer_t **timer;
- eh_t *healed;
- eh_t *heal_failed;
- eh_t *split_brain;
- char *node_uuid;
-} afr_self_heald_t;
+ AFR_FOP_LOCK_PARALLEL,
+ AFR_FOP_LOCK_SERIAL,
+ AFR_FOP_LOCK_QUORUM_FAILED,
+} afr_fop_lock_state_t;
+
+typedef struct _afr_inode_lock_t {
+ /* @num_inodelks:
+ Number of inodelks queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ int32_t num_inodelks;
+ unsigned int event_generation;
+ gf_timer_t *delay_timer;
+ struct list_head owners; /*Transactions that are performing fop*/
+ struct list_head post_op; /*Transactions that are done with the fop
+ *So can not conflict with the fops*/
+ struct list_head waiting; /*Transaction that are waiting for
+ *conflicting transactions to complete*/
+ struct list_head frozen; /*Transactions that need to go as part of
+ * next batch of eager-lock*/
+ gf_boolean_t release;
+ gf_boolean_t acquired;
+} afr_lock_t;
+
+typedef struct _afr_inode_ctx {
+ uint64_t read_subvol;
+ uint64_t write_subvol;
+ int lock_count;
+ int spb_choice;
+ gf_timer_t *timer;
+ unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
+ int inherited[AFR_NUM_CHANGE_LOGS];
+ int on_disk[AFR_NUM_CHANGE_LOGS];
+ /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
+ afr_lock_t lock[2];
+
+ /* @open_fd_count:
+ Number of open FDs queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ uint32_t open_fd_count;
+ gf_boolean_t need_refresh;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+} afr_inode_ctx_t;
-typedef struct _afr_private {
- gf_lock_t lock; /* to guard access to child_count, etc */
- unsigned int child_count; /* total number of children */
+typedef struct _afr_local {
+ glusterfs_fop_t op;
+ unsigned int call_count;
- unsigned int read_child_rr; /* round-robin index of the read_child */
- gf_lock_t read_child_lock; /* lock to protect above */
+ /* @event_generation: copy of priv->event_generation taken at the
+ time of starting the transaction. The copy is made so that we
+ have a stable value through the various phases of the transaction.
+ */
+ unsigned int event_generation;
- xlator_t **children;
+ uint32_t open_fd_count;
+ int32_t num_inodelks;
- int first_lookup;
- inode_t *root_inode;
+ int32_t op_ret;
+ int32_t op_errno;
- unsigned char *child_up;
+ int dirty[AFR_NUM_CHANGE_LOGS];
- char **pending_key;
+ int32_t **pending;
- char *data_self_heal; /* on/off/open */
- char * data_self_heal_algorithm; /* name of algorithm */
- unsigned int data_self_heal_window_size; /* max number of pipelined
- read/writes */
+ loc_t loc;
+ loc_t newloc;
- unsigned int background_self_heal_count;
- unsigned int background_self_heals_started;
- gf_boolean_t metadata_self_heal; /* on/off */
- gf_boolean_t entry_self_heal; /* on/off */
+ fd_t *fd;
+ afr_fd_ctx_t *fd_ctx;
- gf_boolean_t data_change_log; /* on/off */
- gf_boolean_t metadata_change_log; /* on/off */
- gf_boolean_t entry_change_log; /* on/off */
+ /* @child_up: copy of priv->child_up taken at the time of transaction
+ start. The copy is taken so that we have a stable child_up array
+ through the phases of the transaction as priv->child_up[i] can keep
+ changing through time.
+ */
+ unsigned char *child_up;
- int read_child; /* read-subvolume */
- int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
+ /* @read_attempted:
+ array of flags representing subvolumes where read operations of
+ the read transaction have already been attempted. The array is
+ first pre-filled with down subvolumes, and as reads are performed
+ on other subvolumes, those are set as well. This way if the read
+ operation fails we do not retry on that subvolume again.
+ */
+ unsigned char *read_attempted;
- gf_boolean_t inodelk_trace;
- gf_boolean_t entrylk_trace;
+ /* @readfn:
- gf_boolean_t strict_readdir;
+ pointer to function which will perform the read operation on a given
+ subvolume. Used in read transactions.
+ */
- unsigned int wait_count; /* # of servers to wait for success */
+ afr_read_txn_wind_t readfn;
- uint64_t up_count; /* number of CHILD_UPs we have seen */
- uint64_t down_count; /* number of CHILD_DOWNs we have seen */
+ /* @inode:
- struct _pump_private *pump_private; /* Set if we are loaded as pump */
- int use_afr_in_pump;
+ the inode on which the read txn is performed on. ref'ed and copied
+ from either fd->inode or loc.inode
+ */
- pthread_mutex_t mutex;
- struct list_head saved_fds; /* list of fds on which locks have succeeded */
- gf_boolean_t optimistic_change_log;
- gf_boolean_t eager_lock;
- uint32_t post_op_delay_secs;
- unsigned int quorum_count;
+ inode_t *inode;
- char vol_uuid[UUID_SIZE + 1];
- int32_t *last_event;
- afr_self_heald_t shd;
- gf_boolean_t readdir_failover;
-} afr_private_t;
+ /* @parent[2]:
-typedef struct {
- /* External interface: These are variables (some optional) that
- are set by whoever has triggered self-heal */
-
- gf_boolean_t do_data_self_heal;
- gf_boolean_t do_metadata_self_heal;
- gf_boolean_t do_entry_self_heal;
- gf_boolean_t do_gfid_self_heal;
- gf_boolean_t do_missing_entry_self_heal;
-
- gf_boolean_t forced_merge; /* Is this a self-heal triggered to
- forcibly merge the directories? */
-
- gf_boolean_t background; /* do self-heal in background
- if possible */
- ia_type_t type; /* st_mode of the entry we're doing
- self-heal on */
- inode_t *inode; /* inode on which the self-heal is
- performed on */
- uuid_t sh_gfid_req; /* gfid self-heal needs to be done
- with this gfid if it is not null */
-
- /* Function to call to unwind. If self-heal is being done in the
- background, this function will be called as soon as possible. */
-
- int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno, int32_t sh_failed);
-
- /* End of external interface members */
-
-
- /* array of stat's, one for each child */
- struct iatt *buf;
- struct iatt *parentbufs;
- struct iatt parentbuf;
- struct iatt entrybuf;
-
- afr_expunge_done_cbk_t expunge_done;
- afr_impunge_done_cbk_t impunge_done;
-
- /* array of xattr's, one for each child */
- dict_t **xattr;
-
- /* array containing if the lookups succeeded in the order of response
- */
- int32_t *success_children;
- int success_count;
- /* array containing the fresh children found in the self-heal process */
- int32_t *fresh_children;
- /* array containing the fresh children found in the parent lookup */
- int32_t *fresh_parent_dirs;
- /* array of errno's, one for each child */
- int *child_errno;
- /*loc used for lookup*/
- loc_t lookup_loc;
- int32_t lookup_flags;
- afr_lookup_done_cbk_t lookup_done;
-
- int32_t **pending_matrix;
- int32_t **delta_matrix;
-
- int32_t op_ret;
- int32_t op_errno;
-
- int *sources;
- int source;
- int active_source;
- int active_sinks;
- unsigned char *success;
- unsigned char *locked_nodes;
- int lock_count;
-
- const char *linkname;
- gf_boolean_t entries_skipped;
-
- int op_failed;
- gf_boolean_t actual_sh_started;
- gf_boolean_t sync_done;
- gf_boolean_t data_lock_held;
- gf_boolean_t eof_reached;
- fd_t *healing_fd;
- int file_has_holes;
- blksize_t block_size;
- off_t file_size;
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- afr_post_remove_call_t post_remove_call;
+ parent inode[s] on which directory transactions are performed.
+ */
- loc_t parent_loc;
+ inode_t *parent;
+ inode_t *parent2;
- call_frame_t *orig_frame;
- call_frame_t *old_loop_frame;
- gf_boolean_t unwound;
+ /* @readable:
- afr_sh_algo_private_t *private;
+ array of flags representing servers from which a read can be
+ performed. This is the output of afr_inode_refresh()
+ */
+ unsigned char *readable;
+ unsigned char *readable2; /*For rename transaction*/
- struct afr_sh_algorithm *algo;
- afr_lock_cbk_t data_lock_success_handler;
- afr_lock_cbk_t data_lock_failure_handler;
- gf_boolean_t data_lock_block;
- int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
- int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
- gf_boolean_t mdata_spb;
- gf_boolean_t data_spb;
+ afr_inode_refresh_cbk_t refreshfn;
- call_frame_t *sh_frame;
-} afr_self_heal_t;
+ /* @refreshinode:
-typedef enum {
- AFR_DATA_TRANSACTION, /* truncate, write, ... */
- AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
- AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
- AFR_ENTRY_RENAME_TRANSACTION, /* rename */
-} afr_transaction_type;
+ Inode currently getting refreshed.
+ */
+ inode_t *refreshinode;
-typedef enum {
- AFR_TRANSACTION_LK,
- AFR_SELFHEAL_LK,
-} transaction_lk_type_t;
+ dict_t *xattr_req;
-typedef enum {
- AFR_LOCK_OP,
- AFR_UNLOCK_OP,
-} afr_lock_op_type_t;
+ dict_t *dict;
-typedef enum {
- AFR_DATA_SELF_HEAL_LK,
- AFR_METADATA_SELF_HEAL_LK,
- AFR_ENTRY_SELF_HEAL_LK,
-}selfheal_lk_type_t;
+ int read_subvol; /* Current read subvolume */
-typedef enum {
- AFR_INODELK_TRANSACTION,
- AFR_INODELK_NB_TRANSACTION,
- AFR_ENTRYLK_TRANSACTION,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_INODELK_SELFHEAL,
- AFR_INODELK_NB_SELFHEAL,
- AFR_ENTRYLK_SELFHEAL,
- AFR_ENTRYLK_NB_SELFHEAL,
-} afr_lock_call_type_t;
+ int optimistic_change_log;
-/*
- xattr format: trusted.afr.volume = [x y z]
- x - data pending
- y - metadata pending
- z - entry pending
-*/
+ afr_internal_lock_t internal_lock;
-static inline int
-afr_index_for_transaction_type (afr_transaction_type type)
-{
- switch (type) {
+ /*To handle setattr/setxattr on yet to be linked inode from dht*/
+ uuid_t refreshgfid;
- case AFR_DATA_TRANSACTION:
- return 0;
+ /* @refreshed:
- case AFR_METADATA_TRANSACTION:
- return 1;
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- return 2;
- }
+ gf_boolean_t update_num_inodelks;
+ gf_boolean_t update_open_fd_count;
- return -1; /* make gcc happy */
-}
+ /*
+ @pre_op_compat:
+ compatibility mode of pre-op. send a separate pre-op and
+ op operations as part of transaction, rather than combining
+ */
-typedef struct {
- loc_t *lk_loc;
- struct gf_flock lk_flock;
-
- const char *lk_basename;
- const char *lower_basename;
- const char *higher_basename;
- char lower_locked;
- char higher_locked;
-
- unsigned char *locked_nodes;
- unsigned char *lower_locked_nodes;
- unsigned char *inode_locked_nodes;
- unsigned char *entry_locked_nodes;
-
- selfheal_lk_type_t selfheal_lk_type;
- transaction_lk_type_t transaction_lk_type;
-
- int32_t lock_count;
- int32_t inodelk_lock_count;
- int32_t entrylk_lock_count;
-
- uint64_t lock_number;
- int32_t lk_call_count;
- int32_t lk_expected_count;
-
- int32_t lock_op_ret;
- int32_t lock_op_errno;
- afr_lock_cbk_t lock_cbk;
-} afr_internal_lock_t;
+ gf_boolean_t pre_op_compat;
-typedef struct _afr_locked_fd {
- fd_t *fd;
- struct list_head list;
-} afr_locked_fd_t;
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
-typedef struct _afr_local {
- int uid;
- int gid;
- unsigned int call_count;
- unsigned int success_count;
- unsigned int enoent_count;
- unsigned int govinda_gOvinda;
- unsigned int read_child_index;
- unsigned char read_child_returned;
- unsigned int first_up_child;
+ /* This write appended to the file. Nnot necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
- gf_lkowner_t saved_lk_owner;
+ /*
+ This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
+ */
- int32_t op_ret;
- int32_t op_errno;
+ struct {
+ struct {
+ struct statvfs buf;
+ unsigned char buf_set;
+ } statfs;
- int32_t **pending;
+ struct {
+ fd_t *fd;
+ int32_t flags;
+ } open;
- loc_t loc;
- loc_t newloc;
+ struct {
+ struct gf_flock user_flock;
+ struct gf_flock ret_flock;
+ unsigned char *locked_nodes;
+ int32_t cmd;
+ /*For lock healing only.*/
+ unsigned char *dom_locked_nodes;
+ int32_t *dom_lock_op_ret;
+ int32_t *dom_lock_op_errno;
+ struct gf_flock *getlk_rsp;
+ } lk;
+
+ /* inode read */
- fd_t *fd;
- unsigned char *fd_open_on;
+ struct {
+ int32_t mask;
+ int last_index; /* index of the child we tried previously */
+ } access;
- glusterfs_fop_t fop;
+ struct {
+ int last_index;
+ } stat;
- unsigned char *child_up;
- int32_t *fresh_children; //in the order of response
+ struct {
+ int last_index;
+ } fstat;
- int32_t *child_errno;
+ struct {
+ size_t size;
+ int last_index;
+ } readlink;
- dict_t *xattr_req;
+ struct {
+ char *name;
+ long xattr_len;
+ int last_index;
+ } getxattr;
- int32_t inodelk_count;
- int32_t entrylk_count;
+ struct {
+ size_t size;
+ off_t offset;
+ int last_index;
+ uint32_t flags;
+ } readv;
- afr_internal_lock_t internal_lock;
+ /* dir read */
- afr_locked_fd_t *locked_fd;
- int32_t source_child;
- int32_t lock_recovery_child;
+ struct {
+ uint32_t *checksum;
+ int success_count;
+ int32_t op_ret;
+ int32_t op_errno;
+ } opendir;
- dict_t *dict;
- int optimistic_change_log;
- gf_boolean_t delayed_post_op;
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ size_t size;
+ off_t offset;
+ dict_t *dict;
+ int last_index;
+ gf_boolean_t failed;
+ } readdir;
+ /* inode write */
- gf_boolean_t fop_paused;
- int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
+ struct {
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } inode_wfop; // common structure for all inode-write-fops
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
- */
+ struct {
+ struct iovec *vector;
+ struct iobref *iobref;
+ off_t offset;
+ int32_t op_ret;
+ int32_t count;
+ uint32_t flags;
+ } writev;
+
+ struct {
+ off_t offset;
+ } truncate;
+
+ struct {
+ off_t offset;
+ } ftruncate;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } setattr;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } fsetattr;
- int op;
struct {
- struct {
- unsigned char buf_set;
- struct statvfs buf;
- } statfs;
-
- struct {
- uint32_t parent_entrylk;
- uuid_t gfid_req;
- inode_t *inode;
- struct iatt buf;
- struct iatt postparent;
- dict_t **xattrs;
- dict_t *xattr;
- struct iatt *postparents;
- struct iatt *bufs;
- int32_t read_child;
- int32_t *sources;
- int32_t *success_children;
- gf_boolean_t fresh_lookup;
- gf_boolean_t possible_spb;
- } lookup;
-
- struct {
- int32_t flags;
- } open;
-
- struct {
- int32_t cmd;
- struct gf_flock user_flock;
- struct gf_flock ret_flock;
- unsigned char *locked_nodes;
- } lk;
-
- /* inode read */
-
- struct {
- int32_t mask;
- int last_index; /* index of the child we tried previously */
- } access;
-
- struct {
- int last_index;
- } stat;
-
- struct {
- int last_index;
- } fstat;
-
- struct {
- size_t size;
- int last_index;
- } readlink;
-
- struct {
- char *name;
- int last_index;
- long xattr_len;
- } getxattr;
-
- struct {
- size_t size;
- off_t offset;
- int last_index;
- uint32_t flags;
- } readv;
-
- /* dir read */
-
- struct {
- int success_count;
- int32_t op_ret;
- int32_t op_errno;
-
- uint32_t *checksum;
- } opendir;
-
- struct {
- int32_t op_ret;
- int32_t op_errno;
- size_t size;
- off_t offset;
- dict_t *dict;
- gf_boolean_t failed;
- int last_index;
- } readdir;
- /* inode write */
-
- struct {
- struct iatt prebuf;
- struct iatt postbuf;
-
- int32_t op_ret;
-
- struct iovec *vector;
- struct iobref *iobref;
- int32_t count;
- off_t offset;
- uint32_t flags;
- } writev;
-
- struct {
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync;
-
- struct {
- off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
- } truncate;
-
- struct {
- off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
- } ftruncate;
-
- struct {
- struct iatt in_buf;
- int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
- } setattr;
-
- struct {
- struct iatt in_buf;
- int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
- } fsetattr;
-
- struct {
- dict_t *dict;
- int32_t flags;
- } setxattr;
-
- struct {
- dict_t *dict;
- int32_t flags;
- } fsetxattr;
-
- struct {
- char *name;
- } removexattr;
-
- struct {
- dict_t *xattr;
- } xattrop;
-
- struct {
- dict_t *xattr;
- } fxattrop;
-
- /* dir write */
-
- struct {
- fd_t *fd;
- dict_t *params;
- int32_t flags;
- mode_t mode;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
- } create;
-
- struct {
- dev_t dev;
- mode_t mode;
- dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
- } mknod;
-
- struct {
- int32_t mode;
- dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } mkdir;
-
- struct {
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink;
-
- struct {
- int flags;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } rmdir;
-
- struct {
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preoldparent;
- struct iatt prenewparent;
- struct iatt postoldparent;
- struct iatt postnewparent;
- } rename;
-
- struct {
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } link;
-
- struct {
- inode_t *inode;
- dict_t *params;
- struct iatt buf;
- struct iatt read_child_buf;
- char *linkpath;
- struct iatt preparent;
- struct iatt postparent;
- } symlink;
-
- struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
- } cont;
+ dict_t *dict;
+ int32_t flags;
+ } setxattr;
struct {
- off_t start, len;
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
- int *eager_lock;
+ struct {
+ char *name;
+ } removexattr;
- char *basename;
- char *new_basename;
+ struct {
+ dict_t *xattr;
+ gf_xattrop_flags_t optype;
+ } xattrop;
- loc_t parent_loc;
- loc_t new_parent_loc;
+ /* dir write */
- afr_transaction_type type;
+ struct {
+ inode_t *inode;
+ struct iatt buf;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; // common structure for all dir fops
- int success_count;
- int erase_pending;
- int failure_count;
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
+ } create;
- int last_tried;
- int32_t *child_errno;
- unsigned char *pre_op;
+ struct {
+ dict_t *params;
+ dev_t dev;
+ mode_t mode;
+ } mknod;
- call_frame_t *main_frame;
+ struct {
+ dict_t *params;
+ int32_t mode;
+ } mkdir;
- int (*fop) (call_frame_t *frame, xlator_t *this);
+ struct {
+ dict_t *params;
+ char *linkpath;
+ } symlink;
- int (*done) (call_frame_t *frame, xlator_t *this);
+ struct {
+ off_t offset;
+ size_t len;
+ int32_t mode;
+ } fallocate;
- int (*resume) (call_frame_t *frame, xlator_t *this);
+ struct {
+ off_t offset;
+ size_t len;
+ } discard;
- int (*unwind) (call_frame_t *frame, xlator_t *this);
+ struct {
+ off_t offset;
+ off_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
- /* post-op hook */
- } transaction;
+ struct {
+ char *volume;
+ int32_t cmd;
+ int32_t in_cmd;
+ struct gf_flock in_flock;
+ struct gf_flock flock;
+ void *xdata;
+ } inodelk;
- afr_self_heal_t self_heal;
+ struct {
+ char *volume;
+ char *basename;
+ void *xdata;
+ entrylk_cmd in_cmd;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ } entrylk;
- struct marker_str marker;
+ struct {
+ off_t offset;
+ gf_seek_what_t what;
+ } seek;
- /* extra data for fops */
- dict_t *xdata_req;
- dict_t *xdata_rsp;
+ struct {
+ struct gf_lease user_lease;
+ struct gf_lease ret_lease;
+ unsigned char *locked_nodes;
+ } lease;
- mode_t umask;
- int xflag;
-} afr_local_t;
+ struct {
+ int flags;
+ } rmdir;
-typedef enum {
- AFR_FD_NOT_OPENED,
- AFR_FD_OPENED,
- AFR_FD_OPENING
-} afr_fd_open_status_t;
+ struct {
+ int32_t datasync;
+ } fsync;
-typedef struct {
- struct list_head call_list;
- call_frame_t *frame;
-} afr_fd_paused_call_t;
+ struct {
+ uuid_t gfid_req;
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
-typedef struct {
- unsigned int *pre_op_done;
- afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
- unsigned int *pre_op_piggyback;
+ } cont;
- unsigned int *lock_piggyback;
- unsigned int *lock_acquired;
+ struct {
+ char *basename;
+ char *new_basename;
- int flags;
- uint64_t up_count; /* number of CHILD_UPs this fd has seen */
- uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
+ loc_t parent_loc;
+ loc_t new_parent_loc;
- int32_t last_tried;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
- int hit, miss;
- gf_boolean_t failed_over;
- struct list_head entries; /* needed for readdir failover */
+ struct list_head owner_list;
+ struct list_head wait_list;
- unsigned char *locked_on; /* which subvolumes locks have been successful */
- struct list_head paused_calls; /* queued calls while fix_open happens */
+ unsigned char *pre_op;
- /* used for delayed-post-op optimization */
- pthread_mutex_t delay_lock;
- gf_timer_t *delay_timer;
- call_frame_t *delay_frame;
- int call_child;
-} afr_fd_ctx_t;
+ /* Changelog xattr dict for [f]xattrop*/
+ dict_t **changelog_xdata;
+ unsigned char *pre_op_sources;
+ /* @failed_subvols: subvolumes on which a pre-op or a
+ FOP failed. */
+ unsigned char *failed_subvols;
-/* try alloc and if it fails, goto label */
-#define AFR_LOCAL_ALLOC_OR_GOTO(var, label) do { \
- var = mem_get0 (THIS->local_pool); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
- } while (0);
+ call_frame_t *main_frame; /*Fop frame*/
+ call_frame_t *frame; /*Transaction frame*/
+ int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
-/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
-#define afr_fop_failed(op_ret, op_errno) ((op_ret) == -1)
+ off_t start, len;
-/* have we tried all children? */
-#define all_tried(i, count) ((i) == (count) - 1)
+ afr_transaction_type type;
-int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid);
+ int32_t in_flight_sb_errno; /* This is where the cause of the
+ failure on the last good copy of
+ the file is stored.
+ */
-int
-pump_command_reply (call_frame_t *frame, xlator_t *this);
+ /* @changelog_resume: function to be called after changlogging
+ (either pre-op or post-op) is done
+ */
+ afr_changelog_resume_t changelog_resume;
-int32_t
-afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
+ gf_boolean_t eager_lock_on;
+ gf_boolean_t do_eager_unlock;
+
+ /* @dirtied: flag which indicates whether we set dirty flag
+ in the OP. Typically true when we are performing operation
+ on more than one subvol and optimistic changelog is disabled
+
+ A 'true' value set in @dirtied flag means an 'undirtying'
+ has to be done in POST-OP phase.
+ */
+ gf_boolean_t dirtied;
+ /* @inherited: flag which indicates that the dirty flags
+ of the previous transaction were inherited
+ */
+ gf_boolean_t inherited;
+
+ /*
+ @no_uninherit: flag which indicates that a pre_op_uninherit()
+ must _not_ be attempted (and returned as failure) always. This
+ flag is set when a hard pre-op is performed, but not accounted
+ for it in fd_ctx->on_disk[]. Such transactions are "isolated"
+ from the pre-op piggybacking entirely and therefore uninherit
+ must not be attempted.
+ */
+ gf_boolean_t no_uninherit;
+
+ gf_boolean_t in_flight_sb; /* Indicator for occurrence of
+ split-brain while in the middle of
+ a txn. */
+
+ /* @uninherit_done:
+ @uninherit_value:
+
+ The above pair variables make pre_op_uninherit() idempotent.
+ Both are FALSE initially. The first call to pre_op_uninherit
+ sets @uninherit_done to TRUE and the return value to
+ @uninherit_value. Further calls will check for @uninherit_done
+ to be TRUE and if so will simply return @uninherit_value.
+ */
+ gf_boolean_t uninherit_done;
+ gf_boolean_t uninherit_value;
+
+ gf_boolean_t disable_delayed_post_op;
+ } transaction;
+
+ syncbarrier_t barrier;
+
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
+
+ dict_t *xattr_rsp; /*for [f]xattrop*/
+
+ mode_t umask;
+ int xflag;
+ struct afr_reply *replies;
+
+ /* For client side background heals. */
+ struct list_head healer;
+ call_frame_t *heal_frame;
+
+ afr_inode_ctx_t *inode_ctx;
+
+ /*For thin-arbiter transactions.*/
+ int ta_failed_subvol;
+ int ta_event_gen;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+ afr_ta_fop_state_t fop_state;
+ afr_fop_lock_state_t fop_lock_state;
+ gf_lkowner_t saved_lk_owner;
+ unsigned char read_txn_query_child;
+ unsigned char ta_child_up;
+ gf_boolean_t do_discovery;
+ gf_boolean_t need_full_crawl;
+ gf_boolean_t is_read_txn;
+ gf_boolean_t is_new_entry;
+} afr_local_t;
+
+typedef struct afr_spbc_timeout {
+ call_frame_t *frame;
+ loc_t *loc;
+ int spb_child_index;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
+} afr_spbc_timeout_t;
+
+typedef struct afr_spb_status {
+ call_frame_t *frame;
+ loc_t *loc;
+} afr_spb_status_t;
+
+typedef struct afr_empty_brick_args {
+ call_frame_t *frame;
+ char *op_type;
+ loc_t loc;
+ int empty_index;
+} afr_empty_brick_args_t;
+
+typedef struct afr_read_subvol_args {
+ ia_type_t ia_type;
+ uuid_t gfid;
+} afr_read_subvol_args_t;
+
+typedef struct afr_granular_esh_args {
+ fd_t *heal_fd;
+ xlator_t *xl;
+ call_frame_t *frame;
+ gf_boolean_t mismatch; /* flag to represent occurrence of type/gfid
+ mismatch */
+} afr_granular_esh_args_t;
+
+int
+afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
+int
+afr_inode_read_subvol_get(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
+__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
int
-afr_save_locked_fd (xlator_t *this, fd_t *fd);
+__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvol,
+ int event_generation);
+int
+afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int event_generation);
int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
-void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
-afr_set_lock_number (call_frame_t *frame, xlator_t *this);
+afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args);
+int
+afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
+int
+afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables, int *event_p,
+ afr_transaction_type type, afr_read_subvol_args_t *args);
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2);
+#define afr_data_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_DATA_TRANSACTION, a)
-int32_t
-afr_unlock (call_frame_t *frame, xlator_t *this);
+#define afr_metadata_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a)
int
-afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this);
+afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t cbk);
+
+int32_t
+afr_notify(xlator_t *this, int32_t event, void *data, void *data2);
int
-afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this);
+xattr_is_equal(dict_t *this, char *key1, data_t *value1, void *data);
int
-afr_blocking_lock (call_frame_t *frame, xlator_t *this);
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count);
int
-afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
+afr_add_inode_lockee(afr_local_t *local, int child_count);
void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
- unsigned int child_count);
-
-int pump_start (call_frame_t *frame, xlator_t *this);
+afr_lockees_cleanup(afr_internal_lock_t *int_lock);
int
-__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+afr_attempt_lock_recovery(xlator_t *this, int32_t child_index);
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd);
-
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);
+afr_mark_locked_nodes(xlator_t *this, fd_t *fd, unsigned char *locked_nodes);
void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children);
+afr_set_lk_owner(call_frame_t *frame, xlator_t *this, void *lk_owner);
int
-afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
+afr_set_lock_number(call_frame_t *frame, xlator_t *this);
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count);
+int32_t
+afr_unlock(call_frame_t *frame, xlator_t *this);
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count);
+int
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this);
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count);
+int
+afr_blocking_lock(call_frame_t *frame, xlator_t *this);
-gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
+int
+afr_internal_lock_finish(call_frame_t *frame, xlator_t *this);
-void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent);
+int
+__afr_fd_ctx_set(xlator_t *this, fd_t *fd);
+
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this);
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno);
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this);
+int
+afr_locked_nodes_count(unsigned char *locked_nodes, int child_count);
int
-afr_frame_return (call_frame_t *frame);
+afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal);
-gf_boolean_t
-afr_is_split_brain (xlator_t *this, inode_t *inode);
+void
+afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv);
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set);
+afr_local_cleanup(afr_local_t *local, xlator_t *this);
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata);
-
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode);
+afr_frame_return(call_frame_t *frame);
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode);
+int
+afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
+afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this);
int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
+afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
+
+#define AFR_STACK_UNWIND(fop, frame, op_ret, op_errno, params...) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ afr_handle_inconsistent_fop(frame, &__op_ret, &__op_errno); \
+ if (__local && __local->is_read_txn) \
+ afr_pending_read_decrement(__this->private, \
+ __local->read_subvol); \
+ if (__local && __local->xdata_req && \
+ afr_is_lock_mode_mandatory(__local->xdata_req)) \
+ afr_dom_lock_release(frame); \
+ frame->local = NULL; \
+ } \
+ \
+ STACK_UNWIND_STRICT(fop, frame, __op_ret, __op_errno, params); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ } while (0)
+
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ } while (0);
+
+#define AFR_FRAME_INIT(frame, op_errno) \
+ ({ \
+ frame->local = mem_get0(THIS->local_pool); \
+ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \
+ afr_local_cleanup(frame->local, frame->this); \
+ mem_put(frame->local); \
+ frame->local = NULL; \
+ }; \
+ frame->local; \
+ })
+
+#define AFR_STACK_RESET(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ int __opr; \
+ STACK_RESET(frame->root); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ AFR_FRAME_INIT(frame, __opr); \
+ } while (0)
-int
-afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
-
-#define AFR_STACK_UNWIND(fop, frame, params ...) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- if (frame) { \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- if (__local) { \
- afr_local_cleanup (__local, __this); \
- mem_put (__local); \
- } \
- } while (0)
-
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- if (__local) { \
- afr_local_cleanup (__local, __this); \
- mem_put (__local); \
- } \
- } while (0);
-
-#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
/* allocate and return a string that is the basename of argument */
static inline char *
-AFR_BASENAME (const char *str)
+AFR_BASENAME(const char *str)
{
- char *__tmp_str = NULL;
- char *__basename_str = NULL;
- __tmp_str = gf_strdup (str);
- __basename_str = gf_strdup (basename (__tmp_str));
- GF_FREE (__tmp_str);
- return __basename_str;
+ char *__tmp_str = NULL;
+ char *__basename_str = NULL;
+ __tmp_str = gf_strdup(str);
+ __basename_str = gf_strdup(basename(__tmp_str));
+ GF_FREE(__tmp_str);
+ return __basename_str;
}
+call_frame_t *
+afr_copy_frame(call_frame_t *base);
+
int
-afr_transaction_local_init (afr_local_t *local, xlator_t *this);
+afr_transaction_local_init(afr_local_t *local, xlator_t *this);
int32_t
-afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
+afr_marker_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, afr_local_t *local, afr_private_t *priv);
-int32_t *
-afr_children_create (int32_t child_count);
+int
+afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
int
-afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
+afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count);
int
-afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
- transaction_lk_type_t lk_type);
+afr_higher_errno(int32_t old_errno, int32_t new_errno);
int
-afr_first_up_child (unsigned char *child_up, size_t child_count);
+afr_final_errno(afr_local_t *local, afr_private_t *priv);
int
-afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources);
+afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req);
void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child);
+afr_fix_open(fd_t *fd, xlator_t *this);
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index);
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this);
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child);
void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *children, unsigned int child_count);
+afr_set_low_priority(call_frame_t *frame);
+int
+afr_child_fd_ctx_set(xlator_t *this, fd_t *fd, int32_t child, int flags);
+
void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count);
+afr_matrix_cleanup(int32_t **pending, unsigned int m);
+
+int32_t **
+afr_matrix_create(unsigned int m, unsigned int n);
+
+int **
+afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat);
+
void
-afr_children_rm_child (int32_t *children, int32_t child,
- int32_t child_count);
+afr_filter_xattrs(dict_t *xattr);
+
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
+
+int
+afr_fd_report_unstable_write(xlator_t *this, afr_local_t *local);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write(xlator_t *this, inode_t *inode);
+
+void
+afr_reply_wipe(struct afr_reply *reply);
+
void
-afr_reset_children (int32_t *children, int32_t child_count);
+afr_replies_wipe(struct afr_reply *replies, int count);
+
+gf_boolean_t
+afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2);
+
+gf_boolean_t
+afr_is_xattr_ignorable(char *key);
+
+int
+afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_get_split_brain_status(void *opaque);
+
+int
+afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque);
+
+int
+afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
+ int spb_choice);
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
+int
+afr_get_child_index_from_name(xlator_t *this, char *name);
+
+int
+afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb);
+int
+afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode);
+
+int
+afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque);
+
gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno);
+afr_get_need_heal(xlator_t *this);
+
+void
+afr_set_need_heal(xlator_t *this, afr_local_t *local);
+
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno);
+afr_selfheal_data_open(xlator_t *this, inode_t *inode, fd_t **fd);
+
int
-afr_get_children_count (int32_t *children, unsigned int child_count);
+afr_get_msg_id(char *op_type);
+
+int
+afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame,
+ inode_t *inode);
+
+int32_t
+afr_quorum_errno(afr_private_t *priv);
+
gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child);
+afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno);
void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children,
- unsigned int child_count);
+afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno);
+
void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count);
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name);
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *children,
- struct iatt *bufs, unsigned int child_count,
- const char *path);
+afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path);
+afr_process_post_writev(call_frame_t *frame, xlator_t *this);
+
void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count);
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type);
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count);
+afr_writev_unwind(call_frame_t *frame, xlator_t *this);
+
void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
- int32_t *stale_children);
+afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame);
+
void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed));
+afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock,
+ int32_t child_index);
+afr_fd_ctx_t *
+__afr_fd_ctx_get(fd_t *fd, xlator_t *this);
+
+gf_boolean_t
+afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
+ int event_gen2);
+
+int
+afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
+ char *buf, const char *default_str,
+ int32_t *serz_len, char delimiter);
+gf_boolean_t
+afr_is_symmetric_error(call_frame_t *frame, xlator_t *this);
+
int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open);
+__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx);
+
+uint64_t
+afr_write_subvol_get(call_frame_t *frame, xlator_t *this);
+
int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop);
+afr_write_subvol_set(call_frame_t *frame, xlator_t *this);
+
int
-afr_set_elem_count_get (unsigned char *elems, int child_count);
+afr_write_subvol_reset(call_frame_t *frame, xlator_t *this);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
+
+int
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
+
+int
+afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
+
+int
+afr_ta_post_op_unlock(xlator_t *this, loc_t *loc);
gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal);
+afr_is_pending_set(xlator_t *this, dict_t *xdata, int type);
+
+int
+__afr_get_up_children_count(afr_private_t *priv);
+
+call_frame_t *
+afr_ta_frame_create(xlator_t *this);
gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal);
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);
void
-afr_set_low_priority (call_frame_t *frame);
-int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags);
+afr_ta_lock_release_synctask(xlator_t *this);
+
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv);
gf_boolean_t
-afr_have_quorum (char *logname, afr_private_t *priv);
+afr_lookup_has_quorum(call_frame_t *frame,
+ const unsigned int up_children_count);
void
-afr_matrix_cleanup (int32_t **pending, unsigned int m);
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
-int32_t**
-afr_matrix_create (unsigned int m, unsigned int n);
-/*
- * Special value indicating we should use the "auto" quorum method instead of
- * a fixed value (including zero to turn off quorum enforcement).
- */
-#define AFR_QUORUM_AUTO INT_MAX
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this);
-/*
- * Having this as a macro will make debugging a bit weirder, but does reduce
- * the probability of functions handling this check inconsistently.
- */
-#define QUORUM_CHECK(_func,_label) do { \
- if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
- gf_log(this->name,GF_LOG_WARNING, \
- "failing "#_func" due to lack of quorum"); \
- op_errno = EROFS; \
- goto _label; \
- } \
-} while (0);
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv,
+ int child);
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata);
+
+void
+afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
deleted file mode 100644
index eb3f8478928..00000000000
--- a/xlators/cluster/afr/src/pump.c
+++ /dev/null
@@ -1,2636 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <unistd.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <fnmatch.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "afr-common.c"
-#include "defaults.c"
-#include "glusterfs.h"
-
-static uint64_t pump_pid = 0;
-static inline void
-pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
-{
- afr_update_loc_gfids (loc, iatt, parent);
- uuid_copy (loc->inode->gfid, iatt->ia_gfid);
-}
-
-static int
-pump_mark_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- pump_priv->pump_start_pending = 1;
-
- return 0;
-}
-
-static int
-is_pump_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- return (pump_priv->pump_start_pending);
-}
-
-static int
-pump_remove_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- pump_priv->pump_start_pending = 0;
-
- return 0;
-}
-
-static pump_state_t
-pump_get_state ()
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- pump_state_t ret;
-
- this = THIS;
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->pump_state_lock);
- {
- ret = pump_priv->pump_state;
- }
- UNLOCK (&pump_priv->pump_state_lock);
-
- return ret;
-}
-
-int
-pump_change_state (xlator_t *this, pump_state_t state)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- pump_state_t state_old;
- pump_state_t state_new;
-
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (pump_priv);
-
- LOCK (&pump_priv->pump_state_lock);
- {
- state_old = pump_priv->pump_state;
- state_new = state;
-
- pump_priv->pump_state = state;
-
- }
- UNLOCK (&pump_priv->pump_state_lock);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump changing state from %d to %d",
- state_old,
- state_new);
-
- return 0;
-}
-
-static int
-pump_set_resume_path (xlator_t *this, const char *path)
-{
- int ret = 0;
-
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (pump_priv);
-
- LOCK (&pump_priv->resume_path_lock);
- {
- strncpy (pump_priv->resume_path, path, strlen (path) + 1);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- return ret;
-}
-
-static int
-pump_save_path (xlator_t *this, const char *path)
-{
- afr_private_t *priv = NULL;
- pump_state_t state;
- dict_t *dict = NULL;
- loc_t loc = {0};
- int dict_ret = 0;
- int ret = -1;
-
- state = pump_get_state ();
- if (state == PUMP_STATE_RESUME)
- return 0;
-
- priv = this->private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- dict = dict_new ();
- dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s", path, PUMP_PATH);
-
- ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setxattr failed - could not save path=%s", path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr succeeded - saved path=%s", path);
- }
-
- dict_unref (dict);
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_check_and_update_status (xlator_t *this)
-{
- pump_state_t state;
- int ret = -1;
-
- state = pump_get_state ();
-
- switch (state) {
-
- case PUMP_STATE_RESUME:
- case PUMP_STATE_RUNNING:
- {
- ret = 0;
- break;
- }
- case PUMP_STATE_PAUSE:
- {
- ret = -1;
- break;
- }
- case PUMP_STATE_ABORT:
- {
- pump_save_path (this, "/");
- ret = -1;
- break;
- }
- default:
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown pump state");
- ret = -1;
- break;
- }
-
- }
-
- return ret;
-}
-
-static const char *
-pump_get_resume_path (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- const char *resume_path = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- resume_path = pump_priv->resume_path;
-
- return resume_path;
-}
-
-static int
-pump_update_resume_state (xlator_t *this, const char *path)
-{
- pump_state_t state;
- const char *resume_path = NULL;
-
- state = pump_get_state ();
-
- if (state == PUMP_STATE_RESUME) {
- resume_path = pump_get_resume_path (this);
- if (strcmp (resume_path, "/") == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reached the resume path (/). Proceeding to change state"
- " to running");
- pump_change_state (this, PUMP_STATE_RUNNING);
- } else if (strcmp (resume_path, path) == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reached the resume path. Proceeding to change state"
- " to running");
- pump_change_state (this, PUMP_STATE_RUNNING);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not yet hit the resume path:res-path=%s,path=%s",
- resume_path, path);
- }
- }
-
- return 0;
-}
-
-static gf_boolean_t
-is_pump_traversal_allowed (xlator_t *this, const char *path)
-{
- pump_state_t state;
- const char *resume_path = NULL;
- gf_boolean_t ret = _gf_true;
-
- state = pump_get_state ();
-
- if (state == PUMP_STATE_RESUME) {
- resume_path = pump_get_resume_path (this);
- if (strstr (resume_path, path)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "On the right path to resumption path");
- ret = _gf_true;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not the right path to resuming=> ignoring traverse");
- ret = _gf_false;
- }
- }
-
- return ret;
-}
-
-static int
-pump_save_file_stats (xlator_t *this, const char *path)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->resume_path_lock);
- {
- pump_priv->number_files_pumped++;
-
- strncpy (pump_priv->current_file, path,
- PATH_MAX);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- return 0;
-}
-
-static int
-gf_pump_traverse_directory (loc_t *loc)
-{
- xlator_t *this = NULL;
- fd_t *fd = NULL;
- off_t offset = 0;
- loc_t entry_loc = {0};
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t entries;
- struct iatt iatt = {0};
- struct iatt parent = {0};
- dict_t *xattr_rsp = NULL;
- int ret = 0;
- gf_boolean_t is_directory_empty = _gf_true;
- gf_boolean_t free_entries = _gf_false;
-
- INIT_LIST_HEAD (&entries.list);
- this = THIS;
-
- GF_ASSERT (loc->inode);
-
- fd = fd_create (loc->inode, pump_pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", loc->path);
- goto out;
- }
-
- ret = syncop_opendir (this, loc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir failed on %s", loc->path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "pump opendir on %s returned=%d",
- loc->path, ret);
-
- while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
- free_entries = _gf_true;
-
- if (list_empty (&entries.list)) {
- gf_log (this->name, GF_LOG_TRACE,
- "no more entries in directory");
- goto out;
- }
-
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- gf_log (this->name, GF_LOG_DEBUG,
- "found readdir entry=%s", entry->d_name);
-
- offset = entry->d_off;
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
- "gfid present skipping",
- loc->path, entry->d_name);
- continue;
- }
- loc_wipe (&entry_loc);
- ret = afr_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret)
- goto out;
-
- if (!IS_ENTRY_CWD (entry->d_name) &&
- !IS_ENTRY_PARENT (entry->d_name)) {
-
- is_directory_empty = _gf_false;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup %s => %"PRId64,
- entry_loc.path,
- iatt.ia_ino);
-
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, &xattr_rsp, &parent);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: lookup failed",
- entry_loc.path);
- continue;
- }
- pump_fill_loc_info (&entry_loc, &iatt,
- &parent);
-
- pump_update_resume_state (this, entry_loc.path);
-
- pump_save_path (this, entry_loc.path);
- pump_save_file_stats (this, entry_loc.path);
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump beginning to exit out");
- goto out;
- }
-
- if (IA_ISDIR (iatt.ia_type)) {
- if (is_pump_traversal_allowed (this, entry_loc.path)) {
- gf_log (this->name, GF_LOG_TRACE,
- "entering dir=%s",
- entry->d_name);
- gf_pump_traverse_directory (&entry_loc);
- }
- }
- }
- }
-
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- gf_log (this->name, GF_LOG_TRACE,
- "offset incremented to %d",
- (int32_t ) offset);
-
- }
-
- ret = syncop_close (fd);
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
-
- if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
- pump_change_state (this, PUMP_STATE_RUNNING);
- gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
- "Nothing to be done.");
- }
-
-out:
- if (entry_loc.path)
- loc_wipe (&entry_loc);
- if (free_entries)
- gf_dirent_free (&entries);
- return 0;
-}
-
-static int
-pump_update_resume_path (xlator_t *this)
-{
- const char *resume_path = NULL;
-
- resume_path = pump_get_resume_path (this);
-
- if (resume_path) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found a path to resume from: %s",
- resume_path);
-
- }else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Did not find a path=> setting to '/'");
- pump_set_resume_path (this, "/");
- }
-
- pump_change_state (this, PUMP_STATE_RESUME);
-
- return 0;
-}
-
-static int32_t
-pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- loc_t loc = {0};
- int i = 0;
- int ret = 0;
- int source = 0;
- int sink = 1;
-
- priv = this->private;
-
- afr_build_root_loc (this, &loc);
-
- ret = syncop_removexattr (priv->children[source], &loc,
- PUMP_PATH);
-
- ret = syncop_removexattr (priv->children[sink], &loc,
- PUMP_SINK_COMPLETE);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = syncop_removexattr (priv->children[i], &loc,
- PUMP_SOURCE_COMPLETE);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "removexattr "
- "failed with %s", strerror (errno));
- }
-
- loc_wipe (&loc);
- return pump_command_reply (frame, this);
-}
-
-static int
-pump_complete_migration (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- dict_t *dict = NULL;
- pump_state_t state;
- loc_t loc = {0};
- int dict_ret = 0;
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- dict = dict_new ();
-
- state = pump_get_state ();
- if (state == PUMP_STATE_RUNNING) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump finished pumping");
-
- pump_priv->pump_finished = _gf_true;
-
- dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s",
- loc.path, PUMP_SOURCE_COMPLETE);
-
- ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr failed - while notifying source complete");
- }
- dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s",
- loc.path, PUMP_SINK_COMPLETE);
-
- ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr failed - while notifying sink complete");
- }
-
- pump_save_path (this, "/");
-
- } else if (state == PUMP_STATE_ABORT) {
- gf_log (this->name, GF_LOG_DEBUG, "Starting cleanup "
- "of pump internal xattrs");
- call_resume (pump_priv->cleaner);
- }
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_lookup_sink (loc_t *loc)
-{
- xlator_t *this = NULL;
- struct iatt iatt, parent;
- dict_t *xattr_rsp;
- dict_t *xattr_req = NULL;
- int ret = 0;
-
- this = THIS;
-
- xattr_req = dict_new ();
-
- ret = afr_set_root_gfid (xattr_req);
- if (ret)
- goto out;
-
- ret = syncop_lookup (PUMP_SINK_CHILD (this), loc,
- xattr_req, &iatt, &xattr_rsp, &parent);
-
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Lookup on sink child failed");
- goto out;
- }
-
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- return ret;
-}
-
-static int
-pump_task (void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
-
-
- loc_t loc = {0};
- struct iatt iatt, parent;
- dict_t *xattr_rsp = NULL;
- dict_t *xattr_req = NULL;
-
- int ret = -1;
-
- this = THIS;
- priv = this->private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
- xattr_req = dict_new ();
- if (!xattr_req) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- afr_set_root_gfid (xattr_req);
- ret = syncop_lookup (this, &loc, xattr_req,
- &iatt, &xattr_rsp, &parent);
-
- gf_log (this->name, GF_LOG_TRACE,
- "lookup: path=%s gfid=%s",
- loc.path, uuid_utoa (loc.inode->gfid));
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- goto out;
- }
-
- pump_update_resume_path (this);
-
- afr_set_root_gfid (xattr_req);
- ret = pump_lookup_sink (&loc);
- if (ret) {
- pump_update_resume_path (this);
- goto out;
- }
-
- gf_pump_traverse_directory (&loc);
-
- pump_complete_migration (this);
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- loc_wipe (&loc);
- return 0;
-}
-
-
-static int
-pump_task_completion (int ret, call_frame_t *sync_frame, void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
-
- this = THIS;
-
- priv = this->private;
-
- inode_unref (priv->root_inode);
- STACK_DESTROY (sync_frame->root);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump xlator exiting");
- return 0;
-}
-
-int
-pump_start (call_frame_t *pump_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- afr_set_lk_owner (pump_frame, this, pump_frame->root);
- pump_pid = (uint64_t) (unsigned long)pump_frame->root;
-
- ret = synctask_new (pump_priv->env, pump_task,
- pump_task_completion,
- pump_frame, NULL);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "starting pump failed");
- pump_change_state (this, PUMP_STATE_ABORT);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "setting pump as started lk_owner: %s %"PRIu64,
- lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
-
- priv->use_afr_in_pump = 1;
-out:
- return ret;
-}
-
-static int
-pump_start_synctask (xlator_t *this)
-{
- call_frame_t *frame = NULL;
- int ret = 0;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- pump_change_state (this, PUMP_STATE_RUNNING);
-
- ret = pump_start (frame, this);
-
-out:
- return ret;
-}
-
-int32_t
-pump_cmd_start_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-
-{
- call_frame_t *prev = NULL;
- afr_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not initiate destination "
- "brick connect");
- ret = op_ret;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Successfully initiated destination "
- "brick connect");
-
- pump_mark_start_pending (this);
-
- /* send the PARENT_UP as pump is ready now */
- prev = cookie;
- if (prev && prev->this)
- prev->this->notify (prev->this, GF_EVENT_PARENT_UP, this);
-
-out:
- local->op_ret = ret;
- pump_command_reply (frame, this);
-
- return 0;
-}
-
-static int
-pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *dict = NULL;
- data_t *data = NULL;
- char *clnt_cmd = NULL;
- loc_t loc = {0};
-
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
- if (!data) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Could not get destination brick value");
- goto out;
- }
-
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- clnt_cmd = GF_CALLOC (1, data->len+1, gf_common_mt_char);
- if (!clnt_cmd) {
- ret = -1;
- goto out;
- }
-
- memcpy (clnt_cmd, data->data, data->len);
- clnt_cmd[data->len] = '\0';
- gf_log (this->name, GF_LOG_DEBUG, "Got destination brick %s\n",
- clnt_cmd);
-
- ret = dict_set_dynstr (dict, CLIENT_CMD_CONNECT, clnt_cmd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not inititiate destination brick "
- "connect");
- goto out;
- }
-
- STACK_WIND (frame,
- pump_cmd_start_setxattr_cbk,
- PUMP_SINK_CHILD(this),
- PUMP_SINK_CHILD(this)->fops->setxattr,
- &loc,
- dict,
- 0, NULL);
-
- ret = 0;
-
-out:
- if (dict)
- dict_unref (dict);
-
- if (data)
- data_unref (data);
-
- if (ret && clnt_cmd)
- GF_FREE (clnt_cmd);
-
- loc_wipe (&loc);
- return ret;
-}
-
-static int
-is_pump_aborted (xlator_t *this)
-{
- pump_state_t state;
-
- state = pump_get_state ();
-
- return ((state == PUMP_STATE_ABORT));
-}
-
-int32_t
-pump_cmd_start_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- char *path = NULL;
-
- pump_state_t state;
- int ret = 0;
- int need_unwind = 0;
- int dict_ret = -1;
-
- local = frame->local;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr failed - changing pump "
- "state to RUNNING with '/'");
- path = "/";
- ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "getxattr succeeded");
-
- dict_ret = dict_get_str (dict, PUMP_PATH, &path);
- if (dict_ret < 0)
- path = "/";
- }
-
- state = pump_get_state ();
- if ((state == PUMP_STATE_RUNNING) ||
- (state == PUMP_STATE_RESUME)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Pump is already started");
- ret = -1;
- goto out;
- }
-
- pump_set_resume_path (this, path);
-
- if (is_pump_aborted (this))
- /* We're re-starting pump afresh */
- ret = pump_initiate_sink_connect (frame, this);
- else {
- /* We're re-starting pump from a previous
- pause */
- gf_log (this->name, GF_LOG_DEBUG,
- "about to start synctask");
- ret = pump_start_synctask (this);
- need_unwind = 1;
- }
-
-out:
- if ((ret < 0) || (need_unwind == 1)) {
- local->op_ret = ret;
- pump_command_reply (frame, this);
- }
- return 0;
-}
-
-int
-pump_execute_status (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- uint64_t number_files = 0;
-
- char filename[PATH_MAX];
- char *dict_str = NULL;
-
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- dict_t *dict = NULL;
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->resume_path_lock);
- {
- number_files = pump_priv->number_files_pumped;
- strncpy (filename, pump_priv->current_file, PATH_MAX);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- dict_str = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char);
- if (!dict_str) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- if (pump_priv->pump_finished) {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ",
- number_files);
- } else {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ",
- number_files, filename);
- }
-
- dict = dict_new ();
-
- ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_dynstr returned negative value");
- } else {
- dict_str = NULL;
- }
-
- op_ret = 0;
-
-out:
-
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
-
- if (dict)
- dict_unref (dict);
-
- if (dict_str)
- GF_FREE (dict_str);
-
- return 0;
-}
-
-int
-pump_execute_pause (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- pump_change_state (this, PUMP_STATE_PAUSE);
-
- local->op_ret = 0;
- pump_command_reply (frame, this);
-
- return 0;
-}
-
-int
-pump_execute_start (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = 0;
- loc_t loc = {0};
-
- priv = this->private;
- local = frame->local;
-
- if (!priv->root_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Pump xlator cannot be started without an initial "
- "lookup");
- ret = -1;
- goto out;
- }
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- STACK_WIND (frame,
- pump_cmd_start_getxattr_cbk,
- PUMP_SOURCE_CHILD(this),
- PUMP_SOURCE_CHILD(this)->fops->getxattr,
- &loc,
- PUMP_PATH, NULL);
-
- ret = 0;
-
-out:
- if (ret < 0) {
- local->op_ret = ret;
- pump_command_reply (frame, this);
- }
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_cleanup_helper (void *data) {
- call_frame_t *frame = data;
-
- pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
-
- return 0;
-}
-
-static int
-pump_cleanup_done (int ret, call_frame_t *sync_frame, void *data)
-{
- STACK_DESTROY (sync_frame->root);
-
- return 0;
-}
-
-int
-pump_execute_commit (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *sync_frame = NULL;
- int ret = 0;
-
- priv = this->private;
- pump_priv = priv->pump_private;
- local = frame->local;
-
- local->op_ret = 0;
- if (pump_priv->pump_finished) {
- pump_change_state (this, PUMP_STATE_COMMIT);
- sync_frame = create_frame (this, this->ctx->pool);
- ret = synctask_new (pump_priv->env, pump_cleanup_helper,
- pump_cleanup_done, sync_frame, frame);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
- "synctask for cleaning up xattrs.");
- }
-
- } else {
- gf_log (this->name, GF_LOG_ERROR, "Commit can't proceed. "
- "Migration in progress");
- local->op_ret = -1;
- local->op_errno = EINPROGRESS;
- pump_command_reply (frame, this);
- }
-
- return 0;
-}
-int
-pump_execute_abort (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *sync_frame = NULL;
- int ret = 0;
-
- priv = this->private;
- pump_priv = priv->pump_private;
- local = frame->local;
-
- pump_change_state (this, PUMP_STATE_ABORT);
-
- LOCK (&pump_priv->resume_path_lock);
- {
- pump_priv->number_files_pumped = 0;
- pump_priv->current_file[0] = '\0';
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- local->op_ret = 0;
- if (pump_priv->pump_finished) {
- sync_frame = create_frame (this, this->ctx->pool);
- ret = synctask_new (pump_priv->env, pump_cleanup_helper,
- pump_cleanup_done, sync_frame, frame);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
- "synctask for cleaning up xattrs.");
- }
-
- } else {
- pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
- pump_xattr_cleaner,
- 0, 0, NULL);
- }
-
- return 0;
-}
-
-gf_boolean_t
-pump_command_status (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump status command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - status");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_pause (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump pause command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - pause");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_commit (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump commit command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - commit");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_abort (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump abort command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - abort");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_start (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump start command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - start");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-struct _xattr_key {
- char *key;
- struct list_head list;
-};
-
-static void
-__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
- void *data)
-{
- struct list_head * list = data;
- struct _xattr_key * xkey = NULL;
-
- if (!strncmp (key, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
-
- xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
- if (!xkey)
- return;
-
- xkey->key = key;
- INIT_LIST_HEAD (&xkey->list);
-
- list_add_tail (&xkey->list, list);
- }
-}
-
-static void
-__filter_xattrs (dict_t *dict)
-{
- struct list_head keys;
-
- struct _xattr_key *key;
- struct _xattr_key *tmp;
-
- INIT_LIST_HEAD (&keys);
-
- dict_foreach (dict, __gather_xattr_keys,
- (void *) &keys);
-
- list_for_each_entry_safe (key, tmp, &keys, list) {
- dict_del (dict, key->key);
-
- list_del_init (&key->list);
-
- GF_FREE (key);
- }
-}
-
-int32_t
-pump_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name, NULL);
- }
-
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
-
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
- }
-
- return 0;
-}
-
-int32_t
-pump_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_getxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->getxattr,
- loc, name, xdata);
- return 0;
- }
-
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
-
- op_errno = ENODATA;
- goto out;
- }
-
- if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit pump command - status");
- pump_execute_status (frame, this);
- ret = 0;
- goto out;
- }
- }
-
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (!local->fresh_children) {
- ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
-
- STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
- loc, name, xdata);
-
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-static int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno, NULL);
- }
- return 0;
-}
-
-static int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-static int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags, NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-static int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int32_t
-pump_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-pump_command_reply (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->op_ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "Command failed");
- else
- gf_log (this->name, GF_LOG_INFO,
- "Command succeeded");
-
- AFR_STACK_UNWIND (setxattr,
- frame,
- local->op_ret,
- local->op_errno, NULL);
-
- return 0;
-}
-
-int
-pump_parse_command (call_frame_t *frame, xlator_t *this,
- afr_local_t *local, dict_t *dict)
-{
-
- int ret = -1;
-
- if (pump_command_start (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_start (frame, this);
-
- } else if (pump_command_pause (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_pause (frame, this);
-
- } else if (pump_command_abort (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_abort (frame, this);
-
- } else if (pump_command_commit (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_commit (frame, this);
- }
- return ret;
-}
-
-int
-pump_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- data_pair_t * trav = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict,
- trav, op_errno, out);
-
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_setxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->setxattr,
- loc, dict, flags, xdata);
- return 0;
- }
-
-
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- afr_local_cleanup (local, this);
- goto out;
- }
-
- ret = pump_parse_command (frame, this,
- local, dict);
- if (ret >= 0) {
- ret = 0;
- goto out;
- }
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- ret = -1;
- afr_local_cleanup (local, this);
- goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
-
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
-
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
-
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
-
- ret = 0;
-out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- }
-
- return 0;
-}
-
-/* Defaults */
-static int32_t
-pump_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
- }
-
- afr_lookup (frame, this, loc, xattr_req);
- return 0;
-}
-
-
-static int32_t
-pump_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset, xdata);
- return 0;
- }
-
- afr_truncate (frame, this, loc, offset, xdata);
- return 0;
-}
-
-
-static int32_t
-pump_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset, xdata);
- return 0;
- }
-
- afr_ftruncate (frame, this, fd, offset, xdata);
- return 0;
-}
-
-
-
-
-int
-pump_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
- return 0;
- }
- afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
- return 0;
-
-}
-
-
-
-int
-pump_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
- }
- afr_mkdir (frame, this, loc, mode, umask, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
- }
- afr_unlink (frame, this, loc, xflag, xdata);
- return 0;
-
-}
-
-
-static int
-pump_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc, flags, xdata);
- return 0;
- }
-
- afr_rmdir (frame, this, loc, flags, xdata);
- return 0;
-
-}
-
-
-
-int
-pump_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, umask, xdata);
- return 0;
- }
- afr_symlink (frame, this, linkpath, loc, umask, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
- return 0;
- }
- afr_rename (frame, this, oldloc, newloc, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
- }
- afr_link (frame, this, oldloc, newloc, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
- }
- afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
- }
- afr_open (frame, this, loc, flags, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off, flags,
- iobref, xdata);
- return 0;
- }
-
- afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
- return 0;
-}
-
-
-static int32_t
-pump_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
- }
- afr_flush (frame, this, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags, xdata);
- return 0;
- }
- afr_fsync (frame, this, fd, flags, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
- }
- afr_opendir (frame, this, loc, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags, xdata);
- return 0;
- }
- afr_fsyncdir (frame, this, fd, flags, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc,
- flags,
- dict, xdata);
- return 0;
- }
- afr_xattrop (frame, this, loc, flags, dict, xdata);
- return 0;
-
-}
-
-static int32_t
-pump_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd,
- flags,
- dict, xdata);
- return 0;
- }
- afr_fxattrop (frame, this, fd, flags, dict, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (this, out);
-
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
- name, op_errno, out);
-
- op_errno = 0;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name, xdata);
- return 0;
- }
- afr_removexattr (frame, this, loc, name, xdata);
-
- out:
- if (op_errno)
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- return 0;
-
-}
-
-
-
-static int32_t
-pump_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off, xdata);
- return 0;
- }
- afr_readdir (frame, this, fd, size, off, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *dict)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_readdirp_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp,
- fd, size, off, dict);
- return 0;
- }
- afr_readdirp (frame, this, fd, size, off, dict);
- return 0;
-
-}
-
-
-
-static int32_t
-pump_releasedir (xlator_t *this,
- fd_t *fd)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_releasedir (this, fd);
- return 0;
-
-}
-
-static int32_t
-pump_release (xlator_t *this,
- fd_t *fd)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_release (this, fd);
- return 0;
-
-}
-
-static int32_t
-pump_forget (xlator_t *this, inode_t *inode)
-{
- afr_private_t *priv = NULL;
-
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_forget (this, inode);
-
- return 0;
-}
-
-static int32_t
-pump_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
- }
- afr_setattr (frame, this, loc, stbuf, valid, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsetattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
- return 0;
- }
- afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
- return 0;
-
-}
-
-
-/* End of defaults */
-
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-static int
-is_xlator_pump_sink (xlator_t *child)
-{
- return (child == PUMP_SINK_CHILD(THIS));
-}
-
-static int
-is_xlator_pump_source (xlator_t *child)
-{
- return (child == PUMP_SOURCE_CHILD(THIS));
-}
-
-int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
-{
- int ret = -1;
- xlator_t *child_xl = NULL;
-
- child_xl = (xlator_t *) data;
-
- ret = afr_notify (this, event, data, NULL);
-
- switch (event) {
- case GF_EVENT_CHILD_DOWN:
- if (is_xlator_pump_source (child_xl))
- pump_change_state (this, PUMP_STATE_ABORT);
- break;
-
- case GF_EVENT_CHILD_UP:
- if (is_xlator_pump_sink (child_xl))
- if (is_pump_start_pending (this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "about to start synctask");
- ret = pump_start_synctask (this);
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not start pump "
- "synctask");
- else
- pump_remove_start_pending (this);
- }
- }
-
- return ret;
-}
-
-int32_t
-init (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- pump_private_t *pump_priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
-
- int source_child = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "pump translator needs a source and sink"
- "subvolumes defined.");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- }
-
- this->private = GF_CALLOC (1, sizeof (afr_private_t),
- gf_afr_mt_afr_private_t);
- if (!this->private)
- goto out;
-
- priv = this->private;
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
- //lock recovery is not done in afr
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
-
- child_count = xlator_subvolume_count (this);
- if (child_count != 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "There should be exactly 2 children - one source "
- "and one sink");
- return -1;
- }
- priv->child_count = child_count;
-
- priv->read_child = source_child;
- priv->favorite_child = source_child;
- priv->background_self_heal_count = 0;
-
- priv->data_self_heal = "on";
- priv->metadata_self_heal = 1;
- priv->entry_self_heal = 1;
-
- priv->data_self_heal_algorithm = "";
-
- priv->data_self_heal_window_size = 16;
-
- priv->data_change_log = 1;
- priv->metadata_change_log = 1;
- priv->entry_change_log = 1;
- priv->use_afr_in_pump = 1;
-
- /* Locking options */
-
- /* Lock server count infact does not matter. Locks are held
- on all subvolumes, in this case being the source
- and the sink.
- */
-
- priv->strict_readdir = _gf_false;
-
- priv->wait_count = 1;
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- op_errno = ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
-
- priv->first_lookup = 1;
- priv->root_inode = NULL;
-
- priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
- gf_afr_mt_int32_t);
- if (!priv->last_event) {
- ret = -ENOMEM;
- goto out;
- }
-
- pump_priv = GF_CALLOC (1, sizeof (*pump_priv),
- gf_afr_mt_pump_priv);
- if (!pump_priv) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
-
- LOCK_INIT (&pump_priv->resume_path_lock);
- LOCK_INIT (&pump_priv->pump_state_lock);
-
- pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
- gf_afr_mt_char);
- if (!pump_priv->resume_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- pump_priv->env = syncenv_new (0);
- if (!pump_priv->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not create new sync-environment");
- ret = -1;
- goto out;
- }
-
- /* keep more local here as we may need them for self-heal etc */
- this->local_pool = mem_pool_new (afr_local_t, 128);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- priv->pump_private = pump_priv;
-
- pump_change_state (this, PUMP_STATE_ABORT);
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-fini (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- this->private = NULL;
- if (!priv)
- goto out;
-
- pump_priv = priv->pump_private;
- if (!pump_priv)
- goto afr_priv;
-
- if (pump_priv->env)
- syncenv_destroy (pump_priv->env);
-
- GF_FREE (pump_priv->resume_path);
- LOCK_DESTROY (&pump_priv->resume_path_lock);
- LOCK_DESTROY (&pump_priv->pump_state_lock);
- GF_FREE (pump_priv);
-afr_priv:
- afr_priv_destroy (priv);
-out:
- return 0;
-}
-
-
-struct xlator_fops fops = {
- .lookup = pump_lookup,
- .open = pump_open,
- .flush = pump_flush,
- .fsync = pump_fsync,
- .fsyncdir = pump_fsyncdir,
- .xattrop = pump_xattrop,
- .fxattrop = pump_fxattrop,
- .getxattr = pump_getxattr,
-
- /* inode write */
- .writev = pump_writev,
- .truncate = pump_truncate,
- .ftruncate = pump_ftruncate,
- .setxattr = pump_setxattr,
- .setattr = pump_setattr,
- .fsetattr = pump_fsetattr,
- .removexattr = pump_removexattr,
-
- /* dir read */
- .opendir = pump_opendir,
- .readdir = pump_readdir,
- .readdirp = pump_readdirp,
-
- /* dir write */
- .create = pump_create,
- .mknod = pump_mknod,
- .mkdir = pump_mkdir,
- .unlink = pump_unlink,
- .rmdir = pump_rmdir,
- .link = pump_link,
- .symlink = pump_symlink,
- .rename = pump_rename,
-};
-
-struct xlator_dumpops dumpops = {
- .priv = afr_priv_dump,
-};
-
-
-struct xlator_cbks cbks = {
- .release = pump_release,
- .releasedir = pump_releasedir,
- .forget = pump_forget,
-};
-
-struct volume_options options[] = {
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
deleted file mode 100644
index bc4c31a78a5..00000000000
--- a/xlators/cluster/afr/src/pump.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __PUMP_H__
-#define __PUMP_H__
-
-#include "syncop.h"
-
-/* FIXME: Needs to be defined in a common file */
-#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
-#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-
-#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
-#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
-
-#define PUMP_PATH "trusted.glusterfs.pump-path"
-
-#define PUMP_SOURCE_CHILD(xl) (xl->children->xlator)
-#define PUMP_SINK_CHILD(xl) (xl->children->next->xlator)
-
-typedef enum {
- PUMP_STATE_RUNNING, /* Pump is running and migrating files */
- PUMP_STATE_RESUME, /* Pump is resuming from a previous pause */
- PUMP_STATE_PAUSE, /* Pump is paused */
- PUMP_STATE_ABORT, /* Pump is aborted */
- PUMP_STATE_COMMIT, /* Pump is commited */
-} pump_state_t;
-
-typedef struct _pump_private {
- struct syncenv *env; /* The env pointer to the pump synctask */
- char *resume_path; /* path to resume from the last pause */
- gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
- gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
- pump_state_t pump_state; /* State of pump */
- char current_file[PATH_MAX]; /* Current file being pumped */
- uint64_t number_files_pumped; /* Number of files pumped */
- gf_boolean_t pump_finished; /* Boolean to indicate pump termination */
- char pump_start_pending; /* Boolean to mark start pending until
- CHILD_UP */
- call_stub_t *cleaner;
-} pump_private_t;
-
-void
-build_root_loc (inode_t *inode, loc_t *loc);
-int pump_start (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_start (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_start (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_pause (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_pause (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_abort (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_abort (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_status (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_status (call_frame_t *frame, xlator_t *this);
-
-#endif /* __PUMP_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index e35058d65d4..56f1f2ad7c8 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -1,32 +1,37 @@
-
xlator_LTLIBRARIES = dht.la nufa.la switch.la
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
- dht-common.c dht-inode-write.c dht-inode-read.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
+ dht-lock.c $(top_builddir)/xlators/lib/src/libxlator.c
dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module -avoidversion
+switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = dht-common.h dht-mem-types.h \
- $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \
+ dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/xlators/lib/src \
+ -DDATADIR=\"$(localstatedir)\" \
+ -DLIBDIR=\"$(libdir)\"
CLEANFILES =
@@ -35,3 +40,9 @@ uninstall-local:
install-data-hook:
ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
+
+if UNITTEST
+CLEANFILES += *.gcda *.gcno *_xunit.xml
+noinst_PROGRAMS =
+TESTS =
+endif
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 1f4f234afe5..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -8,90 +8,439 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "libxlator.h"
#include "dht-common.h"
-#include "defaults.h"
-#include "byte-order.h"
+#include "dht-lock.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/upcall-utils.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
+#include <signal.h>
-void
-dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
+
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
+
+static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+
+/* Check the xdata to make sure EBADF has been set by client xlator */
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
- data_pair_t *data_pair = NULL;
+ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
+ !(local->fd_checked)) {
+ return 1;
+ }
+ return 0;
+}
- dst = data;
+/* Sets the blocks and size values to fixed values. This is to be called
+ * only for dirs. The caller is responsible for checking the type
+ */
+int32_t
+dht_set_fixed_dir_stat(struct iatt *stat)
+{
+ if (stat) {
+ stat->ia_blocks = DHT_DIR_STAT_BLOCKS;
+ stat->ia_size = DHT_DIR_STAT_SIZE;
+ return 0;
+ }
+ return -1;
+}
- if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin (dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC (1, sizeof (int64_t),
- gf_common_mt_char);
- if (size == NULL) {
- gf_log ("dht", GF_LOG_WARNING,
- "memory allocation failed");
- return;
- }
- ret = dict_set_bin (dst, key, size, sizeof (int64_t));
- if (ret < 0) {
- gf_log ("dht", GF_LOG_WARNING,
- "dht aggregate dict set failed");
- GF_FREE (size);
- return;
- }
- }
+/* Return true if key exists in array
+ */
+static gf_boolean_t
+dht_match_xattr(const char *key)
+{
+ char **xattrs_to_heal = get_xattrs_to_heal();
- ptr = data_to_bin (value);
- if (ptr == NULL) {
- gf_log ("dht", GF_LOG_WARNING, "data to bin failed");
- return;
- }
+ return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
+}
- *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+static int
+dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ quota_meta_t *meta_dst = NULL;
+ quota_meta_t *meta_src = NULL;
+ int64_t *size = NULL;
+ int64_t dst_dir_count = 0;
+ int64_t src_dir_count = 0;
+
+ if (value == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL,
+ "data value is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(dst, key, (void **)&meta_dst);
+ if (ret < 0) {
+ meta_dst = GF_CALLOC(1, sizeof(quota_meta_t), gf_common_quota_meta_t);
+ if (meta_dst == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_bin(dst, key, meta_dst, sizeof(quota_meta_t));
+ if (ret < 0) {
+ gf_msg("dht", GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "dht aggregate dict set failed");
+ GF_FREE(meta_dst);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (value->len > sizeof(int64_t)) {
+ meta_src = data_to_bin(value);
+
+ meta_dst->size = hton64(ntoh64(meta_dst->size) +
+ ntoh64(meta_src->size));
+ meta_dst->file_count = hton64(ntoh64(meta_dst->file_count) +
+ ntoh64(meta_src->file_count));
+
+ if (value->len > (2 * sizeof(int64_t))) {
+ dst_dir_count = ntoh64(meta_dst->dir_count);
+ src_dir_count = ntoh64(meta_src->dir_count);
+
+ if (src_dir_count > dst_dir_count)
+ meta_dst->dir_count = meta_src->dir_count;
} else {
- /* compare user xattrs only */
- if (!strncmp (key, "user.", strlen ("user."))) {
- ret = dict_lookup (dst, key, &data_pair);
- if (!ret && data_pair && value) {
- ret = is_data_equal (data_pair->value, value);
- if (!ret)
- gf_log ("dht", GF_LOG_DEBUG,
- "xattr mismatch for %s", key);
- }
- }
- ret = dict_set (dst, key, value);
- if (ret)
- gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed");
+ meta_dst->dir_count = 0;
}
+ } else {
+ size = data_to_bin(value);
+ meta_dst->size = hton64(ntoh64(meta_dst->size) + ntoh64(*size));
+ }
- return;
+ ret = 0;
+out:
+ return ret;
}
+static int
+add_opt(char **optsp, const char *opt)
+{
+ char *newopts = NULL;
+ unsigned oldsize = 0;
+ unsigned newsize = 0;
+
+ if (*optsp == NULL)
+ newopts = gf_strdup(opt);
+ else {
+ oldsize = strlen(*optsp);
+ newsize = oldsize + 1 + strlen(opt) + 1;
+ newopts = GF_REALLOC(*optsp, newsize);
+ if (newopts)
+ sprintf(newopts + oldsize, ",%s", opt);
+ }
+ if (newopts == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices in buffer in add_opt");
+ return -1;
+ }
+ *optsp = newopts;
+ return 0;
+}
-void
-dht_aggregate_xattr (dict_t *dst, dict_t *src)
+/* Return Choice list from Split brain status */
+static char *
+getChoices(const char *value)
+{
+ int i = 0;
+ char *ptr = NULL;
+ char *tok = NULL;
+ char *result = NULL;
+ char *newval = NULL;
+
+ ptr = strstr(value, "Choices:");
+ if (!ptr) {
+ result = ptr;
+ goto out;
+ }
+
+ newval = gf_strdup(ptr);
+ if (!newval) {
+ result = newval;
+ goto out;
+ }
+
+ tok = strtok(newval, ":");
+ if (!tok) {
+ result = tok;
+ goto out;
+ }
+
+ while (tok) {
+ i++;
+ if (i == 2)
+ break;
+ tok = strtok(NULL, ":");
+ }
+
+ result = gf_strdup(tok);
+
+out:
+ if (newval)
+ GF_FREE(newval);
+
+ return result;
+}
+
+/* This function prepare a list of choices for key
+ (replica.split-brain-status) in case of metadata split brain
+ only on the basis of key-value passed to this function.
+ After prepare the list of choices it update the same key in dict
+ with this value to reflect the same in
+ replica.split-brain-status attr for file.
+
+*/
+
+static int
+dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value)
{
- if ((dst == NULL) || (src == NULL)) {
+ int ret = 0;
+ char *oldvalue = NULL;
+ char *old_choice = NULL;
+ char *new_choice = NULL;
+ char *full_choice = NULL;
+ char *status = NULL;
+
+ if (value == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL,
+ "GF_AFR_SBRAIN_STATUS value is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dst, key, &oldvalue);
+ if (ret)
+ goto out;
+
+ /* skip code that is irrelevant if !oldvalue */
+ if (!oldvalue)
+ goto out;
+
+ if (strstr(oldvalue, "not")) {
+ gf_msg_debug("dht", 0, "Need to update split-brain status in dict");
+ ret = -1;
+ goto out;
+ }
+ if (strstr(oldvalue, "metadata-split-brain:yes") &&
+ (strstr(oldvalue, "data-split-brain:no"))) {
+ if (strstr(value->data, "not")) {
+ gf_msg_debug("dht", 0, "No need to update split-brain status");
+ ret = 0;
+ goto out;
+ }
+ if (strstr(value->data, "yes") &&
+ (strncmp(oldvalue, value->data, strlen(oldvalue)))) {
+ old_choice = getChoices(oldvalue);
+ if (!old_choice) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to get choices");
+ ret = -1;
+ goto out;
+ }
+
+ ret = add_opt(&full_choice, old_choice);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices");
+ ret = -1;
goto out;
+ }
+
+ new_choice = getChoices(value->data);
+ if (!new_choice) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to get choices");
+ ret = -1;
+ goto out;
+ }
+
+ ret = add_opt(&full_choice, new_choice);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices ");
+ ret = -1;
+ goto out;
+ }
+ ret = gf_asprintf(&status,
+ "data-split-brain:%s "
+ "metadata-split-brain:%s Choices:%s",
+ "no", "yes", full_choice);
+
+ if (-1 == ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to prepare status ");
+ goto out;
+ }
+ ret = dict_set_dynstr(dst, key, status);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set full choice");
+ }
}
+ }
- dict_foreach (src, dht_aggregate, dst);
out:
- return;
+ if (old_choice)
+ GF_FREE(old_choice);
+ if (new_choice)
+ GF_FREE(new_choice);
+ if (full_choice)
+ GF_FREE(full_choice);
+
+ return ret;
+}
+
+static int
+dht_aggregate(dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int32_t ret = -1;
+ data_t *dict_data = NULL;
+
+ dst = data;
+
+ /* compare split brain xattr only */
+ if (strcmp(key, GF_AFR_SBRAIN_STATUS) == 0) {
+ ret = dht_aggregate_split_brain_xattr(dst, key, value);
+ if (!ret)
+ goto out;
+ } else if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
+ ret = dht_aggregate_quota_xattr(dst, key, value);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0,
+ DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED,
+ "Failed to aggregate quota xattr");
+ }
+ goto out;
+ } else if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime(THIS, dst, key, value);
+ goto out;
+ } else {
+ /* compare user xattrs only */
+ if (!strncmp(key, "user.", SLEN("user."))) {
+ ret = dict_lookup(dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal(dict_data, value);
+ if (!ret)
+ gf_msg_debug("dht", 0, "xattr mismatch for %s", key);
+ }
+ }
+ }
+
+ ret = dict_set(dst, key, value);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s", key);
+ }
+
+out:
+ return ret;
+}
+
+static void
+dht_aggregate_xattr(dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach(src, dht_aggregate, dst);
+out:
+ return;
+}
+
+/* Code to save hashed subvol on inode ctx as a mds subvol
+ */
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t ctx_int = 0;
+ gf_boolean_t ctx_free = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ctx_int) {
+ ctx = (dht_inode_ctx_t *)(uintptr_t)ctx_int;
+ ctx->mds_subvol = mds_subvol;
+ } else {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ goto unlock;
+ ctx->mds_subvol = mds_subvol;
+ ctx_free = _gf_true;
+ ctx_int = (long)ctx;
+ ret = __inode_ctx_set(inode, this, &ctx_int);
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
+ if (ret && ctx_free)
+ GF_FREE(ctx);
+ return ret;
+}
+
+/*Code to get mds subvol from inode ctx */
+
+int
+dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ if (!mdsvol)
+ return ret;
+
+ if (__is_root_gfid(inode->gfid)) {
+ (*mdsvol) = FIRST_CHILD(this);
+ return 0;
+ }
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->mds_subvol) {
+ *mdsvol = ctx->mds_subvol;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
}
/* TODO:
@@ -101,4744 +450,10942 @@ out:
- complete linkfile selfheal
*/
-
-int
-dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
- local = frame->local;
- ret = op_ret;
+ local = frame->local;
+ conf = this->private;
+ ret = op_ret;
- FRAME_SU_UNDO (frame, dht_local_t);
+ FRAME_SU_UNDO(frame, dht_local_t);
- if (ret == 0) {
- layout = local->selfheal.layout;
- ret = dht_layout_set (this, local->inode, layout);
- }
+ if (ret == 0) {
+ layout = local->selfheal.layout;
+ ret = dht_layout_set(this, local->inode, layout);
+ }
- WIPE (&local->postparent);
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, &local->postparent,
+ 1);
+ }
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
- DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
- &local->stbuf, local->xattr, &local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, ret, local->op_errno, local->inode,
+ &local->stbuf, local->xattr, &local->postparent);
out:
- return ret;
+ return ret;
}
-
-int
-dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+static int
+dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
{
- dht_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
- int op_errno = 0;
- int ret = -1;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *main_frame = NULL;
+ call_frame_t *heal_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ uint32_t vol_commit_hash = 0;
+ xlator_t *source = NULL;
+ int heal_path = 0;
+ int error_while_marking_mds = 0;
+ int i = 0;
+ loc_t loc = {0};
+ int8_t is_read_only = 0, layout_anomalies = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
- local = discover_frame->local;
- layout = local->layout;
+ if (!main_frame)
+ return 0;
- LOCK(&discover_frame->lock);
- {
- main_frame = local->main_frame;
- local->main_frame = NULL;
+ /* Code to update all extended attributed from
+ subvol to local->xattr on that internal xattr has found
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
+ ret = dict_get_int8(local->xattr_req, QUOTA_READ_ONLY_KEY, &is_read_only);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, "key = %s not present in dict",
+ QUOTA_READ_ONLY_KEY);
+
+ if (local->file_count && local->dir_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset(this, local->cached_subvol, local->inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize(this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_msg_debug(this->name, 0,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)",
+ local->loc.path, (ret < 0) ? "yes" : "no",
+ (ret > 0) ? ret : 0);
+ layout_anomalies = 1;
+ } else if (local->inode) {
+ dht_layout_set(this, local->inode, layout);
+ }
+ }
+
+ if (!conf->vch_forced) {
+ ret = dict_get_uint32(local->xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
- UNLOCK(&discover_frame->lock);
+ }
- if (!main_frame)
+ if (IA_ISDIR(local->stbuf.ia_type) && !is_read_only) {
+ for (i = 0; i < layout->cnt; i++) {
+ if (!source && !layout->list[i].err)
+ source = layout->list[i].xlator;
+ if (layout->list[i].err == ENOENT ||
+ layout->list[i].err == ESTALE) {
+ heal_path = 1;
+ }
+
+ if (source && heal_path)
+ break;
+ }
+ }
+
+ if (IA_ISDIR(local->stbuf.ia_type)) {
+ /* Call function to save hashed subvol on inode ctx if
+ internal mds xattr is not present and all subvols are up
+ */
+ if (!local->op_ret && !__is_root_gfid(local->stbuf.ia_gfid))
+ (void)dht_common_mark_mdsxattr(discover_frame,
+ &error_while_marking_mds, 1);
+
+ if (local->need_xattr_heal && !heal_path) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "xattr heal failed for "
+ "directory gfid is %s ",
+ gfid_local);
+ }
+ }
+ }
+
+ if (source && (heal_path || layout_anomalies || error_while_marking_mds)) {
+ gf_uuid_copy(loc.gfid, local->gfid);
+ if (gf_uuid_is_null(loc.gfid)) {
+ goto done;
+ }
+
+ if (local->inode)
+ loc.inode = inode_ref(local->inode);
+ else
+ goto done;
+
+ heal_frame = create_frame(this, this->ctx->pool);
+ if (heal_frame) {
+ heal_local = dht_local_init(heal_frame, &loc, NULL, 0);
+ if (!heal_local)
+ goto cleanup;
+
+ gf_uuid_copy(heal_local->gfid, local->gfid);
+ heal_frame->cookie = source;
+ heal_local->xattr = dict_ref(local->xattr);
+ heal_local->stbuf = local->stbuf;
+ heal_local->postparent = local->postparent;
+ heal_local->inode = inode_ref(loc.inode);
+ heal_local->main_frame = main_frame;
+ FRAME_SU_DO(heal_frame, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_heal_full_path,
+ dht_heal_full_path_done, heal_frame, heal_frame);
+ if (!ret) {
+ loc_wipe(&loc);
return 0;
+ }
+ /*
+ * Failed to spawn the synctask. Returning
+ * with out doing heal.
+ */
+ cleanup:
+ loc_wipe(&loc);
+ DHT_STACK_DESTROY(heal_frame);
+ }
+ }
+done:
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
- if (local->file_count && local->dir_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- local->loc.path);
- op_errno = EIO;
- goto out;
- }
+ DHT_STACK_UNWIND(lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
- if (local->cached_subvol) {
- ret = dht_layout_preset (this, local->cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set layout for subvolume %s",
- local->cached_subvol ? local->cached_subvol->name : "<nil>");
- op_errno = EINVAL;
- goto out;
- }
- } else {
- ret = dht_layout_normalize (this, &local->loc, layout);
- if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
- /* either the layout is incorrect or the directory is
- * not found even in one subvolume.
- */
- gf_log (this->name, GF_LOG_DEBUG,
- "normalizing failed on %s "
- "(overlaps/holes present: %s, "
- "ENOENT errors: %d)", local->loc.path,
- (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
- op_errno = EINVAL;
- goto out;
- }
+out:
+ DHT_STACK_UNWIND(lookup, main_frame, -1, op_errno, NULL, NULL, NULL, NULL);
- dht_layout_set (this, local->inode, layout);
- }
+ return ret;
+}
- DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = cookie;
+ int ret = -1;
+ dht_conf_t *conf = 0;
+ dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+
+ local = frame->local;
+ conf = this->private;
+ layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
+
+ if (op_ret) {
+ gf_msg_debug(this->name, op_ret,
+ "Failed to set %s on the MDS %s for path %s. ",
+ conf->mds_xattr_key, prev->name, local->loc.path);
+ } else {
+ /* Save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set mds subvol on inode ctx"
+ " %s for %s ",
+ prev->name, local->loc.path);
+ }
+ }
+ if (!local->mds_heal_fresh_lookup && layout) {
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffffff,
+ layout);
+ }
out:
- DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
-
- return ret;
+ if (mds_heal_fresh_lookup)
+ DHT_STACK_DESTROY(frame);
+ return 0;
}
+static xlator_t *
+dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+{
+ char *path = NULL;
+ loc_t populate_loc = {
+ 0,
+ };
+ char *name = NULL;
+ xlator_t *hash_subvol = NULL;
+
+ if (!inode)
+ return hash_subvol;
+
+ if (loc && loc->parent && loc->path) {
+ if (!loc->name) {
+ name = strrchr(loc->path, '/');
+ if (name) {
+ loc->name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, loc);
+ goto out;
+ }
-int
-dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
- int attempt_unwind = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ if (!gf_uuid_is_null(inode->gfid)) {
+ populate_loc.inode = inode_ref(inode);
+ populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
+ inode_path(populate_loc.inode, NULL, &path);
- local = frame->local;
- prev = cookie;
+ if (!path)
+ goto out;
- layout = local->layout;
+ populate_loc.path = path;
+ if (!populate_loc.name && populate_loc.path) {
+ name = strrchr(populate_loc.path, '/');
+ if (name) {
+ populate_loc.name = name + 1;
- /* Check if the gfid is different for file from other node */
- if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on %s",
- local->loc.path, prev->this->name);
+ } else {
+ goto out;
+ }
}
+ hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
+ }
+out:
+ if (populate_loc.inode)
+ loc_wipe(&populate_loc);
+ return hash_subvol;
+}
+/* Common function call by revalidate/selfheal code path to populate
+ internal xattr if it is not present, mark_during_fresh_lookup value
+ determines either function is call by revalidate_cbk(discover_complete)
+ or call by selfheal code path while fresh lookup.
+ Here we do wind a call serially in case of fresh lookup and
+ for other lookup code path we do wind a call parallel.The reason
+ to wind a call serially is at the time of fresh lookup directory is not
+ discovered and at the time of revalidate_lookup directory is
+ already discovered. So, revalidate codepath can race with setxattr
+ codepath and can get into spurious heals because of an ongoing setxattr.
+ This can slow down revalidates, if healing happens in foreground.
+ However, if healing happens in background, there is no direct performance
+ penalty.
+*/
+int
+dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
+ int mark_during_fresh_lookup)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int ret = 0;
+ int i = 0;
+ dict_t *xattrs = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ int32_t zero[1] = {0};
+ dht_conf_t *conf = 0;
+ dht_layout_t *layout = NULL;
+ dht_local_t *copy_local = NULL;
+ call_frame_t *xattr_frame = NULL;
+ gf_boolean_t vol_down = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ local = frame->local;
+ conf = this->private;
+ layout = local->selfheal.layout;
+ local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ /* Code to update hashed subvol consider as a mds subvol
+ and wind a setxattr call on hashed subvol to update
+ internal xattr
+ */
+ if (!local->xattr || !dict_get(local->xattr, conf->mds_xattr_key)) {
+ /* It means no internal MDS xattr has been set yet
+ */
+ /* Check the status of all subvol are up while call
+ this function call by lookup code path
+ */
+ if (mark_during_fresh_lookup) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ vol_down = _gf_true;
+ break;
+ }
+ }
+ if (vol_down) {
+ gf_msg_debug(this->name, 0,
+ "subvol %s is down. Unable to "
+ " save mds subvol on inode for "
+ " path %s gfid is %s ",
+ conf->subvolumes[i]->name, local->loc.path,
+ gfid_local);
+ goto out;
+ }
+ }
- LOCK (&frame->lock);
- {
- /* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ /* Calculate hashed subvol based on inode and parent node
+ */
+ hashed_subvol = dht_inode_get_hashed_subvol(local->inode, this,
+ &local->loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for path %s"
+ "gfid is %s ",
+ local->loc.path, gfid_local);
+ if (errst)
+ (*errst) = 1;
+ ret = -1;
+ goto out;
+ }
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array(xattrs, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary"
+ " value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, local->loc.path);
+ ret = -1;
+ goto out;
+ }
+ /* Create a new frame to wind a call only while
+ this function call by revalidate_cbk code path
+ To wind a call parallel need to create a new frame
+ */
+ if (mark_during_fresh_lookup) {
+ xattr_frame = create_frame(this, this->ctx->pool);
+ if (!xattr_frame) {
+ ret = -1;
+ goto out;
+ }
+ copy_local = dht_local_init(xattr_frame, &(local->loc), NULL, 0);
+ if (!copy_local) {
+ ret = -1;
+ DHT_STACK_DESTROY(xattr_frame);
+ goto out;
+ }
+ copy_local->stbuf = local->stbuf;
+ copy_local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+ if (!copy_local->inode)
+ copy_local->inode = inode_ref(local->inode);
+ gf_uuid_copy(copy_local->loc.gfid, local->gfid);
+ FRAME_SU_DO(xattr_frame, dht_local_t);
+ STACK_WIND_COOKIE(xattr_frame, dht_common_mark_mdsxattr_cbk,
+ hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->setxattr, &local->loc,
+ xattrs, 0, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_common_mark_mdsxattr_cbk,
+ (void *)hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->setxattr, &local->loc,
+ xattrs, 0, NULL);
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s ",
+ conf->mds_xattr_key, local->loc.path, gfid_local);
+ if (!mark_during_fresh_lookup)
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf,
+ 0xffffffff, layout);
+ }
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to merge layouts", local->loc.path);
+out:
+ if (xattrs)
+ dict_unref(xattrs);
+ return ret;
+}
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
+/* Get the value of key from dict in the bytewise and save in array after
+ convert from network byte order to host byte order
+*/
+static int32_t
+dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+ int *errst)
+{
+ void *ptr = NULL;
+ int32_t len = -1;
+ int32_t vindex = -1;
+ int32_t err = -1;
+ int ret = 0;
+
+ if (dict == NULL) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ (*errst) = -1;
+ return err;
+ }
+
+ if (len != (size * sizeof(int32_t))) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+
+ for (vindex = 0; vindex < size; vindex++) {
+ value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
+ if (value[vindex] < 0)
+ ret = -1;
+ }
+
+ return ret;
+}
- goto unlock;
- }
+static int
+dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int32_t check_mds = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+ int errst = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on %s, gfid local = %s"
+ "gfid other = %s",
+ local->loc.path, prev->name, gfid_local, gfid_node);
+ }
+
+ LOCK(&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno,
+ "lookup of %s on %s returned error", local->loc.path,
+ prev->name);
- if (is_dir) {
- local->dir_count ++;
- } else {
- local->file_count ++;
-
- if (!is_linkfile) {
- /* real file */
- local->cached_subvol = prev->this;
- attempt_unwind = 1;
- } else {
- goto unlock;
- }
- }
+ goto unlock;
+ }
- local->op_ret = 0;
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
- if (local->xattr == NULL) {
- local->xattr = dict_ref (xattr);
- } else {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ if (is_dir) {
+ local->dir_count++;
+ } else {
+ local->file_count++;
+
+ if (!is_linkfile && !local->cached_subvol) {
+ /* real file */
+ /* Ok, we somehow managed to find a file on
+ * more than one subvol. ignore this or we
+ * will end up overwriting information while a
+ * a thread is potentially unwinding from
+ * dht_discover_complete
+ */
+ local->cached_subvol = prev;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
+ local->op_ret = 0;
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref(xattr);
+ } else {
+ /* Don't aggregate for files. See BZ#1484709 */
+ if (is_dir)
+ dht_aggregate_xattr(local->xattr, xattr);
}
+
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ goto unlock;
+ } else {
+ gf_msg_debug(this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s ",
+ conf->mds_xattr_key, local->loc.path, gfid_local);
+ }
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+
+ if ((check_mds < 0) && !errst) {
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "Value of %s is not zero on mds subvol"
+ "so xattr needs to be healed on non mds"
+ " path is %s and vol name is %s "
+ " gfid is %s",
+ conf->mds_xattr_key, local->loc.path, prev->name,
+ gfid_local);
+ local->need_xattr_heal = 1;
+ local->mds_subvol = prev;
+ }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
out:
- this_call_cnt = dht_frame_return (frame);
+ /* Make sure, the thread executing dht_discover_complete is the one
+ * which calls STACK_DESTROY (frame). In the case of "attempt_unwind",
+ * this makes sure that the thread don't call dht_frame_return, till
+ * call to dht_discover_complete is done.
+ */
+ if (attempt_unwind) {
+ dht_discover_complete(this, frame);
+ }
- if (is_last_call (this_call_cnt) || attempt_unwind) {
- dht_discover_complete (this, frame);
- }
+ this_call_cnt = dht_frame_return(frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_DESTROY (frame);
+ if (is_last_call(this_call_cnt) && !attempt_unwind) {
+ dht_discover_complete(this, frame);
+ }
- return 0;
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_DESTROY(frame);
+
+ return 0;
}
+static int
+dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Used to check whether this is a linkto file.
+ */
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->link_xattr_name, loc->path);
+ goto err;
+ }
+
+ /* This is used to make sure we don't unlink linkto files
+ * which are the target of an ongoing file migration.
+ */
+ ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
-int
-dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+/* This is a gfid based nameless lookup. Without a name, the hashed subvol
+ * cannot be calculated so a lookup is sent to all subvols.
+ */
+static int
+dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int ret;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int call_cnt = 0;
- int op_errno = EINVAL;
- int i = 0;
- call_frame_t *discover_frame = NULL;
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ /* As we do not know if this is a file or directory, request
+ * both file and directory xattrs
+ */
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ goto err;
+ }
+
+ if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- conf = this->private;
- local = frame->local;
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set 'trusted.glusterfs.dht' key",
- loc->path);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set 'trusted.glusterfs.dht.linkto' key",
- loc->path);
+ gf_uuid_copy(local->gfid, loc->gfid);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ discover_frame = copy_frame(frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
-
- uuid_copy (local->gfid, loc->gfid);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(discover_frame, dht_discover_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
- discover_frame = copy_frame (frame);
- if (!discover_frame) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
- discover_frame->local = local;
- frame->local = NULL;
- local->main_frame = frame;
+err:
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (discover_frame, dht_discover_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
+ return 0;
+}
- return 0;
+/* Code to call syntask to heal custom xattr from hashed subvol
+ to non hashed subvol
+*/
+int
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
+{
+ dht_local_t *copy_local = NULL;
+ call_frame_t *copy = NULL;
+ int ret = -1;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ if (gf_uuid_is_null(local->gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "No gfid exists for path %s "
+ "so healing xattr is not possible",
+ local->loc.path);
+ *op_errno = EIO;
+ goto out;
+ }
+
+ gf_uuid_unparse(local->gfid, gfid_local);
+ copy = create_frame(this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init(copy, &(local->loc), NULL, 0);
+ if (!copy_local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Memory allocation failed "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
+ DHT_STACK_DESTROY(copy);
+ } else {
+ copy_local->stbuf = local->stbuf;
+ gf_uuid_copy(copy_local->loc.gfid, local->gfid);
+ copy_local->mds_subvol = local->mds_subvol;
+ FRAME_SU_DO(copy, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_dir_heal_xattrs,
+ dht_dir_heal_xattrs_done, copy, copy);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Synctask creation failed to heal xattr "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
+ DHT_STACK_DESTROY(copy);
+ }
+ }
+ }
+out:
+ return ret;
+}
-err:
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+static int
+dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int needs_selfheal = 0;
+ int ret = 0;
+
+ local = frame->local;
+ layout = local->layout;
+
+ if (local->need_attrheal || local->need_xattr_heal ||
+ local->need_selfheal) {
+ needs_selfheal = 1;
+ }
+
+ ret = dht_layout_normalize(this, &local->loc, layout);
+
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path);
+ needs_selfheal = 1;
+ }
+ return needs_selfheal;
+}
+static int
+is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+{
+ if ((prot1->owner.read != prot2->owner.read) ||
+ (prot1->owner.write != prot2->owner.write) ||
+ (prot1->owner.exec != prot2->owner.exec) ||
+ (prot1->group.read != prot2->group.read) ||
+ (prot1->group.write != prot2->group.write) ||
+ (prot1->group.exec != prot2->group.exec) ||
+ (prot1->other.read != prot2->other.read) ||
+ (prot1->other.write != prot2->other.write) ||
+ (prot1->other.exec != prot2->other.exec) ||
+ (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
+ (prot1->sticky != prot2->sticky)) {
+ return 1;
+ } else {
return 0;
+ }
}
-
int
-dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
- int is_dir = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int32_t check_mds = 0;
+ int errst = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+ gf_msg_debug(this->name, op_errno,
+ "%s: lookup on %s returned with op_ret = %d, op_errno = %d",
+ local->loc.path, prev->name, op_ret, op_errno);
+
+ /* The first successful lookup*/
+ if (!op_ret && gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid_local);
+ }
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on %s."
+ " gfid local = %s, gfid subvol = %s",
+ local->loc.path, prev->name, gfid_local, gfid_node);
+ }
+
+ LOCK(&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
- layout = local->layout;
+ /* The GFID is missing on this subvol. Force a heal. */
+ if (op_errno == ENODATA) {
+ local->need_lookup_everywhere = 1;
+ }
+ goto unlock;
+ }
- if (!op_ret && uuid_is_null (local->gfid))
- memcpy (local->gfid, stbuf->ia_gfid, 16);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (!is_dir) {
+ gf_msg_debug(this->name, 0,
+ "%s: lookup on %s returned non dir 0%o"
+ "calling lookup_everywhere",
+ local->loc.path, prev->name, stbuf->ia_type);
+
+ local->need_lookup_everywhere = 1;
+ goto unlock;
+ }
- /* Check if the gfid is different for file from other node */
- if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on %s",
- local->loc.path, prev->this->name);
+ local->op_ret = 0;
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref(xattr);
+ } else {
+ dht_aggregate_xattr(local->xattr, xattr);
+ }
+
+ if (__is_root_gfid(stbuf->ia_gfid)) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
}
- LOCK (&frame->lock);
- {
- /* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ if (local->stbuf.ia_type != IA_INVAL) {
+ /* This is not the first subvol to respond
+ * Compare values to see if attrs need to be healed
+ */
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ (is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot))) {
+ local->need_attrheal = 1;
+ }
+ }
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
- if (op_ret == -1) {
- local->op_errno = ENOENT;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
- goto unlock;
- }
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+ "%s: mds xattr %s is not present "
+ "on %s(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
+ goto unlock;
+ }
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (!is_dir) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned non dir 0%o",
- local->loc.path, prev->this->name,
- stbuf->ia_type);
- local->need_selfheal = 1;
- goto unlock;
- }
+ /* Save the mds subvol info and stbuf. This is the value that will
+ * be used for healing
+ */
+ local->mds_subvol = prev;
+ local->mds_stbuf = *stbuf;
- local->op_ret = 0;
- if (local->xattr == NULL) {
- local->xattr = dict_ref (xattr);
- } else {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ /* Save mds subvol on inode ctx */
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "%s: Failed to set mds (%s)", local->loc.path, prev->name);
+ }
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directories */
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "%s: %s is not zero on %s. Xattrs need to be healed."
+ "(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
+ local->need_xattr_heal = 1;
+ }
+ }
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
- }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
- this_call_cnt = dht_frame_return (frame);
+ if (is_last_call(this_call_cnt)) {
+ /* If the mds subvol is not set correctly*/
+ if (!__is_root_gfid(local->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key))) {
+ local->need_selfheal = 1;
+ }
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- local->need_selfheal = 0;
- dht_lookup_everywhere (frame, this, &local->loc);
- return 0;
- }
+ /* No need to call xattr heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1) {
+ local->need_xattr_heal = 0;
+ }
- if (local->op_ret == 0) {
- ret = dht_layout_normalize (this, &local->loc, layout);
+ if (local->need_selfheal || local->need_lookup_everywhere) {
+ /* Set the gfid-req so posix will set the GFID*/
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* Ok, this should _never_ happen */
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ } else {
+ if (!gf_uuid_is_null(local->gfid_req))
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid_req, 16);
+ }
+ }
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fixing assignment on %s",
- local->loc.path);
- goto selfheal;
- }
+ if (local->need_lookup_everywhere) {
+ local->need_lookup_everywhere = 0;
+ dht_lookup_everywhere(frame, this, &local->loc);
+ return 0;
+ }
- dht_layout_set (this, local->inode, layout);
- }
+ if (local->op_ret == 0) {
+ if (dht_needs_selfheal(frame, this)) {
+ goto selfheal;
+ }
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ dht_layout_set(this, local->inode, layout);
+ if (local->inode) {
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
}
- return 0;
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
+
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+ return 0;
selfheal:
- FRAME_SU_DO (frame, dht_local_t);
- uuid_copy (local->loc.gfid, local->gfid);
- ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
- &local->loc, layout);
+ FRAME_SU_DO(frame, dht_local_t);
+ ret = dht_selfheal_directory(frame, dht_lookup_selfheal_cbk, &local->loc,
+ layout);
out:
- return ret;
+ return ret;
+}
+
+static int
+dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int call_cnt = 0;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", loc, unwind);
+
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ goto unwind;
+ }
+
+ if (local->xattr != NULL) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* use this gfid in order to heal any missing ones */
+ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:"
+ " key = gfid-req",
+ local->loc.path);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(
+ frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
+unwind:
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
}
int
-dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, err);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
- GF_VALIDATE_OR_GOTO ("dht", cookie, err);
+dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int follow_link = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ uint32_t vol_commit_hash = 0;
+ xlator_t *subvol = NULL;
+ int32_t check_mds = 0;
+ int errst = 0, i = 0;
+ int32_t mds_xattr_val[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO("dht", cookie, err);
+ GF_VALIDATE_OR_GOTO("dht", this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (!conf->vch_forced) {
+ /* Update the commithash value if available
+ */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
+ }
+ }
- local = frame->local;
- prev = cookie;
- conf = this->private;
- if (!conf)
- goto out;
+ gf_uuid_unparse(local->loc.gfid, gfid);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_INFO,
- "subvolume %s for %s returned -1 (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- }
- if (op_errno == ESTALE) {
- /* propagate the ESTALE to parent.
- * setting local->return_estale would send
- * ESTALE to parent. */
- local->return_estale = 1;
- }
+ gf_msg_debug(this->name, op_errno,
+ "%s: revalidate lookup on %s returned op_ret %d",
+ local->loc.path, prev->name, op_ret);
- /* if it is ENOENT, we may have to do a
- * 'lookup_everywhere()' to make sure
- * the file is not migrated */
- if (op_errno == ENOENT) {
- if (IA_ISREG (local->loc.inode->ia_type)) {
- local->need_lookup_everywhere = 1;
- }
+ LOCK(&frame->lock);
+ {
+ if (gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, local->loc.gfid, 16);
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ if ((op_errno != ENOTCONN) && (op_errno != ENOENT) &&
+ (op_errno != ESTALE)) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_REVALIDATE_CBK_INFO,
+ "Revalidate: subvolume %s for %s "
+ "(gfid = %s) returned -1",
+ prev->name, local->loc.path, gfid);
+ }
+ if (op_errno == ESTALE) {
+ /* propagate the ESTALE to parent.
+ * setting local->return_estale would send
+ * ESTALE to parent. */
+ local->return_estale = 1;
+ }
+
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG(local->loc.inode->ia_type)) {
+ gf_msg_debug(this->name, 0,
+ "found ENOENT for %s. "
+ "Setting "
+ "need_lookup_everywhere"
+ " flag to 1",
+ local->loc.path);
+
+ local->need_lookup_everywhere = 1;
+ } else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
}
- goto unlock;
+ }
+
+ local->need_selfheal = 1;
}
+ }
- if (stbuf->ia_type != local->inode->ia_type) {
- gf_log (this->name, GF_LOG_INFO,
- "mismatching filetypes 0%o v/s 0%o for %s",
- (stbuf->ia_type), (local->inode->ia_type),
- local->loc.path);
+ /* The GFID is missing on this subvol. Lookup everywhere to force a
+ * gfid heal
+ */
+ if ((op_errno == ENODATA) &&
+ (IA_ISDIR(local->loc.inode->ia_type))) {
+ local->need_lookup_everywhere = 1;
+ }
- local->op_ret = -1;
- local->op_errno = EINVAL;
+ goto unlock;
+ }
- goto unlock;
- }
+ if ((!IA_ISINVAL(local->inode->ia_type)) &&
+ stbuf->ia_type != local->inode->ia_type) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "mismatching filetypes 0%o v/s 0%o for %s,"
+ " gfid = %s",
+ (stbuf->ia_type), (local->inode->ia_type), local->loc.path,
+ gfid);
- layout = local->layout;
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
- is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ goto unlock;
+ }
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_INFO,
- "linkfile found in revalidate for %s",
- local->loc.path);
- local->return_estale = 1;
+ layout = local->layout;
- goto unlock;
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (is_linkfile) {
+ follow_link = 1;
+ goto unlock;
+ }
+ if (is_dir) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+
+ if (__is_root_gfid(stbuf->ia_gfid))
+ local->prebuf.ia_prot = stbuf->ia_prot;
}
+ }
+
+ if (local->stbuf.ia_type != IA_INVAL) {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot)) {
+ local->need_attrheal = 1;
+ }
+ }
+
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+ "%s: internal xattr %s is not present"
+ " on subvol %s(gfid is %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid);
+ } else {
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ local->mds_subvol = prev;
+ local->mds_stbuf.ia_gid = stbuf->ia_gid;
+ local->mds_stbuf.ia_uid = stbuf->ia_uid;
+ local->mds_stbuf.ia_prot = stbuf->ia_prot;
+
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set MDS subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+ if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directory
+ */
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "Value of %s is not zero on "
+ "hashed subvol so xattr needs to"
+ " be healed on non hashed"
+ " path is %s and vol name is %s "
+ " gfid is %s",
+ conf->mds_xattr_key, local->loc.path,
+ prev->name, gfid);
+ local->need_xattr_heal = 1;
+ }
+ }
+ ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc,
+ xattr);
+ if (ret != 0) {
+ /* In memory layout does not match on-disk layout.
+ */
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH,
+ "Mismatching layouts for %s, gfid = %s", local->loc.path,
+ gfid);
- if (is_dir) {
- ret = dht_layout_dir_mismatch (this, layout,
- prev->this, &local->loc,
- xattr);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "mismatching layouts for %s",
- local->loc.path);
-
- local->layout_mismatch = 1;
+ local->layout_mismatch = 1;
- goto unlock;
- }
- }
+ goto unlock;
+ }
+ }
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ gf_uuid_copy(local->stbuf.ia_gfid, stbuf->ia_gfid);
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
- local->op_ret = 0;
+ local->op_ret = 0;
- if (!local->xattr) {
- local->xattr = dict_ref (xattr);
- } else if (is_dir) {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ if (!local->xattr) {
+ local->xattr = dict_ref(xattr);
+ } else if (is_dir) {
+ dht_aggregate_xattr(local->xattr, xattr);
}
+ }
unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
-
- if (is_last_call (this_call_cnt)) {
- if (!IA_ISDIR (local->stbuf.ia_type)
- && (local->hashed_subvol != local->cached_subvol)
- && (local->stbuf.ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
- }
- if (local->layout_mismatch) {
- /* Found layout mismatch in the directory, need to
- fix this in the inode context */
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
+ UNLOCK(&frame->lock);
- if (local->need_lookup_everywhere) {
- /* As the current layout gave ENOENT error, we would
- need a new layout */
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
-
- /* We know that current cached subvol is no more
- valid, get the new one */
- local->cached_subvol = NULL;
- dht_lookup_everywhere (frame, this, &local->loc);
- return 0;
- }
- if (local->return_estale) {
- local->op_ret = -1;
- local->op_errno = ESTALE;
- }
+ if (follow_link) {
+ /* Found a linkto file. Follow it to see if the target file exists
+ */
+ gf_uuid_copy(local->gfid, stbuf->ia_gfid);
- WIPE (&local->postparent);
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+ if (!subvol) {
+ op_errno = ESTALE;
+ local->op_ret = -1;
+ } else {
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc,
+ local->xattr_req);
+ return 0;
+ }
+ }
+
+ this_call_cnt = dht_frame_return(frame);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ if (is_last_call(this_call_cnt)) {
+ if (!IA_ISDIR(local->stbuf.ia_type) &&
+ (local->hashed_subvol != local->cached_subvol) &&
+ (local->stbuf.ia_nlink == 1) &&
+ (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
}
+ /* No need to call heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+
+ if (IA_ISDIR(local->stbuf.ia_type)) {
+ /* No mds xattr found. Trigger a heal to set it */
+ if (!__is_root_gfid(local->loc.inode->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key)))
+ local->need_selfheal = 1;
+
+ if (dht_needs_selfheal(frame, this)) {
+ if (!__is_root_gfid(local->loc.inode->gfid)) {
+ if (local->mds_subvol) {
+ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+ local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+ }
+ } else {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+ }
-err:
- return ret;
-}
+ layout = local->layout;
+ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+ return 0;
+ }
+ }
+ if (local->layout_mismatch) {
+ /* Found layout mismatch in the directory, need to
+ fix this in the inode context */
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
-int
-dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+
+ /* We know that current cached subvol is no longer
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ if (local->xattr_req) {
+ if (!gf_uuid_is_null(local->gfid)) {
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ }
+ }
- local = frame->local;
- cached_subvol = local->cached_subvol;
- conf = this->private;
+ dht_lookup_everywhere(frame, this, &local->loc);
+ return 0;
+ }
+ if (local->return_estale) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
- ret = dht_layout_preset (this, local->cached_subvol, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set layout for subvolume %s",
- cached_subvol ? cached_subvol->name : "<nil>");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- goto unwind;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
- local->op_ret = 0;
- if ((local->stbuf.ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ /* local->stbuf is updated only from subvols which have a layout
+ * The reason is to avoid choosing attr heal source from newly
+ * added bricks. In case e.g we have only one subvol and for
+ * some reason layout is not present on it, then local->stbuf
+ * will be EINVAL. This is an indication that the subvols
+ * active in the cluster do not have layouts on disk.
+ * Unwind with ESTALE to trigger a fresh lookup */
+ if (is_dir && local->stbuf.ia_type == IA_INVAL) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
}
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
+
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+err:
+ return ret;
+}
+
+static int
+dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cooie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
+ conf = this->private;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (local->locked)
+ dht_unlock_namespace(frame, &local->lock[0]);
+
+ ret = dht_layout_preset(this, local->cached_subvol, local->loc.inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, EINVAL,
+ "Failed to set layout for subvolume %s, "
+ "(gfid = %s)",
+ cached_subvol ? cached_subvol->name : "<nil>", gfid);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ local->op_ret = 0;
+ if ((local->stbuf.ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
unwind:
- WIPE (&local->postparent);
+ gf_msg_debug(this->name, 0,
+ "creation of linkto on hashed subvol:%s, "
+ "returned with op_ret %d and op_errno %d: %s",
+ local->hashed_subvol->name, op_ret, op_errno,
+ uuid_utoa(local->loc.gfid));
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal(frame, this);
+
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
out:
- return ret;
+ return ret;
}
+static int
+dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
-int
-dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
+ local = (dht_local_t *)frame->local;
+ path = local->loc.path;
+ FRAME_SU_UNDO(frame, dht_local_t);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_lookup_everywhere_done(frame, this);
+ }
+
+ return 0;
+}
+
+static int
+dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int ret = 0;
- dht_local_t *local = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- dht_layout_t *layout = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
- local = frame->local;
- hashed_subvol = local->hashed_subvol;
- cached_subvol = local->cached_subvol;
+ local = (dht_local_t *)frame->local;
+ path = local->loc.path;
- if (local->file_count && local->dir_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- local->loc.path);
- DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL,
- NULL);
- return 0;
- }
+ FRAME_SU_UNDO(frame, dht_local_t);
- if (local->dir_count) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
- if (!cached_subvol) {
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL,
- NULL);
- return 0;
- }
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if ((op_ret == 0) || ((op_errno != EBUSY) && (op_errno != ENOTCONN))) {
+ dht_lookup_everywhere_done(frame, this);
+ } else {
+ /*When dht_lookup_everywhere is performed, one cached
+ *and one hashed file was found and hashed file does
+ *not point to the above mentioned cached node. So it
+ *was considered as stale and an unlink was performed.
+ *But unlink fails. So may be rebalance is in progress.
+ *now ideally we have two data-files. One obtained during
+ *lookup_everywhere and one where unlink-failed. So
+ *at this point in time we cannot decide which one to
+ *choose because there are chances of first cached
+ *file is truncated after rebalance and if it is chosen
+ *as cached node, application will fail. So return EIO.*/
+
+ if (op_errno == EBUSY) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_UNLINK_FAILED,
+ "Could not unlink the linkto file as "
+ "either fd is open and/or linkto xattr "
+ "is set for %s",
+ ((path == NULL) ? "null" : path));
+ }
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ }
+ }
+
+ return 0;
+}
- if (local->need_lookup_everywhere) {
- if (uuid_compare (local->gfid, local->inode->gfid)) {
- /* GFID different, return error */
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL,
- NULL, NULL, NULL);
- return 0;
- }
- local->op_ret = 0;
- local->op_errno = 0;
- layout = dht_layout_for_subvol (this, cached_subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no pre-set layout for subvolume %s",
- local->loc.path, (cached_subvol ?
- cached_subvol->name :
- "<nil>"));
- }
+static int
+dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ const char *path = NULL;
- ret = dht_layout_set (this, local->inode, layout);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to set layout for subvol %s",
- local->loc.path, (cached_subvol ?
- cached_subvol->name :
- "<nil>"));
- }
+ /* NOTE:
+ * If stale file unlink fails either there is an open-fd or is not an
+ * dht-linkto-file then posix_unlink returns EBUSY, which is overwritten
+ * to ENOENT
+ */
- WIPE (&local->postparent);
+ local = frame->local;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
- }
+ if (local) {
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (local->loc.path)
+ path = local->loc.path;
+ }
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_INFO,
- "cannot create linkfile file for %s on %s: "
- "hashed subvolume cannot be found.",
- local->loc.path, cached_subvol->name);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "Returned with op_ret %d and "
+ "op_errno %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
- local->op_ret = 0;
- local->op_errno = 0;
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
- ret = dht_layout_preset (frame->this, cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to set layout for subvol %s",
- cached_subvol ? cached_subvol->name :
- "<nil>");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- }
+ return 0;
+}
- WIPE (&local->postparent);
+static int
+dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
+{
+ int ret = 0;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
- }
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
- gf_log (this->name, GF_LOG_DEBUG,
- "linking file %s existing on %s to %s (hash)",
- local->loc.path, cached_subvol->name,
- hashed_subvol->name);
+ if (ret)
+ return -1;
- ret = dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
- cached_subvol, hashed_subvol, &local->loc);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
- return ret;
+ if (ret)
+ return -1;
+
+ return 0;
}
+static int32_t
+dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int call_cnt = 0, ret = 0;
+ xlator_t *subvol = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ char gfid_str[GF_UUID_BUF_SIZE] = {0};
+
+ subvol = cookie;
+ local = frame->local;
+
+ if (subvol == local->hashed_subvol) {
+ if ((op_ret == 0) || (op_errno != ENOENT))
+ local->dont_create_linkto = _gf_true;
+ } else {
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(gfid, local->loc.gfid);
+ else
+ gf_uuid_copy(gfid, local->gfid);
+
+ if ((op_ret == 0) && gf_uuid_compare(gfid, buf->ia_gfid)) {
+ gf_uuid_unparse(gfid, gfid_str);
+ gf_msg_debug(this->name, 0,
+ "gfid (%s) different on cached subvol "
+ "(%s) and looked up inode (%s), not "
+ "creating linkto",
+ uuid_utoa(buf->ia_gfid), subvol->name, gfid_str);
+ local->dont_create_linkto = _gf_true;
+ } else if (op_ret == -1) {
+ local->dont_create_linkto = _gf_true;
+ }
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->dont_create_linkto)
+ goto no_linkto;
+ else {
+ gf_msg_debug(this->name, 0,
+ "Creating linkto file on %s(hash) to "
+ "%s on %s (gfid = %s)",
+ local->hashed_subvol->name, local->loc.path,
+ local->cached_subvol->name, gfid_str);
+
+ ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk,
+ this, local->cached_subvol,
+ local->hashed_subvol, &local->loc);
+
+ if (ret < 0)
+ goto no_linkto;
+ }
+ }
+
+ return 0;
+
+no_linkto:
+ gf_msg_debug(this->name, 0,
+ "skipped linkto creation (path:%s) (gfid:%s) "
+ "(hashed-subvol:%s) (cached-subvol:%s)",
+ local->loc.path, gfid_str, local->hashed_subvol->name,
+ local->cached_subvol->name);
+
+ dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, local->xattr);
+ return 0;
+}
-int
-dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int32_t
+dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int i = 0;
+ xlator_t *subvol = NULL;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_lookup_everywhere_done (frame, this);
- }
+ local = frame->local;
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ else
+ gf_uuid_unparse(local->gfid, gfid);
- return 0;
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "protecting namespace failed, skipping linkto "
+ "creation (path:%s)(gfid:%s)(hashed-subvol:%s)"
+ "(cached-subvol:%s)",
+ local->loc.path, gfid, local->hashed_subvol->name,
+ local->cached_subvol->name);
+ goto err;
+ }
+
+ local->locked = _gf_true;
+
+ local->call_cnt = 2;
+
+ for (i = 0; i < 2; i++) {
+ subvol = (subvol == NULL) ? local->hashed_subvol : local->cached_subvol;
+
+ STACK_WIND_COOKIE(frame, dht_linkfile_create_lookup_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, NULL);
+ }
+
+ return 0;
+
+err:
+ dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, local->xattr);
+ return 0;
}
+/* Rebalance is performed from cached_node to hashed_node. Initial cached_node
+ * contains a non-linkto file. After migration it is converted to linkto and
+ * then unlinked. And at hashed_subvolume, first a linkto file is present,
+ * then after migration it is converted to a non-linkto file.
+ *
+ * Lets assume a file is present on cached subvolume and a new brick is added
+ * and new brick is the new_hashed subvolume. So fresh lookup on newly added
+ * hashed subvolume will fail and dht_lookup_everywhere gets called. If just
+ * before sending the dht_lookup_everywhere request rebalance is in progress,
+ *
+ * from cached subvolume it may see: Nonlinkto or linkto or No file
+ * from hashed subvolume it may see: No file or linkto file or non-linkto file
+ *
+ * So this boils down to 9 cases:
+ * at cached_subvol at hashed_subvol
+ * ---------------- -----------------
+ *
+ *a) No file No file
+ * [request reached after [Request reached before
+ * migration] Migration]
+ *
+ *b) No file Linkto File
+ *
+ *c) No file Non-Linkto File
+ *
+ *d) Linkto No-File
+ *
+ *e) Linkto Linkto
+ *
+ *f) Linkto Non-Linkto
+ *
+ *g) NonLinkto No-File
+ *
+ *h) NonLinkto Linkto
+ *
+ *i) NonLinkto NonLinkto
+ *
+ * dht_lookup_everywhere_done takes decision based on any of the above case
+ */
-int
-dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- int is_linkfile = 0;
- int is_dir = 0;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- xlator_t *link_subvol = NULL;
- int ret = -1;
- int32_t fd_count = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
-
- local = frame->local;
- loc = &local->loc;
-
- prev = cookie;
- subvol = prev->this;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- goto unlock;
- }
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_layout_t *layout = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t found_non_linkto_on_hashed = _gf_false;
+
+ local = frame->local;
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (local->file_count && local->dir_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "path %s (gfid = %s)exists as a file on one "
+ "subvolume and directory on another. "
+ "Please fix it manually",
+ local->loc.path, gfid);
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+ if (local->op_ret && local->gfid_missing) {
+ if (gf_uuid_is_null(local->gfid_req)) {
+ DHT_STACK_UNWIND(lookup, frame, -1, ENODATA, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ /* A hack */
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
+
+ if (local->dir_count) {
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "STATUS: hashed_subvol %s "
+ "cached_subvol %s",
+ (hashed_subvol == NULL) ? "null" : hashed_subvol->name,
+ (cached_subvol == NULL) ? "null" : cached_subvol->name);
+
+ if (!cached_subvol) {
+ if (local->skip_unlink.handle_valid_link && hashed_subvol) {
+ /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK":
+ * If this lookup is performed by rebalance and this
+ * rebalance process detected hashed file and by
+ * the time it sends the lookup request to cached node,
+ * file got migrated and now at initial hashed_node,
+ * final migrated file is present. With current logic,
+ * because this process fails to find the cached_node,
+ * it will unlink the file at initial hashed_node.
+ *
+ * So we avoid this by setting key, and checking at the
+ * posix_unlink that unlink the file only if file is a
+ * linkto file and not a migrated_file.
+ */
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
+ local->xattr_req);
+
+ if (ret) {
+ /* If for some reason, setting key in the dict
+ * fails, return with ENOENT, as with respect to
+ * this process, it detected only a stale link
+ * file.
+ *
+ * Next lookup will delete it.
+ *
+ * Performing deletion of stale link file when
+ * setting key in dict fails, may cause the data
+ * loss because of the above mentioned race.
+ */
+
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ } else {
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug(this->name, 0,
+ "No Cached was found and "
+ "unlink on hashed was skipped"
+ " so performing now: %s",
+ local->loc.path);
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_stale_linkto_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, 0, local->xattr_req);
+ }
+
+ } else {
+ gf_msg_debug(this->name, 0,
+ "There was no cached file and "
+ "unlink on hashed is not skipped %s",
+ local->loc.path);
+
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
+ }
+ return 0;
+ }
+
+ /* At the time of dht_lookup, no file was found on hashed and that is
+ * why dht_lookup_everywhere is called, but by the time
+ * dht_lookup_everywhere
+ * reached to server, file might have already migrated. In that case we
+ * will find a migrated file at the hashed_node. In this case store the
+ * layout in context and return successfully.
+ */
+
+ if (hashed_subvol || local->need_lookup_everywhere) {
+ if (local->need_lookup_everywhere) {
+ found_non_linkto_on_hashed = _gf_true;
+
+ } else if ((local->file_count == 1) &&
+ (hashed_subvol == cached_subvol)) {
+ gf_msg_debug(this->name, 0,
+ "found cached file on hashed subvolume "
+ "so store in context and return for %s",
+ local->loc.path);
- if (uuid_is_null (local->gfid))
- uuid_copy (local->gfid, buf->ia_gfid);
+ found_non_linkto_on_hashed = _gf_true;
+ }
+
+ if (found_non_linkto_on_hashed)
+ goto preset_layout;
+ }
+
+ if (hashed_subvol) {
+ if (local->skip_unlink.handle_valid_link == _gf_true) {
+ if (cached_subvol == local->skip_unlink.hash_links_to) {
+ if (gf_uuid_compare(local->skip_unlink.cached_gfid,
+ local->skip_unlink.hashed_gfid)) {
+ /*GFID different, return error*/
+ DHT_STACK_UNWIND(lookup, frame, -1, ESTALE, NULL, NULL,
+ NULL, NULL);
- if (uuid_compare (local->gfid, buf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid differs on subvolume %s",
- loc->path, prev->this->name);
+ return 0;
}
- is_linkfile = check_is_linkfile (inode, buf, xattr);
- is_dir = check_is_dir (inode, buf, xattr);
-
- if (is_linkfile) {
- link_subvol = dht_linkfile_subvol (this, inode, buf,
- xattr);
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s linkfile %s (-> %s)",
- subvol->name, loc->path,
- link_subvol ? link_subvol->name : "''");
- goto unlock;
+ ret = dht_layout_preset(this, cached_subvol, local->loc.inode);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Could not set pre-set layout "
+ "for subvolume %s",
+ cached_subvol->name);
}
- /* non linkfile GFID takes precedence */
- uuid_copy (local->gfid, buf->ia_gfid);
+ local->op_ret = (ret == 0) ? ret : -1;
+ local->op_errno = (ret == 0) ? ret : EINVAL;
- if (is_dir) {
- local->dir_count++;
+ /* Presence of local->cached_subvol validates
+ * that lookup from cached node is successful
+ */
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s directory %s",
- subvol->name, loc->path);
- } else {
- local->file_count++;
-
- if (!local->cached_subvol) {
- /* found one file */
- dht_iatt_merge (this, &local->stbuf, buf,
- subvol);
- local->xattr = dict_ref (xattr);
- local->cached_subvol = subvol;
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s file %s",
- subvol->name, loc->path);
-
- dht_iatt_merge (this, &local->postparent,
- postparent, subvol);
- } else {
- /* This is where we need 'rename' both entries logic */
- gf_log (this->name, GF_LOG_WARNING,
- "multiple subvolumes (%s and %s) have "
- "file %s (preferably rename the file "
- "in the backend, and do a fresh lookup)",
- local->cached_subvol->name,
- subvol->name, local->loc.path);
- }
+ if (!local->op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
- }
-unlock:
- UNLOCK (&frame->lock);
- if (is_linkfile) {
- ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
- /* Delete the linkfile only if there are no open fds on it.
- if there is a open-fd, it may be in migration */
- if (!ret && (fd_count == 0)) {
- gf_log (this->name, GF_LOG_INFO,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc, 0, NULL);
- return 0;
+ gf_msg_debug(this->name, 0,
+ "Skipped unlinking linkto file "
+ "on the hashed subvolume. "
+ "Returning success as it is a "
+ "valid linkto file. Path:%s",
+ local->loc.path);
+
+ goto unwind_hashed_and_cached;
+ } else {
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug(this->name, 0,
+ "Linkto file found on hashed "
+ "subvol "
+ "and data file found on cached "
+ "subvolume. But linkto points to "
+ "different cached subvolume (%s) "
+ "path %s",
+ (local->skip_unlink.hash_links_to
+ ? local->skip_unlink.hash_links_to->name
+ : " <nil>"),
+ local->loc.path);
+
+ if (local->skip_unlink.opend_fd_count == 0) {
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
+ local->xattr_req);
+
+ if (ret) {
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL,
+ NULL, NULL);
+ } else {
+ local->call_cnt = 1;
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_of_false_linkto_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, 0, local->xattr_req);
+ }
+
+ return 0;
}
+ }
}
+ }
+
+preset_layout:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_lookup_everywhere_done (frame, this);
+ if (found_non_linkto_on_hashed) {
+ if (local->need_lookup_everywhere) {
+ if (gf_uuid_compare(local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
}
-out:
- return ret;
-}
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol(this, cached_subvol);
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: no pre-set layout for subvolume %s,"
+ " gfid = %s",
+ local->loc.path,
+ (cached_subvol ? cached_subvol->name : "<nil>"), gfid);
+ }
+ ret = dht_layout_set(this, local->inode, layout);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: failed to set layout for subvol %s, "
+ "gfid = %s",
+ local->loc.path,
+ (cached_subvol ? cached_subvol->name : "<nil>"), gfid);
+ }
-int
-dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int call_cnt = 0;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", loc, out);
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
- conf = this->private;
- local = frame->local;
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "Cannot create linkfile for %s on %s: "
+ "hashed subvolume cannot be found, gfid = %s.",
+ local->loc.path, cached_subvol->name, gfid);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ local->op_ret = 0;
+ local->op_errno = 0;
- if (!local->inode)
- local->inode = inode_ref (loc->inode);
+ ret = dht_layout_preset(frame->this, cached_subvol, local->inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvol %s"
+ ", gfid = %s",
+ cached_subvol ? cached_subvol->name : "<nil>", gfid);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_everywhere_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
return 0;
-out:
- DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
-err:
- return -1;
+ }
+
+ if (frame->root->op != GF_FOP_RENAME) {
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, &local->loc, hashed_subvol,
+ &local->current->ns,
+ dht_call_lookup_linkfile_create);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Creating linkto file on %s(hash) to %s on %s "
+ "(gfid = %s)",
+ hashed_subvol->name, local->loc.path, cached_subvol->name,
+ gfid);
+
+ ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk, this,
+ cached_subvol, hashed_subvol, &local->loc);
+ }
+
+ return ret;
+
+unwind_hashed_and_cached:
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
}
-
-int
-dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, unwind);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO ("dht", cookie, unwind);
-
- prev = cookie;
- subvol = prev->this;
- conf = this->private;
- local = frame->local;
- loc = &local->loc;
-
+static int
+dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dict_t *dict_req = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ local = frame->local;
+ loc = &local->loc;
+ conf = this->private;
+
+ prev = cookie;
+
+ gf_msg_debug(this->name, 0,
+ "returned with op_ret %d and op_errno %d (%s) "
+ "from subvol %s",
+ op_ret, op_errno, loc->path, prev->name);
+
+ LOCK(&frame->lock);
+ {
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "lookup of %s on %s (following linkfile) failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- goto err;
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ if (op_errno == ENODATA)
+ local->gfid_missing = _gf_true;
+ goto unlock;
}
- if (check_is_dir (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_INFO,
- "lookup of %s on %s (following linkfile) reached dir",
- local->loc.path, subvol->name);
- goto err;
- }
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(local->gfid, buf->ia_gfid);
- if (check_is_linkfile (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_INFO,
- "lookup of %s on %s (following linkfile) reached link",
- local->loc.path, subvol->name);
- goto err;
- }
+ gf_uuid_unparse(local->gfid, gfid);
- if (uuid_compare (local->gfid, stbuf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on data file on %s",
- local->loc.path, subvol->name);
- goto err;
+ if (gf_uuid_compare(local->gfid, buf->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid differs on subvolume %s,"
+ " gfid local = %s, gfid node = %s",
+ loc->path, prev->name, gfid, uuid_utoa(buf->ia_gfid));
}
- if ((stbuf->ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- stbuf->ia_prot.sticky = 1;
+ is_linkfile = check_is_linkfile(inode, buf, xattr,
+ conf->link_xattr_name);
+
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol(this, inode, buf, xattr);
+ gf_msg_debug(this->name, 0, "found on %s linkfile %s (-> %s)",
+ prev->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
}
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
+ is_dir = check_is_dir(inode, buf, xattr);
+
+ /* non linkfile GFID takes precedence but don't overwrite
+ gfid if we have already found a cached file*/
+ if (!local->cached_subvol)
+ gf_uuid_copy(local->gfid, buf->ia_gfid);
+
+ if (is_dir) {
+ local->dir_count++;
+
+ gf_msg_debug(this->name, 0, "found on %s directory %s", prev->name,
+ loc->path);
+ } else {
+ local->file_count++;
+
+ gf_msg_debug(this->name, 0, "found cached file on %s for %s",
+ prev->name, loc->path);
+
+ if (!local->cached_subvol) {
+ /* found one file */
+ dht_iatt_merge(this, &local->stbuf, buf);
+
+ local->xattr = dict_ref(xattr);
+ local->cached_subvol = prev;
+
+ gf_msg_debug(this->name, 0,
+ "storing cached on %s file"
+ " %s",
+ prev->name, loc->path);
+
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ gf_uuid_copy(local->skip_unlink.cached_gfid, buf->ia_gfid);
+ } else {
+ /* This is where we need 'rename' both entries logic */
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_FILE_ON_MULT_SUBVOL,
+ "multiple subvolumes (%s and %s) have "
+ "file %s (preferably rename the file "
+ "in the backend,and do a fresh lookup)",
+ local->cached_subvol->name, prev->name, local->loc.path);
+ }
}
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (is_linkfile) {
+ ret = dict_get_int32(xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+
+ /* Any linkto file found on the non-hashed subvolume should
+ * be unlinked (performed in the "else if" block below)
+ *
+ * But if a linkto file is found on hashed subvolume, it may be
+ * pointing to valid cached node. So unlinking of linkto
+ * file on hashed subvolume is skipped and inside
+ * dht_lookup_everywhere_done, checks are performed. If this
+ * linkto file is found as stale linkto file, it is deleted
+ * otherwise unlink is skipped.
+ */
-unwind:
- WIPE (postparent);
+ if (local->hashed_subvol && local->hashed_subvol == prev) {
+ local->skip_unlink.handle_valid_link = _gf_true;
+ local->skip_unlink.opend_fd_count = fd_count;
+ local->skip_unlink.hash_links_to = link_subvol;
+ gf_uuid_copy(local->skip_unlink.hashed_gfid, buf->ia_gfid);
+
+ gf_msg_debug(this->name, 0,
+ "Found"
+ " one linkto file on hashed subvol %s "
+ "for %s: Skipping unlinking till "
+ "everywhere_done",
+ prev->name, loc->path);
+
+ } else if (!ret && (fd_count == 0)) {
+ dict_req = dict_new();
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_req);
+
+ if (ret) {
+ /* Skip unlinking for dict_failure
+ *File is found as a linkto file on non-hashed,
+ *subvolume. In the current implementation,
+ *finding a linkto-file on non-hashed does not
+ *always implies that it is stale. So deletion
+ *of file should be done only when both fd is
+ *closed and linkto-xattr is set. In case of
+ *dict_set failure, avoid skipping of file.
+ *NOTE: dht_frame_return should get called for
+ * this block.
+ */
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
- postparent);
+ dict_unref(dict_req);
+
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "attempting deletion of stale linkfile "
+ "%s on %s (hashed subvol is %s)",
+ loc->path, prev->name,
+ (local->hashed_subvol ? local->hashed_subvol->name
+ : "<null>"));
+ /* *
+ * These stale files may be created using root
+ * user. Hence deletion will work only with
+ * root.
+ */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_cbk, prev,
+ prev->fops->unlink, loc, 0, dict_req);
- return 0;
+ dict_unref(dict_req);
+
+ return 0;
+ }
+ }
+ }
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_lookup_everywhere_done(frame, this);
+ }
-err:
- dht_lookup_everywhere (frame, this, loc);
out:
- return 0;
+ return ret;
}
-
int
-dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int call_cnt = 0;
- int i = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int ret = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int call_cnt = 0;
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, unwind);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO ("dht", loc, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", loc, out);
- conf = this->private;
- local = frame->local;
+ conf = this->private;
+ local = frame->local;
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- goto unwind;
- }
+ if (!local->inode)
+ local->inode = inode_ref(loc->inode);
- if (local->xattr != NULL) {
- dict_unref (local->xattr);
- local->xattr = NULL;
- }
+ gf_msg_debug(this->name, 0, "winding lookup call to %d subvols", call_cnt);
- if (!uuid_is_null (local->gfid)) {
- ret = dict_set_static_bin (local->xattr_req, "gfid-req",
- local->gfid, 16);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set gfid", local->loc.path);
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_everywhere_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, loc,
+ local->xattr_req);
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
-unwind:
- DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ return 0;
out:
- return 0;
-
+ DHT_STACK_UNWIND(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+err:
+ return -1;
}
-
int
-dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- char is_linkfile = 0;
- char is_dir = 0;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
- uint64_t tmp_layout = 0;
- dht_layout_t *parent_layout = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
+dht_lookup_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ xlator_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", cookie, unwind);
+
+ prev = cookie;
+ subvol = prev;
+ conf = this->private;
+ local = frame->local;
+ loc = &local->loc;
+
+ gf_uuid_unparse(loc->gfid, gfid);
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) failed "
+ ",gfid = %s",
+ local->loc.path, subvol->name, gfid);
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ local->cached_subvol = NULL;
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
+ }
+
+ if (check_is_dir(inode, stbuf, xattr)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) reached dir,"
+ " gfid = %s",
+ local->loc.path, subvol->name, gfid);
+ goto err;
+ }
+
+ if (check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "lookup of %s on %s (following linkfile) reached link,"
+ "gfid = %s",
+ local->loc.path, subvol->name, gfid);
+ goto err;
+ }
+
+ if (gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on data file on %s,"
+ " gfid local = %s, gfid node = %s ",
+ local->loc.path, subvol->name, gfid, uuid_utoa(stbuf->ia_gfid));
+ goto err;
+ }
+
+ if ((stbuf->ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) {
+ stbuf->ia_prot.sticky = 1;
+ }
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvolume %s,"
+ "gfid = %s",
+ prev->name, gfid);
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
- /* This is required for handling stale linkfile deletion,
- * or any more call which happens from this 'loc'.
- */
- if (!op_ret && uuid_is_null (local->gfid))
- memcpy (local->gfid, stbuf->ia_gfid, 16);
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_TRACE, "Entry %s missing on subvol"
- " %s", loc->path, prev->this->name);
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
- (loc->parent)) {
- ret = inode_ctx_get (loc->parent, this, &tmp_layout);
- parent_layout = (dht_layout_t *)(long)tmp_layout;
- if (parent_layout->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
- }
+unwind:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
- if (op_ret == 0) {
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
+ return 0;
+
+err:
+ dht_lookup_everywhere(frame, this, loc);
+out:
+ return 0;
+}
+
+/* Code to get hashed subvol based on inode and loc
+ First it check if loc->parent and loc->path exist then it get
+ hashed subvol based on loc.
+*/
+
+static gf_boolean_t
+dht_should_lookup_everywhere(xlator_t *this, dht_conf_t *conf, loc_t *loc)
+{
+ dht_layout_t *parent_layout = NULL;
+ int ret = 0;
+ gf_boolean_t lookup_everywhere = _gf_true;
+
+ /* lookup-optimize supersedes lookup-unhashed settings.
+ * If it is set, do not process search_unhashed
+ * If lookup-optimize if enabled, lookup everywhere if:
+ * - this is the rebalance daemon.
+ * - loc->parent is unavailable.
+ * - parent_layout is unavailable
+ * - parent_layout->commit_hash != conf->vol_commit_hash
+ */
+
+ if (conf->lookup_optimize) {
+ if (!conf->defrag && loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout);
+ if (!ret && parent_layout &&
+ (parent_layout->commit_hash == conf->vol_commit_hash)) {
+ lookup_everywhere = _gf_false;
+ }
+ }
+ goto out;
+ } else {
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ if (loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout ||
+ (!parent_layout->search_unhashed)) {
+ lookup_everywhere = _gf_false;
}
- }
+ } else {
+ lookup_everywhere = _gf_false;
+ }
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
+ goto out;
}
+ }
+out:
+ return lookup_everywhere;
+}
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "Lookup of %s for subvolume"
- " %s failed with error %s", loc->path, prev->this->name,
- strerror (op_errno));
+int
+dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ char is_dir = 0;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ xlator_t *prev = NULL;
+ int ret = 0;
+ uint32_t vol_commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ gf_msg_debug(this->name, op_errno,
+ "%s: fresh_lookup on %s returned with op_ret %d", loc->path,
+ prev->name, op_ret);
+
+ if (op_ret == -1) {
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (1 == conf->subvolume_cnt) {
+ /* No need to lookup again */
goto out;
- }
+ }
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s",
+ loc->path, prev->name);
- if (!is_linkfile) {
- /* non-directory and not a linkfile */
-
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not set pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- goto out;
- }
+ if (dht_should_lookup_everywhere(this, conf, loc)) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
+ } else {
+ /* posix returns ENODATA if the gfid is not set but the client and
+ * server protocol layers do not send the stbuf. We need to
+ * heal this so check if this is a directory on the other subvols.
+ */
+ if ((op_errno == ENOTCONN) || (op_errno == ENODATA)) {
+ dht_lookup_directory(frame, this, &local->loc);
return 0;
+ }
+ }
+ gf_msg_debug(this->name, op_errno, "%s: Lookup on subvolume %s failed",
+ loc->path, prev->name);
+ goto out;
+ }
+
+ /* Lookup succeeded - op_ret = 0 */
+
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (gf_uuid_is_null(local->gfid)) {
+ /*This is set from the first successful response*/
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+
+ if (!conf->vch_forced) {
+ /* Update the commit hash in conf if it is found */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
+ }
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
-
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (is_dir) {
+ /* A directory is present on all subvols, send the lookup to
+ * all subvols now */
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
+ dht_lookup_directory(frame, this, &local->loc);
return 0;
+ }
-out:
- /*
- * FIXME: postparent->ia_size and postparent->st_blocks do not have
- * correct values. since, postparent corresponds to a directory these
- * two members should have values equal to sum of corresponding values
- * from each of the subvolume. See dht_iatt_merge for reference.
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+
+ if (!is_linkfile) {
+ /* non-directory and not a linkto file. This is a data file
+ * Update the layout to point to the cached subvol
*/
- WIPE (postparent);
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "%s: could not set pre-set layout for subvolume %s",
+ loc->path, prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ goto out;
+ }
+
+ /* This is a linkto file. Get the value of the target subvol from the
+ * linkto xattr and lookup there to see if the file exists
+ */
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: No link subvol for linkto", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
+
+ gf_msg_debug(this->name, 0, "%s: Calling lookup on linkto target %s",
+ loc->path, subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
- postparent);
+ return 0;
+
+out:
+ /*
+ * FIXME: postparent->ia_size and postparent->st_blocks do not have
+ * correct values. since, postparent corresponds to a directory these
+ * two members should have values equal to sum of corresponding values
+ * from each of the subvolume. See dht_iatt_merge for reference.
+ */
+
+ if (!op_ret && local && local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
err:
- return 0;
+ return 0;
}
-/* For directories, check if acl xattrs have been requested (by the acl xlator),
- * if not, request for them. These xattrs are needed for dht dir self-heal to
- * perform proper self-healing of dirs
+/* For directories, check if acl xattrs have been requested (by the acl
+ * xlator), if not, request for them. These xattrs are needed for dht dir
+ * self-heal to perform proper self-healing of dirs
*/
-void
-dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+static void
+dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (inode);
- GF_ASSERT (xattr_req);
+ GF_ASSERT(xattr_req);
- if (inode->ia_type != IA_IFDIR)
- return;
+ if (!dict_get(xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8(xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
- if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
- ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set key %s",
- POSIX_ACL_ACCESS_XATTR);
- }
+ if (!dict_get(xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8(xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
- if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
- ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set key %s",
- POSIX_ACL_DEFAULT_XATTR);
- }
+ return;
+}
- return;
+/* for directories, we need the following info:
+ * the layout : trusted.glusterfs.dht
+ * the mds information : trusted.glusterfs.dht.mds
+ * the acl info: See above
+ */
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Xattr to get the layout for a directory
+ */
+ ret = dict_set_uint32(xattr_req, conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->xattr_name, loc->path);
+ goto err;
+ }
+
+ /*Non-fatal failure */
+ ret = dict_set_uint32(xattr_req, conf->mds_xattr_key, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, loc->path);
+ }
+
+ dht_check_and_set_acl_xattr_req(this, xattr_req);
+ ret = 0;
+err:
+ return ret;
}
-int
-dht_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- xlator_t *subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
- loc_t new_loc = {0,};
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
- if (!conf)
- goto err;
+/* If the hashed subvol is present, send the lookup to only that subvol first.
+ * If no hashed subvol, send a lookup to all subvols and proceed based on the
+ * responses.
+ */
+static int
+dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int call_cnt = 0;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Since we don't know whether this is a file or a directory,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ /* Fuse sets a random value in gfid-req. If the gfid is missing
+ * on one or more subvols, posix will set the gfid to this value,
+ * causing GFID mismatches for directories. Remove the value fuse
+ * has sent before sending the lookup.
+ */
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path);
+ } else {
+ dict_del(local->xattr_req, "gfid-req");
+ }
+ /* This should have been set in dht_lookup */
+ hashed_subvol = local->hashed_subvol;
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "%s: no subvolume in layout for path, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- ret = dht_filter_loc_subvol_key (this, loc, &new_loc,
- &hashed_subvol);
- if (ret) {
- loc_wipe (&local->loc);
- ret = loc_dup (&new_loc, &local->loc);
-
- /* we no more need 'new_loc' entries */
- loc_wipe (&new_loc);
-
- /* check if loc_dup() is successful */
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
- goto err;
- }
+ /* Allocate a layout. This will be populated and saved in
+ * the dht inode_ctx on successful lookup
+ */
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
+ gf_msg_debug(this->name, 0,
+ "%s: Found null hashed subvol. Calling lookup"
+ " on all nodes.",
+ loc->path);
- if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) &&
- !__is_root_gfid (loc->inode->gfid)) {
- local->cached_subvol = NULL;
- dht_discover (frame, this, loc);
- return 0;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
}
+ return 0;
+ }
- if (!hashed_subvol) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to get hashed subvol for %s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
- }
- local->hashed_subvol = hashed_subvol;
+ /* if the hashed_subvol is non-null, send the lookup there first so
+ * as to see whether we have a file or a directory */
+ gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path,
+ hashed_subvol->name);
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->lookup, loc, local->xattr_req);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_TRACE,
- "incomplete layout failure for path=%s",
- loc->path);
+static int
+dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+ int gen = 0;
+
+ conf = this->private;
+ if (!conf) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "path = %s. No layout found in the inode ctx.", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Generation number has changed. This layout may be stale. */
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gen = layout->gen;
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ local->cached_subvol = NULL;
+
+ gf_msg_debug(this->name, 0,
+ "path = %s. In memory layout may be stale."
+ "(layout->gen (%d) is less than "
+ "conf->gen (%d)). Calling fresh lookup.",
+ loc->path, gen, conf->gen);
+
+ dht_do_fresh_lookup(frame, this, loc);
+ return 0;
+ }
+
+ local->inode = inode_ref(loc->inode);
+
+ /* Since we don't know whether this has changed,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ if (IA_ISDIR(local->inode->ia_type)) {
+ ret = dht_inode_ctx_mdsvol_get(local->inode, this, &mds_subvol);
+ if (ret || !mds_subvol) {
+ gf_msg_debug(this->name, 0, "path = %s. No mds subvol in inode ctx",
+ local->loc.path);
+ }
+ local->mds_subvol = mds_subvol;
+ local->call_cnt = conf->subvolume_cnt;
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- local->cached_subvol = NULL;
- goto do_fresh_lookup;
- }
+ /* local->call_cnt will change as responses are processed. Always use a
+ * local copy to loop through the STACK_WIND calls
+ */
- local->inode = inode_ref (loc->inode);
+ call_cnt = local->call_cnt;
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- if (IA_ISDIR (local->inode->ia_type)) {
- local->call_cnt = call_cnt = conf->subvolume_cnt;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_revalidate_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
- }
- return 0;
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, loc,
+ local->xattr_req);
+ }
+ return 0;
+ }
- call_cnt = local->call_cnt = layout->cnt;
+ /* If not a dir, this should be 1 */
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
- /* need it for self-healing linkfiles which is
- 'in-migration' state */
- ret = dict_set_uint32 (local->xattr_req,
- GLUSTERFS_OPEN_FD_COUNT, 4);
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
- /* need it for dir self-heal */
- dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+ gf_msg_debug(this->name, 0,
+ "path = %s. Calling "
+ "revalidate lookup on %s",
+ loc->path, subvol->name);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+/* Depending on the input, decide if this is a:
+ * fresh-lookup: loc->name is provided but no dht inode ctx
+ * revalidation: loc->name is provided, dht inode ctx is present
+ * discover: gfid based nameless lookup.
+ */
- }
- } else {
- do_fresh_lookup:
- /* TODO: remove the hard-coding */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- ret = dict_set_uint32 (local->xattr_req,
- DHT_LINKFILE_KEY, 256);
-
- /* need it for self-healing linkfiles which is
- 'in-migration' state */
- ret = dict_set_uint32 (local->xattr_req,
- GLUSTERFS_OPEN_FD_COUNT, 4);
-
- /* need it for dir self-heal */
- dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
-
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this,
- conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
+int
+dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ loc_t new_loc = {
+ 0,
+ };
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ conf = this->private;
+ if (!conf)
+ goto err;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_filter_loc_subvol_key(this, loc, &new_loc, &hashed_subvol);
+ if (ret) {
+ loc_wipe(&local->loc);
+ ret = loc_dup(&new_loc, &local->loc);
+
+ /* we no longer need 'new_loc' entries */
+ loc_wipe(&new_loc);
+
+ /* check if loc_dup() is successful */
+ if (ret == -1) {
+ op_errno = errno;
+ gf_msg_debug(this->name, errno,
+ "copying location failed for path=%s", loc->path);
+ goto err;
+ }
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ /* Nameless lookup */
+
+ /* This is usually sent by NFS. Lookups are done based on the gfid and
+ * no name information is available. Without the name, dht cannot calculate
+ * the hash and has to send a lookup to all subvols.
+ */
+ if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) &&
+ !__is_root_gfid(loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_do_discover(frame, this, loc);
+ return 0;
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
- }
+ if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
- STACK_WIND (frame, dht_lookup_cbk,
- hashed_subvol, hashed_subvol->fops->lookup,
- loc, local->xattr_req);
- }
+ if (!hashed_subvol)
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ local->hashed_subvol = hashed_subvol;
+ if (is_revalidate(loc)) {
+ /* The entry has been looked up before and has a dht inode_ctx
+ */
+ dht_do_revalidate(frame, this, loc);
+ return 0;
+ } else {
+ /* Entry has not been looked up before
+ */
+ dht_do_fresh_lookup(frame, this, loc);
return 0;
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
-
-int
-dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ local = frame->local;
+ prev = cookie;
- local->op_ret = 0;
+ LOCK(&frame->lock);
+ {
+ if ((op_ret == -1) &&
+ !((op_errno == ENOENT) || (op_errno == ENOTCONN))) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno,
+ "Unlink link: subvolume %s returned -1", prev->name);
+ goto post_unlock;
+ }
- local->postparent = *postparent;
- local->preparent = *preparent;
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+
+ return 0;
+}
- WIPE (&local->postparent);
- WIPE (&local->preparent);
+static int
+dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *hashed_subvol = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno,
+ "Unlink: subvolume %s returned -1", prev->name);
+ goto post_unlock;
}
-unlock:
- UNLOCK (&frame->lock);
- DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
+ local->op_ret = 0;
- return 0;
-}
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!local->op_ret) {
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+ if (hashed_subvol && hashed_subvol != local->cached_subvol) {
+ /*
+ * If hashed and cached are different, then we need
+ * to unlink linkfile from hashed subvol if data
+ * file is deleted successfully
+ */
+ STACK_WIND_COOKIE(frame, dht_unlink_linkfile_cbk, hashed_subvol,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, local->flags, xdata);
+ return 0;
+ }
+ }
+
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+
+ return 0;
+}
-int
-dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_common_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- xlator_t *cached_subvol = NULL;
+static int
+dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ if (op_ret == 0) {
+ /* update the layout in the inode ctx */
local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ layout = local->selfheal.layout;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ dht_layout_set(this, local->loc.inode, layout);
+ }
- if (op_ret == -1)
- goto err;
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s",
- local->loc.path);
- local->op_errno = EINVAL;
- goto err;
+static int
+dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink,
- &local->loc, local->flags, NULL);
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_FSETXATTR)) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+ }
- return 0;
+out:
+ return 0;
+}
-err:
- DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
- NULL, NULL, NULL);
- return 0;
+/* Set the value[] of key into dict after convert from
+ host byte order to network byte order
+*/
+int32_t
+dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size)
+{
+ int ret = -1;
+ int32_t *ptr = NULL;
+ int32_t vindex;
+
+ if (value == NULL) {
+ return -EINVAL;
+ }
+
+ ptr = GF_MALLOC(sizeof(int32_t) * size, gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ for (vindex = 0; vindex < size; vindex++) {
+ ptr[vindex] = hton32(value[vindex]);
+ }
+ ret = dict_set_bin(dict, key, ptr, sizeof(int32_t) * size);
+ if (ret)
+ GF_FREE(ptr);
+ return ret;
}
static int
-dht_ufo_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = cookie;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&frame->lock);
+ local = frame->local;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
- local->op_errno, NULL);
- }
+ if (op_ret)
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
- return 0;
-}
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
-int
-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, op_errno, NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, op_errno, NULL);
+ }
+
+out:
+ return 0;
+}
+
+/* Code to wind a xattrop call to add 1 on current mds internal xattr
+ value
+*/
+static int
+dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ dict_t *xattrop = NULL;
+ int32_t addone[1] = {1};
+ call_frame_t *prev = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto post_unlock;
+ }
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (!local->op_ret) {
+ xattrop = dict_new();
+ if (!xattrop) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
+ "dictionary creation failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, addone, 1);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed ");
+ ret = -1;
+ goto out;
+ }
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_REMOVEXATTR)) {
+ STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol,
+ local->mds_subvol->fops->xattrop, &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ } else {
+ STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fxattrop, local->fd,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ }
+ } else {
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+ }
+ }
+out:
+ if (ret) {
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
- local->op_ret = 0;
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
-unlock:
- UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
- local->op_errno, NULL);
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
}
+ }
+just_return:
+ if (xattrop)
+ dict_unref(xattrop);
+ return 0;
+}
- return 0;
+static int
+dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *mds_subvol = NULL;
+ int i = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ mds_subvol = local->mds_subvol;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ local->op_ret = 0;
+ local->call_cnt = conf->subvolume_cnt - 1;
+ local->xdata = dict_ref(xdata);
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol && (mds_subvol == conf->subvolumes[i]))
+ continue;
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->setxattr, &local->loc,
+ local->xattr, local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsetxattr, local->fd,
+ local->xattr, local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->removexattr, &local->loc,
+ local->key, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fremovexattr, local->fd,
+ local->key, local->xattr_req);
+ }
+ }
+
+ return 0;
+out:
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+
+just_return:
+ return 0;
+}
+
+static int
+dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->setxattr, &local->loc, local->xattr,
+ local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fsetxattr, local->fd, local->xattr,
+ local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->removexattr, &local->loc,
+ local->key, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
+out:
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+
+just_return:
+ return 0;
}
static void
-fill_layout_info (dht_layout_t *layout, char *buf)
+fill_layout_info(dht_layout_t *layout, char *buf)
{
- int i = 0;
- char tmp_buf[128] = {0,};
+ int i = 0;
+ char tmp_buf[128] = {
+ 0,
+ };
+
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf(tmp_buf, sizeof(tmp_buf), "(%s %u %u)",
+ layout->list[i].xlator->name, layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat(buf, " ");
+ strcat(buf, tmp_buf);
+ }
+}
- for (i = 0; i < layout->cnt; i++) {
- snprintf (tmp_buf, 128, "(%s %u %u)",
- layout->list[i].xlator->name,
- layout->list[i].start,
- layout->list[i].stop);
- if (i)
- strcat (buf, " ");
- strcat (buf, tmp_buf);
- }
+static void
+dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf,
+ int32_t alloc_len, int flag, char *layout_buf)
+{
+ if (flag) {
+ if (local->xattr_val) {
+ snprintf(xattr_buf, alloc_len,
+ "((<" DHT_PATHINFO_HEADER "%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name, layout_buf);
+ } else {
+ snprintf(xattr_buf, alloc_len, "(%s-layout %s)", this->name,
+ layout_buf);
+ }
+ } else if (local->xattr_val) {
+ snprintf(xattr_buf, alloc_len, "(<" DHT_PATHINFO_HEADER "%s> %s)",
+ this->name, local->xattr_val);
+ } else {
+ xattr_buf[0] = '\0';
+ }
}
-void
-dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
- char *xattr_buf, int32_t alloc_len,
- int flag, char *layout_buf)
-{
- if (flag && local->xattr_val)
- snprintf (xattr_buf, alloc_len,
- "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
- this->name, local->xattr_val, this->name,
- layout_buf);
- else if (local->xattr_val)
- snprintf (xattr_buf, alloc_len,
- "(<"DHT_PATHINFO_HEADER"%s> %s)",
- this->name, local->xattr_val);
- else if (flag)
- snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
- this->name, layout_buf);
+static int
+dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
+{
+ int ret = -1;
+ char *value = NULL;
+
+ ret = dict_get_str(xattr, local->xsel, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+ "Subvolume %s returned -1", this->name);
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->alloc_len += strlen(value);
+
+ if (!local->xattr_val) {
+ local->alloc_len += (SLEN(DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_MALLOC(local->alloc_len, gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ local->xattr_val[0] = '\0';
+ }
+
+ int plen = strlen(local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC(local->xattr_val, local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ (void)strcat(local->xattr_val, value);
+ (void)strcat(local->xattr_val, " ");
+ local->op_ret = 0;
+
+ ret = 0;
+
+out:
+ return ret;
}
-int
-dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int ret = 0;
- int flag = 0;
- int this_call_cnt = 0;
- char *value_got = NULL;
- char layout_buf[8192] = {0,};
- char *xattr_buf = NULL;
- dict_t *dict = NULL;
- int32_t alloc_len = 0;
- int32_t plen = 0;
- call_frame_t *prev = NULL;
+static int
+dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {
+ 0,
+ };
+
+ if (flag)
+ fill_layout_info(local->layout, layout_buf);
+
+ *dict = dict_new();
+ if (!*dict)
+ goto out;
+
+ local->xattr_val[strlen(local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we don't have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen(this->name)) + strlen(layout_buf) + 40;
+ xattr_buf = GF_MALLOC(local->alloc_len, gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
+
+ if (XATTR_IS_PATHINFO(local->xsel)) {
+ (void)dht_fill_pathinfo_xattr(this, local, xattr_buf, local->alloc_len,
+ flag, layout_buf);
+ } else if ((XATTR_IS_NODE_UUID(local->xsel)) ||
+ (XATTR_IS_NODE_UUID_LIST(local->xsel))) {
+ (void)snprintf(xattr_buf, local->alloc_len, "%s", local->xattr_val);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GET_XATTR_FAILED,
+ "Unknown local->xsel (%s)", local->xsel);
+ GF_FREE(xattr_buf);
+ goto out;
+ }
+
+ ret = dict_set_dynstr(*dict, local->xsel, xattr_buf);
+ if (ret)
+ GF_FREE(xattr_buf);
+ GF_FREE(local->xattr_val);
- local = frame->local;
- prev = cookie;
-
- if (op_ret >= 0) {
- ret = dict_get_str (xattr, local->xsel, &value_got);
- if (!ret) {
- alloc_len = strlen (value_got);
-
- /**
- * allocate the buffer:- we allocate 10 bytes extra in
- * case we need to append ' Link: ' in the buffer for
- * another STACK_WIND
- */
- if (!local->xattr_val) {
- alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
- local->xattr_val =
- GF_CALLOC (alloc_len,
- sizeof (char),
- gf_common_mt_char);
- }
+out:
+ return ret;
+}
- if (local->xattr_val) {
- plen = strlen (local->xattr_val);
- if (plen) {
- /* extra byte(s) for \0 to be safe */
- alloc_len += (plen + 2);
- local->xattr_val =
- GF_REALLOC (local->xattr_val,
- alloc_len);
- if (!local->xattr_val)
- goto out;
- }
-
- strcat (local->xattr_val, value_got);
- }
- local->op_ret = 0;
- }
- } else {
+static int
+dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ char *uuid_str = NULL;
+ char *uuid_list = NULL;
+ char *next_uuid_str = NULL;
+ char *saveptr = NULL;
+ uuid_t node_uuid = {
+ 0,
+ };
+ char *uuid_list_copy = NULL;
+ int count = 0;
+ int i = 0;
+ int index = 0;
+ int found = 0;
+ nodeuuid_info_t *tmp_ptr = NULL;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(frame->local, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ VALIDATE_OR_GOTO(conf->defrag, out);
+
+ gf_msg_debug(this->name, 0, "subvol %s returned", prev->name);
+
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ if (op_errno == ENODATA)
+ gf_msg_debug(this->name, 0, "failed to get node-uuid");
+ else
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
+ goto post_unlock;
+ }
+
+ ret = dict_get_str(xattr, local->xsel, &uuid_list);
+
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_GET_FAILED,
+ "Failed to get %s", local->xsel);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unlock;
+ }
+
+ /* As DHT will not know details of its child xlators
+ * we need to parse this twice to get the count first
+ * and allocate memory later.
+ */
+ count = 0;
+ index = conf->local_subvols_cnt;
+
+ uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
+
+ for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
+ uuid_str = next_uuid_str) {
+ next_uuid_str = strtok_r(NULL, " ", &saveptr);
+ if (gf_uuid_parse(uuid_str, node_uuid)) {
local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 "
- "(%s)", prev->this->name, strerror (op_errno));
+ local->op_errno = EINVAL;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR,
+ "Failed to parse uuid for %s", prev->name);
+ goto post_unlock;
+ }
+
+ count++;
+ if (gf_uuid_compare(node_uuid, conf->defrag->node_uuid)) {
+ gf_msg_debug(this->name, 0,
+ "subvol %s does not"
+ "belong to this node",
+ prev->name);
+ } else {
+ /* handle multiple bricks of the same replica
+ * on the same node */
+ if (found)
+ continue;
+ conf->local_subvols[(conf->local_subvols_cnt)++] = prev;
+ found = 1;
+ gf_msg_debug(this->name, 0,
+ "subvol %s belongs to"
+ " this node",
+ prev->name);
+ }
+ }
+
+ if (!found) {
+ local->op_ret = 0;
+ goto unlock;
+ }
+
+ conf->local_nodeuuids[index].count = count;
+ conf->local_nodeuuids[index].elements = GF_CALLOC(
+ count, sizeof(nodeuuid_info_t), 1);
+
+ /* The node-uuids are guaranteed to be returned in the same
+ * order as the bricks
+ * A null node-uuid is returned for a brick that is down.
+ */
+
+ saveptr = NULL;
+ i = 0;
+
+ for (uuid_str = strtok_r(uuid_list_copy, " ", &saveptr); uuid_str;
+ uuid_str = next_uuid_str) {
+ next_uuid_str = strtok_r(NULL, " ", &saveptr);
+ tmp_ptr = &(conf->local_nodeuuids[index].elements[i]);
+ gf_uuid_parse(uuid_str, tmp_ptr->uuid);
+
+ if (!gf_uuid_compare(tmp_ptr->uuid, conf->defrag->node_uuid)) {
+ tmp_ptr->info = REBAL_NODEUUID_MINE;
+ }
+ i++;
+ tmp_ptr = NULL;
}
+ }
- out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
+ local->op_ret = 0;
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!is_last_call(this_call_cnt))
+ goto out;
- if (local->op_ret == -1) {
- goto unwind;
- }
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- if (local->layout->cnt > 1) {
- /* Set it for directory */
- fill_layout_info (local->layout, layout_buf);
- flag = 1;
- }
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, xattr, xdata);
+ goto out;
+
+unwind:
- dict = dict_new ();
+ GF_FREE(conf->local_nodeuuids[index].elements);
+ conf->local_nodeuuids[index].elements = NULL;
- /* we would need max this many bytes to create xattr string */
- alloc_len += (2 * strlen (this->name))
- + strlen (layout_buf)
- + 40;
- xattr_buf = GF_CALLOC (alloc_len,
- sizeof (char), gf_common_mt_char);
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, xdata);
+out:
+ GF_FREE(uuid_list_copy);
+ return 0;
+}
- if (XATTR_IS_PATHINFO (local->xsel)) {
- (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
- alloc_len, flag,
- layout_buf);
- } else if (XATTR_IS_NODE_UUID (local->xsel)) {
- (void) snprintf (xattr_buf, alloc_len, "%s",
- local->xattr_val);
- } else
- gf_log (this->name, GF_LOG_WARNING,
- "Unknown local->xsel (%s)", local->xsel);
+static int
+dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dict_t *dict = NULL;
- ret = dict_set_dynstr (dict, local->xsel, xattr_buf);
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(frame->local, out);
- if (local->xattr_val)
- GF_FREE (local->xattr_val);
+ local = frame->local;
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
- xdata);
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir");
+ goto post_unlock;
+ }
- if (dict)
- dict_unref (dict);
+ goto unlock;
+ }
- return 0;
+ ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DICT_SET_FAILED,
+ "alloc or fill failure");
+ goto post_unlock;
}
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!is_last_call(this_call_cnt))
+ goto out;
+
+ /* -- last call: do patch ups -- */
+
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ ret = dht_vgetxattr_fill_and_set(local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
unwind:
- DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL);
+cleanup:
+ if (dict)
+ dict_unref(dict);
+out:
+ return 0;
}
-int
-dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
+static int
+dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- int ret = 0;
- char *value = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ xlator_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+ "vgetxattr: Subvolume %s returned -1", prev->name);
+ goto unwind;
+ }
+
+ ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Allocation or fill failure");
+ goto unwind;
+ }
+
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
+
+ ret = dht_vgetxattr_fill_and_set(local, &dict, this, flag);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
- if (op_ret != -1) {
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
- if (!ret) {
- ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value);
- if (!ret)
- gf_log (this->name, GF_LOG_TRACE,
- "failed to set linkinfo");
- }
+unwind:
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL);
+cleanup:
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
+
+static int
+dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ if (op_ret != -1) {
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (!ret) {
+ ret = dict_set_str(xattr, GF_XATTR_LINKINFO_KEY, value);
+ if (!ret)
+ gf_msg_trace(this->name, 0, "failed to set linkinfo");
}
+ }
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
- return 0;
+ return 0;
+}
+
+static int
+dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto out;
+ }
+ dict_del(xattr, conf->xattr_name);
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref(xattr, NULL);
+ }
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
+ xdata);
+ return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
int
-dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- int this_call_cnt = 0;
- dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
- local = frame->local;
+ conf = this->private;
+ local = frame->local;
- this_call_cnt = dht_frame_return (frame);
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto err;
+ return 0;
+ }
- if (!xattr || (op_ret == -1))
- goto out;
+ LOCK(&frame->lock);
+ {
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto unlock;
+ }
+
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
- if (dict_get (xattr, "trusted.glusterfs.dht")) {
- dict_del (xattr, "trusted.glusterfs.dht");
+ dict_del(xattr, conf->commithash_xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
}
+
local->op_ret = 0;
if (!local->xattr) {
- local->xattr = dict_copy_with_ref (xattr, NULL);
+ local->xattr = dict_copy_with_ref(xattr, NULL);
} else {
- /* first aggregate everything into xattr and then copy into
- * local->xattr. This is required as we want to have
- * 'local->xattr' as the proper final dictionary passed above
- * distribute xlator.
- */
- dht_aggregate_xattr (xattr, local->xattr);
- local->xattr = dict_copy (xattr, local->xattr);
+ dht_aggregate_xattr(local->xattr, xattr);
}
-out:
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
- local->xattr, NULL);
+
+ if (!local->xdata) {
+ local->xdata = dict_ref(xdata);
+ } else if ((local->inode && IA_ISDIR(local->inode->ia_type)) ||
+ (local->fd && IA_ISDIR(local->fd->inode->ia_type))) {
+ dht_aggregate_xattr(local->xdata, xdata);
}
- return 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /* If we have a valid xattr received from any one of the
+ * subvolume, let's return it */
+ if (local->xattr) {
+ local->op_ret = 0;
+ }
+
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
+ local->xdata);
+ }
+ return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
-int32_t
-dht_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+static int32_t
+dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+static int
+dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
-int
-dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- xlator_t **sub_volumes = NULL;
- int op_errno = -1;
- int i = 0;
- int cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
- if (!local) {
- op_errno = ENOMEM;
+ local = frame->local;
- goto err;
+ LOCK(&frame->lock);
+ {
+ if (local->op_errno == EOPNOTSUPP) {
+ /* Nothing to do here, we have already found
+ * a subvol which does not have the get_real_filename
+ * optimization. If condition is for simple logic.
+ */
+ goto unlock;
}
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "layout is NULL");
- op_errno = ENOENT;
- goto err;
- }
+ if (op_ret == -1) {
+ if (op_errno == EOPNOTSUPP) {
+ /* This subvol does not have the optimization.
+ * Better let the user know we don't support it.
+ * Remove previous results if any.
+ */
- if (key) {
- local->key = gf_strdup (key);
- if (!local->key) {
- op_errno = ENOMEM;
- goto err;
+ if (local->xattr) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
}
- }
- if (key && ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)
- || strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)) {
- cached_subvol = local->cached_subvol;
- (void) strncpy (local->xsel, key, 256);
+ if (local->xattr_req) {
+ dict_unref(local->xattr_req);
+ local->xattr_req = NULL;
+ }
- local->call_cnt = 1;
- STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
- cached_subvol->fops->getxattr, loc, key, NULL);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UPGRADE_BRICKS,
+ "At least "
+ "one of the bricks does not support "
+ "this operation. Please upgrade all "
+ "bricks.");
+ goto post_unlock;
+ }
+
+ if (op_errno == ENOATTR) {
+ /* Do nothing, our defaults are set to this.
+ */
+ goto unlock;
+ }
+
+ /* This is a place holder for every other error
+ * case. I am not sure of how to interpret
+ * ENOTCONN etc. As of now, choosing to ignore
+ * down subvol and return a good result(if any)
+ * from other subvol.
+ */
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "Failed to get real filename.");
+ goto post_unlock;
+ }
+
+ /* This subvol has the required file.
+ * There could be other subvols which have returned
+ * success already, choosing to return the latest good
+ * result.
+ */
+ if (local->xattr)
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+
+ if (local->xattr_req) {
+ dict_unref(local->xattr_req);
+ local->xattr_req = NULL;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->op_ret = op_ret;
+ local->op_errno = 0;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, 0, "Found a matching file.");
+ goto post_unlock;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->xattr, local->xattr_req);
+ }
+
+ return 0;
+}
- return 0;
- }
- if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get"
- "hashed subvol for %s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+static int
+dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get"
- "cached subvol for %s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ layout = local->layout;
- if (hashed_subvol == cached_subvol) {
- op_errno = ENODATA;
- goto err;
- }
- if (hashed_subvol) {
- STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
- hashed_subvol->fops->getxattr, loc,
- GF_XATTR_PATHINFO_KEY, NULL);
- return 0;
- }
- op_errno = ENODATA;
- goto err;
- }
+ cnt = local->call_cnt = layout->cnt;
- if (key && (!strcmp (GF_XATTR_MARKER_KEY, key))
- && (-1 == frame->root->pid)) {
+ local->op_ret = -1;
+ local->op_errno = ENOATTR;
- if (loc->inode-> ia_type == IA_IFDIR) {
- cnt = layout->cnt;
- } else {
- cnt = 1;
- }
- sub_volumes = alloca ( cnt * sizeof (xlator_t *));
- for (i = 0; i < cnt; i++)
- *(sub_volumes + i) = layout->list[i].xlator;
-
- if (cluster_getmarkerattr (frame, this, loc, key,
- local, dht_getxattr_unwind,
- sub_volumes, cnt,
- MARKER_UUID_TYPE, conf->vol_uuid)) {
- op_errno = EINVAL;
- goto err;
- }
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_get_real_filename_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
+ }
- return 0;
+ return 0;
+}
+
+static int
+dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ layout = local->layout;
+
+ for (i = 0; i < layout->cnt; i++)
+ subvols[i] = layout->list[i].xlator;
+
+ return layout->cnt;
+}
+
+static int
+dht_is_debug_xattr_key(const char **array, char *key)
+{
+ int i = 0;
+
+ for (i = 0; array[i]; i++) {
+ if (fnmatch(array[i], key, FNM_NOESCAPE) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+/* Note we already have frame->local initialised here*/
+
+static int
+dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ char *value = NULL;
+ loc_t file_loc = {0};
+ const char *name = NULL;
+
+ local = frame->local;
+
+ if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) {
+ goto out;
+ }
+
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (strncmp(key, DHT_DBG_HASHED_SUBVOL_KEY,
+ SLEN(DHT_DBG_HASHED_SUBVOL_KEY)) == 0) {
+ name = key + strlen(DHT_DBG_HASHED_SUBVOL_KEY);
+ if (strlen(name) == 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = dht_build_child_loc(this, &file_loc, loc, (char *)name);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
}
- if (key && *conf->vol_uuid) {
- if ((match_uuid_local (key, conf->vol_uuid) == 0) &&
- (-1 == frame->root->pid)) {
- if (loc->inode-> ia_type == IA_IFDIR) {
- cnt = layout->cnt;
- } else {
- cnt = 1;
- }
- sub_volumes = alloca ( cnt * sizeof (xlator_t *));
- for (i = 0; i < cnt; i++)
- sub_volumes[i] = layout->list[i].xlator;
-
- if (cluster_getmarkerattr (frame, this, loc, key,
- local, dht_getxattr_unwind,
- sub_volumes, cnt,
- MARKER_XTIME_TYPE,
- conf->vol_uuid)) {
- op_errno = EINVAL;
- goto err;
- }
+ local->hashed_subvol = dht_subvol_get_hashed(this, &file_loc);
+ if (local->hashed_subvol == NULL) {
+ op_errno = ENODATA;
+ goto out;
+ }
- return 0;
- }
+ value = gf_strdup(local->hashed_subvol->name);
+ if (!value) {
+ op_errno = ENOMEM;
+ goto out;
}
- if (loc->inode-> ia_type == IA_IFDIR) {
- cnt = local->call_cnt = layout->cnt;
- } else {
- cnt = local->call_cnt = 1;
+ ret = dict_set_dynstr(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
}
+ ret = 0;
+ goto out;
+ }
+out:
+ loc_wipe(&file_loc);
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+
+/* Virtual Xattr which returns 1 if all subvols are up,
+ else returns 0. Geo-rep then uses this virtual xattr
+ after a fresh mount and starts the I/O.
+*/
+
+enum dht_vxattr_subvol {
+ DHT_VXATTR_SUBVOLS_UP = 1,
+ DHT_VXATTR_SUBVOLS_DOWN = 0,
+};
+
+int
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ int value = DHT_VXATTR_SUBVOLS_UP;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!key) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ value = DHT_VXATTR_SUBVOLS_DOWN;
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ ret = dict_set_int8(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+
+int
+dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+ char *node_uuid_key = NULL;
+ int ret = -1;
+
+ GF_CHECK_XATTR_KEY_AND_GOTO(key, IO_THREADS_QUEUE_SIZE_KEY, op_errno, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_GETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* skip over code which is irrelevant without a valid key */
+ if (!key)
+ goto no_key;
+
+ local->key = gf_strdup(key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
+
+ /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
+ if (!DHT_IS_DIR(layout))
+ goto no_dht_is_dir;
+
+ if ((strncmp(key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) &&
+ DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename(frame, this, loc, key, xdata);
+ return 0;
+ }
+
+ if (!strcmp(key, GF_REBAL_FIND_LOCAL_SUBVOL)) {
+ ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_LIST_NODE_UUIDS_KEY);
+ if (ret == -1 || !node_uuid_key) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Failed to copy node uuid key");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key);
+ cnt = local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ node_uuid_key, xdata);
+ }
+ if (node_uuid_key)
+ GF_FREE(node_uuid_key);
+ return 0;
+ }
+
+ if (!strcmp(key, GF_REBAL_OLD_FIND_LOCAL_SUBVOL)) {
+ ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_NODE_UUID_KEY);
+ if (ret == -1 || !node_uuid_key) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Failed to copy node uuid key");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key);
+ cnt = local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ node_uuid_key, xdata);
+ }
+ if (node_uuid_key)
+ GF_FREE(node_uuid_key);
+ return 0;
+ }
+
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+
+ if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_LIST_NODE_UUIDS_KEY) == 0)) {
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key);
+ cnt = local->call_cnt = layout->cnt;
for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->getxattr,
- loc, key, NULL);
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_vgetxattr_dir_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
}
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+no_dht_is_dir:
+ /* node-uuid or pathinfo for files */
+ if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0)) {
+ cached_subvol = local->cached_subvol;
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key);
+ local->call_cnt = 1;
+ STACK_WIND_COOKIE(frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol, cached_subvol->fops->getxattr, loc,
+ key, xdata);
return 0;
-}
+ }
-int
-dht_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *key, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int op_errno = -1;
- int i = 0;
- int cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
+ if (strcmp(key, GF_XATTR_LINKINFO_KEY) == 0) {
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
}
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "layout is NULL");
- op_errno = ENOENT;
- goto err;
+ cached_subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
}
- if (key) {
- local->key = gf_strdup (key);
- if (!local->key) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (hashed_subvol == cached_subvol) {
+ op_errno = ENODATA;
+ goto err;
}
- if (fd->inode->ia_type == IA_IFDIR) {
- cnt = local->call_cnt = layout->cnt;
+ STACK_WIND(frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
+ hashed_subvol->fops->getxattr, loc, GF_XATTR_PATHINFO_KEY,
+ xdata);
+ return 0;
+ }
+
+ if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) >= 0) {
+ dht_handle_debug_getxattr(frame, this, loc, key);
+ return 0;
+ }
+
+no_key:
+ if (cluster_handle_marker_getxattr(frame, loc, key, conf->vol_uuid,
+ dht_getxattr_unwind,
+ dht_marker_populate_args) == 0)
+ return 0;
+
+ if (DHT_IS_DIR(layout)) {
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ if (!mds_subvol) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Cannot determine MDS, fetching xattr %s randomly"
+ " from a subvol for path %s ",
+ key, loc->path);
} else {
- cnt = local->call_cnt = 1;
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of mds_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS %s is down for path"
+ " path %s so fetching xattr "
+ "%s randomly from a subvol ",
+ local->mds_subvol->name, loc->path, key);
+ ret = 1;
+ }
+ }
+ }
}
- for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->fgetxattr,
- fd, key, NULL);
+ if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
+ STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
+ local->mds_subvol->fops->getxattr, loc, key, xdata);
+
+ return 0;
}
- return 0;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->getxattr, loc,
+ key, xdata);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+#undef DHT_IS_DIR
int
-dht_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
+dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
- data_pair_t *trav = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr,
- trav, op_errno, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+ xlator_t *mds_subvol = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ if (key) {
+ local->key = gf_strdup(key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ gf_uuid_unparse(fd->inode->gfid, gfid);
+
+ if ((fd->inode->ia_type == IA_IFDIR) && key &&
+ (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) !=
+ 0)) {
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol);
+
+ if (!mds_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "cannot determine MDS, fetching xattr %s "
+ " randomly from a subvol for gfid %s ",
+ key, gfid);
+ } else {
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of hashed_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvolume %s is down"
+ " for gfid %s so fetching xattr "
+ " %s randomly from a subvol ",
+ local->mds_subvol->name, gfid, key);
+ ret = 1;
+ }
+ }
+ }
}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
+ STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fgetxattr, fd, key, NULL);
- local->call_cnt = 1;
+ return 0;
+ }
- STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
- fd, xattr, flags, NULL);
+ } else {
+ cnt = local->call_cnt = 1;
+ }
- return 0;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, fd,
+ key, NULL);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
static int
-dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, &local->loc,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
+ NULL);
+ return 0;
}
int
-dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
+dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int i = -1;
- int ret = -1;
- char *value = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
- local = frame->local;
- prev = cookie;
- conf = this->private;
+ local = frame->local;
+ prev = cookie;
- if (op_ret == -1)
- goto out;
+ local->op_errno = op_errno;
-
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if ((local->fop == GF_FOP_FSETXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
- goto out;
+ goto out;
+ return 0;
+ }
- if (!strcmp (value, local->key)) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev->this)
- conf->decommissioned_bricks[i] = prev->this;
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+
+ if ((!op_ret) && !stbuf) {
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_setxattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ dht_setxattr2(this, subvol2, frame, 0);
+ return 0;
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
- }
- return 0;
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ } else {
+ DHT_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ }
+
+ return 0;
}
-int
-dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int op_errno = EINVAL;
- int ret = -1;
- data_t *tmp = NULL;
- uint32_t dir_spread = 0;
- char value[4096] = {0,};
- gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
- int call_cnt = 0;
- data_pair_t *trav = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr,
- trav, op_errno, err);
-
- conf = this->private;
- local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+/* Function is call by dict_foreach_fnmatch if key is match with
+ user.* and set boolean flag to true
+*/
+static int
+dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *user_xattr_found = data;
+ *user_xattr_found = _gf_true;
+ return 0;
+}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory
+ */
+static int
+dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xattr, int flags,
+ dict_t *xdata, int *op_errno)
+
+{
+ dict_t *xattrop = NULL;
+ int32_t subone[1] = {-1};
+ gf_boolean_t uxattr_key_found = _gf_false;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *travvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_cnt = 0;
+ dht_local_t *local = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
+
+ conf = this->private;
+ local = frame->local;
+ call_cnt = conf->subvolume_cnt;
+ local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid_local);
+ }
+
+ if ((local->fop == GF_FOP_SETXATTR) || (local->fop == GF_FOP_FSETXATTR)) {
+ /* Check if any user xattr present in xattr
+ */
+ dict_foreach_fnmatch(xattr, "user*", dht_is_user_xattr,
+ &uxattr_key_found);
+
+ /* Check if any custom key xattr present in dict xattr
+ and start index from 1 because user xattr already
+ checked in previous line
+ */
+ for (i = 1; xattrs_to_heal[i]; i++)
+ if (dict_get(xattr, xattrs_to_heal[i]))
+ uxattr_key_found = _gf_true;
+ }
+
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ /* Check if any custom key xattr present in local->key
+ */
+ for (i = 0; xattrs_to_heal[i]; i++)
+ if (strstr(local->key, xattrs_to_heal[i]))
+ uxattr_key_found = _gf_true;
+ }
+
+ /* If there is no custom key xattr present or gfid is root
+ or call_cnt is 1 then wind a (f)setxattr call on all subvols
+ */
+ if (!uxattr_key_found || __is_root_gfid(local->gfid) || call_cnt == 1) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ travvol = conf->subvolumes[i];
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_FSETXATTR)) {
+ if (fd) {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->fsetxattr, fd, xattr,
+ flags, xdata);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->setxattr, loc, xattr,
+ flags, xdata);
+ }
+ }
+
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ if (fd) {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->fremovexattr, fd,
+ local->key, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->removexattr, loc,
+ local->key, local->xattr_req);
+ }
+ }
}
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
+ return 0;
+ }
+
+ /* Calculate hash subvol based on inode and parent inode
+ */
+ if (fd) {
+ ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol);
+ } else {
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ }
+ if (ret || !mds_subvol) {
+ if (fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for fd %p"
+ "gfid is %s ",
+ fd, gfid_local);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "%s: Failed to get mds subvol. (gfid is %s)", loc->path,
+ gfid_local);
+ }
+ (*op_errno) = ENOENT;
+ goto err;
+ }
+
+ local->mds_subvol = mds_subvol;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvol is down for path "
+ " %s gfid is %s Unable to set xattr ",
+ local->loc.path, gfid_local);
+ (*op_errno) = ENOTCONN;
goto err;
+ }
+ }
+ }
+
+ if (uxattr_key_found) {
+ xattrop = dict_new();
+ if (!xattrop) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
+ "dictionary creation failed for path %s "
+ "for gfid is %s ",
+ local->loc.path, gfid_local);
+ (*op_errno) = ENOMEM;
+ goto err;
+ }
+ local->xattr = dict_ref(xattr);
+ /* Subtract current MDS xattr value to -1 , value of MDS
+ xattr represents no. of times xattr modification failed
+ on non MDS subvols.
+ */
+ ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, subone, 1);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed for path %s "
+ "for gfid is %s ",
+ local->loc.path, gfid_local);
+ if (xattrop)
+ dict_unref(xattrop);
+ (*op_errno) = ret;
+ goto err;
+ }
+ /* Wind a xattrop call to use ref counting approach
+ update mds xattr to -1 before update xattr on
+ hashed subvol and update mds xattr to +1 after update
+ xattr on all non hashed subvol
+ */
+ if (fd) {
+ STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fxattrop, fd,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ } else {
+ STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->xattrop, loc,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
}
+ if (xattrop)
+ dict_unref(xattrop);
+ }
- local->call_cnt = call_cnt = layout->cnt;
+ return 0;
+err:
+ return -1;
+}
- /* This key is sent by Unified File and Object storage
- * to test xattr support in backend.
- */
- tmp = dict_get (xattr, "user.ufo-test");
- if (tmp) {
- if (IA_ISREG (loc->inode->ia_type)) {
- op_errno = ENOTSUP;
- goto err;
- }
- local->op_ret = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_ufo_xattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags, NULL);
- }
- return 0;
+int
+dht_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = call_cnt = layout->cnt;
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, xattr,
+ flags, xdata, &op_errno);
+ if (ret)
+ goto err;
+ } else {
+ local->call_cnt = 1;
+ local->rebalance.xattr = dict_ref(xattr);
+ local->rebalance.flags = flags;
+
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
}
- tmp = dict_get (xattr, "distribute.migrate-data");
- if (tmp) {
- if (IA_ISDIR (loc->inode->ia_type)) {
- op_errno = ENOTSUP;
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, fd, xattr, flags,
+ local->xattr_req);
+ }
+ return 0;
- /* TODO: need to interpret the 'value' for more meaning
- (ie, 'target' subvolume given there, etc) */
- memcpy (value, tmp->data, tmp->len);
- if (strcmp (value, "force") == 0)
- forced_rebalance =
- GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
-
- if (conf->decommission_in_progress)
- forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
-
- local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
- if (!local->rebalance.target_node) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get "
- "hashed subvol for %s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
- local->rebalance.from_subvol = local->cached_subvol;
+ return 0;
+}
- if (local->rebalance.target_node == local->rebalance.from_subvol) {
- op_errno = EEXIST;
- goto err;
- }
- if (local->rebalance.target_node) {
- local->flags = forced_rebalance;
+static int
+dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret == -1)
+ goto out;
+
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (ret)
+ goto out;
+
+ if (!strcmp(value, local->key)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev)
+ conf->decommissioned_bricks[i] = prev;
+ }
+ }
+
+out:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, ENOTSUP, NULL);
+ }
+ return 0;
+}
- ret = dht_start_rebalance_task (this, frame);
- if (!ret)
- return 0;
+static int
+dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to create a new synctask",
- loc->path);
- }
- op_errno = EINVAL;
- goto err;
+static int
+dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
+{
+ if (!IA_ISDIR(loc->inode->ia_type)) {
+ DHT_STACK_UNWIND(setxattr, frame, -1, ENOTSUP, NULL);
+ return 0;
+ }
- }
+ /* Setxattr didn't need the parent, but rmdir does. */
+ loc->parent = inode_parent(loc->inode, NULL, NULL);
+ if (!loc->parent) {
+ DHT_STACK_UNWIND(setxattr, frame, -1, ENOENT, NULL);
+ return 0;
+ }
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+
+ if (!loc->name && loc->path) {
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name) {
+ ++(loc->name);
+ }
+ }
+
+ /*
+ * We do this instead of calling dht_rmdir_do directly for two reasons.
+ * The first is that we want to reuse all of the initialization that
+ * dht_rmdir does, so if it ever changes we'll just follow along. The
+ * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't
+ * obscure the fact that we came in via this path instead of a genuine
+ * rmdir. That makes debugging just a tiny bit easier.
+ */
+ STACK_WIND(frame, dht_nuke_dir_cbk, this, this->fops->rmdir, loc, 1, NULL);
+
+ return 0;
+}
- tmp = dict_get (xattr, "decommission-brick");
- if (tmp) {
- /* This operation should happen only on '/' */
- if (!__is_root_gfid (loc->inode->gfid)) {
- op_errno = ENOTSUP;
- goto err;
- }
+int
+dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int op_errno = EINVAL;
+ int ret = -1;
+ data_t *tmp = NULL;
+ uint32_t dir_spread = 0;
+ char value[4096] = {
+ 0,
+ };
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
+ int call_cnt = 0;
+ uint32_t new_hash = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* Rebalance daemon is allowed to set internal keys */
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+ tmp = dict_get(xattr, conf->mds_xattr_key);
+ if (tmp) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ tmp = dict_get(xattr, GF_XATTR_FILE_MIGRATE_KEY);
+ if (tmp) {
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ /* TODO: need to interpret the 'value' for more meaning
+ (ie, 'target' subvolume given there, etc) */
+ memcpy(value, tmp->data, tmp->len);
+ if (strcmp(value, "force") == 0)
+ forced_rebalance = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+
+ if (!loc->path) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!local->loc.name)
+ local->loc.name = strrchr(local->loc.path, '/') + 1;
+
+ if (!local->loc.parent)
+ local->loc.parent = inode_parent(local->loc.inode, NULL, NULL);
+
+ if ((!local->loc.name) || (!local->loc.parent)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (gf_uuid_is_null(local->loc.pargfid))
+ gf_uuid_copy(local->loc.pargfid, local->loc.parent->gfid);
+
+ methods->migration_get_dst_subvol(this, local);
+
+ if (!local->rebalance.target_node) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.from_subvol = local->cached_subvol;
+
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
+ goto err;
+ }
+ if (local->rebalance.target_node) {
+ local->flags = forced_rebalance;
- memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
- local->key = gf_strdup (value);
- local->call_cnt = conf->subvolume_cnt;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
- for (i = 0 ; i < conf->subvolume_cnt; i++) {
- /* Get the pathinfo, and then compare */
- STACK_WIND (frame, dht_checking_pathinfo_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->getxattr,
- loc, GF_XATTR_PATHINFO_KEY, NULL);
- }
+ ret = dht_start_rebalance_task(this, frame);
+ if (!ret)
return 0;
+
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "%s: failed to create a new rebalance synctask", loc->path);
}
+ op_errno = EINVAL;
+ goto err;
+ }
- tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
- if (tmp) {
- gf_log (this->name, GF_LOG_INFO,
- "fixing the layout of %s", loc->path);
+ tmp = dict_get(xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (!__is_root_gfid(loc->inode->gfid)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
- ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
- layout);
- if (ret) {
- op_errno = ENOTCONN;
- goto err;
- }
- return ret;
+ memcpy(value, tmp->data, min(tmp->len, 4095));
+ local->key = gf_strdup(value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND_COOKIE(frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ GF_XATTR_PATHINFO_KEY, NULL);
}
+ return 0;
+ }
- tmp = dict_get (xattr, "distribute.directory-spread-count");
- if (tmp) {
- /* Setxattr value is packed as 'binary', not string */
- memcpy (value, tmp->data, ((tmp->len < 4095)?tmp->len:4095));
- ret = gf_string2uint32 (value, &dir_spread);
- if (!ret && ((dir_spread <= conf->subvolume_cnt) &&
- (dir_spread > 0))) {
- layout->spread_cnt = dir_spread;
-
- ret = dht_fix_directory_layout (frame,
- dht_common_setxattr_cbk,
- layout);
- if (ret) {
- op_errno = ENOTCONN;
- goto err;
- }
- return ret;
- }
- gf_log (this->name, GF_LOG_ERROR,
- "wrong 'directory-spread-count' value (%s)", value);
- op_errno = ENOTSUP;
+ tmp = dict_get(xattr, GF_XATTR_FIX_LAYOUT_KEY);
+ if (tmp) {
+ ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash);
+ if (ret == 0) {
+ gf_msg_debug(this->name, 0,
+ "updating commit hash for %s from %u to %u",
+ uuid_utoa(loc->gfid), layout->commit_hash, new_hash);
+ layout->commit_hash = new_hash;
+
+ ret = dht_update_commit_hash_for_layout(frame);
+ if (ret) {
+ op_errno = ENOTCONN;
goto err;
+ }
+ return ret;
}
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_err_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags, xdata);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_FIX_LAYOUT_INFO,
+ "fixing the layout of %s", loc->path);
+
+ ret = dht_fix_directory_layout(frame, dht_fix_layout_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
}
+ return ret;
+ }
+
+ tmp = dict_get(xattr, "distribute.directory-spread-count");
+ if (tmp) {
+ /* Setxattr value is packed as 'binary', not string */
+ memcpy(value, tmp->data, min(tmp->len, 4095));
+ ret = gf_string2uint32(value, &dir_spread);
+ if (!ret && ((dir_spread <= conf->subvolume_cnt) && (dir_spread > 0))) {
+ layout->spread_cnt = dir_spread;
+
+ ret = dht_fix_directory_layout(frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_OPERATION_NOT_SUP,
+ "wrong 'directory-spread-count' value (%s)", value);
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ tmp = dict_get(xattr, "glusterfs.dht.nuke");
+ if (tmp) {
+ return dht_nuke_dir(frame, this, loc, tmp);
+ }
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, xattr,
+ flags, xdata, &op_errno);
+ if (ret)
+ goto err;
+ } else {
+ local->rebalance.xattr = dict_ref(xattr);
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
- return 0;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, loc, xattr, flags,
+ local->xattr_req);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (!frame || !frame->local)
+ goto err;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ local = frame->local;
+ op_errno = local->op_errno;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
- local->op_errno, NULL);
- }
+ local->call_cnt = 2; /* This is the second attempt */
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
return 0;
-}
+ }
+
+ if (subvol == NULL)
+ goto err;
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, &local->loc, local->key,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
int
-dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
+dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int call_cnt = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
- int i;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (this, err);
+ local->op_errno = op_errno;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*",
- key, op_errno, err);
+ if ((local->fop == GF_FOP_FREMOVEXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
- local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (local->call_cnt != 1)
+ goto out;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- layout = local->layout;
- if (!local->layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if ((!op_ret) && !stbuf) {
+ goto out;
+ }
- local->call_cnt = call_cnt = layout->cnt;
- local->key = gf_strdup (key);
+ local->op_ret = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_removexattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->removexattr,
- loc, key, NULL);
- }
+ local->rebalance.target_op_fn = dht_removexattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
- return 0;
+ ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ dht_removexattr2(this, subvol2, frame, 0);
+ return 0;
+ }
+
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ } else {
+ DHT_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ }
+ return 0;
}
int
-dht_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *key, dict_t *xdata)
+dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int call_cnt = 0;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup(key);
+
+ if (key && (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, NULL, 0,
+ local->xattr_req, &op_errno);
+ if (ret)
+ goto err;
- int i;
+ } else {
+ local->call_cnt = 1;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set dictionary key %s for %s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
- VALIDATE_OR_GOTO (this, err);
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, loc, key,
+ local->xattr_req);
+ }
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*",
- key, op_errno, err);
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
+}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for inode=%s",
- uuid_utoa (fd->inode->gfid));
- op_errno = EINVAL;
- goto err;
- }
+int
+dht_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO(frame, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for inode=%s",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug(this->name, 0, "no layout for inode=%s",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup(key);
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, NULL, 0,
+ local->xattr_req, &op_errno);
+ if (ret)
+ goto err;
- layout = local->layout;
- if (!local->layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for inode=%s", uuid_utoa (fd->inode->gfid));
- op_errno = EINVAL;
- goto err;
+ } else {
+ local->call_cnt = 1;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
}
- local->call_cnt = call_cnt = layout->cnt;
- local->key = gf_strdup (key);
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, fd, key,
+ local->xattr_req);
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_removexattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fremovexattr,
- fd, key, NULL);
- }
-
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+dht_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ local = frame->local;
+ prev = cookie;
- local->op_ret = 0;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
-unlock:
- UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd, NULL);
-
- return 0;
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd,
+ NULL);
+
+ return 0;
}
/*
* dht_normalize_stats -
*/
static void
-dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
- unsigned long frsize)
+dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
+ unsigned long frsize)
{
- double factor = 0;
-
- if (buf->f_bsize != bsize) {
- buf->f_bsize = bsize;
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
-
- }
+ double factor = 0;
+
+ if (buf->f_bsize != bsize) {
+ buf->f_bsize = bsize;
+ }
+
+ if (buf->f_frsize != frsize) {
+ factor = ((double)buf->f_frsize) / frsize;
+ buf->f_frsize = frsize;
+ buf->f_blocks = (fsblkcnt_t)(factor * buf->f_blocks);
+ buf->f_bfree = (fsblkcnt_t)(factor * buf->f_bfree);
+ buf->f_bavail = (fsblkcnt_t)(factor * buf->f_bavail);
+ }
}
-int
-dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata)
+static int
+dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
+ gf_boolean_t event = _gf_false;
+ qdstatfs_action_t action = qdstatfs_action_OFF;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int bsize = 0;
+ int frsize = 0;
+ GF_UNUSED int ret = 0;
+ unsigned long new_usage = 0;
+ unsigned long cur_usage = 0;
+
+ local = frame->local;
+ GF_ASSERT(local);
+
+ if (xdata)
+ ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ if (!statvfs) {
+ op_errno = EINVAL;
+ local->op_ret = -1;
+ goto unlock;
+ }
+ local->op_ret = 0;
+ if (local->quota_deem_statfs) {
+ if (event == _gf_true) {
+ action = qdstatfs_action_COMPARE;
+ } else {
+ action = qdstatfs_action_NEGLECT;
+ }
+ } else {
+ if (event == _gf_true) {
+ action = qdstatfs_action_REPLACE;
+ local->quota_deem_statfs = _gf_true;
+ }
+ }
- local = frame->local;
+ if (local->quota_deem_statfs) {
+ switch (action) {
+ case qdstatfs_action_NEGLECT:
+ goto unlock;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- local->op_ret = 0;
+ case qdstatfs_action_REPLACE:
+ local->statvfs = *statvfs;
+ goto unlock;
- if (local->statvfs.f_bsize != 0) {
- bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
- frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
- dht_normalize_stats(&local->statvfs, bsize, frsize);
- dht_normalize_stats(statvfs, bsize, frsize);
- } else {
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
- }
+ case qdstatfs_action_COMPARE:
+ new_usage = statvfs->f_blocks - statvfs->f_bfree;
+ cur_usage = local->statvfs.f_blocks -
+ local->statvfs.f_bfree;
- local->statvfs.f_blocks += statvfs->f_blocks;
- local->statvfs.f_bfree += statvfs->f_bfree;
- local->statvfs.f_bavail += statvfs->f_bavail;
- local->statvfs.f_files += statvfs->f_files;
- local->statvfs.f_ffree += statvfs->f_ffree;
- local->statvfs.f_favail += statvfs->f_favail;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
+ /* Take the max of the usage from subvols */
+ if (new_usage >= cur_usage)
+ local->statvfs = *statvfs;
+ goto unlock;
+ default:
+ break;
+ }
}
-unlock:
- UNLOCK (&frame->lock);
+ if (local->statvfs.f_bsize != 0) {
+ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
+ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
+ dht_normalize_stats(&local->statvfs, bsize, frsize);
+ dht_normalize_stats(statvfs, bsize, frsize);
+ } else {
+ local->statvfs.f_bsize = statvfs->f_bsize;
+ local->statvfs.f_frsize = statvfs->f_frsize;
+ }
+
+ local->statvfs.f_blocks += statvfs->f_blocks;
+ local->statvfs.f_bfree += statvfs->f_bfree;
+ local->statvfs.f_bavail += statvfs->f_bavail;
+ local->statvfs.f_files += statvfs->f_files;
+ local->statvfs.f_ffree += statvfs->f_ffree;
+ local->statvfs.f_favail += statvfs->f_favail;
+ local->statvfs.f_fsid = statvfs->f_fsid;
+ local->statvfs.f_flag = statvfs->f_flag;
+ local->statvfs.f_namemax = statvfs->f_namemax;
+ }
+unlock:
+ UNLOCK(&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs, xdata);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+ &local->statvfs, xdata);
- return 0;
+ return 0;
}
-
int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ inode_t *inode = NULL;
+ inode_table_t *itable = NULL;
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ loc_t newloc = {
+ 0,
+ };
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_cnt = conf->subvolume_cnt;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(this->private, err);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_statfs_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc,
- xdata);
- }
- return 0;
+ conf = this->private;
+
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
+ itable = loc->inode->table;
+ if (!itable) {
+ op_errno = EINVAL;
+ goto err;
}
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ loc = &local->loc2;
+
+ inode = inode_find(itable, root_gfid);
+ if (!inode) {
+ op_errno = EINVAL;
+ goto err;
}
- local->call_cnt = 1;
+ dht_build_root_loc(inode, &newloc);
+ loc = &newloc;
+ }
- STACK_WIND (frame, dht_statfs_cbk,
- subvol, subvol->fops->statfs, loc, xdata);
+ local->call_cnt = conf->subvolume_cnt;
- return 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND(frame, dht_statfs_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, loc, xdata);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
+dht_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = 0;
+ gf_boolean_t new_xdata = _gf_false;
+ xlator_t **subvolumes = NULL;
+ int call_count = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_OPENDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->first_up_subvol = dht_first_up_subvol(this);
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ new_xdata = _gf_true;
+ }
+
+ ret = dict_set_uint32(xdata, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value : key = %s",
+ conf->link_xattr_name);
+
+ /* dht_readdirp will wind to all subvols so open has to be sent to
+ * all subvols whether or not conf->local_subvols is set */
+
+ call_count = local->call_cnt = conf->subvolume_cnt;
+ subvolumes = conf->subvolumes;
+
+ /* In case of parallel-readdir, the readdir-ahead will be loaded
+ * below dht, in this case, if we want to enable or disable SKIP_DIRs
+ * it has to be done in opendir, so that prefetching logic in
+ * readdir-ahead, honors it */
+ for (i = 0; i < call_count; i++) {
+ if (conf->readdir_optimize == _gf_true) {
+ if (subvolumes[i] != local->first_up_subvol) {
+ ret = dict_set_int32(xdata, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary"
+ " value :key = %s, ret:%d",
+ GF_READDIR_SKIP_DIRS, ret);
+ }
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (this->private, err);
+ STACK_WIND_COOKIE(frame, dht_fd_cbk, subvolumes[i], subvolumes[i],
+ subvolumes[i]->fops->opendir, loc, fd, xdata);
+ dict_del(xdata, GF_READDIR_SKIP_DIRS);
+ }
- conf = this->private;
+ if (new_xdata)
+ dict_unref(xdata);
- local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
- if (!local) {
- op_errno = ENOMEM;
+ return 0;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL);
- local->call_cnt = conf->subvolume_cnt;
+ return 0;
+}
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fd_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, fd, xdata);
- }
+/* dht_readdirp_cbk creates a new dentry and dentry->inode is not assigned.
+ This functions assigns an inode if all of the following conditions are
+ true:
- return 0;
+ * DHT has only one child. In this case the entire layout is present on
+ this single child and hence we can set complete layout in inode.
+ * backend has complete layout and there are no anomalies in it and from
+ this information layout can be constructed and set in inode.
+*/
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
+static void
+dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol,
+ gf_dirent_t *entry, gf_dirent_t *orig_entry)
+{
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(orig_entry->d_stat.ia_gfid)) {
+ /* this skips the '..' entry for the root of the volume */
+ return;
+ }
- return 0;
+ gf_uuid_copy(loc.gfid, orig_entry->d_stat.ia_gfid);
+ loc.inode = inode_ref(orig_entry->inode);
+
+ if (is_revalidate(&loc)) {
+ goto out;
+ }
+
+ layout = dht_layout_new(this, 1);
+ if (!layout)
+ goto out;
+
+ ret = dht_layout_merge(this, layout, subvol, 0, 0, orig_entry->dict);
+ if (!ret) {
+ ret = dht_layout_normalize(this, &loc, layout);
+ if (ret == 0) {
+ dht_layout_set(this, orig_entry->inode, layout);
+ entry->inode = inode_ref(orig_entry->inode);
+ layout = NULL;
+ }
+ }
+
+ if (layout)
+ dht_layout_unref(this, layout);
+
+out:
+ loc_wipe(&loc);
+ return;
}
+/* Posix returns op_errno = ENOENT to indicate that there are no more
+ * entries
+ */
+static int
+dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ xlator_t *subvol = 0;
+ xlator_t *hashed_subvol = 0;
+ int ret = 0;
+ int readdir_optimize = 0;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t skip_hashed_check = _gf_false;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ prev = cookie;
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(this->name, local->fd, unwind);
+
+ itable = local->fd->inode->table;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
+
+ methods = &(conf->methods);
+
+ if (op_ret <= 0) {
+ goto done;
+ }
+
+ /* Why aren't we skipping DHT entirely in case of a single subvol?
+ * Because if this was a larger volume earlier and all but one subvol
+ * was removed, there might be stale linkto files on the subvol.
+ */
+ if (conf->subvolume_cnt == 1) {
+ /* return all directory and file entries except
+ * linkto files for a single child DHT
+ */
+ skip_hashed_check = _gf_true;
+ }
-int
-dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_layout_t *layout = 0;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = 0;
- int ret = 0;
-
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
- conf = this->private;
+ if (!local->layout)
+ local->layout = dht_layout_get(this, local->fd->inode);
- if (op_ret < 0)
- goto done;
+ layout = local->layout;
- if (!local->layout)
- local->layout = dht_layout_get (this, local->fd->inode);
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
- layout = local->layout;
+ if (layout == NULL)
+ goto done;
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
- next_offset = orig_entry->d_off;
- if ((check_is_dir (NULL, (&orig_entry->d_stat), NULL) &&
- (prev->this != dht_first_up_subvol (this))) ||
- check_is_linkfile (NULL, (&orig_entry->d_stat),
- orig_entry->dict)) {
- continue;
- }
+ if (conf->readdir_optimize == _gf_true)
+ readdir_optimize = 1;
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
+ gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name);
- goto unwind;
- }
+ list_for_each_entry(orig_entry, (&orig_entries->list), list)
+ {
+ next_offset = orig_entry->d_off;
- /* Do this if conf->search_unhashed is set to "auto" */
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
- subvol = dht_layout_search (this, layout,
- orig_entry->d_name);
- if (!subvol || (subvol != prev->this)) {
- /* TODO: Count the number of entries which need
- linkfile to prove its existence in fs */
- layout->search_unhashed++;
- }
- }
+ gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name,
+ orig_entry->d_name, orig_entry->d_type);
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
+ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
+ /*stat failed somewhere- display this entry but the data may
+ * be inaccurate.
+ */
+ gf_msg_debug(this->name, EINVAL, "Invalid stat for %s (gfid %s)",
+ orig_entry->d_name,
+ uuid_utoa(orig_entry->d_stat.ia_gfid));
+ }
- entry->d_stat = orig_entry->d_stat;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
+ if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
+ conf->link_xattr_name)) {
+ gf_msg_debug(this->name, 0, "%s: %s is a linkto file", prev->name,
+ orig_entry->d_name);
+ continue;
+ }
- list_add_tail (&entry->list, &entries.list);
- count++;
+ if (skip_hashed_check) {
+ goto list;
}
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev->this != dht_last_up_subvol (this))
- op_errno = 0;
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
- next_subvol = dht_subvol_next (this, prev->this);
- } else {
- next_subvol = prev->this;
+ if (check_is_dir(NULL, (&orig_entry->d_stat), NULL)) {
+ /*Directory entries filtering :
+ * a) If rebalance is running, pick from first_up_subvol
+ * b) (rebalance not running)hashed subvolume is NULL or
+ * down then filter in first_up_subvolume. Other wise the
+ * corresponding hashed subvolume will take care of the
+ * directory entry.
+ */
+ if (readdir_optimize) {
+ if (prev == local->first_up_subvol)
+ goto list;
+ else
+ continue;
+ }
+
+ hashed_subvol = methods->layout_search(this, layout,
+ orig_entry->d_name);
+
+ if (prev == hashed_subvol)
+ goto list;
+ if ((hashed_subvol && dht_subvol_status(conf, hashed_subvol)) ||
+ (prev != local->first_up_subvol))
+ continue;
+
+ goto list;
+ }
+
+ list:
+ entry = gf_dirent_for_name(orig_entry->d_name);
+ if (!entry) {
+ goto unwind;
+ }
+
+ /* Do this if conf->search_unhashed is set to "auto" */
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ subvol = methods->layout_search(this, layout, orig_entry->d_name);
+ if (!subvol || (subvol != prev)) {
+ /* TODO: Count the number of entries which need
+ linkfile to prove its existence in fs */
+ layout->search_unhashed++;
+ }
+ }
+
+ entry->d_off = orig_entry->d_off;
+ entry->d_stat = orig_entry->d_stat;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+
+ if (orig_entry->dict)
+ entry->dict = dict_ref(orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (IA_ISDIR(entry->d_stat.ia_type)) {
+ entry->d_stat.ia_blocks = DHT_DIR_STAT_BLOCKS;
+ entry->d_stat.ia_size = DHT_DIR_STAT_SIZE;
+ if (orig_entry->inode) {
+ dht_inode_ctx_time_update(orig_entry->inode, this,
+ &entry->d_stat, 1);
+
+ if (conf->subvolume_cnt == 1) {
+ dht_populate_inode_for_dentry(this, prev, entry,
+ orig_entry);
}
-
- if (!next_subvol) {
- goto unwind;
+ }
+ } else {
+ if (orig_entry->dict &&
+ dict_get(orig_entry->dict, conf->link_xattr_name)) {
+ /* Strip out the S and T flags set by rebalance*/
+ DHT_STRIP_PHASE1_FLAGS(&entry->d_stat);
+ }
+
+ if (orig_entry->inode) {
+ ret = dht_layout_preset(this, prev, orig_entry->inode);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout "
+ "in inode for %s",
+ orig_entry->d_name);
+
+ entry->inode = inode_ref(orig_entry->inode);
+ } else if (itable) {
+ /*
+ * orig_entry->inode might be null if any upper
+ * layer xlators below client set to null, to
+ * force a lookup on the inode even if the inode
+ * is present in the inode table. In that case
+ * we just update the ctx to make sure we didn't
+ * missed anything.
+ */
+ inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
+ if (inode) {
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout"
+ " in inode for %s",
+ orig_entry->d_name);
+ inode_unref(inode);
+ inode = NULL;
}
+ }
+ }
- if (conf->readdir_optimize == _gf_true) {
- if (next_subvol != dht_first_up_subvol (this)) {
- ret = dict_set_int32 (local->xattr,
- GF_READDIR_SKIP_DIRS, 1);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "dict set failed");
- }
- }
+ gf_msg_debug(this->name, 0, "%s: Adding entry = %s", prev->name,
+ entry->d_name);
- STACK_WIND (frame, dht_readdirp_cbk,
- next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset,
- local->xattr);
- return 0;
- }
+ list_add_tail(&entry->list, &entries.list);
+ count++;
+ }
-unwind:
- if (op_ret < 0)
- op_ret = 0;
+done:
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ * Possible values:
+ * op_ret == 0 and op_errno != 0
+ * if op_errno != ENOENT : Error.Unwind.
+ * if op_errno == ENOENT : There are no more entries on this subvol.
+ * Move to the next one.
+ * op_ret > 0 and count == 0 :
+ * The subvol returned entries to dht but all were stripped out.
+ * For example, if they were linkto files or dirs where
+ * hashed_subvol != prev. Try to get some entries by winding
+ * to the next subvol. This can be dangerous if parallel readdir
+ * is enabled as it grows the stack.
+ *
+ * op_ret > 0 and count > 0:
+ * We found some entries. Unwind even if the buffer is not full.
+ *
+ */
+
+ op_ret = count;
+ if (count == 0) {
+ /* non-zero next_offset means that
+ * EOF is not yet hit on the current subvol
+ */
+ if ((next_offset == 0) || (op_errno == ENOENT)) {
+ next_offset = 0;
+ next_subvol = dht_subvol_next(this, prev);
+ } else {
+ next_subvol = prev;
+ }
- gf_dirent_free (&entries);
+ if (!next_subvol) {
+ goto unwind;
+ }
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ ":key = %s",
+ GF_READDIR_SKIP_DIRS);
+ } else {
+ dict_del(local->xattr, GF_READDIR_SKIP_DIRS);
+ }
+ }
+
+ STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol,
+ next_subvol->fops->readdirp, local->fd, local->size,
+ next_offset, local->xattr);
return 0;
+ }
+
+unwind:
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (op_ret < 0)
+ op_ret = 0;
+
+ if (prev != dht_last_up_subvol(this))
+ op_errno = 0;
+
+ DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free(&entries);
+ return 0;
}
+static int
+dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ gf_boolean_t skip_hashed_check = _gf_false;
+ INIT_LIST_HEAD(&entries.list);
-int
-dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_layout_t *layout = 0;
- xlator_t *subvol = 0;
-
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
+ prev = cookie;
+ local = frame->local;
- if (op_ret < 0)
- goto done;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, done);
- if (!local->layout)
- local->layout = dht_layout_get (this, local->fd->inode);
+ methods = &(conf->methods);
- layout = local->layout;
+ if (op_ret <= 0)
+ goto done;
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
- next_offset = orig_entry->d_off;
+ if (!local->layout)
+ local->layout = dht_layout_get(this, local->fd->inode);
- subvol = dht_layout_search (this, layout, orig_entry->d_name);
+ layout = local->layout;
- if (!subvol || (subvol == prev->this)) {
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "memory allocation failed :(");
- goto unwind;
- }
+ gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name);
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
+ if (conf->subvolume_cnt == 1) {
+ /*return everything*/
+ skip_hashed_check = _gf_true;
+ count = op_ret;
+ goto done;
+ }
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
+ list_for_each_entry(orig_entry, (&orig_entries->list), list)
+ {
+ next_offset = orig_entry->d_off;
- list_add_tail (&entry->list, &entries.list);
- count++;
- }
- }
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev->this != dht_last_up_subvol (this))
- op_errno = 0;
+ gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name,
+ orig_entry->d_name, orig_entry->d_type);
+ subvol = methods->layout_search(this, layout, orig_entry->d_name);
+
+ if (!subvol || (subvol == prev)) {
+ entry = gf_dirent_for_name(orig_entry->d_name);
+ if (!entry) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Memory allocation failed ");
+ goto unwind;
+ }
+
+ entry->d_off = orig_entry->d_off;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+
+ gf_msg_debug(this->name, 0, "%s: Adding = entry %s", prev->name,
+ entry->d_name);
+
+ list_add_tail(&entry->list, &entries.list);
+ count++;
+ }
+ }
done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
- next_subvol = dht_subvol_next (this, prev->this);
+ op_ret = count;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (count == 0) {
+ if ((next_offset == 0) || (op_errno == ENOENT)) {
+ next_offset = 0;
+ next_subvol = dht_subvol_next(this, prev);
+ } else {
+ next_subvol = prev;
+ }
+
+ if (!next_subvol) {
+ goto unwind;
+ }
+
+ STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol,
+ next_subvol->fops->readdir, local->fd, local->size,
+ next_offset, NULL);
+ return 0;
+ }
+
+unwind:
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+
+ if (prev != dht_last_up_subvol(this))
+ op_errno = 0;
+
+ if (!skip_hashed_check) {
+ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
+ gf_dirent_free(&entries);
+
+ } else {
+ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, orig_entries, NULL);
+ }
+ return 0;
+}
+
+static int
+dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, int whichop, dict_t *dict)
+{
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ xlator_t *xvol = NULL;
+ int ret = 0;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, NULL, NULL, whichop);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->fd = fd_ref(fd);
+ local->size = size;
+ local->xattr_req = (dict) ? dict_ref(dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol(this);
+ local->op_ret = -1;
+
+ dht_deitransform(this, yoff, &xvol);
+
+ /* TODO: do proper readdir */
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref(dict);
+ else
+ local->xattr = dict_new();
+
+ if (local->xattr) {
+ ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ " : key = %s",
+ conf->link_xattr_name);
+
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value: "
+ "key = %s",
+ GF_READDIR_SKIP_DIRS);
} else {
- next_subvol = prev->this;
+ dict_del(local->xattr, GF_READDIR_SKIP_DIRS);
}
+ }
- if (!next_subvol) {
- goto unwind;
+ if (conf->subvolume_cnt == 1) {
+ ret = dict_set_uint32(local->xattr, conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary "
+ "value:key = %s ",
+ conf->xattr_name);
}
-
- STACK_WIND (frame, dht_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset, NULL);
- return 0;
+ }
}
-unwind:
- if (op_ret < 0)
- op_ret = 0;
+ STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol,
+ xvol->fops->readdirp, fd, size, yoff, local->xattr);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol,
+ xvol->fops->readdir, fd, size, yoff, local->xattr);
+ }
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
+ return 0;
- gf_dirent_free (&entries);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+int
+dht_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *xdata)
+{
+ int op = GF_FOP_READDIR;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ op = GF_FOP_READDIRP;
+ break;
+ }
+ }
+
+ if (conf->use_readdirp)
+ op = GF_FOP_READDIRP;
+
+out:
+ dht_do_readdir(frame, this, fd, size, yoff, op, 0);
+ return 0;
+}
int
-dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop, dict_t *dict)
+dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *dict)
{
- dht_local_t *local = NULL;
- int op_errno = -1;
- xlator_t *xvol = NULL;
- off_t xoff = 0;
- int ret = 0;
- dht_conf_t *conf = NULL;
+ dht_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+static int
+dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
- conf = this->private;
+ local = frame->local;
- local = dht_local_init (frame, NULL, NULL, whichop);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+ else if (op_ret == 0)
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
+ xdata);
+
+ return 0;
+}
- local->fd = fd_ref (fd);
- local->size = size;
- local->xattr_req = (dict)? dict_ref (dict) : NULL;
+int
+dht_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
- dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIRP) {
- if (dict)
- local->xattr = dict_ref (dict);
- else
- local->xattr = dict_new ();
+ conf = this->private;
- if (local->xattr) {
- ret = dict_set_uint32 (local->xattr,
- "trusted.glusterfs.dht.linkto",
- 256);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set 'glusterfs.dht.linkto'"
- " key");
- if (conf->readdir_optimize == _gf_true) {
- if (xvol != dht_first_up_subvol (this)) {
- ret = dict_set_int32 (local->xattr,
- GF_READDIR_SKIP_DIRS, 1);
- if (ret)
- gf_log (this->name,
- GF_LOG_ERROR,
- "Dict set failed");
- }
- }
- }
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_FSYNCDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff, local->xattr);
- } else {
- STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff, local->xattr);
- }
+ local->fd = fd_ref(fd);
+ local->call_cnt = conf->subvolume_cnt;
- return 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND(frame, dht_fsyncdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsyncdir, fd, datasync, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata)
+dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- int op = GF_FOP_READDIR;
- dht_conf_t *conf = NULL;
- int i = 0;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ if (op_ret == -1)
+ goto out;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ prev = cookie;
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, EINVAL,
+ "could not set pre-set layout for subvolume %s",
+ prev ? prev->name : NULL);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal(frame, this);
+out:
+ /*
+ * FIXME: ia_size and st_blocks of preparent and postparent do not have
+ * correct values. since, preparent and postparent buffers correspond
+ * to a directory these two members should have values equal to sum of
+ * corresponding values from each of the subvolume.
+ * See dht_iatt_merge for reference.
+ */
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ dht_set_fixed_dir_stat(preparent);
+
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock(frame, this, op_ret, 1);
- conf = this->private;
- if (!conf)
- goto out;
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- op = GF_FOP_READDIRP;
- break;
- }
+ return 0;
+}
+
+static int
+dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->params) {
+ dict_del(local->params, conf->link_xattr_name);
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod, &local->loc,
+ local->mode, local->rdev, local->umask, local->params);
+
+ return 0;
+err:
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, dev_t rdev,
+ mode_t mode, mode_t umask, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
+
+ local = frame->local;
+
+ if (!dht_is_subvol_filled(this, subvol)) {
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
+ } else {
+ avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
+
+ if (avail_subvol != subvol) {
+ local->params = dict_ref(params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)",
+ loc->path, avail_subvol->name, subvol->name);
+
+ dht_linkfile_create(frame, dht_mknod_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+
+ goto out;
}
- if (conf->use_readdirp)
- op = GF_FOP_READDIRP;
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
+ }
out:
- dht_do_readdir (frame, this, fd, size, yoff, op, 0);
- return 0;
+ return 0;
}
-int
-dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *dict)
+static int32_t
+dht_mknod_do(call_frame_t *frame)
{
- dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
- return 0;
-}
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ local = frame->local;
+ this = THIS;
-int
-dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
- local = frame->local;
+ methods = &(conf->methods);
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- local->op_errno = op_errno;
+ /* We don't need parent_loc anymore */
+ loc_wipe(&local->loc);
- if (op_ret == 0)
- local->op_ret = 0;
- }
- UNLOCK (&frame->lock);
+ loc_copy(&local->loc, &local->loc2);
+
+ loc_wipe(&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, xdata);
+ subvol = methods->layout_search(this, refreshed, local->loc.name);
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in "
+ "layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_mknod_wind_to_avail_subvol(frame, this, subvol, &local->loc,
+ local->rdev, local->mode, local->umask,
+ local->params);
+ return 0;
+err:
+ local->refresh_layout_unlock(frame, this, -1, 1);
+
+ return 0;
+}
+
+static int32_t
+dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int32_t
+dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks,
+ local->lock[0].layout.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock[0]
+ .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks;
+ lock_local->lock[0].layout.parent_layout.lk_count =
+ local->lock[0].layout.parent_layout.lk_count;
+
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+
+ dht_unlock_inodelk(lock_frame,
+ lock_local->lock[0].layout.parent_layout.locks,
+ lock_local->lock[0].layout.parent_layout.lk_count,
+ dht_mknod_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ if (op_ret == 0)
return 0;
+
+ DHT_STACK_UNWIND(mknod, frame, op_ret, local->op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
+static int32_t
+dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
-int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int datasync, dict_t *xdata)
+ local = frame->local;
+
+ if (!local) {
+ goto err;
+ }
+
+ if (op_ret < 0) {
+ gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "mknod lock failed for file: %s", local->loc2.name);
+
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ local->refresh_layout_unlock = dht_mknod_finish;
+
+ local->refresh_layout_done = dht_mknod_do;
+
+ dht_refresh_layout(frame);
+
+ return 0;
+err:
+ if (local)
+ dht_mknod_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t
+dht_mknod_lock(call_frame_t *frame, xlator_t *subvol)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (this->private, err);
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- conf = this->private;
+ local = frame->local;
- local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
- local->fd = fd_ref (fd);
- local->call_cnt = conf->subvolume_cnt;
+ if (lk_array == NULL)
+ goto err;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fsyncdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync, xdata);
- }
+ lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE);
- return 0;
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock[0].layout.parent_layout.locks = lk_array;
+ local->lock[0].layout.parent_layout.lk_count = count;
+
+ ret = dht_blocking_inodelk(frame, lk_array, count, dht_mknod_lock_cbk);
+
+ if (ret < 0) {
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+ goto err;
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
- return 0;
+ return -1;
}
+static int
+dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *parent_local = NULL;
+ call_stub_t *stub = NULL;
+ call_frame_t *parent_frame = NULL;
-int
-dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ local = frame->local;
+
+ stub = local->stub;
+ local->stub = NULL;
+
+ parent_frame = stub->frame;
+ parent_local = parent_frame->local;
+
+ if (ret < 0) {
+ parent_local->op_ret = -1;
+ parent_local->op_errno = local->op_errno ? local->op_errno : EIO;
+ } else {
+ parent_local->op_ret = 0;
+ }
+
+ call_resume(stub);
+
+ DHT_STACK_DESTROY(frame);
+
+ return 0;
+}
+
+static int
+dht_refresh_parent_layout_done(call_frame_t *frame)
{
- call_frame_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+ local = frame->local;
- if (op_ret == -1)
- goto out;
+ if (local->op_ret < 0) {
+ ret = -1;
+ goto resume;
+ }
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ dht_layout_set(frame->this, local->loc.inode,
+ local->selfheal.refreshed_layout);
- prev = cookie;
+resume:
+ dht_refresh_parent_layout_resume(frame, frame->this, ret, 1);
+ return 0;
+}
- if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
- }
+static int
+dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
+{
+ call_frame_t *refresh_frame = NULL, *frame = NULL;
+ dht_local_t *refresh_local = NULL, *local = NULL;
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-out:
- /*
- * FIXME: ia_size and st_blocks of preparent and postparent do not have
- * correct values. since, preparent and postparent buffers correspond
- * to a directory these two members should have values equal to sum of
- * corresponding values from each of the subvolume.
- * See dht_iatt_merge for reference.
- */
+ frame = stub->frame;
+ local = frame->local;
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
- preparent, postparent, xdata);
- return 0;
+ refresh_frame = copy_frame(frame);
+ if (!refresh_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "mem allocation failed for refresh_frame");
+ return -1;
+ }
+
+ refresh_local = dht_local_init(refresh_frame, NULL, NULL, stub->fop);
+ if (!refresh_local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "mem allocation failed for refresh_local");
+ return -1;
+ }
+
+ refresh_local->loc.inode = inode_ref(local->loc.parent);
+ gf_uuid_copy(refresh_local->loc.gfid, local->loc.parent->gfid);
+
+ refresh_local->stub = stub;
+
+ refresh_local->refresh_layout_unlock = dht_refresh_parent_layout_resume;
+ refresh_local->refresh_layout_done = dht_refresh_parent_layout_done;
+
+ dht_refresh_layout(refresh_frame);
+ return 0;
}
-int
-dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int32_t
+dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- if (op_ret == -1)
- goto err;
+ local = frame->local;
+ stub = local->stub;
+ local->stub = NULL;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev, local->umask,
- local->params);
+ call_resume(stub);
- return 0;
+ return 0;
+}
+
+static int32_t
+dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ loc_t *loc = NULL;
+ xlator_t *hashed_subvol = NULL, *this = NULL;
+ ;
+ call_frame_t *frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", stub, err);
+
+ frame = stub->frame;
+ this = frame->this;
+
+ conf = this->private;
+
+ local = frame->local;
+
+ local->stub = stub;
+
+ /* TODO: recheck whether we should lock on src or dst if we do similar
+ * stale layout checks for rename.
+ */
+ loc = &stub->args.loc;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ if (local->params == NULL) {
+ local->params = dict_new();
+ if (local->params == NULL) {
+ local->op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "dict allocation failed",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "hashed subvolume not found",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_layout = dht_layout_get(this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ local->op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof(local->parent_disk_layout));
+
+ dht_layout_unref(this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str(local->params, GF_PREOP_PARENT_KEY, conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+ local->hashed_subvol = hashed_subvol;
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, loc, hashed_subvol, &local->current->ns,
+ dht_call_mkdir_stub);
+ if (ret < 0)
+ goto err;
+
+ return 0;
err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
+
+ if (parent_disk_layout != NULL)
+ GF_FREE(parent_disk_layout);
+
+ if (parent_layout != NULL)
+ dht_layout_unref(this, parent_layout);
+
+ return -1;
}
int
-dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
+dht_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- xlator_t *avail_subvol = NULL;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a mknod call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+ gf_msg_debug(this->name, 0,
+ "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s",
+ subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ goto err;
+ }
- dht_get_du_info (frame, this, loc);
+ local->params = dict_ref(params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ loc_wipe(&local->loc);
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto err;
+ }
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, umask, params);
- } else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- /* Choose the minimum filled volume, and create the
- files there */
-
- local->params = dict_ref (params);
- local->cached_subvol = avail_subvol;
- local->mode = mode;
- local->rdev = rdev;
- local->umask = umask;
- dht_linkfile_create (frame,
- dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ ret = dht_mknod_lock(frame, subvol);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, umask, params);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
}
+
+ goto done;
+ }
}
+ }
- return 0;
+ dht_mknod_wind_to_avail_subvol(frame, this, subvol, loc, rdev, mode, umask,
+ params);
+
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
+int
+dht_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *params)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SYMLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask, params);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
int
-dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
+dht_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->flags = xflag;
+ STACK_WIND_COOKIE(frame, dht_unlink_cbk, cached_subvol, cached_subvol,
+ cached_subvol->fops->unlink, loc, xflag, xdata);
+
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ return 0;
+}
- local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+static int
+dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
+
+static int
+dht_remove_stale_linkto(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata_in = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ xdata_in = dict_new();
+ if (!xdata_in)
+ goto out;
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Failed to set keys for stale linkto"
+ "deletion on path %s",
+ local->loc.path);
+ goto out;
+ }
+
+ ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Removal of linkto failed"
+ " on path %s at subvol %s",
+ local->loc.path, local->link_subvol->name);
+ }
+out:
+ if (xdata_in)
+ dict_unref(xdata_in);
+ return ret;
+}
- subvol = dht_subvol_get_hashed (this, loc);
+static int
+dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t stbuf_merged = _gf_false;
+ xlator_t *subvol = NULL;
+ call_frame_t *cleanup_frame = NULL;
+ dht_local_t *cleanup_local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ /* Remove the linkto if exists */
+ if (local->linked) {
+ cleanup_frame = create_frame(this, this->ctx->pool);
+ if (cleanup_frame) {
+ cleanup_local = dht_local_init(cleanup_frame, &local->loc2,
+ NULL, 0);
+ if (!cleanup_local || !local->link_subvol) {
+ DHT_STACK_DESTROY(cleanup_frame);
+ goto out;
+ }
+ cleanup_local->link_subvol = local->link_subvol;
+ FRAME_SU_DO(cleanup_frame, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_remove_stale_linkto,
+ dht_remove_stale_linkto_cbk, cleanup_frame,
+ cleanup_frame);
+ }
+ }
+ /* No continuation on DHT inode missing errors, as we should
+ * then have a good stbuf that states P2 happened. We would
+ * get inode missing if, the file completed migrated between
+ * the lookup and the link call */
+ goto out;
+ }
+
+ /* Update parent on success, even if P1/2 checks are positive.
+ * The second call on success will further update the parent */
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ /* Update linkto attrs, if this is the first call and non-P2,
+ * if we detect P2 then we need to trust the attrs from the
+ * second call, not the first */
+ if (local->linked == _gf_true &&
+ ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2(stbuf)) ||
+ (local->call_cnt != 1 && IS_DHT_MIGRATION_PHASE2(&local->stbuf)))) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ stbuf_merged = _gf_true;
+ dht_linkfile_attr_heal(frame, this);
+ }
+
+ /* No further P1/2 checks if we are in the second iteration of
+ * the call */
+ if (local->call_cnt != 1) {
+ goto out;
+ } else {
+ /* Preserve the return values, in case the migration decides
+ * to recreate the link on the same subvol that the current
+ * hased for the link was created on. */
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ if (!stbuf_merged) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ stbuf_merged = _gf_true;
+ }
+
+ local->inode = inode_ref(inode);
+ }
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_link2;
+ dht_set_local_rebalance(this, local, stbuf, preparent, postparent, xdata);
+
+ /* Check if the rebalance phase2 is true */
+ if (IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol);
if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_link2(this, subvol, frame, 0);
+ return 0;
+ }
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol);
+ if (subvol) {
+ dht_link2(this, subvol, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+out:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc, umask, params);
+ return 0;
+}
+
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto err;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ DHT_STACK_UNWIND(link, frame, local->op_ret, op_errno, local->inode,
+ &local->stbuf, &local->preparent, &local->postparent,
+ NULL);
+ return 0;
+ }
+
+ if (subvol == NULL) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Second call to create link file could result in EEXIST as the
+ * first call created the linkto in the currently
+ * migrating subvol, which could be the new hashed subvol */
+ if (local->link_subvol == subvol) {
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(link, frame, 0, 0, local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
return 0;
+ }
+
+ local->call_cnt = 2;
+ STACK_WIND(frame, dht_link_cbk, subvol, subvol->fops->link, &local->loc,
+ &local->loc2, local->xattr_req);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+static int
+dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- if (dht_filter_loc_subvol_key (this, loc, &local->loc,
- &cached_subvol)) {
- gf_log (this->name, GF_LOG_INFO,
- "unlinking %s on %s (given path %s)",
- local->loc.path, cached_subvol->name, loc->path);
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink,
- &local->loc, xflag, xdata);
- goto done;
- }
+ dht_local_t *local = NULL;
+ xlator_t *srcvol = NULL;
- local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
- if (!local) {
- op_errno = ENOMEM;
+ if (op_ret == -1)
+ goto err;
- goto err;
- }
+ local = frame->local;
+ srcvol = local->linkfile.srcvol;
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND(frame, dht_link_cbk, srcvol, srcvol->fops->link, &local->loc,
+ &local->loc2, local->xattr_req);
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
- local->flags = xflag;
- if (hashed_subvol != cached_subvol) {
- STACK_WIND (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc,
- xflag, xdata);
- } else {
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc,
- xflag, xdata);
- }
-done:
- return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
- return 0;
+ return 0;
}
+int
+dht_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(oldloc, err);
+ VALIDATE_OR_GOTO(newloc, err);
+
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+ local->call_cnt = 1;
+
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ oldloc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, newloc);
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ ret = loc_copy(&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (hashed_subvol != cached_subvol) {
+ gf_uuid_copy(local->gfid, oldloc->inode->gfid);
+ dht_linkfile_create(frame, dht_link_linkfile_cbk, this, cached_subvol,
+ hashed_subvol, newloc);
+ } else {
+ STACK_WIND(frame, dht_link_cbk, cached_subvol,
+ cached_subvol->fops->link, oldloc, newloc, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
int
-dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
+ goto out;
+ }
- prev = cookie;
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ /*
+ dht_refresh_layout needs directory info in local->loc.Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
+
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
- if (op_ret == -1)
goto out;
+ }
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
goto out;
+ }
+
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
+
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
+
+ return 0;
}
- WIPE (preparent);
- WIPE (postparent);
+ goto out;
+ }
-out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
+ prev = cookie;
- return 0;
-}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
-int
-dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *srcvol = NULL;
+ ret = dht_fd_ctx_set(this, fd, prev);
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0,
+ "Possible fd leak. "
+ "Could not set fd ctx for subvol %s",
+ prev->name);
+ }
- if (op_ret == -1)
- goto err;
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- local = frame->local;
- srcvol = local->linkfile.srcvol;
+ local->op_errno = op_errno;
- STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
- &local->loc, &local->loc2, xdata);
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal(frame, this);
+ }
+out:
- return 0;
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
-err:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock(frame, this, op_ret, 1);
- return 0;
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ return 0;
}
+static int
+dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->params) {
+ dict_del(local->params, conf->link_xattr_name);
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_create_cbk, cached_subvol, cached_subvol,
+ cached_subvol->fops->create, &local->loc, local->flags,
+ local->mode, local->umask, local->fd, local->params);
+
+ return 0;
+err:
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
+ return 0;
+}
-int
-dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+static int
+dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *params)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
+ local = frame->local;
- local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
- if (!local) {
- op_errno = ENOMEM;
+ if (!dht_is_subvol_filled(this, subvol)) {
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
- goto err;
- }
+ dht_set_parent_layout_in_dict(loc, this, local);
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- hashed_subvol = dht_subvol_get_hashed (this, newloc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ } else {
+ avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (avail_subvol != subvol) {
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
- if (hashed_subvol != cached_subvol) {
- uuid_copy (local->gfid, oldloc->inode->gfid);
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
- cached_subvol, hashed_subvol, newloc);
- } else {
- STACK_WIND (frame, dht_link_cbk,
- cached_subvol, cached_subvol->fops->link,
- oldloc, newloc, xdata);
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)",
+ loc->path, avail_subvol->name, subvol->name);
+
+ dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+
+ goto out;
}
- return 0;
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ dht_set_parent_layout_in_dict(loc, this, local);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
+ }
+out:
+ return 0;
}
-
int
-dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno)
{
- call_frame_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
+ inode_table_t *table = NULL;
+ int ret = -1;
- if (op_ret == -1)
- goto out;
+ if (!parent || !child) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
+ if (child->parent) {
+ parent->inode = inode_ref(child->parent);
+ if (!parent->inode) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- prev = cookie;
+ gf_uuid_copy(parent->gfid, child->pargfid);
- if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+ ret = 0;
+
+ goto out;
+ } else {
+ if (gf_uuid_is_null(child->pargfid)) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set preset layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
+ table = this->itable;
+
+ if (!table) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ parent->inode = inode_find(table, child->pargfid);
+
+ if (!parent->inode) {
+ if (op_errno) {
+ *op_errno = EINVAL;
goto out;
+ }
}
+ gf_uuid_copy(parent->gfid, child->pargfid);
+
+ ret = 0;
+ }
+
out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent, NULL);
- return 0;
+ return ret;
}
-
-int
-dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int32_t
+dht_create_do(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- if (op_ret == -1)
- goto err;
+ local = frame->local;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ this = THIS;
- STACK_WIND (frame, dht_create_cbk,
- cached_subvol, cached_subvol->fops->create,
- &local->loc, local->flags, local->mode,
- local->umask, local->fd, local->params);
+ conf = this->private;
- return 0;
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* We don't need parent_loc anymore */
+ loc_wipe(&local->loc);
+
+ loc_copy(&local->loc, &local->loc2);
+
+ loc_wipe(&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search(this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in "
+ "layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_create_wind_to_avail_subvol(frame, this, subvol, &local->loc,
+ local->flags, local->mode, local->umask,
+ local->fd, local->params);
+ return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ local->refresh_layout_unlock(frame, this, -1, 1);
+
+ return 0;
+}
+
+static int32_t
+dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int32_t
+dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks,
+ local->lock[0].layout.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock[0]
+ .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks;
+ lock_local->lock[0].layout.parent_layout.lk_count =
+ local->lock[0].layout.parent_layout.lk_count;
+
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+
+ dht_unlock_inodelk(lock_frame,
+ lock_local->lock[0].layout.parent_layout.locks,
+ lock_local->lock[0].layout.parent_layout.lk_count,
+ dht_create_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ if (op_ret == 0)
return 0;
+
+ DHT_STACK_UNWIND(create, frame, op_ret, local->op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
-int
-dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+static int32_t
+dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = -1;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- xlator_t *avail_subvol = NULL;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ local = frame->local;
- dht_get_du_info (frame, this, loc);
+ if (!local) {
+ goto err;
+ }
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (op_ret < 0) {
+ gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "Create lock failed for file: %s", local->loc2.name);
- if (dht_filter_loc_subvol_key (this, loc, &local->loc,
- &subvol)) {
- gf_log (this->name, GF_LOG_INFO,
- "creating %s on %s (got create on %s)",
- local->loc.path, subvol->name, loc->path);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- &local->loc, flags, mode, umask, fd, params);
- goto done;
- }
+ local->op_errno = op_errno;
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ goto err;
+ }
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
- goto done;
- }
- /* Choose the minimum filled volume, and create the
- files there */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- local->params = dict_ref (params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- goto done;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
-done:
- return 0;
+ local->refresh_layout_unlock = dht_create_finish;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ local->refresh_layout_done = dht_create_do;
- return 0;
-}
+ dht_refresh_layout(frame);
+ return 0;
+err:
+ if (local)
+ dht_create_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
-int
-dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
- local = frame->local;
- layout = local->selfheal.layout;
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- if (op_ret == 0) {
- dht_layout_set (this, local->inode, layout);
- if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
- }
- }
+ local = frame->local;
- DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
- return 0;
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE);
+
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock[0].layout.parent_layout.locks = lk_array;
+ local->lock[0].layout.parent_layout.lk_count = count;
+
+ ret = dht_blocking_inodelk(frame, lk_array, count, dht_create_lock_cbk);
+
+ if (ret < 0) {
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
+
+ return -1;
}
int
-dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int ret = -1;
- gf_boolean_t subvol_filled = _gf_false;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
- local = frame->local;
- prev = cookie;
- layout = local->layout;
+err:
+ dht_layout_unref(this, parent_layout);
+ return ret;
+}
- subvol_filled = dht_is_subvol_filled (this, prev->this);
+int
+dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
+{
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
+ if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "creating %s on %s (got create on %s)", local->loc.path,
+ subvol->name, loc->path);
+
+ /* Since lookup-optimize is enabled by default, we need
+ * to create the linkto file if required.
+ * Note this does not check for decommisioned bricks
+ * and min-free-disk limits as this is a debugging tool
+ * and not expected to be used in production.
+ */
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
- LOCK (&frame->lock);
- {
- if (subvol_filled && (op_ret != -1)) {
- ret = dht_layout_merge (this, layout, prev->this,
- -1, ENOSPC, NULL);
- } else {
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
+ if (hashed_subvol && (hashed_subvol != subvol)) {
+ /* Create the linkto file and then the data file */
+ local->cached_subvol = subvol;
+ local->hashed_subvol = hashed_subvol;
+
+ dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this,
+ subvol, hashed_subvol, &local->loc);
+ goto done;
+ }
+ /* We either don't have a hashed subvol or the hashed subvol is
+ * the same as the one specified. No need to create the linkto
+ * file as we expect a lookup everywhere if there are problems
+ * with the parent layout
+ */
+
+ dht_set_parent_layout_in_dict(loc, this, local);
+
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, &local->loc, flags, mode, umask,
+ fd, params);
+ goto done;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in layout for path=%s", loc->path);
+
+ op_errno = EIO;
+ goto err;
+ }
+
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a create call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+ gf_msg_debug(this->name, 0,
+ "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s",
+ subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
}
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to merge layouts", local->loc.path);
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto err;
}
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
- }
-unlock:
- UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
- layout);
+ ret = dht_create_lock(frame, subvol);
+
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
+ }
+
+ goto done;
+ }
}
+ }
- return 0;
+ dht_create_wind_to_avail_subvol(frame, this, subvol, loc, flags, mode,
+ umask, fd, params);
+done:
+ return 0;
+
+err:
+
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
}
-int
-dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int ret = -1;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
-
- VALIDATE_OR_GOTO (this->private, err);
+static int
+dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
- conf = this->private;
- hashed_subvol = local->hashed_subvol;
+ local = frame->local;
+ layout = local->selfheal.layout;
- if (uuid_is_null (local->loc.gfid) && !op_ret)
- uuid_copy (local->loc.gfid, stbuf->ia_gfid);
+ FRAME_SU_UNDO(frame, dht_local_t);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- if (dht_is_subvol_filled (this, hashed_subvol))
- ret = dht_layout_merge (this, layout, prev->this,
- -1, ENOSPC, NULL);
- else
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
+ if (op_ret == 0) {
+ dht_layout_set(this, local->inode, layout);
- /* TODO: we may have to return from the function
- if layout merge fails. For now, lets just log an error */
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to merge layouts", local->loc.path);
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
- local->op_ret = 0;
+ }
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+ DHT_STACK_UNWIND(mkdir, frame, op_ret, op_errno, local->inode,
+ &local->stbuf, &local->preparent, &local->postparent,
+ NULL);
- local->call_cnt = conf->subvolume_cnt - 1;
+ return 0;
+}
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, stbuf->ia_gfid);
- if (local->call_cnt == 0) {
- dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
- &local->loc, layout);
+static int
+dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = -1;
+ gf_boolean_t subvol_filled = _gf_false;
+ gf_boolean_t dir_exists = _gf_false;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+
+ subvol_filled = dht_is_subvol_filled(this, prev);
+
+ LOCK(&frame->lock);
+ {
+ if (subvol_filled && (op_ret != -1)) {
+ ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL);
+ } else {
+ if (op_ret == -1 && op_errno == EEXIST) {
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
+ dir_exists = _gf_true;
+ }
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL);
}
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == hashed_subvol)
- continue;
- STACK_WIND (frame, dht_mkdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir, &local->loc,
- local->mode, local->umask, local->params);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
}
- return 0;
-err:
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+
+ if (dir_exists)
+ goto unlock;
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /*Unlock entrylk and inodelk once mkdir is done on all subvols*/
+ dht_unlock_namespace(frame, &local->lock[0]);
+ FRAME_SU_DO(frame, dht_local_t);
+ dht_selfheal_new_directory(frame, dht_mkdir_selfheal_cbk, layout);
+ }
+
+ return 0;
}
+static int
+dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
- int
-dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+static int
+dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (local->op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): refreshing parent layout "
+ "failed.",
+ pgfid, loc->name, loc->path);
+
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ local->op_ret = -1;
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug(this->name, 0,
+ "mkdir (%s/%s) (path: %s): hashed subvol not "
+ "found",
+ pgfid, loc->name, loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+
+ parent_layout = dht_layout_get(this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ if (memcmp(local->parent_disk_layout, parent_disk_layout,
+ sizeof(local->parent_disk_layout)) == 0) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): loop detected. "
+ "parent layout didn't change even though "
+ "previous attempt of mkdir failed because of "
+ "in-memory layout not matching with that on disk.",
+ pgfid, loc->name, loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof(local->parent_disk_layout));
+
+ dht_layout_unref(this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str(params, GF_PREOP_PARENT_KEY, conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ pgfid, loc->name, loc->path, GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(params, conf->xattr_name, parent_disk_layout, 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "setting parent-layout in params dictionary failed. "
+ "mkdir (%s/%s) (path: %s)",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+
+ STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->mkdir, loc, mode, umask, params);
+
+ return 0;
+err:
+ dht_unlock_namespace(frame, &local->lock[0]);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- conf = this->private;
+ if (parent_disk_layout != NULL)
+ GF_FREE(parent_disk_layout);
- dht_get_du_info (frame, this, loc);
+ if (parent_layout != NULL)
+ dht_layout_unref(this, parent_layout);
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
+}
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (hashed_subvol == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hashed subvol not found for %s",
- loc->path);
- op_errno = EINVAL;
+static int
+dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t parent_layout_changed = _gf_false;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+ conf = this->private;
+ hashed_subvol = local->hashed_subvol;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ if (gf_uuid_is_null(local->loc.gfid) && !op_ret)
+ gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? 1
+ : 0;
+ if (parent_layout_changed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ stub = fop_mkdir_stub(frame, dht_mkdir_helper, &local->loc,
+ local->mode, local->umask, local->params);
+ if (stub == NULL) {
goto err;
- }
+ }
- local->hashed_subvol = hashed_subvol;
- local->mode = mode;
- local->umask = umask;
- local->params = dict_ref (params);
- local->inode = inode_ref (loc->inode);
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
+ ret = dht_handle_parent_layout_change(this, stub);
+ if (ret) {
goto err;
- }
+ }
+
+ stub = NULL;
+
+ return 0;
+ }
+
+ goto err;
+ }
+
+ dict_del(local->params, GF_PREOP_PARENT_KEY);
+ dict_del(local->params, conf->xattr_name);
+
+ if (dht_is_subvol_filled(this, hashed_subvol))
+ ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL);
+ else
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL);
+
+ /* TODO: we may have to return from the function
+ if layout merge fails. For now, lets just log an error */
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
+
+ local->op_ret = 0;
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+ /* Delete internal mds xattr from params dict to avoid store
+ internal mds xattr on other subvols
+ */
+ dict_del(local->params, conf->mds_xattr_key);
+
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid);
+
+ /* Set hashed subvol as a mds subvol on inode ctx */
+ /*if (!local->inode)
+ local->inode = inode_ref (inode);
+ */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path, hashed_subvol->name);
+ }
+
+ if (local->call_cnt == 0) {
+ /*Unlock namespace lock once mkdir is done on all subvols*/
+ dht_unlock_namespace(frame, &local->lock[0]);
+ FRAME_SU_DO(frame, dht_local_t);
+ dht_selfheal_directory(frame, dht_mkdir_selfheal_cbk, &local->loc,
+ layout);
+ return 0;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == hashed_subvol)
+ continue;
+ STACK_WIND_COOKIE(frame, dht_mkdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i], conf->subvolumes[i]->fops->mkdir,
+ &local->loc, local->mode, local->umask,
+ local->params);
+ }
+
+ return 0;
+err:
+ if (local->op_ret != 0) {
+ dht_unlock_namespace(frame, &local->lock[0]);
+ }
- STACK_WIND (frame, dht_mkdir_hashed_cbk,
- hashed_subvol,
- hashed_subvol->fops->mkdir,
- loc, mode, umask, params);
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
+}
+static int
+dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = 0;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = -1;
+ int32_t zero[1] = {0};
+
+ local = frame->local;
+ conf = this->private;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "Acquiring lock on parent to guard against "
+ "layout-change failed.",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ local->op_ret = -1;
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array(params, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, loc->path);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->mkdir,
+ loc, mode, umask, params);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ DHT_STACK_UNWIND(mkdir, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
-
int
-dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = EINVAL, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ call_stub_t *stub = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ conf = this->private;
+
+ if (!params || !dict_get(params, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GFID_NULL,
+ "mkdir: %s is received "
+ "without gfid-req %p",
+ loc->path, params);
+ goto err;
+ }
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug(this->name, 0, "hashed subvol not found for %s",
+ loc->path);
+ local->op_errno = EIO;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+ local->mode = mode;
+ local->umask = umask;
+ if (params)
+ local->params = dict_ref(params);
+
+ local->inode = inode_ref(loc->inode);
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* set the newly created directory hash to the commit hash
+ * if the configuration option is set. If configuration option
+ * is not set, the older clients may still be connecting to the
+ * volume and hence we need to preserve the 1 in disk[0] part of the
+ * layout xattr */
+ if (conf->lookup_optimize)
+ local->layout->commit_hash = conf->vol_commit_hash;
+ else
+ local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+
+ stub = fop_mkdir_stub(frame, dht_mkdir_guard_parent_layout_cbk, loc, mode,
+ umask, params);
+ if (stub == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "creating stub failed.",
+ pgfid, loc->name, loc->path);
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_guard_parent_layout_and_namespace(this, stub);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s) cannot wind lock request to "
+ "guard parent layout",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ return 0;
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
+err:
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- if (op_errno != ENOENT && op_errno != EACCES) {
- local->need_selfheal = 1;
- }
+ heal_local = heal_frame->local;
+ main_frame = heal_local->main_frame;
+ local = main_frame->local;
+ DHT_STACK_DESTROY(heal_frame);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- gf_log (this->name, GF_LOG_DEBUG,
- "rmdir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto unlock;
- }
+ DHT_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ return 0;
+}
+static int
+dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ dht_conf_t *conf = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (conf->subvolume_cnt != 1) {
+ if (op_errno != ENOENT && op_errno != EACCES &&
+ op_errno != ESTALE) {
+ local->need_selfheal = 1;
+ }
+ }
+
+ gf_msg_debug(this->name, op_errno,
+ "rmdir on %s for %s failed "
+ "(gfid = %s)",
+ prev->name, local->loc.path, gfid);
+ goto unlock;
}
+
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- local->layout =
- dht_layout_get (this, local->loc.inode);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (local->need_selfheal) {
+ dht_rmdir_unlock(frame, this);
+ local->layout = dht_layout_get(this, local->loc.inode);
- /* TODO: neater interface needed below */
- local->stbuf.ia_type = local->loc.inode->ia_type;
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
- uuid_copy (local->gfid, local->loc.inode->gfid);
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, local->layout);
- } else {
+ gf_uuid_copy(local->gfid, local->loc.inode->gfid);
- if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
- }
+ /* Use a different frame or else the rmdir op_ret is
+ * overwritten by that of the selfheal */
+
+ heal_frame = copy_frame(frame);
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent, NULL);
- }
+ if (heal_frame == NULL) {
+ goto err;
+ }
+
+ heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ heal_local->inode = inode_ref(local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid);
+
+ dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk,
+ &heal_local->loc, heal_local->layout);
+ return 0;
+ } else {
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
+
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
}
+ }
- return 0;
-}
+ return 0;
+err:
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL);
+ return 0;
+}
-int
-dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- int done = 0;
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- local = frame->local;
- prev = cookie;
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
+ local = frame->local;
- if (op_errno != ENOENT && op_errno != EACCES) {
- local->need_selfheal = 1;
- }
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
- gf_log (this->name, GF_LOG_DEBUG,
- "rmdir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto unlock;
- }
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
- /* Track if rmdir succeeded on atleast one subvol*/
- local->fop_succeeded = 1;
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
- }
-unlock:
- UNLOCK (&frame->lock);
+ if (lock_count == 0)
+ goto done;
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL)
+ goto done;
- this_call_cnt = dht_frame_return (frame);
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done;
- /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
- if (local->hashed_subvol && (this_call_cnt == 1)) {
- done = 1;
- } else if (!local->hashed_subvol && !this_call_cnt) {
- done = 1;
- }
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL;
- if (done) {
- if (local->need_selfheal && local->fop_succeeded) {
- local->layout =
- dht_layout_get (this, local->loc.inode);
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
- /* TODO: neater interface needed below */
- local->stbuf.ia_type = local->loc.inode->ia_type;
+ return 0;
+}
- uuid_copy (local->gfid, local->loc.inode->gfid);
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, local->layout);
- } else if (this_call_cnt) {
- /* If non-hashed subvol's have responded, proceed */
+static int
+dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int done = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
- local->need_selfheal = 0;
- STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
- local->hashed_subvol,
- local->hashed_subvol->fops->rmdir,
- &local->loc, local->flags, NULL);
- } else if (!this_call_cnt) {
- /* All subvol's have responded, proceed */
+ if (op_errno != EACCES)
+ local->need_selfheal = 1;
+ }
- if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
- }
+ gf_uuid_unparse(local->loc.gfid, gfid);
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent, NULL);
- }
+ gf_msg_debug(this->name, op_errno,
+ "rmdir on %s for %s failed."
+ "(gfid = %s)",
+ prev->name, local->loc.path, gfid);
+ goto unlock;
}
- return 0;
-}
+ /* Track if rmdir succeeded on at least one subvol*/
+ local->fop_succeeded = 1;
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
+unlock:
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
-int
-dht_rmdir_do (call_frame_t *frame, xlator_t *this)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
- VALIDATE_OR_GOTO (this->private, err);
+ if (done) {
+ if (local->need_selfheal && local->fop_succeeded) {
+ dht_rmdir_unlock(frame, this);
+ local->layout = dht_layout_get(this, local->loc.inode);
- conf = this->private;
- local = frame->local;
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
- if (local->op_ret == -1)
+ gf_uuid_copy(local->gfid, local->loc.inode->gfid);
+ heal_frame = copy_frame(frame);
+ if (heal_frame == NULL) {
goto err;
+ }
- local->call_cnt = conf->subvolume_cnt;
+ heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ heal_local->inode = inode_ref(local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid);
+ ret = dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk,
+ &heal_local->loc, heal_local->layout);
+ if (ret) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+ if (local->op_ret == 0) {
+ /* Delete the dir from the hashed subvol if:
+ * The fop succeeded on at least one subvol
+ * and did not fail on any
+ * or
+ * The fop failed with ENOENT/ESTALE on
+ * all subvols */
+
+ STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else {
+ /* hashed-subvol was non-NULL and rmdir failed on
+ * all non hashed-subvols. Unwind rmdir with
+ * local->op_ret and local->op_errno. */
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- /* first remove from non-hashed_subvol */
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ return 0;
+ }
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_WARNING, "failed to get hashed "
- "subvol for %s",local->loc.path);
- } else {
- local->hashed_subvol = hashed_subvol;
- }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
- /* When DHT has only 1 child */
- if (conf->subvolume_cnt == 1) {
- STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
- conf->subvolumes[0],
- conf->subvolumes[0]->fops->rmdir,
- &local->loc, local->flags, NULL);
- return 0;
- }
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (hashed_subvol &&
- (hashed_subvol == conf->subvolumes[i]))
- continue;
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- STACK_WIND (frame, dht_rmdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rmdir,
- &local->loc, local->flags, NULL);
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
}
+ }
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
- return 0;
+ DHT_STACK_UNWIND(rmdir, frame, -1, local->op_errno, NULL, NULL, NULL);
+ return 0;
}
+static int
+dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol;
-int
-dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ conf = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed rmdir for %s)",
+ local->loc.path);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ hashed_subvol = local->hashed_subvol;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol && (hashed_subvol == conf->subvolumes[i]))
+ continue;
+
+ STACK_WIND_COOKIE(frame, dht_rmdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i], conf->subvolumes[i]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
+}
+
+static int
+dht_rmdir_do(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- local = frame->local;
- prev = cookie;
- src = prev->this;
+ VALIDATE_OR_GOTO(frame->local, err);
+ local = frame->local;
+ VALIDATE_OR_GOTO(this->private, out);
+ conf = this->private;
- main_frame = local->main_frame;
- main_local = main_frame->local;
+ if (local->op_ret == -1)
+ goto out;
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinked linkfile %s on %s",
- local->loc.path, src->name);
- } else {
- main_local->op_ret = -1;
- main_local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink of %s on %s failed (%s)",
- local->loc.path, src->name, strerror (op_errno));
- }
+ local->call_cnt = conf->subvolume_cnt;
+
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
- this_call_cnt = dht_frame_return (main_frame);
- if (is_last_call (this_call_cnt))
- dht_rmdir_do (main_frame, this);
+ if (!hashed_subvol) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- DHT_STACK_DESTROY (frame);
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s (gfid = %s)",
+ local->loc.path, gfid);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0], conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir, &local->loc,
+ local->flags, NULL);
return 0;
+ }
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, &local->loc, local->hashed_subvol,
+ &local->current->ns, dht_rmdir_lock_cbk);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = errno ? errno : EINVAL;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+ return 0;
+err:
+ DHT_STACK_UNWIND(rmdir, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+static void
+dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+ local = readdirp_frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ /* At least one readdirp failed.
+ * This is a bit hit or miss - if readdirp failed on more than
+ * one subvol, we don't know which error is returned.
+ */
+ if (local->op_ret == -1) {
+ main_local->op_ret = local->op_ret;
+ main_local->op_errno = local->op_errno;
+ }
+
+ this_call_cnt = dht_frame_return(main_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_do(main_frame, this);
+
+ DHT_STACK_DESTROY(readdirp_frame);
}
+/* Keep sending readdirp on the subvol until it returns no more entries
+ * It is possible that not all entries will fit in a single readdirp in
+ * which case the rmdir will keep failing with ENOTEMPTY
+ */
-int
-dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
+static int
+dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
- local = frame->local;
- prev = cookie;
- src = prev->this;
+ local = readdirp_frame->local;
- main_frame = local->main_frame;
- main_local = main_frame->local;
+ if (local->op_ret == -1) {
+ /* there is no point doing another readdirp on this
+ * subvol . */
+ dht_rmdir_readdirp_done(readdirp_frame, this);
+ return 0;
+ }
- if (op_ret != 0)
- goto err;
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
+ local->xattr);
- if (check_is_linkfile (inode, stbuf, xattr) == 0) {
- main_local->op_ret = -1;
- main_local->op_errno = ENOTEMPTY;
+ return 0;
+}
- gf_log (this->name, GF_LOG_WARNING,
- "%s on %s found to be not a linkfile (type=0%o)",
- local->loc.path, src->name, stbuf->ia_type);
- goto err;
- }
+static int
+dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (op_ret == 0) {
+ gf_msg_trace(this->name, 0, "Unlinked linkfile %s on %s, gfid = %s",
+ local->loc.path, src->name, gfid);
+ } else {
+ if (op_errno != ENOENT) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = op_errno;
+ }
+ gf_msg_debug(this->name, op_errno,
+ "Unlink of %s on %s failed. (gfid = %s)", local->loc.path,
+ src->name, gfid);
+ }
+
+ this_call_cnt = dht_frame_return(readdirp_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
- src, src->fops->unlink, &local->loc, 0, NULL);
- return 0;
+static int
+dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ gf_msg_debug(this->name, 0, "dht_rmdir_lookup_cbk %s", local->loc.path);
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_FILE_LOOKUP_FAILED,
+ "lookup failed for %s on %s", local->loc.path, src->name);
+ goto err;
+ }
+
+ if (!check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = ENOTEMPTY;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "%s on %s is not a linkfile (type=0%o, gfid = %s)",
+ local->loc.path, src->name, stbuf->ia_type, gfid);
+ goto err;
+ }
+
+ STACK_WIND_COOKIE(frame, dht_rmdir_linkfile_unlink_cbk, src, src,
+ src->fops->unlink, &local->loc, 0, NULL);
+ return 0;
err:
- this_call_cnt = dht_frame_return (main_frame);
- if (is_last_call (this_call_cnt))
- dht_rmdir_do (main_frame, this);
+ this_call_cnt = dht_frame_return(readdirp_frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+ }
- DHT_STACK_DESTROY (frame);
- return 0;
+ DHT_STACK_DESTROY(frame);
+ return 0;
}
+static int
+dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *xattrs = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ src = local->hashed_subvol;
+
+ /* main_frame here is the readdirp_frame */
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ gf_msg_debug(this->name, 0, "returning for %s ", local->loc.path);
+
+ if (op_ret == 0) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = ENOTEMPTY;
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_ERROR,
+ "%s found on cached subvol %s", local->loc.path, src->name);
+ goto err;
+ } else if (op_errno != ENOENT) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = op_errno;
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_SUBVOL_ERROR,
+ "%s not found on cached subvol %s", local->loc.path, src->name);
+ goto err;
+ }
+
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ goto err;
+ }
+
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
+ if (xattrs)
+ dict_unref(xattrs);
+ goto err;
+ }
+ STACK_WIND_COOKIE(frame, dht_rmdir_lookup_cbk, src, src, src->fops->lookup,
+ &local->loc, xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
+
+ return 0;
+err:
+
+ this_call_cnt = dht_frame_return(readdirp_frame);
-int
-dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entries, xlator_t *src)
+ /* Once all the lookups/unlinks etc have returned, proceed to wind
+ * readdirp on the subvol again until no entries are returned.
+ * This is required if there are more entries than can be returned
+ * in a single readdirp call.
+ */
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int
+dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
+ gf_dirent_t *entries, xlator_t *src)
{
- int ret = 0;
- int build_ret = 0;
- gf_dirent_t *trav = NULL;
- call_frame_t *lookup_frame = NULL;
- dht_local_t *lookup_local = NULL;
- dht_local_t *local = NULL;
- dict_t *xattrs = NULL;
+ int ret = 0;
+ int build_ret = 0;
+ gf_dirent_t *trav = NULL;
+ call_frame_t *lookup_frame = NULL;
+ dht_local_t *lookup_local = NULL;
+ dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int count = 0;
+ gf_boolean_t unwind = _gf_false;
+
+ local = frame->local;
+
+ list_for_each_entry(trav, &entries->list, list)
+ {
+ if (strcmp(trav->d_name, ".") == 0)
+ continue;
+ if (strcmp(trav->d_name, "..") == 0)
+ continue;
+ if (check_is_linkfile(NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
+ count++;
+ continue;
+ }
+
+ /* this entry is either a directory which is neither "." nor "..",
+ or a non directory which is not a linkfile. the directory is to
+ be treated as non-empty
+ */
+ return 0;
+ }
- local = frame->local;
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ return -1;
+ }
- list_for_each_entry (trav, &entries->list, list) {
- if (strcmp (trav->d_name, ".") == 0)
- continue;
- if (strcmp (trav->d_name, "..") == 0)
- continue;
- if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict)) {
- ret++;
- continue;
- }
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
- /* this entry is either a directory which is neither "." nor "..",
- or a non directory which is not a linkfile. the directory is to
- be treated as non-empty
- */
- return 0;
+ if (xattrs)
+ dict_unref(xattrs);
+ return -1;
+ }
+
+ local->call_cnt = count;
+ ret = 0;
+
+ list_for_each_entry(trav, &entries->list, list)
+ {
+ if (strcmp(trav->d_name, ".") == 0)
+ continue;
+ if (strcmp(trav->d_name, "..") == 0)
+ continue;
+
+ lookup_frame = copy_frame(frame);
+
+ if (!lookup_frame) {
+ /* out of memory, let the rmdir fail
+ (as non-empty, unfortunately) */
+ goto err;
}
- xattrs = dict_new ();
- if (!xattrs) {
- gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
- return -1;
+ lookup_local = dht_local_init(lookup_frame, NULL, NULL, GF_FOP_LOOKUP);
+ if (!lookup_local) {
+ goto err;
}
- ret = dict_set_uint32 (xattrs, DHT_LINKFILE_KEY, 256);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
- " in dict");
- if (xattrs)
- dict_unref (xattrs);
- return -1;
- }
-
- list_for_each_entry (trav, &entries->list, list) {
- if (strcmp (trav->d_name, ".") == 0)
- continue;
- if (strcmp (trav->d_name, "..") == 0)
- continue;
-
- lookup_frame = NULL;
- lookup_local = NULL;
-
- lookup_frame = copy_frame (frame);
- if (!lookup_frame) {
- /* out of memory, let the rmdir fail
- (as non-empty, unfortunately) */
- goto err;
- }
+ lookup_frame->local = lookup_local;
+ lookup_local->main_frame = frame;
+ lookup_local->hashed_subvol = src;
- lookup_local = mem_get0 (this->local_pool);
- if (!lookup_local) {
- goto err;
- }
+ build_ret = dht_build_child_loc(this, &lookup_local->loc, &local->loc,
+ trav->d_name);
+ if (build_ret != 0)
+ goto err;
- lookup_frame->local = lookup_local;
- lookup_local->main_frame = frame;
+ gf_uuid_copy(lookup_local->loc.gfid, trav->d_stat.ia_gfid);
- build_ret = dht_build_child_loc (this, &lookup_local->loc,
- &local->loc, trav->d_name);
- if (build_ret != 0)
- goto err;
+ gf_uuid_unparse(lookup_local->loc.gfid, gfid);
- uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid);
+ gf_msg_trace(this->name, 0, "looking up %s on subvolume %s, gfid = %s",
+ lookup_local->loc.path, src->name, gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- lookup_local->loc.path, src->name);
+ subvol = dht_linkfile_subvol(this, NULL, &trav->d_stat, trav->dict);
+ if (!subvol || (subvol == src)) {
+ /* we need to delete the linkto file if it does not have a
+ * valid subvol or it points to itself.
+ */
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_INVALID_LINKFILE,
+ "Linkfile does not have link subvolume. "
+ "path = %s, gfid = %s",
+ lookup_local->loc.path, gfid);
- LOCK (&frame->lock);
- {
- local->call_cnt++;
- }
- UNLOCK (&frame->lock);
+ gf_msg_debug(this->name, 0, "looking up %s on subvol %s, gfid = %s",
+ lookup_local->loc.path, src->name, gfid);
- STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
- src, src->fops->lookup,
- &lookup_local->loc, xattrs);
- ret++;
+ STACK_WIND_COOKIE(lookup_frame, dht_rmdir_lookup_cbk, src, src,
+ src->fops->lookup, &lookup_local->loc, xattrs);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Looking up linkfile target %s on "
+ " subvol %s, gfid = %s",
+ lookup_local->loc.path, subvol->name, gfid);
+
+ STACK_WIND(lookup_frame, dht_rmdir_cached_lookup_cbk, subvol,
+ subvol->fops->lookup, &lookup_local->loc, xattrs);
}
+ ret++;
- if (xattrs)
- dict_unref (xattrs);
+ lookup_frame = NULL;
+ lookup_local = NULL;
+ }
- return ret;
+ if (xattrs)
+ dict_unref(xattrs);
+
+ return ret;
err:
- if (xattrs)
- dict_unref (xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
- DHT_STACK_DESTROY (lookup_frame);
- return 0;
+ if (lookup_frame)
+ DHT_STACK_DESTROY(lookup_frame);
+
+ /* Handle the case where the wound calls have unwound before the
+ * loop processing is done
+ */
+
+ LOCK(&frame->lock);
+ {
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+
+ local->call_cnt -= (count - ret);
+ if (!local->call_cnt)
+ unwind = _gf_true;
+ }
+ UNLOCK(&frame->lock);
+
+ if (!unwind) {
+ return ret;
+ }
+ return 0;
}
+/*
+ * No more entries on this subvol. Proceed to the actual rmdir operation.
+ */
-int
-dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ if (op_ret > 2) {
+ /* dht_rmdir_is_subvol_empty() may free the frame,
+ * copy path for logging.
+ */
+ path = gf_strdup(local->loc.path);
- local = frame->local;
- prev = cookie;
- src = prev->this;
-
- if (op_ret > 2) {
- ret = dht_rmdir_is_subvol_empty (frame, this, entries, src);
-
- switch (ret) {
- case 0: /* non linkfiles exist */
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- break;
- default:
- /* @ret number of linkfiles are getting unlinked */
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s found %d linkfiles",
- prev->this->name, local->loc.path, ret);
- break;
- }
+ ret = dht_rmdir_is_subvol_empty(frame, this, entries, src);
+
+ switch (ret) {
+ case 0: /* non linkfiles exist */
+ gf_msg_trace(this->name, 0,
+ "readdir on %s for %s returned %d "
+ "entries",
+ prev->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ break;
+ default:
+ /* @ret number of linkfiles are getting unlinked */
+ gf_msg_trace(this->name, 0,
+ "readdir on %s for %s found %d "
+ "linkfiles",
+ prev->name, path, ret);
+ break;
}
+ }
- this_call_cnt = dht_frame_return (frame);
+ /* readdirp failed or no linkto files were found on this subvol */
+ if (!ret)
+ dht_rmdir_readdirp_done(frame, this);
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+ GF_FREE(path);
+ return 0;
+}
+static int
+dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *dict = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *readdirp_local = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ int cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ this_call_cnt = dht_frame_return(frame);
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ gf_msg_debug(this->name, op_errno,
+ "opendir on %s for %s failed, "
+ "gfid = %s,",
+ prev->name, local->loc.path, gfid);
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
+
+ if (!is_last_call(this_call_cnt))
return 0;
-}
+ if (local->op_ret == -1)
+ goto err;
-int
-dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- dict_t *dict = NULL;
- int ret = 0;
+ fd_bind(fd);
- local = frame->local;
- prev = cookie;
+ dict = dict_new();
+ if (!dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- if (op_errno != ENOENT) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- goto err;
+ ret = dict_set_uint32(dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:key = %s", local->loc.path,
+ conf->link_xattr_name);
+
+ cnt = local->call_cnt = conf->subvolume_cnt;
+
+ /* Create a separate frame per subvol as we might need
+ * to resend readdirp multiple times to get all the
+ * entries.
+ */
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ readdirp_frame = copy_frame(frame);
+
+ if (!readdirp_frame) {
+ cnt--;
+ /* Reduce the local->call_cnt as well */
+ (void)dht_frame_return(frame);
+ continue;
}
- dict = dict_new ();
- if (!dict) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto err;
+ readdirp_local = dht_local_init(readdirp_frame, &local->loc, local->fd,
+ 0);
+
+ if (!readdirp_local) {
+ DHT_STACK_DESTROY(readdirp_frame);
+ cnt--;
+ /* Reduce the local->call_cnt as well */
+ dht_frame_return(frame);
+ continue;
}
+ readdirp_local->main_frame = frame;
+ readdirp_local->op_ret = 0;
+ readdirp_local->xattr = dict_ref(dict);
+ /* overload this field to save the subvol info */
+ readdirp_local->hashed_subvol = conf->subvolumes[i];
- ret = dict_set_uint32 (dict,
- "trusted.glusterfs.dht.linkto", 256);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set 'trusted.glusterfs.dht.linkto' key",
- local->loc.path);
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->readdirp,
+ readdirp_local->fd, 4096, 0, readdirp_local->xattr);
+ }
- STACK_WIND (frame, dht_rmdir_readdirp_cbk,
- prev->this, prev->this->fops->readdirp,
- local->fd, 4096, 0, dict);
+ if (dict)
+ dict_unref(dict);
- if (dict)
- dict_unref (dict);
+ /* Could not wind readdirp to any subvol */
- return 0;
+ if (!cnt)
+ goto err;
-err:
- this_call_cnt = dht_frame_return (frame);
+ return 0;
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+err:
+ if (is_last_call(this_call_cnt)) {
+ dht_rmdir_do(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
+dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_RMDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+ local->fop_succeeded = 0;
+
+ local->flags = flags;
+
+ local->fd = fd_create(local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (flags) {
+ return dht_rmdir_do(frame, this);
+ }
+ if (xdata) {
+ xattr_req = dict_ref(xdata);
+ } else {
+ xattr_req = dict_new();
+ }
+ if (xattr_req) {
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ /* If parallel-readdir is enabled, this is required
+ * to handle stale linkto files in the directory
+ * being deleted. If this fails, log an error but
+ * do not prevent the operation.
+ */
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_rmdir_opendir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir, loc, local->fd,
+ xattr_req);
+ }
+
+ if (xattr_req) {
+ dict_unref(xattr_req);
+ }
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- conf = this->private;
+ return 0;
+}
- local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+static int
+dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
- local->fop_succeeded = 0;
+{
+ DHT_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- local->flags = flags;
+/* TODO
+ * Sending entrylk to cached subvol can result in stale lock
+ * as described in the bug 1311002.
+ */
+int
+dht_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init(frame, loc, NULL, GF_FOP_ENTRYLK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, local->fd, NULL);
- }
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_uuid_unparse(loc->gfid, gfid);
- return 0;
+ gf_msg_debug(this->name, 0,
+ "no cached subvolume for path=%s, "
+ "gfid = %s",
+ loc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_entrylk_cbk, subvol, subvol->fops->entrylk, volume,
+ loc, basename, cmd, type, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
- NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(entrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-int
-dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static int
+dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
+ DHT_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, NULL);
+ return 0;
}
-
int
-dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+dht_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
- local->call_cnt = 1;
+ gf_uuid_unparse(fd->inode->gfid, gfid);
- STACK_WIND (frame, dht_entrylk_cbk,
- subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
+ subvol = dht_subvol_get_cached(this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "No cached subvolume for fd=%p,"
+ " gfid = %s",
+ fd, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ STACK_WIND(frame, dht_fentrylk_cbk, subvol, subvol->fops->fentrylk, volume,
+ fd, basename, cmd, type, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+static int32_t
+dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
-int
-dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
-{
- DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
- return 0;
-}
+ local = frame->local;
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
-int
-dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno, NULL);
+ }
+
+out:
+ return 0;
+}
+
+int32_t
+dht_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int i = 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind_default;
- STACK_WIND (frame, dht_fentrylk_cbk,
- subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
+ VALIDATE_OR_GOTO(this->private, err);
+ conf = this->private;
- return 0;
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_IPC);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ if (xdata) {
+ if (dict_set_int8(xdata, conf->xattr_name, 0) < 0)
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND(frame, dht_ipc_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->ipc, op, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
- return 0;
-}
+ return 0;
+wind_default:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
int
-dht_forget (xlator_t *this, inode_t *inode)
+dht_forget(xlator_t *this, inode_t *inode)
{
- uint64_t tmp_layout = 0;
- dht_layout_t *layout = NULL;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
+ dht_layout_t *layout = NULL;
- inode_ctx_del (inode, this, &tmp_layout);
-
- if (!tmp_layout)
- return 0;
-
- layout = (dht_layout_t *)(long)tmp_layout;
- dht_layout_unref (this, layout);
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
return 0;
-}
+ ctx = (dht_inode_ctx_t *)(long)ctx_int;
-int
-dht_notify (xlator_t *this, int event, void *data, ...)
-{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
-
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- gf_defrag_type cmd = 0;
- dict_t *output = NULL;
- va_list ap;
-
-
- conf = this->private;
- if (!conf)
- return ret;
+ layout = ctx->layout;
+ ctx->layout = NULL;
+ dht_layout_unref(this, layout);
+ GF_FREE(ctx);
- /* had all subvolumes reported status once till now? */
- had_heard_from_all = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->last_event[i]) {
- had_heard_from_all = 0;
- }
- }
+ return 0;
+}
- switch (event) {
+int
+dht_notify(xlator_t *this, int event, void *data, ...)
+{
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* had all subvolumes reported status once till now? */
+ had_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ switch (event) {
case GF_EVENT_CHILD_UP:
- subvol = data;
+ subvol = data;
- conf->gen++;
+ conf->gen++;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
+ }
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- subvol->name);
- break;
- }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_UP bad "
+ "subvolume %s",
+ subvol->name);
+ break;
+ }
- gettimeofday (&time, NULL);
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 1;
- conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
- }
- UNLOCK (&conf->subvolume_lock);
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 1;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = gf_time();
+ }
+ UNLOCK(&conf->subvolume_lock);
- /* one of the node came back up, do a stat update */
- dht_get_du_info_for_subvol (this, cnt);
+ /* one of the node came back up, do a stat update */
+ dht_get_du_info_for_subvol(this, cnt);
- break;
+ break;
- case GF_EVENT_CHILD_MODIFIED:
- subvol = data;
+ case GF_EVENT_SOME_DESCENDENT_UP:
+ subvol = data;
+ conf->gen++;
+ propagate = 1;
- conf->gen++;
- propagate = 1;
+ break;
- break;
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ subvol = data;
+ propagate = 1;
- case GF_EVENT_CHILD_DOWN:
- subvol = data;
-
- if (conf->assert_no_child_down) {
- gf_log (this->name, GF_LOG_WARNING,
- "Received CHILD_DOWN. Exiting");
- if (conf->defrag) {
- gf_defrag_stop (conf->defrag, NULL);
- } else {
- kill (getpid(), SIGTERM);
- }
- }
+ break;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ case GF_EVENT_CHILD_DOWN:
+ subvol = data;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- subvol->name);
- break;
+ if (conf->assert_no_child_down) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_CHILD_DOWN,
+ "Received CHILD_DOWN. Exiting");
+ if (conf->defrag) {
+ gf_defrag_stop(conf, GF_DEFRAG_STATUS_FAILED, NULL);
+ } else {
+ kill(getpid(), SIGTERM);
}
+ }
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 0;
- conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_DOWN bad "
+ "subvolume %s",
+ subvol->name);
break;
+ }
- case GF_EVENT_CHILD_CONNECTING:
- subvol = data;
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 0;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = 0;
+ }
+ UNLOCK(&conf->subvolume_lock);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (conf->last_event[i] != event)
+ event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ break;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_CONNECTING bad subvolume %s",
- subvol->name);
- break;
- }
+ case GF_EVENT_CHILD_CONNECTING:
+ subvol = data;
- LOCK (&conf->subvolume_lock);
- {
- conf->last_event[cnt] = event;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_CONNECTING"
+ " bad subvolume %s",
+ subvol->name);
break;
- case GF_EVENT_VOLUME_DEFRAG:
- {
- if (!conf->defrag) {
- return ret;
- }
- defrag = conf->defrag;
+ }
- dict = data;
- va_start (ap, data);
- output = va_arg (ap, dict_t*);
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->last_event[cnt] = event;
+ }
+ UNLOCK(&conf->subvolume_lock);
- ret = dict_get_int32 (dict, "rebalance-command",
- (int32_t*)&cmd);
- if (ret)
- return ret;
- LOCK (&defrag->lock);
- {
- if (defrag->is_exiting)
- goto unlock;
- if (cmd == GF_DEFRAG_CMD_STATUS)
- gf_defrag_status_get (defrag, output);
- else if (cmd == GF_DEFRAG_CMD_STOP)
- gf_defrag_stop (defrag, output);
- }
-unlock:
- UNLOCK (&defrag->lock);
- return 0;
- break;
- }
+ break;
+ case GF_EVENT_VOLUME_DEFRAG: {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start(ap, data);
+ output = va_arg(ap, dict_t *);
+ ret = dict_get_int32(dict, "rebalance-command", (int32_t *)&cmd);
+ if (ret) {
+ va_end(ap);
+ return ret;
+ }
+ LOCK(&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
+ gf_defrag_status_get(conf, output);
+ else if (cmd == GF_DEFRAG_CMD_DETACH_START)
+ defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
+ else if (cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_DETACH_STOP)
+ gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
+ }
+ unlock:
+ UNLOCK(&defrag->lock);
+ va_end(ap);
+ return ret;
+ break;
+ }
+ case GF_EVENT_UPCALL:
+ up_data = (struct gf_upcall *)data;
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ break;
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ /* Since md-cache will be aggressively filtering lookups,
+ * the stale layout issue will be more pronounced. Hence
+ * when a layout xattr is changed by the rebalance process
+ * notify all the md-cache clients to invalidate the existing
+ * stat cache and send the lookup next time*/
+ if (up_ci->dict && dict_get(up_ci->dict, conf->xattr_name))
+ up_ci->flags |= UP_EXPLICIT_LOOKUP;
+
+ /* TODO: Instead of invalidating iatt, update the new
+ * hashed/cached subvolume in dht inode_ctx */
+ if (IS_DHT_LINKFILE_MODE(&up_ci->stat))
+ up_ci->flags |= UP_EXPLICIT_LOOKUP;
+
+ propagate = 1;
+ break;
default:
- propagate = 1;
+ propagate = 1;
+ break;
+ }
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all) {
+ propagate = 1;
+ }
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ static int run_defrag = 0;
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
break;
+ }
+
+ if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
}
+ /* Rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ *
+ * If there is a graph switch, we should not restart the
+ * rebalance daemon. Use 'run_defrag' to indicate if the
+ * thread has already started.
+ */
+ if (conf->defrag && !run_defrag) {
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
+ }
+ }
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify(this, event, data);
+out:
+ return ret;
+}
+
+int
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- /* have all subvolumes reported status once by now? */
- have_heard_from_all = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->last_event[i])
- have_heard_from_all = 0;
- }
+ ret = dht_inode_ctx_get(inode, this, &ctx);
- /* if all subvols have reported status, no need to hide anything
- or wait for anything else. Just propagate blindly */
- if (have_heard_from_all) {
- propagate = 1;
- if (conf->defrag) {
- ret = pthread_create (&conf->defrag->th, NULL,
- gf_defrag_start, this);
- if (ret) {
- conf->defrag = NULL;
- GF_FREE (conf->defrag);
- kill (getpid(), SIGTERM);
- }
- }
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
}
+ }
+ return ret;
+}
- if (!had_heard_from_all && have_heard_from_all) {
- /* This is the first event which completes aggregation
- of events from all subvolumes. If at least one subvol
- had come up, propagate CHILD_UP, but only this time
- */
- event = GF_EVENT_CHILD_DOWN;
+void
+dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
+{
+ char string[2048] = {0};
+ char *output_string = NULL;
+ int len = 0;
+ int off = 0;
+ int i = 0;
+ gf_loglevel_t log_level = gf_log_get_loglevel();
+ int ret = 0;
+
+ if (log_level < GF_LOG_INFO)
+ return;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
- event = GF_EVENT_CHILD_UP;
- break;
- }
+ if (!layout)
+ return;
- if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
- event = GF_EVENT_CHILD_CONNECTING;
- /* continue to check other events for CHILD_UP */
- }
- }
- }
+ if (!layout->cnt)
+ return;
+
+ if (!loc)
+ return;
+
+ if (!loc->path)
+ return;
+
+ ret = snprintf(string, sizeof(string), "Setting layout of %s with ",
+ loc->path);
+
+ if (ret < 0)
+ return;
+
+ len += ret;
+
+ /* Calculation of total length of the string required to calloc
+ * output_string. Log includes subvolume-name, start-range, end-range
+ * and err value.
+ *
+ * This log will help to debug cases where:
+ * a) Different processes set different layout of a directory.
+ * b) Error captured in lookup, which will be filled in layout->err
+ * (like ENOENT, ESTALE etc)
+ */
+
+ for (i = 0; i < layout->cnt; i++) {
+ ret = snprintf(string, sizeof(string),
+ "[Subvol_name: %s, Err: %d , Start: "
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
+ layout->list[i].xlator->name, layout->list[i].err,
+ layout->list[i].start, layout->list[i].stop,
+ layout->list[i].commit_hash);
+
+ if (ret < 0)
+ return;
+
+ len += ret;
+ }
+
+ len++;
+
+ output_string = GF_MALLOC(len + 1, gf_common_mt_char);
+
+ if (!output_string)
+ return;
+
+ ret = snprintf(output_string, len + 1, "Setting layout of %s with ",
+ loc->path);
+
+ if (ret < 0)
+ goto err;
+
+ off += ret;
+
+ for (i = 0; i < layout->cnt; i++) {
+ ret = snprintf(output_string + off, len - off,
+ "[Subvol_name: %s, Err: %d , Start: "
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
+ layout->list[i].xlator->name, layout->list[i].err,
+ layout->list[i].start, layout->list[i].stop,
+ layout->list[i].commit_hash);
+
+ if (ret < 0)
+ goto err;
+
+ off += ret;
+ }
+
+ gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_FIXED_LAYOUT, "%s",
+ output_string);
+
+err:
+ GF_FREE(output_string);
+}
+
+int32_t
+dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local)
+{
+ int ret = -1;
+ if (!local)
+ goto out;
+
+ local->rebalance.target_node = dht_subvol_get_hashed(this, &local->loc);
+
+ if (local->rebalance.target_node)
ret = 0;
- if (propagate)
- ret = default_notify (this, event, data);
- return ret;
+out:
+ return ret;
+}
+
+/*
+This function should not be called more then once during a FOP
+handling path. It is valid only for for ops on files
+*/
+int32_t
+dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ if (!local)
+ return -1;
+
+ if (local->rebalance.set) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REBAL_STRUCT_SET,
+ "local->rebalance already set");
+ }
+
+ if (stbuf)
+ memcpy(&local->rebalance.stbuf, stbuf, sizeof(struct iatt));
+
+ if (prebuf)
+ memcpy(&local->rebalance.prebuf, prebuf, sizeof(struct iatt));
+
+ if (postbuf)
+ memcpy(&local->rebalance.postbuf, postbuf, sizeof(struct iatt));
+
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
+
+ local->rebalance.set = 1;
+
+ return 0;
+}
+
+int32_t
+dht_release(xlator_t *this, fd_t *fd)
+{
+ return dht_fd_ctx_destroy(this, fd);
+}
+
+static int
+dht_pt_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!op_ret) {
+ dht_layout_set(this, inode, local->layout);
+ }
+
+ DHT_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
+
+ return 0;
+}
+
+int32_t
+dht_pt_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ bool free_xdata = false;
+ int ret = 0;
+ int op_errno = 0;
+ int32_t *disk_layout_p = NULL;
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!layout)
+ goto wind;
+
+ local->layout = layout;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto wind;
+ free_xdata = true;
+ }
+
+ /*Set the xlator or the following will crash*/
+ layout->list[0].xlator = conf->subvolumes[0];
+
+ dht_selfheal_layout_new_directory(frame, loc, layout);
+
+ dht_disk_layout_extract(this, layout, 0, &disk_layout_p);
+
+ ret = dict_set_bin(xdata, conf->xattr_name, disk_layout_p, 4 * 4);
+ if (ret) {
+ gf_msg("dht", GF_LOG_DEBUG, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "dht layout dict set failed");
+ }
+wind:
+ STACK_WIND(frame, dht_pt_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ if (free_xdata)
+ dict_unref(xdata);
+ return 0;
+
+err:
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int
+dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
+ dict_del(xattr, conf->commithash_xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ }
+
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
+
+int
+dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
+{
+ STACK_WIND(frame, dht_pt_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+ return 0;
+}
+
+static int
+dht_pt_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ dict_del(xattr, conf->xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ }
+
+ DHT_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
+
+int
+dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
+{
+ STACK_WIND(frame, dht_pt_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+ return 0;
+}
+
+/* The job of this function is to check if all the xlators have updated
+ * error in the layout. */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ layout = dht_layout_get(this, inode);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == 0) {
+ return 0;
+ }
+ }
+
+ /* Returning the first xlator error as all xlators have errors */
+ return layout->list[0].err;
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index d03c0e3e019..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -8,659 +8,1377 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <regex.h>
#include "dht-mem-types.h"
+#include "dht-messages.h"
+#include <glusterfs/call-stub.h>
#include "libxlator.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/refcount.h>
+#include <glusterfs/timer.h>
+#include "protocol-common.h"
+#include <glusterfs/glusterfs-acl.h>
#ifndef _DHT_H
#define _DHT_H
-#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_DHT_LOOKUP_UNHASHED_ON 1
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
+#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
+#define DHT_MDS_STR "mds"
+#define GF_DHT_LOOKUP_UNHASHED_OFF 0
+#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
-#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
+/* Layout synchronization */
+#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
+/* Namespace synchronization */
+#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
+#define DHT_LAYOUT_HASH_INVALID 1
+#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
-#include <fnmatch.h>
+#define DHT_DIR_STAT_BLOCKS 8
+#define DHT_DIR_STAT_SIZE 4096
-typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata);
-typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
- int ret);
+/* Virtual xattr for subvols status */
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
-struct dht_layout {
- int spread_cnt; /* layout spread count per directory,
- is controlled by 'setxattr()' with
- special key */
- int cnt;
- int preset;
- int gen;
- int type;
- int ref; /* use with dht_conf_t->layout_lock */
- int search_unhashed;
- struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
- } list[0];
-};
-typedef struct dht_layout dht_layout_t;
+/* Virtual xattrs for debugging */
+#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
+#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-typedef enum {
- DHT_HASH_TYPE_DM,
-} dht_hashfn_type_t;
+/* Rebalance nodeuuid flags */
+#define REBAL_NODEUUID_MINE 0x01
-/* rebalance related */
-struct dht_rebalance_ {
- xlator_t *from_subvol;
- xlator_t *target_node;
- off_t offset;
- size_t size;
- int32_t flags;
- int count;
- struct iobref *iobref;
- struct iovec *vector;
- struct iatt stbuf;
- dht_defrag_cbk_fn_t target_op_fn;
- dict_t *xdata;
-};
+typedef int (*dht_selfheal_dir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+typedef int (*dht_defrag_cbk_fn_t)(xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
-struct dht_local {
- int call_cnt;
- loc_t loc;
- loc_t loc2;
- int op_ret;
- int op_errno;
- int layout_mismatch;
- /* Use stbuf as the postbuf, when we require both
- * pre and post attrs */
- struct iatt stbuf;
- struct iatt prebuf;
- struct iatt preoldparent;
- struct iatt postoldparent;
- struct iatt preparent;
- struct iatt postparent;
- struct statvfs statvfs;
- fd_t *fd;
- inode_t *inode;
- dict_t *params;
- dict_t *xattr;
- dict_t *xattr_req;
- dht_layout_t *layout;
- size_t size;
- ino_t ia_ino;
- xlator_t *src_hashed, *src_cached;
- xlator_t *dst_hashed, *dst_cached;
- xlator_t *cached_subvol;
- xlator_t *hashed_subvol;
- char need_selfheal;
- int file_count;
- int dir_count;
- call_frame_t *main_frame;
- int fop_succeeded;
- struct {
- fop_mknod_cbk_t linkfile_cbk;
- struct iatt stbuf;
- loc_t loc;
- inode_t *inode;
- dict_t *xattr;
- xlator_t *srcvol;
- } linkfile;
- struct {
- uint32_t hole_cnt;
- uint32_t overlaps_cnt;
- uint32_t down;
- uint32_t misc;
- dht_selfheal_dir_cbk_t dir_cbk;
- dht_layout_t *layout;
- } selfheal;
- uint32_t uid;
- uint32_t gid;
+typedef int (*dht_refresh_layout_unlock)(call_frame_t *frame, xlator_t *this,
+ int op_ret, int invoke_cbk);
- /* needed by nufa */
- int32_t flags;
- mode_t mode;
- dev_t rdev;
- mode_t umask;
+typedef int (*dht_refresh_layout_done_handle)(call_frame_t *frame);
- /* need for file-info */
- char *xattr_val;
- char *key;
+struct dht_layout {
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ /*
+ * The last *configuration* state for which this directory was known
+ * to be in balance. The corresponding vol_commit_hash changes
+ * whenever bricks are added or removed. This value changes when a
+ * (full) rebalance is complete. If they match, it's safe to assume
+ * that every file is where it should be and there's no need to do
+ * lookups for files elsewhere. If they don't, then we have to do a
+ * global lookup to be sure.
+ */
+ uint32_t commit_hash;
+ /*
+ * The *runtime* state of the volume, changes when connections to
+ * bricks are made or lost.
+ */
+ int gen;
+ int type;
+ gf_atomic_t ref; /* use with dht_conf_t->layout_lock */
+ uint32_t search_unhashed;
+ struct {
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ uint32_t commit_hash;
+ xlator_t *xlator;
+ } list[];
+};
+typedef struct dht_layout dht_layout_t;
+
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+ xlator_t *lock_subvol;
+ xlator_t *mds_subvol; /* This is only used for directories */
+};
- /* which xattr request? */
- char xsel[256];
+typedef struct dht_inode_ctx dht_inode_ctx_t;
- char *newpath;
+typedef enum {
+ DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
+} dht_hashfn_type_t;
- /* gfid related */
- uuid_t gfid;
+typedef enum {
+ DHT_INODELK,
+ DHT_ENTRYLK,
+} dht_lock_type_t;
- /*Marker Related*/
- struct marker_str marker;
+/* rebalance related */
+struct dht_rebalance_ {
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
+ dict_t *xattr;
+ dict_t *dict;
+ struct gf_flock flock;
+ int32_t set;
+ int lock_cmd;
+};
- /* flag used to make sure we need to return estale in
- {lookup,revalidate}_cbk */
- char return_estale;
- char need_lookup_everywhere;
+/**
+ * Enum to store decided action based on the qdstatfs (quota-deem-statfs)
+ * events
+ **/
+typedef enum {
+ qdstatfs_action_OFF = 0,
+ qdstatfs_action_REPLACE,
+ qdstatfs_action_NEGLECT,
+ qdstatfs_action_COMPARE,
+} qdstatfs_action_t;
- glusterfs_fop_t fop;
+typedef enum {
+ REACTION_INVALID,
+ FAIL_ON_ANY_ERROR,
+ IGNORE_ENOENT_ESTALE,
+ IGNORE_ENOENT_ESTALE_EIO,
+} dht_reaction_type_t;
+
+struct dht_skip_linkto_unlink {
+ xlator_t *hash_links_to;
+ uuid_t cached_gfid;
+ uuid_t hashed_gfid;
+ int opend_fd_count;
+ gf_boolean_t handle_valid_link;
+};
- struct dht_rebalance_ rebalance;
+typedef struct {
+ xlator_t *xl;
+ loc_t loc; /* contains/points to inode to lock on. */
+ char *domain; /* Only locks within a single domain
+ * contend with each other
+ */
+ char *basename; /* Required for entrylk */
+ gf_boolean_t locked;
+ dht_reaction_type_t do_on_failure;
+ short type; /* read/write lock. */
+ gf_lkowner_t lk_owner;
+} dht_lock_t;
+
+/* The lock structure represents inodelk. */
+typedef struct {
+ fop_inodelk_cbk_t inodelk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_ilock_wrap_t;
+
+/* The lock structure represents entrylk. */
+typedef struct {
+ fop_entrylk_cbk_t entrylk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_elock_wrap_t;
+
+/* The first member of dht_dir_transaction_t should be of type dht_ilock_wrap_t.
+ * Otherwise it can result in subtle memory corruption issues as in most of the
+ * places we use lock[0].layout.my_layout or lock[0].layout.parent_layout and
+ * lock[0].ns.parent_layout (like in dht_local_wipe).
+ */
+typedef union {
+ union {
+ dht_ilock_wrap_t my_layout;
+ dht_ilock_wrap_t parent_layout;
+ } layout;
+ struct dht_namespace {
+ dht_ilock_wrap_t parent_layout;
+ dht_elock_wrap_t directory_ns;
+ fop_entrylk_cbk_t ns_cbk;
+ } ns;
+} dht_dir_transaction_t;
+
+typedef int (*dht_selfheal_layout_t)(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout);
+
+typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
+ dht_layout_t **inmem,
+ dht_layout_t **ondisk);
+struct dht_local {
+ loc_t loc;
+ loc_t loc2;
+ int call_cnt;
+ int op_ret;
+ int op_errno;
+ int layout_mismatch;
+ /* Use stbuf as the postbuf, when we require both
+ * pre and post attrs */
+ struct iatt stbuf;
+ struct iatt mds_stbuf;
+ struct iatt prebuf;
+ struct iatt preoldparent;
+ struct iatt postoldparent;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct statvfs statvfs;
+ fd_t *fd;
+ inode_t *inode;
+ dict_t *params;
+ dict_t *xattr;
+ dict_t *mds_xattr;
+ dict_t *xdata; /* dict used to save xdata response by xattr fop */
+ dict_t *xattr_req;
+ dht_layout_t *layout;
+ size_t size;
+ ino_t ia_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ xlator_t *mds_subvol; /* This is use for dir only */
+ int file_count;
+ int dir_count;
+ call_frame_t *main_frame;
+ int fop_succeeded;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct iatt stbuf;
+ loc_t loc;
+ inode_t *inode;
+ dict_t *xattr;
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_selfheal_layout_t healer;
+ dht_need_heal_t should_heal;
+ dht_layout_t *layout, *refreshed_layout;
+ uint32_t missing_cnt;
+ gf_boolean_t force_mkdir;
+ } selfheal;
+
+ dht_refresh_layout_unlock refresh_layout_unlock;
+ dht_refresh_layout_done_handle refresh_layout_done;
+
+ uint32_t uid;
+ uint32_t gid;
+ pid_t pid;
+
+ glusterfs_fop_t fop;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
+
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
+ /* gfid related */
+ uuid_t gfid;
+ uuid_t gfid_req;
+
+ xlator_t *link_subvol;
+
+ struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
+ struct dht_skip_linkto_unlink skip_unlink;
+
+ dht_dir_transaction_t lock[2], *current;
+
+ /* inodelks during filerename for backward compatibility */
+ dht_lock_t **rename_inodelk_backward_compatible;
+
+ call_stub_t *stub;
+ int32_t parent_disk_layout[4];
+
+ /* rename rollback */
+ int *ret_cache;
+
+ loc_t loc2_copy;
+
+ int rename_inodelk_bc_count;
+ /* This is use only for directory operation */
+ int32_t valid;
+ int32_t mds_heal_fresh_lookup;
+ short lock_type;
+ char need_selfheal;
+ char need_xattr_heal;
+ char need_attrheal;
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+ /* fd open check */
+ gf_boolean_t fd_checked;
+ gf_boolean_t linked;
+ gf_boolean_t added_link;
+ gf_boolean_t is_linkfile;
+ gf_boolean_t quota_deem_statfs;
+ gf_boolean_t heal_layout;
+ gf_boolean_t locked;
+ gf_boolean_t dont_create_linkto;
+ gf_boolean_t gfid_missing;
};
typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
- double avail_percent;
- double avail_inodes;
- uint64_t avail_space;
- uint32_t log;
+ double avail_percent;
+ double avail_inodes;
+ uint64_t avail_space;
+ uint32_t log;
+ uint32_t chunks;
+ uint32_t total_blocks;
+ uint32_t avail_blocks;
+ uint32_t frsize; /*fragment size*/
};
typedef struct dht_du dht_du_t;
enum gf_defrag_type {
- GF_DEFRAG_CMD_START = 1,
- GF_DEFRAG_CMD_STOP = 1 + 1,
- GF_DEFRAG_CMD_STATUS = 1 + 2,
- GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
- GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+ GF_DEFRAG_CMD_NONE = 0,
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+ GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
+ GF_DEFRAG_CMD_DETACH_START = 1 + 13,
+ GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
+ GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
+ GF_DEFRAG_CMD_DETACH_STOP = 1 + 16,
+ /* new labels are used so it will help
+ * while removing old labels by easily differentiating.
+ * A few labels are added so that the count remains same
+ * between this enum and the ones on the xdr file.
+ * different values for the same enum cause errors and
+ * confusion.
+ */
};
typedef enum gf_defrag_type gf_defrag_type;
enum gf_defrag_status_t {
- GF_DEFRAG_STATUS_NOT_STARTED,
- GF_DEFRAG_STATUS_STARTED,
- GF_DEFRAG_STATUS_STOPPED,
- GF_DEFRAG_STATUS_COMPLETE,
- GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
};
typedef enum gf_defrag_status_t gf_defrag_status_t;
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
-struct gf_defrag_info_ {
- uint64_t total_files;
- uint64_t total_data;
- uint64_t num_files_lookedup;
- uint64_t total_failures;
- gf_lock_t lock;
- int cmd;
- pthread_t th;
- gf_defrag_status_t defrag_status;
- struct rpc_clnt *rpc;
- uint32_t connected;
- uint32_t is_exiting;
- pid_t pid;
- inode_t *root_inode;
- uuid_t node_uuid;
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+struct dht_container {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_dirent_t *next;
+ struct _gf_dirent_t *prev;
+ };
+ };
+ gf_dirent_t *df_entry;
+ xlator_t *this;
+ loc_t *parent_loc;
+ dict_t *migrate_data;
+ int local_subvol_index;
+};
+
+typedef struct nodeuuid_info {
+ char info; /* Set to 1 is this is my node's uuid*/
+ uuid_t uuid; /* Store the nodeuuid as well for debugging*/
+} nodeuuid_info_t;
+
+typedef struct subvol_nodeuuids_info {
+ nodeuuid_info_t *elements;
+ int count;
+} subvol_nodeuuids_info_t;
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ uint64_t num_dirs_processed;
+ uint64_t size_processed;
+ gf_lock_t lock;
+ pthread_t th;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ int cmd;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ time_t start_time;
+ uint32_t new_commit_hash;
+ gf_defrag_status_t defrag_status;
+ gf_defrag_pattern_list_t *defrag_pattern;
+
+ pthread_cond_t parallel_migration_cond;
+ pthread_mutex_t dfq_mutex;
+ pthread_cond_t rebalance_crawler_alarm;
+ int32_t q_entry_count;
+ int32_t global_error;
+ struct dht_container *queue;
+ int32_t crawl_done;
+ int32_t abort;
+ int32_t wakeup_crawler;
+
+ /*Throttle params*/
+ /*stands for reconfigured thread count*/
+ int32_t recon_thread_count;
+ pthread_cond_t df_wakeup_thread;
+
+ /* backpointer to make it easier to write functions for rebalance */
+ xlator_t *this;
+
+ pthread_cond_t fc_wakeup_cond;
+ pthread_mutex_t fc_mutex;
+
+ /*stands for current running thread count*/
+ int32_t current_thread_count;
+
+ gf_boolean_t stats;
+ /* lock migration flag */
+ gf_boolean_t lock_migration_enabled;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
+struct dht_methods_s {
+ int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
+ int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
+ xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
+ const char *name);
+};
+
+typedef struct dht_methods_s dht_methods_t;
+
struct dht_conf {
- gf_lock_t subvolume_lock;
- int subvolume_cnt;
- xlator_t **subvolumes;
- char *subvolume_status;
- int *last_event;
- dht_layout_t **file_layouts;
- dht_layout_t **dir_layouts;
- gf_boolean_t search_unhashed;
- int gen;
- dht_du_t *du_stats;
- double min_free_disk;
- double min_free_inodes;
- char disk_unit;
- int32_t refresh_interval;
- gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
- gf_lock_t layout_lock;
- void *private; /* Can be used by wrapper xlators over
- dht */
- gf_boolean_t use_readdirp;
- char vol_uuid[UUID_SIZE + 1];
- gf_boolean_t assert_no_child_down;
- time_t *subvol_up_time;
-
- /* This is the count used as the distribute layout for a directory */
- /* Will be a global flag to control the layout spread count */
- uint32_t dir_spread_cnt;
-
- /* to keep track of nodes which are decomissioned */
- xlator_t **decommissioned_bricks;
- int decommission_in_progress;
-
- /* defrag related */
- gf_defrag_info_t *defrag;
-
- /* Request to filter directory entries in readdir request */
-
- gf_boolean_t readdir_optimize;
+ xlator_t **subvolumes;
+ char *subvolume_status;
+ int *last_event;
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ unsigned int search_unhashed;
+ int gen;
+ dht_du_t *du_stats;
+ double min_free_disk;
+ double min_free_inodes;
+ int subvolume_cnt;
+ int32_t refresh_interval;
+ gf_lock_t subvolume_lock;
+ time_t last_stat_fetch;
+ gf_lock_t layout_lock;
+ dict_t *leaf_to_subvol;
+ void *private; /* Can be used by wrapper xlators over
+ dht */
+ time_t *subvol_up_time;
+
+ /* to keep track of nodes which are decommissioned */
+ xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ regex_t extra_regex;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *mds_xattr_key;
+ char *link_xattr_name;
+ char *commithash_xattr_name;
+ char *wild_xattr_name;
+
+ dht_methods_t methods;
+
+ struct mem_pool *lock_pool;
+
+ /*local subvol storage for rebalance*/
+ xlator_t **local_subvols;
+ subvol_nodeuuids_info_t *local_nodeuuids;
+ int32_t local_subvols_cnt;
+
+ int dthrottle;
+
+ /* Hard link handle requirement for migration triggered from client*/
+ synclock_t link_lock;
+
+ /* lock migration */
+ gf_lock_t lock;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
+ /*
+ * "Commit hash" for this volume topology. Changed whenever bricks
+ * are added or removed.
+ */
+ uint32_t vol_commit_hash;
+
+ char vol_uuid[UUID_SIZE + 1];
+
+ char disk_unit;
+
+ gf_boolean_t lock_migration_enabled;
+
+ gf_boolean_t vch_forced;
+
+ gf_boolean_t use_fallocate;
+
+ gf_boolean_t force_migration;
+
+ gf_boolean_t lookup_optimize;
+
+ gf_boolean_t unhashed_sticky_bit;
+
+ gf_boolean_t assert_no_child_down;
+
+ gf_boolean_t use_readdirp;
+
+ /* Request to filter directory entries in readdir request */
+ gf_boolean_t readdir_optimize;
+
+ gf_boolean_t rsync_regex_valid;
+
+ gf_boolean_t extra_regex_valid;
+
+ /* Support size-weighted rebalancing (heterogeneous bricks). */
+ gf_boolean_t do_weighting;
+
+ gf_boolean_t randomize_by_gfid;
};
typedef struct dht_conf dht_conf_t;
+struct dht_dfoffset_ctx {
+ xlator_t *this;
+ off_t offset;
+ int32_t readdir_done;
+};
+typedef struct dht_dfoffset_ctx dht_dfoffset_ctx_t;
struct dht_disk_layout {
- uint32_t cnt;
- uint32_t type;
- struct {
- uint32_t start;
- uint32_t stop;
- } list[1];
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
};
typedef struct dht_disk_layout dht_disk_layout_t;
typedef enum {
- GF_DHT_MIGRATE_DATA,
- GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
- GF_DHT_MIGRATE_HARDLINK,
- GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
} gf_dht_migrate_data_type_t;
+typedef enum {
+ GF_DHT_EQUAL_DISTRIBUTION,
+ GF_DHT_WEIGHTED_DISTRIBUTION
+} dht_distribution_type_t;
+
+struct dir_dfmeta {
+ gf_dirent_t *equeue;
+ dht_dfoffset_ctx_t *offset_var;
+ struct list_head **head;
+ struct list_head **iterator;
+ int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
+};
+
+typedef struct dht_migrate_info {
+ xlator_t *src_subvol;
+ xlator_t *dst_subvol;
+ GF_REF_DECL;
+} dht_migrate_info_t;
+
+typedef struct dht_fd_ctx {
+ uint64_t opened_on_dst;
+ GF_REF_DECL;
+} dht_fd_ctx_t;
+
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) \
+ (dht_inode_ctx_layout_get((loc)->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
#define DHT_MIGRATION_IN_PROGRESS 1
-#define DHT_MIGRATION_COMPLETED 2
+#define DHT_MIGRATION_COMPLETED 2
-#define DHT_LINKFILE_KEY "trusted.glusterfs.dht.linkto"
-#define DHT_LINKFILE_MODE (S_ISVTX)
+#define check_is_linkfile(i, s, x, n) \
+ (IS_DHT_LINKFILE_MODE(s) && dict_get(x, n))
-#define check_is_linkfile(i,s,x) ( \
- ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE) && \
- dict_get (x, DHT_LINKFILE_KEY))
+#define IS_DHT_MIGRATION_PHASE2(buf) \
+ (IA_ISREG((buf)->ia_type) && \
+ ((st_mode_from_ia((buf)->ia_prot, (buf)->ia_type) & ~S_IFMT) == \
+ DHT_LINKFILE_MODE))
-#define IS_DHT_MIGRATION_PHASE2(buf) ( \
- IA_ISREG ((buf)->ia_type) && \
- ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \
- ~S_IFMT) == DHT_LINKFILE_MODE))
+#define IS_DHT_MIGRATION_PHASE1(buf) \
+ (IA_ISREG((buf)->ia_type) && ((buf)->ia_prot.sticky == 1) && \
+ ((buf)->ia_prot.sgid == 1))
-#define IS_DHT_MIGRATION_PHASE1(buf) ( \
- IA_ISREG ((buf)->ia_type) && \
- ((buf)->ia_prot.sticky == 1) && \
- ((buf)->ia_prot.sgid == 1))
+#define DHT_STRIP_PHASE1_FLAGS(buf) \
+ do { \
+ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
+ (buf)->ia_prot.sticky = 0; \
+ (buf)->ia_prot.sgid = 0; \
+ } \
+ } while (0)
-#define DHT_STRIP_PHASE1_FLAGS(buf) do { \
- if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
- (buf)->ia_prot.sticky = 0; \
- (buf)->ia_prot.sgid = 0; \
- } \
- } while (0)
+#define dht_inode_missing(op_errno) (op_errno == ENOENT || op_errno == ESTALE)
-#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
+#define check_is_dir(i, s, x) (IA_ISDIR(s->ia_type))
#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
-#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- if (frame) { \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
- const char *name);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p, uint32_t *no_space_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
-
-xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
- struct iatt *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
-
-int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
-int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
-
-int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw, int disk_layout_len);
-
-
-int dht_frame_return (call_frame_t *frame);
-
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
-int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
- uint64_t *x);
-
-void dht_local_wipe (xlator_t *this, dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
- glusterfs_fop_t fop);
-int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
- xlator_t *subvol);
-
-xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
-xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
-xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
-
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
-
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
-int
-dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- dht_layout_t *layout);
-int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
-int
-dht_layout_sort_volname (dht_layout_t *layout);
-
-int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
-
-gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
-
-int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);;
-void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
-dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
-xlator_t *dht_first_up_subvol (xlator_t *this);
-xlator_t *dht_last_up_subvol (xlator_t *this);
-
-int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-
-int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
- xlator_t **subvol);
-
-int dht_rename_cleanup (call_frame_t *frame);
-int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata);
-
-int dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout);
-
-int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+#define we_are_not_migrating(x) ((x) == 1)
+
+#define DHT_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ dht_local_wipe(__xl, __local); \
+ } while (0)
+
+#define DHT_STACK_DESTROY(frame) \
+ do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ dht_local_wipe(__xl, __local); \
+ } while (0)
+
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, post) \
+ do { \
+ if (ctx_sec == new_sec) \
+ new_nsec = max(new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } while (0)
+
+#define is_greater_time(a, an, b, bn) \
+ (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
+
+#define DHT_MARK_FOP_INTERNAL(xattr) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str(xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path); \
+ } \
+ } while (0)
+
+dht_layout_t *
+dht_layout_new(xlator_t *this, int cnt);
+dht_layout_t *
+dht_layout_get(xlator_t *this, inode_t *inode);
+dht_layout_t *
+dht_layout_for_subvol(xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name);
+int32_t
+dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local);
+int32_t
+dht_migration_needed(xlator_t *this);
+int
+dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout);
+void
+dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p);
+int
+dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr);
+xlator_t *
+dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf,
+ dict_t *xattr);
+int
+dht_linkfile_unlink(call_frame_t *frame, xlator_t *this, xlator_t *subvol,
+ loc_t *loc);
+
+int
+dht_layouts_init(xlator_t *this, dht_conf_t *conf);
+int
+dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr);
+
+int
+dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
+ int32_t **disk_layout_p);
+int
+dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p);
+
+int
+dht_frame_return(call_frame_t *frame);
+
+int
+dht_deitransform(xlator_t *this, uint64_t y, xlator_t **subvol);
+
+void
+dht_local_wipe(xlator_t *this, dht_local_t *local);
+dht_local_t *
+dht_local_init(call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop);
+int
+dht_iatt_merge(xlator_t *this, struct iatt *to, struct iatt *from);
+
+xlator_t *
+dht_subvol_get_hashed(xlator_t *this, loc_t *loc);
+xlator_t *
+dht_subvol_get_cached(xlator_t *this, inode_t *inode);
+xlator_t *
+dht_subvol_next(xlator_t *this, xlator_t *prev);
+xlator_t *
+dht_subvol_next_available(xlator_t *this, xlator_t *prev);
+int
+dht_subvol_cnt(xlator_t *this, xlator_t *subvol);
+
+int
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p);
+
+int
+dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
+ loc_t *loc);
+int
+dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc);
+int
+dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+int
+dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ dht_layout_t *layout);
+int
+dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+void
+dht_layout_sort_volname(dht_layout_t *layout);
+
+int
+dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+gf_boolean_t
+dht_is_subvol_filled(xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_free_disk_available_subvol(xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
+int
+dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx);
+
+int
+dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode);
+int
+dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout);
+;
+void
+dht_layout_unref(xlator_t *this, dht_layout_t *layout);
+dht_layout_t *
+dht_layout_ref(xlator_t *this, dht_layout_t *layout);
+int
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
+xlator_t *
+dht_first_up_subvol(xlator_t *this);
+xlator_t *
+dht_last_up_subvol(xlator_t *this);
+
+int
+dht_build_child_loc(xlator_t *this, loc_t *child, loc_t *parent, char *name);
+
+int
+dht_filter_loc_subvol_key(xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol);
+
+int
+dht_rename_cleanup(call_frame_t *frame);
+int
+dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+dht_update_commit_hash_for_layout(call_frame_t *frame);
+int
+dht_fix_directory_layout(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
+
+int
+dht_init_subvolumes(xlator_t *this, dht_conf_t *conf);
/* migration/rebalance */
-int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
+int
+dht_start_rebalance_task(xlator_t *this, call_frame_t *frame);
-int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
-int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+int
+dht_rebalance_in_progress_check(xlator_t *this, call_frame_t *frame);
+int
+dht_rebalance_complete_check(xlator_t *this, call_frame_t *frame);
+int
+dht_init_local_subvolumes(xlator_t *this, dht_conf_t *conf);
/* FOPS */
-int32_t dht_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req);
-
-int32_t dht_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t dht_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t dht_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-int32_t dht_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-int32_t dht_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-int32_t dht_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
-
-int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
-
-int32_t dht_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata);
-
-int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata);
-
-int32_t dht_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t dht_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params);
-
-int32_t dht_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata);
-
-int32_t dht_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, uint32_t flags, dict_t *xdata);
-
-int32_t dht_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-int32_t dht_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t dht_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t dht_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-int32_t dht_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t dht_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t dht_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t dht_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t dht_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t dht_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t dht_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-int32_t dht_fremovexattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t dht_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t dht_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t dht_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *dict);
-
-int32_t dht_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t dht_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t dht_forget (xlator_t *this, inode_t *inode);
-int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
-int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
-
-int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+int32_t
+dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req);
+
+int32_t
+dht_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+dht_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
+
+int32_t
+dht_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
+
+int32_t
+dht_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int32_t
+dht_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int32_t
+dht_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t
+dht_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+dht_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
+
+int32_t
+dht_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+dht_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
+
+int32_t
+dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata);
+
+int32_t
+dht_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+dht_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+dht_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata);
+
+int32_t
+dht_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int32_t
+dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t
+dht_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+dht_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+dht_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int32_t
+dht_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int32_t
+dht_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int32_t
+dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+dht_forget(xlator_t *this, inode_t *inode);
+int32_t
+dht_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+int32_t
+dht_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+int32_t
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+int32_t
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+int32_t
+dht_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
+
+int
+dht_set_subvol_range(xlator_t *this);
+int32_t
+dht_init(xlator_t *this);
+void
+dht_fini(xlator_t *this);
+int
+dht_reconfigure(xlator_t *this, dict_t *options);
+int32_t
+dht_notify(xlator_t *this, int32_t event, void *data, ...);
/* definitions for nufa/switch */
-int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+int
+dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent);
+int
+dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent);
+int
+dht_lookup_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
struct iatt *postparent);
-int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata);
-int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
+int
+dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int
+dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+int
+dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+
+int
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata);
+
+int
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+int
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
+
+int
+gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
+
+void *
+gf_defrag_start(void *this);
+
+int32_t
+gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno);
+int
+dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag, int *fop_errno);
+int
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int);
+int
+dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
+void
+dht_inode_ctx_time_set(inode_t *inode, xlator_t *this, struct iatt *stat);
+
+int
+dht_inode_ctx_get(inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int
+dht_inode_ctx_set(inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal(void *data);
+int
+dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data);
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ xlator_t *ignore, dht_layout_t *layout,
+ uint64_t filesize);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_dir_has_layout(dict_t *xattr, char *name);
+int
+dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this);
+
+int32_t
+dht_priv_dump(xlator_t *this);
+int32_t
+dht_inodectx_dump(xlator_t *this, inode_t *inode);
+
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
+
+int
+dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol);
+gf_boolean_t
+dht_mig_info_is_invalid(xlator_t *current, xlator_t *src_subvol,
+ xlator_t *dst_subvol);
+
+int
+dht_subvol_status(dht_conf_t *conf, xlator_t *subvol);
+
+void
+dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout);
int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+dht_layout_sort(dht_layout_t *layout);
int
-gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+dht_heal_full_path(void *data);
-void*
-gf_defrag_start (void *this);
+int
+dht_heal_full_path_done(int op_ret, call_frame_t *frame, void *data);
+
+int
+dht_layout_missing_dirs(dht_layout_t *layout);
+
+int
+dht_refresh_layout(call_frame_t *frame);
+
+int
+dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno);
+
+int32_t
+dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+void
+dht_build_root_loc(inode_t *inode, loc_t *loc);
+
+gf_boolean_t
+dht_fd_open_on_dst(xlator_t *this, fd_t *fd, xlator_t *dst);
+
+int32_t
+dht_fd_ctx_destroy(xlator_t *this, fd_t *fd);
+
+int32_t
+dht_release(xlator_t *this, fd_t *fd);
+
+int32_t
+dht_set_fixed_dir_stat(struct iatt *stat);
+
+xlator_t *
+dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local);
+
+int
+dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret);
+
+int
+dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *subvol);
+
+int
+dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame);
+
+/* FD fop callbacks */
+
+int
+dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata);
+
+int
+dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata);
+
+int
+dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+/* All custom xattr heal functions */
+int
+dht_dir_heal_xattrs(void *data);
+
+int
+dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data);
+
+int32_t
+dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size);
+
+int
+dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
+
+void
+dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+ dict_t *src, int *uret, int *uflag);
+
+int
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
+
+int
+dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
+
+int
+dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol);
+
+int
+dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout);
+
+/* Abstract out the DHT-IATT-IN-DICT */
+
+void
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+int
+dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata);
+
+int
+dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata);
int32_t
-gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
- struct iatt *stbuf);
+dht_pt_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int
+dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+
+int
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
int
-dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
- int flag);
-#endif/* _DHT_H */
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
+#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 52ea3a32aca..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -8,290 +8,480 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "defaults.h"
#include <sys/time.h>
-
+#include <glusterfs/events.h>
int
-dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs,
- dict_t *xdata)
+dht_du_info_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
- int i = 0;
- double percent = 0;
- double percent_inodes = 0;
- uint64_t bytes = 0;
-
- conf = this->private;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get disk info from %s", prev->this->name);
- goto out;
- }
-
- if (statvfs && statvfs->f_blocks) {
- percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
- bytes = (statvfs->f_bavail * statvfs->f_frsize);
- }
-
- if (statvfs && statvfs->f_files) {
- percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
- } else {
- /* set percent inodes to 100 for dynamically allocated inode filesystems
- this logic holds good so that, distribute has nothing to worry about
- total inodes rather let the 'create()' to be scheduled on the hashed
- subvol regardless of the total inodes. since we have no awareness on
- loosing inodes this logic fits well
- */
- percent_inodes = 100;
- }
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (prev->this == conf->subvolumes[i]) {
- conf->du_stats[i].avail_percent = percent;
- conf->du_stats[i].avail_space = bytes;
- conf->du_stats[i].avail_inodes = percent_inodes;
- gf_log (this->name, GF_LOG_DEBUG,
- "on subvolume '%s': avail_percent is: "
- "%.2f and avail_space is: %"PRIu64" "
- "and avail_inodes is: %.2f",
- prev->this->name,
- conf->du_stats[i].avail_percent,
- conf->du_stats[i].avail_space,
- conf->du_stats[i].avail_inodes);
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+ int i = 0;
+ double percent = 0;
+ double percent_inodes = 0;
+ uint64_t bytes = 0;
+ uint32_t bpc; /* blocks per chunk */
+ uint32_t chunks = 0;
+
+ conf = this->private;
+ prev = cookie;
+
+ if (op_ret == -1 || !statvfs) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_DISK_INFO_ERROR, "failed to get disk info from %s",
+ prev->name);
+ goto out;
+ }
+
+ if (statvfs->f_blocks) {
+ percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
+ bytes = (statvfs->f_bavail * statvfs->f_frsize);
+ /*
+ * A 32-bit count of 1MB chunks allows a maximum brick size of
+ * ~4PB. It's possible that we could see a single local FS
+ * bigger than that some day, but this code is likely to be
+ * irrelevant by then. Meanwhile, it's more important to keep
+ * the chunk size small so the layout-calculation code that
+ * uses this value can be tested on normal machines.
+ */
+ bpc = (1 << 20) / statvfs->f_bsize;
+ chunks = (statvfs->f_blocks + bpc - 1) / bpc;
+ }
+
+ if (statvfs->f_files) {
+ percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
+ } else {
+ /*
+ * Set percent inodes to 100 for dynamically allocated inode
+ * filesystems. The rationale is that distribute need not
+ * worry about total inodes; rather, let the 'create()' be
+ * scheduled on the hashed subvol regardless of the total
+ * inodes.
+ */
+ percent_inodes = 100;
+ }
+
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (prev == conf->subvolumes[i]) {
+ conf->du_stats[i].avail_percent = percent;
+ conf->du_stats[i].avail_space = bytes;
+ conf->du_stats[i].avail_inodes = percent_inodes;
+ conf->du_stats[i].chunks = chunks;
+ conf->du_stats[i].total_blocks = statvfs->f_blocks;
+ conf->du_stats[i].avail_blocks = statvfs->f_bavail;
+ conf->du_stats[i].frsize = statvfs->f_frsize;
+
+ gf_msg_debug(this->name, 0,
+ "subvolume '%s': avail_percent "
+ "is: %.2f and avail_space "
+ "is: %" PRIu64
+ " and avail_inodes"
+ " is: %.2f",
+ prev->name, conf->du_stats[i].avail_percent,
+ conf->du_stats[i].avail_space,
+ conf->du_stats[i].avail_inodes);
+ break; /* no point in looping further */
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_DESTROY (frame);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
int
-dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
+dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx)
{
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- call_pool_t *pool = NULL;
- loc_t tmp_loc = {0,};
-
- conf = this->private;
- pool = this->ctx->pool;
-
- statfs_frame = create_frame (this, pool);
- if (!statfs_frame) {
- goto err;
- }
-
- /* local->fop value is not used in this case */
- statfs_local = dht_local_init (statfs_frame, NULL, NULL,
- GF_FOP_MAXVALUE);
- if (!statfs_local) {
- goto err;
- }
-
- /* make it root gfid, should be enough to get the proper info back */
- tmp_loc.gfid[15] = 1;
-
- statfs_local->call_cnt = 1;
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[subvol_idx],
- conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc, NULL);
-
- return 0;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ call_pool_t *pool = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+
+ conf = this->private;
+ pool = this->ctx->pool;
+
+ statfs_frame = create_frame(this, pool);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init(statfs_frame, NULL, NULL, GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
+
+ statfs_local->call_cnt = 1;
+ STACK_WIND_COOKIE(
+ statfs_frame, dht_du_info_cbk, conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx]->fops->statfs, &tmp_loc, NULL);
+
+ return 0;
err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ if (statfs_frame)
+ DHT_STACK_DESTROY(statfs_frame);
- return -1;
+ return -1;
}
int
-dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
- loc_t tmp_loc = {0,};
-
- conf = this->private;
-
- gettimeofday (&tv, NULL);
-
- /* make it root gfid, should be enough to get the proper
- info back */
- tmp_loc.gfid[15] = 1;
-
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
-
- statfs_frame = copy_frame (frame);
- if (!statfs_frame) {
- goto err;
- }
-
- /* In this case, 'local->fop' is not used */
- statfs_local = dht_local_init (statfs_frame, loc, NULL,
- GF_FOP_MAXVALUE);
- if (!statfs_local) {
- goto err;
- }
-
- statfs_local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs,
- &tmp_loc, NULL);
- }
-
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
- return 0;
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ time_t now;
+
+ conf = this->private;
+ now = gf_time();
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
+
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
+ statfs_frame = copy_frame(frame);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init(statfs_frame, loc, NULL, GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ statfs_local->params = dict_new();
+ if (!statfs_local->params)
+ goto err;
+
+ ret = dict_set_int8(statfs_local->params,
+ GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
+ goto err;
+ }
+
+ statfs_local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, &tmp_loc,
+ statfs_local->params);
+ }
+
+ conf->last_stat_fetch = now;
+ }
+ return 0;
err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ if (statfs_frame)
+ DHT_STACK_DESTROY(statfs_frame);
- return -1;
+ return -1;
}
-
gf_boolean_t
-dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
+dht_is_subvol_filled(xlator_t *this, xlator_t *subvol)
+{
+ int i = 0;
+ char vol_name[256];
+ dht_conf_t *conf = NULL;
+ gf_boolean_t subvol_filled_inodes = _gf_false;
+ gf_boolean_t subvol_filled_space = _gf_false;
+ gf_boolean_t is_subvol_filled = _gf_false;
+ double usage = 0;
+
+ conf = this->private;
+
+ /* Check for values above specified percent or free disk */
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ if (conf->disk_unit == 'p') {
+ if (conf->du_stats[i].avail_percent < conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+
+ } else {
+ if (conf->du_stats[i].avail_space < conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+ }
+ if (conf->du_stats[i].avail_inodes < conf->min_free_inodes) {
+ subvol_filled_inodes = _gf_true;
+ break;
+ }
+ }
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
+
+ if (subvol_filled_space && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ usage = 100 - conf->du_stats[i].avail_percent;
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "disk space on subvolume '%s' is getting "
+ "full (%.2f %%), consider adding more bricks",
+ subvol->name, usage);
+
+ (void)snprintf(vol_name, sizeof(vol_name), "%s", this->name);
+ vol_name[(strlen(this->name) - 4)] = '\0';
+
+ gf_event(EVENT_DHT_DISK_USAGE, "volume=%s;subvol=%s;usage=%.2f %%",
+ vol_name, subvol->name, usage);
+ }
+ }
+
+ if (subvol_filled_inodes && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ usage = 100 - conf->du_stats[i].avail_inodes;
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_SUBVOL_INSUFF_INODES,
+ "inodes on subvolume '%s' are at "
+ "(%.2f %%), consider adding more bricks",
+ subvol->name, usage);
+
+ (void)snprintf(vol_name, sizeof(vol_name), "%s", this->name);
+ vol_name[(strlen(this->name) - 4)] = '\0';
+
+ gf_event(EVENT_DHT_INODES_USAGE,
+ "volume=%s;subvol=%s;usage=%.2f %%", vol_name,
+ subvol->name, usage);
+ }
+ }
+
+ is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
+
+ return is_subvol_filled;
+}
+
+/*Get the best subvolume to create the file in*/
+xlator_t *
+dht_free_disk_available_subvol(xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
+{
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+
+ conf = this->private;
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get(this, loc->parent);
+
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "Missing layout. path=%s,"
+ " parent gfid = %s",
+ loc->path, uuid_utoa(loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref(this, local->layout);
+ }
+
+ LOCK(&conf->subvolume_lock);
+ {
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL,
+ layout, 0);
+ if (!avail_subvol) {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this, subvol,
+ layout);
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
+out:
+ if (!avail_subvol) {
+ gf_msg_debug(this->name, 0,
+ "No subvolume has enough free space \
+ and/or inodes to create");
+ avail_subvol = subvol;
+ }
+
+ if (layout)
+ dht_layout_unref(this, layout);
+ return avail_subvol;
+}
+
+static inline int32_t
+dht_subvol_has_err(dht_conf_t *conf, xlator_t *this, xlator_t *ignore,
+ dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* this check is meant for rebalance process. The source of the file
+ * should be ignored for space check */
+ if (this == ignore) {
+ goto out;
+ }
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp(layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* discard decommissioned subvol */
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == this) {
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ xlator_t *ignore, dht_layout_t *layout,
+ uint64_t filesize)
{
- int i = 0;
- dht_conf_t *conf = NULL;
- gf_boolean_t subvol_filled_inodes = _gf_false;
- gf_boolean_t subvol_filled_space = _gf_false;
- gf_boolean_t is_subvol_filled = _gf_false;
-
- conf = this->private;
-
- /* Check for values above specified percent or free disk */
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent <
- conf->min_free_disk) {
- subvol_filled_space = _gf_true;
- break;
- }
-
- } else {
- if (conf->du_stats[i].avail_space <
- conf->min_free_disk) {
- subvol_filled_space = _gf_true;
- break;
- }
- }
- if (conf->du_stats[i].avail_inodes <
- conf->min_free_inodes) {
- subvol_filled_inodes = _gf_true;
- break;
- }
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- if (subvol_filled_space && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_log (this->name, GF_LOG_WARNING,
- "disk space on subvolume '%s' is getting "
- "full (%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_percent));
- }
- }
-
- if (subvol_filled_inodes && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "inodes on subvolume '%s' are at "
- "(%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_inodes));
- }
- }
-
- is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
-
- return is_subvol_filled;
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+ uint64_t total_blocks = 0;
+ uint64_t avail_blocks = 0;
+ uint64_t frsize = 0;
+ double post_availspace = 0;
+ double post_percent = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it */
+ ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], ignore,
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ total_blocks = conf->du_stats[i].total_blocks;
+ avail_blocks = conf->du_stats[i].avail_blocks;
+ frsize = conf->du_stats[i].frsize;
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ if (avail_subvol) {
+ if (conf->disk_unit == 'p') {
+ post_availspace = (avail_blocks * frsize) - filesize;
+ post_percent = (post_availspace * 100) / (total_blocks * frsize);
+ if (post_percent < conf->min_free_disk)
+ avail_subvol = NULL;
+ }
+ if (conf->disk_unit != 'p') {
+ if ((max - filesize) < conf->min_free_disk)
+ avail_subvol = NULL;
+ }
+ }
+
+ return avail_subvol;
}
+/* Get subvol which has at least one inode and maximum space */
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
- int i = 0;
- double max = 0;
- double max_inodes = 0;
- xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if ((conf->du_stats[i].avail_percent > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_percent;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if ((conf->du_stats[i].avail_space > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_space;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
-
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- if (!avail_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space and inodes to create");
- }
-
- if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes))
- avail_subvol = subvol;
-
- if (!avail_subvol)
- avail_subvol = subvol;
-
- return avail_subvol;
+ int i = 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it*/
+
+ ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], NULL,
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max) &&
+ (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
+ }
+ } else {
+ if ((conf->du_stats[i].avail_space > max) &&
+ (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 98756157db2..acda67c312a 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -8,65 +8,103 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "hashfn.h"
+#include <glusterfs/hashfn.h>
-
-int
-dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
+static int
+dht_hash_compute_internal(int type, const char *name, const int len,
+ uint32_t *hash_p)
{
- int ret = 0;
- uint32_t hash = 0;
+ int ret = 0;
+ uint32_t hash = 0;
- switch (type) {
+ switch (type) {
case DHT_HASH_TYPE_DM:
- hash = gf_dm_hashfn (name, strlen (name));
- break;
+ case DHT_HASH_TYPE_DM_USER:
+ hash = gf_dm_hashfn(name, len);
+ break;
default:
- ret = -1;
- break;
- }
+ ret = -1;
+ break;
+ }
- if (ret == 0) {
- *hash_p = hash;
- }
+ if (ret == 0) {
+ *hash_p = hash;
+ }
- return ret;
+ return ret;
}
+/* The function returns:
+ * 0 : in case no munge took place
+ * >0 : the length (inc. terminating NULL!) of the newly modified string,
+ * if it was munged.
+ */
+static int
+dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2] = {
+ {0},
+ };
+ size_t new_len = 0;
+ int ret = 0;
+
+ ret = regexec(re, original, 2, matches, 0);
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
+ if (ret != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy(modified, original + matches[1].rm_so, new_len);
+ modified[new_len] = '\0';
+ return new_len + 1; /* +1 for the terminating NULL */
+ }
+ }
+ }
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified, original);
+ return 0;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = NULL;
+ size_t len = 0;
+ int munged = 0;
+
+ priv = this->private;
+
+ if (name == NULL)
+ return -1;
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+
+ LOCK(&priv->lock);
+ {
+ if (priv->extra_regex_valid) {
+ munged = dht_munge_name(name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ gf_msg_trace(this->name, 0, "trying regex for %s", name);
+ munged = dht_munge_name(name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (munged) {
+ gf_msg_debug(this->name, 0, "munged down to %s", rsync_friendly_name);
+ len = munged;
+ } else {
+ rsync_friendly_name = (char *)name;
+ }
- return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
+ return dht_hash_compute_internal(type, rsync_friendly_name, len - 1,
+ hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 920a7aabc50..3f2fe43d5f3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -8,904 +8,2297 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "dht-common.h"
+#include "dht-lock.h"
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+static void
+dht_free_fd_ctx(dht_fd_ctx_t *fd_ctx)
+{
+ GF_FREE(fd_ctx);
+}
-#include "glusterfs.h"
-#include "xlator.h"
-#include "dht-common.h"
+int32_t
+dht_fd_ctx_destroy(xlator_t *this, fd_t *fd)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = fd_ctx_del(fd, this, &value);
+ if (ret) {
+ goto out;
+ }
+
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)value;
+ if (fd_ctx) {
+ GF_REF_PUT(fd_ctx);
+ }
+out:
+ return ret;
+}
+
+static int
+__dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_dht_mt_fd_ctx_t);
+ if (!fd_ctx) {
+ goto out;
+ }
+
+ fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
+ GF_REF_INIT(fd_ctx, dht_free_fd_ctx);
+
+ value = (uint64_t)(uintptr_t)fd_ctx;
+
+ ret = __fd_ctx_set(fd, this, value);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
+ "fd=0x%p", fd, NULL);
+ GF_REF_PUT(fd_ctx);
+ }
+out:
+ return ret;
+}
int
-dht_frame_return (call_frame_t *frame)
+dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, this, &value);
+ if (ret && value) {
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)value;
+ if (fd_ctx->opened_on_dst == (uint64_t)(uintptr_t)dst) {
+ /* This could happen due to racing
+ * check_progress tasks*/
+ goto unlock;
+ } else {
+ /* This would be a big problem*/
+ /* Overwrite and hope for the best*/
+ fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
+ UNLOCK(&fd->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
+ NULL);
- if (!frame)
- return -1;
+ goto out;
+ }
+ }
+ ret = __dht_fd_ctx_set(this, fd, dst);
+ }
+unlock:
+ UNLOCK(&fd->lock);
+out:
+ return ret;
+}
- local = frame->local;
+static dht_fd_ctx_t *
+dht_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ uint64_t tmp_val = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, this, &tmp_val);
+ if ((ret < 0) || (tmp_val == 0)) {
+ goto unlock;
+ }
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_cnt;
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)tmp_val;
+ GF_REF_GET(fd_ctx);
+ }
+unlock:
+ UNLOCK(&fd->lock);
+
+out:
+ return fd_ctx;
+}
+
+gf_boolean_t
+dht_fd_open_on_dst(xlator_t *this, fd_t *fd, xlator_t *dst)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t opened = _gf_false;
+
+ fd_ctx = dht_fd_ctx_get(this, fd);
+
+ if (fd_ctx) {
+ if (fd_ctx->opened_on_dst == (uint64_t)(uintptr_t)dst) {
+ opened = _gf_true;
}
- UNLOCK (&frame->lock);
+ GF_REF_PUT(fd_ctx);
+ }
- return this_call_cnt;
+ return opened;
}
+void
+dht_free_mig_info(void *data)
+{
+ dht_migrate_info_t *miginfo = NULL;
-int
-dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
+ miginfo = data;
+ GF_FREE(miginfo);
+
+ return;
+}
+
+static int
+dht_inode_ctx_set_mig_info(xlator_t *this, inode_t *inode, xlator_t *src_subvol,
+ xlator_t *dst_subvol)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
- goto out;
+ miginfo = GF_CALLOC(1, sizeof(*miginfo), gf_dht_mt_miginfo_t);
+ if (miginfo == NULL)
+ goto out;
+
+ miginfo->src_subvol = src_subvol;
+ miginfo->dst_subvol = dst_subvol;
+ GF_REF_INIT(miginfo, dht_free_mig_info);
+
+ value = (uint64_t)(uintptr_t)miginfo;
+
+ ret = inode_ctx_set1(inode, this, &value);
+ if (ret < 0) {
+ GF_REF_PUT(miginfo);
+ }
+
+out:
+ return ret;
+}
+
+int
+dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol)
+{
+ int ret = -1;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get1(inode, this, &tmp_miginfo);
+ if ((ret < 0) || (tmp_miginfo == 0)) {
+ UNLOCK(&inode->lock);
+ goto out;
}
- conf = this->private;
- if (!conf)
- goto out;
+ miginfo = (dht_migrate_info_t *)(uintptr_t)tmp_miginfo;
+ GF_REF_GET(miginfo);
+ }
+ UNLOCK(&inode->lock);
- max = conf->subvolume_cnt;
- cnt = dht_subvol_cnt (this, subvol);
+ if (src_subvol)
+ *src_subvol = miginfo->src_subvol;
- y = ((x * max) + cnt);
+ if (dst_subvol)
+ *dst_subvol = miginfo->dst_subvol;
+
+ GF_REF_PUT(miginfo);
out:
- if (y_p)
- *y_p = y;
+ return ret;
+}
- return 0;
+gf_boolean_t
+dht_mig_info_is_invalid(xlator_t *current, xlator_t *src_subvol,
+ xlator_t *dst_subvol)
+{
+ /* Not set
+ */
+ if (!src_subvol || !dst_subvol)
+ return _gf_true;
+
+ /* Invalid scenarios:
+ * The src_subvol does not match the subvol on which the current op was sent
+ * so the cached subvol has changed between the last mig_info_set and now.
+ * src_subvol == dst_subvol. The file was migrated without any FOP detecting
+ * a P2 so the old dst is now the current subvol.
+ *
+ * There is still one scenario where the info could be outdated - if
+ * file has undergone multiple migrations and ends up on the same src_subvol
+ * on which the mig_info was first set.
+ */
+ if ((current == dst_subvol) || (current != src_subvol))
+ return _gf_true;
+
+ return _gf_false;
}
+/* Used to check if fd fops have the fd opened on the cached subvol
+ * This is required when:
+ * 1. an fd is opened on FILE1 on subvol1
+ * 2. the file is migrated to subvol2
+ * 3. a lookup updates the cached subvol in the inode_ctx to subvol2
+ * 4. a write comes on the fd
+ * The write is sent to subvol2 on an fd which has been opened only on fd1
+ * Since the migration phase checks don't kick in, the fop fails with EBADF
+ *
+ */
+
int
-dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
- xlator_t **subvol)
+dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
+ void *data)
{
- char *new_name = NULL;
- char *new_path = NULL;
- xlator_list_t *trav = NULL;
- char key[1024] = {0,};
- int ret = 0; /* not found */
+ glusterfs_fop_t fop = 0;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ int op_errno = -1;
+
+ local = frame->local;
+ this = frame->this;
+ fop = local->fop;
+ subvol = local->cached_subvol;
+ fd = local->fd;
+
+ if (ret) {
+ op_errno = local->op_errno;
+ goto handle_err;
+ }
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FLUSH:
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FSETATTR:
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_ZEROFILL:
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ break;
+
+ case GF_FOP_DISCARD:
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, local->fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, fd,
+ local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd,
+ local->rebalance.offset, local->xattr_req);
+ break;
+
+ case GF_FOP_FSYNC:
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol,
+ subvol->fops->fsync, local->fd,
+ local->rebalance.flags, local->xattr_req);
+ break;
+
+ case GF_FOP_READ:
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size,
+ local->rebalance.offset, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FSTAT:
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, local->xattr_req);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+
+ break;
+
+ case GF_FOP_FXATTROP:
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
+ local->fd, local->key, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ local->key, local->fd, local->rebalance.lock_cmd,
+ &local->rebalance.flock, local->xattr_req);
+ break;
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
+ break;
+ }
+
+ goto out;
+
+ /* Could not open the fd on the dst. Unwind */
+
+handle_err:
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FLUSH:
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FSETATTR:
+ DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_ZEROFILL:
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_DISCARD:
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FALLOCATE:
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FSYNC:
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_READ:
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+ break;
+
+ case GF_FOP_FSTAT:
+ DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FXATTROP:
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
+ break;
+
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
+ break;
+ }
- /* Why do other tasks if first required 'char' itself is not there */
- if (!loc->name || !strchr (loc->name, '@'))
- goto out;
+out:
- trav = this->children;
- while (trav) {
- snprintf (key, 1024, "*@%s:%s", this->name, trav->xlator->name);
- if (fnmatch (key, loc->name, FNM_NOESCAPE) == 0) {
- new_name = GF_CALLOC(strlen (loc->name),
- sizeof (char),
- gf_common_mt_char);
- if (!new_name)
- goto out;
- if (fnmatch (key, loc->path, FNM_NOESCAPE) == 0) {
- new_path = GF_CALLOC(strlen (loc->path),
- sizeof (char),
- gf_common_mt_char);
- if (!new_path)
- goto out;
- strncpy (new_path, loc->path, (strlen (loc->path) -
- strlen (key) + 1));
- }
- strncpy (new_name, loc->name, (strlen (loc->name) -
- strlen (key) + 1));
-
- if (new_loc) {
- new_loc->path = ((new_path) ? new_path:
- gf_strdup (loc->path));
- new_loc->name = new_name;
- new_loc->inode = inode_ref (loc->inode);
- new_loc->parent = inode_ref (loc->parent);
- }
- *subvol = trav->xlator;
- ret = 1; /* success */
- goto out;
- }
- trav = trav->next;
+ return 0;
+}
+
+/* Check once again if the fd has been opened on the cached subvol.
+ * If not, open and update the fd_ctx.
+ */
+
+int
+dht_check_and_open_fd_on_subvol_task(void *data)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ fd_t *fd = NULL;
+ xlator_t *this = NULL;
+ xlator_t *subvol = NULL;
+
+ frame = data;
+ local = frame->local;
+ this = THIS;
+ fd = local->fd;
+ subvol = local->cached_subvol;
+
+ local->fd_checked = _gf_true;
+
+ if (fd_is_anonymous(fd) || dht_fd_open_on_dst(this, fd, subvol)) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Opening fd (%p, flags=0%o) on file %s @ %s",
+ fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name);
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+
+ /* Open this on the dst subvol */
+
+ SYNCTASK_SETID(0, 0);
+
+ ret = syncop_open(subvol, &loc, (fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ fd, NULL, NULL);
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL);
+ /* This can happen if the cached subvol was updated in the
+ * inode_ctx and the fd was opened on the new cached suvol
+ * after this fop was wound on the old cached subvol.
+ * As we do not close the fd on the old subvol (a leak)
+ * don't treat ENOENT as an error and allow the phase1/phase2
+ * checks to handle it.
+ */
+
+ if ((-ret != ENOENT) && (-ret != ESTALE)) {
+ local->op_errno = -ret;
+ ret = -1;
+ } else {
+ ret = 0;
}
+
+ local->op_errno = -ret;
+ ret = -1;
+
+ } else {
+ dht_fd_ctx_set(this, fd, subvol);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
out:
- if (!ret) {
- /* !success */
- if (new_path)
- GF_FREE (new_path);
- if (new_name)
- GF_FREE (new_name);
- }
- return ret;
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+int
+dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ /*
+ if (dht_fd_open_on_dst (this, fd, subvol))
+ goto out;
+ */
+ local = frame->local;
+
+ ret = synctask_new(this->ctx->env, dht_check_and_open_fd_on_subvol_task,
+ dht_check_and_open_fd_on_subvol_complete, frame, frame);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ "to-check-and-open fd=%p", local->fd, NULL);
+ }
+
+ return ret;
}
int
-dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
+dht_frame_return(call_frame_t *frame)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- xlator_t *subvol = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
- if (!this->private)
+ if (!frame)
+ return -1;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ }
+ UNLOCK(&frame->lock);
+
+ return this_call_cnt;
+}
+
+/*
+ * Use this function to specify which subvol you want the file created
+ * on - this need not be the hashed subvol.
+ * Format: <filename>@<this->name>:<subvol-name>
+ * Eg: file-1@vol1-dht:vol1-client-0
+ * where vol1 is a pure distribute volume
+ * will create file-1 on vol1-client-0
+ */
+
+int
+dht_filter_loc_subvol_key(xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol)
+{
+ char *new_name = NULL;
+ char *new_path = NULL;
+ xlator_list_t *trav = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int ret = 0; /* not found */
+ int keylen = 0;
+ int name_len = 0;
+ int path_len = 0;
+
+ /* Why do other tasks if first required 'char' itself is not there */
+ if (!new_loc || !loc || !loc->name || !strchr(loc->name, '@')) {
+ /* Skip the GF_FREE checks here */
+ return ret;
+ }
+
+ trav = this->children;
+ while (trav) {
+ keylen = snprintf(key, sizeof(key), "*@%s:%s", this->name,
+ trav->xlator->name);
+ /* Ignore '*' */
+ keylen = keylen - 1;
+ if (fnmatch(key, loc->name, FNM_NOESCAPE) == 0) {
+ name_len = strlen(loc->name) - keylen;
+ new_name = GF_MALLOC(name_len + 1, gf_common_mt_char);
+ if (!new_name)
goto out;
+ if (fnmatch(key, loc->path, FNM_NOESCAPE) == 0) {
+ path_len = strlen(loc->path) - keylen;
+ new_path = GF_MALLOC(path_len + 1, gf_common_mt_char);
+ if (!new_path)
+ goto out;
+ snprintf(new_path, path_len + 1, "%s", loc->path);
+ }
+ snprintf(new_name, name_len + 1, "%s", loc->name);
+
+ if (new_loc) {
+ new_loc->path = ((new_path) ? new_path : gf_strdup(loc->path));
+ new_loc->name = new_name;
+ new_loc->inode = inode_ref(loc->inode);
+ new_loc->parent = inode_ref(loc->parent);
+ }
+ *subvol = trav->xlator;
+ ret = 1; /* success */
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ if (!ret) {
+ /* !success */
+ GF_FREE(new_path);
+ GF_FREE(new_name);
+ }
+ return ret;
+}
- conf = this->private;
- max = conf->subvolume_cnt;
+static xlator_t *
+dht_get_subvol_from_id(xlator_t *this, int client_id)
+{
+ xlator_t *xl = NULL;
+ dht_conf_t *conf = NULL;
+ char *sid = NULL;
+ int32_t ret = -1;
- cnt = y % max;
- x = y / max;
+ conf = this->private;
- subvol = conf->subvolumes[cnt];
+ ret = gf_asprintf(&sid, "%d", client_id);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL);
+ goto out;
+ }
- if (subvol_p)
- *subvol_p = subvol;
+ if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **)&xl))
+ xl = NULL;
- if (x_p)
- *x_p = x;
+ GF_FREE(sid);
out:
- return 0;
+ return xl;
}
+int
+dht_deitransform(xlator_t *this, uint64_t y, xlator_t **subvol_p)
+{
+ int client_id = 0;
+ xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+
+ if (!this->private)
+ return -1;
+
+ conf = this->private;
+
+ client_id = gf_deitransform(this, y);
+
+ subvol = dht_get_subvol_from_id(this, client_id);
+
+ if (!subvol)
+ subvol = conf->subvolumes[0];
+
+ if (subvol_p)
+ *subvol_p = subvol;
+
+ return 0;
+}
void
-dht_local_wipe (xlator_t *this, dht_local_t *local)
+dht_local_wipe(xlator_t *this, dht_local_t *local)
{
- if (!local)
- return;
+ int i = 0;
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
+ if (!local)
+ return;
- if (local->xattr)
- dict_unref (local->xattr);
+ loc_wipe(&local->loc);
+ loc_wipe(&local->loc2);
+ loc_wipe(&local->loc2_copy);
- if (local->inode)
- inode_unref (local->inode);
+ if (local->xattr)
+ dict_unref(local->xattr);
- if (local->layout) {
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- }
+ if (local->inode)
+ inode_unref(local->inode);
- loc_wipe (&local->linkfile.loc);
+ if (local->layout) {
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ }
- if (local->linkfile.xattr)
- dict_unref (local->linkfile.xattr);
+ loc_wipe(&local->linkfile.loc);
- if (local->linkfile.inode)
- inode_unref (local->linkfile.inode);
+ if (local->linkfile.xattr)
+ dict_unref(local->linkfile.xattr);
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
+ if (local->linkfile.inode)
+ inode_unref(local->linkfile.inode);
- if (local->params) {
- dict_unref (local->params);
- local->params = NULL;
- }
+ if (local->fd) {
+ fd_unref(local->fd);
+ local->fd = NULL;
+ }
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+ if (local->params) {
+ dict_unref(local->params);
+ local->params = NULL;
+ }
- if (local->selfheal.layout) {
- dht_layout_unref (this, local->selfheal.layout);
- local->selfheal.layout = NULL;
- }
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
+ if (local->mds_xattr)
+ dict_unref(local->mds_xattr);
+ if (local->xdata)
+ dict_unref(local->xdata);
- if (local->newpath) {
- GF_FREE (local->newpath);
- }
+ if (local->selfheal.layout) {
+ dht_layout_unref(this, local->selfheal.layout);
+ local->selfheal.layout = NULL;
+ }
- if (local->key) {
- GF_FREE (local->key);
- }
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref(this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
- if (local->rebalance.vector)
- GF_FREE (local->rebalance.vector);
+ for (i = 0; i < 2; i++) {
+ dht_lock_array_free(local->lock[i].ns.parent_layout.locks,
+ local->lock[i].ns.parent_layout.lk_count);
- if (local->rebalance.iobref)
- iobref_unref (local->rebalance.iobref);
+ GF_FREE(local->lock[i].ns.parent_layout.locks);
- mem_put (local);
-}
+ dht_lock_array_free(local->lock[i].ns.directory_ns.locks,
+ local->lock[i].ns.directory_ns.lk_count);
+ GF_FREE(local->lock[i].ns.directory_ns.locks);
+ }
+ GF_FREE(local->key);
-dht_local_t *
-dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
-{
- dht_local_t *local = NULL;
- inode_t *inode = NULL;
- int ret = 0;
-
- local = mem_get0 (THIS->local_pool);
- if (!local)
- goto out;
+ if (local->rebalance.xdata)
+ dict_unref(local->rebalance.xdata);
- if (loc) {
- ret = loc_copy (&local->loc, loc);
- if (ret)
- goto out;
+ if (local->rebalance.xattr)
+ dict_unref(local->rebalance.xattr);
- inode = loc->inode;
- }
+ if (local->rebalance.dict)
+ dict_unref(local->rebalance.dict);
- if (fd) {
- local->fd = fd_ref (fd);
- if (!inode)
- inode = fd->inode;
- }
+ GF_FREE(local->rebalance.vector);
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
- local->fop = fop;
+ if (local->rebalance.iobref)
+ iobref_unref(local->rebalance.iobref);
- if (inode) {
- local->layout = dht_layout_get (frame->this, inode);
- local->cached_subvol = dht_subvol_get_cached (frame->this,
- inode);
- }
+ if (local->stub) {
+ call_stub_destroy(local->stub);
+ local->stub = NULL;
+ }
- frame->local = local;
+ if (local->ret_cache)
+ GF_FREE(local->ret_cache);
-out:
- if (ret) {
- if (local)
- mem_put (local);
- local = NULL;
- }
- return local;
+ mem_put(local);
}
-
-char *
-basestr (const char *str)
+dht_local_t *
+dht_local_init(call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
- char *basestr = NULL;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
+ local = mem_get0(THIS->local_pool);
+ if (!local)
+ goto out;
- return basestr;
-}
+ if (loc) {
+ ret = loc_copy(&local->loc, loc);
+ if (ret)
+ goto out;
+
+ inode = loc->inode;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ if (!inode)
+ inode = fd->inode;
+ }
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
+
+ if (inode) {
+ local->layout = dht_layout_get(frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached(frame->this, inode);
+ }
+
+ frame->local = local;
+
+out:
+ if (ret) {
+ if (local)
+ mem_put(local);
+ local = NULL;
+ }
+ return local;
+}
xlator_t *
-dht_first_up_subvol (xlator_t *this)
+dht_first_up_subvol(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
- time_t time = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+ time_t time = 0;
- conf = this->private;
- if (!conf)
- goto out;
+ conf = this->private;
+ if (!conf)
+ goto out;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvol_up_time[i]) {
- if (!time) {
- time = conf->subvol_up_time[i];
- child = conf->subvolumes[i];
- } else if (time > conf->subvol_up_time[i]) {
- time = conf->subvol_up_time[i];
- child = conf->subvolumes[i];
- }
- }
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvol_up_time[i]) {
+ if (!time) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ } else if (time > conf->subvol_up_time[i]) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
}
+ }
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- return child;
+ return child;
}
xlator_t *
-dht_last_up_subvol (xlator_t *this)
+dht_last_up_subvol(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = conf->subvolume_cnt-1; i >= 0; i--) {
- if (conf->subvolume_status[i]) {
- child = conf->subvolumes[i];
- break;
- }
- }
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = conf->subvolume_cnt - 1; i >= 0; i--) {
+ if (conf->subvolume_status[i]) {
+ child = conf->subvolumes[i];
+ break;
+ }
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- return child;
+ return child;
}
xlator_t *
-dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
+dht_subvol_get_hashed(xlator_t *this, loc_t *loc)
{
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
- if (__is_root_gfid (loc->gfid)) {
- subvol = dht_first_up_subvol (this);
- goto out;
- }
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+ methods = &(conf->methods);
- layout = dht_layout_get (this, loc->parent);
+ if (__is_root_gfid(loc->gfid)) {
+ subvol = dht_first_up_subvol(this);
+ goto out;
+ }
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout missing path=%s parent=%s",
- loc->path, uuid_utoa (loc->parent->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO(this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->name, out);
- subvol = dht_layout_search (this, layout, loc->name);
+ layout = dht_layout_get(this, loc->parent);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find subvolume for path=%s",
- loc->path);
- goto out;
- }
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "Missing layout. path=%s, parent gfid =%s",
+ loc->path, uuid_utoa(loc->parent->gfid));
+ goto out;
+ }
+
+ subvol = methods->layout_search(this, layout, loc->name);
+
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "No hashed subvolume for path=%s",
+ loc->path);
+ goto out;
+ }
out:
- if (layout) {
- dht_layout_unref (this, layout);
- }
+ if (layout) {
+ dht_layout_unref(this, layout);
+ }
- return subvol;
+ return subvol;
}
-
xlator_t *
-dht_subvol_get_cached (xlator_t *this, inode_t *inode)
+dht_subvol_get_cached(xlator_t *this, inode_t *inode)
{
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
- GF_VALIDATE_OR_GOTO (this->name, this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- layout = dht_layout_get (this, inode);
+ layout = dht_layout_get(this, inode);
- if (!layout) {
- goto out;
- }
+ if (!layout) {
+ goto out;
+ }
- subvol = layout->list[0].xlator;
+ subvol = layout->list[0].xlator;
out:
- if (layout) {
- dht_layout_unref (this, layout);
- }
+ if (layout) {
+ dht_layout_unref(this, layout);
+ }
- return subvol;
+ return subvol;
}
-
xlator_t *
-dht_subvol_next (xlator_t *this, xlator_t *prev)
+dht_subvol_next(xlator_t *this, xlator_t *prev)
{
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *next = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev) {
- if ((i + 1) < conf->subvolume_cnt)
- next = conf->subvolumes[i + 1];
- break;
- }
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ if ((i + 1) < conf->subvolume_cnt)
+ next = conf->subvolumes[i + 1];
+ break;
}
+ }
out:
- return next;
+ return next;
}
+/* This func wraps around, if prev is actually the last subvol.
+ */
+xlator_t *
+dht_subvol_next_available(xlator_t *this, xlator_t *prev)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ /* if prev is last in conf->subvolumes, then wrap
+ * around.
+ */
+ if ((i + 1) < conf->subvolume_cnt) {
+ next = conf->subvolumes[i + 1];
+ } else {
+ next = conf->subvolumes[0];
+ }
+ break;
+ }
+ }
+out:
+ return next;
+}
int
-dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
+dht_subvol_cnt(xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int ret = -1;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- ret = i;
- break;
- }
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ ret = i;
+ break;
}
+ }
out:
- return ret;
+ return ret;
}
+#define set_if_greater(a, b) \
+ do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
-#define set_if_greater(a, b) do { \
- if ((a) < (b)) \
- (a) = (b); \
- } while (0)
-
+#define set_if_greater_time(a, an, b, bn) \
+ do { \
+ if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) { \
+ (a) = (b); \
+ (an) = (bn); \
+ } \
+ } while (0)
-#define set_if_greater_time(a, an, b, bn) do { \
- if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \
- (a) = (b); \
- (an) = (bn); \
- } \
- } while (0) \
+int
+dht_iatt_merge(xlator_t *this, struct iatt *to, struct iatt *from)
+{
+ if (!from || !to)
+ return 0;
+ to->ia_dev = from->ia_dev;
+
+ gf_uuid_copy(to->ia_gfid, from->ia_gfid);
+
+ to->ia_ino = from->ia_ino;
+ to->ia_prot = from->ia_prot;
+ to->ia_type = from->ia_type;
+ to->ia_nlink = from->ia_nlink;
+ to->ia_rdev = from->ia_rdev;
+ to->ia_size += from->ia_size;
+ to->ia_blksize = from->ia_blksize;
+ to->ia_blocks += from->ia_blocks;
+
+ if (IA_ISDIR(from->ia_type)) {
+ to->ia_blocks = DHT_DIR_STAT_BLOCKS;
+ to->ia_size = DHT_DIR_STAT_SIZE;
+ }
+ set_if_greater(to->ia_uid, from->ia_uid);
+ set_if_greater(to->ia_gid, from->ia_gid);
+
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec, from->ia_atime,
+ from->ia_atime_nsec);
+ set_if_greater_time(to->ia_mtime, to->ia_mtime_nsec, from->ia_mtime,
+ from->ia_mtime_nsec);
+ set_if_greater_time(to->ia_ctime, to->ia_ctime_nsec, from->ia_ctime,
+ from->ia_ctime_nsec);
+
+ return 0;
+}
int
-dht_iatt_merge (xlator_t *this, struct iatt *to,
- struct iatt *from, xlator_t *subvol)
+dht_build_child_loc(xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
- if (!from || !to)
- return 0;
+ if (!child) {
+ goto err;
+ }
- to->ia_dev = from->ia_dev;
+ if (strcmp(parent->path, "/") == 0)
+ gf_asprintf((char **)&child->path, "/%s", name);
+ else
+ gf_asprintf((char **)&child->path, "%s/%s", parent->path, name);
- uuid_copy (to->ia_gfid, from->ia_gfid);
+ if (!child->path) {
+ goto err;
+ }
- to->ia_ino = from->ia_ino;
- to->ia_prot = from->ia_prot;
- to->ia_type = from->ia_type;
- to->ia_nlink = from->ia_nlink;
- to->ia_rdev = from->ia_rdev;
- to->ia_size += from->ia_size;
- to->ia_blksize = from->ia_blksize;
- to->ia_blocks += from->ia_blocks;
+ child->name = strrchr(child->path, '/');
+ if (child->name)
+ child->name++;
- set_if_greater (to->ia_uid, from->ia_uid);
- set_if_greater (to->ia_gid, from->ia_gid);
+ child->parent = inode_ref(parent->inode);
+ child->inode = inode_new(parent->inode->table);
- set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
- from->ia_atime, from->ia_atime_nsec);
- set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
- from->ia_mtime, from->ia_mtime_nsec);
- set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
- from->ia_ctime, from->ia_ctime_nsec);
+ if (!child->inode) {
+ goto err;
+ }
- return 0;
+ return 0;
+err:
+ if (child) {
+ loc_wipe(child);
+ }
+ return -1;
}
int
-dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+dht_init_local_subvolumes(xlator_t *this, dht_conf_t *conf)
{
- if (!child) {
- goto err;
- }
-
- if (strcmp (parent->path, "/") == 0)
- gf_asprintf ((char **)&child->path, "/%s", name);
- else
- gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name);
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
- if (!child->path) {
- goto err;
- }
+ if (!conf)
+ return -1;
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+ conf->local_subvols = GF_CALLOC(cnt, sizeof(xlator_t *),
+ gf_dht_mt_xlator_t);
- if (!child->inode) {
- goto err;
- }
+ /* FIX FIX : do this dynamically*/
+ conf->local_nodeuuids = GF_CALLOC(cnt, sizeof(subvol_nodeuuids_info_t),
+ gf_dht_nodeuuids_t);
- return 0;
-err:
- loc_wipe (child);
+ if (!conf->local_subvols || !conf->local_nodeuuids) {
return -1;
-}
+ }
+ conf->local_subvols_cnt = 0;
+ return 0;
+}
int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+dht_init_subvolumes(xlator_t *this, dht_conf_t *conf)
{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
- if (!conf)
- return -1;
+ if (!conf)
+ return -1;
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
- conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->subvolumes) {
- return -1;
- }
- conf->subvolume_cnt = cnt;
+ conf->subvolumes = GF_CALLOC(cnt, sizeof(xlator_t *), gf_dht_mt_xlator_t);
+ if (!conf->subvolumes) {
+ return -1;
+ }
+ conf->subvolume_cnt = cnt;
+ /* Doesn't make sense to do any dht layer tasks
+ if the subvol count is 1. Set it as pass_through */
+ if (cnt == 1)
+ this->pass_through = _gf_true;
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
+ conf->local_subvols_cnt = 0;
- conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
- gf_dht_mt_char);
- if (!conf->subvolume_status) {
- return -1;
- }
+ dht_set_subvol_range(this);
- conf->last_event = GF_CALLOC (cnt, sizeof (int),
- gf_dht_mt_char);
- if (!conf->last_event) {
- return -1;
- }
+ cnt = 0;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ conf->subvolumes[cnt++] = subvols->xlator;
- conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
- gf_dht_mt_subvol_time);
- if (!conf->subvol_up_time) {
- return -1;
- }
+ conf->subvolume_status = GF_CALLOC(cnt, sizeof(char), gf_dht_mt_char);
+ if (!conf->subvolume_status) {
+ return -1;
+ }
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- return -1;
- }
+ conf->last_event = GF_CALLOC(cnt, sizeof(int), gf_dht_mt_char);
+ if (!conf->last_event) {
+ return -1;
+ }
- conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->decommissioned_bricks) {
- return -1;
- }
+ conf->subvol_up_time = GF_CALLOC(cnt, sizeof(time_t),
+ gf_dht_mt_subvol_time);
+ if (!conf->subvol_up_time) {
+ return -1;
+ }
- return 0;
-}
+ conf->du_stats = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_du_t),
+ gf_dht_mt_dht_du_t);
+ if (!conf->du_stats) {
+ return -1;
+ }
+ conf->decommissioned_bricks = GF_CALLOC(cnt, sizeof(xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->decommissioned_bricks) {
+ return -1;
+ }
+ return 0;
+}
+/*
+ op_ret values :
+ 0 : Success.
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
static int
-dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+dht_migration_complete_check_done(int op_ret, call_frame_t *frame, void *data)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
- local = frame->local;
+ local = frame->local;
- local->rebalance.target_op_fn (THIS, frame, op_ret);
+ if (op_ret != 0)
+ goto out;
- return 0;
-}
+ if (local->cached_subvol == NULL) {
+ local->op_errno = EINVAL;
+ goto out;
+ }
+ subvol = local->cached_subvol;
-int
-dht_migration_complete_check_task (void *data)
-{
- int ret = -1;
- xlator_t *src_node = NULL;
- xlator_t *dst_node = NULL;
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- dht_layout_t *layout = NULL;
- struct iatt stbuf = {0,};
- xlator_t *this = NULL;
- call_frame_t *frame = NULL;
- loc_t tmp_loc = {0,};
- char *path = NULL;
-
- this = THIS;
- frame = data;
- local = frame->local;
-
- src_node = local->cached_subvol;
-
- /* getxattr on cached_subvol for 'linkto' value */
- if (!local->loc.inode)
- ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
- else
- ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
-
- if (!ret)
- dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+out:
+ local->rebalance.target_op_fn(THIS, subvol, frame, op_ret);
- if (ret) {
- if ((errno != ENOENT) || (!local->loc.inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (errno));
- goto out;
- }
- /* Need to do lookup on hashed subvol, then get the file */
- ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL,
- NULL);
- if (ret)
- goto out;
- dst_node = dht_subvol_get_cached (this, local->loc.inode);
- }
+ return 0;
+}
- if (!dst_node) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the destination node",
- local->loc.path);
- ret = -1;
- goto out;
+int
+dht_migration_complete_check_task(void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL, *linkto_target = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd) {
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check won't be done*/
+
+ if (!local->loc.inode) {
+ ret = syncop_fgetxattr(src_node, local->fd, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ } else {
+ SYNCTASK_SETID(0, 0);
+ ret = syncop_getxattr(src_node, &local->loc, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+ }
+
+ /*
+ * Each DHT xlator layer has its own name for the linkto xattr.
+ * If the file mode bits indicate the the file is being migrated but
+ * this layer's linkto xattr is not set, it means that another
+ * DHT layer is migrating the file. In this case, return 1 so
+ * the mode bits can be passed on to the higher layer for appropriate
+ * action.
+ */
+ if (-ret == ENODATA) {
+ /* This DHT translator is not migrating this file */
+
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ /* This can be a problem if the file was
+ * migrated by two different layers. Raise
+ * a warning here.
+ */
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
+
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ if (!ret)
+ linkto_target = dht_linkfile_subvol(this, NULL, NULL, dict);
+
+ if (local->loc.inode) {
+ loc_copy(&tmp_loc, &local->loc);
+ } else {
+ tmp_loc.inode = inode_ref(inode);
+ gf_uuid_copy(tmp_loc.gfid, inode->gfid);
+ }
+
+ ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", this->name, NULL);
+ local->op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ dst_node = dht_subvol_get_cached(this, tmp_loc.inode);
+ if (linkto_target && dst_node != linkto_target) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
+ "linkto_target_name=%s", linkto_target->name, "dst_name=%s",
+ dst_node->name, NULL);
+ }
+
+ if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "dst_name=%s", dst_node->name, NULL);
+ ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ /* update local. A layout is set in inode-ctx in lookup already */
+
+ dht_layout_unref(this, local->layout);
+
+ local->layout = dht_layout_get(frame->this, inode);
+ local->cached_subvol = dst_node;
+
+ ret = 0;
+
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ goto out;
+ }
+
+ /* perform 'open()' on all the fd's present on the inode */
+ if (tmp_loc.path == NULL) {
+ inode_path(inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
+ if (list_empty(&inode->fd_list))
+ goto unlock;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+ fd_ref(iter_fd);
+
+ UNLOCK(&inode->lock);
+
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+ ret = syncop_open(dst_node, &tmp_loc,
+ (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ iter_fd, NULL, NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
+
+ open_failed = 1;
+ local->op_errno = -ret;
+ ret = -1;
+ } else {
+ dht_fd_ctx_set(this, iter_fd, dst_node);
}
- /* lookup on dst */
- if (local->loc.inode) {
- ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL);
+ next:
+ LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to lookup the file on %s",
- local->loc.path, dst_node->name);
- goto out;
- }
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
- if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: gfid different on the target file on %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
- }
+ if (open_failed) {
+ ret = -1;
+ goto unlock;
+ }
+ ret = 0;
- /* update inode ctx (the layout) */
- dht_layout_unref (this, local->layout);
-
- if (!local->loc.inode)
- ret = dht_layout_preset (this, dst_node, local->fd->inode);
- else
- ret = dht_layout_preset (this, dst_node, local->loc.inode);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: could not set preset layout for subvol %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
+unlock:
+ UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
- layout = dht_layout_for_subvol (this, dst_node);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no pre-set layout for subvolume %s",
- local->loc.path, dst_node ? dst_node->name : "<nil>");
- ret = -1;
- goto out;
- }
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
- if (!local->loc.inode)
- ret = dht_layout_set (this, local->fd->inode, layout);
- else
- ret = dht_layout_set (this, local->loc.inode, layout);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set the new layout",
- local->loc.path);
- goto out;
+ loc_wipe(&tmp_loc);
+
+ return ret;
+}
+
+int
+dht_rebalance_complete_check(xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+
+ ret = synctask_new(this->ctx->env, dht_migration_complete_check_task,
+ dht_migration_complete_check_done, frame, frame);
+ return ret;
+}
+
+/* During 'in-progress' state, both nodes should have the file */
+/*
+ op_ret values :
+ 0 : Success
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
+static int
+dht_inprogress_check_done(int op_ret, call_frame_t *frame, void *data)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_subvol = NULL, *src_subvol = NULL;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+
+ dht_inode_ctx_get_mig_info(THIS, inode, &src_subvol, &dst_subvol);
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol, dst_subvol)) {
+ dst_subvol = dht_subvol_get_cached(THIS, inode);
+ if (!dst_subvol) {
+ local->op_errno = EINVAL;
+ goto out;
}
+ }
- local->cached_subvol = dst_node;
- ret = 0;
+out:
+ local->rebalance.target_op_fn(THIS, dst_subvol, frame, op_ret);
- if (!local->fd)
- goto out;
+ return 0;
+}
- /* once we detect the migration complete, the fd-ctx is no more
- required.. delete the ctx, and do one extra 'fd_unref' for open fd */
- ret = fd_ctx_del (local->fd, this, NULL);
- if (!ret) {
- fd_unref (local->fd);
- ret = 0;
- goto out;
+static int
+dht_rebalance_inprogress_task(void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *path = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ loc_t tmp_loc = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
+ int open_failed = 0;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check won't be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID(0, 0);
+ ret = syncop_getxattr(src_node, &local->loc, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+ } else {
+ ret = syncop_fgetxattr(src_node, local->fd, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ }
+
+ /*
+ * Each DHT xlator layer has its own name for the linkto xattr.
+ * If the file mode bits indicate the the file is being migrated but
+ * this layer's linkto xattr is not present, it means that another
+ * DHT layer is migrating the file. In this case, return 1 so
+ * the mode bits can be passed on to the higher layer for appropriate
+ * action.
+ */
+
+ if (-ret == ENODATA) {
+ /* This DHT layer is not migrating this file */
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ /* This can be a problem if the file was
+ * migrated by two different layers. Raise
+ * a warning here.
+ */
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ dst_node = dht_linkfile_subvol(this, NULL, NULL, dict);
+ if (!dst_node) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ local->rebalance.target_node = dst_node;
+
+ if (local->loc.inode) {
+ loc_copy(&tmp_loc, &local->loc);
+ } else {
+ tmp_loc.inode = inode_ref(inode);
+ gf_uuid_copy(tmp_loc.gfid, inode->gfid);
+ }
+
+ /* lookup on dst */
+ ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+ if (tmp_loc.path == NULL) {
+ inode_path(inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
+ if (list_empty(&inode->fd_list))
+ goto unlock;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
}
- /* if 'local->fd' (ie, fd based operation), send a 'open()' on
- destination if not already done */
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
- } else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
- ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
+ /* Yes, this is ugly but there isn't a cleaner way to do this
+ * the fd_ref is an atomic increment so not too bad. We want to
+ * reduce the number of inode locks and unlocks.
+ */
+
+ fd_ref(iter_fd);
+ UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
- goto out;
+ if (skip_open)
+ goto next;
+
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+ ret = syncop_open(dst_node, &tmp_loc,
+ (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ iter_fd, NULL, NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
+ ret = -1;
+ open_failed = 1;
+ } else {
+ /* Potential fd leak if this fails here as it will be
+ reopened at the next Phase1/2 check */
+ dht_fd_ctx_set(this, iter_fd, dst_node);
}
- /* need this unref for the fd on src_node */
- fd_unref (local->fd);
- ret = 0;
+ next:
+ LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "path=%s", local->loc.path, "name=%s", dst_node->name, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
+ if (dict) {
+ dict_unref(dict);
+ }
- return ret;
+ loc_wipe(&tmp_loc);
+ return ret;
}
int
-dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+dht_rebalance_in_progress_check(xlator_t *this, call_frame_t *frame)
{
- int ret = -1;
+ int ret = -1;
- ret = synctask_new (this->ctx->env, dht_migration_complete_check_task,
- dht_migration_complete_check_done,
- frame, frame);
- return ret;
+ ret = synctask_new(this->ctx->env, dht_rebalance_inprogress_task,
+ dht_inprogress_check_done, frame, frame);
+ return ret;
}
-/* During 'in-progress' state, both nodes should have the file */
-static int
-dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data)
+int
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
{
- dht_local_t *local = NULL;
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
+ }
+
+ ret = dht_inode_ctx_set(inode, this, ctx);
+
+ return ret;
+}
- local = sync_frame->local;
+void
+dht_inode_ctx_time_set(inode_t *inode, xlator_t *this, struct iatt *stat)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
- local->rebalance.target_op_fn (THIS, sync_frame, op_ret);
+ ret = dht_inode_ctx_get(inode, this, &ctx);
- return 0;
+ if (ret)
+ return;
+
+ time = &ctx->time;
+
+ time->mtime = stat->ia_mtime;
+ time->mtime_nsec = stat->ia_mtime_nsec;
+
+ time->ctime = stat->ia_ctime;
+ time->ctime_nsec = stat->ia_ctime_nsec;
+
+ time->atime = stat->ia_atime;
+ time->atime_nsec = stat->ia_atime_nsec;
+
+ return;
}
-static int
-dht_rebalance_inprogress_task (void *data)
-{
- int ret = -1;
- xlator_t *src_node = NULL;
- xlator_t *dst_node = NULL;
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- call_frame_t *frame = NULL;
- xlator_t *this = NULL;
- char *path = NULL;
- struct iatt stbuf = {0,};
- loc_t tmp_loc = {0,};
-
- this = THIS;
- frame = data;
- local = frame->local;
-
- src_node = local->cached_subvol;
-
- /* getxattr on cached_subvol for 'linkto' value */
- if (local->loc.inode)
- ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
- else
- ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
+int
+dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, stat, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+
+ if (ret) {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
+ }
+
+ time = &ctx->time;
+
+ LOCK(&inode->lock);
+ {
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
+ stat->ia_mtime_nsec, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
+ stat->ia_ctime_nsec, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
+ stat->ia_atime_nsec, post);
+ }
+ UNLOCK(&inode->lock);
+
+ ret = dht_inode_ctx_set(inode, this, ctx);
+out:
+ return 0;
+}
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (errno));
- goto out;
+int
+dht_inode_ctx_get(inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = inode_ctx_get(inode, this, &ctx_int);
+
+ if (ret)
+ return ret;
+
+ if (ctx)
+ *ctx = (dht_inode_ctx_t *)(uintptr_t)ctx_int;
+out:
+ return ret;
+}
+
+int
+dht_inode_ctx_set(inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, ctx, out);
+
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set(inode, this, &ctx_int);
+out:
+ return ret;
+}
+
+int
+dht_subvol_status(dht_conf_t *conf, xlator_t *subvol)
+{
+ int i;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ return conf->subvolume_status[i];
}
+ }
+ return 0;
+}
- dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
- if (!dst_node) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr from dict",
- local->loc.path);
- ret = -1;
+inode_t *
+dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
+{
+ int ret = -1;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *linked_inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ char *bname = NULL;
+ char *save_ptr = NULL;
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char *tmp_path = NULL;
+
+ tmp_path = gf_strdup(path);
+ if (!tmp_path) {
+ goto out;
+ }
+
+ gf_uuid_copy(loc.pargfid, gfid);
+ loc.parent = inode_ref(itable->root);
+
+ bname = strtok_r(tmp_path, "/", &save_ptr);
+
+ /* sending a lookup on parent directory,
+ * Eg: if path is like /a/b/c/d/e/f/g/
+ * then we will send a lookup on a first and then b,c,d,etc
+ */
+
+ while (bname) {
+ linked_inode = NULL;
+ loc.inode = inode_grep(itable, loc.parent, bname);
+ if (loc.inode == NULL) {
+ loc.inode = inode_new(itable);
+ if (loc.inode == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ /*
+ * Inode is already populated in the inode table.
+ * Which means we already looked up the inode and
+ * linked with a dentry. So that we will skip
+ * lookup on this entry, and proceed to next.
+ */
+ linked_inode = loc.inode;
+ bname = strtok_r(NULL, "/", &save_ptr);
+ if (!bname) {
goto out;
+ }
+ inode_unref(loc.parent);
+ loc.parent = loc.inode;
+ gf_uuid_copy(loc.pargfid, loc.inode->gfid);
+ loc.inode = NULL;
+ continue;
}
- local->rebalance.target_node = dst_node;
-
- if (local->loc.inode) {
- /* lookup on dst */
- ret = syncop_lookup (dst_node, &local->loc, NULL,
- &stbuf, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to lookup the file on %s",
- local->loc.path, dst_node->name);
- goto out;
- }
+ loc.name = bname;
+ ret = loc_path(&loc, bname);
- if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: gfid different on the target file on %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
+ ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "path=%s", path, "subvolume=%s", this->name, "bname=%s",
+ bname, NULL);
+ goto out;
}
- ret = 0;
+ linked_inode = inode_link(loc.inode, loc.parent, bname, &iatt);
+ if (!linked_inode)
+ goto out;
- if (!local->fd)
- goto out;
+ loc_wipe(&loc);
+ gf_uuid_copy(loc.pargfid, linked_inode->gfid);
+ loc.inode = NULL;
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
+ bname = strtok_r(NULL, "/", &save_ptr);
+ if (bname)
+ loc.parent = linked_inode;
+ }
+out:
+ inode_ref(linked_inode);
+ loc_wipe(&loc);
+ GF_FREE(tmp_path);
+
+ return linked_inode;
+}
+
+int
+dht_heal_full_path(void *data)
+{
+ call_frame_t *heal_frame = data;
+ dht_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *path = NULL;
+ int ret = -1;
+ xlator_t *source = NULL;
+ xlator_t *this = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ inode_t *tmp_inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("DHT", heal_frame, out);
+
+ local = heal_frame->local;
+ this = heal_frame->this;
+ source = heal_frame->cookie;
+ heal_frame->cookie = NULL;
+ gf_uuid_copy(loc.gfid, local->gfid);
+
+ if (local->loc.inode)
+ loc.inode = inode_ref(local->loc.inode);
+ else
+ goto out;
+
+ itable = loc.inode->table;
+ ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL,
+ NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT,
+ "subvol=%s", source->name, NULL);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, GET_ANCESTRY_PATH_KEY, &path);
+ if (path) {
+ inode = dht_heal_path(this, path, itable);
+ if (inode && inode != local->inode) {
+ /*
+ * if inode returned by heal function is different
+ * from what we passed, which means a racing thread
+ * already linked a different inode for dentry.
+ * So we will update our local->inode, so that we can
+ * retrurn proper inode.
+ */
+ tmp_inode = local->inode;
+ local->inode = inode;
+ inode_unref(tmp_inode);
+ tmp_inode = NULL;
} else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
- ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
+ inode_unref(inode);
}
+ }
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
- goto out;
- }
+out:
+ loc_wipe(&loc);
+ if (dict)
+ dict_unref(dict);
+ return 0;
+}
- ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node);
+int
+dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ local = heal_frame->local;
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ this = heal_frame->this;
+
+ dht_set_fixed_dir_stat(&local->postparent);
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set fd-ctx target file at %s",
- local->loc.path, dst_node->name);
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ NULL);
}
+ }
- ret = 0;
-out:
- return ret;
+ DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
+ local->xattr, &local->postparent);
+
+ DHT_STACK_DESTROY(heal_frame);
+ return 0;
}
+/* This function must be called inside an inode lock */
int
-dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+__dht_lock_subvol_set(inode_t *inode, xlator_t *this, xlator_t *lock_subvol)
{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t value = 0;
- int ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
- dht_inprogress_check_done,
- frame, frame);
- return ret;
+ ret = __inode_ctx_get0(inode, this, &value);
+ if (ret || !value) {
+ return -1;
+ }
+
+ ctx = (dht_inode_ctx_t *)(uintptr_t)value;
+ ctx->lock_subvol = lock_subvol;
+out:
+ return ret;
+}
+
+xlator_t *
+dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local)
+{
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ int32_t ret = -1;
+ uint64_t value = 0;
+ xlator_t *cached_subvol = NULL;
+ dht_inode_ctx_t *ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO(this->name, lock, out);
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+
+ if (!inode)
+ goto out;
+
+ if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) {
+ /*
+ * We may get non-linked inode for directories as part
+ * of the selfheal code path. So checking for IA_INVAL
+ * type also. This will only happen for directory.
+ */
+ subvol = local->cached_subvol;
+ goto out;
+ }
+
+ if (lock->l_type != F_UNLCK) {
+ /*
+ * inode purging might happen on NFS between a lk
+ * and unlk. Due to this lk and unlk might be sent
+ * to different subvols.
+ * So during a lock request, taking a ref on inode
+ * to prevent inode purging. inode unref will happen
+ * in unlock cbk code path.
+ */
+ inode_ref(inode);
+ }
+
+ LOCK(&inode->lock);
+ ret = __inode_ctx_get0(inode, this, &value);
+ if (!ret && value) {
+ ctx = (dht_inode_ctx_t *)(uintptr_t)value;
+ subvol = ctx->lock_subvol;
+ }
+ if (!subvol && lock->l_type != F_UNLCK && cached_subvol) {
+ ret = __dht_lock_subvol_set(inode, this, cached_subvol);
+ if (ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ UNLOCK(&inode->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "lock_subvol gfid=%s", gfid, NULL);
+ goto post_unlock;
+ }
+ subvol = cached_subvol;
+ }
+ UNLOCK(&inode->lock);
+post_unlock:
+ if (!subvol && inode && lock->l_type != F_UNLCK) {
+ inode_unref(inode);
+ }
+out:
+ return subvol;
+}
+
+int
+dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ this = frame->this;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+ if (!inode) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ NULL);
+ goto out;
+ }
+
+ if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) {
+ ret = 0;
+ goto out;
+ }
+
+ switch (local->lock_type) {
+ case F_RDLCK:
+ case F_WRLCK:
+ if (op_ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_msg_debug(this->name, 0, "lock request failed for gfid %s",
+ gfid);
+ inode_unref(inode);
+ goto out;
+ }
+ break;
+
+ case F_UNLCK:
+ if (!op_ret) {
+ inode_unref(inode);
+ } else {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ default:
+ break;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Code to update custom extended attributes from src dict to dst dict
+ */
+void
+dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+ dict_t *src, int *uret, int *uflag)
+{
+ int ret = -1;
+ data_t *keyval = NULL;
+ int luret = -1;
+ int luflag = -1;
+ int i = 0;
+ char **xattrs_to_heal;
+
+ if (!src || !dst) {
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
+ "path=%s", local->loc.path, NULL);
+ return;
+ }
+ /* Check if any user xattr present in src dict and set
+ it to dst dict
+ */
+ luret = dict_foreach_fnmatch(src, "user.*", dht_set_user_xattr, dst);
+ /* Check if any other custom xattr present in src dict
+ and set it to dst dict, here index start from 1 because
+ user xattr already checked in previous statement
+ */
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
+ for (i = 1; xattrs_to_heal[i]; i++) {
+ keyval = dict_get(src, xattrs_to_heal[i]);
+ if (keyval) {
+ luflag = 1;
+ ret = dict_set(dst, xattrs_to_heal[i], keyval);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i],
+ "path=%s", local->loc.path, NULL);
+ keyval = NULL;
+ }
+ }
+ if (uret)
+ (*uret) = luret;
+ if (uflag)
+ (*uflag) = luflag;
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 7e29a7a8ce6..dbb8070b0da 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -8,1108 +8,1651 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
-int dht_access2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_open2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
-
-int
-dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+static int
+dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret);
+
+static int
+dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
-
- local = frame->local;
- prev = cookie;
-
- local->op_errno = op_errno;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
-
- if (!op_ret || (local->call_cnt != 1))
- goto out;
-
- /* rebalance would have happened */
- local->rebalance.target_op_fn = dht_open2;
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ /* Update ctx if the fd has been opened on the target*/
+ if (!op_ret && (local->call_cnt == 1)) {
+ dht_fd_ctx_set(this, fd, prev);
+ goto out;
+ }
+
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
out:
- DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
+ DHT_STACK_UNWIND(open, frame, op_ret, op_errno, local->fd, xdata);
- return 0;
+ return 0;
}
-int
-dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- local = frame->local;
- if (!local)
- goto out;
+ if (!frame || !frame->local)
+ goto out;
- op_errno = ENOENT;
- if (op_ret)
- goto out;
-
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ local = frame->local;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- &local->loc, local->rebalance.flags, local->fd,
- NULL);
+ if (we_are_not_migrating(ret)) {
+ /* This DHT layer is not migrating the file */
+ DHT_STACK_UNWIND(open, frame, -1, local->op_errno, NULL,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2;
+
+ STACK_WIND_COOKIE(frame, dht_open_cbk, subvol, subvol, subvol->fops->open,
+ &local->loc, local->rebalance.flags, local->fd,
+ local->xattr_req);
+ return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->rebalance.flags = flags;
- local->call_cnt = 1;
+ local = dht_local_init(frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- loc, flags, fd, xdata);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND_COOKIE(frame, dht_open_cbk, subvol, subvol, subvol->fops->open,
+ loc, flags, fd, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata)
{
- uint64_t tmp_subvol = 0;
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
+ xlator_t *subvol1 = 0;
+ xlator_t *subvol2 = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((local->fop == GF_FOP_FSTAT) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- if (local->call_cnt != 1)
- goto out;
-
- /* Check if the rebalance phase2 is true */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (ret) {
- /* Phase 2 of migration */
- local->rebalance.target_op_fn = dht_attr2;
- ret = dht_rebalance_complete_check (this, frame);
- } else {
- /* value is already set in fd_ctx, that means no need
- to check for whether its complete or not. */
- dht_attr2 (this, frame, 0);
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+
+ /* Check if the rebalance phase2 is true */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ local->rebalance.target_op_fn = dht_attr2;
+ dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata);
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* it is a non-fd op or it is an fd based Fop and
+ opened on the dst.*/
+ if (local->fd && !dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ ret = dht_rebalance_complete_check(this, frame);
if (!ret)
- return 0;
+ return 0;
+ } else {
+ dht_attr2(this, subvol2, frame, 0);
+ return 0;
+ }
}
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ DHT_STACK_UNWIND(stat, frame, op_ret, op_errno, stbuf, xdata);
err:
- return 0;
+ return 0;
}
-int
-dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(stat, frame, local->op_ret, op_errno,
+ &local->rebalance.postbuf, local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
- if (!local)
- goto out;
+ if (subvol == NULL)
+ goto out;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local->call_cnt = 2;
- subvol = local->cached_subvol;
- local->call_cnt = 2;
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, local->fd, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->stat, &local->loc, local->xattr_req);
+ }
- if (local->fop == GF_FOP_FSTAT) {
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, local->fd, NULL);
- } else {
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, &local->loc, NULL);
- }
- return 0;
+ return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int
+dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+
+ goto post_unlock;
+ }
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
+ &local->stbuf, xdata);
+ }
+
+ return 0;
}
int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+dht_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(loc->inode->ia_type)) {
+ local->call_cnt = 1;
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ subvol = local->cached_subvol;
- local = frame->local;
- prev = cookie;
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->stat, loc, xdata);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ return 0;
+ }
- goto unlock;
- }
+ local->call_cnt = call_cnt = layout->cnt;
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol,
+ subvol->fops->stat, loc, xdata);
+ }
+
+ return 0;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf, xdata);
- }
err:
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
int
-dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+dht_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ local->call_cnt = 1;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ subvol = local->cached_subvol;
- if (IA_ISREG (loc->inode->ia_type)) {
- local->call_cnt = 1;
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, xdata);
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, xdata);
+ }
- subvol = local->cached_subvol;
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, loc, xdata);
+ return 0;
+}
+int
+dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* This is already second try, no need for re-check */
+ if (local->call_cnt != 1)
+ goto out;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno))
+ goto out;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_readv2;
+ dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata);
+ /* File would be migrated to other node */
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_readv2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
- local->call_cnt = call_cnt = layout->cnt;
+out:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
+ DHT_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc, xdata);
- }
+ return 0;
+}
+static int
+dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(readv, frame, local->op_ret, op_errno, NULL, 0,
+ &local->rebalance.postbuf, NULL,
+ local->rebalance.xdata);
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ if (subvol == NULL)
+ goto out;
- return 0;
-}
+ local->call_cnt = 2;
+
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd,
+ local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, local->xattr_req);
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
int
-dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- if (IA_ISREG (fd->inode->ia_type)) {
- local->call_cnt = 1;
+ local = dht_local_init(frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = local->cached_subvol;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, fd, xdata);
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
- }
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
- local->call_cnt = call_cnt = layout->cnt;
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd,
+ local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, local->xattr_req);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd, xdata);
- }
-
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
+static int
+dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!prev)
+ goto out;
+ if (local->call_cnt != 1)
+ goto out;
+ if ((op_ret == -1) &&
+ ((op_errno == ENOTCONN) || dht_inode_missing(op_errno)) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+ subvol = dht_subvol_next_available(this, prev);
+ if (!subvol)
+ goto out;
- /* This is already second try, no need for re-check */
- if (local->call_cnt != 1)
- goto out;
-
- if ((op_ret == -1) && (op_errno != ENOENT))
- goto out;
-
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- /* File would be migrated to other node */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
- local->rebalance.target_op_fn = dht_readv2;
- ret = dht_rebalance_complete_check (this, frame);
- } else {
- /* value is already set in fd_ctx, that means no need
- to check for whether its complete or not. */
- dht_readv2 (this, frame, 0);
- }
- if (!ret)
- return 0;
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
}
-out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
-
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, &local->loc,
+ local->rebalance.flags, NULL);
return 0;
+ }
+ if ((op_ret == -1) && dht_inode_missing(op_errno) &&
+ !(IA_ISDIR(local->loc.inode->ia_type))) {
+ /* File would be migrated to other node */
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check(frame->this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int
-dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
-
- local = frame->local;
- if (!local)
- goto out;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
- local->fd, local->rebalance.size, local->rebalance.offset,
- local->rebalance.flags, NULL);
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2;
+
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, &local->loc, local->rebalance.flags,
+ local->xattr_req);
+
+ return 0;
out:
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
+ return 0;
}
int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+dht_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, loc, mask, xdata);
+
+ return 0;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
- local->rebalance.offset = off;
- local->rebalance.size = size;
- local->rebalance.flags = flags;
- local->call_cnt = 1;
+ return 0;
+}
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off, flags, xdata);
+int
+dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = 0;
+ int ret = 0;
- return 0;
+ local = frame->local;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ local->op_errno = op_errno;
+
+ if (local->call_cnt != 1)
+ goto out;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
return 0;
-}
+ }
-int
-dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- int ret = -1;
- dht_local_t *local = NULL;
+ local->rebalance.target_op_fn = dht_flush2;
- local = frame->local;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (local->call_cnt != 1)
- goto out;
+ /* If context is set, then send flush() it to the destination */
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol);
+ if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) {
+ dht_flush2(this, subvol, frame, 0);
+ return 0;
+ }
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- /* File would be migrated to other node */
- local->rebalance.target_op_fn = dht_access2;
- ret = dht_rebalance_complete_check (frame->this, frame);
- if (!ret)
- return 0;
+ if (op_errno == EREMOTE) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret) {
+ return 0;
}
+ }
out:
- DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
- return 0;
+ DHT_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+
+ return 0;
}
-int
-dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (!local)
- goto out;
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local = frame->local;
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- &local->loc, local->rebalance.flags, NULL);
+ if (subvol == NULL)
+ goto out;
- return 0;
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, local->fd,
+ local->xattr_req);
+
+ return 0;
out:
- DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
- return 0;
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ return 0;
}
-
int
-dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
+dht_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- local->rebalance.flags = mask;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- loc, mask, xdata);
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd,
+ local->xattr_req);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
+ }
+ goto out;
+ }
- local = frame->local;
+ local->op_ret = op_ret;
+ inode = local->fd->inode;
- local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_fsync2;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- if (local->call_cnt != 1)
- goto out;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- /* If context is set, then send flush() it to the destination */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
- dht_flush2 (this, frame, 0);
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
return 0;
+ } else {
+ dht_fsync2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
out:
- DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- return 0;
+ DHT_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
}
-int
-dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(fsync, frame, local->op_ret, op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ if (subvol == NULL)
+ goto out;
- if (!subvol)
- subvol = local->cached_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- local->call_cnt = 2; /* This is the second attempt */
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, local->xattr_req);
- STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, local->fd, NULL);
+ return 0;
- return 0;
+out:
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+dht_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->call_cnt = 1;
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSYNC);
+ if (!local) {
+ op_errno = ENOMEM;
- STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, fd, xdata);
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
+ local->call_cnt = 1;
+ local->rebalance.flags = datasync;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, local->xattr_req);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+static int
+dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_flock *flock, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
- local->op_errno = op_errno;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
- }
+ if (local->call_cnt != 1)
+ goto out;
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
- local->rebalance.target_op_fn = dht_fsync2;
+ local->rebalance.target_op_fn = dht_lk2;
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- ret = dht_rebalance_in_progress_check (this, frame);
- }
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- /* Check if the rebalance phase2 is true */
- if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- }
+ if (op_errno == EREMOTE) {
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol);
+ if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) {
+ dht_lk2(this, subvol, frame, 0);
+ return 0;
} else {
- dht_fsync2 (this, frame, 0);
- }
- if (!ret)
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret) {
return 0;
+ }
+ }
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(lk, frame, op_ret, op_errno, flock, xdata);
- return 0;
+ return 0;
}
-int
-dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ local = frame->local;
- if (!subvol)
- subvol = local->cached_subvol;
+ op_errno = local->op_errno;
- local->call_cnt = 2; /* This is the second attempt */
+ if (subvol == NULL)
+ goto out;
- STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- local->fd, local->rebalance.flags, NULL);
+ local->call_cnt = 2; /* This is the second attempt */
- return 0;
+ STACK_WIND(frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd,
+ local->rebalance.lock_cmd, &local->rebalance.flock,
+ local->xattr_req);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
- dict_t *xdata)
+dht_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_LK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->lock_type = flock->l_type;
+ lock_subvol = dht_get_lock_subvolume(this, flock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for path=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /*
+ local->cached_subvol = lock_subvol;
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto err;
+ */
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->rebalance.flock = *flock;
+ local->rebalance.lock_cmd = cmd;
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd, cmd,
+ flock, xdata);
+
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
- if (!local) {
- op_errno = ENOMEM;
+ return 0;
+}
- goto err;
- }
+static int
+dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ DHT_STACK_UNWIND(lease, frame, op_ret, op_errno, lease, xdata);
- local->call_cnt = 1;
- local->rebalance.flags = datasync;
+ return 0;
+}
- subvol = local->cached_subvol;
+int
+dht_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
- STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- fd, datasync, xdata);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
- return 0;
+ subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* TODO: for rebalance, we need to preserve the fop arguments */
+ STACK_WIND(frame, dht_lease_cbk, subvol, subvol->fops->lease, loc, lease,
+ xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
-/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
- indicate that lock migration happened on the fd, so we can consider it as
- phase 2 of migration */
-int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
+/* Symlinks are currently not migrated, so no need for any check here */
+static int
+dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
+ dht_local_t *local = NULL;
- return 0;
-}
+ local = frame->local;
+ if (op_ret == -1)
+ goto err;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+err:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ DHT_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+ return 0;
+}
int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
+dht_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_READLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND(frame, dht_readlink_cbk, subvol, subvol->fops->readlink, loc,
+ size, xdata);
+
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readlink, frame, -1, op_errno, NULL, NULL, NULL);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+}
- /* TODO: for rebalance, we need to preserve the fop arguments */
- STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
- cmd, flock, xdata);
+/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ * This will return a dummy iatt with only the mode and type set
+ */
+static int
+dht_read_iatt_from_xdata(dict_t *xdata, struct iatt *stbuf)
+{
+ int ret = -1;
+ int32_t mode = 0;
- return 0;
+ ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ if (ret) {
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ } else {
+ stbuf->ia_prot = ia_prot_from_st_mode(mode);
+ stbuf->ia_type = ia_type_from_st_mode(mode);
+ }
- return 0;
+ return ret;
}
-/* Symlinks are currently not migrated, so no need for any check here */
int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path,
- struct iatt *stbuf, dict_t *xdata)
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *call_frame = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+ call_frame = cookie;
+ prev = call_frame->this;
+
+ local->op_errno = op_errno;
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ ret = dht_read_iatt_from_xdata(xdata, &stbuf);
+
+ if ((!op_ret) && (ret)) {
+ /* This is a potential problem and can cause corruption
+ * with sharding.
+ * Oh well. We tried.
+ */
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_common_xattrop2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (dict)
+ local->rebalance.dict = dict_ref(dict);
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(&stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(&stbuf)) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_common_xattrop2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
-err:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+out:
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ }
- return 0;
+ return 0;
}
-
-int
-dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
- dict_t *xdata)
+static int
+dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND(xattrop, frame, local->op_ret, op_errno,
+ local->rebalance.dict, local->rebalance.xdata);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, local->op_ret, op_errno,
+ local->rebalance.dict, local->rebalance.xdata);
}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+ }
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size, xdata);
+ if (subvol == NULL)
+ goto out;
- return 0;
+ local->call_cnt = 2; /* This is the second attempt */
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+ if (local->fop == GF_FOP_XATTROP) {
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop,
+ &local->loc, local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ } else {
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd, local->rebalance.flags,
+ local->rebalance.xattr, local->xattr_req);
+ }
- return 0;
-}
+ return 0;
-/* Currently no translators on top of 'distribute' will be using
- * below fops, hence not implementing 'migration' related checks
- */
+out:
-int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+ /* If local is unavailable we could be unwinding the wrong
+ * function here */
+
+ if (local && (local->fop == GF_FOP_XATTROP)) {
+ DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ }
+ return 0;
+}
+
+static int
+dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ */
+static int
+dht_request_iatt_in_xdata(dict_t *xattr_req)
+{
+ int ret = -1;
+
+ ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
+ ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ /* At least one call succeeded */
+ return ret;
+}
int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_XATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s",
+ uuid_utoa(loc->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ STACK_WIND(frame, dht_xattrop_cbk, subvol, subvol->fops->xattrop, loc,
+ flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->rebalance.xattr = dict_ref(dict);
+ local->rebalance.flags = flags;
- local->call_cnt = 1;
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dictionary key %s file=%s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict, xdata);
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop,
+ loc, local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+static int
+dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-
int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ subvol = dht_subvol_get_cached(this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FXATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ STACK_WIND(frame, dht_fxattrop_cbk, subvol, subvol->fops->fxattrop, fd,
+ flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = 1;
+
+ local->rebalance.xattr = dict_ref(dict);
+ local->rebalance.flags = flags;
+
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to set dictionary key %s fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
}
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict, xdata);
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, fd, local->rebalance.flags,
+ local->rebalance.xattr, local->xattr_req);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
-int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static int
+dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+dht_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
- local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init(frame, loc, NULL, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->lock_type = lock->l_type;
+ lock_subvol = dht_get_lock_subvolume(this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- local->call_cnt = 1;
+ local->call_cnt = 1;
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock, xdata);
+ STACK_WIND(frame, dht_inodelk_cbk, lock_subvol, lock_subvol->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(inodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ dht_local_t *local = NULL;
+ int ret = 0;
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
-int
-dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock, xdata);
+out:
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
+}
+
+int
+dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *lock_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->lock_type = lock->l_type;
+
+ lock_subvol = dht_get_lock_subvolume(this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /*
+ local->cached_subvol = lock_subvol;
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto err;
+ */
+ local->rebalance.flock = *lock;
+ local->rebalance.lock_cmd = cmd;
+ local->key = gf_strdup(volume);
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
+ lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index d4a3ecc391c..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -8,605 +8,1397 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
-int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
+static int
+dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL;
+ xlator_t *subvol2 = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* writev fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could be a valid bad fd error.
+ */
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- if (op_ret == -1) {
- goto out;
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, 0, "subvolume %s returned -1 (%s)", prev->name,
+ strerror(op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ /* preserve the modes of source */
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ local->rebalance.target_op_fn = dht_writev2;
- if (local->call_cnt != 1) {
- /* preserve the modes of source */
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
+
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
+ if (!local->xattr_req) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
goto out;
+ }
}
- local->rebalance.target_op_fn = dht_writev2;
-
- /* Phase 2 of migration */
- if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
}
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
- dht_writev2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1,
+ &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ if (dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ dht_writev2(this, subvol2, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ DHT_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-int
-dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
+ if (subvol == NULL)
+ goto out;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- if (!subvol)
- subvol = local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, local->fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
- local->call_cnt = 2; /* This is the second attempt */
+ return 0;
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- local->fd, local->rebalance.vector, local->rebalance.count,
- local->rebalance.offset, local->rebalance.flags,
- local->rebalance.iobref, NULL);
+out:
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int count, off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_WRITE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->rebalance.vector = iov_dup(vector, count);
+ local->rebalance.offset = off;
+ local->rebalance.count = count;
+ local->rebalance.flags = flags;
+ local->rebalance.iobref = iobref_ref(iobref);
+ local->call_cnt = 1;
+
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
+
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
- if (!local) {
+ return 0;
+}
- op_errno = ENOMEM;
- goto err;
- }
+int
+dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* Needs to be checked only for ftruncate.
+ * ftruncate fails with EBADF/EINVAL if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if ((local->fop == GF_FOP_FTRUNCATE) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_truncate2;
- local->rebalance.vector = iov_dup (vector, count);
- local->rebalance.offset = off;
- local->rebalance.count = count;
- local->rebalance.flags = flags;
- local->rebalance.iobref = iobref_ref (iobref);
- local->call_cnt = 1;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, flags, iobref, xdata);
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
- return 0;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if ((!local->fd) ||
+ ((local->fd) &&
+ dht_fd_open_on_dst(this, local->fd, dst_subvol))) {
+ dht_truncate2(this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+ DHT_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+static int
+dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ /* This dht xlator is not migrating the file */
+ if (we_are_not_migrating(ret)) {
+ DHT_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_TRUNCATE) {
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->truncate, &local->loc,
+ local->rebalance.offset, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, local->fd,
+ local->rebalance.offset, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+int
+dht_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_TRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s",
+ uuid_utoa(loc->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->truncate, loc, offset, xdata);
+
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
int
-dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FTRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd, local->rebalance.offset,
+ local->xattr_req);
+ return 0;
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- local = frame->local;
- prev = cookie;
+ return 0;
+}
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* fallocate fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- goto out;
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- local->rebalance.target_op_fn = dht_truncate2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_fallocate2;
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- }
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
- dht_truncate2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ dht_fallocate2(this, dst_subvol, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
err:
- return 0;
+ return 0;
}
-
-int
-dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(fallocate, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
+ if (subvol == NULL)
+ goto out;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- if (!subvol)
- subvol = local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, local->fd,
+ local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
- local->call_cnt = 2; /* This is the second attempt */
+ return 0;
- if (local->fop == GF_FOP_TRUNCATE) {
- STACK_WIND (frame, dht_truncate_cbk, subvol,
- subvol->fops->truncate, &local->loc,
- local->rebalance.offset, NULL);
- } else {
- STACK_WIND (frame, dht_truncate_cbk, subvol,
- subvol->fops->ftruncate, local->fd,
- local->rebalance.offset, NULL);
- }
+out:
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- return 0;
+int
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FALLOCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mode;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, fd, local->rebalance.flags,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
}
int
-dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* discard fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- local->rebalance.offset = offset;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->truncate,
- loc, offset, xdata);
+ local->rebalance.target_op_fn = dht_discard2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- return 0;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ dht_discard2(this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+ DHT_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata);
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+static int
+dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(discard, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, local->fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
-dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- local->rebalance.offset = offset;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset, xdata);
+ local = dht_local_init(frame, NULL, fd, GF_FOP_DISCARD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-/* handle cases of migration here for 'setattr()' calls */
int
-dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
-
- local = frame->local;
- prev = cookie;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* zerofill fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- if (local->call_cnt != 1)
- goto out;
+ local->rebalance.target_op_fn = dht_zerofill2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- local->rebalance.target_op_fn = dht_setattr2;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1,
+ &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ if (dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ dht_zerofill2(this, subvol2, frame, 0);
+ return 0;
+ }
}
- /* At the end of the migration process, whatever 'attr' we
- have on source file will be migrated to destination file
- in one shot, hence we don't need to check for in progress
- state here (ie, PHASE1) */
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- return 0;
+ DHT_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+err:
+ return 0;
}
-int
-dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
+ if (!frame || !frame->local)
+ goto out;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
-
- if (!subvol)
- subvol = local->cached_subvol;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (local->fop == GF_FOP_SETATTR) {
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->setattr, &local->loc,
- &local->rebalance.stbuf, local->rebalance.flags,
- NULL);
- } else {
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->fsetattr, local->fd,
- &local->rebalance.stbuf, local->rebalance.flags,
- NULL);
- }
+ local = frame->local;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(zerofill, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
return 0;
-}
+ }
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, local->fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ return 0;
+
+out:
+
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
-/* Keep the existing code same for all the cases other than regular file */
int
-dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
- dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ local = dht_local_init(frame, NULL, fd, GF_FOP_ZEROFILL);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf, xdata);
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
- return 0;
-}
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/* handle cases of migration here for 'setattr()' calls */
int
-dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
- int call_cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->op_errno = op_errno;
- if (IA_ISREG (loc->inode->ia_type)) {
- /* in the regular file _cbk(), we need to check for
- migration possibilities */
- local->rebalance.stbuf = *stbuf;
- local->rebalance.flags = valid;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
+ if ((local->fop == GF_FOP_FSETATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->setattr,
- loc, stbuf, valid, xdata);
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
- return 0;
- }
+ if (local->call_cnt != 1)
+ goto out;
- local->call_cnt = call_cnt = layout->cnt;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid, xdata);
- }
+ local->rebalance.target_op_fn = dht_setattr2;
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- return 0;
+ /* At the end of the migration process, whatever 'attr' we
+ have on source file will be migrated to destination file
+ in one shot, hence we don't need to check for in progress
+ state here (ie, PHASE1) */
+out:
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
}
+static int
+dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->setattr, &local->loc,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, local->fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+/* Keep the existing code same for all the cases other than regular file */
int
-dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
- if (IA_ISREG (fd->inode->ia_type)) {
- /* in the regular file _cbk(), we need to check for
- migration possibilities */
- local->rebalance.stbuf = *stbuf;
- local->rebalance.flags = valid;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (local->op_ret == 0)
+ dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
+ }
+
+ return 0;
+}
+
+/* Keep the existing code same for all the cases other than regular file */
+int
+dht_non_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
+ }
+
+ LOCK(&frame->lock);
+ {
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
+ DHT_STACK_UNWIND(setattr, frame, 0, 0, &local->prebuf, &local->stbuf,
+ xdata);
+ }
+
+ return 0;
+}
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->fsetattr,
- fd, stbuf, valid, xdata);
+int
+dht_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
- return 0;
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *mds_subvol = NULL;
+ struct iatt loc_stbuf = {
+ 0,
+ };
+ int i = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ mds_subvol = local->mds_subvol;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ local->op_ret = 0;
+ loc_stbuf = local->stbuf;
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol == conf->subvolumes[i])
+ continue;
+ STACK_WIND_COOKIE(frame, dht_non_mds_setattr_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->setattr, &local->loc,
+ &loc_stbuf, local->valid, local->xattr_req);
+ }
+
+ return 0;
+out:
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
+
+ return 0;
+}
+
+int
+dht_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane(layout)) {
+ gf_msg_debug(this->name, 0, "layout is not sane for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(loc->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->setattr, loc, stbuf, valid, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ if (IA_ISDIR(loc->inode->ia_type) && !__is_root_gfid(loc->inode->gfid) &&
+ call_cnt != 1) {
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ if (ret || !mds_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for path %s", local->loc.path);
+ op_errno = EINVAL;
+ goto err;
}
- local->call_cnt = call_cnt = layout->cnt;
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, layout->list[i].err,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvol is down for path "
+ " %s Unable to set attr ",
+ local->loc.path);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
+ }
+ local->valid = valid;
+ local->stbuf = *stbuf;
+ STACK_WIND_COOKIE(frame, dht_mds_setattr_cbk, local->mds_subvol,
+ local->mds_subvol, local->mds_subvol->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+ } else {
for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid, xdata);
+ STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf,
+ valid, xdata);
}
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane(layout)) {
+ gf_msg_debug(this->name, 0, "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, fd, &local->rebalance.stbuf,
+ local->rebalance.flags, local->xattr_req);
return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fsetattr, fd, stbuf,
+ valid, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 38afa892c07..fda904c92c9 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -8,735 +8,801 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
+#include "unittest/unittest.h"
-#define layout_base_size (sizeof (dht_layout_t))
+#define layout_base_size (sizeof(dht_layout_t))
-#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0])
+#define layout_entry_size (sizeof((dht_layout_t *)NULL)->list[0])
#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size))
-
dht_layout_t *
-dht_layout_new (xlator_t *this, int cnt)
+dht_layout_new(xlator_t *this, int cnt)
{
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ REQUIRE(NULL != this);
+ REQUIRE(cnt >= 0);
- conf = this->private;
+ conf = this->private;
- layout = GF_CALLOC (1, layout_size (cnt),
- gf_dht_mt_dht_layout_t);
- if (!layout) {
- goto out;
- }
+ layout = GF_CALLOC(1, layout_size(cnt), gf_dht_mt_dht_layout_t);
+ if (!layout) {
+ goto out;
+ }
- layout->type = DHT_HASH_TYPE_DM;
- layout->cnt = cnt;
+ layout->type = DHT_HASH_TYPE_DM;
+ layout->cnt = cnt;
- if (conf) {
- layout->spread_cnt = conf->dir_spread_cnt;
- layout->gen = conf->gen;
- }
+ if (conf) {
+ layout->spread_cnt = conf->dir_spread_cnt;
+ layout->gen = conf->gen;
+ }
+
+ GF_ATOMIC_INIT(layout->ref, 1);
- layout->ref = 1;
+ ENSURE(NULL != layout);
+ ENSURE(layout->type == DHT_HASH_TYPE_DM);
+ ENSURE(layout->cnt == cnt);
+ ENSURE(GF_ATOMIC_GET(layout->ref) == 1);
out:
- return layout;
+ return layout;
}
-
dht_layout_t *
-dht_layout_get (xlator_t *this, inode_t *inode)
+dht_layout_get(xlator_t *this, inode_t *inode)
{
- dht_conf_t *conf = NULL;
- uint64_t layout_int = 0;
- dht_layout_t *layout = NULL;
- int ret = -1;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- LOCK (&conf->layout_lock);
- {
- ret = inode_ctx_get (inode, this, &layout_int);
- if (ret == 0) {
- layout = (dht_layout_t *) (unsigned long) layout_int;
- layout->ref++;
- }
- }
- UNLOCK (&conf->layout_lock);
-
-out:
- return layout;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+
+ ret = dht_inode_ctx_layout_get(inode, this, &layout);
+ if ((!ret) && layout) {
+ GF_ATOMIC_INC(layout->ref);
+ }
+ return layout;
}
-
int
-dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
+dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- int oldret = -1;
- int ret = 0;
- dht_layout_t *old_layout;
- uint64_t old_layout_int;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- LOCK (&conf->layout_lock);
- {
- oldret = inode_ctx_get (inode, this, &old_layout_int);
-
- layout->ref++;
- ret = inode_ctx_put (inode, this, (uint64_t) (unsigned long)
- layout);
- }
- UNLOCK (&conf->layout_lock);
-
- if (oldret == 0) {
- old_layout = (dht_layout_t *) (unsigned long) old_layout_int;
- dht_layout_unref (this, old_layout);
- }
+ dht_conf_t *conf = NULL;
+ int oldret = -1;
+ int ret = -1;
+ dht_layout_t *old_layout;
+
+ conf = this->private;
+ if (!conf || !layout)
+ goto out;
+
+ LOCK(&conf->layout_lock);
+ {
+ oldret = dht_inode_ctx_layout_get(inode, this, &old_layout);
+ if (layout)
+ GF_ATOMIC_INC(layout->ref);
+ ret = dht_inode_ctx_layout_set(inode, this, layout);
+ }
+ UNLOCK(&conf->layout_lock);
+
+ if (!oldret) {
+ dht_layout_unref(this, old_layout);
+ }
+ if (ret)
+ GF_ATOMIC_DEC(layout->ref);
out:
- return ret;
+ return ret;
}
-
void
-dht_layout_unref (xlator_t *this, dht_layout_t *layout)
+dht_layout_unref(xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- int ref = 0;
+ int ref = 0;
- if (layout->preset || !this->private)
- return;
+ if (!layout || layout->preset || !this->private)
+ return;
- conf = this->private;
-
- LOCK (&conf->layout_lock);
- {
- ref = --layout->ref;
- }
- UNLOCK (&conf->layout_lock);
+ ref = GF_ATOMIC_DEC(layout->ref);
- if (!ref)
- GF_FREE (layout);
+ if (!ref)
+ GF_FREE(layout);
}
-
dht_layout_t *
-dht_layout_ref (xlator_t *this, dht_layout_t *layout)
+dht_layout_ref(xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
-
- if (layout->preset || !this->private)
- return layout;
+ if (layout->preset || !this->private)
+ return layout;
- conf = this->private;
- LOCK (&conf->layout_lock);
- {
- layout->ref++;
- }
- UNLOCK (&conf->layout_lock);
+ GF_ATOMIC_INC(layout->ref);
- return layout;
+ return layout;
}
-
xlator_t *
-dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
{
- uint32_t hash = 0;
- xlator_t *subvol = NULL;
- int i = 0;
- int ret = 0;
-
-
- ret = dht_hash_compute (layout->type, name, &hash);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "hash computation failed for type=%d name=%s",
- layout->type, name);
- goto out;
- }
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].start <= hash
- && layout->list[i].stop >= hash) {
- subvol = layout->list[i].xlator;
- break;
- }
- }
-
- if (!subvol) {
- gf_log (this->name, GF_LOG_WARNING,
- "no subvolume for hash (value) = %u", hash);
- }
+ uint32_t hash = 0;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+
+ ret = dht_hash_compute(this, layout->type, name, &hash);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
+ "type=%d", layout->type, "name=%s", name, NULL);
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].start <= hash && layout->list[i].stop >= hash) {
+ subvol = layout->list[i].xlator;
+ break;
+ }
+ }
+
+ if (!subvol) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "hash-value=0x%x", hash, NULL);
+ }
out:
- return subvol;
+ return subvol;
}
-
dht_layout_t *
-dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
+dht_layout_for_subvol(xlator_t *this, xlator_t *subvol)
{
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == subvol) {
- layout = conf->file_layouts[i];
- break;
- }
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ layout = conf->file_layouts[i];
+ break;
}
+ }
out:
- return layout;
+ return layout;
}
-
int
-dht_layouts_init (xlator_t *this, dht_conf_t *conf)
+dht_layouts_init(xlator_t *this, dht_conf_t *conf)
{
- dht_layout_t *layout = NULL;
- int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
-
- conf->file_layouts = GF_CALLOC (conf->subvolume_cnt,
- sizeof (dht_layout_t *),
- gf_dht_mt_dht_layout_t);
- if (!conf->file_layouts) {
- goto out;
- }
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int ret = -1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- layout = dht_layout_new (this, 1);
-
- if (!layout) {
- goto out;
- }
+ if (!conf)
+ goto out;
- layout->preset = 1;
+ conf->file_layouts = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_layout_t *),
+ gf_dht_mt_dht_layout_t);
+ if (!conf->file_layouts) {
+ goto out;
+ }
- layout->list[0].xlator = conf->subvolumes[i];
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ layout = dht_layout_new(this, 1);
- conf->file_layouts[i] = layout;
+ if (!layout) {
+ goto out;
}
- ret = 0;
+ layout->preset = 1;
+
+ layout->list[0].xlator = conf->subvolumes[i];
+
+ conf->file_layouts[i] = layout;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p)
+dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
+ int32_t **disk_layout_p)
{
- int ret = -1;
- int32_t *disk_layout = NULL;
+ int ret = -1;
+ int32_t *disk_layout = NULL;
- disk_layout = GF_CALLOC (5, sizeof (int),
- gf_dht_mt_int32_t);
- if (!disk_layout) {
- goto out;
- }
+ disk_layout = GF_CALLOC(5, sizeof(int), gf_dht_mt_int32_t);
+ if (!disk_layout) {
+ goto out;
+ }
- disk_layout[0] = hton32 (1);
- disk_layout[1] = hton32 (layout->type);
- disk_layout[2] = hton32 (layout->list[pos].start);
- disk_layout[3] = hton32 (layout->list[pos].stop);
+ disk_layout[0] = hton32(layout->list[pos].commit_hash);
+ disk_layout[1] = hton32(layout->type);
+ disk_layout[2] = hton32(layout->list[pos].start);
+ disk_layout[3] = hton32(layout->list[pos].stop);
- if (disk_layout_p)
- *disk_layout_p = disk_layout;
- else
- GF_FREE (disk_layout);
+ if (disk_layout_p)
+ *disk_layout_p = disk_layout;
+ else
+ GF_FREE(disk_layout);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw, int disk_layout_len)
+dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p)
{
- int cnt = 0;
- int type = 0;
- int start_off = 0;
- int stop_off = 0;
- int disk_layout[4];
-
- if (!disk_layout_raw) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "error no layout on disk for merge");
- return -1;
- }
-
- GF_ASSERT (disk_layout_len == sizeof (disk_layout));
-
- memcpy (disk_layout, disk_layout_raw, disk_layout_len);
-
- cnt = ntoh32 (disk_layout[0]);
- if (cnt != 1) {
- gf_log (this->name, GF_LOG_ERROR,
- "disk layout has invalid count %d", cnt);
- return -1;
- }
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol)
+ break;
+ }
+
+ if (i == layout->cnt)
+ return -1;
- switch (disk_layout[1]) {
- case DHT_HASH_TYPE_DM:
- type = ntoh32 (disk_layout[1]);
- break;
+ return dht_disk_layout_extract(this, layout, i, disk_layout_p);
+}
+
+static int
+dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
+ void *disk_layout_raw, int disk_layout_len)
+{
+ int type = 0;
+ int start_off = 0;
+ int stop_off = 0;
+ int commit_hash = 0;
+ int disk_layout[4];
+
+ if (!disk_layout_raw) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ NULL);
+ return -1;
+ }
+
+ GF_ASSERT(disk_layout_len == sizeof(disk_layout));
+
+ memcpy(disk_layout, disk_layout_raw, disk_layout_len);
+
+ type = ntoh32(disk_layout[1]);
+ switch (type) {
+ case DHT_HASH_TYPE_DM_USER:
+ gf_msg_debug(this->name, 0, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
default:
- gf_log (this->name, GF_LOG_CRITICAL,
- "Catastrophic error layout with unknown type found %d",
- disk_layout[1]);
- return -1;
- }
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
+ "layout=%d", disk_layout[1], NULL);
+ return -1;
+ }
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
+ commit_hash = ntoh32(disk_layout[0]);
+ start_off = ntoh32(disk_layout[2]);
+ stop_off = ntoh32(disk_layout[3]);
- layout->list[pos].start = start_off;
- layout->list[pos].stop = stop_off;
+ layout->list[pos].commit_hash = commit_hash;
+ layout->list[pos].start = start_off;
+ layout->list[pos].stop = stop_off;
- gf_log (this->name, GF_LOG_TRACE,
- "merged to layout: %u - %u (type %d) from %s",
- start_off, stop_off, type,
- layout->list[pos].xlator->name);
+ gf_msg_trace(this->name, 0,
+ "merged to layout: 0x%x - 0x%x (hash 0x%x, type %d) from %s",
+ start_off, stop_off, commit_hash, type,
+ layout->list[pos].xlator->name);
- return 0;
+ return 0;
}
-
int
-dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr)
+dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- void *disk_layout_raw = NULL;
- int disk_layout_len = 0;
-
- if (op_ret != 0) {
- err = op_errno;
- }
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == NULL) {
- layout->list[i].err = err;
- layout->list[i].xlator = subvol;
- break;
- }
- }
+ if (op_ret != 0) {
+ err = op_errno;
+ }
- if (op_ret != 0) {
- ret = 0;
- goto out;
- }
+ if (!layout)
+ goto out;
- if (xattr) {
- /* during lookup and not mkdir */
- ret = dict_get_ptr_and_len (xattr, "trusted.glusterfs.dht",
- &disk_layout_raw, &disk_layout_len);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == NULL) {
+ layout->list[i].err = err;
+ layout->list[i].xlator = subvol;
+ break;
}
+ }
- if (ret != 0) {
- layout->list[i].err = -1;
- gf_log (this->name, GF_LOG_TRACE,
- "missing disk layout on %s. err = %d",
- subvol->name, err);
- ret = 0;
- goto out;
- }
+ if (op_ret != 0) {
+ ret = 0;
+ goto out;
+ }
- ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
- disk_layout_len);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "layout merge from subvolume %s failed",
- subvol->name);
- goto out;
- }
+ if (xattr) {
+ /* during lookup and not mkdir */
+ ret = dict_get_ptr_and_len(xattr, conf->xattr_name, &disk_layout_raw,
+ &disk_layout_len);
+ }
+
+ if (ret != 0) {
layout->list[i].err = 0;
+ gf_msg_trace(this->name, 0, "Missing disk layout on %s. err = %d",
+ subvol->name, err);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw,
+ disk_layout_len);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "subvolume=%s", subvol->name, NULL);
+ goto out;
+ }
+
+ if (layout->commit_hash == 0) {
+ layout->commit_hash = layout->list[i].commit_hash;
+ } else if (layout->commit_hash != layout->list[i].commit_hash) {
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ }
+
+ layout->list[i].err = 0;
out:
- return ret;
+ return ret;
}
+void
+dht_layout_entry_swap(dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+ uint32_t commit_hash_swap = 0;
+ xlator_t *xlator_swap = 0;
+ int err_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+ xlator_swap = layout->list[i].xlator;
+ err_swap = layout->list[i].err;
+ commit_hash_swap = layout->list[i].commit_hash;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].xlator = layout->list[j].xlator;
+ layout->list[i].err = layout->list[j].err;
+ layout->list[i].commit_hash = layout->list[j].commit_hash;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+ layout->list[j].xlator = xlator_swap;
+ layout->list[j].err = err_swap;
+ layout->list[j].commit_hash = commit_hash_swap;
+}
void
-dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
+dht_layout_range_swap(dht_layout_t *layout, int i, int j)
{
- uint32_t start_swap = 0;
- uint32_t stop_swap = 0;
- xlator_t *xlator_swap = 0;
- int err_swap = 0;
-
- start_swap = layout->list[i].start;
- stop_swap = layout->list[i].stop;
- xlator_swap = layout->list[i].xlator;
- err_swap = layout->list[i].err;
-
- layout->list[i].start = layout->list[j].start;
- layout->list[i].stop = layout->list[j].stop;
- layout->list[i].xlator = layout->list[j].xlator;
- layout->list[i].err = layout->list[j].err;
-
- layout->list[j].start = start_swap;
- layout->list[j].stop = stop_swap;
- layout->list[j].xlator = xlator_swap;
- layout->list[j].err = err_swap;
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+}
+static int64_t
+dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j)
+{
+ return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name));
}
-int64_t
-dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator)
{
- return (strcmp (layout->list[i].xlator->name,
- layout->list[j].xlator->name));
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* Check if xlator is already part of layout, and layout is
+ * non-zero. */
+ if (!strcmp(layout->list[i].xlator->name, xlator->name)) {
+ if (layout->list[i].start != layout->list[i].stop)
+ return _gf_true;
+ break;
+ }
+ }
+ return _gf_false;
}
-int64_t
-dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
+static int64_t
+dht_layout_entry_cmp(dht_layout_t *layout, int i, int j)
{
- int64_t diff = 0;
+ int64_t diff = 0;
- if (layout->list[i].err || layout->list[j].err)
- diff = layout->list[i].err - layout->list[j].err;
- else
- diff = (int64_t) layout->list[i].start
- - (int64_t) layout->list[j].start;
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t)layout->list[i].stop - (int64_t)layout->list[j].stop;
+ goto out;
+ }
+ diff = (int64_t)layout->list[i].start - (int64_t)layout->list[j].start;
- return diff;
+out:
+ return diff;
}
-
int
-dht_layout_sort (dht_layout_t *layout)
+dht_layout_sort(dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- /* TODO: O(n^2) -- bad bad */
+ /* TODO: O(n^2) -- bad bad */
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp(layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap(layout, i, j);
}
+ }
- return 0;
+ return 0;
}
-int
-dht_layout_sort_volname (dht_layout_t *layout)
+void
+dht_layout_sort_volname(dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- /* TODO: O(n^2) -- bad bad */
+ /* TODO: O(n^2) -- bad bad */
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp_volname (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp_volname(layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap(layout, i, j);
}
-
- return 0;
+ }
}
-
-int
-dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
- uint32_t *no_space_p)
+void
+dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- uint32_t hole_cnt = 0;
- uint32_t overlap_cnt = 0;
- int i = 0;
- int ret = 0;
- uint32_t prev_stop = 0;
- uint32_t last_stop = 0;
- char is_virgin = 1;
- uint32_t no_space = 0;
-
- /* TODO: explain what is happening */
-
- last_stop = layout->list[0].start - 1;
- prev_stop = last_stop;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- no_space++;
- break;
- default:
- misc++;
- }
- continue;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1;
+ uint32_t no_space = 0;
+
+ /* This function scans through the layout spread of a directory to
+ check if there are any anomalies. Prior to calling this function
+ the layout entries should be sorted in the ascending order.
+
+ If the layout entry has err != 0
+ then increment the corresponding anomaly.
+ else
+ if (start of the current layout entry > stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates a hole in the layout
+ if (start of the current layout entry < stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates an overlap in the layout
+ */
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ case ESTALE:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non misc++;
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
}
+ break;
+ default:
+ misc++;
+ continue;
+ }
- is_virgin = 0;
-
- if ((prev_stop + 1) < layout->list[i].start) {
- hole_cnt++;
- }
+ is_virgin = 0;
- if ((prev_stop + 1) > layout->list[i].start) {
- overlap_cnt++;
- overlaps += ((prev_stop + 1) - layout->list[i].start);
- }
- prev_stop = layout->list[i].stop;
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
}
- if ((last_stop - prev_stop) || is_virgin)
- hole_cnt++;
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
+ }
+ prev_stop = layout->list[i].stop;
+ }
- if (holes_p)
- *holes_p = hole_cnt;
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
- if (overlaps_p)
- *overlaps_p = overlap_cnt;
+ if (holes_p)
+ *holes_p = hole_cnt;
- if (missing_p)
- *missing_p = missing;
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
- if (down_p)
- *down_p = down;
+ if (missing_p)
+ *missing_p = missing;
- if (misc_p)
- *misc_p = misc;
+ if (down_p)
+ *down_p = down;
- if (no_space_p)
- *no_space_p = no_space;
+ if (misc_p)
+ *misc_p = misc;
- return ret;
+ if (no_space_p)
+ *no_space_p = no_space;
}
-
int
-dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
+dht_layout_missing_dirs(dht_layout_t *layout)
{
- int ret = 0;
- int i = 0;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
-
- ret = dht_layout_sort (layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "sort failed?! how the ....");
- goto out;
- }
+ int i = 0, missing = 0;
- ret = dht_layout_anomalies (this, loc, layout,
- &holes, &overlaps,
- &missing, &down, &misc, NULL);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "error while finding anomalies in %s -- not good news",
- loc->path);
- goto out;
- }
+ if (layout == NULL)
+ goto out;
- if (holes || overlaps) {
- if (missing == layout->cnt) {
- gf_log (this->name, GF_LOG_DEBUG,
- "directory %s looked up first time",
- loc->path);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "found anomalies in %s. holes=%d overlaps=%d",
- loc->path, holes, overlaps);
- }
- ret = -1;
+ for (i = 0; i < layout->cnt; i++) {
+ if ((layout->list[i].err == ENOENT) ||
+ ((layout->list[i].err == -1) && (layout->list[i].start == 0) &&
+ (layout->list[i].stop == 0))) {
+ missing++;
}
+ }
- for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place
- * to detect this - probably in dht_layout_anomalies()
- */
- if (layout->list[i].err > 0) {
- gf_log_callingfn (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path,
- strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name
- : "<>"));
- if ((layout->list[i].err == ENOENT) && (ret >= 0)) {
- ret++;
- }
- }
- }
+out:
+ return missing;
+}
+int
+dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
+{
+ int ret = 0;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0, missing_dirs = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ ret = dht_layout_sort(layout);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ gf_uuid_unparse(loc->gfid, gfid);
+
+ dht_layout_anomalies(this, loc, layout, &holes, &overlaps, &missing, &down,
+ &misc, NULL);
+
+ if (holes || overlaps) {
+ if (missing == layout->cnt) {
+ gf_msg_debug(this->name, 0,
+ "Directory %s looked up first time"
+ " gfid = %s",
+ loc->path, gfid);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
+ "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes,
+ "overlaps=%d", overlaps, NULL);
+ }
+ ret = -1;
+ }
+
+ if (ret >= 0) {
+ missing_dirs = dht_layout_missing_dirs(layout);
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
+ */
+ if (missing_dirs > 0)
+ ret += missing_dirs;
+ }
out:
- return ret;
+ return ret;
}
-
int
-dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- loc_t *loc, dict_t *xattr)
+dht_dir_has_layout(dict_t *xattr, char *name)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t disk_layout[4];
- void *disk_layout_raw = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
-
-
- for (idx = 0; idx < layout->cnt; idx++) {
- if (layout->list[idx].xlator == subvol) {
- pos = idx;
- break;
- }
- }
+ void *disk_layout_raw = NULL;
- if (pos == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - no layout info for subvolume %s",
- loc->path, subvol->name);
- ret = 1;
- goto out;
- }
+ return dict_get_ptr(xattr, name, &disk_layout_raw);
+}
- err = layout->list[pos].err;
+int
+dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr)
+{
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ uint32_t commit_hash = -1;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ if (loc && loc->inode)
+ gf_uuid_unparse(loc->inode->gfid, gfid);
+
+ for (idx = 0; idx < layout->cnt; idx++) {
+ if (layout->list[idx].xlator == subvol) {
+ pos = idx;
+ break;
+ }
+ }
+
+ if (pos == -1) {
+ if (loc) {
+ gf_msg_debug(this->name, 0, "%s - no layout info for subvolume %s",
+ loc ? loc->path : "path not found", subvol->name);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ err = layout->list[pos].err;
+
+ if (!xattr) {
+ if (err == 0) {
+ if (loc) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path=%s", loc->path, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path not found", NULL);
+ }
+ ret = -1;
+ }
+ goto out;
+ }
+
+ dict_ret = dict_get_ptr(xattr, conf->xattr_name, &disk_layout_raw);
+
+ if (dict_ret < 0) {
+ if (err == 0 && layout->list[pos].stop) {
+ if (loc) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found"
+ "gfid=%s",
+ gfid, NULL);
+ }
+ ret = -1;
+ }
+ goto out;
+ }
+
+ memcpy(disk_layout, disk_layout_raw, sizeof(disk_layout));
+
+ start_off = ntoh32(disk_layout[2]);
+ stop_off = ntoh32(disk_layout[3]);
+ commit_hash = ntoh32(disk_layout[0]);
+
+ if ((layout->list[pos].start != start_off) ||
+ (layout->list[pos].stop != stop_off) ||
+ (layout->list[pos].commit_hash != commit_hash)) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s",
+ layout->list[pos].xlator->name, "inode-layout:start=0x%x",
+ layout->list[pos].start, "inode-layout:stop=0x%x",
+ layout->list[pos].stop, "layout-commit-hash=0x%x; ",
+ layout->list[pos].commit_hash, "disk-layout:start-off=0x%x",
+ start_off, "disk-layout:top-off=0x%x", stop_off,
+ "commit-hash=0x%x", commit_hash, NULL);
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+out:
+ return ret;
+}
- if (!xattr) {
- if (err == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s - xattr dictionary is NULL",
- loc->path);
- ret = -1;
- }
- goto out;
- }
+int
+dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- &disk_layout_raw);
+ conf = this->private;
+ if (!conf)
+ goto out;
- if (dict_ret < 0) {
- if (err == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s - disk layout missing", loc->path);
- ret = -1;
- }
- goto out;
- }
+ layout = dht_layout_for_subvol(this, subvol);
+ if (!layout) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "subvolume=%s", subvol ? subvol->name : "<nil>", NULL);
+ ret = -1;
+ goto out;
+ }
- memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+ gf_msg_debug(this->name, 0, "file = %s, subvol = %s",
+ uuid_utoa(inode->gfid), subvol ? subvol->name : "<nil>");
- count = ntoh32 (disk_layout[0]);
- if (count != 1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s - disk layout has invalid count %d",
- loc->path, count);
- ret = -1;
- goto out;
- }
+ LOCK(&conf->layout_lock);
+ {
+ dht_inode_ctx_layout_set(inode, this, layout);
+ }
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
-
- if ((layout->list[pos].start != start_off)
- || (layout->list[pos].stop != stop_off)) {
- gf_log (this->name, GF_LOG_INFO,
- "subvol: %s; inode layout - %"PRIu32" - %"PRIu32"; "
- "disk layout - %"PRIu32" - %"PRIu32,
- layout->list[pos].xlator->name,
- layout->list[pos].start, layout->list[pos].stop,
- start_off, stop_off);
- ret = 1;
- } else {
- ret = 0;
- }
+ UNLOCK(&conf->layout_lock);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol)
{
- dht_layout_t *layout = NULL;
- int ret = -1;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- layout = dht_layout_for_subvol (this, subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
- ret = -1;
- goto out;
- }
+ int i = 0, ret = -1;
- LOCK (&conf->layout_lock);
- {
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ ret = i;
+ break;
}
- UNLOCK (&conf->layout_lock);
+ }
- ret = 0;
-out:
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 803f344aff1..89ec6cca56e 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -8,183 +8,321 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <glusterfs/compat.h>
+#include "dht-common.h"
+static int
+dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s",
+ gfid, NULL);
+out:
+ local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
+ stbuf, postparent, postparent, xattr);
+ return 0;
+}
-#include "glusterfs.h"
-#include "xlator.h"
-#include "compat.h"
-#include "dht-common.h"
+static int
+dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ local = frame->local;
+ if (!op_ret)
+ local->linked = _gf_true;
-int
-dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
+ FRAME_SU_UNDO(frame, dht_local_t);
- local = frame->local;
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ subvol = cookie;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "mame=%s", conf->link_xattr_name, NULL);
+ goto out;
+ }
- local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, preparent, postparent,
- xdata);
+ STACK_WIND_COOKIE(frame, dht_linkfile_lookup_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->linkfile.loc, xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
return 0;
+ }
+out:
+ local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
+ stbuf, preparent, postparent, xdata);
+ if (xattrs)
+ dict_unref(xattrs);
+ return 0;
}
-
int
-dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
+ loc_t *loc)
{
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- int need_unref = 0;
- int ret = 0;
-
- local = frame->local;
- local->linkfile.linkfile_cbk = linkfile_cbk;
- local->linkfile.srcvol = tovol;
-
- dict = local->params;
- if (!dict) {
- dict = dict_new ();
- if (!dict)
- goto out;
- need_unref = 1;
- }
-
- if (!uuid_is_null (local->gfid)) {
- ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
- if (ret)
- gf_log ("dht-linkfile", GF_LOG_INFO,
- "%s: gfid set failed", loc->path);
- }
-
- ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ int need_unref = 0;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk = linkfile_cbk;
+ local->linkfile.srcvol = tovol;
+ loc_copy(&local->linkfile.loc, loc);
+
+ local->linked = _gf_false;
+
+ dict = local->params;
+ if (!dict) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ need_unref = 1;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid);
+
+ ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_log ("dht-linkfile", GF_LOG_INFO,
- "%s: internal-fop set failed", loc->path);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ } else {
+ gf_uuid_unparse(loc->gfid, gfid);
+ }
+
+ ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
+
+ ret = dict_set_str(dict, conf->link_xattr_name, tovol->name);
+
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownsership */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_linkfile_create_cbk, fromvol, fromvol,
+ fromvol->fops->mknod, loc, S_IFREG | DHT_LINKFILE_MODE, 0,
+ 0, dict);
+
+ if (need_unref && dict)
+ dict_unref(dict);
+
+ return 0;
+out:
+ local->linkfile.linkfile_cbk(frame, frame->this, frame->this, -1, ENOMEM,
+ loc->inode, NULL, NULL, NULL, NULL);
- ret = dict_set_str (dict, "trusted.glusterfs.dht.linkto",
- tovol->name);
+ if (need_unref && dict)
+ dict_unref(dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_INFO,
- "%s: failed to initialize linkfile data",
- loc->path);
- goto out;
- }
+ return 0;
+}
- STACK_WIND (frame, dht_linkfile_create_cbk,
- fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
+int
+dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- if (need_unref && dict)
- dict_unref (dict);
+ local = frame->local;
+ subvol = cookie;
- return 0;
-out:
- local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
- loc->inode, NULL, NULL, NULL, NULL);
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s",
+ subvol->name, NULL);
+ }
- if (need_unref && dict)
- dict_unref (dict);
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
-
int
-dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_linkfile_unlink(call_frame_t *frame, xlator_t *this, xlator_t *subvol,
+ loc_t *loc)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
-
- local = frame->local;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "unlinking linkfile %s on %s failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- }
+ call_frame_t *unlink_frame = NULL;
+ dht_local_t *unlink_local = NULL;
- DHT_STACK_DESTROY (frame);
+ unlink_frame = copy_frame(frame);
+ if (!unlink_frame) {
+ goto err;
+ }
- return 0;
-}
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init(unlink_frame, loc, NULL, GF_FOP_MAXVALUE);
+ if (!unlink_local) {
+ goto err;
+ }
+ STACK_WIND_COOKIE(unlink_frame, dht_linkfile_unlink_cbk, subvol, subvol,
+ subvol->fops->unlink, &unlink_local->loc, 0, NULL);
-int
-dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc)
+ return 0;
+err:
+ if (unlink_frame)
+ DHT_STACK_DESTROY(unlink_frame);
+
+ return -1;
+}
+
+xlator_t *
+dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr)
{
- call_frame_t *unlink_frame = NULL;
- dht_local_t *unlink_local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ void *volname = NULL;
+ int i = 0, ret = 0;
- unlink_frame = copy_frame (frame);
- if (!unlink_frame) {
- goto err;
- }
+ conf = this->private;
- /* Using non-fop value here, as anyways, 'local->fop' is not used in
- this particular case */
- unlink_local = dht_local_init (unlink_frame, loc, NULL,
- GF_FOP_MAXVALUE);
- if (!unlink_local) {
- goto err;
- }
+ if (!xattr)
+ goto out;
- STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
- subvol, subvol->fops->unlink,
- &unlink_local->loc, 0, NULL);
+ ret = dict_get_ptr(xattr, conf->link_xattr_name, &volname);
- return 0;
-err:
- if (unlink_frame)
- DHT_STACK_DESTROY (unlink_frame);
+ if ((-1 == ret) || !volname)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (strcmp(conf->subvolumes[i]->name, (char *)volname) == 0) {
+ subvol = conf->subvolumes[i];
+ break;
+ }
+ }
- return -1;
+out:
+ return subvol;
}
+static int
+dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
-xlator_t *
-dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
- dict_t *xattr)
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
+ "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s",
+ uuid_utoa(local->gfid), NULL);
+
+ DHT_STACK_DESTROY(frame);
+
+ return 0;
+}
+
+int
+dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- void *volname = NULL;
- int i = 0, ret = 0;
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
- conf = this->private;
+ DHT_MARK_FOP_INTERNAL(xattr);
- if (!xattr)
- goto out;
+ gf_uuid_copy(local->loc.gfid, local->stbuf.ia_gfid);
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ copy = copy_frame(frame);
- if ((-1 == ret) || !volname)
- goto out;
+ if (!copy)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
- subvol = conf->subvolumes[i];
- break;
- }
- }
+ copy_local = dht_local_init(copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+ FRAME_SU_DO(copy, dht_local_t);
+
+ STACK_WIND(copy, dht_linkfile_setattr_cbk, subvol, subvol->fops->setattr,
+ &copy_local->loc, &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ xattr);
+ ret = 0;
out:
- return subvol;
+ if ((ret < 0) && (copy))
+ DHT_STACK_DESTROY(copy);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
new file mode 100644
index 00000000000..638821ccee5
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -0,0 +1,1392 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dht-lock.h"
+
+static char *
+dht_lock_asprintf(dht_lock_t *lock)
+{
+ char *lk_buf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ if (lock == NULL)
+ goto out;
+
+ uuid_utoa_r(lock->loc.gfid, gfid);
+
+ gf_asprintf(&lk_buf, "%s:%s", lock->xl->name, gfid);
+
+out:
+ return lk_buf;
+}
+
+static void
+dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
+ int count)
+{
+ int i = 0;
+ char *lk_buf = NULL;
+
+ if ((lk_array == NULL) || (count == 0))
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lk_buf = dht_lock_asprintf(lk_array[i]);
+ if (!lk_buf)
+ goto out;
+
+ gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i,
+ "lk_buf=%s", lk_buf, NULL);
+ GF_FREE(lk_buf);
+ }
+
+out:
+ return;
+}
+
+static void
+dht_lock_stack_destroy(call_frame_t *lock_frame, dht_lock_type_t lk)
+{
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ if (lk == DHT_INODELK) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ } else {
+ local->lock[0].ns.directory_ns.locks = NULL;
+ local->lock[0].ns.directory_ns.lk_count = 0;
+ }
+
+ DHT_STACK_DESTROY(lock_frame);
+ return;
+}
+
+static void
+dht_lock_free(dht_lock_t *lock)
+{
+ if (lock == NULL)
+ goto out;
+
+ loc_wipe(&lock->loc);
+ GF_FREE(lock->domain);
+ GF_FREE(lock->basename);
+ mem_put(lock);
+
+out:
+ return;
+}
+
+static void
+dht_set_lkowner(dht_lock_t **lk_array, int count, gf_lkowner_t *lkowner)
+{
+ int i = 0;
+
+ if (!lk_array || !lkowner)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i]->lk_owner = *lkowner;
+ }
+
+out:
+ return;
+}
+
+static int
+dht_lock_request_cmp(const void *val1, const void *val2)
+{
+ dht_lock_t *lock1 = NULL;
+ dht_lock_t *lock2 = NULL;
+ int ret = -1;
+
+ lock1 = *(dht_lock_t **)val1;
+ lock2 = *(dht_lock_t **)val2;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", lock1, out);
+ GF_VALIDATE_OR_GOTO("dht-locks", lock2, out);
+
+ ret = strcmp(lock1->xl->name, lock2->xl->name);
+
+ if (ret == 0) {
+ ret = gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
+ }
+
+out:
+ return ret;
+}
+
+static int
+dht_lock_order_requests(dht_lock_t **locks, int count)
+{
+ int ret = -1;
+
+ if (!locks || !count)
+ goto out;
+
+ qsort(locks, count, sizeof(*locks), dht_lock_request_cmp);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+void
+dht_lock_array_free(dht_lock_t **lk_array, int count)
+{
+ int i = 0;
+ dht_lock_t *lock = NULL;
+
+ if (lk_array == NULL)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lock = lk_array[i];
+ lk_array[i] = NULL;
+ dht_lock_free(lock);
+ }
+
+out:
+ return;
+}
+
+int32_t
+dht_lock_count(dht_lock_t **lk_array, int lk_count)
+{
+ int i = 0, locked = 0;
+
+ if ((lk_array == NULL) || (lk_count == 0))
+ goto out;
+
+ for (i = 0; i < lk_count; i++) {
+ if (lk_array[i]->locked)
+ locked++;
+ }
+out:
+ return locked;
+}
+
+static call_frame_t *
+dht_lock_frame(call_frame_t *parent_frame)
+{
+ call_frame_t *lock_frame = NULL;
+
+ lock_frame = copy_frame(parent_frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ set_lk_owner_from_ptr(&lock_frame->root->lk_owner, parent_frame->root);
+
+out:
+ return lock_frame;
+}
+
+dht_lock_t *
+dht_lock_new(xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+ const char *domain, const char *basename,
+ dht_reaction_type_t do_on_failure)
+{
+ dht_conf_t *conf = NULL;
+ dht_lock_t *lock = NULL;
+
+ conf = this->private;
+
+ lock = mem_get0(conf->lock_pool);
+ if (lock == NULL)
+ goto out;
+
+ lock->xl = xl;
+ lock->type = type;
+ lock->do_on_failure = do_on_failure;
+
+ lock->domain = gf_strdup(domain);
+ if (lock->domain == NULL) {
+ dht_lock_free(lock);
+ lock = NULL;
+ goto out;
+ }
+
+ if (basename) {
+ lock->basename = gf_strdup(basename);
+ if (lock->basename == NULL) {
+ dht_lock_free(lock);
+ lock = NULL;
+ goto out;
+ }
+ }
+
+ /* Fill only inode and gfid.
+ posix and protocol/server give preference to pargfid/basename over
+ gfid/inode for resolution if all the three parameters of loc_t are
+ present. I want to avoid the following hypothetical situation:
+
+ 1. rebalance did a lookup on a dentry and got a gfid.
+ 2. rebalance acquires lock on loc_t which was filled with gfid and
+ path (pargfid/bname) from step 1.
+ 3. somebody deleted and recreated the same file
+ 4. rename on the same path acquires lock on loc_t which now points
+ to a different inode (and hence gets the lock).
+ 5. rebalance continues to migrate file (note that not all fops done
+ by rebalance during migration are inode/gfid based Eg., unlink)
+ 6. rename continues.
+ */
+ lock->loc.inode = inode_ref(loc->inode);
+ loc_gfid(loc, lock->loc.gfid);
+
+out:
+ return lock;
+}
+
+static int
+dht_local_entrylk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local == NULL) {
+ local = dht_local_init(frame, NULL, NULL, 0);
+ }
+
+ if (local == NULL) {
+ goto out;
+ }
+
+ local->lock[0].ns.directory_ns.entrylk_cbk = entrylk_cbk;
+ local->lock[0].ns.directory_ns.locks = lk_array;
+ local->lock[0].ns.directory_ns.lk_count = lk_count;
+
+ ret = dht_lock_order_requests(local->lock[0].ns.directory_ns.locks,
+ local->lock[0].ns.directory_ns.lk_count);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void
+dht_entrylk_done(call_frame_t *lock_frame)
+{
+ fop_entrylk_cbk_t entrylk_cbk = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+ main_frame = local->main_frame;
+
+ local->lock[0].ns.directory_ns.locks = NULL;
+ local->lock[0].ns.directory_ns.lk_count = 0;
+
+ entrylk_cbk = local->lock[0].ns.directory_ns.entrylk_cbk;
+ local->lock[0].ns.directory_ns.entrylk_cbk = NULL;
+
+ entrylk_cbk(main_frame, NULL, main_frame->this,
+ local->lock[0].ns.directory_ns.op_ret,
+ local->lock[0].ns.directory_ns.op_errno, NULL);
+
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+ return;
+}
+
+static int32_t
+dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ gf_uuid_unparse(local->lock[0].ns.directory_ns.locks[0]->loc.inode->gfid,
+ gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+ "DHT_LAYOUT_HEAL_DOMAIN", NULL);
+ }
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int32_t
+dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+
+ uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].ns.directory_ns.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ } else {
+ local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ dht_entrylk_done(frame);
+ }
+
+ return 0;
+}
+
+static int32_t
+dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ dht_local_t *local = NULL;
+ int ret = -1, i = 0;
+ call_frame_t *lock_frame = NULL;
+ int call_cnt = 0;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, done);
+
+ call_cnt = dht_lock_count(lk_array, lk_count);
+ if (call_cnt == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+ goto done;
+ }
+
+ ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+
+ goto done;
+ }
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+ local->call_cnt = call_cnt;
+
+ for (i = 0; i < local->lock[0].ns.directory_ns.lk_count; i++) {
+ if (!local->lock[0].ns.directory_ns.locks[i]->locked)
+ continue;
+
+ lock_frame->root
+ ->lk_owner = local->lock[0].ns.directory_ns.locks[i]->lk_owner;
+ STACK_WIND_COOKIE(
+ lock_frame, dht_unlock_entrylk_cbk, (void *)(long)i,
+ local->lock[0].ns.directory_ns.locks[i]->xl,
+ local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk,
+ local->lock[0].ns.directory_ns.locks[i]->domain,
+ &local->lock[0].ns.directory_ns.locks[i]->loc,
+ local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_UNLOCK,
+ ENTRYLK_WRLCK, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+
+done:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+
+ /* no locks acquired, invoke entrylk_cbk */
+ if (ret == 0)
+ entrylk_cbk(frame, NULL, frame->this, 0, 0, NULL);
+
+ return ret;
+}
+
+int32_t
+dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ local = frame->local;
+
+ if (!entrylk || !entrylk->locks)
+ goto out;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
+ if (lock_local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_frame->local = lock_local;
+
+ lock_local->lock[0].ns.directory_ns.locks = entrylk->locks;
+ lock_local->lock[0].ns.directory_ns.lk_count = entrylk->lk_count;
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+
+ ret = dht_unlock_entrylk(
+ lock_frame, lock_local->lock[0].ns.directory_ns.locks,
+ lock_local->lock[0].ns.directory_ns.lk_count, dht_unlock_entrylk_done);
+ if (ret)
+ goto done;
+
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+out:
+ return 0;
+}
+
+static int
+dht_entrylk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_entrylk_done(frame);
+ return 0;
+}
+
+static void
+dht_entrylk_cleanup(call_frame_t *lock_frame)
+{
+ dht_lock_t **lk_array = NULL;
+ int lk_count = 0, lk_acquired = 0;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ lk_array = local->lock[0].ns.directory_ns.locks;
+ lk_count = local->lock[0].ns.directory_ns.lk_count;
+
+ lk_acquired = dht_lock_count(lk_array, lk_count);
+ if (lk_acquired != 0) {
+ dht_unlock_entrylk(lock_frame, lk_array, lk_count,
+ dht_entrylk_cleanup_cbk);
+ } else {
+ dht_entrylk_done(lock_frame);
+ }
+
+ return;
+}
+
+static int32_t
+dht_blocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int lk_index = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret == 0) {
+ local->lock[0].ns.directory_ns.locks[lk_index]->locked = _gf_true;
+ } else {
+ switch (op_errno) {
+ case ESTALE:
+ case ENOENT:
+ if (local->lock[0]
+ .ns.directory_ns.locks[lk_index]
+ ->do_on_failure != IGNORE_ENOENT_ESTALE) {
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ goto cleanup;
+ }
+ break;
+ default:
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ goto cleanup;
+ }
+ }
+
+ if (lk_index == (local->lock[0].ns.directory_ns.lk_count - 1)) {
+ for (i = 0; (i < local->lock[0].ns.directory_ns.lk_count) &&
+ (!local->lock[0].ns.directory_ns.locks[i]->locked);
+ i++)
+ ;
+
+ if (i == local->lock[0].ns.directory_ns.lk_count) {
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ }
+
+ dht_entrylk_done(frame);
+ } else {
+ dht_blocking_entrylk_rec(frame, ++lk_index);
+ }
+
+ return 0;
+
+cleanup:
+ dht_entrylk_cleanup(frame);
+
+ return 0;
+}
+
+void
+dht_blocking_entrylk_rec(call_frame_t *frame, int i)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_WIND_COOKIE(
+ frame, dht_blocking_entrylk_cbk, (void *)(long)i,
+ local->lock[0].ns.directory_ns.locks[i]->xl,
+ local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk,
+ local->lock[0].ns.directory_ns.locks[i]->domain,
+ &local->lock[0].ns.directory_ns.locks[i]->loc,
+ local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_LOCK,
+ ENTRYLK_WRLCK, NULL);
+
+ return;
+}
+
+int
+dht_blocking_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ int ret = -1;
+ call_frame_t *lock_frame = NULL;
+ dht_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, out);
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
+ if (ret < 0) {
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ dht_blocking_entrylk_rec(lock_frame, 0);
+
+ return 0;
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+
+ return -1;
+}
+
+static int
+dht_local_inodelk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local == NULL) {
+ local = dht_local_init(frame, NULL, NULL, 0);
+ }
+
+ if (local == NULL) {
+ goto out;
+ }
+
+ local->lock[0].layout.my_layout.inodelk_cbk = inodelk_cbk;
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = lk_count;
+
+ ret = dht_lock_order_requests(local->lock[0].layout.my_layout.locks,
+ local->lock[0].layout.my_layout.lk_count);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void
+dht_inodelk_done(call_frame_t *lock_frame)
+{
+ fop_inodelk_cbk_t inodelk_cbk = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+ main_frame = local->main_frame;
+
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+
+ inodelk_cbk = local->lock[0].layout.my_layout.inodelk_cbk;
+ local->lock[0].layout.my_layout.inodelk_cbk = NULL;
+
+ inodelk_cbk(main_frame, NULL, main_frame->this,
+ local->lock[0].layout.my_layout.op_ret,
+ local->lock[0].layout.my_layout.op_errno, NULL);
+
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+ return;
+}
+
+static int32_t
+dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
+ gfid);
+
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ } else {
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = 0;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ dht_inodelk_done(frame);
+ }
+
+ return 0;
+}
+
+static int32_t
+dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ gf_uuid_unparse(local->lock[0].layout.my_layout.locks[0]->loc.inode->gfid,
+ gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "DHT_LAYOUT_HEAL_DOMAIN gfid=%s",
+ gfid, NULL);
+ }
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ dht_local_t *local = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = -1, i = 0;
+ call_frame_t *lock_frame = NULL;
+ int call_cnt = 0;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, done);
+
+ call_cnt = dht_lock_count(lk_array, lk_count);
+ if (call_cnt == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+ goto done;
+ }
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+
+ goto done;
+ }
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+ local->call_cnt = call_cnt;
+
+ flock.l_type = F_UNLCK;
+
+ for (i = 0; i < local->lock[0].layout.my_layout.lk_count; i++) {
+ if (!local->lock[0].layout.my_layout.locks[i]->locked)
+ continue;
+
+ lock_frame->root
+ ->lk_owner = local->lock[0].layout.my_layout.locks[i]->lk_owner;
+ STACK_WIND_COOKIE(
+ lock_frame, dht_unlock_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock,
+ NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+
+done:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ /* no locks acquired, invoke inodelk_cbk */
+ if (ret == 0)
+ inodelk_cbk(frame, NULL, frame->this, 0, 0, NULL);
+
+ return ret;
+}
+
+int32_t
+dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ local = frame->local;
+
+ if (!inodelk || !inodelk->locks)
+ goto out;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
+ if (lock_local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "gfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_frame->local = lock_local;
+
+ lock_local->lock[0].layout.my_layout.locks = inodelk->locks;
+ lock_local->lock[0].layout.my_layout.lk_count = inodelk->lk_count;
+ inodelk->locks = NULL;
+ inodelk->lk_count = 0;
+
+ ret = dht_unlock_inodelk(
+ lock_frame, lock_local->lock[0].layout.my_layout.locks,
+ lock_local->lock[0].layout.my_layout.lk_count, dht_unlock_inodelk_done);
+
+ if (ret)
+ goto done;
+
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+out:
+ return 0;
+}
+
+static int
+dht_inodelk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_inodelk_done(frame);
+ return 0;
+}
+
+static void
+dht_inodelk_cleanup(call_frame_t *lock_frame)
+{
+ dht_lock_t **lk_array = NULL;
+ int lk_count = 0, lk_acquired = 0;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ lk_array = local->lock[0].layout.my_layout.locks;
+ lk_count = local->lock[0].layout.my_layout.lk_count;
+
+ lk_acquired = dht_lock_count(lk_array, lk_count);
+ if (lk_acquired != 0) {
+ dht_unlock_inodelk(lock_frame, lk_array, lk_count,
+ dht_inodelk_cleanup_cbk);
+ } else {
+ dht_inodelk_done(lock_frame);
+ }
+
+ return;
+}
+
+static int32_t
+dht_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ lk_index = (long)cookie;
+
+ if (op_ret == -1) {
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+
+ if (local && local->lock[0].layout.my_layout.locks[lk_index]) {
+ uuid_utoa_r(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.inode->gfid,
+ gfid);
+
+ gf_msg_debug(
+ this->name, op_errno,
+ "inodelk failed on gfid: %s "
+ "subvolume: %s",
+ gfid,
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name);
+ }
+
+ goto out;
+ }
+
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
+
+out:
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->lock[0].layout.my_layout.op_ret < 0) {
+ dht_inodelk_cleanup(frame);
+ return 0;
+ }
+
+ dht_inodelk_done(frame);
+ }
+
+ return 0;
+}
+
+int
+dht_nonblocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0, ret = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *lock_frame = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out);
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ local->call_cnt = lk_count;
+
+ for (i = 0; i < lk_count; i++) {
+ flock.l_type = local->lock[0].layout.my_layout.locks[i]->type;
+
+ STACK_WIND_COOKIE(
+ lock_frame, dht_nonblocking_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock,
+ NULL);
+ }
+
+ return 0;
+
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ return -1;
+}
+
+static int32_t
+dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int lk_index = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ dht_reaction_type_t reaction = 0;
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret == 0) {
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
+ } else {
+ switch (op_errno) {
+ case ESTALE:
+ case ENOENT:
+ reaction = local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->do_on_failure;
+ if ((reaction != IGNORE_ENOENT_ESTALE) &&
+ (reaction != IGNORE_ENOENT_ESTALE_EIO)) {
+ gf_uuid_unparse(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ break;
+ case EIO:
+ reaction = local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->do_on_failure;
+ if (reaction != IGNORE_ENOENT_ESTALE_EIO) {
+ gf_uuid_unparse(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ break;
+
+ default:
+ gf_uuid_unparse(
+ local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(
+ this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED,
+ "subvol=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ }
+
+ if (lk_index == (local->lock[0].layout.my_layout.lk_count - 1)) {
+ for (i = 0; (i < local->lock[0].layout.my_layout.lk_count) &&
+ (!local->lock[0].layout.my_layout.locks[i]->locked);
+ i++)
+ ;
+
+ if (i == local->lock[0].layout.my_layout.lk_count) {
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ }
+
+ dht_inodelk_done(frame);
+ } else {
+ dht_blocking_inodelk_rec(frame, ++lk_index);
+ }
+
+ return 0;
+
+cleanup:
+ dht_inodelk_cleanup(frame);
+
+ return 0;
+}
+
+void
+dht_blocking_inodelk_rec(call_frame_t *frame, int i)
+{
+ dht_local_t *local = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+
+ flock.l_type = local->lock[0].layout.my_layout.locks[i]->type;
+
+ STACK_WIND_COOKIE(
+ frame, dht_blocking_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLKW, &flock, NULL);
+
+ return;
+}
+
+int
+dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ int ret = -1;
+ call_frame_t *lock_frame = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *tmp_local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out);
+
+ tmp_local = frame->local;
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_uuid_unparse(tmp_local->loc.gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
+ goto out;
+ }
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ gf_uuid_unparse(tmp_local->loc.gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ dht_blocking_inodelk_rec(lock_frame, 0);
+
+ return 0;
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ return -1;
+}
+
+void
+dht_unlock_namespace(call_frame_t *frame, dht_dir_transaction_t *lock)
+{
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lock, out);
+
+ dht_unlock_entrylk_wrapper(frame, &lock->ns.directory_ns);
+ dht_unlock_inodelk_wrapper(frame, &lock->ns.parent_layout);
+
+out:
+ return;
+}
+
+static int32_t
+dht_protect_namespace_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret != 0)
+ dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout);
+
+ local->current->ns.ns_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ loc_t *loc = NULL;
+ dht_lock_t **lk_array = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int count = 0;
+ dht_elock_wrap_t *entrylk = NULL;
+
+ local = frame->local;
+ entrylk = &local->current->ns.directory_ns;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ loc = &entrylk->locks[0]->loc;
+ gf_uuid_unparse(loc->gfid, pgfid);
+
+ local->op_ret = 0;
+ lk_array = entrylk->locks;
+ count = entrylk->lk_count;
+
+ ret = dht_blocking_entrylk(frame, lk_array, count,
+ dht_protect_namespace_cbk);
+
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s",
+ entrylk->locks[0]->basename, NULL);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+ }
+
+ /* Unlock inodelk. No harm calling unlock twice */
+ dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout);
+ /* Call ns_cbk. It will take care of unwinding */
+ local->current->ns.ns_cbk(frame, NULL, this, local->op_ret, local->op_errno,
+ NULL);
+ return 0;
+}
+
+/* Given the loc and the subvol, this routine takes the inodelk on
+ * the parent inode and entrylk on (parent, loc->name). This routine
+ * is specific as it supports only one subvol on which it takes inodelk
+ * and then entrylk serially.
+ */
+int
+dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
+ struct dht_namespace *ns, fop_entrylk_cbk_t ns_cbk)
+{
+ dht_ilock_wrap_t *inodelk = NULL;
+ dht_elock_wrap_t *entrylk = NULL;
+ dht_lock_t **lk_array = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ loc_t parent = {
+ 0,
+ };
+ int ret = -1;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t op_errno = 0;
+ int count = 1;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, subvol, out);
+
+ local = frame->local;
+ this = frame->this;
+
+ inodelk = &ns->parent_layout;
+ entrylk = &ns->directory_ns;
+
+ /* Initialize entrylk_cbk and parent loc */
+ ns->ns_cbk = ns_cbk;
+
+ ret = dht_build_parent_loc(this, &parent, loc, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
+ "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto out;
+ }
+ gf_uuid_unparse(parent.gfid, pgfid);
+
+ /* Alloc inodelk */
+ inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (inodelk->locks == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+ goto out;
+ }
+
+ inodelk->locks[0] = dht_lock_new(this, subvol, &parent, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ FAIL_ON_ANY_ERROR);
+ if (inodelk->locks[0] == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
+ inodelk->lk_count = count;
+
+ /* Allock entrylk */
+ entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (entrylk->locks == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+
+ goto err;
+ }
+
+ entrylk->locks[0] = dht_lock_new(this, subvol, &parent, F_WRLCK,
+ DHT_ENTRY_SYNC_DOMAIN, loc->name,
+ FAIL_ON_ANY_ERROR);
+ if (entrylk->locks[0] == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+
+ goto err;
+ }
+ entrylk->lk_count = count;
+
+ /* Take read inodelk on parent. If it is successful, take write entrylk
+ * on name in cbk.
+ */
+ lk_array = inodelk->locks;
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_blocking_entrylk_after_inodelk);
+ if (ret < 0) {
+ local->op_errno = EIO;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+
+ goto err;
+ }
+
+ loc_wipe(&parent);
+
+ return 0;
+err:
+ if (entrylk->locks != NULL) {
+ dht_lock_array_free(entrylk->locks, count);
+ GF_FREE(entrylk->locks);
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+ }
+
+ if (inodelk->locks != NULL) {
+ dht_lock_array_free(inodelk->locks, count);
+ GF_FREE(inodelk->locks);
+ inodelk->locks = NULL;
+ inodelk->lk_count = 0;
+ }
+
+ loc_wipe(&parent);
+out:
+ return -1;
+}
diff --git a/xlators/cluster/dht/src/dht-lock.h b/xlators/cluster/dht/src/dht-lock.h
new file mode 100644
index 00000000000..6485c03fb6e
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-lock.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DHT_LOCK_H
+#define _DHT_LOCK_H
+
+#include "dht-common.h"
+
+void
+dht_lock_array_free(dht_lock_t **lk_array, int count);
+
+int32_t
+dht_lock_count(dht_lock_t **lk_array, int lk_count);
+
+dht_lock_t *
+dht_lock_new(xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+ const char *domain, const char *basename,
+ dht_reaction_type_t do_on_failure);
+
+int32_t
+dht_unlock_entrylk_wrapper(call_frame_t *, dht_elock_wrap_t *);
+
+void
+dht_blocking_entrylk_rec(call_frame_t *frame, int i);
+
+int
+dht_blocking_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t entrylk_cbk);
+
+int32_t
+dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+int32_t
+dht_unlock_inodelk_wrapper(call_frame_t *, dht_ilock_wrap_t *);
+
+/* Acquire non-blocking inodelk on a list of xlators.
+ *
+ * @lk_array: array of lock requests lock on.
+ *
+ * @lk_count: number of locks in @lk_array
+ *
+ * @inodelk_cbk: will be called after inodelk replies are received
+ *
+ * @retval: -1 if stack_winding inodelk fails. 0 otherwise.
+ * inodelk_cbk is called with appropriate error on errors.
+ * On failure to acquire lock on all members of list, successful
+ * locks are unlocked before invoking cbk.
+ */
+
+int
+dht_nonblocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+
+void
+dht_blocking_inodelk_rec(call_frame_t *frame, int i);
+
+/* same as dht_nonblocking_inodelk, but issues sequential blocking locks on
+ * @lk_array directly. locks are issued on some order which remains same
+ * for a list of xlators (irrespective of order of xlators within list).
+ */
+
+int
+dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+int32_t
+dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int32_t
+dht_blocking_entrylk_after_inodelk_rename(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+void
+dht_unlock_namespace(call_frame_t *, dht_dir_transaction_t *);
+
+int
+dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
+ struct dht_namespace *ns, fop_entrylk_cbk_t ns_cbk);
+
+#endif /* _DHT_LOCK_H */
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 4f3a90e914a..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -8,26 +8,31 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __DHT_MEM_TYPES_H__
#define __DHT_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_dht_mem_types_ {
- gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
- gf_dht_mt_dht_conf_t,
- gf_dht_mt_char,
- gf_dht_mt_int32_t,
- gf_dht_mt_xlator_t,
- gf_dht_mt_dht_layout_t,
- gf_switch_mt_dht_conf_t,
- gf_switch_mt_dht_du_t,
- gf_switch_mt_switch_sched_array,
- gf_switch_mt_switch_struct,
- gf_dht_mt_subvol_time,
- gf_dht_mt_loc_t,
- gf_defrag_info_mt,
- gf_dht_mt_end
+ gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
+ gf_dht_mt_dht_conf_t,
+ gf_dht_mt_char,
+ gf_dht_mt_int32_t,
+ gf_dht_mt_xlator_t,
+ gf_dht_mt_dht_layout_t,
+ gf_switch_mt_switch_sched_array,
+ gf_switch_mt_switch_struct,
+ gf_dht_mt_subvol_time,
+ gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_dirent_t,
+ gf_dht_mt_container_t,
+ gf_dht_mt_octx_t,
+ gf_dht_mt_miginfo_t,
+ gf_dht_mt_fd_ctx_t,
+ gf_dht_ret_cache_t,
+ gf_dht_nodeuuids_t,
+ gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
new file mode 100644
index 00000000000..601f8dad78b
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -0,0 +1,386 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DHT_MESSAGES_H_
+#define _DHT_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ DHT, DHT_MSG_CACHED_SUBVOL_GET_FAILED, DHT_MSG_CREATE_LINK_FAILED,
+ DHT_MSG_DICT_SET_FAILED, DHT_MSG_DIR_ATTR_HEAL_FAILED,
+ DHT_MSG_DIR_SELFHEAL_FAILED, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ DHT_MSG_FILE_ON_MULT_SUBVOL, DHT_MSG_FILE_TYPE_MISMATCH,
+ DHT_MSG_GFID_MISMATCH, DHT_MSG_GFID_NULL, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ DHT_MSG_INIT_FAILED, DHT_MSG_INVALID_CONFIGURATION,
+ DHT_MSG_INVALID_DISK_LAYOUT, DHT_MSG_INVALID_OPTION,
+ DHT_MSG_LAYOUT_FIX_FAILED, DHT_MSG_LAYOUT_MERGE_FAILED,
+ DHT_MSG_LAYOUT_MISMATCH, DHT_MSG_LAYOUT_NULL, DHT_MSG_MIGRATE_DATA_COMPLETE,
+ DHT_MSG_MIGRATE_DATA_FAILED, DHT_MSG_MIGRATE_FILE_COMPLETE,
+ DHT_MSG_MIGRATE_FILE_FAILED, DHT_MSG_NO_MEMORY, DHT_MSG_OPENDIR_FAILED,
+ DHT_MSG_REBALANCE_FAILED, DHT_MSG_REBALANCE_START_FAILED,
+ DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
+ DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
+ DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
+ DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
+ DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
+ DHT_MSG_READDIR_ERROR, DHT_MSG_CHILD_LOC_BUILD_FAILED,
+ DHT_MSG_SET_SWITCH_PATTERN_ERROR, DHT_MSG_COMPUTE_HASH_FAILED,
+ DHT_MSG_FIND_LAYOUT_ANOMALIES_ERROR, DHT_MSG_ANOMALIES_INFO,
+ DHT_MSG_LAYOUT_INFO, DHT_MSG_INODE_LK_ERROR, DHT_MSG_RENAME_INFO,
+ DHT_MSG_DATA_NULL, DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED,
+ DHT_MSG_UNLINK_LOOKUP_INFO, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ DHT_MSG_OPERATION_NOT_SUP, DHT_MSG_NOT_LINK_FILE_ERROR, DHT_MSG_CHILD_DOWN,
+ DHT_MSG_UUID_PARSE_ERROR, DHT_MSG_GET_DISK_INFO_ERROR,
+ DHT_MSG_INVALID_VALUE, DHT_MSG_SWITCH_PATTERN_INFO,
+ DHT_MSG_SUBVOL_OP_FAILED, DHT_MSG_LAYOUT_PRESET_FAILED,
+ DHT_MSG_INVALID_LINKFILE, DHT_MSG_FIX_LAYOUT_INFO,
+ DHT_MSG_GET_HOSTNAME_FAILED, DHT_MSG_WRITE_FAILED,
+ DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED, DHT_MSG_FSYNC_FAILED,
+ DHT_MSG_SUBVOL_DECOMMISSION_INFO, DHT_MSG_BRICK_QUERY_FAILED,
+ DHT_MSG_SUBVOL_NO_LAYOUT_INFO, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ DHT_MSG_SUBVOL_NOT_FOUND, DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
+ DHT_MSG_DISK_LAYOUT_MISSING, DHT_MSG_DICT_GET_FAILED,
+ DHT_MSG_REVALIDATE_CBK_INFO, DHT_MSG_UPGRADE_BRICKS, DHT_MSG_LK_ARRAY_INFO,
+ DHT_MSG_RENAME_NOT_LOCAL, DHT_MSG_RECONFIGURE_INFO,
+ DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
+ DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
+ DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
+ DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ DHT_MSG_ASPRINTF_FAILED, DHT_MSG_DIR_LOOKUP_FAILED, DHT_MSG_INODELK_FAILED,
+ DHT_MSG_LOCK_FRAME_FAILED, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED,
+ DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN,
+ DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED,
+ DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED,
+ DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED,
+ DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS,
+ DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED,
+ DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST,
+ DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED,
+ DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE,
+ DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND,
+ DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED,
+ DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED,
+ DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED,
+ DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED,
+ DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
+ DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
+ DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
+ DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
+ DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
+ DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO,
+ DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR,
+ DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED,
+ DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED,
+ DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED,
+ DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED,
+ DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED,
+ DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED,
+ DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT,
+ DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED,
+ DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED,
+ DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED,
+ DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED,
+ DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL,
+ DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL,
+ DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED,
+ DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK,
+ DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED,
+ DHT_MSG_BLOCK_INODELK_FAILED,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ DHT_MSG_DST_NULL_SET_FAILED);
+
+#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx"
+#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx"
+#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file"
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file"
+#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask"
+#define DHT_MSG_ASPRINTF_FAILED_STR \
+ "asprintf failed while fetching subvol from the id"
+#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx"
+#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file"
+#define DHT_MSG_INVALID_LINKFILE_STR \
+ "linkto target is different from cached-subvol. treating as destination " \
+ "subvol"
+#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file"
+#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr"
+#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file"
+#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed"
+#define DHT_MSG_DIR_HEAL_ABORT_STR \
+ "Failed to get path from subvol. Aborting directory healing"
+#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory"
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \
+ "Found a NULL inode. Failed to unref the inode"
+#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value"
+#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile"
+#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data"
+#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed"
+#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed"
+#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict"
+#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol"
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol"
+#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped"
+#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed"
+#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed"
+#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open"
+#define DHT_MSG_CREATE_FAILED_STR "failed to create"
+#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist"
+#define DHT_MSG_CHOWN_FAILED_STR "chown failed"
+#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed"
+#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed"
+#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs"
+#define DHT_MSG_WRITE_CROSS_STR \
+ "write will cross min-fre-disk for file on subvol. looking for new subvol"
+#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \
+ "Could not find any subvol with space accommodating the file. Cosider " \
+ "adding bricks"
+#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
+#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
+#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
+#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
+#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
+#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile"
+#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed"
+#define DHT_MSG_MKNOD_FAILED_STR "mknod failed"
+#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr"
+#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \
+ "Migrate file cleanup failed: failed to fstat file"
+#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file"
+#define DHT_MSG_PARENT_BUILD_FAILED_STR \
+ "failed to build parent loc, which is needed to acquire entrylk to " \
+ "synchronize with renames on this path. Skipping migration"
+#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \
+ "cannot find hashed subvol which is needed to synchronize with renames " \
+ "on this path. Skipping migration"
+#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol"
+#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file"
+#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration"
+#define DHT_MSG_CHANGED_DST_STR "destination changed fo file"
+#define DHT_MSG_TRACE_FAILED_STR "Trace failed"
+#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed"
+#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file"
+#define DHT_MSG_STAT_FAILED_STR "failed to do a stat"
+#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink"
+#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration"
+#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file"
+#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file"
+#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed"
+#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket"
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume"
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume"
+#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed"
+#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL"
+#define DHT_MSG_DATA_MIGRATE_ABORT_STR \
+ "Readdirp failed. Aborting data migration for dict"
+#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed"
+#define DHT_MSG_PARENT_NULL_STR "parent is NULL"
+#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
+#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
+#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
+#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
+#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
+#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
+#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
+#define DHT_MSG_FIX_NOT_COMP_STR \
+ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \
+ "complete"
+#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
+ "local subvolume determination failed with error"
+#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
+#define DHT_MSG_NODE_UUID_STR "node uuid"
+#define DHT_MSG_SIZE_FILE_STR "Total size files"
+#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \
+ "Failed to get the total data size. Unable to estimate time to complete " \
+ "rebalance"
+#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \
+ "file_counter_thread: pthread_join failed"
+#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \
+ "Failed to create the file counter thread"
+#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
+ "Failed to initialise migration queue"
+#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
+#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
+#define DHT_MSG_WOKE_STR "woken"
+#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
+#define DHT_MSG_REBALANCE_START_FAILED_STR \
+ "Failed to start rebalance: look up on / failed"
+#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \
+ "Could not create task for rebalance"
+#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \
+ "Rebalance estimates will not be available"
+#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status"
+#define DHT_MSG_DATA_NULL_STR "data value is NULL"
+#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error to add choices in buffer"
+#define DHT_MSG_GET_CHOICES_ERROR_STR "Error to get choices"
+#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error to prepare status"
+#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice"
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \
+ "Failed to aggregate quota xattr"
+#define DHT_MSG_FILE_TYPE_MISMATCH_STR \
+ "path exists as a file on one subvolume and directory on another. Please " \
+ "fix it manually"
+#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume"
+#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume"
+#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume"
+#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \
+ "No gfid exists for path. so healing xattr is not possible"
+#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1"
+#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts"
+#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink retuened"
+#define DHT_MSG_LINKTO_FILE_FAILED_STR \
+ "Could not unlink the linkto file as either fd is open and/or linkto " \
+ "xattr is set"
+#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \
+ "Could not set pre-set layout for subvolume"
+#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \
+ "multiple subvolumes have file (preferably rename the file in the " \
+ "backend, and do a fresh lookup"
+#define DHT_MSG_STALE_LINKFILE_DELETE_STR \
+ "attempting deletion of stale linkfile"
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile"
+#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto"
+#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1"
+#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel"
+#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get"
+#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid"
+#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir"
+#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed"
+#define DHT_MSG_UPGRADE_BRICKS_STR \
+ "At least one of the bricks does not support this operation. Please " \
+ "upgrade all bricks"
+#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename"
+#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL"
+#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key"
+#define DHT_MSG_MDS_DETER_FAILED_STR \
+ "Cannot determine MDS, fetching xattr randomly from a subvol"
+#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \
+ "MDS is down for path, so fetching xattr randomly from subvol"
+#define DHT_MSG_CREATE_REBAL_FAILED_STR \
+ "failed to create a new rebalance synctask"
+#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout"
+#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value"
+#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode"
+#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path"
+#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file"
+#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed"
+#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \
+ "extracting in-memory layout of parent failed"
+#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \
+ "setting in params dictionary failed"
+#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed"
+#define DHT_MSG_LOC_FAILED_STR "parent loc build failed"
+#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed"
+#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed"
+#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \
+ "mkdir loop detected. parent layout didn't change even though previous " \
+ "attempt of mkdir failed because of in-memory layout not matching with " \
+ "that on disk."
+#define DHT_MSG_REFRESH_ATTEMPT_STR \
+ "mkdir parent layout changed. Attempting a refresh and then a retry"
+#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \
+ "Acquiring lock on parent to guard against layout-change failed"
+#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed"
+#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \
+ "cannot wind lock request to guard parent layout"
+#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed."
+#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol"
+#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume"
+#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key"
+#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. Exiting"
+#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed"
+#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set"
+#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol"
+#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed"
+#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume"
+#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed"
+#define DHT_MSG_INVALID_DISK_LAYOUT_STR \
+ "Invalid disk layout: Catastrophic error layout with unknown type found"
+#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed"
+#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies"
+#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL"
+#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing"
+#define DHT_MSG_LAYOUT_INFO_STR "layout info"
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol"
+#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed"
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed"
+#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout"
+#define DHT_MSG_DICT_IS_NULL_STR \
+ "dict is NULL, need to make sure gfids are same"
+#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed"
+#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats"
+#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed"
+#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed"
+#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol"
+#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs"
+#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr"
+#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \
+ "mds subvol is down, unable to set xattr"
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \
+ "Directory attr heal failed. Failed to set uid/gid"
+#define DHT_MSG_WIND_UNLOCK_FAILED_STR \
+ "Winding unlock failed: stale locks left on brick"
+#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash updaten failed"
+#define DHT_MSG_LK_ARRAY_INFO_STR "lk info"
+#define DHT_MSG_UNLOCK_GFID_FAILED_STR \
+ "unlock failed on gfid: stale lock might be left"
+#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed"
+#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks"
+#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed"
+#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks"
+#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol"
+#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame"
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed"
+#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \
+ "dht_blocking_entrylk failed after taking inodelk"
+#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed"
+#define DHT_MSG_CALLOC_FAILED_STR "calloc failed"
+#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed"
+#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \
+ "cannot allocate a frame, not unlocking following entrylks"
+#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \
+ "storing locks in local failed, not unlocking following entrylks"
+#define DHT_MSG_DST_NULL_SET_FAILED_STR \
+ "src or dst is NULL, Failed to set dictionary value"
+
+#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 7ac80e76580..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -8,616 +8,1457 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#include "dht-common.h"
+#include <glusterfs/syscall.h>
+#include <fnmatch.h>
+#include <signal.h>
+#include <glusterfs/events.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+
+#define GF_DISK_SECTOR_SIZE 512
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
+#define MAX_MIGRATE_QUEUE_COUNT 500
+#define MIN_MIGRATE_QUEUE_COUNT 200
+#define MAX_REBAL_TYPE_SIZE 16
+#define FILE_CNT_INTERVAL 600 /* 10 mins */
+#define ESTIMATE_START_INTERVAL 600 /* 10 mins */
+#define HARDLINK_MIG_INPROGRESS -2
+#define SKIP_MIGRATION_FD_POSITIVE -3
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-#include "dht-common.h"
-#include "xlator.h"
+#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) \
+ { \
+ idx++; \
+ idx %= sv_cnt; \
+ }
-#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+uint64_t g_totalfiles = 0;
+uint64_t g_totalsize = 0;
-static int
-dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref)
+void
+gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
-
- if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
+ int i = 0;
+
+ if (meta) {
+ for (i = 0; i < local_subvols_cnt; i++) {
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
+ }
- if (write_needed) {
- ret = syncop_write (to, fd, (buf + tmp_offset),
- (start_idx - tmp_offset),
- (offset + tmp_offset),
- iobref, 0);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to write (%s)",
- strerror (errno));
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
+ GF_FREE(meta->equeue);
+ GF_FREE(meta->head);
+ GF_FREE(meta->iterator);
+ GF_FREE(meta->offset_var);
+ GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
+ GF_FREE(meta);
+ }
+}
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write (to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset),
- (offset + tmp_offset), iobref, 0);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to write (%s)",
- strerror (errno));
- goto out;
- }
- }
+void
+gf_defrag_free_container(struct dht_container *container)
+{
+ if (container) {
+ gf_dirent_entry_free(container->df_entry);
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
+ if (container->parent_loc) {
+ loc_wipe(container->parent_loc);
}
- ret = size;
-out:
- return ret;
+ GF_FREE(container->parent_loc);
+ GF_FREE(container);
+ }
}
-int32_t
-gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
- struct iatt *stbuf)
+void
+dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
{
- int32_t ret = -1;
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *linkto_subvol = NULL;
- data_t *data = NULL;
- struct iatt iatt = {0,};
- int32_t op_errno = 0;
-
- GF_VALIDATE_OR_GOTO ("defrag", loc, out);
- GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
- GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
- GF_VALIDATE_OR_GOTO ("defrag", this, out);
- GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
-
- if (uuid_is_null (loc->pargfid)) {
- gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
- "%s", loc->path);
- goto out;
- }
+ LOCK(&defrag->lock);
+ {
+ defrag->global_error = ret;
+ }
+ UNLOCK(&defrag->lock);
+ return;
+}
- if (uuid_is_null (loc->gfid)) {
- gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
- "%s", loc->path);
- goto out;
- }
+static int
+dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *tmpstr = NULL;
+ char *ptr = NULL;
+ char *suffix = "-dht";
+ int len = 0;
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
- " for %s on %s", loc->name, this->name);
- goto out;
- }
+ eventtypes_t event = EVENT_LAST;
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
- " for %s on %s", loc->name, this->name);
- goto out;
+ switch (status) {
+ case GF_DEFRAG_STATUS_COMPLETE:
+ event = EVENT_VOLUME_REBALANCE_COMPLETE;
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ event = EVENT_VOLUME_REBALANCE_FAILED;
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ event = EVENT_VOLUME_REBALANCE_STOP;
+ break;
+ default:
+ break;
+ }
+
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
}
+ }
- gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
- "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
- cached_subvol->name, hashed_subvol->name);
- data = dict_get (xattrs, DHT_LINKFILE_KEY);
- /* set linkto on cached -> hashed if not present, else link it */
- if (!data) {
- ret = dict_set_str (xattrs, DHT_LINKFILE_KEY,
- hashed_subvol->name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to set "
- "linkto xattr in dict for %s", loc->name);
- goto out;
- }
+ if (!volname) {
+ /* Better than nothing */
+ volname = this->name;
+ }
- ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
- "failed %s -> %s (%s)", cached_subvol->name,
- loc->name, strerror (errno));
- goto out;
- }
- goto out;
- } else {
- linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
- if (!linkto_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get "
- "linkto subvol for %s", loc->name);
- } else {
- hashed_subvol = linkto_subvol;
- }
+ if (event != EVENT_LAST) {
+ gf_event(event, "volume=%s", volname);
+ }
- ret = syncop_link (hashed_subvol, loc, loc);
- if (ret) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
- " failed on subvol %s (%s)", loc->name,
- uuid_utoa(loc->gfid),
- hashed_subvol->name, strerror (op_errno));
- if (op_errno != EEXIST)
- goto out;
- }
- }
- ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
- , loc->name, hashed_subvol->name, strerror (errno));
- goto out;
- }
+ GF_FREE(tmpstr);
+ return ret;
+}
- if (iatt.ia_nlink == stbuf->ia_nlink) {
- ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
- GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
- if (ret)
- goto out;
- }
- ret = 0;
-out:
- return ret;
+static void
+dht_strip_out_acls(dict_t *dict)
+{
+ if (dict) {
+ dict_del(dict, "trusted.SGI_ACL_FILE");
+ dict_del(dict, POSIX_ACL_ACCESS_XATTR);
+ }
}
+/*
+ return values:
+ -1 : failure
+ -2 : success
+
+Hard link migration is carried out in three stages.
+
+(Say there are n hardlinks)
+Stage 1: Setting the new hashed subvol information on the 1st hardlink
+ encountered (linkto setxattr)
+
+Stage 2: Creating hardlinks on new hashed subvol for the 2nd to (n-1)th
+ hardlink
+
+Stage 3: Physical migration of the data file for nth hardlink
+
+Why to deem "-2" as success and not "0":
+
+ dht_migrate_file expects return value "0" from _is_file_migratable if
+the file has to be migrated.
+
+ _is_file_migratable returns zero only when it is called with the
+flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS".
+
+ gf_defrag_handle_hardlink calls dht_migrate_file for physical migration
+of the data file with the flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS"
-static inline int
-__is_file_migratable (xlator_t *this, loc_t *loc,
- struct iatt *stbuf, dict_t *xattrs, int flags)
+Hence, gf_defrag_handle_hardlink returning "0" for success will force
+"dht_migrate_file" to migrate each of the hardlink which is not intended.
+
+For each of the three stage mentioned above "-2" will be returned and will
+be converted to "0" in dht_migrate_file.
+
+*/
+
+int32_t
+gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno)
{
- int ret = -1;
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+ gf_loglevel_t loglevel = 0;
+ dict_t *link_xattr = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr_rsp = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ *fop_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO("defrag", this, out);
+ GF_VALIDATE_OR_GOTO("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (gf_uuid_is_null(loc->pargfid)) {
+ gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->pargfid is NULL for %s",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->gfid is NULL for %s",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
- if (IA_ISDIR (stbuf->ia_type)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: migrate-file called on directory", loc->path);
- ret = -1;
- goto out;
+ link_xattr = dict_new();
+ if (!link_xattr) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ goto out;
+ }
+
+ /*
+ Parallel migration can lead to migration of the hard link multiple
+ times which can lead to data loss. Hence, adding a fresh lookup to
+ decide whether migration is required or not.
+
+ Elaborating the scenario for let say 10 hardlinks [link{1..10}]:
+ Let say the first hard link "link1" does the setxattr of the
+ new hashed subvolume info on the cached file. As there are multiple
+ threads working, we might have already all the links created on the
+ new hashed by the time we reach hardlink let say link5. Now the
+ number of links on hashed is equal to that of cached. Hence, file
+ migration will happen for link6.
+
+ Cached Hashed
+ --------T link6 rwxrwxrwx link6
+
+ Now post above state all the link file on the cached will be zero
+ byte linkto files. Hence, if we still do migration for the following
+ files link{7..10}, we will end up migrating 0 data leading to data
+ loss.
+ Hence, a lookup can make sure whether we need to migrate the
+ file or not.
+ */
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "could not allocate memory for dict");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set 'linkto' key in dict",
+ loc->path);
+ goto out;
+ }
+
+ ret = syncop_lookup(this, loc, &stbuf, NULL, dict, &xattr_rsp);
+ if (ret) {
+ /*Ignore ENOENT and ESTALE as file might have been
+ migrated already*/
+ if (-ret == ENOENT || -ret == ESTALE) {
+ ret = -2;
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:%s lookup failed with ret = %d", loc->path,
+ ret);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get cached subvol"
+ " for %s on %s",
+ loc->name, this->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get hashed subvol"
+ " for %s on %s",
+ loc->name, this->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ /* Hardlink migration happens only with remove-brick. So this condition will
+ * be true only when the migration has happened. In case hardlinks are
+ * migrated for rebalance case, remove this check. Having this check here
+ * avoid redundant calls below*/
+ if (hashed_subvol == cached_subvol) {
+ ret = -2;
+ goto out;
+ }
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s",
+ loc->name, uuid_utoa(loc->gfid), cached_subvol->name,
+ hashed_subvol->name);
+
+ data = dict_get(xattr_rsp, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str(link_xattr, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to set dictionary value:"
+ " key = %s for %s",
+ conf->link_xattr_name, loc->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
- ret = 0;
- goto out;
+ ret = syncop_setxattr(cached_subvol, loc, link_xattr, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Linkto setxattr failed %s -> %s",
+ cached_subvol->name, loc->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- if (stbuf->ia_nlink > 1) {
- /* support for decomission */
- if (flags == GF_DHT_MIGRATE_HARDLINK) {
- ret = gf_defrag_handle_hardlink (this, loc,
- xattrs, stbuf);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to migrate file with link",
- loc->path);
- }
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: file has hardlinks", loc->path);
- }
- ret = ENOTSUP;
+
+ gf_msg_debug(this->name, 0,
+ "hardlink target subvol created on %s "
+ ",cached %s, file %s",
+ hashed_subvol->name, cached_subvol->name, loc->path);
+
+ ret = -2;
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol(this, NULL, NULL, xattr_rsp);
+ if (!linkto_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR,
+ "Failed to get "
+ "linkto subvol for %s",
+ loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link(hashed_subvol, loc, loc, &iatt, NULL, NULL);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+
+ loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_ERROR;
+ gf_msg(this->name, loglevel, op_errno,
+ DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED,
+ "link of %s -> %s"
+ " failed on subvol %s",
+ loc->name, uuid_utoa(loc->gfid), hashed_subvol->name);
+ if (op_errno != EEXIST) {
+ *fop_errno = op_errno;
goto out;
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "syncop_link successful for"
+ " hardlink %s on subvol %s, cached %s",
+ loc->path, hashed_subvol->name, cached_subvol->name);
}
+ }
- ret = 0;
+ ret = syncop_lookup(hashed_subvol, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :Failed lookup %s on %s ", loc->name,
+ hashed_subvol->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* There is a race where on the target subvol for the hardlink
+ * (note: hash subvol for the hardlink might differ from this), some
+ * other client(non-rebalance) would have created a linkto file for that
+ * hardlink as part of lookup. So let say there are 10 hardlinks, on the
+ * 5th hardlink it self the hardlinks might have migrated. Now for
+ * (6..10th) hardlinks the cached and target would be same as the file
+ * has already migrated. Hence this check is needed */
+ if (cached_subvol == hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "source %s and destination %s "
+ "for hardlink %s are same",
+ cached_subvol->name, hashed_subvol->name, loc->path);
+ ret = -2;
+ goto out;
+ }
+
+ if (iatt.ia_nlink == stbuf.ia_nlink) {
+ ret = dht_migrate_file(this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS, fop_errno);
+ if (ret) {
+ goto out;
+ }
+ }
+ ret = -2;
out:
- return ret;
+ if (link_xattr)
+ dict_unref(link_xattr);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
}
-static inline int
-__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd)
+static int
+__check_file_has_hardlink(xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ dict_t *xattrs, int flags, gf_defrag_info_t *defrag,
+ dht_conf_t *conf, int *fop_errno)
{
- xlator_t *this = NULL;
- int ret = -1;
- fd_t *fd = NULL;
- struct iatt new_stbuf = {0,};
-
- this = THIS;
+ int ret = 0;
- ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid in dict for create", loc->path);
- goto out;
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ return ret;
+ }
+ if (stbuf->ia_nlink > 1) {
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ synclock_lock(&conf->link_lock);
+ ret = gf_defrag_handle_hardlink(this, loc, fop_errno);
+ synclock_unlock(&conf->link_lock);
+ /*
+ Returning zero will force the file to be remigrated.
+ Checkout gf_defrag_handle_hardlink for more information.
+ */
+ if (ret && ret != -2) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migration skipped for:"
+ "%s: file has hardlinks",
+ loc->path);
+ *fop_errno = ENOTSUP;
+ ret = 1;
}
+ }
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name);
+ return ret;
+}
+
+/*
+ return values
+ 0 : File will be migrated
+ -2 : File will not be migrated
+ (This is the return value from gf_defrag_handle_hardlink. Checkout
+ gf_defrag_handle_hardlink for description of "returning -2")
+ -1 : failure
+*/
+static int
+__is_file_migratable(xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ dict_t *xattrs, int flags, gf_defrag_info_t *defrag,
+ dht_conf_t *conf, int *fop_errno)
+{
+ int ret = -1;
+ int lock_count = 0;
+
+ if (IA_ISDIR(stbuf->ia_type)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: migrate-file called on directory",
+ loc->path);
+ *fop_errno = EISDIR;
+ ret = -1;
+ goto out;
+ }
+
+ if (!conf->lock_migration_enabled) {
+ ret = dict_get_int32(xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid in dict for create", loc->path);
- goto out;
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Unable to get lock count for file",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
}
- fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: fd create failed (destination) (%s)",
- loc->path, strerror (errno));
- ret = -1;
- goto out;
+ if (lock_count) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: File has locks."
+ " Skipping file migration",
+ loc->path);
+ *fop_errno = ENOTSUP;
+ ret = 1;
+ goto out;
}
+ }
- ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL);
- if (!ret) {
- /* File exits in the destination, check if gfid matches */
- if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "file %s exits in %s with different gfid",
- loc->path, to->name);
- fd_unref (fd);
- goto out;
- }
+ /* Check if file has hardlink*/
+ ret = __check_file_has_hardlink(this, loc, stbuf, xattrs, flags, defrag,
+ conf, fop_errno);
+out:
+ return ret;
+}
+
+static int
+__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+ loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
+ int *fop_errno, int file_has_holes)
+{
+ int ret = -1;
+ int ret2 = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {
+ 0,
+ };
+ struct iatt check_stbuf = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+ dict_t *dict = NULL;
+ dict_t *xdata = NULL;
+
+ conf = this->private;
+
+ dict = dict_new();
+ if (!dict) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dictionary allocation failed for"
+ "path:%s",
+ loc->path);
+ goto out;
+ }
+ ret = dict_set_gfuuid(dict, "gfid-req", stbuf->ia_gfid, true);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = gfid-req", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_str(dict, conf->link_xattr_name, from->name);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ conf->link_xattr_name);
+ goto out;
+ }
+
+ fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: fd create failed (destination)", loc->path);
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
+ }
+
+ ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
+ if (!ret) {
+ /* File exits in the destination, check if gfid matches */
+ if (gf_uuid_compare(stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
+ "file %s exists in %s with different gfid", loc->path,
+ to->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
}
- if ((ret == -1) && (errno != ENOENT)) {
- /* File exists in destination, but not accessible */
- gf_log (THIS->name, GF_LOG_WARNING,
- "%s: failed to lookup file (%s)",
- loc->path, strerror (errno));
- goto out;
+ }
+ if ((ret < 0) && (-ret != ENOENT)) {
+ /* File exists in destination, but not accessible */
+ gf_msg(THIS->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to lookup file", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Create the destination with LINKFILE mode, and linkto xattr,
+ if the linkfile already exists, just open the file */
+ if (!ret) {
+ /*
+ * File already present, just open the file.
+ */
+ ret = syncop_open(to, loc, O_RDWR, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to open %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
-
- /* Create the destination with LINKFILE mode, and linkto xattr,
- if the linkfile already exists, it will just open the file */
- ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
- dict);
+ } else {
+ ret = syncop_create(to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, &new_stbuf,
+ dict, NULL);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create %s on %s (%s)",
- loc->path, to->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to create %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
+
+ fd_bind(fd);
+
+ /*Reason of doing lookup after create again:
+ *In the create, there is some time-gap between opening fd at the
+ *server (posix_layer) and binding it in server (incrementing fd count),
+ *so if in that time-gap, if other process sends unlink considering it
+ *as a linkto file, because inode->fd count will be 0, so file will be
+ *unlinked at the backend. And because further operations are performed
+ *on fd, so though migration will be done but will end with no file
+ *at the backend.
+ */
+
+ ret = syncop_lookup(to, loc, &check_stbuf, NULL, NULL, NULL);
+ if (!ret) {
+ if (gf_uuid_compare(stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
+ "file %s exists in %s with different gfid,"
+ "found in lookup after create",
+ loc->path, to->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (-ret == ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: file does not exist"
+ "on %s",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_fsetattr(to, fd, stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s", loc->path, to->name);
+ }
+
+ /* No need to bother about 0 byte size files */
+ if (stbuf->ia_size > 0) {
+ if (conf->use_fallocate && !file_has_holes) {
+ ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
+ conf->use_fallocate = _gf_false;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "fallocate failed for %s on %s", loc->path,
+ to->name);
+
+ *fop_errno = -ret;
+
+ /* fallocate does not release the space
+ * in some cases
+ */
+ ret2 = syncop_ftruncate(to, fd, 0, NULL, NULL, NULL, NULL);
+ if (ret2 < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret2,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "ftruncate failed for "
+ "%s on %s",
+ loc->path, to->name);
+ }
+ goto out;
+ }
+ }
+ } else {
+ ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "ftruncate failed for %s on %s", loc->path, to->name);
+ }
+ }
+ }
- ret = syncop_fsetattr (to, fd, stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
- NULL, NULL);
- if (ret < 0)
- gf_log (this->name, GF_LOG_ERROR,
- "chown failed for %s on %s (%s)",
- loc->path, to->name, strerror (errno));
-
- if (dst_fd)
- *dst_fd = fd;
+ /* success */
+ ret = 0;
- /* success */
- ret = 0;
+ if (dst_fd)
+ *dst_fd = fd;
out:
- return ret;
-}
+ if (ret) {
+ if (fd) {
+ fd_unref(fd);
+ }
+ }
+ if (dict)
+ dict_unref(dict);
-static inline int
-__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf, int flag)
-{
- struct statvfs src_statfs = {0,};
- struct statvfs dst_statfs = {0,};
- int ret = -1;
- xlator_t *this = NULL;
+ if (xdata)
+ dict_unref(xdata);
- this = THIS;
+ return ret;
+}
- ret = syncop_statfs (from, loc, &src_statfs);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get statfs of %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+static int
+__dht_check_free_space(xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc,
+ struct iatt *stbuf, int flag, dht_conf_t *conf,
+ gf_boolean_t *target_changed, xlator_t **new_subvol,
+ int *fop_errno)
+{
+ struct statvfs src_statfs = {
+ 0,
+ };
+ struct statvfs dst_statfs = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *xdata = NULL;
+ dht_layout_t *layout = NULL;
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+ double dst_post_availspacepercent = 0;
+ double src_post_availspacepercent = 0;
+ uint64_t file_blocks = 0;
+ uint64_t src_total_blocks = 0;
+ uint64_t dst_total_blocks = 0;
+
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "failed to allocate dictionary");
+ goto out;
+ }
+
+ ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_statfs(from, loc, &src_statfs, xdata, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to get statfs of %s on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_statfs(to, loc, &dst_statfs, xdata, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to get statfs of %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "min_free_disk - %f , block available - %" PRId64
+ ", block size - %lu",
+ conf->min_free_disk, dst_statfs.f_bavail, dst_statfs.f_bsize);
+
+ dst_statfs_blocks = dst_statfs.f_bavail *
+ (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ src_statfs_blocks = src_statfs.f_bavail *
+ (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ dst_total_blocks = dst_statfs.f_blocks *
+ (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ src_total_blocks = src_statfs.f_blocks *
+ (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid erroneous move to destination where
+ the space could be scantily available.
+ With heterogeneous brick support, an actual space comparison could
+ prevent any files being migrated to newly added bricks if they are
+ smaller then the free space available on the existing bricks.
+ */
+ if (!conf->use_fallocate) {
+ file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1;
+ file_blocks /= GF_DISK_SECTOR_SIZE;
+
+ if (file_blocks >= dst_statfs_blocks) {
+ dst_statfs_blocks = 0;
+ } else {
+ dst_statfs_blocks -= file_blocks;
}
+ }
+
+ src_post_availspacepercent = ((src_statfs_blocks + file_blocks) * 100) /
+ src_total_blocks;
+
+ dst_post_availspacepercent = (dst_statfs_blocks * 100) / dst_total_blocks;
+
+ if (dst_post_availspacepercent < src_post_availspacepercent) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "data movement of file "
+ "{blocks:%" PRIu64
+ " name:(%s)} would result in "
+ "dst node (%s:%" PRIu64
+ ") having lower disk "
+ "space than the source node (%s:%" PRIu64
+ ")"
+ ".Skipping file.",
+ stbuf->ia_blocks, loc->path, to->name, dst_statfs_blocks,
+ from->name, src_statfs_blocks);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ *fop_errno = ENOSPC;
+ ret = 1;
+ goto out;
+ }
- ret = syncop_statfs (to, loc, &dst_statfs);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get statfs of %s on %s (%s)",
- loc->path, to->name, strerror (errno));
- goto out;
+check_avail_space:
+ if (conf->disk_unit == 'p' && dst_statfs.f_blocks) {
+ dst_post_availspacepercent = (dst_statfs_blocks * 100) /
+ dst_total_blocks;
+
+ gf_msg_debug(this->name, 0,
+ "file : %s, post_availspacepercent"
+ " : %lf f_bavail : %" PRIu64 " min-free-disk: %lf",
+ loc->path, dst_post_availspacepercent, dst_statfs.f_bavail,
+ conf->min_free_disk);
+
+ if (dst_post_availspacepercent < conf->min_free_disk) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "Write will cross min-free-disk for "
+ "file - %s on subvol - %s. Looking "
+ "for new subvol",
+ loc->path, to->name);
+
+ goto find_new_subvol;
+ } else {
+ ret = 0;
+ goto out;
}
+ }
- /* if force option is given, do not check for space @ dst.
- * Check only if space is avail for the file */
- if (flag != GF_DHT_MIGRATE_DATA)
- goto check_avail_space;
+ if (conf->disk_unit != 'p') {
+ if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) < conf->min_free_disk) {
+ gf_msg_debug(this->name, 0,
+ "file : %s, destination frsize: %lu "
+ "f_bavail : %" PRIu64 " min-free-disk: %lf",
+ loc->path, dst_statfs.f_frsize, dst_statfs.f_bavail,
+ conf->min_free_disk);
- if (((dst_statfs.f_bavail *
- dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) <
- (((src_statfs.f_bavail * src_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) - stbuf->ia_blocks)) {
- gf_log (this->name, GF_LOG_WARNING,
- "data movement attempted from node (%s) with"
- " higher disk space to a node (%s) with "
- "lesser disk space (%s)", from->name,
- to->name, loc->path);
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "write will"
+ " cross min-free-disk for file - %s on subvol -"
+ " %s. looking for new subvol",
+ loc->path, to->name);
- /* this is not a 'failure', but we don't want to
- consider this as 'success' too :-/ */
- ret = 1;
- goto out;
- }
+ goto find_new_subvol;
-check_avail_space:
- if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
- gf_log (this->name, GF_LOG_ERROR,
- "data movement attempted from node (%s) with "
- "to node (%s) which does not have required free space"
- " for %s", from->name, to->name, loc->path);
- ret = 1;
- goto out;
+ } else {
+ ret = 0;
+ goto out;
}
+ }
+find_new_subvol:
+ layout = dht_layout_get(this, loc->parent);
+ if (!layout) {
+ gf_log(this->name, GF_LOG_ERROR, "Layout is NULL");
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ *new_subvol = dht_subvol_with_free_space_inodes(this, to, from, layout,
+ stbuf->ia_size);
+ if ((!(*new_subvol)) || (*new_subvol == from)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "Could not find any subvol"
+ " with space accommodating the file - %s. Consider "
+ "adding bricks",
+ loc->path);
+
+ *target_changed = _gf_false;
+ *fop_errno = ENOSPC;
+ ret = -1;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "new target found - %s"
+ " for file - %s",
+ (*new_subvol)->name, loc->path);
+ *target_changed = _gf_true;
ret = 0;
+ }
+
out:
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ return ret;
}
-static inline int
-__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
- uint64_t ia_size, int hole_exists)
+static int
+__dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+ uint64_t ia_size, int hole_exists, int *fop_errno)
{
- int ret = 0;
- int count = 0;
- off_t offset = 0;
- struct iovec *vector = NULL;
- struct iobref *iobref = NULL;
- uint64_t total = 0;
- size_t read_size = 0;
-
- /* if file size is '0', no need to enter this loop */
- while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
- DHT_REBALANCE_BLKSIZE : (ia_size - total));
- ret = syncop_readv (from, src, read_size,
- offset, 0, &vector, &count, &iobref);
- if (!ret || (ret < 0)) {
- break;
+ int ret = 0;
+ int count = 0;
+ off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
+ struct iovec *vector = NULL;
+ struct iobref *iobref = NULL;
+ uint64_t total = 0;
+ size_t read_size = 0;
+ size_t data_block_size = 0;
+ dict_t *xdata = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file
+ */
+
+ /* If the previous data block is fully copied, find the next data
+ * segment
+ * starting at the offset of the last read and written byte, */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
}
- if (hole_exists)
- ret = dht_write_with_holes (to, dst, vector, count,
- ret, offset, iobref);
- else
- ret = syncop_writev (to, dst, vector, count,
- offset, iobref, 0);
- if (ret < 0) {
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* If an error occurred here it's a real error because
+ * if the seek for a data segment was successful then
+ * necessarily another hole must exist (EOF is a hole)
+ */
+ *fop_errno = -ret;
break;
- }
- offset += ret;
- total += ret;
+ }
- if (vector)
- GF_FREE (vector);
- if (iobref)
- iobref_unref (iobref);
- iobref = NULL;
- vector = NULL;
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
}
- if (iobref)
- iobref_unref (iobref);
- if (vector)
- GF_FREE (vector);
- if (ret >= 0)
- ret = 0;
+ ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
+ &iobref, NULL, NULL, NULL);
- return ret;
-}
-
-
-static inline int
-__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
- struct iatt *stbuf, fd_t **src_fd)
-{
- int ret = 0;
- fd_t *fd = NULL;
- dict_t *dict = NULL;
- xlator_t *this = NULL;
- struct iatt iatt = {0,};
+ if (!ret || (ret < 0)) {
+ if (!ret) {
+ /* File was probably truncated*/
+ ret = -1;
+ *fop_errno = ENOSPC;
+ } else {
+ *fop_errno = -ret;
+ }
+ break;
+ }
- this = THIS;
+ if (!conf->force_migration) {
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "insufficient memory");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
- fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: fd create failed (source)", loc->path);
- ret = -1;
- goto out;
+ /* Fail this write and abort rebalance if we
+ * detect a write from client since migration of
+ * this file started. This is done to avoid
+ * potential data corruption due to out of order
+ * writes from rebalance and client to the same
+ * region (as compared between src and dst
+ * files). See
+ * https://github.com/gluster/glusterfs/issues/308
+ * for more details.
+ */
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
+ "failed to set dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
+ }
}
- ret = syncop_open (from, loc, O_RDWR, fd);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ break;
}
+ offset += ret;
+ total += ret;
+
+ GF_FREE(vector);
+ if (iobref)
+ iobref_unref(iobref);
+ iobref = NULL;
+ vector = NULL;
+ }
+ if (iobref)
+ iobref_unref(iobref);
+ GF_FREE(vector);
+
+ if (ret >= 0)
+ ret = 0;
+ else
ret = -1;
- dict = dict_new ();
- if (!dict)
- goto out;
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set xattr in dict for %s (linkto:%s)",
- loc->path, to->name);
- goto out;
- }
+ if (xdata) {
+ dict_unref(xdata);
+ }
- /* Once the migration starts, the source should have 'linkto' key set
- to show which is the target, so other clients can work around it */
- ret = syncop_setxattr (from, loc, dict, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set xattr on %s in %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ return ret;
+}
- /* mode should be (+S+T) to indicate migration is in progress */
- iatt.ia_prot = stbuf->ia_prot;
- iatt.ia_type = stbuf->ia_type;
- iatt.ia_prot.sticky = 1;
- iatt.ia_prot.sgid = 1;
+static int
+__dht_rebalance_open_src_file(xlator_t *this, xlator_t *from, xlator_t *to,
+ loc_t *loc, struct iatt *stbuf, fd_t **src_fd,
+ gf_boolean_t *clean_src, int *fop_errno)
+{
+ int ret = 0;
+ fd_t *fd = NULL;
+ dict_t *dict = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ *clean_src = _gf_false;
+
+ fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: fd create failed (source)", loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_open(from, loc, O_RDWR, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to open file %s on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set mode on %s in %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ fd_bind(fd);
- if (src_fd)
- *src_fd = fd;
+ if (src_fd)
+ *src_fd = fd;
- /* success */
- ret = 0;
+ ret = -1;
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: Could not allocate memory for dict", loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, conf->link_xattr_name, to->name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)", loc->path,
+ to->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* Once the migration starts, the source should have 'linkto' key set
+ to show which is the target, so other clients can work around it */
+ ret = syncop_setxattr(from, loc, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set xattr on %s in %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Reset source mode/xattr if migration fails*/
+ *clean_src = _gf_true;
+
+ /* mode should be (+S+T) to indicate migration is in progress */
+ iatt.ia_prot = stbuf->ia_prot;
+ iatt.ia_type = stbuf->ia_type;
+ iatt.ia_prot.sticky = 1;
+ iatt.ia_prot.sgid = 1;
+
+ ret = syncop_setattr(from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL, NULL,
+ NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set mode on %s in %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* success */
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
+ if (dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int
-migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
- struct iatt *buf)
+migrate_special_files(xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *buf, int *fop_errno)
{
- int ret = -1;
- dict_t *rsp_dict = NULL;
- dict_t *dict = NULL;
- char *link = NULL;
- struct iatt stbuf = {0,};
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1;
+ dict_t *rsp_dict = NULL;
+ dict_t *dict = NULL;
+ char *link = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ dht_conf_t *conf = this->private;
+
+ dict = dict_new();
+ if (!dict) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* check in the destination if the file is link file */
+ ret = syncop_lookup(to, loc, &stbuf, NULL, dict, &rsp_dict);
+ if ((ret < 0) && (-ret != ENOENT)) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: lookup failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del(dict, conf->link_xattr_name);
+
+ /* file exists in target node, only if it is 'linkfile' its valid,
+ otherwise, error out */
+ if (!ret) {
+ if (!check_is_linkfile(loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: file exists in destination", loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256);
+ /* as file is linkfile, delete it */
+ ret = syncop_unlink(to, loc, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set 'linkto' key in dict", loc->path);
- goto out;
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to delete the linkfile", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
- /* check in the destination if the file is link file */
- ret = syncop_lookup (to, loc, dict, &stbuf, &rsp_dict, NULL);
- if ((ret == -1) && (errno != ENOENT)) {
- gf_log (this->name, GF_LOG_WARNING, "%s: lookup failed (%s)",
- loc->path, strerror (errno));
- goto out;
+ /* Set the gfid of the source file in dict */
+ ret = dict_set_gfuuid(dict, "gfid-req", buf->ia_gfid, true);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ /* Create the file in target */
+ if (IA_ISLNK(buf->ia_type)) {
+ /* Handle symlinks separately */
+ ret = syncop_readlink(from, loc, &link, buf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: readlink on symlink failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- /* we no more require this key */
- dict_del (dict, DHT_LINKFILE_KEY);
+ ret = syncop_symlink(to, loc, link, 0, dict, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED, "%s: creating symlink failed",
+ loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* file exists in target node, only if it is 'linkfile' its valid,
- otherwise, error out */
- if (!ret) {
- if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: file exists in destination", loc->path);
- ret = -1;
- goto out;
- }
+ goto done;
+ }
- /* as file is linkfile, delete it */
- ret = syncop_unlink (to, loc);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to delete the linkfile (%s)",
- loc->path, strerror (errno));
- goto out;
- }
- }
+ ret = syncop_mknod(to, loc, st_mode_from_ia(buf->ia_prot, buf->ia_type),
+ makedev(ia_major(buf->ia_rdev), ia_minor(buf->ia_rdev)),
+ 0, dict, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: mknod failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* Set the gfid of the source file in dict */
- ret = dict_set_static_bin (dict, "gfid-req", buf->ia_gfid, 16);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid in dict for create", loc->path);
- goto out;
- }
+done:
+ ret = syncop_setattr(to, loc, buf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_UID |
+ GF_SET_ATTR_GID | GF_SET_ATTR_MODE),
+ NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform setattr on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ }
+
+ ret = syncop_unlink(from, loc, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: unlink failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ }
- /* Create the file in target */
- if (IA_ISLNK (buf->ia_type)) {
- /* Handle symlinks separately */
- ret = syncop_readlink (from, loc, &link, buf->ia_size);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: readlink on symlink failed (%s)",
- loc->path, strerror (errno));
- goto out;
- }
+out:
+ GF_FREE(link);
+ if (dict)
+ dict_unref(dict);
- ret = syncop_symlink (to, loc, link, dict);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: creating symlink failed (%s)",
- loc->path, strerror (errno));
- goto out;
- }
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- goto done;
- }
+ return ret;
+}
- ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
- buf->ia_type),
- makedev (ia_major (buf->ia_rdev),
- ia_minor (buf->ia_rdev)), dict);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)",
- loc->path, strerror (errno));
- goto out;
- }
+static int
+__dht_migration_cleanup_src_file(xlator_t *this, loc_t *loc, fd_t *fd,
+ xlator_t *from, ia_prot_t *src_ia_prot)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ struct iatt new_stbuf = {
+ 0,
+ };
+
+ if (!this || !fd || !from || !src_ia_prot) {
+ goto out;
+ }
+
+ conf = this->private;
+
+ /*Revert source mode and xattr changes*/
+ ret = syncop_fstat(from, fd, &new_stbuf, NULL, NULL);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file cleanup failed: failed to fstat "
+ "file %s on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
-done:
- ret = syncop_unlink (from, loc);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)",
- loc->path, strerror (errno));
+ /* Remove the sticky bit and sgid bit set, reset it to 0*/
+ if (!src_ia_prot->sticky)
+ new_stbuf.ia_prot.sticky = 0;
-out:
- if (dict)
- dict_unref (dict);
+ if (!src_ia_prot->sgid)
+ new_stbuf.ia_prot.sgid = 0;
- if (rsp_dict)
- dict_unref (rsp_dict);
+ ret = syncop_fsetattr(from, fd, &new_stbuf,
+ (GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL,
+ NULL, NULL);
- return ret;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file cleanup failed:"
+ "%s: failed to perform fsetattr on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_fremovexattr(from, fd, conf->link_xattr_name, 0, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to remove linkto xattr on %s (%s)", loc->path,
+ from->name, strerror(-ret));
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
}
/*
@@ -628,1047 +1469,3234 @@ out:
1 : not a failure, but we can't migrate data as of now
*/
int
-dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
- int flag)
+dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag, int *fop_errno)
{
- int ret = -1;
- struct iatt new_stbuf = {0,};
- struct iatt stbuf = {0,};
- struct iatt empty_iatt = {0,};
- ia_prot_t src_ia_prot = {0,};
- fd_t *src_fd = NULL;
- fd_t *dst_fd = NULL;
- dict_t *dict = NULL;
- dict_t *xattr = NULL;
- dict_t *xattr_rsp = NULL;
- int file_has_holes = 0;
-
- gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
- loc->path, from->name, to->name);
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1;
+ struct iatt new_stbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt empty_iatt = {
+ 0,
+ };
+ ia_prot_t src_ia_prot = {
+ 0,
+ };
+ fd_t *src_fd = NULL;
+ fd_t *dst_fd = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
+ int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
+ int rcvd_enoent_from_src = 0;
+ struct gf_flock flock = {
+ 0,
+ };
+ struct gf_flock plock = {
+ 0,
+ };
+ loc_t tmp_loc = {
+ 0,
+ };
+ loc_t parent_loc = {
+ 0,
+ };
+ gf_boolean_t inodelk_locked = _gf_false;
+ gf_boolean_t entrylk_locked = _gf_false;
+ gf_boolean_t p_locked = _gf_false;
+ int lk_ret = -1;
+ gf_defrag_info_t *defrag = NULL;
+ gf_boolean_t clean_src = _gf_false;
+ gf_boolean_t clean_dst = _gf_false;
+ int log_level = GF_LOG_INFO;
+ gf_boolean_t delete_src_linkto = _gf_true;
+ lock_migration_info_t locklist;
+ dict_t *meta_dict = NULL;
+ gf_boolean_t meta_locked = _gf_false;
+ gf_boolean_t target_changed = _gf_false;
+ xlator_t *new_target = NULL;
+ xlator_t *old_target = NULL;
+ xlator_t *hashed_subvol = NULL;
+ fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
+
+ if (from == to) {
+ gf_msg_debug(this->name, 0,
+ "destination and source are same. file %s"
+ " might have migrated already",
+ loc->path);
+ ret = 0;
+ goto out;
+ }
- ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256);
+ gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
+ loc->path, from->name, to->name);
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Could not allocate memory for dict");
+ goto out;
+ }
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set 'linkto' key in dict",
+ loc->path);
+ goto out;
+ }
+
+ /* Do not migrate file in case lock migration is not enabled on the
+ * volume*/
+ if (!conf->lock_migration_enabled) {
+ ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set 'linkto' key in dict", loc->path);
- goto out;
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to "
+ "set " GLUSTERFS_POSIXLK_COUNT " key in dict",
+ loc->path);
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "locks will be migrated"
+ " for file: %s",
+ loc->path);
+ }
+
+ /* The file is locked to prevent a rename during a migration. Renames
+ * and migrations on the file at the same time can lead to data loss.
+ */
+
+ ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno);
+ if (ret < 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to build parent loc, which is needed to "
+ "acquire entrylk to synchronize with renames on this "
+ "path. Skipping migration",
+ loc->path);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: cannot find hashed subvol which is needed to "
+ "synchronize with renames on this path. "
+ "Skipping migration",
+ loc->path);
+ goto out;
+ }
+
+ flock.l_type = F_WRLCK;
+
+ tmp_loc.inode = inode_ref(loc->inode);
+ gf_uuid_copy(tmp_loc.gfid, loc->gfid);
+ tmp_loc.path = gf_strdup(loc->path);
+
+ /* this inodelk happens with flock.owner being zero. But to synchronize
+ * hardlink migration we need to have different lkowner for each migration
+ * Filed a bug here: https://bugzilla.redhat.com/show_bug.cgi?id=1468202 to
+ * track the fix for this. Currently synclock takes care of synchronizing
+ * hardlink migration. Once this bug is fixed we can avoid taking synclock
+ */
+ ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW,
+ &flock, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "migrate file failed: "
+ "%s: failed to lock file on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ inodelk_locked = _gf_true;
+
+ /* dht_rename has changed to use entrylk on hashed subvol for
+ * synchronization. So, rebalance too has to acquire an entrylk on
+ * hashed subvol.
+ */
+ ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, &parent_loc,
+ loc->name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to acquire entrylk on subvol %s", loc->path,
+ hashed_subvol->name);
+ goto out;
+ }
+
+ entrylk_locked = _gf_true;
+
+ /* Phase 1 - Data migration is in progress from now on */
+ ret = syncop_lookup(from, loc, &stbuf, NULL, dict, &xattr_rsp);
+ if (ret) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: lookup failed on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ /* preserve source mode, so set the same to the destination */
+ src_ia_prot = stbuf.ia_prot;
+
+ /* Check if file can be migrated */
+ ret = __is_file_migratable(this, loc, &stbuf, xattr_rsp, flag, defrag, conf,
+ fop_errno);
+ if (ret) {
+ if (ret == HARDLINK_MIG_INPROGRESS)
+ ret = 0;
+ goto out;
+ }
+
+ /* Take care of the special files */
+ if (!IA_ISREG(stbuf.ia_type)) {
+ /* Special files */
+ ret = migrate_special_files(this, from, to, loc, &stbuf, fop_errno);
+ goto out;
+ }
+
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
+ fop_errno, file_has_holes);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Create dst failed"
+ " on - %s for file - %s",
+ to->name, loc->path);
+ goto out;
+ }
+
+ clean_dst = _gf_true;
+
+ ret = __dht_check_free_space(this, to, from, loc, &stbuf, flag, conf,
+ &target_changed, &new_target, fop_errno);
+ if (target_changed) {
+ /* Can't handle for hardlinks. Marking this as failure */
+ if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "Exiting migration for"
+ " file - %s. flag - %d, stbuf.ia_nlink - %d",
+ loc->path, flag, stbuf.ia_nlink);
+ ret = -1;
+ goto out;
}
- /* Phase 1 - Data migration is in progress from now on */
- ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
+ ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)", loc->path,
+ to->name, strerror(-ret));
}
- /* we no more require this key */
- dict_del (dict, DHT_LINKFILE_KEY);
+ syncop_close(dst_fd);
+ dst_fd = NULL;
- /* preserve source mode, so set the same to the destination */
- src_ia_prot = stbuf.ia_prot;
+ old_target = to;
+ to = new_target;
- /* Check if file can be migrated */
- ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
- if (ret)
- goto out;
+ clean_dst = _gf_false;
- /* Take care of the special files */
- if (!IA_ISREG (stbuf.ia_type)) {
- /* Special files */
- ret = migrate_special_files (this, from, to, loc, &stbuf);
- goto out;
+ /* if the file migration is successful to this new target, then
+ * update the xattr on the old destination to point the new
+ * destination. We need to do update this only post migration
+ * as in case of failure the linkto needs to point to the source
+ * subvol */
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Create dst failed"
+ " on - %s for file - %s",
+ to->name, loc->path);
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "destination for file "
+ "- %s is changed to - %s",
+ loc->path, to->name);
+ clean_dst = _gf_true;
}
+ }
+
+ if (ret) {
+ goto out;
+ }
+
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file(this, from, to, loc, &stbuf, &src_fd,
+ &clean_src, fop_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to open %s on %s", loc->path,
+ from->name);
+ goto out;
+ }
+
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr(from, loc, &xattr, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ /* Copying posix acls to the linkto file messes up the permissions*/
+ dht_strip_out_acls(xattr);
+
+ /* Remove the linkto xattr as we don't want to overwrite the value
+ * set on the dst.
+ */
+ dict_del(xattr, conf->link_xattr_name);
+
+ /* We need to error out if this fails as having the wrong shard xattrs
+ * set on the dst could cause data corruption
+ */
+ ret = syncop_fsetxattr(to, dst_fd, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to set xattr on %s", loc->path, to->name);
+ ret = -1;
+ goto out;
+ }
- /* create the destination, with required modes/xattr */
- ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
- dict, &dst_fd);
- if (ret)
- goto out;
+ if (xattr_rsp) {
+ /* we no more require this key */
+ dict_del(dict, conf->link_xattr_name);
+ dict_unref(xattr_rsp);
+ }
+
+ ret = syncop_fstat(from, src_fd, &stbuf, dict, &xattr_rsp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:failed to lookup %s on %s ", loc->path,
+ from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Check again if file has hardlink */
+ ret = __check_file_has_hardlink(this, loc, &stbuf, xattr_rsp, flag, defrag,
+ conf, fop_errno);
+ if (ret) {
+ if (ret == HARDLINK_MIG_INPROGRESS)
+ ret = 0;
+ goto out;
+ }
+
+ ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes,
+ fop_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to migrate data", loc->path);
- ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
- if (ret) {
- goto out;
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO: Sync the locks */
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat(from, src_fd, &new_stbuf, NULL, NULL);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to fstat file %s on %s ", loc->path,
+ from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Lock the entire source file to prevent clients from taking a
+ lock on it as dht_lk does not handle file migration.
+
+ This still leaves a small window where conflicting locks can
+ be granted to different clients. If client1 requests a blocking
+ lock on the src file, it will be granted after the migrating
+ process releases its lock. If client2 requests a lock on the dst
+ data file, it will also be granted, but all FOPs will be redirected
+ to the dst data file.
+ */
+
+ /* Take meta lock */
+
+ if (conf->lock_migration_enabled) {
+ meta_dict = dict_new();
+ if (!meta_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "dict_new failed");
+
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Open the source, and also update mode/xattr */
- ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd);
+ ret = dict_set_str(meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s",
- loc->path, from->name);
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s",
+ GLUSTERFS_INTERNAL_FOP_KEY, loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- ret = syncop_fstat (from, src_fd, &stbuf);
+ ret = dict_set_int32(meta_dict, GF_META_LOCK_KEY, 1);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
-
- /* All I/O happens in this function */
- ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
- stbuf.ia_size, file_has_holes);
+ ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",
- loc->path);
- /* reset the destination back to 0 */
- ret = syncop_ftruncate (to, dst_fd, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to reset target size back to 0 (%s)",
- loc->path, strerror (errno));
- }
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr metalock failed");
- ret = -1;
- goto out;
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ } else {
+ meta_locked = _gf_true;
}
+ }
- /* TODO: move all xattr related operations to fd based operations */
- ret = syncop_listxattr (from, loc, &xattr);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get xattr from %s (%s)",
- loc->path, from->name, strerror (errno));
+ if (!conf->lock_migration_enabled) {
+ plock.l_type = F_WRLCK;
+ plock.l_start = 0;
+ plock.l_len = 0;
+ plock.l_whence = SEEK_SET;
- ret = syncop_setxattr (to, loc, xattr, 0);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set xattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Failed to lock on %s",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* TODO: Sync the locks */
+ p_locked = _gf_true;
- ret = syncop_fsync (to, dst_fd);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to fsync on %s (%s)",
- loc->path, to->name, strerror (errno));
+ } else {
+ INIT_LIST_HEAD(&locklist.list);
+ ret = syncop_getactivelk(from, loc, &locklist, NULL, NULL);
+ if (ret == 0) {
+ gf_log(this->name, GF_LOG_INFO, "No active locks on:%s", loc->path);
- /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+ } else if (ret > 0) {
+ ret = syncop_setactivelk(to, loc, &locklist, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_LOCK_MIGRATION_FAILED, "write lock failed on:%s",
+ loc->path);
- ret = syncop_fstat (from, src_fd, &new_stbuf);
- if (ret < 0) {
- /* Failed to get the stat info */
- gf_log (this->name, GF_LOG_ERROR,
- "failed to fstat file %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_LOCK_MIGRATION_FAILED,
+ "getactivelk failed for file: %s", loc->path);
+ *fop_errno = -ret;
}
-
- /* source would have both sticky bit and sgid bit set, reset it to 0,
- and set the source permission on destination, if it was not set
- prior to setting rebalance-modes in source */
- if (!src_ia_prot.sticky)
- new_stbuf.ia_prot.sticky = 0;
-
- if (!src_ia_prot.sgid)
- new_stbuf.ia_prot.sgid = 0;
-
- /* TODO: if the source actually had sticky bit, or sgid bit set,
- we are not handling it */
-
- ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_MODE), NULL, NULL);
+ }
+
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination, if it was not set
+ prior to setting rebalance-modes in source */
+ if (!src_ia_prot.sticky)
+ new_stbuf.ia_prot.sticky = 0;
+
+ if (!src_ia_prot.sgid)
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr(
+ to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Because 'futimes' is not portable */
+ ret = syncop_setattr(to, loc, &new_stbuf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s ", loc->path, to->name);
+ *fop_errno = -ret;
+ }
+
+ if (target_changed) {
+ dict_del(dict, GLUSTERFS_POSIXLK_COUNT);
+ ret = dict_set_str(dict, conf->link_xattr_name, to->name);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform setattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)", loc->path,
+ to->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_setxattr(old_target, loc, dict, 0, NULL, NULL);
+ if (ret && -ret != ESTALE && -ret != ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set xattr on %s in %s", loc->path,
+ old_target->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ } else if (-ret == ESTALE || -ret == ENOENT) {
+ /* The failure ESTALE indicates that the linkto
+ * file on the hashed subvol might have been deleted.
+ * In this case will create a linkto file with new target
+ * as linkto xattr value*/
+ linkto_fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!linkto_fd) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ DHT_MSG_MIGRATE_FILE_FAILED, "%s: fd create failed",
+ loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = syncop_create(old_target, loc, O_RDWR, DHT_LINKFILE_MODE,
+ linkto_fd, NULL, dict, NULL);
+ if (ret != 0 && -ret != EEXIST && -ret != ESTALE) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to create linkto file on %s in %s", loc->path,
+ old_target->name);
goto out;
+ } else if (ret == 0) {
+ ret = syncop_fsetattr(old_target, linkto_fd, &stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL,
+ NULL, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s", loc->path,
+ old_target->name);
+ }
+ }
+ }
+ }
+
+ clean_dst = _gf_false;
+
+ /* Posix acls are not set on DHT linkto files as part of the initial
+ * initial xattrs set on the dst file, so these need
+ * to be set on the dst file after the linkto attrs are removed.
+ * TODO: Optimize this.
+ */
+ if (xattr) {
+ dict_unref(xattr);
+ xattr = NULL;
+ }
+
+ /* Set only the Posix ACLs this time */
+ ret = syncop_getxattr(from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, NULL,
+ NULL);
+ if (ret < 0) {
+ if ((-ret != ENODATA) && (-ret != ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s",
+ loc->path, from->name);
+ *fop_errno = -ret;
}
+ } else {
+ ret = syncop_setxattr(to, loc, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ /* Potential problem here where Posix ACLs will
+ * not be set on the target file */
+
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set xattr on %s",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ }
+ }
+
+ /* The src file is being unlinked after this so we don't need
+ to clean it up */
+ clean_src = _gf_false;
+
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr(from, src_fd, &empty_iatt, GF_SET_ATTR_MODE, NULL,
+ NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate(from, src_fd, 0, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)", loc->path,
+ from->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr(to, dst_fd, conf->link_xattr_name, 0, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)", loc->path,
+ to->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* Do a stat and check the gfid before unlink */
+
+ /*
+ * Cached file changes its state from non-linkto to linkto file after
+ * migrating data. If lookup from any other mount-point is performed,
+ * converted-linkto-cached file will be treated as a stale and will be
+ * unlinked. But by this time, file is already migrated. So further
+ * failure because of ENOENT should not be treated as error
+ */
+
+ ret = syncop_stat(from, loc, &empty_iatt, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to do a stat on %s", loc->path, from->name);
+
+ if (-ret != ENOENT) {
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+
+ rcvd_enoent_from_src = 1;
+ }
- /* Because 'futimes' is not portable */
- ret = syncop_setattr (to, loc, &new_stbuf,
- (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
- NULL, NULL);
+ if ((gf_uuid_compare(empty_iatt.ia_gfid, loc->gfid) == 0) &&
+ (!rcvd_enoent_from_src) && delete_src_linkto) {
+ /* take out the source from namespace */
+ ret = syncop_unlink(from, loc, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform setattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform unlink on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
}
+ }
+
+ ret = syncop_lookup(this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "%s: failed to lookup the file on subvolumes", loc->path);
+ *fop_errno = -ret;
+ }
+
+ gf_msg(this->name, log_level, 0, DHT_MSG_MIGRATE_FILE_COMPLETE,
+ "completed migration of %s from subvolume %s to %s", loc->path,
+ from->name, to->name);
+
+ ret = 0;
+
+metaunlock:
- /* Make the source as a linkfile first before deleting it */
- empty_iatt.ia_prot.sticky = 1;
- ret = syncop_fsetattr (from, src_fd, &empty_iatt,
- GF_SET_ATTR_MODE, NULL, NULL);
+ if (conf->lock_migration_enabled && meta_locked) {
+ dict_del(meta_dict, GF_META_LOCK_KEY);
+
+ ret = dict_set_int32(meta_dict, GF_META_UNLOCK_KEY, 1);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING, \
- "%s: failed to perform setattr on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Do a stat and check the gfid before unlink */
- ret = syncop_stat (from, loc, &empty_iatt);
+ if (clean_dst == _gf_false)
+ ret = dict_set_int32(meta_dict, "status", 1);
+ else
+ ret = dict_set_int32(meta_dict, "status", 0);
+
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to do a stat on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
- if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
- /* take out the source from namespace */
- ret = syncop_unlink (from, loc);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform unlink on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Free up the data blocks on the source node, as the whole
- file is migrated */
- ret = syncop_ftruncate (from, src_fd, 0);
+ ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform truncate on %s (%s)",
- loc->path, from->name, strerror (errno));
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr meta unlock failed");
+
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
- /* remove the 'linkto' xattr from the destination */
- ret = syncop_fremovexattr (to, dst_fd, DHT_LINKFILE_KEY);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform removexattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+out:
+ if (clean_src) {
+ /* Revert source mode and xattr changes*/
+ lk_ret = __dht_migration_cleanup_src_file(this, loc, src_fd, from,
+ &src_ia_prot);
+ if (lk_ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to cleanup source file on %s", loc->path,
+ from->name);
}
+ }
+
+ /* reset the destination back to 0 */
+ if (clean_dst) {
+ lk_ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ if (lk_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: "
+ "%s: failed to reset target size back to 0",
+ loc->path);
+ }
+ }
- ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to lookup the file on subvolumes (%s)",
- loc->path, strerror (errno));
+ if (inodelk_locked) {
+ flock.l_type = F_UNLCK;
+
+ lk_ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc,
+ F_SETLK, &flock, NULL, NULL);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s", loc->path, from->name);
+ }
+ }
+
+ if (entrylk_locked) {
+ lk_ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN,
+ &parent_loc, loc->name, ENTRYLK_UNLOCK,
+ ENTRYLK_UNLOCK, NULL, NULL);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock entrylk on %s", loc->path,
+ hashed_subvol->name);
}
+ }
- gf_log (this->name, GF_LOG_INFO,
- "completed migration of %s from subvolume %s to %s",
- loc->path, from->name, to->name);
+ if (p_locked) {
+ plock.l_type = F_UNLCK;
+ lk_ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL);
- ret = 0;
-out:
- if (dict)
- dict_unref (dict);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s", loc->path, from->name);
+ }
+ }
- if (xattr)
- dict_unref (xattr);
- if (xattr_rsp)
- dict_unref (xattr_rsp);
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ }
- if (dst_fd)
- syncop_close (dst_fd);
- if (src_fd)
- syncop_close (src_fd);
+ if (dict)
+ dict_unref(dict);
- return ret;
-}
+ if (xattr)
+ dict_unref(xattr);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
-static int
-rebalance_task (void *data)
-{
- int ret = -1;
- dht_local_t *local = NULL;
- call_frame_t *frame = NULL;
+ if (dst_fd)
+ syncop_close(dst_fd);
- frame = data;
+ if (src_fd)
+ syncop_close(src_fd);
+ if (linkto_fd)
+ syncop_close(linkto_fd);
- local = frame->local;
+ if (xdata)
+ dict_unref(xdata);
- /* This function is 'synchrounous', hence if it returns,
- we are done with the task */
- ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol,
- local->rebalance.target_node, local->flags);
+ loc_wipe(&tmp_loc);
+ loc_wipe(&parent_loc);
- return ret;
+ return ret;
}
static int
-rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
+rebalance_task(void *data)
{
- int ret = -1;
- uint64_t layout_int = 0;
- dht_layout_t *layout = 0;
- xlator_t *this = NULL;
- dht_local_t *local = NULL;
- int32_t op_errno = EINVAL;
-
- this = THIS;
- local = sync_frame->local;
-
- if (!op_ret) {
- /* Make sure we have valid 'layout' in inode ctx
- after the operation */
- ret = inode_ctx_del (local->loc.inode, this, &layout_int);
- if (!ret && layout_int) {
- layout = (dht_layout_t *)(long)layout_int;
- dht_layout_unref (this, layout);
- }
+ int ret = -1;
+ dht_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int fop_errno = 0;
- ret = dht_layout_preset (this, local->rebalance.target_node,
- local->loc.inode);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set inode ctx", local->loc.path);
- }
+ frame = data;
- if (op_ret == -1) {
- /* Failure of migration process, mostly due to write process.
- as we can't preserve the exact errno, lets say there was
- no space to migrate-data
- */
- op_errno = ENOSPC;
- }
+ local = frame->local;
- if (op_ret == 1) {
- /* migration didn't happen, but is not a failure, let the user
- understand that he doesn't have permission to migrate the
- file.
- */
- op_ret = -1;
- op_errno = EPERM;
- }
+ /* This function is 'synchrounous', hence if it returns,
+ we are done with the task */
+ ret = dht_migrate_file(THIS, &local->loc, local->rebalance.from_subvol,
+ local->rebalance.target_node, local->flags,
+ &fop_errno);
- DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
- return 0;
+ return ret;
+}
+
+static int
+rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data)
+{
+ int32_t op_errno = EINVAL;
+
+ if (op_ret == -1) {
+ /* Failure of migration process, mostly due to write process.
+ as we can't preserve the exact errno, lets say there was
+ no space to migrate-data
+ */
+ op_errno = ENOSPC;
+ } else if (op_ret == 1) {
+ /* migration didn't happen, but is not a failure, let the user
+ understand that he doesn't have permission to migrate the
+ file.
+ */
+ op_ret = -1;
+ op_errno = EPERM;
+ } else if (op_ret != 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL);
+ return 0;
}
int
-dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
+dht_start_rebalance_task(xlator_t *this, call_frame_t *frame)
{
- int ret = -1;
+ int ret = -1;
- ret = synctask_new (this->ctx->env, rebalance_task,
- rebalance_task_completion,
- frame, frame);
- return ret;
+ ret = synctask_new(this->ctx->env, rebalance_task,
+ rebalance_task_completion, frame, frame);
+ return ret;
}
int
-gf_listener_stop (void)
+gf_listener_stop(xlator_t *this)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
- xlator_t *this = NULL;
-
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = unlink (cmd_args->sock_file);
- if (ret && (ENOENT == errno)) {
- ret = 0;
- }
- }
-
- if (ret) {
- this = THIS;
- gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
- "socket %s, error: %s", cmd_args->sock_file,
- strerror (errno));
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ ctx = this->ctx;
+ GF_ASSERT(ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = sys_unlink(cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
}
- return ret;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_SOCKET_ERROR,
+ "Failed to unlink listener "
+ "socket %s",
+ cmd_args->sock_file);
+ }
+ return ret;
}
void
-dht_build_root_inode (xlator_t *this, inode_t **inode)
+dht_build_root_inode(xlator_t *this, inode_t **inode)
{
- inode_table_t *itable = NULL;
- uuid_t root_gfid = {0, };
+ inode_table_t *itable = NULL;
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- itable = inode_table_new (0, this);
- if (!itable)
- return;
+ itable = inode_table_new(0, this);
+ if (!itable)
+ return;
- root_gfid[15] = 1;
- *inode = inode_find (itable, root_gfid);
+ *inode = inode_find(itable, root_gfid);
}
void
-dht_build_root_loc (inode_t *inode, loc_t *loc)
+dht_build_root_loc(inode_t *inode, loc_t *loc)
{
- loc->path = "/";
- loc->inode = inode;
- loc->inode->ia_type = IA_IFDIR;
- memset (loc->gfid, 0, 16);
- loc->gfid[15] = 1;
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset(loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
}
-
/* return values: 1 -> error, bug ignore and continue
0 -> proceed
-1 -> error, handle it */
int32_t
-gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+gf_defrag_handle_migrate_error(int32_t op_errno, gf_defrag_info_t *defrag)
{
- /* if errno is not ENOSPC or ENOTCONN, we can still continue
- with rebalance process */
- if ((errno != ENOSPC) || (errno != ENOTCONN))
- return 1;
-
- if (errno == ENOTCONN) {
- /* Most probably mount point went missing (mostly due
- to a brick down), say rebalance failure to user,
- let him restart it if everything is fine */
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- return -1;
+ int ret = 0;
+ /* if errno is not ENOTCONN, we can still continue
+ with rebalance process */
+ if (op_errno != ENOTCONN) {
+ ret = 1;
+ goto out;
+ }
+
+ if (op_errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static gf_boolean_t
+gf_defrag_pattern_match(gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+ gf_defrag_pattern_list_t *trav = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("dht", defrag, out);
+
+ trav = defrag->defrag_pattern;
+ while (trav) {
+ if (!fnmatch(trav->path_pattern, name, FNM_NOESCAPE)) {
+ match = _gf_true;
+ break;
}
+ trav = trav->next;
+ }
- if (errno == ENOSPC) {
- /* rebalance process itself failed, may be
- remote brick went down, or write failed due to
- disk full etc etc.. */
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- return -1;
+ if ((match == _gf_true) && (size >= trav->size))
+ ret = _gf_true;
+
+out:
+ return ret;
+}
+
+int
+dht_dfreaddirp_done(dht_dfoffset_ctx_t *offset_var, int cnt)
+{
+ int i;
+ int result = 1;
+
+ for (i = 0; i < cnt; i++) {
+ if (offset_var[i].readdir_done == 0) {
+ result = 0;
+ break;
}
+ }
+ return result;
+}
- return 0;
+int static gf_defrag_ctx_subvols_init(dht_dfoffset_ctx_t *offset_var,
+ xlator_t *this)
+{
+ int i;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf)
+ return -1;
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ offset_var[i].this = conf->local_subvols[i];
+ offset_var[i].offset = (off_t)0;
+ offset_var[i].readdir_done = 0;
+ }
+
+ return 0;
}
-/* We do a depth first traversal of directories. But before we move into
- * subdirs, we complete the data migration of those directories whose layouts
- * have been fixed
+static int
+dht_get_first_non_null_index(subvol_nodeuuids_info_t *entry)
+{
+ int i = 0;
+ int index = 0;
+
+ for (i = 0; i < entry->count; i++) {
+ if (!gf_uuid_is_null(entry->elements[i].uuid)) {
+ index = i;
+ goto out;
+ }
+ }
+
+ if (i == entry->count) {
+ index = -1;
+ }
+out:
+ return index;
+}
+
+/* Return value
+ * 0 : this node does not migrate the file
+ * 1 : this node migrates the file
+ *
+ * Use the hash value of the gfid to determine which node will migrate files.
+ * Using the gfid instead of the name also ensures that the same node handles
+ * all hardlinks.
*/
-int
-gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
- dict_t *migrate_data)
+gf_boolean_t
+gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = -1;
- loc_t entry_loc = {0,};
- fd_t *fd = NULL;
- gf_dirent_t entries;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t *entry = NULL;
- gf_boolean_t free_entries = _gf_false;
- off_t offset = 0;
- dict_t *dict = NULL;
- struct iatt iatt = {0,};
- int32_t op_errno = 0;
- char *uuid_str = NULL;
- uuid_t node_uuid = {0,};
- int readdir_operrno = 0;
-
- gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
- loc->path);
- fd = fd_create (loc->inode, defrag->pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
- goto out;
+ gf_boolean_t ret = _gf_false;
+ int i = local_subvol_index;
+ char *str = NULL;
+ uint32_t hashval = 0;
+ int32_t index = 0;
+ dht_conf_t *conf = NULL;
+ char buf[UUID_CANONICAL_FORM_LEN + 1] = {
+ 0,
+ };
+ subvol_nodeuuids_info_t *entry = NULL;
+
+ conf = this->private;
+
+ /* Pure distribute. A subvol in this case
+ will be handled by only one node */
+
+ entry = &(conf->local_nodeuuids[i]);
+ if (entry->count == 1) {
+ return 1;
+ }
+
+ str = uuid_utoa_r(gfid, buf);
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
+ index = (hashval % entry->count);
+ if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+ /* Index matches this node's nodeuuid.*/
+ ret = _gf_true;
+ goto out;
}
- ret = syncop_opendir (this, loc, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
- loc->path);
+ /* Brick down - some other node has to migrate these files*/
+ if (gf_uuid_is_null(entry->elements[index].uuid)) {
+ /* Fall back to the first non-null index */
+ index = dht_get_first_non_null_index(entry);
+
+ if (index == -1) {
+ /* None of the bricks in the subvol are up.
+ * CHILD_DOWN will kill the process soon */
+
+ return _gf_false;
+ }
+
+ if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+ /* Index matches this node's nodeuuid.*/
+ ret = _gf_true;
goto out;
+ }
}
+ }
+out:
+ return ret;
+}
- INIT_LIST_HEAD (&entries.list);
+int
+gf_defrag_migrate_single_file(void *opaque)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ gf_dirent_t *entry = NULL;
+ struct timeval start = {
+ 0,
+ };
+ loc_t entry_loc = {
+ 0,
+ };
+ loc_t *loc = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *migrate_data = NULL;
+ struct timeval end = {
+ 0,
+ };
+ double elapsed = {
+ 0,
+ };
+ struct dht_container *rebal_entry = NULL;
+ inode_t *inode = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ call_frame_t *statfs_frame = NULL;
+ xlator_t *old_THIS = NULL;
+ data_t *tmp = NULL;
+ int fop_errno = 0;
+ gf_dht_migrate_data_type_t rebal_type = GF_DHT_MIGRATE_DATA;
+ char value[MAX_REBAL_TYPE_SIZE] = {
+ 0,
+ };
+ struct iatt *iatt_ptr = NULL;
+ gf_boolean_t update_skippedcount = _gf_true;
+ int i = 0;
+ gf_boolean_t should_i_migrate = 0;
+
+ rebal_entry = (struct dht_container *)opaque;
+ if (!rebal_entry) {
+ gf_log("DHT", GF_LOG_ERROR, "rebal_entry is NULL");
+ ret = -1;
+ goto out;
+ }
- while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
- &entries)) != 0) {
+ this = rebal_entry->this;
- if (ret < 0) {
+ conf = this->private;
- gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s."
- " Aborting migrate-data",
- strerror(readdir_operrno));
- goto out;
- }
+ defrag = conf->defrag;
- /* Need to keep track of ENOENT errno, that means, there is no
- need to send more readdirp() */
- readdir_operrno = errno;
+ loc = rebal_entry->parent_loc;
- if (list_empty (&entries.list))
- break;
+ migrate_data = rebal_entry->migrate_data;
- free_entries = _gf_true;
+ entry = rebal_entry->df_entry;
+ iatt_ptr = &entry->d_stat;
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- offset = entry->d_off;
+ if (defrag->stats == _gf_true) {
+ gettimeofday(&start, NULL);
+ }
- if (!strcmp (entry->d_name, ".") ||
- !strcmp (entry->d_name, ".."))
- continue;
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match(defrag, entry->d_name,
+ entry->d_stat.ia_size) == _gf_false)) {
+ gf_log(this->name, GF_LOG_ERROR, "pattern_match failed");
+ goto out;
+ }
- if (IA_ISDIR (entry->d_stat.ia_type))
- continue;
+ memset(&entry_loc, 0, sizeof(entry_loc));
- defrag->num_files_lookedup++;
+ ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
+ if (ret) {
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
- loc_wipe (&entry_loc);
- ret =dht_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Child loc"
- " build failed");
- goto out;
- }
+ ret = 0;
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- " gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ gf_log(this->name, GF_LOG_ERROR, "Child loc build failed");
- uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ goto out;
+ }
- if (uuid_is_null (loc->gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- " gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
- uuid_copy (entry_loc.pargfid, loc->gfid);
+ gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
- entry_loc.inode->ia_type = entry->d_stat.ia_type;
+ gf_uuid_copy(entry_loc.pargfid, loc->gfid);
- ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
- NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s"
- " lookup failed", entry_loc.path);
- continue;
- }
+ ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
- ret = syncop_getxattr (this, &entry_loc, &dict,
- GF_XATTR_NODE_UUID_KEY);
- if(ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to "
- "get node-uuid for %s", entry_loc.path);
- continue;
- }
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s lookup failed", entry_loc.path);
+
+ /* Increase failure count only for remove-brick op, so that
+ * user is warned to check the removed-brick for any files left
+ * unmigrated
+ */
+ if (conf->decommission_subvols_cnt) {
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
+ }
- ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
- &uuid_str);
- if(ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to "
- "get node-uuid from dict for %s",
- entry_loc.path);
- continue;
- }
+ ret = 0;
+ goto out;
+ }
- if (uuid_parse (uuid_str, node_uuid)) {
- gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
- "failed for %s", entry_loc.path);
- continue;
- }
+ iatt_ptr = &iatt;
- /* if file belongs to different node, skip migration
- * the other node will take responsibility of migration
- */
- if (uuid_compare (node_uuid, defrag->node_uuid)) {
- gf_log (this->name, GF_LOG_TRACE, "%s does not"
- "belong to this node", entry_loc.path);
- continue;
+ hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", entry_loc.path);
+ ret = 0;
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached(this, entry_loc.inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s", entry_loc.path);
+
+ ret = 0;
+ goto out;
+ }
+
+ if (hashed_subvol == cached_subvol) {
+ ret = 0;
+ goto out;
+ }
+
+ inode = inode_link(entry_loc.inode, entry_loc.parent, entry->d_name, &iatt);
+ inode_unref(entry_loc.inode);
+ /* use the inode returned by inode_link */
+ entry_loc.inode = inode;
+
+ old_THIS = THIS;
+ THIS = this;
+ statfs_frame = create_frame(this, this->ctx->pool);
+ if (!statfs_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "Insufficient memory. Frame creation failed");
+ ret = -1;
+ goto out;
+ }
+
+ /* async statfs information for honoring min-free-disk */
+ dht_get_du_info(statfs_frame, this, loc);
+ THIS = old_THIS;
+
+ tmp = dict_get(migrate_data, GF_XATTR_FILE_MIGRATE_KEY);
+ if (tmp) {
+ memcpy(value, tmp->data, tmp->len);
+ if (strcmp(value, "force") == 0)
+ rebal_type = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ rebal_type = GF_DHT_MIGRATE_HARDLINK;
+ }
+
+ ret = dht_migrate_file(this, &entry_loc, cached_subvol, hashed_subvol,
+ rebal_type, &fop_errno);
+ if (ret == 1) {
+ if (fop_errno == ENOSPC) {
+ gf_msg_debug(this->name, 0,
+ "migrate-data skipped for"
+ " %s due to space constraints",
+ entry_loc.path);
+
+ /* For remove-brick case if the source is not one of the
+ * removed-brick, do not mark the error as failure */
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] == cached_subvol) {
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ update_skippedcount = _gf_false;
}
+ UNLOCK(&defrag->lock);
- uuid_str = NULL;
+ break;
+ }
+ }
+ }
- dict_del (dict, GF_XATTR_NODE_UUID_KEY);
+ if (update_skippedcount) {
+ LOCK(&defrag->lock);
+ {
+ defrag->skipped += 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ "File migration skipped for %s.", entry_loc.path);
+ }
+
+ } else if (fop_errno == ENOTSUP) {
+ gf_msg_debug(this->name, 0,
+ "migrate-data skipped for"
+ " hardlink %s ",
+ entry_loc.path);
+ LOCK(&defrag->lock);
+ {
+ defrag->skipped += 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ "File migration skipped for %s.", entry_loc.path);
+ }
+ ret = 0;
+ goto out;
+ } else if (ret < 0) {
+ if (fop_errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED, "migrate-data failed for %s",
+ entry_loc.path);
+
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
+ }
- /* if distribute is present, it will honor this key.
- * -1 is returned if distribute is not present or file
- * doesn't have a link-file. If file has link-file, the
- * path of link-file will be the value, and also that
- * guarantees that file has to be mostly migrated */
+ ret = gf_defrag_handle_migrate_error(fop_errno, defrag);
- ret = syncop_getxattr (this, &entry_loc, &dict,
- GF_XATTR_LINKINFO_KEY);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_TRACE, "failed to "
- "get link-to key for %s",
- entry_loc.path);
- continue;
- }
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_ERROR, fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "migrate-data on %s failed:", entry_loc.path);
+ } else if (ret == 1) {
+ ret = 0;
+ }
- ret = syncop_setxattr (this, &entry_loc, migrate_data,
- 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "migrate-data"
- " failed for %s", entry_loc.path);
- defrag->total_failures +=1;
- }
+ goto out;
+ }
+
+ LOCK(&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK(&defrag->lock);
+
+ if (defrag->stats == _gf_true) {
+ gettimeofday(&end, NULL);
+ elapsed = gf_tvdiff(&start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration of "
+ "file:%s size:%" PRIu64
+ " bytes took %.2f"
+ "secs and ret: %d",
+ entry_loc.name, iatt.ia_size, elapsed / 1e6, ret);
+ }
- if (ret == -1) {
- op_errno = errno;
- ret = gf_defrag_handle_migrate_error (op_errno,
- defrag);
-
- if (!ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "migrate-data on %s failed: %s",
- entry_loc.path,
- strerror (op_errno));
- else if (ret == 1)
- continue;
- else if (ret == -1)
- goto out;
- }
+out:
+ if (statfs_frame) {
+ STACK_DESTROY(statfs_frame->root);
+ }
- LOCK (&defrag->lock);
- {
- defrag->total_files += 1;
- defrag->total_data += iatt.ia_size;
- }
- UNLOCK (&defrag->lock);
+ if (iatt_ptr) {
+ LOCK(&defrag->lock);
+ {
+ defrag->size_processed += iatt_ptr->ia_size;
+ }
+ UNLOCK(&defrag->lock);
+ }
+ loc_wipe(&entry_loc);
+
+ return ret;
+}
+
+void *
+gf_defrag_task(void *opaque)
+{
+ struct list_head *q_head = NULL;
+ struct dht_container *iterator = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ pid_t pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag = (gf_defrag_info_t *)opaque;
+ if (!defrag) {
+ gf_msg("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL");
+ goto out;
+ }
+
+ syncopctx_setfspid(&pid);
+
+ q_head = &(defrag->queue[0].list);
+
+ /* The following while loop will dequeue one entry from the defrag->queue
+ under lock. We will update the defrag->global_error only when there
+ is an error which is critical to stop the rebalance process. The stop
+ message will be intimated to other migrator threads by setting the
+ defrag->defrag_status to GF_DEFRAG_STATUS_FAILED.
+
+ In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is
+ maintained so that crawler does not starve the file migration
+ workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that
+ crawler does not go far ahead in filling up the queue.
+ */
+
+ while (_gf_true) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ goto out;
+ }
+
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ /*Throttle down:
+ If the reconfigured count is less than current thread
+ count, then the current thread will sleep */
+
+ /*TODO: Need to refactor the following block to work
+ *under defrag->lock. For now access
+ * defrag->current_thread_count and rthcount under
+ * dfq_mutex lock */
+ while (!defrag->crawl_done && (defrag->recon_thread_count <
+ defrag->current_thread_count)) {
+ defrag->current_thread_count--;
+ gf_msg_debug("DHT", 0,
+ "Thread sleeping. "
+ "current thread count: %d",
+ defrag->current_thread_count);
+
+ pthread_cond_wait(&defrag->df_wakeup_thread,
+ &defrag->dfq_mutex);
+
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0,
+ "Thread wokeup. "
+ "current thread count: %d",
+ defrag->current_thread_count);
+ }
+
+ if (defrag->q_entry_count) {
+ iterator = list_entry(q_head->next, typeof(*iterator), list);
+
+ gf_msg_debug("DHT", 0,
+ "picking entry "
+ "%s",
+ iterator->df_entry->d_name);
+
+ list_del_init(&(iterator->list));
+
+ defrag->q_entry_count--;
+
+ if ((defrag->q_entry_count < MIN_MIGRATE_QUEUE_COUNT) &&
+ defrag->wakeup_crawler) {
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ ret = gf_defrag_migrate_single_file((void *)iterator);
+
+ /*Critical errors: ENOTCONN and ENOSPACE*/
+ if (ret) {
+ dht_set_global_defrag_error(defrag, ret);
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+
+ goto out;
}
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- INIT_LIST_HEAD (&entries.list);
+ gf_defrag_free_container(iterator);
+
+ continue;
+ } else {
+ /* defrag->crawl_done flag is set means crawling
+ file system is done and hence a list_empty when
+ the above flag is set indicates there are no more
+ entries to be added to the queue and rebalance is
+ finished */
+
+ if (!defrag->crawl_done) {
+ defrag->current_thread_count--;
+ gf_msg_debug("DHT", 0,
+ "Thread "
+ "sleeping while waiting "
+ "for migration entries. "
+ "current thread count:%d",
+ defrag->current_thread_count);
+
+ pthread_cond_wait(&defrag->parallel_migration_cond,
+ &defrag->dfq_mutex);
+ }
- if (readdir_operrno == ENOENT)
- break;
+ if (defrag->crawl_done && !defrag->q_entry_count) {
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0, "Exiting thread");
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ goto unlock;
+ } else {
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0,
+ "Thread woke up"
+ " as found migrating entries. "
+ "current thread count:%d",
+ defrag->current_thread_count);
+
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ continue;
+ }
+ }
}
- ret = 0;
+ unlock:
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ break;
+ }
out:
- if (free_entries)
- gf_dirent_free (&entries);
+ return NULL;
+}
- loc_wipe (&entry_loc);
+int static gf_defrag_get_entry(xlator_t *this, int i,
+ struct dht_container **container, loc_t *loc,
+ dht_conf_t *conf, gf_defrag_info_t *defrag,
+ fd_t *fd, dict_t *migrate_data,
+ struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
+ int *perrno)
+{
+ int ret = 0;
+ char is_linkfile = 0;
+ gf_dirent_t *df_entry = NULL;
+ struct dht_container *tmp_container = NULL;
- if (dict)
- dict_unref(dict);
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- if (fd)
- fd_unref (fd);
- return ret;
+ if (dir_dfmeta->offset_var[i].readdir_done == 1) {
+ ret = 0;
+ goto out;
+ }
-}
+ if (dir_dfmeta->fetch_entries[i] == 1) {
+ if (!fd) {
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
+ ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
+ dir_dfmeta->offset_var[i].offset,
+ &(dir_dfmeta->equeue[i]), xattr_req, NULL);
+ if (ret == 0) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_DATA_FAILED,
+ "Readdirp failed. Aborting data migration for "
+ "directory: %s",
+ loc->path);
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty(&(dir_dfmeta->equeue[i].list))) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
+ dir_dfmeta->fetch_entries[i] = 0;
+ }
+
+ while (1) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
+
+ df_entry = list_entry(dir_dfmeta->iterator[i]->next, typeof(*df_entry),
+ list);
+
+ if (&df_entry->list == dir_dfmeta->head[i]) {
+ gf_dirent_free(&(dir_dfmeta->equeue[i]));
+ INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list));
+ dir_dfmeta->fetch_entries[i] = 1;
+ dir_dfmeta->iterator[i] = dir_dfmeta->head[i];
+ ret = 0;
+ goto out;
+ }
+
+ dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next;
+
+ dir_dfmeta->offset_var[i].offset = df_entry->d_off;
+ if (!strcmp(df_entry->d_name, ".") || !strcmp(df_entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR(df_entry->d_stat.ia_type)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
+ continue;
+ }
+
+ defrag->num_files_lookedup++;
+
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match(defrag, df_entry->d_name,
+ df_entry->d_stat.ia_size) == _gf_false)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
+ continue;
+ }
+
+ is_linkfile = check_is_linkfile(NULL, &df_entry->d_stat, df_entry->dict,
+ conf->link_xattr_name);
+
+ if (is_linkfile) {
+ /* No need to add linkto file to the queue for
+ migration. Only the actual data file need to
+ be checked for migration criteria.
+ */
+
+ gf_msg_debug(this->name, 0,
+ "Skipping linkfile"
+ " %s on subvol: %s",
+ df_entry->d_name, conf->local_subvols[i]->name);
+ continue;
+ }
+
+ /*Build Container Structure */
+
+ tmp_container = GF_CALLOC(1, sizeof(struct dht_container),
+ gf_dht_mt_container_t);
+ if (!tmp_container) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for container");
+ ret = -1;
+ goto out;
+ }
+ tmp_container->df_entry = gf_dirent_for_name(df_entry->d_name);
+ if (!tmp_container->df_entry) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for df_entry");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_container->local_subvol_index = i;
+
+ tmp_container->df_entry->d_stat = df_entry->d_stat;
+
+ tmp_container->df_entry->d_ino = df_entry->d_ino;
+
+ tmp_container->df_entry->d_type = df_entry->d_type;
+
+ tmp_container->df_entry->d_len = df_entry->d_len;
+
+ tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc), gf_dht_mt_loc_t);
+ if (!tmp_container->parent_loc) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for loc");
+ ret = -1;
+ goto out;
+ }
+
+ ret = loc_copy(tmp_container->parent_loc, loc);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "loc_copy failed");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_container->migrate_data = migrate_data;
+
+ tmp_container->this = this;
+
+ if (df_entry->dict)
+ tmp_container->df_entry->dict = dict_ref(df_entry->dict);
+
+ /*Build Container Structure >> END*/
+
+ ret = 0;
+ goto out;
+ }
+
+out:
+ if (ret == 0) {
+ *container = tmp_container;
+ } else {
+ if (tmp_container) {
+ gf_defrag_free_container(tmp_container);
+ }
+ }
+
+ return ret;
+}
int
-gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
- dict_t *fix_layout, dict_t *migrate_data)
+gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data, int *perrno)
{
- int ret = -1;
- loc_t entry_loc = {0,};
- fd_t *fd = NULL;
- gf_dirent_t entries;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t *entry = NULL;
- gf_boolean_t free_entries = _gf_false;
- dict_t *dict = NULL;
- off_t offset = 0;
- struct iatt iatt = {0,};
- int readdirp_errno = 0;
-
- ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ gf_dirent_t entries;
+ dict_t *xattr_req = NULL;
+ struct timeval dir_start = {
+ 0,
+ };
+ struct timeval end = {
+ 0,
+ };
+ double elapsed = {
+ 0,
+ };
+ int local_subvols_cnt = 0;
+ int i = 0;
+ int j = 0;
+ struct dht_container *container = NULL;
+ int ldfq_count = 0;
+ int dfc_index = 0;
+ int throttle_up = 0;
+ struct dir_dfmeta *dir_dfmeta = NULL;
+ xlator_t *old_THIS = NULL;
+
+ gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
+ gettimeofday(&dir_start, NULL);
+
+ conf = this->private;
+ local_subvols_cnt = conf->local_subvols_cnt;
+
+ if (!local_subvols_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ old_THIS = THIS;
+ THIS = this;
+
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
+ ret = -1;
+ *perrno = ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
- loc->path);
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
}
+ }
- if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
- ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
- if (ret)
- goto out;
+ dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->head) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->iterator = GF_CALLOC(local_subvols_cnt,
+ sizeof(*(dir_dfmeta->iterator)),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->iterator) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->iterator is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->equeue = GF_CALLOC(local_subvols_cnt, sizeof(entries),
+ gf_dht_mt_dirent_t);
+ if (!dir_dfmeta->equeue) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->offset_var = GF_CALLOC(
+ local_subvols_cnt, sizeof(dht_dfoffset_ctx_t), gf_dht_mt_octx_t);
+ if (!dir_dfmeta->offset_var) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->offset_var is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "dht_dfoffset_ctx_t"
+ "initialization failed");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
+ gf_common_mt_int);
+ if (!dir_dfmeta->fetch_entries) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < local_subvols_cnt; i++) {
+ INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list));
+ dir_dfmeta->head[i] = &(dir_dfmeta->equeue[i].list);
+ dir_dfmeta->iterator[i] = dir_dfmeta->head[i];
+ dir_dfmeta->fetch_entries[i] = 1;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set dict for "
+ "key: %s",
+ conf->link_xattr_name);
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ Job: Read entries from each local subvol and store the entries
+ in equeue array of linked list. Now pick one entry from the
+ equeue array in a round robin basis and add them to defrag Queue.
+ */
+
+ while (!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ /*Throttle up: If reconfigured count is higher than
+ current thread count, wake up the sleeping threads
+ TODO: Need to refactor this. Instead of making the
+ thread sleep and wake, we should terminate and spawn
+ threads on-demand*/
+
+ if (defrag->recon_thread_count > defrag->current_thread_count) {
+ throttle_up = (defrag->recon_thread_count -
+ defrag->current_thread_count);
+ for (j = 0; j < throttle_up; j++) {
+ pthread_cond_signal(&defrag->df_wakeup_thread);
+ }
+ }
+
+ while (defrag->q_entry_count > MAX_MIGRATE_QUEUE_COUNT) {
+ defrag->wakeup_crawler = 1;
+ pthread_cond_wait(&defrag->rebalance_crawler_alarm,
+ &defrag->dfq_mutex);
+ }
+
+ ldfq_count = defrag->q_entry_count;
+
+ if (defrag->wakeup_crawler) {
+ defrag->wakeup_crawler = 0;
+ }
}
+ pthread_mutex_unlock(&defrag->dfq_mutex);
- gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+ while (
+ ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
+ !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ goto out;
+ }
+
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Found "
+ "error from gf_defrag_get_entry");
- fd = fd_create (loc->inode, defrag->pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
ret = -1;
goto out;
+ }
+
+ /* Check if we got an entry, else we need to move the
+ index to the next subvol */
+ if (!container) {
+ GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt);
+ continue;
+ }
+
+ /* Q this entry in the dfq */
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ list_add_tail(&container->list, &(defrag->queue[0].list));
+ defrag->q_entry_count++;
+ ldfq_count = defrag->q_entry_count;
+
+ gf_msg_debug(this->name, 0,
+ "added "
+ "file:%s parent:%s to the queue ",
+ container->df_entry->d_name,
+ container->parent_loc->path);
+
+ pthread_cond_signal(&defrag->parallel_migration_cond);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+
+ GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt);
+ }
+ }
+
+ gettimeofday(&end, NULL);
+ elapsed = gf_tvdiff(&dir_start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration operation on dir %s took "
+ "%.2f secs",
+ loc->path, elapsed / 1e6);
+ ret = 0;
+out:
+ THIS = old_THIS;
+ gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ /* It does not matter if it errored out - this number is
+ * used to calculate rebalance estimated time to complete.
+ * No locking required as dirs are processed by a single thread.
+ */
+ defrag->num_dirs_processed++;
+ return ret;
+}
+
+int
+gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout)
+{
+ int ret;
+ dht_conf_t *conf = NULL;
+ /*
+ * Now we're ready to update the directory commit hash for the volume
+ * root, so that hash miscompares and broadcast lookups can stop.
+ * However, we want to skip that if fix-layout is all we did. In
+ * that case, we want the miscompares etc. to continue until a real
+ * rebalance is complete.
+ */
+ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
+ defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
+ return 0;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ /*Uh oh
+ */
+ return -1;
+ }
+
+ if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) {
+ /* Commit hash updates are only done on local subvolumes and
+ * only when lookup optimization is needed (for older client
+ * support)
+ */
+ return 0;
+ }
+
+ ret = dict_set_uint32(fix_layout, "new-commit-hash",
+ defrag->new_commit_hash);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set new-commit-hash");
+ return -1;
+ }
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "fix layout on %s failed", loc->path);
+
+ if (-ret == ENOENT || -ret == ESTALE) {
+ /* Dir most likely is deleted */
+ return 0;
}
- ret = syncop_opendir (this, loc, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
- loc->path);
- ret = -1;
+ return -1;
+ }
+
+ /* TBD: find more efficient solution than adding/deleting every time */
+ dict_del(fix_layout, "new-commit-hash");
+
+ return 0;
+}
+
+int
+gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *linked_inode = NULL, *inode = NULL;
+ dht_conf_t *conf = NULL;
+ int perrno = 0;
+
+ conf = this->private;
+ if (!conf) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup(this, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ if (strcmp(loc->path, "/") == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "lookup failed for:%s", loc->path);
+
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "Dir:%s renamed or removed. Skipping", loc->path);
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "lookup failed for:%s", loc->path);
+
+ defrag->total_failures++;
+ goto out;
+ }
+ }
+
+ fd = fd_create(loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(this, loc, fd, NULL, NULL);
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ }
+
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to open dir %s, "
+ "err:%d",
+ loc->path, -ret);
+
+ ret = -1;
+ goto out;
+ }
+
+ fd_bind(fd);
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, NULL,
+ NULL)) != 0) {
+ if (ret < 0) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_READDIR_ERROR,
+ "readdirp failed for "
+ "path %s. Aborting fix-layout",
+ loc->path);
+
+ ret = -1;
+ goto out;
}
- INIT_LIST_HEAD (&entries.list);
- while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
- &entries)) != 0)
+ if (list_empty(&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
{
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
- ". Aborting fix-layout",strerror(errno));
- goto out;
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+ if (!IA_ISDIR(entry->d_stat.ia_type)) {
+ continue;
+ }
+ loc_wipe(&entry_loc);
+
+ ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Child loc"
+ " build failed for entry: %s",
+ entry->d_name);
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ goto out;
+ } else {
+ continue;
}
+ }
+
+ if (gf_uuid_is_null(entry->d_stat.ia_gfid)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s/%s"
+ " gfid not present",
+ loc->path, entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ /*In case the gfid stored in the inode by inode_link
+ * and the gfid obtained in the lookup differs, then
+ * client3_3_lookup_cbk will return ESTALE and proper
+ * error will be captured
+ */
+
+ linked_inode = inode_link(entry_loc.inode, loc->inode,
+ entry->d_name, &entry->d_stat);
+
+ inode = entry_loc.inode;
+ entry_loc.inode = linked_inode;
+ inode_unref(inode);
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s/%s"
+ " gfid not present",
+ loc->path, entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy(entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret,
+ DHT_MSG_DIR_LOOKUP_FAILED,
+ "Dir:%s renamed or removed. "
+ "Skipping",
+ loc->path);
+ ret = 0;
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ continue;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s",
+ entry_loc.path);
- /* Need to keep track of ENOENT errno, that means, there is no
- need to send more readdirp() */
- readdirp_errno = errno;
+ defrag->total_failures++;
- if (list_empty (&entries.list))
- break;
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ ret = -1;
+ goto out;
+ } else {
+ continue;
+ }
+ }
+ }
- free_entries = _gf_true;
+ /* A return value of 2 means, either process_dir or
+ * lookup of a dir failed. Hence, don't commit hash
+ * for the current directory*/
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
+ migrate_data);
- offset = entry->d_off;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
+ goto out;
+ }
- if (!strcmp (entry->d_name, ".") ||
- !strcmp (entry->d_name, ".."))
- continue;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Fix layout failed for %s", entry_loc.path);
- if (!IA_ISDIR (entry->d_stat.ia_type))
- continue;
+ defrag->total_failures++;
- loc_wipe (&entry_loc);
- ret =dht_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Child loc"
- " build failed");
- goto out;
- }
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- "gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ goto out;
+ } else {
+ /* Let's not commit-hash if
+ * gf_defrag_fix_layout failed*/
+ continue;
+ }
+ }
+ }
- entry_loc.inode->ia_type = entry->d_stat.ia_type;
+ gf_dirent_free(&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD(&entries.list);
+ }
+
+ /* A directory layout is fixed only after its subdirs are healed to
+ * any newly added bricks. If the layout is fixed before subdirs are
+ * healed, the newly added brick will get a non-null layout.
+ * Any subdirs which hash to that layout will no longer show up
+ * in a directory listing until they are healed.
+ */
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+ /* In case of a race where the directory is deleted just before
+ * layout setxattr, the errors are updated in the layout structure.
+ * We can use this information to make a decision whether the directory
+ * is deleted entirely.
+ */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Setxattr failed. Dir %s "
+ "renamed or removed",
+ loc->path);
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Setxattr failed for %s", loc->path);
- uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
- if (uuid_is_null (loc->gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- "gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ defrag->total_failures++;
- uuid_copy (entry_loc.pargfid, loc->gfid);
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ ret = -1;
+ goto out;
+ }
+ }
+ }
- ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
- NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s"
- " lookup failed", entry_loc.path);
- continue;
- }
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- ret = syncop_setxattr (this, &entry_loc, fix_layout,
- 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Setxattr "
- "failed for %s", entry_loc.path);
- defrag->defrag_status =
- GF_DEFRAG_STATUS_FAILED;
- defrag->total_failures ++;
- goto out;
- }
- ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
- fix_layout, migrate_data);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Fix layout "
- "failed for %s", entry_loc.path);
- defrag->total_failures++;
- goto out;
- }
+ if (ret) {
+ if (perrno == ENOENT || perrno == ESTALE) {
+ ret = 0;
+ goto out;
+ } else {
+ defrag->total_failures++;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DEFRAG_PROCESS_DIR_FAILED,
+ "gf_defrag_process_dir failed for "
+ "directory: %s",
+ loc->path);
+
+ if (conf->decommission_in_progress) {
+ goto out;
}
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- INIT_LIST_HEAD (&entries.list);
- if (readdirp_errno == ENOENT)
- break;
+ }
}
+ }
- ret = 0;
+ gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
+
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ defrag->total_failures++;
+
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
+ "Settle hash failed for %s", loc->path);
+
+ ret = -1;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ goto out;
+ }
+ }
+
+ ret = 0;
out:
- if (free_entries)
- gf_dirent_free (&entries);
+ if (free_entries)
+ gf_dirent_free(&entries);
- loc_wipe (&entry_loc);
+ loc_wipe(&entry_loc);
- if (dict)
- dict_unref(dict);
+ if (fd)
+ fd_unref(fd);
- if (fd)
- fd_unref (fd);
+ return ret;
+}
+int
+dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
+ loc_t *loc)
+{
+ dict_t *dict = NULL;
+ uuid_t *uuid_ptr = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
+ }
+
+ if (!ret)
+ goto out;
+
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (ret) {
return ret;
+ }
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "local subvol: "
+ "%s",
+ conf->local_subvols[i]->name);
+
+ for (j = 0; j < conf->local_nodeuuids[i].count; j++) {
+ uuid_ptr = &(conf->local_nodeuuids[i].elements[j].uuid);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, "node uuid : %s",
+ uuid_utoa(*uuid_ptr));
+ }
+ }
+ return ret;
}
+/* Functions for the rebalance estimates feature */
-int
-gf_defrag_start_crawl (void *data)
+uint64_t
+gf_defrag_subvol_file_size(xlator_t *this, loc_t *root_loc)
{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- int ret = -1;
- loc_t loc = {0,};
- struct iatt iatt = {0,};
- struct iatt parent = {0,};
- dict_t *fix_layout = NULL;
- dict_t *migrate_data = NULL;
- dict_t *status = NULL;
-
- this = data;
- if (!this)
- goto out;
+ int ret = -1;
+ struct statvfs buf = {
+ 0,
+ };
+
+ ret = syncop_statfs(this, root_loc, &buf, NULL, NULL);
+ if (ret) {
+ /* Aargh! */
+ return 0;
+ }
+ return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize);
+}
- conf = this->private;
- if (!conf)
- goto out;
+uint64_t
+gf_defrag_total_file_size(xlator_t *this, loc_t *root_loc)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ uint64_t size_files = 0;
+ uint64_t total_size = 0;
- defrag = conf->defrag;
- if (!defrag)
- goto out;
- dht_build_root_inode (this, &defrag->root_inode);
- if (!defrag->root_inode)
- goto out;
+ conf = this->private;
+ if (!conf) {
+ return 0;
+ }
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ size_files = gf_defrag_subvol_file_size(conf->local_subvols[i],
+ root_loc);
+ total_size += size_files;
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "local subvol: %s,"
+ "cnt = %" PRIu64,
+ conf->local_subvols[i]->name, size_files);
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, "Total size files = %" PRIu64,
+ total_size);
+
+ return total_size;
+}
- dht_build_root_loc (defrag->root_inode, &loc);
+static void *
+dht_file_counter_thread(void *args)
+{
+ gf_defrag_info_t *defrag = NULL;
+ loc_t root_loc = {
+ 0,
+ };
+ struct timespec time_to_wait = {
+ 0,
+ };
+ uint64_t tmp_size = 0;
+
+ if (!args)
+ return NULL;
- /* fix-layout on '/' first */
+ defrag = (gf_defrag_info_t *)args;
+ dht_build_root_loc(defrag->root_inode, &root_loc);
- ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+ while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
- goto out;
+ pthread_mutex_lock(&defrag->fc_mutex);
+ pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
+ &time_to_wait);
+
+ pthread_mutex_unlock(&defrag->fc_mutex);
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
+ break;
+
+ tmp_size = gf_defrag_total_file_size(defrag->this, &root_loc);
+
+ gf_log("dht", GF_LOG_INFO, "tmp data size =%" PRIu64, tmp_size);
+
+ if (!tmp_size) {
+ gf_msg("dht", GF_LOG_ERROR, 0, 0,
+ "Failed to get "
+ "the total data size. Unable to estimate "
+ "time to complete rebalance.");
+ } else {
+ g_totalsize = tmp_size;
+ gf_msg_debug("dht", 0, "total data size =%" PRIu64, g_totalsize);
}
+ }
- fix_layout = dict_new ();
- if (!fix_layout) {
- ret = -1;
- goto out;
+ return NULL;
+}
+
+int
+gf_defrag_estimates_cleanup(xlator_t *this, gf_defrag_info_t *defrag,
+ pthread_t filecnt_thread)
+{
+ int ret = -1;
+
+ /* Wake up the filecounter thread.
+ * By now the defrag status will no longer be
+ * GF_DEFRAG_STATUS_STARTED so the thread will exit the loop.
+ */
+ pthread_mutex_lock(&defrag->fc_mutex);
+ {
+ pthread_cond_broadcast(&defrag->fc_wakeup_cond);
+ }
+ pthread_mutex_unlock(&defrag->fc_mutex);
+
+ ret = pthread_join(filecnt_thread, NULL);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, ret, 0,
+ "file_counter_thread: pthread_join failed.");
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+
+ conf = this->private;
+ defrag = conf->defrag;
+
+ g_totalsize = gf_defrag_total_file_size(this, loc);
+ if (!g_totalsize) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Failed to get "
+ "the total data size. Unable to estimate "
+ "time to complete rebalance.");
+ goto out;
+ }
+
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
+ (void *)defrag, "dhtfcnt");
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ret, 0,
+ "Failed to "
+ "create the file counter thread ");
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Init and cleanup functions for parallel file migration*/
+int
+gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
+ pthread_t **tid_array, int *thread_index)
+{
+ int ret = -1;
+ int thread_spawn_count = 0;
+ int index = 0;
+ pthread_t *tid = NULL;
+
+ if (!defrag)
+ goto out;
+
+ /* Initialize global entry queue */
+ defrag->queue = GF_CALLOC(1, sizeof(struct dht_container),
+ gf_dht_mt_container_t);
+
+ if (!defrag->queue) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Failed to initialise migration queue");
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&(defrag->queue[0].list));
+
+ thread_spawn_count = MAX(MAX_REBAL_THREADS, 4);
+
+ gf_msg_debug(this->name, 0, "thread_spawn_count: %d", thread_spawn_count);
+
+ tid = GF_CALLOC(thread_spawn_count, sizeof(pthread_t),
+ gf_common_mt_pthread_t);
+ if (!tid) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Failed to create migration threads");
+ ret = -1;
+ goto out;
+ }
+ defrag->current_thread_count = thread_spawn_count;
+
+ /*Spawn Threads Here*/
+ while (index < thread_spawn_count) {
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
+ (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
+ if (ret != 0) {
+ gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
+ index);
+ ret = -1;
+ goto out;
+ } else {
+ gf_log("DHT", GF_LOG_INFO,
+ "Thread[%d] "
+ "creation successful",
+ index);
}
+ index++;
+ }
+
+ ret = 0;
+out:
+ *thread_index = index;
+ *tid_array = tid;
+
+ return ret;
+}
+
+int
+gf_defrag_parallel_migration_cleanup(gf_defrag_info_t *defrag,
+ pthread_t *tid_array, int thread_index)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!defrag)
+ goto out;
+
+ /* Wake up all migration threads */
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ defrag->crawl_done = 1;
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ pthread_cond_broadcast(&defrag->df_wakeup_thread);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
- ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ /*Wait for all the threads to complete their task*/
+ for (i = 0; i < thread_index; i++) {
+ pthread_join(tid_array[i], NULL);
+ }
+
+ GF_FREE(tid_array);
+
+ /* Cleanup the migration queue */
+ if (defrag->queue) {
+ gf_dirent_free(defrag->queue[0].df_entry);
+ INIT_LIST_HEAD(&(defrag->queue[0].list));
+ }
+
+ GF_FREE(defrag->queue);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+gf_defrag_start_crawl(void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_frame_t *statfs_frame = NULL;
+ xlator_t *old_THIS = NULL;
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ struct iatt parent = {
+ 0,
+ };
+ int thread_index = 0;
+ pthread_t *tid = NULL;
+ pthread_t filecnt_thread;
+ gf_boolean_t fc_thread_started = _gf_false;
+
+ this = data;
+ if (!this)
+ goto exit;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto exit;
+
+ conf = this->private;
+ if (!conf)
+ goto exit;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto exit;
+
+ defrag->start_time = gf_time();
+
+ dht_build_root_inode(this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc(defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
+ "Failed to start rebalance: look up on / failed");
+ ret = -1;
+ goto out;
+ }
+
+ old_THIS = THIS;
+ THIS = this;
+
+ statfs_frame = create_frame(this, this->ctx->pool);
+ if (!statfs_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "Insufficient memory. Frame creation failed");
+ ret = -1;
+ goto out;
+ }
+
+ /* async statfs update for honoring min-free-disk */
+ dht_get_du_info(statfs_frame, this, &loc);
+ THIS = old_THIS;
+
+ fix_layout = dict_new();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * Unfortunately, we can't do special xattrs (like fix.layout) and
+ * real ones in the same call currently, and changing it seems
+ * riskier than just doing two calls.
+ */
+
+ gf_log(this->name, GF_LOG_INFO, "%s using commit hash %u", __func__,
+ conf->vol_commit_hash);
+
+ ret = dict_set_uint32(fix_layout, conf->commithash_xattr_name,
+ conf->vol_commit_hash);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set %s",
+ conf->commithash_xattr_name);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set commit hash on %s. "
+ "Rebalance cannot proceed.",
+ loc.path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ /* We now return to our regularly scheduled program. */
+
+ ret = dict_set_str(fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "Failed to start rebalance:"
+ "Failed to set dictionary value: key = %s",
+ GF_XATTR_FIX_LAYOUT_KEY);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ defrag->new_commit_hash = conf->vol_commit_hash;
+
+ ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_FAILED,
+ "fix layout on %s failed", loc.path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ /* We need to migrate files */
+
+ migrate_data = dict_new();
+ if (!migrate_data) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(
+ migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
+ (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) ? "force" : "non-force");
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
- goto out;
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
- ret = syncop_setxattr (this, &loc, fix_layout, 0);
+ ret = dht_init_local_subvols_and_nodeuuids(this, conf, &loc);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
- loc.path);
- defrag->total_failures++;
- goto out;
+ ret = -1;
+ goto out;
}
- if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
- migrate_data = dict_new ();
- if (!migrate_data) {
- ret = -1;
- goto out;
- }
- if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
- ret = dict_set_str (migrate_data,
- "distribute.migrate-data", "force");
- else
- ret = dict_set_str (migrate_data,
- "distribute.migrate-data",
- "non-force");
- if (ret)
- goto out;
+ /* Initialise the structures required for parallel migration */
+ ret = gf_defrag_parallel_migration_init(this, defrag, &tid,
+ &thread_index);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Aborting rebalance.");
+ goto out;
}
- ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
- migrate_data);
- if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
- (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+
+ ret = gf_defrag_estimates_init(this, &loc, &filecnt_thread);
+ if (ret) {
+ /* Not a fatal error. Allow the rebalance to proceed*/
+ ret = 0;
+ } else {
+ fc_thread_started = _gf_true;
}
+ }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
out:
- LOCK (&defrag->lock);
- {
- status = dict_new ();
- gf_defrag_status_get (defrag, status);
- glusterfs_rebalance_event_notify (status);
- if (status)
- dict_unref (status);
- defrag->is_exiting = 1;
- }
- UNLOCK (&defrag->lock);
-
- if (defrag) {
- GF_FREE (defrag);
- conf->defrag = NULL;
- }
- return ret;
+ /* We are here means crawling the entire file system is done
+ or something failed. Set defrag->crawl_done flag to intimate
+ the migrator threads to exhaust the defrag->queue and terminate*/
+
+ if (ret) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ }
+
+ gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
+
+ if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+ (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+ if (fc_thread_started) {
+ gf_defrag_estimates_cleanup(this, defrag, filecnt_thread);
+ }
+
+ dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status);
+
+ status = dict_new();
+ LOCK(&defrag->lock);
+ {
+ gf_defrag_status_get(conf, status);
+ if (ctx && ctx->notify)
+ ctx->notify(GF_EN_DEFRAG_STATUS, status);
+ if (status)
+ dict_unref(status);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ GF_FREE(defrag);
+ conf->defrag = NULL;
+
+ if (migrate_data)
+ dict_unref(migrate_data);
+
+ if (statfs_frame) {
+ STACK_DESTROY(statfs_frame->root);
+ }
+exit:
+ return ret;
}
-
static int
-gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+gf_defrag_done(int ret, call_frame_t *sync_frame, void *data)
{
- gf_listener_stop();
+ gf_listener_stop(sync_frame->this);
- STACK_DESTROY (sync_frame->root);
- kill (getpid(), SIGTERM);
- return 0;
+ STACK_DESTROY(sync_frame->root);
+ kill(getpid(), SIGTERM);
+ return 0;
}
void *
-gf_defrag_start (void *data)
+gf_defrag_start(void *data)
{
- int ret = -1;
- call_frame_t *frame = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
-
- this = data;
- conf = this->private;
- if (!conf)
- goto out;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+ xlator_t *old_THIS = NULL;
- defrag = conf->defrag;
- if (!defrag)
- goto out;
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
- defrag->pid = frame->root->pid;
+ defrag->pid = frame->root->pid;
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
- ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
- gf_defrag_done, frame, this);
+ old_THIS = THIS;
+ THIS = this;
+ ret = synctask_new(this->ctx->env, gf_defrag_start_crawl, gf_defrag_done,
+ frame, this);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Could not create"
- " task for rebalance");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "Could not create task for rebalance");
+ THIS = old_THIS;
out:
- return NULL;
+ return NULL;
}
-int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+uint64_t
+gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
{
- int ret = 0;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- uint64_t failures = 0;
- char *status = "";
-
- if (!defrag)
- goto out;
+ gf_defrag_info_t *defrag = NULL;
+ double rate_processed = 0;
+ uint64_t total_processed = 0;
+ uint64_t tmp_count = 0;
+ uint64_t time_to_complete = 0;
+ double elapsed = 0;
- ret = 0;
- if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
- goto out;
+ defrag = conf->defrag;
+
+ if (!g_totalsize)
+ goto out;
+
+ elapsed = gf_time() - defrag->start_time;
+
+ /* Don't calculate the estimates for the first 10 minutes.
+ * It is unlikely to be accurate and estimates are not required
+ * if the process finishes in less than 10 mins.
+ */
- files = defrag->total_files;
- size = defrag->total_data;
- lookup = defrag->num_files_lookedup;
- failures = defrag->total_failures;
+ if (elapsed < ESTIMATE_START_INTERVAL) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, 0,
+ "Rebalance estimates will not be available for the "
+ "first %d seconds.",
+ ESTIMATE_START_INTERVAL);
- if (!dict)
- goto log;
+ goto out;
+ }
- ret = dict_set_uint64 (dict, "files", files);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set file count");
+ total_processed = defrag->size_processed;
- ret = dict_set_uint64 (dict, "size", size);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set size of xfer");
+ /* rate at which files processed */
+ rate_processed = (total_processed) / elapsed;
- ret = dict_set_uint64 (dict, "lookups", lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set lookedup file count");
+ tmp_count = g_totalsize;
- ret = dict_set_int32 (dict, "status", defrag->defrag_status);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set status");
+ if (rate_processed) {
+ time_to_complete = (tmp_count) / rate_processed;
+
+ } else {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
+ "Unable to calculate estimated time for rebalance");
+ }
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "TIME: (size) total_processed=%" PRIu64 " tmp_cnt = %" PRIu64
+ ","
+ "rate_processed=%f, elapsed = %f",
+ total_processed, tmp_count, rate_processed, elapsed);
+
+out:
+ return time_to_complete;
+}
+
+int
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ char *status = "";
+ double elapsed = 0;
+ uint64_t time_to_complete = 0;
+ uint64_t time_left = 0;
+ gf_defrag_info_t *defrag = conf->defrag;
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+ failures = defrag->total_failures;
+ skipped = defrag->skipped;
+
+ elapsed = gf_time() - defrag->start_time;
+
+ /* The rebalance is still in progress */
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+ time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
+
+ if (time_to_complete && (time_to_complete > elapsed))
+ time_left = time_to_complete - elapsed;
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "TIME: Estimated total time to complete (size)= %" PRIu64
+ " seconds, seconds left = %" PRIu64 "",
+ time_to_complete, time_left);
+ }
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64(dict, "files", files);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
+
+ ret = dict_set_uint64(dict, "size", size);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set size of xfer");
+
+ ret = dict_set_uint64(dict, "lookups", lookup);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set lookedup file count");
+
+ ret = dict_set_int32(dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set status");
+
+ ret = dict_set_double(dict, "run-time", elapsed);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set run-time");
+
+ ret = dict_set_uint64(dict, "failures", failures);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set failure count");
+
+ ret = dict_set_uint64(dict, "skipped", skipped);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set skipped file count");
+
+ ret = dict_set_uint64(dict, "time-left", time_left);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
- ret = dict_set_uint64 (dict, "failures", failures);
log:
- switch (defrag->defrag_status) {
+ switch (defrag->defrag_status) {
case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
+ status = "not started";
+ break;
case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
+ status = "in progress";
+ break;
case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
+ status = "stopped";
+ break;
case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
+ status = "completed";
+ break;
case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- }
-
- gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s", status);
- gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
- PRIu64", lookups: %"PRIu64", failures: %"PRIu64, files, size,
- lookup, failures);
-
-
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+ "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
+ gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+ "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
+ ", failures: %" PRIu64
+ ", skipped: "
+ "%" PRIu64,
+ files, size, lookup, failures, skipped);
out:
- return 0;
+ return 0;
}
int
-gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
- /* TODO: set a variable 'stop_defrag' here, it should be checked
- in defrag loop */
- int ret = -1;
- GF_ASSERT (defrag);
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ gf_defrag_info_t *defrag = conf->defrag;
- if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
- goto out;
- }
+ GF_ASSERT(defrag);
- gf_log ("", GF_LOG_INFO, "Recieved stop command on rebalance");
- defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
- if (output)
- gf_defrag_status_get (defrag, output);
- ret = 0;
+ gf_msg("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED,
+ "Received stop command on rebalance");
+ defrag->defrag_status = status;
+
+ if (output)
+ gf_defrag_status_get(conf, output);
+ ret = 0;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug("", 0, "Returning %d", ret);
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index f1a4364df20..d9dbf50492f 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -11,852 +11,1987 @@
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
* delete the newpath if it gets EEXISTS from link() call.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "defaults.h"
+#include "dht-lock.h"
+#include <glusterfs/defaults.h>
+int
+dht_rename_unlock(call_frame_t *frame, xlator_t *this);
+int32_t
+dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
int
-dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- /* TODO: undo the damage */
-
- gf_log (this->name, GF_LOG_INFO,
- "rename %s -> %s on %s failed (%s)",
- local->loc.path, local->loc2.path,
- prev->this->name, strerror (op_errno));
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
- /* TODO: construct proper stbuf for dir */
- /*
- * FIXME: is this the correct way to build stbuf and
- * parent bufs?
- */
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
+dht_rename_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ local = frame->local;
-unwind:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
-
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, xdata);
- }
+ dht_set_fixed_dir_stat(&local->preoldparent);
+ dht_set_fixed_dir_stat(&local->postoldparent);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- return 0;
+ if (IA_ISREG(local->stbuf.ia_type))
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+
+ DHT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent, &local->postoldparent,
+ &local->preparent, &local->postparent, local->xattr);
+ return 0;
}
+static void
+dht_rename_dir_unlock_src(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ dht_unlock_namespace(frame, &local->lock[0]);
+ return;
+}
+
+static void
+dht_rename_dir_unlock_dst(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ int op_ret = -1;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[1].ns.directory_ns);
+
+ /* Unlock inodelk */
+ op_ret = dht_unlock_inodelk(frame, local->lock[1].ns.parent_layout.locks,
+ local->lock[1].ns.parent_layout.lk_count,
+ dht_rename_unlock_cbk);
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ if (IA_ISREG(local->stbuf.ia_type))
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+ else
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->loc2.path, dst_gfid);
+
+ dht_rename_unlock_cbk(frame, NULL, this, 0, 0, NULL);
+ }
+
+ return;
+}
+static int
+dht_rename_dir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_rename_dir_unlock_src(frame, this);
+ dht_rename_dir_unlock_dst(frame, this);
+ return 0;
+}
int
-dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
-{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int call_cnt = 0;
- call_frame_t *prev = NULL;
- int i = 0;
-
- conf = this->private;
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- /* TODO: undo the damage */
-
- gf_log (this->name, GF_LOG_INFO,
- "rename %s -> %s on %s failed (%s)",
- local->loc.path, local->loc2.path,
- prev->this->name, strerror (op_errno));
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
- /* TODO: construct proper stbuf for dir */
- /*
- * FIXME: is this the correct way to build stbuf and
- * parent bufs?
+dht_rename_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int subvol_cnt = -1;
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+ subvol_cnt = dht_subvol_cnt(this, prev);
+ local->ret_cache[subvol_cnt] = op_ret;
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "Rename %s -> %s on %s failed, (gfid = %s)", local->loc.path,
+ local->loc2.path, prev->name, gfid);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+
+unwind:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /* We get here with local->call_cnt == 0. Which means
+ * we are the only one executing this code, there is
+ * no contention. Therefore it's safe to manipulate or
+ * deref local->call_cnt directly (without locking).
*/
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
-
- call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
-
- if (!local->call_cnt)
- goto unwind;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == local->dst_hashed)
+ if (local->ret_cache[conf->subvolume_cnt] == 0) {
+ /* count errant subvols in last field of ret_cache */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i] != 0)
+ ++local->ret_cache[conf->subvolume_cnt];
+ }
+ if (local->ret_cache[conf->subvolume_cnt]) {
+ /* undoing the damage:
+ * for all subvolumes, where rename
+ * succeeded, we perform the reverse operation
+ */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i] == 0)
+ ++local->call_cnt;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i])
continue;
- STACK_WIND (frame, dht_rename_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2, NULL);
- if (!--call_cnt)
- break;
- }
+ STACK_WIND(frame, dht_rename_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename, &local->loc2,
+ &local->loc, NULL);
+ }
- return 0;
-unwind:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ return 0;
+ }
+ }
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, NULL);
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
- return 0;
+ dht_rename_dir_unlock(frame, this);
+ }
+
+ return 0;
}
+int
+dht_rename_hashed_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ xlator_t *prev = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "rename %s -> %s on %s failed, (gfid = %s) ", local->loc.path,
+ local->loc2.path, prev->name, gfid);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+
+ call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (!local->call_cnt)
+ goto unwind;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == local->dst_hashed)
+ continue;
+ STACK_WIND_COOKIE(
+ frame, dht_rename_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename, &local->loc, &local->loc2, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+unwind:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+
+ dht_rename_dir_unlock(frame, this);
+ return 0;
+}
int
-dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_do(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (local->op_ret == -1)
- goto err;
+ if (local->op_ret == -1)
+ goto err;
- local->op_ret = 0;
+ local->op_ret = 0;
- STACK_WIND (frame, dht_rename_hashed_dir_cbk,
- local->dst_hashed,
- local->dst_hashed->fops->rename,
- &local->loc, &local->loc2, NULL);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_hashed_dir_cbk, local->dst_hashed,
+ local->dst_hashed, local->dst_hashed->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ return 0;
err:
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
+ dht_rename_dir_unlock(frame, this);
+ return 0;
}
-
int
-dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret > 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+dht_rename_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
- this_call_cnt = dht_frame_return (frame);
+ local = frame->local;
+ prev = cookie;
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (op_ret > 2) {
+ gf_msg_trace(this->name, 0, "readdir on %s for %s returned %d entries",
+ prev->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
- return 0;
-}
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_dir_do(frame, this);
+ }
+
+ return 0;
+}
int
-dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+dht_rename_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_OPENDIR_FAILED,
+ "opendir on %s for %s failed,(gfid = %s) ", prev->name,
+ local->loc.path, gfid);
+ goto err;
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
-
- STACK_WIND (frame, dht_rename_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0, NULL);
+ fd_bind(fd);
+ STACK_WIND_COOKIE(frame, dht_rename_readdir_cbk, prev, prev,
+ prev->fops->readdir, local->fd, 4096, 0, NULL);
- return 0;
+ return 0;
err:
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return(frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_dir_do(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-dht_rename_dir (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_lock2_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ local->fd = fd_create(local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = 0;
+
+ if (!local->dst_cached) {
+ dht_rename_dir_do(frame, this);
+ return 0;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_rename_opendir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir, &local->loc2,
+ local->fd, NULL);
+ }
- conf = frame->this->private;
- local = frame->local;
+ return 0;
- local->call_cnt = conf->subvolume_cnt;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_dir_unlock(frame, this);
+ return 0;
+}
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- gf_log (this->name, GF_LOG_INFO,
- "one of the subvolumes down (%s)",
- conf->subvolumes[i]->name);
- op_errno = ENOTCONN;
- goto err;
- }
- }
+int
+dht_rename_dir_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ if (local->current == &local->lock[0]) {
+ loc = &local->loc2;
+ subvol = local->dst_hashed;
+ local->current = &local->lock[1];
+ } else {
+ loc = &local->loc;
+ subvol = local->src_hashed;
+ local->current = &local->lock[0];
+ }
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_dir_lock2_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_dir_unlock(frame, this);
+ return 0;
+}
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- op_errno = ENOMEM;
- goto err;
+/*
+ * If the hashed subvolumes of both source and dst are the different,
+ * lock in dictionary order of hashed subvol->name. This is important
+ * in case the parent directory is the same for both src and dst to
+ * prevent inodelk deadlocks when racing with a fix-layout op on the parent.
+ *
+ * If the hashed subvols are the same, use the gfid/name to determine
+ * the order of taking locks to prevent entrylk deadlocks when the parent
+ * dirs are the same.
+ *
+ */
+static int
+dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
+{
+ int ret = 0;
+ int op_ret = 0;
+ dht_local_t *local = NULL;
+ char *src = NULL;
+ char *dst = NULL;
+
+ local = frame->local;
+
+ if (local->src_hashed->name == local->dst_hashed->name) {
+ ret = 0;
+ } else {
+ ret = strcmp(local->src_hashed->name, local->dst_hashed->name);
+ }
+
+ if (ret == 0) {
+ /* hashed subvols are the same for src and dst */
+ /* Entrylks need to be ordered*/
+
+ src = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc.name) + 1);
+ if (!src) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Insufficient memory for src");
+ op_ret = -1;
+ goto out;
}
- local->op_ret = 0;
+ if (!gf_uuid_is_null(local->loc.pargfid))
+ uuid_utoa_r(local->loc.pargfid, src);
+ else if (local->loc.parent)
+ uuid_utoa_r(local->loc.parent->gfid, src);
+ else
+ src[0] = '\0';
- if (!local->dst_cached) {
- dht_rename_dir_do (frame, this);
- return 0;
- }
+ strcat(src, local->loc.name);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd, NULL);
+ dst = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc2.name) + 1);
+ if (!dst) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Insufficient memory for dst");
+ op_ret = -1;
+ goto out;
}
- return 0;
+ if (!gf_uuid_is_null(local->loc2.pargfid))
+ uuid_utoa_r(local->loc2.pargfid, dst);
+ else if (local->loc2.parent)
+ uuid_utoa_r(local->loc2.parent->gfid, dst);
+ else
+ dst[0] = '\0';
+
+ strcat(dst, local->loc2.name);
+ ret = strcmp(src, dst);
+ }
+
+ if (ret <= 0) {
+ /*inodelk in dictionary order of hashed subvol names*/
+ /*entrylk in dictionary order of gfid/basename */
+ local->current = &local->lock[0];
+ *loc = &local->loc;
+ *subvol = local->src_hashed;
+
+ } else {
+ local->current = &local->lock[1];
+ *loc = &local->loc2;
+ *subvol = local->dst_hashed;
+ }
+
+ op_ret = 0;
+
+out:
+ return op_ret;
+}
+
+int
+dht_rename_dir(call_frame_t *frame, xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+ int op_errno = -1;
+
+ conf = frame->this->private;
+ local = frame->local;
+
+ local->ret_cache = GF_CALLOC(conf->subvolume_cnt + 1, sizeof(int),
+ gf_dht_ret_cache_t);
+
+ if (local->ret_cache == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "Rename dir failed: subvolume down (%s)",
+ conf->subvolumes[i]->name);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
+
+ /* Locks on src and dst needs to ordered which otherwise might cause
+ * deadlocks when rename (src, dst) and rename (dst, src) is done from
+ * two different clients
+ */
+ ret = dht_order_rename_lock(frame, &loc, &subvol);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Rename must take locks on src to avoid lookup selfheal from
+ * recreating src on those subvols where the rename was successful.
+ * The locks can't be issued parallel as two different clients might
+ * attempt same rename command and be in dead lock.
+ */
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_dir_lock1_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
+static int
+dht_rename_track_for_changelog(xlator_t *this, dict_t *xattr, loc_t *oldloc,
+ loc_t *newloc)
+{
+ int ret = -1;
+ dht_changelog_rename_info_t *info = NULL;
+ char *name = NULL;
+ int len1 = 0;
+ int len2 = 0;
+ int size = 0;
+
+ if (!xattr || !oldloc || !newloc || !this)
+ return ret;
+
+ len1 = strlen(oldloc->name) + 1;
+ len2 = strlen(newloc->name) + 1;
+ size = sizeof(dht_changelog_rename_info_t) + len1 + len2;
+
+ info = GF_CALLOC(size, sizeof(char), gf_common_mt_char);
+ if (!info) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to calloc memory");
+ return ret;
+ }
+
+ gf_uuid_copy(info->old_pargfid, oldloc->pargfid);
+ gf_uuid_copy(info->new_pargfid, newloc->pargfid);
+
+ info->oldname_len = len1;
+ info->newname_len = len2;
+ strncpy(info->buffer, oldloc->name, len1);
+ name = info->buffer + len1;
+ strncpy(name, newloc->name, len2);
+
+ ret = dict_set_bin(xattr, DHT_CHANGELOG_RENAME_OP_KEY, info, size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s",
+ DHT_CHANGELOG_RENAME_OP_KEY, oldloc->name);
+ GF_FREE(info);
+ }
+
+ return ret;
+}
+
+#define DHT_MARKER_DONT_ACCOUNT(xattr) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str(xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, "yes"); \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, local->loc.path); \
+ } \
+ } while (0)
+
+#define DHT_CHANGELOG_TRACK_AS_RENAME(xattr, oldloc, newloc) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to create dictionary to " \
+ "track rename"); \
+ break; \
+ } \
+ } \
+ \
+ tmp = dht_rename_track_for_changelog(this, xattr, oldloc, newloc); \
+ \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ DHT_CHANGELOG_RENAME_OP_KEY, (oldloc)->path); \
+ } \
+ } while (0)
int
-dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_rename_unlock(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ int op_ret = -1;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_ilock_wrap_t inodelk_wrapper = {
+ 0,
+ };
+
+ local = frame->local;
+ inodelk_wrapper.locks = local->rename_inodelk_backward_compatible;
+ inodelk_wrapper.lk_count = local->rename_inodelk_bc_count;
+
+ op_ret = dht_unlock_inodelk_wrapper(frame, &inodelk_wrapper);
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ if (IA_ISREG(local->stbuf.ia_type))
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+ else
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->loc2.path, dst_gfid);
+ }
+
+ dht_unlock_namespace(frame, &local->lock[0]);
+ dht_unlock_namespace(frame, &local->lock[1]);
+
+ dht_rename_unlock_cbk(frame, NULL, this, local->op_ret, local->op_errno,
+ NULL);
+ return 0;
+}
- local = frame->local;
- prev = cookie;
+int
+dht_rename_done(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "!local, should not happen");
- goto out;
- }
+ local = frame->local;
- this_call_cnt = dht_frame_return (frame);
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal(frame, this);
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: unlink on %s failed (%s)",
- local->loc.path, prev->this->name, strerror (op_errno));
- }
+ dht_rename_unlock(frame, this);
+ return 0;
+}
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
-
- if (is_last_call (this_call_cnt)) {
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent, NULL);
- }
+int
+dht_rename_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_VALUE,
+ "!local, should not happen");
+ goto out;
+ }
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLINK_FAILED,
+ "%s: Rename: unlink on %s failed ", local->loc.path, prev->name);
+ }
+
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_done(frame, this);
+ }
out:
- return 0;
+ return 0;
}
-
int
-dht_rename_cleanup (call_frame_t *frame)
+dht_rename_cleanup(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ this = frame->this;
- local = frame->local;
- this = frame->this;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ if (src_cached == dst_cached)
+ goto nolinks;
- if (src_cached == dst_cached)
- goto nolinks;
+ if (local->linked && (dst_hashed != src_hashed) &&
+ (dst_hashed != src_cached)) {
+ call_cnt++;
+ }
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ if (local->added_link && (src_cached != dst_hashed)) {
+ call_cnt++;
+ }
- if (src_cached != dst_hashed)
- call_cnt++;
+ local->call_cnt = call_cnt;
- local->call_cnt = call_cnt;
+ if (!call_cnt)
+ goto nolinks;
- if (!call_cnt)
- goto nolinks;
+ DHT_MARK_FOP_INTERNAL(xattr);
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_hashed, dst_hashed->fops->unlink,
- &local->loc, 0, NULL);
- }
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
- if (src_cached != dst_hashed) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc2, 0, NULL);
- }
+ if (local->linked && (dst_hashed != src_hashed) &&
+ (dst_hashed != src_cached)) {
+ dict_t *xattr_new = NULL;
- return 0;
+ gf_msg_trace(this->name, 0,
+ "unlinking linkfile %s @ %s => %s, (gfid = %s)",
+ local->loc.path, dst_hashed->name, src_cached->name, gfid);
-nolinks:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent, NULL);
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- return 0;
-}
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_hashed, dst_hashed,
+ dst_hashed->fops->unlink, &local->loc, 0, xattr_new);
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
-int
-dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
-
- prev = cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "link/file %s on %s failed (%s)",
- local->loc.path, prev->this->name, strerror (op_errno));
+ if (local->added_link && (src_cached != dst_hashed)) {
+ dict_t *xattr_new = NULL;
+
+ gf_msg_trace(this->name, 0, "unlinking link %s => %s (%s), (gfid = %s)",
+ local->loc.path, local->loc2.path, src_cached->name, gfid);
+
+ xattr_new = dict_copy_with_ref(xattr, NULL);
+
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
}
+ /* *
+ * The link to file is created using root permission.
+ * Hence deletion should happen using root. Otherwise
+ * it will fail.
+ */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached,
+ src_cached->fops->unlink, &local->loc2, 0, xattr_new);
- DHT_STACK_DESTROY (frame);
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
- return 0;
-}
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+nolinks:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+ dht_rename_unlock(frame, this);
+ return 0;
+}
int
-dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *rename_subvol = NULL;
- call_frame_t *link_frame = NULL;
- dht_local_t *link_local = NULL;
-
- local = frame->local;
- prev = cookie;
-
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: rename on %s failed (%s)", local->loc.path,
- prev->this->name, strerror (op_errno));
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto cleanup;
- }
+dht_rename_unlink(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *rename_subvol = NULL;
+ dict_t *xattr = NULL;
- if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
- link_frame = copy_frame (frame);
- if (!link_frame) {
- goto err;
- }
+ local = frame->local;
- /* fop value sent as maxvalue because it is not used
- anywhere in this case */
- link_local = dht_local_init (link_frame, &local->loc2, NULL,
- GF_FOP_MAXVALUE);
- if (!link_local) {
- goto err;
- }
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- if (link_local->loc.inode)
- inode_unref (link_local->loc.inode);
- link_local->loc.inode = inode_ref (local->loc.inode);
- uuid_copy (link_local->gfid, local->loc.inode->gfid);
+ local->call_cnt = 0;
- dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
- src_cached, dst_hashed, &link_local->loc);
- }
+ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+ * is called. since rename has already happened on rename_subvol,
+ * unlink shouldn't be sent for oldpath (either linkfile or cached-file)
+ * on rename_subvol. */
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
-err:
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent, prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent, prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent, prev->this);
-
- /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
- * is called. since rename has already happened on rename_subvol,
- * unlink should not be sent for oldpath (either linkfile or cached-file)
- * on rename_subvol. */
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+ /* TODO: delete files in background */
- /* TODO: delete files in background */
+ if (src_cached != dst_hashed && src_cached != dst_cached)
+ local->call_cnt++;
- if (src_cached != dst_hashed && src_cached != dst_cached)
- local->call_cnt++;
+ if (src_hashed != rename_subvol && src_hashed != src_cached)
+ local->call_cnt++;
- if (src_hashed != rename_subvol && src_hashed != src_cached)
- local->call_cnt++;
+ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
+ local->call_cnt++;
- if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
- local->call_cnt++;
+ if (local->call_cnt == 0)
+ goto unwind;
- if (local->call_cnt == 0)
- goto unwind;
+ DHT_MARK_FOP_INTERNAL(xattr);
- if (src_cached != dst_hashed && src_cached != dst_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src datafile %s @ %s",
- local->loc.path, src_cached->name);
+ if (src_cached != dst_hashed && src_cached != dst_cached) {
+ dict_t *xattr_new = NULL;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc, 0, NULL);
- }
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- if (src_hashed != rename_subvol && src_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src linkfile %s @ %s",
- local->loc.path, src_hashed->name);
+ gf_msg_trace(this->name, 0, "deleting old src datafile %s @ %s",
+ local->loc.path, src_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_hashed, src_hashed->fops->unlink,
- &local->loc, 0, NULL);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
}
- if (dst_cached
- && (dst_cached != dst_hashed)
- && (dst_cached != src_cached)) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old dst datafile %s @ %s",
- local->loc2.path, dst_cached->name);
+ DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, &local->loc2);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached,
+ src_cached->fops->unlink, &local->loc, 0, xattr_new);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_cached, dst_cached->fops->unlink,
- &local->loc2, 0, NULL);
- }
- return 0;
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
-unwind:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ dict_t *xattr_new = NULL;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent, NULL);
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- return 0;
+ gf_msg_trace(this->name, 0, "deleting old src linkfile %s @ %s",
+ local->loc.path, src_hashed->name);
-cleanup:
- dht_rename_cleanup (frame);
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_hashed, src_hashed,
+ src_hashed->fops->unlink, &local->loc, 0, xattr_new);
+
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
+
+ if (dst_cached && (dst_cached != dst_hashed) &&
+ (dst_cached != src_cached)) {
+ gf_msg_trace(this->name, 0, "deleting old dst datafile %s @ %s",
+ local->loc2.path, dst_cached->name);
+
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_cached, dst_cached,
+ dst_cached->fops->unlink, &local->loc2, 0, xattr);
+ }
+ if (xattr)
+ dict_unref(xattr);
+ return 0;
+
+unwind:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+
+ dht_rename_done(frame, this);
+
+ return 0;
}
+int
+dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ xlator_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ prev = cookie;
+ local = frame->local;
+ main_frame = local->main_frame;
+
+ /* TODO: Handle this case in lookup-optimize */
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_CREATE_LINK_FAILED,
+ "link/file %s on %s failed", local->loc.path, prev->name);
+ }
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal(frame, this);
+ }
+
+ dht_rename_unlink(main_frame, this);
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
int
-dht_do_rename (call_frame_t *frame)
+dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *this = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO(frame, dht_local_t);
+
+ /* It is a critical failure iff we fail to rename the cached file
+ * if the rename of the linkto failed, it is not a critical failure,
+ * and we do not want to lose the created hard link for the new
+ * name as that could have been read by other clients.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations.
+ *
+ * Repercussions of treating this as a non-critical error is that
+ * we could leave behind a stale linkto file and/or not create the new
+ * linkto file, the second case would be rectified by a subsequent
+ * lookup, the first case by a rebalance, like for all stale linkto
+ * files */
+
+ if (op_ret == -1) {
+ /* Critical failure: unable to rename the cached file */
+ if (prev == src_cached) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_RENAME_FAILED,
+ "%s: Rename on %s failed, (gfid = %s) ", local->loc.path,
+ prev->name,
+ local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : "");
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto cleanup;
+ } else {
+ /* Non-critical failure, unable to rename the linkto
+ * file
+ */
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "%s: Rename (linkto file) on %s failed, "
+ "(gfid = %s) ",
+ local->loc.path, prev->name,
+ local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : "");
+ }
+ }
+ if (xdata) {
+ if (!local->xattr)
+ local->xattr = dict_ref(xdata);
+ else
+ local->xattr = dict_copy_with_ref(xdata, local->xattr);
+ }
+
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev == src_cached) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+ }
+
+ /* Create the linkto file for the dst file */
+ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
+ link_frame = copy_frame(frame);
+ if (!link_frame) {
+ goto unlink;
+ }
+ /* fop value sent as maxvalue because it is not used
+ * anywhere in this case */
+ link_local = dht_local_init(link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto unlink;
+ }
- local = frame->local;
- this = frame->this;
+ if (link_local->loc.inode)
+ inode_unref(link_local->loc.inode);
+ link_local->loc.inode = inode_ref(local->loc.inode);
+ link_local->main_frame = frame;
+ link_local->stbuf = local->stbuf;
+ gf_uuid_copy(link_local->gfid, local->loc.inode->gfid);
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
- src_cached = local->src_cached;
+ dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this,
+ src_cached, dst_hashed, &link_local->loc);
+ return 0;
+ }
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+unlink:
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s => %s (%s)",
- local->loc.path, local->loc2.path, rename_subvol->name);
+ if (link_frame) {
+ DHT_STACK_DESTROY(link_frame);
+ }
+ dht_rename_unlink(frame, this);
+ return 0;
- STACK_WIND (frame, dht_rename_cbk,
- rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2, NULL);
+cleanup:
+ dht_rename_cleanup(frame);
- return 0;
+ return 0;
}
+int
+dht_do_rename(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
+ xlator_t *rename_subvol = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+ src_cached = local->src_cached;
+
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) {
+ DHT_MARKER_DONT_ACCOUNT(local->xattr_req);
+ }
+
+ if (rename_subvol == src_cached) {
+ DHT_CHANGELOG_TRACK_AS_RENAME(local->xattr_req, &local->loc,
+ &local->loc2);
+ }
+
+ gf_msg_trace(this->name, 0, "renaming %s => %s (%s)", local->loc.path,
+ local->loc2.path, rename_subvol->name);
+
+ if (local->linked == _gf_true)
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_cbk, rename_subvol, rename_subvol,
+ rename_subvol->fops->rename, &local->loc, &local->loc2,
+ local->xattr_req);
+ return 0;
+}
int
-dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
-
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "link/file on %s failed (%s)",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- }
+dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->op_ret == -1)
- goto cleanup;
+ local = frame->local;
+ prev = cookie;
- dht_do_rename (frame);
- }
+ if (op_ret == -1) {
+ gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name,
+ strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ local->added_link = _gf_false;
+ } else
+ dht_iatt_merge(this, &local->stbuf, stbuf);
- return 0;
+ if (local->op_ret == -1)
+ goto cleanup;
+
+ dht_do_rename(frame);
+
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
- return 0;
+ return 0;
}
-
int
-dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_rename_linkto_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_cached = NULL;
+ dict_t *xattr = NULL;
+ local = frame->local;
+ DHT_MARK_FOP_INTERNAL(xattr);
+ prev = cookie;
+ src_cached = local->src_cached;
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name,
+ strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink of %s on %s failed (%s)",
- local->loc2.path, prev->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
+ /* If linkto creation failed move to failure cleanup code,
+ * instead of continuing with creating the link file */
+ if (local->op_ret != 0) {
+ goto cleanup;
+ }
- if (local->op_ret == -1)
- goto cleanup;
+ gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr);
+ }
- dht_do_rename (frame);
+ local->added_link = _gf_true;
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached,
+ src_cached->fops->link, &local->loc, &local->loc2, xattr);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
+
+ if (xattr)
+ dict_unref(xattr);
- return 0;
+ return 0;
}
+int
+dht_rename_unlink_links_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_msg_debug(this->name, 0, "unlink of %s on %s failed (%s)",
+ local->loc2.path, prev->name, strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+
+ if (local->op_ret == -1)
+ goto cleanup;
+
+ dht_do_rename(frame);
+
+ return 0;
+
+cleanup:
+ dht_rename_cleanup(frame);
+
+ return 0;
+}
int
-dht_rename_create_links (call_frame_t *frame)
+dht_rename_create_links(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+ local = frame->local;
+ this = frame->this;
- local = frame->local;
- this = frame->this;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ DHT_MARK_FOP_INTERNAL(xattr);
+ if (src_cached == dst_cached) {
+ dict_t *xattr_new = NULL;
- if (src_cached == dst_cached) {
- if (dst_hashed == dst_cached)
- goto nolinks;
+ if (dst_hashed == dst_cached)
+ goto nolinks;
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking dst linkfile %s @ %s",
- local->loc2.path, dst_hashed->name);
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- STACK_WIND (frame, dht_rename_unlink_links_cbk,
- dst_hashed, dst_hashed->fops->unlink,
- &local->loc2, 0, NULL);
- return 0;
- }
+ gf_msg_trace(this->name, 0, "unlinking dst linkfile %s @ %s",
+ local->loc2.path, dst_hashed->name);
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- if (src_cached != dst_hashed)
- call_cnt++;
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_links_cbk, dst_hashed,
+ dst_hashed, dst_hashed->fops->unlink, &local->loc2, 0,
+ xattr_new);
- local->call_cnt = call_cnt;
+ dict_unref(xattr_new);
+ if (xattr)
+ dict_unref(xattr);
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
- memcpy (local->gfid, local->loc.inode->gfid, 16);
- dht_linkfile_create (frame, dht_rename_links_cbk,
- src_cached, dst_hashed, &local->loc);
- }
+ return 0;
+ }
+
+ if (src_cached != dst_hashed) {
+ /* needed to create the link file */
+ call_cnt++;
+ if (dst_hashed != src_hashed)
+ /* needed to create the linkto file */
+ call_cnt++;
+ }
+
+ /* We should not have any failures post the link creation, as this
+ * introduces the newname into the namespace. Clients could have cached
+ * the existence of the newname and may start taking actions based on
+ * the same. Hence create the linkto first, and then attempt the link.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations. */
+ if (dst_hashed != src_hashed && src_cached != dst_hashed) {
+ gf_msg_trace(this->name, 0, "linkfile %s @ %s => %s", local->loc.path,
+ dst_hashed->name, src_cached->name);
+
+ memcpy(local->gfid, local->loc.inode->gfid, 16);
+ dht_linkfile_create(frame, dht_rename_linkto_cbk, this, src_cached,
+ dst_hashed, &local->loc);
+ } else if (src_cached != dst_hashed) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref(xattr, NULL);
+
+ gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
- if (src_cached != dst_hashed) {
- gf_log (this->name, GF_LOG_TRACE,
- "link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- STACK_WIND (frame, dht_rename_links_cbk,
- src_cached, src_cached->fops->link,
- &local->loc, &local->loc2, NULL);
- }
+ local->added_link = _gf_true;
+
+ STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached,
+ src_cached->fops->link, &local->loc, &local->loc2,
+ xattr_new);
+
+ dict_unref(xattr_new);
+ }
nolinks:
- if (!call_cnt) {
- /* skip to next step */
- dht_do_rename (frame);
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename(frame);
+ }
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+}
+
+int
+dht_rename_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_server[GF_UUID_BUF_SIZE] = {0};
+ int child_index = -1;
+ gf_boolean_t is_src = _gf_false;
+ loc_t *loc = NULL;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+ conf = this->private;
+
+ is_src = (child_index == 0);
+ if (is_src)
+ loc = &local->loc;
+ else
+ loc = &local->loc2;
+
+ if (op_ret >= 0) {
+ if (is_src)
+ local->src_cached = dht_subvol_get_cached(this, local->loc.inode);
+ else {
+ if (loc->inode)
+ gf_uuid_unparse(loc->inode->gfid, gfid_local);
+
+ gf_msg_debug(this->name, 0,
+ "dst_cached before lookup: %s, "
+ "(path:%s)(gfid:%s),",
+ local->loc2.path,
+ local->dst_cached ? local->dst_cached->name : NULL,
+ local->dst_cached ? gfid_local : NULL);
+
+ local->dst_cached = dht_subvol_get_cached(this,
+ local->loc2_copy.inode);
+
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_local);
+
+ gf_msg_debug(this->name, GF_LOG_WARNING,
+ "dst_cached after lookup: %s, "
+ "(path:%s)(gfid:%s)",
+ local->loc2.path,
+ local->dst_cached ? local->dst_cached->name : NULL,
+ local->dst_cached ? gfid_local : NULL);
+
+ if ((local->loc2.inode == NULL) ||
+ gf_uuid_compare(stbuf->ia_gfid, local->loc2.inode->gfid)) {
+ if (local->loc2.inode != NULL) {
+ inode_unlink(local->loc2.inode, local->loc2.parent,
+ local->loc2.name);
+ inode_unref(local->loc2.inode);
+ }
+
+ local->loc2.inode = inode_link(local->loc2_copy.inode,
+ local->loc2_copy.parent,
+ local->loc2_copy.name, stbuf);
+ gf_uuid_copy(local->loc2.gfid, stbuf->ia_gfid);
+ }
+ }
+ }
+
+ if (op_ret < 0) {
+ if (is_src) {
+ /* The meaning of is_linkfile is overloaded here. For locking
+ * to work properly both rebalance and rename should acquire
+ * lock on datafile. The reason for sending this lookup is to
+ * find out whether we've acquired a lock on data file.
+ * Between the lookup before rename and this rename, the
+ * file could be migrated by a rebalance process and now this
+ * file this might be a linkto file. We verify that by sending
+ * this lookup. However, if this lookup fails we cannot really
+ * say whether we've acquired lock on a datafile or linkto file.
+ * So, we act conservatively and _assume_
+ * that this is a linkfile and fail the rename operation.
+ */
+ local->is_linkfile = _gf_true;
+ local->op_errno = op_errno;
+ } else {
+ if (local->dst_cached)
+ gf_msg_debug(this->name, op_errno,
+ "file %s (gfid:%s) was present "
+ "(hashed-subvol=%s, "
+ "cached-subvol=%s) before rename,"
+ " but lookup failed",
+ local->loc2.path,
+ uuid_utoa(local->loc2.inode->gfid),
+ local->dst_hashed->name, local->dst_cached->name);
+ if (dht_inode_missing(op_errno))
+ local->dst_cached = NULL;
+ }
+ } else if (is_src && xattr &&
+ check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ local->is_linkfile = _gf_true;
+ /* Found linkto file instead of data file, passdown ENOENT
+ * based on the above comment */
+ local->op_errno = ENOENT;
+ }
+
+ if (!local->is_linkfile && (op_ret >= 0) &&
+ gf_uuid_compare(loc->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(loc->gfid, gfid_local);
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_server);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "path:%s, received a different gfid, local_gfid= %s"
+ " server_gfid: %s",
+ local->loc.path, gfid_local, gfid_server);
+
+ /* Will passdown ENOENT anyway since the file we sent on
+ * rename is replaced with a different file */
+ local->op_errno = ENOENT;
+ /* Since local->is_linkfile is used here to detect failure,
+ * marking this to true */
+ local->is_linkfile = _gf_true;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->is_linkfile) {
+ local->op_ret = -1;
+ goto fail;
}
- return 0;
-}
+ dht_rename_create_links(frame);
+ }
+ return 0;
+fail:
+ dht_rename_unlock(frame, this);
+ return 0;
+}
int
-dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- xlator_t *src_cached = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *dst_hashed = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- src_hashed = dht_subvol_get_hashed (this, oldloc);
- if (!src_hashed) {
- gf_log (this->name, GF_LOG_INFO,
- "no subvolume in layout for path=%s",
- oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+dht_rename_file_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "protecting namespace of %s failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->current == &local->lock[0] ? local->loc.path
+ : local->loc2.path,
+ local->loc.path, src_gfid, local->src_hashed->name,
+ local->loc2.path, dst_gfid,
+ local->dst_hashed ? local->dst_hashed->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ if (local->current == &local->lock[0]) {
+ loc = &local->loc2;
+ subvol = local->dst_hashed;
+ local->current = &local->lock[1];
+ } else {
+ loc = &local->loc;
+ subvol = local->src_hashed;
+ local->current = &local->lock[0];
+ }
+
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_lock_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_unlock(frame, this);
+ return 0;
+}
- src_cached = dht_subvol_get_cached (this, oldloc->inode);
- if (!src_cached) {
- gf_log (this->name, GF_LOG_INFO,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+int32_t
+dht_rename_file_protect_namespace(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ /* Locks on src and dst needs to ordered which otherwise might cause
+ * deadlocks when rename (src, dst) and rename (dst, src) is done from
+ * two different clients
+ */
+ ret = dht_order_rename_lock(frame, &loc, &subvol);
+ if (ret) {
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_file_lock1_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
- dst_hashed = dht_subvol_get_hashed (this, newloc);
- if (!dst_hashed) {
- gf_log (this->name, GF_LOG_INFO,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
+err:
+ /* Its fine to call unlock even when no locks are acquired, as we check
+ * for lock->locked before winding a unlock call.
+ */
+ dht_rename_unlock(frame, this);
- if (newloc->inode)
- dst_cached = dht_subvol_get_cached (this, newloc->inode);
+ return 0;
+}
- local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- /* cached_subvol will be set from dht_local_init, reset it to NULL,
- as the logic of handling rename is different */
- local->cached_subvol = NULL;
-
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
+int32_t
+dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dict_t *xattr_req = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *subvol = NULL;
+ dht_lock_t *lock = NULL;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "protecting namespace of %s failed. "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->current == &local->lock[0] ? local->loc.path
+ : local->loc2.path,
+ local->loc.path, src_gfid, local->src_hashed->name,
+ local->loc2.path, dst_gfid,
+ local->dst_hashed ? local->dst_hashed->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ goto done;
+ }
+
+ xattr_req = dict_new();
+ if (xattr_req == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto done;
+ }
+
+ op_ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto done;
+ }
+
+ /* dst_cached might've changed. This normally happens for two reasons:
+ * 1. rebalance migrated dst
+ * 2. Another parallel rename was done overwriting dst
+ *
+ * Doing a lookup on local->loc2 when dst exists, but is associated
+ * with a different gfid will result in an ESTALE error. So, do a fresh
+ * lookup with a new inode on dst-path and handle change of dst-cached
+ * in the cbk. Also, to identify dst-cached changes we do a lookup on
+ * "this" rather than the subvol.
+ */
+ loc_copy(&local->loc2_copy, &local->loc2);
+ inode_unref(local->loc2_copy.inode);
+ local->loc2_copy.inode = inode_new(local->loc.inode->table);
+
+ /* Why not use local->lock.locks[?].loc for lookup post lock phase
+ * ---------------------------------------------------------------
+ * "layout.parent_layout.locks[?].loc" does not have the name and pargfid
+ * populated.
+ * Reason: If we had populated the name and pargfid, server might
+ * resolve to a successful lookup even if there is a file with same name
+ * with a different gfid(unlink & create) as server does name based
+ * resolution on first priority. And this can result in operating on a
+ * different inode entirely.
+ *
+ * Now consider a scenario where source file was renamed by some other
+ * client to a new name just before this lock was granted. So if a
+ * lookup would be done on local->lock[0].layout.parent_layout.locks[?].loc,
+ * server will send success even if the entry was renamed (since server will
+ * do a gfid based resolution). So once a lock is granted, make sure the
+ * file exists with the name that the client requested with.
+ * */
+
+ local->call_cnt = 2;
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ lock = local->rename_inodelk_backward_compatible[0];
+ if (gf_uuid_compare(local->loc.gfid, lock->loc.gfid) == 0)
+ subvol = lock->xl;
+ else {
+ lock = local->rename_inodelk_backward_compatible[1];
+ subvol = lock->xl;
+ }
+ } else {
+ subvol = this;
}
- local->src_hashed = src_hashed;
- local->src_cached = src_cached;
- local->dst_hashed = dst_hashed;
- local->dst_cached = dst_cached;
+ STACK_WIND_COOKIE(frame, dht_rename_lookup_cbk, (void *)(long)i, subvol,
+ subvol->fops->lookup,
+ (i == 0) ? &local->loc : &local->loc2_copy,
+ xattr_req);
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
- oldloc->path, src_hashed->name, src_cached->name,
- newloc->path, dst_hashed->name,
- dst_cached ? dst_cached->name : "<nul>");
+ dict_unref(xattr_req);
+ return 0;
- if (IA_ISDIR (oldloc->inode->ia_type)) {
- dht_rename_dir (frame, this);
- } else {
- local->op_ret = 0;
- dht_rename_create_links (frame);
+done:
+ /* Its fine to call unlock even when no locks are acquired, as we check
+ * for lock->locked before winding a unlock call.
+ */
+ dht_rename_unlock(frame, this);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ return 0;
+}
+
+int
+dht_rename_lock(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+
+ local = frame->local;
+
+ if (local->dst_cached)
+ count++;
+
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new(frame->this, local->src_cached, &local->loc,
+ F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL,
+ FAIL_ON_ANY_ERROR);
+ if (lk_array[0] == NULL)
+ goto err;
+
+ if (local->dst_cached) {
+ /* dst might be removed by the time inodelk reaches bricks,
+ * which can result in ESTALE errors. POSIX imposes no
+ * restriction for dst to be present for renames to be
+ * successful. So, we'll ignore ESTALE errors. As far as
+ * synchronization on dst goes, we'll achieve the same by
+ * holding entrylk on parent directory of dst in the namespace
+ * of basename(dst). Also, there might not be quorum in cluster
+ * xlators like EC/disperse on errno, in which case they return
+ * EIO. For eg., in a disperse (4 + 2), 3 might return success
+ * and three might return ESTALE. Disperse, having no Quorum
+ * unwinds inodelk with EIO. So, ignore EIO too.
+ */
+ lk_array[1] = dht_lock_new(frame->this, local->dst_cached, &local->loc2,
+ F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE_EIO);
+ if (lk_array[1] == NULL)
+ goto err;
+ }
+
+ local->rename_inodelk_backward_compatible = lk_array;
+ local->rename_inodelk_bc_count = count;
+
+ /* retaining inodelks for the sake of backward compatibility. Please
+ * make sure to remove this inodelk once all of 3.10, 3.12 and 3.13
+ * reach EOL. Better way of getting synchronization would be to acquire
+ * entrylks on src and dst parent directories in the namespace of
+ * basenames of src and dst
+ */
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_rename_file_protect_namespace);
+ if (ret < 0) {
+ local->rename_inodelk_backward_compatible = NULL;
+ local->rename_inodelk_bc_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ int tmp_count = 0, i = 0;
+
+ for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++)
+ ;
+
+ dht_lock_array_free(lk_array, tmp_count);
+ GF_FREE(lk_array);
+ }
+
+ return -1;
+}
+
+int
+dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ xlator_t *src_cached = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ char newgfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(oldloc, err);
+ VALIDATE_OR_GOTO(newloc, err);
+
+ gf_uuid_unparse(oldloc->inode->gfid, gfid);
+
+ src_hashed = dht_subvol_get_hashed(this, oldloc);
+ if (!src_hashed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s,"
+ "(gfid = %s)",
+ oldloc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ src_cached = dht_subvol_get_cached(this, oldloc->inode);
+ if (!src_cached) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No cached subvolume for path = %s,"
+ "(gfid = %s)",
+ oldloc->path, gfid);
+
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ dst_hashed = dht_subvol_get_hashed(this, newloc);
+ if (!dst_hashed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s", newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (newloc->inode)
+ dst_cached = dht_subvol_get_cached(this, newloc->inode);
+
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_RENAME);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
+
+ ret = loc_copy(&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->src_hashed = src_hashed;
+ local->src_cached = src_cached;
+ local->dst_hashed = dst_hashed;
+ local->dst_cached = dst_cached;
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (newloc->inode)
+ gf_uuid_unparse(newloc->inode->gfid, newgfid);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_INFO,
+ "renaming %s (%s) (hash=%s/cache=%s) => %s (%s) "
+ "(hash=%s/cache=%s) ",
+ oldloc->path, gfid, src_hashed->name, src_cached->name, newloc->path,
+ newloc->inode ? newgfid : NULL, dst_hashed->name,
+ dst_cached ? dst_cached->name : "<nul>");
+
+ if (IA_ISDIR(oldloc->inode->ia_type)) {
+ dht_rename_dir(frame, this);
+ } else {
+ local->op_ret = 0;
+ ret = dht_rename_lock(frame);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
}
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
+}
+
+int
+dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ gf_boolean_t free_xdata = _gf_false;
+
+ /* Just a pass through */
+ if (!IA_ISDIR(oldloc->inode->ia_type)) {
+ if (!xdata) {
+ free_xdata = _gf_true;
+ }
+ DHT_CHANGELOG_TRACK_AS_RENAME(xdata, oldloc, newloc);
+ }
+ default_rename(frame, this, oldloc, newloc, xdata);
+ if (free_xdata && xdata) {
+ dict_unref(xdata);
+ xdata = NULL;
+ }
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index e67b64c3826..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -8,913 +8,2593 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "dht-common.h"
-
-#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path) do { \
- layout->list[i].start = srt; \
- layout->list[i].stop = srt + chunk - 1; \
- \
- gf_log (this->name, GF_LOG_TRACE, \
- "gave fix: %u - %u on %s for %s", \
- layout->list[i].start, layout->list[i].stop, \
- layout->list[i].xlator->name, path); \
- } while (0)
-
-static inline uint32_t
-dht_find_overlap (int idx, int cnk_idx, uint32_t start, uint32_t stop,
- uint32_t chunk_size)
-{
- uint32_t overlap = 0;
- uint32_t chunk_begin = 0;
-
- chunk_begin = cnk_idx * chunk_size;
-
- /* There is no chance of overlap */
- if ((chunk_begin > stop) ||
- ((chunk_begin + chunk_size) < start))
- goto out;
+#include "dht-lock.h"
+
+#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \
+ do { \
+ layout->list[i].start = srt; \
+ layout->list[i].stop = srt + chunk - 1; \
+ layout->list[i].commit_hash = layout->commit_hash; \
+ \
+ gf_msg_trace(this->name, 0, \
+ "gave fix: 0x%x - 0x%x, with commit-hash 0x%x" \
+ " on %s for %s", \
+ layout->list[i].start, layout->list[i].stop, \
+ layout->list[i].commit_hash, \
+ layout->list[i].xlator->name, path); \
+ } while (0)
+
+#define DHT_RESET_LAYOUT_RANGE(layout) \
+ do { \
+ int cnt = 0; \
+ for (cnt = 0; cnt < layout->cnt; cnt++) { \
+ layout->list[cnt].start = 0; \
+ layout->list[cnt].stop = 0; \
+ } \
+ } while (0)
+
+static int
+dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir, dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal);
+
+static uint32_t
+dht_overlap_calc(dht_layout_t *old, int o, dht_layout_t *new, int n)
+{
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) <= stop)) {
- overlap = ((chunk_begin + chunk_size) - start);
- goto out;
- }
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - start);
- goto out;
- }
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
+ }
- if ((chunk_begin < stop) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - chunk_begin);
- goto out;
- }
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
+ }
-out:
- return overlap;
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
+
+ return min(old->list[o].stop, new->list[n].stop) -
+ max(old->list[o].start, new->list[n].start) + 1;
}
int
-dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
+dht_selfheal_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- local = frame->local;
- local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno, NULL);
+int
+dht_selfheal_dir_finish(call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
- return 0;
-}
+ local = frame->local;
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
-int
-dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int err = 0;
- int this_call_cnt = 0;
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == 0)
- err = 0;
- else
- err = op_errno;
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = err;
- break;
- }
- }
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
- this_call_cnt = dht_frame_return (frame);
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_finish (frame, this, 0);
- }
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
- return 0;
-}
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_selfheal_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (invoke_cbk)
+ local->selfheal.dir_cbk(frame, NULL, frame->this, ret, local->op_errno,
+ NULL);
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ return 0;
+}
int
-dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+dht_refresh_layout_done(call_frame_t *frame)
{
- xlator_t *subvol = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- xlator_t *this = NULL;
- int32_t *disk_layout = NULL;
- dht_local_t *local = NULL;
+ int ret = -1;
+ dht_layout_t *refreshed = NULL, *heal = NULL;
+ dht_local_t *local = NULL;
+ dht_need_heal_t should_heal = NULL;
+ dht_selfheal_layout_t healer = NULL;
+ local = frame->local;
- local = frame->local;
- subvol = layout->list[i].xlator;
- this = frame->this;
+ refreshed = local->selfheal.refreshed_layout;
+ heal = local->selfheal.layout;
- GF_VALIDATE_OR_GOTO ("", this, err);
- GF_VALIDATE_OR_GOTO (this->name, layout, err);
- GF_VALIDATE_OR_GOTO (this->name, local, err);
- GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+ healer = local->selfheal.healer;
+ should_heal = local->selfheal.should_heal;
- xattr = get_new_dict ();
- if (!xattr) {
- goto err;
+ ret = dht_layout_sort(refreshed);
+ if (ret == -1) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED, NULL);
+ goto err;
+ }
+
+ if (should_heal(frame, &heal, &refreshed)) {
+ healer(frame, &local->loc, heal);
+ } else {
+ local->selfheal.layout = NULL;
+ local->selfheal.refreshed_layout = NULL;
+ local->selfheal.layout = refreshed;
+
+ dht_layout_unref(frame->this, heal);
+
+ dht_selfheal_dir_finish(frame, frame->this, 0, 1);
+ }
+
+ return 0;
+
+err:
+ dht_selfheal_dir_finish(frame, frame->this, -1, 1);
+ return 0;
+}
+
+int
+dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO("dht", this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+
+ layout = local->selfheal.refreshed_layout;
+
+ LOCK(&frame->lock);
+ {
+ op_ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ local->op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path,
+ "name=%s", prev->name, "gfid=%s", gfid, NULL);
+
+ goto unlock;
}
- ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: (subvol %s) failed to extract disk layout",
- loc->path, subvol->name);
- goto err;
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (local->op_ret == 0) {
+ local->refresh_layout_done(frame);
+ } else {
+ goto err;
}
+ }
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: (subvol %s) failed to set xattr dictionary",
- loc->path, subvol->name);
- goto err;
+ return 0;
+
+err:
+ if (local) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ }
+ return 0;
+}
+
+int
+dht_refresh_layout(call_frame_t *frame)
+{
+ int call_cnt = 0;
+ int i = 0, ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+
+ this = frame->this;
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+ local->op_ret = -1;
+
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref(this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
+
+ local->selfheal.refreshed_layout = dht_layout_new(this,
+ conf->subvolume_cnt);
+ if (!local->selfheal.refreshed_layout) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if (local->xattr != NULL) {
+ dict_del(local->xattr, conf->xattr_name);
+ }
+
+ if (local->xattr_req == NULL) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ local->xattr_req = dict_new();
+ if (local->xattr_req == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
}
- disk_layout = NULL;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "setting hash range %u - %u (type %d) on subvolume %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->type, subvol->name, loc->path);
+ if (dict_get(local->xattr_req, conf->xattr_name) == 0) {
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s", conf->xattr_name,
+ NULL);
+ }
- dict_ref (xattr);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_refresh_layout_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
- if (!uuid_is_null (local->gfid))
- uuid_copy (loc->gfid, local->gfid);
+ return 0;
- STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, 0, NULL);
+out:
+ if (local) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ }
+ return 0;
+}
- dict_unref (xattr);
+int32_t
+dht_selfheal_layout_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
- return 0;
+ local = frame->local;
-err:
- if (xattr)
- dict_destroy (xattr);
+ if (!local) {
+ goto err;
+ }
- if (disk_layout)
- GF_FREE (disk_layout);
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ goto err;
+ }
- dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM, NULL);
- return 0;
+ local->refresh_layout_unlock = dht_selfheal_dir_finish;
+ local->refresh_layout_done = dht_refresh_layout_done;
+
+ dht_refresh_layout(frame);
+ return 0;
+
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
}
-int
-dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+gf_boolean_t
+dht_should_heal_layout(call_frame_t *frame, dht_layout_t **heal,
+ dht_layout_t **ondisk)
{
- dht_local_t *local = NULL;
- int i = 0;
- int count = 0;
- xlator_t *this = NULL;
+ gf_boolean_t fixit = _gf_true;
+ dht_local_t *local = NULL;
+ int heal_missing_dirs = 0;
+
+ local = frame->local;
+
+ if ((heal == NULL) || (*heal == NULL) || (ondisk == NULL) ||
+ (*ondisk == NULL))
+ goto out;
+
+ dht_layout_anomalies(
+ frame->this, &local->loc, *ondisk, &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt, &local->selfheal.missing_cnt,
+ &local->selfheal.down, &local->selfheal.misc, NULL);
+
+ /* Directories might've been created as part of this self-heal. We've to
+ * sync non-layout xattrs and set range 0-0 on new directories
+ */
+ heal_missing_dirs = local->selfheal.force_mkdir
+ ? local->selfheal.force_mkdir
+ : dht_layout_missing_dirs(*heal);
+
+ if ((local->selfheal.hole_cnt == 0) &&
+ (local->selfheal.overlaps_cnt == 0) && heal_missing_dirs) {
+ dht_layout_t *tmp = NULL;
+
+ /* Just added a brick and need to set 0-0 range on this brick.
+ * But ondisk layout is well-formed. So, swap layouts "heal" and
+ * "ondisk". Now "ondisk" layout will be used for healing
+ * xattrs. If there are any non-participating subvols in
+ * "ondisk" layout, dht_selfheal_dir_xattr_persubvol will set
+ * 0-0 and non-layout xattrs. This way we won't end up in
+ * "corrupting" already set and well-formed "ondisk" layout.
+ */
+ tmp = *heal;
+ *heal = *ondisk;
+ *ondisk = tmp;
+
+ /* Current selfheal code, heals non-layout xattrs only after
+ * an add-brick. In fact non-layout xattrs are considered as
+ * secondary citizens which are healed only if layout xattrs
+ * need to be healed. This is wrong, since for eg., quota can be
+ * set when layout is well-formed, but a node is down. Also,
+ * just for healing non-layout xattrs, we don't need locking.
+ * This issue is _NOT FIXED_ by this patch.
+ */
+ }
+
+ fixit = (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt ||
+ heal_missing_dirs);
- local = frame->local;
- this = frame->this;
+out:
+ return fixit;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "writing the new range for all subvolumes");
+int
+dht_layout_span(dht_layout_t *layout)
+{
+ int i = 0, count = 0;
- local->call_cnt = count = layout->cnt;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err)
+ continue;
- for (i = 0; i < layout->cnt; i++) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ if (layout->list[i].start != layout->list[i].stop)
+ count++;
+ }
- if (--count == 0)
- break;
- }
- return 0;
+ return count;
}
int
-dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+dht_decommissioned_bricks_in_layout(xlator_t *this, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- int missing_xattr = 0;
- int i = 0;
- xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int count = 0, i = 0, j = 0;
- local = frame->local;
- this = frame->this;
+ if ((this == NULL) || (layout == NULL))
+ goto out;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop) {
- /* err != -1 would mean xattr present on the directory
- * or the directory is non existent.
- * !layout->list[i].stop would mean layout absent
- */
+ conf = this->private;
- continue;
- }
- missing_xattr++;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ count++;
+ }
}
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "%d subvolumes missing xattr for %s",
- missing_xattr, loc->path);
+out:
+ return count;
+}
- if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
- return 0;
+dht_distribution_type_t
+dht_distribution_type(xlator_t *this, dht_layout_t *layout)
+{
+ dht_distribution_type_t type = GF_DHT_EQUAL_DISTRIBUTION;
+ int i = 0;
+ uint32_t start_range = 0, range = 0, diff = 0;
+
+ if ((this == NULL) || (layout == NULL) || (layout->cnt < 1)) {
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (start_range == 0) {
+ start_range = layout->list[i].stop - layout->list[i].start;
+ continue;
}
- local->call_cnt = missing_xattr;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop)
- continue;
-
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ range = layout->list[i].stop - layout->list[i].start;
+ diff = (range >= start_range) ? range - start_range
+ : start_range - range;
- if (--missing_xattr == 0)
- break;
+ if ((range != 0) && (diff > layout->cnt)) {
+ type = GF_DHT_WEIGHTED_DISTRIBUTION;
+ break;
}
- return 0;
+ }
+
+out:
+ return type;
}
-int
-dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+gf_boolean_t
+dht_should_fix_layout(call_frame_t *frame, dht_layout_t **inmem,
+ dht_layout_t **ondisk)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int this_call_cnt = 0;
+ gf_boolean_t fixit = _gf_true;
- local = frame->local;
- layout = local->selfheal.layout;
+ dht_local_t *local = NULL;
+ int layout_span = 0;
+ int decommissioned_bricks = 0;
+ dht_conf_t *conf = NULL;
+ dht_distribution_type_t inmem_dist_type = 0;
+ dht_distribution_type_t ondisk_dist_type = 0;
- this_call_cnt = dht_frame_return (frame);
+ conf = frame->this->private;
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- }
+ local = frame->local;
- return 0;
-}
+ if ((inmem == NULL) || (*inmem == NULL) || (ondisk == NULL) ||
+ (*ondisk == NULL))
+ goto out;
+ dht_layout_anomalies(frame->this, &local->loc, *ondisk,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt, NULL,
+ &local->selfheal.down, &local->selfheal.misc, NULL);
-int
-dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
- int32_t valid, dht_layout_t *layout)
+ if (local->selfheal.down || local->selfheal.misc) {
+ fixit = _gf_false;
+ goto out;
+ }
+
+ if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt)
+ goto out;
+
+ /* If commit hashes are being updated, let it through */
+ if ((*inmem)->commit_hash != (*ondisk)->commit_hash)
+ goto out;
+
+ layout_span = dht_layout_span(*ondisk);
+
+ decommissioned_bricks = dht_decommissioned_bricks_in_layout(frame->this,
+ *ondisk);
+ inmem_dist_type = dht_distribution_type(frame->this, *inmem);
+ ondisk_dist_type = dht_distribution_type(frame->this, *ondisk);
+
+ if ((decommissioned_bricks == 0) &&
+ (layout_span ==
+ (conf->subvolume_cnt - conf->decommission_subvols_cnt)) &&
+ (inmem_dist_type == ondisk_dist_type))
+ fixit = _gf_false;
+
+out:
+
+ return fixit;
+}
+
+static int
+dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir, dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal)
{
- int missing_attr = 0;
- int i = 0;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *tmp = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- local = frame->local;
- this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == -1)
- missing_attr++;
+ local = frame->local;
+
+ conf = frame->this->private;
+
+ local->selfheal.healer = healer;
+ local->selfheal.should_heal = should_heal;
+
+ tmp = local->selfheal.layout;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
+ dht_layout_unref(frame->this, tmp);
+
+ if (!newdir) {
+ count = conf->subvolume_cnt;
+
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
}
- if (missing_attr == 0) {
- dht_selfheal_dir_xattr (frame, loc, layout);
- return 0;
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new(
+ frame->this, conf->subvolumes[i], &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[i] == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid,
+ "path=%s", local->loc.path, NULL);
+ goto err;
+ }
+ }
+ } else {
+ count = 1;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
+ }
+
+ lk_array[0] = dht_lock_new(frame->this, local->hashed_subvol,
+ &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN,
+ NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[0] == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
}
+ }
- if (!uuid_is_null (local->gfid))
- uuid_copy (loc->gfid, local->gfid);
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = count;
- local->call_cnt = missing_attr;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "setattr for %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid, NULL);
- }
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_selfheal_layout_lock_cbk);
+ if (ret < 0) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
+
+ return -1;
+}
+
+static int
+dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt *stbuf = NULL;
+ int i = 0;
+ int ret = 0;
+ dht_layout_t *layout = NULL;
+ int err = 0;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ subvol = cookie;
+
+ if (op_ret == 0) {
+ err = 0;
+ } else {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ err = op_errno;
+ }
+
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ if (ret < 0) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_msg_debug(this->name, 0,
+ "key = %s not present in dict"
+ ", path:%s gfid:%s",
+ DHT_IATT_IN_XDATA_KEY, local->loc.path, gfid);
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = err;
+ break;
}
+ }
- return 0;
+ LOCK(&frame->lock);
+ {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ }
+
+ return 0;
}
+/* Code is required to set user xattr to local->xattr
+ */
int
-dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- int this_call_cnt = 0;
-
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = -1;
- break;
- }
- }
- }
+dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
+{
+ dict_t *set_xattr = data;
+ int ret = -1;
- if (op_ret) {
- gf_log (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG :
- GF_LOG_WARNING),
- "selfhealing directory %s failed: %s",
- local->loc.path, strerror (op_errno));
- goto out;
+ ret = dict_set(set_xattr, k, v);
+ return ret;
+}
+
+static int
+dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
+{
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+ int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
+ this = frame->this;
+
+ GF_VALIDATE_OR_GOTO("", this, err);
+ GF_VALIDATE_OR_GOTO(this->name, layout, err);
+ GF_VALIDATE_OR_GOTO(this->name, local, err);
+ GF_VALIDATE_OR_GOTO(this->name, subvol, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ goto err;
+ }
+
+ xdata = dict_new();
+ if (!xdata)
+ goto err;
+
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY,
+ "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ gf_uuid_unparse(loc->inode->gfid, gfid);
+
+ ret = dht_disk_layout_extract(this, layout, i, &disk_layout);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "extract-disk-layout-failed, path=%s", loc->path, "subvol=%s",
+ subvol->name, "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path,
+ "subvol=%s", subvol->name,
+ "set-xattr-dictionary-failed"
+ "gfid=%s",
+ gfid, NULL);
+ goto err;
+ }
+ disk_layout = NULL;
+
+ gf_msg_trace(this->name, 0,
+ "setting hash range 0x%x - 0x%x (type %d) on subvolume %s"
+ " for %s",
+ layout->list[i].start, layout->list[i].stop, layout->type,
+ subvol->name, loc->path);
+
+ if (local->xattr) {
+ data = dict_get(local->xattr, QUOTA_LIMIT_KEY);
+ if (data) {
+ ret = dict_add(xattr, QUOTA_LIMIT_KEY, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL);
+ }
}
+ data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
+ if (data) {
+ ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY,
+ NULL);
+ }
+ }
+ }
+
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(loc->gfid, local->gfid);
+
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_xattr_cbk, (void *)subvol, subvol,
+ subvol->fops->setxattr, loc, xattr, 0, xdata);
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+ dict_unref(xattr);
+ dict_unref(xdata);
+ return 0;
+
+err:
+ if (xattr)
+ dict_unref(xattr);
+ if (xdata)
+ dict_unref(xdata);
+
+ GF_FREE(disk_layout);
+
+ dht_selfheal_dir_xattr_cbk(frame, (void *)subvol, frame->this, -1, ENOMEM,
+ NULL);
+ return 0;
+}
+
+static int
+dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ int count = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ gf_msg_debug(this->name, 0, "%s: Writing the new range for all subvolumes",
+ loc->path);
+
+ local->call_cnt = count = conf->subvolume_cnt;
+
+ if (gf_log_get_loglevel() >= GF_LOG_DEBUG)
+ dht_log_new_layout_for_dir_selfheal(this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL);
+
+ if (--count == 0)
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new(this, 1);
+ if (!dummy)
+ goto out;
+ dummy->commit_hash = layout->commit_hash;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+
+ dht_layout_unref(this, dummy);
out:
- this_call_cnt = dht_frame_return (frame);
+ return 0;
+}
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout);
+static int
+dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop) {
+ /* err != -1 would mean xattr present on the directory
+ * or the directory is non existent.
+ * !layout->list[i].stop would mean layout absent
+ */
+
+ continue;
+ }
+ missing_xattr++;
+ }
+ /* Also account for subvolumes with no-layout. Used for zero'ing out
+ * the layouts and for setting quota key's if present */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ missing_xattr++;
}
+ }
+ gf_msg_trace(this->name, 0, "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish(frame, this, 0, 1);
return 0;
+ }
+
+ local->call_cnt = missing_xattr;
+
+ if (gf_log_get_loglevel() >= GF_LOG_DEBUG)
+ dht_log_new_layout_for_dir_selfheal(this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
+
+ dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL);
+
+ if (--missing_xattr == 0)
+ break;
+ }
+ dummy = dht_layout_new(this, 1);
+ if (!dummy) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ missing_xattr--;
+ }
+ }
+
+ dht_layout_unref(this, dummy);
+out:
+ return 0;
}
-void
-dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+int
+dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- data_t *acl_default = NULL;
- data_t *acl_access = NULL;
- xlator_t *this = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int this_call_cnt = 0, ret = -1;
- GF_ASSERT (xattr);
- GF_ASSERT (dict);
+ local = frame->local;
+ layout = local->selfheal.layout;
- this = THIS;
- GF_ASSERT (this);
+ this_call_cnt = dht_frame_return(frame);
- acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+ if (is_last_call(this_call_cnt)) {
+ if (!local->heal_layout) {
+ gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
+ local->loc.path, uuid_utoa(local->gfid));
- if (!acl_default) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ACL_DEFAULT xattr not present");
- goto cont;
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ return 0;
}
- ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "Could not set ACL_DEFAULT xattr");
-cont:
- acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
- if (!acl_access) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ACL_ACCESS xattr not present");
- goto out;
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
+
+ if (ret < 0) {
+ dht_selfheal_dir_finish(frame, this, -1, 1);
}
- ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "Could not set ACL_ACCESS xattr");
+ }
-out:
- return;
+ return 0;
}
int
-dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int force)
+dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout)
{
- int missing_dirs = 0;
- int i = 0;
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- dict_t *dict = NULL;
-
- local = frame->local;
- this = frame->this;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force)
- missing_dirs++;
+ int missing_attr = 0;
+ int i = 0, ret = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int cnt = 0;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ /* We need to heal the attrs if:
+ * 1. Any directories were missing - the newly created dirs will need
+ * to have the correct attrs set
+ * 2. An existing dir does not have the correct permissions -they may
+ * have been changed when a brick was down.
+ */
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1)
+ missing_attr++;
+ }
+
+ if ((missing_attr == 0) && (local->need_attrheal == 0)) {
+ if (!local->heal_layout) {
+ gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
+ loc->path, uuid_utoa(loc->gfid));
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ return 0;
}
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
- if (missing_dirs == 0) {
- dht_selfheal_dir_setattr (frame, loc, &local->stbuf, 0xffffffff, layout);
- return 0;
+ if (ret < 0) {
+ dht_selfheal_dir_finish(frame, this, -1, 1);
}
- local->call_cnt = missing_dirs;
- if (!uuid_is_null (local->gfid)) {
- dict = dict_new ();
- if (!dict)
- return -1;
+ return 0;
+ }
- ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set gfid in dict", loc->path);
- } else if (local->params) {
- /* Send the dictionary from higher layers directly */
- dict = dict_ref (local->params);
- }
- /* Set acls */
- if (local->xattr && dict)
- dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
+ cnt = local->call_cnt = conf->subvolume_cnt;
- if (!dict)
- gf_log (this->name, GF_LOG_WARNING,
- "dict is NULL, need to make sure gfids are same");
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf, valid,
+ NULL);
+ }
+
+ return 0;
+}
+static int
+dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0, ret = -1;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev;
+
+ if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force) {
- gf_log (this->name, GF_LOG_DEBUG,
- "creating directory %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->mkdir,
- loc,
- st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- 0, dict);
- }
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = -1;
+ break;
+ }
}
+ }
+
+ if (op_ret) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name,
+ ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ ret = 0;
- if (dict)
- dict_unref (dict);
+out:
+ this_call_cnt = dht_frame_return(frame);
- return 0;
-}
+ if (is_last_call(this_call_cnt)) {
+ dht_selfheal_dir_finish(frame, this, ret, 0);
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffff,
+ layout);
+ }
+ return 0;
+}
-int
-dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
- dht_layout_t *layout)
+static int
+dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
{
- int start = 0;
- uint32_t hashval = 0;
- int ret = 0;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+ int cnt = 0;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(this->private, err);
+
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ dict = dict_new();
+ if (!dict)
+ return -1;
- ret = dht_hash_compute (layout->type, loc->path, &hashval);
- if (ret == 0) {
- start = (hashval % layout->cnt);
+ ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=gfid-req", NULL);
+ } else if (local->params) {
+ /* Send the dictionary from higher layers directly */
+
+ dict = dict_ref(local->params);
+ }
+ /* Code to update all extended attributed from local->xattr
+ to dict
+ */
+ dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL);
+
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL);
+ dict = dict_new();
+ if (!dict)
+ return -1;
+ }
+ ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s",
+ GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL);
+ /* We can still continue. As heal can still happen
+ * unless quota limits have reached for the dir.
+ */
+ }
+
+ cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ if (layout->list[i].err == ESTALE || layout->list[i].err == ENOENT ||
+ local->selfheal.force_mkdir) {
+ gf_msg_debug(this->name, 0, "Creating directory %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+
+ STACK_WIND_COOKIE(
+ frame, dht_selfheal_dir_mkdir_cbk, layout->list[i].xlator,
+ layout->list[i].xlator, layout->list[i].xlator->fops->mkdir,
+ loc,
+ st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
+ dict);
}
+ }
- return start;
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
}
-static inline int
-dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
+static int
+dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
{
- int i = 0;
- int j = 0;
- int err = 0;
- int count = 0;
- dht_conf_t *conf = NULL;
-
- /* Gets in use only for replace-brick, remove-brick */
- conf = this->private;
- for (i = 0; i < layout->cnt; i++) {
- for (j = 0; j < conf->subvolume_cnt; j++) {
- if (conf->decommissioned_bricks[j] &&
- conf->decommissioned_bricks[j] == layout->list[i].xlator) {
- layout->list[i].err = -EINVAL;
- break;
- }
- }
+ dht_local_t *local = NULL;
+ int i = 0;
+ int this_call_cnt = 0;
+ int missing_dirs = 0;
+ dht_layout_t *layout = NULL;
+ xlator_t *prev = 0;
+ loc_t *loc = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ int index = -1;
+
+ VALIDATE_OR_GOTO(this->private, err);
+
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+ prev = cookie;
+
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ LOCK(&frame->lock);
+ {
+ index = dht_layout_index_for_subvol(layout, prev);
+ if ((op_ret < 0) && (op_errno == ENOENT || op_errno == ESTALE)) {
+ local->selfheal.hole_cnt = !local->selfheal.hole_cnt
+ ? 1
+ : local->selfheal.hole_cnt + 1;
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = op_errno;
+ }
}
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
- count++;
- }
+ if (!op_ret) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ if (prev == local->mds_subvol) {
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+ }
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = -1;
+ }
}
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (local->selfheal.hole_cnt == layout->cnt) {
+ gf_msg_debug(this->name, op_errno,
+ "Lookup failed, an rmdir could have "
+ "deleted this entry %s",
+ loc->name);
+ local->op_errno = op_errno;
+ goto err;
+ } else {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT ||
+ layout->list[i].err == ESTALE ||
+ local->selfheal.force_mkdir)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_finish(frame, this, 0, 0);
+ dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff,
+ layout);
+ return 0;
+ }
- /* no subvolume has enough space, but can't stop directory creation */
- if (!count || !new_layout) {
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == ENOSPC) {
- layout->list[i].err = -1;
- count++;
- }
- }
+ local->call_cnt = missing_dirs;
+ dht_selfheal_dir_mkdir_lookup_done(frame, this);
}
+ }
- count = ((layout->spread_cnt) ? layout->spread_cnt :
- ((count) ? count : 1));
+ return 0;
- return count;
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
}
-
-dht_layout_t *
-dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+static int
+dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- uint32_t chunk = 0;
- uint32_t start = 0;
- uint32_t stop = 0;
- uint32_t overlap = 0;
- uint32_t max_overlap = 0;
- uint32_t chunk_begin = 0;
- int count = 0;
- int cnt = 0;
- int i = 0;
- int j = 0;
- int k = 0;
- int loop_cnt = 0;
- int start_subvol = 0;
- int *fix_array = NULL;
- xlator_t *this = NULL;
- dht_layout_t *new_layout = NULL;
- dht_conf_t *priv = NULL;
- dht_local_t *local = NULL;
- uint32_t subvol_down = 0;
- int ret = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ int ret = -1;
+ xlator_t *mds_subvol = NULL;
- this = frame->this;
- priv = this->private;
- local = frame->local;
+ VALIDATE_OR_GOTO(this->private, err);
- count = cnt = dht_get_layout_count (this, layout, 0);
+ conf = this->private;
+ local = frame->local;
+ mds_subvol = local->mds_subvol;
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
+ local->call_cnt = conf->subvolume_cnt;
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+ if (op_ret < 0) {
+ if (op_errno == EINVAL) {
+ local->call_cnt = 1;
+ dht_selfheal_dir_mkdir_lookup_done(frame, this);
+ return 0;
+ }
- fix_array = GF_CALLOC (sizeof (int), layout->cnt, gf_common_mt_char);
- if (!fix_array) {
- /* No fix, use the existing layout itself */
- goto done;
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
+ "path=%s", local->loc.path, NULL);
+
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ /* After getting locks, perform lookup again to ensure that the
+ directory was not deleted by a racing rmdir
+ */
+ if (!local->xattr_req)
+ local->xattr_req = dict_new();
+
+ ret = dict_set_int32(local->xattr_req, "list-xattr", 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s",
+ local->loc.path, NULL);
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol && conf->subvolumes[i] == mds_subvol) {
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ NULL);
}
+ }
- new_layout = dht_layout_new (this, priv->subvolume_cnt);
- if (!new_layout)
- goto done;
+ return 0;
- /* If a subvolume is down, do not re-write the layout. */
- ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
- &subvol_down, NULL, NULL);
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
+}
- if (subvol_down || (ret == -1)) {
- gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down"
- ". Skipping fix layout.", subvol_down);
- GF_FREE (new_layout);
- return NULL;
+static int
+dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+ int force)
+{
+ int missing_dirs = 0;
+ int i = 0;
+ int op_errno = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ local->selfheal.force_mkdir = force;
+ local->selfheal.hole_cnt = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ /* We don't need to create any directories. Proceed to heal the
+ * attrs and xattrs
+ */
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
+ } else {
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(loc->gfid, local->gfid);
+
+ ret = dht_common_mark_mdsxattr(frame, NULL, 0);
+ if (!ret)
+ return 0;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", local->gfid,
+ NULL);
+ }
+ }
+ dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout);
+ return 0;
+ }
+
+ /* MDS xattr is populated only while DHT is having more than one
+ subvol.In case of graph switch while adding more dht subvols need to
+ consider hash subvol as a MDS to avoid MDS check failure at the time
+ of running fop on directory
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
+
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
}
+ }
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, loc, local->hashed_subvol,
+ &local->current->ns,
+ dht_selfheal_dir_mkdir_lock_cbk);
+
+ if (ret < 0)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
- for (i = 0; i < new_layout->cnt; i++) {
- /* TODO: fix this in layout_alloc() itself */
- new_layout->list[i].err = -ENOENT;
- if (i < layout->cnt)
- new_layout->list[i].xlator = layout->list[i].xlator;
+static int
+dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int start = 0;
+ uint32_t hashval = 0;
+ int ret = 0;
+ const char *str = NULL;
+ dht_conf_t *conf = NULL;
+ char buf[UUID_CANONICAL_FORM_LEN + 1] = {
+ 0,
+ };
+
+ conf = this->private;
+
+ if (conf->randomize_by_gfid) {
+ str = uuid_utoa_r(loc->gfid, buf);
+ } else {
+ str = loc->path;
+ }
+
+ ret = dht_hash_compute(this, layout->type, str, &hashval);
+ if (ret == 0) {
+ start = (hashval % layout->cnt);
+ }
+
+ return start;
+}
+
+static int
+dht_get_layout_count(xlator_t *this, dht_layout_t *layout, int new_layout)
+{
+ int i = 0;
+ int j = 0;
+ int err = 0;
+ int count = 0;
+ dht_conf_t *conf = NULL;
+
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = EINVAL;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Take this with a pinch of salt. The behaviour seems
+ * to be slightly different when this function is
+ * invoked from mkdir codepath. For eg., err == 0 in
+ * mkdir codepath means directory created but xattr
+ * is not set yet.
+ */
+
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+ */
+ count++;
+ if (!err)
+ layout->list[i].err = -1;
}
+ }
- /* Check if there are any overlap in layout, and give the proper fix */
+ /* no subvolume has enough space, but can't stop directory creation */
+ if (!count || !new_layout) {
for (i = 0; i < layout->cnt; i++) {
- /* No need to fix if 'err' is not '-1' */
- if (layout->list[i].err != -1)
- continue;
-
- /* If already existing layout is having no range, skip it */
- start = layout->list[i].start;
- stop = layout->list[i].stop;
- if ((stop - start) == 0)
- continue;
-
- max_overlap = 0;
-
- /* 'j' is used as starting point of each chunk */
- for (j = 1; j <= count; j++) {
- /* if chunk is already used, don't use it again */
- for (k = 0; k < i; k++)
- if (j == fix_array[k])
- break;
- if (k < i)
- continue;
-
- overlap = dht_find_overlap (i, (j-1), start, stop, chunk);
- if (max_overlap < overlap) {
- max_overlap = overlap;
- fix_array[i] = j;
- }
- }
+ err = layout->list[i].err;
+ if (err == ENOSPC) {
+ layout->list[i].err = -1;
+ count++;
+ }
+ }
+ }
- /* If we have any overlap, then use that itself as new
- layout for the subvolume */
- if (fix_array[i]) {
- chunk_begin = chunk * (fix_array[i] - 1);
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin,
- chunk, cnt, loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (fix_array[i] == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * un-available). Else return count (available up bricks) */
+ count = ((layout->spread_cnt && (layout->spread_cnt <= count))
+ ? layout->spread_cnt
+ : ((count) ? count : 1));
- }
- }
+ return count;
+}
- /* Now, look for layouts which are not having any overlaps
- and give it a fix */
- for (loop_cnt = 0, i = start_subvol; loop_cnt < new_layout->cnt;
- i++, loop_cnt++) {
- if (i == new_layout->cnt)
- i = 0;
+void
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
- /* If 'fix_array[i]' is set, the layout is already fixed. */
- if (fix_array[i])
- continue;
+void
+dht_layout_range_swap(dht_layout_t *layout, int i, int j);
- if (layout->list[i].err != -1) {
- new_layout->list[i].err = layout->list[i].err;
- continue;
- }
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x, y) table[x * new->cnt + y]
+
+static void
+dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
+{
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname(old);
+ /* Now both old_layout->list[] and new_layout->list[]
+ are match the same xlators/subvolumes. i.e,
+ old_layout->[i] and new_layout->[i] are referring
+ to the same subvolumes
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap) * old->cnt * new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table, 0, sizeof(overlap) * old->cnt * new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i, j) = dht_overlap_calc(old, j, new, i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
- for (k = 1; k <= count; k++) {
- for (j = 0; j < new_layout->cnt; j++) {
- if (k == fix_array[j])
- break;
- }
- /* Didn't find any of the list begining with 'k' */
- if (j == new_layout->cnt)
- break;
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i, i) + OV_ENTRY(j, j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i, j) + OV_ENTRY(j, i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
}
+ }
+ }
- fix_array[i] = k;
- chunk_begin = (k - 1) * chunk;
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin, chunk, cnt,
- loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (k == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap(new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i, j);
+ OV_ENTRY(i, j) = OV_ENTRY(max_overlap_idx, j);
+ OV_ENTRY(max_overlap_idx, j) = overlap;
+ }
}
+ }
+}
+static dht_layout_t *
+dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_layout_t *new_layout = NULL;
+ dht_conf_t *priv = NULL;
+ dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ gf_boolean_t maximize_overlap = _gf_true;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ this = frame->this;
+ priv = this->private;
+ local = frame->local;
+
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_msg_debug(THIS->name, 0, "leaving %s alone", loc->path);
+ goto done;
+ }
+
+ new_layout = dht_layout_new(this, priv->subvolume_cnt);
+ if (!new_layout) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto done;
+ }
+
+ /* If a subvolume is down, do not re-write the layout. */
+ dht_layout_anomalies(this, loc, layout, NULL, NULL, NULL, &subvol_down,
+ NULL, NULL);
+
+ if (subvol_down) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s",
+ loc->path, "gfid=%s", gfid, NULL);
+ GF_FREE(new_layout);
+ return NULL;
+ }
+
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
+
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
+
+ new_layout->commit_hash = layout->commit_hash;
+
+ if (priv->du_stats) {
+ for (i = 0; i < priv->subvolume_cnt; ++i) {
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
+ "index=%d", i, "name=%s", priv->subvolumes[i]->name,
+ "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path,
+ NULL);
+
+ /* Maximize overlap if the bricks are all the same
+ * size.
+ * This is probably not going to be very common on
+ * live setups but will benefit our regression tests
+ */
+ if (i && (priv->du_stats[i].chunks != priv->du_stats[0].chunks)) {
+ maximize_overlap = _gf_false;
+ }
+ }
+ } else {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
+ NULL);
+ }
+
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname(new_layout);
+ dht_selfheal_layout_new_directory(frame, loc, new_layout);
+
+ /* Maximize overlap if weighted-rebalance is disabled */
+ if (!priv->do_weighting)
+ maximize_overlap = _gf_true;
+
+ /* Now selectively re-assign ranges only when it helps */
+ if (maximize_overlap) {
+ dht_selfheal_layout_maximize_overlap(frame, loc, new_layout, layout);
+ }
done:
- if (new_layout) {
- /* Now that the new layout has all the proper layout, change the
- inode context */
- dht_layout_set (this, loc->inode, new_layout);
+ if (new_layout) {
+ /* Make sure the extra 'ref' for existing layout is removed */
+ dht_layout_unref(this, local->layout);
- /* Make sure the extra 'ref' for existing layout is removed */
- dht_layout_unref (this, local->layout);
+ local->layout = new_layout;
+ }
- local->layout = new_layout;
- }
+ return local->layout;
+}
- if (fix_array)
- GF_FREE (fix_array);
+/*
+ * Having to call this 2x for each entry in the layout is pretty horrible, but
+ * that's what all of this layout-sorting nonsense gets us.
+ */
+static uint32_t
+dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child)
+{
+ dht_conf_t *priv = parent->private;
+ xlator_list_t *trav;
+ uint32_t index = 0;
- return new_layout;
-}
+ if (!priv->du_stats) {
+ return 0;
+ }
+
+ for (trav = parent->children; trav; trav = trav->next) {
+ if (trav->xlator == child) {
+ return priv->du_stats[index].chunks;
+ }
+ ++index;
+ }
+ return 0;
+}
void
-dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
-{
- xlator_t *this = NULL;
- uint32_t chunk = 0;
- int i = 0;
- uint32_t start = 0;
- int cnt = 0;
- int err = 0;
- int start_subvol = 0;
-
- this = frame->this;
-
- cnt = dht_get_layout_count (this, layout, 1);
-
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
-
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
-
- for (i = start_subvol; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
- cnt, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- goto done;
- }
- start += chunk;
- }
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ xlator_t *this = NULL;
+ double chunk = 0;
+ int i = 0;
+ uint32_t start = 0;
+ int bricks_to_use = 0;
+ int err = 0;
+ int start_subvol = 0;
+ uint32_t curr_size;
+ uint32_t range_size;
+ uint64_t total_size = 0;
+ int real_i;
+ dht_conf_t *priv;
+ gf_boolean_t weight_by_size;
+ int bricks_used = 0;
+
+ this = frame->this;
+ priv = this->private;
+ weight_by_size = priv->do_weighting;
+
+ bricks_to_use = dht_get_layout_count(this, layout, 1);
+ GF_ASSERT(bricks_to_use > 0);
+
+ bricks_used = 0;
+ for (i = 0; i < layout->cnt; ++i) {
+ err = layout->list[i].err;
+ if ((err != -1) && (err != ENOENT)) {
+ continue;
}
-
- for (i = 0; i < start_subvol; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
- cnt, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- goto done;
- }
- start += chunk;
- }
+ curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator);
+ if (!curr_size) {
+ weight_by_size = _gf_false;
+ break;
}
+ total_size += curr_size;
+ if (++bricks_used >= bricks_to_use) {
+ break;
+ }
+ }
+
+ if (weight_by_size && total_size) {
+ /* We know total_size is not zero. */
+ chunk = ((double)0xffffffff) / ((double)total_size);
+ gf_msg_debug(this->name, 0,
+ "chunk size = 0xffffffff / %" PRIu64 " = %f", total_size,
+ chunk);
+ } else {
+ weight_by_size = _gf_false;
+ chunk = ((unsigned long)0xffffffff) / bricks_to_use;
+ }
+
+ start_subvol = dht_selfheal_layout_alloc_start(this, loc, layout);
+
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE(layout);
+
+ /*
+ * OK, what's this "real_i" stuff about? This used to be two loops -
+ * from start_subvol to layout->cnt-1, then from 0 to start_subvol-1.
+ * That way is practically an open invitation to bugs when only one
+ * of the loops is updated. Using real_i and modulo operators to make
+ * it one loop avoids this problem. Remember, folks: it's everyone's
+ * responsibility to help stamp out copy/paste abuse.
+ */
+ bricks_used = 0;
+ for (real_i = 0; real_i < layout->cnt; real_i++) {
+ i = (real_i + start_subvol) % layout->cnt;
+ err = layout->list[i].err;
+ if ((err != -1) && (err != ENOENT)) {
+ continue;
+ }
+ if (weight_by_size) {
+ curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator);
+ if (!curr_size) {
+ continue;
+ }
+ } else {
+ curr_size = 1;
+ }
+ range_size = chunk * curr_size;
+ gf_msg_debug(this->name, 0, "assigning range size 0x%x to %s",
+ range_size, layout->list[i].xlator->name);
+ DHT_SET_LAYOUT_RANGE(layout, i, start, range_size, loc->path);
+ if (++bricks_used >= bricks_to_use) {
+ layout->list[i].stop = 0xffffffff;
+ goto done;
+ }
+ start += range_size;
+ }
done:
- return;
+ return;
+}
+
+static int
+dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ int ret = -1;
+ int i = -1;
+ uint32_t overlaps = 0;
+
+ local = frame->local;
+
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
+
+ if (holes || overlaps) {
+ /* If the layout has anomalies which would change the hash
+ * ranges, then we need to reset the commit_hash for this
+ * directory, as the layout would change and things may not
+ * be in place as expected */
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ dht_selfheal_layout_new_directory(frame, loc, layout);
+ ret = 0;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* directory not present */
+ if (layout->list[i].err == ENOENT) {
+ ret = 0;
+ break;
+ }
+ }
+
+ /* TODO: give a fix to these non-virgins */
+
+ return ret;
}
int
-dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- uint32_t holes = 0;
- int ret = -1;
- int i = -1;
- uint32_t overlaps = 0;
+ dht_local_t *local = NULL;
+ int ret = 0;
+ inode_t *linked_inode = NULL, *inode = NULL;
+ loc_t *loc = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t op_errno = EIO;
- local = frame->local;
+ local = frame->local;
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
+ loc = &local->loc;
- if (holes || overlaps) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
- for (i = 0; i < layout->cnt; i++) {
- /* directory not present */
- if (layout->list[i].err == ENOENT) {
- ret = 0;
- break;
- }
- }
+ linked_inode = inode_link(loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL);
+ ret = -1;
+ goto out;
+ }
- /* TODO: give a fix to these non-virgins */
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref(inode);
- return ret;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
+
+ dht_layout_sort_volname(layout);
+ dht_selfheal_layout_new_directory(frame, &local->loc, layout);
+
+ op_errno = ENOMEM;
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_true,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
+
+out:
+ if (ret < 0) {
+ dir_cbk(frame, NULL, frame->this, -1, op_errno, NULL);
+ }
+
+ return 0;
}
int
-dht_selfheal_new_directory (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+dht_fix_directory_layout(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *tmp_layout = NULL;
+ int ret = 0;
- local = frame->local;
+ local = frame->local;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
- dht_layout_sort_volname (layout);
- dht_selfheal_layout_new_directory (frame, &local->loc, layout);
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- return 0;
+ /* No layout sorting required here */
+ tmp_layout = dht_fix_layout_of_directory(frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
+
+ ret = dht_selfheal_layout_lock(frame, tmp_layout, _gf_false,
+ dht_fix_dir_xattr, dht_should_fix_layout);
+
+ return ret;
+}
+
+int
+dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ inode_t *linked_inode = NULL, *inode = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(this, layout);
+
+ if (local->need_attrheal) {
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+
+ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
+ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+
+ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
+ local->stbuf = local->mds_stbuf;
+ }
+ }
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ linked_inode = inode_link(loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid,
+ NULL);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref(inode);
+ }
+
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
+ dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ &local->selfheal.missing_cnt, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ if (down) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing",
+ "gfid=%s", gfid, NULL);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (misc) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors",
+ "gfid=%s", gfid, NULL);
+
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ dht_layout_sort_volname(layout);
+ local->heal_layout = _gf_true;
+
+ /* Ignore return value as it can be inferred from result of
+ * dht_layout_anomalies
+ */
+ dht_selfheal_dir_getafix(frame, loc, layout);
+
+ if (!(local->selfheal.hole_cnt || local->selfheal.overlaps_cnt ||
+ local->selfheal.missing_cnt)) {
+ local->heal_layout = _gf_false;
+ }
+
+ ret = dht_selfheal_dir_mkdir(frame, loc, layout, 0);
+ if (ret < 0) {
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ return 0;
+
+sorry_no_fix:
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish(frame, this, ret, 1);
+
+ return 0;
}
int
-dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- dht_layout_t *tmp_layout = NULL;
+ int ret = 0;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
- local = frame->local;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ ret = dht_selfheal_dir_mkdir(frame, loc, layout, 1);
+
+ return ret;
+}
- /* No layout sorting required here */
- tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
- if (!tmp_layout) {
- return -1;
+int
+dht_dir_heal_xattrs(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dict_t *user_xattr = NULL;
+ dict_t *internal_xattr = NULL;
+ dict_t *mds_xattr = NULL;
+ dict_t *xdata = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int uret = 0;
+ int uflag = 0;
+ int i = 0;
+ int xattr_hashed = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t allzero[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+ mds_subvol = local->mds_subvol;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (!mds_subvol) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) ||
+ gf_uuid_is_null(local->loc.gfid)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT,
+ "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ internal_xattr = dict_new();
+ if (!internal_xattr) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+ xdata = dict_new();
+ if (!xdata) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+
+ call_cnt = conf->subvolume_cnt;
+
+ user_xattr = dict_new();
+ if (!user_xattr) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+
+ ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL,
+ NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED,
+ "path=%s", local->loc.path, "name=%s", local->mds_subvol->name,
+ NULL);
+ }
+
+ if (!mds_xattr)
+ goto out;
+
+ dht_dir_set_heal_xattr(this, local, user_xattr, mds_xattr, &uret, &uflag);
+
+ /* To set quota related xattr need to set GLUSTERFS_INTERNAL_FOP_KEY
+ * key value to 1
+ */
+ if (dict_get(user_xattr, QUOTA_LIMIT_KEY) ||
+ dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) {
+ ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s",
+ local->loc.path, NULL);
+ goto out;
+ }
+ }
+ if (uret <= 0 && !uflag)
+ goto out;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (subvol == mds_subvol)
+ continue;
+ if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
+ ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
+ NULL);
+ if (ret) {
+ xattr_hashed = 1;
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "set-user-xattr-failed path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
+ }
+ }
+ }
+ /* After heal all custom xattr reset internal MDS xattr to 0 */
+ if (!xattr_hashed) {
+ ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero,
+ 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path,
+ NULL);
+ goto out;
+ }
+ ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL,
+ NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL);
}
- dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
+ }
- return 0;
+out:
+ if (user_xattr)
+ dict_unref(user_xattr);
+ if (mds_xattr)
+ dict_unref(mds_xattr);
+ if (internal_xattr)
+ dict_unref(internal_xattr);
+ if (xdata)
+ dict_unref(xdata);
+ return 0;
}
+int
+dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+dht_dir_attr_heal(void *data)
{
- dht_local_t *local = NULL;
- uint32_t down = 0;
- uint32_t misc = 0;
- int ret = 0;
- xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO("dht", conf, out);
+
+ mds_subvol = local->mds_subvol;
+ call_cnt = conf->subvolume_cnt;
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (!subvol || subvol == mds_subvol)
+ continue;
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ ret = syncop_setattr(
+ subvol, &local->loc, &local->stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL,
+ NULL, NULL, NULL);
+ } else {
+ ret = syncop_setattr(
+ subvol, &local->loc, &local->mds_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL,
+ NULL, NULL, NULL);
+ }
- local = frame->local;
- this = frame->this;
+ if (ret) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- dht_layout_anomalies (this, loc, layout,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- NULL, &local->selfheal.down,
- &local->selfheal.misc, NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ }
+ }
+out:
+ return 0;
+}
- down = local->selfheal.down;
- misc = local->selfheal.misc;
+int
+dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (this, layout);
+/* EXIT: dht_update_commit_hash_for_layout */
+static int
+dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
- if (down) {
- gf_log (this->name, GF_LOG_WARNING,
- "%d subvolumes down -- not fixing", down);
- ret = 0;
- goto sorry_no_fix;
+ local = frame->local;
+
+ /* preserve oldest error */
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, NULL);
+
+ return 0;
+}
+
+static int
+dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+
+ ret = dht_unlock_inodelk(frame, local->lock[0].layout.my_layout.locks,
+ local->lock[0].layout.my_layout.lk_count,
+ dht_update_commit_hash_for_layout_done);
+ if (ret < 0) {
+ /* preserve oldest error, just ... */
+ if (!local->op_ret) {
+ local->op_errno = errno;
+ local->op_ret = -1;
}
- if (misc) {
- gf_log (this->name, GF_LOG_WARNING,
- "%d subvolumes have unrecoverable errors", misc);
- ret = 0;
- goto sorry_no_fix;
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED,
+ "path=%s", local->loc.path, NULL);
+
+ dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
+ }
+
+ return 0;
+}
+
+static int
+dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ /* store first failure, just because */
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_update_commit_hash_for_layout_unlock(frame, this);
+ }
+
+ return 0;
+}
+
+static int
+dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0, j = 0;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int32_t *disk_layout = NULL;
+ dict_t **xattr = NULL;
+
+ local = frame->local;
+ conf = frame->this->private;
+ count = conf->local_subvols_cnt;
+ layout = local->layout;
+
+ if (op_ret < 0) {
+ goto err_done;
+ }
+
+ /* We precreate the xattr list as we cannot change call count post the
+ * first wind as we may never continue from there. So we finish prep
+ * work before winding the setxattrs */
+ xattr = GF_CALLOC(count, sizeof(*xattr), gf_common_mt_char);
+ if (!xattr) {
+ local->op_errno = errno;
+
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED,
+ "allocation-failed path=%s", local->loc.path, NULL);
+
+ goto err;
+ }
+
+ for (i = 0; i < count; i++) {
+ /* find the layout index for the subvolume */
+ ret = dht_layout_index_for_subvol(layout, conf->local_subvols[i]);
+ if (ret < 0) {
+ local->op_errno = ENOENT;
+
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED,
+ "path=%s", local->loc.path, "subvol=%s",
+ conf->local_subvols[i]->name, "find-disk-layout-failed",
+ NULL);
+
+ goto err;
}
+ j = ret;
- dht_layout_sort_volname (layout);
- ret = dht_selfheal_dir_getafix (frame, loc, layout);
+ /* update the commit hash for the layout */
+ layout->list[j].commit_hash = layout->commit_hash;
+ /* extract the current layout */
+ ret = dht_disk_layout_extract(this, layout, j, &disk_layout);
if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "not able to form layout for the directory");
- goto sorry_no_fix;
+ local->op_errno = errno;
+
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", conf->local_subvols[i]->name,
+ "extract-disk-layout-failed", NULL);
+
+ goto err;
}
- dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+ xattr[i] = dict_new();
+ if (!xattr[i]) {
+ local->op_errno = errno;
- return 0;
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed",
+ local->loc.path, NULL);
-sorry_no_fix:
- /* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ goto err;
+ }
- return 0;
-}
+ ret = dict_set_bin(xattr[i], conf->xattr_name, disk_layout, 4 * 4);
+ if (ret != 0) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s",
+ local->loc.path, "subvol=%s", conf->local_subvols[i]->name,
+ "set-xattr-failed", NULL);
-int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
-{
- int ret = 0;
- dht_local_t *local = NULL;
+ goto err;
+ }
+ disk_layout = NULL;
- local = frame->local;
+ gf_msg_trace(this->name, 0,
+ "setting commit hash %u on subvolume %s"
+ " for %s",
+ layout->list[j].commit_hash, conf->local_subvols[i]->name,
+ local->loc.path);
+ }
+
+ /* wind the setting of the commit hash across the local subvols */
+ local->call_cnt = count;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ for (i = 0; i < count; i++) {
+ STACK_WIND(frame, dht_update_commit_hash_for_layout_cbk,
+ conf->local_subvols[i],
+ conf->local_subvols[i]->fops->setxattr, &local->loc,
+ xattr[i], 0, NULL);
+ }
+ for (i = 0; i < count; i++)
+ dict_unref(xattr[i]);
+ GF_FREE(xattr);
+
+ return 0;
+err:
+ if (xattr) {
+ for (i = 0; i < count; i++) {
+ if (xattr[i])
+ dict_unref(xattr[i]);
+ }
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ GF_FREE(xattr);
+ }
- ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+ GF_FREE(disk_layout);
+
+ local->op_ret = -1;
+
+ dht_update_commit_hash_for_layout_unlock(frame, this);
+
+ return 0;
+err_done:
+ local->op_ret = -1;
+
+ dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
+
+ return 0;
+}
+
+/* ENTER: dht_update_commit_hash_for_layout (see EXIT above)
+ * This function is invoked from rebalance only.
+ * As a result, the check here is simple enough to see if defrag is present
+ * in the conf, as other data would be populated appropriately if so.
+ * If ever this was to be used in other code paths, checks would need to
+ * change.
+ *
+ * Functional details:
+ * - Lock the inodes on the subvols that we want the commit hash updated
+ * - Update each layout with the inode layout, modified to take in the new
+ * commit hash.
+ * - Unlock and return.
+ */
+int
+dht_update_commit_hash_for_layout(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
+
+ local = frame->local;
+ conf = frame->this->private;
+
+ if (!conf->defrag)
+ goto err;
+
+ count = conf->local_subvols_cnt;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new(frame->this, conf->local_subvols[i],
+ &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN,
+ NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[i] == NULL)
+ goto err;
+ }
+
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = count;
+
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_update_commit_hash_for_layout_resume);
+ if (ret < 0) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
- return ret;
+ return -1;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 00000000000..bb72b0ffbb5
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,1104 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* TODO: add NS locking */
+#include <glusterfs/statedump.h>
+#include "dht-common.h"
+#include "dht-messages.h"
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+
+static void
+dht_layout_dump(dht_layout_t *layout, const char *prefix)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ if (!layout)
+ goto out;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ if (!IA_ISDIR(layout->type))
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix, "list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix, "list[%d].start", i);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix, "list[%d].stop", i);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].stop);
+ if (layout->list[i].xlator) {
+ gf_proc_dump_build_key(key, prefix, "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s", layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix, "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s", layout->list[i].xlator->name);
+ }
+ }
+
+out:
+ return;
+}
+
+int32_t
+dht_priv_dump(xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix, "xlator.cluster.dht", "%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt", "%d", conf->subvolume_cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ snprintf(key, sizeof(key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]) {
+ snprintf(key, sizeof(key), "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
+ }
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ snprintf(key, sizeof(key), "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+ snprintf(key, sizeof(key), "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d", (int)conf->subvolume_status[i]);
+ }
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+ gf_proc_dump_write("use-readdirp", "%d", conf->use_readdirp);
+
+ if (conf->du_stats && conf->subvolume_status) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i])
+ continue;
+
+ snprintf(key, sizeof(key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s", conf->subvolumes[i]->name);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_percent", i);
+ gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_percent);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_space", i);
+ gf_proc_dump_write(key, "%" PRIu64, conf->du_stats[i].avail_space);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_inodes", i);
+ gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_inodes);
+
+ snprintf(key, sizeof(key), "du_stats[%d].log", i);
+ gf_proc_dump_write(key, "%" PRIu32, conf->du_stats[i].log);
+ }
+ }
+
+ if (conf->last_stat_fetch)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch));
+
+ UNLOCK(&conf->subvolume_lock);
+
+out:
+ return ret;
+}
+
+int32_t
+dht_inodectx_dump(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+
+ if (!this)
+ goto out;
+ if (!inode)
+ goto out;
+
+ ret = dht_inode_ctx_layout_get(inode, this, &layout);
+
+ if ((ret != 0) || !layout)
+ return ret;
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
+
+out:
+ return ret;
+}
+
+void
+dht_fini(xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+
+ conf = this->private;
+ this->private = NULL;
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE(conf->file_layouts[i]);
+ }
+ GF_FREE(conf->file_layouts);
+ }
+
+ dict_unref(conf->leaf_to_subvol);
+
+ /* allocated in dht_init_subvolumes() */
+ GF_FREE(conf->subvolumes);
+ GF_FREE(conf->subvolume_status);
+ GF_FREE(conf->last_event);
+ GF_FREE(conf->subvol_up_time);
+ GF_FREE(conf->du_stats);
+ GF_FREE(conf->decommissioned_bricks);
+
+ /* allocated in dht_init() */
+ GF_FREE(conf->mds_xattr_key);
+ GF_FREE(conf->link_xattr_name);
+ GF_FREE(conf->commithash_xattr_name);
+ GF_FREE(conf->wild_xattr_name);
+
+ /* allocated in dht_init_regex() */
+ if (conf->rsync_regex_valid)
+ regfree(&conf->rsync_regex);
+ if (conf->extra_regex_valid)
+ regfree(&conf->extra_regex);
+
+ synclock_destroy(&conf->link_lock);
+
+ if (conf->lock_pool)
+ mem_pool_destroy(conf->lock_pool);
+
+ GF_FREE(conf);
+ }
+out:
+ return;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+
+ ret = xlator_mem_acct_init(this, gf_dht_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Memory accounting init failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+static int
+dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup(bricks);
+ if (dup_brick == NULL) {
+ goto out;
+ }
+
+ node = strtok_r(dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp(conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] = conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_DECOMMISSION_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r(NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+ conf->decommission_in_progress = 1;
+out:
+ GF_FREE(dup_brick);
+
+ return ret;
+}
+
+static void
+dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
+{
+ int i = 0;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i]) {
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
+ }
+ }
+}
+
+static void
+dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re,
+ gf_boolean_t *re_valid, dht_conf_t *conf)
+{
+ char *temp_str = NULL;
+
+ if (dict_get_str(odict, name, &temp_str) != 0) {
+ if (strcmp(name, "rsync-hash-regex")) {
+ return;
+ }
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+
+ LOCK(&conf->lock);
+ {
+ if (*re_valid) {
+ regfree(re);
+ *re_valid = _gf_false;
+ }
+
+ if (!strcmp(temp_str, "none")) {
+ goto unlock;
+ }
+
+ if (regcomp(re, temp_str, REG_EXTENDED) == 0) {
+ gf_msg_debug(this->name, 0, "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REGEX_INFO,
+ "compiling regex %s failed", temp_str);
+ }
+ }
+unlock:
+ UNLOCK(&conf->lock);
+}
+
+int
+dht_set_subvol_range(xlator_t *this)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf)
+ goto out;
+
+ conf->leaf_to_subvol = dict_new();
+ if (!conf->leaf_to_subvol)
+ goto out;
+
+ ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
+
+out:
+ return ret;
+}
+
+static int
+dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
+{
+ int rebal_thread_count = 0;
+ int ret = 0;
+
+ pthread_mutex_lock(&conf->defrag->dfq_mutex);
+ {
+ if (!strcasecmp(temp_str, "lazy")) {
+ conf->defrag->recon_thread_count = 1;
+ } else if (!strcasecmp(temp_str, "normal")) {
+ conf->defrag->recon_thread_count = 2;
+ } else if (!strcasecmp(temp_str, "aggressive")) {
+ conf->defrag->recon_thread_count = MAX(MAX_REBAL_THREADS - 4, 4);
+ } else if ((gf_string2int(temp_str, &rebal_thread_count) == 0)) {
+ if ((rebal_thread_count > 0) &&
+ (rebal_thread_count <= MAX_REBAL_THREADS)) {
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "rebal thread count configured to %d",
+ rebal_thread_count);
+ goto out;
+ } else {
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be "
+ "within range of 0 and maximum number of"
+ " cores available");
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be {lazy|normal|aggressive}"
+ " or a number up to the number of cores available,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->dthrottle);
+ ret = -1;
+ }
+ }
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+
+out:
+ return ret;
+}
+
+int
+dht_reconfigure(xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ if (dict_get_str(options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp(temp_str, "auto")) {
+ if (!gf_string2boolean(temp_str, &search_unhashed)) {
+ gf_msg_debug(this->name, 0,
+ "Reconfigure: "
+ "lookup-unhashed reconfigured(%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_RECONF("lookup-optimize", conf->lookup_optimize, options, bool,
+ out);
+
+ GF_OPTION_RECONF("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
+
+ GF_OPTION_RECONF("min-free-inodes", conf->min_free_inodes, options, percent,
+ out);
+
+ GF_OPTION_RECONF("directory-layout-spread", conf->dir_spread_cnt, options,
+ uint32, out);
+
+ GF_OPTION_RECONF("readdir-optimize", conf->readdir_optimize, options, bool,
+ out);
+ GF_OPTION_RECONF("randomize-hash-range-by-gfid", conf->randomize_by_gfid,
+ options, bool, out);
+
+ GF_OPTION_RECONF("lock-migration", conf->lock_migration_enabled, options,
+ bool, out);
+
+ GF_OPTION_RECONF("force-migration", conf->force_migration, options, bool,
+ out);
+
+ if (conf->defrag) {
+ if (dict_get_str(options, "rebal-throttle", &temp_str) == 0) {
+ ret = dht_configure_throttle(this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ }
+ }
+
+ if (conf->defrag) {
+ conf->defrag->lock_migration_enabled = conf->lock_migration_enabled;
+ }
+
+ if (conf->defrag) {
+ GF_OPTION_RECONF("rebalance-stats", conf->defrag->stats, options, bool,
+ out);
+ }
+
+ if (dict_get_str(options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks(this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ dht_decommissioned_remove(this, conf);
+ }
+
+ dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex,
+ &conf->rsync_regex_valid, conf);
+ dht_init_regex(this, options, "extra-hash-regex", &conf->extra_regex,
+ &conf->extra_regex_valid, conf);
+
+ GF_OPTION_RECONF("weighted-rebalance", conf->do_weighting, options, bool,
+ out);
+
+ GF_OPTION_RECONF("use-readdirp", conf->use_readdirp, options, bool, out);
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
+ char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *temp_list = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r(data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
+ pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ num = strtok_r(NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ if (gf_string2bytesize_uint64(pattern, &pattern_list->size) == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize_uint64(num, &pattern_list->size) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option. Defrag pattern:"
+ " Invalid number format \"%s\"",
+ num);
+ goto out;
+ }
+ memcpy(pattern_list->path_pattern, pattern, strlen(dup_str));
+
+ if (!defrag->defrag_pattern)
+ temp_list = NULL;
+ else
+ temp_list = defrag->defrag_pattern;
+
+ pattern_list->next = temp_list;
+
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE(dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r(NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(pattern_list);
+ GF_FREE(dup_str);
+
+ return ret;
+}
+
+static int
+dht_init_methods(xlator_t *this)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+
+ conf = this->private;
+ methods = &(conf->methods);
+
+ methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
+ methods->migration_other = NULL;
+ methods->layout_search = dht_layout_search;
+
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+dht_init(xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+ uint32_t commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+
+ if (!this->children) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC(1, sizeof(*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
+
+ LOCK_INIT(&conf->subvolume_lock);
+ LOCK_INIT(&conf->layout_lock);
+ LOCK_INIT(&conf->lock);
+ synclock_init(&conf->link_lock, SYNC_LOCK_DEFAULT);
+
+ /* We get the commit-hash to set only for rebalance process */
+ if (dict_get_uint32(this->options, "commit-hash", &commit_hash) == 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_COMMIT_HASH_INFO,
+ "%s using commit hash %u", __func__, commit_hash);
+ conf->vol_commit_hash = commit_hash;
+ conf->vch_forced = _gf_true;
+ }
+
+ ret = dict_get_int32(this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC(1, sizeof(gf_defrag_info_t), gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO(this->name, defrag, err);
+
+ LOCK_INIT(&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ conf->defrag = defrag;
+ defrag->this = this;
+
+ ret = dict_get_str(this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "Invalid volume configuration: "
+ "node-uuid not specified");
+ goto err;
+ }
+
+ if (gf_uuid_parse(node_uuid, defrag->node_uuid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option:"
+ " Cannot parse glusterd node uuid");
+ goto err;
+ }
+
+ defrag->cmd = cmd;
+
+ defrag->stats = _gf_false;
+
+ defrag->queue = NULL;
+
+ defrag->crawl_done = 0;
+
+ defrag->global_error = 0;
+
+ defrag->q_entry_count = 0;
+
+ defrag->wakeup_crawler = 0;
+
+ pthread_mutex_init(&defrag->dfq_mutex, 0);
+ pthread_cond_init(&defrag->parallel_migration_cond, 0);
+ pthread_cond_init(&defrag->rebalance_crawler_alarm, 0);
+ pthread_cond_init(&defrag->df_wakeup_thread, 0);
+
+ pthread_mutex_init(&defrag->fc_mutex, 0);
+ pthread_cond_init(&defrag->fc_wakeup_cond, 0);
+
+ defrag->global_error = 0;
+ }
+
+ conf->use_fallocate = 1;
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str(this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp(temp_str, "auto")) {
+ gf_boolean_t search_unhashed_bool;
+ ret = gf_string2boolean(temp_str, &search_unhashed_bool);
+ if (ret == -1) {
+ goto err;
+ }
+ conf->search_unhashed = search_unhashed_bool
+ ? GF_DHT_LOOKUP_UNHASHED_ON
+ : GF_DHT_LOOKUP_UNHASHED_OFF;
+ } else {
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_INIT("lookup-optimize", conf->lookup_optimize, bool, err);
+
+ GF_OPTION_INIT("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool, err);
+
+ GF_OPTION_INIT("use-readdirp", conf->use_readdirp, bool, err);
+
+ GF_OPTION_INIT("min-free-disk", conf->min_free_disk, percent_or_size, err);
+
+ GF_OPTION_INIT("min-free-inodes", conf->min_free_inodes, percent, err);
+
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT("directory-layout-spread", conf->dir_spread_cnt, uint32,
+ err);
+
+ GF_OPTION_INIT("assert-no-child-down", conf->assert_no_child_down, bool,
+ err);
+
+ GF_OPTION_INIT("readdir-optimize", conf->readdir_optimize, bool, err);
+
+ GF_OPTION_INIT("lock-migration", conf->lock_migration_enabled, bool, err);
+
+ GF_OPTION_INIT("force-migration", conf->force_migration, bool, err);
+
+ if (defrag) {
+ defrag->lock_migration_enabled = conf->lock_migration_enabled;
+
+ GF_OPTION_INIT("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str(this->options, "rebalance-filter", &temp_str) == 0) {
+ if (gf_defrag_pattern_list_fill(this, defrag, temp_str) == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option:"
+ " Cannot parse rebalance-filter (%s)",
+ temp_str);
+
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes(this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (cmd) {
+ ret = dht_init_local_subvolumes(this, conf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INIT_LOCAL_SUBVOL_FAILED,
+ "dht_init_local_subvolumes failed");
+ goto err;
+ }
+ }
+
+ if (dict_get_str(this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks(this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex(this, this->options, "rsync-hash-regex", &conf->rsync_regex,
+ &conf->rsync_regex_valid, conf);
+ dht_init_regex(this, this->options, "extra-hash-regex", &conf->extra_regex,
+ &conf->extra_regex_valid, conf);
+
+ ret = dht_layouts_init(this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new(dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ " DHT initialisation failed. "
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT("randomize-hash-range-by-gfid", conf->randomize_by_gfid,
+ bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT("rebal-throttle", temp_str, str, err);
+ if (temp_str) {
+ ret = dht_configure_throttle(this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+ }
+
+ GF_OPTION_INIT("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf(&conf->mds_xattr_key, "%s." DHT_MDS_STR, conf->xattr_name);
+ gf_asprintf(&conf->link_xattr_name, "%s." DHT_LINKFILE_STR,
+ conf->xattr_name);
+ gf_asprintf(&conf->commithash_xattr_name, "%s." DHT_COMMITHASH_STR,
+ conf->xattr_name);
+ gf_asprintf(&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+ goto err;
+ }
+
+ GF_OPTION_INIT("weighted-rebalance", conf->do_weighting, bool, err);
+
+ conf->lock_pool = mem_pool_new(dht_lock_t, 512);
+ if (!conf->lock_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED,
+ "failed to create lock mem_pool, failing "
+ "initialization");
+ goto err;
+ }
+
+ this->private = conf;
+
+ if (dht_set_subvol_range(this))
+ goto err;
+
+ if (dht_init_methods(this))
+ goto err;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE(conf->file_layouts[i]);
+ }
+ GF_FREE(conf->file_layouts);
+ }
+
+ GF_FREE(conf->subvolumes);
+
+ GF_FREE(conf->subvolume_status);
+
+ GF_FREE(conf->du_stats);
+
+ GF_FREE(conf->defrag);
+
+ GF_FREE(conf->xattr_name);
+ GF_FREE(conf->link_xattr_name);
+ GF_FREE(conf->wild_xattr_name);
+ GF_FREE(conf->mds_xattr_key);
+
+ if (conf->lock_pool)
+ mem_pool_destroy(conf->lock_pool);
+
+ GF_FREE(conf);
+ }
+
+ return -1;
+}
+
+struct volume_options dht_options[] = {
+ {
+ .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0", "on",
+ "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description =
+ "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_BASIC,
+ },
+ {.key = {"lookup-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description =
+ "This option if set to ON enables the optimization "
+ "of -ve lookups, by not doing a lookup on non-hashed subvolumes for "
+ "files, in case the hashed subvolume does not return any result. "
+ "This option disregards the lookup-unhashed setting, when enabled.",
+ .op_version = {GD_OP_VERSION_3_7_2},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description =
+ "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ .op_version = {1},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ .op_version = {1},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {
+ .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files.",
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."},
+ {
+ .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread. Takes number "
+ "of subvolumes as default value.",
+
+ .op_version = {2},
+ },
+ {
+ .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description =
+ "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick.",
+ .level = OPT_STATUS_ADVANCED,
+ },
+ {
+ .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ {
+ .key = {"commit-hash"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ {
+ .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ {
+ .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory.",
+ .op_version = {2},
+ .level = OPT_STATUS_BASIC,
+ },
+ {.key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries.",
+ .op_version = {1},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description =
+ "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed.",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description =
+ "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed.",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {
+ .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ {
+ .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description =
+ "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it.",
+ .op_version = {3},
+ },
+
+ {.key = {"weighted-rebalance"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description =
+ "When enabled, files will be allocated to bricks "
+ "with a probability proportional to their size. Otherwise, all "
+ "bricks will have the same probability (legacy behavior).",
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ /* NUFA option */
+ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
+
+ /* switch option */
+ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
+
+ {
+ .key = {"randomize-hash-range-by-gfid"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "Use gfid of directory to determine the subvolume "
+ "from which hash ranges are allocated starting with 0. "
+ "Note that we still use a directory/file's name to determine the "
+ "subvolume to which it hashes",
+ .op_version = {GD_OP_VERSION_3_6_0},
+ },
+
+ {.key = {"rebal-throttle"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "normal",
+ .description = " Sets the maximum number of parallel file migrations "
+ "allowed on a node during the rebalance operation. The"
+ " default value is normal and allows a max of "
+ "[($(processing units) - 4) / 2), 2] files to be "
+ "migrated at a time. Lazy will allow only one file to "
+ "be migrated at a time and aggressive will allow "
+ "max of [($(processing units) - 4) / 2), 4]",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+
+ },
+
+ {.key = {"lock-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = " If enabled this feature will migrate the posix locks"
+ " associated with a file during rebalance",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ {.key = {"force-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If disabled, rebalance will not migrate files that "
+ "are being written to by an application",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ {.key = {NULL}},
+};
+
+#define NUM_DHT_OPTIONS (sizeof(dht_options) / sizeof(dht_options[0]))
+
+extern struct volume_options options[NUM_DHT_OPTIONS]
+ __attribute__((alias("dht_options")));
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 46ede414d8b..53de8292704 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -8,604 +8,116 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-/* TODO: add NS locking */
-
-#include "statedump.h"
#include "dht-common.h"
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-struct volume_options options[];
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
-{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", layout, out);
- GF_VALIDATE_OR_GOTO ("dht", prefix, out);
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- gf_proc_dump_build_key(key, prefix, "type");
- gf_proc_dump_write(key, "%d", layout->type);
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
- }
-
-out:
- return;
-}
-
-
-int32_t
-dht_priv_dump (xlator_t *this)
-{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
-
- if (!conf)
- return -1;
-
- ret = TRY_LOCK(&conf->subvolume_lock);
- if (ret != 0) {
- return ret;
- }
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- sprintf (key, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- sprintf (key, "file_layouts[%d]", i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- sprintf (key, "dir_layouts[%d]", i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
-
- sprintf (key, "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
-
- }
-
- gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
- gf_proc_dump_write("gen", "%d", conf->gen);
- gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
- gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
- gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
- gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
- gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
- if (conf ->du_stats) {
- gf_proc_dump_write("du_stats.avail_percent", "%lf",
- conf->du_stats->avail_percent);
- gf_proc_dump_write("du_stats.avail_space", "%lu",
- conf->du_stats->avail_space);
- gf_proc_dump_write("du_stats.avail_inodes", "%lf",
- conf->du_stats->avail_inodes);
- gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);
- }
-
- if (conf->last_stat_fetch.tv_sec)
- gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
-
- UNLOCK(&conf->subvolume_lock);
-
-out:
- return ret;
-}
-
-int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- dht_layout_t *layout = NULL;
- uint64_t tmp_layout = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", inode, out);
-
- ret = inode_ctx_get (inode, this, &tmp_layout);
-
- if (ret != 0)
- return ret;
-
- layout = (dht_layout_t *)(long)tmp_layout;
-
- if (!layout)
- return -1;
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
- dht_layout_dump(layout, "layout");
-
-out:
- return ret;
-}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
- va_list ap;
- dict_t *output = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
-
- if (!data)
- goto out;
-
- va_start (ap, data);
- output = va_arg (ap, dict_t*);
-
- ret = dht_notify (this, event, data, output);
-
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
- this->private = NULL;
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-out:
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-
-int
-dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
- const char *bricks)
-{
- int i = 0;
- int ret = -1;
- char *tmpstr = NULL;
- char *dup_brick = NULL;
- char *node = NULL;
-
- if (!conf || !bricks)
- goto out;
-
- dup_brick = gf_strdup (bricks);
- node = strtok_r (dup_brick, ",", &tmpstr);
- while (node) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!strcmp (conf->subvolumes[i]->name, node)) {
- conf->decommissioned_bricks[i] =
- conf->subvolumes[i];
- gf_log (this->name, GF_LOG_INFO,
- "decommissioning subvolume %s",
- conf->subvolumes[i]->name);
- break;
- }
- }
- if (i == conf->subvolume_cnt) {
- /* Wrong node given. */
- goto out;
- }
- node = strtok_r (NULL, ",", &tmpstr);
- }
-
- ret = 0;
- conf->decommission_in_progress = 1;
-out:
- if (dup_brick)
- GF_FREE (dup_brick);
-
- return ret;
-}
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- gf_boolean_t search_unhashed;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", options, out);
-
- conf = this->private;
- if (!conf)
- return 0;
-
- if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean*/
- if (strcasecmp (temp_str, "auto")) {
- if (!gf_string2boolean (temp_str, &search_unhashed)) {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured (%s)",
- temp_str);
- conf->search_unhashed = search_unhashed;
- } else {
- gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
- " lookup-unhashed should be boolean,"
- " not (%s), defaulting to (%d)",
- temp_str, conf->search_unhashed);
- //return -1;
- ret = -1;
- goto out;
- }
- } else {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured auto ");
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
- }
-
- GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
- percent_or_size, out);
- /* option can be any one of percent or bytes */
- conf->disk_unit = 0;
- if (conf->min_free_disk < 100)
- conf->disk_unit = 'p';
-
- GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
- percent, out);
- GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
- options, uint32, out);
- GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
- bool, out);
- if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- gf_defrag_info_t *defrag = NULL;
- int cmd = 0;
- char *node_uuid = NULL;
-
-
- GF_VALIDATE_OR_GOTO ("dht", this, err);
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
-
- if (cmd) {
- defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
- gf_defrag_info_mt);
-
- GF_VALIDATE_OR_GOTO (this->name, defrag, err);
-
- LOCK_INIT (&defrag->lock);
-
- defrag->is_exiting = 0;
-
- ret = dict_get_str (this->options, "node-uuid", &node_uuid);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
- "specified");
- goto err;
- }
-
- if (uuid_parse (node_uuid, defrag->node_uuid)) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
- "glusterd node uuid");
- goto err;
- }
-
- defrag->cmd = cmd;
-
- conf->defrag = defrag;
- }
-
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
- err);
-
- GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
-
- GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
-
- GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
- err);
-
- conf->dir_spread_cnt = conf->subvolume_cnt;
- GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
- uint32, err);
-
- GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
- bool, err);
- GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
-
- /* option can be any one of percent or bytes */
- conf->disk_unit = 0;
- if (conf->min_free_disk < 100)
- conf->disk_unit = 'p';
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- this->local_pool = mem_pool_new (dht_local_t, 512);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto err;
- }
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
-}
-
+struct xlator_fops dht_pt_fops = {
+ /* we need to keep mkdir to make sure we
+ have layout on new directory */
+ .mkdir = dht_pt_mkdir,
+ .getxattr = dht_pt_getxattr,
+ .fgetxattr = dht_pt_fgetxattr,
+
+ /* required to trace fop properly in changelog */
+ .rename = dht_pt_rename,
+
+ /* FIXME: commenting the '.lookup()' below made some of
+ the failing tests to pass. I would remove the below
+ line, but keeping it here as a reminder for people
+ to check for issues if they find concerns with DHT
+ pass-through logic */
+ /*
+ .lookup = dht_lookup,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ */
+ /* Keeping above as commented, mainly to support the
+ usecase of a gluster volume getting to 1x(anytype),
+ due to remove-brick (shrinking) exercise. In that case,
+ we would need above fops to be available, so we can
+ handle the case of dangling linkto files (if any) */
+};
struct xlator_fops fops = {
- .lookup = dht_lookup,
- .mknod = dht_mknod,
- .create = dht_create,
-
- .open = dht_open,
- .statfs = dht_statfs,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
-
- /* Inode read operations */
- .stat = dht_stat,
- .fstat = dht_fstat,
- .access = dht_access,
- .readlink = dht_readlink,
- .getxattr = dht_getxattr,
- .fgetxattr = dht_fgetxattr,
- .readv = dht_readv,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .lk = dht_lk,
-
- /* Inode write operations */
- .fremovexattr = dht_fremovexattr,
- .removexattr = dht_removexattr,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .writev = dht_writev,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
- .fsetattr = dht_fsetattr,
+ .ipc = dht_ipc,
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
+
+ .open = dht_open,
+ .statfs = dht_statfs,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+ .lease = dht_lease,
+
+ /* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
+ .fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
- .priv = dht_priv_dump,
- .inodectx = dht_inodectx_dump,
+ .priv = dht_priv_dump,
+ .inodectx = dht_inodectx_dump,
};
-
struct xlator_cbks cbks = {
-// .release = dht_release,
-// .releasedir = dht_releasedir,
- .forget = dht_forget
+ .release = dht_release,
+ // .releasedir = dht_releasedir,
+ .forget = dht_forget,
};
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "on",
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- .default_value = "10%",
- .description = "Percentage/Size of disk space that must be "
- "kept free."
- },
- { .key = {"min-free-inodes"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "5%",
- .description = "Percentage inodes that must be "
- "kept free."
- },
- { .key = {"unhashed-sticky-bit"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"use-readdirp"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"assert-no-child-down"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"directory-layout-spread"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"decommissioned-bricks"},
- .type = GF_OPTION_TYPE_ANY,
- },
- { .key = {"rebalance-cmd"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"node-uuid"},
- .type = GF_OPTION_TYPE_STR,
- },
- { .key = {"readdir-optimize"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- { .key = {NULL} },
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+extern struct volume_options dht_options[];
+
+xlator_api_t xlator_api = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "distribute",
+ .pass_through_fops = &dht_pt_fops,
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 1d3e3224763..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -8,701 +8,650 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
+extern struct volume_options dht_options[];
+
int
-nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+nufa_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- xlator_t *subvol = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- int i = 0;
- call_frame_t *prev = NULL;
- int call_cnt = 0;
- int ret = 0;
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ xlator_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "could not set pre-set layout for subvol"
+ " %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
}
- if (op_ret == -1)
- goto out;
-
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
-
- if (!is_dir && !is_linkfile) {
- /* non-directory and not a linkfile */
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
-
- goto out;
- }
+ goto out;
+ }
- if (is_dir) {
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
-
- local->op_ret = 0;
- local->op_errno = 0;
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- }
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ local->op_ret = 0;
+ local->op_errno = 0;
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
}
- return 0;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ }
-out:
- if (!local->hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- local->loc.path);
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "linkfile has no link subvolume. path=%s", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
}
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+
+ return 0;
+out:
+ if (!local->hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
err:
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, postparent);
- return 0;
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+ return 0;
}
int
-nufa_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+nufa_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- xlator_t *hashed_subvol = NULL;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate(loc)) {
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "revalidate lookup without cache. "
+ "path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_debug(this->name, 0, "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref(this, local->layout);
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref(loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
}
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
-
- local->hashed_subvol = hashed_subvol;
-
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "incomplete layout failure for path=%s",
- loc->path);
- dht_layout_unref (this, local->layout);
- goto do_fresh_lookup;
- }
-
- local->inode = inode_ref (loc->inode);
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
-
- if (!--call_cnt)
- break;
- }
- } else {
- do_fresh_lookup:
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- /* Send it to only local volume */
- STACK_WIND (frame, nufa_local_lookup_cbk,
- (xlator_t *)conf->private,
- ((xlator_t *)conf->private)->fops->lookup,
- loc, local->xattr_req);
+ ret = dict_set_uint32(local->xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
}
- return 0;
+ /* Send it to only local volume */
+ STACK_WIND_COOKIE(
+ frame, nufa_local_lookup_cbk, ((xlator_t *)conf->private),
+ ((xlator_t *)conf->private),
+ ((xlator_t *)conf->private)->fops->lookup, loc, local->xattr_req);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
int
-nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+nufa_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->umask,
- local->fd, local->params);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
-nufa_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+nufa_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) {
+ avail_subvol = dht_free_disk_available_subvol(
+ this, (xlator_t *)conf->private, local);
+ }
+
+ if (subvol != avail_subvol) {
+ /* create a link file instead of actual file */
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->flags = flags;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ dht_linkfile_create(frame, nufa_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+ return 0;
+ }
- conf = this->private;
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- dht_get_du_info (frame, this, loc);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- avail_subvol = conf->private;
- if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
- }
+ return 0;
+}
- if (subvol != avail_subvol) {
- /* create a link file instead of actual file */
- local->params = dict_ref (params);
- local->mode = mode;
- local->flags = flags;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- return 0;
- }
+int
+nufa_mknod_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE(
+ frame, dht_newfile_cbk, (void *)local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask, local->params);
return 0;
-
+ }
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
+ WIPE(postparent);
+ WIPE(preparent);
- return 0;
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+ return 0;
}
int
-nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+nufa_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* Consider the disksize in consideration */
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) {
+ avail_subvol = dht_free_disk_available_subvol(
+ this, (xlator_t *)conf->private, local);
+ }
+
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create(frame, nufa_mknod_linkfile_cbk, this, avail_subvol,
+ subvol, loc);
+ return 0;
+ }
- local = frame->local;
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->umask, local->params);
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
- return 0;
- }
+ return 0;
- WIPE (postparent);
- WIPE (preparent);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent, xdata);
- return 0;
+ return 0;
}
-
-int
-nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
+gf_boolean_t
+same_first_part(char *str1, char term1, char *str2, char term2)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
- conf = this->private;
-
- dht_get_du_info (frame, this, loc);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
}
-
- /* Consider the disksize in consideration */
- avail_subvol = conf->private;
- if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
- }
-
- if (avail_subvol != subvol) {
- /* Create linkfile first */
-
- local->params = dict_ref (params);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
- avail_subvol, subvol, loc);
- return 0;
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
}
+ ++str1;
+ ++str2;
+ }
+}
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+typedef struct nufa_args {
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
+} nufa_args_t;
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, umask, params);
+static void
+nufa_find_local_brick(xlator_t *xl, void *data)
+{
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /*This means a local subvol was already found. We pick the first brick
+ * that is local*/
+ if (conf->private)
+ return;
- return 0;
+ if (strcmp(xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Using specified subvol %s", local_volname);
+ return;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ if (!addr_match)
+ return;
- return 0;
+ ret = dict_get_str(xl->options, "remote-host", &brick_host);
+ if ((ret == 0) && (gf_is_same_address(local_volname, brick_host) ||
+ gf_is_local_addr(brick_host))) {
+ conf->private = xl;
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Using the first local "
+ "subvol %s",
+ xl->name);
+ return;
+ }
}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
+static void
+nufa_to_dht(xlator_t *this)
{
- int ret = -1;
+ GF_ASSERT(this);
+ GF_ASSERT(this->fops);
- ret = dht_notify (this, event, data);
-
- return ret;
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
}
-void
-fini (xlator_t *this)
+int
+nufa_find_local_subvol(xlator_t *this, void (*fn)(xlator_t *each, void *data),
+ void *data)
{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-
- return;
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first(this, fn, data);
+ if (!conf->private) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_BRICK_ERROR,
+ "Couldn't find a local "
+ "brick");
+ return -1;
+ }
+
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
+ parent = trav->xlator;
+ if (strcmp(parent->type, "cluster/nufa") == 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Found local subvol, "
+ "%s",
+ candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
+ }
+
+ candidate = parent;
+ trav = parent->parents;
+ }
+
+ return ret;
}
int
-init (xlator_t *this)
+nufa_init(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- char my_hostname[256];
- double temp_free_disk = 0;
- uint64_t size = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf),
- gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {
+ 0,
+ };
+
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
- conf->gen = 1;
+ if ((data = dict_get(this->options, "local-volume-name"))) {
+ local_volname = data->data;
+ } else {
+ addr_match = _gf_true;
local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
- }
-
+ ret = gethostname(my_hostname, 256);
if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
- local_volname = data->data;
- }
-
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
-
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->private = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str, &size);
- conf->min_free_disk = size;
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str, &size);
- conf->min_free_disk = size;
- conf->disk_unit = 'b';
- }
- }
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
- }
-
- this->local_pool = mem_pool_new (dht_local_t, 128);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto err;
- }
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
+ local_volname = my_hostname;
+
+ else
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_GET_HOSTNAME_FAILED, "could not find hostname");
+ }
+
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol(this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht(this);
+ }
+ return 0;
}
-
-struct xlator_fops fops = {
- .lookup = nufa_lookup,
- .create = nufa_create,
- .mknod = nufa_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
+dht_methods_t dht_methods = {
+ .migration_get_dst_subvol = dht_migration_get_dst_subvol,
+ .layout_search = dht_layout_search,
};
-
-struct xlator_cbks cbks = {
- .forget = dht_forget
+struct xlator_fops fops = {
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "nufa",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index bc1f5f1f499..207d109a025 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -8,12 +8,6 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
#include "dht-mem-types.h"
@@ -22,994 +16,876 @@
#include <fnmatch.h>
#include <string.h>
+extern struct volume_options dht_options[];
+
struct switch_sched_array {
- xlator_t *xl;
- int32_t eligible;
- int32_t considered;
+ xlator_t *xl;
+ int32_t eligible;
+ int32_t considered;
};
/* Select one of this struct based on the path's pattern match */
struct switch_struct {
- struct switch_struct *next;
- struct switch_sched_array *array;
- int32_t node_index; /* Index of the node in
- this pattern. */
- int32_t num_child; /* Total num of child nodes
- with this pattern. */
- char path_pattern[256];
+ struct switch_struct *next;
+ struct switch_sched_array *array;
+ int32_t node_index; /* Index of the node in
+ this pattern. */
+ int32_t num_child; /* Total num of child nodes
+ with this pattern. */
+ char path_pattern[256];
};
/* TODO: all 'TODO's in dht.c holds good */
/* This function should return child node as '*:subvolumes' is inserterd */
static int32_t
-gf_switch_valid_child (xlator_t *this, const char *child)
+gf_switch_valid_child(xlator_t *this, const char *child)
{
- xlator_list_t *children = NULL;
- int32_t ret = 0;
+ xlator_list_t *children = NULL;
+ int32_t ret = 0;
- children = this->children;
- while (children) {
- if (!strcmp (child, children->xlator->name)) {
- ret = 1;
- break;
- }
- children = children->next;
+ children = this->children;
+ while (children) {
+ if (!strcmp(child, children->xlator->name)) {
+ ret = 1;
+ break;
}
+ children = children->next;
+ }
- return ret;
+ return ret;
}
static xlator_t *
-get_switch_matching_subvol (const char *path, dht_conf_t *conf,
- xlator_t *hashed_subvol)
+get_switch_matching_subvol(const char *path, dht_conf_t *conf,
+ xlator_t *hashed_subvol)
{
- struct switch_struct *cond = NULL;
- struct switch_struct *trav = NULL;
- char *pathname = NULL;
- int idx = 0;
-
- cond = conf->private;
- if (!cond)
- return hashed_subvol;
+ struct switch_struct *cond = NULL;
+ struct switch_struct *trav = NULL;
+ char *pathname = NULL;
+ int idx = 0;
+ xlator_t *subvol = NULL;
+
+ cond = conf->private;
+ subvol = hashed_subvol;
+ if (!cond)
+ goto out;
+
+ pathname = gf_strdup(path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
+ while (trav) {
+ if (fnmatch(trav->path_pattern, pathname, FNM_NOESCAPE) == 0) {
+ for (idx = 0; idx < trav->num_child; idx++) {
+ if (trav->array[idx].xl == hashed_subvol)
+ goto out;
+ }
+ idx = trav->node_index++;
+ trav->node_index %= trav->num_child;
+ subvol = trav->array[idx].xl;
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ GF_FREE(pathname);
- trav = cond;
- pathname = gf_strdup (path);
- while (trav) {
- if (fnmatch (trav->path_pattern,
- pathname, FNM_NOESCAPE) == 0) {
- for (idx = 0; idx < trav->num_child; idx++) {
- if (trav->array[idx].xl == hashed_subvol)
- return hashed_subvol;
- }
- idx = trav->node_index++;
- trav->node_index %= trav->num_child;
- return trav->array[idx].xl;
- }
- trav = trav->next;
- }
- GF_FREE (pathname);
- return hashed_subvol;
+ return subvol;
}
-
int
-switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+switch_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- xlator_t *subvol = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- int i = 0;
- call_frame_t *prev = NULL;
- int call_cnt = 0;
- int ret = 0;
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ xlator_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
- if (op_ret == -1)
- goto out;
+ conf = this->private;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ prev = cookie;
+ local = frame->local;